diff options
Diffstat (limited to 'src')
95 files changed, 2838 insertions, 753 deletions
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 0913be72c..3a57356ab 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt | |||
| @@ -54,8 +54,10 @@ else() | |||
| 54 | add_compile_options( | 54 | add_compile_options( |
| 55 | -Wall | 55 | -Wall |
| 56 | -Werror=implicit-fallthrough | 56 | -Werror=implicit-fallthrough |
| 57 | -Werror=missing-declarations | ||
| 57 | -Werror=reorder | 58 | -Werror=reorder |
| 58 | -Wextra | 59 | -Wextra |
| 60 | -Wmissing-declarations | ||
| 59 | -Wno-attributes | 61 | -Wno-attributes |
| 60 | -Wno-unused-parameter | 62 | -Wno-unused-parameter |
| 61 | ) | 63 | ) |
diff --git a/src/audio_core/CMakeLists.txt b/src/audio_core/CMakeLists.txt index c381dbe1d..5ef38a337 100644 --- a/src/audio_core/CMakeLists.txt +++ b/src/audio_core/CMakeLists.txt | |||
| @@ -7,9 +7,12 @@ add_library(audio_core STATIC | |||
| 7 | audio_out.h | 7 | audio_out.h |
| 8 | audio_renderer.cpp | 8 | audio_renderer.cpp |
| 9 | audio_renderer.h | 9 | audio_renderer.h |
| 10 | behavior_info.cpp | ||
| 11 | behavior_info.h | ||
| 10 | buffer.h | 12 | buffer.h |
| 11 | codec.cpp | 13 | codec.cpp |
| 12 | codec.h | 14 | codec.h |
| 15 | common.h | ||
| 13 | null_sink.h | 16 | null_sink.h |
| 14 | sink.h | 17 | sink.h |
| 15 | sink_details.cpp | 18 | sink_details.cpp |
diff --git a/src/audio_core/audio_renderer.cpp b/src/audio_core/audio_renderer.cpp index 7a9dc61d4..d18ef6940 100644 --- a/src/audio_core/audio_renderer.cpp +++ b/src/audio_core/audio_renderer.cpp | |||
| @@ -6,6 +6,7 @@ | |||
| 6 | #include "audio_core/audio_out.h" | 6 | #include "audio_core/audio_out.h" |
| 7 | #include "audio_core/audio_renderer.h" | 7 | #include "audio_core/audio_renderer.h" |
| 8 | #include "audio_core/codec.h" | 8 | #include "audio_core/codec.h" |
| 9 | #include "audio_core/common.h" | ||
| 9 | #include "common/assert.h" | 10 | #include "common/assert.h" |
| 10 | #include "common/logging/log.h" | 11 | #include "common/logging/log.h" |
| 11 | #include "core/core.h" | 12 | #include "core/core.h" |
| @@ -79,7 +80,7 @@ AudioRenderer::AudioRenderer(Core::Timing::CoreTiming& core_timing, Core::Memory | |||
| 79 | std::size_t instance_number) | 80 | std::size_t instance_number) |
| 80 | : worker_params{params}, buffer_event{buffer_event}, voices(params.voice_count), | 81 | : worker_params{params}, buffer_event{buffer_event}, voices(params.voice_count), |
| 81 | effects(params.effect_count), memory{memory_} { | 82 | effects(params.effect_count), memory{memory_} { |
| 82 | 83 | behavior_info.SetUserRevision(params.revision); | |
| 83 | audio_out = std::make_unique<AudioCore::AudioOut>(); | 84 | audio_out = std::make_unique<AudioCore::AudioOut>(); |
| 84 | stream = audio_out->OpenStream(core_timing, STREAM_SAMPLE_RATE, STREAM_NUM_CHANNELS, | 85 | stream = audio_out->OpenStream(core_timing, STREAM_SAMPLE_RATE, STREAM_NUM_CHANNELS, |
| 85 | fmt::format("AudioRenderer-Instance{}", instance_number), | 86 | fmt::format("AudioRenderer-Instance{}", instance_number), |
| @@ -109,17 +110,17 @@ Stream::State AudioRenderer::GetStreamState() const { | |||
| 109 | return stream->GetState(); | 110 | return stream->GetState(); |
| 110 | } | 111 | } |
| 111 | 112 | ||
| 112 | static constexpr u32 VersionFromRevision(u32_le rev) { | 113 | ResultVal<std::vector<u8>> AudioRenderer::UpdateAudioRenderer(const std::vector<u8>& input_params) { |
| 113 | // "REV7" -> 7 | ||
| 114 | return ((rev >> 24) & 0xff) - 0x30; | ||
| 115 | } | ||
| 116 | |||
| 117 | std::vector<u8> AudioRenderer::UpdateAudioRenderer(const std::vector<u8>& input_params) { | ||
| 118 | // Copy UpdateDataHeader struct | 114 | // Copy UpdateDataHeader struct |
| 119 | UpdateDataHeader config{}; | 115 | UpdateDataHeader config{}; |
| 120 | std::memcpy(&config, input_params.data(), sizeof(UpdateDataHeader)); | 116 | std::memcpy(&config, input_params.data(), sizeof(UpdateDataHeader)); |
| 121 | u32 memory_pool_count = worker_params.effect_count + (worker_params.voice_count * 4); | 117 | u32 memory_pool_count = worker_params.effect_count + (worker_params.voice_count * 4); |
| 122 | 118 | ||
| 119 | if (!behavior_info.UpdateInput(input_params, sizeof(UpdateDataHeader))) { | ||
| 120 | LOG_ERROR(Audio, "Failed to update behavior info input parameters"); | ||
| 121 | return Audren::ERR_INVALID_PARAMETERS; | ||
| 122 | } | ||
| 123 | |||
| 123 | // Copy MemoryPoolInfo structs | 124 | // Copy MemoryPoolInfo structs |
| 124 | std::vector<MemoryPoolInfo> mem_pool_info(memory_pool_count); | 125 | std::vector<MemoryPoolInfo> mem_pool_info(memory_pool_count); |
| 125 | std::memcpy(mem_pool_info.data(), | 126 | std::memcpy(mem_pool_info.data(), |
| @@ -173,8 +174,7 @@ std::vector<u8> AudioRenderer::UpdateAudioRenderer(const std::vector<u8>& input_ | |||
| 173 | // Copy output header | 174 | // Copy output header |
| 174 | UpdateDataHeader response_data{worker_params}; | 175 | UpdateDataHeader response_data{worker_params}; |
| 175 | std::vector<u8> output_params(response_data.total_size); | 176 | std::vector<u8> output_params(response_data.total_size); |
| 176 | const auto audren_revision = VersionFromRevision(config.revision); | 177 | if (behavior_info.IsElapsedFrameCountSupported()) { |
| 177 | if (audren_revision >= 5) { | ||
| 178 | response_data.frame_count = 0x10; | 178 | response_data.frame_count = 0x10; |
| 179 | response_data.total_size += 0x10; | 179 | response_data.total_size += 0x10; |
| 180 | } | 180 | } |
| @@ -200,7 +200,19 @@ std::vector<u8> AudioRenderer::UpdateAudioRenderer(const std::vector<u8>& input_ | |||
| 200 | sizeof(EffectOutStatus)); | 200 | sizeof(EffectOutStatus)); |
| 201 | effect_out_status_offset += sizeof(EffectOutStatus); | 201 | effect_out_status_offset += sizeof(EffectOutStatus); |
| 202 | } | 202 | } |
| 203 | return output_params; | 203 | |
| 204 | // Update behavior info output | ||
| 205 | const std::size_t behavior_out_status_offset{ | ||
| 206 | sizeof(UpdateDataHeader) + response_data.memory_pools_size + response_data.voices_size + | ||
| 207 | response_data.effects_size + response_data.sinks_size + | ||
| 208 | response_data.performance_manager_size}; | ||
| 209 | |||
| 210 | if (!behavior_info.UpdateOutput(output_params, behavior_out_status_offset)) { | ||
| 211 | LOG_ERROR(Audio, "Failed to update behavior info output parameters"); | ||
| 212 | return Audren::ERR_INVALID_PARAMETERS; | ||
| 213 | } | ||
| 214 | |||
| 215 | return MakeResult(output_params); | ||
| 204 | } | 216 | } |
| 205 | 217 | ||
| 206 | void AudioRenderer::VoiceState::SetWaveIndex(std::size_t index) { | 218 | void AudioRenderer::VoiceState::SetWaveIndex(std::size_t index) { |
diff --git a/src/audio_core/audio_renderer.h b/src/audio_core/audio_renderer.h index 62faf9f19..b42770fae 100644 --- a/src/audio_core/audio_renderer.h +++ b/src/audio_core/audio_renderer.h | |||
| @@ -8,11 +8,13 @@ | |||
| 8 | #include <memory> | 8 | #include <memory> |
| 9 | #include <vector> | 9 | #include <vector> |
| 10 | 10 | ||
| 11 | #include "audio_core/behavior_info.h" | ||
| 11 | #include "audio_core/stream.h" | 12 | #include "audio_core/stream.h" |
| 12 | #include "common/common_funcs.h" | 13 | #include "common/common_funcs.h" |
| 13 | #include "common/common_types.h" | 14 | #include "common/common_types.h" |
| 14 | #include "common/swap.h" | 15 | #include "common/swap.h" |
| 15 | #include "core/hle/kernel/object.h" | 16 | #include "core/hle/kernel/object.h" |
| 17 | #include "core/hle/result.h" | ||
| 16 | 18 | ||
| 17 | namespace Core::Timing { | 19 | namespace Core::Timing { |
| 18 | class CoreTiming; | 20 | class CoreTiming; |
| @@ -226,7 +228,7 @@ public: | |||
| 226 | std::shared_ptr<Kernel::WritableEvent> buffer_event, std::size_t instance_number); | 228 | std::shared_ptr<Kernel::WritableEvent> buffer_event, std::size_t instance_number); |
| 227 | ~AudioRenderer(); | 229 | ~AudioRenderer(); |
| 228 | 230 | ||
| 229 | std::vector<u8> UpdateAudioRenderer(const std::vector<u8>& input_params); | 231 | ResultVal<std::vector<u8>> UpdateAudioRenderer(const std::vector<u8>& input_params); |
| 230 | void QueueMixedBuffer(Buffer::Tag tag); | 232 | void QueueMixedBuffer(Buffer::Tag tag); |
| 231 | void ReleaseAndQueueBuffers(); | 233 | void ReleaseAndQueueBuffers(); |
| 232 | u32 GetSampleRate() const; | 234 | u32 GetSampleRate() const; |
| @@ -237,6 +239,7 @@ public: | |||
| 237 | private: | 239 | private: |
| 238 | class EffectState; | 240 | class EffectState; |
| 239 | class VoiceState; | 241 | class VoiceState; |
| 242 | BehaviorInfo behavior_info{}; | ||
| 240 | 243 | ||
| 241 | AudioRendererParameter worker_params; | 244 | AudioRendererParameter worker_params; |
| 242 | std::shared_ptr<Kernel::WritableEvent> buffer_event; | 245 | std::shared_ptr<Kernel::WritableEvent> buffer_event; |
diff --git a/src/audio_core/behavior_info.cpp b/src/audio_core/behavior_info.cpp new file mode 100644 index 000000000..94b7a3bf1 --- /dev/null +++ b/src/audio_core/behavior_info.cpp | |||
| @@ -0,0 +1,100 @@ | |||
| 1 | // Copyright 2020 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <cstring> | ||
| 6 | #include "audio_core/behavior_info.h" | ||
| 7 | #include "audio_core/common.h" | ||
| 8 | #include "common/logging/log.h" | ||
| 9 | |||
| 10 | namespace AudioCore { | ||
| 11 | |||
| 12 | BehaviorInfo::BehaviorInfo() : process_revision(CURRENT_PROCESS_REVISION) {} | ||
| 13 | BehaviorInfo::~BehaviorInfo() = default; | ||
| 14 | |||
| 15 | bool BehaviorInfo::UpdateInput(const std::vector<u8>& buffer, std::size_t offset) { | ||
| 16 | if (!CanConsumeBuffer(buffer.size(), offset, sizeof(InParams))) { | ||
| 17 | LOG_ERROR(Audio, "Buffer is an invalid size!"); | ||
| 18 | return false; | ||
| 19 | } | ||
| 20 | InParams params{}; | ||
| 21 | std::memcpy(¶ms, buffer.data() + offset, sizeof(InParams)); | ||
| 22 | |||
| 23 | if (!IsValidRevision(params.revision)) { | ||
| 24 | LOG_ERROR(Audio, "Invalid input revision, revision=0x{:08X}", params.revision); | ||
| 25 | return false; | ||
| 26 | } | ||
| 27 | |||
| 28 | if (user_revision != params.revision) { | ||
| 29 | LOG_ERROR(Audio, | ||
| 30 | "User revision differs from input revision, expecting 0x{:08X} but got 0x{:08X}", | ||
| 31 | user_revision, params.revision); | ||
| 32 | return false; | ||
| 33 | } | ||
| 34 | |||
| 35 | ClearError(); | ||
| 36 | UpdateFlags(params.flags); | ||
| 37 | |||
| 38 | // TODO(ogniK): Check input params size when InfoUpdater is used | ||
| 39 | |||
| 40 | return true; | ||
| 41 | } | ||
| 42 | |||
| 43 | bool BehaviorInfo::UpdateOutput(std::vector<u8>& buffer, std::size_t offset) { | ||
| 44 | if (!CanConsumeBuffer(buffer.size(), offset, sizeof(OutParams))) { | ||
| 45 | LOG_ERROR(Audio, "Buffer is an invalid size!"); | ||
| 46 | return false; | ||
| 47 | } | ||
| 48 | |||
| 49 | OutParams params{}; | ||
| 50 | std::memcpy(params.errors.data(), errors.data(), sizeof(ErrorInfo) * errors.size()); | ||
| 51 | params.error_count = static_cast<u32_le>(error_count); | ||
| 52 | std::memcpy(buffer.data() + offset, ¶ms, sizeof(OutParams)); | ||
| 53 | return true; | ||
| 54 | } | ||
| 55 | |||
| 56 | void BehaviorInfo::ClearError() { | ||
| 57 | error_count = 0; | ||
| 58 | } | ||
| 59 | |||
| 60 | void BehaviorInfo::UpdateFlags(u64_le dest_flags) { | ||
| 61 | flags = dest_flags; | ||
| 62 | } | ||
| 63 | |||
| 64 | void BehaviorInfo::SetUserRevision(u32_le revision) { | ||
| 65 | user_revision = revision; | ||
| 66 | } | ||
| 67 | |||
| 68 | bool BehaviorInfo::IsAdpcmLoopContextBugFixed() const { | ||
| 69 | return IsRevisionSupported(2, user_revision); | ||
| 70 | } | ||
| 71 | |||
| 72 | bool BehaviorInfo::IsSplitterSupported() const { | ||
| 73 | return IsRevisionSupported(2, user_revision); | ||
| 74 | } | ||
| 75 | |||
| 76 | bool BehaviorInfo::IsLongSizePreDelaySupported() const { | ||
| 77 | return IsRevisionSupported(3, user_revision); | ||
| 78 | } | ||
| 79 | |||
| 80 | bool BehaviorInfo::IsAudioRenererProcessingTimeLimit80PercentSupported() const { | ||
| 81 | return IsRevisionSupported(5, user_revision); | ||
| 82 | } | ||
| 83 | |||
| 84 | bool BehaviorInfo::IsAudioRenererProcessingTimeLimit75PercentSupported() const { | ||
| 85 | return IsRevisionSupported(4, user_revision); | ||
| 86 | } | ||
| 87 | |||
| 88 | bool BehaviorInfo::IsAudioRenererProcessingTimeLimit70PercentSupported() const { | ||
| 89 | return IsRevisionSupported(1, user_revision); | ||
| 90 | } | ||
| 91 | |||
| 92 | bool BehaviorInfo::IsElapsedFrameCountSupported() const { | ||
| 93 | return IsRevisionSupported(5, user_revision); | ||
| 94 | } | ||
| 95 | |||
| 96 | bool BehaviorInfo::IsMemoryPoolForceMappingEnabled() const { | ||
| 97 | return (flags & 1) != 0; | ||
| 98 | } | ||
| 99 | |||
| 100 | } // namespace AudioCore | ||
diff --git a/src/audio_core/behavior_info.h b/src/audio_core/behavior_info.h new file mode 100644 index 000000000..c5e91ab39 --- /dev/null +++ b/src/audio_core/behavior_info.h | |||
| @@ -0,0 +1,66 @@ | |||
| 1 | // Copyright 2020 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include <array> | ||
| 8 | |||
| 9 | #include <vector> | ||
| 10 | #include "common/common_funcs.h" | ||
| 11 | #include "common/common_types.h" | ||
| 12 | #include "common/swap.h" | ||
| 13 | |||
| 14 | namespace AudioCore { | ||
| 15 | class BehaviorInfo { | ||
| 16 | public: | ||
| 17 | explicit BehaviorInfo(); | ||
| 18 | ~BehaviorInfo(); | ||
| 19 | |||
| 20 | bool UpdateInput(const std::vector<u8>& buffer, std::size_t offset); | ||
| 21 | bool UpdateOutput(std::vector<u8>& buffer, std::size_t offset); | ||
| 22 | |||
| 23 | void ClearError(); | ||
| 24 | void UpdateFlags(u64_le dest_flags); | ||
| 25 | void SetUserRevision(u32_le revision); | ||
| 26 | |||
| 27 | bool IsAdpcmLoopContextBugFixed() const; | ||
| 28 | bool IsSplitterSupported() const; | ||
| 29 | bool IsLongSizePreDelaySupported() const; | ||
| 30 | bool IsAudioRenererProcessingTimeLimit80PercentSupported() const; | ||
| 31 | bool IsAudioRenererProcessingTimeLimit75PercentSupported() const; | ||
| 32 | bool IsAudioRenererProcessingTimeLimit70PercentSupported() const; | ||
| 33 | bool IsElapsedFrameCountSupported() const; | ||
| 34 | bool IsMemoryPoolForceMappingEnabled() const; | ||
| 35 | |||
| 36 | private: | ||
| 37 | u32_le process_revision{}; | ||
| 38 | u32_le user_revision{}; | ||
| 39 | u64_le flags{}; | ||
| 40 | |||
| 41 | struct ErrorInfo { | ||
| 42 | u32_le result{}; | ||
| 43 | INSERT_PADDING_WORDS(1); | ||
| 44 | u64_le result_info{}; | ||
| 45 | }; | ||
| 46 | static_assert(sizeof(ErrorInfo) == 0x10, "ErrorInfo is an invalid size"); | ||
| 47 | |||
| 48 | std::array<ErrorInfo, 10> errors{}; | ||
| 49 | std::size_t error_count{}; | ||
| 50 | |||
| 51 | struct InParams { | ||
| 52 | u32_le revision{}; | ||
| 53 | u32_le padding{}; | ||
| 54 | u64_le flags{}; | ||
| 55 | }; | ||
| 56 | static_assert(sizeof(InParams) == 0x10, "InParams is an invalid size"); | ||
| 57 | |||
| 58 | struct OutParams { | ||
| 59 | std::array<ErrorInfo, 10> errors{}; | ||
| 60 | u32_le error_count{}; | ||
| 61 | INSERT_PADDING_BYTES(12); | ||
| 62 | }; | ||
| 63 | static_assert(sizeof(OutParams) == 0xb0, "OutParams is an invalid size"); | ||
| 64 | }; | ||
| 65 | |||
| 66 | } // namespace AudioCore | ||
diff --git a/src/audio_core/common.h b/src/audio_core/common.h new file mode 100644 index 000000000..98478b66b --- /dev/null +++ b/src/audio_core/common.h | |||
| @@ -0,0 +1,47 @@ | |||
| 1 | // Copyright 2020 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | #include "common/common_funcs.h" | ||
| 7 | #include "common/common_types.h" | ||
| 8 | #include "common/swap.h" | ||
| 9 | #include "core/hle/result.h" | ||
| 10 | |||
| 11 | namespace AudioCore { | ||
| 12 | namespace Audren { | ||
| 13 | constexpr ResultCode ERR_INVALID_PARAMETERS{ErrorModule::Audio, 41}; | ||
| 14 | } | ||
| 15 | |||
| 16 | constexpr u32_le CURRENT_PROCESS_REVISION = Common::MakeMagic('R', 'E', 'V', '8'); | ||
| 17 | |||
| 18 | static constexpr u32 VersionFromRevision(u32_le rev) { | ||
| 19 | // "REV7" -> 7 | ||
| 20 | return ((rev >> 24) & 0xff) - 0x30; | ||
| 21 | } | ||
| 22 | |||
| 23 | static constexpr bool IsRevisionSupported(u32 required, u32_le user_revision) { | ||
| 24 | const auto base = VersionFromRevision(user_revision); | ||
| 25 | return required <= base; | ||
| 26 | } | ||
| 27 | |||
| 28 | static constexpr bool IsValidRevision(u32_le revision) { | ||
| 29 | const auto base = VersionFromRevision(revision); | ||
| 30 | constexpr auto max_rev = VersionFromRevision(CURRENT_PROCESS_REVISION); | ||
| 31 | return base <= max_rev; | ||
| 32 | } | ||
| 33 | |||
| 34 | static constexpr bool CanConsumeBuffer(std::size_t size, std::size_t offset, std::size_t required) { | ||
| 35 | if (offset > size) { | ||
| 36 | return false; | ||
| 37 | } | ||
| 38 | if (size < required) { | ||
| 39 | return false; | ||
| 40 | } | ||
| 41 | if ((size - offset) < required) { | ||
| 42 | return false; | ||
| 43 | } | ||
| 44 | return true; | ||
| 45 | } | ||
| 46 | |||
| 47 | } // namespace AudioCore | ||
diff --git a/src/core/arm/dynarmic/arm_dynarmic_64.cpp b/src/core/arm/dynarmic/arm_dynarmic_64.cpp index 9add5d363..65cbfe5e6 100644 --- a/src/core/arm/dynarmic/arm_dynarmic_64.cpp +++ b/src/core/arm/dynarmic/arm_dynarmic_64.cpp | |||
| @@ -20,6 +20,7 @@ | |||
| 20 | #include "core/hle/kernel/scheduler.h" | 20 | #include "core/hle/kernel/scheduler.h" |
| 21 | #include "core/hle/kernel/svc.h" | 21 | #include "core/hle/kernel/svc.h" |
| 22 | #include "core/memory.h" | 22 | #include "core/memory.h" |
| 23 | #include "core/settings.h" | ||
| 23 | 24 | ||
| 24 | namespace Core { | 25 | namespace Core { |
| 25 | 26 | ||
| @@ -144,6 +145,8 @@ std::shared_ptr<Dynarmic::A64::Jit> ARM_Dynarmic_64::MakeJit(Common::PageTable& | |||
| 144 | config.page_table_address_space_bits = address_space_bits; | 145 | config.page_table_address_space_bits = address_space_bits; |
| 145 | config.silently_mirror_page_table = false; | 146 | config.silently_mirror_page_table = false; |
| 146 | config.absolute_offset_page_table = true; | 147 | config.absolute_offset_page_table = true; |
| 148 | config.detect_misaligned_access_via_page_table = 16 | 32 | 64 | 128; | ||
| 149 | config.only_detect_misalignment_via_page_table_on_page_boundary = true; | ||
| 147 | 150 | ||
| 148 | // Multi-process state | 151 | // Multi-process state |
| 149 | config.processor_id = core_index; | 152 | config.processor_id = core_index; |
| @@ -159,8 +162,11 @@ std::shared_ptr<Dynarmic::A64::Jit> ARM_Dynarmic_64::MakeJit(Common::PageTable& | |||
| 159 | // Unpredictable instructions | 162 | // Unpredictable instructions |
| 160 | config.define_unpredictable_behaviour = true; | 163 | config.define_unpredictable_behaviour = true; |
| 161 | 164 | ||
| 162 | config.detect_misaligned_access_via_page_table = 16 | 32 | 64 | 128; | 165 | // Optimizations |
| 163 | config.only_detect_misalignment_via_page_table_on_page_boundary = true; | 166 | if (Settings::values.disable_cpu_opt) { |
| 167 | config.enable_optimizations = false; | ||
| 168 | config.enable_fast_dispatch = false; | ||
| 169 | } | ||
| 164 | 170 | ||
| 165 | return std::make_shared<Dynarmic::A64::Jit>(config); | 171 | return std::make_shared<Dynarmic::A64::Jit>(config); |
| 166 | } | 172 | } |
diff --git a/src/core/arm/unicorn/arm_unicorn.cpp b/src/core/arm/unicorn/arm_unicorn.cpp index d189efb63..b96583123 100644 --- a/src/core/arm/unicorn/arm_unicorn.cpp +++ b/src/core/arm/unicorn/arm_unicorn.cpp | |||
| @@ -11,6 +11,7 @@ | |||
| 11 | #include "core/core_timing.h" | 11 | #include "core/core_timing.h" |
| 12 | #include "core/hle/kernel/scheduler.h" | 12 | #include "core/hle/kernel/scheduler.h" |
| 13 | #include "core/hle/kernel/svc.h" | 13 | #include "core/hle/kernel/svc.h" |
| 14 | #include "core/memory.h" | ||
| 14 | 15 | ||
| 15 | namespace Core { | 16 | namespace Core { |
| 16 | 17 | ||
| @@ -171,7 +172,17 @@ MICROPROFILE_DEFINE(ARM_Jit_Unicorn, "ARM JIT", "Unicorn", MP_RGB(255, 64, 64)); | |||
| 171 | 172 | ||
| 172 | void ARM_Unicorn::ExecuteInstructions(std::size_t num_instructions) { | 173 | void ARM_Unicorn::ExecuteInstructions(std::size_t num_instructions) { |
| 173 | MICROPROFILE_SCOPE(ARM_Jit_Unicorn); | 174 | MICROPROFILE_SCOPE(ARM_Jit_Unicorn); |
| 175 | |||
| 176 | // Temporarily map the code page for Unicorn | ||
| 177 | u64 map_addr{GetPC() & ~Memory::PAGE_MASK}; | ||
| 178 | std::vector<u8> page_buffer(Memory::PAGE_SIZE); | ||
| 179 | system.Memory().ReadBlock(map_addr, page_buffer.data(), page_buffer.size()); | ||
| 180 | |||
| 181 | CHECKED(uc_mem_map_ptr(uc, map_addr, page_buffer.size(), | ||
| 182 | UC_PROT_READ | UC_PROT_WRITE | UC_PROT_EXEC, page_buffer.data())); | ||
| 174 | CHECKED(uc_emu_start(uc, GetPC(), 1ULL << 63, 0, num_instructions)); | 183 | CHECKED(uc_emu_start(uc, GetPC(), 1ULL << 63, 0, num_instructions)); |
| 184 | CHECKED(uc_mem_unmap(uc, map_addr, page_buffer.size())); | ||
| 185 | |||
| 175 | system.CoreTiming().AddTicks(num_instructions); | 186 | system.CoreTiming().AddTicks(num_instructions); |
| 176 | if (GDBStub::IsServerEnabled()) { | 187 | if (GDBStub::IsServerEnabled()) { |
| 177 | if (last_bkpt_hit && last_bkpt.type == GDBStub::BreakpointType::Execute) { | 188 | if (last_bkpt_hit && last_bkpt.type == GDBStub::BreakpointType::Execute) { |
diff --git a/src/core/crypto/key_manager.cpp b/src/core/crypto/key_manager.cpp index 87e6a1fd3..8997c7082 100644 --- a/src/core/crypto/key_manager.cpp +++ b/src/core/crypto/key_manager.cpp | |||
| @@ -1202,7 +1202,8 @@ const boost::container::flat_map<std::string, KeyIndex<S128KeyType>> KeyManager: | |||
| 1202 | {S128KeyType::Source, static_cast<u64>(SourceKeyType::KeyAreaKey), | 1202 | {S128KeyType::Source, static_cast<u64>(SourceKeyType::KeyAreaKey), |
| 1203 | static_cast<u64>(KeyAreaKeyType::System)}}, | 1203 | static_cast<u64>(KeyAreaKeyType::System)}}, |
| 1204 | {"titlekek_source", {S128KeyType::Source, static_cast<u64>(SourceKeyType::Titlekek), 0}}, | 1204 | {"titlekek_source", {S128KeyType::Source, static_cast<u64>(SourceKeyType::Titlekek), 0}}, |
| 1205 | {"keyblob_mac_key_source", {S128KeyType::Source, static_cast<u64>(SourceKeyType::KeyblobMAC)}}, | 1205 | {"keyblob_mac_key_source", |
| 1206 | {S128KeyType::Source, static_cast<u64>(SourceKeyType::KeyblobMAC), 0}}, | ||
| 1206 | {"tsec_key", {S128KeyType::TSEC, 0, 0}}, | 1207 | {"tsec_key", {S128KeyType::TSEC, 0, 0}}, |
| 1207 | {"secure_boot_key", {S128KeyType::SecureBoot, 0, 0}}, | 1208 | {"secure_boot_key", {S128KeyType::SecureBoot, 0, 0}}, |
| 1208 | {"sd_seed", {S128KeyType::SDSeed, 0, 0}}, | 1209 | {"sd_seed", {S128KeyType::SDSeed, 0, 0}}, |
diff --git a/src/core/crypto/partition_data_manager.cpp b/src/core/crypto/partition_data_manager.cpp index d64302f2e..7ed71ac3a 100644 --- a/src/core/crypto/partition_data_manager.cpp +++ b/src/core/crypto/partition_data_manager.cpp | |||
| @@ -202,8 +202,8 @@ static std::array<Key128, 0x20> FindEncryptedMasterKeyFromHex(const std::vector< | |||
| 202 | return out; | 202 | return out; |
| 203 | } | 203 | } |
| 204 | 204 | ||
| 205 | FileSys::VirtualFile FindFileInDirWithNames(const FileSys::VirtualDir& dir, | 205 | static FileSys::VirtualFile FindFileInDirWithNames(const FileSys::VirtualDir& dir, |
| 206 | const std::string& name) { | 206 | const std::string& name) { |
| 207 | const auto upper = Common::ToUpper(name); | 207 | const auto upper = Common::ToUpper(name); |
| 208 | 208 | ||
| 209 | for (const auto& fname : {name, name + ".bin", upper, upper + ".BIN"}) { | 209 | for (const auto& fname : {name, name + ".bin", upper, upper + ".BIN"}) { |
| @@ -345,8 +345,7 @@ FileSys::VirtualFile PartitionDataManager::GetPackage2Raw(Package2Type type) con | |||
| 345 | return package2.at(static_cast<size_t>(type)); | 345 | return package2.at(static_cast<size_t>(type)); |
| 346 | } | 346 | } |
| 347 | 347 | ||
| 348 | bool AttemptDecrypt(const std::array<u8, 16>& key, Package2Header& header) { | 348 | static bool AttemptDecrypt(const std::array<u8, 16>& key, Package2Header& header) { |
| 349 | |||
| 350 | const std::vector<u8> iv(header.header_ctr.begin(), header.header_ctr.end()); | 349 | const std::vector<u8> iv(header.header_ctr.begin(), header.header_ctr.end()); |
| 351 | Package2Header temp = header; | 350 | Package2Header temp = header; |
| 352 | AESCipher<Key128> cipher(key, Mode::CTR); | 351 | AESCipher<Key128> cipher(key, Mode::CTR); |
diff --git a/src/core/file_sys/program_metadata.cpp b/src/core/file_sys/program_metadata.cpp index 1d6c30962..43169bf9f 100644 --- a/src/core/file_sys/program_metadata.cpp +++ b/src/core/file_sys/program_metadata.cpp | |||
| @@ -51,6 +51,17 @@ Loader::ResultStatus ProgramMetadata::Load(VirtualFile file) { | |||
| 51 | return Loader::ResultStatus::Success; | 51 | return Loader::ResultStatus::Success; |
| 52 | } | 52 | } |
| 53 | 53 | ||
| 54 | /*static*/ ProgramMetadata ProgramMetadata::GetDefault() { | ||
| 55 | ProgramMetadata result; | ||
| 56 | |||
| 57 | result.LoadManual( | ||
| 58 | true /*is_64_bit*/, FileSys::ProgramAddressSpaceType::Is39Bit /*address_space*/, | ||
| 59 | 0x2c /*main_thread_prio*/, 0 /*main_thread_core*/, 0x00100000 /*main_thread_stack_size*/, | ||
| 60 | {}, 0xFFFFFFFFFFFFFFFF /*filesystem_permissions*/, {} /*capabilities*/); | ||
| 61 | |||
| 62 | return result; | ||
| 63 | } | ||
| 64 | |||
| 54 | void ProgramMetadata::LoadManual(bool is_64_bit, ProgramAddressSpaceType address_space, | 65 | void ProgramMetadata::LoadManual(bool is_64_bit, ProgramAddressSpaceType address_space, |
| 55 | s32 main_thread_prio, u32 main_thread_core, | 66 | s32 main_thread_prio, u32 main_thread_core, |
| 56 | u32 main_thread_stack_size, u64 title_id, | 67 | u32 main_thread_stack_size, u64 title_id, |
diff --git a/src/core/file_sys/program_metadata.h b/src/core/file_sys/program_metadata.h index f8759a396..35069972b 100644 --- a/src/core/file_sys/program_metadata.h +++ b/src/core/file_sys/program_metadata.h | |||
| @@ -44,9 +44,13 @@ public: | |||
| 44 | ProgramMetadata(); | 44 | ProgramMetadata(); |
| 45 | ~ProgramMetadata(); | 45 | ~ProgramMetadata(); |
| 46 | 46 | ||
| 47 | /// Gets a default ProgramMetadata configuration, should only be used for homebrew formats where | ||
| 48 | /// we do not have an NPDM file | ||
| 49 | static ProgramMetadata GetDefault(); | ||
| 50 | |||
| 47 | Loader::ResultStatus Load(VirtualFile file); | 51 | Loader::ResultStatus Load(VirtualFile file); |
| 48 | 52 | ||
| 49 | // Load from parameters instead of NPDM file, used for KIP | 53 | /// Load from parameters instead of NPDM file, used for KIP |
| 50 | void LoadManual(bool is_64_bit, ProgramAddressSpaceType address_space, s32 main_thread_prio, | 54 | void LoadManual(bool is_64_bit, ProgramAddressSpaceType address_space, s32 main_thread_prio, |
| 51 | u32 main_thread_core, u32 main_thread_stack_size, u64 title_id, | 55 | u32 main_thread_core, u32 main_thread_stack_size, u64 title_id, |
| 52 | u64 filesystem_permissions, KernelCapabilityDescriptors capabilities); | 56 | u64 filesystem_permissions, KernelCapabilityDescriptors capabilities); |
diff --git a/src/core/gdbstub/gdbstub.cpp b/src/core/gdbstub/gdbstub.cpp index 2f15635c5..70c0f8b80 100644 --- a/src/core/gdbstub/gdbstub.cpp +++ b/src/core/gdbstub/gdbstub.cpp | |||
| @@ -1389,10 +1389,9 @@ void SendTrap(Kernel::Thread* thread, int trap) { | |||
| 1389 | return; | 1389 | return; |
| 1390 | } | 1390 | } |
| 1391 | 1391 | ||
| 1392 | if (!halt_loop || current_thread == thread) { | 1392 | current_thread = thread; |
| 1393 | current_thread = thread; | 1393 | SendSignal(thread, trap); |
| 1394 | SendSignal(thread, trap); | 1394 | |
| 1395 | } | ||
| 1396 | halt_loop = true; | 1395 | halt_loop = true; |
| 1397 | send_trap = false; | 1396 | send_trap = false; |
| 1398 | } | 1397 | } |
diff --git a/src/core/hle/kernel/thread.cpp b/src/core/hle/kernel/thread.cpp index 4c0451c01..a919750a6 100644 --- a/src/core/hle/kernel/thread.cpp +++ b/src/core/hle/kernel/thread.cpp | |||
| @@ -150,8 +150,7 @@ static void ResetThreadContext64(Core::ARM_Interface::ThreadContext64& context, | |||
| 150 | context.pc = entry_point; | 150 | context.pc = entry_point; |
| 151 | context.sp = stack_top; | 151 | context.sp = stack_top; |
| 152 | // TODO(merry): Perform a hardware test to determine the below value. | 152 | // TODO(merry): Perform a hardware test to determine the below value. |
| 153 | // AHP = 0, DN = 1, FTZ = 1, RMode = Round towards zero | 153 | context.fpcr = 0; |
| 154 | context.fpcr = 0x03C00000; | ||
| 155 | } | 154 | } |
| 156 | 155 | ||
| 157 | ResultVal<std::shared_ptr<Thread>> Thread::Create(KernelCore& kernel, std::string name, | 156 | ResultVal<std::shared_ptr<Thread>> Thread::Create(KernelCore& kernel, std::string name, |
diff --git a/src/core/hle/service/am/am.cpp b/src/core/hle/service/am/am.cpp index 557608e76..3ece2cf3c 100644 --- a/src/core/hle/service/am/am.cpp +++ b/src/core/hle/service/am/am.cpp | |||
| @@ -903,7 +903,7 @@ private: | |||
| 903 | void PopOutData(Kernel::HLERequestContext& ctx) { | 903 | void PopOutData(Kernel::HLERequestContext& ctx) { |
| 904 | LOG_DEBUG(Service_AM, "called"); | 904 | LOG_DEBUG(Service_AM, "called"); |
| 905 | 905 | ||
| 906 | const auto storage = applet->GetBroker().PopNormalDataToGame(); | 906 | auto storage = applet->GetBroker().PopNormalDataToGame(); |
| 907 | if (storage == nullptr) { | 907 | if (storage == nullptr) { |
| 908 | LOG_ERROR(Service_AM, | 908 | LOG_ERROR(Service_AM, |
| 909 | "storage is a nullptr. There is no data in the current normal channel"); | 909 | "storage is a nullptr. There is no data in the current normal channel"); |
| @@ -934,7 +934,7 @@ private: | |||
| 934 | void PopInteractiveOutData(Kernel::HLERequestContext& ctx) { | 934 | void PopInteractiveOutData(Kernel::HLERequestContext& ctx) { |
| 935 | LOG_DEBUG(Service_AM, "called"); | 935 | LOG_DEBUG(Service_AM, "called"); |
| 936 | 936 | ||
| 937 | const auto storage = applet->GetBroker().PopInteractiveDataToGame(); | 937 | auto storage = applet->GetBroker().PopInteractiveDataToGame(); |
| 938 | if (storage == nullptr) { | 938 | if (storage == nullptr) { |
| 939 | LOG_ERROR(Service_AM, | 939 | LOG_ERROR(Service_AM, |
| 940 | "storage is a nullptr. There is no data in the current interactive channel"); | 940 | "storage is a nullptr. There is no data in the current interactive channel"); |
diff --git a/src/core/hle/service/audio/audren_u.cpp b/src/core/hle/service/audio/audren_u.cpp index 175cabf45..d8359abaa 100644 --- a/src/core/hle/service/audio/audren_u.cpp +++ b/src/core/hle/service/audio/audren_u.cpp | |||
| @@ -92,11 +92,16 @@ private: | |||
| 92 | } | 92 | } |
| 93 | 93 | ||
| 94 | void RequestUpdateImpl(Kernel::HLERequestContext& ctx) { | 94 | void RequestUpdateImpl(Kernel::HLERequestContext& ctx) { |
| 95 | LOG_WARNING(Service_Audio, "(STUBBED) called"); | 95 | LOG_DEBUG(Service_Audio, "(STUBBED) called"); |
| 96 | |||
| 97 | auto result = renderer->UpdateAudioRenderer(ctx.ReadBuffer()); | ||
| 98 | |||
| 99 | if (result.Succeeded()) { | ||
| 100 | ctx.WriteBuffer(result.Unwrap()); | ||
| 101 | } | ||
| 96 | 102 | ||
| 97 | ctx.WriteBuffer(renderer->UpdateAudioRenderer(ctx.ReadBuffer())); | ||
| 98 | IPC::ResponseBuilder rb{ctx, 2}; | 103 | IPC::ResponseBuilder rb{ctx, 2}; |
| 99 | rb.Push(RESULT_SUCCESS); | 104 | rb.Push(result.Code()); |
| 100 | } | 105 | } |
| 101 | 106 | ||
| 102 | void Start(Kernel::HLERequestContext& ctx) { | 107 | void Start(Kernel::HLERequestContext& ctx) { |
| @@ -252,8 +257,6 @@ private: | |||
| 252 | } | 257 | } |
| 253 | 258 | ||
| 254 | void GetAudioDeviceOutputVolume(Kernel::HLERequestContext& ctx) { | 259 | void GetAudioDeviceOutputVolume(Kernel::HLERequestContext& ctx) { |
| 255 | IPC::RequestParser rp{ctx}; | ||
| 256 | |||
| 257 | const auto device_name_buffer = ctx.ReadBuffer(); | 260 | const auto device_name_buffer = ctx.ReadBuffer(); |
| 258 | const std::string name = Common::StringFromBuffer(device_name_buffer); | 261 | const std::string name = Common::StringFromBuffer(device_name_buffer); |
| 259 | 262 | ||
diff --git a/src/core/hle/service/bcat/backend/boxcat.cpp b/src/core/hle/service/bcat/backend/boxcat.cpp index f589864ee..5febe8fc1 100644 --- a/src/core/hle/service/bcat/backend/boxcat.cpp +++ b/src/core/hle/service/bcat/backend/boxcat.cpp | |||
| @@ -18,6 +18,7 @@ | |||
| 18 | #include "core/hle/service/bcat/backend/boxcat.h" | 18 | #include "core/hle/service/bcat/backend/boxcat.h" |
| 19 | #include "core/settings.h" | 19 | #include "core/settings.h" |
| 20 | 20 | ||
| 21 | namespace Service::BCAT { | ||
| 21 | namespace { | 22 | namespace { |
| 22 | 23 | ||
| 23 | // Prevents conflicts with windows macro called CreateFile | 24 | // Prevents conflicts with windows macro called CreateFile |
| @@ -30,10 +31,6 @@ bool VfsDeleteFileWrap(FileSys::VirtualDir dir, std::string_view name) { | |||
| 30 | return dir->DeleteFile(name); | 31 | return dir->DeleteFile(name); |
| 31 | } | 32 | } |
| 32 | 33 | ||
| 33 | } // Anonymous namespace | ||
| 34 | |||
| 35 | namespace Service::BCAT { | ||
| 36 | |||
| 37 | constexpr ResultCode ERROR_GENERAL_BCAT_FAILURE{ErrorModule::BCAT, 1}; | 34 | constexpr ResultCode ERROR_GENERAL_BCAT_FAILURE{ErrorModule::BCAT, 1}; |
| 38 | 35 | ||
| 39 | constexpr char BOXCAT_HOSTNAME[] = "api.yuzu-emu.org"; | 36 | constexpr char BOXCAT_HOSTNAME[] = "api.yuzu-emu.org"; |
| @@ -90,8 +87,6 @@ constexpr u32 PORT = 443; | |||
| 90 | constexpr u32 TIMEOUT_SECONDS = 30; | 87 | constexpr u32 TIMEOUT_SECONDS = 30; |
| 91 | [[maybe_unused]] constexpr u64 VFS_COPY_BLOCK_SIZE = 1ULL << 24; // 4MB | 88 | [[maybe_unused]] constexpr u64 VFS_COPY_BLOCK_SIZE = 1ULL << 24; // 4MB |
| 92 | 89 | ||
| 93 | namespace { | ||
| 94 | |||
| 95 | std::string GetBINFilePath(u64 title_id) { | 90 | std::string GetBINFilePath(u64 title_id) { |
| 96 | return fmt::format("{}bcat/{:016X}/launchparam.bin", | 91 | return fmt::format("{}bcat/{:016X}/launchparam.bin", |
| 97 | FileUtil::GetUserPath(FileUtil::UserPath::CacheDir), title_id); | 92 | FileUtil::GetUserPath(FileUtil::UserPath::CacheDir), title_id); |
diff --git a/src/core/hle/service/es/es.cpp b/src/core/hle/service/es/es.cpp index df00ae625..f8e9df4b1 100644 --- a/src/core/hle/service/es/es.cpp +++ b/src/core/hle/service/es/es.cpp | |||
| @@ -4,6 +4,7 @@ | |||
| 4 | 4 | ||
| 5 | #include "core/crypto/key_manager.h" | 5 | #include "core/crypto/key_manager.h" |
| 6 | #include "core/hle/ipc_helpers.h" | 6 | #include "core/hle/ipc_helpers.h" |
| 7 | #include "core/hle/service/es/es.h" | ||
| 7 | #include "core/hle/service/service.h" | 8 | #include "core/hle/service/service.h" |
| 8 | 9 | ||
| 9 | namespace Service::ES { | 10 | namespace Service::ES { |
| @@ -76,7 +77,6 @@ private: | |||
| 76 | } | 77 | } |
| 77 | 78 | ||
| 78 | void ImportTicket(Kernel::HLERequestContext& ctx) { | 79 | void ImportTicket(Kernel::HLERequestContext& ctx) { |
| 79 | IPC::RequestParser rp{ctx}; | ||
| 80 | const auto ticket = ctx.ReadBuffer(); | 80 | const auto ticket = ctx.ReadBuffer(); |
| 81 | const auto cert = ctx.ReadBuffer(1); | 81 | const auto cert = ctx.ReadBuffer(1); |
| 82 | 82 | ||
diff --git a/src/core/hle/service/hid/controllers/npad.cpp b/src/core/hle/service/hid/controllers/npad.cpp index 2ccfffc19..c55d900e2 100644 --- a/src/core/hle/service/hid/controllers/npad.cpp +++ b/src/core/hle/service/hid/controllers/npad.cpp | |||
| @@ -502,7 +502,7 @@ void Controller_NPad::SetNpadMode(u32 npad_id, NPadAssignments assignment_mode) | |||
| 502 | 502 | ||
| 503 | void Controller_NPad::VibrateController(const std::vector<u32>& controller_ids, | 503 | void Controller_NPad::VibrateController(const std::vector<u32>& controller_ids, |
| 504 | const std::vector<Vibration>& vibrations) { | 504 | const std::vector<Vibration>& vibrations) { |
| 505 | LOG_WARNING(Service_HID, "(STUBBED) called"); | 505 | LOG_DEBUG(Service_HID, "(STUBBED) called"); |
| 506 | 506 | ||
| 507 | if (!can_controllers_vibrate) { | 507 | if (!can_controllers_vibrate) { |
| 508 | return; | 508 | return; |
diff --git a/src/core/hle/service/time/time_zone_manager.cpp b/src/core/hle/service/time/time_zone_manager.cpp index c8159bcd5..69152d0ac 100644 --- a/src/core/hle/service/time/time_zone_manager.cpp +++ b/src/core/hle/service/time/time_zone_manager.cpp | |||
| @@ -518,8 +518,8 @@ static bool ParseTimeZoneBinary(TimeZoneRule& time_zone_rule, FileSys::VirtualFi | |||
| 518 | constexpr s32 time_zone_max_leaps{50}; | 518 | constexpr s32 time_zone_max_leaps{50}; |
| 519 | constexpr s32 time_zone_max_chars{50}; | 519 | constexpr s32 time_zone_max_chars{50}; |
| 520 | if (!(0 <= header.leap_count && header.leap_count < time_zone_max_leaps && | 520 | if (!(0 <= header.leap_count && header.leap_count < time_zone_max_leaps && |
| 521 | 0 < header.type_count && header.type_count < time_zone_rule.ttis.size() && | 521 | 0 < header.type_count && header.type_count < s32(time_zone_rule.ttis.size()) && |
| 522 | 0 <= header.time_count && header.time_count < time_zone_rule.ats.size() && | 522 | 0 <= header.time_count && header.time_count < s32(time_zone_rule.ats.size()) && |
| 523 | 0 <= header.char_count && header.char_count < time_zone_max_chars && | 523 | 0 <= header.char_count && header.char_count < time_zone_max_chars && |
| 524 | (header.ttis_std_count == header.type_count || header.ttis_std_count == 0) && | 524 | (header.ttis_std_count == header.type_count || header.ttis_std_count == 0) && |
| 525 | (header.ttis_gmt_count == header.type_count || header.ttis_gmt_count == 0))) { | 525 | (header.ttis_gmt_count == header.type_count || header.ttis_gmt_count == 0))) { |
diff --git a/src/core/loader/elf.cpp b/src/core/loader/elf.cpp index 1e9ed2837..8f7615115 100644 --- a/src/core/loader/elf.cpp +++ b/src/core/loader/elf.cpp | |||
| @@ -398,6 +398,11 @@ AppLoader_ELF::LoadResult AppLoader_ELF::Load(Kernel::Process& process) { | |||
| 398 | Kernel::CodeSet codeset = elf_reader.LoadInto(base_address); | 398 | Kernel::CodeSet codeset = elf_reader.LoadInto(base_address); |
| 399 | const VAddr entry_point = codeset.entrypoint; | 399 | const VAddr entry_point = codeset.entrypoint; |
| 400 | 400 | ||
| 401 | // Setup the process code layout | ||
| 402 | if (process.LoadFromMetadata(FileSys::ProgramMetadata::GetDefault(), buffer.size()).IsError()) { | ||
| 403 | return {ResultStatus::ErrorNotInitialized, {}}; | ||
| 404 | } | ||
| 405 | |||
| 401 | process.LoadModule(std::move(codeset), entry_point); | 406 | process.LoadModule(std::move(codeset), entry_point); |
| 402 | 407 | ||
| 403 | is_loaded = true; | 408 | is_loaded = true; |
diff --git a/src/core/loader/nro.cpp b/src/core/loader/nro.cpp index 5d7e8136e..906544bc9 100644 --- a/src/core/loader/nro.cpp +++ b/src/core/loader/nro.cpp | |||
| @@ -131,7 +131,7 @@ static constexpr u32 PageAlignSize(u32 size) { | |||
| 131 | } | 131 | } |
| 132 | 132 | ||
| 133 | static bool LoadNroImpl(Kernel::Process& process, const std::vector<u8>& data, | 133 | static bool LoadNroImpl(Kernel::Process& process, const std::vector<u8>& data, |
| 134 | const std::string& name, VAddr load_base) { | 134 | const std::string& name) { |
| 135 | if (data.size() < sizeof(NroHeader)) { | 135 | if (data.size() < sizeof(NroHeader)) { |
| 136 | return {}; | 136 | return {}; |
| 137 | } | 137 | } |
| @@ -187,19 +187,25 @@ static bool LoadNroImpl(Kernel::Process& process, const std::vector<u8>& data, | |||
| 187 | codeset.DataSegment().size += bss_size; | 187 | codeset.DataSegment().size += bss_size; |
| 188 | program_image.resize(static_cast<u32>(program_image.size()) + bss_size); | 188 | program_image.resize(static_cast<u32>(program_image.size()) + bss_size); |
| 189 | 189 | ||
| 190 | // Setup the process code layout | ||
| 191 | if (process.LoadFromMetadata(FileSys::ProgramMetadata::GetDefault(), program_image.size()) | ||
| 192 | .IsError()) { | ||
| 193 | return false; | ||
| 194 | } | ||
| 195 | |||
| 190 | // Load codeset for current process | 196 | // Load codeset for current process |
| 191 | codeset.memory = std::move(program_image); | 197 | codeset.memory = std::move(program_image); |
| 192 | process.LoadModule(std::move(codeset), load_base); | 198 | process.LoadModule(std::move(codeset), process.PageTable().GetCodeRegionStart()); |
| 193 | 199 | ||
| 194 | // Register module with GDBStub | 200 | // Register module with GDBStub |
| 195 | GDBStub::RegisterModule(name, load_base, load_base); | 201 | GDBStub::RegisterModule(name, process.PageTable().GetCodeRegionStart(), |
| 202 | process.PageTable().GetCodeRegionEnd()); | ||
| 196 | 203 | ||
| 197 | return true; | 204 | return true; |
| 198 | } | 205 | } |
| 199 | 206 | ||
| 200 | bool AppLoader_NRO::LoadNro(Kernel::Process& process, const FileSys::VfsFile& file, | 207 | bool AppLoader_NRO::LoadNro(Kernel::Process& process, const FileSys::VfsFile& file) { |
| 201 | VAddr load_base) { | 208 | return LoadNroImpl(process, file.ReadAllBytes(), file.GetName()); |
| 202 | return LoadNroImpl(process, file.ReadAllBytes(), file.GetName(), load_base); | ||
| 203 | } | 209 | } |
| 204 | 210 | ||
| 205 | AppLoader_NRO::LoadResult AppLoader_NRO::Load(Kernel::Process& process) { | 211 | AppLoader_NRO::LoadResult AppLoader_NRO::Load(Kernel::Process& process) { |
| @@ -207,10 +213,7 @@ AppLoader_NRO::LoadResult AppLoader_NRO::Load(Kernel::Process& process) { | |||
| 207 | return {ResultStatus::ErrorAlreadyLoaded, {}}; | 213 | return {ResultStatus::ErrorAlreadyLoaded, {}}; |
| 208 | } | 214 | } |
| 209 | 215 | ||
| 210 | // Load NRO | 216 | if (!LoadNro(process, *file)) { |
| 211 | const VAddr base_address = process.PageTable().GetCodeRegionStart(); | ||
| 212 | |||
| 213 | if (!LoadNro(process, *file, base_address)) { | ||
| 214 | return {ResultStatus::ErrorLoadingNRO, {}}; | 217 | return {ResultStatus::ErrorLoadingNRO, {}}; |
| 215 | } | 218 | } |
| 216 | 219 | ||
diff --git a/src/core/loader/nro.h b/src/core/loader/nro.h index 71811bc29..4593d48fb 100644 --- a/src/core/loader/nro.h +++ b/src/core/loader/nro.h | |||
| @@ -47,7 +47,7 @@ public: | |||
| 47 | bool IsRomFSUpdatable() const override; | 47 | bool IsRomFSUpdatable() const override; |
| 48 | 48 | ||
| 49 | private: | 49 | private: |
| 50 | bool LoadNro(Kernel::Process& process, const FileSys::VfsFile& file, VAddr load_base); | 50 | bool LoadNro(Kernel::Process& process, const FileSys::VfsFile& file); |
| 51 | 51 | ||
| 52 | std::vector<u8> icon_data; | 52 | std::vector<u8> icon_data; |
| 53 | std::unique_ptr<FileSys::NACP> nacp; | 53 | std::unique_ptr<FileSys::NACP> nacp; |
diff --git a/src/core/settings.cpp b/src/core/settings.cpp index c1282cb80..cd6c257f5 100644 --- a/src/core/settings.cpp +++ b/src/core/settings.cpp | |||
| @@ -92,7 +92,7 @@ void LogSettings() { | |||
| 92 | LogSetting("Renderer_UseFrameLimit", Settings::values.use_frame_limit); | 92 | LogSetting("Renderer_UseFrameLimit", Settings::values.use_frame_limit); |
| 93 | LogSetting("Renderer_FrameLimit", Settings::values.frame_limit); | 93 | LogSetting("Renderer_FrameLimit", Settings::values.frame_limit); |
| 94 | LogSetting("Renderer_UseDiskShaderCache", Settings::values.use_disk_shader_cache); | 94 | LogSetting("Renderer_UseDiskShaderCache", Settings::values.use_disk_shader_cache); |
| 95 | LogSetting("Renderer_UseAccurateGpuEmulation", Settings::values.use_accurate_gpu_emulation); | 95 | LogSetting("Renderer_GPUAccuracyLevel", Settings::values.gpu_accuracy); |
| 96 | LogSetting("Renderer_UseAsynchronousGpuEmulation", | 96 | LogSetting("Renderer_UseAsynchronousGpuEmulation", |
| 97 | Settings::values.use_asynchronous_gpu_emulation); | 97 | Settings::values.use_asynchronous_gpu_emulation); |
| 98 | LogSetting("Renderer_UseVsync", Settings::values.use_vsync); | 98 | LogSetting("Renderer_UseVsync", Settings::values.use_vsync); |
| @@ -109,4 +109,12 @@ void LogSettings() { | |||
| 109 | LogSetting("Services_BCATBoxcatLocal", Settings::values.bcat_boxcat_local); | 109 | LogSetting("Services_BCATBoxcatLocal", Settings::values.bcat_boxcat_local); |
| 110 | } | 110 | } |
| 111 | 111 | ||
| 112 | bool IsGPULevelExtreme() { | ||
| 113 | return values.gpu_accuracy == GPUAccuracy::Extreme; | ||
| 114 | } | ||
| 115 | |||
| 116 | bool IsGPULevelHigh() { | ||
| 117 | return values.gpu_accuracy == GPUAccuracy::Extreme || values.gpu_accuracy == GPUAccuracy::High; | ||
| 118 | } | ||
| 119 | |||
| 112 | } // namespace Settings | 120 | } // namespace Settings |
diff --git a/src/core/settings.h b/src/core/settings.h index 79ec01731..7d09253f5 100644 --- a/src/core/settings.h +++ b/src/core/settings.h | |||
| @@ -376,6 +376,12 @@ enum class RendererBackend { | |||
| 376 | Vulkan = 1, | 376 | Vulkan = 1, |
| 377 | }; | 377 | }; |
| 378 | 378 | ||
| 379 | enum class GPUAccuracy : u32 { | ||
| 380 | Normal = 0, | ||
| 381 | High = 1, | ||
| 382 | Extreme = 2, | ||
| 383 | }; | ||
| 384 | |||
| 379 | struct Values { | 385 | struct Values { |
| 380 | // System | 386 | // System |
| 381 | bool use_docked_mode; | 387 | bool use_docked_mode; |
| @@ -436,7 +442,7 @@ struct Values { | |||
| 436 | bool use_frame_limit; | 442 | bool use_frame_limit; |
| 437 | u16 frame_limit; | 443 | u16 frame_limit; |
| 438 | bool use_disk_shader_cache; | 444 | bool use_disk_shader_cache; |
| 439 | bool use_accurate_gpu_emulation; | 445 | GPUAccuracy gpu_accuracy; |
| 440 | bool use_asynchronous_gpu_emulation; | 446 | bool use_asynchronous_gpu_emulation; |
| 441 | bool use_vsync; | 447 | bool use_vsync; |
| 442 | bool force_30fps_mode; | 448 | bool force_30fps_mode; |
| @@ -464,6 +470,7 @@ struct Values { | |||
| 464 | bool dump_nso; | 470 | bool dump_nso; |
| 465 | bool reporting_services; | 471 | bool reporting_services; |
| 466 | bool quest_flag; | 472 | bool quest_flag; |
| 473 | bool disable_cpu_opt; | ||
| 467 | 474 | ||
| 468 | // BCAT | 475 | // BCAT |
| 469 | std::string bcat_backend; | 476 | std::string bcat_backend; |
| @@ -479,6 +486,9 @@ struct Values { | |||
| 479 | std::map<u64, std::vector<std::string>> disabled_addons; | 486 | std::map<u64, std::vector<std::string>> disabled_addons; |
| 480 | } extern values; | 487 | } extern values; |
| 481 | 488 | ||
| 489 | bool IsGPULevelExtreme(); | ||
| 490 | bool IsGPULevelHigh(); | ||
| 491 | |||
| 482 | void Apply(); | 492 | void Apply(); |
| 483 | void LogSettings(); | 493 | void LogSettings(); |
| 484 | } // namespace Settings | 494 | } // namespace Settings |
diff --git a/src/core/telemetry_session.cpp b/src/core/telemetry_session.cpp index fd5a3ee9f..1c3b03a1c 100644 --- a/src/core/telemetry_session.cpp +++ b/src/core/telemetry_session.cpp | |||
| @@ -56,6 +56,18 @@ static const char* TranslateRenderer(Settings::RendererBackend backend) { | |||
| 56 | return "Unknown"; | 56 | return "Unknown"; |
| 57 | } | 57 | } |
| 58 | 58 | ||
| 59 | static const char* TranslateGPUAccuracyLevel(Settings::GPUAccuracy backend) { | ||
| 60 | switch (backend) { | ||
| 61 | case Settings::GPUAccuracy::Normal: | ||
| 62 | return "Normal"; | ||
| 63 | case Settings::GPUAccuracy::High: | ||
| 64 | return "High"; | ||
| 65 | case Settings::GPUAccuracy::Extreme: | ||
| 66 | return "Extreme"; | ||
| 67 | } | ||
| 68 | return "Unknown"; | ||
| 69 | } | ||
| 70 | |||
| 59 | u64 GetTelemetryId() { | 71 | u64 GetTelemetryId() { |
| 60 | u64 telemetry_id{}; | 72 | u64 telemetry_id{}; |
| 61 | const std::string filename{FileUtil::GetUserPath(FileUtil::UserPath::ConfigDir) + | 73 | const std::string filename{FileUtil::GetUserPath(FileUtil::UserPath::ConfigDir) + |
| @@ -184,8 +196,8 @@ void TelemetrySession::AddInitialInfo(Loader::AppLoader& app_loader) { | |||
| 184 | AddField(field_type, "Renderer_UseFrameLimit", Settings::values.use_frame_limit); | 196 | AddField(field_type, "Renderer_UseFrameLimit", Settings::values.use_frame_limit); |
| 185 | AddField(field_type, "Renderer_FrameLimit", Settings::values.frame_limit); | 197 | AddField(field_type, "Renderer_FrameLimit", Settings::values.frame_limit); |
| 186 | AddField(field_type, "Renderer_UseDiskShaderCache", Settings::values.use_disk_shader_cache); | 198 | AddField(field_type, "Renderer_UseDiskShaderCache", Settings::values.use_disk_shader_cache); |
| 187 | AddField(field_type, "Renderer_UseAccurateGpuEmulation", | 199 | AddField(field_type, "Renderer_GPUAccuracyLevel", |
| 188 | Settings::values.use_accurate_gpu_emulation); | 200 | TranslateGPUAccuracyLevel(Settings::values.gpu_accuracy)); |
| 189 | AddField(field_type, "Renderer_UseAsynchronousGpuEmulation", | 201 | AddField(field_type, "Renderer_UseAsynchronousGpuEmulation", |
| 190 | Settings::values.use_asynchronous_gpu_emulation); | 202 | Settings::values.use_asynchronous_gpu_emulation); |
| 191 | AddField(field_type, "Renderer_UseVsync", Settings::values.use_vsync); | 203 | AddField(field_type, "Renderer_UseVsync", Settings::values.use_vsync); |
diff --git a/src/tests/core/core_timing.cpp b/src/tests/core/core_timing.cpp index 1e3940801..ff2d11cc8 100644 --- a/src/tests/core/core_timing.cpp +++ b/src/tests/core/core_timing.cpp | |||
| @@ -14,13 +14,14 @@ | |||
| 14 | #include "core/core.h" | 14 | #include "core/core.h" |
| 15 | #include "core/core_timing.h" | 15 | #include "core/core_timing.h" |
| 16 | 16 | ||
| 17 | namespace { | ||
| 17 | // Numbers are chosen randomly to make sure the correct one is given. | 18 | // Numbers are chosen randomly to make sure the correct one is given. |
| 18 | static constexpr std::array<u64, 5> CB_IDS{{42, 144, 93, 1026, UINT64_C(0xFFFF7FFFF7FFFF)}}; | 19 | constexpr std::array<u64, 5> CB_IDS{{42, 144, 93, 1026, UINT64_C(0xFFFF7FFFF7FFFF)}}; |
| 19 | static constexpr int MAX_SLICE_LENGTH = 10000; // Copied from CoreTiming internals | 20 | constexpr int MAX_SLICE_LENGTH = 10000; // Copied from CoreTiming internals |
| 20 | 21 | ||
| 21 | static std::bitset<CB_IDS.size()> callbacks_ran_flags; | 22 | std::bitset<CB_IDS.size()> callbacks_ran_flags; |
| 22 | static u64 expected_callback = 0; | 23 | u64 expected_callback = 0; |
| 23 | static s64 lateness = 0; | 24 | s64 lateness = 0; |
| 24 | 25 | ||
| 25 | template <unsigned int IDX> | 26 | template <unsigned int IDX> |
| 26 | void CallbackTemplate(u64 userdata, s64 cycles_late) { | 27 | void CallbackTemplate(u64 userdata, s64 cycles_late) { |
| @@ -31,7 +32,7 @@ void CallbackTemplate(u64 userdata, s64 cycles_late) { | |||
| 31 | REQUIRE(lateness == cycles_late); | 32 | REQUIRE(lateness == cycles_late); |
| 32 | } | 33 | } |
| 33 | 34 | ||
| 34 | static u64 callbacks_done = 0; | 35 | u64 callbacks_done = 0; |
| 35 | 36 | ||
| 36 | void EmptyCallback(u64 userdata, s64 cycles_late) { | 37 | void EmptyCallback(u64 userdata, s64 cycles_late) { |
| 37 | ++callbacks_done; | 38 | ++callbacks_done; |
| @@ -48,8 +49,8 @@ struct ScopeInit final { | |||
| 48 | Core::Timing::CoreTiming core_timing; | 49 | Core::Timing::CoreTiming core_timing; |
| 49 | }; | 50 | }; |
| 50 | 51 | ||
| 51 | static void AdvanceAndCheck(Core::Timing::CoreTiming& core_timing, u32 idx, u32 context = 0, | 52 | void AdvanceAndCheck(Core::Timing::CoreTiming& core_timing, u32 idx, u32 context = 0, |
| 52 | int expected_lateness = 0, int cpu_downcount = 0) { | 53 | int expected_lateness = 0, int cpu_downcount = 0) { |
| 53 | callbacks_ran_flags = 0; | 54 | callbacks_ran_flags = 0; |
| 54 | expected_callback = CB_IDS[idx]; | 55 | expected_callback = CB_IDS[idx]; |
| 55 | lateness = expected_lateness; | 56 | lateness = expected_lateness; |
| @@ -62,6 +63,7 @@ static void AdvanceAndCheck(Core::Timing::CoreTiming& core_timing, u32 idx, u32 | |||
| 62 | 63 | ||
| 63 | REQUIRE(decltype(callbacks_ran_flags)().set(idx) == callbacks_ran_flags); | 64 | REQUIRE(decltype(callbacks_ran_flags)().set(idx) == callbacks_ran_flags); |
| 64 | } | 65 | } |
| 66 | } // Anonymous namespace | ||
| 65 | 67 | ||
| 66 | TEST_CASE("CoreTiming[BasicOrder]", "[core]") { | 68 | TEST_CASE("CoreTiming[BasicOrder]", "[core]") { |
| 67 | ScopeInit guard; | 69 | ScopeInit guard; |
diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt index 258d58eba..8ede4ba9b 100644 --- a/src/video_core/CMakeLists.txt +++ b/src/video_core/CMakeLists.txt | |||
| @@ -23,6 +23,7 @@ add_library(video_core STATIC | |||
| 23 | engines/shader_bytecode.h | 23 | engines/shader_bytecode.h |
| 24 | engines/shader_header.h | 24 | engines/shader_header.h |
| 25 | engines/shader_type.h | 25 | engines/shader_type.h |
| 26 | fence_manager.h | ||
| 26 | gpu.cpp | 27 | gpu.cpp |
| 27 | gpu.h | 28 | gpu.h |
| 28 | gpu_asynch.cpp | 29 | gpu_asynch.cpp |
| @@ -51,6 +52,8 @@ add_library(video_core STATIC | |||
| 51 | renderer_opengl/gl_buffer_cache.h | 52 | renderer_opengl/gl_buffer_cache.h |
| 52 | renderer_opengl/gl_device.cpp | 53 | renderer_opengl/gl_device.cpp |
| 53 | renderer_opengl/gl_device.h | 54 | renderer_opengl/gl_device.h |
| 55 | renderer_opengl/gl_fence_manager.cpp | ||
| 56 | renderer_opengl/gl_fence_manager.h | ||
| 54 | renderer_opengl/gl_framebuffer_cache.cpp | 57 | renderer_opengl/gl_framebuffer_cache.cpp |
| 55 | renderer_opengl/gl_framebuffer_cache.h | 58 | renderer_opengl/gl_framebuffer_cache.h |
| 56 | renderer_opengl/gl_rasterizer.cpp | 59 | renderer_opengl/gl_rasterizer.cpp |
| @@ -160,6 +163,8 @@ if (ENABLE_VULKAN) | |||
| 160 | renderer_vulkan/fixed_pipeline_state.h | 163 | renderer_vulkan/fixed_pipeline_state.h |
| 161 | renderer_vulkan/maxwell_to_vk.cpp | 164 | renderer_vulkan/maxwell_to_vk.cpp |
| 162 | renderer_vulkan/maxwell_to_vk.h | 165 | renderer_vulkan/maxwell_to_vk.h |
| 166 | renderer_vulkan/nsight_aftermath_tracker.cpp | ||
| 167 | renderer_vulkan/nsight_aftermath_tracker.h | ||
| 163 | renderer_vulkan/renderer_vulkan.h | 168 | renderer_vulkan/renderer_vulkan.h |
| 164 | renderer_vulkan/renderer_vulkan.cpp | 169 | renderer_vulkan/renderer_vulkan.cpp |
| 165 | renderer_vulkan/vk_blit_screen.cpp | 170 | renderer_vulkan/vk_blit_screen.cpp |
| @@ -174,6 +179,8 @@ if (ENABLE_VULKAN) | |||
| 174 | renderer_vulkan/vk_descriptor_pool.h | 179 | renderer_vulkan/vk_descriptor_pool.h |
| 175 | renderer_vulkan/vk_device.cpp | 180 | renderer_vulkan/vk_device.cpp |
| 176 | renderer_vulkan/vk_device.h | 181 | renderer_vulkan/vk_device.h |
| 182 | renderer_vulkan/vk_fence_manager.cpp | ||
| 183 | renderer_vulkan/vk_fence_manager.h | ||
| 177 | renderer_vulkan/vk_graphics_pipeline.cpp | 184 | renderer_vulkan/vk_graphics_pipeline.cpp |
| 178 | renderer_vulkan/vk_graphics_pipeline.h | 185 | renderer_vulkan/vk_graphics_pipeline.h |
| 179 | renderer_vulkan/vk_image.cpp | 186 | renderer_vulkan/vk_image.cpp |
| @@ -213,19 +220,30 @@ if (ENABLE_VULKAN) | |||
| 213 | renderer_vulkan/wrapper.cpp | 220 | renderer_vulkan/wrapper.cpp |
| 214 | renderer_vulkan/wrapper.h | 221 | renderer_vulkan/wrapper.h |
| 215 | ) | 222 | ) |
| 216 | |||
| 217 | target_include_directories(video_core PRIVATE sirit ../../externals/Vulkan-Headers/include) | ||
| 218 | target_compile_definitions(video_core PRIVATE HAS_VULKAN) | ||
| 219 | endif() | 223 | endif() |
| 220 | 224 | ||
| 221 | create_target_directory_groups(video_core) | 225 | create_target_directory_groups(video_core) |
| 222 | 226 | ||
| 223 | target_link_libraries(video_core PUBLIC common core) | 227 | target_link_libraries(video_core PUBLIC common core) |
| 224 | target_link_libraries(video_core PRIVATE glad) | 228 | target_link_libraries(video_core PRIVATE glad) |
| 229 | |||
| 225 | if (ENABLE_VULKAN) | 230 | if (ENABLE_VULKAN) |
| 231 | target_include_directories(video_core PRIVATE sirit ../../externals/Vulkan-Headers/include) | ||
| 232 | target_compile_definitions(video_core PRIVATE HAS_VULKAN) | ||
| 226 | target_link_libraries(video_core PRIVATE sirit) | 233 | target_link_libraries(video_core PRIVATE sirit) |
| 227 | endif() | 234 | endif() |
| 228 | 235 | ||
| 236 | if (ENABLE_NSIGHT_AFTERMATH) | ||
| 237 | if (NOT DEFINED ENV{NSIGHT_AFTERMATH_SDK}) | ||
| 238 | message(ERROR "Environment variable NSIGHT_AFTERMATH_SDK has to be provided") | ||
| 239 | endif() | ||
| 240 | if (NOT WIN32) | ||
| 241 | message(ERROR "Nsight Aftermath doesn't support non-Windows platforms") | ||
| 242 | endif() | ||
| 243 | target_compile_definitions(video_core PRIVATE HAS_NSIGHT_AFTERMATH) | ||
| 244 | target_include_directories(video_core PRIVATE "$ENV{NSIGHT_AFTERMATH_SDK}/include") | ||
| 245 | endif() | ||
| 246 | |||
| 229 | if (MSVC) | 247 | if (MSVC) |
| 230 | target_compile_options(video_core PRIVATE /we4267) | 248 | target_compile_options(video_core PRIVATE /we4267) |
| 231 | else() | 249 | else() |
diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h index 83e7a1cde..510f11089 100644 --- a/src/video_core/buffer_cache/buffer_cache.h +++ b/src/video_core/buffer_cache/buffer_cache.h | |||
| @@ -5,6 +5,7 @@ | |||
| 5 | #pragma once | 5 | #pragma once |
| 6 | 6 | ||
| 7 | #include <array> | 7 | #include <array> |
| 8 | #include <list> | ||
| 8 | #include <memory> | 9 | #include <memory> |
| 9 | #include <mutex> | 10 | #include <mutex> |
| 10 | #include <unordered_map> | 11 | #include <unordered_map> |
| @@ -18,8 +19,10 @@ | |||
| 18 | 19 | ||
| 19 | #include "common/alignment.h" | 20 | #include "common/alignment.h" |
| 20 | #include "common/common_types.h" | 21 | #include "common/common_types.h" |
| 22 | #include "common/logging/log.h" | ||
| 21 | #include "core/core.h" | 23 | #include "core/core.h" |
| 22 | #include "core/memory.h" | 24 | #include "core/memory.h" |
| 25 | #include "core/settings.h" | ||
| 23 | #include "video_core/buffer_cache/buffer_block.h" | 26 | #include "video_core/buffer_cache/buffer_block.h" |
| 24 | #include "video_core/buffer_cache/map_interval.h" | 27 | #include "video_core/buffer_cache/map_interval.h" |
| 25 | #include "video_core/memory_manager.h" | 28 | #include "video_core/memory_manager.h" |
| @@ -79,6 +82,9 @@ public: | |||
| 79 | auto map = MapAddress(block, gpu_addr, cpu_addr, size); | 82 | auto map = MapAddress(block, gpu_addr, cpu_addr, size); |
| 80 | if (is_written) { | 83 | if (is_written) { |
| 81 | map->MarkAsModified(true, GetModifiedTicks()); | 84 | map->MarkAsModified(true, GetModifiedTicks()); |
| 85 | if (Settings::IsGPULevelHigh() && Settings::values.use_asynchronous_gpu_emulation) { | ||
| 86 | MarkForAsyncFlush(map); | ||
| 87 | } | ||
| 82 | if (!map->IsWritten()) { | 88 | if (!map->IsWritten()) { |
| 83 | map->MarkAsWritten(true); | 89 | map->MarkAsWritten(true); |
| 84 | MarkRegionAsWritten(map->GetStart(), map->GetEnd() - 1); | 90 | MarkRegionAsWritten(map->GetStart(), map->GetEnd() - 1); |
| @@ -137,11 +143,22 @@ public: | |||
| 137 | }); | 143 | }); |
| 138 | for (auto& object : objects) { | 144 | for (auto& object : objects) { |
| 139 | if (object->IsModified() && object->IsRegistered()) { | 145 | if (object->IsModified() && object->IsRegistered()) { |
| 146 | mutex.unlock(); | ||
| 140 | FlushMap(object); | 147 | FlushMap(object); |
| 148 | mutex.lock(); | ||
| 141 | } | 149 | } |
| 142 | } | 150 | } |
| 143 | } | 151 | } |
| 144 | 152 | ||
| 153 | bool MustFlushRegion(VAddr addr, std::size_t size) { | ||
| 154 | std::lock_guard lock{mutex}; | ||
| 155 | |||
| 156 | const std::vector<MapInterval> objects = GetMapsInRange(addr, size); | ||
| 157 | return std::any_of(objects.cbegin(), objects.cend(), [](const MapInterval& map) { | ||
| 158 | return map->IsModified() && map->IsRegistered(); | ||
| 159 | }); | ||
| 160 | } | ||
| 161 | |||
| 145 | /// Mark the specified region as being invalidated | 162 | /// Mark the specified region as being invalidated |
| 146 | void InvalidateRegion(VAddr addr, u64 size) { | 163 | void InvalidateRegion(VAddr addr, u64 size) { |
| 147 | std::lock_guard lock{mutex}; | 164 | std::lock_guard lock{mutex}; |
| @@ -154,6 +171,77 @@ public: | |||
| 154 | } | 171 | } |
| 155 | } | 172 | } |
| 156 | 173 | ||
| 174 | void OnCPUWrite(VAddr addr, std::size_t size) { | ||
| 175 | std::lock_guard lock{mutex}; | ||
| 176 | |||
| 177 | for (const auto& object : GetMapsInRange(addr, size)) { | ||
| 178 | if (object->IsMemoryMarked() && object->IsRegistered()) { | ||
| 179 | UnmarkMemory(object); | ||
| 180 | object->SetSyncPending(true); | ||
| 181 | marked_for_unregister.emplace_back(object); | ||
| 182 | } | ||
| 183 | } | ||
| 184 | } | ||
| 185 | |||
| 186 | void SyncGuestHost() { | ||
| 187 | std::lock_guard lock{mutex}; | ||
| 188 | |||
| 189 | for (const auto& object : marked_for_unregister) { | ||
| 190 | if (object->IsRegistered()) { | ||
| 191 | object->SetSyncPending(false); | ||
| 192 | Unregister(object); | ||
| 193 | } | ||
| 194 | } | ||
| 195 | marked_for_unregister.clear(); | ||
| 196 | } | ||
| 197 | |||
| 198 | void CommitAsyncFlushes() { | ||
| 199 | if (uncommitted_flushes) { | ||
| 200 | auto commit_list = std::make_shared<std::list<MapInterval>>(); | ||
| 201 | for (auto& map : *uncommitted_flushes) { | ||
| 202 | if (map->IsRegistered() && map->IsModified()) { | ||
| 203 | // TODO(Blinkhawk): Implement backend asynchronous flushing | ||
| 204 | // AsyncFlushMap(map) | ||
| 205 | commit_list->push_back(map); | ||
| 206 | } | ||
| 207 | } | ||
| 208 | if (!commit_list->empty()) { | ||
| 209 | committed_flushes.push_back(commit_list); | ||
| 210 | } else { | ||
| 211 | committed_flushes.emplace_back(); | ||
| 212 | } | ||
| 213 | } else { | ||
| 214 | committed_flushes.emplace_back(); | ||
| 215 | } | ||
| 216 | uncommitted_flushes.reset(); | ||
| 217 | } | ||
| 218 | |||
| 219 | bool ShouldWaitAsyncFlushes() const { | ||
| 220 | return !committed_flushes.empty() && committed_flushes.front() != nullptr; | ||
| 221 | } | ||
| 222 | |||
| 223 | bool HasUncommittedFlushes() const { | ||
| 224 | return uncommitted_flushes != nullptr; | ||
| 225 | } | ||
| 226 | |||
| 227 | void PopAsyncFlushes() { | ||
| 228 | if (committed_flushes.empty()) { | ||
| 229 | return; | ||
| 230 | } | ||
| 231 | auto& flush_list = committed_flushes.front(); | ||
| 232 | if (!flush_list) { | ||
| 233 | committed_flushes.pop_front(); | ||
| 234 | return; | ||
| 235 | } | ||
| 236 | for (MapInterval& map : *flush_list) { | ||
| 237 | if (map->IsRegistered()) { | ||
| 238 | // TODO(Blinkhawk): Replace this for reading the asynchronous flush | ||
| 239 | FlushMap(map); | ||
| 240 | } | ||
| 241 | } | ||
| 242 | committed_flushes.pop_front(); | ||
| 243 | } | ||
| 244 | |||
| 157 | virtual BufferType GetEmptyBuffer(std::size_t size) = 0; | 245 | virtual BufferType GetEmptyBuffer(std::size_t size) = 0; |
| 158 | 246 | ||
| 159 | protected: | 247 | protected: |
| @@ -196,17 +284,30 @@ protected: | |||
| 196 | const IntervalType interval{new_map->GetStart(), new_map->GetEnd()}; | 284 | const IntervalType interval{new_map->GetStart(), new_map->GetEnd()}; |
| 197 | mapped_addresses.insert({interval, new_map}); | 285 | mapped_addresses.insert({interval, new_map}); |
| 198 | rasterizer.UpdatePagesCachedCount(cpu_addr, size, 1); | 286 | rasterizer.UpdatePagesCachedCount(cpu_addr, size, 1); |
| 287 | new_map->SetMemoryMarked(true); | ||
| 199 | if (inherit_written) { | 288 | if (inherit_written) { |
| 200 | MarkRegionAsWritten(new_map->GetStart(), new_map->GetEnd() - 1); | 289 | MarkRegionAsWritten(new_map->GetStart(), new_map->GetEnd() - 1); |
| 201 | new_map->MarkAsWritten(true); | 290 | new_map->MarkAsWritten(true); |
| 202 | } | 291 | } |
| 203 | } | 292 | } |
| 204 | 293 | ||
| 205 | /// Unregisters an object from the cache | 294 | void UnmarkMemory(const MapInterval& map) { |
| 206 | void Unregister(MapInterval& map) { | 295 | if (!map->IsMemoryMarked()) { |
| 296 | return; | ||
| 297 | } | ||
| 207 | const std::size_t size = map->GetEnd() - map->GetStart(); | 298 | const std::size_t size = map->GetEnd() - map->GetStart(); |
| 208 | rasterizer.UpdatePagesCachedCount(map->GetStart(), size, -1); | 299 | rasterizer.UpdatePagesCachedCount(map->GetStart(), size, -1); |
| 300 | map->SetMemoryMarked(false); | ||
| 301 | } | ||
| 302 | |||
| 303 | /// Unregisters an object from the cache | ||
| 304 | void Unregister(const MapInterval& map) { | ||
| 305 | UnmarkMemory(map); | ||
| 209 | map->MarkAsRegistered(false); | 306 | map->MarkAsRegistered(false); |
| 307 | if (map->IsSyncPending()) { | ||
| 308 | marked_for_unregister.remove(map); | ||
| 309 | map->SetSyncPending(false); | ||
| 310 | } | ||
| 210 | if (map->IsWritten()) { | 311 | if (map->IsWritten()) { |
| 211 | UnmarkRegionAsWritten(map->GetStart(), map->GetEnd() - 1); | 312 | UnmarkRegionAsWritten(map->GetStart(), map->GetEnd() - 1); |
| 212 | } | 313 | } |
| @@ -264,6 +365,9 @@ private: | |||
| 264 | MapInterval new_map = CreateMap(new_start, new_end, new_gpu_addr); | 365 | MapInterval new_map = CreateMap(new_start, new_end, new_gpu_addr); |
| 265 | if (modified_inheritance) { | 366 | if (modified_inheritance) { |
| 266 | new_map->MarkAsModified(true, GetModifiedTicks()); | 367 | new_map->MarkAsModified(true, GetModifiedTicks()); |
| 368 | if (Settings::IsGPULevelHigh() && Settings::values.use_asynchronous_gpu_emulation) { | ||
| 369 | MarkForAsyncFlush(new_map); | ||
| 370 | } | ||
| 267 | } | 371 | } |
| 268 | Register(new_map, write_inheritance); | 372 | Register(new_map, write_inheritance); |
| 269 | return new_map; | 373 | return new_map; |
| @@ -450,6 +554,13 @@ private: | |||
| 450 | return false; | 554 | return false; |
| 451 | } | 555 | } |
| 452 | 556 | ||
| 557 | void MarkForAsyncFlush(MapInterval& map) { | ||
| 558 | if (!uncommitted_flushes) { | ||
| 559 | uncommitted_flushes = std::make_shared<std::unordered_set<MapInterval>>(); | ||
| 560 | } | ||
| 561 | uncommitted_flushes->insert(map); | ||
| 562 | } | ||
| 563 | |||
| 453 | VideoCore::RasterizerInterface& rasterizer; | 564 | VideoCore::RasterizerInterface& rasterizer; |
| 454 | Core::System& system; | 565 | Core::System& system; |
| 455 | 566 | ||
| @@ -479,6 +590,10 @@ private: | |||
| 479 | u64 modified_ticks = 0; | 590 | u64 modified_ticks = 0; |
| 480 | 591 | ||
| 481 | std::vector<u8> staging_buffer; | 592 | std::vector<u8> staging_buffer; |
| 593 | std::list<MapInterval> marked_for_unregister; | ||
| 594 | |||
| 595 | std::shared_ptr<std::unordered_set<MapInterval>> uncommitted_flushes{}; | ||
| 596 | std::list<std::shared_ptr<std::list<MapInterval>>> committed_flushes; | ||
| 482 | 597 | ||
| 483 | std::recursive_mutex mutex; | 598 | std::recursive_mutex mutex; |
| 484 | }; | 599 | }; |
diff --git a/src/video_core/buffer_cache/map_interval.h b/src/video_core/buffer_cache/map_interval.h index b0956029d..29d8b26f3 100644 --- a/src/video_core/buffer_cache/map_interval.h +++ b/src/video_core/buffer_cache/map_interval.h | |||
| @@ -46,6 +46,22 @@ public: | |||
| 46 | return is_registered; | 46 | return is_registered; |
| 47 | } | 47 | } |
| 48 | 48 | ||
| 49 | void SetMemoryMarked(bool is_memory_marked_) { | ||
| 50 | is_memory_marked = is_memory_marked_; | ||
| 51 | } | ||
| 52 | |||
| 53 | bool IsMemoryMarked() const { | ||
| 54 | return is_memory_marked; | ||
| 55 | } | ||
| 56 | |||
| 57 | void SetSyncPending(bool is_sync_pending_) { | ||
| 58 | is_sync_pending = is_sync_pending_; | ||
| 59 | } | ||
| 60 | |||
| 61 | bool IsSyncPending() const { | ||
| 62 | return is_sync_pending; | ||
| 63 | } | ||
| 64 | |||
| 49 | VAddr GetStart() const { | 65 | VAddr GetStart() const { |
| 50 | return start; | 66 | return start; |
| 51 | } | 67 | } |
| @@ -83,6 +99,8 @@ private: | |||
| 83 | bool is_written{}; | 99 | bool is_written{}; |
| 84 | bool is_modified{}; | 100 | bool is_modified{}; |
| 85 | bool is_registered{}; | 101 | bool is_registered{}; |
| 102 | bool is_memory_marked{}; | ||
| 103 | bool is_sync_pending{}; | ||
| 86 | u64 ticks{}; | 104 | u64 ticks{}; |
| 87 | }; | 105 | }; |
| 88 | 106 | ||
diff --git a/src/video_core/dma_pusher.cpp b/src/video_core/dma_pusher.cpp index 713c14182..324dafdcd 100644 --- a/src/video_core/dma_pusher.cpp +++ b/src/video_core/dma_pusher.cpp | |||
| @@ -12,7 +12,7 @@ | |||
| 12 | 12 | ||
| 13 | namespace Tegra { | 13 | namespace Tegra { |
| 14 | 14 | ||
| 15 | DmaPusher::DmaPusher(GPU& gpu) : gpu(gpu) {} | 15 | DmaPusher::DmaPusher(Core::System& system, GPU& gpu) : gpu{gpu}, system{system} {} |
| 16 | 16 | ||
| 17 | DmaPusher::~DmaPusher() = default; | 17 | DmaPusher::~DmaPusher() = default; |
| 18 | 18 | ||
| @@ -21,17 +21,20 @@ MICROPROFILE_DEFINE(DispatchCalls, "GPU", "Execute command buffer", MP_RGB(128, | |||
| 21 | void DmaPusher::DispatchCalls() { | 21 | void DmaPusher::DispatchCalls() { |
| 22 | MICROPROFILE_SCOPE(DispatchCalls); | 22 | MICROPROFILE_SCOPE(DispatchCalls); |
| 23 | 23 | ||
| 24 | gpu.SyncGuestHost(); | ||
| 24 | // On entering GPU code, assume all memory may be touched by the ARM core. | 25 | // On entering GPU code, assume all memory may be touched by the ARM core. |
| 25 | gpu.Maxwell3D().OnMemoryWrite(); | 26 | gpu.Maxwell3D().OnMemoryWrite(); |
| 26 | 27 | ||
| 27 | dma_pushbuffer_subindex = 0; | 28 | dma_pushbuffer_subindex = 0; |
| 28 | 29 | ||
| 29 | while (Core::System::GetInstance().IsPoweredOn()) { | 30 | while (system.IsPoweredOn()) { |
| 30 | if (!Step()) { | 31 | if (!Step()) { |
| 31 | break; | 32 | break; |
| 32 | } | 33 | } |
| 33 | } | 34 | } |
| 34 | gpu.FlushCommands(); | 35 | gpu.FlushCommands(); |
| 36 | gpu.SyncGuestHost(); | ||
| 37 | gpu.OnCommandListEnd(); | ||
| 35 | } | 38 | } |
| 36 | 39 | ||
| 37 | bool DmaPusher::Step() { | 40 | bool DmaPusher::Step() { |
diff --git a/src/video_core/dma_pusher.h b/src/video_core/dma_pusher.h index 6ab06518f..d6188614a 100644 --- a/src/video_core/dma_pusher.h +++ b/src/video_core/dma_pusher.h | |||
| @@ -10,6 +10,10 @@ | |||
| 10 | #include "common/bit_field.h" | 10 | #include "common/bit_field.h" |
| 11 | #include "common/common_types.h" | 11 | #include "common/common_types.h" |
| 12 | 12 | ||
| 13 | namespace Core { | ||
| 14 | class System; | ||
| 15 | } | ||
| 16 | |||
| 13 | namespace Tegra { | 17 | namespace Tegra { |
| 14 | 18 | ||
| 15 | enum class SubmissionMode : u32 { | 19 | enum class SubmissionMode : u32 { |
| @@ -56,7 +60,7 @@ using CommandList = std::vector<Tegra::CommandListHeader>; | |||
| 56 | */ | 60 | */ |
| 57 | class DmaPusher { | 61 | class DmaPusher { |
| 58 | public: | 62 | public: |
| 59 | explicit DmaPusher(GPU& gpu); | 63 | explicit DmaPusher(Core::System& system, GPU& gpu); |
| 60 | ~DmaPusher(); | 64 | ~DmaPusher(); |
| 61 | 65 | ||
| 62 | void Push(CommandList&& entries) { | 66 | void Push(CommandList&& entries) { |
| @@ -72,8 +76,6 @@ private: | |||
| 72 | 76 | ||
| 73 | void CallMethod(u32 argument) const; | 77 | void CallMethod(u32 argument) const; |
| 74 | 78 | ||
| 75 | GPU& gpu; | ||
| 76 | |||
| 77 | std::vector<CommandHeader> command_headers; ///< Buffer for list of commands fetched at once | 79 | std::vector<CommandHeader> command_headers; ///< Buffer for list of commands fetched at once |
| 78 | 80 | ||
| 79 | std::queue<CommandList> dma_pushbuffer; ///< Queue of command lists to be processed | 81 | std::queue<CommandList> dma_pushbuffer; ///< Queue of command lists to be processed |
| @@ -92,6 +94,9 @@ private: | |||
| 92 | 94 | ||
| 93 | GPUVAddr dma_mget{}; ///< main pushbuffer last read address | 95 | GPUVAddr dma_mget{}; ///< main pushbuffer last read address |
| 94 | bool ib_enable{true}; ///< IB mode enabled | 96 | bool ib_enable{true}; ///< IB mode enabled |
| 97 | |||
| 98 | GPU& gpu; | ||
| 99 | Core::System& system; | ||
| 95 | }; | 100 | }; |
| 96 | 101 | ||
| 97 | } // namespace Tegra | 102 | } // namespace Tegra |
diff --git a/src/video_core/engines/fermi_2d.cpp b/src/video_core/engines/fermi_2d.cpp index 85d308e26..bace6affb 100644 --- a/src/video_core/engines/fermi_2d.cpp +++ b/src/video_core/engines/fermi_2d.cpp | |||
| @@ -28,7 +28,7 @@ void Fermi2D::CallMethod(const GPU::MethodCall& method_call) { | |||
| 28 | } | 28 | } |
| 29 | } | 29 | } |
| 30 | 30 | ||
| 31 | std::pair<u32, u32> DelimitLine(u32 src_1, u32 src_2, u32 dst_1, u32 dst_2, u32 src_line) { | 31 | static std::pair<u32, u32> DelimitLine(u32 src_1, u32 src_2, u32 dst_1, u32 dst_2, u32 src_line) { |
| 32 | const u32 line_a = src_2 - src_1; | 32 | const u32 line_a = src_2 - src_1; |
| 33 | const u32 line_b = dst_2 - dst_1; | 33 | const u32 line_b = dst_2 - dst_1; |
| 34 | const u32 excess = std::max<s32>(0, line_a - src_line + src_1); | 34 | const u32 excess = std::max<s32>(0, line_a - src_line + src_1); |
diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp index ba63b44b4..2824ed707 100644 --- a/src/video_core/engines/maxwell_3d.cpp +++ b/src/video_core/engines/maxwell_3d.cpp | |||
| @@ -92,6 +92,10 @@ void Maxwell3D::InitializeRegisterDefaults() { | |||
| 92 | color_mask.A.Assign(1); | 92 | color_mask.A.Assign(1); |
| 93 | } | 93 | } |
| 94 | 94 | ||
| 95 | for (auto& format : regs.vertex_attrib_format) { | ||
| 96 | format.constant.Assign(1); | ||
| 97 | } | ||
| 98 | |||
| 95 | // NVN games expect these values to be enabled at boot | 99 | // NVN games expect these values to be enabled at boot |
| 96 | regs.rasterize_enable = 1; | 100 | regs.rasterize_enable = 1; |
| 97 | regs.rt_separate_frag_data = 1; | 101 | regs.rt_separate_frag_data = 1; |
| @@ -400,7 +404,11 @@ void Maxwell3D::ProcessQueryGet() { | |||
| 400 | 404 | ||
| 401 | switch (regs.query.query_get.operation) { | 405 | switch (regs.query.query_get.operation) { |
| 402 | case Regs::QueryOperation::Release: | 406 | case Regs::QueryOperation::Release: |
| 403 | StampQueryResult(regs.query.query_sequence, regs.query.query_get.short_query == 0); | 407 | if (regs.query.query_get.fence == 1) { |
| 408 | rasterizer.SignalSemaphore(regs.query.QueryAddress(), regs.query.query_sequence); | ||
| 409 | } else { | ||
| 410 | StampQueryResult(regs.query.query_sequence, regs.query.query_get.short_query == 0); | ||
| 411 | } | ||
| 404 | break; | 412 | break; |
| 405 | case Regs::QueryOperation::Acquire: | 413 | case Regs::QueryOperation::Acquire: |
| 406 | // TODO(Blinkhawk): Under this operation, the GPU waits for the CPU to write a value that | 414 | // TODO(Blinkhawk): Under this operation, the GPU waits for the CPU to write a value that |
| @@ -479,7 +487,7 @@ void Maxwell3D::ProcessSyncPoint() { | |||
| 479 | const u32 increment = regs.sync_info.increment.Value(); | 487 | const u32 increment = regs.sync_info.increment.Value(); |
| 480 | [[maybe_unused]] const u32 cache_flush = regs.sync_info.unknown.Value(); | 488 | [[maybe_unused]] const u32 cache_flush = regs.sync_info.unknown.Value(); |
| 481 | if (increment) { | 489 | if (increment) { |
| 482 | system.GPU().IncrementSyncPoint(sync_point); | 490 | rasterizer.SignalSyncPoint(sync_point); |
| 483 | } | 491 | } |
| 484 | } | 492 | } |
| 485 | 493 | ||
diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h index 5cf6a4cc3..59d5752d2 100644 --- a/src/video_core/engines/maxwell_3d.h +++ b/src/video_core/engines/maxwell_3d.h | |||
| @@ -1149,7 +1149,7 @@ public: | |||
| 1149 | 1149 | ||
| 1150 | /// Returns whether the vertex array specified by index is supposed to be | 1150 | /// Returns whether the vertex array specified by index is supposed to be |
| 1151 | /// accessed per instance or not. | 1151 | /// accessed per instance or not. |
| 1152 | bool IsInstancingEnabled(u32 index) const { | 1152 | bool IsInstancingEnabled(std::size_t index) const { |
| 1153 | return is_instanced[index]; | 1153 | return is_instanced[index]; |
| 1154 | } | 1154 | } |
| 1155 | } instanced_arrays; | 1155 | } instanced_arrays; |
diff --git a/src/video_core/engines/maxwell_dma.cpp b/src/video_core/engines/maxwell_dma.cpp index c2610f992..3bfed6ab8 100644 --- a/src/video_core/engines/maxwell_dma.cpp +++ b/src/video_core/engines/maxwell_dma.cpp | |||
| @@ -104,8 +104,13 @@ void MaxwellDMA::HandleCopy() { | |||
| 104 | write_buffer.resize(dst_size); | 104 | write_buffer.resize(dst_size); |
| 105 | } | 105 | } |
| 106 | 106 | ||
| 107 | memory_manager.ReadBlock(source, read_buffer.data(), src_size); | 107 | if (Settings::IsGPULevelExtreme()) { |
| 108 | memory_manager.ReadBlock(dest, write_buffer.data(), dst_size); | 108 | memory_manager.ReadBlock(source, read_buffer.data(), src_size); |
| 109 | memory_manager.ReadBlock(dest, write_buffer.data(), dst_size); | ||
| 110 | } else { | ||
| 111 | memory_manager.ReadBlockUnsafe(source, read_buffer.data(), src_size); | ||
| 112 | memory_manager.ReadBlockUnsafe(dest, write_buffer.data(), dst_size); | ||
| 113 | } | ||
| 109 | 114 | ||
| 110 | Texture::UnswizzleSubrect( | 115 | Texture::UnswizzleSubrect( |
| 111 | regs.x_count, regs.y_count, regs.dst_pitch, regs.src_params.size_x, bytes_per_pixel, | 116 | regs.x_count, regs.y_count, regs.dst_pitch, regs.src_params.size_x, bytes_per_pixel, |
| @@ -136,7 +141,7 @@ void MaxwellDMA::HandleCopy() { | |||
| 136 | write_buffer.resize(dst_size); | 141 | write_buffer.resize(dst_size); |
| 137 | } | 142 | } |
| 138 | 143 | ||
| 139 | if (Settings::values.use_accurate_gpu_emulation) { | 144 | if (Settings::IsGPULevelExtreme()) { |
| 140 | memory_manager.ReadBlock(source, read_buffer.data(), src_size); | 145 | memory_manager.ReadBlock(source, read_buffer.data(), src_size); |
| 141 | memory_manager.ReadBlock(dest, write_buffer.data(), dst_size); | 146 | memory_manager.ReadBlock(dest, write_buffer.data(), dst_size); |
| 142 | } else { | 147 | } else { |
diff --git a/src/video_core/fence_manager.h b/src/video_core/fence_manager.h new file mode 100644 index 000000000..dabd1588c --- /dev/null +++ b/src/video_core/fence_manager.h | |||
| @@ -0,0 +1,170 @@ | |||
| 1 | // Copyright 2020 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include <algorithm> | ||
| 8 | #include <array> | ||
| 9 | #include <memory> | ||
| 10 | #include <queue> | ||
| 11 | |||
| 12 | #include "common/assert.h" | ||
| 13 | #include "common/common_types.h" | ||
| 14 | #include "core/core.h" | ||
| 15 | #include "core/memory.h" | ||
| 16 | #include "core/settings.h" | ||
| 17 | #include "video_core/gpu.h" | ||
| 18 | #include "video_core/memory_manager.h" | ||
| 19 | #include "video_core/rasterizer_interface.h" | ||
| 20 | |||
| 21 | namespace VideoCommon { | ||
| 22 | |||
| 23 | class FenceBase { | ||
| 24 | public: | ||
| 25 | FenceBase(u32 payload, bool is_stubbed) | ||
| 26 | : address{}, payload{payload}, is_semaphore{false}, is_stubbed{is_stubbed} {} | ||
| 27 | |||
| 28 | FenceBase(GPUVAddr address, u32 payload, bool is_stubbed) | ||
| 29 | : address{address}, payload{payload}, is_semaphore{true}, is_stubbed{is_stubbed} {} | ||
| 30 | |||
| 31 | GPUVAddr GetAddress() const { | ||
| 32 | return address; | ||
| 33 | } | ||
| 34 | |||
| 35 | u32 GetPayload() const { | ||
| 36 | return payload; | ||
| 37 | } | ||
| 38 | |||
| 39 | bool IsSemaphore() const { | ||
| 40 | return is_semaphore; | ||
| 41 | } | ||
| 42 | |||
| 43 | private: | ||
| 44 | GPUVAddr address; | ||
| 45 | u32 payload; | ||
| 46 | bool is_semaphore; | ||
| 47 | |||
| 48 | protected: | ||
| 49 | bool is_stubbed; | ||
| 50 | }; | ||
| 51 | |||
| 52 | template <typename TFence, typename TTextureCache, typename TTBufferCache, typename TQueryCache> | ||
| 53 | class FenceManager { | ||
| 54 | public: | ||
| 55 | void SignalSemaphore(GPUVAddr addr, u32 value) { | ||
| 56 | TryReleasePendingFences(); | ||
| 57 | const bool should_flush = ShouldFlush(); | ||
| 58 | CommitAsyncFlushes(); | ||
| 59 | TFence new_fence = CreateFence(addr, value, !should_flush); | ||
| 60 | fences.push(new_fence); | ||
| 61 | QueueFence(new_fence); | ||
| 62 | if (should_flush) { | ||
| 63 | rasterizer.FlushCommands(); | ||
| 64 | } | ||
| 65 | rasterizer.SyncGuestHost(); | ||
| 66 | } | ||
| 67 | |||
| 68 | void SignalSyncPoint(u32 value) { | ||
| 69 | TryReleasePendingFences(); | ||
| 70 | const bool should_flush = ShouldFlush(); | ||
| 71 | CommitAsyncFlushes(); | ||
| 72 | TFence new_fence = CreateFence(value, !should_flush); | ||
| 73 | fences.push(new_fence); | ||
| 74 | QueueFence(new_fence); | ||
| 75 | if (should_flush) { | ||
| 76 | rasterizer.FlushCommands(); | ||
| 77 | } | ||
| 78 | rasterizer.SyncGuestHost(); | ||
| 79 | } | ||
| 80 | |||
| 81 | void WaitPendingFences() { | ||
| 82 | auto& gpu{system.GPU()}; | ||
| 83 | auto& memory_manager{gpu.MemoryManager()}; | ||
| 84 | while (!fences.empty()) { | ||
| 85 | TFence& current_fence = fences.front(); | ||
| 86 | if (ShouldWait()) { | ||
| 87 | WaitFence(current_fence); | ||
| 88 | } | ||
| 89 | PopAsyncFlushes(); | ||
| 90 | if (current_fence->IsSemaphore()) { | ||
| 91 | memory_manager.Write<u32>(current_fence->GetAddress(), current_fence->GetPayload()); | ||
| 92 | } else { | ||
| 93 | gpu.IncrementSyncPoint(current_fence->GetPayload()); | ||
| 94 | } | ||
| 95 | fences.pop(); | ||
| 96 | } | ||
| 97 | } | ||
| 98 | |||
| 99 | protected: | ||
| 100 | FenceManager(Core::System& system, VideoCore::RasterizerInterface& rasterizer, | ||
| 101 | TTextureCache& texture_cache, TTBufferCache& buffer_cache, | ||
| 102 | TQueryCache& query_cache) | ||
| 103 | : system{system}, rasterizer{rasterizer}, texture_cache{texture_cache}, | ||
| 104 | buffer_cache{buffer_cache}, query_cache{query_cache} {} | ||
| 105 | |||
| 106 | virtual ~FenceManager() {} | ||
| 107 | |||
| 108 | /// Creates a Sync Point Fence Interface, does not create a backend fence if 'is_stubbed' is | ||
| 109 | /// true | ||
| 110 | virtual TFence CreateFence(u32 value, bool is_stubbed) = 0; | ||
| 111 | /// Creates a Semaphore Fence Interface, does not create a backend fence if 'is_stubbed' is true | ||
| 112 | virtual TFence CreateFence(GPUVAddr addr, u32 value, bool is_stubbed) = 0; | ||
| 113 | /// Queues a fence into the backend if the fence isn't stubbed. | ||
| 114 | virtual void QueueFence(TFence& fence) = 0; | ||
| 115 | /// Notifies that the backend fence has been signaled/reached in host GPU. | ||
| 116 | virtual bool IsFenceSignaled(TFence& fence) const = 0; | ||
| 117 | /// Waits until a fence has been signalled by the host GPU. | ||
| 118 | virtual void WaitFence(TFence& fence) = 0; | ||
| 119 | |||
| 120 | Core::System& system; | ||
| 121 | VideoCore::RasterizerInterface& rasterizer; | ||
| 122 | TTextureCache& texture_cache; | ||
| 123 | TTBufferCache& buffer_cache; | ||
| 124 | TQueryCache& query_cache; | ||
| 125 | |||
| 126 | private: | ||
| 127 | void TryReleasePendingFences() { | ||
| 128 | auto& gpu{system.GPU()}; | ||
| 129 | auto& memory_manager{gpu.MemoryManager()}; | ||
| 130 | while (!fences.empty()) { | ||
| 131 | TFence& current_fence = fences.front(); | ||
| 132 | if (ShouldWait() && !IsFenceSignaled(current_fence)) { | ||
| 133 | return; | ||
| 134 | } | ||
| 135 | PopAsyncFlushes(); | ||
| 136 | if (current_fence->IsSemaphore()) { | ||
| 137 | memory_manager.Write<u32>(current_fence->GetAddress(), current_fence->GetPayload()); | ||
| 138 | } else { | ||
| 139 | gpu.IncrementSyncPoint(current_fence->GetPayload()); | ||
| 140 | } | ||
| 141 | fences.pop(); | ||
| 142 | } | ||
| 143 | } | ||
| 144 | |||
| 145 | bool ShouldWait() const { | ||
| 146 | return texture_cache.ShouldWaitAsyncFlushes() || buffer_cache.ShouldWaitAsyncFlushes() || | ||
| 147 | query_cache.ShouldWaitAsyncFlushes(); | ||
| 148 | } | ||
| 149 | |||
| 150 | bool ShouldFlush() const { | ||
| 151 | return texture_cache.HasUncommittedFlushes() || buffer_cache.HasUncommittedFlushes() || | ||
| 152 | query_cache.HasUncommittedFlushes(); | ||
| 153 | } | ||
| 154 | |||
| 155 | void PopAsyncFlushes() { | ||
| 156 | texture_cache.PopAsyncFlushes(); | ||
| 157 | buffer_cache.PopAsyncFlushes(); | ||
| 158 | query_cache.PopAsyncFlushes(); | ||
| 159 | } | ||
| 160 | |||
| 161 | void CommitAsyncFlushes() { | ||
| 162 | texture_cache.CommitAsyncFlushes(); | ||
| 163 | buffer_cache.CommitAsyncFlushes(); | ||
| 164 | query_cache.CommitAsyncFlushes(); | ||
| 165 | } | ||
| 166 | |||
| 167 | std::queue<TFence> fences; | ||
| 168 | }; | ||
| 169 | |||
| 170 | } // namespace VideoCommon | ||
diff --git a/src/video_core/gpu.cpp b/src/video_core/gpu.cpp index 8acf2eda2..3b7572d61 100644 --- a/src/video_core/gpu.cpp +++ b/src/video_core/gpu.cpp | |||
| @@ -27,7 +27,7 @@ GPU::GPU(Core::System& system, std::unique_ptr<VideoCore::RendererBase>&& render | |||
| 27 | : system{system}, renderer{std::move(renderer_)}, is_async{is_async} { | 27 | : system{system}, renderer{std::move(renderer_)}, is_async{is_async} { |
| 28 | auto& rasterizer{renderer->Rasterizer()}; | 28 | auto& rasterizer{renderer->Rasterizer()}; |
| 29 | memory_manager = std::make_unique<Tegra::MemoryManager>(system, rasterizer); | 29 | memory_manager = std::make_unique<Tegra::MemoryManager>(system, rasterizer); |
| 30 | dma_pusher = std::make_unique<Tegra::DmaPusher>(*this); | 30 | dma_pusher = std::make_unique<Tegra::DmaPusher>(system, *this); |
| 31 | maxwell_3d = std::make_unique<Engines::Maxwell3D>(system, rasterizer, *memory_manager); | 31 | maxwell_3d = std::make_unique<Engines::Maxwell3D>(system, rasterizer, *memory_manager); |
| 32 | fermi_2d = std::make_unique<Engines::Fermi2D>(rasterizer); | 32 | fermi_2d = std::make_unique<Engines::Fermi2D>(rasterizer); |
| 33 | kepler_compute = std::make_unique<Engines::KeplerCompute>(system, rasterizer, *memory_manager); | 33 | kepler_compute = std::make_unique<Engines::KeplerCompute>(system, rasterizer, *memory_manager); |
| @@ -125,6 +125,28 @@ bool GPU::CancelSyncptInterrupt(const u32 syncpoint_id, const u32 value) { | |||
| 125 | return true; | 125 | return true; |
| 126 | } | 126 | } |
| 127 | 127 | ||
| 128 | u64 GPU::RequestFlush(VAddr addr, std::size_t size) { | ||
| 129 | std::unique_lock lck{flush_request_mutex}; | ||
| 130 | const u64 fence = ++last_flush_fence; | ||
| 131 | flush_requests.emplace_back(fence, addr, size); | ||
| 132 | return fence; | ||
| 133 | } | ||
| 134 | |||
| 135 | void GPU::TickWork() { | ||
| 136 | std::unique_lock lck{flush_request_mutex}; | ||
| 137 | while (!flush_requests.empty()) { | ||
| 138 | auto& request = flush_requests.front(); | ||
| 139 | const u64 fence = request.fence; | ||
| 140 | const VAddr addr = request.addr; | ||
| 141 | const std::size_t size = request.size; | ||
| 142 | flush_requests.pop_front(); | ||
| 143 | flush_request_mutex.unlock(); | ||
| 144 | renderer->Rasterizer().FlushRegion(addr, size); | ||
| 145 | current_flush_fence.store(fence); | ||
| 146 | flush_request_mutex.lock(); | ||
| 147 | } | ||
| 148 | } | ||
| 149 | |||
| 128 | u64 GPU::GetTicks() const { | 150 | u64 GPU::GetTicks() const { |
| 129 | // This values were reversed engineered by fincs from NVN | 151 | // This values were reversed engineered by fincs from NVN |
| 130 | // The gpu clock is reported in units of 385/625 nanoseconds | 152 | // The gpu clock is reported in units of 385/625 nanoseconds |
| @@ -142,6 +164,13 @@ void GPU::FlushCommands() { | |||
| 142 | renderer->Rasterizer().FlushCommands(); | 164 | renderer->Rasterizer().FlushCommands(); |
| 143 | } | 165 | } |
| 144 | 166 | ||
| 167 | void GPU::SyncGuestHost() { | ||
| 168 | renderer->Rasterizer().SyncGuestHost(); | ||
| 169 | } | ||
| 170 | |||
| 171 | void GPU::OnCommandListEnd() { | ||
| 172 | renderer->Rasterizer().ReleaseFences(); | ||
| 173 | } | ||
| 145 | // Note that, traditionally, methods are treated as 4-byte addressable locations, and hence | 174 | // Note that, traditionally, methods are treated as 4-byte addressable locations, and hence |
| 146 | // their numbers are written down multiplied by 4 in Docs. Here we are not multiply by 4. | 175 | // their numbers are written down multiplied by 4 in Docs. Here we are not multiply by 4. |
| 147 | // So the values you see in docs might be multiplied by 4. | 176 | // So the values you see in docs might be multiplied by 4. |
diff --git a/src/video_core/gpu.h b/src/video_core/gpu.h index 1a2d747be..5e3eb94e9 100644 --- a/src/video_core/gpu.h +++ b/src/video_core/gpu.h | |||
| @@ -155,7 +155,23 @@ public: | |||
| 155 | /// Calls a GPU method. | 155 | /// Calls a GPU method. |
| 156 | void CallMethod(const MethodCall& method_call); | 156 | void CallMethod(const MethodCall& method_call); |
| 157 | 157 | ||
| 158 | /// Flush all current written commands into the host GPU for execution. | ||
| 158 | void FlushCommands(); | 159 | void FlushCommands(); |
| 160 | /// Synchronizes CPU writes with Host GPU memory. | ||
| 161 | void SyncGuestHost(); | ||
| 162 | /// Signal the ending of command list. | ||
| 163 | virtual void OnCommandListEnd(); | ||
| 164 | |||
| 165 | /// Request a host GPU memory flush from the CPU. | ||
| 166 | u64 RequestFlush(VAddr addr, std::size_t size); | ||
| 167 | |||
| 168 | /// Obtains current flush request fence id. | ||
| 169 | u64 CurrentFlushRequestFence() const { | ||
| 170 | return current_flush_fence.load(std::memory_order_relaxed); | ||
| 171 | } | ||
| 172 | |||
| 173 | /// Tick pending requests within the GPU. | ||
| 174 | void TickWork(); | ||
| 159 | 175 | ||
| 160 | /// Returns a reference to the Maxwell3D GPU engine. | 176 | /// Returns a reference to the Maxwell3D GPU engine. |
| 161 | Engines::Maxwell3D& Maxwell3D(); | 177 | Engines::Maxwell3D& Maxwell3D(); |
| @@ -325,6 +341,19 @@ private: | |||
| 325 | 341 | ||
| 326 | std::condition_variable sync_cv; | 342 | std::condition_variable sync_cv; |
| 327 | 343 | ||
| 344 | struct FlushRequest { | ||
| 345 | FlushRequest(u64 fence, VAddr addr, std::size_t size) | ||
| 346 | : fence{fence}, addr{addr}, size{size} {} | ||
| 347 | u64 fence; | ||
| 348 | VAddr addr; | ||
| 349 | std::size_t size; | ||
| 350 | }; | ||
| 351 | |||
| 352 | std::list<FlushRequest> flush_requests; | ||
| 353 | std::atomic<u64> current_flush_fence{}; | ||
| 354 | u64 last_flush_fence{}; | ||
| 355 | std::mutex flush_request_mutex; | ||
| 356 | |||
| 328 | const bool is_async; | 357 | const bool is_async; |
| 329 | }; | 358 | }; |
| 330 | 359 | ||
diff --git a/src/video_core/gpu_asynch.cpp b/src/video_core/gpu_asynch.cpp index 20e73a37e..53305ab43 100644 --- a/src/video_core/gpu_asynch.cpp +++ b/src/video_core/gpu_asynch.cpp | |||
| @@ -52,4 +52,8 @@ void GPUAsynch::WaitIdle() const { | |||
| 52 | gpu_thread.WaitIdle(); | 52 | gpu_thread.WaitIdle(); |
| 53 | } | 53 | } |
| 54 | 54 | ||
| 55 | void GPUAsynch::OnCommandListEnd() { | ||
| 56 | gpu_thread.OnCommandListEnd(); | ||
| 57 | } | ||
| 58 | |||
| 55 | } // namespace VideoCommon | 59 | } // namespace VideoCommon |
diff --git a/src/video_core/gpu_asynch.h b/src/video_core/gpu_asynch.h index 03fd0eef0..517658612 100644 --- a/src/video_core/gpu_asynch.h +++ b/src/video_core/gpu_asynch.h | |||
| @@ -32,6 +32,8 @@ public: | |||
| 32 | void FlushAndInvalidateRegion(VAddr addr, u64 size) override; | 32 | void FlushAndInvalidateRegion(VAddr addr, u64 size) override; |
| 33 | void WaitIdle() const override; | 33 | void WaitIdle() const override; |
| 34 | 34 | ||
| 35 | void OnCommandListEnd() override; | ||
| 36 | |||
| 35 | protected: | 37 | protected: |
| 36 | void TriggerCpuInterrupt(u32 syncpoint_id, u32 value) const override; | 38 | void TriggerCpuInterrupt(u32 syncpoint_id, u32 value) const override; |
| 37 | 39 | ||
diff --git a/src/video_core/gpu_thread.cpp b/src/video_core/gpu_thread.cpp index 10cda686b..c3bb4fe06 100644 --- a/src/video_core/gpu_thread.cpp +++ b/src/video_core/gpu_thread.cpp | |||
| @@ -6,6 +6,7 @@ | |||
| 6 | #include "common/microprofile.h" | 6 | #include "common/microprofile.h" |
| 7 | #include "core/core.h" | 7 | #include "core/core.h" |
| 8 | #include "core/frontend/emu_window.h" | 8 | #include "core/frontend/emu_window.h" |
| 9 | #include "core/settings.h" | ||
| 9 | #include "video_core/dma_pusher.h" | 10 | #include "video_core/dma_pusher.h" |
| 10 | #include "video_core/gpu.h" | 11 | #include "video_core/gpu.h" |
| 11 | #include "video_core/gpu_thread.h" | 12 | #include "video_core/gpu_thread.h" |
| @@ -14,8 +15,9 @@ | |||
| 14 | namespace VideoCommon::GPUThread { | 15 | namespace VideoCommon::GPUThread { |
| 15 | 16 | ||
| 16 | /// Runs the GPU thread | 17 | /// Runs the GPU thread |
| 17 | static void RunThread(VideoCore::RendererBase& renderer, Core::Frontend::GraphicsContext& context, | 18 | static void RunThread(Core::System& system, VideoCore::RendererBase& renderer, |
| 18 | Tegra::DmaPusher& dma_pusher, SynchState& state) { | 19 | Core::Frontend::GraphicsContext& context, Tegra::DmaPusher& dma_pusher, |
| 20 | SynchState& state) { | ||
| 19 | MicroProfileOnThreadCreate("GpuThread"); | 21 | MicroProfileOnThreadCreate("GpuThread"); |
| 20 | 22 | ||
| 21 | // Wait for first GPU command before acquiring the window context | 23 | // Wait for first GPU command before acquiring the window context |
| @@ -37,10 +39,14 @@ static void RunThread(VideoCore::RendererBase& renderer, Core::Frontend::Graphic | |||
| 37 | dma_pusher.DispatchCalls(); | 39 | dma_pusher.DispatchCalls(); |
| 38 | } else if (const auto data = std::get_if<SwapBuffersCommand>(&next.data)) { | 40 | } else if (const auto data = std::get_if<SwapBuffersCommand>(&next.data)) { |
| 39 | renderer.SwapBuffers(data->framebuffer ? &*data->framebuffer : nullptr); | 41 | renderer.SwapBuffers(data->framebuffer ? &*data->framebuffer : nullptr); |
| 42 | } else if (const auto data = std::get_if<OnCommandListEndCommand>(&next.data)) { | ||
| 43 | renderer.Rasterizer().ReleaseFences(); | ||
| 44 | } else if (const auto data = std::get_if<GPUTickCommand>(&next.data)) { | ||
| 45 | system.GPU().TickWork(); | ||
| 40 | } else if (const auto data = std::get_if<FlushRegionCommand>(&next.data)) { | 46 | } else if (const auto data = std::get_if<FlushRegionCommand>(&next.data)) { |
| 41 | renderer.Rasterizer().FlushRegion(data->addr, data->size); | 47 | renderer.Rasterizer().FlushRegion(data->addr, data->size); |
| 42 | } else if (const auto data = std::get_if<InvalidateRegionCommand>(&next.data)) { | 48 | } else if (const auto data = std::get_if<InvalidateRegionCommand>(&next.data)) { |
| 43 | renderer.Rasterizer().InvalidateRegion(data->addr, data->size); | 49 | renderer.Rasterizer().OnCPUWrite(data->addr, data->size); |
| 44 | } else if (std::holds_alternative<EndProcessingCommand>(next.data)) { | 50 | } else if (std::holds_alternative<EndProcessingCommand>(next.data)) { |
| 45 | return; | 51 | return; |
| 46 | } else { | 52 | } else { |
| @@ -65,8 +71,8 @@ ThreadManager::~ThreadManager() { | |||
| 65 | void ThreadManager::StartThread(VideoCore::RendererBase& renderer, | 71 | void ThreadManager::StartThread(VideoCore::RendererBase& renderer, |
| 66 | Core::Frontend::GraphicsContext& context, | 72 | Core::Frontend::GraphicsContext& context, |
| 67 | Tegra::DmaPusher& dma_pusher) { | 73 | Tegra::DmaPusher& dma_pusher) { |
| 68 | thread = std::thread{RunThread, std::ref(renderer), std::ref(context), std::ref(dma_pusher), | 74 | thread = std::thread{RunThread, std::ref(system), std::ref(renderer), |
| 69 | std::ref(state)}; | 75 | std::ref(context), std::ref(dma_pusher), std::ref(state)}; |
| 70 | } | 76 | } |
| 71 | 77 | ||
| 72 | void ThreadManager::SubmitList(Tegra::CommandList&& entries) { | 78 | void ThreadManager::SubmitList(Tegra::CommandList&& entries) { |
| @@ -78,16 +84,29 @@ void ThreadManager::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) { | |||
| 78 | } | 84 | } |
| 79 | 85 | ||
| 80 | void ThreadManager::FlushRegion(VAddr addr, u64 size) { | 86 | void ThreadManager::FlushRegion(VAddr addr, u64 size) { |
| 81 | PushCommand(FlushRegionCommand(addr, size)); | 87 | if (!Settings::IsGPULevelHigh()) { |
| 88 | PushCommand(FlushRegionCommand(addr, size)); | ||
| 89 | return; | ||
| 90 | } | ||
| 91 | if (!Settings::IsGPULevelExtreme()) { | ||
| 92 | return; | ||
| 93 | } | ||
| 94 | if (system.Renderer().Rasterizer().MustFlushRegion(addr, size)) { | ||
| 95 | auto& gpu = system.GPU(); | ||
| 96 | u64 fence = gpu.RequestFlush(addr, size); | ||
| 97 | PushCommand(GPUTickCommand()); | ||
| 98 | while (fence > gpu.CurrentFlushRequestFence()) { | ||
| 99 | } | ||
| 100 | } | ||
| 82 | } | 101 | } |
| 83 | 102 | ||
| 84 | void ThreadManager::InvalidateRegion(VAddr addr, u64 size) { | 103 | void ThreadManager::InvalidateRegion(VAddr addr, u64 size) { |
| 85 | system.Renderer().Rasterizer().InvalidateRegion(addr, size); | 104 | system.Renderer().Rasterizer().OnCPUWrite(addr, size); |
| 86 | } | 105 | } |
| 87 | 106 | ||
| 88 | void ThreadManager::FlushAndInvalidateRegion(VAddr addr, u64 size) { | 107 | void ThreadManager::FlushAndInvalidateRegion(VAddr addr, u64 size) { |
| 89 | // Skip flush on asynch mode, as FlushAndInvalidateRegion is not used for anything too important | 108 | // Skip flush on asynch mode, as FlushAndInvalidateRegion is not used for anything too important |
| 90 | InvalidateRegion(addr, size); | 109 | system.Renderer().Rasterizer().OnCPUWrite(addr, size); |
| 91 | } | 110 | } |
| 92 | 111 | ||
| 93 | void ThreadManager::WaitIdle() const { | 112 | void ThreadManager::WaitIdle() const { |
| @@ -95,6 +114,10 @@ void ThreadManager::WaitIdle() const { | |||
| 95 | } | 114 | } |
| 96 | } | 115 | } |
| 97 | 116 | ||
| 117 | void ThreadManager::OnCommandListEnd() { | ||
| 118 | PushCommand(OnCommandListEndCommand()); | ||
| 119 | } | ||
| 120 | |||
| 98 | u64 ThreadManager::PushCommand(CommandData&& command_data) { | 121 | u64 ThreadManager::PushCommand(CommandData&& command_data) { |
| 99 | const u64 fence{++state.last_fence}; | 122 | const u64 fence{++state.last_fence}; |
| 100 | state.queue.Push(CommandDataContainer(std::move(command_data), fence)); | 123 | state.queue.Push(CommandDataContainer(std::move(command_data), fence)); |
diff --git a/src/video_core/gpu_thread.h b/src/video_core/gpu_thread.h index cd74ad330..5a28335d6 100644 --- a/src/video_core/gpu_thread.h +++ b/src/video_core/gpu_thread.h | |||
| @@ -70,9 +70,16 @@ struct FlushAndInvalidateRegionCommand final { | |||
| 70 | u64 size; | 70 | u64 size; |
| 71 | }; | 71 | }; |
| 72 | 72 | ||
| 73 | /// Command called within the gpu, to schedule actions after a command list end | ||
| 74 | struct OnCommandListEndCommand final {}; | ||
| 75 | |||
| 76 | /// Command to make the gpu look into pending requests | ||
| 77 | struct GPUTickCommand final {}; | ||
| 78 | |||
| 73 | using CommandData = | 79 | using CommandData = |
| 74 | std::variant<EndProcessingCommand, SubmitListCommand, SwapBuffersCommand, FlushRegionCommand, | 80 | std::variant<EndProcessingCommand, SubmitListCommand, SwapBuffersCommand, FlushRegionCommand, |
| 75 | InvalidateRegionCommand, FlushAndInvalidateRegionCommand>; | 81 | InvalidateRegionCommand, FlushAndInvalidateRegionCommand, OnCommandListEndCommand, |
| 82 | GPUTickCommand>; | ||
| 76 | 83 | ||
| 77 | struct CommandDataContainer { | 84 | struct CommandDataContainer { |
| 78 | CommandDataContainer() = default; | 85 | CommandDataContainer() = default; |
| @@ -122,6 +129,8 @@ public: | |||
| 122 | // Wait until the gpu thread is idle. | 129 | // Wait until the gpu thread is idle. |
| 123 | void WaitIdle() const; | 130 | void WaitIdle() const; |
| 124 | 131 | ||
| 132 | void OnCommandListEnd(); | ||
| 133 | |||
| 125 | private: | 134 | private: |
| 126 | /// Pushes a command to be executed by the GPU thread | 135 | /// Pushes a command to be executed by the GPU thread |
| 127 | u64 PushCommand(CommandData&& command_data); | 136 | u64 PushCommand(CommandData&& command_data); |
diff --git a/src/video_core/query_cache.h b/src/video_core/query_cache.h index 5ea2b01f2..2f75f8801 100644 --- a/src/video_core/query_cache.h +++ b/src/video_core/query_cache.h | |||
| @@ -12,10 +12,12 @@ | |||
| 12 | #include <mutex> | 12 | #include <mutex> |
| 13 | #include <optional> | 13 | #include <optional> |
| 14 | #include <unordered_map> | 14 | #include <unordered_map> |
| 15 | #include <unordered_set> | ||
| 15 | #include <vector> | 16 | #include <vector> |
| 16 | 17 | ||
| 17 | #include "common/assert.h" | 18 | #include "common/assert.h" |
| 18 | #include "core/core.h" | 19 | #include "core/core.h" |
| 20 | #include "core/settings.h" | ||
| 19 | #include "video_core/engines/maxwell_3d.h" | 21 | #include "video_core/engines/maxwell_3d.h" |
| 20 | #include "video_core/gpu.h" | 22 | #include "video_core/gpu.h" |
| 21 | #include "video_core/memory_manager.h" | 23 | #include "video_core/memory_manager.h" |
| @@ -130,6 +132,9 @@ public: | |||
| 130 | } | 132 | } |
| 131 | 133 | ||
| 132 | query->BindCounter(Stream(type).Current(), timestamp); | 134 | query->BindCounter(Stream(type).Current(), timestamp); |
| 135 | if (Settings::values.use_asynchronous_gpu_emulation) { | ||
| 136 | AsyncFlushQuery(cpu_addr); | ||
| 137 | } | ||
| 133 | } | 138 | } |
| 134 | 139 | ||
| 135 | /// Updates counters from GPU state. Expected to be called once per draw, clear or dispatch. | 140 | /// Updates counters from GPU state. Expected to be called once per draw, clear or dispatch. |
| @@ -170,6 +175,37 @@ public: | |||
| 170 | return streams[static_cast<std::size_t>(type)]; | 175 | return streams[static_cast<std::size_t>(type)]; |
| 171 | } | 176 | } |
| 172 | 177 | ||
| 178 | void CommitAsyncFlushes() { | ||
| 179 | committed_flushes.push_back(uncommitted_flushes); | ||
| 180 | uncommitted_flushes.reset(); | ||
| 181 | } | ||
| 182 | |||
| 183 | bool HasUncommittedFlushes() const { | ||
| 184 | return uncommitted_flushes != nullptr; | ||
| 185 | } | ||
| 186 | |||
| 187 | bool ShouldWaitAsyncFlushes() const { | ||
| 188 | if (committed_flushes.empty()) { | ||
| 189 | return false; | ||
| 190 | } | ||
| 191 | return committed_flushes.front() != nullptr; | ||
| 192 | } | ||
| 193 | |||
| 194 | void PopAsyncFlushes() { | ||
| 195 | if (committed_flushes.empty()) { | ||
| 196 | return; | ||
| 197 | } | ||
| 198 | auto& flush_list = committed_flushes.front(); | ||
| 199 | if (!flush_list) { | ||
| 200 | committed_flushes.pop_front(); | ||
| 201 | return; | ||
| 202 | } | ||
| 203 | for (VAddr query_address : *flush_list) { | ||
| 204 | FlushAndRemoveRegion(query_address, 4); | ||
| 205 | } | ||
| 206 | committed_flushes.pop_front(); | ||
| 207 | } | ||
| 208 | |||
| 173 | protected: | 209 | protected: |
| 174 | std::array<QueryPool, VideoCore::NumQueryTypes> query_pools; | 210 | std::array<QueryPool, VideoCore::NumQueryTypes> query_pools; |
| 175 | 211 | ||
| @@ -224,6 +260,13 @@ private: | |||
| 224 | return found != std::end(contents) ? &*found : nullptr; | 260 | return found != std::end(contents) ? &*found : nullptr; |
| 225 | } | 261 | } |
| 226 | 262 | ||
| 263 | void AsyncFlushQuery(VAddr addr) { | ||
| 264 | if (!uncommitted_flushes) { | ||
| 265 | uncommitted_flushes = std::make_shared<std::unordered_set<VAddr>>(); | ||
| 266 | } | ||
| 267 | uncommitted_flushes->insert(addr); | ||
| 268 | } | ||
| 269 | |||
| 227 | static constexpr std::uintptr_t PAGE_SIZE = 4096; | 270 | static constexpr std::uintptr_t PAGE_SIZE = 4096; |
| 228 | static constexpr unsigned PAGE_SHIFT = 12; | 271 | static constexpr unsigned PAGE_SHIFT = 12; |
| 229 | 272 | ||
| @@ -235,6 +278,9 @@ private: | |||
| 235 | std::unordered_map<u64, std::vector<CachedQuery>> cached_queries; | 278 | std::unordered_map<u64, std::vector<CachedQuery>> cached_queries; |
| 236 | 279 | ||
| 237 | std::array<CounterStream, VideoCore::NumQueryTypes> streams; | 280 | std::array<CounterStream, VideoCore::NumQueryTypes> streams; |
| 281 | |||
| 282 | std::shared_ptr<std::unordered_set<VAddr>> uncommitted_flushes{}; | ||
| 283 | std::list<std::shared_ptr<std::unordered_set<VAddr>>> committed_flushes; | ||
| 238 | }; | 284 | }; |
| 239 | 285 | ||
| 240 | template <class QueryCache, class HostCounter> | 286 | template <class QueryCache, class HostCounter> |
diff --git a/src/video_core/rasterizer_interface.h b/src/video_core/rasterizer_interface.h index 8ae5b9c4e..603f61952 100644 --- a/src/video_core/rasterizer_interface.h +++ b/src/video_core/rasterizer_interface.h | |||
| @@ -49,15 +49,33 @@ public: | |||
| 49 | /// Records a GPU query and caches it | 49 | /// Records a GPU query and caches it |
| 50 | virtual void Query(GPUVAddr gpu_addr, QueryType type, std::optional<u64> timestamp) = 0; | 50 | virtual void Query(GPUVAddr gpu_addr, QueryType type, std::optional<u64> timestamp) = 0; |
| 51 | 51 | ||
| 52 | /// Signal a GPU based semaphore as a fence | ||
| 53 | virtual void SignalSemaphore(GPUVAddr addr, u32 value) = 0; | ||
| 54 | |||
| 55 | /// Signal a GPU based syncpoint as a fence | ||
| 56 | virtual void SignalSyncPoint(u32 value) = 0; | ||
| 57 | |||
| 58 | /// Release all pending fences. | ||
| 59 | virtual void ReleaseFences() = 0; | ||
| 60 | |||
| 52 | /// Notify rasterizer that all caches should be flushed to Switch memory | 61 | /// Notify rasterizer that all caches should be flushed to Switch memory |
| 53 | virtual void FlushAll() = 0; | 62 | virtual void FlushAll() = 0; |
| 54 | 63 | ||
| 55 | /// Notify rasterizer that any caches of the specified region should be flushed to Switch memory | 64 | /// Notify rasterizer that any caches of the specified region should be flushed to Switch memory |
| 56 | virtual void FlushRegion(VAddr addr, u64 size) = 0; | 65 | virtual void FlushRegion(VAddr addr, u64 size) = 0; |
| 57 | 66 | ||
| 67 | /// Check if the the specified memory area requires flushing to CPU Memory. | ||
| 68 | virtual bool MustFlushRegion(VAddr addr, u64 size) = 0; | ||
| 69 | |||
| 58 | /// Notify rasterizer that any caches of the specified region should be invalidated | 70 | /// Notify rasterizer that any caches of the specified region should be invalidated |
| 59 | virtual void InvalidateRegion(VAddr addr, u64 size) = 0; | 71 | virtual void InvalidateRegion(VAddr addr, u64 size) = 0; |
| 60 | 72 | ||
| 73 | /// Notify rasterizer that any caches of the specified region are desync with guest | ||
| 74 | virtual void OnCPUWrite(VAddr addr, u64 size) = 0; | ||
| 75 | |||
| 76 | /// Sync memory between guest and host. | ||
| 77 | virtual void SyncGuestHost() = 0; | ||
| 78 | |||
| 61 | /// Notify rasterizer that any caches of the specified region should be flushed to Switch memory | 79 | /// Notify rasterizer that any caches of the specified region should be flushed to Switch memory |
| 62 | /// and invalidated | 80 | /// and invalidated |
| 63 | virtual void FlushAndInvalidateRegion(VAddr addr, u64 size) = 0; | 81 | virtual void FlushAndInvalidateRegion(VAddr addr, u64 size) = 0; |
diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.cpp b/src/video_core/renderer_opengl/gl_buffer_cache.cpp index cb5792407..4efce0de7 100644 --- a/src/video_core/renderer_opengl/gl_buffer_cache.cpp +++ b/src/video_core/renderer_opengl/gl_buffer_cache.cpp | |||
| @@ -52,7 +52,7 @@ Buffer OGLBufferCache::CreateBlock(VAddr cpu_addr, std::size_t size) { | |||
| 52 | } | 52 | } |
| 53 | 53 | ||
| 54 | void OGLBufferCache::WriteBarrier() { | 54 | void OGLBufferCache::WriteBarrier() { |
| 55 | glMemoryBarrier(GL_ALL_BARRIER_BITS); | 55 | glMemoryBarrier(GL_SHADER_STORAGE_BARRIER_BIT); |
| 56 | } | 56 | } |
| 57 | 57 | ||
| 58 | GLuint OGLBufferCache::ToHandle(const Buffer& buffer) { | 58 | GLuint OGLBufferCache::ToHandle(const Buffer& buffer) { |
| @@ -72,6 +72,7 @@ void OGLBufferCache::UploadBlockData(const Buffer& buffer, std::size_t offset, s | |||
| 72 | void OGLBufferCache::DownloadBlockData(const Buffer& buffer, std::size_t offset, std::size_t size, | 72 | void OGLBufferCache::DownloadBlockData(const Buffer& buffer, std::size_t offset, std::size_t size, |
| 73 | u8* data) { | 73 | u8* data) { |
| 74 | MICROPROFILE_SCOPE(OpenGL_Buffer_Download); | 74 | MICROPROFILE_SCOPE(OpenGL_Buffer_Download); |
| 75 | glMemoryBarrier(GL_BUFFER_UPDATE_BARRIER_BIT); | ||
| 75 | glGetNamedBufferSubData(buffer->GetHandle(), static_cast<GLintptr>(offset), | 76 | glGetNamedBufferSubData(buffer->GetHandle(), static_cast<GLintptr>(offset), |
| 76 | static_cast<GLsizeiptr>(size), data); | 77 | static_cast<GLsizeiptr>(size), data); |
| 77 | } | 78 | } |
diff --git a/src/video_core/renderer_opengl/gl_fence_manager.cpp b/src/video_core/renderer_opengl/gl_fence_manager.cpp new file mode 100644 index 000000000..99ddcb3f8 --- /dev/null +++ b/src/video_core/renderer_opengl/gl_fence_manager.cpp | |||
| @@ -0,0 +1,72 @@ | |||
| 1 | // Copyright 2020 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/assert.h" | ||
| 6 | |||
| 7 | #include "video_core/renderer_opengl/gl_fence_manager.h" | ||
| 8 | |||
| 9 | namespace OpenGL { | ||
| 10 | |||
| 11 | GLInnerFence::GLInnerFence(u32 payload, bool is_stubbed) | ||
| 12 | : VideoCommon::FenceBase(payload, is_stubbed), sync_object{} {} | ||
| 13 | |||
| 14 | GLInnerFence::GLInnerFence(GPUVAddr address, u32 payload, bool is_stubbed) | ||
| 15 | : VideoCommon::FenceBase(address, payload, is_stubbed), sync_object{} {} | ||
| 16 | |||
| 17 | GLInnerFence::~GLInnerFence() = default; | ||
| 18 | |||
| 19 | void GLInnerFence::Queue() { | ||
| 20 | if (is_stubbed) { | ||
| 21 | return; | ||
| 22 | } | ||
| 23 | ASSERT(sync_object.handle == 0); | ||
| 24 | sync_object.Create(); | ||
| 25 | } | ||
| 26 | |||
| 27 | bool GLInnerFence::IsSignaled() const { | ||
| 28 | if (is_stubbed) { | ||
| 29 | return true; | ||
| 30 | } | ||
| 31 | ASSERT(sync_object.handle != 0); | ||
| 32 | GLsizei length; | ||
| 33 | GLint sync_status; | ||
| 34 | glGetSynciv(sync_object.handle, GL_SYNC_STATUS, sizeof(GLint), &length, &sync_status); | ||
| 35 | return sync_status == GL_SIGNALED; | ||
| 36 | } | ||
| 37 | |||
| 38 | void GLInnerFence::Wait() { | ||
| 39 | if (is_stubbed) { | ||
| 40 | return; | ||
| 41 | } | ||
| 42 | ASSERT(sync_object.handle != 0); | ||
| 43 | glClientWaitSync(sync_object.handle, 0, GL_TIMEOUT_IGNORED); | ||
| 44 | } | ||
| 45 | |||
| 46 | FenceManagerOpenGL::FenceManagerOpenGL(Core::System& system, | ||
| 47 | VideoCore::RasterizerInterface& rasterizer, | ||
| 48 | TextureCacheOpenGL& texture_cache, | ||
| 49 | OGLBufferCache& buffer_cache, QueryCache& query_cache) | ||
| 50 | : GenericFenceManager(system, rasterizer, texture_cache, buffer_cache, query_cache) {} | ||
| 51 | |||
| 52 | Fence FenceManagerOpenGL::CreateFence(u32 value, bool is_stubbed) { | ||
| 53 | return std::make_shared<GLInnerFence>(value, is_stubbed); | ||
| 54 | } | ||
| 55 | |||
| 56 | Fence FenceManagerOpenGL::CreateFence(GPUVAddr addr, u32 value, bool is_stubbed) { | ||
| 57 | return std::make_shared<GLInnerFence>(addr, value, is_stubbed); | ||
| 58 | } | ||
| 59 | |||
| 60 | void FenceManagerOpenGL::QueueFence(Fence& fence) { | ||
| 61 | fence->Queue(); | ||
| 62 | } | ||
| 63 | |||
| 64 | bool FenceManagerOpenGL::IsFenceSignaled(Fence& fence) const { | ||
| 65 | return fence->IsSignaled(); | ||
| 66 | } | ||
| 67 | |||
| 68 | void FenceManagerOpenGL::WaitFence(Fence& fence) { | ||
| 69 | fence->Wait(); | ||
| 70 | } | ||
| 71 | |||
| 72 | } // namespace OpenGL | ||
diff --git a/src/video_core/renderer_opengl/gl_fence_manager.h b/src/video_core/renderer_opengl/gl_fence_manager.h new file mode 100644 index 000000000..c917b3343 --- /dev/null +++ b/src/video_core/renderer_opengl/gl_fence_manager.h | |||
| @@ -0,0 +1,53 @@ | |||
| 1 | // Copyright 2020 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include <memory> | ||
| 8 | #include <glad/glad.h> | ||
| 9 | |||
| 10 | #include "common/common_types.h" | ||
| 11 | #include "video_core/fence_manager.h" | ||
| 12 | #include "video_core/renderer_opengl/gl_buffer_cache.h" | ||
| 13 | #include "video_core/renderer_opengl/gl_query_cache.h" | ||
| 14 | #include "video_core/renderer_opengl/gl_resource_manager.h" | ||
| 15 | #include "video_core/renderer_opengl/gl_texture_cache.h" | ||
| 16 | |||
| 17 | namespace OpenGL { | ||
| 18 | |||
| 19 | class GLInnerFence : public VideoCommon::FenceBase { | ||
| 20 | public: | ||
| 21 | GLInnerFence(u32 payload, bool is_stubbed); | ||
| 22 | GLInnerFence(GPUVAddr address, u32 payload, bool is_stubbed); | ||
| 23 | ~GLInnerFence(); | ||
| 24 | |||
| 25 | void Queue(); | ||
| 26 | |||
| 27 | bool IsSignaled() const; | ||
| 28 | |||
| 29 | void Wait(); | ||
| 30 | |||
| 31 | private: | ||
| 32 | OGLSync sync_object; | ||
| 33 | }; | ||
| 34 | |||
| 35 | using Fence = std::shared_ptr<GLInnerFence>; | ||
| 36 | using GenericFenceManager = | ||
| 37 | VideoCommon::FenceManager<Fence, TextureCacheOpenGL, OGLBufferCache, QueryCache>; | ||
| 38 | |||
| 39 | class FenceManagerOpenGL final : public GenericFenceManager { | ||
| 40 | public: | ||
| 41 | FenceManagerOpenGL(Core::System& system, VideoCore::RasterizerInterface& rasterizer, | ||
| 42 | TextureCacheOpenGL& texture_cache, OGLBufferCache& buffer_cache, | ||
| 43 | QueryCache& query_cache); | ||
| 44 | |||
| 45 | protected: | ||
| 46 | Fence CreateFence(u32 value, bool is_stubbed) override; | ||
| 47 | Fence CreateFence(GPUVAddr addr, u32 value, bool is_stubbed) override; | ||
| 48 | void QueueFence(Fence& fence) override; | ||
| 49 | bool IsFenceSignaled(Fence& fence) const override; | ||
| 50 | void WaitFence(Fence& fence) override; | ||
| 51 | }; | ||
| 52 | |||
| 53 | } // namespace OpenGL | ||
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index 175374f0d..4c16c89d2 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp | |||
| @@ -99,9 +99,10 @@ RasterizerOpenGL::RasterizerOpenGL(Core::System& system, Core::Frontend::EmuWind | |||
| 99 | ScreenInfo& info, GLShader::ProgramManager& program_manager, | 99 | ScreenInfo& info, GLShader::ProgramManager& program_manager, |
| 100 | StateTracker& state_tracker) | 100 | StateTracker& state_tracker) |
| 101 | : RasterizerAccelerated{system.Memory()}, texture_cache{system, *this, device, state_tracker}, | 101 | : RasterizerAccelerated{system.Memory()}, texture_cache{system, *this, device, state_tracker}, |
| 102 | shader_cache{*this, system, emu_window, device}, query_cache{system, *this}, system{system}, | 102 | shader_cache{*this, system, emu_window, device}, query_cache{system, *this}, |
| 103 | screen_info{info}, program_manager{program_manager}, state_tracker{state_tracker}, | 103 | buffer_cache{*this, system, device, STREAM_BUFFER_SIZE}, |
| 104 | buffer_cache{*this, system, device, STREAM_BUFFER_SIZE} { | 104 | fence_manager{system, *this, texture_cache, buffer_cache, query_cache}, system{system}, |
| 105 | screen_info{info}, program_manager{program_manager}, state_tracker{state_tracker} { | ||
| 105 | CheckExtensions(); | 106 | CheckExtensions(); |
| 106 | } | 107 | } |
| 107 | 108 | ||
| @@ -599,6 +600,8 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) { | |||
| 599 | EndTransformFeedback(); | 600 | EndTransformFeedback(); |
| 600 | 601 | ||
| 601 | ++num_queued_commands; | 602 | ++num_queued_commands; |
| 603 | |||
| 604 | system.GPU().TickWork(); | ||
| 602 | } | 605 | } |
| 603 | 606 | ||
| 604 | void RasterizerOpenGL::DispatchCompute(GPUVAddr code_addr) { | 607 | void RasterizerOpenGL::DispatchCompute(GPUVAddr code_addr) { |
| @@ -649,6 +652,13 @@ void RasterizerOpenGL::FlushRegion(VAddr addr, u64 size) { | |||
| 649 | query_cache.FlushRegion(addr, size); | 652 | query_cache.FlushRegion(addr, size); |
| 650 | } | 653 | } |
| 651 | 654 | ||
| 655 | bool RasterizerOpenGL::MustFlushRegion(VAddr addr, u64 size) { | ||
| 656 | if (!Settings::IsGPULevelHigh()) { | ||
| 657 | return buffer_cache.MustFlushRegion(addr, size); | ||
| 658 | } | ||
| 659 | return texture_cache.MustFlushRegion(addr, size) || buffer_cache.MustFlushRegion(addr, size); | ||
| 660 | } | ||
| 661 | |||
| 652 | void RasterizerOpenGL::InvalidateRegion(VAddr addr, u64 size) { | 662 | void RasterizerOpenGL::InvalidateRegion(VAddr addr, u64 size) { |
| 653 | MICROPROFILE_SCOPE(OpenGL_CacheManagement); | 663 | MICROPROFILE_SCOPE(OpenGL_CacheManagement); |
| 654 | if (addr == 0 || size == 0) { | 664 | if (addr == 0 || size == 0) { |
| @@ -660,8 +670,52 @@ void RasterizerOpenGL::InvalidateRegion(VAddr addr, u64 size) { | |||
| 660 | query_cache.InvalidateRegion(addr, size); | 670 | query_cache.InvalidateRegion(addr, size); |
| 661 | } | 671 | } |
| 662 | 672 | ||
| 673 | void RasterizerOpenGL::OnCPUWrite(VAddr addr, u64 size) { | ||
| 674 | MICROPROFILE_SCOPE(OpenGL_CacheManagement); | ||
| 675 | if (addr == 0 || size == 0) { | ||
| 676 | return; | ||
| 677 | } | ||
| 678 | texture_cache.OnCPUWrite(addr, size); | ||
| 679 | shader_cache.InvalidateRegion(addr, size); | ||
| 680 | buffer_cache.OnCPUWrite(addr, size); | ||
| 681 | query_cache.InvalidateRegion(addr, size); | ||
| 682 | } | ||
| 683 | |||
| 684 | void RasterizerOpenGL::SyncGuestHost() { | ||
| 685 | MICROPROFILE_SCOPE(OpenGL_CacheManagement); | ||
| 686 | texture_cache.SyncGuestHost(); | ||
| 687 | buffer_cache.SyncGuestHost(); | ||
| 688 | } | ||
| 689 | |||
| 690 | void RasterizerOpenGL::SignalSemaphore(GPUVAddr addr, u32 value) { | ||
| 691 | auto& gpu{system.GPU()}; | ||
| 692 | if (!gpu.IsAsync()) { | ||
| 693 | auto& memory_manager{gpu.MemoryManager()}; | ||
| 694 | memory_manager.Write<u32>(addr, value); | ||
| 695 | return; | ||
| 696 | } | ||
| 697 | fence_manager.SignalSemaphore(addr, value); | ||
| 698 | } | ||
| 699 | |||
| 700 | void RasterizerOpenGL::SignalSyncPoint(u32 value) { | ||
| 701 | auto& gpu{system.GPU()}; | ||
| 702 | if (!gpu.IsAsync()) { | ||
| 703 | gpu.IncrementSyncPoint(value); | ||
| 704 | return; | ||
| 705 | } | ||
| 706 | fence_manager.SignalSyncPoint(value); | ||
| 707 | } | ||
| 708 | |||
| 709 | void RasterizerOpenGL::ReleaseFences() { | ||
| 710 | auto& gpu{system.GPU()}; | ||
| 711 | if (!gpu.IsAsync()) { | ||
| 712 | return; | ||
| 713 | } | ||
| 714 | fence_manager.WaitPendingFences(); | ||
| 715 | } | ||
| 716 | |||
| 663 | void RasterizerOpenGL::FlushAndInvalidateRegion(VAddr addr, u64 size) { | 717 | void RasterizerOpenGL::FlushAndInvalidateRegion(VAddr addr, u64 size) { |
| 664 | if (Settings::values.use_accurate_gpu_emulation) { | 718 | if (Settings::IsGPULevelExtreme()) { |
| 665 | FlushRegion(addr, size); | 719 | FlushRegion(addr, size); |
| 666 | } | 720 | } |
| 667 | InvalidateRegion(addr, size); | 721 | InvalidateRegion(addr, size); |
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h index caea174d2..ebd2173eb 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.h +++ b/src/video_core/renderer_opengl/gl_rasterizer.h | |||
| @@ -23,6 +23,7 @@ | |||
| 23 | #include "video_core/rasterizer_interface.h" | 23 | #include "video_core/rasterizer_interface.h" |
| 24 | #include "video_core/renderer_opengl/gl_buffer_cache.h" | 24 | #include "video_core/renderer_opengl/gl_buffer_cache.h" |
| 25 | #include "video_core/renderer_opengl/gl_device.h" | 25 | #include "video_core/renderer_opengl/gl_device.h" |
| 26 | #include "video_core/renderer_opengl/gl_fence_manager.h" | ||
| 26 | #include "video_core/renderer_opengl/gl_framebuffer_cache.h" | 27 | #include "video_core/renderer_opengl/gl_framebuffer_cache.h" |
| 27 | #include "video_core/renderer_opengl/gl_query_cache.h" | 28 | #include "video_core/renderer_opengl/gl_query_cache.h" |
| 28 | #include "video_core/renderer_opengl/gl_resource_manager.h" | 29 | #include "video_core/renderer_opengl/gl_resource_manager.h" |
| @@ -66,7 +67,13 @@ public: | |||
| 66 | void Query(GPUVAddr gpu_addr, VideoCore::QueryType type, std::optional<u64> timestamp) override; | 67 | void Query(GPUVAddr gpu_addr, VideoCore::QueryType type, std::optional<u64> timestamp) override; |
| 67 | void FlushAll() override; | 68 | void FlushAll() override; |
| 68 | void FlushRegion(VAddr addr, u64 size) override; | 69 | void FlushRegion(VAddr addr, u64 size) override; |
| 70 | bool MustFlushRegion(VAddr addr, u64 size) override; | ||
| 69 | void InvalidateRegion(VAddr addr, u64 size) override; | 71 | void InvalidateRegion(VAddr addr, u64 size) override; |
| 72 | void OnCPUWrite(VAddr addr, u64 size) override; | ||
| 73 | void SyncGuestHost() override; | ||
| 74 | void SignalSemaphore(GPUVAddr addr, u32 value) override; | ||
| 75 | void SignalSyncPoint(u32 value) override; | ||
| 76 | void ReleaseFences() override; | ||
| 70 | void FlushAndInvalidateRegion(VAddr addr, u64 size) override; | 77 | void FlushAndInvalidateRegion(VAddr addr, u64 size) override; |
| 71 | void FlushCommands() override; | 78 | void FlushCommands() override; |
| 72 | void TickFrame() override; | 79 | void TickFrame() override; |
| @@ -222,6 +229,8 @@ private: | |||
| 222 | SamplerCacheOpenGL sampler_cache; | 229 | SamplerCacheOpenGL sampler_cache; |
| 223 | FramebufferCacheOpenGL framebuffer_cache; | 230 | FramebufferCacheOpenGL framebuffer_cache; |
| 224 | QueryCache query_cache; | 231 | QueryCache query_cache; |
| 232 | OGLBufferCache buffer_cache; | ||
| 233 | FenceManagerOpenGL fence_manager; | ||
| 225 | 234 | ||
| 226 | Core::System& system; | 235 | Core::System& system; |
| 227 | ScreenInfo& screen_info; | 236 | ScreenInfo& screen_info; |
| @@ -229,7 +238,6 @@ private: | |||
| 229 | StateTracker& state_tracker; | 238 | StateTracker& state_tracker; |
| 230 | 239 | ||
| 231 | static constexpr std::size_t STREAM_BUFFER_SIZE = 128 * 1024 * 1024; | 240 | static constexpr std::size_t STREAM_BUFFER_SIZE = 128 * 1024 * 1024; |
| 232 | OGLBufferCache buffer_cache; | ||
| 233 | 241 | ||
| 234 | GLint vertex_binding = 0; | 242 | GLint vertex_binding = 0; |
| 235 | 243 | ||
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index 6d2ff20f9..f63156b8d 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp | |||
| @@ -448,7 +448,7 @@ Shader ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program) { | |||
| 448 | 448 | ||
| 449 | // Look up shader in the cache based on address | 449 | // Look up shader in the cache based on address |
| 450 | const auto cpu_addr{memory_manager.GpuToCpuAddress(address)}; | 450 | const auto cpu_addr{memory_manager.GpuToCpuAddress(address)}; |
| 451 | Shader shader{cpu_addr ? TryGet(*cpu_addr) : nullptr}; | 451 | Shader shader{cpu_addr ? TryGet(*cpu_addr) : null_shader}; |
| 452 | if (shader) { | 452 | if (shader) { |
| 453 | return last_shaders[static_cast<std::size_t>(program)] = shader; | 453 | return last_shaders[static_cast<std::size_t>(program)] = shader; |
| 454 | } | 454 | } |
| @@ -477,7 +477,12 @@ Shader ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program) { | |||
| 477 | const std::size_t size_in_bytes = code.size() * sizeof(u64); | 477 | const std::size_t size_in_bytes = code.size() * sizeof(u64); |
| 478 | shader = CachedShader::CreateFromCache(params, found->second, size_in_bytes); | 478 | shader = CachedShader::CreateFromCache(params, found->second, size_in_bytes); |
| 479 | } | 479 | } |
| 480 | Register(shader); | 480 | |
| 481 | if (cpu_addr) { | ||
| 482 | Register(shader); | ||
| 483 | } else { | ||
| 484 | null_shader = shader; | ||
| 485 | } | ||
| 481 | 486 | ||
| 482 | return last_shaders[static_cast<std::size_t>(program)] = shader; | 487 | return last_shaders[static_cast<std::size_t>(program)] = shader; |
| 483 | } | 488 | } |
| @@ -486,7 +491,7 @@ Shader ShaderCacheOpenGL::GetComputeKernel(GPUVAddr code_addr) { | |||
| 486 | auto& memory_manager{system.GPU().MemoryManager()}; | 491 | auto& memory_manager{system.GPU().MemoryManager()}; |
| 487 | const auto cpu_addr{memory_manager.GpuToCpuAddress(code_addr)}; | 492 | const auto cpu_addr{memory_manager.GpuToCpuAddress(code_addr)}; |
| 488 | 493 | ||
| 489 | auto kernel = cpu_addr ? TryGet(*cpu_addr) : nullptr; | 494 | auto kernel = cpu_addr ? TryGet(*cpu_addr) : null_kernel; |
| 490 | if (kernel) { | 495 | if (kernel) { |
| 491 | return kernel; | 496 | return kernel; |
| 492 | } | 497 | } |
| @@ -507,7 +512,11 @@ Shader ShaderCacheOpenGL::GetComputeKernel(GPUVAddr code_addr) { | |||
| 507 | kernel = CachedShader::CreateFromCache(params, found->second, size_in_bytes); | 512 | kernel = CachedShader::CreateFromCache(params, found->second, size_in_bytes); |
| 508 | } | 513 | } |
| 509 | 514 | ||
| 510 | Register(kernel); | 515 | if (cpu_addr) { |
| 516 | Register(kernel); | ||
| 517 | } else { | ||
| 518 | null_kernel = kernel; | ||
| 519 | } | ||
| 511 | return kernel; | 520 | return kernel; |
| 512 | } | 521 | } |
| 513 | 522 | ||
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.h b/src/video_core/renderer_opengl/gl_shader_cache.h index c836df5bd..91690b470 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.h +++ b/src/video_core/renderer_opengl/gl_shader_cache.h | |||
| @@ -125,6 +125,9 @@ private: | |||
| 125 | ShaderDiskCacheOpenGL disk_cache; | 125 | ShaderDiskCacheOpenGL disk_cache; |
| 126 | std::unordered_map<u64, PrecompiledShader> runtime_cache; | 126 | std::unordered_map<u64, PrecompiledShader> runtime_cache; |
| 127 | 127 | ||
| 128 | Shader null_shader{}; | ||
| 129 | Shader null_kernel{}; | ||
| 130 | |||
| 128 | std::array<Shader, Maxwell::MaxShaderProgram> last_shaders; | 131 | std::array<Shader, Maxwell::MaxShaderProgram> last_shaders; |
| 129 | }; | 132 | }; |
| 130 | 133 | ||
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp index 22242cce9..0cd3ad7e1 100644 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp +++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp | |||
| @@ -484,7 +484,7 @@ private: | |||
| 484 | code.AddLine("switch (jmp_to) {{"); | 484 | code.AddLine("switch (jmp_to) {{"); |
| 485 | 485 | ||
| 486 | for (const auto& pair : ir.GetBasicBlocks()) { | 486 | for (const auto& pair : ir.GetBasicBlocks()) { |
| 487 | const auto [address, bb] = pair; | 487 | const auto& [address, bb] = pair; |
| 488 | code.AddLine("case 0x{:X}U: {{", address); | 488 | code.AddLine("case 0x{:X}U: {{", address); |
| 489 | ++code.scope; | 489 | ++code.scope; |
| 490 | 490 | ||
| @@ -1484,8 +1484,8 @@ private: | |||
| 1484 | dy += '('; | 1484 | dy += '('; |
| 1485 | 1485 | ||
| 1486 | for (std::size_t index = 0; index < components; ++index) { | 1486 | for (std::size_t index = 0; index < components; ++index) { |
| 1487 | const auto operand_x{derivates.at(index * 2)}; | 1487 | const auto& operand_x{derivates.at(index * 2)}; |
| 1488 | const auto operand_y{derivates.at(index * 2 + 1)}; | 1488 | const auto& operand_y{derivates.at(index * 2 + 1)}; |
| 1489 | dx += Visit(operand_x).AsFloat(); | 1489 | dx += Visit(operand_x).AsFloat(); |
| 1490 | dy += Visit(operand_y).AsFloat(); | 1490 | dy += Visit(operand_y).AsFloat(); |
| 1491 | 1491 | ||
diff --git a/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp b/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp index 2bb376555..be1c31978 100644 --- a/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp +++ b/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp | |||
| @@ -2,10 +2,12 @@ | |||
| 2 | // Licensed under GPLv2 or any later version | 2 | // Licensed under GPLv2 or any later version |
| 3 | // Refer to the license.txt file included. | 3 | // Refer to the license.txt file included. |
| 4 | 4 | ||
| 5 | #include <cstring> | ||
| 5 | #include <tuple> | 6 | #include <tuple> |
| 6 | 7 | ||
| 7 | #include <boost/functional/hash.hpp> | 8 | #include <boost/functional/hash.hpp> |
| 8 | 9 | ||
| 10 | #include "common/cityhash.h" | ||
| 9 | #include "common/common_types.h" | 11 | #include "common/common_types.h" |
| 10 | #include "video_core/renderer_vulkan/fixed_pipeline_state.h" | 12 | #include "video_core/renderer_vulkan/fixed_pipeline_state.h" |
| 11 | 13 | ||
| @@ -13,289 +15,352 @@ namespace Vulkan { | |||
| 13 | 15 | ||
| 14 | namespace { | 16 | namespace { |
| 15 | 17 | ||
| 16 | constexpr FixedPipelineState::DepthStencil GetDepthStencilState(const Maxwell& regs) { | 18 | constexpr std::size_t POINT = 0; |
| 17 | const FixedPipelineState::StencilFace front_stencil( | 19 | constexpr std::size_t LINE = 1; |
| 18 | regs.stencil_front_op_fail, regs.stencil_front_op_zfail, regs.stencil_front_op_zpass, | 20 | constexpr std::size_t POLYGON = 2; |
| 19 | regs.stencil_front_func_func); | 21 | constexpr std::array POLYGON_OFFSET_ENABLE_LUT = { |
| 20 | const FixedPipelineState::StencilFace back_stencil = | 22 | POINT, // Points |
| 21 | regs.stencil_two_side_enable | 23 | LINE, // Lines |
| 22 | ? FixedPipelineState::StencilFace(regs.stencil_back_op_fail, regs.stencil_back_op_zfail, | 24 | LINE, // LineLoop |
| 23 | regs.stencil_back_op_zpass, | 25 | LINE, // LineStrip |
| 24 | regs.stencil_back_func_func) | 26 | POLYGON, // Triangles |
| 25 | : front_stencil; | 27 | POLYGON, // TriangleStrip |
| 26 | return FixedPipelineState::DepthStencil( | 28 | POLYGON, // TriangleFan |
| 27 | regs.depth_test_enable == 1, regs.depth_write_enabled == 1, regs.depth_bounds_enable == 1, | 29 | POLYGON, // Quads |
| 28 | regs.stencil_enable == 1, regs.depth_test_func, front_stencil, back_stencil); | 30 | POLYGON, // QuadStrip |
| 29 | } | 31 | POLYGON, // Polygon |
| 30 | 32 | LINE, // LinesAdjacency | |
| 31 | constexpr FixedPipelineState::InputAssembly GetInputAssemblyState(const Maxwell& regs) { | 33 | LINE, // LineStripAdjacency |
| 32 | return FixedPipelineState::InputAssembly( | 34 | POLYGON, // TrianglesAdjacency |
| 33 | regs.draw.topology, regs.primitive_restart.enabled, | 35 | POLYGON, // TriangleStripAdjacency |
| 34 | regs.draw.topology == Maxwell::PrimitiveTopology::Points ? regs.point_size : 0.0f); | 36 | POLYGON, // Patches |
| 35 | } | 37 | }; |
| 36 | |||
| 37 | constexpr FixedPipelineState::BlendingAttachment GetBlendingAttachmentState( | ||
| 38 | const Maxwell& regs, std::size_t render_target) { | ||
| 39 | const auto& mask = regs.color_mask[regs.color_mask_common ? 0 : render_target]; | ||
| 40 | const std::array components = {mask.R != 0, mask.G != 0, mask.B != 0, mask.A != 0}; | ||
| 41 | |||
| 42 | const FixedPipelineState::BlendingAttachment default_blending( | ||
| 43 | false, Maxwell::Blend::Equation::Add, Maxwell::Blend::Factor::One, | ||
| 44 | Maxwell::Blend::Factor::Zero, Maxwell::Blend::Equation::Add, Maxwell::Blend::Factor::One, | ||
| 45 | Maxwell::Blend::Factor::Zero, components); | ||
| 46 | if (render_target >= regs.rt_control.count) { | ||
| 47 | return default_blending; | ||
| 48 | } | ||
| 49 | 38 | ||
| 50 | if (!regs.independent_blend_enable) { | 39 | } // Anonymous namespace |
| 51 | const auto& src = regs.blend; | ||
| 52 | if (!src.enable[render_target]) { | ||
| 53 | return default_blending; | ||
| 54 | } | ||
| 55 | return FixedPipelineState::BlendingAttachment( | ||
| 56 | true, src.equation_rgb, src.factor_source_rgb, src.factor_dest_rgb, src.equation_a, | ||
| 57 | src.factor_source_a, src.factor_dest_a, components); | ||
| 58 | } | ||
| 59 | 40 | ||
| 60 | if (!regs.blend.enable[render_target]) { | 41 | void FixedPipelineState::DepthStencil::Fill(const Maxwell& regs) noexcept { |
| 61 | return default_blending; | 42 | raw = 0; |
| 43 | front.action_stencil_fail.Assign(PackStencilOp(regs.stencil_front_op_fail)); | ||
| 44 | front.action_depth_fail.Assign(PackStencilOp(regs.stencil_front_op_zfail)); | ||
| 45 | front.action_depth_pass.Assign(PackStencilOp(regs.stencil_front_op_zpass)); | ||
| 46 | front.test_func.Assign(PackComparisonOp(regs.stencil_front_func_func)); | ||
| 47 | if (regs.stencil_two_side_enable) { | ||
| 48 | back.action_stencil_fail.Assign(PackStencilOp(regs.stencil_back_op_fail)); | ||
| 49 | back.action_depth_fail.Assign(PackStencilOp(regs.stencil_back_op_zfail)); | ||
| 50 | back.action_depth_pass.Assign(PackStencilOp(regs.stencil_back_op_zpass)); | ||
| 51 | back.test_func.Assign(PackComparisonOp(regs.stencil_back_func_func)); | ||
| 52 | } else { | ||
| 53 | back.action_stencil_fail.Assign(front.action_stencil_fail); | ||
| 54 | back.action_depth_fail.Assign(front.action_depth_fail); | ||
| 55 | back.action_depth_pass.Assign(front.action_depth_pass); | ||
| 56 | back.test_func.Assign(front.test_func); | ||
| 62 | } | 57 | } |
| 63 | const auto& src = regs.independent_blend[render_target]; | 58 | depth_test_enable.Assign(regs.depth_test_enable); |
| 64 | return FixedPipelineState::BlendingAttachment( | 59 | depth_write_enable.Assign(regs.depth_write_enabled); |
| 65 | true, src.equation_rgb, src.factor_source_rgb, src.factor_dest_rgb, src.equation_a, | 60 | depth_bounds_enable.Assign(regs.depth_bounds_enable); |
| 66 | src.factor_source_a, src.factor_dest_a, components); | 61 | stencil_enable.Assign(regs.stencil_enable); |
| 62 | depth_test_func.Assign(PackComparisonOp(regs.depth_test_func)); | ||
| 67 | } | 63 | } |
| 68 | 64 | ||
| 69 | constexpr FixedPipelineState::ColorBlending GetColorBlendingState(const Maxwell& regs) { | 65 | void FixedPipelineState::Rasterizer::Fill(const Maxwell& regs) noexcept { |
| 70 | return FixedPipelineState::ColorBlending( | 66 | const auto& clip = regs.view_volume_clip_control; |
| 71 | {regs.blend_color.r, regs.blend_color.g, regs.blend_color.b, regs.blend_color.a}, | ||
| 72 | regs.rt_control.count, | ||
| 73 | {GetBlendingAttachmentState(regs, 0), GetBlendingAttachmentState(regs, 1), | ||
| 74 | GetBlendingAttachmentState(regs, 2), GetBlendingAttachmentState(regs, 3), | ||
| 75 | GetBlendingAttachmentState(regs, 4), GetBlendingAttachmentState(regs, 5), | ||
| 76 | GetBlendingAttachmentState(regs, 6), GetBlendingAttachmentState(regs, 7)}); | ||
| 77 | } | ||
| 78 | |||
| 79 | constexpr FixedPipelineState::Tessellation GetTessellationState(const Maxwell& regs) { | ||
| 80 | return FixedPipelineState::Tessellation(regs.patch_vertices, regs.tess_mode.prim, | ||
| 81 | regs.tess_mode.spacing, regs.tess_mode.cw != 0); | ||
| 82 | } | ||
| 83 | |||
| 84 | constexpr std::size_t Point = 0; | ||
| 85 | constexpr std::size_t Line = 1; | ||
| 86 | constexpr std::size_t Polygon = 2; | ||
| 87 | constexpr std::array PolygonOffsetEnableLUT = { | ||
| 88 | Point, // Points | ||
| 89 | Line, // Lines | ||
| 90 | Line, // LineLoop | ||
| 91 | Line, // LineStrip | ||
| 92 | Polygon, // Triangles | ||
| 93 | Polygon, // TriangleStrip | ||
| 94 | Polygon, // TriangleFan | ||
| 95 | Polygon, // Quads | ||
| 96 | Polygon, // QuadStrip | ||
| 97 | Polygon, // Polygon | ||
| 98 | Line, // LinesAdjacency | ||
| 99 | Line, // LineStripAdjacency | ||
| 100 | Polygon, // TrianglesAdjacency | ||
| 101 | Polygon, // TriangleStripAdjacency | ||
| 102 | Polygon, // Patches | ||
| 103 | }; | ||
| 104 | |||
| 105 | constexpr FixedPipelineState::Rasterizer GetRasterizerState(const Maxwell& regs) { | ||
| 106 | const std::array enabled_lut = {regs.polygon_offset_point_enable, | 67 | const std::array enabled_lut = {regs.polygon_offset_point_enable, |
| 107 | regs.polygon_offset_line_enable, | 68 | regs.polygon_offset_line_enable, |
| 108 | regs.polygon_offset_fill_enable}; | 69 | regs.polygon_offset_fill_enable}; |
| 109 | const auto topology = static_cast<std::size_t>(regs.draw.topology.Value()); | 70 | const u32 topology_index = static_cast<u32>(regs.draw.topology.Value()); |
| 110 | const bool depth_bias_enabled = enabled_lut[PolygonOffsetEnableLUT[topology]]; | ||
| 111 | 71 | ||
| 112 | const auto& clip = regs.view_volume_clip_control; | 72 | u32 packed_front_face = PackFrontFace(regs.front_face); |
| 113 | const bool depth_clamp_enabled = clip.depth_clamp_near == 1 || clip.depth_clamp_far == 1; | ||
| 114 | |||
| 115 | Maxwell::FrontFace front_face = regs.front_face; | ||
| 116 | if (regs.screen_y_control.triangle_rast_flip != 0 && | 73 | if (regs.screen_y_control.triangle_rast_flip != 0 && |
| 117 | regs.viewport_transform[0].scale_y > 0.0f) { | 74 | regs.viewport_transform[0].scale_y > 0.0f) { |
| 118 | if (front_face == Maxwell::FrontFace::CounterClockWise) | 75 | // Flip front face |
| 119 | front_face = Maxwell::FrontFace::ClockWise; | 76 | packed_front_face = 1 - packed_front_face; |
| 120 | else if (front_face == Maxwell::FrontFace::ClockWise) | ||
| 121 | front_face = Maxwell::FrontFace::CounterClockWise; | ||
| 122 | } | 77 | } |
| 123 | 78 | ||
| 124 | const bool gl_ndc = regs.depth_mode == Maxwell::DepthMode::MinusOneToOne; | 79 | raw = 0; |
| 125 | return FixedPipelineState::Rasterizer(regs.cull_test_enabled, depth_bias_enabled, | 80 | topology.Assign(topology_index); |
| 126 | depth_clamp_enabled, gl_ndc, regs.cull_face, front_face); | 81 | primitive_restart_enable.Assign(regs.primitive_restart.enabled != 0 ? 1 : 0); |
| 82 | cull_enable.Assign(regs.cull_test_enabled != 0 ? 1 : 0); | ||
| 83 | depth_bias_enable.Assign(enabled_lut[POLYGON_OFFSET_ENABLE_LUT[topology_index]] != 0 ? 1 : 0); | ||
| 84 | depth_clamp_enable.Assign(clip.depth_clamp_near == 1 || clip.depth_clamp_far == 1 ? 1 : 0); | ||
| 85 | ndc_minus_one_to_one.Assign(regs.depth_mode == Maxwell::DepthMode::MinusOneToOne ? 1 : 0); | ||
| 86 | cull_face.Assign(PackCullFace(regs.cull_face)); | ||
| 87 | front_face.Assign(packed_front_face); | ||
| 88 | polygon_mode.Assign(PackPolygonMode(regs.polygon_mode_front)); | ||
| 89 | patch_control_points_minus_one.Assign(regs.patch_vertices - 1); | ||
| 90 | tessellation_primitive.Assign(static_cast<u32>(regs.tess_mode.prim.Value())); | ||
| 91 | tessellation_spacing.Assign(static_cast<u32>(regs.tess_mode.spacing.Value())); | ||
| 92 | tessellation_clockwise.Assign(regs.tess_mode.cw.Value()); | ||
| 93 | logic_op_enable.Assign(regs.logic_op.enable != 0 ? 1 : 0); | ||
| 94 | logic_op.Assign(PackLogicOp(regs.logic_op.operation)); | ||
| 95 | std::memcpy(&point_size, ®s.point_size, sizeof(point_size)); // TODO: C++20 std::bit_cast | ||
| 127 | } | 96 | } |
| 128 | 97 | ||
| 129 | } // Anonymous namespace | 98 | void FixedPipelineState::ColorBlending::Fill(const Maxwell& regs) noexcept { |
| 130 | 99 | for (std::size_t index = 0; index < std::size(attachments); ++index) { | |
| 131 | std::size_t FixedPipelineState::VertexBinding::Hash() const noexcept { | 100 | attachments[index].Fill(regs, index); |
| 132 | return (index << stride) ^ divisor; | 101 | } |
| 133 | } | 102 | } |
| 134 | 103 | ||
| 135 | bool FixedPipelineState::VertexBinding::operator==(const VertexBinding& rhs) const noexcept { | 104 | void FixedPipelineState::BlendingAttachment::Fill(const Maxwell& regs, std::size_t index) { |
| 136 | return std::tie(index, stride, divisor) == std::tie(rhs.index, rhs.stride, rhs.divisor); | 105 | const auto& mask = regs.color_mask[regs.color_mask_common ? 0 : index]; |
| 137 | } | 106 | |
| 107 | raw = 0; | ||
| 108 | mask_r.Assign(mask.R); | ||
| 109 | mask_g.Assign(mask.G); | ||
| 110 | mask_b.Assign(mask.B); | ||
| 111 | mask_a.Assign(mask.A); | ||
| 112 | |||
| 113 | // TODO: C++20 Use templated lambda to deduplicate code | ||
| 114 | |||
| 115 | if (!regs.independent_blend_enable) { | ||
| 116 | const auto& src = regs.blend; | ||
| 117 | if (!src.enable[index]) { | ||
| 118 | return; | ||
| 119 | } | ||
| 120 | equation_rgb.Assign(PackBlendEquation(src.equation_rgb)); | ||
| 121 | equation_a.Assign(PackBlendEquation(src.equation_a)); | ||
| 122 | factor_source_rgb.Assign(PackBlendFactor(src.factor_source_rgb)); | ||
| 123 | factor_dest_rgb.Assign(PackBlendFactor(src.factor_dest_rgb)); | ||
| 124 | factor_source_a.Assign(PackBlendFactor(src.factor_source_a)); | ||
| 125 | factor_dest_a.Assign(PackBlendFactor(src.factor_dest_a)); | ||
| 126 | enable.Assign(1); | ||
| 127 | return; | ||
| 128 | } | ||
| 138 | 129 | ||
| 139 | std::size_t FixedPipelineState::VertexAttribute::Hash() const noexcept { | 130 | if (!regs.blend.enable[index]) { |
| 140 | return static_cast<std::size_t>(index) ^ (static_cast<std::size_t>(buffer) << 13) ^ | 131 | return; |
| 141 | (static_cast<std::size_t>(type) << 22) ^ (static_cast<std::size_t>(size) << 31) ^ | 132 | } |
| 142 | (static_cast<std::size_t>(offset) << 36); | 133 | const auto& src = regs.independent_blend[index]; |
| 134 | equation_rgb.Assign(PackBlendEquation(src.equation_rgb)); | ||
| 135 | equation_a.Assign(PackBlendEquation(src.equation_a)); | ||
| 136 | factor_source_rgb.Assign(PackBlendFactor(src.factor_source_rgb)); | ||
| 137 | factor_dest_rgb.Assign(PackBlendFactor(src.factor_dest_rgb)); | ||
| 138 | factor_source_a.Assign(PackBlendFactor(src.factor_source_a)); | ||
| 139 | factor_dest_a.Assign(PackBlendFactor(src.factor_dest_a)); | ||
| 140 | enable.Assign(1); | ||
| 143 | } | 141 | } |
| 144 | 142 | ||
| 145 | bool FixedPipelineState::VertexAttribute::operator==(const VertexAttribute& rhs) const noexcept { | 143 | std::size_t FixedPipelineState::Hash() const noexcept { |
| 146 | return std::tie(index, buffer, type, size, offset) == | 144 | const u64 hash = Common::CityHash64(reinterpret_cast<const char*>(this), sizeof *this); |
| 147 | std::tie(rhs.index, rhs.buffer, rhs.type, rhs.size, rhs.offset); | 145 | return static_cast<std::size_t>(hash); |
| 148 | } | 146 | } |
| 149 | 147 | ||
| 150 | std::size_t FixedPipelineState::StencilFace::Hash() const noexcept { | 148 | bool FixedPipelineState::operator==(const FixedPipelineState& rhs) const noexcept { |
| 151 | return static_cast<std::size_t>(action_stencil_fail) ^ | 149 | return std::memcmp(this, &rhs, sizeof *this) == 0; |
| 152 | (static_cast<std::size_t>(action_depth_fail) << 4) ^ | ||
| 153 | (static_cast<std::size_t>(action_depth_fail) << 20) ^ | ||
| 154 | (static_cast<std::size_t>(action_depth_pass) << 36); | ||
| 155 | } | 150 | } |
| 156 | 151 | ||
| 157 | bool FixedPipelineState::StencilFace::operator==(const StencilFace& rhs) const noexcept { | 152 | FixedPipelineState GetFixedPipelineState(const Maxwell& regs) { |
| 158 | return std::tie(action_stencil_fail, action_depth_fail, action_depth_pass, test_func) == | 153 | FixedPipelineState fixed_state; |
| 159 | std::tie(rhs.action_stencil_fail, rhs.action_depth_fail, rhs.action_depth_pass, | 154 | fixed_state.rasterizer.Fill(regs); |
| 160 | rhs.test_func); | 155 | fixed_state.depth_stencil.Fill(regs); |
| 156 | fixed_state.color_blending.Fill(regs); | ||
| 157 | fixed_state.padding = {}; | ||
| 158 | return fixed_state; | ||
| 161 | } | 159 | } |
| 162 | 160 | ||
| 163 | std::size_t FixedPipelineState::BlendingAttachment::Hash() const noexcept { | 161 | u32 FixedPipelineState::PackComparisonOp(Maxwell::ComparisonOp op) noexcept { |
| 164 | return static_cast<std::size_t>(enable) ^ (static_cast<std::size_t>(rgb_equation) << 5) ^ | 162 | // OpenGL enums go from 0x200 to 0x207 and the others from 1 to 8 |
| 165 | (static_cast<std::size_t>(src_rgb_func) << 10) ^ | 163 | // If we substract 0x200 to OpenGL enums and 1 to the others we get a 0-7 range. |
| 166 | (static_cast<std::size_t>(dst_rgb_func) << 15) ^ | 164 | // Perfect for a hash. |
| 167 | (static_cast<std::size_t>(a_equation) << 20) ^ | 165 | const u32 value = static_cast<u32>(op); |
| 168 | (static_cast<std::size_t>(src_a_func) << 25) ^ | 166 | return value - (value >= 0x200 ? 0x200 : 1); |
| 169 | (static_cast<std::size_t>(dst_a_func) << 30) ^ | ||
| 170 | (static_cast<std::size_t>(components[0]) << 35) ^ | ||
| 171 | (static_cast<std::size_t>(components[1]) << 36) ^ | ||
| 172 | (static_cast<std::size_t>(components[2]) << 37) ^ | ||
| 173 | (static_cast<std::size_t>(components[3]) << 38); | ||
| 174 | } | 167 | } |
| 175 | 168 | ||
| 176 | bool FixedPipelineState::BlendingAttachment::operator==(const BlendingAttachment& rhs) const | 169 | Maxwell::ComparisonOp FixedPipelineState::UnpackComparisonOp(u32 packed) noexcept { |
| 177 | noexcept { | 170 | // Read PackComparisonOp for the logic behind this. |
| 178 | return std::tie(enable, rgb_equation, src_rgb_func, dst_rgb_func, a_equation, src_a_func, | 171 | return static_cast<Maxwell::ComparisonOp>(packed + 1); |
| 179 | dst_a_func, components) == | ||
| 180 | std::tie(rhs.enable, rhs.rgb_equation, rhs.src_rgb_func, rhs.dst_rgb_func, | ||
| 181 | rhs.a_equation, rhs.src_a_func, rhs.dst_a_func, rhs.components); | ||
| 182 | } | 172 | } |
| 183 | 173 | ||
| 184 | std::size_t FixedPipelineState::VertexInput::Hash() const noexcept { | 174 | u32 FixedPipelineState::PackStencilOp(Maxwell::StencilOp op) noexcept { |
| 185 | std::size_t hash = num_bindings ^ (num_attributes << 32); | 175 | switch (op) { |
| 186 | for (std::size_t i = 0; i < num_bindings; ++i) { | 176 | case Maxwell::StencilOp::Keep: |
| 187 | boost::hash_combine(hash, bindings[i].Hash()); | 177 | case Maxwell::StencilOp::KeepOGL: |
| 188 | } | 178 | return 0; |
| 189 | for (std::size_t i = 0; i < num_attributes; ++i) { | 179 | case Maxwell::StencilOp::Zero: |
| 190 | boost::hash_combine(hash, attributes[i].Hash()); | 180 | case Maxwell::StencilOp::ZeroOGL: |
| 181 | return 1; | ||
| 182 | case Maxwell::StencilOp::Replace: | ||
| 183 | case Maxwell::StencilOp::ReplaceOGL: | ||
| 184 | return 2; | ||
| 185 | case Maxwell::StencilOp::Incr: | ||
| 186 | case Maxwell::StencilOp::IncrOGL: | ||
| 187 | return 3; | ||
| 188 | case Maxwell::StencilOp::Decr: | ||
| 189 | case Maxwell::StencilOp::DecrOGL: | ||
| 190 | return 4; | ||
| 191 | case Maxwell::StencilOp::Invert: | ||
| 192 | case Maxwell::StencilOp::InvertOGL: | ||
| 193 | return 5; | ||
| 194 | case Maxwell::StencilOp::IncrWrap: | ||
| 195 | case Maxwell::StencilOp::IncrWrapOGL: | ||
| 196 | return 6; | ||
| 197 | case Maxwell::StencilOp::DecrWrap: | ||
| 198 | case Maxwell::StencilOp::DecrWrapOGL: | ||
| 199 | return 7; | ||
| 191 | } | 200 | } |
| 192 | return hash; | 201 | return 0; |
| 193 | } | 202 | } |
| 194 | 203 | ||
| 195 | bool FixedPipelineState::VertexInput::operator==(const VertexInput& rhs) const noexcept { | 204 | Maxwell::StencilOp FixedPipelineState::UnpackStencilOp(u32 packed) noexcept { |
| 196 | return std::equal(bindings.begin(), bindings.begin() + num_bindings, rhs.bindings.begin(), | 205 | static constexpr std::array LUT = {Maxwell::StencilOp::Keep, Maxwell::StencilOp::Zero, |
| 197 | rhs.bindings.begin() + rhs.num_bindings) && | 206 | Maxwell::StencilOp::Replace, Maxwell::StencilOp::Incr, |
| 198 | std::equal(attributes.begin(), attributes.begin() + num_attributes, | 207 | Maxwell::StencilOp::Decr, Maxwell::StencilOp::Invert, |
| 199 | rhs.attributes.begin(), rhs.attributes.begin() + rhs.num_attributes); | 208 | Maxwell::StencilOp::IncrWrap, Maxwell::StencilOp::DecrWrap}; |
| 209 | return LUT[packed]; | ||
| 200 | } | 210 | } |
| 201 | 211 | ||
| 202 | std::size_t FixedPipelineState::InputAssembly::Hash() const noexcept { | 212 | u32 FixedPipelineState::PackCullFace(Maxwell::CullFace cull) noexcept { |
| 203 | std::size_t point_size_int = 0; | 213 | // FrontAndBack is 0x408, by substracting 0x406 in it we get 2. |
| 204 | std::memcpy(&point_size_int, &point_size, sizeof(point_size)); | 214 | // Individual cull faces are in 0x404 and 0x405, substracting 0x404 we get 0 and 1. |
| 205 | return (static_cast<std::size_t>(topology) << 24) ^ (point_size_int << 32) ^ | 215 | const u32 value = static_cast<u32>(cull); |
| 206 | static_cast<std::size_t>(primitive_restart_enable); | 216 | return value - (value == 0x408 ? 0x406 : 0x404); |
| 207 | } | 217 | } |
| 208 | 218 | ||
| 209 | bool FixedPipelineState::InputAssembly::operator==(const InputAssembly& rhs) const noexcept { | 219 | Maxwell::CullFace FixedPipelineState::UnpackCullFace(u32 packed) noexcept { |
| 210 | return std::tie(topology, primitive_restart_enable, point_size) == | 220 | static constexpr std::array LUT = {Maxwell::CullFace::Front, Maxwell::CullFace::Back, |
| 211 | std::tie(rhs.topology, rhs.primitive_restart_enable, rhs.point_size); | 221 | Maxwell::CullFace::FrontAndBack}; |
| 222 | return LUT[packed]; | ||
| 212 | } | 223 | } |
| 213 | 224 | ||
| 214 | std::size_t FixedPipelineState::Tessellation::Hash() const noexcept { | 225 | u32 FixedPipelineState::PackFrontFace(Maxwell::FrontFace face) noexcept { |
| 215 | return static_cast<std::size_t>(patch_control_points) ^ | 226 | return static_cast<u32>(face) - 0x900; |
| 216 | (static_cast<std::size_t>(primitive) << 6) ^ (static_cast<std::size_t>(spacing) << 8) ^ | ||
| 217 | (static_cast<std::size_t>(clockwise) << 10); | ||
| 218 | } | 227 | } |
| 219 | 228 | ||
| 220 | bool FixedPipelineState::Tessellation::operator==(const Tessellation& rhs) const noexcept { | 229 | Maxwell::FrontFace FixedPipelineState::UnpackFrontFace(u32 packed) noexcept { |
| 221 | return std::tie(patch_control_points, primitive, spacing, clockwise) == | 230 | return static_cast<Maxwell::FrontFace>(packed + 0x900); |
| 222 | std::tie(rhs.patch_control_points, rhs.primitive, rhs.spacing, rhs.clockwise); | ||
| 223 | } | 231 | } |
| 224 | 232 | ||
| 225 | std::size_t FixedPipelineState::Rasterizer::Hash() const noexcept { | 233 | u32 FixedPipelineState::PackPolygonMode(Maxwell::PolygonMode mode) noexcept { |
| 226 | return static_cast<std::size_t>(cull_enable) ^ | 234 | return static_cast<u32>(mode) - 0x1B00; |
| 227 | (static_cast<std::size_t>(depth_bias_enable) << 1) ^ | ||
| 228 | (static_cast<std::size_t>(depth_clamp_enable) << 2) ^ | ||
| 229 | (static_cast<std::size_t>(ndc_minus_one_to_one) << 3) ^ | ||
| 230 | (static_cast<std::size_t>(cull_face) << 24) ^ | ||
| 231 | (static_cast<std::size_t>(front_face) << 48); | ||
| 232 | } | 235 | } |
| 233 | 236 | ||
| 234 | bool FixedPipelineState::Rasterizer::operator==(const Rasterizer& rhs) const noexcept { | 237 | Maxwell::PolygonMode FixedPipelineState::UnpackPolygonMode(u32 packed) noexcept { |
| 235 | return std::tie(cull_enable, depth_bias_enable, depth_clamp_enable, ndc_minus_one_to_one, | 238 | return static_cast<Maxwell::PolygonMode>(packed + 0x1B00); |
| 236 | cull_face, front_face) == | ||
| 237 | std::tie(rhs.cull_enable, rhs.depth_bias_enable, rhs.depth_clamp_enable, | ||
| 238 | rhs.ndc_minus_one_to_one, rhs.cull_face, rhs.front_face); | ||
| 239 | } | 239 | } |
| 240 | 240 | ||
| 241 | std::size_t FixedPipelineState::DepthStencil::Hash() const noexcept { | 241 | u32 FixedPipelineState::PackLogicOp(Maxwell::LogicOperation op) noexcept { |
| 242 | std::size_t hash = static_cast<std::size_t>(depth_test_enable) ^ | 242 | return static_cast<u32>(op) - 0x1500; |
| 243 | (static_cast<std::size_t>(depth_write_enable) << 1) ^ | ||
| 244 | (static_cast<std::size_t>(depth_bounds_enable) << 2) ^ | ||
| 245 | (static_cast<std::size_t>(stencil_enable) << 3) ^ | ||
| 246 | (static_cast<std::size_t>(depth_test_function) << 4); | ||
| 247 | boost::hash_combine(hash, front_stencil.Hash()); | ||
| 248 | boost::hash_combine(hash, back_stencil.Hash()); | ||
| 249 | return hash; | ||
| 250 | } | 243 | } |
| 251 | 244 | ||
| 252 | bool FixedPipelineState::DepthStencil::operator==(const DepthStencil& rhs) const noexcept { | 245 | Maxwell::LogicOperation FixedPipelineState::UnpackLogicOp(u32 packed) noexcept { |
| 253 | return std::tie(depth_test_enable, depth_write_enable, depth_bounds_enable, depth_test_function, | 246 | return static_cast<Maxwell::LogicOperation>(packed + 0x1500); |
| 254 | stencil_enable, front_stencil, back_stencil) == | ||
| 255 | std::tie(rhs.depth_test_enable, rhs.depth_write_enable, rhs.depth_bounds_enable, | ||
| 256 | rhs.depth_test_function, rhs.stencil_enable, rhs.front_stencil, | ||
| 257 | rhs.back_stencil); | ||
| 258 | } | 247 | } |
| 259 | 248 | ||
| 260 | std::size_t FixedPipelineState::ColorBlending::Hash() const noexcept { | 249 | u32 FixedPipelineState::PackBlendEquation(Maxwell::Blend::Equation equation) noexcept { |
| 261 | std::size_t hash = attachments_count << 13; | 250 | switch (equation) { |
| 262 | for (std::size_t rt = 0; rt < static_cast<std::size_t>(attachments_count); ++rt) { | 251 | case Maxwell::Blend::Equation::Add: |
| 263 | boost::hash_combine(hash, attachments[rt].Hash()); | 252 | case Maxwell::Blend::Equation::AddGL: |
| 253 | return 0; | ||
| 254 | case Maxwell::Blend::Equation::Subtract: | ||
| 255 | case Maxwell::Blend::Equation::SubtractGL: | ||
| 256 | return 1; | ||
| 257 | case Maxwell::Blend::Equation::ReverseSubtract: | ||
| 258 | case Maxwell::Blend::Equation::ReverseSubtractGL: | ||
| 259 | return 2; | ||
| 260 | case Maxwell::Blend::Equation::Min: | ||
| 261 | case Maxwell::Blend::Equation::MinGL: | ||
| 262 | return 3; | ||
| 263 | case Maxwell::Blend::Equation::Max: | ||
| 264 | case Maxwell::Blend::Equation::MaxGL: | ||
| 265 | return 4; | ||
| 264 | } | 266 | } |
| 265 | return hash; | 267 | return 0; |
| 266 | } | 268 | } |
| 267 | 269 | ||
| 268 | bool FixedPipelineState::ColorBlending::operator==(const ColorBlending& rhs) const noexcept { | 270 | Maxwell::Blend::Equation FixedPipelineState::UnpackBlendEquation(u32 packed) noexcept { |
| 269 | return std::equal(attachments.begin(), attachments.begin() + attachments_count, | 271 | static constexpr std::array LUT = { |
| 270 | rhs.attachments.begin(), rhs.attachments.begin() + rhs.attachments_count); | 272 | Maxwell::Blend::Equation::Add, Maxwell::Blend::Equation::Subtract, |
| 271 | } | 273 | Maxwell::Blend::Equation::ReverseSubtract, Maxwell::Blend::Equation::Min, |
| 272 | 274 | Maxwell::Blend::Equation::Max}; | |
| 273 | std::size_t FixedPipelineState::Hash() const noexcept { | 275 | return LUT[packed]; |
| 274 | std::size_t hash = 0; | ||
| 275 | boost::hash_combine(hash, vertex_input.Hash()); | ||
| 276 | boost::hash_combine(hash, input_assembly.Hash()); | ||
| 277 | boost::hash_combine(hash, tessellation.Hash()); | ||
| 278 | boost::hash_combine(hash, rasterizer.Hash()); | ||
| 279 | boost::hash_combine(hash, depth_stencil.Hash()); | ||
| 280 | boost::hash_combine(hash, color_blending.Hash()); | ||
| 281 | return hash; | ||
| 282 | } | 276 | } |
| 283 | 277 | ||
| 284 | bool FixedPipelineState::operator==(const FixedPipelineState& rhs) const noexcept { | 278 | u32 FixedPipelineState::PackBlendFactor(Maxwell::Blend::Factor factor) noexcept { |
| 285 | return std::tie(vertex_input, input_assembly, tessellation, rasterizer, depth_stencil, | 279 | switch (factor) { |
| 286 | color_blending) == std::tie(rhs.vertex_input, rhs.input_assembly, | 280 | case Maxwell::Blend::Factor::Zero: |
| 287 | rhs.tessellation, rhs.rasterizer, rhs.depth_stencil, | 281 | case Maxwell::Blend::Factor::ZeroGL: |
| 288 | rhs.color_blending); | 282 | return 0; |
| 283 | case Maxwell::Blend::Factor::One: | ||
| 284 | case Maxwell::Blend::Factor::OneGL: | ||
| 285 | return 1; | ||
| 286 | case Maxwell::Blend::Factor::SourceColor: | ||
| 287 | case Maxwell::Blend::Factor::SourceColorGL: | ||
| 288 | return 2; | ||
| 289 | case Maxwell::Blend::Factor::OneMinusSourceColor: | ||
| 290 | case Maxwell::Blend::Factor::OneMinusSourceColorGL: | ||
| 291 | return 3; | ||
| 292 | case Maxwell::Blend::Factor::SourceAlpha: | ||
| 293 | case Maxwell::Blend::Factor::SourceAlphaGL: | ||
| 294 | return 4; | ||
| 295 | case Maxwell::Blend::Factor::OneMinusSourceAlpha: | ||
| 296 | case Maxwell::Blend::Factor::OneMinusSourceAlphaGL: | ||
| 297 | return 5; | ||
| 298 | case Maxwell::Blend::Factor::DestAlpha: | ||
| 299 | case Maxwell::Blend::Factor::DestAlphaGL: | ||
| 300 | return 6; | ||
| 301 | case Maxwell::Blend::Factor::OneMinusDestAlpha: | ||
| 302 | case Maxwell::Blend::Factor::OneMinusDestAlphaGL: | ||
| 303 | return 7; | ||
| 304 | case Maxwell::Blend::Factor::DestColor: | ||
| 305 | case Maxwell::Blend::Factor::DestColorGL: | ||
| 306 | return 8; | ||
| 307 | case Maxwell::Blend::Factor::OneMinusDestColor: | ||
| 308 | case Maxwell::Blend::Factor::OneMinusDestColorGL: | ||
| 309 | return 9; | ||
| 310 | case Maxwell::Blend::Factor::SourceAlphaSaturate: | ||
| 311 | case Maxwell::Blend::Factor::SourceAlphaSaturateGL: | ||
| 312 | return 10; | ||
| 313 | case Maxwell::Blend::Factor::Source1Color: | ||
| 314 | case Maxwell::Blend::Factor::Source1ColorGL: | ||
| 315 | return 11; | ||
| 316 | case Maxwell::Blend::Factor::OneMinusSource1Color: | ||
| 317 | case Maxwell::Blend::Factor::OneMinusSource1ColorGL: | ||
| 318 | return 12; | ||
| 319 | case Maxwell::Blend::Factor::Source1Alpha: | ||
| 320 | case Maxwell::Blend::Factor::Source1AlphaGL: | ||
| 321 | return 13; | ||
| 322 | case Maxwell::Blend::Factor::OneMinusSource1Alpha: | ||
| 323 | case Maxwell::Blend::Factor::OneMinusSource1AlphaGL: | ||
| 324 | return 14; | ||
| 325 | case Maxwell::Blend::Factor::ConstantColor: | ||
| 326 | case Maxwell::Blend::Factor::ConstantColorGL: | ||
| 327 | return 15; | ||
| 328 | case Maxwell::Blend::Factor::OneMinusConstantColor: | ||
| 329 | case Maxwell::Blend::Factor::OneMinusConstantColorGL: | ||
| 330 | return 16; | ||
| 331 | case Maxwell::Blend::Factor::ConstantAlpha: | ||
| 332 | case Maxwell::Blend::Factor::ConstantAlphaGL: | ||
| 333 | return 17; | ||
| 334 | case Maxwell::Blend::Factor::OneMinusConstantAlpha: | ||
| 335 | case Maxwell::Blend::Factor::OneMinusConstantAlphaGL: | ||
| 336 | return 18; | ||
| 337 | } | ||
| 338 | return 0; | ||
| 289 | } | 339 | } |
| 290 | 340 | ||
| 291 | FixedPipelineState GetFixedPipelineState(const Maxwell& regs) { | 341 | Maxwell::Blend::Factor FixedPipelineState::UnpackBlendFactor(u32 packed) noexcept { |
| 292 | FixedPipelineState fixed_state; | 342 | static constexpr std::array LUT = { |
| 293 | fixed_state.input_assembly = GetInputAssemblyState(regs); | 343 | Maxwell::Blend::Factor::Zero, |
| 294 | fixed_state.tessellation = GetTessellationState(regs); | 344 | Maxwell::Blend::Factor::One, |
| 295 | fixed_state.rasterizer = GetRasterizerState(regs); | 345 | Maxwell::Blend::Factor::SourceColor, |
| 296 | fixed_state.depth_stencil = GetDepthStencilState(regs); | 346 | Maxwell::Blend::Factor::OneMinusSourceColor, |
| 297 | fixed_state.color_blending = GetColorBlendingState(regs); | 347 | Maxwell::Blend::Factor::SourceAlpha, |
| 298 | return fixed_state; | 348 | Maxwell::Blend::Factor::OneMinusSourceAlpha, |
| 349 | Maxwell::Blend::Factor::DestAlpha, | ||
| 350 | Maxwell::Blend::Factor::OneMinusDestAlpha, | ||
| 351 | Maxwell::Blend::Factor::DestColor, | ||
| 352 | Maxwell::Blend::Factor::OneMinusDestColor, | ||
| 353 | Maxwell::Blend::Factor::SourceAlphaSaturate, | ||
| 354 | Maxwell::Blend::Factor::Source1Color, | ||
| 355 | Maxwell::Blend::Factor::OneMinusSource1Color, | ||
| 356 | Maxwell::Blend::Factor::Source1Alpha, | ||
| 357 | Maxwell::Blend::Factor::OneMinusSource1Alpha, | ||
| 358 | Maxwell::Blend::Factor::ConstantColor, | ||
| 359 | Maxwell::Blend::Factor::OneMinusConstantColor, | ||
| 360 | Maxwell::Blend::Factor::ConstantAlpha, | ||
| 361 | Maxwell::Blend::Factor::OneMinusConstantAlpha, | ||
| 362 | }; | ||
| 363 | return LUT[packed]; | ||
| 299 | } | 364 | } |
| 300 | 365 | ||
| 301 | } // namespace Vulkan | 366 | } // namespace Vulkan |
diff --git a/src/video_core/renderer_vulkan/fixed_pipeline_state.h b/src/video_core/renderer_vulkan/fixed_pipeline_state.h index 4c8ba7f90..9fe6bdbf9 100644 --- a/src/video_core/renderer_vulkan/fixed_pipeline_state.h +++ b/src/video_core/renderer_vulkan/fixed_pipeline_state.h | |||
| @@ -7,6 +7,7 @@ | |||
| 7 | #include <array> | 7 | #include <array> |
| 8 | #include <type_traits> | 8 | #include <type_traits> |
| 9 | 9 | ||
| 10 | #include "common/bit_field.h" | ||
| 10 | #include "common/common_types.h" | 11 | #include "common/common_types.h" |
| 11 | 12 | ||
| 12 | #include "video_core/engines/maxwell_3d.h" | 13 | #include "video_core/engines/maxwell_3d.h" |
| @@ -16,93 +17,48 @@ namespace Vulkan { | |||
| 16 | 17 | ||
| 17 | using Maxwell = Tegra::Engines::Maxwell3D::Regs; | 18 | using Maxwell = Tegra::Engines::Maxwell3D::Regs; |
| 18 | 19 | ||
| 19 | // TODO(Rodrigo): Optimize this structure. | 20 | struct alignas(32) FixedPipelineState { |
| 21 | static u32 PackComparisonOp(Maxwell::ComparisonOp op) noexcept; | ||
| 22 | static Maxwell::ComparisonOp UnpackComparisonOp(u32 packed) noexcept; | ||
| 20 | 23 | ||
| 21 | struct FixedPipelineState { | 24 | static u32 PackStencilOp(Maxwell::StencilOp op) noexcept; |
| 22 | using PixelFormat = VideoCore::Surface::PixelFormat; | 25 | static Maxwell::StencilOp UnpackStencilOp(u32 packed) noexcept; |
| 23 | 26 | ||
| 24 | struct VertexBinding { | 27 | static u32 PackCullFace(Maxwell::CullFace cull) noexcept; |
| 25 | constexpr VertexBinding(u32 index, u32 stride, u32 divisor) | 28 | static Maxwell::CullFace UnpackCullFace(u32 packed) noexcept; |
| 26 | : index{index}, stride{stride}, divisor{divisor} {} | ||
| 27 | VertexBinding() = default; | ||
| 28 | 29 | ||
| 29 | u32 index; | 30 | static u32 PackFrontFace(Maxwell::FrontFace face) noexcept; |
| 30 | u32 stride; | 31 | static Maxwell::FrontFace UnpackFrontFace(u32 packed) noexcept; |
| 31 | u32 divisor; | ||
| 32 | 32 | ||
| 33 | std::size_t Hash() const noexcept; | 33 | static u32 PackPolygonMode(Maxwell::PolygonMode mode) noexcept; |
| 34 | 34 | static Maxwell::PolygonMode UnpackPolygonMode(u32 packed) noexcept; | |
| 35 | bool operator==(const VertexBinding& rhs) const noexcept; | ||
| 36 | |||
| 37 | bool operator!=(const VertexBinding& rhs) const noexcept { | ||
| 38 | return !operator==(rhs); | ||
| 39 | } | ||
| 40 | }; | ||
| 41 | |||
| 42 | struct VertexAttribute { | ||
| 43 | constexpr VertexAttribute(u32 index, u32 buffer, Maxwell::VertexAttribute::Type type, | ||
| 44 | Maxwell::VertexAttribute::Size size, u32 offset) | ||
| 45 | : index{index}, buffer{buffer}, type{type}, size{size}, offset{offset} {} | ||
| 46 | VertexAttribute() = default; | ||
| 47 | |||
| 48 | u32 index; | ||
| 49 | u32 buffer; | ||
| 50 | Maxwell::VertexAttribute::Type type; | ||
| 51 | Maxwell::VertexAttribute::Size size; | ||
| 52 | u32 offset; | ||
| 53 | |||
| 54 | std::size_t Hash() const noexcept; | ||
| 55 | |||
| 56 | bool operator==(const VertexAttribute& rhs) const noexcept; | ||
| 57 | |||
| 58 | bool operator!=(const VertexAttribute& rhs) const noexcept { | ||
| 59 | return !operator==(rhs); | ||
| 60 | } | ||
| 61 | }; | ||
| 62 | |||
| 63 | struct StencilFace { | ||
| 64 | constexpr StencilFace(Maxwell::StencilOp action_stencil_fail, | ||
| 65 | Maxwell::StencilOp action_depth_fail, | ||
| 66 | Maxwell::StencilOp action_depth_pass, Maxwell::ComparisonOp test_func) | ||
| 67 | : action_stencil_fail{action_stencil_fail}, action_depth_fail{action_depth_fail}, | ||
| 68 | action_depth_pass{action_depth_pass}, test_func{test_func} {} | ||
| 69 | StencilFace() = default; | ||
| 70 | |||
| 71 | Maxwell::StencilOp action_stencil_fail; | ||
| 72 | Maxwell::StencilOp action_depth_fail; | ||
| 73 | Maxwell::StencilOp action_depth_pass; | ||
| 74 | Maxwell::ComparisonOp test_func; | ||
| 75 | 35 | ||
| 76 | std::size_t Hash() const noexcept; | 36 | static u32 PackLogicOp(Maxwell::LogicOperation op) noexcept; |
| 37 | static Maxwell::LogicOperation UnpackLogicOp(u32 packed) noexcept; | ||
| 77 | 38 | ||
| 78 | bool operator==(const StencilFace& rhs) const noexcept; | 39 | static u32 PackBlendEquation(Maxwell::Blend::Equation equation) noexcept; |
| 40 | static Maxwell::Blend::Equation UnpackBlendEquation(u32 packed) noexcept; | ||
| 79 | 41 | ||
| 80 | bool operator!=(const StencilFace& rhs) const noexcept { | 42 | static u32 PackBlendFactor(Maxwell::Blend::Factor factor) noexcept; |
| 81 | return !operator==(rhs); | 43 | static Maxwell::Blend::Factor UnpackBlendFactor(u32 packed) noexcept; |
| 82 | } | ||
| 83 | }; | ||
| 84 | 44 | ||
| 85 | struct BlendingAttachment { | 45 | struct BlendingAttachment { |
| 86 | constexpr BlendingAttachment(bool enable, Maxwell::Blend::Equation rgb_equation, | 46 | union { |
| 87 | Maxwell::Blend::Factor src_rgb_func, | 47 | u32 raw; |
| 88 | Maxwell::Blend::Factor dst_rgb_func, | 48 | BitField<0, 1, u32> mask_r; |
| 89 | Maxwell::Blend::Equation a_equation, | 49 | BitField<1, 1, u32> mask_g; |
| 90 | Maxwell::Blend::Factor src_a_func, | 50 | BitField<2, 1, u32> mask_b; |
| 91 | Maxwell::Blend::Factor dst_a_func, | 51 | BitField<3, 1, u32> mask_a; |
| 92 | std::array<bool, 4> components) | 52 | BitField<4, 3, u32> equation_rgb; |
| 93 | : enable{enable}, rgb_equation{rgb_equation}, src_rgb_func{src_rgb_func}, | 53 | BitField<7, 3, u32> equation_a; |
| 94 | dst_rgb_func{dst_rgb_func}, a_equation{a_equation}, src_a_func{src_a_func}, | 54 | BitField<10, 5, u32> factor_source_rgb; |
| 95 | dst_a_func{dst_a_func}, components{components} {} | 55 | BitField<15, 5, u32> factor_dest_rgb; |
| 96 | BlendingAttachment() = default; | 56 | BitField<20, 5, u32> factor_source_a; |
| 97 | 57 | BitField<25, 5, u32> factor_dest_a; | |
| 98 | bool enable; | 58 | BitField<30, 1, u32> enable; |
| 99 | Maxwell::Blend::Equation rgb_equation; | 59 | }; |
| 100 | Maxwell::Blend::Factor src_rgb_func; | 60 | |
| 101 | Maxwell::Blend::Factor dst_rgb_func; | 61 | void Fill(const Maxwell& regs, std::size_t index); |
| 102 | Maxwell::Blend::Equation a_equation; | ||
| 103 | Maxwell::Blend::Factor src_a_func; | ||
| 104 | Maxwell::Blend::Factor dst_a_func; | ||
| 105 | std::array<bool, 4> components; | ||
| 106 | 62 | ||
| 107 | std::size_t Hash() const noexcept; | 63 | std::size_t Hash() const noexcept; |
| 108 | 64 | ||
| @@ -111,136 +67,178 @@ struct FixedPipelineState { | |||
| 111 | bool operator!=(const BlendingAttachment& rhs) const noexcept { | 67 | bool operator!=(const BlendingAttachment& rhs) const noexcept { |
| 112 | return !operator==(rhs); | 68 | return !operator==(rhs); |
| 113 | } | 69 | } |
| 114 | }; | ||
| 115 | |||
| 116 | struct VertexInput { | ||
| 117 | std::size_t num_bindings = 0; | ||
| 118 | std::size_t num_attributes = 0; | ||
| 119 | std::array<VertexBinding, Maxwell::NumVertexArrays> bindings; | ||
| 120 | std::array<VertexAttribute, Maxwell::NumVertexAttributes> attributes; | ||
| 121 | |||
| 122 | std::size_t Hash() const noexcept; | ||
| 123 | 70 | ||
| 124 | bool operator==(const VertexInput& rhs) const noexcept; | 71 | constexpr std::array<bool, 4> Mask() const noexcept { |
| 72 | return {mask_r != 0, mask_g != 0, mask_b != 0, mask_a != 0}; | ||
| 73 | } | ||
| 125 | 74 | ||
| 126 | bool operator!=(const VertexInput& rhs) const noexcept { | 75 | Maxwell::Blend::Equation EquationRGB() const noexcept { |
| 127 | return !operator==(rhs); | 76 | return UnpackBlendEquation(equation_rgb.Value()); |
| 128 | } | 77 | } |
| 129 | }; | ||
| 130 | 78 | ||
| 131 | struct InputAssembly { | 79 | Maxwell::Blend::Equation EquationAlpha() const noexcept { |
| 132 | constexpr InputAssembly(Maxwell::PrimitiveTopology topology, bool primitive_restart_enable, | 80 | return UnpackBlendEquation(equation_a.Value()); |
| 133 | float point_size) | 81 | } |
| 134 | : topology{topology}, primitive_restart_enable{primitive_restart_enable}, | ||
| 135 | point_size{point_size} {} | ||
| 136 | InputAssembly() = default; | ||
| 137 | 82 | ||
| 138 | Maxwell::PrimitiveTopology topology; | 83 | Maxwell::Blend::Factor SourceRGBFactor() const noexcept { |
| 139 | bool primitive_restart_enable; | 84 | return UnpackBlendFactor(factor_source_rgb.Value()); |
| 140 | float point_size; | 85 | } |
| 141 | 86 | ||
| 142 | std::size_t Hash() const noexcept; | 87 | Maxwell::Blend::Factor DestRGBFactor() const noexcept { |
| 88 | return UnpackBlendFactor(factor_dest_rgb.Value()); | ||
| 89 | } | ||
| 143 | 90 | ||
| 144 | bool operator==(const InputAssembly& rhs) const noexcept; | 91 | Maxwell::Blend::Factor SourceAlphaFactor() const noexcept { |
| 92 | return UnpackBlendFactor(factor_source_a.Value()); | ||
| 93 | } | ||
| 145 | 94 | ||
| 146 | bool operator!=(const InputAssembly& rhs) const noexcept { | 95 | Maxwell::Blend::Factor DestAlphaFactor() const noexcept { |
| 147 | return !operator==(rhs); | 96 | return UnpackBlendFactor(factor_dest_a.Value()); |
| 148 | } | 97 | } |
| 149 | }; | 98 | }; |
| 150 | 99 | ||
| 151 | struct Tessellation { | 100 | struct VertexInput { |
| 152 | constexpr Tessellation(u32 patch_control_points, Maxwell::TessellationPrimitive primitive, | 101 | union Binding { |
| 153 | Maxwell::TessellationSpacing spacing, bool clockwise) | 102 | u16 raw; |
| 154 | : patch_control_points{patch_control_points}, primitive{primitive}, spacing{spacing}, | 103 | BitField<0, 1, u16> enabled; |
| 155 | clockwise{clockwise} {} | 104 | BitField<1, 12, u16> stride; |
| 156 | Tessellation() = default; | 105 | }; |
| 157 | 106 | ||
| 158 | u32 patch_control_points; | 107 | union Attribute { |
| 159 | Maxwell::TessellationPrimitive primitive; | 108 | u32 raw; |
| 160 | Maxwell::TessellationSpacing spacing; | 109 | BitField<0, 1, u32> enabled; |
| 161 | bool clockwise; | 110 | BitField<1, 5, u32> buffer; |
| 162 | 111 | BitField<6, 14, u32> offset; | |
| 163 | std::size_t Hash() const noexcept; | 112 | BitField<20, 3, u32> type; |
| 164 | 113 | BitField<23, 6, u32> size; | |
| 165 | bool operator==(const Tessellation& rhs) const noexcept; | 114 | |
| 115 | constexpr Maxwell::VertexAttribute::Type Type() const noexcept { | ||
| 116 | return static_cast<Maxwell::VertexAttribute::Type>(type.Value()); | ||
| 117 | } | ||
| 118 | |||
| 119 | constexpr Maxwell::VertexAttribute::Size Size() const noexcept { | ||
| 120 | return static_cast<Maxwell::VertexAttribute::Size>(size.Value()); | ||
| 121 | } | ||
| 122 | }; | ||
| 123 | |||
| 124 | std::array<Binding, Maxwell::NumVertexArrays> bindings; | ||
| 125 | std::array<u32, Maxwell::NumVertexArrays> binding_divisors; | ||
| 126 | std::array<Attribute, Maxwell::NumVertexAttributes> attributes; | ||
| 127 | |||
| 128 | void SetBinding(std::size_t index, bool enabled, u32 stride, u32 divisor) noexcept { | ||
| 129 | auto& binding = bindings[index]; | ||
| 130 | binding.raw = 0; | ||
| 131 | binding.enabled.Assign(enabled ? 1 : 0); | ||
| 132 | binding.stride.Assign(stride); | ||
| 133 | binding_divisors[index] = divisor; | ||
| 134 | } | ||
| 166 | 135 | ||
| 167 | bool operator!=(const Tessellation& rhs) const noexcept { | 136 | void SetAttribute(std::size_t index, bool enabled, u32 buffer, u32 offset, |
| 168 | return !operator==(rhs); | 137 | Maxwell::VertexAttribute::Type type, |
| 138 | Maxwell::VertexAttribute::Size size) noexcept { | ||
| 139 | auto& attribute = attributes[index]; | ||
| 140 | attribute.raw = 0; | ||
| 141 | attribute.enabled.Assign(enabled ? 1 : 0); | ||
| 142 | attribute.buffer.Assign(buffer); | ||
| 143 | attribute.offset.Assign(offset); | ||
| 144 | attribute.type.Assign(static_cast<u32>(type)); | ||
| 145 | attribute.size.Assign(static_cast<u32>(size)); | ||
| 169 | } | 146 | } |
| 170 | }; | 147 | }; |
| 171 | 148 | ||
| 172 | struct Rasterizer { | 149 | struct Rasterizer { |
| 173 | constexpr Rasterizer(bool cull_enable, bool depth_bias_enable, bool depth_clamp_enable, | 150 | union { |
| 174 | bool ndc_minus_one_to_one, Maxwell::CullFace cull_face, | 151 | u32 raw; |
| 175 | Maxwell::FrontFace front_face) | 152 | BitField<0, 4, u32> topology; |
| 176 | : cull_enable{cull_enable}, depth_bias_enable{depth_bias_enable}, | 153 | BitField<4, 1, u32> primitive_restart_enable; |
| 177 | depth_clamp_enable{depth_clamp_enable}, ndc_minus_one_to_one{ndc_minus_one_to_one}, | 154 | BitField<5, 1, u32> cull_enable; |
| 178 | cull_face{cull_face}, front_face{front_face} {} | 155 | BitField<6, 1, u32> depth_bias_enable; |
| 179 | Rasterizer() = default; | 156 | BitField<7, 1, u32> depth_clamp_enable; |
| 180 | 157 | BitField<8, 1, u32> ndc_minus_one_to_one; | |
| 181 | bool cull_enable; | 158 | BitField<9, 2, u32> cull_face; |
| 182 | bool depth_bias_enable; | 159 | BitField<11, 1, u32> front_face; |
| 183 | bool depth_clamp_enable; | 160 | BitField<12, 2, u32> polygon_mode; |
| 184 | bool ndc_minus_one_to_one; | 161 | BitField<14, 5, u32> patch_control_points_minus_one; |
| 185 | Maxwell::CullFace cull_face; | 162 | BitField<19, 2, u32> tessellation_primitive; |
| 186 | Maxwell::FrontFace front_face; | 163 | BitField<21, 2, u32> tessellation_spacing; |
| 187 | 164 | BitField<23, 1, u32> tessellation_clockwise; | |
| 188 | std::size_t Hash() const noexcept; | 165 | BitField<24, 1, u32> logic_op_enable; |
| 166 | BitField<25, 4, u32> logic_op; | ||
| 167 | }; | ||
| 168 | |||
| 169 | // TODO(Rodrigo): Move this to push constants | ||
| 170 | u32 point_size; | ||
| 171 | |||
| 172 | void Fill(const Maxwell& regs) noexcept; | ||
| 173 | |||
| 174 | constexpr Maxwell::PrimitiveTopology Topology() const noexcept { | ||
| 175 | return static_cast<Maxwell::PrimitiveTopology>(topology.Value()); | ||
| 176 | } | ||
| 189 | 177 | ||
| 190 | bool operator==(const Rasterizer& rhs) const noexcept; | 178 | Maxwell::CullFace CullFace() const noexcept { |
| 179 | return UnpackCullFace(cull_face.Value()); | ||
| 180 | } | ||
| 191 | 181 | ||
| 192 | bool operator!=(const Rasterizer& rhs) const noexcept { | 182 | Maxwell::FrontFace FrontFace() const noexcept { |
| 193 | return !operator==(rhs); | 183 | return UnpackFrontFace(front_face.Value()); |
| 194 | } | 184 | } |
| 195 | }; | 185 | }; |
| 196 | 186 | ||
| 197 | struct DepthStencil { | 187 | struct DepthStencil { |
| 198 | constexpr DepthStencil(bool depth_test_enable, bool depth_write_enable, | 188 | template <std::size_t Position> |
| 199 | bool depth_bounds_enable, bool stencil_enable, | 189 | union StencilFace { |
| 200 | Maxwell::ComparisonOp depth_test_function, StencilFace front_stencil, | 190 | BitField<Position + 0, 3, u32> action_stencil_fail; |
| 201 | StencilFace back_stencil) | 191 | BitField<Position + 3, 3, u32> action_depth_fail; |
| 202 | : depth_test_enable{depth_test_enable}, depth_write_enable{depth_write_enable}, | 192 | BitField<Position + 6, 3, u32> action_depth_pass; |
| 203 | depth_bounds_enable{depth_bounds_enable}, stencil_enable{stencil_enable}, | 193 | BitField<Position + 9, 3, u32> test_func; |
| 204 | depth_test_function{depth_test_function}, front_stencil{front_stencil}, | 194 | |
| 205 | back_stencil{back_stencil} {} | 195 | Maxwell::StencilOp ActionStencilFail() const noexcept { |
| 206 | DepthStencil() = default; | 196 | return UnpackStencilOp(action_stencil_fail); |
| 207 | 197 | } | |
| 208 | bool depth_test_enable; | 198 | |
| 209 | bool depth_write_enable; | 199 | Maxwell::StencilOp ActionDepthFail() const noexcept { |
| 210 | bool depth_bounds_enable; | 200 | return UnpackStencilOp(action_depth_fail); |
| 211 | bool stencil_enable; | 201 | } |
| 212 | Maxwell::ComparisonOp depth_test_function; | 202 | |
| 213 | StencilFace front_stencil; | 203 | Maxwell::StencilOp ActionDepthPass() const noexcept { |
| 214 | StencilFace back_stencil; | 204 | return UnpackStencilOp(action_depth_pass); |
| 215 | 205 | } | |
| 216 | std::size_t Hash() const noexcept; | 206 | |
| 217 | 207 | Maxwell::ComparisonOp TestFunc() const noexcept { | |
| 218 | bool operator==(const DepthStencil& rhs) const noexcept; | 208 | return UnpackComparisonOp(test_func); |
| 219 | 209 | } | |
| 220 | bool operator!=(const DepthStencil& rhs) const noexcept { | 210 | }; |
| 221 | return !operator==(rhs); | 211 | |
| 212 | union { | ||
| 213 | u32 raw; | ||
| 214 | StencilFace<0> front; | ||
| 215 | StencilFace<12> back; | ||
| 216 | BitField<24, 1, u32> depth_test_enable; | ||
| 217 | BitField<25, 1, u32> depth_write_enable; | ||
| 218 | BitField<26, 1, u32> depth_bounds_enable; | ||
| 219 | BitField<27, 1, u32> stencil_enable; | ||
| 220 | BitField<28, 3, u32> depth_test_func; | ||
| 221 | }; | ||
| 222 | |||
| 223 | void Fill(const Maxwell& regs) noexcept; | ||
| 224 | |||
| 225 | Maxwell::ComparisonOp DepthTestFunc() const noexcept { | ||
| 226 | return UnpackComparisonOp(depth_test_func); | ||
| 222 | } | 227 | } |
| 223 | }; | 228 | }; |
| 224 | 229 | ||
| 225 | struct ColorBlending { | 230 | struct ColorBlending { |
| 226 | constexpr ColorBlending( | ||
| 227 | std::array<float, 4> blend_constants, std::size_t attachments_count, | ||
| 228 | std::array<BlendingAttachment, Maxwell::NumRenderTargets> attachments) | ||
| 229 | : attachments_count{attachments_count}, attachments{attachments} {} | ||
| 230 | ColorBlending() = default; | ||
| 231 | |||
| 232 | std::size_t attachments_count; | ||
| 233 | std::array<BlendingAttachment, Maxwell::NumRenderTargets> attachments; | 231 | std::array<BlendingAttachment, Maxwell::NumRenderTargets> attachments; |
| 234 | 232 | ||
| 235 | std::size_t Hash() const noexcept; | 233 | void Fill(const Maxwell& regs) noexcept; |
| 236 | |||
| 237 | bool operator==(const ColorBlending& rhs) const noexcept; | ||
| 238 | |||
| 239 | bool operator!=(const ColorBlending& rhs) const noexcept { | ||
| 240 | return !operator==(rhs); | ||
| 241 | } | ||
| 242 | }; | 234 | }; |
| 243 | 235 | ||
| 236 | VertexInput vertex_input; | ||
| 237 | Rasterizer rasterizer; | ||
| 238 | DepthStencil depth_stencil; | ||
| 239 | ColorBlending color_blending; | ||
| 240 | std::array<u8, 20> padding; | ||
| 241 | |||
| 244 | std::size_t Hash() const noexcept; | 242 | std::size_t Hash() const noexcept; |
| 245 | 243 | ||
| 246 | bool operator==(const FixedPipelineState& rhs) const noexcept; | 244 | bool operator==(const FixedPipelineState& rhs) const noexcept; |
| @@ -248,25 +246,11 @@ struct FixedPipelineState { | |||
| 248 | bool operator!=(const FixedPipelineState& rhs) const noexcept { | 246 | bool operator!=(const FixedPipelineState& rhs) const noexcept { |
| 249 | return !operator==(rhs); | 247 | return !operator==(rhs); |
| 250 | } | 248 | } |
| 251 | |||
| 252 | VertexInput vertex_input; | ||
| 253 | InputAssembly input_assembly; | ||
| 254 | Tessellation tessellation; | ||
| 255 | Rasterizer rasterizer; | ||
| 256 | DepthStencil depth_stencil; | ||
| 257 | ColorBlending color_blending; | ||
| 258 | }; | 249 | }; |
| 259 | static_assert(std::is_trivially_copyable_v<FixedPipelineState::VertexBinding>); | 250 | static_assert(std::has_unique_object_representations_v<FixedPipelineState>); |
| 260 | static_assert(std::is_trivially_copyable_v<FixedPipelineState::VertexAttribute>); | ||
| 261 | static_assert(std::is_trivially_copyable_v<FixedPipelineState::StencilFace>); | ||
| 262 | static_assert(std::is_trivially_copyable_v<FixedPipelineState::BlendingAttachment>); | ||
| 263 | static_assert(std::is_trivially_copyable_v<FixedPipelineState::VertexInput>); | ||
| 264 | static_assert(std::is_trivially_copyable_v<FixedPipelineState::InputAssembly>); | ||
| 265 | static_assert(std::is_trivially_copyable_v<FixedPipelineState::Tessellation>); | ||
| 266 | static_assert(std::is_trivially_copyable_v<FixedPipelineState::Rasterizer>); | ||
| 267 | static_assert(std::is_trivially_copyable_v<FixedPipelineState::DepthStencil>); | ||
| 268 | static_assert(std::is_trivially_copyable_v<FixedPipelineState::ColorBlending>); | ||
| 269 | static_assert(std::is_trivially_copyable_v<FixedPipelineState>); | 251 | static_assert(std::is_trivially_copyable_v<FixedPipelineState>); |
| 252 | static_assert(std::is_trivially_constructible_v<FixedPipelineState>); | ||
| 253 | static_assert(sizeof(FixedPipelineState) % 32 == 0, "Size is not aligned"); | ||
| 270 | 254 | ||
| 271 | FixedPipelineState GetFixedPipelineState(const Maxwell& regs); | 255 | FixedPipelineState GetFixedPipelineState(const Maxwell& regs); |
| 272 | 256 | ||
diff --git a/src/video_core/renderer_vulkan/nsight_aftermath_tracker.cpp b/src/video_core/renderer_vulkan/nsight_aftermath_tracker.cpp new file mode 100644 index 000000000..435c8c1b8 --- /dev/null +++ b/src/video_core/renderer_vulkan/nsight_aftermath_tracker.cpp | |||
| @@ -0,0 +1,220 @@ | |||
| 1 | // Copyright 2020 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #ifdef HAS_NSIGHT_AFTERMATH | ||
| 6 | |||
| 7 | #include <mutex> | ||
| 8 | #include <string> | ||
| 9 | #include <string_view> | ||
| 10 | #include <utility> | ||
| 11 | #include <vector> | ||
| 12 | |||
| 13 | #include <fmt/format.h> | ||
| 14 | |||
| 15 | #define VK_NO_PROTOTYPES | ||
| 16 | #include <vulkan/vulkan.h> | ||
| 17 | |||
| 18 | #include <GFSDK_Aftermath.h> | ||
| 19 | #include <GFSDK_Aftermath_Defines.h> | ||
| 20 | #include <GFSDK_Aftermath_GpuCrashDump.h> | ||
| 21 | #include <GFSDK_Aftermath_GpuCrashDumpDecoding.h> | ||
| 22 | |||
| 23 | #include "common/common_paths.h" | ||
| 24 | #include "common/common_types.h" | ||
| 25 | #include "common/file_util.h" | ||
| 26 | #include "common/logging/log.h" | ||
| 27 | #include "common/scope_exit.h" | ||
| 28 | |||
| 29 | #include "video_core/renderer_vulkan/nsight_aftermath_tracker.h" | ||
| 30 | |||
| 31 | namespace Vulkan { | ||
| 32 | |||
| 33 | static constexpr char AFTERMATH_LIB_NAME[] = "GFSDK_Aftermath_Lib.x64.dll"; | ||
| 34 | |||
| 35 | NsightAftermathTracker::NsightAftermathTracker() = default; | ||
| 36 | |||
| 37 | NsightAftermathTracker::~NsightAftermathTracker() { | ||
| 38 | if (initialized) { | ||
| 39 | (void)GFSDK_Aftermath_DisableGpuCrashDumps(); | ||
| 40 | } | ||
| 41 | } | ||
| 42 | |||
| 43 | bool NsightAftermathTracker::Initialize() { | ||
| 44 | if (!dl.Open(AFTERMATH_LIB_NAME)) { | ||
| 45 | LOG_ERROR(Render_Vulkan, "Failed to load Nsight Aftermath DLL"); | ||
| 46 | return false; | ||
| 47 | } | ||
| 48 | |||
| 49 | if (!dl.GetSymbol("GFSDK_Aftermath_DisableGpuCrashDumps", | ||
| 50 | &GFSDK_Aftermath_DisableGpuCrashDumps) || | ||
| 51 | !dl.GetSymbol("GFSDK_Aftermath_EnableGpuCrashDumps", | ||
| 52 | &GFSDK_Aftermath_EnableGpuCrashDumps) || | ||
| 53 | !dl.GetSymbol("GFSDK_Aftermath_GetShaderDebugInfoIdentifier", | ||
| 54 | &GFSDK_Aftermath_GetShaderDebugInfoIdentifier) || | ||
| 55 | !dl.GetSymbol("GFSDK_Aftermath_GetShaderHashSpirv", &GFSDK_Aftermath_GetShaderHashSpirv) || | ||
| 56 | !dl.GetSymbol("GFSDK_Aftermath_GpuCrashDump_CreateDecoder", | ||
| 57 | &GFSDK_Aftermath_GpuCrashDump_CreateDecoder) || | ||
| 58 | !dl.GetSymbol("GFSDK_Aftermath_GpuCrashDump_DestroyDecoder", | ||
| 59 | &GFSDK_Aftermath_GpuCrashDump_DestroyDecoder) || | ||
| 60 | !dl.GetSymbol("GFSDK_Aftermath_GpuCrashDump_GenerateJSON", | ||
| 61 | &GFSDK_Aftermath_GpuCrashDump_GenerateJSON) || | ||
| 62 | !dl.GetSymbol("GFSDK_Aftermath_GpuCrashDump_GetJSON", | ||
| 63 | &GFSDK_Aftermath_GpuCrashDump_GetJSON)) { | ||
| 64 | LOG_ERROR(Render_Vulkan, "Failed to load Nsight Aftermath function pointers"); | ||
| 65 | return false; | ||
| 66 | } | ||
| 67 | |||
| 68 | dump_dir = FileUtil::GetUserPath(FileUtil::UserPath::LogDir) + "gpucrash"; | ||
| 69 | |||
| 70 | (void)FileUtil::DeleteDirRecursively(dump_dir); | ||
| 71 | if (!FileUtil::CreateDir(dump_dir)) { | ||
| 72 | LOG_ERROR(Render_Vulkan, "Failed to create Nsight Aftermath dump directory"); | ||
| 73 | return false; | ||
| 74 | } | ||
| 75 | |||
| 76 | if (!GFSDK_Aftermath_SUCCEED(GFSDK_Aftermath_EnableGpuCrashDumps( | ||
| 77 | GFSDK_Aftermath_Version_API, GFSDK_Aftermath_GpuCrashDumpWatchedApiFlags_Vulkan, | ||
| 78 | GFSDK_Aftermath_GpuCrashDumpFeatureFlags_Default, GpuCrashDumpCallback, | ||
| 79 | ShaderDebugInfoCallback, CrashDumpDescriptionCallback, this))) { | ||
| 80 | LOG_ERROR(Render_Vulkan, "GFSDK_Aftermath_EnableGpuCrashDumps failed"); | ||
| 81 | return false; | ||
| 82 | } | ||
| 83 | |||
| 84 | LOG_INFO(Render_Vulkan, "Nsight Aftermath dump directory is \"{}\"", dump_dir); | ||
| 85 | |||
| 86 | initialized = true; | ||
| 87 | return true; | ||
| 88 | } | ||
| 89 | |||
| 90 | void NsightAftermathTracker::SaveShader(const std::vector<u32>& spirv) const { | ||
| 91 | if (!initialized) { | ||
| 92 | return; | ||
| 93 | } | ||
| 94 | |||
| 95 | std::vector<u32> spirv_copy = spirv; | ||
| 96 | GFSDK_Aftermath_SpirvCode shader; | ||
| 97 | shader.pData = spirv_copy.data(); | ||
| 98 | shader.size = static_cast<u32>(spirv_copy.size() * 4); | ||
| 99 | |||
| 100 | std::scoped_lock lock{mutex}; | ||
| 101 | |||
| 102 | GFSDK_Aftermath_ShaderHash hash; | ||
| 103 | if (!GFSDK_Aftermath_SUCCEED( | ||
| 104 | GFSDK_Aftermath_GetShaderHashSpirv(GFSDK_Aftermath_Version_API, &shader, &hash))) { | ||
| 105 | LOG_ERROR(Render_Vulkan, "Failed to hash SPIR-V module"); | ||
| 106 | return; | ||
| 107 | } | ||
| 108 | |||
| 109 | FileUtil::IOFile file(fmt::format("{}/source_{:016x}.spv", dump_dir, hash.hash), "wb"); | ||
| 110 | if (!file.IsOpen()) { | ||
| 111 | LOG_ERROR(Render_Vulkan, "Failed to dump SPIR-V module with hash={:016x}", hash.hash); | ||
| 112 | return; | ||
| 113 | } | ||
| 114 | if (file.WriteArray(spirv.data(), spirv.size()) != spirv.size()) { | ||
| 115 | LOG_ERROR(Render_Vulkan, "Failed to write SPIR-V module with hash={:016x}", hash.hash); | ||
| 116 | return; | ||
| 117 | } | ||
| 118 | } | ||
| 119 | |||
| 120 | void NsightAftermathTracker::OnGpuCrashDumpCallback(const void* gpu_crash_dump, | ||
| 121 | u32 gpu_crash_dump_size) { | ||
| 122 | std::scoped_lock lock{mutex}; | ||
| 123 | |||
| 124 | LOG_CRITICAL(Render_Vulkan, "called"); | ||
| 125 | |||
| 126 | GFSDK_Aftermath_GpuCrashDump_Decoder decoder; | ||
| 127 | if (!GFSDK_Aftermath_SUCCEED(GFSDK_Aftermath_GpuCrashDump_CreateDecoder( | ||
| 128 | GFSDK_Aftermath_Version_API, gpu_crash_dump, gpu_crash_dump_size, &decoder))) { | ||
| 129 | LOG_ERROR(Render_Vulkan, "Failed to create decoder"); | ||
| 130 | return; | ||
| 131 | } | ||
| 132 | SCOPE_EXIT({ GFSDK_Aftermath_GpuCrashDump_DestroyDecoder(decoder); }); | ||
| 133 | |||
| 134 | u32 json_size = 0; | ||
| 135 | if (!GFSDK_Aftermath_SUCCEED(GFSDK_Aftermath_GpuCrashDump_GenerateJSON( | ||
| 136 | decoder, GFSDK_Aftermath_GpuCrashDumpDecoderFlags_ALL_INFO, | ||
| 137 | GFSDK_Aftermath_GpuCrashDumpFormatterFlags_NONE, nullptr, nullptr, nullptr, nullptr, | ||
| 138 | this, &json_size))) { | ||
| 139 | LOG_ERROR(Render_Vulkan, "Failed to generate JSON"); | ||
| 140 | return; | ||
| 141 | } | ||
| 142 | std::vector<char> json(json_size); | ||
| 143 | if (!GFSDK_Aftermath_SUCCEED( | ||
| 144 | GFSDK_Aftermath_GpuCrashDump_GetJSON(decoder, json_size, json.data()))) { | ||
| 145 | LOG_ERROR(Render_Vulkan, "Failed to query JSON"); | ||
| 146 | return; | ||
| 147 | } | ||
| 148 | |||
| 149 | const std::string base_name = [this] { | ||
| 150 | const int id = dump_id++; | ||
| 151 | if (id == 0) { | ||
| 152 | return fmt::format("{}/crash.nv-gpudmp", dump_dir); | ||
| 153 | } else { | ||
| 154 | return fmt::format("{}/crash_{}.nv-gpudmp", dump_dir, id); | ||
| 155 | } | ||
| 156 | }(); | ||
| 157 | |||
| 158 | std::string_view dump_view(static_cast<const char*>(gpu_crash_dump), gpu_crash_dump_size); | ||
| 159 | if (FileUtil::WriteStringToFile(false, base_name, dump_view) != gpu_crash_dump_size) { | ||
| 160 | LOG_ERROR(Render_Vulkan, "Failed to write dump file"); | ||
| 161 | return; | ||
| 162 | } | ||
| 163 | const std::string_view json_view(json.data(), json.size()); | ||
| 164 | if (FileUtil::WriteStringToFile(true, base_name + ".json", json_view) != json.size()) { | ||
| 165 | LOG_ERROR(Render_Vulkan, "Failed to write JSON"); | ||
| 166 | return; | ||
| 167 | } | ||
| 168 | } | ||
| 169 | |||
| 170 | void NsightAftermathTracker::OnShaderDebugInfoCallback(const void* shader_debug_info, | ||
| 171 | u32 shader_debug_info_size) { | ||
| 172 | std::scoped_lock lock{mutex}; | ||
| 173 | |||
| 174 | GFSDK_Aftermath_ShaderDebugInfoIdentifier identifier; | ||
| 175 | if (!GFSDK_Aftermath_SUCCEED(GFSDK_Aftermath_GetShaderDebugInfoIdentifier( | ||
| 176 | GFSDK_Aftermath_Version_API, shader_debug_info, shader_debug_info_size, &identifier))) { | ||
| 177 | LOG_ERROR(Render_Vulkan, "GFSDK_Aftermath_GetShaderDebugInfoIdentifier failed"); | ||
| 178 | return; | ||
| 179 | } | ||
| 180 | |||
| 181 | const std::string path = | ||
| 182 | fmt::format("{}/shader_{:016x}{:016x}.nvdbg", dump_dir, identifier.id[0], identifier.id[1]); | ||
| 183 | FileUtil::IOFile file(path, "wb"); | ||
| 184 | if (!file.IsOpen()) { | ||
| 185 | LOG_ERROR(Render_Vulkan, "Failed to create file {}", path); | ||
| 186 | return; | ||
| 187 | } | ||
| 188 | if (file.WriteBytes(static_cast<const u8*>(shader_debug_info), shader_debug_info_size) != | ||
| 189 | shader_debug_info_size) { | ||
| 190 | LOG_ERROR(Render_Vulkan, "Failed to write file {}", path); | ||
| 191 | return; | ||
| 192 | } | ||
| 193 | } | ||
| 194 | |||
| 195 | void NsightAftermathTracker::OnCrashDumpDescriptionCallback( | ||
| 196 | PFN_GFSDK_Aftermath_AddGpuCrashDumpDescription add_description) { | ||
| 197 | add_description(GFSDK_Aftermath_GpuCrashDumpDescriptionKey_ApplicationName, "yuzu"); | ||
| 198 | } | ||
| 199 | |||
| 200 | void NsightAftermathTracker::GpuCrashDumpCallback(const void* gpu_crash_dump, | ||
| 201 | u32 gpu_crash_dump_size, void* user_data) { | ||
| 202 | static_cast<NsightAftermathTracker*>(user_data)->OnGpuCrashDumpCallback(gpu_crash_dump, | ||
| 203 | gpu_crash_dump_size); | ||
| 204 | } | ||
| 205 | |||
| 206 | void NsightAftermathTracker::ShaderDebugInfoCallback(const void* shader_debug_info, | ||
| 207 | u32 shader_debug_info_size, void* user_data) { | ||
| 208 | static_cast<NsightAftermathTracker*>(user_data)->OnShaderDebugInfoCallback( | ||
| 209 | shader_debug_info, shader_debug_info_size); | ||
| 210 | } | ||
| 211 | |||
| 212 | void NsightAftermathTracker::CrashDumpDescriptionCallback( | ||
| 213 | PFN_GFSDK_Aftermath_AddGpuCrashDumpDescription add_description, void* user_data) { | ||
| 214 | static_cast<NsightAftermathTracker*>(user_data)->OnCrashDumpDescriptionCallback( | ||
| 215 | add_description); | ||
| 216 | } | ||
| 217 | |||
| 218 | } // namespace Vulkan | ||
| 219 | |||
| 220 | #endif // HAS_NSIGHT_AFTERMATH | ||
diff --git a/src/video_core/renderer_vulkan/nsight_aftermath_tracker.h b/src/video_core/renderer_vulkan/nsight_aftermath_tracker.h new file mode 100644 index 000000000..afe7ae99e --- /dev/null +++ b/src/video_core/renderer_vulkan/nsight_aftermath_tracker.h | |||
| @@ -0,0 +1,87 @@ | |||
| 1 | // Copyright 2020 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include <mutex> | ||
| 8 | #include <string> | ||
| 9 | #include <vector> | ||
| 10 | |||
| 11 | #define VK_NO_PROTOTYPES | ||
| 12 | #include <vulkan/vulkan.h> | ||
| 13 | |||
| 14 | #ifdef HAS_NSIGHT_AFTERMATH | ||
| 15 | #include <GFSDK_Aftermath_Defines.h> | ||
| 16 | #include <GFSDK_Aftermath_GpuCrashDump.h> | ||
| 17 | #include <GFSDK_Aftermath_GpuCrashDumpDecoding.h> | ||
| 18 | #endif | ||
| 19 | |||
| 20 | #include "common/common_types.h" | ||
| 21 | #include "common/dynamic_library.h" | ||
| 22 | |||
| 23 | namespace Vulkan { | ||
| 24 | |||
| 25 | class NsightAftermathTracker { | ||
| 26 | public: | ||
| 27 | NsightAftermathTracker(); | ||
| 28 | ~NsightAftermathTracker(); | ||
| 29 | |||
| 30 | NsightAftermathTracker(const NsightAftermathTracker&) = delete; | ||
| 31 | NsightAftermathTracker& operator=(const NsightAftermathTracker&) = delete; | ||
| 32 | |||
| 33 | // Delete move semantics because Aftermath initialization uses a pointer to this. | ||
| 34 | NsightAftermathTracker(NsightAftermathTracker&&) = delete; | ||
| 35 | NsightAftermathTracker& operator=(NsightAftermathTracker&&) = delete; | ||
| 36 | |||
| 37 | bool Initialize(); | ||
| 38 | |||
| 39 | void SaveShader(const std::vector<u32>& spirv) const; | ||
| 40 | |||
| 41 | private: | ||
| 42 | #ifdef HAS_NSIGHT_AFTERMATH | ||
| 43 | static void GpuCrashDumpCallback(const void* gpu_crash_dump, u32 gpu_crash_dump_size, | ||
| 44 | void* user_data); | ||
| 45 | |||
| 46 | static void ShaderDebugInfoCallback(const void* shader_debug_info, u32 shader_debug_info_size, | ||
| 47 | void* user_data); | ||
| 48 | |||
| 49 | static void CrashDumpDescriptionCallback( | ||
| 50 | PFN_GFSDK_Aftermath_AddGpuCrashDumpDescription add_description, void* user_data); | ||
| 51 | |||
| 52 | void OnGpuCrashDumpCallback(const void* gpu_crash_dump, u32 gpu_crash_dump_size); | ||
| 53 | |||
| 54 | void OnShaderDebugInfoCallback(const void* shader_debug_info, u32 shader_debug_info_size); | ||
| 55 | |||
| 56 | void OnCrashDumpDescriptionCallback( | ||
| 57 | PFN_GFSDK_Aftermath_AddGpuCrashDumpDescription add_description); | ||
| 58 | |||
| 59 | mutable std::mutex mutex; | ||
| 60 | |||
| 61 | std::string dump_dir; | ||
| 62 | int dump_id = 0; | ||
| 63 | |||
| 64 | bool initialized = false; | ||
| 65 | |||
| 66 | Common::DynamicLibrary dl; | ||
| 67 | PFN_GFSDK_Aftermath_DisableGpuCrashDumps GFSDK_Aftermath_DisableGpuCrashDumps; | ||
| 68 | PFN_GFSDK_Aftermath_EnableGpuCrashDumps GFSDK_Aftermath_EnableGpuCrashDumps; | ||
| 69 | PFN_GFSDK_Aftermath_GetShaderDebugInfoIdentifier GFSDK_Aftermath_GetShaderDebugInfoIdentifier; | ||
| 70 | PFN_GFSDK_Aftermath_GetShaderHashSpirv GFSDK_Aftermath_GetShaderHashSpirv; | ||
| 71 | PFN_GFSDK_Aftermath_GpuCrashDump_CreateDecoder GFSDK_Aftermath_GpuCrashDump_CreateDecoder; | ||
| 72 | PFN_GFSDK_Aftermath_GpuCrashDump_DestroyDecoder GFSDK_Aftermath_GpuCrashDump_DestroyDecoder; | ||
| 73 | PFN_GFSDK_Aftermath_GpuCrashDump_GenerateJSON GFSDK_Aftermath_GpuCrashDump_GenerateJSON; | ||
| 74 | PFN_GFSDK_Aftermath_GpuCrashDump_GetJSON GFSDK_Aftermath_GpuCrashDump_GetJSON; | ||
| 75 | #endif | ||
| 76 | }; | ||
| 77 | |||
| 78 | #ifndef HAS_NSIGHT_AFTERMATH | ||
| 79 | inline NsightAftermathTracker::NsightAftermathTracker() = default; | ||
| 80 | inline NsightAftermathTracker::~NsightAftermathTracker() = default; | ||
| 81 | inline bool NsightAftermathTracker::Initialize() { | ||
| 82 | return false; | ||
| 83 | } | ||
| 84 | inline void NsightAftermathTracker::SaveShader(const std::vector<u32>&) const {} | ||
| 85 | #endif | ||
| 86 | |||
| 87 | } // namespace Vulkan | ||
diff --git a/src/video_core/renderer_vulkan/shaders/quad_indexed.comp b/src/video_core/renderer_vulkan/shaders/quad_indexed.comp new file mode 100644 index 000000000..5a472ba9b --- /dev/null +++ b/src/video_core/renderer_vulkan/shaders/quad_indexed.comp | |||
| @@ -0,0 +1,50 @@ | |||
| 1 | // Copyright 2020 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | /* | ||
| 6 | * Build instructions: | ||
| 7 | * $ glslangValidator -V quad_indexed.comp -o output.spv | ||
| 8 | * $ spirv-opt -O --strip-debug output.spv -o optimized.spv | ||
| 9 | * $ xxd -i optimized.spv | ||
| 10 | * | ||
| 11 | * Then copy that bytecode to the C++ file | ||
| 12 | */ | ||
| 13 | |||
| 14 | #version 460 core | ||
| 15 | |||
| 16 | layout (local_size_x = 1024) in; | ||
| 17 | |||
| 18 | layout (std430, set = 0, binding = 0) readonly buffer InputBuffer { | ||
| 19 | uint input_indexes[]; | ||
| 20 | }; | ||
| 21 | |||
| 22 | layout (std430, set = 0, binding = 1) writeonly buffer OutputBuffer { | ||
| 23 | uint output_indexes[]; | ||
| 24 | }; | ||
| 25 | |||
| 26 | layout (push_constant) uniform PushConstants { | ||
| 27 | uint base_vertex; | ||
| 28 | int index_shift; // 0: uint8, 1: uint16, 2: uint32 | ||
| 29 | }; | ||
| 30 | |||
| 31 | void main() { | ||
| 32 | int primitive = int(gl_GlobalInvocationID.x); | ||
| 33 | if (primitive * 6 >= output_indexes.length()) { | ||
| 34 | return; | ||
| 35 | } | ||
| 36 | |||
| 37 | int index_size = 8 << index_shift; | ||
| 38 | int flipped_shift = 2 - index_shift; | ||
| 39 | int mask = (1 << flipped_shift) - 1; | ||
| 40 | |||
| 41 | const int quad_swizzle[6] = int[](0, 1, 2, 0, 2, 3); | ||
| 42 | for (uint vertex = 0; vertex < 6; ++vertex) { | ||
| 43 | int offset = primitive * 4 + quad_swizzle[vertex]; | ||
| 44 | int int_offset = offset >> flipped_shift; | ||
| 45 | int bit_offset = (offset & mask) * index_size; | ||
| 46 | uint packed_input = input_indexes[int_offset]; | ||
| 47 | uint index = bitfieldExtract(packed_input, bit_offset, index_size); | ||
| 48 | output_indexes[primitive * 6 + vertex] = index + base_vertex; | ||
| 49 | } | ||
| 50 | } | ||
diff --git a/src/video_core/renderer_vulkan/vk_compute_pass.cpp b/src/video_core/renderer_vulkan/vk_compute_pass.cpp index 878a78755..7b0268033 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pass.cpp +++ b/src/video_core/renderer_vulkan/vk_compute_pass.cpp | |||
| @@ -135,11 +135,11 @@ VkDescriptorUpdateTemplateEntryKHR BuildQuadArrayPassDescriptorUpdateTemplateEnt | |||
| 135 | return entry; | 135 | return entry; |
| 136 | } | 136 | } |
| 137 | 137 | ||
| 138 | VkPushConstantRange BuildQuadArrayPassPushConstantRange() { | 138 | VkPushConstantRange BuildComputePushConstantRange(std::size_t size) { |
| 139 | VkPushConstantRange range; | 139 | VkPushConstantRange range; |
| 140 | range.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT; | 140 | range.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT; |
| 141 | range.offset = 0; | 141 | range.offset = 0; |
| 142 | range.size = sizeof(u32); | 142 | range.size = static_cast<u32>(size); |
| 143 | return range; | 143 | return range; |
| 144 | } | 144 | } |
| 145 | 145 | ||
| @@ -220,7 +220,130 @@ constexpr u8 uint8_pass[] = { | |||
| 220 | 0xf9, 0x00, 0x02, 0x00, 0x1d, 0x00, 0x00, 0x00, 0xf8, 0x00, 0x02, 0x00, 0x1d, 0x00, 0x00, 0x00, | 220 | 0xf9, 0x00, 0x02, 0x00, 0x1d, 0x00, 0x00, 0x00, 0xf8, 0x00, 0x02, 0x00, 0x1d, 0x00, 0x00, 0x00, |
| 221 | 0xfd, 0x00, 0x01, 0x00, 0x38, 0x00, 0x01, 0x00}; | 221 | 0xfd, 0x00, 0x01, 0x00, 0x38, 0x00, 0x01, 0x00}; |
| 222 | 222 | ||
| 223 | std::array<VkDescriptorSetLayoutBinding, 2> BuildUint8PassDescriptorSetBindings() { | 223 | // Quad indexed SPIR-V module. Generated from the "shaders/" directory. |
| 224 | constexpr u8 QUAD_INDEXED_SPV[] = { | ||
| 225 | 0x03, 0x02, 0x23, 0x07, 0x00, 0x00, 0x01, 0x00, 0x07, 0x00, 0x08, 0x00, 0x7c, 0x00, 0x00, 0x00, | ||
| 226 | 0x00, 0x00, 0x00, 0x00, 0x11, 0x00, 0x02, 0x00, 0x01, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x06, 0x00, | ||
| 227 | 0x01, 0x00, 0x00, 0x00, 0x47, 0x4c, 0x53, 0x4c, 0x2e, 0x73, 0x74, 0x64, 0x2e, 0x34, 0x35, 0x30, | ||
| 228 | 0x00, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, | ||
| 229 | 0x0f, 0x00, 0x06, 0x00, 0x05, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x6d, 0x61, 0x69, 0x6e, | ||
| 230 | 0x00, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x10, 0x00, 0x06, 0x00, 0x04, 0x00, 0x00, 0x00, | ||
| 231 | 0x11, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, | ||
| 232 | 0x47, 0x00, 0x04, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00, | ||
| 233 | 0x47, 0x00, 0x04, 0x00, 0x15, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, | ||
| 234 | 0x48, 0x00, 0x04, 0x00, 0x16, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x19, 0x00, 0x00, 0x00, | ||
| 235 | 0x48, 0x00, 0x05, 0x00, 0x16, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x23, 0x00, 0x00, 0x00, | ||
| 236 | 0x00, 0x00, 0x00, 0x00, 0x47, 0x00, 0x03, 0x00, 0x16, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, | ||
| 237 | 0x47, 0x00, 0x04, 0x00, 0x18, 0x00, 0x00, 0x00, 0x22, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, | ||
| 238 | 0x47, 0x00, 0x04, 0x00, 0x18, 0x00, 0x00, 0x00, 0x21, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, | ||
| 239 | 0x48, 0x00, 0x05, 0x00, 0x22, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x23, 0x00, 0x00, 0x00, | ||
| 240 | 0x00, 0x00, 0x00, 0x00, 0x48, 0x00, 0x05, 0x00, 0x22, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, | ||
| 241 | 0x23, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x47, 0x00, 0x03, 0x00, 0x22, 0x00, 0x00, 0x00, | ||
| 242 | 0x02, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x56, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, | ||
| 243 | 0x04, 0x00, 0x00, 0x00, 0x48, 0x00, 0x04, 0x00, 0x57, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, | ||
| 244 | 0x18, 0x00, 0x00, 0x00, 0x48, 0x00, 0x05, 0x00, 0x57, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, | ||
| 245 | 0x23, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x47, 0x00, 0x03, 0x00, 0x57, 0x00, 0x00, 0x00, | ||
| 246 | 0x03, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x59, 0x00, 0x00, 0x00, 0x22, 0x00, 0x00, 0x00, | ||
| 247 | 0x00, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x59, 0x00, 0x00, 0x00, 0x21, 0x00, 0x00, 0x00, | ||
| 248 | 0x00, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x72, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x00, 0x00, | ||
| 249 | 0x19, 0x00, 0x00, 0x00, 0x13, 0x00, 0x02, 0x00, 0x02, 0x00, 0x00, 0x00, 0x21, 0x00, 0x03, 0x00, | ||
| 250 | 0x03, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x15, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, | ||
| 251 | 0x20, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x07, 0x00, 0x00, 0x00, | ||
| 252 | 0x07, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, 0x15, 0x00, 0x04, 0x00, 0x09, 0x00, 0x00, 0x00, | ||
| 253 | 0x20, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x17, 0x00, 0x04, 0x00, 0x0a, 0x00, 0x00, 0x00, | ||
| 254 | 0x09, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x0b, 0x00, 0x00, 0x00, | ||
| 255 | 0x01, 0x00, 0x00, 0x00, 0x0a, 0x00, 0x00, 0x00, 0x3b, 0x00, 0x04, 0x00, 0x0b, 0x00, 0x00, 0x00, | ||
| 256 | 0x0c, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x04, 0x00, 0x09, 0x00, 0x00, 0x00, | ||
| 257 | 0x0d, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x0e, 0x00, 0x00, 0x00, | ||
| 258 | 0x01, 0x00, 0x00, 0x00, 0x09, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, | ||
| 259 | 0x13, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, 0x1d, 0x00, 0x03, 0x00, 0x15, 0x00, 0x00, 0x00, | ||
| 260 | 0x09, 0x00, 0x00, 0x00, 0x1e, 0x00, 0x03, 0x00, 0x16, 0x00, 0x00, 0x00, 0x15, 0x00, 0x00, 0x00, | ||
| 261 | 0x20, 0x00, 0x04, 0x00, 0x17, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x16, 0x00, 0x00, 0x00, | ||
| 262 | 0x3b, 0x00, 0x04, 0x00, 0x17, 0x00, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, | ||
| 263 | 0x14, 0x00, 0x02, 0x00, 0x1b, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, | ||
| 264 | 0x21, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x1e, 0x00, 0x04, 0x00, 0x22, 0x00, 0x00, 0x00, | ||
| 265 | 0x09, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x23, 0x00, 0x00, 0x00, | ||
| 266 | 0x09, 0x00, 0x00, 0x00, 0x22, 0x00, 0x00, 0x00, 0x3b, 0x00, 0x04, 0x00, 0x23, 0x00, 0x00, 0x00, | ||
| 267 | 0x24, 0x00, 0x00, 0x00, 0x09, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, | ||
| 268 | 0x25, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x26, 0x00, 0x00, 0x00, | ||
| 269 | 0x09, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, | ||
| 270 | 0x2b, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x04, 0x00, 0x09, 0x00, 0x00, 0x00, | ||
| 271 | 0x3b, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, | ||
| 272 | 0x3f, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x04, 0x00, 0x41, 0x00, 0x00, 0x00, | ||
| 273 | 0x06, 0x00, 0x00, 0x00, 0x3b, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, | ||
| 274 | 0x42, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, | ||
| 275 | 0x43, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x2c, 0x00, 0x09, 0x00, 0x41, 0x00, 0x00, 0x00, | ||
| 276 | 0x44, 0x00, 0x00, 0x00, 0x42, 0x00, 0x00, 0x00, 0x25, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x00, 0x00, | ||
| 277 | 0x42, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x00, 0x00, 0x43, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, | ||
| 278 | 0x46, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, 0x41, 0x00, 0x00, 0x00, 0x1d, 0x00, 0x03, 0x00, | ||
| 279 | 0x56, 0x00, 0x00, 0x00, 0x09, 0x00, 0x00, 0x00, 0x1e, 0x00, 0x03, 0x00, 0x57, 0x00, 0x00, 0x00, | ||
| 280 | 0x56, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x58, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, | ||
| 281 | 0x57, 0x00, 0x00, 0x00, 0x3b, 0x00, 0x04, 0x00, 0x58, 0x00, 0x00, 0x00, 0x59, 0x00, 0x00, 0x00, | ||
| 282 | 0x02, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x5b, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, | ||
| 283 | 0x09, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x69, 0x00, 0x00, 0x00, 0x09, 0x00, 0x00, 0x00, | ||
| 284 | 0x09, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x04, 0x00, 0x09, 0x00, 0x00, 0x00, 0x70, 0x00, 0x00, 0x00, | ||
| 285 | 0x00, 0x04, 0x00, 0x00, 0x2b, 0x00, 0x04, 0x00, 0x09, 0x00, 0x00, 0x00, 0x71, 0x00, 0x00, 0x00, | ||
| 286 | 0x01, 0x00, 0x00, 0x00, 0x2c, 0x00, 0x06, 0x00, 0x0a, 0x00, 0x00, 0x00, 0x72, 0x00, 0x00, 0x00, | ||
| 287 | 0x70, 0x00, 0x00, 0x00, 0x71, 0x00, 0x00, 0x00, 0x71, 0x00, 0x00, 0x00, 0x36, 0x00, 0x05, 0x00, | ||
| 288 | 0x02, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, | ||
| 289 | 0xf8, 0x00, 0x02, 0x00, 0x05, 0x00, 0x00, 0x00, 0x3b, 0x00, 0x04, 0x00, 0x46, 0x00, 0x00, 0x00, | ||
| 290 | 0x47, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, 0xf9, 0x00, 0x02, 0x00, 0x74, 0x00, 0x00, 0x00, | ||
| 291 | 0xf8, 0x00, 0x02, 0x00, 0x74, 0x00, 0x00, 0x00, 0xf6, 0x00, 0x04, 0x00, 0x73, 0x00, 0x00, 0x00, | ||
| 292 | 0x76, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xf9, 0x00, 0x02, 0x00, 0x75, 0x00, 0x00, 0x00, | ||
| 293 | 0xf8, 0x00, 0x02, 0x00, 0x75, 0x00, 0x00, 0x00, 0x41, 0x00, 0x05, 0x00, 0x0e, 0x00, 0x00, 0x00, | ||
| 294 | 0x0f, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x0d, 0x00, 0x00, 0x00, 0x3d, 0x00, 0x04, 0x00, | ||
| 295 | 0x09, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x0f, 0x00, 0x00, 0x00, 0x7c, 0x00, 0x04, 0x00, | ||
| 296 | 0x06, 0x00, 0x00, 0x00, 0x11, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x84, 0x00, 0x05, 0x00, | ||
| 297 | 0x06, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x11, 0x00, 0x00, 0x00, 0x13, 0x00, 0x00, 0x00, | ||
| 298 | 0x44, 0x00, 0x05, 0x00, 0x09, 0x00, 0x00, 0x00, 0x19, 0x00, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00, | ||
| 299 | 0x00, 0x00, 0x00, 0x00, 0x7c, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, 0x1a, 0x00, 0x00, 0x00, | ||
| 300 | 0x19, 0x00, 0x00, 0x00, 0xaf, 0x00, 0x05, 0x00, 0x1b, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00, | ||
| 301 | 0x14, 0x00, 0x00, 0x00, 0x1a, 0x00, 0x00, 0x00, 0xf7, 0x00, 0x03, 0x00, 0x1e, 0x00, 0x00, 0x00, | ||
| 302 | 0x00, 0x00, 0x00, 0x00, 0xfa, 0x00, 0x04, 0x00, 0x1c, 0x00, 0x00, 0x00, 0x1d, 0x00, 0x00, 0x00, | ||
| 303 | 0x1e, 0x00, 0x00, 0x00, 0xf8, 0x00, 0x02, 0x00, 0x1d, 0x00, 0x00, 0x00, 0xf9, 0x00, 0x02, 0x00, | ||
| 304 | 0x73, 0x00, 0x00, 0x00, 0xf8, 0x00, 0x02, 0x00, 0x1e, 0x00, 0x00, 0x00, 0x41, 0x00, 0x05, 0x00, | ||
| 305 | 0x26, 0x00, 0x00, 0x00, 0x27, 0x00, 0x00, 0x00, 0x24, 0x00, 0x00, 0x00, 0x25, 0x00, 0x00, 0x00, | ||
| 306 | 0x3d, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, 0x27, 0x00, 0x00, 0x00, | ||
| 307 | 0xc4, 0x00, 0x05, 0x00, 0x06, 0x00, 0x00, 0x00, 0x29, 0x00, 0x00, 0x00, 0x21, 0x00, 0x00, 0x00, | ||
| 308 | 0x28, 0x00, 0x00, 0x00, 0x82, 0x00, 0x05, 0x00, 0x06, 0x00, 0x00, 0x00, 0x2e, 0x00, 0x00, 0x00, | ||
| 309 | 0x2b, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, 0xc4, 0x00, 0x05, 0x00, 0x06, 0x00, 0x00, 0x00, | ||
| 310 | 0x31, 0x00, 0x00, 0x00, 0x25, 0x00, 0x00, 0x00, 0x2e, 0x00, 0x00, 0x00, 0x82, 0x00, 0x05, 0x00, | ||
| 311 | 0x06, 0x00, 0x00, 0x00, 0x32, 0x00, 0x00, 0x00, 0x31, 0x00, 0x00, 0x00, 0x25, 0x00, 0x00, 0x00, | ||
| 312 | 0xf9, 0x00, 0x02, 0x00, 0x35, 0x00, 0x00, 0x00, 0xf8, 0x00, 0x02, 0x00, 0x35, 0x00, 0x00, 0x00, | ||
| 313 | 0xf5, 0x00, 0x07, 0x00, 0x09, 0x00, 0x00, 0x00, 0x7b, 0x00, 0x00, 0x00, 0x0d, 0x00, 0x00, 0x00, | ||
| 314 | 0x1e, 0x00, 0x00, 0x00, 0x6f, 0x00, 0x00, 0x00, 0x36, 0x00, 0x00, 0x00, 0xb0, 0x00, 0x05, 0x00, | ||
| 315 | 0x1b, 0x00, 0x00, 0x00, 0x3c, 0x00, 0x00, 0x00, 0x7b, 0x00, 0x00, 0x00, 0x3b, 0x00, 0x00, 0x00, | ||
| 316 | 0xf6, 0x00, 0x04, 0x00, 0x37, 0x00, 0x00, 0x00, 0x36, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, | ||
| 317 | 0xfa, 0x00, 0x04, 0x00, 0x3c, 0x00, 0x00, 0x00, 0x36, 0x00, 0x00, 0x00, 0x37, 0x00, 0x00, 0x00, | ||
| 318 | 0xf8, 0x00, 0x02, 0x00, 0x36, 0x00, 0x00, 0x00, 0x84, 0x00, 0x05, 0x00, 0x06, 0x00, 0x00, 0x00, | ||
| 319 | 0x40, 0x00, 0x00, 0x00, 0x11, 0x00, 0x00, 0x00, 0x3f, 0x00, 0x00, 0x00, 0x3e, 0x00, 0x03, 0x00, | ||
| 320 | 0x47, 0x00, 0x00, 0x00, 0x44, 0x00, 0x00, 0x00, 0x41, 0x00, 0x05, 0x00, 0x07, 0x00, 0x00, 0x00, | ||
| 321 | 0x48, 0x00, 0x00, 0x00, 0x47, 0x00, 0x00, 0x00, 0x7b, 0x00, 0x00, 0x00, 0x3d, 0x00, 0x04, 0x00, | ||
| 322 | 0x06, 0x00, 0x00, 0x00, 0x49, 0x00, 0x00, 0x00, 0x48, 0x00, 0x00, 0x00, 0x80, 0x00, 0x05, 0x00, | ||
| 323 | 0x06, 0x00, 0x00, 0x00, 0x4a, 0x00, 0x00, 0x00, 0x40, 0x00, 0x00, 0x00, 0x49, 0x00, 0x00, 0x00, | ||
| 324 | 0xc3, 0x00, 0x05, 0x00, 0x06, 0x00, 0x00, 0x00, 0x4e, 0x00, 0x00, 0x00, 0x4a, 0x00, 0x00, 0x00, | ||
| 325 | 0x2e, 0x00, 0x00, 0x00, 0xc7, 0x00, 0x05, 0x00, 0x06, 0x00, 0x00, 0x00, 0x52, 0x00, 0x00, 0x00, | ||
| 326 | 0x4a, 0x00, 0x00, 0x00, 0x32, 0x00, 0x00, 0x00, 0x84, 0x00, 0x05, 0x00, 0x06, 0x00, 0x00, 0x00, | ||
| 327 | 0x54, 0x00, 0x00, 0x00, 0x52, 0x00, 0x00, 0x00, 0x29, 0x00, 0x00, 0x00, 0x41, 0x00, 0x06, 0x00, | ||
| 328 | 0x5b, 0x00, 0x00, 0x00, 0x5c, 0x00, 0x00, 0x00, 0x59, 0x00, 0x00, 0x00, 0x42, 0x00, 0x00, 0x00, | ||
| 329 | 0x4e, 0x00, 0x00, 0x00, 0x3d, 0x00, 0x04, 0x00, 0x09, 0x00, 0x00, 0x00, 0x5d, 0x00, 0x00, 0x00, | ||
| 330 | 0x5c, 0x00, 0x00, 0x00, 0xcb, 0x00, 0x06, 0x00, 0x09, 0x00, 0x00, 0x00, 0x62, 0x00, 0x00, 0x00, | ||
| 331 | 0x5d, 0x00, 0x00, 0x00, 0x54, 0x00, 0x00, 0x00, 0x29, 0x00, 0x00, 0x00, 0x7c, 0x00, 0x04, 0x00, | ||
| 332 | 0x09, 0x00, 0x00, 0x00, 0x65, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x80, 0x00, 0x05, 0x00, | ||
| 333 | 0x09, 0x00, 0x00, 0x00, 0x67, 0x00, 0x00, 0x00, 0x65, 0x00, 0x00, 0x00, 0x7b, 0x00, 0x00, 0x00, | ||
| 334 | 0x41, 0x00, 0x05, 0x00, 0x69, 0x00, 0x00, 0x00, 0x6a, 0x00, 0x00, 0x00, 0x24, 0x00, 0x00, 0x00, | ||
| 335 | 0x42, 0x00, 0x00, 0x00, 0x3d, 0x00, 0x04, 0x00, 0x09, 0x00, 0x00, 0x00, 0x6b, 0x00, 0x00, 0x00, | ||
| 336 | 0x6a, 0x00, 0x00, 0x00, 0x80, 0x00, 0x05, 0x00, 0x09, 0x00, 0x00, 0x00, 0x6c, 0x00, 0x00, 0x00, | ||
| 337 | 0x62, 0x00, 0x00, 0x00, 0x6b, 0x00, 0x00, 0x00, 0x41, 0x00, 0x06, 0x00, 0x5b, 0x00, 0x00, 0x00, | ||
| 338 | 0x6d, 0x00, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00, 0x42, 0x00, 0x00, 0x00, 0x67, 0x00, 0x00, 0x00, | ||
| 339 | 0x3e, 0x00, 0x03, 0x00, 0x6d, 0x00, 0x00, 0x00, 0x6c, 0x00, 0x00, 0x00, 0x80, 0x00, 0x05, 0x00, | ||
| 340 | 0x09, 0x00, 0x00, 0x00, 0x6f, 0x00, 0x00, 0x00, 0x7b, 0x00, 0x00, 0x00, 0x25, 0x00, 0x00, 0x00, | ||
| 341 | 0xf9, 0x00, 0x02, 0x00, 0x35, 0x00, 0x00, 0x00, 0xf8, 0x00, 0x02, 0x00, 0x37, 0x00, 0x00, 0x00, | ||
| 342 | 0xf9, 0x00, 0x02, 0x00, 0x73, 0x00, 0x00, 0x00, 0xf8, 0x00, 0x02, 0x00, 0x76, 0x00, 0x00, 0x00, | ||
| 343 | 0xf9, 0x00, 0x02, 0x00, 0x74, 0x00, 0x00, 0x00, 0xf8, 0x00, 0x02, 0x00, 0x73, 0x00, 0x00, 0x00, | ||
| 344 | 0xfd, 0x00, 0x01, 0x00, 0x38, 0x00, 0x01, 0x00}; | ||
| 345 | |||
| 346 | std::array<VkDescriptorSetLayoutBinding, 2> BuildInputOutputDescriptorSetBindings() { | ||
| 224 | std::array<VkDescriptorSetLayoutBinding, 2> bindings; | 347 | std::array<VkDescriptorSetLayoutBinding, 2> bindings; |
| 225 | bindings[0].binding = 0; | 348 | bindings[0].binding = 0; |
| 226 | bindings[0].descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER; | 349 | bindings[0].descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER; |
| @@ -235,7 +358,7 @@ std::array<VkDescriptorSetLayoutBinding, 2> BuildUint8PassDescriptorSetBindings( | |||
| 235 | return bindings; | 358 | return bindings; |
| 236 | } | 359 | } |
| 237 | 360 | ||
| 238 | VkDescriptorUpdateTemplateEntryKHR BuildUint8PassDescriptorUpdateTemplateEntry() { | 361 | VkDescriptorUpdateTemplateEntryKHR BuildInputOutputDescriptorUpdateTemplate() { |
| 239 | VkDescriptorUpdateTemplateEntryKHR entry; | 362 | VkDescriptorUpdateTemplateEntryKHR entry; |
| 240 | entry.dstBinding = 0; | 363 | entry.dstBinding = 0; |
| 241 | entry.dstArrayElement = 0; | 364 | entry.dstArrayElement = 0; |
| @@ -337,14 +460,14 @@ QuadArrayPass::QuadArrayPass(const VKDevice& device, VKScheduler& scheduler, | |||
| 337 | VKUpdateDescriptorQueue& update_descriptor_queue) | 460 | VKUpdateDescriptorQueue& update_descriptor_queue) |
| 338 | : VKComputePass(device, descriptor_pool, BuildQuadArrayPassDescriptorSetLayoutBinding(), | 461 | : VKComputePass(device, descriptor_pool, BuildQuadArrayPassDescriptorSetLayoutBinding(), |
| 339 | BuildQuadArrayPassDescriptorUpdateTemplateEntry(), | 462 | BuildQuadArrayPassDescriptorUpdateTemplateEntry(), |
| 340 | BuildQuadArrayPassPushConstantRange(), std::size(quad_array), quad_array), | 463 | BuildComputePushConstantRange(sizeof(u32)), std::size(quad_array), quad_array), |
| 341 | scheduler{scheduler}, staging_buffer_pool{staging_buffer_pool}, | 464 | scheduler{scheduler}, staging_buffer_pool{staging_buffer_pool}, |
| 342 | update_descriptor_queue{update_descriptor_queue} {} | 465 | update_descriptor_queue{update_descriptor_queue} {} |
| 343 | 466 | ||
| 344 | QuadArrayPass::~QuadArrayPass() = default; | 467 | QuadArrayPass::~QuadArrayPass() = default; |
| 345 | 468 | ||
| 346 | std::pair<VkBuffer, VkDeviceSize> QuadArrayPass::Assemble(u32 num_vertices, u32 first) { | 469 | std::pair<VkBuffer, VkDeviceSize> QuadArrayPass::Assemble(u32 num_vertices, u32 first) { |
| 347 | const u32 num_triangle_vertices = num_vertices * 6 / 4; | 470 | const u32 num_triangle_vertices = (num_vertices / 4) * 6; |
| 348 | const std::size_t staging_size = num_triangle_vertices * sizeof(u32); | 471 | const std::size_t staging_size = num_triangle_vertices * sizeof(u32); |
| 349 | auto& buffer = staging_buffer_pool.GetUnusedBuffer(staging_size, false); | 472 | auto& buffer = staging_buffer_pool.GetUnusedBuffer(staging_size, false); |
| 350 | 473 | ||
| @@ -383,8 +506,8 @@ std::pair<VkBuffer, VkDeviceSize> QuadArrayPass::Assemble(u32 num_vertices, u32 | |||
| 383 | Uint8Pass::Uint8Pass(const VKDevice& device, VKScheduler& scheduler, | 506 | Uint8Pass::Uint8Pass(const VKDevice& device, VKScheduler& scheduler, |
| 384 | VKDescriptorPool& descriptor_pool, VKStagingBufferPool& staging_buffer_pool, | 507 | VKDescriptorPool& descriptor_pool, VKStagingBufferPool& staging_buffer_pool, |
| 385 | VKUpdateDescriptorQueue& update_descriptor_queue) | 508 | VKUpdateDescriptorQueue& update_descriptor_queue) |
| 386 | : VKComputePass(device, descriptor_pool, BuildUint8PassDescriptorSetBindings(), | 509 | : VKComputePass(device, descriptor_pool, BuildInputOutputDescriptorSetBindings(), |
| 387 | BuildUint8PassDescriptorUpdateTemplateEntry(), {}, std::size(uint8_pass), | 510 | BuildInputOutputDescriptorUpdateTemplate(), {}, std::size(uint8_pass), |
| 388 | uint8_pass), | 511 | uint8_pass), |
| 389 | scheduler{scheduler}, staging_buffer_pool{staging_buffer_pool}, | 512 | scheduler{scheduler}, staging_buffer_pool{staging_buffer_pool}, |
| 390 | update_descriptor_queue{update_descriptor_queue} {} | 513 | update_descriptor_queue{update_descriptor_queue} {} |
| @@ -425,4 +548,70 @@ std::pair<VkBuffer, u64> Uint8Pass::Assemble(u32 num_vertices, VkBuffer src_buff | |||
| 425 | return {*buffer.handle, 0}; | 548 | return {*buffer.handle, 0}; |
| 426 | } | 549 | } |
| 427 | 550 | ||
| 551 | QuadIndexedPass::QuadIndexedPass(const VKDevice& device, VKScheduler& scheduler, | ||
| 552 | VKDescriptorPool& descriptor_pool, | ||
| 553 | VKStagingBufferPool& staging_buffer_pool, | ||
| 554 | VKUpdateDescriptorQueue& update_descriptor_queue) | ||
| 555 | : VKComputePass(device, descriptor_pool, BuildInputOutputDescriptorSetBindings(), | ||
| 556 | BuildInputOutputDescriptorUpdateTemplate(), | ||
| 557 | BuildComputePushConstantRange(sizeof(u32) * 2), std::size(QUAD_INDEXED_SPV), | ||
| 558 | QUAD_INDEXED_SPV), | ||
| 559 | scheduler{scheduler}, staging_buffer_pool{staging_buffer_pool}, | ||
| 560 | update_descriptor_queue{update_descriptor_queue} {} | ||
| 561 | |||
| 562 | QuadIndexedPass::~QuadIndexedPass() = default; | ||
| 563 | |||
| 564 | std::pair<VkBuffer, u64> QuadIndexedPass::Assemble( | ||
| 565 | Tegra::Engines::Maxwell3D::Regs::IndexFormat index_format, u32 num_vertices, u32 base_vertex, | ||
| 566 | VkBuffer src_buffer, u64 src_offset) { | ||
| 567 | const u32 index_shift = [index_format] { | ||
| 568 | switch (index_format) { | ||
| 569 | case Tegra::Engines::Maxwell3D::Regs::IndexFormat::UnsignedByte: | ||
| 570 | return 0; | ||
| 571 | case Tegra::Engines::Maxwell3D::Regs::IndexFormat::UnsignedShort: | ||
| 572 | return 1; | ||
| 573 | case Tegra::Engines::Maxwell3D::Regs::IndexFormat::UnsignedInt: | ||
| 574 | return 2; | ||
| 575 | } | ||
| 576 | UNREACHABLE(); | ||
| 577 | return 2; | ||
| 578 | }(); | ||
| 579 | const u32 input_size = num_vertices << index_shift; | ||
| 580 | const u32 num_tri_vertices = (num_vertices / 4) * 6; | ||
| 581 | |||
| 582 | const std::size_t staging_size = num_tri_vertices * sizeof(u32); | ||
| 583 | auto& buffer = staging_buffer_pool.GetUnusedBuffer(staging_size, false); | ||
| 584 | |||
| 585 | update_descriptor_queue.Acquire(); | ||
| 586 | update_descriptor_queue.AddBuffer(src_buffer, src_offset, input_size); | ||
| 587 | update_descriptor_queue.AddBuffer(*buffer.handle, 0, staging_size); | ||
| 588 | const auto set = CommitDescriptorSet(update_descriptor_queue, scheduler.GetFence()); | ||
| 589 | |||
| 590 | scheduler.RequestOutsideRenderPassOperationContext(); | ||
| 591 | scheduler.Record([layout = *layout, pipeline = *pipeline, buffer = *buffer.handle, set, | ||
| 592 | num_tri_vertices, base_vertex, index_shift](vk::CommandBuffer cmdbuf) { | ||
| 593 | static constexpr u32 dispatch_size = 1024; | ||
| 594 | const std::array push_constants = {base_vertex, index_shift}; | ||
| 595 | cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_COMPUTE, pipeline); | ||
| 596 | cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_COMPUTE, layout, 0, set, {}); | ||
| 597 | cmdbuf.PushConstants(layout, VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(push_constants), | ||
| 598 | &push_constants); | ||
| 599 | cmdbuf.Dispatch(Common::AlignUp(num_tri_vertices, dispatch_size) / dispatch_size, 1, 1); | ||
| 600 | |||
| 601 | VkBufferMemoryBarrier barrier; | ||
| 602 | barrier.sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER; | ||
| 603 | barrier.pNext = nullptr; | ||
| 604 | barrier.srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT; | ||
| 605 | barrier.dstAccessMask = VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT; | ||
| 606 | barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; | ||
| 607 | barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; | ||
| 608 | barrier.buffer = buffer; | ||
| 609 | barrier.offset = 0; | ||
| 610 | barrier.size = static_cast<VkDeviceSize>(num_tri_vertices * sizeof(u32)); | ||
| 611 | cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, | ||
| 612 | VK_PIPELINE_STAGE_VERTEX_INPUT_BIT, 0, {}, barrier, {}); | ||
| 613 | }); | ||
| 614 | return {*buffer.handle, 0}; | ||
| 615 | } | ||
| 616 | |||
| 428 | } // namespace Vulkan | 617 | } // namespace Vulkan |
diff --git a/src/video_core/renderer_vulkan/vk_compute_pass.h b/src/video_core/renderer_vulkan/vk_compute_pass.h index ec80c8683..26bf834de 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pass.h +++ b/src/video_core/renderer_vulkan/vk_compute_pass.h | |||
| @@ -8,6 +8,7 @@ | |||
| 8 | #include <utility> | 8 | #include <utility> |
| 9 | #include <vector> | 9 | #include <vector> |
| 10 | #include "common/common_types.h" | 10 | #include "common/common_types.h" |
| 11 | #include "video_core/engines/maxwell_3d.h" | ||
| 11 | #include "video_core/renderer_vulkan/vk_descriptor_pool.h" | 12 | #include "video_core/renderer_vulkan/vk_descriptor_pool.h" |
| 12 | #include "video_core/renderer_vulkan/wrapper.h" | 13 | #include "video_core/renderer_vulkan/wrapper.h" |
| 13 | 14 | ||
| @@ -73,4 +74,22 @@ private: | |||
| 73 | VKUpdateDescriptorQueue& update_descriptor_queue; | 74 | VKUpdateDescriptorQueue& update_descriptor_queue; |
| 74 | }; | 75 | }; |
| 75 | 76 | ||
| 77 | class QuadIndexedPass final : public VKComputePass { | ||
| 78 | public: | ||
| 79 | explicit QuadIndexedPass(const VKDevice& device, VKScheduler& scheduler, | ||
| 80 | VKDescriptorPool& descriptor_pool, | ||
| 81 | VKStagingBufferPool& staging_buffer_pool, | ||
| 82 | VKUpdateDescriptorQueue& update_descriptor_queue); | ||
| 83 | ~QuadIndexedPass(); | ||
| 84 | |||
| 85 | std::pair<VkBuffer, u64> Assemble(Tegra::Engines::Maxwell3D::Regs::IndexFormat index_format, | ||
| 86 | u32 num_vertices, u32 base_vertex, VkBuffer src_buffer, | ||
| 87 | u64 src_offset); | ||
| 88 | |||
| 89 | private: | ||
| 90 | VKScheduler& scheduler; | ||
| 91 | VKStagingBufferPool& staging_buffer_pool; | ||
| 92 | VKUpdateDescriptorQueue& update_descriptor_queue; | ||
| 93 | }; | ||
| 94 | |||
| 76 | } // namespace Vulkan | 95 | } // namespace Vulkan |
diff --git a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp index 23beafa4f..52566bb79 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp | |||
| @@ -105,6 +105,8 @@ vk::DescriptorUpdateTemplateKHR VKComputePipeline::CreateDescriptorUpdateTemplat | |||
| 105 | } | 105 | } |
| 106 | 106 | ||
| 107 | vk::ShaderModule VKComputePipeline::CreateShaderModule(const std::vector<u32>& code) const { | 107 | vk::ShaderModule VKComputePipeline::CreateShaderModule(const std::vector<u32>& code) const { |
| 108 | device.SaveShader(code); | ||
| 109 | |||
| 108 | VkShaderModuleCreateInfo ci; | 110 | VkShaderModuleCreateInfo ci; |
| 109 | ci.sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO; | 111 | ci.sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO; |
| 110 | ci.pNext = nullptr; | 112 | ci.pNext = nullptr; |
diff --git a/src/video_core/renderer_vulkan/vk_device.cpp b/src/video_core/renderer_vulkan/vk_device.cpp index 52d29e49d..e90c76492 100644 --- a/src/video_core/renderer_vulkan/vk_device.cpp +++ b/src/video_core/renderer_vulkan/vk_device.cpp | |||
| @@ -9,6 +9,7 @@ | |||
| 9 | #include <string_view> | 9 | #include <string_view> |
| 10 | #include <thread> | 10 | #include <thread> |
| 11 | #include <unordered_set> | 11 | #include <unordered_set> |
| 12 | #include <utility> | ||
| 12 | #include <vector> | 13 | #include <vector> |
| 13 | 14 | ||
| 14 | #include "common/assert.h" | 15 | #include "common/assert.h" |
| @@ -167,6 +168,7 @@ bool VKDevice::Create() { | |||
| 167 | VkPhysicalDeviceFeatures2 features2; | 168 | VkPhysicalDeviceFeatures2 features2; |
| 168 | features2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2; | 169 | features2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2; |
| 169 | features2.pNext = nullptr; | 170 | features2.pNext = nullptr; |
| 171 | const void* first_next = &features2; | ||
| 170 | void** next = &features2.pNext; | 172 | void** next = &features2.pNext; |
| 171 | 173 | ||
| 172 | auto& features = features2.features; | 174 | auto& features = features2.features; |
| @@ -296,7 +298,19 @@ bool VKDevice::Create() { | |||
| 296 | LOG_INFO(Render_Vulkan, "Device doesn't support depth range unrestricted"); | 298 | LOG_INFO(Render_Vulkan, "Device doesn't support depth range unrestricted"); |
| 297 | } | 299 | } |
| 298 | 300 | ||
| 299 | logical = vk::Device::Create(physical, queue_cis, extensions, features2, dld); | 301 | VkDeviceDiagnosticsConfigCreateInfoNV diagnostics_nv; |
| 302 | if (nv_device_diagnostics_config) { | ||
| 303 | nsight_aftermath_tracker.Initialize(); | ||
| 304 | |||
| 305 | diagnostics_nv.sType = VK_STRUCTURE_TYPE_DEVICE_DIAGNOSTICS_CONFIG_CREATE_INFO_NV; | ||
| 306 | diagnostics_nv.pNext = &features2; | ||
| 307 | diagnostics_nv.flags = VK_DEVICE_DIAGNOSTICS_CONFIG_ENABLE_SHADER_DEBUG_INFO_BIT_NV | | ||
| 308 | VK_DEVICE_DIAGNOSTICS_CONFIG_ENABLE_RESOURCE_TRACKING_BIT_NV | | ||
| 309 | VK_DEVICE_DIAGNOSTICS_CONFIG_ENABLE_AUTOMATIC_CHECKPOINTS_BIT_NV; | ||
| 310 | first_next = &diagnostics_nv; | ||
| 311 | } | ||
| 312 | |||
| 313 | logical = vk::Device::Create(physical, queue_cis, extensions, first_next, dld); | ||
| 300 | if (!logical) { | 314 | if (!logical) { |
| 301 | LOG_ERROR(Render_Vulkan, "Failed to create logical device"); | 315 | LOG_ERROR(Render_Vulkan, "Failed to create logical device"); |
| 302 | return false; | 316 | return false; |
| @@ -344,17 +358,12 @@ VkFormat VKDevice::GetSupportedFormat(VkFormat wanted_format, VkFormatFeatureFla | |||
| 344 | void VKDevice::ReportLoss() const { | 358 | void VKDevice::ReportLoss() const { |
| 345 | LOG_CRITICAL(Render_Vulkan, "Device loss occured!"); | 359 | LOG_CRITICAL(Render_Vulkan, "Device loss occured!"); |
| 346 | 360 | ||
| 347 | // Wait some time to let the log flush | 361 | // Wait for the log to flush and for Nsight Aftermath to dump the results |
| 348 | std::this_thread::sleep_for(std::chrono::seconds{1}); | 362 | std::this_thread::sleep_for(std::chrono::seconds{3}); |
| 349 | 363 | } | |
| 350 | if (!nv_device_diagnostic_checkpoints) { | ||
| 351 | return; | ||
| 352 | } | ||
| 353 | 364 | ||
| 354 | [[maybe_unused]] const std::vector data = graphics_queue.GetCheckpointDataNV(dld); | 365 | void VKDevice::SaveShader(const std::vector<u32>& spirv) const { |
| 355 | // Catch here in debug builds (or with optimizations disabled) the last graphics pipeline to be | 366 | nsight_aftermath_tracker.SaveShader(spirv); |
| 356 | // executed. It can be done on a debugger by evaluating the expression: | ||
| 357 | // *(VKGraphicsPipeline*)data[0] | ||
| 358 | } | 367 | } |
| 359 | 368 | ||
| 360 | bool VKDevice::IsOptimalAstcSupported(const VkPhysicalDeviceFeatures& features) const { | 369 | bool VKDevice::IsOptimalAstcSupported(const VkPhysicalDeviceFeatures& features) const { |
| @@ -527,8 +536,8 @@ std::vector<const char*> VKDevice::LoadExtensions() { | |||
| 527 | Test(extension, has_ext_transform_feedback, VK_EXT_TRANSFORM_FEEDBACK_EXTENSION_NAME, | 536 | Test(extension, has_ext_transform_feedback, VK_EXT_TRANSFORM_FEEDBACK_EXTENSION_NAME, |
| 528 | false); | 537 | false); |
| 529 | if (Settings::values.renderer_debug) { | 538 | if (Settings::values.renderer_debug) { |
| 530 | Test(extension, nv_device_diagnostic_checkpoints, | 539 | Test(extension, nv_device_diagnostics_config, |
| 531 | VK_NV_DEVICE_DIAGNOSTIC_CHECKPOINTS_EXTENSION_NAME, true); | 540 | VK_NV_DEVICE_DIAGNOSTICS_CONFIG_EXTENSION_NAME, true); |
| 532 | } | 541 | } |
| 533 | } | 542 | } |
| 534 | 543 | ||
diff --git a/src/video_core/renderer_vulkan/vk_device.h b/src/video_core/renderer_vulkan/vk_device.h index 60d64572a..a4d841e26 100644 --- a/src/video_core/renderer_vulkan/vk_device.h +++ b/src/video_core/renderer_vulkan/vk_device.h | |||
| @@ -10,6 +10,7 @@ | |||
| 10 | #include <vector> | 10 | #include <vector> |
| 11 | 11 | ||
| 12 | #include "common/common_types.h" | 12 | #include "common/common_types.h" |
| 13 | #include "video_core/renderer_vulkan/nsight_aftermath_tracker.h" | ||
| 13 | #include "video_core/renderer_vulkan/wrapper.h" | 14 | #include "video_core/renderer_vulkan/wrapper.h" |
| 14 | 15 | ||
| 15 | namespace Vulkan { | 16 | namespace Vulkan { |
| @@ -43,6 +44,9 @@ public: | |||
| 43 | /// Reports a device loss. | 44 | /// Reports a device loss. |
| 44 | void ReportLoss() const; | 45 | void ReportLoss() const; |
| 45 | 46 | ||
| 47 | /// Reports a shader to Nsight Aftermath. | ||
| 48 | void SaveShader(const std::vector<u32>& spirv) const; | ||
| 49 | |||
| 46 | /// Returns the dispatch loader with direct function pointers of the device. | 50 | /// Returns the dispatch loader with direct function pointers of the device. |
| 47 | const vk::DeviceDispatch& GetDispatchLoader() const { | 51 | const vk::DeviceDispatch& GetDispatchLoader() const { |
| 48 | return dld; | 52 | return dld; |
| @@ -173,11 +177,6 @@ public: | |||
| 173 | return ext_transform_feedback; | 177 | return ext_transform_feedback; |
| 174 | } | 178 | } |
| 175 | 179 | ||
| 176 | /// Returns true if the device supports VK_NV_device_diagnostic_checkpoints. | ||
| 177 | bool IsNvDeviceDiagnosticCheckpoints() const { | ||
| 178 | return nv_device_diagnostic_checkpoints; | ||
| 179 | } | ||
| 180 | |||
| 181 | /// Returns the vendor name reported from Vulkan. | 180 | /// Returns the vendor name reported from Vulkan. |
| 182 | std::string_view GetVendorName() const { | 181 | std::string_view GetVendorName() const { |
| 183 | return vendor_name; | 182 | return vendor_name; |
| @@ -233,7 +232,7 @@ private: | |||
| 233 | bool ext_depth_range_unrestricted{}; ///< Support for VK_EXT_depth_range_unrestricted. | 232 | bool ext_depth_range_unrestricted{}; ///< Support for VK_EXT_depth_range_unrestricted. |
| 234 | bool ext_shader_viewport_index_layer{}; ///< Support for VK_EXT_shader_viewport_index_layer. | 233 | bool ext_shader_viewport_index_layer{}; ///< Support for VK_EXT_shader_viewport_index_layer. |
| 235 | bool ext_transform_feedback{}; ///< Support for VK_EXT_transform_feedback. | 234 | bool ext_transform_feedback{}; ///< Support for VK_EXT_transform_feedback. |
| 236 | bool nv_device_diagnostic_checkpoints{}; ///< Support for VK_NV_device_diagnostic_checkpoints. | 235 | bool nv_device_diagnostics_config{}; ///< Support for VK_NV_device_diagnostics_config. |
| 237 | 236 | ||
| 238 | // Telemetry parameters | 237 | // Telemetry parameters |
| 239 | std::string vendor_name; ///< Device's driver name. | 238 | std::string vendor_name; ///< Device's driver name. |
| @@ -241,6 +240,9 @@ private: | |||
| 241 | 240 | ||
| 242 | /// Format properties dictionary. | 241 | /// Format properties dictionary. |
| 243 | std::unordered_map<VkFormat, VkFormatProperties> format_properties; | 242 | std::unordered_map<VkFormat, VkFormatProperties> format_properties; |
| 243 | |||
| 244 | /// Nsight Aftermath GPU crash tracker | ||
| 245 | NsightAftermathTracker nsight_aftermath_tracker; | ||
| 244 | }; | 246 | }; |
| 245 | 247 | ||
| 246 | } // namespace Vulkan | 248 | } // namespace Vulkan |
diff --git a/src/video_core/renderer_vulkan/vk_fence_manager.cpp b/src/video_core/renderer_vulkan/vk_fence_manager.cpp new file mode 100644 index 000000000..a02be5487 --- /dev/null +++ b/src/video_core/renderer_vulkan/vk_fence_manager.cpp | |||
| @@ -0,0 +1,101 @@ | |||
| 1 | // Copyright 2020 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <memory> | ||
| 6 | #include <thread> | ||
| 7 | |||
| 8 | #include "video_core/renderer_vulkan/vk_buffer_cache.h" | ||
| 9 | #include "video_core/renderer_vulkan/vk_device.h" | ||
| 10 | #include "video_core/renderer_vulkan/vk_fence_manager.h" | ||
| 11 | #include "video_core/renderer_vulkan/vk_scheduler.h" | ||
| 12 | #include "video_core/renderer_vulkan/vk_texture_cache.h" | ||
| 13 | #include "video_core/renderer_vulkan/wrapper.h" | ||
| 14 | |||
| 15 | namespace Vulkan { | ||
| 16 | |||
| 17 | InnerFence::InnerFence(const VKDevice& device, VKScheduler& scheduler, u32 payload, bool is_stubbed) | ||
| 18 | : VideoCommon::FenceBase(payload, is_stubbed), device{device}, scheduler{scheduler} {} | ||
| 19 | |||
| 20 | InnerFence::InnerFence(const VKDevice& device, VKScheduler& scheduler, GPUVAddr address, | ||
| 21 | u32 payload, bool is_stubbed) | ||
| 22 | : VideoCommon::FenceBase(address, payload, is_stubbed), device{device}, scheduler{scheduler} {} | ||
| 23 | |||
| 24 | InnerFence::~InnerFence() = default; | ||
| 25 | |||
| 26 | void InnerFence::Queue() { | ||
| 27 | if (is_stubbed) { | ||
| 28 | return; | ||
| 29 | } | ||
| 30 | ASSERT(!event); | ||
| 31 | |||
| 32 | event = device.GetLogical().CreateEvent(); | ||
| 33 | ticks = scheduler.Ticks(); | ||
| 34 | |||
| 35 | scheduler.RequestOutsideRenderPassOperationContext(); | ||
| 36 | scheduler.Record([event = *event](vk::CommandBuffer cmdbuf) { | ||
| 37 | cmdbuf.SetEvent(event, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT); | ||
| 38 | }); | ||
| 39 | } | ||
| 40 | |||
| 41 | bool InnerFence::IsSignaled() const { | ||
| 42 | if (is_stubbed) { | ||
| 43 | return true; | ||
| 44 | } | ||
| 45 | ASSERT(event); | ||
| 46 | return IsEventSignalled(); | ||
| 47 | } | ||
| 48 | |||
| 49 | void InnerFence::Wait() { | ||
| 50 | if (is_stubbed) { | ||
| 51 | return; | ||
| 52 | } | ||
| 53 | ASSERT(event); | ||
| 54 | |||
| 55 | if (ticks >= scheduler.Ticks()) { | ||
| 56 | scheduler.Flush(); | ||
| 57 | } | ||
| 58 | while (!IsEventSignalled()) { | ||
| 59 | std::this_thread::yield(); | ||
| 60 | } | ||
| 61 | } | ||
| 62 | |||
| 63 | bool InnerFence::IsEventSignalled() const { | ||
| 64 | switch (const VkResult result = event.GetStatus()) { | ||
| 65 | case VK_EVENT_SET: | ||
| 66 | return true; | ||
| 67 | case VK_EVENT_RESET: | ||
| 68 | return false; | ||
| 69 | default: | ||
| 70 | throw vk::Exception(result); | ||
| 71 | } | ||
| 72 | } | ||
| 73 | |||
| 74 | VKFenceManager::VKFenceManager(Core::System& system, VideoCore::RasterizerInterface& rasterizer, | ||
| 75 | const VKDevice& device, VKScheduler& scheduler, | ||
| 76 | VKTextureCache& texture_cache, VKBufferCache& buffer_cache, | ||
| 77 | VKQueryCache& query_cache) | ||
| 78 | : GenericFenceManager(system, rasterizer, texture_cache, buffer_cache, query_cache), | ||
| 79 | device{device}, scheduler{scheduler} {} | ||
| 80 | |||
| 81 | Fence VKFenceManager::CreateFence(u32 value, bool is_stubbed) { | ||
| 82 | return std::make_shared<InnerFence>(device, scheduler, value, is_stubbed); | ||
| 83 | } | ||
| 84 | |||
| 85 | Fence VKFenceManager::CreateFence(GPUVAddr addr, u32 value, bool is_stubbed) { | ||
| 86 | return std::make_shared<InnerFence>(device, scheduler, addr, value, is_stubbed); | ||
| 87 | } | ||
| 88 | |||
| 89 | void VKFenceManager::QueueFence(Fence& fence) { | ||
| 90 | fence->Queue(); | ||
| 91 | } | ||
| 92 | |||
| 93 | bool VKFenceManager::IsFenceSignaled(Fence& fence) const { | ||
| 94 | return fence->IsSignaled(); | ||
| 95 | } | ||
| 96 | |||
| 97 | void VKFenceManager::WaitFence(Fence& fence) { | ||
| 98 | fence->Wait(); | ||
| 99 | } | ||
| 100 | |||
| 101 | } // namespace Vulkan | ||
diff --git a/src/video_core/renderer_vulkan/vk_fence_manager.h b/src/video_core/renderer_vulkan/vk_fence_manager.h new file mode 100644 index 000000000..04d07fe6a --- /dev/null +++ b/src/video_core/renderer_vulkan/vk_fence_manager.h | |||
| @@ -0,0 +1,74 @@ | |||
| 1 | // Copyright 2020 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include <memory> | ||
| 8 | |||
| 9 | #include "video_core/fence_manager.h" | ||
| 10 | #include "video_core/renderer_vulkan/wrapper.h" | ||
| 11 | |||
| 12 | namespace Core { | ||
| 13 | class System; | ||
| 14 | } | ||
| 15 | |||
| 16 | namespace VideoCore { | ||
| 17 | class RasterizerInterface; | ||
| 18 | } | ||
| 19 | |||
| 20 | namespace Vulkan { | ||
| 21 | |||
| 22 | class VKBufferCache; | ||
| 23 | class VKDevice; | ||
| 24 | class VKQueryCache; | ||
| 25 | class VKScheduler; | ||
| 26 | class VKTextureCache; | ||
| 27 | |||
| 28 | class InnerFence : public VideoCommon::FenceBase { | ||
| 29 | public: | ||
| 30 | explicit InnerFence(const VKDevice& device, VKScheduler& scheduler, u32 payload, | ||
| 31 | bool is_stubbed); | ||
| 32 | explicit InnerFence(const VKDevice& device, VKScheduler& scheduler, GPUVAddr address, | ||
| 33 | u32 payload, bool is_stubbed); | ||
| 34 | ~InnerFence(); | ||
| 35 | |||
| 36 | void Queue(); | ||
| 37 | |||
| 38 | bool IsSignaled() const; | ||
| 39 | |||
| 40 | void Wait(); | ||
| 41 | |||
| 42 | private: | ||
| 43 | bool IsEventSignalled() const; | ||
| 44 | |||
| 45 | const VKDevice& device; | ||
| 46 | VKScheduler& scheduler; | ||
| 47 | vk::Event event; | ||
| 48 | u64 ticks = 0; | ||
| 49 | }; | ||
| 50 | using Fence = std::shared_ptr<InnerFence>; | ||
| 51 | |||
| 52 | using GenericFenceManager = | ||
| 53 | VideoCommon::FenceManager<Fence, VKTextureCache, VKBufferCache, VKQueryCache>; | ||
| 54 | |||
| 55 | class VKFenceManager final : public GenericFenceManager { | ||
| 56 | public: | ||
| 57 | explicit VKFenceManager(Core::System& system, VideoCore::RasterizerInterface& rasterizer, | ||
| 58 | const VKDevice& device, VKScheduler& scheduler, | ||
| 59 | VKTextureCache& texture_cache, VKBufferCache& buffer_cache, | ||
| 60 | VKQueryCache& query_cache); | ||
| 61 | |||
| 62 | protected: | ||
| 63 | Fence CreateFence(u32 value, bool is_stubbed) override; | ||
| 64 | Fence CreateFence(GPUVAddr addr, u32 value, bool is_stubbed) override; | ||
| 65 | void QueueFence(Fence& fence) override; | ||
| 66 | bool IsFenceSignaled(Fence& fence) const override; | ||
| 67 | void WaitFence(Fence& fence) override; | ||
| 68 | |||
| 69 | private: | ||
| 70 | const VKDevice& device; | ||
| 71 | VKScheduler& scheduler; | ||
| 72 | }; | ||
| 73 | |||
| 74 | } // namespace Vulkan | ||
diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp index b540b838d..8332b42aa 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp | |||
| @@ -26,12 +26,13 @@ MICROPROFILE_DECLARE(Vulkan_PipelineCache); | |||
| 26 | 26 | ||
| 27 | namespace { | 27 | namespace { |
| 28 | 28 | ||
| 29 | VkStencilOpState GetStencilFaceState(const FixedPipelineState::StencilFace& face) { | 29 | template <class StencilFace> |
| 30 | VkStencilOpState GetStencilFaceState(const StencilFace& face) { | ||
| 30 | VkStencilOpState state; | 31 | VkStencilOpState state; |
| 31 | state.failOp = MaxwellToVK::StencilOp(face.action_stencil_fail); | 32 | state.failOp = MaxwellToVK::StencilOp(face.ActionStencilFail()); |
| 32 | state.passOp = MaxwellToVK::StencilOp(face.action_depth_pass); | 33 | state.passOp = MaxwellToVK::StencilOp(face.ActionDepthPass()); |
| 33 | state.depthFailOp = MaxwellToVK::StencilOp(face.action_depth_fail); | 34 | state.depthFailOp = MaxwellToVK::StencilOp(face.ActionDepthFail()); |
| 34 | state.compareOp = MaxwellToVK::ComparisonOp(face.test_func); | 35 | state.compareOp = MaxwellToVK::ComparisonOp(face.TestFunc()); |
| 35 | state.compareMask = 0; | 36 | state.compareMask = 0; |
| 36 | state.writeMask = 0; | 37 | state.writeMask = 0; |
| 37 | state.reference = 0; | 38 | state.reference = 0; |
| @@ -147,6 +148,8 @@ std::vector<vk::ShaderModule> VKGraphicsPipeline::CreateShaderModules( | |||
| 147 | continue; | 148 | continue; |
| 148 | } | 149 | } |
| 149 | 150 | ||
| 151 | device.SaveShader(stage->code); | ||
| 152 | |||
| 150 | ci.codeSize = stage->code.size() * sizeof(u32); | 153 | ci.codeSize = stage->code.size() * sizeof(u32); |
| 151 | ci.pCode = stage->code.data(); | 154 | ci.pCode = stage->code.data(); |
| 152 | modules.push_back(device.GetLogical().CreateShaderModule(ci)); | 155 | modules.push_back(device.GetLogical().CreateShaderModule(ci)); |
| @@ -157,43 +160,47 @@ std::vector<vk::ShaderModule> VKGraphicsPipeline::CreateShaderModules( | |||
| 157 | vk::Pipeline VKGraphicsPipeline::CreatePipeline(const RenderPassParams& renderpass_params, | 160 | vk::Pipeline VKGraphicsPipeline::CreatePipeline(const RenderPassParams& renderpass_params, |
| 158 | const SPIRVProgram& program) const { | 161 | const SPIRVProgram& program) const { |
| 159 | const auto& vi = fixed_state.vertex_input; | 162 | const auto& vi = fixed_state.vertex_input; |
| 160 | const auto& ia = fixed_state.input_assembly; | ||
| 161 | const auto& ds = fixed_state.depth_stencil; | 163 | const auto& ds = fixed_state.depth_stencil; |
| 162 | const auto& cd = fixed_state.color_blending; | 164 | const auto& cd = fixed_state.color_blending; |
| 163 | const auto& ts = fixed_state.tessellation; | ||
| 164 | const auto& rs = fixed_state.rasterizer; | 165 | const auto& rs = fixed_state.rasterizer; |
| 165 | 166 | ||
| 166 | std::vector<VkVertexInputBindingDescription> vertex_bindings; | 167 | std::vector<VkVertexInputBindingDescription> vertex_bindings; |
| 167 | std::vector<VkVertexInputBindingDivisorDescriptionEXT> vertex_binding_divisors; | 168 | std::vector<VkVertexInputBindingDivisorDescriptionEXT> vertex_binding_divisors; |
| 168 | for (std::size_t i = 0; i < vi.num_bindings; ++i) { | 169 | for (std::size_t index = 0; index < std::size(vi.bindings); ++index) { |
| 169 | const auto& binding = vi.bindings[i]; | 170 | const auto& binding = vi.bindings[index]; |
| 170 | const bool instanced = binding.divisor != 0; | 171 | if (!binding.enabled) { |
| 172 | continue; | ||
| 173 | } | ||
| 174 | const bool instanced = vi.binding_divisors[index] != 0; | ||
| 171 | const auto rate = instanced ? VK_VERTEX_INPUT_RATE_INSTANCE : VK_VERTEX_INPUT_RATE_VERTEX; | 175 | const auto rate = instanced ? VK_VERTEX_INPUT_RATE_INSTANCE : VK_VERTEX_INPUT_RATE_VERTEX; |
| 172 | 176 | ||
| 173 | auto& vertex_binding = vertex_bindings.emplace_back(); | 177 | auto& vertex_binding = vertex_bindings.emplace_back(); |
| 174 | vertex_binding.binding = binding.index; | 178 | vertex_binding.binding = static_cast<u32>(index); |
| 175 | vertex_binding.stride = binding.stride; | 179 | vertex_binding.stride = binding.stride; |
| 176 | vertex_binding.inputRate = rate; | 180 | vertex_binding.inputRate = rate; |
| 177 | 181 | ||
| 178 | if (instanced) { | 182 | if (instanced) { |
| 179 | auto& binding_divisor = vertex_binding_divisors.emplace_back(); | 183 | auto& binding_divisor = vertex_binding_divisors.emplace_back(); |
| 180 | binding_divisor.binding = binding.index; | 184 | binding_divisor.binding = static_cast<u32>(index); |
| 181 | binding_divisor.divisor = binding.divisor; | 185 | binding_divisor.divisor = vi.binding_divisors[index]; |
| 182 | } | 186 | } |
| 183 | } | 187 | } |
| 184 | 188 | ||
| 185 | std::vector<VkVertexInputAttributeDescription> vertex_attributes; | 189 | std::vector<VkVertexInputAttributeDescription> vertex_attributes; |
| 186 | const auto& input_attributes = program[0]->entries.attributes; | 190 | const auto& input_attributes = program[0]->entries.attributes; |
| 187 | for (std::size_t i = 0; i < vi.num_attributes; ++i) { | 191 | for (std::size_t index = 0; index < std::size(vi.attributes); ++index) { |
| 188 | const auto& attribute = vi.attributes[i]; | 192 | const auto& attribute = vi.attributes[index]; |
| 189 | if (input_attributes.find(attribute.index) == input_attributes.end()) { | 193 | if (!attribute.enabled) { |
| 194 | continue; | ||
| 195 | } | ||
| 196 | if (input_attributes.find(static_cast<u32>(index)) == input_attributes.end()) { | ||
| 190 | // Skip attributes not used by the vertex shaders. | 197 | // Skip attributes not used by the vertex shaders. |
| 191 | continue; | 198 | continue; |
| 192 | } | 199 | } |
| 193 | auto& vertex_attribute = vertex_attributes.emplace_back(); | 200 | auto& vertex_attribute = vertex_attributes.emplace_back(); |
| 194 | vertex_attribute.location = attribute.index; | 201 | vertex_attribute.location = static_cast<u32>(index); |
| 195 | vertex_attribute.binding = attribute.buffer; | 202 | vertex_attribute.binding = attribute.buffer; |
| 196 | vertex_attribute.format = MaxwellToVK::VertexFormat(attribute.type, attribute.size); | 203 | vertex_attribute.format = MaxwellToVK::VertexFormat(attribute.Type(), attribute.Size()); |
| 197 | vertex_attribute.offset = attribute.offset; | 204 | vertex_attribute.offset = attribute.offset; |
| 198 | } | 205 | } |
| 199 | 206 | ||
| @@ -219,15 +226,15 @@ vk::Pipeline VKGraphicsPipeline::CreatePipeline(const RenderPassParams& renderpa | |||
| 219 | input_assembly_ci.sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO; | 226 | input_assembly_ci.sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO; |
| 220 | input_assembly_ci.pNext = nullptr; | 227 | input_assembly_ci.pNext = nullptr; |
| 221 | input_assembly_ci.flags = 0; | 228 | input_assembly_ci.flags = 0; |
| 222 | input_assembly_ci.topology = MaxwellToVK::PrimitiveTopology(device, ia.topology); | 229 | input_assembly_ci.topology = MaxwellToVK::PrimitiveTopology(device, rs.Topology()); |
| 223 | input_assembly_ci.primitiveRestartEnable = | 230 | input_assembly_ci.primitiveRestartEnable = |
| 224 | ia.primitive_restart_enable && SupportsPrimitiveRestart(input_assembly_ci.topology); | 231 | rs.primitive_restart_enable != 0 && SupportsPrimitiveRestart(input_assembly_ci.topology); |
| 225 | 232 | ||
| 226 | VkPipelineTessellationStateCreateInfo tessellation_ci; | 233 | VkPipelineTessellationStateCreateInfo tessellation_ci; |
| 227 | tessellation_ci.sType = VK_STRUCTURE_TYPE_PIPELINE_TESSELLATION_STATE_CREATE_INFO; | 234 | tessellation_ci.sType = VK_STRUCTURE_TYPE_PIPELINE_TESSELLATION_STATE_CREATE_INFO; |
| 228 | tessellation_ci.pNext = nullptr; | 235 | tessellation_ci.pNext = nullptr; |
| 229 | tessellation_ci.flags = 0; | 236 | tessellation_ci.flags = 0; |
| 230 | tessellation_ci.patchControlPoints = ts.patch_control_points; | 237 | tessellation_ci.patchControlPoints = rs.patch_control_points_minus_one.Value() + 1; |
| 231 | 238 | ||
| 232 | VkPipelineViewportStateCreateInfo viewport_ci; | 239 | VkPipelineViewportStateCreateInfo viewport_ci; |
| 233 | viewport_ci.sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO; | 240 | viewport_ci.sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO; |
| @@ -246,8 +253,8 @@ vk::Pipeline VKGraphicsPipeline::CreatePipeline(const RenderPassParams& renderpa | |||
| 246 | rasterization_ci.rasterizerDiscardEnable = VK_FALSE; | 253 | rasterization_ci.rasterizerDiscardEnable = VK_FALSE; |
| 247 | rasterization_ci.polygonMode = VK_POLYGON_MODE_FILL; | 254 | rasterization_ci.polygonMode = VK_POLYGON_MODE_FILL; |
| 248 | rasterization_ci.cullMode = | 255 | rasterization_ci.cullMode = |
| 249 | rs.cull_enable ? MaxwellToVK::CullFace(rs.cull_face) : VK_CULL_MODE_NONE; | 256 | rs.cull_enable ? MaxwellToVK::CullFace(rs.CullFace()) : VK_CULL_MODE_NONE; |
| 250 | rasterization_ci.frontFace = MaxwellToVK::FrontFace(rs.front_face); | 257 | rasterization_ci.frontFace = MaxwellToVK::FrontFace(rs.FrontFace()); |
| 251 | rasterization_ci.depthBiasEnable = rs.depth_bias_enable; | 258 | rasterization_ci.depthBiasEnable = rs.depth_bias_enable; |
| 252 | rasterization_ci.depthBiasConstantFactor = 0.0f; | 259 | rasterization_ci.depthBiasConstantFactor = 0.0f; |
| 253 | rasterization_ci.depthBiasClamp = 0.0f; | 260 | rasterization_ci.depthBiasClamp = 0.0f; |
| @@ -271,40 +278,38 @@ vk::Pipeline VKGraphicsPipeline::CreatePipeline(const RenderPassParams& renderpa | |||
| 271 | depth_stencil_ci.flags = 0; | 278 | depth_stencil_ci.flags = 0; |
| 272 | depth_stencil_ci.depthTestEnable = ds.depth_test_enable; | 279 | depth_stencil_ci.depthTestEnable = ds.depth_test_enable; |
| 273 | depth_stencil_ci.depthWriteEnable = ds.depth_write_enable; | 280 | depth_stencil_ci.depthWriteEnable = ds.depth_write_enable; |
| 274 | depth_stencil_ci.depthCompareOp = ds.depth_test_enable | 281 | depth_stencil_ci.depthCompareOp = |
| 275 | ? MaxwellToVK::ComparisonOp(ds.depth_test_function) | 282 | ds.depth_test_enable ? MaxwellToVK::ComparisonOp(ds.DepthTestFunc()) : VK_COMPARE_OP_ALWAYS; |
| 276 | : VK_COMPARE_OP_ALWAYS; | ||
| 277 | depth_stencil_ci.depthBoundsTestEnable = ds.depth_bounds_enable; | 283 | depth_stencil_ci.depthBoundsTestEnable = ds.depth_bounds_enable; |
| 278 | depth_stencil_ci.stencilTestEnable = ds.stencil_enable; | 284 | depth_stencil_ci.stencilTestEnable = ds.stencil_enable; |
| 279 | depth_stencil_ci.front = GetStencilFaceState(ds.front_stencil); | 285 | depth_stencil_ci.front = GetStencilFaceState(ds.front); |
| 280 | depth_stencil_ci.back = GetStencilFaceState(ds.back_stencil); | 286 | depth_stencil_ci.back = GetStencilFaceState(ds.back); |
| 281 | depth_stencil_ci.minDepthBounds = 0.0f; | 287 | depth_stencil_ci.minDepthBounds = 0.0f; |
| 282 | depth_stencil_ci.maxDepthBounds = 0.0f; | 288 | depth_stencil_ci.maxDepthBounds = 0.0f; |
| 283 | 289 | ||
| 284 | std::array<VkPipelineColorBlendAttachmentState, Maxwell::NumRenderTargets> cb_attachments; | 290 | std::array<VkPipelineColorBlendAttachmentState, Maxwell::NumRenderTargets> cb_attachments; |
| 285 | const std::size_t num_attachments = | 291 | const std::size_t num_attachments = renderpass_params.color_attachments.size(); |
| 286 | std::min(cd.attachments_count, renderpass_params.color_attachments.size()); | 292 | for (std::size_t index = 0; index < num_attachments; ++index) { |
| 287 | for (std::size_t i = 0; i < num_attachments; ++i) { | 293 | static constexpr std::array COMPONENT_TABLE = { |
| 288 | static constexpr std::array component_table = { | ||
| 289 | VK_COLOR_COMPONENT_R_BIT, VK_COLOR_COMPONENT_G_BIT, VK_COLOR_COMPONENT_B_BIT, | 294 | VK_COLOR_COMPONENT_R_BIT, VK_COLOR_COMPONENT_G_BIT, VK_COLOR_COMPONENT_B_BIT, |
| 290 | VK_COLOR_COMPONENT_A_BIT}; | 295 | VK_COLOR_COMPONENT_A_BIT}; |
| 291 | const auto& blend = cd.attachments[i]; | 296 | const auto& blend = cd.attachments[index]; |
| 292 | 297 | ||
| 293 | VkColorComponentFlags color_components = 0; | 298 | VkColorComponentFlags color_components = 0; |
| 294 | for (std::size_t j = 0; j < component_table.size(); ++j) { | 299 | for (std::size_t i = 0; i < COMPONENT_TABLE.size(); ++i) { |
| 295 | if (blend.components[j]) { | 300 | if (blend.Mask()[i]) { |
| 296 | color_components |= component_table[j]; | 301 | color_components |= COMPONENT_TABLE[i]; |
| 297 | } | 302 | } |
| 298 | } | 303 | } |
| 299 | 304 | ||
| 300 | VkPipelineColorBlendAttachmentState& attachment = cb_attachments[i]; | 305 | VkPipelineColorBlendAttachmentState& attachment = cb_attachments[index]; |
| 301 | attachment.blendEnable = blend.enable; | 306 | attachment.blendEnable = blend.enable != 0; |
| 302 | attachment.srcColorBlendFactor = MaxwellToVK::BlendFactor(blend.src_rgb_func); | 307 | attachment.srcColorBlendFactor = MaxwellToVK::BlendFactor(blend.SourceRGBFactor()); |
| 303 | attachment.dstColorBlendFactor = MaxwellToVK::BlendFactor(blend.dst_rgb_func); | 308 | attachment.dstColorBlendFactor = MaxwellToVK::BlendFactor(blend.DestRGBFactor()); |
| 304 | attachment.colorBlendOp = MaxwellToVK::BlendEquation(blend.rgb_equation); | 309 | attachment.colorBlendOp = MaxwellToVK::BlendEquation(blend.EquationRGB()); |
| 305 | attachment.srcAlphaBlendFactor = MaxwellToVK::BlendFactor(blend.src_a_func); | 310 | attachment.srcAlphaBlendFactor = MaxwellToVK::BlendFactor(blend.SourceAlphaFactor()); |
| 306 | attachment.dstAlphaBlendFactor = MaxwellToVK::BlendFactor(blend.dst_a_func); | 311 | attachment.dstAlphaBlendFactor = MaxwellToVK::BlendFactor(blend.DestAlphaFactor()); |
| 307 | attachment.alphaBlendOp = MaxwellToVK::BlendEquation(blend.a_equation); | 312 | attachment.alphaBlendOp = MaxwellToVK::BlendEquation(blend.EquationAlpha()); |
| 308 | attachment.colorWriteMask = color_components; | 313 | attachment.colorWriteMask = color_components; |
| 309 | } | 314 | } |
| 310 | 315 | ||
diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 90e3a8edd..91b1b16a5 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp | |||
| @@ -207,7 +207,7 @@ std::array<Shader, Maxwell::MaxShaderProgram> VKPipelineCache::GetShaders() { | |||
| 207 | const GPUVAddr program_addr{GetShaderAddress(system, program)}; | 207 | const GPUVAddr program_addr{GetShaderAddress(system, program)}; |
| 208 | const std::optional cpu_addr = memory_manager.GpuToCpuAddress(program_addr); | 208 | const std::optional cpu_addr = memory_manager.GpuToCpuAddress(program_addr); |
| 209 | ASSERT(cpu_addr); | 209 | ASSERT(cpu_addr); |
| 210 | auto shader = cpu_addr ? TryGet(*cpu_addr) : nullptr; | 210 | auto shader = cpu_addr ? TryGet(*cpu_addr) : null_shader; |
| 211 | if (!shader) { | 211 | if (!shader) { |
| 212 | const auto host_ptr{memory_manager.GetPointer(program_addr)}; | 212 | const auto host_ptr{memory_manager.GetPointer(program_addr)}; |
| 213 | 213 | ||
| @@ -218,7 +218,11 @@ std::array<Shader, Maxwell::MaxShaderProgram> VKPipelineCache::GetShaders() { | |||
| 218 | 218 | ||
| 219 | shader = std::make_shared<CachedShader>(system, stage, program_addr, *cpu_addr, | 219 | shader = std::make_shared<CachedShader>(system, stage, program_addr, *cpu_addr, |
| 220 | std::move(code), stage_offset); | 220 | std::move(code), stage_offset); |
| 221 | Register(shader); | 221 | if (cpu_addr) { |
| 222 | Register(shader); | ||
| 223 | } else { | ||
| 224 | null_shader = shader; | ||
| 225 | } | ||
| 222 | } | 226 | } |
| 223 | shaders[index] = std::move(shader); | 227 | shaders[index] = std::move(shader); |
| 224 | } | 228 | } |
| @@ -261,7 +265,7 @@ VKComputePipeline& VKPipelineCache::GetComputePipeline(const ComputePipelineCach | |||
| 261 | const auto cpu_addr = memory_manager.GpuToCpuAddress(program_addr); | 265 | const auto cpu_addr = memory_manager.GpuToCpuAddress(program_addr); |
| 262 | ASSERT(cpu_addr); | 266 | ASSERT(cpu_addr); |
| 263 | 267 | ||
| 264 | auto shader = cpu_addr ? TryGet(*cpu_addr) : nullptr; | 268 | auto shader = cpu_addr ? TryGet(*cpu_addr) : null_kernel; |
| 265 | if (!shader) { | 269 | if (!shader) { |
| 266 | // No shader found - create a new one | 270 | // No shader found - create a new one |
| 267 | const auto host_ptr = memory_manager.GetPointer(program_addr); | 271 | const auto host_ptr = memory_manager.GetPointer(program_addr); |
| @@ -271,7 +275,11 @@ VKComputePipeline& VKPipelineCache::GetComputePipeline(const ComputePipelineCach | |||
| 271 | shader = std::make_shared<CachedShader>(system, Tegra::Engines::ShaderType::Compute, | 275 | shader = std::make_shared<CachedShader>(system, Tegra::Engines::ShaderType::Compute, |
| 272 | program_addr, *cpu_addr, std::move(code), | 276 | program_addr, *cpu_addr, std::move(code), |
| 273 | kernel_main_offset); | 277 | kernel_main_offset); |
| 274 | Register(shader); | 278 | if (cpu_addr) { |
| 279 | Register(shader); | ||
| 280 | } else { | ||
| 281 | null_kernel = shader; | ||
| 282 | } | ||
| 275 | } | 283 | } |
| 276 | 284 | ||
| 277 | Specialization specialization; | 285 | Specialization specialization; |
| @@ -329,12 +337,14 @@ VKPipelineCache::DecompileShaders(const GraphicsPipelineCacheKey& key) { | |||
| 329 | const auto& gpu = system.GPU().Maxwell3D(); | 337 | const auto& gpu = system.GPU().Maxwell3D(); |
| 330 | 338 | ||
| 331 | Specialization specialization; | 339 | Specialization specialization; |
| 332 | if (fixed_state.input_assembly.topology == Maxwell::PrimitiveTopology::Points) { | 340 | if (fixed_state.rasterizer.Topology() == Maxwell::PrimitiveTopology::Points) { |
| 333 | ASSERT(fixed_state.input_assembly.point_size != 0.0f); | 341 | float point_size; |
| 334 | specialization.point_size = fixed_state.input_assembly.point_size; | 342 | std::memcpy(&point_size, &fixed_state.rasterizer.point_size, sizeof(float)); |
| 343 | specialization.point_size = point_size; | ||
| 344 | ASSERT(point_size != 0.0f); | ||
| 335 | } | 345 | } |
| 336 | for (std::size_t i = 0; i < Maxwell::NumVertexAttributes; ++i) { | 346 | for (std::size_t i = 0; i < Maxwell::NumVertexAttributes; ++i) { |
| 337 | specialization.attribute_types[i] = fixed_state.vertex_input.attributes[i].type; | 347 | specialization.attribute_types[i] = fixed_state.vertex_input.attributes[i].Type(); |
| 338 | } | 348 | } |
| 339 | specialization.ndc_minus_one_to_one = fixed_state.rasterizer.ndc_minus_one_to_one; | 349 | specialization.ndc_minus_one_to_one = fixed_state.rasterizer.ndc_minus_one_to_one; |
| 340 | 350 | ||
diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.h b/src/video_core/renderer_vulkan/vk_pipeline_cache.h index 7ccdb7083..602a0a340 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.h +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.h | |||
| @@ -182,6 +182,9 @@ private: | |||
| 182 | VKUpdateDescriptorQueue& update_descriptor_queue; | 182 | VKUpdateDescriptorQueue& update_descriptor_queue; |
| 183 | VKRenderPassCache& renderpass_cache; | 183 | VKRenderPassCache& renderpass_cache; |
| 184 | 184 | ||
| 185 | Shader null_shader{}; | ||
| 186 | Shader null_kernel{}; | ||
| 187 | |||
| 185 | std::array<Shader, Maxwell::MaxShaderProgram> last_shaders; | 188 | std::array<Shader, Maxwell::MaxShaderProgram> last_shaders; |
| 186 | 189 | ||
| 187 | GraphicsPipelineCacheKey last_graphics_key; | 190 | GraphicsPipelineCacheKey last_graphics_key; |
diff --git a/src/video_core/renderer_vulkan/vk_query_cache.cpp b/src/video_core/renderer_vulkan/vk_query_cache.cpp index 0966c7ff7..813f7c162 100644 --- a/src/video_core/renderer_vulkan/vk_query_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_query_cache.cpp | |||
| @@ -113,8 +113,19 @@ u64 HostCounter::BlockingQuery() const { | |||
| 113 | if (ticks >= cache.Scheduler().Ticks()) { | 113 | if (ticks >= cache.Scheduler().Ticks()) { |
| 114 | cache.Scheduler().Flush(); | 114 | cache.Scheduler().Flush(); |
| 115 | } | 115 | } |
| 116 | return cache.Device().GetLogical().GetQueryResult<u64>( | 116 | u64 data; |
| 117 | query.first, query.second, VK_QUERY_RESULT_64_BIT | VK_QUERY_RESULT_WAIT_BIT); | 117 | const VkResult result = cache.Device().GetLogical().GetQueryResults( |
| 118 | query.first, query.second, 1, sizeof(data), &data, sizeof(data), | ||
| 119 | VK_QUERY_RESULT_64_BIT | VK_QUERY_RESULT_WAIT_BIT); | ||
| 120 | switch (result) { | ||
| 121 | case VK_SUCCESS: | ||
| 122 | return data; | ||
| 123 | case VK_ERROR_DEVICE_LOST: | ||
| 124 | cache.Device().ReportLoss(); | ||
| 125 | [[fallthrough]]; | ||
| 126 | default: | ||
| 127 | throw vk::Exception(result); | ||
| 128 | } | ||
| 118 | } | 129 | } |
| 119 | 130 | ||
| 120 | } // namespace Vulkan | 131 | } // namespace Vulkan |
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index 4ca0febb8..8a1f57891 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp | |||
| @@ -17,6 +17,7 @@ | |||
| 17 | #include "common/microprofile.h" | 17 | #include "common/microprofile.h" |
| 18 | #include "core/core.h" | 18 | #include "core/core.h" |
| 19 | #include "core/memory.h" | 19 | #include "core/memory.h" |
| 20 | #include "core/settings.h" | ||
| 20 | #include "video_core/engines/kepler_compute.h" | 21 | #include "video_core/engines/kepler_compute.h" |
| 21 | #include "video_core/engines/maxwell_3d.h" | 22 | #include "video_core/engines/maxwell_3d.h" |
| 22 | #include "video_core/renderer_vulkan/fixed_pipeline_state.h" | 23 | #include "video_core/renderer_vulkan/fixed_pipeline_state.h" |
| @@ -292,13 +293,16 @@ RasterizerVulkan::RasterizerVulkan(Core::System& system, Core::Frontend::EmuWind | |||
| 292 | staging_pool(device, memory_manager, scheduler), descriptor_pool(device), | 293 | staging_pool(device, memory_manager, scheduler), descriptor_pool(device), |
| 293 | update_descriptor_queue(device, scheduler), renderpass_cache(device), | 294 | update_descriptor_queue(device, scheduler), renderpass_cache(device), |
| 294 | quad_array_pass(device, scheduler, descriptor_pool, staging_pool, update_descriptor_queue), | 295 | quad_array_pass(device, scheduler, descriptor_pool, staging_pool, update_descriptor_queue), |
| 296 | quad_indexed_pass(device, scheduler, descriptor_pool, staging_pool, update_descriptor_queue), | ||
| 295 | uint8_pass(device, scheduler, descriptor_pool, staging_pool, update_descriptor_queue), | 297 | uint8_pass(device, scheduler, descriptor_pool, staging_pool, update_descriptor_queue), |
| 296 | texture_cache(system, *this, device, resource_manager, memory_manager, scheduler, | 298 | texture_cache(system, *this, device, resource_manager, memory_manager, scheduler, |
| 297 | staging_pool), | 299 | staging_pool), |
| 298 | pipeline_cache(system, *this, device, scheduler, descriptor_pool, update_descriptor_queue, | 300 | pipeline_cache(system, *this, device, scheduler, descriptor_pool, update_descriptor_queue, |
| 299 | renderpass_cache), | 301 | renderpass_cache), |
| 300 | buffer_cache(*this, system, device, memory_manager, scheduler, staging_pool), | 302 | buffer_cache(*this, system, device, memory_manager, scheduler, staging_pool), |
| 301 | sampler_cache(device), query_cache(system, *this, device, scheduler) { | 303 | sampler_cache(device), |
| 304 | fence_manager(system, *this, device, scheduler, texture_cache, buffer_cache, query_cache), | ||
| 305 | query_cache(system, *this, device, scheduler) { | ||
| 302 | scheduler.SetQueryCache(query_cache); | 306 | scheduler.SetQueryCache(query_cache); |
| 303 | } | 307 | } |
| 304 | 308 | ||
| @@ -346,11 +350,6 @@ void RasterizerVulkan::Draw(bool is_indexed, bool is_instanced) { | |||
| 346 | 350 | ||
| 347 | buffer_bindings.Bind(scheduler); | 351 | buffer_bindings.Bind(scheduler); |
| 348 | 352 | ||
| 349 | if (device.IsNvDeviceDiagnosticCheckpoints()) { | ||
| 350 | scheduler.Record( | ||
| 351 | [&pipeline](vk::CommandBuffer cmdbuf) { cmdbuf.SetCheckpointNV(&pipeline); }); | ||
| 352 | } | ||
| 353 | |||
| 354 | BeginTransformFeedback(); | 353 | BeginTransformFeedback(); |
| 355 | 354 | ||
| 356 | const auto pipeline_layout = pipeline.GetLayout(); | 355 | const auto pipeline_layout = pipeline.GetLayout(); |
| @@ -364,6 +363,8 @@ void RasterizerVulkan::Draw(bool is_indexed, bool is_instanced) { | |||
| 364 | }); | 363 | }); |
| 365 | 364 | ||
| 366 | EndTransformFeedback(); | 365 | EndTransformFeedback(); |
| 366 | |||
| 367 | system.GPU().TickWork(); | ||
| 367 | } | 368 | } |
| 368 | 369 | ||
| 369 | void RasterizerVulkan::Clear() { | 370 | void RasterizerVulkan::Clear() { |
| @@ -477,11 +478,6 @@ void RasterizerVulkan::DispatchCompute(GPUVAddr code_addr) { | |||
| 477 | TransitionImages(image_views, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, | 478 | TransitionImages(image_views, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, |
| 478 | VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT); | 479 | VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT); |
| 479 | 480 | ||
| 480 | if (device.IsNvDeviceDiagnosticCheckpoints()) { | ||
| 481 | scheduler.Record( | ||
| 482 | [&pipeline](vk::CommandBuffer cmdbuf) { cmdbuf.SetCheckpointNV(nullptr); }); | ||
| 483 | } | ||
| 484 | |||
| 485 | scheduler.Record([grid_x = launch_desc.grid_dim_x, grid_y = launch_desc.grid_dim_y, | 481 | scheduler.Record([grid_x = launch_desc.grid_dim_x, grid_y = launch_desc.grid_dim_y, |
| 486 | grid_z = launch_desc.grid_dim_z, pipeline_handle = pipeline.GetHandle(), | 482 | grid_z = launch_desc.grid_dim_z, pipeline_handle = pipeline.GetHandle(), |
| 487 | layout = pipeline.GetLayout(), | 483 | layout = pipeline.GetLayout(), |
| @@ -513,6 +509,13 @@ void RasterizerVulkan::FlushRegion(VAddr addr, u64 size) { | |||
| 513 | query_cache.FlushRegion(addr, size); | 509 | query_cache.FlushRegion(addr, size); |
| 514 | } | 510 | } |
| 515 | 511 | ||
| 512 | bool RasterizerVulkan::MustFlushRegion(VAddr addr, u64 size) { | ||
| 513 | if (!Settings::IsGPULevelHigh()) { | ||
| 514 | return buffer_cache.MustFlushRegion(addr, size); | ||
| 515 | } | ||
| 516 | return texture_cache.MustFlushRegion(addr, size) || buffer_cache.MustFlushRegion(addr, size); | ||
| 517 | } | ||
| 518 | |||
| 516 | void RasterizerVulkan::InvalidateRegion(VAddr addr, u64 size) { | 519 | void RasterizerVulkan::InvalidateRegion(VAddr addr, u64 size) { |
| 517 | if (addr == 0 || size == 0) { | 520 | if (addr == 0 || size == 0) { |
| 518 | return; | 521 | return; |
| @@ -523,6 +526,47 @@ void RasterizerVulkan::InvalidateRegion(VAddr addr, u64 size) { | |||
| 523 | query_cache.InvalidateRegion(addr, size); | 526 | query_cache.InvalidateRegion(addr, size); |
| 524 | } | 527 | } |
| 525 | 528 | ||
| 529 | void RasterizerVulkan::OnCPUWrite(VAddr addr, u64 size) { | ||
| 530 | if (addr == 0 || size == 0) { | ||
| 531 | return; | ||
| 532 | } | ||
| 533 | texture_cache.OnCPUWrite(addr, size); | ||
| 534 | pipeline_cache.InvalidateRegion(addr, size); | ||
| 535 | buffer_cache.OnCPUWrite(addr, size); | ||
| 536 | query_cache.InvalidateRegion(addr, size); | ||
| 537 | } | ||
| 538 | |||
| 539 | void RasterizerVulkan::SyncGuestHost() { | ||
| 540 | texture_cache.SyncGuestHost(); | ||
| 541 | buffer_cache.SyncGuestHost(); | ||
| 542 | } | ||
| 543 | |||
| 544 | void RasterizerVulkan::SignalSemaphore(GPUVAddr addr, u32 value) { | ||
| 545 | auto& gpu{system.GPU()}; | ||
| 546 | if (!gpu.IsAsync()) { | ||
| 547 | gpu.MemoryManager().Write<u32>(addr, value); | ||
| 548 | return; | ||
| 549 | } | ||
| 550 | fence_manager.SignalSemaphore(addr, value); | ||
| 551 | } | ||
| 552 | |||
| 553 | void RasterizerVulkan::SignalSyncPoint(u32 value) { | ||
| 554 | auto& gpu{system.GPU()}; | ||
| 555 | if (!gpu.IsAsync()) { | ||
| 556 | gpu.IncrementSyncPoint(value); | ||
| 557 | return; | ||
| 558 | } | ||
| 559 | fence_manager.SignalSyncPoint(value); | ||
| 560 | } | ||
| 561 | |||
| 562 | void RasterizerVulkan::ReleaseFences() { | ||
| 563 | auto& gpu{system.GPU()}; | ||
| 564 | if (!gpu.IsAsync()) { | ||
| 565 | return; | ||
| 566 | } | ||
| 567 | fence_manager.WaitPendingFences(); | ||
| 568 | } | ||
| 569 | |||
| 526 | void RasterizerVulkan::FlushAndInvalidateRegion(VAddr addr, u64 size) { | 570 | void RasterizerVulkan::FlushAndInvalidateRegion(VAddr addr, u64 size) { |
| 527 | FlushRegion(addr, size); | 571 | FlushRegion(addr, size); |
| 528 | InvalidateRegion(addr, size); | 572 | InvalidateRegion(addr, size); |
| @@ -806,25 +850,29 @@ void RasterizerVulkan::SetupVertexArrays(FixedPipelineState::VertexInput& vertex | |||
| 806 | BufferBindings& buffer_bindings) { | 850 | BufferBindings& buffer_bindings) { |
| 807 | const auto& regs = system.GPU().Maxwell3D().regs; | 851 | const auto& regs = system.GPU().Maxwell3D().regs; |
| 808 | 852 | ||
| 809 | for (u32 index = 0; index < static_cast<u32>(Maxwell::NumVertexAttributes); ++index) { | 853 | for (std::size_t index = 0; index < Maxwell::NumVertexAttributes; ++index) { |
| 810 | const auto& attrib = regs.vertex_attrib_format[index]; | 854 | const auto& attrib = regs.vertex_attrib_format[index]; |
| 811 | if (!attrib.IsValid()) { | 855 | if (!attrib.IsValid()) { |
| 856 | vertex_input.SetAttribute(index, false, 0, 0, {}, {}); | ||
| 812 | continue; | 857 | continue; |
| 813 | } | 858 | } |
| 814 | 859 | ||
| 815 | const auto& buffer = regs.vertex_array[attrib.buffer]; | 860 | [[maybe_unused]] const auto& buffer = regs.vertex_array[attrib.buffer]; |
| 816 | ASSERT(buffer.IsEnabled()); | 861 | ASSERT(buffer.IsEnabled()); |
| 817 | 862 | ||
| 818 | vertex_input.attributes[vertex_input.num_attributes++] = | 863 | vertex_input.SetAttribute(index, true, attrib.buffer, attrib.offset, attrib.type.Value(), |
| 819 | FixedPipelineState::VertexAttribute(index, attrib.buffer, attrib.type, attrib.size, | 864 | attrib.size.Value()); |
| 820 | attrib.offset); | ||
| 821 | } | 865 | } |
| 822 | 866 | ||
| 823 | for (u32 index = 0; index < static_cast<u32>(Maxwell::NumVertexArrays); ++index) { | 867 | for (std::size_t index = 0; index < Maxwell::NumVertexArrays; ++index) { |
| 824 | const auto& vertex_array = regs.vertex_array[index]; | 868 | const auto& vertex_array = regs.vertex_array[index]; |
| 825 | if (!vertex_array.IsEnabled()) { | 869 | if (!vertex_array.IsEnabled()) { |
| 870 | vertex_input.SetBinding(index, false, 0, 0); | ||
| 826 | continue; | 871 | continue; |
| 827 | } | 872 | } |
| 873 | vertex_input.SetBinding( | ||
| 874 | index, true, vertex_array.stride, | ||
| 875 | regs.instanced_arrays.IsInstancingEnabled(index) ? vertex_array.divisor : 0); | ||
| 828 | 876 | ||
| 829 | const GPUVAddr start{vertex_array.StartAddress()}; | 877 | const GPUVAddr start{vertex_array.StartAddress()}; |
| 830 | const GPUVAddr end{regs.vertex_array_limit[index].LimitAddress()}; | 878 | const GPUVAddr end{regs.vertex_array_limit[index].LimitAddress()}; |
| @@ -832,10 +880,6 @@ void RasterizerVulkan::SetupVertexArrays(FixedPipelineState::VertexInput& vertex | |||
| 832 | ASSERT(end > start); | 880 | ASSERT(end > start); |
| 833 | const std::size_t size{end - start + 1}; | 881 | const std::size_t size{end - start + 1}; |
| 834 | const auto [buffer, offset] = buffer_cache.UploadMemory(start, size); | 882 | const auto [buffer, offset] = buffer_cache.UploadMemory(start, size); |
| 835 | |||
| 836 | vertex_input.bindings[vertex_input.num_bindings++] = FixedPipelineState::VertexBinding( | ||
| 837 | index, vertex_array.stride, | ||
| 838 | regs.instanced_arrays.IsInstancingEnabled(index) ? vertex_array.divisor : 0); | ||
| 839 | buffer_bindings.AddVertexBinding(buffer, offset); | 883 | buffer_bindings.AddVertexBinding(buffer, offset); |
| 840 | } | 884 | } |
| 841 | } | 885 | } |
| @@ -844,18 +888,26 @@ void RasterizerVulkan::SetupIndexBuffer(BufferBindings& buffer_bindings, DrawPar | |||
| 844 | bool is_indexed) { | 888 | bool is_indexed) { |
| 845 | const auto& regs = system.GPU().Maxwell3D().regs; | 889 | const auto& regs = system.GPU().Maxwell3D().regs; |
| 846 | switch (regs.draw.topology) { | 890 | switch (regs.draw.topology) { |
| 847 | case Maxwell::PrimitiveTopology::Quads: | 891 | case Maxwell::PrimitiveTopology::Quads: { |
| 848 | if (params.is_indexed) { | 892 | if (!params.is_indexed) { |
| 849 | UNIMPLEMENTED(); | ||
| 850 | } else { | ||
| 851 | const auto [buffer, offset] = | 893 | const auto [buffer, offset] = |
| 852 | quad_array_pass.Assemble(params.num_vertices, params.base_vertex); | 894 | quad_array_pass.Assemble(params.num_vertices, params.base_vertex); |
| 853 | buffer_bindings.SetIndexBinding(buffer, offset, VK_INDEX_TYPE_UINT32); | 895 | buffer_bindings.SetIndexBinding(buffer, offset, VK_INDEX_TYPE_UINT32); |
| 854 | params.base_vertex = 0; | 896 | params.base_vertex = 0; |
| 855 | params.num_vertices = params.num_vertices * 6 / 4; | 897 | params.num_vertices = params.num_vertices * 6 / 4; |
| 856 | params.is_indexed = true; | 898 | params.is_indexed = true; |
| 899 | break; | ||
| 857 | } | 900 | } |
| 901 | const GPUVAddr gpu_addr = regs.index_array.IndexStart(); | ||
| 902 | auto [buffer, offset] = buffer_cache.UploadMemory(gpu_addr, CalculateIndexBufferSize()); | ||
| 903 | std::tie(buffer, offset) = quad_indexed_pass.Assemble( | ||
| 904 | regs.index_array.format, params.num_vertices, params.base_vertex, buffer, offset); | ||
| 905 | |||
| 906 | buffer_bindings.SetIndexBinding(buffer, offset, VK_INDEX_TYPE_UINT32); | ||
| 907 | params.num_vertices = (params.num_vertices / 4) * 6; | ||
| 908 | params.base_vertex = 0; | ||
| 858 | break; | 909 | break; |
| 910 | } | ||
| 859 | default: { | 911 | default: { |
| 860 | if (!is_indexed) { | 912 | if (!is_indexed) { |
| 861 | break; | 913 | break; |
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.h b/src/video_core/renderer_vulkan/vk_rasterizer.h index 46037860a..2fa46b0cc 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.h +++ b/src/video_core/renderer_vulkan/vk_rasterizer.h | |||
| @@ -21,6 +21,7 @@ | |||
| 21 | #include "video_core/renderer_vulkan/vk_buffer_cache.h" | 21 | #include "video_core/renderer_vulkan/vk_buffer_cache.h" |
| 22 | #include "video_core/renderer_vulkan/vk_compute_pass.h" | 22 | #include "video_core/renderer_vulkan/vk_compute_pass.h" |
| 23 | #include "video_core/renderer_vulkan/vk_descriptor_pool.h" | 23 | #include "video_core/renderer_vulkan/vk_descriptor_pool.h" |
| 24 | #include "video_core/renderer_vulkan/vk_fence_manager.h" | ||
| 24 | #include "video_core/renderer_vulkan/vk_memory_manager.h" | 25 | #include "video_core/renderer_vulkan/vk_memory_manager.h" |
| 25 | #include "video_core/renderer_vulkan/vk_pipeline_cache.h" | 26 | #include "video_core/renderer_vulkan/vk_pipeline_cache.h" |
| 26 | #include "video_core/renderer_vulkan/vk_query_cache.h" | 27 | #include "video_core/renderer_vulkan/vk_query_cache.h" |
| @@ -118,7 +119,13 @@ public: | |||
| 118 | void Query(GPUVAddr gpu_addr, VideoCore::QueryType type, std::optional<u64> timestamp) override; | 119 | void Query(GPUVAddr gpu_addr, VideoCore::QueryType type, std::optional<u64> timestamp) override; |
| 119 | void FlushAll() override; | 120 | void FlushAll() override; |
| 120 | void FlushRegion(VAddr addr, u64 size) override; | 121 | void FlushRegion(VAddr addr, u64 size) override; |
| 122 | bool MustFlushRegion(VAddr addr, u64 size) override; | ||
| 121 | void InvalidateRegion(VAddr addr, u64 size) override; | 123 | void InvalidateRegion(VAddr addr, u64 size) override; |
| 124 | void OnCPUWrite(VAddr addr, u64 size) override; | ||
| 125 | void SyncGuestHost() override; | ||
| 126 | void SignalSemaphore(GPUVAddr addr, u32 value) override; | ||
| 127 | void SignalSyncPoint(u32 value) override; | ||
| 128 | void ReleaseFences() override; | ||
| 122 | void FlushAndInvalidateRegion(VAddr addr, u64 size) override; | 129 | void FlushAndInvalidateRegion(VAddr addr, u64 size) override; |
| 123 | void FlushCommands() override; | 130 | void FlushCommands() override; |
| 124 | void TickFrame() override; | 131 | void TickFrame() override; |
| @@ -254,12 +261,14 @@ private: | |||
| 254 | VKUpdateDescriptorQueue update_descriptor_queue; | 261 | VKUpdateDescriptorQueue update_descriptor_queue; |
| 255 | VKRenderPassCache renderpass_cache; | 262 | VKRenderPassCache renderpass_cache; |
| 256 | QuadArrayPass quad_array_pass; | 263 | QuadArrayPass quad_array_pass; |
| 264 | QuadIndexedPass quad_indexed_pass; | ||
| 257 | Uint8Pass uint8_pass; | 265 | Uint8Pass uint8_pass; |
| 258 | 266 | ||
| 259 | VKTextureCache texture_cache; | 267 | VKTextureCache texture_cache; |
| 260 | VKPipelineCache pipeline_cache; | 268 | VKPipelineCache pipeline_cache; |
| 261 | VKBufferCache buffer_cache; | 269 | VKBufferCache buffer_cache; |
| 262 | VKSamplerCache sampler_cache; | 270 | VKSamplerCache sampler_cache; |
| 271 | VKFenceManager fence_manager; | ||
| 263 | VKQueryCache query_cache; | 272 | VKQueryCache query_cache; |
| 264 | 273 | ||
| 265 | std::array<View, Maxwell::NumRenderTargets> color_attachments; | 274 | std::array<View, Maxwell::NumRenderTargets> color_attachments; |
diff --git a/src/video_core/renderer_vulkan/vk_scheduler.cpp b/src/video_core/renderer_vulkan/vk_scheduler.cpp index 900f551b3..ae7ba3eb5 100644 --- a/src/video_core/renderer_vulkan/vk_scheduler.cpp +++ b/src/video_core/renderer_vulkan/vk_scheduler.cpp | |||
| @@ -166,7 +166,15 @@ void VKScheduler::SubmitExecution(VkSemaphore semaphore) { | |||
| 166 | submit_info.pCommandBuffers = current_cmdbuf.address(); | 166 | submit_info.pCommandBuffers = current_cmdbuf.address(); |
| 167 | submit_info.signalSemaphoreCount = semaphore ? 1 : 0; | 167 | submit_info.signalSemaphoreCount = semaphore ? 1 : 0; |
| 168 | submit_info.pSignalSemaphores = &semaphore; | 168 | submit_info.pSignalSemaphores = &semaphore; |
| 169 | device.GetGraphicsQueue().Submit(submit_info, *current_fence); | 169 | switch (const VkResult result = device.GetGraphicsQueue().Submit(submit_info, *current_fence)) { |
| 170 | case VK_SUCCESS: | ||
| 171 | break; | ||
| 172 | case VK_ERROR_DEVICE_LOST: | ||
| 173 | device.ReportLoss(); | ||
| 174 | [[fallthrough]]; | ||
| 175 | default: | ||
| 176 | vk::Check(result); | ||
| 177 | } | ||
| 170 | } | 178 | } |
| 171 | 179 | ||
| 172 | void VKScheduler::AllocateNewContext() { | 180 | void VKScheduler::AllocateNewContext() { |
diff --git a/src/video_core/renderer_vulkan/vk_stream_buffer.cpp b/src/video_core/renderer_vulkan/vk_stream_buffer.cpp index 38a93a01a..868447af2 100644 --- a/src/video_core/renderer_vulkan/vk_stream_buffer.cpp +++ b/src/video_core/renderer_vulkan/vk_stream_buffer.cpp | |||
| @@ -3,6 +3,7 @@ | |||
| 3 | // Refer to the license.txt file included. | 3 | // Refer to the license.txt file included. |
| 4 | 4 | ||
| 5 | #include <algorithm> | 5 | #include <algorithm> |
| 6 | #include <limits> | ||
| 6 | #include <optional> | 7 | #include <optional> |
| 7 | #include <tuple> | 8 | #include <tuple> |
| 8 | #include <vector> | 9 | #include <vector> |
| @@ -22,22 +23,38 @@ namespace { | |||
| 22 | constexpr u64 WATCHES_INITIAL_RESERVE = 0x4000; | 23 | constexpr u64 WATCHES_INITIAL_RESERVE = 0x4000; |
| 23 | constexpr u64 WATCHES_RESERVE_CHUNK = 0x1000; | 24 | constexpr u64 WATCHES_RESERVE_CHUNK = 0x1000; |
| 24 | 25 | ||
| 25 | constexpr u64 STREAM_BUFFER_SIZE = 256 * 1024 * 1024; | 26 | constexpr u64 PREFERRED_STREAM_BUFFER_SIZE = 256 * 1024 * 1024; |
| 26 | 27 | ||
| 27 | std::optional<u32> FindMemoryType(const VKDevice& device, u32 filter, | 28 | /// Find a memory type with the passed requirements |
| 28 | VkMemoryPropertyFlags wanted) { | 29 | std::optional<u32> FindMemoryType(const VkPhysicalDeviceMemoryProperties& properties, |
| 29 | const auto properties = device.GetPhysical().GetMemoryProperties(); | 30 | VkMemoryPropertyFlags wanted, |
| 30 | for (u32 i = 0; i < properties.memoryTypeCount; i++) { | 31 | u32 filter = std::numeric_limits<u32>::max()) { |
| 31 | if (!(filter & (1 << i))) { | 32 | for (u32 i = 0; i < properties.memoryTypeCount; ++i) { |
| 32 | continue; | 33 | const auto flags = properties.memoryTypes[i].propertyFlags; |
| 33 | } | 34 | if ((flags & wanted) == wanted && (filter & (1U << i)) != 0) { |
| 34 | if ((properties.memoryTypes[i].propertyFlags & wanted) == wanted) { | ||
| 35 | return i; | 35 | return i; |
| 36 | } | 36 | } |
| 37 | } | 37 | } |
| 38 | return std::nullopt; | 38 | return std::nullopt; |
| 39 | } | 39 | } |
| 40 | 40 | ||
| 41 | /// Get the preferred host visible memory type. | ||
| 42 | u32 GetMemoryType(const VkPhysicalDeviceMemoryProperties& properties, | ||
| 43 | u32 filter = std::numeric_limits<u32>::max()) { | ||
| 44 | // Prefer device local host visible allocations. Both AMD and Nvidia now provide one. | ||
| 45 | // Otherwise search for a host visible allocation. | ||
| 46 | static constexpr auto HOST_MEMORY = | ||
| 47 | VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT; | ||
| 48 | static constexpr auto DYNAMIC_MEMORY = HOST_MEMORY | VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT; | ||
| 49 | |||
| 50 | std::optional preferred_type = FindMemoryType(properties, DYNAMIC_MEMORY); | ||
| 51 | if (!preferred_type) { | ||
| 52 | preferred_type = FindMemoryType(properties, HOST_MEMORY); | ||
| 53 | ASSERT_MSG(preferred_type, "No host visible and coherent memory type found"); | ||
| 54 | } | ||
| 55 | return preferred_type.value_or(0); | ||
| 56 | } | ||
| 57 | |||
| 41 | } // Anonymous namespace | 58 | } // Anonymous namespace |
| 42 | 59 | ||
| 43 | VKStreamBuffer::VKStreamBuffer(const VKDevice& device, VKScheduler& scheduler, | 60 | VKStreamBuffer::VKStreamBuffer(const VKDevice& device, VKScheduler& scheduler, |
| @@ -51,7 +68,7 @@ VKStreamBuffer::VKStreamBuffer(const VKDevice& device, VKScheduler& scheduler, | |||
| 51 | VKStreamBuffer::~VKStreamBuffer() = default; | 68 | VKStreamBuffer::~VKStreamBuffer() = default; |
| 52 | 69 | ||
| 53 | std::tuple<u8*, u64, bool> VKStreamBuffer::Map(u64 size, u64 alignment) { | 70 | std::tuple<u8*, u64, bool> VKStreamBuffer::Map(u64 size, u64 alignment) { |
| 54 | ASSERT(size <= STREAM_BUFFER_SIZE); | 71 | ASSERT(size <= stream_buffer_size); |
| 55 | mapped_size = size; | 72 | mapped_size = size; |
| 56 | 73 | ||
| 57 | if (alignment > 0) { | 74 | if (alignment > 0) { |
| @@ -61,7 +78,7 @@ std::tuple<u8*, u64, bool> VKStreamBuffer::Map(u64 size, u64 alignment) { | |||
| 61 | WaitPendingOperations(offset); | 78 | WaitPendingOperations(offset); |
| 62 | 79 | ||
| 63 | bool invalidated = false; | 80 | bool invalidated = false; |
| 64 | if (offset + size > STREAM_BUFFER_SIZE) { | 81 | if (offset + size > stream_buffer_size) { |
| 65 | // The buffer would overflow, save the amount of used watches and reset the state. | 82 | // The buffer would overflow, save the amount of used watches and reset the state. |
| 66 | invalidation_mark = current_watch_cursor; | 83 | invalidation_mark = current_watch_cursor; |
| 67 | current_watch_cursor = 0; | 84 | current_watch_cursor = 0; |
| @@ -98,40 +115,37 @@ void VKStreamBuffer::Unmap(u64 size) { | |||
| 98 | } | 115 | } |
| 99 | 116 | ||
| 100 | void VKStreamBuffer::CreateBuffers(VkBufferUsageFlags usage) { | 117 | void VKStreamBuffer::CreateBuffers(VkBufferUsageFlags usage) { |
| 118 | const auto memory_properties = device.GetPhysical().GetMemoryProperties(); | ||
| 119 | const u32 preferred_type = GetMemoryType(memory_properties); | ||
| 120 | const u32 preferred_heap = memory_properties.memoryTypes[preferred_type].heapIndex; | ||
| 121 | |||
| 122 | // Substract from the preferred heap size some bytes to avoid getting out of memory. | ||
| 123 | const VkDeviceSize heap_size = memory_properties.memoryHeaps[preferred_heap].size; | ||
| 124 | const VkDeviceSize allocable_size = heap_size - 4 * 1024 * 1024; | ||
| 125 | |||
| 101 | VkBufferCreateInfo buffer_ci; | 126 | VkBufferCreateInfo buffer_ci; |
| 102 | buffer_ci.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO; | 127 | buffer_ci.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO; |
| 103 | buffer_ci.pNext = nullptr; | 128 | buffer_ci.pNext = nullptr; |
| 104 | buffer_ci.flags = 0; | 129 | buffer_ci.flags = 0; |
| 105 | buffer_ci.size = STREAM_BUFFER_SIZE; | 130 | buffer_ci.size = std::min(PREFERRED_STREAM_BUFFER_SIZE, allocable_size); |
| 106 | buffer_ci.usage = usage; | 131 | buffer_ci.usage = usage; |
| 107 | buffer_ci.sharingMode = VK_SHARING_MODE_EXCLUSIVE; | 132 | buffer_ci.sharingMode = VK_SHARING_MODE_EXCLUSIVE; |
| 108 | buffer_ci.queueFamilyIndexCount = 0; | 133 | buffer_ci.queueFamilyIndexCount = 0; |
| 109 | buffer_ci.pQueueFamilyIndices = nullptr; | 134 | buffer_ci.pQueueFamilyIndices = nullptr; |
| 110 | 135 | ||
| 111 | const auto& dev = device.GetLogical(); | 136 | buffer = device.GetLogical().CreateBuffer(buffer_ci); |
| 112 | buffer = dev.CreateBuffer(buffer_ci); | 137 | |
| 113 | 138 | const auto requirements = device.GetLogical().GetBufferMemoryRequirements(*buffer); | |
| 114 | const auto& dld = device.GetDispatchLoader(); | 139 | const u32 required_flags = requirements.memoryTypeBits; |
| 115 | const auto requirements = dev.GetBufferMemoryRequirements(*buffer); | 140 | stream_buffer_size = static_cast<u64>(requirements.size); |
| 116 | // Prefer device local host visible allocations (this should hit AMD's pinned memory). | 141 | |
| 117 | auto type = | ||
| 118 | FindMemoryType(device, requirements.memoryTypeBits, | ||
| 119 | VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT | | ||
| 120 | VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT); | ||
| 121 | if (!type) { | ||
| 122 | // Otherwise search for a host visible allocation. | ||
| 123 | type = FindMemoryType(device, requirements.memoryTypeBits, | ||
| 124 | VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | | ||
| 125 | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT); | ||
| 126 | ASSERT_MSG(type, "No host visible and coherent memory type found"); | ||
| 127 | } | ||
| 128 | VkMemoryAllocateInfo memory_ai; | 142 | VkMemoryAllocateInfo memory_ai; |
| 129 | memory_ai.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO; | 143 | memory_ai.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO; |
| 130 | memory_ai.pNext = nullptr; | 144 | memory_ai.pNext = nullptr; |
| 131 | memory_ai.allocationSize = requirements.size; | 145 | memory_ai.allocationSize = requirements.size; |
| 132 | memory_ai.memoryTypeIndex = *type; | 146 | memory_ai.memoryTypeIndex = GetMemoryType(memory_properties, required_flags); |
| 133 | 147 | ||
| 134 | memory = dev.AllocateMemory(memory_ai); | 148 | memory = device.GetLogical().AllocateMemory(memory_ai); |
| 135 | buffer.BindMemory(*memory, 0); | 149 | buffer.BindMemory(*memory, 0); |
| 136 | } | 150 | } |
| 137 | 151 | ||
diff --git a/src/video_core/renderer_vulkan/vk_stream_buffer.h b/src/video_core/renderer_vulkan/vk_stream_buffer.h index 58ce8b973..dfddf7ad6 100644 --- a/src/video_core/renderer_vulkan/vk_stream_buffer.h +++ b/src/video_core/renderer_vulkan/vk_stream_buffer.h | |||
| @@ -56,8 +56,9 @@ private: | |||
| 56 | const VKDevice& device; ///< Vulkan device manager. | 56 | const VKDevice& device; ///< Vulkan device manager. |
| 57 | VKScheduler& scheduler; ///< Command scheduler. | 57 | VKScheduler& scheduler; ///< Command scheduler. |
| 58 | 58 | ||
| 59 | vk::Buffer buffer; ///< Mapped buffer. | 59 | vk::Buffer buffer; ///< Mapped buffer. |
| 60 | vk::DeviceMemory memory; ///< Memory allocation. | 60 | vk::DeviceMemory memory; ///< Memory allocation. |
| 61 | u64 stream_buffer_size{}; ///< Stream buffer size. | ||
| 61 | 62 | ||
| 62 | u64 offset{}; ///< Buffer iterator. | 63 | u64 offset{}; ///< Buffer iterator. |
| 63 | u64 mapped_size{}; ///< Size reserved for the current copy. | 64 | u64 mapped_size{}; ///< Size reserved for the current copy. |
diff --git a/src/video_core/renderer_vulkan/wrapper.cpp b/src/video_core/renderer_vulkan/wrapper.cpp index 9b94dfff1..539f3c974 100644 --- a/src/video_core/renderer_vulkan/wrapper.cpp +++ b/src/video_core/renderer_vulkan/wrapper.cpp | |||
| @@ -61,9 +61,9 @@ void Load(VkDevice device, DeviceDispatch& dld) noexcept { | |||
| 61 | X(vkCmdPipelineBarrier); | 61 | X(vkCmdPipelineBarrier); |
| 62 | X(vkCmdPushConstants); | 62 | X(vkCmdPushConstants); |
| 63 | X(vkCmdSetBlendConstants); | 63 | X(vkCmdSetBlendConstants); |
| 64 | X(vkCmdSetCheckpointNV); | ||
| 65 | X(vkCmdSetDepthBias); | 64 | X(vkCmdSetDepthBias); |
| 66 | X(vkCmdSetDepthBounds); | 65 | X(vkCmdSetDepthBounds); |
| 66 | X(vkCmdSetEvent); | ||
| 67 | X(vkCmdSetScissor); | 67 | X(vkCmdSetScissor); |
| 68 | X(vkCmdSetStencilCompareMask); | 68 | X(vkCmdSetStencilCompareMask); |
| 69 | X(vkCmdSetStencilReference); | 69 | X(vkCmdSetStencilReference); |
| @@ -76,6 +76,7 @@ void Load(VkDevice device, DeviceDispatch& dld) noexcept { | |||
| 76 | X(vkCreateDescriptorPool); | 76 | X(vkCreateDescriptorPool); |
| 77 | X(vkCreateDescriptorSetLayout); | 77 | X(vkCreateDescriptorSetLayout); |
| 78 | X(vkCreateDescriptorUpdateTemplateKHR); | 78 | X(vkCreateDescriptorUpdateTemplateKHR); |
| 79 | X(vkCreateEvent); | ||
| 79 | X(vkCreateFence); | 80 | X(vkCreateFence); |
| 80 | X(vkCreateFramebuffer); | 81 | X(vkCreateFramebuffer); |
| 81 | X(vkCreateGraphicsPipelines); | 82 | X(vkCreateGraphicsPipelines); |
| @@ -94,6 +95,7 @@ void Load(VkDevice device, DeviceDispatch& dld) noexcept { | |||
| 94 | X(vkDestroyDescriptorPool); | 95 | X(vkDestroyDescriptorPool); |
| 95 | X(vkDestroyDescriptorSetLayout); | 96 | X(vkDestroyDescriptorSetLayout); |
| 96 | X(vkDestroyDescriptorUpdateTemplateKHR); | 97 | X(vkDestroyDescriptorUpdateTemplateKHR); |
| 98 | X(vkDestroyEvent); | ||
| 97 | X(vkDestroyFence); | 99 | X(vkDestroyFence); |
| 98 | X(vkDestroyFramebuffer); | 100 | X(vkDestroyFramebuffer); |
| 99 | X(vkDestroyImage); | 101 | X(vkDestroyImage); |
| @@ -113,10 +115,10 @@ void Load(VkDevice device, DeviceDispatch& dld) noexcept { | |||
| 113 | X(vkFreeMemory); | 115 | X(vkFreeMemory); |
| 114 | X(vkGetBufferMemoryRequirements); | 116 | X(vkGetBufferMemoryRequirements); |
| 115 | X(vkGetDeviceQueue); | 117 | X(vkGetDeviceQueue); |
| 118 | X(vkGetEventStatus); | ||
| 116 | X(vkGetFenceStatus); | 119 | X(vkGetFenceStatus); |
| 117 | X(vkGetImageMemoryRequirements); | 120 | X(vkGetImageMemoryRequirements); |
| 118 | X(vkGetQueryPoolResults); | 121 | X(vkGetQueryPoolResults); |
| 119 | X(vkGetQueueCheckpointDataNV); | ||
| 120 | X(vkMapMemory); | 122 | X(vkMapMemory); |
| 121 | X(vkQueueSubmit); | 123 | X(vkQueueSubmit); |
| 122 | X(vkResetFences); | 124 | X(vkResetFences); |
| @@ -271,6 +273,10 @@ void Destroy(VkDevice device, VkDeviceMemory handle, const DeviceDispatch& dld) | |||
| 271 | dld.vkFreeMemory(device, handle, nullptr); | 273 | dld.vkFreeMemory(device, handle, nullptr); |
| 272 | } | 274 | } |
| 273 | 275 | ||
| 276 | void Destroy(VkDevice device, VkEvent handle, const DeviceDispatch& dld) noexcept { | ||
| 277 | dld.vkDestroyEvent(device, handle, nullptr); | ||
| 278 | } | ||
| 279 | |||
| 274 | void Destroy(VkDevice device, VkFence handle, const DeviceDispatch& dld) noexcept { | 280 | void Destroy(VkDevice device, VkFence handle, const DeviceDispatch& dld) noexcept { |
| 275 | dld.vkDestroyFence(device, handle, nullptr); | 281 | dld.vkDestroyFence(device, handle, nullptr); |
| 276 | } | 282 | } |
| @@ -409,17 +415,6 @@ DebugCallback Instance::TryCreateDebugCallback( | |||
| 409 | return DebugCallback(messenger, handle, *dld); | 415 | return DebugCallback(messenger, handle, *dld); |
| 410 | } | 416 | } |
| 411 | 417 | ||
| 412 | std::vector<VkCheckpointDataNV> Queue::GetCheckpointDataNV(const DeviceDispatch& dld) const { | ||
| 413 | if (!dld.vkGetQueueCheckpointDataNV) { | ||
| 414 | return {}; | ||
| 415 | } | ||
| 416 | u32 num; | ||
| 417 | dld.vkGetQueueCheckpointDataNV(queue, &num, nullptr); | ||
| 418 | std::vector<VkCheckpointDataNV> checkpoints(num); | ||
| 419 | dld.vkGetQueueCheckpointDataNV(queue, &num, checkpoints.data()); | ||
| 420 | return checkpoints; | ||
| 421 | } | ||
| 422 | |||
| 423 | void Buffer::BindMemory(VkDeviceMemory memory, VkDeviceSize offset) const { | 418 | void Buffer::BindMemory(VkDeviceMemory memory, VkDeviceSize offset) const { |
| 424 | Check(dld->vkBindBufferMemory(owner, handle, memory, offset)); | 419 | Check(dld->vkBindBufferMemory(owner, handle, memory, offset)); |
| 425 | } | 420 | } |
| @@ -469,12 +464,11 @@ std::vector<VkImage> SwapchainKHR::GetImages() const { | |||
| 469 | } | 464 | } |
| 470 | 465 | ||
| 471 | Device Device::Create(VkPhysicalDevice physical_device, Span<VkDeviceQueueCreateInfo> queues_ci, | 466 | Device Device::Create(VkPhysicalDevice physical_device, Span<VkDeviceQueueCreateInfo> queues_ci, |
| 472 | Span<const char*> enabled_extensions, | 467 | Span<const char*> enabled_extensions, const void* next, |
| 473 | const VkPhysicalDeviceFeatures2& enabled_features, | ||
| 474 | DeviceDispatch& dld) noexcept { | 468 | DeviceDispatch& dld) noexcept { |
| 475 | VkDeviceCreateInfo ci; | 469 | VkDeviceCreateInfo ci; |
| 476 | ci.sType = VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO; | 470 | ci.sType = VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO; |
| 477 | ci.pNext = &enabled_features; | 471 | ci.pNext = next; |
| 478 | ci.flags = 0; | 472 | ci.flags = 0; |
| 479 | ci.queueCreateInfoCount = queues_ci.size(); | 473 | ci.queueCreateInfoCount = queues_ci.size(); |
| 480 | ci.pQueueCreateInfos = queues_ci.data(); | 474 | ci.pQueueCreateInfos = queues_ci.data(); |
| @@ -613,6 +607,16 @@ ShaderModule Device::CreateShaderModule(const VkShaderModuleCreateInfo& ci) cons | |||
| 613 | return ShaderModule(object, handle, *dld); | 607 | return ShaderModule(object, handle, *dld); |
| 614 | } | 608 | } |
| 615 | 609 | ||
| 610 | Event Device::CreateEvent() const { | ||
| 611 | VkEventCreateInfo ci; | ||
| 612 | ci.sType = VK_STRUCTURE_TYPE_EVENT_CREATE_INFO; | ||
| 613 | ci.pNext = nullptr; | ||
| 614 | ci.flags = 0; | ||
| 615 | VkEvent object; | ||
| 616 | Check(dld->vkCreateEvent(handle, &ci, nullptr, &object)); | ||
| 617 | return Event(object, handle, *dld); | ||
| 618 | } | ||
| 619 | |||
| 616 | SwapchainKHR Device::CreateSwapchainKHR(const VkSwapchainCreateInfoKHR& ci) const { | 620 | SwapchainKHR Device::CreateSwapchainKHR(const VkSwapchainCreateInfoKHR& ci) const { |
| 617 | VkSwapchainKHR object; | 621 | VkSwapchainKHR object; |
| 618 | Check(dld->vkCreateSwapchainKHR(handle, &ci, nullptr, &object)); | 622 | Check(dld->vkCreateSwapchainKHR(handle, &ci, nullptr, &object)); |
diff --git a/src/video_core/renderer_vulkan/wrapper.h b/src/video_core/renderer_vulkan/wrapper.h index fb3657819..bda16a2cb 100644 --- a/src/video_core/renderer_vulkan/wrapper.h +++ b/src/video_core/renderer_vulkan/wrapper.h | |||
| @@ -197,9 +197,9 @@ struct DeviceDispatch : public InstanceDispatch { | |||
| 197 | PFN_vkCmdPipelineBarrier vkCmdPipelineBarrier; | 197 | PFN_vkCmdPipelineBarrier vkCmdPipelineBarrier; |
| 198 | PFN_vkCmdPushConstants vkCmdPushConstants; | 198 | PFN_vkCmdPushConstants vkCmdPushConstants; |
| 199 | PFN_vkCmdSetBlendConstants vkCmdSetBlendConstants; | 199 | PFN_vkCmdSetBlendConstants vkCmdSetBlendConstants; |
| 200 | PFN_vkCmdSetCheckpointNV vkCmdSetCheckpointNV; | ||
| 201 | PFN_vkCmdSetDepthBias vkCmdSetDepthBias; | 200 | PFN_vkCmdSetDepthBias vkCmdSetDepthBias; |
| 202 | PFN_vkCmdSetDepthBounds vkCmdSetDepthBounds; | 201 | PFN_vkCmdSetDepthBounds vkCmdSetDepthBounds; |
| 202 | PFN_vkCmdSetEvent vkCmdSetEvent; | ||
| 203 | PFN_vkCmdSetScissor vkCmdSetScissor; | 203 | PFN_vkCmdSetScissor vkCmdSetScissor; |
| 204 | PFN_vkCmdSetStencilCompareMask vkCmdSetStencilCompareMask; | 204 | PFN_vkCmdSetStencilCompareMask vkCmdSetStencilCompareMask; |
| 205 | PFN_vkCmdSetStencilReference vkCmdSetStencilReference; | 205 | PFN_vkCmdSetStencilReference vkCmdSetStencilReference; |
| @@ -212,6 +212,7 @@ struct DeviceDispatch : public InstanceDispatch { | |||
| 212 | PFN_vkCreateDescriptorPool vkCreateDescriptorPool; | 212 | PFN_vkCreateDescriptorPool vkCreateDescriptorPool; |
| 213 | PFN_vkCreateDescriptorSetLayout vkCreateDescriptorSetLayout; | 213 | PFN_vkCreateDescriptorSetLayout vkCreateDescriptorSetLayout; |
| 214 | PFN_vkCreateDescriptorUpdateTemplateKHR vkCreateDescriptorUpdateTemplateKHR; | 214 | PFN_vkCreateDescriptorUpdateTemplateKHR vkCreateDescriptorUpdateTemplateKHR; |
| 215 | PFN_vkCreateEvent vkCreateEvent; | ||
| 215 | PFN_vkCreateFence vkCreateFence; | 216 | PFN_vkCreateFence vkCreateFence; |
| 216 | PFN_vkCreateFramebuffer vkCreateFramebuffer; | 217 | PFN_vkCreateFramebuffer vkCreateFramebuffer; |
| 217 | PFN_vkCreateGraphicsPipelines vkCreateGraphicsPipelines; | 218 | PFN_vkCreateGraphicsPipelines vkCreateGraphicsPipelines; |
| @@ -230,6 +231,7 @@ struct DeviceDispatch : public InstanceDispatch { | |||
| 230 | PFN_vkDestroyDescriptorPool vkDestroyDescriptorPool; | 231 | PFN_vkDestroyDescriptorPool vkDestroyDescriptorPool; |
| 231 | PFN_vkDestroyDescriptorSetLayout vkDestroyDescriptorSetLayout; | 232 | PFN_vkDestroyDescriptorSetLayout vkDestroyDescriptorSetLayout; |
| 232 | PFN_vkDestroyDescriptorUpdateTemplateKHR vkDestroyDescriptorUpdateTemplateKHR; | 233 | PFN_vkDestroyDescriptorUpdateTemplateKHR vkDestroyDescriptorUpdateTemplateKHR; |
| 234 | PFN_vkDestroyEvent vkDestroyEvent; | ||
| 233 | PFN_vkDestroyFence vkDestroyFence; | 235 | PFN_vkDestroyFence vkDestroyFence; |
| 234 | PFN_vkDestroyFramebuffer vkDestroyFramebuffer; | 236 | PFN_vkDestroyFramebuffer vkDestroyFramebuffer; |
| 235 | PFN_vkDestroyImage vkDestroyImage; | 237 | PFN_vkDestroyImage vkDestroyImage; |
| @@ -249,10 +251,10 @@ struct DeviceDispatch : public InstanceDispatch { | |||
| 249 | PFN_vkFreeMemory vkFreeMemory; | 251 | PFN_vkFreeMemory vkFreeMemory; |
| 250 | PFN_vkGetBufferMemoryRequirements vkGetBufferMemoryRequirements; | 252 | PFN_vkGetBufferMemoryRequirements vkGetBufferMemoryRequirements; |
| 251 | PFN_vkGetDeviceQueue vkGetDeviceQueue; | 253 | PFN_vkGetDeviceQueue vkGetDeviceQueue; |
| 254 | PFN_vkGetEventStatus vkGetEventStatus; | ||
| 252 | PFN_vkGetFenceStatus vkGetFenceStatus; | 255 | PFN_vkGetFenceStatus vkGetFenceStatus; |
| 253 | PFN_vkGetImageMemoryRequirements vkGetImageMemoryRequirements; | 256 | PFN_vkGetImageMemoryRequirements vkGetImageMemoryRequirements; |
| 254 | PFN_vkGetQueryPoolResults vkGetQueryPoolResults; | 257 | PFN_vkGetQueryPoolResults vkGetQueryPoolResults; |
| 255 | PFN_vkGetQueueCheckpointDataNV vkGetQueueCheckpointDataNV; | ||
| 256 | PFN_vkMapMemory vkMapMemory; | 258 | PFN_vkMapMemory vkMapMemory; |
| 257 | PFN_vkQueueSubmit vkQueueSubmit; | 259 | PFN_vkQueueSubmit vkQueueSubmit; |
| 258 | PFN_vkResetFences vkResetFences; | 260 | PFN_vkResetFences vkResetFences; |
| @@ -281,6 +283,7 @@ void Destroy(VkDevice, VkDescriptorPool, const DeviceDispatch&) noexcept; | |||
| 281 | void Destroy(VkDevice, VkDescriptorSetLayout, const DeviceDispatch&) noexcept; | 283 | void Destroy(VkDevice, VkDescriptorSetLayout, const DeviceDispatch&) noexcept; |
| 282 | void Destroy(VkDevice, VkDescriptorUpdateTemplateKHR, const DeviceDispatch&) noexcept; | 284 | void Destroy(VkDevice, VkDescriptorUpdateTemplateKHR, const DeviceDispatch&) noexcept; |
| 283 | void Destroy(VkDevice, VkDeviceMemory, const DeviceDispatch&) noexcept; | 285 | void Destroy(VkDevice, VkDeviceMemory, const DeviceDispatch&) noexcept; |
| 286 | void Destroy(VkDevice, VkEvent, const DeviceDispatch&) noexcept; | ||
| 284 | void Destroy(VkDevice, VkFence, const DeviceDispatch&) noexcept; | 287 | void Destroy(VkDevice, VkFence, const DeviceDispatch&) noexcept; |
| 285 | void Destroy(VkDevice, VkFramebuffer, const DeviceDispatch&) noexcept; | 288 | void Destroy(VkDevice, VkFramebuffer, const DeviceDispatch&) noexcept; |
| 286 | void Destroy(VkDevice, VkImage, const DeviceDispatch&) noexcept; | 289 | void Destroy(VkDevice, VkImage, const DeviceDispatch&) noexcept; |
| @@ -567,12 +570,8 @@ public: | |||
| 567 | /// Construct a queue handle. | 570 | /// Construct a queue handle. |
| 568 | constexpr Queue(VkQueue queue, const DeviceDispatch& dld) noexcept : queue{queue}, dld{&dld} {} | 571 | constexpr Queue(VkQueue queue, const DeviceDispatch& dld) noexcept : queue{queue}, dld{&dld} {} |
| 569 | 572 | ||
| 570 | /// Returns the checkpoint data. | 573 | VkResult Submit(Span<VkSubmitInfo> submit_infos, VkFence fence) const noexcept { |
| 571 | /// @note Returns an empty vector when the function pointer is not present. | 574 | return dld->vkQueueSubmit(queue, submit_infos.size(), submit_infos.data(), fence); |
| 572 | std::vector<VkCheckpointDataNV> GetCheckpointDataNV(const DeviceDispatch& dld) const; | ||
| 573 | |||
| 574 | void Submit(Span<VkSubmitInfo> submit_infos, VkFence fence) const { | ||
| 575 | Check(dld->vkQueueSubmit(queue, submit_infos.size(), submit_infos.data(), fence)); | ||
| 576 | } | 575 | } |
| 577 | 576 | ||
| 578 | VkResult Present(const VkPresentInfoKHR& present_info) const noexcept { | 577 | VkResult Present(const VkPresentInfoKHR& present_info) const noexcept { |
| @@ -654,13 +653,21 @@ public: | |||
| 654 | std::vector<VkImage> GetImages() const; | 653 | std::vector<VkImage> GetImages() const; |
| 655 | }; | 654 | }; |
| 656 | 655 | ||
| 656 | class Event : public Handle<VkEvent, VkDevice, DeviceDispatch> { | ||
| 657 | using Handle<VkEvent, VkDevice, DeviceDispatch>::Handle; | ||
| 658 | |||
| 659 | public: | ||
| 660 | VkResult GetStatus() const noexcept { | ||
| 661 | return dld->vkGetEventStatus(owner, handle); | ||
| 662 | } | ||
| 663 | }; | ||
| 664 | |||
| 657 | class Device : public Handle<VkDevice, NoOwner, DeviceDispatch> { | 665 | class Device : public Handle<VkDevice, NoOwner, DeviceDispatch> { |
| 658 | using Handle<VkDevice, NoOwner, DeviceDispatch>::Handle; | 666 | using Handle<VkDevice, NoOwner, DeviceDispatch>::Handle; |
| 659 | 667 | ||
| 660 | public: | 668 | public: |
| 661 | static Device Create(VkPhysicalDevice physical_device, Span<VkDeviceQueueCreateInfo> queues_ci, | 669 | static Device Create(VkPhysicalDevice physical_device, Span<VkDeviceQueueCreateInfo> queues_ci, |
| 662 | Span<const char*> enabled_extensions, | 670 | Span<const char*> enabled_extensions, const void* next, |
| 663 | const VkPhysicalDeviceFeatures2& enabled_features, | ||
| 664 | DeviceDispatch& dld) noexcept; | 671 | DeviceDispatch& dld) noexcept; |
| 665 | 672 | ||
| 666 | Queue GetQueue(u32 family_index) const noexcept; | 673 | Queue GetQueue(u32 family_index) const noexcept; |
| @@ -702,6 +709,8 @@ public: | |||
| 702 | 709 | ||
| 703 | ShaderModule CreateShaderModule(const VkShaderModuleCreateInfo& ci) const; | 710 | ShaderModule CreateShaderModule(const VkShaderModuleCreateInfo& ci) const; |
| 704 | 711 | ||
| 712 | Event CreateEvent() const; | ||
| 713 | |||
| 705 | SwapchainKHR CreateSwapchainKHR(const VkSwapchainCreateInfoKHR& ci) const; | 714 | SwapchainKHR CreateSwapchainKHR(const VkSwapchainCreateInfoKHR& ci) const; |
| 706 | 715 | ||
| 707 | DeviceMemory TryAllocateMemory(const VkMemoryAllocateInfo& ai) const noexcept; | 716 | DeviceMemory TryAllocateMemory(const VkMemoryAllocateInfo& ai) const noexcept; |
| @@ -734,18 +743,11 @@ public: | |||
| 734 | dld->vkResetQueryPoolEXT(handle, query_pool, first, count); | 743 | dld->vkResetQueryPoolEXT(handle, query_pool, first, count); |
| 735 | } | 744 | } |
| 736 | 745 | ||
| 737 | void GetQueryResults(VkQueryPool query_pool, u32 first, u32 count, std::size_t data_size, | 746 | VkResult GetQueryResults(VkQueryPool query_pool, u32 first, u32 count, std::size_t data_size, |
| 738 | void* data, VkDeviceSize stride, VkQueryResultFlags flags) const { | 747 | void* data, VkDeviceSize stride, VkQueryResultFlags flags) const |
| 739 | Check(dld->vkGetQueryPoolResults(handle, query_pool, first, count, data_size, data, stride, | 748 | noexcept { |
| 740 | flags)); | 749 | return dld->vkGetQueryPoolResults(handle, query_pool, first, count, data_size, data, stride, |
| 741 | } | 750 | flags); |
| 742 | |||
| 743 | template <typename T> | ||
| 744 | T GetQueryResult(VkQueryPool query_pool, u32 first, VkQueryResultFlags flags) const { | ||
| 745 | static_assert(std::is_trivially_copyable_v<T>); | ||
| 746 | T value; | ||
| 747 | GetQueryResults(query_pool, first, 1, sizeof(T), &value, sizeof(T), flags); | ||
| 748 | return value; | ||
| 749 | } | 751 | } |
| 750 | }; | 752 | }; |
| 751 | 753 | ||
| @@ -920,10 +922,6 @@ public: | |||
| 920 | dld->vkCmdPushConstants(handle, layout, flags, offset, size, values); | 922 | dld->vkCmdPushConstants(handle, layout, flags, offset, size, values); |
| 921 | } | 923 | } |
| 922 | 924 | ||
| 923 | void SetCheckpointNV(const void* checkpoint_marker) const noexcept { | ||
| 924 | dld->vkCmdSetCheckpointNV(handle, checkpoint_marker); | ||
| 925 | } | ||
| 926 | |||
| 927 | void SetViewport(u32 first, Span<VkViewport> viewports) const noexcept { | 925 | void SetViewport(u32 first, Span<VkViewport> viewports) const noexcept { |
| 928 | dld->vkCmdSetViewport(handle, first, viewports.size(), viewports.data()); | 926 | dld->vkCmdSetViewport(handle, first, viewports.size(), viewports.data()); |
| 929 | } | 927 | } |
| @@ -956,6 +954,10 @@ public: | |||
| 956 | dld->vkCmdSetDepthBounds(handle, min_depth_bounds, max_depth_bounds); | 954 | dld->vkCmdSetDepthBounds(handle, min_depth_bounds, max_depth_bounds); |
| 957 | } | 955 | } |
| 958 | 956 | ||
| 957 | void SetEvent(VkEvent event, VkPipelineStageFlags stage_flags) const noexcept { | ||
| 958 | dld->vkCmdSetEvent(handle, event, stage_flags); | ||
| 959 | } | ||
| 960 | |||
| 959 | void BindTransformFeedbackBuffersEXT(u32 first, u32 count, const VkBuffer* buffers, | 961 | void BindTransformFeedbackBuffersEXT(u32 first, u32 count, const VkBuffer* buffers, |
| 960 | const VkDeviceSize* offsets, | 962 | const VkDeviceSize* offsets, |
| 961 | const VkDeviceSize* sizes) const noexcept { | 963 | const VkDeviceSize* sizes) const noexcept { |
diff --git a/src/video_core/shader/control_flow.cpp b/src/video_core/shader/control_flow.cpp index 6d313963a..e00a3fb70 100644 --- a/src/video_core/shader/control_flow.cpp +++ b/src/video_core/shader/control_flow.cpp | |||
| @@ -587,8 +587,6 @@ bool TryQuery(CFGRebuildState& state) { | |||
| 587 | return true; | 587 | return true; |
| 588 | } | 588 | } |
| 589 | 589 | ||
| 590 | } // Anonymous namespace | ||
| 591 | |||
| 592 | void InsertBranch(ASTManager& mm, const BlockBranchInfo& branch_info) { | 590 | void InsertBranch(ASTManager& mm, const BlockBranchInfo& branch_info) { |
| 593 | const auto get_expr = ([&](const Condition& cond) -> Expr { | 591 | const auto get_expr = ([&](const Condition& cond) -> Expr { |
| 594 | Expr result{}; | 592 | Expr result{}; |
| @@ -655,6 +653,8 @@ void DecompileShader(CFGRebuildState& state) { | |||
| 655 | state.manager->Decompile(); | 653 | state.manager->Decompile(); |
| 656 | } | 654 | } |
| 657 | 655 | ||
| 656 | } // Anonymous namespace | ||
| 657 | |||
| 658 | std::unique_ptr<ShaderCharacteristics> ScanFlow(const ProgramCode& program_code, u32 start_address, | 658 | std::unique_ptr<ShaderCharacteristics> ScanFlow(const ProgramCode& program_code, u32 start_address, |
| 659 | const CompilerSettings& settings, | 659 | const CompilerSettings& settings, |
| 660 | Registry& registry) { | 660 | Registry& registry) { |
diff --git a/src/video_core/shader/decode/memory.cpp b/src/video_core/shader/decode/memory.cpp index 8112ead3e..9392f065b 100644 --- a/src/video_core/shader/decode/memory.cpp +++ b/src/video_core/shader/decode/memory.cpp | |||
| @@ -479,7 +479,7 @@ std::tuple<Node, Node, GlobalMemoryBase> ShaderIR::TrackGlobalMemory(NodeBlock& | |||
| 479 | bb.push_back(Comment(fmt::format("Base address is c[0x{:x}][0x{:x}]", index, offset))); | 479 | bb.push_back(Comment(fmt::format("Base address is c[0x{:x}][0x{:x}]", index, offset))); |
| 480 | 480 | ||
| 481 | const GlobalMemoryBase descriptor{index, offset}; | 481 | const GlobalMemoryBase descriptor{index, offset}; |
| 482 | const auto& [entry, is_new] = used_global_memory.try_emplace(descriptor); | 482 | const auto& entry = used_global_memory.try_emplace(descriptor).first; |
| 483 | auto& usage = entry->second; | 483 | auto& usage = entry->second; |
| 484 | usage.is_written |= is_write; | 484 | usage.is_written |= is_write; |
| 485 | usage.is_read |= is_read; | 485 | usage.is_read |= is_read; |
diff --git a/src/video_core/shader/decode/texture.cpp b/src/video_core/shader/decode/texture.cpp index 6c4a1358b..e68f1d305 100644 --- a/src/video_core/shader/decode/texture.cpp +++ b/src/video_core/shader/decode/texture.cpp | |||
| @@ -139,7 +139,7 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) { | |||
| 139 | } | 139 | } |
| 140 | const Node component = Immediate(static_cast<u32>(instr.tld4s.component)); | 140 | const Node component = Immediate(static_cast<u32>(instr.tld4s.component)); |
| 141 | 141 | ||
| 142 | const SamplerInfo info{TextureType::Texture2D, false, is_depth_compare}; | 142 | const SamplerInfo info{TextureType::Texture2D, false, is_depth_compare, false}; |
| 143 | const Sampler& sampler = *GetSampler(instr.sampler, info); | 143 | const Sampler& sampler = *GetSampler(instr.sampler, info); |
| 144 | 144 | ||
| 145 | Node4 values; | 145 | Node4 values; |
| @@ -171,13 +171,12 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) { | |||
| 171 | const auto coord_count = GetCoordCount(texture_type); | 171 | const auto coord_count = GetCoordCount(texture_type); |
| 172 | Node index_var{}; | 172 | Node index_var{}; |
| 173 | const Sampler* sampler = | 173 | const Sampler* sampler = |
| 174 | is_bindless ? GetBindlessSampler(base_reg, index_var, {{texture_type, is_array, false}}) | 174 | is_bindless |
| 175 | : GetSampler(instr.sampler, {{texture_type, is_array, false}}); | 175 | ? GetBindlessSampler(base_reg, index_var, {{texture_type, is_array, false, false}}) |
| 176 | : GetSampler(instr.sampler, {{texture_type, is_array, false, false}}); | ||
| 176 | Node4 values; | 177 | Node4 values; |
| 177 | if (sampler == nullptr) { | 178 | if (sampler == nullptr) { |
| 178 | for (u32 element = 0; element < values.size(); ++element) { | 179 | std::generate(values.begin(), values.end(), [] { return Immediate(0); }); |
| 179 | values[element] = Immediate(0); | ||
| 180 | } | ||
| 181 | WriteTexInstructionFloat(bb, instr, values); | 180 | WriteTexInstructionFloat(bb, instr, values); |
| 182 | break; | 181 | break; |
| 183 | } | 182 | } |
| @@ -269,7 +268,6 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) { | |||
| 269 | "NDV is not implemented"); | 268 | "NDV is not implemented"); |
| 270 | 269 | ||
| 271 | auto texture_type = instr.tmml.texture_type.Value(); | 270 | auto texture_type = instr.tmml.texture_type.Value(); |
| 272 | const bool is_array = instr.tmml.array != 0; | ||
| 273 | Node index_var{}; | 271 | Node index_var{}; |
| 274 | const Sampler* sampler = | 272 | const Sampler* sampler = |
| 275 | is_bindless ? GetBindlessSampler(instr.gpr20, index_var) : GetSampler(instr.sampler); | 273 | is_bindless ? GetBindlessSampler(instr.gpr20, index_var) : GetSampler(instr.sampler); |
| @@ -593,8 +591,9 @@ Node4 ShaderIR::GetTexCode(Instruction instr, TextureType texture_type, | |||
| 593 | ++parameter_register; | 591 | ++parameter_register; |
| 594 | } | 592 | } |
| 595 | 593 | ||
| 596 | const auto [coord_count, total_coord_count] = ValidateAndGetCoordinateElement( | 594 | const auto coord_counts = ValidateAndGetCoordinateElement(texture_type, depth_compare, is_array, |
| 597 | texture_type, depth_compare, is_array, lod_bias_enabled, 4, 5); | 595 | lod_bias_enabled, 4, 5); |
| 596 | const auto coord_count = std::get<0>(coord_counts); | ||
| 598 | // If enabled arrays index is always stored in the gpr8 field | 597 | // If enabled arrays index is always stored in the gpr8 field |
| 599 | const u64 array_register = instr.gpr8.Value(); | 598 | const u64 array_register = instr.gpr8.Value(); |
| 600 | // First coordinate index is the gpr8 or gpr8 + 1 when arrays are used | 599 | // First coordinate index is the gpr8 or gpr8 + 1 when arrays are used |
| @@ -632,8 +631,10 @@ Node4 ShaderIR::GetTexsCode(Instruction instr, TextureType texture_type, | |||
| 632 | const bool lod_bias_enabled = | 631 | const bool lod_bias_enabled = |
| 633 | (process_mode != TextureProcessMode::None && process_mode != TextureProcessMode::LZ); | 632 | (process_mode != TextureProcessMode::None && process_mode != TextureProcessMode::LZ); |
| 634 | 633 | ||
| 635 | const auto [coord_count, total_coord_count] = ValidateAndGetCoordinateElement( | 634 | const auto coord_counts = ValidateAndGetCoordinateElement(texture_type, depth_compare, is_array, |
| 636 | texture_type, depth_compare, is_array, lod_bias_enabled, 4, 4); | 635 | lod_bias_enabled, 4, 4); |
| 636 | const auto coord_count = std::get<0>(coord_counts); | ||
| 637 | |||
| 637 | // If enabled arrays index is always stored in the gpr8 field | 638 | // If enabled arrays index is always stored in the gpr8 field |
| 638 | const u64 array_register = instr.gpr8.Value(); | 639 | const u64 array_register = instr.gpr8.Value(); |
| 639 | // First coordinate index is stored in gpr8 field or (gpr8 + 1) when arrays are used | 640 | // First coordinate index is stored in gpr8 field or (gpr8 + 1) when arrays are used |
diff --git a/src/video_core/texture_cache/surface_base.h b/src/video_core/texture_cache/surface_base.h index c5ab21f56..79e10ffbb 100644 --- a/src/video_core/texture_cache/surface_base.h +++ b/src/video_core/texture_cache/surface_base.h | |||
| @@ -192,6 +192,22 @@ public: | |||
| 192 | index = index_; | 192 | index = index_; |
| 193 | } | 193 | } |
| 194 | 194 | ||
| 195 | void SetMemoryMarked(bool is_memory_marked_) { | ||
| 196 | is_memory_marked = is_memory_marked_; | ||
| 197 | } | ||
| 198 | |||
| 199 | bool IsMemoryMarked() const { | ||
| 200 | return is_memory_marked; | ||
| 201 | } | ||
| 202 | |||
| 203 | void SetSyncPending(bool is_sync_pending_) { | ||
| 204 | is_sync_pending = is_sync_pending_; | ||
| 205 | } | ||
| 206 | |||
| 207 | bool IsSyncPending() const { | ||
| 208 | return is_sync_pending; | ||
| 209 | } | ||
| 210 | |||
| 195 | void MarkAsPicked(bool is_picked_) { | 211 | void MarkAsPicked(bool is_picked_) { |
| 196 | is_picked = is_picked_; | 212 | is_picked = is_picked_; |
| 197 | } | 213 | } |
| @@ -303,6 +319,8 @@ private: | |||
| 303 | bool is_target{}; | 319 | bool is_target{}; |
| 304 | bool is_registered{}; | 320 | bool is_registered{}; |
| 305 | bool is_picked{}; | 321 | bool is_picked{}; |
| 322 | bool is_memory_marked{}; | ||
| 323 | bool is_sync_pending{}; | ||
| 306 | u32 index{NO_RT}; | 324 | u32 index{NO_RT}; |
| 307 | u64 modification_tick{}; | 325 | u64 modification_tick{}; |
| 308 | }; | 326 | }; |
diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index 69ca08fd1..cf6bd005a 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h | |||
| @@ -6,6 +6,7 @@ | |||
| 6 | 6 | ||
| 7 | #include <algorithm> | 7 | #include <algorithm> |
| 8 | #include <array> | 8 | #include <array> |
| 9 | #include <list> | ||
| 9 | #include <memory> | 10 | #include <memory> |
| 10 | #include <mutex> | 11 | #include <mutex> |
| 11 | #include <set> | 12 | #include <set> |
| @@ -62,6 +63,30 @@ public: | |||
| 62 | } | 63 | } |
| 63 | } | 64 | } |
| 64 | 65 | ||
| 66 | void OnCPUWrite(VAddr addr, std::size_t size) { | ||
| 67 | std::lock_guard lock{mutex}; | ||
| 68 | |||
| 69 | for (const auto& surface : GetSurfacesInRegion(addr, size)) { | ||
| 70 | if (surface->IsMemoryMarked()) { | ||
| 71 | UnmarkMemory(surface); | ||
| 72 | surface->SetSyncPending(true); | ||
| 73 | marked_for_unregister.emplace_back(surface); | ||
| 74 | } | ||
| 75 | } | ||
| 76 | } | ||
| 77 | |||
| 78 | void SyncGuestHost() { | ||
| 79 | std::lock_guard lock{mutex}; | ||
| 80 | |||
| 81 | for (const auto& surface : marked_for_unregister) { | ||
| 82 | if (surface->IsRegistered()) { | ||
| 83 | surface->SetSyncPending(false); | ||
| 84 | Unregister(surface); | ||
| 85 | } | ||
| 86 | } | ||
| 87 | marked_for_unregister.clear(); | ||
| 88 | } | ||
| 89 | |||
| 65 | /** | 90 | /** |
| 66 | * Guarantees that rendertargets don't unregister themselves if the | 91 | * Guarantees that rendertargets don't unregister themselves if the |
| 67 | * collide. Protection is currently only done on 3D slices. | 92 | * collide. Protection is currently only done on 3D slices. |
| @@ -85,10 +110,20 @@ public: | |||
| 85 | return a->GetModificationTick() < b->GetModificationTick(); | 110 | return a->GetModificationTick() < b->GetModificationTick(); |
| 86 | }); | 111 | }); |
| 87 | for (const auto& surface : surfaces) { | 112 | for (const auto& surface : surfaces) { |
| 113 | mutex.unlock(); | ||
| 88 | FlushSurface(surface); | 114 | FlushSurface(surface); |
| 115 | mutex.lock(); | ||
| 89 | } | 116 | } |
| 90 | } | 117 | } |
| 91 | 118 | ||
| 119 | bool MustFlushRegion(VAddr addr, std::size_t size) { | ||
| 120 | std::lock_guard lock{mutex}; | ||
| 121 | |||
| 122 | const auto surfaces = GetSurfacesInRegion(addr, size); | ||
| 123 | return std::any_of(surfaces.cbegin(), surfaces.cend(), | ||
| 124 | [](const TSurface& surface) { return surface->IsModified(); }); | ||
| 125 | } | ||
| 126 | |||
| 92 | TView GetTextureSurface(const Tegra::Texture::TICEntry& tic, | 127 | TView GetTextureSurface(const Tegra::Texture::TICEntry& tic, |
| 93 | const VideoCommon::Shader::Sampler& entry) { | 128 | const VideoCommon::Shader::Sampler& entry) { |
| 94 | std::lock_guard lock{mutex}; | 129 | std::lock_guard lock{mutex}; |
| @@ -206,8 +241,14 @@ public: | |||
| 206 | 241 | ||
| 207 | auto surface_view = GetSurface(gpu_addr, *cpu_addr, | 242 | auto surface_view = GetSurface(gpu_addr, *cpu_addr, |
| 208 | SurfaceParams::CreateForFramebuffer(system, index), true); | 243 | SurfaceParams::CreateForFramebuffer(system, index), true); |
| 209 | if (render_targets[index].target) | 244 | if (render_targets[index].target) { |
| 210 | render_targets[index].target->MarkAsRenderTarget(false, NO_RT); | 245 | auto& surface = render_targets[index].target; |
| 246 | surface->MarkAsRenderTarget(false, NO_RT); | ||
| 247 | const auto& cr_params = surface->GetSurfaceParams(); | ||
| 248 | if (!cr_params.is_tiled && Settings::values.use_asynchronous_gpu_emulation) { | ||
| 249 | AsyncFlushSurface(surface); | ||
| 250 | } | ||
| 251 | } | ||
| 211 | render_targets[index].target = surface_view.first; | 252 | render_targets[index].target = surface_view.first; |
| 212 | render_targets[index].view = surface_view.second; | 253 | render_targets[index].view = surface_view.second; |
| 213 | if (render_targets[index].target) | 254 | if (render_targets[index].target) |
| @@ -284,6 +325,34 @@ public: | |||
| 284 | return ++ticks; | 325 | return ++ticks; |
| 285 | } | 326 | } |
| 286 | 327 | ||
| 328 | void CommitAsyncFlushes() { | ||
| 329 | committed_flushes.push_back(uncommitted_flushes); | ||
| 330 | uncommitted_flushes.reset(); | ||
| 331 | } | ||
| 332 | |||
| 333 | bool HasUncommittedFlushes() const { | ||
| 334 | return uncommitted_flushes != nullptr; | ||
| 335 | } | ||
| 336 | |||
| 337 | bool ShouldWaitAsyncFlushes() const { | ||
| 338 | return !committed_flushes.empty() && committed_flushes.front() != nullptr; | ||
| 339 | } | ||
| 340 | |||
| 341 | void PopAsyncFlushes() { | ||
| 342 | if (committed_flushes.empty()) { | ||
| 343 | return; | ||
| 344 | } | ||
| 345 | auto& flush_list = committed_flushes.front(); | ||
| 346 | if (!flush_list) { | ||
| 347 | committed_flushes.pop_front(); | ||
| 348 | return; | ||
| 349 | } | ||
| 350 | for (TSurface& surface : *flush_list) { | ||
| 351 | FlushSurface(surface); | ||
| 352 | } | ||
| 353 | committed_flushes.pop_front(); | ||
| 354 | } | ||
| 355 | |||
| 287 | protected: | 356 | protected: |
| 288 | explicit TextureCache(Core::System& system, VideoCore::RasterizerInterface& rasterizer, | 357 | explicit TextureCache(Core::System& system, VideoCore::RasterizerInterface& rasterizer, |
| 289 | bool is_astc_supported) | 358 | bool is_astc_supported) |
| @@ -345,9 +414,20 @@ protected: | |||
| 345 | surface->SetCpuAddr(*cpu_addr); | 414 | surface->SetCpuAddr(*cpu_addr); |
| 346 | RegisterInnerCache(surface); | 415 | RegisterInnerCache(surface); |
| 347 | surface->MarkAsRegistered(true); | 416 | surface->MarkAsRegistered(true); |
| 417 | surface->SetMemoryMarked(true); | ||
| 348 | rasterizer.UpdatePagesCachedCount(*cpu_addr, size, 1); | 418 | rasterizer.UpdatePagesCachedCount(*cpu_addr, size, 1); |
| 349 | } | 419 | } |
| 350 | 420 | ||
| 421 | void UnmarkMemory(TSurface surface) { | ||
| 422 | if (!surface->IsMemoryMarked()) { | ||
| 423 | return; | ||
| 424 | } | ||
| 425 | const std::size_t size = surface->GetSizeInBytes(); | ||
| 426 | const VAddr cpu_addr = surface->GetCpuAddr(); | ||
| 427 | rasterizer.UpdatePagesCachedCount(cpu_addr, size, -1); | ||
| 428 | surface->SetMemoryMarked(false); | ||
| 429 | } | ||
| 430 | |||
| 351 | void Unregister(TSurface surface) { | 431 | void Unregister(TSurface surface) { |
| 352 | if (guard_render_targets && surface->IsProtected()) { | 432 | if (guard_render_targets && surface->IsProtected()) { |
| 353 | return; | 433 | return; |
| @@ -355,9 +435,11 @@ protected: | |||
| 355 | if (!guard_render_targets && surface->IsRenderTarget()) { | 435 | if (!guard_render_targets && surface->IsRenderTarget()) { |
| 356 | ManageRenderTargetUnregister(surface); | 436 | ManageRenderTargetUnregister(surface); |
| 357 | } | 437 | } |
| 358 | const std::size_t size = surface->GetSizeInBytes(); | 438 | UnmarkMemory(surface); |
| 359 | const VAddr cpu_addr = surface->GetCpuAddr(); | 439 | if (surface->IsSyncPending()) { |
| 360 | rasterizer.UpdatePagesCachedCount(cpu_addr, size, -1); | 440 | marked_for_unregister.remove(surface); |
| 441 | surface->SetSyncPending(false); | ||
| 442 | } | ||
| 361 | UnregisterInnerCache(surface); | 443 | UnregisterInnerCache(surface); |
| 362 | surface->MarkAsRegistered(false); | 444 | surface->MarkAsRegistered(false); |
| 363 | ReserveSurface(surface->GetSurfaceParams(), surface); | 445 | ReserveSurface(surface->GetSurfaceParams(), surface); |
| @@ -417,7 +499,7 @@ private: | |||
| 417 | **/ | 499 | **/ |
| 418 | RecycleStrategy PickStrategy(std::vector<TSurface>& overlaps, const SurfaceParams& params, | 500 | RecycleStrategy PickStrategy(std::vector<TSurface>& overlaps, const SurfaceParams& params, |
| 419 | const GPUVAddr gpu_addr, const MatchTopologyResult untopological) { | 501 | const GPUVAddr gpu_addr, const MatchTopologyResult untopological) { |
| 420 | if (Settings::values.use_accurate_gpu_emulation) { | 502 | if (Settings::IsGPULevelExtreme()) { |
| 421 | return RecycleStrategy::Flush; | 503 | return RecycleStrategy::Flush; |
| 422 | } | 504 | } |
| 423 | // 3D Textures decision | 505 | // 3D Textures decision |
| @@ -461,7 +543,7 @@ private: | |||
| 461 | } | 543 | } |
| 462 | switch (PickStrategy(overlaps, params, gpu_addr, untopological)) { | 544 | switch (PickStrategy(overlaps, params, gpu_addr, untopological)) { |
| 463 | case RecycleStrategy::Ignore: { | 545 | case RecycleStrategy::Ignore: { |
| 464 | return InitializeSurface(gpu_addr, params, Settings::values.use_accurate_gpu_emulation); | 546 | return InitializeSurface(gpu_addr, params, Settings::IsGPULevelExtreme()); |
| 465 | } | 547 | } |
| 466 | case RecycleStrategy::Flush: { | 548 | case RecycleStrategy::Flush: { |
| 467 | std::sort(overlaps.begin(), overlaps.end(), | 549 | std::sort(overlaps.begin(), overlaps.end(), |
| @@ -509,7 +591,7 @@ private: | |||
| 509 | } | 591 | } |
| 510 | const auto& final_params = new_surface->GetSurfaceParams(); | 592 | const auto& final_params = new_surface->GetSurfaceParams(); |
| 511 | if (cr_params.type != final_params.type) { | 593 | if (cr_params.type != final_params.type) { |
| 512 | if (Settings::values.use_accurate_gpu_emulation) { | 594 | if (Settings::IsGPULevelExtreme()) { |
| 513 | BufferCopy(current_surface, new_surface); | 595 | BufferCopy(current_surface, new_surface); |
| 514 | } | 596 | } |
| 515 | } else { | 597 | } else { |
| @@ -598,7 +680,7 @@ private: | |||
| 598 | if (passed_tests == 0) { | 680 | if (passed_tests == 0) { |
| 599 | return {}; | 681 | return {}; |
| 600 | // In Accurate GPU all tests should pass, else we recycle | 682 | // In Accurate GPU all tests should pass, else we recycle |
| 601 | } else if (Settings::values.use_accurate_gpu_emulation && passed_tests != overlaps.size()) { | 683 | } else if (Settings::IsGPULevelExtreme() && passed_tests != overlaps.size()) { |
| 602 | return {}; | 684 | return {}; |
| 603 | } | 685 | } |
| 604 | for (const auto& surface : overlaps) { | 686 | for (const auto& surface : overlaps) { |
| @@ -668,7 +750,7 @@ private: | |||
| 668 | for (const auto& surface : overlaps) { | 750 | for (const auto& surface : overlaps) { |
| 669 | if (!surface->MatchTarget(params.target)) { | 751 | if (!surface->MatchTarget(params.target)) { |
| 670 | if (overlaps.size() == 1 && surface->GetCpuAddr() == cpu_addr) { | 752 | if (overlaps.size() == 1 && surface->GetCpuAddr() == cpu_addr) { |
| 671 | if (Settings::values.use_accurate_gpu_emulation) { | 753 | if (Settings::IsGPULevelExtreme()) { |
| 672 | return std::nullopt; | 754 | return std::nullopt; |
| 673 | } | 755 | } |
| 674 | Unregister(surface); | 756 | Unregister(surface); |
| @@ -1106,6 +1188,13 @@ private: | |||
| 1106 | TView view; | 1188 | TView view; |
| 1107 | }; | 1189 | }; |
| 1108 | 1190 | ||
| 1191 | void AsyncFlushSurface(TSurface& surface) { | ||
| 1192 | if (!uncommitted_flushes) { | ||
| 1193 | uncommitted_flushes = std::make_shared<std::list<TSurface>>(); | ||
| 1194 | } | ||
| 1195 | uncommitted_flushes->push_back(surface); | ||
| 1196 | } | ||
| 1197 | |||
| 1109 | VideoCore::RasterizerInterface& rasterizer; | 1198 | VideoCore::RasterizerInterface& rasterizer; |
| 1110 | 1199 | ||
| 1111 | FormatLookupTable format_lookup_table; | 1200 | FormatLookupTable format_lookup_table; |
| @@ -1150,6 +1239,11 @@ private: | |||
| 1150 | std::unordered_map<u32, TSurface> invalid_cache; | 1239 | std::unordered_map<u32, TSurface> invalid_cache; |
| 1151 | std::vector<u8> invalid_memory; | 1240 | std::vector<u8> invalid_memory; |
| 1152 | 1241 | ||
| 1242 | std::list<TSurface> marked_for_unregister; | ||
| 1243 | |||
| 1244 | std::shared_ptr<std::list<TSurface>> uncommitted_flushes{}; | ||
| 1245 | std::list<std::shared_ptr<std::list<TSurface>>> committed_flushes; | ||
| 1246 | |||
| 1153 | StagingCache staging_cache; | 1247 | StagingCache staging_cache; |
| 1154 | std::recursive_mutex mutex; | 1248 | std::recursive_mutex mutex; |
| 1155 | }; | 1249 | }; |
diff --git a/src/video_core/textures/decoders.cpp b/src/video_core/textures/decoders.cpp index 7df5f1452..fae8638ec 100644 --- a/src/video_core/textures/decoders.cpp +++ b/src/video_core/textures/decoders.cpp | |||
| @@ -11,6 +11,7 @@ | |||
| 11 | #include "video_core/textures/texture.h" | 11 | #include "video_core/textures/texture.h" |
| 12 | 12 | ||
| 13 | namespace Tegra::Texture { | 13 | namespace Tegra::Texture { |
| 14 | namespace { | ||
| 14 | 15 | ||
| 15 | /** | 16 | /** |
| 16 | * This table represents the internal swizzle of a gob, | 17 | * This table represents the internal swizzle of a gob, |
| @@ -174,6 +175,8 @@ void SwizzledData(u8* const swizzled_data, u8* const unswizzled_data, const bool | |||
| 174 | } | 175 | } |
| 175 | } | 176 | } |
| 176 | 177 | ||
| 178 | } // Anonymous namespace | ||
| 179 | |||
| 177 | void CopySwizzledData(u32 width, u32 height, u32 depth, u32 bytes_per_pixel, | 180 | void CopySwizzledData(u32 width, u32 height, u32 depth, u32 bytes_per_pixel, |
| 178 | u32 out_bytes_per_pixel, u8* const swizzled_data, u8* const unswizzled_data, | 181 | u32 out_bytes_per_pixel, u8* const swizzled_data, u8* const unswizzled_data, |
| 179 | bool unswizzle, u32 block_height, u32 block_depth, u32 width_spacing) { | 182 | bool unswizzle, u32 block_height, u32 block_depth, u32 width_spacing) { |
diff --git a/src/video_core/textures/decoders.h b/src/video_core/textures/decoders.h index e5eac3f3b..9f2d6d308 100644 --- a/src/video_core/textures/decoders.h +++ b/src/video_core/textures/decoders.h | |||
| @@ -56,8 +56,7 @@ void UnswizzleSubrect(u32 subrect_width, u32 subrect_height, u32 dest_pitch, u32 | |||
| 56 | u32 bytes_per_pixel, u8* swizzled_data, u8* unswizzled_data, u32 block_height, | 56 | u32 bytes_per_pixel, u8* swizzled_data, u8* unswizzled_data, u32 block_height, |
| 57 | u32 offset_x, u32 offset_y); | 57 | u32 offset_x, u32 offset_y); |
| 58 | 58 | ||
| 59 | void SwizzleKepler(const u32 width, const u32 height, const u32 dst_x, const u32 dst_y, | 59 | void SwizzleKepler(u32 width, u32 height, u32 dst_x, u32 dst_y, u32 block_height, |
| 60 | const u32 block_height, const std::size_t copy_size, const u8* source_data, | 60 | std::size_t copy_size, const u8* source_data, u8* swizzle_data); |
| 61 | u8* swizzle_data); | ||
| 62 | 61 | ||
| 63 | } // namespace Tegra::Texture | 62 | } // namespace Tegra::Texture |
diff --git a/src/yuzu/applets/profile_select.cpp b/src/yuzu/applets/profile_select.cpp index 6aff38735..4bc8ee726 100644 --- a/src/yuzu/applets/profile_select.cpp +++ b/src/yuzu/applets/profile_select.cpp | |||
| @@ -17,6 +17,7 @@ | |||
| 17 | #include "yuzu/applets/profile_select.h" | 17 | #include "yuzu/applets/profile_select.h" |
| 18 | #include "yuzu/main.h" | 18 | #include "yuzu/main.h" |
| 19 | 19 | ||
| 20 | namespace { | ||
| 20 | QString FormatUserEntryText(const QString& username, Common::UUID uuid) { | 21 | QString FormatUserEntryText(const QString& username, Common::UUID uuid) { |
| 21 | return QtProfileSelectionDialog::tr( | 22 | return QtProfileSelectionDialog::tr( |
| 22 | "%1\n%2", "%1 is the profile username, %2 is the formatted UUID (e.g. " | 23 | "%1\n%2", "%1 is the profile username, %2 is the formatted UUID (e.g. " |
| @@ -41,6 +42,7 @@ QPixmap GetIcon(Common::UUID uuid) { | |||
| 41 | 42 | ||
| 42 | return icon.scaled(64, 64, Qt::IgnoreAspectRatio, Qt::SmoothTransformation); | 43 | return icon.scaled(64, 64, Qt::IgnoreAspectRatio, Qt::SmoothTransformation); |
| 43 | } | 44 | } |
| 45 | } // Anonymous namespace | ||
| 44 | 46 | ||
| 45 | QtProfileSelectionDialog::QtProfileSelectionDialog(QWidget* parent) | 47 | QtProfileSelectionDialog::QtProfileSelectionDialog(QWidget* parent) |
| 46 | : QDialog(parent), profile_manager(std::make_unique<Service::Account::ProfileManager>()) { | 48 | : QDialog(parent), profile_manager(std::make_unique<Service::Account::ProfileManager>()) { |
diff --git a/src/yuzu/configuration/config.cpp b/src/yuzu/configuration/config.cpp index 3b9ab38dd..196a3a116 100644 --- a/src/yuzu/configuration/config.cpp +++ b/src/yuzu/configuration/config.cpp | |||
| @@ -532,6 +532,8 @@ void Config::ReadDebuggingValues() { | |||
| 532 | Settings::values.reporting_services = | 532 | Settings::values.reporting_services = |
| 533 | ReadSetting(QStringLiteral("reporting_services"), false).toBool(); | 533 | ReadSetting(QStringLiteral("reporting_services"), false).toBool(); |
| 534 | Settings::values.quest_flag = ReadSetting(QStringLiteral("quest_flag"), false).toBool(); | 534 | Settings::values.quest_flag = ReadSetting(QStringLiteral("quest_flag"), false).toBool(); |
| 535 | Settings::values.disable_cpu_opt = | ||
| 536 | ReadSetting(QStringLiteral("disable_cpu_opt"), false).toBool(); | ||
| 535 | 537 | ||
| 536 | qt_config->endGroup(); | 538 | qt_config->endGroup(); |
| 537 | } | 539 | } |
| @@ -637,8 +639,8 @@ void Config::ReadRendererValues() { | |||
| 637 | Settings::values.frame_limit = ReadSetting(QStringLiteral("frame_limit"), 100).toInt(); | 639 | Settings::values.frame_limit = ReadSetting(QStringLiteral("frame_limit"), 100).toInt(); |
| 638 | Settings::values.use_disk_shader_cache = | 640 | Settings::values.use_disk_shader_cache = |
| 639 | ReadSetting(QStringLiteral("use_disk_shader_cache"), true).toBool(); | 641 | ReadSetting(QStringLiteral("use_disk_shader_cache"), true).toBool(); |
| 640 | Settings::values.use_accurate_gpu_emulation = | 642 | const int gpu_accuracy_level = ReadSetting(QStringLiteral("gpu_accuracy"), 0).toInt(); |
| 641 | ReadSetting(QStringLiteral("use_accurate_gpu_emulation"), false).toBool(); | 643 | Settings::values.gpu_accuracy = static_cast<Settings::GPUAccuracy>(gpu_accuracy_level); |
| 642 | Settings::values.use_asynchronous_gpu_emulation = | 644 | Settings::values.use_asynchronous_gpu_emulation = |
| 643 | ReadSetting(QStringLiteral("use_asynchronous_gpu_emulation"), false).toBool(); | 645 | ReadSetting(QStringLiteral("use_asynchronous_gpu_emulation"), false).toBool(); |
| 644 | Settings::values.use_vsync = ReadSetting(QStringLiteral("use_vsync"), true).toBool(); | 646 | Settings::values.use_vsync = ReadSetting(QStringLiteral("use_vsync"), true).toBool(); |
| @@ -1001,6 +1003,7 @@ void Config::SaveDebuggingValues() { | |||
| 1001 | WriteSetting(QStringLiteral("dump_exefs"), Settings::values.dump_exefs, false); | 1003 | WriteSetting(QStringLiteral("dump_exefs"), Settings::values.dump_exefs, false); |
| 1002 | WriteSetting(QStringLiteral("dump_nso"), Settings::values.dump_nso, false); | 1004 | WriteSetting(QStringLiteral("dump_nso"), Settings::values.dump_nso, false); |
| 1003 | WriteSetting(QStringLiteral("quest_flag"), Settings::values.quest_flag, false); | 1005 | WriteSetting(QStringLiteral("quest_flag"), Settings::values.quest_flag, false); |
| 1006 | WriteSetting(QStringLiteral("disable_cpu_opt"), Settings::values.disable_cpu_opt, false); | ||
| 1004 | 1007 | ||
| 1005 | qt_config->endGroup(); | 1008 | qt_config->endGroup(); |
| 1006 | } | 1009 | } |
| @@ -1077,8 +1080,8 @@ void Config::SaveRendererValues() { | |||
| 1077 | WriteSetting(QStringLiteral("frame_limit"), Settings::values.frame_limit, 100); | 1080 | WriteSetting(QStringLiteral("frame_limit"), Settings::values.frame_limit, 100); |
| 1078 | WriteSetting(QStringLiteral("use_disk_shader_cache"), Settings::values.use_disk_shader_cache, | 1081 | WriteSetting(QStringLiteral("use_disk_shader_cache"), Settings::values.use_disk_shader_cache, |
| 1079 | true); | 1082 | true); |
| 1080 | WriteSetting(QStringLiteral("use_accurate_gpu_emulation"), | 1083 | WriteSetting(QStringLiteral("gpu_accuracy"), static_cast<int>(Settings::values.gpu_accuracy), |
| 1081 | Settings::values.use_accurate_gpu_emulation, false); | 1084 | 0); |
| 1082 | WriteSetting(QStringLiteral("use_asynchronous_gpu_emulation"), | 1085 | WriteSetting(QStringLiteral("use_asynchronous_gpu_emulation"), |
| 1083 | Settings::values.use_asynchronous_gpu_emulation, false); | 1086 | Settings::values.use_asynchronous_gpu_emulation, false); |
| 1084 | WriteSetting(QStringLiteral("use_vsync"), Settings::values.use_vsync, true); | 1087 | WriteSetting(QStringLiteral("use_vsync"), Settings::values.use_vsync, true); |
diff --git a/src/yuzu/configuration/configure_debug.cpp b/src/yuzu/configuration/configure_debug.cpp index 9631059c7..c2026763e 100644 --- a/src/yuzu/configuration/configure_debug.cpp +++ b/src/yuzu/configuration/configure_debug.cpp | |||
| @@ -36,6 +36,7 @@ void ConfigureDebug::SetConfiguration() { | |||
| 36 | ui->homebrew_args_edit->setText(QString::fromStdString(Settings::values.program_args)); | 36 | ui->homebrew_args_edit->setText(QString::fromStdString(Settings::values.program_args)); |
| 37 | ui->reporting_services->setChecked(Settings::values.reporting_services); | 37 | ui->reporting_services->setChecked(Settings::values.reporting_services); |
| 38 | ui->quest_flag->setChecked(Settings::values.quest_flag); | 38 | ui->quest_flag->setChecked(Settings::values.quest_flag); |
| 39 | ui->disable_cpu_opt->setChecked(Settings::values.disable_cpu_opt); | ||
| 39 | ui->enable_graphics_debugging->setEnabled(!Core::System::GetInstance().IsPoweredOn()); | 40 | ui->enable_graphics_debugging->setEnabled(!Core::System::GetInstance().IsPoweredOn()); |
| 40 | ui->enable_graphics_debugging->setChecked(Settings::values.renderer_debug); | 41 | ui->enable_graphics_debugging->setChecked(Settings::values.renderer_debug); |
| 41 | } | 42 | } |
| @@ -48,6 +49,7 @@ void ConfigureDebug::ApplyConfiguration() { | |||
| 48 | Settings::values.program_args = ui->homebrew_args_edit->text().toStdString(); | 49 | Settings::values.program_args = ui->homebrew_args_edit->text().toStdString(); |
| 49 | Settings::values.reporting_services = ui->reporting_services->isChecked(); | 50 | Settings::values.reporting_services = ui->reporting_services->isChecked(); |
| 50 | Settings::values.quest_flag = ui->quest_flag->isChecked(); | 51 | Settings::values.quest_flag = ui->quest_flag->isChecked(); |
| 52 | Settings::values.disable_cpu_opt = ui->disable_cpu_opt->isChecked(); | ||
| 51 | Settings::values.renderer_debug = ui->enable_graphics_debugging->isChecked(); | 53 | Settings::values.renderer_debug = ui->enable_graphics_debugging->isChecked(); |
| 52 | Debugger::ToggleConsole(); | 54 | Debugger::ToggleConsole(); |
| 53 | Log::Filter filter; | 55 | Log::Filter filter; |
diff --git a/src/yuzu/configuration/configure_debug.ui b/src/yuzu/configuration/configure_debug.ui index e028c4c80..e0d4c4a44 100644 --- a/src/yuzu/configuration/configure_debug.ui +++ b/src/yuzu/configuration/configure_debug.ui | |||
| @@ -215,6 +215,13 @@ | |||
| 215 | </property> | 215 | </property> |
| 216 | </widget> | 216 | </widget> |
| 217 | </item> | 217 | </item> |
| 218 | <item> | ||
| 219 | <widget class="QCheckBox" name="disable_cpu_opt"> | ||
| 220 | <property name="text"> | ||
| 221 | <string>Disable CPU JIT optimizations</string> | ||
| 222 | </property> | ||
| 223 | </widget> | ||
| 224 | </item> | ||
| 218 | </layout> | 225 | </layout> |
| 219 | </widget> | 226 | </widget> |
| 220 | </item> | 227 | </item> |
diff --git a/src/yuzu/configuration/configure_graphics_advanced.cpp b/src/yuzu/configuration/configure_graphics_advanced.cpp index b9f429f84..0a3f47339 100644 --- a/src/yuzu/configuration/configure_graphics_advanced.cpp +++ b/src/yuzu/configuration/configure_graphics_advanced.cpp | |||
| @@ -19,7 +19,7 @@ ConfigureGraphicsAdvanced::~ConfigureGraphicsAdvanced() = default; | |||
| 19 | 19 | ||
| 20 | void ConfigureGraphicsAdvanced::SetConfiguration() { | 20 | void ConfigureGraphicsAdvanced::SetConfiguration() { |
| 21 | const bool runtime_lock = !Core::System::GetInstance().IsPoweredOn(); | 21 | const bool runtime_lock = !Core::System::GetInstance().IsPoweredOn(); |
| 22 | ui->use_accurate_gpu_emulation->setChecked(Settings::values.use_accurate_gpu_emulation); | 22 | ui->gpu_accuracy->setCurrentIndex(static_cast<int>(Settings::values.gpu_accuracy)); |
| 23 | ui->use_vsync->setEnabled(runtime_lock); | 23 | ui->use_vsync->setEnabled(runtime_lock); |
| 24 | ui->use_vsync->setChecked(Settings::values.use_vsync); | 24 | ui->use_vsync->setChecked(Settings::values.use_vsync); |
| 25 | ui->force_30fps_mode->setEnabled(runtime_lock); | 25 | ui->force_30fps_mode->setEnabled(runtime_lock); |
| @@ -29,7 +29,8 @@ void ConfigureGraphicsAdvanced::SetConfiguration() { | |||
| 29 | } | 29 | } |
| 30 | 30 | ||
| 31 | void ConfigureGraphicsAdvanced::ApplyConfiguration() { | 31 | void ConfigureGraphicsAdvanced::ApplyConfiguration() { |
| 32 | Settings::values.use_accurate_gpu_emulation = ui->use_accurate_gpu_emulation->isChecked(); | 32 | auto gpu_accuracy = static_cast<Settings::GPUAccuracy>(ui->gpu_accuracy->currentIndex()); |
| 33 | Settings::values.gpu_accuracy = gpu_accuracy; | ||
| 33 | Settings::values.use_vsync = ui->use_vsync->isChecked(); | 34 | Settings::values.use_vsync = ui->use_vsync->isChecked(); |
| 34 | Settings::values.force_30fps_mode = ui->force_30fps_mode->isChecked(); | 35 | Settings::values.force_30fps_mode = ui->force_30fps_mode->isChecked(); |
| 35 | Settings::values.max_anisotropy = ui->anisotropic_filtering_combobox->currentIndex(); | 36 | Settings::values.max_anisotropy = ui->anisotropic_filtering_combobox->currentIndex(); |
diff --git a/src/yuzu/configuration/configure_graphics_advanced.ui b/src/yuzu/configuration/configure_graphics_advanced.ui index 42eec278e..0c7b383e0 100644 --- a/src/yuzu/configuration/configure_graphics_advanced.ui +++ b/src/yuzu/configuration/configure_graphics_advanced.ui | |||
| @@ -23,11 +23,34 @@ | |||
| 23 | </property> | 23 | </property> |
| 24 | <layout class="QVBoxLayout" name="verticalLayout_3"> | 24 | <layout class="QVBoxLayout" name="verticalLayout_3"> |
| 25 | <item> | 25 | <item> |
| 26 | <widget class="QCheckBox" name="use_accurate_gpu_emulation"> | 26 | <layout class="QHBoxLayout" name="horizontalLayout_2"> |
| 27 | <property name="text"> | 27 | <item> |
| 28 | <string>Use accurate GPU emulation (slow)</string> | 28 | <widget class="QLabel" name="label_gpu_accuracy"> |
| 29 | </property> | 29 | <property name="text"> |
| 30 | </widget> | 30 | <string>Accuracy Level:</string> |
| 31 | </property> | ||
| 32 | </widget> | ||
| 33 | </item> | ||
| 34 | <item> | ||
| 35 | <widget class="QComboBox" name="gpu_accuracy"> | ||
| 36 | <item> | ||
| 37 | <property name="text"> | ||
| 38 | <string notr="true">Normal</string> | ||
| 39 | </property> | ||
| 40 | </item> | ||
| 41 | <item> | ||
| 42 | <property name="text"> | ||
| 43 | <string notr="true">High</string> | ||
| 44 | </property> | ||
| 45 | </item> | ||
| 46 | <item> | ||
| 47 | <property name="text"> | ||
| 48 | <string notr="true">Extreme (very slow)</string> | ||
| 49 | </property> | ||
| 50 | </item> | ||
| 51 | </widget> | ||
| 52 | </item> | ||
| 53 | </layout> | ||
| 31 | </item> | 54 | </item> |
| 32 | <item> | 55 | <item> |
| 33 | <widget class="QCheckBox" name="use_vsync"> | 56 | <widget class="QCheckBox" name="use_vsync"> |
diff --git a/src/yuzu_cmd/config.cpp b/src/yuzu_cmd/config.cpp index f4cd905c9..d1ac354bf 100644 --- a/src/yuzu_cmd/config.cpp +++ b/src/yuzu_cmd/config.cpp | |||
| @@ -388,8 +388,8 @@ void Config::ReadValues() { | |||
| 388 | static_cast<u16>(sdl2_config->GetInteger("Renderer", "frame_limit", 100)); | 388 | static_cast<u16>(sdl2_config->GetInteger("Renderer", "frame_limit", 100)); |
| 389 | Settings::values.use_disk_shader_cache = | 389 | Settings::values.use_disk_shader_cache = |
| 390 | sdl2_config->GetBoolean("Renderer", "use_disk_shader_cache", false); | 390 | sdl2_config->GetBoolean("Renderer", "use_disk_shader_cache", false); |
| 391 | Settings::values.use_accurate_gpu_emulation = | 391 | const int gpu_accuracy_level = sdl2_config->GetInteger("Renderer", "gpu_accuracy", 0); |
| 392 | sdl2_config->GetBoolean("Renderer", "use_accurate_gpu_emulation", false); | 392 | Settings::values.gpu_accuracy = static_cast<Settings::GPUAccuracy>(gpu_accuracy_level); |
| 393 | Settings::values.use_asynchronous_gpu_emulation = | 393 | Settings::values.use_asynchronous_gpu_emulation = |
| 394 | sdl2_config->GetBoolean("Renderer", "use_asynchronous_gpu_emulation", false); | 394 | sdl2_config->GetBoolean("Renderer", "use_asynchronous_gpu_emulation", false); |
| 395 | Settings::values.use_vsync = | 395 | Settings::values.use_vsync = |
| @@ -425,6 +425,8 @@ void Config::ReadValues() { | |||
| 425 | Settings::values.reporting_services = | 425 | Settings::values.reporting_services = |
| 426 | sdl2_config->GetBoolean("Debugging", "reporting_services", false); | 426 | sdl2_config->GetBoolean("Debugging", "reporting_services", false); |
| 427 | Settings::values.quest_flag = sdl2_config->GetBoolean("Debugging", "quest_flag", false); | 427 | Settings::values.quest_flag = sdl2_config->GetBoolean("Debugging", "quest_flag", false); |
| 428 | Settings::values.disable_cpu_opt = | ||
| 429 | sdl2_config->GetBoolean("Debugging", "disable_cpu_opt", false); | ||
| 428 | 430 | ||
| 429 | const auto title_list = sdl2_config->Get("AddOns", "title_ids", ""); | 431 | const auto title_list = sdl2_config->Get("AddOns", "title_ids", ""); |
| 430 | std::stringstream ss(title_list); | 432 | std::stringstream ss(title_list); |
diff --git a/src/yuzu_cmd/default_ini.h b/src/yuzu_cmd/default_ini.h index d63d7a58e..60b1a62fa 100644 --- a/src/yuzu_cmd/default_ini.h +++ b/src/yuzu_cmd/default_ini.h | |||
| @@ -146,9 +146,9 @@ frame_limit = | |||
| 146 | # 0 (default): Off, 1 : On | 146 | # 0 (default): Off, 1 : On |
| 147 | use_disk_shader_cache = | 147 | use_disk_shader_cache = |
| 148 | 148 | ||
| 149 | # Whether to use accurate GPU emulation | 149 | # Which GPU accuracy level to use |
| 150 | # 0 (default): Off (fast), 1 : On (slow) | 150 | # 0 (Normal), 1 (High), 2 (Extreme) |
| 151 | use_accurate_gpu_emulation = | 151 | gpu_accuracy = |
| 152 | 152 | ||
| 153 | # Whether to use asynchronous GPU emulation | 153 | # Whether to use asynchronous GPU emulation |
| 154 | # 0 : Off (slow), 1 (default): On (fast) | 154 | # 0 : Off (slow), 1 (default): On (fast) |
| @@ -280,6 +280,9 @@ dump_nso=false | |||
| 280 | # Determines whether or not yuzu will report to the game that the emulated console is in Kiosk Mode | 280 | # Determines whether or not yuzu will report to the game that the emulated console is in Kiosk Mode |
| 281 | # false: Retail/Normal Mode (default), true: Kiosk Mode | 281 | # false: Retail/Normal Mode (default), true: Kiosk Mode |
| 282 | quest_flag = | 282 | quest_flag = |
| 283 | # Determines whether or not JIT CPU optimizations are enabled | ||
| 284 | # false: Optimizations Enabled, true: Optimizations Disabled | ||
| 285 | disable_cpu_opt = | ||
| 283 | 286 | ||
| 284 | [WebService] | 287 | [WebService] |
| 285 | # Whether or not to enable telemetry | 288 | # Whether or not to enable telemetry |
diff --git a/src/yuzu_cmd/emu_window/emu_window_sdl2_vk.cpp b/src/yuzu_cmd/emu_window/emu_window_sdl2_vk.cpp index f2990910e..cb8e68a39 100644 --- a/src/yuzu_cmd/emu_window/emu_window_sdl2_vk.cpp +++ b/src/yuzu_cmd/emu_window/emu_window_sdl2_vk.cpp | |||
| @@ -29,6 +29,7 @@ EmuWindow_SDL2_VK::EmuWindow_SDL2_VK(Core::System& system, bool fullscreen) | |||
| 29 | SDL_WINDOW_RESIZABLE | SDL_WINDOW_ALLOW_HIGHDPI); | 29 | SDL_WINDOW_RESIZABLE | SDL_WINDOW_ALLOW_HIGHDPI); |
| 30 | 30 | ||
| 31 | SDL_SysWMinfo wm; | 31 | SDL_SysWMinfo wm; |
| 32 | SDL_VERSION(&wm.version); | ||
| 32 | if (SDL_GetWindowWMInfo(render_window, &wm) == SDL_FALSE) { | 33 | if (SDL_GetWindowWMInfo(render_window, &wm) == SDL_FALSE) { |
| 33 | LOG_CRITICAL(Frontend, "Failed to get information from the window manager"); | 34 | LOG_CRITICAL(Frontend, "Failed to get information from the window manager"); |
| 34 | std::exit(EXIT_FAILURE); | 35 | std::exit(EXIT_FAILURE); |
| @@ -70,7 +71,7 @@ EmuWindow_SDL2_VK::EmuWindow_SDL2_VK(Core::System& system, bool fullscreen) | |||
| 70 | EmuWindow_SDL2_VK::~EmuWindow_SDL2_VK() = default; | 71 | EmuWindow_SDL2_VK::~EmuWindow_SDL2_VK() = default; |
| 71 | 72 | ||
| 72 | std::unique_ptr<Core::Frontend::GraphicsContext> EmuWindow_SDL2_VK::CreateSharedContext() const { | 73 | std::unique_ptr<Core::Frontend::GraphicsContext> EmuWindow_SDL2_VK::CreateSharedContext() const { |
| 73 | return nullptr; | 74 | return std::make_unique<DummyContext>(); |
| 74 | } | 75 | } |
| 75 | 76 | ||
| 76 | void EmuWindow_SDL2_VK::Present() { | 77 | void EmuWindow_SDL2_VK::Present() { |
diff --git a/src/yuzu_cmd/emu_window/emu_window_sdl2_vk.h b/src/yuzu_cmd/emu_window/emu_window_sdl2_vk.h index b8021ebea..77a6ca72b 100644 --- a/src/yuzu_cmd/emu_window/emu_window_sdl2_vk.h +++ b/src/yuzu_cmd/emu_window/emu_window_sdl2_vk.h | |||
| @@ -22,3 +22,5 @@ public: | |||
| 22 | 22 | ||
| 23 | std::unique_ptr<Core::Frontend::GraphicsContext> CreateSharedContext() const override; | 23 | std::unique_ptr<Core::Frontend::GraphicsContext> CreateSharedContext() const override; |
| 24 | }; | 24 | }; |
| 25 | |||
| 26 | class DummyContext : public Core::Frontend::GraphicsContext {}; | ||
diff --git a/src/yuzu_tester/config.cpp b/src/yuzu_tester/config.cpp index ee2591c8f..c0325cc3c 100644 --- a/src/yuzu_tester/config.cpp +++ b/src/yuzu_tester/config.cpp | |||
| @@ -126,8 +126,8 @@ void Config::ReadValues() { | |||
| 126 | Settings::values.frame_limit = 100; | 126 | Settings::values.frame_limit = 100; |
| 127 | Settings::values.use_disk_shader_cache = | 127 | Settings::values.use_disk_shader_cache = |
| 128 | sdl2_config->GetBoolean("Renderer", "use_disk_shader_cache", false); | 128 | sdl2_config->GetBoolean("Renderer", "use_disk_shader_cache", false); |
| 129 | Settings::values.use_accurate_gpu_emulation = | 129 | const int gpu_accuracy_level = sdl2_config->GetInteger("Renderer", "gpu_accuracy", 0); |
| 130 | sdl2_config->GetBoolean("Renderer", "use_accurate_gpu_emulation", false); | 130 | Settings::values.gpu_accuracy = static_cast<Settings::GPUAccuracy>(gpu_accuracy_level); |
| 131 | Settings::values.use_asynchronous_gpu_emulation = | 131 | Settings::values.use_asynchronous_gpu_emulation = |
| 132 | sdl2_config->GetBoolean("Renderer", "use_asynchronous_gpu_emulation", false); | 132 | sdl2_config->GetBoolean("Renderer", "use_asynchronous_gpu_emulation", false); |
| 133 | 133 | ||