Diffstat (limited to 'src')
-rw-r--r--  src/CMakeLists.txt | 2
-rw-r--r--  src/audio_core/CMakeLists.txt | 3
-rw-r--r--  src/audio_core/audio_renderer.cpp | 32
-rw-r--r--  src/audio_core/audio_renderer.h | 5
-rw-r--r--  src/audio_core/behavior_info.cpp | 100
-rw-r--r--  src/audio_core/behavior_info.h | 66
-rw-r--r--  src/audio_core/common.h | 47
-rw-r--r--  src/core/arm/dynarmic/arm_dynarmic_64.cpp | 10
-rw-r--r--  src/core/arm/unicorn/arm_unicorn.cpp | 11
-rw-r--r--  src/core/crypto/key_manager.cpp | 3
-rw-r--r--  src/core/crypto/partition_data_manager.cpp | 7
-rw-r--r--  src/core/file_sys/program_metadata.cpp | 11
-rw-r--r--  src/core/file_sys/program_metadata.h | 6
-rw-r--r--  src/core/gdbstub/gdbstub.cpp | 7
-rw-r--r--  src/core/hle/kernel/thread.cpp | 3
-rw-r--r--  src/core/hle/service/am/am.cpp | 4
-rw-r--r--  src/core/hle/service/audio/audren_u.cpp | 13
-rw-r--r--  src/core/hle/service/bcat/backend/boxcat.cpp | 7
-rw-r--r--  src/core/hle/service/es/es.cpp | 2
-rw-r--r--  src/core/hle/service/hid/controllers/npad.cpp | 2
-rw-r--r--  src/core/hle/service/time/time_zone_manager.cpp | 4
-rw-r--r--  src/core/loader/elf.cpp | 5
-rw-r--r--  src/core/loader/nro.cpp | 23
-rw-r--r--  src/core/loader/nro.h | 2
-rw-r--r--  src/core/settings.cpp | 10
-rw-r--r--  src/core/settings.h | 12
-rw-r--r--  src/core/telemetry_session.cpp | 16
-rw-r--r--  src/tests/core/core_timing.cpp | 18
-rw-r--r--  src/video_core/CMakeLists.txt | 24
-rw-r--r--  src/video_core/buffer_cache/buffer_cache.h | 119
-rw-r--r--  src/video_core/buffer_cache/map_interval.h | 18
-rw-r--r--  src/video_core/dma_pusher.cpp | 7
-rw-r--r--  src/video_core/dma_pusher.h | 11
-rw-r--r--  src/video_core/engines/fermi_2d.cpp | 2
-rw-r--r--  src/video_core/engines/maxwell_3d.cpp | 12
-rw-r--r--  src/video_core/engines/maxwell_3d.h | 2
-rw-r--r--  src/video_core/engines/maxwell_dma.cpp | 11
-rw-r--r--  src/video_core/fence_manager.h | 170
-rw-r--r--  src/video_core/gpu.cpp | 31
-rw-r--r--  src/video_core/gpu.h | 29
-rw-r--r--  src/video_core/gpu_asynch.cpp | 4
-rw-r--r--  src/video_core/gpu_asynch.h | 2
-rw-r--r--  src/video_core/gpu_thread.cpp | 39
-rw-r--r--  src/video_core/gpu_thread.h | 11
-rw-r--r--  src/video_core/query_cache.h | 46
-rw-r--r--  src/video_core/rasterizer_interface.h | 18
-rw-r--r--  src/video_core/renderer_opengl/gl_buffer_cache.cpp | 3
-rw-r--r--  src/video_core/renderer_opengl/gl_fence_manager.cpp | 72
-rw-r--r--  src/video_core/renderer_opengl/gl_fence_manager.h | 53
-rw-r--r--  src/video_core/renderer_opengl/gl_rasterizer.cpp | 62
-rw-r--r--  src/video_core/renderer_opengl/gl_rasterizer.h | 10
-rw-r--r--  src/video_core/renderer_opengl/gl_shader_cache.cpp | 17
-rw-r--r--  src/video_core/renderer_opengl/gl_shader_cache.h | 3
-rw-r--r--  src/video_core/renderer_opengl/gl_shader_decompiler.cpp | 6
-rw-r--r--  src/video_core/renderer_vulkan/fixed_pipeline_state.cpp | 515
-rw-r--r--  src/video_core/renderer_vulkan/fixed_pipeline_state.h | 374
-rw-r--r--  src/video_core/renderer_vulkan/nsight_aftermath_tracker.cpp | 220
-rw-r--r--  src/video_core/renderer_vulkan/nsight_aftermath_tracker.h | 87
-rw-r--r--  src/video_core/renderer_vulkan/shaders/quad_indexed.comp | 50
-rw-r--r--  src/video_core/renderer_vulkan/vk_compute_pass.cpp | 205
-rw-r--r--  src/video_core/renderer_vulkan/vk_compute_pass.h | 19
-rw-r--r--  src/video_core/renderer_vulkan/vk_compute_pipeline.cpp | 2
-rw-r--r--  src/video_core/renderer_vulkan/vk_device.cpp | 35
-rw-r--r--  src/video_core/renderer_vulkan/vk_device.h | 14
-rw-r--r--  src/video_core/renderer_vulkan/vk_fence_manager.cpp | 101
-rw-r--r--  src/video_core/renderer_vulkan/vk_fence_manager.h | 74
-rw-r--r--  src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp | 93
-rw-r--r--  src/video_core/renderer_vulkan/vk_pipeline_cache.cpp | 26
-rw-r--r--  src/video_core/renderer_vulkan/vk_pipeline_cache.h | 3
-rw-r--r--  src/video_core/renderer_vulkan/vk_query_cache.cpp | 15
-rw-r--r--  src/video_core/renderer_vulkan/vk_rasterizer.cpp | 102
-rw-r--r--  src/video_core/renderer_vulkan/vk_rasterizer.h | 9
-rw-r--r--  src/video_core/renderer_vulkan/vk_scheduler.cpp | 10
-rw-r--r--  src/video_core/renderer_vulkan/vk_stream_buffer.cpp | 76
-rw-r--r--  src/video_core/renderer_vulkan/vk_stream_buffer.h | 5
-rw-r--r--  src/video_core/renderer_vulkan/wrapper.cpp | 36
-rw-r--r--  src/video_core/renderer_vulkan/wrapper.h | 54
-rw-r--r--  src/video_core/shader/control_flow.cpp | 4
-rw-r--r--  src/video_core/shader/decode/memory.cpp | 2
-rw-r--r--  src/video_core/shader/decode/texture.cpp | 23
-rw-r--r--  src/video_core/texture_cache/surface_base.h | 18
-rw-r--r--  src/video_core/texture_cache/texture_cache.h | 114
-rw-r--r--  src/video_core/textures/decoders.cpp | 3
-rw-r--r--  src/video_core/textures/decoders.h | 5
-rw-r--r--  src/yuzu/applets/profile_select.cpp | 2
-rw-r--r--  src/yuzu/configuration/config.cpp | 11
-rw-r--r--  src/yuzu/configuration/configure_debug.cpp | 2
-rw-r--r--  src/yuzu/configuration/configure_debug.ui | 7
-rw-r--r--  src/yuzu/configuration/configure_graphics_advanced.cpp | 5
-rw-r--r--  src/yuzu/configuration/configure_graphics_advanced.ui | 33
-rw-r--r--  src/yuzu_cmd/config.cpp | 6
-rw-r--r--  src/yuzu_cmd/default_ini.h | 9
-rw-r--r--  src/yuzu_cmd/emu_window/emu_window_sdl2_vk.cpp | 3
-rw-r--r--  src/yuzu_cmd/emu_window/emu_window_sdl2_vk.h | 2
-rw-r--r--  src/yuzu_tester/config.cpp | 4
95 files changed, 2838 insertions, 753 deletions
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index 0913be72c..3a57356ab 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -54,8 +54,10 @@ else()
     add_compile_options(
         -Wall
         -Werror=implicit-fallthrough
+        -Werror=missing-declarations
         -Werror=reorder
         -Wextra
+        -Wmissing-declarations
         -Wno-attributes
         -Wno-unused-parameter
     )
diff --git a/src/audio_core/CMakeLists.txt b/src/audio_core/CMakeLists.txt
index c381dbe1d..5ef38a337 100644
--- a/src/audio_core/CMakeLists.txt
+++ b/src/audio_core/CMakeLists.txt
@@ -7,9 +7,12 @@ add_library(audio_core STATIC
     audio_out.h
     audio_renderer.cpp
     audio_renderer.h
+    behavior_info.cpp
+    behavior_info.h
     buffer.h
     codec.cpp
     codec.h
+    common.h
     null_sink.h
     sink.h
     sink_details.cpp
diff --git a/src/audio_core/audio_renderer.cpp b/src/audio_core/audio_renderer.cpp
index 7a9dc61d4..d18ef6940 100644
--- a/src/audio_core/audio_renderer.cpp
+++ b/src/audio_core/audio_renderer.cpp
@@ -6,6 +6,7 @@
6#include "audio_core/audio_out.h" 6#include "audio_core/audio_out.h"
7#include "audio_core/audio_renderer.h" 7#include "audio_core/audio_renderer.h"
8#include "audio_core/codec.h" 8#include "audio_core/codec.h"
9#include "audio_core/common.h"
9#include "common/assert.h" 10#include "common/assert.h"
10#include "common/logging/log.h" 11#include "common/logging/log.h"
11#include "core/core.h" 12#include "core/core.h"
@@ -79,7 +80,7 @@ AudioRenderer::AudioRenderer(Core::Timing::CoreTiming& core_timing, Core::Memory
                             std::size_t instance_number)
     : worker_params{params}, buffer_event{buffer_event}, voices(params.voice_count),
       effects(params.effect_count), memory{memory_} {
-
+    behavior_info.SetUserRevision(params.revision);
     audio_out = std::make_unique<AudioCore::AudioOut>();
     stream = audio_out->OpenStream(core_timing, STREAM_SAMPLE_RATE, STREAM_NUM_CHANNELS,
                                    fmt::format("AudioRenderer-Instance{}", instance_number),
@@ -109,17 +110,17 @@ Stream::State AudioRenderer::GetStreamState() const {
     return stream->GetState();
 }
 
-static constexpr u32 VersionFromRevision(u32_le rev) {
-    // "REV7" -> 7
-    return ((rev >> 24) & 0xff) - 0x30;
-}
-
-std::vector<u8> AudioRenderer::UpdateAudioRenderer(const std::vector<u8>& input_params) {
+ResultVal<std::vector<u8>> AudioRenderer::UpdateAudioRenderer(const std::vector<u8>& input_params) {
     // Copy UpdateDataHeader struct
     UpdateDataHeader config{};
     std::memcpy(&config, input_params.data(), sizeof(UpdateDataHeader));
     u32 memory_pool_count = worker_params.effect_count + (worker_params.voice_count * 4);
 
+    if (!behavior_info.UpdateInput(input_params, sizeof(UpdateDataHeader))) {
+        LOG_ERROR(Audio, "Failed to update behavior info input parameters");
+        return Audren::ERR_INVALID_PARAMETERS;
+    }
+
     // Copy MemoryPoolInfo structs
     std::vector<MemoryPoolInfo> mem_pool_info(memory_pool_count);
     std::memcpy(mem_pool_info.data(),
@@ -173,8 +174,7 @@ std::vector<u8> AudioRenderer::UpdateAudioRenderer(const std::vector<u8>& input_
     // Copy output header
     UpdateDataHeader response_data{worker_params};
     std::vector<u8> output_params(response_data.total_size);
-    const auto audren_revision = VersionFromRevision(config.revision);
-    if (audren_revision >= 5) {
+    if (behavior_info.IsElapsedFrameCountSupported()) {
         response_data.frame_count = 0x10;
         response_data.total_size += 0x10;
     }
@@ -200,7 +200,19 @@ std::vector<u8> AudioRenderer::UpdateAudioRenderer(const std::vector<u8>& input_
                     sizeof(EffectOutStatus));
         effect_out_status_offset += sizeof(EffectOutStatus);
     }
-    return output_params;
+
+    // Update behavior info output
+    const std::size_t behavior_out_status_offset{
+        sizeof(UpdateDataHeader) + response_data.memory_pools_size + response_data.voices_size +
+        response_data.effects_size + response_data.sinks_size +
+        response_data.performance_manager_size};
+
+    if (!behavior_info.UpdateOutput(output_params, behavior_out_status_offset)) {
+        LOG_ERROR(Audio, "Failed to update behavior info output parameters");
+        return Audren::ERR_INVALID_PARAMETERS;
+    }
+
+    return MakeResult(output_params);
 }
 
 void AudioRenderer::VoiceState::SetWaveIndex(std::size_t index) {
diff --git a/src/audio_core/audio_renderer.h b/src/audio_core/audio_renderer.h
index 62faf9f19..b42770fae 100644
--- a/src/audio_core/audio_renderer.h
+++ b/src/audio_core/audio_renderer.h
@@ -8,11 +8,13 @@
 #include <memory>
 #include <vector>
 
+#include "audio_core/behavior_info.h"
 #include "audio_core/stream.h"
 #include "common/common_funcs.h"
 #include "common/common_types.h"
 #include "common/swap.h"
 #include "core/hle/kernel/object.h"
+#include "core/hle/result.h"
 
 namespace Core::Timing {
 class CoreTiming;
@@ -226,7 +228,7 @@ public:
                   std::shared_ptr<Kernel::WritableEvent> buffer_event, std::size_t instance_number);
     ~AudioRenderer();
 
-    std::vector<u8> UpdateAudioRenderer(const std::vector<u8>& input_params);
+    ResultVal<std::vector<u8>> UpdateAudioRenderer(const std::vector<u8>& input_params);
     void QueueMixedBuffer(Buffer::Tag tag);
     void ReleaseAndQueueBuffers();
     u32 GetSampleRate() const;
@@ -237,6 +239,7 @@ public:
 private:
     class EffectState;
     class VoiceState;
+    BehaviorInfo behavior_info{};
 
     AudioRendererParameter worker_params;
     std::shared_ptr<Kernel::WritableEvent> buffer_event;
diff --git a/src/audio_core/behavior_info.cpp b/src/audio_core/behavior_info.cpp
new file mode 100644
index 000000000..94b7a3bf1
--- /dev/null
+++ b/src/audio_core/behavior_info.cpp
@@ -0,0 +1,100 @@
+// Copyright 2020 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <cstring>
+#include "audio_core/behavior_info.h"
+#include "audio_core/common.h"
+#include "common/logging/log.h"
+
+namespace AudioCore {
+
+BehaviorInfo::BehaviorInfo() : process_revision(CURRENT_PROCESS_REVISION) {}
+BehaviorInfo::~BehaviorInfo() = default;
+
+bool BehaviorInfo::UpdateInput(const std::vector<u8>& buffer, std::size_t offset) {
+    if (!CanConsumeBuffer(buffer.size(), offset, sizeof(InParams))) {
+        LOG_ERROR(Audio, "Buffer is an invalid size!");
+        return false;
+    }
+    InParams params{};
+    std::memcpy(&params, buffer.data() + offset, sizeof(InParams));
+
+    if (!IsValidRevision(params.revision)) {
+        LOG_ERROR(Audio, "Invalid input revision, revision=0x{:08X}", params.revision);
+        return false;
+    }
+
+    if (user_revision != params.revision) {
+        LOG_ERROR(Audio,
+                  "User revision differs from input revision, expecting 0x{:08X} but got 0x{:08X}",
+                  user_revision, params.revision);
+        return false;
+    }
+
+    ClearError();
+    UpdateFlags(params.flags);
+
+    // TODO(ogniK): Check input params size when InfoUpdater is used
+
+    return true;
+}
+
+bool BehaviorInfo::UpdateOutput(std::vector<u8>& buffer, std::size_t offset) {
+    if (!CanConsumeBuffer(buffer.size(), offset, sizeof(OutParams))) {
+        LOG_ERROR(Audio, "Buffer is an invalid size!");
+        return false;
+    }
+
+    OutParams params{};
+    std::memcpy(params.errors.data(), errors.data(), sizeof(ErrorInfo) * errors.size());
+    params.error_count = static_cast<u32_le>(error_count);
+    std::memcpy(buffer.data() + offset, &params, sizeof(OutParams));
+    return true;
+}
+
+void BehaviorInfo::ClearError() {
+    error_count = 0;
+}
+
+void BehaviorInfo::UpdateFlags(u64_le dest_flags) {
+    flags = dest_flags;
+}
+
+void BehaviorInfo::SetUserRevision(u32_le revision) {
+    user_revision = revision;
+}
+
+bool BehaviorInfo::IsAdpcmLoopContextBugFixed() const {
+    return IsRevisionSupported(2, user_revision);
+}
+
+bool BehaviorInfo::IsSplitterSupported() const {
+    return IsRevisionSupported(2, user_revision);
+}
+
+bool BehaviorInfo::IsLongSizePreDelaySupported() const {
+    return IsRevisionSupported(3, user_revision);
+}
+
+bool BehaviorInfo::IsAudioRenererProcessingTimeLimit80PercentSupported() const {
+    return IsRevisionSupported(5, user_revision);
+}
+
+bool BehaviorInfo::IsAudioRenererProcessingTimeLimit75PercentSupported() const {
+    return IsRevisionSupported(4, user_revision);
+}
+
+bool BehaviorInfo::IsAudioRenererProcessingTimeLimit70PercentSupported() const {
+    return IsRevisionSupported(1, user_revision);
+}
+
+bool BehaviorInfo::IsElapsedFrameCountSupported() const {
+    return IsRevisionSupported(5, user_revision);
+}
+
+bool BehaviorInfo::IsMemoryPoolForceMappingEnabled() const {
+    return (flags & 1) != 0;
+}
+
+} // namespace AudioCore
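A minimal usage sketch of the new class, assuming only the interface shown above (the caller hand-builds the 0x10-byte InParams image: a u32 revision word, u32 padding, and u64 flags):

    // Hypothetical standalone caller; in the renderer the record is sliced out
    // of the guest's update buffer instead.
    AudioCore::BehaviorInfo info;
    info.SetUserRevision(Common::MakeMagic('R', 'E', 'V', '8'));

    std::vector<u8> in_params(0x10);
    const u32_le revision = Common::MakeMagic('R', 'E', 'V', '8');
    std::memcpy(in_params.data(), &revision, sizeof(revision)); // flags stay zero

    const bool ok = info.UpdateInput(in_params, 0); // true: size, validity and user revision all match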
diff --git a/src/audio_core/behavior_info.h b/src/audio_core/behavior_info.h
new file mode 100644
index 000000000..c5e91ab39
--- /dev/null
+++ b/src/audio_core/behavior_info.h
@@ -0,0 +1,66 @@
1// Copyright 2020 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <array>
8
9#include <vector>
10#include "common/common_funcs.h"
11#include "common/common_types.h"
12#include "common/swap.h"
13
14namespace AudioCore {
15class BehaviorInfo {
16public:
17 explicit BehaviorInfo();
18 ~BehaviorInfo();
19
20 bool UpdateInput(const std::vector<u8>& buffer, std::size_t offset);
21 bool UpdateOutput(std::vector<u8>& buffer, std::size_t offset);
22
23 void ClearError();
24 void UpdateFlags(u64_le dest_flags);
25 void SetUserRevision(u32_le revision);
26
27 bool IsAdpcmLoopContextBugFixed() const;
28 bool IsSplitterSupported() const;
29 bool IsLongSizePreDelaySupported() const;
30 bool IsAudioRenererProcessingTimeLimit80PercentSupported() const;
31 bool IsAudioRenererProcessingTimeLimit75PercentSupported() const;
32 bool IsAudioRenererProcessingTimeLimit70PercentSupported() const;
33 bool IsElapsedFrameCountSupported() const;
34 bool IsMemoryPoolForceMappingEnabled() const;
35
36private:
37 u32_le process_revision{};
38 u32_le user_revision{};
39 u64_le flags{};
40
41 struct ErrorInfo {
42 u32_le result{};
43 INSERT_PADDING_WORDS(1);
44 u64_le result_info{};
45 };
46 static_assert(sizeof(ErrorInfo) == 0x10, "ErrorInfo is an invalid size");
47
48 std::array<ErrorInfo, 10> errors{};
49 std::size_t error_count{};
50
51 struct InParams {
52 u32_le revision{};
53 u32_le padding{};
54 u64_le flags{};
55 };
56 static_assert(sizeof(InParams) == 0x10, "InParams is an invalid size");
57
58 struct OutParams {
59 std::array<ErrorInfo, 10> errors{};
60 u32_le error_count{};
61 INSERT_PADDING_BYTES(12);
62 };
63 static_assert(sizeof(OutParams) == 0xb0, "OutParams is an invalid size");
64};
65
66} // namespace AudioCore
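The static_assert sizes follow directly from the members; as a quick check of the OutParams figure (all values taken from the definitions above):

    // ErrorInfo: u32 result + one padding word + u64 result_info = 0x10 bytes
    // OutParams: 10 errors + u32 error_count + 12 bytes of padding
    static_assert(10 * 0x10 + 0x4 + 12 == 0xb0);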
diff --git a/src/audio_core/common.h b/src/audio_core/common.h
new file mode 100644
index 000000000..98478b66b
--- /dev/null
+++ b/src/audio_core/common.h
@@ -0,0 +1,47 @@
+// Copyright 2020 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+#include "common/common_funcs.h"
+#include "common/common_types.h"
+#include "common/swap.h"
+#include "core/hle/result.h"
+
+namespace AudioCore {
+namespace Audren {
+constexpr ResultCode ERR_INVALID_PARAMETERS{ErrorModule::Audio, 41};
+}
+
+constexpr u32_le CURRENT_PROCESS_REVISION = Common::MakeMagic('R', 'E', 'V', '8');
+
+static constexpr u32 VersionFromRevision(u32_le rev) {
+    // "REV7" -> 7
+    return ((rev >> 24) & 0xff) - 0x30;
+}
+
+static constexpr bool IsRevisionSupported(u32 required, u32_le user_revision) {
+    const auto base = VersionFromRevision(user_revision);
+    return required <= base;
+}
+
+static constexpr bool IsValidRevision(u32_le revision) {
+    const auto base = VersionFromRevision(revision);
+    constexpr auto max_rev = VersionFromRevision(CURRENT_PROCESS_REVISION);
+    return base <= max_rev;
+}
+
+static constexpr bool CanConsumeBuffer(std::size_t size, std::size_t offset, std::size_t required) {
+    if (offset > size) {
+        return false;
+    }
+    if (size < required) {
+        return false;
+    }
+    if ((size - offset) < required) {
+        return false;
+    }
+    return true;
+}
+
+} // namespace AudioCore
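The revision arithmetic relies on Common::MakeMagic placing its last character in the top byte, so masking the high byte and subtracting ASCII '0' (0x30) recovers the digit; a few compile-time checks under that assumption (note it only holds for single-digit revisions, a hypothetical "REV10" would not decode correctly):

    static_assert(VersionFromRevision(Common::MakeMagic('R', 'E', 'V', '7')) == 7);
    static_assert(IsValidRevision(Common::MakeMagic('R', 'E', 'V', '8')));        // equal to CURRENT_PROCESS_REVISION
    static_assert(!IsValidRevision(Common::MakeMagic('R', 'E', 'V', '9')));       // newer than the process revision
    static_assert(IsRevisionSupported(5, Common::MakeMagic('R', 'E', 'V', '8'))); // REV8 carries REV5 features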
diff --git a/src/core/arm/dynarmic/arm_dynarmic_64.cpp b/src/core/arm/dynarmic/arm_dynarmic_64.cpp
index 9add5d363..65cbfe5e6 100644
--- a/src/core/arm/dynarmic/arm_dynarmic_64.cpp
+++ b/src/core/arm/dynarmic/arm_dynarmic_64.cpp
@@ -20,6 +20,7 @@
 #include "core/hle/kernel/scheduler.h"
 #include "core/hle/kernel/svc.h"
 #include "core/memory.h"
+#include "core/settings.h"
 
 namespace Core {
 
@@ -144,6 +145,8 @@ std::shared_ptr<Dynarmic::A64::Jit> ARM_Dynarmic_64::MakeJit(Common::PageTable&
     config.page_table_address_space_bits = address_space_bits;
     config.silently_mirror_page_table = false;
     config.absolute_offset_page_table = true;
+    config.detect_misaligned_access_via_page_table = 16 | 32 | 64 | 128;
+    config.only_detect_misalignment_via_page_table_on_page_boundary = true;
 
     // Multi-process state
     config.processor_id = core_index;
@@ -159,8 +162,11 @@ std::shared_ptr<Dynarmic::A64::Jit> ARM_Dynarmic_64::MakeJit(Common::PageTable&
     // Unpredictable instructions
     config.define_unpredictable_behaviour = true;
 
-    config.detect_misaligned_access_via_page_table = 16 | 32 | 64 | 128;
-    config.only_detect_misalignment_via_page_table_on_page_boundary = true;
+    // Optimizations
+    if (Settings::values.disable_cpu_opt) {
+        config.enable_optimizations = false;
+        config.enable_fast_dispatch = false;
+    }
 
     return std::make_shared<Dynarmic::A64::Jit>(config);
 }
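The mask assigned to detect_misaligned_access_via_page_table lists the access widths, in bits, that should be checked for misalignment through the page table; spelled out:

    // Assumption based on the dynarmic config field's naming: each listed width
    // (16/32/64/128-bit accesses) opts that size into misalignment detection.
    static_assert((16 | 32 | 64 | 128) == 0xF0);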
diff --git a/src/core/arm/unicorn/arm_unicorn.cpp b/src/core/arm/unicorn/arm_unicorn.cpp
index d189efb63..b96583123 100644
--- a/src/core/arm/unicorn/arm_unicorn.cpp
+++ b/src/core/arm/unicorn/arm_unicorn.cpp
@@ -11,6 +11,7 @@
 #include "core/core_timing.h"
 #include "core/hle/kernel/scheduler.h"
 #include "core/hle/kernel/svc.h"
+#include "core/memory.h"
 
 namespace Core {
 
@@ -171,7 +172,17 @@ MICROPROFILE_DEFINE(ARM_Jit_Unicorn, "ARM JIT", "Unicorn", MP_RGB(255, 64, 64));
 
 void ARM_Unicorn::ExecuteInstructions(std::size_t num_instructions) {
     MICROPROFILE_SCOPE(ARM_Jit_Unicorn);
+
+    // Temporarily map the code page for Unicorn
+    u64 map_addr{GetPC() & ~Memory::PAGE_MASK};
+    std::vector<u8> page_buffer(Memory::PAGE_SIZE);
+    system.Memory().ReadBlock(map_addr, page_buffer.data(), page_buffer.size());
+
+    CHECKED(uc_mem_map_ptr(uc, map_addr, page_buffer.size(),
+                           UC_PROT_READ | UC_PROT_WRITE | UC_PROT_EXEC, page_buffer.data()));
     CHECKED(uc_emu_start(uc, GetPC(), 1ULL << 63, 0, num_instructions));
+    CHECKED(uc_mem_unmap(uc, map_addr, page_buffer.size()));
+
     system.CoreTiming().AddTicks(num_instructions);
     if (GDBStub::IsServerEnabled()) {
         if (last_bkpt_hit && last_bkpt.type == GDBStub::BreakpointType::Execute) {
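The temporary mapping rounds the current PC down to a page boundary before copying that page into Unicorn's address space; with yuzu's 4 KiB pages (an assumption matching Memory::PAGE_SIZE) the mask math is:

    constexpr u64 kPageSize = 0x1000;        // stand-in for Memory::PAGE_SIZE
    constexpr u64 kPageMask = kPageSize - 1; // 0xFFF
    static_assert((0x80041234ULL & ~kPageMask) == 0x80041000ULL);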
diff --git a/src/core/crypto/key_manager.cpp b/src/core/crypto/key_manager.cpp
index 87e6a1fd3..8997c7082 100644
--- a/src/core/crypto/key_manager.cpp
+++ b/src/core/crypto/key_manager.cpp
@@ -1202,7 +1202,8 @@ const boost::container::flat_map<std::string, KeyIndex<S128KeyType>> KeyManager:
      {S128KeyType::Source, static_cast<u64>(SourceKeyType::KeyAreaKey),
       static_cast<u64>(KeyAreaKeyType::System)}},
     {"titlekek_source", {S128KeyType::Source, static_cast<u64>(SourceKeyType::Titlekek), 0}},
-    {"keyblob_mac_key_source", {S128KeyType::Source, static_cast<u64>(SourceKeyType::KeyblobMAC)}},
+    {"keyblob_mac_key_source",
+     {S128KeyType::Source, static_cast<u64>(SourceKeyType::KeyblobMAC), 0}},
     {"tsec_key", {S128KeyType::TSEC, 0, 0}},
     {"secure_boot_key", {S128KeyType::SecureBoot, 0, 0}},
     {"sd_seed", {S128KeyType::SDSeed, 0, 0}},
diff --git a/src/core/crypto/partition_data_manager.cpp b/src/core/crypto/partition_data_manager.cpp
index d64302f2e..7ed71ac3a 100644
--- a/src/core/crypto/partition_data_manager.cpp
+++ b/src/core/crypto/partition_data_manager.cpp
@@ -202,8 +202,8 @@ static std::array<Key128, 0x20> FindEncryptedMasterKeyFromHex(const std::vector<
     return out;
 }
 
-FileSys::VirtualFile FindFileInDirWithNames(const FileSys::VirtualDir& dir,
-                                            const std::string& name) {
+static FileSys::VirtualFile FindFileInDirWithNames(const FileSys::VirtualDir& dir,
+                                                   const std::string& name) {
     const auto upper = Common::ToUpper(name);
 
     for (const auto& fname : {name, name + ".bin", upper, upper + ".BIN"}) {
@@ -345,8 +345,7 @@ FileSys::VirtualFile PartitionDataManager::GetPackage2Raw(Package2Type type) con
     return package2.at(static_cast<size_t>(type));
 }
 
-bool AttemptDecrypt(const std::array<u8, 16>& key, Package2Header& header) {
-
+static bool AttemptDecrypt(const std::array<u8, 16>& key, Package2Header& header) {
     const std::vector<u8> iv(header.header_ctr.begin(), header.header_ctr.end());
     Package2Header temp = header;
     AESCipher<Key128> cipher(key, Mode::CTR);
diff --git a/src/core/file_sys/program_metadata.cpp b/src/core/file_sys/program_metadata.cpp
index 1d6c30962..43169bf9f 100644
--- a/src/core/file_sys/program_metadata.cpp
+++ b/src/core/file_sys/program_metadata.cpp
@@ -51,6 +51,17 @@ Loader::ResultStatus ProgramMetadata::Load(VirtualFile file) {
     return Loader::ResultStatus::Success;
 }
 
+/*static*/ ProgramMetadata ProgramMetadata::GetDefault() {
+    ProgramMetadata result;
+
+    result.LoadManual(
+        true /*is_64_bit*/, FileSys::ProgramAddressSpaceType::Is39Bit /*address_space*/,
+        0x2c /*main_thread_prio*/, 0 /*main_thread_core*/, 0x00100000 /*main_thread_stack_size*/,
+        {}, 0xFFFFFFFFFFFFFFFF /*filesystem_permissions*/, {} /*capabilities*/);
+
+    return result;
+}
+
 void ProgramMetadata::LoadManual(bool is_64_bit, ProgramAddressSpaceType address_space,
                                  s32 main_thread_prio, u32 main_thread_core,
                                  u32 main_thread_stack_size, u64 title_id,
diff --git a/src/core/file_sys/program_metadata.h b/src/core/file_sys/program_metadata.h
index f8759a396..35069972b 100644
--- a/src/core/file_sys/program_metadata.h
+++ b/src/core/file_sys/program_metadata.h
@@ -44,9 +44,13 @@ public:
     ProgramMetadata();
     ~ProgramMetadata();
 
+    /// Gets a default ProgramMetadata configuration, should only be used for homebrew formats where
+    /// we do not have an NPDM file
+    static ProgramMetadata GetDefault();
+
     Loader::ResultStatus Load(VirtualFile file);
 
-    // Load from parameters instead of NPDM file, used for KIP
+    /// Load from parameters instead of NPDM file, used for KIP
     void LoadManual(bool is_64_bit, ProgramAddressSpaceType address_space, s32 main_thread_prio,
                     u32 main_thread_core, u32 main_thread_stack_size, u64 title_id,
                     u64 filesystem_permissions, KernelCapabilityDescriptors capabilities);
diff --git a/src/core/gdbstub/gdbstub.cpp b/src/core/gdbstub/gdbstub.cpp
index 2f15635c5..70c0f8b80 100644
--- a/src/core/gdbstub/gdbstub.cpp
+++ b/src/core/gdbstub/gdbstub.cpp
@@ -1389,10 +1389,9 @@ void SendTrap(Kernel::Thread* thread, int trap) {
         return;
     }
 
-    if (!halt_loop || current_thread == thread) {
-        current_thread = thread;
-        SendSignal(thread, trap);
-    }
+    current_thread = thread;
+    SendSignal(thread, trap);
+
     halt_loop = true;
     send_trap = false;
 }
diff --git a/src/core/hle/kernel/thread.cpp b/src/core/hle/kernel/thread.cpp
index 4c0451c01..a919750a6 100644
--- a/src/core/hle/kernel/thread.cpp
+++ b/src/core/hle/kernel/thread.cpp
@@ -150,8 +150,7 @@ static void ResetThreadContext64(Core::ARM_Interface::ThreadContext64& context,
     context.pc = entry_point;
     context.sp = stack_top;
     // TODO(merry): Perform a hardware test to determine the below value.
-    // AHP = 0, DN = 1, FTZ = 1, RMode = Round towards zero
-    context.fpcr = 0x03C00000;
+    context.fpcr = 0;
 }
 
 ResultVal<std::shared_ptr<Thread>> Thread::Create(KernelCore& kernel, std::string name,
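For reference, the removed constant decodes as follows against the architectural AArch64 FPCR layout (so the new value of 0 selects round-to-nearest with no flush-to-zero or default-NaN behaviour):

    // Sketch of the old value's bit fields, assuming the standard FPCR bit positions.
    constexpr u32 RMODE_RZ = 0b11u << 22; // RMode = round towards zero
    constexpr u32 FZ = 1u << 24;          // flush denormals to zero
    constexpr u32 DN = 1u << 25;          // propagate the default NaN
    static_assert((RMODE_RZ | FZ | DN) == 0x03C00000);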
diff --git a/src/core/hle/service/am/am.cpp b/src/core/hle/service/am/am.cpp
index 557608e76..3ece2cf3c 100644
--- a/src/core/hle/service/am/am.cpp
+++ b/src/core/hle/service/am/am.cpp
@@ -903,7 +903,7 @@ private:
     void PopOutData(Kernel::HLERequestContext& ctx) {
         LOG_DEBUG(Service_AM, "called");
 
-        const auto storage = applet->GetBroker().PopNormalDataToGame();
+        auto storage = applet->GetBroker().PopNormalDataToGame();
         if (storage == nullptr) {
             LOG_ERROR(Service_AM,
                       "storage is a nullptr. There is no data in the current normal channel");
@@ -934,7 +934,7 @@ private:
     void PopInteractiveOutData(Kernel::HLERequestContext& ctx) {
         LOG_DEBUG(Service_AM, "called");
 
-        const auto storage = applet->GetBroker().PopInteractiveDataToGame();
+        auto storage = applet->GetBroker().PopInteractiveDataToGame();
         if (storage == nullptr) {
             LOG_ERROR(Service_AM,
                       "storage is a nullptr. There is no data in the current interactive channel");
diff --git a/src/core/hle/service/audio/audren_u.cpp b/src/core/hle/service/audio/audren_u.cpp
index 175cabf45..d8359abaa 100644
--- a/src/core/hle/service/audio/audren_u.cpp
+++ b/src/core/hle/service/audio/audren_u.cpp
@@ -92,11 +92,16 @@ private:
     }
 
     void RequestUpdateImpl(Kernel::HLERequestContext& ctx) {
-        LOG_WARNING(Service_Audio, "(STUBBED) called");
+        LOG_DEBUG(Service_Audio, "(STUBBED) called");
+
+        auto result = renderer->UpdateAudioRenderer(ctx.ReadBuffer());
+
+        if (result.Succeeded()) {
+            ctx.WriteBuffer(result.Unwrap());
+        }
 
-        ctx.WriteBuffer(renderer->UpdateAudioRenderer(ctx.ReadBuffer()));
         IPC::ResponseBuilder rb{ctx, 2};
-        rb.Push(RESULT_SUCCESS);
+        rb.Push(result.Code());
     }
 
     void Start(Kernel::HLERequestContext& ctx) {
@@ -252,8 +257,6 @@ private:
     }
 
     void GetAudioDeviceOutputVolume(Kernel::HLERequestContext& ctx) {
-        IPC::RequestParser rp{ctx};
-
         const auto device_name_buffer = ctx.ReadBuffer();
         const std::string name = Common::StringFromBuffer(device_name_buffer);
 
diff --git a/src/core/hle/service/bcat/backend/boxcat.cpp b/src/core/hle/service/bcat/backend/boxcat.cpp
index f589864ee..5febe8fc1 100644
--- a/src/core/hle/service/bcat/backend/boxcat.cpp
+++ b/src/core/hle/service/bcat/backend/boxcat.cpp
@@ -18,6 +18,7 @@
 #include "core/hle/service/bcat/backend/boxcat.h"
 #include "core/settings.h"
 
+namespace Service::BCAT {
 namespace {
 
 // Prevents conflicts with windows macro called CreateFile
@@ -30,10 +31,6 @@ bool VfsDeleteFileWrap(FileSys::VirtualDir dir, std::string_view name) {
     return dir->DeleteFile(name);
 }
 
-} // Anonymous namespace
-
-namespace Service::BCAT {
-
 constexpr ResultCode ERROR_GENERAL_BCAT_FAILURE{ErrorModule::BCAT, 1};
 
 constexpr char BOXCAT_HOSTNAME[] = "api.yuzu-emu.org";
@@ -90,8 +87,6 @@ constexpr u32 PORT = 443;
 constexpr u32 TIMEOUT_SECONDS = 30;
 [[maybe_unused]] constexpr u64 VFS_COPY_BLOCK_SIZE = 1ULL << 24; // 4MB
 
-namespace {
-
 std::string GetBINFilePath(u64 title_id) {
     return fmt::format("{}bcat/{:016X}/launchparam.bin",
                        FileUtil::GetUserPath(FileUtil::UserPath::CacheDir), title_id);
diff --git a/src/core/hle/service/es/es.cpp b/src/core/hle/service/es/es.cpp
index df00ae625..f8e9df4b1 100644
--- a/src/core/hle/service/es/es.cpp
+++ b/src/core/hle/service/es/es.cpp
@@ -4,6 +4,7 @@
 
 #include "core/crypto/key_manager.h"
 #include "core/hle/ipc_helpers.h"
+#include "core/hle/service/es/es.h"
 #include "core/hle/service/service.h"
 
 namespace Service::ES {
@@ -76,7 +77,6 @@ private:
     }
 
     void ImportTicket(Kernel::HLERequestContext& ctx) {
-        IPC::RequestParser rp{ctx};
         const auto ticket = ctx.ReadBuffer();
         const auto cert = ctx.ReadBuffer(1);
 
diff --git a/src/core/hle/service/hid/controllers/npad.cpp b/src/core/hle/service/hid/controllers/npad.cpp
index 2ccfffc19..c55d900e2 100644
--- a/src/core/hle/service/hid/controllers/npad.cpp
+++ b/src/core/hle/service/hid/controllers/npad.cpp
@@ -502,7 +502,7 @@ void Controller_NPad::SetNpadMode(u32 npad_id, NPadAssignments assignment_mode)
 
 void Controller_NPad::VibrateController(const std::vector<u32>& controller_ids,
                                         const std::vector<Vibration>& vibrations) {
-    LOG_WARNING(Service_HID, "(STUBBED) called");
+    LOG_DEBUG(Service_HID, "(STUBBED) called");
 
     if (!can_controllers_vibrate) {
         return;
diff --git a/src/core/hle/service/time/time_zone_manager.cpp b/src/core/hle/service/time/time_zone_manager.cpp
index c8159bcd5..69152d0ac 100644
--- a/src/core/hle/service/time/time_zone_manager.cpp
+++ b/src/core/hle/service/time/time_zone_manager.cpp
@@ -518,8 +518,8 @@ static bool ParseTimeZoneBinary(TimeZoneRule& time_zone_rule, FileSys::VirtualFi
     constexpr s32 time_zone_max_leaps{50};
     constexpr s32 time_zone_max_chars{50};
     if (!(0 <= header.leap_count && header.leap_count < time_zone_max_leaps &&
-          0 < header.type_count && header.type_count < time_zone_rule.ttis.size() &&
-          0 <= header.time_count && header.time_count < time_zone_rule.ats.size() &&
+          0 < header.type_count && header.type_count < s32(time_zone_rule.ttis.size()) &&
+          0 <= header.time_count && header.time_count < s32(time_zone_rule.ats.size()) &&
           0 <= header.char_count && header.char_count < time_zone_max_chars &&
           (header.ttis_std_count == header.type_count || header.ttis_std_count == 0) &&
           (header.ttis_gmt_count == header.type_count || header.ttis_gmt_count == 0))) {
diff --git a/src/core/loader/elf.cpp b/src/core/loader/elf.cpp
index 1e9ed2837..8f7615115 100644
--- a/src/core/loader/elf.cpp
+++ b/src/core/loader/elf.cpp
@@ -398,6 +398,11 @@ AppLoader_ELF::LoadResult AppLoader_ELF::Load(Kernel::Process& process) {
     Kernel::CodeSet codeset = elf_reader.LoadInto(base_address);
     const VAddr entry_point = codeset.entrypoint;
 
+    // Setup the process code layout
+    if (process.LoadFromMetadata(FileSys::ProgramMetadata::GetDefault(), buffer.size()).IsError()) {
+        return {ResultStatus::ErrorNotInitialized, {}};
+    }
+
     process.LoadModule(std::move(codeset), entry_point);
 
     is_loaded = true;
diff --git a/src/core/loader/nro.cpp b/src/core/loader/nro.cpp
index 5d7e8136e..906544bc9 100644
--- a/src/core/loader/nro.cpp
+++ b/src/core/loader/nro.cpp
@@ -131,7 +131,7 @@ static constexpr u32 PageAlignSize(u32 size) {
 }
 
 static bool LoadNroImpl(Kernel::Process& process, const std::vector<u8>& data,
-                        const std::string& name, VAddr load_base) {
+                        const std::string& name) {
     if (data.size() < sizeof(NroHeader)) {
         return {};
     }
@@ -187,19 +187,25 @@ static bool LoadNroImpl(Kernel::Process& process, const std::vector<u8>& data,
     codeset.DataSegment().size += bss_size;
     program_image.resize(static_cast<u32>(program_image.size()) + bss_size);
 
+    // Setup the process code layout
+    if (process.LoadFromMetadata(FileSys::ProgramMetadata::GetDefault(), program_image.size())
+            .IsError()) {
+        return false;
+    }
+
     // Load codeset for current process
     codeset.memory = std::move(program_image);
-    process.LoadModule(std::move(codeset), load_base);
+    process.LoadModule(std::move(codeset), process.PageTable().GetCodeRegionStart());
 
     // Register module with GDBStub
-    GDBStub::RegisterModule(name, load_base, load_base);
+    GDBStub::RegisterModule(name, process.PageTable().GetCodeRegionStart(),
+                            process.PageTable().GetCodeRegionEnd());
 
     return true;
 }
 
-bool AppLoader_NRO::LoadNro(Kernel::Process& process, const FileSys::VfsFile& file,
-                            VAddr load_base) {
-    return LoadNroImpl(process, file.ReadAllBytes(), file.GetName(), load_base);
+bool AppLoader_NRO::LoadNro(Kernel::Process& process, const FileSys::VfsFile& file) {
+    return LoadNroImpl(process, file.ReadAllBytes(), file.GetName());
 }
 
 AppLoader_NRO::LoadResult AppLoader_NRO::Load(Kernel::Process& process) {
@@ -207,10 +213,7 @@ AppLoader_NRO::LoadResult AppLoader_NRO::Load(Kernel::Process& process) {
         return {ResultStatus::ErrorAlreadyLoaded, {}};
     }
 
-    // Load NRO
-    const VAddr base_address = process.PageTable().GetCodeRegionStart();
-
-    if (!LoadNro(process, *file, base_address)) {
+    if (!LoadNro(process, *file)) {
         return {ResultStatus::ErrorLoadingNRO, {}};
     }
 
diff --git a/src/core/loader/nro.h b/src/core/loader/nro.h
index 71811bc29..4593d48fb 100644
--- a/src/core/loader/nro.h
+++ b/src/core/loader/nro.h
@@ -47,7 +47,7 @@ public:
     bool IsRomFSUpdatable() const override;
 
 private:
-    bool LoadNro(Kernel::Process& process, const FileSys::VfsFile& file, VAddr load_base);
+    bool LoadNro(Kernel::Process& process, const FileSys::VfsFile& file);
 
     std::vector<u8> icon_data;
     std::unique_ptr<FileSys::NACP> nacp;
diff --git a/src/core/settings.cpp b/src/core/settings.cpp
index c1282cb80..cd6c257f5 100644
--- a/src/core/settings.cpp
+++ b/src/core/settings.cpp
@@ -92,7 +92,7 @@ void LogSettings() {
     LogSetting("Renderer_UseFrameLimit", Settings::values.use_frame_limit);
     LogSetting("Renderer_FrameLimit", Settings::values.frame_limit);
     LogSetting("Renderer_UseDiskShaderCache", Settings::values.use_disk_shader_cache);
-    LogSetting("Renderer_UseAccurateGpuEmulation", Settings::values.use_accurate_gpu_emulation);
+    LogSetting("Renderer_GPUAccuracyLevel", Settings::values.gpu_accuracy);
     LogSetting("Renderer_UseAsynchronousGpuEmulation",
                Settings::values.use_asynchronous_gpu_emulation);
     LogSetting("Renderer_UseVsync", Settings::values.use_vsync);
@@ -109,4 +109,12 @@ void LogSettings() {
     LogSetting("Services_BCATBoxcatLocal", Settings::values.bcat_boxcat_local);
 }
 
+bool IsGPULevelExtreme() {
+    return values.gpu_accuracy == GPUAccuracy::Extreme;
+}
+
+bool IsGPULevelHigh() {
+    return values.gpu_accuracy == GPUAccuracy::Extreme || values.gpu_accuracy == GPUAccuracy::High;
+}
+
 } // namespace Settings
diff --git a/src/core/settings.h b/src/core/settings.h
index 79ec01731..7d09253f5 100644
--- a/src/core/settings.h
+++ b/src/core/settings.h
@@ -376,6 +376,12 @@ enum class RendererBackend {
     Vulkan = 1,
 };
 
+enum class GPUAccuracy : u32 {
+    Normal = 0,
+    High = 1,
+    Extreme = 2,
+};
+
 struct Values {
     // System
     bool use_docked_mode;
@@ -436,7 +442,7 @@ struct Values {
     bool use_frame_limit;
     u16 frame_limit;
     bool use_disk_shader_cache;
-    bool use_accurate_gpu_emulation;
+    GPUAccuracy gpu_accuracy;
     bool use_asynchronous_gpu_emulation;
     bool use_vsync;
     bool force_30fps_mode;
@@ -464,6 +470,7 @@ struct Values {
     bool dump_nso;
     bool reporting_services;
     bool quest_flag;
+    bool disable_cpu_opt;
 
     // BCAT
     std::string bcat_backend;
@@ -479,6 +486,9 @@ struct Values {
     std::map<u64, std::vector<std::string>> disabled_addons;
 } extern values;
 
+bool IsGPULevelExtreme();
+bool IsGPULevelHigh();
+
 void Apply();
 void LogSettings();
 } // namespace Settings
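Because the old boolean becomes a three-level enum, frontends that previously stored use_accurate_gpu_emulation need a small migration; a hedged sketch (this mapping is an assumption for illustration, not part of the diff):

    // Hypothetical conversion used when reading an old config file.
    Settings::GPUAccuracy FromLegacyBool(bool use_accurate_gpu_emulation) {
        return use_accurate_gpu_emulation ? Settings::GPUAccuracy::High
                                          : Settings::GPUAccuracy::Normal;
    }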
diff --git a/src/core/telemetry_session.cpp b/src/core/telemetry_session.cpp
index fd5a3ee9f..1c3b03a1c 100644
--- a/src/core/telemetry_session.cpp
+++ b/src/core/telemetry_session.cpp
@@ -56,6 +56,18 @@ static const char* TranslateRenderer(Settings::RendererBackend backend) {
     return "Unknown";
 }
 
+static const char* TranslateGPUAccuracyLevel(Settings::GPUAccuracy backend) {
+    switch (backend) {
+    case Settings::GPUAccuracy::Normal:
+        return "Normal";
+    case Settings::GPUAccuracy::High:
+        return "High";
+    case Settings::GPUAccuracy::Extreme:
+        return "Extreme";
+    }
+    return "Unknown";
+}
+
 u64 GetTelemetryId() {
     u64 telemetry_id{};
     const std::string filename{FileUtil::GetUserPath(FileUtil::UserPath::ConfigDir) +
@@ -184,8 +196,8 @@ void TelemetrySession::AddInitialInfo(Loader::AppLoader& app_loader) {
     AddField(field_type, "Renderer_UseFrameLimit", Settings::values.use_frame_limit);
     AddField(field_type, "Renderer_FrameLimit", Settings::values.frame_limit);
     AddField(field_type, "Renderer_UseDiskShaderCache", Settings::values.use_disk_shader_cache);
-    AddField(field_type, "Renderer_UseAccurateGpuEmulation",
-             Settings::values.use_accurate_gpu_emulation);
+    AddField(field_type, "Renderer_GPUAccuracyLevel",
+             TranslateGPUAccuracyLevel(Settings::values.gpu_accuracy));
     AddField(field_type, "Renderer_UseAsynchronousGpuEmulation",
              Settings::values.use_asynchronous_gpu_emulation);
     AddField(field_type, "Renderer_UseVsync", Settings::values.use_vsync);
diff --git a/src/tests/core/core_timing.cpp b/src/tests/core/core_timing.cpp
index 1e3940801..ff2d11cc8 100644
--- a/src/tests/core/core_timing.cpp
+++ b/src/tests/core/core_timing.cpp
@@ -14,13 +14,14 @@
 #include "core/core.h"
 #include "core/core_timing.h"
 
+namespace {
 // Numbers are chosen randomly to make sure the correct one is given.
-static constexpr std::array<u64, 5> CB_IDS{{42, 144, 93, 1026, UINT64_C(0xFFFF7FFFF7FFFF)}};
-static constexpr int MAX_SLICE_LENGTH = 10000; // Copied from CoreTiming internals
+constexpr std::array<u64, 5> CB_IDS{{42, 144, 93, 1026, UINT64_C(0xFFFF7FFFF7FFFF)}};
+constexpr int MAX_SLICE_LENGTH = 10000; // Copied from CoreTiming internals
 
-static std::bitset<CB_IDS.size()> callbacks_ran_flags;
-static u64 expected_callback = 0;
-static s64 lateness = 0;
+std::bitset<CB_IDS.size()> callbacks_ran_flags;
+u64 expected_callback = 0;
+s64 lateness = 0;
 
 template <unsigned int IDX>
 void CallbackTemplate(u64 userdata, s64 cycles_late) {
@@ -31,7 +32,7 @@ void CallbackTemplate(u64 userdata, s64 cycles_late) {
     REQUIRE(lateness == cycles_late);
 }
 
-static u64 callbacks_done = 0;
+u64 callbacks_done = 0;
 
 void EmptyCallback(u64 userdata, s64 cycles_late) {
     ++callbacks_done;
@@ -48,8 +49,8 @@ struct ScopeInit final {
     Core::Timing::CoreTiming core_timing;
 };
 
-static void AdvanceAndCheck(Core::Timing::CoreTiming& core_timing, u32 idx, u32 context = 0,
-                            int expected_lateness = 0, int cpu_downcount = 0) {
+void AdvanceAndCheck(Core::Timing::CoreTiming& core_timing, u32 idx, u32 context = 0,
+                     int expected_lateness = 0, int cpu_downcount = 0) {
     callbacks_ran_flags = 0;
     expected_callback = CB_IDS[idx];
     lateness = expected_lateness;
@@ -62,6 +63,7 @@ static void AdvanceAndCheck(Core::Timing::CoreTiming& core_timing, u32 idx, u32
 
     REQUIRE(decltype(callbacks_ran_flags)().set(idx) == callbacks_ran_flags);
 }
+} // Anonymous namespace
 
 TEST_CASE("CoreTiming[BasicOrder]", "[core]") {
     ScopeInit guard;
diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt
index 258d58eba..8ede4ba9b 100644
--- a/src/video_core/CMakeLists.txt
+++ b/src/video_core/CMakeLists.txt
@@ -23,6 +23,7 @@ add_library(video_core STATIC
     engines/shader_bytecode.h
     engines/shader_header.h
     engines/shader_type.h
+    fence_manager.h
     gpu.cpp
     gpu.h
     gpu_asynch.cpp
@@ -51,6 +52,8 @@ add_library(video_core STATIC
     renderer_opengl/gl_buffer_cache.h
     renderer_opengl/gl_device.cpp
     renderer_opengl/gl_device.h
+    renderer_opengl/gl_fence_manager.cpp
+    renderer_opengl/gl_fence_manager.h
     renderer_opengl/gl_framebuffer_cache.cpp
     renderer_opengl/gl_framebuffer_cache.h
     renderer_opengl/gl_rasterizer.cpp
@@ -160,6 +163,8 @@ if (ENABLE_VULKAN)
         renderer_vulkan/fixed_pipeline_state.h
         renderer_vulkan/maxwell_to_vk.cpp
         renderer_vulkan/maxwell_to_vk.h
+        renderer_vulkan/nsight_aftermath_tracker.cpp
+        renderer_vulkan/nsight_aftermath_tracker.h
         renderer_vulkan/renderer_vulkan.h
         renderer_vulkan/renderer_vulkan.cpp
         renderer_vulkan/vk_blit_screen.cpp
@@ -174,6 +179,8 @@ if (ENABLE_VULKAN)
         renderer_vulkan/vk_descriptor_pool.h
         renderer_vulkan/vk_device.cpp
         renderer_vulkan/vk_device.h
+        renderer_vulkan/vk_fence_manager.cpp
+        renderer_vulkan/vk_fence_manager.h
         renderer_vulkan/vk_graphics_pipeline.cpp
         renderer_vulkan/vk_graphics_pipeline.h
         renderer_vulkan/vk_image.cpp
@@ -213,19 +220,30 @@ if (ENABLE_VULKAN)
         renderer_vulkan/wrapper.cpp
         renderer_vulkan/wrapper.h
     )
-
-    target_include_directories(video_core PRIVATE sirit ../../externals/Vulkan-Headers/include)
-    target_compile_definitions(video_core PRIVATE HAS_VULKAN)
 endif()
 
 create_target_directory_groups(video_core)
 
 target_link_libraries(video_core PUBLIC common core)
 target_link_libraries(video_core PRIVATE glad)
+
 if (ENABLE_VULKAN)
+    target_include_directories(video_core PRIVATE sirit ../../externals/Vulkan-Headers/include)
+    target_compile_definitions(video_core PRIVATE HAS_VULKAN)
     target_link_libraries(video_core PRIVATE sirit)
 endif()
 
+if (ENABLE_NSIGHT_AFTERMATH)
+    if (NOT DEFINED ENV{NSIGHT_AFTERMATH_SDK})
+        message(ERROR "Environment variable NSIGHT_AFTERMATH_SDK has to be provided")
+    endif()
+    if (NOT WIN32)
+        message(ERROR "Nsight Aftermath doesn't support non-Windows platforms")
+    endif()
+    target_compile_definitions(video_core PRIVATE HAS_NSIGHT_AFTERMATH)
+    target_include_directories(video_core PRIVATE "$ENV{NSIGHT_AFTERMATH_SDK}/include")
+endif()
+
 if (MSVC)
     target_compile_options(video_core PRIVATE /we4267)
 else()
diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h
index 83e7a1cde..510f11089 100644
--- a/src/video_core/buffer_cache/buffer_cache.h
+++ b/src/video_core/buffer_cache/buffer_cache.h
@@ -5,6 +5,7 @@
 #pragma once
 
 #include <array>
+#include <list>
 #include <memory>
 #include <mutex>
 #include <unordered_map>
@@ -18,8 +19,10 @@
 
 #include "common/alignment.h"
 #include "common/common_types.h"
+#include "common/logging/log.h"
 #include "core/core.h"
 #include "core/memory.h"
+#include "core/settings.h"
 #include "video_core/buffer_cache/buffer_block.h"
 #include "video_core/buffer_cache/map_interval.h"
 #include "video_core/memory_manager.h"
@@ -79,6 +82,9 @@ public:
         auto map = MapAddress(block, gpu_addr, cpu_addr, size);
         if (is_written) {
             map->MarkAsModified(true, GetModifiedTicks());
+            if (Settings::IsGPULevelHigh() && Settings::values.use_asynchronous_gpu_emulation) {
+                MarkForAsyncFlush(map);
+            }
             if (!map->IsWritten()) {
                 map->MarkAsWritten(true);
                 MarkRegionAsWritten(map->GetStart(), map->GetEnd() - 1);
@@ -137,11 +143,22 @@ public:
         });
         for (auto& object : objects) {
             if (object->IsModified() && object->IsRegistered()) {
+                mutex.unlock();
                 FlushMap(object);
+                mutex.lock();
             }
         }
     }
 
+    bool MustFlushRegion(VAddr addr, std::size_t size) {
+        std::lock_guard lock{mutex};
+
+        const std::vector<MapInterval> objects = GetMapsInRange(addr, size);
+        return std::any_of(objects.cbegin(), objects.cend(), [](const MapInterval& map) {
+            return map->IsModified() && map->IsRegistered();
+        });
+    }
+
     /// Mark the specified region as being invalidated
     void InvalidateRegion(VAddr addr, u64 size) {
         std::lock_guard lock{mutex};
@@ -154,6 +171,77 @@ public:
         }
     }
 
+    void OnCPUWrite(VAddr addr, std::size_t size) {
+        std::lock_guard lock{mutex};
+
+        for (const auto& object : GetMapsInRange(addr, size)) {
+            if (object->IsMemoryMarked() && object->IsRegistered()) {
+                UnmarkMemory(object);
+                object->SetSyncPending(true);
+                marked_for_unregister.emplace_back(object);
+            }
+        }
+    }
+
+    void SyncGuestHost() {
+        std::lock_guard lock{mutex};
+
+        for (const auto& object : marked_for_unregister) {
+            if (object->IsRegistered()) {
+                object->SetSyncPending(false);
+                Unregister(object);
+            }
+        }
+        marked_for_unregister.clear();
+    }
+
+    void CommitAsyncFlushes() {
+        if (uncommitted_flushes) {
+            auto commit_list = std::make_shared<std::list<MapInterval>>();
+            for (auto& map : *uncommitted_flushes) {
+                if (map->IsRegistered() && map->IsModified()) {
+                    // TODO(Blinkhawk): Implement backend asynchronous flushing
+                    // AsyncFlushMap(map)
+                    commit_list->push_back(map);
+                }
+            }
+            if (!commit_list->empty()) {
+                committed_flushes.push_back(commit_list);
+            } else {
+                committed_flushes.emplace_back();
+            }
+        } else {
+            committed_flushes.emplace_back();
+        }
+        uncommitted_flushes.reset();
+    }
+
+    bool ShouldWaitAsyncFlushes() const {
+        return !committed_flushes.empty() && committed_flushes.front() != nullptr;
+    }
+
+    bool HasUncommittedFlushes() const {
+        return uncommitted_flushes != nullptr;
+    }
+
+    void PopAsyncFlushes() {
+        if (committed_flushes.empty()) {
+            return;
+        }
+        auto& flush_list = committed_flushes.front();
+        if (!flush_list) {
+            committed_flushes.pop_front();
+            return;
+        }
+        for (MapInterval& map : *flush_list) {
+            if (map->IsRegistered()) {
+                // TODO(Blinkhawk): Replace this for reading the asynchronous flush
+                FlushMap(map);
+            }
+        }
+        committed_flushes.pop_front();
+    }
+
     virtual BufferType GetEmptyBuffer(std::size_t size) = 0;
 
 protected:
@@ -196,17 +284,30 @@ protected:
196 const IntervalType interval{new_map->GetStart(), new_map->GetEnd()}; 284 const IntervalType interval{new_map->GetStart(), new_map->GetEnd()};
197 mapped_addresses.insert({interval, new_map}); 285 mapped_addresses.insert({interval, new_map});
198 rasterizer.UpdatePagesCachedCount(cpu_addr, size, 1); 286 rasterizer.UpdatePagesCachedCount(cpu_addr, size, 1);
287 new_map->SetMemoryMarked(true);
199 if (inherit_written) { 288 if (inherit_written) {
200 MarkRegionAsWritten(new_map->GetStart(), new_map->GetEnd() - 1); 289 MarkRegionAsWritten(new_map->GetStart(), new_map->GetEnd() - 1);
201 new_map->MarkAsWritten(true); 290 new_map->MarkAsWritten(true);
202 } 291 }
203 } 292 }
204 293
-    /// Unregisters an object from the cache
-    void Unregister(MapInterval& map) {
+    void UnmarkMemory(const MapInterval& map) {
+        if (!map->IsMemoryMarked()) {
+            return;
+        }
         const std::size_t size = map->GetEnd() - map->GetStart();
         rasterizer.UpdatePagesCachedCount(map->GetStart(), size, -1);
+        map->SetMemoryMarked(false);
+    }
+
+    /// Unregisters an object from the cache
+    void Unregister(const MapInterval& map) {
+        UnmarkMemory(map);
209 map->MarkAsRegistered(false); 306 map->MarkAsRegistered(false);
307 if (map->IsSyncPending()) {
308 marked_for_unregister.remove(map);
309 map->SetSyncPending(false);
310 }
210 if (map->IsWritten()) { 311 if (map->IsWritten()) {
211 UnmarkRegionAsWritten(map->GetStart(), map->GetEnd() - 1); 312 UnmarkRegionAsWritten(map->GetStart(), map->GetEnd() - 1);
212 } 313 }
@@ -264,6 +365,9 @@ private:
264 MapInterval new_map = CreateMap(new_start, new_end, new_gpu_addr); 365 MapInterval new_map = CreateMap(new_start, new_end, new_gpu_addr);
265 if (modified_inheritance) { 366 if (modified_inheritance) {
266 new_map->MarkAsModified(true, GetModifiedTicks()); 367 new_map->MarkAsModified(true, GetModifiedTicks());
368 if (Settings::IsGPULevelHigh() && Settings::values.use_asynchronous_gpu_emulation) {
369 MarkForAsyncFlush(new_map);
370 }
267 } 371 }
268 Register(new_map, write_inheritance); 372 Register(new_map, write_inheritance);
269 return new_map; 373 return new_map;
@@ -450,6 +554,13 @@ private:
450 return false; 554 return false;
451 } 555 }
452 556
557 void MarkForAsyncFlush(MapInterval& map) {
558 if (!uncommitted_flushes) {
559 uncommitted_flushes = std::make_shared<std::unordered_set<MapInterval>>();
560 }
561 uncommitted_flushes->insert(map);
562 }
563
453 VideoCore::RasterizerInterface& rasterizer; 564 VideoCore::RasterizerInterface& rasterizer;
454 Core::System& system; 565 Core::System& system;
455 566
@@ -479,6 +590,10 @@ private:
479 u64 modified_ticks = 0; 590 u64 modified_ticks = 0;
480 591
481 std::vector<u8> staging_buffer; 592 std::vector<u8> staging_buffer;
593 std::list<MapInterval> marked_for_unregister;
594
595 std::shared_ptr<std::unordered_set<MapInterval>> uncommitted_flushes{};
596 std::list<std::shared_ptr<std::list<MapInterval>>> committed_flushes;
482 597
483 std::recursive_mutex mutex; 598 std::recursive_mutex mutex;
484}; 599};
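The commit/pop machinery added above snapshots the set of dirty maps each time a fence is queued, so releasing a fence later flushes exactly what was dirty at queue time, in order. A minimal standalone sketch of that pattern, with plain ints standing in for MapInterval (names here are illustrative, not the yuzu API):

    #include <iostream>
    #include <list>
    #include <memory>
    #include <unordered_set>

    // Dirty "intervals" are plain ints in this sketch.
    class AsyncFlushQueue {
    public:
        void MarkDirty(int interval) {
            if (!uncommitted) {
                uncommitted = std::make_shared<std::unordered_set<int>>();
            }
            uncommitted->insert(interval);
        }

        // Snapshot the dirty set at fence time; a null slot keeps releases ordered.
        void Commit() {
            committed.push_back(std::move(uncommitted));
        }

        // Release the oldest snapshot, flushing whatever it recorded.
        void Pop() {
            if (committed.empty()) {
                return;
            }
            if (const auto& list = committed.front()) {
                for (const int interval : *list) {
                    std::cout << "flush interval " << interval << '\n';
                }
            }
            committed.pop_front();
        }

    private:
        std::shared_ptr<std::unordered_set<int>> uncommitted;
        std::list<std::shared_ptr<std::unordered_set<int>>> committed;
    };

    int main() {
        AsyncFlushQueue queue;
        queue.MarkDirty(1);
        queue.Commit(); // snapshot {1}
        queue.Commit(); // nothing dirty: null slot, still released in order
        queue.MarkDirty(2);
        queue.Commit(); // snapshot {2}
        queue.Pop();
        queue.Pop();
        queue.Pop();
    }

Note the deliberate push of an empty slot when nothing is dirty: every fence gets a queue entry, so PopAsyncFlushes stays in lockstep with fence releases.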
diff --git a/src/video_core/buffer_cache/map_interval.h b/src/video_core/buffer_cache/map_interval.h
index b0956029d..29d8b26f3 100644
--- a/src/video_core/buffer_cache/map_interval.h
+++ b/src/video_core/buffer_cache/map_interval.h
@@ -46,6 +46,22 @@ public:
46 return is_registered; 46 return is_registered;
47 } 47 }
48 48
49 void SetMemoryMarked(bool is_memory_marked_) {
50 is_memory_marked = is_memory_marked_;
51 }
52
53 bool IsMemoryMarked() const {
54 return is_memory_marked;
55 }
56
57 void SetSyncPending(bool is_sync_pending_) {
58 is_sync_pending = is_sync_pending_;
59 }
60
61 bool IsSyncPending() const {
62 return is_sync_pending;
63 }
64
49 VAddr GetStart() const { 65 VAddr GetStart() const {
50 return start; 66 return start;
51 } 67 }
@@ -83,6 +99,8 @@ private:
83 bool is_written{}; 99 bool is_written{};
84 bool is_modified{}; 100 bool is_modified{};
85 bool is_registered{}; 101 bool is_registered{};
102 bool is_memory_marked{};
103 bool is_sync_pending{};
86 u64 ticks{}; 104 u64 ticks{};
87}; 105};
88 106
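The two new flags encode a small deferred-unregister lifecycle: OnCPUWrite cheaply unmarks the pages and flags the interval as sync-pending, and the next SyncGuestHost performs the actual unregister. A reduced sketch of that state machine (hypothetical field names, not the real MapInterval API):

    #include <cassert>

    struct Interval {
        bool memory_marked = true; // pages counted in the rasterizer
        bool sync_pending = false; // queued for deferred unregister
        bool registered = true;
    };

    // CPU write path: cheap, only flips flags.
    void OnCPUWrite(Interval& i) {
        if (i.memory_marked && i.registered) {
            i.memory_marked = false; // UnmarkMemory
            i.sync_pending = true;   // defer the unregister
        }
    }

    // Synchronization point: the expensive part happens here.
    void SyncGuestHost(Interval& i) {
        if (i.sync_pending && i.registered) {
            i.sync_pending = false;
            i.registered = false; // Unregister
        }
    }

    int main() {
        Interval i;
        OnCPUWrite(i);
        assert(!i.memory_marked && i.sync_pending);
        SyncGuestHost(i);
        assert(!i.registered);
    }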
diff --git a/src/video_core/dma_pusher.cpp b/src/video_core/dma_pusher.cpp
index 713c14182..324dafdcd 100644
--- a/src/video_core/dma_pusher.cpp
+++ b/src/video_core/dma_pusher.cpp
@@ -12,7 +12,7 @@
12 12
13namespace Tegra { 13namespace Tegra {
14 14
-DmaPusher::DmaPusher(GPU& gpu) : gpu(gpu) {}
+DmaPusher::DmaPusher(Core::System& system, GPU& gpu) : gpu{gpu}, system{system} {}
16 16
17DmaPusher::~DmaPusher() = default; 17DmaPusher::~DmaPusher() = default;
18 18
@@ -21,17 +21,20 @@ MICROPROFILE_DEFINE(DispatchCalls, "GPU", "Execute command buffer", MP_RGB(128,
21void DmaPusher::DispatchCalls() { 21void DmaPusher::DispatchCalls() {
22 MICROPROFILE_SCOPE(DispatchCalls); 22 MICROPROFILE_SCOPE(DispatchCalls);
23 23
24 gpu.SyncGuestHost();
24 // On entering GPU code, assume all memory may be touched by the ARM core. 25 // On entering GPU code, assume all memory may be touched by the ARM core.
25 gpu.Maxwell3D().OnMemoryWrite(); 26 gpu.Maxwell3D().OnMemoryWrite();
26 27
27 dma_pushbuffer_subindex = 0; 28 dma_pushbuffer_subindex = 0;
28 29
-    while (Core::System::GetInstance().IsPoweredOn()) {
+    while (system.IsPoweredOn()) {
30 if (!Step()) { 31 if (!Step()) {
31 break; 32 break;
32 } 33 }
33 } 34 }
34 gpu.FlushCommands(); 35 gpu.FlushCommands();
36 gpu.SyncGuestHost();
37 gpu.OnCommandListEnd();
35} 38}
36 39
37bool DmaPusher::Step() { 40bool DmaPusher::Step() {
diff --git a/src/video_core/dma_pusher.h b/src/video_core/dma_pusher.h
index 6ab06518f..d6188614a 100644
--- a/src/video_core/dma_pusher.h
+++ b/src/video_core/dma_pusher.h
@@ -10,6 +10,10 @@
10#include "common/bit_field.h" 10#include "common/bit_field.h"
11#include "common/common_types.h" 11#include "common/common_types.h"
12 12
13namespace Core {
14class System;
15}
16
13namespace Tegra { 17namespace Tegra {
14 18
15enum class SubmissionMode : u32 { 19enum class SubmissionMode : u32 {
@@ -56,7 +60,7 @@ using CommandList = std::vector<Tegra::CommandListHeader>;
56 */ 60 */
57class DmaPusher { 61class DmaPusher {
58public: 62public:
-    explicit DmaPusher(GPU& gpu);
+    explicit DmaPusher(Core::System& system, GPU& gpu);
60 ~DmaPusher(); 64 ~DmaPusher();
61 65
62 void Push(CommandList&& entries) { 66 void Push(CommandList&& entries) {
@@ -72,8 +76,6 @@ private:
72 76
73 void CallMethod(u32 argument) const; 77 void CallMethod(u32 argument) const;
74 78
-    GPU& gpu;
-
77 std::vector<CommandHeader> command_headers; ///< Buffer for list of commands fetched at once 79 std::vector<CommandHeader> command_headers; ///< Buffer for list of commands fetched at once
78 80
79 std::queue<CommandList> dma_pushbuffer; ///< Queue of command lists to be processed 81 std::queue<CommandList> dma_pushbuffer; ///< Queue of command lists to be processed
@@ -92,6 +94,9 @@ private:
92 94
93 GPUVAddr dma_mget{}; ///< main pushbuffer last read address 95 GPUVAddr dma_mget{}; ///< main pushbuffer last read address
94 bool ib_enable{true}; ///< IB mode enabled 96 bool ib_enable{true}; ///< IB mode enabled
97
98 GPU& gpu;
99 Core::System& system;
95}; 100};
96 101
97} // namespace Tegra 102} // namespace Tegra
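Passing Core::System through the constructor replaces the Core::System::GetInstance() lookup inside the dispatch loop, making the dependency explicit and testable. The general shape of that refactor, sketched with a hypothetical Service class:

    class System {
    public:
        bool IsPoweredOn() const { return powered; }
        bool powered = true;
    };

    // Before: the dependency is hidden behind a global accessor,
    //     System& sys = System::GetInstance();
    // After: the dependency is injected and visible in the signature.
    class Service {
    public:
        explicit Service(System& system) : system{system} {}

        void Run() {
            while (system.IsPoweredOn()) {
                // ... dispatch one unit of work; stop after one pass for the demo
                system.powered = false;
            }
        }

    private:
        System& system; // held by reference; the owner outlives the service
    };

    int main() {
        System system;
        Service service{system};
        service.Run();
    }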
diff --git a/src/video_core/engines/fermi_2d.cpp b/src/video_core/engines/fermi_2d.cpp
index 85d308e26..bace6affb 100644
--- a/src/video_core/engines/fermi_2d.cpp
+++ b/src/video_core/engines/fermi_2d.cpp
@@ -28,7 +28,7 @@ void Fermi2D::CallMethod(const GPU::MethodCall& method_call) {
28 } 28 }
29} 29}
30 30
-std::pair<u32, u32> DelimitLine(u32 src_1, u32 src_2, u32 dst_1, u32 dst_2, u32 src_line) {
+static std::pair<u32, u32> DelimitLine(u32 src_1, u32 src_2, u32 dst_1, u32 dst_2, u32 src_line) {
32 const u32 line_a = src_2 - src_1; 32 const u32 line_a = src_2 - src_1;
33 const u32 line_b = dst_2 - dst_1; 33 const u32 line_b = dst_2 - dst_1;
34 const u32 excess = std::max<s32>(0, line_a - src_line + src_1); 34 const u32 excess = std::max<s32>(0, line_a - src_line + src_1);
diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp
index ba63b44b4..2824ed707 100644
--- a/src/video_core/engines/maxwell_3d.cpp
+++ b/src/video_core/engines/maxwell_3d.cpp
@@ -92,6 +92,10 @@ void Maxwell3D::InitializeRegisterDefaults() {
92 color_mask.A.Assign(1); 92 color_mask.A.Assign(1);
93 } 93 }
94 94
95 for (auto& format : regs.vertex_attrib_format) {
96 format.constant.Assign(1);
97 }
98
95 // NVN games expect these values to be enabled at boot 99 // NVN games expect these values to be enabled at boot
96 regs.rasterize_enable = 1; 100 regs.rasterize_enable = 1;
97 regs.rt_separate_frag_data = 1; 101 regs.rt_separate_frag_data = 1;
@@ -400,7 +404,11 @@ void Maxwell3D::ProcessQueryGet() {
400 404
401 switch (regs.query.query_get.operation) { 405 switch (regs.query.query_get.operation) {
402 case Regs::QueryOperation::Release: 406 case Regs::QueryOperation::Release:
-        StampQueryResult(regs.query.query_sequence, regs.query.query_get.short_query == 0);
+        if (regs.query.query_get.fence == 1) {
+            rasterizer.SignalSemaphore(regs.query.QueryAddress(), regs.query.query_sequence);
+        } else {
+            StampQueryResult(regs.query.query_sequence, regs.query.query_get.short_query == 0);
+        }
404 break; 412 break;
405 case Regs::QueryOperation::Acquire: 413 case Regs::QueryOperation::Acquire:
406 // TODO(Blinkhawk): Under this operation, the GPU waits for the CPU to write a value that 414 // TODO(Blinkhawk): Under this operation, the GPU waits for the CPU to write a value that
@@ -479,7 +487,7 @@ void Maxwell3D::ProcessSyncPoint() {
479 const u32 increment = regs.sync_info.increment.Value(); 487 const u32 increment = regs.sync_info.increment.Value();
480 [[maybe_unused]] const u32 cache_flush = regs.sync_info.unknown.Value(); 488 [[maybe_unused]] const u32 cache_flush = regs.sync_info.unknown.Value();
481 if (increment) { 489 if (increment) {
-        system.GPU().IncrementSyncPoint(sync_point);
+        rasterizer.SignalSyncPoint(sync_point);
483 } 491 }
484} 492}
485 493
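With the Maxwell3D change above, a Release query whose fence bit is set becomes a semaphore signal routed through the rasterizer instead of an immediate result stamp. A compressed sketch of that dispatch, with stub functions in place of the real engine:

    #include <cstdint>
    #include <iostream>

    struct QueryGet {
        bool fence;       // regs.query.query_get.fence
        bool short_query; // regs.query.query_get.short_query
    };

    void SignalSemaphore(std::uint64_t addr, std::uint32_t seq) {
        std::cout << "semaphore @" << addr << " = " << seq << '\n';
    }

    void StampQueryResult(std::uint32_t seq, bool long_query) {
        std::cout << "stamp " << seq << (long_query ? " (with timestamp)\n" : "\n");
    }

    void ProcessRelease(const QueryGet& get, std::uint64_t addr, std::uint32_t seq) {
        if (get.fence) {
            SignalSemaphore(addr, seq); // may be deferred by the fence manager
        } else {
            StampQueryResult(seq, !get.short_query); // immediate write
        }
    }

    int main() {
        ProcessRelease({true, false}, 0x1000, 7);
        ProcessRelease({false, false}, 0x1000, 8);
    }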
diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h
index 5cf6a4cc3..59d5752d2 100644
--- a/src/video_core/engines/maxwell_3d.h
+++ b/src/video_core/engines/maxwell_3d.h
@@ -1149,7 +1149,7 @@ public:
1149 1149
1150 /// Returns whether the vertex array specified by index is supposed to be 1150 /// Returns whether the vertex array specified by index is supposed to be
1151 /// accessed per instance or not. 1151 /// accessed per instance or not.
-        bool IsInstancingEnabled(u32 index) const {
+        bool IsInstancingEnabled(std::size_t index) const {
1153 return is_instanced[index]; 1153 return is_instanced[index];
1154 } 1154 }
1155 } instanced_arrays; 1155 } instanced_arrays;
diff --git a/src/video_core/engines/maxwell_dma.cpp b/src/video_core/engines/maxwell_dma.cpp
index c2610f992..3bfed6ab8 100644
--- a/src/video_core/engines/maxwell_dma.cpp
+++ b/src/video_core/engines/maxwell_dma.cpp
@@ -104,8 +104,13 @@ void MaxwellDMA::HandleCopy() {
104 write_buffer.resize(dst_size); 104 write_buffer.resize(dst_size);
105 } 105 }
106 106
-        memory_manager.ReadBlock(source, read_buffer.data(), src_size);
-        memory_manager.ReadBlock(dest, write_buffer.data(), dst_size);
+        if (Settings::IsGPULevelExtreme()) {
+            memory_manager.ReadBlock(source, read_buffer.data(), src_size);
+            memory_manager.ReadBlock(dest, write_buffer.data(), dst_size);
+        } else {
+            memory_manager.ReadBlockUnsafe(source, read_buffer.data(), src_size);
+            memory_manager.ReadBlockUnsafe(dest, write_buffer.data(), dst_size);
+        }
109 114
110 Texture::UnswizzleSubrect( 115 Texture::UnswizzleSubrect(
111 regs.x_count, regs.y_count, regs.dst_pitch, regs.src_params.size_x, bytes_per_pixel, 116 regs.x_count, regs.y_count, regs.dst_pitch, regs.src_params.size_x, bytes_per_pixel,
@@ -136,7 +141,7 @@ void MaxwellDMA::HandleCopy() {
136 write_buffer.resize(dst_size); 141 write_buffer.resize(dst_size);
137 } 142 }
138 143
-        if (Settings::values.use_accurate_gpu_emulation) {
+        if (Settings::IsGPULevelExtreme()) {
140 memory_manager.ReadBlock(source, read_buffer.data(), src_size); 145 memory_manager.ReadBlock(source, read_buffer.data(), src_size);
141 memory_manager.ReadBlock(dest, write_buffer.data(), dst_size); 146 memory_manager.ReadBlock(dest, write_buffer.data(), dst_size);
142 } else { 147 } else {
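Both DMA copy paths now branch on the tiered GPU accuracy setting rather than the old use_accurate_gpu_emulation boolean: only the Extreme tier pays for the cache-coherent ReadBlock. A sketch of that gate with a stubbed memory manager (types here are stand-ins):

    #include <cstring>
    #include <vector>

    enum class GPUAccuracy { Normal, High, Extreme };

    struct MemoryManager {
        std::vector<unsigned char> backing = std::vector<unsigned char>(64);

        // Flushes host caches first: correct but slow.
        void ReadBlock(std::size_t addr, unsigned char* out, std::size_t size) {
            // (cache flush elided in this sketch)
            std::memcpy(out, backing.data() + addr, size);
        }
        // Reads raw guest memory: fast, but may miss pending GPU writes.
        void ReadBlockUnsafe(std::size_t addr, unsigned char* out, std::size_t size) {
            std::memcpy(out, backing.data() + addr, size);
        }
    };

    void Copy(MemoryManager& mm, GPUAccuracy level, unsigned char* out, std::size_t size) {
        if (level == GPUAccuracy::Extreme) {
            mm.ReadBlock(0, out, size);
        } else {
            mm.ReadBlockUnsafe(0, out, size);
        }
    }

    int main() {
        MemoryManager mm;
        std::vector<unsigned char> buf(64);
        Copy(mm, GPUAccuracy::Extreme, buf.data(), buf.size());
        Copy(mm, GPUAccuracy::Normal, buf.data(), buf.size());
    }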
diff --git a/src/video_core/fence_manager.h b/src/video_core/fence_manager.h
new file mode 100644
index 000000000..dabd1588c
--- /dev/null
+++ b/src/video_core/fence_manager.h
@@ -0,0 +1,170 @@
1// Copyright 2020 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <algorithm>
8#include <array>
9#include <memory>
10#include <queue>
11
12#include "common/assert.h"
13#include "common/common_types.h"
14#include "core/core.h"
15#include "core/memory.h"
16#include "core/settings.h"
17#include "video_core/gpu.h"
18#include "video_core/memory_manager.h"
19#include "video_core/rasterizer_interface.h"
20
21namespace VideoCommon {
22
23class FenceBase {
24public:
25 FenceBase(u32 payload, bool is_stubbed)
26 : address{}, payload{payload}, is_semaphore{false}, is_stubbed{is_stubbed} {}
27
28 FenceBase(GPUVAddr address, u32 payload, bool is_stubbed)
29 : address{address}, payload{payload}, is_semaphore{true}, is_stubbed{is_stubbed} {}
30
31 GPUVAddr GetAddress() const {
32 return address;
33 }
34
35 u32 GetPayload() const {
36 return payload;
37 }
38
39 bool IsSemaphore() const {
40 return is_semaphore;
41 }
42
43private:
44 GPUVAddr address;
45 u32 payload;
46 bool is_semaphore;
47
48protected:
49 bool is_stubbed;
50};
51
52template <typename TFence, typename TTextureCache, typename TTBufferCache, typename TQueryCache>
53class FenceManager {
54public:
55 void SignalSemaphore(GPUVAddr addr, u32 value) {
56 TryReleasePendingFences();
57 const bool should_flush = ShouldFlush();
58 CommitAsyncFlushes();
59 TFence new_fence = CreateFence(addr, value, !should_flush);
60 fences.push(new_fence);
61 QueueFence(new_fence);
62 if (should_flush) {
63 rasterizer.FlushCommands();
64 }
65 rasterizer.SyncGuestHost();
66 }
67
68 void SignalSyncPoint(u32 value) {
69 TryReleasePendingFences();
70 const bool should_flush = ShouldFlush();
71 CommitAsyncFlushes();
72 TFence new_fence = CreateFence(value, !should_flush);
73 fences.push(new_fence);
74 QueueFence(new_fence);
75 if (should_flush) {
76 rasterizer.FlushCommands();
77 }
78 rasterizer.SyncGuestHost();
79 }
80
81 void WaitPendingFences() {
82 auto& gpu{system.GPU()};
83 auto& memory_manager{gpu.MemoryManager()};
84 while (!fences.empty()) {
85 TFence& current_fence = fences.front();
86 if (ShouldWait()) {
87 WaitFence(current_fence);
88 }
89 PopAsyncFlushes();
90 if (current_fence->IsSemaphore()) {
91 memory_manager.Write<u32>(current_fence->GetAddress(), current_fence->GetPayload());
92 } else {
93 gpu.IncrementSyncPoint(current_fence->GetPayload());
94 }
95 fences.pop();
96 }
97 }
98
99protected:
100 FenceManager(Core::System& system, VideoCore::RasterizerInterface& rasterizer,
101 TTextureCache& texture_cache, TTBufferCache& buffer_cache,
102 TQueryCache& query_cache)
103 : system{system}, rasterizer{rasterizer}, texture_cache{texture_cache},
104 buffer_cache{buffer_cache}, query_cache{query_cache} {}
105
106 virtual ~FenceManager() {}
107
108 /// Creates a Sync Point Fence Interface, does not create a backend fence if 'is_stubbed' is
109 /// true
110 virtual TFence CreateFence(u32 value, bool is_stubbed) = 0;
111 /// Creates a Semaphore Fence Interface, does not create a backend fence if 'is_stubbed' is true
112 virtual TFence CreateFence(GPUVAddr addr, u32 value, bool is_stubbed) = 0;
113 /// Queues a fence into the backend if the fence isn't stubbed.
114 virtual void QueueFence(TFence& fence) = 0;
115 /// Notifies that the backend fence has been signaled/reached on the host GPU.
116 virtual bool IsFenceSignaled(TFence& fence) const = 0;
117 /// Waits until a fence has been signaled by the host GPU.
118 virtual void WaitFence(TFence& fence) = 0;
119
120 Core::System& system;
121 VideoCore::RasterizerInterface& rasterizer;
122 TTextureCache& texture_cache;
123 TTBufferCache& buffer_cache;
124 TQueryCache& query_cache;
125
126private:
127 void TryReleasePendingFences() {
128 auto& gpu{system.GPU()};
129 auto& memory_manager{gpu.MemoryManager()};
130 while (!fences.empty()) {
131 TFence& current_fence = fences.front();
132 if (ShouldWait() && !IsFenceSignaled(current_fence)) {
133 return;
134 }
135 PopAsyncFlushes();
136 if (current_fence->IsSemaphore()) {
137 memory_manager.Write<u32>(current_fence->GetAddress(), current_fence->GetPayload());
138 } else {
139 gpu.IncrementSyncPoint(current_fence->GetPayload());
140 }
141 fences.pop();
142 }
143 }
144
145 bool ShouldWait() const {
146 return texture_cache.ShouldWaitAsyncFlushes() || buffer_cache.ShouldWaitAsyncFlushes() ||
147 query_cache.ShouldWaitAsyncFlushes();
148 }
149
150 bool ShouldFlush() const {
151 return texture_cache.HasUncommittedFlushes() || buffer_cache.HasUncommittedFlushes() ||
152 query_cache.HasUncommittedFlushes();
153 }
154
155 void PopAsyncFlushes() {
156 texture_cache.PopAsyncFlushes();
157 buffer_cache.PopAsyncFlushes();
158 query_cache.PopAsyncFlushes();
159 }
160
161 void CommitAsyncFlushes() {
162 texture_cache.CommitAsyncFlushes();
163 buffer_cache.CommitAsyncFlushes();
164 query_cache.CommitAsyncFlushes();
165 }
166
167 std::queue<TFence> fences;
168};
169
170} // namespace VideoCommon
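The key trick in SignalSemaphore/SignalSyncPoint above is that a fence created while no flushes are uncommitted is stubbed and never touches the backend. A single-cache reduction of that decision, assuming the same commit-then-queue ordering:

    #include <iostream>
    #include <queue>

    struct Fence {
        unsigned payload;
        bool stubbed; // no backend object was created
    };

    class MiniFenceManager {
    public:
        void MarkDirty() { has_uncommitted = true; }

        void SignalSyncPoint(unsigned value) {
            const bool should_flush = has_uncommitted; // ShouldFlush()
            has_uncommitted = false;                   // CommitAsyncFlushes()
            fences.push(Fence{value, !should_flush});  // stub if nothing to flush
            std::cout << "fence " << value << (should_flush ? " queued\n" : " stubbed\n");
        }

        void ReleaseAll() {
            while (!fences.empty()) {
                std::cout << "syncpoint += " << fences.front().payload << '\n';
                fences.pop();
            }
        }

    private:
        std::queue<Fence> fences;
        bool has_uncommitted = false;
    };

    int main() {
        MiniFenceManager fm;
        fm.SignalSyncPoint(1); // nothing dirty: stubbed
        fm.MarkDirty();
        fm.SignalSyncPoint(2); // dirty work committed: real fence
        fm.ReleaseAll();
    }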
diff --git a/src/video_core/gpu.cpp b/src/video_core/gpu.cpp
index 8acf2eda2..3b7572d61 100644
--- a/src/video_core/gpu.cpp
+++ b/src/video_core/gpu.cpp
@@ -27,7 +27,7 @@ GPU::GPU(Core::System& system, std::unique_ptr<VideoCore::RendererBase>&& render
27 : system{system}, renderer{std::move(renderer_)}, is_async{is_async} { 27 : system{system}, renderer{std::move(renderer_)}, is_async{is_async} {
28 auto& rasterizer{renderer->Rasterizer()}; 28 auto& rasterizer{renderer->Rasterizer()};
29 memory_manager = std::make_unique<Tegra::MemoryManager>(system, rasterizer); 29 memory_manager = std::make_unique<Tegra::MemoryManager>(system, rasterizer);
-    dma_pusher = std::make_unique<Tegra::DmaPusher>(*this);
+    dma_pusher = std::make_unique<Tegra::DmaPusher>(system, *this);
31 maxwell_3d = std::make_unique<Engines::Maxwell3D>(system, rasterizer, *memory_manager); 31 maxwell_3d = std::make_unique<Engines::Maxwell3D>(system, rasterizer, *memory_manager);
32 fermi_2d = std::make_unique<Engines::Fermi2D>(rasterizer); 32 fermi_2d = std::make_unique<Engines::Fermi2D>(rasterizer);
33 kepler_compute = std::make_unique<Engines::KeplerCompute>(system, rasterizer, *memory_manager); 33 kepler_compute = std::make_unique<Engines::KeplerCompute>(system, rasterizer, *memory_manager);
@@ -125,6 +125,28 @@ bool GPU::CancelSyncptInterrupt(const u32 syncpoint_id, const u32 value) {
125 return true; 125 return true;
126} 126}
127 127
128u64 GPU::RequestFlush(VAddr addr, std::size_t size) {
129 std::unique_lock lck{flush_request_mutex};
130 const u64 fence = ++last_flush_fence;
131 flush_requests.emplace_back(fence, addr, size);
132 return fence;
133}
134
135void GPU::TickWork() {
136 std::unique_lock lck{flush_request_mutex};
137 while (!flush_requests.empty()) {
138 auto& request = flush_requests.front();
139 const u64 fence = request.fence;
140 const VAddr addr = request.addr;
141 const std::size_t size = request.size;
142 flush_requests.pop_front();
143 flush_request_mutex.unlock();
144 renderer->Rasterizer().FlushRegion(addr, size);
145 current_flush_fence.store(fence);
146 flush_request_mutex.lock();
147 }
148}
149
128u64 GPU::GetTicks() const { 150u64 GPU::GetTicks() const {
129 // These values were reverse engineered by fincs from NVN 151 // These values were reverse engineered by fincs from NVN
130 // The gpu clock is reported in units of 385/625 nanoseconds 152 // The gpu clock is reported in units of 385/625 nanoseconds
@@ -142,6 +164,13 @@ void GPU::FlushCommands() {
142 renderer->Rasterizer().FlushCommands(); 164 renderer->Rasterizer().FlushCommands();
143} 165}
144 166
167void GPU::SyncGuestHost() {
168 renderer->Rasterizer().SyncGuestHost();
169}
170
171void GPU::OnCommandListEnd() {
172 renderer->Rasterizer().ReleaseFences();
173}
145// Note that, traditionally, methods are treated as 4-byte addressable locations, and hence 174// Note that, traditionally, methods are treated as 4-byte addressable locations, and hence
146// their numbers are written down multiplied by 4 in Docs. Here we do not multiply by 4. 175// their numbers are written down multiplied by 4 in Docs. Here we do not multiply by 4.
147// So the values you see in docs might be multiplied by 4. 176// So the values you see in docs might be multiplied by 4.
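RequestFlush and TickWork above form a small producer/consumer handshake: the CPU thread enqueues a request and gets a monotonically increasing ticket, while the GPU thread drains the queue and publishes the last serviced ticket through an atomic. A self-contained sketch of the same protocol (simplified; the real code drops the lock around the flush itself):

    #include <atomic>
    #include <cstdint>
    #include <list>
    #include <mutex>
    #include <thread>

    struct FlushRequest {
        std::uint64_t fence;
    };

    std::list<FlushRequest> requests;
    std::mutex request_mutex;
    std::uint64_t last_issued = 0;
    std::atomic<std::uint64_t> last_completed{0};

    // CPU side: enqueue and receive a ticket.
    std::uint64_t RequestFlush() {
        std::scoped_lock lock{request_mutex};
        const std::uint64_t fence = ++last_issued;
        requests.push_back({fence});
        return fence;
    }

    // GPU side: drain the queue, publishing completion in order.
    void TickWork() {
        std::scoped_lock lock{request_mutex};
        while (!requests.empty()) {
            const std::uint64_t fence = requests.front().fence;
            requests.pop_front();
            // (the actual flush runs here; the real code unlocks around it)
            last_completed.store(fence, std::memory_order_release);
        }
    }

    int main() {
        const std::uint64_t ticket = RequestFlush();
        std::thread gpu{TickWork};
        while (last_completed.load(std::memory_order_acquire) < ticket) {
            // the CPU spins until its request is serviced, as ThreadManager does
        }
        gpu.join();
    }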
diff --git a/src/video_core/gpu.h b/src/video_core/gpu.h
index 1a2d747be..5e3eb94e9 100644
--- a/src/video_core/gpu.h
+++ b/src/video_core/gpu.h
@@ -155,7 +155,23 @@ public:
155 /// Calls a GPU method. 155 /// Calls a GPU method.
156 void CallMethod(const MethodCall& method_call); 156 void CallMethod(const MethodCall& method_call);
157 157
158 /// Flush all currently written commands into the host GPU for execution.
158 void FlushCommands(); 159 void FlushCommands();
160 /// Synchronizes CPU writes with Host GPU memory.
161 void SyncGuestHost();
162 /// Signal the end of a command list.
163 virtual void OnCommandListEnd();
164
165 /// Request a host GPU memory flush from the CPU.
166 u64 RequestFlush(VAddr addr, std::size_t size);
167
168 /// Obtains current flush request fence id.
169 u64 CurrentFlushRequestFence() const {
170 return current_flush_fence.load(std::memory_order_relaxed);
171 }
172
173 /// Tick pending requests within the GPU.
174 void TickWork();
159 175
160 /// Returns a reference to the Maxwell3D GPU engine. 176 /// Returns a reference to the Maxwell3D GPU engine.
161 Engines::Maxwell3D& Maxwell3D(); 177 Engines::Maxwell3D& Maxwell3D();
@@ -325,6 +341,19 @@ private:
325 341
326 std::condition_variable sync_cv; 342 std::condition_variable sync_cv;
327 343
344 struct FlushRequest {
345 FlushRequest(u64 fence, VAddr addr, std::size_t size)
346 : fence{fence}, addr{addr}, size{size} {}
347 u64 fence;
348 VAddr addr;
349 std::size_t size;
350 };
351
352 std::list<FlushRequest> flush_requests;
353 std::atomic<u64> current_flush_fence{};
354 u64 last_flush_fence{};
355 std::mutex flush_request_mutex;
356
328 const bool is_async; 357 const bool is_async;
329}; 358};
330 359
diff --git a/src/video_core/gpu_asynch.cpp b/src/video_core/gpu_asynch.cpp
index 20e73a37e..53305ab43 100644
--- a/src/video_core/gpu_asynch.cpp
+++ b/src/video_core/gpu_asynch.cpp
@@ -52,4 +52,8 @@ void GPUAsynch::WaitIdle() const {
52 gpu_thread.WaitIdle(); 52 gpu_thread.WaitIdle();
53} 53}
54 54
55void GPUAsynch::OnCommandListEnd() {
56 gpu_thread.OnCommandListEnd();
57}
58
55} // namespace VideoCommon 59} // namespace VideoCommon
diff --git a/src/video_core/gpu_asynch.h b/src/video_core/gpu_asynch.h
index 03fd0eef0..517658612 100644
--- a/src/video_core/gpu_asynch.h
+++ b/src/video_core/gpu_asynch.h
@@ -32,6 +32,8 @@ public:
32 void FlushAndInvalidateRegion(VAddr addr, u64 size) override; 32 void FlushAndInvalidateRegion(VAddr addr, u64 size) override;
33 void WaitIdle() const override; 33 void WaitIdle() const override;
34 34
35 void OnCommandListEnd() override;
36
35protected: 37protected:
36 void TriggerCpuInterrupt(u32 syncpoint_id, u32 value) const override; 38 void TriggerCpuInterrupt(u32 syncpoint_id, u32 value) const override;
37 39
diff --git a/src/video_core/gpu_thread.cpp b/src/video_core/gpu_thread.cpp
index 10cda686b..c3bb4fe06 100644
--- a/src/video_core/gpu_thread.cpp
+++ b/src/video_core/gpu_thread.cpp
@@ -6,6 +6,7 @@
6#include "common/microprofile.h" 6#include "common/microprofile.h"
7#include "core/core.h" 7#include "core/core.h"
8#include "core/frontend/emu_window.h" 8#include "core/frontend/emu_window.h"
9#include "core/settings.h"
9#include "video_core/dma_pusher.h" 10#include "video_core/dma_pusher.h"
10#include "video_core/gpu.h" 11#include "video_core/gpu.h"
11#include "video_core/gpu_thread.h" 12#include "video_core/gpu_thread.h"
@@ -14,8 +15,9 @@
14namespace VideoCommon::GPUThread { 15namespace VideoCommon::GPUThread {
15 16
16/// Runs the GPU thread 17/// Runs the GPU thread
-static void RunThread(VideoCore::RendererBase& renderer, Core::Frontend::GraphicsContext& context,
-                      Tegra::DmaPusher& dma_pusher, SynchState& state) {
+static void RunThread(Core::System& system, VideoCore::RendererBase& renderer,
+                      Core::Frontend::GraphicsContext& context, Tegra::DmaPusher& dma_pusher,
+                      SynchState& state) {
19 MicroProfileOnThreadCreate("GpuThread"); 21 MicroProfileOnThreadCreate("GpuThread");
20 22
21 // Wait for first GPU command before acquiring the window context 23 // Wait for first GPU command before acquiring the window context
@@ -37,10 +39,14 @@ static void RunThread(VideoCore::RendererBase& renderer, Core::Frontend::Graphic
37 dma_pusher.DispatchCalls(); 39 dma_pusher.DispatchCalls();
38 } else if (const auto data = std::get_if<SwapBuffersCommand>(&next.data)) { 40 } else if (const auto data = std::get_if<SwapBuffersCommand>(&next.data)) {
39 renderer.SwapBuffers(data->framebuffer ? &*data->framebuffer : nullptr); 41 renderer.SwapBuffers(data->framebuffer ? &*data->framebuffer : nullptr);
42 } else if (const auto data = std::get_if<OnCommandListEndCommand>(&next.data)) {
43 renderer.Rasterizer().ReleaseFences();
44 } else if (const auto data = std::get_if<GPUTickCommand>(&next.data)) {
45 system.GPU().TickWork();
40 } else if (const auto data = std::get_if<FlushRegionCommand>(&next.data)) { 46 } else if (const auto data = std::get_if<FlushRegionCommand>(&next.data)) {
41 renderer.Rasterizer().FlushRegion(data->addr, data->size); 47 renderer.Rasterizer().FlushRegion(data->addr, data->size);
42 } else if (const auto data = std::get_if<InvalidateRegionCommand>(&next.data)) { 48 } else if (const auto data = std::get_if<InvalidateRegionCommand>(&next.data)) {
-            renderer.Rasterizer().InvalidateRegion(data->addr, data->size);
+            renderer.Rasterizer().OnCPUWrite(data->addr, data->size);
44 } else if (std::holds_alternative<EndProcessingCommand>(next.data)) { 50 } else if (std::holds_alternative<EndProcessingCommand>(next.data)) {
45 return; 51 return;
46 } else { 52 } else {
@@ -65,8 +71,8 @@ ThreadManager::~ThreadManager() {
65void ThreadManager::StartThread(VideoCore::RendererBase& renderer, 71void ThreadManager::StartThread(VideoCore::RendererBase& renderer,
66 Core::Frontend::GraphicsContext& context, 72 Core::Frontend::GraphicsContext& context,
67 Tegra::DmaPusher& dma_pusher) { 73 Tegra::DmaPusher& dma_pusher) {
-    thread = std::thread{RunThread, std::ref(renderer), std::ref(context), std::ref(dma_pusher),
-                         std::ref(state)};
+    thread = std::thread{RunThread, std::ref(system), std::ref(renderer),
+                         std::ref(context), std::ref(dma_pusher), std::ref(state)};
70} 76}
71 77
72void ThreadManager::SubmitList(Tegra::CommandList&& entries) { 78void ThreadManager::SubmitList(Tegra::CommandList&& entries) {
@@ -78,16 +84,29 @@ void ThreadManager::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) {
78} 84}
79 85
80void ThreadManager::FlushRegion(VAddr addr, u64 size) { 86void ThreadManager::FlushRegion(VAddr addr, u64 size) {
-    PushCommand(FlushRegionCommand(addr, size));
+    if (!Settings::IsGPULevelHigh()) {
+        PushCommand(FlushRegionCommand(addr, size));
+        return;
+    }
+    if (!Settings::IsGPULevelExtreme()) {
+        return;
+    }
+    if (system.Renderer().Rasterizer().MustFlushRegion(addr, size)) {
+        auto& gpu = system.GPU();
+        u64 fence = gpu.RequestFlush(addr, size);
+        PushCommand(GPUTickCommand());
+        while (fence > gpu.CurrentFlushRequestFence()) {
+        }
+    }
82} 101}
83 102
84void ThreadManager::InvalidateRegion(VAddr addr, u64 size) { 103void ThreadManager::InvalidateRegion(VAddr addr, u64 size) {
-    system.Renderer().Rasterizer().InvalidateRegion(addr, size);
+    system.Renderer().Rasterizer().OnCPUWrite(addr, size);
86} 105}
87 106
88void ThreadManager::FlushAndInvalidateRegion(VAddr addr, u64 size) { 107void ThreadManager::FlushAndInvalidateRegion(VAddr addr, u64 size) {
     // Skip the flush in async mode, as FlushAndInvalidateRegion is not used for anything too important
-    InvalidateRegion(addr, size);
+    system.Renderer().Rasterizer().OnCPUWrite(addr, size);
91} 110}
92 111
93void ThreadManager::WaitIdle() const { 112void ThreadManager::WaitIdle() const {
@@ -95,6 +114,10 @@ void ThreadManager::WaitIdle() const {
95 } 114 }
96} 115}
97 116
117void ThreadManager::OnCommandListEnd() {
118 PushCommand(OnCommandListEndCommand());
119}
120
98u64 ThreadManager::PushCommand(CommandData&& command_data) { 121u64 ThreadManager::PushCommand(CommandData&& command_data) {
99 const u64 fence{++state.last_fence}; 122 const u64 fence{++state.last_fence};
100 state.queue.Push(CommandDataContainer(std::move(command_data), fence)); 123 state.queue.Push(CommandDataContainer(std::move(command_data), fence));
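The worker loop dispatches on a std::variant of command structs via std::get_if, which is the slot the two new commands plug into. A minimal sketch of that dispatch style (toy command types, not the real ones):

    #include <iostream>
    #include <variant>

    struct SubmitList { int id; };
    struct OnCommandListEnd {};
    struct GPUTick {};

    using Command = std::variant<SubmitList, OnCommandListEnd, GPUTick>;

    void Dispatch(const Command& next) {
        if (const auto* data = std::get_if<SubmitList>(&next)) {
            std::cout << "submit list " << data->id << '\n';
        } else if (std::holds_alternative<OnCommandListEnd>(next)) {
            std::cout << "release fences\n";
        } else if (std::holds_alternative<GPUTick>(next)) {
            std::cout << "tick pending flush requests\n";
        }
    }

    int main() {
        Dispatch(SubmitList{1});
        Dispatch(OnCommandListEnd{});
        Dispatch(GPUTick{});
    }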
diff --git a/src/video_core/gpu_thread.h b/src/video_core/gpu_thread.h
index cd74ad330..5a28335d6 100644
--- a/src/video_core/gpu_thread.h
+++ b/src/video_core/gpu_thread.h
@@ -70,9 +70,16 @@ struct FlushAndInvalidateRegionCommand final {
70 u64 size; 70 u64 size;
71}; 71};
72 72
73/// Command called within the GPU to schedule actions after a command list ends
74struct OnCommandListEndCommand final {};
75
76/// Command to make the GPU check for pending requests
77struct GPUTickCommand final {};
78
 using CommandData =
     std::variant<EndProcessingCommand, SubmitListCommand, SwapBuffersCommand, FlushRegionCommand,
-                 InvalidateRegionCommand, FlushAndInvalidateRegionCommand>;
+                 InvalidateRegionCommand, FlushAndInvalidateRegionCommand, OnCommandListEndCommand,
+                 GPUTickCommand>;
76 83
77struct CommandDataContainer { 84struct CommandDataContainer {
78 CommandDataContainer() = default; 85 CommandDataContainer() = default;
@@ -122,6 +129,8 @@ public:
122 // Wait until the gpu thread is idle. 129 // Wait until the gpu thread is idle.
123 void WaitIdle() const; 130 void WaitIdle() const;
124 131
132 void OnCommandListEnd();
133
125private: 134private:
126 /// Pushes a command to be executed by the GPU thread 135 /// Pushes a command to be executed by the GPU thread
127 u64 PushCommand(CommandData&& command_data); 136 u64 PushCommand(CommandData&& command_data);
diff --git a/src/video_core/query_cache.h b/src/video_core/query_cache.h
index 5ea2b01f2..2f75f8801 100644
--- a/src/video_core/query_cache.h
+++ b/src/video_core/query_cache.h
@@ -12,10 +12,12 @@
12#include <mutex> 12#include <mutex>
13#include <optional> 13#include <optional>
14#include <unordered_map> 14#include <unordered_map>
15#include <unordered_set>
15#include <vector> 16#include <vector>
16 17
17#include "common/assert.h" 18#include "common/assert.h"
18#include "core/core.h" 19#include "core/core.h"
20#include "core/settings.h"
19#include "video_core/engines/maxwell_3d.h" 21#include "video_core/engines/maxwell_3d.h"
20#include "video_core/gpu.h" 22#include "video_core/gpu.h"
21#include "video_core/memory_manager.h" 23#include "video_core/memory_manager.h"
@@ -130,6 +132,9 @@ public:
130 } 132 }
131 133
132 query->BindCounter(Stream(type).Current(), timestamp); 134 query->BindCounter(Stream(type).Current(), timestamp);
135 if (Settings::values.use_asynchronous_gpu_emulation) {
136 AsyncFlushQuery(cpu_addr);
137 }
133 } 138 }
134 139
135 /// Updates counters from GPU state. Expected to be called once per draw, clear or dispatch. 140 /// Updates counters from GPU state. Expected to be called once per draw, clear or dispatch.
@@ -170,6 +175,37 @@ public:
170 return streams[static_cast<std::size_t>(type)]; 175 return streams[static_cast<std::size_t>(type)];
171 } 176 }
172 177
178 void CommitAsyncFlushes() {
179 committed_flushes.push_back(uncommitted_flushes);
180 uncommitted_flushes.reset();
181 }
182
183 bool HasUncommittedFlushes() const {
184 return uncommitted_flushes != nullptr;
185 }
186
187 bool ShouldWaitAsyncFlushes() const {
188 if (committed_flushes.empty()) {
189 return false;
190 }
191 return committed_flushes.front() != nullptr;
192 }
193
194 void PopAsyncFlushes() {
195 if (committed_flushes.empty()) {
196 return;
197 }
198 auto& flush_list = committed_flushes.front();
199 if (!flush_list) {
200 committed_flushes.pop_front();
201 return;
202 }
203 for (VAddr query_address : *flush_list) {
204 FlushAndRemoveRegion(query_address, 4);
205 }
206 committed_flushes.pop_front();
207 }
208
173protected: 209protected:
174 std::array<QueryPool, VideoCore::NumQueryTypes> query_pools; 210 std::array<QueryPool, VideoCore::NumQueryTypes> query_pools;
175 211
@@ -224,6 +260,13 @@ private:
224 return found != std::end(contents) ? &*found : nullptr; 260 return found != std::end(contents) ? &*found : nullptr;
225 } 261 }
226 262
263 void AsyncFlushQuery(VAddr addr) {
264 if (!uncommitted_flushes) {
265 uncommitted_flushes = std::make_shared<std::unordered_set<VAddr>>();
266 }
267 uncommitted_flushes->insert(addr);
268 }
269
227 static constexpr std::uintptr_t PAGE_SIZE = 4096; 270 static constexpr std::uintptr_t PAGE_SIZE = 4096;
228 static constexpr unsigned PAGE_SHIFT = 12; 271 static constexpr unsigned PAGE_SHIFT = 12;
229 272
@@ -235,6 +278,9 @@ private:
235 std::unordered_map<u64, std::vector<CachedQuery>> cached_queries; 278 std::unordered_map<u64, std::vector<CachedQuery>> cached_queries;
236 279
237 std::array<CounterStream, VideoCore::NumQueryTypes> streams; 280 std::array<CounterStream, VideoCore::NumQueryTypes> streams;
281
282 std::shared_ptr<std::unordered_set<VAddr>> uncommitted_flushes{};
283 std::list<std::shared_ptr<std::unordered_set<VAddr>>> committed_flushes;
238}; 284};
239 285
240template <class QueryCache, class HostCounter> 286template <class QueryCache, class HostCounter>
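Queries are recorded for asynchronous flushing by address rather than by object, so PopAsyncFlushes can later call FlushAndRemoveRegion(addr, 4) with no lifetime concerns. A trimmed sketch mirroring the shape above, with a stand-in flush function:

    #include <cstdint>
    #include <iostream>
    #include <list>
    #include <memory>
    #include <unordered_set>

    using VAddr = std::uint64_t;

    std::shared_ptr<std::unordered_set<VAddr>> uncommitted;
    std::list<std::shared_ptr<std::unordered_set<VAddr>>> committed;

    void FlushAndRemoveRegion(VAddr addr, std::size_t size) {
        std::cout << "flush " << size << " bytes @ 0x" << std::hex << addr << std::dec << '\n';
    }

    void AsyncFlushQuery(VAddr addr) {
        if (!uncommitted) {
            uncommitted = std::make_shared<std::unordered_set<VAddr>>();
        }
        uncommitted->insert(addr);
    }

    void CommitAsyncFlushes() {
        committed.push_back(uncommitted); // may push null: keeps fence order
        uncommitted.reset();
    }

    void PopAsyncFlushes() {
        if (committed.empty()) {
            return;
        }
        if (const auto& list = committed.front()) {
            for (const VAddr addr : *list) {
                FlushAndRemoveRegion(addr, 4); // query results are 4 bytes
            }
        }
        committed.pop_front();
    }

    int main() {
        AsyncFlushQuery(0x1000);
        CommitAsyncFlushes();
        PopAsyncFlushes();
    }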
diff --git a/src/video_core/rasterizer_interface.h b/src/video_core/rasterizer_interface.h
index 8ae5b9c4e..603f61952 100644
--- a/src/video_core/rasterizer_interface.h
+++ b/src/video_core/rasterizer_interface.h
@@ -49,15 +49,33 @@ public:
49 /// Records a GPU query and caches it 49 /// Records a GPU query and caches it
50 virtual void Query(GPUVAddr gpu_addr, QueryType type, std::optional<u64> timestamp) = 0; 50 virtual void Query(GPUVAddr gpu_addr, QueryType type, std::optional<u64> timestamp) = 0;
51 51
52 /// Signal a GPU-based semaphore as a fence
53 virtual void SignalSemaphore(GPUVAddr addr, u32 value) = 0;
54
55 /// Signal a GPU-based syncpoint as a fence
56 virtual void SignalSyncPoint(u32 value) = 0;
57
58 /// Release all pending fences.
59 virtual void ReleaseFences() = 0;
60
52 /// Notify rasterizer that all caches should be flushed to Switch memory 61 /// Notify rasterizer that all caches should be flushed to Switch memory
53 virtual void FlushAll() = 0; 62 virtual void FlushAll() = 0;
54 63
55 /// Notify rasterizer that any caches of the specified region should be flushed to Switch memory 64 /// Notify rasterizer that any caches of the specified region should be flushed to Switch memory
56 virtual void FlushRegion(VAddr addr, u64 size) = 0; 65 virtual void FlushRegion(VAddr addr, u64 size) = 0;
57 66
67 /// Check if the specified memory area requires flushing to CPU memory.
68 virtual bool MustFlushRegion(VAddr addr, u64 size) = 0;
69
58 /// Notify rasterizer that any caches of the specified region should be invalidated 70 /// Notify rasterizer that any caches of the specified region should be invalidated
59 virtual void InvalidateRegion(VAddr addr, u64 size) = 0; 71 virtual void InvalidateRegion(VAddr addr, u64 size) = 0;
60 72
73 /// Notify rasterizer that any caches of the specified region are out of sync with the guest
74 virtual void OnCPUWrite(VAddr addr, u64 size) = 0;
75
76 /// Sync memory between guest and host.
77 virtual void SyncGuestHost() = 0;
78
61 /// Notify rasterizer that any caches of the specified region should be flushed to Switch memory 79 /// Notify rasterizer that any caches of the specified region should be flushed to Switch memory
62 /// and invalidated 80 /// and invalidated
63 virtual void FlushAndInvalidateRegion(VAddr addr, u64 size) = 0; 81 virtual void FlushAndInvalidateRegion(VAddr addr, u64 size) = 0;
diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.cpp b/src/video_core/renderer_opengl/gl_buffer_cache.cpp
index cb5792407..4efce0de7 100644
--- a/src/video_core/renderer_opengl/gl_buffer_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_buffer_cache.cpp
@@ -52,7 +52,7 @@ Buffer OGLBufferCache::CreateBlock(VAddr cpu_addr, std::size_t size) {
52} 52}
53 53
54void OGLBufferCache::WriteBarrier() { 54void OGLBufferCache::WriteBarrier() {
-    glMemoryBarrier(GL_ALL_BARRIER_BITS);
+    glMemoryBarrier(GL_SHADER_STORAGE_BARRIER_BIT);
56} 56}
57 57
58GLuint OGLBufferCache::ToHandle(const Buffer& buffer) { 58GLuint OGLBufferCache::ToHandle(const Buffer& buffer) {
@@ -72,6 +72,7 @@ void OGLBufferCache::UploadBlockData(const Buffer& buffer, std::size_t offset, s
72void OGLBufferCache::DownloadBlockData(const Buffer& buffer, std::size_t offset, std::size_t size, 72void OGLBufferCache::DownloadBlockData(const Buffer& buffer, std::size_t offset, std::size_t size,
73 u8* data) { 73 u8* data) {
74 MICROPROFILE_SCOPE(OpenGL_Buffer_Download); 74 MICROPROFILE_SCOPE(OpenGL_Buffer_Download);
75 glMemoryBarrier(GL_BUFFER_UPDATE_BARRIER_BIT);
75 glGetNamedBufferSubData(buffer->GetHandle(), static_cast<GLintptr>(offset), 76 glGetNamedBufferSubData(buffer->GetHandle(), static_cast<GLintptr>(offset),
76 static_cast<GLsizeiptr>(size), data); 77 static_cast<GLsizeiptr>(size), data);
77} 78}
diff --git a/src/video_core/renderer_opengl/gl_fence_manager.cpp b/src/video_core/renderer_opengl/gl_fence_manager.cpp
new file mode 100644
index 000000000..99ddcb3f8
--- /dev/null
+++ b/src/video_core/renderer_opengl/gl_fence_manager.cpp
@@ -0,0 +1,72 @@
1// Copyright 2020 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/assert.h"
6
7#include "video_core/renderer_opengl/gl_fence_manager.h"
8
9namespace OpenGL {
10
11GLInnerFence::GLInnerFence(u32 payload, bool is_stubbed)
12 : VideoCommon::FenceBase(payload, is_stubbed), sync_object{} {}
13
14GLInnerFence::GLInnerFence(GPUVAddr address, u32 payload, bool is_stubbed)
15 : VideoCommon::FenceBase(address, payload, is_stubbed), sync_object{} {}
16
17GLInnerFence::~GLInnerFence() = default;
18
19void GLInnerFence::Queue() {
20 if (is_stubbed) {
21 return;
22 }
23 ASSERT(sync_object.handle == 0);
24 sync_object.Create();
25}
26
27bool GLInnerFence::IsSignaled() const {
28 if (is_stubbed) {
29 return true;
30 }
31 ASSERT(sync_object.handle != 0);
32 GLsizei length;
33 GLint sync_status;
34 glGetSynciv(sync_object.handle, GL_SYNC_STATUS, sizeof(GLint), &length, &sync_status);
35 return sync_status == GL_SIGNALED;
36}
37
38void GLInnerFence::Wait() {
39 if (is_stubbed) {
40 return;
41 }
42 ASSERT(sync_object.handle != 0);
43 glClientWaitSync(sync_object.handle, 0, GL_TIMEOUT_IGNORED);
44}
45
46FenceManagerOpenGL::FenceManagerOpenGL(Core::System& system,
47 VideoCore::RasterizerInterface& rasterizer,
48 TextureCacheOpenGL& texture_cache,
49 OGLBufferCache& buffer_cache, QueryCache& query_cache)
50 : GenericFenceManager(system, rasterizer, texture_cache, buffer_cache, query_cache) {}
51
52Fence FenceManagerOpenGL::CreateFence(u32 value, bool is_stubbed) {
53 return std::make_shared<GLInnerFence>(value, is_stubbed);
54}
55
56Fence FenceManagerOpenGL::CreateFence(GPUVAddr addr, u32 value, bool is_stubbed) {
57 return std::make_shared<GLInnerFence>(addr, value, is_stubbed);
58}
59
60void FenceManagerOpenGL::QueueFence(Fence& fence) {
61 fence->Queue();
62}
63
64bool FenceManagerOpenGL::IsFenceSignaled(Fence& fence) const {
65 return fence->IsSignaled();
66}
67
68void FenceManagerOpenGL::WaitFence(Fence& fence) {
69 fence->Wait();
70}
71
72} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_fence_manager.h b/src/video_core/renderer_opengl/gl_fence_manager.h
new file mode 100644
index 000000000..c917b3343
--- /dev/null
+++ b/src/video_core/renderer_opengl/gl_fence_manager.h
@@ -0,0 +1,53 @@
1// Copyright 2020 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <memory>
8#include <glad/glad.h>
9
10#include "common/common_types.h"
11#include "video_core/fence_manager.h"
12#include "video_core/renderer_opengl/gl_buffer_cache.h"
13#include "video_core/renderer_opengl/gl_query_cache.h"
14#include "video_core/renderer_opengl/gl_resource_manager.h"
15#include "video_core/renderer_opengl/gl_texture_cache.h"
16
17namespace OpenGL {
18
19class GLInnerFence : public VideoCommon::FenceBase {
20public:
21 GLInnerFence(u32 payload, bool is_stubbed);
22 GLInnerFence(GPUVAddr address, u32 payload, bool is_stubbed);
23 ~GLInnerFence();
24
25 void Queue();
26
27 bool IsSignaled() const;
28
29 void Wait();
30
31private:
32 OGLSync sync_object;
33};
34
35using Fence = std::shared_ptr<GLInnerFence>;
36using GenericFenceManager =
37 VideoCommon::FenceManager<Fence, TextureCacheOpenGL, OGLBufferCache, QueryCache>;
38
39class FenceManagerOpenGL final : public GenericFenceManager {
40public:
41 FenceManagerOpenGL(Core::System& system, VideoCore::RasterizerInterface& rasterizer,
42 TextureCacheOpenGL& texture_cache, OGLBufferCache& buffer_cache,
43 QueryCache& query_cache);
44
45protected:
46 Fence CreateFence(u32 value, bool is_stubbed) override;
47 Fence CreateFence(GPUVAddr addr, u32 value, bool is_stubbed) override;
48 void QueueFence(Fence& fence) override;
49 bool IsFenceSignaled(Fence& fence) const override;
50 void WaitFence(Fence& fence) override;
51};
52
53} // namespace OpenGL
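Under the hood, OGLSync wraps a GL sync object, and the three fence operations above map directly onto glFenceSync, glGetSynciv and glClientWaitSync. A bare-GL sketch of the same trio; it assumes a current GL 3.2+ context and omits error handling:

    #include <glad/glad.h>

    // Queue: insert a fence into the GL command stream.
    GLsync Queue() {
        return glFenceSync(GL_SYNC_GPU_COMMANDS_COMPLETE, 0);
    }

    // IsSignaled: non-blocking status poll.
    bool IsSignaled(GLsync sync) {
        GLsizei length;
        GLint status;
        glGetSynciv(sync, GL_SYNC_STATUS, sizeof(GLint), &length, &status);
        return status == GL_SIGNALED;
    }

    // Wait: block the client until the GPU reaches the fence
    // (GL_TIMEOUT_IGNORED here is simply the maximum u64 timeout).
    void Wait(GLsync sync) {
        glClientWaitSync(sync, 0, GL_TIMEOUT_IGNORED);
    }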
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp
index 175374f0d..4c16c89d2 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -99,9 +99,10 @@ RasterizerOpenGL::RasterizerOpenGL(Core::System& system, Core::Frontend::EmuWind
99 ScreenInfo& info, GLShader::ProgramManager& program_manager, 99 ScreenInfo& info, GLShader::ProgramManager& program_manager,
100 StateTracker& state_tracker) 100 StateTracker& state_tracker)
101 : RasterizerAccelerated{system.Memory()}, texture_cache{system, *this, device, state_tracker}, 101 : RasterizerAccelerated{system.Memory()}, texture_cache{system, *this, device, state_tracker},
-      shader_cache{*this, system, emu_window, device}, query_cache{system, *this}, system{system},
-      screen_info{info}, program_manager{program_manager}, state_tracker{state_tracker},
-      buffer_cache{*this, system, device, STREAM_BUFFER_SIZE} {
+      shader_cache{*this, system, emu_window, device}, query_cache{system, *this},
+      buffer_cache{*this, system, device, STREAM_BUFFER_SIZE},
+      fence_manager{system, *this, texture_cache, buffer_cache, query_cache}, system{system},
+      screen_info{info}, program_manager{program_manager}, state_tracker{state_tracker} {
105 CheckExtensions(); 106 CheckExtensions();
106} 107}
107 108
@@ -599,6 +600,8 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) {
599 EndTransformFeedback(); 600 EndTransformFeedback();
600 601
601 ++num_queued_commands; 602 ++num_queued_commands;
603
604 system.GPU().TickWork();
602} 605}
603 606
604void RasterizerOpenGL::DispatchCompute(GPUVAddr code_addr) { 607void RasterizerOpenGL::DispatchCompute(GPUVAddr code_addr) {
@@ -649,6 +652,13 @@ void RasterizerOpenGL::FlushRegion(VAddr addr, u64 size) {
649 query_cache.FlushRegion(addr, size); 652 query_cache.FlushRegion(addr, size);
650} 653}
651 654
655bool RasterizerOpenGL::MustFlushRegion(VAddr addr, u64 size) {
656 if (!Settings::IsGPULevelHigh()) {
657 return buffer_cache.MustFlushRegion(addr, size);
658 }
659 return texture_cache.MustFlushRegion(addr, size) || buffer_cache.MustFlushRegion(addr, size);
660}
661
652void RasterizerOpenGL::InvalidateRegion(VAddr addr, u64 size) { 662void RasterizerOpenGL::InvalidateRegion(VAddr addr, u64 size) {
653 MICROPROFILE_SCOPE(OpenGL_CacheManagement); 663 MICROPROFILE_SCOPE(OpenGL_CacheManagement);
654 if (addr == 0 || size == 0) { 664 if (addr == 0 || size == 0) {
@@ -660,8 +670,52 @@ void RasterizerOpenGL::InvalidateRegion(VAddr addr, u64 size) {
660 query_cache.InvalidateRegion(addr, size); 670 query_cache.InvalidateRegion(addr, size);
661} 671}
662 672
673void RasterizerOpenGL::OnCPUWrite(VAddr addr, u64 size) {
674 MICROPROFILE_SCOPE(OpenGL_CacheManagement);
675 if (addr == 0 || size == 0) {
676 return;
677 }
678 texture_cache.OnCPUWrite(addr, size);
679 shader_cache.InvalidateRegion(addr, size);
680 buffer_cache.OnCPUWrite(addr, size);
681 query_cache.InvalidateRegion(addr, size);
682}
683
684void RasterizerOpenGL::SyncGuestHost() {
685 MICROPROFILE_SCOPE(OpenGL_CacheManagement);
686 texture_cache.SyncGuestHost();
687 buffer_cache.SyncGuestHost();
688}
689
690void RasterizerOpenGL::SignalSemaphore(GPUVAddr addr, u32 value) {
691 auto& gpu{system.GPU()};
692 if (!gpu.IsAsync()) {
693 auto& memory_manager{gpu.MemoryManager()};
694 memory_manager.Write<u32>(addr, value);
695 return;
696 }
697 fence_manager.SignalSemaphore(addr, value);
698}
699
700void RasterizerOpenGL::SignalSyncPoint(u32 value) {
701 auto& gpu{system.GPU()};
702 if (!gpu.IsAsync()) {
703 gpu.IncrementSyncPoint(value);
704 return;
705 }
706 fence_manager.SignalSyncPoint(value);
707}
708
709void RasterizerOpenGL::ReleaseFences() {
710 auto& gpu{system.GPU()};
711 if (!gpu.IsAsync()) {
712 return;
713 }
714 fence_manager.WaitPendingFences();
715}
716
663void RasterizerOpenGL::FlushAndInvalidateRegion(VAddr addr, u64 size) { 717void RasterizerOpenGL::FlushAndInvalidateRegion(VAddr addr, u64 size) {
-    if (Settings::values.use_accurate_gpu_emulation) {
+    if (Settings::IsGPULevelExtreme()) {
665 FlushRegion(addr, size); 719 FlushRegion(addr, size);
666 } 720 }
667 InvalidateRegion(addr, size); 721 InvalidateRegion(addr, size);
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h
index caea174d2..ebd2173eb 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer.h
@@ -23,6 +23,7 @@
23#include "video_core/rasterizer_interface.h" 23#include "video_core/rasterizer_interface.h"
24#include "video_core/renderer_opengl/gl_buffer_cache.h" 24#include "video_core/renderer_opengl/gl_buffer_cache.h"
25#include "video_core/renderer_opengl/gl_device.h" 25#include "video_core/renderer_opengl/gl_device.h"
26#include "video_core/renderer_opengl/gl_fence_manager.h"
26#include "video_core/renderer_opengl/gl_framebuffer_cache.h" 27#include "video_core/renderer_opengl/gl_framebuffer_cache.h"
27#include "video_core/renderer_opengl/gl_query_cache.h" 28#include "video_core/renderer_opengl/gl_query_cache.h"
28#include "video_core/renderer_opengl/gl_resource_manager.h" 29#include "video_core/renderer_opengl/gl_resource_manager.h"
@@ -66,7 +67,13 @@ public:
66 void Query(GPUVAddr gpu_addr, VideoCore::QueryType type, std::optional<u64> timestamp) override; 67 void Query(GPUVAddr gpu_addr, VideoCore::QueryType type, std::optional<u64> timestamp) override;
67 void FlushAll() override; 68 void FlushAll() override;
68 void FlushRegion(VAddr addr, u64 size) override; 69 void FlushRegion(VAddr addr, u64 size) override;
70 bool MustFlushRegion(VAddr addr, u64 size) override;
69 void InvalidateRegion(VAddr addr, u64 size) override; 71 void InvalidateRegion(VAddr addr, u64 size) override;
72 void OnCPUWrite(VAddr addr, u64 size) override;
73 void SyncGuestHost() override;
74 void SignalSemaphore(GPUVAddr addr, u32 value) override;
75 void SignalSyncPoint(u32 value) override;
76 void ReleaseFences() override;
70 void FlushAndInvalidateRegion(VAddr addr, u64 size) override; 77 void FlushAndInvalidateRegion(VAddr addr, u64 size) override;
71 void FlushCommands() override; 78 void FlushCommands() override;
72 void TickFrame() override; 79 void TickFrame() override;
@@ -222,6 +229,8 @@ private:
222 SamplerCacheOpenGL sampler_cache; 229 SamplerCacheOpenGL sampler_cache;
223 FramebufferCacheOpenGL framebuffer_cache; 230 FramebufferCacheOpenGL framebuffer_cache;
224 QueryCache query_cache; 231 QueryCache query_cache;
232 OGLBufferCache buffer_cache;
233 FenceManagerOpenGL fence_manager;
225 234
226 Core::System& system; 235 Core::System& system;
227 ScreenInfo& screen_info; 236 ScreenInfo& screen_info;
@@ -229,7 +238,6 @@ private:
229 StateTracker& state_tracker; 238 StateTracker& state_tracker;
230 239
231 static constexpr std::size_t STREAM_BUFFER_SIZE = 128 * 1024 * 1024; 240 static constexpr std::size_t STREAM_BUFFER_SIZE = 128 * 1024 * 1024;
-    OGLBufferCache buffer_cache;
233 241
234 GLint vertex_binding = 0; 242 GLint vertex_binding = 0;
235 243
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp
index 6d2ff20f9..f63156b8d 100644
--- a/src/video_core/renderer_opengl/gl_shader_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp
@@ -448,7 +448,7 @@ Shader ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program) {
448 448
449 // Look up shader in the cache based on address 449 // Look up shader in the cache based on address
450 const auto cpu_addr{memory_manager.GpuToCpuAddress(address)}; 450 const auto cpu_addr{memory_manager.GpuToCpuAddress(address)};
-    Shader shader{cpu_addr ? TryGet(*cpu_addr) : nullptr};
+    Shader shader{cpu_addr ? TryGet(*cpu_addr) : null_shader};
452 if (shader) { 452 if (shader) {
453 return last_shaders[static_cast<std::size_t>(program)] = shader; 453 return last_shaders[static_cast<std::size_t>(program)] = shader;
454 } 454 }
@@ -477,7 +477,12 @@ Shader ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program) {
477 const std::size_t size_in_bytes = code.size() * sizeof(u64); 477 const std::size_t size_in_bytes = code.size() * sizeof(u64);
478 shader = CachedShader::CreateFromCache(params, found->second, size_in_bytes); 478 shader = CachedShader::CreateFromCache(params, found->second, size_in_bytes);
479 } 479 }
-    Register(shader);
+
+    if (cpu_addr) {
+        Register(shader);
+    } else {
+        null_shader = shader;
+    }
481 486
482 return last_shaders[static_cast<std::size_t>(program)] = shader; 487 return last_shaders[static_cast<std::size_t>(program)] = shader;
483} 488}
@@ -486,7 +491,7 @@ Shader ShaderCacheOpenGL::GetComputeKernel(GPUVAddr code_addr) {
486 auto& memory_manager{system.GPU().MemoryManager()}; 491 auto& memory_manager{system.GPU().MemoryManager()};
487 const auto cpu_addr{memory_manager.GpuToCpuAddress(code_addr)}; 492 const auto cpu_addr{memory_manager.GpuToCpuAddress(code_addr)};
488 493
-    auto kernel = cpu_addr ? TryGet(*cpu_addr) : nullptr;
+    auto kernel = cpu_addr ? TryGet(*cpu_addr) : null_kernel;
490 if (kernel) { 495 if (kernel) {
491 return kernel; 496 return kernel;
492 } 497 }
@@ -507,7 +512,11 @@ Shader ShaderCacheOpenGL::GetComputeKernel(GPUVAddr code_addr) {
507 kernel = CachedShader::CreateFromCache(params, found->second, size_in_bytes); 512 kernel = CachedShader::CreateFromCache(params, found->second, size_in_bytes);
508 } 513 }
509 514
-    Register(kernel);
+    if (cpu_addr) {
+        Register(kernel);
+    } else {
+        null_kernel = kernel;
+    }
511 return kernel; 520 return kernel;
512} 521}
513 522
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.h b/src/video_core/renderer_opengl/gl_shader_cache.h
index c836df5bd..91690b470 100644
--- a/src/video_core/renderer_opengl/gl_shader_cache.h
+++ b/src/video_core/renderer_opengl/gl_shader_cache.h
@@ -125,6 +125,9 @@ private:
125 ShaderDiskCacheOpenGL disk_cache; 125 ShaderDiskCacheOpenGL disk_cache;
126 std::unordered_map<u64, PrecompiledShader> runtime_cache; 126 std::unordered_map<u64, PrecompiledShader> runtime_cache;
127 127
128 Shader null_shader{};
129 Shader null_kernel{};
130
128 std::array<Shader, Maxwell::MaxShaderProgram> last_shaders; 131 std::array<Shader, Maxwell::MaxShaderProgram> last_shaders;
129}; 132};
130 133
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
index 22242cce9..0cd3ad7e1 100644
--- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
@@ -484,7 +484,7 @@ private:
484 code.AddLine("switch (jmp_to) {{"); 484 code.AddLine("switch (jmp_to) {{");
485 485
486 for (const auto& pair : ir.GetBasicBlocks()) { 486 for (const auto& pair : ir.GetBasicBlocks()) {
-            const auto [address, bb] = pair;
+            const auto& [address, bb] = pair;
488 code.AddLine("case 0x{:X}U: {{", address); 488 code.AddLine("case 0x{:X}U: {{", address);
489 ++code.scope; 489 ++code.scope;
490 490
@@ -1484,8 +1484,8 @@ private:
1484 dy += '('; 1484 dy += '(';
1485 1485
1486 for (std::size_t index = 0; index < components; ++index) { 1486 for (std::size_t index = 0; index < components; ++index) {
-            const auto operand_x{derivates.at(index * 2)};
-            const auto operand_y{derivates.at(index * 2 + 1)};
+            const auto& operand_x{derivates.at(index * 2)};
+            const auto& operand_y{derivates.at(index * 2 + 1)};
1489 dx += Visit(operand_x).AsFloat(); 1489 dx += Visit(operand_x).AsFloat();
1490 dy += Visit(operand_y).AsFloat(); 1490 dy += Visit(operand_y).AsFloat();
1491 1491
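The two one-character decompiler changes above are not cosmetic: 'const auto [a, b] = pair' copies the whole pair before binding names, while 'const auto& [a, b] = pair' binds to the original elements. A tiny demonstration:

    #include <cassert>
    #include <string>
    #include <utility>

    int main() {
        std::pair<int, std::string> pair{42, "block"};

        const auto [addr_copy, bb_copy] = pair; // copies the whole pair
        const auto& [addr_ref, bb_ref] = pair;  // binds to pair's members

        assert(addr_copy == addr_ref);
        assert(&bb_ref == &pair.second);  // reference: same object
        assert(&bb_copy != &pair.second); // copy: distinct storage
    }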
diff --git a/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp b/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp
index 2bb376555..be1c31978 100644
--- a/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp
+++ b/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp
@@ -2,10 +2,12 @@
 // Licensed under GPLv2 or any later version
 // Refer to the license.txt file included.
 
+#include <cstring>
 #include <tuple>
 
 #include <boost/functional/hash.hpp>
 
+#include "common/cityhash.h"
 #include "common/common_types.h"
 #include "video_core/renderer_vulkan/fixed_pipeline_state.h"
 
@@ -13,289 +15,352 @@ namespace Vulkan {
 
 namespace {
 
-constexpr FixedPipelineState::DepthStencil GetDepthStencilState(const Maxwell& regs) {
-    const FixedPipelineState::StencilFace front_stencil(
-        regs.stencil_front_op_fail, regs.stencil_front_op_zfail, regs.stencil_front_op_zpass,
-        regs.stencil_front_func_func);
-    const FixedPipelineState::StencilFace back_stencil =
-        regs.stencil_two_side_enable
-            ? FixedPipelineState::StencilFace(regs.stencil_back_op_fail, regs.stencil_back_op_zfail,
-                                              regs.stencil_back_op_zpass,
-                                              regs.stencil_back_func_func)
-            : front_stencil;
-    return FixedPipelineState::DepthStencil(
-        regs.depth_test_enable == 1, regs.depth_write_enabled == 1, regs.depth_bounds_enable == 1,
-        regs.stencil_enable == 1, regs.depth_test_func, front_stencil, back_stencil);
-}
-
-constexpr FixedPipelineState::InputAssembly GetInputAssemblyState(const Maxwell& regs) {
-    return FixedPipelineState::InputAssembly(
-        regs.draw.topology, regs.primitive_restart.enabled,
-        regs.draw.topology == Maxwell::PrimitiveTopology::Points ? regs.point_size : 0.0f);
-}
-
-constexpr FixedPipelineState::BlendingAttachment GetBlendingAttachmentState(
-    const Maxwell& regs, std::size_t render_target) {
-    const auto& mask = regs.color_mask[regs.color_mask_common ? 0 : render_target];
-    const std::array components = {mask.R != 0, mask.G != 0, mask.B != 0, mask.A != 0};
-
-    const FixedPipelineState::BlendingAttachment default_blending(
-        false, Maxwell::Blend::Equation::Add, Maxwell::Blend::Factor::One,
-        Maxwell::Blend::Factor::Zero, Maxwell::Blend::Equation::Add, Maxwell::Blend::Factor::One,
-        Maxwell::Blend::Factor::Zero, components);
-    if (render_target >= regs.rt_control.count) {
-        return default_blending;
-    }
-
-    if (!regs.independent_blend_enable) {
-        const auto& src = regs.blend;
-        if (!src.enable[render_target]) {
-            return default_blending;
-        }
-        return FixedPipelineState::BlendingAttachment(
-            true, src.equation_rgb, src.factor_source_rgb, src.factor_dest_rgb, src.equation_a,
-            src.factor_source_a, src.factor_dest_a, components);
-    }
-
-    if (!regs.blend.enable[render_target]) {
-        return default_blending;
-    }
-    const auto& src = regs.independent_blend[render_target];
-    return FixedPipelineState::BlendingAttachment(
-        true, src.equation_rgb, src.factor_source_rgb, src.factor_dest_rgb, src.equation_a,
-        src.factor_source_a, src.factor_dest_a, components);
-
+constexpr std::size_t POINT = 0;
+constexpr std::size_t LINE = 1;
+constexpr std::size_t POLYGON = 2;
+constexpr std::array POLYGON_OFFSET_ENABLE_LUT = {
+    POINT,   // Points
+    LINE,    // Lines
+    LINE,    // LineLoop
+    LINE,    // LineStrip
+    POLYGON, // Triangles
+    POLYGON, // TriangleStrip
+    POLYGON, // TriangleFan
+    POLYGON, // Quads
+    POLYGON, // QuadStrip
+    POLYGON, // Polygon
+    LINE,    // LinesAdjacency
+    LINE,    // LineStripAdjacency
+    POLYGON, // TrianglesAdjacency
+    POLYGON, // TriangleStripAdjacency
+    POLYGON, // Patches
+};
+
+} // Anonymous namespace
+
+void FixedPipelineState::DepthStencil::Fill(const Maxwell& regs) noexcept {
+    raw = 0;
+    front.action_stencil_fail.Assign(PackStencilOp(regs.stencil_front_op_fail));
+    front.action_depth_fail.Assign(PackStencilOp(regs.stencil_front_op_zfail));
+    front.action_depth_pass.Assign(PackStencilOp(regs.stencil_front_op_zpass));
+    front.test_func.Assign(PackComparisonOp(regs.stencil_front_func_func));
+    if (regs.stencil_two_side_enable) {
+        back.action_stencil_fail.Assign(PackStencilOp(regs.stencil_back_op_fail));
+        back.action_depth_fail.Assign(PackStencilOp(regs.stencil_back_op_zfail));
+        back.action_depth_pass.Assign(PackStencilOp(regs.stencil_back_op_zpass));
+        back.test_func.Assign(PackComparisonOp(regs.stencil_back_func_func));
+    } else {
+        back.action_stencil_fail.Assign(front.action_stencil_fail);
+        back.action_depth_fail.Assign(front.action_depth_fail);
+        back.action_depth_pass.Assign(front.action_depth_pass);
+        back.test_func.Assign(front.test_func);
+    }
+    depth_test_enable.Assign(regs.depth_test_enable);
+    depth_write_enable.Assign(regs.depth_write_enabled);
+    depth_bounds_enable.Assign(regs.depth_bounds_enable);
+    stencil_enable.Assign(regs.stencil_enable);
+    depth_test_func.Assign(PackComparisonOp(regs.depth_test_func));
67} 63}
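DepthStencil::Fill packs both stencil faces and the depth flags into the single raw word declared in the header: the front face occupies bits 0-11 as four 3-bit fields, the back face repeats the same layout at bit 12, and the depth/stencil enables start at bit 24. A standalone restatement of one face's packing, assuming that layout:

    #include <cstdint>

    // sfail/zfail/zpass/func are already-packed 3-bit values.
    constexpr std::uint32_t PackStencilFace(std::uint32_t sfail, std::uint32_t zfail,
                                            std::uint32_t zpass, std::uint32_t func,
                                            std::uint32_t position) {
        return (sfail | (zfail << 3) | (zpass << 6) | (func << 9)) << position;
    }
    // front at bit 0, back at bit 12; sample values for the back face:
    static_assert(PackStencilFace(0, 0, 2, 7, 12) == ((2u << 6 | 7u << 9) << 12));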
68 64
69constexpr FixedPipelineState::ColorBlending GetColorBlendingState(const Maxwell& regs) { 65void FixedPipelineState::Rasterizer::Fill(const Maxwell& regs) noexcept {
70 return FixedPipelineState::ColorBlending( 66 const auto& clip = regs.view_volume_clip_control;
71 {regs.blend_color.r, regs.blend_color.g, regs.blend_color.b, regs.blend_color.a},
72 regs.rt_control.count,
73 {GetBlendingAttachmentState(regs, 0), GetBlendingAttachmentState(regs, 1),
74 GetBlendingAttachmentState(regs, 2), GetBlendingAttachmentState(regs, 3),
75 GetBlendingAttachmentState(regs, 4), GetBlendingAttachmentState(regs, 5),
76 GetBlendingAttachmentState(regs, 6), GetBlendingAttachmentState(regs, 7)});
77}
78
79constexpr FixedPipelineState::Tessellation GetTessellationState(const Maxwell& regs) {
80 return FixedPipelineState::Tessellation(regs.patch_vertices, regs.tess_mode.prim,
81 regs.tess_mode.spacing, regs.tess_mode.cw != 0);
82}
83
84constexpr std::size_t Point = 0;
85constexpr std::size_t Line = 1;
86constexpr std::size_t Polygon = 2;
87constexpr std::array PolygonOffsetEnableLUT = {
88 Point, // Points
89 Line, // Lines
90 Line, // LineLoop
91 Line, // LineStrip
92 Polygon, // Triangles
93 Polygon, // TriangleStrip
94 Polygon, // TriangleFan
95 Polygon, // Quads
96 Polygon, // QuadStrip
97 Polygon, // Polygon
98 Line, // LinesAdjacency
99 Line, // LineStripAdjacency
100 Polygon, // TrianglesAdjacency
101 Polygon, // TriangleStripAdjacency
102 Polygon, // Patches
103};
104
105constexpr FixedPipelineState::Rasterizer GetRasterizerState(const Maxwell& regs) {
106 const std::array enabled_lut = {regs.polygon_offset_point_enable, 67 const std::array enabled_lut = {regs.polygon_offset_point_enable,
107 regs.polygon_offset_line_enable, 68 regs.polygon_offset_line_enable,
108 regs.polygon_offset_fill_enable}; 69 regs.polygon_offset_fill_enable};
109 const auto topology = static_cast<std::size_t>(regs.draw.topology.Value()); 70 const u32 topology_index = static_cast<u32>(regs.draw.topology.Value());
110 const bool depth_bias_enabled = enabled_lut[PolygonOffsetEnableLUT[topology]];
111 71
112 const auto& clip = regs.view_volume_clip_control; 72 u32 packed_front_face = PackFrontFace(regs.front_face);
113 const bool depth_clamp_enabled = clip.depth_clamp_near == 1 || clip.depth_clamp_far == 1;
114
115 Maxwell::FrontFace front_face = regs.front_face;
116 if (regs.screen_y_control.triangle_rast_flip != 0 && 73 if (regs.screen_y_control.triangle_rast_flip != 0 &&
117 regs.viewport_transform[0].scale_y > 0.0f) { 74 regs.viewport_transform[0].scale_y > 0.0f) {
118 if (front_face == Maxwell::FrontFace::CounterClockWise) 75 // Flip front face
119 front_face = Maxwell::FrontFace::ClockWise; 76 packed_front_face = 1 - packed_front_face;
120 else if (front_face == Maxwell::FrontFace::ClockWise)
121 front_face = Maxwell::FrontFace::CounterClockWise;
122 } 77 }
123 78
124 const bool gl_ndc = regs.depth_mode == Maxwell::DepthMode::MinusOneToOne; 79 raw = 0;
125 return FixedPipelineState::Rasterizer(regs.cull_test_enabled, depth_bias_enabled, 80 topology.Assign(topology_index);
126 depth_clamp_enabled, gl_ndc, regs.cull_face, front_face); 81 primitive_restart_enable.Assign(regs.primitive_restart.enabled != 0 ? 1 : 0);
82 cull_enable.Assign(regs.cull_test_enabled != 0 ? 1 : 0);
83 depth_bias_enable.Assign(enabled_lut[POLYGON_OFFSET_ENABLE_LUT[topology_index]] != 0 ? 1 : 0);
84 depth_clamp_enable.Assign(clip.depth_clamp_near == 1 || clip.depth_clamp_far == 1 ? 1 : 0);
85 ndc_minus_one_to_one.Assign(regs.depth_mode == Maxwell::DepthMode::MinusOneToOne ? 1 : 0);
86 cull_face.Assign(PackCullFace(regs.cull_face));
87 front_face.Assign(packed_front_face);
88 polygon_mode.Assign(PackPolygonMode(regs.polygon_mode_front));
89 patch_control_points_minus_one.Assign(regs.patch_vertices - 1);
90 tessellation_primitive.Assign(static_cast<u32>(regs.tess_mode.prim.Value()));
91 tessellation_spacing.Assign(static_cast<u32>(regs.tess_mode.spacing.Value()));
92 tessellation_clockwise.Assign(regs.tess_mode.cw.Value());
93 logic_op_enable.Assign(regs.logic_op.enable != 0 ? 1 : 0);
94 logic_op.Assign(PackLogicOp(regs.logic_op.operation));
95 std::memcpy(&point_size, &regs.point_size, sizeof(point_size)); // TODO: C++20 std::bit_cast
127} 96}
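The memcpy closing Rasterizer::Fill is the pre-C++20 idiom for type-punning the float point size into the u32 field without undefined behavior; once C++20 is available, the TODO in that line resolves to a one-liner, sketched here:

    #include <bit>
    #include <cstdint>

    std::uint32_t PunPointSize(float point_size) {
        // C++20 replacement for the std::memcpy above
        return std::bit_cast<std::uint32_t>(point_size);
    }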
128 97
129} // Anonymous namespace 98void FixedPipelineState::ColorBlending::Fill(const Maxwell& regs) noexcept {
130 99 for (std::size_t index = 0; index < std::size(attachments); ++index) {
131std::size_t FixedPipelineState::VertexBinding::Hash() const noexcept { 100 attachments[index].Fill(regs, index);
132 return (index << stride) ^ divisor; 101 }
133} 102}
134 103
135bool FixedPipelineState::VertexBinding::operator==(const VertexBinding& rhs) const noexcept { 104void FixedPipelineState::BlendingAttachment::Fill(const Maxwell& regs, std::size_t index) {
136 return std::tie(index, stride, divisor) == std::tie(rhs.index, rhs.stride, rhs.divisor); 105 const auto& mask = regs.color_mask[regs.color_mask_common ? 0 : index];
137} 106
107 raw = 0;
108 mask_r.Assign(mask.R);
109 mask_g.Assign(mask.G);
110 mask_b.Assign(mask.B);
111 mask_a.Assign(mask.A);
112
113 // TODO: Use a C++20 templated lambda to deduplicate this code
114
115 if (!regs.independent_blend_enable) {
116 const auto& src = regs.blend;
117 if (!src.enable[index]) {
118 return;
119 }
120 equation_rgb.Assign(PackBlendEquation(src.equation_rgb));
121 equation_a.Assign(PackBlendEquation(src.equation_a));
122 factor_source_rgb.Assign(PackBlendFactor(src.factor_source_rgb));
123 factor_dest_rgb.Assign(PackBlendFactor(src.factor_dest_rgb));
124 factor_source_a.Assign(PackBlendFactor(src.factor_source_a));
125 factor_dest_a.Assign(PackBlendFactor(src.factor_dest_a));
126 enable.Assign(1);
127 return;
128 }
138 129
139std::size_t FixedPipelineState::VertexAttribute::Hash() const noexcept { 130 if (!regs.blend.enable[index]) {
140 return static_cast<std::size_t>(index) ^ (static_cast<std::size_t>(buffer) << 13) ^ 131 return;
141 (static_cast<std::size_t>(type) << 22) ^ (static_cast<std::size_t>(size) << 31) ^ 132 }
142 (static_cast<std::size_t>(offset) << 36); 133 const auto& src = regs.independent_blend[index];
134 equation_rgb.Assign(PackBlendEquation(src.equation_rgb));
135 equation_a.Assign(PackBlendEquation(src.equation_a));
136 factor_source_rgb.Assign(PackBlendFactor(src.factor_source_rgb));
137 factor_dest_rgb.Assign(PackBlendFactor(src.factor_dest_rgb));
138 factor_source_a.Assign(PackBlendFactor(src.factor_source_a));
139 factor_dest_a.Assign(PackBlendFactor(src.factor_dest_a));
140 enable.Assign(1);
143} 141}
144 142
145bool FixedPipelineState::VertexAttribute::operator==(const VertexAttribute& rhs) const noexcept { 143std::size_t FixedPipelineState::Hash() const noexcept {
146 return std::tie(index, buffer, type, size, offset) == 144 const u64 hash = Common::CityHash64(reinterpret_cast<const char*>(this), sizeof *this);
147 std::tie(rhs.index, rhs.buffer, rhs.type, rhs.size, rhs.offset); 145 return static_cast<std::size_t>(hash);
148} 146}
149 147
150std::size_t FixedPipelineState::StencilFace::Hash() const noexcept { 148bool FixedPipelineState::operator==(const FixedPipelineState& rhs) const noexcept {
151 return static_cast<std::size_t>(action_stencil_fail) ^ 149 return std::memcmp(this, &rhs, sizeof *this) == 0;
152 (static_cast<std::size_t>(action_depth_fail) << 4) ^
153 (static_cast<std::size_t>(action_depth_fail) << 20) ^
154 (static_cast<std::size_t>(action_depth_pass) << 36);
155} 150}
156 151
157bool FixedPipelineState::StencilFace::operator==(const StencilFace& rhs) const noexcept { 152FixedPipelineState GetFixedPipelineState(const Maxwell& regs) {
158 return std::tie(action_stencil_fail, action_depth_fail, action_depth_pass, test_func) == 153 FixedPipelineState fixed_state;
159 std::tie(rhs.action_stencil_fail, rhs.action_depth_fail, rhs.action_depth_pass, 154 fixed_state.rasterizer.Fill(regs);
160 rhs.test_func); 155 fixed_state.depth_stencil.Fill(regs);
156 fixed_state.color_blending.Fill(regs);
157 fixed_state.padding = {};
158 return fixed_state;
161} 159}
162 160
163std::size_t FixedPipelineState::BlendingAttachment::Hash() const noexcept { 161u32 FixedPipelineState::PackComparisonOp(Maxwell::ComparisonOp op) noexcept {
164 return static_cast<std::size_t>(enable) ^ (static_cast<std::size_t>(rgb_equation) << 5) ^ 162 // OpenGL enums go from 0x200 to 0x207 and the others from 1 to 8
165 (static_cast<std::size_t>(src_rgb_func) << 10) ^ 163 // If we subtract 0x200 from OpenGL enums and 1 from the others we get a 0-7 range.
166 (static_cast<std::size_t>(dst_rgb_func) << 15) ^ 164 // Perfect for a hash.
167 (static_cast<std::size_t>(a_equation) << 20) ^ 165 const u32 value = static_cast<u32>(op);
168 (static_cast<std::size_t>(src_a_func) << 25) ^ 166 return value - (value >= 0x200 ? 0x200 : 1);
169 (static_cast<std::size_t>(dst_a_func) << 30) ^
170 (static_cast<std::size_t>(components[0]) << 35) ^
171 (static_cast<std::size_t>(components[1]) << 36) ^
172 (static_cast<std::size_t>(components[2]) << 37) ^
173 (static_cast<std::size_t>(components[3]) << 38);
174} 167}
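A quick check of the rule in PackComparisonOp: the two enum aliases for the same operation collapse to one packed index, and Unpack always returns the 1-based variant. Restated as a standalone constexpr so the arithmetic can be verified at compile time:

    #include <cstdint>

    constexpr std::uint32_t PackCmp(std::uint32_t value) {
        return value - (value >= 0x200 ? 0x200 : 1);
    }
    static_assert(PackCmp(0x203) == 3); // GL-style LessEqual (0x200-based)
    static_assert(PackCmp(4) == 3);     // 1-based LessEqual
    // UnpackComparisonOp(3) then yields the 1-based enum, i.e. value 4.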
175 168
176bool FixedPipelineState::BlendingAttachment::operator==(const BlendingAttachment& rhs) const 169Maxwell::ComparisonOp FixedPipelineState::UnpackComparisonOp(u32 packed) noexcept {
177 noexcept { 170 // Read PackComparisonOp for the logic behind this.
178 return std::tie(enable, rgb_equation, src_rgb_func, dst_rgb_func, a_equation, src_a_func, 171 return static_cast<Maxwell::ComparisonOp>(packed + 1);
179 dst_a_func, components) ==
180 std::tie(rhs.enable, rhs.rgb_equation, rhs.src_rgb_func, rhs.dst_rgb_func,
181 rhs.a_equation, rhs.src_a_func, rhs.dst_a_func, rhs.components);
182} 172}
183 173
184std::size_t FixedPipelineState::VertexInput::Hash() const noexcept { 174u32 FixedPipelineState::PackStencilOp(Maxwell::StencilOp op) noexcept {
185 std::size_t hash = num_bindings ^ (num_attributes << 32); 175 switch (op) {
186 for (std::size_t i = 0; i < num_bindings; ++i) { 176 case Maxwell::StencilOp::Keep:
187 boost::hash_combine(hash, bindings[i].Hash()); 177 case Maxwell::StencilOp::KeepOGL:
188 } 178 return 0;
189 for (std::size_t i = 0; i < num_attributes; ++i) { 179 case Maxwell::StencilOp::Zero:
190 boost::hash_combine(hash, attributes[i].Hash()); 180 case Maxwell::StencilOp::ZeroOGL:
181 return 1;
182 case Maxwell::StencilOp::Replace:
183 case Maxwell::StencilOp::ReplaceOGL:
184 return 2;
185 case Maxwell::StencilOp::Incr:
186 case Maxwell::StencilOp::IncrOGL:
187 return 3;
188 case Maxwell::StencilOp::Decr:
189 case Maxwell::StencilOp::DecrOGL:
190 return 4;
191 case Maxwell::StencilOp::Invert:
192 case Maxwell::StencilOp::InvertOGL:
193 return 5;
194 case Maxwell::StencilOp::IncrWrap:
195 case Maxwell::StencilOp::IncrWrapOGL:
196 return 6;
197 case Maxwell::StencilOp::DecrWrap:
198 case Maxwell::StencilOp::DecrWrapOGL:
199 return 7;
191 } 200 }
192 return hash; 201 return 0;
193} 202}
194 203
195bool FixedPipelineState::VertexInput::operator==(const VertexInput& rhs) const noexcept { 204Maxwell::StencilOp FixedPipelineState::UnpackStencilOp(u32 packed) noexcept {
196 return std::equal(bindings.begin(), bindings.begin() + num_bindings, rhs.bindings.begin(), 205 static constexpr std::array LUT = {Maxwell::StencilOp::Keep, Maxwell::StencilOp::Zero,
197 rhs.bindings.begin() + rhs.num_bindings) && 206 Maxwell::StencilOp::Replace, Maxwell::StencilOp::Incr,
198 std::equal(attributes.begin(), attributes.begin() + num_attributes, 207 Maxwell::StencilOp::Decr, Maxwell::StencilOp::Invert,
199 rhs.attributes.begin(), rhs.attributes.begin() + rhs.num_attributes); 208 Maxwell::StencilOp::IncrWrap, Maxwell::StencilOp::DecrWrap};
209 return LUT[packed];
200} 210}
201 211
202std::size_t FixedPipelineState::InputAssembly::Hash() const noexcept { 212u32 FixedPipelineState::PackCullFace(Maxwell::CullFace cull) noexcept {
203 std::size_t point_size_int = 0; 213 // FrontAndBack is 0x408; subtracting 0x406 from it gives 2.
204 std::memcpy(&point_size_int, &point_size, sizeof(point_size)); 214 // Individual cull faces are 0x404 and 0x405; subtracting 0x404 gives 0 and 1.
205 return (static_cast<std::size_t>(topology) << 24) ^ (point_size_int << 32) ^ 215 const u32 value = static_cast<u32>(cull);
206 static_cast<std::size_t>(primitive_restart_enable); 216 return value - (value == 0x408 ? 0x406 : 0x404);
207} 217}
208 218
209bool FixedPipelineState::InputAssembly::operator==(const InputAssembly& rhs) const noexcept { 219Maxwell::CullFace FixedPipelineState::UnpackCullFace(u32 packed) noexcept {
210 return std::tie(topology, primitive_restart_enable, point_size) == 220 static constexpr std::array LUT = {Maxwell::CullFace::Front, Maxwell::CullFace::Back,
211 std::tie(rhs.topology, rhs.primitive_restart_enable, rhs.point_size); 221 Maxwell::CullFace::FrontAndBack};
222 return LUT[packed];
212} 223}
213 224
214std::size_t FixedPipelineState::Tessellation::Hash() const noexcept { 225u32 FixedPipelineState::PackFrontFace(Maxwell::FrontFace face) noexcept {
215 return static_cast<std::size_t>(patch_control_points) ^ 226 return static_cast<u32>(face) - 0x900;
216 (static_cast<std::size_t>(primitive) << 6) ^ (static_cast<std::size_t>(spacing) << 8) ^
217 (static_cast<std::size_t>(clockwise) << 10);
218} 227}
219 228
220bool FixedPipelineState::Tessellation::operator==(const Tessellation& rhs) const noexcept { 229Maxwell::FrontFace FixedPipelineState::UnpackFrontFace(u32 packed) noexcept {
221 return std::tie(patch_control_points, primitive, spacing, clockwise) == 230 return static_cast<Maxwell::FrontFace>(packed + 0x900);
222 std::tie(rhs.patch_control_points, rhs.primitive, rhs.spacing, rhs.clockwise);
223} 231}
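The branchless winding flip back in Rasterizer::Fill relies on this packing: ClockWise (0x900) packs to 0 and CounterClockWise (0x901) packs to 1, so "1 - packed" swaps the winding without touching the enum. A standalone restatement of the rule:

    #include <cstdint>

    constexpr std::uint32_t PackFace(std::uint32_t gl_enum) {
        return gl_enum - 0x900; // 0x900 CW -> 0, 0x901 CCW -> 1
    }
    static_assert(1 - PackFace(0x900) == PackFace(0x901)); // flip swaps CW/CCW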
224 232
225std::size_t FixedPipelineState::Rasterizer::Hash() const noexcept { 233u32 FixedPipelineState::PackPolygonMode(Maxwell::PolygonMode mode) noexcept {
226 return static_cast<std::size_t>(cull_enable) ^ 234 return static_cast<u32>(mode) - 0x1B00;
227 (static_cast<std::size_t>(depth_bias_enable) << 1) ^
228 (static_cast<std::size_t>(depth_clamp_enable) << 2) ^
229 (static_cast<std::size_t>(ndc_minus_one_to_one) << 3) ^
230 (static_cast<std::size_t>(cull_face) << 24) ^
231 (static_cast<std::size_t>(front_face) << 48);
232} 235}
233 236
234bool FixedPipelineState::Rasterizer::operator==(const Rasterizer& rhs) const noexcept { 237Maxwell::PolygonMode FixedPipelineState::UnpackPolygonMode(u32 packed) noexcept {
235 return std::tie(cull_enable, depth_bias_enable, depth_clamp_enable, ndc_minus_one_to_one, 238 return static_cast<Maxwell::PolygonMode>(packed + 0x1B00);
236 cull_face, front_face) ==
237 std::tie(rhs.cull_enable, rhs.depth_bias_enable, rhs.depth_clamp_enable,
238 rhs.ndc_minus_one_to_one, rhs.cull_face, rhs.front_face);
239} 239}
240 240
241std::size_t FixedPipelineState::DepthStencil::Hash() const noexcept { 241u32 FixedPipelineState::PackLogicOp(Maxwell::LogicOperation op) noexcept {
242 std::size_t hash = static_cast<std::size_t>(depth_test_enable) ^ 242 return static_cast<u32>(op) - 0x1500;
243 (static_cast<std::size_t>(depth_write_enable) << 1) ^
244 (static_cast<std::size_t>(depth_bounds_enable) << 2) ^
245 (static_cast<std::size_t>(stencil_enable) << 3) ^
246 (static_cast<std::size_t>(depth_test_function) << 4);
247 boost::hash_combine(hash, front_stencil.Hash());
248 boost::hash_combine(hash, back_stencil.Hash());
249 return hash;
250} 243}
251 244
252bool FixedPipelineState::DepthStencil::operator==(const DepthStencil& rhs) const noexcept { 245Maxwell::LogicOperation FixedPipelineState::UnpackLogicOp(u32 packed) noexcept {
253 return std::tie(depth_test_enable, depth_write_enable, depth_bounds_enable, depth_test_function, 246 return static_cast<Maxwell::LogicOperation>(packed + 0x1500);
254 stencil_enable, front_stencil, back_stencil) ==
255 std::tie(rhs.depth_test_enable, rhs.depth_write_enable, rhs.depth_bounds_enable,
256 rhs.depth_test_function, rhs.stencil_enable, rhs.front_stencil,
257 rhs.back_stencil);
258} 247}
259 248
260std::size_t FixedPipelineState::ColorBlending::Hash() const noexcept { 249u32 FixedPipelineState::PackBlendEquation(Maxwell::Blend::Equation equation) noexcept {
261 std::size_t hash = attachments_count << 13; 250 switch (equation) {
262 for (std::size_t rt = 0; rt < static_cast<std::size_t>(attachments_count); ++rt) { 251 case Maxwell::Blend::Equation::Add:
263 boost::hash_combine(hash, attachments[rt].Hash()); 252 case Maxwell::Blend::Equation::AddGL:
253 return 0;
254 case Maxwell::Blend::Equation::Subtract:
255 case Maxwell::Blend::Equation::SubtractGL:
256 return 1;
257 case Maxwell::Blend::Equation::ReverseSubtract:
258 case Maxwell::Blend::Equation::ReverseSubtractGL:
259 return 2;
260 case Maxwell::Blend::Equation::Min:
261 case Maxwell::Blend::Equation::MinGL:
262 return 3;
263 case Maxwell::Blend::Equation::Max:
264 case Maxwell::Blend::Equation::MaxGL:
265 return 4;
264 } 266 }
265 return hash; 267 return 0;
266} 268}
267 269
268bool FixedPipelineState::ColorBlending::operator==(const ColorBlending& rhs) const noexcept { 270Maxwell::Blend::Equation FixedPipelineState::UnpackBlendEquation(u32 packed) noexcept {
269 return std::equal(attachments.begin(), attachments.begin() + attachments_count, 271 static constexpr std::array LUT = {
270 rhs.attachments.begin(), rhs.attachments.begin() + rhs.attachments_count); 272 Maxwell::Blend::Equation::Add, Maxwell::Blend::Equation::Subtract,
271} 273 Maxwell::Blend::Equation::ReverseSubtract, Maxwell::Blend::Equation::Min,
272 274 Maxwell::Blend::Equation::Max};
273std::size_t FixedPipelineState::Hash() const noexcept { 275 return LUT[packed];
274 std::size_t hash = 0;
275 boost::hash_combine(hash, vertex_input.Hash());
276 boost::hash_combine(hash, input_assembly.Hash());
277 boost::hash_combine(hash, tessellation.Hash());
278 boost::hash_combine(hash, rasterizer.Hash());
279 boost::hash_combine(hash, depth_stencil.Hash());
280 boost::hash_combine(hash, color_blending.Hash());
281 return hash;
282} 276}
283 277
284bool FixedPipelineState::operator==(const FixedPipelineState& rhs) const noexcept { 278u32 FixedPipelineState::PackBlendFactor(Maxwell::Blend::Factor factor) noexcept {
285 return std::tie(vertex_input, input_assembly, tessellation, rasterizer, depth_stencil, 279 switch (factor) {
286 color_blending) == std::tie(rhs.vertex_input, rhs.input_assembly, 280 case Maxwell::Blend::Factor::Zero:
287 rhs.tessellation, rhs.rasterizer, rhs.depth_stencil, 281 case Maxwell::Blend::Factor::ZeroGL:
288 rhs.color_blending); 282 return 0;
283 case Maxwell::Blend::Factor::One:
284 case Maxwell::Blend::Factor::OneGL:
285 return 1;
286 case Maxwell::Blend::Factor::SourceColor:
287 case Maxwell::Blend::Factor::SourceColorGL:
288 return 2;
289 case Maxwell::Blend::Factor::OneMinusSourceColor:
290 case Maxwell::Blend::Factor::OneMinusSourceColorGL:
291 return 3;
292 case Maxwell::Blend::Factor::SourceAlpha:
293 case Maxwell::Blend::Factor::SourceAlphaGL:
294 return 4;
295 case Maxwell::Blend::Factor::OneMinusSourceAlpha:
296 case Maxwell::Blend::Factor::OneMinusSourceAlphaGL:
297 return 5;
298 case Maxwell::Blend::Factor::DestAlpha:
299 case Maxwell::Blend::Factor::DestAlphaGL:
300 return 6;
301 case Maxwell::Blend::Factor::OneMinusDestAlpha:
302 case Maxwell::Blend::Factor::OneMinusDestAlphaGL:
303 return 7;
304 case Maxwell::Blend::Factor::DestColor:
305 case Maxwell::Blend::Factor::DestColorGL:
306 return 8;
307 case Maxwell::Blend::Factor::OneMinusDestColor:
308 case Maxwell::Blend::Factor::OneMinusDestColorGL:
309 return 9;
310 case Maxwell::Blend::Factor::SourceAlphaSaturate:
311 case Maxwell::Blend::Factor::SourceAlphaSaturateGL:
312 return 10;
313 case Maxwell::Blend::Factor::Source1Color:
314 case Maxwell::Blend::Factor::Source1ColorGL:
315 return 11;
316 case Maxwell::Blend::Factor::OneMinusSource1Color:
317 case Maxwell::Blend::Factor::OneMinusSource1ColorGL:
318 return 12;
319 case Maxwell::Blend::Factor::Source1Alpha:
320 case Maxwell::Blend::Factor::Source1AlphaGL:
321 return 13;
322 case Maxwell::Blend::Factor::OneMinusSource1Alpha:
323 case Maxwell::Blend::Factor::OneMinusSource1AlphaGL:
324 return 14;
325 case Maxwell::Blend::Factor::ConstantColor:
326 case Maxwell::Blend::Factor::ConstantColorGL:
327 return 15;
328 case Maxwell::Blend::Factor::OneMinusConstantColor:
329 case Maxwell::Blend::Factor::OneMinusConstantColorGL:
330 return 16;
331 case Maxwell::Blend::Factor::ConstantAlpha:
332 case Maxwell::Blend::Factor::ConstantAlphaGL:
333 return 17;
334 case Maxwell::Blend::Factor::OneMinusConstantAlpha:
335 case Maxwell::Blend::Factor::OneMinusConstantAlphaGL:
336 return 18;
337 }
338 return 0;
289} 339}
290 340
291FixedPipelineState GetFixedPipelineState(const Maxwell& regs) { 341Maxwell::Blend::Factor FixedPipelineState::UnpackBlendFactor(u32 packed) noexcept {
292 FixedPipelineState fixed_state; 342 static constexpr std::array LUT = {
293 fixed_state.input_assembly = GetInputAssemblyState(regs); 343 Maxwell::Blend::Factor::Zero,
294 fixed_state.tessellation = GetTessellationState(regs); 344 Maxwell::Blend::Factor::One,
295 fixed_state.rasterizer = GetRasterizerState(regs); 345 Maxwell::Blend::Factor::SourceColor,
296 fixed_state.depth_stencil = GetDepthStencilState(regs); 346 Maxwell::Blend::Factor::OneMinusSourceColor,
297 fixed_state.color_blending = GetColorBlendingState(regs); 347 Maxwell::Blend::Factor::SourceAlpha,
298 return fixed_state; 348 Maxwell::Blend::Factor::OneMinusSourceAlpha,
349 Maxwell::Blend::Factor::DestAlpha,
350 Maxwell::Blend::Factor::OneMinusDestAlpha,
351 Maxwell::Blend::Factor::DestColor,
352 Maxwell::Blend::Factor::OneMinusDestColor,
353 Maxwell::Blend::Factor::SourceAlphaSaturate,
354 Maxwell::Blend::Factor::Source1Color,
355 Maxwell::Blend::Factor::OneMinusSource1Color,
356 Maxwell::Blend::Factor::Source1Alpha,
357 Maxwell::Blend::Factor::OneMinusSource1Alpha,
358 Maxwell::Blend::Factor::ConstantColor,
359 Maxwell::Blend::Factor::OneMinusConstantColor,
360 Maxwell::Blend::Factor::ConstantAlpha,
361 Maxwell::Blend::Factor::OneMinusConstantAlpha,
362 };
363 return LUT[packed];
299} 364}
300 365
301} // namespace Vulkan 366} // namespace Vulkan
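Hashing and comparing the struct as raw bytes (CityHash64 plus memcmp above) is only sound because every bit is written deterministically: each Fill() starts from raw = 0 and GetFixedPipelineState() clears the trailing padding. A minimal model of the idiom, assuming a type with unique object representations, which the header asserts below:

    #include <cstring>
    #include <type_traits>

    template <typename T>
    bool BytewiseEqual(const T& lhs, const T& rhs) noexcept {
        // No padding bits may differ between equal values for this to hold.
        static_assert(std::has_unique_object_representations_v<T>);
        return std::memcmp(&lhs, &rhs, sizeof(T)) == 0;
    }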
diff --git a/src/video_core/renderer_vulkan/fixed_pipeline_state.h b/src/video_core/renderer_vulkan/fixed_pipeline_state.h
index 4c8ba7f90..9fe6bdbf9 100644
--- a/src/video_core/renderer_vulkan/fixed_pipeline_state.h
+++ b/src/video_core/renderer_vulkan/fixed_pipeline_state.h
@@ -7,6 +7,7 @@
7#include <array> 7#include <array>
8#include <type_traits> 8#include <type_traits>
9 9
10#include "common/bit_field.h"
10#include "common/common_types.h" 11#include "common/common_types.h"
11 12
12#include "video_core/engines/maxwell_3d.h" 13#include "video_core/engines/maxwell_3d.h"
@@ -16,93 +17,48 @@ namespace Vulkan {
16 17
17using Maxwell = Tegra::Engines::Maxwell3D::Regs; 18using Maxwell = Tegra::Engines::Maxwell3D::Regs;
18 19
19// TODO(Rodrigo): Optimize this structure. 20struct alignas(32) FixedPipelineState {
21 static u32 PackComparisonOp(Maxwell::ComparisonOp op) noexcept;
22 static Maxwell::ComparisonOp UnpackComparisonOp(u32 packed) noexcept;
20 23
21struct FixedPipelineState { 24 static u32 PackStencilOp(Maxwell::StencilOp op) noexcept;
22 using PixelFormat = VideoCore::Surface::PixelFormat; 25 static Maxwell::StencilOp UnpackStencilOp(u32 packed) noexcept;
23 26
24 struct VertexBinding { 27 static u32 PackCullFace(Maxwell::CullFace cull) noexcept;
25 constexpr VertexBinding(u32 index, u32 stride, u32 divisor) 28 static Maxwell::CullFace UnpackCullFace(u32 packed) noexcept;
26 : index{index}, stride{stride}, divisor{divisor} {}
27 VertexBinding() = default;
28 29
29 u32 index; 30 static u32 PackFrontFace(Maxwell::FrontFace face) noexcept;
30 u32 stride; 31 static Maxwell::FrontFace UnpackFrontFace(u32 packed) noexcept;
31 u32 divisor;
32 32
33 std::size_t Hash() const noexcept; 33 static u32 PackPolygonMode(Maxwell::PolygonMode mode) noexcept;
34 34 static Maxwell::PolygonMode UnpackPolygonMode(u32 packed) noexcept;
35 bool operator==(const VertexBinding& rhs) const noexcept;
36
37 bool operator!=(const VertexBinding& rhs) const noexcept {
38 return !operator==(rhs);
39 }
40 };
41
42 struct VertexAttribute {
43 constexpr VertexAttribute(u32 index, u32 buffer, Maxwell::VertexAttribute::Type type,
44 Maxwell::VertexAttribute::Size size, u32 offset)
45 : index{index}, buffer{buffer}, type{type}, size{size}, offset{offset} {}
46 VertexAttribute() = default;
47
48 u32 index;
49 u32 buffer;
50 Maxwell::VertexAttribute::Type type;
51 Maxwell::VertexAttribute::Size size;
52 u32 offset;
53
54 std::size_t Hash() const noexcept;
55
56 bool operator==(const VertexAttribute& rhs) const noexcept;
57
58 bool operator!=(const VertexAttribute& rhs) const noexcept {
59 return !operator==(rhs);
60 }
61 };
62
63 struct StencilFace {
64 constexpr StencilFace(Maxwell::StencilOp action_stencil_fail,
65 Maxwell::StencilOp action_depth_fail,
66 Maxwell::StencilOp action_depth_pass, Maxwell::ComparisonOp test_func)
67 : action_stencil_fail{action_stencil_fail}, action_depth_fail{action_depth_fail},
68 action_depth_pass{action_depth_pass}, test_func{test_func} {}
69 StencilFace() = default;
70
71 Maxwell::StencilOp action_stencil_fail;
72 Maxwell::StencilOp action_depth_fail;
73 Maxwell::StencilOp action_depth_pass;
74 Maxwell::ComparisonOp test_func;
75 35
76 std::size_t Hash() const noexcept; 36 static u32 PackLogicOp(Maxwell::LogicOperation op) noexcept;
37 static Maxwell::LogicOperation UnpackLogicOp(u32 packed) noexcept;
77 38
78 bool operator==(const StencilFace& rhs) const noexcept; 39 static u32 PackBlendEquation(Maxwell::Blend::Equation equation) noexcept;
40 static Maxwell::Blend::Equation UnpackBlendEquation(u32 packed) noexcept;
79 41
80 bool operator!=(const StencilFace& rhs) const noexcept { 42 static u32 PackBlendFactor(Maxwell::Blend::Factor factor) noexcept;
81 return !operator==(rhs); 43 static Maxwell::Blend::Factor UnpackBlendFactor(u32 packed) noexcept;
82 }
83 };
84 44
85 struct BlendingAttachment { 45 struct BlendingAttachment {
86 constexpr BlendingAttachment(bool enable, Maxwell::Blend::Equation rgb_equation, 46 union {
87 Maxwell::Blend::Factor src_rgb_func, 47 u32 raw;
88 Maxwell::Blend::Factor dst_rgb_func, 48 BitField<0, 1, u32> mask_r;
89 Maxwell::Blend::Equation a_equation, 49 BitField<1, 1, u32> mask_g;
90 Maxwell::Blend::Factor src_a_func, 50 BitField<2, 1, u32> mask_b;
91 Maxwell::Blend::Factor dst_a_func, 51 BitField<3, 1, u32> mask_a;
92 std::array<bool, 4> components) 52 BitField<4, 3, u32> equation_rgb;
93 : enable{enable}, rgb_equation{rgb_equation}, src_rgb_func{src_rgb_func}, 53 BitField<7, 3, u32> equation_a;
94 dst_rgb_func{dst_rgb_func}, a_equation{a_equation}, src_a_func{src_a_func}, 54 BitField<10, 5, u32> factor_source_rgb;
95 dst_a_func{dst_a_func}, components{components} {} 55 BitField<15, 5, u32> factor_dest_rgb;
96 BlendingAttachment() = default; 56 BitField<20, 5, u32> factor_source_a;
97 57 BitField<25, 5, u32> factor_dest_a;
98 bool enable; 58 BitField<30, 1, u32> enable;
99 Maxwell::Blend::Equation rgb_equation; 59 };
100 Maxwell::Blend::Factor src_rgb_func; 60
101 Maxwell::Blend::Factor dst_rgb_func; 61 void Fill(const Maxwell& regs, std::size_t index);
102 Maxwell::Blend::Equation a_equation;
103 Maxwell::Blend::Factor src_a_func;
104 Maxwell::Blend::Factor dst_a_func;
105 std::array<bool, 4> components;
106 62
107 std::size_t Hash() const noexcept; 63 std::size_t Hash() const noexcept;
108 64
@@ -111,136 +67,178 @@ struct FixedPipelineState {
111 bool operator!=(const BlendingAttachment& rhs) const noexcept { 67 bool operator!=(const BlendingAttachment& rhs) const noexcept {
112 return !operator==(rhs); 68 return !operator==(rhs);
113 } 69 }
114 };
115
116 struct VertexInput {
117 std::size_t num_bindings = 0;
118 std::size_t num_attributes = 0;
119 std::array<VertexBinding, Maxwell::NumVertexArrays> bindings;
120 std::array<VertexAttribute, Maxwell::NumVertexAttributes> attributes;
121
122 std::size_t Hash() const noexcept;
123 70
124 bool operator==(const VertexInput& rhs) const noexcept; 71 constexpr std::array<bool, 4> Mask() const noexcept {
72 return {mask_r != 0, mask_g != 0, mask_b != 0, mask_a != 0};
73 }
125 74
126 bool operator!=(const VertexInput& rhs) const noexcept { 75 Maxwell::Blend::Equation EquationRGB() const noexcept {
127 return !operator==(rhs); 76 return UnpackBlendEquation(equation_rgb.Value());
128 } 77 }
129 };
130 78
131 struct InputAssembly { 79 Maxwell::Blend::Equation EquationAlpha() const noexcept {
132 constexpr InputAssembly(Maxwell::PrimitiveTopology topology, bool primitive_restart_enable, 80 return UnpackBlendEquation(equation_a.Value());
133 float point_size) 81 }
134 : topology{topology}, primitive_restart_enable{primitive_restart_enable},
135 point_size{point_size} {}
136 InputAssembly() = default;
137 82
138 Maxwell::PrimitiveTopology topology; 83 Maxwell::Blend::Factor SourceRGBFactor() const noexcept {
139 bool primitive_restart_enable; 84 return UnpackBlendFactor(factor_source_rgb.Value());
140 float point_size; 85 }
141 86
142 std::size_t Hash() const noexcept; 87 Maxwell::Blend::Factor DestRGBFactor() const noexcept {
88 return UnpackBlendFactor(factor_dest_rgb.Value());
89 }
143 90
144 bool operator==(const InputAssembly& rhs) const noexcept; 91 Maxwell::Blend::Factor SourceAlphaFactor() const noexcept {
92 return UnpackBlendFactor(factor_source_a.Value());
93 }
145 94
146 bool operator!=(const InputAssembly& rhs) const noexcept { 95 Maxwell::Blend::Factor DestAlphaFactor() const noexcept {
147 return !operator==(rhs); 96 return UnpackBlendFactor(factor_dest_a.Value());
148 } 97 }
149 }; 98 };
150 99
151 struct Tessellation { 100 struct VertexInput {
152 constexpr Tessellation(u32 patch_control_points, Maxwell::TessellationPrimitive primitive, 101 union Binding {
153 Maxwell::TessellationSpacing spacing, bool clockwise) 102 u16 raw;
154 : patch_control_points{patch_control_points}, primitive{primitive}, spacing{spacing}, 103 BitField<0, 1, u16> enabled;
155 clockwise{clockwise} {} 104 BitField<1, 12, u16> stride;
156 Tessellation() = default; 105 };
157 106
158 u32 patch_control_points; 107 union Attribute {
159 Maxwell::TessellationPrimitive primitive; 108 u32 raw;
160 Maxwell::TessellationSpacing spacing; 109 BitField<0, 1, u32> enabled;
161 bool clockwise; 110 BitField<1, 5, u32> buffer;
162 111 BitField<6, 14, u32> offset;
163 std::size_t Hash() const noexcept; 112 BitField<20, 3, u32> type;
164 113 BitField<23, 6, u32> size;
165 bool operator==(const Tessellation& rhs) const noexcept; 114
115 constexpr Maxwell::VertexAttribute::Type Type() const noexcept {
116 return static_cast<Maxwell::VertexAttribute::Type>(type.Value());
117 }
118
119 constexpr Maxwell::VertexAttribute::Size Size() const noexcept {
120 return static_cast<Maxwell::VertexAttribute::Size>(size.Value());
121 }
122 };
123
124 std::array<Binding, Maxwell::NumVertexArrays> bindings;
125 std::array<u32, Maxwell::NumVertexArrays> binding_divisors;
126 std::array<Attribute, Maxwell::NumVertexAttributes> attributes;
127
128 void SetBinding(std::size_t index, bool enabled, u32 stride, u32 divisor) noexcept {
129 auto& binding = bindings[index];
130 binding.raw = 0;
131 binding.enabled.Assign(enabled ? 1 : 0);
132 binding.stride.Assign(stride);
133 binding_divisors[index] = divisor;
134 }
166 135
167 bool operator!=(const Tessellation& rhs) const noexcept { 136 void SetAttribute(std::size_t index, bool enabled, u32 buffer, u32 offset,
168 return !operator==(rhs); 137 Maxwell::VertexAttribute::Type type,
138 Maxwell::VertexAttribute::Size size) noexcept {
139 auto& attribute = attributes[index];
140 attribute.raw = 0;
141 attribute.enabled.Assign(enabled ? 1 : 0);
142 attribute.buffer.Assign(buffer);
143 attribute.offset.Assign(offset);
144 attribute.type.Assign(static_cast<u32>(type));
145 attribute.size.Assign(static_cast<u32>(size));
169 } 146 }
170 }; 147 };
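The new SetBinding/SetAttribute setters replace the removed VertexBinding/VertexAttribute constructors; a caller now writes the packed state directly. A hypothetical fill of one binding and one attribute, assuming the declarations from this header and maxwell_3d.h are in scope (the concrete values are invented for illustration):

    FixedPipelineState::VertexInput vertex_input{};
    vertex_input.SetBinding(0, true, /*stride=*/16, /*divisor=*/0);
    vertex_input.SetAttribute(0, true, /*buffer=*/0, /*offset=*/0,
                              Maxwell::VertexAttribute::Type::Float,
                              Maxwell::VertexAttribute::Size::Size_32_32_32_32);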
171 148
172 struct Rasterizer { 149 struct Rasterizer {
173 constexpr Rasterizer(bool cull_enable, bool depth_bias_enable, bool depth_clamp_enable, 150 union {
174 bool ndc_minus_one_to_one, Maxwell::CullFace cull_face, 151 u32 raw;
175 Maxwell::FrontFace front_face) 152 BitField<0, 4, u32> topology;
176 : cull_enable{cull_enable}, depth_bias_enable{depth_bias_enable}, 153 BitField<4, 1, u32> primitive_restart_enable;
177 depth_clamp_enable{depth_clamp_enable}, ndc_minus_one_to_one{ndc_minus_one_to_one}, 154 BitField<5, 1, u32> cull_enable;
178 cull_face{cull_face}, front_face{front_face} {} 155 BitField<6, 1, u32> depth_bias_enable;
179 Rasterizer() = default; 156 BitField<7, 1, u32> depth_clamp_enable;
180 157 BitField<8, 1, u32> ndc_minus_one_to_one;
181 bool cull_enable; 158 BitField<9, 2, u32> cull_face;
182 bool depth_bias_enable; 159 BitField<11, 1, u32> front_face;
183 bool depth_clamp_enable; 160 BitField<12, 2, u32> polygon_mode;
184 bool ndc_minus_one_to_one; 161 BitField<14, 5, u32> patch_control_points_minus_one;
185 Maxwell::CullFace cull_face; 162 BitField<19, 2, u32> tessellation_primitive;
186 Maxwell::FrontFace front_face; 163 BitField<21, 2, u32> tessellation_spacing;
187 164 BitField<23, 1, u32> tessellation_clockwise;
188 std::size_t Hash() const noexcept; 165 BitField<24, 1, u32> logic_op_enable;
166 BitField<25, 4, u32> logic_op;
167 };
168
169 // TODO(Rodrigo): Move this to push constants
170 u32 point_size;
171
172 void Fill(const Maxwell& regs) noexcept;
173
174 constexpr Maxwell::PrimitiveTopology Topology() const noexcept {
175 return static_cast<Maxwell::PrimitiveTopology>(topology.Value());
176 }
189 177
190 bool operator==(const Rasterizer& rhs) const noexcept; 178 Maxwell::CullFace CullFace() const noexcept {
179 return UnpackCullFace(cull_face.Value());
180 }
191 181
192 bool operator!=(const Rasterizer& rhs) const noexcept { 182 Maxwell::FrontFace FrontFace() const noexcept {
193 return !operator==(rhs); 183 return UnpackFrontFace(front_face.Value());
194 } 184 }
195 }; 185 };
196 186
197 struct DepthStencil { 187 struct DepthStencil {
198 constexpr DepthStencil(bool depth_test_enable, bool depth_write_enable, 188 template <std::size_t Position>
199 bool depth_bounds_enable, bool stencil_enable, 189 union StencilFace {
200 Maxwell::ComparisonOp depth_test_function, StencilFace front_stencil, 190 BitField<Position + 0, 3, u32> action_stencil_fail;
201 StencilFace back_stencil) 191 BitField<Position + 3, 3, u32> action_depth_fail;
202 : depth_test_enable{depth_test_enable}, depth_write_enable{depth_write_enable}, 192 BitField<Position + 6, 3, u32> action_depth_pass;
203 depth_bounds_enable{depth_bounds_enable}, stencil_enable{stencil_enable}, 193 BitField<Position + 9, 3, u32> test_func;
204 depth_test_function{depth_test_function}, front_stencil{front_stencil}, 194
205 back_stencil{back_stencil} {} 195 Maxwell::StencilOp ActionStencilFail() const noexcept {
206 DepthStencil() = default; 196 return UnpackStencilOp(action_stencil_fail);
207 197 }
208 bool depth_test_enable; 198
209 bool depth_write_enable; 199 Maxwell::StencilOp ActionDepthFail() const noexcept {
210 bool depth_bounds_enable; 200 return UnpackStencilOp(action_depth_fail);
211 bool stencil_enable; 201 }
212 Maxwell::ComparisonOp depth_test_function; 202
213 StencilFace front_stencil; 203 Maxwell::StencilOp ActionDepthPass() const noexcept {
214 StencilFace back_stencil; 204 return UnpackStencilOp(action_depth_pass);
215 205 }
216 std::size_t Hash() const noexcept; 206
217 207 Maxwell::ComparisonOp TestFunc() const noexcept {
218 bool operator==(const DepthStencil& rhs) const noexcept; 208 return UnpackComparisonOp(test_func);
219 209 }
220 bool operator!=(const DepthStencil& rhs) const noexcept { 210 };
221 return !operator==(rhs); 211
212 union {
213 u32 raw;
214 StencilFace<0> front;
215 StencilFace<12> back;
216 BitField<24, 1, u32> depth_test_enable;
217 BitField<25, 1, u32> depth_write_enable;
218 BitField<26, 1, u32> depth_bounds_enable;
219 BitField<27, 1, u32> stencil_enable;
220 BitField<28, 3, u32> depth_test_func;
221 };
222
223 void Fill(const Maxwell& regs) noexcept;
224
225 Maxwell::ComparisonOp DepthTestFunc() const noexcept {
226 return UnpackComparisonOp(depth_test_func);
222 } 227 }
223 }; 228 };
224 229
225 struct ColorBlending { 230 struct ColorBlending {
226 constexpr ColorBlending(
227 std::array<float, 4> blend_constants, std::size_t attachments_count,
228 std::array<BlendingAttachment, Maxwell::NumRenderTargets> attachments)
229 : attachments_count{attachments_count}, attachments{attachments} {}
230 ColorBlending() = default;
231
232 std::size_t attachments_count;
233 std::array<BlendingAttachment, Maxwell::NumRenderTargets> attachments; 231 std::array<BlendingAttachment, Maxwell::NumRenderTargets> attachments;
234 232
235 std::size_t Hash() const noexcept; 233 void Fill(const Maxwell& regs) noexcept;
236
237 bool operator==(const ColorBlending& rhs) const noexcept;
238
239 bool operator!=(const ColorBlending& rhs) const noexcept {
240 return !operator==(rhs);
241 }
242 }; 234 };
243 235
236 VertexInput vertex_input;
237 Rasterizer rasterizer;
238 DepthStencil depth_stencil;
239 ColorBlending color_blending;
240 std::array<u8, 20> padding;
241
244 std::size_t Hash() const noexcept; 242 std::size_t Hash() const noexcept;
245 243
246 bool operator==(const FixedPipelineState& rhs) const noexcept; 244 bool operator==(const FixedPipelineState& rhs) const noexcept;
@@ -248,25 +246,11 @@ struct FixedPipelineState {
248 bool operator!=(const FixedPipelineState& rhs) const noexcept { 246 bool operator!=(const FixedPipelineState& rhs) const noexcept {
249 return !operator==(rhs); 247 return !operator==(rhs);
250 } 248 }
251
252 VertexInput vertex_input;
253 InputAssembly input_assembly;
254 Tessellation tessellation;
255 Rasterizer rasterizer;
256 DepthStencil depth_stencil;
257 ColorBlending color_blending;
258}; 249};
259static_assert(std::is_trivially_copyable_v<FixedPipelineState::VertexBinding>); 250static_assert(std::has_unique_object_representations_v<FixedPipelineState>);
260static_assert(std::is_trivially_copyable_v<FixedPipelineState::VertexAttribute>);
261static_assert(std::is_trivially_copyable_v<FixedPipelineState::StencilFace>);
262static_assert(std::is_trivially_copyable_v<FixedPipelineState::BlendingAttachment>);
263static_assert(std::is_trivially_copyable_v<FixedPipelineState::VertexInput>);
264static_assert(std::is_trivially_copyable_v<FixedPipelineState::InputAssembly>);
265static_assert(std::is_trivially_copyable_v<FixedPipelineState::Tessellation>);
266static_assert(std::is_trivially_copyable_v<FixedPipelineState::Rasterizer>);
267static_assert(std::is_trivially_copyable_v<FixedPipelineState::DepthStencil>);
268static_assert(std::is_trivially_copyable_v<FixedPipelineState::ColorBlending>);
269static_assert(std::is_trivially_copyable_v<FixedPipelineState>); 251static_assert(std::is_trivially_copyable_v<FixedPipelineState>);
252static_assert(std::is_trivially_constructible_v<FixedPipelineState>);
253static_assert(sizeof(FixedPipelineState) % 32 == 0, "Size is not aligned");
270 254
271FixedPipelineState GetFixedPipelineState(const Maxwell& regs); 255FixedPipelineState GetFixedPipelineState(const Maxwell& regs);
272 256
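The header leans throughout on the BitField pattern from common/bit_field.h: several BitField members share one word of storage, and each reads or writes only its own bit slice. Reduced to a two-field sketch, assuming common/bit_field.h and common_types.h are included:

    union Example {
        u32 raw;
        BitField<0, 4, u32> topology; // bits [0, 4)
        BitField<4, 1, u32> restart;  // bit 4
    };

    Example e;
    e.raw = 0;
    e.topology.Assign(3);
    e.restart.Assign(1);
    // e.raw == (3u << 0) | (1u << 4)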
diff --git a/src/video_core/renderer_vulkan/nsight_aftermath_tracker.cpp b/src/video_core/renderer_vulkan/nsight_aftermath_tracker.cpp
new file mode 100644
index 000000000..435c8c1b8
--- /dev/null
+++ b/src/video_core/renderer_vulkan/nsight_aftermath_tracker.cpp
@@ -0,0 +1,220 @@
1// Copyright 2020 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#ifdef HAS_NSIGHT_AFTERMATH
6
7#include <mutex>
8#include <string>
9#include <string_view>
10#include <utility>
11#include <vector>
12
13#include <fmt/format.h>
14
15#define VK_NO_PROTOTYPES
16#include <vulkan/vulkan.h>
17
18#include <GFSDK_Aftermath.h>
19#include <GFSDK_Aftermath_Defines.h>
20#include <GFSDK_Aftermath_GpuCrashDump.h>
21#include <GFSDK_Aftermath_GpuCrashDumpDecoding.h>
22
23#include "common/common_paths.h"
24#include "common/common_types.h"
25#include "common/file_util.h"
26#include "common/logging/log.h"
27#include "common/scope_exit.h"
28
29#include "video_core/renderer_vulkan/nsight_aftermath_tracker.h"
30
31namespace Vulkan {
32
33static constexpr char AFTERMATH_LIB_NAME[] = "GFSDK_Aftermath_Lib.x64.dll";
34
35NsightAftermathTracker::NsightAftermathTracker() = default;
36
37NsightAftermathTracker::~NsightAftermathTracker() {
38 if (initialized) {
39 (void)GFSDK_Aftermath_DisableGpuCrashDumps();
40 }
41}
42
43bool NsightAftermathTracker::Initialize() {
44 if (!dl.Open(AFTERMATH_LIB_NAME)) {
45 LOG_ERROR(Render_Vulkan, "Failed to load Nsight Aftermath DLL");
46 return false;
47 }
48
49 if (!dl.GetSymbol("GFSDK_Aftermath_DisableGpuCrashDumps",
50 &GFSDK_Aftermath_DisableGpuCrashDumps) ||
51 !dl.GetSymbol("GFSDK_Aftermath_EnableGpuCrashDumps",
52 &GFSDK_Aftermath_EnableGpuCrashDumps) ||
53 !dl.GetSymbol("GFSDK_Aftermath_GetShaderDebugInfoIdentifier",
54 &GFSDK_Aftermath_GetShaderDebugInfoIdentifier) ||
55 !dl.GetSymbol("GFSDK_Aftermath_GetShaderHashSpirv", &GFSDK_Aftermath_GetShaderHashSpirv) ||
56 !dl.GetSymbol("GFSDK_Aftermath_GpuCrashDump_CreateDecoder",
57 &GFSDK_Aftermath_GpuCrashDump_CreateDecoder) ||
58 !dl.GetSymbol("GFSDK_Aftermath_GpuCrashDump_DestroyDecoder",
59 &GFSDK_Aftermath_GpuCrashDump_DestroyDecoder) ||
60 !dl.GetSymbol("GFSDK_Aftermath_GpuCrashDump_GenerateJSON",
61 &GFSDK_Aftermath_GpuCrashDump_GenerateJSON) ||
62 !dl.GetSymbol("GFSDK_Aftermath_GpuCrashDump_GetJSON",
63 &GFSDK_Aftermath_GpuCrashDump_GetJSON)) {
64 LOG_ERROR(Render_Vulkan, "Failed to load Nsight Aftermath function pointers");
65 return false;
66 }
67
68 dump_dir = FileUtil::GetUserPath(FileUtil::UserPath::LogDir) + "gpucrash";
69
70 (void)FileUtil::DeleteDirRecursively(dump_dir);
71 if (!FileUtil::CreateDir(dump_dir)) {
72 LOG_ERROR(Render_Vulkan, "Failed to create Nsight Aftermath dump directory");
73 return false;
74 }
75
76 if (!GFSDK_Aftermath_SUCCEED(GFSDK_Aftermath_EnableGpuCrashDumps(
77 GFSDK_Aftermath_Version_API, GFSDK_Aftermath_GpuCrashDumpWatchedApiFlags_Vulkan,
78 GFSDK_Aftermath_GpuCrashDumpFeatureFlags_Default, GpuCrashDumpCallback,
79 ShaderDebugInfoCallback, CrashDumpDescriptionCallback, this))) {
80 LOG_ERROR(Render_Vulkan, "GFSDK_Aftermath_EnableGpuCrashDumps failed");
81 return false;
82 }
83
84 LOG_INFO(Render_Vulkan, "Nsight Aftermath dump directory is \"{}\"", dump_dir);
85
86 initialized = true;
87 return true;
88}
89
90void NsightAftermathTracker::SaveShader(const std::vector<u32>& spirv) const {
91 if (!initialized) {
92 return;
93 }
94
95 std::vector<u32> spirv_copy = spirv;
96 GFSDK_Aftermath_SpirvCode shader;
97 shader.pData = spirv_copy.data();
98 shader.size = static_cast<u32>(spirv_copy.size() * 4);
99
100 std::scoped_lock lock{mutex};
101
102 GFSDK_Aftermath_ShaderHash hash;
103 if (!GFSDK_Aftermath_SUCCEED(
104 GFSDK_Aftermath_GetShaderHashSpirv(GFSDK_Aftermath_Version_API, &shader, &hash))) {
105 LOG_ERROR(Render_Vulkan, "Failed to hash SPIR-V module");
106 return;
107 }
108
109 FileUtil::IOFile file(fmt::format("{}/source_{:016x}.spv", dump_dir, hash.hash), "wb");
110 if (!file.IsOpen()) {
111 LOG_ERROR(Render_Vulkan, "Failed to dump SPIR-V module with hash={:016x}", hash.hash);
112 return;
113 }
114 if (file.WriteArray(spirv.data(), spirv.size()) != spirv.size()) {
115 LOG_ERROR(Render_Vulkan, "Failed to write SPIR-V module with hash={:016x}", hash.hash);
116 return;
117 }
118}
119
120void NsightAftermathTracker::OnGpuCrashDumpCallback(const void* gpu_crash_dump,
121 u32 gpu_crash_dump_size) {
122 std::scoped_lock lock{mutex};
123
124 LOG_CRITICAL(Render_Vulkan, "GPU crash dump received");
125
126 GFSDK_Aftermath_GpuCrashDump_Decoder decoder;
127 if (!GFSDK_Aftermath_SUCCEED(GFSDK_Aftermath_GpuCrashDump_CreateDecoder(
128 GFSDK_Aftermath_Version_API, gpu_crash_dump, gpu_crash_dump_size, &decoder))) {
129 LOG_ERROR(Render_Vulkan, "Failed to create decoder");
130 return;
131 }
132 SCOPE_EXIT({ GFSDK_Aftermath_GpuCrashDump_DestroyDecoder(decoder); });
133
134 u32 json_size = 0;
135 if (!GFSDK_Aftermath_SUCCEED(GFSDK_Aftermath_GpuCrashDump_GenerateJSON(
136 decoder, GFSDK_Aftermath_GpuCrashDumpDecoderFlags_ALL_INFO,
137 GFSDK_Aftermath_GpuCrashDumpFormatterFlags_NONE, nullptr, nullptr, nullptr, nullptr,
138 this, &json_size))) {
139 LOG_ERROR(Render_Vulkan, "Failed to generate JSON");
140 return;
141 }
142 std::vector<char> json(json_size);
143 if (!GFSDK_Aftermath_SUCCEED(
144 GFSDK_Aftermath_GpuCrashDump_GetJSON(decoder, json_size, json.data()))) {
145 LOG_ERROR(Render_Vulkan, "Failed to query JSON");
146 return;
147 }
148
149 const std::string base_name = [this] {
150 const int id = dump_id++;
151 if (id == 0) {
152 return fmt::format("{}/crash.nv-gpudmp", dump_dir);
153 } else {
154 return fmt::format("{}/crash_{}.nv-gpudmp", dump_dir, id);
155 }
156 }();
157
158 std::string_view dump_view(static_cast<const char*>(gpu_crash_dump), gpu_crash_dump_size);
159 if (FileUtil::WriteStringToFile(false, base_name, dump_view) != gpu_crash_dump_size) {
160 LOG_ERROR(Render_Vulkan, "Failed to write dump file");
161 return;
162 }
163 const std::string_view json_view(json.data(), json.size());
164 if (FileUtil::WriteStringToFile(true, base_name + ".json", json_view) != json.size()) {
165 LOG_ERROR(Render_Vulkan, "Failed to write JSON");
166 return;
167 }
168}
169
170void NsightAftermathTracker::OnShaderDebugInfoCallback(const void* shader_debug_info,
171 u32 shader_debug_info_size) {
172 std::scoped_lock lock{mutex};
173
174 GFSDK_Aftermath_ShaderDebugInfoIdentifier identifier;
175 if (!GFSDK_Aftermath_SUCCEED(GFSDK_Aftermath_GetShaderDebugInfoIdentifier(
176 GFSDK_Aftermath_Version_API, shader_debug_info, shader_debug_info_size, &identifier))) {
177 LOG_ERROR(Render_Vulkan, "GFSDK_Aftermath_GetShaderDebugInfoIdentifier failed");
178 return;
179 }
180
181 const std::string path =
182 fmt::format("{}/shader_{:016x}{:016x}.nvdbg", dump_dir, identifier.id[0], identifier.id[1]);
183 FileUtil::IOFile file(path, "wb");
184 if (!file.IsOpen()) {
185 LOG_ERROR(Render_Vulkan, "Failed to create file {}", path);
186 return;
187 }
188 if (file.WriteBytes(static_cast<const u8*>(shader_debug_info), shader_debug_info_size) !=
189 shader_debug_info_size) {
190 LOG_ERROR(Render_Vulkan, "Failed to write file {}", path);
191 return;
192 }
193}
194
195void NsightAftermathTracker::OnCrashDumpDescriptionCallback(
196 PFN_GFSDK_Aftermath_AddGpuCrashDumpDescription add_description) {
197 add_description(GFSDK_Aftermath_GpuCrashDumpDescriptionKey_ApplicationName, "yuzu");
198}
199
200void NsightAftermathTracker::GpuCrashDumpCallback(const void* gpu_crash_dump,
201 u32 gpu_crash_dump_size, void* user_data) {
202 static_cast<NsightAftermathTracker*>(user_data)->OnGpuCrashDumpCallback(gpu_crash_dump,
203 gpu_crash_dump_size);
204}
205
206void NsightAftermathTracker::ShaderDebugInfoCallback(const void* shader_debug_info,
207 u32 shader_debug_info_size, void* user_data) {
208 static_cast<NsightAftermathTracker*>(user_data)->OnShaderDebugInfoCallback(
209 shader_debug_info, shader_debug_info_size);
210}
211
212void NsightAftermathTracker::CrashDumpDescriptionCallback(
213 PFN_GFSDK_Aftermath_AddGpuCrashDumpDescription add_description, void* user_data) {
214 static_cast<NsightAftermathTracker*>(user_data)->OnCrashDumpDescriptionCallback(
215 add_description);
216}
217
218} // namespace Vulkan
219
220#endif // HAS_NSIGHT_AFTERMATH
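The three static functions at the bottom are the standard trampoline for C callback APIs: Aftermath accepts only a plain function pointer plus a void* user_data, so each static wrapper casts user_data back to the tracker and forwards to the member function. The pattern in isolation, with a hypothetical registration function standing in for GFSDK_Aftermath_EnableGpuCrashDumps:

    using EventCallback = void (*)(int event, void* user_data);
    void RegisterCallback(EventCallback callback, void* user_data); // assumed C API

    class Tracker {
    public:
        void Hook() {
            RegisterCallback(&Tracker::Trampoline, this);
        }

    private:
        static void Trampoline(int event, void* user_data) {
            static_cast<Tracker*>(user_data)->OnEvent(event);
        }

        void OnEvent(int event);
    };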
diff --git a/src/video_core/renderer_vulkan/nsight_aftermath_tracker.h b/src/video_core/renderer_vulkan/nsight_aftermath_tracker.h
new file mode 100644
index 000000000..afe7ae99e
--- /dev/null
+++ b/src/video_core/renderer_vulkan/nsight_aftermath_tracker.h
@@ -0,0 +1,87 @@
1// Copyright 2020 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <mutex>
8#include <string>
9#include <vector>
10
11#define VK_NO_PROTOTYPES
12#include <vulkan/vulkan.h>
13
14#ifdef HAS_NSIGHT_AFTERMATH
15#include <GFSDK_Aftermath_Defines.h>
16#include <GFSDK_Aftermath_GpuCrashDump.h>
17#include <GFSDK_Aftermath_GpuCrashDumpDecoding.h>
18#endif
19
20#include "common/common_types.h"
21#include "common/dynamic_library.h"
22
23namespace Vulkan {
24
25class NsightAftermathTracker {
26public:
27 NsightAftermathTracker();
28 ~NsightAftermathTracker();
29
30 NsightAftermathTracker(const NsightAftermathTracker&) = delete;
31 NsightAftermathTracker& operator=(const NsightAftermathTracker&) = delete;
32
33 // Delete move semantics because Aftermath initialization uses a pointer to this.
34 NsightAftermathTracker(NsightAftermathTracker&&) = delete;
35 NsightAftermathTracker& operator=(NsightAftermathTracker&&) = delete;
36
37 bool Initialize();
38
39 void SaveShader(const std::vector<u32>& spirv) const;
40
41private:
42#ifdef HAS_NSIGHT_AFTERMATH
43 static void GpuCrashDumpCallback(const void* gpu_crash_dump, u32 gpu_crash_dump_size,
44 void* user_data);
45
46 static void ShaderDebugInfoCallback(const void* shader_debug_info, u32 shader_debug_info_size,
47 void* user_data);
48
49 static void CrashDumpDescriptionCallback(
50 PFN_GFSDK_Aftermath_AddGpuCrashDumpDescription add_description, void* user_data);
51
52 void OnGpuCrashDumpCallback(const void* gpu_crash_dump, u32 gpu_crash_dump_size);
53
54 void OnShaderDebugInfoCallback(const void* shader_debug_info, u32 shader_debug_info_size);
55
56 void OnCrashDumpDescriptionCallback(
57 PFN_GFSDK_Aftermath_AddGpuCrashDumpDescription add_description);
58
59 mutable std::mutex mutex;
60
61 std::string dump_dir;
62 int dump_id = 0;
63
64 bool initialized = false;
65
66 Common::DynamicLibrary dl;
67 PFN_GFSDK_Aftermath_DisableGpuCrashDumps GFSDK_Aftermath_DisableGpuCrashDumps;
68 PFN_GFSDK_Aftermath_EnableGpuCrashDumps GFSDK_Aftermath_EnableGpuCrashDumps;
69 PFN_GFSDK_Aftermath_GetShaderDebugInfoIdentifier GFSDK_Aftermath_GetShaderDebugInfoIdentifier;
70 PFN_GFSDK_Aftermath_GetShaderHashSpirv GFSDK_Aftermath_GetShaderHashSpirv;
71 PFN_GFSDK_Aftermath_GpuCrashDump_CreateDecoder GFSDK_Aftermath_GpuCrashDump_CreateDecoder;
72 PFN_GFSDK_Aftermath_GpuCrashDump_DestroyDecoder GFSDK_Aftermath_GpuCrashDump_DestroyDecoder;
73 PFN_GFSDK_Aftermath_GpuCrashDump_GenerateJSON GFSDK_Aftermath_GpuCrashDump_GenerateJSON;
74 PFN_GFSDK_Aftermath_GpuCrashDump_GetJSON GFSDK_Aftermath_GpuCrashDump_GetJSON;
75#endif
76};
77
78#ifndef HAS_NSIGHT_AFTERMATH
79inline NsightAftermathTracker::NsightAftermathTracker() = default;
80inline NsightAftermathTracker::~NsightAftermathTracker() = default;
81inline bool NsightAftermathTracker::Initialize() {
82 return false;
83}
84inline void NsightAftermathTracker::SaveShader(const std::vector<u32>&) const {}
85#endif
86
87} // namespace Vulkan
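The inline stubs at the end keep call sites free of #ifdefs: the same code compiles whether or not HAS_NSIGHT_AFTERMATH is defined, with Initialize() simply reporting the feature as unavailable. A caller, sketched with the class's own API:

    NsightAftermathTracker tracker;
    if (!tracker.Initialize()) {
        // SDK missing or disabled at build time; run without GPU crash dumps
    }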
diff --git a/src/video_core/renderer_vulkan/shaders/quad_indexed.comp b/src/video_core/renderer_vulkan/shaders/quad_indexed.comp
new file mode 100644
index 000000000..5a472ba9b
--- /dev/null
+++ b/src/video_core/renderer_vulkan/shaders/quad_indexed.comp
@@ -0,0 +1,50 @@
1// Copyright 2020 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5/*
6 * Build instructions:
7 * $ glslangValidator -V quad_indexed.comp -o output.spv
8 * $ spirv-opt -O --strip-debug output.spv -o optimized.spv
9 * $ xxd -i optimized.spv
10 *
11 * Then copy that bytecode to the C++ file
12 */
13
14#version 460 core
15
16layout (local_size_x = 1024) in;
17
18layout (std430, set = 0, binding = 0) readonly buffer InputBuffer {
19 uint input_indexes[];
20};
21
22layout (std430, set = 0, binding = 1) writeonly buffer OutputBuffer {
23 uint output_indexes[];
24};
25
26layout (push_constant) uniform PushConstants {
27 uint base_vertex;
28 int index_shift; // 0: uint8, 1: uint16, 2: uint32
29};
30
31void main() {
32 int primitive = int(gl_GlobalInvocationID.x);
33 if (primitive * 6 >= output_indexes.length()) {
34 return;
35 }
36
37 int index_size = 8 << index_shift;
38 int flipped_shift = 2 - index_shift;
39 int mask = (1 << flipped_shift) - 1;
40
41 const int quad_swizzle[6] = int[](0, 1, 2, 0, 2, 3);
42 for (uint vertex = 0; vertex < 6; ++vertex) {
43 int offset = primitive * 4 + quad_swizzle[vertex];
44 int int_offset = offset >> flipped_shift;
45 int bit_offset = (offset & mask) * index_size;
46 uint packed_input = input_indexes[int_offset];
47 uint index = bitfieldExtract(packed_input, bit_offset, index_size);
48 output_indexes[primitive * 6 + vertex] = index + base_vertex;
49 }
50}
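The shader expands each quad (i0, i1, i2, i3) into the triangles (i0, i1, i2) and (i0, i2, i3) via quad_swizzle, pulling indices out of packed 32-bit words with bitfieldExtract. A worked instance of the addressing for 16-bit indices (index_shift = 1), restated on the CPU side so the constants can be checked:

    constexpr int index_shift = 1;                 // uint16 indices
    constexpr int index_size = 8 << index_shift;   // 16 bits per index
    constexpr int flipped_shift = 2 - index_shift; // 1: two indices per 32-bit word
    constexpr int mask = (1 << flipped_shift) - 1; // 1

    constexpr int offset = 0 * 4 + 3;              // quad 0, corner 3
    static_assert(offset >> flipped_shift == 1);       // read input_indexes[1]
    static_assert((offset & mask) * index_size == 16); // ...its high 16 bits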
diff --git a/src/video_core/renderer_vulkan/vk_compute_pass.cpp b/src/video_core/renderer_vulkan/vk_compute_pass.cpp
index 878a78755..7b0268033 100644
--- a/src/video_core/renderer_vulkan/vk_compute_pass.cpp
+++ b/src/video_core/renderer_vulkan/vk_compute_pass.cpp
@@ -135,11 +135,11 @@ VkDescriptorUpdateTemplateEntryKHR BuildQuadArrayPassDescriptorUpdateTemplateEnt
135 return entry; 135 return entry;
136} 136}
137 137
138VkPushConstantRange BuildQuadArrayPassPushConstantRange() { 138VkPushConstantRange BuildComputePushConstantRange(std::size_t size) {
139 VkPushConstantRange range; 139 VkPushConstantRange range;
140 range.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT; 140 range.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT;
141 range.offset = 0; 141 range.offset = 0;
142 range.size = sizeof(u32); 142 range.size = static_cast<u32>(size);
143 return range; 143 return range;
144} 144}
145 145
@@ -220,7 +220,130 @@ constexpr u8 uint8_pass[] = {
220 0xf9, 0x00, 0x02, 0x00, 0x1d, 0x00, 0x00, 0x00, 0xf8, 0x00, 0x02, 0x00, 0x1d, 0x00, 0x00, 0x00, 220 0xf9, 0x00, 0x02, 0x00, 0x1d, 0x00, 0x00, 0x00, 0xf8, 0x00, 0x02, 0x00, 0x1d, 0x00, 0x00, 0x00,
221 0xfd, 0x00, 0x01, 0x00, 0x38, 0x00, 0x01, 0x00}; 221 0xfd, 0x00, 0x01, 0x00, 0x38, 0x00, 0x01, 0x00};
222 222
223std::array<VkDescriptorSetLayoutBinding, 2> BuildUint8PassDescriptorSetBindings() { 223// Quad indexed SPIR-V module. Generated from the "shaders/" directory.
224constexpr u8 QUAD_INDEXED_SPV[] = {
225 0x03, 0x02, 0x23, 0x07, 0x00, 0x00, 0x01, 0x00, 0x07, 0x00, 0x08, 0x00, 0x7c, 0x00, 0x00, 0x00,
226 0x00, 0x00, 0x00, 0x00, 0x11, 0x00, 0x02, 0x00, 0x01, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x06, 0x00,
227 0x01, 0x00, 0x00, 0x00, 0x47, 0x4c, 0x53, 0x4c, 0x2e, 0x73, 0x74, 0x64, 0x2e, 0x34, 0x35, 0x30,
228 0x00, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
229 0x0f, 0x00, 0x06, 0x00, 0x05, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x6d, 0x61, 0x69, 0x6e,
230 0x00, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x10, 0x00, 0x06, 0x00, 0x04, 0x00, 0x00, 0x00,
231 0x11, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
232 0x47, 0x00, 0x04, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00,
233 0x47, 0x00, 0x04, 0x00, 0x15, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
234 0x48, 0x00, 0x04, 0x00, 0x16, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x19, 0x00, 0x00, 0x00,
235 0x48, 0x00, 0x05, 0x00, 0x16, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x23, 0x00, 0x00, 0x00,
236 0x00, 0x00, 0x00, 0x00, 0x47, 0x00, 0x03, 0x00, 0x16, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
237 0x47, 0x00, 0x04, 0x00, 0x18, 0x00, 0x00, 0x00, 0x22, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
238 0x47, 0x00, 0x04, 0x00, 0x18, 0x00, 0x00, 0x00, 0x21, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
239 0x48, 0x00, 0x05, 0x00, 0x22, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x23, 0x00, 0x00, 0x00,
240 0x00, 0x00, 0x00, 0x00, 0x48, 0x00, 0x05, 0x00, 0x22, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
241 0x23, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x47, 0x00, 0x03, 0x00, 0x22, 0x00, 0x00, 0x00,
242 0x02, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x56, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00,
243 0x04, 0x00, 0x00, 0x00, 0x48, 0x00, 0x04, 0x00, 0x57, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
244 0x18, 0x00, 0x00, 0x00, 0x48, 0x00, 0x05, 0x00, 0x57, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
245 0x23, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x47, 0x00, 0x03, 0x00, 0x57, 0x00, 0x00, 0x00,
246 0x03, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x59, 0x00, 0x00, 0x00, 0x22, 0x00, 0x00, 0x00,
247 0x00, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x59, 0x00, 0x00, 0x00, 0x21, 0x00, 0x00, 0x00,
248 0x00, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x72, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x00, 0x00,
249 0x19, 0x00, 0x00, 0x00, 0x13, 0x00, 0x02, 0x00, 0x02, 0x00, 0x00, 0x00, 0x21, 0x00, 0x03, 0x00,
250 0x03, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x15, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00,
251 0x20, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x07, 0x00, 0x00, 0x00,
252 0x07, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, 0x15, 0x00, 0x04, 0x00, 0x09, 0x00, 0x00, 0x00,
253 0x20, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x17, 0x00, 0x04, 0x00, 0x0a, 0x00, 0x00, 0x00,
254 0x09, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x0b, 0x00, 0x00, 0x00,
255 0x01, 0x00, 0x00, 0x00, 0x0a, 0x00, 0x00, 0x00, 0x3b, 0x00, 0x04, 0x00, 0x0b, 0x00, 0x00, 0x00,
256 0x0c, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x04, 0x00, 0x09, 0x00, 0x00, 0x00,
257 0x0d, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x0e, 0x00, 0x00, 0x00,
258 0x01, 0x00, 0x00, 0x00, 0x09, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00,
259 0x13, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, 0x1d, 0x00, 0x03, 0x00, 0x15, 0x00, 0x00, 0x00,
260 0x09, 0x00, 0x00, 0x00, 0x1e, 0x00, 0x03, 0x00, 0x16, 0x00, 0x00, 0x00, 0x15, 0x00, 0x00, 0x00,
261 0x20, 0x00, 0x04, 0x00, 0x17, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x16, 0x00, 0x00, 0x00,
262 0x3b, 0x00, 0x04, 0x00, 0x17, 0x00, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
263 0x14, 0x00, 0x02, 0x00, 0x1b, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00,
264 0x21, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x1e, 0x00, 0x04, 0x00, 0x22, 0x00, 0x00, 0x00,
265 0x09, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x23, 0x00, 0x00, 0x00,
266 0x09, 0x00, 0x00, 0x00, 0x22, 0x00, 0x00, 0x00, 0x3b, 0x00, 0x04, 0x00, 0x23, 0x00, 0x00, 0x00,
267 0x24, 0x00, 0x00, 0x00, 0x09, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00,
268 0x25, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x26, 0x00, 0x00, 0x00,
269 0x09, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00,
270 0x2b, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x04, 0x00, 0x09, 0x00, 0x00, 0x00,
271 0x3b, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00,
272 0x3f, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x04, 0x00, 0x41, 0x00, 0x00, 0x00,
273 0x06, 0x00, 0x00, 0x00, 0x3b, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00,
274 0x42, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00,
275 0x43, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x2c, 0x00, 0x09, 0x00, 0x41, 0x00, 0x00, 0x00,
276 0x44, 0x00, 0x00, 0x00, 0x42, 0x00, 0x00, 0x00, 0x25, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x00, 0x00,
277 0x42, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x00, 0x00, 0x43, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00,
278 0x46, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, 0x41, 0x00, 0x00, 0x00, 0x1d, 0x00, 0x03, 0x00,
279 0x56, 0x00, 0x00, 0x00, 0x09, 0x00, 0x00, 0x00, 0x1e, 0x00, 0x03, 0x00, 0x57, 0x00, 0x00, 0x00,
280 0x56, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x58, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
281 0x57, 0x00, 0x00, 0x00, 0x3b, 0x00, 0x04, 0x00, 0x58, 0x00, 0x00, 0x00, 0x59, 0x00, 0x00, 0x00,
282 0x02, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x5b, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
283 0x09, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x69, 0x00, 0x00, 0x00, 0x09, 0x00, 0x00, 0x00,
284 0x09, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x04, 0x00, 0x09, 0x00, 0x00, 0x00, 0x70, 0x00, 0x00, 0x00,
285 0x00, 0x04, 0x00, 0x00, 0x2b, 0x00, 0x04, 0x00, 0x09, 0x00, 0x00, 0x00, 0x71, 0x00, 0x00, 0x00,
286 0x01, 0x00, 0x00, 0x00, 0x2c, 0x00, 0x06, 0x00, 0x0a, 0x00, 0x00, 0x00, 0x72, 0x00, 0x00, 0x00,
287 0x70, 0x00, 0x00, 0x00, 0x71, 0x00, 0x00, 0x00, 0x71, 0x00, 0x00, 0x00, 0x36, 0x00, 0x05, 0x00,
288 0x02, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
289 0xf8, 0x00, 0x02, 0x00, 0x05, 0x00, 0x00, 0x00, 0x3b, 0x00, 0x04, 0x00, 0x46, 0x00, 0x00, 0x00,
290 0x47, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, 0xf9, 0x00, 0x02, 0x00, 0x74, 0x00, 0x00, 0x00,
291 0xf8, 0x00, 0x02, 0x00, 0x74, 0x00, 0x00, 0x00, 0xf6, 0x00, 0x04, 0x00, 0x73, 0x00, 0x00, 0x00,
292 0x76, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xf9, 0x00, 0x02, 0x00, 0x75, 0x00, 0x00, 0x00,
293 0xf8, 0x00, 0x02, 0x00, 0x75, 0x00, 0x00, 0x00, 0x41, 0x00, 0x05, 0x00, 0x0e, 0x00, 0x00, 0x00,
294 0x0f, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x0d, 0x00, 0x00, 0x00, 0x3d, 0x00, 0x04, 0x00,
295 0x09, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x0f, 0x00, 0x00, 0x00, 0x7c, 0x00, 0x04, 0x00,
296 0x06, 0x00, 0x00, 0x00, 0x11, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x84, 0x00, 0x05, 0x00,
297 0x06, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x11, 0x00, 0x00, 0x00, 0x13, 0x00, 0x00, 0x00,
298 0x44, 0x00, 0x05, 0x00, 0x09, 0x00, 0x00, 0x00, 0x19, 0x00, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00,
299 0x00, 0x00, 0x00, 0x00, 0x7c, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, 0x1a, 0x00, 0x00, 0x00,
300 0x19, 0x00, 0x00, 0x00, 0xaf, 0x00, 0x05, 0x00, 0x1b, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00,
301 0x14, 0x00, 0x00, 0x00, 0x1a, 0x00, 0x00, 0x00, 0xf7, 0x00, 0x03, 0x00, 0x1e, 0x00, 0x00, 0x00,
302 0x00, 0x00, 0x00, 0x00, 0xfa, 0x00, 0x04, 0x00, 0x1c, 0x00, 0x00, 0x00, 0x1d, 0x00, 0x00, 0x00,
303 0x1e, 0x00, 0x00, 0x00, 0xf8, 0x00, 0x02, 0x00, 0x1d, 0x00, 0x00, 0x00, 0xf9, 0x00, 0x02, 0x00,
304 0x73, 0x00, 0x00, 0x00, 0xf8, 0x00, 0x02, 0x00, 0x1e, 0x00, 0x00, 0x00, 0x41, 0x00, 0x05, 0x00,
305 0x26, 0x00, 0x00, 0x00, 0x27, 0x00, 0x00, 0x00, 0x24, 0x00, 0x00, 0x00, 0x25, 0x00, 0x00, 0x00,
306 0x3d, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, 0x27, 0x00, 0x00, 0x00,
307 0xc4, 0x00, 0x05, 0x00, 0x06, 0x00, 0x00, 0x00, 0x29, 0x00, 0x00, 0x00, 0x21, 0x00, 0x00, 0x00,
308 0x28, 0x00, 0x00, 0x00, 0x82, 0x00, 0x05, 0x00, 0x06, 0x00, 0x00, 0x00, 0x2e, 0x00, 0x00, 0x00,
309 0x2b, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, 0xc4, 0x00, 0x05, 0x00, 0x06, 0x00, 0x00, 0x00,
310 0x31, 0x00, 0x00, 0x00, 0x25, 0x00, 0x00, 0x00, 0x2e, 0x00, 0x00, 0x00, 0x82, 0x00, 0x05, 0x00,
311 0x06, 0x00, 0x00, 0x00, 0x32, 0x00, 0x00, 0x00, 0x31, 0x00, 0x00, 0x00, 0x25, 0x00, 0x00, 0x00,
312 0xf9, 0x00, 0x02, 0x00, 0x35, 0x00, 0x00, 0x00, 0xf8, 0x00, 0x02, 0x00, 0x35, 0x00, 0x00, 0x00,
313 0xf5, 0x00, 0x07, 0x00, 0x09, 0x00, 0x00, 0x00, 0x7b, 0x00, 0x00, 0x00, 0x0d, 0x00, 0x00, 0x00,
314 0x1e, 0x00, 0x00, 0x00, 0x6f, 0x00, 0x00, 0x00, 0x36, 0x00, 0x00, 0x00, 0xb0, 0x00, 0x05, 0x00,
315 0x1b, 0x00, 0x00, 0x00, 0x3c, 0x00, 0x00, 0x00, 0x7b, 0x00, 0x00, 0x00, 0x3b, 0x00, 0x00, 0x00,
316 0xf6, 0x00, 0x04, 0x00, 0x37, 0x00, 0x00, 0x00, 0x36, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
317 0xfa, 0x00, 0x04, 0x00, 0x3c, 0x00, 0x00, 0x00, 0x36, 0x00, 0x00, 0x00, 0x37, 0x00, 0x00, 0x00,
318 0xf8, 0x00, 0x02, 0x00, 0x36, 0x00, 0x00, 0x00, 0x84, 0x00, 0x05, 0x00, 0x06, 0x00, 0x00, 0x00,
319 0x40, 0x00, 0x00, 0x00, 0x11, 0x00, 0x00, 0x00, 0x3f, 0x00, 0x00, 0x00, 0x3e, 0x00, 0x03, 0x00,
320 0x47, 0x00, 0x00, 0x00, 0x44, 0x00, 0x00, 0x00, 0x41, 0x00, 0x05, 0x00, 0x07, 0x00, 0x00, 0x00,
321 0x48, 0x00, 0x00, 0x00, 0x47, 0x00, 0x00, 0x00, 0x7b, 0x00, 0x00, 0x00, 0x3d, 0x00, 0x04, 0x00,
322 0x06, 0x00, 0x00, 0x00, 0x49, 0x00, 0x00, 0x00, 0x48, 0x00, 0x00, 0x00, 0x80, 0x00, 0x05, 0x00,
323 0x06, 0x00, 0x00, 0x00, 0x4a, 0x00, 0x00, 0x00, 0x40, 0x00, 0x00, 0x00, 0x49, 0x00, 0x00, 0x00,
324 0xc3, 0x00, 0x05, 0x00, 0x06, 0x00, 0x00, 0x00, 0x4e, 0x00, 0x00, 0x00, 0x4a, 0x00, 0x00, 0x00,
325 0x2e, 0x00, 0x00, 0x00, 0xc7, 0x00, 0x05, 0x00, 0x06, 0x00, 0x00, 0x00, 0x52, 0x00, 0x00, 0x00,
326 0x4a, 0x00, 0x00, 0x00, 0x32, 0x00, 0x00, 0x00, 0x84, 0x00, 0x05, 0x00, 0x06, 0x00, 0x00, 0x00,
327 0x54, 0x00, 0x00, 0x00, 0x52, 0x00, 0x00, 0x00, 0x29, 0x00, 0x00, 0x00, 0x41, 0x00, 0x06, 0x00,
328 0x5b, 0x00, 0x00, 0x00, 0x5c, 0x00, 0x00, 0x00, 0x59, 0x00, 0x00, 0x00, 0x42, 0x00, 0x00, 0x00,
329 0x4e, 0x00, 0x00, 0x00, 0x3d, 0x00, 0x04, 0x00, 0x09, 0x00, 0x00, 0x00, 0x5d, 0x00, 0x00, 0x00,
330 0x5c, 0x00, 0x00, 0x00, 0xcb, 0x00, 0x06, 0x00, 0x09, 0x00, 0x00, 0x00, 0x62, 0x00, 0x00, 0x00,
331 0x5d, 0x00, 0x00, 0x00, 0x54, 0x00, 0x00, 0x00, 0x29, 0x00, 0x00, 0x00, 0x7c, 0x00, 0x04, 0x00,
332 0x09, 0x00, 0x00, 0x00, 0x65, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x80, 0x00, 0x05, 0x00,
333 0x09, 0x00, 0x00, 0x00, 0x67, 0x00, 0x00, 0x00, 0x65, 0x00, 0x00, 0x00, 0x7b, 0x00, 0x00, 0x00,
334 0x41, 0x00, 0x05, 0x00, 0x69, 0x00, 0x00, 0x00, 0x6a, 0x00, 0x00, 0x00, 0x24, 0x00, 0x00, 0x00,
335 0x42, 0x00, 0x00, 0x00, 0x3d, 0x00, 0x04, 0x00, 0x09, 0x00, 0x00, 0x00, 0x6b, 0x00, 0x00, 0x00,
336 0x6a, 0x00, 0x00, 0x00, 0x80, 0x00, 0x05, 0x00, 0x09, 0x00, 0x00, 0x00, 0x6c, 0x00, 0x00, 0x00,
337 0x62, 0x00, 0x00, 0x00, 0x6b, 0x00, 0x00, 0x00, 0x41, 0x00, 0x06, 0x00, 0x5b, 0x00, 0x00, 0x00,
338 0x6d, 0x00, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00, 0x42, 0x00, 0x00, 0x00, 0x67, 0x00, 0x00, 0x00,
339 0x3e, 0x00, 0x03, 0x00, 0x6d, 0x00, 0x00, 0x00, 0x6c, 0x00, 0x00, 0x00, 0x80, 0x00, 0x05, 0x00,
340 0x09, 0x00, 0x00, 0x00, 0x6f, 0x00, 0x00, 0x00, 0x7b, 0x00, 0x00, 0x00, 0x25, 0x00, 0x00, 0x00,
341 0xf9, 0x00, 0x02, 0x00, 0x35, 0x00, 0x00, 0x00, 0xf8, 0x00, 0x02, 0x00, 0x37, 0x00, 0x00, 0x00,
342 0xf9, 0x00, 0x02, 0x00, 0x73, 0x00, 0x00, 0x00, 0xf8, 0x00, 0x02, 0x00, 0x76, 0x00, 0x00, 0x00,
343 0xf9, 0x00, 0x02, 0x00, 0x74, 0x00, 0x00, 0x00, 0xf8, 0x00, 0x02, 0x00, 0x73, 0x00, 0x00, 0x00,
344 0xfd, 0x00, 0x01, 0x00, 0x38, 0x00, 0x01, 0x00};
345
346std::array<VkDescriptorSetLayoutBinding, 2> BuildInputOutputDescriptorSetBindings() {
224 std::array<VkDescriptorSetLayoutBinding, 2> bindings; 347 std::array<VkDescriptorSetLayoutBinding, 2> bindings;
225 bindings[0].binding = 0; 348 bindings[0].binding = 0;
226 bindings[0].descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER; 349 bindings[0].descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
@@ -235,7 +358,7 @@ std::array<VkDescriptorSetLayoutBinding, 2> BuildUint8PassDescriptorSetBindings(
235 return bindings; 358 return bindings;
236} 359}
237 360
238VkDescriptorUpdateTemplateEntryKHR BuildUint8PassDescriptorUpdateTemplateEntry() { 361VkDescriptorUpdateTemplateEntryKHR BuildInputOutputDescriptorUpdateTemplate() {
239 VkDescriptorUpdateTemplateEntryKHR entry; 362 VkDescriptorUpdateTemplateEntryKHR entry;
240 entry.dstBinding = 0; 363 entry.dstBinding = 0;
241 entry.dstArrayElement = 0; 364 entry.dstArrayElement = 0;
@@ -337,14 +460,14 @@ QuadArrayPass::QuadArrayPass(const VKDevice& device, VKScheduler& scheduler,
337 VKUpdateDescriptorQueue& update_descriptor_queue) 460 VKUpdateDescriptorQueue& update_descriptor_queue)
338 : VKComputePass(device, descriptor_pool, BuildQuadArrayPassDescriptorSetLayoutBinding(), 461 : VKComputePass(device, descriptor_pool, BuildQuadArrayPassDescriptorSetLayoutBinding(),
339 BuildQuadArrayPassDescriptorUpdateTemplateEntry(), 462 BuildQuadArrayPassDescriptorUpdateTemplateEntry(),
340 BuildQuadArrayPassPushConstantRange(), std::size(quad_array), quad_array), 463 BuildComputePushConstantRange(sizeof(u32)), std::size(quad_array), quad_array),
341 scheduler{scheduler}, staging_buffer_pool{staging_buffer_pool}, 464 scheduler{scheduler}, staging_buffer_pool{staging_buffer_pool},
342 update_descriptor_queue{update_descriptor_queue} {} 465 update_descriptor_queue{update_descriptor_queue} {}
343 466
344QuadArrayPass::~QuadArrayPass() = default; 467QuadArrayPass::~QuadArrayPass() = default;
345 468
346std::pair<VkBuffer, VkDeviceSize> QuadArrayPass::Assemble(u32 num_vertices, u32 first) { 469std::pair<VkBuffer, VkDeviceSize> QuadArrayPass::Assemble(u32 num_vertices, u32 first) {
347 const u32 num_triangle_vertices = num_vertices * 6 / 4; 470 const u32 num_triangle_vertices = (num_vertices / 4) * 6;
348 const std::size_t staging_size = num_triangle_vertices * sizeof(u32); 471 const std::size_t staging_size = num_triangle_vertices * sizeof(u32);
349 auto& buffer = staging_buffer_pool.GetUnusedBuffer(staging_size, false); 472 auto& buffer = staging_buffer_pool.GetUnusedBuffer(staging_size, false);
350 473
@@ -383,8 +506,8 @@ std::pair<VkBuffer, VkDeviceSize> QuadArrayPass::Assemble(u32 num_vertices, u32
383Uint8Pass::Uint8Pass(const VKDevice& device, VKScheduler& scheduler, 506Uint8Pass::Uint8Pass(const VKDevice& device, VKScheduler& scheduler,
384 VKDescriptorPool& descriptor_pool, VKStagingBufferPool& staging_buffer_pool, 507 VKDescriptorPool& descriptor_pool, VKStagingBufferPool& staging_buffer_pool,
385 VKUpdateDescriptorQueue& update_descriptor_queue) 508 VKUpdateDescriptorQueue& update_descriptor_queue)
386 : VKComputePass(device, descriptor_pool, BuildUint8PassDescriptorSetBindings(), 509 : VKComputePass(device, descriptor_pool, BuildInputOutputDescriptorSetBindings(),
387 BuildUint8PassDescriptorUpdateTemplateEntry(), {}, std::size(uint8_pass), 510 BuildInputOutputDescriptorUpdateTemplate(), {}, std::size(uint8_pass),
388 uint8_pass), 511 uint8_pass),
389 scheduler{scheduler}, staging_buffer_pool{staging_buffer_pool}, 512 scheduler{scheduler}, staging_buffer_pool{staging_buffer_pool},
390 update_descriptor_queue{update_descriptor_queue} {} 513 update_descriptor_queue{update_descriptor_queue} {}
@@ -425,4 +548,70 @@ std::pair<VkBuffer, u64> Uint8Pass::Assemble(u32 num_vertices, VkBuffer src_buff
425 return {*buffer.handle, 0}; 548 return {*buffer.handle, 0};
426} 549}
427 550
551QuadIndexedPass::QuadIndexedPass(const VKDevice& device, VKScheduler& scheduler,
552 VKDescriptorPool& descriptor_pool,
553 VKStagingBufferPool& staging_buffer_pool,
554 VKUpdateDescriptorQueue& update_descriptor_queue)
555 : VKComputePass(device, descriptor_pool, BuildInputOutputDescriptorSetBindings(),
556 BuildInputOutputDescriptorUpdateTemplate(),
557 BuildComputePushConstantRange(sizeof(u32) * 2), std::size(QUAD_INDEXED_SPV),
558 QUAD_INDEXED_SPV),
559 scheduler{scheduler}, staging_buffer_pool{staging_buffer_pool},
560 update_descriptor_queue{update_descriptor_queue} {}
561
562QuadIndexedPass::~QuadIndexedPass() = default;
563
564std::pair<VkBuffer, u64> QuadIndexedPass::Assemble(
565 Tegra::Engines::Maxwell3D::Regs::IndexFormat index_format, u32 num_vertices, u32 base_vertex,
566 VkBuffer src_buffer, u64 src_offset) {
567 const u32 index_shift = [index_format] {
568 switch (index_format) {
569 case Tegra::Engines::Maxwell3D::Regs::IndexFormat::UnsignedByte:
570 return 0;
571 case Tegra::Engines::Maxwell3D::Regs::IndexFormat::UnsignedShort:
572 return 1;
573 case Tegra::Engines::Maxwell3D::Regs::IndexFormat::UnsignedInt:
574 return 2;
575 }
576 UNREACHABLE();
577 return 2;
578 }();
579 const u32 input_size = num_vertices << index_shift;
580 const u32 num_tri_vertices = (num_vertices / 4) * 6;
581
582 const std::size_t staging_size = num_tri_vertices * sizeof(u32);
583 auto& buffer = staging_buffer_pool.GetUnusedBuffer(staging_size, false);
584
585 update_descriptor_queue.Acquire();
586 update_descriptor_queue.AddBuffer(src_buffer, src_offset, input_size);
587 update_descriptor_queue.AddBuffer(*buffer.handle, 0, staging_size);
588 const auto set = CommitDescriptorSet(update_descriptor_queue, scheduler.GetFence());
589
590 scheduler.RequestOutsideRenderPassOperationContext();
591 scheduler.Record([layout = *layout, pipeline = *pipeline, buffer = *buffer.handle, set,
592 num_tri_vertices, base_vertex, index_shift](vk::CommandBuffer cmdbuf) {
593 static constexpr u32 dispatch_size = 1024;
594 const std::array push_constants = {base_vertex, index_shift};
595 cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_COMPUTE, pipeline);
596 cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_COMPUTE, layout, 0, set, {});
597 cmdbuf.PushConstants(layout, VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(push_constants),
598 &push_constants);
599 cmdbuf.Dispatch(Common::AlignUp(num_tri_vertices, dispatch_size) / dispatch_size, 1, 1);
600
601 VkBufferMemoryBarrier barrier;
602 barrier.sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER;
603 barrier.pNext = nullptr;
604 barrier.srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT;
605 barrier.dstAccessMask = VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT;
606 barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
607 barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
608 barrier.buffer = buffer;
609 barrier.offset = 0;
610 barrier.size = static_cast<VkDeviceSize>(num_tri_vertices * sizeof(u32));
611 cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
612 VK_PIPELINE_STAGE_VERTEX_INPUT_BIT, 0, {}, barrier, {});
613 });
614 return {*buffer.handle, 0};
615}
616
428} // namespace Vulkan 617} // namespace Vulkan
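QuadIndexedPass::Assemble dispatches one invocation per quad, then inserts a compute-to-vertex-input barrier so the promoted indices are visible to the subsequent draw. A hypothetical call site, since the rasterizer side of this wiring is not part of this hunk:

    // Names around the call mirror the Assemble signature; the rest is assumed.
    const auto [index_buffer, index_offset] = quad_indexed_pass.Assemble(
        regs.index_array.format, num_vertices, base_vertex, src_buffer, src_offset);
    // The staging buffer now holds (num_vertices / 4) * 6 32-bit indices and can
    // be bound as a VK_INDEX_TYPE_UINT32 index buffer for the actual draw.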
diff --git a/src/video_core/renderer_vulkan/vk_compute_pass.h b/src/video_core/renderer_vulkan/vk_compute_pass.h
index ec80c8683..26bf834de 100644
--- a/src/video_core/renderer_vulkan/vk_compute_pass.h
+++ b/src/video_core/renderer_vulkan/vk_compute_pass.h
@@ -8,6 +8,7 @@
8#include <utility> 8#include <utility>
9#include <vector> 9#include <vector>
10#include "common/common_types.h" 10#include "common/common_types.h"
11#include "video_core/engines/maxwell_3d.h"
11#include "video_core/renderer_vulkan/vk_descriptor_pool.h" 12#include "video_core/renderer_vulkan/vk_descriptor_pool.h"
12#include "video_core/renderer_vulkan/wrapper.h" 13#include "video_core/renderer_vulkan/wrapper.h"
13 14
@@ -73,4 +74,22 @@ private:
73 VKUpdateDescriptorQueue& update_descriptor_queue; 74 VKUpdateDescriptorQueue& update_descriptor_queue;
74}; 75};
75 76
77class QuadIndexedPass final : public VKComputePass {
78public:
79 explicit QuadIndexedPass(const VKDevice& device, VKScheduler& scheduler,
80 VKDescriptorPool& descriptor_pool,
81 VKStagingBufferPool& staging_buffer_pool,
82 VKUpdateDescriptorQueue& update_descriptor_queue);
83 ~QuadIndexedPass();
84
85 std::pair<VkBuffer, u64> Assemble(Tegra::Engines::Maxwell3D::Regs::IndexFormat index_format,
86 u32 num_vertices, u32 base_vertex, VkBuffer src_buffer,
87 u64 src_offset);
88
89private:
90 VKScheduler& scheduler;
91 VKStagingBufferPool& staging_buffer_pool;
92 VKUpdateDescriptorQueue& update_descriptor_queue;
93};
94
76} // namespace Vulkan 95} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp
index 23beafa4f..52566bb79 100644
--- a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp
+++ b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp
@@ -105,6 +105,8 @@ vk::DescriptorUpdateTemplateKHR VKComputePipeline::CreateDescriptorUpdateTemplat
105} 105}
106 106
107vk::ShaderModule VKComputePipeline::CreateShaderModule(const std::vector<u32>& code) const { 107vk::ShaderModule VKComputePipeline::CreateShaderModule(const std::vector<u32>& code) const {
108 device.SaveShader(code);
109
108 VkShaderModuleCreateInfo ci; 110 VkShaderModuleCreateInfo ci;
109 ci.sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO; 111 ci.sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO;
110 ci.pNext = nullptr; 112 ci.pNext = nullptr;
diff --git a/src/video_core/renderer_vulkan/vk_device.cpp b/src/video_core/renderer_vulkan/vk_device.cpp
index 52d29e49d..e90c76492 100644
--- a/src/video_core/renderer_vulkan/vk_device.cpp
+++ b/src/video_core/renderer_vulkan/vk_device.cpp
@@ -9,6 +9,7 @@
9#include <string_view> 9#include <string_view>
10#include <thread> 10#include <thread>
11#include <unordered_set> 11#include <unordered_set>
12#include <utility>
12#include <vector> 13#include <vector>
13 14
14#include "common/assert.h" 15#include "common/assert.h"
@@ -167,6 +168,7 @@ bool VKDevice::Create() {
167 VkPhysicalDeviceFeatures2 features2; 168 VkPhysicalDeviceFeatures2 features2;
168 features2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2; 169 features2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2;
169 features2.pNext = nullptr; 170 features2.pNext = nullptr;
171 const void* first_next = &features2;
170 void** next = &features2.pNext; 172 void** next = &features2.pNext;
171 173
172 auto& features = features2.features; 174 auto& features = features2.features;
@@ -296,7 +298,19 @@ bool VKDevice::Create() {
296 LOG_INFO(Render_Vulkan, "Device doesn't support depth range unrestricted"); 298 LOG_INFO(Render_Vulkan, "Device doesn't support depth range unrestricted");
297 } 299 }
298 300
299 logical = vk::Device::Create(physical, queue_cis, extensions, features2, dld); 301 VkDeviceDiagnosticsConfigCreateInfoNV diagnostics_nv;
302 if (nv_device_diagnostics_config) {
303 nsight_aftermath_tracker.Initialize();
304
305 diagnostics_nv.sType = VK_STRUCTURE_TYPE_DEVICE_DIAGNOSTICS_CONFIG_CREATE_INFO_NV;
306 diagnostics_nv.pNext = &features2;
307 diagnostics_nv.flags = VK_DEVICE_DIAGNOSTICS_CONFIG_ENABLE_SHADER_DEBUG_INFO_BIT_NV |
308 VK_DEVICE_DIAGNOSTICS_CONFIG_ENABLE_RESOURCE_TRACKING_BIT_NV |
309 VK_DEVICE_DIAGNOSTICS_CONFIG_ENABLE_AUTOMATIC_CHECKPOINTS_BIT_NV;
310 first_next = &diagnostics_nv;
311 }
312
313 logical = vk::Device::Create(physical, queue_cis, extensions, first_next, dld);
300 if (!logical) { 314 if (!logical) {
301 LOG_ERROR(Render_Vulkan, "Failed to create logical device"); 315 LOG_ERROR(Render_Vulkan, "Failed to create logical device");
302 return false; 316 return false;
@@ -344,17 +358,12 @@ VkFormat VKDevice::GetSupportedFormat(VkFormat wanted_format, VkFormatFeatureFla
344void VKDevice::ReportLoss() const { 358void VKDevice::ReportLoss() const {
345 LOG_CRITICAL(Render_Vulkan, "Device loss occurred!"); 359 LOG_CRITICAL(Render_Vulkan, "Device loss occurred!");
346 360
347 // Wait some time to let the log flush 361 // Wait for the log to flush and for Nsight Aftermath to dump the results
348 std::this_thread::sleep_for(std::chrono::seconds{1}); 362 std::this_thread::sleep_for(std::chrono::seconds{3});
349 363}
350 if (!nv_device_diagnostic_checkpoints) {
351 return;
352 }
353 364
354 [[maybe_unused]] const std::vector data = graphics_queue.GetCheckpointDataNV(dld); 365void VKDevice::SaveShader(const std::vector<u32>& spirv) const {
355 // Catch here in debug builds (or with optimizations disabled) the last graphics pipeline to be 366 nsight_aftermath_tracker.SaveShader(spirv);
356 // executed. It can be done on a debugger by evaluating the expression:
357 // *(VKGraphicsPipeline*)data[0]
358} 367}
359 368
360bool VKDevice::IsOptimalAstcSupported(const VkPhysicalDeviceFeatures& features) const { 369bool VKDevice::IsOptimalAstcSupported(const VkPhysicalDeviceFeatures& features) const {
@@ -527,8 +536,8 @@ std::vector<const char*> VKDevice::LoadExtensions() {
527 Test(extension, has_ext_transform_feedback, VK_EXT_TRANSFORM_FEEDBACK_EXTENSION_NAME, 536 Test(extension, has_ext_transform_feedback, VK_EXT_TRANSFORM_FEEDBACK_EXTENSION_NAME,
528 false); 537 false);
529 if (Settings::values.renderer_debug) { 538 if (Settings::values.renderer_debug) {
530 Test(extension, nv_device_diagnostic_checkpoints, 539 Test(extension, nv_device_diagnostics_config,
531 VK_NV_DEVICE_DIAGNOSTIC_CHECKPOINTS_EXTENSION_NAME, true); 540 VK_NV_DEVICE_DIAGNOSTICS_CONFIG_EXTENSION_NAME, true);
532 } 541 }
533 } 542 }
534 543
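The first_next cursor lets the NV diagnostics struct be prepended to the head of the feature chain only when the extension is present, without disturbing the next cursor that keeps appending feature structs to features2. A sketch of the two chains handed to device creation (the create-info assembly itself lives inside the wrapper):

    // With VK_NV_device_diagnostics_config enabled:
    //   pNext -> diagnostics_nv -> features2 -> <feature structs appended via next>
    // Without it:
    //   pNext -> features2 -> <feature structs appended via next>
    VkDeviceCreateInfo ci{}; // illustrative; vk::Device::Create builds the real one
    ci.sType = VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO;
    ci.pNext = first_next;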
diff --git a/src/video_core/renderer_vulkan/vk_device.h b/src/video_core/renderer_vulkan/vk_device.h
index 60d64572a..a4d841e26 100644
--- a/src/video_core/renderer_vulkan/vk_device.h
+++ b/src/video_core/renderer_vulkan/vk_device.h
@@ -10,6 +10,7 @@
10#include <vector> 10#include <vector>
11 11
12#include "common/common_types.h" 12#include "common/common_types.h"
13#include "video_core/renderer_vulkan/nsight_aftermath_tracker.h"
13#include "video_core/renderer_vulkan/wrapper.h" 14#include "video_core/renderer_vulkan/wrapper.h"
14 15
15namespace Vulkan { 16namespace Vulkan {
@@ -43,6 +44,9 @@ public:
43 /// Reports a device loss. 44 /// Reports a device loss.
44 void ReportLoss() const; 45 void ReportLoss() const;
45 46
47 /// Reports a shader to Nsight Aftermath.
48 void SaveShader(const std::vector<u32>& spirv) const;
49
46 /// Returns the dispatch loader with direct function pointers of the device. 50 /// Returns the dispatch loader with direct function pointers of the device.
47 const vk::DeviceDispatch& GetDispatchLoader() const { 51 const vk::DeviceDispatch& GetDispatchLoader() const {
48 return dld; 52 return dld;
@@ -173,11 +177,6 @@ public:
173 return ext_transform_feedback; 177 return ext_transform_feedback;
174 } 178 }
175 179
176 /// Returns true if the device supports VK_NV_device_diagnostic_checkpoints.
177 bool IsNvDeviceDiagnosticCheckpoints() const {
178 return nv_device_diagnostic_checkpoints;
179 }
180
181 /// Returns the vendor name reported from Vulkan. 180 /// Returns the vendor name reported from Vulkan.
182 std::string_view GetVendorName() const { 181 std::string_view GetVendorName() const {
183 return vendor_name; 182 return vendor_name;
@@ -233,7 +232,7 @@ private:
233 bool ext_depth_range_unrestricted{}; ///< Support for VK_EXT_depth_range_unrestricted. 232 bool ext_depth_range_unrestricted{}; ///< Support for VK_EXT_depth_range_unrestricted.
234 bool ext_shader_viewport_index_layer{}; ///< Support for VK_EXT_shader_viewport_index_layer. 233 bool ext_shader_viewport_index_layer{}; ///< Support for VK_EXT_shader_viewport_index_layer.
235 bool ext_transform_feedback{}; ///< Support for VK_EXT_transform_feedback. 234 bool ext_transform_feedback{}; ///< Support for VK_EXT_transform_feedback.
236 bool nv_device_diagnostic_checkpoints{}; ///< Support for VK_NV_device_diagnostic_checkpoints. 235 bool nv_device_diagnostics_config{}; ///< Support for VK_NV_device_diagnostics_config.
237 236
238 // Telemetry parameters 237 // Telemetry parameters
239 std::string vendor_name; ///< Device's driver name. 238 std::string vendor_name; ///< Device's driver name.
@@ -241,6 +240,9 @@ private:
241 240
242 /// Format properties dictionary. 241 /// Format properties dictionary.
243 std::unordered_map<VkFormat, VkFormatProperties> format_properties; 242 std::unordered_map<VkFormat, VkFormatProperties> format_properties;
243
244 /// Nsight Aftermath GPU crash tracker
245 NsightAftermathTracker nsight_aftermath_tracker;
244}; 246};
245 247
246} // namespace Vulkan 248} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_fence_manager.cpp b/src/video_core/renderer_vulkan/vk_fence_manager.cpp
new file mode 100644
index 000000000..a02be5487
--- /dev/null
+++ b/src/video_core/renderer_vulkan/vk_fence_manager.cpp
@@ -0,0 +1,101 @@
1// Copyright 2020 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <memory>
6#include <thread>
7
8#include "video_core/renderer_vulkan/vk_buffer_cache.h"
9#include "video_core/renderer_vulkan/vk_device.h"
10#include "video_core/renderer_vulkan/vk_fence_manager.h"
11#include "video_core/renderer_vulkan/vk_scheduler.h"
12#include "video_core/renderer_vulkan/vk_texture_cache.h"
13#include "video_core/renderer_vulkan/wrapper.h"
14
15namespace Vulkan {
16
17InnerFence::InnerFence(const VKDevice& device, VKScheduler& scheduler, u32 payload, bool is_stubbed)
18 : VideoCommon::FenceBase(payload, is_stubbed), device{device}, scheduler{scheduler} {}
19
20InnerFence::InnerFence(const VKDevice& device, VKScheduler& scheduler, GPUVAddr address,
21 u32 payload, bool is_stubbed)
22 : VideoCommon::FenceBase(address, payload, is_stubbed), device{device}, scheduler{scheduler} {}
23
24InnerFence::~InnerFence() = default;
25
26void InnerFence::Queue() {
27 if (is_stubbed) {
28 return;
29 }
30 ASSERT(!event);
31
32 event = device.GetLogical().CreateEvent();
33 ticks = scheduler.Ticks();
34
35 scheduler.RequestOutsideRenderPassOperationContext();
36 scheduler.Record([event = *event](vk::CommandBuffer cmdbuf) {
37 cmdbuf.SetEvent(event, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT);
38 });
39}
40
41bool InnerFence::IsSignaled() const {
42 if (is_stubbed) {
43 return true;
44 }
45 ASSERT(event);
46 return IsEventSignalled();
47}
48
49void InnerFence::Wait() {
50 if (is_stubbed) {
51 return;
52 }
53 ASSERT(event);
54
55 if (ticks >= scheduler.Ticks()) {
56 scheduler.Flush();
57 }
58 while (!IsEventSignalled()) {
59 std::this_thread::yield();
60 }
61}
62
63bool InnerFence::IsEventSignalled() const {
64 switch (const VkResult result = event.GetStatus()) {
65 case VK_EVENT_SET:
66 return true;
67 case VK_EVENT_RESET:
68 return false;
69 default:
70 throw vk::Exception(result);
71 }
72}
73
74VKFenceManager::VKFenceManager(Core::System& system, VideoCore::RasterizerInterface& rasterizer,
75 const VKDevice& device, VKScheduler& scheduler,
76 VKTextureCache& texture_cache, VKBufferCache& buffer_cache,
77 VKQueryCache& query_cache)
78 : GenericFenceManager(system, rasterizer, texture_cache, buffer_cache, query_cache),
79 device{device}, scheduler{scheduler} {}
80
81Fence VKFenceManager::CreateFence(u32 value, bool is_stubbed) {
82 return std::make_shared<InnerFence>(device, scheduler, value, is_stubbed);
83}
84
85Fence VKFenceManager::CreateFence(GPUVAddr addr, u32 value, bool is_stubbed) {
86 return std::make_shared<InnerFence>(device, scheduler, addr, value, is_stubbed);
87}
88
89void VKFenceManager::QueueFence(Fence& fence) {
90 fence->Queue();
91}
92
93bool VKFenceManager::IsFenceSignaled(Fence& fence) const {
94 return fence->IsSignaled();
95}
96
97void VKFenceManager::WaitFence(Fence& fence) {
98 fence->Wait();
99}
100
101} // namespace Vulkan
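The fences here are vk::Events set from the GPU timeline rather than VkFences, so they can be polled cheaply without waiting on a queue submission. A minimal sketch of the contract the overrides satisfy, assuming the generic VideoCommon::FenceManager drives them in roughly this order:

    Fence fence = CreateFence(gpu_addr, payload, /*is_stubbed=*/false);
    QueueFence(fence);              // records vkCmdSetEvent after prior GPU work
    if (!IsFenceSignaled(fence)) {  // non-blocking poll via the event status
        WaitFence(fence);           // flush the current batch if needed, then yield
    }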
diff --git a/src/video_core/renderer_vulkan/vk_fence_manager.h b/src/video_core/renderer_vulkan/vk_fence_manager.h
new file mode 100644
index 000000000..04d07fe6a
--- /dev/null
+++ b/src/video_core/renderer_vulkan/vk_fence_manager.h
@@ -0,0 +1,74 @@
1// Copyright 2020 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <memory>
8
9#include "video_core/fence_manager.h"
10#include "video_core/renderer_vulkan/wrapper.h"
11
12namespace Core {
13class System;
14}
15
16namespace VideoCore {
17class RasterizerInterface;
18}
19
20namespace Vulkan {
21
22class VKBufferCache;
23class VKDevice;
24class VKQueryCache;
25class VKScheduler;
26class VKTextureCache;
27
28class InnerFence : public VideoCommon::FenceBase {
29public:
30 explicit InnerFence(const VKDevice& device, VKScheduler& scheduler, u32 payload,
31 bool is_stubbed);
32 explicit InnerFence(const VKDevice& device, VKScheduler& scheduler, GPUVAddr address,
33 u32 payload, bool is_stubbed);
34 ~InnerFence();
35
36 void Queue();
37
38 bool IsSignaled() const;
39
40 void Wait();
41
42private:
43 bool IsEventSignalled() const;
44
45 const VKDevice& device;
46 VKScheduler& scheduler;
47 vk::Event event;
48 u64 ticks = 0;
49};
50using Fence = std::shared_ptr<InnerFence>;
51
52using GenericFenceManager =
53 VideoCommon::FenceManager<Fence, VKTextureCache, VKBufferCache, VKQueryCache>;
54
55class VKFenceManager final : public GenericFenceManager {
56public:
57 explicit VKFenceManager(Core::System& system, VideoCore::RasterizerInterface& rasterizer,
58 const VKDevice& device, VKScheduler& scheduler,
59 VKTextureCache& texture_cache, VKBufferCache& buffer_cache,
60 VKQueryCache& query_cache);
61
62protected:
63 Fence CreateFence(u32 value, bool is_stubbed) override;
64 Fence CreateFence(GPUVAddr addr, u32 value, bool is_stubbed) override;
65 void QueueFence(Fence& fence) override;
66 bool IsFenceSignaled(Fence& fence) const override;
67 void WaitFence(Fence& fence) override;
68
69private:
70 const VKDevice& device;
71 VKScheduler& scheduler;
72};
73
74} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp
index b540b838d..8332b42aa 100644
--- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp
+++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp
@@ -26,12 +26,13 @@ MICROPROFILE_DECLARE(Vulkan_PipelineCache);
26 26
27namespace { 27namespace {
28 28
29VkStencilOpState GetStencilFaceState(const FixedPipelineState::StencilFace& face) { 29template <class StencilFace>
30VkStencilOpState GetStencilFaceState(const StencilFace& face) {
30 VkStencilOpState state; 31 VkStencilOpState state;
31 state.failOp = MaxwellToVK::StencilOp(face.action_stencil_fail); 32 state.failOp = MaxwellToVK::StencilOp(face.ActionStencilFail());
32 state.passOp = MaxwellToVK::StencilOp(face.action_depth_pass); 33 state.passOp = MaxwellToVK::StencilOp(face.ActionDepthPass());
33 state.depthFailOp = MaxwellToVK::StencilOp(face.action_depth_fail); 34 state.depthFailOp = MaxwellToVK::StencilOp(face.ActionDepthFail());
34 state.compareOp = MaxwellToVK::ComparisonOp(face.test_func); 35 state.compareOp = MaxwellToVK::ComparisonOp(face.TestFunc());
35 state.compareMask = 0; 36 state.compareMask = 0;
36 state.writeMask = 0; 37 state.writeMask = 0;
37 state.reference = 0; 38 state.reference = 0;
@@ -147,6 +148,8 @@ std::vector<vk::ShaderModule> VKGraphicsPipeline::CreateShaderModules(
147 continue; 148 continue;
148 } 149 }
149 150
151 device.SaveShader(stage->code);
152
150 ci.codeSize = stage->code.size() * sizeof(u32); 153 ci.codeSize = stage->code.size() * sizeof(u32);
151 ci.pCode = stage->code.data(); 154 ci.pCode = stage->code.data();
152 modules.push_back(device.GetLogical().CreateShaderModule(ci)); 155 modules.push_back(device.GetLogical().CreateShaderModule(ci));
@@ -157,43 +160,47 @@ std::vector<vk::ShaderModule> VKGraphicsPipeline::CreateShaderModules(
157vk::Pipeline VKGraphicsPipeline::CreatePipeline(const RenderPassParams& renderpass_params, 160vk::Pipeline VKGraphicsPipeline::CreatePipeline(const RenderPassParams& renderpass_params,
158 const SPIRVProgram& program) const { 161 const SPIRVProgram& program) const {
159 const auto& vi = fixed_state.vertex_input; 162 const auto& vi = fixed_state.vertex_input;
160 const auto& ia = fixed_state.input_assembly;
161 const auto& ds = fixed_state.depth_stencil; 163 const auto& ds = fixed_state.depth_stencil;
162 const auto& cd = fixed_state.color_blending; 164 const auto& cd = fixed_state.color_blending;
163 const auto& ts = fixed_state.tessellation;
164 const auto& rs = fixed_state.rasterizer; 165 const auto& rs = fixed_state.rasterizer;
165 166
166 std::vector<VkVertexInputBindingDescription> vertex_bindings; 167 std::vector<VkVertexInputBindingDescription> vertex_bindings;
167 std::vector<VkVertexInputBindingDivisorDescriptionEXT> vertex_binding_divisors; 168 std::vector<VkVertexInputBindingDivisorDescriptionEXT> vertex_binding_divisors;
168 for (std::size_t i = 0; i < vi.num_bindings; ++i) { 169 for (std::size_t index = 0; index < std::size(vi.bindings); ++index) {
169 const auto& binding = vi.bindings[i]; 170 const auto& binding = vi.bindings[index];
170 const bool instanced = binding.divisor != 0; 171 if (!binding.enabled) {
172 continue;
173 }
174 const bool instanced = vi.binding_divisors[index] != 0;
171 const auto rate = instanced ? VK_VERTEX_INPUT_RATE_INSTANCE : VK_VERTEX_INPUT_RATE_VERTEX; 175 const auto rate = instanced ? VK_VERTEX_INPUT_RATE_INSTANCE : VK_VERTEX_INPUT_RATE_VERTEX;
172 176
173 auto& vertex_binding = vertex_bindings.emplace_back(); 177 auto& vertex_binding = vertex_bindings.emplace_back();
174 vertex_binding.binding = binding.index; 178 vertex_binding.binding = static_cast<u32>(index);
175 vertex_binding.stride = binding.stride; 179 vertex_binding.stride = binding.stride;
176 vertex_binding.inputRate = rate; 180 vertex_binding.inputRate = rate;
177 181
178 if (instanced) { 182 if (instanced) {
179 auto& binding_divisor = vertex_binding_divisors.emplace_back(); 183 auto& binding_divisor = vertex_binding_divisors.emplace_back();
180 binding_divisor.binding = binding.index; 184 binding_divisor.binding = static_cast<u32>(index);
181 binding_divisor.divisor = binding.divisor; 185 binding_divisor.divisor = vi.binding_divisors[index];
182 } 186 }
183 } 187 }
184 188
185 std::vector<VkVertexInputAttributeDescription> vertex_attributes; 189 std::vector<VkVertexInputAttributeDescription> vertex_attributes;
186 const auto& input_attributes = program[0]->entries.attributes; 190 const auto& input_attributes = program[0]->entries.attributes;
187 for (std::size_t i = 0; i < vi.num_attributes; ++i) { 191 for (std::size_t index = 0; index < std::size(vi.attributes); ++index) {
188 const auto& attribute = vi.attributes[i]; 192 const auto& attribute = vi.attributes[index];
189 if (input_attributes.find(attribute.index) == input_attributes.end()) { 193 if (!attribute.enabled) {
194 continue;
195 }
196 if (input_attributes.find(static_cast<u32>(index)) == input_attributes.end()) {
190 // Skip attributes not used by the vertex shaders. 197 // Skip attributes not used by the vertex shaders.
191 continue; 198 continue;
192 } 199 }
193 auto& vertex_attribute = vertex_attributes.emplace_back(); 200 auto& vertex_attribute = vertex_attributes.emplace_back();
194 vertex_attribute.location = attribute.index; 201 vertex_attribute.location = static_cast<u32>(index);
195 vertex_attribute.binding = attribute.buffer; 202 vertex_attribute.binding = attribute.buffer;
196 vertex_attribute.format = MaxwellToVK::VertexFormat(attribute.type, attribute.size); 203 vertex_attribute.format = MaxwellToVK::VertexFormat(attribute.Type(), attribute.Size());
197 vertex_attribute.offset = attribute.offset; 204 vertex_attribute.offset = attribute.offset;
198 } 205 }
199 206
@@ -219,15 +226,15 @@ vk::Pipeline VKGraphicsPipeline::CreatePipeline(const RenderPassParams& renderpa
219 input_assembly_ci.sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO; 226 input_assembly_ci.sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO;
220 input_assembly_ci.pNext = nullptr; 227 input_assembly_ci.pNext = nullptr;
221 input_assembly_ci.flags = 0; 228 input_assembly_ci.flags = 0;
222 input_assembly_ci.topology = MaxwellToVK::PrimitiveTopology(device, ia.topology); 229 input_assembly_ci.topology = MaxwellToVK::PrimitiveTopology(device, rs.Topology());
223 input_assembly_ci.primitiveRestartEnable = 230 input_assembly_ci.primitiveRestartEnable =
224 ia.primitive_restart_enable && SupportsPrimitiveRestart(input_assembly_ci.topology); 231 rs.primitive_restart_enable != 0 && SupportsPrimitiveRestart(input_assembly_ci.topology);
225 232
226 VkPipelineTessellationStateCreateInfo tessellation_ci; 233 VkPipelineTessellationStateCreateInfo tessellation_ci;
227 tessellation_ci.sType = VK_STRUCTURE_TYPE_PIPELINE_TESSELLATION_STATE_CREATE_INFO; 234 tessellation_ci.sType = VK_STRUCTURE_TYPE_PIPELINE_TESSELLATION_STATE_CREATE_INFO;
228 tessellation_ci.pNext = nullptr; 235 tessellation_ci.pNext = nullptr;
229 tessellation_ci.flags = 0; 236 tessellation_ci.flags = 0;
230 tessellation_ci.patchControlPoints = ts.patch_control_points; 237 tessellation_ci.patchControlPoints = rs.patch_control_points_minus_one.Value() + 1;
231 238
232 VkPipelineViewportStateCreateInfo viewport_ci; 239 VkPipelineViewportStateCreateInfo viewport_ci;
233 viewport_ci.sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO; 240 viewport_ci.sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO;
@@ -246,8 +253,8 @@ vk::Pipeline VKGraphicsPipeline::CreatePipeline(const RenderPassParams& renderpa
246 rasterization_ci.rasterizerDiscardEnable = VK_FALSE; 253 rasterization_ci.rasterizerDiscardEnable = VK_FALSE;
247 rasterization_ci.polygonMode = VK_POLYGON_MODE_FILL; 254 rasterization_ci.polygonMode = VK_POLYGON_MODE_FILL;
248 rasterization_ci.cullMode = 255 rasterization_ci.cullMode =
249 rs.cull_enable ? MaxwellToVK::CullFace(rs.cull_face) : VK_CULL_MODE_NONE; 256 rs.cull_enable ? MaxwellToVK::CullFace(rs.CullFace()) : VK_CULL_MODE_NONE;
250 rasterization_ci.frontFace = MaxwellToVK::FrontFace(rs.front_face); 257 rasterization_ci.frontFace = MaxwellToVK::FrontFace(rs.FrontFace());
251 rasterization_ci.depthBiasEnable = rs.depth_bias_enable; 258 rasterization_ci.depthBiasEnable = rs.depth_bias_enable;
252 rasterization_ci.depthBiasConstantFactor = 0.0f; 259 rasterization_ci.depthBiasConstantFactor = 0.0f;
253 rasterization_ci.depthBiasClamp = 0.0f; 260 rasterization_ci.depthBiasClamp = 0.0f;
@@ -271,40 +278,38 @@ vk::Pipeline VKGraphicsPipeline::CreatePipeline(const RenderPassParams& renderpa
271 depth_stencil_ci.flags = 0; 278 depth_stencil_ci.flags = 0;
272 depth_stencil_ci.depthTestEnable = ds.depth_test_enable; 279 depth_stencil_ci.depthTestEnable = ds.depth_test_enable;
273 depth_stencil_ci.depthWriteEnable = ds.depth_write_enable; 280 depth_stencil_ci.depthWriteEnable = ds.depth_write_enable;
274 depth_stencil_ci.depthCompareOp = ds.depth_test_enable 281 depth_stencil_ci.depthCompareOp =
275 ? MaxwellToVK::ComparisonOp(ds.depth_test_function) 282 ds.depth_test_enable ? MaxwellToVK::ComparisonOp(ds.DepthTestFunc()) : VK_COMPARE_OP_ALWAYS;
276 : VK_COMPARE_OP_ALWAYS;
277 depth_stencil_ci.depthBoundsTestEnable = ds.depth_bounds_enable; 283 depth_stencil_ci.depthBoundsTestEnable = ds.depth_bounds_enable;
278 depth_stencil_ci.stencilTestEnable = ds.stencil_enable; 284 depth_stencil_ci.stencilTestEnable = ds.stencil_enable;
279 depth_stencil_ci.front = GetStencilFaceState(ds.front_stencil); 285 depth_stencil_ci.front = GetStencilFaceState(ds.front);
280 depth_stencil_ci.back = GetStencilFaceState(ds.back_stencil); 286 depth_stencil_ci.back = GetStencilFaceState(ds.back);
281 depth_stencil_ci.minDepthBounds = 0.0f; 287 depth_stencil_ci.minDepthBounds = 0.0f;
282 depth_stencil_ci.maxDepthBounds = 0.0f; 288 depth_stencil_ci.maxDepthBounds = 0.0f;
283 289
284 std::array<VkPipelineColorBlendAttachmentState, Maxwell::NumRenderTargets> cb_attachments; 290 std::array<VkPipelineColorBlendAttachmentState, Maxwell::NumRenderTargets> cb_attachments;
285 const std::size_t num_attachments = 291 const std::size_t num_attachments = renderpass_params.color_attachments.size();
286 std::min(cd.attachments_count, renderpass_params.color_attachments.size()); 292 for (std::size_t index = 0; index < num_attachments; ++index) {
287 for (std::size_t i = 0; i < num_attachments; ++i) { 293 static constexpr std::array COMPONENT_TABLE = {
288 static constexpr std::array component_table = {
289 VK_COLOR_COMPONENT_R_BIT, VK_COLOR_COMPONENT_G_BIT, VK_COLOR_COMPONENT_B_BIT, 294 VK_COLOR_COMPONENT_R_BIT, VK_COLOR_COMPONENT_G_BIT, VK_COLOR_COMPONENT_B_BIT,
290 VK_COLOR_COMPONENT_A_BIT}; 295 VK_COLOR_COMPONENT_A_BIT};
291 const auto& blend = cd.attachments[i]; 296 const auto& blend = cd.attachments[index];
292 297
293 VkColorComponentFlags color_components = 0; 298 VkColorComponentFlags color_components = 0;
294 for (std::size_t j = 0; j < component_table.size(); ++j) { 299 for (std::size_t i = 0; i < COMPONENT_TABLE.size(); ++i) {
295 if (blend.components[j]) { 300 if (blend.Mask()[i]) {
296 color_components |= component_table[j]; 301 color_components |= COMPONENT_TABLE[i];
297 } 302 }
298 } 303 }
299 304
300 VkPipelineColorBlendAttachmentState& attachment = cb_attachments[i]; 305 VkPipelineColorBlendAttachmentState& attachment = cb_attachments[index];
301 attachment.blendEnable = blend.enable; 306 attachment.blendEnable = blend.enable != 0;
302 attachment.srcColorBlendFactor = MaxwellToVK::BlendFactor(blend.src_rgb_func); 307 attachment.srcColorBlendFactor = MaxwellToVK::BlendFactor(blend.SourceRGBFactor());
303 attachment.dstColorBlendFactor = MaxwellToVK::BlendFactor(blend.dst_rgb_func); 308 attachment.dstColorBlendFactor = MaxwellToVK::BlendFactor(blend.DestRGBFactor());
304 attachment.colorBlendOp = MaxwellToVK::BlendEquation(blend.rgb_equation); 309 attachment.colorBlendOp = MaxwellToVK::BlendEquation(blend.EquationRGB());
305 attachment.srcAlphaBlendFactor = MaxwellToVK::BlendFactor(blend.src_a_func); 310 attachment.srcAlphaBlendFactor = MaxwellToVK::BlendFactor(blend.SourceAlphaFactor());
306 attachment.dstAlphaBlendFactor = MaxwellToVK::BlendFactor(blend.dst_a_func); 311 attachment.dstAlphaBlendFactor = MaxwellToVK::BlendFactor(blend.DestAlphaFactor());
307 attachment.alphaBlendOp = MaxwellToVK::BlendEquation(blend.a_equation); 312 attachment.alphaBlendOp = MaxwellToVK::BlendEquation(blend.EquationAlpha());
308 attachment.colorWriteMask = color_components; 313 attachment.colorWriteMask = color_components;
309 } 314 }
310 315
diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
index 90e3a8edd..91b1b16a5 100644
--- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
@@ -207,7 +207,7 @@ std::array<Shader, Maxwell::MaxShaderProgram> VKPipelineCache::GetShaders() {
207 const GPUVAddr program_addr{GetShaderAddress(system, program)}; 207 const GPUVAddr program_addr{GetShaderAddress(system, program)};
208 const std::optional cpu_addr = memory_manager.GpuToCpuAddress(program_addr); 208 const std::optional cpu_addr = memory_manager.GpuToCpuAddress(program_addr);
209 ASSERT(cpu_addr); 209 ASSERT(cpu_addr);
210 auto shader = cpu_addr ? TryGet(*cpu_addr) : nullptr; 210 auto shader = cpu_addr ? TryGet(*cpu_addr) : null_shader;
211 if (!shader) { 211 if (!shader) {
212 const auto host_ptr{memory_manager.GetPointer(program_addr)}; 212 const auto host_ptr{memory_manager.GetPointer(program_addr)};
213 213
@@ -218,7 +218,11 @@ std::array<Shader, Maxwell::MaxShaderProgram> VKPipelineCache::GetShaders() {
218 218
219 shader = std::make_shared<CachedShader>(system, stage, program_addr, *cpu_addr, 219 shader = std::make_shared<CachedShader>(system, stage, program_addr, *cpu_addr,
220 std::move(code), stage_offset); 220 std::move(code), stage_offset);
221 Register(shader); 221 if (cpu_addr) {
222 Register(shader);
223 } else {
224 null_shader = shader;
225 }
222 } 226 }
223 shaders[index] = std::move(shader); 227 shaders[index] = std::move(shader);
224 } 228 }
@@ -261,7 +265,7 @@ VKComputePipeline& VKPipelineCache::GetComputePipeline(const ComputePipelineCach
261 const auto cpu_addr = memory_manager.GpuToCpuAddress(program_addr); 265 const auto cpu_addr = memory_manager.GpuToCpuAddress(program_addr);
262 ASSERT(cpu_addr); 266 ASSERT(cpu_addr);
263 267
264 auto shader = cpu_addr ? TryGet(*cpu_addr) : nullptr; 268 auto shader = cpu_addr ? TryGet(*cpu_addr) : null_kernel;
265 if (!shader) { 269 if (!shader) {
266 // No shader found - create a new one 270 // No shader found - create a new one
267 const auto host_ptr = memory_manager.GetPointer(program_addr); 271 const auto host_ptr = memory_manager.GetPointer(program_addr);
@@ -271,7 +275,11 @@ VKComputePipeline& VKPipelineCache::GetComputePipeline(const ComputePipelineCach
271 shader = std::make_shared<CachedShader>(system, Tegra::Engines::ShaderType::Compute, 275 shader = std::make_shared<CachedShader>(system, Tegra::Engines::ShaderType::Compute,
272 program_addr, *cpu_addr, std::move(code), 276 program_addr, *cpu_addr, std::move(code),
273 kernel_main_offset); 277 kernel_main_offset);
274 Register(shader); 278 if (cpu_addr) {
279 Register(shader);
280 } else {
281 null_kernel = shader;
282 }
275 } 283 }
276 284
277 Specialization specialization; 285 Specialization specialization;
@@ -329,12 +337,14 @@ VKPipelineCache::DecompileShaders(const GraphicsPipelineCacheKey& key) {
329 const auto& gpu = system.GPU().Maxwell3D(); 337 const auto& gpu = system.GPU().Maxwell3D();
330 338
331 Specialization specialization; 339 Specialization specialization;
332 if (fixed_state.input_assembly.topology == Maxwell::PrimitiveTopology::Points) { 340 if (fixed_state.rasterizer.Topology() == Maxwell::PrimitiveTopology::Points) {
333 ASSERT(fixed_state.input_assembly.point_size != 0.0f); 341 float point_size;
334 specialization.point_size = fixed_state.input_assembly.point_size; 342 std::memcpy(&point_size, &fixed_state.rasterizer.point_size, sizeof(float));
343 specialization.point_size = point_size;
344 ASSERT(point_size != 0.0f);
335 } 345 }
336 for (std::size_t i = 0; i < Maxwell::NumVertexAttributes; ++i) { 346 for (std::size_t i = 0; i < Maxwell::NumVertexAttributes; ++i) {
337 specialization.attribute_types[i] = fixed_state.vertex_input.attributes[i].type; 347 specialization.attribute_types[i] = fixed_state.vertex_input.attributes[i].Type();
338 } 348 }
339 specialization.ndc_minus_one_to_one = fixed_state.rasterizer.ndc_minus_one_to_one; 349 specialization.ndc_minus_one_to_one = fixed_state.rasterizer.ndc_minus_one_to_one;
340 350
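With the point size now stored as raw u32 bits inside the packed rasterizer state, std::memcpy is the strict-aliasing-safe way to recover the float. Under C++20 (an assumption about the toolchain, not something this tree requires) the same reinterpretation is a one-liner:

    #include <bit>
    const float point_size = std::bit_cast<float>(fixed_state.rasterizer.point_size);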
diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.h b/src/video_core/renderer_vulkan/vk_pipeline_cache.h
index 7ccdb7083..602a0a340 100644
--- a/src/video_core/renderer_vulkan/vk_pipeline_cache.h
+++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.h
@@ -182,6 +182,9 @@ private:
182 VKUpdateDescriptorQueue& update_descriptor_queue; 182 VKUpdateDescriptorQueue& update_descriptor_queue;
183 VKRenderPassCache& renderpass_cache; 183 VKRenderPassCache& renderpass_cache;
184 184
185 Shader null_shader{};
186 Shader null_kernel{};
187
185 std::array<Shader, Maxwell::MaxShaderProgram> last_shaders; 188 std::array<Shader, Maxwell::MaxShaderProgram> last_shaders;
186 189
187 GraphicsPipelineCacheKey last_graphics_key; 190 GraphicsPipelineCacheKey last_graphics_key;
diff --git a/src/video_core/renderer_vulkan/vk_query_cache.cpp b/src/video_core/renderer_vulkan/vk_query_cache.cpp
index 0966c7ff7..813f7c162 100644
--- a/src/video_core/renderer_vulkan/vk_query_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_query_cache.cpp
@@ -113,8 +113,19 @@ u64 HostCounter::BlockingQuery() const {
113 if (ticks >= cache.Scheduler().Ticks()) { 113 if (ticks >= cache.Scheduler().Ticks()) {
114 cache.Scheduler().Flush(); 114 cache.Scheduler().Flush();
115 } 115 }
116 return cache.Device().GetLogical().GetQueryResult<u64>( 116 u64 data;
117 query.first, query.second, VK_QUERY_RESULT_64_BIT | VK_QUERY_RESULT_WAIT_BIT); 117 const VkResult result = cache.Device().GetLogical().GetQueryResults(
118 query.first, query.second, 1, sizeof(data), &data, sizeof(data),
119 VK_QUERY_RESULT_64_BIT | VK_QUERY_RESULT_WAIT_BIT);
120 switch (result) {
121 case VK_SUCCESS:
122 return data;
123 case VK_ERROR_DEVICE_LOST:
124 cache.Device().ReportLoss();
125 [[fallthrough]];
126 default:
127 throw vk::Exception(result);
128 }
118} 129}
119 130
120} // namespace Vulkan 131} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
index 4ca0febb8..8a1f57891 100644
--- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp
+++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
@@ -17,6 +17,7 @@
17#include "common/microprofile.h" 17#include "common/microprofile.h"
18#include "core/core.h" 18#include "core/core.h"
19#include "core/memory.h" 19#include "core/memory.h"
20#include "core/settings.h"
20#include "video_core/engines/kepler_compute.h" 21#include "video_core/engines/kepler_compute.h"
21#include "video_core/engines/maxwell_3d.h" 22#include "video_core/engines/maxwell_3d.h"
22#include "video_core/renderer_vulkan/fixed_pipeline_state.h" 23#include "video_core/renderer_vulkan/fixed_pipeline_state.h"
@@ -292,13 +293,16 @@ RasterizerVulkan::RasterizerVulkan(Core::System& system, Core::Frontend::EmuWind
292 staging_pool(device, memory_manager, scheduler), descriptor_pool(device), 293 staging_pool(device, memory_manager, scheduler), descriptor_pool(device),
293 update_descriptor_queue(device, scheduler), renderpass_cache(device), 294 update_descriptor_queue(device, scheduler), renderpass_cache(device),
294 quad_array_pass(device, scheduler, descriptor_pool, staging_pool, update_descriptor_queue), 295 quad_array_pass(device, scheduler, descriptor_pool, staging_pool, update_descriptor_queue),
296 quad_indexed_pass(device, scheduler, descriptor_pool, staging_pool, update_descriptor_queue),
295 uint8_pass(device, scheduler, descriptor_pool, staging_pool, update_descriptor_queue), 297 uint8_pass(device, scheduler, descriptor_pool, staging_pool, update_descriptor_queue),
296 texture_cache(system, *this, device, resource_manager, memory_manager, scheduler, 298 texture_cache(system, *this, device, resource_manager, memory_manager, scheduler,
297 staging_pool), 299 staging_pool),
298 pipeline_cache(system, *this, device, scheduler, descriptor_pool, update_descriptor_queue, 300 pipeline_cache(system, *this, device, scheduler, descriptor_pool, update_descriptor_queue,
299 renderpass_cache), 301 renderpass_cache),
300 buffer_cache(*this, system, device, memory_manager, scheduler, staging_pool), 302 buffer_cache(*this, system, device, memory_manager, scheduler, staging_pool),
301 sampler_cache(device), query_cache(system, *this, device, scheduler) { 303 sampler_cache(device),
304 fence_manager(system, *this, device, scheduler, texture_cache, buffer_cache, query_cache),
305 query_cache(system, *this, device, scheduler) {
302 scheduler.SetQueryCache(query_cache); 306 scheduler.SetQueryCache(query_cache);
303} 307}
304 308
@@ -346,11 +350,6 @@ void RasterizerVulkan::Draw(bool is_indexed, bool is_instanced) {
346 350
347 buffer_bindings.Bind(scheduler); 351 buffer_bindings.Bind(scheduler);
348 352
349 if (device.IsNvDeviceDiagnosticCheckpoints()) {
350 scheduler.Record(
351 [&pipeline](vk::CommandBuffer cmdbuf) { cmdbuf.SetCheckpointNV(&pipeline); });
352 }
353
354 BeginTransformFeedback(); 353 BeginTransformFeedback();
355 354
356 const auto pipeline_layout = pipeline.GetLayout(); 355 const auto pipeline_layout = pipeline.GetLayout();
@@ -364,6 +363,8 @@ void RasterizerVulkan::Draw(bool is_indexed, bool is_instanced) {
364 }); 363 });
365 364
366 EndTransformFeedback(); 365 EndTransformFeedback();
366
367 system.GPU().TickWork();
367} 368}
368 369
369void RasterizerVulkan::Clear() { 370void RasterizerVulkan::Clear() {
@@ -477,11 +478,6 @@ void RasterizerVulkan::DispatchCompute(GPUVAddr code_addr) {
     TransitionImages(image_views, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
                      VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT);
 
-    if (device.IsNvDeviceDiagnosticCheckpoints()) {
-        scheduler.Record(
-            [&pipeline](vk::CommandBuffer cmdbuf) { cmdbuf.SetCheckpointNV(nullptr); });
-    }
-
     scheduler.Record([grid_x = launch_desc.grid_dim_x, grid_y = launch_desc.grid_dim_y,
                       grid_z = launch_desc.grid_dim_z, pipeline_handle = pipeline.GetHandle(),
                       layout = pipeline.GetLayout(),
@@ -513,6 +509,13 @@ void RasterizerVulkan::FlushRegion(VAddr addr, u64 size) {
     query_cache.FlushRegion(addr, size);
 }
 
+bool RasterizerVulkan::MustFlushRegion(VAddr addr, u64 size) {
+    if (!Settings::IsGPULevelHigh()) {
+        return buffer_cache.MustFlushRegion(addr, size);
+    }
+    return texture_cache.MustFlushRegion(addr, size) || buffer_cache.MustFlushRegion(addr, size);
+}
+
 void RasterizerVulkan::InvalidateRegion(VAddr addr, u64 size) {
     if (addr == 0 || size == 0) {
         return;
@@ -523,6 +526,47 @@ void RasterizerVulkan::InvalidateRegion(VAddr addr, u64 size) {
     query_cache.InvalidateRegion(addr, size);
 }
 
+void RasterizerVulkan::OnCPUWrite(VAddr addr, u64 size) {
+    if (addr == 0 || size == 0) {
+        return;
+    }
+    texture_cache.OnCPUWrite(addr, size);
+    pipeline_cache.InvalidateRegion(addr, size);
+    buffer_cache.OnCPUWrite(addr, size);
+    query_cache.InvalidateRegion(addr, size);
+}
+
+void RasterizerVulkan::SyncGuestHost() {
+    texture_cache.SyncGuestHost();
+    buffer_cache.SyncGuestHost();
+}
+
+void RasterizerVulkan::SignalSemaphore(GPUVAddr addr, u32 value) {
+    auto& gpu{system.GPU()};
+    if (!gpu.IsAsync()) {
+        gpu.MemoryManager().Write<u32>(addr, value);
+        return;
+    }
+    fence_manager.SignalSemaphore(addr, value);
+}
+
+void RasterizerVulkan::SignalSyncPoint(u32 value) {
+    auto& gpu{system.GPU()};
+    if (!gpu.IsAsync()) {
+        gpu.IncrementSyncPoint(value);
+        return;
+    }
+    fence_manager.SignalSyncPoint(value);
+}
+
+void RasterizerVulkan::ReleaseFences() {
+    auto& gpu{system.GPU()};
+    if (!gpu.IsAsync()) {
+        return;
+    }
+    fence_manager.WaitPendingFences();
+}
+
 void RasterizerVulkan::FlushAndInvalidateRegion(VAddr addr, u64 size) {
     FlushRegion(addr, size);
     InvalidateRegion(addr, size);
@@ -806,25 +850,29 @@ void RasterizerVulkan::SetupVertexArrays(FixedPipelineState::VertexInput& vertex
                                          BufferBindings& buffer_bindings) {
     const auto& regs = system.GPU().Maxwell3D().regs;
 
-    for (u32 index = 0; index < static_cast<u32>(Maxwell::NumVertexAttributes); ++index) {
+    for (std::size_t index = 0; index < Maxwell::NumVertexAttributes; ++index) {
         const auto& attrib = regs.vertex_attrib_format[index];
         if (!attrib.IsValid()) {
+            vertex_input.SetAttribute(index, false, 0, 0, {}, {});
             continue;
         }
 
-        const auto& buffer = regs.vertex_array[attrib.buffer];
+        [[maybe_unused]] const auto& buffer = regs.vertex_array[attrib.buffer];
         ASSERT(buffer.IsEnabled());
 
-        vertex_input.attributes[vertex_input.num_attributes++] =
-            FixedPipelineState::VertexAttribute(index, attrib.buffer, attrib.type, attrib.size,
-                                                attrib.offset);
+        vertex_input.SetAttribute(index, true, attrib.buffer, attrib.offset, attrib.type.Value(),
+                                  attrib.size.Value());
     }
 
-    for (u32 index = 0; index < static_cast<u32>(Maxwell::NumVertexArrays); ++index) {
+    for (std::size_t index = 0; index < Maxwell::NumVertexArrays; ++index) {
         const auto& vertex_array = regs.vertex_array[index];
         if (!vertex_array.IsEnabled()) {
+            vertex_input.SetBinding(index, false, 0, 0);
             continue;
         }
+        vertex_input.SetBinding(
+            index, true, vertex_array.stride,
+            regs.instanced_arrays.IsInstancingEnabled(index) ? vertex_array.divisor : 0);
 
         const GPUVAddr start{vertex_array.StartAddress()};
         const GPUVAddr end{regs.vertex_array_limit[index].LimitAddress()};
@@ -832,10 +880,6 @@ void RasterizerVulkan::SetupVertexArrays(FixedPipelineState::VertexInput& vertex
         ASSERT(end > start);
         const std::size_t size{end - start + 1};
         const auto [buffer, offset] = buffer_cache.UploadMemory(start, size);
-
-        vertex_input.bindings[vertex_input.num_bindings++] = FixedPipelineState::VertexBinding(
-            index, vertex_array.stride,
-            regs.instanced_arrays.IsInstancingEnabled(index) ? vertex_array.divisor : 0);
         buffer_bindings.AddVertexBinding(buffer, offset);
     }
 }
@@ -844,18 +888,26 @@ void RasterizerVulkan::SetupIndexBuffer(BufferBindings& buffer_bindings, DrawPar
                                         bool is_indexed) {
     const auto& regs = system.GPU().Maxwell3D().regs;
     switch (regs.draw.topology) {
-    case Maxwell::PrimitiveTopology::Quads:
-        if (params.is_indexed) {
-            UNIMPLEMENTED();
-        } else {
+    case Maxwell::PrimitiveTopology::Quads: {
+        if (!params.is_indexed) {
             const auto [buffer, offset] =
                 quad_array_pass.Assemble(params.num_vertices, params.base_vertex);
             buffer_bindings.SetIndexBinding(buffer, offset, VK_INDEX_TYPE_UINT32);
             params.base_vertex = 0;
             params.num_vertices = params.num_vertices * 6 / 4;
             params.is_indexed = true;
+            break;
         }
+        const GPUVAddr gpu_addr = regs.index_array.IndexStart();
+        auto [buffer, offset] = buffer_cache.UploadMemory(gpu_addr, CalculateIndexBufferSize());
+        std::tie(buffer, offset) = quad_indexed_pass.Assemble(
+            regs.index_array.format, params.num_vertices, params.base_vertex, buffer, offset);
+
+        buffer_bindings.SetIndexBinding(buffer, offset, VK_INDEX_TYPE_UINT32);
+        params.num_vertices = (params.num_vertices / 4) * 6;
+        params.base_vertex = 0;
         break;
+    }
     default: {
         if (!is_indexed) {
             break;
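
The quad paths above turn 4-vertex quads into triangle lists, which is why the vertex count is rescaled by 6/4 (two triangles per quad). A minimal CPU-side sketch of the expansion the quad_indexed_pass compute shader performs on the GPU; ExpandQuadIndices is a hypothetical helper, not part of the patch:

#include <cstddef>
#include <cstdint>
#include <vector>

// Each quad (v0, v1, v2, v3) becomes triangles (v0, v1, v2) and (v0, v2, v3),
// so n quad indices expand to (n / 4) * 6 triangle indices.
std::vector<std::uint32_t> ExpandQuadIndices(const std::vector<std::uint32_t>& quads) {
    std::vector<std::uint32_t> tris;
    tris.reserve(quads.size() / 4 * 6);
    for (std::size_t i = 0; i + 3 < quads.size(); i += 4) {
        const std::uint32_t v0 = quads[i + 0], v1 = quads[i + 1];
        const std::uint32_t v2 = quads[i + 2], v3 = quads[i + 3];
        tris.insert(tris.end(), {v0, v1, v2, v0, v2, v3});
    }
    return tris;
}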
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.h b/src/video_core/renderer_vulkan/vk_rasterizer.h
index 46037860a..2fa46b0cc 100644
--- a/src/video_core/renderer_vulkan/vk_rasterizer.h
+++ b/src/video_core/renderer_vulkan/vk_rasterizer.h
@@ -21,6 +21,7 @@
 #include "video_core/renderer_vulkan/vk_buffer_cache.h"
 #include "video_core/renderer_vulkan/vk_compute_pass.h"
 #include "video_core/renderer_vulkan/vk_descriptor_pool.h"
+#include "video_core/renderer_vulkan/vk_fence_manager.h"
 #include "video_core/renderer_vulkan/vk_memory_manager.h"
 #include "video_core/renderer_vulkan/vk_pipeline_cache.h"
 #include "video_core/renderer_vulkan/vk_query_cache.h"
@@ -118,7 +119,13 @@ public:
     void Query(GPUVAddr gpu_addr, VideoCore::QueryType type, std::optional<u64> timestamp) override;
     void FlushAll() override;
     void FlushRegion(VAddr addr, u64 size) override;
+    bool MustFlushRegion(VAddr addr, u64 size) override;
     void InvalidateRegion(VAddr addr, u64 size) override;
+    void OnCPUWrite(VAddr addr, u64 size) override;
+    void SyncGuestHost() override;
+    void SignalSemaphore(GPUVAddr addr, u32 value) override;
+    void SignalSyncPoint(u32 value) override;
+    void ReleaseFences() override;
     void FlushAndInvalidateRegion(VAddr addr, u64 size) override;
     void FlushCommands() override;
     void TickFrame() override;
@@ -254,12 +261,14 @@ private:
     VKUpdateDescriptorQueue update_descriptor_queue;
     VKRenderPassCache renderpass_cache;
     QuadArrayPass quad_array_pass;
+    QuadIndexedPass quad_indexed_pass;
     Uint8Pass uint8_pass;
 
     VKTextureCache texture_cache;
     VKPipelineCache pipeline_cache;
     VKBufferCache buffer_cache;
     VKSamplerCache sampler_cache;
+    VKFenceManager fence_manager;
     VKQueryCache query_cache;
 
     std::array<View, Maxwell::NumRenderTargets> color_attachments;
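
The new overrides only defer guest-visible signals when the GPU is emulated asynchronously; otherwise they apply immediately, as the rasterizer hunks above show. A minimal sketch of that deferral pattern with hypothetical names (the real VKFenceManager additionally ties each entry to a host fence):

#include <cstdint>
#include <deque>
#include <functional>

class FenceManagerSketch {
public:
    explicit FenceManagerSketch(std::function<void(std::uint32_t)> increment)
        : increment_syncpoint{std::move(increment)} {}

    // Mirrors SignalSyncPoint: queue the signal instead of applying it now.
    void SignalSyncPoint(std::uint32_t value) {
        pending.push_back(value);
    }

    // Mirrors WaitPendingFences: release queued signals in submission order.
    // The real manager waits on the associated host fence before each one.
    void WaitPendingFences() {
        while (!pending.empty()) {
            increment_syncpoint(pending.front());
            pending.pop_front();
        }
    }

private:
    std::deque<std::uint32_t> pending;
    std::function<void(std::uint32_t)> increment_syncpoint;
};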
diff --git a/src/video_core/renderer_vulkan/vk_scheduler.cpp b/src/video_core/renderer_vulkan/vk_scheduler.cpp
index 900f551b3..ae7ba3eb5 100644
--- a/src/video_core/renderer_vulkan/vk_scheduler.cpp
+++ b/src/video_core/renderer_vulkan/vk_scheduler.cpp
@@ -166,7 +166,15 @@ void VKScheduler::SubmitExecution(VkSemaphore semaphore) {
     submit_info.pCommandBuffers = current_cmdbuf.address();
     submit_info.signalSemaphoreCount = semaphore ? 1 : 0;
     submit_info.pSignalSemaphores = &semaphore;
-    device.GetGraphicsQueue().Submit(submit_info, *current_fence);
+    switch (const VkResult result = device.GetGraphicsQueue().Submit(submit_info, *current_fence)) {
+    case VK_SUCCESS:
+        break;
+    case VK_ERROR_DEVICE_LOST:
+        device.ReportLoss();
+        [[fallthrough]];
+    default:
+        vk::Check(result);
+    }
 }
 
 void VKScheduler::AllocateNewContext() {
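
Submit now returns the VkResult so the scheduler can report VK_ERROR_DEVICE_LOST before failing hard. A hedged sketch of the same report-then-propagate pattern against the raw Vulkan API; SubmitChecked and the diagnostics hook are hypothetical:

#include <vulkan/vulkan.h>

VkResult SubmitChecked(VkQueue queue, const VkSubmitInfo& info, VkFence fence) {
    const VkResult result = vkQueueSubmit(queue, 1, &info, fence);
    if (result == VK_ERROR_DEVICE_LOST) {
        // Collect diagnostics here (the commit routes this through
        // VKDevice::ReportLoss) while the lost device is still inspectable.
    }
    return result;
}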
diff --git a/src/video_core/renderer_vulkan/vk_stream_buffer.cpp b/src/video_core/renderer_vulkan/vk_stream_buffer.cpp
index 38a93a01a..868447af2 100644
--- a/src/video_core/renderer_vulkan/vk_stream_buffer.cpp
+++ b/src/video_core/renderer_vulkan/vk_stream_buffer.cpp
@@ -3,6 +3,7 @@
 // Refer to the license.txt file included.
 
 #include <algorithm>
+#include <limits>
 #include <optional>
 #include <tuple>
 #include <vector>
@@ -22,22 +23,38 @@ namespace {
 constexpr u64 WATCHES_INITIAL_RESERVE = 0x4000;
 constexpr u64 WATCHES_RESERVE_CHUNK = 0x1000;
 
-constexpr u64 STREAM_BUFFER_SIZE = 256 * 1024 * 1024;
+constexpr u64 PREFERRED_STREAM_BUFFER_SIZE = 256 * 1024 * 1024;
 
-std::optional<u32> FindMemoryType(const VKDevice& device, u32 filter,
-                                  VkMemoryPropertyFlags wanted) {
-    const auto properties = device.GetPhysical().GetMemoryProperties();
-    for (u32 i = 0; i < properties.memoryTypeCount; i++) {
-        if (!(filter & (1 << i))) {
-            continue;
-        }
-        if ((properties.memoryTypes[i].propertyFlags & wanted) == wanted) {
+/// Find a memory type with the passed requirements
+std::optional<u32> FindMemoryType(const VkPhysicalDeviceMemoryProperties& properties,
+                                  VkMemoryPropertyFlags wanted,
+                                  u32 filter = std::numeric_limits<u32>::max()) {
+    for (u32 i = 0; i < properties.memoryTypeCount; ++i) {
+        const auto flags = properties.memoryTypes[i].propertyFlags;
+        if ((flags & wanted) == wanted && (filter & (1U << i)) != 0) {
             return i;
         }
     }
     return std::nullopt;
 }
 
+/// Get the preferred host visible memory type.
+u32 GetMemoryType(const VkPhysicalDeviceMemoryProperties& properties,
+                  u32 filter = std::numeric_limits<u32>::max()) {
+    // Prefer device local host visible allocations. Both AMD and Nvidia now provide one.
+    // Otherwise search for a host visible allocation.
+    static constexpr auto HOST_MEMORY =
+        VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT;
+    static constexpr auto DYNAMIC_MEMORY = HOST_MEMORY | VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT;
+
+    std::optional preferred_type = FindMemoryType(properties, DYNAMIC_MEMORY);
+    if (!preferred_type) {
+        preferred_type = FindMemoryType(properties, HOST_MEMORY);
+        ASSERT_MSG(preferred_type, "No host visible and coherent memory type found");
+    }
+    return preferred_type.value_or(0);
+}
+
 } // Anonymous namespace
 
 VKStreamBuffer::VKStreamBuffer(const VKDevice& device, VKScheduler& scheduler,
@@ -51,7 +68,7 @@ VKStreamBuffer::VKStreamBuffer(const VKDevice& device, VKScheduler& scheduler,
 VKStreamBuffer::~VKStreamBuffer() = default;
 
 std::tuple<u8*, u64, bool> VKStreamBuffer::Map(u64 size, u64 alignment) {
-    ASSERT(size <= STREAM_BUFFER_SIZE);
+    ASSERT(size <= stream_buffer_size);
     mapped_size = size;
 
     if (alignment > 0) {
@@ -61,7 +78,7 @@ std::tuple<u8*, u64, bool> VKStreamBuffer::Map(u64 size, u64 alignment) {
     WaitPendingOperations(offset);
 
     bool invalidated = false;
-    if (offset + size > STREAM_BUFFER_SIZE) {
+    if (offset + size > stream_buffer_size) {
         // The buffer would overflow, save the amount of used watches and reset the state.
         invalidation_mark = current_watch_cursor;
         current_watch_cursor = 0;
@@ -98,40 +115,37 @@ void VKStreamBuffer::Unmap(u64 size) {
 }
 
 void VKStreamBuffer::CreateBuffers(VkBufferUsageFlags usage) {
+    const auto memory_properties = device.GetPhysical().GetMemoryProperties();
+    const u32 preferred_type = GetMemoryType(memory_properties);
+    const u32 preferred_heap = memory_properties.memoryTypes[preferred_type].heapIndex;
+
+    // Substract from the preferred heap size some bytes to avoid getting out of memory.
+    const VkDeviceSize heap_size = memory_properties.memoryHeaps[preferred_heap].size;
+    const VkDeviceSize allocable_size = heap_size - 4 * 1024 * 1024;
+
     VkBufferCreateInfo buffer_ci;
     buffer_ci.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO;
     buffer_ci.pNext = nullptr;
     buffer_ci.flags = 0;
-    buffer_ci.size = STREAM_BUFFER_SIZE;
+    buffer_ci.size = std::min(PREFERRED_STREAM_BUFFER_SIZE, allocable_size);
     buffer_ci.usage = usage;
     buffer_ci.sharingMode = VK_SHARING_MODE_EXCLUSIVE;
     buffer_ci.queueFamilyIndexCount = 0;
     buffer_ci.pQueueFamilyIndices = nullptr;
 
-    const auto& dev = device.GetLogical();
-    buffer = dev.CreateBuffer(buffer_ci);
-
-    const auto& dld = device.GetDispatchLoader();
-    const auto requirements = dev.GetBufferMemoryRequirements(*buffer);
-    // Prefer device local host visible allocations (this should hit AMD's pinned memory).
-    auto type =
-        FindMemoryType(device, requirements.memoryTypeBits,
-                       VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT |
-                           VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT);
-    if (!type) {
-        // Otherwise search for a host visible allocation.
-        type = FindMemoryType(device, requirements.memoryTypeBits,
-                              VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
-                                  VK_MEMORY_PROPERTY_HOST_COHERENT_BIT);
-        ASSERT_MSG(type, "No host visible and coherent memory type found");
-    }
+    buffer = device.GetLogical().CreateBuffer(buffer_ci);
+
+    const auto requirements = device.GetLogical().GetBufferMemoryRequirements(*buffer);
+    const u32 required_flags = requirements.memoryTypeBits;
+    stream_buffer_size = static_cast<u64>(requirements.size);
+
     VkMemoryAllocateInfo memory_ai;
     memory_ai.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO;
     memory_ai.pNext = nullptr;
     memory_ai.allocationSize = requirements.size;
-    memory_ai.memoryTypeIndex = *type;
+    memory_ai.memoryTypeIndex = GetMemoryType(memory_properties, required_flags);
 
-    memory = dev.AllocateMemory(memory_ai);
+    memory = device.GetLogical().AllocateMemory(memory_ai);
     buffer.BindMemory(*memory, 0);
 }
 
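
The stream buffer is now sized against the chosen heap rather than a fixed 256 MiB. A worked sketch of the clamp, assuming a hypothetical 192 MiB heap, where the result is 188 MiB instead of the preferred 256 MiB:

#include <algorithm>
#include <cstdint>

std::uint64_t ClampStreamBufferSize(std::uint64_t heap_size) {
    constexpr std::uint64_t preferred = 256ULL * 1024 * 1024;       // PREFERRED_STREAM_BUFFER_SIZE
    const std::uint64_t allocable = heap_size - 4ULL * 1024 * 1024; // keep 4 MiB of headroom
    return std::min(preferred, allocable);
}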
diff --git a/src/video_core/renderer_vulkan/vk_stream_buffer.h b/src/video_core/renderer_vulkan/vk_stream_buffer.h
index 58ce8b973..dfddf7ad6 100644
--- a/src/video_core/renderer_vulkan/vk_stream_buffer.h
+++ b/src/video_core/renderer_vulkan/vk_stream_buffer.h
@@ -56,8 +56,9 @@ private:
     const VKDevice& device; ///< Vulkan device manager.
     VKScheduler& scheduler; ///< Command scheduler.
 
     vk::Buffer buffer;        ///< Mapped buffer.
     vk::DeviceMemory memory;  ///< Memory allocation.
+    u64 stream_buffer_size{}; ///< Stream buffer size.
 
     u64 offset{};      ///< Buffer iterator.
     u64 mapped_size{}; ///< Size reserved for the current copy.
diff --git a/src/video_core/renderer_vulkan/wrapper.cpp b/src/video_core/renderer_vulkan/wrapper.cpp
index 9b94dfff1..539f3c974 100644
--- a/src/video_core/renderer_vulkan/wrapper.cpp
+++ b/src/video_core/renderer_vulkan/wrapper.cpp
@@ -61,9 +61,9 @@ void Load(VkDevice device, DeviceDispatch& dld) noexcept {
     X(vkCmdPipelineBarrier);
     X(vkCmdPushConstants);
     X(vkCmdSetBlendConstants);
-    X(vkCmdSetCheckpointNV);
     X(vkCmdSetDepthBias);
     X(vkCmdSetDepthBounds);
+    X(vkCmdSetEvent);
     X(vkCmdSetScissor);
     X(vkCmdSetStencilCompareMask);
     X(vkCmdSetStencilReference);
@@ -76,6 +76,7 @@ void Load(VkDevice device, DeviceDispatch& dld) noexcept {
     X(vkCreateDescriptorPool);
     X(vkCreateDescriptorSetLayout);
     X(vkCreateDescriptorUpdateTemplateKHR);
+    X(vkCreateEvent);
     X(vkCreateFence);
     X(vkCreateFramebuffer);
     X(vkCreateGraphicsPipelines);
@@ -94,6 +95,7 @@ void Load(VkDevice device, DeviceDispatch& dld) noexcept {
     X(vkDestroyDescriptorPool);
     X(vkDestroyDescriptorSetLayout);
     X(vkDestroyDescriptorUpdateTemplateKHR);
+    X(vkDestroyEvent);
     X(vkDestroyFence);
     X(vkDestroyFramebuffer);
     X(vkDestroyImage);
@@ -113,10 +115,10 @@ void Load(VkDevice device, DeviceDispatch& dld) noexcept {
     X(vkFreeMemory);
     X(vkGetBufferMemoryRequirements);
     X(vkGetDeviceQueue);
+    X(vkGetEventStatus);
     X(vkGetFenceStatus);
     X(vkGetImageMemoryRequirements);
     X(vkGetQueryPoolResults);
-    X(vkGetQueueCheckpointDataNV);
     X(vkMapMemory);
     X(vkQueueSubmit);
     X(vkResetFences);
@@ -271,6 +273,10 @@ void Destroy(VkDevice device, VkDeviceMemory handle, const DeviceDispatch& dld)
     dld.vkFreeMemory(device, handle, nullptr);
 }
 
+void Destroy(VkDevice device, VkEvent handle, const DeviceDispatch& dld) noexcept {
+    dld.vkDestroyEvent(device, handle, nullptr);
+}
+
 void Destroy(VkDevice device, VkFence handle, const DeviceDispatch& dld) noexcept {
     dld.vkDestroyFence(device, handle, nullptr);
 }
@@ -409,17 +415,6 @@ DebugCallback Instance::TryCreateDebugCallback(
     return DebugCallback(messenger, handle, *dld);
 }
 
-std::vector<VkCheckpointDataNV> Queue::GetCheckpointDataNV(const DeviceDispatch& dld) const {
-    if (!dld.vkGetQueueCheckpointDataNV) {
-        return {};
-    }
-    u32 num;
-    dld.vkGetQueueCheckpointDataNV(queue, &num, nullptr);
-    std::vector<VkCheckpointDataNV> checkpoints(num);
-    dld.vkGetQueueCheckpointDataNV(queue, &num, checkpoints.data());
-    return checkpoints;
-}
-
 void Buffer::BindMemory(VkDeviceMemory memory, VkDeviceSize offset) const {
     Check(dld->vkBindBufferMemory(owner, handle, memory, offset));
 }
@@ -469,12 +464,11 @@ std::vector<VkImage> SwapchainKHR::GetImages() const {
 }
 
 Device Device::Create(VkPhysicalDevice physical_device, Span<VkDeviceQueueCreateInfo> queues_ci,
-                      Span<const char*> enabled_extensions,
-                      const VkPhysicalDeviceFeatures2& enabled_features,
+                      Span<const char*> enabled_extensions, const void* next,
                       DeviceDispatch& dld) noexcept {
     VkDeviceCreateInfo ci;
     ci.sType = VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO;
-    ci.pNext = &enabled_features;
+    ci.pNext = next;
     ci.flags = 0;
     ci.queueCreateInfoCount = queues_ci.size();
     ci.pQueueCreateInfos = queues_ci.data();
@@ -613,6 +607,16 @@ ShaderModule Device::CreateShaderModule(const VkShaderModuleCreateInfo& ci) cons
     return ShaderModule(object, handle, *dld);
 }
 
+Event Device::CreateEvent() const {
+    VkEventCreateInfo ci;
+    ci.sType = VK_STRUCTURE_TYPE_EVENT_CREATE_INFO;
+    ci.pNext = nullptr;
+    ci.flags = 0;
+    VkEvent object;
+    Check(dld->vkCreateEvent(handle, &ci, nullptr, &object));
+    return Event(object, handle, *dld);
+}
+
 SwapchainKHR Device::CreateSwapchainKHR(const VkSwapchainCreateInfoKHR& ci) const {
     VkSwapchainKHR object;
     Check(dld->vkCreateSwapchainKHR(handle, &ci, nullptr, &object));
diff --git a/src/video_core/renderer_vulkan/wrapper.h b/src/video_core/renderer_vulkan/wrapper.h
index fb3657819..bda16a2cb 100644
--- a/src/video_core/renderer_vulkan/wrapper.h
+++ b/src/video_core/renderer_vulkan/wrapper.h
@@ -197,9 +197,9 @@ struct DeviceDispatch : public InstanceDispatch {
     PFN_vkCmdPipelineBarrier vkCmdPipelineBarrier;
     PFN_vkCmdPushConstants vkCmdPushConstants;
     PFN_vkCmdSetBlendConstants vkCmdSetBlendConstants;
-    PFN_vkCmdSetCheckpointNV vkCmdSetCheckpointNV;
     PFN_vkCmdSetDepthBias vkCmdSetDepthBias;
     PFN_vkCmdSetDepthBounds vkCmdSetDepthBounds;
+    PFN_vkCmdSetEvent vkCmdSetEvent;
     PFN_vkCmdSetScissor vkCmdSetScissor;
     PFN_vkCmdSetStencilCompareMask vkCmdSetStencilCompareMask;
     PFN_vkCmdSetStencilReference vkCmdSetStencilReference;
@@ -212,6 +212,7 @@ struct DeviceDispatch : public InstanceDispatch {
     PFN_vkCreateDescriptorPool vkCreateDescriptorPool;
     PFN_vkCreateDescriptorSetLayout vkCreateDescriptorSetLayout;
     PFN_vkCreateDescriptorUpdateTemplateKHR vkCreateDescriptorUpdateTemplateKHR;
+    PFN_vkCreateEvent vkCreateEvent;
     PFN_vkCreateFence vkCreateFence;
     PFN_vkCreateFramebuffer vkCreateFramebuffer;
     PFN_vkCreateGraphicsPipelines vkCreateGraphicsPipelines;
@@ -230,6 +231,7 @@ struct DeviceDispatch : public InstanceDispatch {
     PFN_vkDestroyDescriptorPool vkDestroyDescriptorPool;
     PFN_vkDestroyDescriptorSetLayout vkDestroyDescriptorSetLayout;
     PFN_vkDestroyDescriptorUpdateTemplateKHR vkDestroyDescriptorUpdateTemplateKHR;
+    PFN_vkDestroyEvent vkDestroyEvent;
     PFN_vkDestroyFence vkDestroyFence;
     PFN_vkDestroyFramebuffer vkDestroyFramebuffer;
     PFN_vkDestroyImage vkDestroyImage;
@@ -249,10 +251,10 @@ struct DeviceDispatch : public InstanceDispatch {
     PFN_vkFreeMemory vkFreeMemory;
     PFN_vkGetBufferMemoryRequirements vkGetBufferMemoryRequirements;
     PFN_vkGetDeviceQueue vkGetDeviceQueue;
+    PFN_vkGetEventStatus vkGetEventStatus;
     PFN_vkGetFenceStatus vkGetFenceStatus;
     PFN_vkGetImageMemoryRequirements vkGetImageMemoryRequirements;
     PFN_vkGetQueryPoolResults vkGetQueryPoolResults;
-    PFN_vkGetQueueCheckpointDataNV vkGetQueueCheckpointDataNV;
     PFN_vkMapMemory vkMapMemory;
     PFN_vkQueueSubmit vkQueueSubmit;
     PFN_vkResetFences vkResetFences;
@@ -281,6 +283,7 @@ void Destroy(VkDevice, VkDescriptorPool, const DeviceDispatch&) noexcept;
 void Destroy(VkDevice, VkDescriptorSetLayout, const DeviceDispatch&) noexcept;
 void Destroy(VkDevice, VkDescriptorUpdateTemplateKHR, const DeviceDispatch&) noexcept;
 void Destroy(VkDevice, VkDeviceMemory, const DeviceDispatch&) noexcept;
+void Destroy(VkDevice, VkEvent, const DeviceDispatch&) noexcept;
 void Destroy(VkDevice, VkFence, const DeviceDispatch&) noexcept;
 void Destroy(VkDevice, VkFramebuffer, const DeviceDispatch&) noexcept;
 void Destroy(VkDevice, VkImage, const DeviceDispatch&) noexcept;
@@ -567,12 +570,8 @@ public:
     /// Construct a queue handle.
     constexpr Queue(VkQueue queue, const DeviceDispatch& dld) noexcept : queue{queue}, dld{&dld} {}
 
-    /// Returns the checkpoint data.
-    /// @note Returns an empty vector when the function pointer is not present.
-    std::vector<VkCheckpointDataNV> GetCheckpointDataNV(const DeviceDispatch& dld) const;
-
-    void Submit(Span<VkSubmitInfo> submit_infos, VkFence fence) const {
-        Check(dld->vkQueueSubmit(queue, submit_infos.size(), submit_infos.data(), fence));
+    VkResult Submit(Span<VkSubmitInfo> submit_infos, VkFence fence) const noexcept {
+        return dld->vkQueueSubmit(queue, submit_infos.size(), submit_infos.data(), fence);
     }
 
     VkResult Present(const VkPresentInfoKHR& present_info) const noexcept {
@@ -654,13 +653,21 @@ public:
     std::vector<VkImage> GetImages() const;
 };
 
+class Event : public Handle<VkEvent, VkDevice, DeviceDispatch> {
+    using Handle<VkEvent, VkDevice, DeviceDispatch>::Handle;
+
+public:
+    VkResult GetStatus() const noexcept {
+        return dld->vkGetEventStatus(owner, handle);
+    }
+};
+
 class Device : public Handle<VkDevice, NoOwner, DeviceDispatch> {
     using Handle<VkDevice, NoOwner, DeviceDispatch>::Handle;
 
 public:
     static Device Create(VkPhysicalDevice physical_device, Span<VkDeviceQueueCreateInfo> queues_ci,
-                         Span<const char*> enabled_extensions,
-                         const VkPhysicalDeviceFeatures2& enabled_features,
+                         Span<const char*> enabled_extensions, const void* next,
                          DeviceDispatch& dld) noexcept;
 
     Queue GetQueue(u32 family_index) const noexcept;
@@ -702,6 +709,8 @@ public:
 
     ShaderModule CreateShaderModule(const VkShaderModuleCreateInfo& ci) const;
 
+    Event CreateEvent() const;
+
     SwapchainKHR CreateSwapchainKHR(const VkSwapchainCreateInfoKHR& ci) const;
 
     DeviceMemory TryAllocateMemory(const VkMemoryAllocateInfo& ai) const noexcept;
@@ -734,18 +743,11 @@ public:
         dld->vkResetQueryPoolEXT(handle, query_pool, first, count);
     }
 
-    void GetQueryResults(VkQueryPool query_pool, u32 first, u32 count, std::size_t data_size,
-                         void* data, VkDeviceSize stride, VkQueryResultFlags flags) const {
-        Check(dld->vkGetQueryPoolResults(handle, query_pool, first, count, data_size, data, stride,
-                                         flags));
-    }
-
-    template <typename T>
-    T GetQueryResult(VkQueryPool query_pool, u32 first, VkQueryResultFlags flags) const {
-        static_assert(std::is_trivially_copyable_v<T>);
-        T value;
-        GetQueryResults(query_pool, first, 1, sizeof(T), &value, sizeof(T), flags);
-        return value;
+    VkResult GetQueryResults(VkQueryPool query_pool, u32 first, u32 count, std::size_t data_size,
+                             void* data, VkDeviceSize stride, VkQueryResultFlags flags) const
+        noexcept {
+        return dld->vkGetQueryPoolResults(handle, query_pool, first, count, data_size, data, stride,
+                                          flags);
     }
 };
 
@@ -920,10 +922,6 @@ public:
         dld->vkCmdPushConstants(handle, layout, flags, offset, size, values);
     }
 
-    void SetCheckpointNV(const void* checkpoint_marker) const noexcept {
-        dld->vkCmdSetCheckpointNV(handle, checkpoint_marker);
-    }
-
     void SetViewport(u32 first, Span<VkViewport> viewports) const noexcept {
         dld->vkCmdSetViewport(handle, first, viewports.size(), viewports.data());
     }
@@ -956,6 +954,10 @@ public:
         dld->vkCmdSetDepthBounds(handle, min_depth_bounds, max_depth_bounds);
     }
 
+    void SetEvent(VkEvent event, VkPipelineStageFlags stage_flags) const noexcept {
+        dld->vkCmdSetEvent(handle, event, stage_flags);
+    }
+
     void BindTransformFeedbackBuffersEXT(u32 first, u32 count, const VkBuffer* buffers,
                                          const VkDeviceSize* offsets,
                                          const VkDeviceSize* sizes) const noexcept {
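
A short usage sketch of the new Event wrapper, assuming a device and a recording cmdbuf of the surrounding wrapper types (the real consumer is the Vulkan fence manager, which polls instead of blocking):

// Create once, signal on the GPU timeline, poll from the CPU.
vk::Event event = device.CreateEvent();
cmdbuf.SetEvent(*event, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT); // recorded in the command buffer
const bool signaled = event.GetStatus() == VK_EVENT_SET;     // non-blocking status query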
diff --git a/src/video_core/shader/control_flow.cpp b/src/video_core/shader/control_flow.cpp
index 6d313963a..e00a3fb70 100644
--- a/src/video_core/shader/control_flow.cpp
+++ b/src/video_core/shader/control_flow.cpp
@@ -587,8 +587,6 @@ bool TryQuery(CFGRebuildState& state) {
     return true;
 }
 
-} // Anonymous namespace
-
 void InsertBranch(ASTManager& mm, const BlockBranchInfo& branch_info) {
     const auto get_expr = ([&](const Condition& cond) -> Expr {
         Expr result{};
@@ -655,6 +653,8 @@ void DecompileShader(CFGRebuildState& state) {
     state.manager->Decompile();
 }
 
+} // Anonymous namespace
+
 std::unique_ptr<ShaderCharacteristics> ScanFlow(const ProgramCode& program_code, u32 start_address,
                                                 const CompilerSettings& settings,
                                                 Registry& registry) {
diff --git a/src/video_core/shader/decode/memory.cpp b/src/video_core/shader/decode/memory.cpp
index 8112ead3e..9392f065b 100644
--- a/src/video_core/shader/decode/memory.cpp
+++ b/src/video_core/shader/decode/memory.cpp
@@ -479,7 +479,7 @@ std::tuple<Node, Node, GlobalMemoryBase> ShaderIR::TrackGlobalMemory(NodeBlock&
     bb.push_back(Comment(fmt::format("Base address is c[0x{:x}][0x{:x}]", index, offset)));
 
     const GlobalMemoryBase descriptor{index, offset};
-    const auto& [entry, is_new] = used_global_memory.try_emplace(descriptor);
+    const auto& entry = used_global_memory.try_emplace(descriptor).first;
     auto& usage = entry->second;
     usage.is_written |= is_write;
     usage.is_read |= is_read;
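
For context on the one-line change above: try_emplace returns std::pair<iterator, bool>, and the bool (whether an insertion happened) was unused, so taking .first sidesteps binding a name that would go unreferenced. A self-contained sketch:

#include <map>

void TryEmplaceSketch() {
    std::map<int, int> counts;
    // Default-constructs the value on first use; .first is the iterator.
    const auto entry = counts.try_emplace(7).first;
    entry->second += 1; // mutate through the iterator, as the real code does
}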
diff --git a/src/video_core/shader/decode/texture.cpp b/src/video_core/shader/decode/texture.cpp
index 6c4a1358b..e68f1d305 100644
--- a/src/video_core/shader/decode/texture.cpp
+++ b/src/video_core/shader/decode/texture.cpp
@@ -139,7 +139,7 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) {
         }
         const Node component = Immediate(static_cast<u32>(instr.tld4s.component));
 
-        const SamplerInfo info{TextureType::Texture2D, false, is_depth_compare};
+        const SamplerInfo info{TextureType::Texture2D, false, is_depth_compare, false};
         const Sampler& sampler = *GetSampler(instr.sampler, info);
 
         Node4 values;
@@ -171,13 +171,12 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) {
         const auto coord_count = GetCoordCount(texture_type);
         Node index_var{};
         const Sampler* sampler =
-            is_bindless ? GetBindlessSampler(base_reg, index_var, {{texture_type, is_array, false}})
-                        : GetSampler(instr.sampler, {{texture_type, is_array, false}});
+            is_bindless
+                ? GetBindlessSampler(base_reg, index_var, {{texture_type, is_array, false, false}})
+                : GetSampler(instr.sampler, {{texture_type, is_array, false, false}});
         Node4 values;
         if (sampler == nullptr) {
-            for (u32 element = 0; element < values.size(); ++element) {
-                values[element] = Immediate(0);
-            }
+            std::generate(values.begin(), values.end(), [] { return Immediate(0); });
             WriteTexInstructionFloat(bb, instr, values);
             break;
         }
@@ -269,7 +268,6 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) {
                              "NDV is not implemented");
 
         auto texture_type = instr.tmml.texture_type.Value();
-        const bool is_array = instr.tmml.array != 0;
         Node index_var{};
         const Sampler* sampler =
             is_bindless ? GetBindlessSampler(instr.gpr20, index_var) : GetSampler(instr.sampler);
@@ -593,8 +591,9 @@ Node4 ShaderIR::GetTexCode(Instruction instr, TextureType texture_type,
         ++parameter_register;
     }
 
-    const auto [coord_count, total_coord_count] = ValidateAndGetCoordinateElement(
-        texture_type, depth_compare, is_array, lod_bias_enabled, 4, 5);
+    const auto coord_counts = ValidateAndGetCoordinateElement(texture_type, depth_compare, is_array,
+                                                              lod_bias_enabled, 4, 5);
+    const auto coord_count = std::get<0>(coord_counts);
     // If enabled arrays index is always stored in the gpr8 field
     const u64 array_register = instr.gpr8.Value();
     // First coordinate index is the gpr8 or gpr8 + 1 when arrays are used
@@ -632,8 +631,10 @@ Node4 ShaderIR::GetTexsCode(Instruction instr, TextureType texture_type,
     const bool lod_bias_enabled =
         (process_mode != TextureProcessMode::None && process_mode != TextureProcessMode::LZ);
 
-    const auto [coord_count, total_coord_count] = ValidateAndGetCoordinateElement(
-        texture_type, depth_compare, is_array, lod_bias_enabled, 4, 4);
+    const auto coord_counts = ValidateAndGetCoordinateElement(texture_type, depth_compare, is_array,
+                                                              lod_bias_enabled, 4, 4);
+    const auto coord_count = std::get<0>(coord_counts);
+
     // If enabled arrays index is always stored in the gpr8 field
     const u64 array_register = instr.gpr8.Value();
     // First coordinate index is stored in gpr8 field or (gpr8 + 1) when arrays are used
diff --git a/src/video_core/texture_cache/surface_base.h b/src/video_core/texture_cache/surface_base.h
index c5ab21f56..79e10ffbb 100644
--- a/src/video_core/texture_cache/surface_base.h
+++ b/src/video_core/texture_cache/surface_base.h
@@ -192,6 +192,22 @@ public:
         index = index_;
     }
 
+    void SetMemoryMarked(bool is_memory_marked_) {
+        is_memory_marked = is_memory_marked_;
+    }
+
+    bool IsMemoryMarked() const {
+        return is_memory_marked;
+    }
+
+    void SetSyncPending(bool is_sync_pending_) {
+        is_sync_pending = is_sync_pending_;
+    }
+
+    bool IsSyncPending() const {
+        return is_sync_pending;
+    }
+
     void MarkAsPicked(bool is_picked_) {
         is_picked = is_picked_;
     }
@@ -303,6 +319,8 @@ private:
     bool is_target{};
     bool is_registered{};
     bool is_picked{};
+    bool is_memory_marked{};
+    bool is_sync_pending{};
     u32 index{NO_RT};
     u64 modification_tick{};
 };
diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h
index 69ca08fd1..cf6bd005a 100644
--- a/src/video_core/texture_cache/texture_cache.h
+++ b/src/video_core/texture_cache/texture_cache.h
@@ -6,6 +6,7 @@
 
 #include <algorithm>
 #include <array>
+#include <list>
 #include <memory>
 #include <mutex>
 #include <set>
@@ -62,6 +63,30 @@ public:
         }
     }
 
+    void OnCPUWrite(VAddr addr, std::size_t size) {
+        std::lock_guard lock{mutex};
+
+        for (const auto& surface : GetSurfacesInRegion(addr, size)) {
+            if (surface->IsMemoryMarked()) {
+                UnmarkMemory(surface);
+                surface->SetSyncPending(true);
+                marked_for_unregister.emplace_back(surface);
+            }
+        }
+    }
+
+    void SyncGuestHost() {
+        std::lock_guard lock{mutex};
+
+        for (const auto& surface : marked_for_unregister) {
+            if (surface->IsRegistered()) {
+                surface->SetSyncPending(false);
+                Unregister(surface);
+            }
+        }
+        marked_for_unregister.clear();
+    }
+
     /**
      * Guarantees that rendertargets don't unregister themselves if the
      * collide. Protection is currently only done on 3D slices.
@@ -85,10 +110,20 @@ public:
             return a->GetModificationTick() < b->GetModificationTick();
         });
         for (const auto& surface : surfaces) {
+            mutex.unlock();
             FlushSurface(surface);
+            mutex.lock();
         }
     }
 
+    bool MustFlushRegion(VAddr addr, std::size_t size) {
+        std::lock_guard lock{mutex};
+
+        const auto surfaces = GetSurfacesInRegion(addr, size);
+        return std::any_of(surfaces.cbegin(), surfaces.cend(),
+                           [](const TSurface& surface) { return surface->IsModified(); });
+    }
+
     TView GetTextureSurface(const Tegra::Texture::TICEntry& tic,
                             const VideoCommon::Shader::Sampler& entry) {
         std::lock_guard lock{mutex};
@@ -206,8 +241,14 @@ public:
 
         auto surface_view = GetSurface(gpu_addr, *cpu_addr,
                                        SurfaceParams::CreateForFramebuffer(system, index), true);
-        if (render_targets[index].target)
-            render_targets[index].target->MarkAsRenderTarget(false, NO_RT);
+        if (render_targets[index].target) {
+            auto& surface = render_targets[index].target;
+            surface->MarkAsRenderTarget(false, NO_RT);
+            const auto& cr_params = surface->GetSurfaceParams();
+            if (!cr_params.is_tiled && Settings::values.use_asynchronous_gpu_emulation) {
+                AsyncFlushSurface(surface);
+            }
+        }
         render_targets[index].target = surface_view.first;
         render_targets[index].view = surface_view.second;
         if (render_targets[index].target)
@@ -284,6 +325,34 @@ public:
         return ++ticks;
     }
 
+    void CommitAsyncFlushes() {
+        committed_flushes.push_back(uncommitted_flushes);
+        uncommitted_flushes.reset();
+    }
+
+    bool HasUncommittedFlushes() const {
+        return uncommitted_flushes != nullptr;
+    }
+
+    bool ShouldWaitAsyncFlushes() const {
+        return !committed_flushes.empty() && committed_flushes.front() != nullptr;
+    }
+
+    void PopAsyncFlushes() {
+        if (committed_flushes.empty()) {
+            return;
+        }
+        auto& flush_list = committed_flushes.front();
+        if (!flush_list) {
+            committed_flushes.pop_front();
+            return;
+        }
+        for (TSurface& surface : *flush_list) {
+            FlushSurface(surface);
+        }
+        committed_flushes.pop_front();
+    }
+
 protected:
     explicit TextureCache(Core::System& system, VideoCore::RasterizerInterface& rasterizer,
                           bool is_astc_supported)
@@ -345,9 +414,20 @@ protected:
         surface->SetCpuAddr(*cpu_addr);
         RegisterInnerCache(surface);
         surface->MarkAsRegistered(true);
+        surface->SetMemoryMarked(true);
         rasterizer.UpdatePagesCachedCount(*cpu_addr, size, 1);
     }
 
+    void UnmarkMemory(TSurface surface) {
+        if (!surface->IsMemoryMarked()) {
+            return;
+        }
+        const std::size_t size = surface->GetSizeInBytes();
+        const VAddr cpu_addr = surface->GetCpuAddr();
+        rasterizer.UpdatePagesCachedCount(cpu_addr, size, -1);
+        surface->SetMemoryMarked(false);
+    }
+
     void Unregister(TSurface surface) {
         if (guard_render_targets && surface->IsProtected()) {
             return;
@@ -355,9 +435,11 @@ protected:
         if (!guard_render_targets && surface->IsRenderTarget()) {
            ManageRenderTargetUnregister(surface);
         }
-        const std::size_t size = surface->GetSizeInBytes();
-        const VAddr cpu_addr = surface->GetCpuAddr();
-        rasterizer.UpdatePagesCachedCount(cpu_addr, size, -1);
+        UnmarkMemory(surface);
+        if (surface->IsSyncPending()) {
+            marked_for_unregister.remove(surface);
+            surface->SetSyncPending(false);
+        }
         UnregisterInnerCache(surface);
         surface->MarkAsRegistered(false);
         ReserveSurface(surface->GetSurfaceParams(), surface);
@@ -417,7 +499,7 @@ private:
      **/
     RecycleStrategy PickStrategy(std::vector<TSurface>& overlaps, const SurfaceParams& params,
                                  const GPUVAddr gpu_addr, const MatchTopologyResult untopological) {
-        if (Settings::values.use_accurate_gpu_emulation) {
+        if (Settings::IsGPULevelExtreme()) {
             return RecycleStrategy::Flush;
         }
         // 3D Textures decision
@@ -461,7 +543,7 @@ private:
         }
         switch (PickStrategy(overlaps, params, gpu_addr, untopological)) {
         case RecycleStrategy::Ignore: {
-            return InitializeSurface(gpu_addr, params, Settings::values.use_accurate_gpu_emulation);
+            return InitializeSurface(gpu_addr, params, Settings::IsGPULevelExtreme());
         }
         case RecycleStrategy::Flush: {
             std::sort(overlaps.begin(), overlaps.end(),
@@ -509,7 +591,7 @@ private:
         }
         const auto& final_params = new_surface->GetSurfaceParams();
         if (cr_params.type != final_params.type) {
-            if (Settings::values.use_accurate_gpu_emulation) {
+            if (Settings::IsGPULevelExtreme()) {
                 BufferCopy(current_surface, new_surface);
             }
         } else {
@@ -598,7 +680,7 @@ private:
         if (passed_tests == 0) {
             return {};
             // In Accurate GPU all tests should pass, else we recycle
-        } else if (Settings::values.use_accurate_gpu_emulation && passed_tests != overlaps.size()) {
+        } else if (Settings::IsGPULevelExtreme() && passed_tests != overlaps.size()) {
             return {};
         }
         for (const auto& surface : overlaps) {
@@ -668,7 +750,7 @@ private:
         for (const auto& surface : overlaps) {
             if (!surface->MatchTarget(params.target)) {
                 if (overlaps.size() == 1 && surface->GetCpuAddr() == cpu_addr) {
-                    if (Settings::values.use_accurate_gpu_emulation) {
+                    if (Settings::IsGPULevelExtreme()) {
                         return std::nullopt;
                     }
                     Unregister(surface);
@@ -1106,6 +1188,13 @@ private:
         TView view;
     };
 
+    void AsyncFlushSurface(TSurface& surface) {
+        if (!uncommitted_flushes) {
+            uncommitted_flushes = std::make_shared<std::list<TSurface>>();
+        }
+        uncommitted_flushes->push_back(surface);
+    }
+
     VideoCore::RasterizerInterface& rasterizer;
 
     FormatLookupTable format_lookup_table;
@@ -1150,6 +1239,11 @@ private:
     std::unordered_map<u32, TSurface> invalid_cache;
     std::vector<u8> invalid_memory;
 
+    std::list<TSurface> marked_for_unregister;
+
+    std::shared_ptr<std::list<TSurface>> uncommitted_flushes{};
+    std::list<std::shared_ptr<std::list<TSurface>>> committed_flushes;
+
     StagingCache staging_cache;
     std::recursive_mutex mutex;
 };
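
The texture cache now batches dirty render targets into an uncommitted list, freezes the batch when the guest signals (CommitAsyncFlushes), and drains batches oldest-first when the guest waits (PopAsyncFlushes). A minimal standalone sketch of that handoff, with ints standing in for surfaces:

#include <list>
#include <memory>

struct AsyncFlushListsSketch {
    std::shared_ptr<std::list<int>> uncommitted;
    std::list<std::shared_ptr<std::list<int>>> committed;

    void Mark(int surface) { // analogous to AsyncFlushSurface
        if (!uncommitted) {
            uncommitted = std::make_shared<std::list<int>>();
        }
        uncommitted->push_back(surface);
    }
    void Commit() { // a null batch is pushed even when nothing is pending
        committed.push_back(uncommitted);
        uncommitted.reset();
    }
    void Pop(std::list<int>& out) { // flush the oldest committed batch
        if (committed.empty()) {
            return;
        }
        if (const auto batch = committed.front()) {
            out = *batch;
        }
        committed.pop_front();
    }
};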
diff --git a/src/video_core/textures/decoders.cpp b/src/video_core/textures/decoders.cpp
index 7df5f1452..fae8638ec 100644
--- a/src/video_core/textures/decoders.cpp
+++ b/src/video_core/textures/decoders.cpp
@@ -11,6 +11,7 @@
 #include "video_core/textures/texture.h"
 
 namespace Tegra::Texture {
+namespace {
 
 /**
  * This table represents the internal swizzle of a gob,
@@ -174,6 +175,8 @@ void SwizzledData(u8* const swizzled_data, u8* const unswizzled_data, const bool
     }
 }
 
+} // Anonymous namespace
+
 void CopySwizzledData(u32 width, u32 height, u32 depth, u32 bytes_per_pixel,
                       u32 out_bytes_per_pixel, u8* const swizzled_data, u8* const unswizzled_data,
                       bool unswizzle, u32 block_height, u32 block_depth, u32 width_spacing) {
diff --git a/src/video_core/textures/decoders.h b/src/video_core/textures/decoders.h
index e5eac3f3b..9f2d6d308 100644
--- a/src/video_core/textures/decoders.h
+++ b/src/video_core/textures/decoders.h
@@ -56,8 +56,7 @@ void UnswizzleSubrect(u32 subrect_width, u32 subrect_height, u32 dest_pitch, u32
                       u32 bytes_per_pixel, u8* swizzled_data, u8* unswizzled_data, u32 block_height,
                       u32 offset_x, u32 offset_y);
 
-void SwizzleKepler(const u32 width, const u32 height, const u32 dst_x, const u32 dst_y,
-                   const u32 block_height, const std::size_t copy_size, const u8* source_data,
-                   u8* swizzle_data);
+void SwizzleKepler(u32 width, u32 height, u32 dst_x, u32 dst_y, u32 block_height,
+                   std::size_t copy_size, const u8* source_data, u8* swizzle_data);
 
 } // namespace Tegra::Texture
diff --git a/src/yuzu/applets/profile_select.cpp b/src/yuzu/applets/profile_select.cpp
index 6aff38735..4bc8ee726 100644
--- a/src/yuzu/applets/profile_select.cpp
+++ b/src/yuzu/applets/profile_select.cpp
@@ -17,6 +17,7 @@
 #include "yuzu/applets/profile_select.h"
 #include "yuzu/main.h"
 
+namespace {
 QString FormatUserEntryText(const QString& username, Common::UUID uuid) {
     return QtProfileSelectionDialog::tr(
         "%1\n%2", "%1 is the profile username, %2 is the formatted UUID (e.g. "
@@ -41,6 +42,7 @@ QPixmap GetIcon(Common::UUID uuid) {
 
     return icon.scaled(64, 64, Qt::IgnoreAspectRatio, Qt::SmoothTransformation);
 }
+} // Anonymous namespace
 
 QtProfileSelectionDialog::QtProfileSelectionDialog(QWidget* parent)
     : QDialog(parent), profile_manager(std::make_unique<Service::Account::ProfileManager>()) {
diff --git a/src/yuzu/configuration/config.cpp b/src/yuzu/configuration/config.cpp
index 3b9ab38dd..196a3a116 100644
--- a/src/yuzu/configuration/config.cpp
+++ b/src/yuzu/configuration/config.cpp
@@ -532,6 +532,8 @@ void Config::ReadDebuggingValues() {
     Settings::values.reporting_services =
         ReadSetting(QStringLiteral("reporting_services"), false).toBool();
     Settings::values.quest_flag = ReadSetting(QStringLiteral("quest_flag"), false).toBool();
+    Settings::values.disable_cpu_opt =
+        ReadSetting(QStringLiteral("disable_cpu_opt"), false).toBool();
 
     qt_config->endGroup();
 }
@@ -637,8 +639,8 @@ void Config::ReadRendererValues() {
     Settings::values.frame_limit = ReadSetting(QStringLiteral("frame_limit"), 100).toInt();
     Settings::values.use_disk_shader_cache =
         ReadSetting(QStringLiteral("use_disk_shader_cache"), true).toBool();
-    Settings::values.use_accurate_gpu_emulation =
-        ReadSetting(QStringLiteral("use_accurate_gpu_emulation"), false).toBool();
+    const int gpu_accuracy_level = ReadSetting(QStringLiteral("gpu_accuracy"), 0).toInt();
+    Settings::values.gpu_accuracy = static_cast<Settings::GPUAccuracy>(gpu_accuracy_level);
     Settings::values.use_asynchronous_gpu_emulation =
         ReadSetting(QStringLiteral("use_asynchronous_gpu_emulation"), false).toBool();
     Settings::values.use_vsync = ReadSetting(QStringLiteral("use_vsync"), true).toBool();
@@ -1001,6 +1003,7 @@ void Config::SaveDebuggingValues() {
     WriteSetting(QStringLiteral("dump_exefs"), Settings::values.dump_exefs, false);
     WriteSetting(QStringLiteral("dump_nso"), Settings::values.dump_nso, false);
     WriteSetting(QStringLiteral("quest_flag"), Settings::values.quest_flag, false);
+    WriteSetting(QStringLiteral("disable_cpu_opt"), Settings::values.disable_cpu_opt, false);
 
     qt_config->endGroup();
 }
@@ -1077,8 +1080,8 @@ void Config::SaveRendererValues() {
     WriteSetting(QStringLiteral("frame_limit"), Settings::values.frame_limit, 100);
     WriteSetting(QStringLiteral("use_disk_shader_cache"), Settings::values.use_disk_shader_cache,
                  true);
-    WriteSetting(QStringLiteral("use_accurate_gpu_emulation"),
-                 Settings::values.use_accurate_gpu_emulation, false);
+    WriteSetting(QStringLiteral("gpu_accuracy"), static_cast<int>(Settings::values.gpu_accuracy),
+                 0);
     WriteSetting(QStringLiteral("use_asynchronous_gpu_emulation"),
                  Settings::values.use_asynchronous_gpu_emulation, false);
     WriteSetting(QStringLiteral("use_vsync"), Settings::values.use_vsync, true);
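The boolean use_accurate_gpu_emulation is replaced throughout by a three-level gpu_accuracy setting that round-trips through its underlying integer. A sketch of the presumed shape of the enum (the enumerator values follow the "0 (Normal), 1 (High), 2 (Extreme)" mapping documented in default_ini.h below; the real definition lives in src/core/settings.h, which this excerpt does not show):

    enum class GPUAccuracy : int {
        Normal = 0,
        High = 1,
        Extreme = 2,
    };

    // Settings round-trip as implied by the Read/Save hunks above:
    // the enum is stored as its underlying int and cast back on load.
    int ToConfigValue(GPUAccuracy accuracy) {
        return static_cast<int>(accuracy);
    }

    GPUAccuracy FromConfigValue(int level) {
        return static_cast<GPUAccuracy>(level);
    }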
diff --git a/src/yuzu/configuration/configure_debug.cpp b/src/yuzu/configuration/configure_debug.cpp
index 9631059c7..c2026763e 100644
--- a/src/yuzu/configuration/configure_debug.cpp
+++ b/src/yuzu/configuration/configure_debug.cpp
@@ -36,6 +36,7 @@ void ConfigureDebug::SetConfiguration() {
     ui->homebrew_args_edit->setText(QString::fromStdString(Settings::values.program_args));
     ui->reporting_services->setChecked(Settings::values.reporting_services);
     ui->quest_flag->setChecked(Settings::values.quest_flag);
+    ui->disable_cpu_opt->setChecked(Settings::values.disable_cpu_opt);
     ui->enable_graphics_debugging->setEnabled(!Core::System::GetInstance().IsPoweredOn());
     ui->enable_graphics_debugging->setChecked(Settings::values.renderer_debug);
 }
@@ -48,6 +49,7 @@ void ConfigureDebug::ApplyConfiguration() {
     Settings::values.program_args = ui->homebrew_args_edit->text().toStdString();
     Settings::values.reporting_services = ui->reporting_services->isChecked();
     Settings::values.quest_flag = ui->quest_flag->isChecked();
+    Settings::values.disable_cpu_opt = ui->disable_cpu_opt->isChecked();
     Settings::values.renderer_debug = ui->enable_graphics_debugging->isChecked();
     Debugger::ToggleConsole();
     Log::Filter filter;
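Per the diffstat, arm_dynarmic_64.cpp and arm_unicorn.cpp also change in this commit, so disable_cpu_opt presumably reaches the CPU backend's configuration. A hypothetical sketch of that wiring (the JitConfig type and its field are assumptions for illustration, not taken from the diff):

    namespace Settings {
    struct Values {
        bool disable_cpu_opt = false;
    };
    inline Values values;
    } // namespace Settings

    struct JitConfig {
        bool enable_optimizations = true;  // assumed knob name, illustration only
    };

    JitConfig MakeJitConfig() {
        JitConfig config;
        // The debug setting inverts the default: true means "run unoptimized".
        config.enable_optimizations = !Settings::values.disable_cpu_opt;
        return config;
    }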
diff --git a/src/yuzu/configuration/configure_debug.ui b/src/yuzu/configuration/configure_debug.ui
index e028c4c80..e0d4c4a44 100644
--- a/src/yuzu/configuration/configure_debug.ui
+++ b/src/yuzu/configuration/configure_debug.ui
@@ -215,6 +215,13 @@
      </property>
     </widget>
    </item>
+   <item>
+    <widget class="QCheckBox" name="disable_cpu_opt">
+     <property name="text">
+      <string>Disable CPU JIT optimizations</string>
+     </property>
+    </widget>
+   </item>
   </layout>
  </widget>
 </item>
diff --git a/src/yuzu/configuration/configure_graphics_advanced.cpp b/src/yuzu/configuration/configure_graphics_advanced.cpp
index b9f429f84..0a3f47339 100644
--- a/src/yuzu/configuration/configure_graphics_advanced.cpp
+++ b/src/yuzu/configuration/configure_graphics_advanced.cpp
@@ -19,7 +19,7 @@ ConfigureGraphicsAdvanced::~ConfigureGraphicsAdvanced() = default;
 
 void ConfigureGraphicsAdvanced::SetConfiguration() {
     const bool runtime_lock = !Core::System::GetInstance().IsPoweredOn();
-    ui->use_accurate_gpu_emulation->setChecked(Settings::values.use_accurate_gpu_emulation);
+    ui->gpu_accuracy->setCurrentIndex(static_cast<int>(Settings::values.gpu_accuracy));
     ui->use_vsync->setEnabled(runtime_lock);
     ui->use_vsync->setChecked(Settings::values.use_vsync);
     ui->force_30fps_mode->setEnabled(runtime_lock);
@@ -29,7 +29,8 @@ void ConfigureGraphicsAdvanced::SetConfiguration() {
 }
 
 void ConfigureGraphicsAdvanced::ApplyConfiguration() {
-    Settings::values.use_accurate_gpu_emulation = ui->use_accurate_gpu_emulation->isChecked();
+    auto gpu_accuracy = static_cast<Settings::GPUAccuracy>(ui->gpu_accuracy->currentIndex());
+    Settings::values.gpu_accuracy = gpu_accuracy;
     Settings::values.use_vsync = ui->use_vsync->isChecked();
     Settings::values.force_30fps_mode = ui->force_30fps_mode->isChecked();
     Settings::values.max_anisotropy = ui->anisotropic_filtering_combobox->currentIndex();
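The cast from currentIndex() above trusts the UI to stay within the enum's range. A defensive variant (a sketch of an alternative, not what the hunk does) would clamp the index first:

    #include <algorithm>

    enum class GPUAccuracy : int { Normal = 0, High = 1, Extreme = 2 };

    // A stale or corrupt saved index can never yield an out-of-range enumerator.
    GPUAccuracy AccuracyFromIndex(int index) {
        return static_cast<GPUAccuracy>(std::clamp(index, 0, 2));
    }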
diff --git a/src/yuzu/configuration/configure_graphics_advanced.ui b/src/yuzu/configuration/configure_graphics_advanced.ui
index 42eec278e..0c7b383e0 100644
--- a/src/yuzu/configuration/configure_graphics_advanced.ui
+++ b/src/yuzu/configuration/configure_graphics_advanced.ui
@@ -23,11 +23,34 @@
    </property>
    <layout class="QVBoxLayout" name="verticalLayout_3">
     <item>
-     <widget class="QCheckBox" name="use_accurate_gpu_emulation">
-      <property name="text">
-       <string>Use accurate GPU emulation (slow)</string>
-      </property>
-     </widget>
+     <layout class="QHBoxLayout" name="horizontalLayout_2">
+      <item>
+       <widget class="QLabel" name="label_gpu_accuracy">
+        <property name="text">
+         <string>Accuracy Level:</string>
+        </property>
+       </widget>
+      </item>
+      <item>
+       <widget class="QComboBox" name="gpu_accuracy">
+        <item>
+         <property name="text">
+          <string notr="true">Normal</string>
+         </property>
+        </item>
+        <item>
+         <property name="text">
+          <string notr="true">High</string>
+         </property>
+        </item>
+        <item>
+         <property name="text">
+          <string notr="true">Extreme (very slow)</string>
+         </property>
+        </item>
+       </widget>
+      </item>
+     </layout>
     </item>
     <item>
      <widget class="QCheckBox" name="use_vsync">
diff --git a/src/yuzu_cmd/config.cpp b/src/yuzu_cmd/config.cpp
index f4cd905c9..d1ac354bf 100644
--- a/src/yuzu_cmd/config.cpp
+++ b/src/yuzu_cmd/config.cpp
@@ -388,8 +388,8 @@ void Config::ReadValues() {
         static_cast<u16>(sdl2_config->GetInteger("Renderer", "frame_limit", 100));
     Settings::values.use_disk_shader_cache =
         sdl2_config->GetBoolean("Renderer", "use_disk_shader_cache", false);
-    Settings::values.use_accurate_gpu_emulation =
-        sdl2_config->GetBoolean("Renderer", "use_accurate_gpu_emulation", false);
+    const int gpu_accuracy_level = sdl2_config->GetInteger("Renderer", "gpu_accuracy", 0);
+    Settings::values.gpu_accuracy = static_cast<Settings::GPUAccuracy>(gpu_accuracy_level);
     Settings::values.use_asynchronous_gpu_emulation =
         sdl2_config->GetBoolean("Renderer", "use_asynchronous_gpu_emulation", false);
     Settings::values.use_vsync =
@@ -425,6 +425,8 @@ void Config::ReadValues() {
     Settings::values.reporting_services =
         sdl2_config->GetBoolean("Debugging", "reporting_services", false);
     Settings::values.quest_flag = sdl2_config->GetBoolean("Debugging", "quest_flag", false);
+    Settings::values.disable_cpu_opt =
+        sdl2_config->GetBoolean("Debugging", "disable_cpu_opt", false);
 
     const auto title_list = sdl2_config->Get("AddOns", "title_ids", "");
     std::stringstream ss(title_list);
diff --git a/src/yuzu_cmd/default_ini.h b/src/yuzu_cmd/default_ini.h
index d63d7a58e..60b1a62fa 100644
--- a/src/yuzu_cmd/default_ini.h
+++ b/src/yuzu_cmd/default_ini.h
@@ -146,9 +146,9 @@ frame_limit =
 # 0 (default): Off, 1 : On
 use_disk_shader_cache =
 
-# Whether to use accurate GPU emulation
-# 0 (default): Off (fast), 1 : On (slow)
-use_accurate_gpu_emulation =
+# Which GPU accuracy level to use
+# 0 (Normal), 1 (High), 2 (Extreme)
+gpu_accuracy =
 
 # Whether to use asynchronous GPU emulation
 # 0 : Off (slow), 1 (default): On (fast)
@@ -280,6 +280,9 @@ dump_nso=false
 # Determines whether or not yuzu will report to the game that the emulated console is in Kiosk Mode
 # false: Retail/Normal Mode (default), true: Kiosk Mode
 quest_flag =
+# Determines whether or not JIT CPU optimizations are enabled
+# false: Optimizations Enabled, true: Optimizations Disabled
+disable_cpu_opt =
 
 [WebService]
 # Whether or not to enable telemetry
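Taken together, a hand-written config opting into the new options would carry entries like the following (the values shown are illustrative, not defaults):

    [Renderer]
    # 1 = High accuracy, per the mapping documented above
    gpu_accuracy = 1

    [Debugging]
    # true disables JIT CPU optimizations
    disable_cpu_opt = true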
diff --git a/src/yuzu_cmd/emu_window/emu_window_sdl2_vk.cpp b/src/yuzu_cmd/emu_window/emu_window_sdl2_vk.cpp
index f2990910e..cb8e68a39 100644
--- a/src/yuzu_cmd/emu_window/emu_window_sdl2_vk.cpp
+++ b/src/yuzu_cmd/emu_window/emu_window_sdl2_vk.cpp
@@ -29,6 +29,7 @@ EmuWindow_SDL2_VK::EmuWindow_SDL2_VK(Core::System& system, bool fullscreen)
                          SDL_WINDOW_RESIZABLE | SDL_WINDOW_ALLOW_HIGHDPI);
 
     SDL_SysWMinfo wm;
+    SDL_VERSION(&wm.version);
     if (SDL_GetWindowWMInfo(render_window, &wm) == SDL_FALSE) {
         LOG_CRITICAL(Frontend, "Failed to get information from the window manager");
         std::exit(EXIT_FAILURE);
@@ -70,7 +71,7 @@ EmuWindow_SDL2_VK::EmuWindow_SDL2_VK(Core::System& system, bool fullscreen)
 EmuWindow_SDL2_VK::~EmuWindow_SDL2_VK() = default;
 
 std::unique_ptr<Core::Frontend::GraphicsContext> EmuWindow_SDL2_VK::CreateSharedContext() const {
-    return nullptr;
+    return std::make_unique<DummyContext>();
 }
 
 void EmuWindow_SDL2_VK::Present() {
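SDL2 requires the caller to stamp the compiled-against version into SDL_SysWMinfo::version before calling SDL_GetWindowWMInfo; with the field left uninitialized, the call is liable to fail with SDL_FALSE, which is the bug the added SDL_VERSION line fixes. A standalone sketch of the corrected pattern:

    #include <SDL.h>
    #include <SDL_syswm.h>

    bool QueryWMInfo(SDL_Window* window, SDL_SysWMinfo* info) {
        SDL_VERSION(&info->version);  // must be set before the query
        return SDL_GetWindowWMInfo(window, info) == SDL_TRUE;
    }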
diff --git a/src/yuzu_cmd/emu_window/emu_window_sdl2_vk.h b/src/yuzu_cmd/emu_window/emu_window_sdl2_vk.h
index b8021ebea..77a6ca72b 100644
--- a/src/yuzu_cmd/emu_window/emu_window_sdl2_vk.h
+++ b/src/yuzu_cmd/emu_window/emu_window_sdl2_vk.h
@@ -22,3 +22,5 @@ public:
 
     std::unique_ptr<Core::Frontend::GraphicsContext> CreateSharedContext() const override;
 };
+
+class DummyContext : public Core::Frontend::GraphicsContext {};
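Returning a default-constructed DummyContext instead of nullptr is the null-object pattern: callers can use the returned context unconditionally. A sketch assuming Core::Frontend::GraphicsContext supplies virtual no-op defaults (which the empty DummyContext body implies, though this excerpt does not show the base class):

    #include <memory>

    struct GraphicsContext {
        virtual ~GraphicsContext() = default;
        virtual void MakeCurrent() {}  // assumed no-op default
        virtual void DoneCurrent() {}  // assumed no-op default
    };

    struct DummyContext : GraphicsContext {};  // inherits the no-ops

    void UseContext(std::unique_ptr<GraphicsContext> ctx) {
        ctx->MakeCurrent();  // safe: never a null pointer
        ctx->DoneCurrent();
    }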
diff --git a/src/yuzu_tester/config.cpp b/src/yuzu_tester/config.cpp
index ee2591c8f..c0325cc3c 100644
--- a/src/yuzu_tester/config.cpp
+++ b/src/yuzu_tester/config.cpp
@@ -126,8 +126,8 @@ void Config::ReadValues() {
     Settings::values.frame_limit = 100;
     Settings::values.use_disk_shader_cache =
         sdl2_config->GetBoolean("Renderer", "use_disk_shader_cache", false);
-    Settings::values.use_accurate_gpu_emulation =
-        sdl2_config->GetBoolean("Renderer", "use_accurate_gpu_emulation", false);
+    const int gpu_accuracy_level = sdl2_config->GetInteger("Renderer", "gpu_accuracy", 0);
+    Settings::values.gpu_accuracy = static_cast<Settings::GPUAccuracy>(gpu_accuracy_level);
     Settings::values.use_asynchronous_gpu_emulation =
         sdl2_config->GetBoolean("Renderer", "use_asynchronous_gpu_emulation", false);
 