Diffstat (limited to 'src')
-rw-r--r-- src/CMakeLists.txt | 4
-rw-r--r-- src/audio_core/audio_renderer.cpp | 19
-rw-r--r-- src/audio_core/audio_renderer.h | 13
-rw-r--r-- src/audio_core/stream.cpp | 25
-rw-r--r-- src/audio_core/stream.h | 5
-rw-r--r-- src/common/CMakeLists.txt | 15
-rw-r--r-- src/common/atomic_ops.cpp | 70
-rw-r--r-- src/common/atomic_ops.h | 17
-rw-r--r-- src/common/fiber.cpp | 222
-rw-r--r-- src/common/fiber.h | 92
-rw-r--r-- src/common/spin_lock.cpp | 54
-rw-r--r-- src/common/spin_lock.h | 26
-rw-r--r-- src/common/telemetry.cpp | 1
-rw-r--r-- src/common/thread.cpp | 52
-rw-r--r-- src/common/thread.h | 13
-rw-r--r-- src/common/uint128.cpp | 26
-rw-r--r-- src/common/uint128.h | 3
-rw-r--r-- src/common/wall_clock.cpp | 91
-rw-r--r-- src/common/wall_clock.h | 53
-rw-r--r-- src/common/x64/cpu_detect.cpp | 38
-rw-r--r-- src/common/x64/cpu_detect.h | 13
-rw-r--r-- src/common/x64/native_clock.cpp | 103
-rw-r--r-- src/common/x64/native_clock.h | 48
-rw-r--r-- src/common/x64/xbyak_abi.h | 95
-rw-r--r-- src/core/CMakeLists.txt | 16
-rw-r--r-- src/core/arm/arm_interface.cpp | 57
-rw-r--r-- src/core/arm/arm_interface.h | 20
-rw-r--r-- src/core/arm/cpu_interrupt_handler.cpp | 29
-rw-r--r-- src/core/arm/cpu_interrupt_handler.h | 39
-rw-r--r-- src/core/arm/dynarmic/arm_dynarmic_32.cpp | 103
-rw-r--r-- src/core/arm/dynarmic/arm_dynarmic_32.h | 12
-rw-r--r-- src/core/arm/dynarmic/arm_dynarmic_64.cpp | 110
-rw-r--r-- src/core/arm/dynarmic/arm_dynarmic_64.h | 26
-rw-r--r-- src/core/arm/dynarmic/arm_dynarmic_cp15.cpp | 81
-rw-r--r-- src/core/arm/dynarmic/arm_dynarmic_cp15.h | 126
-rw-r--r-- src/core/arm/dynarmic/arm_exclusive_monitor.cpp | 76
-rw-r--r-- src/core/arm/dynarmic/arm_exclusive_monitor.h | 48
-rw-r--r-- src/core/arm/exclusive_monitor.cpp | 2
-rw-r--r-- src/core/arm/exclusive_monitor.h | 6
-rw-r--r-- src/core/arm/unicorn/arm_unicorn.cpp | 19
-rw-r--r-- src/core/arm/unicorn/arm_unicorn.h | 5
-rw-r--r-- src/core/core.cpp | 128
-rw-r--r-- src/core/core.h | 48
-rw-r--r-- src/core/core_manager.cpp | 67
-rw-r--r-- src/core/core_manager.h | 63
-rw-r--r-- src/core/core_timing.cpp | 256
-rw-r--r-- src/core/core_timing.h | 123
-rw-r--r-- src/core/core_timing_util.cpp | 44
-rw-r--r-- src/core/core_timing_util.h | 18
-rw-r--r-- src/core/cpu_manager.cpp | 368
-rw-r--r-- src/core/cpu_manager.h | 80
-rw-r--r-- src/core/file_sys/system_archive/mii_model.cpp | 2
-rw-r--r-- src/core/file_sys/system_archive/shared_font.cpp | 2
-rw-r--r-- src/core/gdbstub/gdbstub.cpp | 1
-rw-r--r-- src/core/hardware_properties.h | 4
-rw-r--r-- src/core/hle/kernel/address_arbiter.cpp | 212
-rw-r--r-- src/core/hle/kernel/address_arbiter.h | 3
-rw-r--r-- src/core/hle/kernel/client_port.cpp | 2
-rw-r--r-- src/core/hle/kernel/errors.h | 1
-rw-r--r-- src/core/hle/kernel/hle_ipc.cpp | 87
-rw-r--r-- src/core/hle/kernel/kernel.cpp | 255
-rw-r--r-- src/core/hle/kernel/kernel.h | 39
-rw-r--r-- src/core/hle/kernel/memory/memory_manager.cpp | 5
-rw-r--r-- src/core/hle/kernel/mutex.cpp | 118
-rw-r--r-- src/core/hle/kernel/mutex.h | 4
-rw-r--r-- src/core/hle/kernel/physical_core.cpp | 52
-rw-r--r-- src/core/hle/kernel/physical_core.h | 44
-rw-r--r-- src/core/hle/kernel/process.cpp | 29
-rw-r--r-- src/core/hle/kernel/readable_event.cpp | 3
-rw-r--r-- src/core/hle/kernel/resource_limit.cpp | 6
-rw-r--r-- src/core/hle/kernel/scheduler.cpp | 576
-rw-r--r-- src/core/hle/kernel/scheduler.h | 123
-rw-r--r-- src/core/hle/kernel/server_session.cpp | 16
-rw-r--r-- src/core/hle/kernel/svc.cpp | 464
-rw-r--r-- src/core/hle/kernel/svc_wrap.h | 137
-rw-r--r-- src/core/hle/kernel/synchronization.cpp | 137
-rw-r--r-- src/core/hle/kernel/synchronization_object.cpp | 64
-rw-r--r-- src/core/hle/kernel/synchronization_object.h | 18
-rw-r--r-- src/core/hle/kernel/thread.cpp | 424
-rw-r--r-- src/core/hle/kernel/thread.h | 277
-rw-r--r-- src/core/hle/kernel/time_manager.cpp | 23
-rw-r--r-- src/core/hle/kernel/time_manager.h | 4
-rw-r--r-- src/core/hle/service/acc/acc.cpp | 341
-rw-r--r-- src/core/hle/service/acc/acc_aa.cpp | 4
-rw-r--r-- src/core/hle/service/acc/acc_su.cpp | 34
-rw-r--r-- src/core/hle/service/acc/acc_u0.cpp | 18
-rw-r--r-- src/core/hle/service/acc/acc_u1.cpp | 29
-rw-r--r-- src/core/hle/service/am/am.cpp | 18
-rw-r--r-- src/core/hle/service/am/am.h | 2
-rw-r--r-- src/core/hle/service/am/applets/software_keyboard.cpp | 6
-rw-r--r-- src/core/hle/service/am/spsm.cpp | 16
-rw-r--r-- src/core/hle/service/aoc/aoc_u.cpp | 1
-rw-r--r-- src/core/hle/service/bcat/bcat.cpp | 2
-rw-r--r-- src/core/hle/service/bcat/module.cpp | 3
-rw-r--r-- src/core/hle/service/bpc/bpc.cpp | 20
-rw-r--r-- src/core/hle/service/btdrv/btdrv.cpp | 167
-rw-r--r-- src/core/hle/service/btm/btm.cpp | 147
-rw-r--r-- src/core/hle/service/caps/caps.cpp | 2
-rw-r--r-- src/core/hle/service/caps/caps.h | 76
-rw-r--r-- src/core/hle/service/caps/caps_a.cpp | 2
-rw-r--r-- src/core/hle/service/caps/caps_a.h | 2
-rw-r--r-- src/core/hle/service/caps/caps_c.cpp | 2
-rw-r--r-- src/core/hle/service/caps/caps_c.h | 2
-rw-r--r-- src/core/hle/service/caps/caps_sc.cpp | 2
-rw-r--r-- src/core/hle/service/caps/caps_sc.h | 2
-rw-r--r-- src/core/hle/service/caps/caps_ss.cpp | 2
-rw-r--r-- src/core/hle/service/caps/caps_ss.h | 2
-rw-r--r-- src/core/hle/service/caps/caps_su.cpp | 2
-rw-r--r-- src/core/hle/service/caps/caps_su.h | 2
-rw-r--r-- src/core/hle/service/caps/caps_u.cpp | 26
-rw-r--r-- src/core/hle/service/caps/caps_u.h | 2
-rw-r--r-- src/core/hle/service/es/es.cpp | 47
-rw-r--r-- src/core/hle/service/eupld/eupld.cpp | 1
-rw-r--r-- src/core/hle/service/friend/friend.cpp | 6
-rw-r--r-- src/core/hle/service/grc/grc.cpp | 3
-rw-r--r-- src/core/hle/service/hid/controllers/debug_pad.cpp | 2
-rw-r--r-- src/core/hle/service/hid/controllers/gesture.cpp | 2
-rw-r--r-- src/core/hle/service/hid/controllers/keyboard.cpp | 2
-rw-r--r-- src/core/hle/service/hid/controllers/mouse.cpp | 2
-rw-r--r-- src/core/hle/service/hid/controllers/npad.cpp | 10
-rw-r--r-- src/core/hle/service/hid/controllers/npad.h | 10
-rw-r--r-- src/core/hle/service/hid/controllers/stubbed.cpp | 2
-rw-r--r-- src/core/hle/service/hid/controllers/touchscreen.cpp | 4
-rw-r--r-- src/core/hle/service/hid/controllers/xpad.cpp | 2
-rw-r--r-- src/core/hle/service/hid/hid.cpp | 151
-rw-r--r-- src/core/hle/service/hid/hid.h | 15
-rw-r--r-- src/core/hle/service/hid/irs.cpp | 2
-rw-r--r-- src/core/hle/service/lbl/lbl.cpp | 1
-rw-r--r-- src/core/hle/service/ldn/ldn.cpp | 1
-rw-r--r-- src/core/hle/service/ldr/ldr.cpp | 105
-rw-r--r-- src/core/hle/service/lm/manager.cpp | 3
-rw-r--r-- src/core/hle/service/mig/mig.cpp | 6
-rw-r--r-- src/core/hle/service/mm/mm_u.cpp | 32
-rw-r--r-- src/core/hle/service/ncm/ncm.cpp | 20
-rw-r--r-- src/core/hle/service/nfc/nfc.cpp | 6
-rw-r--r-- src/core/hle/service/nvdrv/devices/nvhost_ctrl_gpu.cpp | 28
-rw-r--r-- src/core/hle/service/nvdrv/devices/nvhost_ctrl_gpu.h | 18
-rw-r--r-- src/core/hle/service/nvflinger/nvflinger.cpp | 64
-rw-r--r-- src/core/hle/service/nvflinger/nvflinger.h | 21
-rw-r--r-- src/core/hle/service/sm/sm.cpp | 2
-rw-r--r-- src/core/hle/service/time/standard_steady_clock_core.cpp | 5
-rw-r--r-- src/core/hle/service/time/tick_based_steady_clock_core.cpp | 5
-rw-r--r-- src/core/hle/service/time/time.cpp | 5
-rw-r--r-- src/core/hle/service/time/time_sharedmemory.cpp | 3
-rw-r--r-- src/core/hle/service/vi/vi.cpp | 2
-rw-r--r-- src/core/memory.cpp | 109
-rw-r--r-- src/core/memory.h | 67
-rw-r--r-- src/core/memory/cheat_engine.cpp | 8
-rw-r--r-- src/core/perf_stats.cpp | 2
-rw-r--r-- src/core/settings.cpp | 7
-rw-r--r-- src/core/settings.h | 5
-rw-r--r-- src/core/tools/freezer.cpp | 8
-rw-r--r-- src/input_common/keyboard.cpp | 2
-rw-r--r-- src/input_common/motion_emu.cpp | 2
-rw-r--r-- src/tests/CMakeLists.txt | 1
-rw-r--r-- src/tests/common/fibers.cpp | 358
-rw-r--r-- src/tests/core/core_timing.cpp | 182
-rw-r--r-- src/video_core/CMakeLists.txt | 9
-rw-r--r-- src/video_core/buffer_cache/buffer_block.h | 27
-rw-r--r-- src/video_core/buffer_cache/buffer_cache.h | 232
-rw-r--r-- src/video_core/compatible_formats.cpp | 162
-rw-r--r-- src/video_core/compatible_formats.h | 32
-rw-r--r-- src/video_core/engines/const_buffer_engine_interface.h | 1
-rw-r--r-- src/video_core/engines/kepler_compute.cpp | 5
-rw-r--r-- src/video_core/engines/kepler_compute.h | 2
-rw-r--r-- src/video_core/engines/maxwell_3d.cpp | 7
-rw-r--r-- src/video_core/engines/maxwell_3d.h | 11
-rw-r--r-- src/video_core/engines/shader_bytecode.h | 8
-rw-r--r-- src/video_core/gpu.cpp | 5
-rw-r--r-- src/video_core/gpu.h | 6
-rw-r--r-- src/video_core/gpu_asynch.cpp | 9
-rw-r--r-- src/video_core/gpu_asynch.h | 2
-rw-r--r-- src/video_core/gpu_synch.cpp | 8
-rw-r--r-- src/video_core/gpu_synch.h | 2
-rw-r--r-- src/video_core/gpu_thread.cpp | 7
-rw-r--r-- src/video_core/macro/macro.cpp | 60
-rw-r--r-- src/video_core/macro/macro.h | 19
-rw-r--r-- src/video_core/macro/macro_hle.cpp | 113
-rw-r--r-- src/video_core/macro/macro_hle.h | 44
-rw-r--r-- src/video_core/macro/macro_interpreter.cpp | 3
-rw-r--r-- src/video_core/macro/macro_jit_x64.cpp | 137
-rw-r--r-- src/video_core/macro/macro_jit_x64.h | 10
-rw-r--r-- src/video_core/memory_manager.cpp | 40
-rw-r--r-- src/video_core/memory_manager.h | 12
-rw-r--r-- src/video_core/query_cache.h | 10
-rw-r--r-- src/video_core/rasterizer_cache.cpp | 7
-rw-r--r-- src/video_core/rasterizer_cache.h | 253
-rw-r--r-- src/video_core/renderer_opengl/gl_arb_decompiler.cpp | 2073
-rw-r--r-- src/video_core/renderer_opengl/gl_arb_decompiler.h | 29
-rw-r--r-- src/video_core/renderer_opengl/gl_buffer_cache.cpp | 71
-rw-r--r-- src/video_core/renderer_opengl/gl_buffer_cache.h | 49
-rw-r--r-- src/video_core/renderer_opengl/gl_device.cpp | 34
-rw-r--r-- src/video_core/renderer_opengl/gl_device.h | 15
-rw-r--r-- src/video_core/renderer_opengl/gl_rasterizer.cpp | 211
-rw-r--r-- src/video_core/renderer_opengl/gl_rasterizer.h | 21
-rw-r--r-- src/video_core/renderer_opengl/gl_shader_cache.cpp | 99
-rw-r--r-- src/video_core/renderer_opengl/gl_shader_cache.h | 52
-rw-r--r-- src/video_core/renderer_opengl/gl_shader_decompiler.cpp | 50
-rw-r--r-- src/video_core/renderer_opengl/gl_shader_disk_cache.cpp | 64
-rw-r--r-- src/video_core/renderer_opengl/gl_shader_disk_cache.h | 1
-rw-r--r-- src/video_core/renderer_opengl/gl_stream_buffer.cpp | 64
-rw-r--r-- src/video_core/renderer_opengl/gl_stream_buffer.h | 25
-rw-r--r-- src/video_core/renderer_opengl/gl_texture_cache.cpp | 53
-rw-r--r-- src/video_core/renderer_opengl/gl_texture_cache.h | 6
-rw-r--r-- src/video_core/renderer_opengl/maxwell_to_gl.h | 114
-rw-r--r-- src/video_core/renderer_opengl/renderer_opengl.cpp | 17
-rw-r--r-- src/video_core/renderer_opengl/renderer_opengl.h | 3
-rw-r--r-- src/video_core/renderer_vulkan/maxwell_to_vk.cpp | 176
-rw-r--r-- src/video_core/renderer_vulkan/renderer_vulkan.cpp | 32
-rw-r--r-- src/video_core/renderer_vulkan/vk_buffer_cache.cpp | 97
-rw-r--r-- src/video_core/renderer_vulkan/vk_buffer_cache.h | 42
-rw-r--r-- src/video_core/renderer_vulkan/vk_pipeline_cache.cpp | 72
-rw-r--r-- src/video_core/renderer_vulkan/vk_pipeline_cache.h | 33
-rw-r--r-- src/video_core/renderer_vulkan/vk_rasterizer.cpp | 122
-rw-r--r-- src/video_core/renderer_vulkan/vk_rasterizer.h | 7
-rw-r--r-- src/video_core/renderer_vulkan/vk_sampler_cache.cpp | 6
-rw-r--r-- src/video_core/renderer_vulkan/vk_scheduler.cpp | 2
-rw-r--r-- src/video_core/renderer_vulkan/vk_stream_buffer.h | 6
-rw-r--r-- src/video_core/renderer_vulkan/vk_texture_cache.cpp | 76
-rw-r--r-- src/video_core/renderer_vulkan/vk_texture_cache.h | 33
-rw-r--r-- src/video_core/renderer_vulkan/vk_update_descriptor.cpp | 36
-rw-r--r-- src/video_core/renderer_vulkan/vk_update_descriptor.h | 32
-rw-r--r-- src/video_core/renderer_vulkan/wrapper.cpp | 19
-rw-r--r-- src/video_core/renderer_vulkan/wrapper.h | 6
-rw-r--r-- src/video_core/shader/decode/half_set.cpp | 88
-rw-r--r-- src/video_core/shader/decode/image.cpp | 26
-rw-r--r-- src/video_core/shader/decode/texture.cpp | 55
-rw-r--r-- src/video_core/shader/memory_util.cpp | 4
-rw-r--r-- src/video_core/shader/node.h | 75
-rw-r--r-- src/video_core/shader/node_helper.h | 2
-rw-r--r-- src/video_core/shader/registry.cpp | 20
-rw-r--r-- src/video_core/shader/registry.h | 35
-rw-r--r-- src/video_core/shader/shader_ir.h | 14
-rw-r--r-- src/video_core/shader/track.cpp | 78
-rw-r--r-- src/video_core/shader_cache.h | 228
-rw-r--r-- src/video_core/texture_cache/surface_base.cpp | 10
-rw-r--r-- src/video_core/texture_cache/surface_base.h | 13
-rw-r--r-- src/video_core/texture_cache/surface_params.cpp | 19
-rw-r--r-- src/video_core/texture_cache/texture_cache.h | 144
-rw-r--r-- src/yuzu/CMakeLists.txt | 4
-rw-r--r-- src/yuzu/bootmanager.cpp | 71
-rw-r--r-- src/yuzu/bootmanager.h | 8
-rw-r--r-- src/yuzu/configuration/config.cpp | 13
-rw-r--r-- src/yuzu/configuration/config.h | 2
-rw-r--r-- src/yuzu/configuration/configure_general.cpp | 6
-rw-r--r-- src/yuzu/configuration/configure_general.ui | 7
-rw-r--r-- src/yuzu/configuration/configure_graphics.cpp | 45
-rw-r--r-- src/yuzu/configuration/configure_graphics.ui | 40
-rw-r--r-- src/yuzu/configuration/configure_graphics_advanced.cpp | 3
-rw-r--r-- src/yuzu/configuration/configure_service.cpp | 6
-rw-r--r-- src/yuzu/debugger/wait_tree.cpp | 54
-rw-r--r-- src/yuzu/main.cpp | 93
-rw-r--r-- src/yuzu/main.h | 5
-rw-r--r-- src/yuzu/main.ui | 18
-rw-r--r-- src/yuzu/yuzu.rc | 2
-rw-r--r-- src/yuzu_cmd/config.cpp | 2
-rw-r--r-- src/yuzu_cmd/default_ini.h | 5
-rw-r--r-- src/yuzu_cmd/yuzu.cpp | 5
-rw-r--r-- src/yuzu_cmd/yuzu.rc | 2
-rw-r--r-- src/yuzu_tester/config.cpp | 2
-rw-r--r-- src/yuzu_tester/default_ini.h | 5
-rw-r--r-- src/yuzu_tester/service/yuzutest.cpp | 2
-rw-r--r-- src/yuzu_tester/yuzu.cpp | 5
-rw-r--r-- src/yuzu_tester/yuzu.rc | 2
264 files changed, 10795 insertions(+), 3987 deletions(-)
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index 3a57356ab..1e977e8a8 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -62,6 +62,10 @@ else()
         -Wno-unused-parameter
     )
 
+    if (ARCHITECTURE_x86_64)
+        add_compile_options("-mcx16")
+    endif()
+
     if (APPLE AND CMAKE_CXX_COMPILER_ID STREQUAL Clang)
         add_compile_options("-stdlib=libc++")
     endif()
diff --git a/src/audio_core/audio_renderer.cpp b/src/audio_core/audio_renderer.cpp
index 50846a854..d64452617 100644
--- a/src/audio_core/audio_renderer.cpp
+++ b/src/audio_core/audio_renderer.cpp
@@ -180,11 +180,12 @@ ResultVal<std::vector<u8>> AudioRenderer::UpdateAudioRenderer(const std::vector<
 
     // Copy output header
     UpdateDataHeader response_data{worker_params};
-    std::vector<u8> output_params(response_data.total_size);
     if (behavior_info.IsElapsedFrameCountSupported()) {
-        response_data.frame_count = 0x10;
-        response_data.total_size += 0x10;
+        response_data.render_info = sizeof(RendererInfo);
+        response_data.total_size += sizeof(RendererInfo);
     }
+
+    std::vector<u8> output_params(response_data.total_size);
     std::memcpy(output_params.data(), &response_data, sizeof(UpdateDataHeader));
 
     // Copy output memory pool entries
@@ -219,6 +220,17 @@ ResultVal<std::vector<u8>> AudioRenderer::UpdateAudioRenderer(const std::vector<
         return Audren::ERR_INVALID_PARAMETERS;
     }
 
+    if (behavior_info.IsElapsedFrameCountSupported()) {
+        const std::size_t renderer_info_offset{
+            sizeof(UpdateDataHeader) + response_data.memory_pools_size + response_data.voices_size +
+            response_data.effects_size + response_data.sinks_size +
+            response_data.performance_manager_size + response_data.behavior_size};
+        RendererInfo renderer_info{};
+        renderer_info.elasped_frame_count = elapsed_frame_count;
+        std::memcpy(output_params.data() + renderer_info_offset, &renderer_info,
+                    sizeof(RendererInfo));
+    }
+
     return MakeResult(output_params);
 }
 
@@ -447,6 +459,7 @@ void AudioRenderer::QueueMixedBuffer(Buffer::Tag tag) {
         }
     }
     audio_out->QueueBuffer(stream, tag, std::move(buffer));
+    elapsed_frame_count++;
 }
 
 void AudioRenderer::ReleaseAndQueueBuffers() {
diff --git a/src/audio_core/audio_renderer.h b/src/audio_core/audio_renderer.h
index 1f9114c07..f0b691a86 100644
--- a/src/audio_core/audio_renderer.h
+++ b/src/audio_core/audio_renderer.h
@@ -196,6 +196,12 @@ struct EffectOutStatus {
 };
 static_assert(sizeof(EffectOutStatus) == 0x10, "EffectOutStatus is an invalid size");
 
+struct RendererInfo {
+    u64_le elasped_frame_count{};
+    INSERT_PADDING_WORDS(2);
+};
+static_assert(sizeof(RendererInfo) == 0x10, "RendererInfo is an invalid size");
+
 struct UpdateDataHeader {
     UpdateDataHeader() {}
 
@@ -209,7 +215,7 @@ struct UpdateDataHeader {
         mixes_size = 0x0;
         sinks_size = config.sink_count * 0x20;
         performance_manager_size = 0x10;
-        frame_count = 0;
+        render_info = 0;
         total_size = sizeof(UpdateDataHeader) + behavior_size + memory_pools_size + voices_size +
                      effects_size + sinks_size + performance_manager_size;
     }
@@ -223,8 +229,8 @@ struct UpdateDataHeader {
     u32_le mixes_size{};
     u32_le sinks_size{};
     u32_le performance_manager_size{};
-    INSERT_PADDING_WORDS(1);
-    u32_le frame_count{};
+    u32_le splitter_size{};
+    u32_le render_info{};
     INSERT_PADDING_WORDS(4);
     u32_le total_size{};
 };
@@ -258,6 +264,7 @@ private:
     std::unique_ptr<AudioOut> audio_out;
     StreamPtr stream;
     Core::Memory::Memory& memory;
+    std::size_t elapsed_frame_count{};
 };
 
 } // namespace AudioCore
diff --git a/src/audio_core/stream.cpp b/src/audio_core/stream.cpp
index 4ca98f8ea..dfc4805d9 100644
--- a/src/audio_core/stream.cpp
+++ b/src/audio_core/stream.cpp
@@ -59,15 +59,24 @@ Stream::State Stream::GetState() const {
     return state;
 }
 
-s64 Stream::GetBufferReleaseCycles(const Buffer& buffer) const {
+s64 Stream::GetBufferReleaseNS(const Buffer& buffer) const {
     const std::size_t num_samples{buffer.GetSamples().size() / GetNumChannels()};
-    const auto us =
-        std::chrono::microseconds((static_cast<u64>(num_samples) * 1000000) / sample_rate);
-    return Core::Timing::usToCycles(us);
+    const auto ns =
+        std::chrono::nanoseconds((static_cast<u64>(num_samples) * 1000000000ULL) / sample_rate);
+    return ns.count();
+}
+
+s64 Stream::GetBufferReleaseNSHostTiming(const Buffer& buffer) const {
+    const std::size_t num_samples{buffer.GetSamples().size() / GetNumChannels()};
+    // The DSP signals before playing the last sample; in HLE we emulate this by
+    // scheduling the release one sample early.
+    s64 base_samples = std::max<s64>(static_cast<s64>(num_samples) - 1, 0);
+    const auto ns =
+        std::chrono::nanoseconds((static_cast<u64>(base_samples) * 1000000000ULL) / sample_rate);
+    return ns.count();
 }
 
 static void VolumeAdjustSamples(std::vector<s16>& samples, float game_volume) {
-    const float volume{std::clamp(Settings::values.volume - (1.0f - game_volume), 0.0f, 1.0f)};
+    const float volume{std::clamp(Settings::Volume() - (1.0f - game_volume), 0.0f, 1.0f)};
 
     if (volume == 1.0f) {
         return;
@@ -105,7 +114,11 @@ void Stream::PlayNextBuffer() {
 
     sink_stream.EnqueueSamples(GetNumChannels(), active_buffer->GetSamples());
 
-    core_timing.ScheduleEvent(GetBufferReleaseCycles(*active_buffer), release_event, {});
+    if (core_timing.IsHostTiming()) {
+        core_timing.ScheduleEvent(GetBufferReleaseNSHostTiming(*active_buffer), release_event, {});
+    } else {
+        core_timing.ScheduleEvent(GetBufferReleaseNS(*active_buffer), release_event, {});
+    }
 }
 
 void Stream::ReleaseActiveBuffer() {
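Note: the release-time computation above is just samples-per-channel scaled to nanoseconds. A standalone sketch of the math, assuming the project's u64/s64 aliases (illustrative, not part of the patch):

    // frames = total samples / channel count; release time = frames / sample_rate seconds
    s64 BufferReleaseNS(std::size_t total_samples, std::size_t channels, u64 sample_rate) {
        const u64 frames = static_cast<u64>(total_samples / channels);
        return static_cast<s64>((frames * 1000000000ULL) / sample_rate);
    }
    // e.g. 1920 samples, 2 channels, 48000 Hz -> 960 frames -> 20000000 ns (20 ms)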
diff --git a/src/audio_core/stream.h b/src/audio_core/stream.h
index 1708a4d98..e309d60fe 100644
--- a/src/audio_core/stream.h
+++ b/src/audio_core/stream.h
@@ -96,7 +96,10 @@ private:
     void ReleaseActiveBuffer();
 
-    /// Gets the number of core cycles when the specified buffer will be released
-    s64 GetBufferReleaseCycles(const Buffer& buffer) const;
+    /// Gets the time in nanoseconds at which the specified buffer will be released
+    s64 GetBufferReleaseNS(const Buffer& buffer) const;
+
+    /// Same as GetBufferReleaseNS, but signals one sample early to mimic the DSP (host timing)
+    s64 GetBufferReleaseNSHostTiming(const Buffer& buffer) const;
 
     u32 sample_rate; ///< Sample rate of the stream
     Format format;   ///< Format of the stream
diff --git a/src/common/CMakeLists.txt b/src/common/CMakeLists.txt
index 24b7a083c..d120c8d3d 100644
--- a/src/common/CMakeLists.txt
+++ b/src/common/CMakeLists.txt
@@ -32,6 +32,8 @@ add_custom_command(OUTPUT scm_rev.cpp
     DEPENDS
         # WARNING! It was too much work to try and make a common location for this list,
         # so if you need to change it, please update CMakeModules/GenerateSCMRev.cmake as well
+        "${VIDEO_CORE}/renderer_opengl/gl_arb_decompiler.cpp"
+        "${VIDEO_CORE}/renderer_opengl/gl_arb_decompiler.h"
         "${VIDEO_CORE}/renderer_opengl/gl_shader_cache.cpp"
         "${VIDEO_CORE}/renderer_opengl/gl_shader_cache.h"
         "${VIDEO_CORE}/renderer_opengl/gl_shader_decompiler.cpp"
@@ -96,6 +98,8 @@ add_library(common STATIC
     algorithm.h
     alignment.h
     assert.h
+    atomic_ops.cpp
+    atomic_ops.h
     detached_tasks.cpp
     detached_tasks.h
     bit_field.h
@@ -108,6 +112,8 @@ add_library(common STATIC
     common_types.h
     dynamic_library.cpp
     dynamic_library.h
+    fiber.cpp
+    fiber.h
     file_util.cpp
     file_util.h
     hash.h
@@ -141,6 +147,8 @@ add_library(common STATIC
     scm_rev.cpp
     scm_rev.h
     scope_exit.h
+    spin_lock.cpp
+    spin_lock.h
     string_util.cpp
     string_util.h
     swap.h
@@ -161,6 +169,8 @@ add_library(common STATIC
     vector_math.h
    virtual_buffer.cpp
     virtual_buffer.h
+    wall_clock.cpp
+    wall_clock.h
     web_result.h
     zstd_compression.cpp
     zstd_compression.h
@@ -171,12 +181,15 @@ if(ARCHITECTURE_x86_64)
         PRIVATE
             x64/cpu_detect.cpp
             x64/cpu_detect.h
+            x64/native_clock.cpp
+            x64/native_clock.h
             x64/xbyak_abi.h
             x64/xbyak_util.h
     )
 endif()
 
 create_target_directory_groups(common)
+find_package(Boost 1.71 COMPONENTS context headers REQUIRED)
 
-target_link_libraries(common PUBLIC Boost::boost fmt::fmt microprofile)
+target_link_libraries(common PUBLIC ${Boost_LIBRARIES} fmt::fmt microprofile)
 target_link_libraries(common PRIVATE lz4::lz4 zstd::zstd xbyak)
diff --git a/src/common/atomic_ops.cpp b/src/common/atomic_ops.cpp
new file mode 100644
index 000000000..1098e21ff
--- /dev/null
+++ b/src/common/atomic_ops.cpp
@@ -0,0 +1,70 @@
+// Copyright 2020 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <cstring>
+
+#include "common/atomic_ops.h"
+
+#if _MSC_VER
+#include <intrin.h>
+#endif
+
+namespace Common {
+
+#if _MSC_VER
+
+bool AtomicCompareAndSwap(u8 volatile* pointer, u8 value, u8 expected) {
+    u8 result = _InterlockedCompareExchange8((char*)pointer, value, expected);
+    return result == expected;
+}
+
+bool AtomicCompareAndSwap(u16 volatile* pointer, u16 value, u16 expected) {
+    u16 result = _InterlockedCompareExchange16((short*)pointer, value, expected);
+    return result == expected;
+}
+
+bool AtomicCompareAndSwap(u32 volatile* pointer, u32 value, u32 expected) {
+    u32 result = _InterlockedCompareExchange((long*)pointer, value, expected);
+    return result == expected;
+}
+
+bool AtomicCompareAndSwap(u64 volatile* pointer, u64 value, u64 expected) {
+    u64 result = _InterlockedCompareExchange64((__int64*)pointer, value, expected);
+    return result == expected;
+}
+
+bool AtomicCompareAndSwap(u64 volatile* pointer, u128 value, u128 expected) {
+    return _InterlockedCompareExchange128((__int64*)pointer, value[1], value[0],
+                                          (__int64*)expected.data()) != 0;
+}
+
+#else
+
+bool AtomicCompareAndSwap(u8 volatile* pointer, u8 value, u8 expected) {
+    return __sync_bool_compare_and_swap(pointer, expected, value);
+}
+
+bool AtomicCompareAndSwap(u16 volatile* pointer, u16 value, u16 expected) {
+    return __sync_bool_compare_and_swap(pointer, expected, value);
+}
+
+bool AtomicCompareAndSwap(u32 volatile* pointer, u32 value, u32 expected) {
+    return __sync_bool_compare_and_swap(pointer, expected, value);
+}
+
+bool AtomicCompareAndSwap(u64 volatile* pointer, u64 value, u64 expected) {
+    return __sync_bool_compare_and_swap(pointer, expected, value);
+}
+
+bool AtomicCompareAndSwap(u64 volatile* pointer, u128 value, u128 expected) {
+    unsigned __int128 value_a;
+    unsigned __int128 expected_a;
+    std::memcpy(&value_a, value.data(), sizeof(u128));
+    std::memcpy(&expected_a, expected.data(), sizeof(u128));
+    return __sync_bool_compare_and_swap((unsigned __int128*)pointer, expected_a, value_a);
+}
+
+#endif
+
+} // namespace Common
diff --git a/src/common/atomic_ops.h b/src/common/atomic_ops.h
new file mode 100644
index 000000000..e6181d521
--- /dev/null
+++ b/src/common/atomic_ops.h
@@ -0,0 +1,17 @@
+// Copyright 2020 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include "common/common_types.h"
+
+namespace Common {
+
+bool AtomicCompareAndSwap(u8 volatile* pointer, u8 value, u8 expected);
+bool AtomicCompareAndSwap(u16 volatile* pointer, u16 value, u16 expected);
+bool AtomicCompareAndSwap(u32 volatile* pointer, u32 value, u32 expected);
+bool AtomicCompareAndSwap(u64 volatile* pointer, u64 value, u64 expected);
+bool AtomicCompareAndSwap(u64 volatile* pointer, u128 value, u128 expected);
+
+} // namespace Common
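These helpers return true when the swap succeeded, so callers typically retry in a loop. A minimal sketch (illustrative, not part of the patch):

    #include "common/atomic_ops.h"
    #include "common/common_types.h"

    // Atomically increment *counter, returning the value before the increment.
    u32 FetchIncrement(volatile u32* counter) {
        u32 expected;
        do {
            expected = *counter;
        } while (!Common::AtomicCompareAndSwap(counter, expected + 1, expected));
        return expected;
    }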
diff --git a/src/common/fiber.cpp b/src/common/fiber.cpp
new file mode 100644
index 000000000..1c1d09ccb
--- /dev/null
+++ b/src/common/fiber.cpp
@@ -0,0 +1,222 @@
+// Copyright 2020 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/assert.h"
+#include "common/fiber.h"
+#if defined(_WIN32) || defined(WIN32)
+#include <windows.h>
+#else
+#include <boost/context/detail/fcontext.hpp>
+#endif
+
+namespace Common {
+
+constexpr std::size_t default_stack_size = 256 * 1024; // 256 KiB
+
+#if defined(_WIN32) || defined(WIN32)
+
+struct Fiber::FiberImpl {
+    LPVOID handle = nullptr;
+    LPVOID rewind_handle = nullptr;
+};
+
+void Fiber::Start() {
+    ASSERT(previous_fiber != nullptr);
+    previous_fiber->guard.unlock();
+    previous_fiber.reset();
+    entry_point(start_parameter);
+    UNREACHABLE();
+}
+
+void Fiber::OnRewind() {
+    ASSERT(impl->handle != nullptr);
+    DeleteFiber(impl->handle);
+    impl->handle = impl->rewind_handle;
+    impl->rewind_handle = nullptr;
+    rewind_point(rewind_parameter);
+    UNREACHABLE();
+}
+
+void Fiber::FiberStartFunc(void* fiber_parameter) {
+    auto fiber = static_cast<Fiber*>(fiber_parameter);
+    fiber->Start();
+}
+
+void Fiber::RewindStartFunc(void* fiber_parameter) {
+    auto fiber = static_cast<Fiber*>(fiber_parameter);
+    fiber->OnRewind();
+}
+
+Fiber::Fiber(std::function<void(void*)>&& entry_point_func, void* start_parameter)
+    : entry_point{std::move(entry_point_func)}, start_parameter{start_parameter} {
+    impl = std::make_unique<FiberImpl>();
+    impl->handle = CreateFiber(default_stack_size, &FiberStartFunc, this);
+}
+
+Fiber::Fiber() : impl{std::make_unique<FiberImpl>()} {}
+
+Fiber::~Fiber() {
+    if (released) {
+        return;
+    }
+    // Make sure the Fiber is not being used
+    const bool locked = guard.try_lock();
+    ASSERT_MSG(locked, "Destroying a fiber that's still running");
+    if (locked) {
+        guard.unlock();
+    }
+    DeleteFiber(impl->handle);
+}
+
+void Fiber::Exit() {
+    ASSERT_MSG(is_thread_fiber, "Exiting a non-main-thread fiber");
+    if (!is_thread_fiber) {
+        return;
+    }
+    ConvertFiberToThread();
+    guard.unlock();
+    released = true;
+}
+
+void Fiber::SetRewindPoint(std::function<void(void*)>&& rewind_func, void* start_parameter) {
+    rewind_point = std::move(rewind_func);
+    rewind_parameter = start_parameter;
+}
+
+void Fiber::Rewind() {
+    ASSERT(rewind_point);
+    ASSERT(impl->rewind_handle == nullptr);
+    impl->rewind_handle = CreateFiber(default_stack_size, &RewindStartFunc, this);
+    SwitchToFiber(impl->rewind_handle);
+}
+
+void Fiber::YieldTo(std::shared_ptr<Fiber>& from, std::shared_ptr<Fiber>& to) {
+    ASSERT_MSG(from != nullptr, "Yielding fiber is null!");
+    ASSERT_MSG(to != nullptr, "Next fiber is null!");
+    to->guard.lock();
+    to->previous_fiber = from;
+    SwitchToFiber(to->impl->handle);
+    ASSERT(from->previous_fiber != nullptr);
+    from->previous_fiber->guard.unlock();
+    from->previous_fiber.reset();
+}
+
+std::shared_ptr<Fiber> Fiber::ThreadToFiber() {
+    std::shared_ptr<Fiber> fiber = std::shared_ptr<Fiber>{new Fiber()};
+    fiber->guard.lock();
+    fiber->impl->handle = ConvertThreadToFiber(nullptr);
+    fiber->is_thread_fiber = true;
+    return fiber;
+}
+
+#else
+
+struct Fiber::FiberImpl {
+    alignas(64) std::array<u8, default_stack_size> stack;
+    alignas(64) std::array<u8, default_stack_size> rewind_stack;
+    u8* stack_limit;
+    u8* rewind_stack_limit;
+    boost::context::detail::fcontext_t context;
+    boost::context::detail::fcontext_t rewind_context;
+};
+
+void Fiber::Start(boost::context::detail::transfer_t& transfer) {
+    ASSERT(previous_fiber != nullptr);
+    previous_fiber->impl->context = transfer.fctx;
+    previous_fiber->guard.unlock();
+    previous_fiber.reset();
+    entry_point(start_parameter);
+    UNREACHABLE();
+}
+
+void Fiber::OnRewind([[maybe_unused]] boost::context::detail::transfer_t& transfer) {
+    ASSERT(impl->context != nullptr);
+    impl->context = impl->rewind_context;
+    impl->rewind_context = nullptr;
+    u8* tmp = impl->stack_limit;
+    impl->stack_limit = impl->rewind_stack_limit;
+    impl->rewind_stack_limit = tmp;
+    rewind_point(rewind_parameter);
+    UNREACHABLE();
+}
+
+void Fiber::FiberStartFunc(boost::context::detail::transfer_t transfer) {
+    auto fiber = static_cast<Fiber*>(transfer.data);
+    fiber->Start(transfer);
+}
+
+void Fiber::RewindStartFunc(boost::context::detail::transfer_t transfer) {
+    auto fiber = static_cast<Fiber*>(transfer.data);
+    fiber->OnRewind(transfer);
+}
+
+Fiber::Fiber(std::function<void(void*)>&& entry_point_func, void* start_parameter)
+    : entry_point{std::move(entry_point_func)}, start_parameter{start_parameter} {
+    impl = std::make_unique<FiberImpl>();
+    impl->stack_limit = impl->stack.data();
+    impl->rewind_stack_limit = impl->rewind_stack.data();
+    u8* stack_base = impl->stack_limit + default_stack_size;
+    impl->context =
+        boost::context::detail::make_fcontext(stack_base, impl->stack.size(), FiberStartFunc);
+}
+
+void Fiber::SetRewindPoint(std::function<void(void*)>&& rewind_func, void* start_parameter) {
+    rewind_point = std::move(rewind_func);
+    rewind_parameter = start_parameter;
+}
+
+Fiber::Fiber() : impl{std::make_unique<FiberImpl>()} {}
+
+Fiber::~Fiber() {
+    if (released) {
+        return;
+    }
+    // Make sure the Fiber is not being used
+    const bool locked = guard.try_lock();
+    ASSERT_MSG(locked, "Destroying a fiber that's still running");
+    if (locked) {
+        guard.unlock();
+    }
+}
+
+void Fiber::Exit() {
+    ASSERT_MSG(is_thread_fiber, "Exiting a non-main-thread fiber");
+    if (!is_thread_fiber) {
+        return;
+    }
+    guard.unlock();
+    released = true;
+}
+
+void Fiber::Rewind() {
+    ASSERT(rewind_point);
+    ASSERT(impl->rewind_context == nullptr);
+    u8* stack_base = impl->rewind_stack_limit + default_stack_size;
+    impl->rewind_context =
+        boost::context::detail::make_fcontext(stack_base, impl->stack.size(), RewindStartFunc);
+    boost::context::detail::jump_fcontext(impl->rewind_context, this);
+}
+
+void Fiber::YieldTo(std::shared_ptr<Fiber>& from, std::shared_ptr<Fiber>& to) {
+    ASSERT_MSG(from != nullptr, "Yielding fiber is null!");
+    ASSERT_MSG(to != nullptr, "Next fiber is null!");
+    to->guard.lock();
+    to->previous_fiber = from;
+    auto transfer = boost::context::detail::jump_fcontext(to->impl->context, to.get());
+    ASSERT(from->previous_fiber != nullptr);
+    from->previous_fiber->impl->context = transfer.fctx;
+    from->previous_fiber->guard.unlock();
+    from->previous_fiber.reset();
+}
+
+std::shared_ptr<Fiber> Fiber::ThreadToFiber() {
+    std::shared_ptr<Fiber> fiber = std::shared_ptr<Fiber>{new Fiber()};
+    fiber->guard.lock();
+    fiber->is_thread_fiber = true;
+    return fiber;
+}
+
+#endif
+} // namespace Common
diff --git a/src/common/fiber.h b/src/common/fiber.h
new file mode 100644
index 000000000..dafc1100e
--- /dev/null
+++ b/src/common/fiber.h
@@ -0,0 +1,92 @@
+// Copyright 2020 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <functional>
+#include <memory>
+
+#include "common/common_types.h"
+#include "common/spin_lock.h"
+
+#if !defined(_WIN32) && !defined(WIN32)
+namespace boost::context::detail {
+struct transfer_t;
+}
+#endif
+
+namespace Common {
+
+/**
+ * Fiber class
+ * A fiber is a userspace thread with its own context. Fibers can be used to
+ * implement coroutines, emulated threading systems and certain asynchronous
+ * patterns.
+ *
+ * This class implements fibers at a low level, allowing greater freedom to
+ * implement such patterns. The class is 'threadsafe' in the sense that only
+ * one fiber can be running at a time; a thread that tries to yield to an
+ * already-running fiber blocks until that fiber yields. WARNING: directly
+ * exchanging two running fibers between threads will deadlock. To avoid the
+ * deadlock, give each thread an intermediary fiber: switch to the current
+ * thread's intermediary fiber first, and from it switch to the target fiber.
+ * This way two fibers can be exchanged between two different threads.
+ */
+class Fiber {
+public:
+    Fiber(std::function<void(void*)>&& entry_point_func, void* start_parameter);
+    ~Fiber();
+
+    Fiber(const Fiber&) = delete;
+    Fiber& operator=(const Fiber&) = delete;
+
+    Fiber(Fiber&&) = default;
+    Fiber& operator=(Fiber&&) = default;
+
+    /// Yields control from Fiber 'from' to Fiber 'to'
+    /// Fiber 'from' must be the currently running fiber.
+    static void YieldTo(std::shared_ptr<Fiber>& from, std::shared_ptr<Fiber>& to);
+    static std::shared_ptr<Fiber> ThreadToFiber();
+
+    void SetRewindPoint(std::function<void(void*)>&& rewind_func, void* start_parameter);
+
+    void Rewind();
+
+    /// Only call from main thread's fiber
+    void Exit();
+
+    /// Changes the start parameter of the fiber. Has no effect if the fiber has already started
+    void SetStartParameter(void* new_parameter) {
+        start_parameter = new_parameter;
+    }
+
+private:
+    Fiber();
+
+#if defined(_WIN32) || defined(WIN32)
+    void OnRewind();
+    void Start();
+    static void FiberStartFunc(void* fiber_parameter);
+    static void RewindStartFunc(void* fiber_parameter);
+#else
+    void OnRewind(boost::context::detail::transfer_t& transfer);
+    void Start(boost::context::detail::transfer_t& transfer);
+    static void FiberStartFunc(boost::context::detail::transfer_t transfer);
+    static void RewindStartFunc(boost::context::detail::transfer_t transfer);
+#endif
+
+    struct FiberImpl;
+
+    SpinLock guard{};
+    std::function<void(void*)> entry_point;
+    std::function<void(void*)> rewind_point;
+    void* rewind_parameter{};
+    void* start_parameter{};
+    std::shared_ptr<Fiber> previous_fiber;
+    std::unique_ptr<FiberImpl> impl;
+    bool is_thread_fiber{};
+    bool released{};
+};
+
+} // namespace Common
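A minimal usage sketch for the API above (illustrative, not part of the patch; names are made up): the current thread is first converted into a fiber, which then ping-pongs control with a worker fiber.

    #include <memory>
    #include "common/fiber.h"

    std::shared_ptr<Common::Fiber> main_fiber;
    std::shared_ptr<Common::Fiber> worker_fiber;

    void WorkerEntry(void*) {
        // ... do some work on the worker fiber's own stack ...
        Common::Fiber::YieldTo(worker_fiber, main_fiber); // hand control back
    }

    void RunOnce() {
        main_fiber = Common::Fiber::ThreadToFiber();
        worker_fiber = std::make_shared<Common::Fiber>(WorkerEntry, nullptr);
        Common::Fiber::YieldTo(main_fiber, worker_fiber); // runs WorkerEntry
        main_fiber->Exit(); // convert the main thread back to a plain thread
    }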
diff --git a/src/common/spin_lock.cpp b/src/common/spin_lock.cpp
new file mode 100644
index 000000000..c1524220f
--- /dev/null
+++ b/src/common/spin_lock.cpp
@@ -0,0 +1,54 @@
+// Copyright 2020 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/spin_lock.h"
+
+#if _MSC_VER
+#include <intrin.h>
+#if _M_AMD64
+#define __x86_64__ 1
+#endif
+#if _M_ARM64
+#define __aarch64__ 1
+#endif
+#else
+#if __x86_64__
+#include <xmmintrin.h>
+#endif
+#endif
+
+namespace {
+
+void ThreadPause() {
+#if __x86_64__
+    _mm_pause();
+#elif __aarch64__ && _MSC_VER
+    __yield();
+#elif __aarch64__
+    asm("yield");
+#endif
+}
+
+} // Anonymous namespace
+
+namespace Common {
+
+void SpinLock::lock() {
+    while (lck.test_and_set(std::memory_order_acquire)) {
+        ThreadPause();
+    }
+}
+
+void SpinLock::unlock() {
+    lck.clear(std::memory_order_release);
+}
+
+bool SpinLock::try_lock() {
+    if (lck.test_and_set(std::memory_order_acquire)) {
+        return false;
+    }
+    return true;
+}
+
+} // namespace Common
diff --git a/src/common/spin_lock.h b/src/common/spin_lock.h
new file mode 100644
index 000000000..1df5528c4
--- /dev/null
+++ b/src/common/spin_lock.h
@@ -0,0 +1,26 @@
+// Copyright 2020 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <atomic>
+
+namespace Common {
+
+/**
+ * SpinLock class
+ * A lock, similar to a mutex, that forces a thread to spin-wait instead of
+ * calling the supervisor. It should only be used on short sequences of code.
+ */
+class SpinLock {
+public:
+    void lock();
+    void unlock();
+    bool try_lock();
+
+private:
+    std::atomic_flag lck = ATOMIC_FLAG_INIT;
+};
+
+} // namespace Common
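Since SpinLock exposes lock()/unlock()/try_lock(), it satisfies the standard Lockable requirements and works with the usual RAII guards. A short sketch (illustrative, not part of the patch):

    #include <cstdint>
    #include <mutex>
    #include "common/spin_lock.h"

    Common::SpinLock counter_lock;
    std::uint64_t counter = 0;

    void Increment() {
        std::scoped_lock lock{counter_lock}; // spin-waits instead of sleeping
        ++counter;                           // keep the critical section short
    }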
diff --git a/src/common/telemetry.cpp b/src/common/telemetry.cpp
index 200c6489a..16d42facd 100644
--- a/src/common/telemetry.cpp
+++ b/src/common/telemetry.cpp
@@ -60,6 +60,7 @@ void AppendCPUInfo(FieldCollection& fc) {
     fc.AddField(FieldType::UserSystem, "CPU_Extension_x64_AES", Common::GetCPUCaps().aes);
     fc.AddField(FieldType::UserSystem, "CPU_Extension_x64_AVX", Common::GetCPUCaps().avx);
     fc.AddField(FieldType::UserSystem, "CPU_Extension_x64_AVX2", Common::GetCPUCaps().avx2);
+    fc.AddField(FieldType::UserSystem, "CPU_Extension_x64_AVX512", Common::GetCPUCaps().avx512);
     fc.AddField(FieldType::UserSystem, "CPU_Extension_x64_BMI1", Common::GetCPUCaps().bmi1);
     fc.AddField(FieldType::UserSystem, "CPU_Extension_x64_BMI2", Common::GetCPUCaps().bmi2);
     fc.AddField(FieldType::UserSystem, "CPU_Extension_x64_FMA", Common::GetCPUCaps().fma);
diff --git a/src/common/thread.cpp b/src/common/thread.cpp
index 0cd2d10bf..8e5935e6a 100644
--- a/src/common/thread.cpp
+++ b/src/common/thread.cpp
@@ -25,6 +25,52 @@
 
 namespace Common {
 
+#ifdef _WIN32
+
+void SetCurrentThreadPriority(ThreadPriority new_priority) {
+    auto handle = GetCurrentThread();
+    int windows_priority = 0;
+    switch (new_priority) {
+    case ThreadPriority::Low:
+        windows_priority = THREAD_PRIORITY_BELOW_NORMAL;
+        break;
+    case ThreadPriority::Normal:
+        windows_priority = THREAD_PRIORITY_NORMAL;
+        break;
+    case ThreadPriority::High:
+        windows_priority = THREAD_PRIORITY_ABOVE_NORMAL;
+        break;
+    case ThreadPriority::VeryHigh:
+        windows_priority = THREAD_PRIORITY_HIGHEST;
+        break;
+    default:
+        windows_priority = THREAD_PRIORITY_NORMAL;
+        break;
+    }
+    SetThreadPriority(handle, windows_priority);
+}
+
+#else
+
+void SetCurrentThreadPriority(ThreadPriority new_priority) {
+    pthread_t this_thread = pthread_self();
+
+    s32 max_prio = sched_get_priority_max(SCHED_OTHER);
+    s32 min_prio = sched_get_priority_min(SCHED_OTHER);
+    u32 level = static_cast<u32>(new_priority) + 1;
+
+    struct sched_param params;
+    if (max_prio > min_prio) {
+        params.sched_priority = min_prio + ((max_prio - min_prio) * level) / 4;
+    } else {
+        params.sched_priority = min_prio - ((min_prio - max_prio) * level) / 4;
+    }
+
+    pthread_setschedparam(this_thread, SCHED_OTHER, &params);
+}
+
+#endif
+
 #ifdef _MSC_VER
 
 // Sets the debugger-visible name of the current thread.
@@ -70,6 +116,12 @@ void SetCurrentThreadName(const char* name) {
 }
 #endif
 
+#if defined(_WIN32)
+void SetCurrentThreadName(const char* name) {
+    // Do nothing on MinGW
+}
+#endif
+
 #endif
 
 } // namespace Common
diff --git a/src/common/thread.h b/src/common/thread.h
index 2fc071685..52b359413 100644
--- a/src/common/thread.h
+++ b/src/common/thread.h
@@ -9,6 +9,7 @@
 #include <cstddef>
 #include <mutex>
 #include <thread>
+#include "common/common_types.h"
 
 namespace Common {
 
@@ -28,8 +29,7 @@ public:
         is_set = false;
     }
 
-    template <class Duration>
-    bool WaitFor(const std::chrono::duration<Duration>& time) {
+    bool WaitFor(const std::chrono::nanoseconds& time) {
         std::unique_lock lk{mutex};
         if (!condvar.wait_for(lk, time, [this] { return is_set; }))
             return false;
@@ -86,6 +86,15 @@ private:
     std::size_t generation = 0; // Incremented once each time the barrier is used
 };
 
+enum class ThreadPriority : u32 {
+    Low = 0,
+    Normal = 1,
+    High = 2,
+    VeryHigh = 3,
+};
+
+void SetCurrentThreadPriority(ThreadPriority new_priority);
+
 void SetCurrentThreadName(const char* name);
 
 } // namespace Common
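A typical call site for the new priority API (illustrative, not part of the patch; the thread name is made up):

    #include "common/thread.h"

    void EmuThreadEntry() {
        Common::SetCurrentThreadName("EmuThread");
        // Raise host priority for latency-sensitive emulation threads.
        Common::SetCurrentThreadPriority(Common::ThreadPriority::VeryHigh);
        // ... run the thread's loop ...
    }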
diff --git a/src/common/uint128.cpp b/src/common/uint128.cpp
index 32bf56730..16bf7c828 100644
--- a/src/common/uint128.cpp
+++ b/src/common/uint128.cpp
@@ -6,12 +6,38 @@
 #include <intrin.h>
 
 #pragma intrinsic(_umul128)
+#pragma intrinsic(_udiv128)
 #endif
 #include <cstring>
 #include "common/uint128.h"
 
 namespace Common {
 
+#ifdef _MSC_VER
+
+u64 MultiplyAndDivide64(u64 a, u64 b, u64 d) {
+    u128 r{};
+    r[0] = _umul128(a, b, &r[1]);
+    u64 remainder;
+#if _MSC_VER < 1923
+    return udiv128(r[1], r[0], d, &remainder);
+#else
+    return _udiv128(r[1], r[0], d, &remainder);
+#endif
+}
+
+#else
+
+u64 MultiplyAndDivide64(u64 a, u64 b, u64 d) {
+    const u64 diva = a / d;
+    const u64 moda = a % d;
+    const u64 divb = b / d;
+    const u64 modb = b % d;
+    return diva * b + moda * divb + moda * modb / d;
+}
+
+#endif
+
 u128 Multiply64Into128(u64 a, u64 b) {
     u128 result;
 #ifdef _MSC_VER
diff --git a/src/common/uint128.h b/src/common/uint128.h
index a3be2a2cb..503cd2d0c 100644
--- a/src/common/uint128.h
+++ b/src/common/uint128.h
@@ -9,6 +9,9 @@
 
 namespace Common {
 
+// This function multiplies 2 u64 values and divides the result by a u64 value.
+u64 MultiplyAndDivide64(u64 a, u64 b, u64 d);
+
 // This function multiplies 2 u64 values and produces a u128 value;
 u128 Multiply64Into128(u64 a, u64 b);
 
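The portable fallback in uint128.cpp is exact: writing a = (a/d)*d + a%d and b = (b/d)*d + b%d gives

    floor(a*b/d) = (a/d)*b + (a%d)*(b/d) + floor(((a%d) * (b%d)) / d)

because the first two terms are already integers. The caveat is that (a%d)*(b%d) is computed in 64 bits (both factors are below d), so the fallback can overflow when d exceeds 2^32; the MSVC path sidesteps this with a true 128-bit multiply followed by a 128-by-64 divide.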
diff --git a/src/common/wall_clock.cpp b/src/common/wall_clock.cpp
new file mode 100644
index 000000000..3afbdb898
--- /dev/null
+++ b/src/common/wall_clock.cpp
@@ -0,0 +1,91 @@
1// Copyright 2020 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/uint128.h"
6#include "common/wall_clock.h"
7
8#ifdef ARCHITECTURE_x86_64
9#include "common/x64/cpu_detect.h"
10#include "common/x64/native_clock.h"
11#endif
12
13namespace Common {
14
15using base_timer = std::chrono::steady_clock;
16using base_time_point = std::chrono::time_point<base_timer>;
17
18class StandardWallClock : public WallClock {
19public:
20 StandardWallClock(u64 emulated_cpu_frequency, u64 emulated_clock_frequency)
21 : WallClock(emulated_cpu_frequency, emulated_clock_frequency, false) {
22 start_time = base_timer::now();
23 }
24
25 std::chrono::nanoseconds GetTimeNS() override {
26 base_time_point current = base_timer::now();
27 auto elapsed = current - start_time;
28 return std::chrono::duration_cast<std::chrono::nanoseconds>(elapsed);
29 }
30
31 std::chrono::microseconds GetTimeUS() override {
32 base_time_point current = base_timer::now();
33 auto elapsed = current - start_time;
34 return std::chrono::duration_cast<std::chrono::microseconds>(elapsed);
35 }
36
37 std::chrono::milliseconds GetTimeMS() override {
38 base_time_point current = base_timer::now();
39 auto elapsed = current - start_time;
40 return std::chrono::duration_cast<std::chrono::milliseconds>(elapsed);
41 }
42
43 u64 GetClockCycles() override {
44 std::chrono::nanoseconds time_now = GetTimeNS();
45 const u128 temporary =
46 Common::Multiply64Into128(time_now.count(), emulated_clock_frequency);
47 return Common::Divide128On32(temporary, 1000000000).first;
48 }
49
50 u64 GetCPUCycles() override {
51 std::chrono::nanoseconds time_now = GetTimeNS();
52 const u128 temporary = Common::Multiply64Into128(time_now.count(), emulated_cpu_frequency);
53 return Common::Divide128On32(temporary, 1000000000).first;
54 }
55
56 void Pause(bool is_paused) override {
57 // Do nothing in this clock type.
58 }
59
60private:
61 base_time_point start_time;
62};
63
64#ifdef ARCHITECTURE_x86_64
65
66std::unique_ptr<WallClock> CreateBestMatchingClock(u32 emulated_cpu_frequency,
67 u32 emulated_clock_frequency) {
68 const auto& caps = GetCPUCaps();
69 u64 rtsc_frequency = 0;
70 if (caps.invariant_tsc) {
71 rtsc_frequency = EstimateRDTSCFrequency();
72 }
73 if (rtsc_frequency == 0) {
74 return std::make_unique<StandardWallClock>(emulated_cpu_frequency,
75 emulated_clock_frequency);
76 } else {
77 return std::make_unique<X64::NativeClock>(emulated_cpu_frequency, emulated_clock_frequency,
78 rtsc_frequency);
79 }
80}
81
82#else
83
84std::unique_ptr<WallClock> CreateBestMatchingClock(u32 emulated_cpu_frequency,
85 u32 emulated_clock_frequency) {
86 return std::make_unique<StandardWallClock>(emulated_cpu_frequency, emulated_clock_frequency);
87}
88
89#endif
90
91} // namespace Common
diff --git a/src/common/wall_clock.h b/src/common/wall_clock.h
new file mode 100644
index 000000000..367d72134
--- /dev/null
+++ b/src/common/wall_clock.h
@@ -0,0 +1,53 @@
+// Copyright 2020 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <chrono>
+#include <memory>
+
+#include "common/common_types.h"
+
+namespace Common {
+
+class WallClock {
+public:
+    /// Returns current wall time in nanoseconds
+    virtual std::chrono::nanoseconds GetTimeNS() = 0;
+
+    /// Returns current wall time in microseconds
+    virtual std::chrono::microseconds GetTimeUS() = 0;
+
+    /// Returns current wall time in milliseconds
+    virtual std::chrono::milliseconds GetTimeMS() = 0;
+
+    /// Returns current wall time in emulated clock cycles
+    virtual u64 GetClockCycles() = 0;
+
+    /// Returns current wall time in emulated cpu cycles
+    virtual u64 GetCPUCycles() = 0;
+
+    virtual void Pause(bool is_paused) = 0;
+
+    /// Returns true if the wall clock uses the host CPU's hardware clock
+    bool IsNative() const {
+        return is_native;
+    }
+
+protected:
+    WallClock(u64 emulated_cpu_frequency, u64 emulated_clock_frequency, bool is_native)
+        : emulated_cpu_frequency{emulated_cpu_frequency},
+          emulated_clock_frequency{emulated_clock_frequency}, is_native{is_native} {}
+
+    u64 emulated_cpu_frequency;
+    u64 emulated_clock_frequency;
+
+private:
+    bool is_native;
+};
+
+std::unique_ptr<WallClock> CreateBestMatchingClock(u32 emulated_cpu_frequency,
+                                                   u32 emulated_clock_frequency);
+
+} // namespace Common
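A usage sketch (illustrative, not part of the patch; the frequencies are placeholder values roughly matching the Switch's CPU clock and counter frequency):

    #include "common/wall_clock.h"

    void Example() {
        auto clock = Common::CreateBestMatchingClock(1019215872, 19200000);
        const auto ns = clock->GetTimeNS();        // host wall time since creation
        const u64 ticks = clock->GetClockCycles(); // same instant, in emulated counter ticks
        const u64 cycles = clock->GetCPUCycles();  // and in emulated CPU cycles
    }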
diff --git a/src/common/x64/cpu_detect.cpp b/src/common/x64/cpu_detect.cpp
index c9349a6b4..fccd2eee5 100644
--- a/src/common/x64/cpu_detect.cpp
+++ b/src/common/x64/cpu_detect.cpp
@@ -62,6 +62,17 @@ static CPUCaps Detect() {
     std::memcpy(&caps.brand_string[0], &cpu_id[1], sizeof(int));
     std::memcpy(&caps.brand_string[4], &cpu_id[3], sizeof(int));
     std::memcpy(&caps.brand_string[8], &cpu_id[2], sizeof(int));
+    if (cpu_id[1] == 0x756e6547 && cpu_id[2] == 0x6c65746e && cpu_id[3] == 0x49656e69)
+        caps.manufacturer = Manufacturer::Intel;
+    else if (cpu_id[1] == 0x68747541 && cpu_id[2] == 0x444d4163 && cpu_id[3] == 0x69746e65)
+        caps.manufacturer = Manufacturer::AMD;
+    else if (cpu_id[1] == 0x6f677948 && cpu_id[2] == 0x656e6975 && cpu_id[3] == 0x6e65476e)
+        caps.manufacturer = Manufacturer::Hygon;
+    else
+        caps.manufacturer = Manufacturer::Unknown;
+
+    u32 family = {};
+    u32 model = {};
 
     __cpuid(cpu_id, 0x80000000);
 
@@ -73,6 +84,14 @@ static CPUCaps Detect() {
     // Detect family and other miscellaneous features
     if (max_std_fn >= 1) {
         __cpuid(cpu_id, 0x00000001);
+        family = (cpu_id[0] >> 8) & 0xf;
+        model = (cpu_id[0] >> 4) & 0xf;
+        if (family == 0xf) {
+            family += (cpu_id[0] >> 20) & 0xff;
+        }
+        if (family >= 6) {
+            model += ((cpu_id[0] >> 16) & 0xf) << 4;
+        }
 
         if ((cpu_id[3] >> 25) & 1)
             caps.sse = true;
@@ -110,6 +129,11 @@ static CPUCaps Detect() {
             caps.bmi1 = true;
         if ((cpu_id[1] >> 8) & 1)
             caps.bmi2 = true;
+        // Checks for AVX512F, AVX512CD, AVX512VL, AVX512DQ, AVX512BW (Intel Skylake-X/SP)
+        if ((cpu_id[1] >> 16) & 1 && (cpu_id[1] >> 28) & 1 && (cpu_id[1] >> 31) & 1 &&
+            (cpu_id[1] >> 17) & 1 && (cpu_id[1] >> 30) & 1) {
+            caps.avx512 = caps.avx2;
+        }
     }
 }
@@ -130,6 +154,20 @@ static CPUCaps Detect() {
         caps.fma4 = true;
     }
 
+    if (max_ex_fn >= 0x80000007) {
+        __cpuid(cpu_id, 0x80000007);
+        if (cpu_id[3] & (1 << 8)) {
+            caps.invariant_tsc = true;
+        }
+    }
+
+    if (max_std_fn >= 0x16) {
+        __cpuid(cpu_id, 0x16);
+        caps.base_frequency = cpu_id[0];
+        caps.max_frequency = cpu_id[1];
+        caps.bus_frequency = cpu_id[2];
+    }
+
     return caps;
 }
 
diff --git a/src/common/x64/cpu_detect.h b/src/common/x64/cpu_detect.h
index 20f2ba234..e3b63302e 100644
--- a/src/common/x64/cpu_detect.h
+++ b/src/common/x64/cpu_detect.h
@@ -6,8 +6,16 @@
 
 namespace Common {
 
+enum class Manufacturer : u32 {
+    Intel = 0,
+    AMD = 1,
+    Hygon = 2,
+    Unknown = 3,
+};
+
 /// x86/x64 CPU capabilities that may be detected by this module
 struct CPUCaps {
+    Manufacturer manufacturer;
     char cpu_string[0x21];
     char brand_string[0x41];
     bool sse;
@@ -19,11 +27,16 @@ struct CPUCaps {
     bool lzcnt;
     bool avx;
     bool avx2;
+    bool avx512;
     bool bmi1;
     bool bmi2;
     bool fma;
     bool fma4;
     bool aes;
+    bool invariant_tsc;
+    u32 base_frequency;
+    u32 max_frequency;
+    u32 bus_frequency;
 };
 
 /**
diff --git a/src/common/x64/native_clock.cpp b/src/common/x64/native_clock.cpp
new file mode 100644
index 000000000..424b39b1f
--- /dev/null
+++ b/src/common/x64/native_clock.cpp
@@ -0,0 +1,103 @@
1// Copyright 2020 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <chrono>
6#include <mutex>
7#include <thread>
8
9#ifdef _MSC_VER
10#include <intrin.h>
11#else
12#include <x86intrin.h>
13#endif
14
15#include "common/uint128.h"
16#include "common/x64/native_clock.h"
17
18namespace Common {
19
20u64 EstimateRDTSCFrequency() {
21 const auto milli_10 = std::chrono::milliseconds{10};
22 // get current time
23 _mm_mfence();
24 const u64 tscStart = __rdtsc();
25 const auto startTime = std::chrono::high_resolution_clock::now();
26 // wait roughly 3 seconds
27 while (true) {
28 auto milli = std::chrono::duration_cast<std::chrono::milliseconds>(
29 std::chrono::high_resolution_clock::now() - startTime);
30 if (milli.count() >= 3000)
31 break;
32 std::this_thread::sleep_for(milli_10);
33 }
34 const auto endTime = std::chrono::high_resolution_clock::now();
35 _mm_mfence();
36 const u64 tscEnd = __rdtsc();
37 // calculate difference
38 const u64 timer_diff =
39 std::chrono::duration_cast<std::chrono::nanoseconds>(endTime - startTime).count();
40 const u64 tsc_diff = tscEnd - tscStart;
41 const u64 tsc_freq = MultiplyAndDivide64(tsc_diff, 1000000000ULL, timer_diff);
42 return tsc_freq;
43}
44
45namespace X64 {
46NativeClock::NativeClock(u64 emulated_cpu_frequency, u64 emulated_clock_frequency,
47 u64 rtsc_frequency)
48 : WallClock(emulated_cpu_frequency, emulated_clock_frequency, true), rtsc_frequency{
49 rtsc_frequency} {
50 _mm_mfence();
51 last_measure = __rdtsc();
52 accumulated_ticks = 0U;
53}
54
55u64 NativeClock::GetRTSC() {
56 std::scoped_lock scope{rtsc_serialize};
57 _mm_mfence();
58 const u64 current_measure = __rdtsc();
59 u64 diff = current_measure - last_measure;
60 diff = diff & ~static_cast<u64>(static_cast<s64>(diff) >> 63); // max(diff, 0)
61 if (current_measure > last_measure) {
62 last_measure = current_measure;
63 }
64 accumulated_ticks += diff;
65 /// The clock cannot be more precise than the guest timer, remove the lower bits
66 return accumulated_ticks & inaccuracy_mask;
67}
68
69void NativeClock::Pause(bool is_paused) {
70 if (!is_paused) {
71 _mm_mfence();
72 last_measure = __rdtsc();
73 }
74}
75
76std::chrono::nanoseconds NativeClock::GetTimeNS() {
77 const u64 rtsc_value = GetRTSC();
78 return std::chrono::nanoseconds{MultiplyAndDivide64(rtsc_value, 1000000000, rtsc_frequency)};
79}
80
81std::chrono::microseconds NativeClock::GetTimeUS() {
82 const u64 rtsc_value = GetRTSC();
83 return std::chrono::microseconds{MultiplyAndDivide64(rtsc_value, 1000000, rtsc_frequency)};
84}
85
86std::chrono::milliseconds NativeClock::GetTimeMS() {
87 const u64 rtsc_value = GetRTSC();
88 return std::chrono::milliseconds{MultiplyAndDivide64(rtsc_value, 1000, rtsc_frequency)};
89}
90
91u64 NativeClock::GetClockCycles() {
92 const u64 rtsc_value = GetRTSC();
93 return MultiplyAndDivide64(rtsc_value, emulated_clock_frequency, rtsc_frequency);
94}
95
96u64 NativeClock::GetCPUCycles() {
97 const u64 rtsc_value = GetRTSC();
98 return MultiplyAndDivide64(rtsc_value, emulated_cpu_frequency, rtsc_frequency);
99}
100
101} // namespace X64
102
103} // namespace Common
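
The estimator above boils down to: fence, sample the TSC and a monotonic clock at both ends of a fixed window, then scale the TSC delta by elapsed nanoseconds. A minimal standalone sketch of the same idea (GCC/Clang only, using unsigned __int128 in place of the MultiplyAndDivide64 helper and a shorter 100 ms window for brevity):

    #include <chrono>
    #include <cstdint>
    #include <cstdio>
    #include <thread>

    #include <x86intrin.h>

    int main() {
        using Clock = std::chrono::steady_clock;

        _mm_mfence(); // keep the rdtsc from being reordered into the timed region
        const std::uint64_t tsc_start = __rdtsc();
        const auto time_start = Clock::now();

        std::this_thread::sleep_for(std::chrono::milliseconds{100});

        const auto time_end = Clock::now();
        _mm_mfence();
        const std::uint64_t tsc_end = __rdtsc();

        const auto ns =
            std::chrono::duration_cast<std::chrono::nanoseconds>(time_end - time_start).count();
        // tsc_diff * 1'000'000'000 / ns, widened to 128 bits to avoid overflow
        const auto freq = static_cast<std::uint64_t>(
            static_cast<unsigned __int128>(tsc_end - tsc_start) * 1'000'000'000ULL /
            static_cast<unsigned __int128>(ns));
        std::printf("estimated TSC frequency: %llu Hz\n",
                    static_cast<unsigned long long>(freq));
    }
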
diff --git a/src/common/x64/native_clock.h b/src/common/x64/native_clock.h
new file mode 100644
index 000000000..891a3bbfd
--- /dev/null
+++ b/src/common/x64/native_clock.h
@@ -0,0 +1,48 @@
1// Copyright 2020 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <optional>
8
9#include "common/spin_lock.h"
10#include "common/wall_clock.h"
11
12namespace Common {
13
14namespace X64 {
15class NativeClock : public WallClock {
16public:
17 NativeClock(u64 emulated_cpu_frequency, u64 emulated_clock_frequency, u64 rtsc_frequency);
18
19 std::chrono::nanoseconds GetTimeNS() override;
20
21 std::chrono::microseconds GetTimeUS() override;
22
23 std::chrono::milliseconds GetTimeMS() override;
24
25 u64 GetClockCycles() override;
26
27 u64 GetCPUCycles() override;
28
29 void Pause(bool is_paused) override;
30
31private:
32 u64 GetRTSC();
33
 34 /// Value used to reduce the native clock's accuracy, as some apps rely on
 35 /// undefined behavior where the clock's level of accuracy shouldn't be
 36 /// any higher.
37 static constexpr u64 inaccuracy_mask = ~(0x400 - 1);
38
39 SpinLock rtsc_serialize{};
40 u64 last_measure{};
41 u64 accumulated_ticks{};
42 u64 rtsc_frequency;
43};
44} // namespace X64
45
46u64 EstimateRDTSCFrequency();
47
48} // namespace Common
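
The inaccuracy_mask above rounds the accumulated tick count down to a multiple of 0x400, so readings only ever advance in 1024-tick steps. A tiny demonstration of the quantization:

    #include <cstdint>
    #include <cstdio>

    int main() {
        // ~(0x400 - 1) clears the low 10 bits: round down to a multiple of 1024
        constexpr std::uint64_t inaccuracy_mask = ~(std::uint64_t{0x400} - 1);

        // Ticks inside the same 1024-tick window collapse to the same reading
        for (const std::uint64_t ticks : {1000ULL, 1023ULL, 1024ULL, 123456789ULL}) {
            std::printf("%llu -> %llu\n", static_cast<unsigned long long>(ticks),
                        static_cast<unsigned long long>(ticks & inaccuracy_mask));
        }
    }
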
diff --git a/src/common/x64/xbyak_abi.h b/src/common/x64/xbyak_abi.h
index 794da8a52..a5f5d4fc1 100644
--- a/src/common/x64/xbyak_abi.h
+++ b/src/common/x64/xbyak_abi.h
@@ -11,7 +11,7 @@
11 11
12namespace Common::X64 { 12namespace Common::X64 {
13 13
14inline int RegToIndex(const Xbyak::Reg& reg) { 14inline std::size_t RegToIndex(const Xbyak::Reg& reg) {
15 using Kind = Xbyak::Reg::Kind; 15 using Kind = Xbyak::Reg::Kind;
16 ASSERT_MSG((reg.getKind() & (Kind::REG | Kind::XMM)) != 0, 16 ASSERT_MSG((reg.getKind() & (Kind::REG | Kind::XMM)) != 0,
17 "RegSet only support GPRs and XMM registers."); 17 "RegSet only support GPRs and XMM registers.");
@@ -19,17 +19,17 @@ inline int RegToIndex(const Xbyak::Reg& reg) {
19 return reg.getIdx() + (reg.getKind() == Kind::REG ? 0 : 16); 19 return reg.getIdx() + (reg.getKind() == Kind::REG ? 0 : 16);
20} 20}
21 21
22inline Xbyak::Reg64 IndexToReg64(int reg_index) { 22inline Xbyak::Reg64 IndexToReg64(std::size_t reg_index) {
23 ASSERT(reg_index < 16); 23 ASSERT(reg_index < 16);
24 return Xbyak::Reg64(reg_index); 24 return Xbyak::Reg64(static_cast<int>(reg_index));
25} 25}
26 26
27inline Xbyak::Xmm IndexToXmm(int reg_index) { 27inline Xbyak::Xmm IndexToXmm(std::size_t reg_index) {
28 ASSERT(reg_index >= 16 && reg_index < 32); 28 ASSERT(reg_index >= 16 && reg_index < 32);
29 return Xbyak::Xmm(reg_index - 16); 29 return Xbyak::Xmm(static_cast<int>(reg_index - 16));
30} 30}
31 31
32inline Xbyak::Reg IndexToReg(int reg_index) { 32inline Xbyak::Reg IndexToReg(std::size_t reg_index) {
33 if (reg_index < 16) { 33 if (reg_index < 16) {
34 return IndexToReg64(reg_index); 34 return IndexToReg64(reg_index);
35 } else { 35 } else {
@@ -151,9 +151,13 @@ constexpr size_t ABI_SHADOW_SPACE = 0;
151 151
152#endif 152#endif
153 153
154inline void ABI_CalculateFrameSize(std::bitset<32> regs, size_t rsp_alignment, 154struct ABIFrameInfo {
155 size_t needed_frame_size, s32* out_subtraction, 155 s32 subtraction;
156 s32* out_xmm_offset) { 156 s32 xmm_offset;
157};
158
159inline ABIFrameInfo ABI_CalculateFrameSize(std::bitset<32> regs, size_t rsp_alignment,
160 size_t needed_frame_size) {
157 const auto count = (regs & ABI_ALL_GPRS).count(); 161 const auto count = (regs & ABI_ALL_GPRS).count();
158 rsp_alignment -= count * 8; 162 rsp_alignment -= count * 8;
159 size_t subtraction = 0; 163 size_t subtraction = 0;
@@ -170,33 +174,28 @@ inline void ABI_CalculateFrameSize(std::bitset<32> regs, size_t rsp_alignment,
170 rsp_alignment -= subtraction; 174 rsp_alignment -= subtraction;
171 subtraction += rsp_alignment & 0xF; 175 subtraction += rsp_alignment & 0xF;
172 176
173 *out_subtraction = (s32)subtraction; 177 return ABIFrameInfo{static_cast<s32>(subtraction),
174 *out_xmm_offset = (s32)(subtraction - xmm_base_subtraction); 178 static_cast<s32>(subtraction - xmm_base_subtraction)};
175} 179}
176 180
177inline size_t ABI_PushRegistersAndAdjustStack(Xbyak::CodeGenerator& code, std::bitset<32> regs, 181inline size_t ABI_PushRegistersAndAdjustStack(Xbyak::CodeGenerator& code, std::bitset<32> regs,
178 size_t rsp_alignment, size_t needed_frame_size = 0) { 182 size_t rsp_alignment, size_t needed_frame_size = 0) {
179 s32 subtraction, xmm_offset; 183 auto frame_info = ABI_CalculateFrameSize(regs, rsp_alignment, needed_frame_size);
180 ABI_CalculateFrameSize(regs, rsp_alignment, needed_frame_size, &subtraction, &xmm_offset); 184
181 for (std::size_t i = 0; i < regs.size(); ++i) { 185 for (std::size_t i = 0; i < regs.size(); ++i) {
182 if (regs[i] && ABI_ALL_GPRS[i]) { 186 if (regs[i] && ABI_ALL_GPRS[i]) {
183 code.push(IndexToReg64(static_cast<int>(i))); 187 code.push(IndexToReg64(i));
184 } 188 }
185 } 189 }
186 if (subtraction != 0) {
187 code.sub(code.rsp, subtraction);
188 }
189 190
190 for (int i = 0; i < regs.count(); i++) { 191 if (frame_info.subtraction != 0) {
191 if (regs.test(i) & ABI_ALL_GPRS.test(i)) { 192 code.sub(code.rsp, frame_info.subtraction);
192 code.push(IndexToReg64(i));
193 }
194 } 193 }
195 194
196 for (std::size_t i = 0; i < regs.size(); ++i) { 195 for (std::size_t i = 0; i < regs.size(); ++i) {
197 if (regs[i] && ABI_ALL_XMMS[i]) { 196 if (regs[i] && ABI_ALL_XMMS[i]) {
198 code.movaps(code.xword[code.rsp + xmm_offset], IndexToXmm(static_cast<int>(i))); 197 code.movaps(code.xword[code.rsp + frame_info.xmm_offset], IndexToXmm(i));
199 xmm_offset += 0x10; 198 frame_info.xmm_offset += 0x10;
200 } 199 }
201 } 200 }
202 201
@@ -205,59 +204,23 @@ inline size_t ABI_PushRegistersAndAdjustStack(Xbyak::CodeGenerator& code, std::b
205 204
206inline void ABI_PopRegistersAndAdjustStack(Xbyak::CodeGenerator& code, std::bitset<32> regs, 205inline void ABI_PopRegistersAndAdjustStack(Xbyak::CodeGenerator& code, std::bitset<32> regs,
207 size_t rsp_alignment, size_t needed_frame_size = 0) { 206 size_t rsp_alignment, size_t needed_frame_size = 0) {
208 s32 subtraction, xmm_offset; 207 auto frame_info = ABI_CalculateFrameSize(regs, rsp_alignment, needed_frame_size);
209 ABI_CalculateFrameSize(regs, rsp_alignment, needed_frame_size, &subtraction, &xmm_offset);
210 208
211 for (std::size_t i = 0; i < regs.size(); ++i) { 209 for (std::size_t i = 0; i < regs.size(); ++i) {
212 if (regs[i] && ABI_ALL_XMMS[i]) { 210 if (regs[i] && ABI_ALL_XMMS[i]) {
213 code.movaps(IndexToXmm(static_cast<int>(i)), code.xword[code.rsp + xmm_offset]); 211 code.movaps(IndexToXmm(i), code.xword[code.rsp + frame_info.xmm_offset]);
214 xmm_offset += 0x10; 212 frame_info.xmm_offset += 0x10;
215 } 213 }
216 } 214 }
217 215
218 if (subtraction != 0) { 216 if (frame_info.subtraction != 0) {
219 code.add(code.rsp, subtraction); 217 code.add(code.rsp, frame_info.subtraction);
220 } 218 }
221 219
222 // GPRs need to be popped in reverse order 220 // GPRs need to be popped in reverse order
223 for (int i = 15; i >= 0; i--) { 221 for (std::size_t j = 0; j < regs.size(); ++j) {
224 if (regs[i]) { 222 const std::size_t i = regs.size() - j - 1;
225 code.pop(IndexToReg64(i));
226 }
227 }
228}
229
230inline size_t ABI_PushRegistersAndAdjustStackGPS(Xbyak::CodeGenerator& code, std::bitset<32> regs,
231 size_t rsp_alignment,
232 size_t needed_frame_size = 0) {
233 s32 subtraction, xmm_offset;
234 ABI_CalculateFrameSize(regs, rsp_alignment, needed_frame_size, &subtraction, &xmm_offset);
235
236 for (std::size_t i = 0; i < regs.size(); ++i) {
237 if (regs[i] && ABI_ALL_GPRS[i]) { 223 if (regs[i] && ABI_ALL_GPRS[i]) {
238 code.push(IndexToReg64(static_cast<int>(i)));
239 }
240 }
241
242 if (subtraction != 0) {
243 code.sub(code.rsp, subtraction);
244 }
245
246 return ABI_SHADOW_SPACE;
247}
248
249inline void ABI_PopRegistersAndAdjustStackGPS(Xbyak::CodeGenerator& code, std::bitset<32> regs,
250 size_t rsp_alignment, size_t needed_frame_size = 0) {
251 s32 subtraction, xmm_offset;
252 ABI_CalculateFrameSize(regs, rsp_alignment, needed_frame_size, &subtraction, &xmm_offset);
253
254 if (subtraction != 0) {
255 code.add(code.rsp, subtraction);
256 }
257
258 // GPRs need to be popped in reverse order
259 for (int i = 15; i >= 0; i--) {
260 if (regs[i]) {
261 code.pop(IndexToReg64(i)); 224 code.pop(IndexToReg64(i));
262 } 225 }
263 } 226 }
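
The main change in this header is the return-a-struct pattern: ABI_CalculateFrameSize used to fill two s32 out-parameters, and now returns an ABIFrameInfo by value, so call sites bind one named object instead of passing &subtraction/&xmm_offset. A reduced sketch of the shape of that refactor (frame arithmetic simplified, constants and names hypothetical):

    #include <bitset>
    #include <cstddef>
    #include <cstdint>
    #include <cstdio>

    struct FrameInfo {
        std::int32_t subtraction; // bytes to subtract from rsp
        std::int32_t xmm_offset;  // start of the XMM save area within the frame
    };

    FrameInfo CalculateFrame(std::bitset<32> regs, std::size_t rsp_alignment,
                             std::size_t needed_frame_size) {
        const std::size_t gpr_count = (regs & std::bitset<32>{0xFFFF}).count();
        rsp_alignment -= gpr_count * 8; // the pushes already moved rsp

        std::size_t subtraction = 0;
        const std::size_t xmm_count = (regs >> 16).count();
        if (xmm_count != 0) {
            subtraction = rsp_alignment & 0xF; // align the XMM save area
            subtraction += 16 * xmm_count;
        }
        const std::size_t xmm_base = subtraction;

        subtraction += needed_frame_size;
        rsp_alignment -= subtraction;
        subtraction += rsp_alignment & 0xF; // final 16-byte alignment

        return {static_cast<std::int32_t>(subtraction),
                static_cast<std::int32_t>(subtraction - xmm_base)};
    }

    int main() {
        const auto frame = CalculateFrame(std::bitset<32>{0x000F000F}, 8, 32);
        std::printf("sub rsp, %d ; xmm save area at rsp+%d\n",
                    static_cast<int>(frame.subtraction), static_cast<int>(frame.xmm_offset));
    }
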
diff --git a/src/core/CMakeLists.txt b/src/core/CMakeLists.txt
index 47418006b..f87d67db5 100644
--- a/src/core/CMakeLists.txt
+++ b/src/core/CMakeLists.txt
@@ -7,6 +7,16 @@ endif()
7add_library(core STATIC 7add_library(core STATIC
8 arm/arm_interface.h 8 arm/arm_interface.h
9 arm/arm_interface.cpp 9 arm/arm_interface.cpp
10 arm/cpu_interrupt_handler.cpp
11 arm/cpu_interrupt_handler.h
12 arm/dynarmic/arm_dynarmic_32.cpp
13 arm/dynarmic/arm_dynarmic_32.h
14 arm/dynarmic/arm_dynarmic_64.cpp
15 arm/dynarmic/arm_dynarmic_64.h
16 arm/dynarmic/arm_dynarmic_cp15.cpp
17 arm/dynarmic/arm_dynarmic_cp15.h
18 arm/dynarmic/arm_exclusive_monitor.cpp
19 arm/dynarmic/arm_exclusive_monitor.h
10 arm/exclusive_monitor.cpp 20 arm/exclusive_monitor.cpp
11 arm/exclusive_monitor.h 21 arm/exclusive_monitor.h
12 arm/unicorn/arm_unicorn.cpp 22 arm/unicorn/arm_unicorn.cpp
@@ -15,8 +25,6 @@ add_library(core STATIC
15 constants.h 25 constants.h
16 core.cpp 26 core.cpp
17 core.h 27 core.h
18 core_manager.cpp
19 core_manager.h
20 core_timing.cpp 28 core_timing.cpp
21 core_timing.h 29 core_timing.h
22 core_timing_util.cpp 30 core_timing_util.cpp
@@ -606,11 +614,11 @@ endif()
606create_target_directory_groups(core) 614create_target_directory_groups(core)
607 615
608target_link_libraries(core PUBLIC common PRIVATE audio_core video_core) 616target_link_libraries(core PUBLIC common PRIVATE audio_core video_core)
609target_link_libraries(core PUBLIC Boost::boost PRIVATE fmt::fmt nlohmann_json::nlohmann_json mbedtls Opus::Opus unicorn) 617target_link_libraries(core PUBLIC Boost::boost PRIVATE fmt::fmt nlohmann_json::nlohmann_json mbedtls Opus::Opus unicorn zip)
610 618
611if (YUZU_ENABLE_BOXCAT) 619if (YUZU_ENABLE_BOXCAT)
612 target_compile_definitions(core PRIVATE -DYUZU_ENABLE_BOXCAT) 620 target_compile_definitions(core PRIVATE -DYUZU_ENABLE_BOXCAT)
613 target_link_libraries(core PRIVATE httplib nlohmann_json::nlohmann_json zip) 621 target_link_libraries(core PRIVATE httplib nlohmann_json::nlohmann_json)
614endif() 622endif()
615 623
616if (ENABLE_WEB_SERVICE) 624if (ENABLE_WEB_SERVICE)
diff --git a/src/core/arm/arm_interface.cpp b/src/core/arm/arm_interface.cpp
index d079a1bc8..d2295ed90 100644
--- a/src/core/arm/arm_interface.cpp
+++ b/src/core/arm/arm_interface.cpp
@@ -139,6 +139,63 @@ std::optional<std::string> GetSymbolName(const Symbols& symbols, VAddr func_addr
139 139
140constexpr u64 SEGMENT_BASE = 0x7100000000ull; 140constexpr u64 SEGMENT_BASE = 0x7100000000ull;
141 141
142std::vector<ARM_Interface::BacktraceEntry> ARM_Interface::GetBacktraceFromContext(
143 System& system, const ThreadContext64& ctx) {
144 std::vector<BacktraceEntry> out;
145 auto& memory = system.Memory();
146
147 auto fp = ctx.cpu_registers[29];
148 auto lr = ctx.cpu_registers[30];
149 while (true) {
150 out.push_back({"", 0, lr, 0});
151 if (!fp) {
152 break;
153 }
154 lr = memory.Read64(fp + 8) - 4;
155 fp = memory.Read64(fp);
156 }
157
158 std::map<VAddr, std::string> modules;
159 auto& loader{system.GetAppLoader()};
160 if (loader.ReadNSOModules(modules) != Loader::ResultStatus::Success) {
161 return {};
162 }
163
164 std::map<std::string, Symbols> symbols;
165 for (const auto& module : modules) {
166 symbols.insert_or_assign(module.second, GetSymbols(module.first, memory));
167 }
168
169 for (auto& entry : out) {
170 VAddr base = 0;
171 for (auto iter = modules.rbegin(); iter != modules.rend(); ++iter) {
172 const auto& module{*iter};
173 if (entry.original_address >= module.first) {
174 entry.module = module.second;
175 base = module.first;
176 break;
177 }
178 }
179
180 entry.offset = entry.original_address - base;
181 entry.address = SEGMENT_BASE + entry.offset;
182
183 if (entry.module.empty())
184 entry.module = "unknown";
185
186 const auto symbol_set = symbols.find(entry.module);
187 if (symbol_set != symbols.end()) {
188 const auto symbol = GetSymbolName(symbol_set->second, entry.offset);
189 if (symbol.has_value()) {
190 // TODO(DarkLordZach): Add demangling of symbol names.
191 entry.name = *symbol;
192 }
193 }
194 }
195
196 return out;
197}
198
142std::vector<ARM_Interface::BacktraceEntry> ARM_Interface::GetBacktrace() const { 199std::vector<ARM_Interface::BacktraceEntry> ARM_Interface::GetBacktrace() const {
143 std::vector<BacktraceEntry> out; 200 std::vector<BacktraceEntry> out;
144 auto& memory = system.Memory(); 201 auto& memory = system.Memory();
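
The loop at the top of GetBacktraceFromContext is a standard AArch64 frame-record walk: fp (x29) points at a two-word record holding the caller's saved fp and lr, and lr - 4 lands on the call instruction itself. A toy reconstruction of that walk over a fake memory map (addresses hypothetical, just to show the chain):

    #include <cstdint>
    #include <cstdio>
    #include <map>
    #include <vector>

    int main() {
        // addr -> value; each frame record is { saved fp, saved lr }
        const std::map<std::uint64_t, std::uint64_t> memory{
            {0x1000, 0x2000}, {0x1008, 0x400128}, // frame 0
            {0x2000, 0x0000}, {0x2008, 0x400080}, // frame 1: fp == 0 ends the chain
        };
        const auto read64 = [&](std::uint64_t addr) { return memory.at(addr); };

        std::uint64_t fp = 0x1000;   // ctx.cpu_registers[29]
        std::uint64_t lr = 0x400200; // ctx.cpu_registers[30]

        std::vector<std::uint64_t> backtrace;
        while (true) {
            backtrace.push_back(lr);
            if (!fp) {
                break;
            }
            lr = read64(fp + 8) - 4; // saved lr is the return address; -4 is the call
            fp = read64(fp);         // hop to the caller's frame record
        }

        for (const auto address : backtrace) {
            std::printf("0x%llx\n", static_cast<unsigned long long>(address));
        }
    }
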
diff --git a/src/core/arm/arm_interface.h b/src/core/arm/arm_interface.h
index cb2e640e2..1f24051e4 100644
--- a/src/core/arm/arm_interface.h
+++ b/src/core/arm/arm_interface.h
@@ -7,6 +7,7 @@
7#include <array> 7#include <array>
8#include <vector> 8#include <vector>
9#include "common/common_types.h" 9#include "common/common_types.h"
10#include "core/hardware_properties.h"
10 11
11namespace Common { 12namespace Common {
12struct PageTable; 13struct PageTable;
@@ -18,25 +19,29 @@ enum class VMAPermission : u8;
18 19
19namespace Core { 20namespace Core {
20class System; 21class System;
22class CPUInterruptHandler;
23
24using CPUInterrupts = std::array<CPUInterruptHandler, Core::Hardware::NUM_CPU_CORES>;
21 25
22/// Generic ARMv8 CPU interface 26/// Generic ARMv8 CPU interface
23class ARM_Interface : NonCopyable { 27class ARM_Interface : NonCopyable {
24public: 28public:
25 explicit ARM_Interface(System& system_) : system{system_} {} 29 explicit ARM_Interface(System& system_, CPUInterrupts& interrupt_handlers, bool uses_wall_clock)
30 : system{system_}, interrupt_handlers{interrupt_handlers}, uses_wall_clock{
31 uses_wall_clock} {}
26 virtual ~ARM_Interface() = default; 32 virtual ~ARM_Interface() = default;
27 33
28 struct ThreadContext32 { 34 struct ThreadContext32 {
29 std::array<u32, 16> cpu_registers{}; 35 std::array<u32, 16> cpu_registers{};
36 std::array<u32, 64> extension_registers{};
30 u32 cpsr{}; 37 u32 cpsr{};
31 std::array<u8, 4> padding{};
32 std::array<u64, 32> fprs{};
33 u32 fpscr{}; 38 u32 fpscr{};
34 u32 fpexc{}; 39 u32 fpexc{};
35 u32 tpidr{}; 40 u32 tpidr{};
36 }; 41 };
37 // Internally within the kernel, it expects the AArch32 version of the 42 // Internally within the kernel, it expects the AArch32 version of the
 38 // thread context to be 344 bytes in size. 43 // thread context to be 336 bytes in size.
39 static_assert(sizeof(ThreadContext32) == 0x158); 44 static_assert(sizeof(ThreadContext32) == 0x150);
40 45
41 struct ThreadContext64 { 46 struct ThreadContext64 {
42 std::array<u64, 31> cpu_registers{}; 47 std::array<u64, 31> cpu_registers{};
@@ -143,6 +148,8 @@ public:
143 */ 148 */
144 virtual void SetTPIDR_EL0(u64 value) = 0; 149 virtual void SetTPIDR_EL0(u64 value) = 0;
145 150
151 virtual void ChangeProcessorID(std::size_t new_core_id) = 0;
152
146 virtual void SaveContext(ThreadContext32& ctx) = 0; 153 virtual void SaveContext(ThreadContext32& ctx) = 0;
147 virtual void SaveContext(ThreadContext64& ctx) = 0; 154 virtual void SaveContext(ThreadContext64& ctx) = 0;
148 virtual void LoadContext(const ThreadContext32& ctx) = 0; 155 virtual void LoadContext(const ThreadContext32& ctx) = 0;
@@ -162,6 +169,9 @@ public:
162 std::string name; 169 std::string name;
163 }; 170 };
164 171
172 static std::vector<BacktraceEntry> GetBacktraceFromContext(System& system,
173 const ThreadContext64& ctx);
174
165 std::vector<BacktraceEntry> GetBacktrace() const; 175 std::vector<BacktraceEntry> GetBacktrace() const;
166 176
167 /// fp (= r29) points to the last frame record. 177 /// fp (= r29) points to the last frame record.
@@ -175,6 +185,8 @@ public:
175protected: 185protected:
176 /// System context that this ARM interface is running under. 186 /// System context that this ARM interface is running under.
177 System& system; 187 System& system;
188 CPUInterrupts& interrupt_handlers;
189 bool uses_wall_clock;
178}; 190};
179 191
180} // namespace Core 192} // namespace Core
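
The new ThreadContext32 layout is all u32 fields: 16 GPRs, 64 extension registers, and four status words, i.e. (16 + 64 + 4) * 4 = 336 = 0x150 bytes, which is exactly what the updated static_assert checks. A standalone version of that size check (type alias local to the sketch):

    #include <array>
    #include <cstdint>

    using u32 = std::uint32_t;

    struct ThreadContext32 {
        std::array<u32, 16> cpu_registers{};
        std::array<u32, 64> extension_registers{};
        u32 cpsr{};
        u32 fpscr{};
        u32 fpexc{};
        u32 tpidr{};
    };

    // All members are 4-byte-aligned u32s, so there is no padding:
    // (16 + 64 + 4) * 4 == 336 == 0x150
    static_assert(sizeof(ThreadContext32) == 0x150, "AArch32 context must be 336 bytes");

    int main() {}
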
diff --git a/src/core/arm/cpu_interrupt_handler.cpp b/src/core/arm/cpu_interrupt_handler.cpp
new file mode 100644
index 000000000..2f1a1a269
--- /dev/null
+++ b/src/core/arm/cpu_interrupt_handler.cpp
@@ -0,0 +1,27 @@
1// Copyright 2020 yuzu emulator team
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/thread.h"
6#include "core/arm/cpu_interrupt_handler.h"
7
8namespace Core {
9
10CPUInterruptHandler::CPUInterruptHandler() : is_interrupted{} {
11 interrupt_event = std::make_unique<Common::Event>();
12}
13
14CPUInterruptHandler::~CPUInterruptHandler() = default;
15
16void CPUInterruptHandler::SetInterrupt(bool is_interrupted_) {
17 if (is_interrupted_) {
18 interrupt_event->Set();
19 }
20 this->is_interrupted = is_interrupted_;
21}
22
23void CPUInterruptHandler::AwaitInterrupt() {
24 interrupt_event->Wait();
25}
26
27} // namespace Core
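
SetInterrupt/AwaitInterrupt lean entirely on Common::Event's Set/Wait pair. A condition-variable sketch of an event with those semantics (an assumption about how Common::Event behaves here — blocking Wait, auto-reset on wake — not its actual implementation):

    #include <condition_variable>
    #include <cstdio>
    #include <mutex>
    #include <thread>

    class Event {
    public:
        void Set() {
            std::lock_guard lk{mutex};
            signaled = true;
            cv.notify_one();
        }

        void Wait() {
            std::unique_lock lk{mutex};
            cv.wait(lk, [this] { return signaled; });
            signaled = false; // auto-reset: the next Wait() blocks again
        }

    private:
        std::condition_variable cv;
        std::mutex mutex;
        bool signaled = false;
    };

    int main() {
        Event event;
        std::thread waiter{[&] {
            event.Wait();
            std::puts("woken");
        }};
        event.Set();
        waiter.join();
    }
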
diff --git a/src/core/arm/cpu_interrupt_handler.h b/src/core/arm/cpu_interrupt_handler.h
new file mode 100644
index 000000000..3d062d326
--- /dev/null
+++ b/src/core/arm/cpu_interrupt_handler.h
@@ -0,0 +1,39 @@
1// Copyright 2020 yuzu emulator team
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <memory>
8
9namespace Common {
10class Event;
11}
12
13namespace Core {
14
15class CPUInterruptHandler {
16public:
17 CPUInterruptHandler();
18 ~CPUInterruptHandler();
19
20 CPUInterruptHandler(const CPUInterruptHandler&) = delete;
21 CPUInterruptHandler& operator=(const CPUInterruptHandler&) = delete;
22
23 CPUInterruptHandler(CPUInterruptHandler&&) = default;
24 CPUInterruptHandler& operator=(CPUInterruptHandler&&) = default;
25
26 bool IsInterrupted() const {
27 return is_interrupted;
28 }
29
30 void SetInterrupt(bool is_interrupted);
31
32 void AwaitInterrupt();
33
34private:
35 bool is_interrupted{};
36 std::unique_ptr<Common::Event> interrupt_event;
37};
38
39} // namespace Core
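
One caveat with the header above: is_interrupted is a plain bool written by one thread and polled by another, which is formally a data race. A self-contained variant of the same handler with the flag made atomic (a hardening assumption on top of the diff, not what it ships):

    #include <atomic>
    #include <condition_variable>
    #include <cstdio>
    #include <mutex>
    #include <thread>

    class CPUInterruptHandler {
    public:
        bool IsInterrupted() const {
            return is_interrupted.load(std::memory_order_acquire);
        }

        void SetInterrupt(bool interrupted) {
            is_interrupted.store(interrupted, std::memory_order_release);
            if (interrupted) {
                std::lock_guard lk{mutex};
                cv.notify_one();
            }
        }

        void AwaitInterrupt() {
            std::unique_lock lk{mutex};
            cv.wait(lk, [this] { return IsInterrupted(); });
        }

    private:
        std::atomic<bool> is_interrupted{false};
        std::condition_variable cv;
        std::mutex mutex;
    };

    int main() {
        CPUInterruptHandler handler;
        std::thread core{[&] {
            handler.AwaitInterrupt(); // idle core sleeps until it is kicked
            std::puts("core: interrupted");
        }};
        handler.SetInterrupt(true);
        core.join();
    }
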
diff --git a/src/core/arm/dynarmic/arm_dynarmic_32.cpp b/src/core/arm/dynarmic/arm_dynarmic_32.cpp
index 9bc86e3b9..0d4ab95b7 100644
--- a/src/core/arm/dynarmic/arm_dynarmic_32.cpp
+++ b/src/core/arm/dynarmic/arm_dynarmic_32.cpp
@@ -7,15 +7,17 @@
7#include <dynarmic/A32/a32.h> 7#include <dynarmic/A32/a32.h>
8#include <dynarmic/A32/config.h> 8#include <dynarmic/A32/config.h>
9#include <dynarmic/A32/context.h> 9#include <dynarmic/A32/context.h>
10#include "common/microprofile.h" 10#include "common/logging/log.h"
11#include "common/page_table.h"
12#include "core/arm/cpu_interrupt_handler.h"
11#include "core/arm/dynarmic/arm_dynarmic_32.h" 13#include "core/arm/dynarmic/arm_dynarmic_32.h"
12#include "core/arm/dynarmic/arm_dynarmic_64.h"
13#include "core/arm/dynarmic/arm_dynarmic_cp15.h" 14#include "core/arm/dynarmic/arm_dynarmic_cp15.h"
15#include "core/arm/dynarmic/arm_exclusive_monitor.h"
14#include "core/core.h" 16#include "core/core.h"
15#include "core/core_manager.h"
16#include "core/core_timing.h" 17#include "core/core_timing.h"
17#include "core/hle/kernel/svc.h" 18#include "core/hle/kernel/svc.h"
18#include "core/memory.h" 19#include "core/memory.h"
20#include "core/settings.h"
19 21
20namespace Core { 22namespace Core {
21 23
@@ -49,8 +51,22 @@ public:
49 parent.system.Memory().Write64(vaddr, value); 51 parent.system.Memory().Write64(vaddr, value);
50 } 52 }
51 53
54 bool MemoryWriteExclusive8(u32 vaddr, u8 value, u8 expected) override {
55 return parent.system.Memory().WriteExclusive8(vaddr, value, expected);
56 }
57 bool MemoryWriteExclusive16(u32 vaddr, u16 value, u16 expected) override {
58 return parent.system.Memory().WriteExclusive16(vaddr, value, expected);
59 }
60 bool MemoryWriteExclusive32(u32 vaddr, u32 value, u32 expected) override {
61 return parent.system.Memory().WriteExclusive32(vaddr, value, expected);
62 }
63 bool MemoryWriteExclusive64(u32 vaddr, u64 value, u64 expected) override {
64 return parent.system.Memory().WriteExclusive64(vaddr, value, expected);
65 }
66
52 void InterpreterFallback(u32 pc, std::size_t num_instructions) override { 67 void InterpreterFallback(u32 pc, std::size_t num_instructions) override {
53 UNIMPLEMENTED(); 68 UNIMPLEMENTED_MSG("This should never happen, pc = {:08X}, code = {:08X}", pc,
69 MemoryReadCode(pc));
54 } 70 }
55 71
56 void ExceptionRaised(u32 pc, Dynarmic::A32::Exception exception) override { 72 void ExceptionRaised(u32 pc, Dynarmic::A32::Exception exception) override {
@@ -61,7 +77,7 @@ public:
61 case Dynarmic::A32::Exception::Breakpoint: 77 case Dynarmic::A32::Exception::Breakpoint:
62 break; 78 break;
63 } 79 }
64 LOG_CRITICAL(HW_GPU, "ExceptionRaised(exception = {}, pc = {:08X}, code = {:08X})", 80 LOG_CRITICAL(Core_ARM, "ExceptionRaised(exception = {}, pc = {:08X}, code = {:08X})",
65 static_cast<std::size_t>(exception), pc, MemoryReadCode(pc)); 81 static_cast<std::size_t>(exception), pc, MemoryReadCode(pc));
66 UNIMPLEMENTED(); 82 UNIMPLEMENTED();
67 } 83 }
@@ -71,26 +87,36 @@ public:
71 } 87 }
72 88
73 void AddTicks(u64 ticks) override { 89 void AddTicks(u64 ticks) override {
90 if (parent.uses_wall_clock) {
91 return;
92 }
74 // Divide the number of ticks by the amount of CPU cores. TODO(Subv): This yields only a 93 // Divide the number of ticks by the amount of CPU cores. TODO(Subv): This yields only a
75 // rough approximation of the amount of executed ticks in the system, it may be thrown off 94 // rough approximation of the amount of executed ticks in the system, it may be thrown off
76 // if not all cores are doing a similar amount of work. Instead of doing this, we should 95 // if not all cores are doing a similar amount of work. Instead of doing this, we should
 77 // devise a way so that timing is consistent across all cores without increasing the ticks 4 96 // devise a way so that timing is consistent across all cores without increasing the ticks 4
78 // times. 97 // times.
79 u64 amortized_ticks = (ticks - num_interpreted_instructions) / Core::NUM_CPU_CORES; 98 u64 amortized_ticks =
99 (ticks - num_interpreted_instructions) / Core::Hardware::NUM_CPU_CORES;
80 // Always execute at least one tick. 100 // Always execute at least one tick.
81 amortized_ticks = std::max<u64>(amortized_ticks, 1); 101 amortized_ticks = std::max<u64>(amortized_ticks, 1);
82 102
83 parent.system.CoreTiming().AddTicks(amortized_ticks); 103 parent.system.CoreTiming().AddTicks(amortized_ticks);
84 num_interpreted_instructions = 0; 104 num_interpreted_instructions = 0;
85 } 105 }
106
86 u64 GetTicksRemaining() override { 107 u64 GetTicksRemaining() override {
87 return std::max(parent.system.CoreTiming().GetDowncount(), {}); 108 if (parent.uses_wall_clock) {
109 if (!parent.interrupt_handlers[parent.core_index].IsInterrupted()) {
110 return minimum_run_cycles;
111 }
112 return 0U;
113 }
114 return std::max<s64>(parent.system.CoreTiming().GetDowncount(), 0);
88 } 115 }
89 116
90 ARM_Dynarmic_32& parent; 117 ARM_Dynarmic_32& parent;
91 std::size_t num_interpreted_instructions{}; 118 std::size_t num_interpreted_instructions{};
92 u64 tpidrro_el0{}; 119 static constexpr u64 minimum_run_cycles = 1000U;
93 u64 tpidr_el0{};
94}; 120};
95 121
96std::shared_ptr<Dynarmic::A32::Jit> ARM_Dynarmic_32::MakeJit(Common::PageTable& page_table, 122std::shared_ptr<Dynarmic::A32::Jit> ARM_Dynarmic_32::MakeJit(Common::PageTable& page_table,
@@ -99,26 +125,46 @@ std::shared_ptr<Dynarmic::A32::Jit> ARM_Dynarmic_32::MakeJit(Common::PageTable&
99 config.callbacks = cb.get(); 125 config.callbacks = cb.get();
100 // TODO(bunnei): Implement page table for 32-bit 126 // TODO(bunnei): Implement page table for 32-bit
101 // config.page_table = &page_table.pointers; 127 // config.page_table = &page_table.pointers;
102 config.coprocessors[15] = std::make_shared<DynarmicCP15>((u32*)&CP15_regs[0]); 128 config.coprocessors[15] = cp15;
103 config.define_unpredictable_behaviour = true; 129 config.define_unpredictable_behaviour = true;
130 static constexpr std::size_t PAGE_BITS = 12;
131 static constexpr std::size_t NUM_PAGE_TABLE_ENTRIES = 1 << (32 - PAGE_BITS);
132 config.page_table = reinterpret_cast<std::array<std::uint8_t*, NUM_PAGE_TABLE_ENTRIES>*>(
133 page_table.pointers.data());
134 config.absolute_offset_page_table = true;
135 config.detect_misaligned_access_via_page_table = 16 | 32 | 64 | 128;
136 config.only_detect_misalignment_via_page_table_on_page_boundary = true;
137
138 // Multi-process state
139 config.processor_id = core_index;
140 config.global_monitor = &exclusive_monitor.monitor;
141
142 // Timing
143 config.wall_clock_cntpct = uses_wall_clock;
144
145 // Optimizations
146 if (Settings::values.disable_cpu_opt) {
147 config.enable_optimizations = false;
148 config.enable_fast_dispatch = false;
149 }
150
104 return std::make_unique<Dynarmic::A32::Jit>(config); 151 return std::make_unique<Dynarmic::A32::Jit>(config);
105} 152}
106 153
107MICROPROFILE_DEFINE(ARM_Jit_Dynarmic_32, "ARM JIT", "Dynarmic", MP_RGB(255, 64, 64));
108
109void ARM_Dynarmic_32::Run() { 154void ARM_Dynarmic_32::Run() {
110 MICROPROFILE_SCOPE(ARM_Jit_Dynarmic_32);
111 jit->Run(); 155 jit->Run();
112} 156}
113 157
114void ARM_Dynarmic_32::Step() { 158void ARM_Dynarmic_32::Step() {
115 cb->InterpreterFallback(jit->Regs()[15], 1); 159 jit->Step();
116} 160}
117 161
118ARM_Dynarmic_32::ARM_Dynarmic_32(System& system, ExclusiveMonitor& exclusive_monitor, 162ARM_Dynarmic_32::ARM_Dynarmic_32(System& system, CPUInterrupts& interrupt_handlers,
163 bool uses_wall_clock, ExclusiveMonitor& exclusive_monitor,
119 std::size_t core_index) 164 std::size_t core_index)
120 : ARM_Interface{system}, 165 : ARM_Interface{system, interrupt_handlers, uses_wall_clock},
121 cb(std::make_unique<DynarmicCallbacks32>(*this)), core_index{core_index}, 166 cb(std::make_unique<DynarmicCallbacks32>(*this)),
167 cp15(std::make_shared<DynarmicCP15>(*this)), core_index{core_index},
122 exclusive_monitor{dynamic_cast<DynarmicExclusiveMonitor&>(exclusive_monitor)} {} 168 exclusive_monitor{dynamic_cast<DynarmicExclusiveMonitor&>(exclusive_monitor)} {}
123 169
124ARM_Dynarmic_32::~ARM_Dynarmic_32() = default; 170ARM_Dynarmic_32::~ARM_Dynarmic_32() = default;
@@ -154,32 +200,40 @@ void ARM_Dynarmic_32::SetPSTATE(u32 cpsr) {
154} 200}
155 201
156u64 ARM_Dynarmic_32::GetTlsAddress() const { 202u64 ARM_Dynarmic_32::GetTlsAddress() const {
157 return CP15_regs[static_cast<std::size_t>(CP15Register::CP15_THREAD_URO)]; 203 return cp15->uro;
158} 204}
159 205
160void ARM_Dynarmic_32::SetTlsAddress(VAddr address) { 206void ARM_Dynarmic_32::SetTlsAddress(VAddr address) {
161 CP15_regs[static_cast<std::size_t>(CP15Register::CP15_THREAD_URO)] = static_cast<u32>(address); 207 cp15->uro = static_cast<u32>(address);
162} 208}
163 209
164u64 ARM_Dynarmic_32::GetTPIDR_EL0() const { 210u64 ARM_Dynarmic_32::GetTPIDR_EL0() const {
165 return cb->tpidr_el0; 211 return cp15->uprw;
166} 212}
167 213
168void ARM_Dynarmic_32::SetTPIDR_EL0(u64 value) { 214void ARM_Dynarmic_32::SetTPIDR_EL0(u64 value) {
169 cb->tpidr_el0 = value; 215 cp15->uprw = static_cast<u32>(value);
216}
217
218void ARM_Dynarmic_32::ChangeProcessorID(std::size_t new_core_id) {
219 jit->ChangeProcessorID(new_core_id);
170} 220}
171 221
172void ARM_Dynarmic_32::SaveContext(ThreadContext32& ctx) { 222void ARM_Dynarmic_32::SaveContext(ThreadContext32& ctx) {
173 Dynarmic::A32::Context context; 223 Dynarmic::A32::Context context;
174 jit->SaveContext(context); 224 jit->SaveContext(context);
175 ctx.cpu_registers = context.Regs(); 225 ctx.cpu_registers = context.Regs();
226 ctx.extension_registers = context.ExtRegs();
176 ctx.cpsr = context.Cpsr(); 227 ctx.cpsr = context.Cpsr();
228 ctx.fpscr = context.Fpscr();
177} 229}
178 230
179void ARM_Dynarmic_32::LoadContext(const ThreadContext32& ctx) { 231void ARM_Dynarmic_32::LoadContext(const ThreadContext32& ctx) {
180 Dynarmic::A32::Context context; 232 Dynarmic::A32::Context context;
181 context.Regs() = ctx.cpu_registers; 233 context.Regs() = ctx.cpu_registers;
234 context.ExtRegs() = ctx.extension_registers;
182 context.SetCpsr(ctx.cpsr); 235 context.SetCpsr(ctx.cpsr);
236 context.SetFpscr(ctx.fpscr);
183 jit->LoadContext(context); 237 jit->LoadContext(context);
184} 238}
185 239
@@ -188,10 +242,15 @@ void ARM_Dynarmic_32::PrepareReschedule() {
188} 242}
189 243
190void ARM_Dynarmic_32::ClearInstructionCache() { 244void ARM_Dynarmic_32::ClearInstructionCache() {
245 if (!jit) {
246 return;
247 }
191 jit->ClearCache(); 248 jit->ClearCache();
192} 249}
193 250
194void ARM_Dynarmic_32::ClearExclusiveState() {} 251void ARM_Dynarmic_32::ClearExclusiveState() {
252 jit->ClearExclusiveState();
253}
195 254
196void ARM_Dynarmic_32::PageTableChanged(Common::PageTable& page_table, 255void ARM_Dynarmic_32::PageTableChanged(Common::PageTable& page_table,
197 std::size_t new_address_space_size_in_bits) { 256 std::size_t new_address_space_size_in_bits) {
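
The reworked AddTicks path still divides each core's reported ticks across all four cores, so four cores each reporting N advance global time by roughly N in total rather than 4N, and the max() keeps a core from ever reporting zero progress. The arithmetic in isolation (core count hard-coded for the sketch):

    #include <algorithm>
    #include <cstdint>
    #include <cstdio>

    constexpr std::uint64_t NUM_CPU_CORES = 4;

    std::uint64_t AmortizedTicks(std::uint64_t ticks, std::uint64_t interpreted) {
        const std::uint64_t amortized = (ticks - interpreted) / NUM_CPU_CORES;
        return std::max<std::uint64_t>(amortized, 1); // always account at least one tick
    }

    int main() {
        std::printf("%llu\n", static_cast<unsigned long long>(AmortizedTicks(1000, 0))); // 250
        std::printf("%llu\n", static_cast<unsigned long long>(AmortizedTicks(3, 0)));    // 1
    }
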
diff --git a/src/core/arm/dynarmic/arm_dynarmic_32.h b/src/core/arm/dynarmic/arm_dynarmic_32.h
index 8ba9cea8f..2bab31b92 100644
--- a/src/core/arm/dynarmic/arm_dynarmic_32.h
+++ b/src/core/arm/dynarmic/arm_dynarmic_32.h
@@ -9,7 +9,7 @@
9 9
10#include <dynarmic/A32/a32.h> 10#include <dynarmic/A32/a32.h>
11#include <dynarmic/A64/a64.h> 11#include <dynarmic/A64/a64.h>
12#include <dynarmic/A64/exclusive_monitor.h> 12#include <dynarmic/exclusive_monitor.h>
13#include "common/common_types.h" 13#include "common/common_types.h"
14#include "common/hash.h" 14#include "common/hash.h"
15#include "core/arm/arm_interface.h" 15#include "core/arm/arm_interface.h"
@@ -21,13 +21,16 @@ class Memory;
21 21
22namespace Core { 22namespace Core {
23 23
24class CPUInterruptHandler;
24class DynarmicCallbacks32; 25class DynarmicCallbacks32;
26class DynarmicCP15;
25class DynarmicExclusiveMonitor; 27class DynarmicExclusiveMonitor;
26class System; 28class System;
27 29
28class ARM_Dynarmic_32 final : public ARM_Interface { 30class ARM_Dynarmic_32 final : public ARM_Interface {
29public: 31public:
30 ARM_Dynarmic_32(System& system, ExclusiveMonitor& exclusive_monitor, std::size_t core_index); 32 ARM_Dynarmic_32(System& system, CPUInterrupts& interrupt_handlers, bool uses_wall_clock,
33 ExclusiveMonitor& exclusive_monitor, std::size_t core_index);
31 ~ARM_Dynarmic_32() override; 34 ~ARM_Dynarmic_32() override;
32 35
33 void SetPC(u64 pc) override; 36 void SetPC(u64 pc) override;
@@ -44,6 +47,7 @@ public:
44 void SetTlsAddress(VAddr address) override; 47 void SetTlsAddress(VAddr address) override;
45 void SetTPIDR_EL0(u64 value) override; 48 void SetTPIDR_EL0(u64 value) override;
46 u64 GetTPIDR_EL0() const override; 49 u64 GetTPIDR_EL0() const override;
50 void ChangeProcessorID(std::size_t new_core_id) override;
47 51
48 void SaveContext(ThreadContext32& ctx) override; 52 void SaveContext(ThreadContext32& ctx) override;
49 void SaveContext(ThreadContext64& ctx) override {} 53 void SaveContext(ThreadContext64& ctx) override {}
@@ -66,12 +70,14 @@ private:
66 std::unordered_map<JitCacheKey, std::shared_ptr<Dynarmic::A32::Jit>, Common::PairHash>; 70 std::unordered_map<JitCacheKey, std::shared_ptr<Dynarmic::A32::Jit>, Common::PairHash>;
67 71
68 friend class DynarmicCallbacks32; 72 friend class DynarmicCallbacks32;
73 friend class DynarmicCP15;
74
69 std::unique_ptr<DynarmicCallbacks32> cb; 75 std::unique_ptr<DynarmicCallbacks32> cb;
70 JitCacheType jit_cache; 76 JitCacheType jit_cache;
71 std::shared_ptr<Dynarmic::A32::Jit> jit; 77 std::shared_ptr<Dynarmic::A32::Jit> jit;
78 std::shared_ptr<DynarmicCP15> cp15;
72 std::size_t core_index; 79 std::size_t core_index;
73 DynarmicExclusiveMonitor& exclusive_monitor; 80 DynarmicExclusiveMonitor& exclusive_monitor;
74 std::array<u32, 84> CP15_regs{};
75}; 81};
76 82
77} // namespace Core 83} // namespace Core
diff --git a/src/core/arm/dynarmic/arm_dynarmic_64.cpp b/src/core/arm/dynarmic/arm_dynarmic_64.cpp
index 337b97be9..790981034 100644
--- a/src/core/arm/dynarmic/arm_dynarmic_64.cpp
+++ b/src/core/arm/dynarmic/arm_dynarmic_64.cpp
@@ -7,11 +7,11 @@
7#include <dynarmic/A64/a64.h> 7#include <dynarmic/A64/a64.h>
8#include <dynarmic/A64/config.h> 8#include <dynarmic/A64/config.h>
9#include "common/logging/log.h" 9#include "common/logging/log.h"
10#include "common/microprofile.h"
11#include "common/page_table.h" 10#include "common/page_table.h"
11#include "core/arm/cpu_interrupt_handler.h"
12#include "core/arm/dynarmic/arm_dynarmic_64.h" 12#include "core/arm/dynarmic/arm_dynarmic_64.h"
13#include "core/arm/dynarmic/arm_exclusive_monitor.h"
13#include "core/core.h" 14#include "core/core.h"
14#include "core/core_manager.h"
15#include "core/core_timing.h" 15#include "core/core_timing.h"
16#include "core/core_timing_util.h" 16#include "core/core_timing_util.h"
17#include "core/gdbstub/gdbstub.h" 17#include "core/gdbstub/gdbstub.h"
@@ -65,6 +65,22 @@ public:
65 memory.Write64(vaddr + 8, value[1]); 65 memory.Write64(vaddr + 8, value[1]);
66 } 66 }
67 67
68 bool MemoryWriteExclusive8(u64 vaddr, std::uint8_t value, std::uint8_t expected) override {
69 return parent.system.Memory().WriteExclusive8(vaddr, value, expected);
70 }
71 bool MemoryWriteExclusive16(u64 vaddr, std::uint16_t value, std::uint16_t expected) override {
72 return parent.system.Memory().WriteExclusive16(vaddr, value, expected);
73 }
74 bool MemoryWriteExclusive32(u64 vaddr, std::uint32_t value, std::uint32_t expected) override {
75 return parent.system.Memory().WriteExclusive32(vaddr, value, expected);
76 }
77 bool MemoryWriteExclusive64(u64 vaddr, std::uint64_t value, std::uint64_t expected) override {
78 return parent.system.Memory().WriteExclusive64(vaddr, value, expected);
79 }
80 bool MemoryWriteExclusive128(u64 vaddr, Vector value, Vector expected) override {
81 return parent.system.Memory().WriteExclusive128(vaddr, value, expected);
82 }
83
68 void InterpreterFallback(u64 pc, std::size_t num_instructions) override { 84 void InterpreterFallback(u64 pc, std::size_t num_instructions) override {
69 LOG_INFO(Core_ARM, "Unicorn fallback @ 0x{:X} for {} instructions (instr = {:08X})", pc, 85 LOG_INFO(Core_ARM, "Unicorn fallback @ 0x{:X} for {} instructions (instr = {:08X})", pc,
70 num_instructions, MemoryReadCode(pc)); 86 num_instructions, MemoryReadCode(pc));
@@ -98,8 +114,8 @@ public:
98 } 114 }
99 [[fallthrough]]; 115 [[fallthrough]];
100 default: 116 default:
101 ASSERT_MSG(false, "ExceptionRaised(exception = {}, pc = {:X})", 117 ASSERT_MSG(false, "ExceptionRaised(exception = {}, pc = {:08X}, code = {:08X})",
102 static_cast<std::size_t>(exception), pc); 118 static_cast<std::size_t>(exception), pc, MemoryReadCode(pc));
103 } 119 }
104 } 120 }
105 121
@@ -108,29 +124,42 @@ public:
108 } 124 }
109 125
110 void AddTicks(u64 ticks) override { 126 void AddTicks(u64 ticks) override {
127 if (parent.uses_wall_clock) {
128 return;
129 }
111 // Divide the number of ticks by the amount of CPU cores. TODO(Subv): This yields only a 130 // Divide the number of ticks by the amount of CPU cores. TODO(Subv): This yields only a
112 // rough approximation of the amount of executed ticks in the system, it may be thrown off 131 // rough approximation of the amount of executed ticks in the system, it may be thrown off
113 // if not all cores are doing a similar amount of work. Instead of doing this, we should 132 // if not all cores are doing a similar amount of work. Instead of doing this, we should
114 // device a way so that timing is consistent across all cores without increasing the ticks 4 133 // device a way so that timing is consistent across all cores without increasing the ticks 4
115 // times. 134 // times.
116 u64 amortized_ticks = (ticks - num_interpreted_instructions) / Core::NUM_CPU_CORES; 135 u64 amortized_ticks =
136 (ticks - num_interpreted_instructions) / Core::Hardware::NUM_CPU_CORES;
117 // Always execute at least one tick. 137 // Always execute at least one tick.
118 amortized_ticks = std::max<u64>(amortized_ticks, 1); 138 amortized_ticks = std::max<u64>(amortized_ticks, 1);
119 139
120 parent.system.CoreTiming().AddTicks(amortized_ticks); 140 parent.system.CoreTiming().AddTicks(amortized_ticks);
121 num_interpreted_instructions = 0; 141 num_interpreted_instructions = 0;
122 } 142 }
143
123 u64 GetTicksRemaining() override { 144 u64 GetTicksRemaining() override {
124 return std::max(parent.system.CoreTiming().GetDowncount(), s64{0}); 145 if (parent.uses_wall_clock) {
146 if (!parent.interrupt_handlers[parent.core_index].IsInterrupted()) {
147 return minimum_run_cycles;
148 }
149 return 0U;
150 }
151 return std::max<s64>(parent.system.CoreTiming().GetDowncount(), 0);
125 } 152 }
153
126 u64 GetCNTPCT() override { 154 u64 GetCNTPCT() override {
127 return Timing::CpuCyclesToClockCycles(parent.system.CoreTiming().GetTicks()); 155 return parent.system.CoreTiming().GetClockTicks();
128 } 156 }
129 157
130 ARM_Dynarmic_64& parent; 158 ARM_Dynarmic_64& parent;
131 std::size_t num_interpreted_instructions = 0; 159 std::size_t num_interpreted_instructions = 0;
132 u64 tpidrro_el0 = 0; 160 u64 tpidrro_el0 = 0;
133 u64 tpidr_el0 = 0; 161 u64 tpidr_el0 = 0;
162 static constexpr u64 minimum_run_cycles = 1000U;
134}; 163};
135 164
136std::shared_ptr<Dynarmic::A64::Jit> ARM_Dynarmic_64::MakeJit(Common::PageTable& page_table, 165std::shared_ptr<Dynarmic::A64::Jit> ARM_Dynarmic_64::MakeJit(Common::PageTable& page_table,
@@ -168,14 +197,13 @@ std::shared_ptr<Dynarmic::A64::Jit> ARM_Dynarmic_64::MakeJit(Common::PageTable&
168 config.enable_fast_dispatch = false; 197 config.enable_fast_dispatch = false;
169 } 198 }
170 199
200 // Timing
201 config.wall_clock_cntpct = uses_wall_clock;
202
171 return std::make_shared<Dynarmic::A64::Jit>(config); 203 return std::make_shared<Dynarmic::A64::Jit>(config);
172} 204}
173 205
174MICROPROFILE_DEFINE(ARM_Jit_Dynarmic_64, "ARM JIT", "Dynarmic", MP_RGB(255, 64, 64));
175
176void ARM_Dynarmic_64::Run() { 206void ARM_Dynarmic_64::Run() {
177 MICROPROFILE_SCOPE(ARM_Jit_Dynarmic_64);
178
179 jit->Run(); 207 jit->Run();
180} 208}
181 209
@@ -183,11 +211,16 @@ void ARM_Dynarmic_64::Step() {
183 cb->InterpreterFallback(jit->GetPC(), 1); 211 cb->InterpreterFallback(jit->GetPC(), 1);
184} 212}
185 213
186ARM_Dynarmic_64::ARM_Dynarmic_64(System& system, ExclusiveMonitor& exclusive_monitor, 214ARM_Dynarmic_64::ARM_Dynarmic_64(System& system, CPUInterrupts& interrupt_handlers,
215 bool uses_wall_clock, ExclusiveMonitor& exclusive_monitor,
187 std::size_t core_index) 216 std::size_t core_index)
188 : ARM_Interface{system}, cb(std::make_unique<DynarmicCallbacks64>(*this)), 217 : ARM_Interface{system, interrupt_handlers, uses_wall_clock},
189 inner_unicorn{system, ARM_Unicorn::Arch::AArch64}, core_index{core_index}, 218 cb(std::make_unique<DynarmicCallbacks64>(*this)), inner_unicorn{system, interrupt_handlers,
190 exclusive_monitor{dynamic_cast<DynarmicExclusiveMonitor&>(exclusive_monitor)} {} 219 uses_wall_clock,
220 ARM_Unicorn::Arch::AArch64,
221 core_index},
222 core_index{core_index}, exclusive_monitor{
223 dynamic_cast<DynarmicExclusiveMonitor&>(exclusive_monitor)} {}
191 224
192ARM_Dynarmic_64::~ARM_Dynarmic_64() = default; 225ARM_Dynarmic_64::~ARM_Dynarmic_64() = default;
193 226
@@ -239,6 +272,10 @@ void ARM_Dynarmic_64::SetTPIDR_EL0(u64 value) {
239 cb->tpidr_el0 = value; 272 cb->tpidr_el0 = value;
240} 273}
241 274
275void ARM_Dynarmic_64::ChangeProcessorID(std::size_t new_core_id) {
276 jit->ChangeProcessorID(new_core_id);
277}
278
242void ARM_Dynarmic_64::SaveContext(ThreadContext64& ctx) { 279void ARM_Dynarmic_64::SaveContext(ThreadContext64& ctx) {
243 ctx.cpu_registers = jit->GetRegisters(); 280 ctx.cpu_registers = jit->GetRegisters();
244 ctx.sp = jit->GetSP(); 281 ctx.sp = jit->GetSP();
@@ -266,6 +303,9 @@ void ARM_Dynarmic_64::PrepareReschedule() {
266} 303}
267 304
268void ARM_Dynarmic_64::ClearInstructionCache() { 305void ARM_Dynarmic_64::ClearInstructionCache() {
306 if (!jit) {
307 return;
308 }
269 jit->ClearCache(); 309 jit->ClearCache();
270} 310}
271 311
@@ -285,44 +325,4 @@ void ARM_Dynarmic_64::PageTableChanged(Common::PageTable& page_table,
285 jit_cache.emplace(key, jit); 325 jit_cache.emplace(key, jit);
286} 326}
287 327
288DynarmicExclusiveMonitor::DynarmicExclusiveMonitor(Memory::Memory& memory, std::size_t core_count)
289 : monitor(core_count), memory{memory} {}
290
291DynarmicExclusiveMonitor::~DynarmicExclusiveMonitor() = default;
292
293void DynarmicExclusiveMonitor::SetExclusive(std::size_t core_index, VAddr addr) {
294 // Size doesn't actually matter.
295 monitor.Mark(core_index, addr, 16);
296}
297
298void DynarmicExclusiveMonitor::ClearExclusive() {
299 monitor.Clear();
300}
301
302bool DynarmicExclusiveMonitor::ExclusiveWrite8(std::size_t core_index, VAddr vaddr, u8 value) {
303 return monitor.DoExclusiveOperation(core_index, vaddr, 1, [&] { memory.Write8(vaddr, value); });
304}
305
306bool DynarmicExclusiveMonitor::ExclusiveWrite16(std::size_t core_index, VAddr vaddr, u16 value) {
307 return monitor.DoExclusiveOperation(core_index, vaddr, 2,
308 [&] { memory.Write16(vaddr, value); });
309}
310
311bool DynarmicExclusiveMonitor::ExclusiveWrite32(std::size_t core_index, VAddr vaddr, u32 value) {
312 return monitor.DoExclusiveOperation(core_index, vaddr, 4,
313 [&] { memory.Write32(vaddr, value); });
314}
315
316bool DynarmicExclusiveMonitor::ExclusiveWrite64(std::size_t core_index, VAddr vaddr, u64 value) {
317 return monitor.DoExclusiveOperation(core_index, vaddr, 8,
318 [&] { memory.Write64(vaddr, value); });
319}
320
321bool DynarmicExclusiveMonitor::ExclusiveWrite128(std::size_t core_index, VAddr vaddr, u128 value) {
322 return monitor.DoExclusiveOperation(core_index, vaddr, 16, [&] {
323 memory.Write64(vaddr + 0, value[0]);
324 memory.Write64(vaddr + 8, value[1]);
325 });
326}
327
328} // namespace Core 328} // namespace Core
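
The new MemoryWriteExclusive* callbacks carry compare-and-swap semantics: the store only lands if the location still holds the value the earlier exclusive read observed, which is how the global monitor detects interference from another core. The contract on a single u32, with std::atomic standing in for the memory and monitor machinery:

    #include <atomic>
    #include <cstdint>
    #include <cstdio>

    // True when the store succeeded, i.e. nothing wrote the location between
    // the exclusive read (which produced `expected`) and this exclusive write.
    bool WriteExclusive32(std::atomic<std::uint32_t>& word, std::uint32_t value,
                          std::uint32_t expected) {
        return word.compare_exchange_strong(expected, value);
    }

    int main() {
        std::atomic<std::uint32_t> word{42};

        std::uint32_t observed = word.load(); // "exclusive read"
        word.store(7);                        // another core intervenes
        std::printf("%s\n", WriteExclusive32(word, 100, observed) ? "stored" : "failed");

        observed = word.load();               // retry the read-modify-write
        std::printf("%s\n", WriteExclusive32(word, 100, observed) ? "stored" : "failed");
    }
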
diff --git a/src/core/arm/dynarmic/arm_dynarmic_64.h b/src/core/arm/dynarmic/arm_dynarmic_64.h
index 647cecaf0..403c55961 100644
--- a/src/core/arm/dynarmic/arm_dynarmic_64.h
+++ b/src/core/arm/dynarmic/arm_dynarmic_64.h
@@ -8,7 +8,6 @@
8#include <unordered_map> 8#include <unordered_map>
9 9
10#include <dynarmic/A64/a64.h> 10#include <dynarmic/A64/a64.h>
11#include <dynarmic/A64/exclusive_monitor.h>
12#include "common/common_types.h" 11#include "common/common_types.h"
13#include "common/hash.h" 12#include "common/hash.h"
14#include "core/arm/arm_interface.h" 13#include "core/arm/arm_interface.h"
@@ -22,12 +21,14 @@ class Memory;
22namespace Core { 21namespace Core {
23 22
24class DynarmicCallbacks64; 23class DynarmicCallbacks64;
24class CPUInterruptHandler;
25class DynarmicExclusiveMonitor; 25class DynarmicExclusiveMonitor;
26class System; 26class System;
27 27
28class ARM_Dynarmic_64 final : public ARM_Interface { 28class ARM_Dynarmic_64 final : public ARM_Interface {
29public: 29public:
30 ARM_Dynarmic_64(System& system, ExclusiveMonitor& exclusive_monitor, std::size_t core_index); 30 ARM_Dynarmic_64(System& system, CPUInterrupts& interrupt_handlers, bool uses_wall_clock,
31 ExclusiveMonitor& exclusive_monitor, std::size_t core_index);
31 ~ARM_Dynarmic_64() override; 32 ~ARM_Dynarmic_64() override;
32 33
33 void SetPC(u64 pc) override; 34 void SetPC(u64 pc) override;
@@ -44,6 +45,7 @@ public:
44 void SetTlsAddress(VAddr address) override; 45 void SetTlsAddress(VAddr address) override;
45 void SetTPIDR_EL0(u64 value) override; 46 void SetTPIDR_EL0(u64 value) override;
46 u64 GetTPIDR_EL0() const override; 47 u64 GetTPIDR_EL0() const override;
48 void ChangeProcessorID(std::size_t new_core_id) override;
47 49
48 void SaveContext(ThreadContext32& ctx) override {} 50 void SaveContext(ThreadContext32& ctx) override {}
49 void SaveContext(ThreadContext64& ctx) override; 51 void SaveContext(ThreadContext64& ctx) override;
@@ -75,24 +77,4 @@ private:
75 DynarmicExclusiveMonitor& exclusive_monitor; 77 DynarmicExclusiveMonitor& exclusive_monitor;
76}; 78};
77 79
78class DynarmicExclusiveMonitor final : public ExclusiveMonitor {
79public:
80 explicit DynarmicExclusiveMonitor(Memory::Memory& memory, std::size_t core_count);
81 ~DynarmicExclusiveMonitor() override;
82
83 void SetExclusive(std::size_t core_index, VAddr addr) override;
84 void ClearExclusive() override;
85
86 bool ExclusiveWrite8(std::size_t core_index, VAddr vaddr, u8 value) override;
87 bool ExclusiveWrite16(std::size_t core_index, VAddr vaddr, u16 value) override;
88 bool ExclusiveWrite32(std::size_t core_index, VAddr vaddr, u32 value) override;
89 bool ExclusiveWrite64(std::size_t core_index, VAddr vaddr, u64 value) override;
90 bool ExclusiveWrite128(std::size_t core_index, VAddr vaddr, u128 value) override;
91
92private:
93 friend class ARM_Dynarmic_64;
94 Dynarmic::A64::ExclusiveMonitor monitor;
95 Core::Memory::Memory& memory;
96};
97
98} // namespace Core 80} // namespace Core
diff --git a/src/core/arm/dynarmic/arm_dynarmic_cp15.cpp b/src/core/arm/dynarmic/arm_dynarmic_cp15.cpp
index 3fdcdebde..54556e0f9 100644
--- a/src/core/arm/dynarmic/arm_dynarmic_cp15.cpp
+++ b/src/core/arm/dynarmic/arm_dynarmic_cp15.cpp
@@ -2,79 +2,132 @@
2// Licensed under GPLv2 or any later version 2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included. 3// Refer to the license.txt file included.
4 4
5#include <fmt/format.h>
6#include "common/logging/log.h"
7#include "core/arm/dynarmic/arm_dynarmic_32.h"
5#include "core/arm/dynarmic/arm_dynarmic_cp15.h" 8#include "core/arm/dynarmic/arm_dynarmic_cp15.h"
9#include "core/core.h"
10#include "core/core_timing.h"
11#include "core/core_timing_util.h"
6 12
7using Callback = Dynarmic::A32::Coprocessor::Callback; 13using Callback = Dynarmic::A32::Coprocessor::Callback;
8using CallbackOrAccessOneWord = Dynarmic::A32::Coprocessor::CallbackOrAccessOneWord; 14using CallbackOrAccessOneWord = Dynarmic::A32::Coprocessor::CallbackOrAccessOneWord;
9using CallbackOrAccessTwoWords = Dynarmic::A32::Coprocessor::CallbackOrAccessTwoWords; 15using CallbackOrAccessTwoWords = Dynarmic::A32::Coprocessor::CallbackOrAccessTwoWords;
10 16
17template <>
18struct fmt::formatter<Dynarmic::A32::CoprocReg> {
19 constexpr auto parse(format_parse_context& ctx) {
20 return ctx.begin();
21 }
22 template <typename FormatContext>
23 auto format(const Dynarmic::A32::CoprocReg& reg, FormatContext& ctx) {
24 return format_to(ctx.out(), "cp{}", static_cast<size_t>(reg));
25 }
26};
27
28namespace Core {
29
30static u32 dummy_value;
31
11std::optional<Callback> DynarmicCP15::CompileInternalOperation(bool two, unsigned opc1, 32std::optional<Callback> DynarmicCP15::CompileInternalOperation(bool two, unsigned opc1,
12 CoprocReg CRd, CoprocReg CRn, 33 CoprocReg CRd, CoprocReg CRn,
13 CoprocReg CRm, unsigned opc2) { 34 CoprocReg CRm, unsigned opc2) {
35 LOG_CRITICAL(Core_ARM, "CP15: cdp{} p15, {}, {}, {}, {}, {}", two ? "2" : "", opc1, CRd, CRn,
36 CRm, opc2);
14 return {}; 37 return {};
15} 38}
16 39
17CallbackOrAccessOneWord DynarmicCP15::CompileSendOneWord(bool two, unsigned opc1, CoprocReg CRn, 40CallbackOrAccessOneWord DynarmicCP15::CompileSendOneWord(bool two, unsigned opc1, CoprocReg CRn,
18 CoprocReg CRm, unsigned opc2) { 41 CoprocReg CRm, unsigned opc2) {
19 // TODO(merry): Privileged CP15 registers
20
21 if (!two && CRn == CoprocReg::C7 && opc1 == 0 && CRm == CoprocReg::C5 && opc2 == 4) { 42 if (!two && CRn == CoprocReg::C7 && opc1 == 0 && CRm == CoprocReg::C5 && opc2 == 4) {
43 // CP15_FLUSH_PREFETCH_BUFFER
22 // This is a dummy write, we ignore the value written here. 44 // This is a dummy write, we ignore the value written here.
23 return &CP15[static_cast<std::size_t>(CP15Register::CP15_FLUSH_PREFETCH_BUFFER)]; 45 return &dummy_value;
24 } 46 }
25 47
26 if (!two && CRn == CoprocReg::C7 && opc1 == 0 && CRm == CoprocReg::C10) { 48 if (!two && CRn == CoprocReg::C7 && opc1 == 0 && CRm == CoprocReg::C10) {
27 switch (opc2) { 49 switch (opc2) {
28 case 4: 50 case 4:
51 // CP15_DATA_SYNC_BARRIER
29 // This is a dummy write, we ignore the value written here. 52 // This is a dummy write, we ignore the value written here.
30 return &CP15[static_cast<std::size_t>(CP15Register::CP15_DATA_SYNC_BARRIER)]; 53 return &dummy_value;
31 case 5: 54 case 5:
55 // CP15_DATA_MEMORY_BARRIER
32 // This is a dummy write, we ignore the value written here. 56 // This is a dummy write, we ignore the value written here.
33 return &CP15[static_cast<std::size_t>(CP15Register::CP15_DATA_MEMORY_BARRIER)]; 57 return &dummy_value;
34 default:
35 return {};
36 } 58 }
37 } 59 }
38 60
39 if (!two && CRn == CoprocReg::C13 && opc1 == 0 && CRm == CoprocReg::C0 && opc2 == 2) { 61 if (!two && CRn == CoprocReg::C13 && opc1 == 0 && CRm == CoprocReg::C0 && opc2 == 2) {
40 return &CP15[static_cast<std::size_t>(CP15Register::CP15_THREAD_UPRW)]; 62 // CP15_THREAD_UPRW
63 return &uprw;
41 } 64 }
42 65
66 LOG_CRITICAL(Core_ARM, "CP15: mcr{} p15, {}, <Rt>, {}, {}, {}", two ? "2" : "", opc1, CRn, CRm,
67 opc2);
43 return {}; 68 return {};
44} 69}
45 70
46CallbackOrAccessTwoWords DynarmicCP15::CompileSendTwoWords(bool two, unsigned opc, CoprocReg CRm) { 71CallbackOrAccessTwoWords DynarmicCP15::CompileSendTwoWords(bool two, unsigned opc, CoprocReg CRm) {
72 LOG_CRITICAL(Core_ARM, "CP15: mcrr{} p15, {}, <Rt>, <Rt2>, {}", two ? "2" : "", opc, CRm);
47 return {}; 73 return {};
48} 74}
49 75
50CallbackOrAccessOneWord DynarmicCP15::CompileGetOneWord(bool two, unsigned opc1, CoprocReg CRn, 76CallbackOrAccessOneWord DynarmicCP15::CompileGetOneWord(bool two, unsigned opc1, CoprocReg CRn,
51 CoprocReg CRm, unsigned opc2) { 77 CoprocReg CRm, unsigned opc2) {
52 // TODO(merry): Privileged CP15 registers
53
54 if (!two && CRn == CoprocReg::C13 && opc1 == 0 && CRm == CoprocReg::C0) { 78 if (!two && CRn == CoprocReg::C13 && opc1 == 0 && CRm == CoprocReg::C0) {
55 switch (opc2) { 79 switch (opc2) {
56 case 2: 80 case 2:
57 return &CP15[static_cast<std::size_t>(CP15Register::CP15_THREAD_UPRW)]; 81 // CP15_THREAD_UPRW
82 return &uprw;
58 case 3: 83 case 3:
59 return &CP15[static_cast<std::size_t>(CP15Register::CP15_THREAD_URO)]; 84 // CP15_THREAD_URO
60 default: 85 return &uro;
61 return {};
62 } 86 }
63 } 87 }
64 88
89 LOG_CRITICAL(Core_ARM, "CP15: mrc{} p15, {}, <Rt>, {}, {}, {}", two ? "2" : "", opc1, CRn, CRm,
90 opc2);
65 return {}; 91 return {};
66} 92}
67 93
68CallbackOrAccessTwoWords DynarmicCP15::CompileGetTwoWords(bool two, unsigned opc, CoprocReg CRm) { 94CallbackOrAccessTwoWords DynarmicCP15::CompileGetTwoWords(bool two, unsigned opc, CoprocReg CRm) {
95 if (!two && opc == 0 && CRm == CoprocReg::C14) {
96 // CNTPCT
97 const auto callback = static_cast<u64 (*)(Dynarmic::A32::Jit*, void*, u32, u32)>(
98 [](Dynarmic::A32::Jit*, void* arg, u32, u32) -> u64 {
99 ARM_Dynarmic_32& parent = *(ARM_Dynarmic_32*)arg;
100 return parent.system.CoreTiming().GetClockTicks();
101 });
102 return Dynarmic::A32::Coprocessor::Callback{callback, (void*)&parent};
103 }
104
105 LOG_CRITICAL(Core_ARM, "CP15: mrrc{} p15, {}, <Rt>, <Rt2>, {}", two ? "2" : "", opc, CRm);
69 return {}; 106 return {};
70} 107}
71 108
72std::optional<Callback> DynarmicCP15::CompileLoadWords(bool two, bool long_transfer, CoprocReg CRd, 109std::optional<Callback> DynarmicCP15::CompileLoadWords(bool two, bool long_transfer, CoprocReg CRd,
73 std::optional<u8> option) { 110 std::optional<u8> option) {
111 if (option) {
 112 LOG_CRITICAL(Core_ARM, "CP15: ldc{}{} p15, {}, [...], {}", two ? "2" : "",
113 long_transfer ? "l" : "", CRd, *option);
114 } else {
 115 LOG_CRITICAL(Core_ARM, "CP15: ldc{}{} p15, {}, [...]", two ? "2" : "",
116 long_transfer ? "l" : "", CRd);
117 }
74 return {}; 118 return {};
75} 119}
76 120
77std::optional<Callback> DynarmicCP15::CompileStoreWords(bool two, bool long_transfer, CoprocReg CRd, 121std::optional<Callback> DynarmicCP15::CompileStoreWords(bool two, bool long_transfer, CoprocReg CRd,
78 std::optional<u8> option) { 122 std::optional<u8> option) {
123 if (option) {
 124 LOG_CRITICAL(Core_ARM, "CP15: stc{}{} p15, {}, [...], {}", two ? "2" : "",
125 long_transfer ? "l" : "", CRd, *option);
126 } else {
 127 LOG_CRITICAL(Core_ARM, "CP15: stc{}{} p15, {}, [...]", two ? "2" : "",
128 long_transfer ? "l" : "", CRd);
129 }
79 return {}; 130 return {};
80} 131}
132
133} // namespace Core
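
The fmt::formatter specialization at the top of this file is what lets CoprocReg values be dropped straight into the log format strings. The same pattern for any scoped enum, assuming fmt's pre-v7 formatter API that this code targets:

    #include <cstddef>
    #include <fmt/format.h>

    enum class CoprocReg { C0, C1, C2, C3, C4, C5, C6, C7, C8, C9, C10, C11, C12, C13, C14, C15 };

    template <>
    struct fmt::formatter<CoprocReg> {
        constexpr auto parse(format_parse_context& ctx) {
            return ctx.begin();
        }
        template <typename FormatContext>
        auto format(const CoprocReg& reg, FormatContext& ctx) {
            return format_to(ctx.out(), "cp{}", static_cast<std::size_t>(reg));
        }
    };

    int main() {
        // Prints: CP15: mrc p15, 0, <Rt>, cp13, cp0, 2
        fmt::print("CP15: mrc p15, 0, <Rt>, {}, {}, 2\n", CoprocReg::C13, CoprocReg::C0);
    }
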
diff --git a/src/core/arm/dynarmic/arm_dynarmic_cp15.h b/src/core/arm/dynarmic/arm_dynarmic_cp15.h
index 07bcde5f9..7356d252e 100644
--- a/src/core/arm/dynarmic/arm_dynarmic_cp15.h
+++ b/src/core/arm/dynarmic/arm_dynarmic_cp15.h
@@ -10,128 +10,15 @@
10#include <dynarmic/A32/coprocessor.h> 10#include <dynarmic/A32/coprocessor.h>
11#include "common/common_types.h" 11#include "common/common_types.h"
12 12
13enum class CP15Register { 13namespace Core {
14 // c0 - Information registers
15 CP15_MAIN_ID,
16 CP15_CACHE_TYPE,
17 CP15_TCM_STATUS,
18 CP15_TLB_TYPE,
19 CP15_CPU_ID,
20 CP15_PROCESSOR_FEATURE_0,
21 CP15_PROCESSOR_FEATURE_1,
22 CP15_DEBUG_FEATURE_0,
23 CP15_AUXILIARY_FEATURE_0,
24 CP15_MEMORY_MODEL_FEATURE_0,
25 CP15_MEMORY_MODEL_FEATURE_1,
26 CP15_MEMORY_MODEL_FEATURE_2,
27 CP15_MEMORY_MODEL_FEATURE_3,
28 CP15_ISA_FEATURE_0,
29 CP15_ISA_FEATURE_1,
30 CP15_ISA_FEATURE_2,
31 CP15_ISA_FEATURE_3,
32 CP15_ISA_FEATURE_4,
33 14
34 // c1 - Control registers 15class ARM_Dynarmic_32;
35 CP15_CONTROL,
36 CP15_AUXILIARY_CONTROL,
37 CP15_COPROCESSOR_ACCESS_CONTROL,
38
39 // c2 - Translation table registers
40 CP15_TRANSLATION_BASE_TABLE_0,
41 CP15_TRANSLATION_BASE_TABLE_1,
42 CP15_TRANSLATION_BASE_CONTROL,
43 CP15_DOMAIN_ACCESS_CONTROL,
44 CP15_RESERVED,
45
46 // c5 - Fault status registers
47 CP15_FAULT_STATUS,
48 CP15_INSTR_FAULT_STATUS,
49 CP15_COMBINED_DATA_FSR = CP15_FAULT_STATUS,
50 CP15_INST_FSR,
51
52 // c6 - Fault Address registers
53 CP15_FAULT_ADDRESS,
54 CP15_COMBINED_DATA_FAR = CP15_FAULT_ADDRESS,
55 CP15_WFAR,
56 CP15_IFAR,
57
58 // c7 - Cache operation registers
59 CP15_WAIT_FOR_INTERRUPT,
60 CP15_PHYS_ADDRESS,
61 CP15_INVALIDATE_INSTR_CACHE,
62 CP15_INVALIDATE_INSTR_CACHE_USING_MVA,
63 CP15_INVALIDATE_INSTR_CACHE_USING_INDEX,
64 CP15_FLUSH_PREFETCH_BUFFER,
65 CP15_FLUSH_BRANCH_TARGET_CACHE,
66 CP15_FLUSH_BRANCH_TARGET_CACHE_ENTRY,
67 CP15_INVALIDATE_DATA_CACHE,
68 CP15_INVALIDATE_DATA_CACHE_LINE_USING_MVA,
69 CP15_INVALIDATE_DATA_CACHE_LINE_USING_INDEX,
70 CP15_INVALIDATE_DATA_AND_INSTR_CACHE,
71 CP15_CLEAN_DATA_CACHE,
72 CP15_CLEAN_DATA_CACHE_LINE_USING_MVA,
73 CP15_CLEAN_DATA_CACHE_LINE_USING_INDEX,
74 CP15_DATA_SYNC_BARRIER,
75 CP15_DATA_MEMORY_BARRIER,
76 CP15_CLEAN_AND_INVALIDATE_DATA_CACHE,
77 CP15_CLEAN_AND_INVALIDATE_DATA_CACHE_LINE_USING_MVA,
78 CP15_CLEAN_AND_INVALIDATE_DATA_CACHE_LINE_USING_INDEX,
79
80 // c8 - TLB operations
81 CP15_INVALIDATE_ITLB,
82 CP15_INVALIDATE_ITLB_SINGLE_ENTRY,
83 CP15_INVALIDATE_ITLB_ENTRY_ON_ASID_MATCH,
84 CP15_INVALIDATE_ITLB_ENTRY_ON_MVA,
85 CP15_INVALIDATE_DTLB,
86 CP15_INVALIDATE_DTLB_SINGLE_ENTRY,
87 CP15_INVALIDATE_DTLB_ENTRY_ON_ASID_MATCH,
88 CP15_INVALIDATE_DTLB_ENTRY_ON_MVA,
89 CP15_INVALIDATE_UTLB,
90 CP15_INVALIDATE_UTLB_SINGLE_ENTRY,
91 CP15_INVALIDATE_UTLB_ENTRY_ON_ASID_MATCH,
92 CP15_INVALIDATE_UTLB_ENTRY_ON_MVA,
93
94 // c9 - Data cache lockdown register
95 CP15_DATA_CACHE_LOCKDOWN,
96
97 // c10 - TLB/Memory map registers
98 CP15_TLB_LOCKDOWN,
99 CP15_PRIMARY_REGION_REMAP,
100 CP15_NORMAL_REGION_REMAP,
101
102 // c13 - Thread related registers
103 CP15_PID,
104 CP15_CONTEXT_ID,
105 CP15_THREAD_UPRW, // Thread ID register - User/Privileged Read/Write
106 CP15_THREAD_URO, // Thread ID register - User Read Only (Privileged R/W)
107 CP15_THREAD_PRW, // Thread ID register - Privileged R/W only.
108
109 // c15 - Performance and TLB lockdown registers
110 CP15_PERFORMANCE_MONITOR_CONTROL,
111 CP15_CYCLE_COUNTER,
112 CP15_COUNT_0,
113 CP15_COUNT_1,
114 CP15_READ_MAIN_TLB_LOCKDOWN_ENTRY,
115 CP15_WRITE_MAIN_TLB_LOCKDOWN_ENTRY,
116 CP15_MAIN_TLB_LOCKDOWN_VIRT_ADDRESS,
117 CP15_MAIN_TLB_LOCKDOWN_PHYS_ADDRESS,
118 CP15_MAIN_TLB_LOCKDOWN_ATTRIBUTE,
119 CP15_TLB_DEBUG_CONTROL,
120
121 // Skyeye defined
122 CP15_TLB_FAULT_ADDR,
123 CP15_TLB_FAULT_STATUS,
124
125 // Not an actual register.
126 // All registers should be defined above this.
127 CP15_REGISTER_COUNT,
128};
129 16
130class DynarmicCP15 final : public Dynarmic::A32::Coprocessor { 17class DynarmicCP15 final : public Dynarmic::A32::Coprocessor {
131public: 18public:
132 using CoprocReg = Dynarmic::A32::CoprocReg; 19 using CoprocReg = Dynarmic::A32::CoprocReg;
133 20
134 explicit DynarmicCP15(u32* cp15) : CP15(cp15){}; 21 explicit DynarmicCP15(ARM_Dynarmic_32& parent) : parent(parent) {}
135 22
136 std::optional<Callback> CompileInternalOperation(bool two, unsigned opc1, CoprocReg CRd, 23 std::optional<Callback> CompileInternalOperation(bool two, unsigned opc1, CoprocReg CRd,
137 CoprocReg CRn, CoprocReg CRm, 24 CoprocReg CRn, CoprocReg CRm,
@@ -147,6 +34,9 @@ public:
147 std::optional<Callback> CompileStoreWords(bool two, bool long_transfer, CoprocReg CRd, 34 std::optional<Callback> CompileStoreWords(bool two, bool long_transfer, CoprocReg CRd,
148 std::optional<u8> option) override; 35 std::optional<u8> option) override;
149 36
150private: 37 ARM_Dynarmic_32& parent;
151 u32* CP15{}; 38 u32 uprw;
39 u32 uro;
152}; 40};
41
42} // namespace Core
diff --git a/src/core/arm/dynarmic/arm_exclusive_monitor.cpp b/src/core/arm/dynarmic/arm_exclusive_monitor.cpp
new file mode 100644
index 000000000..4e209f6a5
--- /dev/null
+++ b/src/core/arm/dynarmic/arm_exclusive_monitor.cpp
@@ -0,0 +1,76 @@
+// Copyright 2018 yuzu emulator team
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <cinttypes>
+#include <memory>
+#include "core/arm/dynarmic/arm_exclusive_monitor.h"
+#include "core/memory.h"
+
+namespace Core {
+
+DynarmicExclusiveMonitor::DynarmicExclusiveMonitor(Memory::Memory& memory, std::size_t core_count)
+    : monitor(core_count), memory{memory} {}
+
+DynarmicExclusiveMonitor::~DynarmicExclusiveMonitor() = default;
+
+u8 DynarmicExclusiveMonitor::ExclusiveRead8(std::size_t core_index, VAddr addr) {
+    return monitor.ReadAndMark<u8>(core_index, addr, [&]() -> u8 { return memory.Read8(addr); });
+}
+
+u16 DynarmicExclusiveMonitor::ExclusiveRead16(std::size_t core_index, VAddr addr) {
+    return monitor.ReadAndMark<u16>(core_index, addr, [&]() -> u16 { return memory.Read16(addr); });
+}
+
+u32 DynarmicExclusiveMonitor::ExclusiveRead32(std::size_t core_index, VAddr addr) {
+    return monitor.ReadAndMark<u32>(core_index, addr, [&]() -> u32 { return memory.Read32(addr); });
+}
+
+u64 DynarmicExclusiveMonitor::ExclusiveRead64(std::size_t core_index, VAddr addr) {
+    return monitor.ReadAndMark<u64>(core_index, addr, [&]() -> u64 { return memory.Read64(addr); });
+}
+
+u128 DynarmicExclusiveMonitor::ExclusiveRead128(std::size_t core_index, VAddr addr) {
+    return monitor.ReadAndMark<u128>(core_index, addr, [&]() -> u128 {
+        u128 result;
+        result[0] = memory.Read64(addr);
+        result[1] = memory.Read64(addr + 8);
+        return result;
+    });
+}
+
+void DynarmicExclusiveMonitor::ClearExclusive() {
+    monitor.Clear();
+}
+
+bool DynarmicExclusiveMonitor::ExclusiveWrite8(std::size_t core_index, VAddr vaddr, u8 value) {
+    return monitor.DoExclusiveOperation<u8>(core_index, vaddr, [&](u8 expected) -> bool {
+        return memory.WriteExclusive8(vaddr, value, expected);
+    });
+}
+
+bool DynarmicExclusiveMonitor::ExclusiveWrite16(std::size_t core_index, VAddr vaddr, u16 value) {
+    return monitor.DoExclusiveOperation<u16>(core_index, vaddr, [&](u16 expected) -> bool {
+        return memory.WriteExclusive16(vaddr, value, expected);
+    });
+}
+
+bool DynarmicExclusiveMonitor::ExclusiveWrite32(std::size_t core_index, VAddr vaddr, u32 value) {
+    return monitor.DoExclusiveOperation<u32>(core_index, vaddr, [&](u32 expected) -> bool {
+        return memory.WriteExclusive32(vaddr, value, expected);
+    });
+}
+
+bool DynarmicExclusiveMonitor::ExclusiveWrite64(std::size_t core_index, VAddr vaddr, u64 value) {
+    return monitor.DoExclusiveOperation<u64>(core_index, vaddr, [&](u64 expected) -> bool {
+        return memory.WriteExclusive64(vaddr, value, expected);
+    });
+}
+
+bool DynarmicExclusiveMonitor::ExclusiveWrite128(std::size_t core_index, VAddr vaddr, u128 value) {
+    return monitor.DoExclusiveOperation<u128>(core_index, vaddr, [&](u128 expected) -> bool {
+        return memory.WriteExclusive128(vaddr, value, expected);
+    });
+}
+
+} // namespace Core
diff --git a/src/core/arm/dynarmic/arm_exclusive_monitor.h b/src/core/arm/dynarmic/arm_exclusive_monitor.h
new file mode 100644
index 000000000..964f4a55d
--- /dev/null
+++ b/src/core/arm/dynarmic/arm_exclusive_monitor.h
@@ -0,0 +1,48 @@
+// Copyright 2020 yuzu emulator team
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <memory>
+#include <unordered_map>
+
+#include <dynarmic/exclusive_monitor.h>
+
+#include "common/common_types.h"
+#include "core/arm/dynarmic/arm_dynarmic_32.h"
+#include "core/arm/dynarmic/arm_dynarmic_64.h"
+#include "core/arm/exclusive_monitor.h"
+
+namespace Core::Memory {
+class Memory;
+}
+
+namespace Core {
+
+class DynarmicExclusiveMonitor final : public ExclusiveMonitor {
+public:
+    explicit DynarmicExclusiveMonitor(Memory::Memory& memory, std::size_t core_count);
+    ~DynarmicExclusiveMonitor() override;
+
+    u8 ExclusiveRead8(std::size_t core_index, VAddr addr) override;
+    u16 ExclusiveRead16(std::size_t core_index, VAddr addr) override;
+    u32 ExclusiveRead32(std::size_t core_index, VAddr addr) override;
+    u64 ExclusiveRead64(std::size_t core_index, VAddr addr) override;
+    u128 ExclusiveRead128(std::size_t core_index, VAddr addr) override;
+    void ClearExclusive() override;
+
+    bool ExclusiveWrite8(std::size_t core_index, VAddr vaddr, u8 value) override;
+    bool ExclusiveWrite16(std::size_t core_index, VAddr vaddr, u16 value) override;
+    bool ExclusiveWrite32(std::size_t core_index, VAddr vaddr, u32 value) override;
+    bool ExclusiveWrite64(std::size_t core_index, VAddr vaddr, u64 value) override;
+    bool ExclusiveWrite128(std::size_t core_index, VAddr vaddr, u128 value) override;
+
+private:
+    friend class ARM_Dynarmic_32;
+    friend class ARM_Dynarmic_64;
+    Dynarmic::ExclusiveMonitor monitor;
+    Core::Memory::Memory& memory;
+};
+
+} // namespace Core
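Note: the monitor above pairs each ExclusiveReadN (which marks an address for a core) with an ExclusiveWriteN that only succeeds if nothing else touched that location in between. The following is a minimal standalone sketch of that load-link/store-conditional pattern built on a compare-and-swap; the type and per-core snapshot array are illustrative, not the emulator's API:

    #include <atomic>
    #include <cstddef>
    #include <cstdint>

    // Sketch: each core snapshots the value on an exclusive read; a later
    // exclusive write succeeds only if that snapshot is still current (CAS),
    // mirroring ReadAndMark/DoExclusiveOperation above.
    struct ExclusiveSketch {
        std::atomic<std::uint32_t> word{0};
        std::uint32_t reserved_value[4]{}; // hypothetical per-core snapshot

        std::uint32_t ExclusiveRead32(std::size_t core) {
            reserved_value[core] = word.load(std::memory_order_acquire);
            return reserved_value[core];
        }

        bool ExclusiveWrite32(std::size_t core, std::uint32_t desired) {
            std::uint32_t expected = reserved_value[core];
            return word.compare_exchange_strong(expected, desired,
                                                std::memory_order_acq_rel);
        }
    };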
diff --git a/src/core/arm/exclusive_monitor.cpp b/src/core/arm/exclusive_monitor.cpp
index b32401e0b..d8cba369d 100644
--- a/src/core/arm/exclusive_monitor.cpp
+++ b/src/core/arm/exclusive_monitor.cpp
@@ -3,7 +3,7 @@
 // Refer to the license.txt file included.
 
 #ifdef ARCHITECTURE_x86_64
-#include "core/arm/dynarmic/arm_dynarmic_64.h"
+#include "core/arm/dynarmic/arm_exclusive_monitor.h"
 #endif
 #include "core/arm/exclusive_monitor.h"
 #include "core/memory.h"
diff --git a/src/core/arm/exclusive_monitor.h b/src/core/arm/exclusive_monitor.h
index ccd73b80f..62f6e6023 100644
--- a/src/core/arm/exclusive_monitor.h
+++ b/src/core/arm/exclusive_monitor.h
@@ -18,7 +18,11 @@ class ExclusiveMonitor {
 public:
     virtual ~ExclusiveMonitor();
 
-    virtual void SetExclusive(std::size_t core_index, VAddr addr) = 0;
+    virtual u8 ExclusiveRead8(std::size_t core_index, VAddr addr) = 0;
+    virtual u16 ExclusiveRead16(std::size_t core_index, VAddr addr) = 0;
+    virtual u32 ExclusiveRead32(std::size_t core_index, VAddr addr) = 0;
+    virtual u64 ExclusiveRead64(std::size_t core_index, VAddr addr) = 0;
+    virtual u128 ExclusiveRead128(std::size_t core_index, VAddr addr) = 0;
     virtual void ClearExclusive() = 0;
 
     virtual bool ExclusiveWrite8(std::size_t core_index, VAddr vaddr, u8 value) = 0;
diff --git a/src/core/arm/unicorn/arm_unicorn.cpp b/src/core/arm/unicorn/arm_unicorn.cpp
index e40e9626a..1df3f3ed1 100644
--- a/src/core/arm/unicorn/arm_unicorn.cpp
+++ b/src/core/arm/unicorn/arm_unicorn.cpp
@@ -6,6 +6,7 @@
 #include <unicorn/arm64.h>
 #include "common/assert.h"
 #include "common/microprofile.h"
+#include "core/arm/cpu_interrupt_handler.h"
 #include "core/arm/unicorn/arm_unicorn.h"
 #include "core/core.h"
 #include "core/core_timing.h"
@@ -62,7 +63,9 @@ static bool UnmappedMemoryHook(uc_engine* uc, uc_mem_type type, u64 addr, int si
     return false;
 }
 
-ARM_Unicorn::ARM_Unicorn(System& system, Arch architecture) : ARM_Interface{system} {
+ARM_Unicorn::ARM_Unicorn(System& system, CPUInterrupts& interrupt_handlers, bool uses_wall_clock,
+                         Arch architecture, std::size_t core_index)
+    : ARM_Interface{system, interrupt_handlers, uses_wall_clock}, core_index{core_index} {
     const auto arch = architecture == Arch::AArch32 ? UC_ARCH_ARM : UC_ARCH_ARM64;
     CHECKED(uc_open(arch, UC_MODE_ARM, &uc));
 
@@ -156,12 +159,20 @@ void ARM_Unicorn::SetTPIDR_EL0(u64 value) {
     CHECKED(uc_reg_write(uc, UC_ARM64_REG_TPIDR_EL0, &value));
 }
 
+void ARM_Unicorn::ChangeProcessorID(std::size_t new_core_id) {
+    core_index = new_core_id;
+}
+
 void ARM_Unicorn::Run() {
     if (GDBStub::IsServerEnabled()) {
         ExecuteInstructions(std::max(4000000U, 0U));
     } else {
-        ExecuteInstructions(
-            std::max(std::size_t(system.CoreTiming().GetDowncount()), std::size_t{0}));
+        while (true) {
+            if (interrupt_handlers[core_index].IsInterrupted()) {
+                return;
+            }
+            ExecuteInstructions(10);
+        }
     }
 }
167 178
@@ -183,8 +194,6 @@ void ARM_Unicorn::ExecuteInstructions(std::size_t num_instructions) {
                          UC_PROT_READ | UC_PROT_WRITE | UC_PROT_EXEC, page_buffer.data()));
     CHECKED(uc_emu_start(uc, GetPC(), 1ULL << 63, 0, num_instructions));
     CHECKED(uc_mem_unmap(uc, map_addr, page_buffer.size()));
-
-    system.CoreTiming().AddTicks(num_instructions);
     if (GDBStub::IsServerEnabled()) {
         if (last_bkpt_hit && last_bkpt.type == GDBStub::BreakpointType::Execute) {
             uc_reg_write(uc, UC_ARM64_REG_PC, &last_bkpt.address);
diff --git a/src/core/arm/unicorn/arm_unicorn.h b/src/core/arm/unicorn/arm_unicorn.h
index 725c65085..810aff311 100644
--- a/src/core/arm/unicorn/arm_unicorn.h
+++ b/src/core/arm/unicorn/arm_unicorn.h
@@ -20,7 +20,8 @@ public:
         AArch64, // 64-bit ARM
     };
 
-    explicit ARM_Unicorn(System& system, Arch architecture);
+    explicit ARM_Unicorn(System& system, CPUInterrupts& interrupt_handlers, bool uses_wall_clock,
+                         Arch architecture, std::size_t core_index);
     ~ARM_Unicorn() override;
 
     void SetPC(u64 pc) override;
@@ -35,6 +36,7 @@ public:
     void SetTlsAddress(VAddr address) override;
     void SetTPIDR_EL0(u64 value) override;
     u64 GetTPIDR_EL0() const override;
+    void ChangeProcessorID(std::size_t new_core_id) override;
     void PrepareReschedule() override;
     void ClearExclusiveState() override;
     void ExecuteInstructions(std::size_t num_instructions);
@@ -55,6 +57,7 @@ private:
     uc_engine* uc{};
     GDBStub::BreakpointAddress last_bkpt{};
     bool last_bkpt_hit = false;
+    std::size_t core_index;
 };
 
 } // namespace Core
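Note: with the downcount gone, the Unicorn backend now executes short bursts and polls its per-core interrupt handler between them, trading a little throughput for bounded interrupt latency. A standalone sketch of that polling loop, using a plain std::atomic flag in place of the emulator's CPUInterrupts type (names are illustrative):

    #include <atomic>
    #include <cstddef>

    std::atomic<bool> interrupted[4]{}; // stand-in for per-core CPUInterrupts

    void RunCore(std::size_t core, void (*execute_instructions)(std::size_t)) {
        while (true) {
            if (interrupted[core].load(std::memory_order_acquire)) {
                return; // leave the loop; the scheduler takes over
            }
            execute_instructions(10); // small batch keeps interrupt latency low
        }
    }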
diff --git a/src/core/core.cpp b/src/core/core.cpp
index f9f8a3000..1a243c515 100644
--- a/src/core/core.cpp
+++ b/src/core/core.cpp
@@ -8,10 +8,10 @@
 
 #include "common/file_util.h"
 #include "common/logging/log.h"
+#include "common/microprofile.h"
 #include "common/string_util.h"
 #include "core/arm/exclusive_monitor.h"
 #include "core/core.h"
-#include "core/core_manager.h"
 #include "core/core_timing.h"
 #include "core/cpu_manager.h"
 #include "core/device_memory.h"
@@ -51,6 +51,11 @@
51#include "video_core/renderer_base.h" 51#include "video_core/renderer_base.h"
52#include "video_core/video_core.h" 52#include "video_core/video_core.h"
53 53
54MICROPROFILE_DEFINE(ARM_Jit_Dynarmic_CPU0, "ARM JIT", "Dynarmic CPU 0", MP_RGB(255, 64, 64));
55MICROPROFILE_DEFINE(ARM_Jit_Dynarmic_CPU1, "ARM JIT", "Dynarmic CPU 1", MP_RGB(255, 64, 64));
56MICROPROFILE_DEFINE(ARM_Jit_Dynarmic_CPU2, "ARM JIT", "Dynarmic CPU 2", MP_RGB(255, 64, 64));
57MICROPROFILE_DEFINE(ARM_Jit_Dynarmic_CPU3, "ARM JIT", "Dynarmic CPU 3", MP_RGB(255, 64, 64));
58
54namespace Core { 59namespace Core {
55 60
56namespace { 61namespace {
@@ -117,23 +122,22 @@ struct System::Impl {
         : kernel{system}, fs_controller{system}, memory{system},
           cpu_manager{system}, reporter{system}, applet_manager{system} {}
 
-    CoreManager& CurrentCoreManager() {
-        return cpu_manager.GetCurrentCoreManager();
-    }
+    ResultStatus Run() {
+        status = ResultStatus::Success;
 
-    Kernel::PhysicalCore& CurrentPhysicalCore() {
-        const auto index = cpu_manager.GetActiveCoreIndex();
-        return kernel.PhysicalCore(index);
-    }
+        kernel.Suspend(false);
+        core_timing.SyncPause(false);
+        cpu_manager.Pause(false);
 
-    Kernel::PhysicalCore& GetPhysicalCore(std::size_t index) {
-        return kernel.PhysicalCore(index);
+        return status;
     }
 
-    ResultStatus RunLoop(bool tight_loop) {
+    ResultStatus Pause() {
         status = ResultStatus::Success;
 
-        cpu_manager.RunLoop(tight_loop);
+        core_timing.SyncPause(true);
+        kernel.Suspend(true);
+        cpu_manager.Pause(true);
 
         return status;
     }
@@ -143,7 +147,15 @@ struct System::Impl {
 
         device_memory = std::make_unique<Core::DeviceMemory>(system);
 
-        core_timing.Initialize();
+        is_multicore = Settings::values.use_multi_core;
+        is_async_gpu = is_multicore || Settings::values.use_asynchronous_gpu_emulation;
+
+        kernel.SetMulticore(is_multicore);
+        cpu_manager.SetMulticore(is_multicore);
+        cpu_manager.SetAsyncGpu(is_async_gpu);
+        core_timing.SetMulticore(is_multicore);
+
+        core_timing.Initialize([&system]() { system.RegisterHostThread(); });
         kernel.Initialize();
         cpu_manager.Initialize();
 
@@ -180,6 +192,11 @@ struct System::Impl {
         is_powered_on = true;
         exit_lock = false;
 
+        microprofile_dynarmic[0] = MICROPROFILE_TOKEN(ARM_Jit_Dynarmic_CPU0);
+        microprofile_dynarmic[1] = MICROPROFILE_TOKEN(ARM_Jit_Dynarmic_CPU1);
+        microprofile_dynarmic[2] = MICROPROFILE_TOKEN(ARM_Jit_Dynarmic_CPU2);
+        microprofile_dynarmic[3] = MICROPROFILE_TOKEN(ARM_Jit_Dynarmic_CPU3);
+
         LOG_DEBUG(Core, "Initialized OK");
 
         return ResultStatus::Success;
@@ -277,8 +294,6 @@ struct System::Impl {
         service_manager.reset();
         cheat_engine.reset();
         telemetry_session.reset();
-        perf_stats.reset();
-        gpu_core.reset();
         device_memory.reset();
 
         // Close all CPU/threading state
@@ -290,6 +305,8 @@ struct System::Impl {
 
         // Close app loader
         app_loader.reset();
+        gpu_core.reset();
+        perf_stats.reset();
 
         // Clear all applets
         applet_manager.ClearAll();
@@ -382,25 +399,35 @@ struct System::Impl {
 
     std::unique_ptr<Core::PerfStats> perf_stats;
     Core::FrameLimiter frame_limiter;
+
+    bool is_multicore{};
+    bool is_async_gpu{};
+
+    std::array<u64, Core::Hardware::NUM_CPU_CORES> dynarmic_ticks{};
+    std::array<MicroProfileToken, Core::Hardware::NUM_CPU_CORES> microprofile_dynarmic{};
 };
 
 System::System() : impl{std::make_unique<Impl>(*this)} {}
 System::~System() = default;
 
-CoreManager& System::CurrentCoreManager() {
-    return impl->CurrentCoreManager();
+CpuManager& System::GetCpuManager() {
+    return impl->cpu_manager;
+}
+
+const CpuManager& System::GetCpuManager() const {
+    return impl->cpu_manager;
 }
 
-const CoreManager& System::CurrentCoreManager() const {
-    return impl->CurrentCoreManager();
+System::ResultStatus System::Run() {
+    return impl->Run();
 }
 
-System::ResultStatus System::RunLoop(bool tight_loop) {
-    return impl->RunLoop(tight_loop);
+System::ResultStatus System::Pause() {
+    return impl->Pause();
 }
 
 System::ResultStatus System::SingleStep() {
-    return RunLoop(false);
+    return ResultStatus::Success;
 }
 
 void System::InvalidateCpuInstructionCaches() {
@@ -416,7 +443,7 @@ bool System::IsPoweredOn() const {
 }
 
 void System::PrepareReschedule() {
-    impl->CurrentPhysicalCore().Stop();
+    // Deprecated, does nothing, kept for backward compatibility.
 }
 
 void System::PrepareReschedule(const u32 core_index) {
@@ -436,31 +463,41 @@ const TelemetrySession& System::TelemetrySession() const {
 }
 
 ARM_Interface& System::CurrentArmInterface() {
-    return impl->CurrentPhysicalCore().ArmInterface();
+    return impl->kernel.CurrentScheduler().GetCurrentThread()->ArmInterface();
 }
 
 const ARM_Interface& System::CurrentArmInterface() const {
-    return impl->CurrentPhysicalCore().ArmInterface();
+    return impl->kernel.CurrentScheduler().GetCurrentThread()->ArmInterface();
 }
 
 std::size_t System::CurrentCoreIndex() const {
-    return impl->cpu_manager.GetActiveCoreIndex();
+    std::size_t core = impl->kernel.GetCurrentHostThreadID();
+    ASSERT(core < Core::Hardware::NUM_CPU_CORES);
+    return core;
 }
 
 Kernel::Scheduler& System::CurrentScheduler() {
-    return impl->CurrentPhysicalCore().Scheduler();
+    return impl->kernel.CurrentScheduler();
 }
 
 const Kernel::Scheduler& System::CurrentScheduler() const {
-    return impl->CurrentPhysicalCore().Scheduler();
+    return impl->kernel.CurrentScheduler();
+}
+
+Kernel::PhysicalCore& System::CurrentPhysicalCore() {
+    return impl->kernel.CurrentPhysicalCore();
+}
+
+const Kernel::PhysicalCore& System::CurrentPhysicalCore() const {
+    return impl->kernel.CurrentPhysicalCore();
 }
 
 Kernel::Scheduler& System::Scheduler(std::size_t core_index) {
-    return impl->GetPhysicalCore(core_index).Scheduler();
+    return impl->kernel.Scheduler(core_index);
 }
 
 const Kernel::Scheduler& System::Scheduler(std::size_t core_index) const {
-    return impl->GetPhysicalCore(core_index).Scheduler();
+    return impl->kernel.Scheduler(core_index);
 }
 
 /// Gets the global scheduler
@@ -490,20 +527,15 @@ const Kernel::Process* System::CurrentProcess() const {
 }
 
 ARM_Interface& System::ArmInterface(std::size_t core_index) {
-    return impl->GetPhysicalCore(core_index).ArmInterface();
+    auto* thread = impl->kernel.Scheduler(core_index).GetCurrentThread();
+    ASSERT(thread && !thread->IsHLEThread());
+    return thread->ArmInterface();
 }
 
 const ARM_Interface& System::ArmInterface(std::size_t core_index) const {
-    return impl->GetPhysicalCore(core_index).ArmInterface();
-}
-
-CoreManager& System::GetCoreManager(std::size_t core_index) {
-    return impl->cpu_manager.GetCoreManager(core_index);
-}
-
-const CoreManager& System::GetCoreManager(std::size_t core_index) const {
-    ASSERT(core_index < NUM_CPU_CORES);
-    return impl->cpu_manager.GetCoreManager(core_index);
+    auto* thread = impl->kernel.Scheduler(core_index).GetCurrentThread();
+    ASSERT(thread && !thread->IsHLEThread());
+    return thread->ArmInterface();
 }
 
 ExclusiveMonitor& System::Monitor() {
@@ -722,4 +754,18 @@ void System::RegisterHostThread() {
     impl->kernel.RegisterHostThread();
 }
 
+void System::EnterDynarmicProfile() {
+    std::size_t core = impl->kernel.GetCurrentHostThreadID();
+    impl->dynarmic_ticks[core] = MicroProfileEnter(impl->microprofile_dynarmic[core]);
+}
+
+void System::ExitDynarmicProfile() {
+    std::size_t core = impl->kernel.GetCurrentHostThreadID();
+    MicroProfileLeave(impl->microprofile_dynarmic[core], impl->dynarmic_ticks[core]);
+}
+
+bool System::IsMulticore() const {
+    return impl->is_multicore;
+}
+
 } // namespace Core
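Note: EnterDynarmicProfile/ExitDynarmicProfile must be strictly paired on each host thread, since MicroProfileLeave consumes the tick value returned by MicroProfileEnter. A hedged RAII sketch that would make the pairing automatic; DynarmicProfileScope is hypothetical and not part of this commit:

    // Hypothetical guard: enters the per-core profile zone on construction
    // and leaves it on destruction, so early returns cannot unbalance it.
    class DynarmicProfileScope {
    public:
        explicit DynarmicProfileScope(Core::System& system) : system{system} {
            system.EnterDynarmicProfile();
        }
        ~DynarmicProfileScope() {
            system.ExitDynarmicProfile();
        }

    private:
        Core::System& system;
    };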
diff --git a/src/core/core.h b/src/core/core.h
index acc53d6a1..5c6cfbffe 100644
--- a/src/core/core.h
+++ b/src/core/core.h
@@ -27,6 +27,7 @@ class VfsFilesystem;
 namespace Kernel {
 class GlobalScheduler;
 class KernelCore;
+class PhysicalCore;
 class Process;
 class Scheduler;
 } // namespace Kernel
@@ -90,7 +91,7 @@ class InterruptManager;
 namespace Core {
 
 class ARM_Interface;
-class CoreManager;
+class CpuManager;
 class DeviceMemory;
 class ExclusiveMonitor;
 class FrameLimiter;
@@ -136,16 +137,16 @@ public:
     };
 
     /**
-     * Run the core CPU loop
-     * This function runs the core for the specified number of CPU instructions before trying to
-     * update hardware. This is much faster than SingleStep (and should be equivalent), as the CPU
-     * is not required to do a full dispatch with each instruction. NOTE: the number of instructions
-     * requested is not guaranteed to run, as this will be interrupted preemptively if a hardware
-     * update is requested (e.g. on a thread switch).
-     * @param tight_loop If false, the CPU single-steps.
-     * @return Result status, indicating whether or not the operation succeeded.
+     * Run the OS and Application
+     * This function will start emulation and run the relevant devices
+     */
+    ResultStatus Run();
+
+    /**
+     * Pause the OS and Application
+     * This function will pause emulation and stop the relevant devices
      */
-    ResultStatus RunLoop(bool tight_loop = true);
+    ResultStatus Pause();
 
     /**
      * Step the CPU one instruction
@@ -209,17 +210,21 @@ public:
     /// Gets the scheduler for the CPU core that is currently running
     const Kernel::Scheduler& CurrentScheduler() const;
 
+    /// Gets the physical core for the CPU core that is currently running
+    Kernel::PhysicalCore& CurrentPhysicalCore();
+
+    /// Gets the physical core for the CPU core that is currently running
+    const Kernel::PhysicalCore& CurrentPhysicalCore() const;
+
     /// Gets a reference to an ARM interface for the CPU core with the specified index
     ARM_Interface& ArmInterface(std::size_t core_index);
 
     /// Gets a const reference to an ARM interface from the CPU core with the specified index
     const ARM_Interface& ArmInterface(std::size_t core_index) const;
 
-    /// Gets a CPU interface to the CPU core with the specified index
-    CoreManager& GetCoreManager(std::size_t core_index);
+    CpuManager& GetCpuManager();
 
-    /// Gets a CPU interface to the CPU core with the specified index
-    const CoreManager& GetCoreManager(std::size_t core_index) const;
+    const CpuManager& GetCpuManager() const;
 
     /// Gets a reference to the exclusive monitor
     ExclusiveMonitor& Monitor();
@@ -370,14 +375,17 @@ public:
     /// Register a host thread as an auxiliary thread.
     void RegisterHostThread();
 
-private:
-    System();
+    /// Enter Dynarmic Microprofile
+    void EnterDynarmicProfile();
+
+    /// Exit Dynarmic Microprofile
+    void ExitDynarmicProfile();
 
-    /// Returns the currently running CPU core
-    CoreManager& CurrentCoreManager();
+    /// Tells if system is running on multicore.
+    bool IsMulticore() const;
 
-    /// Returns the currently running CPU core
-    const CoreManager& CurrentCoreManager() const;
+private:
+    System();
 
     /**
      * Initialize the emulated system.
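Note: under the new interface the frontend no longer drives a per-core RunLoop; it starts emulation once and pauses it on demand. A minimal usage sketch, assuming a fully initialized Core::System named system:

    // Start the OS and application; host CPU threads and the timer thread
    // then run freely until paused.
    if (system.Run() != Core::System::ResultStatus::Success) {
        return;
    }
    // ... emulation runs; the frontend handles UI events ...
    system.Pause(); // synchronously suspends timing, kernel, and CPU threads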
diff --git a/src/core/core_manager.cpp b/src/core/core_manager.cpp
deleted file mode 100644
index b6b797c80..000000000
--- a/src/core/core_manager.cpp
+++ /dev/null
@@ -1,67 +0,0 @@
-// Copyright 2018 yuzu emulator team
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#include <condition_variable>
-#include <mutex>
-
-#include "common/logging/log.h"
-#include "core/arm/exclusive_monitor.h"
-#include "core/arm/unicorn/arm_unicorn.h"
-#include "core/core.h"
-#include "core/core_manager.h"
-#include "core/core_timing.h"
-#include "core/hle/kernel/kernel.h"
-#include "core/hle/kernel/physical_core.h"
-#include "core/hle/kernel/scheduler.h"
-#include "core/hle/kernel/thread.h"
-#include "core/hle/lock.h"
-#include "core/settings.h"
-
-namespace Core {
-
-CoreManager::CoreManager(System& system, std::size_t core_index)
-    : global_scheduler{system.GlobalScheduler()}, physical_core{system.Kernel().PhysicalCore(
-                                                      core_index)},
-      core_timing{system.CoreTiming()}, core_index{core_index} {}
-
-CoreManager::~CoreManager() = default;
-
-void CoreManager::RunLoop(bool tight_loop) {
-    Reschedule();
-
-    // If we don't have a currently active thread then don't execute instructions,
-    // instead advance to the next event and try to yield to the next thread
-    if (Kernel::GetCurrentThread() == nullptr) {
-        LOG_TRACE(Core, "Core-{} idling", core_index);
-        core_timing.Idle();
-    } else {
-        if (tight_loop) {
-            physical_core.Run();
-        } else {
-            physical_core.Step();
-        }
-    }
-    core_timing.Advance();
-
-    Reschedule();
-}
-
-void CoreManager::SingleStep() {
-    return RunLoop(false);
-}
-
-void CoreManager::PrepareReschedule() {
-    physical_core.Stop();
-}
-
-void CoreManager::Reschedule() {
-    // Lock the global kernel mutex when we manipulate the HLE state
-    std::lock_guard lock(HLE::g_hle_lock);
-
-    global_scheduler.SelectThread(core_index);
-
-    physical_core.Scheduler().TryDoContextSwitch();
-}
-
-} // namespace Core
diff --git a/src/core/core_manager.h b/src/core/core_manager.h
deleted file mode 100644
index d525de00a..000000000
--- a/src/core/core_manager.h
+++ /dev/null
@@ -1,63 +0,0 @@
-// Copyright 2018 yuzu emulator team
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#pragma once
-
-#include <atomic>
-#include <cstddef>
-#include <memory>
-#include "common/common_types.h"
-
-namespace Kernel {
-class GlobalScheduler;
-class PhysicalCore;
-} // namespace Kernel
-
-namespace Core {
-class System;
-}
-
-namespace Core::Timing {
-class CoreTiming;
-}
-
-namespace Core::Memory {
-class Memory;
-}
-
-namespace Core {
-
-constexpr unsigned NUM_CPU_CORES{4};
-
-class CoreManager {
-public:
-    CoreManager(System& system, std::size_t core_index);
-    ~CoreManager();
-
-    void RunLoop(bool tight_loop = true);
-
-    void SingleStep();
-
-    void PrepareReschedule();
-
-    bool IsMainCore() const {
-        return core_index == 0;
-    }
-
-    std::size_t CoreIndex() const {
-        return core_index;
-    }
-
-private:
-    void Reschedule();
-
-    Kernel::GlobalScheduler& global_scheduler;
-    Kernel::PhysicalCore& physical_core;
-    Timing::CoreTiming& core_timing;
-
-    std::atomic<bool> reschedule_pending = false;
-    std::size_t core_index;
-};
-
-} // namespace Core
diff --git a/src/core/core_timing.cpp b/src/core/core_timing.cpp
index 46d4178c4..5c83c41a4 100644
--- a/src/core/core_timing.cpp
+++ b/src/core/core_timing.cpp
@@ -1,29 +1,27 @@
-// Copyright 2008 Dolphin Emulator Project / 2017 Citra Emulator Project
-// Licensed under GPLv2+
+// Copyright 2020 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
 // Refer to the license.txt file included.
 
-#include "core/core_timing.h"
-
 #include <algorithm>
 #include <mutex>
 #include <string>
 #include <tuple>
 
 #include "common/assert.h"
-#include "common/thread.h"
+#include "common/microprofile.h"
+#include "core/core_timing.h"
 #include "core/core_timing_util.h"
-#include "core/hardware_properties.h"
 
 namespace Core::Timing {
 
-constexpr int MAX_SLICE_LENGTH = 10000;
+constexpr u64 MAX_SLICE_LENGTH = 4000;
 
 std::shared_ptr<EventType> CreateEvent(std::string name, TimedCallback&& callback) {
     return std::make_shared<EventType>(std::move(callback), std::move(name));
 }
 
 struct CoreTiming::Event {
-    s64 time;
+    u64 time;
     u64 fifo_order;
     u64 userdata;
     std::weak_ptr<EventType> type;
@@ -39,51 +37,90 @@ struct CoreTiming::Event {
     }
 };
 
-CoreTiming::CoreTiming() = default;
-CoreTiming::~CoreTiming() = default;
+CoreTiming::CoreTiming() {
+    clock =
+        Common::CreateBestMatchingClock(Core::Hardware::BASE_CLOCK_RATE, Core::Hardware::CNTFREQ);
+}
 
-void CoreTiming::Initialize() {
-    downcounts.fill(MAX_SLICE_LENGTH);
-    time_slice.fill(MAX_SLICE_LENGTH);
-    slice_length = MAX_SLICE_LENGTH;
-    global_timer = 0;
-    idled_cycles = 0;
-    current_context = 0;
+CoreTiming::~CoreTiming() = default;
 
-    // The time between CoreTiming being initialized and the first call to Advance() is considered
-    // the slice boundary between slice -1 and slice 0. Dispatcher loops must call Advance() before
-    // executing the first cycle of each slice to prepare the slice length and downcount for
-    // that slice.
-    is_global_timer_sane = true;
+void CoreTiming::ThreadEntry(CoreTiming& instance) {
+    constexpr char name[] = "yuzu:HostTiming";
+    MicroProfileOnThreadCreate(name);
+    Common::SetCurrentThreadName(name);
+    Common::SetCurrentThreadPriority(Common::ThreadPriority::VeryHigh);
+    instance.on_thread_init();
+    instance.ThreadLoop();
+}
 
+void CoreTiming::Initialize(std::function<void(void)>&& on_thread_init_) {
+    on_thread_init = std::move(on_thread_init_);
     event_fifo_id = 0;
-
+    shutting_down = false;
+    ticks = 0;
     const auto empty_timed_callback = [](u64, s64) {};
     ev_lost = CreateEvent("_lost_event", empty_timed_callback);
+    if (is_multicore) {
+        timer_thread = std::make_unique<std::thread>(ThreadEntry, std::ref(*this));
+    }
 }
 
 void CoreTiming::Shutdown() {
+    paused = true;
+    shutting_down = true;
+    pause_event.Set();
+    event.Set();
+    if (timer_thread) {
+        timer_thread->join();
+    }
     ClearPendingEvents();
+    timer_thread.reset();
+    has_started = false;
 }
 
-void CoreTiming::ScheduleEvent(s64 cycles_into_future, const std::shared_ptr<EventType>& event_type,
-                               u64 userdata) {
-    std::lock_guard guard{inner_mutex};
-    const s64 timeout = GetTicks() + cycles_into_future;
+void CoreTiming::Pause(bool is_paused) {
+    paused = is_paused;
+    pause_event.Set();
+}
 
-    // If this event needs to be scheduled before the next advance(), force one early
-    if (!is_global_timer_sane) {
-        ForceExceptionCheck(cycles_into_future);
+void CoreTiming::SyncPause(bool is_paused) {
+    if (is_paused == paused && paused_set == paused) {
+        return;
+    }
+    Pause(is_paused);
+    if (timer_thread) {
+        if (!is_paused) {
+            pause_event.Set();
+        }
+        event.Set();
+        while (paused_set != is_paused)
+            ;
     }
+}
 
-    event_queue.emplace_back(Event{timeout, event_fifo_id++, userdata, event_type});
+bool CoreTiming::IsRunning() const {
+    return !paused_set;
+}
 
-    std::push_heap(event_queue.begin(), event_queue.end(), std::greater<>());
+bool CoreTiming::HasPendingEvents() const {
+    return !(wait_set && event_queue.empty());
 }
 
-void CoreTiming::UnscheduleEvent(const std::shared_ptr<EventType>& event_type, u64 userdata) {
-    std::lock_guard guard{inner_mutex};
+void CoreTiming::ScheduleEvent(s64 ns_into_future, const std::shared_ptr<EventType>& event_type,
+                               u64 userdata) {
+    {
+        std::scoped_lock scope{basic_lock};
+        const u64 timeout = static_cast<u64>(GetGlobalTimeNs().count() + ns_into_future);
+
+        event_queue.emplace_back(Event{timeout, event_fifo_id++, userdata, event_type});
+
+        std::push_heap(event_queue.begin(), event_queue.end(), std::greater<>());
+    }
+    event.Set();
+}
+
+void CoreTiming::UnscheduleEvent(const std::shared_ptr<EventType>& event_type, u64 userdata) {
+    std::scoped_lock scope{basic_lock};
     const auto itr = std::remove_if(event_queue.begin(), event_queue.end(), [&](const Event& e) {
         return e.type.lock().get() == event_type.get() && e.userdata == userdata;
     });
@@ -95,21 +132,39 @@ void CoreTiming::UnscheduleEvent(const std::shared_ptr<EventType>& event_type, u
     }
 }
 
-u64 CoreTiming::GetTicks() const {
-    u64 ticks = static_cast<u64>(global_timer);
-    if (!is_global_timer_sane) {
-        ticks += accumulated_ticks;
+void CoreTiming::AddTicks(u64 ticks) {
+    this->ticks += ticks;
+    downcount -= ticks;
+}
+
+void CoreTiming::Idle() {
+    if (!event_queue.empty()) {
+        const u64 next_event_time = event_queue.front().time;
+        const u64 next_ticks = nsToCycles(std::chrono::nanoseconds(next_event_time)) + 10U;
+        if (next_ticks > ticks) {
+            ticks = next_ticks;
+        }
+        return;
     }
-    return ticks;
+    ticks += 1000U;
 }
 
-u64 CoreTiming::GetIdleTicks() const {
-    return static_cast<u64>(idled_cycles);
+void CoreTiming::ResetTicks() {
+    downcount = MAX_SLICE_LENGTH;
 }
 
-void CoreTiming::AddTicks(u64 ticks) {
-    accumulated_ticks += ticks;
-    downcounts[current_context] -= static_cast<s64>(ticks);
+u64 CoreTiming::GetCPUTicks() const {
+    if (is_multicore) {
+        return clock->GetCPUCycles();
+    }
+    return ticks;
+}
+
+u64 CoreTiming::GetClockTicks() const {
+    if (is_multicore) {
+        return clock->GetClockCycles();
+    }
+    return CpuCyclesToClockCycles(ticks);
 }
 
 void CoreTiming::ClearPendingEvents() {
@@ -117,7 +172,7 @@ void CoreTiming::ClearPendingEvents() {
 }
 
 void CoreTiming::RemoveEvent(const std::shared_ptr<EventType>& event_type) {
-    std::lock_guard guard{inner_mutex};
+    basic_lock.lock();
 
     const auto itr = std::remove_if(event_queue.begin(), event_queue.end(), [&](const Event& e) {
         return e.type.lock().get() == event_type.get();
@@ -128,99 +183,72 @@ void CoreTiming::RemoveEvent(const std::shared_ptr<EventType>& event_type) {
         event_queue.erase(itr, event_queue.end());
         std::make_heap(event_queue.begin(), event_queue.end(), std::greater<>());
     }
+    basic_lock.unlock();
 }
 
-void CoreTiming::ForceExceptionCheck(s64 cycles) {
-    cycles = std::max<s64>(0, cycles);
-    if (downcounts[current_context] <= cycles) {
-        return;
-    }
-
-    // downcount is always (much) smaller than MAX_INT so we can safely cast cycles to an int
-    // here. Account for cycles already executed by adjusting the g.slice_length
-    downcounts[current_context] = static_cast<int>(cycles);
-}
-
-std::optional<u64> CoreTiming::NextAvailableCore(const s64 needed_ticks) const {
-    const u64 original_context = current_context;
-    u64 next_context = (original_context + 1) % num_cpu_cores;
-    while (next_context != original_context) {
-        if (time_slice[next_context] >= needed_ticks) {
-            return {next_context};
-        } else if (time_slice[next_context] >= 0) {
-            return std::nullopt;
-        }
-        next_context = (next_context + 1) % num_cpu_cores;
-    }
-    return std::nullopt;
-}
-
-void CoreTiming::Advance() {
-    std::unique_lock<std::mutex> guard(inner_mutex);
-
-    const u64 cycles_executed = accumulated_ticks;
-    time_slice[current_context] = std::max<s64>(0, time_slice[current_context] - accumulated_ticks);
-    global_timer += cycles_executed;
-
-    is_global_timer_sane = true;
+std::optional<s64> CoreTiming::Advance() {
+    std::scoped_lock advance_scope{advance_lock};
+    std::scoped_lock basic_scope{basic_lock};
+    global_timer = GetGlobalTimeNs().count();
 
     while (!event_queue.empty() && event_queue.front().time <= global_timer) {
         Event evt = std::move(event_queue.front());
         std::pop_heap(event_queue.begin(), event_queue.end(), std::greater<>());
         event_queue.pop_back();
-        inner_mutex.unlock();
+        basic_lock.unlock();
 
         if (auto event_type{evt.type.lock()}) {
             event_type->callback(evt.userdata, global_timer - evt.time);
         }
 
-        inner_mutex.lock();
+        basic_lock.lock();
+        global_timer = GetGlobalTimeNs().count();
     }
 
-    is_global_timer_sane = false;
-
-    // Still events left (scheduled in the future)
     if (!event_queue.empty()) {
-        const s64 needed_ticks =
-            std::min<s64>(event_queue.front().time - global_timer, MAX_SLICE_LENGTH);
-        const auto next_core = NextAvailableCore(needed_ticks);
-        if (next_core) {
-            downcounts[*next_core] = needed_ticks;
-        }
+        const s64 next_time = event_queue.front().time - global_timer;
+        return next_time;
+    } else {
+        return std::nullopt;
     }
-
-    accumulated_ticks = 0;
-
-    downcounts[current_context] = time_slice[current_context];
 }
 
-void CoreTiming::ResetRun() {
-    downcounts.fill(MAX_SLICE_LENGTH);
-    time_slice.fill(MAX_SLICE_LENGTH);
-    current_context = 0;
-    // Still events left (scheduled in the future)
-    if (!event_queue.empty()) {
-        const s64 needed_ticks =
-            std::min<s64>(event_queue.front().time - global_timer, MAX_SLICE_LENGTH);
-        downcounts[current_context] = needed_ticks;
+void CoreTiming::ThreadLoop() {
+    has_started = true;
+    while (!shutting_down) {
+        while (!paused) {
+            paused_set = false;
+            const auto next_time = Advance();
+            if (next_time) {
+                if (*next_time > 0) {
+                    std::chrono::nanoseconds next_time_ns = std::chrono::nanoseconds(*next_time);
+                    event.WaitFor(next_time_ns);
+                }
+            } else {
+                wait_set = true;
+                event.Wait();
+            }
+            wait_set = false;
+        }
+        paused_set = true;
+        clock->Pause(true);
+        pause_event.Wait();
+        clock->Pause(false);
     }
-
-    is_global_timer_sane = false;
-    accumulated_ticks = 0;
 }
 
-void CoreTiming::Idle() {
-    accumulated_ticks += downcounts[current_context];
-    idled_cycles += downcounts[current_context];
-    downcounts[current_context] = 0;
+std::chrono::nanoseconds CoreTiming::GetGlobalTimeNs() const {
+    if (is_multicore) {
+        return clock->GetTimeNS();
+    }
+    return CyclesToNs(ticks);
 }
 
 std::chrono::microseconds CoreTiming::GetGlobalTimeUs() const {
-    return std::chrono::microseconds{GetTicks() * 1000000 / Hardware::BASE_CLOCK_RATE};
-}
-
-s64 CoreTiming::GetDowncount() const {
-    return downcounts[current_context];
+    if (is_multicore) {
+        return clock->GetTimeUS();
+    }
+    return CyclesToUs(ticks);
 }
 
 } // namespace Core::Timing
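Note: ThreadLoop above is a classic host-timer event loop: drain all due events, then sleep until the next deadline or until ScheduleEvent wakes the thread early. A condensed sketch of the same pattern using standard primitives (std::condition_variable in place of Common::Event; pause/shutdown handling omitted for brevity):

    #include <chrono>
    #include <condition_variable>
    #include <mutex>
    #include <optional>

    std::mutex m;
    std::condition_variable cv;
    bool wake = false; // set by the scheduler side together with cv.notify_one()

    // advance() runs all due callbacks and returns the time until the next
    // event, or empty when the queue is idle.
    void TimerLoop(std::optional<std::chrono::nanoseconds> (*advance)()) {
        std::unique_lock lock{m};
        for (;;) {
            const auto next = advance();
            if (next) {
                cv.wait_for(lock, *next, [] { return wake; }); // early wake on new event
            } else {
                cv.wait(lock, [] { return wake; });
            }
            wake = false;
        }
    }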
diff --git a/src/core/core_timing.h b/src/core/core_timing.h
index d50f4eb8a..72faaab64 100644
--- a/src/core/core_timing.h
+++ b/src/core/core_timing.h
@@ -1,19 +1,25 @@
-// Copyright 2008 Dolphin Emulator Project / 2017 Citra Emulator Project
-// Licensed under GPLv2+
+// Copyright 2020 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
 // Refer to the license.txt file included.
 
 #pragma once
 
+#include <atomic>
 #include <chrono>
 #include <functional>
 #include <memory>
 #include <mutex>
 #include <optional>
 #include <string>
+#include <thread>
 #include <vector>
 
 #include "common/common_types.h"
+#include "common/spin_lock.h"
+#include "common/thread.h"
 #include "common/threadsafe_queue.h"
+#include "common/wall_clock.h"
+#include "core/hardware_properties.h"
 
 namespace Core::Timing {
 
@@ -56,16 +62,40 @@ public:
 
     /// CoreTiming begins at the boundary of timing slice -1. An initial call to Advance() is
     /// required to end slice - 1 and start slice 0 before the first cycle of code is executed.
-    void Initialize();
+    void Initialize(std::function<void(void)>&& on_thread_init_);
 
     /// Tears down all timing related functionality.
     void Shutdown();
 
-    /// After the first Advance, the slice lengths and the downcount will be reduced whenever an
-    /// event is scheduled earlier than the current values.
-    ///
-    /// Scheduling from a callback will not update the downcount until the Advance() completes.
-    void ScheduleEvent(s64 cycles_into_future, const std::shared_ptr<EventType>& event_type,
+    /// Sets if emulation is multicore or single core, must be set before Initialize
+    void SetMulticore(bool is_multicore) {
+        this->is_multicore = is_multicore;
+    }
+
+    /// Check if it's using host timing.
+    bool IsHostTiming() const {
+        return is_multicore;
+    }
+
+    /// Pauses/Unpauses the execution of the timer thread.
+    void Pause(bool is_paused);
+
+    /// Pauses/Unpauses the execution of the timer thread and waits until paused.
+    void SyncPause(bool is_paused);
+
+    /// Checks if core timing is running.
+    bool IsRunning() const;
+
+    /// Checks if the timer thread has started.
+    bool HasStarted() const {
+        return has_started;
+    }
+
+    /// Checks if there are any pending time events.
+    bool HasPendingEvents() const;
+
+    /// Schedules an event in core timing
+    void ScheduleEvent(s64 ns_into_future, const std::shared_ptr<EventType>& event_type,
                        u64 userdata = 0);
 
     void UnscheduleEvent(const std::shared_ptr<EventType>& event_type, u64 userdata);
@@ -73,41 +103,30 @@ public:
     /// We only permit one event of each type in the queue at a time.
     void RemoveEvent(const std::shared_ptr<EventType>& event_type);
 
-    void ForceExceptionCheck(s64 cycles);
-
-    /// This should only be called from the emu thread, if you are calling it any other thread,
-    /// you are doing something evil
-    u64 GetTicks() const;
-
-    u64 GetIdleTicks() const;
-
     void AddTicks(u64 ticks);
 
-    /// Advance must be called at the beginning of dispatcher loops, not the end. Advance() ends
-    /// the previous timing slice and begins the next one, you must Advance from the previous
-    /// slice to the current one before executing any cycles. CoreTiming starts in slice -1 so an
-    /// Advance() is required to initialize the slice length before the first cycle of emulated
-    /// instructions is executed.
-    void Advance();
+    void ResetTicks();
 
-    /// Pretend that the main CPU has executed enough cycles to reach the next event.
     void Idle();
 
-    std::chrono::microseconds GetGlobalTimeUs() const;
+    s64 GetDowncount() const {
+        return downcount;
+    }
 
-    void ResetRun();
+    /// Returns current time in emulated CPU cycles
+    u64 GetCPUTicks() const;
 
-    s64 GetDowncount() const;
+    /// Returns current time in emulated in Clock cycles
+    u64 GetClockTicks() const;
 
-    void SwitchContext(u64 new_context) {
-        current_context = new_context;
-    }
+    /// Returns current time in microseconds.
+    std::chrono::microseconds GetGlobalTimeUs() const;
 
-    bool CanCurrentContextRun() const {
-        return time_slice[current_context] > 0;
-    }
+    /// Returns current time in nanoseconds.
+    std::chrono::nanoseconds GetGlobalTimeNs() const;
 
-    std::optional<u64> NextAvailableCore(const s64 needed_ticks) const;
+    /// Checks for events manually and returns time in nanoseconds for next event, threadsafe.
+    std::optional<s64> Advance();
 
 private:
     struct Event;
@@ -115,21 +134,14 @@ private:
     /// Clear all pending events. This should ONLY be done on exit.
     void ClearPendingEvents();
 
-    static constexpr u64 num_cpu_cores = 4;
+    static void ThreadEntry(CoreTiming& instance);
+    void ThreadLoop();
 
-    s64 global_timer = 0;
-    s64 idled_cycles = 0;
-    s64 slice_length = 0;
-    u64 accumulated_ticks = 0;
-    std::array<s64, num_cpu_cores> downcounts{};
-    // Slice of time assigned to each core per run.
-    std::array<s64, num_cpu_cores> time_slice{};
-    u64 current_context = 0;
+    std::unique_ptr<Common::WallClock> clock;
 
-    // Are we in a function that has been called from Advance()
-    // If events are scheduled from a function that gets called from Advance(),
-    // don't change slice_length and downcount.
-    bool is_global_timer_sane = false;
+    u64 global_timer = 0;
+
+    std::chrono::nanoseconds start_point;
 
     // The queue is a min-heap using std::make_heap/push_heap/pop_heap.
     // We don't use std::priority_queue because we need to be able to serialize, unserialize and
@@ -139,8 +151,23 @@ private:
     u64 event_fifo_id = 0;
 
     std::shared_ptr<EventType> ev_lost;
-
-    std::mutex inner_mutex;
+    Common::Event event{};
+    Common::Event pause_event{};
+    Common::SpinLock basic_lock{};
+    Common::SpinLock advance_lock{};
+    std::unique_ptr<std::thread> timer_thread;
+    std::atomic<bool> paused{};
+    std::atomic<bool> paused_set{};
+    std::atomic<bool> wait_set{};
+    std::atomic<bool> shutting_down{};
+    std::atomic<bool> has_started{};
+    std::function<void(void)> on_thread_init{};
+
+    bool is_multicore{};
+
+    /// Cycle timing
+    u64 ticks{};
+    s64 downcount{};
 };
 
 /// Creates a core timing event with the given name and callback.
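Note: as the retained comment says, the event queue is a std::vector kept in heap order with the <algorithm> heap functions rather than a std::priority_queue, so it can be walked, serialized, and have arbitrary entries removed. A small self-contained sketch of that technique:

    #include <algorithm>
    #include <cstdint>
    #include <tuple>
    #include <vector>

    struct Ev {
        std::uint64_t time;
        std::uint64_t fifo_order; // tie-breaker: FIFO for events with equal time
        friend bool operator>(const Ev& a, const Ev& b) {
            return std::tie(a.time, a.fifo_order) > std::tie(b.time, b.fifo_order);
        }
    };

    std::vector<Ev> queue; // min-heap via std::greater, earliest event at front

    void Push(Ev ev) {
        queue.push_back(ev);
        std::push_heap(queue.begin(), queue.end(), std::greater<>());
    }

    Ev Pop() {
        std::pop_heap(queue.begin(), queue.end(), std::greater<>());
        Ev ev = queue.back();
        queue.pop_back();
        return ev;
    }

    void RemoveAt(std::uint64_t dead_time) {
        // Arbitrary removal is why a plain vector is used: erase, then re-heap.
        queue.erase(std::remove_if(queue.begin(), queue.end(),
                                   [&](const Ev& e) { return e.time == dead_time; }),
                    queue.end());
        std::make_heap(queue.begin(), queue.end(), std::greater<>());
    }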
diff --git a/src/core/core_timing_util.cpp b/src/core/core_timing_util.cpp
index de50d3b14..aefc63663 100644
--- a/src/core/core_timing_util.cpp
+++ b/src/core/core_timing_util.cpp
@@ -38,15 +38,23 @@ s64 usToCycles(std::chrono::microseconds us) {
 }
 
 s64 nsToCycles(std::chrono::nanoseconds ns) {
-    if (static_cast<u64>(ns.count() / 1000000000) > MAX_VALUE_TO_MULTIPLY) {
-        LOG_ERROR(Core_Timing, "Integer overflow, use max value");
-        return std::numeric_limits<s64>::max();
-    }
-    if (static_cast<u64>(ns.count()) > MAX_VALUE_TO_MULTIPLY) {
-        LOG_DEBUG(Core_Timing, "Time very big, do rounding");
-        return Hardware::BASE_CLOCK_RATE * (ns.count() / 1000000000);
-    }
-    return (Hardware::BASE_CLOCK_RATE * ns.count()) / 1000000000;
+    const u128 temporal = Common::Multiply64Into128(ns.count(), Hardware::BASE_CLOCK_RATE);
+    return Common::Divide128On32(temporal, static_cast<u32>(1000000000)).first;
+}
+
+u64 msToClockCycles(std::chrono::milliseconds ns) {
+    const u128 temp = Common::Multiply64Into128(ns.count(), Hardware::CNTFREQ);
+    return Common::Divide128On32(temp, 1000).first;
+}
+
+u64 usToClockCycles(std::chrono::microseconds ns) {
+    const u128 temp = Common::Multiply64Into128(ns.count(), Hardware::CNTFREQ);
+    return Common::Divide128On32(temp, 1000000).first;
+}
+
+u64 nsToClockCycles(std::chrono::nanoseconds ns) {
+    const u128 temp = Common::Multiply64Into128(ns.count(), Hardware::CNTFREQ);
+    return Common::Divide128On32(temp, 1000000000).first;
 }
 
 u64 CpuCyclesToClockCycles(u64 ticks) {
@@ -54,4 +62,22 @@ u64 CpuCyclesToClockCycles(u64 ticks) {
     return Common::Divide128On32(temporal, static_cast<u32>(Hardware::BASE_CLOCK_RATE)).first;
 }
 
+std::chrono::milliseconds CyclesToMs(s64 cycles) {
+    const u128 temporal = Common::Multiply64Into128(cycles, 1000);
+    u64 ms = Common::Divide128On32(temporal, static_cast<u32>(Hardware::BASE_CLOCK_RATE)).first;
+    return std::chrono::milliseconds(ms);
+}
+
+std::chrono::nanoseconds CyclesToNs(s64 cycles) {
+    const u128 temporal = Common::Multiply64Into128(cycles, 1000000000);
+    u64 ns = Common::Divide128On32(temporal, static_cast<u32>(Hardware::BASE_CLOCK_RATE)).first;
+    return std::chrono::nanoseconds(ns);
+}
+
+std::chrono::microseconds CyclesToUs(s64 cycles) {
+    const u128 temporal = Common::Multiply64Into128(cycles, 1000000);
+    u64 us = Common::Divide128On32(temporal, static_cast<u32>(Hardware::BASE_CLOCK_RATE)).first;
+    return std::chrono::microseconds(us);
+}
+
 } // namespace Core::Timing
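Note: the rewritten conversions drop the old overflow checks by widening the multiply: a 64x64 -> 128-bit product is divided by the time base, which is exact as long as the final quotient fits in 64 bits. A sketch of the same idea with GCC/Clang's unsigned __int128 (the emulator uses its own u128 helpers instead):

    #include <cstdint>

    // cycles = ns * freq / 1'000'000'000 without intermediate overflow.
    std::uint64_t NsToCycles(std::uint64_t ns, std::uint64_t freq) {
        const unsigned __int128 product = static_cast<unsigned __int128>(ns) * freq;
        return static_cast<std::uint64_t>(product / 1'000'000'000u);
    }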
diff --git a/src/core/core_timing_util.h b/src/core/core_timing_util.h
index addc72b19..2ed979e14 100644
--- a/src/core/core_timing_util.h
+++ b/src/core/core_timing_util.h
@@ -13,18 +13,12 @@ namespace Core::Timing {
 s64 msToCycles(std::chrono::milliseconds ms);
 s64 usToCycles(std::chrono::microseconds us);
 s64 nsToCycles(std::chrono::nanoseconds ns);
-
-inline std::chrono::milliseconds CyclesToMs(s64 cycles) {
-    return std::chrono::milliseconds(cycles * 1000 / Hardware::BASE_CLOCK_RATE);
-}
-
-inline std::chrono::nanoseconds CyclesToNs(s64 cycles) {
-    return std::chrono::nanoseconds(cycles * 1000000000 / Hardware::BASE_CLOCK_RATE);
-}
-
-inline std::chrono::microseconds CyclesToUs(s64 cycles) {
-    return std::chrono::microseconds(cycles * 1000000 / Hardware::BASE_CLOCK_RATE);
-}
+u64 msToClockCycles(std::chrono::milliseconds ns);
+u64 usToClockCycles(std::chrono::microseconds ns);
+u64 nsToClockCycles(std::chrono::nanoseconds ns);
+std::chrono::milliseconds CyclesToMs(s64 cycles);
+std::chrono::nanoseconds CyclesToNs(s64 cycles);
+std::chrono::microseconds CyclesToUs(s64 cycles);
 
 u64 CpuCyclesToClockCycles(u64 ticks);
 
diff --git a/src/core/cpu_manager.cpp b/src/core/cpu_manager.cpp
index 70ddbdcca..32afcf3ae 100644
--- a/src/core/cpu_manager.cpp
+++ b/src/core/cpu_manager.cpp
@@ -2,80 +2,372 @@
 // Licensed under GPLv2 or any later version
 // Refer to the license.txt file included.
 
+#include "common/fiber.h"
+#include "common/microprofile.h"
+#include "common/thread.h"
 #include "core/arm/exclusive_monitor.h"
 #include "core/core.h"
-#include "core/core_manager.h"
 #include "core/core_timing.h"
 #include "core/cpu_manager.h"
 #include "core/gdbstub/gdbstub.h"
+#include "core/hle/kernel/kernel.h"
+#include "core/hle/kernel/physical_core.h"
+#include "core/hle/kernel/scheduler.h"
+#include "core/hle/kernel/thread.h"
+#include "video_core/gpu.h"
 
 namespace Core {
 
 CpuManager::CpuManager(System& system) : system{system} {}
 CpuManager::~CpuManager() = default;
 
+void CpuManager::ThreadStart(CpuManager& cpu_manager, std::size_t core) {
+    cpu_manager.RunThread(core);
+}
+
 void CpuManager::Initialize() {
-    for (std::size_t index = 0; index < core_managers.size(); ++index) {
-        core_managers[index] = std::make_unique<CoreManager>(system, index);
+    running_mode = true;
+    if (is_multicore) {
+        for (std::size_t core = 0; core < Core::Hardware::NUM_CPU_CORES; core++) {
+            core_data[core].host_thread =
+                std::make_unique<std::thread>(ThreadStart, std::ref(*this), core);
+        }
+    } else {
+        core_data[0].host_thread = std::make_unique<std::thread>(ThreadStart, std::ref(*this), 0);
     }
 }
 
 void CpuManager::Shutdown() {
-    for (auto& cpu_core : core_managers) {
-        cpu_core.reset();
+    running_mode = false;
+    Pause(false);
+    if (is_multicore) {
+        for (std::size_t core = 0; core < Core::Hardware::NUM_CPU_CORES; core++) {
+            core_data[core].host_thread->join();
+            core_data[core].host_thread.reset();
+        }
+    } else {
+        core_data[0].host_thread->join();
+        core_data[0].host_thread.reset();
     }
 }
 
-CoreManager& CpuManager::GetCoreManager(std::size_t index) {
-    return *core_managers.at(index);
+std::function<void(void*)> CpuManager::GetGuestThreadStartFunc() {
+    return std::function<void(void*)>(GuestThreadFunction);
 }
 
-const CoreManager& CpuManager::GetCoreManager(std::size_t index) const {
-    return *core_managers.at(index);
+std::function<void(void*)> CpuManager::GetIdleThreadStartFunc() {
+    return std::function<void(void*)>(IdleThreadFunction);
 }
 
-CoreManager& CpuManager::GetCurrentCoreManager() {
-    // Otherwise, use single-threaded mode active_core variable
-    return *core_managers[active_core];
+std::function<void(void*)> CpuManager::GetSuspendThreadStartFunc() {
+    return std::function<void(void*)>(SuspendThreadFunction);
 }
 
-const CoreManager& CpuManager::GetCurrentCoreManager() const {
-    // Otherwise, use single-threaded mode active_core variable
-    return *core_managers[active_core];
+void CpuManager::GuestThreadFunction(void* cpu_manager_) {
+    CpuManager* cpu_manager = static_cast<CpuManager*>(cpu_manager_);
+    if (cpu_manager->is_multicore) {
+        cpu_manager->MultiCoreRunGuestThread();
+    } else {
+        cpu_manager->SingleCoreRunGuestThread();
+    }
 }
 
-void CpuManager::RunLoop(bool tight_loop) {
-    if (GDBStub::IsServerEnabled()) {
-        GDBStub::HandlePacket();
+void CpuManager::GuestRewindFunction(void* cpu_manager_) {
+    CpuManager* cpu_manager = static_cast<CpuManager*>(cpu_manager_);
+    if (cpu_manager->is_multicore) {
+        cpu_manager->MultiCoreRunGuestLoop();
+    } else {
+        cpu_manager->SingleCoreRunGuestLoop();
+    }
+}
 
-    // If the loop is halted and we want to step, use a tiny (1) number of instructions to
-    // execute. Otherwise, get out of the loop function.
-    if (GDBStub::GetCpuHaltFlag()) {
-        if (GDBStub::GetCpuStepFlag()) {
-            tight_loop = false;
-        } else {
-            return;
+void CpuManager::IdleThreadFunction(void* cpu_manager_) {
+    CpuManager* cpu_manager = static_cast<CpuManager*>(cpu_manager_);
+    if (cpu_manager->is_multicore) {
+        cpu_manager->MultiCoreRunIdleThread();
+    } else {
+        cpu_manager->SingleCoreRunIdleThread();
+    }
+}
+
+void CpuManager::SuspendThreadFunction(void* cpu_manager_) {
+    CpuManager* cpu_manager = static_cast<CpuManager*>(cpu_manager_);
+    if (cpu_manager->is_multicore) {
96 cpu_manager->MultiCoreRunSuspendThread();
97 } else {
98 cpu_manager->SingleCoreRunSuspendThread();
99 }
100}
101
102void* CpuManager::GetStartFuncParamater() {
103 return static_cast<void*>(this);
104}
105
106///////////////////////////////////////////////////////////////////////////////
107/// MultiCore ///
108///////////////////////////////////////////////////////////////////////////////
109
110void CpuManager::MultiCoreRunGuestThread() {
111 auto& kernel = system.Kernel();
112 {
113 auto& sched = kernel.CurrentScheduler();
114 sched.OnThreadStart();
115 }
116 MultiCoreRunGuestLoop();
117}
118
119void CpuManager::MultiCoreRunGuestLoop() {
120 auto& kernel = system.Kernel();
121 auto* thread = kernel.CurrentScheduler().GetCurrentThread();
122 while (true) {
123 auto* physical_core = &kernel.CurrentPhysicalCore();
124 auto& arm_interface = thread->ArmInterface();
125 system.EnterDynarmicProfile();
126 while (!physical_core->IsInterrupted()) {
127 arm_interface.Run();
128 physical_core = &kernel.CurrentPhysicalCore();
129 }
130 system.ExitDynarmicProfile();
131 arm_interface.ClearExclusiveState();
132 auto& scheduler = kernel.CurrentScheduler();
133 scheduler.TryDoContextSwitch();
134 }
135}
136
137void CpuManager::MultiCoreRunIdleThread() {
138 auto& kernel = system.Kernel();
139 while (true) {
140 auto& physical_core = kernel.CurrentPhysicalCore();
141 physical_core.Idle();
142 auto& scheduler = kernel.CurrentScheduler();
143 scheduler.TryDoContextSwitch();
144 }
145}
146
147void CpuManager::MultiCoreRunSuspendThread() {
148 auto& kernel = system.Kernel();
149 {
150 auto& sched = kernel.CurrentScheduler();
151 sched.OnThreadStart();
152 }
153 while (true) {
154 auto core = kernel.GetCurrentHostThreadID();
155 auto& scheduler = kernel.CurrentScheduler();
156 Kernel::Thread* current_thread = scheduler.GetCurrentThread();
157 Common::Fiber::YieldTo(current_thread->GetHostContext(), core_data[core].host_context);
158 ASSERT(scheduler.ContextSwitchPending());
159 ASSERT(core == kernel.GetCurrentHostThreadID());
160 scheduler.TryDoContextSwitch();
161 }
162}
163
164void CpuManager::MultiCorePause(bool paused) {
165 if (!paused) {
166 bool all_not_barrier = false;
167 while (!all_not_barrier) {
168 all_not_barrier = true;
169 for (std::size_t core = 0; core < Core::Hardware::NUM_CPU_CORES; core++) {
170 all_not_barrier &=
171 !core_data[core].is_running.load() && core_data[core].initialized.load();
172 }
173 }
174 for (std::size_t core = 0; core < Core::Hardware::NUM_CPU_CORES; core++) {
175 core_data[core].enter_barrier->Set();
176 }
177 if (paused_state.load()) {
178 bool all_barrier = false;
179 while (!all_barrier) {
180 all_barrier = true;
181 for (std::size_t core = 0; core < Core::Hardware::NUM_CPU_CORES; core++) {
182 all_barrier &=
183 core_data[core].is_paused.load() && core_data[core].initialized.load();
184 }
185 }
186 for (std::size_t core = 0; core < Core::Hardware::NUM_CPU_CORES; core++) {
187 core_data[core].exit_barrier->Set();
188 }
189 }
190 } else {
191 /// Wait until all cores are paused.
192 bool all_barrier = false;
193 while (!all_barrier) {
194 all_barrier = true;
195 for (std::size_t core = 0; core < Core::Hardware::NUM_CPU_CORES; core++) {
196 all_barrier &=
197 core_data[core].is_paused.load() && core_data[core].initialized.load();
58 } 198 }
59 } 199 }
200 /// Don't release the barrier
60 } 201 }
202 paused_state = paused;
203}
204
205///////////////////////////////////////////////////////////////////////////////
206/// SingleCore ///
207///////////////////////////////////////////////////////////////////////////////
61 208
62 auto& core_timing = system.CoreTiming(); 209void CpuManager::SingleCoreRunGuestThread() {
63 core_timing.ResetRun(); 210 auto& kernel = system.Kernel();
64 bool keep_running{}; 211 {
65 do { 212 auto& sched = kernel.CurrentScheduler();
66 keep_running = false; 213 sched.OnThreadStart();
67 for (active_core = 0; active_core < NUM_CPU_CORES; ++active_core) { 214 }
68 core_timing.SwitchContext(active_core); 215 SingleCoreRunGuestLoop();
69 if (core_timing.CanCurrentContextRun()) { 216}
70 core_managers[active_core]->RunLoop(tight_loop); 217
218void CpuManager::SingleCoreRunGuestLoop() {
219 auto& kernel = system.Kernel();
220 auto* thread = kernel.CurrentScheduler().GetCurrentThread();
221 while (true) {
222 auto* physical_core = &kernel.CurrentPhysicalCore();
223 auto& arm_interface = thread->ArmInterface();
224 system.EnterDynarmicProfile();
225 if (!physical_core->IsInterrupted()) {
226 arm_interface.Run();
227 physical_core = &kernel.CurrentPhysicalCore();
228 }
229 system.ExitDynarmicProfile();
230 thread->SetPhantomMode(true);
231 system.CoreTiming().Advance();
232 thread->SetPhantomMode(false);
233 arm_interface.ClearExclusiveState();
234 PreemptSingleCore();
235 auto& scheduler = kernel.Scheduler(current_core);
236 scheduler.TryDoContextSwitch();
237 }
238}
239
240void CpuManager::SingleCoreRunIdleThread() {
241 auto& kernel = system.Kernel();
242 while (true) {
243 auto& physical_core = kernel.CurrentPhysicalCore();
244 PreemptSingleCore(false);
245 system.CoreTiming().AddTicks(1000U);
246 idle_count++;
247 auto& scheduler = physical_core.Scheduler();
248 scheduler.TryDoContextSwitch();
249 }
250}
251
252void CpuManager::SingleCoreRunSuspendThread() {
253 auto& kernel = system.Kernel();
254 {
255 auto& sched = kernel.CurrentScheduler();
256 sched.OnThreadStart();
257 }
258 while (true) {
259 auto core = kernel.GetCurrentHostThreadID();
260 auto& scheduler = kernel.CurrentScheduler();
261 Kernel::Thread* current_thread = scheduler.GetCurrentThread();
262 Common::Fiber::YieldTo(current_thread->GetHostContext(), core_data[0].host_context);
263 ASSERT(scheduler.ContextSwitchPending());
264 ASSERT(core == kernel.GetCurrentHostThreadID());
265 scheduler.TryDoContextSwitch();
266 }
267}
268
269void CpuManager::PreemptSingleCore(bool from_running_enviroment) {
270 std::size_t old_core = current_core;
271 auto& scheduler = system.Kernel().Scheduler(old_core);
272 Kernel::Thread* current_thread = scheduler.GetCurrentThread();
273 if (idle_count >= 4 || from_running_enviroment) {
274 if (!from_running_enviroment) {
275 system.CoreTiming().Idle();
276 idle_count = 0;
277 }
278 current_thread->SetPhantomMode(true);
279 system.CoreTiming().Advance();
280 current_thread->SetPhantomMode(false);
281 }
282 current_core.store((current_core + 1) % Core::Hardware::NUM_CPU_CORES);
283 system.CoreTiming().ResetTicks();
284 scheduler.Unload();
285 auto& next_scheduler = system.Kernel().Scheduler(current_core);
286 Common::Fiber::YieldTo(current_thread->GetHostContext(), next_scheduler.ControlContext());
287 /// May have changed scheduler
288 auto& current_scheduler = system.Kernel().Scheduler(current_core);
289 current_scheduler.Reload();
290 auto* currrent_thread2 = current_scheduler.GetCurrentThread();
291 if (!currrent_thread2->IsIdleThread()) {
292 idle_count = 0;
293 }
294}
295
296void CpuManager::SingleCorePause(bool paused) {
297 if (!paused) {
298 bool all_not_barrier = false;
299 while (!all_not_barrier) {
300 all_not_barrier = !core_data[0].is_running.load() && core_data[0].initialized.load();
301 }
302 core_data[0].enter_barrier->Set();
303 if (paused_state.load()) {
304 bool all_barrier = false;
305 while (!all_barrier) {
306 all_barrier = core_data[0].is_paused.load() && core_data[0].initialized.load();
71 } 307 }
72 keep_running |= core_timing.CanCurrentContextRun(); 308 core_data[0].exit_barrier->Set();
73 } 309 }
74 } while (keep_running); 310 } else {
311 /// Wait until all cores are paused.
312 bool all_barrier = false;
313 while (!all_barrier) {
314 all_barrier = core_data[0].is_paused.load() && core_data[0].initialized.load();
315 }
316 /// Don't release the barrier
317 }
318 paused_state = paused;
319}
320
321void CpuManager::Pause(bool paused) {
322 if (is_multicore) {
323 MultiCorePause(paused);
324 } else {
325 SingleCorePause(paused);
326 }
327}
75 328
76 if (GDBStub::IsServerEnabled()) { 329void CpuManager::RunThread(std::size_t core) {
77 GDBStub::SetCpuStepFlag(false); 330 /// Initialization
331 system.RegisterCoreThread(core);
332 std::string name;
333 if (is_multicore) {
334 name = "yuzu:CoreCPUThread_" + std::to_string(core);
335 } else {
336 name = "yuzu:CPUThread";
337 }
338 MicroProfileOnThreadCreate(name.c_str());
339 Common::SetCurrentThreadName(name.c_str());
340 Common::SetCurrentThreadPriority(Common::ThreadPriority::High);
341 auto& data = core_data[core];
342 data.enter_barrier = std::make_unique<Common::Event>();
343 data.exit_barrier = std::make_unique<Common::Event>();
344 data.host_context = Common::Fiber::ThreadToFiber();
345 data.is_running = false;
346 data.initialized = true;
347 const bool sc_sync = !is_async_gpu && !is_multicore;
348 bool sc_sync_first_use = sc_sync;
349 /// Running
350 while (running_mode) {
351 data.is_running = false;
352 data.enter_barrier->Wait();
353 if (sc_sync_first_use) {
354 system.GPU().ObtainContext();
355 sc_sync_first_use = false;
356 }
357 auto& scheduler = system.Kernel().CurrentScheduler();
358 Kernel::Thread* current_thread = scheduler.GetCurrentThread();
359 data.is_running = true;
360 Common::Fiber::YieldTo(data.host_context, current_thread->GetHostContext());
361 data.is_running = false;
362 data.is_paused = true;
363 data.exit_barrier->Wait();
364 data.is_paused = false;
78 } 365 }
366 /// Time to cleanup
367 data.host_context->Exit();
368 data.enter_barrier.reset();
369 data.exit_barrier.reset();
370 data.initialized = false;
79} 371}
80 372
81} // namespace Core 373} // namespace Core
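
The rewritten CpuManager drives guest code on fibers: each host thread converts itself into a fiber (`Common::Fiber::ThreadToFiber`) and then ping-pongs control with guest-thread contexts via `YieldTo`, as in `RunThread` above. A minimal sketch of that handshake using POSIX `ucontext` as a stand-in for the fiber class — illustrative only; the real class wraps platform primitives and carries scheduler state:

#include <cstdio>
#include <ucontext.h>

static ucontext_t host_ctx;  // the host thread, converted into a fiber
static ucontext_t guest_ctx; // one guest thread's execution context
static char guest_stack[64 * 1024];

static void GuestThreadFunction() {
    std::puts("guest: run until interrupted, then yield back");
    // Counterpart of Fiber::YieldTo(current_thread->GetHostContext(), host_context).
    swapcontext(&guest_ctx, &host_ctx);
}

int main() {
    getcontext(&guest_ctx);
    guest_ctx.uc_stack.ss_sp = guest_stack;
    guest_ctx.uc_stack.ss_size = sizeof(guest_stack);
    guest_ctx.uc_link = &host_ctx; // fall back to the host if the guest returns
    makecontext(&guest_ctx, GuestThreadFunction, 0);

    std::puts("host: yielding to guest");
    swapcontext(&host_ctx, &guest_ctx); // like YieldTo(data.host_context, guest context)
    std::puts("host: guest yielded back; RunThread would now park on exit_barrier");
    return 0;
}
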
diff --git a/src/core/cpu_manager.h b/src/core/cpu_manager.h
index 97554d1bb..35929ed94 100644
--- a/src/core/cpu_manager.h
+++ b/src/core/cpu_manager.h
@@ -5,12 +5,19 @@
5#pragma once 5#pragma once
6 6
7#include <array> 7#include <array>
8#include <atomic>
9#include <functional>
8#include <memory> 10#include <memory>
11#include <thread>
9#include "core/hardware_properties.h" 12#include "core/hardware_properties.h"
10 13
14namespace Common {
15class Event;
16class Fiber;
17} // namespace Common
18
11namespace Core { 19namespace Core {
12 20
13class CoreManager;
14class System; 21class System;
15 22
16class CpuManager { 23class CpuManager {
@@ -24,24 +31,75 @@ public:
24 CpuManager& operator=(const CpuManager&) = delete; 31 CpuManager& operator=(const CpuManager&) = delete;
25 CpuManager& operator=(CpuManager&&) = delete; 32 CpuManager& operator=(CpuManager&&) = delete;
26 33
34 /// Sets if emulation is multicore or single core, must be set before Initialize
35 void SetMulticore(bool is_multicore) {
36 this->is_multicore = is_multicore;
37 }
38
39 /// Sets if emulation is using an asynchronous GPU.
40 void SetAsyncGpu(bool is_async_gpu) {
41 this->is_async_gpu = is_async_gpu;
42 }
43
27 void Initialize(); 44 void Initialize();
28 void Shutdown(); 45 void Shutdown();
29 46
30 CoreManager& GetCoreManager(std::size_t index); 47 void Pause(bool paused);
31 const CoreManager& GetCoreManager(std::size_t index) const;
32 48
33 CoreManager& GetCurrentCoreManager(); 49 std::function<void(void*)> GetGuestThreadStartFunc();
34 const CoreManager& GetCurrentCoreManager() const; 50 std::function<void(void*)> GetIdleThreadStartFunc();
51 std::function<void(void*)> GetSuspendThreadStartFunc();
52 void* GetStartFuncParamater();
35 53
36 std::size_t GetActiveCoreIndex() const { 54 void PreemptSingleCore(bool from_running_enviroment = true);
37 return active_core;
38 }
39 55
40 void RunLoop(bool tight_loop); 56 std::size_t CurrentCore() const {
57 return current_core.load();
58 }
41 59
42private: 60private:
43 std::array<std::unique_ptr<CoreManager>, Hardware::NUM_CPU_CORES> core_managers; 61 static void GuestThreadFunction(void* cpu_manager);
44 std::size_t active_core{}; ///< Active core, only used in single thread mode 62 static void GuestRewindFunction(void* cpu_manager);
63 static void IdleThreadFunction(void* cpu_manager);
64 static void SuspendThreadFunction(void* cpu_manager);
65
66 void MultiCoreRunGuestThread();
67 void MultiCoreRunGuestLoop();
68 void MultiCoreRunIdleThread();
69 void MultiCoreRunSuspendThread();
70 void MultiCorePause(bool paused);
71
72 void SingleCoreRunGuestThread();
73 void SingleCoreRunGuestLoop();
74 void SingleCoreRunIdleThread();
75 void SingleCoreRunSuspendThread();
76 void SingleCorePause(bool paused);
77
78 static void ThreadStart(CpuManager& cpu_manager, std::size_t core);
79
80 void RunThread(std::size_t core);
81
82 struct CoreData {
83 std::shared_ptr<Common::Fiber> host_context;
84 std::unique_ptr<Common::Event> enter_barrier;
85 std::unique_ptr<Common::Event> exit_barrier;
86 std::atomic<bool> is_running;
87 std::atomic<bool> is_paused;
88 std::atomic<bool> initialized;
89 std::unique_ptr<std::thread> host_thread;
90 };
91
92 std::atomic<bool> running_mode{};
93 std::atomic<bool> paused_state{};
94
95 std::array<CoreData, Core::Hardware::NUM_CPU_CORES> core_data{};
96
97 bool is_async_gpu{};
98 bool is_multicore{};
99 std::atomic<std::size_t> current_core{};
100 std::size_t preemption_count{};
101 std::size_t idle_count{};
102 static constexpr std::size_t max_cycle_runs = 5;
45 103
46 System& system; 104 System& system;
47}; 105};
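
The `CoreData` block above pairs two events with `is_running`/`is_paused` flags to implement the pause handshake used by `RunThread` and `MultiCorePause`. A compile-ready sketch of the worker side, assuming a small auto-reset `Event` built on `std::condition_variable` as a stand-in for `Common::Event`:

#include <atomic>
#include <condition_variable>
#include <mutex>

// Auto-reset event, standing in for Common::Event (assumed interface).
class Event {
public:
    void Set() {
        std::lock_guard lk{m};
        signaled = true;
        cv.notify_one();
    }
    void Wait() {
        std::unique_lock lk{m};
        cv.wait(lk, [this] { return signaled; });
        signaled = false;
    }

private:
    std::mutex m;
    std::condition_variable cv;
    bool signaled{};
};

struct CoreDataSketch {
    Event enter_barrier;
    Event exit_barrier;
    std::atomic<bool> is_running{};
    std::atomic<bool> is_paused{};
};

// Worker side of the handshake, mirroring RunThread's inner loop: the pauser
// spins until is_running is false, sets enter_barrier to release a run, and
// later sets exit_barrier once every core has reported is_paused.
void CoreLoop(CoreDataSketch& data, const std::atomic<bool>& running_mode) {
    while (running_mode) {
        data.enter_barrier.Wait(); // released by Pause(false)
        data.is_running = true;
        // ... yield into the guest fiber until it comes back ...
        data.is_running = false;
        data.is_paused = true;
        data.exit_barrier.Wait();  // released when the pause cycle completes
        data.is_paused = false;
    }
}
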
diff --git a/src/core/file_sys/system_archive/mii_model.cpp b/src/core/file_sys/system_archive/mii_model.cpp
index 6a9add87c..61bb67945 100644
--- a/src/core/file_sys/system_archive/mii_model.cpp
+++ b/src/core/file_sys/system_archive/mii_model.cpp
@@ -40,7 +40,7 @@ VirtualDir MiiModel() {
40 out->AddFile(std::make_shared<ArrayVfsFile<MiiModelData::SHAPE_MID.size()>>( 40 out->AddFile(std::make_shared<ArrayVfsFile<MiiModelData::SHAPE_MID.size()>>(
41 MiiModelData::SHAPE_MID, "ShapeMid.dat")); 41 MiiModelData::SHAPE_MID, "ShapeMid.dat"));
42 42
43 return std::move(out); 43 return out;
44} 44}
45 45
46} // namespace FileSys::SystemArchive 46} // namespace FileSys::SystemArchive
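
For context on the `return std::move(out)` → `return out` change: returning a named local already moves implicitly (or elides the copy outright), and wrapping it in `std::move` both defeats copy elision and trips Clang/GCC's pessimizing-move and redundant-move warnings. A minimal illustration with hypothetical names:

#include <memory>
#include <vector>

// Returning the named local directly lets the compiler move implicitly or
// elide the copy; `return std::move(out);` here would be flagged as a
// pessimizing/redundant move.
std::shared_ptr<std::vector<int>> MakeBuffer() {
    auto out = std::make_shared<std::vector<int>>(42, 0);
    return out;
}
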
diff --git a/src/core/file_sys/system_archive/shared_font.cpp b/src/core/file_sys/system_archive/shared_font.cpp
index 2c05eb42e..c5cdf7d9b 100644
--- a/src/core/file_sys/system_archive/shared_font.cpp
+++ b/src/core/file_sys/system_archive/shared_font.cpp
@@ -23,7 +23,7 @@ VirtualFile PackBFTTF(const std::array<u8, Size>& data, const std::string& name)
23 23
24 std::vector<u8> bfttf(Size + sizeof(u64)); 24 std::vector<u8> bfttf(Size + sizeof(u64));
25 25
26 u64 offset = 0; 26 size_t offset = 0;
27 Service::NS::EncryptSharedFont(vec, bfttf, offset); 27 Service::NS::EncryptSharedFont(vec, bfttf, offset);
28 return std::make_shared<VectorVfsFile>(std::move(bfttf), name); 28 return std::make_shared<VectorVfsFile>(std::move(bfttf), name);
29} 29}
diff --git a/src/core/gdbstub/gdbstub.cpp b/src/core/gdbstub/gdbstub.cpp
index 70c0f8b80..79f22a403 100644
--- a/src/core/gdbstub/gdbstub.cpp
+++ b/src/core/gdbstub/gdbstub.cpp
@@ -35,7 +35,6 @@
35#include "common/swap.h" 35#include "common/swap.h"
36#include "core/arm/arm_interface.h" 36#include "core/arm/arm_interface.h"
37#include "core/core.h" 37#include "core/core.h"
38#include "core/core_manager.h"
39#include "core/gdbstub/gdbstub.h" 38#include "core/gdbstub/gdbstub.h"
40#include "core/hle/kernel/memory/page_table.h" 39#include "core/hle/kernel/memory/page_table.h"
41#include "core/hle/kernel/process.h" 40#include "core/hle/kernel/process.h"
diff --git a/src/core/hardware_properties.h b/src/core/hardware_properties.h
index b04e046ed..456b41e1b 100644
--- a/src/core/hardware_properties.h
+++ b/src/core/hardware_properties.h
@@ -42,6 +42,10 @@ struct EmuThreadHandle {
42 constexpr u32 invalid_handle = 0xFFFFFFFF; 42 constexpr u32 invalid_handle = 0xFFFFFFFF;
43 return {invalid_handle, invalid_handle}; 43 return {invalid_handle, invalid_handle};
44 } 44 }
45
46 bool IsInvalid() const {
47 return (*this) == InvalidHandle();
48 }
45}; 49};
46 50
47} // namespace Core 51} // namespace Core
diff --git a/src/core/hle/kernel/address_arbiter.cpp b/src/core/hle/kernel/address_arbiter.cpp
index 8475b698c..4d2a9b35d 100644
--- a/src/core/hle/kernel/address_arbiter.cpp
+++ b/src/core/hle/kernel/address_arbiter.cpp
@@ -7,11 +7,15 @@
7 7
8#include "common/assert.h" 8#include "common/assert.h"
9#include "common/common_types.h" 9#include "common/common_types.h"
10#include "core/arm/exclusive_monitor.h"
10#include "core/core.h" 11#include "core/core.h"
11#include "core/hle/kernel/address_arbiter.h" 12#include "core/hle/kernel/address_arbiter.h"
12#include "core/hle/kernel/errors.h" 13#include "core/hle/kernel/errors.h"
14#include "core/hle/kernel/handle_table.h"
15#include "core/hle/kernel/kernel.h"
13#include "core/hle/kernel/scheduler.h" 16#include "core/hle/kernel/scheduler.h"
14#include "core/hle/kernel/thread.h" 17#include "core/hle/kernel/thread.h"
18#include "core/hle/kernel/time_manager.h"
15#include "core/hle/result.h" 19#include "core/hle/result.h"
16#include "core/memory.h" 20#include "core/memory.h"
17 21
@@ -20,6 +24,7 @@ namespace Kernel {
20// Wake up num_to_wake (or all) threads in a vector. 24// Wake up num_to_wake (or all) threads in a vector.
21void AddressArbiter::WakeThreads(const std::vector<std::shared_ptr<Thread>>& waiting_threads, 25void AddressArbiter::WakeThreads(const std::vector<std::shared_ptr<Thread>>& waiting_threads,
22 s32 num_to_wake) { 26 s32 num_to_wake) {
27 auto& time_manager = system.Kernel().TimeManager();
23 // Only process up to 'target' threads, unless 'target' is <= 0, in which case process 28 // Only process up to 'target' threads, unless 'target' is <= 0, in which case process
24 // them all. 29 // them all.
25 std::size_t last = waiting_threads.size(); 30 std::size_t last = waiting_threads.size();
@@ -29,12 +34,10 @@ void AddressArbiter::WakeThreads(const std::vector<std::shared_ptr<Thread>>& wai
29 34
30 // Signal the waiting threads. 35 // Signal the waiting threads.
31 for (std::size_t i = 0; i < last; i++) { 36 for (std::size_t i = 0; i < last; i++) {
32 ASSERT(waiting_threads[i]->GetStatus() == ThreadStatus::WaitArb); 37 waiting_threads[i]->SetSynchronizationResults(nullptr, RESULT_SUCCESS);
33 waiting_threads[i]->SetWaitSynchronizationResult(RESULT_SUCCESS);
34 RemoveThread(waiting_threads[i]); 38 RemoveThread(waiting_threads[i]);
35 waiting_threads[i]->SetArbiterWaitAddress(0); 39 waiting_threads[i]->WaitForArbitration(false);
36 waiting_threads[i]->ResumeFromWait(); 40 waiting_threads[i]->ResumeFromWait();
37 system.PrepareReschedule(waiting_threads[i]->GetProcessorID());
38 } 41 }
39} 42}
40 43
@@ -56,6 +59,7 @@ ResultCode AddressArbiter::SignalToAddress(VAddr address, SignalType type, s32 v
56} 59}
57 60
58ResultCode AddressArbiter::SignalToAddressOnly(VAddr address, s32 num_to_wake) { 61ResultCode AddressArbiter::SignalToAddressOnly(VAddr address, s32 num_to_wake) {
62 SchedulerLock lock(system.Kernel());
59 const std::vector<std::shared_ptr<Thread>> waiting_threads = 63 const std::vector<std::shared_ptr<Thread>> waiting_threads =
60 GetThreadsWaitingOnAddress(address); 64 GetThreadsWaitingOnAddress(address);
61 WakeThreads(waiting_threads, num_to_wake); 65 WakeThreads(waiting_threads, num_to_wake);
@@ -64,6 +68,7 @@ ResultCode AddressArbiter::SignalToAddressOnly(VAddr address, s32 num_to_wake) {
64 68
65ResultCode AddressArbiter::IncrementAndSignalToAddressIfEqual(VAddr address, s32 value, 69ResultCode AddressArbiter::IncrementAndSignalToAddressIfEqual(VAddr address, s32 value,
66 s32 num_to_wake) { 70 s32 num_to_wake) {
71 SchedulerLock lock(system.Kernel());
67 auto& memory = system.Memory(); 72 auto& memory = system.Memory();
68 73
69 // Ensure that we can write to the address. 74 // Ensure that we can write to the address.
@@ -71,16 +76,24 @@ ResultCode AddressArbiter::IncrementAndSignalToAddressIfEqual(VAddr address, s32
71 return ERR_INVALID_ADDRESS_STATE; 76 return ERR_INVALID_ADDRESS_STATE;
72 } 77 }
73 78
74 if (static_cast<s32>(memory.Read32(address)) != value) { 79 const std::size_t current_core = system.CurrentCoreIndex();
75 return ERR_INVALID_STATE; 80 auto& monitor = system.Monitor();
76 } 81 u32 current_value;
82 do {
83 current_value = monitor.ExclusiveRead32(current_core, address);
84
85 if (current_value != value) {
86 return ERR_INVALID_STATE;
87 }
88 current_value++;
89 } while (!monitor.ExclusiveWrite32(current_core, address, current_value));
77 90
78 memory.Write32(address, static_cast<u32>(value + 1));
79 return SignalToAddressOnly(address, num_to_wake); 91 return SignalToAddressOnly(address, num_to_wake);
80} 92}
81 93
82ResultCode AddressArbiter::ModifyByWaitingCountAndSignalToAddressIfEqual(VAddr address, s32 value, 94ResultCode AddressArbiter::ModifyByWaitingCountAndSignalToAddressIfEqual(VAddr address, s32 value,
83 s32 num_to_wake) { 95 s32 num_to_wake) {
96 SchedulerLock lock(system.Kernel());
84 auto& memory = system.Memory(); 97 auto& memory = system.Memory();
85 98
86 // Ensure that we can write to the address. 99 // Ensure that we can write to the address.
@@ -92,29 +105,33 @@ ResultCode AddressArbiter::ModifyByWaitingCountAndSignalToAddressIfEqual(VAddr a
92 const std::vector<std::shared_ptr<Thread>> waiting_threads = 105 const std::vector<std::shared_ptr<Thread>> waiting_threads =
93 GetThreadsWaitingOnAddress(address); 106 GetThreadsWaitingOnAddress(address);
94 107
95 // Determine the modified value depending on the waiting count. 108 const std::size_t current_core = system.CurrentCoreIndex();
109 auto& monitor = system.Monitor();
96 s32 updated_value; 110 s32 updated_value;
97 if (num_to_wake <= 0) { 111 do {
98 if (waiting_threads.empty()) { 112 updated_value = monitor.ExclusiveRead32(current_core, address);
99 updated_value = value + 1; 113
100 } else { 114 if (updated_value != value) {
101 updated_value = value - 1; 115 return ERR_INVALID_STATE;
102 } 116 }
103 } else { 117 // Determine the modified value depending on the waiting count.
104 if (waiting_threads.empty()) { 118 if (num_to_wake <= 0) {
105 updated_value = value + 1; 119 if (waiting_threads.empty()) {
106 } else if (waiting_threads.size() <= static_cast<u32>(num_to_wake)) { 120 updated_value = value + 1;
107 updated_value = value - 1; 121 } else {
122 updated_value = value - 1;
123 }
108 } else { 124 } else {
109 updated_value = value; 125 if (waiting_threads.empty()) {
126 updated_value = value + 1;
127 } else if (waiting_threads.size() <= static_cast<u32>(num_to_wake)) {
128 updated_value = value - 1;
129 } else {
130 updated_value = value;
131 }
110 } 132 }
111 } 133 } while (!monitor.ExclusiveWrite32(current_core, address, updated_value));
112 134
113 if (static_cast<s32>(memory.Read32(address)) != value) {
114 return ERR_INVALID_STATE;
115 }
116
117 memory.Write32(address, static_cast<u32>(updated_value));
118 WakeThreads(waiting_threads, num_to_wake); 135 WakeThreads(waiting_threads, num_to_wake);
119 return RESULT_SUCCESS; 136 return RESULT_SUCCESS;
120} 137}
@@ -136,60 +153,127 @@ ResultCode AddressArbiter::WaitForAddress(VAddr address, ArbitrationType type, s
136ResultCode AddressArbiter::WaitForAddressIfLessThan(VAddr address, s32 value, s64 timeout, 153ResultCode AddressArbiter::WaitForAddressIfLessThan(VAddr address, s32 value, s64 timeout,
137 bool should_decrement) { 154 bool should_decrement) {
138 auto& memory = system.Memory(); 155 auto& memory = system.Memory();
156 auto& kernel = system.Kernel();
157 Thread* current_thread = system.CurrentScheduler().GetCurrentThread();
139 158
140 // Ensure that we can read the address. 159 Handle event_handle = InvalidHandle;
141 if (!memory.IsValidVirtualAddress(address)) { 160 {
142 return ERR_INVALID_ADDRESS_STATE; 161 SchedulerLockAndSleep lock(kernel, event_handle, current_thread, timeout);
143 } 162
163 if (current_thread->IsPendingTermination()) {
164 lock.CancelSleep();
165 return ERR_THREAD_TERMINATING;
166 }
167
168 // Ensure that we can read the address.
169 if (!memory.IsValidVirtualAddress(address)) {
170 lock.CancelSleep();
171 return ERR_INVALID_ADDRESS_STATE;
172 }
173
174 s32 current_value = static_cast<s32>(memory.Read32(address));
175 if (current_value >= value) {
176 lock.CancelSleep();
177 return ERR_INVALID_STATE;
178 }
179
180 current_thread->SetSynchronizationResults(nullptr, RESULT_TIMEOUT);
181
182 s32 decrement_value;
183
184 const std::size_t current_core = system.CurrentCoreIndex();
185 auto& monitor = system.Monitor();
186 do {
187 current_value = static_cast<s32>(monitor.ExclusiveRead32(current_core, address));
188 if (should_decrement) {
189 decrement_value = current_value - 1;
190 } else {
191 decrement_value = current_value;
192 }
193 } while (
194 !monitor.ExclusiveWrite32(current_core, address, static_cast<u32>(decrement_value)));
195
196 // Short-circuit without rescheduling, if timeout is zero.
197 if (timeout == 0) {
198 lock.CancelSleep();
199 return RESULT_TIMEOUT;
200 }
144 201
145 const s32 cur_value = static_cast<s32>(memory.Read32(address)); 202 current_thread->SetArbiterWaitAddress(address);
146 if (cur_value >= value) { 203 InsertThread(SharedFrom(current_thread));
147 return ERR_INVALID_STATE; 204 current_thread->SetStatus(ThreadStatus::WaitArb);
205 current_thread->WaitForArbitration(true);
148 } 206 }
149 207
150 if (should_decrement) { 208 if (event_handle != InvalidHandle) {
151 memory.Write32(address, static_cast<u32>(cur_value - 1)); 209 auto& time_manager = kernel.TimeManager();
210 time_manager.UnscheduleTimeEvent(event_handle);
152 } 211 }
153 212
154 // Short-circuit without rescheduling, if timeout is zero. 213 {
155 if (timeout == 0) { 214 SchedulerLock lock(kernel);
156 return RESULT_TIMEOUT; 215 if (current_thread->IsWaitingForArbitration()) {
216 RemoveThread(SharedFrom(current_thread));
217 current_thread->WaitForArbitration(false);
218 }
157 } 219 }
158 220
159 return WaitForAddressImpl(address, timeout); 221 return current_thread->GetSignalingResult();
160} 222}
161 223
162ResultCode AddressArbiter::WaitForAddressIfEqual(VAddr address, s32 value, s64 timeout) { 224ResultCode AddressArbiter::WaitForAddressIfEqual(VAddr address, s32 value, s64 timeout) {
163 auto& memory = system.Memory(); 225 auto& memory = system.Memory();
226 auto& kernel = system.Kernel();
227 Thread* current_thread = system.CurrentScheduler().GetCurrentThread();
164 228
165 // Ensure that we can read the address. 229 Handle event_handle = InvalidHandle;
166 if (!memory.IsValidVirtualAddress(address)) { 230 {
167 return ERR_INVALID_ADDRESS_STATE; 231 SchedulerLockAndSleep lock(kernel, event_handle, current_thread, timeout);
168 } 232
233 if (current_thread->IsPendingTermination()) {
234 lock.CancelSleep();
235 return ERR_THREAD_TERMINATING;
236 }
237
238 // Ensure that we can read the address.
239 if (!memory.IsValidVirtualAddress(address)) {
240 lock.CancelSleep();
241 return ERR_INVALID_ADDRESS_STATE;
242 }
169 243
170 // Only wait for the address if equal. 244 s32 current_value = static_cast<s32>(memory.Read32(address));
171 if (static_cast<s32>(memory.Read32(address)) != value) { 245 if (current_value != value) {
172 return ERR_INVALID_STATE; 246 lock.CancelSleep();
247 return ERR_INVALID_STATE;
248 }
249
250 // Short-circuit without rescheduling, if timeout is zero.
251 if (timeout == 0) {
252 lock.CancelSleep();
253 return RESULT_TIMEOUT;
254 }
255
256 current_thread->SetSynchronizationResults(nullptr, RESULT_TIMEOUT);
257 current_thread->SetArbiterWaitAddress(address);
258 InsertThread(SharedFrom(current_thread));
259 current_thread->SetStatus(ThreadStatus::WaitArb);
260 current_thread->WaitForArbitration(true);
173 } 261 }
174 262
175 // Short-circuit without rescheduling if timeout is zero. 263 if (event_handle != InvalidHandle) {
176 if (timeout == 0) { 264 auto& time_manager = kernel.TimeManager();
177 return RESULT_TIMEOUT; 265 time_manager.UnscheduleTimeEvent(event_handle);
178 } 266 }
179 267
180 return WaitForAddressImpl(address, timeout); 268 {
181} 269 SchedulerLock lock(kernel);
270 if (current_thread->IsWaitingForArbitration()) {
271 RemoveThread(SharedFrom(current_thread));
272 current_thread->WaitForArbitration(false);
273 }
274 }
182 275
183ResultCode AddressArbiter::WaitForAddressImpl(VAddr address, s64 timeout) { 276 return current_thread->GetSignalingResult();
184 Thread* current_thread = system.CurrentScheduler().GetCurrentThread();
185 current_thread->SetArbiterWaitAddress(address);
186 InsertThread(SharedFrom(current_thread));
187 current_thread->SetStatus(ThreadStatus::WaitArb);
188 current_thread->InvalidateWakeupCallback();
189 current_thread->WakeAfterDelay(timeout);
190
191 system.PrepareReschedule(current_thread->GetProcessorID());
192 return RESULT_TIMEOUT;
193} 277}
194 278
195void AddressArbiter::HandleWakeupThread(std::shared_ptr<Thread> thread) { 279void AddressArbiter::HandleWakeupThread(std::shared_ptr<Thread> thread) {
@@ -221,9 +305,9 @@ void AddressArbiter::RemoveThread(std::shared_ptr<Thread> thread) {
221 const auto iter = std::find_if(thread_list.cbegin(), thread_list.cend(), 305 const auto iter = std::find_if(thread_list.cbegin(), thread_list.cend(),
222 [&thread](const auto& entry) { return thread == entry; }); 306 [&thread](const auto& entry) { return thread == entry; });
223 307
224 ASSERT(iter != thread_list.cend()); 308 if (iter != thread_list.cend()) {
225 309 thread_list.erase(iter);
226 thread_list.erase(iter); 310 }
227} 311}
228 312
229std::vector<std::shared_ptr<Thread>> AddressArbiter::GetThreadsWaitingOnAddress( 313std::vector<std::shared_ptr<Thread>> AddressArbiter::GetThreadsWaitingOnAddress(
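
The arbiter's signal and wait paths above now perform their read-modify-writes through the exclusive monitor, making them atomic with respect to the guest's exclusive load/store pairs; the shape is a classic compare-and-swap retry loop. A portable sketch of the same pattern, with `std::atomic` standing in for `ExclusiveRead32`/`ExclusiveWrite32`:

#include <atomic>
#include <cstdint>

// Increment-if-equal in the style of IncrementAndSignalToAddressIfEqual:
// retry until the write lands with no intervening store, or bail out when
// the observed value no longer matches (ERR_INVALID_STATE in the kernel).
bool IncrementIfEqual(std::atomic<std::uint32_t>& word, std::int32_t expected) {
    std::uint32_t current = word.load(std::memory_order_acquire);
    do {
        if (static_cast<std::int32_t>(current) != expected) {
            return false;
        }
        // On failure, compare_exchange reloads `current`, playing the part of
        // re-running ExclusiveRead32 after a failed ExclusiveWrite32.
    } while (!word.compare_exchange_weak(current, current + 1,
                                         std::memory_order_acq_rel,
                                         std::memory_order_acquire));
    return true;
}
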
diff --git a/src/core/hle/kernel/address_arbiter.h b/src/core/hle/kernel/address_arbiter.h
index f958eee5a..0b05d533c 100644
--- a/src/core/hle/kernel/address_arbiter.h
+++ b/src/core/hle/kernel/address_arbiter.h
@@ -73,9 +73,6 @@ private:
73 /// Waits on an address if the value passed is equal to the argument value. 73 /// Waits on an address if the value passed is equal to the argument value.
74 ResultCode WaitForAddressIfEqual(VAddr address, s32 value, s64 timeout); 74 ResultCode WaitForAddressIfEqual(VAddr address, s32 value, s64 timeout);
75 75
76 // Waits on the given address with a timeout in nanoseconds
77 ResultCode WaitForAddressImpl(VAddr address, s64 timeout);
78
79 /// Wake up num_to_wake (or all) threads in a vector. 76 /// Wake up num_to_wake (or all) threads in a vector.
80 void WakeThreads(const std::vector<std::shared_ptr<Thread>>& waiting_threads, s32 num_to_wake); 77 void WakeThreads(const std::vector<std::shared_ptr<Thread>>& waiting_threads, s32 num_to_wake);
81 78
diff --git a/src/core/hle/kernel/client_port.cpp b/src/core/hle/kernel/client_port.cpp
index 5498fd313..8aff2227a 100644
--- a/src/core/hle/kernel/client_port.cpp
+++ b/src/core/hle/kernel/client_port.cpp
@@ -34,7 +34,7 @@ ResultVal<std::shared_ptr<ClientSession>> ClientPort::Connect() {
34 } 34 }
35 35
36 // Wake the threads waiting on the ServerPort 36 // Wake the threads waiting on the ServerPort
37 server_port->WakeupAllWaitingThreads(); 37 server_port->Signal();
38 38
39 return MakeResult(std::move(client)); 39 return MakeResult(std::move(client));
40} 40}
diff --git a/src/core/hle/kernel/errors.h b/src/core/hle/kernel/errors.h
index 29bfa3621..d4e5d88cf 100644
--- a/src/core/hle/kernel/errors.h
+++ b/src/core/hle/kernel/errors.h
@@ -12,6 +12,7 @@ namespace Kernel {
12 12
13constexpr ResultCode ERR_MAX_CONNECTIONS_REACHED{ErrorModule::Kernel, 7}; 13constexpr ResultCode ERR_MAX_CONNECTIONS_REACHED{ErrorModule::Kernel, 7};
14constexpr ResultCode ERR_INVALID_CAPABILITY_DESCRIPTOR{ErrorModule::Kernel, 14}; 14constexpr ResultCode ERR_INVALID_CAPABILITY_DESCRIPTOR{ErrorModule::Kernel, 14};
15constexpr ResultCode ERR_THREAD_TERMINATING{ErrorModule::Kernel, 59};
15constexpr ResultCode ERR_INVALID_SIZE{ErrorModule::Kernel, 101}; 16constexpr ResultCode ERR_INVALID_SIZE{ErrorModule::Kernel, 101};
16constexpr ResultCode ERR_INVALID_ADDRESS{ErrorModule::Kernel, 102}; 17constexpr ResultCode ERR_INVALID_ADDRESS{ErrorModule::Kernel, 102};
17constexpr ResultCode ERR_OUT_OF_RESOURCES{ErrorModule::Kernel, 103}; 18constexpr ResultCode ERR_OUT_OF_RESOURCES{ErrorModule::Kernel, 103};
diff --git a/src/core/hle/kernel/hle_ipc.cpp b/src/core/hle/kernel/hle_ipc.cpp
index ba0eac4c2..9277b5d08 100644
--- a/src/core/hle/kernel/hle_ipc.cpp
+++ b/src/core/hle/kernel/hle_ipc.cpp
@@ -14,14 +14,17 @@
14#include "common/common_types.h" 14#include "common/common_types.h"
15#include "common/logging/log.h" 15#include "common/logging/log.h"
16#include "core/hle/ipc_helpers.h" 16#include "core/hle/ipc_helpers.h"
17#include "core/hle/kernel/errors.h"
17#include "core/hle/kernel/handle_table.h" 18#include "core/hle/kernel/handle_table.h"
18#include "core/hle/kernel/hle_ipc.h" 19#include "core/hle/kernel/hle_ipc.h"
19#include "core/hle/kernel/kernel.h" 20#include "core/hle/kernel/kernel.h"
20#include "core/hle/kernel/object.h" 21#include "core/hle/kernel/object.h"
21#include "core/hle/kernel/process.h" 22#include "core/hle/kernel/process.h"
22#include "core/hle/kernel/readable_event.h" 23#include "core/hle/kernel/readable_event.h"
24#include "core/hle/kernel/scheduler.h"
23#include "core/hle/kernel/server_session.h" 25#include "core/hle/kernel/server_session.h"
24#include "core/hle/kernel/thread.h" 26#include "core/hle/kernel/thread.h"
27#include "core/hle/kernel/time_manager.h"
25#include "core/hle/kernel/writable_event.h" 28#include "core/hle/kernel/writable_event.h"
26#include "core/memory.h" 29#include "core/memory.h"
27 30
@@ -46,15 +49,6 @@ std::shared_ptr<WritableEvent> HLERequestContext::SleepClientThread(
46 const std::string& reason, u64 timeout, WakeupCallback&& callback, 49 const std::string& reason, u64 timeout, WakeupCallback&& callback,
47 std::shared_ptr<WritableEvent> writable_event) { 50 std::shared_ptr<WritableEvent> writable_event) {
48 // Put the client thread to sleep until the wait event is signaled or the timeout expires. 51 // Put the client thread to sleep until the wait event is signaled or the timeout expires.
49 thread->SetWakeupCallback(
50 [context = *this, callback](ThreadWakeupReason reason, std::shared_ptr<Thread> thread,
51 std::shared_ptr<SynchronizationObject> object,
52 std::size_t index) mutable -> bool {
53 ASSERT(thread->GetStatus() == ThreadStatus::WaitHLEEvent);
54 callback(thread, context, reason);
55 context.WriteToOutgoingCommandBuffer(*thread);
56 return true;
57 });
58 52
59 if (!writable_event) { 53 if (!writable_event) {
60 // Create event if not provided 54 // Create event if not provided
@@ -62,14 +56,26 @@ std::shared_ptr<WritableEvent> HLERequestContext::SleepClientThread(
62 writable_event = pair.writable; 56 writable_event = pair.writable;
63 } 57 }
64 58
65 const auto readable_event{writable_event->GetReadableEvent()}; 59 {
66 writable_event->Clear(); 60 Handle event_handle = InvalidHandle;
67 thread->SetStatus(ThreadStatus::WaitHLEEvent); 61 SchedulerLockAndSleep lock(kernel, event_handle, thread.get(), timeout);
68 thread->SetSynchronizationObjects({readable_event}); 62 thread->SetHLECallback(
69 readable_event->AddWaitingThread(thread); 63 [context = *this, callback](std::shared_ptr<Thread> thread) mutable -> bool {
70 64 ThreadWakeupReason reason = thread->GetSignalingResult() == RESULT_TIMEOUT
71 if (timeout > 0) { 65 ? ThreadWakeupReason::Timeout
72 thread->WakeAfterDelay(timeout); 66 : ThreadWakeupReason::Signal;
67 callback(thread, context, reason);
68 context.WriteToOutgoingCommandBuffer(*thread);
69 return true;
70 });
71 const auto readable_event{writable_event->GetReadableEvent()};
72 writable_event->Clear();
73 thread->SetHLESyncObject(readable_event.get());
74 thread->SetStatus(ThreadStatus::WaitHLEEvent);
75 thread->SetSynchronizationResults(nullptr, RESULT_TIMEOUT);
76 readable_event->AddWaitingThread(thread);
77 lock.Release();
78 thread->SetHLETimeEvent(event_handle);
73 } 79 }
74 80
75 is_thread_waiting = true; 81 is_thread_waiting = true;
@@ -282,18 +288,18 @@ ResultCode HLERequestContext::WriteToOutgoingCommandBuffer(Thread& thread) {
282} 288}
283 289
284std::vector<u8> HLERequestContext::ReadBuffer(std::size_t buffer_index) const { 290std::vector<u8> HLERequestContext::ReadBuffer(std::size_t buffer_index) const {
285 std::vector<u8> buffer; 291 std::vector<u8> buffer{};
286 const bool is_buffer_a{BufferDescriptorA().size() > buffer_index && 292 const bool is_buffer_a{BufferDescriptorA().size() > buffer_index &&
287 BufferDescriptorA()[buffer_index].Size()}; 293 BufferDescriptorA()[buffer_index].Size()};
288 294
289 if (is_buffer_a) { 295 if (is_buffer_a) {
290 ASSERT_MSG(BufferDescriptorA().size() > buffer_index, 296 ASSERT_OR_EXECUTE_MSG(BufferDescriptorA().size() > buffer_index, { return buffer; },
291 "BufferDescriptorA invalid buffer_index {}", buffer_index); 297 "BufferDescriptorA invalid buffer_index {}", buffer_index);
292 buffer.resize(BufferDescriptorA()[buffer_index].Size()); 298 buffer.resize(BufferDescriptorA()[buffer_index].Size());
293 memory.ReadBlock(BufferDescriptorA()[buffer_index].Address(), buffer.data(), buffer.size()); 299 memory.ReadBlock(BufferDescriptorA()[buffer_index].Address(), buffer.data(), buffer.size());
294 } else { 300 } else {
295 ASSERT_MSG(BufferDescriptorX().size() > buffer_index, 301 ASSERT_OR_EXECUTE_MSG(BufferDescriptorX().size() > buffer_index, { return buffer; },
296 "BufferDescriptorX invalid buffer_index {}", buffer_index); 302 "BufferDescriptorX invalid buffer_index {}", buffer_index);
297 buffer.resize(BufferDescriptorX()[buffer_index].Size()); 303 buffer.resize(BufferDescriptorX()[buffer_index].Size());
298 memory.ReadBlock(BufferDescriptorX()[buffer_index].Address(), buffer.data(), buffer.size()); 304 memory.ReadBlock(BufferDescriptorX()[buffer_index].Address(), buffer.data(), buffer.size());
299 } 305 }
@@ -318,16 +324,16 @@ std::size_t HLERequestContext::WriteBuffer(const void* buffer, std::size_t size,
318 } 324 }
319 325
320 if (is_buffer_b) { 326 if (is_buffer_b) {
321 ASSERT_MSG(BufferDescriptorB().size() > buffer_index, 327 ASSERT_OR_EXECUTE_MSG(BufferDescriptorB().size() > buffer_index &&
322 "BufferDescriptorB invalid buffer_index {}", buffer_index); 328 BufferDescriptorB()[buffer_index].Size() >= size,
323 ASSERT_MSG(BufferDescriptorB()[buffer_index].Size() >= size, 329 { return 0; }, "BufferDescriptorB is invalid, index={}, size={}",
324 "BufferDescriptorB buffer_index {} is not large enough", buffer_index); 330 buffer_index, size);
325 memory.WriteBlock(BufferDescriptorB()[buffer_index].Address(), buffer, size); 331 memory.WriteBlock(BufferDescriptorB()[buffer_index].Address(), buffer, size);
326 } else { 332 } else {
327 ASSERT_MSG(BufferDescriptorC().size() > buffer_index, 333 ASSERT_OR_EXECUTE_MSG(BufferDescriptorC().size() > buffer_index &&
328 "BufferDescriptorC invalid buffer_index {}", buffer_index); 334 BufferDescriptorC()[buffer_index].Size() >= size,
329 ASSERT_MSG(BufferDescriptorC()[buffer_index].Size() >= size, 335 { return 0; }, "BufferDescriptorC is invalid, index={}, size={}",
330 "BufferDescriptorC buffer_index {} is not large enough", buffer_index); 336 buffer_index, size);
331 memory.WriteBlock(BufferDescriptorC()[buffer_index].Address(), buffer, size); 337 memory.WriteBlock(BufferDescriptorC()[buffer_index].Address(), buffer, size);
332 } 338 }
333 339
@@ -338,16 +344,12 @@ std::size_t HLERequestContext::GetReadBufferSize(std::size_t buffer_index) const
338 const bool is_buffer_a{BufferDescriptorA().size() > buffer_index && 344 const bool is_buffer_a{BufferDescriptorA().size() > buffer_index &&
339 BufferDescriptorA()[buffer_index].Size()}; 345 BufferDescriptorA()[buffer_index].Size()};
340 if (is_buffer_a) { 346 if (is_buffer_a) {
341 ASSERT_MSG(BufferDescriptorA().size() > buffer_index, 347 ASSERT_OR_EXECUTE_MSG(BufferDescriptorA().size() > buffer_index, { return 0; },
342 "BufferDescriptorA invalid buffer_index {}", buffer_index); 348 "BufferDescriptorA invalid buffer_index {}", buffer_index);
343 ASSERT_MSG(BufferDescriptorA()[buffer_index].Size() > 0,
344 "BufferDescriptorA buffer_index {} is empty", buffer_index);
345 return BufferDescriptorA()[buffer_index].Size(); 349 return BufferDescriptorA()[buffer_index].Size();
346 } else { 350 } else {
347 ASSERT_MSG(BufferDescriptorX().size() > buffer_index, 351 ASSERT_OR_EXECUTE_MSG(BufferDescriptorX().size() > buffer_index, { return 0; },
348 "BufferDescriptorX invalid buffer_index {}", buffer_index); 352 "BufferDescriptorX invalid buffer_index {}", buffer_index);
349 ASSERT_MSG(BufferDescriptorX()[buffer_index].Size() > 0,
350 "BufferDescriptorX buffer_index {} is empty", buffer_index);
351 return BufferDescriptorX()[buffer_index].Size(); 353 return BufferDescriptorX()[buffer_index].Size();
352 } 354 }
353} 355}
@@ -356,14 +358,15 @@ std::size_t HLERequestContext::GetWriteBufferSize(std::size_t buffer_index) cons
356 const bool is_buffer_b{BufferDescriptorB().size() > buffer_index && 358 const bool is_buffer_b{BufferDescriptorB().size() > buffer_index &&
357 BufferDescriptorB()[buffer_index].Size()}; 359 BufferDescriptorB()[buffer_index].Size()};
358 if (is_buffer_b) { 360 if (is_buffer_b) {
359 ASSERT_MSG(BufferDescriptorB().size() > buffer_index, 361 ASSERT_OR_EXECUTE_MSG(BufferDescriptorB().size() > buffer_index, { return 0; },
360 "BufferDescriptorB invalid buffer_index {}", buffer_index); 362 "BufferDescriptorB invalid buffer_index {}", buffer_index);
361 return BufferDescriptorB()[buffer_index].Size(); 363 return BufferDescriptorB()[buffer_index].Size();
362 } else { 364 } else {
363 ASSERT_MSG(BufferDescriptorC().size() > buffer_index, 365 ASSERT_OR_EXECUTE_MSG(BufferDescriptorC().size() > buffer_index, { return 0; },
364 "BufferDescriptorC invalid buffer_index {}", buffer_index); 366 "BufferDescriptorC invalid buffer_index {}", buffer_index);
365 return BufferDescriptorC()[buffer_index].Size(); 367 return BufferDescriptorC()[buffer_index].Size();
366 } 368 }
369 return 0;
367} 370}
368 371
369std::string HLERequestContext::Description() const { 372std::string HLERequestContext::Description() const {
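
The buffer accessors above move from hard `ASSERT_MSG` to a log-and-bail form: on a bad descriptor index they execute the supplied fallback block (`{ return buffer; }`, `{ return 0; }`) instead of crashing. A hypothetical sketch of how such a macro can be shaped — the real one lives in common/assert.h, so this reconstruction is illustrative only:

#include <cstdio>

// Hypothetical reconstruction; not the emulator's actual definition.
#define ASSERT_OR_EXECUTE_MSG_SKETCH(cond, body, ...)                \
    do {                                                             \
        if (!(cond)) {                                               \
            std::fprintf(stderr, "Assertion failed: " __VA_ARGS__);  \
            std::fprintf(stderr, "\n");                              \
            body                                                     \
        }                                                            \
    } while (0)

int GetSizeChecked(const int* sizes, int count, int index) {
    // Logs and returns the fallback instead of aborting on a bad index.
    ASSERT_OR_EXECUTE_MSG_SKETCH(index < count, { return 0; },
                                 "invalid buffer_index %d", index);
    return sizes[index];
}
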
diff --git a/src/core/hle/kernel/kernel.cpp b/src/core/hle/kernel/kernel.cpp
index 7655382fa..1f2af7a1b 100644
--- a/src/core/hle/kernel/kernel.cpp
+++ b/src/core/hle/kernel/kernel.cpp
@@ -2,6 +2,7 @@
2// Licensed under GPLv2 or any later version 2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included. 3// Refer to the license.txt file included.
4 4
5#include <array>
5#include <atomic> 6#include <atomic>
6#include <bitset> 7#include <bitset>
7#include <functional> 8#include <functional>
@@ -13,11 +14,15 @@
13 14
14#include "common/assert.h" 15#include "common/assert.h"
15#include "common/logging/log.h" 16#include "common/logging/log.h"
17#include "common/microprofile.h"
18#include "common/thread.h"
16#include "core/arm/arm_interface.h" 19#include "core/arm/arm_interface.h"
20#include "core/arm/cpu_interrupt_handler.h"
17#include "core/arm/exclusive_monitor.h" 21#include "core/arm/exclusive_monitor.h"
18#include "core/core.h" 22#include "core/core.h"
19#include "core/core_timing.h" 23#include "core/core_timing.h"
20#include "core/core_timing_util.h" 24#include "core/core_timing_util.h"
25#include "core/cpu_manager.h"
21#include "core/device_memory.h" 26#include "core/device_memory.h"
22#include "core/hardware_properties.h" 27#include "core/hardware_properties.h"
23#include "core/hle/kernel/client_port.h" 28#include "core/hle/kernel/client_port.h"
@@ -39,85 +44,28 @@
39#include "core/hle/result.h" 44#include "core/hle/result.h"
40#include "core/memory.h" 45#include "core/memory.h"
41 46
42namespace Kernel { 47MICROPROFILE_DEFINE(Kernel_SVC, "Kernel", "SVC", MP_RGB(70, 200, 70));
43
44/**
45 * Callback that will wake up the thread it was scheduled for
46 * @param thread_handle The handle of the thread that's been awoken
47 * @param cycles_late The number of CPU cycles that have passed since the desired wakeup time
48 */
49static void ThreadWakeupCallback(u64 thread_handle, [[maybe_unused]] s64 cycles_late) {
50 const auto proper_handle = static_cast<Handle>(thread_handle);
51 const auto& system = Core::System::GetInstance();
52
53 // Lock the global kernel mutex when we enter the kernel HLE.
54 std::lock_guard lock{HLE::g_hle_lock};
55
56 std::shared_ptr<Thread> thread =
57 system.Kernel().RetrieveThreadFromGlobalHandleTable(proper_handle);
58 if (thread == nullptr) {
59 LOG_CRITICAL(Kernel, "Callback fired for invalid thread {:08X}", proper_handle);
60 return;
61 }
62
63 bool resume = true;
64
65 if (thread->GetStatus() == ThreadStatus::WaitSynch ||
66 thread->GetStatus() == ThreadStatus::WaitHLEEvent) {
67 // Remove the thread from each of its waiting objects' waitlists
68 for (const auto& object : thread->GetSynchronizationObjects()) {
69 object->RemoveWaitingThread(thread);
70 }
71 thread->ClearSynchronizationObjects();
72
73 // Invoke the wakeup callback before clearing the wait objects
74 if (thread->HasWakeupCallback()) {
75 resume = thread->InvokeWakeupCallback(ThreadWakeupReason::Timeout, thread, nullptr, 0);
76 }
77 } else if (thread->GetStatus() == ThreadStatus::WaitMutex ||
78 thread->GetStatus() == ThreadStatus::WaitCondVar) {
79 thread->SetMutexWaitAddress(0);
80 thread->SetWaitHandle(0);
81 if (thread->GetStatus() == ThreadStatus::WaitCondVar) {
82 thread->GetOwnerProcess()->RemoveConditionVariableThread(thread);
83 thread->SetCondVarWaitAddress(0);
84 }
85
86 auto* const lock_owner = thread->GetLockOwner();
87 // Threads waking up by timeout from WaitProcessWideKey do not perform priority inheritance
88 // and don't have a lock owner unless SignalProcessWideKey was called first and the thread
89 // wasn't awakened due to the mutex already being acquired.
90 if (lock_owner != nullptr) {
91 lock_owner->RemoveMutexWaiter(thread);
92 }
93 }
94 48
95 if (thread->GetStatus() == ThreadStatus::WaitArb) { 49namespace Kernel {
96 auto& address_arbiter = thread->GetOwnerProcess()->GetAddressArbiter();
97 address_arbiter.HandleWakeupThread(thread);
98 }
99
100 if (resume) {
101 if (thread->GetStatus() == ThreadStatus::WaitCondVar ||
102 thread->GetStatus() == ThreadStatus::WaitArb) {
103 thread->SetWaitSynchronizationResult(RESULT_TIMEOUT);
104 }
105 thread->ResumeFromWait();
106 }
107}
108 50
109struct KernelCore::Impl { 51struct KernelCore::Impl {
110 explicit Impl(Core::System& system, KernelCore& kernel) 52 explicit Impl(Core::System& system, KernelCore& kernel)
111 : global_scheduler{kernel}, synchronization{system}, time_manager{system}, system{system} {} 53 : global_scheduler{kernel}, synchronization{system}, time_manager{system}, system{system} {}
112 54
55 void SetMulticore(bool is_multicore) {
56 this->is_multicore = is_multicore;
57 }
58
113 void Initialize(KernelCore& kernel) { 59 void Initialize(KernelCore& kernel) {
114 Shutdown(); 60 Shutdown();
61 RegisterHostThread();
115 62
116 InitializePhysicalCores(); 63 InitializePhysicalCores();
117 InitializeSystemResourceLimit(kernel); 64 InitializeSystemResourceLimit(kernel);
118 InitializeMemoryLayout(); 65 InitializeMemoryLayout();
119 InitializeThreads(); 66 InitializePreemption(kernel);
120 InitializePreemption(); 67 InitializeSchedulers();
68 InitializeSuspendThreads();
121 } 69 }
122 70
123 void Shutdown() { 71 void Shutdown() {
@@ -126,13 +74,26 @@ struct KernelCore::Impl {
126 next_user_process_id = Process::ProcessIDMin; 74 next_user_process_id = Process::ProcessIDMin;
127 next_thread_id = 1; 75 next_thread_id = 1;
128 76
77 for (std::size_t i = 0; i < Core::Hardware::NUM_CPU_CORES; i++) {
78 if (suspend_threads[i]) {
79 suspend_threads[i].reset();
80 }
81 }
82
83 for (std::size_t i = 0; i < cores.size(); i++) {
84 cores[i].Shutdown();
85 schedulers[i].reset();
86 }
87 cores.clear();
88
89 registered_core_threads.reset();
90
129 process_list.clear(); 91 process_list.clear();
130 current_process = nullptr; 92 current_process = nullptr;
131 93
132 system_resource_limit = nullptr; 94 system_resource_limit = nullptr;
133 95
134 global_handle_table.Clear(); 96 global_handle_table.Clear();
135 thread_wakeup_event_type = nullptr;
136 preemption_event = nullptr; 97 preemption_event = nullptr;
137 98
138 global_scheduler.Shutdown(); 99 global_scheduler.Shutdown();
@@ -145,13 +106,21 @@ struct KernelCore::Impl {
145 cores.clear(); 106 cores.clear();
146 107
147 exclusive_monitor.reset(); 108 exclusive_monitor.reset();
109 host_thread_ids.clear();
148 } 110 }
149 111
150 void InitializePhysicalCores() { 112 void InitializePhysicalCores() {
151 exclusive_monitor = 113 exclusive_monitor =
152 Core::MakeExclusiveMonitor(system.Memory(), Core::Hardware::NUM_CPU_CORES); 114 Core::MakeExclusiveMonitor(system.Memory(), Core::Hardware::NUM_CPU_CORES);
153 for (std::size_t i = 0; i < Core::Hardware::NUM_CPU_CORES; i++) { 115 for (std::size_t i = 0; i < Core::Hardware::NUM_CPU_CORES; i++) {
154 cores.emplace_back(system, i, *exclusive_monitor); 116 schedulers[i] = std::make_unique<Kernel::Scheduler>(system, i);
117 cores.emplace_back(system, i, *schedulers[i], interrupts[i]);
118 }
119 }
120
121 void InitializeSchedulers() {
122 for (std::size_t i = 0; i < Core::Hardware::NUM_CPU_CORES; i++) {
123 cores[i].Scheduler().Initialize();
155 } 124 }
156 } 125 }
157 126
@@ -173,15 +142,13 @@ struct KernelCore::Impl {
173 } 142 }
174 } 143 }
175 144
176 void InitializeThreads() { 145 void InitializePreemption(KernelCore& kernel) {
177 thread_wakeup_event_type = 146 preemption_event = Core::Timing::CreateEvent(
178 Core::Timing::CreateEvent("ThreadWakeupCallback", ThreadWakeupCallback); 147 "PreemptionCallback", [this, &kernel](u64 userdata, s64 cycles_late) {
179 } 148 {
180 149 SchedulerLock lock(kernel);
181 void InitializePreemption() { 150 global_scheduler.PreemptThreads();
182 preemption_event = 151 }
183 Core::Timing::CreateEvent("PreemptionCallback", [this](u64 userdata, s64 cycles_late) {
184 global_scheduler.PreemptThreads();
185 s64 time_interval = Core::Timing::msToCycles(std::chrono::milliseconds(10)); 152 s64 time_interval = Core::Timing::msToCycles(std::chrono::milliseconds(10));
186 system.CoreTiming().ScheduleEvent(time_interval, preemption_event); 153 system.CoreTiming().ScheduleEvent(time_interval, preemption_event);
187 }); 154 });
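
The preemption callback above is self-rescheduling: each firing takes the scheduler lock, preempts, and re-arms itself 10 ms out. Stripped of the kernel plumbing, the pattern reduces to the following sketch, with `sleep_for` standing in for CoreTiming's event queue:

#include <atomic>
#include <chrono>
#include <functional>
#include <thread>

// Fire `preempt` every 10 ms until stopped; sleep_for stands in for
// CoreTiming::ScheduleEvent re-arming the same event from its own callback.
void RunPeriodicPreemption(std::atomic<bool>& running,
                           const std::function<void()>& preempt) {
    using namespace std::chrono_literals;
    while (running) {
        std::this_thread::sleep_for(10ms);
        preempt(); // takes the scheduler lock and calls PreemptThreads()
    }
}
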
@@ -190,6 +157,20 @@ struct KernelCore::Impl {
190 system.CoreTiming().ScheduleEvent(time_interval, preemption_event); 157 system.CoreTiming().ScheduleEvent(time_interval, preemption_event);
191 } 158 }
192 159
160 void InitializeSuspendThreads() {
161 for (std::size_t i = 0; i < Core::Hardware::NUM_CPU_CORES; i++) {
162 std::string name = "Suspend Thread Id:" + std::to_string(i);
163 std::function<void(void*)> init_func =
164 system.GetCpuManager().GetSuspendThreadStartFunc();
165 void* init_func_parameter = system.GetCpuManager().GetStartFuncParamater();
166 ThreadType type =
167 static_cast<ThreadType>(THREADTYPE_KERNEL | THREADTYPE_HLE | THREADTYPE_SUSPEND);
168 auto thread_res = Thread::Create(system, type, name, 0, 0, 0, static_cast<u32>(i), 0,
169 nullptr, std::move(init_func), init_func_parameter);
170 suspend_threads[i] = std::move(thread_res).Unwrap();
171 }
172 }
173
193 void MakeCurrentProcess(Process* process) { 174 void MakeCurrentProcess(Process* process) {
194 current_process = process; 175 current_process = process;
195 176
@@ -197,15 +178,17 @@ struct KernelCore::Impl {
197 return; 178 return;
198 } 179 }
199 180
200 for (auto& core : cores) { 181 u32 core_id = GetCurrentHostThreadID();
201 core.SetIs64Bit(process->Is64BitProcess()); 182 if (core_id < Core::Hardware::NUM_CPU_CORES) {
183 system.Memory().SetCurrentPageTable(*process, core_id);
202 } 184 }
203
204 system.Memory().SetCurrentPageTable(*process);
205 } 185 }
206 186
207 void RegisterCoreThread(std::size_t core_id) { 187 void RegisterCoreThread(std::size_t core_id) {
208 std::unique_lock lock{register_thread_mutex}; 188 std::unique_lock lock{register_thread_mutex};
189 if (!is_multicore) {
190 single_core_thread_id = std::this_thread::get_id();
191 }
209 const std::thread::id this_id = std::this_thread::get_id(); 192 const std::thread::id this_id = std::this_thread::get_id();
210 const auto it = host_thread_ids.find(this_id); 193 const auto it = host_thread_ids.find(this_id);
211 ASSERT(core_id < Core::Hardware::NUM_CPU_CORES); 194 ASSERT(core_id < Core::Hardware::NUM_CPU_CORES);
@@ -219,12 +202,19 @@ struct KernelCore::Impl {
219 std::unique_lock lock{register_thread_mutex}; 202 std::unique_lock lock{register_thread_mutex};
220 const std::thread::id this_id = std::this_thread::get_id(); 203 const std::thread::id this_id = std::this_thread::get_id();
221 const auto it = host_thread_ids.find(this_id); 204 const auto it = host_thread_ids.find(this_id);
222 ASSERT(it == host_thread_ids.end()); 205 if (it != host_thread_ids.end()) {
206 return;
207 }
223 host_thread_ids[this_id] = registered_thread_ids++; 208 host_thread_ids[this_id] = registered_thread_ids++;
224 } 209 }
225 210
226 u32 GetCurrentHostThreadID() const { 211 u32 GetCurrentHostThreadID() const {
227 const std::thread::id this_id = std::this_thread::get_id(); 212 const std::thread::id this_id = std::this_thread::get_id();
213 if (!is_multicore) {
214 if (single_core_thread_id == this_id) {
215 return static_cast<u32>(system.GetCpuManager().CurrentCore());
216 }
217 }
228 const auto it = host_thread_ids.find(this_id); 218 const auto it = host_thread_ids.find(this_id);
229 if (it == host_thread_ids.end()) { 219 if (it == host_thread_ids.end()) {
230 return Core::INVALID_HOST_THREAD_ID; 220 return Core::INVALID_HOST_THREAD_ID;
@@ -240,7 +230,7 @@ struct KernelCore::Impl {
240 } 230 }
241 const Kernel::Scheduler& sched = cores[result.host_handle].Scheduler(); 231 const Kernel::Scheduler& sched = cores[result.host_handle].Scheduler();
242 const Kernel::Thread* current = sched.GetCurrentThread(); 232 const Kernel::Thread* current = sched.GetCurrentThread();
243 if (current != nullptr) { 233 if (current != nullptr && !current->IsPhantomMode()) {
244 result.guest_handle = current->GetGlobalHandle(); 234 result.guest_handle = current->GetGlobalHandle();
245 } else { 235 } else {
246 result.guest_handle = InvalidHandle; 236 result.guest_handle = InvalidHandle;
@@ -313,7 +303,6 @@ struct KernelCore::Impl {
313 303
314 std::shared_ptr<ResourceLimit> system_resource_limit; 304 std::shared_ptr<ResourceLimit> system_resource_limit;
315 305
316 std::shared_ptr<Core::Timing::EventType> thread_wakeup_event_type;
317 std::shared_ptr<Core::Timing::EventType> preemption_event; 306 std::shared_ptr<Core::Timing::EventType> preemption_event;
318 307
319 // This is the kernel's handle table or supervisor handle table which 308 // This is the kernel's handle table or supervisor handle table which
@@ -343,6 +332,15 @@ struct KernelCore::Impl {
343 std::shared_ptr<Kernel::SharedMemory> irs_shared_mem; 332 std::shared_ptr<Kernel::SharedMemory> irs_shared_mem;
344 std::shared_ptr<Kernel::SharedMemory> time_shared_mem; 333 std::shared_ptr<Kernel::SharedMemory> time_shared_mem;
345 334
335 std::array<std::shared_ptr<Thread>, Core::Hardware::NUM_CPU_CORES> suspend_threads{};
336 std::array<Core::CPUInterruptHandler, Core::Hardware::NUM_CPU_CORES> interrupts{};
337 std::array<std::unique_ptr<Kernel::Scheduler>, Core::Hardware::NUM_CPU_CORES> schedulers{};
338
339 bool is_multicore{};
340 std::thread::id single_core_thread_id{};
341
342 std::array<u64, Core::Hardware::NUM_CPU_CORES> svc_ticks{};
343
346 // System context 344 // System context
347 Core::System& system; 345 Core::System& system;
348}; 346};
@@ -352,6 +350,10 @@ KernelCore::~KernelCore() {
352 Shutdown(); 350 Shutdown();
353} 351}
354 352
353void KernelCore::SetMulticore(bool is_multicore) {
354 impl->SetMulticore(is_multicore);
355}
356
355void KernelCore::Initialize() { 357void KernelCore::Initialize() {
356 impl->Initialize(*this); 358 impl->Initialize(*this);
357} 359}
@@ -397,11 +399,11 @@ const Kernel::GlobalScheduler& KernelCore::GlobalScheduler() const {
397} 399}
398 400
399Kernel::Scheduler& KernelCore::Scheduler(std::size_t id) { 401Kernel::Scheduler& KernelCore::Scheduler(std::size_t id) {
400 return impl->cores[id].Scheduler(); 402 return *impl->schedulers[id];
401} 403}
402 404
403const Kernel::Scheduler& KernelCore::Scheduler(std::size_t id) const { 405const Kernel::Scheduler& KernelCore::Scheduler(std::size_t id) const {
404 return impl->cores[id].Scheduler(); 406 return *impl->schedulers[id];
405} 407}
406 408
407Kernel::PhysicalCore& KernelCore::PhysicalCore(std::size_t id) { 409Kernel::PhysicalCore& KernelCore::PhysicalCore(std::size_t id) {
@@ -412,6 +414,39 @@ const Kernel::PhysicalCore& KernelCore::PhysicalCore(std::size_t id) const {
412 return impl->cores[id]; 414 return impl->cores[id];
413} 415}
414 416
417Kernel::PhysicalCore& KernelCore::CurrentPhysicalCore() {
418 u32 core_id = impl->GetCurrentHostThreadID();
419 ASSERT(core_id < Core::Hardware::NUM_CPU_CORES);
420 return impl->cores[core_id];
421}
422
423const Kernel::PhysicalCore& KernelCore::CurrentPhysicalCore() const {
424 u32 core_id = impl->GetCurrentHostThreadID();
425 ASSERT(core_id < Core::Hardware::NUM_CPU_CORES);
426 return impl->cores[core_id];
427}
428
429Kernel::Scheduler& KernelCore::CurrentScheduler() {
430 u32 core_id = impl->GetCurrentHostThreadID();
431 ASSERT(core_id < Core::Hardware::NUM_CPU_CORES);
432 return *impl->schedulers[core_id];
433}
434
435const Kernel::Scheduler& KernelCore::CurrentScheduler() const {
436 u32 core_id = impl->GetCurrentHostThreadID();
437 ASSERT(core_id < Core::Hardware::NUM_CPU_CORES);
438 return *impl->schedulers[core_id];
439}
440
441std::array<Core::CPUInterruptHandler, Core::Hardware::NUM_CPU_CORES>& KernelCore::Interrupts() {
442 return impl->interrupts;
443}
444
445const std::array<Core::CPUInterruptHandler, Core::Hardware::NUM_CPU_CORES>& KernelCore::Interrupts()
446 const {
447 return impl->interrupts;
448}
449
415Kernel::Synchronization& KernelCore::Synchronization() { 450Kernel::Synchronization& KernelCore::Synchronization() {
416 return impl->synchronization; 451 return impl->synchronization;
417} 452}
@@ -437,15 +472,17 @@ const Core::ExclusiveMonitor& KernelCore::GetExclusiveMonitor() const {
437} 472}
438 473
439void KernelCore::InvalidateAllInstructionCaches() { 474void KernelCore::InvalidateAllInstructionCaches() {
440 for (std::size_t i = 0; i < impl->global_scheduler.CpuCoresCount(); i++) { 475 auto& threads = GlobalScheduler().GetThreadList();
441 PhysicalCore(i).ArmInterface().ClearInstructionCache(); 476 for (auto& thread : threads) {
477 if (!thread->IsHLEThread()) {
478 auto& arm_interface = thread->ArmInterface();
479 arm_interface.ClearInstructionCache();
480 }
442 } 481 }
443} 482}
444 483
445void KernelCore::PrepareReschedule(std::size_t id) { 484void KernelCore::PrepareReschedule(std::size_t id) {
446 if (id < impl->global_scheduler.CpuCoresCount()) { 485 // TODO: Reimplement this
447 impl->cores[id].Stop();
448 }
449} 486}
450 487
451void KernelCore::AddNamedPort(std::string name, std::shared_ptr<ClientPort> port) { 488void KernelCore::AddNamedPort(std::string name, std::shared_ptr<ClientPort> port) {
@@ -481,10 +518,6 @@ u64 KernelCore::CreateNewUserProcessID() {
481 return impl->next_user_process_id++; 518 return impl->next_user_process_id++;
482} 519}
483 520
484const std::shared_ptr<Core::Timing::EventType>& KernelCore::ThreadWakeupCallbackEventType() const {
485 return impl->thread_wakeup_event_type;
486}
487
488Kernel::HandleTable& KernelCore::GlobalHandleTable() { 521Kernel::HandleTable& KernelCore::GlobalHandleTable() {
489 return impl->global_handle_table; 522 return impl->global_handle_table;
490} 523}
@@ -557,4 +590,34 @@ const Kernel::SharedMemory& KernelCore::GetTimeSharedMem() const {
557 return *impl->time_shared_mem; 590 return *impl->time_shared_mem;
558} 591}
559 592
593void KernelCore::Suspend(bool in_suspension) {
594 const bool should_suspend = exception_exited || in_suspension;
595 {
596 SchedulerLock lock(*this);
597 ThreadStatus status = should_suspend ? ThreadStatus::Ready : ThreadStatus::WaitSleep;
598 for (std::size_t i = 0; i < Core::Hardware::NUM_CPU_CORES; i++) {
599 impl->suspend_threads[i]->SetStatus(status);
600 }
601 }
602}
603
604bool KernelCore::IsMulticore() const {
605 return impl->is_multicore;
606}
607
608void KernelCore::ExceptionalExit() {
609 exception_exited = true;
610 Suspend(true);
611}
612
613void KernelCore::EnterSVCProfile() {
614 std::size_t core = impl->GetCurrentHostThreadID();
615 impl->svc_ticks[core] = MicroProfileEnter(MICROPROFILE_TOKEN(Kernel_SVC));
616}
617
618void KernelCore::ExitSVCProfile() {
619 std::size_t core = impl->GetCurrentHostThreadID();
620 MicroProfileLeave(MICROPROFILE_TOKEN(Kernel_SVC), impl->svc_ticks[core]);
621}
622
560} // namespace Kernel 623} // namespace Kernel
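
The single-core path above relies on one host thread servicing all emulated cores, so GetCurrentHostThreadID defers to the CPU manager for the active core. A minimal sketch of that lookup, with hypothetical names (HostThreadRegistry, current_core_hint) standing in for KernelCore::Impl's real bookkeeping:

```cpp
#include <cstdint>
#include <thread>
#include <unordered_map>

using u32 = std::uint32_t;
constexpr u32 INVALID_HOST_THREAD_ID = 0xFFFFFFFF; // stand-in for Core::INVALID_HOST_THREAD_ID

struct HostThreadRegistry {
    bool is_multicore{};
    std::thread::id single_core_thread_id{};
    std::unordered_map<std::thread::id, u32> host_thread_ids;
    u32 current_core_hint{}; // stand-in for CpuManager::CurrentCore()

    u32 GetCurrentHostThreadID() const {
        const std::thread::id this_id = std::this_thread::get_id();
        // Single-core mode: one host thread round-robins all emulated cores,
        // so the "current core" is whatever the CPU manager reports.
        if (!is_multicore && this_id == single_core_thread_id) {
            return current_core_hint;
        }
        const auto it = host_thread_ids.find(this_id);
        return it == host_thread_ids.end() ? INVALID_HOST_THREAD_ID : it->second;
    }
};
```
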
diff --git a/src/core/hle/kernel/kernel.h b/src/core/hle/kernel/kernel.h
index 83de1f542..49bd47e89 100644
--- a/src/core/hle/kernel/kernel.h
+++ b/src/core/hle/kernel/kernel.h
@@ -4,15 +4,17 @@
4 4
5#pragma once 5#pragma once
6 6
7#include <array>
7#include <memory> 8#include <memory>
8#include <string> 9#include <string>
9#include <unordered_map> 10#include <unordered_map>
10#include <vector> 11#include <vector>
12#include "core/hardware_properties.h"
11#include "core/hle/kernel/memory/memory_types.h" 13#include "core/hle/kernel/memory/memory_types.h"
12#include "core/hle/kernel/object.h" 14#include "core/hle/kernel/object.h"
13 15
14namespace Core { 16namespace Core {
15struct EmuThreadHandle; 17class CPUInterruptHandler;
16class ExclusiveMonitor; 18class ExclusiveMonitor;
17class System; 19class System;
18} // namespace Core 20} // namespace Core
@@ -65,6 +67,9 @@ public:
65 KernelCore(KernelCore&&) = delete; 67 KernelCore(KernelCore&&) = delete;
66 KernelCore& operator=(KernelCore&&) = delete; 68 KernelCore& operator=(KernelCore&&) = delete;
67 69
70 /// Sets whether emulation is multicore or single-core. Must be called before Initialize.
71 void SetMulticore(bool is_multicore);
72
68 /// Resets the kernel to a clean slate for use. 73 /// Resets the kernel to a clean slate for use.
69 void Initialize(); 74 void Initialize();
70 75
@@ -110,6 +115,18 @@ public:
110 /// Gets an instance of the respective physical CPU core. 115
111 const Kernel::PhysicalCore& PhysicalCore(std::size_t id) const; 116 const Kernel::PhysicalCore& PhysicalCore(std::size_t id) const;
112 117
118 /// Gets the sole instance of the Scheduler for the currently running core.
119 Kernel::Scheduler& CurrentScheduler();
120
121 /// Gets the sole instance of the Scheduler for the currently running core.
122 const Kernel::Scheduler& CurrentScheduler() const;
123
124 /// Gets an instance of the current physical CPU core.
125 Kernel::PhysicalCore& CurrentPhysicalCore();
126
127 /// Gets an instance of the current physical CPU core.
128 const Kernel::PhysicalCore& CurrentPhysicalCore() const;
129
113 /// Gets an instance of the Synchronization Interface. 130
114 Kernel::Synchronization& Synchronization(); 131 Kernel::Synchronization& Synchronization();
115 132
@@ -129,6 +146,10 @@ public:
129 146
130 const Core::ExclusiveMonitor& GetExclusiveMonitor() const; 147 const Core::ExclusiveMonitor& GetExclusiveMonitor() const;
131 148
149 std::array<Core::CPUInterruptHandler, Core::Hardware::NUM_CPU_CORES>& Interrupts();
150
151 const std::array<Core::CPUInterruptHandler, Core::Hardware::NUM_CPU_CORES>& Interrupts() const;
152
132 void InvalidateAllInstructionCaches(); 153 void InvalidateAllInstructionCaches();
133 154
134 /// Adds a port to the named port table 155 /// Adds a port to the named port table
@@ -191,6 +212,18 @@ public:
191 /// Gets the shared memory object for Time services. 212 /// Gets the shared memory object for Time services.
192 const Kernel::SharedMemory& GetTimeSharedMem() const; 213 const Kernel::SharedMemory& GetTimeSharedMem() const;
193 214
215 /// Suspends or resumes the OS.
216 void Suspend(bool in_suspension);
217
218 /// Performs an exceptional exit of the OS.
219 void ExceptionalExit();
220
221 bool IsMulticore() const;
222
223 void EnterSVCProfile();
224
225 void ExitSVCProfile();
226
194private: 227private:
195 friend class Object; 228 friend class Object;
196 friend class Process; 229 friend class Process;
@@ -208,9 +241,6 @@ private:
208 /// Creates a new thread ID, incrementing the internal thread ID counter. 241 /// Creates a new thread ID, incrementing the internal thread ID counter.
209 u64 CreateNewThreadID(); 242 u64 CreateNewThreadID();
210 243
211 /// Retrieves the event type used for thread wakeup callbacks.
212 const std::shared_ptr<Core::Timing::EventType>& ThreadWakeupCallbackEventType() const;
213
214 /// Provides a reference to the global handle table. 244 /// Provides a reference to the global handle table.
215 Kernel::HandleTable& GlobalHandleTable(); 245 Kernel::HandleTable& GlobalHandleTable();
216 246
@@ -219,6 +249,7 @@ private:
219 249
220 struct Impl; 250 struct Impl;
221 std::unique_ptr<Impl> impl; 251 std::unique_ptr<Impl> impl;
252 bool exception_exited{};
222}; 253};
223 254
224} // namespace Kernel 255} // namespace Kernel
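
As the header notes, SetMulticore must precede Initialize. A hypothetical boot/pause flow over the new API (the actual call sites in yuzu differ; this only shows the required ordering):

```cpp
// Hypothetical usage of the KernelCore API added above; `system` is a
// Core::System instance.
void BootAndPause(Core::System& system, bool use_multicore) {
    auto& kernel = system.Kernel();
    kernel.SetMulticore(use_multicore); // must be called before Initialize()
    kernel.Initialize();

    // ... create and run the application process ...

    kernel.Suspend(true);  // readies the per-core suspend threads, parking the guest
    kernel.Suspend(false); // suspend threads return to WaitSleep, guest resumes
}
```
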
diff --git a/src/core/hle/kernel/memory/memory_manager.cpp b/src/core/hle/kernel/memory/memory_manager.cpp
index 6b432e1b2..acf13585c 100644
--- a/src/core/hle/kernel/memory/memory_manager.cpp
+++ b/src/core/hle/kernel/memory/memory_manager.cpp
@@ -104,7 +104,7 @@ ResultCode MemoryManager::Allocate(PageLinkedList& page_list, std::size_t num_pa
104 // Ensure that we don't leave anything un-freed 104 // Ensure that we don't leave anything un-freed
105 auto group_guard = detail::ScopeExit([&] { 105 auto group_guard = detail::ScopeExit([&] {
106 for (const auto& it : page_list.Nodes()) { 106 for (const auto& it : page_list.Nodes()) {
107 const auto min_num_pages{std::min( 107 const auto min_num_pages{std::min<size_t>(
108 it.GetNumPages(), (chosen_manager.GetEndAddress() - it.GetAddress()) / PageSize)}; 108 it.GetNumPages(), (chosen_manager.GetEndAddress() - it.GetAddress()) / PageSize)};
109 chosen_manager.Free(it.GetAddress(), min_num_pages); 109 chosen_manager.Free(it.GetAddress(), min_num_pages);
110 } 110 }
@@ -139,7 +139,6 @@ ResultCode MemoryManager::Allocate(PageLinkedList& page_list, std::size_t num_pa
139 } 139 }
140 140
141 // Only succeed if we allocated as many pages as we wanted 141 // Only succeed if we allocated as many pages as we wanted
142 ASSERT(num_pages >= 0);
143 if (num_pages) { 142 if (num_pages) {
144 return ERR_OUT_OF_MEMORY; 143 return ERR_OUT_OF_MEMORY;
145 } 144 }
@@ -165,7 +164,7 @@ ResultCode MemoryManager::Free(PageLinkedList& page_list, std::size_t num_pages,
165 164
166 // Free all of the pages 165 // Free all of the pages
167 for (const auto& it : page_list.Nodes()) { 166 for (const auto& it : page_list.Nodes()) {
168 const auto min_num_pages{std::min( 167 const auto min_num_pages{std::min<size_t>(
169 it.GetNumPages(), (chosen_manager.GetEndAddress() - it.GetAddress()) / PageSize)}; 168 it.GetNumPages(), (chosen_manager.GetEndAddress() - it.GetAddress()) / PageSize)};
170 chosen_manager.Free(it.GetAddress(), min_num_pages); 169 chosen_manager.Free(it.GetAddress(), min_num_pages);
171 } 170 }
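
The switch to std::min<size_t> pins the template argument because the two operands can have different integer types, and std::min cannot deduce T from mixed arguments. A self-contained illustration (types chosen for the example):

```cpp
#include <algorithm>
#include <cstddef>
#include <cstdint>

std::size_t MinPages(std::size_t num_pages, std::uint64_t end, std::uint64_t addr,
                     std::uint64_t page_size) {
    // std::min(num_pages, (end - addr) / page_size) fails to compile on
    // platforms where std::size_t and std::uint64_t are distinct types:
    // template argument deduction sees two different Ts.
    return std::min<std::size_t>(num_pages, (end - addr) / page_size);
}
```
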
diff --git a/src/core/hle/kernel/mutex.cpp b/src/core/hle/kernel/mutex.cpp
index 7869eb32b..8f6c944d1 100644
--- a/src/core/hle/kernel/mutex.cpp
+++ b/src/core/hle/kernel/mutex.cpp
@@ -34,8 +34,6 @@ static std::pair<std::shared_ptr<Thread>, u32> GetHighestPriorityMutexWaitingThr
34 if (thread->GetMutexWaitAddress() != mutex_addr) 34 if (thread->GetMutexWaitAddress() != mutex_addr)
35 continue; 35 continue;
36 36
37 ASSERT(thread->GetStatus() == ThreadStatus::WaitMutex);
38
39 ++num_waiters; 37 ++num_waiters;
40 if (highest_priority_thread == nullptr || 38 if (highest_priority_thread == nullptr ||
41 thread->GetPriority() < highest_priority_thread->GetPriority()) { 39 thread->GetPriority() < highest_priority_thread->GetPriority()) {
@@ -49,6 +47,7 @@ static std::pair<std::shared_ptr<Thread>, u32> GetHighestPriorityMutexWaitingThr
49/// Update the mutex owner field of all threads waiting on the mutex to point to the new owner. 47/// Update the mutex owner field of all threads waiting on the mutex to point to the new owner.
50static void TransferMutexOwnership(VAddr mutex_addr, std::shared_ptr<Thread> current_thread, 48static void TransferMutexOwnership(VAddr mutex_addr, std::shared_ptr<Thread> current_thread,
51 std::shared_ptr<Thread> new_owner) { 49 std::shared_ptr<Thread> new_owner) {
50 current_thread->RemoveMutexWaiter(new_owner);
52 const auto threads = current_thread->GetMutexWaitingThreads(); 51 const auto threads = current_thread->GetMutexWaitingThreads();
53 for (const auto& thread : threads) { 52 for (const auto& thread : threads) {
54 if (thread->GetMutexWaitAddress() != mutex_addr) 53 if (thread->GetMutexWaitAddress() != mutex_addr)
@@ -72,85 +71,100 @@ ResultCode Mutex::TryAcquire(VAddr address, Handle holding_thread_handle,
72 return ERR_INVALID_ADDRESS; 71 return ERR_INVALID_ADDRESS;
73 } 72 }
74 73
75 const auto& handle_table = system.Kernel().CurrentProcess()->GetHandleTable(); 74 auto& kernel = system.Kernel();
76 std::shared_ptr<Thread> current_thread = 75 std::shared_ptr<Thread> current_thread =
77 SharedFrom(system.CurrentScheduler().GetCurrentThread()); 76 SharedFrom(kernel.CurrentScheduler().GetCurrentThread());
78 std::shared_ptr<Thread> holding_thread = handle_table.Get<Thread>(holding_thread_handle); 77 {
79 std::shared_ptr<Thread> requesting_thread = handle_table.Get<Thread>(requesting_thread_handle); 78 SchedulerLock lock(kernel);
79 // The mutex address must be 4-byte aligned
80 if ((address % sizeof(u32)) != 0) {
81 return ERR_INVALID_ADDRESS;
82 }
80 83
81 // TODO(Subv): It is currently unknown if it is possible to lock a mutex on behalf of another 84 const auto& handle_table = kernel.CurrentProcess()->GetHandleTable();
82 // thread. 85 std::shared_ptr<Thread> holding_thread = handle_table.Get<Thread>(holding_thread_handle);
83 ASSERT(requesting_thread == current_thread); 86 std::shared_ptr<Thread> requesting_thread =
87 handle_table.Get<Thread>(requesting_thread_handle);
84 88
85 const u32 addr_value = system.Memory().Read32(address); 89 // TODO(Subv): It is currently unknown if it is possible to lock a mutex in behalf of
90 // another thread.
91 ASSERT(requesting_thread == current_thread);
86 92
87 // If the mutex isn't being held, just return success. 93 current_thread->SetSynchronizationResults(nullptr, RESULT_SUCCESS);
88 if (addr_value != (holding_thread_handle | Mutex::MutexHasWaitersFlag)) {
89 return RESULT_SUCCESS;
90 }
91 94
92 if (holding_thread == nullptr) { 95 const u32 addr_value = system.Memory().Read32(address);
93 LOG_ERROR(Kernel, "Holding thread does not exist! thread_handle={:08X}", 96
94 holding_thread_handle); 97 // If the mutex isn't being held, just return success.
95 return ERR_INVALID_HANDLE; 98 if (addr_value != (holding_thread_handle | Mutex::MutexHasWaitersFlag)) {
96 } 99 return RESULT_SUCCESS;
100 }
97 101
98 // Wait until the mutex is released 102 if (holding_thread == nullptr) {
99 current_thread->SetMutexWaitAddress(address); 103 return ERR_INVALID_HANDLE;
100 current_thread->SetWaitHandle(requesting_thread_handle); 104 }
101 105
102 current_thread->SetStatus(ThreadStatus::WaitMutex); 106 // Wait until the mutex is released
103 current_thread->InvalidateWakeupCallback(); 107 current_thread->SetMutexWaitAddress(address);
108 current_thread->SetWaitHandle(requesting_thread_handle);
104 109
105 // Update the lock holder thread's priority to prevent priority inversion. 110 current_thread->SetStatus(ThreadStatus::WaitMutex);
106 holding_thread->AddMutexWaiter(current_thread);
107 111
108 system.PrepareReschedule(); 112 // Update the lock holder thread's priority to prevent priority inversion.
113 holding_thread->AddMutexWaiter(current_thread);
114 }
109 115
110 return RESULT_SUCCESS; 116 {
117 SchedulerLock lock(kernel);
118 auto* owner = current_thread->GetLockOwner();
119 if (owner != nullptr) {
120 owner->RemoveMutexWaiter(current_thread);
121 }
122 }
123 return current_thread->GetSignalingResult();
111} 124}
112 125
113ResultCode Mutex::Release(VAddr address) { 126std::pair<ResultCode, std::shared_ptr<Thread>> Mutex::Unlock(std::shared_ptr<Thread> owner,
127 VAddr address) {
114 // The mutex address must be 4-byte aligned 128 // The mutex address must be 4-byte aligned
115 if ((address % sizeof(u32)) != 0) { 129 if ((address % sizeof(u32)) != 0) {
116 LOG_ERROR(Kernel, "Address is not 4-byte aligned! address={:016X}", address); 130 LOG_ERROR(Kernel, "Address is not 4-byte aligned! address={:016X}", address);
117 return ERR_INVALID_ADDRESS; 131 return {ERR_INVALID_ADDRESS, nullptr};
118 } 132 }
119 133
120 std::shared_ptr<Thread> current_thread = 134 auto [new_owner, num_waiters] = GetHighestPriorityMutexWaitingThread(owner, address);
121 SharedFrom(system.CurrentScheduler().GetCurrentThread()); 135 if (new_owner == nullptr) {
122 auto [thread, num_waiters] = GetHighestPriorityMutexWaitingThread(current_thread, address);
123
124 // There are no more threads waiting for the mutex, release it completely.
125 if (thread == nullptr) {
126 system.Memory().Write32(address, 0); 136 system.Memory().Write32(address, 0);
127 return RESULT_SUCCESS; 137 return {RESULT_SUCCESS, nullptr};
128 } 138 }
129
130 // Transfer the ownership of the mutex from the previous owner to the new one. 139 // Transfer the ownership of the mutex from the previous owner to the new one.
131 TransferMutexOwnership(address, current_thread, thread); 140 TransferMutexOwnership(address, owner, new_owner);
132 141 u32 mutex_value = new_owner->GetWaitHandle();
133 u32 mutex_value = thread->GetWaitHandle();
134
135 if (num_waiters >= 2) { 142 if (num_waiters >= 2) {
136 // Notify the guest that there are still some threads waiting for the mutex 143 // Notify the guest that there are still some threads waiting for the mutex
137 mutex_value |= Mutex::MutexHasWaitersFlag; 144 mutex_value |= Mutex::MutexHasWaitersFlag;
138 } 145 }
146 new_owner->SetSynchronizationResults(nullptr, RESULT_SUCCESS);
147 new_owner->SetLockOwner(nullptr);
148 new_owner->ResumeFromWait();
139 149
140 // Grant the mutex to the next waiting thread and resume it.
141 system.Memory().Write32(address, mutex_value); 150 system.Memory().Write32(address, mutex_value);
151 return {RESULT_SUCCESS, new_owner};
152}
142 153
143 ASSERT(thread->GetStatus() == ThreadStatus::WaitMutex); 154ResultCode Mutex::Release(VAddr address) {
144 thread->ResumeFromWait(); 155 auto& kernel = system.Kernel();
156 SchedulerLock lock(kernel);
145 157
146 thread->SetLockOwner(nullptr); 158 std::shared_ptr<Thread> current_thread =
147 thread->SetCondVarWaitAddress(0); 159 SharedFrom(kernel.CurrentScheduler().GetCurrentThread());
148 thread->SetMutexWaitAddress(0);
149 thread->SetWaitHandle(0);
150 thread->SetWaitSynchronizationResult(RESULT_SUCCESS);
151 160
152 system.PrepareReschedule(); 161 auto [result, new_owner] = Unlock(current_thread, address);
153 162
154 return RESULT_SUCCESS; 163 if (result != RESULT_SUCCESS && new_owner != nullptr) {
164 new_owner->SetSynchronizationResults(nullptr, result);
165 }
166
167 return result;
155} 168}
169
156} // namespace Kernel 170} // namespace Kernel
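
Both paths read and write the guest-visible mutex word, which packs the owner's handle with a waiters bit. A small sketch of that encoding (the flag value is assumed to match Mutex::MutexHasWaitersFlag):

```cpp
#include <cstdint>

using u32 = std::uint32_t;
constexpr u32 MutexHasWaitersFlag = 0x40000000; // assumed to match Mutex::MutexHasWaitersFlag

// Compose the word TryAcquire compares against and Unlock writes back.
constexpr u32 MakeMutexValue(u32 owner_handle, bool has_waiters) {
    return owner_handle | (has_waiters ? MutexHasWaitersFlag : 0);
}

// Recover the owning thread's handle from a mutex word.
constexpr u32 OwnerHandle(u32 mutex_value) {
    return mutex_value & ~MutexHasWaitersFlag;
}

static_assert(OwnerHandle(MakeMutexValue(0x1234, true)) == 0x1234);
```
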
diff --git a/src/core/hle/kernel/mutex.h b/src/core/hle/kernel/mutex.h
index b904de2e8..3b81dc3df 100644
--- a/src/core/hle/kernel/mutex.h
+++ b/src/core/hle/kernel/mutex.h
@@ -28,6 +28,10 @@ public:
28 ResultCode TryAcquire(VAddr address, Handle holding_thread_handle, 28 ResultCode TryAcquire(VAddr address, Handle holding_thread_handle,
29 Handle requesting_thread_handle); 29 Handle requesting_thread_handle);
30 30
31 /// Unlocks the mutex at the given address on behalf of the owner thread
32 std::pair<ResultCode, std::shared_ptr<Thread>> Unlock(std::shared_ptr<Thread> owner,
33 VAddr address);
34
31 /// Releases the mutex at the specified address. 35 /// Releases the mutex at the specified address.
32 ResultCode Release(VAddr address); 36 ResultCode Release(VAddr address);
33 37
diff --git a/src/core/hle/kernel/physical_core.cpp b/src/core/hle/kernel/physical_core.cpp
index a15011076..c6bbdb080 100644
--- a/src/core/hle/kernel/physical_core.cpp
+++ b/src/core/hle/kernel/physical_core.cpp
@@ -2,12 +2,15 @@
2// Licensed under GPLv2 or any later version 2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included. 3// Refer to the license.txt file included.
4 4
5#include "common/assert.h"
5#include "common/logging/log.h" 6#include "common/logging/log.h"
7#include "common/spin_lock.h"
6#include "core/arm/arm_interface.h" 8#include "core/arm/arm_interface.h"
7#ifdef ARCHITECTURE_x86_64 9#ifdef ARCHITECTURE_x86_64
8#include "core/arm/dynarmic/arm_dynarmic_32.h" 10#include "core/arm/dynarmic/arm_dynarmic_32.h"
9#include "core/arm/dynarmic/arm_dynarmic_64.h" 11#include "core/arm/dynarmic/arm_dynarmic_64.h"
10#endif 12#endif
13#include "core/arm/cpu_interrupt_handler.h"
11#include "core/arm/exclusive_monitor.h" 14#include "core/arm/exclusive_monitor.h"
12#include "core/arm/unicorn/arm_unicorn.h" 15#include "core/arm/unicorn/arm_unicorn.h"
13#include "core/core.h" 16#include "core/core.h"
@@ -17,50 +20,37 @@
17 20
18namespace Kernel { 21namespace Kernel {
19 22
20PhysicalCore::PhysicalCore(Core::System& system, std::size_t id, 23PhysicalCore::PhysicalCore(Core::System& system, std::size_t id, Kernel::Scheduler& scheduler,
21 Core::ExclusiveMonitor& exclusive_monitor) 24 Core::CPUInterruptHandler& interrupt_handler)
22 : core_index{id} { 25 : interrupt_handler{interrupt_handler}, core_index{id}, scheduler{scheduler} {
23#ifdef ARCHITECTURE_x86_64
24 arm_interface_32 =
25 std::make_unique<Core::ARM_Dynarmic_32>(system, exclusive_monitor, core_index);
26 arm_interface_64 =
27 std::make_unique<Core::ARM_Dynarmic_64>(system, exclusive_monitor, core_index);
28
29#else
30 using Core::ARM_Unicorn;
31 arm_interface_32 = std::make_unique<ARM_Unicorn>(system, ARM_Unicorn::Arch::AArch32);
32 arm_interface_64 = std::make_unique<ARM_Unicorn>(system, ARM_Unicorn::Arch::AArch64);
33 LOG_WARNING(Core, "CPU JIT requested, but Dynarmic not available");
34#endif
35 26
36 scheduler = std::make_unique<Kernel::Scheduler>(system, core_index); 27 guard = std::make_unique<Common::SpinLock>();
37} 28}
38 29
39PhysicalCore::~PhysicalCore() = default; 30PhysicalCore::~PhysicalCore() = default;
40 31
41void PhysicalCore::Run() { 32void PhysicalCore::Idle() {
42 arm_interface->Run(); 33 interrupt_handler.AwaitInterrupt();
43 arm_interface->ClearExclusiveState();
44} 34}
45 35
46void PhysicalCore::Step() { 36void PhysicalCore::Shutdown() {
47 arm_interface->Step(); 37 scheduler.Shutdown();
48} 38}
49 39
50void PhysicalCore::Stop() { 40bool PhysicalCore::IsInterrupted() const {
51 arm_interface->PrepareReschedule(); 41 return interrupt_handler.IsInterrupted();
52} 42}
53 43
54void PhysicalCore::Shutdown() { 44void PhysicalCore::Interrupt() {
55 scheduler->Shutdown(); 45 guard->lock();
46 interrupt_handler.SetInterrupt(true);
47 guard->unlock();
56} 48}
57 49
58void PhysicalCore::SetIs64Bit(bool is_64_bit) { 50void PhysicalCore::ClearInterrupt() {
59 if (is_64_bit) { 51 guard->lock();
60 arm_interface = arm_interface_64.get(); 52 interrupt_handler.SetInterrupt(false);
61 } else { 53 guard->unlock();
62 arm_interface = arm_interface_32.get();
63 }
64} 54}
65 55
66} // namespace Kernel 56} // namespace Kernel
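
Idle() now parks the core in CPUInterruptHandler::AwaitInterrupt until another core raises an interrupt. A minimal sketch of a handler with that interface, using a condition variable; the real Core::CPUInterruptHandler may be implemented differently:

```cpp
#include <atomic>
#include <condition_variable>
#include <mutex>

class CPUInterruptHandlerSketch {
public:
    void SetInterrupt(bool raise) {
        {
            std::scoped_lock lk{mutex};
            interrupted = raise;
        }
        if (raise) {
            cv.notify_all(); // wake any core parked in AwaitInterrupt()
        }
    }

    void AwaitInterrupt() {
        std::unique_lock lk{mutex};
        cv.wait(lk, [this] { return interrupted.load(); });
    }

    bool IsInterrupted() const {
        return interrupted.load();
    }

private:
    std::mutex mutex;
    std::condition_variable cv;
    std::atomic<bool> interrupted{};
};
```
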
diff --git a/src/core/hle/kernel/physical_core.h b/src/core/hle/kernel/physical_core.h
index 3269166be..d7a7a951c 100644
--- a/src/core/hle/kernel/physical_core.h
+++ b/src/core/hle/kernel/physical_core.h
@@ -7,12 +7,17 @@
7#include <cstddef> 7#include <cstddef>
8#include <memory> 8#include <memory>
9 9
10namespace Common {
11class SpinLock;
12}
13
10namespace Kernel { 14namespace Kernel {
11class Scheduler; 15class Scheduler;
12} // namespace Kernel 16} // namespace Kernel
13 17
14namespace Core { 18namespace Core {
15class ARM_Interface; 19class ARM_Interface;
20class CPUInterruptHandler;
16class ExclusiveMonitor; 21class ExclusiveMonitor;
17class System; 22class System;
18} // namespace Core 23} // namespace Core
@@ -21,7 +26,8 @@ namespace Kernel {
21 26
22class PhysicalCore { 27class PhysicalCore {
23public: 28public:
24 PhysicalCore(Core::System& system, std::size_t id, Core::ExclusiveMonitor& exclusive_monitor); 29 PhysicalCore(Core::System& system, std::size_t id, Kernel::Scheduler& scheduler,
30 Core::CPUInterruptHandler& interrupt_handler);
25 ~PhysicalCore(); 31 ~PhysicalCore();
26 32
27 PhysicalCore(const PhysicalCore&) = delete; 33 PhysicalCore(const PhysicalCore&) = delete;
@@ -30,23 +36,18 @@ public:
30 PhysicalCore(PhysicalCore&&) = default; 36 PhysicalCore(PhysicalCore&&) = default;
31 PhysicalCore& operator=(PhysicalCore&&) = default; 37 PhysicalCore& operator=(PhysicalCore&&) = default;
32 38
33 /// Execute current jit state 39 void Idle();
34 void Run(); 40 /// Interrupt this physical core.
35 /// Execute a single instruction in current jit. 41 void Interrupt();
36 void Step();
37 /// Stop JIT execution/exit
38 void Stop();
39 42
40 // Shutdown this physical core. 43 /// Clear this core's interrupt
41 void Shutdown(); 44 void ClearInterrupt();
42 45
43 Core::ARM_Interface& ArmInterface() { 46 /// Check if this core is interrupted
44 return *arm_interface; 47 bool IsInterrupted() const;
45 }
46 48
47 const Core::ARM_Interface& ArmInterface() const { 49 // Shutdown this physical core.
48 return *arm_interface; 50 void Shutdown();
49 }
50 51
51 bool IsMainCore() const { 52 bool IsMainCore() const {
52 return core_index == 0; 53 return core_index == 0;
@@ -61,21 +62,18 @@ public:
61 } 62 }
62 63
63 Kernel::Scheduler& Scheduler() { 64 Kernel::Scheduler& Scheduler() {
64 return *scheduler; 65 return scheduler;
65 } 66 }
66 67
67 const Kernel::Scheduler& Scheduler() const { 68 const Kernel::Scheduler& Scheduler() const {
68 return *scheduler; 69 return scheduler;
69 } 70 }
70 71
71 void SetIs64Bit(bool is_64_bit);
72
73private: 72private:
73 Core::CPUInterruptHandler& interrupt_handler;
74 std::size_t core_index; 74 std::size_t core_index;
75 std::unique_ptr<Core::ARM_Interface> arm_interface_32; 75 Kernel::Scheduler& scheduler;
76 std::unique_ptr<Core::ARM_Interface> arm_interface_64; 76 std::unique_ptr<Common::SpinLock> guard;
77 std::unique_ptr<Kernel::Scheduler> scheduler;
78 Core::ARM_Interface* arm_interface{};
79}; 77};
80 78
81} // namespace Kernel 79} // namespace Kernel
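
PhysicalCore keeps its defaulted move operations, but a spin lock built on std::atomic_flag is immovable, which is why `guard` is held through std::unique_ptr. A sketch of the constraint:

```cpp
#include <atomic>
#include <memory>

struct SpinLockSketch {
    std::atomic_flag flag = ATOMIC_FLAG_INIT; // atomics are neither copyable nor movable
    void lock() { while (flag.test_and_set(std::memory_order_acquire)) {} }
    void unlock() { flag.clear(std::memory_order_release); }
};

struct MovableHolder {
    std::unique_ptr<SpinLockSketch> guard = std::make_unique<SpinLockSketch>();
}; // MovableHolder stays move-constructible even though SpinLockSketch is not
```
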
diff --git a/src/core/hle/kernel/process.cpp b/src/core/hle/kernel/process.cpp
index 36724569f..f9d7c024d 100644
--- a/src/core/hle/kernel/process.cpp
+++ b/src/core/hle/kernel/process.cpp
@@ -22,6 +22,7 @@
22#include "core/hle/kernel/resource_limit.h" 22#include "core/hle/kernel/resource_limit.h"
23#include "core/hle/kernel/scheduler.h" 23#include "core/hle/kernel/scheduler.h"
24#include "core/hle/kernel/thread.h" 24#include "core/hle/kernel/thread.h"
25#include "core/hle/lock.h"
25#include "core/memory.h" 26#include "core/memory.h"
26#include "core/settings.h" 27#include "core/settings.h"
27 28
@@ -30,14 +31,15 @@ namespace {
30/** 31/**
31 * Sets up the primary application thread 32 * Sets up the primary application thread
32 * 33 *
34 * @param system The system instance to create the main thread under.
33 * @param owner_process The parent process for the main thread 35 * @param owner_process The parent process for the main thread
34 * @param kernel The kernel instance to create the main thread under.
35 * @param priority The priority to give the main thread 36 * @param priority The priority to give the main thread
36 */ 37 */
37void SetupMainThread(Process& owner_process, KernelCore& kernel, u32 priority, VAddr stack_top) { 38void SetupMainThread(Core::System& system, Process& owner_process, u32 priority, VAddr stack_top) {
38 const VAddr entry_point = owner_process.PageTable().GetCodeRegionStart(); 39 const VAddr entry_point = owner_process.PageTable().GetCodeRegionStart();
39 auto thread_res = Thread::Create(kernel, "main", entry_point, priority, 0, 40 ThreadType type = THREADTYPE_USER;
40 owner_process.GetIdealCore(), stack_top, owner_process); 41 auto thread_res = Thread::Create(system, type, "main", entry_point, priority, 0,
42 owner_process.GetIdealCore(), stack_top, &owner_process);
41 43
42 std::shared_ptr<Thread> thread = std::move(thread_res).Unwrap(); 44 std::shared_ptr<Thread> thread = std::move(thread_res).Unwrap();
43 45
@@ -48,8 +50,12 @@ void SetupMainThread(Process& owner_process, KernelCore& kernel, u32 priority, V
48 thread->GetContext32().cpu_registers[1] = thread_handle; 50 thread->GetContext32().cpu_registers[1] = thread_handle;
49 thread->GetContext64().cpu_registers[1] = thread_handle; 51 thread->GetContext64().cpu_registers[1] = thread_handle;
50 52
53 auto& kernel = system.Kernel();
51 // Threads are dormant by default; wake up the main thread so it runs when the scheduler fires 54
52 thread->ResumeFromWait(); 55 {
56 SchedulerLock lock{kernel};
57 thread->SetStatus(ThreadStatus::Ready);
58 }
53} 59}
54} // Anonymous namespace 60} // Anonymous namespace
55 61
@@ -132,7 +138,8 @@ std::shared_ptr<ResourceLimit> Process::GetResourceLimit() const {
132 138
133u64 Process::GetTotalPhysicalMemoryAvailable() const { 139u64 Process::GetTotalPhysicalMemoryAvailable() const {
134 const u64 capacity{resource_limit->GetCurrentResourceValue(ResourceType::PhysicalMemory) + 140 const u64 capacity{resource_limit->GetCurrentResourceValue(ResourceType::PhysicalMemory) +
135 page_table->GetTotalHeapSize() + image_size + main_thread_stack_size}; 141 page_table->GetTotalHeapSize() + GetSystemResourceSize() + image_size +
142 main_thread_stack_size};
136 143
137 if (capacity < memory_usage_capacity) { 144 if (capacity < memory_usage_capacity) {
138 return capacity; 145 return capacity;
@@ -146,7 +153,8 @@ u64 Process::GetTotalPhysicalMemoryAvailableWithoutSystemResource() const {
146} 153}
147 154
148u64 Process::GetTotalPhysicalMemoryUsed() const { 155u64 Process::GetTotalPhysicalMemoryUsed() const {
149 return image_size + main_thread_stack_size + page_table->GetTotalHeapSize(); 156 return image_size + main_thread_stack_size + page_table->GetTotalHeapSize() +
157 GetSystemResourceSize();
150} 158}
151 159
152u64 Process::GetTotalPhysicalMemoryUsedWithoutSystemResource() const { 160u64 Process::GetTotalPhysicalMemoryUsedWithoutSystemResource() const {
@@ -180,7 +188,6 @@ void Process::RemoveConditionVariableThread(std::shared_ptr<Thread> thread) {
180 } 188 }
181 ++it; 189 ++it;
182 } 190 }
183 UNREACHABLE();
184} 191}
185 192
186std::vector<std::shared_ptr<Thread>> Process::GetConditionVariableThreads( 193std::vector<std::shared_ptr<Thread>> Process::GetConditionVariableThreads(
@@ -205,6 +212,7 @@ void Process::UnregisterThread(const Thread* thread) {
205} 212}
206 213
207ResultCode Process::ClearSignalState() { 214ResultCode Process::ClearSignalState() {
215 SchedulerLock lock(system.Kernel());
208 if (status == ProcessStatus::Exited) { 216 if (status == ProcessStatus::Exited) {
209 LOG_ERROR(Kernel, "called on a terminated process instance."); 217 LOG_ERROR(Kernel, "called on a terminated process instance.");
210 return ERR_INVALID_STATE; 218 return ERR_INVALID_STATE;
@@ -292,7 +300,7 @@ void Process::Run(s32 main_thread_priority, u64 stack_size) {
292 300
293 ChangeStatus(ProcessStatus::Running); 301 ChangeStatus(ProcessStatus::Running);
294 302
295 SetupMainThread(*this, kernel, main_thread_priority, main_thread_stack_top); 303 SetupMainThread(system, *this, main_thread_priority, main_thread_stack_top);
296 resource_limit->Reserve(ResourceType::Threads, 1); 304 resource_limit->Reserve(ResourceType::Threads, 1);
297 resource_limit->Reserve(ResourceType::PhysicalMemory, main_thread_stack_size); 305 resource_limit->Reserve(ResourceType::PhysicalMemory, main_thread_stack_size);
298} 306}
@@ -338,6 +346,7 @@ static auto FindTLSPageWithAvailableSlots(std::vector<TLSPage>& tls_pages) {
338} 346}
339 347
340VAddr Process::CreateTLSRegion() { 348VAddr Process::CreateTLSRegion() {
349 SchedulerLock lock(system.Kernel());
341 if (auto tls_page_iter{FindTLSPageWithAvailableSlots(tls_pages)}; 350 if (auto tls_page_iter{FindTLSPageWithAvailableSlots(tls_pages)};
342 tls_page_iter != tls_pages.cend()) { 351 tls_page_iter != tls_pages.cend()) {
343 return *tls_page_iter->ReserveSlot(); 352 return *tls_page_iter->ReserveSlot();
@@ -368,6 +377,7 @@ VAddr Process::CreateTLSRegion() {
368} 377}
369 378
370void Process::FreeTLSRegion(VAddr tls_address) { 379void Process::FreeTLSRegion(VAddr tls_address) {
380 SchedulerLock lock(system.Kernel());
371 const VAddr aligned_address = Common::AlignDown(tls_address, Core::Memory::PAGE_SIZE); 381 const VAddr aligned_address = Common::AlignDown(tls_address, Core::Memory::PAGE_SIZE);
372 auto iter = 382 auto iter =
373 std::find_if(tls_pages.begin(), tls_pages.end(), [aligned_address](const auto& page) { 383 std::find_if(tls_pages.begin(), tls_pages.end(), [aligned_address](const auto& page) {
@@ -382,6 +392,7 @@ void Process::FreeTLSRegion(VAddr tls_address) {
382} 392}
383 393
384void Process::LoadModule(CodeSet code_set, VAddr base_addr) { 394void Process::LoadModule(CodeSet code_set, VAddr base_addr) {
395 std::lock_guard lock{HLE::g_hle_lock};
385 const auto ReprotectSegment = [&](const CodeSet::Segment& segment, 396 const auto ReprotectSegment = [&](const CodeSet::Segment& segment,
386 Memory::MemoryPermission permission) { 397 Memory::MemoryPermission permission) {
387 page_table->SetCodeMemoryPermission(segment.addr + base_addr, segment.size, permission); 398 page_table->SetCodeMemoryPermission(segment.addr + base_addr, segment.size, permission);
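
ClearSignalState, CreateTLSRegion, and FreeTLSRegion all adopt the same RAII idiom: constructing SchedulerLock enters the kernel's global scheduler critical section, and its destructor leaves it (which may trigger a pending reschedule). A generic sketch of the idiom, illustrative rather than yuzu's actual SchedulerLock:

```cpp
// Generic RAII critical-section guard; Enter()/Leave() stand in for the
// GlobalScheduler's lock/unlock-and-reschedule pair.
template <typename CriticalSection>
class ScopedLockSketch {
public:
    explicit ScopedLockSketch(CriticalSection& cs) : cs{cs} { cs.Enter(); }
    ~ScopedLockSketch() { cs.Leave(); } // leaving may trigger a reschedule
    ScopedLockSketch(const ScopedLockSketch&) = delete;
    ScopedLockSketch& operator=(const ScopedLockSketch&) = delete;

private:
    CriticalSection& cs;
};
```
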
diff --git a/src/core/hle/kernel/readable_event.cpp b/src/core/hle/kernel/readable_event.cpp
index ef5e19e63..6e286419e 100644
--- a/src/core/hle/kernel/readable_event.cpp
+++ b/src/core/hle/kernel/readable_event.cpp
@@ -6,8 +6,10 @@
6#include "common/assert.h" 6#include "common/assert.h"
7#include "common/logging/log.h" 7#include "common/logging/log.h"
8#include "core/hle/kernel/errors.h" 8#include "core/hle/kernel/errors.h"
9#include "core/hle/kernel/kernel.h"
9#include "core/hle/kernel/object.h" 10#include "core/hle/kernel/object.h"
10#include "core/hle/kernel/readable_event.h" 11#include "core/hle/kernel/readable_event.h"
12#include "core/hle/kernel/scheduler.h"
11#include "core/hle/kernel/thread.h" 13#include "core/hle/kernel/thread.h"
12 14
13namespace Kernel { 15namespace Kernel {
@@ -37,6 +39,7 @@ void ReadableEvent::Clear() {
37} 39}
38 40
39ResultCode ReadableEvent::Reset() { 41ResultCode ReadableEvent::Reset() {
42 SchedulerLock lock(kernel);
40 if (!is_signaled) { 43 if (!is_signaled) {
41 LOG_TRACE(Kernel, "Handle is not signaled! object_id={}, object_type={}, object_name={}", 44 LOG_TRACE(Kernel, "Handle is not signaled! object_id={}, object_type={}, object_name={}",
42 GetObjectId(), GetTypeName(), GetName()); 45 GetObjectId(), GetTypeName(), GetName());
diff --git a/src/core/hle/kernel/resource_limit.cpp b/src/core/hle/kernel/resource_limit.cpp
index d9beaa3a4..212e442f4 100644
--- a/src/core/hle/kernel/resource_limit.cpp
+++ b/src/core/hle/kernel/resource_limit.cpp
@@ -24,13 +24,9 @@ bool ResourceLimit::Reserve(ResourceType resource, s64 amount, u64 timeout) {
24 const std::size_t index{ResourceTypeToIndex(resource)}; 24 const std::size_t index{ResourceTypeToIndex(resource)};
25 25
26 s64 new_value = current[index] + amount; 26 s64 new_value = current[index] + amount;
27 while (new_value > limit[index] && available[index] + amount <= limit[index]) { 27 if (new_value > limit[index] && available[index] + amount <= limit[index]) {
28 // TODO(bunnei): This is wrong for multicore; we should make the calling thread wait for the timeout 28 // TODO(bunnei): This is wrong for multicore; we should make the calling thread wait for the timeout
29 new_value = current[index] + amount; 29 new_value = current[index] + amount;
30
31 if (timeout >= 0) {
32 break;
33 }
34 } 30 }
35 31
36 if (new_value <= limit[index]) { 32 if (new_value <= limit[index]) {
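
With the retry loop gone, Reserve reduces to a bounds check on the running total (the TODO above still stands for a proper multicore wait). A hypothetical distillation of the accept/reject rule, with a worked example in the comments:

```cpp
#include <cstdint>

// Example with limit = 64 threads:
//   current = 63, amount = 1 -> 64 <= 64, reservation succeeds
//   current = 64, amount = 1 -> 65 >  64, reservation fails
bool CanReserve(std::int64_t current, std::int64_t limit, std::int64_t amount) {
    return current + amount <= limit;
}
```
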
diff --git a/src/core/hle/kernel/scheduler.cpp b/src/core/hle/kernel/scheduler.cpp
index 1140c72a3..2b12c0dbf 100644
--- a/src/core/hle/kernel/scheduler.cpp
+++ b/src/core/hle/kernel/scheduler.cpp
@@ -11,11 +11,15 @@
11#include <utility> 11#include <utility>
12 12
13#include "common/assert.h" 13#include "common/assert.h"
14#include "common/bit_util.h"
15#include "common/fiber.h"
14#include "common/logging/log.h" 16#include "common/logging/log.h"
15#include "core/arm/arm_interface.h" 17#include "core/arm/arm_interface.h"
16#include "core/core.h" 18#include "core/core.h"
17#include "core/core_timing.h" 19#include "core/core_timing.h"
20#include "core/cpu_manager.h"
18#include "core/hle/kernel/kernel.h" 21#include "core/hle/kernel/kernel.h"
22#include "core/hle/kernel/physical_core.h"
19#include "core/hle/kernel/process.h" 23#include "core/hle/kernel/process.h"
20#include "core/hle/kernel/scheduler.h" 24#include "core/hle/kernel/scheduler.h"
21#include "core/hle/kernel/time_manager.h" 25#include "core/hle/kernel/time_manager.h"
@@ -27,103 +31,151 @@ GlobalScheduler::GlobalScheduler(KernelCore& kernel) : kernel{kernel} {}
27GlobalScheduler::~GlobalScheduler() = default; 31GlobalScheduler::~GlobalScheduler() = default;
28 32
29void GlobalScheduler::AddThread(std::shared_ptr<Thread> thread) { 33void GlobalScheduler::AddThread(std::shared_ptr<Thread> thread) {
34 global_list_guard.lock();
30 thread_list.push_back(std::move(thread)); 35 thread_list.push_back(std::move(thread));
36 global_list_guard.unlock();
31} 37}
32 38
33void GlobalScheduler::RemoveThread(std::shared_ptr<Thread> thread) { 39void GlobalScheduler::RemoveThread(std::shared_ptr<Thread> thread) {
40 global_list_guard.lock();
34 thread_list.erase(std::remove(thread_list.begin(), thread_list.end(), thread), 41 thread_list.erase(std::remove(thread_list.begin(), thread_list.end(), thread),
35 thread_list.end()); 42 thread_list.end());
43 global_list_guard.unlock();
36} 44}
37 45
38void GlobalScheduler::UnloadThread(std::size_t core) { 46u32 GlobalScheduler::SelectThreads() {
39 Scheduler& sched = kernel.Scheduler(core); 47 ASSERT(is_locked);
40 sched.UnloadThread();
41}
42
43void GlobalScheduler::SelectThread(std::size_t core) {
44 const auto update_thread = [](Thread* thread, Scheduler& sched) { 48 const auto update_thread = [](Thread* thread, Scheduler& sched) {
45 if (thread != sched.selected_thread.get()) { 49 sched.guard.lock();
50 if (thread != sched.selected_thread_set.get()) {
46 if (thread == nullptr) { 51 if (thread == nullptr) {
47 ++sched.idle_selection_count; 52 ++sched.idle_selection_count;
48 } 53 }
49 sched.selected_thread = SharedFrom(thread); 54 sched.selected_thread_set = SharedFrom(thread);
50 } 55 }
51 sched.is_context_switch_pending = sched.selected_thread != sched.current_thread; 56 const bool reschedule_pending =
57 sched.is_context_switch_pending || (sched.selected_thread_set != sched.current_thread);
58 sched.is_context_switch_pending = reschedule_pending;
52 std::atomic_thread_fence(std::memory_order_seq_cst); 59 std::atomic_thread_fence(std::memory_order_seq_cst);
60 sched.guard.unlock();
61 return reschedule_pending;
53 }; 62 };
54 Scheduler& sched = kernel.Scheduler(core); 63 if (!is_reselection_pending.load()) {
55 Thread* current_thread = nullptr; 64 return 0;
56 // Step 1: Get top thread in schedule queue.
57 current_thread = scheduled_queue[core].empty() ? nullptr : scheduled_queue[core].front();
58 if (current_thread) {
59 update_thread(current_thread, sched);
60 return;
61 } 65 }
62 // Step 2: Try selecting a suggested thread. 66 std::array<Thread*, Core::Hardware::NUM_CPU_CORES> top_threads{};
63 Thread* winner = nullptr; 67
64 std::set<s32> sug_cores; 68 u32 idle_cores{};
65 for (auto thread : suggested_queue[core]) { 69
66 s32 this_core = thread->GetProcessorID(); 70 // Step 1: Get top thread in schedule queue.
67 Thread* thread_on_core = nullptr; 71 for (u32 core = 0; core < Core::Hardware::NUM_CPU_CORES; core++) {
68 if (this_core >= 0) { 72 Thread* top_thread =
69 thread_on_core = scheduled_queue[this_core].front(); 73 scheduled_queue[core].empty() ? nullptr : scheduled_queue[core].front();
70 } 74 if (top_thread != nullptr) {
71 if (this_core < 0 || thread != thread_on_core) { 75 // TODO(Blinkhawk): Implement Thread Pinning
72 winner = thread; 76 } else {
73 break; 77 idle_cores |= (1ul << core);
74 } 78 }
75 sug_cores.insert(this_core); 79 top_threads[core] = top_thread;
76 } 80 }
77 // if we got a suggested thread, select it, else do a second pass. 81
78 if (winner && winner->GetPriority() > 2) { 82 while (idle_cores != 0) {
79 if (winner->IsRunning()) { 83 u32 core_id = Common::CountTrailingZeroes32(idle_cores);
80 UnloadThread(static_cast<u32>(winner->GetProcessorID())); 84
85 if (!suggested_queue[core_id].empty()) {
86 std::array<s32, Core::Hardware::NUM_CPU_CORES> migration_candidates{};
87 std::size_t num_candidates = 0;
88 auto iter = suggested_queue[core_id].begin();
89 Thread* suggested = nullptr;
90 // Step 2: Try selecting a suggested thread.
91 while (iter != suggested_queue[core_id].end()) {
92 suggested = *iter;
93 iter++;
94 s32 suggested_core_id = suggested->GetProcessorID();
95 Thread* top_thread =
96 suggested_core_id >= 0 ? top_threads[suggested_core_id] : nullptr;
97 if (top_thread != suggested) {
98 if (top_thread != nullptr &&
99 top_thread->GetPriority() < THREADPRIO_MAX_CORE_MIGRATION) {
100 suggested = nullptr;
101 break;
102 // There's a too high thread to do core migration, cancel
103 }
104 TransferToCore(suggested->GetPriority(), static_cast<s32>(core_id), suggested);
105 break;
106 }
107 suggested = nullptr;
108 migration_candidates[num_candidates++] = suggested_core_id;
109 }
110 // Step 3: Select a suggested thread from another core
111 if (suggested == nullptr) {
112 for (std::size_t i = 0; i < num_candidates; i++) {
113 s32 candidate_core = migration_candidates[i];
114 suggested = top_threads[candidate_core];
115 auto it = scheduled_queue[candidate_core].begin();
116 it++;
117 Thread* next = it != scheduled_queue[candidate_core].end() ? *it : nullptr;
118 if (next != nullptr) {
119 TransferToCore(suggested->GetPriority(), static_cast<s32>(core_id),
120 suggested);
121 top_threads[candidate_core] = next;
122 break;
123 } else {
124 suggested = nullptr;
125 }
126 }
127 }
128 top_threads[core_id] = suggested;
81 } 129 }
82 TransferToCore(winner->GetPriority(), static_cast<s32>(core), winner); 130
83 update_thread(winner, sched); 131 idle_cores &= ~(1ul << core_id);
84 return;
85 } 132 }
86 // Step 3: Select a suggested thread from another core 133 u32 cores_needing_context_switch{};
87 for (auto& src_core : sug_cores) { 134 for (u32 core = 0; core < Core::Hardware::NUM_CPU_CORES; core++) {
88 auto it = scheduled_queue[src_core].begin(); 135 Scheduler& sched = kernel.Scheduler(core);
89 it++; 136 ASSERT(top_threads[core] == nullptr || top_threads[core]->GetProcessorID() == core);
90 if (it != scheduled_queue[src_core].end()) { 137 if (update_thread(top_threads[core], sched)) {
91 Thread* thread_on_core = scheduled_queue[src_core].front(); 138 cores_needing_context_switch |= (1ul << core);
92 Thread* to_change = *it;
93 if (thread_on_core->IsRunning() || to_change->IsRunning()) {
94 UnloadThread(static_cast<u32>(src_core));
95 }
96 TransferToCore(thread_on_core->GetPriority(), static_cast<s32>(core), thread_on_core);
97 current_thread = thread_on_core;
98 break;
99 } 139 }
100 } 140 }
101 update_thread(current_thread, sched); 141 return cores_needing_context_switch;
102} 142}
103 143
104bool GlobalScheduler::YieldThread(Thread* yielding_thread) { 144bool GlobalScheduler::YieldThread(Thread* yielding_thread) {
145 ASSERT(is_locked);
105 // Note: caller should use critical section, etc. 146 // Note: caller should use critical section, etc.
147 if (!yielding_thread->IsRunnable()) {
148 // Normally this case shouldn't happen except for SetThreadActivity.
149 is_reselection_pending.store(true, std::memory_order_release);
150 return false;
151 }
106 const u32 core_id = static_cast<u32>(yielding_thread->GetProcessorID()); 152 const u32 core_id = static_cast<u32>(yielding_thread->GetProcessorID());
107 const u32 priority = yielding_thread->GetPriority(); 153 const u32 priority = yielding_thread->GetPriority();
108 154
109 // Yield the thread 155 // Yield the thread
110 const Thread* const winner = scheduled_queue[core_id].front(priority); 156 Reschedule(priority, core_id, yielding_thread);
111 ASSERT_MSG(yielding_thread == winner, "Thread yielding without being in front"); 157 const Thread* const winner = scheduled_queue[core_id].front();
112 scheduled_queue[core_id].yield(priority); 158 if (kernel.GetCurrentHostThreadID() != core_id) {
159 is_reselection_pending.store(true, std::memory_order_release);
160 }
113 161
114 return AskForReselectionOrMarkRedundant(yielding_thread, winner); 162 return AskForReselectionOrMarkRedundant(yielding_thread, winner);
115} 163}
116 164
117bool GlobalScheduler::YieldThreadAndBalanceLoad(Thread* yielding_thread) { 165bool GlobalScheduler::YieldThreadAndBalanceLoad(Thread* yielding_thread) {
166 ASSERT(is_locked);
118 // Note: caller should check if !thread.IsSchedulerOperationRedundant and use critical section, 167 // Note: caller should check if !thread.IsSchedulerOperationRedundant and use critical section,
119 // etc. 168 // etc.
169 if (!yielding_thread->IsRunnable()) {
170 // Normally this case shouldn't happen except for SetThreadActivity.
171 is_reselection_pending.store(true, std::memory_order_release);
172 return false;
173 }
120 const u32 core_id = static_cast<u32>(yielding_thread->GetProcessorID()); 174 const u32 core_id = static_cast<u32>(yielding_thread->GetProcessorID());
121 const u32 priority = yielding_thread->GetPriority(); 175 const u32 priority = yielding_thread->GetPriority();
122 176
123 // Yield the thread 177 // Yield the thread
124 ASSERT_MSG(yielding_thread == scheduled_queue[core_id].front(priority), 178 Reschedule(priority, core_id, yielding_thread);
125 "Thread yielding without being in front");
126 scheduled_queue[core_id].yield(priority);
127 179
128 std::array<Thread*, Core::Hardware::NUM_CPU_CORES> current_threads; 180 std::array<Thread*, Core::Hardware::NUM_CPU_CORES> current_threads;
129 for (std::size_t i = 0; i < current_threads.size(); i++) { 181 for (std::size_t i = 0; i < current_threads.size(); i++) {
@@ -153,21 +205,28 @@ bool GlobalScheduler::YieldThreadAndBalanceLoad(Thread* yielding_thread) {
153 205
154 if (winner != nullptr) { 206 if (winner != nullptr) {
155 if (winner != yielding_thread) { 207 if (winner != yielding_thread) {
156 if (winner->IsRunning()) {
157 UnloadThread(static_cast<u32>(winner->GetProcessorID()));
158 }
159 TransferToCore(winner->GetPriority(), s32(core_id), winner); 208 TransferToCore(winner->GetPriority(), s32(core_id), winner);
160 } 209 }
161 } else { 210 } else {
162 winner = next_thread; 211 winner = next_thread;
163 } 212 }
164 213
214 if (kernel.GetCurrentHostThreadID() != core_id) {
215 is_reselection_pending.store(true, std::memory_order_release);
216 }
217
165 return AskForReselectionOrMarkRedundant(yielding_thread, winner); 218 return AskForReselectionOrMarkRedundant(yielding_thread, winner);
166} 219}
167 220
168bool GlobalScheduler::YieldThreadAndWaitForLoadBalancing(Thread* yielding_thread) { 221bool GlobalScheduler::YieldThreadAndWaitForLoadBalancing(Thread* yielding_thread) {
222 ASSERT(is_locked);
169 // Note: caller should check if !thread.IsSchedulerOperationRedundant and use critical section, 223 // Note: caller should check if !thread.IsSchedulerOperationRedundant and use critical section,
170 // etc. 224 // etc.
225 if (!yielding_thread->IsRunnable()) {
226 // Normally this case shouldn't happen except for SetThreadActivity.
227 is_reselection_pending.store(true, std::memory_order_release);
228 return false;
229 }
171 Thread* winner = nullptr; 230 Thread* winner = nullptr;
172 const u32 core_id = static_cast<u32>(yielding_thread->GetProcessorID()); 231 const u32 core_id = static_cast<u32>(yielding_thread->GetProcessorID());
173 232
@@ -195,25 +254,31 @@ bool GlobalScheduler::YieldThreadAndWaitForLoadBalancing(Thread* yielding_thread
195 } 254 }
196 if (winner != nullptr) { 255 if (winner != nullptr) {
197 if (winner != yielding_thread) { 256 if (winner != yielding_thread) {
198 if (winner->IsRunning()) {
199 UnloadThread(static_cast<u32>(winner->GetProcessorID()));
200 }
201 TransferToCore(winner->GetPriority(), static_cast<s32>(core_id), winner); 257 TransferToCore(winner->GetPriority(), static_cast<s32>(core_id), winner);
202 } 258 }
203 } else { 259 } else {
204 winner = yielding_thread; 260 winner = yielding_thread;
205 } 261 }
262 } else {
263 winner = scheduled_queue[core_id].front();
264 }
265
266 if (kernel.GetCurrentHostThreadID() != core_id) {
267 is_reselection_pending.store(true, std::memory_order_release);
206 } 268 }
207 269
208 return AskForReselectionOrMarkRedundant(yielding_thread, winner); 270 return AskForReselectionOrMarkRedundant(yielding_thread, winner);
209} 271}
210 272
211void GlobalScheduler::PreemptThreads() { 273void GlobalScheduler::PreemptThreads() {
274 ASSERT(is_locked);
212 for (std::size_t core_id = 0; core_id < Core::Hardware::NUM_CPU_CORES; core_id++) { 275 for (std::size_t core_id = 0; core_id < Core::Hardware::NUM_CPU_CORES; core_id++) {
213 const u32 priority = preemption_priorities[core_id]; 276 const u32 priority = preemption_priorities[core_id];
214 277
215 if (scheduled_queue[core_id].size(priority) > 0) { 278 if (scheduled_queue[core_id].size(priority) > 0) {
216 scheduled_queue[core_id].front(priority)->IncrementYieldCount(); 279 if (scheduled_queue[core_id].size(priority) > 1) {
280 scheduled_queue[core_id].front(priority)->IncrementYieldCount();
281 }
217 scheduled_queue[core_id].yield(priority); 282 scheduled_queue[core_id].yield(priority);
218 if (scheduled_queue[core_id].size(priority) > 1) { 283 if (scheduled_queue[core_id].size(priority) > 1) {
219 scheduled_queue[core_id].front(priority)->IncrementYieldCount(); 284 scheduled_queue[core_id].front(priority)->IncrementYieldCount();
@@ -247,9 +312,6 @@ void GlobalScheduler::PreemptThreads() {
247 } 312 }
248 313
249 if (winner != nullptr) { 314 if (winner != nullptr) {
250 if (winner->IsRunning()) {
251 UnloadThread(static_cast<u32>(winner->GetProcessorID()));
252 }
253 TransferToCore(winner->GetPriority(), s32(core_id), winner); 315 TransferToCore(winner->GetPriority(), s32(core_id), winner);
254 current_thread = 316 current_thread =
255 winner->GetPriority() <= current_thread->GetPriority() ? winner : current_thread; 317 winner->GetPriority() <= current_thread->GetPriority() ? winner : current_thread;
@@ -280,9 +342,6 @@ void GlobalScheduler::PreemptThreads() {
280 } 342 }
281 343
282 if (winner != nullptr) { 344 if (winner != nullptr) {
283 if (winner->IsRunning()) {
284 UnloadThread(static_cast<u32>(winner->GetProcessorID()));
285 }
286 TransferToCore(winner->GetPriority(), s32(core_id), winner); 345 TransferToCore(winner->GetPriority(), s32(core_id), winner);
287 current_thread = winner; 346 current_thread = winner;
288 } 347 }
@@ -292,34 +351,65 @@ void GlobalScheduler::PreemptThreads() {
292 } 351 }
293} 352}
294 353
354void GlobalScheduler::EnableInterruptAndSchedule(u32 cores_pending_reschedule,
355 Core::EmuThreadHandle global_thread) {
356 u32 current_core = global_thread.host_handle;
357 bool must_context_switch = global_thread.guest_handle != InvalidHandle &&
358 (current_core < Core::Hardware::NUM_CPU_CORES);
359 while (cores_pending_reschedule != 0) {
360 u32 core = Common::CountTrailingZeroes32(cores_pending_reschedule);
361 ASSERT(core < Core::Hardware::NUM_CPU_CORES);
362 if (!must_context_switch || core != current_core) {
363 auto& phys_core = kernel.PhysicalCore(core);
364 phys_core.Interrupt();
365 } else {
366 must_context_switch = true;
367 }
368 cores_pending_reschedule &= ~(1ul << core);
369 }
370 if (must_context_switch) {
371 auto& core_scheduler = kernel.CurrentScheduler();
372 kernel.ExitSVCProfile();
373 core_scheduler.TryDoContextSwitch();
374 kernel.EnterSVCProfile();
375 }
376}
377
295void GlobalScheduler::Suggest(u32 priority, std::size_t core, Thread* thread) { 378void GlobalScheduler::Suggest(u32 priority, std::size_t core, Thread* thread) {
379 ASSERT(is_locked);
296 suggested_queue[core].add(thread, priority); 380 suggested_queue[core].add(thread, priority);
297} 381}
298 382
299void GlobalScheduler::Unsuggest(u32 priority, std::size_t core, Thread* thread) { 383void GlobalScheduler::Unsuggest(u32 priority, std::size_t core, Thread* thread) {
384 ASSERT(is_locked);
300 suggested_queue[core].remove(thread, priority); 385 suggested_queue[core].remove(thread, priority);
301} 386}
302 387
303void GlobalScheduler::Schedule(u32 priority, std::size_t core, Thread* thread) { 388void GlobalScheduler::Schedule(u32 priority, std::size_t core, Thread* thread) {
389 ASSERT(is_locked);
304 ASSERT_MSG(thread->GetProcessorID() == s32(core), "Thread must be assigned to this core."); 390 ASSERT_MSG(thread->GetProcessorID() == s32(core), "Thread must be assigned to this core.");
305 scheduled_queue[core].add(thread, priority); 391 scheduled_queue[core].add(thread, priority);
306} 392}
307 393
308void GlobalScheduler::SchedulePrepend(u32 priority, std::size_t core, Thread* thread) { 394void GlobalScheduler::SchedulePrepend(u32 priority, std::size_t core, Thread* thread) {
395 ASSERT(is_locked);
309 ASSERT_MSG(thread->GetProcessorID() == s32(core), "Thread must be assigned to this core."); 396 ASSERT_MSG(thread->GetProcessorID() == s32(core), "Thread must be assigned to this core.");
310 scheduled_queue[core].add(thread, priority, false); 397 scheduled_queue[core].add(thread, priority, false);
311} 398}
312 399
313void GlobalScheduler::Reschedule(u32 priority, std::size_t core, Thread* thread) { 400void GlobalScheduler::Reschedule(u32 priority, std::size_t core, Thread* thread) {
401 ASSERT(is_locked);
314 scheduled_queue[core].remove(thread, priority); 402 scheduled_queue[core].remove(thread, priority);
315 scheduled_queue[core].add(thread, priority); 403 scheduled_queue[core].add(thread, priority);
316} 404}
317 405
318void GlobalScheduler::Unschedule(u32 priority, std::size_t core, Thread* thread) { 406void GlobalScheduler::Unschedule(u32 priority, std::size_t core, Thread* thread) {
407 ASSERT(is_locked);
319 scheduled_queue[core].remove(thread, priority); 408 scheduled_queue[core].remove(thread, priority);
320} 409}
321 410
322void GlobalScheduler::TransferToCore(u32 priority, s32 destination_core, Thread* thread) { 411void GlobalScheduler::TransferToCore(u32 priority, s32 destination_core, Thread* thread) {
412 ASSERT(is_locked);
323 const bool schedulable = thread->GetPriority() < THREADPRIO_COUNT; 413 const bool schedulable = thread->GetPriority() < THREADPRIO_COUNT;
324 const s32 source_core = thread->GetProcessorID(); 414 const s32 source_core = thread->GetProcessorID();
325 if (source_core == destination_core || !schedulable) { 415 if (source_core == destination_core || !schedulable) {
@@ -349,6 +439,108 @@ bool GlobalScheduler::AskForReselectionOrMarkRedundant(Thread* current_thread,
349 } 439 }
350} 440}
351 441
442void GlobalScheduler::AdjustSchedulingOnStatus(Thread* thread, u32 old_flags) {
443 if (old_flags == thread->scheduling_state) {
444 return;
445 }
446 ASSERT(is_locked);
447
448 if (old_flags == static_cast<u32>(ThreadSchedStatus::Runnable)) {
449 // In this case the thread was running, now it's pausing/exiting
450 if (thread->processor_id >= 0) {
451 Unschedule(thread->current_priority, static_cast<u32>(thread->processor_id), thread);
452 }
453
454 for (u32 core = 0; core < Core::Hardware::NUM_CPU_CORES; core++) {
455 if (core != static_cast<u32>(thread->processor_id) &&
456 ((thread->affinity_mask >> core) & 1) != 0) {
457 Unsuggest(thread->current_priority, core, thread);
458 }
459 }
460 } else if (thread->scheduling_state == static_cast<u32>(ThreadSchedStatus::Runnable)) {
461 // The thread has just become runnable after being paused/stopped
462 if (thread->processor_id >= 0) {
463 Schedule(thread->current_priority, static_cast<u32>(thread->processor_id), thread);
464 }
465
466 for (u32 core = 0; core < Core::Hardware::NUM_CPU_CORES; core++) {
467 if (core != static_cast<u32>(thread->processor_id) &&
468 ((thread->affinity_mask >> core) & 1) != 0) {
469 Suggest(thread->current_priority, core, thread);
470 }
471 }
472 }
473
474 SetReselectionPending();
475}
476
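AdjustSchedulingOnStatus splits a runnable thread across the per-core queues: it is scheduled on its assigned core and only suggested on the other cores its affinity mask allows. A reduced sketch of that partition, with plain vectors standing in for the multi-level priority queues:

#include <array>
#include <cstdint>
#include <vector>

constexpr std::size_t NumCores = 4;

struct Thread; // opaque for this sketch

// Hypothetical stand-ins for the per-core queues.
std::array<std::vector<Thread*>, NumCores> scheduled_queue;
std::array<std::vector<Thread*>, NumCores> suggested_queue;

void MakeRunnable(Thread* t, int processor_id, std::uint64_t affinity_mask) {
    for (std::size_t core = 0; core < NumCores; ++core) {
        if (((affinity_mask >> core) & 1) == 0) {
            continue; // core not allowed by the affinity mask
        }
        if (static_cast<int>(core) == processor_id) {
            scheduled_queue[core].push_back(t); // this core owns the thread
        } else {
            suggested_queue[core].push_back(t); // other cores may adopt it when idle
        }
    }
}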
477void GlobalScheduler::AdjustSchedulingOnPriority(Thread* thread, u32 old_priority) {
478 if (thread->scheduling_state != static_cast<u32>(ThreadSchedStatus::Runnable)) {
479 return;
480 }
481 ASSERT(is_locked);
482 if (thread->processor_id >= 0) {
483 Unschedule(old_priority, static_cast<u32>(thread->processor_id), thread);
484 }
485
486 for (u32 core = 0; core < Core::Hardware::NUM_CPU_CORES; core++) {
487 if (core != static_cast<u32>(thread->processor_id) &&
488 ((thread->affinity_mask >> core) & 1) != 0) {
489 Unsuggest(old_priority, core, thread);
490 }
491 }
492
493 if (thread->processor_id >= 0) {
494 if (thread == kernel.CurrentScheduler().GetCurrentThread()) {
495 SchedulePrepend(thread->current_priority, static_cast<u32>(thread->processor_id),
496 thread);
497 } else {
498 Schedule(thread->current_priority, static_cast<u32>(thread->processor_id), thread);
499 }
500 }
501
502 for (u32 core = 0; core < Core::Hardware::NUM_CPU_CORES; core++) {
503 if (core != static_cast<u32>(thread->processor_id) &&
504 ((thread->affinity_mask >> core) & 1) != 0) {
505 Suggest(thread->current_priority, core, thread);
506 }
507 }
508 thread->IncrementYieldCount();
509 SetReselectionPending();
510}
511
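On a priority change the thread is pulled out of its old priority level and reinserted at the new one; if it is the thread currently running on its core it is prepended so it stays at the head of its new level. A sketch against a simple per-priority deque (the types are stand-ins, not yuzu's MultiLevelQueue):

#include <array>
#include <cstddef>
#include <deque>

struct Thread;
constexpr std::size_t PriorityCount = 64;

using PriorityQueue = std::array<std::deque<Thread*>, PriorityCount>;

void RequeueOnPriorityChange(PriorityQueue& queue, Thread* t, std::size_t old_priority,
                             std::size_t new_priority, bool is_current_thread) {
    std::erase(queue[old_priority], t); // C++20: drop the stale entry

    if (is_current_thread) {
        queue[new_priority].push_front(t); // keep the running thread at the head
    } else {
        queue[new_priority].push_back(t);
    }
}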
512void GlobalScheduler::AdjustSchedulingOnAffinity(Thread* thread, u64 old_affinity_mask,
513 s32 old_core) {
514 if (thread->scheduling_state != static_cast<u32>(ThreadSchedStatus::Runnable) ||
515 thread->current_priority >= THREADPRIO_COUNT) {
516 return;
517 }
518 ASSERT(is_locked);
519
520 for (u32 core = 0; core < Core::Hardware::NUM_CPU_CORES; core++) {
521 if (((old_affinity_mask >> core) & 1) != 0) {
522 if (core == static_cast<u32>(old_core)) {
523 Unschedule(thread->current_priority, core, thread);
524 } else {
525 Unsuggest(thread->current_priority, core, thread);
526 }
527 }
528 }
529
530 for (u32 core = 0; core < Core::Hardware::NUM_CPU_CORES; core++) {
531 if (((thread->affinity_mask >> core) & 1) != 0) {
532 if (core == static_cast<u32>(thread->processor_id)) {
533 Schedule(thread->current_priority, core, thread);
534 } else {
535 Suggest(thread->current_priority, core, thread);
536 }
537 }
538 }
539
540 thread->IncrementYieldCount();
541 SetReselectionPending();
542}
543
352void GlobalScheduler::Shutdown() { 544void GlobalScheduler::Shutdown() {
353 for (std::size_t core = 0; core < Core::Hardware::NUM_CPU_CORES; core++) { 545 for (std::size_t core = 0; core < Core::Hardware::NUM_CPU_CORES; core++) {
354 scheduled_queue[core].clear(); 546 scheduled_queue[core].clear();
@@ -359,10 +551,12 @@ void GlobalScheduler::Shutdown() {
359 551
360void GlobalScheduler::Lock() { 552void GlobalScheduler::Lock() {
361 Core::EmuThreadHandle current_thread = kernel.GetCurrentEmuThreadID(); 553 Core::EmuThreadHandle current_thread = kernel.GetCurrentEmuThreadID();
554 ASSERT(!current_thread.IsInvalid());
362 if (current_thread == current_owner) { 555 if (current_thread == current_owner) {
363 ++scope_lock; 556 ++scope_lock;
364 } else { 557 } else {
365 inner_lock.lock(); 558 inner_lock.lock();
559 is_locked = true;
366 current_owner = current_thread; 560 current_owner = current_thread;
367 ASSERT(current_owner != Core::EmuThreadHandle::InvalidHandle()); 561 ASSERT(current_owner != Core::EmuThreadHandle::InvalidHandle());
368 scope_lock = 1; 562 scope_lock = 1;
@@ -374,17 +568,18 @@ void GlobalScheduler::Unlock() {
374 ASSERT(scope_lock > 0); 568 ASSERT(scope_lock > 0);
375 return; 569 return;
376 } 570 }
377 for (std::size_t i = 0; i < Core::Hardware::NUM_CPU_CORES; i++) { 571 u32 cores_pending_reschedule = SelectThreads();
378 SelectThread(i); 572 Core::EmuThreadHandle leaving_thread = current_owner;
379 }
380 current_owner = Core::EmuThreadHandle::InvalidHandle(); 573 current_owner = Core::EmuThreadHandle::InvalidHandle();
381 scope_lock = 1; 574 scope_lock = 1;
575 is_locked = false;
382 inner_lock.unlock(); 576 inner_lock.unlock();
383 // TODO(Blinkhawk): Setup the interrupts and change context on current core. 577 EnableInterruptAndSchedule(cores_pending_reschedule, leaving_thread);
384} 578}
385 579
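Lock and Unlock form a recursive lock keyed on the emulated thread handle: a re-acquire by the owner only bumps the depth counter, and the final release is where threads are reselected and cores are interrupted. A minimal sketch of the ownership scheme using host thread ids instead of EmuThreadHandle (an assumption for illustration):

#include <atomic>
#include <mutex>
#include <thread>

class RecursiveSchedulerLock {
public:
    void Lock() {
        const auto self = std::this_thread::get_id();
        if (owner_.load(std::memory_order_relaxed) == self) {
            ++depth_; // re-entrant acquire by the current owner
            return;
        }
        mutex_.lock();
        owner_.store(self, std::memory_order_relaxed);
        depth_ = 1;
    }

    void Unlock() {
        if (--depth_ > 0) {
            return; // still held by an outer scope
        }
        owner_.store(std::thread::id{}, std::memory_order_relaxed);
        mutex_.unlock();
        // The real scheduler reselects threads and interrupts cores here.
    }

private:
    std::mutex mutex_;
    std::atomic<std::thread::id> owner_{};
    int depth_ = 0; // only touched while owning the lock
};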
386Scheduler::Scheduler(Core::System& system, std::size_t core_id) 580Scheduler::Scheduler(Core::System& system, std::size_t core_id) : system(system), core_id(core_id) {
387 : system{system}, core_id{core_id} {} 581 switch_fiber = std::make_shared<Common::Fiber>(std::function<void(void*)>(OnSwitch), this);
582}
388 583
389Scheduler::~Scheduler() = default; 584Scheduler::~Scheduler() = default;
390 585
@@ -393,56 +588,128 @@ bool Scheduler::HaveReadyThreads() const {
393} 588}
394 589
395Thread* Scheduler::GetCurrentThread() const { 590Thread* Scheduler::GetCurrentThread() const {
396 return current_thread.get(); 591 if (current_thread) {
592 return current_thread.get();
593 }
594 return idle_thread.get();
397} 595}
398 596
399Thread* Scheduler::GetSelectedThread() const { 597Thread* Scheduler::GetSelectedThread() const {
400 return selected_thread.get(); 598 return selected_thread.get();
401} 599}
402 600
403void Scheduler::SelectThreads() {
404 system.GlobalScheduler().SelectThread(core_id);
405}
406
407u64 Scheduler::GetLastContextSwitchTicks() const { 601u64 Scheduler::GetLastContextSwitchTicks() const {
408 return last_context_switch_time; 602 return last_context_switch_time;
409} 603}
410 604
411void Scheduler::TryDoContextSwitch() { 605void Scheduler::TryDoContextSwitch() {
606 auto& phys_core = system.Kernel().CurrentPhysicalCore();
607 if (phys_core.IsInterrupted()) {
608 phys_core.ClearInterrupt();
609 }
610 guard.lock();
412 if (is_context_switch_pending) { 611 if (is_context_switch_pending) {
413 SwitchContext(); 612 SwitchContext();
613 } else {
614 guard.unlock();
414 } 615 }
415} 616}
416 617
417void Scheduler::UnloadThread() { 618void Scheduler::OnThreadStart() {
418 Thread* const previous_thread = GetCurrentThread(); 619 SwitchContextStep2();
419 Process* const previous_process = system.Kernel().CurrentProcess(); 620}
420 621
421 UpdateLastContextSwitchTime(previous_thread, previous_process); 622void Scheduler::Unload() {
623 Thread* thread = current_thread.get();
624 if (thread) {
625 thread->SetContinuousOnSVC(false);
626 thread->last_running_ticks = system.CoreTiming().GetCPUTicks();
627 thread->SetIsRunning(false);
628 if (!thread->IsHLEThread() && !thread->HasExited()) {
629 Core::ARM_Interface& cpu_core = thread->ArmInterface();
630 cpu_core.SaveContext(thread->GetContext32());
631 cpu_core.SaveContext(thread->GetContext64());
632 // Save the TPIDR_EL0 system register in case it was modified.
633 thread->SetTPIDR_EL0(cpu_core.GetTPIDR_EL0());
634 cpu_core.ClearExclusiveState();
635 }
636 thread->context_guard.unlock();
637 }
638}
422 639
423 // Save context for previous thread 640void Scheduler::Reload() {
424 if (previous_thread) { 641 Thread* thread = current_thread.get();
425 system.ArmInterface(core_id).SaveContext(previous_thread->GetContext32()); 642 if (thread) {
426 system.ArmInterface(core_id).SaveContext(previous_thread->GetContext64()); 643 ASSERT_MSG(thread->GetSchedulingStatus() == ThreadSchedStatus::Runnable,
427 // Save the TPIDR_EL0 system register in case it was modified. 644 "Thread must be runnable.");
428 previous_thread->SetTPIDR_EL0(system.ArmInterface(core_id).GetTPIDR_EL0());
429 645
430 if (previous_thread->GetStatus() == ThreadStatus::Running) { 646 // Mark the thread as running again and stamp when it resumed
431 // This is only the case when a reschedule is triggered without the current thread 647 thread->SetIsRunning(true);
432 // yielding execution (i.e. an event triggered, system core time-sliced, etc) 648 thread->SetWasRunning(false);
433 previous_thread->SetStatus(ThreadStatus::Ready); 649 thread->last_running_ticks = system.CoreTiming().GetCPUTicks();
650
651 auto* const thread_owner_process = thread->GetOwnerProcess();
652 if (thread_owner_process != nullptr) {
653 system.Kernel().MakeCurrentProcess(thread_owner_process);
654 }
655 if (!thread->IsHLEThread()) {
656 Core::ARM_Interface& cpu_core = thread->ArmInterface();
657 cpu_core.LoadContext(thread->GetContext32());
658 cpu_core.LoadContext(thread->GetContext64());
659 cpu_core.SetTlsAddress(thread->GetTLSAddress());
660 cpu_core.SetTPIDR_EL0(thread->GetTPIDR_EL0());
661 cpu_core.ChangeProcessorID(this->core_id);
662 cpu_core.ClearExclusiveState();
434 } 663 }
435 previous_thread->SetIsRunning(false);
436 } 664 }
437 current_thread = nullptr; 665}
666
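Unload and Reload serve the single-core path, where one host thread time-slices every guest core: the outgoing thread's CPU context is saved before the host thread moves on, and restored when it comes back. A hedged sketch of how a driver loop might pair them; the loop itself is an assumption, only Unload/Reload come from this patch:

#include <array>

// Stand-in for the per-core Scheduler with the two calls added above.
struct CoreScheduler {
    void Reload() { /* restore the incoming thread's CPU context */ }
    void Unload() { /* save the outgoing thread's CPU context */ }
};

// One host thread services all guest cores in turn (single-core mode).
void RunSingleCoreRound(std::array<CoreScheduler, 4>& cores) {
    for (auto& core : cores) {
        core.Reload();
        // ... emulate guest code on this core until its slice expires ...
        core.Unload();
    }
}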
667void Scheduler::SwitchContextStep2() {
668 Thread* previous_thread = current_thread_prev.get();
669 Thread* new_thread = selected_thread.get();
670
671 // Load context of new thread
672 Process* const previous_process =
673 previous_thread != nullptr ? previous_thread->GetOwnerProcess() : nullptr;
674
675 if (new_thread) {
676 ASSERT_MSG(new_thread->GetSchedulingStatus() == ThreadSchedStatus::Runnable,
677 "Thread must be runnable.");
678
679 // Mark the new thread as running and stamp when it was scheduled in
680 new_thread->SetIsRunning(true);
681 new_thread->last_running_ticks = system.CoreTiming().GetCPUTicks();
682 new_thread->SetWasRunning(false);
683
684 auto* const thread_owner_process = new_thread->GetOwnerProcess();
685 if (thread_owner_process != nullptr) {
686 system.Kernel().MakeCurrentProcess(thread_owner_process);
687 }
688 if (!new_thread->IsHLEThread()) {
689 Core::ARM_Interface& cpu_core = new_thread->ArmInterface();
690 cpu_core.LoadContext(new_thread->GetContext32());
691 cpu_core.LoadContext(new_thread->GetContext64());
692 cpu_core.SetTlsAddress(new_thread->GetTLSAddress());
693 cpu_core.SetTPIDR_EL0(new_thread->GetTPIDR_EL0());
694 cpu_core.ChangeProcessorID(this->core_id);
695 cpu_core.ClearExclusiveState();
696 }
697 }
698
699 TryDoContextSwitch();
438} 700}
439 701
440void Scheduler::SwitchContext() { 702void Scheduler::SwitchContext() {
441 Thread* const previous_thread = GetCurrentThread(); 703 current_thread_prev = current_thread;
442 Thread* const new_thread = GetSelectedThread(); 704 selected_thread = selected_thread_set;
705 Thread* previous_thread = current_thread_prev.get();
706 Thread* new_thread = selected_thread.get();
707 current_thread = selected_thread;
443 708
444 is_context_switch_pending = false; 709 is_context_switch_pending = false;
710
445 if (new_thread == previous_thread) { 711 if (new_thread == previous_thread) {
712 guard.unlock();
446 return; 713 return;
447 } 714 }
448 715
@@ -452,51 +719,75 @@ void Scheduler::SwitchContext() {
452 719
453 // Save context for previous thread 720 // Save context for previous thread
454 if (previous_thread) { 721 if (previous_thread) {
455 system.ArmInterface(core_id).SaveContext(previous_thread->GetContext32()); 722 if (new_thread != nullptr && new_thread->IsSuspendThread()) {
456 system.ArmInterface(core_id).SaveContext(previous_thread->GetContext64()); 723 previous_thread->SetWasRunning(true);
457 // Save the TPIDR_EL0 system register in case it was modified.
458 previous_thread->SetTPIDR_EL0(system.ArmInterface(core_id).GetTPIDR_EL0());
459
460 if (previous_thread->GetStatus() == ThreadStatus::Running) {
461 // This is only the case when a reschedule is triggered without the current thread
462 // yielding execution (i.e. an event triggered, system core time-sliced, etc)
463 previous_thread->SetStatus(ThreadStatus::Ready);
464 } 724 }
725 previous_thread->SetContinuousOnSVC(false);
726 previous_thread->last_running_ticks = system.CoreTiming().GetCPUTicks();
465 previous_thread->SetIsRunning(false); 727 previous_thread->SetIsRunning(false);
466 } 728 if (!previous_thread->IsHLEThread() && !previous_thread->HasExited()) {
467 729 Core::ARM_Interface& cpu_core = previous_thread->ArmInterface();
468 // Load context of new thread 730 cpu_core.SaveContext(previous_thread->GetContext32());
469 if (new_thread) { 731 cpu_core.SaveContext(previous_thread->GetContext64());
470 ASSERT_MSG(new_thread->GetProcessorID() == s32(this->core_id), 732 // Save the TPIDR_EL0 system register in case it was modified.
471 "Thread must be assigned to this core."); 733 previous_thread->SetTPIDR_EL0(cpu_core.GetTPIDR_EL0());
472 ASSERT_MSG(new_thread->GetStatus() == ThreadStatus::Ready, 734 cpu_core.ClearExclusiveState();
473 "Thread must be ready to become running.");
474
475 // Cancel any outstanding wakeup events for this thread
476 new_thread->CancelWakeupTimer();
477 current_thread = SharedFrom(new_thread);
478 new_thread->SetStatus(ThreadStatus::Running);
479 new_thread->SetIsRunning(true);
480
481 auto* const thread_owner_process = current_thread->GetOwnerProcess();
482 if (previous_process != thread_owner_process) {
483 system.Kernel().MakeCurrentProcess(thread_owner_process);
484 } 735 }
736 previous_thread->context_guard.unlock();
737 }
485 738
486 system.ArmInterface(core_id).LoadContext(new_thread->GetContext32()); 739 std::shared_ptr<Common::Fiber>* old_context;
487 system.ArmInterface(core_id).LoadContext(new_thread->GetContext64()); 740 if (previous_thread != nullptr) {
488 system.ArmInterface(core_id).SetTlsAddress(new_thread->GetTLSAddress()); 741 old_context = &previous_thread->GetHostContext();
489 system.ArmInterface(core_id).SetTPIDR_EL0(new_thread->GetTPIDR_EL0());
490 } else { 742 } else {
491 current_thread = nullptr; 743 old_context = &idle_thread->GetHostContext();
492 // Note: We do not reset the current process and current page table when idling because 744 }
493 // technically we haven't changed processes, our threads are just paused. 745 guard.unlock();
746
747 Common::Fiber::YieldTo(*old_context, switch_fiber);
748 // When this thread wakes up, it may be resumed by the scheduler of a different core.
749 auto& next_scheduler = system.Kernel().CurrentScheduler();
750 next_scheduler.SwitchContextStep2();
751}
752
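SwitchContext never jumps straight from one guest thread to another: the outgoing host context yields to the per-core switch fiber, which picks the next context, and when control returns the code may be running under a different core's scheduler, hence the re-fetch of CurrentScheduler() above. A reduced sketch of that round trip; the Fiber type below is a stand-in, not Common::Fiber's exact interface:

#include <memory>

// Cooperative context stand-in. YieldTo suspends `from` and resumes `to`;
// the real implementation swaps host stacks.
struct Fiber {
    static void YieldTo(std::shared_ptr<Fiber>& from, std::shared_ptr<Fiber>& to) {
        /* swap host execution contexts */
    }
};

struct GuestThread {
    std::shared_ptr<Fiber> host_context;
};

void YieldToScheduler(GuestThread& leaving, std::shared_ptr<Fiber>& switch_fiber) {
    // Control leaves this host context here...
    Fiber::YieldTo(leaving.host_context, switch_fiber);
    // ...and resumes here later, possibly on another core, so per-core state
    // (such as the current scheduler) must be looked up again, not cached.
}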
753void Scheduler::OnSwitch(void* this_scheduler) {
754 Scheduler* sched = static_cast<Scheduler*>(this_scheduler);
755 sched->SwitchToCurrent();
756}
757
758void Scheduler::SwitchToCurrent() {
759 while (true) {
760 guard.lock();
761 selected_thread = selected_thread_set;
762 current_thread = selected_thread;
763 is_context_switch_pending = false;
764 guard.unlock();
765 while (!is_context_switch_pending) {
766 if (current_thread != nullptr && !current_thread->IsHLEThread()) {
767 current_thread->context_guard.lock();
768 if (!current_thread->IsRunnable()) {
769 current_thread->context_guard.unlock();
770 break;
771 }
772 if (current_thread->GetProcessorID() != core_id) {
773 current_thread->context_guard.unlock();
774 break;
775 }
776 }
777 std::shared_ptr<Common::Fiber>* next_context;
778 if (current_thread != nullptr) {
779 next_context = &current_thread->GetHostContext();
780 } else {
781 next_context = &idle_thread->GetHostContext();
782 }
783 Common::Fiber::YieldTo(switch_fiber, *next_context);
784 }
494 } 785 }
495} 786}
496 787
497void Scheduler::UpdateLastContextSwitchTime(Thread* thread, Process* process) { 788void Scheduler::UpdateLastContextSwitchTime(Thread* thread, Process* process) {
498 const u64 prev_switch_ticks = last_context_switch_time; 789 const u64 prev_switch_ticks = last_context_switch_time;
499 const u64 most_recent_switch_ticks = system.CoreTiming().GetTicks(); 790 const u64 most_recent_switch_ticks = system.CoreTiming().GetCPUTicks();
500 const u64 update_ticks = most_recent_switch_ticks - prev_switch_ticks; 791 const u64 update_ticks = most_recent_switch_ticks - prev_switch_ticks;
501 792
502 if (thread != nullptr) { 793 if (thread != nullptr) {
@@ -510,6 +801,16 @@ void Scheduler::UpdateLastContextSwitchTime(Thread* thread, Process* process) {
510 last_context_switch_time = most_recent_switch_ticks; 801 last_context_switch_time = most_recent_switch_ticks;
511} 802}
512 803
804void Scheduler::Initialize() {
805 std::string name = "Idle Thread Id:" + std::to_string(core_id);
806 std::function<void(void*)> init_func = system.GetCpuManager().GetIdleThreadStartFunc();
807 void* init_func_parameter = system.GetCpuManager().GetStartFuncParamater();
808 ThreadType type = static_cast<ThreadType>(THREADTYPE_KERNEL | THREADTYPE_HLE | THREADTYPE_IDLE);
809 auto thread_res = Thread::Create(system, type, name, 0, 64, 0, static_cast<u32>(core_id), 0,
810 nullptr, std::move(init_func), init_func_parameter);
811 idle_thread = std::move(thread_res).Unwrap();
812}
813
513void Scheduler::Shutdown() { 814void Scheduler::Shutdown() {
514 current_thread = nullptr; 815 current_thread = nullptr;
515 selected_thread = nullptr; 816 selected_thread = nullptr;
@@ -538,4 +839,13 @@ SchedulerLockAndSleep::~SchedulerLockAndSleep() {
538 time_manager.ScheduleTimeEvent(event_handle, time_task, nanoseconds); 839 time_manager.ScheduleTimeEvent(event_handle, time_task, nanoseconds);
539} 840}
540 841
842void SchedulerLockAndSleep::Release() {
843 if (sleep_cancelled) {
844 return;
845 }
846 auto& time_manager = kernel.TimeManager();
847 time_manager.ScheduleTimeEvent(event_handle, time_task, nanoseconds);
848 sleep_cancelled = true;
849}
850
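Release duplicates the destructor's arming step so callers can schedule the timeout event before the guard leaves scope; the sleep_cancelled flag then keeps the destructor from scheduling it a second time. A generic sketch of that arm-exactly-once shape, not yuzu's actual types:

#include <functional>
#include <utility>

// Arm a timeout exactly once: eagerly via Release(), or at scope exit,
// unless CancelSleep() ran first.
class LockAndSleep {
public:
    explicit LockAndSleep(std::function<void()> arm_timeout)
        : arm_timeout_(std::move(arm_timeout)) {}

    ~LockAndSleep() { Release(); }

    void CancelSleep() { done_ = true; } // no timeout event will be queued

    void Release() {
        if (done_) {
            return;
        }
        done_ = true; // mirrors the patch: Release() also marks the work done
        arm_timeout_();
    }

private:
    std::function<void()> arm_timeout_;
    bool done_ = false;
};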
541} // namespace Kernel 851} // namespace Kernel
diff --git a/src/core/hle/kernel/scheduler.h b/src/core/hle/kernel/scheduler.h
index 07df33f9c..b3b4b5169 100644
--- a/src/core/hle/kernel/scheduler.h
+++ b/src/core/hle/kernel/scheduler.h
@@ -11,9 +11,14 @@
11 11
12#include "common/common_types.h" 12#include "common/common_types.h"
13#include "common/multi_level_queue.h" 13#include "common/multi_level_queue.h"
14#include "common/spin_lock.h"
14#include "core/hardware_properties.h" 15#include "core/hardware_properties.h"
15#include "core/hle/kernel/thread.h" 16#include "core/hle/kernel/thread.h"
16 17
18namespace Common {
19class Fiber;
20}
21
17namespace Core { 22namespace Core {
18class ARM_Interface; 23class ARM_Interface;
19class System; 24class System;
@@ -41,41 +46,17 @@ public:
41 return thread_list; 46 return thread_list;
42 } 47 }
43 48
44 /** 49 /// Notify the scheduler a thread's status has changed.
45 * Add a thread to the suggested queue of a cpu core. Suggested threads may be 50 void AdjustSchedulingOnStatus(Thread* thread, u32 old_flags);
46 * picked if no thread is scheduled to run on the core.
47 */
48 void Suggest(u32 priority, std::size_t core, Thread* thread);
49
50 /**
51 * Remove a thread to the suggested queue of a cpu core. Suggested threads may be
52 * picked if no thread is scheduled to run on the core.
53 */
54 void Unsuggest(u32 priority, std::size_t core, Thread* thread);
55
56 /**
57 * Add a thread to the scheduling queue of a cpu core. The thread is added at the
58 * back the queue in its priority level.
59 */
60 void Schedule(u32 priority, std::size_t core, Thread* thread);
61
62 /**
63 * Add a thread to the scheduling queue of a cpu core. The thread is added at the
64 * front the queue in its priority level.
65 */
66 void SchedulePrepend(u32 priority, std::size_t core, Thread* thread);
67 51
68 /// Reschedule an already scheduled thread based on a new priority 52 /// Notify the scheduler a thread's priority has changed.
69 void Reschedule(u32 priority, std::size_t core, Thread* thread); 53 void AdjustSchedulingOnPriority(Thread* thread, u32 old_priority);
70
71 /// Unschedules a thread.
72 void Unschedule(u32 priority, std::size_t core, Thread* thread);
73 54
74 /// Selects a core and forces it to unload its current thread's context 55 /// Notify the scheduler a thread's core and/or affinity mask has changed.
75 void UnloadThread(std::size_t core); 56 void AdjustSchedulingOnAffinity(Thread* thread, u64 old_affinity_mask, s32 old_core);
76 57
77 /** 58 /**
78 * Takes care of selecting the new scheduled thread in three steps: 59 * Takes care of selecting the new scheduled threads in three steps:
79 * 60 *
80 * 1. First a thread is selected from the top of the priority queue. If no thread 61 * 1. First a thread is selected from the top of the priority queue. If no thread
81 * is obtained then we move to step two, else we are done. 62 * is obtained then we move to step two, else we are done.
@@ -85,8 +66,10 @@ public:
85 * 66 *
86 * 3. Third, if no suggested thread is found, we do a second pass and pick a running 67 * 3. Third, if no suggested thread is found, we do a second pass and pick a running
87 * thread in another core and swap it with its current thread. 68 * thread in another core and swap it with its current thread.
69 *
70 * Returns a bitmask of the cores that need to reschedule.
88 */ 71 */
89 void SelectThread(std::size_t core); 72 u32 SelectThreads();
90 73
91 bool HaveReadyThreads(std::size_t core_id) const { 74 bool HaveReadyThreads(std::size_t core_id) const {
92 return !scheduled_queue[core_id].empty(); 75 return !scheduled_queue[core_id].empty();
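The three steps documented above amount to a per-core fallback chain. A schematic sketch of steps one and two (deques stand in for the priority queues; the real SelectThreads also transfers a stolen thread to the core and returns a bitmask of cores whose selection changed):

#include <deque>

struct Thread;

struct CoreQueues {
    std::deque<Thread*> scheduled; // threads assigned to this core
    std::deque<Thread*> suggested; // threads that could migrate here
};

// Returns the thread to run next on this core, or nullptr for the idle thread.
Thread* SelectForCore(CoreQueues& core) {
    // Step 1: top of this core's own priority queue.
    if (!core.scheduled.empty()) {
        return core.scheduled.front();
    }
    // Step 2: adopt a suggested (migratable) thread from another core.
    if (!core.suggested.empty()) {
        Thread* stolen = core.suggested.front();
        core.suggested.pop_front();
        return stolen; // the real code transfers it onto this core first
    }
    // Step 3 (not shown): swap with a running thread on another core.
    return nullptr;
}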
@@ -149,6 +132,40 @@ private:
149 /// Unlocks the scheduler, reselects threads, interrupts cores for rescheduling 132 /// Unlocks the scheduler, reselects threads, interrupts cores for rescheduling
150 /// and reschedules current core if needed. 133 /// and reschedules current core if needed.
151 void Unlock(); 134 void Unlock();
135
136 void EnableInterruptAndSchedule(u32 cores_pending_reschedule,
137 Core::EmuThreadHandle global_thread);
138
139 /**
140 * Add a thread to the suggested queue of a cpu core. Suggested threads may be
141 * picked if no thread is scheduled to run on the core.
142 */
143 void Suggest(u32 priority, std::size_t core, Thread* thread);
144
145 /**
146 * Remove a thread from the suggested queue of a cpu core. Suggested threads may be
147 * picked if no thread is scheduled to run on the core.
148 */
149 void Unsuggest(u32 priority, std::size_t core, Thread* thread);
150
151 /**
152 * Add a thread to the scheduling queue of a cpu core. The thread is added at the
153 * back of the queue in its priority level.
154 */
155 void Schedule(u32 priority, std::size_t core, Thread* thread);
156
157 /**
158 * Add a thread to the scheduling queue of a cpu core. The thread is added at the
159 * front of the queue in its priority level.
160 */
161 void SchedulePrepend(u32 priority, std::size_t core, Thread* thread);
162
163 /// Reschedule an already scheduled thread based on a new priority
164 void Reschedule(u32 priority, std::size_t core, Thread* thread);
165
166 /// Unschedules a thread.
167 void Unschedule(u32 priority, std::size_t core, Thread* thread);
168
152 /** 169 /**
153 * Transfers a thread into a specific core. If the destination_core is -1 170 * Transfers a thread into a specific core. If the destination_core is -1
154 * it will be unscheduled from its source core and added into its suggested 171 * it will be unscheduled from its source core and added into its suggested
@@ -170,10 +187,13 @@ private:
170 std::array<u32, Core::Hardware::NUM_CPU_CORES> preemption_priorities = {59, 59, 59, 62}; 187 std::array<u32, Core::Hardware::NUM_CPU_CORES> preemption_priorities = {59, 59, 59, 62};
171 188
172 /// Scheduler lock mechanisms. 189 /// Scheduler lock mechanisms.
173 std::mutex inner_lock{}; // TODO(Blinkhawk): Replace for a SpinLock 190 bool is_locked{};
191 Common::SpinLock inner_lock{};
174 std::atomic<s64> scope_lock{}; 192 std::atomic<s64> scope_lock{};
175 Core::EmuThreadHandle current_owner{Core::EmuThreadHandle::InvalidHandle()}; 193 Core::EmuThreadHandle current_owner{Core::EmuThreadHandle::InvalidHandle()};
176 194
195 Common::SpinLock global_list_guard{};
196
177 /// Lists all thread ids that aren't deleted/etc. 197 /// Lists all thread ids that aren't deleted/etc.
178 std::vector<std::shared_ptr<Thread>> thread_list; 198 std::vector<std::shared_ptr<Thread>> thread_list;
179 KernelCore& kernel; 199 KernelCore& kernel;
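inner_lock is now a Common::SpinLock, which suits critical sections this short. A minimal equivalent built on std::atomic_flag; this is an assumption about its general shape, not a copy of common/spin_lock:

#include <atomic>

class SpinLock {
public:
    void lock() {
        while (flag_.test_and_set(std::memory_order_acquire)) {
            // busy-wait; a CPU pause/yield hint would normally go here
        }
    }

    void unlock() {
        flag_.clear(std::memory_order_release);
    }

private:
    std::atomic_flag flag_ = ATOMIC_FLAG_INIT;
};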
@@ -190,11 +210,11 @@ public:
190 /// Reschedules to the next available thread (call after current thread is suspended) 210 /// Reschedules to the next available thread (call after current thread is suspended)
191 void TryDoContextSwitch(); 211 void TryDoContextSwitch();
192 212
193 /// Unloads currently running thread 213 /// The next two functions are for single-core mode only.
194 void UnloadThread(); 214 /// Unload current thread before preempting core.
195 215 void Unload();
196 /// Select the threads in top of the scheduling multilist. 216 /// Reload current thread after core preemption.
197 void SelectThreads(); 217 void Reload();
198 218
199 /// Gets the current running thread 219 /// Gets the current running thread
200 Thread* GetCurrentThread() const; 220 Thread* GetCurrentThread() const;
@@ -209,15 +229,30 @@ public:
209 return is_context_switch_pending; 229 return is_context_switch_pending;
210 } 230 }
211 231
232 void Initialize();
233
212 /// Shuts down the scheduler. 234 /// Shuts down the scheduler.
213 void Shutdown(); 235 void Shutdown();
214 236
237 void OnThreadStart();
238
239 std::shared_ptr<Common::Fiber>& ControlContext() {
240 return switch_fiber;
241 }
242
243 const std::shared_ptr<Common::Fiber>& ControlContext() const {
244 return switch_fiber;
245 }
246
215private: 247private:
216 friend class GlobalScheduler; 248 friend class GlobalScheduler;
217 249
218 /// Switches the CPU's active thread context to that of the specified thread 250 /// Switches the CPU's active thread context to that of the specified thread
219 void SwitchContext(); 251 void SwitchContext();
220 252
253 /// When a thread wakes up, it must run this through its new scheduler.
254 void SwitchContextStep2();
255
221 /** 256 /**
222 * Called on every context switch to update the internal timestamp 257 * Called on every context switch to update the internal timestamp
223 * This also updates the running time ticks for the given thread and 258 * This also updates the running time ticks for the given thread and
@@ -231,14 +266,24 @@ private:
231 */ 266 */
232 void UpdateLastContextSwitchTime(Thread* thread, Process* process); 267 void UpdateLastContextSwitchTime(Thread* thread, Process* process);
233 268
269 static void OnSwitch(void* this_scheduler);
270 void SwitchToCurrent();
271
234 std::shared_ptr<Thread> current_thread = nullptr; 272 std::shared_ptr<Thread> current_thread = nullptr;
235 std::shared_ptr<Thread> selected_thread = nullptr; 273 std::shared_ptr<Thread> selected_thread = nullptr;
274 std::shared_ptr<Thread> current_thread_prev = nullptr;
275 std::shared_ptr<Thread> selected_thread_set = nullptr;
276 std::shared_ptr<Thread> idle_thread = nullptr;
277
278 std::shared_ptr<Common::Fiber> switch_fiber = nullptr;
236 279
237 Core::System& system; 280 Core::System& system;
238 u64 last_context_switch_time = 0; 281 u64 last_context_switch_time = 0;
239 u64 idle_selection_count = 0; 282 u64 idle_selection_count = 0;
240 const std::size_t core_id; 283 const std::size_t core_id;
241 284
285 Common::SpinLock guard{};
286
242 bool is_context_switch_pending = false; 287 bool is_context_switch_pending = false;
243}; 288};
244 289
@@ -261,6 +306,8 @@ public:
261 sleep_cancelled = true; 306 sleep_cancelled = true;
262 } 307 }
263 308
309 void Release();
310
264private: 311private:
265 Handle& event_handle; 312 Handle& event_handle;
266 Thread* time_task; 313 Thread* time_task;
diff --git a/src/core/hle/kernel/server_session.cpp b/src/core/hle/kernel/server_session.cpp
index 25438b86b..7b23a6889 100644
--- a/src/core/hle/kernel/server_session.cpp
+++ b/src/core/hle/kernel/server_session.cpp
@@ -17,6 +17,7 @@
17#include "core/hle/kernel/hle_ipc.h" 17#include "core/hle/kernel/hle_ipc.h"
18#include "core/hle/kernel/kernel.h" 18#include "core/hle/kernel/kernel.h"
19#include "core/hle/kernel/process.h" 19#include "core/hle/kernel/process.h"
20#include "core/hle/kernel/scheduler.h"
20#include "core/hle/kernel/server_session.h" 21#include "core/hle/kernel/server_session.h"
21#include "core/hle/kernel/session.h" 22#include "core/hle/kernel/session.h"
22#include "core/hle/kernel/thread.h" 23#include "core/hle/kernel/thread.h"
@@ -168,9 +169,12 @@ ResultCode ServerSession::CompleteSyncRequest() {
168 } 169 }
169 170
170 // Some service requests require the thread to block 171 // Some service requests require the thread to block
171 if (!context.IsThreadWaiting()) { 172 {
172 context.GetThread().ResumeFromWait(); 173 SchedulerLock lock(kernel);
173 context.GetThread().SetWaitSynchronizationResult(result); 174 if (!context.IsThreadWaiting()) {
175 context.GetThread().ResumeFromWait();
176 context.GetThread().SetSynchronizationResults(nullptr, result);
177 }
174 } 178 }
175 179
176 request_queue.Pop(); 180 request_queue.Pop();
@@ -180,8 +184,10 @@ ResultCode ServerSession::CompleteSyncRequest() {
180 184
181ResultCode ServerSession::HandleSyncRequest(std::shared_ptr<Thread> thread, 185ResultCode ServerSession::HandleSyncRequest(std::shared_ptr<Thread> thread,
182 Core::Memory::Memory& memory) { 186 Core::Memory::Memory& memory) {
183 Core::System::GetInstance().CoreTiming().ScheduleEvent(20000, request_event, {}); 187 ResultCode result = QueueSyncRequest(std::move(thread), memory);
184 return QueueSyncRequest(std::move(thread), memory); 188 const u64 delay = kernel.IsMulticore() ? 0U : 20000U;
189 Core::System::GetInstance().CoreTiming().ScheduleEvent(delay, request_event, {});
190 return result;
185} 191}
186 192
187} // namespace Kernel 193} // namespace Kernel
diff --git a/src/core/hle/kernel/svc.cpp b/src/core/hle/kernel/svc.cpp
index 4ae4529f5..5db19dcf3 100644
--- a/src/core/hle/kernel/svc.cpp
+++ b/src/core/hle/kernel/svc.cpp
@@ -10,14 +10,15 @@
10 10
11#include "common/alignment.h" 11#include "common/alignment.h"
12#include "common/assert.h" 12#include "common/assert.h"
13#include "common/fiber.h"
13#include "common/logging/log.h" 14#include "common/logging/log.h"
14#include "common/microprofile.h" 15#include "common/microprofile.h"
15#include "common/string_util.h" 16#include "common/string_util.h"
16#include "core/arm/exclusive_monitor.h" 17#include "core/arm/exclusive_monitor.h"
17#include "core/core.h" 18#include "core/core.h"
18#include "core/core_manager.h"
19#include "core/core_timing.h" 19#include "core/core_timing.h"
20#include "core/core_timing_util.h" 20#include "core/core_timing_util.h"
21#include "core/cpu_manager.h"
21#include "core/hle/kernel/address_arbiter.h" 22#include "core/hle/kernel/address_arbiter.h"
22#include "core/hle/kernel/client_port.h" 23#include "core/hle/kernel/client_port.h"
23#include "core/hle/kernel/client_session.h" 24#include "core/hle/kernel/client_session.h"
@@ -27,6 +28,7 @@
27#include "core/hle/kernel/memory/memory_block.h" 28#include "core/hle/kernel/memory/memory_block.h"
28#include "core/hle/kernel/memory/page_table.h" 29#include "core/hle/kernel/memory/page_table.h"
29#include "core/hle/kernel/mutex.h" 30#include "core/hle/kernel/mutex.h"
31#include "core/hle/kernel/physical_core.h"
30#include "core/hle/kernel/process.h" 32#include "core/hle/kernel/process.h"
31#include "core/hle/kernel/readable_event.h" 33#include "core/hle/kernel/readable_event.h"
32#include "core/hle/kernel/resource_limit.h" 34#include "core/hle/kernel/resource_limit.h"
@@ -37,6 +39,7 @@
37#include "core/hle/kernel/svc_wrap.h" 39#include "core/hle/kernel/svc_wrap.h"
38#include "core/hle/kernel/synchronization.h" 40#include "core/hle/kernel/synchronization.h"
39#include "core/hle/kernel/thread.h" 41#include "core/hle/kernel/thread.h"
42#include "core/hle/kernel/time_manager.h"
40#include "core/hle/kernel/transfer_memory.h" 43#include "core/hle/kernel/transfer_memory.h"
41#include "core/hle/kernel/writable_event.h" 44#include "core/hle/kernel/writable_event.h"
42#include "core/hle/lock.h" 45#include "core/hle/lock.h"
@@ -133,6 +136,7 @@ enum class ResourceLimitValueType {
133 136
134ResultVal<s64> RetrieveResourceLimitValue(Core::System& system, Handle resource_limit, 137ResultVal<s64> RetrieveResourceLimitValue(Core::System& system, Handle resource_limit,
135 u32 resource_type, ResourceLimitValueType value_type) { 138 u32 resource_type, ResourceLimitValueType value_type) {
139 std::lock_guard lock{HLE::g_hle_lock};
136 const auto type = static_cast<ResourceType>(resource_type); 140 const auto type = static_cast<ResourceType>(resource_type);
137 if (!IsValidResourceType(type)) { 141 if (!IsValidResourceType(type)) {
138 LOG_ERROR(Kernel_SVC, "Invalid resource limit type: '{}'", resource_type); 142 LOG_ERROR(Kernel_SVC, "Invalid resource limit type: '{}'", resource_type);
@@ -160,6 +164,7 @@ ResultVal<s64> RetrieveResourceLimitValue(Core::System& system, Handle resource_
160 164
161/// Set the process heap to a given Size. It can both extend and shrink the heap. 165/// Set the process heap to a given Size. It can both extend and shrink the heap.
162static ResultCode SetHeapSize(Core::System& system, VAddr* heap_addr, u64 heap_size) { 166static ResultCode SetHeapSize(Core::System& system, VAddr* heap_addr, u64 heap_size) {
167 std::lock_guard lock{HLE::g_hle_lock};
163 LOG_TRACE(Kernel_SVC, "called, heap_size=0x{:X}", heap_size); 168 LOG_TRACE(Kernel_SVC, "called, heap_size=0x{:X}", heap_size);
164 169
165 // Size must be a multiple of 0x200000 (2MB) and be equal to or less than 8GB. 170 // Size must be a multiple of 0x200000 (2MB) and be equal to or less than 8GB.
@@ -190,6 +195,7 @@ static ResultCode SetHeapSize32(Core::System& system, u32* heap_addr, u32 heap_s
190 195
191static ResultCode SetMemoryAttribute(Core::System& system, VAddr address, u64 size, u32 mask, 196static ResultCode SetMemoryAttribute(Core::System& system, VAddr address, u64 size, u32 mask,
192 u32 attribute) { 197 u32 attribute) {
198 std::lock_guard lock{HLE::g_hle_lock};
193 LOG_DEBUG(Kernel_SVC, 199 LOG_DEBUG(Kernel_SVC,
194 "called, address=0x{:016X}, size=0x{:X}, mask=0x{:08X}, attribute=0x{:08X}", address, 200 "called, address=0x{:016X}, size=0x{:X}, mask=0x{:08X}, attribute=0x{:08X}", address,
195 size, mask, attribute); 201 size, mask, attribute);
@@ -226,8 +232,15 @@ static ResultCode SetMemoryAttribute(Core::System& system, VAddr address, u64 si
226 static_cast<Memory::MemoryAttribute>(attribute)); 232 static_cast<Memory::MemoryAttribute>(attribute));
227} 233}
228 234
235static ResultCode SetMemoryAttribute32(Core::System& system, u32 address, u32 size, u32 mask,
236 u32 attribute) {
237 return SetMemoryAttribute(system, static_cast<VAddr>(address), static_cast<std::size_t>(size),
238 mask, attribute);
239}
240
229/// Maps a memory range into a different range. 241/// Maps a memory range into a different range.
230static ResultCode MapMemory(Core::System& system, VAddr dst_addr, VAddr src_addr, u64 size) { 242static ResultCode MapMemory(Core::System& system, VAddr dst_addr, VAddr src_addr, u64 size) {
243 std::lock_guard lock{HLE::g_hle_lock};
231 LOG_TRACE(Kernel_SVC, "called, dst_addr=0x{:X}, src_addr=0x{:X}, size=0x{:X}", dst_addr, 244 LOG_TRACE(Kernel_SVC, "called, dst_addr=0x{:X}, src_addr=0x{:X}, size=0x{:X}", dst_addr,
232 src_addr, size); 245 src_addr, size);
233 246
@@ -241,8 +254,14 @@ static ResultCode MapMemory(Core::System& system, VAddr dst_addr, VAddr src_addr
241 return page_table.Map(dst_addr, src_addr, size); 254 return page_table.Map(dst_addr, src_addr, size);
242} 255}
243 256
257static ResultCode MapMemory32(Core::System& system, u32 dst_addr, u32 src_addr, u32 size) {
258 return MapMemory(system, static_cast<VAddr>(dst_addr), static_cast<VAddr>(src_addr),
259 static_cast<std::size_t>(size));
260}
261
244/// Unmaps a region that was previously mapped with svcMapMemory 262/// Unmaps a region that was previously mapped with svcMapMemory
245static ResultCode UnmapMemory(Core::System& system, VAddr dst_addr, VAddr src_addr, u64 size) { 263static ResultCode UnmapMemory(Core::System& system, VAddr dst_addr, VAddr src_addr, u64 size) {
264 std::lock_guard lock{HLE::g_hle_lock};
246 LOG_TRACE(Kernel_SVC, "called, dst_addr=0x{:X}, src_addr=0x{:X}, size=0x{:X}", dst_addr, 265 LOG_TRACE(Kernel_SVC, "called, dst_addr=0x{:X}, src_addr=0x{:X}, size=0x{:X}", dst_addr,
247 src_addr, size); 266 src_addr, size);
248 267
@@ -256,9 +275,15 @@ static ResultCode UnmapMemory(Core::System& system, VAddr dst_addr, VAddr src_ad
256 return page_table.Unmap(dst_addr, src_addr, size); 275 return page_table.Unmap(dst_addr, src_addr, size);
257} 276}
258 277
278static ResultCode UnmapMemory32(Core::System& system, u32 dst_addr, u32 src_addr, u32 size) {
279 return UnmapMemory(system, static_cast<VAddr>(dst_addr), static_cast<VAddr>(src_addr),
280 static_cast<std::size_t>(size));
281}
282
259/// Connect to an OS service given the port name, returns the handle to the port to out 283/// Connect to an OS service given the port name, returns the handle to the port to out
260static ResultCode ConnectToNamedPort(Core::System& system, Handle* out_handle, 284static ResultCode ConnectToNamedPort(Core::System& system, Handle* out_handle,
261 VAddr port_name_address) { 285 VAddr port_name_address) {
286 std::lock_guard lock{HLE::g_hle_lock};
262 auto& memory = system.Memory(); 287 auto& memory = system.Memory();
263 288
264 if (!memory.IsValidVirtualAddress(port_name_address)) { 289 if (!memory.IsValidVirtualAddress(port_name_address)) {
@@ -317,11 +342,30 @@ static ResultCode SendSyncRequest(Core::System& system, Handle handle) {
317 LOG_TRACE(Kernel_SVC, "called handle=0x{:08X}({})", handle, session->GetName()); 342 LOG_TRACE(Kernel_SVC, "called handle=0x{:08X}({})", handle, session->GetName());
318 343
319 auto thread = system.CurrentScheduler().GetCurrentThread(); 344 auto thread = system.CurrentScheduler().GetCurrentThread();
320 thread->InvalidateWakeupCallback(); 345 {
321 thread->SetStatus(ThreadStatus::WaitIPC); 346 SchedulerLock lock(system.Kernel());
322 system.PrepareReschedule(thread->GetProcessorID()); 347 thread->InvalidateHLECallback();
348 thread->SetStatus(ThreadStatus::WaitIPC);
349 session->SendSyncRequest(SharedFrom(thread), system.Memory());
350 }
351
352 if (thread->HasHLECallback()) {
353 Handle event_handle = thread->GetHLETimeEvent();
354 if (event_handle != InvalidHandle) {
355 auto& time_manager = system.Kernel().TimeManager();
356 time_manager.UnscheduleTimeEvent(event_handle);
357 }
358
359 {
360 SchedulerLock lock(system.Kernel());
361 auto* sync_object = thread->GetHLESyncObject();
362 sync_object->RemoveWaitingThread(SharedFrom(thread));
363 }
364
365 thread->InvokeHLECallback(SharedFrom(thread));
366 }
323 367
324 return session->SendSyncRequest(SharedFrom(thread), system.Memory()); 368 return thread->GetSignalingResult();
325} 369}
326 370
327static ResultCode SendSyncRequest32(Core::System& system, Handle handle) { 371static ResultCode SendSyncRequest32(Core::System& system, Handle handle) {
@@ -383,6 +427,15 @@ static ResultCode GetProcessId(Core::System& system, u64* process_id, Handle han
383 return ERR_INVALID_HANDLE; 427 return ERR_INVALID_HANDLE;
384} 428}
385 429
430static ResultCode GetProcessId32(Core::System& system, u32* process_id_low, u32* process_id_high,
431 Handle handle) {
432 u64 process_id{};
433 const auto result = GetProcessId(system, &process_id, handle);
434 *process_id_low = static_cast<u32>(process_id);
435 *process_id_high = static_cast<u32>(process_id >> 32);
436 return result;
437}
438
386/// Wait for the given handles to synchronize, timeout after the specified nanoseconds 439/// Wait for the given handles to synchronize, timeout after the specified nanoseconds
387static ResultCode WaitSynchronization(Core::System& system, Handle* index, VAddr handles_address, 440static ResultCode WaitSynchronization(Core::System& system, Handle* index, VAddr handles_address,
388 u64 handle_count, s64 nano_seconds) { 441 u64 handle_count, s64 nano_seconds) {
@@ -447,10 +500,13 @@ static ResultCode CancelSynchronization(Core::System& system, Handle thread_hand
447 } 500 }
448 501
449 thread->CancelWait(); 502 thread->CancelWait();
450 system.PrepareReschedule(thread->GetProcessorID());
451 return RESULT_SUCCESS; 503 return RESULT_SUCCESS;
452} 504}
453 505
506static ResultCode CancelSynchronization32(Core::System& system, Handle thread_handle) {
507 return CancelSynchronization(system, thread_handle);
508}
509
454/// Attempts to lock a mutex, creating it if it does not already exist 510/// Attempts to lock a mutex, creating it if it does not already exist
455static ResultCode ArbitrateLock(Core::System& system, Handle holding_thread_handle, 511static ResultCode ArbitrateLock(Core::System& system, Handle holding_thread_handle,
456 VAddr mutex_addr, Handle requesting_thread_handle) { 512 VAddr mutex_addr, Handle requesting_thread_handle) {
@@ -475,6 +531,12 @@ static ResultCode ArbitrateLock(Core::System& system, Handle holding_thread_hand
475 requesting_thread_handle); 531 requesting_thread_handle);
476} 532}
477 533
534static ResultCode ArbitrateLock32(Core::System& system, Handle holding_thread_handle,
535 u32 mutex_addr, Handle requesting_thread_handle) {
536 return ArbitrateLock(system, holding_thread_handle, static_cast<VAddr>(mutex_addr),
537 requesting_thread_handle);
538}
539
478/// Unlock a mutex 540/// Unlock a mutex
479static ResultCode ArbitrateUnlock(Core::System& system, VAddr mutex_addr) { 541static ResultCode ArbitrateUnlock(Core::System& system, VAddr mutex_addr) {
480 LOG_TRACE(Kernel_SVC, "called mutex_addr=0x{:X}", mutex_addr); 542 LOG_TRACE(Kernel_SVC, "called mutex_addr=0x{:X}", mutex_addr);
@@ -494,6 +556,10 @@ static ResultCode ArbitrateUnlock(Core::System& system, VAddr mutex_addr) {
494 return current_process->GetMutex().Release(mutex_addr); 556 return current_process->GetMutex().Release(mutex_addr);
495} 557}
496 558
559static ResultCode ArbitrateUnlock32(Core::System& system, u32 mutex_addr) {
560 return ArbitrateUnlock(system, static_cast<VAddr>(mutex_addr));
561}
562
497enum class BreakType : u32 { 563enum class BreakType : u32 {
498 Panic = 0, 564 Panic = 0,
499 AssertionFailed = 1, 565 AssertionFailed = 1,
@@ -594,6 +660,7 @@ static void Break(Core::System& system, u32 reason, u64 info1, u64 info2) {
594 info2, has_dumped_buffer ? std::make_optional(debug_buffer) : std::nullopt); 660 info2, has_dumped_buffer ? std::make_optional(debug_buffer) : std::nullopt);
595 661
596 if (!break_reason.signal_debugger) { 662 if (!break_reason.signal_debugger) {
663 SchedulerLock lock(system.Kernel());
597 LOG_CRITICAL( 664 LOG_CRITICAL(
598 Debug_Emulated, 665 Debug_Emulated,
599 "Emulated program broke execution! reason=0x{:016X}, info1=0x{:016X}, info2=0x{:016X}", 666 "Emulated program broke execution! reason=0x{:016X}, info1=0x{:016X}, info2=0x{:016X}",
@@ -605,14 +672,16 @@ static void Break(Core::System& system, u32 reason, u64 info1, u64 info2) {
605 const auto thread_processor_id = current_thread->GetProcessorID(); 672 const auto thread_processor_id = current_thread->GetProcessorID();
606 system.ArmInterface(static_cast<std::size_t>(thread_processor_id)).LogBacktrace(); 673 system.ArmInterface(static_cast<std::size_t>(thread_processor_id)).LogBacktrace();
607 674
608 system.Kernel().CurrentProcess()->PrepareForTermination();
609
610 // Kill the current thread 675 // Kill the current thread
676 system.Kernel().ExceptionalExit();
611 current_thread->Stop(); 677 current_thread->Stop();
612 system.PrepareReschedule();
613 } 678 }
614} 679}
615 680
681static void Break32(Core::System& system, u32 reason, u32 info1, u32 info2) {
682 Break(system, reason, static_cast<u64>(info1), static_cast<u64>(info2));
683}
684
616/// Used to output a message on a debug hardware unit - does nothing on a retail unit 685/// Used to output a message on a debug hardware unit - does nothing on a retail unit
617static void OutputDebugString([[maybe_unused]] Core::System& system, VAddr address, u64 len) { 686static void OutputDebugString([[maybe_unused]] Core::System& system, VAddr address, u64 len) {
618 if (len == 0) { 687 if (len == 0) {
@@ -627,6 +696,7 @@ static void OutputDebugString([[maybe_unused]] Core::System& system, VAddr addre
627/// Gets system/memory information for the current process 696/// Gets system/memory information for the current process
628static ResultCode GetInfo(Core::System& system, u64* result, u64 info_id, u64 handle, 697static ResultCode GetInfo(Core::System& system, u64* result, u64 info_id, u64 handle,
629 u64 info_sub_id) { 698 u64 info_sub_id) {
699 std::lock_guard lock{HLE::g_hle_lock};
630 LOG_TRACE(Kernel_SVC, "called info_id=0x{:X}, info_sub_id=0x{:X}, handle=0x{:08X}", info_id, 700 LOG_TRACE(Kernel_SVC, "called info_id=0x{:X}, info_sub_id=0x{:X}, handle=0x{:08X}", info_id,
631 info_sub_id, handle); 701 info_sub_id, handle);
632 702
@@ -863,9 +933,9 @@ static ResultCode GetInfo(Core::System& system, u64* result, u64 info_id, u64 ha
863 if (same_thread && info_sub_id == 0xFFFFFFFFFFFFFFFF) { 933 if (same_thread && info_sub_id == 0xFFFFFFFFFFFFFFFF) {
864 const u64 thread_ticks = current_thread->GetTotalCPUTimeTicks(); 934 const u64 thread_ticks = current_thread->GetTotalCPUTimeTicks();
865 935
866 out_ticks = thread_ticks + (core_timing.GetTicks() - prev_ctx_ticks); 936 out_ticks = thread_ticks + (core_timing.GetCPUTicks() - prev_ctx_ticks);
867 } else if (same_thread && info_sub_id == system.CurrentCoreIndex()) { 937 } else if (same_thread && info_sub_id == system.CurrentCoreIndex()) {
868 out_ticks = core_timing.GetTicks() - prev_ctx_ticks; 938 out_ticks = core_timing.GetCPUTicks() - prev_ctx_ticks;
869 } 939 }
870 940
871 *result = out_ticks; 941 *result = out_ticks;
@@ -892,6 +962,7 @@ static ResultCode GetInfo32(Core::System& system, u32* result_low, u32* result_h
892 962
893/// Maps memory at a desired address 963/// Maps memory at a desired address
894static ResultCode MapPhysicalMemory(Core::System& system, VAddr addr, u64 size) { 964static ResultCode MapPhysicalMemory(Core::System& system, VAddr addr, u64 size) {
965 std::lock_guard lock{HLE::g_hle_lock};
895 LOG_DEBUG(Kernel_SVC, "called, addr=0x{:016X}, size=0x{:X}", addr, size); 966 LOG_DEBUG(Kernel_SVC, "called, addr=0x{:016X}, size=0x{:X}", addr, size);
896 967
897 if (!Common::Is4KBAligned(addr)) { 968 if (!Common::Is4KBAligned(addr)) {
@@ -939,8 +1010,13 @@ static ResultCode MapPhysicalMemory(Core::System& system, VAddr addr, u64 size)
939 return page_table.MapPhysicalMemory(addr, size); 1010 return page_table.MapPhysicalMemory(addr, size);
940} 1011}
941 1012
1013static ResultCode MapPhysicalMemory32(Core::System& system, u32 addr, u32 size) {
1014 return MapPhysicalMemory(system, static_cast<VAddr>(addr), static_cast<std::size_t>(size));
1015}
1016
942/// Unmaps memory previously mapped via MapPhysicalMemory 1017/// Unmaps memory previously mapped via MapPhysicalMemory
943static ResultCode UnmapPhysicalMemory(Core::System& system, VAddr addr, u64 size) { 1018static ResultCode UnmapPhysicalMemory(Core::System& system, VAddr addr, u64 size) {
1019 std::lock_guard lock{HLE::g_hle_lock};
944 LOG_DEBUG(Kernel_SVC, "called, addr=0x{:016X}, size=0x{:X}", addr, size); 1020 LOG_DEBUG(Kernel_SVC, "called, addr=0x{:016X}, size=0x{:X}", addr, size);
945 1021
946 if (!Common::Is4KBAligned(addr)) { 1022 if (!Common::Is4KBAligned(addr)) {
@@ -988,6 +1064,10 @@ static ResultCode UnmapPhysicalMemory(Core::System& system, VAddr addr, u64 size
988 return page_table.UnmapPhysicalMemory(addr, size); 1064 return page_table.UnmapPhysicalMemory(addr, size);
989} 1065}
990 1066
1067static ResultCode UnmapPhysicalMemory32(Core::System& system, u32 addr, u32 size) {
1068 return UnmapPhysicalMemory(system, static_cast<VAddr>(addr), static_cast<std::size_t>(size));
1069}
1070
991/// Sets the thread activity 1071/// Sets the thread activity
992static ResultCode SetThreadActivity(Core::System& system, Handle handle, u32 activity) { 1072static ResultCode SetThreadActivity(Core::System& system, Handle handle, u32 activity) {
993 LOG_DEBUG(Kernel_SVC, "called, handle=0x{:08X}, activity=0x{:08X}", handle, activity); 1073 LOG_DEBUG(Kernel_SVC, "called, handle=0x{:08X}, activity=0x{:08X}", handle, activity);
@@ -1017,10 +1097,11 @@ static ResultCode SetThreadActivity(Core::System& system, Handle handle, u32 act
1017 return ERR_BUSY; 1097 return ERR_BUSY;
1018 } 1098 }
1019 1099
1020 thread->SetActivity(static_cast<ThreadActivity>(activity)); 1100 return thread->SetActivity(static_cast<ThreadActivity>(activity));
1101}
1021 1102
1022 system.PrepareReschedule(thread->GetProcessorID()); 1103static ResultCode SetThreadActivity32(Core::System& system, Handle handle, u32 activity) {
1023 return RESULT_SUCCESS; 1104 return SetThreadActivity(system, handle, activity);
1024} 1105}
1025 1106
1026/// Gets the thread context 1107/// Gets the thread context
@@ -1064,6 +1145,10 @@ static ResultCode GetThreadContext(Core::System& system, VAddr thread_context, H
1064 return RESULT_SUCCESS; 1145 return RESULT_SUCCESS;
1065} 1146}
1066 1147
1148static ResultCode GetThreadContext32(Core::System& system, u32 thread_context, Handle handle) {
1149 return GetThreadContext(system, static_cast<VAddr>(thread_context), handle);
1150}
1151
1067/// Gets the priority for the specified thread 1152/// Gets the priority for the specified thread
1068static ResultCode GetThreadPriority(Core::System& system, u32* priority, Handle handle) { 1153static ResultCode GetThreadPriority(Core::System& system, u32* priority, Handle handle) {
1069 LOG_TRACE(Kernel_SVC, "called"); 1154 LOG_TRACE(Kernel_SVC, "called");
@@ -1071,6 +1156,7 @@ static ResultCode GetThreadPriority(Core::System& system, u32* priority, Handle
1071 const auto& handle_table = system.Kernel().CurrentProcess()->GetHandleTable(); 1156 const auto& handle_table = system.Kernel().CurrentProcess()->GetHandleTable();
1072 const std::shared_ptr<Thread> thread = handle_table.Get<Thread>(handle); 1157 const std::shared_ptr<Thread> thread = handle_table.Get<Thread>(handle);
1073 if (!thread) { 1158 if (!thread) {
1159 *priority = 0;
1074 LOG_ERROR(Kernel_SVC, "Thread handle does not exist, handle=0x{:08X}", handle); 1160 LOG_ERROR(Kernel_SVC, "Thread handle does not exist, handle=0x{:08X}", handle);
1075 return ERR_INVALID_HANDLE; 1161 return ERR_INVALID_HANDLE;
1076 } 1162 }
@@ -1105,18 +1191,26 @@ static ResultCode SetThreadPriority(Core::System& system, Handle handle, u32 pri
1105 1191
1106 thread->SetPriority(priority); 1192 thread->SetPriority(priority);
1107 1193
1108 system.PrepareReschedule(thread->GetProcessorID());
1109 return RESULT_SUCCESS; 1194 return RESULT_SUCCESS;
1110} 1195}
1111 1196
1197static ResultCode SetThreadPriority32(Core::System& system, Handle handle, u32 priority) {
1198 return SetThreadPriority(system, handle, priority);
1199}
1200
1112/// Get which CPU core is executing the current thread 1201/// Get which CPU core is executing the current thread
1113static u32 GetCurrentProcessorNumber(Core::System& system) { 1202static u32 GetCurrentProcessorNumber(Core::System& system) {
1114 LOG_TRACE(Kernel_SVC, "called"); 1203 LOG_TRACE(Kernel_SVC, "called");
1115 return system.CurrentScheduler().GetCurrentThread()->GetProcessorID(); 1204 return static_cast<u32>(system.CurrentPhysicalCore().CoreIndex());
1205}
1206
1207static u32 GetCurrentProcessorNumber32(Core::System& system) {
1208 return GetCurrentProcessorNumber(system);
1116} 1209}
1117 1210
 static ResultCode MapSharedMemory(Core::System& system, Handle shared_memory_handle, VAddr addr,
                                   u64 size, u32 permissions) {
+    std::lock_guard lock{HLE::g_hle_lock};
     LOG_TRACE(Kernel_SVC,
               "called, shared_memory_handle=0x{:X}, addr=0x{:X}, size=0x{:X}, permissions=0x{:08X}",
               shared_memory_handle, addr, size, permissions);
@@ -1187,9 +1281,16 @@ static ResultCode MapSharedMemory(Core::System& system, Handle shared_memory_han
     return shared_memory->Map(*current_process, addr, size, permission_type);
 }
 
+static ResultCode MapSharedMemory32(Core::System& system, Handle shared_memory_handle, u32 addr,
+                                    u32 size, u32 permissions) {
+    return MapSharedMemory(system, shared_memory_handle, static_cast<VAddr>(addr),
+                           static_cast<std::size_t>(size), permissions);
+}
+
 static ResultCode QueryProcessMemory(Core::System& system, VAddr memory_info_address,
                                      VAddr page_info_address, Handle process_handle,
                                      VAddr address) {
+    std::lock_guard lock{HLE::g_hle_lock};
     LOG_TRACE(Kernel_SVC, "called process=0x{:08X} address={:X}", process_handle, address);
     const auto& handle_table = system.Kernel().CurrentProcess()->GetHandleTable();
     std::shared_ptr<Process> process = handle_table.Get<Process>(process_handle);
@@ -1372,6 +1473,7 @@ static ResultCode UnmapProcessCodeMemory(Core::System& system, Handle process_ha
 /// Exits the current process
 static void ExitProcess(Core::System& system) {
     auto* current_process = system.Kernel().CurrentProcess();
+    UNIMPLEMENTED();
 
     LOG_INFO(Kernel_SVC, "Process {} exiting", current_process->GetProcessID());
     ASSERT_MSG(current_process->GetStatus() == ProcessStatus::Running,
@@ -1381,8 +1483,10 @@ static void ExitProcess(Core::System& system) {
 
     // Kill the current thread
     system.CurrentScheduler().GetCurrentThread()->Stop();
+}
 
-    system.PrepareReschedule();
+static void ExitProcess32(Core::System& system) {
+    ExitProcess(system);
 }
 
 /// Creates a new thread
@@ -1428,9 +1532,10 @@ static ResultCode CreateThread(Core::System& system, Handle* out_handle, VAddr e
 
     ASSERT(kernel.CurrentProcess()->GetResourceLimit()->Reserve(ResourceType::Threads, 1));
 
+    ThreadType type = THREADTYPE_USER;
     CASCADE_RESULT(std::shared_ptr<Thread> thread,
-                   Thread::Create(kernel, "", entry_point, priority, arg, processor_id, stack_top,
-                                  *current_process));
+                   Thread::Create(system, type, "", entry_point, priority, arg, processor_id,
+                                  stack_top, current_process));
 
     const auto new_thread_handle = current_process->GetHandleTable().Create(thread);
     if (new_thread_handle.Failed()) {
@@ -1444,11 +1549,15 @@ static ResultCode CreateThread(Core::System& system, Handle* out_handle, VAddr e
     thread->SetName(
         fmt::format("thread[entry_point={:X}, handle={:X}]", entry_point, *new_thread_handle));
 
-    system.PrepareReschedule(thread->GetProcessorID());
-
     return RESULT_SUCCESS;
 }
 
+static ResultCode CreateThread32(Core::System& system, Handle* out_handle, u32 priority,
+                                 u32 entry_point, u32 arg, u32 stack_top, s32 processor_id) {
+    return CreateThread(system, out_handle, static_cast<VAddr>(entry_point), static_cast<u64>(arg),
+                        static_cast<VAddr>(stack_top), priority, processor_id);
+}
+
 /// Starts the thread for the provided handle
 static ResultCode StartThread(Core::System& system, Handle thread_handle) {
     LOG_DEBUG(Kernel_SVC, "called thread=0x{:08X}", thread_handle);
@@ -1463,13 +1572,11 @@ static ResultCode StartThread(Core::System& system, Handle thread_handle) {
 
     ASSERT(thread->GetStatus() == ThreadStatus::Dormant);
 
-    thread->ResumeFromWait();
-
-    if (thread->GetStatus() == ThreadStatus::Ready) {
-        system.PrepareReschedule(thread->GetProcessorID());
-    }
+    return thread->Start();
+}
 
-    return RESULT_SUCCESS;
+static ResultCode StartThread32(Core::System& system, Handle thread_handle) {
+    return StartThread(system, thread_handle);
 }
 
 /// Called when a thread exits
@@ -1477,9 +1584,12 @@ static void ExitThread(Core::System& system) {
     LOG_DEBUG(Kernel_SVC, "called, pc=0x{:08X}", system.CurrentArmInterface().GetPC());
 
     auto* const current_thread = system.CurrentScheduler().GetCurrentThread();
-    current_thread->Stop();
     system.GlobalScheduler().RemoveThread(SharedFrom(current_thread));
-    system.PrepareReschedule();
+    current_thread->Stop();
+}
+
+static void ExitThread32(Core::System& system) {
+    ExitThread(system);
 }
 
 /// Sleep the current thread
@@ -1498,15 +1608,21 @@ static void SleepThread(Core::System& system, s64 nanoseconds) {
 
     if (nanoseconds <= 0) {
         switch (static_cast<SleepType>(nanoseconds)) {
-        case SleepType::YieldWithoutLoadBalancing:
-            is_redundant = current_thread->YieldSimple();
+        case SleepType::YieldWithoutLoadBalancing: {
+            auto pair = current_thread->YieldSimple();
+            is_redundant = pair.second;
             break;
-        case SleepType::YieldWithLoadBalancing:
-            is_redundant = current_thread->YieldAndBalanceLoad();
+        }
+        case SleepType::YieldWithLoadBalancing: {
+            auto pair = current_thread->YieldAndBalanceLoad();
+            is_redundant = pair.second;
             break;
-        case SleepType::YieldAndWaitForLoadBalancing:
-            is_redundant = current_thread->YieldAndWaitForLoadBalancing();
+        }
+        case SleepType::YieldAndWaitForLoadBalancing: {
+            auto pair = current_thread->YieldAndWaitForLoadBalancing();
+            is_redundant = pair.second;
             break;
+        }
         default:
             UNREACHABLE_MSG("Unimplemented sleep yield type '{:016X}'!", nanoseconds);
         }
@@ -1514,13 +1630,18 @@ static void SleepThread(Core::System& system, s64 nanoseconds) {
         current_thread->Sleep(nanoseconds);
     }
 
-    if (is_redundant) {
-        // If it's redundant, the core is pretty much idle. Some games keep idling
-        // a core while it's doing nothing, we advance timing to avoid costly continuous
-        // calls.
-        system.CoreTiming().AddTicks(2000);
+    if (is_redundant && !system.Kernel().IsMulticore()) {
+        system.Kernel().ExitSVCProfile();
+        system.CoreTiming().AddTicks(1000U);
+        system.GetCpuManager().PreemptSingleCore();
+        system.Kernel().EnterSVCProfile();
     }
-    system.PrepareReschedule(current_thread->GetProcessorID());
+}
+
+static void SleepThread32(Core::System& system, u32 nanoseconds_low, u32 nanoseconds_high) {
+    const s64 nanoseconds = static_cast<s64>(static_cast<u64>(nanoseconds_low) |
+                                             (static_cast<u64>(nanoseconds_high) << 32));
+    SleepThread(system, nanoseconds);
 }
 
 /// Wait process wide key atomic
@@ -1547,31 +1668,69 @@ static ResultCode WaitProcessWideKeyAtomic(Core::System& system, VAddr mutex_add
     }
 
     ASSERT(condition_variable_addr == Common::AlignDown(condition_variable_addr, 4));
-
+    auto& kernel = system.Kernel();
+    Handle event_handle;
+    Thread* current_thread = system.CurrentScheduler().GetCurrentThread();
     auto* const current_process = system.Kernel().CurrentProcess();
-    const auto& handle_table = current_process->GetHandleTable();
-    std::shared_ptr<Thread> thread = handle_table.Get<Thread>(thread_handle);
-    ASSERT(thread);
+    {
+        SchedulerLockAndSleep lock(kernel, event_handle, current_thread, nano_seconds);
+        const auto& handle_table = current_process->GetHandleTable();
+        std::shared_ptr<Thread> thread = handle_table.Get<Thread>(thread_handle);
+        ASSERT(thread);
+
+        current_thread->SetSynchronizationResults(nullptr, RESULT_TIMEOUT);
+
+        if (thread->IsPendingTermination()) {
+            lock.CancelSleep();
+            return ERR_THREAD_TERMINATING;
+        }
+
+        const auto release_result = current_process->GetMutex().Release(mutex_addr);
+        if (release_result.IsError()) {
+            lock.CancelSleep();
+            return release_result;
+        }
+
+        if (nano_seconds == 0) {
+            lock.CancelSleep();
+            return RESULT_TIMEOUT;
+        }
 
-    const auto release_result = current_process->GetMutex().Release(mutex_addr);
-    if (release_result.IsError()) {
-        return release_result;
+        current_thread->SetCondVarWaitAddress(condition_variable_addr);
+        current_thread->SetMutexWaitAddress(mutex_addr);
+        current_thread->SetWaitHandle(thread_handle);
+        current_thread->SetStatus(ThreadStatus::WaitCondVar);
+        current_process->InsertConditionVariableThread(SharedFrom(current_thread));
     }
 
-    Thread* current_thread = system.CurrentScheduler().GetCurrentThread();
-    current_thread->SetCondVarWaitAddress(condition_variable_addr);
-    current_thread->SetMutexWaitAddress(mutex_addr);
-    current_thread->SetWaitHandle(thread_handle);
-    current_thread->SetStatus(ThreadStatus::WaitCondVar);
-    current_thread->InvalidateWakeupCallback();
-    current_process->InsertConditionVariableThread(SharedFrom(current_thread));
+    if (event_handle != InvalidHandle) {
+        auto& time_manager = kernel.TimeManager();
+        time_manager.UnscheduleTimeEvent(event_handle);
+    }
 
-    current_thread->WakeAfterDelay(nano_seconds);
+    {
+        SchedulerLock lock(kernel);
 
+        auto* owner = current_thread->GetLockOwner();
+        if (owner != nullptr) {
+            owner->RemoveMutexWaiter(SharedFrom(current_thread));
+        }
+
+        current_process->RemoveConditionVariableThread(SharedFrom(current_thread));
+    }
     // Note: Deliberately don't attempt to inherit the lock owner's priority.
 
-    system.PrepareReschedule(current_thread->GetProcessorID());
-    return RESULT_SUCCESS;
+    return current_thread->GetSignalingResult();
+}
+
+static ResultCode WaitProcessWideKeyAtomic32(Core::System& system, u32 mutex_addr,
+                                             u32 condition_variable_addr, Handle thread_handle,
+                                             u32 nanoseconds_low, u32 nanoseconds_high) {
+    const s64 nanoseconds =
+        static_cast<s64>(nanoseconds_low | (static_cast<u64>(nanoseconds_high) << 32));
+    return WaitProcessWideKeyAtomic(system, static_cast<VAddr>(mutex_addr),
+                                    static_cast<VAddr>(condition_variable_addr), thread_handle,
+                                    nanoseconds);
 }
 
 /// Signal process wide key
@@ -1582,7 +1741,9 @@ static void SignalProcessWideKey(Core::System& system, VAddr condition_variable_
     ASSERT(condition_variable_addr == Common::AlignDown(condition_variable_addr, 4));
 
     // Retrieve a list of all threads that are waiting for this condition variable.
-    auto* const current_process = system.Kernel().CurrentProcess();
+    auto& kernel = system.Kernel();
+    SchedulerLock lock(kernel);
+    auto* const current_process = kernel.CurrentProcess();
     std::vector<std::shared_ptr<Thread>> waiting_threads =
         current_process->GetConditionVariableThreads(condition_variable_addr);
 
@@ -1591,7 +1752,7 @@ static void SignalProcessWideKey(Core::System& system, VAddr condition_variable_
     std::size_t last = waiting_threads.size();
     if (target > 0)
         last = std::min(waiting_threads.size(), static_cast<std::size_t>(target));
-
+    auto& time_manager = kernel.TimeManager();
     for (std::size_t index = 0; index < last; ++index) {
         auto& thread = waiting_threads[index];
 
@@ -1599,7 +1760,6 @@ static void SignalProcessWideKey(Core::System& system, VAddr condition_variable_
 
         // liberate Cond Var Thread.
        current_process->RemoveConditionVariableThread(thread);
-        thread->SetCondVarWaitAddress(0);
 
         const std::size_t current_core = system.CurrentCoreIndex();
         auto& monitor = system.Monitor();
@@ -1610,10 +1770,8 @@ static void SignalProcessWideKey(Core::System& system, VAddr condition_variable_
         u32 update_val = 0;
         const VAddr mutex_address = thread->GetMutexWaitAddress();
         do {
-            monitor.SetExclusive(current_core, mutex_address);
-
             // If the mutex is not yet acquired, acquire it.
-            mutex_val = memory.Read32(mutex_address);
+            mutex_val = monitor.ExclusiveRead32(current_core, mutex_address);
 
             if (mutex_val != 0) {
                 update_val = mutex_val | Mutex::MutexHasWaitersFlag;
@@ -1621,33 +1779,28 @@ static void SignalProcessWideKey(Core::System& system, VAddr condition_variable_
                 update_val = thread->GetWaitHandle();
             }
         } while (!monitor.ExclusiveWrite32(current_core, mutex_address, update_val));
+        monitor.ClearExclusive();
         if (mutex_val == 0) {
             // We were able to acquire the mutex, resume this thread.
-            ASSERT(thread->GetStatus() == ThreadStatus::WaitCondVar);
-            thread->ResumeFromWait();
-
             auto* const lock_owner = thread->GetLockOwner();
             if (lock_owner != nullptr) {
                 lock_owner->RemoveMutexWaiter(thread);
             }
 
             thread->SetLockOwner(nullptr);
-            thread->SetMutexWaitAddress(0);
-            thread->SetWaitHandle(0);
-            thread->SetWaitSynchronizationResult(RESULT_SUCCESS);
-            system.PrepareReschedule(thread->GetProcessorID());
+            thread->SetSynchronizationResults(nullptr, RESULT_SUCCESS);
+            thread->ResumeFromWait();
         } else {
             // The mutex is already owned by some other thread, make this thread wait on it.
             const Handle owner_handle = static_cast<Handle>(mutex_val & Mutex::MutexOwnerMask);
             const auto& handle_table = system.Kernel().CurrentProcess()->GetHandleTable();
             auto owner = handle_table.Get<Thread>(owner_handle);
             ASSERT(owner);
-            ASSERT(thread->GetStatus() == ThreadStatus::WaitCondVar);
-            thread->InvalidateWakeupCallback();
-            thread->SetStatus(ThreadStatus::WaitMutex);
+            if (thread->GetStatus() == ThreadStatus::WaitCondVar) {
+                thread->SetStatus(ThreadStatus::WaitMutex);
+            }
 
             owner->AddMutexWaiter(thread);
-            system.PrepareReschedule(thread->GetProcessorID());
         }
     }
 }
@@ -1678,12 +1831,15 @@ static ResultCode WaitForAddress(Core::System& system, VAddr address, u32 type,
     auto& address_arbiter = system.Kernel().CurrentProcess()->GetAddressArbiter();
     const ResultCode result =
         address_arbiter.WaitForAddress(address, arbitration_type, value, timeout);
-    if (result == RESULT_SUCCESS) {
-        system.PrepareReschedule();
-    }
     return result;
 }
 
+static ResultCode WaitForAddress32(Core::System& system, u32 address, u32 type, s32 value,
+                                   u32 timeout_low, u32 timeout_high) {
+    s64 timeout = static_cast<s64>(timeout_low | (static_cast<u64>(timeout_high) << 32));
+    return WaitForAddress(system, static_cast<VAddr>(address), type, value, timeout);
+}
+
 // Signals to an address (via Address Arbiter)
 static ResultCode SignalToAddress(Core::System& system, VAddr address, u32 type, s32 value,
                                   s32 num_to_wake) {
@@ -1707,6 +1863,11 @@ static ResultCode SignalToAddress(Core::System& system, VAddr address, u32 type,
     return address_arbiter.SignalToAddress(address, signal_type, value, num_to_wake);
 }
 
+static ResultCode SignalToAddress32(Core::System& system, u32 address, u32 type, s32 value,
+                                    s32 num_to_wake) {
+    return SignalToAddress(system, static_cast<VAddr>(address), type, value, num_to_wake);
+}
+
 static void KernelDebug([[maybe_unused]] Core::System& system,
                         [[maybe_unused]] u32 kernel_debug_type, [[maybe_unused]] u64 param1,
                         [[maybe_unused]] u64 param2, [[maybe_unused]] u64 param3) {
@@ -1725,14 +1886,21 @@ static u64 GetSystemTick(Core::System& system) {
     auto& core_timing = system.CoreTiming();
 
     // Returns the value of cntpct_el0 (https://switchbrew.org/wiki/SVC#svcGetSystemTick)
-    const u64 result{Core::Timing::CpuCyclesToClockCycles(system.CoreTiming().GetTicks())};
+    const u64 result{system.CoreTiming().GetClockTicks()};
 
-    // Advance time to defeat dumb games that busy-wait for the frame to end.
-    core_timing.AddTicks(400);
+    if (!system.Kernel().IsMulticore()) {
+        core_timing.AddTicks(400U);
+    }
 
     return result;
 }
 
+static void GetSystemTick32(Core::System& system, u32* time_low, u32* time_high) {
+    u64 time = GetSystemTick(system);
+    *time_low = static_cast<u32>(time);
+    *time_high = static_cast<u32>(time >> 32);
+}
+
 /// Close a handle
 static ResultCode CloseHandle(Core::System& system, Handle handle) {
     LOG_TRACE(Kernel_SVC, "Closing handle 0x{:08X}", handle);
@@ -1765,9 +1933,14 @@ static ResultCode ResetSignal(Core::System& system, Handle handle) {
     return ERR_INVALID_HANDLE;
 }
 
+static ResultCode ResetSignal32(Core::System& system, Handle handle) {
+    return ResetSignal(system, handle);
+}
+
 /// Creates a TransferMemory object
 static ResultCode CreateTransferMemory(Core::System& system, Handle* handle, VAddr addr, u64 size,
                                        u32 permissions) {
+    std::lock_guard lock{HLE::g_hle_lock};
     LOG_DEBUG(Kernel_SVC, "called addr=0x{:X}, size=0x{:X}, perms=0x{:08X}", addr, size,
               permissions);
 
@@ -1812,6 +1985,12 @@ static ResultCode CreateTransferMemory(Core::System& system, Handle* handle, VAd
     return RESULT_SUCCESS;
 }
 
+static ResultCode CreateTransferMemory32(Core::System& system, Handle* handle, u32 addr, u32 size,
+                                         u32 permissions) {
+    return CreateTransferMemory(system, handle, static_cast<VAddr>(addr),
+                                static_cast<std::size_t>(size), permissions);
+}
+
 static ResultCode GetThreadCoreMask(Core::System& system, Handle thread_handle, u32* core,
                                     u64* mask) {
     LOG_TRACE(Kernel_SVC, "called, handle=0x{:08X}", thread_handle);
@@ -1821,6 +2000,8 @@ static ResultCode GetThreadCoreMask(Core::System& system, Handle thread_handle,
     if (!thread) {
         LOG_ERROR(Kernel_SVC, "Thread handle does not exist, thread_handle=0x{:08X}",
                   thread_handle);
+        *core = 0;
+        *mask = 0;
         return ERR_INVALID_HANDLE;
     }
 
@@ -1830,6 +2011,15 @@ static ResultCode GetThreadCoreMask(Core::System& system, Handle thread_handle,
     return RESULT_SUCCESS;
 }
 
+static ResultCode GetThreadCoreMask32(Core::System& system, Handle thread_handle, u32* core,
+                                      u32* mask_low, u32* mask_high) {
+    u64 mask{};
+    const auto result = GetThreadCoreMask(system, thread_handle, core, &mask);
+    *mask_high = static_cast<u32>(mask >> 32);
+    *mask_low = static_cast<u32>(mask);
+    return result;
+}
+
 static ResultCode SetThreadCoreMask(Core::System& system, Handle thread_handle, u32 core,
                                     u64 affinity_mask) {
     LOG_DEBUG(Kernel_SVC, "called, handle=0x{:08X}, core=0x{:X}, affinity_mask=0x{:016X}",
@@ -1861,7 +2051,7 @@ static ResultCode SetThreadCoreMask(Core::System& system, Handle thread_handle,
         return ERR_INVALID_COMBINATION;
     }
 
-    if (core < Core::NUM_CPU_CORES) {
+    if (core < Core::Hardware::NUM_CPU_CORES) {
         if ((affinity_mask & (1ULL << core)) == 0) {
             LOG_ERROR(Kernel_SVC,
                       "Core is not enabled for the current mask, core={}, mask={:016X}", core,
@@ -1883,11 +2073,14 @@ static ResultCode SetThreadCoreMask(Core::System& system, Handle thread_handle,
         return ERR_INVALID_HANDLE;
     }
 
-    system.PrepareReschedule(thread->GetProcessorID());
-    thread->ChangeCore(core, affinity_mask);
-    system.PrepareReschedule(thread->GetProcessorID());
+    return thread->SetCoreAndAffinityMask(core, affinity_mask);
+}
 
-    return RESULT_SUCCESS;
+static ResultCode SetThreadCoreMask32(Core::System& system, Handle thread_handle, u32 core,
+                                      u32 affinity_mask_low, u32 affinity_mask_high) {
+    const u64 affinity_mask =
+        static_cast<u64>(affinity_mask_low) | (static_cast<u64>(affinity_mask_high) << 32);
+    return SetThreadCoreMask(system, thread_handle, core, affinity_mask);
 }
 
 static ResultCode CreateEvent(Core::System& system, Handle* write_handle, Handle* read_handle) {
@@ -1918,6 +2111,10 @@ static ResultCode CreateEvent(Core::System& system, Handle* write_handle, Handle
     return RESULT_SUCCESS;
 }
 
+static ResultCode CreateEvent32(Core::System& system, Handle* write_handle, Handle* read_handle) {
+    return CreateEvent(system, write_handle, read_handle);
+}
+
 static ResultCode ClearEvent(Core::System& system, Handle handle) {
     LOG_TRACE(Kernel_SVC, "called, event=0x{:08X}", handle);
 
@@ -1939,6 +2136,10 @@ static ResultCode ClearEvent(Core::System& system, Handle handle) {
     return ERR_INVALID_HANDLE;
 }
 
+static ResultCode ClearEvent32(Core::System& system, Handle handle) {
+    return ClearEvent(system, handle);
+}
+
 static ResultCode SignalEvent(Core::System& system, Handle handle) {
     LOG_DEBUG(Kernel_SVC, "called. Handle=0x{:08X}", handle);
 
@@ -1951,10 +2152,13 @@ static ResultCode SignalEvent(Core::System& system, Handle handle) {
     }
 
     writable_event->Signal();
-    system.PrepareReschedule();
     return RESULT_SUCCESS;
 }
 
+static ResultCode SignalEvent32(Core::System& system, Handle handle) {
+    return SignalEvent(system, handle);
+}
+
 static ResultCode GetProcessInfo(Core::System& system, u64* out, Handle process_handle, u32 type) {
     LOG_DEBUG(Kernel_SVC, "called, handle=0x{:08X}, type=0x{:X}", process_handle, type);
 
@@ -1982,6 +2186,7 @@ static ResultCode GetProcessInfo(Core::System& system, u64* out, Handle process_
 }
 
 static ResultCode CreateResourceLimit(Core::System& system, Handle* out_handle) {
+    std::lock_guard lock{HLE::g_hle_lock};
     LOG_DEBUG(Kernel_SVC, "called");
 
     auto& kernel = system.Kernel();
@@ -2139,6 +2344,15 @@ static ResultCode GetThreadList(Core::System& system, u32* out_num_threads, VAdd
     return RESULT_SUCCESS;
 }
 
+static ResultCode FlushProcessDataCache32(Core::System& system, Handle handle, u32 address,
+                                          u32 size) {
+    // Note(Blinkhawk): For emulation purposes of the data cache this is mostly a nope
+    // as all emulation is done in the same cache level in host architecture, thus data cache
+    // does not need flushing.
+    LOG_DEBUG(Kernel_SVC, "called");
+    return RESULT_SUCCESS;
+}
+
 namespace {
 struct FunctionDef {
     using Func = void(Core::System&);
@@ -2153,57 +2367,57 @@ static const FunctionDef SVC_Table_32[] = {
     {0x00, nullptr, "Unknown"},
     {0x01, SvcWrap32<SetHeapSize32>, "SetHeapSize32"},
     {0x02, nullptr, "Unknown"},
-    {0x03, nullptr, "SetMemoryAttribute32"},
-    {0x04, nullptr, "MapMemory32"},
-    {0x05, nullptr, "UnmapMemory32"},
+    {0x03, SvcWrap32<SetMemoryAttribute32>, "SetMemoryAttribute32"},
+    {0x04, SvcWrap32<MapMemory32>, "MapMemory32"},
+    {0x05, SvcWrap32<UnmapMemory32>, "UnmapMemory32"},
     {0x06, SvcWrap32<QueryMemory32>, "QueryMemory32"},
-    {0x07, nullptr, "ExitProcess32"},
-    {0x08, nullptr, "CreateThread32"},
-    {0x09, nullptr, "StartThread32"},
-    {0x0a, nullptr, "ExitThread32"},
-    {0x0b, nullptr, "SleepThread32"},
+    {0x07, SvcWrap32<ExitProcess32>, "ExitProcess32"},
+    {0x08, SvcWrap32<CreateThread32>, "CreateThread32"},
+    {0x09, SvcWrap32<StartThread32>, "StartThread32"},
+    {0x0a, SvcWrap32<ExitThread32>, "ExitThread32"},
+    {0x0b, SvcWrap32<SleepThread32>, "SleepThread32"},
     {0x0c, SvcWrap32<GetThreadPriority32>, "GetThreadPriority32"},
-    {0x0d, nullptr, "SetThreadPriority32"},
-    {0x0e, nullptr, "GetThreadCoreMask32"},
-    {0x0f, nullptr, "SetThreadCoreMask32"},
-    {0x10, nullptr, "GetCurrentProcessorNumber32"},
-    {0x11, nullptr, "SignalEvent32"},
-    {0x12, nullptr, "ClearEvent32"},
-    {0x13, nullptr, "MapSharedMemory32"},
+    {0x0d, SvcWrap32<SetThreadPriority32>, "SetThreadPriority32"},
+    {0x0e, SvcWrap32<GetThreadCoreMask32>, "GetThreadCoreMask32"},
+    {0x0f, SvcWrap32<SetThreadCoreMask32>, "SetThreadCoreMask32"},
+    {0x10, SvcWrap32<GetCurrentProcessorNumber32>, "GetCurrentProcessorNumber32"},
+    {0x11, SvcWrap32<SignalEvent32>, "SignalEvent32"},
+    {0x12, SvcWrap32<ClearEvent32>, "ClearEvent32"},
+    {0x13, SvcWrap32<MapSharedMemory32>, "MapSharedMemory32"},
     {0x14, nullptr, "UnmapSharedMemory32"},
-    {0x15, nullptr, "CreateTransferMemory32"},
+    {0x15, SvcWrap32<CreateTransferMemory32>, "CreateTransferMemory32"},
     {0x16, SvcWrap32<CloseHandle32>, "CloseHandle32"},
-    {0x17, nullptr, "ResetSignal32"},
+    {0x17, SvcWrap32<ResetSignal32>, "ResetSignal32"},
     {0x18, SvcWrap32<WaitSynchronization32>, "WaitSynchronization32"},
-    {0x19, nullptr, "CancelSynchronization32"},
-    {0x1a, nullptr, "ArbitrateLock32"},
-    {0x1b, nullptr, "ArbitrateUnlock32"},
-    {0x1c, nullptr, "WaitProcessWideKeyAtomic32"},
+    {0x19, SvcWrap32<CancelSynchronization32>, "CancelSynchronization32"},
+    {0x1a, SvcWrap32<ArbitrateLock32>, "ArbitrateLock32"},
+    {0x1b, SvcWrap32<ArbitrateUnlock32>, "ArbitrateUnlock32"},
+    {0x1c, SvcWrap32<WaitProcessWideKeyAtomic32>, "WaitProcessWideKeyAtomic32"},
     {0x1d, SvcWrap32<SignalProcessWideKey32>, "SignalProcessWideKey32"},
-    {0x1e, nullptr, "GetSystemTick32"},
+    {0x1e, SvcWrap32<GetSystemTick32>, "GetSystemTick32"},
     {0x1f, SvcWrap32<ConnectToNamedPort32>, "ConnectToNamedPort32"},
     {0x20, nullptr, "Unknown"},
     {0x21, SvcWrap32<SendSyncRequest32>, "SendSyncRequest32"},
     {0x22, nullptr, "SendSyncRequestWithUserBuffer32"},
     {0x23, nullptr, "Unknown"},
-    {0x24, nullptr, "GetProcessId32"},
+    {0x24, SvcWrap32<GetProcessId32>, "GetProcessId32"},
     {0x25, SvcWrap32<GetThreadId32>, "GetThreadId32"},
-    {0x26, nullptr, "Break32"},
+    {0x26, SvcWrap32<Break32>, "Break32"},
     {0x27, nullptr, "OutputDebugString32"},
     {0x28, nullptr, "Unknown"},
     {0x29, SvcWrap32<GetInfo32>, "GetInfo32"},
     {0x2a, nullptr, "Unknown"},
     {0x2b, nullptr, "Unknown"},
-    {0x2c, nullptr, "MapPhysicalMemory32"},
-    {0x2d, nullptr, "UnmapPhysicalMemory32"},
+    {0x2c, SvcWrap32<MapPhysicalMemory32>, "MapPhysicalMemory32"},
+    {0x2d, SvcWrap32<UnmapPhysicalMemory32>, "UnmapPhysicalMemory32"},
    {0x2e, nullptr, "Unknown"},
     {0x2f, nullptr, "Unknown"},
     {0x30, nullptr, "Unknown"},
     {0x31, nullptr, "Unknown"},
-    {0x32, nullptr, "SetThreadActivity32"},
-    {0x33, nullptr, "GetThreadContext32"},
-    {0x34, nullptr, "WaitForAddress32"},
-    {0x35, nullptr, "SignalToAddress32"},
+    {0x32, SvcWrap32<SetThreadActivity32>, "SetThreadActivity32"},
+    {0x33, SvcWrap32<GetThreadContext32>, "GetThreadContext32"},
+    {0x34, SvcWrap32<WaitForAddress32>, "WaitForAddress32"},
+    {0x35, SvcWrap32<SignalToAddress32>, "SignalToAddress32"},
     {0x36, nullptr, "Unknown"},
     {0x37, nullptr, "Unknown"},
     {0x38, nullptr, "Unknown"},
@@ -2219,7 +2433,7 @@ static const FunctionDef SVC_Table_32[] = {
     {0x42, nullptr, "Unknown"},
     {0x43, nullptr, "ReplyAndReceive32"},
     {0x44, nullptr, "Unknown"},
-    {0x45, nullptr, "CreateEvent32"},
+    {0x45, SvcWrap32<CreateEvent32>, "CreateEvent32"},
     {0x46, nullptr, "Unknown"},
     {0x47, nullptr, "Unknown"},
     {0x48, nullptr, "Unknown"},
@@ -2245,7 +2459,7 @@ static const FunctionDef SVC_Table_32[] = {
     {0x5c, nullptr, "Unknown"},
     {0x5d, nullptr, "Unknown"},
     {0x5e, nullptr, "Unknown"},
-    {0x5F, nullptr, "FlushProcessDataCache32"},
+    {0x5F, SvcWrap32<FlushProcessDataCache32>, "FlushProcessDataCache32"},
     {0x60, nullptr, "Unknown"},
     {0x61, nullptr, "Unknown"},
     {0x62, nullptr, "Unknown"},
@@ -2423,13 +2637,10 @@ static const FunctionDef* GetSVCInfo64(u32 func_num) {
     return &SVC_Table_64[func_num];
 }
 
-MICROPROFILE_DEFINE(Kernel_SVC, "Kernel", "SVC", MP_RGB(70, 200, 70));
-
 void Call(Core::System& system, u32 immediate) {
-    MICROPROFILE_SCOPE(Kernel_SVC);
-
-    // Lock the global kernel mutex when we enter the kernel HLE.
-    std::lock_guard lock{HLE::g_hle_lock};
+    system.ExitDynarmicProfile();
+    auto& kernel = system.Kernel();
+    kernel.EnterSVCProfile();
 
     const FunctionDef* info = system.CurrentProcess()->Is64BitProcess() ? GetSVCInfo64(immediate)
                                                                         : GetSVCInfo32(immediate);
@@ -2442,6 +2653,9 @@ void Call(Core::System& system, u32 immediate) {
     } else {
         LOG_CRITICAL(Kernel_SVC, "Unknown SVC function 0x{:X}", immediate);
     }
+
+    kernel.ExitSVCProfile();
+    system.EnterDynarmicProfile();
 }
 
 } // namespace Kernel::Svc
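The new *32 entry points in svc.cpp are deliberately thin shims: they widen u32 addresses to VAddr and reassemble 64-bit arguments such as timeouts from a low/high register pair (SleepThread32, WaitForAddress32), or split a 64-bit result back into two registers (GetSystemTick32). A minimal standalone sketch of that packing convention follows; the helper names are illustrative and not part of the patch:

    #include <cstdint>

    using u32 = std::uint32_t;
    using u64 = std::uint64_t;
    using s64 = std::int64_t;

    // Reassemble a signed 64-bit argument (e.g. a timeout in nanoseconds)
    // from the low/high words carried in two 32-bit guest registers.
    constexpr s64 PairToS64(u32 low, u32 high) {
        return static_cast<s64>(static_cast<u64>(low) | (static_cast<u64>(high) << 32));
    }

    // Split a 64-bit result (e.g. the system tick) back into a register pair.
    constexpr u32 LowWord(u64 value) {
        return static_cast<u32>(value);
    }
    constexpr u32 HighWord(u64 value) {
        return static_cast<u32>(value >> 32);
    }

    static_assert(PairToS64(0xFFFFFFFFu, 0xFFFFFFFFu) == -1, "-1 must span both words");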
diff --git a/src/core/hle/kernel/svc_wrap.h b/src/core/hle/kernel/svc_wrap.h
index 7d735e3fa..0b6dd9df0 100644
--- a/src/core/hle/kernel/svc_wrap.h
+++ b/src/core/hle/kernel/svc_wrap.h
@@ -350,13 +350,50 @@ void SvcWrap64(Core::System& system) {
     func(system, static_cast<u32>(Param(system, 0)), Param(system, 1), Param(system, 2));
 }
 
-// Used by QueryMemory32
+// Used by QueryMemory32, ArbitrateLock32
 template <ResultCode func(Core::System&, u32, u32, u32)>
 void SvcWrap32(Core::System& system) {
     FuncReturn32(system,
                  func(system, Param32(system, 0), Param32(system, 1), Param32(system, 2)).raw);
 }
 
+// Used by Break32
+template <void func(Core::System&, u32, u32, u32)>
+void SvcWrap32(Core::System& system) {
+    func(system, Param32(system, 0), Param32(system, 1), Param32(system, 2));
+}
+
+// Used by ExitProcess32, ExitThread32
+template <void func(Core::System&)>
+void SvcWrap32(Core::System& system) {
+    func(system);
+}
+
+// Used by GetCurrentProcessorNumber32
+template <u32 func(Core::System&)>
+void SvcWrap32(Core::System& system) {
+    FuncReturn32(system, func(system));
+}
+
+// Used by SleepThread32
+template <void func(Core::System&, u32, u32)>
+void SvcWrap32(Core::System& system) {
+    func(system, Param32(system, 0), Param32(system, 1));
+}
+
+// Used by CreateThread32
+template <ResultCode func(Core::System&, Handle*, u32, u32, u32, u32, s32)>
+void SvcWrap32(Core::System& system) {
+    Handle param_1 = 0;
+
+    const u32 retval = func(system, &param_1, Param32(system, 0), Param32(system, 1),
+                            Param32(system, 2), Param32(system, 3), Param32(system, 4))
+                           .raw;
+
+    system.CurrentArmInterface().SetReg(1, param_1);
+    FuncReturn(system, retval);
+}
+
 // Used by GetInfo32
 template <ResultCode func(Core::System&, u32*, u32*, u32, u32, u32, u32)>
 void SvcWrap32(Core::System& system) {
@@ -393,18 +430,114 @@ void SvcWrap32(Core::System& system) {
     FuncReturn(system, retval);
 }
 
+// Used by GetSystemTick32
+template <void func(Core::System&, u32*, u32*)>
+void SvcWrap32(Core::System& system) {
+    u32 param_1 = 0;
+    u32 param_2 = 0;
+
+    func(system, &param_1, &param_2);
+    system.CurrentArmInterface().SetReg(0, param_1);
+    system.CurrentArmInterface().SetReg(1, param_2);
+}
+
+// Used by CreateEvent32
+template <ResultCode func(Core::System&, Handle*, Handle*)>
+void SvcWrap32(Core::System& system) {
+    Handle param_1 = 0;
+    Handle param_2 = 0;
+
+    const u32 retval = func(system, &param_1, &param_2).raw;
+    system.CurrentArmInterface().SetReg(1, param_1);
+    system.CurrentArmInterface().SetReg(2, param_2);
+    FuncReturn(system, retval);
+}
+
+// Used by GetThreadId32
+template <ResultCode func(Core::System&, Handle, u32*, u32*, u32*)>
+void SvcWrap32(Core::System& system) {
+    u32 param_1 = 0;
+    u32 param_2 = 0;
+    u32 param_3 = 0;
+
+    const u32 retval = func(system, Param32(system, 2), &param_1, &param_2, &param_3).raw;
+    system.CurrentArmInterface().SetReg(1, param_1);
+    system.CurrentArmInterface().SetReg(2, param_2);
+    system.CurrentArmInterface().SetReg(3, param_3);
+    FuncReturn(system, retval);
+}
+
 // Used by SignalProcessWideKey32
 template <void func(Core::System&, u32, s32)>
 void SvcWrap32(Core::System& system) {
     func(system, static_cast<u32>(Param(system, 0)), static_cast<s32>(Param(system, 1)));
 }
 
-// Used by SendSyncRequest32
+// Used by SetThreadPriority32
+template <ResultCode func(Core::System&, Handle, u32)>
+void SvcWrap32(Core::System& system) {
+    const u32 retval =
+        func(system, static_cast<Handle>(Param(system, 0)), static_cast<u32>(Param(system, 1))).raw;
+    FuncReturn(system, retval);
+}
+
+// Used by SetThreadCoreMask32
+template <ResultCode func(Core::System&, Handle, u32, u32, u32)>
+void SvcWrap32(Core::System& system) {
+    const u32 retval =
+        func(system, static_cast<Handle>(Param(system, 0)), static_cast<u32>(Param(system, 1)),
+             static_cast<u32>(Param(system, 2)), static_cast<u32>(Param(system, 3)))
+            .raw;
+    FuncReturn(system, retval);
+}
+
+// Used by WaitProcessWideKeyAtomic32
+template <ResultCode func(Core::System&, u32, u32, Handle, u32, u32)>
+void SvcWrap32(Core::System& system) {
+    const u32 retval =
+        func(system, static_cast<u32>(Param(system, 0)), static_cast<u32>(Param(system, 1)),
+             static_cast<Handle>(Param(system, 2)), static_cast<u32>(Param(system, 3)),
+             static_cast<u32>(Param(system, 4)))
+            .raw;
+    FuncReturn(system, retval);
+}
+
+// Used by WaitForAddress32
+template <ResultCode func(Core::System&, u32, u32, s32, u32, u32)>
+void SvcWrap32(Core::System& system) {
+    const u32 retval = func(system, static_cast<u32>(Param(system, 0)),
+                            static_cast<u32>(Param(system, 1)), static_cast<s32>(Param(system, 2)),
+                            static_cast<u32>(Param(system, 3)), static_cast<u32>(Param(system, 4)))
+                           .raw;
+    FuncReturn(system, retval);
+}
+
+// Used by SignalToAddress32
+template <ResultCode func(Core::System&, u32, u32, s32, s32)>
+void SvcWrap32(Core::System& system) {
+    const u32 retval =
+        func(system, static_cast<u32>(Param(system, 0)), static_cast<u32>(Param(system, 1)),
+             static_cast<s32>(Param(system, 2)), static_cast<s32>(Param(system, 3)))
+            .raw;
+    FuncReturn(system, retval);
+}
+
+// Used by SendSyncRequest32, ArbitrateUnlock32
 template <ResultCode func(Core::System&, u32)>
 void SvcWrap32(Core::System& system) {
     FuncReturn(system, func(system, static_cast<u32>(Param(system, 0))).raw);
 }
 
+// Used by CreateTransferMemory32
+template <ResultCode func(Core::System&, Handle*, u32, u32, u32)>
+void SvcWrap32(Core::System& system) {
+    Handle handle = 0;
+    const u32 retval =
+        func(system, &handle, Param32(system, 1), Param32(system, 2), Param32(system, 3)).raw;
+    system.CurrentArmInterface().SetReg(1, handle);
+    FuncReturn(system, retval);
+}
+
 // Used by WaitSynchronization32
 template <ResultCode func(Core::System&, u32, u32, s32, u32, Handle*)>
 void SvcWrap32(Core::System& system) {
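Every addition to svc_wrap.h above is an instance of one pattern: a function template per SVC signature, parameterized on the handler function itself, so each table slot remains a uniform void(Core::System&) pointer while argument marshalling is generated at compile time. A self-contained model of that dispatch, using a hypothetical MiniSystem register file in place of the emulated ARM core (all names below are illustrative, not yuzu's API):

    #include <array>
    #include <cstdint>

    struct MiniSystem {
        std::array<std::uint32_t, 8> regs{}; // stand-in for the guest register file
    };

    static std::uint32_t Param32(const MiniSystem& system, int n) {
        return system.regs[static_cast<std::size_t>(n)];
    }

    static void FuncReturn32(MiniSystem& system, std::uint32_t result) {
        system.regs[0] = result; // results travel back through r0
    }

    // One wrapper per signature; the handler is a template argument, so the
    // instantiation decays to a plain function pointer for the SVC table.
    template <std::uint32_t func(MiniSystem&, std::uint32_t, std::uint32_t)>
    void SvcWrap32(MiniSystem& system) {
        FuncReturn32(system, func(system, Param32(system, 0), Param32(system, 1)));
    }

    static std::uint32_t DummySvc(MiniSystem&, std::uint32_t a, std::uint32_t b) {
        return a + b;
    }

    using Func = void (*)(MiniSystem&);
    constexpr Func table_entry = SvcWrap32<DummySvc>; // what a table slot holds

    int main() {
        MiniSystem sys;
        sys.regs[0] = 2;
        sys.regs[1] = 3;
        table_entry(sys); // dispatch, roughly as Kernel::Svc::Call does
        return sys.regs[0] == 5 ? 0 : 1;
    }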
diff --git a/src/core/hle/kernel/synchronization.cpp b/src/core/hle/kernel/synchronization.cpp
index dc37fad1a..851b702a5 100644
--- a/src/core/hle/kernel/synchronization.cpp
+++ b/src/core/hle/kernel/synchronization.cpp
@@ -10,78 +10,107 @@
10#include "core/hle/kernel/synchronization.h" 10#include "core/hle/kernel/synchronization.h"
11#include "core/hle/kernel/synchronization_object.h" 11#include "core/hle/kernel/synchronization_object.h"
12#include "core/hle/kernel/thread.h" 12#include "core/hle/kernel/thread.h"
13#include "core/hle/kernel/time_manager.h"
13 14
14namespace Kernel { 15namespace Kernel {
15 16
16/// Default thread wakeup callback for WaitSynchronization
17static bool DefaultThreadWakeupCallback(ThreadWakeupReason reason, std::shared_ptr<Thread> thread,
18 std::shared_ptr<SynchronizationObject> object,
19 std::size_t index) {
20 ASSERT(thread->GetStatus() == ThreadStatus::WaitSynch);
21
22 if (reason == ThreadWakeupReason::Timeout) {
23 thread->SetWaitSynchronizationResult(RESULT_TIMEOUT);
24 return true;
25 }
26
27 ASSERT(reason == ThreadWakeupReason::Signal);
28 thread->SetWaitSynchronizationResult(RESULT_SUCCESS);
29 thread->SetWaitSynchronizationOutput(static_cast<u32>(index));
30 return true;
31}
32
33Synchronization::Synchronization(Core::System& system) : system{system} {} 17Synchronization::Synchronization(Core::System& system) : system{system} {}
34 18
35void Synchronization::SignalObject(SynchronizationObject& obj) const { 19void Synchronization::SignalObject(SynchronizationObject& obj) const {
20 auto& kernel = system.Kernel();
21 SchedulerLock lock(kernel);
22 auto& time_manager = kernel.TimeManager();
36 if (obj.IsSignaled()) { 23 if (obj.IsSignaled()) {
37 obj.WakeupAllWaitingThreads(); 24 for (auto thread : obj.GetWaitingThreads()) {
25 if (thread->GetSchedulingStatus() == ThreadSchedStatus::Paused) {
26 if (thread->GetStatus() != ThreadStatus::WaitHLEEvent) {
27 ASSERT(thread->GetStatus() == ThreadStatus::WaitSynch);
28 ASSERT(thread->IsWaitingSync());
29 }
30 thread->SetSynchronizationResults(&obj, RESULT_SUCCESS);
31 thread->ResumeFromWait();
32 }
33 }
34 obj.ClearWaitingThreads();
38 } 35 }
39} 36}
40 37
41std::pair<ResultCode, Handle> Synchronization::WaitFor( 38std::pair<ResultCode, Handle> Synchronization::WaitFor(
42 std::vector<std::shared_ptr<SynchronizationObject>>& sync_objects, s64 nano_seconds) { 39 std::vector<std::shared_ptr<SynchronizationObject>>& sync_objects, s64 nano_seconds) {
40 auto& kernel = system.Kernel();
43 auto* const thread = system.CurrentScheduler().GetCurrentThread(); 41 auto* const thread = system.CurrentScheduler().GetCurrentThread();
44 // Find the first object that is acquirable in the provided list of objects 42 Handle event_handle = InvalidHandle;
45 const auto itr = std::find_if(sync_objects.begin(), sync_objects.end(), 43 {
46 [thread](const std::shared_ptr<SynchronizationObject>& object) { 44 SchedulerLockAndSleep lock(kernel, event_handle, thread, nano_seconds);
47 return object->IsSignaled(); 45 const auto itr =
48 }); 46 std::find_if(sync_objects.begin(), sync_objects.end(),
49 47 [thread](const std::shared_ptr<SynchronizationObject>& object) {
50 if (itr != sync_objects.end()) { 48 return object->IsSignaled();
51 // We found a ready object, acquire it and set the result value 49 });
52 SynchronizationObject* object = itr->get(); 50
53 object->Acquire(thread); 51 if (itr != sync_objects.end()) {
54 const u32 index = static_cast<s32>(std::distance(sync_objects.begin(), itr)); 52 // We found a ready object, acquire it and set the result value
55 return {RESULT_SUCCESS, index}; 53 SynchronizationObject* object = itr->get();
54 object->Acquire(thread);
55 const u32 index = static_cast<s32>(std::distance(sync_objects.begin(), itr));
56 lock.CancelSleep();
57 return {RESULT_SUCCESS, index};
58 }
59
60 if (nano_seconds == 0) {
61 lock.CancelSleep();
62 return {RESULT_TIMEOUT, InvalidHandle};
63 }
64
65 if (thread->IsPendingTermination()) {
66 lock.CancelSleep();
67 return {ERR_THREAD_TERMINATING, InvalidHandle};
68 }
69
70 if (thread->IsSyncCancelled()) {
71 thread->SetSyncCancelled(false);
72 lock.CancelSleep();
73 return {ERR_SYNCHRONIZATION_CANCELED, InvalidHandle};
74 }
75
76 for (auto& object : sync_objects) {
77 object->AddWaitingThread(SharedFrom(thread));
78 }
79
80 thread->SetSynchronizationObjects(&sync_objects);
81 thread->SetSynchronizationResults(nullptr, RESULT_TIMEOUT);
82 thread->SetStatus(ThreadStatus::WaitSynch);
83 thread->SetWaitingSync(true);
56 } 84 }
85 thread->SetWaitingSync(false);
57 86
58 // No objects were ready to be acquired, prepare to suspend the thread. 87 if (event_handle != InvalidHandle) {
59 88 auto& time_manager = kernel.TimeManager();
60 // If a timeout value of 0 was provided, just return the Timeout error code instead of 89 time_manager.UnscheduleTimeEvent(event_handle);
61 // suspending the thread.
62 if (nano_seconds == 0) {
63 return {RESULT_TIMEOUT, InvalidHandle};
64 } 90 }
65 91
66 if (thread->IsSyncCancelled()) { 92 {
67 thread->SetSyncCancelled(false); 93 SchedulerLock lock(kernel);
68 return {ERR_SYNCHRONIZATION_CANCELED, InvalidHandle}; 94 ResultCode signaling_result = thread->GetSignalingResult();
95 SynchronizationObject* signaling_object = thread->GetSignalingObject();
96 thread->SetSynchronizationObjects(nullptr);
97 auto shared_thread = SharedFrom(thread);
98 for (auto& obj : sync_objects) {
99 obj->RemoveWaitingThread(shared_thread);
100 }
101 if (signaling_object != nullptr) {
102 const auto itr = std::find_if(
103 sync_objects.begin(), sync_objects.end(),
104 [signaling_object](const std::shared_ptr<SynchronizationObject>& object) {
105 return object.get() == signaling_object;
106 });
107 ASSERT(itr != sync_objects.end());
108 signaling_object->Acquire(thread);
109 const u32 index = static_cast<s32>(std::distance(sync_objects.begin(), itr));
110 return {signaling_result, index};
111 }
112 return {signaling_result, -1};
69 } 113 }
70
71 for (auto& object : sync_objects) {
72 object->AddWaitingThread(SharedFrom(thread));
73 }
74
75 thread->SetSynchronizationObjects(std::move(sync_objects));
76 thread->SetStatus(ThreadStatus::WaitSynch);
77
78 // Create an event to wake the thread up after the specified nanosecond delay has passed
79 thread->WakeAfterDelay(nano_seconds);
80 thread->SetWakeupCallback(DefaultThreadWakeupCallback);
81
82 system.PrepareReschedule(thread->GetProcessorID());
83
84 return {RESULT_TIMEOUT, InvalidHandle};
85} 114}
86 115
87} // namespace Kernel 116} // namespace Kernel
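The rewritten WaitFor shows a two-phase shape this patch uses throughout: phase one runs under SchedulerLockAndSleep and either cancels the pending timeout and returns early (object already signaled, zero timeout, termination, sync cancellation) or parks the thread; phase two runs after wakeup under a plain SchedulerLock and reads back which object, if any, signaled the thread. The sketch below reproduces only that shape with standard-library primitives; it is an analogy under those stated assumptions, not the kernel's scheduler:

    #include <chrono>
    #include <condition_variable>
    #include <mutex>

    struct WaitResult {
        bool timed_out;
        int index; // which object signaled us, -1 if none
    };

    struct MiniWaiter {
        std::mutex m;
        std::condition_variable cv;
        bool signaled = false;
        int signal_index = -1;

        WaitResult WaitFor(std::chrono::nanoseconds timeout) {
            std::unique_lock lock{m}; // phase 1: take the lock, arm the timeout
            if (timeout == std::chrono::nanoseconds::zero()) {
                return {true, -1};    // early-out, like lock.CancelSleep()
            }
            const bool woken = cv.wait_for(lock, timeout, [&] { return signaled; });
            // phase 2: still under the lock, read back the signaling result
            if (!woken) {
                return {true, -1};    // the RESULT_TIMEOUT path
            }
            signaled = false;
            return {false, signal_index};
        }

        void Signal(int index) { // counterpart of SignalObject
            {
                std::lock_guard lock{m};
                signaled = true;
                signal_index = index;
            }
            cv.notify_one();
        }
    };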
diff --git a/src/core/hle/kernel/synchronization_object.cpp b/src/core/hle/kernel/synchronization_object.cpp
index 43f3eef18..ba4d39157 100644
--- a/src/core/hle/kernel/synchronization_object.cpp
+++ b/src/core/hle/kernel/synchronization_object.cpp
@@ -38,68 +38,8 @@ void SynchronizationObject::RemoveWaitingThread(std::shared_ptr<Thread> thread)
     waiting_threads.erase(itr);
 }
 
-std::shared_ptr<Thread> SynchronizationObject::GetHighestPriorityReadyThread() const {
-    Thread* candidate = nullptr;
-    u32 candidate_priority = THREADPRIO_LOWEST + 1;
-
-    for (const auto& thread : waiting_threads) {
-        const ThreadStatus thread_status = thread->GetStatus();
-
-        // The list of waiting threads must not contain threads that are not waiting to be awakened.
-        ASSERT_MSG(thread_status == ThreadStatus::WaitSynch ||
-                       thread_status == ThreadStatus::WaitHLEEvent,
-                   "Inconsistent thread statuses in waiting_threads");
-
-        if (thread->GetPriority() >= candidate_priority)
-            continue;
-
-        if (ShouldWait(thread.get()))
-            continue;
-
-        candidate = thread.get();
-        candidate_priority = thread->GetPriority();
-    }
-
-    return SharedFrom(candidate);
-}
-
-void SynchronizationObject::WakeupWaitingThread(std::shared_ptr<Thread> thread) {
-    ASSERT(!ShouldWait(thread.get()));
-
-    if (!thread) {
-        return;
-    }
-
-    if (thread->IsSleepingOnWait()) {
-        for (const auto& object : thread->GetSynchronizationObjects()) {
-            ASSERT(!object->ShouldWait(thread.get()));
-            object->Acquire(thread.get());
-        }
-    } else {
-        Acquire(thread.get());
-    }
-
-    const std::size_t index = thread->GetSynchronizationObjectIndex(SharedFrom(this));
-
-    thread->ClearSynchronizationObjects();
-
-    thread->CancelWakeupTimer();
-
-    bool resume = true;
-    if (thread->HasWakeupCallback()) {
-        resume = thread->InvokeWakeupCallback(ThreadWakeupReason::Signal, thread, SharedFrom(this),
-                                              index);
-    }
-    if (resume) {
-        thread->ResumeFromWait();
-        kernel.PrepareReschedule(thread->GetProcessorID());
-    }
-}
-
-void SynchronizationObject::WakeupAllWaitingThreads() {
-    while (auto thread = GetHighestPriorityReadyThread()) {
-        WakeupWaitingThread(thread);
-    }
+void SynchronizationObject::ClearWaitingThreads() {
+    waiting_threads.clear();
 }
 
 const std::vector<std::shared_ptr<Thread>>& SynchronizationObject::GetWaitingThreads() const {
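The deleted GetHighestPriorityReadyThread was a linear scan for the waiter with the numerically lowest priority value that no longer needs to wait, and WakeupAllWaitingThreads looped that scan until it returned nothing. The new SignalObject (in synchronization.cpp above) instead resumes every paused waiter and clears the list, leaving the ordering decision to the scheduler. For reference, the removed selection logic reduces to this scan (MiniThread is an illustrative stand-in, not a yuzu type):

    #include <vector>

    struct MiniThread {
        unsigned priority; // lower value means higher priority
        bool ready;        // i.e. !ShouldWait(thread)
    };

    const MiniThread* PickHighestPriorityReady(const std::vector<MiniThread>& waiters) {
        const MiniThread* candidate = nullptr;
        for (const MiniThread& t : waiters) {
            if (!t.ready) {
                continue;
            }
            if (candidate == nullptr || t.priority < candidate->priority) {
                candidate = &t;
            }
        }
        return candidate; // nullptr once no waiter is ready, ending the wake loop
    }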
diff --git a/src/core/hle/kernel/synchronization_object.h b/src/core/hle/kernel/synchronization_object.h
index 741c31faf..f89b24204 100644
--- a/src/core/hle/kernel/synchronization_object.h
+++ b/src/core/hle/kernel/synchronization_object.h
@@ -12,6 +12,7 @@
 namespace Kernel {
 
 class KernelCore;
+class Synchronization;
 class Thread;
 
 /// Class that represents a Kernel object that a thread can be waiting on
@@ -49,24 +50,11 @@ public:
      */
     void RemoveWaitingThread(std::shared_ptr<Thread> thread);
 
-    /**
-     * Wake up all threads waiting on this object that can be awoken, in priority order,
-     * and set the synchronization result and output of the thread.
-     */
-    void WakeupAllWaitingThreads();
-
-    /**
-     * Wakes up a single thread waiting on this object.
-     * @param thread Thread that is waiting on this object to wakeup.
-     */
-    void WakeupWaitingThread(std::shared_ptr<Thread> thread);
-
-    /// Obtains the highest priority thread that is ready to run from this object's waiting list.
-    std::shared_ptr<Thread> GetHighestPriorityReadyThread() const;
-
     /// Get a const reference to the waiting threads list for debug use
     const std::vector<std::shared_ptr<Thread>>& GetWaitingThreads() const;
 
+    void ClearWaitingThreads();
+
 protected:
     bool is_signaled{}; // Tells if this sync object is signalled;
 
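Note that the header now forward-declares class Synchronization instead of including another header, which compiles as long as the type is only named through pointers, references, or declarations. A generic illustration of the idiom, with hypothetical names:

    // widget.h: the forward declaration keeps this header cheap to include.
    class Gadget; // incomplete type: fine for references, pointers, declarations

    class Widget {
    public:
        void Attach(Gadget& gadget); // reference parameter: no definition needed
    private:
        Gadget* owner = nullptr;     // pointer member: no definition needed
    };

    // widget.cpp would #include "gadget.h" before using Gadget's members.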
diff --git a/src/core/hle/kernel/thread.cpp b/src/core/hle/kernel/thread.cpp
index db7f379ac..2b1092697 100644
--- a/src/core/hle/kernel/thread.cpp
+++ b/src/core/hle/kernel/thread.cpp
@@ -9,12 +9,21 @@
9 9
10#include "common/assert.h" 10#include "common/assert.h"
11#include "common/common_types.h" 11#include "common/common_types.h"
12#include "common/fiber.h"
12#include "common/logging/log.h" 13#include "common/logging/log.h"
13#include "common/thread_queue_list.h" 14#include "common/thread_queue_list.h"
14#include "core/arm/arm_interface.h" 15#include "core/arm/arm_interface.h"
16#ifdef ARCHITECTURE_x86_64
17#include "core/arm/dynarmic/arm_dynarmic_32.h"
18#include "core/arm/dynarmic/arm_dynarmic_64.h"
19#endif
20#include "core/arm/cpu_interrupt_handler.h"
21#include "core/arm/exclusive_monitor.h"
22#include "core/arm/unicorn/arm_unicorn.h"
15#include "core/core.h" 23#include "core/core.h"
16#include "core/core_timing.h" 24#include "core/core_timing.h"
17#include "core/core_timing_util.h" 25#include "core/core_timing_util.h"
26#include "core/cpu_manager.h"
18#include "core/hardware_properties.h" 27#include "core/hardware_properties.h"
19#include "core/hle/kernel/errors.h" 28#include "core/hle/kernel/errors.h"
20#include "core/hle/kernel/handle_table.h" 29#include "core/hle/kernel/handle_table.h"
@@ -23,6 +32,7 @@
23#include "core/hle/kernel/process.h" 32#include "core/hle/kernel/process.h"
24#include "core/hle/kernel/scheduler.h" 33#include "core/hle/kernel/scheduler.h"
25#include "core/hle/kernel/thread.h" 34#include "core/hle/kernel/thread.h"
35#include "core/hle/kernel/time_manager.h"
26#include "core/hle/result.h" 36#include "core/hle/result.h"
27#include "core/memory.h" 37#include "core/memory.h"
28 38
@@ -44,46 +54,26 @@ Thread::Thread(KernelCore& kernel) : SynchronizationObject{kernel} {}
44Thread::~Thread() = default; 54Thread::~Thread() = default;
45 55
46void Thread::Stop() { 56void Thread::Stop() {
47 // Cancel any outstanding wakeup events for this thread 57 {
48 Core::System::GetInstance().CoreTiming().UnscheduleEvent(kernel.ThreadWakeupCallbackEventType(), 58 SchedulerLock lock(kernel);
49 global_handle); 59 SetStatus(ThreadStatus::Dead);
50 kernel.GlobalHandleTable().Close(global_handle); 60 Signal();
51 global_handle = 0; 61 kernel.GlobalHandleTable().Close(global_handle);
52 SetStatus(ThreadStatus::Dead);
53 Signal();
54
55 // Clean up any dangling references in objects that this thread was waiting for
56 for (auto& wait_object : wait_objects) {
57 wait_object->RemoveWaitingThread(SharedFrom(this));
58 }
59 wait_objects.clear();
60
61 owner_process->UnregisterThread(this);
62
63 // Mark the TLS slot in the thread's page as free.
64 owner_process->FreeTLSRegion(tls_address);
65}
66
67void Thread::WakeAfterDelay(s64 nanoseconds) {
68 // Don't schedule a wakeup if the thread wants to wait forever
69 if (nanoseconds == -1)
70 return;
71 62
72 // This function might be called from any thread so we have to be cautious and use the 63 if (owner_process) {
73 // thread-safe version of ScheduleEvent. 64 owner_process->UnregisterThread(this);
74 const s64 cycles = Core::Timing::nsToCycles(std::chrono::nanoseconds{nanoseconds});
75 Core::System::GetInstance().CoreTiming().ScheduleEvent(
76 cycles, kernel.ThreadWakeupCallbackEventType(), global_handle);
77}
78 65
79void Thread::CancelWakeupTimer() { 66 // Mark the TLS slot in the thread's page as free.
80 Core::System::GetInstance().CoreTiming().UnscheduleEvent(kernel.ThreadWakeupCallbackEventType(), 67 owner_process->FreeTLSRegion(tls_address);
81 global_handle); 68 }
69 arm_interface.reset();
70 has_exited = true;
71 }
72 global_handle = 0;
82} 73}
83 74
84void Thread::ResumeFromWait() { 75void Thread::ResumeFromWait() {
85 ASSERT_MSG(wait_objects.empty(), "Thread is waking up while waiting for objects"); 76 SchedulerLock lock(kernel);
86
87 switch (status) { 77 switch (status) {
88 case ThreadStatus::Paused: 78 case ThreadStatus::Paused:
89 case ThreadStatus::WaitSynch: 79 case ThreadStatus::WaitSynch:
@@ -99,7 +89,7 @@ void Thread::ResumeFromWait() {
99 case ThreadStatus::Ready: 89 case ThreadStatus::Ready:
100 // The thread's wakeup callback must have already been cleared when the thread was first 90 // The thread's wakeup callback must have already been cleared when the thread was first
101 // awoken. 91 // awoken.
102 ASSERT(wakeup_callback == nullptr); 92 ASSERT(hle_callback == nullptr);
103 // If the thread is waiting on multiple wait objects, it might be awoken more than once 93 // If the thread is waiting on multiple wait objects, it might be awoken more than once
104 // before actually resuming. We can ignore subsequent wakeups if the thread status has 94 // before actually resuming. We can ignore subsequent wakeups if the thread status has
105 // already been set to ThreadStatus::Ready. 95 // already been set to ThreadStatus::Ready.
@@ -115,24 +105,31 @@ void Thread::ResumeFromWait() {
115 return; 105 return;
116 } 106 }
117 107
118 wakeup_callback = nullptr; 108 SetStatus(ThreadStatus::Ready);
109}
110
111void Thread::OnWakeUp() {
112 SchedulerLock lock(kernel);
119 113
120 if (activity == ThreadActivity::Paused) { 114 SetStatus(ThreadStatus::Ready);
121 SetStatus(ThreadStatus::Paused); 115}
122 return;
123 }
124 116
117ResultCode Thread::Start() {
118 SchedulerLock lock(kernel);
125 SetStatus(ThreadStatus::Ready); 119 SetStatus(ThreadStatus::Ready);
120 return RESULT_SUCCESS;
126} 121}
127 122
128void Thread::CancelWait() { 123void Thread::CancelWait() {
129 if (GetSchedulingStatus() != ThreadSchedStatus::Paused) { 124 SchedulerLock lock(kernel);
125 if (GetSchedulingStatus() != ThreadSchedStatus::Paused || !is_waiting_on_sync) {
130 is_sync_cancelled = true; 126 is_sync_cancelled = true;
131 return; 127 return;
132 } 128 }
 129 // TODO(Blinkhawk): Implement cancellation of server sessions
133 is_sync_cancelled = false; 130 is_sync_cancelled = false;
134 SetWaitSynchronizationResult(ERR_SYNCHRONIZATION_CANCELED); 131 SetSynchronizationResults(nullptr, ERR_SYNCHRONIZATION_CANCELED);
135 ResumeFromWait(); 132 SetStatus(ThreadStatus::Ready);
136} 133}
137 134
138static void ResetThreadContext32(Core::ARM_Interface::ThreadContext32& context, u32 stack_top, 135static void ResetThreadContext32(Core::ARM_Interface::ThreadContext32& context, u32 stack_top,
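
The rewritten Stop(), ResumeFromWait(), OnWakeUp(), Start() and CancelWait() above all acquire the global scheduler lock before mutating thread state, replacing the old ad-hoc wakeup-timer calls. Below is a minimal sketch of that RAII pattern, assuming a plain std::mutex in place of the kernel's scheduler lock; SchedulerLockSketch and StopThread are illustrative names, not yuzu API.

    #include <iostream>
    #include <mutex>

    class Kernel {
    public:
        std::mutex scheduler_mutex; // stand-in for the kernel's global scheduler lock
    };

    // RAII guard: locks on construction, unlocks when the scope ends.
    class SchedulerLockSketch {
    public:
        explicit SchedulerLockSketch(Kernel& kernel) : guard{kernel.scheduler_mutex} {}
    private:
        std::scoped_lock<std::mutex> guard;
    };

    void StopThread(Kernel& kernel) {
        {
            SchedulerLockSketch lock(kernel); // state transitions happen under the lock
            std::cout << "status <- Dead, signal waiters, free TLS, unregister\n";
        } // lock released here, exactly as at the end of the block in Thread::Stop()
        std::cout << "global_handle <- 0\n"; // done outside the locked region
    }

    int main() {
        Kernel kernel;
        StopThread(kernel);
    }
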
@@ -153,12 +150,29 @@ static void ResetThreadContext64(Core::ARM_Interface::ThreadContext64& context,
153 context.fpcr = 0; 150 context.fpcr = 0;
154} 151}
155 152
156ResultVal<std::shared_ptr<Thread>> Thread::Create(KernelCore& kernel, std::string name, 153std::shared_ptr<Common::Fiber>& Thread::GetHostContext() {
157 VAddr entry_point, u32 priority, u64 arg, 154 return host_context;
158 s32 processor_id, VAddr stack_top, 155}
159 Process& owner_process) { 156
157ResultVal<std::shared_ptr<Thread>> Thread::Create(Core::System& system, ThreadType type_flags,
158 std::string name, VAddr entry_point, u32 priority,
159 u64 arg, s32 processor_id, VAddr stack_top,
160 Process* owner_process) {
161 std::function<void(void*)> init_func = system.GetCpuManager().GetGuestThreadStartFunc();
162 void* init_func_parameter = system.GetCpuManager().GetStartFuncParamater();
163 return Create(system, type_flags, name, entry_point, priority, arg, processor_id, stack_top,
164 owner_process, std::move(init_func), init_func_parameter);
165}
166
167ResultVal<std::shared_ptr<Thread>> Thread::Create(Core::System& system, ThreadType type_flags,
168 std::string name, VAddr entry_point, u32 priority,
169 u64 arg, s32 processor_id, VAddr stack_top,
170 Process* owner_process,
171 std::function<void(void*)>&& thread_start_func,
172 void* thread_start_parameter) {
173 auto& kernel = system.Kernel();
 160 // Check if priority is in range. Lowest priority -> highest priority id. 174 // Check if priority is in range. Lowest priority -> highest priority id.
161 if (priority > THREADPRIO_LOWEST) { 175 if (priority > THREADPRIO_LOWEST && ((type_flags & THREADTYPE_IDLE) == 0)) {
162 LOG_ERROR(Kernel_SVC, "Invalid thread priority: {}", priority); 176 LOG_ERROR(Kernel_SVC, "Invalid thread priority: {}", priority);
163 return ERR_INVALID_THREAD_PRIORITY; 177 return ERR_INVALID_THREAD_PRIORITY;
164 } 178 }
@@ -168,11 +182,12 @@ ResultVal<std::shared_ptr<Thread>> Thread::Create(KernelCore& kernel, std::strin
168 return ERR_INVALID_PROCESSOR_ID; 182 return ERR_INVALID_PROCESSOR_ID;
169 } 183 }
170 184
171 auto& system = Core::System::GetInstance(); 185 if (owner_process) {
172 if (!system.Memory().IsValidVirtualAddress(owner_process, entry_point)) { 186 if (!system.Memory().IsValidVirtualAddress(*owner_process, entry_point)) {
173 LOG_ERROR(Kernel_SVC, "(name={}): invalid entry {:016X}", name, entry_point); 187 LOG_ERROR(Kernel_SVC, "(name={}): invalid entry {:016X}", name, entry_point);
174 // TODO (bunnei): Find the correct error code to use here 188 // TODO (bunnei): Find the correct error code to use here
175 return RESULT_UNKNOWN; 189 return RESULT_UNKNOWN;
190 }
176 } 191 }
177 192
178 std::shared_ptr<Thread> thread = std::make_shared<Thread>(kernel); 193 std::shared_ptr<Thread> thread = std::make_shared<Thread>(kernel);
@@ -183,51 +198,82 @@ ResultVal<std::shared_ptr<Thread>> Thread::Create(KernelCore& kernel, std::strin
183 thread->stack_top = stack_top; 198 thread->stack_top = stack_top;
184 thread->tpidr_el0 = 0; 199 thread->tpidr_el0 = 0;
185 thread->nominal_priority = thread->current_priority = priority; 200 thread->nominal_priority = thread->current_priority = priority;
186 thread->last_running_ticks = system.CoreTiming().GetTicks(); 201 thread->last_running_ticks = 0;
187 thread->processor_id = processor_id; 202 thread->processor_id = processor_id;
188 thread->ideal_core = processor_id; 203 thread->ideal_core = processor_id;
189 thread->affinity_mask = 1ULL << processor_id; 204 thread->affinity_mask = 1ULL << processor_id;
190 thread->wait_objects.clear(); 205 thread->wait_objects = nullptr;
191 thread->mutex_wait_address = 0; 206 thread->mutex_wait_address = 0;
192 thread->condvar_wait_address = 0; 207 thread->condvar_wait_address = 0;
193 thread->wait_handle = 0; 208 thread->wait_handle = 0;
194 thread->name = std::move(name); 209 thread->name = std::move(name);
195 thread->global_handle = kernel.GlobalHandleTable().Create(thread).Unwrap(); 210 thread->global_handle = kernel.GlobalHandleTable().Create(thread).Unwrap();
196 thread->owner_process = &owner_process; 211 thread->owner_process = owner_process;
197 auto& scheduler = kernel.GlobalScheduler(); 212 thread->type = type_flags;
198 scheduler.AddThread(thread); 213 if ((type_flags & THREADTYPE_IDLE) == 0) {
199 thread->tls_address = thread->owner_process->CreateTLSRegion(); 214 auto& scheduler = kernel.GlobalScheduler();
200 215 scheduler.AddThread(thread);
201 thread->owner_process->RegisterThread(thread.get()); 216 }
217 if (owner_process) {
218 thread->tls_address = thread->owner_process->CreateTLSRegion();
219 thread->owner_process->RegisterThread(thread.get());
220 } else {
221 thread->tls_address = 0;
222 }
223 // TODO(peachum): move to ScheduleThread() when scheduler is added so selected core is used
224 // to initialize the context
225 thread->arm_interface.reset();
226 if ((type_flags & THREADTYPE_HLE) == 0) {
227#ifdef ARCHITECTURE_x86_64
228 if (owner_process && !owner_process->Is64BitProcess()) {
229 thread->arm_interface = std::make_unique<Core::ARM_Dynarmic_32>(
230 system, kernel.Interrupts(), kernel.IsMulticore(), kernel.GetExclusiveMonitor(),
231 processor_id);
232 } else {
233 thread->arm_interface = std::make_unique<Core::ARM_Dynarmic_64>(
234 system, kernel.Interrupts(), kernel.IsMulticore(), kernel.GetExclusiveMonitor(),
235 processor_id);
236 }
202 237
203 ResetThreadContext32(thread->context_32, static_cast<u32>(stack_top), 238#else
204 static_cast<u32>(entry_point), static_cast<u32>(arg)); 239 if (owner_process && !owner_process->Is64BitProcess()) {
 205 ResetThreadContext64(thread->context_64, stack_top, entry_point, arg); 240 thread->arm_interface = std::make_unique<Core::ARM_Unicorn>(
 241 system, kernel.Interrupts(), kernel.IsMulticore(), Core::ARM_Unicorn::Arch::AArch32,
242 processor_id);
243 } else {
 244 thread->arm_interface = std::make_unique<Core::ARM_Unicorn>(
 245 system, kernel.Interrupts(), kernel.IsMulticore(), Core::ARM_Unicorn::Arch::AArch64,
246 processor_id);
247 }
248 LOG_WARNING(Core, "CPU JIT requested, but Dynarmic not available");
249#endif
250 ResetThreadContext32(thread->context_32, static_cast<u32>(stack_top),
251 static_cast<u32>(entry_point), static_cast<u32>(arg));
252 ResetThreadContext64(thread->context_64, stack_top, entry_point, arg);
253 }
254 thread->host_context =
255 std::make_shared<Common::Fiber>(std::move(thread_start_func), thread_start_parameter);
206 256
207 return MakeResult<std::shared_ptr<Thread>>(std::move(thread)); 257 return MakeResult<std::shared_ptr<Thread>>(std::move(thread));
208} 258}
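
Create() now binds a host execution context to every thread: the start routine and its parameter (taken from the CpuManager in the first overload) are wrapped in a Common::Fiber, alongside a per-thread ARM interface. The sketch below shows the shape of that binding with a simplified Fiber-like stand-in; FiberSketch and ThreadSketch are illustrative, and a real fiber would switch stacks rather than call the routine directly.

    #include <functional>
    #include <iostream>
    #include <memory>

    class FiberSketch {
    public:
        FiberSketch(std::function<void(void*)>&& entry, void* parameter)
            : entry_{std::move(entry)}, parameter_{parameter} {}
        void Run() { entry_(parameter_); } // a real fiber context-switches here instead
    private:
        std::function<void(void*)> entry_;
        void* parameter_;
    };

    struct ThreadSketch {
        std::shared_ptr<FiberSketch> host_context; // mirrors Thread::host_context
    };

    int main() {
        int guest_arg = 1234;
        auto start = [](void* p) {
            std::cout << "guest thread started with arg " << *static_cast<int*>(p) << '\n';
        };
        ThreadSketch thread;
        thread.host_context = std::make_shared<FiberSketch>(start, &guest_arg);
        thread.host_context->Run();
    }
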
209 259
210void Thread::SetPriority(u32 priority) { 260void Thread::SetPriority(u32 priority) {
261 SchedulerLock lock(kernel);
211 ASSERT_MSG(priority <= THREADPRIO_LOWEST && priority >= THREADPRIO_HIGHEST, 262 ASSERT_MSG(priority <= THREADPRIO_LOWEST && priority >= THREADPRIO_HIGHEST,
212 "Invalid priority value."); 263 "Invalid priority value.");
213 nominal_priority = priority; 264 nominal_priority = priority;
214 UpdatePriority(); 265 UpdatePriority();
215} 266}
216 267
217void Thread::SetWaitSynchronizationResult(ResultCode result) { 268void Thread::SetSynchronizationResults(SynchronizationObject* object, ResultCode result) {
218 context_32.cpu_registers[0] = result.raw; 269 signaling_object = object;
219 context_64.cpu_registers[0] = result.raw; 270 signaling_result = result;
220}
221
222void Thread::SetWaitSynchronizationOutput(s32 output) {
223 context_32.cpu_registers[1] = output;
224 context_64.cpu_registers[1] = output;
225} 271}
226 272
227s32 Thread::GetSynchronizationObjectIndex(std::shared_ptr<SynchronizationObject> object) const { 273s32 Thread::GetSynchronizationObjectIndex(std::shared_ptr<SynchronizationObject> object) const {
228 ASSERT_MSG(!wait_objects.empty(), "Thread is not waiting for anything"); 274 ASSERT_MSG(!wait_objects->empty(), "Thread is not waiting for anything");
229 const auto match = std::find(wait_objects.rbegin(), wait_objects.rend(), object); 275 const auto match = std::find(wait_objects->rbegin(), wait_objects->rend(), object);
230 return static_cast<s32>(std::distance(match, wait_objects.rend()) - 1); 276 return static_cast<s32>(std::distance(match, wait_objects->rend()) - 1);
231} 277}
232 278
233VAddr Thread::GetCommandBufferAddress() const { 279VAddr Thread::GetCommandBufferAddress() const {
@@ -236,6 +282,14 @@ VAddr Thread::GetCommandBufferAddress() const {
236 return GetTLSAddress() + command_header_offset; 282 return GetTLSAddress() + command_header_offset;
237} 283}
238 284
285Core::ARM_Interface& Thread::ArmInterface() {
286 return *arm_interface;
287}
288
289const Core::ARM_Interface& Thread::ArmInterface() const {
290 return *arm_interface;
291}
292
239void Thread::SetStatus(ThreadStatus new_status) { 293void Thread::SetStatus(ThreadStatus new_status) {
240 if (new_status == status) { 294 if (new_status == status) {
241 return; 295 return;
@@ -257,10 +311,6 @@ void Thread::SetStatus(ThreadStatus new_status) {
257 break; 311 break;
258 } 312 }
259 313
260 if (status == ThreadStatus::Running) {
261 last_running_ticks = Core::System::GetInstance().CoreTiming().GetTicks();
262 }
263
264 status = new_status; 314 status = new_status;
265} 315}
266 316
@@ -341,75 +391,116 @@ void Thread::UpdatePriority() {
341 lock_owner->UpdatePriority(); 391 lock_owner->UpdatePriority();
342} 392}
343 393
344void Thread::ChangeCore(u32 core, u64 mask) {
345 SetCoreAndAffinityMask(core, mask);
346}
347
348bool Thread::AllSynchronizationObjectsReady() const { 394bool Thread::AllSynchronizationObjectsReady() const {
349 return std::none_of(wait_objects.begin(), wait_objects.end(), 395 return std::none_of(wait_objects->begin(), wait_objects->end(),
350 [this](const std::shared_ptr<SynchronizationObject>& object) { 396 [this](const std::shared_ptr<SynchronizationObject>& object) {
351 return object->ShouldWait(this); 397 return object->ShouldWait(this);
352 }); 398 });
353} 399}
354 400
355bool Thread::InvokeWakeupCallback(ThreadWakeupReason reason, std::shared_ptr<Thread> thread, 401bool Thread::InvokeHLECallback(std::shared_ptr<Thread> thread) {
356 std::shared_ptr<SynchronizationObject> object, 402 ASSERT(hle_callback);
357 std::size_t index) { 403 return hle_callback(std::move(thread));
358 ASSERT(wakeup_callback);
359 return wakeup_callback(reason, std::move(thread), std::move(object), index);
360} 404}
361 405
362void Thread::SetActivity(ThreadActivity value) { 406ResultCode Thread::SetActivity(ThreadActivity value) {
363 activity = value; 407 SchedulerLock lock(kernel);
408
409 auto sched_status = GetSchedulingStatus();
410
411 if (sched_status != ThreadSchedStatus::Runnable && sched_status != ThreadSchedStatus::Paused) {
412 return ERR_INVALID_STATE;
413 }
414
415 if (IsPendingTermination()) {
416 return RESULT_SUCCESS;
417 }
364 418
365 if (value == ThreadActivity::Paused) { 419 if (value == ThreadActivity::Paused) {
366 // Set status if not waiting 420 if ((pausing_state & static_cast<u32>(ThreadSchedFlags::ThreadPauseFlag)) != 0) {
367 if (status == ThreadStatus::Ready || status == ThreadStatus::Running) { 421 return ERR_INVALID_STATE;
368 SetStatus(ThreadStatus::Paused); 422 }
369 kernel.PrepareReschedule(processor_id); 423 AddSchedulingFlag(ThreadSchedFlags::ThreadPauseFlag);
424 } else {
425 if ((pausing_state & static_cast<u32>(ThreadSchedFlags::ThreadPauseFlag)) == 0) {
426 return ERR_INVALID_STATE;
370 } 427 }
371 } else if (status == ThreadStatus::Paused) { 428 RemoveSchedulingFlag(ThreadSchedFlags::ThreadPauseFlag);
372 // Ready to reschedule
373 ResumeFromWait();
374 } 429 }
430 return RESULT_SUCCESS;
375} 431}
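
SetActivity() above no longer flips ThreadStatus directly; pausing is tracked as a flag bit OR-ed into scheduling_state next to the scheduling status held in the low bits (see AddSchedulingFlag/RemoveSchedulingFlag further down). A small sketch of that bookkeeping, using illustrative constants rather than the kernel's exact masks:

    #include <cstdint>
    #include <iostream>

    enum class SchedStatus : std::uint32_t { Runnable = 1, Paused = 2 };
    constexpr std::uint32_t ThreadPauseFlag = 0x10; // hypothetical flag value

    struct ThreadFlagsSketch {
        std::uint32_t scheduling_state = static_cast<std::uint32_t>(SchedStatus::Runnable);
        std::uint32_t pausing_state = 0;

        void AddPauseFlag() {
            pausing_state |= ThreadPauseFlag;
            // low bits keep the scheduling status, high bits carry the pause flags
            scheduling_state = (scheduling_state & 0xF) | pausing_state;
        }
        bool IsPaused() const { return pausing_state != 0; }
    };

    int main() {
        ThreadFlagsSketch t;
        t.AddPauseFlag();
        std::cout << std::hex << t.scheduling_state << '\n'; // prints 11 (0x11)
        std::cout << std::boolalpha << t.IsPaused() << '\n'; // prints true
    }
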
376 432
377void Thread::Sleep(s64 nanoseconds) { 433ResultCode Thread::Sleep(s64 nanoseconds) {
378 // Sleep current thread and check for next thread to schedule 434 Handle event_handle{};
379 SetStatus(ThreadStatus::WaitSleep); 435 {
436 SchedulerLockAndSleep lock(kernel, event_handle, this, nanoseconds);
437 SetStatus(ThreadStatus::WaitSleep);
438 }
380 439
381 // Create an event to wake the thread up after the specified nanosecond delay has passed 440 if (event_handle != InvalidHandle) {
382 WakeAfterDelay(nanoseconds); 441 auto& time_manager = kernel.TimeManager();
442 time_manager.UnscheduleTimeEvent(event_handle);
443 }
444 return RESULT_SUCCESS;
445}
446
447std::pair<ResultCode, bool> Thread::YieldSimple() {
448 bool is_redundant = false;
449 {
450 SchedulerLock lock(kernel);
451 is_redundant = kernel.GlobalScheduler().YieldThread(this);
452 }
453 return {RESULT_SUCCESS, is_redundant};
454}
455
456std::pair<ResultCode, bool> Thread::YieldAndBalanceLoad() {
457 bool is_redundant = false;
458 {
459 SchedulerLock lock(kernel);
460 is_redundant = kernel.GlobalScheduler().YieldThreadAndBalanceLoad(this);
461 }
462 return {RESULT_SUCCESS, is_redundant};
383} 463}
384 464
385bool Thread::YieldSimple() { 465std::pair<ResultCode, bool> Thread::YieldAndWaitForLoadBalancing() {
386 auto& scheduler = kernel.GlobalScheduler(); 466 bool is_redundant = false;
387 return scheduler.YieldThread(this); 467 {
468 SchedulerLock lock(kernel);
469 is_redundant = kernel.GlobalScheduler().YieldThreadAndWaitForLoadBalancing(this);
470 }
471 return {RESULT_SUCCESS, is_redundant};
388} 472}
389 473
390bool Thread::YieldAndBalanceLoad() { 474void Thread::AddSchedulingFlag(ThreadSchedFlags flag) {
391 auto& scheduler = kernel.GlobalScheduler(); 475 const u32 old_state = scheduling_state;
392 return scheduler.YieldThreadAndBalanceLoad(this); 476 pausing_state |= static_cast<u32>(flag);
477 const u32 base_scheduling = static_cast<u32>(GetSchedulingStatus());
478 scheduling_state = base_scheduling | pausing_state;
479 kernel.GlobalScheduler().AdjustSchedulingOnStatus(this, old_state);
393} 480}
394 481
395bool Thread::YieldAndWaitForLoadBalancing() { 482void Thread::RemoveSchedulingFlag(ThreadSchedFlags flag) {
396 auto& scheduler = kernel.GlobalScheduler(); 483 const u32 old_state = scheduling_state;
397 return scheduler.YieldThreadAndWaitForLoadBalancing(this); 484 pausing_state &= ~static_cast<u32>(flag);
485 const u32 base_scheduling = static_cast<u32>(GetSchedulingStatus());
486 scheduling_state = base_scheduling | pausing_state;
487 kernel.GlobalScheduler().AdjustSchedulingOnStatus(this, old_state);
398} 488}
399 489
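
The three yield calls now take the scheduler lock and report back a pair: a ResultCode plus a flag saying whether the yield was redundant (nothing else was runnable). A usage sketch with simplified stand-in types:

    #include <iostream>
    #include <mutex>
    #include <utility>

    using ResultCode = int;                 // stand-in for the kernel's result type
    constexpr ResultCode RESULT_SUCCESS = 0;

    std::mutex scheduler_mutex;             // stand-in for the global scheduler lock
    bool YieldThread() { return true; }     // pretend no other thread was runnable

    std::pair<ResultCode, bool> YieldSimpleSketch() {
        bool is_redundant = false;
        {
            std::scoped_lock lock{scheduler_mutex}; // mirrors the locked block above
            is_redundant = YieldThread();
        }
        return {RESULT_SUCCESS, is_redundant};
    }

    int main() {
        const auto [result, redundant] = YieldSimpleSketch();
        std::cout << "result=" << result << " redundant=" << std::boolalpha << redundant << '\n';
    }
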
400void Thread::SetSchedulingStatus(ThreadSchedStatus new_status) { 490void Thread::SetSchedulingStatus(ThreadSchedStatus new_status) {
401 const u32 old_flags = scheduling_state; 491 const u32 old_state = scheduling_state;
402 scheduling_state = (scheduling_state & static_cast<u32>(ThreadSchedMasks::HighMask)) | 492 scheduling_state = (scheduling_state & static_cast<u32>(ThreadSchedMasks::HighMask)) |
403 static_cast<u32>(new_status); 493 static_cast<u32>(new_status);
404 AdjustSchedulingOnStatus(old_flags); 494 kernel.GlobalScheduler().AdjustSchedulingOnStatus(this, old_state);
405} 495}
406 496
407void Thread::SetCurrentPriority(u32 new_priority) { 497void Thread::SetCurrentPriority(u32 new_priority) {
408 const u32 old_priority = std::exchange(current_priority, new_priority); 498 const u32 old_priority = std::exchange(current_priority, new_priority);
409 AdjustSchedulingOnPriority(old_priority); 499 kernel.GlobalScheduler().AdjustSchedulingOnPriority(this, old_priority);
410} 500}
411 501
412ResultCode Thread::SetCoreAndAffinityMask(s32 new_core, u64 new_affinity_mask) { 502ResultCode Thread::SetCoreAndAffinityMask(s32 new_core, u64 new_affinity_mask) {
503 SchedulerLock lock(kernel);
413 const auto HighestSetCore = [](u64 mask, u32 max_cores) { 504 const auto HighestSetCore = [](u64 mask, u32 max_cores) {
414 for (s32 core = static_cast<s32>(max_cores - 1); core >= 0; core--) { 505 for (s32 core = static_cast<s32>(max_cores - 1); core >= 0; core--) {
415 if (((mask >> core) & 1) != 0) { 506 if (((mask >> core) & 1) != 0) {
@@ -443,111 +534,12 @@ ResultCode Thread::SetCoreAndAffinityMask(s32 new_core, u64 new_affinity_mask) {
443 processor_id = ideal_core; 534 processor_id = ideal_core;
444 } 535 }
445 } 536 }
446 AdjustSchedulingOnAffinity(old_affinity_mask, old_core); 537 kernel.GlobalScheduler().AdjustSchedulingOnAffinity(this, old_affinity_mask, old_core);
447 } 538 }
448 } 539 }
449 return RESULT_SUCCESS; 540 return RESULT_SUCCESS;
450} 541}
451 542
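
SetCoreAndAffinityMask() relies on the convention that bit N of the affinity mask allows the thread to run on core N, with a reverse scan picking the highest allowed core. A self-contained sketch of that scan (HighestSetCore mirrors the local lambda above):

    #include <cstdint>
    #include <iostream>

    int HighestSetCore(std::uint64_t mask, int max_cores) {
        for (int core = max_cores - 1; core >= 0; core--) {
            if (((mask >> core) & 1) != 0) {
                return core;
            }
        }
        return -1; // empty mask: no core is allowed
    }

    int main() {
        const std::uint64_t affinity_mask = 0b1010; // cores 1 and 3 allowed
        std::cout << HighestSetCore(affinity_mask, 4) << '\n'; // prints 3
    }
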
452void Thread::AdjustSchedulingOnStatus(u32 old_flags) {
453 if (old_flags == scheduling_state) {
454 return;
455 }
456
457 auto& scheduler = kernel.GlobalScheduler();
458 if (static_cast<ThreadSchedStatus>(old_flags & static_cast<u32>(ThreadSchedMasks::LowMask)) ==
459 ThreadSchedStatus::Runnable) {
 460 // In this case the thread was running, now it's pausing/exiting
461 if (processor_id >= 0) {
462 scheduler.Unschedule(current_priority, static_cast<u32>(processor_id), this);
463 }
464
465 for (u32 core = 0; core < Core::Hardware::NUM_CPU_CORES; core++) {
466 if (core != static_cast<u32>(processor_id) && ((affinity_mask >> core) & 1) != 0) {
467 scheduler.Unsuggest(current_priority, core, this);
468 }
469 }
470 } else if (GetSchedulingStatus() == ThreadSchedStatus::Runnable) {
471 // The thread is now set to running from being stopped
472 if (processor_id >= 0) {
473 scheduler.Schedule(current_priority, static_cast<u32>(processor_id), this);
474 }
475
476 for (u32 core = 0; core < Core::Hardware::NUM_CPU_CORES; core++) {
477 if (core != static_cast<u32>(processor_id) && ((affinity_mask >> core) & 1) != 0) {
478 scheduler.Suggest(current_priority, core, this);
479 }
480 }
481 }
482
483 scheduler.SetReselectionPending();
484}
485
486void Thread::AdjustSchedulingOnPriority(u32 old_priority) {
487 if (GetSchedulingStatus() != ThreadSchedStatus::Runnable) {
488 return;
489 }
490 auto& scheduler = kernel.GlobalScheduler();
491 if (processor_id >= 0) {
492 scheduler.Unschedule(old_priority, static_cast<u32>(processor_id), this);
493 }
494
495 for (u32 core = 0; core < Core::Hardware::NUM_CPU_CORES; core++) {
496 if (core != static_cast<u32>(processor_id) && ((affinity_mask >> core) & 1) != 0) {
497 scheduler.Unsuggest(old_priority, core, this);
498 }
499 }
500
501 // Add thread to the new priority queues.
502 Thread* current_thread = GetCurrentThread();
503
504 if (processor_id >= 0) {
505 if (current_thread == this) {
506 scheduler.SchedulePrepend(current_priority, static_cast<u32>(processor_id), this);
507 } else {
508 scheduler.Schedule(current_priority, static_cast<u32>(processor_id), this);
509 }
510 }
511
512 for (u32 core = 0; core < Core::Hardware::NUM_CPU_CORES; core++) {
513 if (core != static_cast<u32>(processor_id) && ((affinity_mask >> core) & 1) != 0) {
514 scheduler.Suggest(current_priority, core, this);
515 }
516 }
517
518 scheduler.SetReselectionPending();
519}
520
521void Thread::AdjustSchedulingOnAffinity(u64 old_affinity_mask, s32 old_core) {
522 auto& scheduler = kernel.GlobalScheduler();
523 if (GetSchedulingStatus() != ThreadSchedStatus::Runnable ||
524 current_priority >= THREADPRIO_COUNT) {
525 return;
526 }
527
528 for (u32 core = 0; core < Core::Hardware::NUM_CPU_CORES; core++) {
529 if (((old_affinity_mask >> core) & 1) != 0) {
530 if (core == static_cast<u32>(old_core)) {
531 scheduler.Unschedule(current_priority, core, this);
532 } else {
533 scheduler.Unsuggest(current_priority, core, this);
534 }
535 }
536 }
537
538 for (u32 core = 0; core < Core::Hardware::NUM_CPU_CORES; core++) {
539 if (((affinity_mask >> core) & 1) != 0) {
540 if (core == static_cast<u32>(processor_id)) {
541 scheduler.Schedule(current_priority, core, this);
542 } else {
543 scheduler.Suggest(current_priority, core, this);
544 }
545 }
546 }
547
548 scheduler.SetReselectionPending();
549}
550
551//////////////////////////////////////////////////////////////////////////////////////////////////// 543////////////////////////////////////////////////////////////////////////////////////////////////////
552 544
553/** 545/**
diff --git a/src/core/hle/kernel/thread.h b/src/core/hle/kernel/thread.h
index 23fdef8a4..c0342c462 100644
--- a/src/core/hle/kernel/thread.h
+++ b/src/core/hle/kernel/thread.h
@@ -6,26 +6,47 @@
6 6
7#include <functional> 7#include <functional>
8#include <string> 8#include <string>
9#include <utility>
9#include <vector> 10#include <vector>
10 11
11#include "common/common_types.h" 12#include "common/common_types.h"
13#include "common/spin_lock.h"
12#include "core/arm/arm_interface.h" 14#include "core/arm/arm_interface.h"
13#include "core/hle/kernel/object.h" 15#include "core/hle/kernel/object.h"
14#include "core/hle/kernel/synchronization_object.h" 16#include "core/hle/kernel/synchronization_object.h"
15#include "core/hle/result.h" 17#include "core/hle/result.h"
16 18
19namespace Common {
20class Fiber;
21}
22
23namespace Core {
24class ARM_Interface;
25class System;
26} // namespace Core
27
17namespace Kernel { 28namespace Kernel {
18 29
30class GlobalScheduler;
19class KernelCore; 31class KernelCore;
20class Process; 32class Process;
21class Scheduler; 33class Scheduler;
22 34
23enum ThreadPriority : u32 { 35enum ThreadPriority : u32 {
24 THREADPRIO_HIGHEST = 0, ///< Highest thread priority 36 THREADPRIO_HIGHEST = 0, ///< Highest thread priority
25 THREADPRIO_USERLAND_MAX = 24, ///< Highest thread priority for userland apps 37 THREADPRIO_MAX_CORE_MIGRATION = 2, ///< Highest priority for a core migration
26 THREADPRIO_DEFAULT = 44, ///< Default thread priority for userland apps 38 THREADPRIO_USERLAND_MAX = 24, ///< Highest thread priority for userland apps
27 THREADPRIO_LOWEST = 63, ///< Lowest thread priority 39 THREADPRIO_DEFAULT = 44, ///< Default thread priority for userland apps
28 THREADPRIO_COUNT = 64, ///< Total number of possible thread priorities. 40 THREADPRIO_LOWEST = 63, ///< Lowest thread priority
41 THREADPRIO_COUNT = 64, ///< Total number of possible thread priorities.
42};
43
44enum ThreadType : u32 {
45 THREADTYPE_USER = 0x1,
46 THREADTYPE_KERNEL = 0x2,
47 THREADTYPE_HLE = 0x4,
48 THREADTYPE_IDLE = 0x8,
49 THREADTYPE_SUSPEND = 0x10,
29}; 50};
30 51
31enum ThreadProcessorId : s32 { 52enum ThreadProcessorId : s32 {
@@ -107,26 +128,45 @@ public:
107 128
108 using ThreadSynchronizationObjects = std::vector<std::shared_ptr<SynchronizationObject>>; 129 using ThreadSynchronizationObjects = std::vector<std::shared_ptr<SynchronizationObject>>;
109 130
110 using WakeupCallback = 131 using HLECallback = std::function<bool(std::shared_ptr<Thread> thread)>;
111 std::function<bool(ThreadWakeupReason reason, std::shared_ptr<Thread> thread, 132
112 std::shared_ptr<SynchronizationObject> object, std::size_t index)>; 133 /**
134 * Creates and returns a new thread. The new thread is immediately scheduled
135 * @param system The instance of the whole system
136 * @param name The friendly name desired for the thread
137 * @param entry_point The address at which the thread should start execution
138 * @param priority The thread's priority
139 * @param arg User data to pass to the thread
140 * @param processor_id The ID(s) of the processors on which the thread is desired to be run
141 * @param stack_top The address of the thread's stack top
 142 * @param owner_process The parent process for the thread; if null, the thread is a kernel thread
143 * @return A shared pointer to the newly created thread
144 */
145 static ResultVal<std::shared_ptr<Thread>> Create(Core::System& system, ThreadType type_flags,
146 std::string name, VAddr entry_point,
147 u32 priority, u64 arg, s32 processor_id,
148 VAddr stack_top, Process* owner_process);
113 149
114 /** 150 /**
115 * Creates and returns a new thread. The new thread is immediately scheduled 151 * Creates and returns a new thread. The new thread is immediately scheduled
116 * @param kernel The kernel instance this thread will be created under. 152 * @param system The instance of the whole system
117 * @param name The friendly name desired for the thread 153 * @param name The friendly name desired for the thread
118 * @param entry_point The address at which the thread should start execution 154 * @param entry_point The address at which the thread should start execution
119 * @param priority The thread's priority 155 * @param priority The thread's priority
120 * @param arg User data to pass to the thread 156 * @param arg User data to pass to the thread
121 * @param processor_id The ID(s) of the processors on which the thread is desired to be run 157 * @param processor_id The ID(s) of the processors on which the thread is desired to be run
122 * @param stack_top The address of the thread's stack top 158 * @param stack_top The address of the thread's stack top
 123 * @param owner_process The parent process for the thread 159 * @param owner_process The parent process for the thread; if null, the thread is a kernel thread
160 * @param thread_start_func The function where the host context will start.
161 * @param thread_start_parameter The parameter which will passed to host context on init
124 * @return A shared pointer to the newly created thread 162 * @return A shared pointer to the newly created thread
125 */ 163 */
126 static ResultVal<std::shared_ptr<Thread>> Create(KernelCore& kernel, std::string name, 164 static ResultVal<std::shared_ptr<Thread>> Create(Core::System& system, ThreadType type_flags,
127 VAddr entry_point, u32 priority, u64 arg, 165 std::string name, VAddr entry_point,
128 s32 processor_id, VAddr stack_top, 166 u32 priority, u64 arg, s32 processor_id,
129 Process& owner_process); 167 VAddr stack_top, Process* owner_process,
168 std::function<void(void*)>&& thread_start_func,
169 void* thread_start_parameter);
130 170
131 std::string GetName() const override { 171 std::string GetName() const override {
132 return name; 172 return name;
@@ -181,7 +221,7 @@ public:
181 void UpdatePriority(); 221 void UpdatePriority();
182 222
183 /// Changes the core that the thread is running or scheduled to run on. 223 /// Changes the core that the thread is running or scheduled to run on.
184 void ChangeCore(u32 core, u64 mask); 224 ResultCode SetCoreAndAffinityMask(s32 new_core, u64 new_affinity_mask);
185 225
186 /** 226 /**
187 * Gets the thread's thread ID 227 * Gets the thread's thread ID
@@ -194,6 +234,10 @@ public:
194 /// Resumes a thread from waiting 234 /// Resumes a thread from waiting
195 void ResumeFromWait(); 235 void ResumeFromWait();
196 236
237 void OnWakeUp();
238
239 ResultCode Start();
240
197 /// Cancels a waiting operation that this thread may or may not be within. 241 /// Cancels a waiting operation that this thread may or may not be within.
198 /// 242 ///
199 /// When the thread is within a waiting state, this will set the thread's 243 /// When the thread is within a waiting state, this will set the thread's
@@ -202,26 +246,19 @@ public:
202 /// 246 ///
203 void CancelWait(); 247 void CancelWait();
204 248
205 /** 249 void SetSynchronizationResults(SynchronizationObject* object, ResultCode result);
206 * Schedules an event to wake up the specified thread after the specified delay
207 * @param nanoseconds The time this thread will be allowed to sleep for
208 */
209 void WakeAfterDelay(s64 nanoseconds);
210 250
211 /// Cancel any outstanding wakeup events for this thread 251 Core::ARM_Interface& ArmInterface();
212 void CancelWakeupTimer();
213 252
214 /** 253 const Core::ARM_Interface& ArmInterface() const;
215 * Sets the result after the thread awakens (from svcWaitSynchronization)
216 * @param result Value to set to the returned result
217 */
218 void SetWaitSynchronizationResult(ResultCode result);
219 254
220 /** 255 SynchronizationObject* GetSignalingObject() const {
221 * Sets the output parameter value after the thread awakens (from svcWaitSynchronization) 256 return signaling_object;
222 * @param output Value to set to the output parameter 257 }
223 */ 258
224 void SetWaitSynchronizationOutput(s32 output); 259 ResultCode GetSignalingResult() const {
260 return signaling_result;
261 }
225 262
226 /** 263 /**
227 * Retrieves the index that this particular object occupies in the list of objects 264 * Retrieves the index that this particular object occupies in the list of objects
@@ -269,11 +306,6 @@ public:
269 */ 306 */
270 VAddr GetCommandBufferAddress() const; 307 VAddr GetCommandBufferAddress() const;
271 308
272 /// Returns whether this thread is waiting on objects from a WaitSynchronization call.
273 bool IsSleepingOnWait() const {
274 return status == ThreadStatus::WaitSynch;
275 }
276
277 ThreadContext32& GetContext32() { 309 ThreadContext32& GetContext32() {
278 return context_32; 310 return context_32;
279 } 311 }
@@ -290,6 +322,28 @@ public:
290 return context_64; 322 return context_64;
291 } 323 }
292 324
325 bool IsHLEThread() const {
326 return (type & THREADTYPE_HLE) != 0;
327 }
328
329 bool IsSuspendThread() const {
330 return (type & THREADTYPE_SUSPEND) != 0;
331 }
332
333 bool IsIdleThread() const {
334 return (type & THREADTYPE_IDLE) != 0;
335 }
336
337 bool WasRunning() const {
338 return was_running;
339 }
340
341 void SetWasRunning(bool value) {
342 was_running = value;
343 }
344
345 std::shared_ptr<Common::Fiber>& GetHostContext();
346
293 ThreadStatus GetStatus() const { 347 ThreadStatus GetStatus() const {
294 return status; 348 return status;
295 } 349 }
@@ -325,18 +379,18 @@ public:
325 } 379 }
326 380
327 const ThreadSynchronizationObjects& GetSynchronizationObjects() const { 381 const ThreadSynchronizationObjects& GetSynchronizationObjects() const {
328 return wait_objects; 382 return *wait_objects;
329 } 383 }
330 384
331 void SetSynchronizationObjects(ThreadSynchronizationObjects objects) { 385 void SetSynchronizationObjects(ThreadSynchronizationObjects* objects) {
332 wait_objects = std::move(objects); 386 wait_objects = objects;
333 } 387 }
334 388
335 void ClearSynchronizationObjects() { 389 void ClearSynchronizationObjects() {
336 for (const auto& waiting_object : wait_objects) { 390 for (const auto& waiting_object : *wait_objects) {
337 waiting_object->RemoveWaitingThread(SharedFrom(this)); 391 waiting_object->RemoveWaitingThread(SharedFrom(this));
338 } 392 }
339 wait_objects.clear(); 393 wait_objects->clear();
340 } 394 }
341 395
342 /// Determines whether all the objects this thread is waiting on are ready. 396 /// Determines whether all the objects this thread is waiting on are ready.
@@ -386,26 +440,35 @@ public:
386 arb_wait_address = address; 440 arb_wait_address = address;
387 } 441 }
388 442
389 bool HasWakeupCallback() const { 443 bool HasHLECallback() const {
390 return wakeup_callback != nullptr; 444 return hle_callback != nullptr;
391 } 445 }
392 446
393 void SetWakeupCallback(WakeupCallback callback) { 447 void SetHLECallback(HLECallback callback) {
394 wakeup_callback = std::move(callback); 448 hle_callback = std::move(callback);
395 } 449 }
396 450
397 void InvalidateWakeupCallback() { 451 void SetHLETimeEvent(Handle time_event) {
398 SetWakeupCallback(nullptr); 452 hle_time_event = time_event;
399 } 453 }
400 454
401 /** 455 void SetHLESyncObject(SynchronizationObject* object) {
402 * Invokes the thread's wakeup callback. 456 hle_object = object;
403 * 457 }
404 * @pre A valid wakeup callback has been set. Violating this precondition 458
405 * will cause an assertion to trigger. 459 Handle GetHLETimeEvent() const {
406 */ 460 return hle_time_event;
407 bool InvokeWakeupCallback(ThreadWakeupReason reason, std::shared_ptr<Thread> thread, 461 }
408 std::shared_ptr<SynchronizationObject> object, std::size_t index); 462
463 SynchronizationObject* GetHLESyncObject() const {
464 return hle_object;
465 }
466
467 void InvalidateHLECallback() {
468 SetHLECallback(nullptr);
469 }
470
471 bool InvokeHLECallback(std::shared_ptr<Thread> thread);
409 472
410 u32 GetIdealCore() const { 473 u32 GetIdealCore() const {
411 return ideal_core; 474 return ideal_core;
@@ -415,23 +478,19 @@ public:
415 return affinity_mask; 478 return affinity_mask;
416 } 479 }
417 480
418 ThreadActivity GetActivity() const { 481 ResultCode SetActivity(ThreadActivity value);
419 return activity;
420 }
421
422 void SetActivity(ThreadActivity value);
423 482
424 /// Sleeps this thread for the given amount of nanoseconds. 483 /// Sleeps this thread for the given amount of nanoseconds.
425 void Sleep(s64 nanoseconds); 484 ResultCode Sleep(s64 nanoseconds);
426 485
427 /// Yields this thread without rebalancing loads. 486 /// Yields this thread without rebalancing loads.
428 bool YieldSimple(); 487 std::pair<ResultCode, bool> YieldSimple();
429 488
430 /// Yields this thread and does a load rebalancing. 489 /// Yields this thread and does a load rebalancing.
431 bool YieldAndBalanceLoad(); 490 std::pair<ResultCode, bool> YieldAndBalanceLoad();
432 491
433 /// Yields this thread and if the core is left idle, loads are rebalanced 492 /// Yields this thread and if the core is left idle, loads are rebalanced
434 bool YieldAndWaitForLoadBalancing(); 493 std::pair<ResultCode, bool> YieldAndWaitForLoadBalancing();
435 494
436 void IncrementYieldCount() { 495 void IncrementYieldCount() {
437 yield_count++; 496 yield_count++;
@@ -446,6 +505,10 @@ public:
446 static_cast<u32>(ThreadSchedMasks::LowMask)); 505 static_cast<u32>(ThreadSchedMasks::LowMask));
447 } 506 }
448 507
508 bool IsRunnable() const {
509 return scheduling_state == static_cast<u32>(ThreadSchedStatus::Runnable);
510 }
511
449 bool IsRunning() const { 512 bool IsRunning() const {
450 return is_running; 513 return is_running;
451 } 514 }
@@ -466,17 +529,67 @@ public:
466 return global_handle; 529 return global_handle;
467 } 530 }
468 531
532 bool IsWaitingForArbitration() const {
533 return waiting_for_arbitration;
534 }
535
536 void WaitForArbitration(bool set) {
537 waiting_for_arbitration = set;
538 }
539
540 bool IsWaitingSync() const {
541 return is_waiting_on_sync;
542 }
543
544 void SetWaitingSync(bool is_waiting) {
545 is_waiting_on_sync = is_waiting;
546 }
547
548 bool IsPendingTermination() const {
549 return will_be_terminated || GetSchedulingStatus() == ThreadSchedStatus::Exited;
550 }
551
552 bool IsPaused() const {
553 return pausing_state != 0;
554 }
555
556 bool IsContinuousOnSVC() const {
557 return is_continuous_on_svc;
558 }
559
560 void SetContinuousOnSVC(bool is_continuous) {
561 is_continuous_on_svc = is_continuous;
562 }
563
564 bool IsPhantomMode() const {
565 return is_phantom_mode;
566 }
567
568 void SetPhantomMode(bool phantom) {
569 is_phantom_mode = phantom;
570 }
571
572 bool HasExited() const {
573 return has_exited;
574 }
575
469private: 576private:
577 friend class GlobalScheduler;
578 friend class Scheduler;
579
470 void SetSchedulingStatus(ThreadSchedStatus new_status); 580 void SetSchedulingStatus(ThreadSchedStatus new_status);
581 void AddSchedulingFlag(ThreadSchedFlags flag);
582 void RemoveSchedulingFlag(ThreadSchedFlags flag);
583
471 void SetCurrentPriority(u32 new_priority); 584 void SetCurrentPriority(u32 new_priority);
472 ResultCode SetCoreAndAffinityMask(s32 new_core, u64 new_affinity_mask);
473 585
474 void AdjustSchedulingOnStatus(u32 old_flags);
475 void AdjustSchedulingOnPriority(u32 old_priority);
476 void AdjustSchedulingOnAffinity(u64 old_affinity_mask, s32 old_core); 586 void AdjustSchedulingOnAffinity(u64 old_affinity_mask, s32 old_core);
477 587
588 Common::SpinLock context_guard{};
478 ThreadContext32 context_32{}; 589 ThreadContext32 context_32{};
479 ThreadContext64 context_64{}; 590 ThreadContext64 context_64{};
591 std::unique_ptr<Core::ARM_Interface> arm_interface{};
592 std::shared_ptr<Common::Fiber> host_context{};
480 593
481 u64 thread_id = 0; 594 u64 thread_id = 0;
482 595
@@ -485,6 +598,8 @@ private:
485 VAddr entry_point = 0; 598 VAddr entry_point = 0;
486 VAddr stack_top = 0; 599 VAddr stack_top = 0;
487 600
601 ThreadType type;
602
488 /// Nominal thread priority, as set by the emulated application. 603 /// Nominal thread priority, as set by the emulated application.
489 /// The nominal priority is the thread priority without priority 604 /// The nominal priority is the thread priority without priority
490 /// inheritance taken into account. 605 /// inheritance taken into account.
@@ -509,7 +624,10 @@ private:
509 624
510 /// Objects that the thread is waiting on, in the same order as they were 625 /// Objects that the thread is waiting on, in the same order as they were
511 /// passed to WaitSynchronization. 626 /// passed to WaitSynchronization.
512 ThreadSynchronizationObjects wait_objects; 627 ThreadSynchronizationObjects* wait_objects;
628
629 SynchronizationObject* signaling_object;
630 ResultCode signaling_result{RESULT_SUCCESS};
513 631
514 /// List of threads that are waiting for a mutex that is held by this thread. 632 /// List of threads that are waiting for a mutex that is held by this thread.
515 MutexWaitingThreads wait_mutex_threads; 633 MutexWaitingThreads wait_mutex_threads;
@@ -526,30 +644,39 @@ private:
526 644
527 /// If waiting for an AddressArbiter, this is the address being waited on. 645 /// If waiting for an AddressArbiter, this is the address being waited on.
528 VAddr arb_wait_address{0}; 646 VAddr arb_wait_address{0};
647 bool waiting_for_arbitration{};
529 648
530 /// Handle used as userdata to reference this object when inserting into the CoreTiming queue. 649 /// Handle used as userdata to reference this object when inserting into the CoreTiming queue.
531 Handle global_handle = 0; 650 Handle global_handle = 0;
532 651
533 /// Callback that will be invoked when the thread is resumed from a waiting state. If the thread 652 /// Callback for HLE Events
534 /// was waiting via WaitSynchronization then the object will be the last object that became 653 HLECallback hle_callback;
535 /// available. In case of a timeout, the object will be nullptr. 654 Handle hle_time_event;
536 WakeupCallback wakeup_callback; 655 SynchronizationObject* hle_object;
537 656
538 Scheduler* scheduler = nullptr; 657 Scheduler* scheduler = nullptr;
539 658
540 u32 ideal_core{0xFFFFFFFF}; 659 u32 ideal_core{0xFFFFFFFF};
541 u64 affinity_mask{0x1}; 660 u64 affinity_mask{0x1};
542 661
543 ThreadActivity activity = ThreadActivity::Normal;
544
545 s32 ideal_core_override = -1; 662 s32 ideal_core_override = -1;
546 u64 affinity_mask_override = 0x1; 663 u64 affinity_mask_override = 0x1;
547 u32 affinity_override_count = 0; 664 u32 affinity_override_count = 0;
548 665
549 u32 scheduling_state = 0; 666 u32 scheduling_state = 0;
667 u32 pausing_state = 0;
550 bool is_running = false; 668 bool is_running = false;
669 bool is_waiting_on_sync = false;
551 bool is_sync_cancelled = false; 670 bool is_sync_cancelled = false;
552 671
672 bool is_continuous_on_svc = false;
673
674 bool will_be_terminated = false;
675 bool is_phantom_mode = false;
676 bool has_exited = false;
677
678 bool was_running = false;
679
553 std::string name; 680 std::string name;
554}; 681};
555 682
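
The header above replaces the old four-argument WakeupCallback with a leaner HLECallback that only receives the thread; the signaled object and result now live on the thread itself (signaling_object / signaling_result). A minimal sketch of the callback flow, using simplified stand-in types rather than the real Thread class:

    #include <functional>
    #include <iostream>
    #include <memory>

    struct ThreadSketch; // forward declaration so the callback type can mention it
    using HLECallback = std::function<bool(std::shared_ptr<ThreadSketch>)>;

    struct ThreadSketch {
        HLECallback hle_callback;
        bool InvokeHLECallback(std::shared_ptr<ThreadSketch> self) {
            return hle_callback(std::move(self)); // the real code asserts the callback is set
        }
    };

    int main() {
        auto thread = std::make_shared<ThreadSketch>();
        thread->hle_callback = [](std::shared_ptr<ThreadSketch>) {
            std::cout << "woken up: finish the pending HLE request\n";
            return true; // true: the wait is complete
        };
        thread->InvokeHLECallback(thread);
    }
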
diff --git a/src/core/hle/kernel/time_manager.cpp b/src/core/hle/kernel/time_manager.cpp
index 21b290468..941305e8e 100644
--- a/src/core/hle/kernel/time_manager.cpp
+++ b/src/core/hle/kernel/time_manager.cpp
@@ -8,30 +8,37 @@
8#include "core/core_timing_util.h" 8#include "core/core_timing_util.h"
9#include "core/hle/kernel/handle_table.h" 9#include "core/hle/kernel/handle_table.h"
10#include "core/hle/kernel/kernel.h" 10#include "core/hle/kernel/kernel.h"
11#include "core/hle/kernel/scheduler.h"
11#include "core/hle/kernel/thread.h" 12#include "core/hle/kernel/thread.h"
12#include "core/hle/kernel/time_manager.h" 13#include "core/hle/kernel/time_manager.h"
13 14
14namespace Kernel { 15namespace Kernel {
15 16
16TimeManager::TimeManager(Core::System& system) : system{system} { 17TimeManager::TimeManager(Core::System& system_) : system{system_} {
17 time_manager_event_type = Core::Timing::CreateEvent( 18 time_manager_event_type = Core::Timing::CreateEvent(
18 "Kernel::TimeManagerCallback", [this](u64 thread_handle, [[maybe_unused]] s64 cycles_late) { 19 "Kernel::TimeManagerCallback", [this](u64 thread_handle, [[maybe_unused]] s64 cycles_late) {
20 SchedulerLock lock(system.Kernel());
19 Handle proper_handle = static_cast<Handle>(thread_handle); 21 Handle proper_handle = static_cast<Handle>(thread_handle);
22 if (cancelled_events[proper_handle]) {
23 return;
24 }
20 std::shared_ptr<Thread> thread = 25 std::shared_ptr<Thread> thread =
21 this->system.Kernel().RetrieveThreadFromGlobalHandleTable(proper_handle); 26 this->system.Kernel().RetrieveThreadFromGlobalHandleTable(proper_handle);
22 thread->ResumeFromWait(); 27 thread->OnWakeUp();
23 }); 28 });
24} 29}
25 30
26void TimeManager::ScheduleTimeEvent(Handle& event_handle, Thread* timetask, s64 nanoseconds) { 31void TimeManager::ScheduleTimeEvent(Handle& event_handle, Thread* timetask, s64 nanoseconds) {
32 event_handle = timetask->GetGlobalHandle();
27 if (nanoseconds > 0) { 33 if (nanoseconds > 0) {
28 ASSERT(timetask); 34 ASSERT(timetask);
29 event_handle = timetask->GetGlobalHandle(); 35 ASSERT(timetask->GetStatus() != ThreadStatus::Ready);
30 const s64 cycles = Core::Timing::nsToCycles(std::chrono::nanoseconds{nanoseconds}); 36 ASSERT(timetask->GetStatus() != ThreadStatus::WaitMutex);
31 system.CoreTiming().ScheduleEvent(cycles, time_manager_event_type, event_handle); 37 system.CoreTiming().ScheduleEvent(nanoseconds, time_manager_event_type, event_handle);
32 } else { 38 } else {
33 event_handle = InvalidHandle; 39 event_handle = InvalidHandle;
34 } 40 }
41 cancelled_events[event_handle] = false;
35} 42}
36 43
37void TimeManager::UnscheduleTimeEvent(Handle event_handle) { 44void TimeManager::UnscheduleTimeEvent(Handle event_handle) {
@@ -39,6 +46,12 @@ void TimeManager::UnscheduleTimeEvent(Handle event_handle) {
39 return; 46 return;
40 } 47 }
41 system.CoreTiming().UnscheduleEvent(time_manager_event_type, event_handle); 48 system.CoreTiming().UnscheduleEvent(time_manager_event_type, event_handle);
49 cancelled_events[event_handle] = true;
50}
51
52void TimeManager::CancelTimeEvent(Thread* time_task) {
53 Handle event_handle = time_task->GetGlobalHandle();
54 UnscheduleTimeEvent(event_handle);
42} 55}
43 56
44} // namespace Kernel 57} // namespace Kernel
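
The key change above is the cancelled_events map: UnscheduleTimeEvent() marks a handle as cancelled, and the timer callback checks that mark under the scheduler lock before waking the thread, closing the race between cancellation and an already-queued event. A compact sketch of that bookkeeping (Handle is a stand-in alias here):

    #include <iostream>
    #include <unordered_map>

    using Handle = unsigned int;

    struct TimeManagerSketch {
        std::unordered_map<Handle, bool> cancelled_events;

        void Schedule(Handle h) { cancelled_events[h] = false; }
        void Unschedule(Handle h) { cancelled_events[h] = true; }
        void OnTimer(Handle h) {
            if (cancelled_events[h]) {
                return; // cancelled after scheduling but before firing
            }
            std::cout << "wake thread for handle " << h << '\n';
        }
    };

    int main() {
        TimeManagerSketch tm;
        tm.Schedule(42);
        tm.Unschedule(42);
        tm.OnTimer(42); // prints nothing: the wakeup was cancelled in time
    }
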
diff --git a/src/core/hle/kernel/time_manager.h b/src/core/hle/kernel/time_manager.h
index eaec486d1..307a18765 100644
--- a/src/core/hle/kernel/time_manager.h
+++ b/src/core/hle/kernel/time_manager.h
@@ -5,6 +5,7 @@
5#pragma once 5#pragma once
6 6
7#include <memory> 7#include <memory>
8#include <unordered_map>
8 9
9#include "core/hle/kernel/object.h" 10#include "core/hle/kernel/object.h"
10 11
@@ -35,9 +36,12 @@ public:
35 /// Unschedule an existing time event 36 /// Unschedule an existing time event
36 void UnscheduleTimeEvent(Handle event_handle); 37 void UnscheduleTimeEvent(Handle event_handle);
37 38
39 void CancelTimeEvent(Thread* time_task);
40
38private: 41private:
39 Core::System& system; 42 Core::System& system;
40 std::shared_ptr<Core::Timing::EventType> time_manager_event_type; 43 std::shared_ptr<Core::Timing::EventType> time_manager_event_type;
44 std::unordered_map<Handle, bool> cancelled_events;
41}; 45};
42 46
43} // namespace Kernel 47} // namespace Kernel
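
Callers use ScheduleTimeEvent() through an out-parameter: the handle it hands back is what UnscheduleTimeEvent() later needs, and a non-positive timeout (wait forever) produces InvalidHandle with nothing scheduled. A usage sketch with simplified stand-ins:

    #include <iostream>

    using Handle = unsigned int;
    constexpr Handle InvalidHandle = 0;

    // Mimics the out-parameter convention above; a positive timeout arms the timer.
    void ScheduleTimeEvent(Handle& event_handle, Handle thread_handle, long long nanoseconds) {
        event_handle = nanoseconds > 0 ? thread_handle : InvalidHandle;
    }

    int main() {
        Handle event_handle{};
        ScheduleTimeEvent(event_handle, 42, -1); // -1 means wait forever
        std::cout << (event_handle == InvalidHandle ? "no timer armed\n" : "timer armed\n");
    }
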
diff --git a/src/core/hle/service/acc/acc.cpp b/src/core/hle/service/acc/acc.cpp
index 630a8b048..94d8c1fc6 100644
--- a/src/core/hle/service/acc/acc.cpp
+++ b/src/core/hle/service/acc/acc.cpp
@@ -44,6 +44,218 @@ static constexpr u32 SanitizeJPEGSize(std::size_t size) {
44 return static_cast<u32>(std::min(size, max_jpeg_image_size)); 44 return static_cast<u32>(std::min(size, max_jpeg_image_size));
45} 45}
46 46
47class IManagerForSystemService final : public ServiceFramework<IManagerForSystemService> {
48public:
49 explicit IManagerForSystemService(Common::UUID user_id)
50 : ServiceFramework("IManagerForSystemService") {
51 // clang-format off
52 static const FunctionInfo functions[] = {
53 {0, nullptr, "CheckAvailability"},
54 {1, nullptr, "GetAccountId"},
55 {2, nullptr, "EnsureIdTokenCacheAsync"},
56 {3, nullptr, "LoadIdTokenCache"},
57 {100, nullptr, "SetSystemProgramIdentification"},
58 {101, nullptr, "RefreshNotificationTokenAsync"}, // 7.0.0+
59 {110, nullptr, "GetServiceEntryRequirementCache"}, // 4.0.0+
60 {111, nullptr, "InvalidateServiceEntryRequirementCache"}, // 4.0.0+
61 {112, nullptr, "InvalidateTokenCache"}, // 4.0.0 - 6.2.0
62 {113, nullptr, "GetServiceEntryRequirementCacheForOnlinePlay"}, // 6.1.0+
63 {120, nullptr, "GetNintendoAccountId"},
64 {121, nullptr, "CalculateNintendoAccountAuthenticationFingerprint"}, // 9.0.0+
65 {130, nullptr, "GetNintendoAccountUserResourceCache"},
66 {131, nullptr, "RefreshNintendoAccountUserResourceCacheAsync"},
67 {132, nullptr, "RefreshNintendoAccountUserResourceCacheAsyncIfSecondsElapsed"},
68 {133, nullptr, "GetNintendoAccountVerificationUrlCache"}, // 9.0.0+
69 {134, nullptr, "RefreshNintendoAccountVerificationUrlCache"}, // 9.0.0+
70 {135, nullptr, "RefreshNintendoAccountVerificationUrlCacheAsyncIfSecondsElapsed"}, // 9.0.0+
71 {140, nullptr, "GetNetworkServiceLicenseCache"}, // 5.0.0+
72 {141, nullptr, "RefreshNetworkServiceLicenseCacheAsync"}, // 5.0.0+
73 {142, nullptr, "RefreshNetworkServiceLicenseCacheAsyncIfSecondsElapsed"}, // 5.0.0+
74 {150, nullptr, "CreateAuthorizationRequest"},
75 };
76 // clang-format on
77
78 RegisterHandlers(functions);
79 }
80};
81
82// 3.0.0+
83class IFloatingRegistrationRequest final : public ServiceFramework<IFloatingRegistrationRequest> {
84public:
85 explicit IFloatingRegistrationRequest(Common::UUID user_id)
86 : ServiceFramework("IFloatingRegistrationRequest") {
87 // clang-format off
88 static const FunctionInfo functions[] = {
89 {0, nullptr, "GetSessionId"},
90 {12, nullptr, "GetAccountId"},
91 {13, nullptr, "GetLinkedNintendoAccountId"},
92 {14, nullptr, "GetNickname"},
93 {15, nullptr, "GetProfileImage"},
94 {21, nullptr, "LoadIdTokenCache"},
95 {100, nullptr, "RegisterUser"}, // [1.0.0-3.0.2] RegisterAsync
96 {101, nullptr, "RegisterUserWithUid"}, // [1.0.0-3.0.2] RegisterWithUidAsync
97 {102, nullptr, "RegisterNetworkServiceAccountAsync"}, // 4.0.0+
98 {103, nullptr, "RegisterNetworkServiceAccountWithUidAsync"}, // 4.0.0+
99 {110, nullptr, "SetSystemProgramIdentification"},
100 {111, nullptr, "EnsureIdTokenCacheAsync"},
101 };
102 // clang-format on
103
104 RegisterHandlers(functions);
105 }
106};
107
108class IAdministrator final : public ServiceFramework<IAdministrator> {
109public:
110 explicit IAdministrator(Common::UUID user_id) : ServiceFramework("IAdministrator") {
111 // clang-format off
112 static const FunctionInfo functions[] = {
113 {0, nullptr, "CheckAvailability"},
114 {1, nullptr, "GetAccountId"},
115 {2, nullptr, "EnsureIdTokenCacheAsync"},
116 {3, nullptr, "LoadIdTokenCache"},
117 {100, nullptr, "SetSystemProgramIdentification"},
118 {101, nullptr, "RefreshNotificationTokenAsync"}, // 7.0.0+
119 {110, nullptr, "GetServiceEntryRequirementCache"}, // 4.0.0+
120 {111, nullptr, "InvalidateServiceEntryRequirementCache"}, // 4.0.0+
121 {112, nullptr, "InvalidateTokenCache"}, // 4.0.0 - 6.2.0
122 {113, nullptr, "GetServiceEntryRequirementCacheForOnlinePlay"}, // 6.1.0+
123 {120, nullptr, "GetNintendoAccountId"},
124 {121, nullptr, "CalculateNintendoAccountAuthenticationFingerprint"}, // 9.0.0+
125 {130, nullptr, "GetNintendoAccountUserResourceCache"},
126 {131, nullptr, "RefreshNintendoAccountUserResourceCacheAsync"},
127 {132, nullptr, "RefreshNintendoAccountUserResourceCacheAsyncIfSecondsElapsed"},
128 {133, nullptr, "GetNintendoAccountVerificationUrlCache"}, // 9.0.0+
129 {134, nullptr, "RefreshNintendoAccountVerificationUrlCacheAsync"}, // 9.0.0+
130 {135, nullptr, "RefreshNintendoAccountVerificationUrlCacheAsyncIfSecondsElapsed"}, // 9.0.0+
131 {140, nullptr, "GetNetworkServiceLicenseCache"}, // 5.0.0+
132 {141, nullptr, "RefreshNetworkServiceLicenseCacheAsync"}, // 5.0.0+
133 {142, nullptr, "RefreshNetworkServiceLicenseCacheAsyncIfSecondsElapsed"}, // 5.0.0+
134 {150, nullptr, "CreateAuthorizationRequest"},
135 {200, nullptr, "IsRegistered"},
136 {201, nullptr, "RegisterAsync"},
137 {202, nullptr, "UnregisterAsync"},
138 {203, nullptr, "DeleteRegistrationInfoLocally"},
139 {220, nullptr, "SynchronizeProfileAsync"},
140 {221, nullptr, "UploadProfileAsync"},
141 {222, nullptr, "SynchronizaProfileAsyncIfSecondsElapsed"},
142 {250, nullptr, "IsLinkedWithNintendoAccount"},
143 {251, nullptr, "CreateProcedureToLinkWithNintendoAccount"},
144 {252, nullptr, "ResumeProcedureToLinkWithNintendoAccount"},
145 {255, nullptr, "CreateProcedureToUpdateLinkageStateOfNintendoAccount"},
146 {256, nullptr, "ResumeProcedureToUpdateLinkageStateOfNintendoAccount"},
147 {260, nullptr, "CreateProcedureToLinkNnidWithNintendoAccount"}, // 3.0.0+
148 {261, nullptr, "ResumeProcedureToLinkNnidWithNintendoAccount"}, // 3.0.0+
149 {280, nullptr, "ProxyProcedureToAcquireApplicationAuthorizationForNintendoAccount"},
150 {290, nullptr, "GetRequestForNintendoAccountUserResourceView"}, // 8.0.0+
151 {300, nullptr, "TryRecoverNintendoAccountUserStateAsync"}, // 6.0.0+
152 {400, nullptr, "IsServiceEntryRequirementCacheRefreshRequiredForOnlinePlay"}, // 6.1.0+
153 {401, nullptr, "RefreshServiceEntryRequirementCacheForOnlinePlayAsync"}, // 6.1.0+
154 {900, nullptr, "GetAuthenticationInfoForWin"}, // 9.0.0+
155 {901, nullptr, "ImportAsyncForWin"}, // 9.0.0+
156 {997, nullptr, "DebugUnlinkNintendoAccountAsync"},
157 {998, nullptr, "DebugSetAvailabilityErrorDetail"},
158 };
159 // clang-format on
160
161 RegisterHandlers(functions);
162 }
163};
164
165class IAuthorizationRequest final : public ServiceFramework<IAuthorizationRequest> {
166public:
167 explicit IAuthorizationRequest(Common::UUID user_id)
168 : ServiceFramework("IAuthorizationRequest") {
169 // clang-format off
170 static const FunctionInfo functions[] = {
171 {0, nullptr, "GetSessionId"},
172 {10, nullptr, "InvokeWithoutInteractionAsync"},
173 {19, nullptr, "IsAuthorized"},
174 {20, nullptr, "GetAuthorizationCode"},
175 {21, nullptr, "GetIdToken"},
176 {22, nullptr, "GetState"},
177 };
178 // clang-format on
179
180 RegisterHandlers(functions);
181 }
182};
183
184class IOAuthProcedure final : public ServiceFramework<IOAuthProcedure> {
185public:
186 explicit IOAuthProcedure(Common::UUID user_id) : ServiceFramework("IOAuthProcedure") {
187 // clang-format off
188 static const FunctionInfo functions[] = {
189 {0, nullptr, "PrepareAsync"},
190 {1, nullptr, "GetRequest"},
191 {2, nullptr, "ApplyResponse"},
192 {3, nullptr, "ApplyResponseAsync"},
193 {10, nullptr, "Suspend"},
194 };
195 // clang-format on
196
197 RegisterHandlers(functions);
198 }
199};
200
201// 3.0.0+
202class IOAuthProcedureForExternalNsa final : public ServiceFramework<IOAuthProcedureForExternalNsa> {
203public:
204 explicit IOAuthProcedureForExternalNsa(Common::UUID user_id)
205 : ServiceFramework("IOAuthProcedureForExternalNsa") {
206 // clang-format off
207 static const FunctionInfo functions[] = {
208 {0, nullptr, "PrepareAsync"},
209 {1, nullptr, "GetRequest"},
210 {2, nullptr, "ApplyResponse"},
211 {3, nullptr, "ApplyResponseAsync"},
212 {10, nullptr, "Suspend"},
213 {100, nullptr, "GetAccountId"},
214 {101, nullptr, "GetLinkedNintendoAccountId"},
215 {102, nullptr, "GetNickname"},
216 {103, nullptr, "GetProfileImage"},
217 };
218 // clang-format on
219
220 RegisterHandlers(functions);
221 }
222};
223
224class IOAuthProcedureForNintendoAccountLinkage final
225 : public ServiceFramework<IOAuthProcedureForNintendoAccountLinkage> {
226public:
227 explicit IOAuthProcedureForNintendoAccountLinkage(Common::UUID user_id)
228 : ServiceFramework("IOAuthProcedureForNintendoAccountLinkage") {
229 // clang-format off
230 static const FunctionInfo functions[] = {
231 {0, nullptr, "PrepareAsync"},
232 {1, nullptr, "GetRequest"},
233 {2, nullptr, "ApplyResponse"},
234 {3, nullptr, "ApplyResponseAsync"},
235 {10, nullptr, "Suspend"},
236 {100, nullptr, "GetRequestWithTheme"},
237 {101, nullptr, "IsNetworkServiceAccountReplaced"},
238 {199, nullptr, "GetUrlForIntroductionOfExtraMembership"}, // 2.0.0 - 5.1.0
239 };
240 // clang-format on
241
242 RegisterHandlers(functions);
243 }
244};
245
246class INotifier final : public ServiceFramework<INotifier> {
247public:
248 explicit INotifier(Common::UUID user_id) : ServiceFramework("INotifier") {
249 // clang-format off
250 static const FunctionInfo functions[] = {
251 {0, nullptr, "GetSystemEvent"},
252 };
253 // clang-format on
254
255 RegisterHandlers(functions);
256 }
257};
258
47class IProfileCommon : public ServiceFramework<IProfileCommon> { 259class IProfileCommon : public ServiceFramework<IProfileCommon> {
48public: 260public:
49 explicit IProfileCommon(const char* name, bool editor_commands, Common::UUID user_id, 261 explicit IProfileCommon(const char* name, bool editor_commands, Common::UUID user_id,
@@ -226,6 +438,54 @@ public:
226 : IProfileCommon("IProfileEditor", true, user_id, profile_manager) {} 438 : IProfileCommon("IProfileEditor", true, user_id, profile_manager) {}
227}; 439};
228 440
441class IAsyncContext final : public ServiceFramework<IAsyncContext> {
442public:
443 explicit IAsyncContext(Common::UUID user_id) : ServiceFramework("IAsyncContext") {
444 // clang-format off
445 static const FunctionInfo functions[] = {
446 {0, nullptr, "GetSystemEvent"},
447 {1, nullptr, "Cancel"},
448 {2, nullptr, "HasDone"},
449 {3, nullptr, "GetResult"},
450 };
451 // clang-format on
452
453 RegisterHandlers(functions);
454 }
455};
456
457class ISessionObject final : public ServiceFramework<ISessionObject> {
458public:
459 explicit ISessionObject(Common::UUID user_id) : ServiceFramework("ISessionObject") {
460 // clang-format off
461 static const FunctionInfo functions[] = {
462 {999, nullptr, "Dummy"},
463 };
464 // clang-format on
465
466 RegisterHandlers(functions);
467 }
468};
469
470class IGuestLoginRequest final : public ServiceFramework<IGuestLoginRequest> {
471public:
472 explicit IGuestLoginRequest(Common::UUID) : ServiceFramework("IGuestLoginRequest") {
473 // clang-format off
474 static const FunctionInfo functions[] = {
475 {0, nullptr, "GetSessionId"},
476 {11, nullptr, "Unknown"}, // 1.0.0 - 2.3.0 (the name is blank on Switchbrew)
477 {12, nullptr, "GetAccountId"},
478 {13, nullptr, "GetLinkedNintendoAccountId"},
479 {14, nullptr, "GetNickname"},
480 {15, nullptr, "GetProfileImage"},
481 {21, nullptr, "LoadIdTokenCache"}, // 3.0.0+
482 };
483 // clang-format on
484
485 RegisterHandlers(functions);
486 }
487};
488
229class IManagerForApplication final : public ServiceFramework<IManagerForApplication> { 489class IManagerForApplication final : public ServiceFramework<IManagerForApplication> {
230public: 490public:
231 explicit IManagerForApplication(Common::UUID user_id) 491 explicit IManagerForApplication(Common::UUID user_id)
@@ -265,6 +525,87 @@ private:
265 Common::UUID user_id; 525 Common::UUID user_id;
266}; 526};
267 527
528// 6.0.0+
529class IAsyncNetworkServiceLicenseKindContext final
530 : public ServiceFramework<IAsyncNetworkServiceLicenseKindContext> {
531public:
532 explicit IAsyncNetworkServiceLicenseKindContext(Common::UUID user_id)
533 : ServiceFramework("IAsyncNetworkServiceLicenseKindContext") {
534 // clang-format off
535 static const FunctionInfo functions[] = {
536 {0, nullptr, "GetSystemEvent"},
537 {1, nullptr, "Cancel"},
538 {2, nullptr, "HasDone"},
539 {3, nullptr, "GetResult"},
540 {4, nullptr, "GetNetworkServiceLicenseKind"},
541 };
542 // clang-format on
543
544 RegisterHandlers(functions);
545 }
546};
547
548// 8.0.0+
549class IOAuthProcedureForUserRegistration final
550 : public ServiceFramework<IOAuthProcedureForUserRegistration> {
551public:
552 explicit IOAuthProcedureForUserRegistration(Common::UUID user_id)
553 : ServiceFramework("IOAuthProcedureForUserRegistration") {
554 // clang-format off
555 static const FunctionInfo functions[] = {
556 {0, nullptr, "PrepareAsync"},
557 {1, nullptr, "GetRequest"},
558 {2, nullptr, "ApplyResponse"},
559 {3, nullptr, "ApplyResponseAsync"},
560 {10, nullptr, "Suspend"},
561 {100, nullptr, "GetAccountId"},
562 {101, nullptr, "GetLinkedNintendoAccountId"},
563 {102, nullptr, "GetNickname"},
564 {103, nullptr, "GetProfileImage"},
565 {110, nullptr, "RegisterUserAsync"},
566 {111, nullptr, "GetUid"},
567 };
568 // clang-format on
569
570 RegisterHandlers(functions);
571 }
572};
573
574class DAUTH_O final : public ServiceFramework<DAUTH_O> {
575public:
576 explicit DAUTH_O(Common::UUID) : ServiceFramework("dauth:o") {
577 // clang-format off
578 static const FunctionInfo functions[] = {
579 {0, nullptr, "EnsureAuthenticationTokenCacheAsync"}, // [5.0.0-5.1.0] GeneratePostData
580 {1, nullptr, "LoadAuthenticationTokenCache"}, // 6.0.0+
581 {2, nullptr, "InvalidateAuthenticationTokenCache"}, // 6.0.0+
582 {10, nullptr, "EnsureEdgeTokenCacheAsync"}, // 6.0.0+
583 {11, nullptr, "LoadEdgeTokenCache"}, // 6.0.0+
584 {12, nullptr, "InvalidateEdgeTokenCache"}, // 6.0.0+
585 };
586 // clang-format on
587
588 RegisterHandlers(functions);
589 }
590};
591
592// 6.0.0+
593class IAsyncResult final : public ServiceFramework<IAsyncResult> {
594public:
595 explicit IAsyncResult(Common::UUID user_id) : ServiceFramework("IAsyncResult") {
596 // clang-format off
597 static const FunctionInfo functions[] = {
598 {0, nullptr, "GetResult"},
599 {1, nullptr, "Cancel"},
600 {2, nullptr, "IsAvailable"},
601 {3, nullptr, "GetSystemEvent"},
602 };
603 // clang-format on
604
605 RegisterHandlers(functions);
606 }
607};
608
268void Module::Interface::GetUserCount(Kernel::HLERequestContext& ctx) { 609void Module::Interface::GetUserCount(Kernel::HLERequestContext& ctx) {
269 LOG_DEBUG(Service_ACC, "called"); 610 LOG_DEBUG(Service_ACC, "called");
270 IPC::ResponseBuilder rb{ctx, 3}; 611 IPC::ResponseBuilder rb{ctx, 3};
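
Note on the pattern repeated throughout this commit: each service declares a static FunctionInfo table mapping an IPC command ID to an optional handler and a name, then calls RegisterHandlers(functions). A nullptr handler marks a command that is known but unimplemented, so a request for it can be logged by name rather than failing as an unknown ID. A self-contained sketch of that shape (toy types only; yuzu's real ServiceFramework is templated over the service class and its handlers take Kernel::HLERequestContext&):

#include <cstdint>
#include <cstdio>
#include <map>
#include <span>

struct Context {}; // stand-in for Kernel::HLERequestContext

class Service {
public:
    using Handler = void (*)(Context&);
    struct FunctionInfo {
        std::uint32_t command_id;
        Handler handler; // nullptr == known but unimplemented (stub)
        const char* name;
    };

    void RegisterHandlers(std::span<const FunctionInfo> functions) {
        for (const auto& info : functions) {
            table.emplace(info.command_id, info);
        }
    }

    void Invoke(std::uint32_t command_id, Context& ctx) const {
        const auto it = table.find(command_id);
        if (it == table.end()) {
            std::printf("unknown command %u\n", static_cast<unsigned>(command_id));
        } else if (it->second.handler == nullptr) {
            std::printf("unimplemented: %s\n", it->second.name);
        } else {
            it->second.handler(ctx);
        }
    }

private:
    std::map<std::uint32_t, FunctionInfo> table;
};

int main() {
    Service notifier;
    static const Service::FunctionInfo functions[] = {
        {0, nullptr, "GetSystemEvent"}, // stub entry, as in INotifier above
    };
    notifier.RegisterHandlers(functions);
    Context ctx;
    notifier.Invoke(0, ctx); // prints "unimplemented: GetSystemEvent"
}

Under this layout, renaming an entry or appending one with a nullptr handler, which is what most hunks in this commit do, changes only logging and documentation, never dispatch.
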
diff --git a/src/core/hle/service/acc/acc_aa.cpp b/src/core/hle/service/acc/acc_aa.cpp
index 3bac6bcd1..51f119b12 100644
--- a/src/core/hle/service/acc/acc_aa.cpp
+++ b/src/core/hle/service/acc/acc_aa.cpp
@@ -13,8 +13,8 @@ ACC_AA::ACC_AA(std::shared_ptr<Module> module, std::shared_ptr<ProfileManager> p
13 {0, nullptr, "EnsureCacheAsync"}, 13 {0, nullptr, "EnsureCacheAsync"},
14 {1, nullptr, "LoadCache"}, 14 {1, nullptr, "LoadCache"},
15 {2, nullptr, "GetDeviceAccountId"}, 15 {2, nullptr, "GetDeviceAccountId"},
16 {50, nullptr, "RegisterNotificationTokenAsync"}, 16 {50, nullptr, "RegisterNotificationTokenAsync"}, // 1.0.0 - 6.2.0
17 {51, nullptr, "UnregisterNotificationTokenAsync"}, 17 {51, nullptr, "UnregisterNotificationTokenAsync"}, // 1.0.0 - 6.2.0
18 }; 18 };
19 RegisterHandlers(functions); 19 RegisterHandlers(functions);
20} 20}
diff --git a/src/core/hle/service/acc/acc_su.cpp b/src/core/hle/service/acc/acc_su.cpp
index 2eefc6df5..85620bde3 100644
--- a/src/core/hle/service/acc/acc_su.cpp
+++ b/src/core/hle/service/acc/acc_su.cpp
@@ -17,28 +17,28 @@ ACC_SU::ACC_SU(std::shared_ptr<Module> module, std::shared_ptr<ProfileManager> p
17 {3, &ACC_SU::ListOpenUsers, "ListOpenUsers"}, 17 {3, &ACC_SU::ListOpenUsers, "ListOpenUsers"},
18 {4, &ACC_SU::GetLastOpenedUser, "GetLastOpenedUser"}, 18 {4, &ACC_SU::GetLastOpenedUser, "GetLastOpenedUser"},
19 {5, &ACC_SU::GetProfile, "GetProfile"}, 19 {5, &ACC_SU::GetProfile, "GetProfile"},
20 {6, nullptr, "GetProfileDigest"}, 20 {6, nullptr, "GetProfileDigest"}, // 3.0.0+
21 {50, &ACC_SU::IsUserRegistrationRequestPermitted, "IsUserRegistrationRequestPermitted"}, 21 {50, &ACC_SU::IsUserRegistrationRequestPermitted, "IsUserRegistrationRequestPermitted"},
22 {51, &ACC_SU::TrySelectUserWithoutInteraction, "TrySelectUserWithoutInteraction"}, 22 {51, &ACC_SU::TrySelectUserWithoutInteraction, "TrySelectUserWithoutInteraction"},
23 {60, nullptr, "ListOpenContextStoredUsers"}, 23 {60, nullptr, "ListOpenContextStoredUsers"}, // 5.0.0 - 5.1.0
24 {99, nullptr, "DebugActivateOpenContextRetention"}, 24 {99, nullptr, "DebugActivateOpenContextRetention"}, // 6.0.0+
25 {100, nullptr, "GetUserRegistrationNotifier"}, 25 {100, nullptr, "GetUserRegistrationNotifier"},
26 {101, nullptr, "GetUserStateChangeNotifier"}, 26 {101, nullptr, "GetUserStateChangeNotifier"},
27 {102, nullptr, "GetBaasAccountManagerForSystemService"}, 27 {102, nullptr, "GetBaasAccountManagerForSystemService"},
28 {103, nullptr, "GetBaasUserAvailabilityChangeNotifier"}, 28 {103, nullptr, "GetBaasUserAvailabilityChangeNotifier"},
29 {104, nullptr, "GetProfileUpdateNotifier"}, 29 {104, nullptr, "GetProfileUpdateNotifier"},
30 {105, nullptr, "CheckNetworkServiceAvailabilityAsync"}, 30 {105, nullptr, "CheckNetworkServiceAvailabilityAsync"}, // 4.0.0+
31 {106, nullptr, "GetProfileSyncNotifier"}, 31 {106, nullptr, "GetProfileSyncNotifier"}, // 9.0.0+
32 {110, nullptr, "StoreSaveDataThumbnail"}, 32 {110, nullptr, "StoreSaveDataThumbnail"},
33 {111, nullptr, "ClearSaveDataThumbnail"}, 33 {111, nullptr, "ClearSaveDataThumbnail"},
34 {112, nullptr, "LoadSaveDataThumbnail"}, 34 {112, nullptr, "LoadSaveDataThumbnail"},
35 {113, nullptr, "GetSaveDataThumbnailExistence"}, 35 {113, nullptr, "GetSaveDataThumbnailExistence"}, // 5.0.0+
36 {120, nullptr, "ListOpenUsersInApplication"}, 36 {120, nullptr, "ListOpenUsersInApplication"}, // 10.0.0+
37 {130, nullptr, "ActivateOpenContextRetention"}, 37 {130, nullptr, "ActivateOpenContextRetention"}, // 6.0.0+
38 {140, &ACC_SU::ListQualifiedUsers, "ListQualifiedUsers"}, 38 {140, &ACC_SU::ListQualifiedUsers, "ListQualifiedUsers"}, // 6.0.0+
39 {150, nullptr, "AuthenticateApplicationAsync"}, 39 {150, nullptr, "AuthenticateApplicationAsync"}, // 10.0.0+
40 {190, nullptr, "GetUserLastOpenedApplication"}, 40 {190, nullptr, "GetUserLastOpenedApplication"}, // 1.0.0 - 9.2.0
41 {191, nullptr, "ActivateOpenContextHolder"}, 41 {191, nullptr, "ActivateOpenContextHolder"}, // 7.0.0+
42 {200, nullptr, "BeginUserRegistration"}, 42 {200, nullptr, "BeginUserRegistration"},
43 {201, nullptr, "CompleteUserRegistration"}, 43 {201, nullptr, "CompleteUserRegistration"},
44 {202, nullptr, "CancelUserRegistration"}, 44 {202, nullptr, "CancelUserRegistration"},
@@ -46,15 +46,15 @@ ACC_SU::ACC_SU(std::shared_ptr<Module> module, std::shared_ptr<ProfileManager> p
46 {204, nullptr, "SetUserPosition"}, 46 {204, nullptr, "SetUserPosition"},
47 {205, &ACC_SU::GetProfileEditor, "GetProfileEditor"}, 47 {205, &ACC_SU::GetProfileEditor, "GetProfileEditor"},
48 {206, nullptr, "CompleteUserRegistrationForcibly"}, 48 {206, nullptr, "CompleteUserRegistrationForcibly"},
49 {210, nullptr, "CreateFloatingRegistrationRequest"}, 49 {210, nullptr, "CreateFloatingRegistrationRequest"}, // 3.0.0+
50 {211, nullptr, "CreateProcedureToRegisterUserWithNintendoAccount"}, 50 {211, nullptr, "CreateProcedureToRegisterUserWithNintendoAccount"}, // 8.0.0+
51 {212, nullptr, "ResumeProcedureToRegisterUserWithNintendoAccount"}, 51 {212, nullptr, "ResumeProcedureToRegisterUserWithNintendoAccount"}, // 8.0.0+
52 {230, nullptr, "AuthenticateServiceAsync"}, 52 {230, nullptr, "AuthenticateServiceAsync"},
53 {250, nullptr, "GetBaasAccountAdministrator"}, 53 {250, nullptr, "GetBaasAccountAdministrator"},
54 {290, nullptr, "ProxyProcedureForGuestLoginWithNintendoAccount"}, 54 {290, nullptr, "ProxyProcedureForGuestLoginWithNintendoAccount"},
55 {291, nullptr, "ProxyProcedureForFloatingRegistrationWithNintendoAccount"}, 55 {291, nullptr, "ProxyProcedureForFloatingRegistrationWithNintendoAccount"}, // 3.0.0+
56 {299, nullptr, "SuspendBackgroundDaemon"}, 56 {299, nullptr, "SuspendBackgroundDaemon"},
57 {997, nullptr, "DebugInvalidateTokenCacheForUser"}, 57 {997, nullptr, "DebugInvalidateTokenCacheForUser"}, // 3.0.0+
58 {998, nullptr, "DebugSetUserStateClose"}, 58 {998, nullptr, "DebugSetUserStateClose"},
59 {999, nullptr, "DebugSetUserStateOpen"}, 59 {999, nullptr, "DebugSetUserStateOpen"},
60 }; 60 };
diff --git a/src/core/hle/service/acc/acc_u0.cpp b/src/core/hle/service/acc/acc_u0.cpp
index fb4e7e772..49f6e20f1 100644
--- a/src/core/hle/service/acc/acc_u0.cpp
+++ b/src/core/hle/service/acc/acc_u0.cpp
@@ -17,23 +17,23 @@ ACC_U0::ACC_U0(std::shared_ptr<Module> module, std::shared_ptr<ProfileManager> p
17 {3, &ACC_U0::ListOpenUsers, "ListOpenUsers"}, 17 {3, &ACC_U0::ListOpenUsers, "ListOpenUsers"},
18 {4, &ACC_U0::GetLastOpenedUser, "GetLastOpenedUser"}, 18 {4, &ACC_U0::GetLastOpenedUser, "GetLastOpenedUser"},
19 {5, &ACC_U0::GetProfile, "GetProfile"}, 19 {5, &ACC_U0::GetProfile, "GetProfile"},
20 {6, nullptr, "GetProfileDigest"}, 20 {6, nullptr, "GetProfileDigest"}, // 3.0.0+
21 {50, &ACC_U0::IsUserRegistrationRequestPermitted, "IsUserRegistrationRequestPermitted"}, 21 {50, &ACC_U0::IsUserRegistrationRequestPermitted, "IsUserRegistrationRequestPermitted"},
22 {51, &ACC_U0::TrySelectUserWithoutInteraction, "TrySelectUserWithoutInteraction"}, 22 {51, &ACC_U0::TrySelectUserWithoutInteraction, "TrySelectUserWithoutInteraction"},
23 {60, nullptr, "ListOpenContextStoredUsers"}, 23 {60, nullptr, "ListOpenContextStoredUsers"}, // 5.0.0 - 5.1.0
24 {99, nullptr, "DebugActivateOpenContextRetention"}, 24 {99, nullptr, "DebugActivateOpenContextRetention"}, // 6.0.0+
25 {100, &ACC_U0::InitializeApplicationInfo, "InitializeApplicationInfo"}, 25 {100, &ACC_U0::InitializeApplicationInfo, "InitializeApplicationInfo"},
26 {101, &ACC_U0::GetBaasAccountManagerForApplication, "GetBaasAccountManagerForApplication"}, 26 {101, &ACC_U0::GetBaasAccountManagerForApplication, "GetBaasAccountManagerForApplication"},
27 {102, nullptr, "AuthenticateApplicationAsync"}, 27 {102, nullptr, "AuthenticateApplicationAsync"},
28 {103, nullptr, "CheckNetworkServiceAvailabilityAsync"}, 28 {103, nullptr, "CheckNetworkServiceAvailabilityAsync"}, // 4.0.0+
29 {110, nullptr, "StoreSaveDataThumbnail"}, 29 {110, nullptr, "StoreSaveDataThumbnail"},
30 {111, nullptr, "ClearSaveDataThumbnail"}, 30 {111, nullptr, "ClearSaveDataThumbnail"},
31 {120, nullptr, "CreateGuestLoginRequest"}, 31 {120, nullptr, "CreateGuestLoginRequest"},
32 {130, nullptr, "LoadOpenContext"}, 32 {130, nullptr, "LoadOpenContext"}, // 5.0.0+
33 {131, nullptr, "ListOpenContextStoredUsers"}, 33 {131, nullptr, "ListOpenContextStoredUsers"}, // 6.0.0+
34 {140, &ACC_U0::InitializeApplicationInfoRestricted, "InitializeApplicationInfoRestricted"}, 34 {140, &ACC_U0::InitializeApplicationInfoRestricted, "InitializeApplicationInfoRestricted"}, // 6.0.0+
35 {141, &ACC_U0::ListQualifiedUsers, "ListQualifiedUsers"}, 35 {141, &ACC_U0::ListQualifiedUsers, "ListQualifiedUsers"}, // 6.0.0+
36 {150, &ACC_U0::IsUserAccountSwitchLocked, "IsUserAccountSwitchLocked"}, 36 {150, &ACC_U0::IsUserAccountSwitchLocked, "IsUserAccountSwitchLocked"}, // 6.0.0+
37 }; 37 };
38 // clang-format on 38 // clang-format on
39 39
diff --git a/src/core/hle/service/acc/acc_u1.cpp b/src/core/hle/service/acc/acc_u1.cpp
index 9f29cdc82..f47004f84 100644
--- a/src/core/hle/service/acc/acc_u1.cpp
+++ b/src/core/hle/service/acc/acc_u1.cpp
@@ -17,28 +17,29 @@ ACC_U1::ACC_U1(std::shared_ptr<Module> module, std::shared_ptr<ProfileManager> p
17 {3, &ACC_U1::ListOpenUsers, "ListOpenUsers"}, 17 {3, &ACC_U1::ListOpenUsers, "ListOpenUsers"},
18 {4, &ACC_U1::GetLastOpenedUser, "GetLastOpenedUser"}, 18 {4, &ACC_U1::GetLastOpenedUser, "GetLastOpenedUser"},
19 {5, &ACC_U1::GetProfile, "GetProfile"}, 19 {5, &ACC_U1::GetProfile, "GetProfile"},
20 {6, nullptr, "GetProfileDigest"}, 20 {6, nullptr, "GetProfileDigest"}, // 3.0.0+
21 {50, &ACC_U1::IsUserRegistrationRequestPermitted, "IsUserRegistrationRequestPermitted"}, 21 {50, &ACC_U1::IsUserRegistrationRequestPermitted, "IsUserRegistrationRequestPermitted"},
22 {51, &ACC_U1::TrySelectUserWithoutInteraction, "TrySelectUserWithoutInteraction"}, 22 {51, &ACC_U1::TrySelectUserWithoutInteraction, "TrySelectUserWithoutInteraction"},
23 {60, nullptr, "ListOpenContextStoredUsers"}, 23 {60, nullptr, "ListOpenContextStoredUsers"}, // 5.0.0 - 5.1.0
24 {99, nullptr, "DebugActivateOpenContextRetention"}, 24 {99, nullptr, "DebugActivateOpenContextRetention"}, // 6.0.0+
25 {100, nullptr, "GetUserRegistrationNotifier"}, 25 {100, nullptr, "GetUserRegistrationNotifier"},
26 {101, nullptr, "GetUserStateChangeNotifier"}, 26 {101, nullptr, "GetUserStateChangeNotifier"},
27 {102, nullptr, "GetBaasAccountManagerForSystemService"}, 27 {102, nullptr, "GetBaasAccountManagerForSystemService"},
28 {103, nullptr, "GetProfileUpdateNotifier"}, 28 {103, nullptr, "GetBaasUserAvailabilityChangeNotifier"},
29 {104, nullptr, "CheckNetworkServiceAvailabilityAsync"}, 29 {104, nullptr, "GetProfileUpdateNotifier"},
30 {105, nullptr, "GetBaasUserAvailabilityChangeNotifier"}, 30 {105, nullptr, "CheckNetworkServiceAvailabilityAsync"}, // 4.0.0+
31 {106, nullptr, "GetProfileSyncNotifier"}, 31 {106, nullptr, "GetProfileSyncNotifier"}, // 9.0.0+
32 {110, nullptr, "StoreSaveDataThumbnail"}, 32 {110, nullptr, "StoreSaveDataThumbnail"},
33 {111, nullptr, "ClearSaveDataThumbnail"}, 33 {111, nullptr, "ClearSaveDataThumbnail"},
34 {112, nullptr, "LoadSaveDataThumbnail"}, 34 {112, nullptr, "LoadSaveDataThumbnail"},
35 {113, nullptr, "GetSaveDataThumbnailExistence"}, 35 {113, nullptr, "GetSaveDataThumbnailExistence"}, // 5.0.0+
36 {130, nullptr, "ActivateOpenContextRetention"}, 36 {120, nullptr, "ListOpenUsersInApplication"}, // 10.0.0+
37 {140, &ACC_U1::ListQualifiedUsers, "ListQualifiedUsers"}, 37 {130, nullptr, "ActivateOpenContextRetention"}, // 6.0.0+
38 {150, nullptr, "AuthenticateApplicationAsync"}, 38 {140, &ACC_U1::ListQualifiedUsers, "ListQualifiedUsers"}, // 6.0.0+
39 {190, nullptr, "GetUserLastOpenedApplication"}, 39 {150, nullptr, "AuthenticateApplicationAsync"}, // 10.0.0+
40 {191, nullptr, "ActivateOpenContextHolder"}, 40 {190, nullptr, "GetUserLastOpenedApplication"}, // 1.0.0 - 9.2.0
41 {997, nullptr, "DebugInvalidateTokenCacheForUser"}, 41 {191, nullptr, "ActivateOpenContextHolder"}, // 7.0.0+
42 {997, nullptr, "DebugInvalidateTokenCacheForUser"}, // 3.0.0+
42 {998, nullptr, "DebugSetUserStateClose"}, 43 {998, nullptr, "DebugSetUserStateClose"},
43 {999, nullptr, "DebugSetUserStateOpen"}, 44 {999, nullptr, "DebugSetUserStateOpen"},
44 }; 45 };
diff --git a/src/core/hle/service/am/am.cpp b/src/core/hle/service/am/am.cpp
index 4df74c4f9..20f366635 100644
--- a/src/core/hle/service/am/am.cpp
+++ b/src/core/hle/service/am/am.cpp
@@ -68,6 +68,7 @@ IWindowController::IWindowController(Core::System& system_)
68 static const FunctionInfo functions[] = { 68 static const FunctionInfo functions[] = {
69 {0, nullptr, "CreateWindow"}, 69 {0, nullptr, "CreateWindow"},
70 {1, &IWindowController::GetAppletResourceUserId, "GetAppletResourceUserId"}, 70 {1, &IWindowController::GetAppletResourceUserId, "GetAppletResourceUserId"},
71 {2, nullptr, "GetAppletResourceUserIdOfCallerApplet"},
71 {10, &IWindowController::AcquireForegroundRights, "AcquireForegroundRights"}, 72 {10, &IWindowController::AcquireForegroundRights, "AcquireForegroundRights"},
72 {11, nullptr, "ReleaseForegroundRights"}, 73 {11, nullptr, "ReleaseForegroundRights"},
73 {12, nullptr, "RejectToChangeIntoBackground"}, 74 {12, nullptr, "RejectToChangeIntoBackground"},
@@ -189,8 +190,8 @@ IDisplayController::IDisplayController() : ServiceFramework("IDisplayController"
189 {5, nullptr, "GetLastForegroundCaptureImageEx"}, 190 {5, nullptr, "GetLastForegroundCaptureImageEx"},
190 {6, nullptr, "GetLastApplicationCaptureImageEx"}, 191 {6, nullptr, "GetLastApplicationCaptureImageEx"},
191 {7, nullptr, "GetCallerAppletCaptureImageEx"}, 192 {7, nullptr, "GetCallerAppletCaptureImageEx"},
192 {8, nullptr, "TakeScreenShotOfOwnLayer"}, // 2.0.0+ 193 {8, nullptr, "TakeScreenShotOfOwnLayer"},
193 {9, nullptr, "CopyBetweenCaptureBuffers"}, // 5.0.0+ 194 {9, nullptr, "CopyBetweenCaptureBuffers"},
194 {10, nullptr, "AcquireLastApplicationCaptureBuffer"}, 195 {10, nullptr, "AcquireLastApplicationCaptureBuffer"},
195 {11, nullptr, "ReleaseLastApplicationCaptureBuffer"}, 196 {11, nullptr, "ReleaseLastApplicationCaptureBuffer"},
196 {12, nullptr, "AcquireLastForegroundCaptureBuffer"}, 197 {12, nullptr, "AcquireLastForegroundCaptureBuffer"},
@@ -200,17 +201,14 @@ IDisplayController::IDisplayController() : ServiceFramework("IDisplayController"
200 {16, nullptr, "AcquireLastApplicationCaptureBufferEx"}, 201 {16, nullptr, "AcquireLastApplicationCaptureBufferEx"},
201 {17, nullptr, "AcquireLastForegroundCaptureBufferEx"}, 202 {17, nullptr, "AcquireLastForegroundCaptureBufferEx"},
202 {18, nullptr, "AcquireCallerAppletCaptureBufferEx"}, 203 {18, nullptr, "AcquireCallerAppletCaptureBufferEx"},
203 // 2.0.0+
204 {20, nullptr, "ClearCaptureBuffer"}, 204 {20, nullptr, "ClearCaptureBuffer"},
205 {21, nullptr, "ClearAppletTransitionBuffer"}, 205 {21, nullptr, "ClearAppletTransitionBuffer"},
206 // 4.0.0+
207 {22, nullptr, "AcquireLastApplicationCaptureSharedBuffer"}, 206 {22, nullptr, "AcquireLastApplicationCaptureSharedBuffer"},
208 {23, nullptr, "ReleaseLastApplicationCaptureSharedBuffer"}, 207 {23, nullptr, "ReleaseLastApplicationCaptureSharedBuffer"},
209 {24, nullptr, "AcquireLastForegroundCaptureSharedBuffer"}, 208 {24, nullptr, "AcquireLastForegroundCaptureSharedBuffer"},
210 {25, nullptr, "ReleaseLastForegroundCaptureSharedBuffer"}, 209 {25, nullptr, "ReleaseLastForegroundCaptureSharedBuffer"},
211 {26, nullptr, "AcquireCallerAppletCaptureSharedBuffer"}, 210 {26, nullptr, "AcquireCallerAppletCaptureSharedBuffer"},
212 {27, nullptr, "ReleaseCallerAppletCaptureSharedBuffer"}, 211 {27, nullptr, "ReleaseCallerAppletCaptureSharedBuffer"},
213 // 6.0.0+
214 {28, nullptr, "TakeScreenShotOfOwnLayerEx"}, 212 {28, nullptr, "TakeScreenShotOfOwnLayerEx"},
215 }; 213 };
216 // clang-format on 214 // clang-format on
@@ -225,7 +223,7 @@ IDebugFunctions::IDebugFunctions() : ServiceFramework{"IDebugFunctions"} {
225 static const FunctionInfo functions[] = { 223 static const FunctionInfo functions[] = {
226 {0, nullptr, "NotifyMessageToHomeMenuForDebug"}, 224 {0, nullptr, "NotifyMessageToHomeMenuForDebug"},
227 {1, nullptr, "OpenMainApplication"}, 225 {1, nullptr, "OpenMainApplication"},
228 {10, nullptr, "EmulateButtonEvent"}, 226 {10, nullptr, "PerformSystemButtonPressing"},
229 {20, nullptr, "InvalidateTransitionLayer"}, 227 {20, nullptr, "InvalidateTransitionLayer"},
230 {30, nullptr, "RequestLaunchApplicationWithUserAndArgumentForDebug"}, 228 {30, nullptr, "RequestLaunchApplicationWithUserAndArgumentForDebug"},
231 {40, nullptr, "GetAppletResourceUsageInfo"}, 229 {40, nullptr, "GetAppletResourceUsageInfo"},
@@ -267,7 +265,7 @@ ISelfController::ISelfController(Core::System& system,
267 {16, &ISelfController::SetOutOfFocusSuspendingEnabled, "SetOutOfFocusSuspendingEnabled"}, 265 {16, &ISelfController::SetOutOfFocusSuspendingEnabled, "SetOutOfFocusSuspendingEnabled"},
268 {17, nullptr, "SetControllerFirmwareUpdateSection"}, 266 {17, nullptr, "SetControllerFirmwareUpdateSection"},
269 {18, nullptr, "SetRequiresCaptureButtonShortPressedMessage"}, 267 {18, nullptr, "SetRequiresCaptureButtonShortPressedMessage"},
270 {19, &ISelfController::SetScreenShotImageOrientation, "SetScreenShotImageOrientation"}, 268 {19, &ISelfController::SetAlbumImageOrientation, "SetAlbumImageOrientation"},
271 {20, nullptr, "SetDesirableKeyboardLayout"}, 269 {20, nullptr, "SetDesirableKeyboardLayout"},
272 {40, &ISelfController::CreateManagedDisplayLayer, "CreateManagedDisplayLayer"}, 270 {40, &ISelfController::CreateManagedDisplayLayer, "CreateManagedDisplayLayer"},
273 {41, nullptr, "IsSystemBufferSharingEnabled"}, 271 {41, nullptr, "IsSystemBufferSharingEnabled"},
@@ -443,7 +441,7 @@ void ISelfController::SetOutOfFocusSuspendingEnabled(Kernel::HLERequestContext&
443 rb.Push(RESULT_SUCCESS); 441 rb.Push(RESULT_SUCCESS);
444} 442}
445 443
446void ISelfController::SetScreenShotImageOrientation(Kernel::HLERequestContext& ctx) { 444void ISelfController::SetAlbumImageOrientation(Kernel::HLERequestContext& ctx) {
447 LOG_WARNING(Service_AM, "(STUBBED) called"); 445 LOG_WARNING(Service_AM, "(STUBBED) called");
448 446
449 IPC::ResponseBuilder rb{ctx, 2}; 447 IPC::ResponseBuilder rb{ctx, 2};
@@ -607,6 +605,7 @@ ICommonStateGetter::ICommonStateGetter(Core::System& system,
607 {20, nullptr, "PushToGeneralChannel"}, 605 {20, nullptr, "PushToGeneralChannel"},
608 {30, nullptr, "GetHomeButtonReaderLockAccessor"}, 606 {30, nullptr, "GetHomeButtonReaderLockAccessor"},
609 {31, nullptr, "GetReaderLockAccessorEx"}, 607 {31, nullptr, "GetReaderLockAccessorEx"},
608 {32, nullptr, "GetWriterLockAccessorEx"},
610 {40, nullptr, "GetCradleFwVersion"}, 609 {40, nullptr, "GetCradleFwVersion"},
611 {50, &ICommonStateGetter::IsVrModeEnabled, "IsVrModeEnabled"}, 610 {50, &ICommonStateGetter::IsVrModeEnabled, "IsVrModeEnabled"},
612 {51, &ICommonStateGetter::SetVrModeEnabled, "SetVrModeEnabled"}, 611 {51, &ICommonStateGetter::SetVrModeEnabled, "SetVrModeEnabled"},
@@ -1132,6 +1131,7 @@ IApplicationFunctions::IApplicationFunctions(Core::System& system_)
1132 {24, nullptr, "GetLaunchStorageInfoForDebug"}, 1131 {24, nullptr, "GetLaunchStorageInfoForDebug"},
1133 {25, &IApplicationFunctions::ExtendSaveData, "ExtendSaveData"}, 1132 {25, &IApplicationFunctions::ExtendSaveData, "ExtendSaveData"},
1134 {26, &IApplicationFunctions::GetSaveDataSize, "GetSaveDataSize"}, 1133 {26, &IApplicationFunctions::GetSaveDataSize, "GetSaveDataSize"},
1134 {27, nullptr, "CreateCacheStorage"},
1135 {30, &IApplicationFunctions::BeginBlockingHomeButtonShortAndLongPressed, "BeginBlockingHomeButtonShortAndLongPressed"}, 1135 {30, &IApplicationFunctions::BeginBlockingHomeButtonShortAndLongPressed, "BeginBlockingHomeButtonShortAndLongPressed"},
1136 {31, &IApplicationFunctions::EndBlockingHomeButtonShortAndLongPressed, "EndBlockingHomeButtonShortAndLongPressed"}, 1136 {31, &IApplicationFunctions::EndBlockingHomeButtonShortAndLongPressed, "EndBlockingHomeButtonShortAndLongPressed"},
1137 {32, &IApplicationFunctions::BeginBlockingHomeButton, "BeginBlockingHomeButton"}, 1137 {32, &IApplicationFunctions::BeginBlockingHomeButton, "BeginBlockingHomeButton"},
@@ -1157,6 +1157,8 @@ IApplicationFunctions::IApplicationFunctions(Core::System& system_)
1157 {120, nullptr, "ExecuteProgram"}, 1157 {120, nullptr, "ExecuteProgram"},
1158 {121, nullptr, "ClearUserChannel"}, 1158 {121, nullptr, "ClearUserChannel"},
1159 {122, nullptr, "UnpopToUserChannel"}, 1159 {122, nullptr, "UnpopToUserChannel"},
1160 {123, nullptr, "GetPreviousProgramIndex"},
1161 {124, nullptr, "EnableApplicationAllThreadDumpOnCrash"},
1160 {130, &IApplicationFunctions::GetGpuErrorDetectedSystemEvent, "GetGpuErrorDetectedSystemEvent"}, 1162 {130, &IApplicationFunctions::GetGpuErrorDetectedSystemEvent, "GetGpuErrorDetectedSystemEvent"},
1161 {140, &IApplicationFunctions::GetFriendInvitationStorageChannelEvent, "GetFriendInvitationStorageChannelEvent"}, 1163 {140, &IApplicationFunctions::GetFriendInvitationStorageChannelEvent, "GetFriendInvitationStorageChannelEvent"},
1162 {141, nullptr, "TryPopFromFriendInvitationStorageChannel"}, 1164 {141, nullptr, "TryPopFromFriendInvitationStorageChannel"},
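
The handlers touched above all answer through IPC::ResponseBuilder: declare a word count, push the result, push any payload. Comparing the stubs ({ctx, 2} followed by a lone Push(RESULT_SUCCESS)) with GetUserCount ({ctx, 3} plus one u32) suggests the result code occupies two words; that reading is an inference from this diff, not a verified detail of yuzu's IPC layer. A toy builder showing the discipline:

#include <cstdint>
#include <vector>

class ResponseBuilder {
public:
    explicit ResponseBuilder(std::size_t num_words) : capacity{num_words} {}

    // Assumed two-word result code, per the {ctx, 2} stub pattern above.
    void PushResult(std::uint32_t raw_code) {
        words.push_back(raw_code);
        words.push_back(0);
    }

    void Push(std::uint32_t word) { words.push_back(word); }

    bool SizeMatches() const { return words.size() == capacity; }

private:
    std::size_t capacity;
    std::vector<std::uint32_t> words;
};

int main() {
    constexpr std::uint32_t RESULT_SUCCESS = 0;
    ResponseBuilder rb{3}; // like GetUserCount's {ctx, 3}
    rb.PushResult(RESULT_SUCCESS);
    rb.Push(0); // payload word: the user count (0 in this toy)
    return rb.SizeMatches() ? 0 : 1; // a declared/pushed mismatch would be a bug
}
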
diff --git a/src/core/hle/service/am/am.h b/src/core/hle/service/am/am.h
index 469f7f814..2f69466ec 100644
--- a/src/core/hle/service/am/am.h
+++ b/src/core/hle/service/am/am.h
@@ -138,7 +138,7 @@ private:
138 void SetFocusHandlingMode(Kernel::HLERequestContext& ctx); 138 void SetFocusHandlingMode(Kernel::HLERequestContext& ctx);
139 void SetRestartMessageEnabled(Kernel::HLERequestContext& ctx); 139 void SetRestartMessageEnabled(Kernel::HLERequestContext& ctx);
140 void SetOutOfFocusSuspendingEnabled(Kernel::HLERequestContext& ctx); 140 void SetOutOfFocusSuspendingEnabled(Kernel::HLERequestContext& ctx);
141 void SetScreenShotImageOrientation(Kernel::HLERequestContext& ctx); 141 void SetAlbumImageOrientation(Kernel::HLERequestContext& ctx);
142 void CreateManagedDisplayLayer(Kernel::HLERequestContext& ctx); 142 void CreateManagedDisplayLayer(Kernel::HLERequestContext& ctx);
143 void SetHandlesRequestToDisplay(Kernel::HLERequestContext& ctx); 143 void SetHandlesRequestToDisplay(Kernel::HLERequestContext& ctx);
144 void SetIdleTimeDetectionExtension(Kernel::HLERequestContext& ctx); 144 void SetIdleTimeDetectionExtension(Kernel::HLERequestContext& ctx);
diff --git a/src/core/hle/service/am/applets/software_keyboard.cpp b/src/core/hle/service/am/applets/software_keyboard.cpp
index 54e63c138..fbe3686ae 100644
--- a/src/core/hle/service/am/applets/software_keyboard.cpp
+++ b/src/core/hle/service/am/applets/software_keyboard.cpp
@@ -30,7 +30,7 @@ static Core::Frontend::SoftwareKeyboardParameters ConvertToFrontendParameters(
30 config.sub_text.size()); 30 config.sub_text.size());
31 params.guide_text = Common::UTF16StringFromFixedZeroTerminatedBuffer(config.guide_text.data(), 31 params.guide_text = Common::UTF16StringFromFixedZeroTerminatedBuffer(config.guide_text.data(),
32 config.guide_text.size()); 32 config.guide_text.size());
33 params.initial_text = initial_text; 33 params.initial_text = std::move(initial_text);
34 params.max_length = config.length_limit == 0 ? DEFAULT_MAX_LENGTH : config.length_limit; 34 params.max_length = config.length_limit == 0 ? DEFAULT_MAX_LENGTH : config.length_limit;
35 params.password = static_cast<bool>(config.is_password); 35 params.password = static_cast<bool>(config.is_password);
36 params.cursor_at_beginning = static_cast<bool>(config.initial_cursor_position); 36 params.cursor_at_beginning = static_cast<bool>(config.initial_cursor_position);
@@ -60,7 +60,7 @@ void SoftwareKeyboard::Initialize() {
60 std::memcpy(&config, keyboard_config.data(), sizeof(KeyboardConfig)); 60 std::memcpy(&config, keyboard_config.data(), sizeof(KeyboardConfig));
61 61
62 const auto work_buffer_storage = broker.PopNormalDataToApplet(); 62 const auto work_buffer_storage = broker.PopNormalDataToApplet();
63 ASSERT(work_buffer_storage != nullptr); 63 ASSERT_OR_EXECUTE(work_buffer_storage != nullptr, { return; });
64 const auto& work_buffer = work_buffer_storage->GetData(); 64 const auto& work_buffer = work_buffer_storage->GetData();
65 65
66 if (config.initial_string_size == 0) 66 if (config.initial_string_size == 0)
@@ -109,7 +109,7 @@ void SoftwareKeyboard::Execute() {
109 109
110 const auto parameters = ConvertToFrontendParameters(config, initial_text); 110 const auto parameters = ConvertToFrontendParameters(config, initial_text);
111 111
112 frontend.RequestText([this](std::optional<std::u16string> text) { WriteText(text); }, 112 frontend.RequestText([this](std::optional<std::u16string> text) { WriteText(std::move(text)); },
113 parameters); 113 parameters);
114} 114}
115 115
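
The software_keyboard hunks trade copies for moves and a hard assert for a recoverable bail-out: std::move on the std::u16string (and on the std::optional result) transfers the heap buffer instead of duplicating it, while ASSERT_OR_EXECUTE lets Initialize() return early when no work buffer arrived instead of asserting. A reduced, self-contained illustration of the move half (toy types, not yuzu's):

#include <cstdio>
#include <optional>
#include <string>
#include <utility>

struct Parameters {
    std::u16string initial_text;
};

// Taking the string by value and moving it in means the caller's final use
// transfers the buffer in O(1) rather than copying it.
static Parameters ConvertToParameters(std::u16string initial_text) {
    Parameters params;
    params.initial_text = std::move(initial_text);
    return params;
}

static void WriteText(std::optional<std::u16string> text) {
    std::printf("text present: %s\n", text.has_value() ? "yes" : "no");
}

int main() {
    std::u16string initial = u"hello";
    const auto params = ConvertToParameters(std::move(initial)); // move #1

    std::optional<std::u16string> result = params.initial_text;
    WriteText(std::move(result)); // move #2: the contained string, if any
}
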
diff --git a/src/core/hle/service/am/spsm.cpp b/src/core/hle/service/am/spsm.cpp
index 003ee8667..f27729ce7 100644
--- a/src/core/hle/service/am/spsm.cpp
+++ b/src/core/hle/service/am/spsm.cpp
@@ -10,17 +10,17 @@ SPSM::SPSM() : ServiceFramework{"spsm"} {
10 // clang-format off 10 // clang-format off
11 static const FunctionInfo functions[] = { 11 static const FunctionInfo functions[] = {
12 {0, nullptr, "GetState"}, 12 {0, nullptr, "GetState"},
13 {1, nullptr, "SleepSystemAndWaitAwake"}, 13 {1, nullptr, "EnterSleep"},
14 {2, nullptr, "Unknown1"}, 14 {2, nullptr, "GetLastWakeReason"},
15 {3, nullptr, "Unknown2"}, 15 {3, nullptr, "Shutdown"},
16 {4, nullptr, "GetNotificationMessageEventHandle"}, 16 {4, nullptr, "GetNotificationMessageEventHandle"},
17 {5, nullptr, "Unknown3"}, 17 {5, nullptr, "ReceiveNotificationMessage"},
18 {6, nullptr, "Unknown4"}, 18 {6, nullptr, "AnalyzeLogForLastSleepWakeSequence"},
19 {7, nullptr, "Unknown5"}, 19 {7, nullptr, "ResetEventLog"},
20 {8, nullptr, "AnalyzePerformanceLogForLastSleepWakeSequence"}, 20 {8, nullptr, "AnalyzePerformanceLogForLastSleepWakeSequence"},
21 {9, nullptr, "ChangeHomeButtonLongPressingTime"}, 21 {9, nullptr, "ChangeHomeButtonLongPressingTime"},
22 {10, nullptr, "Unknown6"}, 22 {10, nullptr, "PutErrorState"},
23 {11, nullptr, "Unknown7"}, 23 {11, nullptr, "InvalidateCurrentHomeButtonPressing"},
24 }; 24 };
25 // clang-format on 25 // clang-format on
26 26
diff --git a/src/core/hle/service/aoc/aoc_u.cpp b/src/core/hle/service/aoc/aoc_u.cpp
index 4227a4adf..8e79f707b 100644
--- a/src/core/hle/service/aoc/aoc_u.cpp
+++ b/src/core/hle/service/aoc/aoc_u.cpp
@@ -60,6 +60,7 @@ AOC_U::AOC_U(Core::System& system)
60 {6, nullptr, "PrepareAddOnContentByApplicationId"}, 60 {6, nullptr, "PrepareAddOnContentByApplicationId"},
61 {7, &AOC_U::PrepareAddOnContent, "PrepareAddOnContent"}, 61 {7, &AOC_U::PrepareAddOnContent, "PrepareAddOnContent"},
62 {8, &AOC_U::GetAddOnContentListChangedEvent, "GetAddOnContentListChangedEvent"}, 62 {8, &AOC_U::GetAddOnContentListChangedEvent, "GetAddOnContentListChangedEvent"},
63 {9, nullptr, "GetAddOnContentLostErrorCode"},
63 {100, nullptr, "CreateEcPurchasedEventManager"}, 64 {100, nullptr, "CreateEcPurchasedEventManager"},
64 {101, nullptr, "CreatePermanentEcPurchasedEventManager"}, 65 {101, nullptr, "CreatePermanentEcPurchasedEventManager"},
65 }; 66 };
diff --git a/src/core/hle/service/bcat/bcat.cpp b/src/core/hle/service/bcat/bcat.cpp
index 8bb2528c9..b31766212 100644
--- a/src/core/hle/service/bcat/bcat.cpp
+++ b/src/core/hle/service/bcat/bcat.cpp
@@ -14,6 +14,8 @@ BCAT::BCAT(Core::System& system, std::shared_ptr<Module> module,
14 {0, &BCAT::CreateBcatService, "CreateBcatService"}, 14 {0, &BCAT::CreateBcatService, "CreateBcatService"},
15 {1, &BCAT::CreateDeliveryCacheStorageService, "CreateDeliveryCacheStorageService"}, 15 {1, &BCAT::CreateDeliveryCacheStorageService, "CreateDeliveryCacheStorageService"},
16 {2, &BCAT::CreateDeliveryCacheStorageServiceWithApplicationId, "CreateDeliveryCacheStorageServiceWithApplicationId"}, 16 {2, &BCAT::CreateDeliveryCacheStorageServiceWithApplicationId, "CreateDeliveryCacheStorageServiceWithApplicationId"},
17 {3, nullptr, "CreateDeliveryCacheProgressService"},
18 {4, nullptr, "CreateDeliveryCacheProgressServiceWithApplicationId"},
17 }; 19 };
18 // clang-format on 20 // clang-format on
19 RegisterHandlers(functions); 21 RegisterHandlers(functions);
diff --git a/src/core/hle/service/bcat/module.cpp b/src/core/hle/service/bcat/module.cpp
index 34aba7a27..603b64d4f 100644
--- a/src/core/hle/service/bcat/module.cpp
+++ b/src/core/hle/service/bcat/module.cpp
@@ -143,10 +143,13 @@ public:
143 {20401, nullptr, "UnregisterSystemApplicationDeliveryTask"}, 143 {20401, nullptr, "UnregisterSystemApplicationDeliveryTask"},
144 {20410, nullptr, "SetSystemApplicationDeliveryTaskTimer"}, 144 {20410, nullptr, "SetSystemApplicationDeliveryTaskTimer"},
145 {30100, &IBcatService::SetPassphrase, "SetPassphrase"}, 145 {30100, &IBcatService::SetPassphrase, "SetPassphrase"},
146 {30101, nullptr, "Unknown"},
147 {30102, nullptr, "Unknown2"},
146 {30200, nullptr, "RegisterBackgroundDeliveryTask"}, 148 {30200, nullptr, "RegisterBackgroundDeliveryTask"},
147 {30201, nullptr, "UnregisterBackgroundDeliveryTask"}, 149 {30201, nullptr, "UnregisterBackgroundDeliveryTask"},
148 {30202, nullptr, "BlockDeliveryTask"}, 150 {30202, nullptr, "BlockDeliveryTask"},
149 {30203, nullptr, "UnblockDeliveryTask"}, 151 {30203, nullptr, "UnblockDeliveryTask"},
152 {30210, nullptr, "SetDeliveryTaskTimer"},
150 {30300, nullptr, "RegisterSystemApplicationDeliveryTasks"}, 153 {30300, nullptr, "RegisterSystemApplicationDeliveryTasks"},
151 {90100, nullptr, "EnumerateBackgroundDeliveryTask"}, 154 {90100, nullptr, "EnumerateBackgroundDeliveryTask"},
152 {90200, nullptr, "GetDeliveryList"}, 155 {90200, nullptr, "GetDeliveryList"},
diff --git a/src/core/hle/service/bpc/bpc.cpp b/src/core/hle/service/bpc/bpc.cpp
index 1c1ecdb60..fac6b2f9c 100644
--- a/src/core/hle/service/bpc/bpc.cpp
+++ b/src/core/hle/service/bpc/bpc.cpp
@@ -23,9 +23,14 @@ public:
23 {5, nullptr, "GetBoardPowerControlEvent"}, 23 {5, nullptr, "GetBoardPowerControlEvent"},
24 {6, nullptr, "GetSleepButtonState"}, 24 {6, nullptr, "GetSleepButtonState"},
25 {7, nullptr, "GetPowerEvent"}, 25 {7, nullptr, "GetPowerEvent"},
26 {8, nullptr, "Unknown1"}, 26 {8, nullptr, "CreateWakeupTimer"},
27 {9, nullptr, "Unknown2"}, 27 {9, nullptr, "CancelWakeupTimer"},
28 {10, nullptr, "Unknown3"}, 28 {10, nullptr, "EnableWakeupTimerOnDevice"},
29 {11, nullptr, "CreateWakeupTimerEx"},
30 {12, nullptr, "GetLastEnabledWakeupTimerType"},
31 {13, nullptr, "CleanAllWakeupTimers"},
32 {14, nullptr, "Unknown"},
33 {15, nullptr, "Unknown2"},
29 }; 34 };
30 // clang-format on 35 // clang-format on
31 36
@@ -38,10 +43,11 @@ public:
38 explicit BPC_R() : ServiceFramework{"bpc:r"} { 43 explicit BPC_R() : ServiceFramework{"bpc:r"} {
39 // clang-format off 44 // clang-format off
40 static const FunctionInfo functions[] = { 45 static const FunctionInfo functions[] = {
41 {0, nullptr, "GetExternalRtcValue"}, 46 {0, nullptr, "GetRtcTime"},
42 {1, nullptr, "SetExternalRtcValue"}, 47 {1, nullptr, "SetRtcTime"},
43 {2, nullptr, "ReadExternalRtcResetFlag"}, 48 {2, nullptr, "GetRtcResetDetected"},
44 {3, nullptr, "ClearExternalRtcResetFlag"}, 49 {3, nullptr, "ClearRtcResetDetected"},
50 {4, nullptr, "SetUpRtcResetOnShutdown"},
45 }; 51 };
46 // clang-format on 52 // clang-format on
47 53
diff --git a/src/core/hle/service/btdrv/btdrv.cpp b/src/core/hle/service/btdrv/btdrv.cpp
index 40a06c9fd..f311afa2f 100644
--- a/src/core/hle/service/btdrv/btdrv.cpp
+++ b/src/core/hle/service/btdrv/btdrv.cpp
@@ -58,102 +58,103 @@ public:
58 {1, nullptr, "InitializeBluetooth"}, 58 {1, nullptr, "InitializeBluetooth"},
59 {2, nullptr, "EnableBluetooth"}, 59 {2, nullptr, "EnableBluetooth"},
60 {3, nullptr, "DisableBluetooth"}, 60 {3, nullptr, "DisableBluetooth"},
61 {4, nullptr, "CleanupBluetooth"}, 61 {4, nullptr, "FinalizeBluetooth"},
62 {5, nullptr, "GetAdapterProperties"}, 62 {5, nullptr, "GetAdapterProperties"},
63 {6, nullptr, "GetAdapterProperty"}, 63 {6, nullptr, "GetAdapterProperty"},
64 {7, nullptr, "SetAdapterProperty"}, 64 {7, nullptr, "SetAdapterProperty"},
65 {8, nullptr, "StartDiscovery"}, 65 {8, nullptr, "StartInquiry"},
66 {9, nullptr, "CancelDiscovery"}, 66 {9, nullptr, "StopInquiry"},
67 {10, nullptr, "CreateBond"}, 67 {10, nullptr, "CreateBond"},
68 {11, nullptr, "RemoveBond"}, 68 {11, nullptr, "RemoveBond"},
69 {12, nullptr, "CancelBond"}, 69 {12, nullptr, "CancelBond"},
70 {13, nullptr, "PinReply"}, 70 {13, nullptr, "RespondToPinRequest"},
71 {14, nullptr, "SspReply"}, 71 {14, nullptr, "RespondToSspRequest"},
72 {15, nullptr, "GetEventInfo"}, 72 {15, nullptr, "GetEventInfo"},
73 {16, nullptr, "InitializeHid"}, 73 {16, nullptr, "InitializeHid"},
74 {17, nullptr, "HidConnect"}, 74 {17, nullptr, "OpenHidConnection"},
75 {18, nullptr, "HidDisconnect"}, 75 {18, nullptr, "CloseHidConnection"},
76 {19, nullptr, "HidSendData"}, 76 {19, nullptr, "WriteHidData"},
77 {20, nullptr, "HidSendData2"}, 77 {20, nullptr, "WriteHidData2"},
78 {21, nullptr, "HidSetReport"}, 78 {21, nullptr, "SetHidReport"},
79 {22, nullptr, "HidGetReport"}, 79 {22, nullptr, "GetHidReport"},
80 {23, nullptr, "HidWakeController"}, 80 {23, nullptr, "TriggerConnection"},
81 {24, nullptr, "HidAddPairedDevice"}, 81 {24, nullptr, "AddPairedDeviceInfo"},
82 {25, nullptr, "HidGetPairedDevice"}, 82 {25, nullptr, "GetPairedDeviceInfo"},
83 {26, nullptr, "CleanupHid"}, 83 {26, nullptr, "FinalizeHid"},
84 {27, nullptr, "HidGetEventInfo"}, 84 {27, nullptr, "GetHidEventInfo"},
85 {28, nullptr, "ExtSetTsi"}, 85 {28, nullptr, "SetTsi"},
86 {29, nullptr, "ExtSetBurstMode"}, 86 {29, nullptr, "EnableBurstMode"},
87 {30, nullptr, "ExtSetZeroRetran"}, 87 {30, nullptr, "SetZeroRetransmission"},
88 {31, nullptr, "ExtSetMcMode"}, 88 {31, nullptr, "EnableMcMode"},
89 {32, nullptr, "ExtStartLlrMode"}, 89 {32, nullptr, "EnableLlrScan"},
90 {33, nullptr, "ExtExitLlrMode"}, 90 {33, nullptr, "DisableLlrScan"},
91 {34, nullptr, "ExtSetRadio"}, 91 {34, nullptr, "EnableRadio"},
92 {35, nullptr, "ExtSetVisibility"}, 92 {35, nullptr, "SetVisibility"},
93 {36, nullptr, "ExtSetTbfcScan"}, 93 {36, nullptr, "EnableTbfcScan"},
94 {37, nullptr, "RegisterHidReportEvent"}, 94 {37, nullptr, "RegisterHidReportEvent"},
95 {38, nullptr, "HidGetReportEventInfo"}, 95 {38, nullptr, "GetHidReportEventInfo"},
96 {39, nullptr, "GetLatestPlr"}, 96 {39, nullptr, "GetLatestPlr"},
97 {40, nullptr, "ExtGetPendingConnections"}, 97 {40, nullptr, "GetPendingConnections"},
98 {41, nullptr, "GetChannelMap"}, 98 {41, nullptr, "GetChannelMap"},
99 {42, nullptr, "EnableBluetoothBoostSetting"}, 99 {42, nullptr, "EnableTxPowerBoostSetting"},
100 {43, nullptr, "IsBluetoothBoostSettingEnabled"}, 100 {43, nullptr, "IsTxPowerBoostSettingEnabled"},
101 {44, nullptr, "EnableBluetoothAfhSetting"}, 101 {44, nullptr, "EnableAfhSetting"},
102 {45, nullptr, "IsBluetoothAfhSettingEnabled"}, 102 {45, nullptr, "IsAfhSettingEnabled"},
103 {46, nullptr, "InitializeBluetoothLe"}, 103 {46, nullptr, "InitializeBle"},
104 {47, nullptr, "EnableBluetoothLe"}, 104 {47, nullptr, "EnableBle"},
105 {48, nullptr, "DisableBluetoothLe"}, 105 {48, nullptr, "DisableBle"},
106 {49, nullptr, "CleanupBluetoothLe"}, 106 {49, nullptr, "FinalizeBle"},
107 {50, nullptr, "SetLeVisibility"}, 107 {50, nullptr, "SetBleVisibility"},
108 {51, nullptr, "SetLeConnectionParameter"}, 108 {51, nullptr, "SetBleConnectionParameter"},
109 {52, nullptr, "SetLeDefaultConnectionParameter"}, 109 {52, nullptr, "SetBleDefaultConnectionParameter"},
110 {53, nullptr, "SetLeAdvertiseData"}, 110 {53, nullptr, "SetBleAdvertiseData"},
111 {54, nullptr, "SetLeAdvertiseParameter"}, 111 {54, nullptr, "SetBleAdvertiseParameter"},
112 {55, nullptr, "StartLeScan"}, 112 {55, nullptr, "StartBleScan"},
113 {56, nullptr, "StopLeScan"}, 113 {56, nullptr, "StopBleScan"},
114 {57, nullptr, "AddLeScanFilterCondition"}, 114 {57, nullptr, "AddBleScanFilterCondition"},
115 {58, nullptr, "DeleteLeScanFilterCondition"}, 115 {58, nullptr, "DeleteBleScanFilterCondition"},
116 {59, nullptr, "DeleteLeScanFilter"}, 116 {59, nullptr, "DeleteBleScanFilter"},
117 {60, nullptr, "ClearLeScanFilters"}, 117 {60, nullptr, "ClearBleScanFilters"},
118 {61, nullptr, "EnableLeScanFilter"}, 118 {61, nullptr, "EnableBleScanFilter"},
119 {62, nullptr, "RegisterLeClient"}, 119 {62, nullptr, "RegisterGattClient"},
120 {63, nullptr, "UnregisterLeClient"}, 120 {63, nullptr, "UnregisterGattClient"},
121 {64, nullptr, "UnregisterLeClientAll"}, 121 {64, nullptr, "UnregisterAllGattClients"},
122 {65, nullptr, "LeClientConnect"}, 122 {65, nullptr, "ConnectGattServer"},
123 {66, nullptr, "LeClientCancelConnection"}, 123 {66, nullptr, "CancelConnectGattServer"},
124 {67, nullptr, "LeClientDisconnect"}, 124 {67, nullptr, "DisconnectGattServer"},
125 {68, nullptr, "LeClientGetAttributes"}, 125 {68, nullptr, "GetGattAttribute"},
126 {69, nullptr, "LeClientDiscoverService"}, 126 {69, nullptr, "GetGattService"},
127 {70, nullptr, "LeClientConfigureMtu"}, 127 {70, nullptr, "ConfigureAttMtu"},
128 {71, nullptr, "RegisterLeServer"}, 128 {71, nullptr, "RegisterGattServer"},
129 {72, nullptr, "UnregisterLeServer"}, 129 {72, nullptr, "UnregisterGattServer"},
130 {73, nullptr, "LeServerConnect"}, 130 {73, nullptr, "ConnectGattClient"},
131 {74, nullptr, "LeServerDisconnect"}, 131 {74, nullptr, "DisconnectGattClient"},
132 {75, nullptr, "CreateLeService"}, 132 {75, nullptr, "AddGattService"},
133 {76, nullptr, "StartLeService"}, 133 {76, nullptr, "EnableGattService"},
134 {77, nullptr, "AddLeCharacteristic"}, 134 {77, nullptr, "AddGattCharacteristic"},
135 {78, nullptr, "AddLeDescriptor"}, 135 {78, nullptr, "AddGattDescriptor"},
136 {79, nullptr, "GetLeCoreEventInfo"}, 136 {79, nullptr, "GetBleManagedEventInfo"},
137 {80, nullptr, "LeGetFirstCharacteristic"}, 137 {80, nullptr, "GetGattFirstCharacteristic"},
138 {81, nullptr, "LeGetNextCharacteristic"}, 138 {81, nullptr, "GetGattNextCharacteristic"},
139 {82, nullptr, "LeGetFirstDescriptor"}, 139 {82, nullptr, "GetGattFirstDescriptor"},
140 {83, nullptr, "LeGetNextDescriptor"}, 140 {83, nullptr, "GetGattNextDescriptor"},
141 {84, nullptr, "RegisterLeCoreDataPath"}, 141 {84, nullptr, "RegisterGattManagedDataPath"},
142 {85, nullptr, "UnregisterLeCoreDataPath"}, 142 {85, nullptr, "UnregisterGattManagedDataPath"},
143 {86, nullptr, "RegisterLeHidDataPath"}, 143 {86, nullptr, "RegisterGattHidDataPath"},
144 {87, nullptr, "UnregisterLeHidDataPath"}, 144 {87, nullptr, "UnregisterGattHidDataPath"},
145 {88, nullptr, "RegisterLeDataPath"}, 145 {88, nullptr, "RegisterGattDataPath"},
146 {89, nullptr, "UnregisterLeDataPath"}, 146 {89, nullptr, "UnregisterGattDataPath"},
147 {90, nullptr, "LeClientReadCharacteristic"}, 147 {90, nullptr, "ReadGattCharacteristic"},
148 {91, nullptr, "LeClientReadDescriptor"}, 148 {91, nullptr, "ReadGattDescriptor"},
149 {92, nullptr, "LeClientWriteCharacteristic"}, 149 {92, nullptr, "WriteGattCharacteristic"},
150 {93, nullptr, "LeClientWriteDescriptor"}, 150 {93, nullptr, "WriteGattDescriptor"},
151 {94, nullptr, "LeClientRegisterNotification"}, 151 {94, nullptr, "RegisterGattNotification"},
152 {95, nullptr, "LeClientDeregisterNotification"}, 152 {95, nullptr, "UnregisterGattNotification"},
153 {96, nullptr, "GetLeHidEventInfo"}, 153 {96, nullptr, "GetLeHidEventInfo"},
154 {97, nullptr, "RegisterBleHidEvent"}, 154 {97, nullptr, "RegisterBleHidEvent"},
155 {98, nullptr, "SetLeScanParameter"}, 155 {98, nullptr, "SetBleScanParameter"},
156 {256, nullptr, "GetIsManufacturingMode"}, 156 {99, nullptr, "MoveToSecondaryPiconet"},
157 {256, nullptr, "IsManufacturingMode"},
157 {257, nullptr, "EmulateBluetoothCrash"}, 158 {257, nullptr, "EmulateBluetoothCrash"},
158 {258, nullptr, "GetBleChannelMap"}, 159 {258, nullptr, "GetBleChannelMap"},
159 }; 160 };
diff --git a/src/core/hle/service/btm/btm.cpp b/src/core/hle/service/btm/btm.cpp
index 251b3c9df..0d251c6d0 100644
--- a/src/core/hle/service/btm/btm.cpp
+++ b/src/core/hle/service/btm/btm.cpp
@@ -132,66 +132,71 @@ public:
132 explicit BTM() : ServiceFramework{"btm"} { 132 explicit BTM() : ServiceFramework{"btm"} {
133 // clang-format off 133 // clang-format off
134 static const FunctionInfo functions[] = { 134 static const FunctionInfo functions[] = {
135 {0, nullptr, "Unknown1"}, 135 {0, nullptr, "GetState"},
136 {1, nullptr, "Unknown2"}, 136 {1, nullptr, "GetHostDeviceProperty"},
137 {2, nullptr, "RegisterSystemEventForConnectedDeviceCondition"}, 137 {2, nullptr, "AcquireDeviceConditionEvent"},
138 {3, nullptr, "Unknown3"}, 138 {3, nullptr, "GetDeviceCondition"},
139 {4, nullptr, "Unknown4"}, 139 {4, nullptr, "SetBurstMode"},
140 {5, nullptr, "Unknown5"}, 140 {5, nullptr, "SetSlotMode"},
141 {6, nullptr, "Unknown6"}, 141 {6, nullptr, "SetBluetoothMode"},
142 {7, nullptr, "Unknown7"}, 142 {7, nullptr, "SetWlanMode"},
143 {8, nullptr, "RegisterSystemEventForRegisteredDeviceInfo"}, 143 {8, nullptr, "AcquireDeviceInfoEvent"},
144 {9, nullptr, "Unknown8"}, 144 {9, nullptr, "GetDeviceInfo"},
145 {10, nullptr, "Unknown9"}, 145 {10, nullptr, "AddDeviceInfo"},
146 {11, nullptr, "Unknown10"}, 146 {11, nullptr, "RemoveDeviceInfo"},
147 {12, nullptr, "Unknown11"}, 147 {12, nullptr, "IncreaseDeviceInfoOrder"},
148 {13, nullptr, "Unknown12"}, 148 {13, nullptr, "LlrNotify"},
149 {14, nullptr, "EnableRadio"}, 149 {14, nullptr, "EnableRadio"},
150 {15, nullptr, "DisableRadio"}, 150 {15, nullptr, "DisableRadio"},
151 {16, nullptr, "Unknown13"}, 151 {16, nullptr, "HidDisconnect"},
152 {17, nullptr, "Unknown14"}, 152 {17, nullptr, "HidSetRetransmissionMode"},
153 {18, nullptr, "Unknown15"}, 153 {18, nullptr, "AcquireAwakeReqEvent"},
154 {19, nullptr, "Unknown16"}, 154 {19, nullptr, "AcquireLlrStateEvent"},
155 {20, nullptr, "Unknown17"}, 155 {20, nullptr, "IsLlrStarted"},
156 {21, nullptr, "Unknown18"}, 156 {21, nullptr, "EnableSlotSaving"},
157 {22, nullptr, "Unknown19"}, 157 {22, nullptr, "ProtectDeviceInfo"},
158 {23, nullptr, "Unknown20"}, 158 {23, nullptr, "AcquireBleScanEvent"},
159 {24, nullptr, "Unknown21"}, 159 {24, nullptr, "GetBleScanParameterGeneral"},
160 {25, nullptr, "Unknown22"}, 160 {25, nullptr, "GetBleScanParameterSmartDevice"},
161 {26, nullptr, "Unknown23"}, 161 {26, nullptr, "StartBleScanForGeneral"},
162 {27, nullptr, "Unknown24"}, 162 {27, nullptr, "StopBleScanForGeneral"},
163 {28, nullptr, "Unknown25"}, 163 {28, nullptr, "GetBleScanResultsForGeneral"},
164 {29, nullptr, "Unknown26"}, 164 {29, nullptr, "StartBleScanForPairedDevice"},
165 {30, nullptr, "Unknown27"}, 165 {30, nullptr, "StopBleScanForPairedDevice"},
166 {31, nullptr, "Unknown28"}, 166 {31, nullptr, "StartBleScanForSmartDevice"},
167 {32, nullptr, "Unknown29"}, 167 {32, nullptr, "StopBleScanForSmartDevice"},
168 {33, nullptr, "Unknown30"}, 168 {33, nullptr, "GetBleScanResultsForSmartDevice"},
169 {34, nullptr, "Unknown31"}, 169 {34, nullptr, "AcquireBleConnectionEvent"},
170 {35, nullptr, "Unknown32"}, 170 {35, nullptr, "BleConnect"},
171 {36, nullptr, "Unknown33"}, 171 {36, nullptr, "BleOverrideConnection"},
172 {37, nullptr, "Unknown34"}, 172 {37, nullptr, "BleDisconnect"},
173 {38, nullptr, "Unknown35"}, 173 {38, nullptr, "BleGetConnectionState"},
174 {39, nullptr, "Unknown36"}, 174 {39, nullptr, "BleGetGattClientConditionList"},
175 {40, nullptr, "Unknown37"}, 175 {40, nullptr, "AcquireBlePairingEvent"},
176 {41, nullptr, "Unknown38"}, 176 {41, nullptr, "BlePairDevice"},
177 {42, nullptr, "Unknown39"}, 177 {42, nullptr, "BleUnpairDeviceOnBoth"},
178 {43, nullptr, "Unknown40"}, 178 {43, nullptr, "BleUnpairDevice"},
179 {44, nullptr, "Unknown41"}, 179 {44, nullptr, "BleGetPairedAddresses"},
180 {45, nullptr, "Unknown42"}, 180 {45, nullptr, "AcquireBleServiceDiscoveryEvent"},
181 {46, nullptr, "Unknown43"}, 181 {46, nullptr, "GetGattServices"},
182 {47, nullptr, "Unknown44"}, 182 {47, nullptr, "GetGattService"},
183 {48, nullptr, "Unknown45"}, 183 {48, nullptr, "GetGattIncludedServices"},
184 {49, nullptr, "Unknown46"}, 184 {49, nullptr, "GetBelongingService"},
185 {50, nullptr, "Unknown47"}, 185 {50, nullptr, "GetGattCharacteristics"},
186 {51, nullptr, "Unknown48"}, 186 {51, nullptr, "GetGattDescriptors"},
187 {52, nullptr, "Unknown49"}, 187 {52, nullptr, "AcquireBleMtuConfigEvent"},
188 {53, nullptr, "Unknown50"}, 188 {53, nullptr, "ConfigureBleMtu"},
189 {54, nullptr, "Unknown51"}, 189 {54, nullptr, "GetBleMtu"},
190 {55, nullptr, "Unknown52"}, 190 {55, nullptr, "RegisterBleGattDataPath"},
191 {56, nullptr, "Unknown53"}, 191 {56, nullptr, "UnregisterBleGattDataPath"},
192 {57, nullptr, "Unknown54"}, 192 {57, nullptr, "RegisterAppletResourceUserId"},
193 {58, nullptr, "Unknown55"}, 193 {58, nullptr, "UnregisterAppletResourceUserId"},
194 {59, nullptr, "Unknown56"}, 194 {59, nullptr, "SetAppletResourceUserId"},
195 {60, nullptr, "Unknown60"},
196 {61, nullptr, "Unknown61"},
197 {62, nullptr, "Unknown62"},
198 {63, nullptr, "Unknown63"},
199 {64, nullptr, "Unknown64"},
195 }; 200 };
196 // clang-format on 201 // clang-format on
197 202
@@ -204,19 +209,19 @@ public:
204 explicit BTM_DBG() : ServiceFramework{"btm:dbg"} { 209 explicit BTM_DBG() : ServiceFramework{"btm:dbg"} {
205 // clang-format off 210 // clang-format off
206 static const FunctionInfo functions[] = { 211 static const FunctionInfo functions[] = {
207 {0, nullptr, "RegisterSystemEventForDiscovery"}, 212 {0, nullptr, "AcquireDiscoveryEvent"},
208 {1, nullptr, "Unknown1"}, 213 {1, nullptr, "StartDiscovery"},
209 {2, nullptr, "Unknown2"}, 214 {2, nullptr, "CancelDiscovery"},
210 {3, nullptr, "Unknown3"}, 215 {3, nullptr, "GetDeviceProperty"},
211 {4, nullptr, "Unknown4"}, 216 {4, nullptr, "CreateBond"},
212 {5, nullptr, "Unknown5"}, 217 {5, nullptr, "CancelBond"},
213 {6, nullptr, "Unknown6"}, 218 {6, nullptr, "SetTsiMode"},
214 {7, nullptr, "Unknown7"}, 219 {7, nullptr, "GeneralTest"},
215 {8, nullptr, "Unknown8"}, 220 {8, nullptr, "HidConnect"},
216 {9, nullptr, "Unknown9"}, 221 {9, nullptr, "GeneralGet"},
217 {10, nullptr, "Unknown10"}, 222 {10, nullptr, "GetGattClientDisconnectionReason"},
218 {11, nullptr, "Unknown11"}, 223 {11, nullptr, "GetBleConnectionParameter"},
219 {12, nullptr, "Unknown11"}, 224 {12, nullptr, "GetBleConnectionParameterRequest"},
220 }; 225 };
221 // clang-format on 226 // clang-format on
222 227
diff --git a/src/core/hle/service/caps/caps.cpp b/src/core/hle/service/caps/caps.cpp
index 26c8a7081..ba5749b84 100644
--- a/src/core/hle/service/caps/caps.cpp
+++ b/src/core/hle/service/caps/caps.cpp
@@ -1,4 +1,4 @@
1// Copyright 2018 yuzu emulator team 1// Copyright 2018 yuzu Emulator Project
2// Licensed under GPLv2 or any later version 2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included. 3// Refer to the license.txt file included.
4 4
diff --git a/src/core/hle/service/caps/caps.h b/src/core/hle/service/caps/caps.h
index fc70a4c27..b8c67b6e2 100644
--- a/src/core/hle/service/caps/caps.h
+++ b/src/core/hle/service/caps/caps.h
@@ -1,4 +1,4 @@
1// Copyright 2018 yuzu emulator team 1// Copyright 2018 yuzu Emulator Project
2// Licensed under GPLv2 or any later version 2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included. 3// Refer to the license.txt file included.
4 4
@@ -12,73 +12,79 @@ class ServiceManager;
12 12
13namespace Service::Capture { 13namespace Service::Capture {
14 14
15enum AlbumImageOrientation { 15enum class AlbumImageOrientation {
16 Orientation0 = 0, 16 Orientation0 = 0,
17 Orientation1 = 1, 17 Orientation1 = 1,
18 Orientation2 = 2, 18 Orientation2 = 2,
19 Orientation3 = 3, 19 Orientation3 = 3,
20}; 20};
21 21
22enum AlbumReportOption { 22enum class AlbumReportOption {
23 Disable = 0, 23 Disable = 0,
24 Enable = 1, 24 Enable = 1,
25}; 25};
26 26
27enum ContentType : u8 { 27enum class ContentType : u8 {
28 Screenshot = 0, 28 Screenshot = 0,
29 Movie = 1, 29 Movie = 1,
30 ExtraMovie = 3, 30 ExtraMovie = 3,
31}; 31};
32 32
33enum AlbumStorage : u8 { 33enum class AlbumStorage : u8 {
34 NAND = 0, 34 NAND = 0,
35 SD = 1, 35 SD = 1,
36}; 36};
37 37
38struct AlbumFileDateTime { 38struct AlbumFileDateTime {
39 u16 year; 39 s16 year{};
40 u8 month; 40 s8 month{};
41 u8 day; 41 s8 day{};
42 u8 hour; 42 s8 hour{};
43 u8 minute; 43 s8 minute{};
44 u8 second; 44 s8 second{};
45 u8 uid; 45 s8 uid{};
46}; 46};
47static_assert(sizeof(AlbumFileDateTime) == 0x8, "AlbumFileDateTime has incorrect size.");
47 48
48struct AlbumEntry { 49struct AlbumEntry {
49 u64 size; 50 u64 size{};
50 u64 application_id; 51 u64 application_id{};
51 AlbumFileDateTime datetime; 52 AlbumFileDateTime datetime{};
52 AlbumStorage storage; 53 AlbumStorage storage{};
53 ContentType content; 54 ContentType content{};
54 u8 padding[6]; 55 INSERT_PADDING_BYTES(6);
55}; 56};
57static_assert(sizeof(AlbumEntry) == 0x20, "AlbumEntry has incorrect size.");
56 58
57struct AlbumFileEntry { 59struct AlbumFileEntry {
58 u64 size; 60 u64 size{}; // Size of the entry
59 u64 hash; 61 u64 hash{}; // AES256 with hardcoded key over AlbumEntry
60 AlbumFileDateTime datetime; 62 AlbumFileDateTime datetime{};
61 AlbumStorage storage; 63 AlbumStorage storage{};
62 ContentType content; 64 ContentType content{};
63 u8 padding[5]; 65 INSERT_PADDING_BYTES(5);
64 u8 unknown; 66 u8 unknown{1}; // Set to 1 on official SW
65}; 67};
68static_assert(sizeof(AlbumFileEntry) == 0x20, "AlbumFileEntry has incorrect size.");
66 69
67struct ApplicationAlbumEntry { 70struct ApplicationAlbumEntry {
68 u64 size; 71 u64 size{}; // Size of the entry
69 u64 hash; 72 u64 hash{}; // AES256 with hardcoded key over AlbumEntry
70 AlbumFileDateTime datetime; 73 AlbumFileDateTime datetime{};
71 AlbumStorage storage; 74 AlbumStorage storage{};
72 ContentType content; 75 ContentType content{};
73 u8 padding[5]; 76 INSERT_PADDING_BYTES(5);
74 u8 unknown; 77 u8 unknown{1}; // Set to 1 on official SW
75}; 78};
79static_assert(sizeof(ApplicationAlbumEntry) == 0x20, "ApplicationAlbumEntry has incorrect size.");
76 80
77struct ApplicationAlbumFileEntry { 81struct ApplicationAlbumFileEntry {
78 ApplicationAlbumEntry entry; 82 ApplicationAlbumEntry entry{};
79 AlbumFileDateTime datetime; 83 AlbumFileDateTime datetime{};
80 u64 unknown; 84 u64 unknown{};
81}; 85};
86static_assert(sizeof(ApplicationAlbumFileEntry) == 0x30,
87 "ApplicationAlbumFileEntry has incorrect size.");
82 88
83/// Registers all Capture services with the specified service manager. 89/// Registers all Capture services with the specified service manager.
84void InstallInterfaces(SM::ServiceManager& sm); 90void InstallInterfaces(SM::ServiceManager& sm);
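
The caps.h hunk hardens the IPC-facing structs three ways: plain enums become enum class with a fixed underlying type, raw u8 padding[] arrays become INSERT_PADDING_BYTES, and every struct gains a static_assert pinning its size so layout drift fails at compile time. The same pattern extracted into a standalone sketch (local type aliases, and a simplified stand-in for yuzu's padding macro, which really generates uniquely named members):

#include <cstdint>

using u8 = std::uint8_t;
using u64 = std::uint64_t;
using s8 = std::int8_t;
using s16 = std::int16_t;

// Simplified stand-in; adequate here because each struct pads only once.
#define INSERT_PADDING_BYTES(num) u8 padding_bytes[num] {}

enum class AlbumStorage : u8 { NAND = 0, SD = 1 };
enum class ContentType : u8 { Screenshot = 0, Movie = 1, ExtraMovie = 3 };

struct AlbumFileDateTime {
    s16 year{};
    s8 month{};
    s8 day{};
    s8 hour{};
    s8 minute{};
    s8 second{};
    s8 uid{};
};
static_assert(sizeof(AlbumFileDateTime) == 0x8, "AlbumFileDateTime has incorrect size.");

struct AlbumEntry {
    u64 size{};
    u64 application_id{};
    AlbumFileDateTime datetime{};
    AlbumStorage storage{};
    ContentType content{};
    INSERT_PADDING_BYTES(6);
};
// 8 + 8 + 8 + 1 + 1 + 6 = 0x20; repacking or a changed field width trips the assert.
static_assert(sizeof(AlbumEntry) == 0x20, "AlbumEntry has incorrect size.");

int main() {} // compiling is the test
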
diff --git a/src/core/hle/service/caps/caps_a.cpp b/src/core/hle/service/caps/caps_a.cpp
index 88a3fdc05..a0a3b2ae3 100644
--- a/src/core/hle/service/caps/caps_a.cpp
+++ b/src/core/hle/service/caps/caps_a.cpp
@@ -1,4 +1,4 @@
1// Copyright 2020 yuzu emulator team 1// Copyright 2020 yuzu Emulator Project
2// Licensed under GPLv2 or any later version 2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included. 3// Refer to the license.txt file included.
4 4
diff --git a/src/core/hle/service/caps/caps_a.h b/src/core/hle/service/caps/caps_a.h
index 8de832491..cb93aad5b 100644
--- a/src/core/hle/service/caps/caps_a.h
+++ b/src/core/hle/service/caps/caps_a.h
@@ -1,4 +1,4 @@
1// Copyright 2020 yuzu emulator team 1// Copyright 2020 yuzu Emulator Project
2// Licensed under GPLv2 or any later version 2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included. 3// Refer to the license.txt file included.
4 4
diff --git a/src/core/hle/service/caps/caps_c.cpp b/src/core/hle/service/caps/caps_c.cpp
index ea6452ffa..ab17a187e 100644
--- a/src/core/hle/service/caps/caps_c.cpp
+++ b/src/core/hle/service/caps/caps_c.cpp
@@ -1,4 +1,4 @@
1// Copyright 2020 yuzu emulator team 1// Copyright 2020 yuzu Emulator Project
2// Licensed under GPLv2 or any later version 2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included. 3// Refer to the license.txt file included.
4 4
diff --git a/src/core/hle/service/caps/caps_c.h b/src/core/hle/service/caps/caps_c.h
index d07cdb441..a9d028689 100644
--- a/src/core/hle/service/caps/caps_c.h
+++ b/src/core/hle/service/caps/caps_c.h
@@ -1,4 +1,4 @@
1// Copyright 2020 yuzu emulator team 1// Copyright 2020 yuzu Emulator Project
2// Licensed under GPLv2 or any later version 2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included. 3// Refer to the license.txt file included.
4 4
diff --git a/src/core/hle/service/caps/caps_sc.cpp b/src/core/hle/service/caps/caps_sc.cpp
index d01a8a58e..822ee96c8 100644
--- a/src/core/hle/service/caps/caps_sc.cpp
+++ b/src/core/hle/service/caps/caps_sc.cpp
@@ -1,4 +1,4 @@
1// Copyright 2020 yuzu emulator team 1// Copyright 2020 yuzu Emulator Project
2// Licensed under GPLv2 or any later version 2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included. 3// Refer to the license.txt file included.
4 4
diff --git a/src/core/hle/service/caps/caps_sc.h b/src/core/hle/service/caps/caps_sc.h
index 9ba372f7a..ac3e929ca 100644
--- a/src/core/hle/service/caps/caps_sc.h
+++ b/src/core/hle/service/caps/caps_sc.h
@@ -1,4 +1,4 @@
1// Copyright 2020 yuzu emulator team 1// Copyright 2020 yuzu Emulator Project
2// Licensed under GPLv2 or any later version 2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included. 3// Refer to the license.txt file included.
4 4
diff --git a/src/core/hle/service/caps/caps_ss.cpp b/src/core/hle/service/caps/caps_ss.cpp
index eaa3a7494..24dc716e7 100644
--- a/src/core/hle/service/caps/caps_ss.cpp
+++ b/src/core/hle/service/caps/caps_ss.cpp
@@ -1,4 +1,4 @@
1// Copyright 2020 yuzu emulator team 1// Copyright 2020 yuzu Emulator Project
2// Licensed under GPLv2 or any later version 2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included. 3// Refer to the license.txt file included.
4 4
diff --git a/src/core/hle/service/caps/caps_ss.h b/src/core/hle/service/caps/caps_ss.h
index e258a6925..450686e4f 100644
--- a/src/core/hle/service/caps/caps_ss.h
+++ b/src/core/hle/service/caps/caps_ss.h
@@ -1,4 +1,4 @@
1// Copyright 2020 yuzu emulator team 1// Copyright 2020 yuzu Emulator Project
2// Licensed under GPLv2 or any later version 2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included. 3// Refer to the license.txt file included.
4 4
diff --git a/src/core/hle/service/caps/caps_su.cpp b/src/core/hle/service/caps/caps_su.cpp
index e8b0698e8..fffb2ecf9 100644
--- a/src/core/hle/service/caps/caps_su.cpp
+++ b/src/core/hle/service/caps/caps_su.cpp
@@ -1,4 +1,4 @@
1// Copyright 2020 yuzu emulator team 1// Copyright 2020 yuzu Emulator Project
2// Licensed under GPLv2 or any later version 2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included. 3// Refer to the license.txt file included.
4 4
diff --git a/src/core/hle/service/caps/caps_su.h b/src/core/hle/service/caps/caps_su.h
index c494d7c84..62c9603a9 100644
--- a/src/core/hle/service/caps/caps_su.h
+++ b/src/core/hle/service/caps/caps_su.h
@@ -1,4 +1,4 @@
1// Copyright 2020 yuzu emulator team 1// Copyright 2020 yuzu Emulator Project
2// Licensed under GPLv2 or any later version 2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included. 3// Refer to the license.txt file included.
4 4
diff --git a/src/core/hle/service/caps/caps_u.cpp b/src/core/hle/service/caps/caps_u.cpp
index 78bab6ed8..f36d8de2d 100644
--- a/src/core/hle/service/caps/caps_u.cpp
+++ b/src/core/hle/service/caps/caps_u.cpp
@@ -1,4 +1,4 @@
1// Copyright 2020 yuzu emulator team 1// Copyright 2020 yuzu Emulator Project
2// Licensed under GPLv2 or any later version 2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included. 3// Refer to the license.txt file included.
4 4
@@ -58,19 +58,25 @@ void CAPS_U::GetAlbumContentsFileListForApplication(Kernel::HLERequestContext& c
58 // u8 ContentType, two s64s, and a u64 AppletResourceUserId. Returns an output u64 for total 58 // u8 ContentType, two s64s, and a u64 AppletResourceUserId. Returns an output u64 for total
59 // output entries (which is copied to an s32 by official SW). 59 // output entries (which is copied to an s32 by official SW).
60 IPC::RequestParser rp{ctx}; 60 IPC::RequestParser rp{ctx};
61 [[maybe_unused]] const auto application_album_file_entries = rp.PopRaw<std::array<u8, 0x30>>(); 61 const auto pid{rp.Pop<s32>()};
62 const auto pid = rp.Pop<s32>(); 62 const auto content_type{rp.PopEnum<ContentType>()};
63 const auto content_type = rp.PopRaw<ContentType>(); 63 const auto start_posix_time{rp.Pop<s64>()};
64 [[maybe_unused]] const auto start_datetime = rp.PopRaw<AlbumFileDateTime>(); 64 const auto end_posix_time{rp.Pop<s64>()};
65 [[maybe_unused]] const auto end_datetime = rp.PopRaw<AlbumFileDateTime>(); 65 const auto applet_resource_user_id{rp.Pop<u64>()};
66 const auto applet_resource_user_id = rp.Pop<u64>(); 66
67 // TODO: Update this when we implement the album.
68 // Currently we have no way of accessing album entries, so set this to 0 for now.
69 constexpr s32 total_entries{0};
70
67 LOG_WARNING(Service_Capture, 71 LOG_WARNING(Service_Capture,
68 "(STUBBED) called. pid={}, content_type={}, applet_resource_user_id={}", pid, 72 "(STUBBED) called. pid={}, content_type={}, start_posix_time={}, "
69 content_type, applet_resource_user_id); 73 "end_posix_time={}, applet_resource_user_id={}, total_entries={}",
74 pid, content_type, start_posix_time, end_posix_time, applet_resource_user_id,
75 total_entries);
70 76
71 IPC::ResponseBuilder rb{ctx, 3}; 77 IPC::ResponseBuilder rb{ctx, 3};
72 rb.Push(RESULT_SUCCESS); 78 rb.Push(RESULT_SUCCESS);
73 rb.Push<s32>(0); 79 rb.Push(total_entries);
74} 80}
75 81
76} // namespace Service::Capture 82} // namespace Service::Capture
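
The reworked parser above pops the request fields in wire order: an s32 pid, a ContentType enum, two s64 POSIX timestamps, and the u64 AppletResourceUserId. A minimal standalone sketch of that sequential-pop pattern follows; BufferReader and the zero-filled request are illustrative stand-ins, not yuzu's real IPC::RequestParser.

#include <cstddef>
#include <cstdint>
#include <cstring>
#include <iostream>
#include <vector>

enum class ContentType : std::uint32_t { Screenshot = 0, Movie = 1 };

class BufferReader {
public:
    explicit BufferReader(const std::vector<std::uint8_t>& data) : data{data} {}

    // Reads the next sizeof(T) bytes and advances the cursor, mirroring Pop<T>().
    template <typename T>
    T Pop() {
        T value{};
        std::memcpy(&value, data.data() + offset, sizeof(T));
        offset += sizeof(T);
        return value;
    }

private:
    const std::vector<std::uint8_t>& data;
    std::size_t offset{};
};

int main() {
    std::vector<std::uint8_t> raw(32); // s32 + u32 + s64 + s64 + u64, zero-filled for the demo
    BufferReader rp{raw};
    const auto pid = rp.Pop<std::int32_t>();
    const auto content_type = static_cast<ContentType>(rp.Pop<std::uint32_t>());
    const auto start_posix_time = rp.Pop<std::int64_t>();
    const auto end_posix_time = rp.Pop<std::int64_t>();
    const auto applet_resource_user_id = rp.Pop<std::uint64_t>();
    std::cout << pid << ' ' << static_cast<std::uint32_t>(content_type) << ' '
              << start_posix_time << ' ' << end_posix_time << ' '
              << applet_resource_user_id << '\n';
}
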
diff --git a/src/core/hle/service/caps/caps_u.h b/src/core/hle/service/caps/caps_u.h
index e6e0716ff..689364de4 100644
--- a/src/core/hle/service/caps/caps_u.h
+++ b/src/core/hle/service/caps/caps_u.h
@@ -1,4 +1,4 @@
1// Copyright 2020 yuzu emulator team 1// Copyright 2020 yuzu Emulator Project
2// Licensed under GPLv2 or any later version 2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included. 3// Refer to the license.txt file included.
4 4
diff --git a/src/core/hle/service/es/es.cpp b/src/core/hle/service/es/es.cpp
index f8e9df4b1..9365f27e1 100644
--- a/src/core/hle/service/es/es.cpp
+++ b/src/core/hle/service/es/es.cpp
@@ -27,8 +27,8 @@ public:
27 {8, &ETicket::GetTitleKey, "GetTitleKey"}, 27 {8, &ETicket::GetTitleKey, "GetTitleKey"},
28 {9, &ETicket::CountCommonTicket, "CountCommonTicket"}, 28 {9, &ETicket::CountCommonTicket, "CountCommonTicket"},
29 {10, &ETicket::CountPersonalizedTicket, "CountPersonalizedTicket"}, 29 {10, &ETicket::CountPersonalizedTicket, "CountPersonalizedTicket"},
30 {11, &ETicket::ListCommonTicket, "ListCommonTicket"}, 30 {11, &ETicket::ListCommonTicketRightsIds, "ListCommonTicketRightsIds"},
31 {12, &ETicket::ListPersonalizedTicket, "ListPersonalizedTicket"}, 31 {12, &ETicket::ListPersonalizedTicketRightsIds, "ListPersonalizedTicketRightsIds"},
32 {13, nullptr, "ListMissingPersonalizedTicket"}, 32 {13, nullptr, "ListMissingPersonalizedTicket"},
33 {14, &ETicket::GetCommonTicketSize, "GetCommonTicketSize"}, 33 {14, &ETicket::GetCommonTicketSize, "GetCommonTicketSize"},
34 {15, &ETicket::GetPersonalizedTicketSize, "GetPersonalizedTicketSize"}, 34 {15, &ETicket::GetPersonalizedTicketSize, "GetPersonalizedTicketSize"},
@@ -55,7 +55,46 @@ public:
55 {36, nullptr, "DeleteAllInactiveELicenseRequiredPersonalizedTicket"}, 55 {36, nullptr, "DeleteAllInactiveELicenseRequiredPersonalizedTicket"},
56 {37, nullptr, "OwnTicket2"}, 56 {37, nullptr, "OwnTicket2"},
57 {38, nullptr, "OwnTicket3"}, 57 {38, nullptr, "OwnTicket3"},
58 {501, nullptr, "Unknown501"},
59 {502, nullptr, "Unknown502"},
58 {503, nullptr, "GetTitleKey"}, 60 {503, nullptr, "GetTitleKey"},
61 {504, nullptr, "Unknown504"},
62 {508, nullptr, "Unknown508"},
63 {509, nullptr, "Unknown509"},
64 {510, nullptr, "Unknown510"},
65 {511, nullptr, "Unknown511"},
66 {1001, nullptr, "Unknown1001"},
67 {1002, nullptr, "Unknown1001"},
68 {1003, nullptr, "Unknown1003"},
69 {1004, nullptr, "Unknown1004"},
70 {1005, nullptr, "Unknown1005"},
71 {1006, nullptr, "Unknown1006"},
72 {1007, nullptr, "Unknown1007"},
73 {1009, nullptr, "Unknown1009"},
74 {1010, nullptr, "Unknown1010"},
75 {1011, nullptr, "Unknown1011"},
76 {1012, nullptr, "Unknown1012"},
77 {1013, nullptr, "Unknown1013"},
78 {1014, nullptr, "Unknown1014"},
79 {1015, nullptr, "Unknown1015"},
80 {1016, nullptr, "Unknown1016"},
81 {1017, nullptr, "Unknown1017"},
82 {1018, nullptr, "Unknown1018"},
83 {1019, nullptr, "Unknown1019"},
84 {1020, nullptr, "Unknown1020"},
85 {1021, nullptr, "Unknown1021"},
86 {1501, nullptr, "Unknown1501"},
87 {1502, nullptr, "Unknown1502"},
88 {1503, nullptr, "Unknown1503"},
89 {1504, nullptr, "Unknown1504"},
90 {1505, nullptr, "Unknown1505"},
91 {2000, nullptr, "Unknown2000"},
92 {2001, nullptr, "Unknown2001"},
93 {2100, nullptr, "Unknown2100"},
94 {2501, nullptr, "Unknown2501"},
95 {2502, nullptr, "Unknown2502"},
96 {3001, nullptr, "Unknown3001"},
97 {3002, nullptr, "Unknown3002"},
59 }; 98 };
60 // clang-format on 99 // clang-format on
61 RegisterHandlers(functions); 100 RegisterHandlers(functions);
@@ -147,7 +186,7 @@ private:
147 rb.Push<u32>(count); 186 rb.Push<u32>(count);
148 } 187 }
149 188
150 void ListCommonTicket(Kernel::HLERequestContext& ctx) { 189 void ListCommonTicketRightsIds(Kernel::HLERequestContext& ctx) {
151 u32 out_entries; 190 u32 out_entries;
152 if (keys.GetCommonTickets().empty()) 191 if (keys.GetCommonTickets().empty())
153 out_entries = 0; 192 out_entries = 0;
@@ -170,7 +209,7 @@ private:
170 rb.Push<u32>(out_entries); 209 rb.Push<u32>(out_entries);
171 } 210 }
172 211
173 void ListPersonalizedTicket(Kernel::HLERequestContext& ctx) { 212 void ListPersonalizedTicketRightsIds(Kernel::HLERequestContext& ctx) {
174 u32 out_entries; 213 u32 out_entries;
175 if (keys.GetPersonalizedTickets().empty()) 214 if (keys.GetPersonalizedTickets().empty())
176 out_entries = 0; 215 out_entries = 0;
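
Both renamed handlers share the same shape: count the cached tickets, clamp to however many rights IDs fit in the output buffer, and report the number written. A hedged standalone sketch of that clamp-and-copy step; RightsId and the container types are assumptions, not the real key-manager API.

#include <algorithm>
#include <array>
#include <cstdint>
#include <vector>

using RightsId = std::array<std::uint8_t, 16>;

// Copies as many rights IDs as the output buffer can hold; the return value is
// what the handler pushes back as out_entries.
std::uint32_t ListRightsIds(const std::vector<RightsId>& tickets,
                            std::vector<RightsId>& output) {
    const std::size_t count = std::min(tickets.size(), output.size());
    std::copy_n(tickets.begin(), count, output.begin());
    return static_cast<std::uint32_t>(count);
}
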
diff --git a/src/core/hle/service/eupld/eupld.cpp b/src/core/hle/service/eupld/eupld.cpp
index 2df30acee..0d6d244f4 100644
--- a/src/core/hle/service/eupld/eupld.cpp
+++ b/src/core/hle/service/eupld/eupld.cpp
@@ -19,6 +19,7 @@ public:
19 {1, nullptr, "ImportCrt"}, 19 {1, nullptr, "ImportCrt"},
20 {2, nullptr, "ImportPki"}, 20 {2, nullptr, "ImportPki"},
21 {3, nullptr, "SetAutoUpload"}, 21 {3, nullptr, "SetAutoUpload"},
22 {4, nullptr, "GetAutoUpload"},
22 }; 23 };
23 // clang-format on 24 // clang-format on
24 25
diff --git a/src/core/hle/service/friend/friend.cpp b/src/core/hle/service/friend/friend.cpp
index 68f259b70..b7adaffc7 100644
--- a/src/core/hle/service/friend/friend.cpp
+++ b/src/core/hle/service/friend/friend.cpp
@@ -25,9 +25,13 @@ public:
25 {10101, &IFriendService::GetFriendList, "GetFriendList"}, 25 {10101, &IFriendService::GetFriendList, "GetFriendList"},
26 {10102, nullptr, "UpdateFriendInfo"}, 26 {10102, nullptr, "UpdateFriendInfo"},
27 {10110, nullptr, "GetFriendProfileImage"}, 27 {10110, nullptr, "GetFriendProfileImage"},
28 {10120, nullptr, "Unknown10120"},
29 {10121, nullptr, "Unknown10121"},
28 {10200, nullptr, "SendFriendRequestForApplication"}, 30 {10200, nullptr, "SendFriendRequestForApplication"},
29 {10211, nullptr, "AddFacedFriendRequestForApplication"}, 31 {10211, nullptr, "AddFacedFriendRequestForApplication"},
30 {10400, &IFriendService::GetBlockedUserListIds, "GetBlockedUserListIds"}, 32 {10400, &IFriendService::GetBlockedUserListIds, "GetBlockedUserListIds"},
33 {10420, nullptr, "Unknown10420"},
34 {10421, nullptr, "Unknown10421"},
31 {10500, nullptr, "GetProfileList"}, 35 {10500, nullptr, "GetProfileList"},
32 {10600, nullptr, "DeclareOpenOnlinePlaySession"}, 36 {10600, nullptr, "DeclareOpenOnlinePlaySession"},
33 {10601, &IFriendService::DeclareCloseOnlinePlaySession, "DeclareCloseOnlinePlaySession"}, 37 {10601, &IFriendService::DeclareCloseOnlinePlaySession, "DeclareCloseOnlinePlaySession"},
@@ -97,6 +101,8 @@ public:
97 {30900, nullptr, "SendFriendInvitation"}, 101 {30900, nullptr, "SendFriendInvitation"},
98 {30910, nullptr, "ReadFriendInvitation"}, 102 {30910, nullptr, "ReadFriendInvitation"},
99 {30911, nullptr, "ReadAllFriendInvitations"}, 103 {30911, nullptr, "ReadAllFriendInvitations"},
104 {40100, nullptr, "Unknown40100"},
105 {40400, nullptr, "Unknown40400"},
100 {49900, nullptr, "DeleteNetworkServiceAccountCache"}, 106 {49900, nullptr, "DeleteNetworkServiceAccountCache"},
101 }; 107 };
102 // clang-format on 108 // clang-format on
diff --git a/src/core/hle/service/grc/grc.cpp b/src/core/hle/service/grc/grc.cpp
index 24910ac6c..401e0b208 100644
--- a/src/core/hle/service/grc/grc.cpp
+++ b/src/core/hle/service/grc/grc.cpp
@@ -17,6 +17,9 @@ public:
17 static const FunctionInfo functions[] = { 17 static const FunctionInfo functions[] = {
18 {1, nullptr, "OpenContinuousRecorder"}, 18 {1, nullptr, "OpenContinuousRecorder"},
19 {2, nullptr, "OpenGameMovieTrimmer"}, 19 {2, nullptr, "OpenGameMovieTrimmer"},
20 {3, nullptr, "OpenOffscreenRecorder"},
21 {101, nullptr, "CreateMovieMaker"},
22 {9903, nullptr, "SetOffscreenRecordingMarker"},
20 }; 23 };
21 // clang-format on 24 // clang-format on
22 25
diff --git a/src/core/hle/service/hid/controllers/debug_pad.cpp b/src/core/hle/service/hid/controllers/debug_pad.cpp
index 1f2131ec8..cb35919e9 100644
--- a/src/core/hle/service/hid/controllers/debug_pad.cpp
+++ b/src/core/hle/service/hid/controllers/debug_pad.cpp
@@ -23,7 +23,7 @@ void Controller_DebugPad::OnRelease() {}
23 23
24void Controller_DebugPad::OnUpdate(const Core::Timing::CoreTiming& core_timing, u8* data, 24void Controller_DebugPad::OnUpdate(const Core::Timing::CoreTiming& core_timing, u8* data,
25 std::size_t size) { 25 std::size_t size) {
26 shared_memory.header.timestamp = core_timing.GetTicks(); 26 shared_memory.header.timestamp = core_timing.GetCPUTicks();
27 shared_memory.header.total_entry_count = 17; 27 shared_memory.header.total_entry_count = 17;
28 28
29 if (!IsControllerActivated()) { 29 if (!IsControllerActivated()) {
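
This GetTicks -> GetCPUTicks rename repeats across every HID controller below; each OnUpdate stamps the same 17-entry shared-memory ring buffer. A standalone model of that ring-buffer bookkeeping, with the Entry layout left illustrative:

#include <array>
#include <cstdint>

template <typename Entry>
struct RingBufferModel {
    std::uint64_t timestamp{};
    std::uint64_t total_entry_count{17};
    std::uint64_t last_entry_index{};
    std::array<Entry, 17> entries{};

    // Mirrors the per-update pattern: stamp the header with the current tick,
    // advance the wrapping index, then write the newest sample.
    void Push(std::uint64_t tick, const Entry& entry) {
        timestamp = tick;
        last_entry_index = (last_entry_index + 1) % 17;
        entries[last_entry_index] = entry;
    }
};
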
diff --git a/src/core/hle/service/hid/controllers/gesture.cpp b/src/core/hle/service/hid/controllers/gesture.cpp
index 6e990dd00..b7b7bfeae 100644
--- a/src/core/hle/service/hid/controllers/gesture.cpp
+++ b/src/core/hle/service/hid/controllers/gesture.cpp
@@ -19,7 +19,7 @@ void Controller_Gesture::OnRelease() {}
19 19
20void Controller_Gesture::OnUpdate(const Core::Timing::CoreTiming& core_timing, u8* data, 20void Controller_Gesture::OnUpdate(const Core::Timing::CoreTiming& core_timing, u8* data,
21 std::size_t size) { 21 std::size_t size) {
22 shared_memory.header.timestamp = core_timing.GetTicks(); 22 shared_memory.header.timestamp = core_timing.GetCPUTicks();
23 shared_memory.header.total_entry_count = 17; 23 shared_memory.header.total_entry_count = 17;
24 24
25 if (!IsControllerActivated()) { 25 if (!IsControllerActivated()) {
diff --git a/src/core/hle/service/hid/controllers/keyboard.cpp b/src/core/hle/service/hid/controllers/keyboard.cpp
index 9a8d354ba..feae89525 100644
--- a/src/core/hle/service/hid/controllers/keyboard.cpp
+++ b/src/core/hle/service/hid/controllers/keyboard.cpp
@@ -21,7 +21,7 @@ void Controller_Keyboard::OnRelease() {}
21 21
22void Controller_Keyboard::OnUpdate(const Core::Timing::CoreTiming& core_timing, u8* data, 22void Controller_Keyboard::OnUpdate(const Core::Timing::CoreTiming& core_timing, u8* data,
23 std::size_t size) { 23 std::size_t size) {
24 shared_memory.header.timestamp = core_timing.GetTicks(); 24 shared_memory.header.timestamp = core_timing.GetCPUTicks();
25 shared_memory.header.total_entry_count = 17; 25 shared_memory.header.total_entry_count = 17;
26 26
27 if (!IsControllerActivated()) { 27 if (!IsControllerActivated()) {
diff --git a/src/core/hle/service/hid/controllers/mouse.cpp b/src/core/hle/service/hid/controllers/mouse.cpp
index 93d88ea50..ac40989c5 100644
--- a/src/core/hle/service/hid/controllers/mouse.cpp
+++ b/src/core/hle/service/hid/controllers/mouse.cpp
@@ -19,7 +19,7 @@ void Controller_Mouse::OnRelease() {}
19 19
20void Controller_Mouse::OnUpdate(const Core::Timing::CoreTiming& core_timing, u8* data, 20void Controller_Mouse::OnUpdate(const Core::Timing::CoreTiming& core_timing, u8* data,
21 std::size_t size) { 21 std::size_t size) {
22 shared_memory.header.timestamp = core_timing.GetTicks(); 22 shared_memory.header.timestamp = core_timing.GetCPUTicks();
23 shared_memory.header.total_entry_count = 17; 23 shared_memory.header.total_entry_count = 17;
24 24
25 if (!IsControllerActivated()) { 25 if (!IsControllerActivated()) {
diff --git a/src/core/hle/service/hid/controllers/npad.cpp b/src/core/hle/service/hid/controllers/npad.cpp
index c55d900e2..ef67ad690 100644
--- a/src/core/hle/service/hid/controllers/npad.cpp
+++ b/src/core/hle/service/hid/controllers/npad.cpp
@@ -328,7 +328,7 @@ void Controller_NPad::OnUpdate(const Core::Timing::CoreTiming& core_timing, u8*
328 const auto& last_entry = 328 const auto& last_entry =
329 main_controller->npad[main_controller->common.last_entry_index]; 329 main_controller->npad[main_controller->common.last_entry_index];
330 330
331 main_controller->common.timestamp = core_timing.GetTicks(); 331 main_controller->common.timestamp = core_timing.GetCPUTicks();
332 main_controller->common.last_entry_index = 332 main_controller->common.last_entry_index =
333 (main_controller->common.last_entry_index + 1) % 17; 333 (main_controller->common.last_entry_index + 1) % 17;
334 334
@@ -566,6 +566,14 @@ void Controller_NPad::DisconnectNPad(u32 npad_id) {
566 connected_controllers[NPadIdToIndex(npad_id)].is_connected = false; 566 connected_controllers[NPadIdToIndex(npad_id)].is_connected = false;
567} 567}
568 568
569void Controller_NPad::SetGyroscopeZeroDriftMode(GyroscopeZeroDriftMode drift_mode) {
570 gyroscope_zero_drift_mode = drift_mode;
571}
572
573Controller_NPad::GyroscopeZeroDriftMode Controller_NPad::GetGyroscopeZeroDriftMode() const {
574 return gyroscope_zero_drift_mode;
575}
576
569void Controller_NPad::StartLRAssignmentMode() { 577void Controller_NPad::StartLRAssignmentMode() {
570 // Nothing internally is used for LR assignment mode. Since we have the ability to set the 578 // Nothing internally is used for LR assignment mode. Since we have the ability to set the
571 // controller types from boot, it doesn't really matter about showing a selection screen 579 // controller types from boot, it doesn't really matter about showing a selection screen
diff --git a/src/core/hle/service/hid/controllers/npad.h b/src/core/hle/service/hid/controllers/npad.h
index 931f03430..5d4c58a43 100644
--- a/src/core/hle/service/hid/controllers/npad.h
+++ b/src/core/hle/service/hid/controllers/npad.h
@@ -58,6 +58,12 @@ public:
58 }; 58 };
59 static_assert(sizeof(Vibration) == 0x10, "Vibration is an invalid size"); 59 static_assert(sizeof(Vibration) == 0x10, "Vibration is an invalid size");
60 60
61 enum class GyroscopeZeroDriftMode : u32 {
62 Loose = 0,
63 Standard = 1,
64 Tight = 2,
65 };
66
61 enum class NpadHoldType : u64 { 67 enum class NpadHoldType : u64 {
62 Vertical = 0, 68 Vertical = 0,
63 Horizontal = 1, 69 Horizontal = 1,
@@ -117,6 +123,8 @@ public:
117 123
118 void ConnectNPad(u32 npad_id); 124 void ConnectNPad(u32 npad_id);
119 void DisconnectNPad(u32 npad_id); 125 void DisconnectNPad(u32 npad_id);
126 void SetGyroscopeZeroDriftMode(GyroscopeZeroDriftMode drift_mode);
127 GyroscopeZeroDriftMode GetGyroscopeZeroDriftMode() const;
120 LedPattern GetLedPattern(u32 npad_id); 128 LedPattern GetLedPattern(u32 npad_id);
121 void SetVibrationEnabled(bool can_vibrate); 129 void SetVibrationEnabled(bool can_vibrate);
122 bool IsVibrationEnabled() const; 130 bool IsVibrationEnabled() const;
@@ -324,8 +332,8 @@ private:
324 std::array<Kernel::EventPair, 10> styleset_changed_events; 332 std::array<Kernel::EventPair, 10> styleset_changed_events;
325 Vibration last_processed_vibration{}; 333 Vibration last_processed_vibration{};
326 std::array<ControllerHolder, 10> connected_controllers{}; 334 std::array<ControllerHolder, 10> connected_controllers{};
335 GyroscopeZeroDriftMode gyroscope_zero_drift_mode{GyroscopeZeroDriftMode::Standard};
327 bool can_controllers_vibrate{true}; 336 bool can_controllers_vibrate{true};
328
329 std::array<ControllerPad, 10> npad_pad_states{}; 337 std::array<ControllerPad, 10> npad_pad_states{};
330 bool is_in_lr_assignment_mode{false}; 338 bool is_in_lr_assignment_mode{false};
331 Core::System& system; 339 Core::System& system;
diff --git a/src/core/hle/service/hid/controllers/stubbed.cpp b/src/core/hle/service/hid/controllers/stubbed.cpp
index 9e527d176..e7483bfa2 100644
--- a/src/core/hle/service/hid/controllers/stubbed.cpp
+++ b/src/core/hle/service/hid/controllers/stubbed.cpp
@@ -23,7 +23,7 @@ void Controller_Stubbed::OnUpdate(const Core::Timing::CoreTiming& core_timing, u
23 } 23 }
24 24
25 CommonHeader header{}; 25 CommonHeader header{};
26 header.timestamp = core_timing.GetTicks(); 26 header.timestamp = core_timing.GetCPUTicks();
27 header.total_entry_count = 17; 27 header.total_entry_count = 17;
28 header.entry_count = 0; 28 header.entry_count = 0;
29 header.last_entry_index = 0; 29 header.last_entry_index = 0;
diff --git a/src/core/hle/service/hid/controllers/touchscreen.cpp b/src/core/hle/service/hid/controllers/touchscreen.cpp
index 1c6e55566..e326f8f5c 100644
--- a/src/core/hle/service/hid/controllers/touchscreen.cpp
+++ b/src/core/hle/service/hid/controllers/touchscreen.cpp
@@ -22,7 +22,7 @@ void Controller_Touchscreen::OnRelease() {}
22 22
23void Controller_Touchscreen::OnUpdate(const Core::Timing::CoreTiming& core_timing, u8* data, 23void Controller_Touchscreen::OnUpdate(const Core::Timing::CoreTiming& core_timing, u8* data,
24 std::size_t size) { 24 std::size_t size) {
25 shared_memory.header.timestamp = core_timing.GetTicks(); 25 shared_memory.header.timestamp = core_timing.GetCPUTicks();
26 shared_memory.header.total_entry_count = 17; 26 shared_memory.header.total_entry_count = 17;
27 27
28 if (!IsControllerActivated()) { 28 if (!IsControllerActivated()) {
@@ -49,7 +49,7 @@ void Controller_Touchscreen::OnUpdate(const Core::Timing::CoreTiming& core_timin
49 touch_entry.diameter_x = Settings::values.touchscreen.diameter_x; 49 touch_entry.diameter_x = Settings::values.touchscreen.diameter_x;
50 touch_entry.diameter_y = Settings::values.touchscreen.diameter_y; 50 touch_entry.diameter_y = Settings::values.touchscreen.diameter_y;
51 touch_entry.rotation_angle = Settings::values.touchscreen.rotation_angle; 51 touch_entry.rotation_angle = Settings::values.touchscreen.rotation_angle;
52 const u64 tick = core_timing.GetTicks(); 52 const u64 tick = core_timing.GetCPUTicks();
53 touch_entry.delta_time = tick - last_touch; 53 touch_entry.delta_time = tick - last_touch;
54 last_touch = tick; 54 last_touch = tick;
55 touch_entry.finger = Settings::values.touchscreen.finger; 55 touch_entry.finger = Settings::values.touchscreen.finger;
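
The touchscreen additionally keeps a per-sample delta: each update subtracts the previously stored tick from the current one. A minimal sketch of that bookkeeping, with GetCPUTicks() stood in for by a caller-supplied value:

#include <cstdint>

struct TouchTimerModel {
    std::uint64_t last_touch{};

    // Returns the delta_time written into the touch entry and remembers the
    // current tick for the next update.
    std::uint64_t Update(std::uint64_t tick) {
        const std::uint64_t delta = tick - last_touch;
        last_touch = tick;
        return delta;
    }
};
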
diff --git a/src/core/hle/service/hid/controllers/xpad.cpp b/src/core/hle/service/hid/controllers/xpad.cpp
index 27511b27b..2503ef241 100644
--- a/src/core/hle/service/hid/controllers/xpad.cpp
+++ b/src/core/hle/service/hid/controllers/xpad.cpp
@@ -20,7 +20,7 @@ void Controller_XPad::OnRelease() {}
20void Controller_XPad::OnUpdate(const Core::Timing::CoreTiming& core_timing, u8* data, 20void Controller_XPad::OnUpdate(const Core::Timing::CoreTiming& core_timing, u8* data,
21 std::size_t size) { 21 std::size_t size) {
22 for (auto& xpad_entry : shared_memory.shared_memory_entries) { 22 for (auto& xpad_entry : shared_memory.shared_memory_entries) {
23 xpad_entry.header.timestamp = core_timing.GetTicks(); 23 xpad_entry.header.timestamp = core_timing.GetCPUTicks();
24 xpad_entry.header.total_entry_count = 17; 24 xpad_entry.header.total_entry_count = 17;
25 25
26 if (!IsControllerActivated()) { 26 if (!IsControllerActivated()) {
diff --git a/src/core/hle/service/hid/hid.cpp b/src/core/hle/service/hid/hid.cpp
index 72a050de2..e9020e0dc 100644
--- a/src/core/hle/service/hid/hid.cpp
+++ b/src/core/hle/service/hid/hid.cpp
@@ -39,11 +39,9 @@ namespace Service::HID {
39 39
40// Updating period for each HID device. 40// Updating period for each HID device.
41// TODO(ogniK): Find actual polling rate of hid 41// TODO(ogniK): Find actual polling rate of hid
42constexpr s64 pad_update_ticks = static_cast<s64>(Core::Hardware::BASE_CLOCK_RATE / 66); 42constexpr s64 pad_update_ticks = static_cast<s64>(1000000000 / 66);
43[[maybe_unused]] constexpr s64 accelerometer_update_ticks = 43[[maybe_unused]] constexpr s64 accelerometer_update_ticks = static_cast<s64>(1000000000 / 100);
44 static_cast<s64>(Core::Hardware::BASE_CLOCK_RATE / 100); 44[[maybe_unused]] constexpr s64 gyroscope_update_ticks = static_cast<s64>(1000000000 / 100);
45[[maybe_unused]] constexpr s64 gyroscope_update_ticks =
46 static_cast<s64>(Core::Hardware::BASE_CLOCK_RATE / 100);
47constexpr std::size_t SHARED_MEMORY_SIZE = 0x40000; 45constexpr std::size_t SHARED_MEMORY_SIZE = 0x40000;
48 46
49IAppletResource::IAppletResource(Core::System& system) 47IAppletResource::IAppletResource(Core::System& system)
@@ -78,8 +76,8 @@ IAppletResource::IAppletResource(Core::System& system)
78 76
79 // Register update callbacks 77 // Register update callbacks
80 pad_update_event = 78 pad_update_event =
81 Core::Timing::CreateEvent("HID::UpdatePadCallback", [this](u64 userdata, s64 cycles_late) { 79 Core::Timing::CreateEvent("HID::UpdatePadCallback", [this](u64 userdata, s64 ns_late) {
82 UpdateControllers(userdata, cycles_late); 80 UpdateControllers(userdata, ns_late);
83 }); 81 });
84 82
85 // TODO(shinyquagsire23): Other update callbacks? (accel, gyro?) 83 // TODO(shinyquagsire23): Other update callbacks? (accel, gyro?)
@@ -109,7 +107,7 @@ void IAppletResource::GetSharedMemoryHandle(Kernel::HLERequestContext& ctx) {
109 rb.PushCopyObjects(shared_mem); 107 rb.PushCopyObjects(shared_mem);
110} 108}
111 109
112void IAppletResource::UpdateControllers(u64 userdata, s64 cycles_late) { 110void IAppletResource::UpdateControllers(u64 userdata, s64 ns_late) {
113 auto& core_timing = system.CoreTiming(); 111 auto& core_timing = system.CoreTiming();
114 112
115 const bool should_reload = Settings::values.is_device_reload_pending.exchange(false); 113 const bool should_reload = Settings::values.is_device_reload_pending.exchange(false);
@@ -120,7 +118,7 @@ void IAppletResource::UpdateControllers(u64 userdata, s64 cycles_late) {
120 controller->OnUpdate(core_timing, shared_mem->GetPointer(), SHARED_MEMORY_SIZE); 118 controller->OnUpdate(core_timing, shared_mem->GetPointer(), SHARED_MEMORY_SIZE);
121 } 119 }
122 120
123 core_timing.ScheduleEvent(pad_update_ticks - cycles_late, pad_update_event); 121 core_timing.ScheduleEvent(pad_update_ticks - ns_late, pad_update_event);
124} 122}
125 123
126class IActiveVibrationDeviceList final : public ServiceFramework<IActiveVibrationDeviceList> { 124class IActiveVibrationDeviceList final : public ServiceFramework<IActiveVibrationDeviceList> {
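
With core timing now counting nanoseconds instead of cycles, the callback parameter becomes ns_late and the reschedule subtracts it from the fixed period, so the average update rate stays at roughly 66 Hz regardless of callback jitter. A standalone sketch of why period - ns_late is drift-free:

#include <cstdint>
#include <iostream>

int main() {
    constexpr std::int64_t period_ns = 1000000000 / 66; // pad_update_ticks above
    std::int64_t scheduled = 0;
    for (int i = 0; i < 5; ++i) {
        // Pretend the callback sometimes fires 0.3 ms late.
        const std::int64_t now = scheduled + (i % 2 ? 300000 : 0);
        const std::int64_t ns_late = now - scheduled;
        // now + (period_ns - ns_late) == scheduled + period_ns, so lateness never accumulates.
        scheduled = now + (period_ns - ns_late);
        std::cout << "fired at " << now << " ns, next at " << scheduled << " ns\n";
    }
}
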
@@ -185,8 +183,8 @@ Hid::Hid(Core::System& system) : ServiceFramework("hid"), system(system) {
185 {77, nullptr, "GetAccelerometerPlayMode"}, 183 {77, nullptr, "GetAccelerometerPlayMode"},
186 {78, nullptr, "ResetAccelerometerPlayMode"}, 184 {78, nullptr, "ResetAccelerometerPlayMode"},
187 {79, &Hid::SetGyroscopeZeroDriftMode, "SetGyroscopeZeroDriftMode"}, 185 {79, &Hid::SetGyroscopeZeroDriftMode, "SetGyroscopeZeroDriftMode"},
188 {80, nullptr, "GetGyroscopeZeroDriftMode"}, 186 {80, &Hid::GetGyroscopeZeroDriftMode, "GetGyroscopeZeroDriftMode"},
189 {81, nullptr, "ResetGyroscopeZeroDriftMode"}, 187 {81, &Hid::ResetGyroscopeZeroDriftMode, "ResetGyroscopeZeroDriftMode"},
190 {82, &Hid::IsSixAxisSensorAtRest, "IsSixAxisSensorAtRest"}, 188 {82, &Hid::IsSixAxisSensorAtRest, "IsSixAxisSensorAtRest"},
191 {83, nullptr, "IsFirmwareUpdateAvailableForSixAxisSensor"}, 189 {83, nullptr, "IsFirmwareUpdateAvailableForSixAxisSensor"},
192 {91, &Hid::ActivateGesture, "ActivateGesture"}, 190 {91, &Hid::ActivateGesture, "ActivateGesture"},
@@ -230,15 +228,15 @@ Hid::Hid(Core::System& system) : ServiceFramework("hid"), system(system) {
230 {211, nullptr, "IsVibrationDeviceMounted"}, 228 {211, nullptr, "IsVibrationDeviceMounted"},
231 {300, &Hid::ActivateConsoleSixAxisSensor, "ActivateConsoleSixAxisSensor"}, 229 {300, &Hid::ActivateConsoleSixAxisSensor, "ActivateConsoleSixAxisSensor"},
232 {301, &Hid::StartConsoleSixAxisSensor, "StartConsoleSixAxisSensor"}, 230 {301, &Hid::StartConsoleSixAxisSensor, "StartConsoleSixAxisSensor"},
233 {302, nullptr, "StopConsoleSixAxisSensor"}, 231 {302, &Hid::StopConsoleSixAxisSensor, "StopConsoleSixAxisSensor"},
234 {303, nullptr, "ActivateSevenSixAxisSensor"}, 232 {303, &Hid::ActivateSevenSixAxisSensor, "ActivateSevenSixAxisSensor"},
235 {304, nullptr, "StartSevenSixAxisSensor"}, 233 {304, &Hid::StartSevenSixAxisSensor, "StartSevenSixAxisSensor"},
236 {305, &Hid::StopSevenSixAxisSensor, "StopSevenSixAxisSensor"}, 234 {305, &Hid::StopSevenSixAxisSensor, "StopSevenSixAxisSensor"},
237 {306, &Hid::InitializeSevenSixAxisSensor, "InitializeSevenSixAxisSensor"}, 235 {306, &Hid::InitializeSevenSixAxisSensor, "InitializeSevenSixAxisSensor"},
238 {307, nullptr, "FinalizeSevenSixAxisSensor"}, 236 {307, &Hid::FinalizeSevenSixAxisSensor, "FinalizeSevenSixAxisSensor"},
239 {308, nullptr, "SetSevenSixAxisSensorFusionStrength"}, 237 {308, nullptr, "SetSevenSixAxisSensorFusionStrength"},
240 {309, nullptr, "GetSevenSixAxisSensorFusionStrength"}, 238 {309, nullptr, "GetSevenSixAxisSensorFusionStrength"},
241 {310, nullptr, "ResetSevenSixAxisSensorTimestamp"}, 239 {310, &Hid::ResetSevenSixAxisSensorTimestamp, "ResetSevenSixAxisSensorTimestamp"},
242 {400, nullptr, "IsUsbFullKeyControllerEnabled"}, 240 {400, nullptr, "IsUsbFullKeyControllerEnabled"},
243 {401, nullptr, "EnableUsbFullKeyController"}, 241 {401, nullptr, "EnableUsbFullKeyController"},
244 {402, nullptr, "IsUsbFullKeyControllerConnected"}, 242 {402, nullptr, "IsUsbFullKeyControllerConnected"},
@@ -374,6 +372,15 @@ void Hid::ActivateKeyboard(Kernel::HLERequestContext& ctx) {
374 rb.Push(RESULT_SUCCESS); 372 rb.Push(RESULT_SUCCESS);
375} 373}
376 374
375void Hid::SendKeyboardLockKeyEvent(Kernel::HLERequestContext& ctx) {
376 IPC::RequestParser rp{ctx};
377 const auto flags{rp.Pop<u32>()};
378 LOG_WARNING(Service_HID, "(STUBBED) called. flags={}", flags);
379
380 IPC::ResponseBuilder rb{ctx, 2};
381 rb.Push(RESULT_SUCCESS);
382}
383
377void Hid::ActivateGesture(Kernel::HLERequestContext& ctx) { 384void Hid::ActivateGesture(Kernel::HLERequestContext& ctx) {
378 IPC::RequestParser rp{ctx}; 385 IPC::RequestParser rp{ctx};
379 const auto unknown{rp.Pop<u32>()}; 386 const auto unknown{rp.Pop<u32>()};
@@ -413,15 +420,59 @@ void Hid::StartSixAxisSensor(Kernel::HLERequestContext& ctx) {
413 rb.Push(RESULT_SUCCESS); 420 rb.Push(RESULT_SUCCESS);
414} 421}
415 422
423void Hid::StopSixAxisSensor(Kernel::HLERequestContext& ctx) {
424 IPC::RequestParser rp{ctx};
425 const auto handle{rp.Pop<u32>()};
426 const auto applet_resource_user_id{rp.Pop<u64>()};
427
428 LOG_WARNING(Service_HID, "(STUBBED) called, handle={}, applet_resource_user_id={}", handle,
429 applet_resource_user_id);
430
431 IPC::ResponseBuilder rb{ctx, 2};
432 rb.Push(RESULT_SUCCESS);
433}
434
416void Hid::SetGyroscopeZeroDriftMode(Kernel::HLERequestContext& ctx) { 435void Hid::SetGyroscopeZeroDriftMode(Kernel::HLERequestContext& ctx) {
417 IPC::RequestParser rp{ctx}; 436 IPC::RequestParser rp{ctx};
418 const auto handle{rp.Pop<u32>()}; 437 const auto handle{rp.Pop<u32>()};
419 const auto drift_mode{rp.Pop<u32>()}; 438 const auto drift_mode{rp.Pop<u32>()};
420 const auto applet_resource_user_id{rp.Pop<u64>()}; 439 const auto applet_resource_user_id{rp.Pop<u64>()};
421 440
422 LOG_WARNING(Service_HID, 441 applet_resource->GetController<Controller_NPad>(HidController::NPad)
423 "(STUBBED) called, handle={}, drift_mode={}, applet_resource_user_id={}", handle, 442 .SetGyroscopeZeroDriftMode(Controller_NPad::GyroscopeZeroDriftMode{drift_mode});
424 drift_mode, applet_resource_user_id); 443
444 LOG_DEBUG(Service_HID, "called, handle={}, drift_mode={}, applet_resource_user_id={}", handle,
445 drift_mode, applet_resource_user_id);
446
447 IPC::ResponseBuilder rb{ctx, 2};
448 rb.Push(RESULT_SUCCESS);
449}
450
451void Hid::GetGyroscopeZeroDriftMode(Kernel::HLERequestContext& ctx) {
452 IPC::RequestParser rp{ctx};
453 const auto handle{rp.Pop<u32>()};
454 const auto applet_resource_user_id{rp.Pop<u64>()};
455
456 LOG_DEBUG(Service_HID, "called, handle={}, applet_resource_user_id={}", handle,
457 applet_resource_user_id);
458
459 IPC::ResponseBuilder rb{ctx, 3};
460 rb.Push(RESULT_SUCCESS);
461 rb.Push<u32>(
462 static_cast<u32>(applet_resource->GetController<Controller_NPad>(HidController::NPad)
463 .GetGyroscopeZeroDriftMode()));
464}
465
466void Hid::ResetGyroscopeZeroDriftMode(Kernel::HLERequestContext& ctx) {
467 IPC::RequestParser rp{ctx};
468 const auto handle{rp.Pop<u32>()};
469 const auto applet_resource_user_id{rp.Pop<u64>()};
470
471 applet_resource->GetController<Controller_NPad>(HidController::NPad)
472 .SetGyroscopeZeroDriftMode(Controller_NPad::GyroscopeZeroDriftMode::Standard);
473
474 LOG_DEBUG(Service_HID, "called, handle={}, applet_resource_user_id={}", handle,
475 applet_resource_user_id);
425 476
426 IPC::ResponseBuilder rb{ctx, 2}; 477 IPC::ResponseBuilder rb{ctx, 2};
427 rb.Push(RESULT_SUCCESS); 478 rb.Push(RESULT_SUCCESS);
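
Note that the setter builds the enum directly from the raw wire value, so an out-of-range u32 (anything above Tight) is stored unvalidated, and the getter casts it straight back. A standalone sketch of that round trip:

#include <cstdint>

enum class GyroscopeZeroDriftMode : std::uint32_t { Loose = 0, Standard = 1, Tight = 2 };

GyroscopeZeroDriftMode FromWire(std::uint32_t raw) {
    return GyroscopeZeroDriftMode{raw}; // no range check, matching the handler above
}

std::uint32_t ToWire(GyroscopeZeroDriftMode mode) {
    return static_cast<std::uint32_t>(mode);
}
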
@@ -832,33 +883,35 @@ void Hid::StartConsoleSixAxisSensor(Kernel::HLERequestContext& ctx) {
832 rb.Push(RESULT_SUCCESS); 883 rb.Push(RESULT_SUCCESS);
833} 884}
834 885
835void Hid::StopSixAxisSensor(Kernel::HLERequestContext& ctx) { 886void Hid::StopConsoleSixAxisSensor(Kernel::HLERequestContext& ctx) {
836 IPC::RequestParser rp{ctx}; 887 IPC::RequestParser rp{ctx};
837 const auto handle{rp.Pop<u32>()}; 888 const auto handle{rp.Pop<u32>()};
889 const auto applet_resource_user_id{rp.Pop<u64>()};
838 890
839 LOG_WARNING(Service_HID, "(STUBBED) called, handle={}", handle); 891 LOG_WARNING(Service_HID, "(STUBBED) called, handle={}, applet_resource_user_id={}", handle,
892 applet_resource_user_id);
840 893
841 IPC::ResponseBuilder rb{ctx, 2}; 894 IPC::ResponseBuilder rb{ctx, 2};
842 rb.Push(RESULT_SUCCESS); 895 rb.Push(RESULT_SUCCESS);
843} 896}
844 897
845void Hid::SetIsPalmaAllConnectable(Kernel::HLERequestContext& ctx) { 898void Hid::ActivateSevenSixAxisSensor(Kernel::HLERequestContext& ctx) {
846 IPC::RequestParser rp{ctx}; 899 IPC::RequestParser rp{ctx};
847 const auto applet_resource_user_id{rp.Pop<u64>()}; 900 const auto applet_resource_user_id{rp.Pop<u64>()};
848 const auto unknown{rp.Pop<u32>()};
849 901
850 LOG_WARNING(Service_HID, "(STUBBED) called, applet_resource_user_id={}, unknown={}", 902 LOG_WARNING(Service_HID, "(STUBBED) called, applet_resource_user_id={}",
851 applet_resource_user_id, unknown); 903 applet_resource_user_id);
852 904
853 IPC::ResponseBuilder rb{ctx, 2}; 905 IPC::ResponseBuilder rb{ctx, 2};
854 rb.Push(RESULT_SUCCESS); 906 rb.Push(RESULT_SUCCESS);
855} 907}
856 908
857void Hid::SetPalmaBoostMode(Kernel::HLERequestContext& ctx) { 909void Hid::StartSevenSixAxisSensor(Kernel::HLERequestContext& ctx) {
858 IPC::RequestParser rp{ctx}; 910 IPC::RequestParser rp{ctx};
859 const auto unknown{rp.Pop<u32>()}; 911 const auto applet_resource_user_id{rp.Pop<u64>()};
860 912
861 LOG_WARNING(Service_HID, "(STUBBED) called, unknown={}", unknown); 913 LOG_WARNING(Service_HID, "(STUBBED) called, applet_resource_user_id={}",
914 applet_resource_user_id);
862 915
863 IPC::ResponseBuilder rb{ctx, 2}; 916 IPC::ResponseBuilder rb{ctx, 2};
864 rb.Push(RESULT_SUCCESS); 917 rb.Push(RESULT_SUCCESS);
@@ -882,10 +935,46 @@ void Hid::InitializeSevenSixAxisSensor(Kernel::HLERequestContext& ctx) {
882 rb.Push(RESULT_SUCCESS); 935 rb.Push(RESULT_SUCCESS);
883} 936}
884 937
885void Hid::SendKeyboardLockKeyEvent(Kernel::HLERequestContext& ctx) { 938void Hid::FinalizeSevenSixAxisSensor(Kernel::HLERequestContext& ctx) {
886 IPC::RequestParser rp{ctx}; 939 IPC::RequestParser rp{ctx};
887 const auto flags{rp.Pop<u32>()}; 940 const auto applet_resource_user_id{rp.Pop<u64>()};
888 LOG_WARNING(Service_HID, "(STUBBED) called. flags={}", flags); 941
942 LOG_WARNING(Service_HID, "(STUBBED) called, applet_resource_user_id={}",
943 applet_resource_user_id);
944
945 IPC::ResponseBuilder rb{ctx, 2};
946 rb.Push(RESULT_SUCCESS);
947}
948
949void Hid::ResetSevenSixAxisSensorTimestamp(Kernel::HLERequestContext& ctx) {
950 IPC::RequestParser rp{ctx};
951 const auto applet_resource_user_id{rp.Pop<u64>()};
952
953 LOG_WARNING(Service_HID, "(STUBBED) called, applet_resource_user_id={}",
954 applet_resource_user_id);
955
956 IPC::ResponseBuilder rb{ctx, 2};
957 rb.Push(RESULT_SUCCESS);
958}
959
960void Hid::SetIsPalmaAllConnectable(Kernel::HLERequestContext& ctx) {
961 IPC::RequestParser rp{ctx};
962 const auto applet_resource_user_id{rp.Pop<u64>()};
963 const auto is_palma_all_connectable{rp.Pop<bool>()};
964
965 LOG_WARNING(Service_HID,
966 "(STUBBED) called, applet_resource_user_id={}, is_palma_all_connectable={}",
967 applet_resource_user_id, is_palma_all_connectable);
968
969 IPC::ResponseBuilder rb{ctx, 2};
970 rb.Push(RESULT_SUCCESS);
971}
972
973void Hid::SetPalmaBoostMode(Kernel::HLERequestContext& ctx) {
974 IPC::RequestParser rp{ctx};
975 const auto palma_boost_mode{rp.Pop<bool>()};
976
977 LOG_WARNING(Service_HID, "(STUBBED) called, palma_boost_mode={}", palma_boost_mode);
889 978
890 IPC::ResponseBuilder rb{ctx, 2}; 979 IPC::ResponseBuilder rb{ctx, 2};
891 rb.Push(RESULT_SUCCESS); 980 rb.Push(RESULT_SUCCESS);
diff --git a/src/core/hle/service/hid/hid.h b/src/core/hle/service/hid/hid.h
index d481a75f8..6fb048360 100644
--- a/src/core/hle/service/hid/hid.h
+++ b/src/core/hle/service/hid/hid.h
@@ -91,10 +91,14 @@ private:
91 void ActivateTouchScreen(Kernel::HLERequestContext& ctx); 91 void ActivateTouchScreen(Kernel::HLERequestContext& ctx);
92 void ActivateMouse(Kernel::HLERequestContext& ctx); 92 void ActivateMouse(Kernel::HLERequestContext& ctx);
93 void ActivateKeyboard(Kernel::HLERequestContext& ctx); 93 void ActivateKeyboard(Kernel::HLERequestContext& ctx);
94 void SendKeyboardLockKeyEvent(Kernel::HLERequestContext& ctx);
94 void ActivateGesture(Kernel::HLERequestContext& ctx); 95 void ActivateGesture(Kernel::HLERequestContext& ctx);
95 void ActivateNpadWithRevision(Kernel::HLERequestContext& ctx); 96 void ActivateNpadWithRevision(Kernel::HLERequestContext& ctx);
96 void StartSixAxisSensor(Kernel::HLERequestContext& ctx); 97 void StartSixAxisSensor(Kernel::HLERequestContext& ctx);
98 void StopSixAxisSensor(Kernel::HLERequestContext& ctx);
97 void SetGyroscopeZeroDriftMode(Kernel::HLERequestContext& ctx); 99 void SetGyroscopeZeroDriftMode(Kernel::HLERequestContext& ctx);
100 void GetGyroscopeZeroDriftMode(Kernel::HLERequestContext& ctx);
101 void ResetGyroscopeZeroDriftMode(Kernel::HLERequestContext& ctx);
98 void IsSixAxisSensorAtRest(Kernel::HLERequestContext& ctx); 102 void IsSixAxisSensorAtRest(Kernel::HLERequestContext& ctx);
99 void SetSupportedNpadStyleSet(Kernel::HLERequestContext& ctx); 103 void SetSupportedNpadStyleSet(Kernel::HLERequestContext& ctx);
100 void GetSupportedNpadStyleSet(Kernel::HLERequestContext& ctx); 104 void GetSupportedNpadStyleSet(Kernel::HLERequestContext& ctx);
@@ -126,12 +130,15 @@ private:
126 void IsVibrationPermitted(Kernel::HLERequestContext& ctx); 130 void IsVibrationPermitted(Kernel::HLERequestContext& ctx);
127 void ActivateConsoleSixAxisSensor(Kernel::HLERequestContext& ctx); 131 void ActivateConsoleSixAxisSensor(Kernel::HLERequestContext& ctx);
128 void StartConsoleSixAxisSensor(Kernel::HLERequestContext& ctx); 132 void StartConsoleSixAxisSensor(Kernel::HLERequestContext& ctx);
129 void StopSixAxisSensor(Kernel::HLERequestContext& ctx); 133 void StopConsoleSixAxisSensor(Kernel::HLERequestContext& ctx);
130 void SetIsPalmaAllConnectable(Kernel::HLERequestContext& ctx); 134 void ActivateSevenSixAxisSensor(Kernel::HLERequestContext& ctx);
131 void SetPalmaBoostMode(Kernel::HLERequestContext& ctx); 135 void StartSevenSixAxisSensor(Kernel::HLERequestContext& ctx);
132 void StopSevenSixAxisSensor(Kernel::HLERequestContext& ctx); 136 void StopSevenSixAxisSensor(Kernel::HLERequestContext& ctx);
133 void InitializeSevenSixAxisSensor(Kernel::HLERequestContext& ctx); 137 void InitializeSevenSixAxisSensor(Kernel::HLERequestContext& ctx);
134 void SendKeyboardLockKeyEvent(Kernel::HLERequestContext& ctx); 138 void FinalizeSevenSixAxisSensor(Kernel::HLERequestContext& ctx);
139 void ResetSevenSixAxisSensorTimestamp(Kernel::HLERequestContext& ctx);
140 void SetIsPalmaAllConnectable(Kernel::HLERequestContext& ctx);
141 void SetPalmaBoostMode(Kernel::HLERequestContext& ctx);
135 142
136 std::shared_ptr<IAppletResource> applet_resource; 143 std::shared_ptr<IAppletResource> applet_resource;
137 Core::System& system; 144 Core::System& system;
diff --git a/src/core/hle/service/hid/irs.cpp b/src/core/hle/service/hid/irs.cpp
index 36ed6f7da..e82fd031b 100644
--- a/src/core/hle/service/hid/irs.cpp
+++ b/src/core/hle/service/hid/irs.cpp
@@ -98,7 +98,7 @@ void IRS::GetImageTransferProcessorState(Kernel::HLERequestContext& ctx) {
98 98
99 IPC::ResponseBuilder rb{ctx, 5}; 99 IPC::ResponseBuilder rb{ctx, 5};
100 rb.Push(RESULT_SUCCESS); 100 rb.Push(RESULT_SUCCESS);
101 rb.PushRaw<u64>(system.CoreTiming().GetTicks()); 101 rb.PushRaw<u64>(system.CoreTiming().GetCPUTicks());
102 rb.PushRaw<u32>(0); 102 rb.PushRaw<u32>(0);
103} 103}
104 104
diff --git a/src/core/hle/service/lbl/lbl.cpp b/src/core/hle/service/lbl/lbl.cpp
index e8f9f2d29..17350b403 100644
--- a/src/core/hle/service/lbl/lbl.cpp
+++ b/src/core/hle/service/lbl/lbl.cpp
@@ -47,6 +47,7 @@ public:
47 {26, &LBL::EnableVrMode, "EnableVrMode"}, 47 {26, &LBL::EnableVrMode, "EnableVrMode"},
48 {27, &LBL::DisableVrMode, "DisableVrMode"}, 48 {27, &LBL::DisableVrMode, "DisableVrMode"},
49 {28, &LBL::IsVrModeEnabled, "IsVrModeEnabled"}, 49 {28, &LBL::IsVrModeEnabled, "IsVrModeEnabled"},
50 {29, nullptr, "IsAutoBrightnessControlSupported"},
50 }; 51 };
51 // clang-format on 52 // clang-format on
52 53
diff --git a/src/core/hle/service/ldn/ldn.cpp b/src/core/hle/service/ldn/ldn.cpp
index 92adde6d4..49972cd69 100644
--- a/src/core/hle/service/ldn/ldn.cpp
+++ b/src/core/hle/service/ldn/ldn.cpp
@@ -69,6 +69,7 @@ public:
69 {101, nullptr, "GetNetworkInfoLatestUpdate"}, 69 {101, nullptr, "GetNetworkInfoLatestUpdate"},
70 {102, nullptr, "Scan"}, 70 {102, nullptr, "Scan"},
71 {103, nullptr, "ScanPrivate"}, 71 {103, nullptr, "ScanPrivate"},
72 {104, nullptr, "SetWirelessControllerRestriction"},
72 {200, nullptr, "OpenAccessPoint"}, 73 {200, nullptr, "OpenAccessPoint"},
73 {201, nullptr, "CloseAccessPoint"}, 74 {201, nullptr, "CloseAccessPoint"},
74 {202, nullptr, "CreateNetwork"}, 75 {202, nullptr, "CreateNetwork"},
diff --git a/src/core/hle/service/ldr/ldr.cpp b/src/core/hle/service/ldr/ldr.cpp
index 6ad3be1b3..64a526b9e 100644
--- a/src/core/hle/service/ldr/ldr.cpp
+++ b/src/core/hle/service/ldr/ldr.cpp
@@ -39,42 +39,61 @@ constexpr ResultCode ERROR_NOT_INITIALIZED{ErrorModule::Loader, 87};
39constexpr std::size_t MAXIMUM_LOADED_RO{0x40}; 39constexpr std::size_t MAXIMUM_LOADED_RO{0x40};
40constexpr std::size_t MAXIMUM_MAP_RETRIES{0x200}; 40constexpr std::size_t MAXIMUM_MAP_RETRIES{0x200};
41 41
42constexpr std::size_t TEXT_INDEX{0};
43constexpr std::size_t RO_INDEX{1};
44constexpr std::size_t DATA_INDEX{2};
45
46struct NRRCertification {
47 u64_le application_id_mask;
48 u64_le application_id_pattern;
49 INSERT_PADDING_BYTES(0x10);
50 std::array<u8, 0x100> public_key; // Also known as modulus
51 std::array<u8, 0x100> signature;
52};
53static_assert(sizeof(NRRCertification) == 0x220, "NRRCertification has invalid size.");
54
42struct NRRHeader { 55struct NRRHeader {
43 u32_le magic; 56 u32_le magic;
44 INSERT_PADDING_BYTES(12); 57 u32_le certification_signature_key_generation; // 9.0.0+
45 u64_le title_id_mask; 58 INSERT_PADDING_WORDS(2);
46 u64_le title_id_pattern; 59 NRRCertification certification;
47 INSERT_PADDING_BYTES(16); 60 std::array<u8, 0x100> signature;
48 std::array<u8, 0x100> modulus; 61 u64_le application_id;
49 std::array<u8, 0x100> signature_1;
50 std::array<u8, 0x100> signature_2;
51 u64_le title_id;
52 u32_le size; 62 u32_le size;
53 INSERT_PADDING_BYTES(4); 63 u8 nrr_kind; // 7.0.0+
64 INSERT_PADDING_BYTES(3);
54 u32_le hash_offset; 65 u32_le hash_offset;
55 u32_le hash_count; 66 u32_le hash_count;
56 INSERT_PADDING_BYTES(8); 67 INSERT_PADDING_WORDS(2);
68};
69static_assert(sizeof(NRRHeader) == 0x350, "NRRHeader has invalid size.");
70
71struct SegmentHeader {
72 u32_le memory_offset;
73 u32_le memory_size;
57}; 74};
58static_assert(sizeof(NRRHeader) == 0x350, "NRRHeader has incorrect size."); 75static_assert(sizeof(SegmentHeader) == 0x8, "SegmentHeader has invalid size.");
59 76
60struct NROHeader { 77struct NROHeader {
78 // Switchbrew calls this "Start" (0x10)
61 INSERT_PADDING_WORDS(1); 79 INSERT_PADDING_WORDS(1);
62 u32_le mod_offset; 80 u32_le mod_offset;
63 INSERT_PADDING_WORDS(2); 81 INSERT_PADDING_WORDS(2);
82
83 // Switchbrew calls this "Header" (0x70)
64 u32_le magic; 84 u32_le magic;
65 u32_le version; 85 u32_le version;
66 u32_le nro_size; 86 u32_le nro_size;
67 u32_le flags; 87 u32_le flags;
68 u32_le text_offset; 88 // .text, .ro, .data
69 u32_le text_size; 89 std::array<SegmentHeader, 3> segment_headers;
70 u32_le ro_offset;
71 u32_le ro_size;
72 u32_le rw_offset;
73 u32_le rw_size;
74 u32_le bss_size; 90 u32_le bss_size;
75 INSERT_PADDING_WORDS(1); 91 INSERT_PADDING_WORDS(1);
76 std::array<u8, 0x20> build_id; 92 std::array<u8, 0x20> build_id;
77 INSERT_PADDING_BYTES(0x20); 93 u32_le dso_handle_offset;
94 INSERT_PADDING_WORDS(1);
95 // .apiInfo, .dynstr, .dynsym
96 std::array<SegmentHeader, 3> segment_headers_2;
78}; 97};
79static_assert(sizeof(NROHeader) == 0x80, "NROHeader has invalid size."); 98static_assert(sizeof(NROHeader) == 0x80, "NROHeader has invalid size.");
80 99
@@ -91,6 +110,7 @@ struct NROInfo {
91 std::size_t data_size{}; 110 std::size_t data_size{};
92 VAddr src_addr{}; 111 VAddr src_addr{};
93}; 112};
113static_assert(sizeof(NROInfo) == 0x60, "NROInfo has invalid size.");
94 114
95class DebugMonitor final : public ServiceFramework<DebugMonitor> { 115class DebugMonitor final : public ServiceFramework<DebugMonitor> {
96public: 116public:
@@ -226,11 +246,11 @@ public:
226 return; 246 return;
227 } 247 }
228 248
229 if (system.CurrentProcess()->GetTitleID() != header.title_id) { 249 if (system.CurrentProcess()->GetTitleID() != header.application_id) {
230 LOG_ERROR(Service_LDR, 250 LOG_ERROR(Service_LDR,
231 "Attempting to load NRR with title ID other than current process. (actual " 251 "Attempting to load NRR with title ID other than current process. (actual "
232 "{:016X})!", 252 "{:016X})!",
233 header.title_id); 253 header.application_id);
234 IPC::ResponseBuilder rb{ctx, 2}; 254 IPC::ResponseBuilder rb{ctx, 2};
235 rb.Push(ERROR_INVALID_NRR); 255 rb.Push(ERROR_INVALID_NRR);
236 return; 256 return;
@@ -348,10 +368,10 @@ public:
348 368
349 ResultCode LoadNro(Kernel::Process* process, const NROHeader& nro_header, VAddr nro_addr, 369 ResultCode LoadNro(Kernel::Process* process, const NROHeader& nro_header, VAddr nro_addr,
350 VAddr start) const { 370 VAddr start) const {
351 const VAddr text_start{start + nro_header.text_offset}; 371 const VAddr text_start{start + nro_header.segment_headers[TEXT_INDEX].memory_offset};
352 const VAddr ro_start{start + nro_header.ro_offset}; 372 const VAddr ro_start{start + nro_header.segment_headers[RO_INDEX].memory_offset};
353 const VAddr data_start{start + nro_header.rw_offset}; 373 const VAddr data_start{start + nro_header.segment_headers[DATA_INDEX].memory_offset};
354 const VAddr bss_start{data_start + nro_header.rw_size}; 374 const VAddr bss_start{data_start + nro_header.segment_headers[DATA_INDEX].memory_size};
355 const VAddr bss_end_addr{ 375 const VAddr bss_end_addr{
356 Common::AlignUp(bss_start + nro_header.bss_size, Kernel::Memory::PageSize)}; 376 Common::AlignUp(bss_start + nro_header.bss_size, Kernel::Memory::PageSize)};
357 377
@@ -360,9 +380,12 @@ public:
360 system.Memory().ReadBlock(src_addr, source_data.data(), source_data.size()); 380 system.Memory().ReadBlock(src_addr, source_data.data(), source_data.size());
361 system.Memory().WriteBlock(dst_addr, source_data.data(), source_data.size()); 381 system.Memory().WriteBlock(dst_addr, source_data.data(), source_data.size());
362 }}; 382 }};
363 CopyCode(nro_addr + nro_header.text_offset, text_start, nro_header.text_size); 383 CopyCode(nro_addr + nro_header.segment_headers[TEXT_INDEX].memory_offset, text_start,
364 CopyCode(nro_addr + nro_header.ro_offset, ro_start, nro_header.ro_size); 384 nro_header.segment_headers[TEXT_INDEX].memory_size);
365 CopyCode(nro_addr + nro_header.rw_offset, data_start, nro_header.rw_size); 385 CopyCode(nro_addr + nro_header.segment_headers[RO_INDEX].memory_offset, ro_start,
386 nro_header.segment_headers[RO_INDEX].memory_size);
387 CopyCode(nro_addr + nro_header.segment_headers[DATA_INDEX].memory_offset, data_start,
388 nro_header.segment_headers[DATA_INDEX].memory_size);
366 389
367 CASCADE_CODE(process->PageTable().SetCodeMemoryPermission( 390 CASCADE_CODE(process->PageTable().SetCodeMemoryPermission(
368 text_start, ro_start - text_start, Kernel::Memory::MemoryPermission::ReadAndExecute)); 391 text_start, ro_start - text_start, Kernel::Memory::MemoryPermission::ReadAndExecute));
@@ -484,9 +507,11 @@ public:
484 } 507 }
485 508
486 // Track the loaded NRO 509 // Track the loaded NRO
487 nro.insert_or_assign(*map_result, NROInfo{hash, *map_result, nro_size, bss_address, 510 nro.insert_or_assign(*map_result,
488 bss_size, header.text_size, header.ro_size, 511 NROInfo{hash, *map_result, nro_size, bss_address, bss_size,
489 header.rw_size, nro_address}); 512 header.segment_headers[TEXT_INDEX].memory_size,
513 header.segment_headers[RO_INDEX].memory_size,
514 header.segment_headers[DATA_INDEX].memory_size, nro_address});
490 515
491 // Invalidate JIT caches for the newly mapped process code 516 // Invalidate JIT caches for the newly mapped process code
492 system.InvalidateCpuInstructionCaches(); 517 system.InvalidateCpuInstructionCaches();
@@ -584,11 +609,21 @@ private:
584 static bool IsValidNRO(const NROHeader& header, u64 nro_size, u64 bss_size) { 609 static bool IsValidNRO(const NROHeader& header, u64 nro_size, u64 bss_size) {
585 return header.magic == Common::MakeMagic('N', 'R', 'O', '0') && 610 return header.magic == Common::MakeMagic('N', 'R', 'O', '0') &&
586 header.nro_size == nro_size && header.bss_size == bss_size && 611 header.nro_size == nro_size && header.bss_size == bss_size &&
587 header.ro_offset == header.text_offset + header.text_size && 612
588 header.rw_offset == header.ro_offset + header.ro_size && 613 header.segment_headers[RO_INDEX].memory_offset ==
589 nro_size == header.rw_offset + header.rw_size && 614 header.segment_headers[TEXT_INDEX].memory_offset +
590 Common::Is4KBAligned(header.text_size) && Common::Is4KBAligned(header.ro_size) && 615 header.segment_headers[TEXT_INDEX].memory_size &&
591 Common::Is4KBAligned(header.rw_size); 616
617 header.segment_headers[DATA_INDEX].memory_offset ==
618 header.segment_headers[RO_INDEX].memory_offset +
619 header.segment_headers[RO_INDEX].memory_size &&
620
621 nro_size == header.segment_headers[DATA_INDEX].memory_offset +
622 header.segment_headers[DATA_INDEX].memory_size &&
623
624 Common::Is4KBAligned(header.segment_headers[TEXT_INDEX].memory_size) &&
625 Common::Is4KBAligned(header.segment_headers[RO_INDEX].memory_size) &&
626 Common::Is4KBAligned(header.segment_headers[DATA_INDEX].memory_size);
592 } 627 }
593 Core::System& system; 628 Core::System& system;
594}; 629};
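
Spelled out, IsValidNRO now demands that .text, .ro, and .data be laid out back to back, cover the whole file, and each be 4 KiB aligned. The same rules as a standalone predicate over the three segment headers:

#include <array>
#include <cstdint>

struct Segment {
    std::uint32_t offset;
    std::uint32_t size;
};

bool SegmentsAreValid(const std::array<Segment, 3>& s, std::uint64_t nro_size) {
    const auto aligned_4k = [](std::uint32_t v) { return (v & 0xFFF) == 0; };
    return s[1].offset == s[0].offset + s[0].size &&             // .ro follows .text
           s[2].offset == s[1].offset + s[1].size &&             // .data follows .ro
           nro_size == std::uint64_t{s[2].offset} + s[2].size && // segments cover the NRO
           aligned_4k(s[0].size) && aligned_4k(s[1].size) && aligned_4k(s[2].size);
}
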
diff --git a/src/core/hle/service/lm/manager.cpp b/src/core/hle/service/lm/manager.cpp
index b67081b86..3ee2374e7 100644
--- a/src/core/hle/service/lm/manager.cpp
+++ b/src/core/hle/service/lm/manager.cpp
@@ -86,7 +86,8 @@ std::string FormatField(Field type, const std::vector<u8>& data) {
86 return Common::StringFromFixedZeroTerminatedBuffer( 86 return Common::StringFromFixedZeroTerminatedBuffer(
87 reinterpret_cast<const char*>(data.data()), data.size()); 87 reinterpret_cast<const char*>(data.data()), data.size());
88 default: 88 default:
89 UNIMPLEMENTED(); 89 UNIMPLEMENTED_MSG("Unimplemented field type={}", type);
90 return "";
90 } 91 }
91} 92}
92 93
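
Beyond the richer message, the default branch now returns a value; previously a field with an unknown type would fall off the end of a non-void function, which is undefined behavior in C++. A standalone model of the fixed shape, with illustrative Field values:

#include <cstdint>
#include <string>
#include <vector>

enum class Field : std::uint32_t { Skip = 1, Message = 2 };

std::string FormatFieldModel(Field type, const std::vector<std::uint8_t>& data) {
    switch (type) {
    case Field::Message:
        return std::string(data.begin(), data.end());
    default:
        // yuzu logs via UNIMPLEMENTED_MSG here; the explicit fallback value is the fix.
        return "";
    }
}
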
diff --git a/src/core/hle/service/mig/mig.cpp b/src/core/hle/service/mig/mig.cpp
index d16367f2c..113a4665c 100644
--- a/src/core/hle/service/mig/mig.cpp
+++ b/src/core/hle/service/mig/mig.cpp
@@ -20,6 +20,12 @@ public:
20 {101, nullptr, "ResumeServer"}, 20 {101, nullptr, "ResumeServer"},
21 {200, nullptr, "CreateClient"}, 21 {200, nullptr, "CreateClient"},
22 {201, nullptr, "ResumeClient"}, 22 {201, nullptr, "ResumeClient"},
23 {1001, nullptr, "Unknown1001"},
24 {1010, nullptr, "Unknown1010"},
25 {1100, nullptr, "Unknown1100"},
26 {1101, nullptr, "Unknown1101"},
27 {1200, nullptr, "Unknown1200"},
28 {1201, nullptr, "Unknown1201"}
23 }; 29 };
24 // clang-format on 30 // clang-format on
25 31
diff --git a/src/core/hle/service/mm/mm_u.cpp b/src/core/hle/service/mm/mm_u.cpp
index def63dc8a..25c24e537 100644
--- a/src/core/hle/service/mm/mm_u.cpp
+++ b/src/core/hle/service/mm/mm_u.cpp
@@ -14,14 +14,14 @@ public:
14 explicit MM_U() : ServiceFramework{"mm:u"} { 14 explicit MM_U() : ServiceFramework{"mm:u"} {
15 // clang-format off 15 // clang-format off
16 static const FunctionInfo functions[] = { 16 static const FunctionInfo functions[] = {
17 {0, &MM_U::Initialize, "Initialize"}, 17 {0, &MM_U::InitializeOld, "InitializeOld"},
18 {1, &MM_U::Finalize, "Finalize"}, 18 {1, &MM_U::FinalizeOld, "FinalizeOld"},
19 {2, &MM_U::SetAndWait, "SetAndWait"}, 19 {2, &MM_U::SetAndWaitOld, "SetAndWaitOld"},
20 {3, &MM_U::Get, "Get"}, 20 {3, &MM_U::GetOld, "GetOld"},
21 {4, &MM_U::InitializeWithId, "InitializeWithId"}, 21 {4, &MM_U::Initialize, "Initialize"},
22 {5, &MM_U::FinalizeWithId, "FinalizeWithId"}, 22 {5, &MM_U::Finalize, "Finalize"},
23 {6, &MM_U::SetAndWaitWithId, "SetAndWaitWithId"}, 23 {6, &MM_U::SetAndWait, "SetAndWait"},
24 {7, &MM_U::GetWithId, "GetWithId"}, 24 {7, &MM_U::Get, "Get"},
25 }; 25 };
26 // clang-format on 26 // clang-format on
27 27
@@ -29,21 +29,21 @@ public:
29 } 29 }
30 30
31private: 31private:
32 void Initialize(Kernel::HLERequestContext& ctx) { 32 void InitializeOld(Kernel::HLERequestContext& ctx) {
33 LOG_WARNING(Service_MM, "(STUBBED) called"); 33 LOG_WARNING(Service_MM, "(STUBBED) called");
34 34
35 IPC::ResponseBuilder rb{ctx, 2}; 35 IPC::ResponseBuilder rb{ctx, 2};
36 rb.Push(RESULT_SUCCESS); 36 rb.Push(RESULT_SUCCESS);
37 } 37 }
38 38
39 void Finalize(Kernel::HLERequestContext& ctx) { 39 void FinalizeOld(Kernel::HLERequestContext& ctx) {
40 LOG_WARNING(Service_MM, "(STUBBED) called"); 40 LOG_WARNING(Service_MM, "(STUBBED) called");
41 41
42 IPC::ResponseBuilder rb{ctx, 2}; 42 IPC::ResponseBuilder rb{ctx, 2};
43 rb.Push(RESULT_SUCCESS); 43 rb.Push(RESULT_SUCCESS);
44 } 44 }
45 45
46 void SetAndWait(Kernel::HLERequestContext& ctx) { 46 void SetAndWaitOld(Kernel::HLERequestContext& ctx) {
47 IPC::RequestParser rp{ctx}; 47 IPC::RequestParser rp{ctx};
48 min = rp.Pop<u32>(); 48 min = rp.Pop<u32>();
49 max = rp.Pop<u32>(); 49 max = rp.Pop<u32>();
@@ -54,7 +54,7 @@ private:
54 rb.Push(RESULT_SUCCESS); 54 rb.Push(RESULT_SUCCESS);
55 } 55 }
56 56
57 void Get(Kernel::HLERequestContext& ctx) { 57 void GetOld(Kernel::HLERequestContext& ctx) {
58 LOG_WARNING(Service_MM, "(STUBBED) called"); 58 LOG_WARNING(Service_MM, "(STUBBED) called");
59 59
60 IPC::ResponseBuilder rb{ctx, 3}; 60 IPC::ResponseBuilder rb{ctx, 3};
@@ -62,7 +62,7 @@ private:
62 rb.Push(current); 62 rb.Push(current);
63 } 63 }
64 64
65 void InitializeWithId(Kernel::HLERequestContext& ctx) { 65 void Initialize(Kernel::HLERequestContext& ctx) {
66 LOG_WARNING(Service_MM, "(STUBBED) called"); 66 LOG_WARNING(Service_MM, "(STUBBED) called");
67 67
68 IPC::ResponseBuilder rb{ctx, 3}; 68 IPC::ResponseBuilder rb{ctx, 3};
@@ -70,14 +70,14 @@ private:
70 rb.Push<u32>(id); // Any non zero value 70 rb.Push<u32>(id); // Any non zero value
71 } 71 }
72 72
73 void FinalizeWithId(Kernel::HLERequestContext& ctx) { 73 void Finalize(Kernel::HLERequestContext& ctx) {
74 LOG_WARNING(Service_MM, "(STUBBED) called"); 74 LOG_WARNING(Service_MM, "(STUBBED) called");
75 75
76 IPC::ResponseBuilder rb{ctx, 2}; 76 IPC::ResponseBuilder rb{ctx, 2};
77 rb.Push(RESULT_SUCCESS); 77 rb.Push(RESULT_SUCCESS);
78 } 78 }
79 79
80 void SetAndWaitWithId(Kernel::HLERequestContext& ctx) { 80 void SetAndWait(Kernel::HLERequestContext& ctx) {
81 IPC::RequestParser rp{ctx}; 81 IPC::RequestParser rp{ctx};
82 u32 input_id = rp.Pop<u32>(); 82 u32 input_id = rp.Pop<u32>();
83 min = rp.Pop<u32>(); 83 min = rp.Pop<u32>();
@@ -90,7 +90,7 @@ private:
90 rb.Push(RESULT_SUCCESS); 90 rb.Push(RESULT_SUCCESS);
91 } 91 }
92 92
93 void GetWithId(Kernel::HLERequestContext& ctx) { 93 void Get(Kernel::HLERequestContext& ctx) {
94 LOG_WARNING(Service_MM, "(STUBBED) called"); 94 LOG_WARNING(Service_MM, "(STUBBED) called");
95 95
96 IPC::ResponseBuilder rb{ctx, 3}; 96 IPC::ResponseBuilder rb{ctx, 3};
diff --git a/src/core/hle/service/ncm/ncm.cpp b/src/core/hle/service/ncm/ncm.cpp
index ec9aae04a..e38dea1f4 100644
--- a/src/core/hle/service/ncm/ncm.cpp
+++ b/src/core/hle/service/ncm/ncm.cpp
@@ -28,16 +28,16 @@ public:
28 {7, nullptr, "ResolveApplicationLegalInformationPath"}, 28 {7, nullptr, "ResolveApplicationLegalInformationPath"},
29 {8, nullptr, "RedirectApplicationLegalInformationPath"}, 29 {8, nullptr, "RedirectApplicationLegalInformationPath"},
30 {9, nullptr, "Refresh"}, 30 {9, nullptr, "Refresh"},
31 {10, nullptr, "RedirectProgramPath2"}, 31 {10, nullptr, "RedirectApplicationProgramPath"},
32 {11, nullptr, "Refresh2"}, 32 {11, nullptr, "ClearApplicationRedirection"},
33 {12, nullptr, "DeleteProgramPath"}, 33 {12, nullptr, "EraseProgramRedirection"},
34 {13, nullptr, "DeleteApplicationControlPath"}, 34 {13, nullptr, "EraseApplicationControlRedirection"},
35 {14, nullptr, "DeleteApplicationHtmlDocumentPath"}, 35 {14, nullptr, "EraseApplicationHtmlDocumentRedirection"},
36 {15, nullptr, "DeleteApplicationLegalInformationPath"}, 36 {15, nullptr, "EraseApplicationLegalInformationRedirection"},
37 {16, nullptr, ""}, 37 {16, nullptr, "ResolveProgramPathForDebug"},
38 {17, nullptr, ""}, 38 {17, nullptr, "RedirectProgramPathForDebug"},
39 {18, nullptr, ""}, 39 {18, nullptr, "RedirectApplicationProgramPathForDebug"},
40 {19, nullptr, ""}, 40 {19, nullptr, "EraseProgramRedirectionForDebug"},
41 }; 41 };
42 // clang-format on 42 // clang-format on
43 43
diff --git a/src/core/hle/service/nfc/nfc.cpp b/src/core/hle/service/nfc/nfc.cpp
index b7b34ce7e..780ea30fe 100644
--- a/src/core/hle/service/nfc/nfc.cpp
+++ b/src/core/hle/service/nfc/nfc.cpp
@@ -198,9 +198,9 @@ public:
198 static const FunctionInfo functions[] = { 198 static const FunctionInfo functions[] = {
199 {0, nullptr, "Initialize"}, 199 {0, nullptr, "Initialize"},
200 {1, nullptr, "Finalize"}, 200 {1, nullptr, "Finalize"},
201 {2, nullptr, "GetState"}, 201 {2, nullptr, "GetStateOld"},
202 {3, nullptr, "IsNfcEnabled"}, 202 {3, nullptr, "IsNfcEnabledOld"},
203 {100, nullptr, "SetNfcEnabled"}, 203 {100, nullptr, "SetNfcEnabledOld"},
204 {400, nullptr, "InitializeSystem"}, 204 {400, nullptr, "InitializeSystem"},
205 {401, nullptr, "FinalizeSystem"}, 205 {401, nullptr, "FinalizeSystem"},
206 {402, nullptr, "GetState"}, 206 {402, nullptr, "GetState"},
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_ctrl_gpu.cpp b/src/core/hle/service/nvdrv/devices/nvhost_ctrl_gpu.cpp
index cc2192e5c..fba89e7a6 100644
--- a/src/core/hle/service/nvdrv/devices/nvhost_ctrl_gpu.cpp
+++ b/src/core/hle/service/nvdrv/devices/nvhost_ctrl_gpu.cpp
@@ -25,7 +25,7 @@ u32 nvhost_ctrl_gpu::ioctl(Ioctl command, const std::vector<u8>& input,
25 case IoctlCommand::IocGetCharacteristicsCommand: 25 case IoctlCommand::IocGetCharacteristicsCommand:
26 return GetCharacteristics(input, output, output2, version); 26 return GetCharacteristics(input, output, output2, version);
27 case IoctlCommand::IocGetTPCMasksCommand: 27 case IoctlCommand::IocGetTPCMasksCommand:
28 return GetTPCMasks(input, output); 28 return GetTPCMasks(input, output, output2, version);
29 case IoctlCommand::IocGetActiveSlotMaskCommand: 29 case IoctlCommand::IocGetActiveSlotMaskCommand:
30 return GetActiveSlotMask(input, output); 30 return GetActiveSlotMask(input, output);
31 case IoctlCommand::IocZcullGetCtxSizeCommand: 31 case IoctlCommand::IocZcullGetCtxSizeCommand:
@@ -98,17 +98,22 @@ u32 nvhost_ctrl_gpu::GetCharacteristics(const std::vector<u8>& input, std::vecto
98 return 0; 98 return 0;
99} 99}
100 100
101u32 nvhost_ctrl_gpu::GetTPCMasks(const std::vector<u8>& input, std::vector<u8>& output) { 101u32 nvhost_ctrl_gpu::GetTPCMasks(const std::vector<u8>& input, std::vector<u8>& output,
102 std::vector<u8>& output2, IoctlVersion version) {
102 IoctlGpuGetTpcMasksArgs params{}; 103 IoctlGpuGetTpcMasksArgs params{};
103 std::memcpy(&params, input.data(), input.size()); 104 std::memcpy(&params, input.data(), input.size());
104 LOG_INFO(Service_NVDRV, "called, mask=0x{:X}, mask_buf_addr=0x{:X}", params.mask_buf_size, 105 LOG_DEBUG(Service_NVDRV, "called, mask_buffer_size=0x{:X}", params.mask_buffer_size);
105 params.mask_buf_addr); 106 if (params.mask_buffer_size != 0) {
106 // TODO(ogniK): Confirm value on hardware 107 params.tcp_mask = 3;
107 if (params.mask_buf_size) 108 }
108 params.tpc_mask_size = 4 * 1; // 4 * num_gpc 109
109 else 110 if (version == IoctlVersion::Version3) {
110 params.tpc_mask_size = 0; 111 std::memcpy(output.data(), input.data(), output.size());
111 std::memcpy(output.data(), &params, sizeof(params)); 112 std::memcpy(output2.data(), &params.tcp_mask, output2.size());
113 } else {
114 std::memcpy(output.data(), &params, output.size());
115 }
116
112 return 0; 117 return 0;
113} 118}
114 119
@@ -195,8 +200,7 @@ u32 nvhost_ctrl_gpu::GetGpuTime(const std::vector<u8>& input, std::vector<u8>& o
195 200
196 IoctlGetGpuTime params{}; 201 IoctlGetGpuTime params{};
197 std::memcpy(&params, input.data(), input.size()); 202 std::memcpy(&params, input.data(), input.size());
198 const auto ns = Core::Timing::CyclesToNs(system.CoreTiming().GetTicks()); 203 params.gpu_time = static_cast<u64_le>(system.CoreTiming().GetGlobalTimeNs().count());
199 params.gpu_time = static_cast<u64_le>(ns.count());
200 std::memcpy(output.data(), &params, output.size()); 204 std::memcpy(output.data(), &params, output.size());
201 return 0; 205 return 0;
202} 206}
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_ctrl_gpu.h b/src/core/hle/service/nvdrv/devices/nvhost_ctrl_gpu.h
index 07b644ec5..ef60f72ce 100644
--- a/src/core/hle/service/nvdrv/devices/nvhost_ctrl_gpu.h
+++ b/src/core/hle/service/nvdrv/devices/nvhost_ctrl_gpu.h
@@ -92,16 +92,11 @@ private:
92 "IoctlCharacteristics is incorrect size"); 92 "IoctlCharacteristics is incorrect size");
93 93
94 struct IoctlGpuGetTpcMasksArgs { 94 struct IoctlGpuGetTpcMasksArgs {
95 /// [in] TPC mask buffer size reserved by userspace. Should be at least 95 u32_le mask_buffer_size{};
96 /// sizeof(__u32) * fls(gpc_mask) to receive TPC mask for each GPC. 96 INSERT_PADDING_WORDS(1);
97 /// [out] full kernel buffer size 97 u64_le mask_buffer_address{};
98 u32_le mask_buf_size; 98 u32_le tcp_mask{};
99 u32_le reserved; 99 INSERT_PADDING_WORDS(1);
100
101 /// [in] pointer to TPC mask buffer. It will receive one 32-bit TPC mask per GPC or 0 if
102 /// GPC is not enabled or not present. This parameter is ignored if mask_buf_size is 0.
103 u64_le mask_buf_addr;
104 u64_le tpc_mask_size; // Nintendo add this?
105 }; 100 };
106 static_assert(sizeof(IoctlGpuGetTpcMasksArgs) == 24, 101 static_assert(sizeof(IoctlGpuGetTpcMasksArgs) == 24,
107 "IoctlGpuGetTpcMasksArgs is incorrect size"); 102 "IoctlGpuGetTpcMasksArgs is incorrect size");
@@ -166,7 +161,8 @@ private:
166 161
167 u32 GetCharacteristics(const std::vector<u8>& input, std::vector<u8>& output, 162 u32 GetCharacteristics(const std::vector<u8>& input, std::vector<u8>& output,
168 std::vector<u8>& output2, IoctlVersion version); 163 std::vector<u8>& output2, IoctlVersion version);
169 u32 GetTPCMasks(const std::vector<u8>& input, std::vector<u8>& output); 164 u32 GetTPCMasks(const std::vector<u8>& input, std::vector<u8>& output, std::vector<u8>& output2,
165 IoctlVersion version);
170 u32 GetActiveSlotMask(const std::vector<u8>& input, std::vector<u8>& output); 166 u32 GetActiveSlotMask(const std::vector<u8>& input, std::vector<u8>& output);
171 u32 ZCullGetCtxSize(const std::vector<u8>& input, std::vector<u8>& output); 167 u32 ZCullGetCtxSize(const std::vector<u8>& input, std::vector<u8>& output);
172 u32 ZCullGetInfo(const std::vector<u8>& input, std::vector<u8>& output); 168 u32 ZCullGetInfo(const std::vector<u8>& input, std::vector<u8>& output);
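
For illustration, a minimal standalone sketch of the buffer split the reworked GetTPCMasks performs: for version-3 ioctls the argument struct is echoed back through the primary buffer and the mask travels in the dedicated second inline buffer. HandleGetTpcMasks, its field names, and the placeholder mask value are assumptions for this sketch, not yuzu's API.

#include <algorithm>
#include <cstdint>
#include <cstring>
#include <vector>

struct TpcMasksArgs {
    uint32_t mask_buffer_size;    // [in] bytes userspace reserved for the mask
    uint32_t reserved0;
    uint64_t mask_buffer_address; // [in] guest pointer, unused by this sketch
    uint32_t tpc_mask;            // [out] one 32-bit TPC mask for the single GPC
    uint32_t reserved1;
};
static_assert(sizeof(TpcMasksArgs) == 24, "TpcMasksArgs is incorrect size");

uint32_t HandleGetTpcMasks(const std::vector<uint8_t>& input, std::vector<uint8_t>& output,
                           std::vector<uint8_t>& output2, bool is_version3) {
    TpcMasksArgs params{};
    std::memcpy(&params, input.data(), std::min(input.size(), sizeof(params)));
    if (params.mask_buffer_size != 0) {
        params.tpc_mask = 3; // placeholder: both TPCs of the lone GPC enabled
    }
    if (is_version3) {
        // IOCTL3 carries a second output buffer: echo the arguments back
        // unchanged and write the mask into the inline buffer.
        std::memcpy(output.data(), input.data(), std::min(output.size(), input.size()));
        std::memcpy(output2.data(), &params.tpc_mask,
                    std::min(output2.size(), sizeof(params.tpc_mask)));
    } else {
        std::memcpy(output.data(), &params, std::min(output.size(), sizeof(params)));
    }
    return 0;
}
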
diff --git a/src/core/hle/service/nvflinger/nvflinger.cpp b/src/core/hle/service/nvflinger/nvflinger.cpp
index 437bc5dee..2f44d3779 100644
--- a/src/core/hle/service/nvflinger/nvflinger.cpp
+++ b/src/core/hle/service/nvflinger/nvflinger.cpp
@@ -9,6 +9,7 @@
9#include "common/logging/log.h" 9#include "common/logging/log.h"
10#include "common/microprofile.h" 10#include "common/microprofile.h"
11#include "common/scope_exit.h" 11#include "common/scope_exit.h"
12#include "common/thread.h"
12#include "core/core.h" 13#include "core/core.h"
13#include "core/core_timing.h" 14#include "core/core_timing.h"
14#include "core/core_timing_util.h" 15#include "core/core_timing_util.h"
@@ -27,8 +28,35 @@
27 28
28namespace Service::NVFlinger { 29namespace Service::NVFlinger {
29 30
30constexpr s64 frame_ticks = static_cast<s64>(Core::Hardware::BASE_CLOCK_RATE / 60); 31constexpr s64 frame_ticks = static_cast<s64>(1000000000 / 60);
31constexpr s64 frame_ticks_30fps = static_cast<s64>(Core::Hardware::BASE_CLOCK_RATE / 30); 32constexpr s64 frame_ticks_30fps = static_cast<s64>(1000000000 / 30);
33
34void NVFlinger::VSyncThread(NVFlinger& nv_flinger) {
35 nv_flinger.SplitVSync();
36}
37
38void NVFlinger::SplitVSync() {
39 system.RegisterHostThread();
40 std::string name = "yuzu:VSyncThread";
41 MicroProfileOnThreadCreate(name.c_str());
42 Common::SetCurrentThreadName(name.c_str());
43 Common::SetCurrentThreadPriority(Common::ThreadPriority::High);
44 s64 delay = 0;
45 while (is_running) {
46 guard->lock();
47 const s64 time_start = system.CoreTiming().GetGlobalTimeNs().count();
48 Compose();
49 const auto ticks = GetNextTicks();
50 const s64 time_end = system.CoreTiming().GetGlobalTimeNs().count();
51 const s64 time_passed = time_end - time_start;
52 const s64 next_time = std::max<s64>(0, ticks - time_passed - delay);
53 guard->unlock();
54 if (next_time > 0) {
55 wait_event->WaitFor(std::chrono::nanoseconds{next_time});
56 }
57 delay = (system.CoreTiming().GetGlobalTimeNs().count() - time_end) - next_time;
58 }
59}
32 60
33NVFlinger::NVFlinger(Core::System& system) : system(system) { 61NVFlinger::NVFlinger(Core::System& system) : system(system) {
34 displays.emplace_back(0, "Default", system); 62 displays.emplace_back(0, "Default", system);
@@ -36,22 +64,36 @@ NVFlinger::NVFlinger(Core::System& system) : system(system) {
36 displays.emplace_back(2, "Edid", system); 64 displays.emplace_back(2, "Edid", system);
37 displays.emplace_back(3, "Internal", system); 65 displays.emplace_back(3, "Internal", system);
38 displays.emplace_back(4, "Null", system); 66 displays.emplace_back(4, "Null", system);
67 guard = std::make_shared<std::mutex>();
39 68
40 // Schedule the screen composition events 69 // Schedule the screen composition events
41 composition_event = 70 composition_event =
42 Core::Timing::CreateEvent("ScreenComposition", [this](u64 userdata, s64 cycles_late) { 71 Core::Timing::CreateEvent("ScreenComposition", [this](u64 userdata, s64 ns_late) {
72 Lock();
43 Compose(); 73 Compose();
44 const auto ticks = 74 const auto ticks = GetNextTicks();
45 Settings::values.force_30fps_mode ? frame_ticks_30fps : GetNextTicks(); 75 this->system.CoreTiming().ScheduleEvent(std::max<s64>(0LL, ticks - ns_late),
46 this->system.CoreTiming().ScheduleEvent(std::max<s64>(0LL, ticks - cycles_late),
47 composition_event); 76 composition_event);
48 }); 77 });
49 78 if (system.IsMulticore()) {
50 system.CoreTiming().ScheduleEvent(frame_ticks, composition_event); 79 is_running = true;
80 wait_event = std::make_unique<Common::Event>();
81 vsync_thread = std::make_unique<std::thread>(VSyncThread, std::ref(*this));
82 } else {
83 system.CoreTiming().ScheduleEvent(frame_ticks, composition_event);
84 }
51} 85}
52 86
53NVFlinger::~NVFlinger() { 87NVFlinger::~NVFlinger() {
54 system.CoreTiming().UnscheduleEvent(composition_event, 0); 88 if (system.IsMulticore()) {
89 is_running = false;
90 wait_event->Set();
91 vsync_thread->join();
92 vsync_thread.reset();
93 wait_event.reset();
94 } else {
95 system.CoreTiming().UnscheduleEvent(composition_event, 0);
96 }
55} 97}
56 98
57void NVFlinger::SetNVDrvInstance(std::shared_ptr<Nvidia::Module> instance) { 99void NVFlinger::SetNVDrvInstance(std::shared_ptr<Nvidia::Module> instance) {
@@ -199,10 +241,12 @@ void NVFlinger::Compose() {
199 241
200 auto& gpu = system.GPU(); 242 auto& gpu = system.GPU();
201 const auto& multi_fence = buffer->get().multi_fence; 243 const auto& multi_fence = buffer->get().multi_fence;
244 guard->unlock();
202 for (u32 fence_id = 0; fence_id < multi_fence.num_fences; fence_id++) { 245 for (u32 fence_id = 0; fence_id < multi_fence.num_fences; fence_id++) {
203 const auto& fence = multi_fence.fences[fence_id]; 246 const auto& fence = multi_fence.fences[fence_id];
204 gpu.WaitFence(fence.id, fence.value); 247 gpu.WaitFence(fence.id, fence.value);
205 } 248 }
249 guard->lock();
206 250
207 MicroProfileFlip(); 251 MicroProfileFlip();
208 252
@@ -223,7 +267,7 @@ void NVFlinger::Compose() {
223 267
224s64 NVFlinger::GetNextTicks() const { 268s64 NVFlinger::GetNextTicks() const {
225 constexpr s64 max_hertz = 120LL; 269 constexpr s64 max_hertz = 120LL;
226 return (Core::Hardware::BASE_CLOCK_RATE * (1LL << swap_interval)) / max_hertz; 270 return (1000000000 * (1LL << swap_interval)) / max_hertz;
227} 271}
228 272
229} // namespace Service::NVFlinger 273} // namespace Service::NVFlinger
diff --git a/src/core/hle/service/nvflinger/nvflinger.h b/src/core/hle/service/nvflinger/nvflinger.h
index 57a21f33b..e4959a9af 100644
--- a/src/core/hle/service/nvflinger/nvflinger.h
+++ b/src/core/hle/service/nvflinger/nvflinger.h
@@ -4,15 +4,22 @@
4 4
5#pragma once 5#pragma once
6 6
7#include <atomic>
7#include <memory> 8#include <memory>
9#include <mutex>
8#include <optional> 10#include <optional>
9#include <string> 11#include <string>
10#include <string_view> 12#include <string_view>
13#include <thread>
11#include <vector> 14#include <vector>
12 15
13#include "common/common_types.h" 16#include "common/common_types.h"
14#include "core/hle/kernel/object.h" 17#include "core/hle/kernel/object.h"
15 18
19namespace Common {
20class Event;
21} // namespace Common
22
16namespace Core::Timing { 23namespace Core::Timing {
17class CoreTiming; 24class CoreTiming;
18struct EventType; 25struct EventType;
@@ -79,6 +86,10 @@ public:
79 86
80 s64 GetNextTicks() const; 87 s64 GetNextTicks() const;
81 88
89 std::unique_lock<std::mutex> Lock() {
90 return std::unique_lock{*guard};
91 }
92
82private: 93private:
83 /// Finds the display identified by the specified ID. 94 /// Finds the display identified by the specified ID.
84 VI::Display* FindDisplay(u64 display_id); 95 VI::Display* FindDisplay(u64 display_id);
@@ -92,6 +103,10 @@ private:
92 /// Finds the layer identified by the specified ID in the desired display. 103 /// Finds the layer identified by the specified ID in the desired display.
93 const VI::Layer* FindLayer(u64 display_id, u64 layer_id) const; 104 const VI::Layer* FindLayer(u64 display_id, u64 layer_id) const;
94 105
106 static void VSyncThread(NVFlinger& nv_flinger);
107
108 void SplitVSync();
109
95 std::shared_ptr<Nvidia::Module> nvdrv; 110 std::shared_ptr<Nvidia::Module> nvdrv;
96 111
97 std::vector<VI::Display> displays; 112 std::vector<VI::Display> displays;
@@ -108,7 +123,13 @@ private:
108 /// Event that handles screen composition. 123 /// Event that handles screen composition.
109 std::shared_ptr<Core::Timing::EventType> composition_event; 124 std::shared_ptr<Core::Timing::EventType> composition_event;
110 125
126 std::shared_ptr<std::mutex> guard;
127
111 Core::System& system; 128 Core::System& system;
129
130 std::unique_ptr<std::thread> vsync_thread;
131 std::unique_ptr<Common::Event> wait_event;
132 std::atomic<bool> is_running{};
112}; 133};
113 134
114} // namespace Service::NVFlinger 135} // namespace Service::NVFlinger
diff --git a/src/core/hle/service/sm/sm.cpp b/src/core/hle/service/sm/sm.cpp
index 6ada13be4..d872de16c 100644
--- a/src/core/hle/service/sm/sm.cpp
+++ b/src/core/hle/service/sm/sm.cpp
@@ -142,7 +142,7 @@ void SM::GetService(Kernel::HLERequestContext& ctx) {
142 } 142 }
143 143
144 // Wake the threads waiting on the ServerPort 144 // Wake the threads waiting on the ServerPort
145 server_port->WakeupAllWaitingThreads(); 145 server_port->Signal();
146 146
147 LOG_DEBUG(Service_SM, "called service={} -> session={}", name, client->GetObjectId()); 147 LOG_DEBUG(Service_SM, "called service={} -> session={}", name, client->GetObjectId());
148 IPC::ResponseBuilder rb{ctx, 2, 0, 1, IPC::ResponseBuilder::Flags::AlwaysMoveHandles}; 148 IPC::ResponseBuilder rb{ctx, 2, 0, 1, IPC::ResponseBuilder::Flags::AlwaysMoveHandles};
diff --git a/src/core/hle/service/time/standard_steady_clock_core.cpp b/src/core/hle/service/time/standard_steady_clock_core.cpp
index 1575f0b49..59a272f4a 100644
--- a/src/core/hle/service/time/standard_steady_clock_core.cpp
+++ b/src/core/hle/service/time/standard_steady_clock_core.cpp
@@ -11,9 +11,8 @@
11namespace Service::Time::Clock { 11namespace Service::Time::Clock {
12 12
13TimeSpanType StandardSteadyClockCore::GetCurrentRawTimePoint(Core::System& system) { 13TimeSpanType StandardSteadyClockCore::GetCurrentRawTimePoint(Core::System& system) {
14 const TimeSpanType ticks_time_span{TimeSpanType::FromTicks( 14 const TimeSpanType ticks_time_span{
15 Core::Timing::CpuCyclesToClockCycles(system.CoreTiming().GetTicks()), 15 TimeSpanType::FromTicks(system.CoreTiming().GetClockTicks(), Core::Hardware::CNTFREQ)};
16 Core::Hardware::CNTFREQ)};
17 TimeSpanType raw_time_point{setup_value.nanoseconds + ticks_time_span.nanoseconds}; 16 TimeSpanType raw_time_point{setup_value.nanoseconds + ticks_time_span.nanoseconds};
18 17
19 if (raw_time_point.nanoseconds < cached_raw_time_point.nanoseconds) { 18 if (raw_time_point.nanoseconds < cached_raw_time_point.nanoseconds) {
diff --git a/src/core/hle/service/time/tick_based_steady_clock_core.cpp b/src/core/hle/service/time/tick_based_steady_clock_core.cpp
index 44d5bc651..8baaa2a6a 100644
--- a/src/core/hle/service/time/tick_based_steady_clock_core.cpp
+++ b/src/core/hle/service/time/tick_based_steady_clock_core.cpp
@@ -11,9 +11,8 @@
11namespace Service::Time::Clock { 11namespace Service::Time::Clock {
12 12
13SteadyClockTimePoint TickBasedSteadyClockCore::GetTimePoint(Core::System& system) { 13SteadyClockTimePoint TickBasedSteadyClockCore::GetTimePoint(Core::System& system) {
14 const TimeSpanType ticks_time_span{TimeSpanType::FromTicks( 14 const TimeSpanType ticks_time_span{
15 Core::Timing::CpuCyclesToClockCycles(system.CoreTiming().GetTicks()), 15 TimeSpanType::FromTicks(system.CoreTiming().GetClockTicks(), Core::Hardware::CNTFREQ)};
16 Core::Hardware::CNTFREQ)};
17 16
18 return {ticks_time_span.ToSeconds(), GetClockSourceId()}; 17 return {ticks_time_span.ToSeconds(), GetClockSourceId()};
19} 18}
diff --git a/src/core/hle/service/time/time.cpp b/src/core/hle/service/time/time.cpp
index 67f1bbcf3..4cf58a61a 100644
--- a/src/core/hle/service/time/time.cpp
+++ b/src/core/hle/service/time/time.cpp
@@ -234,9 +234,8 @@ void Module::Interface::CalculateMonotonicSystemClockBaseTimePoint(Kernel::HLERe
234 const auto current_time_point{steady_clock_core.GetCurrentTimePoint(system)}; 234 const auto current_time_point{steady_clock_core.GetCurrentTimePoint(system)};
235 235
236 if (current_time_point.clock_source_id == context.steady_time_point.clock_source_id) { 236 if (current_time_point.clock_source_id == context.steady_time_point.clock_source_id) {
237 const auto ticks{Clock::TimeSpanType::FromTicks( 237 const auto ticks{Clock::TimeSpanType::FromTicks(system.CoreTiming().GetClockTicks(),
238 Core::Timing::CpuCyclesToClockCycles(system.CoreTiming().GetTicks()), 238 Core::Hardware::CNTFREQ)};
239 Core::Hardware::CNTFREQ)};
240 const s64 base_time_point{context.offset + current_time_point.time_point - 239 const s64 base_time_point{context.offset + current_time_point.time_point -
241 ticks.ToSeconds()}; 240 ticks.ToSeconds()};
242 IPC::ResponseBuilder rb{ctx, (sizeof(s64) / 4) + 2}; 241 IPC::ResponseBuilder rb{ctx, (sizeof(s64) / 4) + 2};
diff --git a/src/core/hle/service/time/time_sharedmemory.cpp b/src/core/hle/service/time/time_sharedmemory.cpp
index 999ec1e51..e0ae9f874 100644
--- a/src/core/hle/service/time/time_sharedmemory.cpp
+++ b/src/core/hle/service/time/time_sharedmemory.cpp
@@ -30,8 +30,7 @@ void SharedMemory::SetupStandardSteadyClock(Core::System& system,
30 const Common::UUID& clock_source_id, 30 const Common::UUID& clock_source_id,
31 Clock::TimeSpanType current_time_point) { 31 Clock::TimeSpanType current_time_point) {
32 const Clock::TimeSpanType ticks_time_span{Clock::TimeSpanType::FromTicks( 32 const Clock::TimeSpanType ticks_time_span{Clock::TimeSpanType::FromTicks(
33 Core::Timing::CpuCyclesToClockCycles(system.CoreTiming().GetTicks()), 33 system.CoreTiming().GetClockTicks(), Core::Hardware::CNTFREQ)};
34 Core::Hardware::CNTFREQ)};
35 const Clock::SteadyClockContext context{ 34 const Clock::SteadyClockContext context{
36 static_cast<u64>(current_time_point.nanoseconds - ticks_time_span.nanoseconds), 35 static_cast<u64>(current_time_point.nanoseconds - ticks_time_span.nanoseconds),
37 clock_source_id}; 36 clock_source_id};
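
The time service call sites above all now pass GetClockTicks() together with Core::Hardware::CNTFREQ, the Switch's 19.2 MHz ARM generic-timer frequency. A sketch of the ticks-to-nanoseconds conversion TimeSpanType::FromTicks presumably performs, split so the intermediate products stay within 64 bits for realistic tick counts (TicksToNanoseconds is an illustrative name):

#include <cstdint>

constexpr std::uint64_t CNTFREQ = 19'200'000; // generic-timer frequency on the Switch

constexpr std::int64_t TicksToNanoseconds(std::uint64_t ticks) {
    // Whole seconds first, then the sub-second remainder.
    return static_cast<std::int64_t>((ticks / CNTFREQ) * 1'000'000'000 +
                                     (ticks % CNTFREQ) * 1'000'000'000 / CNTFREQ);
}

static_assert(TicksToNanoseconds(CNTFREQ) == 1'000'000'000, "one second of ticks");
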
diff --git a/src/core/hle/service/vi/vi.cpp b/src/core/hle/service/vi/vi.cpp
index 46e14c2a3..157092074 100644
--- a/src/core/hle/service/vi/vi.cpp
+++ b/src/core/hle/service/vi/vi.cpp
@@ -511,6 +511,7 @@ private:
511 LOG_DEBUG(Service_VI, "called. id=0x{:08X} transaction={:X}, flags=0x{:08X}", id, 511 LOG_DEBUG(Service_VI, "called. id=0x{:08X} transaction={:X}, flags=0x{:08X}", id,
512 static_cast<u32>(transaction), flags); 512 static_cast<u32>(transaction), flags);
513 513
 514 const auto guard = nv_flinger->Lock();
514 auto& buffer_queue = nv_flinger->FindBufferQueue(id); 515 auto& buffer_queue = nv_flinger->FindBufferQueue(id);
515 516
516 switch (transaction) { 517 switch (transaction) {
@@ -550,6 +551,7 @@ private:
550 [=](std::shared_ptr<Kernel::Thread> thread, Kernel::HLERequestContext& ctx, 551 [=](std::shared_ptr<Kernel::Thread> thread, Kernel::HLERequestContext& ctx,
551 Kernel::ThreadWakeupReason reason) { 552 Kernel::ThreadWakeupReason reason) {
552 // Repeat TransactParcel DequeueBuffer when a buffer is available 553 // Repeat TransactParcel DequeueBuffer when a buffer is available
 554 const auto guard = nv_flinger->Lock();
553 auto& buffer_queue = nv_flinger->FindBufferQueue(id); 555 auto& buffer_queue = nv_flinger->FindBufferQueue(id);
554 auto result = buffer_queue.DequeueBuffer(width, height); 556 auto result = buffer_queue.DequeueBuffer(width, height);
555 ASSERT_MSG(result != std::nullopt, "Could not dequeue buffer."); 557 ASSERT_MSG(result != std::nullopt, "Could not dequeue buffer.");
diff --git a/src/core/memory.cpp b/src/core/memory.cpp
index 9d87045a0..7def00768 100644
--- a/src/core/memory.cpp
+++ b/src/core/memory.cpp
@@ -8,6 +8,7 @@
8#include <utility> 8#include <utility>
9 9
10#include "common/assert.h" 10#include "common/assert.h"
11#include "common/atomic_ops.h"
11#include "common/common_types.h" 12#include "common/common_types.h"
12#include "common/logging/log.h" 13#include "common/logging/log.h"
13#include "common/page_table.h" 14#include "common/page_table.h"
@@ -29,15 +30,12 @@ namespace Core::Memory {
29struct Memory::Impl { 30struct Memory::Impl {
30 explicit Impl(Core::System& system_) : system{system_} {} 31 explicit Impl(Core::System& system_) : system{system_} {}
31 32
32 void SetCurrentPageTable(Kernel::Process& process) { 33 void SetCurrentPageTable(Kernel::Process& process, u32 core_id) {
33 current_page_table = &process.PageTable().PageTableImpl(); 34 current_page_table = &process.PageTable().PageTableImpl();
34 35
35 const std::size_t address_space_width = process.PageTable().GetAddressSpaceWidth(); 36 const std::size_t address_space_width = process.PageTable().GetAddressSpaceWidth();
36 37
37 system.ArmInterface(0).PageTableChanged(*current_page_table, address_space_width); 38 system.ArmInterface(core_id).PageTableChanged(*current_page_table, address_space_width);
38 system.ArmInterface(1).PageTableChanged(*current_page_table, address_space_width);
39 system.ArmInterface(2).PageTableChanged(*current_page_table, address_space_width);
40 system.ArmInterface(3).PageTableChanged(*current_page_table, address_space_width);
41 } 39 }
42 40
43 void MapMemoryRegion(Common::PageTable& page_table, VAddr base, u64 size, PAddr target) { 41 void MapMemoryRegion(Common::PageTable& page_table, VAddr base, u64 size, PAddr target) {
@@ -179,6 +177,22 @@ struct Memory::Impl {
179 } 177 }
180 } 178 }
181 179
180 bool WriteExclusive8(const VAddr addr, const u8 data, const u8 expected) {
181 return WriteExclusive<u8>(addr, data, expected);
182 }
183
184 bool WriteExclusive16(const VAddr addr, const u16 data, const u16 expected) {
185 return WriteExclusive<u16_le>(addr, data, expected);
186 }
187
188 bool WriteExclusive32(const VAddr addr, const u32 data, const u32 expected) {
189 return WriteExclusive<u32_le>(addr, data, expected);
190 }
191
192 bool WriteExclusive64(const VAddr addr, const u64 data, const u64 expected) {
193 return WriteExclusive<u64_le>(addr, data, expected);
194 }
195
182 std::string ReadCString(VAddr vaddr, std::size_t max_length) { 196 std::string ReadCString(VAddr vaddr, std::size_t max_length) {
183 std::string string; 197 std::string string;
184 string.reserve(max_length); 198 string.reserve(max_length);
@@ -682,6 +696,67 @@ struct Memory::Impl {
682 } 696 }
683 } 697 }
684 698
699 template <typename T>
700 bool WriteExclusive(const VAddr vaddr, const T data, const T expected) {
701 u8* page_pointer = current_page_table->pointers[vaddr >> PAGE_BITS];
702 if (page_pointer != nullptr) {
703 // NOTE: Avoid adding any extra logic to this fast-path block
704 T volatile* pointer = reinterpret_cast<T volatile*>(&page_pointer[vaddr]);
705 return Common::AtomicCompareAndSwap(pointer, data, expected);
706 }
707
708 const Common::PageType type = current_page_table->attributes[vaddr >> PAGE_BITS];
709 switch (type) {
710 case Common::PageType::Unmapped:
711 LOG_ERROR(HW_Memory, "Unmapped Write{} 0x{:08X} @ 0x{:016X}", sizeof(data) * 8,
712 static_cast<u32>(data), vaddr);
713 return true;
714 case Common::PageType::Memory:
715 ASSERT_MSG(false, "Mapped memory page without a pointer @ {:016X}", vaddr);
716 break;
717 case Common::PageType::RasterizerCachedMemory: {
718 u8* host_ptr{GetPointerFromRasterizerCachedMemory(vaddr)};
719 system.GPU().InvalidateRegion(vaddr, sizeof(T));
 720 T volatile* pointer = reinterpret_cast<T volatile*>(host_ptr);
721 return Common::AtomicCompareAndSwap(pointer, data, expected);
722 break;
723 }
724 default:
725 UNREACHABLE();
726 }
727 return true;
728 }
729
730 bool WriteExclusive128(const VAddr vaddr, const u128 data, const u128 expected) {
731 u8* const page_pointer = current_page_table->pointers[vaddr >> PAGE_BITS];
732 if (page_pointer != nullptr) {
733 // NOTE: Avoid adding any extra logic to this fast-path block
734 u64 volatile* pointer = reinterpret_cast<u64 volatile*>(&page_pointer[vaddr]);
735 return Common::AtomicCompareAndSwap(pointer, data, expected);
736 }
737
738 const Common::PageType type = current_page_table->attributes[vaddr >> PAGE_BITS];
739 switch (type) {
740 case Common::PageType::Unmapped:
741 LOG_ERROR(HW_Memory, "Unmapped Write{} 0x{:08X} @ 0x{:016X}{:016X}", sizeof(data) * 8,
742 static_cast<u64>(data[1]), static_cast<u64>(data[0]), vaddr);
743 return true;
744 case Common::PageType::Memory:
745 ASSERT_MSG(false, "Mapped memory page without a pointer @ {:016X}", vaddr);
746 break;
747 case Common::PageType::RasterizerCachedMemory: {
748 u8* host_ptr{GetPointerFromRasterizerCachedMemory(vaddr)};
749 system.GPU().InvalidateRegion(vaddr, sizeof(u128));
 750 u64 volatile* pointer = reinterpret_cast<u64 volatile*>(host_ptr);
751 return Common::AtomicCompareAndSwap(pointer, data, expected);
752 break;
753 }
754 default:
755 UNREACHABLE();
756 }
757 return true;
758 }
759
685 Common::PageTable* current_page_table = nullptr; 760 Common::PageTable* current_page_table = nullptr;
686 Core::System& system; 761 Core::System& system;
687}; 762};
@@ -689,8 +764,8 @@ struct Memory::Impl {
689Memory::Memory(Core::System& system) : impl{std::make_unique<Impl>(system)} {} 764Memory::Memory(Core::System& system) : impl{std::make_unique<Impl>(system)} {}
690Memory::~Memory() = default; 765Memory::~Memory() = default;
691 766
692void Memory::SetCurrentPageTable(Kernel::Process& process) { 767void Memory::SetCurrentPageTable(Kernel::Process& process, u32 core_id) {
693 impl->SetCurrentPageTable(process); 768 impl->SetCurrentPageTable(process, core_id);
694} 769}
695 770
696void Memory::MapMemoryRegion(Common::PageTable& page_table, VAddr base, u64 size, PAddr target) { 771void Memory::MapMemoryRegion(Common::PageTable& page_table, VAddr base, u64 size, PAddr target) {
@@ -764,6 +839,26 @@ void Memory::Write64(VAddr addr, u64 data) {
764 impl->Write64(addr, data); 839 impl->Write64(addr, data);
765} 840}
766 841
842bool Memory::WriteExclusive8(VAddr addr, u8 data, u8 expected) {
843 return impl->WriteExclusive8(addr, data, expected);
844}
845
846bool Memory::WriteExclusive16(VAddr addr, u16 data, u16 expected) {
847 return impl->WriteExclusive16(addr, data, expected);
848}
849
850bool Memory::WriteExclusive32(VAddr addr, u32 data, u32 expected) {
851 return impl->WriteExclusive32(addr, data, expected);
852}
853
854bool Memory::WriteExclusive64(VAddr addr, u64 data, u64 expected) {
855 return impl->WriteExclusive64(addr, data, expected);
856}
857
858bool Memory::WriteExclusive128(VAddr addr, u128 data, u128 expected) {
859 return impl->WriteExclusive128(addr, data, expected);
860}
861
767std::string Memory::ReadCString(VAddr vaddr, std::size_t max_length) { 862std::string Memory::ReadCString(VAddr vaddr, std::size_t max_length) {
768 return impl->ReadCString(vaddr, max_length); 863 return impl->ReadCString(vaddr, max_length);
769} 864}
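
Common::AtomicCompareAndSwap here comes from the new common/atomic_ops.h included at the top of this file. As a rough stand-in for readers, an equivalent compare-and-swap over raw host memory could look like the following under C++20; std::atomic_ref requires a suitably aligned address, and this is a sketch, not yuzu's implementation:

#include <atomic>

// Returns true if *addr still held `expected` and was replaced by `data`,
// mirroring how the WriteExclusive fast path reports store success.
template <typename T>
bool CompareAndSwap(T* addr, T data, T expected) {
    return std::atomic_ref<T>(*addr).compare_exchange_strong(expected, data);
}
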
diff --git a/src/core/memory.h b/src/core/memory.h
index 9292f3b0a..4a1cc63f4 100644
--- a/src/core/memory.h
+++ b/src/core/memory.h
@@ -64,7 +64,7 @@ public:
64 * 64 *
65 * @param process The process to use the page table of. 65 * @param process The process to use the page table of.
66 */ 66 */
67 void SetCurrentPageTable(Kernel::Process& process); 67 void SetCurrentPageTable(Kernel::Process& process, u32 core_id);
68 68
69 /** 69 /**
70 * Maps an allocated buffer onto a region of the emulated process address space. 70 * Maps an allocated buffer onto a region of the emulated process address space.
@@ -245,6 +245,71 @@ public:
245 void Write64(VAddr addr, u64 data); 245 void Write64(VAddr addr, u64 data);
246 246
247 /** 247 /**
 248 * Writes an 8-bit unsigned integer to the given virtual address in
249 * the current process' address space if and only if the address contains
250 * the expected value. This operation is atomic.
251 *
252 * @param addr The virtual address to write the 8-bit unsigned integer to.
253 * @param data The 8-bit unsigned integer to write to the given virtual address.
254 * @param expected The 8-bit unsigned integer to check against the given virtual address.
255 *
 256 * @post The memory range [addr, addr + sizeof(data)) contains the given data value if the swap succeeded.
257 */
258 bool WriteExclusive8(VAddr addr, u8 data, u8 expected);
259
260 /**
261 * Writes a 16-bit unsigned integer to the given virtual address in
262 * the current process' address space if and only if the address contains
263 * the expected value. This operation is atomic.
264 *
265 * @param addr The virtual address to write the 16-bit unsigned integer to.
266 * @param data The 16-bit unsigned integer to write to the given virtual address.
267 * @param expected The 16-bit unsigned integer to check against the given virtual address.
268 *
 269 * @post The memory range [addr, addr + sizeof(data)) contains the given data value if the swap succeeded.
270 */
271 bool WriteExclusive16(VAddr addr, u16 data, u16 expected);
272
273 /**
274 * Writes a 32-bit unsigned integer to the given virtual address in
275 * the current process' address space if and only if the address contains
276 * the expected value. This operation is atomic.
277 *
278 * @param addr The virtual address to write the 32-bit unsigned integer to.
279 * @param data The 32-bit unsigned integer to write to the given virtual address.
280 * @param expected The 32-bit unsigned integer to check against the given virtual address.
281 *
 282 * @post The memory range [addr, addr + sizeof(data)) contains the given data value if the swap succeeded.
283 */
284 bool WriteExclusive32(VAddr addr, u32 data, u32 expected);
285
286 /**
287 * Writes a 64-bit unsigned integer to the given virtual address in
288 * the current process' address space if and only if the address contains
289 * the expected value. This operation is atomic.
290 *
291 * @param addr The virtual address to write the 64-bit unsigned integer to.
292 * @param data The 64-bit unsigned integer to write to the given virtual address.
293 * @param expected The 64-bit unsigned integer to check against the given virtual address.
294 *
 295 * @post The memory range [addr, addr + sizeof(data)) contains the given data value if the swap succeeded.
296 */
297 bool WriteExclusive64(VAddr addr, u64 data, u64 expected);
298
299 /**
300 * Writes a 128-bit unsigned integer to the given virtual address in
301 * the current process' address space if and only if the address contains
302 * the expected value. This operation is atomic.
303 *
304 * @param addr The virtual address to write the 128-bit unsigned integer to.
305 * @param data The 128-bit unsigned integer to write to the given virtual address.
306 * @param expected The 128-bit unsigned integer to check against the given virtual address.
307 *
 308 * @post The memory range [addr, addr + sizeof(data)) contains the given data value if the swap succeeded.
309 */
310 bool WriteExclusive128(VAddr addr, u128 data, u128 expected);
311
312 /**
248 * Reads a null-terminated string from the given virtual address. 313 * Reads a null-terminated string from the given virtual address.
249 * This function will continually read characters until either: 314 * This function will continually read characters until either:
250 * 315 *
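
A hypothetical caller, to show how these primitives compose: an exclusive-monitor-style read-modify-write retries until the compare-and-swap observes no intervening writer. AtomicIncrement32 is an illustrative helper, not part of the interface above:

#include "common/common_types.h"
#include "core/memory.h"

u32 AtomicIncrement32(Core::Memory::Memory& memory, VAddr addr) {
    u32 old_value;
    do {
        old_value = memory.Read32(addr);
    } while (!memory.WriteExclusive32(addr, old_value + 1, old_value));
    return old_value;
}
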
diff --git a/src/core/memory/cheat_engine.cpp b/src/core/memory/cheat_engine.cpp
index b139e8465..53d27859b 100644
--- a/src/core/memory/cheat_engine.cpp
+++ b/src/core/memory/cheat_engine.cpp
@@ -20,7 +20,7 @@
20 20
21namespace Core::Memory { 21namespace Core::Memory {
22 22
23constexpr s64 CHEAT_ENGINE_TICKS = static_cast<s64>(Core::Hardware::BASE_CLOCK_RATE / 12); 23constexpr s64 CHEAT_ENGINE_TICKS = static_cast<s64>(1000000000 / 12);
24constexpr u32 KEYPAD_BITMASK = 0x3FFFFFF; 24constexpr u32 KEYPAD_BITMASK = 0x3FFFFFF;
25 25
26StandardVmCallbacks::StandardVmCallbacks(Core::System& system, const CheatProcessMetadata& metadata) 26StandardVmCallbacks::StandardVmCallbacks(Core::System& system, const CheatProcessMetadata& metadata)
@@ -190,7 +190,7 @@ CheatEngine::~CheatEngine() {
190void CheatEngine::Initialize() { 190void CheatEngine::Initialize() {
191 event = Core::Timing::CreateEvent( 191 event = Core::Timing::CreateEvent(
192 "CheatEngine::FrameCallback::" + Common::HexToString(metadata.main_nso_build_id), 192 "CheatEngine::FrameCallback::" + Common::HexToString(metadata.main_nso_build_id),
193 [this](u64 userdata, s64 cycles_late) { FrameCallback(userdata, cycles_late); }); 193 [this](u64 userdata, s64 ns_late) { FrameCallback(userdata, ns_late); });
194 core_timing.ScheduleEvent(CHEAT_ENGINE_TICKS, event); 194 core_timing.ScheduleEvent(CHEAT_ENGINE_TICKS, event);
195 195
196 metadata.process_id = system.CurrentProcess()->GetProcessID(); 196 metadata.process_id = system.CurrentProcess()->GetProcessID();
@@ -217,7 +217,7 @@ void CheatEngine::Reload(std::vector<CheatEntry> cheats) {
217 217
218MICROPROFILE_DEFINE(Cheat_Engine, "Add-Ons", "Cheat Engine", MP_RGB(70, 200, 70)); 218MICROPROFILE_DEFINE(Cheat_Engine, "Add-Ons", "Cheat Engine", MP_RGB(70, 200, 70));
219 219
220void CheatEngine::FrameCallback(u64 userdata, s64 cycles_late) { 220void CheatEngine::FrameCallback(u64 userdata, s64 ns_late) {
221 if (is_pending_reload.exchange(false)) { 221 if (is_pending_reload.exchange(false)) {
222 vm.LoadProgram(cheats); 222 vm.LoadProgram(cheats);
223 } 223 }
@@ -230,7 +230,7 @@ void CheatEngine::FrameCallback(u64 userdata, s64 cycles_late) {
230 230
231 vm.Execute(metadata); 231 vm.Execute(metadata);
232 232
233 core_timing.ScheduleEvent(CHEAT_ENGINE_TICKS - cycles_late, event); 233 core_timing.ScheduleEvent(CHEAT_ENGINE_TICKS - ns_late, event);
234} 234}
235 235
236} // namespace Core::Memory 236} // namespace Core::Memory
diff --git a/src/core/perf_stats.cpp b/src/core/perf_stats.cpp
index f1ae9d4df..9f3a6b811 100644
--- a/src/core/perf_stats.cpp
+++ b/src/core/perf_stats.cpp
@@ -119,7 +119,7 @@ double PerfStats::GetLastFrameTimeScale() {
119} 119}
120 120
121void FrameLimiter::DoFrameLimiting(microseconds current_system_time_us) { 121void FrameLimiter::DoFrameLimiting(microseconds current_system_time_us) {
122 if (!Settings::values.use_frame_limit) { 122 if (!Settings::values.use_frame_limit || Settings::values.use_multi_core) {
123 return; 123 return;
124 } 124 }
125 125
diff --git a/src/core/settings.cpp b/src/core/settings.cpp
index 4edff9cd8..56df5e925 100644
--- a/src/core/settings.cpp
+++ b/src/core/settings.cpp
@@ -127,6 +127,13 @@ void LogSettings() {
127 LogSetting("Services_BCATBoxcatLocal", Settings::values.bcat_boxcat_local); 127 LogSetting("Services_BCATBoxcatLocal", Settings::values.bcat_boxcat_local);
128} 128}
129 129
130float Volume() {
131 if (values.audio_muted) {
132 return 0.0f;
133 }
134 return values.volume;
135}
136
130bool IsGPULevelExtreme() { 137bool IsGPULevelExtreme() {
131 return values.gpu_accuracy == GPUAccuracy::Extreme; 138 return values.gpu_accuracy == GPUAccuracy::Extreme;
132} 139}
diff --git a/src/core/settings.h b/src/core/settings.h
index 36cd66fd4..a598ccbc1 100644
--- a/src/core/settings.h
+++ b/src/core/settings.h
@@ -437,7 +437,7 @@ struct Values {
437 bool renderer_debug; 437 bool renderer_debug;
438 int vulkan_device; 438 int vulkan_device;
439 439
440 float resolution_factor; 440 u16 resolution_factor{1};
441 int aspect_ratio; 441 int aspect_ratio;
442 int max_anisotropy; 442 int max_anisotropy;
443 bool use_frame_limit; 443 bool use_frame_limit;
@@ -459,6 +459,7 @@ struct Values {
459 bool use_dev_keys; 459 bool use_dev_keys;
460 460
461 // Audio 461 // Audio
462 bool audio_muted;
462 std::string sink_id; 463 std::string sink_id;
463 bool enable_audio_stretching; 464 bool enable_audio_stretching;
464 std::string audio_device_id; 465 std::string audio_device_id;
@@ -490,6 +491,8 @@ struct Values {
490 std::map<u64, std::vector<std::string>> disabled_addons; 491 std::map<u64, std::vector<std::string>> disabled_addons;
491} extern values; 492} extern values;
492 493
494float Volume();
495
493bool IsGPULevelExtreme(); 496bool IsGPULevelExtreme();
494bool IsGPULevelHigh(); 497bool IsGPULevelHigh();
495 498
diff --git a/src/core/tools/freezer.cpp b/src/core/tools/freezer.cpp
index b2c6c537e..8b0c50d11 100644
--- a/src/core/tools/freezer.cpp
+++ b/src/core/tools/freezer.cpp
@@ -14,7 +14,7 @@
14namespace Tools { 14namespace Tools {
15namespace { 15namespace {
16 16
17constexpr s64 MEMORY_FREEZER_TICKS = static_cast<s64>(Core::Hardware::BASE_CLOCK_RATE / 60); 17constexpr s64 MEMORY_FREEZER_TICKS = static_cast<s64>(1000000000 / 60);
18 18
19u64 MemoryReadWidth(Core::Memory::Memory& memory, u32 width, VAddr addr) { 19u64 MemoryReadWidth(Core::Memory::Memory& memory, u32 width, VAddr addr) {
20 switch (width) { 20 switch (width) {
@@ -57,7 +57,7 @@ Freezer::Freezer(Core::Timing::CoreTiming& core_timing_, Core::Memory::Memory& m
57 : core_timing{core_timing_}, memory{memory_} { 57 : core_timing{core_timing_}, memory{memory_} {
58 event = Core::Timing::CreateEvent( 58 event = Core::Timing::CreateEvent(
59 "MemoryFreezer::FrameCallback", 59 "MemoryFreezer::FrameCallback",
60 [this](u64 userdata, s64 cycles_late) { FrameCallback(userdata, cycles_late); }); 60 [this](u64 userdata, s64 ns_late) { FrameCallback(userdata, ns_late); });
61 core_timing.ScheduleEvent(MEMORY_FREEZER_TICKS, event); 61 core_timing.ScheduleEvent(MEMORY_FREEZER_TICKS, event);
62} 62}
63 63
@@ -158,7 +158,7 @@ std::vector<Freezer::Entry> Freezer::GetEntries() const {
158 return entries; 158 return entries;
159} 159}
160 160
161void Freezer::FrameCallback(u64 userdata, s64 cycles_late) { 161void Freezer::FrameCallback(u64 userdata, s64 ns_late) {
162 if (!IsActive()) { 162 if (!IsActive()) {
163 LOG_DEBUG(Common_Memory, "Memory freezer has been deactivated, ending callback events."); 163 LOG_DEBUG(Common_Memory, "Memory freezer has been deactivated, ending callback events.");
164 return; 164 return;
@@ -173,7 +173,7 @@ void Freezer::FrameCallback(u64 userdata, s64 cycles_late) {
173 MemoryWriteWidth(memory, entry.width, entry.address, entry.value); 173 MemoryWriteWidth(memory, entry.width, entry.address, entry.value);
174 } 174 }
175 175
176 core_timing.ScheduleEvent(MEMORY_FREEZER_TICKS - cycles_late, event); 176 core_timing.ScheduleEvent(MEMORY_FREEZER_TICKS - ns_late, event);
177} 177}
178 178
179void Freezer::FillEntryReads() { 179void Freezer::FillEntryReads() {
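
Both CheatEngine and Freezer reschedule with PERIOD - ns_late, which pins the long-run callback rate to exactly 1/PERIOD no matter how late individual deliveries are. A self-contained simulation of that arithmetic (the 2 ms jitter is an arbitrary stand-in for scheduling delay):

#include <cstdint>
#include <cstdio>

int main() {
    constexpr std::int64_t period = 1'000'000'000 / 60; // ~16.67 ms, as used above
    constexpr std::int64_t jitter = 2'000'000;          // every delivery is 2 ms late
    std::int64_t target = period;                       // when the next callback is due
    for (int tick = 1; tick <= 5; ++tick) {
        const std::int64_t now = target + jitter; // callback actually fires late
        const std::int64_t ns_late = now - target;
        target = now + (period - ns_late); // compensated reschedule
        // `target` stays pinned to tick * period: the error never accumulates.
        std::printf("tick %d fired at %lld, next due %lld\n", tick,
                    static_cast<long long>(now), static_cast<long long>(target));
    }
    return 0;
}
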
diff --git a/src/input_common/keyboard.cpp b/src/input_common/keyboard.cpp
index 078374be5..afb8e6612 100644
--- a/src/input_common/keyboard.cpp
+++ b/src/input_common/keyboard.cpp
@@ -76,7 +76,7 @@ std::unique_ptr<Input::ButtonDevice> Keyboard::Create(const Common::ParamPackage
76 int key_code = params.Get("code", 0); 76 int key_code = params.Get("code", 0);
77 std::unique_ptr<KeyButton> button = std::make_unique<KeyButton>(key_button_list); 77 std::unique_ptr<KeyButton> button = std::make_unique<KeyButton>(key_button_list);
78 key_button_list->AddKeyButton(key_code, button.get()); 78 key_button_list->AddKeyButton(key_code, button.get());
79 return std::move(button); 79 return button;
80} 80}
81 81
82void Keyboard::PressKey(int key_code) { 82void Keyboard::PressKey(int key_code) {
diff --git a/src/input_common/motion_emu.cpp b/src/input_common/motion_emu.cpp
index 868251628..d4cdf76a3 100644
--- a/src/input_common/motion_emu.cpp
+++ b/src/input_common/motion_emu.cpp
@@ -145,7 +145,7 @@ std::unique_ptr<Input::MotionDevice> MotionEmu::Create(const Common::ParamPackag
145 // Previously created device is disconnected here. Having two motion devices for 3DS is not 145 // Previously created device is disconnected here. Having two motion devices for 3DS is not
146 // expected. 146 // expected.
147 current_device = device_wrapper->device; 147 current_device = device_wrapper->device;
148 return std::move(device_wrapper); 148 return device_wrapper;
149} 149}
150 150
151void MotionEmu::BeginTilt(int x, int y) { 151void MotionEmu::BeginTilt(int x, int y) {
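
Both hunks above drop a redundant std::move: a named local being returned is already treated as an rvalue, so the unique_ptr<Derived> to unique_ptr<Base> move conversion happens without it, and the explicit std::move only earns clang's -Wredundant-move (or, when the types match exactly, suppresses copy elision). A minimal illustration; Base, Derived, and MakeDevice are placeholders:

#include <memory>

struct Base {
    virtual ~Base() = default;
};
struct Derived : Base {};

std::unique_ptr<Base> MakeDevice() {
    auto device = std::make_unique<Derived>();
    // `device` is treated as an rvalue in the return statement, so the
    // converting move needs no std::move.
    return device;
}
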
diff --git a/src/tests/CMakeLists.txt b/src/tests/CMakeLists.txt
index c7038b217..47ef30aa9 100644
--- a/src/tests/CMakeLists.txt
+++ b/src/tests/CMakeLists.txt
@@ -1,6 +1,7 @@
1add_executable(tests 1add_executable(tests
2 common/bit_field.cpp 2 common/bit_field.cpp
3 common/bit_utils.cpp 3 common/bit_utils.cpp
4 common/fibers.cpp
4 common/multi_level_queue.cpp 5 common/multi_level_queue.cpp
5 common/param_package.cpp 6 common/param_package.cpp
6 common/ring_buffer.cpp 7 common/ring_buffer.cpp
diff --git a/src/tests/common/fibers.cpp b/src/tests/common/fibers.cpp
new file mode 100644
index 000000000..4fd92428f
--- /dev/null
+++ b/src/tests/common/fibers.cpp
@@ -0,0 +1,358 @@
1// Copyright 2020 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <atomic>
6#include <cstdlib>
7#include <functional>
8#include <memory>
9#include <thread>
10#include <unordered_map>
11#include <vector>
12
13#include <catch2/catch.hpp>
14#include <math.h>
15#include "common/common_types.h"
16#include "common/fiber.h"
17#include "common/spin_lock.h"
18
19namespace Common {
20
21class TestControl1 {
22public:
23 TestControl1() = default;
24
25 void DoWork();
26
27 void ExecuteThread(u32 id);
28
29 std::unordered_map<std::thread::id, u32> ids;
30 std::vector<std::shared_ptr<Common::Fiber>> thread_fibers;
31 std::vector<std::shared_ptr<Common::Fiber>> work_fibers;
32 std::vector<u32> items;
33 std::vector<u32> results;
34};
35
36static void WorkControl1(void* control) {
37 auto* test_control = static_cast<TestControl1*>(control);
38 test_control->DoWork();
39}
40
41void TestControl1::DoWork() {
42 std::thread::id this_id = std::this_thread::get_id();
43 u32 id = ids[this_id];
44 u32 value = items[id];
45 for (u32 i = 0; i < id; i++) {
46 value++;
47 }
48 results[id] = value;
49 Fiber::YieldTo(work_fibers[id], thread_fibers[id]);
50}
51
52void TestControl1::ExecuteThread(u32 id) {
53 std::thread::id this_id = std::this_thread::get_id();
54 ids[this_id] = id;
55 auto thread_fiber = Fiber::ThreadToFiber();
56 thread_fibers[id] = thread_fiber;
57 work_fibers[id] = std::make_shared<Fiber>(std::function<void(void*)>{WorkControl1}, this);
58 items[id] = rand() % 256;
59 Fiber::YieldTo(thread_fibers[id], work_fibers[id]);
60 thread_fibers[id]->Exit();
61}
62
63static void ThreadStart1(u32 id, TestControl1& test_control) {
64 test_control.ExecuteThread(id);
65}
66
 67/** This test checks the fiber setup configuration and validates that fibers
 68 * do all the work required.
69 */
70TEST_CASE("Fibers::Setup", "[common]") {
71 constexpr std::size_t num_threads = 7;
72 TestControl1 test_control{};
73 test_control.thread_fibers.resize(num_threads);
74 test_control.work_fibers.resize(num_threads);
75 test_control.items.resize(num_threads, 0);
76 test_control.results.resize(num_threads, 0);
77 std::vector<std::thread> threads;
78 for (u32 i = 0; i < num_threads; i++) {
79 threads.emplace_back(ThreadStart1, i, std::ref(test_control));
80 }
81 for (u32 i = 0; i < num_threads; i++) {
82 threads[i].join();
83 }
84 for (u32 i = 0; i < num_threads; i++) {
85 REQUIRE(test_control.items[i] + i == test_control.results[i]);
86 }
87}
88
89class TestControl2 {
90public:
91 TestControl2() = default;
92
93 void DoWork1() {
94 trap2 = false;
95 while (trap.load())
96 ;
97 for (u32 i = 0; i < 12000; i++) {
98 value1 += i;
99 }
100 Fiber::YieldTo(fiber1, fiber3);
101 std::thread::id this_id = std::this_thread::get_id();
102 u32 id = ids[this_id];
103 assert1 = id == 1;
104 value2 += 5000;
105 Fiber::YieldTo(fiber1, thread_fibers[id]);
106 }
107
108 void DoWork2() {
109 while (trap2.load())
110 ;
111 value2 = 2000;
112 trap = false;
113 Fiber::YieldTo(fiber2, fiber1);
114 assert3 = false;
115 }
116
117 void DoWork3() {
118 std::thread::id this_id = std::this_thread::get_id();
119 u32 id = ids[this_id];
120 assert2 = id == 0;
121 value1 += 1000;
122 Fiber::YieldTo(fiber3, thread_fibers[id]);
123 }
124
125 void ExecuteThread(u32 id);
126
127 void CallFiber1() {
128 std::thread::id this_id = std::this_thread::get_id();
129 u32 id = ids[this_id];
130 Fiber::YieldTo(thread_fibers[id], fiber1);
131 }
132
133 void CallFiber2() {
134 std::thread::id this_id = std::this_thread::get_id();
135 u32 id = ids[this_id];
136 Fiber::YieldTo(thread_fibers[id], fiber2);
137 }
138
139 void Exit();
140
141 bool assert1{};
142 bool assert2{};
143 bool assert3{true};
144 u32 value1{};
145 u32 value2{};
146 std::atomic<bool> trap{true};
147 std::atomic<bool> trap2{true};
148 std::unordered_map<std::thread::id, u32> ids;
149 std::vector<std::shared_ptr<Common::Fiber>> thread_fibers;
150 std::shared_ptr<Common::Fiber> fiber1;
151 std::shared_ptr<Common::Fiber> fiber2;
152 std::shared_ptr<Common::Fiber> fiber3;
153};
154
155static void WorkControl2_1(void* control) {
156 auto* test_control = static_cast<TestControl2*>(control);
157 test_control->DoWork1();
158}
159
160static void WorkControl2_2(void* control) {
161 auto* test_control = static_cast<TestControl2*>(control);
162 test_control->DoWork2();
163}
164
165static void WorkControl2_3(void* control) {
166 auto* test_control = static_cast<TestControl2*>(control);
167 test_control->DoWork3();
168}
169
170void TestControl2::ExecuteThread(u32 id) {
171 std::thread::id this_id = std::this_thread::get_id();
172 ids[this_id] = id;
173 auto thread_fiber = Fiber::ThreadToFiber();
174 thread_fibers[id] = thread_fiber;
175}
176
177void TestControl2::Exit() {
178 std::thread::id this_id = std::this_thread::get_id();
179 u32 id = ids[this_id];
180 thread_fibers[id]->Exit();
181}
182
183static void ThreadStart2_1(u32 id, TestControl2& test_control) {
184 test_control.ExecuteThread(id);
185 test_control.CallFiber1();
186 test_control.Exit();
187}
188
189static void ThreadStart2_2(u32 id, TestControl2& test_control) {
190 test_control.ExecuteThread(id);
191 test_control.CallFiber2();
192 test_control.Exit();
193}
194
 195/** This test checks the fiber inter-thread exchange configuration and validates
 196 * that a fiber has been successfully transferred from one thread to another and
 197 * that the thread's TLS region is preserved while switching fibers.
198 */
199TEST_CASE("Fibers::InterExchange", "[common]") {
200 TestControl2 test_control{};
201 test_control.thread_fibers.resize(2);
202 test_control.fiber1 =
203 std::make_shared<Fiber>(std::function<void(void*)>{WorkControl2_1}, &test_control);
204 test_control.fiber2 =
205 std::make_shared<Fiber>(std::function<void(void*)>{WorkControl2_2}, &test_control);
206 test_control.fiber3 =
207 std::make_shared<Fiber>(std::function<void(void*)>{WorkControl2_3}, &test_control);
208 std::thread thread1(ThreadStart2_1, 0, std::ref(test_control));
209 std::thread thread2(ThreadStart2_2, 1, std::ref(test_control));
210 thread1.join();
211 thread2.join();
212 REQUIRE(test_control.assert1);
213 REQUIRE(test_control.assert2);
214 REQUIRE(test_control.assert3);
215 REQUIRE(test_control.value2 == 7000);
216 u32 cal_value = 0;
217 for (u32 i = 0; i < 12000; i++) {
218 cal_value += i;
219 }
220 cal_value += 1000;
221 REQUIRE(test_control.value1 == cal_value);
222}
223
224class TestControl3 {
225public:
226 TestControl3() = default;
227
228 void DoWork1() {
229 value1 += 1;
230 Fiber::YieldTo(fiber1, fiber2);
231 std::thread::id this_id = std::this_thread::get_id();
232 u32 id = ids[this_id];
233 value3 += 1;
234 Fiber::YieldTo(fiber1, thread_fibers[id]);
235 }
236
237 void DoWork2() {
238 value2 += 1;
239 std::thread::id this_id = std::this_thread::get_id();
240 u32 id = ids[this_id];
241 Fiber::YieldTo(fiber2, thread_fibers[id]);
242 }
243
244 void ExecuteThread(u32 id);
245
246 void CallFiber1() {
247 std::thread::id this_id = std::this_thread::get_id();
248 u32 id = ids[this_id];
249 Fiber::YieldTo(thread_fibers[id], fiber1);
250 }
251
252 void Exit();
253
254 u32 value1{};
255 u32 value2{};
256 u32 value3{};
257 std::unordered_map<std::thread::id, u32> ids;
258 std::vector<std::shared_ptr<Common::Fiber>> thread_fibers;
259 std::shared_ptr<Common::Fiber> fiber1;
260 std::shared_ptr<Common::Fiber> fiber2;
261};
262
263static void WorkControl3_1(void* control) {
264 auto* test_control = static_cast<TestControl3*>(control);
265 test_control->DoWork1();
266}
267
268static void WorkControl3_2(void* control) {
269 auto* test_control = static_cast<TestControl3*>(control);
270 test_control->DoWork2();
271}
272
273void TestControl3::ExecuteThread(u32 id) {
274 std::thread::id this_id = std::this_thread::get_id();
275 ids[this_id] = id;
276 auto thread_fiber = Fiber::ThreadToFiber();
277 thread_fibers[id] = thread_fiber;
278}
279
280void TestControl3::Exit() {
281 std::thread::id this_id = std::this_thread::get_id();
282 u32 id = ids[this_id];
283 thread_fibers[id]->Exit();
284}
285
286static void ThreadStart3(u32 id, TestControl3& test_control) {
287 test_control.ExecuteThread(id);
288 test_control.CallFiber1();
289 test_control.Exit();
290}
291
 292/** This test checks two threads racing to start the same fiber. It verifies
 293 * that execution occurred in an ordered manner and that at no point were two
 294 * contexts running at the same time.
295 */
296TEST_CASE("Fibers::StartRace", "[common]") {
297 TestControl3 test_control{};
298 test_control.thread_fibers.resize(2);
299 test_control.fiber1 =
300 std::make_shared<Fiber>(std::function<void(void*)>{WorkControl3_1}, &test_control);
301 test_control.fiber2 =
302 std::make_shared<Fiber>(std::function<void(void*)>{WorkControl3_2}, &test_control);
303 std::thread thread1(ThreadStart3, 0, std::ref(test_control));
304 std::thread thread2(ThreadStart3, 1, std::ref(test_control));
305 thread1.join();
306 thread2.join();
307 REQUIRE(test_control.value1 == 1);
308 REQUIRE(test_control.value2 == 1);
309 REQUIRE(test_control.value3 == 1);
310}
311
312class TestControl4;
313
314static void WorkControl4(void* control);
315
316class TestControl4 {
317public:
318 TestControl4() {
319 fiber1 = std::make_shared<Fiber>(std::function<void(void*)>{WorkControl4}, this);
320 goal_reached = false;
321 rewinded = false;
322 }
323
324 void Execute() {
325 thread_fiber = Fiber::ThreadToFiber();
326 Fiber::YieldTo(thread_fiber, fiber1);
327 thread_fiber->Exit();
328 }
329
330 void DoWork() {
331 fiber1->SetRewindPoint(std::function<void(void*)>{WorkControl4}, this);
332 if (rewinded) {
333 goal_reached = true;
334 Fiber::YieldTo(fiber1, thread_fiber);
335 }
336 rewinded = true;
337 fiber1->Rewind();
338 }
339
340 std::shared_ptr<Common::Fiber> fiber1;
341 std::shared_ptr<Common::Fiber> thread_fiber;
342 bool goal_reached;
343 bool rewinded;
344};
345
346static void WorkControl4(void* control) {
347 auto* test_control = static_cast<TestControl4*>(control);
348 test_control->DoWork();
349}
350
351TEST_CASE("Fibers::Rewind", "[common]") {
352 TestControl4 test_control{};
353 test_control.Execute();
354 REQUIRE(test_control.goal_reached);
355 REQUIRE(test_control.rewinded);
356}
357
358} // namespace Common
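
Distilled from the tests above, the minimal lifecycle of the fiber API is: adopt the running thread as a fiber, yield into a work fiber, have it yield back, then Exit() the thread fiber. A sketch using only the calls the tests exercise:

#include <cstdio>
#include <functional>
#include <memory>

#include "common/fiber.h"

namespace {
std::shared_ptr<Common::Fiber> thread_fiber;
std::shared_ptr<Common::Fiber> work_fiber;

void Work(void* /*start_parameter*/) {
    std::puts("running on the work fiber");
    Common::Fiber::YieldTo(work_fiber, thread_fiber); // hand control back
}
} // namespace

int main() {
    thread_fiber = Common::Fiber::ThreadToFiber(); // adopt this thread
    work_fiber = std::make_shared<Common::Fiber>(std::function<void(void*)>{Work}, nullptr);
    Common::Fiber::YieldTo(thread_fiber, work_fiber); // runs Work() until it yields
    std::puts("back on the thread fiber");
    thread_fiber->Exit(); // restore the plain thread before returning
    return 0;
}
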
diff --git a/src/tests/core/core_timing.cpp b/src/tests/core/core_timing.cpp
index ff2d11cc8..e66db1940 100644
--- a/src/tests/core/core_timing.cpp
+++ b/src/tests/core/core_timing.cpp
@@ -18,29 +18,26 @@ namespace {
18// Numbers are chosen randomly to make sure the correct one is given. 18// Numbers are chosen randomly to make sure the correct one is given.
19constexpr std::array<u64, 5> CB_IDS{{42, 144, 93, 1026, UINT64_C(0xFFFF7FFFF7FFFF)}}; 19constexpr std::array<u64, 5> CB_IDS{{42, 144, 93, 1026, UINT64_C(0xFFFF7FFFF7FFFF)}};
20constexpr int MAX_SLICE_LENGTH = 10000; // Copied from CoreTiming internals 20constexpr int MAX_SLICE_LENGTH = 10000; // Copied from CoreTiming internals
21constexpr std::array<u64, 5> calls_order{{2, 0, 1, 4, 3}};
22std::array<s64, 5> delays{};
21 23
22std::bitset<CB_IDS.size()> callbacks_ran_flags; 24std::bitset<CB_IDS.size()> callbacks_ran_flags;
23u64 expected_callback = 0; 25u64 expected_callback = 0;
24s64 lateness = 0;
25 26
26template <unsigned int IDX> 27template <unsigned int IDX>
27void CallbackTemplate(u64 userdata, s64 cycles_late) { 28void HostCallbackTemplate(u64 userdata, s64 nanoseconds_late) {
28 static_assert(IDX < CB_IDS.size(), "IDX out of range"); 29 static_assert(IDX < CB_IDS.size(), "IDX out of range");
29 callbacks_ran_flags.set(IDX); 30 callbacks_ran_flags.set(IDX);
30 REQUIRE(CB_IDS[IDX] == userdata); 31 REQUIRE(CB_IDS[IDX] == userdata);
31 REQUIRE(CB_IDS[IDX] == expected_callback); 32 REQUIRE(CB_IDS[IDX] == CB_IDS[calls_order[expected_callback]]);
32 REQUIRE(lateness == cycles_late); 33 delays[IDX] = nanoseconds_late;
33} 34 ++expected_callback;
34
35u64 callbacks_done = 0;
36
37void EmptyCallback(u64 userdata, s64 cycles_late) {
38 ++callbacks_done;
39} 35}
40 36
41struct ScopeInit final { 37struct ScopeInit final {
42 ScopeInit() { 38 ScopeInit() {
43 core_timing.Initialize(); 39 core_timing.SetMulticore(true);
40 core_timing.Initialize([]() {});
44 } 41 }
45 ~ScopeInit() { 42 ~ScopeInit() {
46 core_timing.Shutdown(); 43 core_timing.Shutdown();
@@ -49,110 +46,101 @@ struct ScopeInit final {
49 Core::Timing::CoreTiming core_timing; 46 Core::Timing::CoreTiming core_timing;
50}; 47};
51 48
52void AdvanceAndCheck(Core::Timing::CoreTiming& core_timing, u32 idx, u32 context = 0, 49#pragma optimize("", off)
53 int expected_lateness = 0, int cpu_downcount = 0) {
54 callbacks_ran_flags = 0;
55 expected_callback = CB_IDS[idx];
56 lateness = expected_lateness;
57
58 // Pretend we executed X cycles of instructions.
59 core_timing.SwitchContext(context);
60 core_timing.AddTicks(core_timing.GetDowncount() - cpu_downcount);
61 core_timing.Advance();
62 core_timing.SwitchContext((context + 1) % 4);
63 50
64 REQUIRE(decltype(callbacks_ran_flags)().set(idx) == callbacks_ran_flags); 51u64 TestTimerSpeed(Core::Timing::CoreTiming& core_timing) {
52 u64 start = core_timing.GetGlobalTimeNs().count();
53 u64 placebo = 0;
54 for (std::size_t i = 0; i < 1000; i++) {
55 placebo += core_timing.GetGlobalTimeNs().count();
56 }
57 u64 end = core_timing.GetGlobalTimeNs().count();
58 return (end - start);
65} 59}
60
61#pragma optimize("", on)
62
66} // Anonymous namespace 63} // Anonymous namespace
67 64
68TEST_CASE("CoreTiming[BasicOrder]", "[core]") { 65TEST_CASE("CoreTiming[BasicOrder]", "[core]") {
69 ScopeInit guard; 66 ScopeInit guard;
70 auto& core_timing = guard.core_timing; 67 auto& core_timing = guard.core_timing;
68 std::vector<std::shared_ptr<Core::Timing::EventType>> events{
69 Core::Timing::CreateEvent("callbackA", HostCallbackTemplate<0>),
70 Core::Timing::CreateEvent("callbackB", HostCallbackTemplate<1>),
71 Core::Timing::CreateEvent("callbackC", HostCallbackTemplate<2>),
72 Core::Timing::CreateEvent("callbackD", HostCallbackTemplate<3>),
73 Core::Timing::CreateEvent("callbackE", HostCallbackTemplate<4>),
74 };
75
76 expected_callback = 0;
77
78 core_timing.SyncPause(true);
79
80 u64 one_micro = 1000U;
81 for (std::size_t i = 0; i < events.size(); i++) {
82 u64 order = calls_order[i];
83 core_timing.ScheduleEvent(i * one_micro + 100U, events[order], CB_IDS[order]);
84 }
85 /// test pause
86 REQUIRE(callbacks_ran_flags.none());
71 87
72 std::shared_ptr<Core::Timing::EventType> cb_a = 88 core_timing.Pause(false); // No need to sync
73 Core::Timing::CreateEvent("callbackA", CallbackTemplate<0>);
74 std::shared_ptr<Core::Timing::EventType> cb_b =
75 Core::Timing::CreateEvent("callbackB", CallbackTemplate<1>);
76 std::shared_ptr<Core::Timing::EventType> cb_c =
77 Core::Timing::CreateEvent("callbackC", CallbackTemplate<2>);
78 std::shared_ptr<Core::Timing::EventType> cb_d =
79 Core::Timing::CreateEvent("callbackD", CallbackTemplate<3>);
80 std::shared_ptr<Core::Timing::EventType> cb_e =
81 Core::Timing::CreateEvent("callbackE", CallbackTemplate<4>);
82
83 // Enter slice 0
84 core_timing.ResetRun();
85
86 // D -> B -> C -> A -> E
87 core_timing.SwitchContext(0);
88 core_timing.ScheduleEvent(1000, cb_a, CB_IDS[0]);
89 REQUIRE(1000 == core_timing.GetDowncount());
90 core_timing.ScheduleEvent(500, cb_b, CB_IDS[1]);
91 REQUIRE(500 == core_timing.GetDowncount());
92 core_timing.ScheduleEvent(800, cb_c, CB_IDS[2]);
93 REQUIRE(500 == core_timing.GetDowncount());
94 core_timing.ScheduleEvent(100, cb_d, CB_IDS[3]);
95 REQUIRE(100 == core_timing.GetDowncount());
96 core_timing.ScheduleEvent(1200, cb_e, CB_IDS[4]);
97 REQUIRE(100 == core_timing.GetDowncount());
98
99 AdvanceAndCheck(core_timing, 3, 0);
100 AdvanceAndCheck(core_timing, 1, 1);
101 AdvanceAndCheck(core_timing, 2, 2);
102 AdvanceAndCheck(core_timing, 0, 3);
103 AdvanceAndCheck(core_timing, 4, 0);
104}
105
106TEST_CASE("CoreTiming[FairSharing]", "[core]") {
107 89
108 ScopeInit guard; 90 while (core_timing.HasPendingEvents())
109 auto& core_timing = guard.core_timing; 91 ;
110 92
111 std::shared_ptr<Core::Timing::EventType> empty_callback = 93 REQUIRE(callbacks_ran_flags.all());
112 Core::Timing::CreateEvent("empty_callback", EmptyCallback);
113 94
114 callbacks_done = 0; 95 for (std::size_t i = 0; i < delays.size(); i++) {
115 u64 MAX_CALLBACKS = 10; 96 const double delay = static_cast<double>(delays[i]);
116 for (std::size_t i = 0; i < 10; i++) { 97 const double micro = delay / 1000.0f;
117 core_timing.ScheduleEvent(i * 3333U, empty_callback, 0); 98 const double mili = micro / 1000.0f;
99 printf("HostTimer Pausing Delay[%zu]: %.3f %.6f\n", i, micro, mili);
118 } 100 }
119
120 const s64 advances = MAX_SLICE_LENGTH / 10;
121 core_timing.ResetRun();
122 u64 current_time = core_timing.GetTicks();
123 bool keep_running{};
124 do {
125 keep_running = false;
126 for (u32 active_core = 0; active_core < 4; ++active_core) {
127 core_timing.SwitchContext(active_core);
128 if (core_timing.CanCurrentContextRun()) {
129 core_timing.AddTicks(std::min<s64>(advances, core_timing.GetDowncount()));
130 core_timing.Advance();
131 }
132 keep_running |= core_timing.CanCurrentContextRun();
133 }
134 } while (keep_running);
135 u64 current_time_2 = core_timing.GetTicks();
136
137 REQUIRE(MAX_CALLBACKS == callbacks_done);
138 REQUIRE(current_time_2 == current_time + MAX_SLICE_LENGTH * 4);
139} 101}
140 102
141TEST_CASE("Core::Timing[PredictableLateness]", "[core]") { 103TEST_CASE("CoreTiming[BasicOrderNoPausing]", "[core]") {
142 ScopeInit guard; 104 ScopeInit guard;
143 auto& core_timing = guard.core_timing; 105 auto& core_timing = guard.core_timing;
106 std::vector<std::shared_ptr<Core::Timing::EventType>> events{
107 Core::Timing::CreateEvent("callbackA", HostCallbackTemplate<0>),
108 Core::Timing::CreateEvent("callbackB", HostCallbackTemplate<1>),
109 Core::Timing::CreateEvent("callbackC", HostCallbackTemplate<2>),
110 Core::Timing::CreateEvent("callbackD", HostCallbackTemplate<3>),
111 Core::Timing::CreateEvent("callbackE", HostCallbackTemplate<4>),
112 };
113
114 core_timing.SyncPause(true);
115 core_timing.SyncPause(false);
116
117 expected_callback = 0;
118
119 u64 start = core_timing.GetGlobalTimeNs().count();
120 u64 one_micro = 1000U;
121 for (std::size_t i = 0; i < events.size(); i++) {
122 u64 order = calls_order[i];
123 core_timing.ScheduleEvent(i * one_micro + 100U, events[order], CB_IDS[order]);
124 }
125 u64 end = core_timing.GetGlobalTimeNs().count();
126 const double scheduling_time = static_cast<double>(end - start);
127 const double timer_time = static_cast<double>(TestTimerSpeed(core_timing));
144 128
145 std::shared_ptr<Core::Timing::EventType> cb_a = 129 while (core_timing.HasPendingEvents())
146 Core::Timing::CreateEvent("callbackA", CallbackTemplate<0>); 130 ;
147 std::shared_ptr<Core::Timing::EventType> cb_b =
148 Core::Timing::CreateEvent("callbackB", CallbackTemplate<1>);
149 131
150 // Enter slice 0 132 REQUIRE(callbacks_ran_flags.all());
151 core_timing.ResetRun();
152 133
153 core_timing.ScheduleEvent(100, cb_a, CB_IDS[0]); 134 for (std::size_t i = 0; i < delays.size(); i++) {
154 core_timing.ScheduleEvent(200, cb_b, CB_IDS[1]); 135 const double delay = static_cast<double>(delays[i]);
136 const double micro = delay / 1000.0f;
137 const double mili = micro / 1000.0f;
138 printf("HostTimer No Pausing Delay[%zu]: %.3f %.6f\n", i, micro, mili);
139 }
155 140
156 AdvanceAndCheck(core_timing, 0, 0, 10, -10); // (100 - 10) 141 const double micro = scheduling_time / 1000.0f;
157 AdvanceAndCheck(core_timing, 1, 1, 50, -50); 142 const double mili = micro / 1000.0f;
143 printf("HostTimer No Pausing Scheduling Time: %.3f %.6f\n", micro, mili);
144 printf("HostTimer No Pausing Timer Time: %.3f %.6f\n", timer_time / 1000.f,
145 timer_time / 1000000.f);
158} 146}
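
The tests above exercise the rewritten host-timed CoreTiming end to end. Below is a minimal consumer sketch using only calls that appear in the tests; the callback signature void(u64 userdata, s64 nanoseconds_late) is inferred from HostCallbackTemplate and the delays[] bookkeeping, so treat it as an assumption.

    #include "core/core_timing.h"

    void ScheduleOneShot() {
        Core::Timing::CoreTiming core_timing;
        core_timing.SetMulticore(true);
        core_timing.Initialize([] {}); // empty on-thread-init callback, as in ScopeInit

        const auto event = Core::Timing::CreateEvent(
            "one_shot", [](u64 userdata, s64 nanoseconds_late) {
                // nanoseconds_late reports how far past the deadline the host timer fired.
            });

        core_timing.ScheduleEvent(1000, event, /*userdata=*/0); // due in ~1 microsecond
        while (core_timing.HasPendingEvents()) {
            // Busy-wait until the timer thread drains the queue, as the tests do.
        }
        core_timing.Shutdown();
    }
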
diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt
index 2bf8d68ce..21c46a567 100644
--- a/src/video_core/CMakeLists.txt
+++ b/src/video_core/CMakeLists.txt
@@ -3,6 +3,8 @@ add_library(video_core STATIC
3 buffer_cache/buffer_cache.h 3 buffer_cache/buffer_cache.h
4 buffer_cache/map_interval.cpp 4 buffer_cache/map_interval.cpp
5 buffer_cache/map_interval.h 5 buffer_cache/map_interval.h
6 compatible_formats.cpp
7 compatible_formats.h
6 dirty_flags.cpp 8 dirty_flags.cpp
7 dirty_flags.h 9 dirty_flags.h
8 dma_pusher.cpp 10 dma_pusher.cpp
@@ -27,6 +29,8 @@ add_library(video_core STATIC
27 engines/shader_type.h 29 engines/shader_type.h
28 macro/macro.cpp 30 macro/macro.cpp
29 macro/macro.h 31 macro/macro.h
32 macro/macro_hle.cpp
33 macro/macro_hle.h
30 macro/macro_interpreter.cpp 34 macro/macro_interpreter.cpp
31 macro/macro_interpreter.h 35 macro/macro_interpreter.h
32 macro/macro_jit_x64.cpp 36 macro/macro_jit_x64.cpp
@@ -49,11 +53,11 @@ add_library(video_core STATIC
49 query_cache.h 53 query_cache.h
50 rasterizer_accelerated.cpp 54 rasterizer_accelerated.cpp
51 rasterizer_accelerated.h 55 rasterizer_accelerated.h
52 rasterizer_cache.cpp
53 rasterizer_cache.h
54 rasterizer_interface.h 56 rasterizer_interface.h
55 renderer_base.cpp 57 renderer_base.cpp
56 renderer_base.h 58 renderer_base.h
59 renderer_opengl/gl_arb_decompiler.cpp
60 renderer_opengl/gl_arb_decompiler.h
57 renderer_opengl/gl_buffer_cache.cpp 61 renderer_opengl/gl_buffer_cache.cpp
58 renderer_opengl/gl_buffer_cache.h 62 renderer_opengl/gl_buffer_cache.h
59 renderer_opengl/gl_device.cpp 63 renderer_opengl/gl_device.cpp
@@ -93,6 +97,7 @@ add_library(video_core STATIC
93 renderer_opengl/utils.h 97 renderer_opengl/utils.h
94 sampler_cache.cpp 98 sampler_cache.cpp
95 sampler_cache.h 99 sampler_cache.h
100 shader_cache.h
96 shader/decode/arithmetic.cpp 101 shader/decode/arithmetic.cpp
97 shader/decode/arithmetic_immediate.cpp 102 shader/decode/arithmetic_immediate.cpp
98 shader/decode/bfe.cpp 103 shader/decode/bfe.cpp
diff --git a/src/video_core/buffer_cache/buffer_block.h b/src/video_core/buffer_cache/buffer_block.h
index e35ee0b67..e64170e66 100644
--- a/src/video_core/buffer_cache/buffer_block.h
+++ b/src/video_core/buffer_cache/buffer_block.h
@@ -15,48 +15,47 @@ namespace VideoCommon {
15 15
16class BufferBlock { 16class BufferBlock {
17public: 17public:
18 bool Overlaps(const VAddr start, const VAddr end) const { 18 bool Overlaps(VAddr start, VAddr end) const {
19 return (cpu_addr < end) && (cpu_addr_end > start); 19 return (cpu_addr < end) && (cpu_addr_end > start);
20 } 20 }
21 21
22 bool IsInside(const VAddr other_start, const VAddr other_end) const { 22 bool IsInside(VAddr other_start, VAddr other_end) const {
23 return cpu_addr <= other_start && other_end <= cpu_addr_end; 23 return cpu_addr <= other_start && other_end <= cpu_addr_end;
24 } 24 }
25 25
26 std::size_t GetOffset(const VAddr in_addr) { 26 std::size_t Offset(VAddr in_addr) const {
27 return static_cast<std::size_t>(in_addr - cpu_addr); 27 return static_cast<std::size_t>(in_addr - cpu_addr);
28 } 28 }
29 29
30 VAddr GetCpuAddr() const { 30 VAddr CpuAddr() const {
31 return cpu_addr; 31 return cpu_addr;
32 } 32 }
33 33
34 VAddr GetCpuAddrEnd() const { 34 VAddr CpuAddrEnd() const {
35 return cpu_addr_end; 35 return cpu_addr_end;
36 } 36 }
37 37
38 void SetCpuAddr(const VAddr new_addr) { 38 void SetCpuAddr(VAddr new_addr) {
39 cpu_addr = new_addr; 39 cpu_addr = new_addr;
40 cpu_addr_end = new_addr + size; 40 cpu_addr_end = new_addr + size;
41 } 41 }
42 42
43 std::size_t GetSize() const { 43 std::size_t Size() const {
44 return size; 44 return size;
45 } 45 }
46 46
47 void SetEpoch(u64 new_epoch) { 47 u64 Epoch() const {
48 epoch = new_epoch; 48 return epoch;
49 } 49 }
50 50
51 u64 GetEpoch() { 51 void SetEpoch(u64 new_epoch) {
52 return epoch; 52 epoch = new_epoch;
53 } 53 }
54 54
55protected: 55protected:
56 explicit BufferBlock(VAddr cpu_addr, const std::size_t size) : size{size} { 56 explicit BufferBlock(VAddr cpu_addr_, std::size_t size_) : size{size_} {
57 SetCpuAddr(cpu_addr); 57 SetCpuAddr(cpu_addr_);
58 } 58 }
59 ~BufferBlock() = default;
60 59
61private: 60private:
62 VAddr cpu_addr{}; 61 VAddr cpu_addr{};
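
The block above renames the accessors (GetOffset to Offset, GetCpuAddr to CpuAddr, and so on) and const-qualifies Offset(). A sketch of how a backend buffer sees the new surface; ExampleBuffer and HostOffsetOf are illustrative, only BufferBlock comes from the patch.

    #include "video_core/buffer_cache/buffer_block.h"

    class ExampleBuffer : public VideoCommon::BufferBlock {
    public:
        ExampleBuffer(VAddr cpu_addr, std::size_t size) : BufferBlock{cpu_addr, size} {}
    };

    std::size_t HostOffsetOf(const ExampleBuffer& buffer, VAddr guest_addr) {
        // Offset() is const-qualified now, so const blocks can be queried directly.
        return buffer.IsInside(guest_addr, guest_addr + 1) ? buffer.Offset(guest_addr) : 0;
    }
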
diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h
index b88fce2cd..cf8bdd021 100644
--- a/src/video_core/buffer_cache/buffer_cache.h
+++ b/src/video_core/buffer_cache/buffer_cache.h
@@ -30,23 +30,31 @@
30 30
31namespace VideoCommon { 31namespace VideoCommon {
32 32
33template <typename OwnerBuffer, typename BufferType, typename StreamBuffer> 33template <typename Buffer, typename BufferType, typename StreamBuffer>
34class BufferCache { 34class BufferCache {
35 using IntervalSet = boost::icl::interval_set<VAddr>; 35 using IntervalSet = boost::icl::interval_set<VAddr>;
36 using IntervalType = typename IntervalSet::interval_type; 36 using IntervalType = typename IntervalSet::interval_type;
37 using VectorMapInterval = boost::container::small_vector<MapInterval*, 1>; 37 using VectorMapInterval = boost::container::small_vector<MapInterval*, 1>;
38 38
39 static constexpr u64 WRITE_PAGE_BIT = 11;
40 static constexpr u64 BLOCK_PAGE_BITS = 21;
41 static constexpr u64 BLOCK_PAGE_SIZE = 1ULL << BLOCK_PAGE_BITS;
42
39public: 43public:
40 using BufferInfo = std::pair<BufferType, u64>; 44 struct BufferInfo {
45 BufferType handle;
46 u64 offset;
47 u64 address;
48 };
41 49
42 BufferInfo UploadMemory(GPUVAddr gpu_addr, std::size_t size, std::size_t alignment = 4, 50 BufferInfo UploadMemory(GPUVAddr gpu_addr, std::size_t size, std::size_t alignment = 4,
43 bool is_written = false, bool use_fast_cbuf = false) { 51 bool is_written = false, bool use_fast_cbuf = false) {
44 std::lock_guard lock{mutex}; 52 std::lock_guard lock{mutex};
45 53
46 const auto& memory_manager = system.GPU().MemoryManager(); 54 auto& memory_manager = system.GPU().MemoryManager();
47 const std::optional<VAddr> cpu_addr_opt = memory_manager.GpuToCpuAddress(gpu_addr); 55 const std::optional<VAddr> cpu_addr_opt = memory_manager.GpuToCpuAddress(gpu_addr);
48 if (!cpu_addr_opt) { 56 if (!cpu_addr_opt) {
49 return {GetEmptyBuffer(size), 0}; 57 return GetEmptyBuffer(size);
50 } 58 }
51 const VAddr cpu_addr = *cpu_addr_opt; 59 const VAddr cpu_addr = *cpu_addr_opt;
52 60
@@ -55,7 +63,6 @@ public:
55 constexpr std::size_t max_stream_size = 0x800; 63 constexpr std::size_t max_stream_size = 0x800;
56 if (use_fast_cbuf || size < max_stream_size) { 64 if (use_fast_cbuf || size < max_stream_size) {
57 if (!is_written && !IsRegionWritten(cpu_addr, cpu_addr + size - 1)) { 65 if (!is_written && !IsRegionWritten(cpu_addr, cpu_addr + size - 1)) {
58 auto& memory_manager = system.GPU().MemoryManager();
59 const bool is_granular = memory_manager.IsGranularRange(gpu_addr, size); 66 const bool is_granular = memory_manager.IsGranularRange(gpu_addr, size);
60 if (use_fast_cbuf) { 67 if (use_fast_cbuf) {
61 u8* dest; 68 u8* dest;
@@ -82,10 +89,10 @@ public:
82 } 89 }
83 } 90 }
84 91
85 OwnerBuffer block = GetBlock(cpu_addr, size); 92 Buffer* const block = GetBlock(cpu_addr, size);
86 MapInterval* const map = MapAddress(block, gpu_addr, cpu_addr, size); 93 MapInterval* const map = MapAddress(block, gpu_addr, cpu_addr, size);
87 if (!map) { 94 if (!map) {
88 return {GetEmptyBuffer(size), 0}; 95 return GetEmptyBuffer(size);
89 } 96 }
90 if (is_written) { 97 if (is_written) {
91 map->MarkAsModified(true, GetModifiedTicks()); 98 map->MarkAsModified(true, GetModifiedTicks());
@@ -98,7 +105,7 @@ public:
98 } 105 }
99 } 106 }
100 107
101 return {ToHandle(block), static_cast<u64>(block->GetOffset(cpu_addr))}; 108 return BufferInfo{block->Handle(), block->Offset(cpu_addr), block->Address()};
102 } 109 }
103 110
 104 /// Uploads from host memory. Returns the OpenGL buffer where it's located and its offset. 111
@@ -110,31 +117,37 @@ public:
110 }); 117 });
111 } 118 }
112 119
113 void Map(std::size_t max_size) { 120 /// Prepares the buffer cache for data uploading
121 /// @param max_size Maximum number of bytes that will be uploaded
122 /// @return True when a stream buffer invalidation was required, false otherwise
123 bool Map(std::size_t max_size) {
114 std::lock_guard lock{mutex}; 124 std::lock_guard lock{mutex};
115 125
126 bool invalidated;
116 std::tie(buffer_ptr, buffer_offset_base, invalidated) = stream_buffer->Map(max_size, 4); 127 std::tie(buffer_ptr, buffer_offset_base, invalidated) = stream_buffer->Map(max_size, 4);
117 buffer_offset = buffer_offset_base; 128 buffer_offset = buffer_offset_base;
129
130 return invalidated;
118 } 131 }
119 132
120 /// Finishes the upload stream, returns true on bindings invalidation. 133 /// Finishes the upload stream
121 bool Unmap() { 134 void Unmap() {
122 std::lock_guard lock{mutex}; 135 std::lock_guard lock{mutex};
123
124 stream_buffer->Unmap(buffer_offset - buffer_offset_base); 136 stream_buffer->Unmap(buffer_offset - buffer_offset_base);
125 return std::exchange(invalidated, false);
126 } 137 }
127 138
 139 /// Function called at the end of each frame, intended for deferred operations
128 void TickFrame() { 140 void TickFrame() {
129 ++epoch; 141 ++epoch;
142
130 while (!pending_destruction.empty()) { 143 while (!pending_destruction.empty()) {
131 // Delay at least 4 frames before destruction. 144 // Delay at least 4 frames before destruction.
132 // This is due to triple buffering happening on some drivers. 145 // This is due to triple buffering happening on some drivers.
133 static constexpr u64 epochs_to_destroy = 5; 146 static constexpr u64 epochs_to_destroy = 5;
134 if (pending_destruction.front()->GetEpoch() + epochs_to_destroy > epoch) { 147 if (pending_destruction.front()->Epoch() + epochs_to_destroy > epoch) {
135 break; 148 break;
136 } 149 }
137 pending_destruction.pop_front(); 150 pending_destruction.pop();
138 } 151 }
139 } 152 }
140 153
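
Note that the Map()/Unmap() contract is inverted by this hunk: invalidation is reported up front by Map() instead of after the fact by Unmap(). A usage sketch under that reading; the template parameter and helper name are illustrative.

    #include <cstddef>

    template <typename BufferCache>
    void UploadPass(BufferCache& buffer_cache, std::size_t worst_case_bytes) {
        // Map() returns true when the stream buffer was invalidated, so stale
        // handles can be rebound before any draw call is issued.
        if (buffer_cache.Map(worst_case_bytes)) {
            // Rebind every descriptor that referenced the old stream buffer.
        }
        // ... UploadMemory()/UploadHostMemory() calls fill the stream here ...
        buffer_cache.Unmap(); // finishes the upload stream; no return value anymore
    }
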
@@ -245,28 +258,16 @@ public:
245 committed_flushes.pop_front(); 258 committed_flushes.pop_front();
246 } 259 }
247 260
248 virtual BufferType GetEmptyBuffer(std::size_t size) = 0; 261 virtual BufferInfo GetEmptyBuffer(std::size_t size) = 0;
249 262
250protected: 263protected:
251 explicit BufferCache(VideoCore::RasterizerInterface& rasterizer, Core::System& system, 264 explicit BufferCache(VideoCore::RasterizerInterface& rasterizer, Core::System& system,
252 std::unique_ptr<StreamBuffer> stream_buffer) 265 std::unique_ptr<StreamBuffer> stream_buffer)
253 : rasterizer{rasterizer}, system{system}, stream_buffer{std::move(stream_buffer)}, 266 : rasterizer{rasterizer}, system{system}, stream_buffer{std::move(stream_buffer)} {}
254 stream_buffer_handle{this->stream_buffer->GetHandle()} {}
255 267
256 ~BufferCache() = default; 268 ~BufferCache() = default;
257 269
258 virtual BufferType ToHandle(const OwnerBuffer& storage) = 0; 270 virtual std::shared_ptr<Buffer> CreateBlock(VAddr cpu_addr, std::size_t size) = 0;
259
260 virtual OwnerBuffer CreateBlock(VAddr cpu_addr, std::size_t size) = 0;
261
262 virtual void UploadBlockData(const OwnerBuffer& buffer, std::size_t offset, std::size_t size,
263 const u8* data) = 0;
264
265 virtual void DownloadBlockData(const OwnerBuffer& buffer, std::size_t offset, std::size_t size,
266 u8* data) = 0;
267
268 virtual void CopyBlock(const OwnerBuffer& src, const OwnerBuffer& dst, std::size_t src_offset,
269 std::size_t dst_offset, std::size_t size) = 0;
270 271
271 virtual BufferInfo ConstBufferUpload(const void* raw_pointer, std::size_t size) { 272 virtual BufferInfo ConstBufferUpload(const void* raw_pointer, std::size_t size) {
272 return {}; 273 return {};
@@ -321,7 +322,7 @@ protected:
321 } 322 }
322 323
323private: 324private:
324 MapInterval* MapAddress(const OwnerBuffer& block, GPUVAddr gpu_addr, VAddr cpu_addr, 325 MapInterval* MapAddress(const Buffer* block, GPUVAddr gpu_addr, VAddr cpu_addr,
325 std::size_t size) { 326 std::size_t size) {
326 const VectorMapInterval overlaps = GetMapsInRange(cpu_addr, size); 327 const VectorMapInterval overlaps = GetMapsInRange(cpu_addr, size);
327 if (overlaps.empty()) { 328 if (overlaps.empty()) {
@@ -329,11 +330,11 @@ private:
329 const VAddr cpu_addr_end = cpu_addr + size; 330 const VAddr cpu_addr_end = cpu_addr + size;
330 if (memory_manager.IsGranularRange(gpu_addr, size)) { 331 if (memory_manager.IsGranularRange(gpu_addr, size)) {
331 u8* host_ptr = memory_manager.GetPointer(gpu_addr); 332 u8* host_ptr = memory_manager.GetPointer(gpu_addr);
332 UploadBlockData(block, block->GetOffset(cpu_addr), size, host_ptr); 333 block->Upload(block->Offset(cpu_addr), size, host_ptr);
333 } else { 334 } else {
334 staging_buffer.resize(size); 335 staging_buffer.resize(size);
335 memory_manager.ReadBlockUnsafe(gpu_addr, staging_buffer.data(), size); 336 memory_manager.ReadBlockUnsafe(gpu_addr, staging_buffer.data(), size);
336 UploadBlockData(block, block->GetOffset(cpu_addr), size, staging_buffer.data()); 337 block->Upload(block->Offset(cpu_addr), size, staging_buffer.data());
337 } 338 }
338 return Register(MapInterval(cpu_addr, cpu_addr_end, gpu_addr)); 339 return Register(MapInterval(cpu_addr, cpu_addr_end, gpu_addr));
339 } 340 }
@@ -376,7 +377,7 @@ private:
376 return map; 377 return map;
377 } 378 }
378 379
379 void UpdateBlock(const OwnerBuffer& block, VAddr start, VAddr end, 380 void UpdateBlock(const Buffer* block, VAddr start, VAddr end,
380 const VectorMapInterval& overlaps) { 381 const VectorMapInterval& overlaps) {
381 const IntervalType base_interval{start, end}; 382 const IntervalType base_interval{start, end};
382 IntervalSet interval_set{}; 383 IntervalSet interval_set{};
@@ -386,13 +387,13 @@ private:
386 interval_set.subtract(subtract); 387 interval_set.subtract(subtract);
387 } 388 }
388 for (auto& interval : interval_set) { 389 for (auto& interval : interval_set) {
389 std::size_t size = interval.upper() - interval.lower(); 390 const std::size_t size = interval.upper() - interval.lower();
390 if (size > 0) { 391 if (size == 0) {
391 staging_buffer.resize(size); 392 continue;
392 system.Memory().ReadBlockUnsafe(interval.lower(), staging_buffer.data(), size);
393 UploadBlockData(block, block->GetOffset(interval.lower()), size,
394 staging_buffer.data());
395 } 393 }
394 staging_buffer.resize(size);
395 system.Memory().ReadBlockUnsafe(interval.lower(), staging_buffer.data(), size);
396 block->Upload(block->Offset(interval.lower()), size, staging_buffer.data());
396 } 397 }
397 } 398 }
398 399
@@ -422,10 +423,14 @@ private:
422 } 423 }
423 424
424 void FlushMap(MapInterval* map) { 425 void FlushMap(MapInterval* map) {
426 const auto it = blocks.find(map->start >> BLOCK_PAGE_BITS);
427 ASSERT_OR_EXECUTE(it != blocks.end(), return;);
428
429 std::shared_ptr<Buffer> block = it->second;
430
425 const std::size_t size = map->end - map->start; 431 const std::size_t size = map->end - map->start;
426 OwnerBuffer block = blocks[map->start >> block_page_bits];
427 staging_buffer.resize(size); 432 staging_buffer.resize(size);
428 DownloadBlockData(block, block->GetOffset(map->start), size, staging_buffer.data()); 433 block->Download(block->Offset(map->start), size, staging_buffer.data());
429 system.Memory().WriteBlockUnsafe(map->start, staging_buffer.data(), size); 434 system.Memory().WriteBlockUnsafe(map->start, staging_buffer.data(), size);
430 map->MarkAsModified(false, 0); 435 map->MarkAsModified(false, 0);
431 } 436 }
@@ -438,7 +443,7 @@ private:
438 443
439 buffer_ptr += size; 444 buffer_ptr += size;
440 buffer_offset += size; 445 buffer_offset += size;
441 return {stream_buffer_handle, uploaded_offset}; 446 return BufferInfo{stream_buffer->Handle(), uploaded_offset, stream_buffer->Address()};
442 } 447 }
443 448
444 void AlignBuffer(std::size_t alignment) { 449 void AlignBuffer(std::size_t alignment) {
@@ -448,97 +453,89 @@ private:
448 buffer_offset = offset_aligned; 453 buffer_offset = offset_aligned;
449 } 454 }
450 455
451 OwnerBuffer EnlargeBlock(OwnerBuffer buffer) { 456 std::shared_ptr<Buffer> EnlargeBlock(std::shared_ptr<Buffer> buffer) {
452 const std::size_t old_size = buffer->GetSize(); 457 const std::size_t old_size = buffer->Size();
453 const std::size_t new_size = old_size + block_page_size; 458 const std::size_t new_size = old_size + BLOCK_PAGE_SIZE;
454 const VAddr cpu_addr = buffer->GetCpuAddr(); 459 const VAddr cpu_addr = buffer->CpuAddr();
455 OwnerBuffer new_buffer = CreateBlock(cpu_addr, new_size); 460 std::shared_ptr<Buffer> new_buffer = CreateBlock(cpu_addr, new_size);
456 CopyBlock(buffer, new_buffer, 0, 0, old_size); 461 new_buffer->CopyFrom(*buffer, 0, 0, old_size);
457 buffer->SetEpoch(epoch); 462 QueueDestruction(std::move(buffer));
458 pending_destruction.push_back(buffer); 463
459 const VAddr cpu_addr_end = cpu_addr + new_size - 1; 464 const VAddr cpu_addr_end = cpu_addr + new_size - 1;
460 u64 page_start = cpu_addr >> block_page_bits; 465 const u64 page_end = cpu_addr_end >> BLOCK_PAGE_BITS;
461 const u64 page_end = cpu_addr_end >> block_page_bits; 466 for (u64 page_start = cpu_addr >> BLOCK_PAGE_BITS; page_start <= page_end; ++page_start) {
462 while (page_start <= page_end) { 467 blocks.insert_or_assign(page_start, new_buffer);
463 blocks[page_start] = new_buffer;
464 ++page_start;
465 } 468 }
469
466 return new_buffer; 470 return new_buffer;
467 } 471 }
468 472
469 OwnerBuffer MergeBlocks(OwnerBuffer first, OwnerBuffer second) { 473 std::shared_ptr<Buffer> MergeBlocks(std::shared_ptr<Buffer> first,
470 const std::size_t size_1 = first->GetSize(); 474 std::shared_ptr<Buffer> second) {
471 const std::size_t size_2 = second->GetSize(); 475 const std::size_t size_1 = first->Size();
472 const VAddr first_addr = first->GetCpuAddr(); 476 const std::size_t size_2 = second->Size();
473 const VAddr second_addr = second->GetCpuAddr(); 477 const VAddr first_addr = first->CpuAddr();
478 const VAddr second_addr = second->CpuAddr();
474 const VAddr new_addr = std::min(first_addr, second_addr); 479 const VAddr new_addr = std::min(first_addr, second_addr);
475 const std::size_t new_size = size_1 + size_2; 480 const std::size_t new_size = size_1 + size_2;
476 OwnerBuffer new_buffer = CreateBlock(new_addr, new_size); 481
477 CopyBlock(first, new_buffer, 0, new_buffer->GetOffset(first_addr), size_1); 482 std::shared_ptr<Buffer> new_buffer = CreateBlock(new_addr, new_size);
478 CopyBlock(second, new_buffer, 0, new_buffer->GetOffset(second_addr), size_2); 483 new_buffer->CopyFrom(*first, 0, new_buffer->Offset(first_addr), size_1);
479 first->SetEpoch(epoch); 484 new_buffer->CopyFrom(*second, 0, new_buffer->Offset(second_addr), size_2);
480 second->SetEpoch(epoch); 485 QueueDestruction(std::move(first));
481 pending_destruction.push_back(first); 486 QueueDestruction(std::move(second));
482 pending_destruction.push_back(second); 487
483 const VAddr cpu_addr_end = new_addr + new_size - 1; 488 const VAddr cpu_addr_end = new_addr + new_size - 1;
484 u64 page_start = new_addr >> block_page_bits; 489 const u64 page_end = cpu_addr_end >> BLOCK_PAGE_BITS;
485 const u64 page_end = cpu_addr_end >> block_page_bits; 490 for (u64 page_start = new_addr >> BLOCK_PAGE_BITS; page_start <= page_end; ++page_start) {
486 while (page_start <= page_end) { 491 blocks.insert_or_assign(page_start, new_buffer);
487 blocks[page_start] = new_buffer;
488 ++page_start;
489 } 492 }
490 return new_buffer; 493 return new_buffer;
491 } 494 }
492 495
493 OwnerBuffer GetBlock(const VAddr cpu_addr, const std::size_t size) { 496 Buffer* GetBlock(VAddr cpu_addr, std::size_t size) {
494 OwnerBuffer found; 497 std::shared_ptr<Buffer> found;
498
495 const VAddr cpu_addr_end = cpu_addr + size - 1; 499 const VAddr cpu_addr_end = cpu_addr + size - 1;
496 u64 page_start = cpu_addr >> block_page_bits; 500 const u64 page_end = cpu_addr_end >> BLOCK_PAGE_BITS;
497 const u64 page_end = cpu_addr_end >> block_page_bits; 501 for (u64 page_start = cpu_addr >> BLOCK_PAGE_BITS; page_start <= page_end; ++page_start) {
498 while (page_start <= page_end) {
499 auto it = blocks.find(page_start); 502 auto it = blocks.find(page_start);
500 if (it == blocks.end()) { 503 if (it == blocks.end()) {
501 if (found) { 504 if (found) {
502 found = EnlargeBlock(found); 505 found = EnlargeBlock(found);
503 } else { 506 continue;
504 const VAddr start_addr = (page_start << block_page_bits);
505 found = CreateBlock(start_addr, block_page_size);
506 blocks[page_start] = found;
507 }
508 } else {
509 if (found) {
510 if (found == it->second) {
511 ++page_start;
512 continue;
513 }
514 found = MergeBlocks(found, it->second);
515 } else {
516 found = it->second;
517 } 507 }
508 const VAddr start_addr = page_start << BLOCK_PAGE_BITS;
509 found = CreateBlock(start_addr, BLOCK_PAGE_SIZE);
510 blocks.insert_or_assign(page_start, found);
511 continue;
512 }
513 if (!found) {
514 found = it->second;
515 continue;
516 }
517 if (found != it->second) {
518 found = MergeBlocks(std::move(found), it->second);
518 } 519 }
519 ++page_start;
520 } 520 }
521 return found; 521 return found.get();
522 } 522 }
523 523
524 void MarkRegionAsWritten(const VAddr start, const VAddr end) { 524 void MarkRegionAsWritten(VAddr start, VAddr end) {
525 u64 page_start = start >> write_page_bit; 525 const u64 page_end = end >> WRITE_PAGE_BIT;
526 const u64 page_end = end >> write_page_bit; 526 for (u64 page_start = start >> WRITE_PAGE_BIT; page_start <= page_end; ++page_start) {
527 while (page_start <= page_end) {
528 auto it = written_pages.find(page_start); 527 auto it = written_pages.find(page_start);
529 if (it != written_pages.end()) { 528 if (it != written_pages.end()) {
530 it->second = it->second + 1; 529 it->second = it->second + 1;
531 } else { 530 } else {
532 written_pages[page_start] = 1; 531 written_pages.insert_or_assign(page_start, 1);
533 } 532 }
534 ++page_start;
535 } 533 }
536 } 534 }
537 535
538 void UnmarkRegionAsWritten(const VAddr start, const VAddr end) { 536 void UnmarkRegionAsWritten(VAddr start, VAddr end) {
539 u64 page_start = start >> write_page_bit; 537 const u64 page_end = end >> WRITE_PAGE_BIT;
540 const u64 page_end = end >> write_page_bit; 538 for (u64 page_start = start >> WRITE_PAGE_BIT; page_start <= page_end; ++page_start) {
541 while (page_start <= page_end) {
542 auto it = written_pages.find(page_start); 539 auto it = written_pages.find(page_start);
543 if (it != written_pages.end()) { 540 if (it != written_pages.end()) {
544 if (it->second > 1) { 541 if (it->second > 1) {
@@ -547,22 +544,24 @@ private:
547 written_pages.erase(it); 544 written_pages.erase(it);
548 } 545 }
549 } 546 }
550 ++page_start;
551 } 547 }
552 } 548 }
553 549
554 bool IsRegionWritten(const VAddr start, const VAddr end) const { 550 bool IsRegionWritten(VAddr start, VAddr end) const {
555 u64 page_start = start >> write_page_bit; 551 const u64 page_end = end >> WRITE_PAGE_BIT;
556 const u64 page_end = end >> write_page_bit; 552 for (u64 page_start = start >> WRITE_PAGE_BIT; page_start <= page_end; ++page_start) {
557 while (page_start <= page_end) {
558 if (written_pages.count(page_start) > 0) { 553 if (written_pages.count(page_start) > 0) {
559 return true; 554 return true;
560 } 555 }
561 ++page_start;
562 } 556 }
563 return false; 557 return false;
564 } 558 }
565 559
560 void QueueDestruction(std::shared_ptr<Buffer> buffer) {
561 buffer->SetEpoch(epoch);
562 pending_destruction.push(std::move(buffer));
563 }
564
566 void MarkForAsyncFlush(MapInterval* map) { 565 void MarkForAsyncFlush(MapInterval* map) {
567 if (!uncommitted_flushes) { 566 if (!uncommitted_flushes) {
568 uncommitted_flushes = std::make_shared<std::unordered_set<MapInterval*>>(); 567 uncommitted_flushes = std::make_shared<std::unordered_set<MapInterval*>>();
@@ -574,9 +573,7 @@ private:
574 Core::System& system; 573 Core::System& system;
575 574
576 std::unique_ptr<StreamBuffer> stream_buffer; 575 std::unique_ptr<StreamBuffer> stream_buffer;
577 BufferType stream_buffer_handle{}; 576 BufferType stream_buffer_handle;
578
579 bool invalidated = false;
580 577
581 u8* buffer_ptr = nullptr; 578 u8* buffer_ptr = nullptr;
582 u64 buffer_offset = 0; 579 u64 buffer_offset = 0;
@@ -586,18 +583,15 @@ private:
586 boost::intrusive::set<MapInterval, boost::intrusive::compare<MapIntervalCompare>> 583 boost::intrusive::set<MapInterval, boost::intrusive::compare<MapIntervalCompare>>
587 mapped_addresses; 584 mapped_addresses;
588 585
589 static constexpr u64 write_page_bit = 11;
590 std::unordered_map<u64, u32> written_pages; 586 std::unordered_map<u64, u32> written_pages;
587 std::unordered_map<u64, std::shared_ptr<Buffer>> blocks;
591 588
592 static constexpr u64 block_page_bits = 21; 589 std::queue<std::shared_ptr<Buffer>> pending_destruction;
593 static constexpr u64 block_page_size = 1ULL << block_page_bits;
594 std::unordered_map<u64, OwnerBuffer> blocks;
595
596 std::list<OwnerBuffer> pending_destruction;
597 u64 epoch = 0; 590 u64 epoch = 0;
598 u64 modified_ticks = 0; 591 u64 modified_ticks = 0;
599 592
600 std::vector<u8> staging_buffer; 593 std::vector<u8> staging_buffer;
594
601 std::list<MapInterval*> marked_for_unregister; 595 std::list<MapInterval*> marked_for_unregister;
602 596
603 std::shared_ptr<std::unordered_set<MapInterval*>> uncommitted_flushes; 597 std::shared_ptr<std::unordered_set<MapInterval*>> uncommitted_flushes;
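
Worked example of the page arithmetic behind GetBlock(): with BLOCK_PAGE_BITS = 21 each block page covers 2 MiB, and a request is widened to every page its byte range touches. The addresses below are arbitrary.

    #include <cstdint>

    int main() {
        constexpr std::uint64_t BLOCK_PAGE_BITS = 21;   // 2 MiB pages
        constexpr std::uint64_t cpu_addr = 0x1234'5678; // arbitrary guest address
        constexpr std::uint64_t size = 0x30'0000;       // 3 MiB request
        constexpr std::uint64_t page_start = cpu_addr >> BLOCK_PAGE_BITS;
        constexpr std::uint64_t page_end = (cpu_addr + size - 1) >> BLOCK_PAGE_BITS;
        // GetBlock() walks pages 145..147 here, creating, enlarging, or merging
        // blocks until a single shared_ptr<Buffer> spans the whole range.
        static_assert(page_start == 145 && page_end == 147);
    }
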
diff --git a/src/video_core/compatible_formats.cpp b/src/video_core/compatible_formats.cpp
new file mode 100644
index 000000000..6c426b035
--- /dev/null
+++ b/src/video_core/compatible_formats.cpp
@@ -0,0 +1,162 @@
1// Copyright 2020 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <array>
6#include <bitset>
7#include <cstddef>
8
9#include "video_core/compatible_formats.h"
10#include "video_core/surface.h"
11
12namespace VideoCore::Surface {
13
14namespace {
15
16// Compatibility table taken from Table 3.X.2 in:
17// https://www.khronos.org/registry/OpenGL/extensions/ARB/ARB_texture_view.txt
18
19constexpr std::array VIEW_CLASS_128_BITS = {
20 PixelFormat::RGBA32F,
21 PixelFormat::RGBA32UI,
22};
23// Missing formats:
24// PixelFormat::RGBA32I
25
26constexpr std::array VIEW_CLASS_96_BITS = {
27 PixelFormat::RGB32F,
28};
29// Missing formats:
30// PixelFormat::RGB32UI,
31// PixelFormat::RGB32I,
32
33constexpr std::array VIEW_CLASS_64_BITS = {
34 PixelFormat::RGBA16F, PixelFormat::RG32F, PixelFormat::RGBA16UI, PixelFormat::RG32UI,
35 PixelFormat::RGBA16U, PixelFormat::RGBA16F, PixelFormat::RGBA16S,
36};
37// Missing formats:
38// PixelFormat::RGBA16I
39// PixelFormat::RG32I
40
41// TODO: How should we handle 48 bits?
42
43constexpr std::array VIEW_CLASS_32_BITS = {
44 PixelFormat::RG16F, PixelFormat::R11FG11FB10F, PixelFormat::R32F,
45 PixelFormat::A2B10G10R10U, PixelFormat::RG16UI, PixelFormat::R32UI,
46 PixelFormat::RG16I, PixelFormat::R32I, PixelFormat::ABGR8U,
47 PixelFormat::RG16, PixelFormat::ABGR8S, PixelFormat::RG16S,
48 PixelFormat::RGBA8_SRGB, PixelFormat::E5B9G9R9F, PixelFormat::BGRA8,
49 PixelFormat::BGRA8_SRGB,
50};
51// Missing formats:
52// PixelFormat::RGBA8UI
53// PixelFormat::RGBA8I
54// PixelFormat::RGB10_A2_UI
55
56// TODO: How should we handle 24 bits?
57
58constexpr std::array VIEW_CLASS_16_BITS = {
59 PixelFormat::R16F, PixelFormat::RG8UI, PixelFormat::R16UI, PixelFormat::R16I,
60 PixelFormat::RG8U, PixelFormat::R16U, PixelFormat::RG8S, PixelFormat::R16S,
61};
62// Missing formats:
63// PixelFormat::RG8I
64
65constexpr std::array VIEW_CLASS_8_BITS = {
66 PixelFormat::R8UI,
67 PixelFormat::R8U,
68};
69// Missing formats:
70// PixelFormat::R8I
71// PixelFormat::R8S
72
73constexpr std::array VIEW_CLASS_RGTC1_RED = {
74 PixelFormat::DXN1,
75};
76// Missing formats:
77// COMPRESSED_SIGNED_RED_RGTC1
78
79constexpr std::array VIEW_CLASS_RGTC2_RG = {
80 PixelFormat::DXN2UNORM,
81 PixelFormat::DXN2SNORM,
82};
83
84constexpr std::array VIEW_CLASS_BPTC_UNORM = {
85 PixelFormat::BC7U,
86 PixelFormat::BC7U_SRGB,
87};
88
89constexpr std::array VIEW_CLASS_BPTC_FLOAT = {
90 PixelFormat::BC6H_SF16,
91 PixelFormat::BC6H_UF16,
92};
93
94// Compatibility table taken from Table 4.X.1 in:
95// https://www.khronos.org/registry/OpenGL/extensions/ARB/ARB_copy_image.txt
96
97constexpr std::array COPY_CLASS_128_BITS = {
98 PixelFormat::RGBA32UI, PixelFormat::RGBA32F, PixelFormat::DXT23,
99 PixelFormat::DXT23_SRGB, PixelFormat::DXT45, PixelFormat::DXT45_SRGB,
100 PixelFormat::DXN2SNORM, PixelFormat::BC7U, PixelFormat::BC7U_SRGB,
101 PixelFormat::BC6H_SF16, PixelFormat::BC6H_UF16,
102};
103// Missing formats:
104// PixelFormat::RGBA32I
105// COMPRESSED_RG_RGTC2
106
107constexpr std::array COPY_CLASS_64_BITS = {
108 PixelFormat::RGBA16F, PixelFormat::RG32F, PixelFormat::RGBA16UI, PixelFormat::RG32UI,
109 PixelFormat::RGBA16U, PixelFormat::RGBA16S, PixelFormat::DXT1_SRGB, PixelFormat::DXT1,
110
111};
112// Missing formats:
113// PixelFormat::RGBA16I
114// PixelFormat::RG32I,
115// COMPRESSED_RGB_S3TC_DXT1_EXT
116// COMPRESSED_SRGB_S3TC_DXT1_EXT
117// COMPRESSED_RGBA_S3TC_DXT1_EXT
118// COMPRESSED_SIGNED_RED_RGTC1
119
 120void Enable(FormatCompatibility::Table& compatibility, size_t format_a, size_t format_b) {
 121 compatibility[format_a][format_b] = true;
 122 compatibility[format_b][format_a] = true;
123}
124
125void Enable(FormatCompatibility::Table& compatibility, PixelFormat format_a, PixelFormat format_b) {
126 Enable(compatibility, static_cast<size_t>(format_a), static_cast<size_t>(format_b));
127}
128
129template <typename Range>
130void EnableRange(FormatCompatibility::Table& compatibility, const Range& range) {
131 for (auto it_a = range.begin(); it_a != range.end(); ++it_a) {
132 for (auto it_b = it_a; it_b != range.end(); ++it_b) {
133 Enable(compatibility, *it_a, *it_b);
134 }
135 }
136}
137
138} // Anonymous namespace
139
140FormatCompatibility::FormatCompatibility() {
141 for (size_t i = 0; i < MaxPixelFormat; ++i) {
142 // Identity is allowed
143 Enable(view, i, i);
144 }
145
146 EnableRange(view, VIEW_CLASS_128_BITS);
147 EnableRange(view, VIEW_CLASS_96_BITS);
148 EnableRange(view, VIEW_CLASS_64_BITS);
149 EnableRange(view, VIEW_CLASS_32_BITS);
150 EnableRange(view, VIEW_CLASS_16_BITS);
151 EnableRange(view, VIEW_CLASS_8_BITS);
152 EnableRange(view, VIEW_CLASS_RGTC1_RED);
153 EnableRange(view, VIEW_CLASS_RGTC2_RG);
154 EnableRange(view, VIEW_CLASS_BPTC_UNORM);
155 EnableRange(view, VIEW_CLASS_BPTC_FLOAT);
156
157 copy = view;
158 EnableRange(copy, COPY_CLASS_128_BITS);
159 EnableRange(copy, COPY_CLASS_64_BITS);
160}
161
162} // namespace VideoCore::Surface
diff --git a/src/video_core/compatible_formats.h b/src/video_core/compatible_formats.h
new file mode 100644
index 000000000..d1082566d
--- /dev/null
+++ b/src/video_core/compatible_formats.h
@@ -0,0 +1,32 @@
1// Copyright 2020 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <array>
6#include <bitset>
7#include <cstddef>
8
9#include "video_core/surface.h"
10
11namespace VideoCore::Surface {
12
13class FormatCompatibility {
14public:
15 using Table = std::array<std::bitset<MaxPixelFormat>, MaxPixelFormat>;
16
17 explicit FormatCompatibility();
18
19 bool TestView(PixelFormat format_a, PixelFormat format_b) const noexcept {
20 return view[static_cast<size_t>(format_a)][static_cast<size_t>(format_b)];
21 }
22
23 bool TestCopy(PixelFormat format_a, PixelFormat format_b) const noexcept {
24 return copy[static_cast<size_t>(format_a)][static_cast<size_t>(format_b)];
25 }
26
27private:
28 Table view;
29 Table copy;
30};
31
32} // namespace VideoCore::Surface
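
Construction fills the view table first and then copies it into the copy table before widening it, so TestCopy() is always at least as permissive as TestView(). A querying sketch; the RGBA32F/RGBA32UI pair is compatible per VIEW_CLASS_128_BITS above.

    #include "video_core/compatible_formats.h"

    using VideoCore::Surface::FormatCompatibility;
    using VideoCore::Surface::PixelFormat;

    bool CanAliasThroughView(PixelFormat a, PixelFormat b) {
        // The tables are immutable after construction, so one instance suffices.
        static const FormatCompatibility compatibility;
        return compatibility.TestView(a, b); // e.g. RGBA32F vs RGBA32UI yields true
    }
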
diff --git a/src/video_core/engines/const_buffer_engine_interface.h b/src/video_core/engines/const_buffer_engine_interface.h
index ebe139504..f46e81bb7 100644
--- a/src/video_core/engines/const_buffer_engine_interface.h
+++ b/src/video_core/engines/const_buffer_engine_interface.h
@@ -93,6 +93,7 @@ public:
93 virtual SamplerDescriptor AccessBoundSampler(ShaderType stage, u64 offset) const = 0; 93 virtual SamplerDescriptor AccessBoundSampler(ShaderType stage, u64 offset) const = 0;
94 virtual SamplerDescriptor AccessBindlessSampler(ShaderType stage, u64 const_buffer, 94 virtual SamplerDescriptor AccessBindlessSampler(ShaderType stage, u64 const_buffer,
95 u64 offset) const = 0; 95 u64 offset) const = 0;
96 virtual SamplerDescriptor AccessSampler(u32 handle) const = 0;
96 virtual u32 GetBoundBuffer() const = 0; 97 virtual u32 GetBoundBuffer() const = 0;
97 98
98 virtual VideoCore::GuestDriverProfile& AccessGuestDriverProfile() = 0; 99 virtual VideoCore::GuestDriverProfile& AccessGuestDriverProfile() = 0;
diff --git a/src/video_core/engines/kepler_compute.cpp b/src/video_core/engines/kepler_compute.cpp
index f6237fc6a..a82b06a38 100644
--- a/src/video_core/engines/kepler_compute.cpp
+++ b/src/video_core/engines/kepler_compute.cpp
@@ -92,8 +92,11 @@ SamplerDescriptor KeplerCompute::AccessBindlessSampler(ShaderType stage, u64 con
92 ASSERT(stage == ShaderType::Compute); 92 ASSERT(stage == ShaderType::Compute);
93 const auto& tex_info_buffer = launch_description.const_buffer_config[const_buffer]; 93 const auto& tex_info_buffer = launch_description.const_buffer_config[const_buffer];
94 const GPUVAddr tex_info_address = tex_info_buffer.Address() + offset; 94 const GPUVAddr tex_info_address = tex_info_buffer.Address() + offset;
95 return AccessSampler(memory_manager.Read<u32>(tex_info_address));
96}
95 97
96 const Texture::TextureHandle tex_handle{memory_manager.Read<u32>(tex_info_address)}; 98SamplerDescriptor KeplerCompute::AccessSampler(u32 handle) const {
99 const Texture::TextureHandle tex_handle{handle};
97 const Texture::FullTextureInfo tex_info = GetTextureInfo(tex_handle); 100 const Texture::FullTextureInfo tex_info = GetTextureInfo(tex_handle);
98 SamplerDescriptor result = SamplerDescriptor::FromTIC(tex_info.tic); 101 SamplerDescriptor result = SamplerDescriptor::FromTIC(tex_info.tic);
99 result.is_shadow.Assign(tex_info.tsc.depth_compare_enabled.Value()); 102 result.is_shadow.Assign(tex_info.tsc.depth_compare_enabled.Value());
diff --git a/src/video_core/engines/kepler_compute.h b/src/video_core/engines/kepler_compute.h
index 18ceedfaf..b7f668d88 100644
--- a/src/video_core/engines/kepler_compute.h
+++ b/src/video_core/engines/kepler_compute.h
@@ -219,6 +219,8 @@ public:
219 SamplerDescriptor AccessBindlessSampler(ShaderType stage, u64 const_buffer, 219 SamplerDescriptor AccessBindlessSampler(ShaderType stage, u64 const_buffer,
220 u64 offset) const override; 220 u64 offset) const override;
221 221
222 SamplerDescriptor AccessSampler(u32 handle) const override;
223
222 u32 GetBoundBuffer() const override { 224 u32 GetBoundBuffer() const override {
223 return regs.tex_cb_index; 225 return regs.tex_cb_index;
224 } 226 }
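
With AccessSampler() promoted to the engine interface, both engines share one handle-to-descriptor path and the bindless variants merely read the 32-bit handle from guest memory first. A caller sketch; the ConstBufferEngineInterface spelling and header path are assumptions based on the surrounding file names.

    #include "video_core/engines/const_buffer_engine_interface.h"

    // Resolve a raw texture handle without knowing which const buffer held it.
    Tegra::Engines::SamplerDescriptor ResolveSampler(
        const Tegra::Engines::ConstBufferEngineInterface& engine, u32 raw_handle) {
        return engine.AccessSampler(raw_handle);
    }
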
diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp
index e46b153f9..c01436295 100644
--- a/src/video_core/engines/maxwell_3d.cpp
+++ b/src/video_core/engines/maxwell_3d.cpp
@@ -128,7 +128,7 @@ void Maxwell3D::CallMacroMethod(u32 method, const std::vector<u32>& parameters)
128 ((method - MacroRegistersStart) >> 1) % static_cast<u32>(macro_positions.size()); 128 ((method - MacroRegistersStart) >> 1) % static_cast<u32>(macro_positions.size());
129 129
130 // Execute the current macro. 130 // Execute the current macro.
131 macro_engine->Execute(macro_positions[entry], parameters); 131 macro_engine->Execute(*this, macro_positions[entry], parameters);
132 if (mme_draw.current_mode != MMEDrawMode::Undefined) { 132 if (mme_draw.current_mode != MMEDrawMode::Undefined) {
133 FlushMMEInlineDraw(); 133 FlushMMEInlineDraw();
134 } 134 }
@@ -740,8 +740,11 @@ SamplerDescriptor Maxwell3D::AccessBindlessSampler(ShaderType stage, u64 const_b
740 const auto& shader = state.shader_stages[static_cast<std::size_t>(stage)]; 740 const auto& shader = state.shader_stages[static_cast<std::size_t>(stage)];
741 const auto& tex_info_buffer = shader.const_buffers[const_buffer]; 741 const auto& tex_info_buffer = shader.const_buffers[const_buffer];
742 const GPUVAddr tex_info_address = tex_info_buffer.address + offset; 742 const GPUVAddr tex_info_address = tex_info_buffer.address + offset;
743 return AccessSampler(memory_manager.Read<u32>(tex_info_address));
744}
743 745
744 const Texture::TextureHandle tex_handle{memory_manager.Read<u32>(tex_info_address)}; 746SamplerDescriptor Maxwell3D::AccessSampler(u32 handle) const {
747 const Texture::TextureHandle tex_handle{handle};
745 const Texture::FullTextureInfo tex_info = GetTextureInfo(tex_handle); 748 const Texture::FullTextureInfo tex_info = GetTextureInfo(tex_handle);
746 SamplerDescriptor result = SamplerDescriptor::FromTIC(tex_info.tic); 749 SamplerDescriptor result = SamplerDescriptor::FromTIC(tex_info.tic);
747 result.is_shadow.Assign(tex_info.tsc.depth_compare_enabled.Value()); 750 result.is_shadow.Assign(tex_info.tsc.depth_compare_enabled.Value());
diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h
index b827b112f..ef1618990 100644
--- a/src/video_core/engines/maxwell_3d.h
+++ b/src/video_core/engines/maxwell_3d.h
@@ -598,6 +598,7 @@ public:
598 BitField<4, 3, u32> block_height; 598 BitField<4, 3, u32> block_height;
599 BitField<8, 3, u32> block_depth; 599 BitField<8, 3, u32> block_depth;
600 BitField<12, 1, InvMemoryLayout> type; 600 BitField<12, 1, InvMemoryLayout> type;
601 BitField<16, 1, u32> is_3d;
601 } memory_layout; 602 } memory_layout;
602 union { 603 union {
603 BitField<0, 16, u32> layers; 604 BitField<0, 16, u32> layers;
@@ -1403,6 +1404,8 @@ public:
1403 SamplerDescriptor AccessBindlessSampler(ShaderType stage, u64 const_buffer, 1404 SamplerDescriptor AccessBindlessSampler(ShaderType stage, u64 const_buffer,
1404 u64 offset) const override; 1405 u64 offset) const override;
1405 1406
1407 SamplerDescriptor AccessSampler(u32 handle) const override;
1408
1406 u32 GetBoundBuffer() const override { 1409 u32 GetBoundBuffer() const override {
1407 return regs.tex_cb_index; 1410 return regs.tex_cb_index;
1408 } 1411 }
@@ -1415,6 +1418,14 @@ public:
1415 return execute_on; 1418 return execute_on;
1416 } 1419 }
1417 1420
1421 VideoCore::RasterizerInterface& GetRasterizer() {
1422 return rasterizer;
1423 }
1424
1425 const VideoCore::RasterizerInterface& GetRasterizer() const {
1426 return rasterizer;
1427 }
1428
1418 /// Notify a memory write has happened. 1429 /// Notify a memory write has happened.
1419 void OnMemoryWrite() { 1430 void OnMemoryWrite() {
1420 dirty.flags |= dirty.on_write_stores; 1431 dirty.flags |= dirty.on_write_stores;
diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h
index e7cb87589..d374b73cf 100644
--- a/src/video_core/engines/shader_bytecode.h
+++ b/src/video_core/engines/shader_bytecode.h
@@ -661,6 +661,10 @@ union Instruction {
661 constexpr Instruction(u64 value) : value{value} {} 661 constexpr Instruction(u64 value) : value{value} {}
662 constexpr Instruction(const Instruction& instr) : value(instr.value) {} 662 constexpr Instruction(const Instruction& instr) : value(instr.value) {}
663 663
664 constexpr bool Bit(u64 offset) const {
665 return ((value >> offset) & 1) != 0;
666 }
667
664 BitField<0, 8, Register> gpr0; 668 BitField<0, 8, Register> gpr0;
665 BitField<8, 8, Register> gpr8; 669 BitField<8, 8, Register> gpr8;
666 union { 670 union {
@@ -1874,7 +1878,9 @@ public:
1874 HSETP2_C, 1878 HSETP2_C,
1875 HSETP2_R, 1879 HSETP2_R,
1876 HSETP2_IMM, 1880 HSETP2_IMM,
1881 HSET2_C,
1877 HSET2_R, 1882 HSET2_R,
1883 HSET2_IMM,
1878 POPC_C, 1884 POPC_C,
1879 POPC_R, 1885 POPC_R,
1880 POPC_IMM, 1886 POPC_IMM,
@@ -2194,7 +2200,9 @@ private:
2194 INST("0111111-1-------", Id::HSETP2_C, Type::HalfSetPredicate, "HSETP2_C"), 2200 INST("0111111-1-------", Id::HSETP2_C, Type::HalfSetPredicate, "HSETP2_C"),
2195 INST("0101110100100---", Id::HSETP2_R, Type::HalfSetPredicate, "HSETP2_R"), 2201 INST("0101110100100---", Id::HSETP2_R, Type::HalfSetPredicate, "HSETP2_R"),
2196 INST("0111111-0-------", Id::HSETP2_IMM, Type::HalfSetPredicate, "HSETP2_IMM"), 2202 INST("0111111-0-------", Id::HSETP2_IMM, Type::HalfSetPredicate, "HSETP2_IMM"),
2203 INST("0111110-1-------", Id::HSET2_C, Type::HalfSet, "HSET2_C"),
2197 INST("0101110100011---", Id::HSET2_R, Type::HalfSet, "HSET2_R"), 2204 INST("0101110100011---", Id::HSET2_R, Type::HalfSet, "HSET2_R"),
2205 INST("0111110-0-------", Id::HSET2_IMM, Type::HalfSet, "HSET2_IMM"),
2198 INST("010110111010----", Id::FCMP_RR, Type::Arithmetic, "FCMP_RR"), 2206 INST("010110111010----", Id::FCMP_RR, Type::Arithmetic, "FCMP_RR"),
2199 INST("010010111010----", Id::FCMP_RC, Type::Arithmetic, "FCMP_RC"), 2207 INST("010010111010----", Id::FCMP_RC, Type::Arithmetic, "FCMP_RC"),
2200 INST("0101000010000---", Id::MUFU, Type::Arithmetic, "MUFU"), 2208 INST("0101000010000---", Id::MUFU, Type::Arithmetic, "MUFU"),
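
The new Instruction::Bit() helper lets decoders test a single opcode bit without declaring a one-off BitField, which the added HSET2_C/HSET2_IMM handling can use for its modifier bits. A sketch; the encoding value and bit position are illustrative, and the Tegra::Shader namespace is assumed.

    const Tegra::Shader::Instruction instr{0x7C50'0000'0000'0000ULL}; // arbitrary encoding
    const bool flag = instr.Bit(39); // same as ((instr.value >> 39) & 1) != 0
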
diff --git a/src/video_core/gpu.cpp b/src/video_core/gpu.cpp
index 8eb017f65..482e49711 100644
--- a/src/video_core/gpu.cpp
+++ b/src/video_core/gpu.cpp
@@ -2,6 +2,8 @@
2// Licensed under GPLv2 or any later version 2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included. 3// Refer to the license.txt file included.
4 4
5#include <chrono>
6
5#include "common/assert.h" 7#include "common/assert.h"
6#include "common/microprofile.h" 8#include "common/microprofile.h"
7#include "core/core.h" 9#include "core/core.h"
@@ -154,8 +156,7 @@ u64 GPU::GetTicks() const {
154 constexpr u64 gpu_ticks_num = 384; 156 constexpr u64 gpu_ticks_num = 384;
155 constexpr u64 gpu_ticks_den = 625; 157 constexpr u64 gpu_ticks_den = 625;
156 158
157 const u64 cpu_ticks = system.CoreTiming().GetTicks(); 159 u64 nanoseconds = system.CoreTiming().GetGlobalTimeNs().count();
158 u64 nanoseconds = Core::Timing::CyclesToNs(cpu_ticks).count();
159 if (Settings::values.use_fast_gpu_time) { 160 if (Settings::values.use_fast_gpu_time) {
160 nanoseconds /= 256; 161 nanoseconds /= 256;
161 } 162 }
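
The 384/625 ratio converts host nanoseconds into GPU ticks: 384/625 ticks per nanosecond is 614.4 million ticks per second. A self-contained sketch of one overflow-safe way to apply it; GpuTicks is illustrative, not the patch's function.

    #include <cstdint>

    constexpr std::uint64_t GpuTicks(std::uint64_t ns, bool fast_gpu_time) {
        constexpr std::uint64_t num = 384; // gpu_ticks_num
        constexpr std::uint64_t den = 625; // gpu_ticks_den
        if (fast_gpu_time) {
            ns /= 256; // mirrors the use_fast_gpu_time path above
        }
        // Split the multiply to avoid overflowing ns * num for large timestamps.
        return (ns / den) * num + (ns % den) * num / den;
    }
    static_assert(GpuTicks(1'000'000'000, false) == 614'400'000); // 1 s of host time
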
diff --git a/src/video_core/gpu.h b/src/video_core/gpu.h
index a1b4c305c..2c42483bd 100644
--- a/src/video_core/gpu.h
+++ b/src/video_core/gpu.h
@@ -284,6 +284,12 @@ public:
284 /// core timing events. 284 /// core timing events.
285 virtual void Start() = 0; 285 virtual void Start() = 0;
286 286
287 /// Obtain the CPU Context
288 virtual void ObtainContext() = 0;
289
290 /// Release the CPU Context
291 virtual void ReleaseContext() = 0;
292
287 /// Push GPU command entries to be processed 293 /// Push GPU command entries to be processed
288 virtual void PushGPUEntries(Tegra::CommandList&& entries) = 0; 294 virtual void PushGPUEntries(Tegra::CommandList&& entries) = 0;
289 295
diff --git a/src/video_core/gpu_asynch.cpp b/src/video_core/gpu_asynch.cpp
index 53305ab43..7b855f63e 100644
--- a/src/video_core/gpu_asynch.cpp
+++ b/src/video_core/gpu_asynch.cpp
@@ -19,10 +19,17 @@ GPUAsynch::GPUAsynch(Core::System& system, std::unique_ptr<VideoCore::RendererBa
19GPUAsynch::~GPUAsynch() = default; 19GPUAsynch::~GPUAsynch() = default;
20 20
21void GPUAsynch::Start() { 21void GPUAsynch::Start() {
22 cpu_context->MakeCurrent();
23 gpu_thread.StartThread(*renderer, *gpu_context, *dma_pusher); 22 gpu_thread.StartThread(*renderer, *gpu_context, *dma_pusher);
24} 23}
25 24
25void GPUAsynch::ObtainContext() {
26 cpu_context->MakeCurrent();
27}
28
29void GPUAsynch::ReleaseContext() {
30 cpu_context->DoneCurrent();
31}
32
26void GPUAsynch::PushGPUEntries(Tegra::CommandList&& entries) { 33void GPUAsynch::PushGPUEntries(Tegra::CommandList&& entries) {
27 gpu_thread.SubmitList(std::move(entries)); 34 gpu_thread.SubmitList(std::move(entries));
28} 35}
diff --git a/src/video_core/gpu_asynch.h b/src/video_core/gpu_asynch.h
index 517658612..15e9f1d38 100644
--- a/src/video_core/gpu_asynch.h
+++ b/src/video_core/gpu_asynch.h
@@ -25,6 +25,8 @@ public:
25 ~GPUAsynch() override; 25 ~GPUAsynch() override;
26 26
27 void Start() override; 27 void Start() override;
28 void ObtainContext() override;
29 void ReleaseContext() override;
28 void PushGPUEntries(Tegra::CommandList&& entries) override; 30 void PushGPUEntries(Tegra::CommandList&& entries) override;
29 void SwapBuffers(const Tegra::FramebufferConfig* framebuffer) override; 31 void SwapBuffers(const Tegra::FramebufferConfig* framebuffer) override;
30 void FlushRegion(VAddr addr, u64 size) override; 32 void FlushRegion(VAddr addr, u64 size) override;
diff --git a/src/video_core/gpu_synch.cpp b/src/video_core/gpu_synch.cpp
index 6f38a672a..aaeb9811d 100644
--- a/src/video_core/gpu_synch.cpp
+++ b/src/video_core/gpu_synch.cpp
@@ -13,10 +13,16 @@ GPUSynch::GPUSynch(Core::System& system, std::unique_ptr<VideoCore::RendererBase
13 13
14GPUSynch::~GPUSynch() = default; 14GPUSynch::~GPUSynch() = default;
15 15
16void GPUSynch::Start() { 16void GPUSynch::Start() {}
17
18void GPUSynch::ObtainContext() {
17 context->MakeCurrent(); 19 context->MakeCurrent();
18} 20}
19 21
22void GPUSynch::ReleaseContext() {
23 context->DoneCurrent();
24}
25
20void GPUSynch::PushGPUEntries(Tegra::CommandList&& entries) { 26void GPUSynch::PushGPUEntries(Tegra::CommandList&& entries) {
21 dma_pusher->Push(std::move(entries)); 27 dma_pusher->Push(std::move(entries));
22 dma_pusher->DispatchCalls(); 28 dma_pusher->DispatchCalls();
diff --git a/src/video_core/gpu_synch.h b/src/video_core/gpu_synch.h
index 4a6e9a01d..762c20aa5 100644
--- a/src/video_core/gpu_synch.h
+++ b/src/video_core/gpu_synch.h
@@ -24,6 +24,8 @@ public:
24 ~GPUSynch() override; 24 ~GPUSynch() override;
25 25
26 void Start() override; 26 void Start() override;
27 void ObtainContext() override;
28 void ReleaseContext() override;
27 void PushGPUEntries(Tegra::CommandList&& entries) override; 29 void PushGPUEntries(Tegra::CommandList&& entries) override;
28 void SwapBuffers(const Tegra::FramebufferConfig* framebuffer) override; 30 void SwapBuffers(const Tegra::FramebufferConfig* framebuffer) override;
29 void FlushRegion(VAddr addr, u64 size) override; 31 void FlushRegion(VAddr addr, u64 size) override;
diff --git a/src/video_core/gpu_thread.cpp b/src/video_core/gpu_thread.cpp
index c3bb4fe06..738c6f0c1 100644
--- a/src/video_core/gpu_thread.cpp
+++ b/src/video_core/gpu_thread.cpp
@@ -4,6 +4,7 @@
4 4
5#include "common/assert.h" 5#include "common/assert.h"
6#include "common/microprofile.h" 6#include "common/microprofile.h"
7#include "common/thread.h"
7#include "core/core.h" 8#include "core/core.h"
8#include "core/frontend/emu_window.h" 9#include "core/frontend/emu_window.h"
9#include "core/settings.h" 10#include "core/settings.h"
@@ -18,7 +19,11 @@ namespace VideoCommon::GPUThread {
18static void RunThread(Core::System& system, VideoCore::RendererBase& renderer, 19static void RunThread(Core::System& system, VideoCore::RendererBase& renderer,
19 Core::Frontend::GraphicsContext& context, Tegra::DmaPusher& dma_pusher, 20 Core::Frontend::GraphicsContext& context, Tegra::DmaPusher& dma_pusher,
20 SynchState& state) { 21 SynchState& state) {
21 MicroProfileOnThreadCreate("GpuThread"); 22 std::string name = "yuzu:GPU";
23 MicroProfileOnThreadCreate(name.c_str());
24 Common::SetCurrentThreadName(name.c_str());
25 Common::SetCurrentThreadPriority(Common::ThreadPriority::High);
26 system.RegisterHostThread();
22 27
23 // Wait for first GPU command before acquiring the window context 28 // Wait for first GPU command before acquiring the window context
24 while (state.queue.Empty()) 29 while (state.queue.Empty())
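
RunThread() now performs the standard preamble for an emulator-managed host thread. Condensed below; the comment about why RegisterHostThread() is required is an assumption, not something the hunk states.

    // Name the thread for profilers and debuggers, raise its scheduling
    // priority, and register it with the core before touching guest state.
    MicroProfileOnThreadCreate("yuzu:GPU");
    Common::SetCurrentThreadName("yuzu:GPU");
    Common::SetCurrentThreadPriority(Common::ThreadPriority::High);
    system.RegisterHostThread(); // assumed: enables host-to-guest thread lookups
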
diff --git a/src/video_core/macro/macro.cpp b/src/video_core/macro/macro.cpp
index 89077a2d8..a50e7b4e0 100644
--- a/src/video_core/macro/macro.cpp
+++ b/src/video_core/macro/macro.cpp
@@ -2,32 +2,78 @@
2// Licensed under GPLv2 or any later version 2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included. 3// Refer to the license.txt file included.
4 4
5#include <optional>
6#include <boost/container_hash/hash.hpp>
5#include "common/assert.h" 7#include "common/assert.h"
6#include "common/logging/log.h" 8#include "common/logging/log.h"
7#include "core/settings.h" 9#include "core/settings.h"
10#include "video_core/engines/maxwell_3d.h"
8#include "video_core/macro/macro.h" 11#include "video_core/macro/macro.h"
12#include "video_core/macro/macro_hle.h"
9#include "video_core/macro/macro_interpreter.h" 13#include "video_core/macro/macro_interpreter.h"
10#include "video_core/macro/macro_jit_x64.h" 14#include "video_core/macro/macro_jit_x64.h"
11 15
12namespace Tegra { 16namespace Tegra {
13 17
18MacroEngine::MacroEngine(Engines::Maxwell3D& maxwell3d)
19 : hle_macros{std::make_unique<Tegra::HLEMacro>(maxwell3d)} {}
20
21MacroEngine::~MacroEngine() = default;
22
14void MacroEngine::AddCode(u32 method, u32 data) { 23void MacroEngine::AddCode(u32 method, u32 data) {
15 uploaded_macro_code[method].push_back(data); 24 uploaded_macro_code[method].push_back(data);
16} 25}
17 26
18void MacroEngine::Execute(u32 method, const std::vector<u32>& parameters) { 27void MacroEngine::Execute(Engines::Maxwell3D& maxwell3d, u32 method,
28 const std::vector<u32>& parameters) {
19 auto compiled_macro = macro_cache.find(method); 29 auto compiled_macro = macro_cache.find(method);
20 if (compiled_macro != macro_cache.end()) { 30 if (compiled_macro != macro_cache.end()) {
21 compiled_macro->second->Execute(parameters, method); 31 const auto& cache_info = compiled_macro->second;
32 if (cache_info.has_hle_program) {
33 cache_info.hle_program->Execute(parameters, method);
34 } else {
35 cache_info.lle_program->Execute(parameters, method);
36 }
22 } else { 37 } else {
23 // Macro not compiled, check if it's uploaded and if so, compile it 38 // Macro not compiled, check if it's uploaded and if so, compile it
24 auto macro_code = uploaded_macro_code.find(method); 39 std::optional<u32> mid_method = std::nullopt;
40 const auto macro_code = uploaded_macro_code.find(method);
25 if (macro_code == uploaded_macro_code.end()) { 41 if (macro_code == uploaded_macro_code.end()) {
26 UNREACHABLE_MSG("Macro 0x{0:x} was not uploaded", method); 42 for (const auto& [method_base, code] : uploaded_macro_code) {
27 return; 43 if (method >= method_base && (method - method_base) < code.size()) {
44 mid_method = method_base;
45 break;
46 }
47 }
48 if (!mid_method.has_value()) {
49 UNREACHABLE_MSG("Macro 0x{0:x} was not uploaded", method);
50 return;
51 }
52 }
53 auto& cache_info = macro_cache[method];
54
55 if (!mid_method.has_value()) {
56 cache_info.lle_program = Compile(macro_code->second);
57 cache_info.hash = boost::hash_value(macro_code->second);
58 } else {
59 const auto& macro_cached = uploaded_macro_code[mid_method.value()];
60 const auto rebased_method = method - mid_method.value();
61 auto& code = uploaded_macro_code[method];
62 code.resize(macro_cached.size() - rebased_method);
63 std::memcpy(code.data(), macro_cached.data() + rebased_method,
64 code.size() * sizeof(u32));
65 cache_info.hash = boost::hash_value(code);
66 cache_info.lle_program = Compile(code);
67 }
68
69 auto hle_program = hle_macros->GetHLEProgram(cache_info.hash);
70 if (hle_program.has_value()) {
71 cache_info.has_hle_program = true;
72 cache_info.hle_program = std::move(hle_program.value());
73 cache_info.hle_program->Execute(parameters, method);
74 } else {
75 cache_info.lle_program->Execute(parameters, method);
28 } 76 }
29 macro_cache[method] = Compile(macro_code->second);
30 macro_cache[method]->Execute(parameters, method);
31 } 77 }
32} 78}
33 79
diff --git a/src/video_core/macro/macro.h b/src/video_core/macro/macro.h
index b76ed891f..4d00b84b0 100644
--- a/src/video_core/macro/macro.h
+++ b/src/video_core/macro/macro.h
@@ -11,9 +11,11 @@
11#include "common/common_types.h" 11#include "common/common_types.h"
12 12
13namespace Tegra { 13namespace Tegra {
14
14namespace Engines { 15namespace Engines {
15class Maxwell3D; 16class Maxwell3D;
16} 17}
18
17namespace Macro { 19namespace Macro {
18constexpr std::size_t NUM_MACRO_REGISTERS = 8; 20constexpr std::size_t NUM_MACRO_REGISTERS = 8;
19enum class Operation : u32 { 21enum class Operation : u32 {
@@ -94,6 +96,8 @@ union MethodAddress {
 
 } // namespace Macro
 
+class HLEMacro;
+
 class CachedMacro {
 public:
     virtual ~CachedMacro() = default;
@@ -107,20 +111,29 @@ public:
 
 class MacroEngine {
 public:
-    virtual ~MacroEngine() = default;
+    explicit MacroEngine(Engines::Maxwell3D& maxwell3d);
+    virtual ~MacroEngine();
 
     // Store the uploaded macro code to compile them when they're called.
     void AddCode(u32 method, u32 data);
 
     // Compiles the macro if its not in the cache, and executes the compiled macro
-    void Execute(u32 method, const std::vector<u32>& parameters);
+    void Execute(Engines::Maxwell3D& maxwell3d, u32 method, const std::vector<u32>& parameters);
 
 protected:
     virtual std::unique_ptr<CachedMacro> Compile(const std::vector<u32>& code) = 0;
 
 private:
-    std::unordered_map<u32, std::unique_ptr<CachedMacro>> macro_cache;
+    struct CacheInfo {
+        std::unique_ptr<CachedMacro> lle_program{};
+        std::unique_ptr<CachedMacro> hle_program{};
+        u64 hash{};
+        bool has_hle_program{};
+    };
+
+    std::unordered_map<u32, CacheInfo> macro_cache;
     std::unordered_map<u32, std::vector<u32>> uploaded_macro_code;
+    std::unique_ptr<HLEMacro> hle_macros;
 };
 
 std::unique_ptr<MacroEngine> GetMacroEngine(Engines::Maxwell3D& maxwell3d);
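Note on the header change above: the destructor moves out of line (virtual ~MacroEngine(); with the '= default' in macro.cpp) because MacroEngine now owns a std::unique_ptr to the forward-declared HLEMacro, and unique_ptr's deleter needs the complete type at the point of destruction. Sketch of the general pattern, with illustrative names:

    // widget.h
    #include <memory>
    class Impl; // forward declaration is enough for the member
    class Widget {
    public:
        Widget();
        ~Widget(); // declared here, defined where Impl is complete
    private:
        std::unique_ptr<Impl> impl;
    };

    // widget.cpp
    // #include "impl.h"            // full definition of Impl visible here
    // Widget::~Widget() = default; // deleter instantiated with a complete type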
diff --git a/src/video_core/macro/macro_hle.cpp b/src/video_core/macro/macro_hle.cpp
new file mode 100644
index 000000000..410f99018
--- /dev/null
+++ b/src/video_core/macro/macro_hle.cpp
@@ -0,0 +1,113 @@
1// Copyright 2020 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <array>
6#include <vector>
7#include "video_core/engines/maxwell_3d.h"
8#include "video_core/macro/macro_hle.h"
9#include "video_core/rasterizer_interface.h"
10
11namespace Tegra {
12
13namespace {
14// HLE'd functions
15static void HLE_771BB18C62444DA0(Engines::Maxwell3D& maxwell3d,
16 const std::vector<u32>& parameters) {
17 const u32 instance_count = parameters[2] & maxwell3d.GetRegisterValue(0xD1B);
18
19 maxwell3d.regs.draw.topology.Assign(
20 static_cast<Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology>(parameters[0] &
21 ~(0x3ffffff << 26)));
22 maxwell3d.regs.vb_base_instance = parameters[5];
23 maxwell3d.mme_draw.instance_count = instance_count;
24 maxwell3d.regs.vb_element_base = parameters[3];
25 maxwell3d.regs.index_array.count = parameters[1];
26 maxwell3d.regs.index_array.first = parameters[4];
27
28 if (maxwell3d.ShouldExecute()) {
29 maxwell3d.GetRasterizer().Draw(true, true);
30 }
31 maxwell3d.regs.index_array.count = 0;
32 maxwell3d.mme_draw.instance_count = 0;
33 maxwell3d.mme_draw.current_mode = Engines::Maxwell3D::MMEDrawMode::Undefined;
34}
35
36static void HLE_0D61FC9FAAC9FCAD(Engines::Maxwell3D& maxwell3d,
37 const std::vector<u32>& parameters) {
38 const u32 count = (maxwell3d.GetRegisterValue(0xD1B) & parameters[2]);
39
40 maxwell3d.regs.vertex_buffer.first = parameters[3];
41 maxwell3d.regs.vertex_buffer.count = parameters[1];
42 maxwell3d.regs.vb_base_instance = parameters[4];
43 maxwell3d.regs.draw.topology.Assign(
44 static_cast<Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology>(parameters[0]));
45 maxwell3d.mme_draw.instance_count = count;
46
47 if (maxwell3d.ShouldExecute()) {
48 maxwell3d.GetRasterizer().Draw(false, true);
49 }
50 maxwell3d.regs.vertex_buffer.count = 0;
51 maxwell3d.mme_draw.instance_count = 0;
52 maxwell3d.mme_draw.current_mode = Engines::Maxwell3D::MMEDrawMode::Undefined;
53}
54
55static void HLE_0217920100488FF7(Engines::Maxwell3D& maxwell3d,
56 const std::vector<u32>& parameters) {
57 const u32 instance_count = (maxwell3d.GetRegisterValue(0xD1B) & parameters[2]);
58 const u32 element_base = parameters[4];
59 const u32 base_instance = parameters[5];
60 maxwell3d.regs.index_array.first = parameters[3];
61 maxwell3d.regs.reg_array[0x446] = element_base; // vertex id base?
62 maxwell3d.regs.index_array.count = parameters[1];
63 maxwell3d.regs.vb_element_base = element_base;
64 maxwell3d.regs.vb_base_instance = base_instance;
65 maxwell3d.mme_draw.instance_count = instance_count;
66 maxwell3d.CallMethodFromMME(0x8e3, 0x640);
67 maxwell3d.CallMethodFromMME(0x8e4, element_base);
68 maxwell3d.CallMethodFromMME(0x8e5, base_instance);
69 maxwell3d.regs.draw.topology.Assign(
70 static_cast<Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology>(parameters[0]));
71 if (maxwell3d.ShouldExecute()) {
72 maxwell3d.GetRasterizer().Draw(true, true);
73 }
74 maxwell3d.regs.reg_array[0x446] = 0x0; // vertex id base?
75 maxwell3d.regs.index_array.count = 0;
76 maxwell3d.regs.vb_element_base = 0x0;
77 maxwell3d.regs.vb_base_instance = 0x0;
78 maxwell3d.mme_draw.instance_count = 0;
79 maxwell3d.CallMethodFromMME(0x8e3, 0x640);
80 maxwell3d.CallMethodFromMME(0x8e4, 0x0);
81 maxwell3d.CallMethodFromMME(0x8e5, 0x0);
82 maxwell3d.mme_draw.current_mode = Engines::Maxwell3D::MMEDrawMode::Undefined;
83}
84} // namespace
85
86constexpr std::array<std::pair<u64, HLEFunction>, 3> hle_funcs{{
87 std::make_pair<u64, HLEFunction>(0x771BB18C62444DA0, &HLE_771BB18C62444DA0),
88 std::make_pair<u64, HLEFunction>(0x0D61FC9FAAC9FCAD, &HLE_0D61FC9FAAC9FCAD),
89 std::make_pair<u64, HLEFunction>(0x0217920100488FF7, &HLE_0217920100488FF7),
90}};
91
92HLEMacro::HLEMacro(Engines::Maxwell3D& maxwell3d) : maxwell3d(maxwell3d) {}
93HLEMacro::~HLEMacro() = default;
94
95std::optional<std::unique_ptr<CachedMacro>> HLEMacro::GetHLEProgram(u64 hash) const {
96 const auto it = std::find_if(hle_funcs.cbegin(), hle_funcs.cend(),
97 [hash](const auto& pair) { return pair.first == hash; });
98 if (it == hle_funcs.end()) {
99 return std::nullopt;
100 }
101 return std::make_unique<HLEMacroImpl>(maxwell3d, it->second);
102}
103
104HLEMacroImpl::~HLEMacroImpl() = default;
105
106HLEMacroImpl::HLEMacroImpl(Engines::Maxwell3D& maxwell3d, HLEFunction func)
107 : maxwell3d(maxwell3d), func(func) {}
108
109void HLEMacroImpl::Execute(const std::vector<u32>& parameters, u32 method) {
110 func(maxwell3d, parameters);
111}
112
113} // namespace Tegra
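Note on GetHLEProgram() above: lookup is a linear std::find_if over a three-entry constexpr table, which is cheap at this size and keeps the table in read-only storage. The same lookup shape in isolation (the two hashes are taken from hle_funcs above; everything else is illustrative):

    #include <algorithm>
    #include <array>
    #include <cstdint>
    #include <utility>

    using Fn = void (*)(int);
    void DrawA(int) {}
    void DrawB(int) {}

    // Hash -> handler table, analogous to hle_funcs.
    constexpr std::array<std::pair<std::uint64_t, Fn>, 2> table{{
        {0x771BB18C62444DA0, &DrawA},
        {0x0D61FC9FAAC9FCAD, &DrawB},
    }};

    Fn Find(std::uint64_t key) {
        const auto it = std::find_if(table.cbegin(), table.cend(),
                                     [key](const auto& pair) { return pair.first == key; });
        return it == table.cend() ? nullptr : it->second;
    }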
diff --git a/src/video_core/macro/macro_hle.h b/src/video_core/macro/macro_hle.h
new file mode 100644
index 000000000..37af875a0
--- /dev/null
+++ b/src/video_core/macro/macro_hle.h
@@ -0,0 +1,44 @@
1// Copyright 2020 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <memory>
8#include <optional>
9#include <vector>
10#include "common/common_types.h"
11#include "video_core/macro/macro.h"
12
13namespace Tegra {
14
15namespace Engines {
16class Maxwell3D;
17}
18
19using HLEFunction = void (*)(Engines::Maxwell3D& maxwell3d, const std::vector<u32>& parameters);
20
21class HLEMacro {
22public:
23 explicit HLEMacro(Engines::Maxwell3D& maxwell3d);
24 ~HLEMacro();
25
26 std::optional<std::unique_ptr<CachedMacro>> GetHLEProgram(u64 hash) const;
27
28private:
29 Engines::Maxwell3D& maxwell3d;
30};
31
32class HLEMacroImpl : public CachedMacro {
33public:
34 explicit HLEMacroImpl(Engines::Maxwell3D& maxwell3d, HLEFunction func);
35 ~HLEMacroImpl();
36
37 void Execute(const std::vector<u32>& parameters, u32 method) override;
38
39private:
40 Engines::Maxwell3D& maxwell3d;
41 HLEFunction func;
42};
43
44} // namespace Tegra
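Note: HLEMacroImpl is a thin adapter; it satisfies the CachedMacro interface while ignoring the method argument, since each replacement function already knows which draw it implements. The adapter shape in miniature (illustrative names):

    struct Callable {
        virtual ~Callable() = default;
        virtual void Call(int method) = 0;
    };

    class FnAdapter final : public Callable {
    public:
        explicit FnAdapter(void (*fn)()) : fn{fn} {}
        void Call(int /*method: unused, the function is specialized*/) override {
            fn();
        }
    private:
        void (*fn)();
    };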
diff --git a/src/video_core/macro/macro_interpreter.cpp b/src/video_core/macro/macro_interpreter.cpp
index 5edff27aa..aa5256419 100644
--- a/src/video_core/macro/macro_interpreter.cpp
+++ b/src/video_core/macro/macro_interpreter.cpp
@@ -11,7 +11,8 @@
 MICROPROFILE_DEFINE(MacroInterp, "GPU", "Execute macro interpreter", MP_RGB(128, 128, 192));
 
 namespace Tegra {
-MacroInterpreter::MacroInterpreter(Engines::Maxwell3D& maxwell3d) : maxwell3d(maxwell3d) {}
+MacroInterpreter::MacroInterpreter(Engines::Maxwell3D& maxwell3d)
+    : MacroEngine::MacroEngine(maxwell3d), maxwell3d(maxwell3d) {}
 
 std::unique_ptr<CachedMacro> MacroInterpreter::Compile(const std::vector<u32>& code) {
     return std::make_unique<MacroInterpreterImpl>(maxwell3d, code);
diff --git a/src/video_core/macro/macro_jit_x64.cpp b/src/video_core/macro/macro_jit_x64.cpp
index 11c1cc3be..07292702f 100644
--- a/src/video_core/macro/macro_jit_x64.cpp
+++ b/src/video_core/macro/macro_jit_x64.cpp
@@ -14,27 +14,22 @@ MICROPROFILE_DEFINE(MacroJitCompile, "GPU", "Compile macro JIT", MP_RGB(173, 255
 MICROPROFILE_DEFINE(MacroJitExecute, "GPU", "Execute macro JIT", MP_RGB(255, 255, 0));
 
 namespace Tegra {
-static const Xbyak::Reg64 PARAMETERS = Xbyak::util::r9;
-static const Xbyak::Reg64 REGISTERS = Xbyak::util::r10;
-static const Xbyak::Reg64 STATE = Xbyak::util::r11;
-static const Xbyak::Reg64 NEXT_PARAMETER = Xbyak::util::r12;
-static const Xbyak::Reg32 RESULT = Xbyak::util::r13d;
-static const Xbyak::Reg64 RESULT_64 = Xbyak::util::r13;
+static const Xbyak::Reg64 STATE = Xbyak::util::rbx;
+static const Xbyak::Reg32 RESULT = Xbyak::util::ebp;
+static const Xbyak::Reg64 PARAMETERS = Xbyak::util::r12;
 static const Xbyak::Reg32 METHOD_ADDRESS = Xbyak::util::r14d;
-static const Xbyak::Reg64 METHOD_ADDRESS_64 = Xbyak::util::r14;
 static const Xbyak::Reg64 BRANCH_HOLDER = Xbyak::util::r15;
 
 static const std::bitset<32> PERSISTENT_REGISTERS = Common::X64::BuildRegSet({
-    PARAMETERS,
-    REGISTERS,
     STATE,
-    NEXT_PARAMETER,
     RESULT,
+    PARAMETERS,
     METHOD_ADDRESS,
     BRANCH_HOLDER,
 });
 
-MacroJITx64::MacroJITx64(Engines::Maxwell3D& maxwell3d) : maxwell3d(maxwell3d) {}
+MacroJITx64::MacroJITx64(Engines::Maxwell3D& maxwell3d)
+    : MacroEngine::MacroEngine(maxwell3d), maxwell3d(maxwell3d) {}
 
 std::unique_ptr<CachedMacro> MacroJITx64::Compile(const std::vector<u32>& code) {
     return std::make_unique<MacroJITx64Impl>(maxwell3d, code);
@@ -53,32 +48,32 @@ void MacroJITx64Impl::Execute(const std::vector<u32>& parameters, u32 method) {
     JITState state{};
     state.maxwell3d = &maxwell3d;
     state.registers = {};
-    state.parameters = parameters.data();
-    program(&state);
+    program(&state, parameters.data());
 }
 
 void MacroJITx64Impl::Compile_ALU(Macro::Opcode opcode) {
     const bool is_a_zero = opcode.src_a == 0;
     const bool is_b_zero = opcode.src_b == 0;
     const bool valid_operation = !is_a_zero && !is_b_zero;
-    const bool is_move_operation = !is_a_zero && is_b_zero;
+    [[maybe_unused]] const bool is_move_operation = !is_a_zero && is_b_zero;
     const bool has_zero_register = is_a_zero || is_b_zero;
+    const bool no_zero_reg_skip = opcode.alu_operation == Macro::ALUOperation::AddWithCarry ||
+                                  opcode.alu_operation == Macro::ALUOperation::SubtractWithBorrow;
 
-    Xbyak::Reg64 src_a;
+    Xbyak::Reg32 src_a;
     Xbyak::Reg32 src_b;
 
-    if (!optimizer.zero_reg_skip) {
-        src_a = Compile_GetRegister(opcode.src_a, RESULT_64);
-        src_b = Compile_GetRegister(opcode.src_b, ebx);
+    if (!optimizer.zero_reg_skip || no_zero_reg_skip) {
+        src_a = Compile_GetRegister(opcode.src_a, RESULT);
+        src_b = Compile_GetRegister(opcode.src_b, eax);
     } else {
         if (!is_a_zero) {
-            src_a = Compile_GetRegister(opcode.src_a, RESULT_64);
+            src_a = Compile_GetRegister(opcode.src_a, RESULT);
         }
         if (!is_b_zero) {
-            src_b = Compile_GetRegister(opcode.src_b, ebx);
+            src_b = Compile_GetRegister(opcode.src_b, eax);
         }
     }
-    Xbyak::Label skip_carry{};
 
     bool has_emitted = false;
 
@@ -190,7 +185,8 @@ void MacroJITx64Impl::Compile_AddImmediate(Macro::Opcode opcode) {
         opcode.result_operation == Macro::ResultOperation::MoveAndSetMethod) {
         if (next_opcode.has_value()) {
             const auto next = *next_opcode;
-            if (next.result_operation == Macro::ResultOperation::MoveAndSetMethod) {
+            if (next.result_operation == Macro::ResultOperation::MoveAndSetMethod &&
+                opcode.dst == next.dst) {
                 return;
             }
         }
@@ -244,10 +240,10 @@ void MacroJITx64Impl::Compile_ExtractInsert(Macro::Opcode opcode) {
 }
 
 void MacroJITx64Impl::Compile_ExtractShiftLeftImmediate(Macro::Opcode opcode) {
-    auto dst = Compile_GetRegister(opcode.src_a, eax);
-    auto src = Compile_GetRegister(opcode.src_b, RESULT);
+    const auto dst = Compile_GetRegister(opcode.src_a, ecx);
+    const auto src = Compile_GetRegister(opcode.src_b, RESULT);
 
-    shr(src, al);
+    shr(src, dst.cvt8());
     if (opcode.bf_size != 0 && opcode.bf_size != 31) {
         and_(src, opcode.GetBitfieldMask());
     } else if (opcode.bf_size == 0) {
@@ -263,8 +259,8 @@ void MacroJITx64Impl::Compile_ExtractShiftLeftImmediate(Macro::Opcode opcode) {
 }
 
 void MacroJITx64Impl::Compile_ExtractShiftLeftRegister(Macro::Opcode opcode) {
-    auto dst = Compile_GetRegister(opcode.src_a, eax);
-    auto src = Compile_GetRegister(opcode.src_b, RESULT);
+    const auto dst = Compile_GetRegister(opcode.src_a, ecx);
+    const auto src = Compile_GetRegister(opcode.src_b, RESULT);
 
     if (opcode.bf_src_bit != 0) {
         shr(src, opcode.bf_src_bit);
@@ -273,16 +269,9 @@ void MacroJITx64Impl::Compile_ExtractShiftLeftRegister(Macro::Opcode opcode) {
     if (opcode.bf_size != 31) {
         and_(src, opcode.GetBitfieldMask());
     }
-    shl(src, al);
-    Compile_ProcessResult(opcode.result_operation, opcode.dst);
-}
+    shl(src, dst.cvt8());
 
-static u32 Read(Engines::Maxwell3D* maxwell3d, u32 method) {
-    return maxwell3d->GetRegisterValue(method);
-}
-
-static void Send(Engines::Maxwell3D* maxwell3d, Macro::MethodAddress method_address, u32 value) {
-    maxwell3d->CallMethodFromMME(method_address.address, value);
+    Compile_ProcessResult(opcode.result_operation, opcode.dst);
 }
 
 void MacroJITx64Impl::Compile_Read(Macro::Opcode opcode) {
@@ -302,22 +291,34 @@ void MacroJITx64Impl::Compile_Read(Macro::Opcode opcode) {
             sub(result, opcode.immediate * -1);
         }
     }
-    Common::X64::ABI_PushRegistersAndAdjustStackGPS(*this, PersistentCallerSavedRegs(), 0);
-    mov(Common::X64::ABI_PARAM1, qword[STATE]);
-    mov(Common::X64::ABI_PARAM2, RESULT);
-    Common::X64::CallFarFunction(*this, &Read);
-    Common::X64::ABI_PopRegistersAndAdjustStackGPS(*this, PersistentCallerSavedRegs(), 0);
-    mov(RESULT, Common::X64::ABI_RETURN.cvt32());
+
+    // Equivalent to Engines::Maxwell3D::GetRegisterValue:
+    if (optimizer.enable_asserts) {
+        Xbyak::Label pass_range_check;
+        cmp(RESULT, static_cast<u32>(Engines::Maxwell3D::Regs::NUM_REGS));
+        jb(pass_range_check);
+        int3();
+        L(pass_range_check);
+    }
+    mov(rax, qword[STATE]);
+    mov(RESULT,
+        dword[rax + offsetof(Engines::Maxwell3D, regs) +
+              offsetof(Engines::Maxwell3D::Regs, reg_array) + RESULT.cvt64() * sizeof(u32)]);
+
     Compile_ProcessResult(opcode.result_operation, opcode.dst);
 }
 
+static void Send(Engines::Maxwell3D* maxwell3d, Macro::MethodAddress method_address, u32 value) {
+    maxwell3d->CallMethodFromMME(method_address.address, value);
+}
+
 void Tegra::MacroJITx64Impl::Compile_Send(Xbyak::Reg32 value) {
-    Common::X64::ABI_PushRegistersAndAdjustStackGPS(*this, PersistentCallerSavedRegs(), 0);
+    Common::X64::ABI_PushRegistersAndAdjustStack(*this, PersistentCallerSavedRegs(), 0);
     mov(Common::X64::ABI_PARAM1, qword[STATE]);
     mov(Common::X64::ABI_PARAM2, METHOD_ADDRESS);
     mov(Common::X64::ABI_PARAM3, value);
     Common::X64::CallFarFunction(*this, &Send);
-    Common::X64::ABI_PopRegistersAndAdjustStackGPS(*this, PersistentCallerSavedRegs(), 0);
+    Common::X64::ABI_PopRegistersAndAdjustStack(*this, PersistentCallerSavedRegs(), 0);
 
     Xbyak::Label dont_process{};
     // Get increment
@@ -329,7 +330,7 @@ void Tegra::MacroJITx64Impl::Compile_Send(Xbyak::Reg32 value) {
     and_(METHOD_ADDRESS, 0xfff);
     shr(ecx, 12);
     and_(ecx, 0x3f);
-    lea(eax, ptr[rcx + METHOD_ADDRESS_64]);
+    lea(eax, ptr[rcx + METHOD_ADDRESS.cvt64()]);
     sal(ecx, 12);
     or_(eax, ecx);
 
@@ -421,19 +422,15 @@ void MacroJITx64Impl::Compile() {
     bool keep_executing = true;
     labels.fill(Xbyak::Label());
 
-    Common::X64::ABI_PushRegistersAndAdjustStackGPS(*this, Common::X64::ABI_ALL_CALLEE_SAVED, 8);
+    Common::X64::ABI_PushRegistersAndAdjustStack(*this, Common::X64::ABI_ALL_CALLEE_SAVED, 8);
     // JIT state
     mov(STATE, Common::X64::ABI_PARAM1);
-    mov(PARAMETERS, qword[Common::X64::ABI_PARAM1 +
-                          static_cast<Xbyak::uint32>(offsetof(JITState, parameters))]);
-    mov(REGISTERS, Common::X64::ABI_PARAM1);
-    add(REGISTERS, static_cast<Xbyak::uint32>(offsetof(JITState, registers)));
+    mov(PARAMETERS, Common::X64::ABI_PARAM2);
     xor_(RESULT, RESULT);
     xor_(METHOD_ADDRESS, METHOD_ADDRESS);
-    xor_(NEXT_PARAMETER, NEXT_PARAMETER);
     xor_(BRANCH_HOLDER, BRANCH_HOLDER);
 
-    mov(dword[REGISTERS + 4], Compile_FetchParameter());
+    mov(dword[STATE + offsetof(JITState, registers) + 4], Compile_FetchParameter());
 
     // Track get register for zero registers and mark it as no-op
     optimizer.zero_reg_skip = true;
@@ -446,6 +443,9 @@ void MacroJITx64Impl::Compile() {
     // one if our register isn't "dirty"
     optimizer.optimize_for_method_move = true;
 
+    // Enable run-time assertions in JITted code
+    optimizer.enable_asserts = false;
+
     // Check to see if we can skip emitting certain instructions
     Optimizer_ScanFlags();
 
@@ -463,7 +463,7 @@ void MacroJITx64Impl::Compile() {
 
     L(end_of_code);
 
-    Common::X64::ABI_PopRegistersAndAdjustStackGPS(*this, Common::X64::ABI_ALL_CALLEE_SAVED, 8);
+    Common::X64::ABI_PopRegistersAndAdjustStack(*this, Common::X64::ABI_ALL_CALLEE_SAVED, 8);
     ret();
     ready();
     program = getCode<ProgramType>();
@@ -537,8 +537,8 @@ bool MacroJITx64Impl::Compile_NextInstruction() {
 }
 
 Xbyak::Reg32 Tegra::MacroJITx64Impl::Compile_FetchParameter() {
-    mov(eax, dword[PARAMETERS + NEXT_PARAMETER * sizeof(u32)]);
-    inc(NEXT_PARAMETER);
+    mov(eax, dword[PARAMETERS]);
+    add(PARAMETERS, sizeof(u32));
     return eax;
 }
 
@@ -547,41 +547,22 @@ Xbyak::Reg32 MacroJITx64Impl::Compile_GetRegister(u32 index, Xbyak::Reg32 dst) {
         // Register 0 is always zero
         xor_(dst, dst);
     } else {
-        mov(dst, dword[REGISTERS + index * sizeof(u32)]);
-    }
-
-    return dst;
-}
-
-Xbyak::Reg64 Tegra::MacroJITx64Impl::Compile_GetRegister(u32 index, Xbyak::Reg64 dst) {
-    if (index == 0) {
-        // Register 0 is always zero
-        xor_(dst, dst);
-    } else {
-        mov(dst, dword[REGISTERS + index * sizeof(u32)]);
+        mov(dst, dword[STATE + offsetof(JITState, registers) + index * sizeof(u32)]);
     }
 
     return dst;
 }
 
-void Tegra::MacroJITx64Impl::Compile_WriteCarry(Xbyak::Reg64 dst) {
-    Xbyak::Label zero{}, end{};
-    xor_(ecx, ecx);
-    shr(dst, 32);
-    setne(cl);
-    mov(dword[STATE + offsetof(JITState, carry_flag)], ecx);
-}
-
 void MacroJITx64Impl::Compile_ProcessResult(Macro::ResultOperation operation, u32 reg) {
-    auto SetRegister = [=](u32 reg, Xbyak::Reg32 result) {
+    const auto SetRegister = [this](u32 reg, const Xbyak::Reg32& result) {
         // Register 0 is supposed to always return 0. NOP is implemented as a store to the zero
         // register.
         if (reg == 0) {
             return;
         }
-        mov(dword[REGISTERS + reg * sizeof(u32)], result);
+        mov(dword[STATE + offsetof(JITState, registers) + reg * sizeof(u32)], result);
     };
-    auto SetMethodAddress = [=](Xbyak::Reg32 reg) { mov(METHOD_ADDRESS, reg); };
+    const auto SetMethodAddress = [this](const Xbyak::Reg32& reg) { mov(METHOD_ADDRESS, reg); };
 
     switch (operation) {
     case Macro::ResultOperation::IgnoreAndFetch:
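Note on the JIT changes above: the persistent values (STATE, RESULT, PARAMETERS, METHOD_ADDRESS, BRANCH_HOLDER) now live in callee-saved registers (rbx, ebp, r12, r14d, r15), so they survive the ABI call out to Send without extra bookkeeping, and the register file is addressed directly through STATE with offsetof instead of a dedicated REGISTERS pointer; reads of Maxwell3D registers are inlined the same way instead of calling out to a Read helper. A host-side restatement of the emitted load, under the assumption of a standard-layout state struct (illustrative types, not the yuzu definitions):

    #include <cstddef>
    #include <cstdint>

    struct State {
        void* engine;
        std::uint32_t registers[8];
    };

    // What `mov eax, dword [STATE + offsetof(State, registers) + index*4]`
    // computes, written in C++:
    std::uint32_t LoadRegister(const State* state, std::uint32_t index) {
        const auto* base = reinterpret_cast<const char*>(state);
        return *reinterpret_cast<const std::uint32_t*>(
            base + offsetof(State, registers) + index * sizeof(std::uint32_t));
    }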
diff --git a/src/video_core/macro/macro_jit_x64.h b/src/video_core/macro/macro_jit_x64.h
index 21ee157cf..a180e7428 100644
--- a/src/video_core/macro/macro_jit_x64.h
+++ b/src/video_core/macro/macro_jit_x64.h
@@ -55,8 +55,6 @@ private:
 
     Xbyak::Reg32 Compile_FetchParameter();
     Xbyak::Reg32 Compile_GetRegister(u32 index, Xbyak::Reg32 dst);
-    Xbyak::Reg64 Compile_GetRegister(u32 index, Xbyak::Reg64 dst);
-    void Compile_WriteCarry(Xbyak::Reg64 dst);
 
     void Compile_ProcessResult(Macro::ResultOperation operation, u32 reg);
     void Compile_Send(Xbyak::Reg32 value);
@@ -67,11 +65,10 @@ private:
     struct JITState {
         Engines::Maxwell3D* maxwell3d{};
         std::array<u32, Macro::NUM_MACRO_REGISTERS> registers{};
-        const u32* parameters{};
         u32 carry_flag{};
     };
     static_assert(offsetof(JITState, maxwell3d) == 0, "Maxwell3D is not at 0x0");
-    using ProgramType = void (*)(JITState*);
+    using ProgramType = void (*)(JITState*, const u32*);
 
     struct OptimizerState {
         bool can_skip_carry{};
@@ -79,14 +76,15 @@ private:
         bool zero_reg_skip{};
         bool skip_dummy_addimmediate{};
         bool optimize_for_method_move{};
+        bool enable_asserts{};
     };
     OptimizerState optimizer{};
 
     std::optional<Macro::Opcode> next_opcode{};
     ProgramType program{nullptr};
 
-    std::array<Xbyak::Label, MAX_CODE_SIZE> labels{};
-    std::array<Xbyak::Label, MAX_CODE_SIZE> delay_skip{};
+    std::array<Xbyak::Label, MAX_CODE_SIZE> labels;
+    std::array<Xbyak::Label, MAX_CODE_SIZE> delay_skip;
     Xbyak::Label end_of_code{};
 
     bool is_delay_slot{};
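Note: the static_assert that maxwell3d sits at offset 0 is load-bearing; the emitted mov(Common::X64::ABI_PARAM1, qword[STATE]) reads the first eight bytes of JITState and assumes they are the Maxwell3D pointer. Restated outside the JIT (illustrative struct):

    #include <cstddef>

    struct JITStateLike {
        void* maxwell3d; // must stay the first member
        unsigned registers[8];
    };
    static_assert(offsetof(JITStateLike, maxwell3d) == 0,
                  "a first-qword load of the struct would no longer be the pointer");

The parameter stream, meanwhile, now arrives as a second argument to the compiled blob (ProgramType = void (*)(JITState*, const u32*)) rather than being stashed inside JITState first.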
diff --git a/src/video_core/memory_manager.cpp b/src/video_core/memory_manager.cpp
index dbee9f634..ff5505d12 100644
--- a/src/video_core/memory_manager.cpp
+++ b/src/video_core/memory_manager.cpp
@@ -210,10 +210,11 @@ bool MemoryManager::IsBlockContinuous(const GPUVAddr start, const std::size_t si
     return range == inner_size;
 }
 
-void MemoryManager::ReadBlock(GPUVAddr src_addr, void* dest_buffer, const std::size_t size) const {
+void MemoryManager::ReadBlock(GPUVAddr gpu_src_addr, void* dest_buffer,
+                              const std::size_t size) const {
     std::size_t remaining_size{size};
-    std::size_t page_index{src_addr >> page_bits};
-    std::size_t page_offset{src_addr & page_mask};
+    std::size_t page_index{gpu_src_addr >> page_bits};
+    std::size_t page_offset{gpu_src_addr & page_mask};
 
     auto& memory = system.Memory();
 
@@ -234,11 +235,11 @@ void MemoryManager::ReadBlock(GPUVAddr src_addr, void* dest_buffer, const std::s
     }
 }
 
-void MemoryManager::ReadBlockUnsafe(GPUVAddr src_addr, void* dest_buffer,
-                                    const std::size_t size) const {
+void MemoryManager::ReadBlockUnsafe(GPUVAddr gpu_src_addr, void* dest_buffer,
+                                    const std::size_t size) const {
     std::size_t remaining_size{size};
-    std::size_t page_index{src_addr >> page_bits};
-    std::size_t page_offset{src_addr & page_mask};
+    std::size_t page_index{gpu_src_addr >> page_bits};
+    std::size_t page_offset{gpu_src_addr & page_mask};
 
     auto& memory = system.Memory();
 
@@ -259,10 +260,11 @@ void MemoryManager::ReadBlockUnsafe(GPUVAddr src_addr, void* dest_buffer,
     }
 }
 
-void MemoryManager::WriteBlock(GPUVAddr dest_addr, const void* src_buffer, const std::size_t size) {
+void MemoryManager::WriteBlock(GPUVAddr gpu_dest_addr, const void* src_buffer,
+                               const std::size_t size) {
     std::size_t remaining_size{size};
-    std::size_t page_index{dest_addr >> page_bits};
-    std::size_t page_offset{dest_addr & page_mask};
+    std::size_t page_index{gpu_dest_addr >> page_bits};
+    std::size_t page_offset{gpu_dest_addr & page_mask};
 
     auto& memory = system.Memory();
 
@@ -283,11 +285,11 @@ void MemoryManager::WriteBlock(GPUVAddr dest_addr, const void* src_buffer, const
     }
 }
 
-void MemoryManager::WriteBlockUnsafe(GPUVAddr dest_addr, const void* src_buffer,
-                                     const std::size_t size) {
+void MemoryManager::WriteBlockUnsafe(GPUVAddr gpu_dest_addr, const void* src_buffer,
+                                     const std::size_t size) {
     std::size_t remaining_size{size};
-    std::size_t page_index{dest_addr >> page_bits};
-    std::size_t page_offset{dest_addr & page_mask};
+    std::size_t page_index{gpu_dest_addr >> page_bits};
+    std::size_t page_offset{gpu_dest_addr & page_mask};
 
     auto& memory = system.Memory();
 
@@ -306,16 +308,18 @@ void MemoryManager::WriteBlockUnsafe(GPUVAddr dest_addr, const void* src_buffer,
     }
 }
 
-void MemoryManager::CopyBlock(GPUVAddr dest_addr, GPUVAddr src_addr, const std::size_t size) {
+void MemoryManager::CopyBlock(GPUVAddr gpu_dest_addr, GPUVAddr gpu_src_addr,
+                              const std::size_t size) {
     std::vector<u8> tmp_buffer(size);
-    ReadBlock(src_addr, tmp_buffer.data(), size);
-    WriteBlock(dest_addr, tmp_buffer.data(), size);
+    ReadBlock(gpu_src_addr, tmp_buffer.data(), size);
+    WriteBlock(gpu_dest_addr, tmp_buffer.data(), size);
 }
 
-void MemoryManager::CopyBlockUnsafe(GPUVAddr dest_addr, GPUVAddr src_addr, const std::size_t size) {
+void MemoryManager::CopyBlockUnsafe(GPUVAddr gpu_dest_addr, GPUVAddr gpu_src_addr,
+                                    const std::size_t size) {
     std::vector<u8> tmp_buffer(size);
-    ReadBlockUnsafe(src_addr, tmp_buffer.data(), size);
-    WriteBlockUnsafe(dest_addr, tmp_buffer.data(), size);
+    ReadBlockUnsafe(gpu_src_addr, tmp_buffer.data(), size);
+    WriteBlockUnsafe(gpu_dest_addr, tmp_buffer.data(), size);
 }
 
 bool MemoryManager::IsGranularRange(GPUVAddr gpu_addr, std::size_t size) {
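Note on CopyBlock/CopyBlockUnsafe above: staging through a temporary std::vector keeps overlapping source and destination ranges correct and reuses the page-walking Read/Write paths, at the cost of one allocation per copy. The pattern in isolation (illustrative callables):

    #include <cstddef>
    #include <cstdint>
    #include <vector>

    template <typename ReadFn, typename WriteFn>
    void CopyViaStaging(std::uint64_t dst, std::uint64_t src, std::size_t size,
                        ReadFn read, WriteFn write) {
        std::vector<std::uint8_t> tmp(size); // staging buffer decouples the two ranges
        read(src, tmp.data(), size);
        write(dst, tmp.data(), size);
    }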
diff --git a/src/video_core/memory_manager.h b/src/video_core/memory_manager.h
index 0ddd52d5a..87658e87a 100644
--- a/src/video_core/memory_manager.h
+++ b/src/video_core/memory_manager.h
@@ -79,9 +79,9 @@ public:
      * in the Host Memory counterpart. Note: This functions cause Host GPU Memory
      * Flushes and Invalidations, respectively to each operation.
      */
-    void ReadBlock(GPUVAddr src_addr, void* dest_buffer, std::size_t size) const;
-    void WriteBlock(GPUVAddr dest_addr, const void* src_buffer, std::size_t size);
-    void CopyBlock(GPUVAddr dest_addr, GPUVAddr src_addr, std::size_t size);
+    void ReadBlock(GPUVAddr gpu_src_addr, void* dest_buffer, std::size_t size) const;
+    void WriteBlock(GPUVAddr gpu_dest_addr, const void* src_buffer, std::size_t size);
+    void CopyBlock(GPUVAddr gpu_dest_addr, GPUVAddr gpu_src_addr, std::size_t size);
 
     /**
      * ReadBlockUnsafe and WriteBlockUnsafe are special versions of ReadBlock and
@@ -93,9 +93,9 @@ public:
      * WriteBlockUnsafe instead of WriteBlock since it shouldn't invalidate the texture
      * being flushed.
      */
-    void ReadBlockUnsafe(GPUVAddr src_addr, void* dest_buffer, std::size_t size) const;
-    void WriteBlockUnsafe(GPUVAddr dest_addr, const void* src_buffer, std::size_t size);
-    void CopyBlockUnsafe(GPUVAddr dest_addr, GPUVAddr src_addr, std::size_t size);
+    void ReadBlockUnsafe(GPUVAddr gpu_src_addr, void* dest_buffer, std::size_t size) const;
+    void WriteBlockUnsafe(GPUVAddr gpu_dest_addr, const void* src_buffer, std::size_t size);
+    void CopyBlockUnsafe(GPUVAddr gpu_dest_addr, GPUVAddr gpu_src_addr, std::size_t size);
 
     /**
      * IsGranularRange checks if a gpu region can be simply read with a pointer
diff --git a/src/video_core/query_cache.h b/src/video_core/query_cache.h
index 2f75f8801..e12dab899 100644
--- a/src/video_core/query_cache.h
+++ b/src/video_core/query_cache.h
@@ -220,8 +220,8 @@ private:
         return cache_begin < addr_end && addr_begin < cache_end;
     };
 
-    const u64 page_end = addr_end >> PAGE_SHIFT;
-    for (u64 page = addr_begin >> PAGE_SHIFT; page <= page_end; ++page) {
+    const u64 page_end = addr_end >> PAGE_BITS;
+    for (u64 page = addr_begin >> PAGE_BITS; page <= page_end; ++page) {
         const auto& it = cached_queries.find(page);
         if (it == std::end(cached_queries)) {
             continue;
@@ -242,14 +242,14 @@ private:
     /// Registers the passed parameters as cached and returns a pointer to the stored cached query.
     CachedQuery* Register(VideoCore::QueryType type, VAddr cpu_addr, u8* host_ptr, bool timestamp) {
         rasterizer.UpdatePagesCachedCount(cpu_addr, CachedQuery::SizeInBytes(timestamp), 1);
-        const u64 page = static_cast<u64>(cpu_addr) >> PAGE_SHIFT;
+        const u64 page = static_cast<u64>(cpu_addr) >> PAGE_BITS;
         return &cached_queries[page].emplace_back(static_cast<QueryCache&>(*this), type, cpu_addr,
                                                   host_ptr);
     }
 
     /// Tries to a get a cached query. Returns nullptr on failure.
     CachedQuery* TryGet(VAddr addr) {
-        const u64 page = static_cast<u64>(addr) >> PAGE_SHIFT;
+        const u64 page = static_cast<u64>(addr) >> PAGE_BITS;
         const auto it = cached_queries.find(page);
         if (it == std::end(cached_queries)) {
             return nullptr;
@@ -268,7 +268,7 @@ private:
     }
 
     static constexpr std::uintptr_t PAGE_SIZE = 4096;
-    static constexpr unsigned PAGE_SHIFT = 12;
+    static constexpr unsigned PAGE_BITS = 12;
 
     Core::System& system;
     VideoCore::RasterizerInterface& rasterizer;
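Note: the PAGE_SHIFT -> PAGE_BITS rename is purely cosmetic; the arithmetic is ordinary power-of-two paging, where with 4096-byte pages the page index is the address shifted right by 12 bits. For example:

    #include <cstdint>

    constexpr unsigned PAGE_BITS = 12; // 4096-byte pages
    constexpr std::uint64_t PageOf(std::uint64_t addr) {
        return addr >> PAGE_BITS;
    }
    static_assert(PageOf(0x1000) == 1 && PageOf(0x1FFF) == 1 && PageOf(0x2000) == 2);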
diff --git a/src/video_core/rasterizer_cache.cpp b/src/video_core/rasterizer_cache.cpp
deleted file mode 100644
index 093b2cdf4..000000000
--- a/src/video_core/rasterizer_cache.cpp
+++ /dev/null
@@ -1,7 +0,0 @@
1// Copyright 2018 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "video_core/rasterizer_cache.h"
6
7RasterizerCacheObject::~RasterizerCacheObject() = default;
diff --git a/src/video_core/rasterizer_cache.h b/src/video_core/rasterizer_cache.h
deleted file mode 100644
index 096ee337c..000000000
--- a/src/video_core/rasterizer_cache.h
+++ /dev/null
@@ -1,253 +0,0 @@
1// Copyright 2018 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <mutex>
8#include <set>
9#include <unordered_map>
10
11#include <boost/icl/interval_map.hpp>
12#include <boost/range/iterator_range_core.hpp>
13
14#include "common/common_types.h"
15#include "core/settings.h"
16#include "video_core/gpu.h"
17#include "video_core/rasterizer_interface.h"
18
19class RasterizerCacheObject {
20public:
21 explicit RasterizerCacheObject(const VAddr cpu_addr) : cpu_addr{cpu_addr} {}
22
23 virtual ~RasterizerCacheObject();
24
25 VAddr GetCpuAddr() const {
26 return cpu_addr;
27 }
28
29 /// Gets the size of the shader in guest memory, required for cache management
30 virtual std::size_t GetSizeInBytes() const = 0;
31
32 /// Sets whether the cached object should be considered registered
33 void SetIsRegistered(bool registered) {
34 is_registered = registered;
35 }
36
37 /// Returns true if the cached object is registered
38 bool IsRegistered() const {
39 return is_registered;
40 }
41
42 /// Returns true if the cached object is dirty
43 bool IsDirty() const {
44 return is_dirty;
45 }
46
47 /// Returns ticks from when this cached object was last modified
48 u64 GetLastModifiedTicks() const {
49 return last_modified_ticks;
50 }
51
52 /// Marks an object as recently modified, used to specify whether it is clean or dirty
53 template <class T>
54 void MarkAsModified(bool dirty, T& cache) {
55 is_dirty = dirty;
56 last_modified_ticks = cache.GetModifiedTicks();
57 }
58
59 void SetMemoryMarked(bool is_memory_marked_) {
60 is_memory_marked = is_memory_marked_;
61 }
62
63 bool IsMemoryMarked() const {
64 return is_memory_marked;
65 }
66
67 void SetSyncPending(bool is_sync_pending_) {
68 is_sync_pending = is_sync_pending_;
69 }
70
71 bool IsSyncPending() const {
72 return is_sync_pending;
73 }
74
75private:
76 bool is_registered{}; ///< Whether the object is currently registered with the cache
77 bool is_dirty{}; ///< Whether the object is dirty (out of sync with guest memory)
78 bool is_memory_marked{}; ///< Whether the object is marking rasterizer memory.
79 bool is_sync_pending{}; ///< Whether the object is pending deletion.
80 u64 last_modified_ticks{}; ///< When the object was last modified, used for in-order flushing
81 VAddr cpu_addr{}; ///< Cpu address memory, unique from emulated virtual address space
82};
83
84template <class T>
85class RasterizerCache : NonCopyable {
86 friend class RasterizerCacheObject;
87
88public:
89 explicit RasterizerCache(VideoCore::RasterizerInterface& rasterizer) : rasterizer{rasterizer} {}
90
91 /// Write any cached resources overlapping the specified region back to memory
92 void FlushRegion(VAddr addr, std::size_t size) {
93 std::lock_guard lock{mutex};
94
95 const auto& objects{GetSortedObjectsFromRegion(addr, size)};
96 for (auto& object : objects) {
97 FlushObject(object);
98 }
99 }
100
101 /// Mark the specified region as being invalidated
102 void InvalidateRegion(VAddr addr, u64 size) {
103 std::lock_guard lock{mutex};
104
105 const auto& objects{GetSortedObjectsFromRegion(addr, size)};
106 for (auto& object : objects) {
107 if (!object->IsRegistered()) {
108 // Skip duplicates
109 continue;
110 }
111 Unregister(object);
112 }
113 }
114
115 void OnCPUWrite(VAddr addr, std::size_t size) {
116 std::lock_guard lock{mutex};
117
118 for (const auto& object : GetSortedObjectsFromRegion(addr, size)) {
119 if (object->IsRegistered()) {
120 UnmarkMemory(object);
121 object->SetSyncPending(true);
122 marked_for_unregister.emplace_back(object);
123 }
124 }
125 }
126
127 void SyncGuestHost() {
128 std::lock_guard lock{mutex};
129
130 for (const auto& object : marked_for_unregister) {
131 if (object->IsRegistered()) {
132 object->SetSyncPending(false);
133 Unregister(object);
134 }
135 }
136 marked_for_unregister.clear();
137 }
138
139 /// Invalidates everything in the cache
140 void InvalidateAll() {
141 std::lock_guard lock{mutex};
142
143 while (interval_cache.begin() != interval_cache.end()) {
144 Unregister(*interval_cache.begin()->second.begin());
145 }
146 }
147
148protected:
149 /// Tries to get an object from the cache with the specified cache address
150 T TryGet(VAddr addr) const {
151 const auto iter = map_cache.find(addr);
152 if (iter != map_cache.end())
153 return iter->second;
154 return nullptr;
155 }
156
157 /// Register an object into the cache
158 virtual void Register(const T& object) {
159 std::lock_guard lock{mutex};
160
161 object->SetIsRegistered(true);
162 interval_cache.add({GetInterval(object), ObjectSet{object}});
163 map_cache.insert({object->GetCpuAddr(), object});
164 rasterizer.UpdatePagesCachedCount(object->GetCpuAddr(), object->GetSizeInBytes(), 1);
165 object->SetMemoryMarked(true);
166 }
167
168 /// Unregisters an object from the cache
169 virtual void Unregister(const T& object) {
170 std::lock_guard lock{mutex};
171
172 UnmarkMemory(object);
173 object->SetIsRegistered(false);
174 if (object->IsSyncPending()) {
175 marked_for_unregister.remove(object);
176 object->SetSyncPending(false);
177 }
178 const VAddr addr = object->GetCpuAddr();
179 interval_cache.subtract({GetInterval(object), ObjectSet{object}});
180 map_cache.erase(addr);
181 }
182
183 void UnmarkMemory(const T& object) {
184 if (!object->IsMemoryMarked()) {
185 return;
186 }
187 rasterizer.UpdatePagesCachedCount(object->GetCpuAddr(), object->GetSizeInBytes(), -1);
188 object->SetMemoryMarked(false);
189 }
190
191 /// Returns a ticks counter used for tracking when cached objects were last modified
192 u64 GetModifiedTicks() {
193 std::lock_guard lock{mutex};
194
195 return ++modified_ticks;
196 }
197
198 virtual void FlushObjectInner(const T& object) = 0;
199
200 /// Flushes the specified object, updating appropriate cache state as needed
201 void FlushObject(const T& object) {
202 std::lock_guard lock{mutex};
203
204 if (!object->IsDirty()) {
205 return;
206 }
207 FlushObjectInner(object);
208 object->MarkAsModified(false, *this);
209 }
210
211 std::recursive_mutex mutex;
212
213private:
214 /// Returns a list of cached objects from the specified memory region, ordered by access time
215 std::vector<T> GetSortedObjectsFromRegion(VAddr addr, u64 size) {
216 if (size == 0) {
217 return {};
218 }
219
220 std::vector<T> objects;
221 const ObjectInterval interval{addr, addr + size};
222 for (auto& pair : boost::make_iterator_range(interval_cache.equal_range(interval))) {
223 for (auto& cached_object : pair.second) {
224 if (!cached_object) {
225 continue;
226 }
227 objects.push_back(cached_object);
228 }
229 }
230
231 std::sort(objects.begin(), objects.end(), [](const T& a, const T& b) -> bool {
232 return a->GetLastModifiedTicks() < b->GetLastModifiedTicks();
233 });
234
235 return objects;
236 }
237
238 using ObjectSet = std::set<T>;
239 using ObjectCache = std::unordered_map<VAddr, T>;
240 using IntervalCache = boost::icl::interval_map<VAddr, ObjectSet>;
241 using ObjectInterval = typename IntervalCache::interval_type;
242
243 static auto GetInterval(const T& object) {
244 return ObjectInterval::right_open(object->GetCpuAddr(),
245 object->GetCpuAddr() + object->GetSizeInBytes());
246 }
247
248 ObjectCache map_cache;
249 IntervalCache interval_cache; ///< Cache of objects
250 u64 modified_ticks{}; ///< Counter of cache state ticks, used for in-order flushing
251 VideoCore::RasterizerInterface& rasterizer;
252 std::list<T> marked_for_unregister;
253};
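Note on the deleted RasterizerCache: it tracked objects in a boost::icl::interval_map keyed by CPU address, where add()/subtract() with a set payload merge and split overlapping ranges automatically, which is what made FlushRegion/InvalidateRegion a simple equal_range() walk. A compact illustration of that container behavior (standalone example, not yuzu code):

    #include <iostream>
    #include <set>
    #include <boost/icl/interval_map.hpp>

    int main() {
        using Map = boost::icl::interval_map<unsigned, std::set<int>>;
        Map map;
        map.add({Map::interval_type::right_open(0, 100), std::set<int>{1}});
        map.add({Map::interval_type::right_open(50, 150), std::set<int>{2}});
        // The overlap [50, 100) now carries {1, 2}; three segments total.
        std::cout << boost::icl::interval_count(map) << '\n'; // prints 3
    }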
diff --git a/src/video_core/renderer_opengl/gl_arb_decompiler.cpp b/src/video_core/renderer_opengl/gl_arb_decompiler.cpp
new file mode 100644
index 000000000..eb5158407
--- /dev/null
+++ b/src/video_core/renderer_opengl/gl_arb_decompiler.cpp
@@ -0,0 +1,2073 @@
1// Copyright 2020 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <algorithm>
6#include <array>
7#include <cstddef>
8#include <string>
9#include <string_view>
10#include <utility>
11#include <variant>
12
13#include <fmt/format.h>
14
15#include "common/alignment.h"
16#include "common/assert.h"
17#include "common/common_types.h"
18#include "video_core/renderer_opengl/gl_arb_decompiler.h"
19#include "video_core/renderer_opengl/gl_device.h"
20#include "video_core/shader/registry.h"
21#include "video_core/shader/shader_ir.h"
22
23// Predicates in the decompiled code follow the convention that -1 means true and 0 means false.
24// GLASM lacks booleans, so they have to be implemented as integers.
25// Using -1 for true is useful because both CMP.S and NOT.U can negate it, and CMP.S can be used to
26// select between two values, because -1 will be evaluated as true and 0 as false.
27
28namespace OpenGL {
29
30namespace {
31
32using Tegra::Engines::ShaderType;
33using Tegra::Shader::Attribute;
34using Tegra::Shader::PixelImap;
35using Tegra::Shader::Register;
36using namespace VideoCommon::Shader;
37using Operation = const OperationNode&;
38
39constexpr std::array INTERNAL_FLAG_NAMES = {"ZERO", "SIGN", "CARRY", "OVERFLOW"};
40
41char Swizzle(std::size_t component) {
42 ASSERT(component < 4);
43 return component["xyzw"];
44}
45
46constexpr bool IsGenericAttribute(Attribute::Index index) {
47 return index >= Attribute::Index::Attribute_0 && index <= Attribute::Index::Attribute_31;
48}
49
50u32 GetGenericAttributeIndex(Attribute::Index index) {
51 ASSERT(IsGenericAttribute(index));
52 return static_cast<u32>(index) - static_cast<u32>(Attribute::Index::Attribute_0);
53}
54
55std::string_view Modifiers(Operation operation) {
56 const auto meta = std::get_if<MetaArithmetic>(&operation.GetMeta());
57 if (meta && meta->precise) {
58 return ".PREC";
59 }
60 return "";
61}
62
63std::string_view GetInputFlags(PixelImap attribute) {
64 switch (attribute) {
65 case PixelImap::Perspective:
66 return "";
67 case PixelImap::Constant:
68 return "FLAT ";
69 case PixelImap::ScreenLinear:
70 return "NOPERSPECTIVE ";
71 case PixelImap::Unused:
72 break;
73 }
74 UNIMPLEMENTED_MSG("Unknown attribute usage index={}", static_cast<int>(attribute));
75 return {};
76}
77
78std::string_view ImageType(Tegra::Shader::ImageType image_type) {
79 switch (image_type) {
80 case Tegra::Shader::ImageType::Texture1D:
81 return "1D";
82 case Tegra::Shader::ImageType::TextureBuffer:
83 return "BUFFER";
84 case Tegra::Shader::ImageType::Texture1DArray:
85 return "ARRAY1D";
86 case Tegra::Shader::ImageType::Texture2D:
87 return "2D";
88 case Tegra::Shader::ImageType::Texture2DArray:
89 return "ARRAY2D";
90 case Tegra::Shader::ImageType::Texture3D:
91 return "3D";
92 }
93 UNREACHABLE();
94 return {};
95}
96
97std::string_view StackName(MetaStackClass stack) {
98 switch (stack) {
99 case MetaStackClass::Ssy:
100 return "SSY";
101 case MetaStackClass::Pbk:
102 return "PBK";
103 }
104 UNREACHABLE();
105 return "";
106};
107
108std::string_view PrimitiveDescription(Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology topology) {
109 switch (topology) {
110 case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::Points:
111 return "POINTS";
112 case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::Lines:
113 case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::LineStrip:
114 return "LINES";
115 case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::LinesAdjacency:
116 case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::LineStripAdjacency:
117 return "LINES_ADJACENCY";
118 case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::Triangles:
119 case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::TriangleStrip:
120 case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::TriangleFan:
121 return "TRIANGLES";
122 case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::TrianglesAdjacency:
123 case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::TriangleStripAdjacency:
124 return "TRIANGLES_ADJACENCY";
125 default:
126 UNIMPLEMENTED_MSG("topology={}", static_cast<int>(topology));
127 return "POINTS";
128 }
129}
130
131std::string_view TopologyName(Tegra::Shader::OutputTopology topology) {
132 switch (topology) {
133 case Tegra::Shader::OutputTopology::PointList:
134 return "POINTS";
135 case Tegra::Shader::OutputTopology::LineStrip:
136 return "LINE_STRIP";
137 case Tegra::Shader::OutputTopology::TriangleStrip:
138 return "TRIANGLE_STRIP";
139 default:
140 UNIMPLEMENTED_MSG("Unknown output topology: {}", static_cast<u32>(topology));
141 return "points";
142 }
143}
144
145std::string_view StageInputName(ShaderType stage) {
146 switch (stage) {
147 case ShaderType::Vertex:
148 case ShaderType::Geometry:
149 return "vertex";
150 case ShaderType::Fragment:
151 return "fragment";
152 case ShaderType::Compute:
153 return "invocation";
154 default:
155 UNREACHABLE();
156 return "";
157 }
158}
159
160std::string TextureType(const MetaTexture& meta) {
161 if (meta.sampler.is_buffer) {
162 return "BUFFER";
163 }
164 std::string type;
165 if (meta.sampler.is_shadow) {
166 type += "SHADOW";
167 }
168 if (meta.sampler.is_array) {
169 type += "ARRAY";
170 }
171 type += [&meta] {
172 switch (meta.sampler.type) {
173 case Tegra::Shader::TextureType::Texture1D:
174 return "1D";
175 case Tegra::Shader::TextureType::Texture2D:
176 return "2D";
177 case Tegra::Shader::TextureType::Texture3D:
178 return "3D";
179 case Tegra::Shader::TextureType::TextureCube:
180 return "CUBE";
181 }
182 UNREACHABLE();
183 return "2D";
184 }();
185 return type;
186}
187
188std::string GlobalMemoryName(const GlobalMemoryBase& base) {
189 return fmt::format("gmem{}_{}", base.cbuf_index, base.cbuf_offset);
190}
191
192class ARBDecompiler final {
193public:
194 explicit ARBDecompiler(const Device& device, const ShaderIR& ir, const Registry& registry,
195 ShaderType stage, std::string_view identifier);
196
197 std::string Code() const {
198 return shader_source;
199 }
200
201private:
202 void DeclareHeader();
203 void DeclareVertex();
204 void DeclareGeometry();
205 void DeclareFragment();
206 void DeclareCompute();
207 void DeclareInputAttributes();
208 void DeclareOutputAttributes();
209 void DeclareLocalMemory();
210 void DeclareGlobalMemory();
211 void DeclareConstantBuffers();
212 void DeclareRegisters();
213 void DeclareTemporaries();
214 void DeclarePredicates();
215 void DeclareInternalFlags();
216
217 void InitializeVariables();
218
219 void DecompileAST();
220 void DecompileBranchMode();
221
222 void VisitAST(const ASTNode& node);
223 std::string VisitExpression(const Expr& node);
224
225 void VisitBlock(const NodeBlock& bb);
226
227 std::string Visit(const Node& node);
228
229 std::pair<std::string, std::size_t> BuildCoords(Operation);
230 std::string BuildAoffi(Operation);
231 void Exit();
232
233 std::string Assign(Operation);
234 std::string Select(Operation);
235 std::string FClamp(Operation);
236 std::string FCastHalf0(Operation);
237 std::string FCastHalf1(Operation);
238 std::string FSqrt(Operation);
239 std::string FSwizzleAdd(Operation);
240 std::string HAdd2(Operation);
241 std::string HMul2(Operation);
242 std::string HFma2(Operation);
243 std::string HAbsolute(Operation);
244 std::string HNegate(Operation);
245 std::string HClamp(Operation);
246 std::string HCastFloat(Operation);
247 std::string HUnpack(Operation);
248 std::string HMergeF32(Operation);
249 std::string HMergeH0(Operation);
250 std::string HMergeH1(Operation);
251 std::string HPack2(Operation);
252 std::string LogicalAssign(Operation);
253 std::string LogicalPick2(Operation);
254 std::string LogicalAnd2(Operation);
255 std::string FloatOrdered(Operation);
256 std::string FloatUnordered(Operation);
257 std::string LogicalAddCarry(Operation);
258 std::string Texture(Operation);
259 std::string TextureGather(Operation);
260 std::string TextureQueryDimensions(Operation);
261 std::string TextureQueryLod(Operation);
262 std::string TexelFetch(Operation);
263 std::string TextureGradient(Operation);
264 std::string ImageLoad(Operation);
265 std::string ImageStore(Operation);
266 std::string Branch(Operation);
267 std::string BranchIndirect(Operation);
268 std::string PushFlowStack(Operation);
269 std::string PopFlowStack(Operation);
270 std::string Exit(Operation);
271 std::string Discard(Operation);
272 std::string EmitVertex(Operation);
273 std::string EndPrimitive(Operation);
274 std::string InvocationId(Operation);
275 std::string YNegate(Operation);
276 std::string ThreadId(Operation);
277 std::string ShuffleIndexed(Operation);
278 std::string Barrier(Operation);
279 std::string MemoryBarrierGroup(Operation);
280 std::string MemoryBarrierGlobal(Operation);
281
282 template <const std::string_view& op>
283 std::string Unary(Operation operation) {
284 std::string temporary = AllocTemporary();
285 AddLine("{}{} {}, {};", op, Modifiers(operation), temporary, Visit(operation[0]));
286 return temporary;
287 }
288
289 template <const std::string_view& op>
290 std::string Binary(Operation operation) {
291 std::string temporary = AllocTemporary();
292 AddLine("{}{} {}, {}, {};", op, Modifiers(operation), temporary, Visit(operation[0]),
293 Visit(operation[1]));
294 return temporary;
295 }
296
297 template <const std::string_view& op>
298 std::string Trinary(Operation operation) {
299 std::string temporary = AllocTemporary();
300 AddLine("{}{} {}, {}, {}, {};", op, Modifiers(operation), temporary, Visit(operation[0]),
301 Visit(operation[1]), Visit(operation[2]));
302 return temporary;
303 }
304
305 template <const std::string_view& op, bool unordered>
306 std::string FloatComparison(Operation operation) {
307 std::string temporary = AllocTemporary();
308 AddLine("TRUNC.U.CC RC.x, {};", Binary<op>(operation));
309 AddLine("MOV.S {}, 0;", temporary);
310 AddLine("MOV.S {} (NE.x), -1;", temporary);
311
312 const std::string op_a = Visit(operation[0]);
313 const std::string op_b = Visit(operation[1]);
314 if constexpr (unordered) {
315 AddLine("SNE.F RC.x, {}, {};", op_a, op_a);
316 AddLine("TRUNC.U.CC RC.x, RC.x;");
317 AddLine("MOV.S {} (NE.x), -1;", temporary);
318 AddLine("SNE.F RC.x, {}, {};", op_b, op_b);
319 AddLine("TRUNC.U.CC RC.x, RC.x;");
320 AddLine("MOV.S {} (NE.x), -1;", temporary);
321 } else if (op == SNE_F) {
322 AddLine("SNE.F RC.x, {}, {};", op_a, op_a);
323 AddLine("TRUNC.U.CC RC.x, RC.x;");
324 AddLine("MOV.S {} (NE.x), 0;", temporary);
325 AddLine("SNE.F RC.x, {}, {};", op_b, op_b);
326 AddLine("TRUNC.U.CC RC.x, RC.x;");
327 AddLine("MOV.S {} (NE.x), 0;", temporary);
328 }
329 return temporary;
330 }
331
332 template <const std::string_view& op, bool is_nan>
333 std::string HalfComparison(Operation operation) {
334 std::string tmp1 = AllocVectorTemporary();
335 const std::string tmp2 = AllocVectorTemporary();
336 const std::string op_a = Visit(operation[0]);
337 const std::string op_b = Visit(operation[1]);
338 AddLine("UP2H.F {}, {};", tmp1, op_a);
339 AddLine("UP2H.F {}, {};", tmp2, op_b);
340 AddLine("{} {}, {}, {};", op, tmp1, tmp1, tmp2);
341 AddLine("TRUNC.U.CC RC.xy, {};", tmp1);
342 AddLine("MOV.S {}.xy, {{0, 0, 0, 0}};", tmp1);
343 AddLine("MOV.S {}.x (NE.x), -1;", tmp1);
344 AddLine("MOV.S {}.y (NE.y), -1;", tmp1);
345 if constexpr (is_nan) {
346 AddLine("MOVC.F RC.x, {};", op_a);
347 AddLine("MOV.S {}.x (NAN.x), -1;", tmp1);
348 AddLine("MOVC.F RC.x, {};", op_b);
349 AddLine("MOV.S {}.y (NAN.x), -1;", tmp1);
350 }
351 return tmp1;
352 }
353
354 template <const std::string_view& op, const std::string_view& type>
355 std::string AtomicImage(Operation operation) {
356 const auto& meta = std::get<MetaImage>(operation.GetMeta());
357 const u32 image_id = device.GetBaseBindings(stage).image + meta.image.index;
358 const std::size_t num_coords = operation.GetOperandsCount();
359 const std::size_t num_values = meta.values.size();
360
361 const std::string coord = AllocVectorTemporary();
362 const std::string value = AllocVectorTemporary();
363 for (std::size_t i = 0; i < num_coords; ++i) {
364 AddLine("MOV.S {}.{}, {};", coord, Swizzle(i), Visit(operation[i]));
365 }
366 for (std::size_t i = 0; i < num_values; ++i) {
367 AddLine("MOV.F {}.{}, {};", value, Swizzle(i), Visit(meta.values[i]));
368 }
369
370 AddLine("ATOMIM.{}.{} {}.x, {}, {}, image[{}], {};", op, type, coord, value, coord,
371 image_id, ImageType(meta.image.type));
372 return fmt::format("{}.x", coord);
373 }
374
375 template <const std::string_view& op, const std::string_view& type>
376 std::string Atomic(Operation operation) {
377 std::string temporary = AllocTemporary();
378 std::string address;
379 std::string_view opname;
380 if (const auto gmem = std::get_if<GmemNode>(&*operation[0])) {
381 AddLine("SUB.U {}, {}, {};", temporary, Visit(gmem->GetRealAddress()),
382 Visit(gmem->GetBaseAddress()));
383 address = fmt::format("{}[{}]", GlobalMemoryName(gmem->GetDescriptor()), temporary);
384 opname = "ATOMB";
385 } else if (const auto smem = std::get_if<SmemNode>(&*operation[0])) {
386 address = fmt::format("shared_mem[{}]", Visit(smem->GetAddress()));
387 opname = "ATOMS";
388 } else {
389 UNREACHABLE();
390 return "{0, 0, 0, 0}.x";
391 }
392 AddLine("{}.{}.{} {}, {}, {};", opname, op, type, temporary, Visit(operation[1]), address);
393 return temporary;
394 }
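// Sketch of the emitted code for a U32 ADD with the value in R1.x: shared
// memory takes the address directly,
//   ATOMS.ADD.U32 T0.x, R1.x, shared_mem[<addr>];
// while global memory is rebased against the descriptor base first,
//   SUB.U T0.x, <real_addr>, <base_addr>;
//   ATOMB.ADD.U32 T0.x, R1.x, <buffer>[T0.x];
// where <buffer> stands for whatever GlobalMemoryName() returns.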
395
396 template <char type>
397 std::string Negate(Operation operation) {
398 std::string temporary = AllocTemporary();
399 if constexpr (type == 'F') {
400 AddLine("MOV.F32 {}, -{};", temporary, Visit(operation[0]));
401 } else {
402 AddLine("MOV.{} {}, -{};", type, temporary, Visit(operation[0]));
403 }
404 return temporary;
405 }
406
407 template <char type>
408 std::string Absolute(Operation operation) {
409 std::string temporary = AllocTemporary();
410 AddLine("MOV.{} {}, |{}|;", type, temporary, Visit(operation[0]));
411 return temporary;
412 }
413
414 template <char type>
415 std::string BitfieldInsert(Operation operation) {
416 const std::string temporary = AllocVectorTemporary();
417 AddLine("MOV.{} {}.x, {};", type, temporary, Visit(operation[3]));
418 AddLine("MOV.{} {}.y, {};", type, temporary, Visit(operation[2]));
419 AddLine("BFI.{} {}.x, {}, {}, {};", type, temporary, temporary, Visit(operation[1]),
420 Visit(operation[0]));
421 return fmt::format("{}.x", temporary);
422 }
423
424 template <char type>
425 std::string BitfieldExtract(Operation operation) {
426 const std::string temporary = AllocVectorTemporary();
427 AddLine("MOV.{} {}.x, {};", type, temporary, Visit(operation[2]));
428 AddLine("MOV.{} {}.y, {};", type, temporary, Visit(operation[1]));
429 AddLine("BFE.{} {}.x, {}, {};", type, temporary, temporary, Visit(operation[0]));
430 return fmt::format("{}.x", temporary);
431 }
432
433 template <char swizzle>
434 std::string LocalInvocationId(Operation) {
435 return fmt::format("invocation.localid.{}", swizzle);
436 }
437
438 template <char swizzle>
439 std::string WorkGroupId(Operation) {
440 return fmt::format("invocation.groupid.{}", swizzle);
441 }
442
443 template <char c1, char c2>
444 std::string ThreadMask(Operation) {
445 return fmt::format("{}.thread{}{}mask", StageInputName(stage), c1, c2);
446 }
447
448 template <typename... Args>
449 void AddExpression(std::string_view text, Args&&... args) {
450 shader_source += fmt::format(text, std::forward<Args>(args)...);
451 }
452
453 template <typename... Args>
454 void AddLine(std::string_view text, Args&&... args) {
455 AddExpression(text, std::forward<Args>(args)...);
456 shader_source += '\n';
457 }
458
459 std::string AllocTemporary() {
460 max_temporaries = std::max(max_temporaries, num_temporaries + 1);
461 return fmt::format("T{}.x", num_temporaries++);
462 }
463
464 std::string AllocVectorTemporary() {
465 max_temporaries = std::max(max_temporaries, num_temporaries + 1);
466 return fmt::format("T{}", num_temporaries++);
467 }
468
469 void ResetTemporaries() noexcept {
470 num_temporaries = 0;
471 }
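// Allocator contract, for illustration: AllocTemporary() hands out scalar
// views ("T0.x", "T1.x", ...), AllocVectorTemporary() whole registers ("T0"),
// and ResetTemporaries() rewinds the counter at statement boundaries while
// max_temporaries keeps the high-water mark that DeclareTemporaries() later
// turns into one "TEMP T{};" declaration per slot.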
472
473 const Device& device;
474 const ShaderIR& ir;
475 const Registry& registry;
476 const ShaderType stage;
477
478 std::size_t num_temporaries = 0;
479 std::size_t max_temporaries = 0;
480
481 std::string shader_source;
482
483 static constexpr std::string_view ADD_F32 = "ADD.F32";
484 static constexpr std::string_view ADD_S = "ADD.S";
485 static constexpr std::string_view ADD_U = "ADD.U";
486 static constexpr std::string_view MUL_F32 = "MUL.F32";
487 static constexpr std::string_view MUL_S = "MUL.S";
488 static constexpr std::string_view MUL_U = "MUL.U";
489 static constexpr std::string_view DIV_F32 = "DIV.F32";
490 static constexpr std::string_view DIV_S = "DIV.S";
491 static constexpr std::string_view DIV_U = "DIV.U";
492 static constexpr std::string_view MAD_F32 = "MAD.F32";
493 static constexpr std::string_view RSQ_F32 = "RSQ.F32";
494 static constexpr std::string_view COS_F32 = "COS.F32";
495 static constexpr std::string_view SIN_F32 = "SIN.F32";
496 static constexpr std::string_view EX2_F32 = "EX2.F32";
497 static constexpr std::string_view LG2_F32 = "LG2.F32";
498 static constexpr std::string_view SLT_F = "SLT.F32";
499 static constexpr std::string_view SLT_S = "SLT.S";
500 static constexpr std::string_view SLT_U = "SLT.U";
501 static constexpr std::string_view SEQ_F = "SEQ.F32";
502 static constexpr std::string_view SEQ_S = "SEQ.S";
503 static constexpr std::string_view SEQ_U = "SEQ.U";
504 static constexpr std::string_view SLE_F = "SLE.F32";
505 static constexpr std::string_view SLE_S = "SLE.S";
506 static constexpr std::string_view SLE_U = "SLE.U";
507 static constexpr std::string_view SGT_F = "SGT.F32";
508 static constexpr std::string_view SGT_S = "SGT.S";
509 static constexpr std::string_view SGT_U = "SGT.U";
510 static constexpr std::string_view SNE_F = "SNE.F32";
511 static constexpr std::string_view SNE_S = "SNE.S";
512 static constexpr std::string_view SNE_U = "SNE.U";
513 static constexpr std::string_view SGE_F = "SGE.F32";
514 static constexpr std::string_view SGE_S = "SGE.S";
515 static constexpr std::string_view SGE_U = "SGE.U";
516 static constexpr std::string_view AND_S = "AND.S";
517 static constexpr std::string_view AND_U = "AND.U";
518 static constexpr std::string_view TRUNC_F = "TRUNC.F";
519 static constexpr std::string_view TRUNC_S = "TRUNC.S";
520 static constexpr std::string_view TRUNC_U = "TRUNC.U";
521 static constexpr std::string_view SHL_S = "SHL.S";
522 static constexpr std::string_view SHL_U = "SHL.U";
523 static constexpr std::string_view SHR_S = "SHR.S";
524 static constexpr std::string_view SHR_U = "SHR.U";
525 static constexpr std::string_view OR_S = "OR.S";
526 static constexpr std::string_view OR_U = "OR.U";
527 static constexpr std::string_view XOR_S = "XOR.S";
528 static constexpr std::string_view XOR_U = "XOR.U";
529 static constexpr std::string_view NOT_S = "NOT.S";
530 static constexpr std::string_view NOT_U = "NOT.U";
531 static constexpr std::string_view BTC_S = "BTC.S";
532 static constexpr std::string_view BTC_U = "BTC.U";
533 static constexpr std::string_view BTFM_S = "BTFM.S";
534 static constexpr std::string_view BTFM_U = "BTFM.U";
535 static constexpr std::string_view ROUND_F = "ROUND.F";
536 static constexpr std::string_view CEIL_F = "CEIL.F";
537 static constexpr std::string_view FLR_F = "FLR.F";
538 static constexpr std::string_view I2F_S = "I2F.S";
539 static constexpr std::string_view I2F_U = "I2F.U";
540 static constexpr std::string_view MIN_F = "MIN.F";
541 static constexpr std::string_view MIN_S = "MIN.S";
542 static constexpr std::string_view MIN_U = "MIN.U";
543 static constexpr std::string_view MAX_F = "MAX.F";
544 static constexpr std::string_view MAX_S = "MAX.S";
545 static constexpr std::string_view MAX_U = "MAX.U";
546 static constexpr std::string_view MOV_U = "MOV.U";
547 static constexpr std::string_view TGBALLOT_U = "TGBALLOT.U";
548 static constexpr std::string_view TGALL_U = "TGALL.U";
549 static constexpr std::string_view TGANY_U = "TGANY.U";
550 static constexpr std::string_view TGEQ_U = "TGEQ.U";
551 static constexpr std::string_view EXCH = "EXCH";
552 static constexpr std::string_view ADD = "ADD";
553 static constexpr std::string_view MIN = "MIN";
554 static constexpr std::string_view MAX = "MAX";
555 static constexpr std::string_view AND = "AND";
556 static constexpr std::string_view OR = "OR";
557 static constexpr std::string_view XOR = "XOR";
558 static constexpr std::string_view U32 = "U32";
559 static constexpr std::string_view S32 = "S32";
560
561 static constexpr std::size_t NUM_ENTRIES = static_cast<std::size_t>(OperationCode::Amount);
562 using DecompilerType = std::string (ARBDecompiler::*)(Operation);
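// Dispatch table indexed by static_cast<std::size_t>(OperationCode); the
// entry order must mirror the OperationCode enum exactly. Visit()
// bounds-checks the index and rejects null entries before calling through.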
563 static constexpr std::array<DecompilerType, NUM_ENTRIES> OPERATION_DECOMPILERS = {
564 &ARBDecompiler::Assign,
565
566 &ARBDecompiler::Select,
567
568 &ARBDecompiler::Binary<ADD_F32>,
569 &ARBDecompiler::Binary<MUL_F32>,
570 &ARBDecompiler::Binary<DIV_F32>,
571 &ARBDecompiler::Trinary<MAD_F32>,
572 &ARBDecompiler::Negate<'F'>,
573 &ARBDecompiler::Absolute<'F'>,
574 &ARBDecompiler::FClamp,
575 &ARBDecompiler::FCastHalf0,
576 &ARBDecompiler::FCastHalf1,
577 &ARBDecompiler::Binary<MIN_F>,
578 &ARBDecompiler::Binary<MAX_F>,
579 &ARBDecompiler::Unary<COS_F32>,
580 &ARBDecompiler::Unary<SIN_F32>,
581 &ARBDecompiler::Unary<EX2_F32>,
582 &ARBDecompiler::Unary<LG2_F32>,
583 &ARBDecompiler::Unary<RSQ_F32>,
584 &ARBDecompiler::FSqrt,
585 &ARBDecompiler::Unary<ROUND_F>,
586 &ARBDecompiler::Unary<FLR_F>,
587 &ARBDecompiler::Unary<CEIL_F>,
588 &ARBDecompiler::Unary<TRUNC_F>,
589 &ARBDecompiler::Unary<I2F_S>,
590 &ARBDecompiler::Unary<I2F_U>,
591 &ARBDecompiler::FSwizzleAdd,
592
593 &ARBDecompiler::Binary<ADD_S>,
594 &ARBDecompiler::Binary<MUL_S>,
595 &ARBDecompiler::Binary<DIV_S>,
596 &ARBDecompiler::Negate<'S'>,
597 &ARBDecompiler::Absolute<'S'>,
598 &ARBDecompiler::Binary<MIN_S>,
599 &ARBDecompiler::Binary<MAX_S>,
600
601 &ARBDecompiler::Unary<TRUNC_S>,
602 &ARBDecompiler::Unary<MOV_U>,
603 &ARBDecompiler::Binary<SHL_S>,
604 &ARBDecompiler::Binary<SHR_U>,
605 &ARBDecompiler::Binary<SHR_S>,
606 &ARBDecompiler::Binary<AND_S>,
607 &ARBDecompiler::Binary<OR_S>,
608 &ARBDecompiler::Binary<XOR_S>,
609 &ARBDecompiler::Unary<NOT_S>,
610 &ARBDecompiler::BitfieldInsert<'S'>,
611 &ARBDecompiler::BitfieldExtract<'S'>,
612 &ARBDecompiler::Unary<BTC_S>,
613 &ARBDecompiler::Unary<BTFM_S>,
614
615 &ARBDecompiler::Binary<ADD_U>,
616 &ARBDecompiler::Binary<MUL_U>,
617 &ARBDecompiler::Binary<DIV_U>,
618 &ARBDecompiler::Binary<MIN_U>,
619 &ARBDecompiler::Binary<MAX_U>,
620 &ARBDecompiler::Unary<TRUNC_U>,
621 &ARBDecompiler::Unary<MOV_U>,
622 &ARBDecompiler::Binary<SHL_U>,
623 &ARBDecompiler::Binary<SHR_U>,
624 &ARBDecompiler::Binary<SHR_U>,
625 &ARBDecompiler::Binary<AND_U>,
626 &ARBDecompiler::Binary<OR_U>,
627 &ARBDecompiler::Binary<XOR_U>,
628 &ARBDecompiler::Unary<NOT_U>,
629 &ARBDecompiler::BitfieldInsert<'U'>,
630 &ARBDecompiler::BitfieldExtract<'U'>,
631 &ARBDecompiler::Unary<BTC_U>,
632 &ARBDecompiler::Unary<BTFM_U>,
633
634 &ARBDecompiler::HAdd2,
635 &ARBDecompiler::HMul2,
636 &ARBDecompiler::HFma2,
637 &ARBDecompiler::HAbsolute,
638 &ARBDecompiler::HNegate,
639 &ARBDecompiler::HClamp,
640 &ARBDecompiler::HCastFloat,
641 &ARBDecompiler::HUnpack,
642 &ARBDecompiler::HMergeF32,
643 &ARBDecompiler::HMergeH0,
644 &ARBDecompiler::HMergeH1,
645 &ARBDecompiler::HPack2,
646
647 &ARBDecompiler::LogicalAssign,
648 &ARBDecompiler::Binary<AND_U>,
649 &ARBDecompiler::Binary<OR_U>,
650 &ARBDecompiler::Binary<XOR_U>,
651 &ARBDecompiler::Unary<NOT_U>,
652 &ARBDecompiler::LogicalPick2,
653 &ARBDecompiler::LogicalAnd2,
654
655 &ARBDecompiler::FloatComparison<SLT_F, false>,
656 &ARBDecompiler::FloatComparison<SEQ_F, false>,
657 &ARBDecompiler::FloatComparison<SLE_F, false>,
658 &ARBDecompiler::FloatComparison<SGT_F, false>,
659 &ARBDecompiler::FloatComparison<SNE_F, false>,
660 &ARBDecompiler::FloatComparison<SGE_F, false>,
661 &ARBDecompiler::FloatOrdered,
662 &ARBDecompiler::FloatUnordered,
663 &ARBDecompiler::FloatComparison<SLT_F, true>,
664 &ARBDecompiler::FloatComparison<SEQ_F, true>,
665 &ARBDecompiler::FloatComparison<SLE_F, true>,
666 &ARBDecompiler::FloatComparison<SGT_F, true>,
667 &ARBDecompiler::FloatComparison<SNE_F, true>,
668 &ARBDecompiler::FloatComparison<SGE_F, true>,
669
670 &ARBDecompiler::Binary<SLT_S>,
671 &ARBDecompiler::Binary<SEQ_S>,
672 &ARBDecompiler::Binary<SLE_S>,
673 &ARBDecompiler::Binary<SGT_S>,
674 &ARBDecompiler::Binary<SNE_S>,
675 &ARBDecompiler::Binary<SGE_S>,
676
677 &ARBDecompiler::Binary<SLT_U>,
678 &ARBDecompiler::Binary<SEQ_U>,
679 &ARBDecompiler::Binary<SLE_U>,
680 &ARBDecompiler::Binary<SGT_U>,
681 &ARBDecompiler::Binary<SNE_U>,
682 &ARBDecompiler::Binary<SGE_U>,
683
684 &ARBDecompiler::LogicalAddCarry,
685
686 &ARBDecompiler::HalfComparison<SLT_F, false>,
687 &ARBDecompiler::HalfComparison<SEQ_F, false>,
688 &ARBDecompiler::HalfComparison<SLE_F, false>,
689 &ARBDecompiler::HalfComparison<SGT_F, false>,
690 &ARBDecompiler::HalfComparison<SNE_F, false>,
691 &ARBDecompiler::HalfComparison<SGE_F, false>,
692 &ARBDecompiler::HalfComparison<SLT_F, true>,
693 &ARBDecompiler::HalfComparison<SEQ_F, true>,
694 &ARBDecompiler::HalfComparison<SLE_F, true>,
695 &ARBDecompiler::HalfComparison<SGT_F, true>,
696 &ARBDecompiler::HalfComparison<SNE_F, true>,
697 &ARBDecompiler::HalfComparison<SGE_F, true>,
698
699 &ARBDecompiler::Texture,
700 &ARBDecompiler::Texture,
701 &ARBDecompiler::TextureGather,
702 &ARBDecompiler::TextureQueryDimensions,
703 &ARBDecompiler::TextureQueryLod,
704 &ARBDecompiler::TexelFetch,
705 &ARBDecompiler::TextureGradient,
706
707 &ARBDecompiler::ImageLoad,
708 &ARBDecompiler::ImageStore,
709
710 &ARBDecompiler::AtomicImage<ADD, U32>,
711 &ARBDecompiler::AtomicImage<AND, U32>,
712 &ARBDecompiler::AtomicImage<OR, U32>,
713 &ARBDecompiler::AtomicImage<XOR, U32>,
714 &ARBDecompiler::AtomicImage<EXCH, U32>,
715
716 &ARBDecompiler::Atomic<EXCH, U32>,
717 &ARBDecompiler::Atomic<ADD, U32>,
718 &ARBDecompiler::Atomic<MIN, U32>,
719 &ARBDecompiler::Atomic<MAX, U32>,
720 &ARBDecompiler::Atomic<AND, U32>,
721 &ARBDecompiler::Atomic<OR, U32>,
722 &ARBDecompiler::Atomic<XOR, U32>,
723
724 &ARBDecompiler::Atomic<EXCH, S32>,
725 &ARBDecompiler::Atomic<ADD, S32>,
726 &ARBDecompiler::Atomic<MIN, S32>,
727 &ARBDecompiler::Atomic<MAX, S32>,
728 &ARBDecompiler::Atomic<AND, S32>,
729 &ARBDecompiler::Atomic<OR, S32>,
730 &ARBDecompiler::Atomic<XOR, S32>,
731
732 &ARBDecompiler::Atomic<ADD, U32>,
733 &ARBDecompiler::Atomic<MIN, U32>,
734 &ARBDecompiler::Atomic<MAX, U32>,
735 &ARBDecompiler::Atomic<AND, U32>,
736 &ARBDecompiler::Atomic<OR, U32>,
737 &ARBDecompiler::Atomic<XOR, U32>,
738
739 &ARBDecompiler::Atomic<ADD, S32>,
740 &ARBDecompiler::Atomic<MIN, S32>,
741 &ARBDecompiler::Atomic<MAX, S32>,
742 &ARBDecompiler::Atomic<AND, S32>,
743 &ARBDecompiler::Atomic<OR, S32>,
744 &ARBDecompiler::Atomic<XOR, S32>,
745
746 &ARBDecompiler::Branch,
747 &ARBDecompiler::BranchIndirect,
748 &ARBDecompiler::PushFlowStack,
749 &ARBDecompiler::PopFlowStack,
750 &ARBDecompiler::Exit,
751 &ARBDecompiler::Discard,
752
753 &ARBDecompiler::EmitVertex,
754 &ARBDecompiler::EndPrimitive,
755
756 &ARBDecompiler::InvocationId,
757 &ARBDecompiler::YNegate,
758 &ARBDecompiler::LocalInvocationId<'x'>,
759 &ARBDecompiler::LocalInvocationId<'y'>,
760 &ARBDecompiler::LocalInvocationId<'z'>,
761 &ARBDecompiler::WorkGroupId<'x'>,
762 &ARBDecompiler::WorkGroupId<'y'>,
763 &ARBDecompiler::WorkGroupId<'z'>,
764
765 &ARBDecompiler::Unary<TGBALLOT_U>,
766 &ARBDecompiler::Unary<TGALL_U>,
767 &ARBDecompiler::Unary<TGANY_U>,
768 &ARBDecompiler::Unary<TGEQ_U>,
769
770 &ARBDecompiler::ThreadId,
771 &ARBDecompiler::ThreadMask<'e', 'q'>,
772 &ARBDecompiler::ThreadMask<'g', 'e'>,
773 &ARBDecompiler::ThreadMask<'g', 't'>,
774 &ARBDecompiler::ThreadMask<'l', 'e'>,
775 &ARBDecompiler::ThreadMask<'l', 't'>,
776 &ARBDecompiler::ShuffleIndexed,
777
778 &ARBDecompiler::Barrier,
779 &ARBDecompiler::MemoryBarrierGroup,
780 &ARBDecompiler::MemoryBarrierGlobal,
781 };
782};
783
784ARBDecompiler::ARBDecompiler(const Device& device, const ShaderIR& ir, const Registry& registry,
785 ShaderType stage, std::string_view identifier)
786 : device{device}, ir{ir}, registry{registry}, stage{stage} {
787 AddLine("TEMP RC;");
788 AddLine("TEMP FSWZA[4];");
789 AddLine("TEMP FSWZB[4];");
790 if (ir.IsDecompiled()) {
791 DecompileAST();
792 } else {
793 DecompileBranchMode();
794 }
795 AddLine("END");
796
797 const std::string code = std::move(shader_source);
798 DeclareHeader();
799 DeclareVertex();
800 DeclareGeometry();
801 DeclareFragment();
802 DeclareCompute();
803 DeclareInputAttributes();
804 DeclareOutputAttributes();
805 DeclareLocalMemory();
806 DeclareGlobalMemory();
807 DeclareConstantBuffers();
808 DeclareRegisters();
809 DeclareTemporaries();
810 DeclarePredicates();
811 DeclareInternalFlags();
812
813 shader_source += code;
814}
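// Note: the body is generated before the declarations on purpose. Declaration
// emission depends on counters that are only known once the body has been
// visited (e.g. DeclareTemporaries() needs the max_temporaries high-water
// mark), so the finished body is moved aside and re-appended afterwards.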
815
816std::string_view HeaderStageName(ShaderType stage) {
817 switch (stage) {
818 case ShaderType::Vertex:
819 return "vp";
820 case ShaderType::Geometry:
821 return "gp";
822 case ShaderType::Fragment:
823 return "fp";
824 case ShaderType::Compute:
825 return "cp";
826 default:
827 UNREACHABLE();
828 return "";
829 }
830}
831
832void ARBDecompiler::DeclareHeader() {
833 AddLine("!!NV{}5.0", HeaderStageName(stage));
834 // Enabling this allows us to cheat on some instructions like TXL with SHADOWARRAY2D
835 AddLine("OPTION NV_internal;");
836 AddLine("OPTION NV_gpu_program_fp64;");
837 AddLine("OPTION NV_shader_storage_buffer;");
838 AddLine("OPTION NV_shader_thread_group;");
839 if (ir.UsesWarps() && device.HasWarpIntrinsics()) {
840 AddLine("OPTION NV_shader_thread_shuffle;");
841 }
842 if (stage == ShaderType::Vertex) {
843 if (device.HasNvViewportArray2()) {
844 AddLine("OPTION NV_viewport_array2;");
845 }
846 }
847 if (stage == ShaderType::Fragment) {
848 AddLine("OPTION ARB_draw_buffers;");
849 }
850 if (device.HasImageLoadFormatted()) {
851 AddLine("OPTION EXT_shader_image_load_formatted;");
852 }
853}
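// For a fragment shader this emits, roughly:
//   !!NVfp5.0
//   OPTION NV_internal;
//   OPTION NV_gpu_program_fp64;
//   OPTION NV_shader_storage_buffer;
//   OPTION NV_shader_thread_group;
//   OPTION NV_shader_thread_shuffle;        # only when warps are used and supported
//   OPTION ARB_draw_buffers;
//   OPTION EXT_shader_image_load_formatted; # only when the device supports it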
854
855void ARBDecompiler::DeclareVertex() {
856 if (stage != ShaderType::Vertex) {
857 return;
858 }
859 AddLine("OUTPUT result_clip[] = {{ result.clip[0..7] }};");
860}
861
862void ARBDecompiler::DeclareGeometry() {
863 if (stage != ShaderType::Geometry) {
864 return;
865 }
866 const auto& info = registry.GetGraphicsInfo();
867 const auto& header = ir.GetHeader();
868 AddLine("PRIMITIVE_IN {};", PrimitiveDescription(info.primitive_topology));
869 AddLine("PRIMITIVE_OUT {};", TopologyName(header.common3.output_topology));
870 AddLine("VERTICES_OUT {};", header.common4.max_output_vertices.Value());
871 AddLine("ATTRIB vertex_position = vertex.position;");
872}
873
874void ARBDecompiler::DeclareFragment() {
875 if (stage != ShaderType::Fragment) {
876 return;
877 }
878 AddLine("OUTPUT result_color7 = result.color[7];");
879 AddLine("OUTPUT result_color6 = result.color[6];");
880 AddLine("OUTPUT result_color5 = result.color[5];");
881 AddLine("OUTPUT result_color4 = result.color[4];");
882 AddLine("OUTPUT result_color3 = result.color[3];");
883 AddLine("OUTPUT result_color2 = result.color[2];");
884 AddLine("OUTPUT result_color1 = result.color[1];");
885 AddLine("OUTPUT result_color0 = result.color;");
886}
887
888void ARBDecompiler::DeclareCompute() {
889 if (stage != ShaderType::Compute) {
890 return;
891 }
892 const ComputeInfo& info = registry.GetComputeInfo();
893 AddLine("GROUP_SIZE {} {} {};", info.workgroup_size[0], info.workgroup_size[1],
894 info.workgroup_size[2]);
895 if (info.shared_memory_size_in_words > 0) {
896 const u32 size_in_bytes = info.shared_memory_size_in_words * 4;
897 AddLine("SHARED_MEMORY {};", size_in_bytes);
898 AddLine("SHARED shared_mem[] = {{program.sharedmem}};");
899 }
900}
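// Illustrative output, assuming a hypothetical 8x8x1 workgroup with 16 words
// of shared memory:
//   GROUP_SIZE 8 8 1;
//   SHARED_MEMORY 64;
//   SHARED shared_mem[] = {program.sharedmem};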
901
902void ARBDecompiler::DeclareInputAttributes() {
903 if (stage == ShaderType::Compute) {
904 return;
905 }
906 const std::string_view stage_name = StageInputName(stage);
907 for (const auto attribute : ir.GetInputAttributes()) {
908 if (!IsGenericAttribute(attribute)) {
909 continue;
910 }
911 const u32 index = GetGenericAttributeIndex(attribute);
912
913 std::string_view suffix;
914 if (stage == ShaderType::Fragment) {
915 const auto input_mode{ir.GetHeader().ps.GetPixelImap(index)};
916 if (input_mode == PixelImap::Unused) {
917 continue; // Skip only this attribute; an early return would also drop the remaining declarations.
918 }
919 suffix = GetInputFlags(input_mode);
920 }
921 AddLine("{}ATTRIB in_attr{}[] = {{ {}.attrib[{}..{}] }};", suffix, index, stage_name, index,
922 index);
923 }
924}
925
926void ARBDecompiler::DeclareOutputAttributes() {
927 if (stage == ShaderType::Compute) {
928 return;
929 }
930 for (const auto attribute : ir.GetOutputAttributes()) {
931 if (!IsGenericAttribute(attribute)) {
932 continue;
933 }
934 const u32 index = GetGenericAttributeIndex(attribute);
935 AddLine("OUTPUT out_attr{}[] = {{ result.attrib[{}..{}] }};", index, index, index);
936 }
937}
938
939void ARBDecompiler::DeclareLocalMemory() {
940 u64 size = 0;
941 if (stage == ShaderType::Compute) {
942 size = registry.GetComputeInfo().local_memory_size_in_words * 4ULL;
943 } else {
944 size = ir.GetHeader().GetLocalMemorySize();
945 }
946 if (size == 0) {
947 return;
948 }
949 const u64 element_count = Common::AlignUp(size, 4) / 4;
950 AddLine("TEMP lmem[{}];", element_count);
951}
952
953void ARBDecompiler::DeclareGlobalMemory() {
954 u32 binding = 0; // device.GetBaseBindings(stage).shader_storage_buffer;
955 for (const auto& pair : ir.GetGlobalMemory()) {
956 const auto& base = pair.first;
957 AddLine("STORAGE {}[] = {{ program.storage[{}] }};", GlobalMemoryName(base), binding);
958 ++binding;
959 }
960}
961
962void ARBDecompiler::DeclareConstantBuffers() {
963 u32 binding = 0;
964 for (const auto& cbuf : ir.GetConstantBuffers()) {
965 AddLine("CBUFFER cbuf{}[] = {{ program.buffer[{}] }};", cbuf.first, binding);
966 ++binding;
967 }
968}
969
970void ARBDecompiler::DeclareRegisters() {
971 for (const u32 gpr : ir.GetRegisters()) {
972 AddLine("TEMP R{};", gpr);
973 }
974}
975
976void ARBDecompiler::DeclareTemporaries() {
977 for (std::size_t i = 0; i < max_temporaries; ++i) {
978 AddLine("TEMP T{};", i);
979 }
980}
981
982void ARBDecompiler::DeclarePredicates() {
983 for (const Tegra::Shader::Pred pred : ir.GetPredicates()) {
984 AddLine("TEMP P{};", static_cast<u64>(pred));
985 }
986}
987
988void ARBDecompiler::DeclareInternalFlags() {
989 for (const char* name : INTERNAL_FLAG_NAMES) {
990 AddLine("TEMP {};", name);
991 }
992}
993
994void ARBDecompiler::InitializeVariables() {
995 AddLine("MOV.F32 FSWZA[0], -1;");
996 AddLine("MOV.F32 FSWZA[1], 1;");
997 AddLine("MOV.F32 FSWZA[2], -1;");
998 AddLine("MOV.F32 FSWZA[3], 0;");
999 AddLine("MOV.F32 FSWZB[0], -1;");
1000 AddLine("MOV.F32 FSWZB[1], -1;");
1001 AddLine("MOV.F32 FSWZB[2], 1;");
1002 AddLine("MOV.F32 FSWZB[3], -1;");
1003
1004 if (stage == ShaderType::Vertex || stage == ShaderType::Geometry) {
1005 AddLine("MOV.F result.position, {{0, 0, 0, 1}};");
1006 }
1007 for (const auto attribute : ir.GetOutputAttributes()) {
1008 if (!IsGenericAttribute(attribute)) {
1009 continue;
1010 }
1011 const u32 index = GetGenericAttributeIndex(attribute);
1012 AddLine("MOV.F result.attrib[{}], {{0, 0, 0, 1}};", index);
1013 }
1014 for (const u32 gpr : ir.GetRegisters()) {
1015 AddLine("MOV.F R{}, {{0, 0, 0, 0}};", gpr);
1016 }
1017 for (const Tegra::Shader::Pred pred : ir.GetPredicates()) {
1018 AddLine("MOV.U P{}, {{0, 0, 0, 0}};", static_cast<u64>(pred));
1019 }
1020}
1021
1022void ARBDecompiler::DecompileAST() {
1023 const u32 num_flow_variables = ir.GetASTNumVariables();
1024 for (u32 i = 0; i < num_flow_variables; ++i) {
1025 AddLine("TEMP F{};", i);
1026 }
1027 for (u32 i = 0; i < num_flow_variables; ++i) {
1028 AddLine("MOV.U F{}, {{0, 0, 0, 0}};", i);
1029 }
1030
1031 InitializeVariables();
1032
1033 VisitAST(ir.GetASTProgram());
1034}
1035
1036void ARBDecompiler::DecompileBranchMode() {
1037 static constexpr u32 FLOW_STACK_SIZE = 20;
1038 if (!ir.IsFlowStackDisabled()) {
1039 AddLine("TEMP SSY[{}];", FLOW_STACK_SIZE);
1040 AddLine("TEMP PBK[{}];", FLOW_STACK_SIZE);
1041 AddLine("TEMP SSY_TOP;");
1042 AddLine("TEMP PBK_TOP;");
1043 }
1044
1045 AddLine("TEMP PC;");
1046
1047 if (!ir.IsFlowStackDisabled()) {
1048 AddLine("MOV.U SSY_TOP.x, 0;");
1049 AddLine("MOV.U PBK_TOP.x, 0;");
1050 }
1051
1052 InitializeVariables();
1053
1054 const auto basic_block_end = ir.GetBasicBlocks().end();
1055 auto basic_block_it = ir.GetBasicBlocks().begin();
1056 const u32 first_address = basic_block_it->first;
1057 AddLine("MOV.U PC.x, {};", first_address);
1058
1059 AddLine("REP;");
1060
1061 std::size_t num_blocks = 0;
1062 while (basic_block_it != basic_block_end) {
1063 const auto& [address, bb] = *basic_block_it;
1064 ++num_blocks;
1065
1066 AddLine("SEQ.S.CC RC.x, PC.x, {};", address);
1067 AddLine("IF NE.x;");
1068
1069 VisitBlock(bb);
1070
1071 ++basic_block_it;
1072
1073 if (basic_block_it != basic_block_end) {
1074 const auto op = std::get_if<OperationNode>(&*bb[bb.size() - 1]);
1075 if (!op || op->GetCode() != OperationCode::Branch) {
1076 const u32 next_address = basic_block_it->first;
1077 AddLine("MOV.U PC.x, {};", next_address);
1078 AddLine("CONT;");
1079 }
1080 }
1081
1082 AddLine("ELSE;");
1083 }
1084 AddLine("RET;");
1085 while (num_blocks--) {
1086 AddLine("ENDIF;");
1087 }
1088
1089 AddLine("ENDREP;");
1090}
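// Sketch of the emitted dispatch loop for two basic blocks at addresses A and
// B (flow-stack setup omitted):
//   MOV.U PC.x, A;
//   REP;
//   SEQ.S.CC RC.x, PC.x, A;
//   IF NE.x;
//     <block A>
//     MOV.U PC.x, B;   # only when A does not end in an explicit branch
//     CONT;
//   ELSE;
//   SEQ.S.CC RC.x, PC.x, B;
//   IF NE.x;
//     <block B>
//   ELSE;
//   RET;               # PC matched no block: leave the shader
//   ENDIF;
//   ENDIF;
//   ENDREP;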
1091
1092void ARBDecompiler::VisitAST(const ASTNode& node) {
1093 if (const auto ast = std::get_if<ASTProgram>(&*node->GetInnerData())) {
1094 for (ASTNode current = ast->nodes.GetFirst(); current; current = current->GetNext()) {
1095 VisitAST(current);
1096 }
1097 } else if (const auto ast = std::get_if<ASTIfThen>(&*node->GetInnerData())) {
1098 const std::string condition = VisitExpression(ast->condition);
1099 ResetTemporaries();
1100
1101 AddLine("MOVC.U RC.x, {};", condition);
1102 AddLine("IF NE.x;");
1103 for (ASTNode current = ast->nodes.GetFirst(); current; current = current->GetNext()) {
1104 VisitAST(current);
1105 }
1106 AddLine("ENDIF;");
1107 } else if (const auto ast = std::get_if<ASTIfElse>(&*node->GetInnerData())) {
1108 AddLine("ELSE;");
1109 for (ASTNode current = ast->nodes.GetFirst(); current; current = current->GetNext()) {
1110 VisitAST(current);
1111 }
1112 } else if (const auto ast = std::get_if<ASTBlockDecoded>(&*node->GetInnerData())) {
1113 VisitBlock(ast->nodes);
1114 } else if (const auto ast = std::get_if<ASTVarSet>(&*node->GetInnerData())) {
1115 AddLine("MOV.U F{}, {};", ast->index, VisitExpression(ast->condition));
1116 ResetTemporaries();
1117 } else if (const auto ast = std::get_if<ASTDoWhile>(&*node->GetInnerData())) {
1118 const std::string condition = VisitExpression(ast->condition);
1119 ResetTemporaries();
1120 AddLine("REP;");
1121 for (ASTNode current = ast->nodes.GetFirst(); current; current = current->GetNext()) {
1122 VisitAST(current);
1123 }
1124 AddLine("MOVC.U RC.x, {};", condition);
1125 AddLine("BRK (NE.x);");
1126 AddLine("ENDREP;");
1127 } else if (const auto ast = std::get_if<ASTReturn>(&*node->GetInnerData())) {
1128 const bool is_true = ExprIsTrue(ast->condition);
1129 if (!is_true) {
1130 AddLine("MOVC.U RC.x, {};", VisitExpression(ast->condition));
1131 AddLine("IF NE.x;");
1132 ResetTemporaries();
1133 }
1134 if (ast->kills) {
1135 AddLine("KIL TR;");
1136 } else {
1137 Exit();
1138 }
1139 if (!is_true) {
1140 AddLine("ENDIF;");
1141 }
1142 } else if (const auto ast = std::get_if<ASTBreak>(&*node->GetInnerData())) {
1143 if (ExprIsTrue(ast->condition)) {
1144 AddLine("BRK;");
1145 } else {
1146 AddLine("MOVC.U RC.x, {};", VisitExpression(ast->condition));
1147 AddLine("BRK (NE.x);");
1148 ResetTemporaries();
1149 }
1150 } else if (std::holds_alternative<ASTLabel>(*node->GetInnerData())) {
1151 // Nothing to do
1152 } else {
1153 UNREACHABLE();
1154 }
1155}
1156
1157std::string ARBDecompiler::VisitExpression(const Expr& node) {
1158 if (const auto expr = std::get_if<ExprAnd>(&*node)) {
1159 std::string result = AllocTemporary();
1160 AddLine("AND.U {}, {}, {};", result, VisitExpression(expr->operand1),
1161 VisitExpression(expr->operand2));
1162 return result;
1163 }
1164 if (const auto expr = std::get_if<ExprOr>(&*node)) {
1165 std::string result = AllocTemporary();
1166 AddLine("OR.U {}, {}, {};", result, VisitExpression(expr->operand1),
1167 VisitExpression(expr->operand2));
1168 return result;
1169 }
1170 if (const auto expr = std::get_if<ExprNot>(&*node)) {
1171 std::string result = AllocTemporary();
1172 AddLine("CMP.S {}, {}, 0, -1;", result, VisitExpression(expr->operand1));
1173 return result;
1174 }
1175 if (const auto expr = std::get_if<ExprPredicate>(&*node)) {
1176 return fmt::format("P{}.x", static_cast<u64>(expr->predicate));
1177 }
1178 if (const auto expr = std::get_if<ExprCondCode>(&*node)) {
1179 return Visit(ir.GetConditionCode(expr->cc));
1180 }
1181 if (const auto expr = std::get_if<ExprVar>(&*node)) {
1182 return fmt::format("F{}.x", expr->var_index);
1183 }
1184 if (const auto expr = std::get_if<ExprBoolean>(&*node)) {
1185 return expr->value ? "0xffffffff" : "0";
1186 }
1187 if (const auto expr = std::get_if<ExprGprEqual>(&*node)) {
1188 std::string result = AllocTemporary();
1189 AddLine("SEQ.U {}, R{}.x, {};", result, expr->gpr, expr->value);
1190 return result;
1191 }
1192 UNREACHABLE();
1193 return "0";
1194}
1195
1196void ARBDecompiler::VisitBlock(const NodeBlock& bb) {
1197 for (const auto& node : bb) {
1198 Visit(node);
1199 }
1200}
1201
1202std::string ARBDecompiler::Visit(const Node& node) {
1203 if (const auto operation = std::get_if<OperationNode>(&*node)) {
1204 if (const auto amend_index = operation->GetAmendIndex()) {
1205 Visit(ir.GetAmendNode(*amend_index));
1206 }
1207 const std::size_t index = static_cast<std::size_t>(operation->GetCode());
1208 if (index >= OPERATION_DECOMPILERS.size()) {
1209 UNREACHABLE_MSG("Out of bounds operation: {}", index);
1210 return {};
1211 }
1212 const auto decompiler = OPERATION_DECOMPILERS[index];
1213 if (decompiler == nullptr) {
1214 UNREACHABLE_MSG("Undefined operation: {}", index);
1215 return {};
1216 }
1217 return (this->*decompiler)(*operation);
1218 }
1219
1220 if (const auto gpr = std::get_if<GprNode>(&*node)) {
1221 const u32 index = gpr->GetIndex();
1222 if (index == Register::ZeroIndex) {
1223 return "{0, 0, 0, 0}.x";
1224 }
1225 return fmt::format("R{}.x", index);
1226 }
1227
1228 if (const auto cv = std::get_if<CustomVarNode>(&*node)) {
1229 return fmt::format("CV{}.x", cv->GetIndex());
1230 }
1231
1232 if (const auto immediate = std::get_if<ImmediateNode>(&*node)) {
1233 std::string temporary = AllocTemporary();
1234 AddLine("MOV.U {}, {};", temporary, immediate->GetValue());
1235 return temporary;
1236 }
1237
1238 if (const auto predicate = std::get_if<PredicateNode>(&*node)) {
1239 std::string temporary = AllocTemporary();
1240 switch (const auto index = predicate->GetIndex(); index) {
1241 case Tegra::Shader::Pred::UnusedIndex:
1242 AddLine("MOV.S {}, -1;", temporary);
1243 break;
1244 case Tegra::Shader::Pred::NeverExecute:
1245 AddLine("MOV.S {}, 0;", temporary);
1246 break;
1247 default:
1248 AddLine("MOV.S {}, P{}.x;", temporary, static_cast<u64>(index));
1249 break;
1250 }
1251 if (predicate->IsNegated()) {
1252 AddLine("CMP.S {}, {}, 0, -1;", temporary, temporary);
1253 }
1254 return temporary;
1255 }
1256
1257 if (const auto abuf = std::get_if<AbufNode>(&*node)) {
1258 if (abuf->IsPhysicalBuffer()) {
1259 UNIMPLEMENTED_MSG("Physical buffers are not implemented");
1260 return "{0, 0, 0, 0}.x";
1261 }
1262
1263 const auto buffer_index = [this, &abuf]() -> std::string {
1264 if (stage != ShaderType::Geometry) {
1265 return "";
1266 }
1267 return fmt::format("[{}]", Visit(abuf->GetBuffer()));
1268 };
1269
1270 const Attribute::Index index = abuf->GetIndex();
1271 const u32 element = abuf->GetElement();
1272 const char swizzle = Swizzle(element);
1273 switch (index) {
1274 case Attribute::Index::Position: {
1275 if (stage == ShaderType::Geometry) {
1276 return fmt::format("{}_position[{}].{}", StageInputName(stage),
1277 Visit(abuf->GetBuffer()), swizzle);
1278 } else {
1279 return fmt::format("{}.position.{}", StageInputName(stage), swizzle);
1280 }
1281 }
1282 case Attribute::Index::TessCoordInstanceIDVertexID:
1283 ASSERT(stage == ShaderType::Vertex);
1284 switch (element) {
1285 case 2:
1286 return "vertex.instance";
1287 case 3:
1288 return "vertex.id";
1289 }
1290 UNIMPLEMENTED_MSG("Unmanaged TessCoordInstanceIDVertexID element={}", element);
1291 break;
1292 case Attribute::Index::PointCoord:
1293 switch (element) {
1294 case 0:
1295 return "fragment.pointcoord.x";
1296 case 1:
1297 return "fragment.pointcoord.y";
1298 }
1299 UNIMPLEMENTED();
1300 break;
1301 case Attribute::Index::FrontFacing: {
1302 ASSERT(stage == ShaderType::Fragment);
1303 ASSERT(element == 3);
1304 const std::string temporary = AllocVectorTemporary();
1305 AddLine("SGT.S RC.x, fragment.facing, {{0, 0, 0, 0}};");
1306 AddLine("MOV.U.CC RC.x, -RC;");
1307 AddLine("MOV.S {}.x, 0;", temporary);
1308 AddLine("MOV.S {}.x (NE.x), -1;", temporary);
1309 return fmt::format("{}.x", temporary);
1310 }
1311 default:
1312 if (IsGenericAttribute(index)) {
1313 if (stage == ShaderType::Geometry) {
1314 return fmt::format("in_attr{}[{}][0].{}", GetGenericAttributeIndex(index),
1315 Visit(abuf->GetBuffer()), swizzle);
1316 } else {
1317 return fmt::format("{}.attrib[{}].{}", StageInputName(stage),
1318 GetGenericAttributeIndex(index), swizzle);
1319 }
1320 }
1321 UNIMPLEMENTED_MSG("Unimplemented input attribute={}", static_cast<int>(index));
1322 break;
1323 }
1324 return "{0, 0, 0, 0}.x";
1325 }
1326
1327 if (const auto cbuf = std::get_if<CbufNode>(&*node)) {
1328 std::string offset_string;
1329 const auto& offset = cbuf->GetOffset();
1330 if (const auto imm = std::get_if<ImmediateNode>(&*offset)) {
1331 offset_string = std::to_string(imm->GetValue());
1332 } else {
1333 offset_string = Visit(offset);
1334 }
1335 std::string temporary = AllocTemporary();
1336 AddLine("LDC.F32 {}, cbuf{}[{}];", temporary, cbuf->GetIndex(), offset_string);
1337 return temporary;
1338 }
1339
1340 if (const auto gmem = std::get_if<GmemNode>(&*node)) {
1341 std::string temporary = AllocTemporary();
1342 AddLine("SUB.U {}, {}, {};", temporary, Visit(gmem->GetRealAddress()),
1343 Visit(gmem->GetBaseAddress()));
1344 AddLine("LDB.U32 {}, {}[{}];", temporary, GlobalMemoryName(gmem->GetDescriptor()),
1345 temporary);
1346 return temporary;
1347 }
1348
1349 if (const auto lmem = std::get_if<LmemNode>(&*node)) {
1350 std::string temporary = Visit(lmem->GetAddress());
1351 AddLine("SHR.U {}, {}, 2;", temporary, temporary);
1352 AddLine("MOV.U {}, lmem[{}].x;", temporary, temporary);
1353 return temporary;
1354 }
1355
1356 if (const auto smem = std::get_if<SmemNode>(&*node)) {
1357 std::string temporary = Visit(smem->GetAddress());
1358 AddLine("LDS.U32 {}, shared_mem[{}];", temporary, temporary);
1359 return temporary;
1360 }
1361
1362 if (const auto internal_flag = std::get_if<InternalFlagNode>(&*node)) {
1363 const std::size_t index = static_cast<std::size_t>(internal_flag->GetFlag());
1364 return fmt::format("{}.x", INTERNAL_FLAG_NAMES[index]);
1365 }
1366
1367 if (const auto conditional = std::get_if<ConditionalNode>(&*node)) {
1368 if (const auto amend_index = conditional->GetAmendIndex()) {
1369 Visit(ir.GetAmendNode(*amend_index));
1370 }
1371 AddLine("MOVC.U RC.x, {};", Visit(conditional->GetCondition()));
1372 AddLine("IF NE.x;");
1373 VisitBlock(conditional->GetCode());
1374 AddLine("ENDIF;");
1375 return {};
1376 }
1377
1378 if (const auto cmt = std::get_if<CommentNode>(&*node)) {
1379 // Uncommenting this would emit invalid code: GLASM has no C-style "//" comments (ARB assembly uses '#').
1380 // AddLine("// {}", cmt->GetText());
1381 return {};
1382 }
1383
1384 UNIMPLEMENTED();
1385 return {};
1386}
1387
1388std::pair<std::string, std::size_t> ARBDecompiler::BuildCoords(Operation operation) {
1389 const auto& meta = std::get<MetaTexture>(operation.GetMeta());
1390 UNIMPLEMENTED_IF(meta.sampler.is_indexed);
1391 UNIMPLEMENTED_IF(meta.sampler.is_shadow && meta.sampler.is_array &&
1392 meta.sampler.type == Tegra::Shader::TextureType::TextureCube);
1393
1394 const std::size_t count = operation.GetOperandsCount();
1395 std::string temporary = AllocVectorTemporary();
1396 std::size_t i = 0;
1397 for (; i < count; ++i) {
1398 AddLine("MOV.F {}.{}, {};", temporary, Swizzle(i), Visit(operation[i]));
1399 }
1400 if (meta.sampler.is_array) {
1401 AddLine("I2F.S {}.{}, {};", temporary, Swizzle(i++), Visit(meta.array));
1402 }
1403 if (meta.sampler.is_shadow) {
1404 AddLine("MOV.F {}.{}, {};", temporary, Swizzle(i++), Visit(meta.depth_compare));
1405 }
1406 return {std::move(temporary), i};
1407}
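// Example packing, assuming a 2D array shadow sampler with coordinates s and
// t: the vector temporary ends up as { s, t, I2F(array_index), depth_compare }
// and the returned component count is 4.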
1408
1409std::string ARBDecompiler::BuildAoffi(Operation operation) {
1410 const auto& meta = std::get<MetaTexture>(operation.GetMeta());
1411 if (meta.aoffi.empty()) {
1412 return {};
1413 }
1414 const std::string temporary = AllocVectorTemporary();
1415 std::size_t i = 0;
1416 for (auto& node : meta.aoffi) {
1417 AddLine("MOV.S {}.{}, {};", temporary, Swizzle(i++), Visit(node));
1418 }
1419 return fmt::format(", offset({})", temporary);
1420}
1421
1422void ARBDecompiler::Exit() {
1423 if (stage != ShaderType::Fragment) {
1424 AddLine("RET;");
1425 return;
1426 }
1427
1428 const auto safe_get_register = [this](u32 reg) -> std::string {
1429 // TODO(Rodrigo): Replace the find() call with contains() once C++20 is available
1430 const auto& used_registers = ir.GetRegisters();
1431 if (used_registers.find(reg) != used_registers.end()) {
1432 return fmt::format("R{}.x", reg);
1433 }
1434 return "{0, 0, 0, 0}.x";
1435 };
1436
1437 const auto& header = ir.GetHeader();
1438 u32 current_reg = 0;
1439 for (u32 rt = 0; rt < Tegra::Engines::Maxwell3D::Regs::NumRenderTargets; ++rt) {
1440 for (u32 component = 0; component < 4; ++component) {
1441 if (!header.ps.IsColorComponentOutputEnabled(rt, component)) {
1442 continue;
1443 }
1444 AddLine("MOV.F result_color{}.{}, {};", rt, Swizzle(component),
1445 safe_get_register(current_reg));
1446 ++current_reg;
1447 }
1448 }
1449 if (header.ps.omap.depth) {
1450 AddLine("MOV.F result.depth.z, {};", safe_get_register(current_reg + 1));
1451 }
1452
1453 AddLine("RET;");
1454}
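// Illustration: if render target 0 writes RGBA and render target 1 writes
// only R, this epilogue emits
//   MOV.F result_color0.x, R0.x;  ...  MOV.F result_color0.w, R3.x;
//   MOV.F result_color1.x, R4.x;
// where registers absent from ir.GetRegisters() read as zero, and the depth
// output, when enabled, sits two registers past the last color output (hence
// current_reg + 1).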
1455
1456std::string ARBDecompiler::Assign(Operation operation) {
1457 const Node& dest = operation[0];
1458 const Node& src = operation[1];
1459
1460 std::string dest_name;
1461 if (const auto gpr = std::get_if<GprNode>(&*dest)) {
1462 if (gpr->GetIndex() == Register::ZeroIndex) {
1463 // Writing to Register::ZeroIndex is a no-op
1464 return {};
1465 }
1466 dest_name = fmt::format("R{}.x", gpr->GetIndex());
1467 } else if (const auto abuf = std::get_if<AbufNode>(&*dest)) {
1468 const u32 element = abuf->GetElement();
1469 const char swizzle = Swizzle(element);
1470 switch (const Attribute::Index index = abuf->GetIndex()) {
1471 case Attribute::Index::Position:
1472 dest_name = fmt::format("result.position.{}", swizzle);
1473 break;
1474 case Attribute::Index::LayerViewportPointSize:
1475 switch (element) {
1476 case 0:
1477 UNIMPLEMENTED();
1478 return {};
1479 case 1:
1480 case 2:
1481 if (!device.HasNvViewportArray2()) {
1482 LOG_ERROR(
1483 Render_OpenGL,
1484 "NV_viewport_array2 is missing. Maxwell gen 2 or better is required.");
1485 return {};
1486 }
1487 dest_name = element == 1 ? "result.layer.x" : "result.viewport.x";
1488 break;
1489 case 3:
1490 dest_name = "result.pointsize.x";
1491 break;
1492 }
1493 break;
1494 case Attribute::Index::ClipDistances0123:
1495 dest_name = fmt::format("result.clip[{}].x", element);
1496 break;
1497 case Attribute::Index::ClipDistances4567:
1498 dest_name = fmt::format("result.clip[{}].x", element + 4);
1499 break;
1500 default:
1501 if (!IsGenericAttribute(index)) {
1502 UNREACHABLE();
1503 return {};
1504 }
1505 dest_name =
1506 fmt::format("result.attrib[{}].{}", GetGenericAttributeIndex(index), swizzle);
1507 break;
1508 }
1509 } else if (const auto lmem = std::get_if<LmemNode>(&*dest)) {
1510 const std::string address = Visit(lmem->GetAddress());
1511 AddLine("SHR.U {}, {}, 2;", address, address);
1512 dest_name = fmt::format("lmem[{}].x", address);
1513 } else if (const auto smem = std::get_if<SmemNode>(&*dest)) {
1514 AddLine("STS.U32 {}, shared_mem[{}];", Visit(src), Visit(smem->GetAddress()));
1515 ResetTemporaries();
1516 return {};
1517 } else if (const auto gmem = std::get_if<GmemNode>(&*dest)) {
1518 const std::string temporary = AllocTemporary();
1519 AddLine("SUB.U {}, {}, {};", temporary, Visit(gmem->GetRealAddress()),
1520 Visit(gmem->GetBaseAddress()));
1521 AddLine("STB.U32 {}, {}[{}];", Visit(src), GlobalMemoryName(gmem->GetDescriptor()),
1522 temporary);
1523 ResetTemporaries();
1524 return {};
1525 } else {
1526 UNREACHABLE();
1527 ResetTemporaries();
1528 return {};
1529 }
1530
1531 AddLine("MOV.U {}, {};", dest_name, Visit(src));
1532 ResetTemporaries();
1533 return {};
1534}
1535
1536std::string ARBDecompiler::Select(Operation operation) {
1537 std::string temporary = AllocTemporary();
1538 AddLine("CMP.S {}, {}, {}, {};", temporary, Visit(operation[0]), Visit(operation[1]),
1539 Visit(operation[2]));
1540 return temporary;
1541}
1542
1543std::string ARBDecompiler::FClamp(Operation operation) {
1544 // 0x3f800000 is 1.0f in hex; replace this with std::bit_cast once C++20 is available
1545 static constexpr u32 POSITIVE_ONE = 0x3f800000;
1546
1547 std::string temporary = AllocTemporary();
1548 const Node& value = operation[0];
1549 const Node& low = operation[1];
1550 const Node& high = operation[2];
1551 const auto* const imm_low = std::get_if<ImmediateNode>(&*low);
1552 const auto* const imm_high = std::get_if<ImmediateNode>(&*high);
1553 if (imm_low && imm_high && imm_low->GetValue() == 0 && imm_high->GetValue() == POSITIVE_ONE) {
1554 AddLine("MOV.F32.SAT {}, {};", temporary, Visit(value));
1555 } else {
1556 AddLine("MIN.F {}, {}, {};", temporary, Visit(value), Visit(high));
1557 AddLine("MAX.F {}, {}, {};", temporary, temporary, Visit(low));
1558 }
1559 return temporary;
1560}
1561
1562std::string ARBDecompiler::FCastHalf0(Operation operation) {
1563 const std::string temporary = AllocVectorTemporary();
1564 AddLine("UP2H.F {}.x, {};", temporary, Visit(operation[0]));
1565 return fmt::format("{}.x", temporary);
1566}
1567
1568std::string ARBDecompiler::FCastHalf1(Operation operation) {
1569 const std::string temporary = AllocVectorTemporary();
1570 AddLine("UP2H.F {}.y, {};", temporary, Visit(operation[0]));
1571 AddLine("MOV {}.x, {}.y;", temporary, temporary);
1572 return fmt::format("{}.x", temporary);
1573}
1574
1575std::string ARBDecompiler::FSqrt(Operation operation) {
1576 std::string temporary = AllocTemporary();
1577 AddLine("RSQ.F32 {}, {};", temporary, Visit(operation[0]));
1578 AddLine("RCP.F32 {}, {};", temporary, temporary);
1579 return temporary;
1580}
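// There is no dedicated square-root instruction here, so sqrt(x) is
// synthesized as 1 / rsqrt(x):
//   RSQ.F32 T0.x, R0.x;
//   RCP.F32 T0.x, T0.x;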
1581
1582std::string ARBDecompiler::FSwizzleAdd(Operation operation) {
1583 const std::string temporary = AllocVectorTemporary();
1584 if (!device.HasWarpIntrinsics()) {
1585 LOG_ERROR(Render_OpenGL,
1586 "NV_shader_thread_shuffle is missing. Kepler or better is required.");
1587 AddLine("ADD.F {}.x, {}, {};", temporary, Visit(operation[0]), Visit(operation[1]));
1588 return fmt::format("{}.x", temporary);
1589 }
1590
1591 AddLine("AND.U {}.z, {}.threadid, 3;", temporary, StageInputName(stage));
1592 AddLine("SHL.U {}.z, {}.z, 1;", temporary, temporary);
1593 AddLine("SHR.U {}.z, {}, {}.z;", temporary, Visit(operation[2]), temporary);
1594 AddLine("AND.U {}.z, {}.z, 3;", temporary, temporary);
1595 AddLine("MUL.F32 {}.x, {}, FSWZA[{}.z];", temporary, Visit(operation[0]), temporary);
1596 AddLine("MUL.F32 {}.y, {}, FSWZB[{}.z];", temporary, Visit(operation[1]), temporary);
1597 AddLine("ADD.F32 {}.x, {}.x, {}.y;", temporary, temporary, temporary);
1598 return fmt::format("{}.x", temporary);
1599}
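// Per lane, two bits of the mask in operation[2], selected by the thread id
// modulo 4, index the FSWZA/FSWZB sign tables set up in
// InitializeVariables(); the result is effectively
//   a * FSWZA[s] + b * FSWZB[s]   with s = (mask >> ((tid & 3) * 2)) & 3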
1600
1601std::string ARBDecompiler::HAdd2(Operation operation) {
1602 const std::string tmp1 = AllocVectorTemporary();
1603 const std::string tmp2 = AllocVectorTemporary();
1604 AddLine("UP2H.F {}.xy, {};", tmp1, Visit(operation[0]));
1605 AddLine("UP2H.F {}.xy, {};", tmp2, Visit(operation[1]));
1606 AddLine("ADD.F16 {}, {}, {};", tmp1, tmp1, tmp2);
1607 AddLine("PK2H.F {}.x, {};", tmp1, tmp1);
1608 return fmt::format("{}.x", tmp1);
1609}
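// HAdd2, HMul2 and HFma2 share one pattern: UP2H unpacks each packed f16x2
// operand into the .xy components of a vector temporary, the .F16 ALU op
// works on the pair, and PK2H repacks the result into a single 32-bit scalar:
//   UP2H.F T0.xy, R0.x;
//   UP2H.F T1.xy, R1.x;
//   ADD.F16 T0, T0, T1;
//   PK2H.F T0.x, T0;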
1610
1611std::string ARBDecompiler::HMul2(Operation operation) {
1612 const std::string tmp1 = AllocVectorTemporary();
1613 const std::string tmp2 = AllocVectorTemporary();
1614 AddLine("UP2H.F {}.xy, {};", tmp1, Visit(operation[0]));
1615 AddLine("UP2H.F {}.xy, {};", tmp2, Visit(operation[1]));
1616 AddLine("MUL.F16 {}, {}, {};", tmp1, tmp1, tmp2);
1617 AddLine("PK2H.F {}.x, {};", tmp1, tmp1);
1618 return fmt::format("{}.x", tmp1);
1619}
1620
1621std::string ARBDecompiler::HFma2(Operation operation) {
1622 const std::string tmp1 = AllocVectorTemporary();
1623 const std::string tmp2 = AllocVectorTemporary();
1624 const std::string tmp3 = AllocVectorTemporary();
1625 AddLine("UP2H.F {}.xy, {};", tmp1, Visit(operation[0]));
1626 AddLine("UP2H.F {}.xy, {};", tmp2, Visit(operation[1]));
1627 AddLine("UP2H.F {}.xy, {};", tmp3, Visit(operation[2]));
1628 AddLine("MAD.F16 {}, {}, {}, {};", tmp1, tmp1, tmp2, tmp3);
1629 AddLine("PK2H.F {}.x, {};", tmp1, tmp1);
1630 return fmt::format("{}.x", tmp1);
1631}
1632
1633std::string ARBDecompiler::HAbsolute(Operation operation) {
1634 const std::string temporary = AllocVectorTemporary();
1635 AddLine("UP2H.F {}.xy, {};", temporary, Visit(operation[0]));
1636 AddLine("PK2H.F {}.x, |{}|;", temporary, temporary);
1637 return fmt::format("{}.x", temporary);
1638}
1639
1640std::string ARBDecompiler::HNegate(Operation operation) {
1641 const std::string temporary = AllocVectorTemporary();
1642 AddLine("UP2H.F {}.xy, {};", temporary, Visit(operation[0]));
1643 AddLine("MOVC.S RC.x, {};", Visit(operation[1]));
1644 AddLine("MOV.F {}.x (NE.x), -{}.x;", temporary, temporary);
1645 AddLine("MOVC.S RC.x, {};", Visit(operation[2]));
1646 AddLine("MOV.F {}.y (NE.x), -{}.y;", temporary, temporary);
1647 AddLine("PK2H.F {}.x, {};", temporary, temporary);
1648 return fmt::format("{}.x", temporary);
1649}
1650
1651std::string ARBDecompiler::HClamp(Operation operation) {
1652 const std::string tmp1 = AllocVectorTemporary();
1653 const std::string tmp2 = AllocVectorTemporary();
1654 AddLine("UP2H.F {}.xy, {};", tmp1, Visit(operation[0]));
1655 AddLine("MOV.U {}.x, {};", tmp2, Visit(operation[1]));
1656 AddLine("MOV.U {}.y, {}.x;", tmp2, tmp2);
1657 AddLine("MAX.F {}, {}, {};", tmp1, tmp1, tmp2);
1658 AddLine("MOV.U {}.x, {};", tmp2, Visit(operation[2]));
1659 AddLine("MOV.U {}.y, {}.x;", tmp2, tmp2);
1660 AddLine("MIN.F {}, {}, {};", tmp1, tmp1, tmp2);
1661 AddLine("PK2H.F {}.x, {};", tmp1, tmp1);
1662 return fmt::format("{}.x", tmp1);
1663}
1664
1665std::string ARBDecompiler::HCastFloat(Operation operation) {
1666 const std::string temporary = AllocVectorTemporary();
1667 AddLine("MOV.F {}.y, {{0, 0, 0, 0}};", temporary);
1668 AddLine("MOV.F {}.x, {};", temporary, Visit(operation[0]));
1669 AddLine("PK2H.F {}.x, {};", temporary, temporary);
1670 return fmt::format("{}.x", temporary);
1671}
1672
1673std::string ARBDecompiler::HUnpack(Operation operation) {
1674 const std::string operand = Visit(operation[0]);
1675 switch (std::get<Tegra::Shader::HalfType>(operation.GetMeta())) {
1676 case Tegra::Shader::HalfType::H0_H1:
1677 return operand;
1678 case Tegra::Shader::HalfType::F32: {
1679 const std::string temporary = AllocVectorTemporary();
1680 AddLine("MOV.U {}.x, {};", temporary, operand);
1681 AddLine("MOV.U {}.y, {}.x;", temporary, temporary);
1682 AddLine("PK2H.F {}.x, {};", temporary, temporary);
1683 return fmt::format("{}.x", temporary);
1684 }
1685 case Tegra::Shader::HalfType::H0_H0: {
1686 const std::string temporary = AllocVectorTemporary();
1687 AddLine("UP2H.F {}.xy, {};", temporary, operand);
1688 AddLine("MOV.U {}.y, {}.x;", temporary, temporary);
1689 AddLine("PK2H.F {}.x, {};", temporary, temporary);
1690 return fmt::format("{}.x", temporary);
1691 }
1692 case Tegra::Shader::HalfType::H1_H1: {
1693 const std::string temporary = AllocVectorTemporary();
1694 AddLine("UP2H.F {}.xy, {};", temporary, operand);
1695 AddLine("MOV.U {}.x, {}.y;", temporary, temporary);
1696 AddLine("PK2H.F {}.x, {};", temporary, temporary);
1697 return fmt::format("{}.x", temporary);
1698 }
1699 }
1700 UNREACHABLE();
1701 return "{0, 0, 0, 0}.x";
1702}
1703
1704std::string ARBDecompiler::HMergeF32(Operation operation) {
1705 const std::string temporary = AllocVectorTemporary();
1706 AddLine("UP2H.F {}.xy, {};", temporary, Visit(operation[0]));
1707 return fmt::format("{}.x", temporary);
1708}
1709
1710std::string ARBDecompiler::HMergeH0(Operation operation) {
1711 const std::string temporary = AllocVectorTemporary();
1712 AddLine("UP2H.F {}.xy, {};", temporary, Visit(operation[0]));
1713 AddLine("UP2H.F {}.zw, {};", temporary, Visit(operation[1]));
1714 AddLine("MOV.U {}.x, {}.z;", temporary, temporary);
1715 AddLine("PK2H.F {}.x, {};", temporary, temporary);
1716 return fmt::format("{}.x", temporary);
1717}
1718
1719std::string ARBDecompiler::HMergeH1(Operation operation) {
1720 const std::string temporary = AllocVectorTemporary();
1721 AddLine("UP2H.F {}.xy, {};", temporary, Visit(operation[0]));
1722 AddLine("UP2H.F {}.zw, {};", temporary, Visit(operation[1]));
1723 AddLine("MOV.U {}.y, {}.w;", temporary, temporary);
1724 AddLine("PK2H.F {}.x, {};", temporary, temporary);
1725 return fmt::format("{}.x", temporary);
1726}
1727
1728std::string ARBDecompiler::HPack2(Operation operation) {
1729 const std::string temporary = AllocVectorTemporary();
1730 AddLine("MOV.U {}.x, {};", temporary, Visit(operation[0]));
1731 AddLine("MOV.U {}.y, {};", temporary, Visit(operation[1]));
1732 AddLine("PK2H.F {}.x, {};", temporary, temporary);
1733 return fmt::format("{}.x", temporary);
1734}
1735
1736std::string ARBDecompiler::LogicalAssign(Operation operation) {
1737 const Node& dest = operation[0];
1738 const Node& src = operation[1];
1739
1740 std::string target;
1741
1742 if (const auto pred = std::get_if<PredicateNode>(&*dest)) {
1743 ASSERT_MSG(!pred->IsNegated(), "Negating logical assignment");
1744
1745 const Tegra::Shader::Pred index = pred->GetIndex();
1746 switch (index) {
1747 case Tegra::Shader::Pred::NeverExecute:
1748 case Tegra::Shader::Pred::UnusedIndex:
1749 // Writing to these predicates is a no-op
1750 return {};
1751 }
1752 target = fmt::format("P{}.x", static_cast<u64>(index));
1753 } else if (const auto internal_flag = std::get_if<InternalFlagNode>(&*dest)) {
1754 const std::size_t index = static_cast<std::size_t>(internal_flag->GetFlag());
1755 target = fmt::format("{}.x", INTERNAL_FLAG_NAMES[index]);
1756 } else {
1757 UNREACHABLE();
1758 ResetTemporaries();
1759 return {};
1760 }
1761
1762 AddLine("MOV.U {}, {};", target, Visit(src));
1763 ResetTemporaries();
1764 return {};
1765}
1766
1767std::string ARBDecompiler::LogicalPick2(Operation operation) {
1768 std::string temporary = AllocTemporary();
1769 const u32 index = std::get<ImmediateNode>(*operation[1]).GetValue();
1770 AddLine("MOV.U {}, {}.{};", temporary, Visit(operation[0]), Swizzle(index));
1771 return temporary;
1772}
1773
1774std::string ARBDecompiler::LogicalAnd2(Operation operation) {
1775 std::string temporary = AllocTemporary();
1776 const std::string op = Visit(operation[0]);
1777 AddLine("AND.U {}, {}.x, {}.y;", temporary, op, op);
1778 return temporary;
1779}
1780
1781std::string ARBDecompiler::FloatOrdered(Operation operation) {
1782 std::string temporary = AllocTemporary();
1783 AddLine("MOVC.F32 RC.x, {};", Visit(operation[0]));
1784 AddLine("MOVC.F32 RC.y, {};", Visit(operation[1]));
1785 AddLine("MOV.S {}, -1;", temporary);
1786 AddLine("MOV.S {} (NAN.x), 0;", temporary);
1787 AddLine("MOV.S {} (NAN.y), 0;", temporary);
1788 return temporary;
1789}
1790
1791std::string ARBDecompiler::FloatUnordered(Operation operation) {
1792 std::string temporary = AllocTemporary();
1793 AddLine("MOVC.F32 RC.x, {};", Visit(operation[0]));
1794 AddLine("MOVC.F32 RC.y, {};", Visit(operation[1]));
1795 AddLine("MOV.S {}, 0;", temporary);
1796 AddLine("MOV.S {} (NAN.x), -1;", temporary);
1797 AddLine("MOV.S {} (NAN.y), -1;", temporary);
1798 return temporary;
1799}
1800
1801std::string ARBDecompiler::LogicalAddCarry(Operation operation) {
1802 std::string temporary = AllocTemporary();
1803 AddLine("ADDC.U RC, {}, {};", Visit(operation[0]), Visit(operation[1]));
1804 AddLine("MOV.S {}, 0;", temporary);
1805 AddLine("IF CF.x;");
1806 AddLine("MOV.S {}, -1;", temporary);
1807 AddLine("ENDIF;");
1808 return temporary;
1809}
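// ADDC.U updates the carry flag; the IF CF.x block then converts the carry
// into the 0 / -1 boolean encoding used throughout the decompiler.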
1810
1811std::string ARBDecompiler::Texture(Operation operation) {
1812 const auto& meta = std::get<MetaTexture>(operation.GetMeta());
1813 const u32 sampler_id = device.GetBaseBindings(stage).sampler + meta.sampler.index;
1814 const auto [temporary, coord_count] = BuildCoords(operation);
1815
1816 std::string_view opcode = "TEX";
1817 std::string extra;
1818 if (meta.bias) {
1819 ASSERT(!meta.lod);
1820 opcode = "TXB";
1821
1822 if (coord_count < 4) {
1823 AddLine("MOV.F {}.w, {};", temporary, Visit(meta.bias));
1824 } else {
1825 const std::string bias = AllocTemporary();
1826 AddLine("MOV.F {}, {};", bias, Visit(meta.bias));
1827 extra = fmt::format(" {},", bias);
1828 }
1829 }
1830 if (meta.lod) {
1831 ASSERT(!meta.bias);
1832 opcode = "TXL";
1833
1834 if (coord_count < 4) {
1835 AddLine("MOV.F {}.w, {};", temporary, Visit(meta.lod));
1836 } else {
1837 const std::string lod = AllocTemporary();
1838 AddLine("MOV.F {}, {};", lod, Visit(meta.lod));
1839 extra = fmt::format(" {},", lod);
1840 }
1841 }
1842
1843 AddLine("{}.F {}, {},{} texture[{}], {}{};", opcode, temporary, temporary, extra, sampler_id,
1844 TextureType(meta), BuildAoffi(operation));
1845 AddLine("MOV.U {}.x, {}.{};", temporary, temporary, Swizzle(meta.element));
1846 return fmt::format("{}.x", temporary);
1847}
1848
1849std::string ARBDecompiler::TextureGather(Operation operation) {
1850 const auto& meta = std::get<MetaTexture>(operation.GetMeta());
1851 const u32 sampler_id = device.GetBaseBindings(stage).sampler + meta.sampler.index;
1852 const auto [temporary, coord_count] = BuildCoords(operation);
1853
1854 std::string comp;
1855 if (!meta.sampler.is_shadow) {
1856 const auto& immediate = std::get<ImmediateNode>(*meta.component);
1857 comp = fmt::format(".{}", Swizzle(immediate.GetValue()));
1858 }
1859
1860 AddLine("TXG.F {}, {}, texture[{}]{}, {}{};", temporary, temporary, sampler_id, comp,
1861 TextureType(meta), BuildAoffi(operation));
1862 AddLine("MOV.U {}.x, {}.{};", temporary, temporary, Swizzle(meta.element));
1863 return fmt::format("{}.x", temporary);
1864}
1865
1866std::string ARBDecompiler::TextureQueryDimensions(Operation operation) {
1867 const auto& meta = std::get<MetaTexture>(operation.GetMeta());
1868 const std::string temporary = AllocVectorTemporary();
1869 const u32 sampler_id = device.GetBaseBindings(stage).sampler + meta.sampler.index;
1870
1871 ASSERT(!meta.sampler.is_array);
1872
1873 const std::string lod = operation.GetOperandsCount() > 0 ? Visit(operation[0]) : "0";
1874 AddLine("TXQ {}, {}, texture[{}], {};", temporary, lod, sampler_id, TextureType(meta));
1875 AddLine("MOV.U {}.x, {}.{};", temporary, temporary, Swizzle(meta.element));
1876 return fmt::format("{}.x", temporary);
1877}
1878
1879std::string ARBDecompiler::TextureQueryLod(Operation operation) {
1880 const auto& meta = std::get<MetaTexture>(operation.GetMeta());
1881 const std::string temporary = AllocVectorTemporary();
1882 const u32 sampler_id = device.GetBaseBindings(stage).sampler + meta.sampler.index;
1883
1884 ASSERT(!meta.sampler.is_array);
1885
1886 const std::size_t count = operation.GetOperandsCount();
1887 for (std::size_t i = 0; i < count; ++i) {
1888 AddLine("MOV.F {}.{}, {};", temporary, Swizzle(i), Visit(operation[i]));
1889 }
1890 AddLine("LOD.F {}, {}, texture[{}], {};", temporary, temporary, sampler_id, TextureType(meta));
1891 AddLine("MUL.F32 {}, {}, {{256, 256, 0, 0}};", temporary, temporary);
1892 AddLine("TRUNC.S {}, {};", temporary, temporary);
1893 AddLine("MOV.U {}.x, {}.{};", temporary, temporary, Swizzle(meta.element));
1894 return fmt::format("{}.x", temporary);
1895}
1896
1897std::string ARBDecompiler::TexelFetch(Operation operation) {
1898 const auto& meta = std::get<MetaTexture>(operation.GetMeta());
1899 const u32 sampler_id = device.GetBaseBindings(stage).sampler + meta.sampler.index;
1900 const auto [temporary, coord_count] = BuildCoords(operation);
1901
1902 if (!meta.sampler.is_buffer) {
1903 ASSERT(coord_count < 4);
1904 AddLine("MOV.F {}.w, {};", temporary, Visit(meta.lod));
1905 }
1906 AddLine("TXF.F {}, {}, texture[{}], {}{};", temporary, temporary, sampler_id, TextureType(meta),
1907 BuildAoffi(operation));
1908 AddLine("MOV.U {}.x, {}.{};", temporary, temporary, Swizzle(meta.element));
1909 return fmt::format("{}.x", temporary);
1910}
1911
1912std::string ARBDecompiler::TextureGradient(Operation operation) {
1913 const auto& meta = std::get<MetaTexture>(operation.GetMeta());
1914 const u32 sampler_id = device.GetBaseBindings(stage).sampler + meta.sampler.index;
1915 const std::string ddx = AllocVectorTemporary();
1916 const std::string ddy = AllocVectorTemporary();
1917 const std::string coord = BuildCoords(operation).first;
1918
1919 const std::size_t num_components = meta.derivates.size() / 2;
1920 for (std::size_t index = 0; index < num_components; ++index) {
1921 const char swizzle = Swizzle(index);
1922 AddLine("MOV.F {}.{}, {};", ddx, swizzle, Visit(meta.derivates[index * 2]));
1923 AddLine("MOV.F {}.{}, {};", ddy, swizzle, Visit(meta.derivates[index * 2 + 1]));
1924 }
1925
1926 const std::string_view result = coord;
1927 AddLine("TXD.F {}, {}, {}, {}, texture[{}], {}{};", result, coord, ddx, ddy, sampler_id,
1928 TextureType(meta), BuildAoffi(operation));
1929 AddLine("MOV.F {}.x, {}.{};", result, result, Swizzle(meta.element));
1930 return fmt::format("{}.x", result);
1931}
1932
1933std::string ARBDecompiler::ImageLoad(Operation operation) {
1934 const auto& meta = std::get<MetaImage>(operation.GetMeta());
1935 const u32 image_id = device.GetBaseBindings(stage).image + meta.image.index;
1936 const std::size_t count = operation.GetOperandsCount();
1937 const std::string_view type = ImageType(meta.image.type);
1938
1939 const std::string temporary = AllocVectorTemporary();
1940 for (std::size_t i = 0; i < count; ++i) {
1941 AddLine("MOV.S {}.{}, {};", temporary, Swizzle(i), Visit(operation[i]));
1942 }
1943 AddLine("LOADIM.F {}, {}, image[{}], {};", temporary, temporary, image_id, type);
1944 AddLine("MOV.F {}.x, {}.{};", temporary, temporary, Swizzle(meta.element));
1945 return fmt::format("{}.x", temporary);
1946}
1947
1948std::string ARBDecompiler::ImageStore(Operation operation) {
1949 const auto& meta = std::get<MetaImage>(operation.GetMeta());
1950 const u32 image_id = device.GetBaseBindings(stage).image + meta.image.index;
1951 const std::size_t num_coords = operation.GetOperandsCount();
1952 const std::size_t num_values = meta.values.size();
1953 const std::string_view type = ImageType(meta.image.type);
1954
1955 const std::string coord = AllocVectorTemporary();
1956 const std::string value = AllocVectorTemporary();
1957 for (std::size_t i = 0; i < num_coords; ++i) {
1958 AddLine("MOV.S {}.{}, {};", coord, Swizzle(i), Visit(operation[i]));
1959 }
1960 for (std::size_t i = 0; i < num_values; ++i) {
1961 AddLine("MOV.F {}.{}, {};", value, Swizzle(i), Visit(meta.values[i]));
1962 }
1963 AddLine("STOREIM.F image[{}], {}, {}, {};", image_id, value, coord, type);
1964 return {};
1965}
1966
1967std::string ARBDecompiler::Branch(Operation operation) {
1968 const auto target = std::get<ImmediateNode>(*operation[0]);
1969 AddLine("MOV.U PC.x, {};", target.GetValue());
1970 AddLine("CONT;");
1971 return {};
1972}
1973
1974std::string ARBDecompiler::BranchIndirect(Operation operation) {
1975 AddLine("MOV.U PC.x, {};", Visit(operation[0]));
1976 AddLine("CONT;");
1977 return {};
1978}
1979
1980std::string ARBDecompiler::PushFlowStack(Operation operation) {
1981 const auto stack = std::get<MetaStackClass>(operation.GetMeta());
1982 const u32 target = std::get<ImmediateNode>(*operation[0]).GetValue();
1983 const std::string_view stack_name = StackName(stack);
1984 AddLine("MOV.U {}[{}_TOP.x].x, {};", stack_name, stack_name, target);
1985 AddLine("ADD.S {}_TOP.x, {}_TOP.x, 1;", stack_name, stack_name);
1986 return {};
1987}
1988
1989std::string ARBDecompiler::PopFlowStack(Operation operation) {
1990 const auto stack = std::get<MetaStackClass>(operation.GetMeta());
1991 const std::string_view stack_name = StackName(stack);
1992 AddLine("SUB.S {}_TOP.x, {}_TOP.x, 1;", stack_name, stack_name);
1993 AddLine("MOV.U PC.x, {}[{}_TOP.x].x;", stack_name, stack_name);
1994 AddLine("CONT;");
1995 return {};
1996}
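Branch, BranchIndirect, PushFlowStack, and PopFlowStack only make sense against the dispatch-loop model the decompiler emits: the shader body is a loop, PC.x selects the next basic block, and CONT jumps back to the loop header. A minimal C++ model of the semantics the generated ARB code implements (block addresses and the stack size are illustrative, not taken from the codebase):

#include <cstdint>
using u32 = std::uint32_t;

void RunShaderModel() {
    u32 pc = 0x00;        // mirrors the PC.x register
    u32 stack[20]{};      // mirrors one of the flow stacks (SSY/PBK)
    u32 top = 0;          // mirrors the corresponding _TOP.x register
    while (true) {        // CONT re-enters this loop
        switch (pc) {
        case 0x00:
            stack[top++] = 0x80; // PushFlowStack: record the sync target
            pc = 0x40;           // Branch: jump to an immediate target
            continue;
        case 0x40:
            pc = stack[--top];   // PopFlowStack: resume at the recorded target
            continue;
        case 0x80:
            return;              // Exit
        }
    }
}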
1997
1998std::string ARBDecompiler::Exit(Operation) {
1999 Exit();
2000 return {};
2001}
2002
2003std::string ARBDecompiler::Discard(Operation) {
2004 AddLine("KIL TR;");
2005 return {};
2006}
2007
2008std::string ARBDecompiler::EmitVertex(Operation) {
2009 AddLine("EMIT;");
2010 return {};
2011}
2012
2013std::string ARBDecompiler::EndPrimitive(Operation) {
2014 AddLine("ENDPRIM;");
2015 return {};
2016}
2017
2018std::string ARBDecompiler::InvocationId(Operation) {
2019 return "primitive.invocation";
2020}
2021
2022std::string ARBDecompiler::YNegate(Operation) {
2023 LOG_WARNING(Render_OpenGL, "(STUBBED)");
2024 const std::string temporary = AllocTemporary();
2025 AddLine("MOV.F {}, 1;", temporary);
2026 return temporary;
2027}
2028
2029std::string ARBDecompiler::ThreadId(Operation) {
2030 return fmt::format("{}.threadid", StageInputName(stage));
2031}
2032
2033std::string ARBDecompiler::ShuffleIndexed(Operation operation) {
2034 if (!device.HasWarpIntrinsics()) {
2035 LOG_ERROR(Render_OpenGL,
2036 "NV_shader_thread_shuffle is missing. Kepler or better is required.");
2037 return Visit(operation[0]);
2038 }
2039 const std::string temporary = AllocVectorTemporary();
2040 AddLine("SHFIDX.U {}, {}, {}, {{31, 0, 0, 0}};", temporary, Visit(operation[0]),
2041 Visit(operation[1]));
2042 AddLine("MOV.U {}.x, {}.y;", temporary, temporary);
2043 return fmt::format("{}.x", temporary);
2044}
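The {31, 0, 0, 0} operand is presumably the width mask for a full 32-thread warp (width − 1), roughly corresponding to GLSL's shuffleNV(value, index, 32) from GL_NV_shader_thread_shuffle; that reading, and the assumption that SHFIDX returns the shuffled value in the .y component (hence the follow-up MOV into .x), are inferences from the emitted code rather than documented facts.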
2045
2046std::string ARBDecompiler::Barrier(Operation) {
2047 if (!ir.IsDecompiled()) {
2048 LOG_ERROR(Render_OpenGL, "BAR used but shader is not decompiled");
2049 return {};
2050 }
2051 AddLine("BAR;");
2052 return {};
2053}
2054
2055std::string ARBDecompiler::MemoryBarrierGroup(Operation) {
2056 AddLine("MEMBAR.CTA;");
2057 return {};
2058}
2059
2060std::string ARBDecompiler::MemoryBarrierGlobal(Operation) {
2061 AddLine("MEMBAR;");
2062 return {};
2063}
2064
2065} // Anonymous namespace
2066
2067std::string DecompileAssemblyShader(const Device& device, const VideoCommon::Shader::ShaderIR& ir,
2068 const VideoCommon::Shader::Registry& registry,
2069 Tegra::Engines::ShaderType stage, std::string_view identifier) {
2070 return ARBDecompiler(device, ir, registry, stage, identifier).Code();
2071}
2072
2073} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_arb_decompiler.h b/src/video_core/renderer_opengl/gl_arb_decompiler.h
new file mode 100644
index 000000000..6afc87220
--- /dev/null
+++ b/src/video_core/renderer_opengl/gl_arb_decompiler.h
@@ -0,0 +1,29 @@
1// Copyright 2020 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <string>
8#include <string_view>
9
10#include "common/common_types.h"
11
12namespace Tegra::Engines {
13enum class ShaderType : u32;
14}
15
16namespace VideoCommon::Shader {
17class ShaderIR;
18class Registry;
19} // namespace VideoCommon::Shader
20
21namespace OpenGL {
22
23class Device;
24
25std::string DecompileAssemblyShader(const Device& device, const VideoCommon::Shader::ShaderIR& ir,
26 const VideoCommon::Shader::Registry& registry,
27 Tegra::Engines::ShaderType stage, std::string_view identifier);
28
29} // namespace OpenGL
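For context, a minimal sketch of how a caller might feed the returned string to the driver through the standard ARB program API; the real loading code lives in BuildShader and is not shown in this hunk, so treat the exact call sequence as an assumption:

#include <string>
#include <glad/glad.h>

GLuint LoadAssemblyProgram(const std::string& code) {
    GLuint program = 0;
    glGenProgramsARB(1, &program);
    glBindProgramARB(GL_VERTEX_PROGRAM_ARB, program);
    glProgramStringARB(GL_VERTEX_PROGRAM_ARB, GL_PROGRAM_FORMAT_ASCII_ARB,
                       static_cast<GLsizei>(code.size()), code.data());
    GLint error_position = -1;
    glGetIntegerv(GL_PROGRAM_ERROR_POSITION_ARB, &error_position);
    if (error_position != -1) {
        // glGetString(GL_PROGRAM_ERROR_STRING_ARB) carries the driver diagnostic.
    }
    return program;
}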
diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.cpp b/src/video_core/renderer_opengl/gl_buffer_cache.cpp
index 9964ea894..d9f7b4cc6 100644
--- a/src/video_core/renderer_opengl/gl_buffer_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_buffer_cache.cpp
@@ -22,22 +22,46 @@ using Maxwell = Tegra::Engines::Maxwell3D::Regs;
22 22
23MICROPROFILE_DEFINE(OpenGL_Buffer_Download, "OpenGL", "Buffer Download", MP_RGB(192, 192, 128)); 23MICROPROFILE_DEFINE(OpenGL_Buffer_Download, "OpenGL", "Buffer Download", MP_RGB(192, 192, 128));
24 24
25CachedBufferBlock::CachedBufferBlock(VAddr cpu_addr, const std::size_t size) 25Buffer::Buffer(const Device& device, VAddr cpu_addr, std::size_t size)
26 : VideoCommon::BufferBlock{cpu_addr, size} { 26 : VideoCommon::BufferBlock{cpu_addr, size} {
27 gl_buffer.Create(); 27 gl_buffer.Create();
28 glNamedBufferData(gl_buffer.handle, static_cast<GLsizeiptr>(size), nullptr, GL_DYNAMIC_DRAW); 28 glNamedBufferData(gl_buffer.handle, static_cast<GLsizeiptr>(size), nullptr, GL_DYNAMIC_DRAW);
29 if (device.HasVertexBufferUnifiedMemory()) {
30 glMakeNamedBufferResidentNV(gl_buffer.handle, GL_READ_WRITE);
31 glGetNamedBufferParameterui64vNV(gl_buffer.handle, GL_BUFFER_GPU_ADDRESS_NV, &gpu_address);
32 }
29} 33}
30 34
31CachedBufferBlock::~CachedBufferBlock() = default; 35Buffer::~Buffer() = default;
36
37void Buffer::Upload(std::size_t offset, std::size_t size, const u8* data) const {
38 glNamedBufferSubData(Handle(), static_cast<GLintptr>(offset), static_cast<GLsizeiptr>(size),
39 data);
40}
41
42void Buffer::Download(std::size_t offset, std::size_t size, u8* data) const {
43 MICROPROFILE_SCOPE(OpenGL_Buffer_Download);
44 glMemoryBarrier(GL_BUFFER_UPDATE_BARRIER_BIT);
45 glGetNamedBufferSubData(Handle(), static_cast<GLintptr>(offset), static_cast<GLsizeiptr>(size),
46 data);
47}
48
49void Buffer::CopyFrom(const Buffer& src, std::size_t src_offset, std::size_t dst_offset,
50 std::size_t size) const {
51 glCopyNamedBufferSubData(src.Handle(), Handle(), static_cast<GLintptr>(src_offset),
52 static_cast<GLintptr>(dst_offset), static_cast<GLsizeiptr>(size));
53}
32 54
33OGLBufferCache::OGLBufferCache(RasterizerOpenGL& rasterizer, Core::System& system, 55OGLBufferCache::OGLBufferCache(RasterizerOpenGL& rasterizer, Core::System& system,
34 const Device& device, std::size_t stream_size) 56 const Device& device_, std::size_t stream_size)
35 : GenericBufferCache{rasterizer, system, std::make_unique<OGLStreamBuffer>(stream_size, true)} { 57 : GenericBufferCache{rasterizer, system,
58 std::make_unique<OGLStreamBuffer>(device_, stream_size, true)},
59 device{device_} {
36 if (!device.HasFastBufferSubData()) { 60 if (!device.HasFastBufferSubData()) {
37 return; 61 return;
38 } 62 }
39 63
40 static constexpr auto size = static_cast<GLsizeiptr>(Maxwell::MaxConstBufferSize); 64 static constexpr GLsizeiptr size = static_cast<GLsizeiptr>(Maxwell::MaxConstBufferSize);
41 glCreateBuffers(static_cast<GLsizei>(std::size(cbufs)), std::data(cbufs)); 65 glCreateBuffers(static_cast<GLsizei>(std::size(cbufs)), std::data(cbufs));
42 for (const GLuint cbuf : cbufs) { 66 for (const GLuint cbuf : cbufs) {
43 glNamedBufferData(cbuf, size, nullptr, GL_STREAM_DRAW); 67 glNamedBufferData(cbuf, size, nullptr, GL_STREAM_DRAW);
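The constructor above is the acquisition half of the bindless vertex buffer pattern; a condensed, illustrative sketch of the whole flow (residency and the address query come from NV_shader_buffer_load, the draw-time bind-by-address from NV_vertex_buffer_unified_memory):

#include <glad/glad.h>

void MakeVertexBufferResident(GLsizeiptr size) {
    GLuint buffer = 0;
    GLuint64EXT gpu_address = 0;
    glCreateBuffers(1, &buffer);
    glNamedBufferData(buffer, size, nullptr, GL_DYNAMIC_DRAW);
    glMakeNamedBufferResidentNV(buffer, GL_READ_WRITE);
    glGetNamedBufferParameterui64vNV(buffer, GL_BUFFER_GPU_ADDRESS_NV, &gpu_address);
    // At draw time, source attribute data by GPU address instead of by handle:
    glEnableClientState(GL_VERTEX_ATTRIB_ARRAY_UNIFIED_NV);
    glBufferAddressRangeNV(GL_VERTEX_ATTRIB_ARRAY_ADDRESS_NV, 0, gpu_address, size);
}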
@@ -48,44 +72,21 @@ OGLBufferCache::~OGLBufferCache() {
48 glDeleteBuffers(static_cast<GLsizei>(std::size(cbufs)), std::data(cbufs)); 72 glDeleteBuffers(static_cast<GLsizei>(std::size(cbufs)), std::data(cbufs));
49} 73}
50 74
51Buffer OGLBufferCache::CreateBlock(VAddr cpu_addr, std::size_t size) { 75std::shared_ptr<Buffer> OGLBufferCache::CreateBlock(VAddr cpu_addr, std::size_t size) {
52 return std::make_shared<CachedBufferBlock>(cpu_addr, size); 76 return std::make_shared<Buffer>(device, cpu_addr, size);
53}
54
55GLuint OGLBufferCache::ToHandle(const Buffer& buffer) {
56 return buffer->GetHandle();
57}
58
59GLuint OGLBufferCache::GetEmptyBuffer(std::size_t) {
60 return 0;
61} 77}
62 78
63void OGLBufferCache::UploadBlockData(const Buffer& buffer, std::size_t offset, std::size_t size, 79OGLBufferCache::BufferInfo OGLBufferCache::GetEmptyBuffer(std::size_t) {
64 const u8* data) { 80 return {0, 0, 0};
65 glNamedBufferSubData(buffer->GetHandle(), static_cast<GLintptr>(offset),
66 static_cast<GLsizeiptr>(size), data);
67}
68
69void OGLBufferCache::DownloadBlockData(const Buffer& buffer, std::size_t offset, std::size_t size,
70 u8* data) {
71 MICROPROFILE_SCOPE(OpenGL_Buffer_Download);
72 glMemoryBarrier(GL_BUFFER_UPDATE_BARRIER_BIT);
73 glGetNamedBufferSubData(buffer->GetHandle(), static_cast<GLintptr>(offset),
74 static_cast<GLsizeiptr>(size), data);
75}
76
77void OGLBufferCache::CopyBlock(const Buffer& src, const Buffer& dst, std::size_t src_offset,
78 std::size_t dst_offset, std::size_t size) {
79 glCopyNamedBufferSubData(src->GetHandle(), dst->GetHandle(), static_cast<GLintptr>(src_offset),
80 static_cast<GLintptr>(dst_offset), static_cast<GLsizeiptr>(size));
81} 81}
82 82
83OGLBufferCache::BufferInfo OGLBufferCache::ConstBufferUpload(const void* raw_pointer, 83OGLBufferCache::BufferInfo OGLBufferCache::ConstBufferUpload(const void* raw_pointer,
84 std::size_t size) { 84 std::size_t size) {
85 DEBUG_ASSERT(cbuf_cursor < std::size(cbufs)); 85 DEBUG_ASSERT(cbuf_cursor < std::size(cbufs));
86 const GLuint& cbuf = cbufs[cbuf_cursor++]; 86 const GLuint cbuf = cbufs[cbuf_cursor++];
87
87 glNamedBufferSubData(cbuf, 0, static_cast<GLsizeiptr>(size), raw_pointer); 88 glNamedBufferSubData(cbuf, 0, static_cast<GLsizeiptr>(size), raw_pointer);
88 return {cbuf, 0}; 89 return {cbuf, 0, 0};
89} 90}
90 91
91} // namespace OpenGL 92} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.h b/src/video_core/renderer_opengl/gl_buffer_cache.h
index a9e86cfc7..59d95adbc 100644
--- a/src/video_core/renderer_opengl/gl_buffer_cache.h
+++ b/src/video_core/renderer_opengl/gl_buffer_cache.h
@@ -10,7 +10,6 @@
10#include "common/common_types.h" 10#include "common/common_types.h"
11#include "video_core/buffer_cache/buffer_cache.h" 11#include "video_core/buffer_cache/buffer_cache.h"
12#include "video_core/engines/maxwell_3d.h" 12#include "video_core/engines/maxwell_3d.h"
13#include "video_core/rasterizer_cache.h"
14#include "video_core/renderer_opengl/gl_resource_manager.h" 13#include "video_core/renderer_opengl/gl_resource_manager.h"
15#include "video_core/renderer_opengl/gl_stream_buffer.h" 14#include "video_core/renderer_opengl/gl_stream_buffer.h"
16 15
@@ -24,57 +23,57 @@ class Device;
24class OGLStreamBuffer; 23class OGLStreamBuffer;
25class RasterizerOpenGL; 24class RasterizerOpenGL;
26 25
27class CachedBufferBlock; 26class Buffer : public VideoCommon::BufferBlock {
27public:
28 explicit Buffer(const Device& device, VAddr cpu_addr, std::size_t size);
29 ~Buffer();
28 30
29using Buffer = std::shared_ptr<CachedBufferBlock>; 31 void Upload(std::size_t offset, std::size_t size, const u8* data) const;
30using GenericBufferCache = VideoCommon::BufferCache<Buffer, GLuint, OGLStreamBuffer>;
31 32
32class CachedBufferBlock : public VideoCommon::BufferBlock { 33 void Download(std::size_t offset, std::size_t size, u8* data) const;
33public: 34
34 explicit CachedBufferBlock(VAddr cpu_addr, const std::size_t size); 35 void CopyFrom(const Buffer& src, std::size_t src_offset, std::size_t dst_offset,
35 ~CachedBufferBlock(); 36 std::size_t size) const;
36 37
37 GLuint GetHandle() const { 38 GLuint Handle() const noexcept {
38 return gl_buffer.handle; 39 return gl_buffer.handle;
39 } 40 }
40 41
42 u64 Address() const noexcept {
43 return gpu_address;
44 }
45
41private: 46private:
42 OGLBuffer gl_buffer; 47 OGLBuffer gl_buffer;
48 u64 gpu_address = 0;
43}; 49};
44 50
51using GenericBufferCache = VideoCommon::BufferCache<Buffer, GLuint, OGLStreamBuffer>;
45class OGLBufferCache final : public GenericBufferCache { 52class OGLBufferCache final : public GenericBufferCache {
46public: 53public:
47 explicit OGLBufferCache(RasterizerOpenGL& rasterizer, Core::System& system, 54 explicit OGLBufferCache(RasterizerOpenGL& rasterizer, Core::System& system,
48 const Device& device, std::size_t stream_size); 55 const Device& device, std::size_t stream_size);
49 ~OGLBufferCache(); 56 ~OGLBufferCache();
50 57
51 GLuint GetEmptyBuffer(std::size_t) override; 58 BufferInfo GetEmptyBuffer(std::size_t) override;
52 59
53 void Acquire() noexcept { 60 void Acquire() noexcept {
54 cbuf_cursor = 0; 61 cbuf_cursor = 0;
55 } 62 }
56 63
57protected: 64protected:
58 Buffer CreateBlock(VAddr cpu_addr, std::size_t size) override; 65 std::shared_ptr<Buffer> CreateBlock(VAddr cpu_addr, std::size_t size) override;
59
60 GLuint ToHandle(const Buffer& buffer) override;
61
62 void UploadBlockData(const Buffer& buffer, std::size_t offset, std::size_t size,
63 const u8* data) override;
64
65 void DownloadBlockData(const Buffer& buffer, std::size_t offset, std::size_t size,
66 u8* data) override;
67
68 void CopyBlock(const Buffer& src, const Buffer& dst, std::size_t src_offset,
69 std::size_t dst_offset, std::size_t size) override;
70 66
71 BufferInfo ConstBufferUpload(const void* raw_pointer, std::size_t size) override; 67 BufferInfo ConstBufferUpload(const void* raw_pointer, std::size_t size) override;
72 68
73private: 69private:
70 static constexpr std::size_t NUM_CBUFS = Tegra::Engines::Maxwell3D::Regs::MaxConstBuffers *
71 Tegra::Engines::Maxwell3D::Regs::MaxShaderProgram;
72
73 const Device& device;
74
74 std::size_t cbuf_cursor = 0; 75 std::size_t cbuf_cursor = 0;
75 std::array<GLuint, Tegra::Engines::Maxwell3D::Regs::MaxConstBuffers * 76 std::array<GLuint, NUM_CBUFS> cbufs{};
76 Tegra::Engines::Maxwell3D::Regs::MaxShaderProgram>
77 cbufs;
78}; 77};
79 78
80} // namespace OpenGL 79} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_device.cpp b/src/video_core/renderer_opengl/gl_device.cpp
index a14641b97..208fc6167 100644
--- a/src/video_core/renderer_opengl/gl_device.cpp
+++ b/src/video_core/renderer_opengl/gl_device.cpp
@@ -123,16 +123,24 @@ std::array<Device::BaseBindings, Tegra::Engines::MaxShaderTypes> BuildBaseBindin
123 u32 num_images = GetInteger<u32>(GL_MAX_IMAGE_UNITS); 123 u32 num_images = GetInteger<u32>(GL_MAX_IMAGE_UNITS);
124 u32 base_images = 0; 124 u32 base_images = 0;
125 125
126 // Reserve more image bindings on fragment and vertex stages. 126 // GL_MAX_IMAGE_UNITS is guaranteed by the spec to have a minimum value of 8.
127 // Due to the limitation of GL_MAX_IMAGE_UNITS, reserve at least 4 image bindings on the
128 // fragment stage, and at least 1 for the rest of the stages.
129 // So far games are observed to use 1 image binding on vertex and 4 on fragment stages.
130
131 // Reserve at least 4 image bindings on the fragment stage.
127 bindings[4].image = 132 bindings[4].image =
128 Extract(base_images, num_images, num_images / NumStages + 2, LimitImages[4]); 133 Extract(base_images, num_images, std::max(4U, num_images / NumStages), LimitImages[4]);
129 bindings[0].image = 134
130 Extract(base_images, num_images, num_images / NumStages + 1, LimitImages[0]); 135 // This is guaranteed to be at least 1.
136 const u32 total_extracted_images = num_images / (NumStages - 1);
131 137
132 // Reserve the other image bindings. 138 // Reserve the other image bindings.
133 const u32 total_extracted_images = num_images / (NumStages - 2); 139 for (std::size_t i = 0; i < NumStages; ++i) {
134 for (std::size_t i = 2; i < NumStages; ++i) {
135 const std::size_t stage = stage_swizzle[i]; 140 const std::size_t stage = stage_swizzle[i];
141 if (stage == 4) {
142 continue;
143 }
136 bindings[stage].image = 144 bindings[stage].image =
137 Extract(base_images, num_images, total_extracted_images, LimitImages[stage]); 145 Extract(base_images, num_images, total_extracted_images, LimitImages[stage]);
138 } 146 }
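As a worked example at the spec minimum: with GL_MAX_IMAGE_UNITS = 8 and five stages, the fragment stage (stage 4) reserves max(4, 8 / 5) = 4 units, and total_extracted_images = 8 / (5 - 1) = 2, so each remaining stage requests 2 of the 4 leftover units; Extract presumably clamps each request against both LimitImages and whatever is still unreserved.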
@@ -170,7 +178,7 @@ bool IsASTCSupported() {
170 for (const GLenum format : formats) { 178 for (const GLenum format : formats) {
171 for (const GLenum support : required_support) { 179 for (const GLenum support : required_support) {
172 GLint value; 180 GLint value;
173 glGetInternalformativ(GL_TEXTURE_2D, format, support, 1, &value); 181 glGetInternalformativ(target, format, support, 1, &value);
174 if (value != GL_FULL_SUPPORT) { 182 if (value != GL_FULL_SUPPORT) {
175 return false; 183 return false;
176 } 184 }
@@ -185,6 +193,7 @@ bool IsASTCSupported() {
185Device::Device() 193Device::Device()
186 : max_uniform_buffers{BuildMaxUniformBuffers()}, base_bindings{BuildBaseBindings()} { 194 : max_uniform_buffers{BuildMaxUniformBuffers()}, base_bindings{BuildBaseBindings()} {
187 const std::string_view vendor = reinterpret_cast<const char*>(glGetString(GL_VENDOR)); 195 const std::string_view vendor = reinterpret_cast<const char*>(glGetString(GL_VENDOR));
196 const std::string_view renderer = reinterpret_cast<const char*>(glGetString(GL_RENDERER));
188 const std::string_view version = reinterpret_cast<const char*>(glGetString(GL_VERSION)); 197 const std::string_view version = reinterpret_cast<const char*>(glGetString(GL_VERSION));
189 const std::vector extensions = GetExtensions(); 198 const std::vector extensions = GetExtensions();
190 199
@@ -208,13 +217,21 @@ Device::Device()
208 has_shader_ballot = GLAD_GL_ARB_shader_ballot; 217 has_shader_ballot = GLAD_GL_ARB_shader_ballot;
209 has_vertex_viewport_layer = GLAD_GL_ARB_shader_viewport_layer_array; 218 has_vertex_viewport_layer = GLAD_GL_ARB_shader_viewport_layer_array;
210 has_image_load_formatted = HasExtension(extensions, "GL_EXT_shader_image_load_formatted"); 219 has_image_load_formatted = HasExtension(extensions, "GL_EXT_shader_image_load_formatted");
220 has_texture_shadow_lod = HasExtension(extensions, "GL_EXT_texture_shadow_lod");
211 has_astc = IsASTCSupported(); 221 has_astc = IsASTCSupported();
212 has_variable_aoffi = TestVariableAoffi(); 222 has_variable_aoffi = TestVariableAoffi();
213 has_component_indexing_bug = is_amd; 223 has_component_indexing_bug = is_amd;
214 has_precise_bug = TestPreciseBug(); 224 has_precise_bug = TestPreciseBug();
225 has_nv_viewport_array2 = GLAD_GL_NV_viewport_array2;
226 has_vertex_buffer_unified_memory = GLAD_GL_NV_vertex_buffer_unified_memory;
227
228 // At the moment of writing this, only Nvidia's driver optimizes BufferSubData on exclusive
229 // uniform buffers as "push constants"
215 has_fast_buffer_sub_data = is_nvidia && !disable_fast_buffer_sub_data; 230 has_fast_buffer_sub_data = is_nvidia && !disable_fast_buffer_sub_data;
231
216 use_assembly_shaders = Settings::values.use_assembly_shaders && GLAD_GL_NV_gpu_program5 && 232 use_assembly_shaders = Settings::values.use_assembly_shaders && GLAD_GL_NV_gpu_program5 &&
217 GLAD_GL_NV_compute_program5; 233 GLAD_GL_NV_compute_program5 && GLAD_GL_NV_transform_feedback &&
234 GLAD_GL_NV_transform_feedback2;
218 235
219 LOG_INFO(Render_OpenGL, "Renderer_VariableAOFFI: {}", has_variable_aoffi); 236 LOG_INFO(Render_OpenGL, "Renderer_VariableAOFFI: {}", has_variable_aoffi);
220 LOG_INFO(Render_OpenGL, "Renderer_ComponentIndexingBug: {}", has_component_indexing_bug); 237 LOG_INFO(Render_OpenGL, "Renderer_ComponentIndexingBug: {}", has_component_indexing_bug);
@@ -235,6 +252,7 @@ Device::Device(std::nullptr_t) {
235 has_shader_ballot = true; 252 has_shader_ballot = true;
236 has_vertex_viewport_layer = true; 253 has_vertex_viewport_layer = true;
237 has_image_load_formatted = true; 254 has_image_load_formatted = true;
255 has_texture_shadow_lod = true;
238 has_variable_aoffi = true; 256 has_variable_aoffi = true;
239} 257}
240 258
diff --git a/src/video_core/renderer_opengl/gl_device.h b/src/video_core/renderer_opengl/gl_device.h
index 98cca0254..e1d811966 100644
--- a/src/video_core/renderer_opengl/gl_device.h
+++ b/src/video_core/renderer_opengl/gl_device.h
@@ -68,6 +68,14 @@ public:
68 return has_image_load_formatted; 68 return has_image_load_formatted;
69 } 69 }
70 70
71 bool HasTextureShadowLod() const {
72 return has_texture_shadow_lod;
73 }
74
75 bool HasVertexBufferUnifiedMemory() const {
76 return has_vertex_buffer_unified_memory;
77 }
78
71 bool HasASTC() const { 79 bool HasASTC() const {
72 return has_astc; 80 return has_astc;
73 } 81 }
@@ -88,6 +96,10 @@ public:
88 return has_fast_buffer_sub_data; 96 return has_fast_buffer_sub_data;
89 } 97 }
90 98
99 bool HasNvViewportArray2() const {
100 return has_nv_viewport_array2;
101 }
102
91 bool UseAssemblyShaders() const { 103 bool UseAssemblyShaders() const {
92 return use_assembly_shaders; 104 return use_assembly_shaders;
93 } 105 }
@@ -106,11 +118,14 @@ private:
106 bool has_shader_ballot{}; 118 bool has_shader_ballot{};
107 bool has_vertex_viewport_layer{}; 119 bool has_vertex_viewport_layer{};
108 bool has_image_load_formatted{}; 120 bool has_image_load_formatted{};
121 bool has_texture_shadow_lod{};
122 bool has_vertex_buffer_unified_memory{};
109 bool has_astc{}; 123 bool has_astc{};
110 bool has_variable_aoffi{}; 124 bool has_variable_aoffi{};
111 bool has_component_indexing_bug{}; 125 bool has_component_indexing_bug{};
112 bool has_precise_bug{}; 126 bool has_precise_bug{};
113 bool has_fast_buffer_sub_data{}; 127 bool has_fast_buffer_sub_data{};
128 bool has_nv_viewport_array2{};
114 bool use_assembly_shaders{}; 129 bool use_assembly_shaders{};
115}; 130};
116 131
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp
index 55e79aaf6..e960a0ef1 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -30,6 +30,7 @@
30#include "video_core/renderer_opengl/gl_shader_cache.h" 30#include "video_core/renderer_opengl/gl_shader_cache.h"
31#include "video_core/renderer_opengl/maxwell_to_gl.h" 31#include "video_core/renderer_opengl/maxwell_to_gl.h"
32#include "video_core/renderer_opengl/renderer_opengl.h" 32#include "video_core/renderer_opengl/renderer_opengl.h"
33#include "video_core/shader_cache.h"
33 34
34namespace OpenGL { 35namespace OpenGL {
35 36
@@ -60,15 +61,28 @@ constexpr std::size_t NUM_CONST_BUFFERS_BYTES_PER_STAGE =
60constexpr std::size_t TOTAL_CONST_BUFFER_BYTES = 61constexpr std::size_t TOTAL_CONST_BUFFER_BYTES =
61 NUM_CONST_BUFFERS_BYTES_PER_STAGE * Maxwell::MaxShaderStage; 62 NUM_CONST_BUFFERS_BYTES_PER_STAGE * Maxwell::MaxShaderStage;
62 63
63constexpr std::size_t NumSupportedVertexAttributes = 16; 64constexpr std::size_t NUM_SUPPORTED_VERTEX_ATTRIBUTES = 16;
65constexpr std::size_t NUM_SUPPORTED_VERTEX_BINDINGS = 16;
64 66
65template <typename Engine, typename Entry> 67template <typename Engine, typename Entry>
66Tegra::Texture::FullTextureInfo GetTextureInfo(const Engine& engine, const Entry& entry, 68Tegra::Texture::FullTextureInfo GetTextureInfo(const Engine& engine, const Entry& entry,
67 ShaderType shader_type, std::size_t index = 0) { 69 ShaderType shader_type, std::size_t index = 0) {
70 if constexpr (std::is_same_v<Entry, SamplerEntry>) {
71 if (entry.is_separated) {
72 const u32 buffer_1 = entry.buffer;
73 const u32 buffer_2 = entry.secondary_buffer;
74 const u32 offset_1 = entry.offset;
75 const u32 offset_2 = entry.secondary_offset;
76 const u32 handle_1 = engine.AccessConstBuffer32(shader_type, buffer_1, offset_1);
77 const u32 handle_2 = engine.AccessConstBuffer32(shader_type, buffer_2, offset_2);
78 return engine.GetTextureInfo(handle_1 | handle_2);
79 }
80 }
68 if (entry.is_bindless) { 81 if (entry.is_bindless) {
69 const auto tex_handle = engine.AccessConstBuffer32(shader_type, entry.buffer, entry.offset); 82 const u32 handle = engine.AccessConstBuffer32(shader_type, entry.buffer, entry.offset);
70 return engine.GetTextureInfo(tex_handle); 83 return engine.GetTextureInfo(handle);
71 } 84 }
85
72 const auto& gpu_profile = engine.AccessGuestDriverProfile(); 86 const auto& gpu_profile = engine.AccessGuestDriverProfile();
73 const u32 offset = entry.offset + static_cast<u32>(index * gpu_profile.GetTextureHandlerSize()); 87 const u32 offset = entry.offset + static_cast<u32>(index * gpu_profile.GetTextureHandlerSize());
74 if constexpr (std::is_same_v<Engine, Tegra::Engines::Maxwell3D>) { 88 if constexpr (std::is_same_v<Engine, Tegra::Engines::Maxwell3D>) {
@@ -93,6 +107,34 @@ std::size_t GetConstBufferSize(const Tegra::Engines::ConstBufferInfo& buffer,
93 return buffer.size; 107 return buffer.size;
94} 108}
95 109
110/// Translates hardware transform feedback indices
111/// @param location Hardware location
112/// @return Pair of ARB_transform_feedback3 token stream first and third arguments
113/// @note Read https://www.khronos.org/registry/OpenGL/extensions/ARB/ARB_transform_feedback3.txt
114std::pair<GLint, GLint> TransformFeedbackEnum(u8 location) {
115 const u8 index = location / 4;
116 if (index >= 8 && index <= 39) {
117 return {GL_GENERIC_ATTRIB_NV, index - 8};
118 }
119 if (index >= 48 && index <= 55) {
120 return {GL_TEXTURE_COORD_NV, index - 48};
121 }
122 switch (index) {
123 case 7:
124 return {GL_POSITION, 0};
125 case 40:
126 return {GL_PRIMARY_COLOR_NV, 0};
127 case 41:
128 return {GL_SECONDARY_COLOR_NV, 0};
129 case 42:
130 return {GL_BACK_PRIMARY_COLOR_NV, 0};
131 case 43:
132 return {GL_BACK_SECONDARY_COLOR_NV, 0};
133 }
134 UNIMPLEMENTED_MSG("index={}", static_cast<int>(index));
135 return {GL_POSITION, 0};
136}
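Worked through the mapping: location 38 gives index 38 / 4 = 9, which lands in the generic range, so the result is {GL_GENERIC_ATTRIB_NV, 1}; location 28 gives index 7, so {GL_POSITION, 0}; location 194 gives index 48, so {GL_TEXTURE_COORD_NV, 0}.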
137
96void oglEnable(GLenum cap, bool state) { 138void oglEnable(GLenum cap, bool state) {
97 (state ? glEnable : glDisable)(cap); 139 (state ? glEnable : glDisable)(cap);
98} 140}
@@ -152,7 +194,7 @@ void RasterizerOpenGL::SetupVertexFormat() {
152 // avoid OpenGL errors. 194 // avoid OpenGL errors.
153 // TODO(Subv): Analyze the shader to identify which attributes are actually used and don't 195 // TODO(Subv): Analyze the shader to identify which attributes are actually used and don't
154 // assume every shader uses them all. 196 // assume every shader uses them all.
155 for (std::size_t index = 0; index < NumSupportedVertexAttributes; ++index) { 197 for (std::size_t index = 0; index < NUM_SUPPORTED_VERTEX_ATTRIBUTES; ++index) {
156 if (!flags[Dirty::VertexFormat0 + index]) { 198 if (!flags[Dirty::VertexFormat0 + index]) {
157 continue; 199 continue;
158 } 200 }
@@ -171,9 +213,10 @@ void RasterizerOpenGL::SetupVertexFormat() {
171 if (attrib.type == Maxwell::VertexAttribute::Type::SignedInt || 213 if (attrib.type == Maxwell::VertexAttribute::Type::SignedInt ||
172 attrib.type == Maxwell::VertexAttribute::Type::UnsignedInt) { 214 attrib.type == Maxwell::VertexAttribute::Type::UnsignedInt) {
173 glVertexAttribIFormat(gl_index, attrib.ComponentCount(), 215 glVertexAttribIFormat(gl_index, attrib.ComponentCount(),
174 MaxwellToGL::VertexType(attrib), attrib.offset); 216 MaxwellToGL::VertexFormat(attrib), attrib.offset);
175 } else { 217 } else {
176 glVertexAttribFormat(gl_index, attrib.ComponentCount(), MaxwellToGL::VertexType(attrib), 218 glVertexAttribFormat(gl_index, attrib.ComponentCount(),
219 MaxwellToGL::VertexFormat(attrib),
177 attrib.IsNormalized() ? GL_TRUE : GL_FALSE, attrib.offset); 220 attrib.IsNormalized() ? GL_TRUE : GL_FALSE, attrib.offset);
178 } 221 }
179 glVertexAttribBinding(gl_index, attrib.buffer); 222 glVertexAttribBinding(gl_index, attrib.buffer);
@@ -190,9 +233,11 @@ void RasterizerOpenGL::SetupVertexBuffer() {
190 233
191 MICROPROFILE_SCOPE(OpenGL_VB); 234 MICROPROFILE_SCOPE(OpenGL_VB);
192 235
236 const bool use_unified_memory = device.HasVertexBufferUnifiedMemory();
237
193 // Upload all guest vertex arrays sequentially to our buffer 238 // Upload all guest vertex arrays sequentially to our buffer
194 const auto& regs = gpu.regs; 239 const auto& regs = gpu.regs;
195 for (std::size_t index = 0; index < Maxwell::NumVertexArrays; ++index) { 240 for (std::size_t index = 0; index < NUM_SUPPORTED_VERTEX_BINDINGS; ++index) {
196 if (!flags[Dirty::VertexBuffer0 + index]) { 241 if (!flags[Dirty::VertexBuffer0 + index]) {
197 continue; 242 continue;
198 } 243 }
@@ -205,16 +250,25 @@ void RasterizerOpenGL::SetupVertexBuffer() {
205 250
206 const GPUVAddr start = vertex_array.StartAddress(); 251 const GPUVAddr start = vertex_array.StartAddress();
207 const GPUVAddr end = regs.vertex_array_limit[index].LimitAddress(); 252 const GPUVAddr end = regs.vertex_array_limit[index].LimitAddress();
208
209 ASSERT(end >= start); 253 ASSERT(end >= start);
254
255 const GLuint gl_index = static_cast<GLuint>(index);
210 const u64 size = end - start; 256 const u64 size = end - start;
211 if (size == 0) { 257 if (size == 0) {
212 glBindVertexBuffer(static_cast<GLuint>(index), 0, 0, vertex_array.stride); 258 glBindVertexBuffer(gl_index, 0, 0, vertex_array.stride);
259 if (use_unified_memory) {
260 glBufferAddressRangeNV(GL_VERTEX_ATTRIB_ARRAY_ADDRESS_NV, gl_index, 0, 0);
261 }
213 continue; 262 continue;
214 } 263 }
215 const auto [vertex_buffer, vertex_buffer_offset] = buffer_cache.UploadMemory(start, size); 264 const auto info = buffer_cache.UploadMemory(start, size);
216 glBindVertexBuffer(static_cast<GLuint>(index), vertex_buffer, vertex_buffer_offset, 265 if (use_unified_memory) {
217 vertex_array.stride); 266 glBindVertexBuffer(gl_index, 0, 0, vertex_array.stride);
267 glBufferAddressRangeNV(GL_VERTEX_ATTRIB_ARRAY_ADDRESS_NV, gl_index,
268 info.address + info.offset, size);
269 } else {
270 glBindVertexBuffer(gl_index, info.handle, info.offset, vertex_array.stride);
271 }
218 } 272 }
219} 273}
220 274
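On the unified-memory path, glBindVertexBuffer is still issued with buffer 0 because the binding's stride continues to come from the core vertex binding state, while glBufferAddressRangeNV supplies the actual data pointer (info.address + info.offset); this division of labor matches how NV_vertex_buffer_unified_memory is specified to interact with core vertex attribute state.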
@@ -227,7 +281,7 @@ void RasterizerOpenGL::SetupVertexInstances() {
227 flags[Dirty::VertexInstances] = false; 281 flags[Dirty::VertexInstances] = false;
228 282
229 const auto& regs = gpu.regs; 283 const auto& regs = gpu.regs;
230 for (std::size_t index = 0; index < NumSupportedVertexAttributes; ++index) { 284 for (std::size_t index = 0; index < NUM_SUPPORTED_VERTEX_ATTRIBUTES; ++index) {
231 if (!flags[Dirty::VertexInstance0 + index]) { 285 if (!flags[Dirty::VertexInstance0 + index]) {
232 continue; 286 continue;
233 } 287 }
@@ -244,9 +298,9 @@ GLintptr RasterizerOpenGL::SetupIndexBuffer() {
244 MICROPROFILE_SCOPE(OpenGL_Index); 298 MICROPROFILE_SCOPE(OpenGL_Index);
245 const auto& regs = system.GPU().Maxwell3D().regs; 299 const auto& regs = system.GPU().Maxwell3D().regs;
246 const std::size_t size = CalculateIndexBufferSize(); 300 const std::size_t size = CalculateIndexBufferSize();
247 const auto [buffer, offset] = buffer_cache.UploadMemory(regs.index_array.IndexStart(), size); 301 const auto info = buffer_cache.UploadMemory(regs.index_array.IndexStart(), size);
248 glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, buffer); 302 glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, info.handle);
249 return offset; 303 return info.offset;
250} 304}
251 305
252void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) { 306void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) {
@@ -282,7 +336,7 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) {
282 continue; 336 continue;
283 } 337 }
284 338
285 Shader shader{shader_cache.GetStageProgram(program)}; 339 Shader* const shader = shader_cache.GetStageProgram(program);
286 340
287 if (device.UseAssemblyShaders()) { 341 if (device.UseAssemblyShaders()) {
288 // Check for ARB limitation. We only have 16 SSBOs per context state. To workaround this 342 // Check for ARB limitation. We only have 16 SSBOs per context state. To workaround this
@@ -576,7 +630,16 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) {
576 (Maxwell::MaxConstBufferSize + device.GetUniformBufferAlignment()); 630 (Maxwell::MaxConstBufferSize + device.GetUniformBufferAlignment());
577 631
578 // Prepare the vertex array. 632 // Prepare the vertex array.
579 buffer_cache.Map(buffer_size); 633 const bool invalidated = buffer_cache.Map(buffer_size);
634
635 if (invalidated) {
636 // When the stream buffer has been invalidated, we have to consider vertex buffers as dirty
637 auto& dirty = gpu.dirty.flags;
638 dirty[Dirty::VertexBuffers] = true;
639 for (int index = Dirty::VertexBuffer0; index <= Dirty::VertexBuffer31; ++index) {
640 dirty[index] = true;
641 }
642 }
580 643
581 // Prepare vertex array format. 644 // Prepare vertex array format.
582 SetupVertexFormat(); 645 SetupVertexFormat();
@@ -593,9 +656,9 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) {
593 if (!device.UseAssemblyShaders()) { 656 if (!device.UseAssemblyShaders()) {
594 MaxwellUniformData ubo; 657 MaxwellUniformData ubo;
595 ubo.SetFromRegs(gpu); 658 ubo.SetFromRegs(gpu);
596 const auto [buffer, offset] = 659 const auto info =
597 buffer_cache.UploadHostMemory(&ubo, sizeof(ubo), device.GetUniformBufferAlignment()); 660 buffer_cache.UploadHostMemory(&ubo, sizeof(ubo), device.GetUniformBufferAlignment());
598 glBindBufferRange(GL_UNIFORM_BUFFER, EmulationUniformBlockBinding, buffer, offset, 661 glBindBufferRange(GL_UNIFORM_BUFFER, EmulationUniformBlockBinding, info.handle, info.offset,
599 static_cast<GLsizeiptr>(sizeof(ubo))); 662 static_cast<GLsizeiptr>(sizeof(ubo)));
600 } 663 }
601 664
@@ -842,7 +905,7 @@ bool RasterizerOpenGL::AccelerateDisplay(const Tegra::FramebufferConfig& config,
842 return true; 905 return true;
843} 906}
844 907
845void RasterizerOpenGL::SetupDrawConstBuffers(std::size_t stage_index, const Shader& shader) { 908void RasterizerOpenGL::SetupDrawConstBuffers(std::size_t stage_index, Shader* shader) {
846 static constexpr std::array PARAMETER_LUT = { 909 static constexpr std::array PARAMETER_LUT = {
847 GL_VERTEX_PROGRAM_PARAMETER_BUFFER_NV, GL_TESS_CONTROL_PROGRAM_PARAMETER_BUFFER_NV, 910 GL_VERTEX_PROGRAM_PARAMETER_BUFFER_NV, GL_TESS_CONTROL_PROGRAM_PARAMETER_BUFFER_NV,
848 GL_TESS_EVALUATION_PROGRAM_PARAMETER_BUFFER_NV, GL_GEOMETRY_PROGRAM_PARAMETER_BUFFER_NV, 911 GL_TESS_EVALUATION_PROGRAM_PARAMETER_BUFFER_NV, GL_GEOMETRY_PROGRAM_PARAMETER_BUFFER_NV,
@@ -872,7 +935,7 @@ void RasterizerOpenGL::SetupDrawConstBuffers(std::size_t stage_index, const Shad
872 } 935 }
873} 936}
874 937
875void RasterizerOpenGL::SetupComputeConstBuffers(const Shader& kernel) { 938void RasterizerOpenGL::SetupComputeConstBuffers(Shader* kernel) {
876 MICROPROFILE_SCOPE(OpenGL_UBO); 939 MICROPROFILE_SCOPE(OpenGL_UBO);
877 const auto& launch_desc = system.GPU().KeplerCompute().launch_description; 940 const auto& launch_desc = system.GPU().KeplerCompute().launch_description;
878 const auto& entries = kernel->GetEntries(); 941 const auto& entries = kernel->GetEntries();
@@ -906,8 +969,7 @@ void RasterizerOpenGL::SetupConstBuffer(GLenum stage, u32 binding,
906 if (device.UseAssemblyShaders()) { 969 if (device.UseAssemblyShaders()) {
907 glBindBufferRangeNV(stage, entry.GetIndex(), 0, 0, 0); 970 glBindBufferRangeNV(stage, entry.GetIndex(), 0, 0, 0);
908 } else { 971 } else {
909 glBindBufferRange(GL_UNIFORM_BUFFER, binding, 972 glBindBufferRange(GL_UNIFORM_BUFFER, binding, 0, 0, sizeof(float));
910 buffer_cache.GetEmptyBuffer(sizeof(float)), 0, sizeof(float));
911 } 973 }
912 return; 974 return;
913 } 975 }
@@ -920,28 +982,29 @@ void RasterizerOpenGL::SetupConstBuffer(GLenum stage, u32 binding,
920 982
921 const std::size_t alignment = use_unified ? 4 : device.GetUniformBufferAlignment(); 983 const std::size_t alignment = use_unified ? 4 : device.GetUniformBufferAlignment();
922 const GPUVAddr gpu_addr = buffer.address; 984 const GPUVAddr gpu_addr = buffer.address;
923 auto [cbuf, offset] = buffer_cache.UploadMemory(gpu_addr, size, alignment, false, fast_upload); 985 auto info = buffer_cache.UploadMemory(gpu_addr, size, alignment, false, fast_upload);
924 986
925 if (device.UseAssemblyShaders()) { 987 if (device.UseAssemblyShaders()) {
926 UNIMPLEMENTED_IF(use_unified); 988 UNIMPLEMENTED_IF(use_unified);
927 if (offset != 0) { 989 if (info.offset != 0) {
928 const GLuint staging_cbuf = staging_cbufs[current_cbuf++]; 990 const GLuint staging_cbuf = staging_cbufs[current_cbuf++];
929 glCopyNamedBufferSubData(cbuf, staging_cbuf, offset, 0, size); 991 glCopyNamedBufferSubData(info.handle, staging_cbuf, info.offset, 0, size);
930 cbuf = staging_cbuf; 992 info.handle = staging_cbuf;
931 offset = 0; 993 info.offset = 0;
932 } 994 }
933 glBindBufferRangeNV(stage, binding, cbuf, offset, size); 995 glBindBufferRangeNV(stage, binding, info.handle, info.offset, size);
934 return; 996 return;
935 } 997 }
936 998
937 if (use_unified) { 999 if (use_unified) {
938 glCopyNamedBufferSubData(cbuf, unified_uniform_buffer.handle, offset, unified_offset, size); 1000 glCopyNamedBufferSubData(info.handle, unified_uniform_buffer.handle, info.offset,
1001 unified_offset, size);
939 } else { 1002 } else {
940 glBindBufferRange(GL_UNIFORM_BUFFER, binding, cbuf, offset, size); 1003 glBindBufferRange(GL_UNIFORM_BUFFER, binding, info.handle, info.offset, size);
941 } 1004 }
942} 1005}
943 1006
944void RasterizerOpenGL::SetupDrawGlobalMemory(std::size_t stage_index, const Shader& shader) { 1007void RasterizerOpenGL::SetupDrawGlobalMemory(std::size_t stage_index, Shader* shader) {
945 auto& gpu{system.GPU()}; 1008 auto& gpu{system.GPU()};
946 auto& memory_manager{gpu.MemoryManager()}; 1009 auto& memory_manager{gpu.MemoryManager()};
947 const auto cbufs{gpu.Maxwell3D().state.shader_stages[stage_index]}; 1010 const auto cbufs{gpu.Maxwell3D().state.shader_stages[stage_index]};
@@ -956,7 +1019,7 @@ void RasterizerOpenGL::SetupDrawGlobalMemory(std::size_t stage_index, const Shad
956 } 1019 }
957} 1020}
958 1021
959void RasterizerOpenGL::SetupComputeGlobalMemory(const Shader& kernel) { 1022void RasterizerOpenGL::SetupComputeGlobalMemory(Shader* kernel) {
960 auto& gpu{system.GPU()}; 1023 auto& gpu{system.GPU()};
961 auto& memory_manager{gpu.MemoryManager()}; 1024 auto& memory_manager{gpu.MemoryManager()};
962 const auto cbufs{gpu.KeplerCompute().launch_description.const_buffer_config}; 1025 const auto cbufs{gpu.KeplerCompute().launch_description.const_buffer_config};
@@ -973,13 +1036,12 @@ void RasterizerOpenGL::SetupComputeGlobalMemory(const Shader& kernel) {
973void RasterizerOpenGL::SetupGlobalMemory(u32 binding, const GlobalMemoryEntry& entry, 1036void RasterizerOpenGL::SetupGlobalMemory(u32 binding, const GlobalMemoryEntry& entry,
974 GPUVAddr gpu_addr, std::size_t size) { 1037 GPUVAddr gpu_addr, std::size_t size) {
975 const auto alignment{device.GetShaderStorageBufferAlignment()}; 1038 const auto alignment{device.GetShaderStorageBufferAlignment()};
976 const auto [ssbo, buffer_offset] = 1039 const auto info = buffer_cache.UploadMemory(gpu_addr, size, alignment, entry.is_written);
977 buffer_cache.UploadMemory(gpu_addr, size, alignment, entry.is_written); 1040 glBindBufferRange(GL_SHADER_STORAGE_BUFFER, binding, info.handle, info.offset,
978 glBindBufferRange(GL_SHADER_STORAGE_BUFFER, binding, ssbo, buffer_offset,
979 static_cast<GLsizeiptr>(size)); 1041 static_cast<GLsizeiptr>(size));
980} 1042}
981 1043
982void RasterizerOpenGL::SetupDrawTextures(std::size_t stage_index, const Shader& shader) { 1044void RasterizerOpenGL::SetupDrawTextures(std::size_t stage_index, Shader* shader) {
983 MICROPROFILE_SCOPE(OpenGL_Texture); 1045 MICROPROFILE_SCOPE(OpenGL_Texture);
984 const auto& maxwell3d = system.GPU().Maxwell3D(); 1046 const auto& maxwell3d = system.GPU().Maxwell3D();
985 u32 binding = device.GetBaseBindings(stage_index).sampler; 1047 u32 binding = device.GetBaseBindings(stage_index).sampler;
@@ -992,7 +1054,7 @@ void RasterizerOpenGL::SetupDrawTextures(std::size_t stage_index, const Shader&
992 } 1054 }
993} 1055}
994 1056
995void RasterizerOpenGL::SetupComputeTextures(const Shader& kernel) { 1057void RasterizerOpenGL::SetupComputeTextures(Shader* kernel) {
996 MICROPROFILE_SCOPE(OpenGL_Texture); 1058 MICROPROFILE_SCOPE(OpenGL_Texture);
997 const auto& compute = system.GPU().KeplerCompute(); 1059 const auto& compute = system.GPU().KeplerCompute();
998 u32 binding = 0; 1060 u32 binding = 0;
@@ -1021,7 +1083,7 @@ void RasterizerOpenGL::SetupTexture(u32 binding, const Tegra::Texture::FullTextu
1021 } 1083 }
1022} 1084}
1023 1085
1024void RasterizerOpenGL::SetupDrawImages(std::size_t stage_index, const Shader& shader) { 1086void RasterizerOpenGL::SetupDrawImages(std::size_t stage_index, Shader* shader) {
1025 const auto& maxwell3d = system.GPU().Maxwell3D(); 1087 const auto& maxwell3d = system.GPU().Maxwell3D();
1026 u32 binding = device.GetBaseBindings(stage_index).image; 1088 u32 binding = device.GetBaseBindings(stage_index).image;
1027 for (const auto& entry : shader->GetEntries().images) { 1089 for (const auto& entry : shader->GetEntries().images) {
@@ -1031,7 +1093,7 @@ void RasterizerOpenGL::SetupDrawImages(std::size_t stage_index, const Shader& sh
1031 } 1093 }
1032} 1094}
1033 1095
1034void RasterizerOpenGL::SetupComputeImages(const Shader& shader) { 1096void RasterizerOpenGL::SetupComputeImages(Shader* shader) {
1035 const auto& compute = system.GPU().KeplerCompute(); 1097 const auto& compute = system.GPU().KeplerCompute();
1036 u32 binding = 0; 1098 u32 binding = 0;
1037 for (const auto& entry : shader->GetEntries().images) { 1099 for (const auto& entry : shader->GetEntries().images) {
@@ -1547,12 +1609,70 @@ void RasterizerOpenGL::SyncFramebufferSRGB() {
1547 oglEnable(GL_FRAMEBUFFER_SRGB, gpu.regs.framebuffer_srgb); 1609 oglEnable(GL_FRAMEBUFFER_SRGB, gpu.regs.framebuffer_srgb);
1548} 1610}
1549 1611
1612void RasterizerOpenGL::SyncTransformFeedback() {
1613 // TODO(Rodrigo): Inject SKIP_COMPONENTS*_NV when required. The UNIMPLEMENTED_IF_MSG below
1614 // will fire when that becomes necessary.
1615 const auto& regs = system.GPU().Maxwell3D().regs;
1616
1617 static constexpr std::size_t STRIDE = 3;
1618 std::array<GLint, 128 * STRIDE * Maxwell::NumTransformFeedbackBuffers> attribs;
1619 std::array<GLint, Maxwell::NumTransformFeedbackBuffers> streams;
1620
1621 GLint* cursor = attribs.data();
1622 GLint* current_stream = streams.data();
1623
1624 for (std::size_t feedback = 0; feedback < Maxwell::NumTransformFeedbackBuffers; ++feedback) {
1625 const auto& layout = regs.tfb_layouts[feedback];
1626 UNIMPLEMENTED_IF_MSG(layout.stride != layout.varying_count * 4, "Stride padding");
1627 if (layout.varying_count == 0) {
1628 continue;
1629 }
1630
1631 *current_stream = static_cast<GLint>(feedback);
1632 if (current_stream != streams.data()) {
1633 // Between consecutive streams, insert the expected GL_NEXT_BUFFER_NV separator token
1634 cursor[0] = GL_NEXT_BUFFER_NV;
1635 cursor[1] = 0;
1636 cursor[2] = 0;
1637 cursor += STRIDE;
1638 }
1639 ++current_stream;
1640
1641 const auto& locations = regs.tfb_varying_locs[feedback];
1642 std::optional<u8> current_index;
1643 for (u32 offset = 0; offset < layout.varying_count; ++offset) {
1644 const u8 location = locations[offset];
1645 const u8 index = location / 4;
1646
1647 if (current_index == index) {
1648 // Increase number of components of the previous attachment
1649 ++cursor[-2];
1650 continue;
1651 }
1652 current_index = index;
1653
1654 std::tie(cursor[0], cursor[2]) = TransformFeedbackEnum(location);
1655 cursor[1] = 1;
1656 cursor += STRIDE;
1657 }
1658 }
1659
1660 const GLsizei num_attribs = static_cast<GLsizei>((cursor - attribs.data()) / STRIDE);
1661 const GLsizei num_strides = static_cast<GLsizei>(current_stream - streams.data());
1662 glTransformFeedbackStreamAttribsNV(num_attribs, attribs.data(), num_strides, streams.data(),
1663 GL_INTERLEAVED_ATTRIBS);
1664}
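For a single feedback buffer capturing a 4-component position followed by a 4-component generic attribute 0, the loop above produces attribs = {GL_POSITION, 4, 0, GL_GENERIC_ATTRIB_NV, 4, 0} and streams = {0}: the first location of each varying emits a triplet with a component count of 1, and the following locations with the same index bump cursor[-2] up to 4. A second active buffer would be preceded by a {GL_NEXT_BUFFER_NV, 0, 0} separator triplet.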
1665
1550void RasterizerOpenGL::BeginTransformFeedback(GLenum primitive_mode) { 1666void RasterizerOpenGL::BeginTransformFeedback(GLenum primitive_mode) {
1551 const auto& regs = system.GPU().Maxwell3D().regs; 1667 const auto& regs = system.GPU().Maxwell3D().regs;
1552 if (regs.tfb_enabled == 0) { 1668 if (regs.tfb_enabled == 0) {
1553 return; 1669 return;
1554 } 1670 }
1555 1671
1672 if (device.UseAssemblyShaders()) {
1673 SyncTransformFeedback();
1674 }
1675
1556 UNIMPLEMENTED_IF(regs.IsShaderConfigEnabled(Maxwell::ShaderProgram::TesselationControl) || 1676 UNIMPLEMENTED_IF(regs.IsShaderConfigEnabled(Maxwell::ShaderProgram::TesselationControl) ||
1557 regs.IsShaderConfigEnabled(Maxwell::ShaderProgram::TesselationEval) || 1677 regs.IsShaderConfigEnabled(Maxwell::ShaderProgram::TesselationEval) ||
1558 regs.IsShaderConfigEnabled(Maxwell::ShaderProgram::Geometry)); 1678 regs.IsShaderConfigEnabled(Maxwell::ShaderProgram::Geometry));
@@ -1579,6 +1699,10 @@ void RasterizerOpenGL::BeginTransformFeedback(GLenum primitive_mode) {
1579 static_cast<GLsizeiptr>(size)); 1699 static_cast<GLsizeiptr>(size));
1580 } 1700 }
1581 1701
1702 // We may have to call glBeginTransformFeedbackNV here: on Nvidia's driver the NV and core
1703 // entry points resolve to different implementations (the function pointers differ), and we
1704 // use ARB_transform_feedback3 features through NV_transform_feedback interactions, which the
1705 // ARB extension never defines for the NV-less BeginTransformFeedback. In practice it works.
1582 glBeginTransformFeedback(GL_POINTS); 1706 glBeginTransformFeedback(GL_POINTS);
1583} 1707}
1584 1708
@@ -1600,8 +1724,9 @@ void RasterizerOpenGL::EndTransformFeedback() {
1600 const GLuint handle = transform_feedback_buffers[index].handle; 1724 const GLuint handle = transform_feedback_buffers[index].handle;
1601 const GPUVAddr gpu_addr = binding.Address(); 1725 const GPUVAddr gpu_addr = binding.Address();
1602 const std::size_t size = binding.buffer_size; 1726 const std::size_t size = binding.buffer_size;
1603 const auto [dest_buffer, offset] = buffer_cache.UploadMemory(gpu_addr, size, 4, true); 1727 const auto info = buffer_cache.UploadMemory(gpu_addr, size, 4, true);
1604 glCopyNamedBufferSubData(handle, dest_buffer, 0, offset, static_cast<GLsizeiptr>(size)); 1728 glCopyNamedBufferSubData(handle, info.handle, 0, info.offset,
1729 static_cast<GLsizeiptr>(size));
1605 } 1730 }
1606} 1731}
1607 1732
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h
index f5dc56a0e..4f082592f 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer.h
@@ -19,7 +19,6 @@
19#include "video_core/engines/const_buffer_info.h" 19#include "video_core/engines/const_buffer_info.h"
20#include "video_core/engines/maxwell_3d.h" 20#include "video_core/engines/maxwell_3d.h"
21#include "video_core/rasterizer_accelerated.h" 21#include "video_core/rasterizer_accelerated.h"
22#include "video_core/rasterizer_cache.h"
23#include "video_core/rasterizer_interface.h" 22#include "video_core/rasterizer_interface.h"
24#include "video_core/renderer_opengl/gl_buffer_cache.h" 23#include "video_core/renderer_opengl/gl_buffer_cache.h"
25#include "video_core/renderer_opengl/gl_device.h" 24#include "video_core/renderer_opengl/gl_device.h"
@@ -100,10 +99,10 @@ private:
100 void ConfigureClearFramebuffer(bool using_color, bool using_depth_stencil); 99 void ConfigureClearFramebuffer(bool using_color, bool using_depth_stencil);
101 100
102 /// Configures the current constbuffers to use for the draw command. 101 /// Configures the current constbuffers to use for the draw command.
103 void SetupDrawConstBuffers(std::size_t stage_index, const Shader& shader); 102 void SetupDrawConstBuffers(std::size_t stage_index, Shader* shader);
104 103
105 /// Configures the current constbuffers to use for the kernel invocation. 104 /// Configures the current constbuffers to use for the kernel invocation.
106 void SetupComputeConstBuffers(const Shader& kernel); 105 void SetupComputeConstBuffers(Shader* kernel);
107 106
108 /// Configures a constant buffer. 107 /// Configures a constant buffer.
109 void SetupConstBuffer(GLenum stage, u32 binding, const Tegra::Engines::ConstBufferInfo& buffer, 108 void SetupConstBuffer(GLenum stage, u32 binding, const Tegra::Engines::ConstBufferInfo& buffer,
@@ -111,30 +110,30 @@ private:
111 std::size_t unified_offset); 110 std::size_t unified_offset);
112 111
113 /// Configures the current global memory entries to use for the draw command. 112 /// Configures the current global memory entries to use for the draw command.
114 void SetupDrawGlobalMemory(std::size_t stage_index, const Shader& shader); 113 void SetupDrawGlobalMemory(std::size_t stage_index, Shader* shader);
115 114
116 /// Configures the current global memory entries to use for the kernel invocation. 115 /// Configures the current global memory entries to use for the kernel invocation.
117 void SetupComputeGlobalMemory(const Shader& kernel); 116 void SetupComputeGlobalMemory(Shader* kernel);
118 117
119 /// Configures a constant buffer. 118 /// Configures a constant buffer.
120 void SetupGlobalMemory(u32 binding, const GlobalMemoryEntry& entry, GPUVAddr gpu_addr, 119 void SetupGlobalMemory(u32 binding, const GlobalMemoryEntry& entry, GPUVAddr gpu_addr,
121 std::size_t size); 120 std::size_t size);
122 121
123 /// Configures the current textures to use for the draw command. 122 /// Configures the current textures to use for the draw command.
124 void SetupDrawTextures(std::size_t stage_index, const Shader& shader); 123 void SetupDrawTextures(std::size_t stage_index, Shader* shader);
125 124
126 /// Configures the textures used in a compute shader. 125 /// Configures the textures used in a compute shader.
127 void SetupComputeTextures(const Shader& kernel); 126 void SetupComputeTextures(Shader* kernel);
128 127
129 /// Configures a texture. 128 /// Configures a texture.
130 void SetupTexture(u32 binding, const Tegra::Texture::FullTextureInfo& texture, 129 void SetupTexture(u32 binding, const Tegra::Texture::FullTextureInfo& texture,
131 const SamplerEntry& entry); 130 const SamplerEntry& entry);
132 131
133 /// Configures images in a graphics shader. 132 /// Configures images in a graphics shader.
134 void SetupDrawImages(std::size_t stage_index, const Shader& shader); 133 void SetupDrawImages(std::size_t stage_index, Shader* shader);
135 134
136 /// Configures images in a compute shader. 135 /// Configures images in a compute shader.
137 void SetupComputeImages(const Shader& shader); 136 void SetupComputeImages(Shader* shader);
138 137
139 /// Configures an image. 138 /// Configures an image.
140 void SetupImage(u32 binding, const Tegra::Texture::TICEntry& tic, const ImageEntry& entry); 139 void SetupImage(u32 binding, const Tegra::Texture::TICEntry& tic, const ImageEntry& entry);
@@ -202,6 +201,10 @@ private:
202 /// Syncs the framebuffer sRGB state to match the guest state 201 /// Syncs the framebuffer sRGB state to match the guest state
203 void SyncFramebufferSRGB(); 202 void SyncFramebufferSRGB();
204 203
204 /// Syncs transform feedback state to match guest state
205 /// @note Only valid on assembly shaders
206 void SyncTransformFeedback();
207
205 /// Begin a transform feedback 208 /// Begin a transform feedback
206 void BeginTransformFeedback(GLenum primitive_mode); 209 void BeginTransformFeedback(GLenum primitive_mode);
207 210
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp
index a991ca64a..c6a3bf3a1 100644
--- a/src/video_core/renderer_opengl/gl_shader_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp
@@ -20,6 +20,7 @@
20#include "video_core/engines/maxwell_3d.h" 20#include "video_core/engines/maxwell_3d.h"
21#include "video_core/engines/shader_type.h" 21#include "video_core/engines/shader_type.h"
22#include "video_core/memory_manager.h" 22#include "video_core/memory_manager.h"
23#include "video_core/renderer_opengl/gl_arb_decompiler.h"
23#include "video_core/renderer_opengl/gl_rasterizer.h" 24#include "video_core/renderer_opengl/gl_rasterizer.h"
24#include "video_core/renderer_opengl/gl_shader_cache.h" 25#include "video_core/renderer_opengl/gl_shader_cache.h"
25#include "video_core/renderer_opengl/gl_shader_decompiler.h" 26#include "video_core/renderer_opengl/gl_shader_decompiler.h"
@@ -29,6 +30,7 @@
29#include "video_core/shader/memory_util.h" 30#include "video_core/shader/memory_util.h"
30#include "video_core/shader/registry.h" 31#include "video_core/shader/registry.h"
31#include "video_core/shader/shader_ir.h" 32#include "video_core/shader/shader_ir.h"
33#include "video_core/shader_cache.h"
32 34
33namespace OpenGL { 35namespace OpenGL {
34 36
@@ -147,7 +149,8 @@ ProgramSharedPtr BuildShader(const Device& device, ShaderType shader_type, u64 u
147 auto program = std::make_shared<ProgramHandle>(); 149 auto program = std::make_shared<ProgramHandle>();
148 150
149 if (device.UseAssemblyShaders()) { 151 if (device.UseAssemblyShaders()) {
150 const std::string arb = "Not implemented"; 152 const std::string arb =
153 DecompileAssemblyShader(device, ir, registry, shader_type, shader_id);
151 154
152 GLuint& arb_prog = program->assembly_program.handle; 155 GLuint& arb_prog = program->assembly_program.handle;
153 156
@@ -194,12 +197,9 @@ std::unordered_set<GLenum> GetSupportedFormats() {
194 197
195} // Anonymous namespace 198} // Anonymous namespace
196 199
197CachedShader::CachedShader(VAddr cpu_addr, std::size_t size_in_bytes, 200Shader::Shader(std::shared_ptr<VideoCommon::Shader::Registry> registry_, ShaderEntries entries_,
198 std::shared_ptr<VideoCommon::Shader::Registry> registry, 201 ProgramSharedPtr program_)
199 ShaderEntries entries, ProgramSharedPtr program_) 202 : registry{std::move(registry_)}, entries{std::move(entries_)}, program{std::move(program_)} {
200 : RasterizerCacheObject{cpu_addr}, registry{std::move(registry)}, entries{std::move(entries)},
201 size_in_bytes{size_in_bytes}, program{std::move(program_)} {
202 // Assign either the assembly program or source program. We can't have both.
203 handle = program->assembly_program.handle; 203 handle = program->assembly_program.handle;
204 if (handle == 0) { 204 if (handle == 0) {
205 handle = program->source_program.handle; 205 handle = program->source_program.handle;
@@ -207,16 +207,16 @@ CachedShader::CachedShader(VAddr cpu_addr, std::size_t size_in_bytes,
207 ASSERT(handle != 0); 207 ASSERT(handle != 0);
208} 208}
209 209
210CachedShader::~CachedShader() = default; 210Shader::~Shader() = default;
211 211
212GLuint CachedShader::GetHandle() const { 212GLuint Shader::GetHandle() const {
213 DEBUG_ASSERT(registry->IsConsistent()); 213 DEBUG_ASSERT(registry->IsConsistent());
214 return handle; 214 return handle;
215} 215}
216 216
217Shader CachedShader::CreateStageFromMemory(const ShaderParameters& params, 217std::unique_ptr<Shader> Shader::CreateStageFromMemory(const ShaderParameters& params,
218 Maxwell::ShaderProgram program_type, ProgramCode code, 218 Maxwell::ShaderProgram program_type,
219 ProgramCode code_b) { 219 ProgramCode code, ProgramCode code_b) {
220 const auto shader_type = GetShaderType(program_type); 220 const auto shader_type = GetShaderType(program_type);
221 const std::size_t size_in_bytes = code.size() * sizeof(u64); 221 const std::size_t size_in_bytes = code.size() * sizeof(u64);
222 222
@@ -241,12 +241,12 @@ Shader CachedShader::CreateStageFromMemory(const ShaderParameters& params,
241 entry.bindless_samplers = registry->GetBindlessSamplers(); 241 entry.bindless_samplers = registry->GetBindlessSamplers();
242 params.disk_cache.SaveEntry(std::move(entry)); 242 params.disk_cache.SaveEntry(std::move(entry));
243 243
244 return std::shared_ptr<CachedShader>( 244 return std::unique_ptr<Shader>(new Shader(
245 new CachedShader(params.cpu_addr, size_in_bytes, std::move(registry), 245 std::move(registry), MakeEntries(params.device, ir, shader_type), std::move(program)));
246 MakeEntries(params.device, ir, shader_type), std::move(program)));
247} 246}
248 247
249Shader CachedShader::CreateKernelFromMemory(const ShaderParameters& params, ProgramCode code) { 248std::unique_ptr<Shader> Shader::CreateKernelFromMemory(const ShaderParameters& params,
249 ProgramCode code) {
250 const std::size_t size_in_bytes = code.size() * sizeof(u64); 250 const std::size_t size_in_bytes = code.size() * sizeof(u64);
251 251
252 auto& engine = params.system.GPU().KeplerCompute(); 252 auto& engine = params.system.GPU().KeplerCompute();
@@ -266,23 +266,23 @@ Shader CachedShader::CreateKernelFromMemory(const ShaderParameters& params, Prog
266 entry.bindless_samplers = registry->GetBindlessSamplers(); 266 entry.bindless_samplers = registry->GetBindlessSamplers();
267 params.disk_cache.SaveEntry(std::move(entry)); 267 params.disk_cache.SaveEntry(std::move(entry));
268 268
269 return std::shared_ptr<CachedShader>( 269 return std::unique_ptr<Shader>(new Shader(std::move(registry),
270 new CachedShader(params.cpu_addr, size_in_bytes, std::move(registry), 270 MakeEntries(params.device, ir, ShaderType::Compute),
271 MakeEntries(params.device, ir, ShaderType::Compute), std::move(program))); 271 std::move(program)));
272} 272}
273 273
274Shader CachedShader::CreateFromCache(const ShaderParameters& params, 274std::unique_ptr<Shader> Shader::CreateFromCache(const ShaderParameters& params,
275 const PrecompiledShader& precompiled_shader, 275 const PrecompiledShader& precompiled_shader) {
276 std::size_t size_in_bytes) { 276 return std::unique_ptr<Shader>(new Shader(
277 return std::shared_ptr<CachedShader>( 277 precompiled_shader.registry, precompiled_shader.entries, precompiled_shader.program));
278 new CachedShader(params.cpu_addr, size_in_bytes, precompiled_shader.registry,
279 precompiled_shader.entries, precompiled_shader.program));
280} 278}
281 279
282ShaderCacheOpenGL::ShaderCacheOpenGL(RasterizerOpenGL& rasterizer, Core::System& system, 280ShaderCacheOpenGL::ShaderCacheOpenGL(RasterizerOpenGL& rasterizer, Core::System& system,
283 Core::Frontend::EmuWindow& emu_window, const Device& device) 281 Core::Frontend::EmuWindow& emu_window, const Device& device)
284 : RasterizerCache{rasterizer}, system{system}, emu_window{emu_window}, device{device}, 282 : VideoCommon::ShaderCache<Shader>{rasterizer}, system{system},
285 disk_cache{system} {} 283 emu_window{emu_window}, device{device}, disk_cache{system} {}
284
285ShaderCacheOpenGL::~ShaderCacheOpenGL() = default;
286 286
287void ShaderCacheOpenGL::LoadDiskCache(const std::atomic_bool& stop_loading, 287void ShaderCacheOpenGL::LoadDiskCache(const std::atomic_bool& stop_loading,
288 const VideoCore::DiskResourceLoadCallback& callback) { 288 const VideoCore::DiskResourceLoadCallback& callback) {
@@ -436,7 +436,7 @@ ProgramSharedPtr ShaderCacheOpenGL::GeneratePrecompiledProgram(
436 return program; 436 return program;
437} 437}
438 438
439Shader ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program) { 439Shader* ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program) {
440 if (!system.GPU().Maxwell3D().dirty.flags[Dirty::Shaders]) { 440 if (!system.GPU().Maxwell3D().dirty.flags[Dirty::Shaders]) {
441 return last_shaders[static_cast<std::size_t>(program)]; 441 return last_shaders[static_cast<std::size_t>(program)];
442 } 442 }
@@ -446,8 +446,7 @@ Shader ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program) {
446 446
447 // Look up shader in the cache based on address 447 // Look up shader in the cache based on address
448 const auto cpu_addr{memory_manager.GpuToCpuAddress(address)}; 448 const auto cpu_addr{memory_manager.GpuToCpuAddress(address)};
449 Shader shader{cpu_addr ? TryGet(*cpu_addr) : null_shader}; 449 if (Shader* const shader{cpu_addr ? TryGet(*cpu_addr) : null_shader.get()}) {
450 if (shader) {
451 return last_shaders[static_cast<std::size_t>(program)] = shader; 450 return last_shaders[static_cast<std::size_t>(program)] = shader;
452 } 451 }
453 452
@@ -461,62 +460,64 @@ Shader ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program) {
461 const u8* host_ptr_b = memory_manager.GetPointer(address_b); 460 const u8* host_ptr_b = memory_manager.GetPointer(address_b);
462 code_b = GetShaderCode(memory_manager, address_b, host_ptr_b, false); 461 code_b = GetShaderCode(memory_manager, address_b, host_ptr_b, false);
463 } 462 }
463 const std::size_t code_size = code.size() * sizeof(u64);
464 464
465 const auto unique_identifier = GetUniqueIdentifier( 465 const u64 unique_identifier = GetUniqueIdentifier(
466 GetShaderType(program), program == Maxwell::ShaderProgram::VertexA, code, code_b); 466 GetShaderType(program), program == Maxwell::ShaderProgram::VertexA, code, code_b);
467 467
468 const ShaderParameters params{system, disk_cache, device, 468 const ShaderParameters params{system, disk_cache, device,
469 *cpu_addr, host_ptr, unique_identifier}; 469 *cpu_addr, host_ptr, unique_identifier};
470 470
471 std::unique_ptr<Shader> shader;
471 const auto found = runtime_cache.find(unique_identifier); 472 const auto found = runtime_cache.find(unique_identifier);
472 if (found == runtime_cache.end()) { 473 if (found == runtime_cache.end()) {
473 shader = CachedShader::CreateStageFromMemory(params, program, std::move(code), 474 shader = Shader::CreateStageFromMemory(params, program, std::move(code), std::move(code_b));
474 std::move(code_b));
475 } else { 475 } else {
476 const std::size_t size_in_bytes = code.size() * sizeof(u64); 476 shader = Shader::CreateFromCache(params, found->second);
477 shader = CachedShader::CreateFromCache(params, found->second, size_in_bytes);
478 } 477 }
479 478
479 Shader* const result = shader.get();
480 if (cpu_addr) { 480 if (cpu_addr) {
481 Register(shader); 481 Register(std::move(shader), *cpu_addr, code_size);
482 } else { 482 } else {
483 null_shader = shader; 483 null_shader = std::move(shader);
484 } 484 }
485 485
486 return last_shaders[static_cast<std::size_t>(program)] = shader; 486 return last_shaders[static_cast<std::size_t>(program)] = result;
487} 487}
488 488
489Shader ShaderCacheOpenGL::GetComputeKernel(GPUVAddr code_addr) { 489Shader* ShaderCacheOpenGL::GetComputeKernel(GPUVAddr code_addr) {
490 auto& memory_manager{system.GPU().MemoryManager()}; 490 auto& memory_manager{system.GPU().MemoryManager()};
491 const auto cpu_addr{memory_manager.GpuToCpuAddress(code_addr)}; 491 const auto cpu_addr{memory_manager.GpuToCpuAddress(code_addr)};
492 492
493 auto kernel = cpu_addr ? TryGet(*cpu_addr) : null_kernel; 493 if (Shader* const kernel = cpu_addr ? TryGet(*cpu_addr) : null_kernel.get()) {
494 if (kernel) {
495 return kernel; 494 return kernel;
496 } 495 }
497 496
498 const auto host_ptr{memory_manager.GetPointer(code_addr)}; 497 const auto host_ptr{memory_manager.GetPointer(code_addr)};
 499 // No kernel found; create a new one 498 // No kernel found; create a new one
500 auto code{GetShaderCode(memory_manager, code_addr, host_ptr, true)}; 499 ProgramCode code{GetShaderCode(memory_manager, code_addr, host_ptr, true)};
501 const auto unique_identifier{GetUniqueIdentifier(ShaderType::Compute, false, code)}; 500 const std::size_t code_size{code.size() * sizeof(u64)};
501 const u64 unique_identifier{GetUniqueIdentifier(ShaderType::Compute, false, code)};
502 502
503 const ShaderParameters params{system, disk_cache, device, 503 const ShaderParameters params{system, disk_cache, device,
504 *cpu_addr, host_ptr, unique_identifier}; 504 *cpu_addr, host_ptr, unique_identifier};
505 505
506 std::unique_ptr<Shader> kernel;
506 const auto found = runtime_cache.find(unique_identifier); 507 const auto found = runtime_cache.find(unique_identifier);
507 if (found == runtime_cache.end()) { 508 if (found == runtime_cache.end()) {
508 kernel = CachedShader::CreateKernelFromMemory(params, std::move(code)); 509 kernel = Shader::CreateKernelFromMemory(params, std::move(code));
509 } else { 510 } else {
510 const std::size_t size_in_bytes = code.size() * sizeof(u64); 511 kernel = Shader::CreateFromCache(params, found->second);
511 kernel = CachedShader::CreateFromCache(params, found->second, size_in_bytes);
512 } 512 }
513 513
514 Shader* const result = kernel.get();
514 if (cpu_addr) { 515 if (cpu_addr) {
515 Register(kernel); 516 Register(std::move(kernel), *cpu_addr, code_size);
516 } else { 517 } else {
517 null_kernel = kernel; 518 null_kernel = std::move(kernel);
518 } 519 }
519 return kernel; 520 return result;
520} 521}
521 522
522} // namespace OpenGL 523} // namespace OpenGL
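The net effect of this file's changes: shaders are created as std::unique_ptr<Shader>, ownership is handed to the generic cache through Register(std::move(shader), addr, size), and callers keep non-owning Shader* handles that stay valid while the cache owns the object. A simplified, assumed model of that contract (not the actual VideoCommon::ShaderCache interface, which is not shown in this diff):

    // Sketch: an owning cache keyed by CPU address, returning stable raw
    // pointers. Stand-in for the Register/TryGet pair used above.
    #include <cstddef>
    #include <map>
    #include <memory>

    template <class T>
    class OwningRangeCache {
    public:
        void Register(std::unique_ptr<T> object, std::size_t addr, std::size_t size) {
            entries[addr] = Entry{size, std::move(object)};
        }

        T* TryGet(std::size_t addr) const {
            const auto it = entries.find(addr);
            return it != entries.end() ? it->second.object.get() : nullptr;
        }

    private:
        struct Entry {
            std::size_t size;
            std::unique_ptr<T> object;
        };
        std::map<std::size_t, Entry> entries;
    };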
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.h b/src/video_core/renderer_opengl/gl_shader_cache.h
index b2ae8d7f9..994aaeaf2 100644
--- a/src/video_core/renderer_opengl/gl_shader_cache.h
+++ b/src/video_core/renderer_opengl/gl_shader_cache.h
@@ -18,12 +18,12 @@
18 18
19#include "common/common_types.h" 19#include "common/common_types.h"
20#include "video_core/engines/shader_type.h" 20#include "video_core/engines/shader_type.h"
21#include "video_core/rasterizer_cache.h"
22#include "video_core/renderer_opengl/gl_resource_manager.h" 21#include "video_core/renderer_opengl/gl_resource_manager.h"
23#include "video_core/renderer_opengl/gl_shader_decompiler.h" 22#include "video_core/renderer_opengl/gl_shader_decompiler.h"
24#include "video_core/renderer_opengl/gl_shader_disk_cache.h" 23#include "video_core/renderer_opengl/gl_shader_disk_cache.h"
25#include "video_core/shader/registry.h" 24#include "video_core/shader/registry.h"
26#include "video_core/shader/shader_ir.h" 25#include "video_core/shader/shader_ir.h"
26#include "video_core/shader_cache.h"
27 27
28namespace Core { 28namespace Core {
29class System; 29class System;
@@ -35,12 +35,9 @@ class EmuWindow;
35 35
36namespace OpenGL { 36namespace OpenGL {
37 37
38class CachedShader;
39class Device; 38class Device;
40class RasterizerOpenGL; 39class RasterizerOpenGL;
41struct UnspecializedShader;
42 40
43using Shader = std::shared_ptr<CachedShader>;
44using Maxwell = Tegra::Engines::Maxwell3D::Regs; 41using Maxwell = Tegra::Engines::Maxwell3D::Regs;
45 42
46struct ProgramHandle { 43struct ProgramHandle {
@@ -64,62 +61,53 @@ struct ShaderParameters {
64 u64 unique_identifier; 61 u64 unique_identifier;
65}; 62};
66 63
67class CachedShader final : public RasterizerCacheObject { 64class Shader final {
68public: 65public:
69 ~CachedShader(); 66 ~Shader();
70 67
71 /// Gets the GL program handle for the shader 68 /// Gets the GL program handle for the shader
72 GLuint GetHandle() const; 69 GLuint GetHandle() const;
73 70
74 /// Returns the size in bytes of the shader
75 std::size_t GetSizeInBytes() const override {
76 return size_in_bytes;
77 }
78
79 /// Gets the shader entries for the shader 71 /// Gets the shader entries for the shader
80 const ShaderEntries& GetEntries() const { 72 const ShaderEntries& GetEntries() const {
81 return entries; 73 return entries;
82 } 74 }
83 75
84 static Shader CreateStageFromMemory(const ShaderParameters& params, 76 static std::unique_ptr<Shader> CreateStageFromMemory(const ShaderParameters& params,
85 Maxwell::ShaderProgram program_type, 77 Maxwell::ShaderProgram program_type,
86 ProgramCode program_code, ProgramCode program_code_b); 78 ProgramCode program_code,
87 static Shader CreateKernelFromMemory(const ShaderParameters& params, ProgramCode code); 79 ProgramCode program_code_b);
80 static std::unique_ptr<Shader> CreateKernelFromMemory(const ShaderParameters& params,
81 ProgramCode code);
88 82
89 static Shader CreateFromCache(const ShaderParameters& params, 83 static std::unique_ptr<Shader> CreateFromCache(const ShaderParameters& params,
90 const PrecompiledShader& precompiled_shader, 84 const PrecompiledShader& precompiled_shader);
91 std::size_t size_in_bytes);
92 85
93private: 86private:
94 explicit CachedShader(VAddr cpu_addr, std::size_t size_in_bytes, 87 explicit Shader(std::shared_ptr<VideoCommon::Shader::Registry> registry, ShaderEntries entries,
95 std::shared_ptr<VideoCommon::Shader::Registry> registry, 88 ProgramSharedPtr program);
96 ShaderEntries entries, ProgramSharedPtr program);
97 89
98 std::shared_ptr<VideoCommon::Shader::Registry> registry; 90 std::shared_ptr<VideoCommon::Shader::Registry> registry;
99 ShaderEntries entries; 91 ShaderEntries entries;
100 std::size_t size_in_bytes = 0;
101 ProgramSharedPtr program; 92 ProgramSharedPtr program;
102 GLuint handle = 0; 93 GLuint handle = 0;
103}; 94};
104 95
105class ShaderCacheOpenGL final : public RasterizerCache<Shader> { 96class ShaderCacheOpenGL final : public VideoCommon::ShaderCache<Shader> {
106public: 97public:
107 explicit ShaderCacheOpenGL(RasterizerOpenGL& rasterizer, Core::System& system, 98 explicit ShaderCacheOpenGL(RasterizerOpenGL& rasterizer, Core::System& system,
108 Core::Frontend::EmuWindow& emu_window, const Device& device); 99 Core::Frontend::EmuWindow& emu_window, const Device& device);
100 ~ShaderCacheOpenGL() override;
109 101
110 /// Loads disk cache for the current game 102 /// Loads disk cache for the current game
111 void LoadDiskCache(const std::atomic_bool& stop_loading, 103 void LoadDiskCache(const std::atomic_bool& stop_loading,
112 const VideoCore::DiskResourceLoadCallback& callback); 104 const VideoCore::DiskResourceLoadCallback& callback);
113 105
114 /// Gets the current specified shader stage program 106 /// Gets the current specified shader stage program
115 Shader GetStageProgram(Maxwell::ShaderProgram program); 107 Shader* GetStageProgram(Maxwell::ShaderProgram program);
116 108
117 /// Gets a compute kernel in the passed address 109 /// Gets a compute kernel in the passed address
118 Shader GetComputeKernel(GPUVAddr code_addr); 110 Shader* GetComputeKernel(GPUVAddr code_addr);
119
120protected:
121 // We do not have to flush this cache as things in it are never modified by us.
122 void FlushObjectInner(const Shader& object) override {}
123 111
124private: 112private:
125 ProgramSharedPtr GeneratePrecompiledProgram( 113 ProgramSharedPtr GeneratePrecompiledProgram(
@@ -132,10 +120,10 @@ private:
132 ShaderDiskCacheOpenGL disk_cache; 120 ShaderDiskCacheOpenGL disk_cache;
133 std::unordered_map<u64, PrecompiledShader> runtime_cache; 121 std::unordered_map<u64, PrecompiledShader> runtime_cache;
134 122
135 Shader null_shader{}; 123 std::unique_ptr<Shader> null_shader;
136 Shader null_kernel{}; 124 std::unique_ptr<Shader> null_kernel;
137 125
138 std::array<Shader, Maxwell::MaxShaderProgram> last_shaders; 126 std::array<Shader*, Maxwell::MaxShaderProgram> last_shaders{};
139}; 127};
140 128
141} // namespace OpenGL 129} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
index d6e30b321..2c49aeaac 100644
--- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
@@ -37,6 +37,7 @@ using Tegra::Shader::IpaMode;
37using Tegra::Shader::IpaSampleMode; 37using Tegra::Shader::IpaSampleMode;
38using Tegra::Shader::PixelImap; 38using Tegra::Shader::PixelImap;
39using Tegra::Shader::Register; 39using Tegra::Shader::Register;
40using Tegra::Shader::TextureType;
40using VideoCommon::Shader::BuildTransformFeedback; 41using VideoCommon::Shader::BuildTransformFeedback;
41using VideoCommon::Shader::Registry; 42using VideoCommon::Shader::Registry;
42 43
@@ -526,6 +527,9 @@ private:
526 if (device.HasImageLoadFormatted()) { 527 if (device.HasImageLoadFormatted()) {
527 code.AddLine("#extension GL_EXT_shader_image_load_formatted : require"); 528 code.AddLine("#extension GL_EXT_shader_image_load_formatted : require");
528 } 529 }
530 if (device.HasTextureShadowLod()) {
531 code.AddLine("#extension GL_EXT_texture_shadow_lod : require");
532 }
529 if (device.HasWarpIntrinsics()) { 533 if (device.HasWarpIntrinsics()) {
530 code.AddLine("#extension GL_NV_gpu_shader5 : require"); 534 code.AddLine("#extension GL_NV_gpu_shader5 : require");
531 code.AddLine("#extension GL_NV_shader_thread_group : require"); 535 code.AddLine("#extension GL_NV_shader_thread_group : require");
@@ -909,13 +913,13 @@ private:
909 return "samplerBuffer"; 913 return "samplerBuffer";
910 } 914 }
911 switch (sampler.type) { 915 switch (sampler.type) {
912 case Tegra::Shader::TextureType::Texture1D: 916 case TextureType::Texture1D:
913 return "sampler1D"; 917 return "sampler1D";
914 case Tegra::Shader::TextureType::Texture2D: 918 case TextureType::Texture2D:
915 return "sampler2D"; 919 return "sampler2D";
916 case Tegra::Shader::TextureType::Texture3D: 920 case TextureType::Texture3D:
917 return "sampler3D"; 921 return "sampler3D";
918 case Tegra::Shader::TextureType::TextureCube: 922 case TextureType::TextureCube:
919 return "samplerCube"; 923 return "samplerCube";
920 default: 924 default:
921 UNREACHABLE(); 925 UNREACHABLE();
@@ -1380,8 +1384,19 @@ private:
1380 const std::size_t count = operation.GetOperandsCount(); 1384 const std::size_t count = operation.GetOperandsCount();
1381 const bool has_array = meta->sampler.is_array; 1385 const bool has_array = meta->sampler.is_array;
1382 const bool has_shadow = meta->sampler.is_shadow; 1386 const bool has_shadow = meta->sampler.is_shadow;
1387 const bool workaround_lod_array_shadow_as_grad =
1388 !device.HasTextureShadowLod() && function_suffix == "Lod" && meta->sampler.is_shadow &&
1389 ((meta->sampler.type == TextureType::Texture2D && meta->sampler.is_array) ||
1390 meta->sampler.type == TextureType::TextureCube);
1391
1392 std::string expr = "texture";
1393
1394 if (workaround_lod_array_shadow_as_grad) {
1395 expr += "Grad";
1396 } else {
1397 expr += function_suffix;
1398 }
1383 1399
1384 std::string expr = "texture" + function_suffix;
1385 if (!meta->aoffi.empty()) { 1400 if (!meta->aoffi.empty()) {
1386 expr += "Offset"; 1401 expr += "Offset";
1387 } else if (!meta->ptp.empty()) { 1402 } else if (!meta->ptp.empty()) {
@@ -1415,6 +1430,16 @@ private:
1415 expr += ')'; 1430 expr += ')';
1416 } 1431 }
1417 1432
1433 if (workaround_lod_array_shadow_as_grad) {
1434 switch (meta->sampler.type) {
1435 case TextureType::Texture2D:
1436 return expr + ", vec2(0.0), vec2(0.0))";
1437 case TextureType::TextureCube:
1438 return expr + ", vec3(0.0), vec3(0.0))";
1439 }
1440 UNREACHABLE();
1441 }
1442
1418 for (const auto& variant : extras) { 1443 for (const auto& variant : extras) {
1419 if (const auto argument = std::get_if<TextureArgument>(&variant)) { 1444 if (const auto argument = std::get_if<TextureArgument>(&variant)) {
1420 expr += GenerateTextureArgument(*argument); 1445 expr += GenerateTextureArgument(*argument);
@@ -2041,8 +2066,19 @@ private:
2041 const auto meta = std::get_if<MetaTexture>(&operation.GetMeta()); 2066 const auto meta = std::get_if<MetaTexture>(&operation.GetMeta());
2042 ASSERT(meta); 2067 ASSERT(meta);
2043 2068
2044 std::string expr = GenerateTexture( 2069 std::string expr{};
2045 operation, "Lod", {TextureArgument{Type::Float, meta->lod}, TextureOffset{}}); 2070
2071 if (!device.HasTextureShadowLod() && meta->sampler.is_shadow &&
2072 ((meta->sampler.type == TextureType::Texture2D && meta->sampler.is_array) ||
2073 meta->sampler.type == TextureType::TextureCube)) {
2074 LOG_ERROR(Render_OpenGL,
2075 "Device lacks GL_EXT_texture_shadow_lod, using textureGrad as a workaround");
2076 expr = GenerateTexture(operation, "Lod", {});
2077 } else {
2078 expr = GenerateTexture(operation, "Lod",
2079 {TextureArgument{Type::Float, meta->lod}, TextureOffset{}});
2080 }
2081
2046 if (meta->sampler.is_shadow) { 2082 if (meta->sampler.is_shadow) {
2047 expr = "vec4(" + expr + ')'; 2083 expr = "vec4(" + expr + ')';
2048 } 2084 }
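The workaround above exists because core GLSL has no textureLod overloads for sampler2DArrayShadow or samplerCubeShadow; GL_EXT_texture_shadow_lod adds them. Where the extension is missing, sampling through textureGrad with zero derivatives pins the fetch to the base mip level, which matches an explicit LOD of zero (non-zero LODs cannot be honored, hence the LOG_ERROR). Illustrative output, assuming a sampler2DArrayShadow uniform named shadow_array:

    // Sketch: the two GLSL forms the decompiler can emit for a shadow LOD fetch.
    // With GL_EXT_texture_shadow_lod available:
    constexpr const char* with_ext =
        "float r = textureLod(shadow_array, coords, 0.0);";
    // Fallback: zero derivatives force base-level sampling (cube shadow
    // samplers use vec3(0.0) derivatives instead, as in the hunk above).
    constexpr const char* without_ext =
        "float r = textureGrad(shadow_array, coords, vec2(0.0), vec2(0.0));";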
diff --git a/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp b/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp
index 9e95a122b..653c3f2f9 100644
--- a/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp
@@ -29,6 +29,8 @@ using VideoCommon::Shader::KeyMap;
29 29
30namespace { 30namespace {
31 31
32using VideoCommon::Shader::SeparateSamplerKey;
33
32using ShaderCacheVersionHash = std::array<u8, 64>; 34using ShaderCacheVersionHash = std::array<u8, 64>;
33 35
34struct ConstBufferKey { 36struct ConstBufferKey {
@@ -37,18 +39,26 @@ struct ConstBufferKey {
37 u32 value = 0; 39 u32 value = 0;
38}; 40};
39 41
40struct BoundSamplerKey { 42struct BoundSamplerEntry {
41 u32 offset = 0; 43 u32 offset = 0;
42 Tegra::Engines::SamplerDescriptor sampler; 44 Tegra::Engines::SamplerDescriptor sampler;
43}; 45};
44 46
45struct BindlessSamplerKey { 47struct SeparateSamplerEntry {
48 u32 cbuf1 = 0;
49 u32 cbuf2 = 0;
50 u32 offset1 = 0;
51 u32 offset2 = 0;
52 Tegra::Engines::SamplerDescriptor sampler;
53};
54
55struct BindlessSamplerEntry {
46 u32 cbuf = 0; 56 u32 cbuf = 0;
47 u32 offset = 0; 57 u32 offset = 0;
48 Tegra::Engines::SamplerDescriptor sampler; 58 Tegra::Engines::SamplerDescriptor sampler;
49}; 59};
50 60
51constexpr u32 NativeVersion = 20; 61constexpr u32 NativeVersion = 21;
52 62
53ShaderCacheVersionHash GetShaderCacheVersionHash() { 63ShaderCacheVersionHash GetShaderCacheVersionHash() {
54 ShaderCacheVersionHash hash{}; 64 ShaderCacheVersionHash hash{};
@@ -87,12 +97,14 @@ bool ShaderDiskCacheEntry::Load(FileUtil::IOFile& file) {
87 u32 texture_handler_size_value; 97 u32 texture_handler_size_value;
88 u32 num_keys; 98 u32 num_keys;
89 u32 num_bound_samplers; 99 u32 num_bound_samplers;
100 u32 num_separate_samplers;
90 u32 num_bindless_samplers; 101 u32 num_bindless_samplers;
91 if (file.ReadArray(&unique_identifier, 1) != 1 || file.ReadArray(&bound_buffer, 1) != 1 || 102 if (file.ReadArray(&unique_identifier, 1) != 1 || file.ReadArray(&bound_buffer, 1) != 1 ||
92 file.ReadArray(&is_texture_handler_size_known, 1) != 1 || 103 file.ReadArray(&is_texture_handler_size_known, 1) != 1 ||
93 file.ReadArray(&texture_handler_size_value, 1) != 1 || 104 file.ReadArray(&texture_handler_size_value, 1) != 1 ||
94 file.ReadArray(&graphics_info, 1) != 1 || file.ReadArray(&compute_info, 1) != 1 || 105 file.ReadArray(&graphics_info, 1) != 1 || file.ReadArray(&compute_info, 1) != 1 ||
95 file.ReadArray(&num_keys, 1) != 1 || file.ReadArray(&num_bound_samplers, 1) != 1 || 106 file.ReadArray(&num_keys, 1) != 1 || file.ReadArray(&num_bound_samplers, 1) != 1 ||
107 file.ReadArray(&num_separate_samplers, 1) != 1 ||
96 file.ReadArray(&num_bindless_samplers, 1) != 1) { 108 file.ReadArray(&num_bindless_samplers, 1) != 1) {
97 return false; 109 return false;
98 } 110 }
@@ -101,23 +113,32 @@ bool ShaderDiskCacheEntry::Load(FileUtil::IOFile& file) {
101 } 113 }
102 114
103 std::vector<ConstBufferKey> flat_keys(num_keys); 115 std::vector<ConstBufferKey> flat_keys(num_keys);
104 std::vector<BoundSamplerKey> flat_bound_samplers(num_bound_samplers); 116 std::vector<BoundSamplerEntry> flat_bound_samplers(num_bound_samplers);
105 std::vector<BindlessSamplerKey> flat_bindless_samplers(num_bindless_samplers); 117 std::vector<SeparateSamplerEntry> flat_separate_samplers(num_separate_samplers);
118 std::vector<BindlessSamplerEntry> flat_bindless_samplers(num_bindless_samplers);
106 if (file.ReadArray(flat_keys.data(), flat_keys.size()) != flat_keys.size() || 119 if (file.ReadArray(flat_keys.data(), flat_keys.size()) != flat_keys.size() ||
107 file.ReadArray(flat_bound_samplers.data(), flat_bound_samplers.size()) != 120 file.ReadArray(flat_bound_samplers.data(), flat_bound_samplers.size()) !=
108 flat_bound_samplers.size() || 121 flat_bound_samplers.size() ||
122 file.ReadArray(flat_separate_samplers.data(), flat_separate_samplers.size()) !=
123 flat_separate_samplers.size() ||
109 file.ReadArray(flat_bindless_samplers.data(), flat_bindless_samplers.size()) != 124 file.ReadArray(flat_bindless_samplers.data(), flat_bindless_samplers.size()) !=
110 flat_bindless_samplers.size()) { 125 flat_bindless_samplers.size()) {
111 return false; 126 return false;
112 } 127 }
113 for (const auto& key : flat_keys) { 128 for (const auto& entry : flat_keys) {
114 keys.insert({{key.cbuf, key.offset}, key.value}); 129 keys.insert({{entry.cbuf, entry.offset}, entry.value});
115 } 130 }
116 for (const auto& key : flat_bound_samplers) { 131 for (const auto& entry : flat_bound_samplers) {
117 bound_samplers.emplace(key.offset, key.sampler); 132 bound_samplers.emplace(entry.offset, entry.sampler);
118 } 133 }
119 for (const auto& key : flat_bindless_samplers) { 134 for (const auto& entry : flat_separate_samplers) {
120 bindless_samplers.insert({{key.cbuf, key.offset}, key.sampler}); 135 SeparateSamplerKey key;
136 key.buffers = {entry.cbuf1, entry.cbuf2};
137 key.offsets = {entry.offset1, entry.offset2};
138 separate_samplers.emplace(key, entry.sampler);
139 }
140 for (const auto& entry : flat_bindless_samplers) {
141 bindless_samplers.insert({{entry.cbuf, entry.offset}, entry.sampler});
121 } 142 }
122 143
123 return true; 144 return true;
@@ -142,6 +163,7 @@ bool ShaderDiskCacheEntry::Save(FileUtil::IOFile& file) const {
142 file.WriteObject(graphics_info) != 1 || file.WriteObject(compute_info) != 1 || 163 file.WriteObject(graphics_info) != 1 || file.WriteObject(compute_info) != 1 ||
143 file.WriteObject(static_cast<u32>(keys.size())) != 1 || 164 file.WriteObject(static_cast<u32>(keys.size())) != 1 ||
144 file.WriteObject(static_cast<u32>(bound_samplers.size())) != 1 || 165 file.WriteObject(static_cast<u32>(bound_samplers.size())) != 1 ||
166 file.WriteObject(static_cast<u32>(separate_samplers.size())) != 1 ||
145 file.WriteObject(static_cast<u32>(bindless_samplers.size())) != 1) { 167 file.WriteObject(static_cast<u32>(bindless_samplers.size())) != 1) {
146 return false; 168 return false;
147 } 169 }
@@ -152,22 +174,34 @@ bool ShaderDiskCacheEntry::Save(FileUtil::IOFile& file) const {
152 flat_keys.push_back(ConstBufferKey{address.first, address.second, value}); 174 flat_keys.push_back(ConstBufferKey{address.first, address.second, value});
153 } 175 }
154 176
155 std::vector<BoundSamplerKey> flat_bound_samplers; 177 std::vector<BoundSamplerEntry> flat_bound_samplers;
156 flat_bound_samplers.reserve(bound_samplers.size()); 178 flat_bound_samplers.reserve(bound_samplers.size());
157 for (const auto& [address, sampler] : bound_samplers) { 179 for (const auto& [address, sampler] : bound_samplers) {
158 flat_bound_samplers.push_back(BoundSamplerKey{address, sampler}); 180 flat_bound_samplers.push_back(BoundSamplerEntry{address, sampler});
181 }
182
183 std::vector<SeparateSamplerEntry> flat_separate_samplers;
184 flat_separate_samplers.reserve(separate_samplers.size());
185 for (const auto& [key, sampler] : separate_samplers) {
186 SeparateSamplerEntry entry;
187 std::tie(entry.cbuf1, entry.cbuf2) = key.buffers;
188 std::tie(entry.offset1, entry.offset2) = key.offsets;
189 entry.sampler = sampler;
190 flat_separate_samplers.push_back(entry);
159 } 191 }
160 192
161 std::vector<BindlessSamplerKey> flat_bindless_samplers; 193 std::vector<BindlessSamplerEntry> flat_bindless_samplers;
162 flat_bindless_samplers.reserve(bindless_samplers.size()); 194 flat_bindless_samplers.reserve(bindless_samplers.size());
163 for (const auto& [address, sampler] : bindless_samplers) { 195 for (const auto& [address, sampler] : bindless_samplers) {
164 flat_bindless_samplers.push_back( 196 flat_bindless_samplers.push_back(
165 BindlessSamplerKey{address.first, address.second, sampler}); 197 BindlessSamplerEntry{address.first, address.second, sampler});
166 } 198 }
167 199
168 return file.WriteArray(flat_keys.data(), flat_keys.size()) == flat_keys.size() && 200 return file.WriteArray(flat_keys.data(), flat_keys.size()) == flat_keys.size() &&
169 file.WriteArray(flat_bound_samplers.data(), flat_bound_samplers.size()) == 201 file.WriteArray(flat_bound_samplers.data(), flat_bound_samplers.size()) ==
170 flat_bound_samplers.size() && 202 flat_bound_samplers.size() &&
203 file.WriteArray(flat_separate_samplers.data(), flat_separate_samplers.size()) ==
204 flat_separate_samplers.size() &&
171 file.WriteArray(flat_bindless_samplers.data(), flat_bindless_samplers.size()) == 205 file.WriteArray(flat_bindless_samplers.data(), flat_bindless_samplers.size()) ==
172 flat_bindless_samplers.size(); 206 flat_bindless_samplers.size();
173} 207}
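The serialization change follows the file's existing pattern: each hashed map is flattened into a vector of trivially copyable records so a single WriteArray/ReadArray call moves the whole run, with a u32 count written up front. Inserting the separate-sampler run between the bound and bindless runs changes the on-disk layout, hence the NativeVersion bump to 21. A standalone sketch of that flatten-and-write pattern, with simplified stand-in types:

    #include <cstdint>
    #include <cstdio>
    #include <unordered_map>
    #include <vector>

    struct FlatRecord {
        std::uint32_t cbuf;
        std::uint32_t offset;
        std::uint32_t value;
    };

    // Sketch: count first, then the records in one contiguous write.
    void SaveFlattened(std::FILE* file,
                       const std::unordered_map<std::uint64_t, std::uint32_t>& map) {
        std::vector<FlatRecord> flat;
        flat.reserve(map.size());
        for (const auto& [key, value] : map) {
            flat.push_back({static_cast<std::uint32_t>(key >> 32),
                            static_cast<std::uint32_t>(key), value});
        }
        const auto count = static_cast<std::uint32_t>(flat.size());
        std::fwrite(&count, sizeof(count), 1, file);
        std::fwrite(flat.data(), sizeof(FlatRecord), flat.size(), file);
    }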
diff --git a/src/video_core/renderer_opengl/gl_shader_disk_cache.h b/src/video_core/renderer_opengl/gl_shader_disk_cache.h
index d5be52e40..a79cef0e9 100644
--- a/src/video_core/renderer_opengl/gl_shader_disk_cache.h
+++ b/src/video_core/renderer_opengl/gl_shader_disk_cache.h
@@ -57,6 +57,7 @@ struct ShaderDiskCacheEntry {
57 VideoCommon::Shader::ComputeInfo compute_info; 57 VideoCommon::Shader::ComputeInfo compute_info;
58 VideoCommon::Shader::KeyMap keys; 58 VideoCommon::Shader::KeyMap keys;
59 VideoCommon::Shader::BoundSamplerMap bound_samplers; 59 VideoCommon::Shader::BoundSamplerMap bound_samplers;
60 VideoCommon::Shader::SeparateSamplerMap separate_samplers;
60 VideoCommon::Shader::BindlessSamplerMap bindless_samplers; 61 VideoCommon::Shader::BindlessSamplerMap bindless_samplers;
61}; 62};
62 63
diff --git a/src/video_core/renderer_opengl/gl_stream_buffer.cpp b/src/video_core/renderer_opengl/gl_stream_buffer.cpp
index 6ec328c53..3655ff629 100644
--- a/src/video_core/renderer_opengl/gl_stream_buffer.cpp
+++ b/src/video_core/renderer_opengl/gl_stream_buffer.cpp
@@ -2,11 +2,13 @@
2// Licensed under GPLv2 or any later version 2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included. 3// Refer to the license.txt file included.
4 4
5#include <deque> 5#include <tuple>
6#include <vector> 6#include <vector>
7
7#include "common/alignment.h" 8#include "common/alignment.h"
8#include "common/assert.h" 9#include "common/assert.h"
9#include "common/microprofile.h" 10#include "common/microprofile.h"
11#include "video_core/renderer_opengl/gl_device.h"
10#include "video_core/renderer_opengl/gl_stream_buffer.h" 12#include "video_core/renderer_opengl/gl_stream_buffer.h"
11 13
12MICROPROFILE_DEFINE(OpenGL_StreamBuffer, "OpenGL", "Stream Buffer Orphaning", 14MICROPROFILE_DEFINE(OpenGL_StreamBuffer, "OpenGL", "Stream Buffer Orphaning",
@@ -14,8 +16,7 @@ MICROPROFILE_DEFINE(OpenGL_StreamBuffer, "OpenGL", "Stream Buffer Orphaning",
14 16
15namespace OpenGL { 17namespace OpenGL {
16 18
17OGLStreamBuffer::OGLStreamBuffer(GLsizeiptr size, bool vertex_data_usage, bool prefer_coherent, 19OGLStreamBuffer::OGLStreamBuffer(const Device& device, GLsizeiptr size, bool vertex_data_usage)
18 bool use_persistent)
19 : buffer_size(size) { 20 : buffer_size(size) {
20 gl_buffer.Create(); 21 gl_buffer.Create();
21 22
@@ -29,34 +30,22 @@ OGLStreamBuffer::OGLStreamBuffer(GLsizeiptr size, bool vertex_data_usage, bool p
29 allocate_size *= 2; 30 allocate_size *= 2;
30 } 31 }
31 32
32 if (use_persistent) { 33 static constexpr GLbitfield flags = GL_MAP_WRITE_BIT | GL_MAP_PERSISTENT_BIT;
33 persistent = true; 34 glNamedBufferStorage(gl_buffer.handle, allocate_size, nullptr, flags);
34 coherent = prefer_coherent; 35 mapped_ptr = static_cast<u8*>(
35 const GLbitfield flags = 36 glMapNamedBufferRange(gl_buffer.handle, 0, buffer_size, flags | GL_MAP_FLUSH_EXPLICIT_BIT));
36 GL_MAP_WRITE_BIT | GL_MAP_PERSISTENT_BIT | (coherent ? GL_MAP_COHERENT_BIT : 0); 37
37 glNamedBufferStorage(gl_buffer.handle, allocate_size, nullptr, flags); 38 if (device.HasVertexBufferUnifiedMemory()) {
38 mapped_ptr = static_cast<u8*>(glMapNamedBufferRange( 39 glMakeNamedBufferResidentNV(gl_buffer.handle, GL_READ_ONLY);
39 gl_buffer.handle, 0, buffer_size, flags | (coherent ? 0 : GL_MAP_FLUSH_EXPLICIT_BIT))); 40 glGetNamedBufferParameterui64vNV(gl_buffer.handle, GL_BUFFER_GPU_ADDRESS_NV, &gpu_address);
40 } else {
41 glNamedBufferData(gl_buffer.handle, allocate_size, nullptr, GL_STREAM_DRAW);
42 } 41 }
43} 42}
44 43
45OGLStreamBuffer::~OGLStreamBuffer() { 44OGLStreamBuffer::~OGLStreamBuffer() {
46 if (persistent) { 45 glUnmapNamedBuffer(gl_buffer.handle);
47 glUnmapNamedBuffer(gl_buffer.handle);
48 }
49 gl_buffer.Release(); 46 gl_buffer.Release();
50} 47}
51 48
52GLuint OGLStreamBuffer::GetHandle() const {
53 return gl_buffer.handle;
54}
55
56GLsizeiptr OGLStreamBuffer::GetSize() const {
57 return buffer_size;
58}
59
60std::tuple<u8*, GLintptr, bool> OGLStreamBuffer::Map(GLsizeiptr size, GLintptr alignment) { 49std::tuple<u8*, GLintptr, bool> OGLStreamBuffer::Map(GLsizeiptr size, GLintptr alignment) {
61 ASSERT(size <= buffer_size); 50 ASSERT(size <= buffer_size);
62 ASSERT(alignment <= buffer_size); 51 ASSERT(alignment <= buffer_size);
@@ -68,36 +57,21 @@ std::tuple<u8*, GLintptr, bool> OGLStreamBuffer::Map(GLsizeiptr size, GLintptr a
68 57
69 bool invalidate = false; 58 bool invalidate = false;
70 if (buffer_pos + size > buffer_size) { 59 if (buffer_pos + size > buffer_size) {
60 MICROPROFILE_SCOPE(OpenGL_StreamBuffer);
61 glInvalidateBufferData(gl_buffer.handle);
62
71 buffer_pos = 0; 63 buffer_pos = 0;
72 invalidate = true; 64 invalidate = true;
73
74 if (persistent) {
75 glUnmapNamedBuffer(gl_buffer.handle);
76 }
77 } 65 }
78 66
79 if (invalidate || !persistent) { 67 return std::make_tuple(mapped_ptr + buffer_pos, buffer_pos, invalidate);
80 MICROPROFILE_SCOPE(OpenGL_StreamBuffer);
81 GLbitfield flags = GL_MAP_WRITE_BIT | (persistent ? GL_MAP_PERSISTENT_BIT : 0) |
82 (coherent ? GL_MAP_COHERENT_BIT : GL_MAP_FLUSH_EXPLICIT_BIT) |
83 (invalidate ? GL_MAP_INVALIDATE_BUFFER_BIT : GL_MAP_UNSYNCHRONIZED_BIT);
84 mapped_ptr = static_cast<u8*>(
85 glMapNamedBufferRange(gl_buffer.handle, buffer_pos, buffer_size - buffer_pos, flags));
86 mapped_offset = buffer_pos;
87 }
88
89 return std::make_tuple(mapped_ptr + buffer_pos - mapped_offset, buffer_pos, invalidate);
90} 68}
91 69
92void OGLStreamBuffer::Unmap(GLsizeiptr size) { 70void OGLStreamBuffer::Unmap(GLsizeiptr size) {
93 ASSERT(size <= mapped_size); 71 ASSERT(size <= mapped_size);
94 72
95 if (!coherent && size > 0) { 73 if (size > 0) {
96 glFlushMappedNamedBufferRange(gl_buffer.handle, buffer_pos - mapped_offset, size); 74 glFlushMappedNamedBufferRange(gl_buffer.handle, buffer_pos, size);
97 }
98
99 if (!persistent) {
100 glUnmapNamedBuffer(gl_buffer.handle);
101 } 75 }
102 76
103 buffer_pos += size; 77 buffer_pos += size;
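After this change the stream buffer is persistently mapped exactly once at creation, writes are made visible with explicit flushes, and wrap-around is handled by orphaning the storage rather than unmapping and remapping. A condensed, standalone sketch of the per-upload flow (u8 as in common/common_types.h; buffer, map, and pos are illustrative names):

    #include <cstring>
    #include <glad/glad.h>
    #include "common/common_types.h"

    // pos is the caller-maintained stream offset into the persistent mapping.
    void StreamUpload(GLuint buffer, u8* map, GLsizeiptr capacity, GLintptr& pos,
                      const void* data, GLsizeiptr size) {
        if (pos + size > capacity) {
            glInvalidateBufferData(buffer); // orphan; the persistent map stays valid
            pos = 0;
        }
        std::memcpy(map + pos, data, static_cast<std::size_t>(size));
        // Required because the range was mapped with GL_MAP_FLUSH_EXPLICIT_BIT:
        glFlushMappedNamedBufferRange(buffer, pos, size);
        pos += size;
    }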
diff --git a/src/video_core/renderer_opengl/gl_stream_buffer.h b/src/video_core/renderer_opengl/gl_stream_buffer.h
index f8383cbd4..307a67113 100644
--- a/src/video_core/renderer_opengl/gl_stream_buffer.h
+++ b/src/video_core/renderer_opengl/gl_stream_buffer.h
@@ -11,15 +11,13 @@
11 11
12namespace OpenGL { 12namespace OpenGL {
13 13
14class Device;
15
14class OGLStreamBuffer : private NonCopyable { 16class OGLStreamBuffer : private NonCopyable {
15public: 17public:
16 explicit OGLStreamBuffer(GLsizeiptr size, bool vertex_data_usage, bool prefer_coherent = false, 18 explicit OGLStreamBuffer(const Device& device, GLsizeiptr size, bool vertex_data_usage);
17 bool use_persistent = true);
18 ~OGLStreamBuffer(); 19 ~OGLStreamBuffer();
19 20
20 GLuint GetHandle() const;
21 GLsizeiptr GetSize() const;
22
23 /* 21 /*
24 * Allocates a linear chunk of memory in the GPU buffer with at least "size" bytes 22 * Allocates a linear chunk of memory in the GPU buffer with at least "size" bytes
 25 * and an optional alignment requirement. 23 * and an optional alignment requirement.
@@ -32,15 +30,24 @@ public:
32 30
33 void Unmap(GLsizeiptr size); 31 void Unmap(GLsizeiptr size);
34 32
33 GLuint Handle() const {
34 return gl_buffer.handle;
35 }
36
37 u64 Address() const {
38 return gpu_address;
39 }
40
41 GLsizeiptr Size() const noexcept {
42 return buffer_size;
43 }
44
35private: 45private:
36 OGLBuffer gl_buffer; 46 OGLBuffer gl_buffer;
37 47
38 bool coherent = false; 48 GLuint64EXT gpu_address = 0;
39 bool persistent = false;
40
41 GLintptr buffer_pos = 0; 49 GLintptr buffer_pos = 0;
42 GLsizeiptr buffer_size = 0; 50 GLsizeiptr buffer_size = 0;
43 GLintptr mapped_offset = 0;
44 GLsizeiptr mapped_size = 0; 51 GLsizeiptr mapped_size = 0;
45 u8* mapped_ptr = nullptr; 52 u8* mapped_ptr = nullptr;
46}; 53};
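An illustrative call site for the slimmed-down interface; in practice the buffer is long-lived rather than local, and device, data, size, and stride are assumed names here:

    #include <cstring>

    void UploadVertices(const Device& device, const void* data, GLsizeiptr size,
                        GLsizei stride) {
        // Normally constructed once and reused across frames.
        OGLStreamBuffer stream(device, 4 * 1024 * 1024, true);
        const auto [ptr, offset, invalidated] = stream.Map(size, 4);
        std::memcpy(ptr, data, static_cast<std::size_t>(size));
        stream.Unmap(size); // flushes exactly the written range
        glBindVertexBuffer(0, stream.Handle(), offset, stride);
    }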
diff --git a/src/video_core/renderer_opengl/gl_texture_cache.cpp b/src/video_core/renderer_opengl/gl_texture_cache.cpp
index 57db5a08b..61505879b 100644
--- a/src/video_core/renderer_opengl/gl_texture_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_texture_cache.cpp
@@ -263,9 +263,14 @@ CachedSurface::CachedSurface(const GPUVAddr gpu_addr, const SurfaceParams& param
263 target = GetTextureTarget(params.target); 263 target = GetTextureTarget(params.target);
264 texture = CreateTexture(params, target, internal_format, texture_buffer); 264 texture = CreateTexture(params, target, internal_format, texture_buffer);
265 DecorateSurfaceName(); 265 DecorateSurfaceName();
266 main_view = CreateViewInner( 266
267 ViewParams(params.target, 0, params.is_layered ? params.depth : 1, 0, params.num_levels), 267 u32 num_layers = 1;
268 true); 268 if (params.is_layered || params.target == SurfaceTarget::Texture3D) {
269 num_layers = params.depth;
270 }
271
272 main_view =
273 CreateViewInner(ViewParams(params.target, 0, num_layers, 0, params.num_levels), true);
269} 274}
270 275
271CachedSurface::~CachedSurface() = default; 276CachedSurface::~CachedSurface() = default;
@@ -413,20 +418,23 @@ CachedSurfaceView::CachedSurfaceView(CachedSurface& surface, const ViewParams& p
413 418
414CachedSurfaceView::~CachedSurfaceView() = default; 419CachedSurfaceView::~CachedSurfaceView() = default;
415 420
416void CachedSurfaceView::Attach(GLenum attachment, GLenum target) const { 421void CachedSurfaceView::Attach(GLenum attachment, GLenum fb_target) const {
417 ASSERT(params.num_levels == 1); 422 ASSERT(params.num_levels == 1);
418 423
424 if (params.target == SurfaceTarget::Texture3D) {
425 if (params.num_layers > 1) {
426 ASSERT(params.base_layer == 0);
427 glFramebufferTexture(fb_target, attachment, surface.texture.handle, params.base_level);
428 } else {
429 glFramebufferTexture3D(fb_target, attachment, target, surface.texture.handle,
430 params.base_level, params.base_layer);
431 }
432 return;
433 }
434
419 if (params.num_layers > 1) { 435 if (params.num_layers > 1) {
420 // Layered framebuffer attachments
421 UNIMPLEMENTED_IF(params.base_layer != 0); 436 UNIMPLEMENTED_IF(params.base_layer != 0);
422 437 glFramebufferTexture(fb_target, attachment, GetTexture(), 0);
423 switch (params.target) {
424 case SurfaceTarget::Texture2DArray:
425 glFramebufferTexture(target, attachment, GetTexture(), 0);
426 break;
427 default:
428 UNIMPLEMENTED();
429 }
430 return; 438 return;
431 } 439 }
432 440
@@ -434,16 +442,16 @@ void CachedSurfaceView::Attach(GLenum attachment, GLenum target) const {
434 const GLuint texture = surface.GetTexture(); 442 const GLuint texture = surface.GetTexture();
435 switch (surface.GetSurfaceParams().target) { 443 switch (surface.GetSurfaceParams().target) {
436 case SurfaceTarget::Texture1D: 444 case SurfaceTarget::Texture1D:
437 glFramebufferTexture1D(target, attachment, view_target, texture, params.base_level); 445 glFramebufferTexture1D(fb_target, attachment, view_target, texture, params.base_level);
438 break; 446 break;
439 case SurfaceTarget::Texture2D: 447 case SurfaceTarget::Texture2D:
440 glFramebufferTexture2D(target, attachment, view_target, texture, params.base_level); 448 glFramebufferTexture2D(fb_target, attachment, view_target, texture, params.base_level);
441 break; 449 break;
442 case SurfaceTarget::Texture1DArray: 450 case SurfaceTarget::Texture1DArray:
443 case SurfaceTarget::Texture2DArray: 451 case SurfaceTarget::Texture2DArray:
444 case SurfaceTarget::TextureCubemap: 452 case SurfaceTarget::TextureCubemap:
445 case SurfaceTarget::TextureCubeArray: 453 case SurfaceTarget::TextureCubeArray:
446 glFramebufferTextureLayer(target, attachment, texture, params.base_level, 454 glFramebufferTextureLayer(fb_target, attachment, texture, params.base_level,
447 params.base_layer); 455 params.base_layer);
448 break; 456 break;
449 default: 457 default:
@@ -500,8 +508,13 @@ OGLTextureView CachedSurfaceView::CreateTextureView() const {
500 OGLTextureView texture_view; 508 OGLTextureView texture_view;
501 texture_view.Create(); 509 texture_view.Create();
502 510
503 glTextureView(texture_view.handle, target, surface.texture.handle, format, params.base_level, 511 if (target == GL_TEXTURE_3D) {
504 params.num_levels, params.base_layer, params.num_layers); 512 glTextureView(texture_view.handle, target, surface.texture.handle, format,
513 params.base_level, params.num_levels, 0, 1);
514 } else {
515 glTextureView(texture_view.handle, target, surface.texture.handle, format,
516 params.base_level, params.num_levels, params.base_layer, params.num_layers);
517 }
505 ApplyTextureDefaults(surface.GetSurfaceParams(), texture_view.handle); 518 ApplyTextureDefaults(surface.GetSurfaceParams(), texture_view.handle);
506 519
507 return texture_view; 520 return texture_view;
@@ -544,8 +557,8 @@ void TextureCacheOpenGL::ImageBlit(View& src_view, View& dst_view,
544 const Tegra::Engines::Fermi2D::Config& copy_config) { 557 const Tegra::Engines::Fermi2D::Config& copy_config) {
545 const auto& src_params{src_view->GetSurfaceParams()}; 558 const auto& src_params{src_view->GetSurfaceParams()};
546 const auto& dst_params{dst_view->GetSurfaceParams()}; 559 const auto& dst_params{dst_view->GetSurfaceParams()};
547 UNIMPLEMENTED_IF(src_params.target == SurfaceTarget::Texture3D); 560 UNIMPLEMENTED_IF(src_params.depth != 1);
548 UNIMPLEMENTED_IF(dst_params.target == SurfaceTarget::Texture3D); 561 UNIMPLEMENTED_IF(dst_params.depth != 1);
549 562
550 state_tracker.NotifyScissor0(); 563 state_tracker.NotifyScissor0();
551 state_tracker.NotifyFramebuffer(); 564 state_tracker.NotifyFramebuffer();
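The Attach change above distinguishes two 3D cases: a view spanning several depth slices becomes a layered attachment of the whole mip level, while a single-slice view goes through glFramebufferTexture3D with an explicit zoffset. Reduced to a standalone sketch (handles and parameters are illustrative):

    #include <glad/glad.h>

    // tex3d names a GL_TEXTURE_3D object; level and slice pick the mip level
    // and depth slice to attach.
    void AttachColor0(GLuint tex3d, GLint level, GLint slice, GLsizei num_slices) {
        if (num_slices > 1) {
            // Layered attachment: every depth slice of `level` becomes a layer.
            glFramebufferTexture(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, tex3d,
                                 level);
        } else {
            // Single-slice attachment within `level`.
            glFramebufferTexture3D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0,
                                   GL_TEXTURE_3D, tex3d, level, slice);
        }
    }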
diff --git a/src/video_core/renderer_opengl/gl_texture_cache.h b/src/video_core/renderer_opengl/gl_texture_cache.h
index 8a2ac8603..bfc4ddf5d 100644
--- a/src/video_core/renderer_opengl/gl_texture_cache.h
+++ b/src/video_core/renderer_opengl/gl_texture_cache.h
@@ -80,8 +80,10 @@ public:
80 explicit CachedSurfaceView(CachedSurface& surface, const ViewParams& params, bool is_proxy); 80 explicit CachedSurfaceView(CachedSurface& surface, const ViewParams& params, bool is_proxy);
81 ~CachedSurfaceView(); 81 ~CachedSurfaceView();
82 82
83 /// Attaches this texture view to the current bound GL_DRAW_FRAMEBUFFER 83 /// @brief Attaches this texture view to the currently bound fb_target framebuffer
84 void Attach(GLenum attachment, GLenum target) const; 84 /// @param attachment Attachment to bind textures to
85 /// @param fb_target Framebuffer target to attach to (e.g. DRAW_FRAMEBUFFER)
86 void Attach(GLenum attachment, GLenum fb_target) const;
85 87
86 GLuint GetTexture(Tegra::Texture::SwizzleSource x_source, 88 GLuint GetTexture(Tegra::Texture::SwizzleSource x_source,
87 Tegra::Texture::SwizzleSource y_source, 89 Tegra::Texture::SwizzleSource y_source,
diff --git a/src/video_core/renderer_opengl/maxwell_to_gl.h b/src/video_core/renderer_opengl/maxwell_to_gl.h
index 994ae98eb..774e70a5b 100644
--- a/src/video_core/renderer_opengl/maxwell_to_gl.h
+++ b/src/video_core/renderer_opengl/maxwell_to_gl.h
@@ -24,10 +24,11 @@ namespace MaxwellToGL {
24 24
25using Maxwell = Tegra::Engines::Maxwell3D::Regs; 25using Maxwell = Tegra::Engines::Maxwell3D::Regs;
26 26
27inline GLenum VertexType(Maxwell::VertexAttribute attrib) { 27inline GLenum VertexFormat(Maxwell::VertexAttribute attrib) {
28 switch (attrib.type) { 28 switch (attrib.type) {
29 case Maxwell::VertexAttribute::Type::UnsignedInt:
30 case Maxwell::VertexAttribute::Type::UnsignedNorm: 29 case Maxwell::VertexAttribute::Type::UnsignedNorm:
30 case Maxwell::VertexAttribute::Type::UnsignedScaled:
31 case Maxwell::VertexAttribute::Type::UnsignedInt:
31 switch (attrib.size) { 32 switch (attrib.size) {
32 case Maxwell::VertexAttribute::Size::Size_8: 33 case Maxwell::VertexAttribute::Size::Size_8:
33 case Maxwell::VertexAttribute::Size::Size_8_8: 34 case Maxwell::VertexAttribute::Size::Size_8_8:
@@ -46,12 +47,11 @@ inline GLenum VertexType(Maxwell::VertexAttribute attrib) {
46 return GL_UNSIGNED_INT; 47 return GL_UNSIGNED_INT;
47 case Maxwell::VertexAttribute::Size::Size_10_10_10_2: 48 case Maxwell::VertexAttribute::Size::Size_10_10_10_2:
48 return GL_UNSIGNED_INT_2_10_10_10_REV; 49 return GL_UNSIGNED_INT_2_10_10_10_REV;
49 default:
50 LOG_ERROR(Render_OpenGL, "Unimplemented vertex size={}", attrib.SizeString());
51 return {};
52 } 50 }
53 case Maxwell::VertexAttribute::Type::SignedInt: 51 break;
54 case Maxwell::VertexAttribute::Type::SignedNorm: 52 case Maxwell::VertexAttribute::Type::SignedNorm:
53 case Maxwell::VertexAttribute::Type::SignedScaled:
54 case Maxwell::VertexAttribute::Type::SignedInt:
55 switch (attrib.size) { 55 switch (attrib.size) {
56 case Maxwell::VertexAttribute::Size::Size_8: 56 case Maxwell::VertexAttribute::Size::Size_8:
57 case Maxwell::VertexAttribute::Size::Size_8_8: 57 case Maxwell::VertexAttribute::Size::Size_8_8:
@@ -70,10 +70,8 @@ inline GLenum VertexType(Maxwell::VertexAttribute attrib) {
70 return GL_INT; 70 return GL_INT;
71 case Maxwell::VertexAttribute::Size::Size_10_10_10_2: 71 case Maxwell::VertexAttribute::Size::Size_10_10_10_2:
72 return GL_INT_2_10_10_10_REV; 72 return GL_INT_2_10_10_10_REV;
73 default:
74 LOG_ERROR(Render_OpenGL, "Unimplemented vertex size={}", attrib.SizeString());
75 return {};
76 } 73 }
74 break;
77 case Maxwell::VertexAttribute::Type::Float: 75 case Maxwell::VertexAttribute::Type::Float:
78 switch (attrib.size) { 76 switch (attrib.size) {
79 case Maxwell::VertexAttribute::Size::Size_16: 77 case Maxwell::VertexAttribute::Size::Size_16:
@@ -86,46 +84,12 @@ inline GLenum VertexType(Maxwell::VertexAttribute attrib) {
86 case Maxwell::VertexAttribute::Size::Size_32_32_32: 84 case Maxwell::VertexAttribute::Size::Size_32_32_32:
87 case Maxwell::VertexAttribute::Size::Size_32_32_32_32: 85 case Maxwell::VertexAttribute::Size::Size_32_32_32_32:
88 return GL_FLOAT; 86 return GL_FLOAT;
89 default:
90 LOG_ERROR(Render_OpenGL, "Unimplemented vertex size={}", attrib.SizeString());
91 return {};
92 }
93 case Maxwell::VertexAttribute::Type::UnsignedScaled:
94 switch (attrib.size) {
95 case Maxwell::VertexAttribute::Size::Size_8:
96 case Maxwell::VertexAttribute::Size::Size_8_8:
97 case Maxwell::VertexAttribute::Size::Size_8_8_8:
98 case Maxwell::VertexAttribute::Size::Size_8_8_8_8:
99 return GL_UNSIGNED_BYTE;
100 case Maxwell::VertexAttribute::Size::Size_16:
101 case Maxwell::VertexAttribute::Size::Size_16_16:
102 case Maxwell::VertexAttribute::Size::Size_16_16_16:
103 case Maxwell::VertexAttribute::Size::Size_16_16_16_16:
104 return GL_UNSIGNED_SHORT;
105 default:
106 LOG_ERROR(Render_OpenGL, "Unimplemented vertex size={}", attrib.SizeString());
107 return {};
108 } 87 }
109 case Maxwell::VertexAttribute::Type::SignedScaled: 88 break;
110 switch (attrib.size) {
111 case Maxwell::VertexAttribute::Size::Size_8:
112 case Maxwell::VertexAttribute::Size::Size_8_8:
113 case Maxwell::VertexAttribute::Size::Size_8_8_8:
114 case Maxwell::VertexAttribute::Size::Size_8_8_8_8:
115 return GL_BYTE;
116 case Maxwell::VertexAttribute::Size::Size_16:
117 case Maxwell::VertexAttribute::Size::Size_16_16:
118 case Maxwell::VertexAttribute::Size::Size_16_16_16:
119 case Maxwell::VertexAttribute::Size::Size_16_16_16_16:
120 return GL_SHORT;
121 default:
122 LOG_ERROR(Render_OpenGL, "Unimplemented vertex size={}", attrib.SizeString());
123 return {};
124 }
125 default:
126 LOG_ERROR(Render_OpenGL, "Unimplemented vertex type={}", attrib.TypeString());
127 return {};
128 } 89 }
90 UNIMPLEMENTED_MSG("Unimplemented vertex format of type={} and size={}", attrib.TypeString(),
91 attrib.SizeString());
92 return {};
129} 93}
130 94
131inline GLenum IndexFormat(Maxwell::IndexFormat index_format) { 95inline GLenum IndexFormat(Maxwell::IndexFormat index_format) {
@@ -137,8 +101,7 @@ inline GLenum IndexFormat(Maxwell::IndexFormat index_format) {
137 case Maxwell::IndexFormat::UnsignedInt: 101 case Maxwell::IndexFormat::UnsignedInt:
138 return GL_UNSIGNED_INT; 102 return GL_UNSIGNED_INT;
139 } 103 }
140 LOG_CRITICAL(Render_OpenGL, "Unimplemented index_format={}", static_cast<u32>(index_format)); 104 UNREACHABLE_MSG("Invalid index_format={}", static_cast<u32>(index_format));
141 UNREACHABLE();
142 return {}; 105 return {};
143} 106}
144 107
@@ -180,33 +143,32 @@ inline GLenum PrimitiveTopology(Maxwell::PrimitiveTopology topology) {
180} 143}
181 144
182inline GLenum TextureFilterMode(Tegra::Texture::TextureFilter filter_mode, 145inline GLenum TextureFilterMode(Tegra::Texture::TextureFilter filter_mode,
183 Tegra::Texture::TextureMipmapFilter mip_filter_mode) { 146 Tegra::Texture::TextureMipmapFilter mipmap_filter_mode) {
184 switch (filter_mode) { 147 switch (filter_mode) {
185 case Tegra::Texture::TextureFilter::Linear: { 148 case Tegra::Texture::TextureFilter::Nearest:
186 switch (mip_filter_mode) { 149 switch (mipmap_filter_mode) {
187 case Tegra::Texture::TextureMipmapFilter::None: 150 case Tegra::Texture::TextureMipmapFilter::None:
188 return GL_LINEAR; 151 return GL_NEAREST;
189 case Tegra::Texture::TextureMipmapFilter::Nearest: 152 case Tegra::Texture::TextureMipmapFilter::Nearest:
190 return GL_LINEAR_MIPMAP_NEAREST; 153 return GL_NEAREST_MIPMAP_NEAREST;
191 case Tegra::Texture::TextureMipmapFilter::Linear: 154 case Tegra::Texture::TextureMipmapFilter::Linear:
192 return GL_LINEAR_MIPMAP_LINEAR; 155 return GL_NEAREST_MIPMAP_LINEAR;
193 } 156 }
194 break; 157 break;
195 } 158 case Tegra::Texture::TextureFilter::Linear:
196 case Tegra::Texture::TextureFilter::Nearest: { 159 switch (mipmap_filter_mode) {
197 switch (mip_filter_mode) {
198 case Tegra::Texture::TextureMipmapFilter::None: 160 case Tegra::Texture::TextureMipmapFilter::None:
199 return GL_NEAREST; 161 return GL_LINEAR;
200 case Tegra::Texture::TextureMipmapFilter::Nearest: 162 case Tegra::Texture::TextureMipmapFilter::Nearest:
201 return GL_NEAREST_MIPMAP_NEAREST; 163 return GL_LINEAR_MIPMAP_NEAREST;
202 case Tegra::Texture::TextureMipmapFilter::Linear: 164 case Tegra::Texture::TextureMipmapFilter::Linear:
203 return GL_NEAREST_MIPMAP_LINEAR; 165 return GL_LINEAR_MIPMAP_LINEAR;
204 } 166 }
205 break; 167 break;
206 } 168 }
207 } 169 UNREACHABLE_MSG("Invalid texture filter mode={} and mipmap filter mode={}",
208 LOG_ERROR(Render_OpenGL, "Unimplemented texture filter mode={}", static_cast<u32>(filter_mode)); 170 static_cast<u32>(filter_mode), static_cast<u32>(mipmap_filter_mode));
209 return GL_LINEAR; 171 return GL_NEAREST;
210} 172}
211 173
212inline GLenum WrapMode(Tegra::Texture::WrapMode wrap_mode) { 174inline GLenum WrapMode(Tegra::Texture::WrapMode wrap_mode) {
@@ -229,10 +191,9 @@ inline GLenum WrapMode(Tegra::Texture::WrapMode wrap_mode) {
229 } else { 191 } else {
230 return GL_MIRROR_CLAMP_TO_EDGE; 192 return GL_MIRROR_CLAMP_TO_EDGE;
231 } 193 }
232 default:
233 LOG_ERROR(Render_OpenGL, "Unimplemented texture wrap mode={}", static_cast<u32>(wrap_mode));
234 return GL_REPEAT;
235 } 194 }
195 UNIMPLEMENTED_MSG("Unimplemented texture wrap mode={}", static_cast<u32>(wrap_mode));
196 return GL_REPEAT;
236} 197}
237 198
238inline GLenum DepthCompareFunc(Tegra::Texture::DepthCompareFunc func) { 199inline GLenum DepthCompareFunc(Tegra::Texture::DepthCompareFunc func) {
@@ -254,8 +215,7 @@ inline GLenum DepthCompareFunc(Tegra::Texture::DepthCompareFunc func) {
254 case Tegra::Texture::DepthCompareFunc::Always: 215 case Tegra::Texture::DepthCompareFunc::Always:
255 return GL_ALWAYS; 216 return GL_ALWAYS;
256 } 217 }
257 LOG_ERROR(Render_OpenGL, "Unimplemented texture depth compare function ={}", 218 UNIMPLEMENTED_MSG("Unimplemented texture depth compare function={}", static_cast<u32>(func));
258 static_cast<u32>(func));
259 return GL_GREATER; 219 return GL_GREATER;
260} 220}
261 221
@@ -277,7 +237,7 @@ inline GLenum BlendEquation(Maxwell::Blend::Equation equation) {
277 case Maxwell::Blend::Equation::MaxGL: 237 case Maxwell::Blend::Equation::MaxGL:
278 return GL_MAX; 238 return GL_MAX;
279 } 239 }
280 LOG_ERROR(Render_OpenGL, "Unimplemented blend equation={}", static_cast<u32>(equation)); 240 UNIMPLEMENTED_MSG("Unimplemented blend equation={}", static_cast<u32>(equation));
281 return GL_FUNC_ADD; 241 return GL_FUNC_ADD;
282} 242}
283 243
@@ -341,7 +301,7 @@ inline GLenum BlendFunc(Maxwell::Blend::Factor factor) {
341 case Maxwell::Blend::Factor::OneMinusConstantAlphaGL: 301 case Maxwell::Blend::Factor::OneMinusConstantAlphaGL:
342 return GL_ONE_MINUS_CONSTANT_ALPHA; 302 return GL_ONE_MINUS_CONSTANT_ALPHA;
343 } 303 }
344 LOG_ERROR(Render_OpenGL, "Unimplemented blend factor={}", static_cast<u32>(factor)); 304 UNIMPLEMENTED_MSG("Unimplemented blend factor={}", static_cast<u32>(factor));
345 return GL_ZERO; 305 return GL_ZERO;
346} 306}
347 307
@@ -361,7 +321,7 @@ inline GLenum SwizzleSource(Tegra::Texture::SwizzleSource source) {
361 case Tegra::Texture::SwizzleSource::OneFloat: 321 case Tegra::Texture::SwizzleSource::OneFloat:
362 return GL_ONE; 322 return GL_ONE;
363 } 323 }
364 LOG_ERROR(Render_OpenGL, "Unimplemented swizzle source={}", static_cast<u32>(source)); 324 UNIMPLEMENTED_MSG("Unimplemented swizzle source={}", static_cast<u32>(source));
365 return GL_ZERO; 325 return GL_ZERO;
366} 326}
367 327
@@ -392,7 +352,7 @@ inline GLenum ComparisonOp(Maxwell::ComparisonOp comparison) {
392 case Maxwell::ComparisonOp::AlwaysOld: 352 case Maxwell::ComparisonOp::AlwaysOld:
393 return GL_ALWAYS; 353 return GL_ALWAYS;
394 } 354 }
395 LOG_ERROR(Render_OpenGL, "Unimplemented comparison op={}", static_cast<u32>(comparison)); 355 UNIMPLEMENTED_MSG("Unimplemented comparison op={}", static_cast<u32>(comparison));
396 return GL_ALWAYS; 356 return GL_ALWAYS;
397} 357}
398 358
@@ -423,7 +383,7 @@ inline GLenum StencilOp(Maxwell::StencilOp stencil) {
423 case Maxwell::StencilOp::DecrWrapOGL: 383 case Maxwell::StencilOp::DecrWrapOGL:
424 return GL_DECR_WRAP; 384 return GL_DECR_WRAP;
425 } 385 }
426 LOG_ERROR(Render_OpenGL, "Unimplemented stencil op={}", static_cast<u32>(stencil)); 386 UNIMPLEMENTED_MSG("Unimplemented stencil op={}", static_cast<u32>(stencil));
427 return GL_KEEP; 387 return GL_KEEP;
428} 388}
429 389
@@ -434,7 +394,7 @@ inline GLenum FrontFace(Maxwell::FrontFace front_face) {
434 case Maxwell::FrontFace::CounterClockWise: 394 case Maxwell::FrontFace::CounterClockWise:
435 return GL_CCW; 395 return GL_CCW;
436 } 396 }
437 LOG_ERROR(Render_OpenGL, "Unimplemented front face cull={}", static_cast<u32>(front_face)); 397 UNIMPLEMENTED_MSG("Unimplemented front face cull={}", static_cast<u32>(front_face));
438 return GL_CCW; 398 return GL_CCW;
439} 399}
440 400
@@ -447,7 +407,7 @@ inline GLenum CullFace(Maxwell::CullFace cull_face) {
447 case Maxwell::CullFace::FrontAndBack: 407 case Maxwell::CullFace::FrontAndBack:
448 return GL_FRONT_AND_BACK; 408 return GL_FRONT_AND_BACK;
449 } 409 }
450 LOG_ERROR(Render_OpenGL, "Unimplemented cull face={}", static_cast<u32>(cull_face)); 410 UNIMPLEMENTED_MSG("Unimplemented cull face={}", static_cast<u32>(cull_face));
451 return GL_BACK; 411 return GL_BACK;
452} 412}
453 413
@@ -486,7 +446,7 @@ inline GLenum LogicOp(Maxwell::LogicOperation operation) {
486 case Maxwell::LogicOperation::Set: 446 case Maxwell::LogicOperation::Set:
487 return GL_SET; 447 return GL_SET;
488 } 448 }
489 LOG_ERROR(Render_OpenGL, "Unimplemented logic operation={}", static_cast<u32>(operation)); 449 UNIMPLEMENTED_MSG("Unimplemented logic operation={}", static_cast<u32>(operation));
490 return GL_COPY; 450 return GL_COPY;
491} 451}
492 452
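These translation helpers feed straight into GL state setup; the renamed VertexFormat(), for instance, typically pairs with glVertexAttribFormat. A hypothetical sketch, with the component count passed in rather than derived (that helper is not part of this header) and ignoring the glVertexAttribIFormat path that unnormalized integer attributes would need:

    // Sketch: attrib, index, components, and offset are illustrative.
    void SetupAttribute(GLuint index, Maxwell::VertexAttribute attrib,
                        GLint components, GLuint offset) {
        // Normalization derived only from enum values visible in this header.
        const bool normalized =
            attrib.type == Maxwell::VertexAttribute::Type::UnsignedNorm ||
            attrib.type == Maxwell::VertexAttribute::Type::SignedNorm;
        glVertexAttribFormat(index, components, MaxwellToGL::VertexFormat(attrib),
                             normalized ? GL_TRUE : GL_FALSE, offset);
    }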
diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp
index 6214fcbc3..c40adb6e7 100644
--- a/src/video_core/renderer_opengl/renderer_opengl.cpp
+++ b/src/video_core/renderer_opengl/renderer_opengl.cpp
@@ -488,6 +488,15 @@ void RendererOpenGL::InitOpenGLObjects() {
488 488
489 // Clear screen to black 489 // Clear screen to black
490 LoadColorToActiveGLTexture(0, 0, 0, 0, screen_info.texture); 490 LoadColorToActiveGLTexture(0, 0, 0, 0, screen_info.texture);
491
492 // Enable unified vertex attributes and query vertex buffer address when the driver supports it
493 if (device.HasVertexBufferUnifiedMemory()) {
494 glEnableClientState(GL_VERTEX_ATTRIB_ARRAY_UNIFIED_NV);
495
496 glMakeNamedBufferResidentNV(vertex_buffer.handle, GL_READ_ONLY);
497 glGetNamedBufferParameterui64vNV(vertex_buffer.handle, GL_BUFFER_GPU_ADDRESS_NV,
498 &vertex_buffer_address);
499 }
491} 500}
492 501
493void RendererOpenGL::AddTelemetryFields() { 502void RendererOpenGL::AddTelemetryFields() {
@@ -656,7 +665,13 @@ void RendererOpenGL::DrawScreen(const Layout::FramebufferLayout& layout) {
656 offsetof(ScreenRectVertex, tex_coord)); 665 offsetof(ScreenRectVertex, tex_coord));
657 glVertexAttribBinding(PositionLocation, 0); 666 glVertexAttribBinding(PositionLocation, 0);
658 glVertexAttribBinding(TexCoordLocation, 0); 667 glVertexAttribBinding(TexCoordLocation, 0);
659 glBindVertexBuffer(0, vertex_buffer.handle, 0, sizeof(ScreenRectVertex)); 668 if (device.HasVertexBufferUnifiedMemory()) {
669 glBindVertexBuffer(0, 0, 0, sizeof(ScreenRectVertex));
670 glBufferAddressRangeNV(GL_VERTEX_ATTRIB_ARRAY_ADDRESS_NV, 0, vertex_buffer_address,
671 sizeof(vertices));
672 } else {
673 glBindVertexBuffer(0, vertex_buffer.handle, 0, sizeof(ScreenRectVertex));
674 }
660 675
661 glBindTextureUnit(0, screen_info.display_texture); 676 glBindTextureUnit(0, screen_info.display_texture);
662 glBindSampler(0, 0); 677 glBindSampler(0, 0);
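
With GL_NV_vertex_buffer_unified_memory present, the renderer now makes the screen-quad vertex buffer resident once at initialization, stores its raw GPU address, and binds it at draw time through glBufferAddressRangeNV rather than a buffer handle, skipping the driver-side handle-to-address lookup. A sketch of the same call sequence, assuming an already-current GL 4.5 context with the NV extension entry points loaded (for example through glad):

#include <glad/glad.h> // assumed loader exposing the NV extension entry points

GLuint vertex_buffer = 0;
GLuint64EXT vertex_buffer_address = 0;

void InitResidentVertexBuffer(GLsizeiptr size) {
    glCreateBuffers(1, &vertex_buffer);
    glNamedBufferData(vertex_buffer, size, nullptr, GL_STREAM_DRAW);

    // Opt in to address-based vertex pulling and pin the buffer in GPU memory.
    glEnableClientState(GL_VERTEX_ATTRIB_ARRAY_UNIFIED_NV);
    glMakeNamedBufferResidentNV(vertex_buffer, GL_READ_ONLY);
    glGetNamedBufferParameterui64vNV(vertex_buffer, GL_BUFFER_GPU_ADDRESS_NV,
                                     &vertex_buffer_address);
}

void BindForDraw(GLsizei stride, GLsizeiptr range) {
    // Keep binding 0's stride, but source the vertex data from the GPU address.
    glBindVertexBuffer(0, 0, 0, stride);
    glBufferAddressRangeNV(GL_VERTEX_ATTRIB_ARRAY_ADDRESS_NV, 0, vertex_buffer_address, range);
}
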
diff --git a/src/video_core/renderer_opengl/renderer_opengl.h b/src/video_core/renderer_opengl/renderer_opengl.h
index 61bf507f4..8b18d32e6 100644
--- a/src/video_core/renderer_opengl/renderer_opengl.h
+++ b/src/video_core/renderer_opengl/renderer_opengl.h
@@ -107,6 +107,9 @@ private:
107 OGLPipeline pipeline; 107 OGLPipeline pipeline;
108 OGLFramebuffer screenshot_framebuffer; 108 OGLFramebuffer screenshot_framebuffer;
109 109
110 // GPU address of the vertex buffer
111 GLuint64EXT vertex_buffer_address = 0;
112
110 /// Display information for Switch screen 113 /// Display information for Switch screen
111 ScreenInfo screen_info; 114 ScreenInfo screen_info;
112 115
diff --git a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp
index 62e950d31..d7f1ae89f 100644
--- a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp
+++ b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp
@@ -21,29 +21,29 @@ namespace Sampler {
21 21
22VkFilter Filter(Tegra::Texture::TextureFilter filter) { 22VkFilter Filter(Tegra::Texture::TextureFilter filter) {
23 switch (filter) { 23 switch (filter) {
24 case Tegra::Texture::TextureFilter::Linear:
25 return VK_FILTER_LINEAR;
26 case Tegra::Texture::TextureFilter::Nearest: 24 case Tegra::Texture::TextureFilter::Nearest:
27 return VK_FILTER_NEAREST; 25 return VK_FILTER_NEAREST;
26 case Tegra::Texture::TextureFilter::Linear:
27 return VK_FILTER_LINEAR;
28 } 28 }
29 UNIMPLEMENTED_MSG("Unimplemented sampler filter={}", static_cast<u32>(filter)); 29 UNREACHABLE_MSG("Invalid sampler filter={}", static_cast<u32>(filter));
30 return {}; 30 return {};
31} 31}
32 32
33VkSamplerMipmapMode MipmapMode(Tegra::Texture::TextureMipmapFilter mipmap_filter) { 33VkSamplerMipmapMode MipmapMode(Tegra::Texture::TextureMipmapFilter mipmap_filter) {
34 switch (mipmap_filter) { 34 switch (mipmap_filter) {
35 case Tegra::Texture::TextureMipmapFilter::None: 35 case Tegra::Texture::TextureMipmapFilter::None:
36 // TODO(Rodrigo): None seems to be mapped to OpenGL's mag and min filters without mipmapping 36 // There are no Vulkan filter modes that directly correspond to OpenGL minification filters
37 // (e.g. GL_NEAREST and GL_LINEAR). Vulkan doesn't have such a thing, find out if we have to 37 // of GL_LINEAR or GL_NEAREST, but they can be emulated using
38 // use an image view with a single mipmap level to emulate this. 38 // VK_SAMPLER_MIPMAP_MODE_NEAREST, minLod = 0, and maxLod = 0.25, and using minFilter =
39 return VK_SAMPLER_MIPMAP_MODE_LINEAR; 39 // VK_FILTER_LINEAR or minFilter = VK_FILTER_NEAREST, respectively.
40 ; 40 return VK_SAMPLER_MIPMAP_MODE_NEAREST;
41 case Tegra::Texture::TextureMipmapFilter::Linear:
42 return VK_SAMPLER_MIPMAP_MODE_LINEAR;
43 case Tegra::Texture::TextureMipmapFilter::Nearest: 41 case Tegra::Texture::TextureMipmapFilter::Nearest:
44 return VK_SAMPLER_MIPMAP_MODE_NEAREST; 42 return VK_SAMPLER_MIPMAP_MODE_NEAREST;
43 case Tegra::Texture::TextureMipmapFilter::Linear:
44 return VK_SAMPLER_MIPMAP_MODE_LINEAR;
45 } 45 }
46 UNIMPLEMENTED_MSG("Unimplemented sampler mipmap mode={}", static_cast<u32>(mipmap_filter)); 46 UNREACHABLE_MSG("Invalid sampler mipmap mode={}", static_cast<u32>(mipmap_filter));
47 return {}; 47 return {};
48} 48}
49 49
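
The rewritten comment follows the standard Vulkan recipe for a mipmap-less sampler: with VK_SAMPLER_MIPMAP_MODE_NEAREST and the LOD clamped into [0, 0.25], only mip level zero is ever sampled, so minFilter behaves like GL's non-mipmapped GL_LINEAR or GL_NEAREST. A sketch of just the relevant VkSamplerCreateInfo fields, assuming the rest of the structure is filled in as usual:

#include <vulkan/vulkan.h>

// Configure a sampler so TextureMipmapFilter::None behaves like GL's
// non-mipmapped filters: only level 0 can ever be selected.
void ConfigureNoMipmaps(VkSamplerCreateInfo& ci, VkFilter filter) {
    ci.minFilter = filter; // VK_FILTER_LINEAR or VK_FILTER_NEAREST
    ci.mipmapMode = VK_SAMPLER_MIPMAP_MODE_NEAREST;
    ci.minLod = 0.0f;
    ci.maxLod = 0.25f; // any value in (0, 0.25] keeps sampling at level 0
}
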
@@ -78,10 +78,9 @@ VkSamplerAddressMode WrapMode(const VKDevice& device, Tegra::Texture::WrapMode w
78 case Tegra::Texture::WrapMode::MirrorOnceBorder: 78 case Tegra::Texture::WrapMode::MirrorOnceBorder:
79 UNIMPLEMENTED(); 79 UNIMPLEMENTED();
80 return VK_SAMPLER_ADDRESS_MODE_MIRROR_CLAMP_TO_EDGE; 80 return VK_SAMPLER_ADDRESS_MODE_MIRROR_CLAMP_TO_EDGE;
81 default:
82 UNIMPLEMENTED_MSG("Unimplemented wrap mode={}", static_cast<u32>(wrap_mode));
83 return {};
84 } 81 }
82 UNIMPLEMENTED_MSG("Unimplemented wrap mode={}", static_cast<u32>(wrap_mode));
83 return {};
85} 84}
86 85
87VkCompareOp DepthCompareFunction(Tegra::Texture::DepthCompareFunc depth_compare_func) { 86VkCompareOp DepthCompareFunction(Tegra::Texture::DepthCompareFunc depth_compare_func) {
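
The WrapMode hunk shows another recurring cleanup in this file: the default: label moves out of the switch entirely. Because the switch now enumerates every declared value, compilers can warn (e.g. -Wswitch on GCC/Clang) when a new enumerator is added without a case, while the code after the switch still catches out-of-range bit patterns at runtime. A reduced sketch with hypothetical names:

#include <cstdint>
#include <cstdio>

enum class Topology : std::uint32_t { Points, Lines, Triangles };

std::uint32_t Convert(Topology topology) {
    switch (topology) {
    case Topology::Points:
        return 0;
    case Topology::Lines:
        return 1;
    case Topology::Triangles:
        return 2;
    // No default: -Wswitch now flags any enumerator added later but not handled here.
    }
    std::fprintf(stderr, "Unimplemented topology=%u\n", static_cast<std::uint32_t>(topology));
    return 0;
}
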
@@ -288,14 +287,35 @@ VkPrimitiveTopology PrimitiveTopology([[maybe_unused]] const VKDevice& device,
288 return VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST; 287 return VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST;
289 case Maxwell::PrimitiveTopology::Patches: 288 case Maxwell::PrimitiveTopology::Patches:
290 return VK_PRIMITIVE_TOPOLOGY_PATCH_LIST; 289 return VK_PRIMITIVE_TOPOLOGY_PATCH_LIST;
291 default:
292 UNIMPLEMENTED_MSG("Unimplemented topology={}", static_cast<u32>(topology));
293 return {};
294 } 290 }
291 UNIMPLEMENTED_MSG("Unimplemented topology={}", static_cast<u32>(topology));
292 return {};
295} 293}
296 294
297VkFormat VertexFormat(Maxwell::VertexAttribute::Type type, Maxwell::VertexAttribute::Size size) { 295VkFormat VertexFormat(Maxwell::VertexAttribute::Type type, Maxwell::VertexAttribute::Size size) {
298 switch (type) { 296 switch (type) {
297 case Maxwell::VertexAttribute::Type::UnsignedNorm:
298 switch (size) {
299 case Maxwell::VertexAttribute::Size::Size_8:
300 return VK_FORMAT_R8_UNORM;
301 case Maxwell::VertexAttribute::Size::Size_8_8:
302 return VK_FORMAT_R8G8_UNORM;
303 case Maxwell::VertexAttribute::Size::Size_8_8_8:
304 return VK_FORMAT_R8G8B8_UNORM;
305 case Maxwell::VertexAttribute::Size::Size_8_8_8_8:
306 return VK_FORMAT_R8G8B8A8_UNORM;
307 case Maxwell::VertexAttribute::Size::Size_16:
308 return VK_FORMAT_R16_UNORM;
309 case Maxwell::VertexAttribute::Size::Size_16_16:
310 return VK_FORMAT_R16G16_UNORM;
311 case Maxwell::VertexAttribute::Size::Size_16_16_16:
312 return VK_FORMAT_R16G16B16_UNORM;
313 case Maxwell::VertexAttribute::Size::Size_16_16_16_16:
314 return VK_FORMAT_R16G16B16A16_UNORM;
315 case Maxwell::VertexAttribute::Size::Size_10_10_10_2:
316 return VK_FORMAT_A2B10G10R10_UNORM_PACK32;
317 }
318 break;
299 case Maxwell::VertexAttribute::Type::SignedNorm: 319 case Maxwell::VertexAttribute::Type::SignedNorm:
300 switch (size) { 320 switch (size) {
301 case Maxwell::VertexAttribute::Size::Size_8: 321 case Maxwell::VertexAttribute::Size::Size_8:
@@ -316,62 +336,50 @@ VkFormat VertexFormat(Maxwell::VertexAttribute::Type type, Maxwell::VertexAttrib
316 return VK_FORMAT_R16G16B16A16_SNORM; 336 return VK_FORMAT_R16G16B16A16_SNORM;
317 case Maxwell::VertexAttribute::Size::Size_10_10_10_2: 337 case Maxwell::VertexAttribute::Size::Size_10_10_10_2:
318 return VK_FORMAT_A2B10G10R10_SNORM_PACK32; 338 return VK_FORMAT_A2B10G10R10_SNORM_PACK32;
319 default:
320 break;
321 } 339 }
322 break; 340 break;
323 case Maxwell::VertexAttribute::Type::UnsignedNorm: 341 case Maxwell::VertexAttribute::Type::UnsignedScaled:
324 switch (size) { 342 switch (size) {
325 case Maxwell::VertexAttribute::Size::Size_8: 343 case Maxwell::VertexAttribute::Size::Size_8:
326 return VK_FORMAT_R8_UNORM; 344 return VK_FORMAT_R8_USCALED;
327 case Maxwell::VertexAttribute::Size::Size_8_8: 345 case Maxwell::VertexAttribute::Size::Size_8_8:
328 return VK_FORMAT_R8G8_UNORM; 346 return VK_FORMAT_R8G8_USCALED;
329 case Maxwell::VertexAttribute::Size::Size_8_8_8: 347 case Maxwell::VertexAttribute::Size::Size_8_8_8:
330 return VK_FORMAT_R8G8B8_UNORM; 348 return VK_FORMAT_R8G8B8_USCALED;
331 case Maxwell::VertexAttribute::Size::Size_8_8_8_8: 349 case Maxwell::VertexAttribute::Size::Size_8_8_8_8:
332 return VK_FORMAT_R8G8B8A8_UNORM; 350 return VK_FORMAT_R8G8B8A8_USCALED;
333 case Maxwell::VertexAttribute::Size::Size_16: 351 case Maxwell::VertexAttribute::Size::Size_16:
334 return VK_FORMAT_R16_UNORM; 352 return VK_FORMAT_R16_USCALED;
335 case Maxwell::VertexAttribute::Size::Size_16_16: 353 case Maxwell::VertexAttribute::Size::Size_16_16:
336 return VK_FORMAT_R16G16_UNORM; 354 return VK_FORMAT_R16G16_USCALED;
337 case Maxwell::VertexAttribute::Size::Size_16_16_16: 355 case Maxwell::VertexAttribute::Size::Size_16_16_16:
338 return VK_FORMAT_R16G16B16_UNORM; 356 return VK_FORMAT_R16G16B16_USCALED;
339 case Maxwell::VertexAttribute::Size::Size_16_16_16_16: 357 case Maxwell::VertexAttribute::Size::Size_16_16_16_16:
340 return VK_FORMAT_R16G16B16A16_UNORM; 358 return VK_FORMAT_R16G16B16A16_USCALED;
341 case Maxwell::VertexAttribute::Size::Size_10_10_10_2: 359 case Maxwell::VertexAttribute::Size::Size_10_10_10_2:
342 return VK_FORMAT_A2B10G10R10_UNORM_PACK32; 360 return VK_FORMAT_A2B10G10R10_USCALED_PACK32;
343 default:
344 break;
345 } 361 }
346 break; 362 break;
347 case Maxwell::VertexAttribute::Type::SignedInt: 363 case Maxwell::VertexAttribute::Type::SignedScaled:
348 switch (size) { 364 switch (size) {
349 case Maxwell::VertexAttribute::Size::Size_8: 365 case Maxwell::VertexAttribute::Size::Size_8:
350 return VK_FORMAT_R8_SINT; 366 return VK_FORMAT_R8_SSCALED;
351 case Maxwell::VertexAttribute::Size::Size_8_8: 367 case Maxwell::VertexAttribute::Size::Size_8_8:
352 return VK_FORMAT_R8G8_SINT; 368 return VK_FORMAT_R8G8_SSCALED;
353 case Maxwell::VertexAttribute::Size::Size_8_8_8: 369 case Maxwell::VertexAttribute::Size::Size_8_8_8:
354 return VK_FORMAT_R8G8B8_SINT; 370 return VK_FORMAT_R8G8B8_SSCALED;
355 case Maxwell::VertexAttribute::Size::Size_8_8_8_8: 371 case Maxwell::VertexAttribute::Size::Size_8_8_8_8:
356 return VK_FORMAT_R8G8B8A8_SINT; 372 return VK_FORMAT_R8G8B8A8_SSCALED;
357 case Maxwell::VertexAttribute::Size::Size_16: 373 case Maxwell::VertexAttribute::Size::Size_16:
358 return VK_FORMAT_R16_SINT; 374 return VK_FORMAT_R16_SSCALED;
359 case Maxwell::VertexAttribute::Size::Size_16_16: 375 case Maxwell::VertexAttribute::Size::Size_16_16:
360 return VK_FORMAT_R16G16_SINT; 376 return VK_FORMAT_R16G16_SSCALED;
361 case Maxwell::VertexAttribute::Size::Size_16_16_16: 377 case Maxwell::VertexAttribute::Size::Size_16_16_16:
362 return VK_FORMAT_R16G16B16_SINT; 378 return VK_FORMAT_R16G16B16_SSCALED;
363 case Maxwell::VertexAttribute::Size::Size_16_16_16_16: 379 case Maxwell::VertexAttribute::Size::Size_16_16_16_16:
364 return VK_FORMAT_R16G16B16A16_SINT; 380 return VK_FORMAT_R16G16B16A16_SSCALED;
365 case Maxwell::VertexAttribute::Size::Size_32: 381 case Maxwell::VertexAttribute::Size::Size_10_10_10_2:
366 return VK_FORMAT_R32_SINT; 382 return VK_FORMAT_A2B10G10R10_SSCALED_PACK32;
367 case Maxwell::VertexAttribute::Size::Size_32_32:
368 return VK_FORMAT_R32G32_SINT;
369 case Maxwell::VertexAttribute::Size::Size_32_32_32:
370 return VK_FORMAT_R32G32B32_SINT;
371 case Maxwell::VertexAttribute::Size::Size_32_32_32_32:
372 return VK_FORMAT_R32G32B32A32_SINT;
373 default:
374 break;
375 } 383 }
376 break; 384 break;
377 case Maxwell::VertexAttribute::Type::UnsignedInt: 385 case Maxwell::VertexAttribute::Type::UnsignedInt:
@@ -400,56 +408,50 @@ VkFormat VertexFormat(Maxwell::VertexAttribute::Type type, Maxwell::VertexAttrib
400 return VK_FORMAT_R32G32B32_UINT; 408 return VK_FORMAT_R32G32B32_UINT;
401 case Maxwell::VertexAttribute::Size::Size_32_32_32_32: 409 case Maxwell::VertexAttribute::Size::Size_32_32_32_32:
402 return VK_FORMAT_R32G32B32A32_UINT; 410 return VK_FORMAT_R32G32B32A32_UINT;
403 default: 411 case Maxwell::VertexAttribute::Size::Size_10_10_10_2:
404 break; 412 return VK_FORMAT_A2B10G10R10_UINT_PACK32;
405 } 413 }
406 break; 414 break;
407 case Maxwell::VertexAttribute::Type::UnsignedScaled: 415 case Maxwell::VertexAttribute::Type::SignedInt:
408 switch (size) { 416 switch (size) {
409 case Maxwell::VertexAttribute::Size::Size_8: 417 case Maxwell::VertexAttribute::Size::Size_8:
410 return VK_FORMAT_R8_USCALED; 418 return VK_FORMAT_R8_SINT;
411 case Maxwell::VertexAttribute::Size::Size_8_8: 419 case Maxwell::VertexAttribute::Size::Size_8_8:
412 return VK_FORMAT_R8G8_USCALED; 420 return VK_FORMAT_R8G8_SINT;
413 case Maxwell::VertexAttribute::Size::Size_8_8_8: 421 case Maxwell::VertexAttribute::Size::Size_8_8_8:
414 return VK_FORMAT_R8G8B8_USCALED; 422 return VK_FORMAT_R8G8B8_SINT;
415 case Maxwell::VertexAttribute::Size::Size_8_8_8_8: 423 case Maxwell::VertexAttribute::Size::Size_8_8_8_8:
416 return VK_FORMAT_R8G8B8A8_USCALED; 424 return VK_FORMAT_R8G8B8A8_SINT;
417 case Maxwell::VertexAttribute::Size::Size_16: 425 case Maxwell::VertexAttribute::Size::Size_16:
418 return VK_FORMAT_R16_USCALED; 426 return VK_FORMAT_R16_SINT;
419 case Maxwell::VertexAttribute::Size::Size_16_16: 427 case Maxwell::VertexAttribute::Size::Size_16_16:
420 return VK_FORMAT_R16G16_USCALED; 428 return VK_FORMAT_R16G16_SINT;
421 case Maxwell::VertexAttribute::Size::Size_16_16_16: 429 case Maxwell::VertexAttribute::Size::Size_16_16_16:
422 return VK_FORMAT_R16G16B16_USCALED; 430 return VK_FORMAT_R16G16B16_SINT;
423 case Maxwell::VertexAttribute::Size::Size_16_16_16_16: 431 case Maxwell::VertexAttribute::Size::Size_16_16_16_16:
424 return VK_FORMAT_R16G16B16A16_USCALED; 432 return VK_FORMAT_R16G16B16A16_SINT;
425 default: 433 case Maxwell::VertexAttribute::Size::Size_32:
426 break; 434 return VK_FORMAT_R32_SINT;
435 case Maxwell::VertexAttribute::Size::Size_32_32:
436 return VK_FORMAT_R32G32_SINT;
437 case Maxwell::VertexAttribute::Size::Size_32_32_32:
438 return VK_FORMAT_R32G32B32_SINT;
439 case Maxwell::VertexAttribute::Size::Size_32_32_32_32:
440 return VK_FORMAT_R32G32B32A32_SINT;
441 case Maxwell::VertexAttribute::Size::Size_10_10_10_2:
442 return VK_FORMAT_A2B10G10R10_SINT_PACK32;
427 } 443 }
428 break; 444 break;
429 case Maxwell::VertexAttribute::Type::SignedScaled: 445 case Maxwell::VertexAttribute::Type::Float:
430 switch (size) { 446 switch (size) {
431 case Maxwell::VertexAttribute::Size::Size_8:
432 return VK_FORMAT_R8_SSCALED;
433 case Maxwell::VertexAttribute::Size::Size_8_8:
434 return VK_FORMAT_R8G8_SSCALED;
435 case Maxwell::VertexAttribute::Size::Size_8_8_8:
436 return VK_FORMAT_R8G8B8_SSCALED;
437 case Maxwell::VertexAttribute::Size::Size_8_8_8_8:
438 return VK_FORMAT_R8G8B8A8_SSCALED;
439 case Maxwell::VertexAttribute::Size::Size_16: 447 case Maxwell::VertexAttribute::Size::Size_16:
440 return VK_FORMAT_R16_SSCALED; 448 return VK_FORMAT_R16_SFLOAT;
441 case Maxwell::VertexAttribute::Size::Size_16_16: 449 case Maxwell::VertexAttribute::Size::Size_16_16:
442 return VK_FORMAT_R16G16_SSCALED; 450 return VK_FORMAT_R16G16_SFLOAT;
443 case Maxwell::VertexAttribute::Size::Size_16_16_16: 451 case Maxwell::VertexAttribute::Size::Size_16_16_16:
444 return VK_FORMAT_R16G16B16_SSCALED; 452 return VK_FORMAT_R16G16B16_SFLOAT;
445 case Maxwell::VertexAttribute::Size::Size_16_16_16_16: 453 case Maxwell::VertexAttribute::Size::Size_16_16_16_16:
446 return VK_FORMAT_R16G16B16A16_SSCALED; 454 return VK_FORMAT_R16G16B16A16_SFLOAT;
447 default:
448 break;
449 }
450 break;
451 case Maxwell::VertexAttribute::Type::Float:
452 switch (size) {
453 case Maxwell::VertexAttribute::Size::Size_32: 455 case Maxwell::VertexAttribute::Size::Size_32:
454 return VK_FORMAT_R32_SFLOAT; 456 return VK_FORMAT_R32_SFLOAT;
455 case Maxwell::VertexAttribute::Size::Size_32_32: 457 case Maxwell::VertexAttribute::Size::Size_32_32:
@@ -458,16 +460,6 @@ VkFormat VertexFormat(Maxwell::VertexAttribute::Type type, Maxwell::VertexAttrib
458 return VK_FORMAT_R32G32B32_SFLOAT; 460 return VK_FORMAT_R32G32B32_SFLOAT;
459 case Maxwell::VertexAttribute::Size::Size_32_32_32_32: 461 case Maxwell::VertexAttribute::Size::Size_32_32_32_32:
460 return VK_FORMAT_R32G32B32A32_SFLOAT; 462 return VK_FORMAT_R32G32B32A32_SFLOAT;
461 case Maxwell::VertexAttribute::Size::Size_16:
462 return VK_FORMAT_R16_SFLOAT;
463 case Maxwell::VertexAttribute::Size::Size_16_16:
464 return VK_FORMAT_R16G16_SFLOAT;
465 case Maxwell::VertexAttribute::Size::Size_16_16_16:
466 return VK_FORMAT_R16G16B16_SFLOAT;
467 case Maxwell::VertexAttribute::Size::Size_16_16_16_16:
468 return VK_FORMAT_R16G16B16A16_SFLOAT;
469 default:
470 break;
471 } 463 }
472 break; 464 break;
473 } 465 }
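
Several of the new Size_10_10_10_2 cases map to Vulkan's A2B10G10R10_*_PACK32 formats, which are named from the most significant bit down: 2 bits of alpha, then 10 bits each of blue, green, and red packed into one 32-bit word. A small self-contained check of that layout:

#include <cstdint>

// Pack four components into the A2B10G10R10 layout used by the new
// Size_10_10_10_2 cases: A in bits 30-31, B in 20-29, G in 10-19, R in 0-9.
constexpr std::uint32_t PackA2B10G10R10(std::uint32_t r, std::uint32_t g, std::uint32_t b,
                                        std::uint32_t a) {
    return (a & 0x3u) << 30 | (b & 0x3ffu) << 20 | (g & 0x3ffu) << 10 | (r & 0x3ffu);
}

static_assert(PackA2B10G10R10(1023, 0, 0, 3) == 0xC00003FFu);
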
diff --git a/src/video_core/renderer_vulkan/renderer_vulkan.cpp b/src/video_core/renderer_vulkan/renderer_vulkan.cpp
index 59b441943..2d9b18ed9 100644
--- a/src/video_core/renderer_vulkan/renderer_vulkan.cpp
+++ b/src/video_core/renderer_vulkan/renderer_vulkan.cpp
@@ -13,6 +13,7 @@
13#include <fmt/format.h> 13#include <fmt/format.h>
14 14
15#include "common/dynamic_library.h" 15#include "common/dynamic_library.h"
16#include "common/file_util.h"
16#include "common/logging/log.h" 17#include "common/logging/log.h"
17#include "common/telemetry.h" 18#include "common/telemetry.h"
18#include "core/core.h" 19#include "core/core.h"
@@ -76,7 +77,8 @@ Common::DynamicLibrary OpenVulkanLibrary() {
76 char* libvulkan_env = getenv("LIBVULKAN_PATH"); 77 char* libvulkan_env = getenv("LIBVULKAN_PATH");
77 if (!libvulkan_env || !library.Open(libvulkan_env)) { 78 if (!libvulkan_env || !library.Open(libvulkan_env)) {
78 // Use the libvulkan.dylib from the application bundle. 79 // Use the libvulkan.dylib from the application bundle.
79 std::string filename = File::GetBundleDirectory() + "/Contents/Frameworks/libvulkan.dylib"; 80 const std::string filename =
81 FileUtil::GetBundleDirectory() + "/Contents/Frameworks/libvulkan.dylib";
80 library.Open(filename.c_str()); 82 library.Open(filename.c_str());
81 } 83 }
82#else 84#else
@@ -153,11 +155,31 @@ vk::Instance CreateInstance(Common::DynamicLibrary& library, vk::InstanceDispatc
153 } 155 }
154 } 156 }
155 157
156 static constexpr std::array layers_data{"VK_LAYER_LUNARG_standard_validation"}; 158 std::vector<const char*> layers;
157 vk::Span<const char*> layers = layers_data; 159 layers.reserve(1);
158 if (!enable_layers) { 160 if (enable_layers) {
159 layers = {}; 161 layers.push_back("VK_LAYER_KHRONOS_validation");
162 }
163
164 const std::optional layer_properties = vk::EnumerateInstanceLayerProperties(dld);
165 if (!layer_properties) {
166 LOG_ERROR(Render_Vulkan, "Failed to query layer properties, disabling layers");
167 layers.clear();
168 }
169
170 for (auto layer_it = layers.begin(); layer_it != layers.end();) {
171 const char* const layer = *layer_it;
172 const auto it = std::find_if(
173 layer_properties->begin(), layer_properties->end(),
174 [layer](const VkLayerProperties& prop) { return !std::strcmp(layer, prop.layerName); });
175 if (it == layer_properties->end()) {
176 LOG_ERROR(Render_Vulkan, "Layer {} not available, removing it", layer);
177 layer_it = layers.erase(layer_it);
178 } else {
179 ++layer_it;
180 }
160 } 181 }
182
161 vk::Instance instance = vk::Instance::Create(layers, extensions, dld); 183 vk::Instance instance = vk::Instance::Create(layers, extensions, dld);
162 if (!instance) { 184 if (!instance) {
163 LOG_ERROR(Render_Vulkan, "Failed to create Vulkan instance"); 185 LOG_ERROR(Render_Vulkan, "Failed to create Vulkan instance");
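
The instance-creation change swaps the deprecated VK_LAYER_LUNARG_standard_validation for VK_LAYER_KHRONOS_validation and, more importantly, checks each requested layer against what the loader actually reports, so instance creation no longer fails with VK_ERROR_LAYER_NOT_PRESENT on systems without the validation layers installed. The same filtering expressed against the plain C API rather than the project's dispatch wrapper, as a sketch:

#include <algorithm>
#include <cstdint>
#include <cstring>
#include <vector>
#include <vulkan/vulkan.h>

// Drop every requested layer the loader does not report as available.
void FilterUnavailableLayers(std::vector<const char*>& layers) {
    std::uint32_t count = 0;
    if (vkEnumerateInstanceLayerProperties(&count, nullptr) != VK_SUCCESS) {
        layers.clear();
        return;
    }
    std::vector<VkLayerProperties> properties(count);
    vkEnumerateInstanceLayerProperties(&count, properties.data());

    const auto is_missing = [&properties](const char* layer) {
        return std::none_of(properties.begin(), properties.end(),
                            [layer](const VkLayerProperties& prop) {
                                return std::strcmp(layer, prop.layerName) == 0;
                            });
    };
    layers.erase(std::remove_if(layers.begin(), layers.end(), is_missing), layers.end());
}
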
diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp
index 5f33d9e40..f10f96cd8 100644
--- a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp
@@ -37,9 +37,9 @@ std::unique_ptr<VKStreamBuffer> CreateStreamBuffer(const VKDevice& device, VKSch
37 37
38} // Anonymous namespace 38} // Anonymous namespace
39 39
40CachedBufferBlock::CachedBufferBlock(const VKDevice& device, VKMemoryManager& memory_manager, 40Buffer::Buffer(const VKDevice& device, VKMemoryManager& memory_manager, VKScheduler& scheduler_,
41 VAddr cpu_addr, std::size_t size) 41 VKStagingBufferPool& staging_pool_, VAddr cpu_addr, std::size_t size)
42 : VideoCommon::BufferBlock{cpu_addr, size} { 42 : VideoCommon::BufferBlock{cpu_addr, size}, scheduler{scheduler_}, staging_pool{staging_pool_} {
43 VkBufferCreateInfo ci; 43 VkBufferCreateInfo ci;
44 ci.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO; 44 ci.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO;
45 ci.pNext = nullptr; 45 ci.pNext = nullptr;
@@ -54,46 +54,17 @@ CachedBufferBlock::CachedBufferBlock(const VKDevice& device, VKMemoryManager& me
54 buffer.commit = memory_manager.Commit(buffer.handle, false); 54 buffer.commit = memory_manager.Commit(buffer.handle, false);
55} 55}
56 56
57CachedBufferBlock::~CachedBufferBlock() = default; 57Buffer::~Buffer() = default;
58 58
59VKBufferCache::VKBufferCache(VideoCore::RasterizerInterface& rasterizer, Core::System& system, 59void Buffer::Upload(std::size_t offset, std::size_t size, const u8* data) const {
60 const VKDevice& device, VKMemoryManager& memory_manager,
61 VKScheduler& scheduler, VKStagingBufferPool& staging_pool)
62 : VideoCommon::BufferCache<Buffer, VkBuffer, VKStreamBuffer>{rasterizer, system,
63 CreateStreamBuffer(device,
64 scheduler)},
65 device{device}, memory_manager{memory_manager}, scheduler{scheduler}, staging_pool{
66 staging_pool} {}
67
68VKBufferCache::~VKBufferCache() = default;
69
70Buffer VKBufferCache::CreateBlock(VAddr cpu_addr, std::size_t size) {
71 return std::make_shared<CachedBufferBlock>(device, memory_manager, cpu_addr, size);
72}
73
74VkBuffer VKBufferCache::ToHandle(const Buffer& buffer) {
75 return buffer->GetHandle();
76}
77
78VkBuffer VKBufferCache::GetEmptyBuffer(std::size_t size) {
79 size = std::max(size, std::size_t(4));
80 const auto& empty = staging_pool.GetUnusedBuffer(size, false);
81 scheduler.RequestOutsideRenderPassOperationContext();
82 scheduler.Record([size, buffer = *empty.handle](vk::CommandBuffer cmdbuf) {
83 cmdbuf.FillBuffer(buffer, 0, size, 0);
84 });
85 return *empty.handle;
86}
87
88void VKBufferCache::UploadBlockData(const Buffer& buffer, std::size_t offset, std::size_t size,
89 const u8* data) {
90 const auto& staging = staging_pool.GetUnusedBuffer(size, true); 60 const auto& staging = staging_pool.GetUnusedBuffer(size, true);
91 std::memcpy(staging.commit->Map(size), data, size); 61 std::memcpy(staging.commit->Map(size), data, size);
92 62
93 scheduler.RequestOutsideRenderPassOperationContext(); 63 scheduler.RequestOutsideRenderPassOperationContext();
94 scheduler.Record([staging = *staging.handle, buffer = buffer->GetHandle(), offset, 64
95 size](vk::CommandBuffer cmdbuf) { 65 const VkBuffer handle = Handle();
96 cmdbuf.CopyBuffer(staging, buffer, VkBufferCopy{0, offset, size}); 66 scheduler.Record([staging = *staging.handle, handle, offset, size](vk::CommandBuffer cmdbuf) {
67 cmdbuf.CopyBuffer(staging, handle, VkBufferCopy{0, offset, size});
97 68
98 VkBufferMemoryBarrier barrier; 69 VkBufferMemoryBarrier barrier;
99 barrier.sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER; 70 barrier.sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER;
@@ -102,7 +73,7 @@ void VKBufferCache::UploadBlockData(const Buffer& buffer, std::size_t offset, st
102 barrier.dstAccessMask = UPLOAD_ACCESS_BARRIERS; 73 barrier.dstAccessMask = UPLOAD_ACCESS_BARRIERS;
103 barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; 74 barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
104 barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; 75 barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
105 barrier.buffer = buffer; 76 barrier.buffer = handle;
106 barrier.offset = offset; 77 barrier.offset = offset;
107 barrier.size = size; 78 barrier.size = size;
108 cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, UPLOAD_PIPELINE_STAGE, 0, {}, 79 cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, UPLOAD_PIPELINE_STAGE, 0, {},
@@ -110,12 +81,12 @@ void VKBufferCache::UploadBlockData(const Buffer& buffer, std::size_t offset, st
110 }); 81 });
111} 82}
112 83
113void VKBufferCache::DownloadBlockData(const Buffer& buffer, std::size_t offset, std::size_t size, 84void Buffer::Download(std::size_t offset, std::size_t size, u8* data) const {
114 u8* data) {
115 const auto& staging = staging_pool.GetUnusedBuffer(size, true); 85 const auto& staging = staging_pool.GetUnusedBuffer(size, true);
116 scheduler.RequestOutsideRenderPassOperationContext(); 86 scheduler.RequestOutsideRenderPassOperationContext();
117 scheduler.Record([staging = *staging.handle, buffer = buffer->GetHandle(), offset, 87
118 size](vk::CommandBuffer cmdbuf) { 88 const VkBuffer handle = Handle();
89 scheduler.Record([staging = *staging.handle, handle, offset, size](vk::CommandBuffer cmdbuf) {
119 VkBufferMemoryBarrier barrier; 90 VkBufferMemoryBarrier barrier;
120 barrier.sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER; 91 barrier.sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER;
121 barrier.pNext = nullptr; 92 barrier.pNext = nullptr;
@@ -123,7 +94,7 @@ void VKBufferCache::DownloadBlockData(const Buffer& buffer, std::size_t offset,
123 barrier.dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT; 94 barrier.dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT;
124 barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; 95 barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
125 barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; 96 barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
126 barrier.buffer = buffer; 97 barrier.buffer = handle;
127 barrier.offset = offset; 98 barrier.offset = offset;
128 barrier.size = size; 99 barrier.size = size;
129 100
@@ -131,18 +102,20 @@ void VKBufferCache::DownloadBlockData(const Buffer& buffer, std::size_t offset,
131 VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT | 102 VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT |
132 VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 103 VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
133 VK_PIPELINE_STAGE_TRANSFER_BIT, 0, {}, barrier, {}); 104 VK_PIPELINE_STAGE_TRANSFER_BIT, 0, {}, barrier, {});
134 cmdbuf.CopyBuffer(buffer, staging, VkBufferCopy{offset, 0, size}); 105 cmdbuf.CopyBuffer(handle, staging, VkBufferCopy{offset, 0, size});
135 }); 106 });
136 scheduler.Finish(); 107 scheduler.Finish();
137 108
138 std::memcpy(data, staging.commit->Map(size), size); 109 std::memcpy(data, staging.commit->Map(size), size);
139} 110}
140 111
141void VKBufferCache::CopyBlock(const Buffer& src, const Buffer& dst, std::size_t src_offset, 112void Buffer::CopyFrom(const Buffer& src, std::size_t src_offset, std::size_t dst_offset,
142 std::size_t dst_offset, std::size_t size) { 113 std::size_t size) const {
143 scheduler.RequestOutsideRenderPassOperationContext(); 114 scheduler.RequestOutsideRenderPassOperationContext();
144 scheduler.Record([src_buffer = src->GetHandle(), dst_buffer = dst->GetHandle(), src_offset, 115
145 dst_offset, size](vk::CommandBuffer cmdbuf) { 116 const VkBuffer dst_buffer = Handle();
117 scheduler.Record([src_buffer = src.Handle(), dst_buffer, src_offset, dst_offset,
118 size](vk::CommandBuffer cmdbuf) {
146 cmdbuf.CopyBuffer(src_buffer, dst_buffer, VkBufferCopy{src_offset, dst_offset, size}); 119 cmdbuf.CopyBuffer(src_buffer, dst_buffer, VkBufferCopy{src_offset, dst_offset, size});
147 120
148 std::array<VkBufferMemoryBarrier, 2> barriers; 121 std::array<VkBufferMemoryBarrier, 2> barriers;
@@ -169,4 +142,30 @@ void VKBufferCache::CopyBlock(const Buffer& src, const Buffer& dst, std::size_t
169 }); 142 });
170} 143}
171 144
145VKBufferCache::VKBufferCache(VideoCore::RasterizerInterface& rasterizer, Core::System& system,
146 const VKDevice& device, VKMemoryManager& memory_manager,
147 VKScheduler& scheduler, VKStagingBufferPool& staging_pool)
148 : VideoCommon::BufferCache<Buffer, VkBuffer, VKStreamBuffer>{rasterizer, system,
149 CreateStreamBuffer(device,
150 scheduler)},
151 device{device}, memory_manager{memory_manager}, scheduler{scheduler}, staging_pool{
152 staging_pool} {}
153
154VKBufferCache::~VKBufferCache() = default;
155
156std::shared_ptr<Buffer> VKBufferCache::CreateBlock(VAddr cpu_addr, std::size_t size) {
157 return std::make_shared<Buffer>(device, memory_manager, scheduler, staging_pool, cpu_addr,
158 size);
159}
160
161VKBufferCache::BufferInfo VKBufferCache::GetEmptyBuffer(std::size_t size) {
162 size = std::max(size, std::size_t(4));
163 const auto& empty = staging_pool.GetUnusedBuffer(size, false);
164 scheduler.RequestOutsideRenderPassOperationContext();
165 scheduler.Record([size, buffer = *empty.handle](vk::CommandBuffer cmdbuf) {
166 cmdbuf.FillBuffer(buffer, 0, size, 0);
167 });
168 return {*empty.handle, 0, 0};
169}
170
172} // namespace Vulkan 171} // namespace Vulkan
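
The buffer-cache refactor moves the transfer operations off the cache's virtual interface (UploadBlockData, DownloadBlockData, CopyBlock) and onto the Buffer object itself, which now also holds references to the scheduler and staging pool it needs to record those commands. A simplified, Vulkan-free analogue of the resulting interface shape:

#include <cstddef>
#include <cstdint>
#include <cstring>
#include <vector>

// Simplified analogue: each buffer performs its own transfers, so the generic
// cache no longer needs per-backend virtual hooks for them.
class Buffer {
public:
    explicit Buffer(std::size_t size) : storage(size) {}

    void Upload(std::size_t offset, std::size_t size, const std::uint8_t* data) {
        std::memcpy(storage.data() + offset, data, size);
    }

    void Download(std::size_t offset, std::size_t size, std::uint8_t* data) const {
        std::memcpy(data, storage.data() + offset, size);
    }

    void CopyFrom(const Buffer& src, std::size_t src_offset, std::size_t dst_offset,
                  std::size_t size) {
        std::memcpy(storage.data() + dst_offset, src.storage.data() + src_offset, size);
    }

private:
    std::vector<std::uint8_t> storage;
};
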
diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.h b/src/video_core/renderer_vulkan/vk_buffer_cache.h
index a54583e7d..3630aca77 100644
--- a/src/video_core/renderer_vulkan/vk_buffer_cache.h
+++ b/src/video_core/renderer_vulkan/vk_buffer_cache.h
@@ -8,7 +8,6 @@
8 8
9#include "common/common_types.h" 9#include "common/common_types.h"
10#include "video_core/buffer_cache/buffer_cache.h" 10#include "video_core/buffer_cache/buffer_cache.h"
11#include "video_core/rasterizer_cache.h"
12#include "video_core/renderer_vulkan/vk_memory_manager.h" 11#include "video_core/renderer_vulkan/vk_memory_manager.h"
13#include "video_core/renderer_vulkan/vk_staging_buffer_pool.h" 12#include "video_core/renderer_vulkan/vk_staging_buffer_pool.h"
14#include "video_core/renderer_vulkan/vk_stream_buffer.h" 13#include "video_core/renderer_vulkan/vk_stream_buffer.h"
@@ -24,22 +23,34 @@ class VKDevice;
24class VKMemoryManager; 23class VKMemoryManager;
25class VKScheduler; 24class VKScheduler;
26 25
27class CachedBufferBlock final : public VideoCommon::BufferBlock { 26class Buffer final : public VideoCommon::BufferBlock {
28public: 27public:
29 explicit CachedBufferBlock(const VKDevice& device, VKMemoryManager& memory_manager, 28 explicit Buffer(const VKDevice& device, VKMemoryManager& memory_manager, VKScheduler& scheduler,
30 VAddr cpu_addr, std::size_t size); 29 VKStagingBufferPool& staging_pool, VAddr cpu_addr, std::size_t size);
31 ~CachedBufferBlock(); 30 ~Buffer();
32 31
33 VkBuffer GetHandle() const { 32 void Upload(std::size_t offset, std::size_t size, const u8* data) const;
33
34 void Download(std::size_t offset, std::size_t size, u8* data) const;
35
36 void CopyFrom(const Buffer& src, std::size_t src_offset, std::size_t dst_offset,
37 std::size_t size) const;
38
39 VkBuffer Handle() const {
34 return *buffer.handle; 40 return *buffer.handle;
35 } 41 }
36 42
43 u64 Address() const {
44 return 0;
45 }
46
37private: 47private:
48 VKScheduler& scheduler;
49 VKStagingBufferPool& staging_pool;
50
38 VKBuffer buffer; 51 VKBuffer buffer;
39}; 52};
40 53
41using Buffer = std::shared_ptr<CachedBufferBlock>;
42
43class VKBufferCache final : public VideoCommon::BufferCache<Buffer, VkBuffer, VKStreamBuffer> { 54class VKBufferCache final : public VideoCommon::BufferCache<Buffer, VkBuffer, VKStreamBuffer> {
44public: 55public:
45 explicit VKBufferCache(VideoCore::RasterizerInterface& rasterizer, Core::System& system, 56 explicit VKBufferCache(VideoCore::RasterizerInterface& rasterizer, Core::System& system,
@@ -47,21 +58,10 @@ public:
47 VKScheduler& scheduler, VKStagingBufferPool& staging_pool); 58 VKScheduler& scheduler, VKStagingBufferPool& staging_pool);
48 ~VKBufferCache(); 59 ~VKBufferCache();
49 60
50 VkBuffer GetEmptyBuffer(std::size_t size) override; 61 BufferInfo GetEmptyBuffer(std::size_t size) override;
51 62
52protected: 63protected:
53 VkBuffer ToHandle(const Buffer& buffer) override; 64 std::shared_ptr<Buffer> CreateBlock(VAddr cpu_addr, std::size_t size) override;
54
55 Buffer CreateBlock(VAddr cpu_addr, std::size_t size) override;
56
57 void UploadBlockData(const Buffer& buffer, std::size_t offset, std::size_t size,
58 const u8* data) override;
59
60 void DownloadBlockData(const Buffer& buffer, std::size_t offset, std::size_t size,
61 u8* data) override;
62
63 void CopyBlock(const Buffer& src, const Buffer& dst, std::size_t src_offset,
64 std::size_t dst_offset, std::size_t size) override;
65 65
66private: 66private:
67 const VKDevice& device; 67 const VKDevice& device;
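
GetEmptyBuffer now returns a BufferInfo instead of a bare VkBuffer. Judging from the {*empty.handle, 0, 0} return and the Buffer::Address() accessor above, the aggregate presumably bundles the handle with an offset and a GPU address; the address only matters on the OpenGL unified-memory path, so the Vulkan backend reports zero. An assumed sketch of its shape (the real definition lives in the shared buffer_cache header):

#include <cstdint>
#include <vulkan/vulkan.h>

// Assumed shape of the buffer cache's BufferInfo as instantiated for Vulkan.
struct BufferInfo {
    VkBuffer handle = VK_NULL_HANDLE;
    std::uint64_t offset = 0;
    std::uint64_t address = 0; // device address; always 0 on the Vulkan path
};
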
diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
index b8ccf164f..ea66e621e 100644
--- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
@@ -27,6 +27,7 @@
27#include "video_core/renderer_vulkan/wrapper.h" 27#include "video_core/renderer_vulkan/wrapper.h"
28#include "video_core/shader/compiler_settings.h" 28#include "video_core/shader/compiler_settings.h"
29#include "video_core/shader/memory_util.h" 29#include "video_core/shader/memory_util.h"
30#include "video_core/shader_cache.h"
30 31
31namespace Vulkan { 32namespace Vulkan {
32 33
@@ -132,19 +133,18 @@ bool ComputePipelineCacheKey::operator==(const ComputePipelineCacheKey& rhs) con
132 return std::memcmp(&rhs, this, sizeof *this) == 0; 133 return std::memcmp(&rhs, this, sizeof *this) == 0;
133} 134}
134 135
135CachedShader::CachedShader(Core::System& system, Tegra::Engines::ShaderType stage, 136Shader::Shader(Core::System& system, Tegra::Engines::ShaderType stage, GPUVAddr gpu_addr,
136 GPUVAddr gpu_addr, VAddr cpu_addr, ProgramCode program_code, 137 VideoCommon::Shader::ProgramCode program_code, u32 main_offset)
137 u32 main_offset) 138 : gpu_addr{gpu_addr}, program_code{std::move(program_code)},
138 : RasterizerCacheObject{cpu_addr}, gpu_addr{gpu_addr}, program_code{std::move(program_code)},
139 registry{stage, GetEngine(system, stage)}, shader_ir{this->program_code, main_offset, 139 registry{stage, GetEngine(system, stage)}, shader_ir{this->program_code, main_offset,
140 compiler_settings, registry}, 140 compiler_settings, registry},
141 entries{GenerateShaderEntries(shader_ir)} {} 141 entries{GenerateShaderEntries(shader_ir)} {}
142 142
143CachedShader::~CachedShader() = default; 143Shader::~Shader() = default;
144 144
145Tegra::Engines::ConstBufferEngineInterface& CachedShader::GetEngine( 145Tegra::Engines::ConstBufferEngineInterface& Shader::GetEngine(Core::System& system,
146 Core::System& system, Tegra::Engines::ShaderType stage) { 146 Tegra::Engines::ShaderType stage) {
147 if (stage == Tegra::Engines::ShaderType::Compute) { 147 if (stage == ShaderType::Compute) {
148 return system.GPU().KeplerCompute(); 148 return system.GPU().KeplerCompute();
149 } else { 149 } else {
150 return system.GPU().Maxwell3D(); 150 return system.GPU().Maxwell3D();
@@ -156,16 +156,16 @@ VKPipelineCache::VKPipelineCache(Core::System& system, RasterizerVulkan& rasteri
156 VKDescriptorPool& descriptor_pool, 156 VKDescriptorPool& descriptor_pool,
157 VKUpdateDescriptorQueue& update_descriptor_queue, 157 VKUpdateDescriptorQueue& update_descriptor_queue,
158 VKRenderPassCache& renderpass_cache) 158 VKRenderPassCache& renderpass_cache)
159 : RasterizerCache{rasterizer}, system{system}, device{device}, scheduler{scheduler}, 159 : VideoCommon::ShaderCache<Shader>{rasterizer}, system{system}, device{device},
160 descriptor_pool{descriptor_pool}, update_descriptor_queue{update_descriptor_queue}, 160 scheduler{scheduler}, descriptor_pool{descriptor_pool},
161 renderpass_cache{renderpass_cache} {} 161 update_descriptor_queue{update_descriptor_queue}, renderpass_cache{renderpass_cache} {}
162 162
163VKPipelineCache::~VKPipelineCache() = default; 163VKPipelineCache::~VKPipelineCache() = default;
164 164
165std::array<Shader, Maxwell::MaxShaderProgram> VKPipelineCache::GetShaders() { 165std::array<Shader*, Maxwell::MaxShaderProgram> VKPipelineCache::GetShaders() {
166 const auto& gpu = system.GPU().Maxwell3D(); 166 const auto& gpu = system.GPU().Maxwell3D();
167 167
168 std::array<Shader, Maxwell::MaxShaderProgram> shaders; 168 std::array<Shader*, Maxwell::MaxShaderProgram> shaders{};
169 for (std::size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) { 169 for (std::size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) {
170 const auto program{static_cast<Maxwell::ShaderProgram>(index)}; 170 const auto program{static_cast<Maxwell::ShaderProgram>(index)};
171 171
@@ -178,24 +178,28 @@ std::array<Shader, Maxwell::MaxShaderProgram> VKPipelineCache::GetShaders() {
178 const GPUVAddr program_addr{GetShaderAddress(system, program)}; 178 const GPUVAddr program_addr{GetShaderAddress(system, program)};
179 const std::optional cpu_addr = memory_manager.GpuToCpuAddress(program_addr); 179 const std::optional cpu_addr = memory_manager.GpuToCpuAddress(program_addr);
180 ASSERT(cpu_addr); 180 ASSERT(cpu_addr);
181 auto shader = cpu_addr ? TryGet(*cpu_addr) : null_shader; 181
182 if (!shader) { 182 Shader* result = cpu_addr ? TryGet(*cpu_addr) : null_shader.get();
183 if (!result) {
183 const auto host_ptr{memory_manager.GetPointer(program_addr)}; 184 const auto host_ptr{memory_manager.GetPointer(program_addr)};
184 185
185 // No shader found - create a new one 186 // No shader found - create a new one
186 constexpr u32 stage_offset = STAGE_MAIN_OFFSET; 187 constexpr u32 stage_offset = STAGE_MAIN_OFFSET;
187 const auto stage = static_cast<Tegra::Engines::ShaderType>(index == 0 ? 0 : index - 1); 188 const auto stage = static_cast<ShaderType>(index == 0 ? 0 : index - 1);
188 ProgramCode code = GetShaderCode(memory_manager, program_addr, host_ptr, false); 189 ProgramCode code = GetShaderCode(memory_manager, program_addr, host_ptr, false);
190 const std::size_t size_in_bytes = code.size() * sizeof(u64);
191
192 auto shader = std::make_unique<Shader>(system, stage, program_addr, std::move(code),
193 stage_offset);
194 result = shader.get();
189 195
190 shader = std::make_shared<CachedShader>(system, stage, program_addr, *cpu_addr,
191 std::move(code), stage_offset);
192 if (cpu_addr) { 196 if (cpu_addr) {
193 Register(shader); 197 Register(std::move(shader), *cpu_addr, size_in_bytes);
194 } else { 198 } else {
195 null_shader = shader; 199 null_shader = std::move(shader);
196 } 200 }
197 } 201 }
198 shaders[index] = std::move(shader); 202 shaders[index] = result;
199 } 203 }
200 return last_shaders = shaders; 204 return last_shaders = shaders;
201} 205}
@@ -236,19 +240,22 @@ VKComputePipeline& VKPipelineCache::GetComputePipeline(const ComputePipelineCach
236 const auto cpu_addr = memory_manager.GpuToCpuAddress(program_addr); 240 const auto cpu_addr = memory_manager.GpuToCpuAddress(program_addr);
237 ASSERT(cpu_addr); 241 ASSERT(cpu_addr);
238 242
239 auto shader = cpu_addr ? TryGet(*cpu_addr) : null_kernel; 243 Shader* shader = cpu_addr ? TryGet(*cpu_addr) : null_kernel.get();
240 if (!shader) { 244 if (!shader) {
241 // No shader found - create a new one 245 // No shader found - create a new one
242 const auto host_ptr = memory_manager.GetPointer(program_addr); 246 const auto host_ptr = memory_manager.GetPointer(program_addr);
243 247
244 ProgramCode code = GetShaderCode(memory_manager, program_addr, host_ptr, true); 248 ProgramCode code = GetShaderCode(memory_manager, program_addr, host_ptr, true);
245 shader = std::make_shared<CachedShader>(system, Tegra::Engines::ShaderType::Compute, 249 const std::size_t size_in_bytes = code.size() * sizeof(u64);
246 program_addr, *cpu_addr, std::move(code), 250
247 KERNEL_MAIN_OFFSET); 251 auto shader_info = std::make_unique<Shader>(system, ShaderType::Compute, program_addr,
252 std::move(code), KERNEL_MAIN_OFFSET);
253 shader = shader_info.get();
254
248 if (cpu_addr) { 255 if (cpu_addr) {
249 Register(shader); 256 Register(std::move(shader_info), *cpu_addr, size_in_bytes);
250 } else { 257 } else {
251 null_kernel = shader; 258 null_kernel = std::move(shader_info);
252 } 259 }
253 } 260 }
254 261
@@ -264,7 +271,7 @@ VKComputePipeline& VKPipelineCache::GetComputePipeline(const ComputePipelineCach
264 return *entry; 271 return *entry;
265} 272}
266 273
267void VKPipelineCache::Unregister(const Shader& shader) { 274void VKPipelineCache::OnShaderRemoval(Shader* shader) {
268 bool finished = false; 275 bool finished = false;
269 const auto Finish = [&] { 276 const auto Finish = [&] {
270 // TODO(Rodrigo): Instead of finishing here, wait for the fences that use this pipeline and 277 // TODO(Rodrigo): Instead of finishing here, wait for the fences that use this pipeline and
@@ -296,8 +303,6 @@ void VKPipelineCache::Unregister(const Shader& shader) {
296 Finish(); 303 Finish();
297 it = compute_cache.erase(it); 304 it = compute_cache.erase(it);
298 } 305 }
299
300 RasterizerCache::Unregister(shader);
301} 306}
302 307
303std::pair<SPIRVProgram, std::vector<VkDescriptorSetLayoutBinding>> 308std::pair<SPIRVProgram, std::vector<VkDescriptorSetLayoutBinding>>
@@ -332,12 +337,11 @@ VKPipelineCache::DecompileShaders(const GraphicsPipelineCacheKey& key) {
332 } 337 }
333 338
334 const GPUVAddr gpu_addr = GetShaderAddress(system, program_enum); 339 const GPUVAddr gpu_addr = GetShaderAddress(system, program_enum);
335 const auto cpu_addr = memory_manager.GpuToCpuAddress(gpu_addr); 340 const std::optional<VAddr> cpu_addr = memory_manager.GpuToCpuAddress(gpu_addr);
336 const auto shader = cpu_addr ? TryGet(*cpu_addr) : null_shader; 341 Shader* const shader = cpu_addr ? TryGet(*cpu_addr) : null_shader.get();
337 ASSERT(shader);
338 342
339 const std::size_t stage = index == 0 ? 0 : index - 1; // Stage indices are 0 - 5 343 const std::size_t stage = index == 0 ? 0 : index - 1; // Stage indices are 0 - 5
340 const auto program_type = GetShaderType(program_enum); 344 const ShaderType program_type = GetShaderType(program_enum);
341 const auto& entries = shader->GetEntries(); 345 const auto& entries = shader->GetEntries();
342 program[stage] = { 346 program[stage] = {
343 Decompile(device, shader->GetIR(), program_type, shader->GetRegistry(), specialization), 347 Decompile(device, shader->GetIR(), program_type, shader->GetRegistry(), specialization),
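
On the pipeline-cache side, shaders move from shared_ptr-based RasterizerCache ownership to the generic VideoCommon::ShaderCache: the cache takes ownership through Register(std::move(shader), addr, size) and lookups such as TryGet hand back non-owning raw pointers, with null_shader and null_kernel kept alive in unique_ptr members. A reduced sketch of that ownership model, with hypothetical names:

#include <cstddef>
#include <map>
#include <memory>

// Minimal sketch: the cache owns its entries; callers keep non-owning pointers.
template <class T>
class ShaderCacheSketch {
public:
    T* TryGet(std::size_t addr) const {
        const auto it = entries.find(addr);
        return it == entries.end() ? nullptr : it->second.get();
    }

    void Register(std::unique_ptr<T> entry, std::size_t addr, std::size_t size_in_bytes) {
        entries.emplace(addr, std::move(entry));
        cached_bytes += size_in_bytes; // bookkeeping for later eviction decisions
    }

private:
    std::map<std::size_t, std::unique_ptr<T>> entries;
    std::size_t cached_bytes = 0;
};
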
diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.h b/src/video_core/renderer_vulkan/vk_pipeline_cache.h
index 0b5796fef..0a36e5112 100644
--- a/src/video_core/renderer_vulkan/vk_pipeline_cache.h
+++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.h
@@ -17,7 +17,6 @@
17#include "common/common_types.h" 17#include "common/common_types.h"
18#include "video_core/engines/const_buffer_engine_interface.h" 18#include "video_core/engines/const_buffer_engine_interface.h"
19#include "video_core/engines/maxwell_3d.h" 19#include "video_core/engines/maxwell_3d.h"
20#include "video_core/rasterizer_cache.h"
21#include "video_core/renderer_vulkan/fixed_pipeline_state.h" 20#include "video_core/renderer_vulkan/fixed_pipeline_state.h"
22#include "video_core/renderer_vulkan/vk_graphics_pipeline.h" 21#include "video_core/renderer_vulkan/vk_graphics_pipeline.h"
23#include "video_core/renderer_vulkan/vk_renderpass_cache.h" 22#include "video_core/renderer_vulkan/vk_renderpass_cache.h"
@@ -26,6 +25,7 @@
26#include "video_core/shader/memory_util.h" 25#include "video_core/shader/memory_util.h"
27#include "video_core/shader/registry.h" 26#include "video_core/shader/registry.h"
28#include "video_core/shader/shader_ir.h" 27#include "video_core/shader/shader_ir.h"
28#include "video_core/shader_cache.h"
29 29
30namespace Core { 30namespace Core {
31class System; 31class System;
@@ -41,8 +41,6 @@ class VKFence;
41class VKScheduler; 41class VKScheduler;
42class VKUpdateDescriptorQueue; 42class VKUpdateDescriptorQueue;
43 43
44class CachedShader;
45using Shader = std::shared_ptr<CachedShader>;
46using Maxwell = Tegra::Engines::Maxwell3D::Regs; 44using Maxwell = Tegra::Engines::Maxwell3D::Regs;
47 45
48struct GraphicsPipelineCacheKey { 46struct GraphicsPipelineCacheKey {
@@ -102,21 +100,16 @@ struct hash<Vulkan::ComputePipelineCacheKey> {
102 100
103namespace Vulkan { 101namespace Vulkan {
104 102
105class CachedShader final : public RasterizerCacheObject { 103class Shader {
106public: 104public:
107 explicit CachedShader(Core::System& system, Tegra::Engines::ShaderType stage, GPUVAddr gpu_addr, 105 explicit Shader(Core::System& system, Tegra::Engines::ShaderType stage, GPUVAddr gpu_addr,
108 VAddr cpu_addr, VideoCommon::Shader::ProgramCode program_code, 106 VideoCommon::Shader::ProgramCode program_code, u32 main_offset);
109 u32 main_offset); 107 ~Shader();
110 ~CachedShader();
111 108
112 GPUVAddr GetGpuAddr() const { 109 GPUVAddr GetGpuAddr() const {
113 return gpu_addr; 110 return gpu_addr;
114 } 111 }
115 112
116 std::size_t GetSizeInBytes() const override {
117 return program_code.size() * sizeof(u64);
118 }
119
120 VideoCommon::Shader::ShaderIR& GetIR() { 113 VideoCommon::Shader::ShaderIR& GetIR() {
121 return shader_ir; 114 return shader_ir;
122 } 115 }
@@ -144,25 +137,23 @@ private:
144 ShaderEntries entries; 137 ShaderEntries entries;
145}; 138};
146 139
147class VKPipelineCache final : public RasterizerCache<Shader> { 140class VKPipelineCache final : public VideoCommon::ShaderCache<Shader> {
148public: 141public:
149 explicit VKPipelineCache(Core::System& system, RasterizerVulkan& rasterizer, 142 explicit VKPipelineCache(Core::System& system, RasterizerVulkan& rasterizer,
150 const VKDevice& device, VKScheduler& scheduler, 143 const VKDevice& device, VKScheduler& scheduler,
151 VKDescriptorPool& descriptor_pool, 144 VKDescriptorPool& descriptor_pool,
152 VKUpdateDescriptorQueue& update_descriptor_queue, 145 VKUpdateDescriptorQueue& update_descriptor_queue,
153 VKRenderPassCache& renderpass_cache); 146 VKRenderPassCache& renderpass_cache);
154 ~VKPipelineCache(); 147 ~VKPipelineCache() override;
155 148
156 std::array<Shader, Maxwell::MaxShaderProgram> GetShaders(); 149 std::array<Shader*, Maxwell::MaxShaderProgram> GetShaders();
157 150
158 VKGraphicsPipeline& GetGraphicsPipeline(const GraphicsPipelineCacheKey& key); 151 VKGraphicsPipeline& GetGraphicsPipeline(const GraphicsPipelineCacheKey& key);
159 152
160 VKComputePipeline& GetComputePipeline(const ComputePipelineCacheKey& key); 153 VKComputePipeline& GetComputePipeline(const ComputePipelineCacheKey& key);
161 154
162protected: 155protected:
163 void Unregister(const Shader& shader) override; 156 void OnShaderRemoval(Shader* shader) final;
164
165 void FlushObjectInner(const Shader& object) override {}
166 157
167private: 158private:
168 std::pair<SPIRVProgram, std::vector<VkDescriptorSetLayoutBinding>> DecompileShaders( 159 std::pair<SPIRVProgram, std::vector<VkDescriptorSetLayoutBinding>> DecompileShaders(
@@ -175,10 +166,10 @@ private:
175 VKUpdateDescriptorQueue& update_descriptor_queue; 166 VKUpdateDescriptorQueue& update_descriptor_queue;
176 VKRenderPassCache& renderpass_cache; 167 VKRenderPassCache& renderpass_cache;
177 168
178 Shader null_shader{}; 169 std::unique_ptr<Shader> null_shader;
179 Shader null_kernel{}; 170 std::unique_ptr<Shader> null_kernel;
180 171
181 std::array<Shader, Maxwell::MaxShaderProgram> last_shaders; 172 std::array<Shader*, Maxwell::MaxShaderProgram> last_shaders{};
182 173
183 GraphicsPipelineCacheKey last_graphics_key; 174 GraphicsPipelineCacheKey last_graphics_key;
184 VKGraphicsPipeline* last_graphics_pipeline = nullptr; 175 VKGraphicsPipeline* last_graphics_pipeline = nullptr;
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
index d86c46412..a8d94eac3 100644
--- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp
+++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
@@ -38,6 +38,7 @@
38#include "video_core/renderer_vulkan/vk_texture_cache.h" 38#include "video_core/renderer_vulkan/vk_texture_cache.h"
39#include "video_core/renderer_vulkan/vk_update_descriptor.h" 39#include "video_core/renderer_vulkan/vk_update_descriptor.h"
40#include "video_core/renderer_vulkan/wrapper.h" 40#include "video_core/renderer_vulkan/wrapper.h"
41#include "video_core/shader_cache.h"
41 42
42namespace Vulkan { 43namespace Vulkan {
43 44
@@ -98,7 +99,7 @@ VkRect2D GetScissorState(const Maxwell& regs, std::size_t index) {
98} 99}
99 100
100std::array<GPUVAddr, Maxwell::MaxShaderProgram> GetShaderAddresses( 101std::array<GPUVAddr, Maxwell::MaxShaderProgram> GetShaderAddresses(
101 const std::array<Shader, Maxwell::MaxShaderProgram>& shaders) { 102 const std::array<Shader*, Maxwell::MaxShaderProgram>& shaders) {
102 std::array<GPUVAddr, Maxwell::MaxShaderProgram> addresses; 103 std::array<GPUVAddr, Maxwell::MaxShaderProgram> addresses;
103 for (std::size_t i = 0; i < std::size(addresses); ++i) { 104 for (std::size_t i = 0; i < std::size(addresses); ++i) {
104 addresses[i] = shaders[i] ? shaders[i]->GetGpuAddr() : 0; 105 addresses[i] = shaders[i] ? shaders[i]->GetGpuAddr() : 0;
@@ -117,6 +118,17 @@ template <typename Engine, typename Entry>
117Tegra::Texture::FullTextureInfo GetTextureInfo(const Engine& engine, const Entry& entry, 118Tegra::Texture::FullTextureInfo GetTextureInfo(const Engine& engine, const Entry& entry,
118 std::size_t stage, std::size_t index = 0) { 119 std::size_t stage, std::size_t index = 0) {
119 const auto stage_type = static_cast<Tegra::Engines::ShaderType>(stage); 120 const auto stage_type = static_cast<Tegra::Engines::ShaderType>(stage);
121 if constexpr (std::is_same_v<Entry, SamplerEntry>) {
122 if (entry.is_separated) {
123 const u32 buffer_1 = entry.buffer;
124 const u32 buffer_2 = entry.secondary_buffer;
125 const u32 offset_1 = entry.offset;
126 const u32 offset_2 = entry.secondary_offset;
127 const u32 handle_1 = engine.AccessConstBuffer32(stage_type, buffer_1, offset_1);
128 const u32 handle_2 = engine.AccessConstBuffer32(stage_type, buffer_2, offset_2);
129 return engine.GetTextureInfo(handle_1 | handle_2);
130 }
131 }
120 if (entry.is_bindless) { 132 if (entry.is_bindless) {
121 const auto tex_handle = engine.AccessConstBuffer32(stage_type, entry.buffer, entry.offset); 133 const auto tex_handle = engine.AccessConstBuffer32(stage_type, entry.buffer, entry.offset);
122 return engine.GetTextureInfo(tex_handle); 134 return engine.GetTextureInfo(tex_handle);
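
The new is_separated branch reads the texture index and sampler index from two different const-buffer locations and ORs the two words into the single packed handle GetTextureInfo expects; this works on the assumption, implicit in the code, that the two words carry their fields in disjoint bit ranges. A tiny illustration:

#include <cstdint>

// Separated texture/sampler handles: each const-buffer word populates its own
// bit field, so a plain OR reassembles the combined handle.
constexpr std::uint32_t CombineSeparatedHandle(std::uint32_t texture_word,
                                               std::uint32_t sampler_word) {
    return texture_word | sampler_word;
}

static_assert(CombineSeparatedHandle(0x00001234, 0x56700000) == 0x56701234);
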
@@ -131,6 +143,49 @@ Tegra::Texture::FullTextureInfo GetTextureInfo(const Engine& engine, const Entry
131 } 143 }
132} 144}
133 145
146/// @brief Determine if an attachment to be updated has to preserve contents
147/// @param is_clear True when a clear is being executed
148/// @param regs 3D registers
149/// @return True when the contents have to be preserved
150bool HasToPreserveColorContents(bool is_clear, const Maxwell& regs) {
151 if (!is_clear) {
152 return true;
153 }
154 // First we have to make sure all clear masks are enabled.
155 if (!regs.clear_buffers.R || !regs.clear_buffers.G || !regs.clear_buffers.B ||
156 !regs.clear_buffers.A) {
157 return true;
158 }
159 // If scissors are disabled, the whole screen is cleared
160 if (!regs.clear_flags.scissor) {
161 return false;
162 }
163 // Then we have to confirm scissor testing clears the whole image
164 const std::size_t index = regs.clear_buffers.RT;
165 const auto& scissor = regs.scissor_test[0];
166 return scissor.min_x > 0 || scissor.min_y > 0 || scissor.max_x < regs.rt[index].width ||
167 scissor.max_y < regs.rt[index].height;
168}
169
170/// @brief Determine if an attachment to be updated has to preserve contents
171/// @param is_clear True when a clear is being executed
172/// @param regs 3D registers
173/// @return True when the contents have to be preserved
174bool HasToPreserveDepthContents(bool is_clear, const Maxwell& regs) {
175 // If we are not clearing, the contents have to be preserved
176 if (!is_clear) {
177 return true;
178 }
179 // For depth stencil clears we only have to confirm scissor test covers the whole image
180 if (!regs.clear_flags.scissor) {
181 return false;
182 }
 183 // Make sure the clear covers the whole image
184 const auto& scissor = regs.scissor_test[0];
185 return scissor.min_x > 0 || scissor.min_y > 0 || scissor.max_x < regs.zeta_width ||
186 scissor.max_y < regs.zeta_height;
187}
188
134} // Anonymous namespace 189} // Anonymous namespace
135 190
136class BufferBindings final { 191class BufferBindings final {
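
HasToPreserveColorContents and HasToPreserveDepthContents let a full-screen clear request its attachments with preserve_contents = false, sparing the texture cache a flush of contents the clear would overwrite anyway. The scissor rectangle is the deciding condition; a self-contained check of the same logic, with a plain struct standing in for the Maxwell registers:

#include <cassert>
#include <cstdint>

struct Scissor {
    std::uint32_t min_x, min_y, max_x, max_y;
};

// True when a scissored clear fails to cover a width x height target,
// i.e. when the old contents outside the rectangle must be preserved.
bool ClearNeedsPreserve(bool scissor_enabled, const Scissor& scissor, std::uint32_t width,
                        std::uint32_t height) {
    if (!scissor_enabled) {
        return false; // the whole target is cleared
    }
    return scissor.min_x > 0 || scissor.min_y > 0 || scissor.max_x < width ||
           scissor.max_y < height;
}

int main() {
    assert(!ClearNeedsPreserve(false, {}, 1280, 720));
    assert(!ClearNeedsPreserve(true, {0, 0, 1280, 720}, 1280, 720));
    assert(ClearNeedsPreserve(true, {16, 16, 1264, 704}, 1280, 720)); // partial clear
}
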
@@ -332,7 +387,7 @@ void RasterizerVulkan::Draw(bool is_indexed, bool is_instanced) {
332 387
333 buffer_cache.Unmap(); 388 buffer_cache.Unmap();
334 389
335 const Texceptions texceptions = UpdateAttachments(); 390 const Texceptions texceptions = UpdateAttachments(false);
336 SetupImageTransitions(texceptions, color_attachments, zeta_attachment); 391 SetupImageTransitions(texceptions, color_attachments, zeta_attachment);
337 392
338 key.renderpass_params = GetRenderPassParams(texceptions); 393 key.renderpass_params = GetRenderPassParams(texceptions);
@@ -388,7 +443,7 @@ void RasterizerVulkan::Clear() {
388 return; 443 return;
389 } 444 }
390 445
391 [[maybe_unused]] const auto texceptions = UpdateAttachments(); 446 [[maybe_unused]] const auto texceptions = UpdateAttachments(true);
392 DEBUG_ASSERT(texceptions.none()); 447 DEBUG_ASSERT(texceptions.none());
393 SetupImageTransitions(0, color_attachments, zeta_attachment); 448 SetupImageTransitions(0, color_attachments, zeta_attachment);
394 449
@@ -665,9 +720,12 @@ void RasterizerVulkan::FlushWork() {
665 draw_counter = 0; 720 draw_counter = 0;
666} 721}
667 722
668RasterizerVulkan::Texceptions RasterizerVulkan::UpdateAttachments() { 723RasterizerVulkan::Texceptions RasterizerVulkan::UpdateAttachments(bool is_clear) {
669 MICROPROFILE_SCOPE(Vulkan_RenderTargets); 724 MICROPROFILE_SCOPE(Vulkan_RenderTargets);
670 auto& dirty = system.GPU().Maxwell3D().dirty.flags; 725 auto& maxwell3d = system.GPU().Maxwell3D();
726 auto& dirty = maxwell3d.dirty.flags;
727 auto& regs = maxwell3d.regs;
728
671 const bool update_rendertargets = dirty[VideoCommon::Dirty::RenderTargets]; 729 const bool update_rendertargets = dirty[VideoCommon::Dirty::RenderTargets];
672 dirty[VideoCommon::Dirty::RenderTargets] = false; 730 dirty[VideoCommon::Dirty::RenderTargets] = false;
673 731
@@ -676,7 +734,8 @@ RasterizerVulkan::Texceptions RasterizerVulkan::UpdateAttachments() {
676 Texceptions texceptions; 734 Texceptions texceptions;
677 for (std::size_t rt = 0; rt < Maxwell::NumRenderTargets; ++rt) { 735 for (std::size_t rt = 0; rt < Maxwell::NumRenderTargets; ++rt) {
678 if (update_rendertargets) { 736 if (update_rendertargets) {
679 color_attachments[rt] = texture_cache.GetColorBufferSurface(rt, true); 737 const bool preserve_contents = HasToPreserveColorContents(is_clear, regs);
738 color_attachments[rt] = texture_cache.GetColorBufferSurface(rt, preserve_contents);
680 } 739 }
681 if (color_attachments[rt] && WalkAttachmentOverlaps(*color_attachments[rt])) { 740 if (color_attachments[rt] && WalkAttachmentOverlaps(*color_attachments[rt])) {
682 texceptions[rt] = true; 741 texceptions[rt] = true;
@@ -684,7 +743,8 @@ RasterizerVulkan::Texceptions RasterizerVulkan::UpdateAttachments() {
684 } 743 }
685 744
686 if (update_rendertargets) { 745 if (update_rendertargets) {
687 zeta_attachment = texture_cache.GetDepthBufferSurface(true); 746 const bool preserve_contents = HasToPreserveDepthContents(is_clear, regs);
747 zeta_attachment = texture_cache.GetDepthBufferSurface(preserve_contents);
688 } 748 }
689 if (zeta_attachment && WalkAttachmentOverlaps(*zeta_attachment)) { 749 if (zeta_attachment && WalkAttachmentOverlaps(*zeta_attachment)) {
690 texceptions[ZETA_TEXCEPTION_INDEX] = true; 750 texceptions[ZETA_TEXCEPTION_INDEX] = true;
@@ -716,7 +776,7 @@ std::tuple<VkFramebuffer, VkExtent2D> RasterizerVulkan::ConfigureFramebuffers(
716 if (!view) { 776 if (!view) {
717 return false; 777 return false;
718 } 778 }
719 key.views.push_back(view->GetHandle()); 779 key.views.push_back(view->GetAttachment());
720 key.width = std::min(key.width, view->GetWidth()); 780 key.width = std::min(key.width, view->GetWidth());
721 key.height = std::min(key.height, view->GetHeight()); 781 key.height = std::min(key.height, view->GetHeight());
722 key.layers = std::min(key.layers, view->GetNumLayers()); 782 key.layers = std::min(key.layers, view->GetNumLayers());
@@ -776,12 +836,12 @@ RasterizerVulkan::DrawParameters RasterizerVulkan::SetupGeometry(FixedPipelineSt
776} 836}
777 837
778void RasterizerVulkan::SetupShaderDescriptors( 838void RasterizerVulkan::SetupShaderDescriptors(
779 const std::array<Shader, Maxwell::MaxShaderProgram>& shaders) { 839 const std::array<Shader*, Maxwell::MaxShaderProgram>& shaders) {
780 texture_cache.GuardSamplers(true); 840 texture_cache.GuardSamplers(true);
781 841
782 for (std::size_t stage = 0; stage < Maxwell::MaxShaderStage; ++stage) { 842 for (std::size_t stage = 0; stage < Maxwell::MaxShaderStage; ++stage) {
783 // Skip VertexA stage 843 // Skip VertexA stage
784 const auto& shader = shaders[stage + 1]; 844 Shader* const shader = shaders[stage + 1];
785 if (!shader) { 845 if (!shader) {
786 continue; 846 continue;
787 } 847 }
@@ -858,10 +918,10 @@ void RasterizerVulkan::BeginTransformFeedback() {
858 UNIMPLEMENTED_IF(binding.buffer_offset != 0); 918 UNIMPLEMENTED_IF(binding.buffer_offset != 0);
859 919
860 const GPUVAddr gpu_addr = binding.Address(); 920 const GPUVAddr gpu_addr = binding.Address();
861 const std::size_t size = binding.buffer_size; 921 const VkDeviceSize size = static_cast<VkDeviceSize>(binding.buffer_size);
862 const auto [buffer, offset] = buffer_cache.UploadMemory(gpu_addr, size, 4, true); 922 const auto info = buffer_cache.UploadMemory(gpu_addr, size, 4, true);
863 923
864 scheduler.Record([buffer = buffer, offset = offset, size](vk::CommandBuffer cmdbuf) { 924 scheduler.Record([buffer = info.handle, offset = info.offset, size](vk::CommandBuffer cmdbuf) {
865 cmdbuf.BindTransformFeedbackBuffersEXT(0, 1, &buffer, &offset, &size); 925 cmdbuf.BindTransformFeedbackBuffersEXT(0, 1, &buffer, &offset, &size);
866 cmdbuf.BeginTransformFeedbackEXT(0, 0, nullptr, nullptr); 926 cmdbuf.BeginTransformFeedbackEXT(0, 0, nullptr, nullptr);
867 }); 927 });
@@ -913,8 +973,8 @@ void RasterizerVulkan::SetupVertexArrays(FixedPipelineState::VertexInput& vertex
913 buffer_bindings.AddVertexBinding(DefaultBuffer(), 0); 973 buffer_bindings.AddVertexBinding(DefaultBuffer(), 0);
914 continue; 974 continue;
915 } 975 }
916 const auto [buffer, offset] = buffer_cache.UploadMemory(start, size); 976 const auto info = buffer_cache.UploadMemory(start, size);
917 buffer_bindings.AddVertexBinding(buffer, offset); 977 buffer_bindings.AddVertexBinding(info.handle, info.offset);
918 } 978 }
919} 979}
920 980
@@ -936,7 +996,9 @@ void RasterizerVulkan::SetupIndexBuffer(BufferBindings& buffer_bindings, DrawPar
936 break; 996 break;
937 } 997 }
938 const GPUVAddr gpu_addr = regs.index_array.IndexStart(); 998 const GPUVAddr gpu_addr = regs.index_array.IndexStart();
939 auto [buffer, offset] = buffer_cache.UploadMemory(gpu_addr, CalculateIndexBufferSize()); 999 const auto info = buffer_cache.UploadMemory(gpu_addr, CalculateIndexBufferSize());
1000 VkBuffer buffer = info.handle;
1001 u64 offset = info.offset;
940 std::tie(buffer, offset) = quad_indexed_pass.Assemble( 1002 std::tie(buffer, offset) = quad_indexed_pass.Assemble(
941 regs.index_array.format, params.num_vertices, params.base_vertex, buffer, offset); 1003 regs.index_array.format, params.num_vertices, params.base_vertex, buffer, offset);
942 1004
@@ -950,7 +1012,9 @@ void RasterizerVulkan::SetupIndexBuffer(BufferBindings& buffer_bindings, DrawPar
950 break; 1012 break;
951 } 1013 }
952 const GPUVAddr gpu_addr = regs.index_array.IndexStart(); 1014 const GPUVAddr gpu_addr = regs.index_array.IndexStart();
953 auto [buffer, offset] = buffer_cache.UploadMemory(gpu_addr, CalculateIndexBufferSize()); 1015 const auto info = buffer_cache.UploadMemory(gpu_addr, CalculateIndexBufferSize());
1016 VkBuffer buffer = info.handle;
1017 u64 offset = info.offset;
954 1018
955 auto format = regs.index_array.format; 1019 auto format = regs.index_array.format;
956 const bool is_uint8 = format == Maxwell::IndexFormat::UnsignedByte; 1020 const bool is_uint8 = format == Maxwell::IndexFormat::UnsignedByte;
@@ -1097,10 +1161,9 @@ void RasterizerVulkan::SetupConstBuffer(const ConstBufferEntry& entry,
1097 Common::AlignUp(CalculateConstBufferSize(entry, buffer), 4 * sizeof(float)); 1161 Common::AlignUp(CalculateConstBufferSize(entry, buffer), 4 * sizeof(float));
1098 ASSERT(size <= MaxConstbufferSize); 1162 ASSERT(size <= MaxConstbufferSize);
1099 1163
1100 const auto [buffer_handle, offset] = 1164 const auto info =
1101 buffer_cache.UploadMemory(buffer.address, size, device.GetUniformBufferAlignment()); 1165 buffer_cache.UploadMemory(buffer.address, size, device.GetUniformBufferAlignment());
1102 1166 update_descriptor_queue.AddBuffer(info.handle, info.offset, size);
1103 update_descriptor_queue.AddBuffer(buffer_handle, offset, size);
1104} 1167}
1105 1168
1106void RasterizerVulkan::SetupGlobalBuffer(const GlobalBufferEntry& entry, GPUVAddr address) { 1169void RasterizerVulkan::SetupGlobalBuffer(const GlobalBufferEntry& entry, GPUVAddr address) {
@@ -1114,14 +1177,14 @@ void RasterizerVulkan::SetupGlobalBuffer(const GlobalBufferEntry& entry, GPUVAdd
1114 // Note: Do *not* use DefaultBuffer() here, storage buffers can be written to, breaking the 1177 // Note: Do *not* use DefaultBuffer() here, storage buffers can be written to, breaking the
1115 // default buffer. 1178 // default buffer.
1116 static constexpr std::size_t dummy_size = 4; 1179 static constexpr std::size_t dummy_size = 4;
1117 const auto buffer = buffer_cache.GetEmptyBuffer(dummy_size); 1180 const auto info = buffer_cache.GetEmptyBuffer(dummy_size);
1118 update_descriptor_queue.AddBuffer(buffer, 0, dummy_size); 1181 update_descriptor_queue.AddBuffer(info.handle, info.offset, dummy_size);
1119 return; 1182 return;
1120 } 1183 }
1121 1184
1122 const auto [buffer, offset] = buffer_cache.UploadMemory( 1185 const auto info = buffer_cache.UploadMemory(
1123 actual_addr, size, device.GetStorageBufferAlignment(), entry.IsWritten()); 1186 actual_addr, size, device.GetStorageBufferAlignment(), entry.IsWritten());
1124 update_descriptor_queue.AddBuffer(buffer, offset, size); 1187 update_descriptor_queue.AddBuffer(info.handle, info.offset, size);
1125} 1188}
1126 1189
1127void RasterizerVulkan::SetupUniformTexels(const Tegra::Texture::TICEntry& tic, 1190void RasterizerVulkan::SetupUniformTexels(const Tegra::Texture::TICEntry& tic,
@@ -1137,12 +1200,12 @@ void RasterizerVulkan::SetupTexture(const Tegra::Texture::FullTextureInfo& textu
1137 auto view = texture_cache.GetTextureSurface(texture.tic, entry); 1200 auto view = texture_cache.GetTextureSurface(texture.tic, entry);
1138 ASSERT(!view->IsBufferView()); 1201 ASSERT(!view->IsBufferView());
1139 1202
1140 const auto image_view = view->GetHandle(texture.tic.x_source, texture.tic.y_source, 1203 const VkImageView image_view = view->GetImageView(texture.tic.x_source, texture.tic.y_source,
1141 texture.tic.z_source, texture.tic.w_source); 1204 texture.tic.z_source, texture.tic.w_source);
1142 const auto sampler = sampler_cache.GetSampler(texture.tsc); 1205 const auto sampler = sampler_cache.GetSampler(texture.tsc);
1143 update_descriptor_queue.AddSampledImage(sampler, image_view); 1206 update_descriptor_queue.AddSampledImage(sampler, image_view);
1144 1207
1145 const auto image_layout = update_descriptor_queue.GetLastImageLayout(); 1208 VkImageLayout* const image_layout = update_descriptor_queue.LastImageLayout();
1146 *image_layout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; 1209 *image_layout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL;
1147 sampled_views.push_back(ImageView{std::move(view), image_layout}); 1210 sampled_views.push_back(ImageView{std::move(view), image_layout});
1148} 1211}
@@ -1164,10 +1227,11 @@ void RasterizerVulkan::SetupImage(const Tegra::Texture::TICEntry& tic, const Ima
1164 1227
1165 UNIMPLEMENTED_IF(tic.IsBuffer()); 1228 UNIMPLEMENTED_IF(tic.IsBuffer());
1166 1229
1167 const auto image_view = view->GetHandle(tic.x_source, tic.y_source, tic.z_source, tic.w_source); 1230 const VkImageView image_view =
1231 view->GetImageView(tic.x_source, tic.y_source, tic.z_source, tic.w_source);
1168 update_descriptor_queue.AddImage(image_view); 1232 update_descriptor_queue.AddImage(image_view);
1169 1233
1170 const auto image_layout = update_descriptor_queue.GetLastImageLayout(); 1234 VkImageLayout* const image_layout = update_descriptor_queue.LastImageLayout();
1171 *image_layout = VK_IMAGE_LAYOUT_GENERAL; 1235 *image_layout = VK_IMAGE_LAYOUT_GENERAL;
1172 image_views.push_back(ImageView{std::move(view), image_layout}); 1236 image_views.push_back(ImageView{std::move(view), image_layout});
1173} 1237}
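
The preserve_contents flags above come from two new helpers whose bodies fall outside the quoted hunks. A minimal sketch of the color variant, assuming Maxwell register names (clear_buffers, clear_flags.scissor, scissor_test, rt) consistent with the rest of this file; treat the exact fields as assumptions:

    // Sketch: a clear only makes the old contents disposable when it provably
    // overwrites the whole attachment (all channels, no effective scissor).
    bool HasToPreserveColorContents(bool is_clear, const Maxwell& regs) {
        if (!is_clear) {
            return true; // Regular draws always need the existing contents.
        }
        if (!regs.clear_buffers.R || !regs.clear_buffers.G || !regs.clear_buffers.B ||
            !regs.clear_buffers.A) {
            return true; // A channel-masked clear is a partial overwrite.
        }
        if (!regs.clear_flags.scissor) {
            return false; // Unscissored full clears discard safely.
        }
        const auto& scissor = regs.scissor_test[0];
        const auto& rt = regs.rt[regs.clear_buffers.RT];
        return scissor.min_x > 0 || scissor.min_y > 0 || scissor.max_x < rt.width ||
               scissor.max_y < rt.height;
    }

The depth variant follows the same shape, testing the zeta clear bits and the depth/stencil attachment extents instead.
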
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.h b/src/video_core/renderer_vulkan/vk_rasterizer.h
index 04be37a5e..83e00e7e9 100644
--- a/src/video_core/renderer_vulkan/vk_rasterizer.h
+++ b/src/video_core/renderer_vulkan/vk_rasterizer.h
@@ -159,7 +159,10 @@ private:
159 159
160 void FlushWork(); 160 void FlushWork();
161 161
162 Texceptions UpdateAttachments(); 162 /// @brief Updates the currently bound attachments
163 /// @param is_clear True when the framebuffer is updated as a clear
164 /// @return Bitfield of attachments being used as sampled textures
165 Texceptions UpdateAttachments(bool is_clear);
163 166
164 std::tuple<VkFramebuffer, VkExtent2D> ConfigureFramebuffers(VkRenderPass renderpass); 167 std::tuple<VkFramebuffer, VkExtent2D> ConfigureFramebuffers(VkRenderPass renderpass);
165 168
@@ -168,7 +171,7 @@ private:
168 bool is_indexed, bool is_instanced); 171 bool is_indexed, bool is_instanced);
169 172
170 /// Setup descriptors in the graphics pipeline. 173 /// Setup descriptors in the graphics pipeline.
171 void SetupShaderDescriptors(const std::array<Shader, Maxwell::MaxShaderProgram>& shaders); 174 void SetupShaderDescriptors(const std::array<Shader*, Maxwell::MaxShaderProgram>& shaders);
172 175
173 void SetupImageTransitions(Texceptions texceptions, 176 void SetupImageTransitions(Texceptions texceptions,
174 const std::array<View, Maxwell::NumRenderTargets>& color_attachments, 177 const std::array<View, Maxwell::NumRenderTargets>& color_attachments,
diff --git a/src/video_core/renderer_vulkan/vk_sampler_cache.cpp b/src/video_core/renderer_vulkan/vk_sampler_cache.cpp
index e6f2fa553..616eacc36 100644
--- a/src/video_core/renderer_vulkan/vk_sampler_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_sampler_cache.cpp
@@ -9,6 +9,8 @@
9#include "video_core/renderer_vulkan/wrapper.h" 9#include "video_core/renderer_vulkan/wrapper.h"
10#include "video_core/textures/texture.h" 10#include "video_core/textures/texture.h"
11 11
12using Tegra::Texture::TextureMipmapFilter;
13
12namespace Vulkan { 14namespace Vulkan {
13 15
14namespace { 16namespace {
@@ -63,8 +65,8 @@ vk::Sampler VKSamplerCache::CreateSampler(const Tegra::Texture::TSCEntry& tsc) c
63 ci.maxAnisotropy = tsc.GetMaxAnisotropy(); 65 ci.maxAnisotropy = tsc.GetMaxAnisotropy();
64 ci.compareEnable = tsc.depth_compare_enabled; 66 ci.compareEnable = tsc.depth_compare_enabled;
65 ci.compareOp = MaxwellToVK::Sampler::DepthCompareFunction(tsc.depth_compare_func); 67 ci.compareOp = MaxwellToVK::Sampler::DepthCompareFunction(tsc.depth_compare_func);
66 ci.minLod = tsc.GetMinLod(); 68 ci.minLod = tsc.mipmap_filter == TextureMipmapFilter::None ? 0.0f : tsc.GetMinLod();
67 ci.maxLod = tsc.GetMaxLod(); 69 ci.maxLod = tsc.mipmap_filter == TextureMipmapFilter::None ? 0.25f : tsc.GetMaxLod();
68 ci.borderColor = arbitrary_borders ? VK_BORDER_COLOR_INT_CUSTOM_EXT : ConvertBorderColor(color); 70 ci.borderColor = arbitrary_borders ? VK_BORDER_COLOR_INT_CUSTOM_EXT : ConvertBorderColor(color);
69 ci.unnormalizedCoordinates = VK_FALSE; 71 ci.unnormalizedCoordinates = VK_FALSE;
70 return device.GetLogical().CreateSampler(ci); 72 return device.GetLogical().CreateSampler(ci);
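
The LOD override here is the Vulkan specification's recommended way to emulate a sampler with mipmapping disabled: with minLod = 0.0 and maxLod = 0.25, the computed LOD always clamps into the base level, while filtering within that level still behaves as requested. Restated with the rationale as comments:

    // When the guest sampler has no mipmap filter, clamp the LOD range so only
    // the base level can ever be selected. maxLod = 0.25 (rather than 0.0)
    // matches the spec's suggested emulation of non-mipmapped GL samplers.
    const bool no_mipmaps = tsc.mipmap_filter == TextureMipmapFilter::None;
    ci.minLod = no_mipmaps ? 0.0f : tsc.GetMinLod();
    ci.maxLod = no_mipmaps ? 0.25f : tsc.GetMaxLod();
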
diff --git a/src/video_core/renderer_vulkan/vk_scheduler.cpp b/src/video_core/renderer_vulkan/vk_scheduler.cpp
index 82ec9180e..56524e6f3 100644
--- a/src/video_core/renderer_vulkan/vk_scheduler.cpp
+++ b/src/video_core/renderer_vulkan/vk_scheduler.cpp
@@ -9,6 +9,7 @@
9#include <utility> 9#include <utility>
10 10
11#include "common/microprofile.h" 11#include "common/microprofile.h"
12#include "common/thread.h"
12#include "video_core/renderer_vulkan/vk_device.h" 13#include "video_core/renderer_vulkan/vk_device.h"
13#include "video_core/renderer_vulkan/vk_query_cache.h" 14#include "video_core/renderer_vulkan/vk_query_cache.h"
14#include "video_core/renderer_vulkan/vk_resource_manager.h" 15#include "video_core/renderer_vulkan/vk_resource_manager.h"
@@ -133,6 +134,7 @@ void VKScheduler::BindGraphicsPipeline(VkPipeline pipeline) {
133} 134}
134 135
135void VKScheduler::WorkerThread() { 136void VKScheduler::WorkerThread() {
137 Common::SetCurrentThreadPriority(Common::ThreadPriority::High);
136 std::unique_lock lock{mutex}; 138 std::unique_lock lock{mutex};
137 do { 139 do {
138 cv.wait(lock, [this] { return !chunk_queue.Empty() || quit; }); 140 cv.wait(lock, [this] { return !chunk_queue.Empty() || quit; });
diff --git a/src/video_core/renderer_vulkan/vk_stream_buffer.h b/src/video_core/renderer_vulkan/vk_stream_buffer.h
index dfddf7ad6..689f0d276 100644
--- a/src/video_core/renderer_vulkan/vk_stream_buffer.h
+++ b/src/video_core/renderer_vulkan/vk_stream_buffer.h
@@ -35,10 +35,14 @@ public:
35 /// Ensures that "size" bytes of memory are available to the GPU, potentially recording a copy. 35 /// Ensures that "size" bytes of memory are available to the GPU, potentially recording a copy.
36 void Unmap(u64 size); 36 void Unmap(u64 size);
37 37
38 VkBuffer GetHandle() const { 38 VkBuffer Handle() const noexcept {
39 return *buffer; 39 return *buffer;
40 } 40 }
41 41
42 u64 Address() const noexcept {
43 return 0;
44 }
45
42private: 46private:
43 struct Watch final { 47 struct Watch final {
44 VKFenceWatch fence; 48 VKFenceWatch fence;
diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.cpp b/src/video_core/renderer_vulkan/vk_texture_cache.cpp
index ea487b770..430031665 100644
--- a/src/video_core/renderer_vulkan/vk_texture_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_texture_cache.cpp
@@ -167,6 +167,7 @@ VkImageCreateInfo GenerateImageCreateInfo(const VKDevice& device, const SurfaceP
167 ci.extent = {params.width, params.height, 1}; 167 ci.extent = {params.width, params.height, 1};
168 break; 168 break;
169 case SurfaceTarget::Texture3D: 169 case SurfaceTarget::Texture3D:
170 ci.flags |= VK_IMAGE_CREATE_2D_ARRAY_COMPATIBLE_BIT;
170 ci.extent = {params.width, params.height, params.depth}; 171 ci.extent = {params.width, params.height, params.depth};
171 break; 172 break;
172 case SurfaceTarget::TextureBuffer: 173 case SurfaceTarget::TextureBuffer:
@@ -176,6 +177,12 @@ VkImageCreateInfo GenerateImageCreateInfo(const VKDevice& device, const SurfaceP
176 return ci; 177 return ci;
177} 178}
178 179
180u32 EncodeSwizzle(Tegra::Texture::SwizzleSource x_source, Tegra::Texture::SwizzleSource y_source,
181 Tegra::Texture::SwizzleSource z_source, Tegra::Texture::SwizzleSource w_source) {
182 return (static_cast<u32>(x_source) << 24) | (static_cast<u32>(y_source) << 16) |
183 (static_cast<u32>(z_source) << 8) | static_cast<u32>(w_source);
184}
185
179} // Anonymous namespace 186} // Anonymous namespace
180 187
181CachedSurface::CachedSurface(Core::System& system, const VKDevice& device, 188CachedSurface::CachedSurface(Core::System& system, const VKDevice& device,
@@ -203,9 +210,11 @@ CachedSurface::CachedSurface(Core::System& system, const VKDevice& device,
203 } 210 }
204 211
205 // TODO(Rodrigo): Move this to a virtual function. 212 // TODO(Rodrigo): Move this to a virtual function.
206 main_view = CreateViewInner( 213 u32 num_layers = 1;
207 ViewParams(params.target, 0, static_cast<u32>(params.GetNumLayers()), 0, params.num_levels), 214 if (params.is_layered || params.target == SurfaceTarget::Texture3D) {
208 true); 215 num_layers = params.depth;
216 }
217 main_view = CreateView(ViewParams(params.target, 0, num_layers, 0, params.num_levels));
209} 218}
210 219
211CachedSurface::~CachedSurface() = default; 220CachedSurface::~CachedSurface() = default;
@@ -253,12 +262,8 @@ void CachedSurface::DecorateSurfaceName() {
253} 262}
254 263
255View CachedSurface::CreateView(const ViewParams& params) { 264View CachedSurface::CreateView(const ViewParams& params) {
256 return CreateViewInner(params, false);
257}
258
259View CachedSurface::CreateViewInner(const ViewParams& params, bool is_proxy) {
260 // TODO(Rodrigo): Add name decorations 265 // TODO(Rodrigo): Add name decorations
261 return views[params] = std::make_shared<CachedSurfaceView>(device, *this, params, is_proxy); 266 return views[params] = std::make_shared<CachedSurfaceView>(device, *this, params);
262} 267}
263 268
264void CachedSurface::UploadBuffer(const std::vector<u8>& staging_buffer) { 269void CachedSurface::UploadBuffer(const std::vector<u8>& staging_buffer) {
@@ -342,18 +347,27 @@ VkImageSubresourceRange CachedSurface::GetImageSubresourceRange() const {
342} 347}
343 348
344CachedSurfaceView::CachedSurfaceView(const VKDevice& device, CachedSurface& surface, 349CachedSurfaceView::CachedSurfaceView(const VKDevice& device, CachedSurface& surface,
345 const ViewParams& params, bool is_proxy) 350 const ViewParams& params)
346 : VideoCommon::ViewBase{params}, params{surface.GetSurfaceParams()}, 351 : VideoCommon::ViewBase{params}, params{surface.GetSurfaceParams()},
347 image{surface.GetImageHandle()}, buffer_view{surface.GetBufferViewHandle()}, 352 image{surface.GetImageHandle()}, buffer_view{surface.GetBufferViewHandle()},
348 aspect_mask{surface.GetAspectMask()}, device{device}, surface{surface}, 353 aspect_mask{surface.GetAspectMask()}, device{device}, surface{surface},
349 base_layer{params.base_layer}, num_layers{params.num_layers}, base_level{params.base_level}, 354 base_level{params.base_level}, num_levels{params.num_levels},
350 num_levels{params.num_levels}, image_view_type{image ? GetImageViewType(params.target) 355 image_view_type{image ? GetImageViewType(params.target) : VK_IMAGE_VIEW_TYPE_1D} {
351 : VK_IMAGE_VIEW_TYPE_1D} {} 356 if (image_view_type == VK_IMAGE_VIEW_TYPE_3D) {
357 base_layer = 0;
358 num_layers = 1;
359 base_slice = params.base_layer;
360 num_slices = params.num_layers;
361 } else {
362 base_layer = params.base_layer;
363 num_layers = params.num_layers;
364 }
365}
352 366
353CachedSurfaceView::~CachedSurfaceView() = default; 367CachedSurfaceView::~CachedSurfaceView() = default;
354 368
355VkImageView CachedSurfaceView::GetHandle(SwizzleSource x_source, SwizzleSource y_source, 369VkImageView CachedSurfaceView::GetImageView(SwizzleSource x_source, SwizzleSource y_source,
356 SwizzleSource z_source, SwizzleSource w_source) { 370 SwizzleSource z_source, SwizzleSource w_source) {
357 const u32 new_swizzle = EncodeSwizzle(x_source, y_source, z_source, w_source); 371 const u32 new_swizzle = EncodeSwizzle(x_source, y_source, z_source, w_source);
358 if (last_image_view && last_swizzle == new_swizzle) { 372 if (last_image_view && last_swizzle == new_swizzle) {
359 return last_image_view; 373 return last_image_view;
@@ -399,6 +413,11 @@ VkImageView CachedSurfaceView::GetHandle(SwizzleSource x_source, SwizzleSource y
399 }); 413 });
400 } 414 }
401 415
416 if (image_view_type == VK_IMAGE_VIEW_TYPE_3D) {
417 ASSERT(base_slice == 0);
418 ASSERT(num_slices == params.depth);
419 }
420
402 VkImageViewCreateInfo ci; 421 VkImageViewCreateInfo ci;
403 ci.sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO; 422 ci.sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO;
404 ci.pNext = nullptr; 423 ci.pNext = nullptr;
@@ -417,6 +436,35 @@ VkImageView CachedSurfaceView::GetHandle(SwizzleSource x_source, SwizzleSource y
417 return last_image_view = *image_view; 436 return last_image_view = *image_view;
418} 437}
419 438
439VkImageView CachedSurfaceView::GetAttachment() {
440 if (render_target) {
441 return *render_target;
442 }
443
444 VkImageViewCreateInfo ci;
445 ci.sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO;
446 ci.pNext = nullptr;
447 ci.flags = 0;
448 ci.image = surface.GetImageHandle();
449 ci.format = surface.GetImage().GetFormat();
450 ci.components = {VK_COMPONENT_SWIZZLE_IDENTITY, VK_COMPONENT_SWIZZLE_IDENTITY,
451 VK_COMPONENT_SWIZZLE_IDENTITY, VK_COMPONENT_SWIZZLE_IDENTITY};
452 ci.subresourceRange.aspectMask = aspect_mask;
453 ci.subresourceRange.baseMipLevel = base_level;
454 ci.subresourceRange.levelCount = num_levels;
455 if (image_view_type == VK_IMAGE_VIEW_TYPE_3D) {
456 ci.viewType = num_slices > 1 ? VK_IMAGE_VIEW_TYPE_2D_ARRAY : VK_IMAGE_VIEW_TYPE_2D;
457 ci.subresourceRange.baseArrayLayer = base_slice;
458 ci.subresourceRange.layerCount = num_slices;
459 } else {
460 ci.viewType = image_view_type;
461 ci.subresourceRange.baseArrayLayer = base_layer;
462 ci.subresourceRange.layerCount = num_layers;
463 }
464 render_target = device.GetLogical().CreateImageView(ci);
465 return *render_target;
466}
467
420VKTextureCache::VKTextureCache(Core::System& system, VideoCore::RasterizerInterface& rasterizer, 468VKTextureCache::VKTextureCache(Core::System& system, VideoCore::RasterizerInterface& rasterizer,
421 const VKDevice& device, VKResourceManager& resource_manager, 469 const VKDevice& device, VKResourceManager& resource_manager,
422 VKMemoryManager& memory_manager, VKScheduler& scheduler, 470 VKMemoryManager& memory_manager, VKScheduler& scheduler,
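
VK_IMAGE_CREATE_2D_ARRAY_COMPATIBLE_BIT, added for Texture3D above, is what makes GetAttachment legal: it permits 2D and 2D-array views of a 3D image, so individual depth slices can be bound as framebuffer attachments. A self-contained sketch of that view creation (error handling elided; the helper name is illustrative):

    #include <vulkan/vulkan.h>

    // Builds an attachment view over `num_slices` slices of a 3D image. Valid
    // only if the image was created with VK_IMAGE_CREATE_2D_ARRAY_COMPATIBLE_BIT;
    // the spec additionally requires levelCount == 1 for such views.
    VkImageView Make3DSliceView(VkDevice device, VkImage image_3d, VkFormat format,
                                uint32_t base_slice, uint32_t num_slices) {
        VkImageViewCreateInfo ci{};
        ci.sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO;
        ci.image = image_3d;
        ci.viewType = num_slices > 1 ? VK_IMAGE_VIEW_TYPE_2D_ARRAY : VK_IMAGE_VIEW_TYPE_2D;
        ci.format = format;
        ci.subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT;
        ci.subresourceRange.baseMipLevel = 0;
        ci.subresourceRange.levelCount = 1;              // mandated for 2D views of 3D images
        ci.subresourceRange.baseArrayLayer = base_slice; // slices are addressed as layers
        ci.subresourceRange.layerCount = num_slices;
        VkImageView view = VK_NULL_HANDLE;
        vkCreateImageView(device, &ci, nullptr, &view);
        return view;
    }
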
diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.h b/src/video_core/renderer_vulkan/vk_texture_cache.h
index f211ccb1e..807e26c8a 100644
--- a/src/video_core/renderer_vulkan/vk_texture_cache.h
+++ b/src/video_core/renderer_vulkan/vk_texture_cache.h
@@ -91,7 +91,6 @@ protected:
91 void DecorateSurfaceName(); 91 void DecorateSurfaceName();
92 92
93 View CreateView(const ViewParams& params) override; 93 View CreateView(const ViewParams& params) override;
94 View CreateViewInner(const ViewParams& params, bool is_proxy);
95 94
96private: 95private:
97 void UploadBuffer(const std::vector<u8>& staging_buffer); 96 void UploadBuffer(const std::vector<u8>& staging_buffer);
@@ -120,23 +119,20 @@ private:
120class CachedSurfaceView final : public VideoCommon::ViewBase { 119class CachedSurfaceView final : public VideoCommon::ViewBase {
121public: 120public:
122 explicit CachedSurfaceView(const VKDevice& device, CachedSurface& surface, 121 explicit CachedSurfaceView(const VKDevice& device, CachedSurface& surface,
123 const ViewParams& params, bool is_proxy); 122 const ViewParams& params);
124 ~CachedSurfaceView(); 123 ~CachedSurfaceView();
125 124
126 VkImageView GetHandle(Tegra::Texture::SwizzleSource x_source, 125 VkImageView GetImageView(Tegra::Texture::SwizzleSource x_source,
127 Tegra::Texture::SwizzleSource y_source, 126 Tegra::Texture::SwizzleSource y_source,
128 Tegra::Texture::SwizzleSource z_source, 127 Tegra::Texture::SwizzleSource z_source,
129 Tegra::Texture::SwizzleSource w_source); 128 Tegra::Texture::SwizzleSource w_source);
129
130 VkImageView GetAttachment();
130 131
131 bool IsSameSurface(const CachedSurfaceView& rhs) const { 132 bool IsSameSurface(const CachedSurfaceView& rhs) const {
132 return &surface == &rhs.surface; 133 return &surface == &rhs.surface;
133 } 134 }
134 135
135 VkImageView GetHandle() {
136 return GetHandle(Tegra::Texture::SwizzleSource::R, Tegra::Texture::SwizzleSource::G,
137 Tegra::Texture::SwizzleSource::B, Tegra::Texture::SwizzleSource::A);
138 }
139
140 u32 GetWidth() const { 136 u32 GetWidth() const {
141 return params.GetMipWidth(base_level); 137 return params.GetMipWidth(base_level);
142 } 138 }
@@ -180,14 +176,6 @@ public:
180 } 176 }
181 177
182private: 178private:
183 static u32 EncodeSwizzle(Tegra::Texture::SwizzleSource x_source,
184 Tegra::Texture::SwizzleSource y_source,
185 Tegra::Texture::SwizzleSource z_source,
186 Tegra::Texture::SwizzleSource w_source) {
187 return (static_cast<u32>(x_source) << 24) | (static_cast<u32>(y_source) << 16) |
188 (static_cast<u32>(z_source) << 8) | static_cast<u32>(w_source);
189 }
190
191 // Store a copy of these values to avoid double dereference when reading them 179 // Store a copy of these values to avoid double dereference when reading them
192 const SurfaceParams params; 180 const SurfaceParams params;
193 const VkImage image; 181 const VkImage image;
@@ -196,15 +184,18 @@ private:
196 184
197 const VKDevice& device; 185 const VKDevice& device;
198 CachedSurface& surface; 186 CachedSurface& surface;
199 const u32 base_layer;
200 const u32 num_layers;
201 const u32 base_level; 187 const u32 base_level;
202 const u32 num_levels; 188 const u32 num_levels;
203 const VkImageViewType image_view_type; 189 const VkImageViewType image_view_type;
190 u32 base_layer = 0;
191 u32 num_layers = 0;
192 u32 base_slice = 0;
193 u32 num_slices = 0;
204 194
205 VkImageView last_image_view = nullptr; 195 VkImageView last_image_view = nullptr;
206 u32 last_swizzle = 0; 196 u32 last_swizzle = 0;
207 197
198 vk::ImageView render_target;
208 std::unordered_map<u32, vk::ImageView> view_cache; 199 std::unordered_map<u32, vk::ImageView> view_cache;
209}; 200};
210 201
diff --git a/src/video_core/renderer_vulkan/vk_update_descriptor.cpp b/src/video_core/renderer_vulkan/vk_update_descriptor.cpp
index 681ecde98..351c048d2 100644
--- a/src/video_core/renderer_vulkan/vk_update_descriptor.cpp
+++ b/src/video_core/renderer_vulkan/vk_update_descriptor.cpp
@@ -24,35 +24,25 @@ void VKUpdateDescriptorQueue::TickFrame() {
24} 24}
25 25
26void VKUpdateDescriptorQueue::Acquire() { 26void VKUpdateDescriptorQueue::Acquire() {
27 entries.clear(); 27 // Minimum number of entries required.
28} 28 // This is the maximum number of entries a single draw call might use.
29 static constexpr std::size_t MIN_ENTRIES = 0x400;
29 30
30void VKUpdateDescriptorQueue::Send(VkDescriptorUpdateTemplateKHR update_template, 31 if (payload.size() + MIN_ENTRIES >= payload.max_size()) {
31 VkDescriptorSet set) {
32 if (payload.size() + entries.size() >= payload.max_size()) {
33 LOG_WARNING(Render_Vulkan, "Payload overflow, waiting for worker thread"); 32 LOG_WARNING(Render_Vulkan, "Payload overflow, waiting for worker thread");
34 scheduler.WaitWorker(); 33 scheduler.WaitWorker();
35 payload.clear(); 34 payload.clear();
36 } 35 }
36 upload_start = &*payload.end();
37}
37 38
38 // TODO(Rodrigo): Rework to write the payload directly 39void VKUpdateDescriptorQueue::Send(VkDescriptorUpdateTemplateKHR update_template,
39 const auto payload_start = payload.data() + payload.size(); 40 VkDescriptorSet set) {
40 for (const auto& entry : entries) { 41 const void* const data = upload_start;
41 if (const auto image = std::get_if<VkDescriptorImageInfo>(&entry)) { 42 const vk::Device* const logical = &device.GetLogical();
42 payload.push_back(*image); 43 scheduler.Record([data, logical, set, update_template](vk::CommandBuffer) {
43 } else if (const auto buffer = std::get_if<VkDescriptorBufferInfo>(&entry)) { 44 logical->UpdateDescriptorSet(set, update_template, data);
44 payload.push_back(*buffer); 45 });
45 } else if (const auto texel = std::get_if<VkBufferView>(&entry)) {
46 payload.push_back(*texel);
47 } else {
48 UNREACHABLE();
49 }
50 }
51
52 scheduler.Record(
53 [payload_start, set, update_template, logical = &device.GetLogical()](vk::CommandBuffer) {
54 logical->UpdateDescriptorSet(set, update_template, payload_start);
55 });
56} 46}
57 47
58} // namespace Vulkan 48} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_update_descriptor.h b/src/video_core/renderer_vulkan/vk_update_descriptor.h
index cc7e3dff4..945320c72 100644
--- a/src/video_core/renderer_vulkan/vk_update_descriptor.h
+++ b/src/video_core/renderer_vulkan/vk_update_descriptor.h
@@ -15,17 +15,13 @@ namespace Vulkan {
15class VKDevice; 15class VKDevice;
16class VKScheduler; 16class VKScheduler;
17 17
18class DescriptorUpdateEntry { 18struct DescriptorUpdateEntry {
19public: 19 DescriptorUpdateEntry(VkDescriptorImageInfo image_) : image{image_} {}
20 explicit DescriptorUpdateEntry() {}
21
22 DescriptorUpdateEntry(VkDescriptorImageInfo image) : image{image} {}
23 20
24 DescriptorUpdateEntry(VkDescriptorBufferInfo buffer) : buffer{buffer} {} 21 DescriptorUpdateEntry(VkDescriptorBufferInfo buffer_) : buffer{buffer_} {}
25 22
26 DescriptorUpdateEntry(VkBufferView texel_buffer) : texel_buffer{texel_buffer} {} 23 DescriptorUpdateEntry(VkBufferView texel_buffer_) : texel_buffer{texel_buffer_} {}
27 24
28private:
29 union { 25 union {
30 VkDescriptorImageInfo image; 26 VkDescriptorImageInfo image;
31 VkDescriptorBufferInfo buffer; 27 VkDescriptorBufferInfo buffer;
@@ -45,32 +41,34 @@ public:
45 void Send(VkDescriptorUpdateTemplateKHR update_template, VkDescriptorSet set); 41 void Send(VkDescriptorUpdateTemplateKHR update_template, VkDescriptorSet set);
46 42
47 void AddSampledImage(VkSampler sampler, VkImageView image_view) { 43 void AddSampledImage(VkSampler sampler, VkImageView image_view) {
48 entries.emplace_back(VkDescriptorImageInfo{sampler, image_view, {}}); 44 payload.emplace_back(VkDescriptorImageInfo{sampler, image_view, {}});
49 } 45 }
50 46
51 void AddImage(VkImageView image_view) { 47 void AddImage(VkImageView image_view) {
52 entries.emplace_back(VkDescriptorImageInfo{{}, image_view, {}}); 48 payload.emplace_back(VkDescriptorImageInfo{{}, image_view, {}});
53 } 49 }
54 50
55 void AddBuffer(VkBuffer buffer, u64 offset, std::size_t size) { 51 void AddBuffer(VkBuffer buffer, u64 offset, std::size_t size) {
56 entries.emplace_back(VkDescriptorBufferInfo{buffer, offset, size}); 52 payload.emplace_back(VkDescriptorBufferInfo{buffer, offset, size});
57 } 53 }
58 54
59 void AddTexelBuffer(VkBufferView texel_buffer) { 55 void AddTexelBuffer(VkBufferView texel_buffer) {
60 entries.emplace_back(texel_buffer); 56 payload.emplace_back(texel_buffer);
61 } 57 }
62 58
63 VkImageLayout* GetLastImageLayout() { 59 VkImageLayout* LastImageLayout() {
64 return &std::get<VkDescriptorImageInfo>(entries.back()).imageLayout; 60 return &payload.back().image.imageLayout;
65 } 61 }
66 62
67private: 63 const VkImageLayout* LastImageLayout() const {
68 using Variant = std::variant<VkDescriptorImageInfo, VkDescriptorBufferInfo, VkBufferView>; 64 return &payload.back().image.imageLayout;
65 }
69 66
67private:
70 const VKDevice& device; 68 const VKDevice& device;
71 VKScheduler& scheduler; 69 VKScheduler& scheduler;
72 70
73 boost::container::static_vector<Variant, 0x400> entries; 71 const DescriptorUpdateEntry* upload_start = nullptr;
74 boost::container::static_vector<DescriptorUpdateEntry, 0x10000> payload; 72 boost::container::static_vector<DescriptorUpdateEntry, 0x10000> payload;
75}; 73};
76 74
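
The reworked queue drops the intermediate variant vector: Acquire() reserves headroom and remembers where the next draw's entries start, the Add* helpers append plain DescriptorUpdateEntry unions, and Send() hands the worker thread a pointer into that storage (a static_vector never reallocates, so upload_start stays valid until the payload is cleared). The per-draw call order, using the method names from this diff with placeholder arguments:

    update_descriptor_queue.Acquire();                        // mark start of this draw's entries
    update_descriptor_queue.AddBuffer(buffer, offset, size);  // append in template-declared order
    update_descriptor_queue.AddSampledImage(sampler, view);
    *update_descriptor_queue.LastImageLayout() = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL;
    update_descriptor_queue.Send(update_template, descriptor_set); // deferred template update
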
diff --git a/src/video_core/renderer_vulkan/wrapper.cpp b/src/video_core/renderer_vulkan/wrapper.cpp
index 2ce9b0626..0d485a662 100644
--- a/src/video_core/renderer_vulkan/wrapper.cpp
+++ b/src/video_core/renderer_vulkan/wrapper.cpp
@@ -153,7 +153,8 @@ void Load(VkDevice device, DeviceDispatch& dld) noexcept {
153 153
154bool Load(InstanceDispatch& dld) noexcept { 154bool Load(InstanceDispatch& dld) noexcept {
155#define X(name) Proc(dld.name, dld, #name) 155#define X(name) Proc(dld.name, dld, #name)
156 return X(vkCreateInstance) && X(vkEnumerateInstanceExtensionProperties); 156 return X(vkCreateInstance) && X(vkEnumerateInstanceExtensionProperties) &&
157 X(vkEnumerateInstanceLayerProperties);
157#undef X 158#undef X
158} 159}
159 160
@@ -725,8 +726,7 @@ bool PhysicalDevice::GetSurfaceSupportKHR(u32 queue_family_index, VkSurfaceKHR s
725 return supported == VK_TRUE; 726 return supported == VK_TRUE;
726} 727}
727 728
728VkSurfaceCapabilitiesKHR PhysicalDevice::GetSurfaceCapabilitiesKHR(VkSurfaceKHR surface) const 729VkSurfaceCapabilitiesKHR PhysicalDevice::GetSurfaceCapabilitiesKHR(VkSurfaceKHR surface) const {
729 noexcept {
730 VkSurfaceCapabilitiesKHR capabilities; 730 VkSurfaceCapabilitiesKHR capabilities;
731 Check(dld->vkGetPhysicalDeviceSurfaceCapabilitiesKHR(physical_device, surface, &capabilities)); 731 Check(dld->vkGetPhysicalDeviceSurfaceCapabilitiesKHR(physical_device, surface, &capabilities));
732 return capabilities; 732 return capabilities;
@@ -771,4 +771,17 @@ std::optional<std::vector<VkExtensionProperties>> EnumerateInstanceExtensionProp
771 return properties; 771 return properties;
772} 772}
773 773
774std::optional<std::vector<VkLayerProperties>> EnumerateInstanceLayerProperties(
775 const InstanceDispatch& dld) {
776 u32 num;
777 if (dld.vkEnumerateInstanceLayerProperties(&num, nullptr) != VK_SUCCESS) {
778 return std::nullopt;
779 }
780 std::vector<VkLayerProperties> properties(num);
781 if (dld.vkEnumerateInstanceLayerProperties(&num, properties.data()) != VK_SUCCESS) {
782 return std::nullopt;
783 }
784 return properties;
785}
786
774} // namespace Vulkan::vk 787} // namespace Vulkan::vk
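
EnumerateInstanceLayerProperties follows the standard Vulkan two-call enumeration pattern: query the count, then fill a sized vector, returning nullopt if either call fails. A plausible call site, hedged as illustration (needs <cstring> for strcmp):

    // Sketch: gate the Khronos validation layer on the loader actually exposing it.
    bool HasValidationLayer(const vk::InstanceDispatch& dld) {
        const auto layers = vk::EnumerateInstanceLayerProperties(dld);
        if (!layers) {
            return false; // enumeration itself failed
        }
        for (const VkLayerProperties& layer : *layers) {
            if (std::strcmp(layer.layerName, "VK_LAYER_KHRONOS_validation") == 0) {
                return true;
            }
        }
        return false;
    }
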
diff --git a/src/video_core/renderer_vulkan/wrapper.h b/src/video_core/renderer_vulkan/wrapper.h
index 98937a77a..d56fdb3f9 100644
--- a/src/video_core/renderer_vulkan/wrapper.h
+++ b/src/video_core/renderer_vulkan/wrapper.h
@@ -141,6 +141,7 @@ struct InstanceDispatch {
141 PFN_vkCreateInstance vkCreateInstance; 141 PFN_vkCreateInstance vkCreateInstance;
142 PFN_vkDestroyInstance vkDestroyInstance; 142 PFN_vkDestroyInstance vkDestroyInstance;
143 PFN_vkEnumerateInstanceExtensionProperties vkEnumerateInstanceExtensionProperties; 143 PFN_vkEnumerateInstanceExtensionProperties vkEnumerateInstanceExtensionProperties;
144 PFN_vkEnumerateInstanceLayerProperties vkEnumerateInstanceLayerProperties;
144 145
145 PFN_vkCreateDebugUtilsMessengerEXT vkCreateDebugUtilsMessengerEXT; 146 PFN_vkCreateDebugUtilsMessengerEXT vkCreateDebugUtilsMessengerEXT;
146 PFN_vkCreateDevice vkCreateDevice; 147 PFN_vkCreateDevice vkCreateDevice;
@@ -779,7 +780,7 @@ public:
779 780
780 bool GetSurfaceSupportKHR(u32 queue_family_index, VkSurfaceKHR) const; 781 bool GetSurfaceSupportKHR(u32 queue_family_index, VkSurfaceKHR) const;
781 782
782 VkSurfaceCapabilitiesKHR GetSurfaceCapabilitiesKHR(VkSurfaceKHR) const noexcept; 783 VkSurfaceCapabilitiesKHR GetSurfaceCapabilitiesKHR(VkSurfaceKHR) const;
783 784
784 std::vector<VkSurfaceFormatKHR> GetSurfaceFormatsKHR(VkSurfaceKHR) const; 785 std::vector<VkSurfaceFormatKHR> GetSurfaceFormatsKHR(VkSurfaceKHR) const;
785 786
@@ -996,4 +997,7 @@ private:
996std::optional<std::vector<VkExtensionProperties>> EnumerateInstanceExtensionProperties( 997std::optional<std::vector<VkExtensionProperties>> EnumerateInstanceExtensionProperties(
997 const InstanceDispatch& dld); 998 const InstanceDispatch& dld);
998 999
1000std::optional<std::vector<VkLayerProperties>> EnumerateInstanceLayerProperties(
1001 const InstanceDispatch& dld);
1002
999} // namespace Vulkan::vk 1003} // namespace Vulkan::vk
diff --git a/src/video_core/shader/decode/half_set.cpp b/src/video_core/shader/decode/half_set.cpp
index 848e46874..b2e88fa20 100644
--- a/src/video_core/shader/decode/half_set.cpp
+++ b/src/video_core/shader/decode/half_set.cpp
@@ -13,55 +13,101 @@
13 13
14namespace VideoCommon::Shader { 14namespace VideoCommon::Shader {
15 15
16using std::move;
16using Tegra::Shader::Instruction; 17using Tegra::Shader::Instruction;
17using Tegra::Shader::OpCode; 18using Tegra::Shader::OpCode;
19using Tegra::Shader::PredCondition;
18 20
19u32 ShaderIR::DecodeHalfSet(NodeBlock& bb, u32 pc) { 21u32 ShaderIR::DecodeHalfSet(NodeBlock& bb, u32 pc) {
20 const Instruction instr = {program_code[pc]}; 22 const Instruction instr = {program_code[pc]};
21 const auto opcode = OpCode::Decode(instr); 23 const auto opcode = OpCode::Decode(instr);
22 24
23 if (instr.hset2.ftz == 0) { 25 PredCondition cond;
24 LOG_DEBUG(HW_GPU, "{} without FTZ is not implemented", opcode->get().GetName()); 26 bool bf;
27 bool ftz;
28 bool neg_a;
29 bool abs_a;
30 bool neg_b;
31 bool abs_b;
32 switch (opcode->get().GetId()) {
33 case OpCode::Id::HSET2_C:
34 case OpCode::Id::HSET2_IMM:
35 cond = instr.hsetp2.cbuf_and_imm.cond;
36 bf = instr.Bit(53);
37 ftz = instr.Bit(54);
38 neg_a = instr.Bit(43);
39 abs_a = instr.Bit(44);
40 neg_b = instr.Bit(56);
41 abs_b = instr.Bit(54);
42 break;
43 case OpCode::Id::HSET2_R:
44 cond = instr.hsetp2.reg.cond;
45 bf = instr.Bit(49);
46 ftz = instr.Bit(50);
47 neg_a = instr.Bit(43);
48 abs_a = instr.Bit(44);
49 neg_b = instr.Bit(31);
50 abs_b = instr.Bit(30);
51 break;
52 default:
53 UNREACHABLE();
25 } 54 }
26 55
27 Node op_a = UnpackHalfFloat(GetRegister(instr.gpr8), instr.hset2.type_a); 56 Node op_b = [this, instr, opcode] {
28 op_a = GetOperandAbsNegHalf(op_a, instr.hset2.abs_a, instr.hset2.negate_a);
29
30 Node op_b = [&]() {
31 switch (opcode->get().GetId()) { 57 switch (opcode->get().GetId()) {
58 case OpCode::Id::HSET2_C:
59 // Report as unimplemented, as this path is untested.
60 UNIMPLEMENTED_MSG("HSET2_C is not implemented");
61 return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset());
32 case OpCode::Id::HSET2_R: 62 case OpCode::Id::HSET2_R:
33 return GetRegister(instr.gpr20); 63 return GetRegister(instr.gpr20);
64 case OpCode::Id::HSET2_IMM:
65 return UnpackHalfImmediate(instr, true);
34 default: 66 default:
35 UNREACHABLE(); 67 UNREACHABLE();
36 return Immediate(0); 68 return Node{};
37 } 69 }
38 }(); 70 }();
39 op_b = UnpackHalfFloat(op_b, instr.hset2.type_b);
40 op_b = GetOperandAbsNegHalf(op_b, instr.hset2.abs_b, instr.hset2.negate_b);
41 71
42 const Node second_pred = GetPredicate(instr.hset2.pred39, instr.hset2.neg_pred); 72 if (!ftz) {
73 LOG_DEBUG(HW_GPU, "{} without FTZ is not implemented", opcode->get().GetName());
74 }
75
76 Node op_a = UnpackHalfFloat(GetRegister(instr.gpr8), instr.hset2.type_a);
77 op_a = GetOperandAbsNegHalf(op_a, abs_a, neg_a);
78
79 switch (opcode->get().GetId()) {
80 case OpCode::Id::HSET2_R:
81 op_b = GetOperandAbsNegHalf(move(op_b), abs_b, neg_b);
82 [[fallthrough]];
83 case OpCode::Id::HSET2_C:
84 op_b = UnpackHalfFloat(move(op_b), instr.hset2.type_b);
85 break;
86 default:
87 break;
88 }
43 89
44 const Node comparison_pair = GetPredicateComparisonHalf(instr.hset2.cond, op_a, op_b); 90 Node second_pred = GetPredicate(instr.hset2.pred39, instr.hset2.neg_pred);
91
92 Node comparison_pair = GetPredicateComparisonHalf(cond, op_a, op_b);
45 93
46 const OperationCode combiner = GetPredicateCombiner(instr.hset2.op); 94 const OperationCode combiner = GetPredicateCombiner(instr.hset2.op);
47 95
48 // HSET2 operates on each half float in the pack. 96 // HSET2 operates on each half float in the pack.
49 std::array<Node, 2> values; 97 std::array<Node, 2> values;
50 for (u32 i = 0; i < 2; ++i) { 98 for (u32 i = 0; i < 2; ++i) {
51 const u32 raw_value = instr.hset2.bf ? 0x3c00 : 0xffff; 99 const u32 raw_value = bf ? 0x3c00 : 0xffff;
52 const Node true_value = Immediate(raw_value << (i * 16)); 100 Node true_value = Immediate(raw_value << (i * 16));
53 const Node false_value = Immediate(0); 101 Node false_value = Immediate(0);
54
55 const Node comparison =
56 Operation(OperationCode::LogicalPick2, comparison_pair, Immediate(i));
57 const Node predicate = Operation(combiner, comparison, second_pred);
58 102
103 Node comparison = Operation(OperationCode::LogicalPick2, comparison_pair, Immediate(i));
104 Node predicate = Operation(combiner, comparison, second_pred);
59 values[i] = 105 values[i] =
60 Operation(OperationCode::Select, NO_PRECISE, predicate, true_value, false_value); 106 Operation(OperationCode::Select, predicate, move(true_value), move(false_value));
61 } 107 }
62 108
63 const Node value = Operation(OperationCode::UBitwiseOr, NO_PRECISE, values[0], values[1]); 109 Node value = Operation(OperationCode::UBitwiseOr, values[0], values[1]);
64 SetRegister(bb, instr.gpr0, value); 110 SetRegister(bb, instr.gpr0, move(value));
65 111
66 return pc; 112 return pc;
67} 113}
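
For reference, HSET2 compares the two half-precision lanes of a register pair and writes a per-lane "true" pattern selected by the bf bit: half-precision 1.0 (0x3c00) when bf is set, an all-ones 16-bit mask (0xffff) otherwise. A self-contained illustration of the packing the decoded IR performs:

    #include <cstdint>

    // Mirrors the Select + UBitwiseOr at the end of DecodeHalfSet: each lane
    // contributes either its shifted true pattern or zero.
    uint32_t PackHset2Result(bool lane0, bool lane1, bool bf) {
        const uint32_t true_bits = bf ? 0x3c00u : 0xffffu;  // 1.0h vs integer mask
        const uint32_t lo = lane0 ? true_bits : 0u;         // lane 0 -> bits [15:0]
        const uint32_t hi = lane1 ? true_bits << 16 : 0u;   // lane 1 -> bits [31:16]
        return hi | lo;
    }
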
diff --git a/src/video_core/shader/decode/image.cpp b/src/video_core/shader/decode/image.cpp
index 60b6ad72a..07778dc3e 100644
--- a/src/video_core/shader/decode/image.cpp
+++ b/src/video_core/shader/decode/image.cpp
@@ -97,6 +97,7 @@ ComponentType GetComponentType(Tegra::Engines::SamplerDescriptor descriptor,
97 break; 97 break;
98 case TextureFormat::B5G6R5: 98 case TextureFormat::B5G6R5:
99 case TextureFormat::B6G5R5: 99 case TextureFormat::B6G5R5:
100 case TextureFormat::BF10GF11RF11:
100 if (component == 0) { 101 if (component == 0) {
101 return descriptor.b_type; 102 return descriptor.b_type;
102 } 103 }
@@ -119,7 +120,7 @@ ComponentType GetComponentType(Tegra::Engines::SamplerDescriptor descriptor,
119 } 120 }
120 break; 121 break;
121 } 122 }
122 UNIMPLEMENTED_MSG("texture format not implement={}", format); 123 UNIMPLEMENTED_MSG("Texture format not implemented={}", format);
123 return ComponentType::FLOAT; 124 return ComponentType::FLOAT;
124} 125}
125 126
@@ -191,6 +192,14 @@ u32 GetComponentSize(TextureFormat format, std::size_t component) {
191 return 6; 192 return 6;
192 } 193 }
193 return 0; 194 return 0;
195 case TextureFormat::BF10GF11RF11:
196 if (component == 1 || component == 2) {
197 return 11;
198 }
199 if (component == 0) {
200 return 10;
201 }
202 return 0;
194 case TextureFormat::G8R24: 203 case TextureFormat::G8R24:
195 if (component == 0) { 204 if (component == 0) {
196 return 8; 205 return 8;
@@ -211,10 +220,9 @@ u32 GetComponentSize(TextureFormat format, std::size_t component) {
211 return (component == 0 || component == 1) ? 8 : 0; 220 return (component == 0 || component == 1) ? 8 : 0;
212 case TextureFormat::G4R4: 221 case TextureFormat::G4R4:
213 return (component == 0 || component == 1) ? 4 : 0; 222 return (component == 0 || component == 1) ? 4 : 0;
214 default:
215 UNIMPLEMENTED_MSG("texture format not implement={}", format);
216 return 0;
217 } 223 }
224 UNIMPLEMENTED_MSG("Texture format not implemented={}", format);
225 return 0;
218} 226}
219 227
220std::size_t GetImageComponentMask(TextureFormat format) { 228std::size_t GetImageComponentMask(TextureFormat format) {
@@ -235,6 +243,7 @@ std::size_t GetImageComponentMask(TextureFormat format) {
235 case TextureFormat::R32_B24G8: 243 case TextureFormat::R32_B24G8:
236 case TextureFormat::B5G6R5: 244 case TextureFormat::B5G6R5:
237 case TextureFormat::B6G5R5: 245 case TextureFormat::B6G5R5:
246 case TextureFormat::BF10GF11RF11:
238 return std::size_t{R | G | B}; 247 return std::size_t{R | G | B};
239 case TextureFormat::R32_G32: 248 case TextureFormat::R32_G32:
240 case TextureFormat::R16_G16: 249 case TextureFormat::R16_G16:
@@ -248,10 +257,9 @@ std::size_t GetImageComponentMask(TextureFormat format) {
248 case TextureFormat::R8: 257 case TextureFormat::R8:
249 case TextureFormat::R1: 258 case TextureFormat::R1:
250 return std::size_t{R}; 259 return std::size_t{R};
251 default:
252 UNIMPLEMENTED_MSG("texture format not implement={}", format);
253 return std::size_t{R | G | B | A};
254 } 260 }
261 UNIMPLEMENTED_MSG("Texture format not implemented={}", format);
262 return std::size_t{R | G | B | A};
255} 263}
256 264
257std::size_t GetImageTypeNumCoordinates(Tegra::Shader::ImageType image_type) { 265std::size_t GetImageTypeNumCoordinates(Tegra::Shader::ImageType image_type) {
@@ -299,7 +307,7 @@ std::pair<Node, bool> ShaderIR::GetComponentValue(ComponentType component_type,
299 return {std::move(original_value), true}; 307 return {std::move(original_value), true};
300 } 308 }
301 default: 309 default:
302 UNIMPLEMENTED_MSG("Unimplement component type={}", component_type); 310 UNIMPLEMENTED_MSG("Unimplemented component type={}", component_type);
303 return {std::move(original_value), true}; 311 return {std::move(original_value), true};
304 } 312 }
305} 313}
@@ -459,7 +467,7 @@ u32 ShaderIR::DecodeImage(NodeBlock& bb, u32 pc) {
459 default: 467 default:
460 break; 468 break;
461 } 469 }
462 UNIMPLEMENTED_MSG("Unimplemented operation={} type={}", 470 UNIMPLEMENTED_MSG("Unimplemented operation={}, type={}",
463 static_cast<u64>(instr.suatom_d.operation.Value()), 471 static_cast<u64>(instr.suatom_d.operation.Value()),
464 static_cast<u64>(instr.suatom_d.operation_type.Value())); 472 static_cast<u64>(instr.suatom_d.operation_type.Value()));
465 return OperationCode::AtomicImageAdd; 473 return OperationCode::AtomicImageAdd;
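
BF10GF11RF11 is the packed small-float color format (R11F_G11F_B10F in GL terms), which is why components 1 and 2 report 11 bits and component 0 reports 10. A decoder sketch of its conventional bit layout (field positions are the standard layout for this format family, not taken from this diff):

    #include <cstdint>

    struct PackedRGB {
        uint32_t r, g, b; // raw unsigned small-float bit patterns
    };

    // R and G are 11-bit floats (5-bit exponent, 6-bit mantissa); B is a
    // 10-bit float (5-bit exponent, 5-bit mantissa). No component has a sign.
    PackedRGB UnpackBF10GF11RF11(uint32_t texel) {
        return {
            texel & 0x7ffu,         // R: bits [10:0]
            (texel >> 11) & 0x7ffu, // G: bits [21:11]
            (texel >> 22) & 0x3ffu, // B: bits [31:22]
        };
    }
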
diff --git a/src/video_core/shader/decode/texture.cpp b/src/video_core/shader/decode/texture.cpp
index 8f0bb996e..29ebf65ba 100644
--- a/src/video_core/shader/decode/texture.cpp
+++ b/src/video_core/shader/decode/texture.cpp
@@ -357,13 +357,11 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) {
357 return pc; 357 return pc;
358} 358}
359 359
360ShaderIR::SamplerInfo ShaderIR::GetSamplerInfo(SamplerInfo info, u32 offset, 360ShaderIR::SamplerInfo ShaderIR::GetSamplerInfo(
361 std::optional<u32> buffer) { 361 SamplerInfo info, std::optional<Tegra::Engines::SamplerDescriptor> sampler) {
362 if (info.IsComplete()) { 362 if (info.IsComplete()) {
363 return info; 363 return info;
364 } 364 }
365 const auto sampler = buffer ? registry.ObtainBindlessSampler(*buffer, offset)
366 : registry.ObtainBoundSampler(offset);
367 if (!sampler) { 365 if (!sampler) {
368 LOG_WARNING(HW_GPU, "Unknown sampler info"); 366 LOG_WARNING(HW_GPU, "Unknown sampler info");
369 info.type = info.type.value_or(Tegra::Shader::TextureType::Texture2D); 367 info.type = info.type.value_or(Tegra::Shader::TextureType::Texture2D);
@@ -381,8 +379,8 @@ ShaderIR::SamplerInfo ShaderIR::GetSamplerInfo(SamplerInfo info, u32 offset,
381 379
382std::optional<Sampler> ShaderIR::GetSampler(Tegra::Shader::Sampler sampler, 380std::optional<Sampler> ShaderIR::GetSampler(Tegra::Shader::Sampler sampler,
383 SamplerInfo sampler_info) { 381 SamplerInfo sampler_info) {
384 const auto offset = static_cast<u32>(sampler.index.Value()); 382 const u32 offset = static_cast<u32>(sampler.index.Value());
385 const auto info = GetSamplerInfo(sampler_info, offset); 383 const auto info = GetSamplerInfo(sampler_info, registry.ObtainBoundSampler(offset));
386 384
387 // If this sampler has already been used, return the existing mapping. 385 // If this sampler has already been used, return the existing mapping.
388 const auto it = std::find_if(used_samplers.begin(), used_samplers.end(), 386 const auto it = std::find_if(used_samplers.begin(), used_samplers.end(),
@@ -404,20 +402,19 @@ std::optional<Sampler> ShaderIR::GetBindlessSampler(Tegra::Shader::Register reg,
404 const Node sampler_register = GetRegister(reg); 402 const Node sampler_register = GetRegister(reg);
405 const auto [base_node, tracked_sampler_info] = 403 const auto [base_node, tracked_sampler_info] =
406 TrackBindlessSampler(sampler_register, global_code, static_cast<s64>(global_code.size())); 404 TrackBindlessSampler(sampler_register, global_code, static_cast<s64>(global_code.size()));
407 ASSERT(base_node != nullptr); 405 if (!base_node) {
408 if (base_node == nullptr) { 406 UNREACHABLE();
409 return std::nullopt; 407 return std::nullopt;
410 } 408 }
411 409
412 if (const auto bindless_sampler_info = 410 if (const auto sampler_info = std::get_if<BindlessSamplerNode>(&*tracked_sampler_info)) {
413 std::get_if<BindlessSamplerNode>(&*tracked_sampler_info)) { 411 const u32 buffer = sampler_info->index;
414 const u32 buffer = bindless_sampler_info->GetIndex(); 412 const u32 offset = sampler_info->offset;
415 const u32 offset = bindless_sampler_info->GetOffset(); 413 info = GetSamplerInfo(info, registry.ObtainBindlessSampler(buffer, offset));
416 info = GetSamplerInfo(info, offset, buffer);
417 414
418 // If this sampler has already been used, return the existing mapping. 415 // If this sampler has already been used, return the existing mapping.
419 const auto it = std::find_if(used_samplers.begin(), used_samplers.end(), 416 const auto it = std::find_if(used_samplers.begin(), used_samplers.end(),
420 [buffer = buffer, offset = offset](const Sampler& entry) { 417 [buffer, offset](const Sampler& entry) {
421 return entry.buffer == buffer && entry.offset == offset; 418 return entry.buffer == buffer && entry.offset == offset;
422 }); 419 });
423 if (it != used_samplers.end()) { 420 if (it != used_samplers.end()) {
@@ -431,10 +428,32 @@ std::optional<Sampler> ShaderIR::GetBindlessSampler(Tegra::Shader::Register reg,
431 return used_samplers.emplace_back(next_index, offset, buffer, *info.type, *info.is_array, 428 return used_samplers.emplace_back(next_index, offset, buffer, *info.type, *info.is_array,
432 *info.is_shadow, *info.is_buffer, false); 429 *info.is_shadow, *info.is_buffer, false);
433 } 430 }
434 if (const auto array_sampler_info = std::get_if<ArraySamplerNode>(&*tracked_sampler_info)) { 431 if (const auto sampler_info = std::get_if<SeparateSamplerNode>(&*tracked_sampler_info)) {
435 const u32 base_offset = array_sampler_info->GetBaseOffset() / 4; 432 const std::pair indices = sampler_info->indices;
436 index_var = GetCustomVariable(array_sampler_info->GetIndexVar()); 433 const std::pair offsets = sampler_info->offsets;
437 info = GetSamplerInfo(info, base_offset); 434 info = GetSamplerInfo(info, registry.ObtainSeparateSampler(indices, offsets));
435
436 // Try to use an already created sampler if it exists
437 const auto it = std::find_if(
438 used_samplers.begin(), used_samplers.end(), [indices, offsets](const Sampler& entry) {
439 return offsets == std::pair{entry.offset, entry.secondary_offset} &&
440 indices == std::pair{entry.buffer, entry.secondary_buffer};
441 });
442 if (it != used_samplers.end()) {
443 ASSERT(it->is_separated && it->type == info.type && it->is_array == info.is_array &&
444 it->is_shadow == info.is_shadow && it->is_buffer == info.is_buffer);
445 return *it;
446 }
447
448 // Otherwise create a new mapping for this sampler
449 const u32 next_index = static_cast<u32>(used_samplers.size());
450 return used_samplers.emplace_back(next_index, offsets, indices, *info.type, *info.is_array,
451 *info.is_shadow, *info.is_buffer);
452 }
453 if (const auto sampler_info = std::get_if<ArraySamplerNode>(&*tracked_sampler_info)) {
454 const u32 base_offset = sampler_info->base_offset / 4;
455 index_var = GetCustomVariable(sampler_info->bindless_var);
456 info = GetSamplerInfo(info, registry.ObtainBoundSampler(base_offset));
438 457
439 // If this sampler has already been used, return the existing mapping. 458 // If this sampler has already been used, return the existing mapping.
440 const auto it = std::find_if( 459 const auto it = std::find_if(
diff --git a/src/video_core/shader/memory_util.cpp b/src/video_core/shader/memory_util.cpp
index 074f21691..5071c83ca 100644
--- a/src/video_core/shader/memory_util.cpp
+++ b/src/video_core/shader/memory_util.cpp
@@ -66,12 +66,12 @@ ProgramCode GetShaderCode(Tegra::MemoryManager& memory_manager, GPUVAddr gpu_add
66 66
67u64 GetUniqueIdentifier(Tegra::Engines::ShaderType shader_type, bool is_a, const ProgramCode& code, 67u64 GetUniqueIdentifier(Tegra::Engines::ShaderType shader_type, bool is_a, const ProgramCode& code,
68 const ProgramCode& code_b) { 68 const ProgramCode& code_b) {
69 u64 unique_identifier = boost::hash_value(code); 69 size_t unique_identifier = boost::hash_value(code);
70 if (is_a) { 70 if (is_a) {
71 // VertexA programs include two programs 71 // VertexA programs include two programs
72 boost::hash_combine(unique_identifier, boost::hash_value(code_b)); 72 boost::hash_combine(unique_identifier, boost::hash_value(code_b));
73 } 73 }
74 return unique_identifier; 74 return static_cast<u64>(unique_identifier);
75} 75}
76 76
77} // namespace VideoCommon::Shader 77} // namespace VideoCommon::Shader
diff --git a/src/video_core/shader/node.h b/src/video_core/shader/node.h
index c5e5165ff..8f230d57a 100644
--- a/src/video_core/shader/node.h
+++ b/src/video_core/shader/node.h
@@ -275,10 +275,11 @@ using Node = std::shared_ptr<NodeData>;
275using Node4 = std::array<Node, 4>; 275using Node4 = std::array<Node, 4>;
276using NodeBlock = std::vector<Node>; 276using NodeBlock = std::vector<Node>;
277 277
278class BindlessSamplerNode; 278struct ArraySamplerNode;
279class ArraySamplerNode; 279struct BindlessSamplerNode;
280struct SeparateSamplerNode;
280 281
281using TrackSamplerData = std::variant<BindlessSamplerNode, ArraySamplerNode>; 282using TrackSamplerData = std::variant<BindlessSamplerNode, SeparateSamplerNode, ArraySamplerNode>;
282using TrackSampler = std::shared_ptr<TrackSamplerData>; 283using TrackSampler = std::shared_ptr<TrackSamplerData>;
283 284
284struct Sampler { 285struct Sampler {
@@ -288,63 +289,51 @@ struct Sampler {
288 : index{index}, offset{offset}, type{type}, is_array{is_array}, is_shadow{is_shadow}, 289 : index{index}, offset{offset}, type{type}, is_array{is_array}, is_shadow{is_shadow},
289 is_buffer{is_buffer}, is_indexed{is_indexed} {} 290 is_buffer{is_buffer}, is_indexed{is_indexed} {}
290 291
292 /// Separate sampler constructor
293 constexpr explicit Sampler(u32 index, std::pair<u32, u32> offsets, std::pair<u32, u32> buffers,
294 Tegra::Shader::TextureType type, bool is_array, bool is_shadow,
295 bool is_buffer)
296 : index{index}, offset{offsets.first}, secondary_offset{offsets.second},
297 buffer{buffers.first}, secondary_buffer{buffers.second}, type{type}, is_array{is_array},
298 is_shadow{is_shadow}, is_buffer{is_buffer}, is_separated{true} {}
299
291 /// Bindless samplers constructor 300 /// Bindless samplers constructor
292 constexpr explicit Sampler(u32 index, u32 offset, u32 buffer, Tegra::Shader::TextureType type, 301 constexpr explicit Sampler(u32 index, u32 offset, u32 buffer, Tegra::Shader::TextureType type,
293 bool is_array, bool is_shadow, bool is_buffer, bool is_indexed) 302 bool is_array, bool is_shadow, bool is_buffer, bool is_indexed)
294 : index{index}, offset{offset}, buffer{buffer}, type{type}, is_array{is_array}, 303 : index{index}, offset{offset}, buffer{buffer}, type{type}, is_array{is_array},
295 is_shadow{is_shadow}, is_buffer{is_buffer}, is_bindless{true}, is_indexed{is_indexed} {} 304 is_shadow{is_shadow}, is_buffer{is_buffer}, is_bindless{true}, is_indexed{is_indexed} {}
296 305
297 u32 index = 0; ///< Emulated index given for this sampler. 306 u32 index = 0; ///< Emulated index given for this sampler.
298 u32 offset = 0; ///< Offset in the const buffer from where the sampler is being read. 307 u32 offset = 0; ///< Offset in the const buffer from where the sampler is being read.
299 u32 buffer = 0; ///< Buffer where the bindless sampler is being read (unused on bound samplers). 308 u32 secondary_offset = 0; ///< Secondary offset in the const buffer.
300 u32 size = 1; ///< Size of the sampler. 309 u32 buffer = 0; ///< Buffer where the bindless sampler is read.
310 u32 secondary_buffer = 0; ///< Secondary buffer where the bindless sampler is read.
311 u32 size = 1; ///< Size of the sampler.
301 312
302 Tegra::Shader::TextureType type{}; ///< The type used to sample this texture (Texture2D, etc) 313 Tegra::Shader::TextureType type{}; ///< The type used to sample this texture (Texture2D, etc)
303 bool is_array = false; ///< Whether the texture is being sampled as an array texture or not. 314 bool is_array = false; ///< Whether the texture is being sampled as an array texture or not.
304 bool is_shadow = false; ///< Whether the texture is being sampled as a depth texture or not. 315 bool is_shadow = false; ///< Whether the texture is being sampled as a depth texture or not.
305 bool is_buffer = false; ///< Whether the texture is a texture buffer without sampler. 316 bool is_buffer = false; ///< Whether the texture is a texture buffer without sampler.
306 bool is_bindless = false; ///< Whether this sampler belongs to a bindless texture or not. 317 bool is_bindless = false; ///< Whether this sampler belongs to a bindless texture or not.
307 bool is_indexed = false; ///< Whether this sampler is an indexed array of textures. 318 bool is_indexed = false; ///< Whether this sampler is an indexed array of textures.
319 bool is_separated = false; ///< Whether the image and sampler is separated or not.
308}; 320};
309 321
310/// Represents a tracked bindless sampler into a direct const buffer 322/// Represents a tracked bindless sampler into a direct const buffer
311class ArraySamplerNode final { 323struct ArraySamplerNode {
312public:
313 explicit ArraySamplerNode(u32 index, u32 base_offset, u32 bindless_var)
314 : index{index}, base_offset{base_offset}, bindless_var{bindless_var} {}
315
316 constexpr u32 GetIndex() const {
317 return index;
318 }
319
320 constexpr u32 GetBaseOffset() const {
321 return base_offset;
322 }
323
324 constexpr u32 GetIndexVar() const {
325 return bindless_var;
326 }
327
328private:
329 u32 index; 324 u32 index;
330 u32 base_offset; 325 u32 base_offset;
331 u32 bindless_var; 326 u32 bindless_var;
332}; 327};
333 328
334 /// Represents a tracked bindless sampler into a direct const buffer 329 /// Represents a tracked separate sampler/image pair that was folded statically
335class BindlessSamplerNode final { 330struct SeparateSamplerNode {
336public: 331 std::pair<u32, u32> indices;
337 explicit BindlessSamplerNode(u32 index, u32 offset) : index{index}, offset{offset} {} 332 std::pair<u32, u32> offsets;
338 333};
339 constexpr u32 GetIndex() const {
340 return index;
341 }
342
343 constexpr u32 GetOffset() const {
344 return offset;
345 }
346 334
347private: 335/// Represents a tracked bindless sampler into a direct const buffer
336struct BindlessSamplerNode {
348 u32 index; 337 u32 index;
349 u32 offset; 338 u32 offset;
350}; 339};
diff --git a/src/video_core/shader/node_helper.h b/src/video_core/shader/node_helper.h
index 11231bbea..1e0886185 100644
--- a/src/video_core/shader/node_helper.h
+++ b/src/video_core/shader/node_helper.h
@@ -48,7 +48,7 @@ Node MakeNode(Args&&... args) {
48template <typename T, typename... Args> 48template <typename T, typename... Args>
49TrackSampler MakeTrackSampler(Args&&... args) { 49TrackSampler MakeTrackSampler(Args&&... args) {
50 static_assert(std::is_convertible_v<T, TrackSamplerData>); 50 static_assert(std::is_convertible_v<T, TrackSamplerData>);
51 return std::make_shared<TrackSamplerData>(T(std::forward<Args>(args)...)); 51 return std::make_shared<TrackSamplerData>(T{std::forward<Args>(args)...});
52} 52}
53 53
54template <typename... Args> 54template <typename... Args>
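
The switch from T(...) to T{...} matters because the sampler node classes above became plain aggregates: braced initialization fills members in declaration order without needing user-declared constructors. For example, with the new SeparateSamplerNode (the cbuf_*/offset_* values below are hypothetical):

    const u32 cbuf_image = 1, cbuf_sampler = 2;            // hypothetical const buffer indices
    const u32 offset_image = 0x30, offset_sampler = 0x34;  // hypothetical word offsets
    TrackSampler tracked = MakeTrackSampler<SeparateSamplerNode>(
        std::pair{cbuf_image, cbuf_sampler}, std::pair{offset_image, offset_sampler});
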
diff --git a/src/video_core/shader/registry.cpp b/src/video_core/shader/registry.cpp
index af70b3f35..cdf274e54 100644
--- a/src/video_core/shader/registry.cpp
+++ b/src/video_core/shader/registry.cpp
@@ -93,6 +93,26 @@ std::optional<SamplerDescriptor> Registry::ObtainBoundSampler(u32 offset) {
93 return value; 93 return value;
94} 94}
95 95
96std::optional<Tegra::Engines::SamplerDescriptor> Registry::ObtainSeparateSampler(
97 std::pair<u32, u32> buffers, std::pair<u32, u32> offsets) {
98 SeparateSamplerKey key;
99 key.buffers = buffers;
100 key.offsets = offsets;
101 const auto iter = separate_samplers.find(key);
102 if (iter != separate_samplers.end()) {
103 return iter->second;
104 }
105 if (!engine) {
106 return std::nullopt;
107 }
108
109 const u32 handle_1 = engine->AccessConstBuffer32(stage, key.buffers.first, key.offsets.first);
110 const u32 handle_2 = engine->AccessConstBuffer32(stage, key.buffers.second, key.offsets.second);
111 const SamplerDescriptor value = engine->AccessSampler(handle_1 | handle_2);
112 separate_samplers.emplace(key, value);
113 return value;
114}
115
96std::optional<Tegra::Engines::SamplerDescriptor> Registry::ObtainBindlessSampler(u32 buffer, 116std::optional<Tegra::Engines::SamplerDescriptor> Registry::ObtainBindlessSampler(u32 buffer,
97 u32 offset) { 117 u32 offset) {
98 const std::pair key = {buffer, offset}; 118 const std::pair key = {buffer, offset};
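
ObtainSeparateSampler keys its cache on where both halves of the handle were read from, not on the resolved handle: the shader computed its descriptor as cbuf[b1][o1] | cbuf[b2][o2] (texture bits in one word, sampler bits in the other, by convention), so OR-ing the two reads reconstructs the combined handle for the AccessSampler query. A rough standalone sketch of the same memoization pattern; every name here is a hypothetical stand-in:

    #include <cstdint>
    #include <map>
    #include <tuple>

    std::uint32_t ReadConstBuffer(std::uint32_t buffer, std::uint32_t offset); // stub
    std::uint64_t QuerySampler(std::uint32_t handle);                          // stub

    using SeparateKey = std::tuple<std::uint32_t, std::uint32_t, std::uint32_t, std::uint32_t>;

    std::uint64_t ObtainCached(std::map<SeparateKey, std::uint64_t>& cache,
                               std::uint32_t b1, std::uint32_t o1,
                               std::uint32_t b2, std::uint32_t o2) {
        const SeparateKey key{b1, o1, b2, o2};
        if (const auto it = cache.find(key); it != cache.end()) {
            return it->second; // same pair of reads seen before: reuse it
        }
        // OR the two words back together to recover the full handle.
        const std::uint32_t handle = ReadConstBuffer(b1, o1) | ReadConstBuffer(b2, o2);
        return cache.emplace(key, QuerySampler(handle)).first->second;
    }
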
diff --git a/src/video_core/shader/registry.h b/src/video_core/shader/registry.h
index 0c80d35fd..231206765 100644
--- a/src/video_core/shader/registry.h
+++ b/src/video_core/shader/registry.h
@@ -19,8 +19,39 @@
19 19
20namespace VideoCommon::Shader { 20namespace VideoCommon::Shader {
21 21
22struct SeparateSamplerKey {
23 std::pair<u32, u32> buffers;
24 std::pair<u32, u32> offsets;
25};
26
27} // namespace VideoCommon::Shader
28
29namespace std {
30
31template <>
32struct hash<VideoCommon::Shader::SeparateSamplerKey> {
33 std::size_t operator()(const VideoCommon::Shader::SeparateSamplerKey& key) const noexcept {
34 return std::hash<u32>{}(key.buffers.first ^ key.buffers.second ^ key.offsets.first ^
35 key.offsets.second);
36 }
37};
38
39template <>
40struct equal_to<VideoCommon::Shader::SeparateSamplerKey> {
41 bool operator()(const VideoCommon::Shader::SeparateSamplerKey& lhs,
42 const VideoCommon::Shader::SeparateSamplerKey& rhs) const noexcept {
43 return lhs.buffers == rhs.buffers && lhs.offsets == rhs.offsets;
44 }
45};
46
47} // namespace std
48
49namespace VideoCommon::Shader {
50
22using KeyMap = std::unordered_map<std::pair<u32, u32>, u32, Common::PairHash>; 51using KeyMap = std::unordered_map<std::pair<u32, u32>, u32, Common::PairHash>;
23using BoundSamplerMap = std::unordered_map<u32, Tegra::Engines::SamplerDescriptor>; 52using BoundSamplerMap = std::unordered_map<u32, Tegra::Engines::SamplerDescriptor>;
53using SeparateSamplerMap =
54 std::unordered_map<SeparateSamplerKey, Tegra::Engines::SamplerDescriptor>;
24using BindlessSamplerMap = 55using BindlessSamplerMap =
25 std::unordered_map<std::pair<u32, u32>, Tegra::Engines::SamplerDescriptor, Common::PairHash>; 56 std::unordered_map<std::pair<u32, u32>, Tegra::Engines::SamplerDescriptor, Common::PairHash>;
26 57
@@ -73,6 +104,9 @@ public:
73 104
74 std::optional<Tegra::Engines::SamplerDescriptor> ObtainBoundSampler(u32 offset); 105 std::optional<Tegra::Engines::SamplerDescriptor> ObtainBoundSampler(u32 offset);
75 106
107 std::optional<Tegra::Engines::SamplerDescriptor> ObtainSeparateSampler(
108 std::pair<u32, u32> buffers, std::pair<u32, u32> offsets);
109
76 std::optional<Tegra::Engines::SamplerDescriptor> ObtainBindlessSampler(u32 buffer, u32 offset); 110 std::optional<Tegra::Engines::SamplerDescriptor> ObtainBindlessSampler(u32 buffer, u32 offset);
77 111
78 /// Inserts a key. 112 /// Inserts a key.
@@ -128,6 +162,7 @@ private:
128 Tegra::Engines::ConstBufferEngineInterface* engine = nullptr; 162 Tegra::Engines::ConstBufferEngineInterface* engine = nullptr;
129 KeyMap keys; 163 KeyMap keys;
130 BoundSamplerMap bound_samplers; 164 BoundSamplerMap bound_samplers;
165 SeparateSamplerMap separate_samplers;
131 BindlessSamplerMap bindless_samplers; 166 BindlessSamplerMap bindless_samplers;
132 u32 bound_buffer; 167 u32 bound_buffer;
133 GraphicsInfo graphics_info; 168 GraphicsInfo graphics_info;
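
SeparateSamplerMap picks up the std::hash and std::equal_to specializations above automatically, so the map needs no extra template arguments; equal_to is specialized presumably because SeparateSamplerKey declares no operator==. Note the hash XOR-folds all four fields, which is cheap but collision-prone (swapped buffers/offsets hash identically); lookups stay correct because equality still compares the full key. A self-contained demonstration of the same pattern on a hypothetical key type:

    #include <cstddef>
    #include <cstdint>
    #include <functional>
    #include <unordered_map>

    struct Key {
        std::uint32_t a;
        std::uint32_t b;
    };

    namespace std {

    template <>
    struct hash<Key> {
        std::size_t operator()(const Key& key) const noexcept {
            // XOR folding as in the diff: {1, 2} and {2, 1} collide, which
            // only costs a bucket collision, never a wrong lookup result.
            return std::hash<std::uint32_t>{}(key.a ^ key.b);
        }
    };

    template <>
    struct equal_to<Key> {
        bool operator()(const Key& lhs, const Key& rhs) const noexcept {
            return lhs.a == rhs.a && lhs.b == rhs.b;
        }
    };

    } // namespace std

    int main() {
        std::unordered_map<Key, int> map; // specializations found implicitly
        map.emplace(Key{1, 2}, 42);
        return map.count(Key{1, 2}) == 1 ? 0 : 1;
    }
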
diff --git a/src/video_core/shader/shader_ir.h b/src/video_core/shader/shader_ir.h
index 15ae152f2..3a98b2104 100644
--- a/src/video_core/shader/shader_ir.h
+++ b/src/video_core/shader/shader_ir.h
@@ -330,8 +330,8 @@ private:
330 OperationCode GetPredicateCombiner(Tegra::Shader::PredOperation operation); 330 OperationCode GetPredicateCombiner(Tegra::Shader::PredOperation operation);
331 331
332 /// Queries the missing sampler info from the execution context. 332 /// Queries the missing sampler info from the execution context.
333 SamplerInfo GetSamplerInfo(SamplerInfo info, u32 offset, 333 SamplerInfo GetSamplerInfo(SamplerInfo info,
334 std::optional<u32> buffer = std::nullopt); 334 std::optional<Tegra::Engines::SamplerDescriptor> sampler);
335 335
336 /// Accesses a texture sampler. 336 /// Accesses a texture sampler.
337 std::optional<Sampler> GetSampler(Tegra::Shader::Sampler sampler, SamplerInfo info); 337 std::optional<Sampler> GetSampler(Tegra::Shader::Sampler sampler, SamplerInfo info);
@@ -409,8 +409,14 @@ private:
409 409
410 std::tuple<Node, u32, u32> TrackCbuf(Node tracked, const NodeBlock& code, s64 cursor) const; 410 std::tuple<Node, u32, u32> TrackCbuf(Node tracked, const NodeBlock& code, s64 cursor) const;
411 411
412 std::tuple<Node, TrackSampler> TrackBindlessSampler(Node tracked, const NodeBlock& code, 412 std::pair<Node, TrackSampler> TrackBindlessSampler(Node tracked, const NodeBlock& code,
413 s64 cursor); 413 s64 cursor);
414
415 std::pair<Node, TrackSampler> HandleBindlessIndirectRead(const CbufNode& cbuf,
416 const OperationNode& operation,
417 Node gpr, Node base_offset,
418 Node tracked, const NodeBlock& code,
419 s64 cursor);
414 420
415 std::optional<u32> TrackImmediate(Node tracked, const NodeBlock& code, s64 cursor) const; 421 std::optional<u32> TrackImmediate(Node tracked, const NodeBlock& code, s64 cursor) const;
416 422
diff --git a/src/video_core/shader/track.cpp b/src/video_core/shader/track.cpp
index eb97bfd41..d5ed81442 100644
--- a/src/video_core/shader/track.cpp
+++ b/src/video_core/shader/track.cpp
@@ -14,6 +14,7 @@
14namespace VideoCommon::Shader { 14namespace VideoCommon::Shader {
15 15
16namespace { 16namespace {
17
17std::pair<Node, s64> FindOperation(const NodeBlock& code, s64 cursor, 18std::pair<Node, s64> FindOperation(const NodeBlock& code, s64 cursor,
18 OperationCode operation_code) { 19 OperationCode operation_code) {
19 for (; cursor >= 0; --cursor) { 20 for (; cursor >= 0; --cursor) {
@@ -63,7 +64,8 @@ bool AmendNodeCv(std::size_t amend_index, Node node) {
63 if (const auto operation = std::get_if<OperationNode>(&*node)) { 64 if (const auto operation = std::get_if<OperationNode>(&*node)) {
64 operation->SetAmendIndex(amend_index); 65 operation->SetAmendIndex(amend_index);
65 return true; 66 return true;
66 } else if (const auto conditional = std::get_if<ConditionalNode>(&*node)) { 67 }
68 if (const auto conditional = std::get_if<ConditionalNode>(&*node)) {
67 conditional->SetAmendIndex(amend_index); 69 conditional->SetAmendIndex(amend_index);
68 return true; 70 return true;
69 } 71 }
@@ -72,40 +74,27 @@ bool AmendNodeCv(std::size_t amend_index, Node node) {
72 74
73} // Anonymous namespace 75} // Anonymous namespace
74 76
75std::tuple<Node, TrackSampler> ShaderIR::TrackBindlessSampler(Node tracked, const NodeBlock& code, 77std::pair<Node, TrackSampler> ShaderIR::TrackBindlessSampler(Node tracked, const NodeBlock& code,
76 s64 cursor) { 78 s64 cursor) {
77 if (const auto cbuf = std::get_if<CbufNode>(&*tracked)) { 79 if (const auto cbuf = std::get_if<CbufNode>(&*tracked)) {
80 const u32 cbuf_index = cbuf->GetIndex();
81
78 // Constant buffer found, test if it's an immediate 82 // Constant buffer found, test if it's an immediate
79 const auto& offset = cbuf->GetOffset(); 83 const auto& offset = cbuf->GetOffset();
80 if (const auto immediate = std::get_if<ImmediateNode>(&*offset)) { 84 if (const auto immediate = std::get_if<ImmediateNode>(&*offset)) {
81 auto track = 85 auto track = MakeTrackSampler<BindlessSamplerNode>(cbuf_index, immediate->GetValue());
82 MakeTrackSampler<BindlessSamplerNode>(cbuf->GetIndex(), immediate->GetValue());
83 return {tracked, track}; 86 return {tracked, track};
84 } 87 }
85 if (const auto operation = std::get_if<OperationNode>(&*offset)) { 88 if (const auto operation = std::get_if<OperationNode>(&*offset)) {
86 const u32 bound_buffer = registry.GetBoundBuffer(); 89 const u32 bound_buffer = registry.GetBoundBuffer();
87 if (bound_buffer != cbuf->GetIndex()) { 90 if (bound_buffer != cbuf_index) {
88 return {}; 91 return {};
89 } 92 }
90 const auto pair = DecoupleIndirectRead(*operation); 93 if (const std::optional pair = DecoupleIndirectRead(*operation)) {
91 if (!pair) { 94 auto [gpr, base_offset] = *pair;
92 return {}; 95 return HandleBindlessIndirectRead(*cbuf, *operation, gpr, base_offset, tracked,
96 code, cursor);
93 } 97 }
94 auto [gpr, base_offset] = *pair;
95 const auto offset_inm = std::get_if<ImmediateNode>(&*base_offset);
96 const auto& gpu_driver = registry.AccessGuestDriverProfile();
97 const u32 bindless_cv = NewCustomVariable();
98 Node op =
99 Operation(OperationCode::UDiv, gpr, Immediate(gpu_driver.GetTextureHandlerSize()));
100
101 const Node cv_node = GetCustomVariable(bindless_cv);
102 Node amend_op = Operation(OperationCode::Assign, cv_node, std::move(op));
103 const std::size_t amend_index = DeclareAmend(std::move(amend_op));
104 AmendNodeCv(amend_index, code[cursor]);
105 // TODO Implement Bindless Index custom variable
106 auto track = MakeTrackSampler<ArraySamplerNode>(cbuf->GetIndex(),
107 offset_inm->GetValue(), bindless_cv);
108 return {tracked, track};
109 } 98 }
110 return {}; 99 return {};
111 } 100 }
@@ -122,10 +111,23 @@ std::tuple<Node, TrackSampler> ShaderIR::TrackBindlessSampler(Node tracked, cons
122 return TrackBindlessSampler(source, code, new_cursor); 111 return TrackBindlessSampler(source, code, new_cursor);
123 } 112 }
124 if (const auto operation = std::get_if<OperationNode>(&*tracked)) { 113 if (const auto operation = std::get_if<OperationNode>(&*tracked)) {
125 for (std::size_t i = operation->GetOperandsCount(); i > 0; --i) { 114 const OperationNode& op = *operation;
126 if (auto found = TrackBindlessSampler((*operation)[i - 1], code, cursor); 115
127 std::get<0>(found)) { 116 const OperationCode opcode = operation->GetCode();
128 // Cbuf found in operand. 117 if (opcode == OperationCode::IBitwiseOr || opcode == OperationCode::UBitwiseOr) {
118 ASSERT(op.GetOperandsCount() == 2);
119 auto [node_a, index_a, offset_a] = TrackCbuf(op[0], code, cursor);
120 auto [node_b, index_b, offset_b] = TrackCbuf(op[1], code, cursor);
121 if (node_a && node_b) {
122 auto track = MakeTrackSampler<SeparateSamplerNode>(std::pair{index_a, index_b},
123 std::pair{offset_a, offset_b});
124 return {tracked, std::move(track)};
125 }
126 }
127 std::size_t i = op.GetOperandsCount();
128 while (i--) {
 129 if (auto found = TrackBindlessSampler(op[i], code, cursor); std::get<0>(found)) {
130 // Constant buffer found in operand.
129 return found; 131 return found;
130 } 132 }
131 } 133 }
@@ -139,6 +141,26 @@ std::tuple<Node, TrackSampler> ShaderIR::TrackBindlessSampler(Node tracked, cons
139 return {}; 141 return {};
140} 142}
141 143
144std::pair<Node, TrackSampler> ShaderIR::HandleBindlessIndirectRead(
145 const CbufNode& cbuf, const OperationNode& operation, Node gpr, Node base_offset, Node tracked,
146 const NodeBlock& code, s64 cursor) {
147 const auto offset_imm = std::get<ImmediateNode>(*base_offset);
148 const auto& gpu_driver = registry.AccessGuestDriverProfile();
149 const u32 bindless_cv = NewCustomVariable();
150 const u32 texture_handler_size = gpu_driver.GetTextureHandlerSize();
151 Node op = Operation(OperationCode::UDiv, gpr, Immediate(texture_handler_size));
152
153 Node cv_node = GetCustomVariable(bindless_cv);
154 Node amend_op = Operation(OperationCode::Assign, std::move(cv_node), std::move(op));
155 const std::size_t amend_index = DeclareAmend(std::move(amend_op));
156 AmendNodeCv(amend_index, code[cursor]);
157
158 // TODO: Implement bindless index custom variable
159 auto track =
160 MakeTrackSampler<ArraySamplerNode>(cbuf.GetIndex(), offset_imm.GetValue(), bindless_cv);
161 return {tracked, track};
162}
163
142std::tuple<Node, u32, u32> ShaderIR::TrackCbuf(Node tracked, const NodeBlock& code, 164std::tuple<Node, u32, u32> ShaderIR::TrackCbuf(Node tracked, const NodeBlock& code,
143 s64 cursor) const { 165 s64 cursor) const {
144 if (const auto cbuf = std::get_if<CbufNode>(&*tracked)) { 166 if (const auto cbuf = std::get_if<CbufNode>(&*tracked)) {
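
The new IBitwiseOr/UBitwiseOr branch is what feeds SeparateSamplerNode: when the tracked handle is an OR of two expressions and both operands trace back to constant-buffer reads, the pair of (index, offset) sources is folded statically instead of giving up. A toy version of that fold on a simplified expression tree; every type here is a hypothetical reduction of the real IR:

    #include <memory>
    #include <optional>
    #include <utility>
    #include <variant>

    struct CbufRead {
        unsigned index;  // constant buffer index
        unsigned offset; // offset inside that buffer
    };
    struct OrOp; // forward-declared so the variant can reference it
    using Expr = std::variant<CbufRead, std::shared_ptr<OrOp>>;
    struct OrOp {
        Expr lhs;
        Expr rhs;
    };

    // Reduced TrackCbuf: succeed only when the expression is a direct read.
    std::optional<CbufRead> TrackCbuf(const Expr& expr) {
        if (const auto* read = std::get_if<CbufRead>(&expr)) {
            return *read;
        }
        return std::nullopt; // the real tracker also walks the block backwards
    }

    // Fold `a | b` into a separate texture/sampler pair when both sides track.
    std::optional<std::pair<CbufRead, CbufRead>> FoldSeparate(const OrOp& op) {
        const auto lhs = TrackCbuf(op.lhs);
        const auto rhs = TrackCbuf(op.rhs);
        if (lhs && rhs) {
            return std::make_pair(*lhs, *rhs);
        }
        return std::nullopt;
    }
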
diff --git a/src/video_core/shader_cache.h b/src/video_core/shader_cache.h
new file mode 100644
index 000000000..2dd270e99
--- /dev/null
+++ b/src/video_core/shader_cache.h
@@ -0,0 +1,228 @@
1// Copyright 2020 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <algorithm>
8#include <memory>
9#include <mutex>
10#include <unordered_map>
11#include <utility>
12#include <vector>
13
14#include "common/assert.h"
15#include "common/common_types.h"
16#include "video_core/rasterizer_interface.h"
17
18namespace VideoCommon {
19
20template <class T>
21class ShaderCache {
22 static constexpr u64 PAGE_BITS = 14;
23
24 struct Entry {
25 VAddr addr_start;
26 VAddr addr_end;
27 T* data;
28
29 bool is_memory_marked = true;
30
31 constexpr bool Overlaps(VAddr start, VAddr end) const noexcept {
32 return start < addr_end && addr_start < end;
33 }
34 };
35
36public:
37 virtual ~ShaderCache() = default;
38
39 /// @brief Removes shaders inside a given region
40 /// @note Checks for ranges
41 /// @param addr Start address of the invalidation
42 /// @param size Number of bytes of the invalidation
43 void InvalidateRegion(VAddr addr, std::size_t size) {
44 std::scoped_lock lock{invalidation_mutex};
45 InvalidatePagesInRegion(addr, size);
46 RemovePendingShaders();
47 }
48
49 /// @brief Unmarks a memory region as cached and marks it for removal
50 /// @param addr Start address of the CPU write operation
51 /// @param size Number of bytes of the CPU write operation
52 void OnCPUWrite(VAddr addr, std::size_t size) {
53 std::lock_guard lock{invalidation_mutex};
54 InvalidatePagesInRegion(addr, size);
55 }
56
57 /// @brief Flushes delayed removal operations
58 void SyncGuestHost() {
59 std::scoped_lock lock{invalidation_mutex};
60 RemovePendingShaders();
61 }
62
 63 /// @brief Tries to obtain a cached shader starting at a given address
 64 /// @note Doesn't check for ranges; the given address has to be the start of the shader
 65 /// @param addr Start address of the shader; the lookup is by exact address, not by range
66 /// @return Pointer to a valid shader, nullptr when nothing is found
67 T* TryGet(VAddr addr) const {
68 std::scoped_lock lock{lookup_mutex};
69
70 const auto it = lookup_cache.find(addr);
71 if (it == lookup_cache.end()) {
72 return nullptr;
73 }
74 return it->second->data;
75 }
76
77protected:
78 explicit ShaderCache(VideoCore::RasterizerInterface& rasterizer_) : rasterizer{rasterizer_} {}
79
80 /// @brief Register in the cache a given entry
81 /// @param data Shader to store in the cache
82 /// @param addr Start address of the shader that will be registered
83 /// @param size Size in bytes of the shader
84 void Register(std::unique_ptr<T> data, VAddr addr, std::size_t size) {
85 std::scoped_lock lock{invalidation_mutex, lookup_mutex};
86
87 const VAddr addr_end = addr + size;
88 Entry* const entry = NewEntry(addr, addr_end, data.get());
89
90 const u64 page_end = addr_end >> PAGE_BITS;
91 for (u64 page = addr >> PAGE_BITS; page <= page_end; ++page) {
92 invalidation_cache[page].push_back(entry);
93 }
94
95 storage.push_back(std::move(data));
96
97 rasterizer.UpdatePagesCachedCount(addr, size, 1);
98 }
99
100 /// @brief Called when a shader is going to be removed
101 /// @param shader Shader that will be removed
102 /// @pre invalidation_cache is locked
103 /// @pre lookup_mutex is locked
104 virtual void OnShaderRemoval([[maybe_unused]] T* shader) {}
105
106private:
107 /// @brief Invalidate pages in a given region
108 /// @pre invalidation_mutex is locked
109 void InvalidatePagesInRegion(VAddr addr, std::size_t size) {
110 const VAddr addr_end = addr + size;
111 const u64 page_end = addr_end >> PAGE_BITS;
112 for (u64 page = addr >> PAGE_BITS; page <= page_end; ++page) {
113 const auto it = invalidation_cache.find(page);
114 if (it == invalidation_cache.end()) {
115 continue;
116 }
117
118 std::vector<Entry*>& entries = it->second;
119 InvalidatePageEntries(entries, addr, addr_end);
120
121 // If there's nothing else in this page, remove it to avoid overpopulating the hash map.
122 if (entries.empty()) {
123 invalidation_cache.erase(it);
124 }
125 }
126 }
127
128 /// @brief Remove shaders marked for deletion
129 /// @pre invalidation_mutex is locked
130 void RemovePendingShaders() {
131 if (marked_for_removal.empty()) {
132 return;
133 }
134 std::scoped_lock lock{lookup_mutex};
135
136 std::vector<T*> removed_shaders;
137 removed_shaders.reserve(marked_for_removal.size());
138
139 for (Entry* const entry : marked_for_removal) {
140 if (lookup_cache.erase(entry->addr_start) > 0) {
141 removed_shaders.push_back(entry->data);
142 }
143 }
144 marked_for_removal.clear();
145
146 if (!removed_shaders.empty()) {
147 RemoveShadersFromStorage(std::move(removed_shaders));
148 }
149 }
150
151 /// @brief Invalidates entries in a given range for the passed page
152 /// @param entries Vector of entries in the page, it will be modified on overlaps
153 /// @param addr Start address of the invalidation
154 /// @param addr_end Non-inclusive end address of the invalidation
155 /// @pre invalidation_mutex is locked
156 void InvalidatePageEntries(std::vector<Entry*>& entries, VAddr addr, VAddr addr_end) {
157 auto it = entries.begin();
158 while (it != entries.end()) {
159 Entry* const entry = *it;
160 if (!entry->Overlaps(addr, addr_end)) {
161 ++it;
162 continue;
163 }
164 UnmarkMemory(entry);
165 marked_for_removal.push_back(entry);
166
167 it = entries.erase(it);
168 }
169 }
170
171 /// @brief Unmarks an entry from the rasterizer cache
172 /// @param entry Entry to unmark from memory
173 void UnmarkMemory(Entry* entry) {
174 if (!entry->is_memory_marked) {
175 return;
176 }
177 entry->is_memory_marked = false;
178
179 const VAddr addr = entry->addr_start;
180 const std::size_t size = entry->addr_end - addr;
181 rasterizer.UpdatePagesCachedCount(addr, size, -1);
182 }
183
184 /// @brief Removes a vector of shaders from a list
 185 /// @param removed_shaders Shaders to be removed from the storage; it can contain duplicates
186 /// @pre invalidation_mutex is locked
187 /// @pre lookup_mutex is locked
188 void RemoveShadersFromStorage(std::vector<T*> removed_shaders) {
189 // Remove duplicates
190 std::sort(removed_shaders.begin(), removed_shaders.end());
191 removed_shaders.erase(std::unique(removed_shaders.begin(), removed_shaders.end()),
192 removed_shaders.end());
193
194 // Now that there are no duplicates, we can notify removals
195 for (T* const shader : removed_shaders) {
196 OnShaderRemoval(shader);
197 }
198
199 // Remove them from the cache
200 const auto is_removed = [&removed_shaders](std::unique_ptr<T>& shader) {
201 return std::find(removed_shaders.begin(), removed_shaders.end(), shader.get()) !=
202 removed_shaders.end();
203 };
204 storage.erase(std::remove_if(storage.begin(), storage.end(), is_removed), storage.end());
205 }
206
207 /// @brief Creates a new entry in the lookup cache and returns its pointer
208 /// @pre lookup_mutex is locked
209 Entry* NewEntry(VAddr addr, VAddr addr_end, T* data) {
210 auto entry = std::make_unique<Entry>(Entry{addr, addr_end, data});
211 Entry* const entry_pointer = entry.get();
212
213 lookup_cache.emplace(addr, std::move(entry));
214 return entry_pointer;
215 }
216
217 VideoCore::RasterizerInterface& rasterizer;
218
219 mutable std::mutex lookup_mutex;
220 std::mutex invalidation_mutex;
221
222 std::unordered_map<u64, std::unique_ptr<Entry>> lookup_cache;
223 std::unordered_map<u64, std::vector<Entry*>> invalidation_cache;
224 std::vector<std::unique_ptr<T>> storage;
225 std::vector<Entry*> marked_for_removal;
226};
227
228} // namespace VideoCommon
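
The new VideoCommon::ShaderCache<T> is header-only and backend-agnostic: a renderer derives from it, calls Register after compiling a shader, and lets the rasterizer drive InvalidateRegion/OnCPUWrite/SyncGuestHost. A hedged sketch of a minimal client; MyShader and MyShaderCache are hypothetical, not one of the real backends:

    #include <cstddef>
    #include <memory>
    #include <utility>

    #include "video_core/shader_cache.h"

    struct MyShader {
        // compiled program handle, entry points, etc.
    };

    class MyShaderCache final : public VideoCommon::ShaderCache<MyShader> {
    public:
        explicit MyShaderCache(VideoCore::RasterizerInterface& rasterizer)
            : ShaderCache{rasterizer} {}

        MyShader& Emplace(VAddr addr, std::size_t size) {
            auto shader = std::make_unique<MyShader>();
            MyShader& ref = *shader;
            // Indexes by start address for TryGet and by page for invalidation.
            Register(std::move(shader), addr, size);
            return ref;
        }

    protected:
        void OnShaderRemoval([[maybe_unused]] MyShader* shader) override {
            // Release backend objects here; the base frees the storage itself.
        }
    };
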
diff --git a/src/video_core/texture_cache/surface_base.cpp b/src/video_core/texture_cache/surface_base.cpp
index 715f39d0d..0caf3b4f0 100644
--- a/src/video_core/texture_cache/surface_base.cpp
+++ b/src/video_core/texture_cache/surface_base.cpp
@@ -120,6 +120,9 @@ std::optional<std::pair<u32, u32>> SurfaceBaseImpl::GetLayerMipmap(
120 } 120 }
121 const auto relative_address{static_cast<GPUVAddr>(candidate_gpu_addr - gpu_addr)}; 121 const auto relative_address{static_cast<GPUVAddr>(candidate_gpu_addr - gpu_addr)};
122 const auto layer{static_cast<u32>(relative_address / layer_size)}; 122 const auto layer{static_cast<u32>(relative_address / layer_size)};
123 if (layer >= params.depth) {
124 return {};
125 }
123 const GPUVAddr mipmap_address = relative_address - layer_size * layer; 126 const GPUVAddr mipmap_address = relative_address - layer_size * layer;
124 const auto mipmap_it = 127 const auto mipmap_it =
125 Common::BinaryFind(mipmap_offsets.begin(), mipmap_offsets.end(), mipmap_address); 128 Common::BinaryFind(mipmap_offsets.begin(), mipmap_offsets.end(), mipmap_address);
@@ -248,12 +251,11 @@ void SurfaceBaseImpl::FlushBuffer(Tegra::MemoryManager& memory_manager,
248 251
 249 // Use an extra temporary buffer 252
250 auto& tmp_buffer = staging_cache.GetBuffer(1); 253 auto& tmp_buffer = staging_cache.GetBuffer(1);
251 // Special case for 3D Texture Segments
252 const bool must_read_current_data =
253 params.block_depth > 0 && params.target == VideoCore::Surface::SurfaceTarget::Texture2D;
254 tmp_buffer.resize(guest_memory_size); 254 tmp_buffer.resize(guest_memory_size);
255 host_ptr = tmp_buffer.data(); 255 host_ptr = tmp_buffer.data();
256 if (must_read_current_data) { 256
257 if (params.target == SurfaceTarget::Texture3D) {
258 // Special case for 3D texture segments
257 memory_manager.ReadBlockUnsafe(gpu_addr, host_ptr, guest_memory_size); 259 memory_manager.ReadBlockUnsafe(gpu_addr, host_ptr, guest_memory_size);
258 } 260 }
259 261
diff --git a/src/video_core/texture_cache/surface_base.h b/src/video_core/texture_cache/surface_base.h
index 79e10ffbb..173f2edba 100644
--- a/src/video_core/texture_cache/surface_base.h
+++ b/src/video_core/texture_cache/surface_base.h
@@ -217,8 +217,8 @@ public:
217 } 217 }
218 218
219 bool IsProtected() const { 219 bool IsProtected() const {
220 // Only 3D Slices are to be protected 220 // Only 3D slices are to be protected
221 return is_target && params.block_depth > 0; 221 return is_target && params.target == SurfaceTarget::Texture3D;
222 } 222 }
223 223
224 bool IsRenderTarget() const { 224 bool IsRenderTarget() const {
@@ -250,6 +250,11 @@ public:
250 return GetView(ViewParams(overview_params.target, 0, num_layers, 0, params.num_levels)); 250 return GetView(ViewParams(overview_params.target, 0, num_layers, 0, params.num_levels));
251 } 251 }
252 252
253 TView Emplace3DView(u32 slice, u32 depth, u32 base_level, u32 num_levels) {
254 return GetView(ViewParams(VideoCore::Surface::SurfaceTarget::Texture3D, slice, depth,
255 base_level, num_levels));
256 }
257
253 std::optional<TView> EmplaceIrregularView(const SurfaceParams& view_params, 258 std::optional<TView> EmplaceIrregularView(const SurfaceParams& view_params,
254 const GPUVAddr view_addr, 259 const GPUVAddr view_addr,
255 const std::size_t candidate_size, const u32 mipmap, 260 const std::size_t candidate_size, const u32 mipmap,
@@ -272,8 +277,8 @@ public:
272 std::optional<TView> EmplaceView(const SurfaceParams& view_params, const GPUVAddr view_addr, 277 std::optional<TView> EmplaceView(const SurfaceParams& view_params, const GPUVAddr view_addr,
273 const std::size_t candidate_size) { 278 const std::size_t candidate_size) {
274 if (params.target == SurfaceTarget::Texture3D || 279 if (params.target == SurfaceTarget::Texture3D ||
275 (params.num_levels == 1 && !params.is_layered) || 280 view_params.target == SurfaceTarget::Texture3D ||
276 view_params.target == SurfaceTarget::Texture3D) { 281 (params.num_levels == 1 && !params.is_layered)) {
277 return {}; 282 return {};
278 } 283 }
279 const auto layer_mipmap{GetLayerMipmap(view_addr)}; 284 const auto layer_mipmap{GetLayerMipmap(view_addr)};
diff --git a/src/video_core/texture_cache/surface_params.cpp b/src/video_core/texture_cache/surface_params.cpp
index 884fabffe..0b2b2b8c4 100644
--- a/src/video_core/texture_cache/surface_params.cpp
+++ b/src/video_core/texture_cache/surface_params.cpp
@@ -215,10 +215,19 @@ SurfaceParams SurfaceParams::CreateForFramebuffer(Core::System& system, std::siz
215 params.num_levels = 1; 215 params.num_levels = 1;
216 params.emulated_levels = 1; 216 params.emulated_levels = 1;
217 217
218 const bool is_layered = config.layers > 1 && params.block_depth == 0; 218 if (config.memory_layout.is_3d != 0) {
219 params.is_layered = is_layered; 219 params.depth = config.layers.Value();
220 params.depth = is_layered ? config.layers.Value() : 1; 220 params.is_layered = false;
221 params.target = is_layered ? SurfaceTarget::Texture2DArray : SurfaceTarget::Texture2D; 221 params.target = SurfaceTarget::Texture3D;
222 } else if (config.layers > 1) {
223 params.depth = config.layers.Value();
224 params.is_layered = true;
225 params.target = SurfaceTarget::Texture2DArray;
226 } else {
227 params.depth = 1;
228 params.is_layered = false;
229 params.target = SurfaceTarget::Texture2D;
230 }
222 return params; 231 return params;
223} 232}
224 233
@@ -237,7 +246,7 @@ SurfaceParams SurfaceParams::CreateForFermiCopySurface(
237 params.width = config.width; 246 params.width = config.width;
238 params.height = config.height; 247 params.height = config.height;
239 params.pitch = config.pitch; 248 params.pitch = config.pitch;
240 // TODO(Rodrigo): Try to guess the surface target from depth and layer parameters 249 // TODO(Rodrigo): Try to guess texture arrays from parameters
241 params.target = SurfaceTarget::Texture2D; 250 params.target = SurfaceTarget::Texture2D;
242 params.depth = 1; 251 params.depth = 1;
243 params.num_levels = 1; 252 params.num_levels = 1;
diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h
index 6f63217a2..6207d8dfe 100644
--- a/src/video_core/texture_cache/texture_cache.h
+++ b/src/video_core/texture_cache/texture_cache.h
@@ -24,6 +24,7 @@
24#include "core/core.h" 24#include "core/core.h"
25#include "core/memory.h" 25#include "core/memory.h"
26#include "core/settings.h" 26#include "core/settings.h"
27#include "video_core/compatible_formats.h"
27#include "video_core/dirty_flags.h" 28#include "video_core/dirty_flags.h"
28#include "video_core/engines/fermi_2d.h" 29#include "video_core/engines/fermi_2d.h"
29#include "video_core/engines/maxwell_3d.h" 30#include "video_core/engines/maxwell_3d.h"
@@ -47,8 +48,8 @@ class RasterizerInterface;
47 48
48namespace VideoCommon { 49namespace VideoCommon {
49 50
51using VideoCore::Surface::FormatCompatibility;
50using VideoCore::Surface::PixelFormat; 52using VideoCore::Surface::PixelFormat;
51
52using VideoCore::Surface::SurfaceTarget; 53using VideoCore::Surface::SurfaceTarget;
53using RenderTargetConfig = Tegra::Engines::Maxwell3D::Regs::RenderTargetConfig; 54using RenderTargetConfig = Tegra::Engines::Maxwell3D::Regs::RenderTargetConfig;
54 55
@@ -298,15 +299,13 @@ public:
298 const GPUVAddr src_gpu_addr = src_config.Address(); 299 const GPUVAddr src_gpu_addr = src_config.Address();
299 const GPUVAddr dst_gpu_addr = dst_config.Address(); 300 const GPUVAddr dst_gpu_addr = dst_config.Address();
300 DeduceBestBlit(src_params, dst_params, src_gpu_addr, dst_gpu_addr); 301 DeduceBestBlit(src_params, dst_params, src_gpu_addr, dst_gpu_addr);
301 const std::optional<VAddr> dst_cpu_addr = 302
302 system.GPU().MemoryManager().GpuToCpuAddress(dst_gpu_addr); 303 const auto& memory_manager = system.GPU().MemoryManager();
303 const std::optional<VAddr> src_cpu_addr = 304 const std::optional<VAddr> dst_cpu_addr = memory_manager.GpuToCpuAddress(dst_gpu_addr);
304 system.GPU().MemoryManager().GpuToCpuAddress(src_gpu_addr); 305 const std::optional<VAddr> src_cpu_addr = memory_manager.GpuToCpuAddress(src_gpu_addr);
305 std::pair<TSurface, TView> dst_surface = 306 std::pair dst_surface = GetSurface(dst_gpu_addr, *dst_cpu_addr, dst_params, true, false);
306 GetSurface(dst_gpu_addr, *dst_cpu_addr, dst_params, true, false); 307 TView src_surface = GetSurface(src_gpu_addr, *src_cpu_addr, src_params, true, false).second;
307 std::pair<TSurface, TView> src_surface = 308 ImageBlit(src_surface, dst_surface.second, copy_config);
308 GetSurface(src_gpu_addr, *src_cpu_addr, src_params, true, false);
309 ImageBlit(src_surface.second, dst_surface.second, copy_config);
310 dst_surface.first->MarkAsModified(true, Tick()); 309 dst_surface.first->MarkAsModified(true, Tick());
311 } 310 }
312 311
@@ -508,12 +507,12 @@ private:
508 return RecycleStrategy::Flush; 507 return RecycleStrategy::Flush;
509 } 508 }
510 // 3D Textures decision 509 // 3D Textures decision
511 if (params.block_depth > 1 || params.target == SurfaceTarget::Texture3D) { 510 if (params.target == SurfaceTarget::Texture3D) {
512 return RecycleStrategy::Flush; 511 return RecycleStrategy::Flush;
513 } 512 }
514 for (const auto& s : overlaps) { 513 for (const auto& s : overlaps) {
515 const auto& s_params = s->GetSurfaceParams(); 514 const auto& s_params = s->GetSurfaceParams();
516 if (s_params.block_depth > 1 || s_params.target == SurfaceTarget::Texture3D) { 515 if (s_params.target == SurfaceTarget::Texture3D) {
517 return RecycleStrategy::Flush; 516 return RecycleStrategy::Flush;
518 } 517 }
519 } 518 }
@@ -597,7 +596,7 @@ private:
597 } else { 596 } else {
598 new_surface = GetUncachedSurface(gpu_addr, params); 597 new_surface = GetUncachedSurface(gpu_addr, params);
599 } 598 }
600 const auto& final_params = new_surface->GetSurfaceParams(); 599 const SurfaceParams& final_params = new_surface->GetSurfaceParams();
601 if (cr_params.type != final_params.type) { 600 if (cr_params.type != final_params.type) {
602 if (Settings::IsGPULevelExtreme()) { 601 if (Settings::IsGPULevelExtreme()) {
603 BufferCopy(current_surface, new_surface); 602 BufferCopy(current_surface, new_surface);
@@ -605,7 +604,7 @@ private:
605 } else { 604 } else {
606 std::vector<CopyParams> bricks = current_surface->BreakDown(final_params); 605 std::vector<CopyParams> bricks = current_surface->BreakDown(final_params);
607 for (auto& brick : bricks) { 606 for (auto& brick : bricks) {
608 ImageCopy(current_surface, new_surface, brick); 607 TryCopyImage(current_surface, new_surface, brick);
609 } 608 }
610 } 609 }
611 Unregister(current_surface); 610 Unregister(current_surface);
@@ -696,7 +695,7 @@ private:
696 } 695 }
697 const CopyParams copy_params(0, 0, 0, 0, 0, base_layer, 0, mipmap, width, height, 696 const CopyParams copy_params(0, 0, 0, 0, 0, base_layer, 0, mipmap, width, height,
698 src_params.depth); 697 src_params.depth);
699 ImageCopy(surface, new_surface, copy_params); 698 TryCopyImage(surface, new_surface, copy_params);
700 } 699 }
701 } 700 }
702 if (passed_tests == 0) { 701 if (passed_tests == 0) {
@@ -731,51 +730,9 @@ private:
731 */ 730 */
732 std::optional<std::pair<TSurface, TView>> Manage3DSurfaces(VectorSurface& overlaps, 731 std::optional<std::pair<TSurface, TView>> Manage3DSurfaces(VectorSurface& overlaps,
733 const SurfaceParams& params, 732 const SurfaceParams& params,
734 const GPUVAddr gpu_addr, 733 GPUVAddr gpu_addr, VAddr cpu_addr,
735 const VAddr cpu_addr,
736 bool preserve_contents) { 734 bool preserve_contents) {
737 if (params.target == SurfaceTarget::Texture3D) { 735 if (params.target != SurfaceTarget::Texture3D) {
738 bool failed = false;
739 if (params.num_levels > 1) {
740 // We can't handle mipmaps in 3D textures yet, better fallback to LLE approach
741 return std::nullopt;
742 }
743 TSurface new_surface = GetUncachedSurface(gpu_addr, params);
744 bool modified = false;
745 for (auto& surface : overlaps) {
746 const SurfaceParams& src_params = surface->GetSurfaceParams();
747 if (src_params.target != SurfaceTarget::Texture2D) {
748 failed = true;
749 break;
750 }
751 if (src_params.height != params.height) {
752 failed = true;
753 break;
754 }
755 if (src_params.block_depth != params.block_depth ||
756 src_params.block_height != params.block_height) {
757 failed = true;
758 break;
759 }
760 const u32 offset = static_cast<u32>(surface->GetCpuAddr() - cpu_addr);
761 const auto offsets = params.GetBlockOffsetXYZ(offset);
762 const auto z = std::get<2>(offsets);
763 modified |= surface->IsModified();
764 const CopyParams copy_params(0, 0, 0, 0, 0, z, 0, 0, params.width, params.height,
765 1);
766 ImageCopy(surface, new_surface, copy_params);
767 }
768 if (failed) {
769 return std::nullopt;
770 }
771 for (const auto& surface : overlaps) {
772 Unregister(surface);
773 }
774 new_surface->MarkAsModified(modified, Tick());
775 Register(new_surface);
776 auto view = new_surface->GetMainView();
777 return {{std::move(new_surface), view}};
778 } else {
779 for (const auto& surface : overlaps) { 736 for (const auto& surface : overlaps) {
780 if (!surface->MatchTarget(params.target)) { 737 if (!surface->MatchTarget(params.target)) {
781 if (overlaps.size() == 1 && surface->GetCpuAddr() == cpu_addr) { 738 if (overlaps.size() == 1 && surface->GetCpuAddr() == cpu_addr) {
@@ -791,11 +748,60 @@ private:
791 continue; 748 continue;
792 } 749 }
793 if (surface->MatchesStructure(params) == MatchStructureResult::FullMatch) { 750 if (surface->MatchesStructure(params) == MatchStructureResult::FullMatch) {
794 return {{surface, surface->GetMainView()}}; 751 return std::make_pair(surface, surface->GetMainView());
795 } 752 }
796 } 753 }
797 return InitializeSurface(gpu_addr, params, preserve_contents); 754 return InitializeSurface(gpu_addr, params, preserve_contents);
798 } 755 }
756
757 if (params.num_levels > 1) {
 758 // We can't handle mipmaps in 3D textures yet; better to fall back to the LLE approach
759 return std::nullopt;
760 }
761
762 if (overlaps.size() == 1) {
763 const auto& surface = overlaps[0];
764 const SurfaceParams& overlap_params = surface->GetSurfaceParams();
765 // Don't attempt to render to textures with more than one level for now
 766 // The texture has to be at or to the right of the sample address if we want to render to it
767 if (overlap_params.num_levels == 1 && cpu_addr >= surface->GetCpuAddr()) {
768 const u32 offset = static_cast<u32>(cpu_addr - surface->GetCpuAddr());
769 const u32 slice = std::get<2>(params.GetBlockOffsetXYZ(offset));
770 if (slice < overlap_params.depth) {
771 auto view = surface->Emplace3DView(slice, params.depth, 0, 1);
772 return std::make_pair(std::move(surface), std::move(view));
773 }
774 }
775 }
776
777 TSurface new_surface = GetUncachedSurface(gpu_addr, params);
778 bool modified = false;
779
780 for (auto& surface : overlaps) {
781 const SurfaceParams& src_params = surface->GetSurfaceParams();
782 if (src_params.target != SurfaceTarget::Texture2D ||
783 src_params.height != params.height ||
784 src_params.block_depth != params.block_depth ||
785 src_params.block_height != params.block_height) {
786 return std::nullopt;
787 }
788 modified |= surface->IsModified();
789
790 const u32 offset = static_cast<u32>(surface->GetCpuAddr() - cpu_addr);
791 const u32 slice = std::get<2>(params.GetBlockOffsetXYZ(offset));
792 const u32 width = params.width;
793 const u32 height = params.height;
794 const CopyParams copy_params(0, 0, 0, 0, 0, slice, 0, 0, width, height, 1);
795 TryCopyImage(surface, new_surface, copy_params);
796 }
797 for (const auto& surface : overlaps) {
798 Unregister(surface);
799 }
800 new_surface->MarkAsModified(modified, Tick());
801 Register(new_surface);
802
803 TView view = new_surface->GetMainView();
804 return std::make_pair(std::move(new_surface), std::move(view));
799 } 805 }
800 806
801 /** 807 /**
@@ -873,7 +879,7 @@ private:
873 } 879 }
874 } 880 }
875 881
876 // Check if it's a 3D texture 882 // Manage 3D textures
877 if (params.block_depth > 0) { 883 if (params.block_depth > 0) {
878 auto surface = 884 auto surface =
879 Manage3DSurfaces(overlaps, params, gpu_addr, cpu_addr, preserve_contents); 885 Manage3DSurfaces(overlaps, params, gpu_addr, cpu_addr, preserve_contents);
@@ -1048,7 +1054,7 @@ private:
1048 void DeduceBestBlit(SurfaceParams& src_params, SurfaceParams& dst_params, 1054 void DeduceBestBlit(SurfaceParams& src_params, SurfaceParams& dst_params,
1049 const GPUVAddr src_gpu_addr, const GPUVAddr dst_gpu_addr) { 1055 const GPUVAddr src_gpu_addr, const GPUVAddr dst_gpu_addr) {
1050 auto deduced_src = DeduceSurface(src_gpu_addr, src_params); 1056 auto deduced_src = DeduceSurface(src_gpu_addr, src_params);
1051 auto deduced_dst = DeduceSurface(src_gpu_addr, src_params); 1057 auto deduced_dst = DeduceSurface(dst_gpu_addr, dst_params);
1052 if (deduced_src.Failed() || deduced_dst.Failed()) { 1058 if (deduced_src.Failed() || deduced_dst.Failed()) {
1053 return; 1059 return;
1054 } 1060 }
@@ -1187,6 +1193,19 @@ private:
1187 return {}; 1193 return {};
1188 } 1194 }
1189 1195
1196 /// Try to do an image copy logging when formats are incompatible.
1197 void TryCopyImage(TSurface& src, TSurface& dst, const CopyParams& copy) {
1198 const SurfaceParams& src_params = src->GetSurfaceParams();
1199 const SurfaceParams& dst_params = dst->GetSurfaceParams();
1200 if (!format_compatibility.TestCopy(src_params.pixel_format, dst_params.pixel_format)) {
1201 LOG_ERROR(HW_GPU, "Illegal copy between formats={{{}, {}}}",
1202 static_cast<int>(dst_params.pixel_format),
1203 static_cast<int>(src_params.pixel_format));
1204 return;
1205 }
1206 ImageCopy(src, dst, copy);
1207 }
1208
1190 constexpr PixelFormat GetSiblingFormat(PixelFormat format) const { 1209 constexpr PixelFormat GetSiblingFormat(PixelFormat format) const {
1191 return siblings_table[static_cast<std::size_t>(format)]; 1210 return siblings_table[static_cast<std::size_t>(format)];
1192 } 1211 }
@@ -1236,6 +1255,7 @@ private:
1236 VideoCore::RasterizerInterface& rasterizer; 1255 VideoCore::RasterizerInterface& rasterizer;
1237 1256
1238 FormatLookupTable format_lookup_table; 1257 FormatLookupTable format_lookup_table;
1258 FormatCompatibility format_compatibility;
1239 1259
1240 u64 ticks{}; 1260 u64 ticks{};
1241 1261
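
Two ideas in this file work together: Manage3DSurfaces reassembles a 3D texture from 2D overlaps by converting each overlap's CPU-address distance from the new base into a Z slice, and TryCopyImage refuses copies between incompatible pixel formats instead of corrupting data. A compressed sketch of the slice placement, with GetBlockOffsetXYZ treated as a black box and all numbers assumed:

    // Suppose the new 3D surface starts at cpu_addr = 0x8000 and one 2D
    // overlap starts at 0x9000:
    //   offset    = 0x9000 - 0x8000 = 0x1000
    //   (x, y, z) = params.GetBlockOffsetXYZ(offset) // block-linear decode
    //   slice     = z
    // CopyParams(0, 0, 0, 0, 0, slice, 0, 0, width, height, 1) then copies
    // the whole overlap into that single slice, but only if
    // format_compatibility.TestCopy(src_format, dst_format) allows it.
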
diff --git a/src/yuzu/CMakeLists.txt b/src/yuzu/CMakeLists.txt
index 8b9404718..75c27e39e 100644
--- a/src/yuzu/CMakeLists.txt
+++ b/src/yuzu/CMakeLists.txt
@@ -208,6 +208,10 @@ if (MSVC)
208 copy_yuzu_unicorn_deps(yuzu) 208 copy_yuzu_unicorn_deps(yuzu)
209endif() 209endif()
210 210
211if (NOT APPLE)
212 target_compile_definitions(yuzu PRIVATE HAS_OPENGL)
213endif()
214
211if (ENABLE_VULKAN) 215if (ENABLE_VULKAN)
212 target_include_directories(yuzu PRIVATE ../../externals/Vulkan-Headers/include) 216 target_include_directories(yuzu PRIVATE ../../externals/Vulkan-Headers/include)
213 target_compile_definitions(yuzu PRIVATE HAS_VULKAN) 217 target_compile_definitions(yuzu PRIVATE HAS_VULKAN)
diff --git a/src/yuzu/bootmanager.cpp b/src/yuzu/bootmanager.cpp
index 1f5e43043..4bfce48a4 100644
--- a/src/yuzu/bootmanager.cpp
+++ b/src/yuzu/bootmanager.cpp
@@ -8,13 +8,16 @@
8#include <QHBoxLayout> 8#include <QHBoxLayout>
9#include <QKeyEvent> 9#include <QKeyEvent>
10#include <QMessageBox> 10#include <QMessageBox>
11#include <QOffscreenSurface>
12#include <QOpenGLContext>
13#include <QPainter> 11#include <QPainter>
14#include <QScreen> 12#include <QScreen>
15#include <QStringList> 13#include <QStringList>
16#include <QWindow> 14#include <QWindow>
17 15
16#ifdef HAS_OPENGL
17#include <QOffscreenSurface>
18#include <QOpenGLContext>
19#endif
20
18#if !defined(WIN32) && HAS_VULKAN 21#if !defined(WIN32) && HAS_VULKAN
19#include <qpa/qplatformnativeinterface.h> 22#include <qpa/qplatformnativeinterface.h>
20#endif 23#endif
@@ -41,49 +44,65 @@ EmuThread::EmuThread() = default;
41EmuThread::~EmuThread() = default; 44EmuThread::~EmuThread() = default;
42 45
43void EmuThread::run() { 46void EmuThread::run() {
44 MicroProfileOnThreadCreate("EmuThread"); 47 std::string name = "yuzu:EmuControlThread";
48 MicroProfileOnThreadCreate(name.c_str());
49 Common::SetCurrentThreadName(name.c_str());
50
51 auto& system = Core::System::GetInstance();
52
53 system.RegisterHostThread();
54
55 auto& gpu = system.GPU();
45 56
46 // Main process has been loaded. Make the context current to this thread and begin GPU and CPU 57 // Main process has been loaded. Make the context current to this thread and begin GPU and CPU
47 // execution. 58 // execution.
48 Core::System::GetInstance().GPU().Start(); 59 gpu.Start();
60
61 gpu.ObtainContext();
49 62
50 emit LoadProgress(VideoCore::LoadCallbackStage::Prepare, 0, 0); 63 emit LoadProgress(VideoCore::LoadCallbackStage::Prepare, 0, 0);
51 64
52 Core::System::GetInstance().Renderer().Rasterizer().LoadDiskResources( 65 system.Renderer().Rasterizer().LoadDiskResources(
53 stop_run, [this](VideoCore::LoadCallbackStage stage, std::size_t value, std::size_t total) { 66 stop_run, [this](VideoCore::LoadCallbackStage stage, std::size_t value, std::size_t total) {
54 emit LoadProgress(stage, value, total); 67 emit LoadProgress(stage, value, total);
55 }); 68 });
56 69
57 emit LoadProgress(VideoCore::LoadCallbackStage::Complete, 0, 0); 70 emit LoadProgress(VideoCore::LoadCallbackStage::Complete, 0, 0);
58 71
72 gpu.ReleaseContext();
73
59 // Holds whether the cpu was running during the last iteration, 74 // Holds whether the cpu was running during the last iteration,
60 // so that the DebugModeLeft signal can be emitted before the 75 // so that the DebugModeLeft signal can be emitted before the
61 // next execution step 76 // next execution step
62 bool was_active = false; 77 bool was_active = false;
63 while (!stop_run) { 78 while (!stop_run) {
64 if (running) { 79 if (running) {
65 if (!was_active) 80 if (was_active) {
66 emit DebugModeLeft(); 81 emit DebugModeLeft();
82 }
67 83
68 Core::System::ResultStatus result = Core::System::GetInstance().RunLoop(); 84 running_guard = true;
85 Core::System::ResultStatus result = system.Run();
69 if (result != Core::System::ResultStatus::Success) { 86 if (result != Core::System::ResultStatus::Success) {
87 running_guard = false;
70 this->SetRunning(false); 88 this->SetRunning(false);
71 emit ErrorThrown(result, Core::System::GetInstance().GetStatusDetails()); 89 emit ErrorThrown(result, system.GetStatusDetails());
72 } 90 }
91 running_wait.Wait();
92 result = system.Pause();
93 if (result != Core::System::ResultStatus::Success) {
94 running_guard = false;
95 this->SetRunning(false);
96 emit ErrorThrown(result, system.GetStatusDetails());
97 }
98 running_guard = false;
73 99
74 was_active = running || exec_step; 100 if (!stop_run) {
75 if (!was_active && !stop_run) 101 was_active = true;
76 emit DebugModeEntered(); 102 emit DebugModeEntered();
103 }
77 } else if (exec_step) { 104 } else if (exec_step) {
78 if (!was_active) 105 UNIMPLEMENTED();
79 emit DebugModeLeft();
80
81 exec_step = false;
82 Core::System::GetInstance().SingleStep();
83 emit DebugModeEntered();
84 yieldCurrentThread();
85
86 was_active = false;
87 } else { 106 } else {
88 std::unique_lock lock{running_mutex}; 107 std::unique_lock lock{running_mutex};
89 running_cv.wait(lock, [this] { return IsRunning() || exec_step || stop_run; }); 108 running_cv.wait(lock, [this] { return IsRunning() || exec_step || stop_run; });
@@ -91,13 +110,14 @@ void EmuThread::run() {
91 } 110 }
92 111
93 // Shutdown the core emulation 112 // Shutdown the core emulation
94 Core::System::GetInstance().Shutdown(); 113 system.Shutdown();
95 114
96#if MICROPROFILE_ENABLED 115#if MICROPROFILE_ENABLED
97 MicroProfileOnThreadExit(); 116 MicroProfileOnThreadExit();
98#endif 117#endif
99} 118}
100 119
120#ifdef HAS_OPENGL
101class OpenGLSharedContext : public Core::Frontend::GraphicsContext { 121class OpenGLSharedContext : public Core::Frontend::GraphicsContext {
102public: 122public:
103 /// Create the original context that should be shared from 123 /// Create the original context that should be shared from
@@ -183,6 +203,7 @@ private:
183 std::unique_ptr<QOffscreenSurface> offscreen_surface{}; 203 std::unique_ptr<QOffscreenSurface> offscreen_surface{};
184 QSurface* surface; 204 QSurface* surface;
185}; 205};
206#endif
186 207
187class DummyContext : public Core::Frontend::GraphicsContext {}; 208class DummyContext : public Core::Frontend::GraphicsContext {};
188 209
@@ -355,7 +376,7 @@ QByteArray GRenderWindow::saveGeometry() {
355} 376}
356 377
357qreal GRenderWindow::windowPixelRatio() const { 378qreal GRenderWindow::windowPixelRatio() const {
358 return devicePixelRatio(); 379 return devicePixelRatioF();
359} 380}
360 381
361std::pair<u32, u32> GRenderWindow::ScaleTouch(const QPointF& pos) const { 382std::pair<u32, u32> GRenderWindow::ScaleTouch(const QPointF& pos) const {
@@ -473,6 +494,7 @@ void GRenderWindow::resizeEvent(QResizeEvent* event) {
473} 494}
474 495
475std::unique_ptr<Core::Frontend::GraphicsContext> GRenderWindow::CreateSharedContext() const { 496std::unique_ptr<Core::Frontend::GraphicsContext> GRenderWindow::CreateSharedContext() const {
497#ifdef HAS_OPENGL
476 if (Settings::values.renderer_backend == Settings::RendererBackend::OpenGL) { 498 if (Settings::values.renderer_backend == Settings::RendererBackend::OpenGL) {
477 auto c = static_cast<OpenGLSharedContext*>(main_context.get()); 499 auto c = static_cast<OpenGLSharedContext*>(main_context.get());
478 // Bind the shared contexts to the main surface in case the backend wants to take over 500 // Bind the shared contexts to the main surface in case the backend wants to take over
@@ -480,6 +502,7 @@ std::unique_ptr<Core::Frontend::GraphicsContext> GRenderWindow::CreateSharedCont
480 return std::make_unique<OpenGLSharedContext>(c->GetShareContext(), 502 return std::make_unique<OpenGLSharedContext>(c->GetShareContext(),
481 child_widget->windowHandle()); 503 child_widget->windowHandle());
482 } 504 }
505#endif
483 return std::make_unique<DummyContext>(); 506 return std::make_unique<DummyContext>();
484} 507}
485 508
@@ -560,6 +583,7 @@ void GRenderWindow::OnMinimalClientAreaChangeRequest(std::pair<u32, u32> minimal
560} 583}
561 584
562bool GRenderWindow::InitializeOpenGL() { 585bool GRenderWindow::InitializeOpenGL() {
586#ifdef HAS_OPENGL
563 // TODO: One of these flags might be interesting: WA_OpaquePaintEvent, WA_NoBackground, 587 // TODO: One of these flags might be interesting: WA_OpaquePaintEvent, WA_NoBackground,
564 // WA_DontShowOnScreen, WA_DeleteOnClose 588 // WA_DontShowOnScreen, WA_DeleteOnClose
565 auto child = new OpenGLRenderWidget(this); 589 auto child = new OpenGLRenderWidget(this);
@@ -571,6 +595,11 @@ bool GRenderWindow::InitializeOpenGL() {
571 std::make_unique<OpenGLSharedContext>(context->GetShareContext(), child->windowHandle())); 595 std::make_unique<OpenGLSharedContext>(context->GetShareContext(), child->windowHandle()));
572 596
573 return true; 597 return true;
598#else
599 QMessageBox::warning(this, tr("OpenGL not available!"),
600 tr("yuzu has not been compiled with OpenGL support."));
601 return false;
602#endif
574} 603}
575 604
576bool GRenderWindow::InitializeVulkan() { 605bool GRenderWindow::InitializeVulkan() {
diff --git a/src/yuzu/bootmanager.h b/src/yuzu/bootmanager.h
index 3626604ca..6c59b4d5c 100644
--- a/src/yuzu/bootmanager.h
+++ b/src/yuzu/bootmanager.h
@@ -59,6 +59,12 @@ public:
59 this->running = running; 59 this->running = running;
60 lock.unlock(); 60 lock.unlock();
61 running_cv.notify_all(); 61 running_cv.notify_all();
62 if (!running) {
63 running_wait.Set();
 64 // Wait until effectively paused
65 while (running_guard)
66 ;
67 }
62 } 68 }
63 69
64 /** 70 /**
@@ -84,6 +90,8 @@ private:
84 std::atomic_bool stop_run{false}; 90 std::atomic_bool stop_run{false};
85 std::mutex running_mutex; 91 std::mutex running_mutex;
86 std::condition_variable running_cv; 92 std::condition_variable running_cv;
93 Common::Event running_wait{};
94 std::atomic_bool running_guard{false};
87 95
88signals: 96signals:
89 /** 97 /**
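
running_wait and running_guard form a two-phase pause handshake: SetRunning(false) signals the event so the emulation thread leaves system.Run() and executes system.Pause(), then the caller spin-waits on running_guard until the pause has actually completed. A reduced model of the handshake; the Event class is a stand-in for Common::Event and an assumption about its semantics:

    #include <atomic>
    #include <condition_variable>
    #include <mutex>

    class Event { // minimal stand-in for Common::Event
    public:
        void Set() {
            std::scoped_lock lock{mutex};
            signaled = true;
            cv.notify_one();
        }
        void Wait() {
            std::unique_lock lock{mutex};
            cv.wait(lock, [this] { return signaled; });
            signaled = false;
        }

    private:
        std::mutex mutex;
        std::condition_variable cv;
        bool signaled = false;
    };

    Event running_wait;
    std::atomic_bool running_guard{false};

    void EmuThreadBody() {     // reduced from EmuThread::run()
        running_guard = true;
        // system.Run() starts the cores and returns; emulation proceeds...
        running_wait.Wait();   // ...until the UI signals a pause request
        // system.Pause() brings all cores to a consistent stop here.
        running_guard = false; // releases the UI thread's spin-wait
    }

    void UiPauseRequest() {    // reduced from EmuThread::SetRunning(false)
        running_wait.Set();
        while (running_guard) {
            // busy-wait until effectively paused, as in the real code
        }
    }
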
diff --git a/src/yuzu/configuration/config.cpp b/src/yuzu/configuration/config.cpp
index 7e9073cc3..bbbd96113 100644
--- a/src/yuzu/configuration/config.cpp
+++ b/src/yuzu/configuration/config.cpp
@@ -211,7 +211,7 @@ const std::array<int, Settings::NativeKeyboard::NumKeyboardMods> Config::default
211// This must be in alphabetical order according to action name as it must have the same order as 211// This must be in alphabetical order according to action name as it must have the same order as
212// UISetting::values.shortcuts, which is alphabetically ordered. 212// UISetting::values.shortcuts, which is alphabetically ordered.
213// clang-format off 213// clang-format off
214const std::array<UISettings::Shortcut, 15> Config::default_hotkeys{{ 214const std::array<UISettings::Shortcut, 16> Config::default_hotkeys{{
215 {QStringLiteral("Capture Screenshot"), QStringLiteral("Main Window"), {QStringLiteral("Ctrl+P"), Qt::ApplicationShortcut}}, 215 {QStringLiteral("Capture Screenshot"), QStringLiteral("Main Window"), {QStringLiteral("Ctrl+P"), Qt::ApplicationShortcut}},
216 {QStringLiteral("Change Docked Mode"), QStringLiteral("Main Window"), {QStringLiteral("F10"), Qt::ApplicationShortcut}}, 216 {QStringLiteral("Change Docked Mode"), QStringLiteral("Main Window"), {QStringLiteral("F10"), Qt::ApplicationShortcut}},
217 {QStringLiteral("Continue/Pause Emulation"), QStringLiteral("Main Window"), {QStringLiteral("F4"), Qt::WindowShortcut}}, 217 {QStringLiteral("Continue/Pause Emulation"), QStringLiteral("Main Window"), {QStringLiteral("F4"), Qt::WindowShortcut}},
@@ -222,6 +222,7 @@ const std::array<UISettings::Shortcut, 15> Config::default_hotkeys{{
222 {QStringLiteral("Increase Speed Limit"), QStringLiteral("Main Window"), {QStringLiteral("+"), Qt::ApplicationShortcut}}, 222 {QStringLiteral("Increase Speed Limit"), QStringLiteral("Main Window"), {QStringLiteral("+"), Qt::ApplicationShortcut}},
223 {QStringLiteral("Load Amiibo"), QStringLiteral("Main Window"), {QStringLiteral("F2"), Qt::ApplicationShortcut}}, 223 {QStringLiteral("Load Amiibo"), QStringLiteral("Main Window"), {QStringLiteral("F2"), Qt::ApplicationShortcut}},
224 {QStringLiteral("Load File"), QStringLiteral("Main Window"), {QStringLiteral("Ctrl+O"), Qt::WindowShortcut}}, 224 {QStringLiteral("Load File"), QStringLiteral("Main Window"), {QStringLiteral("Ctrl+O"), Qt::WindowShortcut}},
225 {QStringLiteral("Mute Audio"), QStringLiteral("Main Window"), {QStringLiteral("Ctrl+M"), Qt::WindowShortcut}},
225 {QStringLiteral("Restart Emulation"), QStringLiteral("Main Window"), {QStringLiteral("F6"), Qt::WindowShortcut}}, 226 {QStringLiteral("Restart Emulation"), QStringLiteral("Main Window"), {QStringLiteral("F6"), Qt::WindowShortcut}},
226 {QStringLiteral("Stop Emulation"), QStringLiteral("Main Window"), {QStringLiteral("F5"), Qt::WindowShortcut}}, 227 {QStringLiteral("Stop Emulation"), QStringLiteral("Main Window"), {QStringLiteral("F5"), Qt::WindowShortcut}},
227 {QStringLiteral("Toggle Filter Bar"), QStringLiteral("Main Window"), {QStringLiteral("Ctrl+F"), Qt::WindowShortcut}}, 228 {QStringLiteral("Toggle Filter Bar"), QStringLiteral("Main Window"), {QStringLiteral("Ctrl+F"), Qt::WindowShortcut}},
@@ -631,13 +632,11 @@ void Config::ReadRendererValues() {
631 static_cast<Settings::RendererBackend>(ReadSetting(QStringLiteral("backend"), 0).toInt()); 632 static_cast<Settings::RendererBackend>(ReadSetting(QStringLiteral("backend"), 0).toInt());
632 Settings::values.renderer_debug = ReadSetting(QStringLiteral("debug"), false).toBool(); 633 Settings::values.renderer_debug = ReadSetting(QStringLiteral("debug"), false).toBool();
633 Settings::values.vulkan_device = ReadSetting(QStringLiteral("vulkan_device"), 0).toInt(); 634 Settings::values.vulkan_device = ReadSetting(QStringLiteral("vulkan_device"), 0).toInt();
634 Settings::values.resolution_factor =
635 ReadSetting(QStringLiteral("resolution_factor"), 1.0).toFloat();
636 Settings::values.aspect_ratio = ReadSetting(QStringLiteral("aspect_ratio"), 0).toInt(); 635 Settings::values.aspect_ratio = ReadSetting(QStringLiteral("aspect_ratio"), 0).toInt();
637 Settings::values.max_anisotropy = ReadSetting(QStringLiteral("max_anisotropy"), 0).toInt(); 636 Settings::values.max_anisotropy = ReadSetting(QStringLiteral("max_anisotropy"), 0).toInt();
638 Settings::values.use_frame_limit = 637 Settings::values.use_frame_limit =
639 ReadSetting(QStringLiteral("use_frame_limit"), true).toBool(); 638 ReadSetting(QStringLiteral("use_frame_limit"), true).toBool();
640 Settings::values.frame_limit = ReadSetting(QStringLiteral("frame_limit"), 100).toInt(); 639 Settings::values.frame_limit = ReadSetting(QStringLiteral("frame_limit"), 100).toUInt();
641 Settings::values.use_disk_shader_cache = 640 Settings::values.use_disk_shader_cache =
642 ReadSetting(QStringLiteral("use_disk_shader_cache"), true).toBool(); 641 ReadSetting(QStringLiteral("use_disk_shader_cache"), true).toBool();
643 const int gpu_accuracy_level = ReadSetting(QStringLiteral("gpu_accuracy"), 0).toInt(); 642 const int gpu_accuracy_level = ReadSetting(QStringLiteral("gpu_accuracy"), 0).toInt();
@@ -722,8 +721,6 @@ void Config::ReadUIValues() {
722 .toString(); 721 .toString();
723 UISettings::values.enable_discord_presence = 722 UISettings::values.enable_discord_presence =
724 ReadSetting(QStringLiteral("enable_discord_presence"), true).toBool(); 723 ReadSetting(QStringLiteral("enable_discord_presence"), true).toBool();
725 UISettings::values.screenshot_resolution_factor =
726 static_cast<u16>(ReadSetting(QStringLiteral("screenshot_resolution_factor"), 0).toUInt());
727 UISettings::values.select_user_on_boot = 724 UISettings::values.select_user_on_boot =
728 ReadSetting(QStringLiteral("select_user_on_boot"), false).toBool(); 725 ReadSetting(QStringLiteral("select_user_on_boot"), false).toBool();
729 726
@@ -1082,8 +1079,6 @@ void Config::SaveRendererValues() {
1082 WriteSetting(QStringLiteral("backend"), static_cast<int>(Settings::values.renderer_backend), 0); 1079 WriteSetting(QStringLiteral("backend"), static_cast<int>(Settings::values.renderer_backend), 0);
1083 WriteSetting(QStringLiteral("debug"), Settings::values.renderer_debug, false); 1080 WriteSetting(QStringLiteral("debug"), Settings::values.renderer_debug, false);
1084 WriteSetting(QStringLiteral("vulkan_device"), Settings::values.vulkan_device, 0); 1081 WriteSetting(QStringLiteral("vulkan_device"), Settings::values.vulkan_device, 0);
1085 WriteSetting(QStringLiteral("resolution_factor"),
1086 static_cast<double>(Settings::values.resolution_factor), 1.0);
1087 WriteSetting(QStringLiteral("aspect_ratio"), Settings::values.aspect_ratio, 0); 1082 WriteSetting(QStringLiteral("aspect_ratio"), Settings::values.aspect_ratio, 0);
1088 WriteSetting(QStringLiteral("max_anisotropy"), Settings::values.max_anisotropy, 0); 1083 WriteSetting(QStringLiteral("max_anisotropy"), Settings::values.max_anisotropy, 0);
1089 WriteSetting(QStringLiteral("use_frame_limit"), Settings::values.use_frame_limit, true); 1084 WriteSetting(QStringLiteral("use_frame_limit"), Settings::values.use_frame_limit, true);
@@ -1159,8 +1154,6 @@ void Config::SaveUIValues() {
1159 QString::fromUtf8(UISettings::themes[0].second)); 1154 QString::fromUtf8(UISettings::themes[0].second));
1160 WriteSetting(QStringLiteral("enable_discord_presence"), 1155 WriteSetting(QStringLiteral("enable_discord_presence"),
1161 UISettings::values.enable_discord_presence, true); 1156 UISettings::values.enable_discord_presence, true);
1162 WriteSetting(QStringLiteral("screenshot_resolution_factor"),
1163 UISettings::values.screenshot_resolution_factor, 0);
1164 WriteSetting(QStringLiteral("select_user_on_boot"), UISettings::values.select_user_on_boot, 1157 WriteSetting(QStringLiteral("select_user_on_boot"), UISettings::values.select_user_on_boot,
1165 false); 1158 false);
1166 1159
diff --git a/src/yuzu/configuration/config.h b/src/yuzu/configuration/config.h
index 5cd2a5feb..09316382c 100644
--- a/src/yuzu/configuration/config.h
+++ b/src/yuzu/configuration/config.h
@@ -27,7 +27,7 @@ public:
         default_mouse_buttons;
     static const std::array<int, Settings::NativeKeyboard::NumKeyboardKeys> default_keyboard_keys;
     static const std::array<int, Settings::NativeKeyboard::NumKeyboardMods> default_keyboard_mods;
-    static const std::array<UISettings::Shortcut, 15> default_hotkeys;
+    static const std::array<UISettings::Shortcut, 16> default_hotkeys;
 
 private:
     void ReadValues();
diff --git a/src/yuzu/configuration/configure_general.cpp b/src/yuzu/configuration/configure_general.cpp
index cb95423e0..74b2ad537 100644
--- a/src/yuzu/configuration/configure_general.cpp
+++ b/src/yuzu/configuration/configure_general.cpp
@@ -23,6 +23,11 @@ ConfigureGeneral::ConfigureGeneral(QWidget* parent)
 ConfigureGeneral::~ConfigureGeneral() = default;
 
 void ConfigureGeneral::SetConfiguration() {
+    const bool runtime_lock = !Core::System::GetInstance().IsPoweredOn();
+
+    ui->use_multi_core->setEnabled(runtime_lock);
+    ui->use_multi_core->setChecked(Settings::values.use_multi_core);
+
     ui->toggle_check_exit->setChecked(UISettings::values.confirm_before_closing);
     ui->toggle_user_on_boot->setChecked(UISettings::values.select_user_on_boot);
     ui->toggle_background_pause->setChecked(UISettings::values.pause_when_in_background);
@@ -41,6 +46,7 @@ void ConfigureGeneral::ApplyConfiguration() {
 
     Settings::values.use_frame_limit = ui->toggle_frame_limit->isChecked();
     Settings::values.frame_limit = ui->frame_limit->value();
+    Settings::values.use_multi_core = ui->use_multi_core->isChecked();
 }
 
 void ConfigureGeneral::changeEvent(QEvent* event) {
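
The new SetConfiguration() lines use the runtime-lock idiom shared by these dialogs: options the core cannot change mid-session stay visible but are disabled while a title is running. Condensed into a hypothetical helper for illustration:

#include <QCheckBox>

// A boot-time-only option stays visible (its checked state mirrors the
// setting) but cannot be toggled while emulation is powered on.
void ApplyRuntimeLock(QCheckBox* option, bool setting_value, bool powered_on) {
    const bool runtime_lock = !powered_on;
    option->setEnabled(runtime_lock);  // locked while a title runs
    option->setChecked(setting_value); // still reflects the current setting
}
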
diff --git a/src/yuzu/configuration/configure_general.ui b/src/yuzu/configuration/configure_general.ui
index fc3b7e65a..2711116a2 100644
--- a/src/yuzu/configuration/configure_general.ui
+++ b/src/yuzu/configuration/configure_general.ui
@@ -52,6 +52,13 @@
     </layout>
    </item>
    <item>
+    <widget class="QCheckBox" name="use_multi_core">
+     <property name="text">
+      <string>Multicore CPU Emulation</string>
+     </property>
+    </widget>
+   </item>
+   <item>
     <widget class="QCheckBox" name="toggle_check_exit">
      <property name="text">
       <string>Confirm exit while emulation is running</string>
diff --git a/src/yuzu/configuration/configure_graphics.cpp b/src/yuzu/configuration/configure_graphics.cpp
index ea667caef..304625cd7 100644
--- a/src/yuzu/configuration/configure_graphics.cpp
+++ b/src/yuzu/configuration/configure_graphics.cpp
@@ -19,47 +19,6 @@
19#include "video_core/renderer_vulkan/renderer_vulkan.h" 19#include "video_core/renderer_vulkan/renderer_vulkan.h"
20#endif 20#endif
21 21
22namespace {
23enum class Resolution : int {
24 Auto,
25 Scale1x,
26 Scale2x,
27 Scale3x,
28 Scale4x,
29};
30
31float ToResolutionFactor(Resolution option) {
32 switch (option) {
33 case Resolution::Auto:
34 return 0.f;
35 case Resolution::Scale1x:
36 return 1.f;
37 case Resolution::Scale2x:
38 return 2.f;
39 case Resolution::Scale3x:
40 return 3.f;
41 case Resolution::Scale4x:
42 return 4.f;
43 }
44 return 0.f;
45}
46
47Resolution FromResolutionFactor(float factor) {
48 if (factor == 0.f) {
49 return Resolution::Auto;
50 } else if (factor == 1.f) {
51 return Resolution::Scale1x;
52 } else if (factor == 2.f) {
53 return Resolution::Scale2x;
54 } else if (factor == 3.f) {
55 return Resolution::Scale3x;
56 } else if (factor == 4.f) {
57 return Resolution::Scale4x;
58 }
59 return Resolution::Auto;
60}
61} // Anonymous namespace
62
63ConfigureGraphics::ConfigureGraphics(QWidget* parent) 22ConfigureGraphics::ConfigureGraphics(QWidget* parent)
64 : QWidget(parent), ui(new Ui::ConfigureGraphics) { 23 : QWidget(parent), ui(new Ui::ConfigureGraphics) {
65 vulkan_device = Settings::values.vulkan_device; 24 vulkan_device = Settings::values.vulkan_device;
@@ -99,8 +58,6 @@ void ConfigureGraphics::SetConfiguration() {
 
     ui->api->setEnabled(runtime_lock);
     ui->api->setCurrentIndex(static_cast<int>(Settings::values.renderer_backend));
-    ui->resolution_factor_combobox->setCurrentIndex(
-        static_cast<int>(FromResolutionFactor(Settings::values.resolution_factor)));
     ui->aspect_ratio_combobox->setCurrentIndex(Settings::values.aspect_ratio);
     ui->use_disk_shader_cache->setEnabled(runtime_lock);
     ui->use_disk_shader_cache->setChecked(Settings::values.use_disk_shader_cache);
@@ -114,8 +71,6 @@ void ConfigureGraphics::SetConfiguration() {
 void ConfigureGraphics::ApplyConfiguration() {
     Settings::values.renderer_backend = GetCurrentGraphicsBackend();
     Settings::values.vulkan_device = vulkan_device;
-    Settings::values.resolution_factor =
-        ToResolutionFactor(static_cast<Resolution>(ui->resolution_factor_combobox->currentIndex()));
     Settings::values.aspect_ratio = ui->aspect_ratio_combobox->currentIndex();
     Settings::values.use_disk_shader_cache = ui->use_disk_shader_cache->isChecked();
     Settings::values.use_asynchronous_gpu_emulation =
diff --git a/src/yuzu/configuration/configure_graphics.ui b/src/yuzu/configuration/configure_graphics.ui
index c816d6108..6e75447a5 100644
--- a/src/yuzu/configuration/configure_graphics.ui
+++ b/src/yuzu/configuration/configure_graphics.ui
@@ -85,46 +85,6 @@
     </widget>
    </item>
    <item>
-    <layout class="QHBoxLayout" name="horizontalLayout_2">
-     <item>
-      <widget class="QLabel" name="label">
-       <property name="text">
-        <string>Internal Resolution:</string>
-       </property>
-      </widget>
-     </item>
-     <item>
-      <widget class="QComboBox" name="resolution_factor_combobox">
-       <item>
-        <property name="text">
-         <string>Auto (Window Size)</string>
-        </property>
-       </item>
-       <item>
-        <property name="text">
-         <string>Native (1280x720)</string>
-        </property>
-       </item>
-       <item>
-        <property name="text">
-         <string>2x Native (2560x1440)</string>
-        </property>
-       </item>
-       <item>
-        <property name="text">
-         <string>3x Native (3840x2160)</string>
-        </property>
-       </item>
-       <item>
-        <property name="text">
-         <string>4x Native (5120x2880)</string>
-        </property>
-       </item>
-      </widget>
-     </item>
-    </layout>
-   </item>
-   <item>
     <layout class="QHBoxLayout" name="horizontalLayout_6">
      <item>
       <widget class="QLabel" name="ar_label">
diff --git a/src/yuzu/configuration/configure_graphics_advanced.cpp b/src/yuzu/configuration/configure_graphics_advanced.cpp
index 37aadf7f8..be5006ad3 100644
--- a/src/yuzu/configuration/configure_graphics_advanced.cpp
+++ b/src/yuzu/configuration/configure_graphics_advanced.cpp
@@ -12,9 +12,6 @@ ConfigureGraphicsAdvanced::ConfigureGraphicsAdvanced(QWidget* parent)
 
     ui->setupUi(this);
 
-    // TODO: Remove this after assembly shaders are fully integrated
-    ui->use_assembly_shaders->setVisible(false);
-
     SetConfiguration();
 }
 
diff --git a/src/yuzu/configuration/configure_service.cpp b/src/yuzu/configuration/configure_service.cpp
index 06566e981..0de7a4f0b 100644
--- a/src/yuzu/configuration/configure_service.cpp
+++ b/src/yuzu/configuration/configure_service.cpp
@@ -68,6 +68,7 @@ void ConfigureService::SetConfiguration() {
 }
 
 std::pair<QString, QString> ConfigureService::BCATDownloadEvents() {
+#ifdef YUZU_ENABLE_BOXCAT
     std::optional<std::string> global;
     std::map<std::string, Service::BCAT::EventStatus> map;
     const auto res = Service::BCAT::Boxcat::GetStatus(global, map);
@@ -105,7 +106,10 @@ std::pair<QString, QString> ConfigureService::BCATDownloadEvents() {
                   .arg(QString::fromStdString(key))
                   .arg(FormatEventStatusString(value));
     }
-    return {QStringLiteral("Current Boxcat Events"), std::move(out)};
+    return {tr("Current Boxcat Events"), std::move(out)};
+#else
+    return {tr("Current Boxcat Events"), tr("There are currently no events on boxcat.")};
+#endif
 }
 
 void ConfigureService::OnBCATImplChanged() {
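
Two things happen in this hunk: the user-visible strings move from QStringLiteral to tr() so they can be translated, and the Boxcat query is compiled only when the optional backend exists. A generic sketch of the compile-time fallback, where QueryBoxcat() is a hypothetical stand-in for the code in the #ifdef branch:

#include <utility>
#include <QObject>
#include <QString>

QString QueryBoxcat(); // hypothetical stand-in for the real Boxcat status query

std::pair<QString, QString> DownloadEventsSketch() {
#ifdef YUZU_ENABLE_BOXCAT
    // Backend present: query the live service and return the formatted list.
    return {QObject::tr("Current Boxcat Events"), QueryBoxcat()};
#else
    // Backend compiled out: return a translatable placeholder instead of
    // referencing symbols that never existed in this build.
    return {QObject::tr("Current Boxcat Events"),
            QObject::tr("There are currently no events on boxcat.")};
#endif
}
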
diff --git a/src/yuzu/debugger/wait_tree.cpp b/src/yuzu/debugger/wait_tree.cpp
index c1ea25fb8..9bb0a0109 100644
--- a/src/yuzu/debugger/wait_tree.cpp
+++ b/src/yuzu/debugger/wait_tree.cpp
@@ -2,10 +2,13 @@
 // Licensed under GPLv2 or any later version
 // Refer to the license.txt file included.
 
+#include <fmt/format.h>
+
 #include "yuzu/debugger/wait_tree.h"
 #include "yuzu/util/util.h"
 
 #include "common/assert.h"
+#include "core/arm/arm_interface.h"
 #include "core/core.h"
 #include "core/hle/kernel/handle_table.h"
 #include "core/hle/kernel/mutex.h"
@@ -59,8 +62,10 @@ std::vector<std::unique_ptr<WaitTreeThread>> WaitTreeItem::MakeThreadItemList()
     std::size_t row = 0;
     auto add_threads = [&](const std::vector<std::shared_ptr<Kernel::Thread>>& threads) {
         for (std::size_t i = 0; i < threads.size(); ++i) {
-            item_list.push_back(std::make_unique<WaitTreeThread>(*threads[i]));
-            item_list.back()->row = row;
+            if (!threads[i]->IsHLEThread()) {
+                item_list.push_back(std::make_unique<WaitTreeThread>(*threads[i]));
+                item_list.back()->row = row;
+            }
             ++row;
         }
     };
@@ -114,20 +119,21 @@ QString WaitTreeCallstack::GetText() const {
 std::vector<std::unique_ptr<WaitTreeItem>> WaitTreeCallstack::GetChildren() const {
     std::vector<std::unique_ptr<WaitTreeItem>> list;
 
-    constexpr std::size_t BaseRegister = 29;
-    auto& memory = Core::System::GetInstance().Memory();
-    u64 base_pointer = thread.GetContext64().cpu_registers[BaseRegister];
+    if (thread.IsHLEThread()) {
+        return list;
+    }
 
-    while (base_pointer != 0) {
-        const u64 lr = memory.Read64(base_pointer + sizeof(u64));
-        if (lr == 0) {
-            break;
-        }
+    if (thread.GetOwnerProcess() == nullptr || !thread.GetOwnerProcess()->Is64BitProcess()) {
+        return list;
+    }
 
-        list.push_back(std::make_unique<WaitTreeText>(
-            tr("0x%1").arg(lr - sizeof(u32), 16, 16, QLatin1Char{'0'})));
+    auto backtrace = Core::ARM_Interface::GetBacktraceFromContext(Core::System::GetInstance(),
+                                                                  thread.GetContext64());
 
-        base_pointer = memory.Read64(base_pointer);
+    for (auto& entry : backtrace) {
+        std::string s = fmt::format("{:20}{:016X} {:016X} {:016X} {}", entry.module, entry.address,
+                                    entry.original_address, entry.offset, entry.name);
+        list.push_back(std::make_unique<WaitTreeText>(QString::fromStdString(s)));
     }
 
     return list;
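
For reference, in the new format string "{:20}" pads the module name to a 20-column field (strings left-align by default in fmt) and "{:016X}" prints each address as 16 zero-padded uppercase hex digits. A standalone illustration with made-up values:

#include <string>
#include <fmt/format.h>

int main() {
    // Same format spec as the backtrace rows above, with illustrative inputs.
    const std::string s =
        fmt::format("{:20}{:016X} {:016X} {:016X} {}", "main", 0x80004000ull,
                    0x80004000ull, 0x1a4ull, "SomeFunctionName");
    // -> "main                0000000080004000 0000000080004000 00000000000001A4 SomeFunctionName"
    fmt::print("{}\n", s);
    return 0;
}
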
@@ -206,7 +212,15 @@ QString WaitTreeThread::GetText() const {
             status = tr("running");
             break;
         case Kernel::ThreadStatus::Ready:
-            status = tr("ready");
+            if (!thread.IsPaused()) {
+                if (thread.WasRunning()) {
+                    status = tr("running");
+                } else {
+                    status = tr("ready");
+                }
+            } else {
+                status = tr("paused");
+            }
             break;
         case Kernel::ThreadStatus::Paused:
             status = tr("paused");
@@ -254,7 +268,15 @@ QColor WaitTreeThread::GetColor() const {
     case Kernel::ThreadStatus::Running:
         return QColor(Qt::GlobalColor::darkGreen);
     case Kernel::ThreadStatus::Ready:
-        return QColor(Qt::GlobalColor::darkBlue);
+        if (!thread.IsPaused()) {
+            if (thread.WasRunning()) {
+                return QColor(Qt::GlobalColor::darkGreen);
+            } else {
+                return QColor(Qt::GlobalColor::darkBlue);
+            }
+        } else {
+            return QColor(Qt::GlobalColor::lightGray);
+        }
     case Kernel::ThreadStatus::Paused:
         return QColor(Qt::GlobalColor::lightGray);
     case Kernel::ThreadStatus::WaitHLEEvent:
@@ -319,7 +341,7 @@ std::vector<std::unique_ptr<WaitTreeItem>> WaitTreeThread::GetChildren() const {
 
     if (thread.GetStatus() == Kernel::ThreadStatus::WaitSynch) {
         list.push_back(std::make_unique<WaitTreeObjectList>(thread.GetSynchronizationObjects(),
-                                                            thread.IsSleepingOnWait()));
+                                                            thread.IsWaitingSync()));
     }
 
     list.push_back(std::make_unique<WaitTreeCallstack>(thread));
diff --git a/src/yuzu/main.cpp b/src/yuzu/main.cpp
index 42bbf8b35..663ba5632 100644
--- a/src/yuzu/main.cpp
+++ b/src/yuzu/main.cpp
@@ -56,6 +56,7 @@ static FileSys::VirtualFile VfsDirectoryCreateFileWrapper(const FileSys::Virtual
 #include <QShortcut>
 #include <QStatusBar>
 #include <QSysInfo>
+#include <QUrl>
 #include <QtConcurrent/QtConcurrent>
 
 #include <fmt/format.h>
@@ -217,7 +218,20 @@ GMainWindow::GMainWindow()
     LOG_INFO(Frontend, "yuzu Version: {} | {}-{}", yuzu_build_version, Common::g_scm_branch,
              Common::g_scm_desc);
 #ifdef ARCHITECTURE_x86_64
-    LOG_INFO(Frontend, "Host CPU: {}", Common::GetCPUCaps().cpu_string);
+    const auto& caps = Common::GetCPUCaps();
+    std::string cpu_string = caps.cpu_string;
+    if (caps.avx || caps.avx2 || caps.avx512) {
+        cpu_string += " | AVX";
+        if (caps.avx512) {
+            cpu_string += "512";
+        } else if (caps.avx2) {
+            cpu_string += '2';
+        }
+        if (caps.fma || caps.fma4) {
+            cpu_string += " | FMA";
+        }
+    }
+    LOG_INFO(Frontend, "Host CPU: {}", cpu_string);
 #endif
     LOG_INFO(Frontend, "Host OS: {}", QSysInfo::prettyProductName().toStdString());
     LOG_INFO(Frontend, "Host RAM: {:.2f} GB",
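
Pulled out of the hunk for clarity: the suffix logic appends at most one AVX tier, and, as written, only reports FMA when an AVX flag is also set. A self-contained sketch of the same logic with illustrative inputs:

#include <string>

// Mirrors the capability-suffix logic above; flag values come from
// Common::GetCPUCaps() in the real code.
std::string BuildCpuString(std::string cpu, bool avx, bool avx2, bool avx512, bool fma,
                           bool fma4) {
    if (avx || avx2 || avx512) {
        cpu += " | AVX";
        if (avx512) {
            cpu += "512";
        } else if (avx2) {
            cpu += '2';
        }
        if (fma || fma4) {
            cpu += " | FMA"; // note: FMA is only reported alongside AVX
        }
    }
    return cpu; // e.g. "Some x86-64 CPU | AVX2 | FMA"
}
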
@@ -520,14 +534,36 @@ void GMainWindow::InitializeWidgets() {
         if (emulation_running) {
             return;
         }
-        Settings::values.use_asynchronous_gpu_emulation =
-            !Settings::values.use_asynchronous_gpu_emulation;
+        bool is_async =
+            !Settings::values.use_asynchronous_gpu_emulation || Settings::values.use_multi_core;
+        Settings::values.use_asynchronous_gpu_emulation = is_async;
         async_status_button->setChecked(Settings::values.use_asynchronous_gpu_emulation);
         Settings::Apply();
     });
     async_status_button->setText(tr("ASYNC"));
     async_status_button->setCheckable(true);
     async_status_button->setChecked(Settings::values.use_asynchronous_gpu_emulation);
+
+    // Setup Multicore button
+    multicore_status_button = new QPushButton();
+    multicore_status_button->setObjectName(QStringLiteral("TogglableStatusBarButton"));
+    multicore_status_button->setFocusPolicy(Qt::NoFocus);
+    connect(multicore_status_button, &QPushButton::clicked, [&] {
+        if (emulation_running) {
+            return;
+        }
+        Settings::values.use_multi_core = !Settings::values.use_multi_core;
+        bool is_async =
+            Settings::values.use_asynchronous_gpu_emulation || Settings::values.use_multi_core;
+        Settings::values.use_asynchronous_gpu_emulation = is_async;
+        async_status_button->setChecked(Settings::values.use_asynchronous_gpu_emulation);
+        multicore_status_button->setChecked(Settings::values.use_multi_core);
+        Settings::Apply();
+    });
+    multicore_status_button->setText(tr("MULTICORE"));
+    multicore_status_button->setCheckable(true);
+    multicore_status_button->setChecked(Settings::values.use_multi_core);
+    statusBar()->insertPermanentWidget(0, multicore_status_button);
     statusBar()->insertPermanentWidget(0, async_status_button);
 
     // Setup Renderer API button
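
Both status-bar handlers enforce a single invariant: enabling multicore forces asynchronous GPU emulation on, so the synchronous GPU path is never combined with multicore. The rule, condensed (setting names are the real ones from the diff; the free function is illustrative):

// Condensed form of the invariant both button handlers apply.
void EnforceMulticoreImpliesAsync() {
    const bool is_async =
        Settings::values.use_asynchronous_gpu_emulation || Settings::values.use_multi_core;
    Settings::values.use_asynchronous_gpu_emulation = is_async;
}
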
@@ -653,6 +689,11 @@ void GMainWindow::InitializeHotkeys() {
     ui.action_Capture_Screenshot->setShortcutContext(
         hotkey_registry.GetShortcutContext(main_window, capture_screenshot));
 
+    ui.action_Fullscreen->setShortcut(
+        hotkey_registry.GetHotkey(main_window, fullscreen, this)->key());
+    ui.action_Fullscreen->setShortcutContext(
+        hotkey_registry.GetShortcutContext(main_window, fullscreen));
+
     connect(hotkey_registry.GetHotkey(main_window, QStringLiteral("Load File"), this),
             &QShortcut::activated, this, &GMainWindow::OnMenuLoadFile);
     connect(
@@ -689,10 +730,7 @@ void GMainWindow::InitializeHotkeys() {
         Settings::values.use_frame_limit = !Settings::values.use_frame_limit;
         UpdateStatusBar();
     });
-    // TODO: Remove this comment/static whenever the next major release of
-    // MSVC occurs and we make it a requirement (see:
-    // https://developercommunity.visualstudio.com/content/problem/93922/constexprs-are-trying-to-be-captured-in-lambda-fun.html)
-    static constexpr u16 SPEED_LIMIT_STEP = 5;
+    constexpr u16 SPEED_LIMIT_STEP = 5;
     connect(hotkey_registry.GetHotkey(main_window, QStringLiteral("Increase Speed Limit"), this),
             &QShortcut::activated, this, [&] {
                 if (Settings::values.frame_limit < 9999 - SPEED_LIMIT_STEP) {
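
The deleted TODO worked around an old MSVC bug in which a by-reference capture default tried to capture local constexpr variables. Reading such a constant inside a lambda is not an odr-use, so nothing needs to be captured once the compiler is fixed; a minimal demonstration:

#include <cstdint>

void Demo() {
    constexpr std::uint16_t SPEED_LIMIT_STEP = 5;
    std::uint16_t limit = 100;
    auto increase = [&] {
        // Using SPEED_LIMIT_STEP's value is a constant expression, not an
        // odr-use, so [&] never actually captures it.
        if (limit < 9999 - SPEED_LIMIT_STEP) {
            limit = static_cast<std::uint16_t>(limit + SPEED_LIMIT_STEP);
        }
    };
    increase();
}
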
@@ -726,6 +764,9 @@ void GMainWindow::InitializeHotkeys() {
                 Settings::values.use_docked_mode);
         dock_status_button->setChecked(Settings::values.use_docked_mode);
     });
+    connect(hotkey_registry.GetHotkey(main_window, QStringLiteral("Mute Audio"), this),
+            &QShortcut::activated, this,
+            [] { Settings::values.audio_muted = !Settings::values.audio_muted; });
 }
 
 void GMainWindow::SetDefaultUIGeometry() {
@@ -826,6 +867,10 @@ void GMainWindow::ConnectMenuEvents() {
     connect(ui.action_Stop, &QAction::triggered, this, &GMainWindow::OnStopGame);
     connect(ui.action_Report_Compatibility, &QAction::triggered, this,
             &GMainWindow::OnMenuReportCompatibility);
+    connect(ui.action_Open_Mods_Page, &QAction::triggered, this, &GMainWindow::OnOpenModsPage);
+    connect(ui.action_Open_Quickstart_Guide, &QAction::triggered, this,
+            &GMainWindow::OnOpenQuickstartGuide);
+    connect(ui.action_Open_FAQ, &QAction::triggered, this, &GMainWindow::OnOpenFAQ);
     connect(ui.action_Restart, &QAction::triggered, this, [this] { BootGame(QString(game_path)); });
     connect(ui.action_Configure, &QAction::triggered, this, &GMainWindow::OnConfigure);
 
@@ -839,10 +884,6 @@ void GMainWindow::ConnectMenuEvents() {
     connect(ui.action_Reset_Window_Size, &QAction::triggered, this, &GMainWindow::ResetWindowSize);
 
     // Fullscreen
-    ui.action_Fullscreen->setShortcut(
-        hotkey_registry
-            .GetHotkey(QStringLiteral("Main Window"), QStringLiteral("Fullscreen"), this)
-            ->key());
     connect(ui.action_Fullscreen, &QAction::triggered, this, &GMainWindow::ToggleFullscreen);
 
     // Movie
@@ -910,6 +951,8 @@ bool GMainWindow::LoadROM(const QString& filename) {
         nullptr, // E-Commerce
     });
 
+    system.RegisterHostThread();
+
     const Core::System::ResultStatus result{system.Load(*render_window, filename.toStdString())};
 
     const auto drd_callout =
@@ -1026,6 +1069,7 @@ void GMainWindow::BootGame(const QString& filename) {
     }
     status_bar_update_timer.start(2000);
     async_status_button->setDisabled(true);
+    multicore_status_button->setDisabled(true);
     renderer_status_button->setDisabled(true);
 
     if (UISettings::values.hide_mouse) {
@@ -1115,6 +1159,7 @@ void GMainWindow::ShutdownGame() {
     game_fps_label->setVisible(false);
     emu_frametime_label->setVisible(false);
     async_status_button->setEnabled(true);
+    multicore_status_button->setEnabled(true);
 #ifdef HAS_VULKAN
     renderer_status_button->setEnabled(true);
 #endif
@@ -1799,6 +1844,26 @@ void GMainWindow::OnMenuReportCompatibility() {
     }
 }
 
+void GMainWindow::OpenURL(const QUrl& url) {
+    const bool open = QDesktopServices::openUrl(url);
+    if (!open) {
+        QMessageBox::warning(this, tr("Error opening URL"),
+                             tr("Unable to open the URL \"%1\".").arg(url.toString()));
+    }
+}
+
+void GMainWindow::OnOpenModsPage() {
+    OpenURL(QUrl(QStringLiteral("https://github.com/yuzu-emu/yuzu/wiki/Switch-Mods")));
+}
+
+void GMainWindow::OnOpenQuickstartGuide() {
+    OpenURL(QUrl(QStringLiteral("https://yuzu-emu.org/help/quickstart/")));
+}
+
+void GMainWindow::OnOpenFAQ() {
+    OpenURL(QUrl(QStringLiteral("https://yuzu-emu.org/wiki/faq/")));
+}
+
 void GMainWindow::ToggleFullscreen() {
     if (!emulation_running) {
         return;
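
QDesktopServices::openUrl() returns false when the OS has no handler for the URL; the new OpenURL() helper surfaces that as a warning dialog instead of failing silently. A standalone equivalent for use outside GMainWindow:

#include <QDesktopServices>
#include <QMessageBox>
#include <QObject>
#include <QUrl>
#include <QWidget>

// Report failure instead of silently doing nothing when no URL handler exists.
void OpenUrlOrWarn(QWidget* parent, const QUrl& url) {
    if (!QDesktopServices::openUrl(url)) {
        QMessageBox::warning(parent, QObject::tr("Error opening URL"),
                             QObject::tr("Unable to open the URL \"%1\".").arg(url.toString()));
    }
}
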
@@ -1910,7 +1975,11 @@ void GMainWindow::OnConfigure() {
     }
 
     dock_status_button->setChecked(Settings::values.use_docked_mode);
+    multicore_status_button->setChecked(Settings::values.use_multi_core);
+    Settings::values.use_asynchronous_gpu_emulation =
+        Settings::values.use_asynchronous_gpu_emulation || Settings::values.use_multi_core;
     async_status_button->setChecked(Settings::values.use_asynchronous_gpu_emulation);
+
 #ifdef HAS_VULKAN
     renderer_status_button->setChecked(Settings::values.renderer_backend ==
                                        Settings::RendererBackend::Vulkan);
@@ -2038,7 +2107,7 @@ void GMainWindow::UpdateStatusBar() {
     game_fps_label->setText(tr("Game: %1 FPS").arg(results.game_fps, 0, 'f', 0));
     emu_frametime_label->setText(tr("Frame: %1 ms").arg(results.frametime * 1000.0, 0, 'f', 2));
 
-    emu_speed_label->setVisible(true);
+    emu_speed_label->setVisible(!Settings::values.use_multi_core);
     game_fps_label->setVisible(true);
     emu_frametime_label->setVisible(true);
 }
diff --git a/src/yuzu/main.h b/src/yuzu/main.h
index 9ad61cab8..66c84e5c0 100644
--- a/src/yuzu/main.h
+++ b/src/yuzu/main.h
@@ -181,6 +181,9 @@ private slots:
     void OnPauseGame();
     void OnStopGame();
     void OnMenuReportCompatibility();
+    void OnOpenModsPage();
+    void OnOpenQuickstartGuide();
+    void OnOpenFAQ();
     /// Called whenever a user selects a game in the game list widget.
     void OnGameListLoadFile(QString game_path);
     void OnGameListOpenFolder(GameListOpenTarget target, const std::string& game_path);
@@ -220,6 +223,7 @@ private:
     void UpdateStatusBar();
     void HideMouseCursor();
     void ShowMouseCursor();
+    void OpenURL(const QUrl& url);
 
     Ui::MainWindow ui;
 
@@ -235,6 +239,7 @@ private:
     QLabel* game_fps_label = nullptr;
     QLabel* emu_frametime_label = nullptr;
     QPushButton* async_status_button = nullptr;
+    QPushButton* multicore_status_button = nullptr;
     QPushButton* renderer_status_button = nullptr;
     QPushButton* dock_status_button = nullptr;
     QTimer status_bar_update_timer;
diff --git a/src/yuzu/main.ui b/src/yuzu/main.ui
index 97c90f50b..bee6e107e 100644
--- a/src/yuzu/main.ui
+++ b/src/yuzu/main.ui
@@ -113,6 +113,9 @@
     <string>&amp;Help</string>
    </property>
    <addaction name="action_Report_Compatibility"/>
+   <addaction name="action_Open_Mods_Page"/>
+   <addaction name="action_Open_Quickstart_Guide"/>
+   <addaction name="action_Open_FAQ"/>
    <addaction name="separator"/>
    <addaction name="action_About"/>
   </widget>
@@ -256,6 +259,21 @@
     <bool>false</bool>
    </property>
   </action>
+  <action name="action_Open_Mods_Page">
+   <property name="text">
+    <string>Open Mods Page</string>
+   </property>
+  </action>
+  <action name="action_Open_Quickstart_Guide">
+   <property name="text">
+    <string>Open Quickstart Guide</string>
+   </property>
+  </action>
+  <action name="action_Open_FAQ">
+   <property name="text">
+    <string>FAQ</string>
+   </property>
+  </action>
   <action name="action_Open_yuzu_Folder">
    <property name="text">
     <string>Open yuzu Folder</string>
diff --git a/src/yuzu/yuzu.rc b/src/yuzu/yuzu.rc
index 1b253653f..4a3645a71 100644
--- a/src/yuzu/yuzu.rc
+++ b/src/yuzu/yuzu.rc
@@ -16,4 +16,4 @@ IDI_ICON1 ICON "../../dist/yuzu.ico"
 // RT_MANIFEST
 //
 
-1 RT_MANIFEST "../../dist/yuzu.manifest"
+0 RT_MANIFEST "../../dist/yuzu.manifest"
diff --git a/src/yuzu_cmd/config.cpp b/src/yuzu_cmd/config.cpp
index 7240270f5..659b9f701 100644
--- a/src/yuzu_cmd/config.cpp
+++ b/src/yuzu_cmd/config.cpp
@@ -380,8 +380,6 @@ void Config::ReadValues() {
     Settings::values.renderer_debug = sdl2_config->GetBoolean("Renderer", "debug", false);
     Settings::values.vulkan_device = sdl2_config->GetInteger("Renderer", "vulkan_device", 0);
 
-    Settings::values.resolution_factor =
-        static_cast<float>(sdl2_config->GetReal("Renderer", "resolution_factor", 1.0));
     Settings::values.aspect_ratio =
         static_cast<int>(sdl2_config->GetInteger("Renderer", "aspect_ratio", 0));
     Settings::values.max_anisotropy =
diff --git a/src/yuzu_cmd/default_ini.h b/src/yuzu_cmd/default_ini.h
index 6f53e9659..45c07ed5d 100644
--- a/src/yuzu_cmd/default_ini.h
+++ b/src/yuzu_cmd/default_ini.h
@@ -117,11 +117,6 @@ use_hw_renderer =
 # 0: Interpreter (slow), 1 (default): JIT (fast)
 use_shader_jit =
 
-# Resolution scale factor
-# 0: Auto (scales resolution to window size), 1: Native Switch screen resolution, Otherwise a scale
-# factor for the Switch resolution
-resolution_factor =
-
 # Aspect ratio
 # 0: Default (16:9), 1: Force 4:3, 2: Force 21:9, 3: Stretch to Window
 aspect_ratio =
diff --git a/src/yuzu_cmd/yuzu.cpp b/src/yuzu_cmd/yuzu.cpp
index 4d2ea7e9e..e6c6a839d 100644
--- a/src/yuzu_cmd/yuzu.cpp
+++ b/src/yuzu_cmd/yuzu.cpp
@@ -2,6 +2,7 @@
 // Licensed under GPLv2 or any later version
 // Refer to the license.txt file included.
 
+#include <chrono>
 #include <iostream>
 #include <memory>
 #include <string>
@@ -236,9 +237,11 @@ int main(int argc, char** argv) {
     system.Renderer().Rasterizer().LoadDiskResources();
 
     std::thread render_thread([&emu_window] { emu_window->Present(); });
+    system.Run();
     while (emu_window->IsOpen()) {
-        system.RunLoop();
+        std::this_thread::sleep_for(std::chrono::milliseconds(1));
     }
+    system.Pause();
     render_thread.join();
 
     system.Shutdown();
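
This is the frontend half of the multicore rework: System::RunLoop() is gone, the core now runs on its own threads, and the frontend merely starts it, idles while the window is open, and pauses it before teardown. The shape in isolation (templated so the sketch stays independent of the real System and window types):

#include <chrono>
#include <thread>

// Sketch of the new frontend loop; emulation runs on its own threads, so
// the main thread only polls for shutdown.
template <typename System, typename Window>
void FrontendLoop(System& system, Window& emu_window) {
    system.Run();   // start/resume the emulation threads
    while (emu_window.IsOpen()) {
        std::this_thread::sleep_for(std::chrono::milliseconds(1));
    }
    system.Pause(); // stop emulation before joining the render thread
}
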
diff --git a/src/yuzu_cmd/yuzu.rc b/src/yuzu_cmd/yuzu.rc
index 7de8ef3d9..0cde75e2f 100644
--- a/src/yuzu_cmd/yuzu.rc
+++ b/src/yuzu_cmd/yuzu.rc
@@ -14,4 +14,4 @@ YUZU_ICON ICON "../../dist/yuzu.ico"
 // RT_MANIFEST
 //
 
-1 RT_MANIFEST "../../dist/yuzu.manifest"
+0 RT_MANIFEST "../../dist/yuzu.manifest"
diff --git a/src/yuzu_tester/config.cpp b/src/yuzu_tester/config.cpp
index 3be58b15d..1566c2e3f 100644
--- a/src/yuzu_tester/config.cpp
+++ b/src/yuzu_tester/config.cpp
@@ -116,8 +116,6 @@ void Config::ReadValues() {
     Settings::values.use_multi_core = sdl2_config->GetBoolean("Core", "use_multi_core", false);
 
     // Renderer
-    Settings::values.resolution_factor =
-        static_cast<float>(sdl2_config->GetReal("Renderer", "resolution_factor", 1.0));
     Settings::values.aspect_ratio =
         static_cast<int>(sdl2_config->GetInteger("Renderer", "aspect_ratio", 0));
     Settings::values.max_anisotropy =
diff --git a/src/yuzu_tester/default_ini.h b/src/yuzu_tester/default_ini.h
index ca203b64d..41bbbbf60 100644
--- a/src/yuzu_tester/default_ini.h
+++ b/src/yuzu_tester/default_ini.h
@@ -21,11 +21,6 @@ use_hw_renderer =
 # 0: Interpreter (slow), 1 (default): JIT (fast)
 use_shader_jit =
 
-# Resolution scale factor
-# 0: Auto (scales resolution to window size), 1: Native Switch screen resolution, Otherwise a scale
-# factor for the Switch resolution
-resolution_factor =
-
 # Aspect ratio
 # 0: Default (16:9), 1: Force 4:3, 2: Force 21:9, 3: Stretch to Window
 aspect_ratio =
diff --git a/src/yuzu_tester/service/yuzutest.cpp b/src/yuzu_tester/service/yuzutest.cpp
index 85d3f436b..2d3f6e3a7 100644
--- a/src/yuzu_tester/service/yuzutest.cpp
+++ b/src/yuzu_tester/service/yuzutest.cpp
@@ -53,7 +53,7 @@ private:
 
         IPC::ResponseBuilder rb{ctx, 3};
         rb.Push(RESULT_SUCCESS);
-        rb.Push<u32>(write_size);
+        rb.Push<u32>(static_cast<u32>(write_size));
     }
 
     void StartIndividual(Kernel::HLERequestContext& ctx) {
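
The added cast most likely silences an implicit-narrowing warning: write_size is presumably a std::size_t (64-bit on the supported targets) while the IPC response field is 32 bits wide. Making the truncation explicit documents the intent:

#include <cstddef>
#include <cstdint>

// Explicit narrowing: IPC replies carry 32-bit sizes, so the wider size
// type is truncated deliberately rather than implicitly.
std::uint32_t ToIpcSize(std::size_t write_size) {
    return static_cast<std::uint32_t>(write_size);
}
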
diff --git a/src/yuzu_tester/yuzu.cpp b/src/yuzu_tester/yuzu.cpp
index 676e70ebd..083667baf 100644
--- a/src/yuzu_tester/yuzu.cpp
+++ b/src/yuzu_tester/yuzu.cpp
@@ -2,6 +2,7 @@
 // Licensed under GPLv2 or any later version
 // Refer to the license.txt file included.
 
+#include <chrono>
 #include <iostream>
 #include <memory>
 #include <string>
@@ -255,9 +256,11 @@ int main(int argc, char** argv) {
     system.GPU().Start();
     system.Renderer().Rasterizer().LoadDiskResources();
 
+    system.Run();
     while (!finished) {
-        system.RunLoop();
+        std::this_thread::sleep_for(std::chrono::milliseconds(1));
     }
+    system.Pause();
 
     detached_tasks.WaitForAllTasks();
     return return_value;
diff --git a/src/yuzu_tester/yuzu.rc b/src/yuzu_tester/yuzu.rc
index 7de8ef3d9..0cde75e2f 100644
--- a/src/yuzu_tester/yuzu.rc
+++ b/src/yuzu_tester/yuzu.rc
@@ -14,4 +14,4 @@ YUZU_ICON ICON "../../dist/yuzu.ico"
 // RT_MANIFEST
 //
 
-1 RT_MANIFEST "../../dist/yuzu.manifest"
+0 RT_MANIFEST "../../dist/yuzu.manifest"