summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r--src/CMakeLists.txt61
-rw-r--r--src/audio_core/audio_out.cpp5
-rw-r--r--src/audio_core/audio_out.h8
-rw-r--r--src/audio_core/audio_renderer.cpp7
-rw-r--r--src/audio_core/audio_renderer.h20
-rw-r--r--src/audio_core/buffer.h2
-rw-r--r--src/audio_core/codec.cpp4
-rw-r--r--src/audio_core/cubeb_sink.cpp23
-rw-r--r--src/audio_core/cubeb_sink.h4
-rw-r--r--src/audio_core/stream.cpp16
-rw-r--r--src/audio_core/stream.h30
-rw-r--r--src/common/CMakeLists.txt11
-rw-r--r--src/common/bit_field.h28
-rw-r--r--src/common/bit_util.h39
-rw-r--r--src/common/color.h40
-rw-r--r--src/common/common_types.h7
-rw-r--r--src/common/detached_tasks.cpp8
-rw-r--r--src/common/logging/backend.cpp76
-rw-r--r--src/common/logging/backend.h5
-rw-r--r--src/common/logging/log.h1
-rw-r--r--src/common/lz4_compression.cpp78
-rw-r--r--src/common/lz4_compression.h55
-rw-r--r--src/common/math_util.h4
-rw-r--r--src/common/memory_hook.cpp (renamed from src/core/memory_hook.cpp)6
-rw-r--r--src/common/memory_hook.h (renamed from src/core/memory_hook.h)4
-rw-r--r--src/common/multi_level_queue.h337
-rw-r--r--src/common/page_table.cpp31
-rw-r--r--src/common/page_table.h84
-rw-r--r--src/common/quaternion.h10
-rw-r--r--src/common/swap.h180
-rw-r--r--src/common/thread.cpp37
-rw-r--r--src/common/thread.h14
-rw-r--r--src/common/thread_queue_list.h6
-rw-r--r--src/common/threadsafe_queue.h55
-rw-r--r--src/common/uint128.cpp45
-rw-r--r--src/common/uint128.h19
-rw-r--r--src/common/vector_math.h4
-rw-r--r--src/core/CMakeLists.txt19
-rw-r--r--src/core/arm/dynarmic/arm_dynarmic.cpp15
-rw-r--r--src/core/arm/dynarmic/arm_dynarmic.h12
-rw-r--r--src/core/arm/unicorn/arm_unicorn.cpp6
-rw-r--r--src/core/arm/unicorn/arm_unicorn.h8
-rw-r--r--src/core/core.cpp60
-rw-r--r--src/core/core.h18
-rw-r--r--src/core/core_cpu.cpp24
-rw-r--r--src/core/core_cpu.h12
-rw-r--r--src/core/core_timing.cpp201
-rw-r--r--src/core/core_timing.h215
-rw-r--r--src/core/core_timing_util.cpp10
-rw-r--r--src/core/core_timing_util.h7
-rw-r--r--src/core/cpu_core_manager.cpp2
-rw-r--r--src/core/crypto/key_manager.cpp3
-rw-r--r--src/core/file_sys/cheat_engine.cpp492
-rw-r--r--src/core/file_sys/cheat_engine.h234
-rw-r--r--src/core/file_sys/content_archive.h15
-rw-r--r--src/core/file_sys/errors.h3
-rw-r--r--src/core/file_sys/patch_manager.cpp79
-rw-r--r--src/core/file_sys/patch_manager.h9
-rw-r--r--src/core/file_sys/registered_cache.cpp2
-rw-r--r--src/core/file_sys/system_archive/system_archive.cpp3
-rw-r--r--src/core/file_sys/system_archive/system_version.cpp52
-rw-r--r--src/core/file_sys/system_archive/system_version.h16
-rw-r--r--src/core/file_sys/vfs_vector.cpp2
-rw-r--r--src/core/frontend/emu_window.cpp8
-rw-r--r--src/core/frontend/emu_window.h2
-rw-r--r--src/core/frontend/framebuffer_layout.cpp12
-rw-r--r--src/core/frontend/framebuffer_layout.h2
-rw-r--r--src/core/frontend/input.h2
-rw-r--r--src/core/hle/ipc.h48
-rw-r--r--src/core/hle/ipc_helpers.h50
-rw-r--r--src/core/hle/kernel/address_arbiter.cpp187
-rw-r--r--src/core/hle/kernel/address_arbiter.h80
-rw-r--r--src/core/hle/kernel/client_port.cpp9
-rw-r--r--src/core/hle/kernel/client_session.cpp14
-rw-r--r--src/core/hle/kernel/client_session.h9
-rw-r--r--src/core/hle/kernel/code_set.cpp12
-rw-r--r--src/core/hle/kernel/code_set.h89
-rw-r--r--src/core/hle/kernel/errors.h1
-rw-r--r--src/core/hle/kernel/handle_table.cpp40
-rw-r--r--src/core/hle/kernel/handle_table.h25
-rw-r--r--src/core/hle/kernel/hle_ipc.cpp22
-rw-r--r--src/core/hle/kernel/hle_ipc.h25
-rw-r--r--src/core/hle/kernel/kernel.cpp27
-rw-r--r--src/core/hle/kernel/kernel.h25
-rw-r--r--src/core/hle/kernel/mutex.cpp35
-rw-r--r--src/core/hle/kernel/mutex.h20
-rw-r--r--src/core/hle/kernel/object.cpp2
-rw-r--r--src/core/hle/kernel/object.h2
-rw-r--r--src/core/hle/kernel/process.cpp75
-rw-r--r--src/core/hle/kernel/process.h121
-rw-r--r--src/core/hle/kernel/process_capability.cpp4
-rw-r--r--src/core/hle/kernel/process_capability.h4
-rw-r--r--src/core/hle/kernel/readable_event.cpp2
-rw-r--r--src/core/hle/kernel/readable_event.h2
-rw-r--r--src/core/hle/kernel/resource_limit.cpp7
-rw-r--r--src/core/hle/kernel/resource_limit.h11
-rw-r--r--src/core/hle/kernel/scheduler.cpp80
-rw-r--r--src/core/hle/kernel/scheduler.h12
-rw-r--r--src/core/hle/kernel/server_port.cpp6
-rw-r--r--src/core/hle/kernel/server_port.h35
-rw-r--r--src/core/hle/kernel/server_session.cpp93
-rw-r--r--src/core/hle/kernel/server_session.h55
-rw-r--r--src/core/hle/kernel/shared_memory.cpp16
-rw-r--r--src/core/hle/kernel/shared_memory.h10
-rw-r--r--src/core/hle/kernel/svc.cpp361
-rw-r--r--src/core/hle/kernel/svc_wrap.h8
-rw-r--r--src/core/hle/kernel/thread.cpp113
-rw-r--r--src/core/hle/kernel/thread.h32
-rw-r--r--src/core/hle/kernel/transfer_memory.cpp73
-rw-r--r--src/core/hle/kernel/transfer_memory.h91
-rw-r--r--src/core/hle/kernel/vm_manager.cpp145
-rw-r--r--src/core/hle/kernel/vm_manager.h93
-rw-r--r--src/core/hle/kernel/wait_object.h2
-rw-r--r--src/core/hle/result.h19
-rw-r--r--src/core/hle/service/am/am.cpp170
-rw-r--r--src/core/hle/service/am/am.h31
-rw-r--r--src/core/hle/service/am/applets/software_keyboard.cpp1
-rw-r--r--src/core/hle/service/am/applets/software_keyboard.h3
-rw-r--r--src/core/hle/service/audio/audout_u.cpp27
-rw-r--r--src/core/hle/service/audio/audren_u.cpp73
-rw-r--r--src/core/hle/service/audio/audren_u.h3
-rw-r--r--src/core/hle/service/audio/errors.h15
-rw-r--r--src/core/hle/service/audio/hwopus.cpp206
-rw-r--r--src/core/hle/service/fatal/fatal.cpp89
-rw-r--r--src/core/hle/service/filesystem/fsp_srv.cpp5
-rw-r--r--src/core/hle/service/filesystem/fsp_srv.h1
-rw-r--r--src/core/hle/service/hid/controllers/controller_base.h7
-rw-r--r--src/core/hle/service/hid/controllers/debug_pad.cpp5
-rw-r--r--src/core/hle/service/hid/controllers/debug_pad.h32
-rw-r--r--src/core/hle/service/hid/controllers/gesture.cpp5
-rw-r--r--src/core/hle/service/hid/controllers/gesture.h2
-rw-r--r--src/core/hle/service/hid/controllers/keyboard.cpp5
-rw-r--r--src/core/hle/service/hid/controllers/keyboard.h2
-rw-r--r--src/core/hle/service/hid/controllers/mouse.cpp5
-rw-r--r--src/core/hle/service/hid/controllers/mouse.h2
-rw-r--r--src/core/hle/service/hid/controllers/npad.cpp5
-rw-r--r--src/core/hle/service/hid/controllers/npad.h104
-rw-r--r--src/core/hle/service/hid/controllers/stubbed.cpp5
-rw-r--r--src/core/hle/service/hid/controllers/stubbed.h2
-rw-r--r--src/core/hle/service/hid/controllers/touchscreen.cpp7
-rw-r--r--src/core/hle/service/hid/controllers/touchscreen.h6
-rw-r--r--src/core/hle/service/hid/controllers/xpad.cpp5
-rw-r--r--src/core/hle/service/hid/controllers/xpad.h2
-rw-r--r--src/core/hle/service/hid/hid.cpp21
-rw-r--r--src/core/hle/service/hid/hid.h11
-rw-r--r--src/core/hle/service/hid/irs.cpp2
-rw-r--r--src/core/hle/service/ldr/ldr.cpp8
-rw-r--r--src/core/hle/service/lm/lm.cpp2
-rw-r--r--src/core/hle/service/nfc/nfc.cpp2
-rw-r--r--src/core/hle/service/nfp/nfp.cpp2
-rw-r--r--src/core/hle/service/nvdrv/devices/nvdevice.h10
-rw-r--r--src/core/hle/service/nvdrv/devices/nvdisp_disp0.cpp10
-rw-r--r--src/core/hle/service/nvdrv/devices/nvdisp_disp0.h2
-rw-r--r--src/core/hle/service/nvdrv/devices/nvhost_as_gpu.cpp17
-rw-r--r--src/core/hle/service/nvdrv/devices/nvhost_ctrl_gpu.cpp3
-rw-r--r--src/core/hle/service/nvdrv/devices/nvhost_gpu.cpp14
-rw-r--r--src/core/hle/service/nvflinger/buffer_queue.cpp2
-rw-r--r--src/core/hle/service/nvflinger/buffer_queue.h8
-rw-r--r--src/core/hle/service/nvflinger/nvflinger.cpp114
-rw-r--r--src/core/hle/service/nvflinger/nvflinger.h64
-rw-r--r--src/core/hle/service/service.cpp15
-rw-r--r--src/core/hle/service/service.h15
-rw-r--r--src/core/hle/service/set/set_sys.cpp79
-rw-r--r--src/core/hle/service/set/set_sys.h2
-rw-r--r--src/core/hle/service/sm/controller.cpp2
-rw-r--r--src/core/hle/service/sm/sm.h2
-rw-r--r--src/core/hle/service/time/time.cpp9
-rw-r--r--src/core/hle/service/vi/display/vi_display.cpp71
-rw-r--r--src/core/hle/service/vi/display/vi_display.h98
-rw-r--r--src/core/hle/service/vi/layer/vi_layer.cpp13
-rw-r--r--src/core/hle/service/vi/layer/vi_layer.h52
-rw-r--r--src/core/hle/service/vi/vi.cpp67
-rw-r--r--src/core/hle/service/vi/vi.h40
-rw-r--r--src/core/hle/service/vi/vi_m.cpp12
-rw-r--r--src/core/hle/service/vi/vi_m.h19
-rw-r--r--src/core/hle/service/vi/vi_s.cpp12
-rw-r--r--src/core/hle/service/vi/vi_s.h19
-rw-r--r--src/core/hle/service/vi/vi_u.cpp12
-rw-r--r--src/core/hle/service/vi/vi_u.h19
-rw-r--r--src/core/loader/elf.cpp3
-rw-r--r--src/core/loader/linker.cpp147
-rw-r--r--src/core/loader/linker.h36
-rw-r--r--src/core/loader/nro.cpp3
-rw-r--r--src/core/loader/nro.h4
-rw-r--r--src/core/loader/nso.cpp117
-rw-r--r--src/core/loader/nso.h43
-rw-r--r--src/core/memory.cpp241
-rw-r--r--src/core/memory.h83
-rw-r--r--src/core/memory_setup.h19
-rw-r--r--src/core/perf_stats.cpp10
-rw-r--r--src/core/settings.cpp4
-rw-r--r--src/core/settings.h2
-rw-r--r--src/core/telemetry_session.cpp2
-rw-r--r--src/input_common/CMakeLists.txt15
-rw-r--r--src/input_common/keyboard.cpp8
-rw-r--r--src/input_common/main.cpp23
-rw-r--r--src/input_common/main.h2
-rw-r--r--src/input_common/motion_emu.cpp38
-rw-r--r--src/input_common/sdl/sdl.cpp636
-rw-r--r--src/input_common/sdl/sdl.h53
-rw-r--r--src/input_common/sdl/sdl_impl.cpp671
-rw-r--r--src/input_common/sdl/sdl_impl.h63
-rw-r--r--src/tests/CMakeLists.txt3
-rw-r--r--src/tests/common/bit_field.cpp90
-rw-r--r--src/tests/common/bit_utils.cpp23
-rw-r--r--src/tests/common/multi_level_queue.cpp55
-rw-r--r--src/tests/core/arm/arm_test_common.cpp11
-rw-r--r--src/tests/core/arm/arm_test_common.h8
-rw-r--r--src/tests/core/core_timing.cpp220
-rw-r--r--src/video_core/CMakeLists.txt41
-rw-r--r--src/video_core/debug_utils/debug_utils.cpp4
-rw-r--r--src/video_core/debug_utils/debug_utils.h4
-rw-r--r--src/video_core/dma_pusher.cpp54
-rw-r--r--src/video_core/dma_pusher.h6
-rw-r--r--src/video_core/engines/fermi_2d.cpp69
-rw-r--r--src/video_core/engines/fermi_2d.h31
-rw-r--r--src/video_core/engines/kepler_compute.cpp33
-rw-r--r--src/video_core/engines/kepler_compute.h (renamed from src/video_core/engines/maxwell_compute.h)34
-rw-r--r--src/video_core/engines/kepler_memory.cpp22
-rw-r--r--src/video_core/engines/kepler_memory.h10
-rw-r--r--src/video_core/engines/maxwell_3d.cpp120
-rw-r--r--src/video_core/engines/maxwell_3d.h26
-rw-r--r--src/video_core/engines/maxwell_compute.cpp28
-rw-r--r--src/video_core/engines/maxwell_dma.cpp46
-rw-r--r--src/video_core/engines/maxwell_dma.h11
-rw-r--r--src/video_core/engines/shader_bytecode.h34
-rw-r--r--src/video_core/engines/shader_header.h41
-rw-r--r--src/video_core/gpu.cpp38
-rw-r--r--src/video_core/gpu.h88
-rw-r--r--src/video_core/gpu_asynch.cpp37
-rw-r--r--src/video_core/gpu_asynch.h37
-rw-r--r--src/video_core/gpu_synch.cpp37
-rw-r--r--src/video_core/gpu_synch.h29
-rw-r--r--src/video_core/gpu_thread.cpp98
-rw-r--r--src/video_core/gpu_thread.h182
-rw-r--r--src/video_core/memory_manager.cpp477
-rw-r--r--src/video_core/memory_manager.h163
-rw-r--r--src/video_core/morton.cpp324
-rw-r--r--src/video_core/morton.h6
-rw-r--r--src/video_core/rasterizer_cache.h92
-rw-r--r--src/video_core/rasterizer_interface.h13
-rw-r--r--src/video_core/renderer_base.cpp1
-rw-r--r--src/video_core/renderer_opengl/gl_buffer_cache.cpp30
-rw-r--r--src/video_core/renderer_opengl/gl_buffer_cache.h33
-rw-r--r--src/video_core/renderer_opengl/gl_global_cache.cpp43
-rw-r--r--src/video_core/renderer_opengl/gl_global_cache.h21
-rw-r--r--src/video_core/renderer_opengl/gl_primitive_assembler.cpp10
-rw-r--r--src/video_core/renderer_opengl/gl_primitive_assembler.h2
-rw-r--r--src/video_core/renderer_opengl/gl_rasterizer.cpp267
-rw-r--r--src/video_core/renderer_opengl/gl_rasterizer.h22
-rw-r--r--src/video_core/renderer_opengl/gl_rasterizer_cache.cpp559
-rw-r--r--src/video_core/renderer_opengl/gl_rasterizer_cache.h171
-rw-r--r--src/video_core/renderer_opengl/gl_shader_cache.cpp64
-rw-r--r--src/video_core/renderer_opengl/gl_shader_cache.h19
-rw-r--r--src/video_core/renderer_opengl/gl_shader_decompiler.cpp208
-rw-r--r--src/video_core/renderer_opengl/gl_shader_disk_cache.cpp48
-rw-r--r--src/video_core/renderer_opengl/gl_shader_gen.cpp4
-rw-r--r--src/video_core/renderer_opengl/gl_shader_manager.cpp17
-rw-r--r--src/video_core/renderer_opengl/gl_shader_manager.h11
-rw-r--r--src/video_core/renderer_opengl/gl_state.cpp148
-rw-r--r--src/video_core/renderer_opengl/renderer_opengl.cpp50
-rw-r--r--src/video_core/renderer_opengl/renderer_opengl.h5
-rw-r--r--src/video_core/renderer_vulkan/declarations.h45
-rw-r--r--src/video_core/renderer_vulkan/maxwell_to_vk.cpp483
-rw-r--r--src/video_core/renderer_vulkan/maxwell_to_vk.h58
-rw-r--r--src/video_core/renderer_vulkan/vk_buffer_cache.cpp122
-rw-r--r--src/video_core/renderer_vulkan/vk_buffer_cache.h103
-rw-r--r--src/video_core/renderer_vulkan/vk_device.cpp238
-rw-r--r--src/video_core/renderer_vulkan/vk_device.h116
-rw-r--r--src/video_core/renderer_vulkan/vk_memory_manager.cpp252
-rw-r--r--src/video_core/renderer_vulkan/vk_memory_manager.h87
-rw-r--r--src/video_core/renderer_vulkan/vk_resource_manager.cpp285
-rw-r--r--src/video_core/renderer_vulkan/vk_resource_manager.h180
-rw-r--r--src/video_core/renderer_vulkan/vk_sampler_cache.cpp81
-rw-r--r--src/video_core/renderer_vulkan/vk_sampler_cache.h56
-rw-r--r--src/video_core/renderer_vulkan/vk_scheduler.cpp60
-rw-r--r--src/video_core/renderer_vulkan/vk_scheduler.h69
-rw-r--r--src/video_core/renderer_vulkan/vk_stream_buffer.cpp90
-rw-r--r--src/video_core/renderer_vulkan/vk_stream_buffer.h72
-rw-r--r--src/video_core/renderer_vulkan/vk_swapchain.cpp210
-rw-r--r--src/video_core/renderer_vulkan/vk_swapchain.h92
-rw-r--r--src/video_core/shader/decode.cpp1
-rw-r--r--src/video_core/shader/decode/arithmetic_integer.cpp4
-rw-r--r--src/video_core/shader/decode/conversion.cpp6
-rw-r--r--src/video_core/shader/decode/memory.cpp534
-rw-r--r--src/video_core/shader/decode/other.cpp15
-rw-r--r--src/video_core/shader/decode/texture.cpp534
-rw-r--r--src/video_core/shader/shader_ir.h25
-rw-r--r--src/video_core/shader/track.cpp10
-rw-r--r--src/video_core/surface.cpp4
-rw-r--r--src/video_core/textures/astc.cpp80
-rw-r--r--src/video_core/textures/astc.h2
-rw-r--r--src/video_core/textures/convert.cpp92
-rw-r--r--src/video_core/textures/convert.h18
-rw-r--r--src/video_core/textures/decoders.cpp38
-rw-r--r--src/video_core/textures/decoders.h31
-rw-r--r--src/video_core/textures/texture.h83
-rw-r--r--src/web_service/verify_login.h2
-rw-r--r--src/web_service/web_backend.cpp5
-rw-r--r--src/yuzu/applets/profile_select.cpp7
-rw-r--r--src/yuzu/applets/software_keyboard.cpp18
-rw-r--r--src/yuzu/applets/web_browser.cpp6
-rw-r--r--src/yuzu/bootmanager.cpp17
-rw-r--r--src/yuzu/bootmanager.h2
-rw-r--r--src/yuzu/compatdb.cpp6
-rw-r--r--src/yuzu/configuration/config.cpp444
-rw-r--r--src/yuzu/configuration/config.h5
-rw-r--r--src/yuzu/configuration/configure_general.cpp2
-rw-r--r--src/yuzu/configuration/configure_general.ui20
-rw-r--r--src/yuzu/configuration/configure_graphics.cpp4
-rw-r--r--src/yuzu/configuration/configure_graphics.ui7
-rw-r--r--src/yuzu/debugger/graphics/graphics_surface.cpp9
-rw-r--r--src/yuzu/debugger/graphics/graphics_surface.h2
-rw-r--r--src/yuzu/debugger/profiler.cpp1
-rw-r--r--src/yuzu/debugger/profiler.h9
-rw-r--r--src/yuzu/debugger/wait_tree.cpp12
-rw-r--r--src/yuzu/debugger/wait_tree.h3
-rw-r--r--src/yuzu/main.cpp20
-rw-r--r--src/yuzu_cmd/config.cpp14
-rw-r--r--src/yuzu_cmd/default_ini.h4
-rw-r--r--src/yuzu_cmd/emu_window/emu_window_sdl2.cpp13
-rw-r--r--src/yuzu_cmd/yuzu.cpp6
322 files changed, 12773 insertions, 5418 deletions
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index f69d00a2b..6c99dd5e2 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -1,18 +1,79 @@
1# Enable modules to include each other's files 1# Enable modules to include each other's files
2include_directories(.) 2include_directories(.)
3 3
4# CMake seems to only define _DEBUG on Windows
5set_property(DIRECTORY APPEND PROPERTY
6 COMPILE_DEFINITIONS $<$<CONFIG:Debug>:_DEBUG> $<$<NOT:$<CONFIG:Debug>>:NDEBUG>)
7
8# Set compilation flags
9if (MSVC)
10 set(CMAKE_CONFIGURATION_TYPES Debug Release CACHE STRING "" FORCE)
11
12 # Silence "deprecation" warnings
13 add_definitions(-D_CRT_SECURE_NO_WARNINGS -D_CRT_NONSTDC_NO_DEPRECATE -D_SCL_SECURE_NO_WARNINGS)
14
15 # Avoid windows.h junk
16 add_definitions(-DNOMINMAX)
17
18 # Avoid windows.h from including some usually unused libs like winsocks.h, since this might cause some redefinition errors.
19 add_definitions(-DWIN32_LEAN_AND_MEAN)
20
21 # /W3 - Level 3 warnings
22 # /MP - Multi-threaded compilation
23 # /Zi - Output debugging information
24 # /Zo - enhanced debug info for optimized builds
25 # /permissive- - enables stricter C++ standards conformance checks
26 # /EHsc - C++-only exception handling semantics
27 # /Zc:throwingNew - let codegen assume `operator new` will never return null
28 # /Zc:inline - let codegen omit inline functions in object files
29 add_compile_options(/W3 /MP /Zi /Zo /permissive- /EHsc /std:c++latest /Zc:throwingNew,inline)
30
31 # /GS- - No stack buffer overflow checks
32 add_compile_options("$<$<CONFIG:Release>:/GS->")
33
34 set(CMAKE_EXE_LINKER_FLAGS_DEBUG "/DEBUG /MANIFEST:NO" CACHE STRING "" FORCE)
35 set(CMAKE_EXE_LINKER_FLAGS_RELEASE "/DEBUG /MANIFEST:NO /INCREMENTAL:NO /OPT:REF,ICF" CACHE STRING "" FORCE)
36else()
37 add_compile_options("-Wno-attributes")
38
39 if (APPLE AND CMAKE_CXX_COMPILER_ID STREQUAL Clang)
40 add_compile_options("-stdlib=libc++")
41 endif()
42
43 # Set file offset size to 64 bits.
44 #
45 # On modern Unixes, this is typically already the case. The lone exception is
46 # glibc, which may default to 32 bits. glibc allows this to be configured
47 # by setting _FILE_OFFSET_BITS.
48 if(CMAKE_SYSTEM_NAME STREQUAL "Linux" OR MINGW)
49 add_definitions(-D_FILE_OFFSET_BITS=64)
50 endif()
51
52 if (MINGW)
53 add_definitions(-DMINGW_HAS_SECURE_API)
54
55 if (MINGW_STATIC_BUILD)
56 add_definitions(-DQT_STATICPLUGIN)
57 add_compile_options("-static")
58 endif()
59 endif()
60endif()
61
4add_subdirectory(common) 62add_subdirectory(common)
5add_subdirectory(core) 63add_subdirectory(core)
6add_subdirectory(audio_core) 64add_subdirectory(audio_core)
7add_subdirectory(video_core) 65add_subdirectory(video_core)
8add_subdirectory(input_common) 66add_subdirectory(input_common)
9add_subdirectory(tests) 67add_subdirectory(tests)
68
10if (ENABLE_SDL2) 69if (ENABLE_SDL2)
11 add_subdirectory(yuzu_cmd) 70 add_subdirectory(yuzu_cmd)
12endif() 71endif()
72
13if (ENABLE_QT) 73if (ENABLE_QT)
14 add_subdirectory(yuzu) 74 add_subdirectory(yuzu)
15endif() 75endif()
76
16if (ENABLE_WEB_SERVICE) 77if (ENABLE_WEB_SERVICE)
17 add_subdirectory(web_service) 78 add_subdirectory(web_service)
18endif() 79endif()
diff --git a/src/audio_core/audio_out.cpp b/src/audio_core/audio_out.cpp
index 50d2a1ed3..8619a3f03 100644
--- a/src/audio_core/audio_out.cpp
+++ b/src/audio_core/audio_out.cpp
@@ -26,14 +26,15 @@ static Stream::Format ChannelsToStreamFormat(u32 num_channels) {
26 return {}; 26 return {};
27} 27}
28 28
29StreamPtr AudioOut::OpenStream(u32 sample_rate, u32 num_channels, std::string&& name, 29StreamPtr AudioOut::OpenStream(Core::Timing::CoreTiming& core_timing, u32 sample_rate,
30 u32 num_channels, std::string&& name,
30 Stream::ReleaseCallback&& release_callback) { 31 Stream::ReleaseCallback&& release_callback) {
31 if (!sink) { 32 if (!sink) {
32 sink = CreateSinkFromID(Settings::values.sink_id, Settings::values.audio_device_id); 33 sink = CreateSinkFromID(Settings::values.sink_id, Settings::values.audio_device_id);
33 } 34 }
34 35
35 return std::make_shared<Stream>( 36 return std::make_shared<Stream>(
36 sample_rate, ChannelsToStreamFormat(num_channels), std::move(release_callback), 37 core_timing, sample_rate, ChannelsToStreamFormat(num_channels), std::move(release_callback),
37 sink->AcquireSinkStream(sample_rate, num_channels, name), std::move(name)); 38 sink->AcquireSinkStream(sample_rate, num_channels, name), std::move(name));
38} 39}
39 40
diff --git a/src/audio_core/audio_out.h b/src/audio_core/audio_out.h
index df9607ac7..b07588287 100644
--- a/src/audio_core/audio_out.h
+++ b/src/audio_core/audio_out.h
@@ -13,6 +13,10 @@
13#include "audio_core/stream.h" 13#include "audio_core/stream.h"
14#include "common/common_types.h" 14#include "common/common_types.h"
15 15
16namespace Core::Timing {
17class CoreTiming;
18}
19
16namespace AudioCore { 20namespace AudioCore {
17 21
18/** 22/**
@@ -21,8 +25,8 @@ namespace AudioCore {
21class AudioOut { 25class AudioOut {
22public: 26public:
23 /// Opens a new audio stream 27 /// Opens a new audio stream
24 StreamPtr OpenStream(u32 sample_rate, u32 num_channels, std::string&& name, 28 StreamPtr OpenStream(Core::Timing::CoreTiming& core_timing, u32 sample_rate, u32 num_channels,
25 Stream::ReleaseCallback&& release_callback); 29 std::string&& name, Stream::ReleaseCallback&& release_callback);
26 30
27 /// Returns a vector of recently released buffers specified by tag for the specified stream 31 /// Returns a vector of recently released buffers specified by tag for the specified stream
28 std::vector<Buffer::Tag> GetTagsAndReleaseBuffers(StreamPtr stream, std::size_t max_count); 32 std::vector<Buffer::Tag> GetTagsAndReleaseBuffers(StreamPtr stream, std::size_t max_count);
diff --git a/src/audio_core/audio_renderer.cpp b/src/audio_core/audio_renderer.cpp
index 00c026511..9a0939883 100644
--- a/src/audio_core/audio_renderer.cpp
+++ b/src/audio_core/audio_renderer.cpp
@@ -8,6 +8,7 @@
8#include "audio_core/codec.h" 8#include "audio_core/codec.h"
9#include "common/assert.h" 9#include "common/assert.h"
10#include "common/logging/log.h" 10#include "common/logging/log.h"
11#include "core/core.h"
11#include "core/hle/kernel/writable_event.h" 12#include "core/hle/kernel/writable_event.h"
12#include "core/memory.h" 13#include "core/memory.h"
13 14
@@ -71,14 +72,14 @@ private:
71 EffectOutStatus out_status{}; 72 EffectOutStatus out_status{};
72 EffectInStatus info{}; 73 EffectInStatus info{};
73}; 74};
74AudioRenderer::AudioRenderer(AudioRendererParameter params, 75AudioRenderer::AudioRenderer(Core::Timing::CoreTiming& core_timing, AudioRendererParameter params,
75 Kernel::SharedPtr<Kernel::WritableEvent> buffer_event) 76 Kernel::SharedPtr<Kernel::WritableEvent> buffer_event)
76 : worker_params{params}, buffer_event{buffer_event}, voices(params.voice_count), 77 : worker_params{params}, buffer_event{buffer_event}, voices(params.voice_count),
77 effects(params.effect_count) { 78 effects(params.effect_count) {
78 79
79 audio_out = std::make_unique<AudioCore::AudioOut>(); 80 audio_out = std::make_unique<AudioCore::AudioOut>();
80 stream = audio_out->OpenStream(STREAM_SAMPLE_RATE, STREAM_NUM_CHANNELS, "AudioRenderer", 81 stream = audio_out->OpenStream(core_timing, STREAM_SAMPLE_RATE, STREAM_NUM_CHANNELS,
81 [=]() { buffer_event->Signal(); }); 82 "AudioRenderer", [=]() { buffer_event->Signal(); });
82 audio_out->StartStream(stream); 83 audio_out->StartStream(stream);
83 84
84 QueueMixedBuffer(0); 85 QueueMixedBuffer(0);
diff --git a/src/audio_core/audio_renderer.h b/src/audio_core/audio_renderer.h
index 7826881bf..b2e5d336c 100644
--- a/src/audio_core/audio_renderer.h
+++ b/src/audio_core/audio_renderer.h
@@ -14,6 +14,10 @@
14#include "common/swap.h" 14#include "common/swap.h"
15#include "core/hle/kernel/object.h" 15#include "core/hle/kernel/object.h"
16 16
17namespace Core::Timing {
18class CoreTiming;
19}
20
17namespace Kernel { 21namespace Kernel {
18class WritableEvent; 22class WritableEvent;
19} 23}
@@ -42,16 +46,18 @@ struct AudioRendererParameter {
42 u32_le sample_rate; 46 u32_le sample_rate;
43 u32_le sample_count; 47 u32_le sample_count;
44 u32_le mix_buffer_count; 48 u32_le mix_buffer_count;
45 u32_le unknown_c; 49 u32_le submix_count;
46 u32_le voice_count; 50 u32_le voice_count;
47 u32_le sink_count; 51 u32_le sink_count;
48 u32_le effect_count; 52 u32_le effect_count;
49 u32_le unknown_1c; 53 u32_le performance_frame_count;
50 u8 unknown_20; 54 u8 is_voice_drop_enabled;
51 INSERT_PADDING_BYTES(3); 55 u8 unknown_21;
56 u8 unknown_22;
57 u8 execution_mode;
52 u32_le splitter_count; 58 u32_le splitter_count;
53 u32_le unknown_2c; 59 u32_le num_splitter_send_channels;
54 INSERT_PADDING_WORDS(1); 60 u32_le unknown_30;
55 u32_le revision; 61 u32_le revision;
56}; 62};
57static_assert(sizeof(AudioRendererParameter) == 52, "AudioRendererParameter is an invalid size"); 63static_assert(sizeof(AudioRendererParameter) == 52, "AudioRendererParameter is an invalid size");
@@ -208,7 +214,7 @@ static_assert(sizeof(UpdateDataHeader) == 0x40, "UpdateDataHeader has wrong size
208 214
209class AudioRenderer { 215class AudioRenderer {
210public: 216public:
211 AudioRenderer(AudioRendererParameter params, 217 AudioRenderer(Core::Timing::CoreTiming& core_timing, AudioRendererParameter params,
212 Kernel::SharedPtr<Kernel::WritableEvent> buffer_event); 218 Kernel::SharedPtr<Kernel::WritableEvent> buffer_event);
213 ~AudioRenderer(); 219 ~AudioRenderer();
214 220
diff --git a/src/audio_core/buffer.h b/src/audio_core/buffer.h
index a323b23ec..5ee09e9aa 100644
--- a/src/audio_core/buffer.h
+++ b/src/audio_core/buffer.h
@@ -21,7 +21,7 @@ public:
21 Buffer(Tag tag, std::vector<s16>&& samples) : tag{tag}, samples{std::move(samples)} {} 21 Buffer(Tag tag, std::vector<s16>&& samples) : tag{tag}, samples{std::move(samples)} {}
22 22
23 /// Returns the raw audio data for the buffer 23 /// Returns the raw audio data for the buffer
24 std::vector<s16>& Samples() { 24 std::vector<s16>& GetSamples() {
25 return samples; 25 return samples;
26 } 26 }
27 27
diff --git a/src/audio_core/codec.cpp b/src/audio_core/codec.cpp
index 454de798b..c5a0d98ce 100644
--- a/src/audio_core/codec.cpp
+++ b/src/audio_core/codec.cpp
@@ -68,8 +68,8 @@ std::vector<s16> DecodeADPCM(const u8* const data, std::size_t size, const ADPCM
68 } 68 }
69 } 69 }
70 70
71 state.yn1 = yn1; 71 state.yn1 = static_cast<s16>(yn1);
72 state.yn2 = yn2; 72 state.yn2 = static_cast<s16>(yn2);
73 73
74 return ret; 74 return ret;
75} 75}
diff --git a/src/audio_core/cubeb_sink.cpp b/src/audio_core/cubeb_sink.cpp
index 097328901..7047ed9cf 100644
--- a/src/audio_core/cubeb_sink.cpp
+++ b/src/audio_core/cubeb_sink.cpp
@@ -12,6 +12,10 @@
12#include "common/ring_buffer.h" 12#include "common/ring_buffer.h"
13#include "core/settings.h" 13#include "core/settings.h"
14 14
15#ifdef _WIN32
16#include <objbase.h>
17#endif
18
15namespace AudioCore { 19namespace AudioCore {
16 20
17class CubebSinkStream final : public SinkStream { 21class CubebSinkStream final : public SinkStream {
@@ -46,7 +50,7 @@ public:
46 } 50 }
47 } 51 }
48 52
49 ~CubebSinkStream() { 53 ~CubebSinkStream() override {
50 if (!ctx) { 54 if (!ctx) {
51 return; 55 return;
52 } 56 }
@@ -75,11 +79,11 @@ public:
75 queue.Push(samples); 79 queue.Push(samples);
76 } 80 }
77 81
78 std::size_t SamplesInQueue(u32 num_channels) const override { 82 std::size_t SamplesInQueue(u32 channel_count) const override {
79 if (!ctx) 83 if (!ctx)
80 return 0; 84 return 0;
81 85
82 return queue.Size() / num_channels; 86 return queue.Size() / channel_count;
83 } 87 }
84 88
85 void Flush() override { 89 void Flush() override {
@@ -98,7 +102,7 @@ private:
98 u32 num_channels{}; 102 u32 num_channels{};
99 103
100 Common::RingBuffer<s16, 0x10000> queue; 104 Common::RingBuffer<s16, 0x10000> queue;
101 std::array<s16, 2> last_frame; 105 std::array<s16, 2> last_frame{};
102 std::atomic<bool> should_flush{}; 106 std::atomic<bool> should_flush{};
103 TimeStretcher time_stretch; 107 TimeStretcher time_stretch;
104 108
@@ -108,6 +112,11 @@ private:
108}; 112};
109 113
110CubebSink::CubebSink(std::string_view target_device_name) { 114CubebSink::CubebSink(std::string_view target_device_name) {
115 // Cubeb requires COM to be initialized on the thread calling cubeb_init on Windows
116#ifdef _WIN32
117 com_init_result = CoInitializeEx(nullptr, COINIT_MULTITHREADED);
118#endif
119
111 if (cubeb_init(&ctx, "yuzu", nullptr) != CUBEB_OK) { 120 if (cubeb_init(&ctx, "yuzu", nullptr) != CUBEB_OK) {
112 LOG_CRITICAL(Audio_Sink, "cubeb_init failed"); 121 LOG_CRITICAL(Audio_Sink, "cubeb_init failed");
113 return; 122 return;
@@ -142,6 +151,12 @@ CubebSink::~CubebSink() {
142 } 151 }
143 152
144 cubeb_destroy(ctx); 153 cubeb_destroy(ctx);
154
155#ifdef _WIN32
156 if (SUCCEEDED(com_init_result)) {
157 CoUninitialize();
158 }
159#endif
145} 160}
146 161
147SinkStream& CubebSink::AcquireSinkStream(u32 sample_rate, u32 num_channels, 162SinkStream& CubebSink::AcquireSinkStream(u32 sample_rate, u32 num_channels,
diff --git a/src/audio_core/cubeb_sink.h b/src/audio_core/cubeb_sink.h
index efb9d1634..7ce850f47 100644
--- a/src/audio_core/cubeb_sink.h
+++ b/src/audio_core/cubeb_sink.h
@@ -25,6 +25,10 @@ private:
25 cubeb* ctx{}; 25 cubeb* ctx{};
26 cubeb_devid output_device{}; 26 cubeb_devid output_device{};
27 std::vector<SinkStreamPtr> sink_streams; 27 std::vector<SinkStreamPtr> sink_streams;
28
29#ifdef _WIN32
30 u32 com_init_result = 0;
31#endif
28}; 32};
29 33
30std::vector<std::string> ListCubebSinkDevices(); 34std::vector<std::string> ListCubebSinkDevices();
diff --git a/src/audio_core/stream.cpp b/src/audio_core/stream.cpp
index 4ce2d374e..22a3f8c84 100644
--- a/src/audio_core/stream.cpp
+++ b/src/audio_core/stream.cpp
@@ -32,13 +32,13 @@ u32 Stream::GetNumChannels() const {
32 return {}; 32 return {};
33} 33}
34 34
35Stream::Stream(u32 sample_rate, Format format, ReleaseCallback&& release_callback, 35Stream::Stream(Core::Timing::CoreTiming& core_timing, u32 sample_rate, Format format,
36 SinkStream& sink_stream, std::string&& name_) 36 ReleaseCallback&& release_callback, SinkStream& sink_stream, std::string&& name_)
37 : sample_rate{sample_rate}, format{format}, release_callback{std::move(release_callback)}, 37 : sample_rate{sample_rate}, format{format}, release_callback{std::move(release_callback)},
38 sink_stream{sink_stream}, name{std::move(name_)} { 38 sink_stream{sink_stream}, core_timing{core_timing}, name{std::move(name_)} {
39 39
40 release_event = CoreTiming::RegisterEvent( 40 release_event = core_timing.RegisterEvent(
41 name, [this](u64 userdata, int cycles_late) { ReleaseActiveBuffer(); }); 41 name, [this](u64 userdata, s64 cycles_late) { ReleaseActiveBuffer(); });
42} 42}
43 43
44void Stream::Play() { 44void Stream::Play() {
@@ -57,7 +57,7 @@ Stream::State Stream::GetState() const {
57 57
58s64 Stream::GetBufferReleaseCycles(const Buffer& buffer) const { 58s64 Stream::GetBufferReleaseCycles(const Buffer& buffer) const {
59 const std::size_t num_samples{buffer.GetSamples().size() / GetNumChannels()}; 59 const std::size_t num_samples{buffer.GetSamples().size() / GetNumChannels()};
60 return CoreTiming::usToCycles((static_cast<u64>(num_samples) * 1000000) / sample_rate); 60 return Core::Timing::usToCycles((static_cast<u64>(num_samples) * 1000000) / sample_rate);
61} 61}
62 62
63static void VolumeAdjustSamples(std::vector<s16>& samples) { 63static void VolumeAdjustSamples(std::vector<s16>& samples) {
@@ -95,11 +95,11 @@ void Stream::PlayNextBuffer() {
95 active_buffer = queued_buffers.front(); 95 active_buffer = queued_buffers.front();
96 queued_buffers.pop(); 96 queued_buffers.pop();
97 97
98 VolumeAdjustSamples(active_buffer->Samples()); 98 VolumeAdjustSamples(active_buffer->GetSamples());
99 99
100 sink_stream.EnqueueSamples(GetNumChannels(), active_buffer->GetSamples()); 100 sink_stream.EnqueueSamples(GetNumChannels(), active_buffer->GetSamples());
101 101
102 CoreTiming::ScheduleEventThreadsafe(GetBufferReleaseCycles(*active_buffer), release_event, {}); 102 core_timing.ScheduleEventThreadsafe(GetBufferReleaseCycles(*active_buffer), release_event, {});
103} 103}
104 104
105void Stream::ReleaseActiveBuffer() { 105void Stream::ReleaseActiveBuffer() {
diff --git a/src/audio_core/stream.h b/src/audio_core/stream.h
index aebfeb51d..05071243b 100644
--- a/src/audio_core/stream.h
+++ b/src/audio_core/stream.h
@@ -13,9 +13,10 @@
13#include "audio_core/buffer.h" 13#include "audio_core/buffer.h"
14#include "common/common_types.h" 14#include "common/common_types.h"
15 15
16namespace CoreTiming { 16namespace Core::Timing {
17class CoreTiming;
17struct EventType; 18struct EventType;
18} 19} // namespace Core::Timing
19 20
20namespace AudioCore { 21namespace AudioCore {
21 22
@@ -42,8 +43,8 @@ public:
42 /// Callback function type, used to change guest state on a buffer being released 43 /// Callback function type, used to change guest state on a buffer being released
43 using ReleaseCallback = std::function<void()>; 44 using ReleaseCallback = std::function<void()>;
44 45
45 Stream(u32 sample_rate, Format format, ReleaseCallback&& release_callback, 46 Stream(Core::Timing::CoreTiming& core_timing, u32 sample_rate, Format format,
46 SinkStream& sink_stream, std::string&& name_); 47 ReleaseCallback&& release_callback, SinkStream& sink_stream, std::string&& name_);
47 48
48 /// Plays the audio stream 49 /// Plays the audio stream
49 void Play(); 50 void Play();
@@ -91,16 +92,17 @@ private:
91 /// Gets the number of core cycles when the specified buffer will be released 92 /// Gets the number of core cycles when the specified buffer will be released
92 s64 GetBufferReleaseCycles(const Buffer& buffer) const; 93 s64 GetBufferReleaseCycles(const Buffer& buffer) const;
93 94
94 u32 sample_rate; ///< Sample rate of the stream 95 u32 sample_rate; ///< Sample rate of the stream
95 Format format; ///< Format of the stream 96 Format format; ///< Format of the stream
96 ReleaseCallback release_callback; ///< Buffer release callback for the stream 97 ReleaseCallback release_callback; ///< Buffer release callback for the stream
97 State state{State::Stopped}; ///< Playback state of the stream 98 State state{State::Stopped}; ///< Playback state of the stream
98 CoreTiming::EventType* release_event{}; ///< Core timing release event for the stream 99 Core::Timing::EventType* release_event{}; ///< Core timing release event for the stream
99 BufferPtr active_buffer; ///< Actively playing buffer in the stream 100 BufferPtr active_buffer; ///< Actively playing buffer in the stream
100 std::queue<BufferPtr> queued_buffers; ///< Buffers queued to be played in the stream 101 std::queue<BufferPtr> queued_buffers; ///< Buffers queued to be played in the stream
101 std::queue<BufferPtr> released_buffers; ///< Buffers recently released from the stream 102 std::queue<BufferPtr> released_buffers; ///< Buffers recently released from the stream
102 SinkStream& sink_stream; ///< Output sink for the stream 103 SinkStream& sink_stream; ///< Output sink for the stream
103 std::string name; ///< Name of the stream, must be unique 104 Core::Timing::CoreTiming& core_timing; ///< Core timing instance.
105 std::string name; ///< Name of the stream, must be unique
104}; 106};
105 107
106using StreamPtr = std::shared_ptr<Stream>; 108using StreamPtr = std::shared_ptr<Stream>;
diff --git a/src/common/CMakeLists.txt b/src/common/CMakeLists.txt
index bdd885273..5639021d3 100644
--- a/src/common/CMakeLists.txt
+++ b/src/common/CMakeLists.txt
@@ -47,6 +47,7 @@ add_custom_command(OUTPUT scm_rev.cpp
47 "${VIDEO_CORE}/shader/decode/integer_set.cpp" 47 "${VIDEO_CORE}/shader/decode/integer_set.cpp"
48 "${VIDEO_CORE}/shader/decode/integer_set_predicate.cpp" 48 "${VIDEO_CORE}/shader/decode/integer_set_predicate.cpp"
49 "${VIDEO_CORE}/shader/decode/memory.cpp" 49 "${VIDEO_CORE}/shader/decode/memory.cpp"
50 "${VIDEO_CORE}/shader/decode/texture.cpp"
50 "${VIDEO_CORE}/shader/decode/other.cpp" 51 "${VIDEO_CORE}/shader/decode/other.cpp"
51 "${VIDEO_CORE}/shader/decode/predicate_set_predicate.cpp" 52 "${VIDEO_CORE}/shader/decode/predicate_set_predicate.cpp"
52 "${VIDEO_CORE}/shader/decode/predicate_set_register.cpp" 53 "${VIDEO_CORE}/shader/decode/predicate_set_register.cpp"
@@ -90,11 +91,18 @@ add_library(common STATIC
90 logging/log.h 91 logging/log.h
91 logging/text_formatter.cpp 92 logging/text_formatter.cpp
92 logging/text_formatter.h 93 logging/text_formatter.h
94 lz4_compression.cpp
95 lz4_compression.h
93 math_util.h 96 math_util.h
97 memory_hook.cpp
98 memory_hook.h
94 microprofile.cpp 99 microprofile.cpp
95 microprofile.h 100 microprofile.h
96 microprofileui.h 101 microprofileui.h
97 misc.cpp 102 misc.cpp
103 multi_level_queue.h
104 page_table.cpp
105 page_table.h
98 param_package.cpp 106 param_package.cpp
99 param_package.h 107 param_package.h
100 quaternion.h 108 quaternion.h
@@ -113,6 +121,8 @@ add_library(common STATIC
113 threadsafe_queue.h 121 threadsafe_queue.h
114 timer.cpp 122 timer.cpp
115 timer.h 123 timer.h
124 uint128.cpp
125 uint128.h
116 vector_math.h 126 vector_math.h
117 web_result.h 127 web_result.h
118) 128)
@@ -128,3 +138,4 @@ endif()
128create_target_directory_groups(common) 138create_target_directory_groups(common)
129 139
130target_link_libraries(common PUBLIC Boost::boost fmt microprofile) 140target_link_libraries(common PUBLIC Boost::boost fmt microprofile)
141target_link_libraries(common PRIVATE lz4_static)
diff --git a/src/common/bit_field.h b/src/common/bit_field.h
index 21e07925d..902e668e3 100644
--- a/src/common/bit_field.h
+++ b/src/common/bit_field.h
@@ -34,6 +34,7 @@
34#include <limits> 34#include <limits>
35#include <type_traits> 35#include <type_traits>
36#include "common/common_funcs.h" 36#include "common/common_funcs.h"
37#include "common/swap.h"
37 38
38/* 39/*
39 * Abstract bitfield class 40 * Abstract bitfield class
@@ -108,15 +109,9 @@
108 * symptoms. 109 * symptoms.
109 */ 110 */
110#pragma pack(1) 111#pragma pack(1)
111template <std::size_t Position, std::size_t Bits, typename T> 112template <std::size_t Position, std::size_t Bits, typename T, typename EndianTag = LETag>
112struct BitField { 113struct BitField {
113private: 114private:
114 // We hide the copy assigment operator here, because the default copy
115 // assignment would copy the full storage value, rather than just the bits
116 // relevant to this particular bit field.
117 // We don't delete it because we want BitField to be trivially copyable.
118 constexpr BitField& operator=(const BitField&) = default;
119
120 // UnderlyingType is T for non-enum types and the underlying type of T if 115 // UnderlyingType is T for non-enum types and the underlying type of T if
121 // T is an enumeration. Note that T is wrapped within an enable_if in the 116 // T is an enumeration. Note that T is wrapped within an enable_if in the
122 // former case to workaround compile errors which arise when using 117 // former case to workaround compile errors which arise when using
@@ -127,6 +122,8 @@ private:
127 // We store the value as the unsigned type to avoid undefined behaviour on value shifting 122 // We store the value as the unsigned type to avoid undefined behaviour on value shifting
128 using StorageType = std::make_unsigned_t<UnderlyingType>; 123 using StorageType = std::make_unsigned_t<UnderlyingType>;
129 124
125 using StorageTypeWithEndian = typename AddEndian<StorageType, EndianTag>::type;
126
130public: 127public:
131 /// Constants to allow limited introspection of fields if needed 128 /// Constants to allow limited introspection of fields if needed
132 static constexpr std::size_t position = Position; 129 static constexpr std::size_t position = Position;
@@ -163,16 +160,20 @@ public:
163 BitField(T val) = delete; 160 BitField(T val) = delete;
164 BitField& operator=(T val) = delete; 161 BitField& operator=(T val) = delete;
165 162
166 // Force default constructor to be created 163 constexpr BitField() noexcept = default;
167 // so that we can use this within unions 164
168 constexpr BitField() = default; 165 constexpr BitField(const BitField&) noexcept = default;
166 constexpr BitField& operator=(const BitField&) noexcept = default;
167
168 constexpr BitField(BitField&&) noexcept = default;
169 constexpr BitField& operator=(BitField&&) noexcept = default;
169 170
170 constexpr FORCE_INLINE operator T() const { 171 constexpr FORCE_INLINE operator T() const {
171 return Value(); 172 return Value();
172 } 173 }
173 174
174 constexpr FORCE_INLINE void Assign(const T& value) { 175 constexpr FORCE_INLINE void Assign(const T& value) {
175 storage = (storage & ~mask) | FormatValue(value); 176 storage = (static_cast<StorageType>(storage) & ~mask) | FormatValue(value);
176 } 177 }
177 178
178 constexpr T Value() const { 179 constexpr T Value() const {
@@ -184,7 +185,7 @@ public:
184 } 185 }
185 186
186private: 187private:
187 StorageType storage; 188 StorageTypeWithEndian storage;
188 189
189 static_assert(bits + position <= 8 * sizeof(T), "Bitfield out of range"); 190 static_assert(bits + position <= 8 * sizeof(T), "Bitfield out of range");
190 191
@@ -195,3 +196,6 @@ private:
195 static_assert(std::is_trivially_copyable_v<T>, "T must be trivially copyable in a BitField"); 196 static_assert(std::is_trivially_copyable_v<T>, "T must be trivially copyable in a BitField");
196}; 197};
197#pragma pack() 198#pragma pack()
199
200template <std::size_t Position, std::size_t Bits, typename T>
201using BitFieldBE = BitField<Position, Bits, T, BETag>;
diff --git a/src/common/bit_util.h b/src/common/bit_util.h
index 1eea17ba1..a4f9ed4aa 100644
--- a/src/common/bit_util.h
+++ b/src/common/bit_util.h
@@ -58,4 +58,43 @@ inline u64 CountLeadingZeroes64(u64 value) {
58 return __builtin_clzll(value); 58 return __builtin_clzll(value);
59} 59}
60#endif 60#endif
61
62#ifdef _MSC_VER
63inline u32 CountTrailingZeroes32(u32 value) {
64 unsigned long trailing_zero = 0;
65
66 if (_BitScanForward(&trailing_zero, value) != 0) {
67 return trailing_zero;
68 }
69
70 return 32;
71}
72
73inline u64 CountTrailingZeroes64(u64 value) {
74 unsigned long trailing_zero = 0;
75
76 if (_BitScanForward64(&trailing_zero, value) != 0) {
77 return trailing_zero;
78 }
79
80 return 64;
81}
82#else
83inline u32 CountTrailingZeroes32(u32 value) {
84 if (value == 0) {
85 return 32;
86 }
87
88 return __builtin_ctz(value);
89}
90
91inline u64 CountTrailingZeroes64(u64 value) {
92 if (value == 0) {
93 return 64;
94 }
95
96 return __builtin_ctzll(value);
97}
98#endif
99
61} // namespace Common 100} // namespace Common
diff --git a/src/common/color.h b/src/common/color.h
index 0379040be..3a2222077 100644
--- a/src/common/color.h
+++ b/src/common/color.h
@@ -55,36 +55,36 @@ constexpr u8 Convert8To6(u8 value) {
55/** 55/**
56 * Decode a color stored in RGBA8 format 56 * Decode a color stored in RGBA8 format
57 * @param bytes Pointer to encoded source color 57 * @param bytes Pointer to encoded source color
58 * @return Result color decoded as Math::Vec4<u8> 58 * @return Result color decoded as Common::Vec4<u8>
59 */ 59 */
60inline Math::Vec4<u8> DecodeRGBA8(const u8* bytes) { 60inline Common::Vec4<u8> DecodeRGBA8(const u8* bytes) {
61 return {bytes[3], bytes[2], bytes[1], bytes[0]}; 61 return {bytes[3], bytes[2], bytes[1], bytes[0]};
62} 62}
63 63
64/** 64/**
65 * Decode a color stored in RGB8 format 65 * Decode a color stored in RGB8 format
66 * @param bytes Pointer to encoded source color 66 * @param bytes Pointer to encoded source color
67 * @return Result color decoded as Math::Vec4<u8> 67 * @return Result color decoded as Common::Vec4<u8>
68 */ 68 */
69inline Math::Vec4<u8> DecodeRGB8(const u8* bytes) { 69inline Common::Vec4<u8> DecodeRGB8(const u8* bytes) {
70 return {bytes[2], bytes[1], bytes[0], 255}; 70 return {bytes[2], bytes[1], bytes[0], 255};
71} 71}
72 72
73/** 73/**
74 * Decode a color stored in RG8 (aka HILO8) format 74 * Decode a color stored in RG8 (aka HILO8) format
75 * @param bytes Pointer to encoded source color 75 * @param bytes Pointer to encoded source color
76 * @return Result color decoded as Math::Vec4<u8> 76 * @return Result color decoded as Common::Vec4<u8>
77 */ 77 */
78inline Math::Vec4<u8> DecodeRG8(const u8* bytes) { 78inline Common::Vec4<u8> DecodeRG8(const u8* bytes) {
79 return {bytes[1], bytes[0], 0, 255}; 79 return {bytes[1], bytes[0], 0, 255};
80} 80}
81 81
82/** 82/**
83 * Decode a color stored in RGB565 format 83 * Decode a color stored in RGB565 format
84 * @param bytes Pointer to encoded source color 84 * @param bytes Pointer to encoded source color
85 * @return Result color decoded as Math::Vec4<u8> 85 * @return Result color decoded as Common::Vec4<u8>
86 */ 86 */
87inline Math::Vec4<u8> DecodeRGB565(const u8* bytes) { 87inline Common::Vec4<u8> DecodeRGB565(const u8* bytes) {
88 u16_le pixel; 88 u16_le pixel;
89 std::memcpy(&pixel, bytes, sizeof(pixel)); 89 std::memcpy(&pixel, bytes, sizeof(pixel));
90 return {Convert5To8((pixel >> 11) & 0x1F), Convert6To8((pixel >> 5) & 0x3F), 90 return {Convert5To8((pixel >> 11) & 0x1F), Convert6To8((pixel >> 5) & 0x3F),
@@ -94,9 +94,9 @@ inline Math::Vec4<u8> DecodeRGB565(const u8* bytes) {
94/** 94/**
95 * Decode a color stored in RGB5A1 format 95 * Decode a color stored in RGB5A1 format
96 * @param bytes Pointer to encoded source color 96 * @param bytes Pointer to encoded source color
97 * @return Result color decoded as Math::Vec4<u8> 97 * @return Result color decoded as Common::Vec4<u8>
98 */ 98 */
99inline Math::Vec4<u8> DecodeRGB5A1(const u8* bytes) { 99inline Common::Vec4<u8> DecodeRGB5A1(const u8* bytes) {
100 u16_le pixel; 100 u16_le pixel;
101 std::memcpy(&pixel, bytes, sizeof(pixel)); 101 std::memcpy(&pixel, bytes, sizeof(pixel));
102 return {Convert5To8((pixel >> 11) & 0x1F), Convert5To8((pixel >> 6) & 0x1F), 102 return {Convert5To8((pixel >> 11) & 0x1F), Convert5To8((pixel >> 6) & 0x1F),
@@ -106,9 +106,9 @@ inline Math::Vec4<u8> DecodeRGB5A1(const u8* bytes) {
106/** 106/**
107 * Decode a color stored in RGBA4 format 107 * Decode a color stored in RGBA4 format
108 * @param bytes Pointer to encoded source color 108 * @param bytes Pointer to encoded source color
109 * @return Result color decoded as Math::Vec4<u8> 109 * @return Result color decoded as Common::Vec4<u8>
110 */ 110 */
111inline Math::Vec4<u8> DecodeRGBA4(const u8* bytes) { 111inline Common::Vec4<u8> DecodeRGBA4(const u8* bytes) {
112 u16_le pixel; 112 u16_le pixel;
113 std::memcpy(&pixel, bytes, sizeof(pixel)); 113 std::memcpy(&pixel, bytes, sizeof(pixel));
114 return {Convert4To8((pixel >> 12) & 0xF), Convert4To8((pixel >> 8) & 0xF), 114 return {Convert4To8((pixel >> 12) & 0xF), Convert4To8((pixel >> 8) & 0xF),
@@ -138,9 +138,9 @@ inline u32 DecodeD24(const u8* bytes) {
138/** 138/**
139 * Decode a depth value and a stencil value stored in D24S8 format 139 * Decode a depth value and a stencil value stored in D24S8 format
140 * @param bytes Pointer to encoded source values 140 * @param bytes Pointer to encoded source values
141 * @return Resulting values stored as a Math::Vec2 141 * @return Resulting values stored as a Common::Vec2
142 */ 142 */
143inline Math::Vec2<u32> DecodeD24S8(const u8* bytes) { 143inline Common::Vec2<u32> DecodeD24S8(const u8* bytes) {
144 return {static_cast<u32>((bytes[2] << 16) | (bytes[1] << 8) | bytes[0]), bytes[3]}; 144 return {static_cast<u32>((bytes[2] << 16) | (bytes[1] << 8) | bytes[0]), bytes[3]};
145} 145}
146 146
@@ -149,7 +149,7 @@ inline Math::Vec2<u32> DecodeD24S8(const u8* bytes) {
149 * @param color Source color to encode 149 * @param color Source color to encode
150 * @param bytes Destination pointer to store encoded color 150 * @param bytes Destination pointer to store encoded color
151 */ 151 */
152inline void EncodeRGBA8(const Math::Vec4<u8>& color, u8* bytes) { 152inline void EncodeRGBA8(const Common::Vec4<u8>& color, u8* bytes) {
153 bytes[3] = color.r(); 153 bytes[3] = color.r();
154 bytes[2] = color.g(); 154 bytes[2] = color.g();
155 bytes[1] = color.b(); 155 bytes[1] = color.b();
@@ -161,7 +161,7 @@ inline void EncodeRGBA8(const Math::Vec4<u8>& color, u8* bytes) {
161 * @param color Source color to encode 161 * @param color Source color to encode
162 * @param bytes Destination pointer to store encoded color 162 * @param bytes Destination pointer to store encoded color
163 */ 163 */
164inline void EncodeRGB8(const Math::Vec4<u8>& color, u8* bytes) { 164inline void EncodeRGB8(const Common::Vec4<u8>& color, u8* bytes) {
165 bytes[2] = color.r(); 165 bytes[2] = color.r();
166 bytes[1] = color.g(); 166 bytes[1] = color.g();
167 bytes[0] = color.b(); 167 bytes[0] = color.b();
@@ -172,7 +172,7 @@ inline void EncodeRGB8(const Math::Vec4<u8>& color, u8* bytes) {
172 * @param color Source color to encode 172 * @param color Source color to encode
173 * @param bytes Destination pointer to store encoded color 173 * @param bytes Destination pointer to store encoded color
174 */ 174 */
175inline void EncodeRG8(const Math::Vec4<u8>& color, u8* bytes) { 175inline void EncodeRG8(const Common::Vec4<u8>& color, u8* bytes) {
176 bytes[1] = color.r(); 176 bytes[1] = color.r();
177 bytes[0] = color.g(); 177 bytes[0] = color.g();
178} 178}
@@ -181,7 +181,7 @@ inline void EncodeRG8(const Math::Vec4<u8>& color, u8* bytes) {
181 * @param color Source color to encode 181 * @param color Source color to encode
182 * @param bytes Destination pointer to store encoded color 182 * @param bytes Destination pointer to store encoded color
183 */ 183 */
184inline void EncodeRGB565(const Math::Vec4<u8>& color, u8* bytes) { 184inline void EncodeRGB565(const Common::Vec4<u8>& color, u8* bytes) {
185 const u16_le data = 185 const u16_le data =
186 (Convert8To5(color.r()) << 11) | (Convert8To6(color.g()) << 5) | Convert8To5(color.b()); 186 (Convert8To5(color.r()) << 11) | (Convert8To6(color.g()) << 5) | Convert8To5(color.b());
187 187
@@ -193,7 +193,7 @@ inline void EncodeRGB565(const Math::Vec4<u8>& color, u8* bytes) {
193 * @param color Source color to encode 193 * @param color Source color to encode
194 * @param bytes Destination pointer to store encoded color 194 * @param bytes Destination pointer to store encoded color
195 */ 195 */
196inline void EncodeRGB5A1(const Math::Vec4<u8>& color, u8* bytes) { 196inline void EncodeRGB5A1(const Common::Vec4<u8>& color, u8* bytes) {
197 const u16_le data = (Convert8To5(color.r()) << 11) | (Convert8To5(color.g()) << 6) | 197 const u16_le data = (Convert8To5(color.r()) << 11) | (Convert8To5(color.g()) << 6) |
198 (Convert8To5(color.b()) << 1) | Convert8To1(color.a()); 198 (Convert8To5(color.b()) << 1) | Convert8To1(color.a());
199 199
@@ -205,7 +205,7 @@ inline void EncodeRGB5A1(const Math::Vec4<u8>& color, u8* bytes) {
205 * @param color Source color to encode 205 * @param color Source color to encode
206 * @param bytes Destination pointer to store encoded color 206 * @param bytes Destination pointer to store encoded color
207 */ 207 */
208inline void EncodeRGBA4(const Math::Vec4<u8>& color, u8* bytes) { 208inline void EncodeRGBA4(const Common::Vec4<u8>& color, u8* bytes) {
209 const u16 data = (Convert8To4(color.r()) << 12) | (Convert8To4(color.g()) << 8) | 209 const u16 data = (Convert8To4(color.r()) << 12) | (Convert8To4(color.g()) << 8) |
210 (Convert8To4(color.b()) << 4) | Convert8To4(color.a()); 210 (Convert8To4(color.b()) << 4) | Convert8To4(color.a());
211 211
diff --git a/src/common/common_types.h b/src/common/common_types.h
index 6b1766dca..4cec89fbd 100644
--- a/src/common/common_types.h
+++ b/src/common/common_types.h
@@ -40,10 +40,9 @@ using s64 = std::int64_t; ///< 64-bit signed int
40using f32 = float; ///< 32-bit floating point 40using f32 = float; ///< 32-bit floating point
41using f64 = double; ///< 64-bit floating point 41using f64 = double; ///< 64-bit floating point
42 42
43// TODO: It would be nice to eventually replace these with strong types that prevent accidental 43using VAddr = u64; ///< Represents a pointer in the userspace virtual address space.
44// conversion between each other. 44using PAddr = u64; ///< Represents a pointer in the ARM11 physical address space.
45using VAddr = u64; ///< Represents a pointer in the userspace virtual address space. 45using GPUVAddr = u64; ///< Represents a pointer in the GPU virtual address space.
46using PAddr = u64; ///< Represents a pointer in the ARM11 physical address space.
47 46
48using u128 = std::array<std::uint64_t, 2>; 47using u128 = std::array<std::uint64_t, 2>;
49static_assert(sizeof(u128) == 16, "u128 must be 128 bits wide"); 48static_assert(sizeof(u128) == 16, "u128 must be 128 bits wide");
diff --git a/src/common/detached_tasks.cpp b/src/common/detached_tasks.cpp
index a347d9e02..f268d6021 100644
--- a/src/common/detached_tasks.cpp
+++ b/src/common/detached_tasks.cpp
@@ -16,22 +16,22 @@ DetachedTasks::DetachedTasks() {
16} 16}
17 17
18void DetachedTasks::WaitForAllTasks() { 18void DetachedTasks::WaitForAllTasks() {
19 std::unique_lock<std::mutex> lock(mutex); 19 std::unique_lock lock{mutex};
20 cv.wait(lock, [this]() { return count == 0; }); 20 cv.wait(lock, [this]() { return count == 0; });
21} 21}
22 22
23DetachedTasks::~DetachedTasks() { 23DetachedTasks::~DetachedTasks() {
24 std::unique_lock<std::mutex> lock(mutex); 24 std::unique_lock lock{mutex};
25 ASSERT(count == 0); 25 ASSERT(count == 0);
26 instance = nullptr; 26 instance = nullptr;
27} 27}
28 28
29void DetachedTasks::AddTask(std::function<void()> task) { 29void DetachedTasks::AddTask(std::function<void()> task) {
30 std::unique_lock<std::mutex> lock(instance->mutex); 30 std::unique_lock lock{instance->mutex};
31 ++instance->count; 31 ++instance->count;
32 std::thread([task{std::move(task)}]() { 32 std::thread([task{std::move(task)}]() {
33 task(); 33 task();
34 std::unique_lock<std::mutex> lock(instance->mutex); 34 std::unique_lock lock{instance->mutex};
35 --instance->count; 35 --instance->count;
36 std::notify_all_at_thread_exit(instance->cv, std::move(lock)); 36 std::notify_all_at_thread_exit(instance->cv, std::move(lock));
37 }) 37 })
diff --git a/src/common/logging/backend.cpp b/src/common/logging/backend.cpp
index 12f6d0114..a03179520 100644
--- a/src/common/logging/backend.cpp
+++ b/src/common/logging/backend.cpp
@@ -39,19 +39,19 @@ public:
39 Impl(Impl const&) = delete; 39 Impl(Impl const&) = delete;
40 const Impl& operator=(Impl const&) = delete; 40 const Impl& operator=(Impl const&) = delete;
41 41
42 void PushEntry(Entry e) { 42 void PushEntry(Class log_class, Level log_level, const char* filename, unsigned int line_num,
43 std::lock_guard<std::mutex> lock(message_mutex); 43 const char* function, std::string message) {
44 message_queue.Push(std::move(e)); 44 message_queue.Push(
45 message_cv.notify_one(); 45 CreateEntry(log_class, log_level, filename, line_num, function, std::move(message)));
46 } 46 }
47 47
48 void AddBackend(std::unique_ptr<Backend> backend) { 48 void AddBackend(std::unique_ptr<Backend> backend) {
49 std::lock_guard<std::mutex> lock(writing_mutex); 49 std::lock_guard lock{writing_mutex};
50 backends.push_back(std::move(backend)); 50 backends.push_back(std::move(backend));
51 } 51 }
52 52
53 void RemoveBackend(std::string_view backend_name) { 53 void RemoveBackend(std::string_view backend_name) {
54 std::lock_guard<std::mutex> lock(writing_mutex); 54 std::lock_guard lock{writing_mutex};
55 const auto it = 55 const auto it =
56 std::remove_if(backends.begin(), backends.end(), 56 std::remove_if(backends.begin(), backends.end(),
57 [&backend_name](const auto& i) { return backend_name == i->GetName(); }); 57 [&backend_name](const auto& i) { return backend_name == i->GetName(); });
@@ -80,21 +80,19 @@ private:
80 backend_thread = std::thread([&] { 80 backend_thread = std::thread([&] {
81 Entry entry; 81 Entry entry;
82 auto write_logs = [&](Entry& e) { 82 auto write_logs = [&](Entry& e) {
83 std::lock_guard<std::mutex> lock(writing_mutex); 83 std::lock_guard lock{writing_mutex};
84 for (const auto& backend : backends) { 84 for (const auto& backend : backends) {
85 backend->Write(e); 85 backend->Write(e);
86 } 86 }
87 }; 87 };
88 while (true) { 88 while (true) {
89 { 89 entry = message_queue.PopWait();
90 std::unique_lock<std::mutex> lock(message_mutex); 90 if (entry.final_entry) {
91 message_cv.wait(lock, [&] { return !running || message_queue.Pop(entry); });
92 }
93 if (!running) {
94 break; 91 break;
95 } 92 }
96 write_logs(entry); 93 write_logs(entry);
97 } 94 }
95
98 // Drain the logging queue. Only writes out up to MAX_LOGS_TO_WRITE to prevent a case 96 // Drain the logging queue. Only writes out up to MAX_LOGS_TO_WRITE to prevent a case
99 // where a system is repeatedly spamming logs even on close. 97 // where a system is repeatedly spamming logs even on close.
100 const int MAX_LOGS_TO_WRITE = filter.IsDebug() ? INT_MAX : 100; 98 const int MAX_LOGS_TO_WRITE = filter.IsDebug() ? INT_MAX : 100;
@@ -106,18 +104,36 @@ private:
106 } 104 }
107 105
108 ~Impl() { 106 ~Impl() {
109 running = false; 107 Entry entry;
110 message_cv.notify_one(); 108 entry.final_entry = true;
109 message_queue.Push(entry);
111 backend_thread.join(); 110 backend_thread.join();
112 } 111 }
113 112
114 std::atomic_bool running{true}; 113 Entry CreateEntry(Class log_class, Level log_level, const char* filename, unsigned int line_nr,
115 std::mutex message_mutex, writing_mutex; 114 const char* function, std::string message) const {
116 std::condition_variable message_cv; 115 using std::chrono::duration_cast;
116 using std::chrono::steady_clock;
117
118 Entry entry;
119 entry.timestamp =
120 duration_cast<std::chrono::microseconds>(steady_clock::now() - time_origin);
121 entry.log_class = log_class;
122 entry.log_level = log_level;
123 entry.filename = Common::TrimSourcePath(filename);
124 entry.line_num = line_nr;
125 entry.function = function;
126 entry.message = std::move(message);
127
128 return entry;
129 }
130
131 std::mutex writing_mutex;
117 std::thread backend_thread; 132 std::thread backend_thread;
118 std::vector<std::unique_ptr<Backend>> backends; 133 std::vector<std::unique_ptr<Backend>> backends;
119 Common::MPSCQueue<Log::Entry> message_queue; 134 Common::MPSCQueue<Log::Entry> message_queue;
120 Filter filter; 135 Filter filter;
136 std::chrono::steady_clock::time_point time_origin{std::chrono::steady_clock::now()};
121}; 137};
122 138
123void ConsoleBackend::Write(const Entry& entry) { 139void ConsoleBackend::Write(const Entry& entry) {
@@ -232,6 +248,7 @@ void DebuggerBackend::Write(const Entry& entry) {
232 CLS(Render) \ 248 CLS(Render) \
233 SUB(Render, Software) \ 249 SUB(Render, Software) \
234 SUB(Render, OpenGL) \ 250 SUB(Render, OpenGL) \
251 SUB(Render, Vulkan) \
235 CLS(Audio) \ 252 CLS(Audio) \
236 SUB(Audio, DSP) \ 253 SUB(Audio, DSP) \
237 SUB(Audio, Sink) \ 254 SUB(Audio, Sink) \
@@ -275,25 +292,6 @@ const char* GetLevelName(Level log_level) {
275#undef LVL 292#undef LVL
276} 293}
277 294
278Entry CreateEntry(Class log_class, Level log_level, const char* filename, unsigned int line_nr,
279 const char* function, std::string message) {
280 using std::chrono::duration_cast;
281 using std::chrono::steady_clock;
282
283 static steady_clock::time_point time_origin = steady_clock::now();
284
285 Entry entry;
286 entry.timestamp = duration_cast<std::chrono::microseconds>(steady_clock::now() - time_origin);
287 entry.log_class = log_class;
288 entry.log_level = log_level;
289 entry.filename = Common::TrimSourcePath(filename);
290 entry.line_num = line_nr;
291 entry.function = function;
292 entry.message = std::move(message);
293
294 return entry;
295}
296
297void SetGlobalFilter(const Filter& filter) { 295void SetGlobalFilter(const Filter& filter) {
298 Impl::Instance().SetGlobalFilter(filter); 296 Impl::Instance().SetGlobalFilter(filter);
299} 297}
@@ -318,9 +316,7 @@ void FmtLogMessageImpl(Class log_class, Level log_level, const char* filename,
318 if (!filter.CheckMessage(log_class, log_level)) 316 if (!filter.CheckMessage(log_class, log_level))
319 return; 317 return;
320 318
321 Entry entry = 319 instance.PushEntry(log_class, log_level, filename, line_num, function,
322 CreateEntry(log_class, log_level, filename, line_num, function, fmt::vformat(format, args)); 320 fmt::vformat(format, args));
323
324 instance.PushEntry(std::move(entry));
325} 321}
326} // namespace Log 322} // namespace Log
diff --git a/src/common/logging/backend.h b/src/common/logging/backend.h
index 91bb0c309..fca0267a1 100644
--- a/src/common/logging/backend.h
+++ b/src/common/logging/backend.h
@@ -27,6 +27,7 @@ struct Entry {
27 unsigned int line_num; 27 unsigned int line_num;
28 std::string function; 28 std::string function;
29 std::string message; 29 std::string message;
30 bool final_entry = false;
30 31
31 Entry() = default; 32 Entry() = default;
32 Entry(Entry&& o) = default; 33 Entry(Entry&& o) = default;
@@ -134,10 +135,6 @@ const char* GetLogClassName(Class log_class);
134 */ 135 */
135const char* GetLevelName(Level log_level); 136const char* GetLevelName(Level log_level);
136 137
137/// Creates a log entry by formatting the given source location, and message.
138Entry CreateEntry(Class log_class, Level log_level, const char* filename, unsigned int line_nr,
139 const char* function, std::string message);
140
141/** 138/**
142 * The global filter will prevent any messages from even being processed if they are filtered. Each 139 * The global filter will prevent any messages from even being processed if they are filtered. Each
143 * backend can have a filter, but if the level is lower than the global filter, the backend will 140 * backend can have a filter, but if the level is lower than the global filter, the backend will
diff --git a/src/common/logging/log.h b/src/common/logging/log.h
index d4ec31ec3..8ed6d5050 100644
--- a/src/common/logging/log.h
+++ b/src/common/logging/log.h
@@ -112,6 +112,7 @@ enum class Class : ClassType {
112 Render, ///< Emulator video output and hardware acceleration 112 Render, ///< Emulator video output and hardware acceleration
113 Render_Software, ///< Software renderer backend 113 Render_Software, ///< Software renderer backend
114 Render_OpenGL, ///< OpenGL backend 114 Render_OpenGL, ///< OpenGL backend
115 Render_Vulkan, ///< Vulkan backend
115 Audio, ///< Audio emulation 116 Audio, ///< Audio emulation
116 Audio_DSP, ///< The HLE implementation of the DSP 117 Audio_DSP, ///< The HLE implementation of the DSP
117 Audio_Sink, ///< Emulator audio output backend 118 Audio_Sink, ///< Emulator audio output backend
diff --git a/src/common/lz4_compression.cpp b/src/common/lz4_compression.cpp
new file mode 100644
index 000000000..dc9b4a916
--- /dev/null
+++ b/src/common/lz4_compression.cpp
@@ -0,0 +1,78 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <algorithm>
8#include <lz4hc.h>
9
10#include "common/assert.h"
11#include "common/lz4_compression.h"
12
13namespace Common::Compression {
14
15std::vector<u8> CompressDataLZ4(const u8* source, std::size_t source_size) {
16 ASSERT_MSG(source_size <= LZ4_MAX_INPUT_SIZE, "Source size exceeds LZ4 maximum input size");
17
18 const auto source_size_int = static_cast<int>(source_size);
19 const int max_compressed_size = LZ4_compressBound(source_size_int);
20 std::vector<u8> compressed(max_compressed_size);
21
22 const int compressed_size = LZ4_compress_default(reinterpret_cast<const char*>(source),
23 reinterpret_cast<char*>(compressed.data()),
24 source_size_int, max_compressed_size);
25
26 if (compressed_size <= 0) {
27 // Compression failed
28 return {};
29 }
30
31 compressed.resize(compressed_size);
32
33 return compressed;
34}
35
36std::vector<u8> CompressDataLZ4HC(const u8* source, std::size_t source_size,
37 s32 compression_level) {
38 ASSERT_MSG(source_size <= LZ4_MAX_INPUT_SIZE, "Source size exceeds LZ4 maximum input size");
39
40 compression_level = std::clamp(compression_level, LZ4HC_CLEVEL_MIN, LZ4HC_CLEVEL_MAX);
41
42 const auto source_size_int = static_cast<int>(source_size);
43 const int max_compressed_size = LZ4_compressBound(source_size_int);
44 std::vector<u8> compressed(max_compressed_size);
45
46 const int compressed_size = LZ4_compress_HC(
47 reinterpret_cast<const char*>(source), reinterpret_cast<char*>(compressed.data()),
48 source_size_int, max_compressed_size, compression_level);
49
50 if (compressed_size <= 0) {
51 // Compression failed
52 return {};
53 }
54
55 compressed.resize(compressed_size);
56
57 return compressed;
58}
59
60std::vector<u8> CompressDataLZ4HCMax(const u8* source, std::size_t source_size) {
61 return CompressDataLZ4HC(source, source_size, LZ4HC_CLEVEL_MAX);
62}
63
64std::vector<u8> DecompressDataLZ4(const std::vector<u8>& compressed,
65 std::size_t uncompressed_size) {
66 std::vector<u8> uncompressed(uncompressed_size);
67 const int size_check = LZ4_decompress_safe(reinterpret_cast<const char*>(compressed.data()),
68 reinterpret_cast<char*>(uncompressed.data()),
69 static_cast<int>(compressed.size()),
70 static_cast<int>(uncompressed.size()));
71 if (static_cast<int>(uncompressed_size) != size_check) {
72 // Decompression failed
73 return {};
74 }
75 return uncompressed;
76}
77
78} // namespace Common::Compression
diff --git a/src/common/lz4_compression.h b/src/common/lz4_compression.h
new file mode 100644
index 000000000..fe2231a6c
--- /dev/null
+++ b/src/common/lz4_compression.h
@@ -0,0 +1,55 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <vector>
6
7#include "common/common_types.h"
8
9namespace Common::Compression {
10
11/**
12 * Compresses a source memory region with LZ4 and returns the compressed data in a vector.
13 *
14 * @param source the uncompressed source memory region.
15 * @param source_size the size in bytes of the uncompressed source memory region.
16 *
17 * @return the compressed data.
18 */
19std::vector<u8> CompressDataLZ4(const u8* source, std::size_t source_size);
20
21/**
22 * Utilizes the LZ4 subalgorithm LZ4HC with the specified compression level. Higher compression
23 * levels result in a smaller compressed size, but require more CPU time for compression. The
24 * compression level has almost no impact on decompression speed. Data compressed with LZ4HC can
25 * also be decompressed with the default LZ4 decompression.
26 *
27 * @param source the uncompressed source memory region.
28 * @param source_size the size in bytes of the uncompressed source memory region.
29 * @param compression_level the used compression level. Should be between 3 and 12.
30 *
31 * @return the compressed data.
32 */
33std::vector<u8> CompressDataLZ4HC(const u8* source, std::size_t source_size, s32 compression_level);
34
35/**
36 * Utilizes the LZ4 subalgorithm LZ4HC with the highest possible compression level.
37 *
38 * @param source the uncompressed source memory region.
39 * @param source_size the size in bytes of the uncompressed source memory region.
40 *
41 * @return the compressed data.
42 */
43std::vector<u8> CompressDataLZ4HCMax(const u8* source, std::size_t source_size);
44
45/**
46 * Decompresses a source memory region with LZ4 and returns the uncompressed data in a vector.
47 *
48 * @param compressed the compressed source memory region.
49 * @param uncompressed_size the size in bytes of the uncompressed data.
50 *
51 * @return the decompressed data.
52 */
53std::vector<u8> DecompressDataLZ4(const std::vector<u8>& compressed, std::size_t uncompressed_size);
54
55} // namespace Common::Compression \ No newline at end of file
diff --git a/src/common/math_util.h b/src/common/math_util.h
index 94b4394c5..cff3d48c5 100644
--- a/src/common/math_util.h
+++ b/src/common/math_util.h
@@ -7,7 +7,7 @@
7#include <cstdlib> 7#include <cstdlib>
8#include <type_traits> 8#include <type_traits>
9 9
10namespace MathUtil { 10namespace Common {
11 11
12constexpr float PI = 3.14159265f; 12constexpr float PI = 3.14159265f;
13 13
@@ -41,4 +41,4 @@ struct Rectangle {
41 } 41 }
42}; 42};
43 43
44} // namespace MathUtil 44} // namespace Common
diff --git a/src/core/memory_hook.cpp b/src/common/memory_hook.cpp
index c61c6c1fb..3986986d6 100644
--- a/src/core/memory_hook.cpp
+++ b/src/common/memory_hook.cpp
@@ -2,10 +2,10 @@
2// Licensed under GPLv2 or any later version 2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included. 3// Refer to the license.txt file included.
4 4
5#include "core/memory_hook.h" 5#include "common/memory_hook.h"
6 6
7namespace Memory { 7namespace Common {
8 8
9MemoryHook::~MemoryHook() = default; 9MemoryHook::~MemoryHook() = default;
10 10
11} // namespace Memory 11} // namespace Common
diff --git a/src/core/memory_hook.h b/src/common/memory_hook.h
index 940777107..adaa4c2c5 100644
--- a/src/core/memory_hook.h
+++ b/src/common/memory_hook.h
@@ -9,7 +9,7 @@
9 9
10#include "common/common_types.h" 10#include "common/common_types.h"
11 11
12namespace Memory { 12namespace Common {
13 13
14/** 14/**
15 * Memory hooks have two purposes: 15 * Memory hooks have two purposes:
@@ -44,4 +44,4 @@ public:
44}; 44};
45 45
46using MemoryHookPointer = std::shared_ptr<MemoryHook>; 46using MemoryHookPointer = std::shared_ptr<MemoryHook>;
47} // namespace Memory 47} // namespace Common
diff --git a/src/common/multi_level_queue.h b/src/common/multi_level_queue.h
new file mode 100644
index 000000000..2b61b91e0
--- /dev/null
+++ b/src/common/multi_level_queue.h
@@ -0,0 +1,337 @@
1// Copyright 2019 TuxSH
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <array>
8#include <iterator>
9#include <list>
10#include <utility>
11
12#include "common/bit_util.h"
13#include "common/common_types.h"
14
15namespace Common {
16
17/**
18 * A MultiLevelQueue is a type of priority queue which has the following characteristics:
19 * - iteratable through each of its elements.
20 * - back can be obtained.
21 * - O(1) add, lookup (both front and back)
22 * - discrete priorities and a max of 64 priorities (limited domain)
23 * This type of priority queue is normaly used for managing threads within an scheduler
24 */
25template <typename T, std::size_t Depth>
26class MultiLevelQueue {
27public:
28 using value_type = T;
29 using reference = value_type&;
30 using const_reference = const value_type&;
31 using pointer = value_type*;
32 using const_pointer = const value_type*;
33
34 using difference_type = typename std::pointer_traits<pointer>::difference_type;
35 using size_type = std::size_t;
36
37 template <bool is_constant>
38 class iterator_impl {
39 public:
40 using iterator_category = std::bidirectional_iterator_tag;
41 using value_type = T;
42 using pointer = std::conditional_t<is_constant, T*, const T*>;
43 using reference = std::conditional_t<is_constant, const T&, T&>;
44 using difference_type = typename std::pointer_traits<pointer>::difference_type;
45
46 friend bool operator==(const iterator_impl& lhs, const iterator_impl& rhs) {
47 if (lhs.IsEnd() && rhs.IsEnd())
48 return true;
49 return std::tie(lhs.current_priority, lhs.it) == std::tie(rhs.current_priority, rhs.it);
50 }
51
52 friend bool operator!=(const iterator_impl& lhs, const iterator_impl& rhs) {
53 return !operator==(lhs, rhs);
54 }
55
56 reference operator*() const {
57 return *it;
58 }
59
60 pointer operator->() const {
61 return it.operator->();
62 }
63
64 iterator_impl& operator++() {
65 if (IsEnd()) {
66 return *this;
67 }
68
69 ++it;
70
71 if (it == GetEndItForPrio()) {
72 u64 prios = mlq.used_priorities;
73 prios &= ~((1ULL << (current_priority + 1)) - 1);
74 if (prios == 0) {
75 current_priority = mlq.depth();
76 } else {
77 current_priority = CountTrailingZeroes64(prios);
78 it = GetBeginItForPrio();
79 }
80 }
81 return *this;
82 }
83
84 iterator_impl& operator--() {
85 if (IsEnd()) {
86 if (mlq.used_priorities != 0) {
87 current_priority = 63 - CountLeadingZeroes64(mlq.used_priorities);
88 it = GetEndItForPrio();
89 --it;
90 }
91 } else if (it == GetBeginItForPrio()) {
92 u64 prios = mlq.used_priorities;
93 prios &= (1ULL << current_priority) - 1;
94 if (prios != 0) {
95 current_priority = CountTrailingZeroes64(prios);
96 it = GetEndItForPrio();
97 --it;
98 }
99 } else {
100 --it;
101 }
102 return *this;
103 }
104
105 iterator_impl operator++(int) {
106 const iterator_impl v{*this};
107 ++(*this);
108 return v;
109 }
110
111 iterator_impl operator--(int) {
112 const iterator_impl v{*this};
113 --(*this);
114 return v;
115 }
116
117 // allow implicit const->non-const
118 iterator_impl(const iterator_impl<false>& other)
119 : mlq(other.mlq), it(other.it), current_priority(other.current_priority) {}
120
121 iterator_impl(const iterator_impl<true>& other)
122 : mlq(other.mlq), it(other.it), current_priority(other.current_priority) {}
123
124 iterator_impl& operator=(const iterator_impl<false>& other) {
125 mlq = other.mlq;
126 it = other.it;
127 current_priority = other.current_priority;
128 return *this;
129 }
130
131 friend class iterator_impl<true>;
132 iterator_impl() = default;
133
134 private:
135 friend class MultiLevelQueue;
136 using container_ref =
137 std::conditional_t<is_constant, const MultiLevelQueue&, MultiLevelQueue&>;
138 using list_iterator = std::conditional_t<is_constant, typename std::list<T>::const_iterator,
139 typename std::list<T>::iterator>;
140
141 explicit iterator_impl(container_ref mlq, list_iterator it, u32 current_priority)
142 : mlq(mlq), it(it), current_priority(current_priority) {}
143 explicit iterator_impl(container_ref mlq, u32 current_priority)
144 : mlq(mlq), it(), current_priority(current_priority) {}
145
146 bool IsEnd() const {
147 return current_priority == mlq.depth();
148 }
149
150 list_iterator GetBeginItForPrio() const {
151 return mlq.levels[current_priority].begin();
152 }
153
154 list_iterator GetEndItForPrio() const {
155 return mlq.levels[current_priority].end();
156 }
157
158 container_ref mlq;
159 list_iterator it;
160 u32 current_priority;
161 };
162
163 using iterator = iterator_impl<false>;
164 using const_iterator = iterator_impl<true>;
165
166 void add(const T& element, u32 priority, bool send_back = true) {
167 if (send_back)
168 levels[priority].push_back(element);
169 else
170 levels[priority].push_front(element);
171 used_priorities |= 1ULL << priority;
172 }
173
174 void remove(const T& element, u32 priority) {
175 auto it = ListIterateTo(levels[priority], element);
176 if (it == levels[priority].end())
177 return;
178 levels[priority].erase(it);
179 if (levels[priority].empty()) {
180 used_priorities &= ~(1ULL << priority);
181 }
182 }
183
184 void adjust(const T& element, u32 old_priority, u32 new_priority, bool adjust_front = false) {
185 remove(element, old_priority);
186 add(element, new_priority, !adjust_front);
187 }
188 void adjust(const_iterator it, u32 old_priority, u32 new_priority, bool adjust_front = false) {
189 adjust(*it, old_priority, new_priority, adjust_front);
190 }
191
192 void transfer_to_front(const T& element, u32 priority, MultiLevelQueue& other) {
193 ListSplice(other.levels[priority], other.levels[priority].begin(), levels[priority],
194 ListIterateTo(levels[priority], element));
195
196 other.used_priorities |= 1ULL << priority;
197
198 if (levels[priority].empty()) {
199 used_priorities &= ~(1ULL << priority);
200 }
201 }
202
203 void transfer_to_front(const_iterator it, u32 priority, MultiLevelQueue& other) {
204 transfer_to_front(*it, priority, other);
205 }
206
207 void transfer_to_back(const T& element, u32 priority, MultiLevelQueue& other) {
208 ListSplice(other.levels[priority], other.levels[priority].end(), levels[priority],
209 ListIterateTo(levels[priority], element));
210
211 other.used_priorities |= 1ULL << priority;
212
213 if (levels[priority].empty()) {
214 used_priorities &= ~(1ULL << priority);
215 }
216 }
217
218 void transfer_to_back(const_iterator it, u32 priority, MultiLevelQueue& other) {
219 transfer_to_back(*it, priority, other);
220 }
221
222 void yield(u32 priority, std::size_t n = 1) {
223 ListShiftForward(levels[priority], n);
224 }
225
226 std::size_t depth() const {
227 return Depth;
228 }
229
230 std::size_t size(u32 priority) const {
231 return levels[priority].size();
232 }
233
234 std::size_t size() const {
235 u64 priorities = used_priorities;
236 std::size_t size = 0;
237 while (priorities != 0) {
238 const u64 current_priority = CountTrailingZeroes64(priorities);
239 size += levels[current_priority].size();
240 priorities &= ~(1ULL << current_priority);
241 }
242 return size;
243 }
244
245 bool empty() const {
246 return used_priorities == 0;
247 }
248
249 bool empty(u32 priority) const {
250 return (used_priorities & (1ULL << priority)) == 0;
251 }
252
253 u32 highest_priority_set(u32 max_priority = 0) const {
254 const u64 priorities =
255 max_priority == 0 ? used_priorities : (used_priorities & ~((1ULL << max_priority) - 1));
256 return priorities == 0 ? Depth : static_cast<u32>(CountTrailingZeroes64(priorities));
257 }
258
259 u32 lowest_priority_set(u32 min_priority = Depth - 1) const {
260 const u64 priorities = min_priority >= Depth - 1
261 ? used_priorities
262 : (used_priorities & ((1ULL << (min_priority + 1)) - 1));
263 return priorities == 0 ? Depth : 63 - CountLeadingZeroes64(priorities);
264 }
265
266 const_iterator cbegin(u32 max_prio = 0) const {
267 const u32 priority = highest_priority_set(max_prio);
268 return priority == Depth ? cend()
269 : const_iterator{*this, levels[priority].cbegin(), priority};
270 }
271 const_iterator begin(u32 max_prio = 0) const {
272 return cbegin(max_prio);
273 }
274 iterator begin(u32 max_prio = 0) {
275 const u32 priority = highest_priority_set(max_prio);
276 return priority == Depth ? end() : iterator{*this, levels[priority].begin(), priority};
277 }
278
279 const_iterator cend(u32 min_prio = Depth - 1) const {
280 return min_prio == Depth - 1 ? const_iterator{*this, Depth} : cbegin(min_prio + 1);
281 }
282 const_iterator end(u32 min_prio = Depth - 1) const {
283 return cend(min_prio);
284 }
285 iterator end(u32 min_prio = Depth - 1) {
286 return min_prio == Depth - 1 ? iterator{*this, Depth} : begin(min_prio + 1);
287 }
288
289 T& front(u32 max_priority = 0) {
290 const u32 priority = highest_priority_set(max_priority);
291 return levels[priority == Depth ? 0 : priority].front();
292 }
293 const T& front(u32 max_priority = 0) const {
294 const u32 priority = highest_priority_set(max_priority);
295 return levels[priority == Depth ? 0 : priority].front();
296 }
297
298 T back(u32 min_priority = Depth - 1) {
299 const u32 priority = lowest_priority_set(min_priority); // intended
300 return levels[priority == Depth ? 63 : priority].back();
301 }
302 const T& back(u32 min_priority = Depth - 1) const {
303 const u32 priority = lowest_priority_set(min_priority); // intended
304 return levels[priority == Depth ? 63 : priority].back();
305 }
306
307private:
308 using const_list_iterator = typename std::list<T>::const_iterator;
309
310 static void ListShiftForward(std::list<T>& list, const std::size_t shift = 1) {
311 if (shift >= list.size()) {
312 return;
313 }
314
315 const auto begin_range = list.begin();
316 const auto end_range = std::next(begin_range, shift);
317 list.splice(list.end(), list, begin_range, end_range);
318 }
319
320 static void ListSplice(std::list<T>& in_list, const_list_iterator position,
321 std::list<T>& out_list, const_list_iterator element) {
322 in_list.splice(position, out_list, element);
323 }
324
325 static const_list_iterator ListIterateTo(const std::list<T>& list, const T& element) {
326 auto it = list.cbegin();
327 while (it != list.cend() && *it != element) {
328 ++it;
329 }
330 return it;
331 }
332
333 std::array<std::list<T>, Depth> levels;
334 u64 used_priorities = 0;
335};
336
337} // namespace Common
diff --git a/src/common/page_table.cpp b/src/common/page_table.cpp
new file mode 100644
index 000000000..69b7abc54
--- /dev/null
+++ b/src/common/page_table.cpp
@@ -0,0 +1,31 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/page_table.h"
6
7namespace Common {
8
9PageTable::PageTable(std::size_t page_size_in_bits) : page_size_in_bits{page_size_in_bits} {}
10
11PageTable::~PageTable() = default;
12
13void PageTable::Resize(std::size_t address_space_width_in_bits) {
14 const std::size_t num_page_table_entries = 1ULL
15 << (address_space_width_in_bits - page_size_in_bits);
16
17 pointers.resize(num_page_table_entries);
18 attributes.resize(num_page_table_entries);
19 backing_addr.resize(num_page_table_entries);
20
21 // The default is a 39-bit address space, which causes an initial 1GB allocation size. If the
22 // vector size is subsequently decreased (via resize), the vector might not automatically
23 // actually reallocate/resize its underlying allocation, which wastes up to ~800 MB for
24 // 36-bit titles. Call shrink_to_fit to reduce capacity to what's actually in use.
25
26 pointers.shrink_to_fit();
27 attributes.shrink_to_fit();
28 backing_addr.shrink_to_fit();
29}
30
31} // namespace Common
diff --git a/src/common/page_table.h b/src/common/page_table.h
new file mode 100644
index 000000000..8b8ff0bb8
--- /dev/null
+++ b/src/common/page_table.h
@@ -0,0 +1,84 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <vector>
8#include <boost/icl/interval_map.hpp>
9#include "common/common_types.h"
10#include "common/memory_hook.h"
11
12namespace Common {
13
14enum class PageType : u8 {
15 /// Page is unmapped and should cause an access error.
16 Unmapped,
17 /// Page is mapped to regular memory. This is the only type you can get pointers to.
18 Memory,
19 /// Page is mapped to regular memory, but also needs to check for rasterizer cache flushing and
20 /// invalidation
21 RasterizerCachedMemory,
22 /// Page is mapped to a I/O region. Writing and reading to this page is handled by functions.
23 Special,
24 /// Page is allocated for use.
25 Allocated,
26};
27
28struct SpecialRegion {
29 enum class Type {
30 DebugHook,
31 IODevice,
32 } type;
33
34 MemoryHookPointer handler;
35
36 bool operator<(const SpecialRegion& other) const {
37 return std::tie(type, handler) < std::tie(other.type, other.handler);
38 }
39
40 bool operator==(const SpecialRegion& other) const {
41 return std::tie(type, handler) == std::tie(other.type, other.handler);
42 }
43};
44
45/**
46 * A (reasonably) fast way of allowing switchable and remappable process address spaces. It loosely
47 * mimics the way a real CPU page table works.
48 */
49struct PageTable {
50 explicit PageTable(std::size_t page_size_in_bits);
51 ~PageTable();
52
53 /**
54 * Resizes the page table to be able to accomodate enough pages within
55 * a given address space.
56 *
57 * @param address_space_width_in_bits The address size width in bits.
58 */
59 void Resize(std::size_t address_space_width_in_bits);
60
61 /**
62 * Vector of memory pointers backing each page. An entry can only be non-null if the
63 * corresponding entry in the `attributes` vector is of type `Memory`.
64 */
65 std::vector<u8*> pointers;
66
67 /**
68 * Contains MMIO handlers that back memory regions whose entries in the `attribute` vector is
69 * of type `Special`.
70 */
71 boost::icl::interval_map<u64, std::set<SpecialRegion>> special_regions;
72
73 /**
74 * Vector of fine grained page attributes. If it is set to any value other than `Memory`, then
75 * the corresponding entry in `pointers` MUST be set to null.
76 */
77 std::vector<PageType> attributes;
78
79 std::vector<u64> backing_addr;
80
81 const std::size_t page_size_in_bits{};
82};
83
84} // namespace Common
diff --git a/src/common/quaternion.h b/src/common/quaternion.h
index c528c0b68..370198ae0 100644
--- a/src/common/quaternion.h
+++ b/src/common/quaternion.h
@@ -6,12 +6,12 @@
6 6
7#include "common/vector_math.h" 7#include "common/vector_math.h"
8 8
9namespace Math { 9namespace Common {
10 10
11template <typename T> 11template <typename T>
12class Quaternion { 12class Quaternion {
13public: 13public:
14 Math::Vec3<T> xyz; 14 Vec3<T> xyz;
15 T w{}; 15 T w{};
16 16
17 Quaternion<decltype(-T{})> Inverse() const { 17 Quaternion<decltype(-T{})> Inverse() const {
@@ -38,12 +38,12 @@ public:
38}; 38};
39 39
40template <typename T> 40template <typename T>
41auto QuaternionRotate(const Quaternion<T>& q, const Math::Vec3<T>& v) { 41auto QuaternionRotate(const Quaternion<T>& q, const Vec3<T>& v) {
42 return v + 2 * Cross(q.xyz, Cross(q.xyz, v) + v * q.w); 42 return v + 2 * Cross(q.xyz, Cross(q.xyz, v) + v * q.w);
43} 43}
44 44
45inline Quaternion<float> MakeQuaternion(const Math::Vec3<float>& axis, float angle) { 45inline Quaternion<float> MakeQuaternion(const Vec3<float>& axis, float angle) {
46 return {axis * std::sin(angle / 2), std::cos(angle / 2)}; 46 return {axis * std::sin(angle / 2), std::cos(angle / 2)};
47} 47}
48 48
49} // namespace Math 49} // namespace Common
diff --git a/src/common/swap.h b/src/common/swap.h
index 32af0b6ac..b3eab1324 100644
--- a/src/common/swap.h
+++ b/src/common/swap.h
@@ -17,6 +17,8 @@
17 17
18#pragma once 18#pragma once
19 19
20#include <type_traits>
21
20#if defined(_MSC_VER) 22#if defined(_MSC_VER)
21#include <cstdlib> 23#include <cstdlib>
22#elif defined(__linux__) 24#elif defined(__linux__)
@@ -28,8 +30,8 @@
28#include <cstring> 30#include <cstring>
29#include "common/common_types.h" 31#include "common/common_types.h"
30 32
31// GCC 4.6+ 33// GCC
32#if __GNUC__ >= 5 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 6) 34#ifdef __GNUC__
33 35
34#if __BYTE_ORDER__ && (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__) && !defined(COMMON_LITTLE_ENDIAN) 36#if __BYTE_ORDER__ && (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__) && !defined(COMMON_LITTLE_ENDIAN)
35#define COMMON_LITTLE_ENDIAN 1 37#define COMMON_LITTLE_ENDIAN 1
@@ -38,7 +40,7 @@
38#endif 40#endif
39 41
40// LLVM/clang 42// LLVM/clang
41#elif __clang__ 43#elif defined(__clang__)
42 44
43#if __LITTLE_ENDIAN__ && !defined(COMMON_LITTLE_ENDIAN) 45#if __LITTLE_ENDIAN__ && !defined(COMMON_LITTLE_ENDIAN)
44#define COMMON_LITTLE_ENDIAN 1 46#define COMMON_LITTLE_ENDIAN 1
@@ -170,7 +172,7 @@ struct swap_struct_t {
170 using swapped_t = swap_struct_t; 172 using swapped_t = swap_struct_t;
171 173
172protected: 174protected:
173 T value = T(); 175 T value;
174 176
175 static T swap(T v) { 177 static T swap(T v) {
176 return F::swap(v); 178 return F::swap(v);
@@ -605,52 +607,154 @@ struct swap_double_t {
605 } 607 }
606}; 608};
607 609
608#if COMMON_LITTLE_ENDIAN 610template <typename T>
609using u16_le = u16; 611struct swap_enum_t {
610using u32_le = u32; 612 static_assert(std::is_enum_v<T>);
611using u64_le = u64; 613 using base = std::underlying_type_t<T>;
614
615public:
616 swap_enum_t() = default;
617 swap_enum_t(const T& v) : value(swap(v)) {}
618
619 swap_enum_t& operator=(const T& v) {
620 value = swap(v);
621 return *this;
622 }
623
624 operator T() const {
625 return swap(value);
626 }
627
628 explicit operator base() const {
629 return static_cast<base>(swap(value));
630 }
612 631
613using s16_le = s16; 632protected:
614using s32_le = s32; 633 T value{};
615using s64_le = s64; 634 // clang-format off
635 using swap_t = std::conditional_t<
636 std::is_same_v<base, u16>, swap_16_t<u16>, std::conditional_t<
637 std::is_same_v<base, s16>, swap_16_t<s16>, std::conditional_t<
638 std::is_same_v<base, u32>, swap_32_t<u32>, std::conditional_t<
639 std::is_same_v<base, s32>, swap_32_t<s32>, std::conditional_t<
640 std::is_same_v<base, u64>, swap_64_t<u64>, std::conditional_t<
641 std::is_same_v<base, s64>, swap_64_t<s64>, void>>>>>>;
642 // clang-format on
643 static T swap(T x) {
644 return static_cast<T>(swap_t::swap(static_cast<base>(x)));
645 }
646};
616 647
617using float_le = float; 648struct SwapTag {}; // Use the different endianness from the system
618using double_le = double; 649struct KeepTag {}; // Use the same endianness as the system
619 650
620using u64_be = swap_struct_t<u64, swap_64_t<u64>>; 651template <typename T, typename Tag>
621using s64_be = swap_struct_t<s64, swap_64_t<s64>>; 652struct AddEndian;
622 653
623using u32_be = swap_struct_t<u32, swap_32_t<u32>>; 654// KeepTag specializations
624using s32_be = swap_struct_t<s32, swap_32_t<s32>>;
625 655
626using u16_be = swap_struct_t<u16, swap_16_t<u16>>; 656template <typename T>
627using s16_be = swap_struct_t<s16, swap_16_t<s16>>; 657struct AddEndian<T, KeepTag> {
658 using type = T;
659};
628 660
629using float_be = swap_struct_t<float, swap_float_t<float>>; 661// SwapTag specializations
630using double_be = swap_struct_t<double, swap_double_t<double>>; 662
631#else 663template <>
664struct AddEndian<u8, SwapTag> {
665 using type = u8;
666};
667
668template <>
669struct AddEndian<u16, SwapTag> {
670 using type = swap_struct_t<u16, swap_16_t<u16>>;
671};
672
673template <>
674struct AddEndian<u32, SwapTag> {
675 using type = swap_struct_t<u32, swap_32_t<u32>>;
676};
632 677
633using u64_le = swap_struct_t<u64, swap_64_t<u64>>; 678template <>
634using s64_le = swap_struct_t<s64, swap_64_t<s64>>; 679struct AddEndian<u64, SwapTag> {
680 using type = swap_struct_t<u64, swap_64_t<u64>>;
681};
682
683template <>
684struct AddEndian<s8, SwapTag> {
685 using type = s8;
686};
635 687
636using u32_le = swap_struct_t<u32, swap_32_t<u32>>; 688template <>
637using s32_le = swap_struct_t<s32, swap_32_t<s32>>; 689struct AddEndian<s16, SwapTag> {
690 using type = swap_struct_t<s16, swap_16_t<s16>>;
691};
638 692
639using u16_le = swap_struct_t<u16, swap_16_t<u16>>; 693template <>
640using s16_le = swap_struct_t<s16, swap_16_t<s16>>; 694struct AddEndian<s32, SwapTag> {
695 using type = swap_struct_t<s32, swap_32_t<s32>>;
696};
697
698template <>
699struct AddEndian<s64, SwapTag> {
700 using type = swap_struct_t<s64, swap_64_t<s64>>;
701};
702
703template <>
704struct AddEndian<float, SwapTag> {
705 using type = swap_struct_t<float, swap_float_t<float>>;
706};
707
708template <>
709struct AddEndian<double, SwapTag> {
710 using type = swap_struct_t<double, swap_double_t<double>>;
711};
712
713template <typename T>
714struct AddEndian<T, SwapTag> {
715 static_assert(std::is_enum_v<T>);
716 using type = swap_enum_t<T>;
717};
641 718
642using float_le = swap_struct_t<float, swap_float_t<float>>; 719// Alias LETag/BETag as KeepTag/SwapTag depending on the system
643using double_le = swap_struct_t<double, swap_double_t<double>>; 720#if COMMON_LITTLE_ENDIAN
644 721
645using u16_be = u16; 722using LETag = KeepTag;
646using u32_be = u32; 723using BETag = SwapTag;
647using u64_be = u64;
648 724
649using s16_be = s16; 725#else
650using s32_be = s32;
651using s64_be = s64;
652 726
653using float_be = float; 727using BETag = KeepTag;
654using double_be = double; 728using LETag = SwapTag;
655 729
656#endif 730#endif
731
732// Aliases for LE types
733using u16_le = AddEndian<u16, LETag>::type;
734using u32_le = AddEndian<u32, LETag>::type;
735using u64_le = AddEndian<u64, LETag>::type;
736
737using s16_le = AddEndian<s16, LETag>::type;
738using s32_le = AddEndian<s32, LETag>::type;
739using s64_le = AddEndian<s64, LETag>::type;
740
741template <typename T>
742using enum_le = std::enable_if_t<std::is_enum_v<T>, typename AddEndian<T, LETag>::type>;
743
744using float_le = AddEndian<float, LETag>::type;
745using double_le = AddEndian<double, LETag>::type;
746
747// Aliases for BE types
748using u16_be = AddEndian<u16, BETag>::type;
749using u32_be = AddEndian<u32, BETag>::type;
750using u64_be = AddEndian<u64, BETag>::type;
751
752using s16_be = AddEndian<s16, BETag>::type;
753using s32_be = AddEndian<s32, BETag>::type;
754using s64_be = AddEndian<s64, BETag>::type;
755
756template <typename T>
757using enum_be = std::enable_if_t<std::is_enum_v<T>, typename AddEndian<T, BETag>::type>;
758
759using float_be = AddEndian<float, BETag>::type;
760using double_be = AddEndian<double, BETag>::type;
diff --git a/src/common/thread.cpp b/src/common/thread.cpp
index 5144c0d9f..fe7a420cc 100644
--- a/src/common/thread.cpp
+++ b/src/common/thread.cpp
@@ -27,18 +27,6 @@ namespace Common {
27 27
28#ifdef _MSC_VER 28#ifdef _MSC_VER
29 29
30void SetThreadAffinity(std::thread::native_handle_type thread, u32 mask) {
31 SetThreadAffinityMask(thread, mask);
32}
33
34void SetCurrentThreadAffinity(u32 mask) {
35 SetThreadAffinityMask(GetCurrentThread(), mask);
36}
37
38void SwitchCurrentThread() {
39 SwitchToThread();
40}
41
42// Sets the debugger-visible name of the current thread. 30// Sets the debugger-visible name of the current thread.
43// Uses undocumented (actually, it is now documented) trick. 31// Uses undocumented (actually, it is now documented) trick.
44// http://msdn.microsoft.com/library/default.asp?url=/library/en-us/vsdebug/html/vxtsksettingthreadname.asp 32// http://msdn.microsoft.com/library/default.asp?url=/library/en-us/vsdebug/html/vxtsksettingthreadname.asp
@@ -70,31 +58,6 @@ void SetCurrentThreadName(const char* name) {
70 58
71#else // !MSVC_VER, so must be POSIX threads 59#else // !MSVC_VER, so must be POSIX threads
72 60
73void SetThreadAffinity(std::thread::native_handle_type thread, u32 mask) {
74#ifdef __APPLE__
75 thread_policy_set(pthread_mach_thread_np(thread), THREAD_AFFINITY_POLICY, (integer_t*)&mask, 1);
76#elif (defined __linux__ || defined __FreeBSD__) && !(defined ANDROID)
77 cpu_set_t cpu_set;
78 CPU_ZERO(&cpu_set);
79
80 for (int i = 0; i != sizeof(mask) * 8; ++i)
81 if ((mask >> i) & 1)
82 CPU_SET(i, &cpu_set);
83
84 pthread_setaffinity_np(thread, sizeof(cpu_set), &cpu_set);
85#endif
86}
87
88void SetCurrentThreadAffinity(u32 mask) {
89 SetThreadAffinity(pthread_self(), mask);
90}
91
92#ifndef _WIN32
93void SwitchCurrentThread() {
94 usleep(1000 * 1);
95}
96#endif
97
98// MinGW with the POSIX threading model does not support pthread_setname_np 61// MinGW with the POSIX threading model does not support pthread_setname_np
99#if !defined(_WIN32) || defined(_MSC_VER) 62#if !defined(_WIN32) || defined(_MSC_VER)
100void SetCurrentThreadName(const char* name) { 63void SetCurrentThreadName(const char* name) {
diff --git a/src/common/thread.h b/src/common/thread.h
index 2cf74452d..0cfd98be6 100644
--- a/src/common/thread.h
+++ b/src/common/thread.h
@@ -9,14 +9,13 @@
9#include <cstddef> 9#include <cstddef>
10#include <mutex> 10#include <mutex>
11#include <thread> 11#include <thread>
12#include "common/common_types.h"
13 12
14namespace Common { 13namespace Common {
15 14
16class Event { 15class Event {
17public: 16public:
18 void Set() { 17 void Set() {
19 std::lock_guard<std::mutex> lk(mutex); 18 std::lock_guard lk{mutex};
20 if (!is_set) { 19 if (!is_set) {
21 is_set = true; 20 is_set = true;
22 condvar.notify_one(); 21 condvar.notify_one();
@@ -24,14 +23,14 @@ public:
24 } 23 }
25 24
26 void Wait() { 25 void Wait() {
27 std::unique_lock<std::mutex> lk(mutex); 26 std::unique_lock lk{mutex};
28 condvar.wait(lk, [&] { return is_set; }); 27 condvar.wait(lk, [&] { return is_set; });
29 is_set = false; 28 is_set = false;
30 } 29 }
31 30
32 template <class Clock, class Duration> 31 template <class Clock, class Duration>
33 bool WaitUntil(const std::chrono::time_point<Clock, Duration>& time) { 32 bool WaitUntil(const std::chrono::time_point<Clock, Duration>& time) {
34 std::unique_lock<std::mutex> lk(mutex); 33 std::unique_lock lk{mutex};
35 if (!condvar.wait_until(lk, time, [this] { return is_set; })) 34 if (!condvar.wait_until(lk, time, [this] { return is_set; }))
36 return false; 35 return false;
37 is_set = false; 36 is_set = false;
@@ -39,7 +38,7 @@ public:
39 } 38 }
40 39
41 void Reset() { 40 void Reset() {
42 std::unique_lock<std::mutex> lk(mutex); 41 std::unique_lock lk{mutex};
43 // no other action required, since wait loops on the predicate and any lingering signal will 42 // no other action required, since wait loops on the predicate and any lingering signal will
44 // get cleared on the first iteration 43 // get cleared on the first iteration
45 is_set = false; 44 is_set = false;
@@ -57,7 +56,7 @@ public:
57 56
58 /// Blocks until all "count" threads have called Sync() 57 /// Blocks until all "count" threads have called Sync()
59 void Sync() { 58 void Sync() {
60 std::unique_lock<std::mutex> lk(mutex); 59 std::unique_lock lk{mutex};
61 const std::size_t current_generation = generation; 60 const std::size_t current_generation = generation;
62 61
63 if (++waiting == count) { 62 if (++waiting == count) {
@@ -78,9 +77,6 @@ private:
78 std::size_t generation = 0; // Incremented once each time the barrier is used 77 std::size_t generation = 0; // Incremented once each time the barrier is used
79}; 78};
80 79
81void SetThreadAffinity(std::thread::native_handle_type thread, u32 mask);
82void SetCurrentThreadAffinity(u32 mask);
83void SwitchCurrentThread(); // On Linux, this is equal to sleep 1ms
84void SetCurrentThreadName(const char* name); 80void SetCurrentThreadName(const char* name);
85 81
86} // namespace Common 82} // namespace Common
diff --git a/src/common/thread_queue_list.h b/src/common/thread_queue_list.h
index e7594db68..791f99a8c 100644
--- a/src/common/thread_queue_list.h
+++ b/src/common/thread_queue_list.h
@@ -6,7 +6,6 @@
6 6
7#include <array> 7#include <array>
8#include <deque> 8#include <deque>
9#include <boost/range/algorithm_ext/erase.hpp>
10 9
11namespace Common { 10namespace Common {
12 11
@@ -111,8 +110,9 @@ struct ThreadQueueList {
111 } 110 }
112 111
113 void remove(Priority priority, const T& thread_id) { 112 void remove(Priority priority, const T& thread_id) {
114 Queue* cur = &queues[priority]; 113 Queue* const cur = &queues[priority];
115 boost::remove_erase(cur->data, thread_id); 114 const auto iter = std::remove(cur->data.begin(), cur->data.end(), thread_id);
115 cur->data.erase(iter, cur->data.end());
116 } 116 }
117 117
118 void rotate(Priority priority) { 118 void rotate(Priority priority) {
diff --git a/src/common/threadsafe_queue.h b/src/common/threadsafe_queue.h
index edf13bc49..e714ba5b3 100644
--- a/src/common/threadsafe_queue.h
+++ b/src/common/threadsafe_queue.h
@@ -7,17 +7,17 @@
7// a simple lockless thread-safe, 7// a simple lockless thread-safe,
8// single reader, single writer queue 8// single reader, single writer queue
9 9
10#include <algorithm>
11#include <atomic> 10#include <atomic>
11#include <condition_variable>
12#include <cstddef> 12#include <cstddef>
13#include <mutex> 13#include <mutex>
14#include "common/common_types.h" 14#include <utility>
15 15
16namespace Common { 16namespace Common {
17template <typename T, bool NeedSize = true> 17template <typename T>
18class SPSCQueue { 18class SPSCQueue {
19public: 19public:
20 SPSCQueue() : size(0) { 20 SPSCQueue() {
21 write_ptr = read_ptr = new ElementPtr(); 21 write_ptr = read_ptr = new ElementPtr();
22 } 22 }
23 ~SPSCQueue() { 23 ~SPSCQueue() {
@@ -25,13 +25,12 @@ public:
25 delete read_ptr; 25 delete read_ptr;
26 } 26 }
27 27
28 u32 Size() const { 28 std::size_t Size() const {
29 static_assert(NeedSize, "using Size() on FifoQueue without NeedSize");
30 return size.load(); 29 return size.load();
31 } 30 }
32 31
33 bool Empty() const { 32 bool Empty() const {
34 return !read_ptr->next.load(); 33 return Size() == 0;
35 } 34 }
36 35
37 T& Front() const { 36 T& Front() const {
@@ -47,13 +46,14 @@ public:
47 ElementPtr* new_ptr = new ElementPtr(); 46 ElementPtr* new_ptr = new ElementPtr();
48 write_ptr->next.store(new_ptr, std::memory_order_release); 47 write_ptr->next.store(new_ptr, std::memory_order_release);
49 write_ptr = new_ptr; 48 write_ptr = new_ptr;
50 if (NeedSize) 49 cv.notify_one();
51 size++; 50
51 ++size;
52 } 52 }
53 53
54 void Pop() { 54 void Pop() {
55 if (NeedSize) 55 --size;
56 size--; 56
57 ElementPtr* tmpptr = read_ptr; 57 ElementPtr* tmpptr = read_ptr;
58 // advance the read pointer 58 // advance the read pointer
59 read_ptr = tmpptr->next.load(); 59 read_ptr = tmpptr->next.load();
@@ -66,8 +66,7 @@ public:
66 if (Empty()) 66 if (Empty())
67 return false; 67 return false;
68 68
69 if (NeedSize) 69 --size;
70 size--;
71 70
72 ElementPtr* tmpptr = read_ptr; 71 ElementPtr* tmpptr = read_ptr;
73 read_ptr = tmpptr->next.load(std::memory_order_acquire); 72 read_ptr = tmpptr->next.load(std::memory_order_acquire);
@@ -77,6 +76,16 @@ public:
77 return true; 76 return true;
78 } 77 }
79 78
79 T PopWait() {
80 if (Empty()) {
81 std::unique_lock lock{cv_mutex};
82 cv.wait(lock, [this]() { return !Empty(); });
83 }
84 T t;
85 Pop(t);
86 return t;
87 }
88
80 // not thread-safe 89 // not thread-safe
81 void Clear() { 90 void Clear() {
82 size.store(0); 91 size.store(0);
@@ -89,7 +98,7 @@ private:
89 // and a pointer to the next ElementPtr 98 // and a pointer to the next ElementPtr
90 class ElementPtr { 99 class ElementPtr {
91 public: 100 public:
92 ElementPtr() : next(nullptr) {} 101 ElementPtr() {}
93 ~ElementPtr() { 102 ~ElementPtr() {
94 ElementPtr* next_ptr = next.load(); 103 ElementPtr* next_ptr = next.load();
95 104
@@ -98,21 +107,23 @@ private:
98 } 107 }
99 108
100 T current; 109 T current;
101 std::atomic<ElementPtr*> next; 110 std::atomic<ElementPtr*> next{nullptr};
102 }; 111 };
103 112
104 ElementPtr* write_ptr; 113 ElementPtr* write_ptr;
105 ElementPtr* read_ptr; 114 ElementPtr* read_ptr;
106 std::atomic<u32> size; 115 std::atomic_size_t size{0};
116 std::mutex cv_mutex;
117 std::condition_variable cv;
107}; 118};
108 119
109// a simple thread-safe, 120// a simple thread-safe,
110// single reader, multiple writer queue 121// single reader, multiple writer queue
111 122
112template <typename T, bool NeedSize = true> 123template <typename T>
113class MPSCQueue { 124class MPSCQueue {
114public: 125public:
115 u32 Size() const { 126 std::size_t Size() const {
116 return spsc_queue.Size(); 127 return spsc_queue.Size();
117 } 128 }
118 129
@@ -126,7 +137,7 @@ public:
126 137
127 template <typename Arg> 138 template <typename Arg>
128 void Push(Arg&& t) { 139 void Push(Arg&& t) {
129 std::lock_guard<std::mutex> lock(write_lock); 140 std::lock_guard lock{write_lock};
130 spsc_queue.Push(t); 141 spsc_queue.Push(t);
131 } 142 }
132 143
@@ -138,13 +149,17 @@ public:
138 return spsc_queue.Pop(t); 149 return spsc_queue.Pop(t);
139 } 150 }
140 151
152 T PopWait() {
153 return spsc_queue.PopWait();
154 }
155
141 // not thread-safe 156 // not thread-safe
142 void Clear() { 157 void Clear() {
143 spsc_queue.Clear(); 158 spsc_queue.Clear();
144 } 159 }
145 160
146private: 161private:
147 SPSCQueue<T, NeedSize> spsc_queue; 162 SPSCQueue<T> spsc_queue;
148 std::mutex write_lock; 163 std::mutex write_lock;
149}; 164};
150} // namespace Common 165} // namespace Common
diff --git a/src/common/uint128.cpp b/src/common/uint128.cpp
new file mode 100644
index 000000000..32bf56730
--- /dev/null
+++ b/src/common/uint128.cpp
@@ -0,0 +1,45 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#ifdef _MSC_VER
6#include <intrin.h>
7
8#pragma intrinsic(_umul128)
9#endif
10#include <cstring>
11#include "common/uint128.h"
12
13namespace Common {
14
15u128 Multiply64Into128(u64 a, u64 b) {
16 u128 result;
17#ifdef _MSC_VER
18 result[0] = _umul128(a, b, &result[1]);
19#else
20 unsigned __int128 tmp = a;
21 tmp *= b;
22 std::memcpy(&result, &tmp, sizeof(u128));
23#endif
24 return result;
25}
26
27std::pair<u64, u64> Divide128On32(u128 dividend, u32 divisor) {
28 u64 remainder = dividend[0] % divisor;
29 u64 accum = dividend[0] / divisor;
30 if (dividend[1] == 0)
31 return {accum, remainder};
32 // We ignore dividend[1] / divisor as that overflows
33 const u64 first_segment = (dividend[1] % divisor) << 32;
34 accum += (first_segment / divisor) << 32;
35 const u64 second_segment = (first_segment % divisor) << 32;
36 accum += (second_segment / divisor);
37 remainder += second_segment % divisor;
38 if (remainder >= divisor) {
39 accum++;
40 remainder -= divisor;
41 }
42 return {accum, remainder};
43}
44
45} // namespace Common
diff --git a/src/common/uint128.h b/src/common/uint128.h
new file mode 100644
index 000000000..a3be2a2cb
--- /dev/null
+++ b/src/common/uint128.h
@@ -0,0 +1,19 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <utility>
8#include "common/common_types.h"
9
10namespace Common {
11
12// This function multiplies 2 u64 values and produces a u128 value;
13u128 Multiply64Into128(u64 a, u64 b);
14
15// This function divides a u128 by a u32 value and produces two u64 values:
16// the result of division and the remainder
17std::pair<u64, u64> Divide128On32(u128 dividend, u32 divisor);
18
19} // namespace Common
diff --git a/src/common/vector_math.h b/src/common/vector_math.h
index 8feb49941..429485329 100644
--- a/src/common/vector_math.h
+++ b/src/common/vector_math.h
@@ -33,7 +33,7 @@
33#include <cmath> 33#include <cmath>
34#include <type_traits> 34#include <type_traits>
35 35
36namespace Math { 36namespace Common {
37 37
38template <typename T> 38template <typename T>
39class Vec2; 39class Vec2;
@@ -690,4 +690,4 @@ constexpr Vec4<T> MakeVec(const T& x, const Vec3<T>& yzw) {
690 return MakeVec(x, yzw[0], yzw[1], yzw[2]); 690 return MakeVec(x, yzw[0], yzw[1], yzw[2]);
691} 691}
692 692
693} // namespace Math 693} // namespace Common
diff --git a/src/core/CMakeLists.txt b/src/core/CMakeLists.txt
index f61bcd40d..c59107102 100644
--- a/src/core/CMakeLists.txt
+++ b/src/core/CMakeLists.txt
@@ -31,6 +31,8 @@ add_library(core STATIC
31 file_sys/bis_factory.h 31 file_sys/bis_factory.h
32 file_sys/card_image.cpp 32 file_sys/card_image.cpp
33 file_sys/card_image.h 33 file_sys/card_image.h
34 file_sys/cheat_engine.cpp
35 file_sys/cheat_engine.h
34 file_sys/content_archive.cpp 36 file_sys/content_archive.cpp
35 file_sys/content_archive.h 37 file_sys/content_archive.h
36 file_sys/control_metadata.cpp 38 file_sys/control_metadata.cpp
@@ -68,6 +70,8 @@ add_library(core STATIC
68 file_sys/system_archive/ng_word.h 70 file_sys/system_archive/ng_word.h
69 file_sys/system_archive/system_archive.cpp 71 file_sys/system_archive/system_archive.cpp
70 file_sys/system_archive/system_archive.h 72 file_sys/system_archive/system_archive.h
73 file_sys/system_archive/system_version.cpp
74 file_sys/system_archive/system_version.h
71 file_sys/vfs.cpp 75 file_sys/vfs.cpp
72 file_sys/vfs.h 76 file_sys/vfs.h
73 file_sys/vfs_concat.cpp 77 file_sys/vfs_concat.cpp
@@ -107,6 +111,8 @@ add_library(core STATIC
107 hle/kernel/client_port.h 111 hle/kernel/client_port.h
108 hle/kernel/client_session.cpp 112 hle/kernel/client_session.cpp
109 hle/kernel/client_session.h 113 hle/kernel/client_session.h
114 hle/kernel/code_set.cpp
115 hle/kernel/code_set.h
110 hle/kernel/errors.h 116 hle/kernel/errors.h
111 hle/kernel/handle_table.cpp 117 hle/kernel/handle_table.cpp
112 hle/kernel/handle_table.h 118 hle/kernel/handle_table.h
@@ -140,6 +146,8 @@ add_library(core STATIC
140 hle/kernel/svc_wrap.h 146 hle/kernel/svc_wrap.h
141 hle/kernel/thread.cpp 147 hle/kernel/thread.cpp
142 hle/kernel/thread.h 148 hle/kernel/thread.h
149 hle/kernel/transfer_memory.cpp
150 hle/kernel/transfer_memory.h
143 hle/kernel/vm_manager.cpp 151 hle/kernel/vm_manager.cpp
144 hle/kernel/vm_manager.h 152 hle/kernel/vm_manager.h
145 hle/kernel/wait_object.cpp 153 hle/kernel/wait_object.cpp
@@ -217,6 +225,7 @@ add_library(core STATIC
217 hle/service/audio/audren_u.h 225 hle/service/audio/audren_u.h
218 hle/service/audio/codecctl.cpp 226 hle/service/audio/codecctl.cpp
219 hle/service/audio/codecctl.h 227 hle/service/audio/codecctl.h
228 hle/service/audio/errors.h
220 hle/service/audio/hwopus.cpp 229 hle/service/audio/hwopus.cpp
221 hle/service/audio/hwopus.h 230 hle/service/audio/hwopus.h
222 hle/service/bcat/bcat.cpp 231 hle/service/bcat/bcat.cpp
@@ -400,6 +409,10 @@ add_library(core STATIC
400 hle/service/time/time.h 409 hle/service/time/time.h
401 hle/service/usb/usb.cpp 410 hle/service/usb/usb.cpp
402 hle/service/usb/usb.h 411 hle/service/usb/usb.h
412 hle/service/vi/display/vi_display.cpp
413 hle/service/vi/display/vi_display.h
414 hle/service/vi/layer/vi_layer.cpp
415 hle/service/vi/layer/vi_layer.h
403 hle/service/vi/vi.cpp 416 hle/service/vi/vi.cpp
404 hle/service/vi/vi.h 417 hle/service/vi/vi.h
405 hle/service/vi/vi_m.cpp 418 hle/service/vi/vi_m.cpp
@@ -414,8 +427,6 @@ add_library(core STATIC
414 loader/deconstructed_rom_directory.h 427 loader/deconstructed_rom_directory.h
415 loader/elf.cpp 428 loader/elf.cpp
416 loader/elf.h 429 loader/elf.h
417 loader/linker.cpp
418 loader/linker.h
419 loader/loader.cpp 430 loader/loader.cpp
420 loader/loader.h 431 loader/loader.h
421 loader/nax.cpp 432 loader/nax.cpp
@@ -432,8 +443,6 @@ add_library(core STATIC
432 loader/xci.h 443 loader/xci.h
433 memory.cpp 444 memory.cpp
434 memory.h 445 memory.h
435 memory_hook.cpp
436 memory_hook.h
437 memory_setup.h 446 memory_setup.h
438 perf_stats.cpp 447 perf_stats.cpp
439 perf_stats.h 448 perf_stats.h
@@ -449,7 +458,7 @@ add_library(core STATIC
449create_target_directory_groups(core) 458create_target_directory_groups(core)
450 459
451target_link_libraries(core PUBLIC common PRIVATE audio_core video_core) 460target_link_libraries(core PUBLIC common PRIVATE audio_core video_core)
452target_link_libraries(core PUBLIC Boost::boost PRIVATE fmt lz4_static mbedtls opus unicorn open_source_archives) 461target_link_libraries(core PUBLIC Boost::boost PRIVATE fmt mbedtls opus unicorn open_source_archives)
453if (ENABLE_WEB_SERVICE) 462if (ENABLE_WEB_SERVICE)
454 target_compile_definitions(core PRIVATE -DENABLE_WEB_SERVICE) 463 target_compile_definitions(core PRIVATE -DENABLE_WEB_SERVICE)
455 target_link_libraries(core PRIVATE web_service) 464 target_link_libraries(core PRIVATE web_service)
diff --git a/src/core/arm/dynarmic/arm_dynarmic.cpp b/src/core/arm/dynarmic/arm_dynarmic.cpp
index afbda8d8b..4fdc12f11 100644
--- a/src/core/arm/dynarmic/arm_dynarmic.cpp
+++ b/src/core/arm/dynarmic/arm_dynarmic.cpp
@@ -12,6 +12,7 @@
12#include "core/core.h" 12#include "core/core.h"
13#include "core/core_cpu.h" 13#include "core/core_cpu.h"
14#include "core/core_timing.h" 14#include "core/core_timing.h"
15#include "core/core_timing_util.h"
15#include "core/gdbstub/gdbstub.h" 16#include "core/gdbstub/gdbstub.h"
16#include "core/hle/kernel/process.h" 17#include "core/hle/kernel/process.h"
17#include "core/hle/kernel/svc.h" 18#include "core/hle/kernel/svc.h"
@@ -112,14 +113,14 @@ public:
112 // Always execute at least one tick. 113 // Always execute at least one tick.
113 amortized_ticks = std::max<u64>(amortized_ticks, 1); 114 amortized_ticks = std::max<u64>(amortized_ticks, 1);
114 115
115 CoreTiming::AddTicks(amortized_ticks); 116 parent.core_timing.AddTicks(amortized_ticks);
116 num_interpreted_instructions = 0; 117 num_interpreted_instructions = 0;
117 } 118 }
118 u64 GetTicksRemaining() override { 119 u64 GetTicksRemaining() override {
119 return std::max(CoreTiming::GetDowncount(), 0); 120 return std::max(parent.core_timing.GetDowncount(), 0);
120 } 121 }
121 u64 GetCNTPCT() override { 122 u64 GetCNTPCT() override {
122 return CoreTiming::GetTicks(); 123 return Timing::CpuCyclesToClockCycles(parent.core_timing.GetTicks());
123 } 124 }
124 125
125 ARM_Dynarmic& parent; 126 ARM_Dynarmic& parent;
@@ -151,7 +152,7 @@ std::unique_ptr<Dynarmic::A64::Jit> ARM_Dynarmic::MakeJit() const {
151 config.tpidr_el0 = &cb->tpidr_el0; 152 config.tpidr_el0 = &cb->tpidr_el0;
152 config.dczid_el0 = 4; 153 config.dczid_el0 = 4;
153 config.ctr_el0 = 0x8444c004; 154 config.ctr_el0 = 0x8444c004;
154 config.cntfrq_el0 = 19200000; // Value from fusee. 155 config.cntfrq_el0 = Timing::CNTFREQ;
155 156
156 // Unpredictable instructions 157 // Unpredictable instructions
157 config.define_unpredictable_behaviour = true; 158 config.define_unpredictable_behaviour = true;
@@ -172,8 +173,10 @@ void ARM_Dynarmic::Step() {
172 cb->InterpreterFallback(jit->GetPC(), 1); 173 cb->InterpreterFallback(jit->GetPC(), 1);
173} 174}
174 175
175ARM_Dynarmic::ARM_Dynarmic(ExclusiveMonitor& exclusive_monitor, std::size_t core_index) 176ARM_Dynarmic::ARM_Dynarmic(Timing::CoreTiming& core_timing, ExclusiveMonitor& exclusive_monitor,
176 : cb(std::make_unique<ARM_Dynarmic_Callbacks>(*this)), core_index{core_index}, 177 std::size_t core_index)
178 : cb(std::make_unique<ARM_Dynarmic_Callbacks>(*this)), inner_unicorn{core_timing},
179 core_index{core_index}, core_timing{core_timing},
177 exclusive_monitor{dynamic_cast<DynarmicExclusiveMonitor&>(exclusive_monitor)} { 180 exclusive_monitor{dynamic_cast<DynarmicExclusiveMonitor&>(exclusive_monitor)} {
178 ThreadContext ctx{}; 181 ThreadContext ctx{};
179 inner_unicorn.SaveContext(ctx); 182 inner_unicorn.SaveContext(ctx);
diff --git a/src/core/arm/dynarmic/arm_dynarmic.h b/src/core/arm/dynarmic/arm_dynarmic.h
index 512bf8ce9..aada1e862 100644
--- a/src/core/arm/dynarmic/arm_dynarmic.h
+++ b/src/core/arm/dynarmic/arm_dynarmic.h
@@ -12,10 +12,14 @@
12#include "core/arm/exclusive_monitor.h" 12#include "core/arm/exclusive_monitor.h"
13#include "core/arm/unicorn/arm_unicorn.h" 13#include "core/arm/unicorn/arm_unicorn.h"
14 14
15namespace Memory { 15namespace Common {
16struct PageTable; 16struct PageTable;
17} 17}
18 18
19namespace Core::Timing {
20class CoreTiming;
21}
22
19namespace Core { 23namespace Core {
20 24
21class ARM_Dynarmic_Callbacks; 25class ARM_Dynarmic_Callbacks;
@@ -23,7 +27,8 @@ class DynarmicExclusiveMonitor;
23 27
24class ARM_Dynarmic final : public ARM_Interface { 28class ARM_Dynarmic final : public ARM_Interface {
25public: 29public:
26 ARM_Dynarmic(ExclusiveMonitor& exclusive_monitor, std::size_t core_index); 30 ARM_Dynarmic(Timing::CoreTiming& core_timing, ExclusiveMonitor& exclusive_monitor,
31 std::size_t core_index);
27 ~ARM_Dynarmic(); 32 ~ARM_Dynarmic();
28 33
29 void MapBackingMemory(VAddr address, std::size_t size, u8* memory, 34 void MapBackingMemory(VAddr address, std::size_t size, u8* memory,
@@ -62,9 +67,10 @@ private:
62 ARM_Unicorn inner_unicorn; 67 ARM_Unicorn inner_unicorn;
63 68
64 std::size_t core_index; 69 std::size_t core_index;
70 Timing::CoreTiming& core_timing;
65 DynarmicExclusiveMonitor& exclusive_monitor; 71 DynarmicExclusiveMonitor& exclusive_monitor;
66 72
67 Memory::PageTable* current_page_table = nullptr; 73 Common::PageTable* current_page_table = nullptr;
68}; 74};
69 75
70class DynarmicExclusiveMonitor final : public ExclusiveMonitor { 76class DynarmicExclusiveMonitor final : public ExclusiveMonitor {
diff --git a/src/core/arm/unicorn/arm_unicorn.cpp b/src/core/arm/unicorn/arm_unicorn.cpp
index c455c81fb..a542a098b 100644
--- a/src/core/arm/unicorn/arm_unicorn.cpp
+++ b/src/core/arm/unicorn/arm_unicorn.cpp
@@ -72,7 +72,7 @@ static bool UnmappedMemoryHook(uc_engine* uc, uc_mem_type type, u64 addr, int si
72 return {}; 72 return {};
73} 73}
74 74
75ARM_Unicorn::ARM_Unicorn() { 75ARM_Unicorn::ARM_Unicorn(Timing::CoreTiming& core_timing) : core_timing{core_timing} {
76 CHECKED(uc_open(UC_ARCH_ARM64, UC_MODE_ARM, &uc)); 76 CHECKED(uc_open(UC_ARCH_ARM64, UC_MODE_ARM, &uc));
77 77
78 auto fpv = 3 << 20; 78 auto fpv = 3 << 20;
@@ -177,7 +177,7 @@ void ARM_Unicorn::Run() {
177 if (GDBStub::IsServerEnabled()) { 177 if (GDBStub::IsServerEnabled()) {
178 ExecuteInstructions(std::max(4000000, 0)); 178 ExecuteInstructions(std::max(4000000, 0));
179 } else { 179 } else {
180 ExecuteInstructions(std::max(CoreTiming::GetDowncount(), 0)); 180 ExecuteInstructions(std::max(core_timing.GetDowncount(), 0));
181 } 181 }
182} 182}
183 183
@@ -190,7 +190,7 @@ MICROPROFILE_DEFINE(ARM_Jit_Unicorn, "ARM JIT", "Unicorn", MP_RGB(255, 64, 64));
190void ARM_Unicorn::ExecuteInstructions(int num_instructions) { 190void ARM_Unicorn::ExecuteInstructions(int num_instructions) {
191 MICROPROFILE_SCOPE(ARM_Jit_Unicorn); 191 MICROPROFILE_SCOPE(ARM_Jit_Unicorn);
192 CHECKED(uc_emu_start(uc, GetPC(), 1ULL << 63, 0, num_instructions)); 192 CHECKED(uc_emu_start(uc, GetPC(), 1ULL << 63, 0, num_instructions));
193 CoreTiming::AddTicks(num_instructions); 193 core_timing.AddTicks(num_instructions);
194 if (GDBStub::IsServerEnabled()) { 194 if (GDBStub::IsServerEnabled()) {
195 if (last_bkpt_hit) { 195 if (last_bkpt_hit) {
196 uc_reg_write(uc, UC_ARM64_REG_PC, &last_bkpt.address); 196 uc_reg_write(uc, UC_ARM64_REG_PC, &last_bkpt.address);
diff --git a/src/core/arm/unicorn/arm_unicorn.h b/src/core/arm/unicorn/arm_unicorn.h
index 75761950b..dbd6955ea 100644
--- a/src/core/arm/unicorn/arm_unicorn.h
+++ b/src/core/arm/unicorn/arm_unicorn.h
@@ -9,12 +9,17 @@
9#include "core/arm/arm_interface.h" 9#include "core/arm/arm_interface.h"
10#include "core/gdbstub/gdbstub.h" 10#include "core/gdbstub/gdbstub.h"
11 11
12namespace Core::Timing {
13class CoreTiming;
14}
15
12namespace Core { 16namespace Core {
13 17
14class ARM_Unicorn final : public ARM_Interface { 18class ARM_Unicorn final : public ARM_Interface {
15public: 19public:
16 ARM_Unicorn(); 20 explicit ARM_Unicorn(Timing::CoreTiming& core_timing);
17 ~ARM_Unicorn(); 21 ~ARM_Unicorn();
22
18 void MapBackingMemory(VAddr address, std::size_t size, u8* memory, 23 void MapBackingMemory(VAddr address, std::size_t size, u8* memory,
19 Kernel::VMAPermission perms) override; 24 Kernel::VMAPermission perms) override;
20 void UnmapMemory(VAddr address, std::size_t size) override; 25 void UnmapMemory(VAddr address, std::size_t size) override;
@@ -43,6 +48,7 @@ public:
43 48
44private: 49private:
45 uc_engine* uc{}; 50 uc_engine* uc{};
51 Timing::CoreTiming& core_timing;
46 GDBStub::BreakpointAddress last_bkpt{}; 52 GDBStub::BreakpointAddress last_bkpt{};
47 bool last_bkpt_hit; 53 bool last_bkpt_hit;
48}; 54};
diff --git a/src/core/core.cpp b/src/core/core.cpp
index 1dd576c26..4fe77c25b 100644
--- a/src/core/core.cpp
+++ b/src/core/core.cpp
@@ -32,11 +32,13 @@
32#include "core/perf_stats.h" 32#include "core/perf_stats.h"
33#include "core/settings.h" 33#include "core/settings.h"
34#include "core/telemetry_session.h" 34#include "core/telemetry_session.h"
35#include "file_sys/cheat_engine.h"
35#include "frontend/applets/profile_select.h" 36#include "frontend/applets/profile_select.h"
36#include "frontend/applets/software_keyboard.h" 37#include "frontend/applets/software_keyboard.h"
37#include "frontend/applets/web_browser.h" 38#include "frontend/applets/web_browser.h"
38#include "video_core/debug_utils/debug_utils.h" 39#include "video_core/debug_utils/debug_utils.h"
39#include "video_core/gpu.h" 40#include "video_core/gpu_asynch.h"
41#include "video_core/gpu_synch.h"
40#include "video_core/renderer_base.h" 42#include "video_core/renderer_base.h"
41#include "video_core/video_core.h" 43#include "video_core/video_core.h"
42 44
@@ -78,6 +80,7 @@ FileSys::VirtualFile GetGameFileFromPath(const FileSys::VirtualFilesystem& vfs,
78 return vfs->OpenFile(path, FileSys::Mode::Read); 80 return vfs->OpenFile(path, FileSys::Mode::Read);
79} 81}
80struct System::Impl { 82struct System::Impl {
83 explicit Impl(System& system) : kernel{system} {}
81 84
82 Cpu& CurrentCpuCore() { 85 Cpu& CurrentCpuCore() {
83 return cpu_core_manager.GetCurrentCore(); 86 return cpu_core_manager.GetCurrentCore();
@@ -94,7 +97,7 @@ struct System::Impl {
94 ResultStatus Init(System& system, Frontend::EmuWindow& emu_window) { 97 ResultStatus Init(System& system, Frontend::EmuWindow& emu_window) {
95 LOG_DEBUG(HW_Memory, "initialized OK"); 98 LOG_DEBUG(HW_Memory, "initialized OK");
96 99
97 CoreTiming::Init(); 100 core_timing.Initialize();
98 kernel.Initialize(); 101 kernel.Initialize();
99 102
100 const auto current_time = std::chrono::duration_cast<std::chrono::seconds>( 103 const auto current_time = std::chrono::duration_cast<std::chrono::seconds>(
@@ -114,13 +117,13 @@ struct System::Impl {
114 if (web_browser == nullptr) 117 if (web_browser == nullptr)
115 web_browser = std::make_unique<Core::Frontend::DefaultWebBrowserApplet>(); 118 web_browser = std::make_unique<Core::Frontend::DefaultWebBrowserApplet>();
116 119
117 auto main_process = Kernel::Process::Create(kernel, "main"); 120 auto main_process = Kernel::Process::Create(system, "main");
118 kernel.MakeCurrentProcess(main_process.get()); 121 kernel.MakeCurrentProcess(main_process.get());
119 122
120 telemetry_session = std::make_unique<Core::TelemetrySession>(); 123 telemetry_session = std::make_unique<Core::TelemetrySession>();
121 service_manager = std::make_shared<Service::SM::ServiceManager>(); 124 service_manager = std::make_shared<Service::SM::ServiceManager>();
122 125
123 Service::Init(service_manager, *virtual_filesystem); 126 Service::Init(service_manager, system, *virtual_filesystem);
124 GDBStub::Init(); 127 GDBStub::Init();
125 128
126 renderer = VideoCore::CreateRenderer(emu_window, system); 129 renderer = VideoCore::CreateRenderer(emu_window, system);
@@ -128,10 +131,16 @@ struct System::Impl {
128 return ResultStatus::ErrorVideoCore; 131 return ResultStatus::ErrorVideoCore;
129 } 132 }
130 133
131 gpu_core = std::make_unique<Tegra::GPU>(renderer->Rasterizer()); 134 is_powered_on = true;
135
136 if (Settings::values.use_asynchronous_gpu_emulation) {
137 gpu_core = std::make_unique<VideoCommon::GPUAsynch>(system, *renderer);
138 } else {
139 gpu_core = std::make_unique<VideoCommon::GPUSynch>(system, *renderer);
140 }
132 141
133 cpu_core_manager.Initialize(system); 142 cpu_core_manager.Initialize(system);
134 is_powered_on = true; 143
135 LOG_DEBUG(Core, "Initialized OK"); 144 LOG_DEBUG(Core, "Initialized OK");
136 145
137 // Reset counters and set time origin to current frame 146 // Reset counters and set time origin to current frame
@@ -182,13 +191,13 @@ struct System::Impl {
182 191
183 void Shutdown() { 192 void Shutdown() {
184 // Log last frame performance stats 193 // Log last frame performance stats
185 auto perf_results = GetAndResetPerfStats(); 194 const auto perf_results = GetAndResetPerfStats();
186 Telemetry().AddField(Telemetry::FieldType::Performance, "Shutdown_EmulationSpeed", 195 telemetry_session->AddField(Telemetry::FieldType::Performance, "Shutdown_EmulationSpeed",
187 perf_results.emulation_speed * 100.0); 196 perf_results.emulation_speed * 100.0);
188 Telemetry().AddField(Telemetry::FieldType::Performance, "Shutdown_Framerate", 197 telemetry_session->AddField(Telemetry::FieldType::Performance, "Shutdown_Framerate",
189 perf_results.game_fps); 198 perf_results.game_fps);
190 Telemetry().AddField(Telemetry::FieldType::Performance, "Shutdown_Frametime", 199 telemetry_session->AddField(Telemetry::FieldType::Performance, "Shutdown_Frametime",
191 perf_results.frametime * 1000.0); 200 perf_results.frametime * 1000.0);
192 201
193 is_powered_on = false; 202 is_powered_on = false;
194 203
@@ -197,6 +206,7 @@ struct System::Impl {
197 GDBStub::Shutdown(); 206 GDBStub::Shutdown();
198 Service::Shutdown(); 207 Service::Shutdown();
199 service_manager.reset(); 208 service_manager.reset();
209 cheat_engine.reset();
200 telemetry_session.reset(); 210 telemetry_session.reset();
201 gpu_core.reset(); 211 gpu_core.reset();
202 212
@@ -205,7 +215,7 @@ struct System::Impl {
205 215
206 // Shutdown kernel and core timing 216 // Shutdown kernel and core timing
207 kernel.Shutdown(); 217 kernel.Shutdown();
208 CoreTiming::Shutdown(); 218 core_timing.Shutdown();
209 219
210 // Close app loader 220 // Close app loader
211 app_loader.reset(); 221 app_loader.reset();
@@ -232,9 +242,10 @@ struct System::Impl {
232 } 242 }
233 243
234 PerfStatsResults GetAndResetPerfStats() { 244 PerfStatsResults GetAndResetPerfStats() {
235 return perf_stats.GetAndResetStats(CoreTiming::GetGlobalTimeUs()); 245 return perf_stats.GetAndResetStats(core_timing.GetGlobalTimeUs());
236 } 246 }
237 247
248 Timing::CoreTiming core_timing;
238 Kernel::KernelCore kernel; 249 Kernel::KernelCore kernel;
239 /// RealVfsFilesystem instance 250 /// RealVfsFilesystem instance
240 FileSys::VirtualFilesystem virtual_filesystem; 251 FileSys::VirtualFilesystem virtual_filesystem;
@@ -246,6 +257,8 @@ struct System::Impl {
246 CpuCoreManager cpu_core_manager; 257 CpuCoreManager cpu_core_manager;
247 bool is_powered_on = false; 258 bool is_powered_on = false;
248 259
260 std::unique_ptr<FileSys::CheatEngine> cheat_engine;
261
249 /// Frontend applets 262 /// Frontend applets
250 std::unique_ptr<Core::Frontend::ProfileSelectApplet> profile_selector; 263 std::unique_ptr<Core::Frontend::ProfileSelectApplet> profile_selector;
251 std::unique_ptr<Core::Frontend::SoftwareKeyboardApplet> software_keyboard; 264 std::unique_ptr<Core::Frontend::SoftwareKeyboardApplet> software_keyboard;
@@ -264,7 +277,7 @@ struct System::Impl {
264 Core::FrameLimiter frame_limiter; 277 Core::FrameLimiter frame_limiter;
265}; 278};
266 279
267System::System() : impl{std::make_unique<Impl>()} {} 280System::System() : impl{std::make_unique<Impl>(*this)} {}
268System::~System() = default; 281System::~System() = default;
269 282
270Cpu& System::CurrentCpuCore() { 283Cpu& System::CurrentCpuCore() {
@@ -396,6 +409,14 @@ const Kernel::KernelCore& System::Kernel() const {
396 return impl->kernel; 409 return impl->kernel;
397} 410}
398 411
412Timing::CoreTiming& System::CoreTiming() {
413 return impl->core_timing;
414}
415
416const Timing::CoreTiming& System::CoreTiming() const {
417 return impl->core_timing;
418}
419
399Core::PerfStats& System::GetPerfStats() { 420Core::PerfStats& System::GetPerfStats() {
400 return impl->perf_stats; 421 return impl->perf_stats;
401} 422}
@@ -436,6 +457,13 @@ Tegra::DebugContext* System::GetGPUDebugContext() const {
436 return impl->debug_context.get(); 457 return impl->debug_context.get();
437} 458}
438 459
460void System::RegisterCheatList(const std::vector<FileSys::CheatList>& list,
461 const std::string& build_id, VAddr code_region_start,
462 VAddr code_region_end) {
463 impl->cheat_engine = std::make_unique<FileSys::CheatEngine>(*this, list, build_id,
464 code_region_start, code_region_end);
465}
466
439void System::SetFilesystem(std::shared_ptr<FileSys::VfsFilesystem> vfs) { 467void System::SetFilesystem(std::shared_ptr<FileSys::VfsFilesystem> vfs) {
440 impl->virtual_filesystem = std::move(vfs); 468 impl->virtual_filesystem = std::move(vfs);
441} 469}
diff --git a/src/core/core.h b/src/core/core.h
index 511a5ad3a..4d83b93cc 100644
--- a/src/core/core.h
+++ b/src/core/core.h
@@ -20,6 +20,7 @@ class WebBrowserApplet;
20} // namespace Core::Frontend 20} // namespace Core::Frontend
21 21
22namespace FileSys { 22namespace FileSys {
23class CheatList;
23class VfsFilesystem; 24class VfsFilesystem;
24} // namespace FileSys 25} // namespace FileSys
25 26
@@ -47,6 +48,10 @@ namespace VideoCore {
47class RendererBase; 48class RendererBase;
48} // namespace VideoCore 49} // namespace VideoCore
49 50
51namespace Core::Timing {
52class CoreTiming;
53}
54
50namespace Core { 55namespace Core {
51 56
52class ARM_Interface; 57class ARM_Interface;
@@ -205,6 +210,12 @@ public:
205 /// Provides a constant pointer to the current process. 210 /// Provides a constant pointer to the current process.
206 const Kernel::Process* CurrentProcess() const; 211 const Kernel::Process* CurrentProcess() const;
207 212
213 /// Provides a reference to the core timing instance.
214 Timing::CoreTiming& CoreTiming();
215
216 /// Provides a constant reference to the core timing instance.
217 const Timing::CoreTiming& CoreTiming() const;
218
208 /// Provides a reference to the kernel instance. 219 /// Provides a reference to the kernel instance.
209 Kernel::KernelCore& Kernel(); 220 Kernel::KernelCore& Kernel();
210 221
@@ -243,6 +254,9 @@ public:
243 254
244 std::shared_ptr<FileSys::VfsFilesystem> GetFilesystem() const; 255 std::shared_ptr<FileSys::VfsFilesystem> GetFilesystem() const;
245 256
257 void RegisterCheatList(const std::vector<FileSys::CheatList>& list, const std::string& build_id,
258 VAddr code_region_start, VAddr code_region_end);
259
246 void SetProfileSelector(std::unique_ptr<Frontend::ProfileSelectApplet> applet); 260 void SetProfileSelector(std::unique_ptr<Frontend::ProfileSelectApplet> applet);
247 261
248 const Frontend::ProfileSelectApplet& GetProfileSelector() const; 262 const Frontend::ProfileSelectApplet& GetProfileSelector() const;
@@ -283,10 +297,6 @@ inline ARM_Interface& CurrentArmInterface() {
283 return System::GetInstance().CurrentArmInterface(); 297 return System::GetInstance().CurrentArmInterface();
284} 298}
285 299
286inline TelemetrySession& Telemetry() {
287 return System::GetInstance().TelemetrySession();
288}
289
290inline Kernel::Process* CurrentProcess() { 300inline Kernel::Process* CurrentProcess() {
291 return System::GetInstance().CurrentProcess(); 301 return System::GetInstance().CurrentProcess();
292} 302}
diff --git a/src/core/core_cpu.cpp b/src/core/core_cpu.cpp
index fffda8a99..e75741db0 100644
--- a/src/core/core_cpu.cpp
+++ b/src/core/core_cpu.cpp
@@ -11,6 +11,7 @@
11#endif 11#endif
12#include "core/arm/exclusive_monitor.h" 12#include "core/arm/exclusive_monitor.h"
13#include "core/arm/unicorn/arm_unicorn.h" 13#include "core/arm/unicorn/arm_unicorn.h"
14#include "core/core.h"
14#include "core/core_cpu.h" 15#include "core/core_cpu.h"
15#include "core/core_timing.h" 16#include "core/core_timing.h"
16#include "core/hle/kernel/scheduler.h" 17#include "core/hle/kernel/scheduler.h"
@@ -21,7 +22,7 @@
21namespace Core { 22namespace Core {
22 23
23void CpuBarrier::NotifyEnd() { 24void CpuBarrier::NotifyEnd() {
24 std::unique_lock<std::mutex> lock(mutex); 25 std::unique_lock lock{mutex};
25 end = true; 26 end = true;
26 condition.notify_all(); 27 condition.notify_all();
27} 28}
@@ -33,7 +34,7 @@ bool CpuBarrier::Rendezvous() {
33 } 34 }
34 35
35 if (!end) { 36 if (!end) {
36 std::unique_lock<std::mutex> lock(mutex); 37 std::unique_lock lock{mutex};
37 38
38 --cores_waiting; 39 --cores_waiting;
39 if (!cores_waiting) { 40 if (!cores_waiting) {
@@ -49,20 +50,21 @@ bool CpuBarrier::Rendezvous() {
49 return false; 50 return false;
50} 51}
51 52
52Cpu::Cpu(ExclusiveMonitor& exclusive_monitor, CpuBarrier& cpu_barrier, std::size_t core_index) 53Cpu::Cpu(System& system, ExclusiveMonitor& exclusive_monitor, CpuBarrier& cpu_barrier,
53 : cpu_barrier{cpu_barrier}, core_index{core_index} { 54 std::size_t core_index)
55 : cpu_barrier{cpu_barrier}, core_timing{system.CoreTiming()}, core_index{core_index} {
54 if (Settings::values.use_cpu_jit) { 56 if (Settings::values.use_cpu_jit) {
55#ifdef ARCHITECTURE_x86_64 57#ifdef ARCHITECTURE_x86_64
56 arm_interface = std::make_unique<ARM_Dynarmic>(exclusive_monitor, core_index); 58 arm_interface = std::make_unique<ARM_Dynarmic>(core_timing, exclusive_monitor, core_index);
57#else 59#else
58 arm_interface = std::make_unique<ARM_Unicorn>(); 60 arm_interface = std::make_unique<ARM_Unicorn>();
59 LOG_WARNING(Core, "CPU JIT requested, but Dynarmic not available"); 61 LOG_WARNING(Core, "CPU JIT requested, but Dynarmic not available");
60#endif 62#endif
61 } else { 63 } else {
62 arm_interface = std::make_unique<ARM_Unicorn>(); 64 arm_interface = std::make_unique<ARM_Unicorn>(core_timing);
63 } 65 }
64 66
65 scheduler = std::make_unique<Kernel::Scheduler>(*arm_interface); 67 scheduler = std::make_unique<Kernel::Scheduler>(system, *arm_interface);
66} 68}
67 69
68Cpu::~Cpu() = default; 70Cpu::~Cpu() = default;
@@ -93,14 +95,14 @@ void Cpu::RunLoop(bool tight_loop) {
93 95
94 if (IsMainCore()) { 96 if (IsMainCore()) {
95 // TODO(Subv): Only let CoreTiming idle if all 4 cores are idling. 97 // TODO(Subv): Only let CoreTiming idle if all 4 cores are idling.
96 CoreTiming::Idle(); 98 core_timing.Idle();
97 CoreTiming::Advance(); 99 core_timing.Advance();
98 } 100 }
99 101
100 PrepareReschedule(); 102 PrepareReschedule();
101 } else { 103 } else {
102 if (IsMainCore()) { 104 if (IsMainCore()) {
103 CoreTiming::Advance(); 105 core_timing.Advance();
104 } 106 }
105 107
106 if (tight_loop) { 108 if (tight_loop) {
@@ -129,7 +131,7 @@ void Cpu::Reschedule() {
129 131
130 reschedule_pending = false; 132 reschedule_pending = false;
131 // Lock the global kernel mutex when we manipulate the HLE state 133 // Lock the global kernel mutex when we manipulate the HLE state
132 std::lock_guard<std::recursive_mutex> lock(HLE::g_hle_lock); 134 std::lock_guard lock{HLE::g_hle_lock};
133 scheduler->Reschedule(); 135 scheduler->Reschedule();
134} 136}
135 137
diff --git a/src/core/core_cpu.h b/src/core/core_cpu.h
index 1d2bdc6cd..7589beb8c 100644
--- a/src/core/core_cpu.h
+++ b/src/core/core_cpu.h
@@ -16,6 +16,14 @@ class Scheduler;
16} 16}
17 17
18namespace Core { 18namespace Core {
19class System;
20}
21
22namespace Core::Timing {
23class CoreTiming;
24}
25
26namespace Core {
19 27
20class ARM_Interface; 28class ARM_Interface;
21class ExclusiveMonitor; 29class ExclusiveMonitor;
@@ -41,7 +49,8 @@ private:
41 49
42class Cpu { 50class Cpu {
43public: 51public:
44 Cpu(ExclusiveMonitor& exclusive_monitor, CpuBarrier& cpu_barrier, std::size_t core_index); 52 Cpu(System& system, ExclusiveMonitor& exclusive_monitor, CpuBarrier& cpu_barrier,
53 std::size_t core_index);
45 ~Cpu(); 54 ~Cpu();
46 55
47 void RunLoop(bool tight_loop = true); 56 void RunLoop(bool tight_loop = true);
@@ -82,6 +91,7 @@ private:
82 std::unique_ptr<ARM_Interface> arm_interface; 91 std::unique_ptr<ARM_Interface> arm_interface;
83 CpuBarrier& cpu_barrier; 92 CpuBarrier& cpu_barrier;
84 std::unique_ptr<Kernel::Scheduler> scheduler; 93 std::unique_ptr<Kernel::Scheduler> scheduler;
94 Timing::CoreTiming& core_timing;
85 95
86 std::atomic<bool> reschedule_pending = false; 96 std::atomic<bool> reschedule_pending = false;
87 std::size_t core_index; 97 std::size_t core_index;
diff --git a/src/core/core_timing.cpp b/src/core/core_timing.cpp
index 7953c8720..41adb2302 100644
--- a/src/core/core_timing.cpp
+++ b/src/core/core_timing.cpp
@@ -8,149 +8,98 @@
8#include <mutex> 8#include <mutex>
9#include <string> 9#include <string>
10#include <tuple> 10#include <tuple>
11#include <unordered_map> 11
12#include <vector>
13#include "common/assert.h" 12#include "common/assert.h"
14#include "common/thread.h" 13#include "common/thread.h"
15#include "common/threadsafe_queue.h"
16#include "core/core_timing_util.h" 14#include "core/core_timing_util.h"
17 15
18namespace CoreTiming { 16namespace Core::Timing {
19
20static s64 global_timer;
21static int slice_length;
22static int downcount;
23 17
24struct EventType { 18constexpr int MAX_SLICE_LENGTH = 20000;
25 TimedCallback callback;
26 const std::string* name;
27};
28 19
29struct Event { 20struct CoreTiming::Event {
30 s64 time; 21 s64 time;
31 u64 fifo_order; 22 u64 fifo_order;
32 u64 userdata; 23 u64 userdata;
33 const EventType* type; 24 const EventType* type;
34};
35
36// Sort by time, unless the times are the same, in which case sort by the order added to the queue
37static bool operator>(const Event& left, const Event& right) {
38 return std::tie(left.time, left.fifo_order) > std::tie(right.time, right.fifo_order);
39}
40
41static bool operator<(const Event& left, const Event& right) {
42 return std::tie(left.time, left.fifo_order) < std::tie(right.time, right.fifo_order);
43}
44
45// unordered_map stores each element separately as a linked list node so pointers to elements
46// remain stable regardless of rehashes/resizing.
47static std::unordered_map<std::string, EventType> event_types;
48 25
49// The queue is a min-heap using std::make_heap/push_heap/pop_heap. 26 // Sort by time, unless the times are the same, in which case sort by
50// We don't use std::priority_queue because we need to be able to serialize, unserialize and 27 // the order added to the queue
51// erase arbitrary events (RemoveEvent()) regardless of the queue order. These aren't accomodated 28 friend bool operator>(const Event& left, const Event& right) {
52// by the standard adaptor class. 29 return std::tie(left.time, left.fifo_order) > std::tie(right.time, right.fifo_order);
53static std::vector<Event> event_queue; 30 }
54static u64 event_fifo_id;
55// the queue for storing the events from other threads threadsafe until they will be added
56// to the event_queue by the emu thread
57static Common::MPSCQueue<Event, false> ts_queue;
58
59// the queue for unscheduling the events from other threads threadsafe
60static Common::MPSCQueue<std::pair<const EventType*, u64>, false> unschedule_queue;
61
62constexpr int MAX_SLICE_LENGTH = 20000;
63
64static s64 idled_cycles;
65
66// Are we in a function that has been called from Advance()
67// If events are sheduled from a function that gets called from Advance(),
68// don't change slice_length and downcount.
69static bool is_global_timer_sane;
70
71static EventType* ev_lost = nullptr;
72
73static void EmptyTimedCallback(u64 userdata, s64 cyclesLate) {}
74
75EventType* RegisterEvent(const std::string& name, TimedCallback callback) {
76 // check for existing type with same name.
77 // we want event type names to remain unique so that we can use them for serialization.
78 ASSERT_MSG(event_types.find(name) == event_types.end(),
79 "CoreTiming Event \"{}\" is already registered. Events should only be registered "
80 "during Init to avoid breaking save states.",
81 name.c_str());
82 31
83 auto info = event_types.emplace(name, EventType{callback, nullptr}); 32 friend bool operator<(const Event& left, const Event& right) {
84 EventType* event_type = &info.first->second; 33 return std::tie(left.time, left.fifo_order) < std::tie(right.time, right.fifo_order);
85 event_type->name = &info.first->first; 34 }
86 return event_type; 35};
87}
88 36
89void UnregisterAllEvents() { 37CoreTiming::CoreTiming() = default;
90 ASSERT_MSG(event_queue.empty(), "Cannot unregister events with events pending"); 38CoreTiming::~CoreTiming() = default;
91 event_types.clear();
92}
93 39
94void Init() { 40void CoreTiming::Initialize() {
95 downcount = MAX_SLICE_LENGTH; 41 downcount = MAX_SLICE_LENGTH;
96 slice_length = MAX_SLICE_LENGTH; 42 slice_length = MAX_SLICE_LENGTH;
97 global_timer = 0; 43 global_timer = 0;
98 idled_cycles = 0; 44 idled_cycles = 0;
99 45
100 // The time between CoreTiming being intialized and the first call to Advance() is considered 46 // The time between CoreTiming being initialized and the first call to Advance() is considered
101 // the slice boundary between slice -1 and slice 0. Dispatcher loops must call Advance() before 47 // the slice boundary between slice -1 and slice 0. Dispatcher loops must call Advance() before
102 // executing the first cycle of each slice to prepare the slice length and downcount for 48 // executing the first cycle of each slice to prepare the slice length and downcount for
103 // that slice. 49 // that slice.
104 is_global_timer_sane = true; 50 is_global_timer_sane = true;
105 51
106 event_fifo_id = 0; 52 event_fifo_id = 0;
107 ev_lost = RegisterEvent("_lost_event", &EmptyTimedCallback); 53
54 const auto empty_timed_callback = [](u64, s64) {};
55 ev_lost = RegisterEvent("_lost_event", empty_timed_callback);
108} 56}
109 57
110void Shutdown() { 58void CoreTiming::Shutdown() {
111 MoveEvents(); 59 MoveEvents();
112 ClearPendingEvents(); 60 ClearPendingEvents();
113 UnregisterAllEvents(); 61 UnregisterAllEvents();
114} 62}
115 63
116// This should only be called from the CPU thread. If you are calling 64EventType* CoreTiming::RegisterEvent(const std::string& name, TimedCallback callback) {
117// it from any other thread, you are doing something evil 65 // check for existing type with same name.
118u64 GetTicks() { 66 // we want event type names to remain unique so that we can use them for serialization.
119 u64 ticks = static_cast<u64>(global_timer); 67 ASSERT_MSG(event_types.find(name) == event_types.end(),
120 if (!is_global_timer_sane) { 68 "CoreTiming Event \"{}\" is already registered. Events should only be registered "
121 ticks += slice_length - downcount; 69 "during Init to avoid breaking save states.",
122 } 70 name.c_str());
123 return ticks;
124}
125
126void AddTicks(u64 ticks) {
127 downcount -= static_cast<int>(ticks);
128}
129 71
130u64 GetIdleTicks() { 72 auto info = event_types.emplace(name, EventType{callback, nullptr});
131 return static_cast<u64>(idled_cycles); 73 EventType* event_type = &info.first->second;
74 event_type->name = &info.first->first;
75 return event_type;
132} 76}
133 77
134void ClearPendingEvents() { 78void CoreTiming::UnregisterAllEvents() {
135 event_queue.clear(); 79 ASSERT_MSG(event_queue.empty(), "Cannot unregister events with events pending");
80 event_types.clear();
136} 81}
137 82
138void ScheduleEvent(s64 cycles_into_future, const EventType* event_type, u64 userdata) { 83void CoreTiming::ScheduleEvent(s64 cycles_into_future, const EventType* event_type, u64 userdata) {
139 ASSERT(event_type != nullptr); 84 ASSERT(event_type != nullptr);
140 s64 timeout = GetTicks() + cycles_into_future; 85 const s64 timeout = GetTicks() + cycles_into_future;
86
141 // If this event needs to be scheduled before the next advance(), force one early 87 // If this event needs to be scheduled before the next advance(), force one early
142 if (!is_global_timer_sane) 88 if (!is_global_timer_sane) {
143 ForceExceptionCheck(cycles_into_future); 89 ForceExceptionCheck(cycles_into_future);
90 }
91
144 event_queue.emplace_back(Event{timeout, event_fifo_id++, userdata, event_type}); 92 event_queue.emplace_back(Event{timeout, event_fifo_id++, userdata, event_type});
145 std::push_heap(event_queue.begin(), event_queue.end(), std::greater<>()); 93 std::push_heap(event_queue.begin(), event_queue.end(), std::greater<>());
146} 94}
147 95
148void ScheduleEventThreadsafe(s64 cycles_into_future, const EventType* event_type, u64 userdata) { 96void CoreTiming::ScheduleEventThreadsafe(s64 cycles_into_future, const EventType* event_type,
97 u64 userdata) {
149 ts_queue.Push(Event{global_timer + cycles_into_future, 0, userdata, event_type}); 98 ts_queue.Push(Event{global_timer + cycles_into_future, 0, userdata, event_type});
150} 99}
151 100
152void UnscheduleEvent(const EventType* event_type, u64 userdata) { 101void CoreTiming::UnscheduleEvent(const EventType* event_type, u64 userdata) {
153 auto itr = std::remove_if(event_queue.begin(), event_queue.end(), [&](const Event& e) { 102 const auto itr = std::remove_if(event_queue.begin(), event_queue.end(), [&](const Event& e) {
154 return e.type == event_type && e.userdata == userdata; 103 return e.type == event_type && e.userdata == userdata;
155 }); 104 });
156 105
@@ -161,13 +110,33 @@ void UnscheduleEvent(const EventType* event_type, u64 userdata) {
161 } 110 }
162} 111}
163 112
164void UnscheduleEventThreadsafe(const EventType* event_type, u64 userdata) { 113void CoreTiming::UnscheduleEventThreadsafe(const EventType* event_type, u64 userdata) {
165 unschedule_queue.Push(std::make_pair(event_type, userdata)); 114 unschedule_queue.Push(std::make_pair(event_type, userdata));
166} 115}
167 116
168void RemoveEvent(const EventType* event_type) { 117u64 CoreTiming::GetTicks() const {
169 auto itr = std::remove_if(event_queue.begin(), event_queue.end(), 118 u64 ticks = static_cast<u64>(global_timer);
170 [&](const Event& e) { return e.type == event_type; }); 119 if (!is_global_timer_sane) {
120 ticks += slice_length - downcount;
121 }
122 return ticks;
123}
124
125u64 CoreTiming::GetIdleTicks() const {
126 return static_cast<u64>(idled_cycles);
127}
128
129void CoreTiming::AddTicks(u64 ticks) {
130 downcount -= static_cast<int>(ticks);
131}
132
133void CoreTiming::ClearPendingEvents() {
134 event_queue.clear();
135}
136
137void CoreTiming::RemoveEvent(const EventType* event_type) {
138 const auto itr = std::remove_if(event_queue.begin(), event_queue.end(),
139 [&](const Event& e) { return e.type == event_type; });
171 140
172 // Removing random items breaks the invariant so we have to re-establish it. 141 // Removing random items breaks the invariant so we have to re-establish it.
173 if (itr != event_queue.end()) { 142 if (itr != event_queue.end()) {
@@ -176,22 +145,24 @@ void RemoveEvent(const EventType* event_type) {
176 } 145 }
177} 146}
178 147
179void RemoveNormalAndThreadsafeEvent(const EventType* event_type) { 148void CoreTiming::RemoveNormalAndThreadsafeEvent(const EventType* event_type) {
180 MoveEvents(); 149 MoveEvents();
181 RemoveEvent(event_type); 150 RemoveEvent(event_type);
182} 151}
183 152
184void ForceExceptionCheck(s64 cycles) { 153void CoreTiming::ForceExceptionCheck(s64 cycles) {
185 cycles = std::max<s64>(0, cycles); 154 cycles = std::max<s64>(0, cycles);
186 if (downcount > cycles) { 155 if (downcount <= cycles) {
187 // downcount is always (much) smaller than MAX_INT so we can safely cast cycles to an int 156 return;
188 // here. Account for cycles already executed by adjusting the g.slice_length
189 slice_length -= downcount - static_cast<int>(cycles);
190 downcount = static_cast<int>(cycles);
191 } 157 }
158
159 // downcount is always (much) smaller than MAX_INT so we can safely cast cycles to an int
160 // here. Account for cycles already executed by adjusting the g.slice_length
161 slice_length -= downcount - static_cast<int>(cycles);
162 downcount = static_cast<int>(cycles);
192} 163}
193 164
194void MoveEvents() { 165void CoreTiming::MoveEvents() {
195 for (Event ev; ts_queue.Pop(ev);) { 166 for (Event ev; ts_queue.Pop(ev);) {
196 ev.fifo_order = event_fifo_id++; 167 ev.fifo_order = event_fifo_id++;
197 event_queue.emplace_back(std::move(ev)); 168 event_queue.emplace_back(std::move(ev));
@@ -199,13 +170,13 @@ void MoveEvents() {
199 } 170 }
200} 171}
201 172
202void Advance() { 173void CoreTiming::Advance() {
203 MoveEvents(); 174 MoveEvents();
204 for (std::pair<const EventType*, u64> ev; unschedule_queue.Pop(ev);) { 175 for (std::pair<const EventType*, u64> ev; unschedule_queue.Pop(ev);) {
205 UnscheduleEvent(ev.first, ev.second); 176 UnscheduleEvent(ev.first, ev.second);
206 } 177 }
207 178
208 int cycles_executed = slice_length - downcount; 179 const int cycles_executed = slice_length - downcount;
209 global_timer += cycles_executed; 180 global_timer += cycles_executed;
210 slice_length = MAX_SLICE_LENGTH; 181 slice_length = MAX_SLICE_LENGTH;
211 182
@@ -215,7 +186,7 @@ void Advance() {
215 Event evt = std::move(event_queue.front()); 186 Event evt = std::move(event_queue.front());
216 std::pop_heap(event_queue.begin(), event_queue.end(), std::greater<>()); 187 std::pop_heap(event_queue.begin(), event_queue.end(), std::greater<>());
217 event_queue.pop_back(); 188 event_queue.pop_back();
218 evt.type->callback(evt.userdata, static_cast<int>(global_timer - evt.time)); 189 evt.type->callback(evt.userdata, global_timer - evt.time);
219 } 190 }
220 191
221 is_global_timer_sane = false; 192 is_global_timer_sane = false;
@@ -229,17 +200,17 @@ void Advance() {
229 downcount = slice_length; 200 downcount = slice_length;
230} 201}
231 202
232void Idle() { 203void CoreTiming::Idle() {
233 idled_cycles += downcount; 204 idled_cycles += downcount;
234 downcount = 0; 205 downcount = 0;
235} 206}
236 207
237std::chrono::microseconds GetGlobalTimeUs() { 208std::chrono::microseconds CoreTiming::GetGlobalTimeUs() const {
238 return std::chrono::microseconds{GetTicks() * 1000000 / BASE_CLOCK_RATE}; 209 return std::chrono::microseconds{GetTicks() * 1000000 / BASE_CLOCK_RATE};
239} 210}
240 211
241int GetDowncount() { 212int CoreTiming::GetDowncount() const {
242 return downcount; 213 return downcount;
243} 214}
244 215
245} // namespace CoreTiming 216} // namespace Core::Timing
diff --git a/src/core/core_timing.h b/src/core/core_timing.h
index 9ed757bd7..9d2efde37 100644
--- a/src/core/core_timing.h
+++ b/src/core/core_timing.h
@@ -4,6 +4,27 @@
4 4
5#pragma once 5#pragma once
6 6
7#include <chrono>
8#include <functional>
9#include <string>
10#include <unordered_map>
11#include <vector>
12#include "common/common_types.h"
13#include "common/threadsafe_queue.h"
14
15namespace Core::Timing {
16
17/// A callback that may be scheduled for a particular core timing event.
18using TimedCallback = std::function<void(u64 userdata, s64 cycles_late)>;
19
20/// Contains the characteristics of a particular event.
21struct EventType {
22 /// The event's callback function.
23 TimedCallback callback;
24 /// A pointer to the name of the event.
25 const std::string* name;
26};
27
7/** 28/**
8 * This is a system to schedule events into the emulated machine's future. Time is measured 29 * This is a system to schedule events into the emulated machine's future. Time is measured
9 * in main CPU clock cycles. 30 * in main CPU clock cycles.
@@ -16,80 +37,120 @@
16 * inside callback: 37 * inside callback:
17 * ScheduleEvent(periodInCycles - cyclesLate, callback, "whatever") 38 * ScheduleEvent(periodInCycles - cyclesLate, callback, "whatever")
18 */ 39 */
19 40class CoreTiming {
20#include <chrono> 41public:
21#include <functional> 42 CoreTiming();
22#include <string> 43 ~CoreTiming();
23#include "common/common_types.h" 44
24 45 CoreTiming(const CoreTiming&) = delete;
25namespace CoreTiming { 46 CoreTiming(CoreTiming&&) = delete;
26 47
27struct EventType; 48 CoreTiming& operator=(const CoreTiming&) = delete;
28 49 CoreTiming& operator=(CoreTiming&&) = delete;
29using TimedCallback = std::function<void(u64 userdata, int cycles_late)>; 50
30 51 /// CoreTiming begins at the boundary of timing slice -1. An initial call to Advance() is
31/** 52 /// required to end slice - 1 and start slice 0 before the first cycle of code is executed.
32 * CoreTiming begins at the boundary of timing slice -1. An initial call to Advance() is 53 void Initialize();
33 * required to end slice -1 and start slice 0 before the first cycle of code is executed. 54
34 */ 55 /// Tears down all timing related functionality.
35void Init(); 56 void Shutdown();
36void Shutdown(); 57
37 58 /// Registers a core timing event with the given name and callback.
38/** 59 ///
39 * This should only be called from the emu thread, if you are calling it any other thread, you are 60 /// @param name The name of the core timing event to register.
40 * doing something evil 61 /// @param callback The callback to execute for the event.
41 */ 62 ///
42u64 GetTicks(); 63 /// @returns An EventType instance representing the registered event.
43u64 GetIdleTicks(); 64 ///
44void AddTicks(u64 ticks); 65 /// @pre The name of the event being registered must be unique among all
45 66 /// registered events.
46/** 67 ///
47 * Returns the event_type identifier. if name is not unique, it will assert. 68 EventType* RegisterEvent(const std::string& name, TimedCallback callback);
48 */ 69
49EventType* RegisterEvent(const std::string& name, TimedCallback callback); 70 /// Unregisters all registered events thus far.
50void UnregisterAllEvents(); 71 void UnregisterAllEvents();
51 72
52/** 73 /// After the first Advance, the slice lengths and the downcount will be reduced whenever an
53 * After the first Advance, the slice lengths and the downcount will be reduced whenever an event 74 /// event is scheduled earlier than the current values.
54 * is scheduled earlier than the current values. 75 ///
55 * Scheduling from a callback will not update the downcount until the Advance() completes. 76 /// Scheduling from a callback will not update the downcount until the Advance() completes.
56 */ 77 void ScheduleEvent(s64 cycles_into_future, const EventType* event_type, u64 userdata = 0);
57void ScheduleEvent(s64 cycles_into_future, const EventType* event_type, u64 userdata = 0); 78
58 79 /// This is to be called when outside of hle threads, such as the graphics thread, wants to
59/** 80 /// schedule things to be executed on the main thread.
60 * This is to be called when outside of hle threads, such as the graphics thread, wants to 81 ///
61 * schedule things to be executed on the main thread. 82 /// @note This doesn't change slice_length and thus events scheduled by this might be
62 * Not that this doesn't change slice_length and thus events scheduled by this might be called 83 /// called with a delay of up to MAX_SLICE_LENGTH
63 * with a delay of up to MAX_SLICE_LENGTH 84 void ScheduleEventThreadsafe(s64 cycles_into_future, const EventType* event_type,
64 */ 85 u64 userdata = 0);
65void ScheduleEventThreadsafe(s64 cycles_into_future, const EventType* event_type, u64 userdata); 86
66 87 void UnscheduleEvent(const EventType* event_type, u64 userdata);
67void UnscheduleEvent(const EventType* event_type, u64 userdata); 88 void UnscheduleEventThreadsafe(const EventType* event_type, u64 userdata);
68void UnscheduleEventThreadsafe(const EventType* event_type, u64 userdata); 89
69 90 /// We only permit one event of each type in the queue at a time.
70/// We only permit one event of each type in the queue at a time. 91 void RemoveEvent(const EventType* event_type);
71void RemoveEvent(const EventType* event_type); 92 void RemoveNormalAndThreadsafeEvent(const EventType* event_type);
72void RemoveNormalAndThreadsafeEvent(const EventType* event_type); 93
73 94 void ForceExceptionCheck(s64 cycles);
74/** Advance must be called at the beginning of dispatcher loops, not the end. Advance() ends 95
75 * the previous timing slice and begins the next one, you must Advance from the previous 96 /// This should only be called from the emu thread, if you are calling it any other thread,
76 * slice to the current one before executing any cycles. CoreTiming starts in slice -1 so an 97 /// you are doing something evil
77 * Advance() is required to initialize the slice length before the first cycle of emulated 98 u64 GetTicks() const;
78 * instructions is executed. 99
79 */ 100 u64 GetIdleTicks() const;
80void Advance(); 101
81void MoveEvents(); 102 void AddTicks(u64 ticks);
82 103
83/// Pretend that the main CPU has executed enough cycles to reach the next event. 104 /// Advance must be called at the beginning of dispatcher loops, not the end. Advance() ends
84void Idle(); 105 /// the previous timing slice and begins the next one, you must Advance from the previous
85 106 /// slice to the current one before executing any cycles. CoreTiming starts in slice -1 so an
86/// Clear all pending events. This should ONLY be done on exit. 107 /// Advance() is required to initialize the slice length before the first cycle of emulated
87void ClearPendingEvents(); 108 /// instructions is executed.
88 109 void Advance();
89void ForceExceptionCheck(s64 cycles); 110
90 111 /// Pretend that the main CPU has executed enough cycles to reach the next event.
91std::chrono::microseconds GetGlobalTimeUs(); 112 void Idle();
92 113
93int GetDowncount(); 114 std::chrono::microseconds GetGlobalTimeUs() const;
94 115
95} // namespace CoreTiming 116 int GetDowncount() const;
117
118private:
119 struct Event;
120
121 /// Clear all pending events. This should ONLY be done on exit.
122 void ClearPendingEvents();
123 void MoveEvents();
124
125 s64 global_timer = 0;
126 s64 idled_cycles = 0;
127 int slice_length = 0;
128 int downcount = 0;
129
130 // Are we in a function that has been called from Advance()
131 // If events are scheduled from a function that gets called from Advance(),
132 // don't change slice_length and downcount.
133 bool is_global_timer_sane = false;
134
135 // The queue is a min-heap using std::make_heap/push_heap/pop_heap.
136 // We don't use std::priority_queue because we need to be able to serialize, unserialize and
137 // erase arbitrary events (RemoveEvent()) regardless of the queue order. These aren't
138 // accomodated by the standard adaptor class.
139 std::vector<Event> event_queue;
140 u64 event_fifo_id = 0;
141
142 // Stores each element separately as a linked list node so pointers to elements
143 // remain stable regardless of rehashes/resizing.
144 std::unordered_map<std::string, EventType> event_types;
145
146 // The queue for storing the events from other threads threadsafe until they will be added
147 // to the event_queue by the emu thread
148 Common::MPSCQueue<Event> ts_queue;
149
150 // The queue for unscheduling the events from other threads threadsafe
151 Common::MPSCQueue<std::pair<const EventType*, u64>> unschedule_queue;
152
153 EventType* ev_lost = nullptr;
154};
155
156} // namespace Core::Timing
diff --git a/src/core/core_timing_util.cpp b/src/core/core_timing_util.cpp
index 73dea4edb..7942f30d6 100644
--- a/src/core/core_timing_util.cpp
+++ b/src/core/core_timing_util.cpp
@@ -7,8 +7,9 @@
7#include <cinttypes> 7#include <cinttypes>
8#include <limits> 8#include <limits>
9#include "common/logging/log.h" 9#include "common/logging/log.h"
10#include "common/uint128.h"
10 11
11namespace CoreTiming { 12namespace Core::Timing {
12 13
13constexpr u64 MAX_VALUE_TO_MULTIPLY = std::numeric_limits<s64>::max() / BASE_CLOCK_RATE; 14constexpr u64 MAX_VALUE_TO_MULTIPLY = std::numeric_limits<s64>::max() / BASE_CLOCK_RATE;
14 15
@@ -60,4 +61,9 @@ s64 nsToCycles(u64 ns) {
60 return (BASE_CLOCK_RATE * static_cast<s64>(ns)) / 1000000000; 61 return (BASE_CLOCK_RATE * static_cast<s64>(ns)) / 1000000000;
61} 62}
62 63
63} // namespace CoreTiming 64u64 CpuCyclesToClockCycles(u64 ticks) {
65 const u128 temporal = Common::Multiply64Into128(ticks, CNTFREQ);
66 return Common::Divide128On32(temporal, static_cast<u32>(BASE_CLOCK_RATE)).first;
67}
68
69} // namespace Core::Timing
diff --git a/src/core/core_timing_util.h b/src/core/core_timing_util.h
index 5c3718782..679aa3123 100644
--- a/src/core/core_timing_util.h
+++ b/src/core/core_timing_util.h
@@ -6,11 +6,12 @@
6 6
7#include "common/common_types.h" 7#include "common/common_types.h"
8 8
9namespace CoreTiming { 9namespace Core::Timing {
10 10
11// The below clock rate is based on Switch's clockspeed being widely known as 1.020GHz 11// The below clock rate is based on Switch's clockspeed being widely known as 1.020GHz
12// The exact value used is of course unverified. 12// The exact value used is of course unverified.
13constexpr u64 BASE_CLOCK_RATE = 1019215872; // Switch clock speed is 1020MHz un/docked 13constexpr u64 BASE_CLOCK_RATE = 1019215872; // Switch clock speed is 1020MHz un/docked
14constexpr u64 CNTFREQ = 19200000; // Value from fusee.
14 15
15inline s64 msToCycles(int ms) { 16inline s64 msToCycles(int ms) {
16 // since ms is int there is no way to overflow 17 // since ms is int there is no way to overflow
@@ -61,4 +62,6 @@ inline u64 cyclesToMs(s64 cycles) {
61 return cycles * 1000 / BASE_CLOCK_RATE; 62 return cycles * 1000 / BASE_CLOCK_RATE;
62} 63}
63 64
64} // namespace CoreTiming 65u64 CpuCyclesToClockCycles(u64 ticks);
66
67} // namespace Core::Timing
diff --git a/src/core/cpu_core_manager.cpp b/src/core/cpu_core_manager.cpp
index 769a6fefa..93bc5619c 100644
--- a/src/core/cpu_core_manager.cpp
+++ b/src/core/cpu_core_manager.cpp
@@ -27,7 +27,7 @@ void CpuCoreManager::Initialize(System& system) {
27 exclusive_monitor = Cpu::MakeExclusiveMonitor(cores.size()); 27 exclusive_monitor = Cpu::MakeExclusiveMonitor(cores.size());
28 28
29 for (std::size_t index = 0; index < cores.size(); ++index) { 29 for (std::size_t index = 0; index < cores.size(); ++index) {
30 cores[index] = std::make_unique<Cpu>(*exclusive_monitor, *barrier, index); 30 cores[index] = std::make_unique<Cpu>(system, *exclusive_monitor, *barrier, index);
31 } 31 }
32 32
33 // Create threads for CPU cores 1-3, and build thread_to_cpu map 33 // Create threads for CPU cores 1-3, and build thread_to_cpu map
diff --git a/src/core/crypto/key_manager.cpp b/src/core/crypto/key_manager.cpp
index ca12fb4ab..dfac9a4b3 100644
--- a/src/core/crypto/key_manager.cpp
+++ b/src/core/crypto/key_manager.cpp
@@ -398,7 +398,8 @@ static bool ValidCryptoRevisionString(std::string_view base, size_t begin, size_
398} 398}
399 399
400void KeyManager::LoadFromFile(const std::string& filename, bool is_title_keys) { 400void KeyManager::LoadFromFile(const std::string& filename, bool is_title_keys) {
401 std::ifstream file(filename); 401 std::ifstream file;
402 OpenFStream(file, filename, std::ios_base::in);
402 if (!file.is_open()) 403 if (!file.is_open())
403 return; 404 return;
404 405
diff --git a/src/core/file_sys/cheat_engine.cpp b/src/core/file_sys/cheat_engine.cpp
new file mode 100644
index 000000000..b06c2f20a
--- /dev/null
+++ b/src/core/file_sys/cheat_engine.cpp
@@ -0,0 +1,492 @@
1// Copyright 2018 yuzu emulator team
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <locale>
6#include "common/hex_util.h"
7#include "common/microprofile.h"
8#include "common/swap.h"
9#include "core/core.h"
10#include "core/core_timing.h"
11#include "core/core_timing_util.h"
12#include "core/file_sys/cheat_engine.h"
13#include "core/hle/kernel/process.h"
14#include "core/hle/service/hid/controllers/npad.h"
15#include "core/hle/service/hid/hid.h"
16#include "core/hle/service/sm/sm.h"
17
18namespace FileSys {
19
// Cheats are executed once per 1/60th of a second (one frame), measured in CPU cycles.
constexpr s64 CHEAT_ENGINE_TICKS = static_cast<s64>(Core::Timing::BASE_CLOCK_RATE / 60);
// Mask covering the button bits of the HID press state considered by input conditionals.
constexpr u32 KEYPAD_BITMASK = 0x3FFFFFF;
22
23u64 Cheat::Address() const {
24 u64 out;
25 std::memcpy(&out, raw.data(), sizeof(u64));
26 return Common::swap64(out) & 0xFFFFFFFFFF;
27}
28
// Convenience wrapper: reads the immediate at `offset` using this cheat's own width field.
u64 Cheat::ValueWidth(u64 offset) const {
    return Value(offset, width);
}
32
33u64 Cheat::Value(u64 offset, u64 width) const {
34 u64 out;
35 std::memcpy(&out, raw.data() + offset, sizeof(u64));
36 out = Common::swap64(out);
37 if (width == 8)
38 return out;
39 return out & ((1ull << (width * CHAR_BIT)) - 1);
40}
41
42u32 Cheat::KeypadValue() const {
43 u32 out;
44 std::memcpy(&out, raw.data(), sizeof(u32));
45 return Common::swap32(out) & 0x0FFFFFFF;
46}
47
48void CheatList::SetMemoryParameters(VAddr main_begin, VAddr heap_begin, VAddr main_end,
49 VAddr heap_end, MemoryWriter writer, MemoryReader reader) {
50 this->main_region_begin = main_begin;
51 this->main_region_end = main_end;
52 this->heap_region_begin = heap_begin;
53 this->heap_region_end = heap_end;
54 this->writer = writer;
55 this->reader = reader;
56}
57
58MICROPROFILE_DEFINE(Cheat_Engine, "Add-Ons", "Cheat Engine", MP_RGB(70, 200, 70));
59
60void CheatList::Execute() {
61 MICROPROFILE_SCOPE(Cheat_Engine);
62
63 std::fill(scratch.begin(), scratch.end(), 0);
64 in_standard = false;
65 for (std::size_t i = 0; i < master_list.size(); ++i) {
66 LOG_DEBUG(Common_Filesystem, "Executing block #{:08X} ({})", i, master_list[i].first);
67 current_block = i;
68 ExecuteBlock(master_list[i].second);
69 }
70
71 in_standard = true;
72 for (std::size_t i = 0; i < standard_list.size(); ++i) {
73 LOG_DEBUG(Common_Filesystem, "Executing block #{:08X} ({})", i, standard_list[i].first);
74 current_block = i;
75 ExecuteBlock(standard_list[i].second);
76 }
77}
78
// Private: lists are only created through CheatParser::MakeCheatList so that every
// instance has been produced by a parser and is in a consistent state.
CheatList::CheatList(const Core::System& system_, ProgramSegment master, ProgramSegment standard)
    : master_list{std::move(master)}, standard_list{std::move(standard)}, system{&system_} {}
81
82bool CheatList::EvaluateConditional(const Cheat& cheat) const {
83 using ComparisonFunction = bool (*)(u64, u64);
84 constexpr std::array<ComparisonFunction, 6> comparison_functions{
85 [](u64 a, u64 b) { return a > b; }, [](u64 a, u64 b) { return a >= b; },
86 [](u64 a, u64 b) { return a < b; }, [](u64 a, u64 b) { return a <= b; },
87 [](u64 a, u64 b) { return a == b; }, [](u64 a, u64 b) { return a != b; },
88 };
89
90 if (cheat.type == CodeType::ConditionalInput) {
91 const auto applet_resource =
92 system->ServiceManager().GetService<Service::HID::Hid>("hid")->GetAppletResource();
93 if (applet_resource == nullptr) {
94 LOG_WARNING(
95 Common_Filesystem,
96 "Attempted to evaluate input conditional, but applet resource is not initialized!");
97 return false;
98 }
99
100 const auto press_state =
101 applet_resource
102 ->GetController<Service::HID::Controller_NPad>(Service::HID::HidController::NPad)
103 .GetAndResetPressState();
104 return ((press_state & cheat.KeypadValue()) & KEYPAD_BITMASK) != 0;
105 }
106
107 ASSERT(cheat.type == CodeType::Conditional);
108
109 const auto offset =
110 cheat.memory_type == MemoryType::MainNSO ? main_region_begin : heap_region_begin;
111 ASSERT(static_cast<u8>(cheat.comparison_op.Value()) < 6);
112 auto* function = comparison_functions[static_cast<u8>(cheat.comparison_op.Value())];
113 const auto addr = cheat.Address() + offset;
114
115 return function(reader(cheat.width, SanitizeAddress(addr)), cheat.ValueWidth(8));
116}
117
118void CheatList::ProcessBlockPairs(const Block& block) {
119 block_pairs.clear();
120
121 u64 scope = 0;
122 std::map<u64, u64> pairs;
123
124 for (std::size_t i = 0; i < block.size(); ++i) {
125 const auto& cheat = block[i];
126
127 switch (cheat.type) {
128 case CodeType::Conditional:
129 case CodeType::ConditionalInput:
130 pairs.insert_or_assign(scope, i);
131 ++scope;
132 break;
133 case CodeType::EndConditional: {
134 --scope;
135 const auto idx = pairs.at(scope);
136 block_pairs.insert_or_assign(idx, i);
137 break;
138 }
139 case CodeType::Loop: {
140 if (cheat.end_of_loop) {
141 --scope;
142 const auto idx = pairs.at(scope);
143 block_pairs.insert_or_assign(idx, i);
144 } else {
145 pairs.insert_or_assign(scope, i);
146 ++scope;
147 }
148 break;
149 }
150 }
151 }
152}
153
154void CheatList::WriteImmediate(const Cheat& cheat) {
155 const auto offset =
156 cheat.memory_type == MemoryType::MainNSO ? main_region_begin : heap_region_begin;
157 const auto& register_3 = scratch.at(cheat.register_3);
158
159 const auto addr = cheat.Address() + offset + register_3;
160 LOG_DEBUG(Common_Filesystem, "writing value={:016X} to addr={:016X}", addr,
161 cheat.Value(8, cheat.width));
162 writer(cheat.width, SanitizeAddress(addr), cheat.ValueWidth(8));
163}
164
165void CheatList::BeginConditional(const Cheat& cheat) {
166 if (EvaluateConditional(cheat)) {
167 return;
168 }
169
170 const auto iter = block_pairs.find(current_index);
171 ASSERT(iter != block_pairs.end());
172 current_index = iter->second - 1;
173}
174
// Opcode 2: terminator for Conditional/ConditionalInput. A no-op at execution time,
// since the Begin* handlers jump here directly when their condition fails.
void CheatList::EndConditional(const Cheat& cheat) {
    LOG_DEBUG(Common_Filesystem, "Ending conditional block.");
}
178
179void CheatList::Loop(const Cheat& cheat) {
180 if (cheat.end_of_loop.Value())
181 ASSERT(!cheat.end_of_loop.Value());
182
183 auto& register_3 = scratch.at(cheat.register_3);
184 const auto iter = block_pairs.find(current_index);
185 ASSERT(iter != block_pairs.end());
186 ASSERT(iter->first < iter->second);
187
188 const s32 initial_value = static_cast<s32>(cheat.Value(4, sizeof(s32)));
189 for (s32 i = initial_value; i >= 0; --i) {
190 register_3 = static_cast<u64>(i);
191 for (std::size_t c = iter->first + 1; c < iter->second; ++c) {
192 current_index = c;
193 ExecuteSingleCheat(
194 (in_standard ? standard_list : master_list)[current_block].second[c]);
195 }
196 }
197
198 current_index = iter->second;
199}
200
201void CheatList::LoadImmediate(const Cheat& cheat) {
202 auto& register_3 = scratch.at(cheat.register_3);
203
204 LOG_DEBUG(Common_Filesystem, "setting register={:01X} equal to value={:016X}", cheat.register_3,
205 cheat.Value(4, 8));
206 register_3 = cheat.Value(4, 8);
207}
208
209void CheatList::LoadIndexed(const Cheat& cheat) {
210 const auto offset =
211 cheat.memory_type == MemoryType::MainNSO ? main_region_begin : heap_region_begin;
212 auto& register_3 = scratch.at(cheat.register_3);
213
214 const auto addr = (cheat.load_from_register.Value() ? register_3 : offset) + cheat.Address();
215 LOG_DEBUG(Common_Filesystem, "writing indexed value to register={:01X}, addr={:016X}",
216 cheat.register_3, addr);
217 register_3 = reader(cheat.width, SanitizeAddress(addr));
218}
219
220void CheatList::StoreIndexed(const Cheat& cheat) {
221 const auto& register_3 = scratch.at(cheat.register_3);
222
223 const auto addr =
224 register_3 + (cheat.add_additional_register.Value() ? scratch.at(cheat.register_6) : 0);
225 LOG_DEBUG(Common_Filesystem, "writing value={:016X} to addr={:016X}",
226 cheat.Value(4, cheat.width), addr);
227 writer(cheat.width, SanitizeAddress(addr), cheat.ValueWidth(4));
228}
229
230void CheatList::RegisterArithmetic(const Cheat& cheat) {
231 using ArithmeticFunction = u64 (*)(u64, u64);
232 constexpr std::array<ArithmeticFunction, 5> arithmetic_functions{
233 [](u64 a, u64 b) { return a + b; }, [](u64 a, u64 b) { return a - b; },
234 [](u64 a, u64 b) { return a * b; }, [](u64 a, u64 b) { return a << b; },
235 [](u64 a, u64 b) { return a >> b; },
236 };
237
238 using ArithmeticOverflowCheck = bool (*)(u64, u64);
239 constexpr std::array<ArithmeticOverflowCheck, 5> arithmetic_overflow_checks{
240 [](u64 a, u64 b) { return a > (std::numeric_limits<u64>::max() - b); }, // a + b
241 [](u64 a, u64 b) { return a > (std::numeric_limits<u64>::max() + b); }, // a - b
242 [](u64 a, u64 b) { return a > (std::numeric_limits<u64>::max() / b); }, // a * b
243 [](u64 a, u64 b) { return b >= 64 || (a & ~((1ull << (64 - b)) - 1)) != 0; }, // a << b
244 [](u64 a, u64 b) { return b >= 64 || (a & ((1ull << b) - 1)) != 0; }, // a >> b
245 };
246
247 static_assert(sizeof(arithmetic_functions) == sizeof(arithmetic_overflow_checks),
248 "Missing or have extra arithmetic overflow checks compared to functions!");
249
250 auto& register_3 = scratch.at(cheat.register_3);
251
252 ASSERT(static_cast<u8>(cheat.arithmetic_op.Value()) < 5);
253 auto* function = arithmetic_functions[static_cast<u8>(cheat.arithmetic_op.Value())];
254 auto* overflow_function =
255 arithmetic_overflow_checks[static_cast<u8>(cheat.arithmetic_op.Value())];
256 LOG_DEBUG(Common_Filesystem, "performing arithmetic with register={:01X}, value={:016X}",
257 cheat.register_3, cheat.ValueWidth(4));
258
259 if (overflow_function(register_3, cheat.ValueWidth(4))) {
260 LOG_WARNING(Common_Filesystem,
261 "overflow will occur when performing arithmetic operation={:02X} with operands "
262 "a={:016X}, b={:016X}!",
263 static_cast<u8>(cheat.arithmetic_op.Value()), register_3, cheat.ValueWidth(4));
264 }
265
266 register_3 = function(register_3, cheat.ValueWidth(4));
267}
268
269void CheatList::BeginConditionalInput(const Cheat& cheat) {
270 if (EvaluateConditional(cheat))
271 return;
272
273 const auto iter = block_pairs.find(current_index);
274 ASSERT(iter != block_pairs.end());
275 current_index = iter->second - 1;
276}
277
278VAddr CheatList::SanitizeAddress(VAddr in) const {
279 if ((in < main_region_begin || in >= main_region_end) &&
280 (in < heap_region_begin || in >= heap_region_end)) {
281 LOG_ERROR(Common_Filesystem,
282 "Cheat attempting to access memory at invalid address={:016X}, if this persists, "
283 "the cheat may be incorrect. However, this may be normal early in execution if "
284 "the game has not properly set up yet.",
285 in);
286 return 0; ///< Invalid addresses will hard crash
287 }
288
289 return in;
290}
291
292void CheatList::ExecuteSingleCheat(const Cheat& cheat) {
293 using CheatOperationFunction = void (CheatList::*)(const Cheat&);
294 constexpr std::array<CheatOperationFunction, 9> cheat_operation_functions{
295 &CheatList::WriteImmediate, &CheatList::BeginConditional,
296 &CheatList::EndConditional, &CheatList::Loop,
297 &CheatList::LoadImmediate, &CheatList::LoadIndexed,
298 &CheatList::StoreIndexed, &CheatList::RegisterArithmetic,
299 &CheatList::BeginConditionalInput,
300 };
301
302 const auto index = static_cast<u8>(cheat.type.Value());
303 ASSERT(index < sizeof(cheat_operation_functions));
304 const auto op = cheat_operation_functions[index];
305 (this->*op)(cheat);
306}
307
308void CheatList::ExecuteBlock(const Block& block) {
309 encountered_loops.clear();
310
311 ProcessBlockPairs(block);
312 for (std::size_t i = 0; i < block.size(); ++i) {
313 current_index = i;
314 ExecuteSingleCheat(block[i]);
315 i = current_index;
316 }
317}
318
319CheatParser::~CheatParser() = default;
320
321CheatList CheatParser::MakeCheatList(const Core::System& system, CheatList::ProgramSegment master,
322 CheatList::ProgramSegment standard) const {
323 return {system, std::move(master), std::move(standard)};
324}
325
326TextCheatParser::~TextCheatParser() = default;
327
328CheatList TextCheatParser::Parse(const Core::System& system, const std::vector<u8>& data) const {
329 std::stringstream ss;
330 ss.write(reinterpret_cast<const char*>(data.data()), data.size());
331
332 std::vector<std::string> lines;
333 std::string stream_line;
334 while (std::getline(ss, stream_line)) {
335 // Remove a trailing \r
336 if (!stream_line.empty() && stream_line.back() == '\r')
337 stream_line.pop_back();
338 lines.push_back(std::move(stream_line));
339 }
340
341 CheatList::ProgramSegment master_list;
342 CheatList::ProgramSegment standard_list;
343
344 for (std::size_t i = 0; i < lines.size(); ++i) {
345 auto line = lines[i];
346
347 if (!line.empty() && (line[0] == '[' || line[0] == '{')) {
348 const auto master = line[0] == '{';
349 const auto begin = master ? line.find('{') : line.find('[');
350 const auto end = master ? line.rfind('}') : line.rfind(']');
351
352 ASSERT(begin != std::string::npos && end != std::string::npos);
353
354 const std::string patch_name{line.begin() + begin + 1, line.begin() + end};
355 CheatList::Block block{};
356
357 while (i < lines.size() - 1) {
358 line = lines[++i];
359 if (!line.empty() && (line[0] == '[' || line[0] == '{')) {
360 --i;
361 break;
362 }
363
364 if (line.size() < 8)
365 continue;
366
367 Cheat out{};
368 out.raw = ParseSingleLineCheat(line);
369 block.push_back(out);
370 }
371
372 (master ? master_list : standard_list).emplace_back(patch_name, block);
373 }
374 }
375
376 return MakeCheatList(system, master_list, standard_list);
377}
378
// Decodes one text cheat line ("XXXXXXXX XXXXXXXX XXXXXXXX XXXXXXXX") into the 16-byte
// raw opcode form. Lines may be truncated to 1-3 words; for truncated lines, the value
// word is shifted into the slot the opcode's decoder expects.
std::array<u8, 16> TextCheatParser::ParseSingleLineCheat(const std::string& line) const {
    std::array<u8, 16> out{};

    // Too short to contain even the first word; return an all-zero opcode.
    if (line.size() < 8)
        return out;

    const auto word1 = Common::HexStringToArray<sizeof(u32)>(std::string_view{line.data(), 8});
    std::memcpy(out.data(), word1.data(), sizeof(u32));

    // A second word requires at least "XXXXXXXX XXXXXXXX" (17 chars) with a space at [8].
    if (line.size() < 17 || line[8] != ' ')
        return out;

    const auto word2 = Common::HexStringToArray<sizeof(u32)>(std::string_view{line.data() + 9, 8});
    std::memcpy(out.data() + sizeof(u32), word2.data(), sizeof(u32));

    if (line.size() < 26 || line[17] != ' ') {
        // Perform shifting in case value is truncated early.
        // These opcode types keep their value in words 3-4, so a two-word line means the
        // value currently in word 2 must be moved up and word 2 cleared.
        const auto type = static_cast<CodeType>((out[0] & 0xF0) >> 4);
        if (type == CodeType::Loop || type == CodeType::LoadImmediate ||
            type == CodeType::StoreIndexed || type == CodeType::RegisterArithmetic) {
            std::memcpy(out.data() + 8, out.data() + 4, sizeof(u32));
            std::memset(out.data() + 4, 0, sizeof(u32));
        }

        return out;
    }

    const auto word3 = Common::HexStringToArray<sizeof(u32)>(std::string_view{line.data() + 18, 8});
    std::memcpy(out.data() + 2 * sizeof(u32), word3.data(), sizeof(u32));

    if (line.size() < 35 || line[26] != ' ') {
        // Perform shifting in case value is truncated early.
        // WriteImmediate/Conditional keep their value in word 4; move word 3 up.
        const auto type = static_cast<CodeType>((out[0] & 0xF0) >> 4);
        if (type == CodeType::WriteImmediate || type == CodeType::Conditional) {
            std::memcpy(out.data() + 12, out.data() + 8, sizeof(u32));
            std::memset(out.data() + 8, 0, sizeof(u32));
        }

        return out;
    }

    const auto word4 = Common::HexStringToArray<sizeof(u32)>(std::string_view{line.data() + 27, 8});
    std::memcpy(out.data() + 3 * sizeof(u32), word4.data(), sizeof(u32));

    return out;
}
425
426namespace {
427u64 MemoryReadImpl(u32 width, VAddr addr) {
428 switch (width) {
429 case 1:
430 return Memory::Read8(addr);
431 case 2:
432 return Memory::Read16(addr);
433 case 4:
434 return Memory::Read32(addr);
435 case 8:
436 return Memory::Read64(addr);
437 default:
438 UNREACHABLE();
439 return 0;
440 }
441}
442
443void MemoryWriteImpl(u32 width, VAddr addr, u64 value) {
444 switch (width) {
445 case 1:
446 Memory::Write8(addr, static_cast<u8>(value));
447 break;
448 case 2:
449 Memory::Write16(addr, static_cast<u16>(value));
450 break;
451 case 4:
452 Memory::Write32(addr, static_cast<u32>(value));
453 break;
454 case 8:
455 Memory::Write64(addr, value);
456 break;
457 default:
458 UNREACHABLE();
459 }
460}
461} // Anonymous namespace
462
// Registers a per-frame core-timing callback (keyed by build id so multiple engines can
// coexist) and points every cheat list at the current process' code and heap regions.
CheatEngine::CheatEngine(Core::System& system, std::vector<CheatList> cheats_,
                         const std::string& build_id, VAddr code_region_start,
                         VAddr code_region_end)
    : cheats{std::move(cheats_)}, core_timing{system.CoreTiming()} {
    event = core_timing.RegisterEvent(
        "CheatEngine::FrameCallback::" + build_id,
        [this](u64 userdata, s64 cycles_late) { FrameCallback(userdata, cycles_late); });
    core_timing.ScheduleEvent(CHEAT_ENGINE_TICKS, event);

    const auto& vm_manager = system.CurrentProcess()->VMManager();
    for (auto& list : this->cheats) {
        // Parameter order is (main_begin, heap_begin, main_end, heap_end).
        list.SetMemoryParameters(code_region_start, vm_manager.GetHeapRegionBaseAddress(),
                                 code_region_end, vm_manager.GetHeapRegionEndAddress(),
                                 &MemoryWriteImpl, &MemoryReadImpl);
    }
}
479
CheatEngine::~CheatEngine() {
    // Cancel the pending frame callback so it cannot fire into a destroyed engine.
    core_timing.UnscheduleEvent(event, 0);
}
483
484void CheatEngine::FrameCallback(u64 userdata, s64 cycles_late) {
485 for (auto& list : cheats) {
486 list.Execute();
487 }
488
489 core_timing.ScheduleEvent(CHEAT_ENGINE_TICKS - cycles_late, event);
490}
491
492} // namespace FileSys
diff --git a/src/core/file_sys/cheat_engine.h b/src/core/file_sys/cheat_engine.h
new file mode 100644
index 000000000..ac22a82cb
--- /dev/null
+++ b/src/core/file_sys/cheat_engine.h
@@ -0,0 +1,234 @@
1// Copyright 2018 yuzu emulator team
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <map>
8#include <set>
9#include <vector>
10#include "common/bit_field.h"
11#include "common/common_types.h"
12
13namespace Core {
14class System;
15}
16
17namespace Core::Timing {
18class CoreTiming;
19struct EventType;
20} // namespace Core::Timing
21
22namespace FileSys {
23
/// Cheat VM opcode identifiers, encoded in the high nibble of an instruction's first byte.
enum class CodeType : u32 {
    // 0TMR00AA AAAAAAAA YYYYYYYY YYYYYYYY
    // Writes a T sized value Y to the address A added to the value of register R in memory domain M
    WriteImmediate = 0,

    // 1TMC00AA AAAAAAAA YYYYYYYY YYYYYYYY
    // Compares the T sized value Y to the value at address A in memory domain M using the
    // conditional function C. If success, continues execution. If failure, jumps to the matching
    // EndConditional statement.
    Conditional = 1,

    // 20000000
    // Terminates a Conditional or ConditionalInput block.
    EndConditional = 2,

    // 300R0000 VVVVVVVV
    // Starts looping V times, storing the current count in register R.
    // Loop block is terminated with a matching 310R0000.
    Loop = 3,

    // 400R0000 VVVVVVVV VVVVVVVV
    // Sets the value of register R to the value V.
    LoadImmediate = 4,

    // 5TMRI0AA AAAAAAAA
    // Sets the value of register R to the value of width T at address A in memory domain M, with
    // the current value of R added to the address if I == 1.
    LoadIndexed = 5,

    // 6T0RIFG0 VVVVVVVV VVVVVVVV
    // Writes the value V of width T to the memory address stored in register R. Adds the value of
    // register G to the final calculation if F is nonzero. Increments the value of register R by T
    // after operation if I is nonzero.
    StoreIndexed = 6,

    // 7T0RA000 VVVVVVVV
    // Performs the arithmetic operation A on the value in register R and the value V of width T,
    // storing the result in register R.
    RegisterArithmetic = 7,

    // 8KKKKKKK
    // Checks to see if any of the buttons defined by the bitmask K are pressed. If any are,
    // execution continues. If none are, execution skips to the next EndConditional command.
    ConditionalInput = 8,
};
69
/// Selects which memory region a cheat address is interpreted relative to.
enum class MemoryType : u32 {
    // Addressed relative to start of main NSO
    MainNSO = 0,

    // Addressed relative to start of heap
    Heap = 1,
};
77
/// Arithmetic operations for RegisterArithmetic (opcode 7). Values are 0-based.
enum class ArithmeticOp : u32 {
    Add = 0,
    Sub = 1,
    Mult = 2,
    LShift = 3,
    RShift = 4,
};
85
/// Comparison operators for Conditional (opcode 1) cheats.
/// Note: values are 1-based, unlike ArithmeticOp.
enum class ComparisonOp : u32 {
    GreaterThan = 1,
    GreaterThanEqual = 2,
    LessThan = 3,
    LessThanEqual = 4,
    Equal = 5,
    Inequal = 6,
};
94
// One decoded cheat instruction. The BitFields all alias the first (byte-swapped) word of
// `raw`; which fields are meaningful depends on `type`. Several fields deliberately
// overlap (e.g. width/end_of_loop, load_from_register/increment_register/arithmetic_op).
union Cheat {
    std::array<u8, 16> raw;

    BitField<4, 4, CodeType> type;
    BitField<0, 4, u32> width; // Can be 1, 2, 4, or 8. Measured in bytes.
    BitField<0, 4, u32> end_of_loop; // Nonzero marks the loop-terminating 310R0000 entry.
    BitField<12, 4, MemoryType> memory_type;
    BitField<8, 4, u32> register_3;
    BitField<8, 4, ComparisonOp> comparison_op;
    BitField<20, 4, u32> load_from_register;
    BitField<20, 4, u32> increment_register;
    BitField<20, 4, ArithmeticOp> arithmetic_op;
    BitField<16, 4, u32> add_additional_register;
    BitField<28, 4, u32> register_6;

    // 40-bit big-endian target address from the first eight raw bytes.
    u64 Address() const;
    // Immediate at `offset`, sized by this cheat's own width field.
    u64 ValueWidth(u64 offset) const;
    // Big-endian immediate of `width` bytes starting at byte `offset`.
    u64 Value(u64 offset, u64 width) const;
    // 28-bit key bitmask of a ConditionalInput opcode.
    u32 KeypadValue() const;
};
115
116class CheatParser;
117
118// Represents a full collection of cheats for a game. The Execute function should be called every
119// interval that all cheats should be executed. Clients should not directly instantiate this class
120// (hence private constructor), they should instead receive an instance from CheatParser, which
121// guarantees the list is always in an acceptable state.
// Represents a full collection of cheats for a game. The Execute function should be called every
// interval that all cheats should be executed. Clients should not directly instantiate this class
// (hence private constructor), they should instead receive an instance from CheatParser, which
// guarantees the list is always in an acceptable state.
class CheatList {
public:
    friend class CheatParser;

    using Block = std::vector<Cheat>;
    using ProgramSegment = std::vector<std::pair<std::string, Block>>;

    // (width in bytes, address, value)
    using MemoryWriter = void (*)(u32, VAddr, u64);
    // (width in bytes, address) -> value
    using MemoryReader = u64 (*)(u32, VAddr);

    // Supplies the address ranges cheats may touch and the accessors used to touch them.
    // Must be called before Execute.
    void SetMemoryParameters(VAddr main_begin, VAddr heap_begin, VAddr main_end, VAddr heap_end,
                             MemoryWriter writer, MemoryReader reader);

    // Runs all master blocks, then all standard blocks, once.
    void Execute();

private:
    CheatList(const Core::System& system_, ProgramSegment master, ProgramSegment standard);

    // Builds block_pairs (opener index -> terminator index) for one block.
    void ProcessBlockPairs(const Block& block);
    void ExecuteSingleCheat(const Cheat& cheat);

    void ExecuteBlock(const Block& block);

    bool EvaluateConditional(const Cheat& cheat) const;

    // Individual cheat operations
    void WriteImmediate(const Cheat& cheat);
    void BeginConditional(const Cheat& cheat);
    void EndConditional(const Cheat& cheat);
    void Loop(const Cheat& cheat);
    void LoadImmediate(const Cheat& cheat);
    void LoadIndexed(const Cheat& cheat);
    void StoreIndexed(const Cheat& cheat);
    void RegisterArithmetic(const Cheat& cheat);
    void BeginConditionalInput(const Cheat& cheat);

    // Maps addresses outside the main/heap ranges to 0; valid addresses pass through.
    VAddr SanitizeAddress(VAddr in) const;

    // Master Codes are defined as codes that cannot be disabled and are run prior to all
    // others.
    ProgramSegment master_list;
    // All other codes
    ProgramSegment standard_list;

    // True while Execute is running standard_list rather than master_list.
    bool in_standard = false;

    // 16 (0x0-0xF) scratch registers that can be used by cheats
    std::array<u64, 16> scratch{};

    MemoryWriter writer = nullptr;
    MemoryReader reader = nullptr;

    u64 main_region_begin{};
    u64 heap_region_begin{};
    u64 main_region_end{};
    u64 heap_region_end{};

    // Index of the block currently executing within the active segment.
    u64 current_block{};
    // The current index of the cheat within the current Block
    u64 current_index{};

    // Maps the index of each Conditional/ConditionalInput/Loop opener in the current block
    // to the index of its matching terminator (rebuilt per block by ProcessBlockPairs).
    std::map<u64, u64> block_pairs;

    std::set<u64> encountered_loops;

    // Non-owning; used to query HID state for input conditionals.
    const Core::System* system;
};
193
194// Intermediary class that parses a text file or other disk format for storing cheats into a
195// CheatList object, that can be used for execution.
// Intermediary class that parses a text file or other disk format for storing cheats into a
// CheatList object, that can be used for execution.
class CheatParser {
public:
    virtual ~CheatParser();

    // Parses raw file contents into an executable CheatList.
    virtual CheatList Parse(const Core::System& system, const std::vector<u8>& data) const = 0;

protected:
    // Grants subclasses access to CheatList's private constructor.
    CheatList MakeCheatList(const Core::System& system_, CheatList::ProgramSegment master,
                            CheatList::ProgramSegment standard) const;
};
206
207// CheatParser implementation that parses text files
// CheatParser implementation that parses text files
class TextCheatParser final : public CheatParser {
public:
    ~TextCheatParser() override;

    CheatList Parse(const Core::System& system, const std::vector<u8>& data) const override;

private:
    // Decodes one "XXXXXXXX XXXXXXXX XXXXXXXX XXXXXXXX" line (1-4 words) into raw form.
    std::array<u8, 16> ParseSingleLineCheat(const std::string& line) const;
};
217
218// Class that encapsulates a CheatList and manages its interaction with memory and CoreTiming
// Class that encapsulates a CheatList and manages its interaction with memory and CoreTiming
class CheatEngine final {
public:
    CheatEngine(Core::System& system_, std::vector<CheatList> cheats_, const std::string& build_id,
                VAddr code_region_start, VAddr code_region_end);
    ~CheatEngine();

private:
    // Fires once per emulated frame; executes all lists, then reschedules itself.
    void FrameCallback(u64 userdata, s64 cycles_late);

    std::vector<CheatList> cheats;

    // Non-owning; registered with and unscheduled from core_timing.
    Core::Timing::EventType* event;
    Core::Timing::CoreTiming& core_timing;
};
233
234} // namespace FileSys
diff --git a/src/core/file_sys/content_archive.h b/src/core/file_sys/content_archive.h
index 5d4d05c82..15b9e6624 100644
--- a/src/core/file_sys/content_archive.h
+++ b/src/core/file_sys/content_archive.h
@@ -24,13 +24,26 @@ namespace FileSys {
24 24
25union NCASectionHeader; 25union NCASectionHeader;
26 26
27/// Describes the type of content within an NCA archive.
27enum class NCAContentType : u8 { 28enum class NCAContentType : u8 {
29 /// Executable-related data
28 Program = 0, 30 Program = 0,
31
32 /// Metadata.
29 Meta = 1, 33 Meta = 1,
34
35 /// Access control data.
30 Control = 2, 36 Control = 2,
37
38 /// Information related to the game manual
39 /// e.g. Legal information, etc.
31 Manual = 3, 40 Manual = 3,
41
42 /// System data.
32 Data = 4, 43 Data = 4,
33 Data_Unknown5 = 5, ///< Seems to be used on some system archives 44
45 /// Data that can be accessed by applications.
46 PublicData = 5,
34}; 47};
35 48
36enum class NCASectionCryptoType : u8 { 49enum class NCASectionCryptoType : u8 {
diff --git a/src/core/file_sys/errors.h b/src/core/file_sys/errors.h
index e4a4ee4ab..bb4654366 100644
--- a/src/core/file_sys/errors.h
+++ b/src/core/file_sys/errors.h
@@ -11,6 +11,9 @@ namespace FileSys {
11constexpr ResultCode ERROR_PATH_NOT_FOUND{ErrorModule::FS, 1}; 11constexpr ResultCode ERROR_PATH_NOT_FOUND{ErrorModule::FS, 1};
12constexpr ResultCode ERROR_ENTITY_NOT_FOUND{ErrorModule::FS, 1002}; 12constexpr ResultCode ERROR_ENTITY_NOT_FOUND{ErrorModule::FS, 1002};
13constexpr ResultCode ERROR_SD_CARD_NOT_FOUND{ErrorModule::FS, 2001}; 13constexpr ResultCode ERROR_SD_CARD_NOT_FOUND{ErrorModule::FS, 2001};
14constexpr ResultCode ERROR_OUT_OF_BOUNDS{ErrorModule::FS, 3005};
15constexpr ResultCode ERROR_FAILED_MOUNT_ARCHIVE{ErrorModule::FS, 3223};
16constexpr ResultCode ERROR_INVALID_ARGUMENT{ErrorModule::FS, 6001};
14constexpr ResultCode ERROR_INVALID_OFFSET{ErrorModule::FS, 6061}; 17constexpr ResultCode ERROR_INVALID_OFFSET{ErrorModule::FS, 6061};
15constexpr ResultCode ERROR_INVALID_SIZE{ErrorModule::FS, 6062}; 18constexpr ResultCode ERROR_INVALID_SIZE{ErrorModule::FS, 6062};
16 19
diff --git a/src/core/file_sys/patch_manager.cpp b/src/core/file_sys/patch_manager.cpp
index 61706966e..e11217708 100644
--- a/src/core/file_sys/patch_manager.cpp
+++ b/src/core/file_sys/patch_manager.cpp
@@ -7,6 +7,7 @@
7#include <cstddef> 7#include <cstddef>
8#include <cstring> 8#include <cstring>
9 9
10#include "common/file_util.h"
10#include "common/hex_util.h" 11#include "common/hex_util.h"
11#include "common/logging/log.h" 12#include "common/logging/log.h"
12#include "core/file_sys/content_archive.h" 13#include "core/file_sys/content_archive.h"
@@ -19,6 +20,7 @@
19#include "core/file_sys/vfs_vector.h" 20#include "core/file_sys/vfs_vector.h"
20#include "core/hle/service/filesystem/filesystem.h" 21#include "core/hle/service/filesystem/filesystem.h"
21#include "core/loader/loader.h" 22#include "core/loader/loader.h"
23#include "core/loader/nso.h"
22#include "core/settings.h" 24#include "core/settings.h"
23 25
24namespace FileSys { 26namespace FileSys {
@@ -31,14 +33,6 @@ constexpr std::array<const char*, 14> EXEFS_FILE_NAMES{
31 "subsdk3", "subsdk4", "subsdk5", "subsdk6", "subsdk7", "subsdk8", "subsdk9", 33 "subsdk3", "subsdk4", "subsdk5", "subsdk6", "subsdk7", "subsdk8", "subsdk9",
32}; 34};
33 35
34struct NSOBuildHeader {
35 u32_le magic;
36 INSERT_PADDING_BYTES(0x3C);
37 std::array<u8, 0x20> build_id;
38 INSERT_PADDING_BYTES(0xA0);
39};
40static_assert(sizeof(NSOBuildHeader) == 0x100, "NSOBuildHeader has incorrect size.");
41
42std::string FormatTitleVersion(u32 version, TitleVersionFormat format) { 36std::string FormatTitleVersion(u32 version, TitleVersionFormat format) {
43 std::array<u8, sizeof(u32)> bytes{}; 37 std::array<u8, sizeof(u32)> bytes{};
44 bytes[0] = version % SINGLE_BYTE_MODULUS; 38 bytes[0] = version % SINGLE_BYTE_MODULUS;
@@ -162,14 +156,16 @@ std::vector<VirtualFile> PatchManager::CollectPatches(const std::vector<VirtualD
162} 156}
163 157
164std::vector<u8> PatchManager::PatchNSO(const std::vector<u8>& nso) const { 158std::vector<u8> PatchManager::PatchNSO(const std::vector<u8>& nso) const {
165 if (nso.size() < 0x100) 159 if (nso.size() < sizeof(Loader::NSOHeader)) {
166 return nso; 160 return nso;
161 }
167 162
168 NSOBuildHeader header; 163 Loader::NSOHeader header;
169 std::memcpy(&header, nso.data(), sizeof(NSOBuildHeader)); 164 std::memcpy(&header, nso.data(), sizeof(header));
170 165
171 if (header.magic != Common::MakeMagic('N', 'S', 'O', '0')) 166 if (header.magic != Common::MakeMagic('N', 'S', 'O', '0')) {
172 return nso; 167 return nso;
168 }
173 169
174 const auto build_id_raw = Common::HexArrayToString(header.build_id); 170 const auto build_id_raw = Common::HexArrayToString(header.build_id);
175 const auto build_id = build_id_raw.substr(0, build_id_raw.find_last_not_of('0') + 1); 171 const auto build_id = build_id_raw.substr(0, build_id_raw.find_last_not_of('0') + 1);
@@ -212,9 +208,11 @@ std::vector<u8> PatchManager::PatchNSO(const std::vector<u8>& nso) const {
212 } 208 }
213 } 209 }
214 210
215 if (out.size() < 0x100) 211 if (out.size() < sizeof(Loader::NSOHeader)) {
216 return nso; 212 return nso;
217 std::memcpy(out.data(), &header, sizeof(NSOBuildHeader)); 213 }
214
215 std::memcpy(out.data(), &header, sizeof(header));
218 return out; 216 return out;
219} 217}
220 218
@@ -232,6 +230,57 @@ bool PatchManager::HasNSOPatch(const std::array<u8, 32>& build_id_) const {
232 return !CollectPatches(patch_dirs, build_id).empty(); 230 return !CollectPatches(patch_dirs, build_id).empty();
233} 231}
234 232
233static std::optional<CheatList> ReadCheatFileFromFolder(const Core::System& system, u64 title_id,
234 const std::array<u8, 0x20>& build_id_,
235 const VirtualDir& base_path, bool upper) {
236 const auto build_id_raw = Common::HexArrayToString(build_id_, upper);
237 const auto build_id = build_id_raw.substr(0, sizeof(u64) * 2);
238 const auto file = base_path->GetFile(fmt::format("{}.txt", build_id));
239
240 if (file == nullptr) {
241 LOG_INFO(Common_Filesystem, "No cheats file found for title_id={:016X}, build_id={}",
242 title_id, build_id);
243 return std::nullopt;
244 }
245
246 std::vector<u8> data(file->GetSize());
247 if (file->Read(data.data(), data.size()) != data.size()) {
248 LOG_INFO(Common_Filesystem, "Failed to read cheats file for title_id={:016X}, build_id={}",
249 title_id, build_id);
250 return std::nullopt;
251 }
252
253 TextCheatParser parser;
254 return parser.Parse(system, data);
255}
256
257std::vector<CheatList> PatchManager::CreateCheatList(const Core::System& system,
258 const std::array<u8, 32>& build_id_) const {
259 const auto load_dir = Service::FileSystem::GetModificationLoadRoot(title_id);
260 auto patch_dirs = load_dir->GetSubdirectories();
261 std::sort(patch_dirs.begin(), patch_dirs.end(),
262 [](const VirtualDir& l, const VirtualDir& r) { return l->GetName() < r->GetName(); });
263
264 std::vector<CheatList> out;
265 out.reserve(patch_dirs.size());
266 for (const auto& subdir : patch_dirs) {
267 auto cheats_dir = subdir->GetSubdirectory("cheats");
268 if (cheats_dir != nullptr) {
269 auto res = ReadCheatFileFromFolder(system, title_id, build_id_, cheats_dir, true);
270 if (res.has_value()) {
271 out.push_back(std::move(*res));
272 continue;
273 }
274
275 res = ReadCheatFileFromFolder(system, title_id, build_id_, cheats_dir, false);
276 if (res.has_value())
277 out.push_back(std::move(*res));
278 }
279 }
280
281 return out;
282}
283
235static void ApplyLayeredFS(VirtualFile& romfs, u64 title_id, ContentRecordType type) { 284static void ApplyLayeredFS(VirtualFile& romfs, u64 title_id, ContentRecordType type) {
236 const auto load_dir = Service::FileSystem::GetModificationLoadRoot(title_id); 285 const auto load_dir = Service::FileSystem::GetModificationLoadRoot(title_id);
237 if ((type != ContentRecordType::Program && type != ContentRecordType::Data) || 286 if ((type != ContentRecordType::Program && type != ContentRecordType::Data) ||
@@ -403,6 +452,8 @@ std::map<std::string, std::string, std::less<>> PatchManager::GetPatchVersionNam
403 } 452 }
404 if (IsDirValidAndNonEmpty(mod->GetSubdirectory("romfs"))) 453 if (IsDirValidAndNonEmpty(mod->GetSubdirectory("romfs")))
405 AppendCommaIfNotEmpty(types, "LayeredFS"); 454 AppendCommaIfNotEmpty(types, "LayeredFS");
455 if (IsDirValidAndNonEmpty(mod->GetSubdirectory("cheats")))
456 AppendCommaIfNotEmpty(types, "Cheats");
406 457
407 if (types.empty()) 458 if (types.empty())
408 continue; 459 continue;
diff --git a/src/core/file_sys/patch_manager.h b/src/core/file_sys/patch_manager.h
index b8a1652fd..de2672c76 100644
--- a/src/core/file_sys/patch_manager.h
+++ b/src/core/file_sys/patch_manager.h
@@ -8,9 +8,14 @@
8#include <memory> 8#include <memory>
9#include <string> 9#include <string>
10#include "common/common_types.h" 10#include "common/common_types.h"
11#include "core/file_sys/cheat_engine.h"
11#include "core/file_sys/nca_metadata.h" 12#include "core/file_sys/nca_metadata.h"
12#include "core/file_sys/vfs.h" 13#include "core/file_sys/vfs.h"
13 14
15namespace Core {
16class System;
17}
18
14namespace FileSys { 19namespace FileSys {
15 20
16class NCA; 21class NCA;
@@ -45,6 +50,10 @@ public:
45 // Used to prevent expensive copies in NSO loader. 50 // Used to prevent expensive copies in NSO loader.
46 bool HasNSOPatch(const std::array<u8, 0x20>& build_id) const; 51 bool HasNSOPatch(const std::array<u8, 0x20>& build_id) const;
47 52
53 // Creates a CheatList object with all
54 std::vector<CheatList> CreateCheatList(const Core::System& system,
55 const std::array<u8, 0x20>& build_id) const;
56
48 // Currently tracked RomFS patches: 57 // Currently tracked RomFS patches:
49 // - Game Updates 58 // - Game Updates
50 // - LayeredFS 59 // - LayeredFS
diff --git a/src/core/file_sys/registered_cache.cpp b/src/core/file_sys/registered_cache.cpp
index 128199063..1c6bacace 100644
--- a/src/core/file_sys/registered_cache.cpp
+++ b/src/core/file_sys/registered_cache.cpp
@@ -94,7 +94,7 @@ static ContentRecordType GetCRTypeFromNCAType(NCAContentType type) {
94 case NCAContentType::Control: 94 case NCAContentType::Control:
95 return ContentRecordType::Control; 95 return ContentRecordType::Control;
96 case NCAContentType::Data: 96 case NCAContentType::Data:
97 case NCAContentType::Data_Unknown5: 97 case NCAContentType::PublicData:
98 return ContentRecordType::Data; 98 return ContentRecordType::Data;
99 case NCAContentType::Manual: 99 case NCAContentType::Manual:
100 // TODO(DarkLordZach): Peek at NCA contents to differentiate Manual and Legal. 100 // TODO(DarkLordZach): Peek at NCA contents to differentiate Manual and Legal.
diff --git a/src/core/file_sys/system_archive/system_archive.cpp b/src/core/file_sys/system_archive/system_archive.cpp
index e3e79f40a..c9722ed77 100644
--- a/src/core/file_sys/system_archive/system_archive.cpp
+++ b/src/core/file_sys/system_archive/system_archive.cpp
@@ -6,6 +6,7 @@
6#include "core/file_sys/romfs.h" 6#include "core/file_sys/romfs.h"
7#include "core/file_sys/system_archive/ng_word.h" 7#include "core/file_sys/system_archive/ng_word.h"
8#include "core/file_sys/system_archive/system_archive.h" 8#include "core/file_sys/system_archive/system_archive.h"
9#include "core/file_sys/system_archive/system_version.h"
9 10
10namespace FileSys::SystemArchive { 11namespace FileSys::SystemArchive {
11 12
@@ -30,7 +31,7 @@ constexpr std::array<SystemArchiveDescriptor, SYSTEM_ARCHIVE_COUNT> SYSTEM_ARCHI
30 {0x0100000000000806, "NgWord", &NgWord1}, 31 {0x0100000000000806, "NgWord", &NgWord1},
31 {0x0100000000000807, "SsidList", nullptr}, 32 {0x0100000000000807, "SsidList", nullptr},
32 {0x0100000000000808, "Dictionary", nullptr}, 33 {0x0100000000000808, "Dictionary", nullptr},
33 {0x0100000000000809, "SystemVersion", nullptr}, 34 {0x0100000000000809, "SystemVersion", &SystemVersion},
34 {0x010000000000080A, "AvatarImage", nullptr}, 35 {0x010000000000080A, "AvatarImage", nullptr},
35 {0x010000000000080B, "LocalNews", nullptr}, 36 {0x010000000000080B, "LocalNews", nullptr},
36 {0x010000000000080C, "Eula", nullptr}, 37 {0x010000000000080C, "Eula", nullptr},
diff --git a/src/core/file_sys/system_archive/system_version.cpp b/src/core/file_sys/system_archive/system_version.cpp
new file mode 100644
index 000000000..6e22f97b0
--- /dev/null
+++ b/src/core/file_sys/system_archive/system_version.cpp
@@ -0,0 +1,52 @@
1// Copyright 2019 yuzu emulator team
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "core/file_sys/system_archive/system_version.h"
6#include "core/file_sys/vfs_vector.h"
7
8namespace FileSys::SystemArchive {
9
10namespace SystemVersionData {
11
12// This section should reflect the best system version to describe yuzu's HLE api.
13// TODO(DarkLordZach): Update when HLE gets better.
14
15constexpr u8 VERSION_MAJOR = 5;
16constexpr u8 VERSION_MINOR = 1;
17constexpr u8 VERSION_MICRO = 0;
18
19constexpr u8 REVISION_MAJOR = 3;
20constexpr u8 REVISION_MINOR = 0;
21
22constexpr char PLATFORM_STRING[] = "NX";
23constexpr char VERSION_HASH[] = "23f9df53e25709d756e0c76effcb2473bd3447dd";
24constexpr char DISPLAY_VERSION[] = "5.1.0";
25constexpr char DISPLAY_TITLE[] = "NintendoSDK Firmware for NX 5.1.0-3.0";
26
27} // namespace SystemVersionData
28
29std::string GetLongDisplayVersion() {
30 return SystemVersionData::DISPLAY_TITLE;
31}
32
33VirtualDir SystemVersion() {
34 VirtualFile file = std::make_shared<VectorVfsFile>(std::vector<u8>(0x100), "file");
35 file->WriteObject(SystemVersionData::VERSION_MAJOR, 0);
36 file->WriteObject(SystemVersionData::VERSION_MINOR, 1);
37 file->WriteObject(SystemVersionData::VERSION_MICRO, 2);
38 file->WriteObject(SystemVersionData::REVISION_MAJOR, 4);
39 file->WriteObject(SystemVersionData::REVISION_MINOR, 5);
40 file->WriteArray(SystemVersionData::PLATFORM_STRING,
41 std::min<u64>(sizeof(SystemVersionData::PLATFORM_STRING), 0x20ULL), 0x8);
42 file->WriteArray(SystemVersionData::VERSION_HASH,
43 std::min<u64>(sizeof(SystemVersionData::VERSION_HASH), 0x40ULL), 0x28);
44 file->WriteArray(SystemVersionData::DISPLAY_VERSION,
45 std::min<u64>(sizeof(SystemVersionData::DISPLAY_VERSION), 0x18ULL), 0x68);
46 file->WriteArray(SystemVersionData::DISPLAY_TITLE,
47 std::min<u64>(sizeof(SystemVersionData::DISPLAY_TITLE), 0x80ULL), 0x80);
48 return std::make_shared<VectorVfsDirectory>(std::vector<VirtualFile>{file},
49 std::vector<VirtualDir>{}, "data");
50}
51
52} // namespace FileSys::SystemArchive
diff --git a/src/core/file_sys/system_archive/system_version.h b/src/core/file_sys/system_archive/system_version.h
new file mode 100644
index 000000000..deed79b26
--- /dev/null
+++ b/src/core/file_sys/system_archive/system_version.h
@@ -0,0 +1,16 @@
1// Copyright 2019 yuzu emulator team
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <string>
8#include "core/file_sys/vfs_types.h"
9
10namespace FileSys::SystemArchive {
11
12std::string GetLongDisplayVersion();
13
14VirtualDir SystemVersion();
15
16} // namespace FileSys::SystemArchive
diff --git a/src/core/file_sys/vfs_vector.cpp b/src/core/file_sys/vfs_vector.cpp
index 515626658..75fc04302 100644
--- a/src/core/file_sys/vfs_vector.cpp
+++ b/src/core/file_sys/vfs_vector.cpp
@@ -47,7 +47,7 @@ std::size_t VectorVfsFile::Write(const u8* data_, std::size_t length, std::size_
47 if (offset + length > data.size()) 47 if (offset + length > data.size())
48 data.resize(offset + length); 48 data.resize(offset + length);
49 const auto write = std::min(length, data.size() - offset); 49 const auto write = std::min(length, data.size() - offset);
50 std::memcpy(data.data(), data_, write); 50 std::memcpy(data.data() + offset, data_, write);
51 return write; 51 return write;
52} 52}
53 53
diff --git a/src/core/frontend/emu_window.cpp b/src/core/frontend/emu_window.cpp
index 9dd493efb..1320bbe77 100644
--- a/src/core/frontend/emu_window.cpp
+++ b/src/core/frontend/emu_window.cpp
@@ -30,7 +30,7 @@ private:
30 explicit Device(std::weak_ptr<TouchState>&& touch_state) : touch_state(touch_state) {} 30 explicit Device(std::weak_ptr<TouchState>&& touch_state) : touch_state(touch_state) {}
31 std::tuple<float, float, bool> GetStatus() const override { 31 std::tuple<float, float, bool> GetStatus() const override {
32 if (auto state = touch_state.lock()) { 32 if (auto state = touch_state.lock()) {
33 std::lock_guard<std::mutex> guard(state->mutex); 33 std::lock_guard guard{state->mutex};
34 return std::make_tuple(state->touch_x, state->touch_y, state->touch_pressed); 34 return std::make_tuple(state->touch_x, state->touch_y, state->touch_pressed);
35 } 35 }
36 return std::make_tuple(0.0f, 0.0f, false); 36 return std::make_tuple(0.0f, 0.0f, false);
@@ -67,7 +67,7 @@ static bool IsWithinTouchscreen(const Layout::FramebufferLayout& layout, unsigne
67 framebuffer_x >= layout.screen.left && framebuffer_x < layout.screen.right); 67 framebuffer_x >= layout.screen.left && framebuffer_x < layout.screen.right);
68} 68}
69 69
70std::tuple<unsigned, unsigned> EmuWindow::ClipToTouchScreen(unsigned new_x, unsigned new_y) { 70std::tuple<unsigned, unsigned> EmuWindow::ClipToTouchScreen(unsigned new_x, unsigned new_y) const {
71 new_x = std::max(new_x, framebuffer_layout.screen.left); 71 new_x = std::max(new_x, framebuffer_layout.screen.left);
72 new_x = std::min(new_x, framebuffer_layout.screen.right - 1); 72 new_x = std::min(new_x, framebuffer_layout.screen.right - 1);
73 73
@@ -81,7 +81,7 @@ void EmuWindow::TouchPressed(unsigned framebuffer_x, unsigned framebuffer_y) {
81 if (!IsWithinTouchscreen(framebuffer_layout, framebuffer_x, framebuffer_y)) 81 if (!IsWithinTouchscreen(framebuffer_layout, framebuffer_x, framebuffer_y))
82 return; 82 return;
83 83
84 std::lock_guard<std::mutex> guard(touch_state->mutex); 84 std::lock_guard guard{touch_state->mutex};
85 touch_state->touch_x = static_cast<float>(framebuffer_x - framebuffer_layout.screen.left) / 85 touch_state->touch_x = static_cast<float>(framebuffer_x - framebuffer_layout.screen.left) /
86 (framebuffer_layout.screen.right - framebuffer_layout.screen.left); 86 (framebuffer_layout.screen.right - framebuffer_layout.screen.left);
87 touch_state->touch_y = static_cast<float>(framebuffer_y - framebuffer_layout.screen.top) / 87 touch_state->touch_y = static_cast<float>(framebuffer_y - framebuffer_layout.screen.top) /
@@ -91,7 +91,7 @@ void EmuWindow::TouchPressed(unsigned framebuffer_x, unsigned framebuffer_y) {
91} 91}
92 92
93void EmuWindow::TouchReleased() { 93void EmuWindow::TouchReleased() {
94 std::lock_guard<std::mutex> guard(touch_state->mutex); 94 std::lock_guard guard{touch_state->mutex};
95 touch_state->touch_pressed = false; 95 touch_state->touch_pressed = false;
96 touch_state->touch_x = 0; 96 touch_state->touch_x = 0;
97 touch_state->touch_y = 0; 97 touch_state->touch_y = 0;
diff --git a/src/core/frontend/emu_window.h b/src/core/frontend/emu_window.h
index 7006a37b3..d0bcb4660 100644
--- a/src/core/frontend/emu_window.h
+++ b/src/core/frontend/emu_window.h
@@ -166,7 +166,7 @@ private:
166 /** 166 /**
167 * Clip the provided coordinates to be inside the touchscreen area. 167 * Clip the provided coordinates to be inside the touchscreen area.
168 */ 168 */
169 std::tuple<unsigned, unsigned> ClipToTouchScreen(unsigned new_x, unsigned new_y); 169 std::tuple<unsigned, unsigned> ClipToTouchScreen(unsigned new_x, unsigned new_y) const;
170}; 170};
171 171
172} // namespace Core::Frontend 172} // namespace Core::Frontend
diff --git a/src/core/frontend/framebuffer_layout.cpp b/src/core/frontend/framebuffer_layout.cpp
index f8662d193..a1357179f 100644
--- a/src/core/frontend/framebuffer_layout.cpp
+++ b/src/core/frontend/framebuffer_layout.cpp
@@ -12,12 +12,12 @@ namespace Layout {
12 12
13// Finds the largest size subrectangle contained in window area that is confined to the aspect ratio 13// Finds the largest size subrectangle contained in window area that is confined to the aspect ratio
14template <class T> 14template <class T>
15static MathUtil::Rectangle<T> maxRectangle(MathUtil::Rectangle<T> window_area, 15static Common::Rectangle<T> MaxRectangle(Common::Rectangle<T> window_area,
16 float screen_aspect_ratio) { 16 float screen_aspect_ratio) {
17 float scale = std::min(static_cast<float>(window_area.GetWidth()), 17 float scale = std::min(static_cast<float>(window_area.GetWidth()),
18 window_area.GetHeight() / screen_aspect_ratio); 18 window_area.GetHeight() / screen_aspect_ratio);
19 return MathUtil::Rectangle<T>{0, 0, static_cast<T>(std::round(scale)), 19 return Common::Rectangle<T>{0, 0, static_cast<T>(std::round(scale)),
20 static_cast<T>(std::round(scale * screen_aspect_ratio))}; 20 static_cast<T>(std::round(scale * screen_aspect_ratio))};
21} 21}
22 22
23FramebufferLayout DefaultFrameLayout(unsigned width, unsigned height) { 23FramebufferLayout DefaultFrameLayout(unsigned width, unsigned height) {
@@ -29,8 +29,8 @@ FramebufferLayout DefaultFrameLayout(unsigned width, unsigned height) {
29 29
30 const float emulation_aspect_ratio{static_cast<float>(ScreenUndocked::Height) / 30 const float emulation_aspect_ratio{static_cast<float>(ScreenUndocked::Height) /
31 ScreenUndocked::Width}; 31 ScreenUndocked::Width};
32 MathUtil::Rectangle<unsigned> screen_window_area{0, 0, width, height}; 32 Common::Rectangle<unsigned> screen_window_area{0, 0, width, height};
33 MathUtil::Rectangle<unsigned> screen = maxRectangle(screen_window_area, emulation_aspect_ratio); 33 Common::Rectangle<unsigned> screen = MaxRectangle(screen_window_area, emulation_aspect_ratio);
34 34
35 float window_aspect_ratio = static_cast<float>(height) / width; 35 float window_aspect_ratio = static_cast<float>(height) / width;
36 36
diff --git a/src/core/frontend/framebuffer_layout.h b/src/core/frontend/framebuffer_layout.h
index e06647794..c2c63d08c 100644
--- a/src/core/frontend/framebuffer_layout.h
+++ b/src/core/frontend/framebuffer_layout.h
@@ -16,7 +16,7 @@ struct FramebufferLayout {
16 unsigned width{ScreenUndocked::Width}; 16 unsigned width{ScreenUndocked::Width};
17 unsigned height{ScreenUndocked::Height}; 17 unsigned height{ScreenUndocked::Height};
18 18
19 MathUtil::Rectangle<unsigned> screen; 19 Common::Rectangle<unsigned> screen;
20 20
21 /** 21 /**
22 * Returns the ration of pixel size of the screen, compared to the native size of the undocked 22 * Returns the ration of pixel size of the screen, compared to the native size of the undocked
diff --git a/src/core/frontend/input.h b/src/core/frontend/input.h
index 16fdcd376..7c11d7546 100644
--- a/src/core/frontend/input.h
+++ b/src/core/frontend/input.h
@@ -124,7 +124,7 @@ using AnalogDevice = InputDevice<std::tuple<float, float>>;
124 * Orientation is determined by right-hand rule. 124 * Orientation is determined by right-hand rule.
125 * Units: deg/sec 125 * Units: deg/sec
126 */ 126 */
127using MotionDevice = InputDevice<std::tuple<Math::Vec3<float>, Math::Vec3<float>>>; 127using MotionDevice = InputDevice<std::tuple<Common::Vec3<float>, Common::Vec3<float>>>;
128 128
129/** 129/**
130 * A touch device is an input device that returns a tuple of two floats and a bool. The floats are 130 * A touch device is an input device that returns a tuple of two floats and a bool. The floats are
diff --git a/src/core/hle/ipc.h b/src/core/hle/ipc.h
index ed84197b3..fae54bcc7 100644
--- a/src/core/hle/ipc.h
+++ b/src/core/hle/ipc.h
@@ -4,10 +4,10 @@
4 4
5#pragma once 5#pragma once
6 6
7#include "common/bit_field.h"
8#include "common/common_funcs.h"
7#include "common/common_types.h" 9#include "common/common_types.h"
8#include "common/swap.h" 10#include "common/swap.h"
9#include "core/hle/kernel/errors.h"
10#include "core/memory.h"
11 11
12namespace IPC { 12namespace IPC {
13 13
@@ -39,10 +39,10 @@ struct CommandHeader {
39 union { 39 union {
40 u32_le raw_low; 40 u32_le raw_low;
41 BitField<0, 16, CommandType> type; 41 BitField<0, 16, CommandType> type;
42 BitField<16, 4, u32_le> num_buf_x_descriptors; 42 BitField<16, 4, u32> num_buf_x_descriptors;
43 BitField<20, 4, u32_le> num_buf_a_descriptors; 43 BitField<20, 4, u32> num_buf_a_descriptors;
44 BitField<24, 4, u32_le> num_buf_b_descriptors; 44 BitField<24, 4, u32> num_buf_b_descriptors;
45 BitField<28, 4, u32_le> num_buf_w_descriptors; 45 BitField<28, 4, u32> num_buf_w_descriptors;
46 }; 46 };
47 47
48 enum class BufferDescriptorCFlag : u32 { 48 enum class BufferDescriptorCFlag : u32 {
@@ -53,28 +53,28 @@ struct CommandHeader {
53 53
54 union { 54 union {
55 u32_le raw_high; 55 u32_le raw_high;
56 BitField<0, 10, u32_le> data_size; 56 BitField<0, 10, u32> data_size;
57 BitField<10, 4, BufferDescriptorCFlag> buf_c_descriptor_flags; 57 BitField<10, 4, BufferDescriptorCFlag> buf_c_descriptor_flags;
58 BitField<31, 1, u32_le> enable_handle_descriptor; 58 BitField<31, 1, u32> enable_handle_descriptor;
59 }; 59 };
60}; 60};
61static_assert(sizeof(CommandHeader) == 8, "CommandHeader size is incorrect"); 61static_assert(sizeof(CommandHeader) == 8, "CommandHeader size is incorrect");
62 62
63union HandleDescriptorHeader { 63union HandleDescriptorHeader {
64 u32_le raw_high; 64 u32_le raw_high;
65 BitField<0, 1, u32_le> send_current_pid; 65 BitField<0, 1, u32> send_current_pid;
66 BitField<1, 4, u32_le> num_handles_to_copy; 66 BitField<1, 4, u32> num_handles_to_copy;
67 BitField<5, 4, u32_le> num_handles_to_move; 67 BitField<5, 4, u32> num_handles_to_move;
68}; 68};
69static_assert(sizeof(HandleDescriptorHeader) == 4, "HandleDescriptorHeader size is incorrect"); 69static_assert(sizeof(HandleDescriptorHeader) == 4, "HandleDescriptorHeader size is incorrect");
70 70
71struct BufferDescriptorX { 71struct BufferDescriptorX {
72 union { 72 union {
73 BitField<0, 6, u32_le> counter_bits_0_5; 73 BitField<0, 6, u32> counter_bits_0_5;
74 BitField<6, 3, u32_le> address_bits_36_38; 74 BitField<6, 3, u32> address_bits_36_38;
75 BitField<9, 3, u32_le> counter_bits_9_11; 75 BitField<9, 3, u32> counter_bits_9_11;
76 BitField<12, 4, u32_le> address_bits_32_35; 76 BitField<12, 4, u32> address_bits_32_35;
77 BitField<16, 16, u32_le> size; 77 BitField<16, 16, u32> size;
78 }; 78 };
79 79
80 u32_le address_bits_0_31; 80 u32_le address_bits_0_31;
@@ -103,10 +103,10 @@ struct BufferDescriptorABW {
103 u32_le address_bits_0_31; 103 u32_le address_bits_0_31;
104 104
105 union { 105 union {
106 BitField<0, 2, u32_le> flags; 106 BitField<0, 2, u32> flags;
107 BitField<2, 3, u32_le> address_bits_36_38; 107 BitField<2, 3, u32> address_bits_36_38;
108 BitField<24, 4, u32_le> size_bits_32_35; 108 BitField<24, 4, u32> size_bits_32_35;
109 BitField<28, 4, u32_le> address_bits_32_35; 109 BitField<28, 4, u32> address_bits_32_35;
110 }; 110 };
111 111
112 VAddr Address() const { 112 VAddr Address() const {
@@ -128,8 +128,8 @@ struct BufferDescriptorC {
128 u32_le address_bits_0_31; 128 u32_le address_bits_0_31;
129 129
130 union { 130 union {
131 BitField<0, 16, u32_le> address_bits_32_47; 131 BitField<0, 16, u32> address_bits_32_47;
132 BitField<16, 16, u32_le> size; 132 BitField<16, 16, u32> size;
133 }; 133 };
134 134
135 VAddr Address() const { 135 VAddr Address() const {
@@ -167,8 +167,8 @@ struct DomainMessageHeader {
167 struct { 167 struct {
168 union { 168 union {
169 BitField<0, 8, CommandType> command; 169 BitField<0, 8, CommandType> command;
170 BitField<8, 8, u32_le> input_object_count; 170 BitField<8, 8, u32> input_object_count;
171 BitField<16, 16, u32_le> size; 171 BitField<16, 16, u32> size;
172 }; 172 };
173 u32_le object_id; 173 u32_le object_id;
174 INSERT_PADDING_WORDS(2); 174 INSERT_PADDING_WORDS(2);
diff --git a/src/core/hle/ipc_helpers.h b/src/core/hle/ipc_helpers.h
index 90f276ee8..68406eb63 100644
--- a/src/core/hle/ipc_helpers.h
+++ b/src/core/hle/ipc_helpers.h
@@ -19,9 +19,12 @@
19#include "core/hle/kernel/hle_ipc.h" 19#include "core/hle/kernel/hle_ipc.h"
20#include "core/hle/kernel/object.h" 20#include "core/hle/kernel/object.h"
21#include "core/hle/kernel/server_session.h" 21#include "core/hle/kernel/server_session.h"
22#include "core/hle/result.h"
22 23
23namespace IPC { 24namespace IPC {
24 25
26constexpr ResultCode ERR_REMOTE_PROCESS_DEAD{ErrorModule::HIPC, 301};
27
25class RequestHelperBase { 28class RequestHelperBase {
26protected: 29protected:
27 Kernel::HLERequestContext* context = nullptr; 30 Kernel::HLERequestContext* context = nullptr;
@@ -272,6 +275,20 @@ inline void ResponseBuilder::Push(u64 value) {
272} 275}
273 276
274template <> 277template <>
278inline void ResponseBuilder::Push(float value) {
279 u32 integral;
280 std::memcpy(&integral, &value, sizeof(u32));
281 Push(integral);
282}
283
284template <>
285inline void ResponseBuilder::Push(double value) {
286 u64 integral;
287 std::memcpy(&integral, &value, sizeof(u64));
288 Push(integral);
289}
290
291template <>
275inline void ResponseBuilder::Push(bool value) { 292inline void ResponseBuilder::Push(bool value) {
276 Push(static_cast<u8>(value)); 293 Push(static_cast<u8>(value));
277} 294}
@@ -350,7 +367,7 @@ public:
350 template <class T> 367 template <class T>
351 std::shared_ptr<T> PopIpcInterface() { 368 std::shared_ptr<T> PopIpcInterface() {
352 ASSERT(context->Session()->IsDomain()); 369 ASSERT(context->Session()->IsDomain());
353 ASSERT(context->GetDomainMessageHeader()->input_object_count > 0); 370 ASSERT(context->GetDomainMessageHeader().input_object_count > 0);
354 return context->GetDomainRequestHandler<T>(Pop<u32>() - 1); 371 return context->GetDomainRequestHandler<T>(Pop<u32>() - 1);
355 } 372 }
356}; 373};
@@ -362,6 +379,11 @@ inline u32 RequestParser::Pop() {
362 return cmdbuf[index++]; 379 return cmdbuf[index++];
363} 380}
364 381
382template <>
383inline s32 RequestParser::Pop() {
384 return static_cast<s32>(Pop<u32>());
385}
386
365template <typename T> 387template <typename T>
366void RequestParser::PopRaw(T& value) { 388void RequestParser::PopRaw(T& value) {
367 std::memcpy(&value, cmdbuf + index, sizeof(T)); 389 std::memcpy(&value, cmdbuf + index, sizeof(T));
@@ -393,11 +415,37 @@ inline u64 RequestParser::Pop() {
393} 415}
394 416
395template <> 417template <>
418inline s8 RequestParser::Pop() {
419 return static_cast<s8>(Pop<u8>());
420}
421
422template <>
423inline s16 RequestParser::Pop() {
424 return static_cast<s16>(Pop<u16>());
425}
426
427template <>
396inline s64 RequestParser::Pop() { 428inline s64 RequestParser::Pop() {
397 return static_cast<s64>(Pop<u64>()); 429 return static_cast<s64>(Pop<u64>());
398} 430}
399 431
400template <> 432template <>
433inline float RequestParser::Pop() {
434 const u32 value = Pop<u32>();
435 float real;
436 std::memcpy(&real, &value, sizeof(real));
437 return real;
438}
439
440template <>
441inline double RequestParser::Pop() {
442 const u64 value = Pop<u64>();
443 float real;
444 std::memcpy(&real, &value, sizeof(real));
445 return real;
446}
447
448template <>
401inline bool RequestParser::Pop() { 449inline bool RequestParser::Pop() {
402 return Pop<u8>() != 0; 450 return Pop<u8>() != 0;
403} 451}
diff --git a/src/core/hle/kernel/address_arbiter.cpp b/src/core/hle/kernel/address_arbiter.cpp
index 57157beb4..c8842410b 100644
--- a/src/core/hle/kernel/address_arbiter.cpp
+++ b/src/core/hle/kernel/address_arbiter.cpp
@@ -9,6 +9,7 @@
9#include "common/common_types.h" 9#include "common/common_types.h"
10#include "core/core.h" 10#include "core/core.h"
11#include "core/core_cpu.h" 11#include "core/core_cpu.h"
12#include "core/hle/kernel/address_arbiter.h"
12#include "core/hle/kernel/errors.h" 13#include "core/hle/kernel/errors.h"
13#include "core/hle/kernel/object.h" 14#include "core/hle/kernel/object.h"
14#include "core/hle/kernel/process.h" 15#include "core/hle/kernel/process.h"
@@ -18,58 +19,15 @@
18#include "core/memory.h" 19#include "core/memory.h"
19 20
20namespace Kernel { 21namespace Kernel {
21namespace AddressArbiter { 22namespace {
22
23// Performs actual address waiting logic.
24static ResultCode WaitForAddress(VAddr address, s64 timeout) {
25 SharedPtr<Thread> current_thread = GetCurrentThread();
26 current_thread->SetArbiterWaitAddress(address);
27 current_thread->SetStatus(ThreadStatus::WaitArb);
28 current_thread->InvalidateWakeupCallback();
29
30 current_thread->WakeAfterDelay(timeout);
31
32 Core::System::GetInstance().CpuCore(current_thread->GetProcessorID()).PrepareReschedule();
33 return RESULT_TIMEOUT;
34}
35
36// Gets the threads waiting on an address.
37static std::vector<SharedPtr<Thread>> GetThreadsWaitingOnAddress(VAddr address) {
38 const auto RetrieveWaitingThreads = [](std::size_t core_index,
39 std::vector<SharedPtr<Thread>>& waiting_threads,
40 VAddr arb_addr) {
41 const auto& scheduler = Core::System::GetInstance().Scheduler(core_index);
42 const auto& thread_list = scheduler.GetThreadList();
43
44 for (const auto& thread : thread_list) {
45 if (thread->GetArbiterWaitAddress() == arb_addr)
46 waiting_threads.push_back(thread);
47 }
48 };
49
50 // Retrieve all threads that are waiting for this address.
51 std::vector<SharedPtr<Thread>> threads;
52 RetrieveWaitingThreads(0, threads, address);
53 RetrieveWaitingThreads(1, threads, address);
54 RetrieveWaitingThreads(2, threads, address);
55 RetrieveWaitingThreads(3, threads, address);
56
57 // Sort them by priority, such that the highest priority ones come first.
58 std::sort(threads.begin(), threads.end(),
59 [](const SharedPtr<Thread>& lhs, const SharedPtr<Thread>& rhs) {
60 return lhs->GetPriority() < rhs->GetPriority();
61 });
62
63 return threads;
64}
65
66// Wake up num_to_wake (or all) threads in a vector. 23// Wake up num_to_wake (or all) threads in a vector.
67static void WakeThreads(std::vector<SharedPtr<Thread>>& waiting_threads, s32 num_to_wake) { 24void WakeThreads(const std::vector<SharedPtr<Thread>>& waiting_threads, s32 num_to_wake) {
68 // Only process up to 'target' threads, unless 'target' is <= 0, in which case process 25 // Only process up to 'target' threads, unless 'target' is <= 0, in which case process
69 // them all. 26 // them all.
70 std::size_t last = waiting_threads.size(); 27 std::size_t last = waiting_threads.size();
71 if (num_to_wake > 0) 28 if (num_to_wake > 0) {
72 last = num_to_wake; 29 last = std::min(last, static_cast<std::size_t>(num_to_wake));
30 }
73 31
74 // Signal the waiting threads. 32 // Signal the waiting threads.
75 for (std::size_t i = 0; i < last; i++) { 33 for (std::size_t i = 0; i < last; i++) {
@@ -79,88 +37,114 @@ static void WakeThreads(std::vector<SharedPtr<Thread>>& waiting_threads, s32 num
79 waiting_threads[i]->ResumeFromWait(); 37 waiting_threads[i]->ResumeFromWait();
80 } 38 }
81} 39}
40} // Anonymous namespace
41
42AddressArbiter::AddressArbiter(Core::System& system) : system{system} {}
43AddressArbiter::~AddressArbiter() = default;
44
45ResultCode AddressArbiter::SignalToAddress(VAddr address, SignalType type, s32 value,
46 s32 num_to_wake) {
47 switch (type) {
48 case SignalType::Signal:
49 return SignalToAddressOnly(address, num_to_wake);
50 case SignalType::IncrementAndSignalIfEqual:
51 return IncrementAndSignalToAddressIfEqual(address, value, num_to_wake);
52 case SignalType::ModifyByWaitingCountAndSignalIfEqual:
53 return ModifyByWaitingCountAndSignalToAddressIfEqual(address, value, num_to_wake);
54 default:
55 return ERR_INVALID_ENUM_VALUE;
56 }
57}
82 58
83// Signals an address being waited on. 59ResultCode AddressArbiter::SignalToAddressOnly(VAddr address, s32 num_to_wake) {
84ResultCode SignalToAddress(VAddr address, s32 num_to_wake) { 60 const std::vector<SharedPtr<Thread>> waiting_threads = GetThreadsWaitingOnAddress(address);
85 std::vector<SharedPtr<Thread>> waiting_threads = GetThreadsWaitingOnAddress(address);
86
87 WakeThreads(waiting_threads, num_to_wake); 61 WakeThreads(waiting_threads, num_to_wake);
88 return RESULT_SUCCESS; 62 return RESULT_SUCCESS;
89} 63}
90 64
91// Signals an address being waited on and increments its value if equal to the value argument. 65ResultCode AddressArbiter::IncrementAndSignalToAddressIfEqual(VAddr address, s32 value,
92ResultCode IncrementAndSignalToAddressIfEqual(VAddr address, s32 value, s32 num_to_wake) { 66 s32 num_to_wake) {
93 // Ensure that we can write to the address. 67 // Ensure that we can write to the address.
94 if (!Memory::IsValidVirtualAddress(address)) { 68 if (!Memory::IsValidVirtualAddress(address)) {
95 return ERR_INVALID_ADDRESS_STATE; 69 return ERR_INVALID_ADDRESS_STATE;
96 } 70 }
97 71
98 if (static_cast<s32>(Memory::Read32(address)) == value) { 72 if (static_cast<s32>(Memory::Read32(address)) != value) {
99 Memory::Write32(address, static_cast<u32>(value + 1));
100 } else {
101 return ERR_INVALID_STATE; 73 return ERR_INVALID_STATE;
102 } 74 }
103 75
104 return SignalToAddress(address, num_to_wake); 76 Memory::Write32(address, static_cast<u32>(value + 1));
77 return SignalToAddressOnly(address, num_to_wake);
105} 78}
106 79
107// Signals an address being waited on and modifies its value based on waiting thread count if equal 80ResultCode AddressArbiter::ModifyByWaitingCountAndSignalToAddressIfEqual(VAddr address, s32 value,
108// to the value argument. 81 s32 num_to_wake) {
109ResultCode ModifyByWaitingCountAndSignalToAddressIfEqual(VAddr address, s32 value,
110 s32 num_to_wake) {
111 // Ensure that we can write to the address. 82 // Ensure that we can write to the address.
112 if (!Memory::IsValidVirtualAddress(address)) { 83 if (!Memory::IsValidVirtualAddress(address)) {
113 return ERR_INVALID_ADDRESS_STATE; 84 return ERR_INVALID_ADDRESS_STATE;
114 } 85 }
115 86
116 // Get threads waiting on the address. 87 // Get threads waiting on the address.
117 std::vector<SharedPtr<Thread>> waiting_threads = GetThreadsWaitingOnAddress(address); 88 const std::vector<SharedPtr<Thread>> waiting_threads = GetThreadsWaitingOnAddress(address);
118 89
119 // Determine the modified value depending on the waiting count. 90 // Determine the modified value depending on the waiting count.
120 s32 updated_value; 91 s32 updated_value;
121 if (waiting_threads.empty()) { 92 if (waiting_threads.empty()) {
122 updated_value = value - 1;
123 } else if (num_to_wake <= 0 || waiting_threads.size() <= static_cast<u32>(num_to_wake)) {
124 updated_value = value + 1; 93 updated_value = value + 1;
94 } else if (num_to_wake <= 0 || waiting_threads.size() <= static_cast<u32>(num_to_wake)) {
95 updated_value = value - 1;
125 } else { 96 } else {
126 updated_value = value; 97 updated_value = value;
127 } 98 }
128 99
129 if (static_cast<s32>(Memory::Read32(address)) == value) { 100 if (static_cast<s32>(Memory::Read32(address)) != value) {
130 Memory::Write32(address, static_cast<u32>(updated_value));
131 } else {
132 return ERR_INVALID_STATE; 101 return ERR_INVALID_STATE;
133 } 102 }
134 103
104 Memory::Write32(address, static_cast<u32>(updated_value));
135 WakeThreads(waiting_threads, num_to_wake); 105 WakeThreads(waiting_threads, num_to_wake);
136 return RESULT_SUCCESS; 106 return RESULT_SUCCESS;
137} 107}
138 108
139// Waits on an address if the value passed is less than the argument value, optionally decrementing. 109ResultCode AddressArbiter::WaitForAddress(VAddr address, ArbitrationType type, s32 value,
140ResultCode WaitForAddressIfLessThan(VAddr address, s32 value, s64 timeout, bool should_decrement) { 110 s64 timeout_ns) {
111 switch (type) {
112 case ArbitrationType::WaitIfLessThan:
113 return WaitForAddressIfLessThan(address, value, timeout_ns, false);
114 case ArbitrationType::DecrementAndWaitIfLessThan:
115 return WaitForAddressIfLessThan(address, value, timeout_ns, true);
116 case ArbitrationType::WaitIfEqual:
117 return WaitForAddressIfEqual(address, value, timeout_ns);
118 default:
119 return ERR_INVALID_ENUM_VALUE;
120 }
121}
122
123ResultCode AddressArbiter::WaitForAddressIfLessThan(VAddr address, s32 value, s64 timeout,
124 bool should_decrement) {
141 // Ensure that we can read the address. 125 // Ensure that we can read the address.
142 if (!Memory::IsValidVirtualAddress(address)) { 126 if (!Memory::IsValidVirtualAddress(address)) {
143 return ERR_INVALID_ADDRESS_STATE; 127 return ERR_INVALID_ADDRESS_STATE;
144 } 128 }
145 129
146 s32 cur_value = static_cast<s32>(Memory::Read32(address)); 130 const s32 cur_value = static_cast<s32>(Memory::Read32(address));
147 if (cur_value < value) { 131 if (cur_value >= value) {
148 if (should_decrement) {
149 Memory::Write32(address, static_cast<u32>(cur_value - 1));
150 }
151 } else {
152 return ERR_INVALID_STATE; 132 return ERR_INVALID_STATE;
153 } 133 }
134
135 if (should_decrement) {
136 Memory::Write32(address, static_cast<u32>(cur_value - 1));
137 }
138
154 // Short-circuit without rescheduling, if timeout is zero. 139 // Short-circuit without rescheduling, if timeout is zero.
155 if (timeout == 0) { 140 if (timeout == 0) {
156 return RESULT_TIMEOUT; 141 return RESULT_TIMEOUT;
157 } 142 }
158 143
159 return WaitForAddress(address, timeout); 144 return WaitForAddressImpl(address, timeout);
160} 145}
161 146
162// Waits on an address if the value passed is equal to the argument value. 147ResultCode AddressArbiter::WaitForAddressIfEqual(VAddr address, s32 value, s64 timeout) {
163ResultCode WaitForAddressIfEqual(VAddr address, s32 value, s64 timeout) {
164 // Ensure that we can read the address. 148 // Ensure that we can read the address.
165 if (!Memory::IsValidVirtualAddress(address)) { 149 if (!Memory::IsValidVirtualAddress(address)) {
166 return ERR_INVALID_ADDRESS_STATE; 150 return ERR_INVALID_ADDRESS_STATE;
@@ -174,7 +158,48 @@ ResultCode WaitForAddressIfEqual(VAddr address, s32 value, s64 timeout) {
174 return RESULT_TIMEOUT; 158 return RESULT_TIMEOUT;
175 } 159 }
176 160
177 return WaitForAddress(address, timeout); 161 return WaitForAddressImpl(address, timeout);
162}
163
164ResultCode AddressArbiter::WaitForAddressImpl(VAddr address, s64 timeout) {
165 SharedPtr<Thread> current_thread = system.CurrentScheduler().GetCurrentThread();
166 current_thread->SetArbiterWaitAddress(address);
167 current_thread->SetStatus(ThreadStatus::WaitArb);
168 current_thread->InvalidateWakeupCallback();
169
170 current_thread->WakeAfterDelay(timeout);
171
172 system.CpuCore(current_thread->GetProcessorID()).PrepareReschedule();
173 return RESULT_TIMEOUT;
174}
175
176std::vector<SharedPtr<Thread>> AddressArbiter::GetThreadsWaitingOnAddress(VAddr address) const {
177 const auto RetrieveWaitingThreads = [this](std::size_t core_index,
178 std::vector<SharedPtr<Thread>>& waiting_threads,
179 VAddr arb_addr) {
180 const auto& scheduler = system.Scheduler(core_index);
181 const auto& thread_list = scheduler.GetThreadList();
182
183 for (const auto& thread : thread_list) {
184 if (thread->GetArbiterWaitAddress() == arb_addr) {
185 waiting_threads.push_back(thread);
186 }
187 }
188 };
189
190 // Retrieve all threads that are waiting for this address.
191 std::vector<SharedPtr<Thread>> threads;
192 RetrieveWaitingThreads(0, threads, address);
193 RetrieveWaitingThreads(1, threads, address);
194 RetrieveWaitingThreads(2, threads, address);
195 RetrieveWaitingThreads(3, threads, address);
196
197 // Sort them by priority, such that the highest priority ones come first.
198 std::sort(threads.begin(), threads.end(),
199 [](const SharedPtr<Thread>& lhs, const SharedPtr<Thread>& rhs) {
200 return lhs->GetPriority() < rhs->GetPriority();
201 });
202
203 return threads;
178} 204}
179} // namespace AddressArbiter
180} // namespace Kernel 205} // namespace Kernel
diff --git a/src/core/hle/kernel/address_arbiter.h b/src/core/hle/kernel/address_arbiter.h
index e3657b8e9..ed0d0e69f 100644
--- a/src/core/hle/kernel/address_arbiter.h
+++ b/src/core/hle/kernel/address_arbiter.h
@@ -4,31 +4,77 @@
4 4
5#pragma once 5#pragma once
6 6
7#include <vector>
8
7#include "common/common_types.h" 9#include "common/common_types.h"
10#include "core/hle/kernel/object.h"
8 11
9union ResultCode; 12union ResultCode;
10 13
14namespace Core {
15class System;
16}
17
11namespace Kernel { 18namespace Kernel {
12 19
13namespace AddressArbiter { 20class Thread;
14enum class ArbitrationType {
15 WaitIfLessThan = 0,
16 DecrementAndWaitIfLessThan = 1,
17 WaitIfEqual = 2,
18};
19 21
20enum class SignalType { 22class AddressArbiter {
21 Signal = 0, 23public:
22 IncrementAndSignalIfEqual = 1, 24 enum class ArbitrationType {
23 ModifyByWaitingCountAndSignalIfEqual = 2, 25 WaitIfLessThan = 0,
24}; 26 DecrementAndWaitIfLessThan = 1,
27 WaitIfEqual = 2,
28 };
29
30 enum class SignalType {
31 Signal = 0,
32 IncrementAndSignalIfEqual = 1,
33 ModifyByWaitingCountAndSignalIfEqual = 2,
34 };
35
36 explicit AddressArbiter(Core::System& system);
37 ~AddressArbiter();
38
39 AddressArbiter(const AddressArbiter&) = delete;
40 AddressArbiter& operator=(const AddressArbiter&) = delete;
41
42 AddressArbiter(AddressArbiter&&) = default;
43 AddressArbiter& operator=(AddressArbiter&&) = delete;
44
45 /// Signals an address being waited on with a particular signaling type.
46 ResultCode SignalToAddress(VAddr address, SignalType type, s32 value, s32 num_to_wake);
25 47
26ResultCode SignalToAddress(VAddr address, s32 num_to_wake); 48 /// Waits on an address with a particular arbitration type.
27ResultCode IncrementAndSignalToAddressIfEqual(VAddr address, s32 value, s32 num_to_wake); 49 ResultCode WaitForAddress(VAddr address, ArbitrationType type, s32 value, s64 timeout_ns);
28ResultCode ModifyByWaitingCountAndSignalToAddressIfEqual(VAddr address, s32 value, s32 num_to_wake);
29 50
30ResultCode WaitForAddressIfLessThan(VAddr address, s32 value, s64 timeout, bool should_decrement); 51private:
31ResultCode WaitForAddressIfEqual(VAddr address, s32 value, s64 timeout); 52 /// Signals an address being waited on.
32} // namespace AddressArbiter 53 ResultCode SignalToAddressOnly(VAddr address, s32 num_to_wake);
54
55 /// Signals an address being waited on and increments its value if equal to the value argument.
56 ResultCode IncrementAndSignalToAddressIfEqual(VAddr address, s32 value, s32 num_to_wake);
57
58 /// Signals an address being waited on and modifies its value based on waiting thread count if
59 /// equal to the value argument.
60 ResultCode ModifyByWaitingCountAndSignalToAddressIfEqual(VAddr address, s32 value,
61 s32 num_to_wake);
62
63 /// Waits on an address if the value passed is less than the argument value,
64 /// optionally decrementing.
65 ResultCode WaitForAddressIfLessThan(VAddr address, s32 value, s64 timeout,
66 bool should_decrement);
67
68 /// Waits on an address if the value passed is equal to the argument value.
69 ResultCode WaitForAddressIfEqual(VAddr address, s32 value, s64 timeout);
70
71 // Waits on the given address with a timeout in nanoseconds
72 ResultCode WaitForAddressImpl(VAddr address, s64 timeout);
73
74 // Gets the threads waiting on an address.
75 std::vector<SharedPtr<Thread>> GetThreadsWaitingOnAddress(VAddr address) const;
76
77 Core::System& system;
78};
33 79
34} // namespace Kernel 80} // namespace Kernel
diff --git a/src/core/hle/kernel/client_port.cpp b/src/core/hle/kernel/client_port.cpp
index d4c91d529..aa432658e 100644
--- a/src/core/hle/kernel/client_port.cpp
+++ b/src/core/hle/kernel/client_port.cpp
@@ -33,10 +33,11 @@ ResultVal<SharedPtr<ClientSession>> ClientPort::Connect() {
33 // Create a new session pair, let the created sessions inherit the parent port's HLE handler. 33 // Create a new session pair, let the created sessions inherit the parent port's HLE handler.
34 auto sessions = ServerSession::CreateSessionPair(kernel, server_port->GetName(), this); 34 auto sessions = ServerSession::CreateSessionPair(kernel, server_port->GetName(), this);
35 35
36 if (server_port->hle_handler) 36 if (server_port->HasHLEHandler()) {
37 server_port->hle_handler->ClientConnected(std::get<SharedPtr<ServerSession>>(sessions)); 37 server_port->GetHLEHandler()->ClientConnected(std::get<SharedPtr<ServerSession>>(sessions));
38 else 38 } else {
39 server_port->pending_sessions.push_back(std::get<SharedPtr<ServerSession>>(sessions)); 39 server_port->AppendPendingSession(std::get<SharedPtr<ServerSession>>(sessions));
40 }
40 41
41 // Wake the threads waiting on the ServerPort 42 // Wake the threads waiting on the ServerPort
42 server_port->WakeupAllWaitingThreads(); 43 server_port->WakeupAllWaitingThreads();
diff --git a/src/core/hle/kernel/client_session.cpp b/src/core/hle/kernel/client_session.cpp
index 704e82824..c17baa50a 100644
--- a/src/core/hle/kernel/client_session.cpp
+++ b/src/core/hle/kernel/client_session.cpp
@@ -17,21 +17,11 @@ ClientSession::~ClientSession() {
17 // This destructor will be called automatically when the last ClientSession handle is closed by 17 // This destructor will be called automatically when the last ClientSession handle is closed by
18 // the emulated application. 18 // the emulated application.
19 19
20 // Local references to ServerSession and SessionRequestHandler are necessary to guarantee they 20 // A local reference to the ServerSession is necessary to guarantee it
21 // will be kept alive until after ClientDisconnected() returns. 21 // will be kept alive until after ClientDisconnected() returns.
22 SharedPtr<ServerSession> server = parent->server; 22 SharedPtr<ServerSession> server = parent->server;
23 if (server) { 23 if (server) {
24 std::shared_ptr<SessionRequestHandler> hle_handler = server->hle_handler; 24 server->ClientDisconnected();
25 if (hle_handler)
26 hle_handler->ClientDisconnected(server);
27
28 // TODO(Subv): Force a wake up of all the ServerSession's waiting threads and set
29 // their WaitSynchronization result to 0xC920181A.
30
31 // Clean up the list of client threads with pending requests, they are unneeded now that the
32 // client endpoint is closed.
33 server->pending_requesting_threads.clear();
34 server->currently_handling = nullptr;
35 } 25 }
36 26
37 parent->client = nullptr; 27 parent->client = nullptr;
diff --git a/src/core/hle/kernel/client_session.h b/src/core/hle/kernel/client_session.h
index 4c18de69c..b1f39aad7 100644
--- a/src/core/hle/kernel/client_session.h
+++ b/src/core/hle/kernel/client_session.h
@@ -36,14 +36,15 @@ public:
36 36
37 ResultCode SendSyncRequest(SharedPtr<Thread> thread); 37 ResultCode SendSyncRequest(SharedPtr<Thread> thread);
38 38
39 std::string name; ///< Name of client port (optional) 39private:
40 explicit ClientSession(KernelCore& kernel);
41 ~ClientSession() override;
40 42
41 /// The parent session, which links to the server endpoint. 43 /// The parent session, which links to the server endpoint.
42 std::shared_ptr<Session> parent; 44 std::shared_ptr<Session> parent;
43 45
44private: 46 /// Name of the client session (optional)
45 explicit ClientSession(KernelCore& kernel); 47 std::string name;
46 ~ClientSession() override;
47}; 48};
48 49
49} // namespace Kernel 50} // namespace Kernel
diff --git a/src/core/hle/kernel/code_set.cpp b/src/core/hle/kernel/code_set.cpp
new file mode 100644
index 000000000..1f434e9af
--- /dev/null
+++ b/src/core/hle/kernel/code_set.cpp
@@ -0,0 +1,12 @@
1// Copyright 2019 yuzu emulator team
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "core/hle/kernel/code_set.h"
6
7namespace Kernel {
8
9CodeSet::CodeSet() = default;
10CodeSet::~CodeSet() = default;
11
12} // namespace Kernel
diff --git a/src/core/hle/kernel/code_set.h b/src/core/hle/kernel/code_set.h
new file mode 100644
index 000000000..879957dcb
--- /dev/null
+++ b/src/core/hle/kernel/code_set.h
@@ -0,0 +1,89 @@
1// Copyright 2019 yuzu emulator team
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <cstddef>
8#include <vector>
9
10#include "common/common_types.h"
11
12namespace Kernel {
13
14/**
15 * Represents executable data that may be loaded into a kernel process.
16 *
17 * A code set consists of three basic segments:
18 * - A code (AKA text) segment,
19 * - A read-only data segment (rodata)
20 * - A data segment
21 *
22 * The code segment is the portion of the object file that contains
23 * executable instructions.
24 *
25 * The read-only data segment in the portion of the object file that
26 * contains (as one would expect) read-only data, such as fixed constant
27 * values and data structures.
28 *
29 * The data segment is similar to the read-only data segment -- it contains
30 * variables and data structures that have predefined values, however,
31 * entities within this segment can be modified.
32 */
33struct CodeSet final {
34 /// A single segment within a code set.
35 struct Segment final {
36 /// The byte offset that this segment is located at.
37 std::size_t offset = 0;
38
39 /// The address to map this segment to.
40 VAddr addr = 0;
41
42 /// The size of this segment in bytes.
43 u32 size = 0;
44 };
45
46 explicit CodeSet();
47 ~CodeSet();
48
49 CodeSet(const CodeSet&) = delete;
50 CodeSet& operator=(const CodeSet&) = delete;
51
52 CodeSet(CodeSet&&) = default;
53 CodeSet& operator=(CodeSet&&) = default;
54
55 Segment& CodeSegment() {
56 return segments[0];
57 }
58
59 const Segment& CodeSegment() const {
60 return segments[0];
61 }
62
63 Segment& RODataSegment() {
64 return segments[1];
65 }
66
67 const Segment& RODataSegment() const {
68 return segments[1];
69 }
70
71 Segment& DataSegment() {
72 return segments[2];
73 }
74
75 const Segment& DataSegment() const {
76 return segments[2];
77 }
78
79 /// The overall data that backs this code set.
80 std::vector<u8> memory;
81
82 /// The segments that comprise this code set.
83 std::array<Segment, 3> segments;
84
85 /// The entry point address for this code set.
86 VAddr entrypoint = 0;
87};
88
89} // namespace Kernel
diff --git a/src/core/hle/kernel/errors.h b/src/core/hle/kernel/errors.h
index d17eb0cb6..8097b3863 100644
--- a/src/core/hle/kernel/errors.h
+++ b/src/core/hle/kernel/errors.h
@@ -14,6 +14,7 @@ constexpr ResultCode ERR_MAX_CONNECTIONS_REACHED{ErrorModule::Kernel, 7};
14constexpr ResultCode ERR_INVALID_CAPABILITY_DESCRIPTOR{ErrorModule::Kernel, 14}; 14constexpr ResultCode ERR_INVALID_CAPABILITY_DESCRIPTOR{ErrorModule::Kernel, 14};
15constexpr ResultCode ERR_INVALID_SIZE{ErrorModule::Kernel, 101}; 15constexpr ResultCode ERR_INVALID_SIZE{ErrorModule::Kernel, 101};
16constexpr ResultCode ERR_INVALID_ADDRESS{ErrorModule::Kernel, 102}; 16constexpr ResultCode ERR_INVALID_ADDRESS{ErrorModule::Kernel, 102};
17constexpr ResultCode ERR_OUT_OF_MEMORY{ErrorModule::Kernel, 104};
17constexpr ResultCode ERR_HANDLE_TABLE_FULL{ErrorModule::Kernel, 105}; 18constexpr ResultCode ERR_HANDLE_TABLE_FULL{ErrorModule::Kernel, 105};
18constexpr ResultCode ERR_INVALID_ADDRESS_STATE{ErrorModule::Kernel, 106}; 19constexpr ResultCode ERR_INVALID_ADDRESS_STATE{ErrorModule::Kernel, 106};
19constexpr ResultCode ERR_INVALID_MEMORY_PERMISSIONS{ErrorModule::Kernel, 108}; 20constexpr ResultCode ERR_INVALID_MEMORY_PERMISSIONS{ErrorModule::Kernel, 108};
diff --git a/src/core/hle/kernel/handle_table.cpp b/src/core/hle/kernel/handle_table.cpp
index c8acde5b1..bdfaa977f 100644
--- a/src/core/hle/kernel/handle_table.cpp
+++ b/src/core/hle/kernel/handle_table.cpp
@@ -14,32 +14,47 @@
14namespace Kernel { 14namespace Kernel {
15namespace { 15namespace {
16constexpr u16 GetSlot(Handle handle) { 16constexpr u16 GetSlot(Handle handle) {
17 return handle >> 15; 17 return static_cast<u16>(handle >> 15);
18} 18}
19 19
20constexpr u16 GetGeneration(Handle handle) { 20constexpr u16 GetGeneration(Handle handle) {
21 return handle & 0x7FFF; 21 return static_cast<u16>(handle & 0x7FFF);
22} 22}
23} // Anonymous namespace 23} // Anonymous namespace
24 24
25HandleTable::HandleTable() { 25HandleTable::HandleTable() {
26 next_generation = 1;
27 Clear(); 26 Clear();
28} 27}
29 28
30HandleTable::~HandleTable() = default; 29HandleTable::~HandleTable() = default;
31 30
31ResultCode HandleTable::SetSize(s32 handle_table_size) {
32 if (static_cast<u32>(handle_table_size) > MAX_COUNT) {
33 return ERR_OUT_OF_MEMORY;
34 }
35
36 // Values less than or equal to zero indicate to use the maximum allowable
37 // size for the handle table in the actual kernel, so we ignore the given
38 // value in that case, since we assume this by default unless this function
39 // is called.
40 if (handle_table_size > 0) {
41 table_size = static_cast<u16>(handle_table_size);
42 }
43
44 return RESULT_SUCCESS;
45}
46
32ResultVal<Handle> HandleTable::Create(SharedPtr<Object> obj) { 47ResultVal<Handle> HandleTable::Create(SharedPtr<Object> obj) {
33 DEBUG_ASSERT(obj != nullptr); 48 DEBUG_ASSERT(obj != nullptr);
34 49
35 u16 slot = next_free_slot; 50 const u16 slot = next_free_slot;
36 if (slot >= generations.size()) { 51 if (slot >= table_size) {
37 LOG_ERROR(Kernel, "Unable to allocate Handle, too many slots in use."); 52 LOG_ERROR(Kernel, "Unable to allocate Handle, too many slots in use.");
38 return ERR_HANDLE_TABLE_FULL; 53 return ERR_HANDLE_TABLE_FULL;
39 } 54 }
40 next_free_slot = generations[slot]; 55 next_free_slot = generations[slot];
41 56
42 u16 generation = next_generation++; 57 const u16 generation = next_generation++;
43 58
44 // Overflow count so it fits in the 15 bits dedicated to the generation in the handle. 59 // Overflow count so it fits in the 15 bits dedicated to the generation in the handle.
45 // Horizon OS uses zero to represent an invalid handle, so skip to 1. 60 // Horizon OS uses zero to represent an invalid handle, so skip to 1.
@@ -64,10 +79,11 @@ ResultVal<Handle> HandleTable::Duplicate(Handle handle) {
64} 79}
65 80
66ResultCode HandleTable::Close(Handle handle) { 81ResultCode HandleTable::Close(Handle handle) {
67 if (!IsValid(handle)) 82 if (!IsValid(handle)) {
68 return ERR_INVALID_HANDLE; 83 return ERR_INVALID_HANDLE;
84 }
69 85
70 u16 slot = GetSlot(handle); 86 const u16 slot = GetSlot(handle);
71 87
72 objects[slot] = nullptr; 88 objects[slot] = nullptr;
73 89
@@ -77,10 +93,10 @@ ResultCode HandleTable::Close(Handle handle) {
77} 93}
78 94
79bool HandleTable::IsValid(Handle handle) const { 95bool HandleTable::IsValid(Handle handle) const {
80 std::size_t slot = GetSlot(handle); 96 const std::size_t slot = GetSlot(handle);
81 u16 generation = GetGeneration(handle); 97 const u16 generation = GetGeneration(handle);
82 98
83 return slot < MAX_COUNT && objects[slot] != nullptr && generations[slot] == generation; 99 return slot < table_size && objects[slot] != nullptr && generations[slot] == generation;
84} 100}
85 101
86SharedPtr<Object> HandleTable::GetGeneric(Handle handle) const { 102SharedPtr<Object> HandleTable::GetGeneric(Handle handle) const {
@@ -97,7 +113,7 @@ SharedPtr<Object> HandleTable::GetGeneric(Handle handle) const {
97} 113}
98 114
99void HandleTable::Clear() { 115void HandleTable::Clear() {
100 for (u16 i = 0; i < MAX_COUNT; ++i) { 116 for (u16 i = 0; i < table_size; ++i) {
101 generations[i] = i + 1; 117 generations[i] = i + 1;
102 objects[i] = nullptr; 118 objects[i] = nullptr;
103 } 119 }
diff --git a/src/core/hle/kernel/handle_table.h b/src/core/hle/kernel/handle_table.h
index 89a3bc740..44901391b 100644
--- a/src/core/hle/kernel/handle_table.h
+++ b/src/core/hle/kernel/handle_table.h
@@ -50,6 +50,20 @@ public:
50 ~HandleTable(); 50 ~HandleTable();
51 51
52 /** 52 /**
53 * Sets the number of handles that may be in use at one time
54 * for this handle table.
55 *
56 * @param handle_table_size The desired size to limit the handle table to.
57 *
58 * @returns an error code indicating if initialization was successful.
59 * If initialization was not successful, then ERR_OUT_OF_MEMORY
60 * will be returned.
61 *
62 * @pre handle_table_size must be within the range [0, 1024]
63 */
64 ResultCode SetSize(s32 handle_table_size);
65
66 /**
53 * Allocates a handle for the given object. 67 * Allocates a handle for the given object.
54 * @return The created Handle or one of the following errors: 68 * @return The created Handle or one of the following errors:
55 * - `ERR_HANDLE_TABLE_FULL`: the maximum number of handles has been exceeded. 69 * - `ERR_HANDLE_TABLE_FULL`: the maximum number of handles has been exceeded.
@@ -104,13 +118,20 @@ private:
104 std::array<u16, MAX_COUNT> generations; 118 std::array<u16, MAX_COUNT> generations;
105 119
106 /** 120 /**
121 * The limited size of the handle table. This can be specified by process
122 * capabilities in order to restrict the overall number of handles that
123 * can be created in a process instance
124 */
125 u16 table_size = static_cast<u16>(MAX_COUNT);
126
127 /**
107 * Global counter of the number of created handles. Stored in `generations` when a handle is 128 * Global counter of the number of created handles. Stored in `generations` when a handle is
108 * created, and wraps around to 1 when it hits 0x8000. 129 * created, and wraps around to 1 when it hits 0x8000.
109 */ 130 */
110 u16 next_generation; 131 u16 next_generation = 1;
111 132
112 /// Head of the free slots linked list. 133 /// Head of the free slots linked list.
113 u16 next_free_slot; 134 u16 next_free_slot = 0;
114}; 135};
115 136
116} // namespace Kernel 137} // namespace Kernel
diff --git a/src/core/hle/kernel/hle_ipc.cpp b/src/core/hle/kernel/hle_ipc.cpp
index 5dd855db8..fe710eb6e 100644
--- a/src/core/hle/kernel/hle_ipc.cpp
+++ b/src/core/hle/kernel/hle_ipc.cpp
@@ -86,7 +86,7 @@ HLERequestContext::~HLERequestContext() = default;
86void HLERequestContext::ParseCommandBuffer(const HandleTable& handle_table, u32_le* src_cmdbuf, 86void HLERequestContext::ParseCommandBuffer(const HandleTable& handle_table, u32_le* src_cmdbuf,
87 bool incoming) { 87 bool incoming) {
88 IPC::RequestParser rp(src_cmdbuf); 88 IPC::RequestParser rp(src_cmdbuf);
89 command_header = std::make_shared<IPC::CommandHeader>(rp.PopRaw<IPC::CommandHeader>()); 89 command_header = rp.PopRaw<IPC::CommandHeader>();
90 90
91 if (command_header->type == IPC::CommandType::Close) { 91 if (command_header->type == IPC::CommandType::Close) {
92 // Close does not populate the rest of the IPC header 92 // Close does not populate the rest of the IPC header
@@ -95,8 +95,7 @@ void HLERequestContext::ParseCommandBuffer(const HandleTable& handle_table, u32_
95 95
96 // If handle descriptor is present, add size of it 96 // If handle descriptor is present, add size of it
97 if (command_header->enable_handle_descriptor) { 97 if (command_header->enable_handle_descriptor) {
98 handle_descriptor_header = 98 handle_descriptor_header = rp.PopRaw<IPC::HandleDescriptorHeader>();
99 std::make_shared<IPC::HandleDescriptorHeader>(rp.PopRaw<IPC::HandleDescriptorHeader>());
100 if (handle_descriptor_header->send_current_pid) { 99 if (handle_descriptor_header->send_current_pid) {
101 rp.Skip(2, false); 100 rp.Skip(2, false);
102 } 101 }
@@ -140,16 +139,15 @@ void HLERequestContext::ParseCommandBuffer(const HandleTable& handle_table, u32_
140 // If this is an incoming message, only CommandType "Request" has a domain header 139 // If this is an incoming message, only CommandType "Request" has a domain header
141 // All outgoing domain messages have the domain header, if only incoming has it 140 // All outgoing domain messages have the domain header, if only incoming has it
142 if (incoming || domain_message_header) { 141 if (incoming || domain_message_header) {
143 domain_message_header = 142 domain_message_header = rp.PopRaw<IPC::DomainMessageHeader>();
144 std::make_shared<IPC::DomainMessageHeader>(rp.PopRaw<IPC::DomainMessageHeader>());
145 } else { 143 } else {
146 if (Session()->IsDomain()) 144 if (Session()->IsDomain()) {
147 LOG_WARNING(IPC, "Domain request has no DomainMessageHeader!"); 145 LOG_WARNING(IPC, "Domain request has no DomainMessageHeader!");
146 }
148 } 147 }
149 } 148 }
150 149
151 data_payload_header = 150 data_payload_header = rp.PopRaw<IPC::DataPayloadHeader>();
152 std::make_shared<IPC::DataPayloadHeader>(rp.PopRaw<IPC::DataPayloadHeader>());
153 151
154 data_payload_offset = rp.GetCurrentOffset(); 152 data_payload_offset = rp.GetCurrentOffset();
155 153
@@ -264,11 +262,11 @@ ResultCode HLERequestContext::WriteToOutgoingCommandBuffer(Thread& thread) {
264 // Write the domain objects to the command buffer, these go after the raw untranslated data. 262 // Write the domain objects to the command buffer, these go after the raw untranslated data.
265 // TODO(Subv): This completely ignores C buffers. 263 // TODO(Subv): This completely ignores C buffers.
266 std::size_t domain_offset = size - domain_message_header->num_objects; 264 std::size_t domain_offset = size - domain_message_header->num_objects;
267 auto& request_handlers = server_session->domain_request_handlers;
268 265
269 for (auto& object : domain_objects) { 266 for (const auto& object : domain_objects) {
270 request_handlers.emplace_back(object); 267 server_session->AppendDomainRequestHandler(object);
271 dst_cmdbuf[domain_offset++] = static_cast<u32_le>(request_handlers.size()); 268 dst_cmdbuf[domain_offset++] =
269 static_cast<u32_le>(server_session->NumDomainRequestHandlers());
272 } 270 }
273 } 271 }
274 272
diff --git a/src/core/hle/kernel/hle_ipc.h b/src/core/hle/kernel/hle_ipc.h
index cb1c5aff3..2bdd9f02c 100644
--- a/src/core/hle/kernel/hle_ipc.h
+++ b/src/core/hle/kernel/hle_ipc.h
@@ -6,6 +6,7 @@
6 6
7#include <array> 7#include <array>
8#include <memory> 8#include <memory>
9#include <optional>
9#include <string> 10#include <string>
10#include <type_traits> 11#include <type_traits>
11#include <vector> 12#include <vector>
@@ -15,6 +16,8 @@
15#include "core/hle/ipc.h" 16#include "core/hle/ipc.h"
16#include "core/hle/kernel/object.h" 17#include "core/hle/kernel/object.h"
17 18
19union ResultCode;
20
18namespace Service { 21namespace Service {
19class ServiceFrameworkBase; 22class ServiceFrameworkBase;
20} 23}
@@ -166,12 +169,12 @@ public:
166 return buffer_c_desciptors; 169 return buffer_c_desciptors;
167 } 170 }
168 171
169 const IPC::DomainMessageHeader* GetDomainMessageHeader() const { 172 const IPC::DomainMessageHeader& GetDomainMessageHeader() const {
170 return domain_message_header.get(); 173 return domain_message_header.value();
171 } 174 }
172 175
173 bool HasDomainMessageHeader() const { 176 bool HasDomainMessageHeader() const {
174 return domain_message_header != nullptr; 177 return domain_message_header.has_value();
175 } 178 }
176 179
177 /// Helper function to read a buffer using the appropriate buffer descriptor 180 /// Helper function to read a buffer using the appropriate buffer descriptor
@@ -208,14 +211,12 @@ public:
208 211
209 template <typename T> 212 template <typename T>
210 SharedPtr<T> GetCopyObject(std::size_t index) { 213 SharedPtr<T> GetCopyObject(std::size_t index) {
211 ASSERT(index < copy_objects.size()); 214 return DynamicObjectCast<T>(copy_objects.at(index));
212 return DynamicObjectCast<T>(copy_objects[index]);
213 } 215 }
214 216
215 template <typename T> 217 template <typename T>
216 SharedPtr<T> GetMoveObject(std::size_t index) { 218 SharedPtr<T> GetMoveObject(std::size_t index) {
217 ASSERT(index < move_objects.size()); 219 return DynamicObjectCast<T>(move_objects.at(index));
218 return DynamicObjectCast<T>(move_objects[index]);
219 } 220 }
220 221
221 void AddMoveObject(SharedPtr<Object> object) { 222 void AddMoveObject(SharedPtr<Object> object) {
@@ -232,7 +233,7 @@ public:
232 233
233 template <typename T> 234 template <typename T>
234 std::shared_ptr<T> GetDomainRequestHandler(std::size_t index) const { 235 std::shared_ptr<T> GetDomainRequestHandler(std::size_t index) const {
235 return std::static_pointer_cast<T>(domain_request_handlers[index]); 236 return std::static_pointer_cast<T>(domain_request_handlers.at(index));
236 } 237 }
237 238
238 void SetDomainRequestHandlers( 239 void SetDomainRequestHandlers(
@@ -272,10 +273,10 @@ private:
272 boost::container::small_vector<SharedPtr<Object>, 8> copy_objects; 273 boost::container::small_vector<SharedPtr<Object>, 8> copy_objects;
273 boost::container::small_vector<std::shared_ptr<SessionRequestHandler>, 8> domain_objects; 274 boost::container::small_vector<std::shared_ptr<SessionRequestHandler>, 8> domain_objects;
274 275
275 std::shared_ptr<IPC::CommandHeader> command_header; 276 std::optional<IPC::CommandHeader> command_header;
276 std::shared_ptr<IPC::HandleDescriptorHeader> handle_descriptor_header; 277 std::optional<IPC::HandleDescriptorHeader> handle_descriptor_header;
277 std::shared_ptr<IPC::DataPayloadHeader> data_payload_header; 278 std::optional<IPC::DataPayloadHeader> data_payload_header;
278 std::shared_ptr<IPC::DomainMessageHeader> domain_message_header; 279 std::optional<IPC::DomainMessageHeader> domain_message_header;
279 std::vector<IPC::BufferDescriptorX> buffer_x_desciptors; 280 std::vector<IPC::BufferDescriptorX> buffer_x_desciptors;
280 std::vector<IPC::BufferDescriptorABW> buffer_a_desciptors; 281 std::vector<IPC::BufferDescriptorABW> buffer_a_desciptors;
281 std::vector<IPC::BufferDescriptorABW> buffer_b_desciptors; 282 std::vector<IPC::BufferDescriptorABW> buffer_b_desciptors;
diff --git a/src/core/hle/kernel/kernel.cpp b/src/core/hle/kernel/kernel.cpp
index 7a524ce5a..3f14bfa86 100644
--- a/src/core/hle/kernel/kernel.cpp
+++ b/src/core/hle/kernel/kernel.cpp
@@ -12,6 +12,7 @@
12 12
13#include "core/core.h" 13#include "core/core.h"
14#include "core/core_timing.h" 14#include "core/core_timing.h"
15#include "core/hle/kernel/address_arbiter.h"
15#include "core/hle/kernel/client_port.h" 16#include "core/hle/kernel/client_port.h"
16#include "core/hle/kernel/handle_table.h" 17#include "core/hle/kernel/handle_table.h"
17#include "core/hle/kernel/kernel.h" 18#include "core/hle/kernel/kernel.h"
@@ -28,12 +29,12 @@ namespace Kernel {
28 * @param thread_handle The handle of the thread that's been awoken 29 * @param thread_handle The handle of the thread that's been awoken
29 * @param cycles_late The number of CPU cycles that have passed since the desired wakeup time 30 * @param cycles_late The number of CPU cycles that have passed since the desired wakeup time
30 */ 31 */
31static void ThreadWakeupCallback(u64 thread_handle, [[maybe_unused]] int cycles_late) { 32static void ThreadWakeupCallback(u64 thread_handle, [[maybe_unused]] s64 cycles_late) {
32 const auto proper_handle = static_cast<Handle>(thread_handle); 33 const auto proper_handle = static_cast<Handle>(thread_handle);
33 const auto& system = Core::System::GetInstance(); 34 const auto& system = Core::System::GetInstance();
34 35
35 // Lock the global kernel mutex when we enter the kernel HLE. 36 // Lock the global kernel mutex when we enter the kernel HLE.
36 std::lock_guard<std::recursive_mutex> lock(HLE::g_hle_lock); 37 std::lock_guard lock{HLE::g_hle_lock};
37 38
38 SharedPtr<Thread> thread = 39 SharedPtr<Thread> thread =
39 system.Kernel().RetrieveThreadFromWakeupCallbackHandleTable(proper_handle); 40 system.Kernel().RetrieveThreadFromWakeupCallbackHandleTable(proper_handle);
@@ -61,7 +62,8 @@ static void ThreadWakeupCallback(u64 thread_handle, [[maybe_unused]] int cycles_
61 62
62 if (thread->GetMutexWaitAddress() != 0 || thread->GetCondVarWaitAddress() != 0 || 63 if (thread->GetMutexWaitAddress() != 0 || thread->GetCondVarWaitAddress() != 0 ||
63 thread->GetWaitHandle() != 0) { 64 thread->GetWaitHandle() != 0) {
64 ASSERT(thread->GetStatus() == ThreadStatus::WaitMutex); 65 ASSERT(thread->GetStatus() == ThreadStatus::WaitMutex ||
66 thread->GetStatus() == ThreadStatus::WaitCondVar);
65 thread->SetMutexWaitAddress(0); 67 thread->SetMutexWaitAddress(0);
66 thread->SetCondVarWaitAddress(0); 68 thread->SetCondVarWaitAddress(0);
67 thread->SetWaitHandle(0); 69 thread->SetWaitHandle(0);
@@ -86,6 +88,8 @@ static void ThreadWakeupCallback(u64 thread_handle, [[maybe_unused]] int cycles_
86} 88}
87 89
88struct KernelCore::Impl { 90struct KernelCore::Impl {
91 explicit Impl(Core::System& system) : system{system} {}
92
89 void Initialize(KernelCore& kernel) { 93 void Initialize(KernelCore& kernel) {
90 Shutdown(); 94 Shutdown();
91 95
@@ -111,7 +115,7 @@ struct KernelCore::Impl {
111 115
112 // Creates the default system resource limit 116 // Creates the default system resource limit
113 void InitializeSystemResourceLimit(KernelCore& kernel) { 117 void InitializeSystemResourceLimit(KernelCore& kernel) {
114 system_resource_limit = ResourceLimit::Create(kernel, "System"); 118 system_resource_limit = ResourceLimit::Create(kernel);
115 119
116 // If setting the default system values fails, then something seriously wrong has occurred. 120 // If setting the default system values fails, then something seriously wrong has occurred.
117 ASSERT(system_resource_limit->SetLimitValue(ResourceType::PhysicalMemory, 0x200000000) 121 ASSERT(system_resource_limit->SetLimitValue(ResourceType::PhysicalMemory, 0x200000000)
@@ -124,7 +128,7 @@ struct KernelCore::Impl {
124 128
125 void InitializeThreads() { 129 void InitializeThreads() {
126 thread_wakeup_event_type = 130 thread_wakeup_event_type =
127 CoreTiming::RegisterEvent("ThreadWakeupCallback", ThreadWakeupCallback); 131 system.CoreTiming().RegisterEvent("ThreadWakeupCallback", ThreadWakeupCallback);
128 } 132 }
129 133
130 std::atomic<u32> next_object_id{0}; 134 std::atomic<u32> next_object_id{0};
@@ -137,7 +141,7 @@ struct KernelCore::Impl {
137 141
138 SharedPtr<ResourceLimit> system_resource_limit; 142 SharedPtr<ResourceLimit> system_resource_limit;
139 143
140 CoreTiming::EventType* thread_wakeup_event_type = nullptr; 144 Core::Timing::EventType* thread_wakeup_event_type = nullptr;
141 // TODO(yuriks): This can be removed if Thread objects are explicitly pooled in the future, 145 // TODO(yuriks): This can be removed if Thread objects are explicitly pooled in the future,
142 // allowing us to simply use a pool index or similar. 146 // allowing us to simply use a pool index or similar.
143 Kernel::HandleTable thread_wakeup_callback_handle_table; 147 Kernel::HandleTable thread_wakeup_callback_handle_table;
@@ -145,9 +149,12 @@ struct KernelCore::Impl {
145 /// Map of named ports managed by the kernel, which can be retrieved using 149 /// Map of named ports managed by the kernel, which can be retrieved using
146 /// the ConnectToPort SVC. 150 /// the ConnectToPort SVC.
147 NamedPortTable named_ports; 151 NamedPortTable named_ports;
152
153 // System context
154 Core::System& system;
148}; 155};
149 156
150KernelCore::KernelCore() : impl{std::make_unique<Impl>()} {} 157KernelCore::KernelCore(Core::System& system) : impl{std::make_unique<Impl>(system)} {}
151KernelCore::~KernelCore() { 158KernelCore::~KernelCore() {
152 Shutdown(); 159 Shutdown();
153} 160}
@@ -184,6 +191,10 @@ const Process* KernelCore::CurrentProcess() const {
184 return impl->current_process; 191 return impl->current_process;
185} 192}
186 193
194const std::vector<SharedPtr<Process>>& KernelCore::GetProcessList() const {
195 return impl->process_list;
196}
197
187void KernelCore::AddNamedPort(std::string name, SharedPtr<ClientPort> port) { 198void KernelCore::AddNamedPort(std::string name, SharedPtr<ClientPort> port) {
188 impl->named_ports.emplace(std::move(name), std::move(port)); 199 impl->named_ports.emplace(std::move(name), std::move(port));
189} 200}
@@ -213,7 +224,7 @@ u64 KernelCore::CreateNewProcessID() {
213 return impl->next_process_id++; 224 return impl->next_process_id++;
214} 225}
215 226
216CoreTiming::EventType* KernelCore::ThreadWakeupCallbackEventType() const { 227Core::Timing::EventType* KernelCore::ThreadWakeupCallbackEventType() const {
217 return impl->thread_wakeup_event_type; 228 return impl->thread_wakeup_event_type;
218} 229}
219 230
diff --git a/src/core/hle/kernel/kernel.h b/src/core/hle/kernel/kernel.h
index c643a6401..6b8738599 100644
--- a/src/core/hle/kernel/kernel.h
+++ b/src/core/hle/kernel/kernel.h
@@ -8,15 +8,18 @@
8#include <unordered_map> 8#include <unordered_map>
9#include "core/hle/kernel/object.h" 9#include "core/hle/kernel/object.h"
10 10
11template <typename T> 11namespace Core {
12class ResultVal; 12class System;
13}
13 14
14namespace CoreTiming { 15namespace Core::Timing {
16class CoreTiming;
15struct EventType; 17struct EventType;
16} 18} // namespace Core::Timing
17 19
18namespace Kernel { 20namespace Kernel {
19 21
22class AddressArbiter;
20class ClientPort; 23class ClientPort;
21class HandleTable; 24class HandleTable;
22class Process; 25class Process;
@@ -29,7 +32,14 @@ private:
29 using NamedPortTable = std::unordered_map<std::string, SharedPtr<ClientPort>>; 32 using NamedPortTable = std::unordered_map<std::string, SharedPtr<ClientPort>>;
30 33
31public: 34public:
32 KernelCore(); 35 /// Constructs an instance of the kernel using the given System
36 /// instance as a context for any necessary system-related state,
37 /// such as threads, CPU core state, etc.
38 ///
39 /// @post After execution of the constructor, the provided System
40 /// object *must* outlive the kernel instance itself.
41 ///
42 explicit KernelCore(Core::System& system);
33 ~KernelCore(); 43 ~KernelCore();
34 44
35 KernelCore(const KernelCore&) = delete; 45 KernelCore(const KernelCore&) = delete;
@@ -62,6 +72,9 @@ public:
62 /// Retrieves a const pointer to the current process. 72 /// Retrieves a const pointer to the current process.
63 const Process* CurrentProcess() const; 73 const Process* CurrentProcess() const;
64 74
75 /// Retrieves the list of processes.
76 const std::vector<SharedPtr<Process>>& GetProcessList() const;
77
65 /// Adds a port to the named port table 78 /// Adds a port to the named port table
66 void AddNamedPort(std::string name, SharedPtr<ClientPort> port); 79 void AddNamedPort(std::string name, SharedPtr<ClientPort> port);
67 80
@@ -89,7 +102,7 @@ private:
89 u64 CreateNewThreadID(); 102 u64 CreateNewThreadID();
90 103
91 /// Retrieves the event type used for thread wakeup callbacks. 104 /// Retrieves the event type used for thread wakeup callbacks.
92 CoreTiming::EventType* ThreadWakeupCallbackEventType() const; 105 Core::Timing::EventType* ThreadWakeupCallbackEventType() const;
93 106
94 /// Provides a reference to the thread wakeup callback handle table. 107 /// Provides a reference to the thread wakeup callback handle table.
95 Kernel::HandleTable& ThreadWakeupCallbackHandleTable(); 108 Kernel::HandleTable& ThreadWakeupCallbackHandleTable();
diff --git a/src/core/hle/kernel/mutex.cpp b/src/core/hle/kernel/mutex.cpp
index 0743670ad..98e87313b 100644
--- a/src/core/hle/kernel/mutex.cpp
+++ b/src/core/hle/kernel/mutex.cpp
@@ -2,7 +2,6 @@
2// Licensed under GPLv2 or any later version 2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included. 3// Refer to the license.txt file included.
4 4
5#include <map>
6#include <utility> 5#include <utility>
7#include <vector> 6#include <vector>
8 7
@@ -10,8 +9,11 @@
10#include "core/core.h" 9#include "core/core.h"
11#include "core/hle/kernel/errors.h" 10#include "core/hle/kernel/errors.h"
12#include "core/hle/kernel/handle_table.h" 11#include "core/hle/kernel/handle_table.h"
12#include "core/hle/kernel/kernel.h"
13#include "core/hle/kernel/mutex.h" 13#include "core/hle/kernel/mutex.h"
14#include "core/hle/kernel/object.h" 14#include "core/hle/kernel/object.h"
15#include "core/hle/kernel/process.h"
16#include "core/hle/kernel/scheduler.h"
15#include "core/hle/kernel/thread.h" 17#include "core/hle/kernel/thread.h"
16#include "core/hle/result.h" 18#include "core/hle/result.h"
17#include "core/memory.h" 19#include "core/memory.h"
@@ -57,41 +59,47 @@ static void TransferMutexOwnership(VAddr mutex_addr, SharedPtr<Thread> current_t
57 } 59 }
58} 60}
59 61
60ResultCode Mutex::TryAcquire(HandleTable& handle_table, VAddr address, Handle holding_thread_handle, 62Mutex::Mutex(Core::System& system) : system{system} {}
63Mutex::~Mutex() = default;
64
65ResultCode Mutex::TryAcquire(VAddr address, Handle holding_thread_handle,
61 Handle requesting_thread_handle) { 66 Handle requesting_thread_handle) {
62 // The mutex address must be 4-byte aligned 67 // The mutex address must be 4-byte aligned
63 if ((address % sizeof(u32)) != 0) { 68 if ((address % sizeof(u32)) != 0) {
64 return ERR_INVALID_ADDRESS; 69 return ERR_INVALID_ADDRESS;
65 } 70 }
66 71
72 const auto& handle_table = system.Kernel().CurrentProcess()->GetHandleTable();
73 Thread* const current_thread = system.CurrentScheduler().GetCurrentThread();
67 SharedPtr<Thread> holding_thread = handle_table.Get<Thread>(holding_thread_handle); 74 SharedPtr<Thread> holding_thread = handle_table.Get<Thread>(holding_thread_handle);
68 SharedPtr<Thread> requesting_thread = handle_table.Get<Thread>(requesting_thread_handle); 75 SharedPtr<Thread> requesting_thread = handle_table.Get<Thread>(requesting_thread_handle);
69 76
70 // TODO(Subv): It is currently unknown if it is possible to lock a mutex in behalf of another 77 // TODO(Subv): It is currently unknown if it is possible to lock a mutex in behalf of another
71 // thread. 78 // thread.
72 ASSERT(requesting_thread == GetCurrentThread()); 79 ASSERT(requesting_thread == current_thread);
73 80
74 u32 addr_value = Memory::Read32(address); 81 const u32 addr_value = Memory::Read32(address);
75 82
76 // If the mutex isn't being held, just return success. 83 // If the mutex isn't being held, just return success.
77 if (addr_value != (holding_thread_handle | Mutex::MutexHasWaitersFlag)) { 84 if (addr_value != (holding_thread_handle | Mutex::MutexHasWaitersFlag)) {
78 return RESULT_SUCCESS; 85 return RESULT_SUCCESS;
79 } 86 }
80 87
81 if (holding_thread == nullptr) 88 if (holding_thread == nullptr) {
82 return ERR_INVALID_HANDLE; 89 return ERR_INVALID_HANDLE;
90 }
83 91
84 // Wait until the mutex is released 92 // Wait until the mutex is released
85 GetCurrentThread()->SetMutexWaitAddress(address); 93 current_thread->SetMutexWaitAddress(address);
86 GetCurrentThread()->SetWaitHandle(requesting_thread_handle); 94 current_thread->SetWaitHandle(requesting_thread_handle);
87 95
88 GetCurrentThread()->SetStatus(ThreadStatus::WaitMutex); 96 current_thread->SetStatus(ThreadStatus::WaitMutex);
89 GetCurrentThread()->InvalidateWakeupCallback(); 97 current_thread->InvalidateWakeupCallback();
90 98
91 // Update the lock holder thread's priority to prevent priority inversion. 99 // Update the lock holder thread's priority to prevent priority inversion.
92 holding_thread->AddMutexWaiter(GetCurrentThread()); 100 holding_thread->AddMutexWaiter(current_thread);
93 101
94 Core::System::GetInstance().PrepareReschedule(); 102 system.PrepareReschedule();
95 103
96 return RESULT_SUCCESS; 104 return RESULT_SUCCESS;
97} 105}
@@ -102,7 +110,8 @@ ResultCode Mutex::Release(VAddr address) {
102 return ERR_INVALID_ADDRESS; 110 return ERR_INVALID_ADDRESS;
103 } 111 }
104 112
105 auto [thread, num_waiters] = GetHighestPriorityMutexWaitingThread(GetCurrentThread(), address); 113 auto* const current_thread = system.CurrentScheduler().GetCurrentThread();
114 auto [thread, num_waiters] = GetHighestPriorityMutexWaitingThread(current_thread, address);
106 115
107 // There are no more threads waiting for the mutex, release it completely. 116 // There are no more threads waiting for the mutex, release it completely.
108 if (thread == nullptr) { 117 if (thread == nullptr) {
@@ -111,7 +120,7 @@ ResultCode Mutex::Release(VAddr address) {
111 } 120 }
112 121
113 // Transfer the ownership of the mutex from the previous owner to the new one. 122 // Transfer the ownership of the mutex from the previous owner to the new one.
114 TransferMutexOwnership(address, GetCurrentThread(), thread); 123 TransferMutexOwnership(address, current_thread, thread);
115 124
116 u32 mutex_value = thread->GetWaitHandle(); 125 u32 mutex_value = thread->GetWaitHandle();
117 126
diff --git a/src/core/hle/kernel/mutex.h b/src/core/hle/kernel/mutex.h
index 81e62d497..b904de2e8 100644
--- a/src/core/hle/kernel/mutex.h
+++ b/src/core/hle/kernel/mutex.h
@@ -5,32 +5,34 @@
5#pragma once 5#pragma once
6 6
7#include "common/common_types.h" 7#include "common/common_types.h"
8#include "core/hle/kernel/object.h"
9 8
10union ResultCode; 9union ResultCode;
11 10
12namespace Kernel { 11namespace Core {
12class System;
13}
13 14
14class HandleTable; 15namespace Kernel {
15class Thread;
16 16
17class Mutex final { 17class Mutex final {
18public: 18public:
19 explicit Mutex(Core::System& system);
20 ~Mutex();
21
19 /// Flag that indicates that a mutex still has threads waiting for it. 22 /// Flag that indicates that a mutex still has threads waiting for it.
20 static constexpr u32 MutexHasWaitersFlag = 0x40000000; 23 static constexpr u32 MutexHasWaitersFlag = 0x40000000;
21 /// Mask of the bits in a mutex address value that contain the mutex owner. 24 /// Mask of the bits in a mutex address value that contain the mutex owner.
22 static constexpr u32 MutexOwnerMask = 0xBFFFFFFF; 25 static constexpr u32 MutexOwnerMask = 0xBFFFFFFF;
23 26
24 /// Attempts to acquire a mutex at the specified address. 27 /// Attempts to acquire a mutex at the specified address.
25 static ResultCode TryAcquire(HandleTable& handle_table, VAddr address, 28 ResultCode TryAcquire(VAddr address, Handle holding_thread_handle,
26 Handle holding_thread_handle, Handle requesting_thread_handle); 29 Handle requesting_thread_handle);
27 30
28 /// Releases the mutex at the specified address. 31 /// Releases the mutex at the specified address.
29 static ResultCode Release(VAddr address); 32 ResultCode Release(VAddr address);
30 33
31private: 34private:
32 Mutex() = default; 35 Core::System& system;
33 ~Mutex() = default;
34}; 36};
35 37
36} // namespace Kernel 38} // namespace Kernel
diff --git a/src/core/hle/kernel/object.cpp b/src/core/hle/kernel/object.cpp
index 8870463d0..10431e94c 100644
--- a/src/core/hle/kernel/object.cpp
+++ b/src/core/hle/kernel/object.cpp
@@ -23,7 +23,7 @@ bool Object::IsWaitable() const {
23 case HandleType::Unknown: 23 case HandleType::Unknown:
24 case HandleType::WritableEvent: 24 case HandleType::WritableEvent:
25 case HandleType::SharedMemory: 25 case HandleType::SharedMemory:
26 case HandleType::AddressArbiter: 26 case HandleType::TransferMemory:
27 case HandleType::ResourceLimit: 27 case HandleType::ResourceLimit:
28 case HandleType::ClientPort: 28 case HandleType::ClientPort:
29 case HandleType::ClientSession: 29 case HandleType::ClientSession:
diff --git a/src/core/hle/kernel/object.h b/src/core/hle/kernel/object.h
index 4c2505908..332876c27 100644
--- a/src/core/hle/kernel/object.h
+++ b/src/core/hle/kernel/object.h
@@ -22,9 +22,9 @@ enum class HandleType : u32 {
22 WritableEvent, 22 WritableEvent,
23 ReadableEvent, 23 ReadableEvent,
24 SharedMemory, 24 SharedMemory,
25 TransferMemory,
25 Thread, 26 Thread,
26 Process, 27 Process,
27 AddressArbiter,
28 ResourceLimit, 28 ResourceLimit,
29 ClientPort, 29 ClientPort,
30 ServerPort, 30 ServerPort,
diff --git a/src/core/hle/kernel/process.cpp b/src/core/hle/kernel/process.cpp
index c5aa19afa..041267318 100644
--- a/src/core/hle/kernel/process.cpp
+++ b/src/core/hle/kernel/process.cpp
@@ -5,10 +5,12 @@
5#include <algorithm> 5#include <algorithm>
6#include <memory> 6#include <memory>
7#include <random> 7#include <random>
8#include "common/alignment.h"
8#include "common/assert.h" 9#include "common/assert.h"
9#include "common/logging/log.h" 10#include "common/logging/log.h"
10#include "core/core.h" 11#include "core/core.h"
11#include "core/file_sys/program_metadata.h" 12#include "core/file_sys/program_metadata.h"
13#include "core/hle/kernel/code_set.h"
12#include "core/hle/kernel/errors.h" 14#include "core/hle/kernel/errors.h"
13#include "core/hle/kernel/kernel.h" 15#include "core/hle/kernel/kernel.h"
14#include "core/hle/kernel/process.h" 16#include "core/hle/kernel/process.h"
@@ -31,7 +33,7 @@ namespace {
31 */ 33 */
32void SetupMainThread(Process& owner_process, KernelCore& kernel, VAddr entry_point, u32 priority) { 34void SetupMainThread(Process& owner_process, KernelCore& kernel, VAddr entry_point, u32 priority) {
33 // Setup page table so we can write to memory 35 // Setup page table so we can write to memory
34 SetCurrentPageTable(&owner_process.VMManager().page_table); 36 Memory::SetCurrentPageTable(&owner_process.VMManager().page_table);
35 37
36 // Initialize new "main" thread 38 // Initialize new "main" thread
37 const VAddr stack_top = owner_process.VMManager().GetTLSIORegionEndAddress(); 39 const VAddr stack_top = owner_process.VMManager().GetTLSIORegionEndAddress();
@@ -50,12 +52,10 @@ void SetupMainThread(Process& owner_process, KernelCore& kernel, VAddr entry_poi
50} 52}
51} // Anonymous namespace 53} // Anonymous namespace
52 54
53CodeSet::CodeSet() = default; 55SharedPtr<Process> Process::Create(Core::System& system, std::string&& name) {
54CodeSet::~CodeSet() = default; 56 auto& kernel = system.Kernel();
55
56SharedPtr<Process> Process::Create(KernelCore& kernel, std::string&& name) {
57 SharedPtr<Process> process(new Process(kernel));
58 57
58 SharedPtr<Process> process(new Process(system));
59 process->name = std::move(name); 59 process->name = std::move(name);
60 process->resource_limit = kernel.GetSystemResourceLimit(); 60 process->resource_limit = kernel.GetSystemResourceLimit();
61 process->status = ProcessStatus::Created; 61 process->status = ProcessStatus::Created;
@@ -76,6 +76,18 @@ SharedPtr<ResourceLimit> Process::GetResourceLimit() const {
76 return resource_limit; 76 return resource_limit;
77} 77}
78 78
79u64 Process::GetTotalPhysicalMemoryUsed() const {
80 return vm_manager.GetCurrentHeapSize() + main_thread_stack_size + code_memory_size;
81}
82
83void Process::RegisterThread(const Thread* thread) {
84 thread_list.push_back(thread);
85}
86
87void Process::UnregisterThread(const Thread* thread) {
88 thread_list.remove(thread);
89}
90
79ResultCode Process::ClearSignalState() { 91ResultCode Process::ClearSignalState() {
80 if (status == ProcessStatus::Exited) { 92 if (status == ProcessStatus::Exited) {
81 LOG_ERROR(Kernel, "called on a terminated process instance."); 93 LOG_ERROR(Kernel, "called on a terminated process instance.");
@@ -99,17 +111,26 @@ ResultCode Process::LoadFromMetadata(const FileSys::ProgramMetadata& metadata) {
99 vm_manager.Reset(metadata.GetAddressSpaceType()); 111 vm_manager.Reset(metadata.GetAddressSpaceType());
100 112
101 const auto& caps = metadata.GetKernelCapabilities(); 113 const auto& caps = metadata.GetKernelCapabilities();
102 return capabilities.InitializeForUserProcess(caps.data(), caps.size(), vm_manager); 114 const auto capability_init_result =
115 capabilities.InitializeForUserProcess(caps.data(), caps.size(), vm_manager);
116 if (capability_init_result.IsError()) {
117 return capability_init_result;
118 }
119
120 return handle_table.SetSize(capabilities.GetHandleTableSize());
103} 121}
104 122
105void Process::Run(VAddr entry_point, s32 main_thread_priority, u32 stack_size) { 123void Process::Run(VAddr entry_point, s32 main_thread_priority, u64 stack_size) {
124 // The kernel always ensures that the given stack size is page aligned.
125 main_thread_stack_size = Common::AlignUp(stack_size, Memory::PAGE_SIZE);
126
106 // Allocate and map the main thread stack 127 // Allocate and map the main thread stack
107 // TODO(bunnei): This is heap area that should be allocated by the kernel and not mapped as part 128 // TODO(bunnei): This is heap area that should be allocated by the kernel and not mapped as part
108 // of the user address space. 129 // of the user address space.
130 const VAddr mapping_address = vm_manager.GetTLSIORegionEndAddress() - main_thread_stack_size;
109 vm_manager 131 vm_manager
110 .MapMemoryBlock(vm_manager.GetTLSIORegionEndAddress() - stack_size, 132 .MapMemoryBlock(mapping_address, std::make_shared<std::vector<u8>>(main_thread_stack_size),
111 std::make_shared<std::vector<u8>>(stack_size, 0), 0, stack_size, 133 0, main_thread_stack_size, MemoryState::Stack)
112 MemoryState::Stack)
113 .Unwrap(); 134 .Unwrap();
114 135
115 vm_manager.LogLayout(); 136 vm_manager.LogLayout();
@@ -126,7 +147,7 @@ void Process::PrepareForTermination() {
126 if (thread->GetOwnerProcess() != this) 147 if (thread->GetOwnerProcess() != this)
127 continue; 148 continue;
128 149
129 if (thread == GetCurrentThread()) 150 if (thread == system.CurrentScheduler().GetCurrentThread())
130 continue; 151 continue;
131 152
132 // TODO(Subv): When are the other running/ready threads terminated? 153 // TODO(Subv): When are the other running/ready threads terminated?
@@ -138,7 +159,6 @@ void Process::PrepareForTermination() {
138 } 159 }
139 }; 160 };
140 161
141 const auto& system = Core::System::GetInstance();
142 stop_threads(system.Scheduler(0).GetThreadList()); 162 stop_threads(system.Scheduler(0).GetThreadList());
143 stop_threads(system.Scheduler(1).GetThreadList()); 163 stop_threads(system.Scheduler(1).GetThreadList());
144 stop_threads(system.Scheduler(2).GetThreadList()); 164 stop_threads(system.Scheduler(2).GetThreadList());
@@ -206,35 +226,38 @@ void Process::FreeTLSSlot(VAddr tls_address) {
206} 226}
207 227
208void Process::LoadModule(CodeSet module_, VAddr base_addr) { 228void Process::LoadModule(CodeSet module_, VAddr base_addr) {
209 const auto MapSegment = [&](CodeSet::Segment& segment, VMAPermission permissions, 229 const auto memory = std::make_shared<std::vector<u8>>(std::move(module_.memory));
230
231 const auto MapSegment = [&](const CodeSet::Segment& segment, VMAPermission permissions,
210 MemoryState memory_state) { 232 MemoryState memory_state) {
211 const auto vma = vm_manager 233 const auto vma = vm_manager
212 .MapMemoryBlock(segment.addr + base_addr, module_.memory, 234 .MapMemoryBlock(segment.addr + base_addr, memory, segment.offset,
213 segment.offset, segment.size, memory_state) 235 segment.size, memory_state)
214 .Unwrap(); 236 .Unwrap();
215 vm_manager.Reprotect(vma, permissions); 237 vm_manager.Reprotect(vma, permissions);
216 }; 238 };
217 239
218 // Map CodeSet segments 240 // Map CodeSet segments
219 MapSegment(module_.CodeSegment(), VMAPermission::ReadExecute, MemoryState::CodeStatic); 241 MapSegment(module_.CodeSegment(), VMAPermission::ReadExecute, MemoryState::Code);
220 MapSegment(module_.RODataSegment(), VMAPermission::Read, MemoryState::CodeMutable); 242 MapSegment(module_.RODataSegment(), VMAPermission::Read, MemoryState::CodeData);
221 MapSegment(module_.DataSegment(), VMAPermission::ReadWrite, MemoryState::CodeMutable); 243 MapSegment(module_.DataSegment(), VMAPermission::ReadWrite, MemoryState::CodeData);
244
245 code_memory_size += module_.memory.size();
222 246
223 // Clear instruction cache in CPU JIT 247 // Clear instruction cache in CPU JIT
224 Core::System::GetInstance().ArmInterface(0).ClearInstructionCache(); 248 system.InvalidateCpuInstructionCaches();
225 Core::System::GetInstance().ArmInterface(1).ClearInstructionCache();
226 Core::System::GetInstance().ArmInterface(2).ClearInstructionCache();
227 Core::System::GetInstance().ArmInterface(3).ClearInstructionCache();
228} 249}
229 250
230Kernel::Process::Process(KernelCore& kernel) : WaitObject{kernel} {} 251Process::Process(Core::System& system)
231Kernel::Process::~Process() {} 252 : WaitObject{system.Kernel()}, address_arbiter{system}, mutex{system}, system{system} {}
253
254Process::~Process() = default;
232 255
233void Process::Acquire(Thread* thread) { 256void Process::Acquire(Thread* thread) {
234 ASSERT_MSG(!ShouldWait(thread), "Object unavailable!"); 257 ASSERT_MSG(!ShouldWait(thread), "Object unavailable!");
235} 258}
236 259
237bool Process::ShouldWait(Thread* thread) const { 260bool Process::ShouldWait(const Thread* thread) const {
238 return !is_signaled; 261 return !is_signaled;
239} 262}
240 263
diff --git a/src/core/hle/kernel/process.h b/src/core/hle/kernel/process.h
index dcc57ae9f..f060f2a3b 100644
--- a/src/core/hle/kernel/process.h
+++ b/src/core/hle/kernel/process.h
@@ -7,17 +7,23 @@
7#include <array> 7#include <array>
8#include <bitset> 8#include <bitset>
9#include <cstddef> 9#include <cstddef>
10#include <memory> 10#include <list>
11#include <string> 11#include <string>
12#include <vector> 12#include <vector>
13#include <boost/container/static_vector.hpp> 13#include <boost/container/static_vector.hpp>
14#include "common/common_types.h" 14#include "common/common_types.h"
15#include "core/hle/kernel/address_arbiter.h"
15#include "core/hle/kernel/handle_table.h" 16#include "core/hle/kernel/handle_table.h"
17#include "core/hle/kernel/mutex.h"
16#include "core/hle/kernel/process_capability.h" 18#include "core/hle/kernel/process_capability.h"
17#include "core/hle/kernel/vm_manager.h" 19#include "core/hle/kernel/vm_manager.h"
18#include "core/hle/kernel/wait_object.h" 20#include "core/hle/kernel/wait_object.h"
19#include "core/hle/result.h" 21#include "core/hle/result.h"
20 22
23namespace Core {
24class System;
25}
26
21namespace FileSys { 27namespace FileSys {
22class ProgramMetadata; 28class ProgramMetadata;
23} 29}
@@ -28,13 +34,7 @@ class KernelCore;
28class ResourceLimit; 34class ResourceLimit;
29class Thread; 35class Thread;
30 36
31struct AddressMapping { 37struct CodeSet;
32 // Address and size must be page-aligned
33 VAddr address;
34 u64 size;
35 bool read_only;
36 bool unk_flag;
37};
38 38
39enum class MemoryRegion : u16 { 39enum class MemoryRegion : u16 {
40 APPLICATION = 1, 40 APPLICATION = 1,
@@ -60,46 +60,6 @@ enum class ProcessStatus {
60 DebugBreak, 60 DebugBreak,
61}; 61};
62 62
63struct CodeSet final {
64 struct Segment {
65 std::size_t offset = 0;
66 VAddr addr = 0;
67 u32 size = 0;
68 };
69
70 explicit CodeSet();
71 ~CodeSet();
72
73 Segment& CodeSegment() {
74 return segments[0];
75 }
76
77 const Segment& CodeSegment() const {
78 return segments[0];
79 }
80
81 Segment& RODataSegment() {
82 return segments[1];
83 }
84
85 const Segment& RODataSegment() const {
86 return segments[1];
87 }
88
89 Segment& DataSegment() {
90 return segments[2];
91 }
92
93 const Segment& DataSegment() const {
94 return segments[2];
95 }
96
97 std::shared_ptr<std::vector<u8>> memory;
98
99 std::array<Segment, 3> segments;
100 VAddr entrypoint = 0;
101};
102
103class Process final : public WaitObject { 63class Process final : public WaitObject {
104public: 64public:
105 enum : u64 { 65 enum : u64 {
@@ -116,7 +76,7 @@ public:
116 76
117 static constexpr std::size_t RANDOM_ENTROPY_SIZE = 4; 77 static constexpr std::size_t RANDOM_ENTROPY_SIZE = 4;
118 78
119 static SharedPtr<Process> Create(KernelCore& kernel, std::string&& name); 79 static SharedPtr<Process> Create(Core::System& system, std::string&& name);
120 80
121 std::string GetTypeName() const override { 81 std::string GetTypeName() const override {
122 return "Process"; 82 return "Process";
@@ -150,6 +110,26 @@ public:
150 return handle_table; 110 return handle_table;
151 } 111 }
152 112
113 /// Gets a reference to the process' address arbiter.
114 AddressArbiter& GetAddressArbiter() {
115 return address_arbiter;
116 }
117
118 /// Gets a const reference to the process' address arbiter.
119 const AddressArbiter& GetAddressArbiter() const {
120 return address_arbiter;
121 }
122
123 /// Gets a reference to the process' mutex lock.
124 Mutex& GetMutex() {
125 return mutex;
126 }
127
128 /// Gets a const reference to the process' mutex lock
129 const Mutex& GetMutex() const {
130 return mutex;
131 }
132
153 /// Gets the current status of the process 133 /// Gets the current status of the process
154 ProcessStatus GetStatus() const { 134 ProcessStatus GetStatus() const {
155 return status; 135 return status;
@@ -207,6 +187,22 @@ public:
207 return random_entropy.at(index); 187 return random_entropy.at(index);
208 } 188 }
209 189
190 /// Retrieves the total physical memory used by this process in bytes.
191 u64 GetTotalPhysicalMemoryUsed() const;
192
193 /// Gets the list of all threads created with this process as their owner.
194 const std::list<const Thread*>& GetThreadList() const {
195 return thread_list;
196 }
197
198 /// Registers a thread as being created under this process,
199 /// adding it to this process' thread list.
200 void RegisterThread(const Thread* thread);
201
202 /// Unregisters a thread from this process, removing it
203 /// from this process' thread list.
204 void UnregisterThread(const Thread* thread);
205
210 /// Clears the signaled state of the process if and only if it's signaled. 206 /// Clears the signaled state of the process if and only if it's signaled.
211 /// 207 ///
212 /// @pre The process must not be already terminated. If this is called on a 208 /// @pre The process must not be already terminated. If this is called on a
@@ -231,7 +227,7 @@ public:
231 /** 227 /**
232 * Applies address space changes and launches the process main thread. 228 * Applies address space changes and launches the process main thread.
233 */ 229 */
234 void Run(VAddr entry_point, s32 main_thread_priority, u32 stack_size); 230 void Run(VAddr entry_point, s32 main_thread_priority, u64 stack_size);
235 231
236 /** 232 /**
237 * Prepares a process for termination by stopping all of its threads 233 * Prepares a process for termination by stopping all of its threads
@@ -251,11 +247,11 @@ public:
251 void FreeTLSSlot(VAddr tls_address); 247 void FreeTLSSlot(VAddr tls_address);
252 248
253private: 249private:
254 explicit Process(KernelCore& kernel); 250 explicit Process(Core::System& system);
255 ~Process() override; 251 ~Process() override;
256 252
257 /// Checks if the specified thread should wait until this process is available. 253 /// Checks if the specified thread should wait until this process is available.
258 bool ShouldWait(Thread* thread) const override; 254 bool ShouldWait(const Thread* thread) const override;
259 255
260 /// Acquires/locks this process for the specified thread if it's available. 256 /// Acquires/locks this process for the specified thread if it's available.
261 void Acquire(Thread* thread) override; 257 void Acquire(Thread* thread) override;
@@ -268,6 +264,12 @@ private:
268 /// Memory manager for this process. 264 /// Memory manager for this process.
269 Kernel::VMManager vm_manager; 265 Kernel::VMManager vm_manager;
270 266
267 /// Size of the main thread's stack in bytes.
268 u64 main_thread_stack_size = 0;
269
270 /// Size of the loaded code memory in bytes.
271 u64 code_memory_size = 0;
272
271 /// Current status of the process 273 /// Current status of the process
272 ProcessStatus status; 274 ProcessStatus status;
273 275
@@ -309,9 +311,24 @@ private:
309 /// Per-process handle table for storing created object handles in. 311 /// Per-process handle table for storing created object handles in.
310 HandleTable handle_table; 312 HandleTable handle_table;
311 313
314 /// Per-process address arbiter.
315 AddressArbiter address_arbiter;
316
317 /// The per-process mutex lock instance used for handling various
318 /// forms of services, such as lock arbitration, and condition
319 /// variable related facilities.
320 Mutex mutex;
321
312 /// Random values for svcGetInfo RandomEntropy 322 /// Random values for svcGetInfo RandomEntropy
313 std::array<u64, RANDOM_ENTROPY_SIZE> random_entropy; 323 std::array<u64, RANDOM_ENTROPY_SIZE> random_entropy;
314 324
325 /// List of threads that are running with this process as their owner.
326 std::list<const Thread*> thread_list;
327
328 /// System context
329 Core::System& system;
330
331 /// Name of this process
315 std::string name; 332 std::string name;
316}; 333};
317 334
diff --git a/src/core/hle/kernel/process_capability.cpp b/src/core/hle/kernel/process_capability.cpp
index 3a2164b25..583e35b79 100644
--- a/src/core/hle/kernel/process_capability.cpp
+++ b/src/core/hle/kernel/process_capability.cpp
@@ -96,7 +96,7 @@ void ProcessCapabilities::InitializeForMetadatalessProcess() {
96 interrupt_capabilities.set(); 96 interrupt_capabilities.set();
97 97
98 // Allow using the maximum possible amount of handles 98 // Allow using the maximum possible amount of handles
99 handle_table_size = static_cast<u32>(HandleTable::MAX_COUNT); 99 handle_table_size = static_cast<s32>(HandleTable::MAX_COUNT);
100 100
101 // Allow all debugging capabilities. 101 // Allow all debugging capabilities.
102 is_debuggable = true; 102 is_debuggable = true;
@@ -337,7 +337,7 @@ ResultCode ProcessCapabilities::HandleHandleTableFlags(u32 flags) {
337 return ERR_RESERVED_VALUE; 337 return ERR_RESERVED_VALUE;
338 } 338 }
339 339
340 handle_table_size = (flags >> 16) & 0x3FF; 340 handle_table_size = static_cast<s32>((flags >> 16) & 0x3FF);
341 return RESULT_SUCCESS; 341 return RESULT_SUCCESS;
342} 342}
343 343
diff --git a/src/core/hle/kernel/process_capability.h b/src/core/hle/kernel/process_capability.h
index fbc8812a3..5cdd80747 100644
--- a/src/core/hle/kernel/process_capability.h
+++ b/src/core/hle/kernel/process_capability.h
@@ -156,7 +156,7 @@ public:
156 } 156 }
157 157
158 /// Gets the number of total allowable handles for the process' handle table. 158 /// Gets the number of total allowable handles for the process' handle table.
159 u32 GetHandleTableSize() const { 159 s32 GetHandleTableSize() const {
160 return handle_table_size; 160 return handle_table_size;
161 } 161 }
162 162
@@ -252,7 +252,7 @@ private:
252 u64 core_mask = 0; 252 u64 core_mask = 0;
253 u64 priority_mask = 0; 253 u64 priority_mask = 0;
254 254
255 u32 handle_table_size = 0; 255 s32 handle_table_size = 0;
256 u32 kernel_version = 0; 256 u32 kernel_version = 0;
257 257
258 ProgramType program_type = ProgramType::SysModule; 258 ProgramType program_type = ProgramType::SysModule;
diff --git a/src/core/hle/kernel/readable_event.cpp b/src/core/hle/kernel/readable_event.cpp
index 0e5083f70..c2b798a4e 100644
--- a/src/core/hle/kernel/readable_event.cpp
+++ b/src/core/hle/kernel/readable_event.cpp
@@ -14,7 +14,7 @@ namespace Kernel {
14ReadableEvent::ReadableEvent(KernelCore& kernel) : WaitObject{kernel} {} 14ReadableEvent::ReadableEvent(KernelCore& kernel) : WaitObject{kernel} {}
15ReadableEvent::~ReadableEvent() = default; 15ReadableEvent::~ReadableEvent() = default;
16 16
17bool ReadableEvent::ShouldWait(Thread* thread) const { 17bool ReadableEvent::ShouldWait(const Thread* thread) const {
18 return !signaled; 18 return !signaled;
19} 19}
20 20
diff --git a/src/core/hle/kernel/readable_event.h b/src/core/hle/kernel/readable_event.h
index 77a9c362c..2eb9dcbb7 100644
--- a/src/core/hle/kernel/readable_event.h
+++ b/src/core/hle/kernel/readable_event.h
@@ -36,7 +36,7 @@ public:
36 return HANDLE_TYPE; 36 return HANDLE_TYPE;
37 } 37 }
38 38
39 bool ShouldWait(Thread* thread) const override; 39 bool ShouldWait(const Thread* thread) const override;
40 void Acquire(Thread* thread) override; 40 void Acquire(Thread* thread) override;
41 41
42 /// Unconditionally clears the readable event's state. 42 /// Unconditionally clears the readable event's state.
diff --git a/src/core/hle/kernel/resource_limit.cpp b/src/core/hle/kernel/resource_limit.cpp
index 2f9695005..173f69915 100644
--- a/src/core/hle/kernel/resource_limit.cpp
+++ b/src/core/hle/kernel/resource_limit.cpp
@@ -16,11 +16,8 @@ constexpr std::size_t ResourceTypeToIndex(ResourceType type) {
16ResourceLimit::ResourceLimit(KernelCore& kernel) : Object{kernel} {} 16ResourceLimit::ResourceLimit(KernelCore& kernel) : Object{kernel} {}
17ResourceLimit::~ResourceLimit() = default; 17ResourceLimit::~ResourceLimit() = default;
18 18
19SharedPtr<ResourceLimit> ResourceLimit::Create(KernelCore& kernel, std::string name) { 19SharedPtr<ResourceLimit> ResourceLimit::Create(KernelCore& kernel) {
20 SharedPtr<ResourceLimit> resource_limit(new ResourceLimit(kernel)); 20 return new ResourceLimit(kernel);
21
22 resource_limit->name = std::move(name);
23 return resource_limit;
24} 21}
25 22
26s64 ResourceLimit::GetCurrentResourceValue(ResourceType resource) const { 23s64 ResourceLimit::GetCurrentResourceValue(ResourceType resource) const {
diff --git a/src/core/hle/kernel/resource_limit.h b/src/core/hle/kernel/resource_limit.h
index 59dc11c22..70e09858a 100644
--- a/src/core/hle/kernel/resource_limit.h
+++ b/src/core/hle/kernel/resource_limit.h
@@ -31,16 +31,14 @@ constexpr bool IsValidResourceType(ResourceType type) {
31 31
32class ResourceLimit final : public Object { 32class ResourceLimit final : public Object {
33public: 33public:
34 /** 34 /// Creates a resource limit object.
35 * Creates a resource limit object. 35 static SharedPtr<ResourceLimit> Create(KernelCore& kernel);
36 */
37 static SharedPtr<ResourceLimit> Create(KernelCore& kernel, std::string name = "Unknown");
38 36
39 std::string GetTypeName() const override { 37 std::string GetTypeName() const override {
40 return "ResourceLimit"; 38 return "ResourceLimit";
41 } 39 }
42 std::string GetName() const override { 40 std::string GetName() const override {
43 return name; 41 return GetTypeName();
44 } 42 }
45 43
46 static const HandleType HANDLE_TYPE = HandleType::ResourceLimit; 44 static const HandleType HANDLE_TYPE = HandleType::ResourceLimit;
@@ -95,9 +93,6 @@ private:
95 ResourceArray limits{}; 93 ResourceArray limits{};
96 /// Current resource limit values. 94 /// Current resource limit values.
97 ResourceArray values{}; 95 ResourceArray values{};
98
99 /// Name of resource limit object.
100 std::string name;
101}; 96};
102 97
103} // namespace Kernel 98} // namespace Kernel
diff --git a/src/core/hle/kernel/scheduler.cpp b/src/core/hle/kernel/scheduler.cpp
index df4d6cf0a..ac501bf7f 100644
--- a/src/core/hle/kernel/scheduler.cpp
+++ b/src/core/hle/kernel/scheduler.cpp
@@ -19,7 +19,8 @@ namespace Kernel {
19 19
20std::mutex Scheduler::scheduler_mutex; 20std::mutex Scheduler::scheduler_mutex;
21 21
22Scheduler::Scheduler(Core::ARM_Interface& cpu_core) : cpu_core(cpu_core) {} 22Scheduler::Scheduler(Core::System& system, Core::ARM_Interface& cpu_core)
23 : cpu_core{cpu_core}, system{system} {}
23 24
24Scheduler::~Scheduler() { 25Scheduler::~Scheduler() {
25 for (auto& thread : thread_list) { 26 for (auto& thread : thread_list) {
@@ -28,8 +29,8 @@ Scheduler::~Scheduler() {
28} 29}
29 30
30bool Scheduler::HaveReadyThreads() const { 31bool Scheduler::HaveReadyThreads() const {
31 std::lock_guard<std::mutex> lock(scheduler_mutex); 32 std::lock_guard lock{scheduler_mutex};
32 return ready_queue.get_first() != nullptr; 33 return !ready_queue.empty();
33} 34}
34 35
35Thread* Scheduler::GetCurrentThread() const { 36Thread* Scheduler::GetCurrentThread() const {
@@ -45,23 +46,28 @@ Thread* Scheduler::PopNextReadyThread() {
45 Thread* thread = GetCurrentThread(); 46 Thread* thread = GetCurrentThread();
46 47
47 if (thread && thread->GetStatus() == ThreadStatus::Running) { 48 if (thread && thread->GetStatus() == ThreadStatus::Running) {
49 if (ready_queue.empty()) {
50 return thread;
51 }
48 // We have to do better than the current thread. 52 // We have to do better than the current thread.
49 // This call returns null when that's not possible. 53 // This call returns null when that's not possible.
50 next = ready_queue.pop_first_better(thread->GetPriority()); 54 next = ready_queue.front();
51 if (!next) { 55 if (next == nullptr || next->GetPriority() >= thread->GetPriority()) {
52 // Otherwise just keep going with the current thread
53 next = thread; 56 next = thread;
54 } 57 }
55 } else { 58 } else {
56 next = ready_queue.pop_first(); 59 if (ready_queue.empty()) {
60 return nullptr;
61 }
62 next = ready_queue.front();
57 } 63 }
58 64
59 return next; 65 return next;
60} 66}
61 67
62void Scheduler::SwitchContext(Thread* new_thread) { 68void Scheduler::SwitchContext(Thread* new_thread) {
63 Thread* const previous_thread = GetCurrentThread(); 69 Thread* previous_thread = GetCurrentThread();
64 Process* const previous_process = Core::CurrentProcess(); 70 Process* const previous_process = system.Kernel().CurrentProcess();
65 71
66 UpdateLastContextSwitchTime(previous_thread, previous_process); 72 UpdateLastContextSwitchTime(previous_thread, previous_process);
67 73
@@ -74,7 +80,7 @@ void Scheduler::SwitchContext(Thread* new_thread) {
74 if (previous_thread->GetStatus() == ThreadStatus::Running) { 80 if (previous_thread->GetStatus() == ThreadStatus::Running) {
75 // This is only the case when a reschedule is triggered without the current thread 81 // This is only the case when a reschedule is triggered without the current thread
76 // yielding execution (i.e. an event triggered, system core time-sliced, etc) 82 // yielding execution (i.e. an event triggered, system core time-sliced, etc)
77 ready_queue.push_front(previous_thread->GetPriority(), previous_thread); 83 ready_queue.add(previous_thread, previous_thread->GetPriority(), false);
78 previous_thread->SetStatus(ThreadStatus::Ready); 84 previous_thread->SetStatus(ThreadStatus::Ready);
79 } 85 }
80 } 86 }
@@ -89,13 +95,13 @@ void Scheduler::SwitchContext(Thread* new_thread) {
89 95
90 current_thread = new_thread; 96 current_thread = new_thread;
91 97
92 ready_queue.remove(new_thread->GetPriority(), new_thread); 98 ready_queue.remove(new_thread, new_thread->GetPriority());
93 new_thread->SetStatus(ThreadStatus::Running); 99 new_thread->SetStatus(ThreadStatus::Running);
94 100
95 auto* const thread_owner_process = current_thread->GetOwnerProcess(); 101 auto* const thread_owner_process = current_thread->GetOwnerProcess();
96 if (previous_process != thread_owner_process) { 102 if (previous_process != thread_owner_process) {
97 Core::System::GetInstance().Kernel().MakeCurrentProcess(thread_owner_process); 103 system.Kernel().MakeCurrentProcess(thread_owner_process);
98 SetCurrentPageTable(&Core::CurrentProcess()->VMManager().page_table); 104 Memory::SetCurrentPageTable(&thread_owner_process->VMManager().page_table);
99 } 105 }
100 106
101 cpu_core.LoadContext(new_thread->GetContext()); 107 cpu_core.LoadContext(new_thread->GetContext());
@@ -111,7 +117,7 @@ void Scheduler::SwitchContext(Thread* new_thread) {
111 117
112void Scheduler::UpdateLastContextSwitchTime(Thread* thread, Process* process) { 118void Scheduler::UpdateLastContextSwitchTime(Thread* thread, Process* process) {
113 const u64 prev_switch_ticks = last_context_switch_time; 119 const u64 prev_switch_ticks = last_context_switch_time;
114 const u64 most_recent_switch_ticks = CoreTiming::GetTicks(); 120 const u64 most_recent_switch_ticks = system.CoreTiming().GetTicks();
115 const u64 update_ticks = most_recent_switch_ticks - prev_switch_ticks; 121 const u64 update_ticks = most_recent_switch_ticks - prev_switch_ticks;
116 122
117 if (thread != nullptr) { 123 if (thread != nullptr) {
@@ -126,7 +132,7 @@ void Scheduler::UpdateLastContextSwitchTime(Thread* thread, Process* process) {
126} 132}
127 133
128void Scheduler::Reschedule() { 134void Scheduler::Reschedule() {
129 std::lock_guard<std::mutex> lock(scheduler_mutex); 135 std::lock_guard lock{scheduler_mutex};
130 136
131 Thread* cur = GetCurrentThread(); 137 Thread* cur = GetCurrentThread();
132 Thread* next = PopNextReadyThread(); 138 Thread* next = PopNextReadyThread();
@@ -142,51 +148,54 @@ void Scheduler::Reschedule() {
142 SwitchContext(next); 148 SwitchContext(next);
143} 149}
144 150
145void Scheduler::AddThread(SharedPtr<Thread> thread, u32 priority) { 151void Scheduler::AddThread(SharedPtr<Thread> thread) {
146 std::lock_guard<std::mutex> lock(scheduler_mutex); 152 std::lock_guard lock{scheduler_mutex};
147 153
148 thread_list.push_back(std::move(thread)); 154 thread_list.push_back(std::move(thread));
149 ready_queue.prepare(priority);
150} 155}
151 156
152void Scheduler::RemoveThread(Thread* thread) { 157void Scheduler::RemoveThread(Thread* thread) {
153 std::lock_guard<std::mutex> lock(scheduler_mutex); 158 std::lock_guard lock{scheduler_mutex};
154 159
155 thread_list.erase(std::remove(thread_list.begin(), thread_list.end(), thread), 160 thread_list.erase(std::remove(thread_list.begin(), thread_list.end(), thread),
156 thread_list.end()); 161 thread_list.end());
157} 162}
158 163
159void Scheduler::ScheduleThread(Thread* thread, u32 priority) { 164void Scheduler::ScheduleThread(Thread* thread, u32 priority) {
160 std::lock_guard<std::mutex> lock(scheduler_mutex); 165 std::lock_guard lock{scheduler_mutex};
161 166
162 ASSERT(thread->GetStatus() == ThreadStatus::Ready); 167 ASSERT(thread->GetStatus() == ThreadStatus::Ready);
163 ready_queue.push_back(priority, thread); 168 ready_queue.add(thread, priority);
164} 169}
165 170
166void Scheduler::UnscheduleThread(Thread* thread, u32 priority) { 171void Scheduler::UnscheduleThread(Thread* thread, u32 priority) {
167 std::lock_guard<std::mutex> lock(scheduler_mutex); 172 std::lock_guard lock{scheduler_mutex};
168 173
169 ASSERT(thread->GetStatus() == ThreadStatus::Ready); 174 ASSERT(thread->GetStatus() == ThreadStatus::Ready);
170 ready_queue.remove(priority, thread); 175 ready_queue.remove(thread, priority);
171} 176}
172 177
173void Scheduler::SetThreadPriority(Thread* thread, u32 priority) { 178void Scheduler::SetThreadPriority(Thread* thread, u32 priority) {
174 std::lock_guard<std::mutex> lock(scheduler_mutex); 179 std::lock_guard lock{scheduler_mutex};
180 if (thread->GetPriority() == priority) {
181 return;
182 }
175 183
176 // If thread was ready, adjust queues 184 // If thread was ready, adjust queues
177 if (thread->GetStatus() == ThreadStatus::Ready) 185 if (thread->GetStatus() == ThreadStatus::Ready)
178 ready_queue.move(thread, thread->GetPriority(), priority); 186 ready_queue.adjust(thread, thread->GetPriority(), priority);
179 else
180 ready_queue.prepare(priority);
181} 187}
182 188
183Thread* Scheduler::GetNextSuggestedThread(u32 core, u32 maximum_priority) const { 189Thread* Scheduler::GetNextSuggestedThread(u32 core, u32 maximum_priority) const {
184 std::lock_guard<std::mutex> lock(scheduler_mutex); 190 std::lock_guard lock{scheduler_mutex};
185 191
186 const u32 mask = 1U << core; 192 const u32 mask = 1U << core;
187 return ready_queue.get_first_filter([mask, maximum_priority](Thread const* thread) { 193 for (auto* thread : ready_queue) {
188 return (thread->GetAffinityMask() & mask) != 0 && thread->GetPriority() < maximum_priority; 194 if ((thread->GetAffinityMask() & mask) != 0 && thread->GetPriority() < maximum_priority) {
189 }); 195 return thread;
196 }
197 }
198 return nullptr;
190} 199}
191 200
192void Scheduler::YieldWithoutLoadBalancing(Thread* thread) { 201void Scheduler::YieldWithoutLoadBalancing(Thread* thread) {
@@ -198,8 +207,7 @@ void Scheduler::YieldWithoutLoadBalancing(Thread* thread) {
198 ASSERT(thread->GetPriority() < THREADPRIO_COUNT); 207 ASSERT(thread->GetPriority() < THREADPRIO_COUNT);
199 208
200 // Yield this thread -- sleep for zero time and force reschedule to different thread 209 // Yield this thread -- sleep for zero time and force reschedule to different thread
201 WaitCurrentThread_Sleep(); 210 GetCurrentThread()->Sleep(0);
202 GetCurrentThread()->WakeAfterDelay(0);
203} 211}
204 212
205void Scheduler::YieldWithLoadBalancing(Thread* thread) { 213void Scheduler::YieldWithLoadBalancing(Thread* thread) {
@@ -214,8 +222,7 @@ void Scheduler::YieldWithLoadBalancing(Thread* thread) {
214 ASSERT(priority < THREADPRIO_COUNT); 222 ASSERT(priority < THREADPRIO_COUNT);
215 223
216 // Sleep for zero time to be able to force reschedule to different thread 224 // Sleep for zero time to be able to force reschedule to different thread
217 WaitCurrentThread_Sleep(); 225 GetCurrentThread()->Sleep(0);
218 GetCurrentThread()->WakeAfterDelay(0);
219 226
220 Thread* suggested_thread = nullptr; 227 Thread* suggested_thread = nullptr;
221 228
@@ -223,8 +230,7 @@ void Scheduler::YieldWithLoadBalancing(Thread* thread) {
223 // Take the first non-nullptr one 230 // Take the first non-nullptr one
224 for (unsigned cur_core = 0; cur_core < Core::NUM_CPU_CORES; ++cur_core) { 231 for (unsigned cur_core = 0; cur_core < Core::NUM_CPU_CORES; ++cur_core) {
225 const auto res = 232 const auto res =
226 Core::System::GetInstance().CpuCore(cur_core).Scheduler().GetNextSuggestedThread( 233 system.CpuCore(cur_core).Scheduler().GetNextSuggestedThread(core, priority);
227 core, priority);
228 234
229 // If scheduler provides a suggested thread 235 // If scheduler provides a suggested thread
230 if (res != nullptr) { 236 if (res != nullptr) {
diff --git a/src/core/hle/kernel/scheduler.h b/src/core/hle/kernel/scheduler.h
index 97ced4dfc..b29bf7be8 100644
--- a/src/core/hle/kernel/scheduler.h
+++ b/src/core/hle/kernel/scheduler.h
@@ -7,13 +7,14 @@
7#include <mutex> 7#include <mutex>
8#include <vector> 8#include <vector>
9#include "common/common_types.h" 9#include "common/common_types.h"
10#include "common/thread_queue_list.h" 10#include "common/multi_level_queue.h"
11#include "core/hle/kernel/object.h" 11#include "core/hle/kernel/object.h"
12#include "core/hle/kernel/thread.h" 12#include "core/hle/kernel/thread.h"
13 13
14namespace Core { 14namespace Core {
15class ARM_Interface; 15class ARM_Interface;
16} 16class System;
17} // namespace Core
17 18
18namespace Kernel { 19namespace Kernel {
19 20
@@ -21,7 +22,7 @@ class Process;
21 22
22class Scheduler final { 23class Scheduler final {
23public: 24public:
24 explicit Scheduler(Core::ARM_Interface& cpu_core); 25 explicit Scheduler(Core::System& system, Core::ARM_Interface& cpu_core);
25 ~Scheduler(); 26 ~Scheduler();
26 27
27 /// Returns whether there are any threads that are ready to run. 28 /// Returns whether there are any threads that are ready to run.
@@ -37,7 +38,7 @@ public:
37 u64 GetLastContextSwitchTicks() const; 38 u64 GetLastContextSwitchTicks() const;
38 39
39 /// Adds a new thread to the scheduler 40 /// Adds a new thread to the scheduler
40 void AddThread(SharedPtr<Thread> thread, u32 priority); 41 void AddThread(SharedPtr<Thread> thread);
41 42
42 /// Removes a thread from the scheduler 43 /// Removes a thread from the scheduler
43 void RemoveThread(Thread* thread); 44 void RemoveThread(Thread* thread);
@@ -155,13 +156,14 @@ private:
155 std::vector<SharedPtr<Thread>> thread_list; 156 std::vector<SharedPtr<Thread>> thread_list;
156 157
157 /// Lists only ready thread ids. 158 /// Lists only ready thread ids.
158 Common::ThreadQueueList<Thread*, THREADPRIO_LOWEST + 1> ready_queue; 159 Common::MultiLevelQueue<Thread*, THREADPRIO_LOWEST + 1> ready_queue;
159 160
160 SharedPtr<Thread> current_thread = nullptr; 161 SharedPtr<Thread> current_thread = nullptr;
161 162
162 Core::ARM_Interface& cpu_core; 163 Core::ARM_Interface& cpu_core;
163 u64 last_context_switch_time = 0; 164 u64 last_context_switch_time = 0;
164 165
166 Core::System& system;
165 static std::mutex scheduler_mutex; 167 static std::mutex scheduler_mutex;
166}; 168};
167 169
diff --git a/src/core/hle/kernel/server_port.cpp b/src/core/hle/kernel/server_port.cpp
index d6ceeb2da..708fdf9e1 100644
--- a/src/core/hle/kernel/server_port.cpp
+++ b/src/core/hle/kernel/server_port.cpp
@@ -26,7 +26,11 @@ ResultVal<SharedPtr<ServerSession>> ServerPort::Accept() {
26 return MakeResult(std::move(session)); 26 return MakeResult(std::move(session));
27} 27}
28 28
29bool ServerPort::ShouldWait(Thread* thread) const { 29void ServerPort::AppendPendingSession(SharedPtr<ServerSession> pending_session) {
30 pending_sessions.push_back(std::move(pending_session));
31}
32
33bool ServerPort::ShouldWait(const Thread* thread) const {
30 // If there are no pending sessions, we wait until a new one is added. 34 // If there are no pending sessions, we wait until a new one is added.
31 return pending_sessions.empty(); 35 return pending_sessions.empty();
32} 36}
diff --git a/src/core/hle/kernel/server_port.h b/src/core/hle/kernel/server_port.h
index e52f8245f..76293cb8b 100644
--- a/src/core/hle/kernel/server_port.h
+++ b/src/core/hle/kernel/server_port.h
@@ -22,6 +22,8 @@ class SessionRequestHandler;
22 22
23class ServerPort final : public WaitObject { 23class ServerPort final : public WaitObject {
24public: 24public:
25 using HLEHandler = std::shared_ptr<SessionRequestHandler>;
26
25 /** 27 /**
26 * Creates a pair of ServerPort and an associated ClientPort. 28 * Creates a pair of ServerPort and an associated ClientPort.
27 * 29 *
@@ -51,29 +53,44 @@ public:
51 */ 53 */
52 ResultVal<SharedPtr<ServerSession>> Accept(); 54 ResultVal<SharedPtr<ServerSession>> Accept();
53 55
56 /// Whether or not this server port has an HLE handler available.
57 bool HasHLEHandler() const {
58 return hle_handler != nullptr;
59 }
60
61 /// Gets the HLE handler for this port.
62 HLEHandler GetHLEHandler() const {
63 return hle_handler;
64 }
65
54 /** 66 /**
55 * Sets the HLE handler template for the port. ServerSessions crated by connecting to this port 67 * Sets the HLE handler template for the port. ServerSessions crated by connecting to this port
56 * will inherit a reference to this handler. 68 * will inherit a reference to this handler.
57 */ 69 */
58 void SetHleHandler(std::shared_ptr<SessionRequestHandler> hle_handler_) { 70 void SetHleHandler(HLEHandler hle_handler_) {
59 hle_handler = std::move(hle_handler_); 71 hle_handler = std::move(hle_handler_);
60 } 72 }
61 73
62 std::string name; ///< Name of port (optional) 74 /// Appends a ServerSession to the collection of ServerSessions
75 /// waiting to be accepted by this port.
76 void AppendPendingSession(SharedPtr<ServerSession> pending_session);
77
78 bool ShouldWait(const Thread* thread) const override;
79 void Acquire(Thread* thread) override;
80
81private:
82 explicit ServerPort(KernelCore& kernel);
83 ~ServerPort() override;
63 84
64 /// ServerSessions waiting to be accepted by the port 85 /// ServerSessions waiting to be accepted by the port
65 std::vector<SharedPtr<ServerSession>> pending_sessions; 86 std::vector<SharedPtr<ServerSession>> pending_sessions;
66 87
67 /// This session's HLE request handler template (optional) 88 /// This session's HLE request handler template (optional)
68 /// ServerSessions created from this port inherit a reference to this handler. 89 /// ServerSessions created from this port inherit a reference to this handler.
69 std::shared_ptr<SessionRequestHandler> hle_handler; 90 HLEHandler hle_handler;
70
71 bool ShouldWait(Thread* thread) const override;
72 void Acquire(Thread* thread) override;
73 91
74private: 92 /// Name of the port (optional)
75 explicit ServerPort(KernelCore& kernel); 93 std::string name;
76 ~ServerPort() override;
77}; 94};
78 95
79} // namespace Kernel 96} // namespace Kernel
diff --git a/src/core/hle/kernel/server_session.cpp b/src/core/hle/kernel/server_session.cpp
index 027434f92..40cec143e 100644
--- a/src/core/hle/kernel/server_session.cpp
+++ b/src/core/hle/kernel/server_session.cpp
@@ -46,7 +46,7 @@ ResultVal<SharedPtr<ServerSession>> ServerSession::Create(KernelCore& kernel, st
46 return MakeResult(std::move(server_session)); 46 return MakeResult(std::move(server_session));
47} 47}
48 48
49bool ServerSession::ShouldWait(Thread* thread) const { 49bool ServerSession::ShouldWait(const Thread* thread) const {
50 // Closed sessions should never wait, an error will be returned from svcReplyAndReceive. 50 // Closed sessions should never wait, an error will be returned from svcReplyAndReceive.
51 if (parent->client == nullptr) 51 if (parent->client == nullptr)
52 return false; 52 return false;
@@ -63,42 +63,71 @@ void ServerSession::Acquire(Thread* thread) {
63 pending_requesting_threads.pop_back(); 63 pending_requesting_threads.pop_back();
64} 64}
65 65
66void ServerSession::ClientDisconnected() {
67 // We keep a shared pointer to the hle handler to keep it alive throughout
68 // the call to ClientDisconnected, as ClientDisconnected invalidates the
69 // hle_handler member itself during the course of the function executing.
70 std::shared_ptr<SessionRequestHandler> handler = hle_handler;
71 if (handler) {
72 // Note that after this returns, this server session's hle_handler is
73 // invalidated (set to null).
74 handler->ClientDisconnected(this);
75 }
76
77 // TODO(Subv): Force a wake up of all the ServerSession's waiting threads and set
78 // their WaitSynchronization result to 0xC920181A.
79
80 // Clean up the list of client threads with pending requests, they are unneeded now that the
81 // client endpoint is closed.
82 pending_requesting_threads.clear();
83 currently_handling = nullptr;
84}
85
86void ServerSession::AppendDomainRequestHandler(std::shared_ptr<SessionRequestHandler> handler) {
87 domain_request_handlers.push_back(std::move(handler));
88}
89
90std::size_t ServerSession::NumDomainRequestHandlers() const {
91 return domain_request_handlers.size();
92}
93
66ResultCode ServerSession::HandleDomainSyncRequest(Kernel::HLERequestContext& context) { 94ResultCode ServerSession::HandleDomainSyncRequest(Kernel::HLERequestContext& context) {
67 auto* const domain_message_header = context.GetDomainMessageHeader(); 95 if (!context.HasDomainMessageHeader()) {
68 if (domain_message_header) { 96 return RESULT_SUCCESS;
69 // Set domain handlers in HLE context, used for domain objects (IPC interfaces) as inputs 97 }
70 context.SetDomainRequestHandlers(domain_request_handlers); 98
71 99 // Set domain handlers in HLE context, used for domain objects (IPC interfaces) as inputs
72 // If there is a DomainMessageHeader, then this is CommandType "Request" 100 context.SetDomainRequestHandlers(domain_request_handlers);
73 const u32 object_id{context.GetDomainMessageHeader()->object_id}; 101
74 switch (domain_message_header->command) { 102 // If there is a DomainMessageHeader, then this is CommandType "Request"
75 case IPC::DomainMessageHeader::CommandType::SendMessage: 103 const auto& domain_message_header = context.GetDomainMessageHeader();
76 if (object_id > domain_request_handlers.size()) { 104 const u32 object_id{domain_message_header.object_id};
77 LOG_CRITICAL(IPC, 105 switch (domain_message_header.command) {
78 "object_id {} is too big! This probably means a recent service call " 106 case IPC::DomainMessageHeader::CommandType::SendMessage:
79 "to {} needed to return a new interface!", 107 if (object_id > domain_request_handlers.size()) {
80 object_id, name); 108 LOG_CRITICAL(IPC,
81 UNREACHABLE(); 109 "object_id {} is too big! This probably means a recent service call "
82 return RESULT_SUCCESS; // Ignore error if asserts are off 110 "to {} needed to return a new interface!",
83 } 111 object_id, name);
84 return domain_request_handlers[object_id - 1]->HandleSyncRequest(context); 112 UNREACHABLE();
85 113 return RESULT_SUCCESS; // Ignore error if asserts are off
86 case IPC::DomainMessageHeader::CommandType::CloseVirtualHandle: {
87 LOG_DEBUG(IPC, "CloseVirtualHandle, object_id=0x{:08X}", object_id);
88
89 domain_request_handlers[object_id - 1] = nullptr;
90
91 IPC::ResponseBuilder rb{context, 2};
92 rb.Push(RESULT_SUCCESS);
93 return RESULT_SUCCESS;
94 }
95 } 114 }
115 return domain_request_handlers[object_id - 1]->HandleSyncRequest(context);
96 116
97 LOG_CRITICAL(IPC, "Unknown domain command={}", 117 case IPC::DomainMessageHeader::CommandType::CloseVirtualHandle: {
98 static_cast<int>(domain_message_header->command.Value())); 118 LOG_DEBUG(IPC, "CloseVirtualHandle, object_id=0x{:08X}", object_id);
99 ASSERT(false); 119
120 domain_request_handlers[object_id - 1] = nullptr;
121
122 IPC::ResponseBuilder rb{context, 2};
123 rb.Push(RESULT_SUCCESS);
124 return RESULT_SUCCESS;
125 }
100 } 126 }
101 127
128 LOG_CRITICAL(IPC, "Unknown domain command={}",
129 static_cast<int>(domain_message_header.command.Value()));
130 ASSERT(false);
102 return RESULT_SUCCESS; 131 return RESULT_SUCCESS;
103} 132}
104 133
diff --git a/src/core/hle/kernel/server_session.h b/src/core/hle/kernel/server_session.h
index e0e9d64c8..79b84bade 100644
--- a/src/core/hle/kernel/server_session.h
+++ b/src/core/hle/kernel/server_session.h
@@ -46,6 +46,14 @@ public:
46 return HANDLE_TYPE; 46 return HANDLE_TYPE;
47 } 47 }
48 48
49 Session* GetParent() {
50 return parent.get();
51 }
52
53 const Session* GetParent() const {
54 return parent.get();
55 }
56
49 using SessionPair = std::tuple<SharedPtr<ServerSession>, SharedPtr<ClientSession>>; 57 using SessionPair = std::tuple<SharedPtr<ServerSession>, SharedPtr<ClientSession>>;
50 58
51 /** 59 /**
@@ -74,27 +82,20 @@ public:
74 */ 82 */
75 ResultCode HandleSyncRequest(SharedPtr<Thread> thread); 83 ResultCode HandleSyncRequest(SharedPtr<Thread> thread);
76 84
77 bool ShouldWait(Thread* thread) const override; 85 bool ShouldWait(const Thread* thread) const override;
78 86
79 void Acquire(Thread* thread) override; 87 void Acquire(Thread* thread) override;
80 88
81 std::string name; ///< The name of this session (optional) 89 /// Called when a client disconnection occurs.
82 std::shared_ptr<Session> parent; ///< The parent session, which links to the client endpoint. 90 void ClientDisconnected();
83 std::shared_ptr<SessionRequestHandler>
84 hle_handler; ///< This session's HLE request handler (applicable when not a domain)
85 91
86 /// This is the list of domain request handlers (after conversion to a domain) 92 /// Adds a new domain request handler to the collection of request handlers within
87 std::vector<std::shared_ptr<SessionRequestHandler>> domain_request_handlers; 93 /// this ServerSession instance.
88 94 void AppendDomainRequestHandler(std::shared_ptr<SessionRequestHandler> handler);
89 /// List of threads that are pending a response after a sync request. This list is processed in
90 /// a LIFO manner, thus, the last request will be dispatched first.
91 /// TODO(Subv): Verify if this is indeed processed in LIFO using a hardware test.
92 std::vector<SharedPtr<Thread>> pending_requesting_threads;
93 95
94 /// Thread whose request is currently being handled. A request is considered "handled" when a 96 /// Retrieves the total number of domain request handlers that have been
95 /// response is sent via svcReplyAndReceive. 97 /// appended to this ServerSession instance.
96 /// TODO(Subv): Find a better name for this. 98 std::size_t NumDomainRequestHandlers() const;
97 SharedPtr<Thread> currently_handling;
98 99
99 /// Returns true if the session has been converted to a domain, otherwise False 100 /// Returns true if the session has been converted to a domain, otherwise False
100 bool IsDomain() const { 101 bool IsDomain() const {
@@ -129,8 +130,30 @@ private:
129 /// object handle. 130 /// object handle.
130 ResultCode HandleDomainSyncRequest(Kernel::HLERequestContext& context); 131 ResultCode HandleDomainSyncRequest(Kernel::HLERequestContext& context);
131 132
133 /// The parent session, which links to the client endpoint.
134 std::shared_ptr<Session> parent;
135
136 /// This session's HLE request handler (applicable when not a domain)
137 std::shared_ptr<SessionRequestHandler> hle_handler;
138
139 /// This is the list of domain request handlers (after conversion to a domain)
140 std::vector<std::shared_ptr<SessionRequestHandler>> domain_request_handlers;
141
142 /// List of threads that are pending a response after a sync request. This list is processed in
143 /// a LIFO manner, thus, the last request will be dispatched first.
144 /// TODO(Subv): Verify if this is indeed processed in LIFO using a hardware test.
145 std::vector<SharedPtr<Thread>> pending_requesting_threads;
146
147 /// Thread whose request is currently being handled. A request is considered "handled" when a
148 /// response is sent via svcReplyAndReceive.
149 /// TODO(Subv): Find a better name for this.
150 SharedPtr<Thread> currently_handling;
151
132 /// When set to True, converts the session to a domain at the end of the command 152 /// When set to True, converts the session to a domain at the end of the command
133 bool convert_to_domain{}; 153 bool convert_to_domain{};
154
155 /// The name of this session (optional)
156 std::string name;
134}; 157};
135 158
136} // namespace Kernel 159} // namespace Kernel
diff --git a/src/core/hle/kernel/shared_memory.cpp b/src/core/hle/kernel/shared_memory.cpp
index 22d0c1dd5..f15c5ee36 100644
--- a/src/core/hle/kernel/shared_memory.cpp
+++ b/src/core/hle/kernel/shared_memory.cpp
@@ -6,11 +6,9 @@
6 6
7#include "common/assert.h" 7#include "common/assert.h"
8#include "common/logging/log.h" 8#include "common/logging/log.h"
9#include "core/core.h"
10#include "core/hle/kernel/errors.h" 9#include "core/hle/kernel/errors.h"
11#include "core/hle/kernel/kernel.h" 10#include "core/hle/kernel/kernel.h"
12#include "core/hle/kernel/shared_memory.h" 11#include "core/hle/kernel/shared_memory.h"
13#include "core/memory.h"
14 12
15namespace Kernel { 13namespace Kernel {
16 14
@@ -34,8 +32,8 @@ SharedPtr<SharedMemory> SharedMemory::Create(KernelCore& kernel, Process* owner_
34 shared_memory->backing_block_offset = 0; 32 shared_memory->backing_block_offset = 0;
35 33
36 // Refresh the address mappings for the current process. 34 // Refresh the address mappings for the current process.
37 if (Core::CurrentProcess() != nullptr) { 35 if (kernel.CurrentProcess() != nullptr) {
38 Core::CurrentProcess()->VMManager().RefreshMemoryBlockMappings( 36 kernel.CurrentProcess()->VMManager().RefreshMemoryBlockMappings(
39 shared_memory->backing_block.get()); 37 shared_memory->backing_block.get());
40 } 38 }
41 } else { 39 } else {
@@ -120,7 +118,15 @@ ResultCode SharedMemory::Map(Process& target_process, VAddr address, MemoryPermi
120 ConvertPermissions(permissions)); 118 ConvertPermissions(permissions));
121} 119}
122 120
123ResultCode SharedMemory::Unmap(Process& target_process, VAddr address) { 121ResultCode SharedMemory::Unmap(Process& target_process, VAddr address, u64 unmap_size) {
122 if (unmap_size != size) {
123 LOG_ERROR(Kernel,
124 "Invalid size passed to Unmap. Size must be equal to the size of the "
125 "memory managed. Shared memory size=0x{:016X}, Unmap size=0x{:016X}",
126 size, unmap_size);
127 return ERR_INVALID_SIZE;
128 }
129
124 // TODO(Subv): Verify what happens if the application tries to unmap an address that is not 130 // TODO(Subv): Verify what happens if the application tries to unmap an address that is not
125 // mapped to a SharedMemory. 131 // mapped to a SharedMemory.
126 return target_process.VMManager().UnmapRange(address, size); 132 return target_process.VMManager().UnmapRange(address, size);
diff --git a/src/core/hle/kernel/shared_memory.h b/src/core/hle/kernel/shared_memory.h
index dab2a6bea..37e18c443 100644
--- a/src/core/hle/kernel/shared_memory.h
+++ b/src/core/hle/kernel/shared_memory.h
@@ -104,11 +104,17 @@ public:
104 104
105 /** 105 /**
106 * Unmaps a shared memory block from the specified address in system memory 106 * Unmaps a shared memory block from the specified address in system memory
107 *
107 * @param target_process Process from which to unmap the memory block. 108 * @param target_process Process from which to unmap the memory block.
108 * @param address Address in system memory where the shared memory block is mapped 109 * @param address Address in system memory where the shared memory block is mapped.
110 * @param unmap_size The amount of bytes to unmap from this shared memory instance.
111 *
109 * @return Result code of the unmap operation 112 * @return Result code of the unmap operation
113 *
114 * @pre The given size to unmap must be the same size as the amount of memory managed by
115 * the SharedMemory instance itself, otherwise ERR_INVALID_SIZE will be returned.
110 */ 116 */
111 ResultCode Unmap(Process& target_process, VAddr address); 117 ResultCode Unmap(Process& target_process, VAddr address, u64 unmap_size);
112 118
113 /** 119 /**
114 * Gets a pointer to the shared memory block 120 * Gets a pointer to the shared memory block
diff --git a/src/core/hle/kernel/svc.cpp b/src/core/hle/kernel/svc.cpp
index 7cfecb68c..ab10db3df 100644
--- a/src/core/hle/kernel/svc.cpp
+++ b/src/core/hle/kernel/svc.cpp
@@ -20,6 +20,7 @@
20#include "core/hle/kernel/address_arbiter.h" 20#include "core/hle/kernel/address_arbiter.h"
21#include "core/hle/kernel/client_port.h" 21#include "core/hle/kernel/client_port.h"
22#include "core/hle/kernel/client_session.h" 22#include "core/hle/kernel/client_session.h"
23#include "core/hle/kernel/errors.h"
23#include "core/hle/kernel/handle_table.h" 24#include "core/hle/kernel/handle_table.h"
24#include "core/hle/kernel/kernel.h" 25#include "core/hle/kernel/kernel.h"
25#include "core/hle/kernel/mutex.h" 26#include "core/hle/kernel/mutex.h"
@@ -31,6 +32,7 @@
31#include "core/hle/kernel/svc.h" 32#include "core/hle/kernel/svc.h"
32#include "core/hle/kernel/svc_wrap.h" 33#include "core/hle/kernel/svc_wrap.h"
33#include "core/hle/kernel/thread.h" 34#include "core/hle/kernel/thread.h"
35#include "core/hle/kernel/transfer_memory.h"
34#include "core/hle/kernel/writable_event.h" 36#include "core/hle/kernel/writable_event.h"
35#include "core/hle/lock.h" 37#include "core/hle/lock.h"
36#include "core/hle/result.h" 38#include "core/hle/result.h"
@@ -47,23 +49,6 @@ constexpr bool IsValidAddressRange(VAddr address, u64 size) {
47 return address + size > address; 49 return address + size > address;
48} 50}
49 51
50// Checks if a given address range lies within a larger address range.
51constexpr bool IsInsideAddressRange(VAddr address, u64 size, VAddr address_range_begin,
52 VAddr address_range_end) {
53 const VAddr end_address = address + size - 1;
54 return address_range_begin <= address && end_address <= address_range_end - 1;
55}
56
57bool IsInsideAddressSpace(const VMManager& vm, VAddr address, u64 size) {
58 return IsInsideAddressRange(address, size, vm.GetAddressSpaceBaseAddress(),
59 vm.GetAddressSpaceEndAddress());
60}
61
62bool IsInsideNewMapRegion(const VMManager& vm, VAddr address, u64 size) {
63 return IsInsideAddressRange(address, size, vm.GetNewMapRegionBaseAddress(),
64 vm.GetNewMapRegionEndAddress());
65}
66
67// 8 GiB 52// 8 GiB
68constexpr u64 MAIN_MEMORY_SIZE = 0x200000000; 53constexpr u64 MAIN_MEMORY_SIZE = 0x200000000;
69 54
@@ -105,14 +90,14 @@ ResultCode MapUnmapMemorySanityChecks(const VMManager& vm_manager, VAddr dst_add
105 return ERR_INVALID_ADDRESS_STATE; 90 return ERR_INVALID_ADDRESS_STATE;
106 } 91 }
107 92
108 if (!IsInsideAddressSpace(vm_manager, src_addr, size)) { 93 if (!vm_manager.IsWithinAddressSpace(src_addr, size)) {
109 LOG_ERROR(Kernel_SVC, 94 LOG_ERROR(Kernel_SVC,
110 "Source is not within the address space, addr=0x{:016X}, size=0x{:016X}", 95 "Source is not within the address space, addr=0x{:016X}, size=0x{:016X}",
111 src_addr, size); 96 src_addr, size);
112 return ERR_INVALID_ADDRESS_STATE; 97 return ERR_INVALID_ADDRESS_STATE;
113 } 98 }
114 99
115 if (!IsInsideNewMapRegion(vm_manager, dst_addr, size)) { 100 if (!vm_manager.IsWithinNewMapRegion(dst_addr, size)) {
116 LOG_ERROR(Kernel_SVC, 101 LOG_ERROR(Kernel_SVC,
117 "Destination is not within the new map region, addr=0x{:016X}, size=0x{:016X}", 102 "Destination is not within the new map region, addr=0x{:016X}, size=0x{:016X}",
118 dst_addr, size); 103 dst_addr, size);
@@ -190,11 +175,8 @@ static ResultCode SetHeapSize(VAddr* heap_addr, u64 heap_size) {
190 return ERR_INVALID_SIZE; 175 return ERR_INVALID_SIZE;
191 } 176 }
192 177
193 auto& vm_manager = Core::CurrentProcess()->VMManager(); 178 auto& vm_manager = Core::System::GetInstance().Kernel().CurrentProcess()->VMManager();
194 const VAddr heap_base = vm_manager.GetHeapRegionBaseAddress(); 179 const auto alloc_result = vm_manager.SetHeapSize(heap_size);
195 const auto alloc_result =
196 vm_manager.HeapAllocate(heap_base, heap_size, VMAPermission::ReadWrite);
197
198 if (alloc_result.Failed()) { 180 if (alloc_result.Failed()) {
199 return alloc_result.Code(); 181 return alloc_result.Code();
200 } 182 }
@@ -238,7 +220,7 @@ static ResultCode SetMemoryPermission(VAddr addr, u64 size, u32 prot) {
238 auto* const current_process = Core::CurrentProcess(); 220 auto* const current_process = Core::CurrentProcess();
239 auto& vm_manager = current_process->VMManager(); 221 auto& vm_manager = current_process->VMManager();
240 222
241 if (!IsInsideAddressSpace(vm_manager, addr, size)) { 223 if (!vm_manager.IsWithinAddressSpace(addr, size)) {
242 LOG_ERROR(Kernel_SVC, 224 LOG_ERROR(Kernel_SVC,
243 "Source is not within the address space, addr=0x{:016X}, size=0x{:016X}", addr, 225 "Source is not within the address space, addr=0x{:016X}, size=0x{:016X}", addr,
244 size); 226 size);
@@ -299,7 +281,7 @@ static ResultCode SetMemoryAttribute(VAddr address, u64 size, u32 mask, u32 attr
299 } 281 }
300 282
301 auto& vm_manager = Core::CurrentProcess()->VMManager(); 283 auto& vm_manager = Core::CurrentProcess()->VMManager();
302 if (!IsInsideAddressSpace(vm_manager, address, size)) { 284 if (!vm_manager.IsWithinAddressSpace(address, size)) {
303 LOG_ERROR(Kernel_SVC, 285 LOG_ERROR(Kernel_SVC,
304 "Given address (0x{:016X}) is outside the bounds of the address space.", address); 286 "Given address (0x{:016X}) is outside the bounds of the address space.", address);
305 return ERR_INVALID_ADDRESS_STATE; 287 return ERR_INVALID_ADDRESS_STATE;
@@ -567,9 +549,9 @@ static ResultCode ArbitrateLock(Handle holding_thread_handle, VAddr mutex_addr,
567 return ERR_INVALID_ADDRESS; 549 return ERR_INVALID_ADDRESS;
568 } 550 }
569 551
570 auto& handle_table = Core::CurrentProcess()->GetHandleTable(); 552 auto* const current_process = Core::System::GetInstance().Kernel().CurrentProcess();
571 return Mutex::TryAcquire(handle_table, mutex_addr, holding_thread_handle, 553 return current_process->GetMutex().TryAcquire(mutex_addr, holding_thread_handle,
572 requesting_thread_handle); 554 requesting_thread_handle);
573} 555}
574 556
575/// Unlock a mutex 557/// Unlock a mutex
@@ -587,7 +569,8 @@ static ResultCode ArbitrateUnlock(VAddr mutex_addr) {
587 return ERR_INVALID_ADDRESS; 569 return ERR_INVALID_ADDRESS;
588 } 570 }
589 571
590 return Mutex::Release(mutex_addr); 572 auto* const current_process = Core::System::GetInstance().Kernel().CurrentProcess();
573 return current_process->GetMutex().Release(mutex_addr);
591} 574}
592 575
593enum class BreakType : u32 { 576enum class BreakType : u32 {
@@ -726,7 +709,7 @@ static ResultCode GetInfo(u64* result, u64 info_id, u64 handle, u64 info_sub_id)
726 HeapRegionBaseAddr = 4, 709 HeapRegionBaseAddr = 4,
727 HeapRegionSize = 5, 710 HeapRegionSize = 5,
728 TotalMemoryUsage = 6, 711 TotalMemoryUsage = 6,
729 TotalHeapUsage = 7, 712 TotalPhysicalMemoryUsed = 7,
730 IsCurrentProcessBeingDebugged = 8, 713 IsCurrentProcessBeingDebugged = 8,
731 RegisterResourceLimit = 9, 714 RegisterResourceLimit = 9,
732 IdleTickCount = 10, 715 IdleTickCount = 10,
@@ -762,7 +745,7 @@ static ResultCode GetInfo(u64* result, u64 info_id, u64 handle, u64 info_sub_id)
762 case GetInfoType::NewMapRegionBaseAddr: 745 case GetInfoType::NewMapRegionBaseAddr:
763 case GetInfoType::NewMapRegionSize: 746 case GetInfoType::NewMapRegionSize:
764 case GetInfoType::TotalMemoryUsage: 747 case GetInfoType::TotalMemoryUsage:
765 case GetInfoType::TotalHeapUsage: 748 case GetInfoType::TotalPhysicalMemoryUsed:
766 case GetInfoType::IsVirtualAddressMemoryEnabled: 749 case GetInfoType::IsVirtualAddressMemoryEnabled:
767 case GetInfoType::PersonalMmHeapUsage: 750 case GetInfoType::PersonalMmHeapUsage:
768 case GetInfoType::TitleId: 751 case GetInfoType::TitleId:
@@ -822,8 +805,8 @@ static ResultCode GetInfo(u64* result, u64 info_id, u64 handle, u64 info_sub_id)
822 *result = process->VMManager().GetTotalMemoryUsage(); 805 *result = process->VMManager().GetTotalMemoryUsage();
823 return RESULT_SUCCESS; 806 return RESULT_SUCCESS;
824 807
825 case GetInfoType::TotalHeapUsage: 808 case GetInfoType::TotalPhysicalMemoryUsed:
826 *result = process->VMManager().GetTotalHeapUsage(); 809 *result = process->GetTotalPhysicalMemoryUsed();
827 return RESULT_SUCCESS; 810 return RESULT_SUCCESS;
828 811
829 case GetInfoType::IsVirtualAddressMemoryEnabled: 812 case GetInfoType::IsVirtualAddressMemoryEnabled:
@@ -918,6 +901,7 @@ static ResultCode GetInfo(u64* result, u64 info_id, u64 handle, u64 info_sub_id)
918 } 901 }
919 902
920 const auto& system = Core::System::GetInstance(); 903 const auto& system = Core::System::GetInstance();
904 const auto& core_timing = system.CoreTiming();
921 const auto& scheduler = system.CurrentScheduler(); 905 const auto& scheduler = system.CurrentScheduler();
922 const auto* const current_thread = scheduler.GetCurrentThread(); 906 const auto* const current_thread = scheduler.GetCurrentThread();
923 const bool same_thread = current_thread == thread; 907 const bool same_thread = current_thread == thread;
@@ -927,9 +911,9 @@ static ResultCode GetInfo(u64* result, u64 info_id, u64 handle, u64 info_sub_id)
927 if (same_thread && info_sub_id == 0xFFFFFFFFFFFFFFFF) { 911 if (same_thread && info_sub_id == 0xFFFFFFFFFFFFFFFF) {
928 const u64 thread_ticks = current_thread->GetTotalCPUTimeTicks(); 912 const u64 thread_ticks = current_thread->GetTotalCPUTimeTicks();
929 913
930 out_ticks = thread_ticks + (CoreTiming::GetTicks() - prev_ctx_ticks); 914 out_ticks = thread_ticks + (core_timing.GetTicks() - prev_ctx_ticks);
931 } else if (same_thread && info_sub_id == system.CurrentCoreIndex()) { 915 } else if (same_thread && info_sub_id == system.CurrentCoreIndex()) {
932 out_ticks = CoreTiming::GetTicks() - prev_ctx_ticks; 916 out_ticks = core_timing.GetTicks() - prev_ctx_ticks;
933 } 917 }
934 918
935 *result = out_ticks; 919 *result = out_ticks;
@@ -1156,7 +1140,7 @@ static ResultCode UnmapSharedMemory(Handle shared_memory_handle, VAddr addr, u64
1156 return ERR_INVALID_MEMORY_RANGE; 1140 return ERR_INVALID_MEMORY_RANGE;
1157 } 1141 }
1158 1142
1159 return shared_memory->Unmap(*current_process, addr); 1143 return shared_memory->Unmap(*current_process, addr, size);
1160} 1144}
1161 1145
1162static ResultCode QueryProcessMemory(VAddr memory_info_address, VAddr page_info_address, 1146static ResultCode QueryProcessMemory(VAddr memory_info_address, VAddr page_info_address,
@@ -1299,10 +1283,14 @@ static ResultCode StartThread(Handle thread_handle) {
1299 1283
1300/// Called when a thread exits 1284/// Called when a thread exits
1301static void ExitThread() { 1285static void ExitThread() {
1302 LOG_TRACE(Kernel_SVC, "called, pc=0x{:08X}", Core::CurrentArmInterface().GetPC()); 1286 auto& system = Core::System::GetInstance();
1303 1287
1304 ExitCurrentThread(); 1288 LOG_TRACE(Kernel_SVC, "called, pc=0x{:08X}", system.CurrentArmInterface().GetPC());
1305 Core::System::GetInstance().PrepareReschedule(); 1289
1290 auto* const current_thread = system.CurrentScheduler().GetCurrentThread();
1291 current_thread->Stop();
1292 system.CurrentScheduler().RemoveThread(current_thread);
1293 system.PrepareReschedule();
1306} 1294}
1307 1295
1308/// Sleep the current thread 1296/// Sleep the current thread
@@ -1315,32 +1303,32 @@ static void SleepThread(s64 nanoseconds) {
1315 YieldAndWaitForLoadBalancing = -2, 1303 YieldAndWaitForLoadBalancing = -2,
1316 }; 1304 };
1317 1305
1306 auto& system = Core::System::GetInstance();
1307 auto& scheduler = system.CurrentScheduler();
1308 auto* const current_thread = scheduler.GetCurrentThread();
1309
1318 if (nanoseconds <= 0) { 1310 if (nanoseconds <= 0) {
1319 auto& scheduler{Core::System::GetInstance().CurrentScheduler()};
1320 switch (static_cast<SleepType>(nanoseconds)) { 1311 switch (static_cast<SleepType>(nanoseconds)) {
1321 case SleepType::YieldWithoutLoadBalancing: 1312 case SleepType::YieldWithoutLoadBalancing:
1322 scheduler.YieldWithoutLoadBalancing(GetCurrentThread()); 1313 scheduler.YieldWithoutLoadBalancing(current_thread);
1323 break; 1314 break;
1324 case SleepType::YieldWithLoadBalancing: 1315 case SleepType::YieldWithLoadBalancing:
1325 scheduler.YieldWithLoadBalancing(GetCurrentThread()); 1316 scheduler.YieldWithLoadBalancing(current_thread);
1326 break; 1317 break;
1327 case SleepType::YieldAndWaitForLoadBalancing: 1318 case SleepType::YieldAndWaitForLoadBalancing:
1328 scheduler.YieldAndWaitForLoadBalancing(GetCurrentThread()); 1319 scheduler.YieldAndWaitForLoadBalancing(current_thread);
1329 break; 1320 break;
1330 default: 1321 default:
1331 UNREACHABLE_MSG("Unimplemented sleep yield type '{:016X}'!", nanoseconds); 1322 UNREACHABLE_MSG("Unimplemented sleep yield type '{:016X}'!", nanoseconds);
1332 } 1323 }
1333 } else { 1324 } else {
1334 // Sleep current thread and check for next thread to schedule 1325 current_thread->Sleep(nanoseconds);
1335 WaitCurrentThread_Sleep();
1336
1337 // Create an event to wake the thread up after the specified nanosecond delay has passed
1338 GetCurrentThread()->WakeAfterDelay(nanoseconds);
1339 } 1326 }
1340 1327
1341 // Reschedule all CPU cores 1328 // Reschedule all CPU cores
1342 for (std::size_t i = 0; i < Core::NUM_CPU_CORES; ++i) 1329 for (std::size_t i = 0; i < Core::NUM_CPU_CORES; ++i) {
1343 Core::System::GetInstance().CpuCore(i).PrepareReschedule(); 1330 system.CpuCore(i).PrepareReschedule();
1331 }
1344} 1332}
1345 1333
1346/// Wait process wide key atomic 1334/// Wait process wide key atomic
@@ -1351,17 +1339,21 @@ static ResultCode WaitProcessWideKeyAtomic(VAddr mutex_addr, VAddr condition_var
1351 "called mutex_addr={:X}, condition_variable_addr={:X}, thread_handle=0x{:08X}, timeout={}", 1339 "called mutex_addr={:X}, condition_variable_addr={:X}, thread_handle=0x{:08X}, timeout={}",
1352 mutex_addr, condition_variable_addr, thread_handle, nano_seconds); 1340 mutex_addr, condition_variable_addr, thread_handle, nano_seconds);
1353 1341
1354 const auto& handle_table = Core::CurrentProcess()->GetHandleTable(); 1342 auto* const current_process = Core::System::GetInstance().Kernel().CurrentProcess();
1343 const auto& handle_table = current_process->GetHandleTable();
1355 SharedPtr<Thread> thread = handle_table.Get<Thread>(thread_handle); 1344 SharedPtr<Thread> thread = handle_table.Get<Thread>(thread_handle);
1356 ASSERT(thread); 1345 ASSERT(thread);
1357 1346
1358 CASCADE_CODE(Mutex::Release(mutex_addr)); 1347 const auto release_result = current_process->GetMutex().Release(mutex_addr);
1348 if (release_result.IsError()) {
1349 return release_result;
1350 }
1359 1351
1360 SharedPtr<Thread> current_thread = GetCurrentThread(); 1352 SharedPtr<Thread> current_thread = GetCurrentThread();
1361 current_thread->SetCondVarWaitAddress(condition_variable_addr); 1353 current_thread->SetCondVarWaitAddress(condition_variable_addr);
1362 current_thread->SetMutexWaitAddress(mutex_addr); 1354 current_thread->SetMutexWaitAddress(mutex_addr);
1363 current_thread->SetWaitHandle(thread_handle); 1355 current_thread->SetWaitHandle(thread_handle);
1364 current_thread->SetStatus(ThreadStatus::WaitMutex); 1356 current_thread->SetStatus(ThreadStatus::WaitCondVar);
1365 current_thread->InvalidateWakeupCallback(); 1357 current_thread->InvalidateWakeupCallback();
1366 1358
1367 current_thread->WakeAfterDelay(nano_seconds); 1359 current_thread->WakeAfterDelay(nano_seconds);
@@ -1405,10 +1397,10 @@ static ResultCode SignalProcessWideKey(VAddr condition_variable_addr, s32 target
1405 // them all. 1397 // them all.
1406 std::size_t last = waiting_threads.size(); 1398 std::size_t last = waiting_threads.size();
1407 if (target != -1) 1399 if (target != -1)
1408 last = target; 1400 last = std::min(waiting_threads.size(), static_cast<std::size_t>(target));
1409 1401
1410 // If there are no threads waiting on this condition variable, just exit 1402 // If there are no threads waiting on this condition variable, just exit
1411 if (last > waiting_threads.size()) 1403 if (last == 0)
1412 return RESULT_SUCCESS; 1404 return RESULT_SUCCESS;
1413 1405
1414 for (std::size_t index = 0; index < last; ++index) { 1406 for (std::size_t index = 0; index < last; ++index) {
@@ -1416,6 +1408,9 @@ static ResultCode SignalProcessWideKey(VAddr condition_variable_addr, s32 target
1416 1408
1417 ASSERT(thread->GetCondVarWaitAddress() == condition_variable_addr); 1409 ASSERT(thread->GetCondVarWaitAddress() == condition_variable_addr);
1418 1410
1411 // liberate Cond Var Thread.
1412 thread->SetCondVarWaitAddress(0);
1413
1419 std::size_t current_core = Core::System::GetInstance().CurrentCoreIndex(); 1414 std::size_t current_core = Core::System::GetInstance().CurrentCoreIndex();
1420 1415
1421 auto& monitor = Core::System::GetInstance().Monitor(); 1416 auto& monitor = Core::System::GetInstance().Monitor();
@@ -1434,10 +1429,9 @@ static ResultCode SignalProcessWideKey(VAddr condition_variable_addr, s32 target
1434 } 1429 }
1435 } while (!monitor.ExclusiveWrite32(current_core, thread->GetMutexWaitAddress(), 1430 } while (!monitor.ExclusiveWrite32(current_core, thread->GetMutexWaitAddress(),
1436 thread->GetWaitHandle())); 1431 thread->GetWaitHandle()));
1437
1438 if (mutex_val == 0) { 1432 if (mutex_val == 0) {
1439 // We were able to acquire the mutex, resume this thread. 1433 // We were able to acquire the mutex, resume this thread.
1440 ASSERT(thread->GetStatus() == ThreadStatus::WaitMutex); 1434 ASSERT(thread->GetStatus() == ThreadStatus::WaitCondVar);
1441 thread->ResumeFromWait(); 1435 thread->ResumeFromWait();
1442 1436
1443 auto* const lock_owner = thread->GetLockOwner(); 1437 auto* const lock_owner = thread->GetLockOwner();
@@ -1447,8 +1441,8 @@ static ResultCode SignalProcessWideKey(VAddr condition_variable_addr, s32 target
1447 1441
1448 thread->SetLockOwner(nullptr); 1442 thread->SetLockOwner(nullptr);
1449 thread->SetMutexWaitAddress(0); 1443 thread->SetMutexWaitAddress(0);
1450 thread->SetCondVarWaitAddress(0);
1451 thread->SetWaitHandle(0); 1444 thread->SetWaitHandle(0);
1445 Core::System::GetInstance().CpuCore(thread->GetProcessorID()).PrepareReschedule();
1452 } else { 1446 } else {
1453 // Atomically signal that the mutex now has a waiting thread. 1447 // Atomically signal that the mutex now has a waiting thread.
1454 do { 1448 do {
@@ -1467,12 +1461,11 @@ static ResultCode SignalProcessWideKey(VAddr condition_variable_addr, s32 target
1467 const auto& handle_table = Core::CurrentProcess()->GetHandleTable(); 1461 const auto& handle_table = Core::CurrentProcess()->GetHandleTable();
1468 auto owner = handle_table.Get<Thread>(owner_handle); 1462 auto owner = handle_table.Get<Thread>(owner_handle);
1469 ASSERT(owner); 1463 ASSERT(owner);
1470 ASSERT(thread->GetStatus() == ThreadStatus::WaitMutex); 1464 ASSERT(thread->GetStatus() == ThreadStatus::WaitCondVar);
1471 thread->InvalidateWakeupCallback(); 1465 thread->InvalidateWakeupCallback();
1466 thread->SetStatus(ThreadStatus::WaitMutex);
1472 1467
1473 owner->AddMutexWaiter(thread); 1468 owner->AddMutexWaiter(thread);
1474
1475 Core::System::GetInstance().CpuCore(thread->GetProcessorID()).PrepareReschedule();
1476 } 1469 }
1477 } 1470 }
1478 1471
@@ -1494,20 +1487,10 @@ static ResultCode WaitForAddress(VAddr address, u32 type, s32 value, s64 timeout
1494 return ERR_INVALID_ADDRESS; 1487 return ERR_INVALID_ADDRESS;
1495 } 1488 }
1496 1489
1497 switch (static_cast<AddressArbiter::ArbitrationType>(type)) { 1490 const auto arbitration_type = static_cast<AddressArbiter::ArbitrationType>(type);
1498 case AddressArbiter::ArbitrationType::WaitIfLessThan: 1491 auto& address_arbiter =
1499 return AddressArbiter::WaitForAddressIfLessThan(address, value, timeout, false); 1492 Core::System::GetInstance().Kernel().CurrentProcess()->GetAddressArbiter();
1500 case AddressArbiter::ArbitrationType::DecrementAndWaitIfLessThan: 1493 return address_arbiter.WaitForAddress(address, arbitration_type, value, timeout);
1501 return AddressArbiter::WaitForAddressIfLessThan(address, value, timeout, true);
1502 case AddressArbiter::ArbitrationType::WaitIfEqual:
1503 return AddressArbiter::WaitForAddressIfEqual(address, value, timeout);
1504 default:
1505 LOG_ERROR(Kernel_SVC,
1506 "Invalid arbitration type, expected WaitIfLessThan, DecrementAndWaitIfLessThan "
1507 "or WaitIfEqual but got {}",
1508 type);
1509 return ERR_INVALID_ENUM_VALUE;
1510 }
1511} 1494}
1512 1495
1513// Signals to an address (via Address Arbiter) 1496// Signals to an address (via Address Arbiter)
@@ -1525,31 +1508,21 @@ static ResultCode SignalToAddress(VAddr address, u32 type, s32 value, s32 num_to
1525 return ERR_INVALID_ADDRESS; 1508 return ERR_INVALID_ADDRESS;
1526 } 1509 }
1527 1510
1528 switch (static_cast<AddressArbiter::SignalType>(type)) { 1511 const auto signal_type = static_cast<AddressArbiter::SignalType>(type);
1529 case AddressArbiter::SignalType::Signal: 1512 auto& address_arbiter =
1530 return AddressArbiter::SignalToAddress(address, num_to_wake); 1513 Core::System::GetInstance().Kernel().CurrentProcess()->GetAddressArbiter();
1531 case AddressArbiter::SignalType::IncrementAndSignalIfEqual: 1514 return address_arbiter.SignalToAddress(address, signal_type, value, num_to_wake);
1532 return AddressArbiter::IncrementAndSignalToAddressIfEqual(address, value, num_to_wake);
1533 case AddressArbiter::SignalType::ModifyByWaitingCountAndSignalIfEqual:
1534 return AddressArbiter::ModifyByWaitingCountAndSignalToAddressIfEqual(address, value,
1535 num_to_wake);
1536 default:
1537 LOG_ERROR(Kernel_SVC,
1538 "Invalid signal type, expected Signal, IncrementAndSignalIfEqual "
1539 "or ModifyByWaitingCountAndSignalIfEqual but got {}",
1540 type);
1541 return ERR_INVALID_ENUM_VALUE;
1542 }
1543} 1515}
1544 1516
1545/// This returns the total CPU ticks elapsed since the CPU was powered-on 1517/// This returns the total CPU ticks elapsed since the CPU was powered-on
1546static u64 GetSystemTick() { 1518static u64 GetSystemTick() {
1547 LOG_TRACE(Kernel_SVC, "called"); 1519 LOG_TRACE(Kernel_SVC, "called");
1548 1520
1549 const u64 result{CoreTiming::GetTicks()}; 1521 auto& core_timing = Core::System::GetInstance().CoreTiming();
1522 const u64 result{core_timing.GetTicks()};
1550 1523
1551 // Advance time to defeat dumb games that busy-wait for the frame to end. 1524 // Advance time to defeat dumb games that busy-wait for the frame to end.
1552 CoreTiming::AddTicks(400); 1525 core_timing.AddTicks(400);
1553 1526
1554 return result; 1527 return result;
1555} 1528}
@@ -1612,14 +1585,121 @@ static ResultCode CreateTransferMemory(Handle* handle, VAddr addr, u64 size, u32
1612 } 1585 }
1613 1586
1614 auto& kernel = Core::System::GetInstance().Kernel(); 1587 auto& kernel = Core::System::GetInstance().Kernel();
1615 auto process = kernel.CurrentProcess(); 1588 auto transfer_mem_handle = TransferMemory::Create(kernel, addr, size, perms);
1616 auto& handle_table = process->GetHandleTable();
1617 const auto shared_mem_handle = SharedMemory::Create(kernel, process, size, perms, perms, addr);
1618 1589
1619 CASCADE_RESULT(*handle, handle_table.Create(shared_mem_handle)); 1590 auto& handle_table = kernel.CurrentProcess()->GetHandleTable();
1591 const auto result = handle_table.Create(std::move(transfer_mem_handle));
1592 if (result.Failed()) {
1593 return result.Code();
1594 }
1595
1596 *handle = *result;
1620 return RESULT_SUCCESS; 1597 return RESULT_SUCCESS;
1621} 1598}
1622 1599
1600static ResultCode MapTransferMemory(Handle handle, VAddr address, u64 size, u32 permission_raw) {
1601 LOG_DEBUG(Kernel_SVC,
1602 "called. handle=0x{:08X}, address=0x{:016X}, size=0x{:016X}, permissions=0x{:08X}",
1603 handle, address, size, permission_raw);
1604
1605 if (!Common::Is4KBAligned(address)) {
1606 LOG_ERROR(Kernel_SVC, "Transfer memory addresses must be 4KB aligned (size=0x{:016X}).",
1607 address);
1608 return ERR_INVALID_ADDRESS;
1609 }
1610
1611 if (size == 0 || !Common::Is4KBAligned(size)) {
1612 LOG_ERROR(Kernel_SVC,
1613 "Transfer memory sizes must be 4KB aligned and not be zero (size=0x{:016X}).",
1614 size);
1615 return ERR_INVALID_SIZE;
1616 }
1617
1618 if (!IsValidAddressRange(address, size)) {
1619 LOG_ERROR(Kernel_SVC,
1620 "Given address and size overflows the 64-bit range (address=0x{:016X}, "
1621 "size=0x{:016X}).",
1622 address, size);
1623 return ERR_INVALID_ADDRESS_STATE;
1624 }
1625
1626 const auto permissions = static_cast<MemoryPermission>(permission_raw);
1627 if (permissions != MemoryPermission::None && permissions != MemoryPermission::Read &&
1628 permissions != MemoryPermission::ReadWrite) {
1629 LOG_ERROR(Kernel_SVC, "Invalid transfer memory permissions given (permissions=0x{:08X}).",
1630 permission_raw);
1631 return ERR_INVALID_STATE;
1632 }
1633
1634 const auto& kernel = Core::System::GetInstance().Kernel();
1635 const auto* const current_process = kernel.CurrentProcess();
1636 const auto& handle_table = current_process->GetHandleTable();
1637
1638 auto transfer_memory = handle_table.Get<TransferMemory>(handle);
1639 if (!transfer_memory) {
1640 LOG_ERROR(Kernel_SVC, "Nonexistent transfer memory handle given (handle=0x{:08X}).",
1641 handle);
1642 return ERR_INVALID_HANDLE;
1643 }
1644
1645 if (!current_process->VMManager().IsWithinASLRRegion(address, size)) {
1646 LOG_ERROR(Kernel_SVC,
1647 "Given address and size don't fully fit within the ASLR region "
1648 "(address=0x{:016X}, size=0x{:016X}).",
1649 address, size);
1650 return ERR_INVALID_MEMORY_RANGE;
1651 }
1652
1653 return transfer_memory->MapMemory(address, size, permissions);
1654}
1655
1656static ResultCode UnmapTransferMemory(Handle handle, VAddr address, u64 size) {
1657 LOG_DEBUG(Kernel_SVC, "called. handle=0x{:08X}, address=0x{:016X}, size=0x{:016X}", handle,
1658 address, size);
1659
1660 if (!Common::Is4KBAligned(address)) {
1661 LOG_ERROR(Kernel_SVC, "Transfer memory addresses must be 4KB aligned (size=0x{:016X}).",
1662 address);
1663 return ERR_INVALID_ADDRESS;
1664 }
1665
1666 if (size == 0 || !Common::Is4KBAligned(size)) {
1667 LOG_ERROR(Kernel_SVC,
1668 "Transfer memory sizes must be 4KB aligned and not be zero (size=0x{:016X}).",
1669 size);
1670 return ERR_INVALID_SIZE;
1671 }
1672
1673 if (!IsValidAddressRange(address, size)) {
1674 LOG_ERROR(Kernel_SVC,
1675 "Given address and size overflows the 64-bit range (address=0x{:016X}, "
1676 "size=0x{:016X}).",
1677 address, size);
1678 return ERR_INVALID_ADDRESS_STATE;
1679 }
1680
1681 const auto& kernel = Core::System::GetInstance().Kernel();
1682 const auto* const current_process = kernel.CurrentProcess();
1683 const auto& handle_table = current_process->GetHandleTable();
1684
1685 auto transfer_memory = handle_table.Get<TransferMemory>(handle);
1686 if (!transfer_memory) {
1687 LOG_ERROR(Kernel_SVC, "Nonexistent transfer memory handle given (handle=0x{:08X}).",
1688 handle);
1689 return ERR_INVALID_HANDLE;
1690 }
1691
1692 if (!current_process->VMManager().IsWithinASLRRegion(address, size)) {
1693 LOG_ERROR(Kernel_SVC,
1694 "Given address and size don't fully fit within the ASLR region "
1695 "(address=0x{:016X}, size=0x{:016X}).",
1696 address, size);
1697 return ERR_INVALID_MEMORY_RANGE;
1698 }
1699
1700 return transfer_memory->UnmapMemory(address, size);
1701}
1702
1623static ResultCode GetThreadCoreMask(Handle thread_handle, u32* core, u64* mask) { 1703static ResultCode GetThreadCoreMask(Handle thread_handle, u32* core, u64* mask) {
1624 LOG_TRACE(Kernel_SVC, "called, handle=0x{:08X}", thread_handle); 1704 LOG_TRACE(Kernel_SVC, "called, handle=0x{:08X}", thread_handle);
1625 1705
@@ -1903,6 +1983,83 @@ static ResultCode SetResourceLimitLimitValue(Handle resource_limit, u32 resource
1903 return RESULT_SUCCESS; 1983 return RESULT_SUCCESS;
1904} 1984}
1905 1985
1986static ResultCode GetProcessList(u32* out_num_processes, VAddr out_process_ids,
1987 u32 out_process_ids_size) {
1988 LOG_DEBUG(Kernel_SVC, "called. out_process_ids=0x{:016X}, out_process_ids_size={}",
1989 out_process_ids, out_process_ids_size);
1990
1991 // If the supplied size is negative or greater than INT32_MAX / sizeof(u64), bail.
1992 if ((out_process_ids_size & 0xF0000000) != 0) {
1993 LOG_ERROR(Kernel_SVC,
1994 "Supplied size outside [0, 0x0FFFFFFF] range. out_process_ids_size={}",
1995 out_process_ids_size);
1996 return ERR_OUT_OF_RANGE;
1997 }
1998
1999 const auto& kernel = Core::System::GetInstance().Kernel();
2000 const auto& vm_manager = kernel.CurrentProcess()->VMManager();
2001 const auto total_copy_size = out_process_ids_size * sizeof(u64);
2002
2003 if (out_process_ids_size > 0 &&
2004 !vm_manager.IsWithinAddressSpace(out_process_ids, total_copy_size)) {
2005 LOG_ERROR(Kernel_SVC, "Address range outside address space. begin=0x{:016X}, end=0x{:016X}",
2006 out_process_ids, out_process_ids + total_copy_size);
2007 return ERR_INVALID_ADDRESS_STATE;
2008 }
2009
2010 const auto& process_list = kernel.GetProcessList();
2011 const auto num_processes = process_list.size();
2012 const auto copy_amount = std::min(std::size_t{out_process_ids_size}, num_processes);
2013
2014 for (std::size_t i = 0; i < copy_amount; ++i) {
2015 Memory::Write64(out_process_ids, process_list[i]->GetProcessID());
2016 out_process_ids += sizeof(u64);
2017 }
2018
2019 *out_num_processes = static_cast<u32>(num_processes);
2020 return RESULT_SUCCESS;
2021}
2022
2023ResultCode GetThreadList(u32* out_num_threads, VAddr out_thread_ids, u32 out_thread_ids_size,
2024 Handle debug_handle) {
2025 // TODO: Handle this case when debug events are supported.
2026 UNIMPLEMENTED_IF(debug_handle != InvalidHandle);
2027
2028 LOG_DEBUG(Kernel_SVC, "called. out_thread_ids=0x{:016X}, out_thread_ids_size={}",
2029 out_thread_ids, out_thread_ids_size);
2030
2031 // If the size is negative or larger than INT32_MAX / sizeof(u64)
2032 if ((out_thread_ids_size & 0xF0000000) != 0) {
2033 LOG_ERROR(Kernel_SVC, "Supplied size outside [0, 0x0FFFFFFF] range. size={}",
2034 out_thread_ids_size);
2035 return ERR_OUT_OF_RANGE;
2036 }
2037
2038 const auto* const current_process = Core::System::GetInstance().Kernel().CurrentProcess();
2039 const auto& vm_manager = current_process->VMManager();
2040 const auto total_copy_size = out_thread_ids_size * sizeof(u64);
2041
2042 if (out_thread_ids_size > 0 &&
2043 !vm_manager.IsWithinAddressSpace(out_thread_ids, total_copy_size)) {
2044 LOG_ERROR(Kernel_SVC, "Address range outside address space. begin=0x{:016X}, end=0x{:016X}",
2045 out_thread_ids, out_thread_ids + total_copy_size);
2046 return ERR_INVALID_ADDRESS_STATE;
2047 }
2048
2049 const auto& thread_list = current_process->GetThreadList();
2050 const auto num_threads = thread_list.size();
2051 const auto copy_amount = std::min(std::size_t{out_thread_ids_size}, num_threads);
2052
2053 auto list_iter = thread_list.cbegin();
2054 for (std::size_t i = 0; i < copy_amount; ++i, ++list_iter) {
2055 Memory::Write64(out_thread_ids, (*list_iter)->GetThreadID());
2056 out_thread_ids += sizeof(u64);
2057 }
2058
2059 *out_num_threads = static_cast<u32>(num_threads);
2060 return RESULT_SUCCESS;
2061}
2062
1906namespace { 2063namespace {
1907struct FunctionDef { 2064struct FunctionDef {
1908 using Func = void(); 2065 using Func = void();
@@ -1995,8 +2152,8 @@ static const FunctionDef SVC_Table[] = {
1995 {0x4E, nullptr, "ReadWriteRegister"}, 2152 {0x4E, nullptr, "ReadWriteRegister"},
1996 {0x4F, nullptr, "SetProcessActivity"}, 2153 {0x4F, nullptr, "SetProcessActivity"},
1997 {0x50, SvcWrap<CreateSharedMemory>, "CreateSharedMemory"}, 2154 {0x50, SvcWrap<CreateSharedMemory>, "CreateSharedMemory"},
1998 {0x51, nullptr, "MapTransferMemory"}, 2155 {0x51, SvcWrap<MapTransferMemory>, "MapTransferMemory"},
1999 {0x52, nullptr, "UnmapTransferMemory"}, 2156 {0x52, SvcWrap<UnmapTransferMemory>, "UnmapTransferMemory"},
2000 {0x53, nullptr, "CreateInterruptEvent"}, 2157 {0x53, nullptr, "CreateInterruptEvent"},
2001 {0x54, nullptr, "QueryPhysicalAddress"}, 2158 {0x54, nullptr, "QueryPhysicalAddress"},
2002 {0x55, nullptr, "QueryIoMapping"}, 2159 {0x55, nullptr, "QueryIoMapping"},
@@ -2015,8 +2172,8 @@ static const FunctionDef SVC_Table[] = {
2015 {0x62, nullptr, "TerminateDebugProcess"}, 2172 {0x62, nullptr, "TerminateDebugProcess"},
2016 {0x63, nullptr, "GetDebugEvent"}, 2173 {0x63, nullptr, "GetDebugEvent"},
2017 {0x64, nullptr, "ContinueDebugEvent"}, 2174 {0x64, nullptr, "ContinueDebugEvent"},
2018 {0x65, nullptr, "GetProcessList"}, 2175 {0x65, SvcWrap<GetProcessList>, "GetProcessList"},
2019 {0x66, nullptr, "GetThreadList"}, 2176 {0x66, SvcWrap<GetThreadList>, "GetThreadList"},
2020 {0x67, nullptr, "GetDebugThreadContext"}, 2177 {0x67, nullptr, "GetDebugThreadContext"},
2021 {0x68, nullptr, "SetDebugThreadContext"}, 2178 {0x68, nullptr, "SetDebugThreadContext"},
2022 {0x69, nullptr, "QueryDebugProcessMemory"}, 2179 {0x69, nullptr, "QueryDebugProcessMemory"},
@@ -2058,7 +2215,7 @@ void CallSVC(u32 immediate) {
2058 MICROPROFILE_SCOPE(Kernel_SVC); 2215 MICROPROFILE_SCOPE(Kernel_SVC);
2059 2216
2060 // Lock the global kernel mutex when we enter the kernel HLE. 2217 // Lock the global kernel mutex when we enter the kernel HLE.
2061 std::lock_guard<std::recursive_mutex> lock(HLE::g_hle_lock); 2218 std::lock_guard lock{HLE::g_hle_lock};
2062 2219
2063 const FunctionDef* info = GetSVCInfo(immediate); 2220 const FunctionDef* info = GetSVCInfo(immediate);
2064 if (info) { 2221 if (info) {
diff --git a/src/core/hle/kernel/svc_wrap.h b/src/core/hle/kernel/svc_wrap.h
index 2a2c2c5ea..b3733680f 100644
--- a/src/core/hle/kernel/svc_wrap.h
+++ b/src/core/hle/kernel/svc_wrap.h
@@ -78,6 +78,14 @@ void SvcWrap() {
78 FuncReturn(retval); 78 FuncReturn(retval);
79} 79}
80 80
81template <ResultCode func(u32*, u64, u32)>
82void SvcWrap() {
83 u32 param_1 = 0;
84 const u32 retval = func(&param_1, Param(1), static_cast<u32>(Param(2))).raw;
85 Core::CurrentArmInterface().SetReg(1, param_1);
86 FuncReturn(retval);
87}
88
81template <ResultCode func(u64*, u32)> 89template <ResultCode func(u64*, u32)>
82void SvcWrap() { 90void SvcWrap() {
83 u64 param_1 = 0; 91 u64 param_1 = 0;
diff --git a/src/core/hle/kernel/thread.cpp b/src/core/hle/kernel/thread.cpp
index d3984dfc4..1b891f632 100644
--- a/src/core/hle/kernel/thread.cpp
+++ b/src/core/hle/kernel/thread.cpp
@@ -7,8 +7,6 @@
7#include <optional> 7#include <optional>
8#include <vector> 8#include <vector>
9 9
10#include <boost/range/algorithm_ext/erase.hpp>
11
12#include "common/assert.h" 10#include "common/assert.h"
13#include "common/common_types.h" 11#include "common/common_types.h"
14#include "common/logging/log.h" 12#include "common/logging/log.h"
@@ -30,7 +28,7 @@
30 28
31namespace Kernel { 29namespace Kernel {
32 30
33bool Thread::ShouldWait(Thread* thread) const { 31bool Thread::ShouldWait(const Thread* thread) const {
34 return status != ThreadStatus::Dead; 32 return status != ThreadStatus::Dead;
35} 33}
36 34
@@ -43,7 +41,8 @@ Thread::~Thread() = default;
43 41
44void Thread::Stop() { 42void Thread::Stop() {
45 // Cancel any outstanding wakeup events for this thread 43 // Cancel any outstanding wakeup events for this thread
46 CoreTiming::UnscheduleEvent(kernel.ThreadWakeupCallbackEventType(), callback_handle); 44 Core::System::GetInstance().CoreTiming().UnscheduleEvent(kernel.ThreadWakeupCallbackEventType(),
45 callback_handle);
47 kernel.ThreadWakeupCallbackHandleTable().Close(callback_handle); 46 kernel.ThreadWakeupCallbackHandleTable().Close(callback_handle);
48 callback_handle = 0; 47 callback_handle = 0;
49 48
@@ -63,21 +62,12 @@ void Thread::Stop() {
63 } 62 }
64 wait_objects.clear(); 63 wait_objects.clear();
65 64
65 owner_process->UnregisterThread(this);
66
66 // Mark the TLS slot in the thread's page as free. 67 // Mark the TLS slot in the thread's page as free.
67 owner_process->FreeTLSSlot(tls_address); 68 owner_process->FreeTLSSlot(tls_address);
68} 69}
69 70
70void WaitCurrentThread_Sleep() {
71 Thread* thread = GetCurrentThread();
72 thread->SetStatus(ThreadStatus::WaitSleep);
73}
74
75void ExitCurrentThread() {
76 Thread* thread = GetCurrentThread();
77 thread->Stop();
78 Core::System::GetInstance().CurrentScheduler().RemoveThread(thread);
79}
80
81void Thread::WakeAfterDelay(s64 nanoseconds) { 71void Thread::WakeAfterDelay(s64 nanoseconds) {
82 // Don't schedule a wakeup if the thread wants to wait forever 72 // Don't schedule a wakeup if the thread wants to wait forever
83 if (nanoseconds == -1) 73 if (nanoseconds == -1)
@@ -85,12 +75,14 @@ void Thread::WakeAfterDelay(s64 nanoseconds) {
85 75
86 // This function might be called from any thread so we have to be cautious and use the 76 // This function might be called from any thread so we have to be cautious and use the
87 // thread-safe version of ScheduleEvent. 77 // thread-safe version of ScheduleEvent.
88 CoreTiming::ScheduleEventThreadsafe(CoreTiming::nsToCycles(nanoseconds), 78 Core::System::GetInstance().CoreTiming().ScheduleEventThreadsafe(
89 kernel.ThreadWakeupCallbackEventType(), callback_handle); 79 Core::Timing::nsToCycles(nanoseconds), kernel.ThreadWakeupCallbackEventType(),
80 callback_handle);
90} 81}
91 82
92void Thread::CancelWakeupTimer() { 83void Thread::CancelWakeupTimer() {
93 CoreTiming::UnscheduleEventThreadsafe(kernel.ThreadWakeupCallbackEventType(), callback_handle); 84 Core::System::GetInstance().CoreTiming().UnscheduleEventThreadsafe(
85 kernel.ThreadWakeupCallbackEventType(), callback_handle);
94} 86}
95 87
96static std::optional<s32> GetNextProcessorId(u64 mask) { 88static std::optional<s32> GetNextProcessorId(u64 mask) {
@@ -115,6 +107,7 @@ void Thread::ResumeFromWait() {
115 case ThreadStatus::WaitSleep: 107 case ThreadStatus::WaitSleep:
116 case ThreadStatus::WaitIPC: 108 case ThreadStatus::WaitIPC:
117 case ThreadStatus::WaitMutex: 109 case ThreadStatus::WaitMutex:
110 case ThreadStatus::WaitCondVar:
118 case ThreadStatus::WaitArb: 111 case ThreadStatus::WaitArb:
119 break; 112 break;
120 113
@@ -181,14 +174,13 @@ ResultVal<SharedPtr<Thread>> Thread::Create(KernelCore& kernel, std::string name
181 return ERR_INVALID_PROCESSOR_ID; 174 return ERR_INVALID_PROCESSOR_ID;
182 } 175 }
183 176
184 // TODO(yuriks): Other checks, returning 0xD9001BEA
185
186 if (!Memory::IsValidVirtualAddress(owner_process, entry_point)) { 177 if (!Memory::IsValidVirtualAddress(owner_process, entry_point)) {
187 LOG_ERROR(Kernel_SVC, "(name={}): invalid entry {:016X}", name, entry_point); 178 LOG_ERROR(Kernel_SVC, "(name={}): invalid entry {:016X}", name, entry_point);
188 // TODO (bunnei): Find the correct error code to use here 179 // TODO (bunnei): Find the correct error code to use here
189 return ResultCode(-1); 180 return ResultCode(-1);
190 } 181 }
191 182
183 auto& system = Core::System::GetInstance();
192 SharedPtr<Thread> thread(new Thread(kernel)); 184 SharedPtr<Thread> thread(new Thread(kernel));
193 185
194 thread->thread_id = kernel.CreateNewThreadID(); 186 thread->thread_id = kernel.CreateNewThreadID();
@@ -197,7 +189,7 @@ ResultVal<SharedPtr<Thread>> Thread::Create(KernelCore& kernel, std::string name
197 thread->stack_top = stack_top; 189 thread->stack_top = stack_top;
198 thread->tpidr_el0 = 0; 190 thread->tpidr_el0 = 0;
199 thread->nominal_priority = thread->current_priority = priority; 191 thread->nominal_priority = thread->current_priority = priority;
200 thread->last_running_ticks = CoreTiming::GetTicks(); 192 thread->last_running_ticks = system.CoreTiming().GetTicks();
201 thread->processor_id = processor_id; 193 thread->processor_id = processor_id;
202 thread->ideal_core = processor_id; 194 thread->ideal_core = processor_id;
203 thread->affinity_mask = 1ULL << processor_id; 195 thread->affinity_mask = 1ULL << processor_id;
@@ -208,10 +200,12 @@ ResultVal<SharedPtr<Thread>> Thread::Create(KernelCore& kernel, std::string name
208 thread->name = std::move(name); 200 thread->name = std::move(name);
209 thread->callback_handle = kernel.ThreadWakeupCallbackHandleTable().Create(thread).Unwrap(); 201 thread->callback_handle = kernel.ThreadWakeupCallbackHandleTable().Create(thread).Unwrap();
210 thread->owner_process = &owner_process; 202 thread->owner_process = &owner_process;
211 thread->scheduler = &Core::System::GetInstance().Scheduler(processor_id); 203 thread->scheduler = &system.Scheduler(processor_id);
212 thread->scheduler->AddThread(thread, priority); 204 thread->scheduler->AddThread(thread);
213 thread->tls_address = thread->owner_process->MarkNextAvailableTLSSlotAsUsed(*thread); 205 thread->tls_address = thread->owner_process->MarkNextAvailableTLSSlotAsUsed(*thread);
214 206
207 thread->owner_process->RegisterThread(thread.get());
208
215 // TODO(peachum): move to ScheduleThread() when scheduler is added so selected core is used 209 // TODO(peachum): move to ScheduleThread() when scheduler is added so selected core is used
216 // to initialize the context 210 // to initialize the context
217 ResetThreadContext(thread->context, stack_top, entry_point, arg); 211 ResetThreadContext(thread->context, stack_top, entry_point, arg);
@@ -239,16 +233,16 @@ void Thread::SetWaitSynchronizationOutput(s32 output) {
239 context.cpu_registers[1] = output; 233 context.cpu_registers[1] = output;
240} 234}
241 235
242s32 Thread::GetWaitObjectIndex(WaitObject* object) const { 236s32 Thread::GetWaitObjectIndex(const WaitObject* object) const {
243 ASSERT_MSG(!wait_objects.empty(), "Thread is not waiting for anything"); 237 ASSERT_MSG(!wait_objects.empty(), "Thread is not waiting for anything");
244 auto match = std::find(wait_objects.rbegin(), wait_objects.rend(), object); 238 const auto match = std::find(wait_objects.rbegin(), wait_objects.rend(), object);
245 return static_cast<s32>(std::distance(match, wait_objects.rend()) - 1); 239 return static_cast<s32>(std::distance(match, wait_objects.rend()) - 1);
246} 240}
247 241
248VAddr Thread::GetCommandBufferAddress() const { 242VAddr Thread::GetCommandBufferAddress() const {
249 // Offset from the start of TLS at which the IPC command buffer begins. 243 // Offset from the start of TLS at which the IPC command buffer begins.
250 static constexpr int CommandHeaderOffset = 0x80; 244 constexpr u64 command_header_offset = 0x80;
251 return GetTLSAddress() + CommandHeaderOffset; 245 return GetTLSAddress() + command_header_offset;
252} 246}
253 247
254void Thread::SetStatus(ThreadStatus new_status) { 248void Thread::SetStatus(ThreadStatus new_status) {
@@ -257,7 +251,7 @@ void Thread::SetStatus(ThreadStatus new_status) {
257 } 251 }
258 252
259 if (status == ThreadStatus::Running) { 253 if (status == ThreadStatus::Running) {
260 last_running_ticks = CoreTiming::GetTicks(); 254 last_running_ticks = Core::System::GetInstance().CoreTiming().GetTicks();
261 } 255 }
262 256
263 status = new_status; 257 status = new_status;
@@ -267,8 +261,8 @@ void Thread::AddMutexWaiter(SharedPtr<Thread> thread) {
267 if (thread->lock_owner == this) { 261 if (thread->lock_owner == this) {
268 // If the thread is already waiting for this thread to release the mutex, ensure that the 262 // If the thread is already waiting for this thread to release the mutex, ensure that the
269 // waiters list is consistent and return without doing anything. 263 // waiters list is consistent and return without doing anything.
270 auto itr = std::find(wait_mutex_threads.begin(), wait_mutex_threads.end(), thread); 264 const auto iter = std::find(wait_mutex_threads.begin(), wait_mutex_threads.end(), thread);
271 ASSERT(itr != wait_mutex_threads.end()); 265 ASSERT(iter != wait_mutex_threads.end());
272 return; 266 return;
273 } 267 }
274 268
@@ -276,11 +270,16 @@ void Thread::AddMutexWaiter(SharedPtr<Thread> thread) {
276 ASSERT(thread->lock_owner == nullptr); 270 ASSERT(thread->lock_owner == nullptr);
277 271
278 // Ensure that the thread is not already in the list of mutex waiters 272 // Ensure that the thread is not already in the list of mutex waiters
279 auto itr = std::find(wait_mutex_threads.begin(), wait_mutex_threads.end(), thread); 273 const auto iter = std::find(wait_mutex_threads.begin(), wait_mutex_threads.end(), thread);
280 ASSERT(itr == wait_mutex_threads.end()); 274 ASSERT(iter == wait_mutex_threads.end());
281 275
276 // Keep the list in an ordered fashion
277 const auto insertion_point = std::find_if(
278 wait_mutex_threads.begin(), wait_mutex_threads.end(),
279 [&thread](const auto& entry) { return entry->GetPriority() > thread->GetPriority(); });
280 wait_mutex_threads.insert(insertion_point, thread);
282 thread->lock_owner = this; 281 thread->lock_owner = this;
283 wait_mutex_threads.emplace_back(std::move(thread)); 282
284 UpdatePriority(); 283 UpdatePriority();
285} 284}
286 285
@@ -288,32 +287,44 @@ void Thread::RemoveMutexWaiter(SharedPtr<Thread> thread) {
288 ASSERT(thread->lock_owner == this); 287 ASSERT(thread->lock_owner == this);
289 288
290 // Ensure that the thread is in the list of mutex waiters 289 // Ensure that the thread is in the list of mutex waiters
291 auto itr = std::find(wait_mutex_threads.begin(), wait_mutex_threads.end(), thread); 290 const auto iter = std::find(wait_mutex_threads.begin(), wait_mutex_threads.end(), thread);
292 ASSERT(itr != wait_mutex_threads.end()); 291 ASSERT(iter != wait_mutex_threads.end());
292
293 wait_mutex_threads.erase(iter);
293 294
294 boost::remove_erase(wait_mutex_threads, thread);
295 thread->lock_owner = nullptr; 295 thread->lock_owner = nullptr;
296 UpdatePriority(); 296 UpdatePriority();
297} 297}
298 298
299void Thread::UpdatePriority() { 299void Thread::UpdatePriority() {
300 // Find the highest priority among all the threads that are waiting for this thread's lock 300 // If any of the threads waiting on the mutex have a higher priority
301 // (taking into account priority inheritance), then this thread inherits
302 // that thread's priority.
301 u32 new_priority = nominal_priority; 303 u32 new_priority = nominal_priority;
302 for (const auto& thread : wait_mutex_threads) { 304 if (!wait_mutex_threads.empty()) {
303 if (thread->nominal_priority < new_priority) 305 if (wait_mutex_threads.front()->current_priority < new_priority) {
304 new_priority = thread->nominal_priority; 306 new_priority = wait_mutex_threads.front()->current_priority;
307 }
305 } 308 }
306 309
307 if (new_priority == current_priority) 310 if (new_priority == current_priority) {
308 return; 311 return;
312 }
309 313
310 scheduler->SetThreadPriority(this, new_priority); 314 scheduler->SetThreadPriority(this, new_priority);
311
312 current_priority = new_priority; 315 current_priority = new_priority;
313 316
317 if (!lock_owner) {
318 return;
319 }
320
321 // Ensure that the thread is within the correct location in the waiting list.
322 auto old_owner = lock_owner;
323 lock_owner->RemoveMutexWaiter(this);
324 old_owner->AddMutexWaiter(this);
325
314 // Recursively update the priority of the thread that depends on the priority of this one. 326 // Recursively update the priority of the thread that depends on the priority of this one.
315 if (lock_owner) 327 lock_owner->UpdatePriority();
316 lock_owner->UpdatePriority();
317} 328}
318 329
319void Thread::ChangeCore(u32 core, u64 mask) { 330void Thread::ChangeCore(u32 core, u64 mask) {
@@ -345,7 +356,7 @@ void Thread::ChangeScheduler() {
345 if (*new_processor_id != processor_id) { 356 if (*new_processor_id != processor_id) {
346 // Remove thread from previous core's scheduler 357 // Remove thread from previous core's scheduler
347 scheduler->RemoveThread(this); 358 scheduler->RemoveThread(this);
348 next_scheduler.AddThread(this, current_priority); 359 next_scheduler.AddThread(this);
349 } 360 }
350 361
351 processor_id = *new_processor_id; 362 processor_id = *new_processor_id;
@@ -360,7 +371,7 @@ void Thread::ChangeScheduler() {
360 system.CpuCore(processor_id).PrepareReschedule(); 371 system.CpuCore(processor_id).PrepareReschedule();
361} 372}
362 373
363bool Thread::AllWaitObjectsReady() { 374bool Thread::AllWaitObjectsReady() const {
364 return std::none_of( 375 return std::none_of(
365 wait_objects.begin(), wait_objects.end(), 376 wait_objects.begin(), wait_objects.end(),
366 [this](const SharedPtr<WaitObject>& object) { return object->ShouldWait(this); }); 377 [this](const SharedPtr<WaitObject>& object) { return object->ShouldWait(this); });
@@ -389,6 +400,14 @@ void Thread::SetActivity(ThreadActivity value) {
389 } 400 }
390} 401}
391 402
403void Thread::Sleep(s64 nanoseconds) {
404 // Sleep current thread and check for next thread to schedule
405 SetStatus(ThreadStatus::WaitSleep);
406
407 // Create an event to wake the thread up after the specified nanosecond delay has passed
408 WakeAfterDelay(nanoseconds);
409}
410
392//////////////////////////////////////////////////////////////////////////////////////////////////// 411////////////////////////////////////////////////////////////////////////////////////////////////////
393 412
394/** 413/**
diff --git a/src/core/hle/kernel/thread.h b/src/core/hle/kernel/thread.h
index c48b21aba..73e5d1bb4 100644
--- a/src/core/hle/kernel/thread.h
+++ b/src/core/hle/kernel/thread.h
@@ -51,7 +51,8 @@ enum class ThreadStatus {
51 WaitIPC, ///< Waiting for the reply from an IPC request 51 WaitIPC, ///< Waiting for the reply from an IPC request
52 WaitSynchAny, ///< Waiting due to WaitSynch1 or WaitSynchN with wait_all = false 52 WaitSynchAny, ///< Waiting due to WaitSynch1 or WaitSynchN with wait_all = false
53 WaitSynchAll, ///< Waiting due to WaitSynchronizationN with wait_all = true 53 WaitSynchAll, ///< Waiting due to WaitSynchronizationN with wait_all = true
54 WaitMutex, ///< Waiting due to an ArbitrateLock/WaitProcessWideKey svc 54 WaitMutex, ///< Waiting due to an ArbitrateLock svc
55 WaitCondVar, ///< Waiting due to an WaitProcessWideKey svc
55 WaitArb, ///< Waiting due to a SignalToAddress/WaitForAddress svc 56 WaitArb, ///< Waiting due to a SignalToAddress/WaitForAddress svc
56 Dormant, ///< Created but not yet made ready 57 Dormant, ///< Created but not yet made ready
57 Dead ///< Run to completion, or forcefully terminated 58 Dead ///< Run to completion, or forcefully terminated
@@ -110,7 +111,7 @@ public:
110 return HANDLE_TYPE; 111 return HANDLE_TYPE;
111 } 112 }
112 113
113 bool ShouldWait(Thread* thread) const override; 114 bool ShouldWait(const Thread* thread) const override;
114 void Acquire(Thread* thread) override; 115 void Acquire(Thread* thread) override;
115 116
116 /** 117 /**
@@ -204,7 +205,7 @@ public:
204 * object in the list. 205 * object in the list.
205 * @param object Object to query the index of. 206 * @param object Object to query the index of.
206 */ 207 */
207 s32 GetWaitObjectIndex(WaitObject* object) const; 208 s32 GetWaitObjectIndex(const WaitObject* object) const;
208 209
209 /** 210 /**
210 * Stops a thread, invalidating it from further use 211 * Stops a thread, invalidating it from further use
@@ -298,7 +299,7 @@ public:
298 } 299 }
299 300
300 /// Determines whether all the objects this thread is waiting on are ready. 301 /// Determines whether all the objects this thread is waiting on are ready.
301 bool AllWaitObjectsReady(); 302 bool AllWaitObjectsReady() const;
302 303
303 const MutexWaitingThreads& GetMutexWaitingThreads() const { 304 const MutexWaitingThreads& GetMutexWaitingThreads() const {
304 return wait_mutex_threads; 305 return wait_mutex_threads;
@@ -383,6 +384,9 @@ public:
383 384
384 void SetActivity(ThreadActivity value); 385 void SetActivity(ThreadActivity value);
385 386
387 /// Sleeps this thread for the given amount of nanoseconds.
388 void Sleep(s64 nanoseconds);
389
386private: 390private:
387 explicit Thread(KernelCore& kernel); 391 explicit Thread(KernelCore& kernel);
388 ~Thread() override; 392 ~Thread() override;
@@ -398,8 +402,14 @@ private:
398 VAddr entry_point = 0; 402 VAddr entry_point = 0;
399 VAddr stack_top = 0; 403 VAddr stack_top = 0;
400 404
401 u32 nominal_priority = 0; ///< Nominal thread priority, as set by the emulated application 405 /// Nominal thread priority, as set by the emulated application.
402 u32 current_priority = 0; ///< Current thread priority, can be temporarily changed 406 /// The nominal priority is the thread priority without priority
407 /// inheritance taken into account.
408 u32 nominal_priority = 0;
409
410 /// Current thread priority. This may change over the course of the
411 /// thread's lifetime in order to facilitate priority inheritance.
412 u32 current_priority = 0;
403 413
404 u64 total_cpu_time_ticks = 0; ///< Total CPU running ticks. 414 u64 total_cpu_time_ticks = 0; ///< Total CPU running ticks.
405 u64 last_running_ticks = 0; ///< CPU tick when thread was last running 415 u64 last_running_ticks = 0; ///< CPU tick when thread was last running
@@ -460,14 +470,4 @@ private:
460 */ 470 */
461Thread* GetCurrentThread(); 471Thread* GetCurrentThread();
462 472
463/**
464 * Waits the current thread on a sleep
465 */
466void WaitCurrentThread_Sleep();
467
468/**
469 * Stops the current thread and removes it from the thread_list
470 */
471void ExitCurrentThread();
472
473} // namespace Kernel 473} // namespace Kernel
diff --git a/src/core/hle/kernel/transfer_memory.cpp b/src/core/hle/kernel/transfer_memory.cpp
new file mode 100644
index 000000000..23228e1b5
--- /dev/null
+++ b/src/core/hle/kernel/transfer_memory.cpp
@@ -0,0 +1,73 @@
1// Copyright 2019 yuzu emulator team
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "core/hle/kernel/errors.h"
6#include "core/hle/kernel/kernel.h"
7#include "core/hle/kernel/process.h"
8#include "core/hle/kernel/shared_memory.h"
9#include "core/hle/kernel/transfer_memory.h"
10#include "core/hle/result.h"
11
12namespace Kernel {
13
14TransferMemory::TransferMemory(KernelCore& kernel) : Object{kernel} {}
15TransferMemory::~TransferMemory() = default;
16
17SharedPtr<TransferMemory> TransferMemory::Create(KernelCore& kernel, VAddr base_address,
18 size_t size, MemoryPermission permissions) {
19 SharedPtr<TransferMemory> transfer_memory{new TransferMemory(kernel)};
20
21 transfer_memory->base_address = base_address;
22 transfer_memory->memory_size = size;
23 transfer_memory->owner_permissions = permissions;
24 transfer_memory->owner_process = kernel.CurrentProcess();
25
26 return transfer_memory;
27}
28
29ResultCode TransferMemory::MapMemory(VAddr address, size_t size, MemoryPermission permissions) {
30 if (memory_size != size) {
31 return ERR_INVALID_SIZE;
32 }
33
34 if (owner_permissions != permissions) {
35 return ERR_INVALID_STATE;
36 }
37
38 if (is_mapped) {
39 return ERR_INVALID_STATE;
40 }
41
42 const auto map_state = owner_permissions == MemoryPermission::None
43 ? MemoryState::TransferMemoryIsolated
44 : MemoryState::TransferMemory;
45 auto& vm_manager = owner_process->VMManager();
46 const auto map_result = vm_manager.MapMemoryBlock(
47 address, std::make_shared<std::vector<u8>>(size), 0, size, map_state);
48
49 if (map_result.Failed()) {
50 return map_result.Code();
51 }
52
53 is_mapped = true;
54 return RESULT_SUCCESS;
55}
56
57ResultCode TransferMemory::UnmapMemory(VAddr address, size_t size) {
58 if (memory_size != size) {
59 return ERR_INVALID_SIZE;
60 }
61
62 auto& vm_manager = owner_process->VMManager();
63 const auto result = vm_manager.UnmapRange(address, size);
64
65 if (result.IsError()) {
66 return result;
67 }
68
69 is_mapped = false;
70 return RESULT_SUCCESS;
71}
72
73} // namespace Kernel
diff --git a/src/core/hle/kernel/transfer_memory.h b/src/core/hle/kernel/transfer_memory.h
new file mode 100644
index 000000000..ec294951e
--- /dev/null
+++ b/src/core/hle/kernel/transfer_memory.h
@@ -0,0 +1,91 @@
1// Copyright 2019 yuzu emulator team
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include "core/hle/kernel/object.h"
8
9union ResultCode;
10
11namespace Kernel {
12
13class KernelCore;
14class Process;
15
16enum class MemoryPermission : u32;
17
18/// Defines the interface for transfer memory objects.
19///
20/// Transfer memory is typically used for the purpose of
21/// transferring memory between separate process instances,
22/// thus the name.
23///
24class TransferMemory final : public Object {
25public:
26 static constexpr HandleType HANDLE_TYPE = HandleType::TransferMemory;
27
28 static SharedPtr<TransferMemory> Create(KernelCore& kernel, VAddr base_address, size_t size,
29 MemoryPermission permissions);
30
31 TransferMemory(const TransferMemory&) = delete;
32 TransferMemory& operator=(const TransferMemory&) = delete;
33
34 TransferMemory(TransferMemory&&) = delete;
35 TransferMemory& operator=(TransferMemory&&) = delete;
36
37 std::string GetTypeName() const override {
38 return "TransferMemory";
39 }
40
41 std::string GetName() const override {
42 return GetTypeName();
43 }
44
45 HandleType GetHandleType() const override {
46 return HANDLE_TYPE;
47 }
48
49 /// Attempts to map transfer memory with the given range and memory permissions.
50 ///
51 /// @param address The base address to being mapping memory at.
52 /// @param size The size of the memory to map, in bytes.
53 /// @param permissions The memory permissions to check against when mapping memory.
54 ///
55 /// @pre The given address, size, and memory permissions must all match
56 /// the same values that were given when creating the transfer memory
57 /// instance.
58 ///
59 ResultCode MapMemory(VAddr address, size_t size, MemoryPermission permissions);
60
61 /// Unmaps the transfer memory with the given range
62 ///
63 /// @param address The base address to begin unmapping memory at.
64 /// @param size The size of the memory to unmap, in bytes.
65 ///
66 /// @pre The given address and size must be the same as the ones used
67 /// to create the transfer memory instance.
68 ///
69 ResultCode UnmapMemory(VAddr address, size_t size);
70
71private:
72 explicit TransferMemory(KernelCore& kernel);
73 ~TransferMemory() override;
74
75 /// The base address for the memory managed by this instance.
76 VAddr base_address = 0;
77
78 /// Size of the memory, in bytes, that this instance manages.
79 size_t memory_size = 0;
80
81 /// The memory permissions that are applied to this instance.
82 MemoryPermission owner_permissions{};
83
84 /// The process that this transfer memory instance was created under.
85 Process* owner_process = nullptr;
86
87 /// Whether or not this transfer memory instance has mapped memory.
88 bool is_mapped = false;
89};
90
91} // namespace Kernel
diff --git a/src/core/hle/kernel/vm_manager.cpp b/src/core/hle/kernel/vm_manager.cpp
index 10ad94aa6..ec0a480ce 100644
--- a/src/core/hle/kernel/vm_manager.cpp
+++ b/src/core/hle/kernel/vm_manager.cpp
@@ -7,34 +7,42 @@
7#include <utility> 7#include <utility>
8#include "common/assert.h" 8#include "common/assert.h"
9#include "common/logging/log.h" 9#include "common/logging/log.h"
10#include "common/memory_hook.h"
10#include "core/arm/arm_interface.h" 11#include "core/arm/arm_interface.h"
11#include "core/core.h" 12#include "core/core.h"
12#include "core/file_sys/program_metadata.h" 13#include "core/file_sys/program_metadata.h"
13#include "core/hle/kernel/errors.h" 14#include "core/hle/kernel/errors.h"
14#include "core/hle/kernel/vm_manager.h" 15#include "core/hle/kernel/vm_manager.h"
15#include "core/memory.h" 16#include "core/memory.h"
16#include "core/memory_hook.h"
17#include "core/memory_setup.h" 17#include "core/memory_setup.h"
18 18
19namespace Kernel { 19namespace Kernel {
20 20namespace {
21static const char* GetMemoryStateName(MemoryState state) { 21const char* GetMemoryStateName(MemoryState state) {
22 static constexpr const char* names[] = { 22 static constexpr const char* names[] = {
23 "Unmapped", "Io", 23 "Unmapped", "Io",
24 "Normal", "CodeStatic", 24 "Normal", "Code",
25 "CodeMutable", "Heap", 25 "CodeData", "Heap",
26 "Shared", "Unknown1", 26 "Shared", "Unknown1",
27 "ModuleCodeStatic", "ModuleCodeMutable", 27 "ModuleCode", "ModuleCodeData",
28 "IpcBuffer0", "Stack", 28 "IpcBuffer0", "Stack",
29 "ThreadLocal", "TransferMemoryIsolated", 29 "ThreadLocal", "TransferMemoryIsolated",
30 "TransferMemory", "ProcessMemory", 30 "TransferMemory", "ProcessMemory",
31 "Inaccessible", "IpcBuffer1", 31 "Inaccessible", "IpcBuffer1",
32 "IpcBuffer3", "KernelStack", 32 "IpcBuffer3", "KernelStack",
33 }; 33 };
34 34
35 return names[ToSvcMemoryState(state)]; 35 return names[ToSvcMemoryState(state)];
36} 36}
37 37
38// Checks if a given address range lies within a larger address range.
39constexpr bool IsInsideAddressRange(VAddr address, u64 size, VAddr address_range_begin,
40 VAddr address_range_end) {
41 const VAddr end_address = address + size - 1;
42 return address_range_begin <= address && end_address <= address_range_end - 1;
43}
44} // Anonymous namespace
45
38bool VirtualMemoryArea::CanBeMergedWith(const VirtualMemoryArea& next) const { 46bool VirtualMemoryArea::CanBeMergedWith(const VirtualMemoryArea& next) const {
39 ASSERT(base + size == next.base); 47 ASSERT(base + size == next.base);
40 if (permissions != next.permissions || state != next.state || attribute != next.attribute || 48 if (permissions != next.permissions || state != next.state || attribute != next.attribute ||
@@ -169,7 +177,7 @@ ResultVal<VAddr> VMManager::FindFreeRegion(u64 size) const {
169 177
170ResultVal<VMManager::VMAHandle> VMManager::MapMMIO(VAddr target, PAddr paddr, u64 size, 178ResultVal<VMManager::VMAHandle> VMManager::MapMMIO(VAddr target, PAddr paddr, u64 size,
171 MemoryState state, 179 MemoryState state,
172 Memory::MemoryHookPointer mmio_handler) { 180 Common::MemoryHookPointer mmio_handler) {
173 // This is the appropriately sized VMA that will turn into our allocation. 181 // This is the appropriately sized VMA that will turn into our allocation.
174 CASCADE_RESULT(VMAIter vma_handle, CarveVMA(target, size)); 182 CASCADE_RESULT(VMAIter vma_handle, CarveVMA(target, size));
175 VirtualMemoryArea& final_vma = vma_handle->second; 183 VirtualMemoryArea& final_vma = vma_handle->second;
@@ -248,59 +256,50 @@ ResultCode VMManager::ReprotectRange(VAddr target, u64 size, VMAPermission new_p
248 return RESULT_SUCCESS; 256 return RESULT_SUCCESS;
249} 257}
250 258
251ResultVal<VAddr> VMManager::HeapAllocate(VAddr target, u64 size, VMAPermission perms) { 259ResultVal<VAddr> VMManager::SetHeapSize(u64 size) {
252 if (target < GetHeapRegionBaseAddress() || target + size > GetHeapRegionEndAddress() || 260 if (size > GetHeapRegionSize()) {
253 target + size < target) { 261 return ERR_OUT_OF_MEMORY;
254 return ERR_INVALID_ADDRESS; 262 }
263
264 // No need to do any additional work if the heap is already the given size.
265 if (size == GetCurrentHeapSize()) {
266 return MakeResult(heap_region_base);
255 } 267 }
256 268
257 if (heap_memory == nullptr) { 269 if (heap_memory == nullptr) {
258 // Initialize heap 270 // Initialize heap
259 heap_memory = std::make_shared<std::vector<u8>>(); 271 heap_memory = std::make_shared<std::vector<u8>>(size);
260 heap_start = heap_end = target; 272 heap_end = heap_region_base + size;
261 } else { 273 } else {
262 UnmapRange(heap_start, heap_end - heap_start); 274 UnmapRange(heap_region_base, GetCurrentHeapSize());
263 }
264
265 // If necessary, expand backing vector to cover new heap extents.
266 if (target < heap_start) {
267 heap_memory->insert(begin(*heap_memory), heap_start - target, 0);
268 heap_start = target;
269 RefreshMemoryBlockMappings(heap_memory.get());
270 }
271 if (target + size > heap_end) {
272 heap_memory->insert(end(*heap_memory), (target + size) - heap_end, 0);
273 heap_end = target + size;
274 RefreshMemoryBlockMappings(heap_memory.get());
275 } 275 }
276 ASSERT(heap_end - heap_start == heap_memory->size());
277 276
278 CASCADE_RESULT(auto vma, MapMemoryBlock(target, heap_memory, target - heap_start, size, 277 // If necessary, expand backing vector to cover new heap extents in
279 MemoryState::Heap)); 278 // the case of allocating. Otherwise, shrink the backing memory,
280 Reprotect(vma, perms); 279 // if a smaller heap has been requested.
280 const u64 old_heap_size = GetCurrentHeapSize();
281 if (size > old_heap_size) {
282 const u64 alloc_size = size - old_heap_size;
281 283
282 heap_used = size; 284 heap_memory->insert(heap_memory->end(), alloc_size, 0);
283 285 RefreshMemoryBlockMappings(heap_memory.get());
284 return MakeResult<VAddr>(heap_end - size); 286 } else if (size < old_heap_size) {
285} 287 heap_memory->resize(size);
288 heap_memory->shrink_to_fit();
286 289
287ResultCode VMManager::HeapFree(VAddr target, u64 size) { 290 RefreshMemoryBlockMappings(heap_memory.get());
288 if (target < GetHeapRegionBaseAddress() || target + size > GetHeapRegionEndAddress() ||
289 target + size < target) {
290 return ERR_INVALID_ADDRESS;
291 } 291 }
292 292
293 if (size == 0) { 293 heap_end = heap_region_base + size;
294 return RESULT_SUCCESS; 294 ASSERT(GetCurrentHeapSize() == heap_memory->size());
295 }
296 295
297 const ResultCode result = UnmapRange(target, size); 296 const auto mapping_result =
298 if (result.IsError()) { 297 MapMemoryBlock(heap_region_base, heap_memory, 0, size, MemoryState::Heap);
299 return result; 298 if (mapping_result.Failed()) {
299 return mapping_result.Code();
300 } 300 }
301 301
302 heap_used -= size; 302 return MakeResult<VAddr>(heap_region_base);
303 return RESULT_SUCCESS;
304} 303}
305 304
306MemoryInfo VMManager::QueryMemory(VAddr address) const { 305MemoryInfo VMManager::QueryMemory(VAddr address) const {
@@ -592,6 +591,7 @@ void VMManager::InitializeMemoryRegionRanges(FileSys::ProgramAddressSpaceType ty
592 591
593 heap_region_base = map_region_end; 592 heap_region_base = map_region_end;
594 heap_region_end = heap_region_base + heap_region_size; 593 heap_region_end = heap_region_base + heap_region_size;
594 heap_end = heap_region_base;
595 595
596 new_map_region_base = heap_region_end; 596 new_map_region_base = heap_region_end;
597 new_map_region_end = new_map_region_base + new_map_region_size; 597 new_map_region_end = new_map_region_base + new_map_region_size;
@@ -618,7 +618,7 @@ void VMManager::ClearPageTable() {
618 std::fill(page_table.pointers.begin(), page_table.pointers.end(), nullptr); 618 std::fill(page_table.pointers.begin(), page_table.pointers.end(), nullptr);
619 page_table.special_regions.clear(); 619 page_table.special_regions.clear();
620 std::fill(page_table.attributes.begin(), page_table.attributes.end(), 620 std::fill(page_table.attributes.begin(), page_table.attributes.end(),
621 Memory::PageType::Unmapped); 621 Common::PageType::Unmapped);
622} 622}
623 623
624VMManager::CheckResults VMManager::CheckRangeState(VAddr address, u64 size, MemoryState state_mask, 624VMManager::CheckResults VMManager::CheckRangeState(VAddr address, u64 size, MemoryState state_mask,
@@ -686,10 +686,6 @@ u64 VMManager::GetTotalMemoryUsage() const {
686 return 0xF8000000; 686 return 0xF8000000;
687} 687}
688 688
689u64 VMManager::GetTotalHeapUsage() const {
690 return heap_used;
691}
692
693VAddr VMManager::GetAddressSpaceBaseAddress() const { 689VAddr VMManager::GetAddressSpaceBaseAddress() const {
694 return address_space_base; 690 return address_space_base;
695} 691}
@@ -706,6 +702,11 @@ u64 VMManager::GetAddressSpaceWidth() const {
706 return address_space_width; 702 return address_space_width;
707} 703}
708 704
705bool VMManager::IsWithinAddressSpace(VAddr address, u64 size) const {
706 return IsInsideAddressRange(address, size, GetAddressSpaceBaseAddress(),
707 GetAddressSpaceEndAddress());
708}
709
709VAddr VMManager::GetASLRRegionBaseAddress() const { 710VAddr VMManager::GetASLRRegionBaseAddress() const {
710 return aslr_region_base; 711 return aslr_region_base;
711} 712}
@@ -750,6 +751,11 @@ u64 VMManager::GetCodeRegionSize() const {
750 return code_region_end - code_region_base; 751 return code_region_end - code_region_base;
751} 752}
752 753
754bool VMManager::IsWithinCodeRegion(VAddr address, u64 size) const {
755 return IsInsideAddressRange(address, size, GetCodeRegionBaseAddress(),
756 GetCodeRegionEndAddress());
757}
758
753VAddr VMManager::GetHeapRegionBaseAddress() const { 759VAddr VMManager::GetHeapRegionBaseAddress() const {
754 return heap_region_base; 760 return heap_region_base;
755} 761}
@@ -762,6 +768,15 @@ u64 VMManager::GetHeapRegionSize() const {
762 return heap_region_end - heap_region_base; 768 return heap_region_end - heap_region_base;
763} 769}
764 770
771u64 VMManager::GetCurrentHeapSize() const {
772 return heap_end - heap_region_base;
773}
774
775bool VMManager::IsWithinHeapRegion(VAddr address, u64 size) const {
776 return IsInsideAddressRange(address, size, GetHeapRegionBaseAddress(),
777 GetHeapRegionEndAddress());
778}
779
765VAddr VMManager::GetMapRegionBaseAddress() const { 780VAddr VMManager::GetMapRegionBaseAddress() const {
766 return map_region_base; 781 return map_region_base;
767} 782}
@@ -774,6 +789,10 @@ u64 VMManager::GetMapRegionSize() const {
774 return map_region_end - map_region_base; 789 return map_region_end - map_region_base;
775} 790}
776 791
792bool VMManager::IsWithinMapRegion(VAddr address, u64 size) const {
793 return IsInsideAddressRange(address, size, GetMapRegionBaseAddress(), GetMapRegionEndAddress());
794}
795
777VAddr VMManager::GetNewMapRegionBaseAddress() const { 796VAddr VMManager::GetNewMapRegionBaseAddress() const {
778 return new_map_region_base; 797 return new_map_region_base;
779} 798}
@@ -786,6 +805,11 @@ u64 VMManager::GetNewMapRegionSize() const {
786 return new_map_region_end - new_map_region_base; 805 return new_map_region_end - new_map_region_base;
787} 806}
788 807
808bool VMManager::IsWithinNewMapRegion(VAddr address, u64 size) const {
809 return IsInsideAddressRange(address, size, GetNewMapRegionBaseAddress(),
810 GetNewMapRegionEndAddress());
811}
812
789VAddr VMManager::GetTLSIORegionBaseAddress() const { 813VAddr VMManager::GetTLSIORegionBaseAddress() const {
790 return tls_io_region_base; 814 return tls_io_region_base;
791} 815}
@@ -798,4 +822,9 @@ u64 VMManager::GetTLSIORegionSize() const {
798 return tls_io_region_end - tls_io_region_base; 822 return tls_io_region_end - tls_io_region_base;
799} 823}
800 824
825bool VMManager::IsWithinTLSIORegion(VAddr address, u64 size) const {
826 return IsInsideAddressRange(address, size, GetTLSIORegionBaseAddress(),
827 GetTLSIORegionEndAddress());
828}
829
801} // namespace Kernel 830} // namespace Kernel
diff --git a/src/core/hle/kernel/vm_manager.h b/src/core/hle/kernel/vm_manager.h
index 6091533bc..6f484b7bf 100644
--- a/src/core/hle/kernel/vm_manager.h
+++ b/src/core/hle/kernel/vm_manager.h
@@ -9,9 +9,10 @@
9#include <tuple> 9#include <tuple>
10#include <vector> 10#include <vector>
11#include "common/common_types.h" 11#include "common/common_types.h"
12#include "common/memory_hook.h"
13#include "common/page_table.h"
12#include "core/hle/result.h" 14#include "core/hle/result.h"
13#include "core/memory.h" 15#include "core/memory.h"
14#include "core/memory_hook.h"
15 16
16namespace FileSys { 17namespace FileSys {
17enum class ProgramAddressSpaceType : u8; 18enum class ProgramAddressSpaceType : u8;
@@ -164,12 +165,12 @@ enum class MemoryState : u32 {
164 Unmapped = 0x00, 165 Unmapped = 0x00,
165 Io = 0x01 | FlagMapped, 166 Io = 0x01 | FlagMapped,
166 Normal = 0x02 | FlagMapped | FlagQueryPhysicalAddressAllowed, 167 Normal = 0x02 | FlagMapped | FlagQueryPhysicalAddressAllowed,
167 CodeStatic = 0x03 | CodeFlags | FlagMapProcess, 168 Code = 0x03 | CodeFlags | FlagMapProcess,
168 CodeMutable = 0x04 | CodeFlags | FlagMapProcess | FlagCodeMemory, 169 CodeData = 0x04 | DataFlags | FlagMapProcess | FlagCodeMemory,
169 Heap = 0x05 | DataFlags | FlagCodeMemory, 170 Heap = 0x05 | DataFlags | FlagCodeMemory,
170 Shared = 0x06 | FlagMapped | FlagMemoryPoolAllocated, 171 Shared = 0x06 | FlagMapped | FlagMemoryPoolAllocated,
171 ModuleCodeStatic = 0x08 | CodeFlags | FlagModule | FlagMapProcess, 172 ModuleCode = 0x08 | CodeFlags | FlagModule | FlagMapProcess,
172 ModuleCodeMutable = 0x09 | DataFlags | FlagModule | FlagMapProcess | FlagCodeMemory, 173 ModuleCodeData = 0x09 | DataFlags | FlagModule | FlagMapProcess | FlagCodeMemory,
173 174
174 IpcBuffer0 = 0x0A | FlagMapped | FlagQueryPhysicalAddressAllowed | FlagMemoryPoolAllocated | 175 IpcBuffer0 = 0x0A | FlagMapped | FlagQueryPhysicalAddressAllowed | FlagMemoryPoolAllocated |
175 IPCFlags | FlagSharedDevice | FlagSharedDeviceAligned, 176 IPCFlags | FlagSharedDevice | FlagSharedDeviceAligned,
@@ -290,7 +291,7 @@ struct VirtualMemoryArea {
290 // Settings for type = MMIO 291 // Settings for type = MMIO
291 /// Physical address of the register area this VMA maps to. 292 /// Physical address of the register area this VMA maps to.
292 PAddr paddr = 0; 293 PAddr paddr = 0;
293 Memory::MemoryHookPointer mmio_handler = nullptr; 294 Common::MemoryHookPointer mmio_handler = nullptr;
294 295
295 /// Tests if this area can be merged to the right with `next`. 296 /// Tests if this area can be merged to the right with `next`.
296 bool CanBeMergedWith(const VirtualMemoryArea& next) const; 297 bool CanBeMergedWith(const VirtualMemoryArea& next) const;
@@ -368,7 +369,7 @@ public:
368 * @param mmio_handler The handler that will implement read and write for this MMIO region. 369 * @param mmio_handler The handler that will implement read and write for this MMIO region.
369 */ 370 */
370 ResultVal<VMAHandle> MapMMIO(VAddr target, PAddr paddr, u64 size, MemoryState state, 371 ResultVal<VMAHandle> MapMMIO(VAddr target, PAddr paddr, u64 size, MemoryState state,
371 Memory::MemoryHookPointer mmio_handler); 372 Common::MemoryHookPointer mmio_handler);
372 373
373 /// Unmaps a range of addresses, splitting VMAs as necessary. 374 /// Unmaps a range of addresses, splitting VMAs as necessary.
374 ResultCode UnmapRange(VAddr target, u64 size); 375 ResultCode UnmapRange(VAddr target, u64 size);
@@ -379,11 +380,41 @@ public:
379 /// Changes the permissions of a range of addresses, splitting VMAs as necessary. 380 /// Changes the permissions of a range of addresses, splitting VMAs as necessary.
380 ResultCode ReprotectRange(VAddr target, u64 size, VMAPermission new_perms); 381 ResultCode ReprotectRange(VAddr target, u64 size, VMAPermission new_perms);
381 382
382 ResultVal<VAddr> HeapAllocate(VAddr target, u64 size, VMAPermission perms);
383 ResultCode HeapFree(VAddr target, u64 size);
384
385 ResultCode MirrorMemory(VAddr dst_addr, VAddr src_addr, u64 size, MemoryState state); 383 ResultCode MirrorMemory(VAddr dst_addr, VAddr src_addr, u64 size, MemoryState state);
386 384
385 /// Attempts to allocate a heap with the given size.
386 ///
387 /// @param size The size of the heap to allocate in bytes.
388 ///
389 /// @note If a heap is currently allocated, and this is called
390 /// with a size that is equal to the size of the current heap,
391 /// then this function will do nothing and return the current
392 /// heap's starting address, as there's no need to perform
393 /// any additional heap allocation work.
394 ///
395 /// @note If a heap is currently allocated, and this is called
396 /// with a size less than the current heap's size, then
397 /// this function will attempt to shrink the heap.
398 ///
399 /// @note If a heap is currently allocated, and this is called
400 /// with a size larger than the current heap's size, then
401 /// this function will attempt to extend the size of the heap.
402 ///
403 /// @returns A result indicating either success or failure.
404 /// <p>
405 /// If successful, this function will return a result
406 /// containing the starting address to the allocated heap.
407 /// <p>
408 /// If unsuccessful, this function will return a result
409 /// containing an error code.
410 ///
411 /// @pre The given size must lie within the allowable heap
412 /// memory region managed by this VMManager instance.
413 /// Failure to abide by this will result in ERR_OUT_OF_MEMORY
414 /// being returned as the result.
415 ///
416 ResultVal<VAddr> SetHeapSize(u64 size);
417
387 /// Queries the memory manager for information about the given address. 418 /// Queries the memory manager for information about the given address.
388 /// 419 ///
389 /// @param address The address to query the memory manager about for information. 420 /// @param address The address to query the memory manager about for information.
@@ -417,9 +448,6 @@ public:
417 /// Gets the total memory usage, used by svcGetInfo 448 /// Gets the total memory usage, used by svcGetInfo
418 u64 GetTotalMemoryUsage() const; 449 u64 GetTotalMemoryUsage() const;
419 450
420 /// Gets the total heap usage, used by svcGetInfo
421 u64 GetTotalHeapUsage() const;
422
423 /// Gets the address space base address 451 /// Gets the address space base address
424 VAddr GetAddressSpaceBaseAddress() const; 452 VAddr GetAddressSpaceBaseAddress() const;
425 453
@@ -432,18 +460,21 @@ public:
432 /// Gets the address space width in bits. 460 /// Gets the address space width in bits.
433 u64 GetAddressSpaceWidth() const; 461 u64 GetAddressSpaceWidth() const;
434 462
463 /// Determines whether or not the given address range lies within the address space.
464 bool IsWithinAddressSpace(VAddr address, u64 size) const;
465
435 /// Gets the base address of the ASLR region. 466 /// Gets the base address of the ASLR region.
436 VAddr GetASLRRegionBaseAddress() const; 467 VAddr GetASLRRegionBaseAddress() const;
437 468
438 /// Gets the end address of the ASLR region. 469 /// Gets the end address of the ASLR region.
439 VAddr GetASLRRegionEndAddress() const; 470 VAddr GetASLRRegionEndAddress() const;
440 471
441 /// Determines whether or not the specified address range is within the ASLR region.
442 bool IsWithinASLRRegion(VAddr address, u64 size) const;
443
444 /// Gets the size of the ASLR region 472 /// Gets the size of the ASLR region
445 u64 GetASLRRegionSize() const; 473 u64 GetASLRRegionSize() const;
446 474
475 /// Determines whether or not the specified address range is within the ASLR region.
476 bool IsWithinASLRRegion(VAddr address, u64 size) const;
477
447 /// Gets the base address of the code region. 478 /// Gets the base address of the code region.
448 VAddr GetCodeRegionBaseAddress() const; 479 VAddr GetCodeRegionBaseAddress() const;
449 480
@@ -453,6 +484,9 @@ public:
453 /// Gets the total size of the code region in bytes. 484 /// Gets the total size of the code region in bytes.
454 u64 GetCodeRegionSize() const; 485 u64 GetCodeRegionSize() const;
455 486
487 /// Determines whether or not the specified range is within the code region.
488 bool IsWithinCodeRegion(VAddr address, u64 size) const;
489
456 /// Gets the base address of the heap region. 490 /// Gets the base address of the heap region.
457 VAddr GetHeapRegionBaseAddress() const; 491 VAddr GetHeapRegionBaseAddress() const;
458 492
@@ -462,6 +496,16 @@ public:
462 /// Gets the total size of the heap region in bytes. 496 /// Gets the total size of the heap region in bytes.
463 u64 GetHeapRegionSize() const; 497 u64 GetHeapRegionSize() const;
464 498
499 /// Gets the total size of the current heap in bytes.
500 ///
501 /// @note This is the current allocated heap size, not the size
502 /// of the region it's allowed to exist within.
503 ///
504 u64 GetCurrentHeapSize() const;
505
506 /// Determines whether or not the specified range is within the heap region.
507 bool IsWithinHeapRegion(VAddr address, u64 size) const;
508
465 /// Gets the base address of the map region. 509 /// Gets the base address of the map region.
466 VAddr GetMapRegionBaseAddress() const; 510 VAddr GetMapRegionBaseAddress() const;
467 511
@@ -471,6 +515,9 @@ public:
471 /// Gets the total size of the map region in bytes. 515 /// Gets the total size of the map region in bytes.
472 u64 GetMapRegionSize() const; 516 u64 GetMapRegionSize() const;
473 517
518 /// Determines whether or not the specified range is within the map region.
519 bool IsWithinMapRegion(VAddr address, u64 size) const;
520
474 /// Gets the base address of the new map region. 521 /// Gets the base address of the new map region.
475 VAddr GetNewMapRegionBaseAddress() const; 522 VAddr GetNewMapRegionBaseAddress() const;
476 523
@@ -480,6 +527,9 @@ public:
480 /// Gets the total size of the new map region in bytes. 527 /// Gets the total size of the new map region in bytes.
481 u64 GetNewMapRegionSize() const; 528 u64 GetNewMapRegionSize() const;
482 529
530 /// Determines whether or not the given address range is within the new map region
531 bool IsWithinNewMapRegion(VAddr address, u64 size) const;
532
483 /// Gets the base address of the TLS IO region. 533 /// Gets the base address of the TLS IO region.
484 VAddr GetTLSIORegionBaseAddress() const; 534 VAddr GetTLSIORegionBaseAddress() const;
485 535
@@ -489,9 +539,12 @@ public:
489 /// Gets the total size of the TLS IO region in bytes. 539 /// Gets the total size of the TLS IO region in bytes.
490 u64 GetTLSIORegionSize() const; 540 u64 GetTLSIORegionSize() const;
491 541
542 /// Determines if the given address range is within the TLS IO region.
543 bool IsWithinTLSIORegion(VAddr address, u64 size) const;
544
492 /// Each VMManager has its own page table, which is set as the main one when the owning process 545 /// Each VMManager has its own page table, which is set as the main one when the owning process
493 /// is scheduled. 546 /// is scheduled.
494 Memory::PageTable page_table; 547 Common::PageTable page_table{Memory::PAGE_BITS};
495 548
496private: 549private:
497 using VMAIter = VMAMap::iterator; 550 using VMAIter = VMAMap::iterator;
@@ -606,9 +659,9 @@ private:
606 // This makes deallocation and reallocation of holes fast and keeps process memory contiguous 659 // This makes deallocation and reallocation of holes fast and keeps process memory contiguous
607 // in the emulator address space, allowing Memory::GetPointer to be reasonably safe. 660 // in the emulator address space, allowing Memory::GetPointer to be reasonably safe.
608 std::shared_ptr<std::vector<u8>> heap_memory; 661 std::shared_ptr<std::vector<u8>> heap_memory;
609 // The left/right bounds of the address space covered by heap_memory. 662
610 VAddr heap_start = 0; 663 // The end of the currently allocated heap. This is not an inclusive
664 // end of the range. This is essentially 'base_address + current_size'.
611 VAddr heap_end = 0; 665 VAddr heap_end = 0;
612 u64 heap_used = 0;
613}; 666};
614} // namespace Kernel 667} // namespace Kernel
diff --git a/src/core/hle/kernel/wait_object.h b/src/core/hle/kernel/wait_object.h
index 5987fb971..04464a51a 100644
--- a/src/core/hle/kernel/wait_object.h
+++ b/src/core/hle/kernel/wait_object.h
@@ -24,7 +24,7 @@ public:
24 * @param thread The thread about which we're deciding. 24 * @param thread The thread about which we're deciding.
25 * @return True if the current thread should wait due to this object being unavailable 25 * @return True if the current thread should wait due to this object being unavailable
26 */ 26 */
27 virtual bool ShouldWait(Thread* thread) const = 0; 27 virtual bool ShouldWait(const Thread* thread) const = 0;
28 28
29 /// Acquire/lock the object for the specified thread if it is available 29 /// Acquire/lock the object for the specified thread if it is available
30 virtual void Acquire(Thread* thread) = 0; 30 virtual void Acquire(Thread* thread) = 0;
diff --git a/src/core/hle/result.h b/src/core/hle/result.h
index bfb77cc31..ab84f5ddc 100644
--- a/src/core/hle/result.h
+++ b/src/core/hle/result.h
@@ -8,20 +8,11 @@
8#include <utility> 8#include <utility>
9#include "common/assert.h" 9#include "common/assert.h"
10#include "common/bit_field.h" 10#include "common/bit_field.h"
11#include "common/common_funcs.h"
12#include "common/common_types.h" 11#include "common/common_types.h"
13 12
14// All the constants in this file come from http://switchbrew.org/index.php?title=Error_codes 13// All the constants in this file come from http://switchbrew.org/index.php?title=Error_codes
15 14
16/** 15/**
17 * Detailed description of the error. Code 0 always means success.
18 */
19enum class ErrorDescription : u32 {
20 Success = 0,
21 RemoteProcessDead = 301,
22};
23
24/**
25 * Identifies the module which caused the error. Error codes can be propagated through a call 16 * Identifies the module which caused the error. Error codes can be propagated through a call
26 * chain, meaning that this doesn't always correspond to the module where the API call made is 17 * chain, meaning that this doesn't always correspond to the module where the API call made is
27 * contained. 18 * contained.
@@ -121,7 +112,7 @@ enum class ErrorModule : u32 {
121 ShopN = 811, 112 ShopN = 811,
122}; 113};
123 114
124/// Encapsulates a CTR-OS error code, allowing it to be separated into its constituent fields. 115/// Encapsulates a Horizon OS error code, allowing it to be separated into its constituent fields.
125union ResultCode { 116union ResultCode {
126 u32 raw; 117 u32 raw;
127 118
@@ -134,17 +125,9 @@ union ResultCode {
134 125
135 constexpr explicit ResultCode(u32 raw) : raw(raw) {} 126 constexpr explicit ResultCode(u32 raw) : raw(raw) {}
136 127
137 constexpr ResultCode(ErrorModule module, ErrorDescription description)
138 : ResultCode(module, static_cast<u32>(description)) {}
139
140 constexpr ResultCode(ErrorModule module_, u32 description_) 128 constexpr ResultCode(ErrorModule module_, u32 description_)
141 : raw(module.FormatValue(module_) | description.FormatValue(description_)) {} 129 : raw(module.FormatValue(module_) | description.FormatValue(description_)) {}
142 130
143 constexpr ResultCode& operator=(const ResultCode& o) {
144 raw = o.raw;
145 return *this;
146 }
147
148 constexpr bool IsSuccess() const { 131 constexpr bool IsSuccess() const {
149 return raw == 0; 132 return raw == 0;
150 } 133 }
diff --git a/src/core/hle/service/am/am.cpp b/src/core/hle/service/am/am.cpp
index 3f009d2b7..d31ab7970 100644
--- a/src/core/hle/service/am/am.cpp
+++ b/src/core/hle/service/am/am.cpp
@@ -2,10 +2,10 @@
2// Licensed under GPLv2 or any later version 2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included. 3// Refer to the license.txt file included.
4 4
5#include <algorithm>
5#include <array> 6#include <array>
6#include <cinttypes> 7#include <cinttypes>
7#include <cstring> 8#include <cstring>
8#include <stack>
9#include "audio_core/audio_renderer.h" 9#include "audio_core/audio_renderer.h"
10#include "core/core.h" 10#include "core/core.h"
11#include "core/file_sys/savedata_factory.h" 11#include "core/file_sys/savedata_factory.h"
@@ -93,38 +93,84 @@ void IWindowController::AcquireForegroundRights(Kernel::HLERequestContext& ctx)
93} 93}
94 94
95IAudioController::IAudioController() : ServiceFramework("IAudioController") { 95IAudioController::IAudioController() : ServiceFramework("IAudioController") {
96 // clang-format off
96 static const FunctionInfo functions[] = { 97 static const FunctionInfo functions[] = {
97 {0, &IAudioController::SetExpectedMasterVolume, "SetExpectedMasterVolume"}, 98 {0, &IAudioController::SetExpectedMasterVolume, "SetExpectedMasterVolume"},
98 {1, &IAudioController::GetMainAppletExpectedMasterVolume, 99 {1, &IAudioController::GetMainAppletExpectedMasterVolume, "GetMainAppletExpectedMasterVolume"},
99 "GetMainAppletExpectedMasterVolume"}, 100 {2, &IAudioController::GetLibraryAppletExpectedMasterVolume, "GetLibraryAppletExpectedMasterVolume"},
100 {2, &IAudioController::GetLibraryAppletExpectedMasterVolume, 101 {3, &IAudioController::ChangeMainAppletMasterVolume, "ChangeMainAppletMasterVolume"},
101 "GetLibraryAppletExpectedMasterVolume"}, 102 {4, &IAudioController::SetTransparentAudioRate, "SetTransparentVolumeRate"},
102 {3, nullptr, "ChangeMainAppletMasterVolume"},
103 {4, nullptr, "SetTransparentVolumeRate"},
104 }; 103 };
104 // clang-format on
105
105 RegisterHandlers(functions); 106 RegisterHandlers(functions);
106} 107}
107 108
108IAudioController::~IAudioController() = default; 109IAudioController::~IAudioController() = default;
109 110
110void IAudioController::SetExpectedMasterVolume(Kernel::HLERequestContext& ctx) { 111void IAudioController::SetExpectedMasterVolume(Kernel::HLERequestContext& ctx) {
111 LOG_WARNING(Service_AM, "(STUBBED) called"); 112 IPC::RequestParser rp{ctx};
113 const float main_applet_volume_tmp = rp.Pop<float>();
114 const float library_applet_volume_tmp = rp.Pop<float>();
115
116 LOG_DEBUG(Service_AM, "called. main_applet_volume={}, library_applet_volume={}",
117 main_applet_volume_tmp, library_applet_volume_tmp);
118
119 // Ensure the volume values remain within the 0-100% range
120 main_applet_volume = std::clamp(main_applet_volume_tmp, min_allowed_volume, max_allowed_volume);
121 library_applet_volume =
122 std::clamp(library_applet_volume_tmp, min_allowed_volume, max_allowed_volume);
123
112 IPC::ResponseBuilder rb{ctx, 2}; 124 IPC::ResponseBuilder rb{ctx, 2};
113 rb.Push(RESULT_SUCCESS); 125 rb.Push(RESULT_SUCCESS);
114} 126}
115 127
116void IAudioController::GetMainAppletExpectedMasterVolume(Kernel::HLERequestContext& ctx) { 128void IAudioController::GetMainAppletExpectedMasterVolume(Kernel::HLERequestContext& ctx) {
117 LOG_WARNING(Service_AM, "(STUBBED) called"); 129 LOG_DEBUG(Service_AM, "called. main_applet_volume={}", main_applet_volume);
118 IPC::ResponseBuilder rb{ctx, 3}; 130 IPC::ResponseBuilder rb{ctx, 3};
119 rb.Push(RESULT_SUCCESS); 131 rb.Push(RESULT_SUCCESS);
120 rb.Push(volume); 132 rb.Push(main_applet_volume);
121} 133}
122 134
123void IAudioController::GetLibraryAppletExpectedMasterVolume(Kernel::HLERequestContext& ctx) { 135void IAudioController::GetLibraryAppletExpectedMasterVolume(Kernel::HLERequestContext& ctx) {
124 LOG_WARNING(Service_AM, "(STUBBED) called"); 136 LOG_DEBUG(Service_AM, "called. library_applet_volume={}", library_applet_volume);
125 IPC::ResponseBuilder rb{ctx, 3}; 137 IPC::ResponseBuilder rb{ctx, 3};
126 rb.Push(RESULT_SUCCESS); 138 rb.Push(RESULT_SUCCESS);
127 rb.Push(volume); 139 rb.Push(library_applet_volume);
140}
141
142void IAudioController::ChangeMainAppletMasterVolume(Kernel::HLERequestContext& ctx) {
143 struct Parameters {
144 float volume;
145 s64 fade_time_ns;
146 };
147 static_assert(sizeof(Parameters) == 16);
148
149 IPC::RequestParser rp{ctx};
150 const auto parameters = rp.PopRaw<Parameters>();
151
152 LOG_DEBUG(Service_AM, "called. volume={}, fade_time_ns={}", parameters.volume,
153 parameters.fade_time_ns);
154
155 main_applet_volume = std::clamp(parameters.volume, min_allowed_volume, max_allowed_volume);
156 fade_time_ns = std::chrono::nanoseconds{parameters.fade_time_ns};
157
158 IPC::ResponseBuilder rb{ctx, 2};
159 rb.Push(RESULT_SUCCESS);
160}
161
162void IAudioController::SetTransparentAudioRate(Kernel::HLERequestContext& ctx) {
163 IPC::RequestParser rp{ctx};
164 const float transparent_volume_rate_tmp = rp.Pop<float>();
165
166 LOG_DEBUG(Service_AM, "called. transparent_volume_rate={}", transparent_volume_rate_tmp);
167
168 // Clamp volume range to 0-100%.
169 transparent_volume_rate =
170 std::clamp(transparent_volume_rate_tmp, min_allowed_volume, max_allowed_volume);
171
172 IPC::ResponseBuilder rb{ctx, 2};
173 rb.Push(RESULT_SUCCESS);
128} 174}
129 175
130IDisplayController::IDisplayController() : ServiceFramework("IDisplayController") { 176IDisplayController::IDisplayController() : ServiceFramework("IDisplayController") {
@@ -169,7 +215,21 @@ IDisplayController::IDisplayController() : ServiceFramework("IDisplayController"
169 215
170IDisplayController::~IDisplayController() = default; 216IDisplayController::~IDisplayController() = default;
171 217
172IDebugFunctions::IDebugFunctions() : ServiceFramework("IDebugFunctions") {} 218IDebugFunctions::IDebugFunctions() : ServiceFramework{"IDebugFunctions"} {
219 // clang-format off
220 static const FunctionInfo functions[] = {
221 {0, nullptr, "NotifyMessageToHomeMenuForDebug"},
222 {1, nullptr, "OpenMainApplication"},
223 {10, nullptr, "EmulateButtonEvent"},
224 {20, nullptr, "InvalidateTransitionLayer"},
225 {30, nullptr, "RequestLaunchApplicationWithUserAndArgumentForDebug"},
226 {40, nullptr, "GetAppletResourceUsageInfo"},
227 };
228 // clang-format on
229
230 RegisterHandlers(functions);
231}
232
173IDebugFunctions::~IDebugFunctions() = default; 233IDebugFunctions::~IDebugFunctions() = default;
174 234
175ISelfController::ISelfController(std::shared_ptr<NVFlinger::NVFlinger> nvflinger) 235ISelfController::ISelfController(std::shared_ptr<NVFlinger::NVFlinger> nvflinger)
@@ -179,8 +239,8 @@ ISelfController::ISelfController(std::shared_ptr<NVFlinger::NVFlinger> nvflinger
179 {0, nullptr, "Exit"}, 239 {0, nullptr, "Exit"},
180 {1, &ISelfController::LockExit, "LockExit"}, 240 {1, &ISelfController::LockExit, "LockExit"},
181 {2, &ISelfController::UnlockExit, "UnlockExit"}, 241 {2, &ISelfController::UnlockExit, "UnlockExit"},
182 {3, nullptr, "EnterFatalSection"}, 242 {3, &ISelfController::EnterFatalSection, "EnterFatalSection"},
183 {4, nullptr, "LeaveFatalSection"}, 243 {4, &ISelfController::LeaveFatalSection, "LeaveFatalSection"},
184 {9, &ISelfController::GetLibraryAppletLaunchableEvent, "GetLibraryAppletLaunchableEvent"}, 244 {9, &ISelfController::GetLibraryAppletLaunchableEvent, "GetLibraryAppletLaunchableEvent"},
185 {10, &ISelfController::SetScreenShotPermission, "SetScreenShotPermission"}, 245 {10, &ISelfController::SetScreenShotPermission, "SetScreenShotPermission"},
186 {11, &ISelfController::SetOperationModeChangedNotification, "SetOperationModeChangedNotification"}, 246 {11, &ISelfController::SetOperationModeChangedNotification, "SetOperationModeChangedNotification"},
@@ -225,41 +285,54 @@ ISelfController::ISelfController(std::shared_ptr<NVFlinger::NVFlinger> nvflinger
225 285
226ISelfController::~ISelfController() = default; 286ISelfController::~ISelfController() = default;
227 287
228void ISelfController::SetFocusHandlingMode(Kernel::HLERequestContext& ctx) { 288void ISelfController::LockExit(Kernel::HLERequestContext& ctx) {
229 // Takes 3 input u8s with each field located immediately after the previous
230 // u8, these are bool flags. No output.
231 LOG_WARNING(Service_AM, "(STUBBED) called"); 289 LOG_WARNING(Service_AM, "(STUBBED) called");
232 290
233 IPC::RequestParser rp{ctx}; 291 IPC::ResponseBuilder rb{ctx, 2};
292 rb.Push(RESULT_SUCCESS);
293}
234 294
235 struct FocusHandlingModeParams { 295void ISelfController::UnlockExit(Kernel::HLERequestContext& ctx) {
236 u8 unknown0; 296 LOG_WARNING(Service_AM, "(STUBBED) called");
237 u8 unknown1;
238 u8 unknown2;
239 };
240 auto flags = rp.PopRaw<FocusHandlingModeParams>();
241 297
242 IPC::ResponseBuilder rb{ctx, 2}; 298 IPC::ResponseBuilder rb{ctx, 2};
243 rb.Push(RESULT_SUCCESS); 299 rb.Push(RESULT_SUCCESS);
244} 300}
245 301
246void ISelfController::SetRestartMessageEnabled(Kernel::HLERequestContext& ctx) { 302void ISelfController::EnterFatalSection(Kernel::HLERequestContext& ctx) {
247 LOG_WARNING(Service_AM, "(STUBBED) called"); 303 ++num_fatal_sections_entered;
304 LOG_DEBUG(Service_AM, "called. Num fatal sections entered: {}", num_fatal_sections_entered);
248 305
249 IPC::ResponseBuilder rb{ctx, 2}; 306 IPC::ResponseBuilder rb{ctx, 2};
250 rb.Push(RESULT_SUCCESS); 307 rb.Push(RESULT_SUCCESS);
251} 308}
252 309
253void ISelfController::SetPerformanceModeChangedNotification(Kernel::HLERequestContext& ctx) { 310void ISelfController::LeaveFatalSection(Kernel::HLERequestContext& ctx) {
254 IPC::RequestParser rp{ctx}; 311 LOG_DEBUG(Service_AM, "called.");
255 312
256 bool flag = rp.Pop<bool>(); 313 // Entry and exit of fatal sections must be balanced.
257 LOG_WARNING(Service_AM, "(STUBBED) called flag={}", flag); 314 if (num_fatal_sections_entered == 0) {
315 IPC::ResponseBuilder rb{ctx, 2};
316 rb.Push(ResultCode{ErrorModule::AM, 512});
317 return;
318 }
319
320 --num_fatal_sections_entered;
258 321
259 IPC::ResponseBuilder rb{ctx, 2}; 322 IPC::ResponseBuilder rb{ctx, 2};
260 rb.Push(RESULT_SUCCESS); 323 rb.Push(RESULT_SUCCESS);
261} 324}
262 325
326void ISelfController::GetLibraryAppletLaunchableEvent(Kernel::HLERequestContext& ctx) {
327 LOG_WARNING(Service_AM, "(STUBBED) called");
328
329 launchable_event.writable->Signal();
330
331 IPC::ResponseBuilder rb{ctx, 2, 1};
332 rb.Push(RESULT_SUCCESS);
333 rb.PushCopyObjects(launchable_event.readable);
334}
335
263void ISelfController::SetScreenShotPermission(Kernel::HLERequestContext& ctx) { 336void ISelfController::SetScreenShotPermission(Kernel::HLERequestContext& ctx) {
264 LOG_WARNING(Service_AM, "(STUBBED) called"); 337 LOG_WARNING(Service_AM, "(STUBBED) called");
265 338
@@ -277,40 +350,51 @@ void ISelfController::SetOperationModeChangedNotification(Kernel::HLERequestCont
277 rb.Push(RESULT_SUCCESS); 350 rb.Push(RESULT_SUCCESS);
278} 351}
279 352
280void ISelfController::SetOutOfFocusSuspendingEnabled(Kernel::HLERequestContext& ctx) { 353void ISelfController::SetPerformanceModeChangedNotification(Kernel::HLERequestContext& ctx) {
281 // Takes 3 input u8s with each field located immediately after the previous
282 // u8, these are bool flags. No output.
283 IPC::RequestParser rp{ctx}; 354 IPC::RequestParser rp{ctx};
284 355
285 bool enabled = rp.Pop<bool>(); 356 bool flag = rp.Pop<bool>();
286 LOG_WARNING(Service_AM, "(STUBBED) called enabled={}", enabled); 357 LOG_WARNING(Service_AM, "(STUBBED) called flag={}", flag);
287 358
288 IPC::ResponseBuilder rb{ctx, 2}; 359 IPC::ResponseBuilder rb{ctx, 2};
289 rb.Push(RESULT_SUCCESS); 360 rb.Push(RESULT_SUCCESS);
290} 361}
291 362
292void ISelfController::LockExit(Kernel::HLERequestContext& ctx) { 363void ISelfController::SetFocusHandlingMode(Kernel::HLERequestContext& ctx) {
364 // Takes 3 input u8s with each field located immediately after the previous
365 // u8, these are bool flags. No output.
293 LOG_WARNING(Service_AM, "(STUBBED) called"); 366 LOG_WARNING(Service_AM, "(STUBBED) called");
294 367
368 IPC::RequestParser rp{ctx};
369
370 struct FocusHandlingModeParams {
371 u8 unknown0;
372 u8 unknown1;
373 u8 unknown2;
374 };
375 auto flags = rp.PopRaw<FocusHandlingModeParams>();
376
295 IPC::ResponseBuilder rb{ctx, 2}; 377 IPC::ResponseBuilder rb{ctx, 2};
296 rb.Push(RESULT_SUCCESS); 378 rb.Push(RESULT_SUCCESS);
297} 379}
298 380
299void ISelfController::UnlockExit(Kernel::HLERequestContext& ctx) { 381void ISelfController::SetRestartMessageEnabled(Kernel::HLERequestContext& ctx) {
300 LOG_WARNING(Service_AM, "(STUBBED) called"); 382 LOG_WARNING(Service_AM, "(STUBBED) called");
301 383
302 IPC::ResponseBuilder rb{ctx, 2}; 384 IPC::ResponseBuilder rb{ctx, 2};
303 rb.Push(RESULT_SUCCESS); 385 rb.Push(RESULT_SUCCESS);
304} 386}
305 387
306void ISelfController::GetLibraryAppletLaunchableEvent(Kernel::HLERequestContext& ctx) { 388void ISelfController::SetOutOfFocusSuspendingEnabled(Kernel::HLERequestContext& ctx) {
307 LOG_WARNING(Service_AM, "(STUBBED) called"); 389 // Takes 3 input u8s with each field located immediately after the previous
390 // u8, these are bool flags. No output.
391 IPC::RequestParser rp{ctx};
308 392
309 launchable_event.writable->Signal(); 393 bool enabled = rp.Pop<bool>();
394 LOG_WARNING(Service_AM, "(STUBBED) called enabled={}", enabled);
310 395
311 IPC::ResponseBuilder rb{ctx, 2, 1}; 396 IPC::ResponseBuilder rb{ctx, 2};
312 rb.Push(RESULT_SUCCESS); 397 rb.Push(RESULT_SUCCESS);
313 rb.PushCopyObjects(launchable_event.readable);
314} 398}
315 399
316void ISelfController::SetScreenShotImageOrientation(Kernel::HLERequestContext& ctx) { 400void ISelfController::SetScreenShotImageOrientation(Kernel::HLERequestContext& ctx) {
diff --git a/src/core/hle/service/am/am.h b/src/core/hle/service/am/am.h
index b6113cfdd..991b7d47c 100644
--- a/src/core/hle/service/am/am.h
+++ b/src/core/hle/service/am/am.h
@@ -4,6 +4,7 @@
4 4
5#pragma once 5#pragma once
6 6
7#include <chrono>
7#include <memory> 8#include <memory>
8#include <queue> 9#include <queue>
9#include "core/hle/kernel/writable_event.h" 10#include "core/hle/kernel/writable_event.h"
@@ -81,8 +82,21 @@ private:
81 void SetExpectedMasterVolume(Kernel::HLERequestContext& ctx); 82 void SetExpectedMasterVolume(Kernel::HLERequestContext& ctx);
82 void GetMainAppletExpectedMasterVolume(Kernel::HLERequestContext& ctx); 83 void GetMainAppletExpectedMasterVolume(Kernel::HLERequestContext& ctx);
83 void GetLibraryAppletExpectedMasterVolume(Kernel::HLERequestContext& ctx); 84 void GetLibraryAppletExpectedMasterVolume(Kernel::HLERequestContext& ctx);
85 void ChangeMainAppletMasterVolume(Kernel::HLERequestContext& ctx);
86 void SetTransparentAudioRate(Kernel::HLERequestContext& ctx);
84 87
85 u32 volume{100}; 88 static constexpr float min_allowed_volume = 0.0f;
89 static constexpr float max_allowed_volume = 1.0f;
90
91 float main_applet_volume{0.25f};
92 float library_applet_volume{max_allowed_volume};
93 float transparent_volume_rate{min_allowed_volume};
94
95 // Volume transition fade time in nanoseconds.
96 // e.g. If the main applet volume was 0% and was changed to 50%
97 // with a fade of 50ns, then over the course of 50ns,
98 // the volume will gradually fade up to 50%
99 std::chrono::nanoseconds fade_time_ns{0};
86}; 100};
87 101
88class IDisplayController final : public ServiceFramework<IDisplayController> { 102class IDisplayController final : public ServiceFramework<IDisplayController> {
@@ -103,17 +117,19 @@ public:
103 ~ISelfController() override; 117 ~ISelfController() override;
104 118
105private: 119private:
106 void SetFocusHandlingMode(Kernel::HLERequestContext& ctx);
107 void SetRestartMessageEnabled(Kernel::HLERequestContext& ctx);
108 void SetPerformanceModeChangedNotification(Kernel::HLERequestContext& ctx);
109 void SetOperationModeChangedNotification(Kernel::HLERequestContext& ctx);
110 void SetOutOfFocusSuspendingEnabled(Kernel::HLERequestContext& ctx);
111 void LockExit(Kernel::HLERequestContext& ctx); 120 void LockExit(Kernel::HLERequestContext& ctx);
112 void UnlockExit(Kernel::HLERequestContext& ctx); 121 void UnlockExit(Kernel::HLERequestContext& ctx);
122 void EnterFatalSection(Kernel::HLERequestContext& ctx);
123 void LeaveFatalSection(Kernel::HLERequestContext& ctx);
113 void GetLibraryAppletLaunchableEvent(Kernel::HLERequestContext& ctx); 124 void GetLibraryAppletLaunchableEvent(Kernel::HLERequestContext& ctx);
125 void SetScreenShotPermission(Kernel::HLERequestContext& ctx);
126 void SetOperationModeChangedNotification(Kernel::HLERequestContext& ctx);
127 void SetPerformanceModeChangedNotification(Kernel::HLERequestContext& ctx);
128 void SetFocusHandlingMode(Kernel::HLERequestContext& ctx);
129 void SetRestartMessageEnabled(Kernel::HLERequestContext& ctx);
130 void SetOutOfFocusSuspendingEnabled(Kernel::HLERequestContext& ctx);
114 void SetScreenShotImageOrientation(Kernel::HLERequestContext& ctx); 131 void SetScreenShotImageOrientation(Kernel::HLERequestContext& ctx);
115 void CreateManagedDisplayLayer(Kernel::HLERequestContext& ctx); 132 void CreateManagedDisplayLayer(Kernel::HLERequestContext& ctx);
116 void SetScreenShotPermission(Kernel::HLERequestContext& ctx);
117 void SetHandlesRequestToDisplay(Kernel::HLERequestContext& ctx); 133 void SetHandlesRequestToDisplay(Kernel::HLERequestContext& ctx);
118 void SetIdleTimeDetectionExtension(Kernel::HLERequestContext& ctx); 134 void SetIdleTimeDetectionExtension(Kernel::HLERequestContext& ctx);
119 void GetIdleTimeDetectionExtension(Kernel::HLERequestContext& ctx); 135 void GetIdleTimeDetectionExtension(Kernel::HLERequestContext& ctx);
@@ -121,6 +137,7 @@ private:
121 std::shared_ptr<NVFlinger::NVFlinger> nvflinger; 137 std::shared_ptr<NVFlinger::NVFlinger> nvflinger;
122 Kernel::EventPair launchable_event; 138 Kernel::EventPair launchable_event;
123 u32 idle_time_detection_extension = 0; 139 u32 idle_time_detection_extension = 0;
140 u64 num_fatal_sections_entered = 0;
124}; 141};
125 142
126class ICommonStateGetter final : public ServiceFramework<ICommonStateGetter> { 143class ICommonStateGetter final : public ServiceFramework<ICommonStateGetter> {
diff --git a/src/core/hle/service/am/applets/software_keyboard.cpp b/src/core/hle/service/am/applets/software_keyboard.cpp
index f255f74b5..8c5bd6059 100644
--- a/src/core/hle/service/am/applets/software_keyboard.cpp
+++ b/src/core/hle/service/am/applets/software_keyboard.cpp
@@ -7,6 +7,7 @@
7#include "common/string_util.h" 7#include "common/string_util.h"
8#include "core/core.h" 8#include "core/core.h"
9#include "core/frontend/applets/software_keyboard.h" 9#include "core/frontend/applets/software_keyboard.h"
10#include "core/hle/result.h"
10#include "core/hle/service/am/am.h" 11#include "core/hle/service/am/am.h"
11#include "core/hle/service/am/applets/software_keyboard.h" 12#include "core/hle/service/am/applets/software_keyboard.h"
12 13
diff --git a/src/core/hle/service/am/applets/software_keyboard.h b/src/core/hle/service/am/applets/software_keyboard.h
index efd5753a1..b93a30d28 100644
--- a/src/core/hle/service/am/applets/software_keyboard.h
+++ b/src/core/hle/service/am/applets/software_keyboard.h
@@ -9,10 +9,13 @@
9#include <vector> 9#include <vector>
10 10
11#include "common/common_funcs.h" 11#include "common/common_funcs.h"
12#include "common/common_types.h"
12#include "common/swap.h" 13#include "common/swap.h"
13#include "core/hle/service/am/am.h" 14#include "core/hle/service/am/am.h"
14#include "core/hle/service/am/applets/applets.h" 15#include "core/hle/service/am/applets/applets.h"
15 16
17union ResultCode;
18
16namespace Service::AM::Applets { 19namespace Service::AM::Applets {
17 20
18enum class KeysetDisable : u32 { 21enum class KeysetDisable : u32 {
diff --git a/src/core/hle/service/audio/audout_u.cpp b/src/core/hle/service/audio/audout_u.cpp
index dc6a6b188..21f5e64c7 100644
--- a/src/core/hle/service/audio/audout_u.cpp
+++ b/src/core/hle/service/audio/audout_u.cpp
@@ -18,17 +18,11 @@
18#include "core/hle/kernel/readable_event.h" 18#include "core/hle/kernel/readable_event.h"
19#include "core/hle/kernel/writable_event.h" 19#include "core/hle/kernel/writable_event.h"
20#include "core/hle/service/audio/audout_u.h" 20#include "core/hle/service/audio/audout_u.h"
21#include "core/hle/service/audio/errors.h"
21#include "core/memory.h" 22#include "core/memory.h"
22 23
23namespace Service::Audio { 24namespace Service::Audio {
24 25
25namespace ErrCodes {
26enum {
27 ErrorUnknown = 2,
28 BufferCountExceeded = 8,
29};
30}
31
32constexpr std::array<char, 10> DefaultDevice{{"DeviceOut"}}; 26constexpr std::array<char, 10> DefaultDevice{{"DeviceOut"}};
33constexpr int DefaultSampleRate{48000}; 27constexpr int DefaultSampleRate{48000};
34 28
@@ -68,12 +62,12 @@ public:
68 RegisterHandlers(functions); 62 RegisterHandlers(functions);
69 63
70 // This is the event handle used to check if the audio buffer was released 64 // This is the event handle used to check if the audio buffer was released
71 auto& kernel = Core::System::GetInstance().Kernel(); 65 auto& system = Core::System::GetInstance();
72 buffer_event = Kernel::WritableEvent::CreateEventPair(kernel, Kernel::ResetType::Sticky, 66 buffer_event = Kernel::WritableEvent::CreateEventPair(
73 "IAudioOutBufferReleased"); 67 system.Kernel(), Kernel::ResetType::Sticky, "IAudioOutBufferReleased");
74 68
75 stream = audio_core.OpenStream(audio_params.sample_rate, audio_params.channel_count, 69 stream = audio_core.OpenStream(system.CoreTiming(), audio_params.sample_rate,
76 std::move(unique_name), 70 audio_params.channel_count, std::move(unique_name),
77 [=]() { buffer_event.writable->Signal(); }); 71 [=]() { buffer_event.writable->Signal(); });
78 } 72 }
79 73
@@ -100,7 +94,7 @@ private:
100 94
101 if (stream->IsPlaying()) { 95 if (stream->IsPlaying()) {
102 IPC::ResponseBuilder rb{ctx, 2}; 96 IPC::ResponseBuilder rb{ctx, 2};
103 rb.Push(ResultCode(ErrorModule::Audio, ErrCodes::ErrorUnknown)); 97 rb.Push(ERR_OPERATION_FAILED);
104 return; 98 return;
105 } 99 }
106 100
@@ -113,7 +107,9 @@ private:
113 void StopAudioOut(Kernel::HLERequestContext& ctx) { 107 void StopAudioOut(Kernel::HLERequestContext& ctx) {
114 LOG_DEBUG(Service_Audio, "called"); 108 LOG_DEBUG(Service_Audio, "called");
115 109
116 audio_core.StopStream(stream); 110 if (stream->IsPlaying()) {
111 audio_core.StopStream(stream);
112 }
117 113
118 IPC::ResponseBuilder rb{ctx, 2}; 114 IPC::ResponseBuilder rb{ctx, 2};
119 rb.Push(RESULT_SUCCESS); 115 rb.Push(RESULT_SUCCESS);
@@ -143,7 +139,8 @@ private:
143 139
144 if (!audio_core.QueueBuffer(stream, tag, std::move(samples))) { 140 if (!audio_core.QueueBuffer(stream, tag, std::move(samples))) {
145 IPC::ResponseBuilder rb{ctx, 2}; 141 IPC::ResponseBuilder rb{ctx, 2};
146 rb.Push(ResultCode(ErrorModule::Audio, ErrCodes::BufferCountExceeded)); 142 rb.Push(ERR_BUFFER_COUNT_EXCEEDED);
143 return;
147 } 144 }
148 145
149 IPC::ResponseBuilder rb{ctx, 2}; 146 IPC::ResponseBuilder rb{ctx, 2};
diff --git a/src/core/hle/service/audio/audren_u.cpp b/src/core/hle/service/audio/audren_u.cpp
index 76cc48254..c9de10a24 100644
--- a/src/core/hle/service/audio/audren_u.cpp
+++ b/src/core/hle/service/audio/audren_u.cpp
@@ -17,6 +17,7 @@
17#include "core/hle/kernel/readable_event.h" 17#include "core/hle/kernel/readable_event.h"
18#include "core/hle/kernel/writable_event.h" 18#include "core/hle/kernel/writable_event.h"
19#include "core/hle/service/audio/audren_u.h" 19#include "core/hle/service/audio/audren_u.h"
20#include "core/hle/service/audio/errors.h"
20 21
21namespace Service::Audio { 22namespace Service::Audio {
22 23
@@ -37,15 +38,16 @@ public:
37 {8, &IAudioRenderer::SetRenderingTimeLimit, "SetRenderingTimeLimit"}, 38 {8, &IAudioRenderer::SetRenderingTimeLimit, "SetRenderingTimeLimit"},
38 {9, &IAudioRenderer::GetRenderingTimeLimit, "GetRenderingTimeLimit"}, 39 {9, &IAudioRenderer::GetRenderingTimeLimit, "GetRenderingTimeLimit"},
39 {10, &IAudioRenderer::RequestUpdateImpl, "RequestUpdateAuto"}, 40 {10, &IAudioRenderer::RequestUpdateImpl, "RequestUpdateAuto"},
40 {11, nullptr, "ExecuteAudioRendererRendering"}, 41 {11, &IAudioRenderer::ExecuteAudioRendererRendering, "ExecuteAudioRendererRendering"},
41 }; 42 };
42 // clang-format on 43 // clang-format on
43 RegisterHandlers(functions); 44 RegisterHandlers(functions);
44 45
45 auto& kernel = Core::System::GetInstance().Kernel(); 46 auto& system = Core::System::GetInstance();
46 system_event = Kernel::WritableEvent::CreateEventPair(kernel, Kernel::ResetType::Sticky, 47 system_event = Kernel::WritableEvent::CreateEventPair(
47 "IAudioRenderer:SystemEvent"); 48 system.Kernel(), Kernel::ResetType::Sticky, "IAudioRenderer:SystemEvent");
48 renderer = std::make_unique<AudioCore::AudioRenderer>(audren_params, system_event.writable); 49 renderer = std::make_unique<AudioCore::AudioRenderer>(system.CoreTiming(), audren_params,
50 system_event.writable);
49 } 51 }
50 52
51private: 53private:
@@ -137,6 +139,17 @@ private:
137 rb.Push(rendering_time_limit_percent); 139 rb.Push(rendering_time_limit_percent);
138 } 140 }
139 141
142 void ExecuteAudioRendererRendering(Kernel::HLERequestContext& ctx) {
143 LOG_DEBUG(Service_Audio, "called");
144
145 // This service command currently only reports an unsupported operation
146 // error code, or aborts. Given that, we just always return an error
147 // code in this case.
148
149 IPC::ResponseBuilder rb{ctx, 2};
150 rb.Push(ERR_NOT_SUPPORTED);
151 }
152
140 Kernel::EventPair system_event; 153 Kernel::EventPair system_event;
141 std::unique_ptr<AudioCore::AudioRenderer> renderer; 154 std::unique_ptr<AudioCore::AudioRenderer> renderer;
142 u32 rendering_time_limit_percent = 100; 155 u32 rendering_time_limit_percent = 100;
@@ -234,7 +247,7 @@ AudRenU::AudRenU() : ServiceFramework("audren:u") {
234 {0, &AudRenU::OpenAudioRenderer, "OpenAudioRenderer"}, 247 {0, &AudRenU::OpenAudioRenderer, "OpenAudioRenderer"},
235 {1, &AudRenU::GetAudioRendererWorkBufferSize, "GetAudioRendererWorkBufferSize"}, 248 {1, &AudRenU::GetAudioRendererWorkBufferSize, "GetAudioRendererWorkBufferSize"},
236 {2, &AudRenU::GetAudioDeviceService, "GetAudioDeviceService"}, 249 {2, &AudRenU::GetAudioDeviceService, "GetAudioDeviceService"},
237 {3, nullptr, "OpenAudioRendererAuto"}, 250 {3, &AudRenU::OpenAudioRendererAuto, "OpenAudioRendererAuto"},
238 {4, &AudRenU::GetAudioDeviceServiceWithRevisionInfo, "GetAudioDeviceServiceWithRevisionInfo"}, 251 {4, &AudRenU::GetAudioDeviceServiceWithRevisionInfo, "GetAudioDeviceServiceWithRevisionInfo"},
239 }; 252 };
240 // clang-format on 253 // clang-format on
@@ -247,12 +260,7 @@ AudRenU::~AudRenU() = default;
247void AudRenU::OpenAudioRenderer(Kernel::HLERequestContext& ctx) { 260void AudRenU::OpenAudioRenderer(Kernel::HLERequestContext& ctx) {
248 LOG_DEBUG(Service_Audio, "called"); 261 LOG_DEBUG(Service_Audio, "called");
249 262
250 IPC::RequestParser rp{ctx}; 263 OpenAudioRendererImpl(ctx);
251 auto params = rp.PopRaw<AudioCore::AudioRendererParameter>();
252 IPC::ResponseBuilder rb{ctx, 2, 0, 1};
253
254 rb.Push(RESULT_SUCCESS);
255 rb.PushIpcInterface<Audio::IAudioRenderer>(std::move(params));
256} 264}
257 265
258void AudRenU::GetAudioRendererWorkBufferSize(Kernel::HLERequestContext& ctx) { 266void AudRenU::GetAudioRendererWorkBufferSize(Kernel::HLERequestContext& ctx) {
@@ -261,20 +269,20 @@ void AudRenU::GetAudioRendererWorkBufferSize(Kernel::HLERequestContext& ctx) {
261 LOG_DEBUG(Service_Audio, "called"); 269 LOG_DEBUG(Service_Audio, "called");
262 270
263 u64 buffer_sz = Common::AlignUp(4 * params.mix_buffer_count, 0x40); 271 u64 buffer_sz = Common::AlignUp(4 * params.mix_buffer_count, 0x40);
264 buffer_sz += params.unknown_c * 1024; 272 buffer_sz += params.submix_count * 1024;
265 buffer_sz += 0x940 * (params.unknown_c + 1); 273 buffer_sz += 0x940 * (params.submix_count + 1);
266 buffer_sz += 0x3F0 * params.voice_count; 274 buffer_sz += 0x3F0 * params.voice_count;
267 buffer_sz += Common::AlignUp(8 * (params.unknown_c + 1), 0x10); 275 buffer_sz += Common::AlignUp(8 * (params.submix_count + 1), 0x10);
268 buffer_sz += Common::AlignUp(8 * params.voice_count, 0x10); 276 buffer_sz += Common::AlignUp(8 * params.voice_count, 0x10);
269 buffer_sz += 277 buffer_sz += Common::AlignUp(
270 Common::AlignUp((0x3C0 * (params.sink_count + params.unknown_c) + 4 * params.sample_count) * 278 (0x3C0 * (params.sink_count + params.submix_count) + 4 * params.sample_count) *
271 (params.mix_buffer_count + 6), 279 (params.mix_buffer_count + 6),
272 0x40); 280 0x40);
273 281
274 if (IsFeatureSupported(AudioFeatures::Splitter, params.revision)) { 282 if (IsFeatureSupported(AudioFeatures::Splitter, params.revision)) {
275 u32 count = params.unknown_c + 1; 283 const u32 count = params.submix_count + 1;
276 u64 node_count = Common::AlignUp(count, 0x40); 284 u64 node_count = Common::AlignUp(count, 0x40);
277 u64 node_state_buffer_sz = 285 const u64 node_state_buffer_sz =
278 4 * (node_count * node_count) + 0xC * node_count + 2 * (node_count / 8); 286 4 * (node_count * node_count) + 0xC * node_count + 2 * (node_count / 8);
279 u64 edge_matrix_buffer_sz = 0; 287 u64 edge_matrix_buffer_sz = 0;
280 node_count = Common::AlignUp(count * count, 0x40); 288 node_count = Common::AlignUp(count * count, 0x40);
@@ -288,19 +296,19 @@ void AudRenU::GetAudioRendererWorkBufferSize(Kernel::HLERequestContext& ctx) {
288 296
289 buffer_sz += 0x20 * (params.effect_count + 4 * params.voice_count) + 0x50; 297 buffer_sz += 0x20 * (params.effect_count + 4 * params.voice_count) + 0x50;
290 if (IsFeatureSupported(AudioFeatures::Splitter, params.revision)) { 298 if (IsFeatureSupported(AudioFeatures::Splitter, params.revision)) {
291 buffer_sz += 0xE0 * params.unknown_2c; 299 buffer_sz += 0xE0 * params.num_splitter_send_channels;
292 buffer_sz += 0x20 * params.splitter_count; 300 buffer_sz += 0x20 * params.splitter_count;
293 buffer_sz += Common::AlignUp(4 * params.unknown_2c, 0x10); 301 buffer_sz += Common::AlignUp(4 * params.num_splitter_send_channels, 0x10);
294 } 302 }
295 buffer_sz = Common::AlignUp(buffer_sz, 0x40) + 0x170 * params.sink_count; 303 buffer_sz = Common::AlignUp(buffer_sz, 0x40) + 0x170 * params.sink_count;
296 u64 output_sz = buffer_sz + 0x280 * params.sink_count + 0x4B0 * params.effect_count + 304 u64 output_sz = buffer_sz + 0x280 * params.sink_count + 0x4B0 * params.effect_count +
297 ((params.voice_count * 256) | 0x40); 305 ((params.voice_count * 256) | 0x40);
298 306
299 if (params.unknown_1c >= 1) { 307 if (params.performance_frame_count >= 1) {
300 output_sz = Common::AlignUp(((16 * params.sink_count + 16 * params.effect_count + 308 output_sz = Common::AlignUp(((16 * params.sink_count + 16 * params.effect_count +
301 16 * params.voice_count + 16) + 309 16 * params.voice_count + 16) +
302 0x658) * 310 0x658) *
303 (params.unknown_1c + 1) + 311 (params.performance_frame_count + 1) +
304 0xc0, 312 0xc0,
305 0x40) + 313 0x40) +
306 output_sz; 314 output_sz;
@@ -324,6 +332,12 @@ void AudRenU::GetAudioDeviceService(Kernel::HLERequestContext& ctx) {
324 rb.PushIpcInterface<Audio::IAudioDevice>(); 332 rb.PushIpcInterface<Audio::IAudioDevice>();
325} 333}
326 334
335void AudRenU::OpenAudioRendererAuto(Kernel::HLERequestContext& ctx) {
336 LOG_DEBUG(Service_Audio, "called");
337
338 OpenAudioRendererImpl(ctx);
339}
340
327void AudRenU::GetAudioDeviceServiceWithRevisionInfo(Kernel::HLERequestContext& ctx) { 341void AudRenU::GetAudioDeviceServiceWithRevisionInfo(Kernel::HLERequestContext& ctx) {
328 LOG_WARNING(Service_Audio, "(STUBBED) called"); 342 LOG_WARNING(Service_Audio, "(STUBBED) called");
329 343
@@ -334,6 +348,15 @@ void AudRenU::GetAudioDeviceServiceWithRevisionInfo(Kernel::HLERequestContext& c
334 // based on the current revision 348 // based on the current revision
335} 349}
336 350
351void AudRenU::OpenAudioRendererImpl(Kernel::HLERequestContext& ctx) {
352 IPC::RequestParser rp{ctx};
353 const auto params = rp.PopRaw<AudioCore::AudioRendererParameter>();
354 IPC::ResponseBuilder rb{ctx, 2, 0, 1};
355
356 rb.Push(RESULT_SUCCESS);
357 rb.PushIpcInterface<IAudioRenderer>(params);
358}
359
337bool AudRenU::IsFeatureSupported(AudioFeatures feature, u32_le revision) const { 360bool AudRenU::IsFeatureSupported(AudioFeatures feature, u32_le revision) const {
338 u32_be version_num = (revision - Common::MakeMagic('R', 'E', 'V', '0')); // Byte swap 361 u32_be version_num = (revision - Common::MakeMagic('R', 'E', 'V', '0')); // Byte swap
339 switch (feature) { 362 switch (feature) {
diff --git a/src/core/hle/service/audio/audren_u.h b/src/core/hle/service/audio/audren_u.h
index 3d63388fb..e55d25973 100644
--- a/src/core/hle/service/audio/audren_u.h
+++ b/src/core/hle/service/audio/audren_u.h
@@ -21,8 +21,11 @@ private:
21 void OpenAudioRenderer(Kernel::HLERequestContext& ctx); 21 void OpenAudioRenderer(Kernel::HLERequestContext& ctx);
22 void GetAudioRendererWorkBufferSize(Kernel::HLERequestContext& ctx); 22 void GetAudioRendererWorkBufferSize(Kernel::HLERequestContext& ctx);
23 void GetAudioDeviceService(Kernel::HLERequestContext& ctx); 23 void GetAudioDeviceService(Kernel::HLERequestContext& ctx);
24 void OpenAudioRendererAuto(Kernel::HLERequestContext& ctx);
24 void GetAudioDeviceServiceWithRevisionInfo(Kernel::HLERequestContext& ctx); 25 void GetAudioDeviceServiceWithRevisionInfo(Kernel::HLERequestContext& ctx);
25 26
27 void OpenAudioRendererImpl(Kernel::HLERequestContext& ctx);
28
26 enum class AudioFeatures : u32 { 29 enum class AudioFeatures : u32 {
27 Splitter, 30 Splitter,
28 }; 31 };
diff --git a/src/core/hle/service/audio/errors.h b/src/core/hle/service/audio/errors.h
new file mode 100644
index 000000000..6f8c09bcf
--- /dev/null
+++ b/src/core/hle/service/audio/errors.h
@@ -0,0 +1,15 @@
1// Copyright 2019 yuzu emulator team
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include "core/hle/result.h"
8
9namespace Service::Audio {
10
11constexpr ResultCode ERR_OPERATION_FAILED{ErrorModule::Audio, 2};
12constexpr ResultCode ERR_BUFFER_COUNT_EXCEEDED{ErrorModule::Audio, 8};
13constexpr ResultCode ERR_NOT_SUPPORTED{ErrorModule::Audio, 513};
14
15} // namespace Service::Audio
diff --git a/src/core/hle/service/audio/hwopus.cpp b/src/core/hle/service/audio/hwopus.cpp
index 11eba4a12..cb4a1160d 100644
--- a/src/core/hle/service/audio/hwopus.cpp
+++ b/src/core/hle/service/audio/hwopus.cpp
@@ -8,44 +8,34 @@
8#include <vector> 8#include <vector>
9 9
10#include <opus.h> 10#include <opus.h>
11#include <opus_multistream.h>
11 12
12#include "common/common_funcs.h" 13#include "common/assert.h"
13#include "common/logging/log.h" 14#include "common/logging/log.h"
14#include "core/hle/ipc_helpers.h" 15#include "core/hle/ipc_helpers.h"
15#include "core/hle/kernel/hle_ipc.h" 16#include "core/hle/kernel/hle_ipc.h"
16#include "core/hle/service/audio/hwopus.h" 17#include "core/hle/service/audio/hwopus.h"
17 18
18namespace Service::Audio { 19namespace Service::Audio {
19 20namespace {
20struct OpusDeleter { 21struct OpusDeleter {
21 void operator()(void* ptr) const { 22 void operator()(OpusMSDecoder* ptr) const {
22 operator delete(ptr); 23 opus_multistream_decoder_destroy(ptr);
23 } 24 }
24}; 25};
25 26
26class IHardwareOpusDecoderManager final : public ServiceFramework<IHardwareOpusDecoderManager> { 27using OpusDecoderPtr = std::unique_ptr<OpusMSDecoder, OpusDeleter>;
27public:
28 IHardwareOpusDecoderManager(std::unique_ptr<OpusDecoder, OpusDeleter> decoder, u32 sample_rate,
29 u32 channel_count)
30 : ServiceFramework("IHardwareOpusDecoderManager"), decoder(std::move(decoder)),
31 sample_rate(sample_rate), channel_count(channel_count) {
32 // clang-format off
33 static const FunctionInfo functions[] = {
34 {0, &IHardwareOpusDecoderManager::DecodeInterleavedOld, "DecodeInterleavedOld"},
35 {1, nullptr, "SetContext"},
36 {2, nullptr, "DecodeInterleavedForMultiStreamOld"},
37 {3, nullptr, "SetContextForMultiStream"},
38 {4, &IHardwareOpusDecoderManager::DecodeInterleavedWithPerfOld, "DecodeInterleavedWithPerfOld"},
39 {5, nullptr, "DecodeInterleavedForMultiStreamWithPerfOld"},
40 {6, &IHardwareOpusDecoderManager::DecodeInterleaved, "DecodeInterleaved"},
41 {7, nullptr, "DecodeInterleavedForMultiStream"},
42 };
43 // clang-format on
44 28
45 RegisterHandlers(functions); 29struct OpusPacketHeader {
46 } 30 // Packet size in bytes.
31 u32_be size;
32 // Indicates the final range of the codec's entropy coder.
33 u32_be final_range;
34};
35static_assert(sizeof(OpusPacketHeader) == 0x8, "OpusHeader is an invalid size");
47 36
48private: 37class OpusDecoderState {
38public:
49 /// Describes extra behavior that may be asked of the decoding context. 39 /// Describes extra behavior that may be asked of the decoding context.
50 enum class ExtraBehavior { 40 enum class ExtraBehavior {
51 /// No extra behavior. 41 /// No extra behavior.
@@ -55,30 +45,27 @@ private:
55 ResetContext, 45 ResetContext,
56 }; 46 };
57 47
58 void DecodeInterleavedOld(Kernel::HLERequestContext& ctx) { 48 enum class PerfTime {
59 LOG_DEBUG(Audio, "called"); 49 Disabled,
60 50 Enabled,
61 DecodeInterleavedHelper(ctx, nullptr, ExtraBehavior::None); 51 };
62 }
63
64 void DecodeInterleavedWithPerfOld(Kernel::HLERequestContext& ctx) {
65 LOG_DEBUG(Audio, "called");
66
67 u64 performance = 0;
68 DecodeInterleavedHelper(ctx, &performance, ExtraBehavior::None);
69 }
70
71 void DecodeInterleaved(Kernel::HLERequestContext& ctx) {
72 LOG_DEBUG(Audio, "called");
73
74 IPC::RequestParser rp{ctx};
75 const auto extra_behavior =
76 rp.Pop<bool>() ? ExtraBehavior::ResetContext : ExtraBehavior::None;
77 52
78 u64 performance = 0; 53 explicit OpusDecoderState(OpusDecoderPtr decoder, u32 sample_rate, u32 channel_count)
79 DecodeInterleavedHelper(ctx, &performance, extra_behavior); 54 : decoder{std::move(decoder)}, sample_rate{sample_rate}, channel_count{channel_count} {}
55
56 // Decodes interleaved Opus packets. Optionally allows reporting time taken to
57 // perform the decoding, as well as any relevant extra behavior.
58 void DecodeInterleaved(Kernel::HLERequestContext& ctx, PerfTime perf_time,
59 ExtraBehavior extra_behavior) {
60 if (perf_time == PerfTime::Disabled) {
61 DecodeInterleavedHelper(ctx, nullptr, extra_behavior);
62 } else {
63 u64 performance = 0;
64 DecodeInterleavedHelper(ctx, &performance, extra_behavior);
65 }
80 } 66 }
81 67
68private:
82 void DecodeInterleavedHelper(Kernel::HLERequestContext& ctx, u64* performance, 69 void DecodeInterleavedHelper(Kernel::HLERequestContext& ctx, u64* performance,
83 ExtraBehavior extra_behavior) { 70 ExtraBehavior extra_behavior) {
84 u32 consumed = 0; 71 u32 consumed = 0;
@@ -89,8 +76,7 @@ private:
89 ResetDecoderContext(); 76 ResetDecoderContext();
90 } 77 }
91 78
92 if (!Decoder_DecodeInterleaved(consumed, sample_count, ctx.ReadBuffer(), samples, 79 if (!DecodeOpusData(consumed, sample_count, ctx.ReadBuffer(), samples, performance)) {
93 performance)) {
94 LOG_ERROR(Audio, "Failed to decode opus data"); 80 LOG_ERROR(Audio, "Failed to decode opus data");
95 IPC::ResponseBuilder rb{ctx, 2}; 81 IPC::ResponseBuilder rb{ctx, 2};
96 // TODO(ogniK): Use correct error code 82 // TODO(ogniK): Use correct error code
@@ -109,27 +95,27 @@ private:
109 ctx.WriteBuffer(samples.data(), samples.size() * sizeof(s16)); 95 ctx.WriteBuffer(samples.data(), samples.size() * sizeof(s16));
110 } 96 }
111 97
112 bool Decoder_DecodeInterleaved(u32& consumed, u32& sample_count, const std::vector<u8>& input, 98 bool DecodeOpusData(u32& consumed, u32& sample_count, const std::vector<u8>& input,
113 std::vector<opus_int16>& output, u64* out_performance_time) { 99 std::vector<opus_int16>& output, u64* out_performance_time) const {
114 const auto start_time = std::chrono::high_resolution_clock::now(); 100 const auto start_time = std::chrono::high_resolution_clock::now();
115 const std::size_t raw_output_sz = output.size() * sizeof(opus_int16); 101 const std::size_t raw_output_sz = output.size() * sizeof(opus_int16);
116 if (sizeof(OpusHeader) > input.size()) { 102 if (sizeof(OpusPacketHeader) > input.size()) {
117 LOG_ERROR(Audio, "Input is smaller than the header size, header_sz={}, input_sz={}", 103 LOG_ERROR(Audio, "Input is smaller than the header size, header_sz={}, input_sz={}",
118 sizeof(OpusHeader), input.size()); 104 sizeof(OpusPacketHeader), input.size());
119 return false; 105 return false;
120 } 106 }
121 107
122 OpusHeader hdr{}; 108 OpusPacketHeader hdr{};
123 std::memcpy(&hdr, input.data(), sizeof(OpusHeader)); 109 std::memcpy(&hdr, input.data(), sizeof(OpusPacketHeader));
124 if (sizeof(OpusHeader) + static_cast<u32>(hdr.sz) > input.size()) { 110 if (sizeof(OpusPacketHeader) + static_cast<u32>(hdr.size) > input.size()) {
125 LOG_ERROR(Audio, "Input does not fit in the opus header size. data_sz={}, input_sz={}", 111 LOG_ERROR(Audio, "Input does not fit in the opus header size. data_sz={}, input_sz={}",
126 sizeof(OpusHeader) + static_cast<u32>(hdr.sz), input.size()); 112 sizeof(OpusPacketHeader) + static_cast<u32>(hdr.size), input.size());
127 return false; 113 return false;
128 } 114 }
129 115
130 const auto frame = input.data() + sizeof(OpusHeader); 116 const auto frame = input.data() + sizeof(OpusPacketHeader);
131 const auto decoded_sample_count = opus_packet_get_nb_samples( 117 const auto decoded_sample_count = opus_packet_get_nb_samples(
132 frame, static_cast<opus_int32>(input.size() - sizeof(OpusHeader)), 118 frame, static_cast<opus_int32>(input.size() - sizeof(OpusPacketHeader)),
133 static_cast<opus_int32>(sample_rate)); 119 static_cast<opus_int32>(sample_rate));
134 if (decoded_sample_count * channel_count * sizeof(u16) > raw_output_sz) { 120 if (decoded_sample_count * channel_count * sizeof(u16) > raw_output_sz) {
135 LOG_ERROR( 121 LOG_ERROR(
@@ -141,18 +127,18 @@ private:
141 127
142 const int frame_size = (static_cast<int>(raw_output_sz / sizeof(s16) / channel_count)); 128 const int frame_size = (static_cast<int>(raw_output_sz / sizeof(s16) / channel_count));
143 const auto out_sample_count = 129 const auto out_sample_count =
144 opus_decode(decoder.get(), frame, hdr.sz, output.data(), frame_size, 0); 130 opus_multistream_decode(decoder.get(), frame, hdr.size, output.data(), frame_size, 0);
145 if (out_sample_count < 0) { 131 if (out_sample_count < 0) {
146 LOG_ERROR(Audio, 132 LOG_ERROR(Audio,
147 "Incorrect sample count received from opus_decode, " 133 "Incorrect sample count received from opus_decode, "
148 "output_sample_count={}, frame_size={}, data_sz_from_hdr={}", 134 "output_sample_count={}, frame_size={}, data_sz_from_hdr={}",
149 out_sample_count, frame_size, static_cast<u32>(hdr.sz)); 135 out_sample_count, frame_size, static_cast<u32>(hdr.size));
150 return false; 136 return false;
151 } 137 }
152 138
153 const auto end_time = std::chrono::high_resolution_clock::now() - start_time; 139 const auto end_time = std::chrono::high_resolution_clock::now() - start_time;
154 sample_count = out_sample_count; 140 sample_count = out_sample_count;
155 consumed = static_cast<u32>(sizeof(OpusHeader) + hdr.sz); 141 consumed = static_cast<u32>(sizeof(OpusPacketHeader) + hdr.size);
156 if (out_performance_time != nullptr) { 142 if (out_performance_time != nullptr) {
157 *out_performance_time = 143 *out_performance_time =
158 std::chrono::duration_cast<std::chrono::milliseconds>(end_time).count(); 144 std::chrono::duration_cast<std::chrono::milliseconds>(end_time).count();
@@ -164,25 +150,86 @@ private:
164 void ResetDecoderContext() { 150 void ResetDecoderContext() {
165 ASSERT(decoder != nullptr); 151 ASSERT(decoder != nullptr);
166 152
167 opus_decoder_ctl(decoder.get(), OPUS_RESET_STATE); 153 opus_multistream_decoder_ctl(decoder.get(), OPUS_RESET_STATE);
168 } 154 }
169 155
170 struct OpusHeader { 156 OpusDecoderPtr decoder;
171 u32_be sz; // Needs to be BE for some odd reason
172 INSERT_PADDING_WORDS(1);
173 };
174 static_assert(sizeof(OpusHeader) == 0x8, "OpusHeader is an invalid size");
175
176 std::unique_ptr<OpusDecoder, OpusDeleter> decoder;
177 u32 sample_rate; 157 u32 sample_rate;
178 u32 channel_count; 158 u32 channel_count;
179}; 159};
180 160
181static std::size_t WorkerBufferSize(u32 channel_count) { 161class IHardwareOpusDecoderManager final : public ServiceFramework<IHardwareOpusDecoderManager> {
162public:
163 explicit IHardwareOpusDecoderManager(OpusDecoderState decoder_state)
164 : ServiceFramework("IHardwareOpusDecoderManager"), decoder_state{std::move(decoder_state)} {
165 // clang-format off
166 static const FunctionInfo functions[] = {
167 {0, &IHardwareOpusDecoderManager::DecodeInterleavedOld, "DecodeInterleavedOld"},
168 {1, nullptr, "SetContext"},
169 {2, nullptr, "DecodeInterleavedForMultiStreamOld"},
170 {3, nullptr, "SetContextForMultiStream"},
171 {4, &IHardwareOpusDecoderManager::DecodeInterleavedWithPerfOld, "DecodeInterleavedWithPerfOld"},
172 {5, nullptr, "DecodeInterleavedForMultiStreamWithPerfOld"},
173 {6, &IHardwareOpusDecoderManager::DecodeInterleaved, "DecodeInterleaved"},
174 {7, nullptr, "DecodeInterleavedForMultiStream"},
175 };
176 // clang-format on
177
178 RegisterHandlers(functions);
179 }
180
181private:
182 void DecodeInterleavedOld(Kernel::HLERequestContext& ctx) {
183 LOG_DEBUG(Audio, "called");
184
185 decoder_state.DecodeInterleaved(ctx, OpusDecoderState::PerfTime::Disabled,
186 OpusDecoderState::ExtraBehavior::None);
187 }
188
189 void DecodeInterleavedWithPerfOld(Kernel::HLERequestContext& ctx) {
190 LOG_DEBUG(Audio, "called");
191
192 decoder_state.DecodeInterleaved(ctx, OpusDecoderState::PerfTime::Enabled,
193 OpusDecoderState::ExtraBehavior::None);
194 }
195
196 void DecodeInterleaved(Kernel::HLERequestContext& ctx) {
197 LOG_DEBUG(Audio, "called");
198
199 IPC::RequestParser rp{ctx};
200 const auto extra_behavior = rp.Pop<bool>() ? OpusDecoderState::ExtraBehavior::ResetContext
201 : OpusDecoderState::ExtraBehavior::None;
202
203 decoder_state.DecodeInterleaved(ctx, OpusDecoderState::PerfTime::Enabled, extra_behavior);
204 }
205
206 OpusDecoderState decoder_state;
207};
208
209std::size_t WorkerBufferSize(u32 channel_count) {
182 ASSERT_MSG(channel_count == 1 || channel_count == 2, "Invalid channel count"); 210 ASSERT_MSG(channel_count == 1 || channel_count == 2, "Invalid channel count");
183 return opus_decoder_get_size(static_cast<int>(channel_count)); 211 constexpr int num_streams = 1;
212 const int num_stereo_streams = channel_count == 2 ? 1 : 0;
213 return opus_multistream_decoder_get_size(num_streams, num_stereo_streams);
184} 214}
185 215
216// Creates the mapping table that maps the input channels to the particular
217// output channels. In the stereo case, we map the left and right input channels
218// to the left and right output channels respectively.
219//
220// However, in the monophonic case, we only map the one available channel
221// to the sole output channel. We specify 255 for the would-be right channel
222// as this is a special value defined by Opus to indicate to the decoder to
223// ignore that channel.
224std::array<u8, 2> CreateMappingTable(u32 channel_count) {
225 if (channel_count == 2) {
226 return {{0, 1}};
227 }
228
229 return {{0, 255}};
230}
231} // Anonymous namespace
232
186void HwOpus::GetWorkBufferSize(Kernel::HLERequestContext& ctx) { 233void HwOpus::GetWorkBufferSize(Kernel::HLERequestContext& ctx) {
187 IPC::RequestParser rp{ctx}; 234 IPC::RequestParser rp{ctx};
188 const auto sample_rate = rp.Pop<u32>(); 235 const auto sample_rate = rp.Pop<u32>();
@@ -220,10 +267,15 @@ void HwOpus::OpenOpusDecoder(Kernel::HLERequestContext& ctx) {
220 const std::size_t worker_sz = WorkerBufferSize(channel_count); 267 const std::size_t worker_sz = WorkerBufferSize(channel_count);
221 ASSERT_MSG(buffer_sz >= worker_sz, "Worker buffer too large"); 268 ASSERT_MSG(buffer_sz >= worker_sz, "Worker buffer too large");
222 269
223 std::unique_ptr<OpusDecoder, OpusDeleter> decoder{ 270 const int num_stereo_streams = channel_count == 2 ? 1 : 0;
224 static_cast<OpusDecoder*>(operator new(worker_sz))}; 271 const auto mapping_table = CreateMappingTable(channel_count);
225 if (const int err = opus_decoder_init(decoder.get(), sample_rate, channel_count)) { 272
226 LOG_ERROR(Audio, "Failed to init opus decoder with error={}", err); 273 int error = 0;
274 OpusDecoderPtr decoder{
275 opus_multistream_decoder_create(sample_rate, static_cast<int>(channel_count), 1,
276 num_stereo_streams, mapping_table.data(), &error)};
277 if (error != OPUS_OK || decoder == nullptr) {
278 LOG_ERROR(Audio, "Failed to create Opus decoder (error={}).", error);
227 IPC::ResponseBuilder rb{ctx, 2}; 279 IPC::ResponseBuilder rb{ctx, 2};
228 // TODO(ogniK): Use correct error code 280 // TODO(ogniK): Use correct error code
229 rb.Push(ResultCode(-1)); 281 rb.Push(ResultCode(-1));
@@ -232,8 +284,8 @@ void HwOpus::OpenOpusDecoder(Kernel::HLERequestContext& ctx) {
232 284
233 IPC::ResponseBuilder rb{ctx, 2, 0, 1}; 285 IPC::ResponseBuilder rb{ctx, 2, 0, 1};
234 rb.Push(RESULT_SUCCESS); 286 rb.Push(RESULT_SUCCESS);
235 rb.PushIpcInterface<IHardwareOpusDecoderManager>(std::move(decoder), sample_rate, 287 rb.PushIpcInterface<IHardwareOpusDecoderManager>(
236 channel_count); 288 OpusDecoderState{std::move(decoder), sample_rate, channel_count});
237} 289}
238 290
239HwOpus::HwOpus() : ServiceFramework("hwopus") { 291HwOpus::HwOpus() : ServiceFramework("hwopus") {
diff --git a/src/core/hle/service/fatal/fatal.cpp b/src/core/hle/service/fatal/fatal.cpp
index 770590d0b..2c229bcad 100644
--- a/src/core/hle/service/fatal/fatal.cpp
+++ b/src/core/hle/service/fatal/fatal.cpp
@@ -25,21 +25,34 @@ Module::Interface::Interface(std::shared_ptr<Module> module, const char* name)
25Module::Interface::~Interface() = default; 25Module::Interface::~Interface() = default;
26 26
27struct FatalInfo { 27struct FatalInfo {
28 std::array<u64_le, 31> registers{}; // TODO(ogniK): See if this actually is registers or 28 enum class Architecture : s32 {
29 // not(find a game which has non zero valeus) 29 AArch64,
30 u64_le unk0{}; 30 AArch32,
31 u64_le unk1{}; 31 };
32 u64_le unk2{}; 32
33 u64_le unk3{}; 33 const char* ArchAsString() const {
34 u64_le unk4{}; 34 return arch == Architecture::AArch64 ? "AArch64" : "AArch32";
35 u64_le unk5{}; 35 }
36 u64_le unk6{}; 36
37 std::array<u64_le, 31> registers{};
38 u64_le sp{};
39 u64_le pc{};
40 u64_le pstate{};
41 u64_le afsr0{};
42 u64_le afsr1{};
43 u64_le esr{};
44 u64_le far{};
37 45
38 std::array<u64_le, 32> backtrace{}; 46 std::array<u64_le, 32> backtrace{};
39 u64_le unk7{}; 47 u64_le program_entry_point{};
40 u64_le unk8{}; 48
49 // Bit flags that indicate which registers have been set with values
50 // for this context. The service itself uses these to determine which
51 // registers to specifically print out.
52 u64_le set_flags{};
53
41 u32_le backtrace_size{}; 54 u32_le backtrace_size{};
42 u32_le unk9{}; 55 Architecture arch{};
43 u32_le unk10{}; // TODO(ogniK): Is this even used or is it just padding? 56 u32_le unk10{}; // TODO(ogniK): Is this even used or is it just padding?
44}; 57};
45static_assert(sizeof(FatalInfo) == 0x250, "FatalInfo is an invalid size"); 58static_assert(sizeof(FatalInfo) == 0x250, "FatalInfo is an invalid size");
@@ -52,36 +65,36 @@ enum class FatalType : u32 {
52 65
53static void GenerateErrorReport(ResultCode error_code, const FatalInfo& info) { 66static void GenerateErrorReport(ResultCode error_code, const FatalInfo& info) {
54 const auto title_id = Core::CurrentProcess()->GetTitleID(); 67 const auto title_id = Core::CurrentProcess()->GetTitleID();
55 std::string crash_report = 68 std::string crash_report = fmt::format(
56 fmt::format("Yuzu {}-{} crash report\n" 69 "Yuzu {}-{} crash report\n"
57 "Title ID: {:016x}\n" 70 "Title ID: {:016x}\n"
58 "Result: 0x{:X} ({:04}-{:04d})\n" 71 "Result: 0x{:X} ({:04}-{:04d})\n"
59 "\n", 72 "Set flags: 0x{:16X}\n"
60 Common::g_scm_branch, Common::g_scm_desc, title_id, error_code.raw, 73 "Program entry point: 0x{:16X}\n"
61 2000 + static_cast<u32>(error_code.module.Value()), 74 "\n",
62 static_cast<u32>(error_code.description.Value()), info.unk8, info.unk7); 75 Common::g_scm_branch, Common::g_scm_desc, title_id, error_code.raw,
76 2000 + static_cast<u32>(error_code.module.Value()),
77 static_cast<u32>(error_code.description.Value()), info.set_flags, info.program_entry_point);
63 if (info.backtrace_size != 0x0) { 78 if (info.backtrace_size != 0x0) {
64 crash_report += "Registers:\n"; 79 crash_report += "Registers:\n";
65 // TODO(ogniK): This is just a guess, find a game which actually has non zero values
66 for (size_t i = 0; i < info.registers.size(); i++) { 80 for (size_t i = 0; i < info.registers.size(); i++) {
67 crash_report += 81 crash_report +=
68 fmt::format(" X[{:02d}]: {:016x}\n", i, info.registers[i]); 82 fmt::format(" X[{:02d}]: {:016x}\n", i, info.registers[i]);
69 } 83 }
70 crash_report += fmt::format(" Unknown 0: {:016x}\n", info.unk0); 84 crash_report += fmt::format(" SP: {:016x}\n", info.sp);
71 crash_report += fmt::format(" Unknown 1: {:016x}\n", info.unk1); 85 crash_report += fmt::format(" PC: {:016x}\n", info.pc);
72 crash_report += fmt::format(" Unknown 2: {:016x}\n", info.unk2); 86 crash_report += fmt::format(" PSTATE: {:016x}\n", info.pstate);
73 crash_report += fmt::format(" Unknown 3: {:016x}\n", info.unk3); 87 crash_report += fmt::format(" AFSR0: {:016x}\n", info.afsr0);
74 crash_report += fmt::format(" Unknown 4: {:016x}\n", info.unk4); 88 crash_report += fmt::format(" AFSR1: {:016x}\n", info.afsr1);
75 crash_report += fmt::format(" Unknown 5: {:016x}\n", info.unk5); 89 crash_report += fmt::format(" ESR: {:016x}\n", info.esr);
76 crash_report += fmt::format(" Unknown 6: {:016x}\n", info.unk6); 90 crash_report += fmt::format(" FAR: {:016x}\n", info.far);
77 crash_report += "\nBacktrace:\n"; 91 crash_report += "\nBacktrace:\n";
78 for (size_t i = 0; i < info.backtrace_size; i++) { 92 for (size_t i = 0; i < info.backtrace_size; i++) {
79 crash_report += 93 crash_report +=
80 fmt::format(" Backtrace[{:02d}]: {:016x}\n", i, info.backtrace[i]); 94 fmt::format(" Backtrace[{:02d}]: {:016x}\n", i, info.backtrace[i]);
81 } 95 }
82 crash_report += fmt::format("\nUnknown 7: 0x{:016x}\n", info.unk7); 96
83 crash_report += fmt::format("Unknown 8: 0x{:016x}\n", info.unk8); 97 crash_report += fmt::format("Architecture: {}\n", info.ArchAsString());
84 crash_report += fmt::format("Unknown 9: 0x{:016x}\n", info.unk9);
85 crash_report += fmt::format("Unknown 10: 0x{:016x}\n", info.unk10); 98 crash_report += fmt::format("Unknown 10: 0x{:016x}\n", info.unk10);
86 } 99 }
87 100
@@ -125,13 +138,13 @@ static void ThrowFatalError(ResultCode error_code, FatalType fatal_type, const F
125 case FatalType::ErrorReport: 138 case FatalType::ErrorReport:
126 GenerateErrorReport(error_code, info); 139 GenerateErrorReport(error_code, info);
127 break; 140 break;
128 }; 141 }
129} 142}
130 143
131void Module::Interface::ThrowFatal(Kernel::HLERequestContext& ctx) { 144void Module::Interface::ThrowFatal(Kernel::HLERequestContext& ctx) {
132 LOG_ERROR(Service_Fatal, "called"); 145 LOG_ERROR(Service_Fatal, "called");
133 IPC::RequestParser rp{ctx}; 146 IPC::RequestParser rp{ctx};
134 auto error_code = rp.Pop<ResultCode>(); 147 const auto error_code = rp.Pop<ResultCode>();
135 148
136 ThrowFatalError(error_code, FatalType::ErrorScreen, {}); 149 ThrowFatalError(error_code, FatalType::ErrorScreen, {});
137 IPC::ResponseBuilder rb{ctx, 2}; 150 IPC::ResponseBuilder rb{ctx, 2};
@@ -141,8 +154,8 @@ void Module::Interface::ThrowFatal(Kernel::HLERequestContext& ctx) {
141void Module::Interface::ThrowFatalWithPolicy(Kernel::HLERequestContext& ctx) { 154void Module::Interface::ThrowFatalWithPolicy(Kernel::HLERequestContext& ctx) {
142 LOG_ERROR(Service_Fatal, "called"); 155 LOG_ERROR(Service_Fatal, "called");
143 IPC::RequestParser rp(ctx); 156 IPC::RequestParser rp(ctx);
144 auto error_code = rp.Pop<ResultCode>(); 157 const auto error_code = rp.Pop<ResultCode>();
145 auto fatal_type = rp.PopEnum<FatalType>(); 158 const auto fatal_type = rp.PopEnum<FatalType>();
146 159
147 ThrowFatalError(error_code, fatal_type, {}); // No info is passed with ThrowFatalWithPolicy 160 ThrowFatalError(error_code, fatal_type, {}); // No info is passed with ThrowFatalWithPolicy
148 IPC::ResponseBuilder rb{ctx, 2}; 161 IPC::ResponseBuilder rb{ctx, 2};
@@ -152,9 +165,9 @@ void Module::Interface::ThrowFatalWithPolicy(Kernel::HLERequestContext& ctx) {
152void Module::Interface::ThrowFatalWithCpuContext(Kernel::HLERequestContext& ctx) { 165void Module::Interface::ThrowFatalWithCpuContext(Kernel::HLERequestContext& ctx) {
153 LOG_ERROR(Service_Fatal, "called"); 166 LOG_ERROR(Service_Fatal, "called");
154 IPC::RequestParser rp(ctx); 167 IPC::RequestParser rp(ctx);
155 auto error_code = rp.Pop<ResultCode>(); 168 const auto error_code = rp.Pop<ResultCode>();
156 auto fatal_type = rp.PopEnum<FatalType>(); 169 const auto fatal_type = rp.PopEnum<FatalType>();
157 auto fatal_info = ctx.ReadBuffer(); 170 const auto fatal_info = ctx.ReadBuffer();
158 FatalInfo info{}; 171 FatalInfo info{};
159 172
160 ASSERT_MSG(fatal_info.size() == sizeof(FatalInfo), "Invalid fatal info buffer size!"); 173 ASSERT_MSG(fatal_info.size() == sizeof(FatalInfo), "Invalid fatal info buffer size!");
diff --git a/src/core/hle/service/filesystem/fsp_srv.cpp b/src/core/hle/service/filesystem/fsp_srv.cpp
index 54959edd8..f03fb629c 100644
--- a/src/core/hle/service/filesystem/fsp_srv.cpp
+++ b/src/core/hle/service/filesystem/fsp_srv.cpp
@@ -733,7 +733,10 @@ FSP_SRV::FSP_SRV() : ServiceFramework("fsp-srv") {
733FSP_SRV::~FSP_SRV() = default; 733FSP_SRV::~FSP_SRV() = default;
734 734
735void FSP_SRV::SetCurrentProcess(Kernel::HLERequestContext& ctx) { 735void FSP_SRV::SetCurrentProcess(Kernel::HLERequestContext& ctx) {
736 LOG_WARNING(Service_FS, "(STUBBED) called"); 736 IPC::RequestParser rp{ctx};
737 current_process_id = rp.Pop<u64>();
738
739 LOG_DEBUG(Service_FS, "called. current_process_id=0x{:016X}", current_process_id);
737 740
738 IPC::ResponseBuilder rb{ctx, 2}; 741 IPC::ResponseBuilder rb{ctx, 2};
739 rb.Push(RESULT_SUCCESS); 742 rb.Push(RESULT_SUCCESS);
diff --git a/src/core/hle/service/filesystem/fsp_srv.h b/src/core/hle/service/filesystem/fsp_srv.h
index 3a5f4e200..d7572ba7a 100644
--- a/src/core/hle/service/filesystem/fsp_srv.h
+++ b/src/core/hle/service/filesystem/fsp_srv.h
@@ -32,6 +32,7 @@ private:
32 void OpenPatchDataStorageByCurrentProcess(Kernel::HLERequestContext& ctx); 32 void OpenPatchDataStorageByCurrentProcess(Kernel::HLERequestContext& ctx);
33 33
34 FileSys::VirtualFile romfs; 34 FileSys::VirtualFile romfs;
35 u64 current_process_id = 0;
35}; 36};
36 37
37} // namespace Service::FileSystem 38} // namespace Service::FileSystem
diff --git a/src/core/hle/service/hid/controllers/controller_base.h b/src/core/hle/service/hid/controllers/controller_base.h
index f0e092b1b..5e5097a03 100644
--- a/src/core/hle/service/hid/controllers/controller_base.h
+++ b/src/core/hle/service/hid/controllers/controller_base.h
@@ -7,6 +7,10 @@
7#include "common/common_types.h" 7#include "common/common_types.h"
8#include "common/swap.h" 8#include "common/swap.h"
9 9
10namespace Core::Timing {
11class CoreTiming;
12}
13
10namespace Service::HID { 14namespace Service::HID {
11class ControllerBase { 15class ControllerBase {
12public: 16public:
@@ -20,7 +24,8 @@ public:
20 virtual void OnRelease() = 0; 24 virtual void OnRelease() = 0;
21 25
22 // When the controller is requesting an update for the shared memory 26 // When the controller is requesting an update for the shared memory
23 virtual void OnUpdate(u8* data, std::size_t size) = 0; 27 virtual void OnUpdate(const Core::Timing::CoreTiming& core_timing, u8* data,
28 std::size_t size) = 0;
24 29
25 // Called when input devices should be loaded 30 // Called when input devices should be loaded
26 virtual void OnLoadInputDevices() = 0; 31 virtual void OnLoadInputDevices() = 0;
diff --git a/src/core/hle/service/hid/controllers/debug_pad.cpp b/src/core/hle/service/hid/controllers/debug_pad.cpp
index c22357d8c..c5c2e032a 100644
--- a/src/core/hle/service/hid/controllers/debug_pad.cpp
+++ b/src/core/hle/service/hid/controllers/debug_pad.cpp
@@ -21,8 +21,9 @@ void Controller_DebugPad::OnInit() {}
21 21
22void Controller_DebugPad::OnRelease() {} 22void Controller_DebugPad::OnRelease() {}
23 23
24void Controller_DebugPad::OnUpdate(u8* data, std::size_t size) { 24void Controller_DebugPad::OnUpdate(const Core::Timing::CoreTiming& core_timing, u8* data,
25 shared_memory.header.timestamp = CoreTiming::GetTicks(); 25 std::size_t size) {
26 shared_memory.header.timestamp = core_timing.GetTicks();
26 shared_memory.header.total_entry_count = 17; 27 shared_memory.header.total_entry_count = 17;
27 28
28 if (!IsControllerActivated()) { 29 if (!IsControllerActivated()) {
diff --git a/src/core/hle/service/hid/controllers/debug_pad.h b/src/core/hle/service/hid/controllers/debug_pad.h
index 68b734248..e584b92ec 100644
--- a/src/core/hle/service/hid/controllers/debug_pad.h
+++ b/src/core/hle/service/hid/controllers/debug_pad.h
@@ -26,7 +26,7 @@ public:
26 void OnRelease() override; 26 void OnRelease() override;
27 27
28 // When the controller is requesting an update for the shared memory 28 // When the controller is requesting an update for the shared memory
29 void OnUpdate(u8* data, std::size_t size) override; 29 void OnUpdate(const Core::Timing::CoreTiming& core_timing, u8* data, std::size_t size) override;
30 30
31 // Called when input devices should be loaded 31 // Called when input devices should be loaded
32 void OnLoadInputDevices() override; 32 void OnLoadInputDevices() override;
@@ -41,20 +41,20 @@ private:
41 struct PadState { 41 struct PadState {
42 union { 42 union {
43 u32_le raw{}; 43 u32_le raw{};
44 BitField<0, 1, u32_le> a; 44 BitField<0, 1, u32> a;
45 BitField<1, 1, u32_le> b; 45 BitField<1, 1, u32> b;
46 BitField<2, 1, u32_le> x; 46 BitField<2, 1, u32> x;
47 BitField<3, 1, u32_le> y; 47 BitField<3, 1, u32> y;
48 BitField<4, 1, u32_le> l; 48 BitField<4, 1, u32> l;
49 BitField<5, 1, u32_le> r; 49 BitField<5, 1, u32> r;
50 BitField<6, 1, u32_le> zl; 50 BitField<6, 1, u32> zl;
51 BitField<7, 1, u32_le> zr; 51 BitField<7, 1, u32> zr;
52 BitField<8, 1, u32_le> plus; 52 BitField<8, 1, u32> plus;
53 BitField<9, 1, u32_le> minus; 53 BitField<9, 1, u32> minus;
54 BitField<10, 1, u32_le> d_left; 54 BitField<10, 1, u32> d_left;
55 BitField<11, 1, u32_le> d_up; 55 BitField<11, 1, u32> d_up;
56 BitField<12, 1, u32_le> d_right; 56 BitField<12, 1, u32> d_right;
57 BitField<13, 1, u32_le> d_down; 57 BitField<13, 1, u32> d_down;
58 }; 58 };
59 }; 59 };
60 static_assert(sizeof(PadState) == 0x4, "PadState is an invalid size"); 60 static_assert(sizeof(PadState) == 0x4, "PadState is an invalid size");
@@ -62,7 +62,7 @@ private:
62 struct Attributes { 62 struct Attributes {
63 union { 63 union {
64 u32_le raw{}; 64 u32_le raw{};
65 BitField<0, 1, u32_le> connected; 65 BitField<0, 1, u32> connected;
66 }; 66 };
67 }; 67 };
68 static_assert(sizeof(Attributes) == 0x4, "Attributes is an invalid size"); 68 static_assert(sizeof(Attributes) == 0x4, "Attributes is an invalid size");
diff --git a/src/core/hle/service/hid/controllers/gesture.cpp b/src/core/hle/service/hid/controllers/gesture.cpp
index 898572277..a179252e3 100644
--- a/src/core/hle/service/hid/controllers/gesture.cpp
+++ b/src/core/hle/service/hid/controllers/gesture.cpp
@@ -17,8 +17,9 @@ void Controller_Gesture::OnInit() {}
17 17
18void Controller_Gesture::OnRelease() {} 18void Controller_Gesture::OnRelease() {}
19 19
20void Controller_Gesture::OnUpdate(u8* data, std::size_t size) { 20void Controller_Gesture::OnUpdate(const Core::Timing::CoreTiming& core_timing, u8* data,
21 shared_memory.header.timestamp = CoreTiming::GetTicks(); 21 std::size_t size) {
22 shared_memory.header.timestamp = core_timing.GetTicks();
22 shared_memory.header.total_entry_count = 17; 23 shared_memory.header.total_entry_count = 17;
23 24
24 if (!IsControllerActivated()) { 25 if (!IsControllerActivated()) {
diff --git a/src/core/hle/service/hid/controllers/gesture.h b/src/core/hle/service/hid/controllers/gesture.h
index 1056ffbcd..f305fe90f 100644
--- a/src/core/hle/service/hid/controllers/gesture.h
+++ b/src/core/hle/service/hid/controllers/gesture.h
@@ -22,7 +22,7 @@ public:
22 void OnRelease() override; 22 void OnRelease() override;
23 23
24 // When the controller is requesting an update for the shared memory 24 // When the controller is requesting an update for the shared memory
25 void OnUpdate(u8* data, size_t size) override; 25 void OnUpdate(const Core::Timing::CoreTiming& core_timing, u8* data, size_t size) override;
26 26
27 // Called when input devices should be loaded 27 // Called when input devices should be loaded
28 void OnLoadInputDevices() override; 28 void OnLoadInputDevices() override;
diff --git a/src/core/hle/service/hid/controllers/keyboard.cpp b/src/core/hle/service/hid/controllers/keyboard.cpp
index ca75adc2b..92d7bfb52 100644
--- a/src/core/hle/service/hid/controllers/keyboard.cpp
+++ b/src/core/hle/service/hid/controllers/keyboard.cpp
@@ -19,8 +19,9 @@ void Controller_Keyboard::OnInit() {}
19 19
20void Controller_Keyboard::OnRelease() {} 20void Controller_Keyboard::OnRelease() {}
21 21
22void Controller_Keyboard::OnUpdate(u8* data, std::size_t size) { 22void Controller_Keyboard::OnUpdate(const Core::Timing::CoreTiming& core_timing, u8* data,
23 shared_memory.header.timestamp = CoreTiming::GetTicks(); 23 std::size_t size) {
24 shared_memory.header.timestamp = core_timing.GetTicks();
24 shared_memory.header.total_entry_count = 17; 25 shared_memory.header.total_entry_count = 17;
25 26
26 if (!IsControllerActivated()) { 27 if (!IsControllerActivated()) {
diff --git a/src/core/hle/service/hid/controllers/keyboard.h b/src/core/hle/service/hid/controllers/keyboard.h
index f52775456..73cd2c7bb 100644
--- a/src/core/hle/service/hid/controllers/keyboard.h
+++ b/src/core/hle/service/hid/controllers/keyboard.h
@@ -25,7 +25,7 @@ public:
25 void OnRelease() override; 25 void OnRelease() override;
26 26
27 // When the controller is requesting an update for the shared memory 27 // When the controller is requesting an update for the shared memory
28 void OnUpdate(u8* data, std::size_t size) override; 28 void OnUpdate(const Core::Timing::CoreTiming& core_timing, u8* data, std::size_t size) override;
29 29
30 // Called when input devices should be loaded 30 // Called when input devices should be loaded
31 void OnLoadInputDevices() override; 31 void OnLoadInputDevices() override;
diff --git a/src/core/hle/service/hid/controllers/mouse.cpp b/src/core/hle/service/hid/controllers/mouse.cpp
index 63391dbe9..11ab096d9 100644
--- a/src/core/hle/service/hid/controllers/mouse.cpp
+++ b/src/core/hle/service/hid/controllers/mouse.cpp
@@ -17,8 +17,9 @@ Controller_Mouse::~Controller_Mouse() = default;
17void Controller_Mouse::OnInit() {} 17void Controller_Mouse::OnInit() {}
18void Controller_Mouse::OnRelease() {} 18void Controller_Mouse::OnRelease() {}
19 19
20void Controller_Mouse::OnUpdate(u8* data, std::size_t size) { 20void Controller_Mouse::OnUpdate(const Core::Timing::CoreTiming& core_timing, u8* data,
21 shared_memory.header.timestamp = CoreTiming::GetTicks(); 21 std::size_t size) {
22 shared_memory.header.timestamp = core_timing.GetTicks();
22 shared_memory.header.total_entry_count = 17; 23 shared_memory.header.total_entry_count = 17;
23 24
24 if (!IsControllerActivated()) { 25 if (!IsControllerActivated()) {
diff --git a/src/core/hle/service/hid/controllers/mouse.h b/src/core/hle/service/hid/controllers/mouse.h
index 70b654d07..9d46eecbe 100644
--- a/src/core/hle/service/hid/controllers/mouse.h
+++ b/src/core/hle/service/hid/controllers/mouse.h
@@ -24,7 +24,7 @@ public:
24 void OnRelease() override; 24 void OnRelease() override;
25 25
26 // When the controller is requesting an update for the shared memory 26 // When the controller is requesting an update for the shared memory
27 void OnUpdate(u8* data, std::size_t size) override; 27 void OnUpdate(const Core::Timing::CoreTiming& core_timing, u8* data, std::size_t size) override;
28 28
29 // Called when input devices should be loaded 29 // Called when input devices should be loaded
30 void OnLoadInputDevices() override; 30 void OnLoadInputDevices() override;
diff --git a/src/core/hle/service/hid/controllers/npad.cpp b/src/core/hle/service/hid/controllers/npad.cpp
index 04c8c35a8..e7fc7a619 100644
--- a/src/core/hle/service/hid/controllers/npad.cpp
+++ b/src/core/hle/service/hid/controllers/npad.cpp
@@ -288,7 +288,8 @@ void Controller_NPad::RequestPadStateUpdate(u32 npad_id) {
288 rstick_entry.y = static_cast<s32>(stick_r_y_f * HID_JOYSTICK_MAX); 288 rstick_entry.y = static_cast<s32>(stick_r_y_f * HID_JOYSTICK_MAX);
289} 289}
290 290
291void Controller_NPad::OnUpdate(u8* data, std::size_t data_len) { 291void Controller_NPad::OnUpdate(const Core::Timing::CoreTiming& core_timing, u8* data,
292 std::size_t data_len) {
292 if (!IsControllerActivated()) 293 if (!IsControllerActivated())
293 return; 294 return;
294 for (std::size_t i = 0; i < shared_memory_entries.size(); i++) { 295 for (std::size_t i = 0; i < shared_memory_entries.size(); i++) {
@@ -308,7 +309,7 @@ void Controller_NPad::OnUpdate(u8* data, std::size_t data_len) {
308 const auto& last_entry = 309 const auto& last_entry =
309 main_controller->npad[main_controller->common.last_entry_index]; 310 main_controller->npad[main_controller->common.last_entry_index];
310 311
311 main_controller->common.timestamp = CoreTiming::GetTicks(); 312 main_controller->common.timestamp = core_timing.GetTicks();
312 main_controller->common.last_entry_index = 313 main_controller->common.last_entry_index =
313 (main_controller->common.last_entry_index + 1) % 17; 314 (main_controller->common.last_entry_index + 1) % 17;
314 315
diff --git a/src/core/hle/service/hid/controllers/npad.h b/src/core/hle/service/hid/controllers/npad.h
index 106cf58c8..4ff50b3cd 100644
--- a/src/core/hle/service/hid/controllers/npad.h
+++ b/src/core/hle/service/hid/controllers/npad.h
@@ -30,7 +30,7 @@ public:
30 void OnRelease() override; 30 void OnRelease() override;
31 31
32 // When the controller is requesting an update for the shared memory 32 // When the controller is requesting an update for the shared memory
33 void OnUpdate(u8* data, std::size_t size) override; 33 void OnUpdate(const Core::Timing::CoreTiming& core_timing, u8* data, std::size_t size) override;
34 34
35 // Called when input devices should be loaded 35 // Called when input devices should be loaded
36 void OnLoadInputDevices() override; 36 void OnLoadInputDevices() override;
@@ -39,13 +39,13 @@ public:
39 union { 39 union {
40 u32_le raw{}; 40 u32_le raw{};
41 41
42 BitField<0, 1, u32_le> pro_controller; 42 BitField<0, 1, u32> pro_controller;
43 BitField<1, 1, u32_le> handheld; 43 BitField<1, 1, u32> handheld;
44 BitField<2, 1, u32_le> joycon_dual; 44 BitField<2, 1, u32> joycon_dual;
45 BitField<3, 1, u32_le> joycon_left; 45 BitField<3, 1, u32> joycon_left;
46 BitField<4, 1, u32_le> joycon_right; 46 BitField<4, 1, u32> joycon_right;
47 47
48 BitField<6, 1, u32_le> pokeball; // TODO(ogniK): Confirm when possible 48 BitField<6, 1, u32> pokeball; // TODO(ogniK): Confirm when possible
49 }; 49 };
50 }; 50 };
51 static_assert(sizeof(NPadType) == 4, "NPadType is an invalid size"); 51 static_assert(sizeof(NPadType) == 4, "NPadType is an invalid size");
@@ -150,43 +150,43 @@ private:
150 union { 150 union {
151 u64_le raw{}; 151 u64_le raw{};
152 // Button states 152 // Button states
153 BitField<0, 1, u64_le> a; 153 BitField<0, 1, u64> a;
154 BitField<1, 1, u64_le> b; 154 BitField<1, 1, u64> b;
155 BitField<2, 1, u64_le> x; 155 BitField<2, 1, u64> x;
156 BitField<3, 1, u64_le> y; 156 BitField<3, 1, u64> y;
157 BitField<4, 1, u64_le> l_stick; 157 BitField<4, 1, u64> l_stick;
158 BitField<5, 1, u64_le> r_stick; 158 BitField<5, 1, u64> r_stick;
159 BitField<6, 1, u64_le> l; 159 BitField<6, 1, u64> l;
160 BitField<7, 1, u64_le> r; 160 BitField<7, 1, u64> r;
161 BitField<8, 1, u64_le> zl; 161 BitField<8, 1, u64> zl;
162 BitField<9, 1, u64_le> zr; 162 BitField<9, 1, u64> zr;
163 BitField<10, 1, u64_le> plus; 163 BitField<10, 1, u64> plus;
164 BitField<11, 1, u64_le> minus; 164 BitField<11, 1, u64> minus;
165 165
166 // D-Pad 166 // D-Pad
167 BitField<12, 1, u64_le> d_left; 167 BitField<12, 1, u64> d_left;
168 BitField<13, 1, u64_le> d_up; 168 BitField<13, 1, u64> d_up;
169 BitField<14, 1, u64_le> d_right; 169 BitField<14, 1, u64> d_right;
170 BitField<15, 1, u64_le> d_down; 170 BitField<15, 1, u64> d_down;
171 171
172 // Left JoyStick 172 // Left JoyStick
173 BitField<16, 1, u64_le> l_stick_left; 173 BitField<16, 1, u64> l_stick_left;
174 BitField<17, 1, u64_le> l_stick_up; 174 BitField<17, 1, u64> l_stick_up;
175 BitField<18, 1, u64_le> l_stick_right; 175 BitField<18, 1, u64> l_stick_right;
176 BitField<19, 1, u64_le> l_stick_down; 176 BitField<19, 1, u64> l_stick_down;
177 177
178 // Right JoyStick 178 // Right JoyStick
179 BitField<20, 1, u64_le> r_stick_left; 179 BitField<20, 1, u64> r_stick_left;
180 BitField<21, 1, u64_le> r_stick_up; 180 BitField<21, 1, u64> r_stick_up;
181 BitField<22, 1, u64_le> r_stick_right; 181 BitField<22, 1, u64> r_stick_right;
182 BitField<23, 1, u64_le> r_stick_down; 182 BitField<23, 1, u64> r_stick_down;
183 183
184 // Not always active? 184 // Not always active?
185 BitField<24, 1, u64_le> left_sl; 185 BitField<24, 1, u64> left_sl;
186 BitField<25, 1, u64_le> left_sr; 186 BitField<25, 1, u64> left_sr;
187 187
188 BitField<26, 1, u64_le> right_sl; 188 BitField<26, 1, u64> right_sl;
189 BitField<27, 1, u64_le> right_sr; 189 BitField<27, 1, u64> right_sr;
190 }; 190 };
191 }; 191 };
192 static_assert(sizeof(ControllerPadState) == 8, "ControllerPadState is an invalid size"); 192 static_assert(sizeof(ControllerPadState) == 8, "ControllerPadState is an invalid size");
@@ -200,12 +200,12 @@ private:
200 struct ConnectionState { 200 struct ConnectionState {
201 union { 201 union {
202 u32_le raw{}; 202 u32_le raw{};
203 BitField<0, 1, u32_le> IsConnected; 203 BitField<0, 1, u32> IsConnected;
204 BitField<1, 1, u32_le> IsWired; 204 BitField<1, 1, u32> IsWired;
205 BitField<2, 1, u32_le> IsLeftJoyConnected; 205 BitField<2, 1, u32> IsLeftJoyConnected;
206 BitField<3, 1, u32_le> IsLeftJoyWired; 206 BitField<3, 1, u32> IsLeftJoyWired;
207 BitField<4, 1, u32_le> IsRightJoyConnected; 207 BitField<4, 1, u32> IsRightJoyConnected;
208 BitField<5, 1, u32_le> IsRightJoyWired; 208 BitField<5, 1, u32> IsRightJoyWired;
209 }; 209 };
210 }; 210 };
211 static_assert(sizeof(ConnectionState) == 4, "ConnectionState is an invalid size"); 211 static_assert(sizeof(ConnectionState) == 4, "ConnectionState is an invalid size");
@@ -240,23 +240,23 @@ private:
240 struct NPadProperties { 240 struct NPadProperties {
241 union { 241 union {
242 s64_le raw{}; 242 s64_le raw{};
243 BitField<11, 1, s64_le> is_vertical; 243 BitField<11, 1, s64> is_vertical;
244 BitField<12, 1, s64_le> is_horizontal; 244 BitField<12, 1, s64> is_horizontal;
245 BitField<13, 1, s64_le> use_plus; 245 BitField<13, 1, s64> use_plus;
246 BitField<14, 1, s64_le> use_minus; 246 BitField<14, 1, s64> use_minus;
247 }; 247 };
248 }; 248 };
249 249
250 struct NPadDevice { 250 struct NPadDevice {
251 union { 251 union {
252 u32_le raw{}; 252 u32_le raw{};
253 BitField<0, 1, s32_le> pro_controller; 253 BitField<0, 1, s32> pro_controller;
254 BitField<1, 1, s32_le> handheld; 254 BitField<1, 1, s32> handheld;
255 BitField<2, 1, s32_le> handheld_left; 255 BitField<2, 1, s32> handheld_left;
256 BitField<3, 1, s32_le> handheld_right; 256 BitField<3, 1, s32> handheld_right;
257 BitField<4, 1, s32_le> joycon_left; 257 BitField<4, 1, s32> joycon_left;
258 BitField<5, 1, s32_le> joycon_right; 258 BitField<5, 1, s32> joycon_right;
259 BitField<6, 1, s32_le> pokeball; 259 BitField<6, 1, s32> pokeball;
260 }; 260 };
261 }; 261 };
262 262
diff --git a/src/core/hle/service/hid/controllers/stubbed.cpp b/src/core/hle/service/hid/controllers/stubbed.cpp
index 02fcfadd9..946948f5e 100644
--- a/src/core/hle/service/hid/controllers/stubbed.cpp
+++ b/src/core/hle/service/hid/controllers/stubbed.cpp
@@ -16,13 +16,14 @@ void Controller_Stubbed::OnInit() {}
16 16
17void Controller_Stubbed::OnRelease() {} 17void Controller_Stubbed::OnRelease() {}
18 18
19void Controller_Stubbed::OnUpdate(u8* data, std::size_t size) { 19void Controller_Stubbed::OnUpdate(const Core::Timing::CoreTiming& core_timing, u8* data,
20 std::size_t size) {
20 if (!smart_update) { 21 if (!smart_update) {
21 return; 22 return;
22 } 23 }
23 24
24 CommonHeader header{}; 25 CommonHeader header{};
25 header.timestamp = CoreTiming::GetTicks(); 26 header.timestamp = core_timing.GetTicks();
26 header.total_entry_count = 17; 27 header.total_entry_count = 17;
27 header.entry_count = 0; 28 header.entry_count = 0;
28 header.last_entry_index = 0; 29 header.last_entry_index = 0;
diff --git a/src/core/hle/service/hid/controllers/stubbed.h b/src/core/hle/service/hid/controllers/stubbed.h
index 4a21c643e..24469f03e 100644
--- a/src/core/hle/service/hid/controllers/stubbed.h
+++ b/src/core/hle/service/hid/controllers/stubbed.h
@@ -20,7 +20,7 @@ public:
20 void OnRelease() override; 20 void OnRelease() override;
21 21
22 // When the controller is requesting an update for the shared memory 22 // When the controller is requesting an update for the shared memory
23 void OnUpdate(u8* data, std::size_t size) override; 23 void OnUpdate(const Core::Timing::CoreTiming& core_timing, u8* data, std::size_t size) override;
24 24
25 // Called when input devices should be loaded 25 // Called when input devices should be loaded
26 void OnLoadInputDevices() override; 26 void OnLoadInputDevices() override;
diff --git a/src/core/hle/service/hid/controllers/touchscreen.cpp b/src/core/hle/service/hid/controllers/touchscreen.cpp
index f666b1bd8..1a8445a43 100644
--- a/src/core/hle/service/hid/controllers/touchscreen.cpp
+++ b/src/core/hle/service/hid/controllers/touchscreen.cpp
@@ -20,8 +20,9 @@ void Controller_Touchscreen::OnInit() {}
20 20
21void Controller_Touchscreen::OnRelease() {} 21void Controller_Touchscreen::OnRelease() {}
22 22
23void Controller_Touchscreen::OnUpdate(u8* data, std::size_t size) { 23void Controller_Touchscreen::OnUpdate(const Core::Timing::CoreTiming& core_timing, u8* data,
24 shared_memory.header.timestamp = CoreTiming::GetTicks(); 24 std::size_t size) {
25 shared_memory.header.timestamp = core_timing.GetTicks();
25 shared_memory.header.total_entry_count = 17; 26 shared_memory.header.total_entry_count = 17;
26 27
27 if (!IsControllerActivated()) { 28 if (!IsControllerActivated()) {
@@ -48,7 +49,7 @@ void Controller_Touchscreen::OnUpdate(u8* data, std::size_t size) {
48 touch_entry.diameter_x = Settings::values.touchscreen.diameter_x; 49 touch_entry.diameter_x = Settings::values.touchscreen.diameter_x;
49 touch_entry.diameter_y = Settings::values.touchscreen.diameter_y; 50 touch_entry.diameter_y = Settings::values.touchscreen.diameter_y;
50 touch_entry.rotation_angle = Settings::values.touchscreen.rotation_angle; 51 touch_entry.rotation_angle = Settings::values.touchscreen.rotation_angle;
51 const u64 tick = CoreTiming::GetTicks(); 52 const u64 tick = core_timing.GetTicks();
52 touch_entry.delta_time = tick - last_touch; 53 touch_entry.delta_time = tick - last_touch;
53 last_touch = tick; 54 last_touch = tick;
54 touch_entry.finger = Settings::values.touchscreen.finger; 55 touch_entry.finger = Settings::values.touchscreen.finger;
diff --git a/src/core/hle/service/hid/controllers/touchscreen.h b/src/core/hle/service/hid/controllers/touchscreen.h
index 94cd0eba9..76fc340e9 100644
--- a/src/core/hle/service/hid/controllers/touchscreen.h
+++ b/src/core/hle/service/hid/controllers/touchscreen.h
@@ -24,7 +24,7 @@ public:
24 void OnRelease() override; 24 void OnRelease() override;
25 25
26 // When the controller is requesting an update for the shared memory 26 // When the controller is requesting an update for the shared memory
27 void OnUpdate(u8* data, std::size_t size) override; 27 void OnUpdate(const Core::Timing::CoreTiming& core_timing, u8* data, std::size_t size) override;
28 28
29 // Called when input devices should be loaded 29 // Called when input devices should be loaded
30 void OnLoadInputDevices() override; 30 void OnLoadInputDevices() override;
@@ -33,8 +33,8 @@ private:
33 struct Attributes { 33 struct Attributes {
34 union { 34 union {
35 u32 raw{}; 35 u32 raw{};
36 BitField<0, 1, u32_le> start_touch; 36 BitField<0, 1, u32> start_touch;
37 BitField<1, 1, u32_le> end_touch; 37 BitField<1, 1, u32> end_touch;
38 }; 38 };
39 }; 39 };
40 static_assert(sizeof(Attributes) == 0x4, "Attributes is an invalid size"); 40 static_assert(sizeof(Attributes) == 0x4, "Attributes is an invalid size");
diff --git a/src/core/hle/service/hid/controllers/xpad.cpp b/src/core/hle/service/hid/controllers/xpad.cpp
index cd397c70b..1a9da9576 100644
--- a/src/core/hle/service/hid/controllers/xpad.cpp
+++ b/src/core/hle/service/hid/controllers/xpad.cpp
@@ -17,9 +17,10 @@ void Controller_XPad::OnInit() {}
17 17
18void Controller_XPad::OnRelease() {} 18void Controller_XPad::OnRelease() {}
19 19
20void Controller_XPad::OnUpdate(u8* data, std::size_t size) { 20void Controller_XPad::OnUpdate(const Core::Timing::CoreTiming& core_timing, u8* data,
21 std::size_t size) {
21 for (auto& xpad_entry : shared_memory.shared_memory_entries) { 22 for (auto& xpad_entry : shared_memory.shared_memory_entries) {
22 xpad_entry.header.timestamp = CoreTiming::GetTicks(); 23 xpad_entry.header.timestamp = core_timing.GetTicks();
23 xpad_entry.header.total_entry_count = 17; 24 xpad_entry.header.total_entry_count = 17;
24 25
25 if (!IsControllerActivated()) { 26 if (!IsControllerActivated()) {
diff --git a/src/core/hle/service/hid/controllers/xpad.h b/src/core/hle/service/hid/controllers/xpad.h
index ff836989f..2864e6617 100644
--- a/src/core/hle/service/hid/controllers/xpad.h
+++ b/src/core/hle/service/hid/controllers/xpad.h
@@ -22,7 +22,7 @@ public:
22 void OnRelease() override; 22 void OnRelease() override;
23 23
24 // When the controller is requesting an update for the shared memory 24 // When the controller is requesting an update for the shared memory
25 void OnUpdate(u8* data, std::size_t size) override; 25 void OnUpdate(const Core::Timing::CoreTiming& core_timing, u8* data, std::size_t size) override;
26 26
27 // Called when input devices should be loaded 27 // Called when input devices should be loaded
28 void OnLoadInputDevices() override; 28 void OnLoadInputDevices() override;
diff --git a/src/core/hle/service/hid/hid.cpp b/src/core/hle/service/hid/hid.cpp
index 008bf3f02..63b55758b 100644
--- a/src/core/hle/service/hid/hid.cpp
+++ b/src/core/hle/service/hid/hid.cpp
@@ -36,9 +36,9 @@ namespace Service::HID {
36 36
37// Updating period for each HID device. 37// Updating period for each HID device.
38// TODO(ogniK): Find actual polling rate of hid 38// TODO(ogniK): Find actual polling rate of hid
39constexpr u64 pad_update_ticks = CoreTiming::BASE_CLOCK_RATE / 66; 39constexpr s64 pad_update_ticks = static_cast<s64>(Core::Timing::BASE_CLOCK_RATE / 66);
40constexpr u64 accelerometer_update_ticks = CoreTiming::BASE_CLOCK_RATE / 100; 40constexpr s64 accelerometer_update_ticks = static_cast<s64>(Core::Timing::BASE_CLOCK_RATE / 100);
41constexpr u64 gyroscope_update_ticks = CoreTiming::BASE_CLOCK_RATE / 100; 41constexpr s64 gyroscope_update_ticks = static_cast<s64>(Core::Timing::BASE_CLOCK_RATE / 100);
42constexpr std::size_t SHARED_MEMORY_SIZE = 0x40000; 42constexpr std::size_t SHARED_MEMORY_SIZE = 0x40000;
43 43
44IAppletResource::IAppletResource() : ServiceFramework("IAppletResource") { 44IAppletResource::IAppletResource() : ServiceFramework("IAppletResource") {
@@ -73,14 +73,15 @@ IAppletResource::IAppletResource() : ServiceFramework("IAppletResource") {
73 GetController<Controller_Stubbed>(HidController::Unknown3).SetCommonHeaderOffset(0x5000); 73 GetController<Controller_Stubbed>(HidController::Unknown3).SetCommonHeaderOffset(0x5000);
74 74
75 // Register update callbacks 75 // Register update callbacks
76 auto& core_timing = Core::System::GetInstance().CoreTiming();
76 pad_update_event = 77 pad_update_event =
77 CoreTiming::RegisterEvent("HID::UpdatePadCallback", [this](u64 userdata, int cycles_late) { 78 core_timing.RegisterEvent("HID::UpdatePadCallback", [this](u64 userdata, s64 cycles_late) {
78 UpdateControllers(userdata, cycles_late); 79 UpdateControllers(userdata, cycles_late);
79 }); 80 });
80 81
81 // TODO(shinyquagsire23): Other update callbacks? (accel, gyro?) 82 // TODO(shinyquagsire23): Other update callbacks? (accel, gyro?)
82 83
83 CoreTiming::ScheduleEvent(pad_update_ticks, pad_update_event); 84 core_timing.ScheduleEvent(pad_update_ticks, pad_update_event);
84 85
85 ReloadInputDevices(); 86 ReloadInputDevices();
86} 87}
@@ -94,7 +95,7 @@ void IAppletResource::DeactivateController(HidController controller) {
94} 95}
95 96
96IAppletResource ::~IAppletResource() { 97IAppletResource ::~IAppletResource() {
97 CoreTiming::UnscheduleEvent(pad_update_event, 0); 98 Core::System::GetInstance().CoreTiming().UnscheduleEvent(pad_update_event, 0);
98} 99}
99 100
100void IAppletResource::GetSharedMemoryHandle(Kernel::HLERequestContext& ctx) { 101void IAppletResource::GetSharedMemoryHandle(Kernel::HLERequestContext& ctx) {
@@ -105,16 +106,18 @@ void IAppletResource::GetSharedMemoryHandle(Kernel::HLERequestContext& ctx) {
105 rb.PushCopyObjects(shared_mem); 106 rb.PushCopyObjects(shared_mem);
106} 107}
107 108
108void IAppletResource::UpdateControllers(u64 userdata, int cycles_late) { 109void IAppletResource::UpdateControllers(u64 userdata, s64 cycles_late) {
110 auto& core_timing = Core::System::GetInstance().CoreTiming();
111
109 const bool should_reload = Settings::values.is_device_reload_pending.exchange(false); 112 const bool should_reload = Settings::values.is_device_reload_pending.exchange(false);
110 for (const auto& controller : controllers) { 113 for (const auto& controller : controllers) {
111 if (should_reload) { 114 if (should_reload) {
112 controller->OnLoadInputDevices(); 115 controller->OnLoadInputDevices();
113 } 116 }
114 controller->OnUpdate(shared_mem->GetPointer(), SHARED_MEMORY_SIZE); 117 controller->OnUpdate(core_timing, shared_mem->GetPointer(), SHARED_MEMORY_SIZE);
115 } 118 }
116 119
117 CoreTiming::ScheduleEvent(pad_update_ticks - cycles_late, pad_update_event); 120 core_timing.ScheduleEvent(pad_update_ticks - cycles_late, pad_update_event);
118} 121}
119 122
120class IActiveVibrationDeviceList final : public ServiceFramework<IActiveVibrationDeviceList> { 123class IActiveVibrationDeviceList final : public ServiceFramework<IActiveVibrationDeviceList> {
diff --git a/src/core/hle/service/hid/hid.h b/src/core/hle/service/hid/hid.h
index eca27c056..d3660cad2 100644
--- a/src/core/hle/service/hid/hid.h
+++ b/src/core/hle/service/hid/hid.h
@@ -4,10 +4,13 @@
4 4
5#pragma once 5#pragma once
6 6
7#include "core/hle/service/hid/controllers/controller_base.h"
8#include "core/hle/service/service.h"
9
7#include "controllers/controller_base.h" 10#include "controllers/controller_base.h"
8#include "core/hle/service/service.h" 11#include "core/hle/service/service.h"
9 12
10namespace CoreTiming { 13namespace Core::Timing {
11struct EventType; 14struct EventType;
12} 15}
13 16
@@ -15,7 +18,7 @@ namespace Kernel {
15class SharedMemory; 18class SharedMemory;
16} 19}
17 20
18namespace SM { 21namespace Service::SM {
19class ServiceManager; 22class ServiceManager;
20} 23}
21 24
@@ -62,11 +65,11 @@ private:
62 } 65 }
63 66
64 void GetSharedMemoryHandle(Kernel::HLERequestContext& ctx); 67 void GetSharedMemoryHandle(Kernel::HLERequestContext& ctx);
65 void UpdateControllers(u64 userdata, int cycles_late); 68 void UpdateControllers(u64 userdata, s64 cycles_late);
66 69
67 Kernel::SharedPtr<Kernel::SharedMemory> shared_mem; 70 Kernel::SharedPtr<Kernel::SharedMemory> shared_mem;
68 71
69 CoreTiming::EventType* pad_update_event; 72 Core::Timing::EventType* pad_update_event;
70 73
71 std::array<std::unique_ptr<ControllerBase>, static_cast<size_t>(HidController::MaxControllers)> 74 std::array<std::unique_ptr<ControllerBase>, static_cast<size_t>(HidController::MaxControllers)>
72 controllers{}; 75 controllers{};
diff --git a/src/core/hle/service/hid/irs.cpp b/src/core/hle/service/hid/irs.cpp
index 3c7f8b1ee..2c4625c99 100644
--- a/src/core/hle/service/hid/irs.cpp
+++ b/src/core/hle/service/hid/irs.cpp
@@ -98,7 +98,7 @@ void IRS::GetImageTransferProcessorState(Kernel::HLERequestContext& ctx) {
98 98
99 IPC::ResponseBuilder rb{ctx, 5}; 99 IPC::ResponseBuilder rb{ctx, 5};
100 rb.Push(RESULT_SUCCESS); 100 rb.Push(RESULT_SUCCESS);
101 rb.PushRaw<u64>(CoreTiming::GetTicks()); 101 rb.PushRaw<u64>(Core::System::GetInstance().CoreTiming().GetTicks());
102 rb.PushRaw<u32>(0); 102 rb.PushRaw<u32>(0);
103} 103}
104 104
diff --git a/src/core/hle/service/ldr/ldr.cpp b/src/core/hle/service/ldr/ldr.cpp
index 9df7ac50f..d65693fc7 100644
--- a/src/core/hle/service/ldr/ldr.cpp
+++ b/src/core/hle/service/ldr/ldr.cpp
@@ -319,15 +319,14 @@ public:
319 } 319 }
320 320
321 ASSERT(vm_manager 321 ASSERT(vm_manager
322 .MirrorMemory(*map_address, nro_addr, nro_size, 322 .MirrorMemory(*map_address, nro_addr, nro_size, Kernel::MemoryState::ModuleCode)
323 Kernel::MemoryState::ModuleCodeStatic)
324 .IsSuccess()); 323 .IsSuccess());
325 ASSERT(vm_manager.UnmapRange(nro_addr, nro_size).IsSuccess()); 324 ASSERT(vm_manager.UnmapRange(nro_addr, nro_size).IsSuccess());
326 325
327 if (bss_size > 0) { 326 if (bss_size > 0) {
328 ASSERT(vm_manager 327 ASSERT(vm_manager
329 .MirrorMemory(*map_address + nro_size, bss_addr, bss_size, 328 .MirrorMemory(*map_address + nro_size, bss_addr, bss_size,
330 Kernel::MemoryState::ModuleCodeStatic) 329 Kernel::MemoryState::ModuleCode)
331 .IsSuccess()); 330 .IsSuccess());
332 ASSERT(vm_manager.UnmapRange(bss_addr, bss_size).IsSuccess()); 331 ASSERT(vm_manager.UnmapRange(bss_addr, bss_size).IsSuccess());
333 } 332 }
@@ -388,8 +387,7 @@ public:
388 const auto& nro_size = iter->second.size; 387 const auto& nro_size = iter->second.size;
389 388
390 ASSERT(vm_manager 389 ASSERT(vm_manager
391 .MirrorMemory(heap_addr, mapped_addr, nro_size, 390 .MirrorMemory(heap_addr, mapped_addr, nro_size, Kernel::MemoryState::ModuleCode)
392 Kernel::MemoryState::ModuleCodeStatic)
393 .IsSuccess()); 391 .IsSuccess());
394 ASSERT(vm_manager.UnmapRange(mapped_addr, nro_size).IsSuccess()); 392 ASSERT(vm_manager.UnmapRange(mapped_addr, nro_size).IsSuccess());
395 393
diff --git a/src/core/hle/service/lm/lm.cpp b/src/core/hle/service/lm/lm.cpp
index 1f462e087..2a61593e2 100644
--- a/src/core/hle/service/lm/lm.cpp
+++ b/src/core/hle/service/lm/lm.cpp
@@ -42,7 +42,7 @@ private:
42 union { 42 union {
43 BitField<0, 16, Flags> flags; 43 BitField<0, 16, Flags> flags;
44 BitField<16, 8, Severity> severity; 44 BitField<16, 8, Severity> severity;
45 BitField<24, 8, u32_le> verbosity; 45 BitField<24, 8, u32> verbosity;
46 }; 46 };
47 u32_le payload_size; 47 u32_le payload_size;
48 48
diff --git a/src/core/hle/service/nfc/nfc.cpp b/src/core/hle/service/nfc/nfc.cpp
index 5c62d42ba..ca88bf97f 100644
--- a/src/core/hle/service/nfc/nfc.cpp
+++ b/src/core/hle/service/nfc/nfc.cpp
@@ -150,7 +150,7 @@ private:
150 150
151 IPC::ResponseBuilder rb{ctx, 3}; 151 IPC::ResponseBuilder rb{ctx, 3};
152 rb.Push(RESULT_SUCCESS); 152 rb.Push(RESULT_SUCCESS);
153 rb.PushRaw<u8>(Settings::values.enable_nfc); 153 rb.PushRaw<u8>(true);
154 } 154 }
155 155
156 void GetStateOld(Kernel::HLERequestContext& ctx) { 156 void GetStateOld(Kernel::HLERequestContext& ctx) {
diff --git a/src/core/hle/service/nfp/nfp.cpp b/src/core/hle/service/nfp/nfp.cpp
index 1c4482e47..c6babdd4d 100644
--- a/src/core/hle/service/nfp/nfp.cpp
+++ b/src/core/hle/service/nfp/nfp.cpp
@@ -335,7 +335,7 @@ void Module::Interface::CreateUserInterface(Kernel::HLERequestContext& ctx) {
335} 335}
336 336
337bool Module::Interface::LoadAmiibo(const std::vector<u8>& buffer) { 337bool Module::Interface::LoadAmiibo(const std::vector<u8>& buffer) {
338 std::lock_guard<std::recursive_mutex> lock(HLE::g_hle_lock); 338 std::lock_guard lock{HLE::g_hle_lock};
339 if (buffer.size() < sizeof(AmiiboFile)) { 339 if (buffer.size() < sizeof(AmiiboFile)) {
340 return false; 340 return false;
341 } 341 }
diff --git a/src/core/hle/service/nvdrv/devices/nvdevice.h b/src/core/hle/service/nvdrv/devices/nvdevice.h
index 0f02a1a18..4f6042b00 100644
--- a/src/core/hle/service/nvdrv/devices/nvdevice.h
+++ b/src/core/hle/service/nvdrv/devices/nvdevice.h
@@ -19,11 +19,11 @@ public:
19 virtual ~nvdevice() = default; 19 virtual ~nvdevice() = default;
20 union Ioctl { 20 union Ioctl {
21 u32_le raw; 21 u32_le raw;
22 BitField<0, 8, u32_le> cmd; 22 BitField<0, 8, u32> cmd;
23 BitField<8, 8, u32_le> group; 23 BitField<8, 8, u32> group;
24 BitField<16, 14, u32_le> length; 24 BitField<16, 14, u32> length;
25 BitField<30, 1, u32_le> is_in; 25 BitField<30, 1, u32> is_in;
26 BitField<31, 1, u32_le> is_out; 26 BitField<31, 1, u32> is_out;
27 }; 27 };
28 28
29 /** 29 /**
diff --git a/src/core/hle/service/nvdrv/devices/nvdisp_disp0.cpp b/src/core/hle/service/nvdrv/devices/nvdisp_disp0.cpp
index 92acc57b1..20c7c39aa 100644
--- a/src/core/hle/service/nvdrv/devices/nvdisp_disp0.cpp
+++ b/src/core/hle/service/nvdrv/devices/nvdisp_disp0.cpp
@@ -23,11 +23,11 @@ u32 nvdisp_disp0::ioctl(Ioctl command, const std::vector<u8>& input, std::vector
23 23
24void nvdisp_disp0::flip(u32 buffer_handle, u32 offset, u32 format, u32 width, u32 height, 24void nvdisp_disp0::flip(u32 buffer_handle, u32 offset, u32 format, u32 width, u32 height,
25 u32 stride, NVFlinger::BufferQueue::BufferTransformFlags transform, 25 u32 stride, NVFlinger::BufferQueue::BufferTransformFlags transform,
26 const MathUtil::Rectangle<int>& crop_rect) { 26 const Common::Rectangle<int>& crop_rect) {
27 VAddr addr = nvmap_dev->GetObjectAddress(buffer_handle); 27 VAddr addr = nvmap_dev->GetObjectAddress(buffer_handle);
28 LOG_WARNING(Service, 28 LOG_TRACE(Service,
29 "Drawing from address {:X} offset {:08X} Width {} Height {} Stride {} Format {}", 29 "Drawing from address {:X} offset {:08X} Width {} Height {} Stride {} Format {}",
30 addr, offset, width, height, stride, format); 30 addr, offset, width, height, stride, format);
31 31
32 using PixelFormat = Tegra::FramebufferConfig::PixelFormat; 32 using PixelFormat = Tegra::FramebufferConfig::PixelFormat;
33 const Tegra::FramebufferConfig framebuffer{ 33 const Tegra::FramebufferConfig framebuffer{
@@ -36,7 +36,7 @@ void nvdisp_disp0::flip(u32 buffer_handle, u32 offset, u32 format, u32 width, u3
36 36
37 auto& instance = Core::System::GetInstance(); 37 auto& instance = Core::System::GetInstance();
38 instance.GetPerfStats().EndGameFrame(); 38 instance.GetPerfStats().EndGameFrame();
39 instance.Renderer().SwapBuffers(framebuffer); 39 instance.GPU().SwapBuffers(framebuffer);
40} 40}
41 41
42} // namespace Service::Nvidia::Devices 42} // namespace Service::Nvidia::Devices
diff --git a/src/core/hle/service/nvdrv/devices/nvdisp_disp0.h b/src/core/hle/service/nvdrv/devices/nvdisp_disp0.h
index a45086e45..ace71169f 100644
--- a/src/core/hle/service/nvdrv/devices/nvdisp_disp0.h
+++ b/src/core/hle/service/nvdrv/devices/nvdisp_disp0.h
@@ -25,7 +25,7 @@ public:
25 /// Performs a screen flip, drawing the buffer pointed to by the handle. 25 /// Performs a screen flip, drawing the buffer pointed to by the handle.
26 void flip(u32 buffer_handle, u32 offset, u32 format, u32 width, u32 height, u32 stride, 26 void flip(u32 buffer_handle, u32 offset, u32 format, u32 width, u32 height, u32 stride,
27 NVFlinger::BufferQueue::BufferTransformFlags transform, 27 NVFlinger::BufferQueue::BufferTransformFlags transform,
28 const MathUtil::Rectangle<int>& crop_rect); 28 const Common::Rectangle<int>& crop_rect);
29 29
30private: 30private:
31 std::shared_ptr<nvmap> nvmap_dev; 31 std::shared_ptr<nvmap> nvmap_dev;
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.cpp b/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.cpp
index 466db7ccd..af62d33d2 100644
--- a/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.cpp
+++ b/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.cpp
@@ -10,6 +10,7 @@
10#include "core/core.h" 10#include "core/core.h"
11#include "core/hle/service/nvdrv/devices/nvhost_as_gpu.h" 11#include "core/hle/service/nvdrv/devices/nvhost_as_gpu.h"
12#include "core/hle/service/nvdrv/devices/nvmap.h" 12#include "core/hle/service/nvdrv/devices/nvmap.h"
13#include "core/memory.h"
13#include "video_core/memory_manager.h" 14#include "video_core/memory_manager.h"
14#include "video_core/rasterizer_interface.h" 15#include "video_core/rasterizer_interface.h"
15#include "video_core/renderer_base.h" 16#include "video_core/renderer_base.h"
@@ -88,7 +89,7 @@ u32 nvhost_as_gpu::Remap(const std::vector<u8>& input, std::vector<u8>& output)
88 for (const auto& entry : entries) { 89 for (const auto& entry : entries) {
89 LOG_WARNING(Service_NVDRV, "remap entry, offset=0x{:X} handle=0x{:X} pages=0x{:X}", 90 LOG_WARNING(Service_NVDRV, "remap entry, offset=0x{:X} handle=0x{:X} pages=0x{:X}",
90 entry.offset, entry.nvmap_handle, entry.pages); 91 entry.offset, entry.nvmap_handle, entry.pages);
91 Tegra::GPUVAddr offset = static_cast<Tegra::GPUVAddr>(entry.offset) << 0x10; 92 GPUVAddr offset = static_cast<GPUVAddr>(entry.offset) << 0x10;
92 auto object = nvmap_dev->GetObject(entry.nvmap_handle); 93 auto object = nvmap_dev->GetObject(entry.nvmap_handle);
93 if (!object) { 94 if (!object) {
94 LOG_CRITICAL(Service_NVDRV, "nvmap {} is an invalid handle!", entry.nvmap_handle); 95 LOG_CRITICAL(Service_NVDRV, "nvmap {} is an invalid handle!", entry.nvmap_handle);
@@ -101,7 +102,7 @@ u32 nvhost_as_gpu::Remap(const std::vector<u8>& input, std::vector<u8>& output)
101 u64 size = static_cast<u64>(entry.pages) << 0x10; 102 u64 size = static_cast<u64>(entry.pages) << 0x10;
102 ASSERT(size <= object->size); 103 ASSERT(size <= object->size);
103 104
104 Tegra::GPUVAddr returned = gpu.MemoryManager().MapBufferEx(object->addr, offset, size); 105 GPUVAddr returned = gpu.MemoryManager().MapBufferEx(object->addr, offset, size);
105 ASSERT(returned == offset); 106 ASSERT(returned == offset);
106 } 107 }
107 std::memcpy(output.data(), entries.data(), output.size()); 108 std::memcpy(output.data(), entries.data(), output.size());
@@ -172,16 +173,8 @@ u32 nvhost_as_gpu::UnmapBuffer(const std::vector<u8>& input, std::vector<u8>& ou
172 return 0; 173 return 0;
173 } 174 }
174 175
175 auto& system_instance = Core::System::GetInstance(); 176 params.offset = Core::System::GetInstance().GPU().MemoryManager().UnmapBuffer(params.offset,
176 177 itr->second.size);
177 // Remove this memory region from the rasterizer cache.
178 auto& gpu = system_instance.GPU();
179 auto cpu_addr = gpu.MemoryManager().GpuToCpuAddress(params.offset);
180 ASSERT(cpu_addr);
181 system_instance.Renderer().Rasterizer().FlushAndInvalidateRegion(*cpu_addr, itr->second.size);
182
183 params.offset = gpu.MemoryManager().UnmapBuffer(params.offset, itr->second.size);
184
185 buffer_mappings.erase(itr->second.offset); 178 buffer_mappings.erase(itr->second.offset);
186 179
187 std::memcpy(output.data(), &params, output.size()); 180 std::memcpy(output.data(), &params, output.size());
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_ctrl_gpu.cpp b/src/core/hle/service/nvdrv/devices/nvhost_ctrl_gpu.cpp
index d57a54ee8..45812d238 100644
--- a/src/core/hle/service/nvdrv/devices/nvhost_ctrl_gpu.cpp
+++ b/src/core/hle/service/nvdrv/devices/nvhost_ctrl_gpu.cpp
@@ -5,6 +5,7 @@
5#include <cstring> 5#include <cstring>
6#include "common/assert.h" 6#include "common/assert.h"
7#include "common/logging/log.h" 7#include "common/logging/log.h"
8#include "core/core.h"
8#include "core/core_timing.h" 9#include "core/core_timing.h"
9#include "core/core_timing_util.h" 10#include "core/core_timing_util.h"
10#include "core/hle/service/nvdrv/devices/nvhost_ctrl_gpu.h" 11#include "core/hle/service/nvdrv/devices/nvhost_ctrl_gpu.h"
@@ -184,7 +185,7 @@ u32 nvhost_ctrl_gpu::GetGpuTime(const std::vector<u8>& input, std::vector<u8>& o
184 185
185 IoctlGetGpuTime params{}; 186 IoctlGetGpuTime params{};
186 std::memcpy(&params, input.data(), input.size()); 187 std::memcpy(&params, input.data(), input.size());
187 params.gpu_time = CoreTiming::cyclesToNs(CoreTiming::GetTicks()); 188 params.gpu_time = Core::Timing::cyclesToNs(Core::System::GetInstance().CoreTiming().GetTicks());
188 std::memcpy(output.data(), &params, output.size()); 189 std::memcpy(output.data(), &params, output.size());
189 return 0; 190 return 0;
190} 191}
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_gpu.cpp b/src/core/hle/service/nvdrv/devices/nvhost_gpu.cpp
index 0a650f36c..8ce7bc7a5 100644
--- a/src/core/hle/service/nvdrv/devices/nvhost_gpu.cpp
+++ b/src/core/hle/service/nvdrv/devices/nvhost_gpu.cpp
@@ -136,16 +136,6 @@ u32 nvhost_gpu::AllocateObjectContext(const std::vector<u8>& input, std::vector<
136 return 0; 136 return 0;
137} 137}
138 138
139static void PushGPUEntries(Tegra::CommandList&& entries) {
140 if (entries.empty()) {
141 return;
142 }
143
144 auto& dma_pusher{Core::System::GetInstance().GPU().DmaPusher()};
145 dma_pusher.Push(std::move(entries));
146 dma_pusher.DispatchCalls();
147}
148
149u32 nvhost_gpu::SubmitGPFIFO(const std::vector<u8>& input, std::vector<u8>& output) { 139u32 nvhost_gpu::SubmitGPFIFO(const std::vector<u8>& input, std::vector<u8>& output) {
150 if (input.size() < sizeof(IoctlSubmitGpfifo)) { 140 if (input.size() < sizeof(IoctlSubmitGpfifo)) {
151 UNIMPLEMENTED(); 141 UNIMPLEMENTED();
@@ -163,7 +153,7 @@ u32 nvhost_gpu::SubmitGPFIFO(const std::vector<u8>& input, std::vector<u8>& outp
163 std::memcpy(entries.data(), &input[sizeof(IoctlSubmitGpfifo)], 153 std::memcpy(entries.data(), &input[sizeof(IoctlSubmitGpfifo)],
164 params.num_entries * sizeof(Tegra::CommandListHeader)); 154 params.num_entries * sizeof(Tegra::CommandListHeader));
165 155
166 PushGPUEntries(std::move(entries)); 156 Core::System::GetInstance().GPU().PushGPUEntries(std::move(entries));
167 157
168 params.fence_out.id = 0; 158 params.fence_out.id = 0;
169 params.fence_out.value = 0; 159 params.fence_out.value = 0;
@@ -184,7 +174,7 @@ u32 nvhost_gpu::KickoffPB(const std::vector<u8>& input, std::vector<u8>& output)
184 Memory::ReadBlock(params.address, entries.data(), 174 Memory::ReadBlock(params.address, entries.data(),
185 params.num_entries * sizeof(Tegra::CommandListHeader)); 175 params.num_entries * sizeof(Tegra::CommandListHeader));
186 176
187 PushGPUEntries(std::move(entries)); 177 Core::System::GetInstance().GPU().PushGPUEntries(std::move(entries));
188 178
189 params.fence_out.id = 0; 179 params.fence_out.id = 0;
190 params.fence_out.value = 0; 180 params.fence_out.value = 0;
diff --git a/src/core/hle/service/nvflinger/buffer_queue.cpp b/src/core/hle/service/nvflinger/buffer_queue.cpp
index fc07d9bb8..4d150fc71 100644
--- a/src/core/hle/service/nvflinger/buffer_queue.cpp
+++ b/src/core/hle/service/nvflinger/buffer_queue.cpp
@@ -63,7 +63,7 @@ const IGBPBuffer& BufferQueue::RequestBuffer(u32 slot) const {
63} 63}
64 64
65void BufferQueue::QueueBuffer(u32 slot, BufferTransformFlags transform, 65void BufferQueue::QueueBuffer(u32 slot, BufferTransformFlags transform,
66 const MathUtil::Rectangle<int>& crop_rect) { 66 const Common::Rectangle<int>& crop_rect) {
67 auto itr = std::find_if(queue.begin(), queue.end(), 67 auto itr = std::find_if(queue.begin(), queue.end(),
68 [&](const Buffer& buffer) { return buffer.slot == slot; }); 68 [&](const Buffer& buffer) { return buffer.slot == slot; });
69 ASSERT(itr != queue.end()); 69 ASSERT(itr != queue.end());
diff --git a/src/core/hle/service/nvflinger/buffer_queue.h b/src/core/hle/service/nvflinger/buffer_queue.h
index b171f256c..e1ccb6171 100644
--- a/src/core/hle/service/nvflinger/buffer_queue.h
+++ b/src/core/hle/service/nvflinger/buffer_queue.h
@@ -13,10 +13,6 @@
13#include "core/hle/kernel/object.h" 13#include "core/hle/kernel/object.h"
14#include "core/hle/kernel/writable_event.h" 14#include "core/hle/kernel/writable_event.h"
15 15
16namespace CoreTiming {
17struct EventType;
18}
19
20namespace Service::NVFlinger { 16namespace Service::NVFlinger {
21 17
22struct IGBPBuffer { 18struct IGBPBuffer {
@@ -71,14 +67,14 @@ public:
71 Status status = Status::Free; 67 Status status = Status::Free;
72 IGBPBuffer igbp_buffer; 68 IGBPBuffer igbp_buffer;
73 BufferTransformFlags transform; 69 BufferTransformFlags transform;
74 MathUtil::Rectangle<int> crop_rect; 70 Common::Rectangle<int> crop_rect;
75 }; 71 };
76 72
77 void SetPreallocatedBuffer(u32 slot, const IGBPBuffer& igbp_buffer); 73 void SetPreallocatedBuffer(u32 slot, const IGBPBuffer& igbp_buffer);
78 std::optional<u32> DequeueBuffer(u32 width, u32 height); 74 std::optional<u32> DequeueBuffer(u32 width, u32 height);
79 const IGBPBuffer& RequestBuffer(u32 slot) const; 75 const IGBPBuffer& RequestBuffer(u32 slot) const;
80 void QueueBuffer(u32 slot, BufferTransformFlags transform, 76 void QueueBuffer(u32 slot, BufferTransformFlags transform,
81 const MathUtil::Rectangle<int>& crop_rect); 77 const Common::Rectangle<int>& crop_rect);
82 std::optional<std::reference_wrapper<const Buffer>> AcquireBuffer(); 78 std::optional<std::reference_wrapper<const Buffer>> AcquireBuffer();
83 void ReleaseBuffer(u32 slot); 79 void ReleaseBuffer(u32 slot);
84 u32 Query(QueryType type); 80 u32 Query(QueryType type);
diff --git a/src/core/hle/service/nvflinger/nvflinger.cpp b/src/core/hle/service/nvflinger/nvflinger.cpp
index cde06916d..c7f5bbf28 100644
--- a/src/core/hle/service/nvflinger/nvflinger.cpp
+++ b/src/core/hle/service/nvflinger/nvflinger.cpp
@@ -14,32 +14,39 @@
14#include "core/core_timing_util.h" 14#include "core/core_timing_util.h"
15#include "core/hle/kernel/kernel.h" 15#include "core/hle/kernel/kernel.h"
16#include "core/hle/kernel/readable_event.h" 16#include "core/hle/kernel/readable_event.h"
17#include "core/hle/kernel/writable_event.h"
18#include "core/hle/service/nvdrv/devices/nvdisp_disp0.h" 17#include "core/hle/service/nvdrv/devices/nvdisp_disp0.h"
19#include "core/hle/service/nvdrv/nvdrv.h" 18#include "core/hle/service/nvdrv/nvdrv.h"
20#include "core/hle/service/nvflinger/buffer_queue.h" 19#include "core/hle/service/nvflinger/buffer_queue.h"
21#include "core/hle/service/nvflinger/nvflinger.h" 20#include "core/hle/service/nvflinger/nvflinger.h"
21#include "core/hle/service/vi/display/vi_display.h"
22#include "core/hle/service/vi/layer/vi_layer.h"
22#include "core/perf_stats.h" 23#include "core/perf_stats.h"
23#include "video_core/renderer_base.h" 24#include "video_core/renderer_base.h"
24 25
25namespace Service::NVFlinger { 26namespace Service::NVFlinger {
26 27
27constexpr std::size_t SCREEN_REFRESH_RATE = 60; 28constexpr std::size_t SCREEN_REFRESH_RATE = 60;
28constexpr u64 frame_ticks = static_cast<u64>(CoreTiming::BASE_CLOCK_RATE / SCREEN_REFRESH_RATE); 29constexpr s64 frame_ticks = static_cast<s64>(Core::Timing::BASE_CLOCK_RATE / SCREEN_REFRESH_RATE);
30
31NVFlinger::NVFlinger(Core::Timing::CoreTiming& core_timing) : core_timing{core_timing} {
32 displays.emplace_back(0, "Default");
33 displays.emplace_back(1, "External");
34 displays.emplace_back(2, "Edid");
35 displays.emplace_back(3, "Internal");
36 displays.emplace_back(4, "Null");
29 37
30NVFlinger::NVFlinger() {
31 // Schedule the screen composition events 38 // Schedule the screen composition events
32 composition_event = 39 composition_event =
33 CoreTiming::RegisterEvent("ScreenComposition", [this](u64 userdata, int cycles_late) { 40 core_timing.RegisterEvent("ScreenComposition", [this](u64 userdata, s64 cycles_late) {
34 Compose(); 41 Compose();
35 CoreTiming::ScheduleEvent(frame_ticks - cycles_late, composition_event); 42 this->core_timing.ScheduleEvent(frame_ticks - cycles_late, composition_event);
36 }); 43 });
37 44
38 CoreTiming::ScheduleEvent(frame_ticks, composition_event); 45 core_timing.ScheduleEvent(frame_ticks, composition_event);
39} 46}
40 47
41NVFlinger::~NVFlinger() { 48NVFlinger::~NVFlinger() {
42 CoreTiming::UnscheduleEvent(composition_event, 0); 49 core_timing.UnscheduleEvent(composition_event, 0);
43} 50}
44 51
45void NVFlinger::SetNVDrvInstance(std::shared_ptr<Nvidia::Module> instance) { 52void NVFlinger::SetNVDrvInstance(std::shared_ptr<Nvidia::Module> instance) {
@@ -52,13 +59,14 @@ std::optional<u64> NVFlinger::OpenDisplay(std::string_view name) {
52 // TODO(Subv): Currently we only support the Default display. 59 // TODO(Subv): Currently we only support the Default display.
53 ASSERT(name == "Default"); 60 ASSERT(name == "Default");
54 61
55 const auto itr = std::find_if(displays.begin(), displays.end(), 62 const auto itr =
56 [&](const Display& display) { return display.name == name; }); 63 std::find_if(displays.begin(), displays.end(),
64 [&](const VI::Display& display) { return display.GetName() == name; });
57 if (itr == displays.end()) { 65 if (itr == displays.end()) {
58 return {}; 66 return {};
59 } 67 }
60 68
61 return itr->id; 69 return itr->GetID();
62} 70}
63 71
64std::optional<u64> NVFlinger::CreateLayer(u64 display_id) { 72std::optional<u64> NVFlinger::CreateLayer(u64 display_id) {
@@ -68,13 +76,10 @@ std::optional<u64> NVFlinger::CreateLayer(u64 display_id) {
68 return {}; 76 return {};
69 } 77 }
70 78
71 ASSERT_MSG(display->layers.empty(), "Only one layer is supported per display at the moment");
72
73 const u64 layer_id = next_layer_id++; 79 const u64 layer_id = next_layer_id++;
74 const u32 buffer_queue_id = next_buffer_queue_id++; 80 const u32 buffer_queue_id = next_buffer_queue_id++;
75 auto buffer_queue = std::make_shared<BufferQueue>(buffer_queue_id, layer_id); 81 buffer_queues.emplace_back(buffer_queue_id, layer_id);
76 display->layers.emplace_back(layer_id, buffer_queue); 82 display->CreateLayer(layer_id, buffer_queues.back());
77 buffer_queues.emplace_back(std::move(buffer_queue));
78 return layer_id; 83 return layer_id;
79} 84}
80 85
@@ -85,7 +90,7 @@ std::optional<u32> NVFlinger::FindBufferQueueId(u64 display_id, u64 layer_id) co
85 return {}; 90 return {};
86 } 91 }
87 92
88 return layer->buffer_queue->GetId(); 93 return layer->GetBufferQueue().GetId();
89} 94}
90 95
91Kernel::SharedPtr<Kernel::ReadableEvent> NVFlinger::FindVsyncEvent(u64 display_id) const { 96Kernel::SharedPtr<Kernel::ReadableEvent> NVFlinger::FindVsyncEvent(u64 display_id) const {
@@ -95,20 +100,29 @@ Kernel::SharedPtr<Kernel::ReadableEvent> NVFlinger::FindVsyncEvent(u64 display_i
95 return nullptr; 100 return nullptr;
96 } 101 }
97 102
98 return display->vsync_event.readable; 103 return display->GetVSyncEvent();
104}
105
106BufferQueue& NVFlinger::FindBufferQueue(u32 id) {
107 const auto itr = std::find_if(buffer_queues.begin(), buffer_queues.end(),
108 [id](const auto& queue) { return queue.GetId() == id; });
109
110 ASSERT(itr != buffer_queues.end());
111 return *itr;
99} 112}
100 113
101std::shared_ptr<BufferQueue> NVFlinger::FindBufferQueue(u32 id) const { 114const BufferQueue& NVFlinger::FindBufferQueue(u32 id) const {
102 const auto itr = std::find_if(buffer_queues.begin(), buffer_queues.end(), 115 const auto itr = std::find_if(buffer_queues.begin(), buffer_queues.end(),
103 [&](const auto& queue) { return queue->GetId() == id; }); 116 [id](const auto& queue) { return queue.GetId() == id; });
104 117
105 ASSERT(itr != buffer_queues.end()); 118 ASSERT(itr != buffer_queues.end());
106 return *itr; 119 return *itr;
107} 120}
108 121
109Display* NVFlinger::FindDisplay(u64 display_id) { 122VI::Display* NVFlinger::FindDisplay(u64 display_id) {
110 const auto itr = std::find_if(displays.begin(), displays.end(), 123 const auto itr =
111 [&](const Display& display) { return display.id == display_id; }); 124 std::find_if(displays.begin(), displays.end(),
125 [&](const VI::Display& display) { return display.GetID() == display_id; });
112 126
113 if (itr == displays.end()) { 127 if (itr == displays.end()) {
114 return nullptr; 128 return nullptr;
@@ -117,9 +131,10 @@ Display* NVFlinger::FindDisplay(u64 display_id) {
117 return &*itr; 131 return &*itr;
118} 132}
119 133
120const Display* NVFlinger::FindDisplay(u64 display_id) const { 134const VI::Display* NVFlinger::FindDisplay(u64 display_id) const {
121 const auto itr = std::find_if(displays.begin(), displays.end(), 135 const auto itr =
122 [&](const Display& display) { return display.id == display_id; }); 136 std::find_if(displays.begin(), displays.end(),
137 [&](const VI::Display& display) { return display.GetID() == display_id; });
123 138
124 if (itr == displays.end()) { 139 if (itr == displays.end()) {
125 return nullptr; 140 return nullptr;
@@ -128,57 +143,41 @@ const Display* NVFlinger::FindDisplay(u64 display_id) const {
128 return &*itr; 143 return &*itr;
129} 144}
130 145
131Layer* NVFlinger::FindLayer(u64 display_id, u64 layer_id) { 146VI::Layer* NVFlinger::FindLayer(u64 display_id, u64 layer_id) {
132 auto* const display = FindDisplay(display_id); 147 auto* const display = FindDisplay(display_id);
133 148
134 if (display == nullptr) { 149 if (display == nullptr) {
135 return nullptr; 150 return nullptr;
136 } 151 }
137 152
138 const auto itr = std::find_if(display->layers.begin(), display->layers.end(), 153 return display->FindLayer(layer_id);
139 [&](const Layer& layer) { return layer.id == layer_id; });
140
141 if (itr == display->layers.end()) {
142 return nullptr;
143 }
144
145 return &*itr;
146} 154}
147 155
148const Layer* NVFlinger::FindLayer(u64 display_id, u64 layer_id) const { 156const VI::Layer* NVFlinger::FindLayer(u64 display_id, u64 layer_id) const {
149 const auto* const display = FindDisplay(display_id); 157 const auto* const display = FindDisplay(display_id);
150 158
151 if (display == nullptr) { 159 if (display == nullptr) {
152 return nullptr; 160 return nullptr;
153 } 161 }
154 162
155 const auto itr = std::find_if(display->layers.begin(), display->layers.end(), 163 return display->FindLayer(layer_id);
156 [&](const Layer& layer) { return layer.id == layer_id; });
157
158 if (itr == display->layers.end()) {
159 return nullptr;
160 }
161
162 return &*itr;
163} 164}
164 165
165void NVFlinger::Compose() { 166void NVFlinger::Compose() {
166 for (auto& display : displays) { 167 for (auto& display : displays) {
167 // Trigger vsync for this display at the end of drawing 168 // Trigger vsync for this display at the end of drawing
168 SCOPE_EXIT({ display.vsync_event.writable->Signal(); }); 169 SCOPE_EXIT({ display.SignalVSyncEvent(); });
169 170
170 // Don't do anything for displays without layers. 171 // Don't do anything for displays without layers.
171 if (display.layers.empty()) 172 if (!display.HasLayers())
172 continue; 173 continue;
173 174
174 // TODO(Subv): Support more than 1 layer. 175 // TODO(Subv): Support more than 1 layer.
175 ASSERT_MSG(display.layers.size() == 1, "Max 1 layer per display is supported"); 176 VI::Layer& layer = display.GetLayer(0);
176 177 auto& buffer_queue = layer.GetBufferQueue();
177 Layer& layer = display.layers[0];
178 auto& buffer_queue = layer.buffer_queue;
179 178
180 // Search for a queued buffer and acquire it 179 // Search for a queued buffer and acquire it
181 auto buffer = buffer_queue->AcquireBuffer(); 180 auto buffer = buffer_queue.AcquireBuffer();
182 181
183 MicroProfileFlip(); 182 MicroProfileFlip();
184 183
@@ -187,7 +186,7 @@ void NVFlinger::Compose() {
187 186
188 // There was no queued buffer to draw, render previous frame 187 // There was no queued buffer to draw, render previous frame
189 system_instance.GetPerfStats().EndGameFrame(); 188 system_instance.GetPerfStats().EndGameFrame();
190 system_instance.Renderer().SwapBuffers({}); 189 system_instance.GPU().SwapBuffers({});
191 continue; 190 continue;
192 } 191 }
193 192
@@ -203,19 +202,8 @@ void NVFlinger::Compose() {
203 igbp_buffer.width, igbp_buffer.height, igbp_buffer.stride, 202 igbp_buffer.width, igbp_buffer.height, igbp_buffer.stride,
204 buffer->get().transform, buffer->get().crop_rect); 203 buffer->get().transform, buffer->get().crop_rect);
205 204
206 buffer_queue->ReleaseBuffer(buffer->get().slot); 205 buffer_queue.ReleaseBuffer(buffer->get().slot);
207 } 206 }
208} 207}
209 208
210Layer::Layer(u64 id, std::shared_ptr<BufferQueue> queue) : id(id), buffer_queue(std::move(queue)) {}
211Layer::~Layer() = default;
212
213Display::Display(u64 id, std::string name) : id(id), name(std::move(name)) {
214 auto& kernel = Core::System::GetInstance().Kernel();
215 vsync_event = Kernel::WritableEvent::CreateEventPair(kernel, Kernel::ResetType::Sticky,
216 fmt::format("Display VSync Event {}", id));
217}
218
219Display::~Display() = default;
220
221} // namespace Service::NVFlinger 209} // namespace Service::NVFlinger
diff --git a/src/core/hle/service/nvflinger/nvflinger.h b/src/core/hle/service/nvflinger/nvflinger.h
index 4c55e99f4..c0a83fffb 100644
--- a/src/core/hle/service/nvflinger/nvflinger.h
+++ b/src/core/hle/service/nvflinger/nvflinger.h
@@ -4,7 +4,6 @@
4 4
5#pragma once 5#pragma once
6 6
7#include <array>
8#include <memory> 7#include <memory>
9#include <optional> 8#include <optional>
10#include <string> 9#include <string>
@@ -14,9 +13,10 @@
14#include "common/common_types.h" 13#include "common/common_types.h"
15#include "core/hle/kernel/object.h" 14#include "core/hle/kernel/object.h"
16 15
17namespace CoreTiming { 16namespace Core::Timing {
17class CoreTiming;
18struct EventType; 18struct EventType;
19} 19} // namespace Core::Timing
20 20
21namespace Kernel { 21namespace Kernel {
22class ReadableEvent; 22class ReadableEvent;
@@ -25,34 +25,20 @@ class WritableEvent;
25 25
26namespace Service::Nvidia { 26namespace Service::Nvidia {
27class Module; 27class Module;
28} 28} // namespace Service::Nvidia
29
30namespace Service::VI {
31class Display;
32class Layer;
33} // namespace Service::VI
29 34
30namespace Service::NVFlinger { 35namespace Service::NVFlinger {
31 36
32class BufferQueue; 37class BufferQueue;
33 38
34struct Layer {
35 Layer(u64 id, std::shared_ptr<BufferQueue> queue);
36 ~Layer();
37
38 u64 id;
39 std::shared_ptr<BufferQueue> buffer_queue;
40};
41
42struct Display {
43 Display(u64 id, std::string name);
44 ~Display();
45
46 u64 id;
47 std::string name;
48
49 std::vector<Layer> layers;
50 Kernel::EventPair vsync_event;
51};
52
53class NVFlinger final { 39class NVFlinger final {
54public: 40public:
55 NVFlinger(); 41 explicit NVFlinger(Core::Timing::CoreTiming& core_timing);
56 ~NVFlinger(); 42 ~NVFlinger();
57 43
58 /// Sets the NVDrv module instance to use to send buffers to the GPU. 44 /// Sets the NVDrv module instance to use to send buffers to the GPU.
@@ -79,7 +65,10 @@ public:
79 Kernel::SharedPtr<Kernel::ReadableEvent> FindVsyncEvent(u64 display_id) const; 65 Kernel::SharedPtr<Kernel::ReadableEvent> FindVsyncEvent(u64 display_id) const;
80 66
81 /// Obtains a buffer queue identified by the ID. 67 /// Obtains a buffer queue identified by the ID.
82 std::shared_ptr<BufferQueue> FindBufferQueue(u32 id) const; 68 BufferQueue& FindBufferQueue(u32 id);
69
70 /// Obtains a buffer queue identified by the ID.
71 const BufferQueue& FindBufferQueue(u32 id) const;
83 72
84 /// Performs a composition request to the emulated nvidia GPU and triggers the vsync events when 73 /// Performs a composition request to the emulated nvidia GPU and triggers the vsync events when
85 /// finished. 74 /// finished.
@@ -87,27 +76,21 @@ public:
87 76
88private: 77private:
89 /// Finds the display identified by the specified ID. 78 /// Finds the display identified by the specified ID.
90 Display* FindDisplay(u64 display_id); 79 VI::Display* FindDisplay(u64 display_id);
91 80
92 /// Finds the display identified by the specified ID. 81 /// Finds the display identified by the specified ID.
93 const Display* FindDisplay(u64 display_id) const; 82 const VI::Display* FindDisplay(u64 display_id) const;
94 83
95 /// Finds the layer identified by the specified ID in the desired display. 84 /// Finds the layer identified by the specified ID in the desired display.
96 Layer* FindLayer(u64 display_id, u64 layer_id); 85 VI::Layer* FindLayer(u64 display_id, u64 layer_id);
97 86
98 /// Finds the layer identified by the specified ID in the desired display. 87 /// Finds the layer identified by the specified ID in the desired display.
99 const Layer* FindLayer(u64 display_id, u64 layer_id) const; 88 const VI::Layer* FindLayer(u64 display_id, u64 layer_id) const;
100 89
101 std::shared_ptr<Nvidia::Module> nvdrv; 90 std::shared_ptr<Nvidia::Module> nvdrv;
102 91
103 std::array<Display, 5> displays{{ 92 std::vector<VI::Display> displays;
104 {0, "Default"}, 93 std::vector<BufferQueue> buffer_queues;
105 {1, "External"},
106 {2, "Edid"},
107 {3, "Internal"},
108 {4, "Null"},
109 }};
110 std::vector<std::shared_ptr<BufferQueue>> buffer_queues;
111 94
112 /// Id to use for the next layer that is created, this counter is shared among all displays. 95 /// Id to use for the next layer that is created, this counter is shared among all displays.
113 u64 next_layer_id = 1; 96 u64 next_layer_id = 1;
@@ -115,8 +98,11 @@ private:
115 /// layers. 98 /// layers.
116 u32 next_buffer_queue_id = 1; 99 u32 next_buffer_queue_id = 1;
117 100
118 /// CoreTiming event that handles screen composition. 101 /// Event that handles screen composition.
119 CoreTiming::EventType* composition_event; 102 Core::Timing::EventType* composition_event;
103
104 /// Core timing instance for registering/unregistering the composition event.
105 Core::Timing::CoreTiming& core_timing;
120}; 106};
121 107
122} // namespace Service::NVFlinger 108} // namespace Service::NVFlinger
diff --git a/src/core/hle/service/service.cpp b/src/core/hle/service/service.cpp
index d25b80ab0..00806b0ed 100644
--- a/src/core/hle/service/service.cpp
+++ b/src/core/hle/service/service.cpp
@@ -11,7 +11,6 @@
11#include "core/hle/ipc.h" 11#include "core/hle/ipc.h"
12#include "core/hle/ipc_helpers.h" 12#include "core/hle/ipc_helpers.h"
13#include "core/hle/kernel/client_port.h" 13#include "core/hle/kernel/client_port.h"
14#include "core/hle/kernel/handle_table.h"
15#include "core/hle/kernel/kernel.h" 14#include "core/hle/kernel/kernel.h"
16#include "core/hle/kernel/process.h" 15#include "core/hle/kernel/process.h"
17#include "core/hle/kernel/server_port.h" 16#include "core/hle/kernel/server_port.h"
@@ -76,7 +75,8 @@ namespace Service {
76 * Creates a function string for logging, complete with the name (or header code, depending 75 * Creates a function string for logging, complete with the name (or header code, depending
77 * on what's passed in) the port name, and all the cmd_buff arguments. 76 * on what's passed in) the port name, and all the cmd_buff arguments.
78 */ 77 */
79[[maybe_unused]] static std::string MakeFunctionString(const char* name, const char* port_name, 78[[maybe_unused]] static std::string MakeFunctionString(std::string_view name,
79 std::string_view port_name,
80 const u32* cmd_buff) { 80 const u32* cmd_buff) {
81 // Number of params == bits 0-5 + bits 6-11 81 // Number of params == bits 0-5 + bits 6-11
82 int num_params = (cmd_buff[0] & 0x3F) + ((cmd_buff[0] >> 6) & 0x3F); 82 int num_params = (cmd_buff[0] & 0x3F) + ((cmd_buff[0] >> 6) & 0x3F);
@@ -158,9 +158,7 @@ void ServiceFrameworkBase::InvokeRequest(Kernel::HLERequestContext& ctx) {
158 return ReportUnimplementedFunction(ctx, info); 158 return ReportUnimplementedFunction(ctx, info);
159 } 159 }
160 160
161 LOG_TRACE( 161 LOG_TRACE(Service, "{}", MakeFunctionString(info->name, GetServiceName(), ctx.CommandBuffer()));
162 Service, "{}",
163 MakeFunctionString(info->name, GetServiceName().c_str(), ctx.CommandBuffer()).c_str());
164 handler_invoker(this, info->handler_callback, ctx); 162 handler_invoker(this, info->handler_callback, ctx);
165} 163}
166 164
@@ -169,7 +167,7 @@ ResultCode ServiceFrameworkBase::HandleSyncRequest(Kernel::HLERequestContext& co
169 case IPC::CommandType::Close: { 167 case IPC::CommandType::Close: {
170 IPC::ResponseBuilder rb{context, 2}; 168 IPC::ResponseBuilder rb{context, 2};
171 rb.Push(RESULT_SUCCESS); 169 rb.Push(RESULT_SUCCESS);
172 return ResultCode(ErrorModule::HIPC, ErrorDescription::RemoteProcessDead); 170 return IPC::ERR_REMOTE_PROCESS_DEAD;
173 } 171 }
174 case IPC::CommandType::ControlWithContext: 172 case IPC::CommandType::ControlWithContext:
175 case IPC::CommandType::Control: { 173 case IPC::CommandType::Control: {
@@ -194,10 +192,11 @@ ResultCode ServiceFrameworkBase::HandleSyncRequest(Kernel::HLERequestContext& co
194// Module interface 192// Module interface
195 193
196/// Initialize ServiceManager 194/// Initialize ServiceManager
197void Init(std::shared_ptr<SM::ServiceManager>& sm, FileSys::VfsFilesystem& vfs) { 195void Init(std::shared_ptr<SM::ServiceManager>& sm, Core::System& system,
196 FileSys::VfsFilesystem& vfs) {
198 // NVFlinger needs to be accessed by several services like Vi and AppletOE so we instantiate it 197 // NVFlinger needs to be accessed by several services like Vi and AppletOE so we instantiate it
199 // here and pass it into the respective InstallInterfaces functions. 198 // here and pass it into the respective InstallInterfaces functions.
200 auto nv_flinger = std::make_shared<NVFlinger::NVFlinger>(); 199 auto nv_flinger = std::make_shared<NVFlinger::NVFlinger>(system.CoreTiming());
201 200
202 SM::ServiceManager::InstallInterfaces(sm); 201 SM::ServiceManager::InstallInterfaces(sm);
203 202
diff --git a/src/core/hle/service/service.h b/src/core/hle/service/service.h
index 029533628..830790269 100644
--- a/src/core/hle/service/service.h
+++ b/src/core/hle/service/service.h
@@ -14,6 +14,14 @@
14//////////////////////////////////////////////////////////////////////////////////////////////////// 14////////////////////////////////////////////////////////////////////////////////////////////////////
15// Namespace Service 15// Namespace Service
16 16
17namespace Core {
18class System;
19}
20
21namespace FileSys {
22class VfsFilesystem;
23}
24
17namespace Kernel { 25namespace Kernel {
18class ClientPort; 26class ClientPort;
19class ServerPort; 27class ServerPort;
@@ -21,10 +29,6 @@ class ServerSession;
21class HLERequestContext; 29class HLERequestContext;
22} // namespace Kernel 30} // namespace Kernel
23 31
24namespace FileSys {
25class VfsFilesystem;
26}
27
28namespace Service { 32namespace Service {
29 33
30namespace SM { 34namespace SM {
@@ -178,7 +182,8 @@ private:
178}; 182};
179 183
180/// Initialize ServiceManager 184/// Initialize ServiceManager
181void Init(std::shared_ptr<SM::ServiceManager>& sm, FileSys::VfsFilesystem& vfs); 185void Init(std::shared_ptr<SM::ServiceManager>& sm, Core::System& system,
186 FileSys::VfsFilesystem& vfs);
182 187
183/// Shutdown ServiceManager 188/// Shutdown ServiceManager
184void Shutdown(); 189void Shutdown();
diff --git a/src/core/hle/service/set/set_sys.cpp b/src/core/hle/service/set/set_sys.cpp
index c9b4da5b0..ecee554bf 100644
--- a/src/core/hle/service/set/set_sys.cpp
+++ b/src/core/hle/service/set/set_sys.cpp
@@ -2,13 +2,88 @@
2// Licensed under GPLv2 or any later version 2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included. 3// Refer to the license.txt file included.
4 4
5#include "common/assert.h"
5#include "common/logging/log.h" 6#include "common/logging/log.h"
7#include "core/file_sys/errors.h"
8#include "core/file_sys/system_archive/system_version.h"
6#include "core/hle/ipc_helpers.h" 9#include "core/hle/ipc_helpers.h"
7#include "core/hle/kernel/client_port.h" 10#include "core/hle/kernel/client_port.h"
11#include "core/hle/service/filesystem/filesystem.h"
8#include "core/hle/service/set/set_sys.h" 12#include "core/hle/service/set/set_sys.h"
9 13
10namespace Service::Set { 14namespace Service::Set {
11 15
16namespace {
17constexpr u64 SYSTEM_VERSION_FILE_MINOR_REVISION_OFFSET = 0x05;
18
19enum class GetFirmwareVersionType {
20 Version1,
21 Version2,
22};
23
24void GetFirmwareVersionImpl(Kernel::HLERequestContext& ctx, GetFirmwareVersionType type) {
25 LOG_WARNING(Service_SET, "called - Using hardcoded firmware version '{}'",
26 FileSys::SystemArchive::GetLongDisplayVersion());
27
28 ASSERT_MSG(ctx.GetWriteBufferSize() == 0x100,
29 "FirmwareVersion output buffer must be 0x100 bytes in size!");
30
31 // Instead of using the normal procedure of checking for the real system archive and if it
32 // doesn't exist, synthesizing one, I feel that that would lead to strange bugs because a
33 // used is using a really old or really new SystemVersion title. The synthesized one ensures
34 // consistence (currently reports as 5.1.0-0.0)
35 const auto archive = FileSys::SystemArchive::SystemVersion();
36
37 const auto early_exit_failure = [&ctx](const std::string& desc, ResultCode code) {
38 LOG_ERROR(Service_SET, "General failure while attempting to resolve firmware version ({}).",
39 desc.c_str());
40 IPC::ResponseBuilder rb{ctx, 2};
41 rb.Push(code);
42 };
43
44 if (archive == nullptr) {
45 early_exit_failure("The system version archive couldn't be synthesized.",
46 FileSys::ERROR_FAILED_MOUNT_ARCHIVE);
47 return;
48 }
49
50 const auto ver_file = archive->GetFile("file");
51 if (ver_file == nullptr) {
52 early_exit_failure("The system version archive didn't contain the file 'file'.",
53 FileSys::ERROR_INVALID_ARGUMENT);
54 return;
55 }
56
57 auto data = ver_file->ReadAllBytes();
58 if (data.size() != 0x100) {
59 early_exit_failure("The system version file 'file' was not the correct size.",
60 FileSys::ERROR_OUT_OF_BOUNDS);
61 return;
62 }
63
64 // If the command is GetFirmwareVersion (as opposed to GetFirmwareVersion2), hardware will
65 // zero out the REVISION_MINOR field.
66 if (type == GetFirmwareVersionType::Version1) {
67 data[SYSTEM_VERSION_FILE_MINOR_REVISION_OFFSET] = 0;
68 }
69
70 ctx.WriteBuffer(data);
71
72 IPC::ResponseBuilder rb{ctx, 2};
73 rb.Push(RESULT_SUCCESS);
74}
75} // Anonymous namespace
76
77void SET_SYS::GetFirmwareVersion(Kernel::HLERequestContext& ctx) {
78 LOG_DEBUG(Service_SET, "called");
79 GetFirmwareVersionImpl(ctx, GetFirmwareVersionType::Version1);
80}
81
82void SET_SYS::GetFirmwareVersion2(Kernel::HLERequestContext& ctx) {
83 LOG_DEBUG(Service_SET, "called");
84 GetFirmwareVersionImpl(ctx, GetFirmwareVersionType::Version2);
85}
86
12void SET_SYS::GetColorSetId(Kernel::HLERequestContext& ctx) { 87void SET_SYS::GetColorSetId(Kernel::HLERequestContext& ctx) {
13 LOG_DEBUG(Service_SET, "called"); 88 LOG_DEBUG(Service_SET, "called");
14 89
@@ -33,8 +108,8 @@ SET_SYS::SET_SYS() : ServiceFramework("set:sys") {
33 {0, nullptr, "SetLanguageCode"}, 108 {0, nullptr, "SetLanguageCode"},
34 {1, nullptr, "SetNetworkSettings"}, 109 {1, nullptr, "SetNetworkSettings"},
35 {2, nullptr, "GetNetworkSettings"}, 110 {2, nullptr, "GetNetworkSettings"},
36 {3, nullptr, "GetFirmwareVersion"}, 111 {3, &SET_SYS::GetFirmwareVersion, "GetFirmwareVersion"},
37 {4, nullptr, "GetFirmwareVersion2"}, 112 {4, &SET_SYS::GetFirmwareVersion2, "GetFirmwareVersion2"},
38 {5, nullptr, "GetFirmwareVersionDigest"}, 113 {5, nullptr, "GetFirmwareVersionDigest"},
39 {7, nullptr, "GetLockScreenFlag"}, 114 {7, nullptr, "GetLockScreenFlag"},
40 {8, nullptr, "SetLockScreenFlag"}, 115 {8, nullptr, "SetLockScreenFlag"},
diff --git a/src/core/hle/service/set/set_sys.h b/src/core/hle/service/set/set_sys.h
index f602f3c77..13ee2cf46 100644
--- a/src/core/hle/service/set/set_sys.h
+++ b/src/core/hle/service/set/set_sys.h
@@ -20,6 +20,8 @@ private:
20 BasicBlack = 1, 20 BasicBlack = 1,
21 }; 21 };
22 22
23 void GetFirmwareVersion(Kernel::HLERequestContext& ctx);
24 void GetFirmwareVersion2(Kernel::HLERequestContext& ctx);
23 void GetColorSetId(Kernel::HLERequestContext& ctx); 25 void GetColorSetId(Kernel::HLERequestContext& ctx);
24 void SetColorSetId(Kernel::HLERequestContext& ctx); 26 void SetColorSetId(Kernel::HLERequestContext& ctx);
25 27
diff --git a/src/core/hle/service/sm/controller.cpp b/src/core/hle/service/sm/controller.cpp
index 74da4d5e6..e9ee73710 100644
--- a/src/core/hle/service/sm/controller.cpp
+++ b/src/core/hle/service/sm/controller.cpp
@@ -30,7 +30,7 @@ void Controller::DuplicateSession(Kernel::HLERequestContext& ctx) {
30 30
31 IPC::ResponseBuilder rb{ctx, 2, 0, 1, IPC::ResponseBuilder::Flags::AlwaysMoveHandles}; 31 IPC::ResponseBuilder rb{ctx, 2, 0, 1, IPC::ResponseBuilder::Flags::AlwaysMoveHandles};
32 rb.Push(RESULT_SUCCESS); 32 rb.Push(RESULT_SUCCESS);
33 Kernel::SharedPtr<Kernel::ClientSession> session{ctx.Session()->parent->client}; 33 Kernel::SharedPtr<Kernel::ClientSession> session{ctx.Session()->GetParent()->client};
34 rb.PushMoveObjects(session); 34 rb.PushMoveObjects(session);
35 35
36 LOG_DEBUG(Service, "session={}", session->GetObjectId()); 36 LOG_DEBUG(Service, "session={}", session->GetObjectId());
diff --git a/src/core/hle/service/sm/sm.h b/src/core/hle/service/sm/sm.h
index bef25433e..b9d6381b4 100644
--- a/src/core/hle/service/sm/sm.h
+++ b/src/core/hle/service/sm/sm.h
@@ -67,7 +67,7 @@ public:
67 if (port == nullptr) { 67 if (port == nullptr) {
68 return nullptr; 68 return nullptr;
69 } 69 }
70 return std::static_pointer_cast<T>(port->hle_handler); 70 return std::static_pointer_cast<T>(port->GetHLEHandler());
71 } 71 }
72 72
73 void InvokeControlRequest(Kernel::HLERequestContext& context); 73 void InvokeControlRequest(Kernel::HLERequestContext& context);
diff --git a/src/core/hle/service/time/time.cpp b/src/core/hle/service/time/time.cpp
index c13640ad8..aa115935d 100644
--- a/src/core/hle/service/time/time.cpp
+++ b/src/core/hle/service/time/time.cpp
@@ -5,6 +5,7 @@
5#include <chrono> 5#include <chrono>
6#include <ctime> 6#include <ctime>
7#include "common/logging/log.h" 7#include "common/logging/log.h"
8#include "core/core.h"
8#include "core/core_timing.h" 9#include "core/core_timing.h"
9#include "core/core_timing_util.h" 10#include "core/core_timing_util.h"
10#include "core/hle/ipc_helpers.h" 11#include "core/hle/ipc_helpers.h"
@@ -106,8 +107,9 @@ private:
106 void GetCurrentTimePoint(Kernel::HLERequestContext& ctx) { 107 void GetCurrentTimePoint(Kernel::HLERequestContext& ctx) {
107 LOG_DEBUG(Service_Time, "called"); 108 LOG_DEBUG(Service_Time, "called");
108 109
109 SteadyClockTimePoint steady_clock_time_point{ 110 const auto& core_timing = Core::System::GetInstance().CoreTiming();
110 CoreTiming::cyclesToMs(CoreTiming::GetTicks()) / 1000}; 111 const SteadyClockTimePoint steady_clock_time_point{
112 Core::Timing::cyclesToMs(core_timing.GetTicks()) / 1000};
111 IPC::ResponseBuilder rb{ctx, (sizeof(SteadyClockTimePoint) / 4) + 2}; 113 IPC::ResponseBuilder rb{ctx, (sizeof(SteadyClockTimePoint) / 4) + 2};
112 rb.Push(RESULT_SUCCESS); 114 rb.Push(RESULT_SUCCESS);
113 rb.PushRaw(steady_clock_time_point); 115 rb.PushRaw(steady_clock_time_point);
@@ -281,8 +283,9 @@ void Module::Interface::GetClockSnapshot(Kernel::HLERequestContext& ctx) {
281 return; 283 return;
282 } 284 }
283 285
286 const auto& core_timing = Core::System::GetInstance().CoreTiming();
284 const SteadyClockTimePoint steady_clock_time_point{ 287 const SteadyClockTimePoint steady_clock_time_point{
285 CoreTiming::cyclesToMs(CoreTiming::GetTicks()) / 1000, {}}; 288 Core::Timing::cyclesToMs(core_timing.GetTicks()) / 1000, {}};
286 289
287 CalendarTime calendar_time{}; 290 CalendarTime calendar_time{};
288 calendar_time.year = tm->tm_year + 1900; 291 calendar_time.year = tm->tm_year + 1900;
diff --git a/src/core/hle/service/vi/display/vi_display.cpp b/src/core/hle/service/vi/display/vi_display.cpp
new file mode 100644
index 000000000..01d80311b
--- /dev/null
+++ b/src/core/hle/service/vi/display/vi_display.cpp
@@ -0,0 +1,71 @@
1// Copyright 2019 yuzu emulator team
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <algorithm>
6#include <utility>
7
8#include <fmt/format.h>
9
10#include "common/assert.h"
11#include "core/core.h"
12#include "core/hle/kernel/readable_event.h"
13#include "core/hle/service/vi/display/vi_display.h"
14#include "core/hle/service/vi/layer/vi_layer.h"
15
16namespace Service::VI {
17
18Display::Display(u64 id, std::string name) : id{id}, name{std::move(name)} {
19 auto& kernel = Core::System::GetInstance().Kernel();
20 vsync_event = Kernel::WritableEvent::CreateEventPair(kernel, Kernel::ResetType::Sticky,
21 fmt::format("Display VSync Event {}", id));
22}
23
24Display::~Display() = default;
25
26Layer& Display::GetLayer(std::size_t index) {
27 return layers.at(index);
28}
29
30const Layer& Display::GetLayer(std::size_t index) const {
31 return layers.at(index);
32}
33
34Kernel::SharedPtr<Kernel::ReadableEvent> Display::GetVSyncEvent() const {
35 return vsync_event.readable;
36}
37
38void Display::SignalVSyncEvent() {
39 vsync_event.writable->Signal();
40}
41
42void Display::CreateLayer(u64 id, NVFlinger::BufferQueue& buffer_queue) {
43 // TODO(Subv): Support more than 1 layer.
44 ASSERT_MSG(layers.empty(), "Only one layer is supported per display at the moment");
45
46 layers.emplace_back(id, buffer_queue);
47}
48
49Layer* Display::FindLayer(u64 id) {
50 const auto itr = std::find_if(layers.begin(), layers.end(),
51 [id](const VI::Layer& layer) { return layer.GetID() == id; });
52
53 if (itr == layers.end()) {
54 return nullptr;
55 }
56
57 return &*itr;
58}
59
60const Layer* Display::FindLayer(u64 id) const {
61 const auto itr = std::find_if(layers.begin(), layers.end(),
62 [id](const VI::Layer& layer) { return layer.GetID() == id; });
63
64 if (itr == layers.end()) {
65 return nullptr;
66 }
67
68 return &*itr;
69}
70
71} // namespace Service::VI
diff --git a/src/core/hle/service/vi/display/vi_display.h b/src/core/hle/service/vi/display/vi_display.h
new file mode 100644
index 000000000..2acd46ff8
--- /dev/null
+++ b/src/core/hle/service/vi/display/vi_display.h
@@ -0,0 +1,98 @@
1// Copyright 2019 yuzu emulator team
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <string>
8#include <vector>
9
10#include "common/common_types.h"
11#include "core/hle/kernel/writable_event.h"
12
13namespace Service::NVFlinger {
14class BufferQueue;
15}
16
17namespace Service::VI {
18
19class Layer;
20
21/// Represents a single display type
22class Display {
23public:
24 /// Constructs a display with a given unique ID and name.
25 ///
26 /// @param id The unique ID for this display.
27 /// @param name The name for this display.
28 ///
29 Display(u64 id, std::string name);
30 ~Display();
31
32 Display(const Display&) = delete;
33 Display& operator=(const Display&) = delete;
34
35 Display(Display&&) = default;
36 Display& operator=(Display&&) = default;
37
38 /// Gets the unique ID assigned to this display.
39 u64 GetID() const {
40 return id;
41 }
42
43 /// Gets the name of this display
44 const std::string& GetName() const {
45 return name;
46 }
47
48 /// Whether or not this display has any layers added to it.
49 bool HasLayers() const {
50 return !layers.empty();
51 }
52
53 /// Gets a layer for this display based off an index.
54 Layer& GetLayer(std::size_t index);
55
56 /// Gets a layer for this display based off an index.
57 const Layer& GetLayer(std::size_t index) const;
58
59 /// Gets the readable vsync event.
60 Kernel::SharedPtr<Kernel::ReadableEvent> GetVSyncEvent() const;
61
62 /// Signals the internal vsync event.
63 void SignalVSyncEvent();
64
65 /// Creates and adds a layer to this display with the given ID.
66 ///
67 /// @param id The ID to assign to the created layer.
68 /// @param buffer_queue The buffer queue for the layer instance to use.
69 ///
70 void CreateLayer(u64 id, NVFlinger::BufferQueue& buffer_queue);
71
72 /// Attempts to find a layer with the given ID.
73 ///
74 /// @param id The layer ID.
75 ///
76 /// @returns If found, the Layer instance with the given ID.
77 /// If not found, then nullptr is returned.
78 ///
79 Layer* FindLayer(u64 id);
80
81 /// Attempts to find a layer with the given ID.
82 ///
83 /// @param id The layer ID.
84 ///
85 /// @returns If found, the Layer instance with the given ID.
86 /// If not found, then nullptr is returned.
87 ///
88 const Layer* FindLayer(u64 id) const;
89
90private:
91 u64 id;
92 std::string name;
93
94 std::vector<Layer> layers;
95 Kernel::EventPair vsync_event;
96};
97
98} // namespace Service::VI
diff --git a/src/core/hle/service/vi/layer/vi_layer.cpp b/src/core/hle/service/vi/layer/vi_layer.cpp
new file mode 100644
index 000000000..954225c26
--- /dev/null
+++ b/src/core/hle/service/vi/layer/vi_layer.cpp
@@ -0,0 +1,13 @@
1// Copyright 2019 yuzu emulator team
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "core/hle/service/vi/layer/vi_layer.h"
6
7namespace Service::VI {
8
9Layer::Layer(u64 id, NVFlinger::BufferQueue& queue) : id{id}, buffer_queue{queue} {}
10
11Layer::~Layer() = default;
12
13} // namespace Service::VI
diff --git a/src/core/hle/service/vi/layer/vi_layer.h b/src/core/hle/service/vi/layer/vi_layer.h
new file mode 100644
index 000000000..c6bfd01f6
--- /dev/null
+++ b/src/core/hle/service/vi/layer/vi_layer.h
@@ -0,0 +1,52 @@
1// Copyright 2019 yuzu emulator team
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include "common/common_types.h"
8
9namespace Service::NVFlinger {
10class BufferQueue;
11}
12
13namespace Service::VI {
14
15/// Represents a single display layer.
16class Layer {
17public:
18 /// Constructs a layer with a given ID and buffer queue.
19 ///
20 /// @param id The ID to assign to this layer.
21 /// @param queue The buffer queue for this layer to use.
22 ///
23 Layer(u64 id, NVFlinger::BufferQueue& queue);
24 ~Layer();
25
26 Layer(const Layer&) = delete;
27 Layer& operator=(const Layer&) = delete;
28
29 Layer(Layer&&) = default;
30 Layer& operator=(Layer&&) = delete;
31
32 /// Gets the ID for this layer.
33 u64 GetID() const {
34 return id;
35 }
36
37 /// Gets a reference to the buffer queue this layer is using.
38 NVFlinger::BufferQueue& GetBufferQueue() {
39 return buffer_queue;
40 }
41
42 /// Gets a const reference to the buffer queue this layer is using.
43 const NVFlinger::BufferQueue& GetBufferQueue() const {
44 return buffer_queue;
45 }
46
47private:
48 u64 id;
49 NVFlinger::BufferQueue& buffer_queue;
50};
51
52} // namespace Service::VI
diff --git a/src/core/hle/service/vi/vi.cpp b/src/core/hle/service/vi/vi.cpp
index a317a2885..566cd6006 100644
--- a/src/core/hle/service/vi/vi.cpp
+++ b/src/core/hle/service/vi/vi.cpp
@@ -24,6 +24,7 @@
24#include "core/hle/service/nvdrv/nvdrv.h" 24#include "core/hle/service/nvdrv/nvdrv.h"
25#include "core/hle/service/nvflinger/buffer_queue.h" 25#include "core/hle/service/nvflinger/buffer_queue.h"
26#include "core/hle/service/nvflinger/nvflinger.h" 26#include "core/hle/service/nvflinger/nvflinger.h"
27#include "core/hle/service/service.h"
27#include "core/hle/service/vi/vi.h" 28#include "core/hle/service/vi/vi.h"
28#include "core/hle/service/vi/vi_m.h" 29#include "core/hle/service/vi/vi_m.h"
29#include "core/hle/service/vi/vi_s.h" 30#include "core/hle/service/vi/vi_s.h"
@@ -33,6 +34,7 @@
33namespace Service::VI { 34namespace Service::VI {
34 35
35constexpr ResultCode ERR_OPERATION_FAILED{ErrorModule::VI, 1}; 36constexpr ResultCode ERR_OPERATION_FAILED{ErrorModule::VI, 1};
37constexpr ResultCode ERR_PERMISSION_DENIED{ErrorModule::VI, 5};
36constexpr ResultCode ERR_UNSUPPORTED{ErrorModule::VI, 6}; 38constexpr ResultCode ERR_UNSUPPORTED{ErrorModule::VI, 6};
37constexpr ResultCode ERR_NOT_FOUND{ErrorModule::VI, 7}; 39constexpr ResultCode ERR_NOT_FOUND{ErrorModule::VI, 7};
38 40
@@ -420,7 +422,7 @@ public:
420 u32_le fence_is_valid; 422 u32_le fence_is_valid;
421 std::array<Fence, 2> fences; 423 std::array<Fence, 2> fences;
422 424
423 MathUtil::Rectangle<int> GetCropRect() const { 425 Common::Rectangle<int> GetCropRect() const {
424 return {crop_left, crop_top, crop_right, crop_bottom}; 426 return {crop_left, crop_top, crop_right, crop_bottom};
425 } 427 }
426 }; 428 };
@@ -525,7 +527,7 @@ private:
525 LOG_DEBUG(Service_VI, "called. id=0x{:08X} transaction={:X}, flags=0x{:08X}", id, 527 LOG_DEBUG(Service_VI, "called. id=0x{:08X} transaction={:X}, flags=0x{:08X}", id,
526 static_cast<u32>(transaction), flags); 528 static_cast<u32>(transaction), flags);
527 529
528 auto buffer_queue = nv_flinger->FindBufferQueue(id); 530 auto& buffer_queue = nv_flinger->FindBufferQueue(id);
529 531
530 if (transaction == TransactionId::Connect) { 532 if (transaction == TransactionId::Connect) {
531 IGBPConnectRequestParcel request{ctx.ReadBuffer()}; 533 IGBPConnectRequestParcel request{ctx.ReadBuffer()};
@@ -538,7 +540,7 @@ private:
538 } else if (transaction == TransactionId::SetPreallocatedBuffer) { 540 } else if (transaction == TransactionId::SetPreallocatedBuffer) {
539 IGBPSetPreallocatedBufferRequestParcel request{ctx.ReadBuffer()}; 541 IGBPSetPreallocatedBufferRequestParcel request{ctx.ReadBuffer()};
540 542
541 buffer_queue->SetPreallocatedBuffer(request.data.slot, request.buffer); 543 buffer_queue.SetPreallocatedBuffer(request.data.slot, request.buffer);
542 544
543 IGBPSetPreallocatedBufferResponseParcel response{}; 545 IGBPSetPreallocatedBufferResponseParcel response{};
544 ctx.WriteBuffer(response.Serialize()); 546 ctx.WriteBuffer(response.Serialize());
@@ -546,7 +548,7 @@ private:
546 IGBPDequeueBufferRequestParcel request{ctx.ReadBuffer()}; 548 IGBPDequeueBufferRequestParcel request{ctx.ReadBuffer()};
547 const u32 width{request.data.width}; 549 const u32 width{request.data.width};
548 const u32 height{request.data.height}; 550 const u32 height{request.data.height};
549 std::optional<u32> slot = buffer_queue->DequeueBuffer(width, height); 551 std::optional<u32> slot = buffer_queue.DequeueBuffer(width, height);
550 552
551 if (slot) { 553 if (slot) {
552 // Buffer is available 554 // Buffer is available
@@ -559,8 +561,8 @@ private:
559 [=](Kernel::SharedPtr<Kernel::Thread> thread, Kernel::HLERequestContext& ctx, 561 [=](Kernel::SharedPtr<Kernel::Thread> thread, Kernel::HLERequestContext& ctx,
560 Kernel::ThreadWakeupReason reason) { 562 Kernel::ThreadWakeupReason reason) {
561 // Repeat TransactParcel DequeueBuffer when a buffer is available 563 // Repeat TransactParcel DequeueBuffer when a buffer is available
562 auto buffer_queue = nv_flinger->FindBufferQueue(id); 564 auto& buffer_queue = nv_flinger->FindBufferQueue(id);
563 std::optional<u32> slot = buffer_queue->DequeueBuffer(width, height); 565 std::optional<u32> slot = buffer_queue.DequeueBuffer(width, height);
564 ASSERT_MSG(slot != std::nullopt, "Could not dequeue buffer."); 566 ASSERT_MSG(slot != std::nullopt, "Could not dequeue buffer.");
565 567
566 IGBPDequeueBufferResponseParcel response{*slot}; 568 IGBPDequeueBufferResponseParcel response{*slot};
@@ -568,28 +570,28 @@ private:
568 IPC::ResponseBuilder rb{ctx, 2}; 570 IPC::ResponseBuilder rb{ctx, 2};
569 rb.Push(RESULT_SUCCESS); 571 rb.Push(RESULT_SUCCESS);
570 }, 572 },
571 buffer_queue->GetWritableBufferWaitEvent()); 573 buffer_queue.GetWritableBufferWaitEvent());
572 } 574 }
573 } else if (transaction == TransactionId::RequestBuffer) { 575 } else if (transaction == TransactionId::RequestBuffer) {
574 IGBPRequestBufferRequestParcel request{ctx.ReadBuffer()}; 576 IGBPRequestBufferRequestParcel request{ctx.ReadBuffer()};
575 577
576 auto& buffer = buffer_queue->RequestBuffer(request.slot); 578 auto& buffer = buffer_queue.RequestBuffer(request.slot);
577 579
578 IGBPRequestBufferResponseParcel response{buffer}; 580 IGBPRequestBufferResponseParcel response{buffer};
579 ctx.WriteBuffer(response.Serialize()); 581 ctx.WriteBuffer(response.Serialize());
580 } else if (transaction == TransactionId::QueueBuffer) { 582 } else if (transaction == TransactionId::QueueBuffer) {
581 IGBPQueueBufferRequestParcel request{ctx.ReadBuffer()}; 583 IGBPQueueBufferRequestParcel request{ctx.ReadBuffer()};
582 584
583 buffer_queue->QueueBuffer(request.data.slot, request.data.transform, 585 buffer_queue.QueueBuffer(request.data.slot, request.data.transform,
584 request.data.GetCropRect()); 586 request.data.GetCropRect());
585 587
586 IGBPQueueBufferResponseParcel response{1280, 720}; 588 IGBPQueueBufferResponseParcel response{1280, 720};
587 ctx.WriteBuffer(response.Serialize()); 589 ctx.WriteBuffer(response.Serialize());
588 } else if (transaction == TransactionId::Query) { 590 } else if (transaction == TransactionId::Query) {
589 IGBPQueryRequestParcel request{ctx.ReadBuffer()}; 591 IGBPQueryRequestParcel request{ctx.ReadBuffer()};
590 592
591 u32 value = 593 const u32 value =
592 buffer_queue->Query(static_cast<NVFlinger::BufferQueue::QueryType>(request.type)); 594 buffer_queue.Query(static_cast<NVFlinger::BufferQueue::QueryType>(request.type));
593 595
594 IGBPQueryResponseParcel response{value}; 596 IGBPQueryResponseParcel response{value};
595 ctx.WriteBuffer(response.Serialize()); 597 ctx.WriteBuffer(response.Serialize());
@@ -629,12 +631,12 @@ private:
629 631
630 LOG_WARNING(Service_VI, "(STUBBED) called id={}, unknown={:08X}", id, unknown); 632 LOG_WARNING(Service_VI, "(STUBBED) called id={}, unknown={:08X}", id, unknown);
631 633
632 const auto buffer_queue = nv_flinger->FindBufferQueue(id); 634 const auto& buffer_queue = nv_flinger->FindBufferQueue(id);
633 635
634 // TODO(Subv): Find out what this actually is. 636 // TODO(Subv): Find out what this actually is.
635 IPC::ResponseBuilder rb{ctx, 2, 1}; 637 IPC::ResponseBuilder rb{ctx, 2, 1};
636 rb.Push(RESULT_SUCCESS); 638 rb.Push(RESULT_SUCCESS);
637 rb.PushCopyObjects(buffer_queue->GetBufferWaitEvent()); 639 rb.PushCopyObjects(buffer_queue.GetBufferWaitEvent());
638 } 640 }
639 641
640 std::shared_ptr<NVFlinger::NVFlinger> nv_flinger; 642 std::shared_ptr<NVFlinger::NVFlinger> nv_flinger;
@@ -752,6 +754,7 @@ public:
752 {1102, nullptr, "GetDisplayResolution"}, 754 {1102, nullptr, "GetDisplayResolution"},
753 {2010, &IManagerDisplayService::CreateManagedLayer, "CreateManagedLayer"}, 755 {2010, &IManagerDisplayService::CreateManagedLayer, "CreateManagedLayer"},
754 {2011, nullptr, "DestroyManagedLayer"}, 756 {2011, nullptr, "DestroyManagedLayer"},
757 {2012, nullptr, "CreateStrayLayer"},
755 {2050, nullptr, "CreateIndirectLayer"}, 758 {2050, nullptr, "CreateIndirectLayer"},
756 {2051, nullptr, "DestroyIndirectLayer"}, 759 {2051, nullptr, "DestroyIndirectLayer"},
757 {2052, nullptr, "CreateIndirectProducerEndPoint"}, 760 {2052, nullptr, "CreateIndirectProducerEndPoint"},
@@ -1202,26 +1205,40 @@ IApplicationDisplayService::IApplicationDisplayService(
1202 RegisterHandlers(functions); 1205 RegisterHandlers(functions);
1203} 1206}
1204 1207
1205Module::Interface::Interface(std::shared_ptr<Module> module, const char* name, 1208static bool IsValidServiceAccess(Permission permission, Policy policy) {
1206 std::shared_ptr<NVFlinger::NVFlinger> nv_flinger) 1209 if (permission == Permission::User) {
1207 : ServiceFramework(name), module(std::move(module)), nv_flinger(std::move(nv_flinger)) {} 1210 return policy == Policy::User;
1211 }
1212
1213 if (permission == Permission::System || permission == Permission::Manager) {
1214 return policy == Policy::User || policy == Policy::Compositor;
1215 }
1208 1216
1209Module::Interface::~Interface() = default; 1217 return false;
1218}
1210 1219
1211void Module::Interface::GetDisplayService(Kernel::HLERequestContext& ctx) { 1220void detail::GetDisplayServiceImpl(Kernel::HLERequestContext& ctx,
1212 LOG_WARNING(Service_VI, "(STUBBED) called"); 1221 std::shared_ptr<NVFlinger::NVFlinger> nv_flinger,
1222 Permission permission) {
1223 IPC::RequestParser rp{ctx};
1224 const auto policy = rp.PopEnum<Policy>();
1225
1226 if (!IsValidServiceAccess(permission, policy)) {
1227 IPC::ResponseBuilder rb{ctx, 2};
1228 rb.Push(ERR_PERMISSION_DENIED);
1229 return;
1230 }
1213 1231
1214 IPC::ResponseBuilder rb{ctx, 2, 0, 1}; 1232 IPC::ResponseBuilder rb{ctx, 2, 0, 1};
1215 rb.Push(RESULT_SUCCESS); 1233 rb.Push(RESULT_SUCCESS);
1216 rb.PushIpcInterface<IApplicationDisplayService>(nv_flinger); 1234 rb.PushIpcInterface<IApplicationDisplayService>(std::move(nv_flinger));
1217} 1235}
1218 1236
1219void InstallInterfaces(SM::ServiceManager& service_manager, 1237void InstallInterfaces(SM::ServiceManager& service_manager,
1220 std::shared_ptr<NVFlinger::NVFlinger> nv_flinger) { 1238 std::shared_ptr<NVFlinger::NVFlinger> nv_flinger) {
1221 auto module = std::make_shared<Module>(); 1239 std::make_shared<VI_M>(nv_flinger)->InstallAsService(service_manager);
1222 std::make_shared<VI_M>(module, nv_flinger)->InstallAsService(service_manager); 1240 std::make_shared<VI_S>(nv_flinger)->InstallAsService(service_manager);
1223 std::make_shared<VI_S>(module, nv_flinger)->InstallAsService(service_manager); 1241 std::make_shared<VI_U>(nv_flinger)->InstallAsService(service_manager);
1224 std::make_shared<VI_U>(module, nv_flinger)->InstallAsService(service_manager);
1225} 1242}
1226 1243
1227} // namespace Service::VI 1244} // namespace Service::VI
diff --git a/src/core/hle/service/vi/vi.h b/src/core/hle/service/vi/vi.h
index e3963502a..6b66f8b81 100644
--- a/src/core/hle/service/vi/vi.h
+++ b/src/core/hle/service/vi/vi.h
@@ -4,12 +4,21 @@
4 4
5#pragma once 5#pragma once
6 6
7#include "core/hle/service/service.h" 7#include <memory>
8#include "common/common_types.h"
9
10namespace Kernel {
11class HLERequestContext;
12}
8 13
9namespace Service::NVFlinger { 14namespace Service::NVFlinger {
10class NVFlinger; 15class NVFlinger;
11} 16}
12 17
18namespace Service::SM {
19class ServiceManager;
20}
21
13namespace Service::VI { 22namespace Service::VI {
14 23
15enum class DisplayResolution : u32 { 24enum class DisplayResolution : u32 {
@@ -19,22 +28,25 @@ enum class DisplayResolution : u32 {
19 UndockedHeight = 720, 28 UndockedHeight = 720,
20}; 29};
21 30
22class Module final { 31/// Permission level for a particular VI service instance
23public: 32enum class Permission {
24 class Interface : public ServiceFramework<Interface> { 33 User,
25 public: 34 System,
26 explicit Interface(std::shared_ptr<Module> module, const char* name, 35 Manager,
27 std::shared_ptr<NVFlinger::NVFlinger> nv_flinger); 36};
28 ~Interface() override;
29
30 void GetDisplayService(Kernel::HLERequestContext& ctx);
31 37
32 protected: 38/// A policy type that may be requested via GetDisplayService and
33 std::shared_ptr<Module> module; 39/// GetDisplayServiceWithProxyNameExchange
34 std::shared_ptr<NVFlinger::NVFlinger> nv_flinger; 40enum class Policy {
35 }; 41 User,
42 Compositor,
36}; 43};
37 44
45namespace detail {
46void GetDisplayServiceImpl(Kernel::HLERequestContext& ctx,
47 std::shared_ptr<NVFlinger::NVFlinger> nv_flinger, Permission permission);
48} // namespace detail
49
38/// Registers all VI services with the specified service manager. 50/// Registers all VI services with the specified service manager.
39void InstallInterfaces(SM::ServiceManager& service_manager, 51void InstallInterfaces(SM::ServiceManager& service_manager,
40 std::shared_ptr<NVFlinger::NVFlinger> nv_flinger); 52 std::shared_ptr<NVFlinger::NVFlinger> nv_flinger);
diff --git a/src/core/hle/service/vi/vi_m.cpp b/src/core/hle/service/vi/vi_m.cpp
index 207c06b16..06070087f 100644
--- a/src/core/hle/service/vi/vi_m.cpp
+++ b/src/core/hle/service/vi/vi_m.cpp
@@ -2,12 +2,14 @@
2// Licensed under GPLv2 or any later version 2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included. 3// Refer to the license.txt file included.
4 4
5#include "common/logging/log.h"
6#include "core/hle/service/vi/vi.h"
5#include "core/hle/service/vi/vi_m.h" 7#include "core/hle/service/vi/vi_m.h"
6 8
7namespace Service::VI { 9namespace Service::VI {
8 10
9VI_M::VI_M(std::shared_ptr<Module> module, std::shared_ptr<NVFlinger::NVFlinger> nv_flinger) 11VI_M::VI_M(std::shared_ptr<NVFlinger::NVFlinger> nv_flinger)
10 : Module::Interface(std::move(module), "vi:m", std::move(nv_flinger)) { 12 : ServiceFramework{"vi:m"}, nv_flinger{std::move(nv_flinger)} {
11 static const FunctionInfo functions[] = { 13 static const FunctionInfo functions[] = {
12 {2, &VI_M::GetDisplayService, "GetDisplayService"}, 14 {2, &VI_M::GetDisplayService, "GetDisplayService"},
13 {3, nullptr, "GetDisplayServiceWithProxyNameExchange"}, 15 {3, nullptr, "GetDisplayServiceWithProxyNameExchange"},
@@ -17,4 +19,10 @@ VI_M::VI_M(std::shared_ptr<Module> module, std::shared_ptr<NVFlinger::NVFlinger>
17 19
18VI_M::~VI_M() = default; 20VI_M::~VI_M() = default;
19 21
22void VI_M::GetDisplayService(Kernel::HLERequestContext& ctx) {
23 LOG_DEBUG(Service_VI, "called");
24
25 detail::GetDisplayServiceImpl(ctx, nv_flinger, Permission::Manager);
26}
27
20} // namespace Service::VI 28} // namespace Service::VI
diff --git a/src/core/hle/service/vi/vi_m.h b/src/core/hle/service/vi/vi_m.h
index 487d58d50..290e06689 100644
--- a/src/core/hle/service/vi/vi_m.h
+++ b/src/core/hle/service/vi/vi_m.h
@@ -4,14 +4,27 @@
4 4
5#pragma once 5#pragma once
6 6
7#include "core/hle/service/vi/vi.h" 7#include "core/hle/service/service.h"
8
9namespace Kernel {
10class HLERequestContext;
11}
12
13namespace Service::NVFlinger {
14class NVFlinger;
15}
8 16
9namespace Service::VI { 17namespace Service::VI {
10 18
11class VI_M final : public Module::Interface { 19class VI_M final : public ServiceFramework<VI_M> {
12public: 20public:
13 explicit VI_M(std::shared_ptr<Module> module, std::shared_ptr<NVFlinger::NVFlinger> nv_flinger); 21 explicit VI_M(std::shared_ptr<NVFlinger::NVFlinger> nv_flinger);
14 ~VI_M() override; 22 ~VI_M() override;
23
24private:
25 void GetDisplayService(Kernel::HLERequestContext& ctx);
26
27 std::shared_ptr<NVFlinger::NVFlinger> nv_flinger;
15}; 28};
16 29
17} // namespace Service::VI 30} // namespace Service::VI
diff --git a/src/core/hle/service/vi/vi_s.cpp b/src/core/hle/service/vi/vi_s.cpp
index 920e6a1f6..57c596cc4 100644
--- a/src/core/hle/service/vi/vi_s.cpp
+++ b/src/core/hle/service/vi/vi_s.cpp
@@ -2,12 +2,14 @@
2// Licensed under GPLv2 or any later version 2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included. 3// Refer to the license.txt file included.
4 4
5#include "common/logging/log.h"
6#include "core/hle/service/vi/vi.h"
5#include "core/hle/service/vi/vi_s.h" 7#include "core/hle/service/vi/vi_s.h"
6 8
7namespace Service::VI { 9namespace Service::VI {
8 10
9VI_S::VI_S(std::shared_ptr<Module> module, std::shared_ptr<NVFlinger::NVFlinger> nv_flinger) 11VI_S::VI_S(std::shared_ptr<NVFlinger::NVFlinger> nv_flinger)
10 : Module::Interface(std::move(module), "vi:s", std::move(nv_flinger)) { 12 : ServiceFramework{"vi:s"}, nv_flinger{std::move(nv_flinger)} {
11 static const FunctionInfo functions[] = { 13 static const FunctionInfo functions[] = {
12 {1, &VI_S::GetDisplayService, "GetDisplayService"}, 14 {1, &VI_S::GetDisplayService, "GetDisplayService"},
13 {3, nullptr, "GetDisplayServiceWithProxyNameExchange"}, 15 {3, nullptr, "GetDisplayServiceWithProxyNameExchange"},
@@ -17,4 +19,10 @@ VI_S::VI_S(std::shared_ptr<Module> module, std::shared_ptr<NVFlinger::NVFlinger>
17 19
18VI_S::~VI_S() = default; 20VI_S::~VI_S() = default;
19 21
22void VI_S::GetDisplayService(Kernel::HLERequestContext& ctx) {
23 LOG_DEBUG(Service_VI, "called");
24
25 detail::GetDisplayServiceImpl(ctx, nv_flinger, Permission::System);
26}
27
20} // namespace Service::VI 28} // namespace Service::VI
diff --git a/src/core/hle/service/vi/vi_s.h b/src/core/hle/service/vi/vi_s.h
index bbc31148f..47804dc0b 100644
--- a/src/core/hle/service/vi/vi_s.h
+++ b/src/core/hle/service/vi/vi_s.h
@@ -4,14 +4,27 @@
4 4
5#pragma once 5#pragma once
6 6
7#include "core/hle/service/vi/vi.h" 7#include "core/hle/service/service.h"
8
9namespace Kernel {
10class HLERequestContext;
11}
12
13namespace Service::NVFlinger {
14class NVFlinger;
15}
8 16
9namespace Service::VI { 17namespace Service::VI {
10 18
11class VI_S final : public Module::Interface { 19class VI_S final : public ServiceFramework<VI_S> {
12public: 20public:
13 explicit VI_S(std::shared_ptr<Module> module, std::shared_ptr<NVFlinger::NVFlinger> nv_flinger); 21 explicit VI_S(std::shared_ptr<NVFlinger::NVFlinger> nv_flinger);
14 ~VI_S() override; 22 ~VI_S() override;
23
24private:
25 void GetDisplayService(Kernel::HLERequestContext& ctx);
26
27 std::shared_ptr<NVFlinger::NVFlinger> nv_flinger;
15}; 28};
16 29
17} // namespace Service::VI 30} // namespace Service::VI
diff --git a/src/core/hle/service/vi/vi_u.cpp b/src/core/hle/service/vi/vi_u.cpp
index d81e410d6..9d5ceb608 100644
--- a/src/core/hle/service/vi/vi_u.cpp
+++ b/src/core/hle/service/vi/vi_u.cpp
@@ -2,12 +2,14 @@
2// Licensed under GPLv2 or any later version 2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included. 3// Refer to the license.txt file included.
4 4
5#include "common/logging/log.h"
6#include "core/hle/service/vi/vi.h"
5#include "core/hle/service/vi/vi_u.h" 7#include "core/hle/service/vi/vi_u.h"
6 8
7namespace Service::VI { 9namespace Service::VI {
8 10
9VI_U::VI_U(std::shared_ptr<Module> module, std::shared_ptr<NVFlinger::NVFlinger> nv_flinger) 11VI_U::VI_U(std::shared_ptr<NVFlinger::NVFlinger> nv_flinger)
10 : Module::Interface(std::move(module), "vi:u", std::move(nv_flinger)) { 12 : ServiceFramework{"vi:u"}, nv_flinger{std::move(nv_flinger)} {
11 static const FunctionInfo functions[] = { 13 static const FunctionInfo functions[] = {
12 {0, &VI_U::GetDisplayService, "GetDisplayService"}, 14 {0, &VI_U::GetDisplayService, "GetDisplayService"},
13 }; 15 };
@@ -16,4 +18,10 @@ VI_U::VI_U(std::shared_ptr<Module> module, std::shared_ptr<NVFlinger::NVFlinger>
16 18
17VI_U::~VI_U() = default; 19VI_U::~VI_U() = default;
18 20
21void VI_U::GetDisplayService(Kernel::HLERequestContext& ctx) {
22 LOG_DEBUG(Service_VI, "called");
23
24 detail::GetDisplayServiceImpl(ctx, nv_flinger, Permission::User);
25}
26
19} // namespace Service::VI 27} // namespace Service::VI
diff --git a/src/core/hle/service/vi/vi_u.h b/src/core/hle/service/vi/vi_u.h
index b92f28c92..19bdb73b0 100644
--- a/src/core/hle/service/vi/vi_u.h
+++ b/src/core/hle/service/vi/vi_u.h
@@ -4,14 +4,27 @@
4 4
5#pragma once 5#pragma once
6 6
7#include "core/hle/service/vi/vi.h" 7#include "core/hle/service/service.h"
8
9namespace Kernel {
10class HLERequestContext;
11}
12
13namespace Service::NVFlinger {
14class NVFlinger;
15}
8 16
9namespace Service::VI { 17namespace Service::VI {
10 18
11class VI_U final : public Module::Interface { 19class VI_U final : public ServiceFramework<VI_U> {
12public: 20public:
13 explicit VI_U(std::shared_ptr<Module> module, std::shared_ptr<NVFlinger::NVFlinger> nv_flinger); 21 explicit VI_U(std::shared_ptr<NVFlinger::NVFlinger> nv_flinger);
14 ~VI_U() override; 22 ~VI_U() override;
23
24private:
25 void GetDisplayService(Kernel::HLERequestContext& ctx);
26
27 std::shared_ptr<NVFlinger::NVFlinger> nv_flinger;
15}; 28};
16 29
17} // namespace Service::VI 30} // namespace Service::VI
diff --git a/src/core/loader/elf.cpp b/src/core/loader/elf.cpp
index 6057c7f26..46ac372f6 100644
--- a/src/core/loader/elf.cpp
+++ b/src/core/loader/elf.cpp
@@ -9,6 +9,7 @@
9#include "common/common_types.h" 9#include "common/common_types.h"
10#include "common/file_util.h" 10#include "common/file_util.h"
11#include "common/logging/log.h" 11#include "common/logging/log.h"
12#include "core/hle/kernel/code_set.h"
12#include "core/hle/kernel/process.h" 13#include "core/hle/kernel/process.h"
13#include "core/hle/kernel/vm_manager.h" 14#include "core/hle/kernel/vm_manager.h"
14#include "core/loader/elf.h" 15#include "core/loader/elf.h"
@@ -340,7 +341,7 @@ Kernel::CodeSet ElfReader::LoadInto(VAddr vaddr) {
340 } 341 }
341 342
342 codeset.entrypoint = base_addr + header->e_entry; 343 codeset.entrypoint = base_addr + header->e_entry;
343 codeset.memory = std::make_shared<std::vector<u8>>(std::move(program_image)); 344 codeset.memory = std::move(program_image);
344 345
345 LOG_DEBUG(Loader, "Done loading."); 346 LOG_DEBUG(Loader, "Done loading.");
346 347
diff --git a/src/core/loader/linker.cpp b/src/core/loader/linker.cpp
deleted file mode 100644
index 57ca8c3ee..000000000
--- a/src/core/loader/linker.cpp
+++ /dev/null
@@ -1,147 +0,0 @@
1// Copyright 2018 yuzu emulator team
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <vector>
6
7#include "common/common_funcs.h"
8#include "common/logging/log.h"
9#include "common/swap.h"
10#include "core/loader/linker.h"
11#include "core/memory.h"
12
13namespace Loader {
14
15enum class RelocationType : u32 { ABS64 = 257, GLOB_DAT = 1025, JUMP_SLOT = 1026, RELATIVE = 1027 };
16
17enum DynamicType : u32 {
18 DT_NULL = 0,
19 DT_PLTRELSZ = 2,
20 DT_STRTAB = 5,
21 DT_SYMTAB = 6,
22 DT_RELA = 7,
23 DT_RELASZ = 8,
24 DT_STRSZ = 10,
25 DT_JMPREL = 23,
26};
27
28struct Elf64_Rela {
29 u64_le offset;
30 RelocationType type;
31 u32_le symbol;
32 s64_le addend;
33};
34static_assert(sizeof(Elf64_Rela) == 0x18, "Elf64_Rela has incorrect size.");
35
36struct Elf64_Dyn {
37 u64_le tag;
38 u64_le value;
39};
40static_assert(sizeof(Elf64_Dyn) == 0x10, "Elf64_Dyn has incorrect size.");
41
42struct Elf64_Sym {
43 u32_le name;
44 INSERT_PADDING_BYTES(0x2);
45 u16_le shndx;
46 u64_le value;
47 u64_le size;
48};
49static_assert(sizeof(Elf64_Sym) == 0x18, "Elf64_Sym has incorrect size.");
50
51void Linker::WriteRelocations(std::vector<u8>& program_image, const std::vector<Symbol>& symbols,
52 u64 relocation_offset, u64 size, VAddr load_base) {
53 for (u64 i = 0; i < size; i += sizeof(Elf64_Rela)) {
54 Elf64_Rela rela;
55 std::memcpy(&rela, &program_image[relocation_offset + i], sizeof(Elf64_Rela));
56
57 const Symbol& symbol = symbols[rela.symbol];
58 switch (rela.type) {
59 case RelocationType::RELATIVE: {
60 const u64 value = load_base + rela.addend;
61 if (!symbol.name.empty()) {
62 exports[symbol.name] = value;
63 }
64 std::memcpy(&program_image[rela.offset], &value, sizeof(u64));
65 break;
66 }
67 case RelocationType::JUMP_SLOT:
68 case RelocationType::GLOB_DAT:
69 if (!symbol.value) {
70 imports[symbol.name] = {rela.offset + load_base, 0};
71 } else {
72 exports[symbol.name] = symbol.value;
73 std::memcpy(&program_image[rela.offset], &symbol.value, sizeof(u64));
74 }
75 break;
76 case RelocationType::ABS64:
77 if (!symbol.value) {
78 imports[symbol.name] = {rela.offset + load_base, rela.addend};
79 } else {
80 const u64 value = symbol.value + rela.addend;
81 exports[symbol.name] = value;
82 std::memcpy(&program_image[rela.offset], &value, sizeof(u64));
83 }
84 break;
85 default:
86 LOG_CRITICAL(Loader, "Unknown relocation type: {}", static_cast<int>(rela.type));
87 break;
88 }
89 }
90}
91
92void Linker::Relocate(std::vector<u8>& program_image, u32 dynamic_section_offset, VAddr load_base) {
93 std::map<u64, u64> dynamic;
94 while (dynamic_section_offset < program_image.size()) {
95 Elf64_Dyn dyn;
96 std::memcpy(&dyn, &program_image[dynamic_section_offset], sizeof(Elf64_Dyn));
97 dynamic_section_offset += sizeof(Elf64_Dyn);
98
99 if (dyn.tag == DT_NULL) {
100 break;
101 }
102 dynamic[dyn.tag] = dyn.value;
103 }
104
105 u64 offset = dynamic[DT_SYMTAB];
106 std::vector<Symbol> symbols;
107 while (offset < program_image.size()) {
108 Elf64_Sym sym;
109 std::memcpy(&sym, &program_image[offset], sizeof(Elf64_Sym));
110 offset += sizeof(Elf64_Sym);
111
112 if (sym.name >= dynamic[DT_STRSZ]) {
113 break;
114 }
115
116 std::string name = reinterpret_cast<char*>(&program_image[dynamic[DT_STRTAB] + sym.name]);
117 if (sym.value) {
118 exports[name] = load_base + sym.value;
119 symbols.emplace_back(std::move(name), load_base + sym.value);
120 } else {
121 symbols.emplace_back(std::move(name), 0);
122 }
123 }
124
125 if (dynamic.find(DT_RELA) != dynamic.end()) {
126 WriteRelocations(program_image, symbols, dynamic[DT_RELA], dynamic[DT_RELASZ], load_base);
127 }
128
129 if (dynamic.find(DT_JMPREL) != dynamic.end()) {
130 WriteRelocations(program_image, symbols, dynamic[DT_JMPREL], dynamic[DT_PLTRELSZ],
131 load_base);
132 }
133}
134
135void Linker::ResolveImports() {
136 // Resolve imports
137 for (const auto& import : imports) {
138 const auto& search = exports.find(import.first);
139 if (search != exports.end()) {
140 Memory::Write64(import.second.ea, search->second + import.second.addend);
141 } else {
142 LOG_ERROR(Loader, "Unresolved import: {}", import.first);
143 }
144 }
145}
146
147} // namespace Loader
diff --git a/src/core/loader/linker.h b/src/core/loader/linker.h
deleted file mode 100644
index 107625837..000000000
--- a/src/core/loader/linker.h
+++ /dev/null
@@ -1,36 +0,0 @@
1// Copyright 2018 yuzu emulator team
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <map>
8#include <string>
9#include "common/common_types.h"
10
11namespace Loader {
12
 13class Linker {
 14protected:
    // A dynamic-table symbol: its name and resolved virtual address
    // (0 when the symbol is undefined in the module being linked).
 15    struct Symbol {
 16        Symbol(std::string&& name, u64 value) : name(std::move(name)), value(value) {}
 17        std::string name;
 18        u64 value;
 19    };
 20
    // An unresolved reference: ea is the virtual address to patch, and addend
    // is added to the resolved symbol value when the patch is written.
 21    struct Import {
 22        VAddr ea;
 23        s64 addend;
 24    };
 25
    // Applies a relocation table of the given size at relocation_offset within
    // program_image, using symbols for resolution and load_base for rebasing.
 26    void WriteRelocations(std::vector<u8>& program_image, const std::vector<Symbol>& symbols,
 27                          u64 relocation_offset, u64 size, VAddr load_base);
    // Parses the .dynamic section at dynamic_section_offset and applies
    // DT_RELA / DT_JMPREL relocations against an image based at load_base.
 28    void Relocate(std::vector<u8>& program_image, u32 dynamic_section_offset, VAddr load_base);
 29
    // Patches every import whose name appears in the export map.
 30    void ResolveImports();
 31
    // Both maps are keyed by symbol name.
 32    std::map<std::string, Import> imports;
 33    std::map<std::string, VAddr> exports;
 34};
35
36} // namespace Loader
diff --git a/src/core/loader/nro.cpp b/src/core/loader/nro.cpp
index 4fad0c0dd..31e4a0c84 100644
--- a/src/core/loader/nro.cpp
+++ b/src/core/loader/nro.cpp
@@ -14,6 +14,7 @@
14#include "core/file_sys/romfs_factory.h" 14#include "core/file_sys/romfs_factory.h"
15#include "core/file_sys/vfs_offset.h" 15#include "core/file_sys/vfs_offset.h"
16#include "core/gdbstub/gdbstub.h" 16#include "core/gdbstub/gdbstub.h"
17#include "core/hle/kernel/code_set.h"
17#include "core/hle/kernel/process.h" 18#include "core/hle/kernel/process.h"
18#include "core/hle/kernel/vm_manager.h" 19#include "core/hle/kernel/vm_manager.h"
19#include "core/hle/service/filesystem/filesystem.h" 20#include "core/hle/service/filesystem/filesystem.h"
@@ -186,7 +187,7 @@ static bool LoadNroImpl(Kernel::Process& process, const std::vector<u8>& data,
186 program_image.resize(static_cast<u32>(program_image.size()) + bss_size); 187 program_image.resize(static_cast<u32>(program_image.size()) + bss_size);
187 188
188 // Load codeset for current process 189 // Load codeset for current process
189 codeset.memory = std::make_shared<std::vector<u8>>(std::move(program_image)); 190 codeset.memory = std::move(program_image);
190 process.LoadModule(std::move(codeset), load_base); 191 process.LoadModule(std::move(codeset), load_base);
191 192
192 // Register module with GDBStub 193 // Register module with GDBStub
diff --git a/src/core/loader/nro.h b/src/core/loader/nro.h
index 013d629c0..85b0ed644 100644
--- a/src/core/loader/nro.h
+++ b/src/core/loader/nro.h
@@ -4,10 +4,10 @@
4 4
5#pragma once 5#pragma once
6 6
7#include <memory>
7#include <string> 8#include <string>
8#include <vector> 9#include <vector>
9#include "common/common_types.h" 10#include "common/common_types.h"
10#include "core/loader/linker.h"
11#include "core/loader/loader.h" 11#include "core/loader/loader.h"
12 12
13namespace FileSys { 13namespace FileSys {
@@ -21,7 +21,7 @@ class Process;
21namespace Loader { 21namespace Loader {
22 22
23/// Loads an NRO file 23/// Loads an NRO file
24class AppLoader_NRO final : public AppLoader, Linker { 24class AppLoader_NRO final : public AppLoader {
25public: 25public:
26 explicit AppLoader_NRO(FileSys::VirtualFile file); 26 explicit AppLoader_NRO(FileSys::VirtualFile file);
27 ~AppLoader_NRO() override; 27 ~AppLoader_NRO() override;
diff --git a/src/core/loader/nso.cpp b/src/core/loader/nso.cpp
index 6ded0b707..ffe2eea8a 100644
--- a/src/core/loader/nso.cpp
+++ b/src/core/loader/nso.cpp
@@ -4,13 +4,17 @@
4 4
5#include <cinttypes> 5#include <cinttypes>
6#include <vector> 6#include <vector>
7#include <lz4.h> 7
8#include "common/common_funcs.h" 8#include "common/common_funcs.h"
9#include "common/file_util.h" 9#include "common/file_util.h"
10#include "common/hex_util.h"
10#include "common/logging/log.h" 11#include "common/logging/log.h"
12#include "common/lz4_compression.h"
11#include "common/swap.h" 13#include "common/swap.h"
14#include "core/core.h"
12#include "core/file_sys/patch_manager.h" 15#include "core/file_sys/patch_manager.h"
13#include "core/gdbstub/gdbstub.h" 16#include "core/gdbstub/gdbstub.h"
17#include "core/hle/kernel/code_set.h"
14#include "core/hle/kernel/process.h" 18#include "core/hle/kernel/process.h"
15#include "core/hle/kernel/vm_manager.h" 19#include "core/hle/kernel/vm_manager.h"
16#include "core/loader/nso.h" 20#include "core/loader/nso.h"
@@ -18,36 +22,8 @@
18#include "core/settings.h" 22#include "core/settings.h"
19 23
20namespace Loader { 24namespace Loader {
21 25namespace {
22struct NsoSegmentHeader { 26struct MODHeader {
23 u32_le offset;
24 u32_le location;
25 u32_le size;
26 union {
27 u32_le alignment;
28 u32_le bss_size;
29 };
30};
31static_assert(sizeof(NsoSegmentHeader) == 0x10, "NsoSegmentHeader has incorrect size.");
32
33struct NsoHeader {
34 u32_le magic;
35 u32_le version;
36 INSERT_PADDING_WORDS(1);
37 u8 flags;
38 std::array<NsoSegmentHeader, 3> segments; // Text, RoData, Data (in that order)
39 std::array<u8, 0x20> build_id;
40 std::array<u32_le, 3> segments_compressed_size;
41
42 bool IsSegmentCompressed(size_t segment_num) const {
43 ASSERT_MSG(segment_num < 3, "Invalid segment {}", segment_num);
44 return ((flags >> segment_num) & 1);
45 }
46};
47static_assert(sizeof(NsoHeader) == 0x6c, "NsoHeader has incorrect size.");
48static_assert(std::is_trivially_copyable_v<NsoHeader>, "NsoHeader isn't trivially copyable.");
49
50struct ModHeader {
51 u32_le magic; 27 u32_le magic;
52 u32_le dynamic_offset; 28 u32_le dynamic_offset;
53 u32_le bss_start_offset; 29 u32_le bss_start_offset;
@@ -56,7 +32,28 @@ struct ModHeader {
56 u32_le eh_frame_hdr_end_offset; 32 u32_le eh_frame_hdr_end_offset;
57 u32_le module_offset; // Offset to runtime-generated module object. typically equal to .bss base 33 u32_le module_offset; // Offset to runtime-generated module object. typically equal to .bss base
58}; 34};
59static_assert(sizeof(ModHeader) == 0x1c, "ModHeader has incorrect size."); 35static_assert(sizeof(MODHeader) == 0x1c, "MODHeader has incorrect size.");
36
37std::vector<u8> DecompressSegment(const std::vector<u8>& compressed_data,
38 const NSOSegmentHeader& header) {
39 const std::vector<u8> uncompressed_data =
40 Common::Compression::DecompressDataLZ4(compressed_data, header.size);
41
42 ASSERT_MSG(uncompressed_data.size() == static_cast<int>(header.size), "{} != {}", header.size,
43 uncompressed_data.size());
44
45 return uncompressed_data;
46}
47
48constexpr u32 PageAlignSize(u32 size) {
49 return (size + Memory::PAGE_MASK) & ~Memory::PAGE_MASK;
50}
51} // Anonymous namespace
52
53bool NSOHeader::IsSegmentCompressed(size_t segment_num) const {
54 ASSERT_MSG(segment_num < 3, "Invalid segment {}", segment_num);
55 return ((flags >> segment_num) & 1) != 0;
56}
60 57
61AppLoader_NSO::AppLoader_NSO(FileSys::VirtualFile file) : AppLoader(std::move(file)) {} 58AppLoader_NSO::AppLoader_NSO(FileSys::VirtualFile file) : AppLoader(std::move(file)) {}
62 59
@@ -73,38 +70,22 @@ FileType AppLoader_NSO::IdentifyType(const FileSys::VirtualFile& file) {
73 return FileType::NSO; 70 return FileType::NSO;
74} 71}
75 72
76static std::vector<u8> DecompressSegment(const std::vector<u8>& compressed_data,
77 const NsoSegmentHeader& header) {
78 std::vector<u8> uncompressed_data(header.size);
79 const int bytes_uncompressed =
80 LZ4_decompress_safe(reinterpret_cast<const char*>(compressed_data.data()),
81 reinterpret_cast<char*>(uncompressed_data.data()),
82 static_cast<int>(compressed_data.size()), header.size);
83
84 ASSERT_MSG(bytes_uncompressed == static_cast<int>(header.size) &&
85 bytes_uncompressed == static_cast<int>(uncompressed_data.size()),
86 "{} != {} != {}", bytes_uncompressed, header.size, uncompressed_data.size());
87
88 return uncompressed_data;
89}
90
91static constexpr u32 PageAlignSize(u32 size) {
92 return (size + Memory::PAGE_MASK) & ~Memory::PAGE_MASK;
93}
94
95std::optional<VAddr> AppLoader_NSO::LoadModule(Kernel::Process& process, 73std::optional<VAddr> AppLoader_NSO::LoadModule(Kernel::Process& process,
96 const FileSys::VfsFile& file, VAddr load_base, 74 const FileSys::VfsFile& file, VAddr load_base,
97 bool should_pass_arguments, 75 bool should_pass_arguments,
98 std::optional<FileSys::PatchManager> pm) { 76 std::optional<FileSys::PatchManager> pm) {
99 if (file.GetSize() < sizeof(NsoHeader)) 77 if (file.GetSize() < sizeof(NSOHeader)) {
100 return {}; 78 return {};
79 }
101 80
102 NsoHeader nso_header{}; 81 NSOHeader nso_header{};
103 if (sizeof(NsoHeader) != file.ReadObject(&nso_header)) 82 if (sizeof(NSOHeader) != file.ReadObject(&nso_header)) {
104 return {}; 83 return {};
84 }
105 85
106 if (nso_header.magic != Common::MakeMagic('N', 'S', 'O', '0')) 86 if (nso_header.magic != Common::MakeMagic('N', 'S', 'O', '0')) {
107 return {}; 87 return {};
88 }
108 89
109 // Build program image 90 // Build program image
110 Kernel::CodeSet codeset; 91 Kernel::CodeSet codeset;
@@ -140,10 +121,10 @@ std::optional<VAddr> AppLoader_NSO::LoadModule(Kernel::Process& process,
140 std::memcpy(&module_offset, program_image.data() + 4, sizeof(u32)); 121 std::memcpy(&module_offset, program_image.data() + 4, sizeof(u32));
141 122
142 // Read MOD header 123 // Read MOD header
143 ModHeader mod_header{}; 124 MODHeader mod_header{};
144 // Default .bss to size in segment header if MOD0 section doesn't exist 125 // Default .bss to size in segment header if MOD0 section doesn't exist
145 u32 bss_size{PageAlignSize(nso_header.segments[2].bss_size)}; 126 u32 bss_size{PageAlignSize(nso_header.segments[2].bss_size)};
146 std::memcpy(&mod_header, program_image.data() + module_offset, sizeof(ModHeader)); 127 std::memcpy(&mod_header, program_image.data() + module_offset, sizeof(MODHeader));
147 const bool has_mod_header{mod_header.magic == Common::MakeMagic('M', 'O', 'D', '0')}; 128 const bool has_mod_header{mod_header.magic == Common::MakeMagic('M', 'O', 'D', '0')};
148 if (has_mod_header) { 129 if (has_mod_header) {
149 // Resize program image to include .bss section and page align each section 130 // Resize program image to include .bss section and page align each section
@@ -155,17 +136,29 @@ std::optional<VAddr> AppLoader_NSO::LoadModule(Kernel::Process& process,
155 136
156 // Apply patches if necessary 137 // Apply patches if necessary
157 if (pm && (pm->HasNSOPatch(nso_header.build_id) || Settings::values.dump_nso)) { 138 if (pm && (pm->HasNSOPatch(nso_header.build_id) || Settings::values.dump_nso)) {
158 std::vector<u8> pi_header(program_image.size() + 0x100); 139 std::vector<u8> pi_header(sizeof(NSOHeader) + program_image.size());
159 std::memcpy(pi_header.data(), &nso_header, sizeof(NsoHeader)); 140 pi_header.insert(pi_header.begin(), reinterpret_cast<u8*>(&nso_header),
160 std::memcpy(pi_header.data() + 0x100, program_image.data(), program_image.size()); 141 reinterpret_cast<u8*>(&nso_header) + sizeof(NSOHeader));
142 pi_header.insert(pi_header.begin() + sizeof(NSOHeader), program_image.begin(),
143 program_image.end());
161 144
162 pi_header = pm->PatchNSO(pi_header); 145 pi_header = pm->PatchNSO(pi_header);
163 146
164 std::memcpy(program_image.data(), pi_header.data() + 0x100, program_image.size()); 147 std::copy(pi_header.begin() + sizeof(NSOHeader), pi_header.end(), program_image.begin());
148 }
149
150 // Apply cheats if they exist and the program has a valid title ID
151 if (pm) {
152 auto& system = Core::System::GetInstance();
153 const auto cheats = pm->CreateCheatList(system, nso_header.build_id);
154 if (!cheats.empty()) {
155 system.RegisterCheatList(cheats, Common::HexArrayToString(nso_header.build_id),
156 load_base, load_base + program_image.size());
157 }
165 } 158 }
166 159
167 // Load codeset for current process 160 // Load codeset for current process
168 codeset.memory = std::make_shared<std::vector<u8>>(std::move(program_image)); 161 codeset.memory = std::move(program_image);
169 process.LoadModule(std::move(codeset), load_base); 162 process.LoadModule(std::move(codeset), load_base);
170 163
171 // Register module with GDBStub 164 // Register module with GDBStub
diff --git a/src/core/loader/nso.h b/src/core/loader/nso.h
index 135b6ea5a..4674c3724 100644
--- a/src/core/loader/nso.h
+++ b/src/core/loader/nso.h
@@ -4,10 +4,12 @@
4 4
5#pragma once 5#pragma once
6 6
7#include <array>
7#include <optional> 8#include <optional>
9#include <type_traits>
8#include "common/common_types.h" 10#include "common/common_types.h"
11#include "common/swap.h"
9#include "core/file_sys/patch_manager.h" 12#include "core/file_sys/patch_manager.h"
10#include "core/loader/linker.h"
11#include "core/loader/loader.h" 13#include "core/loader/loader.h"
12 14
13namespace Kernel { 15namespace Kernel {
@@ -16,6 +18,43 @@ class Process;
16 18
17namespace Loader { 19namespace Loader {
18 20
21struct NSOSegmentHeader {
22 u32_le offset;
23 u32_le location;
24 u32_le size;
25 union {
26 u32_le alignment;
27 u32_le bss_size;
28 };
29};
30static_assert(sizeof(NSOSegmentHeader) == 0x10, "NsoSegmentHeader has incorrect size.");
31
32struct NSOHeader {
33 using SHA256Hash = std::array<u8, 0x20>;
34
35 struct RODataRelativeExtent {
36 u32_le data_offset;
37 u32_le size;
38 };
39
40 u32_le magic;
41 u32_le version;
42 u32 reserved;
43 u32_le flags;
44 std::array<NSOSegmentHeader, 3> segments; // Text, RoData, Data (in that order)
45 std::array<u8, 0x20> build_id;
46 std::array<u32_le, 3> segments_compressed_size;
47 std::array<u8, 0x1C> padding;
48 RODataRelativeExtent api_info_extent;
49 RODataRelativeExtent dynstr_extent;
50 RODataRelativeExtent dynsyn_extent;
51 std::array<SHA256Hash, 3> segment_hashes;
52
53 bool IsSegmentCompressed(size_t segment_num) const;
54};
55static_assert(sizeof(NSOHeader) == 0x100, "NSOHeader has incorrect size.");
56static_assert(std::is_trivially_copyable_v<NSOHeader>, "NSOHeader must be trivially copyable.");
57
19constexpr u64 NSO_ARGUMENT_DATA_ALLOCATION_SIZE = 0x9000; 58constexpr u64 NSO_ARGUMENT_DATA_ALLOCATION_SIZE = 0x9000;
20 59
21struct NSOArgumentHeader { 60struct NSOArgumentHeader {
@@ -26,7 +65,7 @@ struct NSOArgumentHeader {
26static_assert(sizeof(NSOArgumentHeader) == 0x20, "NSOArgumentHeader has incorrect size."); 65static_assert(sizeof(NSOArgumentHeader) == 0x20, "NSOArgumentHeader has incorrect size.");
27 66
28/// Loads an NSO file 67/// Loads an NSO file
29class AppLoader_NSO final : public AppLoader, Linker { 68class AppLoader_NSO final : public AppLoader {
30public: 69public:
31 explicit AppLoader_NSO(FileSys::VirtualFile file); 70 explicit AppLoader_NSO(FileSys::VirtualFile file);
32 71
diff --git a/src/core/memory.cpp b/src/core/memory.cpp
index e9166dbd9..332c1037c 100644
--- a/src/core/memory.cpp
+++ b/src/core/memory.cpp
@@ -10,6 +10,7 @@
10#include "common/assert.h" 10#include "common/assert.h"
11#include "common/common_types.h" 11#include "common/common_types.h"
12#include "common/logging/log.h" 12#include "common/logging/log.h"
13#include "common/page_table.h"
13#include "common/swap.h" 14#include "common/swap.h"
14#include "core/arm/arm_interface.h" 15#include "core/arm/arm_interface.h"
15#include "core/core.h" 16#include "core/core.h"
@@ -18,13 +19,14 @@
18#include "core/hle/lock.h" 19#include "core/hle/lock.h"
19#include "core/memory.h" 20#include "core/memory.h"
20#include "core/memory_setup.h" 21#include "core/memory_setup.h"
22#include "video_core/gpu.h"
21#include "video_core/renderer_base.h" 23#include "video_core/renderer_base.h"
22 24
23namespace Memory { 25namespace Memory {
24 26
25static PageTable* current_page_table = nullptr; 27static Common::PageTable* current_page_table = nullptr;
26 28
27void SetCurrentPageTable(PageTable* page_table) { 29void SetCurrentPageTable(Common::PageTable* page_table) {
28 current_page_table = page_table; 30 current_page_table = page_table;
29 31
30 auto& system = Core::System::GetInstance(); 32 auto& system = Core::System::GetInstance();
@@ -36,88 +38,80 @@ void SetCurrentPageTable(PageTable* page_table) {
36 } 38 }
37} 39}
38 40
39PageTable* GetCurrentPageTable() { 41Common::PageTable* GetCurrentPageTable() {
40 return current_page_table; 42 return current_page_table;
41} 43}
42 44
43PageTable::PageTable() = default; 45static void MapPages(Common::PageTable& page_table, VAddr base, u64 size, u8* memory,
44 46 Common::PageType type) {
45PageTable::PageTable(std::size_t address_space_width_in_bits) {
46 Resize(address_space_width_in_bits);
47}
48
49PageTable::~PageTable() = default;
50
51void PageTable::Resize(std::size_t address_space_width_in_bits) {
52 const std::size_t num_page_table_entries = 1ULL << (address_space_width_in_bits - PAGE_BITS);
53
54 pointers.resize(num_page_table_entries);
55 attributes.resize(num_page_table_entries);
56
57 // The default is a 39-bit address space, which causes an initial 1GB allocation size. If the
58 // vector size is subsequently decreased (via resize), the vector might not automatically
59 // actually reallocate/resize its underlying allocation, which wastes up to ~800 MB for
60 // 36-bit titles. Call shrink_to_fit to reduce capacity to what's actually in use.
61
62 pointers.shrink_to_fit();
63 attributes.shrink_to_fit();
64}
65
66static void MapPages(PageTable& page_table, VAddr base, u64 size, u8* memory, PageType type) {
67 LOG_DEBUG(HW_Memory, "Mapping {} onto {:016X}-{:016X}", fmt::ptr(memory), base * PAGE_SIZE, 47 LOG_DEBUG(HW_Memory, "Mapping {} onto {:016X}-{:016X}", fmt::ptr(memory), base * PAGE_SIZE,
68 (base + size) * PAGE_SIZE); 48 (base + size) * PAGE_SIZE);
69 49
70 RasterizerFlushVirtualRegion(base << PAGE_BITS, size * PAGE_SIZE, 50 // During boot, current_page_table might not be set yet, in which case we need not flush
71 FlushMode::FlushAndInvalidate); 51 if (Core::System::GetInstance().IsPoweredOn()) {
52 Core::System::GetInstance().GPU().FlushAndInvalidateRegion(base << PAGE_BITS,
53 size * PAGE_SIZE);
54 }
72 55
73 VAddr end = base + size; 56 VAddr end = base + size;
74 while (base != end) { 57 ASSERT_MSG(end <= page_table.pointers.size(), "out of range mapping at {:016X}",
75 ASSERT_MSG(base < page_table.pointers.size(), "out of range mapping at {:016X}", base); 58 base + page_table.pointers.size());
76 59
77 page_table.attributes[base] = type; 60 std::fill(page_table.attributes.begin() + base, page_table.attributes.begin() + end, type);
78 page_table.pointers[base] = memory;
79 61
80 base += 1; 62 if (memory == nullptr) {
81 if (memory != nullptr) 63 std::fill(page_table.pointers.begin() + base, page_table.pointers.begin() + end, memory);
64 } else {
65 while (base != end) {
66 page_table.pointers[base] = memory;
67
68 base += 1;
82 memory += PAGE_SIZE; 69 memory += PAGE_SIZE;
70 }
83 } 71 }
84} 72}
85 73
86void MapMemoryRegion(PageTable& page_table, VAddr base, u64 size, u8* target) { 74void MapMemoryRegion(Common::PageTable& page_table, VAddr base, u64 size, u8* target) {
87 ASSERT_MSG((size & PAGE_MASK) == 0, "non-page aligned size: {:016X}", size); 75 ASSERT_MSG((size & PAGE_MASK) == 0, "non-page aligned size: {:016X}", size);
88 ASSERT_MSG((base & PAGE_MASK) == 0, "non-page aligned base: {:016X}", base); 76 ASSERT_MSG((base & PAGE_MASK) == 0, "non-page aligned base: {:016X}", base);
89 MapPages(page_table, base / PAGE_SIZE, size / PAGE_SIZE, target, PageType::Memory); 77 MapPages(page_table, base / PAGE_SIZE, size / PAGE_SIZE, target, Common::PageType::Memory);
90} 78}
91 79
92void MapIoRegion(PageTable& page_table, VAddr base, u64 size, MemoryHookPointer mmio_handler) { 80void MapIoRegion(Common::PageTable& page_table, VAddr base, u64 size,
81 Common::MemoryHookPointer mmio_handler) {
93 ASSERT_MSG((size & PAGE_MASK) == 0, "non-page aligned size: {:016X}", size); 82 ASSERT_MSG((size & PAGE_MASK) == 0, "non-page aligned size: {:016X}", size);
94 ASSERT_MSG((base & PAGE_MASK) == 0, "non-page aligned base: {:016X}", base); 83 ASSERT_MSG((base & PAGE_MASK) == 0, "non-page aligned base: {:016X}", base);
95 MapPages(page_table, base / PAGE_SIZE, size / PAGE_SIZE, nullptr, PageType::Special); 84 MapPages(page_table, base / PAGE_SIZE, size / PAGE_SIZE, nullptr, Common::PageType::Special);
96 85
97 auto interval = boost::icl::discrete_interval<VAddr>::closed(base, base + size - 1); 86 auto interval = boost::icl::discrete_interval<VAddr>::closed(base, base + size - 1);
98 SpecialRegion region{SpecialRegion::Type::IODevice, std::move(mmio_handler)}; 87 Common::SpecialRegion region{Common::SpecialRegion::Type::IODevice, std::move(mmio_handler)};
99 page_table.special_regions.add(std::make_pair(interval, std::set<SpecialRegion>{region})); 88 page_table.special_regions.add(
89 std::make_pair(interval, std::set<Common::SpecialRegion>{region}));
100} 90}
101 91
102void UnmapRegion(PageTable& page_table, VAddr base, u64 size) { 92void UnmapRegion(Common::PageTable& page_table, VAddr base, u64 size) {
103 ASSERT_MSG((size & PAGE_MASK) == 0, "non-page aligned size: {:016X}", size); 93 ASSERT_MSG((size & PAGE_MASK) == 0, "non-page aligned size: {:016X}", size);
104 ASSERT_MSG((base & PAGE_MASK) == 0, "non-page aligned base: {:016X}", base); 94 ASSERT_MSG((base & PAGE_MASK) == 0, "non-page aligned base: {:016X}", base);
105 MapPages(page_table, base / PAGE_SIZE, size / PAGE_SIZE, nullptr, PageType::Unmapped); 95 MapPages(page_table, base / PAGE_SIZE, size / PAGE_SIZE, nullptr, Common::PageType::Unmapped);
106 96
107 auto interval = boost::icl::discrete_interval<VAddr>::closed(base, base + size - 1); 97 auto interval = boost::icl::discrete_interval<VAddr>::closed(base, base + size - 1);
108 page_table.special_regions.erase(interval); 98 page_table.special_regions.erase(interval);
109} 99}
110 100
111void AddDebugHook(PageTable& page_table, VAddr base, u64 size, MemoryHookPointer hook) { 101void AddDebugHook(Common::PageTable& page_table, VAddr base, u64 size,
102 Common::MemoryHookPointer hook) {
112 auto interval = boost::icl::discrete_interval<VAddr>::closed(base, base + size - 1); 103 auto interval = boost::icl::discrete_interval<VAddr>::closed(base, base + size - 1);
113 SpecialRegion region{SpecialRegion::Type::DebugHook, std::move(hook)}; 104 Common::SpecialRegion region{Common::SpecialRegion::Type::DebugHook, std::move(hook)};
114 page_table.special_regions.add(std::make_pair(interval, std::set<SpecialRegion>{region})); 105 page_table.special_regions.add(
106 std::make_pair(interval, std::set<Common::SpecialRegion>{region}));
115} 107}
116 108
117void RemoveDebugHook(PageTable& page_table, VAddr base, u64 size, MemoryHookPointer hook) { 109void RemoveDebugHook(Common::PageTable& page_table, VAddr base, u64 size,
110 Common::MemoryHookPointer hook) {
118 auto interval = boost::icl::discrete_interval<VAddr>::closed(base, base + size - 1); 111 auto interval = boost::icl::discrete_interval<VAddr>::closed(base, base + size - 1);
119 SpecialRegion region{SpecialRegion::Type::DebugHook, std::move(hook)}; 112 Common::SpecialRegion region{Common::SpecialRegion::Type::DebugHook, std::move(hook)};
120 page_table.special_regions.subtract(std::make_pair(interval, std::set<SpecialRegion>{region})); 113 page_table.special_regions.subtract(
114 std::make_pair(interval, std::set<Common::SpecialRegion>{region}));
121} 115}
122 116
123/** 117/**
@@ -166,22 +160,19 @@ T Read(const VAddr vaddr) {
166 return value; 160 return value;
167 } 161 }
168 162
169 // The memory access might do an MMIO or cached access, so we have to lock the HLE kernel state 163 Common::PageType type = current_page_table->attributes[vaddr >> PAGE_BITS];
170 std::lock_guard<std::recursive_mutex> lock(HLE::g_hle_lock);
171
172 PageType type = current_page_table->attributes[vaddr >> PAGE_BITS];
173 switch (type) { 164 switch (type) {
174 case PageType::Unmapped: 165 case Common::PageType::Unmapped:
175 LOG_ERROR(HW_Memory, "Unmapped Read{} @ 0x{:08X}", sizeof(T) * 8, vaddr); 166 LOG_ERROR(HW_Memory, "Unmapped Read{} @ 0x{:08X}", sizeof(T) * 8, vaddr);
176 return 0; 167 return 0;
177 case PageType::Memory: 168 case Common::PageType::Memory:
178 ASSERT_MSG(false, "Mapped memory page without a pointer @ {:016X}", vaddr); 169 ASSERT_MSG(false, "Mapped memory page without a pointer @ {:016X}", vaddr);
179 break; 170 break;
180 case PageType::RasterizerCachedMemory: { 171 case Common::PageType::RasterizerCachedMemory: {
181 RasterizerFlushVirtualRegion(vaddr, sizeof(T), FlushMode::Flush); 172 auto host_ptr{GetPointerFromVMA(vaddr)};
182 173 Core::System::GetInstance().GPU().FlushRegion(ToCacheAddr(host_ptr), sizeof(T));
183 T value; 174 T value;
184 std::memcpy(&value, GetPointerFromVMA(vaddr), sizeof(T)); 175 std::memcpy(&value, host_ptr, sizeof(T));
185 return value; 176 return value;
186 } 177 }
187 default: 178 default:
@@ -199,21 +190,19 @@ void Write(const VAddr vaddr, const T data) {
199 return; 190 return;
200 } 191 }
201 192
202 // The memory access might do an MMIO or cached access, so we have to lock the HLE kernel state 193 Common::PageType type = current_page_table->attributes[vaddr >> PAGE_BITS];
203 std::lock_guard<std::recursive_mutex> lock(HLE::g_hle_lock);
204
205 PageType type = current_page_table->attributes[vaddr >> PAGE_BITS];
206 switch (type) { 194 switch (type) {
207 case PageType::Unmapped: 195 case Common::PageType::Unmapped:
208 LOG_ERROR(HW_Memory, "Unmapped Write{} 0x{:08X} @ 0x{:016X}", sizeof(data) * 8, 196 LOG_ERROR(HW_Memory, "Unmapped Write{} 0x{:08X} @ 0x{:016X}", sizeof(data) * 8,
209 static_cast<u32>(data), vaddr); 197 static_cast<u32>(data), vaddr);
210 return; 198 return;
211 case PageType::Memory: 199 case Common::PageType::Memory:
212 ASSERT_MSG(false, "Mapped memory page without a pointer @ {:016X}", vaddr); 200 ASSERT_MSG(false, "Mapped memory page without a pointer @ {:016X}", vaddr);
213 break; 201 break;
214 case PageType::RasterizerCachedMemory: { 202 case Common::PageType::RasterizerCachedMemory: {
215 RasterizerFlushVirtualRegion(vaddr, sizeof(T), FlushMode::Invalidate); 203 auto host_ptr{GetPointerFromVMA(vaddr)};
216 std::memcpy(GetPointerFromVMA(vaddr), &data, sizeof(T)); 204 Core::System::GetInstance().GPU().InvalidateRegion(ToCacheAddr(host_ptr), sizeof(T));
205 std::memcpy(host_ptr, &data, sizeof(T));
217 break; 206 break;
218 } 207 }
219 default: 208 default:
@@ -228,10 +217,10 @@ bool IsValidVirtualAddress(const Kernel::Process& process, const VAddr vaddr) {
228 if (page_pointer) 217 if (page_pointer)
229 return true; 218 return true;
230 219
231 if (page_table.attributes[vaddr >> PAGE_BITS] == PageType::RasterizerCachedMemory) 220 if (page_table.attributes[vaddr >> PAGE_BITS] == Common::PageType::RasterizerCachedMemory)
232 return true; 221 return true;
233 222
234 if (page_table.attributes[vaddr >> PAGE_BITS] != PageType::Special) 223 if (page_table.attributes[vaddr >> PAGE_BITS] != Common::PageType::Special)
235 return false; 224 return false;
236 225
237 return false; 226 return false;
@@ -251,7 +240,8 @@ u8* GetPointer(const VAddr vaddr) {
251 return page_pointer + (vaddr & PAGE_MASK); 240 return page_pointer + (vaddr & PAGE_MASK);
252 } 241 }
253 242
254 if (current_page_table->attributes[vaddr >> PAGE_BITS] == PageType::RasterizerCachedMemory) { 243 if (current_page_table->attributes[vaddr >> PAGE_BITS] ==
244 Common::PageType::RasterizerCachedMemory) {
255 return GetPointerFromVMA(vaddr); 245 return GetPointerFromVMA(vaddr);
256 } 246 }
257 247
@@ -285,20 +275,20 @@ void RasterizerMarkRegionCached(VAddr vaddr, u64 size, bool cached) {
285 275
286 u64 num_pages = ((vaddr + size - 1) >> PAGE_BITS) - (vaddr >> PAGE_BITS) + 1; 276 u64 num_pages = ((vaddr + size - 1) >> PAGE_BITS) - (vaddr >> PAGE_BITS) + 1;
287 for (unsigned i = 0; i < num_pages; ++i, vaddr += PAGE_SIZE) { 277 for (unsigned i = 0; i < num_pages; ++i, vaddr += PAGE_SIZE) {
288 PageType& page_type = current_page_table->attributes[vaddr >> PAGE_BITS]; 278 Common::PageType& page_type = current_page_table->attributes[vaddr >> PAGE_BITS];
289 279
290 if (cached) { 280 if (cached) {
291 // Switch page type to cached if now cached 281 // Switch page type to cached if now cached
292 switch (page_type) { 282 switch (page_type) {
293 case PageType::Unmapped: 283 case Common::PageType::Unmapped:
294 // It is not necessary for a process to have this region mapped into its address 284 // It is not necessary for a process to have this region mapped into its address
295 // space, for example, a system module need not have a VRAM mapping. 285 // space, for example, a system module need not have a VRAM mapping.
296 break; 286 break;
297 case PageType::Memory: 287 case Common::PageType::Memory:
298 page_type = PageType::RasterizerCachedMemory; 288 page_type = Common::PageType::RasterizerCachedMemory;
299 current_page_table->pointers[vaddr >> PAGE_BITS] = nullptr; 289 current_page_table->pointers[vaddr >> PAGE_BITS] = nullptr;
300 break; 290 break;
301 case PageType::RasterizerCachedMemory: 291 case Common::PageType::RasterizerCachedMemory:
302 // There can be more than one GPU region mapped per CPU region, so it's common that 292 // There can be more than one GPU region mapped per CPU region, so it's common that
303 // this area is already marked as cached. 293 // this area is already marked as cached.
304 break; 294 break;
@@ -308,23 +298,23 @@ void RasterizerMarkRegionCached(VAddr vaddr, u64 size, bool cached) {
308 } else { 298 } else {
309 // Switch page type to uncached if now uncached 299 // Switch page type to uncached if now uncached
310 switch (page_type) { 300 switch (page_type) {
311 case PageType::Unmapped: 301 case Common::PageType::Unmapped:
312 // It is not necessary for a process to have this region mapped into its address 302 // It is not necessary for a process to have this region mapped into its address
313 // space, for example, a system module need not have a VRAM mapping. 303 // space, for example, a system module need not have a VRAM mapping.
314 break; 304 break;
315 case PageType::Memory: 305 case Common::PageType::Memory:
316 // There can be more than one GPU region mapped per CPU region, so it's common that 306 // There can be more than one GPU region mapped per CPU region, so it's common that
317 // this area is already unmarked as cached. 307 // this area is already unmarked as cached.
318 break; 308 break;
319 case PageType::RasterizerCachedMemory: { 309 case Common::PageType::RasterizerCachedMemory: {
320 u8* pointer = GetPointerFromVMA(vaddr & ~PAGE_MASK); 310 u8* pointer = GetPointerFromVMA(vaddr & ~PAGE_MASK);
321 if (pointer == nullptr) { 311 if (pointer == nullptr) {
322 // It's possible that this function has been called while updating the pagetable 312 // It's possible that this function has been called while updating the pagetable
323 // after unmapping a VMA. In that case the underlying VMA will no longer exist, 313 // after unmapping a VMA. In that case the underlying VMA will no longer exist,
324 // and we should just leave the pagetable entry blank. 314 // and we should just leave the pagetable entry blank.
325 page_type = PageType::Unmapped; 315 page_type = Common::PageType::Unmapped;
326 } else { 316 } else {
327 page_type = PageType::Memory; 317 page_type = Common::PageType::Memory;
328 current_page_table->pointers[vaddr >> PAGE_BITS] = pointer; 318 current_page_table->pointers[vaddr >> PAGE_BITS] = pointer;
329 } 319 }
330 break; 320 break;
@@ -336,47 +326,6 @@ void RasterizerMarkRegionCached(VAddr vaddr, u64 size, bool cached) {
336 } 326 }
337} 327}
338 328
339void RasterizerFlushVirtualRegion(VAddr start, u64 size, FlushMode mode) {
340 auto& system_instance = Core::System::GetInstance();
341
342 // Since pages are unmapped on shutdown after video core is shutdown, the renderer may be
343 // null here
344 if (!system_instance.IsPoweredOn()) {
345 return;
346 }
347
348 const VAddr end = start + size;
349
350 const auto CheckRegion = [&](VAddr region_start, VAddr region_end) {
351 if (start >= region_end || end <= region_start) {
352 // No overlap with region
353 return;
354 }
355
356 const VAddr overlap_start = std::max(start, region_start);
357 const VAddr overlap_end = std::min(end, region_end);
358 const VAddr overlap_size = overlap_end - overlap_start;
359
360 auto& rasterizer = system_instance.Renderer().Rasterizer();
361 switch (mode) {
362 case FlushMode::Flush:
363 rasterizer.FlushRegion(overlap_start, overlap_size);
364 break;
365 case FlushMode::Invalidate:
366 rasterizer.InvalidateRegion(overlap_start, overlap_size);
367 break;
368 case FlushMode::FlushAndInvalidate:
369 rasterizer.FlushAndInvalidateRegion(overlap_start, overlap_size);
370 break;
371 }
372 };
373
374 const auto& vm_manager = Core::CurrentProcess()->VMManager();
375
376 CheckRegion(vm_manager.GetCodeRegionBaseAddress(), vm_manager.GetCodeRegionEndAddress());
377 CheckRegion(vm_manager.GetHeapRegionBaseAddress(), vm_manager.GetHeapRegionEndAddress());
378}
379
380u8 Read8(const VAddr addr) { 329u8 Read8(const VAddr addr) {
381 return Read<u8>(addr); 330 return Read<u8>(addr);
382} 331}
@@ -407,24 +356,24 @@ void ReadBlock(const Kernel::Process& process, const VAddr src_addr, void* dest_
407 const VAddr current_vaddr = static_cast<VAddr>((page_index << PAGE_BITS) + page_offset); 356 const VAddr current_vaddr = static_cast<VAddr>((page_index << PAGE_BITS) + page_offset);
408 357
409 switch (page_table.attributes[page_index]) { 358 switch (page_table.attributes[page_index]) {
410 case PageType::Unmapped: { 359 case Common::PageType::Unmapped: {
411 LOG_ERROR(HW_Memory, 360 LOG_ERROR(HW_Memory,
412 "Unmapped ReadBlock @ 0x{:016X} (start address = 0x{:016X}, size = {})", 361 "Unmapped ReadBlock @ 0x{:016X} (start address = 0x{:016X}, size = {})",
413 current_vaddr, src_addr, size); 362 current_vaddr, src_addr, size);
414 std::memset(dest_buffer, 0, copy_amount); 363 std::memset(dest_buffer, 0, copy_amount);
415 break; 364 break;
416 } 365 }
417 case PageType::Memory: { 366 case Common::PageType::Memory: {
418 DEBUG_ASSERT(page_table.pointers[page_index]); 367 DEBUG_ASSERT(page_table.pointers[page_index]);
419 368
420 const u8* src_ptr = page_table.pointers[page_index] + page_offset; 369 const u8* src_ptr = page_table.pointers[page_index] + page_offset;
421 std::memcpy(dest_buffer, src_ptr, copy_amount); 370 std::memcpy(dest_buffer, src_ptr, copy_amount);
422 break; 371 break;
423 } 372 }
424 case PageType::RasterizerCachedMemory: { 373 case Common::PageType::RasterizerCachedMemory: {
425 RasterizerFlushVirtualRegion(current_vaddr, static_cast<u32>(copy_amount), 374 const auto& host_ptr{GetPointerFromVMA(process, current_vaddr)};
426 FlushMode::Flush); 375 Core::System::GetInstance().GPU().FlushRegion(ToCacheAddr(host_ptr), copy_amount);
427 std::memcpy(dest_buffer, GetPointerFromVMA(process, current_vaddr), copy_amount); 376 std::memcpy(dest_buffer, host_ptr, copy_amount);
428 break; 377 break;
429 } 378 }
430 default: 379 default:
@@ -471,23 +420,23 @@ void WriteBlock(const Kernel::Process& process, const VAddr dest_addr, const voi
471 const VAddr current_vaddr = static_cast<VAddr>((page_index << PAGE_BITS) + page_offset); 420 const VAddr current_vaddr = static_cast<VAddr>((page_index << PAGE_BITS) + page_offset);
472 421
473 switch (page_table.attributes[page_index]) { 422 switch (page_table.attributes[page_index]) {
474 case PageType::Unmapped: { 423 case Common::PageType::Unmapped: {
475 LOG_ERROR(HW_Memory, 424 LOG_ERROR(HW_Memory,
476 "Unmapped WriteBlock @ 0x{:016X} (start address = 0x{:016X}, size = {})", 425 "Unmapped WriteBlock @ 0x{:016X} (start address = 0x{:016X}, size = {})",
477 current_vaddr, dest_addr, size); 426 current_vaddr, dest_addr, size);
478 break; 427 break;
479 } 428 }
480 case PageType::Memory: { 429 case Common::PageType::Memory: {
481 DEBUG_ASSERT(page_table.pointers[page_index]); 430 DEBUG_ASSERT(page_table.pointers[page_index]);
482 431
483 u8* dest_ptr = page_table.pointers[page_index] + page_offset; 432 u8* dest_ptr = page_table.pointers[page_index] + page_offset;
484 std::memcpy(dest_ptr, src_buffer, copy_amount); 433 std::memcpy(dest_ptr, src_buffer, copy_amount);
485 break; 434 break;
486 } 435 }
487 case PageType::RasterizerCachedMemory: { 436 case Common::PageType::RasterizerCachedMemory: {
488 RasterizerFlushVirtualRegion(current_vaddr, static_cast<u32>(copy_amount), 437 const auto& host_ptr{GetPointerFromVMA(process, current_vaddr)};
489 FlushMode::Invalidate); 438 Core::System::GetInstance().GPU().InvalidateRegion(ToCacheAddr(host_ptr), copy_amount);
490 std::memcpy(GetPointerFromVMA(process, current_vaddr), src_buffer, copy_amount); 439 std::memcpy(host_ptr, src_buffer, copy_amount);
491 break; 440 break;
492 } 441 }
493 default: 442 default:
@@ -517,23 +466,23 @@ void ZeroBlock(const Kernel::Process& process, const VAddr dest_addr, const std:
517 const VAddr current_vaddr = static_cast<VAddr>((page_index << PAGE_BITS) + page_offset); 466 const VAddr current_vaddr = static_cast<VAddr>((page_index << PAGE_BITS) + page_offset);
518 467
519 switch (page_table.attributes[page_index]) { 468 switch (page_table.attributes[page_index]) {
520 case PageType::Unmapped: { 469 case Common::PageType::Unmapped: {
521 LOG_ERROR(HW_Memory, 470 LOG_ERROR(HW_Memory,
522 "Unmapped ZeroBlock @ 0x{:016X} (start address = 0x{:016X}, size = {})", 471 "Unmapped ZeroBlock @ 0x{:016X} (start address = 0x{:016X}, size = {})",
523 current_vaddr, dest_addr, size); 472 current_vaddr, dest_addr, size);
524 break; 473 break;
525 } 474 }
526 case PageType::Memory: { 475 case Common::PageType::Memory: {
527 DEBUG_ASSERT(page_table.pointers[page_index]); 476 DEBUG_ASSERT(page_table.pointers[page_index]);
528 477
529 u8* dest_ptr = page_table.pointers[page_index] + page_offset; 478 u8* dest_ptr = page_table.pointers[page_index] + page_offset;
530 std::memset(dest_ptr, 0, copy_amount); 479 std::memset(dest_ptr, 0, copy_amount);
531 break; 480 break;
532 } 481 }
533 case PageType::RasterizerCachedMemory: { 482 case Common::PageType::RasterizerCachedMemory: {
534 RasterizerFlushVirtualRegion(current_vaddr, static_cast<u32>(copy_amount), 483 const auto& host_ptr{GetPointerFromVMA(process, current_vaddr)};
535 FlushMode::Invalidate); 484 Core::System::GetInstance().GPU().InvalidateRegion(ToCacheAddr(host_ptr), copy_amount);
536 std::memset(GetPointerFromVMA(process, current_vaddr), 0, copy_amount); 485 std::memset(host_ptr, 0, copy_amount);
537 break; 486 break;
538 } 487 }
539 default: 488 default:
@@ -559,23 +508,23 @@ void CopyBlock(const Kernel::Process& process, VAddr dest_addr, VAddr src_addr,
559 const VAddr current_vaddr = static_cast<VAddr>((page_index << PAGE_BITS) + page_offset); 508 const VAddr current_vaddr = static_cast<VAddr>((page_index << PAGE_BITS) + page_offset);
560 509
561 switch (page_table.attributes[page_index]) { 510 switch (page_table.attributes[page_index]) {
562 case PageType::Unmapped: { 511 case Common::PageType::Unmapped: {
563 LOG_ERROR(HW_Memory, 512 LOG_ERROR(HW_Memory,
564 "Unmapped CopyBlock @ 0x{:016X} (start address = 0x{:016X}, size = {})", 513 "Unmapped CopyBlock @ 0x{:016X} (start address = 0x{:016X}, size = {})",
565 current_vaddr, src_addr, size); 514 current_vaddr, src_addr, size);
566 ZeroBlock(process, dest_addr, copy_amount); 515 ZeroBlock(process, dest_addr, copy_amount);
567 break; 516 break;
568 } 517 }
569 case PageType::Memory: { 518 case Common::PageType::Memory: {
570 DEBUG_ASSERT(page_table.pointers[page_index]); 519 DEBUG_ASSERT(page_table.pointers[page_index]);
571 const u8* src_ptr = page_table.pointers[page_index] + page_offset; 520 const u8* src_ptr = page_table.pointers[page_index] + page_offset;
572 WriteBlock(process, dest_addr, src_ptr, copy_amount); 521 WriteBlock(process, dest_addr, src_ptr, copy_amount);
573 break; 522 break;
574 } 523 }
575 case PageType::RasterizerCachedMemory: { 524 case Common::PageType::RasterizerCachedMemory: {
576 RasterizerFlushVirtualRegion(current_vaddr, static_cast<u32>(copy_amount), 525 const auto& host_ptr{GetPointerFromVMA(process, current_vaddr)};
577 FlushMode::Flush); 526 Core::System::GetInstance().GPU().FlushRegion(ToCacheAddr(host_ptr), copy_amount);
578 WriteBlock(process, dest_addr, GetPointerFromVMA(process, current_vaddr), copy_amount); 527 WriteBlock(process, dest_addr, host_ptr, copy_amount);
579 break; 528 break;
580 } 529 }
581 default: 530 default:
diff --git a/src/core/memory.h b/src/core/memory.h
index 1acf5ce8c..1d38cdca8 100644
--- a/src/core/memory.h
+++ b/src/core/memory.h
@@ -6,11 +6,11 @@
6 6
7#include <cstddef> 7#include <cstddef>
8#include <string> 8#include <string>
9#include <tuple>
10#include <vector>
11#include <boost/icl/interval_map.hpp>
12#include "common/common_types.h" 9#include "common/common_types.h"
13#include "core/memory_hook.h" 10
11namespace Common {
12struct PageTable;
13}
14 14
15namespace Kernel { 15namespace Kernel {
16class Process; 16class Process;
@@ -26,71 +26,6 @@ constexpr std::size_t PAGE_BITS = 12;
26constexpr u64 PAGE_SIZE = 1ULL << PAGE_BITS; 26constexpr u64 PAGE_SIZE = 1ULL << PAGE_BITS;
27constexpr u64 PAGE_MASK = PAGE_SIZE - 1; 27constexpr u64 PAGE_MASK = PAGE_SIZE - 1;
28 28
29enum class PageType : u8 {
30 /// Page is unmapped and should cause an access error.
31 Unmapped,
32 /// Page is mapped to regular memory. This is the only type you can get pointers to.
33 Memory,
34 /// Page is mapped to regular memory, but also needs to check for rasterizer cache flushing and
35 /// invalidation
36 RasterizerCachedMemory,
37 /// Page is mapped to a I/O region. Writing and reading to this page is handled by functions.
38 Special,
39};
40
41struct SpecialRegion {
42 enum class Type {
43 DebugHook,
44 IODevice,
45 } type;
46
47 MemoryHookPointer handler;
48
49 bool operator<(const SpecialRegion& other) const {
50 return std::tie(type, handler) < std::tie(other.type, other.handler);
51 }
52
53 bool operator==(const SpecialRegion& other) const {
54 return std::tie(type, handler) == std::tie(other.type, other.handler);
55 }
56};
57
58/**
59 * A (reasonably) fast way of allowing switchable and remappable process address spaces. It loosely
60 * mimics the way a real CPU page table works.
61 */
62struct PageTable {
63 explicit PageTable();
64 explicit PageTable(std::size_t address_space_width_in_bits);
65 ~PageTable();
66
67 /**
68 * Resizes the page table to be able to accomodate enough pages within
69 * a given address space.
70 *
71 * @param address_space_width_in_bits The address size width in bits.
72 */
73 void Resize(std::size_t address_space_width_in_bits);
74
75 /**
76 * Vector of memory pointers backing each page. An entry can only be non-null if the
77 * corresponding entry in the `attributes` vector is of type `Memory`.
78 */
79 std::vector<u8*> pointers;
80
81 /**
82 * Contains MMIO handlers that back memory regions whose entries in the `attribute` vector is
83 * of type `Special`.
84 */
85 boost::icl::interval_map<VAddr, std::set<SpecialRegion>> special_regions;
86
87 /**
88 * Vector of fine grained page attributes. If it is set to any value other than `Memory`, then
89 * the corresponding entry in `pointers` MUST be set to null.
90 */
91 std::vector<PageType> attributes;
92};
93
94/// Virtual user-space memory regions 29/// Virtual user-space memory regions
95enum : VAddr { 30enum : VAddr {
96 /// Read-only page containing kernel and system configuration values. 31 /// Read-only page containing kernel and system configuration values.
@@ -116,8 +51,8 @@ enum : VAddr {
116}; 51};
117 52
118/// Currently active page table 53/// Currently active page table
119void SetCurrentPageTable(PageTable* page_table); 54void SetCurrentPageTable(Common::PageTable* page_table);
120PageTable* GetCurrentPageTable(); 55Common::PageTable* GetCurrentPageTable();
121 56
122/// Determines if the given VAddr is valid for the specified process. 57/// Determines if the given VAddr is valid for the specified process.
123bool IsValidVirtualAddress(const Kernel::Process& process, VAddr vaddr); 58bool IsValidVirtualAddress(const Kernel::Process& process, VAddr vaddr);
@@ -161,10 +96,4 @@ enum class FlushMode {
161 */ 96 */
162void RasterizerMarkRegionCached(VAddr vaddr, u64 size, bool cached); 97void RasterizerMarkRegionCached(VAddr vaddr, u64 size, bool cached);
163 98
164/**
165 * Flushes and invalidates any externally cached rasterizer resources touching the given virtual
166 * address region.
167 */
168void RasterizerFlushVirtualRegion(VAddr start, u64 size, FlushMode mode);
169
170} // namespace Memory 99} // namespace Memory
diff --git a/src/core/memory_setup.h b/src/core/memory_setup.h
index 9a1a4f4be..5225ee8e2 100644
--- a/src/core/memory_setup.h
+++ b/src/core/memory_setup.h
@@ -5,7 +5,11 @@
5#pragma once 5#pragma once
6 6
7#include "common/common_types.h" 7#include "common/common_types.h"
8#include "core/memory_hook.h" 8#include "common/memory_hook.h"
9
10namespace Common {
11struct PageTable;
12}
9 13
10namespace Memory { 14namespace Memory {
11 15
@@ -17,7 +21,7 @@ namespace Memory {
17 * @param size The amount of bytes to map. Must be page-aligned. 21 * @param size The amount of bytes to map. Must be page-aligned.
18 * @param target Buffer with the memory backing the mapping. Must be of length at least `size`. 22 * @param target Buffer with the memory backing the mapping. Must be of length at least `size`.
19 */ 23 */
20void MapMemoryRegion(PageTable& page_table, VAddr base, u64 size, u8* target); 24void MapMemoryRegion(Common::PageTable& page_table, VAddr base, u64 size, u8* target);
21 25
22/** 26/**
23 * Maps a region of the emulated process address space as a IO region. 27 * Maps a region of the emulated process address space as a IO region.
@@ -26,11 +30,14 @@ void MapMemoryRegion(PageTable& page_table, VAddr base, u64 size, u8* target);
26 * @param size The amount of bytes to map. Must be page-aligned. 30 * @param size The amount of bytes to map. Must be page-aligned.
27 * @param mmio_handler The handler that backs the mapping. 31 * @param mmio_handler The handler that backs the mapping.
28 */ 32 */
29void MapIoRegion(PageTable& page_table, VAddr base, u64 size, MemoryHookPointer mmio_handler); 33void MapIoRegion(Common::PageTable& page_table, VAddr base, u64 size,
34 Common::MemoryHookPointer mmio_handler);
30 35
31void UnmapRegion(PageTable& page_table, VAddr base, u64 size); 36void UnmapRegion(Common::PageTable& page_table, VAddr base, u64 size);
32 37
33void AddDebugHook(PageTable& page_table, VAddr base, u64 size, MemoryHookPointer hook); 38void AddDebugHook(Common::PageTable& page_table, VAddr base, u64 size,
34void RemoveDebugHook(PageTable& page_table, VAddr base, u64 size, MemoryHookPointer hook); 39 Common::MemoryHookPointer hook);
40void RemoveDebugHook(Common::PageTable& page_table, VAddr base, u64 size,
41 Common::MemoryHookPointer hook);
35 42
36} // namespace Memory 43} // namespace Memory
diff --git a/src/core/perf_stats.cpp b/src/core/perf_stats.cpp
index c716a462b..4afd6c8a3 100644
--- a/src/core/perf_stats.cpp
+++ b/src/core/perf_stats.cpp
@@ -18,13 +18,13 @@ using std::chrono::microseconds;
18namespace Core { 18namespace Core {
19 19
20void PerfStats::BeginSystemFrame() { 20void PerfStats::BeginSystemFrame() {
21 std::lock_guard<std::mutex> lock(object_mutex); 21 std::lock_guard lock{object_mutex};
22 22
23 frame_begin = Clock::now(); 23 frame_begin = Clock::now();
24} 24}
25 25
26void PerfStats::EndSystemFrame() { 26void PerfStats::EndSystemFrame() {
27 std::lock_guard<std::mutex> lock(object_mutex); 27 std::lock_guard lock{object_mutex};
28 28
29 auto frame_end = Clock::now(); 29 auto frame_end = Clock::now();
30 accumulated_frametime += frame_end - frame_begin; 30 accumulated_frametime += frame_end - frame_begin;
@@ -35,13 +35,13 @@ void PerfStats::EndSystemFrame() {
35} 35}
36 36
37void PerfStats::EndGameFrame() { 37void PerfStats::EndGameFrame() {
38 std::lock_guard<std::mutex> lock(object_mutex); 38 std::lock_guard lock{object_mutex};
39 39
40 game_frames += 1; 40 game_frames += 1;
41} 41}
42 42
43PerfStatsResults PerfStats::GetAndResetStats(microseconds current_system_time_us) { 43PerfStatsResults PerfStats::GetAndResetStats(microseconds current_system_time_us) {
44 std::lock_guard<std::mutex> lock(object_mutex); 44 std::lock_guard lock{object_mutex};
45 45
46 const auto now = Clock::now(); 46 const auto now = Clock::now();
47 // Walltime elapsed since stats were reset 47 // Walltime elapsed since stats were reset
@@ -67,7 +67,7 @@ PerfStatsResults PerfStats::GetAndResetStats(microseconds current_system_time_us
67} 67}
68 68
69double PerfStats::GetLastFrameTimeScale() { 69double PerfStats::GetLastFrameTimeScale() {
70 std::lock_guard<std::mutex> lock(object_mutex); 70 std::lock_guard lock{object_mutex};
71 71
72 constexpr double FRAME_LENGTH = 1.0 / 60; 72 constexpr double FRAME_LENGTH = 1.0 / 60;
73 return duration_cast<DoubleSecs>(previous_frame_length).count() / FRAME_LENGTH; 73 return duration_cast<DoubleSecs>(previous_frame_length).count() / FRAME_LENGTH;
diff --git a/src/core/settings.cpp b/src/core/settings.cpp
index 2e232e1e7..6d32ebea3 100644
--- a/src/core/settings.cpp
+++ b/src/core/settings.cpp
@@ -82,7 +82,6 @@ void LogSetting(const std::string& name, const T& value) {
82void LogSettings() { 82void LogSettings() {
83 LOG_INFO(Config, "yuzu Configuration:"); 83 LOG_INFO(Config, "yuzu Configuration:");
84 LogSetting("System_UseDockedMode", Settings::values.use_docked_mode); 84 LogSetting("System_UseDockedMode", Settings::values.use_docked_mode);
85 LogSetting("System_EnableNfc", Settings::values.enable_nfc);
86 LogSetting("System_RngSeed", Settings::values.rng_seed.value_or(0)); 85 LogSetting("System_RngSeed", Settings::values.rng_seed.value_or(0));
87 LogSetting("System_CurrentUser", Settings::values.current_user); 86 LogSetting("System_CurrentUser", Settings::values.current_user);
88 LogSetting("System_LanguageIndex", Settings::values.language_index); 87 LogSetting("System_LanguageIndex", Settings::values.language_index);
@@ -91,7 +90,10 @@ void LogSettings() {
91 LogSetting("Renderer_UseResolutionFactor", Settings::values.resolution_factor); 90 LogSetting("Renderer_UseResolutionFactor", Settings::values.resolution_factor);
92 LogSetting("Renderer_UseFrameLimit", Settings::values.use_frame_limit); 91 LogSetting("Renderer_UseFrameLimit", Settings::values.use_frame_limit);
93 LogSetting("Renderer_FrameLimit", Settings::values.frame_limit); 92 LogSetting("Renderer_FrameLimit", Settings::values.frame_limit);
93 LogSetting("Renderer_UseDiskShaderCache", Settings::values.use_disk_shader_cache);
94 LogSetting("Renderer_UseAccurateGpuEmulation", Settings::values.use_accurate_gpu_emulation); 94 LogSetting("Renderer_UseAccurateGpuEmulation", Settings::values.use_accurate_gpu_emulation);
95 LogSetting("Renderer_UseAsynchronousGpuEmulation",
96 Settings::values.use_asynchronous_gpu_emulation);
95 LogSetting("Audio_OutputEngine", Settings::values.sink_id); 97 LogSetting("Audio_OutputEngine", Settings::values.sink_id);
96 LogSetting("Audio_EnableAudioStretching", Settings::values.enable_audio_stretching); 98 LogSetting("Audio_EnableAudioStretching", Settings::values.enable_audio_stretching);
97 LogSetting("Audio_OutputDevice", Settings::values.audio_device_id); 99 LogSetting("Audio_OutputDevice", Settings::values.audio_device_id);
diff --git a/src/core/settings.h b/src/core/settings.h
index 7e76e0466..d543eb32f 100644
--- a/src/core/settings.h
+++ b/src/core/settings.h
@@ -349,7 +349,6 @@ struct TouchscreenInput {
349struct Values { 349struct Values {
350 // System 350 // System
351 bool use_docked_mode; 351 bool use_docked_mode;
352 bool enable_nfc;
353 std::optional<u32> rng_seed; 352 std::optional<u32> rng_seed;
354 // Measured in seconds since epoch 353 // Measured in seconds since epoch
355 std::optional<std::chrono::seconds> custom_rtc; 354 std::optional<std::chrono::seconds> custom_rtc;
@@ -393,6 +392,7 @@ struct Values {
393 u16 frame_limit; 392 u16 frame_limit;
394 bool use_disk_shader_cache; 393 bool use_disk_shader_cache;
395 bool use_accurate_gpu_emulation; 394 bool use_accurate_gpu_emulation;
395 bool use_asynchronous_gpu_emulation;
396 396
397 float bg_red; 397 float bg_red;
398 float bg_green; 398 float bg_green;
diff --git a/src/core/telemetry_session.cpp b/src/core/telemetry_session.cpp
index 58dfcc4df..e1db06811 100644
--- a/src/core/telemetry_session.cpp
+++ b/src/core/telemetry_session.cpp
@@ -162,6 +162,8 @@ TelemetrySession::TelemetrySession() {
162 Settings::values.use_disk_shader_cache); 162 Settings::values.use_disk_shader_cache);
163 AddField(Telemetry::FieldType::UserConfig, "Renderer_UseAccurateGpuEmulation", 163 AddField(Telemetry::FieldType::UserConfig, "Renderer_UseAccurateGpuEmulation",
164 Settings::values.use_accurate_gpu_emulation); 164 Settings::values.use_accurate_gpu_emulation);
165 AddField(Telemetry::FieldType::UserConfig, "Renderer_UseAsynchronousGpuEmulation",
166 Settings::values.use_asynchronous_gpu_emulation);
165 AddField(Telemetry::FieldType::UserConfig, "System_UseDockedMode", 167 AddField(Telemetry::FieldType::UserConfig, "System_UseDockedMode",
166 Settings::values.use_docked_mode); 168 Settings::values.use_docked_mode);
167} 169}
diff --git a/src/input_common/CMakeLists.txt b/src/input_common/CMakeLists.txt
index 1c7db28c0..5b4e032bd 100644
--- a/src/input_common/CMakeLists.txt
+++ b/src/input_common/CMakeLists.txt
@@ -7,15 +7,18 @@ add_library(input_common STATIC
7 main.h 7 main.h
8 motion_emu.cpp 8 motion_emu.cpp
9 motion_emu.h 9 motion_emu.h
10 10 sdl/sdl.cpp
11 $<$<BOOL:${SDL2_FOUND}>:sdl/sdl.cpp sdl/sdl.h> 11 sdl/sdl.h
12) 12)
13 13
14create_target_directory_groups(input_common)
15
16target_link_libraries(input_common PUBLIC core PRIVATE common)
17
18if(SDL2_FOUND) 14if(SDL2_FOUND)
15 target_sources(input_common PRIVATE
16 sdl/sdl_impl.cpp
17 sdl/sdl_impl.h
18 )
19 target_link_libraries(input_common PRIVATE SDL2) 19 target_link_libraries(input_common PRIVATE SDL2)
20 target_compile_definitions(input_common PRIVATE HAVE_SDL2) 20 target_compile_definitions(input_common PRIVATE HAVE_SDL2)
21endif() 21endif()
22
23create_target_directory_groups(input_common)
24target_link_libraries(input_common PUBLIC core PRIVATE common)
diff --git a/src/input_common/keyboard.cpp b/src/input_common/keyboard.cpp
index 525fe6abc..078374be5 100644
--- a/src/input_common/keyboard.cpp
+++ b/src/input_common/keyboard.cpp
@@ -36,18 +36,18 @@ struct KeyButtonPair {
36class KeyButtonList { 36class KeyButtonList {
37public: 37public:
38 void AddKeyButton(int key_code, KeyButton* key_button) { 38 void AddKeyButton(int key_code, KeyButton* key_button) {
39 std::lock_guard<std::mutex> guard(mutex); 39 std::lock_guard guard{mutex};
40 list.push_back(KeyButtonPair{key_code, key_button}); 40 list.push_back(KeyButtonPair{key_code, key_button});
41 } 41 }
42 42
43 void RemoveKeyButton(const KeyButton* key_button) { 43 void RemoveKeyButton(const KeyButton* key_button) {
44 std::lock_guard<std::mutex> guard(mutex); 44 std::lock_guard guard{mutex};
45 list.remove_if( 45 list.remove_if(
46 [key_button](const KeyButtonPair& pair) { return pair.key_button == key_button; }); 46 [key_button](const KeyButtonPair& pair) { return pair.key_button == key_button; });
47 } 47 }
48 48
49 void ChangeKeyStatus(int key_code, bool pressed) { 49 void ChangeKeyStatus(int key_code, bool pressed) {
50 std::lock_guard<std::mutex> guard(mutex); 50 std::lock_guard guard{mutex};
51 for (const KeyButtonPair& pair : list) { 51 for (const KeyButtonPair& pair : list) {
52 if (pair.key_code == key_code) 52 if (pair.key_code == key_code)
53 pair.key_button->status.store(pressed); 53 pair.key_button->status.store(pressed);
@@ -55,7 +55,7 @@ public:
55 } 55 }
56 56
57 void ChangeAllKeyStatus(bool pressed) { 57 void ChangeAllKeyStatus(bool pressed) {
58 std::lock_guard<std::mutex> guard(mutex); 58 std::lock_guard guard{mutex};
59 for (const KeyButtonPair& pair : list) { 59 for (const KeyButtonPair& pair : list) {
60 pair.key_button->status.store(pressed); 60 pair.key_button->status.store(pressed);
61 } 61 }
diff --git a/src/input_common/main.cpp b/src/input_common/main.cpp
index 37f572853..8e66c1b15 100644
--- a/src/input_common/main.cpp
+++ b/src/input_common/main.cpp
@@ -17,10 +17,7 @@ namespace InputCommon {
17 17
18static std::shared_ptr<Keyboard> keyboard; 18static std::shared_ptr<Keyboard> keyboard;
19static std::shared_ptr<MotionEmu> motion_emu; 19static std::shared_ptr<MotionEmu> motion_emu;
20 20static std::unique_ptr<SDL::State> sdl;
21#ifdef HAVE_SDL2
22static std::thread poll_thread;
23#endif
24 21
25void Init() { 22void Init() {
26 keyboard = std::make_shared<Keyboard>(); 23 keyboard = std::make_shared<Keyboard>();
@@ -30,15 +27,7 @@ void Init() {
30 motion_emu = std::make_shared<MotionEmu>(); 27 motion_emu = std::make_shared<MotionEmu>();
31 Input::RegisterFactory<Input::MotionDevice>("motion_emu", motion_emu); 28 Input::RegisterFactory<Input::MotionDevice>("motion_emu", motion_emu);
32 29
33#ifdef HAVE_SDL2 30 sdl = SDL::Init();
34 SDL::Init();
35#endif
36}
37
38void StartJoystickEventHandler() {
39#ifdef HAVE_SDL2
40 poll_thread = std::thread(SDL::PollLoop);
41#endif
42} 31}
43 32
44void Shutdown() { 33void Shutdown() {
@@ -47,11 +36,7 @@ void Shutdown() {
47 Input::UnregisterFactory<Input::AnalogDevice>("analog_from_button"); 36 Input::UnregisterFactory<Input::AnalogDevice>("analog_from_button");
48 Input::UnregisterFactory<Input::MotionDevice>("motion_emu"); 37 Input::UnregisterFactory<Input::MotionDevice>("motion_emu");
49 motion_emu.reset(); 38 motion_emu.reset();
50 39 sdl.reset();
51#ifdef HAVE_SDL2
52 SDL::Shutdown();
53 poll_thread.join();
54#endif
55} 40}
56 41
57Keyboard* GetKeyboard() { 42Keyboard* GetKeyboard() {
@@ -88,7 +73,7 @@ namespace Polling {
88 73
89std::vector<std::unique_ptr<DevicePoller>> GetPollers(DeviceType type) { 74std::vector<std::unique_ptr<DevicePoller>> GetPollers(DeviceType type) {
90#ifdef HAVE_SDL2 75#ifdef HAVE_SDL2
91 return SDL::Polling::GetPollers(type); 76 return sdl->GetPollers(type);
92#else 77#else
93 return {}; 78 return {};
94#endif 79#endif
diff --git a/src/input_common/main.h b/src/input_common/main.h
index 9eb13106e..77a0ce90b 100644
--- a/src/input_common/main.h
+++ b/src/input_common/main.h
@@ -20,8 +20,6 @@ void Init();
20/// Deregisters all built-in input device factories and shuts them down. 20/// Deregisters all built-in input device factories and shuts them down.
21void Shutdown(); 21void Shutdown();
22 22
23void StartJoystickEventHandler();
24
25class Keyboard; 23class Keyboard;
26 24
27/// Gets the keyboard button device factory. 25/// Gets the keyboard button device factory.
diff --git a/src/input_common/motion_emu.cpp b/src/input_common/motion_emu.cpp
index 9570c060e..868251628 100644
--- a/src/input_common/motion_emu.cpp
+++ b/src/input_common/motion_emu.cpp
@@ -32,32 +32,32 @@ public:
32 } 32 }
33 33
34 void BeginTilt(int x, int y) { 34 void BeginTilt(int x, int y) {
35 mouse_origin = Math::MakeVec(x, y); 35 mouse_origin = Common::MakeVec(x, y);
36 is_tilting = true; 36 is_tilting = true;
37 } 37 }
38 38
39 void Tilt(int x, int y) { 39 void Tilt(int x, int y) {
40 auto mouse_move = Math::MakeVec(x, y) - mouse_origin; 40 auto mouse_move = Common::MakeVec(x, y) - mouse_origin;
41 if (is_tilting) { 41 if (is_tilting) {
42 std::lock_guard<std::mutex> guard(tilt_mutex); 42 std::lock_guard guard{tilt_mutex};
43 if (mouse_move.x == 0 && mouse_move.y == 0) { 43 if (mouse_move.x == 0 && mouse_move.y == 0) {
44 tilt_angle = 0; 44 tilt_angle = 0;
45 } else { 45 } else {
46 tilt_direction = mouse_move.Cast<float>(); 46 tilt_direction = mouse_move.Cast<float>();
47 tilt_angle = 47 tilt_angle =
48 std::clamp(tilt_direction.Normalize() * sensitivity, 0.0f, MathUtil::PI * 0.5f); 48 std::clamp(tilt_direction.Normalize() * sensitivity, 0.0f, Common::PI * 0.5f);
49 } 49 }
50 } 50 }
51 } 51 }
52 52
53 void EndTilt() { 53 void EndTilt() {
54 std::lock_guard<std::mutex> guard(tilt_mutex); 54 std::lock_guard guard{tilt_mutex};
55 tilt_angle = 0; 55 tilt_angle = 0;
56 is_tilting = false; 56 is_tilting = false;
57 } 57 }
58 58
59 std::tuple<Math::Vec3<float>, Math::Vec3<float>> GetStatus() { 59 std::tuple<Common::Vec3<float>, Common::Vec3<float>> GetStatus() {
60 std::lock_guard<std::mutex> guard(status_mutex); 60 std::lock_guard guard{status_mutex};
61 return status; 61 return status;
62 } 62 }
63 63
@@ -66,17 +66,17 @@ private:
66 const std::chrono::steady_clock::duration update_duration; 66 const std::chrono::steady_clock::duration update_duration;
67 const float sensitivity; 67 const float sensitivity;
68 68
69 Math::Vec2<int> mouse_origin; 69 Common::Vec2<int> mouse_origin;
70 70
71 std::mutex tilt_mutex; 71 std::mutex tilt_mutex;
72 Math::Vec2<float> tilt_direction; 72 Common::Vec2<float> tilt_direction;
73 float tilt_angle = 0; 73 float tilt_angle = 0;
74 74
75 bool is_tilting = false; 75 bool is_tilting = false;
76 76
77 Common::Event shutdown_event; 77 Common::Event shutdown_event;
78 78
79 std::tuple<Math::Vec3<float>, Math::Vec3<float>> status; 79 std::tuple<Common::Vec3<float>, Common::Vec3<float>> status;
80 std::mutex status_mutex; 80 std::mutex status_mutex;
81 81
82 // Note: always keep the thread declaration at the end so that other objects are initialized 82 // Note: always keep the thread declaration at the end so that other objects are initialized
@@ -85,29 +85,29 @@ private:
85 85
86 void MotionEmuThread() { 86 void MotionEmuThread() {
87 auto update_time = std::chrono::steady_clock::now(); 87 auto update_time = std::chrono::steady_clock::now();
88 Math::Quaternion<float> q = MakeQuaternion(Math::Vec3<float>(), 0); 88 Common::Quaternion<float> q = Common::MakeQuaternion(Common::Vec3<float>(), 0);
89 Math::Quaternion<float> old_q; 89 Common::Quaternion<float> old_q;
90 90
91 while (!shutdown_event.WaitUntil(update_time)) { 91 while (!shutdown_event.WaitUntil(update_time)) {
92 update_time += update_duration; 92 update_time += update_duration;
93 old_q = q; 93 old_q = q;
94 94
95 { 95 {
96 std::lock_guard<std::mutex> guard(tilt_mutex); 96 std::lock_guard guard{tilt_mutex};
97 97
98 // Find the quaternion describing current 3DS tilting 98 // Find the quaternion describing current 3DS tilting
99 q = MakeQuaternion(Math::MakeVec(-tilt_direction.y, 0.0f, tilt_direction.x), 99 q = Common::MakeQuaternion(
100 tilt_angle); 100 Common::MakeVec(-tilt_direction.y, 0.0f, tilt_direction.x), tilt_angle);
101 } 101 }
102 102
103 auto inv_q = q.Inverse(); 103 auto inv_q = q.Inverse();
104 104
105 // Set the gravity vector in world space 105 // Set the gravity vector in world space
106 auto gravity = Math::MakeVec(0.0f, -1.0f, 0.0f); 106 auto gravity = Common::MakeVec(0.0f, -1.0f, 0.0f);
107 107
108 // Find the angular rate vector in world space 108 // Find the angular rate vector in world space
109 auto angular_rate = ((q - old_q) * inv_q).xyz * 2; 109 auto angular_rate = ((q - old_q) * inv_q).xyz * 2;
110 angular_rate *= 1000 / update_millisecond / MathUtil::PI * 180; 110 angular_rate *= 1000 / update_millisecond / Common::PI * 180;
111 111
112 // Transform the two vectors from world space to 3DS space 112 // Transform the two vectors from world space to 3DS space
113 gravity = QuaternionRotate(inv_q, gravity); 113 gravity = QuaternionRotate(inv_q, gravity);
@@ -115,7 +115,7 @@ private:
115 115
116 // Update the sensor state 116 // Update the sensor state
117 { 117 {
118 std::lock_guard<std::mutex> guard(status_mutex); 118 std::lock_guard guard{status_mutex};
119 status = std::make_tuple(gravity, angular_rate); 119 status = std::make_tuple(gravity, angular_rate);
120 } 120 }
121 } 121 }
@@ -131,7 +131,7 @@ public:
131 device = std::make_shared<MotionEmuDevice>(update_millisecond, sensitivity); 131 device = std::make_shared<MotionEmuDevice>(update_millisecond, sensitivity);
132 } 132 }
133 133
134 std::tuple<Math::Vec3<float>, Math::Vec3<float>> GetStatus() const override { 134 std::tuple<Common::Vec3<float>, Common::Vec3<float>> GetStatus() const override {
135 return device->GetStatus(); 135 return device->GetStatus();
136 } 136 }
137 137
diff --git a/src/input_common/sdl/sdl.cpp b/src/input_common/sdl/sdl.cpp
index faf3c1fa3..644db3448 100644
--- a/src/input_common/sdl/sdl.cpp
+++ b/src/input_common/sdl/sdl.cpp
@@ -1,631 +1,19 @@
1// Copyright 2017 Citra Emulator Project 1// Copyright 2018 Citra Emulator Project
2// Licensed under GPLv2 or any later version 2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included. 3// Refer to the license.txt file included.
4 4
5#include <algorithm>
6#include <atomic>
7#include <cmath>
8#include <functional>
9#include <iterator>
10#include <mutex>
11#include <string>
12#include <thread>
13#include <tuple>
14#include <unordered_map>
15#include <utility>
16#include <vector>
17#include <SDL.h>
18#include "common/assert.h"
19#include "common/logging/log.h"
20#include "common/math_util.h"
21#include "common/param_package.h"
22#include "common/threadsafe_queue.h"
23#include "input_common/main.h"
24#include "input_common/sdl/sdl.h" 5#include "input_common/sdl/sdl.h"
6#ifdef HAVE_SDL2
7#include "input_common/sdl/sdl_impl.h"
8#endif
25 9
26namespace InputCommon { 10namespace InputCommon::SDL {
27 11
28namespace SDL { 12std::unique_ptr<State> Init() {
29 13#ifdef HAVE_SDL2
30class SDLJoystick; 14 return std::make_unique<SDLState>();
31class SDLButtonFactory; 15#else
32class SDLAnalogFactory; 16 return std::make_unique<NullState>();
33 17#endif
34/// Map of GUID of a list of corresponding virtual Joysticks
35static std::unordered_map<std::string, std::vector<std::shared_ptr<SDLJoystick>>> joystick_map;
36static std::mutex joystick_map_mutex;
37
38static std::shared_ptr<SDLButtonFactory> button_factory;
39static std::shared_ptr<SDLAnalogFactory> analog_factory;
40
41/// Used by the Pollers during config
42static std::atomic<bool> polling;
43static Common::SPSCQueue<SDL_Event> event_queue;
44
45static std::atomic<bool> initialized = false;
46
47static std::string GetGUID(SDL_Joystick* joystick) {
48 SDL_JoystickGUID guid = SDL_JoystickGetGUID(joystick);
49 char guid_str[33];
50 SDL_JoystickGetGUIDString(guid, guid_str, sizeof(guid_str));
51 return guid_str;
52}
53
54class SDLJoystick {
55public:
56 SDLJoystick(std::string guid_, int port_, SDL_Joystick* joystick,
57 decltype(&SDL_JoystickClose) deleter = &SDL_JoystickClose)
58 : guid{std::move(guid_)}, port{port_}, sdl_joystick{joystick, deleter} {}
59
60 void SetButton(int button, bool value) {
61 std::lock_guard<std::mutex> lock(mutex);
62 state.buttons[button] = value;
63 }
64
65 bool GetButton(int button) const {
66 std::lock_guard<std::mutex> lock(mutex);
67 return state.buttons.at(button);
68 }
69
70 void SetAxis(int axis, Sint16 value) {
71 std::lock_guard<std::mutex> lock(mutex);
72 state.axes[axis] = value;
73 }
74
75 float GetAxis(int axis) const {
76 std::lock_guard<std::mutex> lock(mutex);
77 return state.axes.at(axis) / 32767.0f;
78 }
79
80 std::tuple<float, float> GetAnalog(int axis_x, int axis_y) const {
81 float x = GetAxis(axis_x);
82 float y = GetAxis(axis_y);
83 y = -y; // 3DS uses an y-axis inverse from SDL
84
85 // Make sure the coordinates are in the unit circle,
86 // otherwise normalize it.
87 float r = x * x + y * y;
88 if (r > 1.0f) {
89 r = std::sqrt(r);
90 x /= r;
91 y /= r;
92 }
93
94 return std::make_tuple(x, y);
95 }
96
97 void SetHat(int hat, Uint8 direction) {
98 std::lock_guard<std::mutex> lock(mutex);
99 state.hats[hat] = direction;
100 }
101
102 bool GetHatDirection(int hat, Uint8 direction) const {
103 std::lock_guard<std::mutex> lock(mutex);
104 return (state.hats.at(hat) & direction) != 0;
105 }
106 /**
107 * The guid of the joystick
108 */
109 const std::string& GetGUID() const {
110 return guid;
111 }
112
113 /**
114 * The number of joystick from the same type that were connected before this joystick
115 */
116 int GetPort() const {
117 return port;
118 }
119
120 SDL_Joystick* GetSDLJoystick() const {
121 return sdl_joystick.get();
122 }
123
124 void SetSDLJoystick(SDL_Joystick* joystick,
125 decltype(&SDL_JoystickClose) deleter = &SDL_JoystickClose) {
126 sdl_joystick =
127 std::unique_ptr<SDL_Joystick, decltype(&SDL_JoystickClose)>(joystick, deleter);
128 }
129
130private:
131 struct State {
132 std::unordered_map<int, bool> buttons;
133 std::unordered_map<int, Sint16> axes;
134 std::unordered_map<int, Uint8> hats;
135 } state;
136 std::string guid;
137 int port;
138 std::unique_ptr<SDL_Joystick, decltype(&SDL_JoystickClose)> sdl_joystick;
139 mutable std::mutex mutex;
140};
141
142/**
143 * Get the nth joystick with the corresponding GUID
144 */
145static std::shared_ptr<SDLJoystick> GetSDLJoystickByGUID(const std::string& guid, int port) {
146 std::lock_guard<std::mutex> lock(joystick_map_mutex);
147 const auto it = joystick_map.find(guid);
148 if (it != joystick_map.end()) {
149 while (it->second.size() <= port) {
150 auto joystick = std::make_shared<SDLJoystick>(guid, it->second.size(), nullptr,
151 [](SDL_Joystick*) {});
152 it->second.emplace_back(std::move(joystick));
153 }
154 return it->second[port];
155 }
156 auto joystick = std::make_shared<SDLJoystick>(guid, 0, nullptr, [](SDL_Joystick*) {});
157 return joystick_map[guid].emplace_back(std::move(joystick));
158}
159
160/**
161 * Check how many identical joysticks (by guid) were connected before the one with sdl_id and so tie
162 * it to a SDLJoystick with the same guid and that port
163 */
164static std::shared_ptr<SDLJoystick> GetSDLJoystickBySDLID(SDL_JoystickID sdl_id) {
165 std::lock_guard<std::mutex> lock(joystick_map_mutex);
166 auto sdl_joystick = SDL_JoystickFromInstanceID(sdl_id);
167 const std::string guid = GetGUID(sdl_joystick);
168 auto map_it = joystick_map.find(guid);
169 if (map_it != joystick_map.end()) {
170 auto vec_it = std::find_if(map_it->second.begin(), map_it->second.end(),
171 [&sdl_joystick](const std::shared_ptr<SDLJoystick>& joystick) {
172 return sdl_joystick == joystick->GetSDLJoystick();
173 });
174 if (vec_it != map_it->second.end()) {
175 // This is the common case: There is already an existing SDL_Joystick maped to a
176 // SDLJoystick. return the SDLJoystick
177 return *vec_it;
178 }
179 // Search for a SDLJoystick without a mapped SDL_Joystick...
180 auto nullptr_it = std::find_if(map_it->second.begin(), map_it->second.end(),
181 [](const std::shared_ptr<SDLJoystick>& joystick) {
182 return !joystick->GetSDLJoystick();
183 });
184 if (nullptr_it != map_it->second.end()) {
185 // ... and map it
186 (*nullptr_it)->SetSDLJoystick(sdl_joystick);
187 return *nullptr_it;
188 }
189 // There is no SDLJoystick without a mapped SDL_Joystick
190 // Create a new SDLJoystick
191 auto joystick = std::make_shared<SDLJoystick>(guid, map_it->second.size(), sdl_joystick);
192 return map_it->second.emplace_back(std::move(joystick));
193 }
194 auto joystick = std::make_shared<SDLJoystick>(guid, 0, sdl_joystick);
195 return joystick_map[guid].emplace_back(std::move(joystick));
196}
197
198void InitJoystick(int joystick_index) {
199 std::lock_guard<std::mutex> lock(joystick_map_mutex);
200 SDL_Joystick* sdl_joystick = SDL_JoystickOpen(joystick_index);
201 if (!sdl_joystick) {
202 LOG_ERROR(Input, "failed to open joystick {}", joystick_index);
203 return;
204 }
205 std::string guid = GetGUID(sdl_joystick);
206 if (joystick_map.find(guid) == joystick_map.end()) {
207 auto joystick = std::make_shared<SDLJoystick>(guid, 0, sdl_joystick);
208 joystick_map[guid].emplace_back(std::move(joystick));
209 return;
210 }
211 auto& joystick_guid_list = joystick_map[guid];
212 const auto it = std::find_if(
213 joystick_guid_list.begin(), joystick_guid_list.end(),
214 [](const std::shared_ptr<SDLJoystick>& joystick) { return !joystick->GetSDLJoystick(); });
215 if (it != joystick_guid_list.end()) {
216 (*it)->SetSDLJoystick(sdl_joystick);
217 return;
218 }
219 auto joystick = std::make_shared<SDLJoystick>(guid, joystick_guid_list.size(), sdl_joystick);
220 joystick_guid_list.emplace_back(std::move(joystick));
221}
222
223void CloseJoystick(SDL_Joystick* sdl_joystick) {
224 std::lock_guard<std::mutex> lock(joystick_map_mutex);
225 std::string guid = GetGUID(sdl_joystick);
226 // This call to guid is save since the joystick is guranteed to be in that map
227 auto& joystick_guid_list = joystick_map[guid];
228 const auto joystick_it =
229 std::find_if(joystick_guid_list.begin(), joystick_guid_list.end(),
230 [&sdl_joystick](const std::shared_ptr<SDLJoystick>& joystick) {
231 return joystick->GetSDLJoystick() == sdl_joystick;
232 });
233 (*joystick_it)->SetSDLJoystick(nullptr, [](SDL_Joystick*) {});
234}
235
236void HandleGameControllerEvent(const SDL_Event& event) {
237 switch (event.type) {
238 case SDL_JOYBUTTONUP: {
239 auto joystick = GetSDLJoystickBySDLID(event.jbutton.which);
240 if (joystick) {
241 joystick->SetButton(event.jbutton.button, false);
242 }
243 break;
244 }
245 case SDL_JOYBUTTONDOWN: {
246 auto joystick = GetSDLJoystickBySDLID(event.jbutton.which);
247 if (joystick) {
248 joystick->SetButton(event.jbutton.button, true);
249 }
250 break;
251 }
252 case SDL_JOYHATMOTION: {
253 auto joystick = GetSDLJoystickBySDLID(event.jhat.which);
254 if (joystick) {
255 joystick->SetHat(event.jhat.hat, event.jhat.value);
256 }
257 break;
258 }
259 case SDL_JOYAXISMOTION: {
260 auto joystick = GetSDLJoystickBySDLID(event.jaxis.which);
261 if (joystick) {
262 joystick->SetAxis(event.jaxis.axis, event.jaxis.value);
263 }
264 break;
265 }
266 case SDL_JOYDEVICEREMOVED:
267 LOG_DEBUG(Input, "Controller removed with Instance_ID {}", event.jdevice.which);
268 CloseJoystick(SDL_JoystickFromInstanceID(event.jdevice.which));
269 break;
270 case SDL_JOYDEVICEADDED:
271 LOG_DEBUG(Input, "Controller connected with device index {}", event.jdevice.which);
272 InitJoystick(event.jdevice.which);
273 break;
274 }
275}
276
277void CloseSDLJoysticks() {
278 std::lock_guard<std::mutex> lock(joystick_map_mutex);
279 joystick_map.clear();
280}
281
282void PollLoop() {
283 if (SDL_Init(SDL_INIT_JOYSTICK) < 0) {
284 LOG_CRITICAL(Input, "SDL_Init(SDL_INIT_JOYSTICK) failed with: {}", SDL_GetError());
285 return;
286 }
287
288 SDL_Event event;
289 while (initialized) {
290 // Wait for 10 ms or until an event happens
291 if (SDL_WaitEventTimeout(&event, 10)) {
292 // Don't handle the event if we are configuring
293 if (polling) {
294 event_queue.Push(event);
295 } else {
296 HandleGameControllerEvent(event);
297 }
298 }
299 }
300 CloseSDLJoysticks();
301 SDL_QuitSubSystem(SDL_INIT_JOYSTICK);
302}
303
304class SDLButton final : public Input::ButtonDevice {
305public:
306 explicit SDLButton(std::shared_ptr<SDLJoystick> joystick_, int button_)
307 : joystick(std::move(joystick_)), button(button_) {}
308
309 bool GetStatus() const override {
310 return joystick->GetButton(button);
311 }
312
313private:
314 std::shared_ptr<SDLJoystick> joystick;
315 int button;
316};
317
318class SDLDirectionButton final : public Input::ButtonDevice {
319public:
320 explicit SDLDirectionButton(std::shared_ptr<SDLJoystick> joystick_, int hat_, Uint8 direction_)
321 : joystick(std::move(joystick_)), hat(hat_), direction(direction_) {}
322
323 bool GetStatus() const override {
324 return joystick->GetHatDirection(hat, direction);
325 }
326
327private:
328 std::shared_ptr<SDLJoystick> joystick;
329 int hat;
330 Uint8 direction;
331};
332
333class SDLAxisButton final : public Input::ButtonDevice {
334public:
335 explicit SDLAxisButton(std::shared_ptr<SDLJoystick> joystick_, int axis_, float threshold_,
336 bool trigger_if_greater_)
337 : joystick(std::move(joystick_)), axis(axis_), threshold(threshold_),
338 trigger_if_greater(trigger_if_greater_) {}
339
340 bool GetStatus() const override {
341 float axis_value = joystick->GetAxis(axis);
342 if (trigger_if_greater)
343 return axis_value > threshold;
344 return axis_value < threshold;
345 }
346
347private:
348 std::shared_ptr<SDLJoystick> joystick;
349 int axis;
350 float threshold;
351 bool trigger_if_greater;
352};
353
354class SDLAnalog final : public Input::AnalogDevice {
355public:
356 SDLAnalog(std::shared_ptr<SDLJoystick> joystick_, int axis_x_, int axis_y_)
357 : joystick(std::move(joystick_)), axis_x(axis_x_), axis_y(axis_y_) {}
358
359 std::tuple<float, float> GetStatus() const override {
360 return joystick->GetAnalog(axis_x, axis_y);
361 }
362
363private:
364 std::shared_ptr<SDLJoystick> joystick;
365 int axis_x;
366 int axis_y;
367};
368
369/// A button device factory that creates button devices from SDL joystick
370class SDLButtonFactory final : public Input::Factory<Input::ButtonDevice> {
371public:
372 /**
373 * Creates a button device from a joystick button
374 * @param params contains parameters for creating the device:
375 * - "guid": the guid of the joystick to bind
376 * - "port": the nth joystick of the same type to bind
377 * - "button"(optional): the index of the button to bind
378 * - "hat"(optional): the index of the hat to bind as direction buttons
379 * - "axis"(optional): the index of the axis to bind
380 * - "direction"(only used for hat): the direction name of the hat to bind. Can be "up",
381 * "down", "left" or "right"
382 * - "threshold"(only used for axis): a float value in (-1.0, 1.0) which the button is
383 * triggered if the axis value crosses
384 * - "direction"(only used for axis): "+" means the button is triggered when the axis
385 * value is greater than the threshold; "-" means the button is triggered when the axis
386 * value is smaller than the threshold
387 */
388 std::unique_ptr<Input::ButtonDevice> Create(const Common::ParamPackage& params) override {
389 const std::string guid = params.Get("guid", "0");
390 const int port = params.Get("port", 0);
391
392 auto joystick = GetSDLJoystickByGUID(guid, port);
393
394 if (params.Has("hat")) {
395 const int hat = params.Get("hat", 0);
396 const std::string direction_name = params.Get("direction", "");
397 Uint8 direction;
398 if (direction_name == "up") {
399 direction = SDL_HAT_UP;
400 } else if (direction_name == "down") {
401 direction = SDL_HAT_DOWN;
402 } else if (direction_name == "left") {
403 direction = SDL_HAT_LEFT;
404 } else if (direction_name == "right") {
405 direction = SDL_HAT_RIGHT;
406 } else {
407 direction = 0;
408 }
409 // This is necessary so accessing GetHat with hat won't crash
410 joystick->SetHat(hat, SDL_HAT_CENTERED);
411 return std::make_unique<SDLDirectionButton>(joystick, hat, direction);
412 }
413
414 if (params.Has("axis")) {
415 const int axis = params.Get("axis", 0);
416 const float threshold = params.Get("threshold", 0.5f);
417 const std::string direction_name = params.Get("direction", "");
418 bool trigger_if_greater;
419 if (direction_name == "+") {
420 trigger_if_greater = true;
421 } else if (direction_name == "-") {
422 trigger_if_greater = false;
423 } else {
424 trigger_if_greater = true;
425 LOG_ERROR(Input, "Unknown direction '{}'", direction_name);
426 }
427 // This is necessary so accessing GetAxis with axis won't crash
428 joystick->SetAxis(axis, 0);
429 return std::make_unique<SDLAxisButton>(joystick, axis, threshold, trigger_if_greater);
430 }
431
432 const int button = params.Get("button", 0);
433 // This is necessary so accessing GetButton with button won't crash
434 joystick->SetButton(button, false);
435 return std::make_unique<SDLButton>(joystick, button);
436 }
437};
438
439/// An analog device factory that creates analog devices from SDL joystick
440class SDLAnalogFactory final : public Input::Factory<Input::AnalogDevice> {
441public:
442 /**
443 * Creates analog device from joystick axes
444 * @param params contains parameters for creating the device:
445 * - "guid": the guid of the joystick to bind
446 * - "port": the nth joystick of the same type
447 * - "axis_x": the index of the axis to be bind as x-axis
448 * - "axis_y": the index of the axis to be bind as y-axis
449 */
450 std::unique_ptr<Input::AnalogDevice> Create(const Common::ParamPackage& params) override {
451 const std::string guid = params.Get("guid", "0");
452 const int port = params.Get("port", 0);
453 const int axis_x = params.Get("axis_x", 0);
454 const int axis_y = params.Get("axis_y", 1);
455
456 auto joystick = GetSDLJoystickByGUID(guid, port);
457
458 // This is necessary so accessing GetAxis with axis_x and axis_y won't crash
459 joystick->SetAxis(axis_x, 0);
460 joystick->SetAxis(axis_y, 0);
461 return std::make_unique<SDLAnalog>(joystick, axis_x, axis_y);
462 }
463};
464
465void Init() {
466 using namespace Input;
467 RegisterFactory<ButtonDevice>("sdl", std::make_shared<SDLButtonFactory>());
468 RegisterFactory<AnalogDevice>("sdl", std::make_shared<SDLAnalogFactory>());
469 polling = false;
470 initialized = true;
471}
472
473void Shutdown() {
474 if (initialized) {
475 using namespace Input;
476 UnregisterFactory<ButtonDevice>("sdl");
477 UnregisterFactory<AnalogDevice>("sdl");
478 initialized = false;
479 }
480}
481
482Common::ParamPackage SDLEventToButtonParamPackage(const SDL_Event& event) {
483 Common::ParamPackage params({{"engine", "sdl"}});
484 switch (event.type) {
485 case SDL_JOYAXISMOTION: {
486 auto joystick = GetSDLJoystickBySDLID(event.jaxis.which);
487 params.Set("port", joystick->GetPort());
488 params.Set("guid", joystick->GetGUID());
489 params.Set("axis", event.jaxis.axis);
490 if (event.jaxis.value > 0) {
491 params.Set("direction", "+");
492 params.Set("threshold", "0.5");
493 } else {
494 params.Set("direction", "-");
495 params.Set("threshold", "-0.5");
496 }
497 break;
498 }
499 case SDL_JOYBUTTONUP: {
500 auto joystick = GetSDLJoystickBySDLID(event.jbutton.which);
501 params.Set("port", joystick->GetPort());
502 params.Set("guid", joystick->GetGUID());
503 params.Set("button", event.jbutton.button);
504 break;
505 }
506 case SDL_JOYHATMOTION: {
507 auto joystick = GetSDLJoystickBySDLID(event.jhat.which);
508 params.Set("port", joystick->GetPort());
509 params.Set("guid", joystick->GetGUID());
510 params.Set("hat", event.jhat.hat);
511 switch (event.jhat.value) {
512 case SDL_HAT_UP:
513 params.Set("direction", "up");
514 break;
515 case SDL_HAT_DOWN:
516 params.Set("direction", "down");
517 break;
518 case SDL_HAT_LEFT:
519 params.Set("direction", "left");
520 break;
521 case SDL_HAT_RIGHT:
522 params.Set("direction", "right");
523 break;
524 default:
525 return {};
526 }
527 break;
528 }
529 }
530 return params;
531}
532
533namespace Polling {
534
535class SDLPoller : public InputCommon::Polling::DevicePoller {
536public:
537 void Start() override {
538 event_queue.Clear();
539 polling = true;
540 }
541
542 void Stop() override {
543 polling = false;
544 }
545};
546
547class SDLButtonPoller final : public SDLPoller {
548public:
549 Common::ParamPackage GetNextInput() override {
550 SDL_Event event;
551 while (event_queue.Pop(event)) {
552 switch (event.type) {
553 case SDL_JOYAXISMOTION:
554 if (std::abs(event.jaxis.value / 32767.0) < 0.5) {
555 break;
556 }
557 case SDL_JOYBUTTONUP:
558 case SDL_JOYHATMOTION:
559 return SDLEventToButtonParamPackage(event);
560 }
561 }
562 return {};
563 }
564};
565
566class SDLAnalogPoller final : public SDLPoller {
567public:
568 void Start() override {
569 SDLPoller::Start();
570
571 // Reset stored axes
572 analog_xaxis = -1;
573 analog_yaxis = -1;
574 analog_axes_joystick = -1;
575 }
576
577 Common::ParamPackage GetNextInput() override {
578 SDL_Event event;
579 while (event_queue.Pop(event)) {
580 if (event.type != SDL_JOYAXISMOTION || std::abs(event.jaxis.value / 32767.0) < 0.5) {
581 continue;
582 }
583 // An analog device needs two axes, so we need to store the axis for later and wait for
584 // a second SDL event. The axes also must be from the same joystick.
585 int axis = event.jaxis.axis;
586 if (analog_xaxis == -1) {
587 analog_xaxis = axis;
588 analog_axes_joystick = event.jaxis.which;
589 } else if (analog_yaxis == -1 && analog_xaxis != axis &&
590 analog_axes_joystick == event.jaxis.which) {
591 analog_yaxis = axis;
592 }
593 }
594 Common::ParamPackage params;
595 if (analog_xaxis != -1 && analog_yaxis != -1) {
596 auto joystick = GetSDLJoystickBySDLID(event.jaxis.which);
597 params.Set("engine", "sdl");
598 params.Set("port", joystick->GetPort());
599 params.Set("guid", joystick->GetGUID());
600 params.Set("axis_x", analog_xaxis);
601 params.Set("axis_y", analog_yaxis);
602 analog_xaxis = -1;
603 analog_yaxis = -1;
604 analog_axes_joystick = -1;
605 return params;
606 }
607 return params;
608 }
609
610private:
611 int analog_xaxis = -1;
612 int analog_yaxis = -1;
613 SDL_JoystickID analog_axes_joystick = -1;
614};
615
616std::vector<std::unique_ptr<InputCommon::Polling::DevicePoller>> GetPollers(
617 InputCommon::Polling::DeviceType type) {
618 std::vector<std::unique_ptr<InputCommon::Polling::DevicePoller>> pollers;
619 switch (type) {
620 case InputCommon::Polling::DeviceType::Analog:
621 pollers.push_back(std::make_unique<SDLAnalogPoller>());
622 break;
623 case InputCommon::Polling::DeviceType::Button:
624 pollers.push_back(std::make_unique<SDLButtonPoller>());
625 break;
626 }
627 return pollers;
628} 18}
629} // namespace Polling 19} // namespace InputCommon::SDL
630} // namespace SDL
631} // namespace InputCommon
diff --git a/src/input_common/sdl/sdl.h b/src/input_common/sdl/sdl.h
index 0206860d3..d7f24c68a 100644
--- a/src/input_common/sdl/sdl.h
+++ b/src/input_common/sdl/sdl.h
@@ -1,4 +1,4 @@
1// Copyright 2017 Citra Emulator Project 1// Copyright 2018 Citra Emulator Project
2// Licensed under GPLv2 or any later version 2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included. 3// Refer to the license.txt file included.
4 4
@@ -7,45 +7,38 @@
7#include <memory> 7#include <memory>
8#include <vector> 8#include <vector>
9#include "core/frontend/input.h" 9#include "core/frontend/input.h"
10#include "input_common/main.h"
10 11
11union SDL_Event; 12union SDL_Event;
13
12namespace Common { 14namespace Common {
13class ParamPackage; 15class ParamPackage;
14} 16} // namespace Common
15namespace InputCommon { 17
16namespace Polling { 18namespace InputCommon::Polling {
17class DevicePoller; 19class DevicePoller;
18enum class DeviceType; 20enum class DeviceType;
19} // namespace Polling 21} // namespace InputCommon::Polling
20} // namespace InputCommon
21
22namespace InputCommon {
23namespace SDL {
24
25/// Initializes and registers SDL device factories
26void Init();
27
28/// Unresisters SDL device factories and shut them down.
29void Shutdown();
30 22
31/// Needs to be called before SDL_QuitSubSystem. 23namespace InputCommon::SDL {
32void CloseSDLJoysticks();
33 24
34/// Handle SDL_Events for joysticks from SDL_PollEvent 25class State {
35void HandleGameControllerEvent(const SDL_Event& event); 26public:
27 using Pollers = std::vector<std::unique_ptr<Polling::DevicePoller>>;
36 28
37/// A Loop that calls HandleGameControllerEvent until Shutdown is called 29 /// Unregisters SDL device factories and shut them down.
38void PollLoop(); 30 virtual ~State() = default;
39 31
40/// Creates a ParamPackage from an SDL_Event that can directly be used to create a ButtonDevice 32 virtual Pollers GetPollers(Polling::DeviceType type) = 0;
41Common::ParamPackage SDLEventToButtonParamPackage(const SDL_Event& event); 33};
42 34
43namespace Polling { 35class NullState : public State {
36public:
37 Pollers GetPollers(Polling::DeviceType type) override {
38 return {};
39 }
40};
44 41
45/// Get all DevicePoller that use the SDL backend for a specific device type 42std::unique_ptr<State> Init();
46std::vector<std::unique_ptr<InputCommon::Polling::DevicePoller>> GetPollers(
47 InputCommon::Polling::DeviceType type);
48 43
49} // namespace Polling 44} // namespace InputCommon::SDL
50} // namespace SDL
51} // namespace InputCommon
diff --git a/src/input_common/sdl/sdl_impl.cpp b/src/input_common/sdl/sdl_impl.cpp
new file mode 100644
index 000000000..5949ecbae
--- /dev/null
+++ b/src/input_common/sdl/sdl_impl.cpp
@@ -0,0 +1,671 @@
1// Copyright 2018 Citra Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <algorithm>
6#include <atomic>
7#include <cmath>
8#include <functional>
9#include <iterator>
10#include <mutex>
11#include <string>
12#include <thread>
13#include <tuple>
14#include <unordered_map>
15#include <utility>
16#include <vector>
17#include <SDL.h>
18#include "common/assert.h"
19#include "common/logging/log.h"
20#include "common/math_util.h"
21#include "common/param_package.h"
22#include "common/threadsafe_queue.h"
23#include "core/frontend/input.h"
24#include "input_common/sdl/sdl_impl.h"
25
26namespace InputCommon {
27
28namespace SDL {
29
30static std::string GetGUID(SDL_Joystick* joystick) {
31 SDL_JoystickGUID guid = SDL_JoystickGetGUID(joystick);
32 char guid_str[33];
33 SDL_JoystickGetGUIDString(guid, guid_str, sizeof(guid_str));
34 return guid_str;
35}
36
37/// Creates a ParamPackage from an SDL_Event that can directly be used to create a ButtonDevice
38static Common::ParamPackage SDLEventToButtonParamPackage(SDLState& state, const SDL_Event& event);
39
40static int SDLEventWatcher(void* userdata, SDL_Event* event) {
41 SDLState* sdl_state = reinterpret_cast<SDLState*>(userdata);
42 // Don't handle the event if we are configuring
43 if (sdl_state->polling) {
44 sdl_state->event_queue.Push(*event);
45 } else {
46 sdl_state->HandleGameControllerEvent(*event);
47 }
48 return 0;
49}
50
51class SDLJoystick {
52public:
53 SDLJoystick(std::string guid_, int port_, SDL_Joystick* joystick,
54 decltype(&SDL_JoystickClose) deleter = &SDL_JoystickClose)
55 : guid{std::move(guid_)}, port{port_}, sdl_joystick{joystick, deleter} {}
56
57 void SetButton(int button, bool value) {
58 std::lock_guard lock{mutex};
59 state.buttons[button] = value;
60 }
61
62 bool GetButton(int button) const {
63 std::lock_guard lock{mutex};
64 return state.buttons.at(button);
65 }
66
67 void SetAxis(int axis, Sint16 value) {
68 std::lock_guard lock{mutex};
69 state.axes[axis] = value;
70 }
71
72 float GetAxis(int axis) const {
73 std::lock_guard lock{mutex};
74 return state.axes.at(axis) / 32767.0f;
75 }
76
77 std::tuple<float, float> GetAnalog(int axis_x, int axis_y) const {
78 float x = GetAxis(axis_x);
79 float y = GetAxis(axis_y);
 80        y = -y; // 3DS uses a y-axis inverted from SDL's
81
82 // Make sure the coordinates are in the unit circle,
83 // otherwise normalize it.
84 float r = x * x + y * y;
85 if (r > 1.0f) {
86 r = std::sqrt(r);
87 x /= r;
88 y /= r;
89 }
90
91 return std::make_tuple(x, y);
92 }
93
94 void SetHat(int hat, Uint8 direction) {
95 std::lock_guard lock{mutex};
96 state.hats[hat] = direction;
97 }
98
99 bool GetHatDirection(int hat, Uint8 direction) const {
100 std::lock_guard lock{mutex};
101 return (state.hats.at(hat) & direction) != 0;
102 }
103 /**
104 * The guid of the joystick
105 */
106 const std::string& GetGUID() const {
107 return guid;
108 }
109
110 /**
111 * The number of joystick from the same type that were connected before this joystick
112 */
113 int GetPort() const {
114 return port;
115 }
116
117 SDL_Joystick* GetSDLJoystick() const {
118 return sdl_joystick.get();
119 }
120
121 void SetSDLJoystick(SDL_Joystick* joystick,
122 decltype(&SDL_JoystickClose) deleter = &SDL_JoystickClose) {
123 sdl_joystick =
124 std::unique_ptr<SDL_Joystick, decltype(&SDL_JoystickClose)>(joystick, deleter);
125 }
126
127private:
128 struct State {
129 std::unordered_map<int, bool> buttons;
130 std::unordered_map<int, Sint16> axes;
131 std::unordered_map<int, Uint8> hats;
132 } state;
133 std::string guid;
134 int port;
135 std::unique_ptr<SDL_Joystick, decltype(&SDL_JoystickClose)> sdl_joystick;
136 mutable std::mutex mutex;
137};
138
139/**
140 * Get the nth joystick with the corresponding GUID
141 */
142std::shared_ptr<SDLJoystick> SDLState::GetSDLJoystickByGUID(const std::string& guid, int port) {
143 std::lock_guard lock{joystick_map_mutex};
144 const auto it = joystick_map.find(guid);
145 if (it != joystick_map.end()) {
146 while (it->second.size() <= port) {
147 auto joystick = std::make_shared<SDLJoystick>(guid, it->second.size(), nullptr,
148 [](SDL_Joystick*) {});
149 it->second.emplace_back(std::move(joystick));
150 }
151 return it->second[port];
152 }
153 auto joystick = std::make_shared<SDLJoystick>(guid, 0, nullptr, [](SDL_Joystick*) {});
154 return joystick_map[guid].emplace_back(std::move(joystick));
155}
156
157/**
158 * Check how many identical joysticks (by guid) were connected before the one with sdl_id and so tie
159 * it to a SDLJoystick with the same guid and that port
160 */
161std::shared_ptr<SDLJoystick> SDLState::GetSDLJoystickBySDLID(SDL_JoystickID sdl_id) {
162 auto sdl_joystick = SDL_JoystickFromInstanceID(sdl_id);
163 const std::string guid = GetGUID(sdl_joystick);
164
165 std::lock_guard lock{joystick_map_mutex};
166 auto map_it = joystick_map.find(guid);
167 if (map_it != joystick_map.end()) {
168 auto vec_it = std::find_if(map_it->second.begin(), map_it->second.end(),
169 [&sdl_joystick](const std::shared_ptr<SDLJoystick>& joystick) {
170 return sdl_joystick == joystick->GetSDLJoystick();
171 });
172 if (vec_it != map_it->second.end()) {
 173            // This is the common case: There is already an existing SDL_Joystick mapped to a
 174            // SDLJoystick. Return the SDLJoystick.
175 return *vec_it;
176 }
177 // Search for a SDLJoystick without a mapped SDL_Joystick...
178 auto nullptr_it = std::find_if(map_it->second.begin(), map_it->second.end(),
179 [](const std::shared_ptr<SDLJoystick>& joystick) {
180 return !joystick->GetSDLJoystick();
181 });
182 if (nullptr_it != map_it->second.end()) {
183 // ... and map it
184 (*nullptr_it)->SetSDLJoystick(sdl_joystick);
185 return *nullptr_it;
186 }
187 // There is no SDLJoystick without a mapped SDL_Joystick
188 // Create a new SDLJoystick
189 auto joystick = std::make_shared<SDLJoystick>(guid, map_it->second.size(), sdl_joystick);
190 return map_it->second.emplace_back(std::move(joystick));
191 }
192 auto joystick = std::make_shared<SDLJoystick>(guid, 0, sdl_joystick);
193 return joystick_map[guid].emplace_back(std::move(joystick));
194}
195
196void SDLState::InitJoystick(int joystick_index) {
197 SDL_Joystick* sdl_joystick = SDL_JoystickOpen(joystick_index);
198 if (!sdl_joystick) {
199 LOG_ERROR(Input, "failed to open joystick {}", joystick_index);
200 return;
201 }
202 const std::string guid = GetGUID(sdl_joystick);
203
204 std::lock_guard lock{joystick_map_mutex};
205 if (joystick_map.find(guid) == joystick_map.end()) {
206 auto joystick = std::make_shared<SDLJoystick>(guid, 0, sdl_joystick);
207 joystick_map[guid].emplace_back(std::move(joystick));
208 return;
209 }
210 auto& joystick_guid_list = joystick_map[guid];
211 const auto it = std::find_if(
212 joystick_guid_list.begin(), joystick_guid_list.end(),
213 [](const std::shared_ptr<SDLJoystick>& joystick) { return !joystick->GetSDLJoystick(); });
214 if (it != joystick_guid_list.end()) {
215 (*it)->SetSDLJoystick(sdl_joystick);
216 return;
217 }
218 auto joystick = std::make_shared<SDLJoystick>(guid, joystick_guid_list.size(), sdl_joystick);
219 joystick_guid_list.emplace_back(std::move(joystick));
220}
221
222void SDLState::CloseJoystick(SDL_Joystick* sdl_joystick) {
223 std::string guid = GetGUID(sdl_joystick);
224 std::shared_ptr<SDLJoystick> joystick;
225 {
226 std::lock_guard lock{joystick_map_mutex};
227 // This call to guid is safe since the joystick is guaranteed to be in the map
228 auto& joystick_guid_list = joystick_map[guid];
229 const auto joystick_it =
230 std::find_if(joystick_guid_list.begin(), joystick_guid_list.end(),
231 [&sdl_joystick](const std::shared_ptr<SDLJoystick>& joystick) {
232 return joystick->GetSDLJoystick() == sdl_joystick;
233 });
234 joystick = *joystick_it;
235 }
 236    // Destruct SDL_Joystick outside the lock guard because SDL can internally call the event
 237    // callback, which locks the mutex again
238 joystick->SetSDLJoystick(nullptr, [](SDL_Joystick*) {});
239}
240
241void SDLState::HandleGameControllerEvent(const SDL_Event& event) {
242 switch (event.type) {
243 case SDL_JOYBUTTONUP: {
244 if (auto joystick = GetSDLJoystickBySDLID(event.jbutton.which)) {
245 joystick->SetButton(event.jbutton.button, false);
246 }
247 break;
248 }
249 case SDL_JOYBUTTONDOWN: {
250 if (auto joystick = GetSDLJoystickBySDLID(event.jbutton.which)) {
251 joystick->SetButton(event.jbutton.button, true);
252 }
253 break;
254 }
255 case SDL_JOYHATMOTION: {
256 if (auto joystick = GetSDLJoystickBySDLID(event.jhat.which)) {
257 joystick->SetHat(event.jhat.hat, event.jhat.value);
258 }
259 break;
260 }
261 case SDL_JOYAXISMOTION: {
262 if (auto joystick = GetSDLJoystickBySDLID(event.jaxis.which)) {
263 joystick->SetAxis(event.jaxis.axis, event.jaxis.value);
264 }
265 break;
266 }
267 case SDL_JOYDEVICEREMOVED:
268 LOG_DEBUG(Input, "Controller removed with Instance_ID {}", event.jdevice.which);
269 CloseJoystick(SDL_JoystickFromInstanceID(event.jdevice.which));
270 break;
271 case SDL_JOYDEVICEADDED:
272 LOG_DEBUG(Input, "Controller connected with device index {}", event.jdevice.which);
273 InitJoystick(event.jdevice.which);
274 break;
275 }
276}
277
278void SDLState::CloseJoysticks() {
279 std::lock_guard lock{joystick_map_mutex};
280 joystick_map.clear();
281}
282
283class SDLButton final : public Input::ButtonDevice {
284public:
285 explicit SDLButton(std::shared_ptr<SDLJoystick> joystick_, int button_)
286 : joystick(std::move(joystick_)), button(button_) {}
287
288 bool GetStatus() const override {
289 return joystick->GetButton(button);
290 }
291
292private:
293 std::shared_ptr<SDLJoystick> joystick;
294 int button;
295};
296
297class SDLDirectionButton final : public Input::ButtonDevice {
298public:
299 explicit SDLDirectionButton(std::shared_ptr<SDLJoystick> joystick_, int hat_, Uint8 direction_)
300 : joystick(std::move(joystick_)), hat(hat_), direction(direction_) {}
301
302 bool GetStatus() const override {
303 return joystick->GetHatDirection(hat, direction);
304 }
305
306private:
307 std::shared_ptr<SDLJoystick> joystick;
308 int hat;
309 Uint8 direction;
310};
311
312class SDLAxisButton final : public Input::ButtonDevice {
313public:
314 explicit SDLAxisButton(std::shared_ptr<SDLJoystick> joystick_, int axis_, float threshold_,
315 bool trigger_if_greater_)
316 : joystick(std::move(joystick_)), axis(axis_), threshold(threshold_),
317 trigger_if_greater(trigger_if_greater_) {}
318
319 bool GetStatus() const override {
320 float axis_value = joystick->GetAxis(axis);
321 if (trigger_if_greater)
322 return axis_value > threshold;
323 return axis_value < threshold;
324 }
325
326private:
327 std::shared_ptr<SDLJoystick> joystick;
328 int axis;
329 float threshold;
330 bool trigger_if_greater;
331};
332
333class SDLAnalog final : public Input::AnalogDevice {
334public:
335 SDLAnalog(std::shared_ptr<SDLJoystick> joystick_, int axis_x_, int axis_y_, float deadzone_)
336 : joystick(std::move(joystick_)), axis_x(axis_x_), axis_y(axis_y_), deadzone(deadzone_) {}
337
338 std::tuple<float, float> GetStatus() const override {
339 const auto [x, y] = joystick->GetAnalog(axis_x, axis_y);
340 const float r = std::sqrt((x * x) + (y * y));
341 if (r > deadzone) {
342 return std::make_tuple(x / r * (r - deadzone) / (1 - deadzone),
343 y / r * (r - deadzone) / (1 - deadzone));
344 }
345 return std::make_tuple<float, float>(0.0f, 0.0f);
346 }
347
348private:
349 std::shared_ptr<SDLJoystick> joystick;
350 const int axis_x;
351 const int axis_y;
352 const float deadzone;
353};
354
355/// A button device factory that creates button devices from SDL joystick
356class SDLButtonFactory final : public Input::Factory<Input::ButtonDevice> {
357public:
358 explicit SDLButtonFactory(SDLState& state_) : state(state_) {}
359
360 /**
361 * Creates a button device from a joystick button
362 * @param params contains parameters for creating the device:
363 * - "guid": the guid of the joystick to bind
364 * - "port": the nth joystick of the same type to bind
365 * - "button"(optional): the index of the button to bind
366 * - "hat"(optional): the index of the hat to bind as direction buttons
367 * - "axis"(optional): the index of the axis to bind
368 * - "direction"(only used for hat): the direction name of the hat to bind. Can be "up",
369 * "down", "left" or "right"
370 * - "threshold"(only used for axis): a float value in (-1.0, 1.0) which the button is
371 * triggered if the axis value crosses
372 * - "direction"(only used for axis): "+" means the button is triggered when the axis
373 * value is greater than the threshold; "-" means the button is triggered when the axis
374 * value is smaller than the threshold
375 */
376 std::unique_ptr<Input::ButtonDevice> Create(const Common::ParamPackage& params) override {
377 const std::string guid = params.Get("guid", "0");
378 const int port = params.Get("port", 0);
379
380 auto joystick = state.GetSDLJoystickByGUID(guid, port);
381
382 if (params.Has("hat")) {
383 const int hat = params.Get("hat", 0);
384 const std::string direction_name = params.Get("direction", "");
385 Uint8 direction;
386 if (direction_name == "up") {
387 direction = SDL_HAT_UP;
388 } else if (direction_name == "down") {
389 direction = SDL_HAT_DOWN;
390 } else if (direction_name == "left") {
391 direction = SDL_HAT_LEFT;
392 } else if (direction_name == "right") {
393 direction = SDL_HAT_RIGHT;
394 } else {
395 direction = 0;
396 }
397 // This is necessary so accessing GetHat with hat won't crash
398 joystick->SetHat(hat, SDL_HAT_CENTERED);
399 return std::make_unique<SDLDirectionButton>(joystick, hat, direction);
400 }
401
402 if (params.Has("axis")) {
403 const int axis = params.Get("axis", 0);
404 const float threshold = params.Get("threshold", 0.5f);
405 const std::string direction_name = params.Get("direction", "");
406 bool trigger_if_greater;
407 if (direction_name == "+") {
408 trigger_if_greater = true;
409 } else if (direction_name == "-") {
410 trigger_if_greater = false;
411 } else {
412 trigger_if_greater = true;
413 LOG_ERROR(Input, "Unknown direction {}", direction_name);
414 }
415 // This is necessary so accessing GetAxis with axis won't crash
416 joystick->SetAxis(axis, 0);
417 return std::make_unique<SDLAxisButton>(joystick, axis, threshold, trigger_if_greater);
418 }
419
420 const int button = params.Get("button", 0);
421 // This is necessary so accessing GetButton with button won't crash
422 joystick->SetButton(button, false);
423 return std::make_unique<SDLButton>(joystick, button);
424 }
425
426private:
427 SDLState& state;
428};
429
430/// An analog device factory that creates analog devices from SDL joystick
431class SDLAnalogFactory final : public Input::Factory<Input::AnalogDevice> {
432public:
433 explicit SDLAnalogFactory(SDLState& state_) : state(state_) {}
434 /**
435 * Creates analog device from joystick axes
436 * @param params contains parameters for creating the device:
437 * - "guid": the guid of the joystick to bind
438 * - "port": the nth joystick of the same type
439 * - "axis_x": the index of the axis to be bind as x-axis
440 * - "axis_y": the index of the axis to be bind as y-axis
441 */
442 std::unique_ptr<Input::AnalogDevice> Create(const Common::ParamPackage& params) override {
443 const std::string guid = params.Get("guid", "0");
444 const int port = params.Get("port", 0);
445 const int axis_x = params.Get("axis_x", 0);
446 const int axis_y = params.Get("axis_y", 1);
447 float deadzone = std::clamp(params.Get("deadzone", 0.0f), 0.0f, .99f);
448
449 auto joystick = state.GetSDLJoystickByGUID(guid, port);
450
451 // This is necessary so accessing GetAxis with axis_x and axis_y won't crash
452 joystick->SetAxis(axis_x, 0);
453 joystick->SetAxis(axis_y, 0);
454 return std::make_unique<SDLAnalog>(joystick, axis_x, axis_y, deadzone);
455 }
456
457private:
458 SDLState& state;
459};
460
461SDLState::SDLState() {
462 using namespace Input;
463 RegisterFactory<ButtonDevice>("sdl", std::make_shared<SDLButtonFactory>(*this));
464 RegisterFactory<AnalogDevice>("sdl", std::make_shared<SDLAnalogFactory>(*this));
465
466 // If the frontend is going to manage the event loop, then we dont start one here
467 start_thread = !SDL_WasInit(SDL_INIT_JOYSTICK);
468 if (start_thread && SDL_Init(SDL_INIT_JOYSTICK) < 0) {
469 LOG_CRITICAL(Input, "SDL_Init(SDL_INIT_JOYSTICK) failed with: {}", SDL_GetError());
470 return;
471 }
472 if (SDL_SetHint(SDL_HINT_JOYSTICK_ALLOW_BACKGROUND_EVENTS, "1") == SDL_FALSE) {
473 LOG_ERROR(Input, "Failed to set Hint for background events", SDL_GetError());
474 }
475
476 SDL_AddEventWatch(&SDLEventWatcher, this);
477
478 initialized = true;
479 if (start_thread) {
480 poll_thread = std::thread([this] {
481 using namespace std::chrono_literals;
482 while (initialized) {
483 SDL_PumpEvents();
484 std::this_thread::sleep_for(10ms);
485 }
486 });
487 }
488 // Because the events for joystick connection happens before we have our event watcher added, we
489 // can just open all the joysticks right here
490 for (int i = 0; i < SDL_NumJoysticks(); ++i) {
491 InitJoystick(i);
492 }
493}
494
495SDLState::~SDLState() {
496 using namespace Input;
497 UnregisterFactory<ButtonDevice>("sdl");
498 UnregisterFactory<AnalogDevice>("sdl");
499
500 CloseJoysticks();
501 SDL_DelEventWatch(&SDLEventWatcher, this);
502
503 initialized = false;
504 if (start_thread) {
505 poll_thread.join();
506 SDL_QuitSubSystem(SDL_INIT_JOYSTICK);
507 }
508}
509
510Common::ParamPackage SDLEventToButtonParamPackage(SDLState& state, const SDL_Event& event) {
511 Common::ParamPackage params({{"engine", "sdl"}});
512
513 switch (event.type) {
514 case SDL_JOYAXISMOTION: {
515 auto joystick = state.GetSDLJoystickBySDLID(event.jaxis.which);
516 params.Set("port", joystick->GetPort());
517 params.Set("guid", joystick->GetGUID());
518 params.Set("axis", event.jaxis.axis);
519 if (event.jaxis.value > 0) {
520 params.Set("direction", "+");
521 params.Set("threshold", "0.5");
522 } else {
523 params.Set("direction", "-");
524 params.Set("threshold", "-0.5");
525 }
526 break;
527 }
528 case SDL_JOYBUTTONUP: {
529 auto joystick = state.GetSDLJoystickBySDLID(event.jbutton.which);
530 params.Set("port", joystick->GetPort());
531 params.Set("guid", joystick->GetGUID());
532 params.Set("button", event.jbutton.button);
533 break;
534 }
535 case SDL_JOYHATMOTION: {
536 auto joystick = state.GetSDLJoystickBySDLID(event.jhat.which);
537 params.Set("port", joystick->GetPort());
538 params.Set("guid", joystick->GetGUID());
539 params.Set("hat", event.jhat.hat);
540 switch (event.jhat.value) {
541 case SDL_HAT_UP:
542 params.Set("direction", "up");
543 break;
544 case SDL_HAT_DOWN:
545 params.Set("direction", "down");
546 break;
547 case SDL_HAT_LEFT:
548 params.Set("direction", "left");
549 break;
550 case SDL_HAT_RIGHT:
551 params.Set("direction", "right");
552 break;
553 default:
554 return {};
555 }
556 break;
557 }
558 }
559 return params;
560}
561
562namespace Polling {
563
564class SDLPoller : public InputCommon::Polling::DevicePoller {
565public:
566 explicit SDLPoller(SDLState& state_) : state(state_) {}
567
568 void Start() override {
569 state.event_queue.Clear();
570 state.polling = true;
571 }
572
573 void Stop() override {
574 state.polling = false;
575 }
576
577protected:
578 SDLState& state;
579};
580
581class SDLButtonPoller final : public SDLPoller {
582public:
583 explicit SDLButtonPoller(SDLState& state_) : SDLPoller(state_) {}
584
585 Common::ParamPackage GetNextInput() override {
586 SDL_Event event;
587 while (state.event_queue.Pop(event)) {
588 switch (event.type) {
589 case SDL_JOYAXISMOTION:
590 if (std::abs(event.jaxis.value / 32767.0) < 0.5) {
591 break;
592 }
593 case SDL_JOYBUTTONUP:
594 case SDL_JOYHATMOTION:
595 return SDLEventToButtonParamPackage(state, event);
596 }
597 }
598 return {};
599 }
600};
601
602class SDLAnalogPoller final : public SDLPoller {
603public:
604 explicit SDLAnalogPoller(SDLState& state_) : SDLPoller(state_) {}
605
606 void Start() override {
607 SDLPoller::Start();
608
609 // Reset stored axes
610 analog_xaxis = -1;
611 analog_yaxis = -1;
612 analog_axes_joystick = -1;
613 }
614
615 Common::ParamPackage GetNextInput() override {
616 SDL_Event event;
617 while (state.event_queue.Pop(event)) {
618 if (event.type != SDL_JOYAXISMOTION || std::abs(event.jaxis.value / 32767.0) < 0.5) {
619 continue;
620 }
621 // An analog device needs two axes, so we need to store the axis for later and wait for
622 // a second SDL event. The axes also must be from the same joystick.
623 int axis = event.jaxis.axis;
624 if (analog_xaxis == -1) {
625 analog_xaxis = axis;
626 analog_axes_joystick = event.jaxis.which;
627 } else if (analog_yaxis == -1 && analog_xaxis != axis &&
628 analog_axes_joystick == event.jaxis.which) {
629 analog_yaxis = axis;
630 }
631 }
632 Common::ParamPackage params;
633 if (analog_xaxis != -1 && analog_yaxis != -1) {
634 auto joystick = state.GetSDLJoystickBySDLID(event.jaxis.which);
635 params.Set("engine", "sdl");
636 params.Set("port", joystick->GetPort());
637 params.Set("guid", joystick->GetGUID());
638 params.Set("axis_x", analog_xaxis);
639 params.Set("axis_y", analog_yaxis);
640 analog_xaxis = -1;
641 analog_yaxis = -1;
642 analog_axes_joystick = -1;
643 return params;
644 }
645 return params;
646 }
647
648private:
649 int analog_xaxis = -1;
650 int analog_yaxis = -1;
651 SDL_JoystickID analog_axes_joystick = -1;
652};
653} // namespace Polling
654
655SDLState::Pollers SDLState::GetPollers(InputCommon::Polling::DeviceType type) {
656 Pollers pollers;
657
658 switch (type) {
659 case InputCommon::Polling::DeviceType::Analog:
660 pollers.emplace_back(std::make_unique<Polling::SDLAnalogPoller>(*this));
661 break;
662 case InputCommon::Polling::DeviceType::Button:
663 pollers.emplace_back(std::make_unique<Polling::SDLButtonPoller>(*this));
664 break;
665 }
666
667 return pollers;
668}
669
670} // namespace SDL
671} // namespace InputCommon
diff --git a/src/input_common/sdl/sdl_impl.h b/src/input_common/sdl/sdl_impl.h
new file mode 100644
index 000000000..2579741d6
--- /dev/null
+++ b/src/input_common/sdl/sdl_impl.h
@@ -0,0 +1,63 @@
1// Copyright 2018 Citra Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <atomic>
8#include <memory>
9#include <thread>
10#include "common/threadsafe_queue.h"
11#include "input_common/sdl/sdl.h"
12
13union SDL_Event;
14using SDL_Joystick = struct _SDL_Joystick;
15using SDL_JoystickID = s32;
16
17namespace InputCommon::SDL {
18
19class SDLJoystick;
20class SDLButtonFactory;
21class SDLAnalogFactory;
22
class SDLState : public State {
public:
    /// Initializes and registers SDL device factories
    SDLState();

    /// Unregisters SDL device factories and shut them down.
    ~SDLState() override;

    /// Handle SDL_Events for joysticks from SDL_PollEvent
    void HandleGameControllerEvent(const SDL_Event& event);

    /// Looks up the virtual joystick bound to the given SDL instance id.
    std::shared_ptr<SDLJoystick> GetSDLJoystickBySDLID(SDL_JoystickID sdl_id);
    /// Looks up the port-th virtual joystick with the given GUID.
    std::shared_ptr<SDLJoystick> GetSDLJoystickByGUID(const std::string& guid, int port);

    /// Get all DevicePoller that use the SDL backend for a specific device type
    Pollers GetPollers(Polling::DeviceType type) override;

    /// Used by the Pollers during config
    std::atomic<bool> polling = false;
    Common::SPSCQueue<SDL_Event> event_queue;

private:
    /// Opens (or re-attaches) the joystick at the given SDL device index.
    void InitJoystick(int joystick_index);
    /// Detaches the SDL handle from the matching virtual joystick on removal.
    void CloseJoystick(SDL_Joystick* sdl_joystick);

    /// Needs to be called before SDL_QuitSubSystem.
    void CloseJoysticks();

    /// Map of GUID of a list of corresponding virtual Joysticks
    std::unordered_map<std::string, std::vector<std::shared_ptr<SDLJoystick>>> joystick_map;
    std::mutex joystick_map_mutex; // guards joystick_map

    std::shared_ptr<SDLButtonFactory> button_factory;
    std::shared_ptr<SDLAnalogFactory> analog_factory;

    // True when this instance initialized SDL's joystick subsystem and owns poll_thread.
    bool start_thread = false;
    // Cleared in the destructor to stop the poll thread's loop.
    std::atomic<bool> initialized = false;

    std::thread poll_thread;
};
63} // namespace InputCommon::SDL
diff --git a/src/tests/CMakeLists.txt b/src/tests/CMakeLists.txt
index 37f09ce5f..c7038b217 100644
--- a/src/tests/CMakeLists.txt
+++ b/src/tests/CMakeLists.txt
@@ -1,4 +1,7 @@
1add_executable(tests 1add_executable(tests
2 common/bit_field.cpp
3 common/bit_utils.cpp
4 common/multi_level_queue.cpp
2 common/param_package.cpp 5 common/param_package.cpp
3 common/ring_buffer.cpp 6 common/ring_buffer.cpp
4 core/arm/arm_test_common.cpp 7 core/arm/arm_test_common.cpp
diff --git a/src/tests/common/bit_field.cpp b/src/tests/common/bit_field.cpp
new file mode 100644
index 000000000..8ca1889f9
--- /dev/null
+++ b/src/tests/common/bit_field.cpp
@@ -0,0 +1,90 @@
1// Copyright 2019 Citra Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <array>
6#include <cstring>
7#include <type_traits>
8#include <catch2/catch.hpp>
9#include "common/bit_field.h"
10
TEST_CASE("BitField", "[common]") {
    enum class TestEnum : u32 {
        A = 0b10111101,
        B = 0b10101110,
        C = 0b00001111,
    };

    // Little-endian storage: field bit offsets count from the LSB of `raw`.
    union LEBitField {
        u32_le raw;
        BitField<0, 6, u32> a;
        BitField<6, 4, s32> b;
        BitField<10, 8, TestEnum> c;
        BitField<18, 14, u32> d;
    } le_bitfield;

    // Big-endian storage: same field layout, byte-swapped raw representation.
    union BEBitField {
        u32_be raw;
        BitFieldBE<0, 6, u32> a;
        BitFieldBE<6, 4, s32> b;
        BitFieldBE<10, 8, TestEnum> c;
        BitFieldBE<18, 14, u32> d;
    } be_bitfield;

    static_assert(sizeof(LEBitField) == sizeof(u32));
    static_assert(sizeof(BEBitField) == sizeof(u32));
    static_assert(std::is_trivially_copyable_v<LEBitField>);
    static_assert(std::is_trivially_copyable_v<BEBitField>);

    // Raw bytes in memory order; the LE and BE unions below decode them differently.
    std::array<u8, 4> raw{{
        0b01101100,
        0b11110110,
        0b10111010,
        0b11101100,
    }};

    std::memcpy(&le_bitfield, &raw, sizeof(raw));
    std::memcpy(&be_bitfield, &raw, sizeof(raw));

    // bit fields: 11101100101110'10111101'1001'101100
    REQUIRE(le_bitfield.raw == 0b11101100'10111010'11110110'01101100);
    REQUIRE(le_bitfield.a == 0b101100);
    REQUIRE(le_bitfield.b == -7); // 1001 as two's complement
    REQUIRE(le_bitfield.c == TestEnum::A);
    REQUIRE(le_bitfield.d == 0b11101100101110);

    // Assign every field, then check both the packed value and the byte layout.
    le_bitfield.a.Assign(0b000111);
    le_bitfield.b.Assign(-1);
    le_bitfield.c.Assign(TestEnum::C);
    le_bitfield.d.Assign(0b01010101010101);
    std::memcpy(&raw, &le_bitfield, sizeof(raw));
    // bit fields: 01010101010101'00001111'1111'000111
    REQUIRE(le_bitfield.raw == 0b01010101'01010100'00111111'11000111);
    REQUIRE(raw == std::array<u8, 4>{{
                0b11000111,
                0b00111111,
                0b01010100,
                0b01010101,
            }});

    // bit fields: 01101100111101'10101110'1011'101100
    REQUIRE(be_bitfield.raw == 0b01101100'11110110'10111010'11101100);
    REQUIRE(be_bitfield.a == 0b101100);
    REQUIRE(be_bitfield.b == -5); // 1011 as two's complement
    REQUIRE(be_bitfield.c == TestEnum::B);
    REQUIRE(be_bitfield.d == 0b01101100111101);

    be_bitfield.a.Assign(0b000111);
    be_bitfield.b.Assign(-1);
    be_bitfield.c.Assign(TestEnum::C);
    be_bitfield.d.Assign(0b01010101010101);
    std::memcpy(&raw, &be_bitfield, sizeof(raw));
    // bit fields: 01010101010101'00001111'1111'000111
    REQUIRE(be_bitfield.raw == 0b01010101'01010100'00111111'11000111);
    // Same packed value as the LE case, but written to memory in reversed byte order.
    REQUIRE(raw == std::array<u8, 4>{{
                0b01010101,
                0b01010100,
                0b00111111,
                0b11000111,
            }});
}
diff --git a/src/tests/common/bit_utils.cpp b/src/tests/common/bit_utils.cpp
new file mode 100644
index 000000000..479b5995a
--- /dev/null
+++ b/src/tests/common/bit_utils.cpp
@@ -0,0 +1,23 @@
1// Copyright 2017 Citra Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <catch2/catch.hpp>
6#include <math.h>
7#include "common/bit_util.h"
8
9namespace Common {
10
TEST_CASE("BitUtils::CountTrailingZeroes", "[common]") {
    // Zero has no set bit, so the count saturates at the full bit width.
    REQUIRE(Common::CountTrailingZeroes32(0) == 32);
    REQUIRE(Common::CountTrailingZeroes64(0) == 64);
    REQUIRE(Common::CountTrailingZeroes32(9) == 0);
    REQUIRE(Common::CountTrailingZeroes32(8) == 3);
    REQUIRE(Common::CountTrailingZeroes32(0x801000) == 12);
    REQUIRE(Common::CountTrailingZeroes64(9) == 0);
    REQUIRE(Common::CountTrailingZeroes64(8) == 3);
    REQUIRE(Common::CountTrailingZeroes64(0x801000) == 12);
    // Bit position only representable by the 64-bit variant.
    REQUIRE(Common::CountTrailingZeroes64(0x801000000000UL) == 36);
}
22
23} // namespace Common
diff --git a/src/tests/common/multi_level_queue.cpp b/src/tests/common/multi_level_queue.cpp
new file mode 100644
index 000000000..cca7ec7da
--- /dev/null
+++ b/src/tests/common/multi_level_queue.cpp
@@ -0,0 +1,55 @@
1// Copyright 2019 Yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <catch2/catch.hpp>
6#include <math.h>
7#include "common/common_types.h"
8#include "common/multi_level_queue.h"
9
10namespace Common {
11
TEST_CASE("MultiLevelQueue", "[common]") {
    std::array<f32, 8> values = {0.0, 5.0, 1.0, 9.0, 8.0, 2.0, 6.0, 7.0};
    Common::MultiLevelQueue<f32, 64> mlq;
    REQUIRE(mlq.empty());
    // Insert each value at the priority equal to its index, in shuffled order.
    mlq.add(values[2], 2);
    mlq.add(values[7], 7);
    mlq.add(values[3], 3);
    mlq.add(values[4], 4);
    mlq.add(values[0], 0);
    mlq.add(values[5], 5);
    mlq.add(values[6], 6);
    mlq.add(values[1], 1);
    // Iteration must visit entries in ascending priority order.
    u32 index = 0;
    bool all_set = true;
    for (auto& f : mlq) {
        all_set &= (f == values[index]);
        index++;
    }
    REQUIRE(all_set);
    REQUIRE(!mlq.empty());
    f32 v = 8.0;
    mlq.add(v, 2);
    v = -7.0;
    mlq.add(v, 2, false); // third arg false: insert at the front of priority 2
    REQUIRE(mlq.front(2) == -7.0);
    // yield moves the current front of priority 2 behind the other entries.
    mlq.yield(2);
    REQUIRE(mlq.front(2) == values[2]);
    REQUIRE(mlq.back(2) == -7.0);
    REQUIRE(mlq.empty(8));
    v = 10.0;
    mlq.add(v, 8);
    // adjust relocates an entry from one priority (8) to another (9).
    mlq.adjust(v, 8, 9);
    REQUIRE(mlq.front(9) == v);
    REQUIRE(mlq.empty(8));
    REQUIRE(!mlq.empty(9));
    mlq.adjust(values[0], 0, 9);
    REQUIRE(mlq.highest_priority_set() == 1);
    REQUIRE(mlq.lowest_priority_set() == 9);
    mlq.remove(values[1], 1);
    REQUIRE(mlq.highest_priority_set() == 2);
    REQUIRE(mlq.empty(1));
}
54
55} // namespace Common
diff --git a/src/tests/core/arm/arm_test_common.cpp b/src/tests/core/arm/arm_test_common.cpp
index 9b8a44fa1..3e1a735c3 100644
--- a/src/tests/core/arm/arm_test_common.cpp
+++ b/src/tests/core/arm/arm_test_common.cpp
@@ -4,6 +4,7 @@
4 4
5#include <algorithm> 5#include <algorithm>
6 6
7#include "common/page_table.h"
7#include "core/core.h" 8#include "core/core.h"
8#include "core/hle/kernel/process.h" 9#include "core/hle/kernel/process.h"
9#include "core/memory.h" 10#include "core/memory.h"
@@ -13,16 +14,16 @@
13namespace ArmTests { 14namespace ArmTests {
14 15
15TestEnvironment::TestEnvironment(bool mutable_memory_) 16TestEnvironment::TestEnvironment(bool mutable_memory_)
16 : mutable_memory(mutable_memory_), test_memory(std::make_shared<TestMemory>(this)) { 17 : mutable_memory(mutable_memory_),
17 18 test_memory(std::make_shared<TestMemory>(this)), kernel{Core::System::GetInstance()} {
18 auto process = Kernel::Process::Create(kernel, ""); 19 auto process = Kernel::Process::Create(Core::System::GetInstance(), "");
19 kernel.MakeCurrentProcess(process.get()); 20 kernel.MakeCurrentProcess(process.get());
20 page_table = &Core::CurrentProcess()->VMManager().page_table; 21 page_table = &process->VMManager().page_table;
21 22
22 std::fill(page_table->pointers.begin(), page_table->pointers.end(), nullptr); 23 std::fill(page_table->pointers.begin(), page_table->pointers.end(), nullptr);
23 page_table->special_regions.clear(); 24 page_table->special_regions.clear();
24 std::fill(page_table->attributes.begin(), page_table->attributes.end(), 25 std::fill(page_table->attributes.begin(), page_table->attributes.end(),
25 Memory::PageType::Unmapped); 26 Common::PageType::Unmapped);
26 27
27 Memory::MapIoRegion(*page_table, 0x00000000, 0x80000000, test_memory); 28 Memory::MapIoRegion(*page_table, 0x00000000, 0x80000000, test_memory);
28 Memory::MapIoRegion(*page_table, 0x80000000, 0x80000000, test_memory); 29 Memory::MapIoRegion(*page_table, 0x80000000, 0x80000000, test_memory);
diff --git a/src/tests/core/arm/arm_test_common.h b/src/tests/core/arm/arm_test_common.h
index 0b7539601..d145dbfcc 100644
--- a/src/tests/core/arm/arm_test_common.h
+++ b/src/tests/core/arm/arm_test_common.h
@@ -9,10 +9,10 @@
9#include <vector> 9#include <vector>
10 10
11#include "common/common_types.h" 11#include "common/common_types.h"
12#include "common/memory_hook.h"
12#include "core/hle/kernel/kernel.h" 13#include "core/hle/kernel/kernel.h"
13#include "core/memory_hook.h"
14 14
15namespace Memory { 15namespace Common {
16struct PageTable; 16struct PageTable;
17} 17}
18 18
@@ -58,7 +58,7 @@ public:
58 58
59private: 59private:
60 friend struct TestMemory; 60 friend struct TestMemory;
61 struct TestMemory final : Memory::MemoryHook { 61 struct TestMemory final : Common::MemoryHook {
62 explicit TestMemory(TestEnvironment* env_) : env(env_) {} 62 explicit TestMemory(TestEnvironment* env_) : env(env_) {}
63 TestEnvironment* env; 63 TestEnvironment* env;
64 64
@@ -86,7 +86,7 @@ private:
86 bool mutable_memory; 86 bool mutable_memory;
87 std::shared_ptr<TestMemory> test_memory; 87 std::shared_ptr<TestMemory> test_memory;
88 std::vector<WriteRecord> write_records; 88 std::vector<WriteRecord> write_records;
89 Memory::PageTable* page_table = nullptr; 89 Common::PageTable* page_table = nullptr;
90 Kernel::KernelCore kernel; 90 Kernel::KernelCore kernel;
91}; 91};
92 92
diff --git a/src/tests/core/core_timing.cpp b/src/tests/core/core_timing.cpp
index 2242c14cf..340d6a272 100644
--- a/src/tests/core/core_timing.cpp
+++ b/src/tests/core/core_timing.cpp
@@ -28,100 +28,103 @@ void CallbackTemplate(u64 userdata, s64 cycles_late) {
28 REQUIRE(lateness == cycles_late); 28 REQUIRE(lateness == cycles_late);
29} 29}
30 30
31class ScopeInit final { 31struct ScopeInit final {
32public:
33 ScopeInit() { 32 ScopeInit() {
34 CoreTiming::Init(); 33 core_timing.Initialize();
35 } 34 }
36 ~ScopeInit() { 35 ~ScopeInit() {
37 CoreTiming::Shutdown(); 36 core_timing.Shutdown();
38 } 37 }
38
39 Core::Timing::CoreTiming core_timing;
39}; 40};
40 41
41static void AdvanceAndCheck(u32 idx, int downcount, int expected_lateness = 0, 42static void AdvanceAndCheck(Core::Timing::CoreTiming& core_timing, u32 idx, int downcount,
42 int cpu_downcount = 0) { 43 int expected_lateness = 0, int cpu_downcount = 0) {
43 callbacks_ran_flags = 0; 44 callbacks_ran_flags = 0;
44 expected_callback = CB_IDS[idx]; 45 expected_callback = CB_IDS[idx];
45 lateness = expected_lateness; 46 lateness = expected_lateness;
46 47
47 CoreTiming::AddTicks(CoreTiming::GetDowncount() - 48 // Pretend we executed X cycles of instructions.
48 cpu_downcount); // Pretend we executed X cycles of instructions. 49 core_timing.AddTicks(core_timing.GetDowncount() - cpu_downcount);
49 CoreTiming::Advance(); 50 core_timing.Advance();
50 51
51 REQUIRE(decltype(callbacks_ran_flags)().set(idx) == callbacks_ran_flags); 52 REQUIRE(decltype(callbacks_ran_flags)().set(idx) == callbacks_ran_flags);
52 REQUIRE(downcount == CoreTiming::GetDowncount()); 53 REQUIRE(downcount == core_timing.GetDowncount());
53} 54}
54 55
55TEST_CASE("CoreTiming[BasicOrder]", "[core]") { 56TEST_CASE("CoreTiming[BasicOrder]", "[core]") {
56 ScopeInit guard; 57 ScopeInit guard;
58 auto& core_timing = guard.core_timing;
57 59
58 CoreTiming::EventType* cb_a = CoreTiming::RegisterEvent("callbackA", CallbackTemplate<0>); 60 Core::Timing::EventType* cb_a = core_timing.RegisterEvent("callbackA", CallbackTemplate<0>);
59 CoreTiming::EventType* cb_b = CoreTiming::RegisterEvent("callbackB", CallbackTemplate<1>); 61 Core::Timing::EventType* cb_b = core_timing.RegisterEvent("callbackB", CallbackTemplate<1>);
60 CoreTiming::EventType* cb_c = CoreTiming::RegisterEvent("callbackC", CallbackTemplate<2>); 62 Core::Timing::EventType* cb_c = core_timing.RegisterEvent("callbackC", CallbackTemplate<2>);
61 CoreTiming::EventType* cb_d = CoreTiming::RegisterEvent("callbackD", CallbackTemplate<3>); 63 Core::Timing::EventType* cb_d = core_timing.RegisterEvent("callbackD", CallbackTemplate<3>);
62 CoreTiming::EventType* cb_e = CoreTiming::RegisterEvent("callbackE", CallbackTemplate<4>); 64 Core::Timing::EventType* cb_e = core_timing.RegisterEvent("callbackE", CallbackTemplate<4>);
63 65
64 // Enter slice 0 66 // Enter slice 0
65 CoreTiming::Advance(); 67 core_timing.Advance();
66 68
67 // D -> B -> C -> A -> E 69 // D -> B -> C -> A -> E
68 CoreTiming::ScheduleEvent(1000, cb_a, CB_IDS[0]); 70 core_timing.ScheduleEvent(1000, cb_a, CB_IDS[0]);
69 REQUIRE(1000 == CoreTiming::GetDowncount()); 71 REQUIRE(1000 == core_timing.GetDowncount());
70 CoreTiming::ScheduleEvent(500, cb_b, CB_IDS[1]); 72 core_timing.ScheduleEvent(500, cb_b, CB_IDS[1]);
71 REQUIRE(500 == CoreTiming::GetDowncount()); 73 REQUIRE(500 == core_timing.GetDowncount());
72 CoreTiming::ScheduleEvent(800, cb_c, CB_IDS[2]); 74 core_timing.ScheduleEvent(800, cb_c, CB_IDS[2]);
73 REQUIRE(500 == CoreTiming::GetDowncount()); 75 REQUIRE(500 == core_timing.GetDowncount());
74 CoreTiming::ScheduleEvent(100, cb_d, CB_IDS[3]); 76 core_timing.ScheduleEvent(100, cb_d, CB_IDS[3]);
75 REQUIRE(100 == CoreTiming::GetDowncount()); 77 REQUIRE(100 == core_timing.GetDowncount());
76 CoreTiming::ScheduleEvent(1200, cb_e, CB_IDS[4]); 78 core_timing.ScheduleEvent(1200, cb_e, CB_IDS[4]);
77 REQUIRE(100 == CoreTiming::GetDowncount()); 79 REQUIRE(100 == core_timing.GetDowncount());
78 80
79 AdvanceAndCheck(3, 400); 81 AdvanceAndCheck(core_timing, 3, 400);
80 AdvanceAndCheck(1, 300); 82 AdvanceAndCheck(core_timing, 1, 300);
81 AdvanceAndCheck(2, 200); 83 AdvanceAndCheck(core_timing, 2, 200);
82 AdvanceAndCheck(0, 200); 84 AdvanceAndCheck(core_timing, 0, 200);
83 AdvanceAndCheck(4, MAX_SLICE_LENGTH); 85 AdvanceAndCheck(core_timing, 4, MAX_SLICE_LENGTH);
84} 86}
85 87
86TEST_CASE("CoreTiming[Threadsave]", "[core]") { 88TEST_CASE("CoreTiming[Threadsave]", "[core]") {
87 ScopeInit guard; 89 ScopeInit guard;
90 auto& core_timing = guard.core_timing;
88 91
89 CoreTiming::EventType* cb_a = CoreTiming::RegisterEvent("callbackA", CallbackTemplate<0>); 92 Core::Timing::EventType* cb_a = core_timing.RegisterEvent("callbackA", CallbackTemplate<0>);
90 CoreTiming::EventType* cb_b = CoreTiming::RegisterEvent("callbackB", CallbackTemplate<1>); 93 Core::Timing::EventType* cb_b = core_timing.RegisterEvent("callbackB", CallbackTemplate<1>);
91 CoreTiming::EventType* cb_c = CoreTiming::RegisterEvent("callbackC", CallbackTemplate<2>); 94 Core::Timing::EventType* cb_c = core_timing.RegisterEvent("callbackC", CallbackTemplate<2>);
92 CoreTiming::EventType* cb_d = CoreTiming::RegisterEvent("callbackD", CallbackTemplate<3>); 95 Core::Timing::EventType* cb_d = core_timing.RegisterEvent("callbackD", CallbackTemplate<3>);
93 CoreTiming::EventType* cb_e = CoreTiming::RegisterEvent("callbackE", CallbackTemplate<4>); 96 Core::Timing::EventType* cb_e = core_timing.RegisterEvent("callbackE", CallbackTemplate<4>);
94 97
95 // Enter slice 0 98 // Enter slice 0
96 CoreTiming::Advance(); 99 core_timing.Advance();
97 100
98 // D -> B -> C -> A -> E 101 // D -> B -> C -> A -> E
99 CoreTiming::ScheduleEventThreadsafe(1000, cb_a, CB_IDS[0]); 102 core_timing.ScheduleEventThreadsafe(1000, cb_a, CB_IDS[0]);
100 // Manually force since ScheduleEventThreadsafe doesn't call it 103 // Manually force since ScheduleEventThreadsafe doesn't call it
101 CoreTiming::ForceExceptionCheck(1000); 104 core_timing.ForceExceptionCheck(1000);
102 REQUIRE(1000 == CoreTiming::GetDowncount()); 105 REQUIRE(1000 == core_timing.GetDowncount());
103 CoreTiming::ScheduleEventThreadsafe(500, cb_b, CB_IDS[1]); 106 core_timing.ScheduleEventThreadsafe(500, cb_b, CB_IDS[1]);
104 // Manually force since ScheduleEventThreadsafe doesn't call it 107 // Manually force since ScheduleEventThreadsafe doesn't call it
105 CoreTiming::ForceExceptionCheck(500); 108 core_timing.ForceExceptionCheck(500);
106 REQUIRE(500 == CoreTiming::GetDowncount()); 109 REQUIRE(500 == core_timing.GetDowncount());
107 CoreTiming::ScheduleEventThreadsafe(800, cb_c, CB_IDS[2]); 110 core_timing.ScheduleEventThreadsafe(800, cb_c, CB_IDS[2]);
108 // Manually force since ScheduleEventThreadsafe doesn't call it 111 // Manually force since ScheduleEventThreadsafe doesn't call it
109 CoreTiming::ForceExceptionCheck(800); 112 core_timing.ForceExceptionCheck(800);
110 REQUIRE(500 == CoreTiming::GetDowncount()); 113 REQUIRE(500 == core_timing.GetDowncount());
111 CoreTiming::ScheduleEventThreadsafe(100, cb_d, CB_IDS[3]); 114 core_timing.ScheduleEventThreadsafe(100, cb_d, CB_IDS[3]);
112 // Manually force since ScheduleEventThreadsafe doesn't call it 115 // Manually force since ScheduleEventThreadsafe doesn't call it
113 CoreTiming::ForceExceptionCheck(100); 116 core_timing.ForceExceptionCheck(100);
114 REQUIRE(100 == CoreTiming::GetDowncount()); 117 REQUIRE(100 == core_timing.GetDowncount());
115 CoreTiming::ScheduleEventThreadsafe(1200, cb_e, CB_IDS[4]); 118 core_timing.ScheduleEventThreadsafe(1200, cb_e, CB_IDS[4]);
116 // Manually force since ScheduleEventThreadsafe doesn't call it 119 // Manually force since ScheduleEventThreadsafe doesn't call it
117 CoreTiming::ForceExceptionCheck(1200); 120 core_timing.ForceExceptionCheck(1200);
118 REQUIRE(100 == CoreTiming::GetDowncount()); 121 REQUIRE(100 == core_timing.GetDowncount());
119 122
120 AdvanceAndCheck(3, 400); 123 AdvanceAndCheck(core_timing, 3, 400);
121 AdvanceAndCheck(1, 300); 124 AdvanceAndCheck(core_timing, 1, 300);
122 AdvanceAndCheck(2, 200); 125 AdvanceAndCheck(core_timing, 2, 200);
123 AdvanceAndCheck(0, 200); 126 AdvanceAndCheck(core_timing, 0, 200);
124 AdvanceAndCheck(4, MAX_SLICE_LENGTH); 127 AdvanceAndCheck(core_timing, 4, MAX_SLICE_LENGTH);
125} 128}
126 129
127namespace SharedSlotTest { 130namespace SharedSlotTest {
@@ -142,59 +145,63 @@ TEST_CASE("CoreTiming[SharedSlot]", "[core]") {
142 using namespace SharedSlotTest; 145 using namespace SharedSlotTest;
143 146
144 ScopeInit guard; 147 ScopeInit guard;
148 auto& core_timing = guard.core_timing;
145 149
146 CoreTiming::EventType* cb_a = CoreTiming::RegisterEvent("callbackA", FifoCallback<0>); 150 Core::Timing::EventType* cb_a = core_timing.RegisterEvent("callbackA", FifoCallback<0>);
147 CoreTiming::EventType* cb_b = CoreTiming::RegisterEvent("callbackB", FifoCallback<1>); 151 Core::Timing::EventType* cb_b = core_timing.RegisterEvent("callbackB", FifoCallback<1>);
148 CoreTiming::EventType* cb_c = CoreTiming::RegisterEvent("callbackC", FifoCallback<2>); 152 Core::Timing::EventType* cb_c = core_timing.RegisterEvent("callbackC", FifoCallback<2>);
149 CoreTiming::EventType* cb_d = CoreTiming::RegisterEvent("callbackD", FifoCallback<3>); 153 Core::Timing::EventType* cb_d = core_timing.RegisterEvent("callbackD", FifoCallback<3>);
150 CoreTiming::EventType* cb_e = CoreTiming::RegisterEvent("callbackE", FifoCallback<4>); 154 Core::Timing::EventType* cb_e = core_timing.RegisterEvent("callbackE", FifoCallback<4>);
151 155
152 CoreTiming::ScheduleEvent(1000, cb_a, CB_IDS[0]); 156 core_timing.ScheduleEvent(1000, cb_a, CB_IDS[0]);
153 CoreTiming::ScheduleEvent(1000, cb_b, CB_IDS[1]); 157 core_timing.ScheduleEvent(1000, cb_b, CB_IDS[1]);
154 CoreTiming::ScheduleEvent(1000, cb_c, CB_IDS[2]); 158 core_timing.ScheduleEvent(1000, cb_c, CB_IDS[2]);
155 CoreTiming::ScheduleEvent(1000, cb_d, CB_IDS[3]); 159 core_timing.ScheduleEvent(1000, cb_d, CB_IDS[3]);
156 CoreTiming::ScheduleEvent(1000, cb_e, CB_IDS[4]); 160 core_timing.ScheduleEvent(1000, cb_e, CB_IDS[4]);
157 161
158 // Enter slice 0 162 // Enter slice 0
159 CoreTiming::Advance(); 163 core_timing.Advance();
160 REQUIRE(1000 == CoreTiming::GetDowncount()); 164 REQUIRE(1000 == core_timing.GetDowncount());
161 165
162 callbacks_ran_flags = 0; 166 callbacks_ran_flags = 0;
163 counter = 0; 167 counter = 0;
164 lateness = 0; 168 lateness = 0;
165 CoreTiming::AddTicks(CoreTiming::GetDowncount()); 169 core_timing.AddTicks(core_timing.GetDowncount());
166 CoreTiming::Advance(); 170 core_timing.Advance();
167 REQUIRE(MAX_SLICE_LENGTH == CoreTiming::GetDowncount()); 171 REQUIRE(MAX_SLICE_LENGTH == core_timing.GetDowncount());
168 REQUIRE(0x1FULL == callbacks_ran_flags.to_ullong()); 172 REQUIRE(0x1FULL == callbacks_ran_flags.to_ullong());
169} 173}
170 174
171TEST_CASE("CoreTiming[PredictableLateness]", "[core]") { 175TEST_CASE("Core::Timing[PredictableLateness]", "[core]") {
172 ScopeInit guard; 176 ScopeInit guard;
177 auto& core_timing = guard.core_timing;
173 178
174 CoreTiming::EventType* cb_a = CoreTiming::RegisterEvent("callbackA", CallbackTemplate<0>); 179 Core::Timing::EventType* cb_a = core_timing.RegisterEvent("callbackA", CallbackTemplate<0>);
175 CoreTiming::EventType* cb_b = CoreTiming::RegisterEvent("callbackB", CallbackTemplate<1>); 180 Core::Timing::EventType* cb_b = core_timing.RegisterEvent("callbackB", CallbackTemplate<1>);
176 181
177 // Enter slice 0 182 // Enter slice 0
178 CoreTiming::Advance(); 183 core_timing.Advance();
179 184
180 CoreTiming::ScheduleEvent(100, cb_a, CB_IDS[0]); 185 core_timing.ScheduleEvent(100, cb_a, CB_IDS[0]);
181 CoreTiming::ScheduleEvent(200, cb_b, CB_IDS[1]); 186 core_timing.ScheduleEvent(200, cb_b, CB_IDS[1]);
182 187
183 AdvanceAndCheck(0, 90, 10, -10); // (100 - 10) 188 AdvanceAndCheck(core_timing, 0, 90, 10, -10); // (100 - 10)
184 AdvanceAndCheck(1, MAX_SLICE_LENGTH, 50, -50); 189 AdvanceAndCheck(core_timing, 1, MAX_SLICE_LENGTH, 50, -50);
185} 190}
186 191
187namespace ChainSchedulingTest { 192namespace ChainSchedulingTest {
188static int reschedules = 0; 193static int reschedules = 0;
189 194
190static void RescheduleCallback(u64 userdata, s64 cycles_late) { 195static void RescheduleCallback(Core::Timing::CoreTiming& core_timing, u64 userdata,
196 s64 cycles_late) {
191 --reschedules; 197 --reschedules;
192 REQUIRE(reschedules >= 0); 198 REQUIRE(reschedules >= 0);
193 REQUIRE(lateness == cycles_late); 199 REQUIRE(lateness == cycles_late);
194 200
195 if (reschedules > 0) 201 if (reschedules > 0) {
196 CoreTiming::ScheduleEvent(1000, reinterpret_cast<CoreTiming::EventType*>(userdata), 202 core_timing.ScheduleEvent(1000, reinterpret_cast<Core::Timing::EventType*>(userdata),
197 userdata); 203 userdata);
204 }
198} 205}
199} // namespace ChainSchedulingTest 206} // namespace ChainSchedulingTest
200 207
@@ -202,36 +209,39 @@ TEST_CASE("CoreTiming[ChainScheduling]", "[core]") {
202 using namespace ChainSchedulingTest; 209 using namespace ChainSchedulingTest;
203 210
204 ScopeInit guard; 211 ScopeInit guard;
212 auto& core_timing = guard.core_timing;
205 213
206 CoreTiming::EventType* cb_a = CoreTiming::RegisterEvent("callbackA", CallbackTemplate<0>); 214 Core::Timing::EventType* cb_a = core_timing.RegisterEvent("callbackA", CallbackTemplate<0>);
207 CoreTiming::EventType* cb_b = CoreTiming::RegisterEvent("callbackB", CallbackTemplate<1>); 215 Core::Timing::EventType* cb_b = core_timing.RegisterEvent("callbackB", CallbackTemplate<1>);
208 CoreTiming::EventType* cb_c = CoreTiming::RegisterEvent("callbackC", CallbackTemplate<2>); 216 Core::Timing::EventType* cb_c = core_timing.RegisterEvent("callbackC", CallbackTemplate<2>);
209 CoreTiming::EventType* cb_rs = 217 Core::Timing::EventType* cb_rs = core_timing.RegisterEvent(
210 CoreTiming::RegisterEvent("callbackReschedule", RescheduleCallback); 218 "callbackReschedule", [&core_timing](u64 userdata, s64 cycles_late) {
219 RescheduleCallback(core_timing, userdata, cycles_late);
220 });
211 221
212 // Enter slice 0 222 // Enter slice 0
213 CoreTiming::Advance(); 223 core_timing.Advance();
214 224
215 CoreTiming::ScheduleEvent(800, cb_a, CB_IDS[0]); 225 core_timing.ScheduleEvent(800, cb_a, CB_IDS[0]);
216 CoreTiming::ScheduleEvent(1000, cb_b, CB_IDS[1]); 226 core_timing.ScheduleEvent(1000, cb_b, CB_IDS[1]);
217 CoreTiming::ScheduleEvent(2200, cb_c, CB_IDS[2]); 227 core_timing.ScheduleEvent(2200, cb_c, CB_IDS[2]);
218 CoreTiming::ScheduleEvent(1000, cb_rs, reinterpret_cast<u64>(cb_rs)); 228 core_timing.ScheduleEvent(1000, cb_rs, reinterpret_cast<u64>(cb_rs));
219 REQUIRE(800 == CoreTiming::GetDowncount()); 229 REQUIRE(800 == core_timing.GetDowncount());
220 230
221 reschedules = 3; 231 reschedules = 3;
222 AdvanceAndCheck(0, 200); // cb_a 232 AdvanceAndCheck(core_timing, 0, 200); // cb_a
223 AdvanceAndCheck(1, 1000); // cb_b, cb_rs 233 AdvanceAndCheck(core_timing, 1, 1000); // cb_b, cb_rs
224 REQUIRE(2 == reschedules); 234 REQUIRE(2 == reschedules);
225 235
226 CoreTiming::AddTicks(CoreTiming::GetDowncount()); 236 core_timing.AddTicks(core_timing.GetDowncount());
227 CoreTiming::Advance(); // cb_rs 237 core_timing.Advance(); // cb_rs
228 REQUIRE(1 == reschedules); 238 REQUIRE(1 == reschedules);
229 REQUIRE(200 == CoreTiming::GetDowncount()); 239 REQUIRE(200 == core_timing.GetDowncount());
230 240
231 AdvanceAndCheck(2, 800); // cb_c 241 AdvanceAndCheck(core_timing, 2, 800); // cb_c
232 242
233 CoreTiming::AddTicks(CoreTiming::GetDowncount()); 243 core_timing.AddTicks(core_timing.GetDowncount());
234 CoreTiming::Advance(); // cb_rs 244 core_timing.Advance(); // cb_rs
235 REQUIRE(0 == reschedules); 245 REQUIRE(0 == reschedules);
236 REQUIRE(MAX_SLICE_LENGTH == CoreTiming::GetDowncount()); 246 REQUIRE(MAX_SLICE_LENGTH == core_timing.GetDowncount());
237} 247}
diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt
index 33e507e69..242a0d1cd 100644
--- a/src/video_core/CMakeLists.txt
+++ b/src/video_core/CMakeLists.txt
@@ -5,18 +5,24 @@ add_library(video_core STATIC
5 debug_utils/debug_utils.h 5 debug_utils/debug_utils.h
6 engines/fermi_2d.cpp 6 engines/fermi_2d.cpp
7 engines/fermi_2d.h 7 engines/fermi_2d.h
8 engines/kepler_compute.cpp
9 engines/kepler_compute.h
8 engines/kepler_memory.cpp 10 engines/kepler_memory.cpp
9 engines/kepler_memory.h 11 engines/kepler_memory.h
10 engines/maxwell_3d.cpp 12 engines/maxwell_3d.cpp
11 engines/maxwell_3d.h 13 engines/maxwell_3d.h
12 engines/maxwell_compute.cpp
13 engines/maxwell_compute.h
14 engines/maxwell_dma.cpp 14 engines/maxwell_dma.cpp
15 engines/maxwell_dma.h 15 engines/maxwell_dma.h
16 engines/shader_bytecode.h 16 engines/shader_bytecode.h
17 engines/shader_header.h 17 engines/shader_header.h
18 gpu.cpp 18 gpu.cpp
19 gpu.h 19 gpu.h
20 gpu_asynch.cpp
21 gpu_asynch.h
22 gpu_synch.cpp
23 gpu_synch.h
24 gpu_thread.cpp
25 gpu_thread.h
20 macro_interpreter.cpp 26 macro_interpreter.cpp
21 macro_interpreter.h 27 macro_interpreter.h
22 memory_manager.cpp 28 memory_manager.cpp
@@ -74,6 +80,7 @@ add_library(video_core STATIC
74 shader/decode/hfma2.cpp 80 shader/decode/hfma2.cpp
75 shader/decode/conversion.cpp 81 shader/decode/conversion.cpp
76 shader/decode/memory.cpp 82 shader/decode/memory.cpp
83 shader/decode/texture.cpp
77 shader/decode/float_set_predicate.cpp 84 shader/decode/float_set_predicate.cpp
78 shader/decode/integer_set_predicate.cpp 85 shader/decode/integer_set_predicate.cpp
79 shader/decode/half_set_predicate.cpp 86 shader/decode/half_set_predicate.cpp
@@ -94,6 +101,8 @@ add_library(video_core STATIC
94 surface.h 101 surface.h
95 textures/astc.cpp 102 textures/astc.cpp
96 textures/astc.h 103 textures/astc.h
104 textures/convert.cpp
105 textures/convert.h
97 textures/decoders.cpp 106 textures/decoders.cpp
98 textures/decoders.h 107 textures/decoders.h
99 textures/texture.h 108 textures/texture.h
@@ -101,7 +110,33 @@ add_library(video_core STATIC
101 video_core.h 110 video_core.h
102) 111)
103 112
113if (ENABLE_VULKAN)
114 target_sources(video_core PRIVATE
115 renderer_vulkan/declarations.h
116 renderer_vulkan/maxwell_to_vk.cpp
117 renderer_vulkan/maxwell_to_vk.h
118 renderer_vulkan/vk_buffer_cache.cpp
119 renderer_vulkan/vk_buffer_cache.h
120 renderer_vulkan/vk_device.cpp
121 renderer_vulkan/vk_device.h
122 renderer_vulkan/vk_memory_manager.cpp
123 renderer_vulkan/vk_memory_manager.h
124 renderer_vulkan/vk_resource_manager.cpp
125 renderer_vulkan/vk_resource_manager.h
126 renderer_vulkan/vk_sampler_cache.cpp
127 renderer_vulkan/vk_sampler_cache.h
128 renderer_vulkan/vk_scheduler.cpp
129 renderer_vulkan/vk_scheduler.h
130 renderer_vulkan/vk_stream_buffer.cpp
131 renderer_vulkan/vk_stream_buffer.h
132 renderer_vulkan/vk_swapchain.cpp
133 renderer_vulkan/vk_swapchain.h)
134
135 target_include_directories(video_core PRIVATE ../../externals/Vulkan-Headers/include)
136 target_compile_definitions(video_core PRIVATE HAS_VULKAN)
137endif()
138
104create_target_directory_groups(video_core) 139create_target_directory_groups(video_core)
105 140
106target_link_libraries(video_core PUBLIC common core) 141target_link_libraries(video_core PUBLIC common core)
107target_link_libraries(video_core PRIVATE glad lz4_static) 142target_link_libraries(video_core PRIVATE glad)
diff --git a/src/video_core/debug_utils/debug_utils.cpp b/src/video_core/debug_utils/debug_utils.cpp
index 5ffb492ea..f0ef67535 100644
--- a/src/video_core/debug_utils/debug_utils.cpp
+++ b/src/video_core/debug_utils/debug_utils.cpp
@@ -10,7 +10,7 @@ namespace Tegra {
10 10
11void DebugContext::DoOnEvent(Event event, void* data) { 11void DebugContext::DoOnEvent(Event event, void* data) {
12 { 12 {
13 std::unique_lock<std::mutex> lock(breakpoint_mutex); 13 std::unique_lock lock{breakpoint_mutex};
14 14
15 // TODO(Subv): Commit the rasterizer's caches so framebuffers, render targets, etc. will 15 // TODO(Subv): Commit the rasterizer's caches so framebuffers, render targets, etc. will
16 // show on debug widgets 16 // show on debug widgets
@@ -32,7 +32,7 @@ void DebugContext::DoOnEvent(Event event, void* data) {
32 32
33void DebugContext::Resume() { 33void DebugContext::Resume() {
34 { 34 {
35 std::lock_guard<std::mutex> lock(breakpoint_mutex); 35 std::lock_guard lock{breakpoint_mutex};
36 36
37 // Tell all observers that we are about to resume 37 // Tell all observers that we are about to resume
38 for (auto& breakpoint_observer : breakpoint_observers) { 38 for (auto& breakpoint_observer : breakpoint_observers) {
diff --git a/src/video_core/debug_utils/debug_utils.h b/src/video_core/debug_utils/debug_utils.h
index c235faf46..ac3a2eb01 100644
--- a/src/video_core/debug_utils/debug_utils.h
+++ b/src/video_core/debug_utils/debug_utils.h
@@ -40,7 +40,7 @@ public:
40 /// Constructs the object such that it observes events of the given DebugContext. 40 /// Constructs the object such that it observes events of the given DebugContext.
41 explicit BreakPointObserver(std::shared_ptr<DebugContext> debug_context) 41 explicit BreakPointObserver(std::shared_ptr<DebugContext> debug_context)
42 : context_weak(debug_context) { 42 : context_weak(debug_context) {
43 std::unique_lock<std::mutex> lock(debug_context->breakpoint_mutex); 43 std::unique_lock lock{debug_context->breakpoint_mutex};
44 debug_context->breakpoint_observers.push_back(this); 44 debug_context->breakpoint_observers.push_back(this);
45 } 45 }
46 46
@@ -48,7 +48,7 @@ public:
48 auto context = context_weak.lock(); 48 auto context = context_weak.lock();
49 if (context) { 49 if (context) {
50 { 50 {
51 std::unique_lock<std::mutex> lock(context->breakpoint_mutex); 51 std::unique_lock lock{context->breakpoint_mutex};
52 context->breakpoint_observers.remove(this); 52 context->breakpoint_observers.remove(this);
53 } 53 }
54 54
diff --git a/src/video_core/dma_pusher.cpp b/src/video_core/dma_pusher.cpp
index eb9bf1878..8b1bea1ae 100644
--- a/src/video_core/dma_pusher.cpp
+++ b/src/video_core/dma_pusher.cpp
@@ -33,18 +33,33 @@ void DmaPusher::DispatchCalls() {
33} 33}
34 34
35bool DmaPusher::Step() { 35bool DmaPusher::Step() {
36 if (dma_get != dma_put) { 36 if (!ib_enable || dma_pushbuffer.empty()) {
37 // Push buffer non-empty, read a word 37 // pushbuffer empty and IB empty or nonexistent - nothing to do
38 const auto address = gpu.MemoryManager().GpuToCpuAddress(dma_get); 38 return false;
39 ASSERT_MSG(address, "Invalid GPU address"); 39 }
40 40
41 const CommandHeader command_header{Memory::Read32(*address)}; 41 const CommandList& command_list{dma_pushbuffer.front()};
42 const CommandListHeader command_list_header{command_list[dma_pushbuffer_subindex++]};
43 GPUVAddr dma_get = command_list_header.addr;
44 GPUVAddr dma_put = dma_get + command_list_header.size * sizeof(u32);
45 bool non_main = command_list_header.is_non_main;
42 46
43 dma_get += sizeof(u32); 47 if (dma_pushbuffer_subindex >= command_list.size()) {
48 // We've gone through the current list, remove it from the queue
49 dma_pushbuffer.pop();
50 dma_pushbuffer_subindex = 0;
51 }
44 52
45 if (!non_main) { 53 if (command_list_header.size == 0) {
46 dma_mget = dma_get; 54 return true;
47 } 55 }
56
57 // Push buffer non-empty, read a word
58 command_headers.resize(command_list_header.size);
59 gpu.MemoryManager().ReadBlock(dma_get, command_headers.data(),
60 command_list_header.size * sizeof(u32));
61
62 for (const CommandHeader& command_header : command_headers) {
48 63
49 // now, see if we're in the middle of a command 64 // now, see if we're in the middle of a command
50 if (dma_state.length_pending) { 65 if (dma_state.length_pending) {
@@ -91,22 +106,11 @@ bool DmaPusher::Step() {
91 break; 106 break;
92 } 107 }
93 } 108 }
94 } else if (ib_enable && !dma_pushbuffer.empty()) { 109 }
95 // Current pushbuffer empty, but we have more IB entries to read 110
96 const CommandList& command_list{dma_pushbuffer.front()}; 111 if (!non_main) {
97 const CommandListHeader& command_list_header{command_list[dma_pushbuffer_subindex++]}; 112 // TODO (degasus): This is dead code, as dma_mget is never read.
98 dma_get = command_list_header.addr; 113 dma_mget = dma_put;
99 dma_put = dma_get + command_list_header.size * sizeof(u32);
100 non_main = command_list_header.is_non_main;
101
102 if (dma_pushbuffer_subindex >= command_list.size()) {
103 // We've gone through the current list, remove it from the queue
104 dma_pushbuffer.pop();
105 dma_pushbuffer_subindex = 0;
106 }
107 } else {
108 // Otherwise, pushbuffer empty and IB empty or nonexistent - nothing to do
109 return {};
110 } 114 }
111 115
112 return true; 116 return true;
diff --git a/src/video_core/dma_pusher.h b/src/video_core/dma_pusher.h
index 1097e5c49..6ab06518f 100644
--- a/src/video_core/dma_pusher.h
+++ b/src/video_core/dma_pusher.h
@@ -9,7 +9,6 @@
9 9
10#include "common/bit_field.h" 10#include "common/bit_field.h"
11#include "common/common_types.h" 11#include "common/common_types.h"
12#include "video_core/memory_manager.h"
13 12
14namespace Tegra { 13namespace Tegra {
15 14
@@ -75,6 +74,8 @@ private:
75 74
76 GPU& gpu; 75 GPU& gpu;
77 76
77 std::vector<CommandHeader> command_headers; ///< Buffer for list of commands fetched at once
78
78 std::queue<CommandList> dma_pushbuffer; ///< Queue of command lists to be processed 79 std::queue<CommandList> dma_pushbuffer; ///< Queue of command lists to be processed
79 std::size_t dma_pushbuffer_subindex{}; ///< Index within a command list within the pushbuffer 80 std::size_t dma_pushbuffer_subindex{}; ///< Index within a command list within the pushbuffer
80 81
@@ -89,11 +90,8 @@ private:
89 DmaState dma_state{}; 90 DmaState dma_state{};
90 bool dma_increment_once{}; 91 bool dma_increment_once{};
91 92
92 GPUVAddr dma_put{}; ///< pushbuffer current end address
93 GPUVAddr dma_get{}; ///< pushbuffer current read address
94 GPUVAddr dma_mget{}; ///< main pushbuffer last read address 93 GPUVAddr dma_mget{}; ///< main pushbuffer last read address
95 bool ib_enable{true}; ///< IB mode enabled 94 bool ib_enable{true}; ///< IB mode enabled
96 bool non_main{}; ///< non-main pushbuffer active
97}; 95};
98 96
99} // namespace Tegra 97} // namespace Tegra
diff --git a/src/video_core/engines/fermi_2d.cpp b/src/video_core/engines/fermi_2d.cpp
index 9f1533263..03b7ee5d8 100644
--- a/src/video_core/engines/fermi_2d.cpp
+++ b/src/video_core/engines/fermi_2d.cpp
@@ -2,12 +2,11 @@
2// Licensed under GPLv2 or any later version 2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included. 3// Refer to the license.txt file included.
4 4
5#include "core/core.h" 5#include "common/assert.h"
6#include "core/memory.h" 6#include "common/logging/log.h"
7#include "common/math_util.h"
7#include "video_core/engines/fermi_2d.h" 8#include "video_core/engines/fermi_2d.h"
8#include "video_core/engines/maxwell_3d.h"
9#include "video_core/rasterizer_interface.h" 9#include "video_core/rasterizer_interface.h"
10#include "video_core/textures/decoders.h"
11 10
12namespace Tegra::Engines { 11namespace Tegra::Engines {
13 12
@@ -21,7 +20,9 @@ void Fermi2D::CallMethod(const GPU::MethodCall& method_call) {
21 regs.reg_array[method_call.method] = method_call.argument; 20 regs.reg_array[method_call.method] = method_call.argument;
22 21
23 switch (method_call.method) { 22 switch (method_call.method) {
24 case FERMI2D_REG_INDEX(trigger): { 23 // Trigger the surface copy on the last register write. This is blit_src_y, but this is 64-bit,
24 // so trigger on the second 32-bit write.
25 case FERMI2D_REG_INDEX(blit_src_y) + 1: {
25 HandleSurfaceCopy(); 26 HandleSurfaceCopy();
26 break; 27 break;
27 } 28 }
@@ -32,57 +33,23 @@ void Fermi2D::HandleSurfaceCopy() {
32 LOG_WARNING(HW_GPU, "Requested a surface copy with operation {}", 33 LOG_WARNING(HW_GPU, "Requested a surface copy with operation {}",
33 static_cast<u32>(regs.operation)); 34 static_cast<u32>(regs.operation));
34 35
35 const GPUVAddr source = regs.src.Address();
36 const GPUVAddr dest = regs.dst.Address();
37
38 // TODO(Subv): Only same-format and same-size copies are allowed for now.
39 ASSERT(regs.src.format == regs.dst.format);
40 ASSERT(regs.src.width * regs.src.height == regs.dst.width * regs.dst.height);
41
42 // TODO(Subv): Only raw copies are implemented. 36 // TODO(Subv): Only raw copies are implemented.
43 ASSERT(regs.operation == Regs::Operation::SrcCopy); 37 ASSERT(regs.operation == Regs::Operation::SrcCopy);
44 38
45 const auto source_cpu = memory_manager.GpuToCpuAddress(source); 39 const u32 src_blit_x1{static_cast<u32>(regs.blit_src_x >> 32)};
46 const auto dest_cpu = memory_manager.GpuToCpuAddress(dest); 40 const u32 src_blit_y1{static_cast<u32>(regs.blit_src_y >> 32)};
47 ASSERT_MSG(source_cpu, "Invalid source GPU address"); 41 const u32 src_blit_x2{
48 ASSERT_MSG(dest_cpu, "Invalid destination GPU address"); 42 static_cast<u32>((regs.blit_src_x + (regs.blit_dst_width * regs.blit_du_dx)) >> 32)};
49 43 const u32 src_blit_y2{
50 u32 src_bytes_per_pixel = RenderTargetBytesPerPixel(regs.src.format); 44 static_cast<u32>((regs.blit_src_y + (regs.blit_dst_height * regs.blit_dv_dy)) >> 32)};
51 u32 dst_bytes_per_pixel = RenderTargetBytesPerPixel(regs.dst.format);
52
53 if (!rasterizer.AccelerateSurfaceCopy(regs.src, regs.dst)) {
54 // All copies here update the main memory, so mark all rasterizer states as invalid.
55 Core::System::GetInstance().GPU().Maxwell3D().dirty_flags.OnMemoryWrite();
56 45
57 rasterizer.FlushRegion(*source_cpu, src_bytes_per_pixel * regs.src.width * regs.src.height); 46 const Common::Rectangle<u32> src_rect{src_blit_x1, src_blit_y1, src_blit_x2, src_blit_y2};
58 // We have to invalidate the destination region to evict any outdated surfaces from the 47 const Common::Rectangle<u32> dst_rect{regs.blit_dst_x, regs.blit_dst_y,
59 // cache. We do this before actually writing the new data because the destination address 48 regs.blit_dst_x + regs.blit_dst_width,
60 // might contain a dirty surface that will have to be written back to memory. 49 regs.blit_dst_y + regs.blit_dst_height};
61 rasterizer.InvalidateRegion(*dest_cpu,
62 dst_bytes_per_pixel * regs.dst.width * regs.dst.height);
63 50
64 if (regs.src.linear == regs.dst.linear) { 51 if (!rasterizer.AccelerateSurfaceCopy(regs.src, regs.dst, src_rect, dst_rect)) {
65 // If the input layout and the output layout are the same, just perform a raw copy. 52 UNIMPLEMENTED();
66 ASSERT(regs.src.BlockHeight() == regs.dst.BlockHeight());
67 Memory::CopyBlock(*dest_cpu, *source_cpu,
68 src_bytes_per_pixel * regs.dst.width * regs.dst.height);
69 return;
70 }
71 u8* src_buffer = Memory::GetPointer(*source_cpu);
72 u8* dst_buffer = Memory::GetPointer(*dest_cpu);
73 if (!regs.src.linear && regs.dst.linear) {
74 // If the input is tiled and the output is linear, deswizzle the input and copy it over.
75 Texture::CopySwizzledData(regs.src.width, regs.src.height, regs.src.depth,
76 src_bytes_per_pixel, dst_bytes_per_pixel, src_buffer,
77 dst_buffer, true, regs.src.BlockHeight(),
78 regs.src.BlockDepth(), 0);
79 } else {
80 // If the input is linear and the output is tiled, swizzle the input and copy it over.
81 Texture::CopySwizzledData(regs.src.width, regs.src.height, regs.src.depth,
82 src_bytes_per_pixel, dst_bytes_per_pixel, dst_buffer,
83 src_buffer, false, regs.dst.BlockHeight(),
84 regs.dst.BlockDepth(), 0);
85 }
86 } 53 }
87} 54}
88 55
diff --git a/src/video_core/engines/fermi_2d.h b/src/video_core/engines/fermi_2d.h
index 50009bf75..80523e320 100644
--- a/src/video_core/engines/fermi_2d.h
+++ b/src/video_core/engines/fermi_2d.h
@@ -5,7 +5,7 @@
5#pragma once 5#pragma once
6 6
7#include <array> 7#include <array>
8#include "common/assert.h" 8#include <cstddef>
9#include "common/bit_field.h" 9#include "common/bit_field.h"
10#include "common/common_funcs.h" 10#include "common/common_funcs.h"
11#include "common/common_types.h" 11#include "common/common_types.h"
@@ -94,12 +94,22 @@ public:
94 94
95 Operation operation; 95 Operation operation;
96 96
97 INSERT_PADDING_WORDS(0x9); 97 INSERT_PADDING_WORDS(0x177);
98 98
99 // TODO(Subv): This is only a guess. 99 u32 blit_control;
100 u32 trigger;
101 100
102 INSERT_PADDING_WORDS(0x1A3); 101 INSERT_PADDING_WORDS(0x8);
102
103 u32 blit_dst_x;
104 u32 blit_dst_y;
105 u32 blit_dst_width;
106 u32 blit_dst_height;
107 u64 blit_du_dx;
108 u64 blit_dv_dy;
109 u64 blit_src_x;
110 u64 blit_src_y;
111
112 INSERT_PADDING_WORDS(0x21);
103 }; 113 };
104 std::array<u32, NUM_REGS> reg_array; 114 std::array<u32, NUM_REGS> reg_array;
105 }; 115 };
@@ -122,7 +132,16 @@ private:
122ASSERT_REG_POSITION(dst, 0x80); 132ASSERT_REG_POSITION(dst, 0x80);
123ASSERT_REG_POSITION(src, 0x8C); 133ASSERT_REG_POSITION(src, 0x8C);
124ASSERT_REG_POSITION(operation, 0xAB); 134ASSERT_REG_POSITION(operation, 0xAB);
125ASSERT_REG_POSITION(trigger, 0xB5); 135ASSERT_REG_POSITION(blit_control, 0x223);
136ASSERT_REG_POSITION(blit_dst_x, 0x22c);
137ASSERT_REG_POSITION(blit_dst_y, 0x22d);
138ASSERT_REG_POSITION(blit_dst_width, 0x22e);
139ASSERT_REG_POSITION(blit_dst_height, 0x22f);
140ASSERT_REG_POSITION(blit_du_dx, 0x230);
141ASSERT_REG_POSITION(blit_dv_dy, 0x232);
142ASSERT_REG_POSITION(blit_src_x, 0x234);
143ASSERT_REG_POSITION(blit_src_y, 0x236);
144
126#undef ASSERT_REG_POSITION 145#undef ASSERT_REG_POSITION
127 146
128} // namespace Tegra::Engines 147} // namespace Tegra::Engines
diff --git a/src/video_core/engines/kepler_compute.cpp b/src/video_core/engines/kepler_compute.cpp
new file mode 100644
index 000000000..b1d950460
--- /dev/null
+++ b/src/video_core/engines/kepler_compute.cpp
@@ -0,0 +1,33 @@
1// Copyright 2018 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/assert.h"
6#include "common/logging/log.h"
7#include "video_core/engines/kepler_compute.h"
8#include "video_core/memory_manager.h"
9
10namespace Tegra::Engines {
11
12KeplerCompute::KeplerCompute(MemoryManager& memory_manager) : memory_manager{memory_manager} {}
13
14KeplerCompute::~KeplerCompute() = default;
15
16void KeplerCompute::CallMethod(const GPU::MethodCall& method_call) {
17 ASSERT_MSG(method_call.method < Regs::NUM_REGS,
18 "Invalid KeplerCompute register, increase the size of the Regs structure");
19
20 regs.reg_array[method_call.method] = method_call.argument;
21
22 switch (method_call.method) {
23 case KEPLER_COMPUTE_REG_INDEX(launch):
24 // Abort execution since compute shaders can be used to alter game memory (e.g. CUDA
25 // kernels)
26 UNREACHABLE_MSG("Compute shaders are not implemented");
27 break;
28 default:
29 break;
30 }
31}
32
33} // namespace Tegra::Engines
diff --git a/src/video_core/engines/maxwell_compute.h b/src/video_core/engines/kepler_compute.h
index 1d71f11bd..6575afd0f 100644
--- a/src/video_core/engines/maxwell_compute.h
+++ b/src/video_core/engines/kepler_compute.h
@@ -5,52 +5,52 @@
5#pragma once 5#pragma once
6 6
7#include <array> 7#include <array>
8#include "common/assert.h" 8#include <cstddef>
9#include "common/bit_field.h"
10#include "common/common_funcs.h" 9#include "common/common_funcs.h"
11#include "common/common_types.h" 10#include "common/common_types.h"
12#include "video_core/gpu.h" 11#include "video_core/gpu.h"
12#include "video_core/memory_manager.h"
13 13
14namespace Tegra::Engines { 14namespace Tegra::Engines {
15 15
16#define MAXWELL_COMPUTE_REG_INDEX(field_name) \ 16#define KEPLER_COMPUTE_REG_INDEX(field_name) \
17 (offsetof(Tegra::Engines::MaxwellCompute::Regs, field_name) / sizeof(u32)) 17 (offsetof(Tegra::Engines::KeplerCompute::Regs, field_name) / sizeof(u32))
18 18
19class MaxwellCompute final { 19class KeplerCompute final {
20public: 20public:
21 MaxwellCompute() = default; 21 explicit KeplerCompute(MemoryManager& memory_manager);
22 ~MaxwellCompute() = default; 22 ~KeplerCompute();
23
24 static constexpr std::size_t NumConstBuffers = 8;
23 25
24 struct Regs { 26 struct Regs {
25 static constexpr std::size_t NUM_REGS = 0xCF8; 27 static constexpr std::size_t NUM_REGS = 0xCF8;
26 28
27 union { 29 union {
28 struct { 30 struct {
29 INSERT_PADDING_WORDS(0x281); 31 INSERT_PADDING_WORDS(0xAF);
30 32
31 union { 33 u32 launch;
32 u32 compute_end;
33 BitField<0, 1, u32> unknown;
34 } compute;
35 34
36 INSERT_PADDING_WORDS(0xA76); 35 INSERT_PADDING_WORDS(0xC48);
37 }; 36 };
38 std::array<u32, NUM_REGS> reg_array; 37 std::array<u32, NUM_REGS> reg_array;
39 }; 38 };
40 } regs{}; 39 } regs{};
41
42 static_assert(sizeof(Regs) == Regs::NUM_REGS * sizeof(u32), 40 static_assert(sizeof(Regs) == Regs::NUM_REGS * sizeof(u32),
43 "MaxwellCompute Regs has wrong size"); 41 "KeplerCompute Regs has wrong size");
42
43 MemoryManager& memory_manager;
44 44
45 /// Write the value to the register identified by method. 45 /// Write the value to the register identified by method.
46 void CallMethod(const GPU::MethodCall& method_call); 46 void CallMethod(const GPU::MethodCall& method_call);
47}; 47};
48 48
49#define ASSERT_REG_POSITION(field_name, position) \ 49#define ASSERT_REG_POSITION(field_name, position) \
50 static_assert(offsetof(MaxwellCompute::Regs, field_name) == position * 4, \ 50 static_assert(offsetof(KeplerCompute::Regs, field_name) == position * 4, \
51 "Field " #field_name " has invalid position") 51 "Field " #field_name " has invalid position")
52 52
53ASSERT_REG_POSITION(compute, 0x281); 53ASSERT_REG_POSITION(launch, 0xAF);
54 54
55#undef ASSERT_REG_POSITION 55#undef ASSERT_REG_POSITION
56 56
diff --git a/src/video_core/engines/kepler_memory.cpp b/src/video_core/engines/kepler_memory.cpp
index 5c1029ddf..e259bf46b 100644
--- a/src/video_core/engines/kepler_memory.cpp
+++ b/src/video_core/engines/kepler_memory.cpp
@@ -2,18 +2,20 @@
2// Licensed under GPLv2 or any later version 2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included. 3// Refer to the license.txt file included.
4 4
5#include "common/assert.h"
5#include "common/logging/log.h" 6#include "common/logging/log.h"
6#include "core/core.h" 7#include "core/core.h"
7#include "core/memory.h" 8#include "core/memory.h"
8#include "video_core/engines/kepler_memory.h" 9#include "video_core/engines/kepler_memory.h"
9#include "video_core/engines/maxwell_3d.h" 10#include "video_core/engines/maxwell_3d.h"
10#include "video_core/rasterizer_interface.h" 11#include "video_core/rasterizer_interface.h"
12#include "video_core/renderer_base.h"
11 13
12namespace Tegra::Engines { 14namespace Tegra::Engines {
13 15
14KeplerMemory::KeplerMemory(VideoCore::RasterizerInterface& rasterizer, 16KeplerMemory::KeplerMemory(Core::System& system, VideoCore::RasterizerInterface& rasterizer,
15 MemoryManager& memory_manager) 17 MemoryManager& memory_manager)
16 : memory_manager(memory_manager), rasterizer{rasterizer} {} 18 : system{system}, memory_manager(memory_manager), rasterizer{rasterizer} {}
17 19
18KeplerMemory::~KeplerMemory() = default; 20KeplerMemory::~KeplerMemory() = default;
19 21
@@ -39,18 +41,14 @@ void KeplerMemory::ProcessData(u32 data) {
39 ASSERT_MSG(regs.exec.linear, "Non-linear uploads are not supported"); 41 ASSERT_MSG(regs.exec.linear, "Non-linear uploads are not supported");
40 ASSERT(regs.dest.x == 0 && regs.dest.y == 0 && regs.dest.z == 0); 42 ASSERT(regs.dest.x == 0 && regs.dest.y == 0 && regs.dest.z == 0);
41 43
42 const GPUVAddr address = regs.dest.Address();
43 const auto dest_address =
44 memory_manager.GpuToCpuAddress(address + state.write_offset * sizeof(u32));
45 ASSERT_MSG(dest_address, "Invalid GPU address");
46
47 // We have to invalidate the destination region to evict any outdated surfaces from the cache. 44 // We have to invalidate the destination region to evict any outdated surfaces from the cache.
48 // We do this before actually writing the new data because the destination address might contain 45 // We do this before actually writing the new data because the destination address might
49 // a dirty surface that will have to be written back to memory. 46 // contain a dirty surface that will have to be written back to memory.
50 rasterizer.InvalidateRegion(*dest_address, sizeof(u32)); 47 const GPUVAddr address{regs.dest.Address() + state.write_offset * sizeof(u32)};
48 rasterizer.InvalidateRegion(ToCacheAddr(memory_manager.GetPointer(address)), sizeof(u32));
49 memory_manager.Write<u32>(address, data);
51 50
52 Memory::Write32(*dest_address, data); 51 system.GPU().Maxwell3D().dirty_flags.OnMemoryWrite();
53 Core::System::GetInstance().GPU().Maxwell3D().dirty_flags.OnMemoryWrite();
54 52
55 state.write_offset++; 53 state.write_offset++;
56} 54}
diff --git a/src/video_core/engines/kepler_memory.h b/src/video_core/engines/kepler_memory.h
index fe9ebc5b9..9181e9d80 100644
--- a/src/video_core/engines/kepler_memory.h
+++ b/src/video_core/engines/kepler_memory.h
@@ -5,13 +5,17 @@
5#pragma once 5#pragma once
6 6
7#include <array> 7#include <array>
8#include "common/assert.h" 8#include <cstddef>
9#include "common/bit_field.h" 9#include "common/bit_field.h"
10#include "common/common_funcs.h" 10#include "common/common_funcs.h"
11#include "common/common_types.h" 11#include "common/common_types.h"
12#include "video_core/gpu.h" 12#include "video_core/gpu.h"
13#include "video_core/memory_manager.h" 13#include "video_core/memory_manager.h"
14 14
15namespace Core {
16class System;
17}
18
15namespace VideoCore { 19namespace VideoCore {
16class RasterizerInterface; 20class RasterizerInterface;
17} 21}
@@ -23,7 +27,8 @@ namespace Tegra::Engines {
23 27
24class KeplerMemory final { 28class KeplerMemory final {
25public: 29public:
26 KeplerMemory(VideoCore::RasterizerInterface& rasterizer, MemoryManager& memory_manager); 30 KeplerMemory(Core::System& system, VideoCore::RasterizerInterface& rasterizer,
31 MemoryManager& memory_manager);
27 ~KeplerMemory(); 32 ~KeplerMemory();
28 33
29 /// Write the value to the register identified by method. 34 /// Write the value to the register identified by method.
@@ -76,6 +81,7 @@ public:
76 } state{}; 81 } state{};
77 82
78private: 83private:
84 Core::System& system;
79 MemoryManager& memory_manager; 85 MemoryManager& memory_manager;
80 VideoCore::RasterizerInterface& rasterizer; 86 VideoCore::RasterizerInterface& rasterizer;
81 87
diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp
index 10eae6a65..defcfbd3f 100644
--- a/src/video_core/engines/maxwell_3d.cpp
+++ b/src/video_core/engines/maxwell_3d.cpp
@@ -19,8 +19,10 @@ namespace Tegra::Engines {
19/// First register id that is actually a Macro call. 19/// First register id that is actually a Macro call.
20constexpr u32 MacroRegistersStart = 0xE00; 20constexpr u32 MacroRegistersStart = 0xE00;
21 21
22Maxwell3D::Maxwell3D(VideoCore::RasterizerInterface& rasterizer, MemoryManager& memory_manager) 22Maxwell3D::Maxwell3D(Core::System& system, VideoCore::RasterizerInterface& rasterizer,
23 : memory_manager(memory_manager), rasterizer{rasterizer}, macro_interpreter(*this) { 23 MemoryManager& memory_manager)
24 : memory_manager(memory_manager), system{system}, rasterizer{rasterizer},
25 macro_interpreter(*this) {
24 InitializeRegisterDefaults(); 26 InitializeRegisterDefaults();
25} 27}
26 28
@@ -103,23 +105,25 @@ void Maxwell3D::CallMacroMethod(u32 method, std::vector<u32> parameters) {
103} 105}
104 106
105void Maxwell3D::CallMethod(const GPU::MethodCall& method_call) { 107void Maxwell3D::CallMethod(const GPU::MethodCall& method_call) {
106 auto debug_context = Core::System::GetInstance().GetGPUDebugContext(); 108 auto debug_context = system.GetGPUDebugContext();
109
110 const u32 method = method_call.method;
107 111
108 // It is an error to write to a register other than the current macro's ARG register before it 112 // It is an error to write to a register other than the current macro's ARG register before it
109 // has finished execution. 113 // has finished execution.
110 if (executing_macro != 0) { 114 if (executing_macro != 0) {
111 ASSERT(method_call.method == executing_macro + 1); 115 ASSERT(method == executing_macro + 1);
112 } 116 }
113 117
114 // Methods after 0xE00 are special, they're actually triggers for some microcode that was 118 // Methods after 0xE00 are special, they're actually triggers for some microcode that was
115 // uploaded to the GPU during initialization. 119 // uploaded to the GPU during initialization.
116 if (method_call.method >= MacroRegistersStart) { 120 if (method >= MacroRegistersStart) {
117 // We're trying to execute a macro 121 // We're trying to execute a macro
118 if (executing_macro == 0) { 122 if (executing_macro == 0) {
119 // A macro call must begin by writing the macro method's register, not its argument. 123 // A macro call must begin by writing the macro method's register, not its argument.
120 ASSERT_MSG((method_call.method % 2) == 0, 124 ASSERT_MSG((method % 2) == 0,
121 "Can't start macro execution by writing to the ARGS register"); 125 "Can't start macro execution by writing to the ARGS register");
122 executing_macro = method_call.method; 126 executing_macro = method;
123 } 127 }
124 128
125 macro_params.push_back(method_call.argument); 129 macro_params.push_back(method_call.argument);
@@ -131,66 +135,62 @@ void Maxwell3D::CallMethod(const GPU::MethodCall& method_call) {
131 return; 135 return;
132 } 136 }
133 137
134 ASSERT_MSG(method_call.method < Regs::NUM_REGS, 138 ASSERT_MSG(method < Regs::NUM_REGS,
135 "Invalid Maxwell3D register, increase the size of the Regs structure"); 139 "Invalid Maxwell3D register, increase the size of the Regs structure");
136 140
137 if (debug_context) { 141 if (debug_context) {
138 debug_context->OnEvent(Tegra::DebugContext::Event::MaxwellCommandLoaded, nullptr); 142 debug_context->OnEvent(Tegra::DebugContext::Event::MaxwellCommandLoaded, nullptr);
139 } 143 }
140 144
141 if (regs.reg_array[method_call.method] != method_call.argument) { 145 if (regs.reg_array[method] != method_call.argument) {
142 regs.reg_array[method_call.method] = method_call.argument; 146 regs.reg_array[method] = method_call.argument;
143 // Color buffers 147 // Color buffers
144 constexpr u32 first_rt_reg = MAXWELL3D_REG_INDEX(rt); 148 constexpr u32 first_rt_reg = MAXWELL3D_REG_INDEX(rt);
145 constexpr u32 registers_per_rt = sizeof(regs.rt[0]) / sizeof(u32); 149 constexpr u32 registers_per_rt = sizeof(regs.rt[0]) / sizeof(u32);
146 if (method_call.method >= first_rt_reg && 150 if (method >= first_rt_reg &&
147 method_call.method < first_rt_reg + registers_per_rt * Regs::NumRenderTargets) { 151 method < first_rt_reg + registers_per_rt * Regs::NumRenderTargets) {
148 const std::size_t rt_index = (method_call.method - first_rt_reg) / registers_per_rt; 152 const std::size_t rt_index = (method - first_rt_reg) / registers_per_rt;
149 dirty_flags.color_buffer |= 1u << static_cast<u32>(rt_index); 153 dirty_flags.color_buffer.set(rt_index);
150 } 154 }
151 155
152 // Zeta buffer 156 // Zeta buffer
153 constexpr u32 registers_in_zeta = sizeof(regs.zeta) / sizeof(u32); 157 constexpr u32 registers_in_zeta = sizeof(regs.zeta) / sizeof(u32);
154 if (method_call.method == MAXWELL3D_REG_INDEX(zeta_enable) || 158 if (method == MAXWELL3D_REG_INDEX(zeta_enable) ||
155 method_call.method == MAXWELL3D_REG_INDEX(zeta_width) || 159 method == MAXWELL3D_REG_INDEX(zeta_width) ||
156 method_call.method == MAXWELL3D_REG_INDEX(zeta_height) || 160 method == MAXWELL3D_REG_INDEX(zeta_height) ||
157 (method_call.method >= MAXWELL3D_REG_INDEX(zeta) && 161 (method >= MAXWELL3D_REG_INDEX(zeta) &&
158 method_call.method < MAXWELL3D_REG_INDEX(zeta) + registers_in_zeta)) { 162 method < MAXWELL3D_REG_INDEX(zeta) + registers_in_zeta)) {
159 dirty_flags.zeta_buffer = true; 163 dirty_flags.zeta_buffer = true;
160 } 164 }
161 165
162 // Shader 166 // Shader
163 constexpr u32 shader_registers_count = 167 constexpr u32 shader_registers_count =
164 sizeof(regs.shader_config[0]) * Regs::MaxShaderProgram / sizeof(u32); 168 sizeof(regs.shader_config[0]) * Regs::MaxShaderProgram / sizeof(u32);
165 if (method_call.method >= MAXWELL3D_REG_INDEX(shader_config[0]) && 169 if (method >= MAXWELL3D_REG_INDEX(shader_config[0]) &&
166 method_call.method < MAXWELL3D_REG_INDEX(shader_config[0]) + shader_registers_count) { 170 method < MAXWELL3D_REG_INDEX(shader_config[0]) + shader_registers_count) {
167 dirty_flags.shaders = true; 171 dirty_flags.shaders = true;
168 } 172 }
169 173
170 // Vertex format 174 // Vertex format
171 if (method_call.method >= MAXWELL3D_REG_INDEX(vertex_attrib_format) && 175 if (method >= MAXWELL3D_REG_INDEX(vertex_attrib_format) &&
172 method_call.method < 176 method < MAXWELL3D_REG_INDEX(vertex_attrib_format) + regs.vertex_attrib_format.size()) {
173 MAXWELL3D_REG_INDEX(vertex_attrib_format) + regs.vertex_attrib_format.size()) {
174 dirty_flags.vertex_attrib_format = true; 177 dirty_flags.vertex_attrib_format = true;
175 } 178 }
176 179
177 // Vertex buffer 180 // Vertex buffer
178 if (method_call.method >= MAXWELL3D_REG_INDEX(vertex_array) && 181 if (method >= MAXWELL3D_REG_INDEX(vertex_array) &&
179 method_call.method < MAXWELL3D_REG_INDEX(vertex_array) + 4 * 32) { 182 method < MAXWELL3D_REG_INDEX(vertex_array) + 4 * 32) {
180 dirty_flags.vertex_array |= 183 dirty_flags.vertex_array.set((method - MAXWELL3D_REG_INDEX(vertex_array)) >> 2);
181 1u << ((method_call.method - MAXWELL3D_REG_INDEX(vertex_array)) >> 2); 184 } else if (method >= MAXWELL3D_REG_INDEX(vertex_array_limit) &&
182 } else if (method_call.method >= MAXWELL3D_REG_INDEX(vertex_array_limit) && 185 method < MAXWELL3D_REG_INDEX(vertex_array_limit) + 2 * 32) {
183 method_call.method < MAXWELL3D_REG_INDEX(vertex_array_limit) + 2 * 32) { 186 dirty_flags.vertex_array.set((method - MAXWELL3D_REG_INDEX(vertex_array_limit)) >> 1);
184 dirty_flags.vertex_array |= 187 } else if (method >= MAXWELL3D_REG_INDEX(instanced_arrays) &&
185 1u << ((method_call.method - MAXWELL3D_REG_INDEX(vertex_array_limit)) >> 1); 188 method < MAXWELL3D_REG_INDEX(instanced_arrays) + 32) {
186 } else if (method_call.method >= MAXWELL3D_REG_INDEX(instanced_arrays) && 189 dirty_flags.vertex_array.set(method - MAXWELL3D_REG_INDEX(instanced_arrays));
187 method_call.method < MAXWELL3D_REG_INDEX(instanced_arrays) + 32) {
188 dirty_flags.vertex_array |=
189 1u << (method_call.method - MAXWELL3D_REG_INDEX(instanced_arrays));
190 } 190 }
191 } 191 }
192 192
193 switch (method_call.method) { 193 switch (method) {
194 case MAXWELL3D_REG_INDEX(macros.data): { 194 case MAXWELL3D_REG_INDEX(macros.data): {
195 ProcessMacroUpload(method_call.argument); 195 ProcessMacroUpload(method_call.argument);
196 break; 196 break;
@@ -270,11 +270,9 @@ void Maxwell3D::ProcessMacroBind(u32 data) {
270} 270}
271 271
272void Maxwell3D::ProcessQueryGet() { 272void Maxwell3D::ProcessQueryGet() {
273 GPUVAddr sequence_address = regs.query.QueryAddress(); 273 const GPUVAddr sequence_address{regs.query.QueryAddress()};
274 // Since the sequence address is given as a GPU VAddr, we have to convert it to an application 274 // Since the sequence address is given as a GPU VAddr, we have to convert it to an application
275 // VAddr before writing. 275 // VAddr before writing.
276 const auto address = memory_manager.GpuToCpuAddress(sequence_address);
277 ASSERT_MSG(address, "Invalid GPU address");
278 276
279 // TODO(Subv): Support the other query units. 277 // TODO(Subv): Support the other query units.
280 ASSERT_MSG(regs.query.query_get.unit == Regs::QueryUnit::Crop, 278 ASSERT_MSG(regs.query.query_get.unit == Regs::QueryUnit::Crop,
@@ -309,7 +307,7 @@ void Maxwell3D::ProcessQueryGet() {
309 // Write the current query sequence to the sequence address. 307 // Write the current query sequence to the sequence address.
310 // TODO(Subv): Find out what happens if you use a long query type but mark it as a short 308 // TODO(Subv): Find out what happens if you use a long query type but mark it as a short
311 // query. 309 // query.
312 Memory::Write32(*address, sequence); 310 memory_manager.Write<u32>(sequence_address, sequence);
313 } else { 311 } else {
314 // Write the 128-bit result structure in long mode. Note: We emulate an infinitely fast 312 // Write the 128-bit result structure in long mode. Note: We emulate an infinitely fast
315 // GPU, this command may actually take a while to complete in real hardware due to GPU 313 // GPU, this command may actually take a while to complete in real hardware due to GPU
@@ -317,8 +315,8 @@ void Maxwell3D::ProcessQueryGet() {
317 LongQueryResult query_result{}; 315 LongQueryResult query_result{};
318 query_result.value = result; 316 query_result.value = result;
319 // TODO(Subv): Generate a real GPU timestamp and write it here instead of CoreTiming 317 // TODO(Subv): Generate a real GPU timestamp and write it here instead of CoreTiming
320 query_result.timestamp = CoreTiming::GetTicks(); 318 query_result.timestamp = system.CoreTiming().GetTicks();
321 Memory::WriteBlock(*address, &query_result, sizeof(query_result)); 319 memory_manager.WriteBlock(sequence_address, &query_result, sizeof(query_result));
322 } 320 }
323 dirty_flags.OnMemoryWrite(); 321 dirty_flags.OnMemoryWrite();
324 break; 322 break;
@@ -334,7 +332,7 @@ void Maxwell3D::DrawArrays() {
334 regs.vertex_buffer.count); 332 regs.vertex_buffer.count);
335 ASSERT_MSG(!(regs.index_array.count && regs.vertex_buffer.count), "Both indexed and direct?"); 333 ASSERT_MSG(!(regs.index_array.count && regs.vertex_buffer.count), "Both indexed and direct?");
336 334
337 auto debug_context = Core::System::GetInstance().GetGPUDebugContext(); 335 auto debug_context = system.GetGPUDebugContext();
338 336
339 if (debug_context) { 337 if (debug_context) {
340 debug_context->OnEvent(Tegra::DebugContext::Event::IncomingPrimitiveBatch, nullptr); 338 debug_context->OnEvent(Tegra::DebugContext::Event::IncomingPrimitiveBatch, nullptr);
@@ -393,10 +391,12 @@ void Maxwell3D::ProcessCBData(u32 value) {
393 // Don't allow writing past the end of the buffer. 391 // Don't allow writing past the end of the buffer.
394 ASSERT(regs.const_buffer.cb_pos + sizeof(u32) <= regs.const_buffer.cb_size); 392 ASSERT(regs.const_buffer.cb_pos + sizeof(u32) <= regs.const_buffer.cb_size);
395 393
396 const auto address = memory_manager.GpuToCpuAddress(buffer_address + regs.const_buffer.cb_pos); 394 const GPUVAddr address{buffer_address + regs.const_buffer.cb_pos};
397 ASSERT_MSG(address, "Invalid GPU address"); 395
396 u8* ptr{memory_manager.GetPointer(address)};
397 rasterizer.InvalidateRegion(ToCacheAddr(ptr), sizeof(u32));
398 memory_manager.Write<u32>(address, value);
398 399
399 Memory::Write32(*address, value);
400 dirty_flags.OnMemoryWrite(); 400 dirty_flags.OnMemoryWrite();
401 401
402 // Increment the current buffer position. 402 // Increment the current buffer position.
@@ -404,14 +404,10 @@ void Maxwell3D::ProcessCBData(u32 value) {
404} 404}
405 405
406Texture::TICEntry Maxwell3D::GetTICEntry(u32 tic_index) const { 406Texture::TICEntry Maxwell3D::GetTICEntry(u32 tic_index) const {
407 const GPUVAddr tic_base_address = regs.tic.TICAddress(); 407 const GPUVAddr tic_address_gpu{regs.tic.TICAddress() + tic_index * sizeof(Texture::TICEntry)};
408
409 const GPUVAddr tic_address_gpu = tic_base_address + tic_index * sizeof(Texture::TICEntry);
410 const auto tic_address_cpu = memory_manager.GpuToCpuAddress(tic_address_gpu);
411 ASSERT_MSG(tic_address_cpu, "Invalid GPU address");
412 408
413 Texture::TICEntry tic_entry; 409 Texture::TICEntry tic_entry;
414 Memory::ReadBlock(*tic_address_cpu, &tic_entry, sizeof(Texture::TICEntry)); 410 memory_manager.ReadBlock(tic_address_gpu, &tic_entry, sizeof(Texture::TICEntry));
415 411
416 ASSERT_MSG(tic_entry.header_version == Texture::TICHeaderVersion::BlockLinear || 412 ASSERT_MSG(tic_entry.header_version == Texture::TICHeaderVersion::BlockLinear ||
417 tic_entry.header_version == Texture::TICHeaderVersion::Pitch, 413 tic_entry.header_version == Texture::TICHeaderVersion::Pitch,
@@ -429,14 +425,10 @@ Texture::TICEntry Maxwell3D::GetTICEntry(u32 tic_index) const {
429} 425}
430 426
431Texture::TSCEntry Maxwell3D::GetTSCEntry(u32 tsc_index) const { 427Texture::TSCEntry Maxwell3D::GetTSCEntry(u32 tsc_index) const {
432 const GPUVAddr tsc_base_address = regs.tsc.TSCAddress(); 428 const GPUVAddr tsc_address_gpu{regs.tsc.TSCAddress() + tsc_index * sizeof(Texture::TSCEntry)};
433
434 const GPUVAddr tsc_address_gpu = tsc_base_address + tsc_index * sizeof(Texture::TSCEntry);
435 const auto tsc_address_cpu = memory_manager.GpuToCpuAddress(tsc_address_gpu);
436 ASSERT_MSG(tsc_address_cpu, "Invalid GPU address");
437 429
438 Texture::TSCEntry tsc_entry; 430 Texture::TSCEntry tsc_entry;
439 Memory::ReadBlock(*tsc_address_cpu, &tsc_entry, sizeof(Texture::TSCEntry)); 431 memory_manager.ReadBlock(tsc_address_gpu, &tsc_entry, sizeof(Texture::TSCEntry));
440 return tsc_entry; 432 return tsc_entry;
441} 433}
442 434
@@ -455,10 +447,7 @@ std::vector<Texture::FullTextureInfo> Maxwell3D::GetStageTextures(Regs::ShaderSt
455 for (GPUVAddr current_texture = tex_info_buffer.address + TextureInfoOffset; 447 for (GPUVAddr current_texture = tex_info_buffer.address + TextureInfoOffset;
456 current_texture < tex_info_buffer_end; current_texture += sizeof(Texture::TextureHandle)) { 448 current_texture < tex_info_buffer_end; current_texture += sizeof(Texture::TextureHandle)) {
457 449
458 const auto address = memory_manager.GpuToCpuAddress(current_texture); 450 const Texture::TextureHandle tex_handle{memory_manager.Read<u32>(current_texture)};
459 ASSERT_MSG(address, "Invalid GPU address");
460
461 const Texture::TextureHandle tex_handle{Memory::Read32(*address)};
462 451
463 Texture::FullTextureInfo tex_info{}; 452 Texture::FullTextureInfo tex_info{};
464 // TODO(Subv): Use the shader to determine which textures are actually accessed. 453 // TODO(Subv): Use the shader to determine which textures are actually accessed.
@@ -493,10 +482,7 @@ Texture::FullTextureInfo Maxwell3D::GetStageTexture(Regs::ShaderStage stage,
493 482
494 ASSERT(tex_info_address < tex_info_buffer.address + tex_info_buffer.size); 483 ASSERT(tex_info_address < tex_info_buffer.address + tex_info_buffer.size);
495 484
496 const auto tex_address_cpu = memory_manager.GpuToCpuAddress(tex_info_address); 485 const Texture::TextureHandle tex_handle{memory_manager.Read<u32>(tex_info_address)};
497 ASSERT_MSG(tex_address_cpu, "Invalid GPU address");
498
499 const Texture::TextureHandle tex_handle{Memory::Read32(*tex_address_cpu)};
500 486
501 Texture::FullTextureInfo tex_info{}; 487 Texture::FullTextureInfo tex_info{};
502 tex_info.index = static_cast<u32>(offset); 488 tex_info.index = static_cast<u32>(offset);
diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h
index 1f76aa670..7fbf1026e 100644
--- a/src/video_core/engines/maxwell_3d.h
+++ b/src/video_core/engines/maxwell_3d.h
@@ -5,8 +5,10 @@
5#pragma once 5#pragma once
6 6
7#include <array> 7#include <array>
8#include <bitset>
8#include <unordered_map> 9#include <unordered_map>
9#include <vector> 10#include <vector>
11
10#include "common/assert.h" 12#include "common/assert.h"
11#include "common/bit_field.h" 13#include "common/bit_field.h"
12#include "common/common_funcs.h" 14#include "common/common_funcs.h"
@@ -17,6 +19,10 @@
17#include "video_core/memory_manager.h" 19#include "video_core/memory_manager.h"
18#include "video_core/textures/texture.h" 20#include "video_core/textures/texture.h"
19 21
22namespace Core {
23class System;
24}
25
20namespace VideoCore { 26namespace VideoCore {
21class RasterizerInterface; 27class RasterizerInterface;
22} 28}
@@ -28,7 +34,8 @@ namespace Tegra::Engines {
28 34
29class Maxwell3D final { 35class Maxwell3D final {
30public: 36public:
31 explicit Maxwell3D(VideoCore::RasterizerInterface& rasterizer, MemoryManager& memory_manager); 37 explicit Maxwell3D(Core::System& system, VideoCore::RasterizerInterface& rasterizer,
38 MemoryManager& memory_manager);
32 ~Maxwell3D() = default; 39 ~Maxwell3D() = default;
33 40
34 /// Register structure of the Maxwell3D engine. 41 /// Register structure of the Maxwell3D engine.
@@ -498,7 +505,7 @@ public:
498 f32 translate_z; 505 f32 translate_z;
499 INSERT_PADDING_WORDS(2); 506 INSERT_PADDING_WORDS(2);
500 507
501 MathUtil::Rectangle<s32> GetRect() const { 508 Common::Rectangle<s32> GetRect() const {
502 return { 509 return {
503 GetX(), // left 510 GetX(), // left
504 GetY() + GetHeight(), // top 511 GetY() + GetHeight(), // top
@@ -1089,19 +1096,18 @@ public:
1089 MemoryManager& memory_manager; 1096 MemoryManager& memory_manager;
1090 1097
1091 struct DirtyFlags { 1098 struct DirtyFlags {
1092 u8 color_buffer = 0xFF; 1099 std::bitset<8> color_buffer{0xFF};
1093 bool zeta_buffer = true; 1100 std::bitset<32> vertex_array{0xFFFFFFFF};
1094
1095 bool shaders = true;
1096 1101
1097 bool vertex_attrib_format = true; 1102 bool vertex_attrib_format = true;
1098 u32 vertex_array = 0xFFFFFFFF; 1103 bool zeta_buffer = true;
1104 bool shaders = true;
1099 1105
1100 void OnMemoryWrite() { 1106 void OnMemoryWrite() {
1101 color_buffer = 0xFF;
1102 zeta_buffer = true; 1107 zeta_buffer = true;
1103 shaders = true; 1108 shaders = true;
1104 vertex_array = 0xFFFFFFFF; 1109 color_buffer.set();
1110 vertex_array.set();
1105 } 1111 }
1106 }; 1112 };
1107 1113
@@ -1131,6 +1137,8 @@ public:
1131private: 1137private:
1132 void InitializeRegisterDefaults(); 1138 void InitializeRegisterDefaults();
1133 1139
1140 Core::System& system;
1141
1134 VideoCore::RasterizerInterface& rasterizer; 1142 VideoCore::RasterizerInterface& rasterizer;
1135 1143
1136 /// Start offsets of each macro in macro_memory 1144 /// Start offsets of each macro in macro_memory
diff --git a/src/video_core/engines/maxwell_compute.cpp b/src/video_core/engines/maxwell_compute.cpp
deleted file mode 100644
index 656db6a61..000000000
--- a/src/video_core/engines/maxwell_compute.cpp
+++ /dev/null
@@ -1,28 +0,0 @@
1// Copyright 2018 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/logging/log.h"
6#include "core/core.h"
7#include "video_core/engines/maxwell_compute.h"
8
9namespace Tegra::Engines {
10
11void MaxwellCompute::CallMethod(const GPU::MethodCall& method_call) {
12 ASSERT_MSG(method_call.method < Regs::NUM_REGS,
13 "Invalid MaxwellCompute register, increase the size of the Regs structure");
14
15 regs.reg_array[method_call.method] = method_call.argument;
16
17 switch (method_call.method) {
18 case MAXWELL_COMPUTE_REG_INDEX(compute): {
19 LOG_CRITICAL(HW_GPU, "Compute shaders are not implemented");
20 UNREACHABLE();
21 break;
22 }
23 default:
24 break;
25 }
26}
27
28} // namespace Tegra::Engines
diff --git a/src/video_core/engines/maxwell_dma.cpp b/src/video_core/engines/maxwell_dma.cpp
index d6c41a5ae..5cca5c29a 100644
--- a/src/video_core/engines/maxwell_dma.cpp
+++ b/src/video_core/engines/maxwell_dma.cpp
@@ -2,17 +2,21 @@
2// Licensed under GPLv2 or any later version 2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included. 3// Refer to the license.txt file included.
4 4
5#include "common/assert.h"
6#include "common/logging/log.h"
5#include "core/core.h" 7#include "core/core.h"
6#include "core/memory.h" 8#include "core/memory.h"
7#include "video_core/engines/maxwell_3d.h" 9#include "video_core/engines/maxwell_3d.h"
8#include "video_core/engines/maxwell_dma.h" 10#include "video_core/engines/maxwell_dma.h"
9#include "video_core/rasterizer_interface.h" 11#include "video_core/rasterizer_interface.h"
12#include "video_core/renderer_base.h"
10#include "video_core/textures/decoders.h" 13#include "video_core/textures/decoders.h"
11 14
12namespace Tegra::Engines { 15namespace Tegra::Engines {
13 16
14MaxwellDMA::MaxwellDMA(VideoCore::RasterizerInterface& rasterizer, MemoryManager& memory_manager) 17MaxwellDMA::MaxwellDMA(Core::System& system, VideoCore::RasterizerInterface& rasterizer,
15 : memory_manager(memory_manager), rasterizer{rasterizer} {} 18 MemoryManager& memory_manager)
19 : memory_manager(memory_manager), system{system}, rasterizer{rasterizer} {}
16 20
17void MaxwellDMA::CallMethod(const GPU::MethodCall& method_call) { 21void MaxwellDMA::CallMethod(const GPU::MethodCall& method_call) {
18 ASSERT_MSG(method_call.method < Regs::NUM_REGS, 22 ASSERT_MSG(method_call.method < Regs::NUM_REGS,
@@ -39,11 +43,6 @@ void MaxwellDMA::HandleCopy() {
39 const GPUVAddr source = regs.src_address.Address(); 43 const GPUVAddr source = regs.src_address.Address();
40 const GPUVAddr dest = regs.dst_address.Address(); 44 const GPUVAddr dest = regs.dst_address.Address();
41 45
42 const auto source_cpu = memory_manager.GpuToCpuAddress(source);
43 const auto dest_cpu = memory_manager.GpuToCpuAddress(dest);
44 ASSERT_MSG(source_cpu, "Invalid source GPU address");
45 ASSERT_MSG(dest_cpu, "Invalid destination GPU address");
46
47 // TODO(Subv): Perform more research and implement all features of this engine. 46 // TODO(Subv): Perform more research and implement all features of this engine.
48 ASSERT(regs.exec.enable_swizzle == 0); 47 ASSERT(regs.exec.enable_swizzle == 0);
49 ASSERT(regs.exec.query_mode == Regs::QueryMode::None); 48 ASSERT(regs.exec.query_mode == Regs::QueryMode::None);
@@ -59,14 +58,14 @@ void MaxwellDMA::HandleCopy() {
59 } 58 }
60 59
61 // All copies here update the main memory, so mark all rasterizer states as invalid. 60 // All copies here update the main memory, so mark all rasterizer states as invalid.
62 Core::System::GetInstance().GPU().Maxwell3D().dirty_flags.OnMemoryWrite(); 61 system.GPU().Maxwell3D().dirty_flags.OnMemoryWrite();
63 62
64 if (regs.exec.is_dst_linear && regs.exec.is_src_linear) { 63 if (regs.exec.is_dst_linear && regs.exec.is_src_linear) {
65 // When the enable_2d bit is disabled, the copy is performed as if we were copying a 1D 64 // When the enable_2d bit is disabled, the copy is performed as if we were copying a 1D
66 // buffer of length `x_count`, otherwise we copy a 2D image of dimensions (x_count, 65 // buffer of length `x_count`, otherwise we copy a 2D image of dimensions (x_count,
67 // y_count). 66 // y_count).
68 if (!regs.exec.enable_2d) { 67 if (!regs.exec.enable_2d) {
69 Memory::CopyBlock(*dest_cpu, *source_cpu, regs.x_count); 68 memory_manager.CopyBlock(dest, source, regs.x_count);
70 return; 69 return;
71 } 70 }
72 71
@@ -75,9 +74,9 @@ void MaxwellDMA::HandleCopy() {
75 // rectangle. There is no need to manually flush/invalidate the regions because 74 // rectangle. There is no need to manually flush/invalidate the regions because
76 // CopyBlock does that for us. 75 // CopyBlock does that for us.
77 for (u32 line = 0; line < regs.y_count; ++line) { 76 for (u32 line = 0; line < regs.y_count; ++line) {
78 const VAddr source_line = *source_cpu + line * regs.src_pitch; 77 const GPUVAddr source_line = source + line * regs.src_pitch;
79 const VAddr dest_line = *dest_cpu + line * regs.dst_pitch; 78 const GPUVAddr dest_line = dest + line * regs.dst_pitch;
80 Memory::CopyBlock(dest_line, source_line, regs.x_count); 79 memory_manager.CopyBlock(dest_line, source_line, regs.x_count);
81 } 80 }
82 return; 81 return;
83 } 82 }
@@ -86,15 +85,28 @@ void MaxwellDMA::HandleCopy() {
86 85
87 const std::size_t copy_size = regs.x_count * regs.y_count; 86 const std::size_t copy_size = regs.x_count * regs.y_count;
88 87
88 auto source_ptr{memory_manager.GetPointer(source)};
89 auto dst_ptr{memory_manager.GetPointer(dest)};
90
91 if (!source_ptr) {
92 LOG_ERROR(HW_GPU, "source_ptr is invalid");
93 return;
94 }
95
96 if (!dst_ptr) {
97 LOG_ERROR(HW_GPU, "dst_ptr is invalid");
98 return;
99 }
100
89 const auto FlushAndInvalidate = [&](u32 src_size, u64 dst_size) { 101 const auto FlushAndInvalidate = [&](u32 src_size, u64 dst_size) {
90 // TODO(Subv): For now, manually flush the regions until we implement GPU-accelerated 102 // TODO(Subv): For now, manually flush the regions until we implement GPU-accelerated
91 // copying. 103 // copying.
92 rasterizer.FlushRegion(*source_cpu, src_size); 104 rasterizer.FlushRegion(ToCacheAddr(source_ptr), src_size);
93 105
94 // We have to invalidate the destination region to evict any outdated surfaces from the 106 // We have to invalidate the destination region to evict any outdated surfaces from the
95 // cache. We do this before actually writing the new data because the destination address 107 // cache. We do this before actually writing the new data because the destination address
96 // might contain a dirty surface that will have to be written back to memory. 108 // might contain a dirty surface that will have to be written back to memory.
97 rasterizer.InvalidateRegion(*dest_cpu, dst_size); 109 rasterizer.InvalidateRegion(ToCacheAddr(dst_ptr), dst_size);
98 }; 110 };
99 111
100 if (regs.exec.is_dst_linear && !regs.exec.is_src_linear) { 112 if (regs.exec.is_dst_linear && !regs.exec.is_src_linear) {
@@ -107,8 +119,8 @@ void MaxwellDMA::HandleCopy() {
107 copy_size * src_bytes_per_pixel); 119 copy_size * src_bytes_per_pixel);
108 120
109 Texture::UnswizzleSubrect(regs.x_count, regs.y_count, regs.dst_pitch, 121 Texture::UnswizzleSubrect(regs.x_count, regs.y_count, regs.dst_pitch,
110 regs.src_params.size_x, src_bytes_per_pixel, *source_cpu, 122 regs.src_params.size_x, src_bytes_per_pixel, source_ptr, dst_ptr,
111 *dest_cpu, regs.src_params.BlockHeight(), regs.src_params.pos_x, 123 regs.src_params.BlockHeight(), regs.src_params.pos_x,
112 regs.src_params.pos_y); 124 regs.src_params.pos_y);
113 } else { 125 } else {
114 ASSERT(regs.dst_params.size_z == 1); 126 ASSERT(regs.dst_params.size_z == 1);
@@ -121,7 +133,7 @@ void MaxwellDMA::HandleCopy() {
121 133
122 // If the input is linear and the output is tiled, swizzle the input and copy it over. 134 // If the input is linear and the output is tiled, swizzle the input and copy it over.
123 Texture::SwizzleSubrect(regs.x_count, regs.y_count, regs.src_pitch, regs.dst_params.size_x, 135 Texture::SwizzleSubrect(regs.x_count, regs.y_count, regs.src_pitch, regs.dst_params.size_x,
124 src_bpp, *dest_cpu, *source_cpu, regs.dst_params.BlockHeight()); 136 src_bpp, dst_ptr, source_ptr, regs.dst_params.BlockHeight());
125 } 137 }
126} 138}
127 139
diff --git a/src/video_core/engines/maxwell_dma.h b/src/video_core/engines/maxwell_dma.h
index 1f8cd65d2..34c369320 100644
--- a/src/video_core/engines/maxwell_dma.h
+++ b/src/video_core/engines/maxwell_dma.h
@@ -5,13 +5,17 @@
5#pragma once 5#pragma once
6 6
7#include <array> 7#include <array>
8#include "common/assert.h" 8#include <cstddef>
9#include "common/bit_field.h" 9#include "common/bit_field.h"
10#include "common/common_funcs.h" 10#include "common/common_funcs.h"
11#include "common/common_types.h" 11#include "common/common_types.h"
12#include "video_core/gpu.h" 12#include "video_core/gpu.h"
13#include "video_core/memory_manager.h" 13#include "video_core/memory_manager.h"
14 14
15namespace Core {
16class System;
17}
18
15namespace VideoCore { 19namespace VideoCore {
16class RasterizerInterface; 20class RasterizerInterface;
17} 21}
@@ -20,7 +24,8 @@ namespace Tegra::Engines {
20 24
21class MaxwellDMA final { 25class MaxwellDMA final {
22public: 26public:
23 explicit MaxwellDMA(VideoCore::RasterizerInterface& rasterizer, MemoryManager& memory_manager); 27 explicit MaxwellDMA(Core::System& system, VideoCore::RasterizerInterface& rasterizer,
28 MemoryManager& memory_manager);
24 ~MaxwellDMA() = default; 29 ~MaxwellDMA() = default;
25 30
26 /// Write the value to the register identified by method. 31 /// Write the value to the register identified by method.
@@ -137,6 +142,8 @@ public:
137 MemoryManager& memory_manager; 142 MemoryManager& memory_manager;
138 143
139private: 144private:
145 Core::System& system;
146
140 VideoCore::RasterizerInterface& rasterizer; 147 VideoCore::RasterizerInterface& rasterizer;
141 148
142 /// Performs the copy from the source buffer to the destination buffer as configured in the 149 /// Performs the copy from the source buffer to the destination buffer as configured in the
diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h
index 269df9437..7f613370b 100644
--- a/src/video_core/engines/shader_bytecode.h
+++ b/src/video_core/engines/shader_bytecode.h
@@ -6,7 +6,6 @@
6 6
7#include <bitset> 7#include <bitset>
8#include <optional> 8#include <optional>
9#include <string>
10#include <tuple> 9#include <tuple>
11#include <vector> 10#include <vector>
12 11
@@ -186,7 +185,7 @@ enum class SubOp : u64 {
186}; 185};
187 186
188enum class F2iRoundingOp : u64 { 187enum class F2iRoundingOp : u64 {
189 None = 0, 188 RoundEven = 0,
190 Floor = 1, 189 Floor = 1,
191 Ceil = 2, 190 Ceil = 2,
192 Trunc = 3, 191 Trunc = 3,
@@ -325,11 +324,11 @@ enum class TextureQueryType : u64 {
325 324
326enum class TextureProcessMode : u64 { 325enum class TextureProcessMode : u64 {
327 None = 0, 326 None = 0,
328 LZ = 1, // Unknown, appears to be the same as none. 327 LZ = 1, // Load LOD of zero.
329 LB = 2, // Load Bias. 328 LB = 2, // Load Bias.
330 LL = 3, // Load LOD (LevelOfDetail) 329 LL = 3, // Load LOD.
331 LBA = 6, // Load Bias. The A is unknown, does not appear to differ with LB 330 LBA = 6, // Load Bias. The A is unknown, does not appear to differ with LB.
332 LLA = 7 // Load LOD. The A is unknown, does not appear to differ with LL 331 LLA = 7 // Load LOD. The A is unknown, does not appear to differ with LL.
333}; 332};
334 333
335enum class TextureMiscMode : u64 { 334enum class TextureMiscMode : u64 {
@@ -376,9 +375,9 @@ enum class R2pMode : u64 {
376}; 375};
377 376
378enum class IpaInterpMode : u64 { 377enum class IpaInterpMode : u64 {
379 Linear = 0, 378 Pass = 0,
380 Perspective = 1, 379 Multiply = 1,
381 Flat = 2, 380 Constant = 2,
382 Sc = 3, 381 Sc = 3,
383}; 382};
384 383
@@ -1446,6 +1445,7 @@ public:
1446 Flow, 1445 Flow,
1447 Synch, 1446 Synch,
1448 Memory, 1447 Memory,
1448 Texture,
1449 FloatSet, 1449 FloatSet,
1450 FloatSetPredicate, 1450 FloatSetPredicate,
1451 IntegerSet, 1451 IntegerSet,
@@ -1576,14 +1576,14 @@ private:
1576 INST("1110111101010---", Id::ST_L, Type::Memory, "ST_L"), 1576 INST("1110111101010---", Id::ST_L, Type::Memory, "ST_L"),
1577 INST("1110111011010---", Id::LDG, Type::Memory, "LDG"), 1577 INST("1110111011010---", Id::LDG, Type::Memory, "LDG"),
1578 INST("1110111011011---", Id::STG, Type::Memory, "STG"), 1578 INST("1110111011011---", Id::STG, Type::Memory, "STG"),
1579 INST("110000----111---", Id::TEX, Type::Memory, "TEX"), 1579 INST("110000----111---", Id::TEX, Type::Texture, "TEX"),
1580 INST("1101111101001---", Id::TXQ, Type::Memory, "TXQ"), 1580 INST("1101111101001---", Id::TXQ, Type::Texture, "TXQ"),
1581 INST("1101-00---------", Id::TEXS, Type::Memory, "TEXS"), 1581 INST("1101-00---------", Id::TEXS, Type::Texture, "TEXS"),
1582 INST("1101101---------", Id::TLDS, Type::Memory, "TLDS"), 1582 INST("1101101---------", Id::TLDS, Type::Texture, "TLDS"),
1583 INST("110010----111---", Id::TLD4, Type::Memory, "TLD4"), 1583 INST("110010----111---", Id::TLD4, Type::Texture, "TLD4"),
1584 INST("1101111100------", Id::TLD4S, Type::Memory, "TLD4S"), 1584 INST("1101111100------", Id::TLD4S, Type::Texture, "TLD4S"),
1585 INST("110111110110----", Id::TMML_B, Type::Memory, "TMML_B"), 1585 INST("110111110110----", Id::TMML_B, Type::Texture, "TMML_B"),
1586 INST("1101111101011---", Id::TMML, Type::Memory, "TMML"), 1586 INST("1101111101011---", Id::TMML, Type::Texture, "TMML"),
1587 INST("111000110000----", Id::EXIT, Type::Trivial, "EXIT"), 1587 INST("111000110000----", Id::EXIT, Type::Trivial, "EXIT"),
1588 INST("11100000--------", Id::IPA, Type::Trivial, "IPA"), 1588 INST("11100000--------", Id::IPA, Type::Trivial, "IPA"),
1589 INST("1111101111100---", Id::OUT_R, Type::Trivial, "OUT_R"), 1589 INST("1111101111100---", Id::OUT_R, Type::Trivial, "OUT_R"),
diff --git a/src/video_core/engines/shader_header.h b/src/video_core/engines/shader_header.h
index cf2b76ff6..e86a7f04a 100644
--- a/src/video_core/engines/shader_header.h
+++ b/src/video_core/engines/shader_header.h
@@ -16,6 +16,13 @@ enum class OutputTopology : u32 {
16 TriangleStrip = 7, 16 TriangleStrip = 7,
17}; 17};
18 18
19enum class AttributeUse : u8 {
20 Unused = 0,
21 Constant = 1,
22 Perspective = 2,
23 ScreenLinear = 3,
24};
25
19// Documentation in: 26// Documentation in:
20// http://download.nvidia.com/open-gpu-doc/Shader-Program-Header/1/Shader-Program-Header.html#ImapTexture 27// http://download.nvidia.com/open-gpu-doc/Shader-Program-Header/1/Shader-Program-Header.html#ImapTexture
21struct Header { 28struct Header {
@@ -84,9 +91,15 @@ struct Header {
84 } vtg; 91 } vtg;
85 92
86 struct { 93 struct {
87 INSERT_PADDING_BYTES(3); // ImapSystemValuesA 94 INSERT_PADDING_BYTES(3); // ImapSystemValuesA
88 INSERT_PADDING_BYTES(1); // ImapSystemValuesB 95 INSERT_PADDING_BYTES(1); // ImapSystemValuesB
89 INSERT_PADDING_BYTES(32); // ImapGenericVector[32] 96 union {
97 BitField<0, 2, AttributeUse> x;
98 BitField<2, 2, AttributeUse> y;
99 BitField<4, 2, AttributeUse> w;
100 BitField<6, 2, AttributeUse> z;
101 u8 raw;
102 } imap_generic_vector[32];
90 INSERT_PADDING_BYTES(2); // ImapColor 103 INSERT_PADDING_BYTES(2); // ImapColor
91 INSERT_PADDING_BYTES(2); // ImapSystemValuesC 104 INSERT_PADDING_BYTES(2); // ImapSystemValuesC
92 INSERT_PADDING_BYTES(10); // ImapFixedFncTexture[10] 105 INSERT_PADDING_BYTES(10); // ImapFixedFncTexture[10]
@@ -103,6 +116,28 @@ struct Header {
103 const u32 bit = render_target * 4 + component; 116 const u32 bit = render_target * 4 + component;
104 return omap.target & (1 << bit); 117 return omap.target & (1 << bit);
105 } 118 }
119 AttributeUse GetAttributeIndexUse(u32 attribute, u32 index) const {
120 return static_cast<AttributeUse>(
121 (imap_generic_vector[attribute].raw >> (index * 2)) & 0x03);
122 }
123 AttributeUse GetAttributeUse(u32 attribute) const {
124 AttributeUse result = AttributeUse::Unused;
125 for (u32 i = 0; i < 4; i++) {
126 const auto index = GetAttributeIndexUse(attribute, i);
127 if (index == AttributeUse::Unused) {
128 continue;
129 }
130 if (result == AttributeUse::Unused || result == index) {
131 result = index;
132 continue;
133 }
134 LOG_CRITICAL(HW_GPU, "Generic Attribute Conflict in Interpolation Mode");
135 if (index == AttributeUse::Perspective) {
136 result = index;
137 }
138 }
139 return result;
140 }
106 } ps; 141 } ps;
107 }; 142 };
108 143
diff --git a/src/video_core/gpu.cpp b/src/video_core/gpu.cpp
index d3d32a359..30b29e14d 100644
--- a/src/video_core/gpu.cpp
+++ b/src/video_core/gpu.cpp
@@ -3,21 +3,24 @@
3// Refer to the license.txt file included. 3// Refer to the license.txt file included.
4 4
5#include "common/assert.h" 5#include "common/assert.h"
6#include "core/core.h"
6#include "core/core_timing.h" 7#include "core/core_timing.h"
7#include "core/memory.h" 8#include "core/memory.h"
8#include "video_core/engines/fermi_2d.h" 9#include "video_core/engines/fermi_2d.h"
10#include "video_core/engines/kepler_compute.h"
9#include "video_core/engines/kepler_memory.h" 11#include "video_core/engines/kepler_memory.h"
10#include "video_core/engines/maxwell_3d.h" 12#include "video_core/engines/maxwell_3d.h"
11#include "video_core/engines/maxwell_compute.h"
12#include "video_core/engines/maxwell_dma.h" 13#include "video_core/engines/maxwell_dma.h"
13#include "video_core/gpu.h" 14#include "video_core/gpu.h"
14#include "video_core/rasterizer_interface.h" 15#include "video_core/memory_manager.h"
16#include "video_core/renderer_base.h"
15 17
16namespace Tegra { 18namespace Tegra {
17 19
18u32 FramebufferConfig::BytesPerPixel(PixelFormat format) { 20u32 FramebufferConfig::BytesPerPixel(PixelFormat format) {
19 switch (format) { 21 switch (format) {
20 case PixelFormat::ABGR8: 22 case PixelFormat::ABGR8:
23 case PixelFormat::BGRA8:
21 return 4; 24 return 4;
22 default: 25 default:
23 return 4; 26 return 4;
@@ -26,14 +29,15 @@ u32 FramebufferConfig::BytesPerPixel(PixelFormat format) {
26 UNREACHABLE(); 29 UNREACHABLE();
27} 30}
28 31
29GPU::GPU(VideoCore::RasterizerInterface& rasterizer) { 32GPU::GPU(Core::System& system, VideoCore::RendererBase& renderer) : renderer{renderer} {
33 auto& rasterizer{renderer.Rasterizer()};
30 memory_manager = std::make_unique<Tegra::MemoryManager>(); 34 memory_manager = std::make_unique<Tegra::MemoryManager>();
31 dma_pusher = std::make_unique<Tegra::DmaPusher>(*this); 35 dma_pusher = std::make_unique<Tegra::DmaPusher>(*this);
32 maxwell_3d = std::make_unique<Engines::Maxwell3D>(rasterizer, *memory_manager); 36 maxwell_3d = std::make_unique<Engines::Maxwell3D>(system, rasterizer, *memory_manager);
33 fermi_2d = std::make_unique<Engines::Fermi2D>(rasterizer, *memory_manager); 37 fermi_2d = std::make_unique<Engines::Fermi2D>(rasterizer, *memory_manager);
34 maxwell_compute = std::make_unique<Engines::MaxwellCompute>(); 38 kepler_compute = std::make_unique<Engines::KeplerCompute>(*memory_manager);
35 maxwell_dma = std::make_unique<Engines::MaxwellDMA>(rasterizer, *memory_manager); 39 maxwell_dma = std::make_unique<Engines::MaxwellDMA>(system, rasterizer, *memory_manager);
36 kepler_memory = std::make_unique<Engines::KeplerMemory>(rasterizer, *memory_manager); 40 kepler_memory = std::make_unique<Engines::KeplerMemory>(system, rasterizer, *memory_manager);
37} 41}
38 42
39GPU::~GPU() = default; 43GPU::~GPU() = default;
@@ -245,8 +249,8 @@ void GPU::CallEngineMethod(const MethodCall& method_call) {
245 case EngineID::MAXWELL_B: 249 case EngineID::MAXWELL_B:
246 maxwell_3d->CallMethod(method_call); 250 maxwell_3d->CallMethod(method_call);
247 break; 251 break;
248 case EngineID::MAXWELL_COMPUTE_B: 252 case EngineID::KEPLER_COMPUTE_B:
249 maxwell_compute->CallMethod(method_call); 253 kepler_compute->CallMethod(method_call);
250 break; 254 break;
251 case EngineID::MAXWELL_DMA_COPY_A: 255 case EngineID::MAXWELL_DMA_COPY_A:
252 maxwell_dma->CallMethod(method_call); 256 maxwell_dma->CallMethod(method_call);
@@ -271,7 +275,6 @@ void GPU::ProcessSemaphoreTriggerMethod() {
271 const auto op = 275 const auto op =
272 static_cast<GpuSemaphoreOperation>(regs.semaphore_trigger & semaphoreOperationMask); 276 static_cast<GpuSemaphoreOperation>(regs.semaphore_trigger & semaphoreOperationMask);
273 if (op == GpuSemaphoreOperation::WriteLong) { 277 if (op == GpuSemaphoreOperation::WriteLong) {
274 auto address = memory_manager->GpuToCpuAddress(regs.smaphore_address.SmaphoreAddress());
275 struct Block { 278 struct Block {
276 u32 sequence; 279 u32 sequence;
277 u32 zeros = 0; 280 u32 zeros = 0;
@@ -282,12 +285,11 @@ void GPU::ProcessSemaphoreTriggerMethod() {
282 block.sequence = regs.semaphore_sequence; 285 block.sequence = regs.semaphore_sequence;
283 // TODO(Kmather73): Generate a real GPU timestamp and write it here instead of 286 // TODO(Kmather73): Generate a real GPU timestamp and write it here instead of
284 // CoreTiming 287 // CoreTiming
285 block.timestamp = CoreTiming::GetTicks(); 288 block.timestamp = Core::System::GetInstance().CoreTiming().GetTicks();
286 Memory::WriteBlock(*address, &block, sizeof(block)); 289 memory_manager->WriteBlock(regs.semaphore_address.SemaphoreAddress(), &block,
290 sizeof(block));
287 } else { 291 } else {
288 const auto address = 292 const u32 word{memory_manager->Read<u32>(regs.semaphore_address.SemaphoreAddress())};
289 memory_manager->GpuToCpuAddress(regs.smaphore_address.SmaphoreAddress());
290 const u32 word = Memory::Read32(*address);
291 if ((op == GpuSemaphoreOperation::AcquireEqual && word == regs.semaphore_sequence) || 293 if ((op == GpuSemaphoreOperation::AcquireEqual && word == regs.semaphore_sequence) ||
292 (op == GpuSemaphoreOperation::AcquireGequal && 294 (op == GpuSemaphoreOperation::AcquireGequal &&
293 static_cast<s32>(word - regs.semaphore_sequence) > 0) || 295 static_cast<s32>(word - regs.semaphore_sequence) > 0) ||
@@ -314,13 +316,11 @@ void GPU::ProcessSemaphoreTriggerMethod() {
314} 316}
315 317
316void GPU::ProcessSemaphoreRelease() { 318void GPU::ProcessSemaphoreRelease() {
317 const auto address = memory_manager->GpuToCpuAddress(regs.smaphore_address.SmaphoreAddress()); 319 memory_manager->Write<u32>(regs.semaphore_address.SemaphoreAddress(), regs.semaphore_release);
318 Memory::Write32(*address, regs.semaphore_release);
319} 320}
320 321
321void GPU::ProcessSemaphoreAcquire() { 322void GPU::ProcessSemaphoreAcquire() {
322 const auto address = memory_manager->GpuToCpuAddress(regs.smaphore_address.SmaphoreAddress()); 323 const u32 word = memory_manager->Read<u32>(regs.semaphore_address.SemaphoreAddress());
323 const u32 word = Memory::Read32(*address);
324 const auto value = regs.semaphore_acquire; 324 const auto value = regs.semaphore_acquire;
325 if (word != value) { 325 if (word != value) {
326 regs.acquire_active = true; 326 regs.acquire_active = true;
diff --git a/src/video_core/gpu.h b/src/video_core/gpu.h
index fb8975811..de30ea354 100644
--- a/src/video_core/gpu.h
+++ b/src/video_core/gpu.h
@@ -6,16 +6,23 @@
6 6
7#include <array> 7#include <array>
8#include <memory> 8#include <memory>
9#include <vector>
10#include "common/common_types.h" 9#include "common/common_types.h"
11#include "core/hle/service/nvflinger/buffer_queue.h" 10#include "core/hle/service/nvflinger/buffer_queue.h"
12#include "video_core/dma_pusher.h" 11#include "video_core/dma_pusher.h"
13#include "video_core/memory_manager.h"
14 12
15namespace VideoCore { 13using CacheAddr = std::uintptr_t;
16class RasterizerInterface; 14inline CacheAddr ToCacheAddr(const void* host_ptr) {
15 return reinterpret_cast<CacheAddr>(host_ptr);
16}
17
18namespace Core {
19class System;
17} 20}
18 21
22namespace VideoCore {
23class RendererBase;
24} // namespace VideoCore
25
19namespace Tegra { 26namespace Tegra {
20 27
21enum class RenderTargetFormat : u32 { 28enum class RenderTargetFormat : u32 {
@@ -80,6 +87,7 @@ class DebugContext;
80struct FramebufferConfig { 87struct FramebufferConfig {
81 enum class PixelFormat : u32 { 88 enum class PixelFormat : u32 {
82 ABGR8 = 1, 89 ABGR8 = 1,
90 BGRA8 = 5,
83 }; 91 };
84 92
85 /** 93 /**
@@ -96,29 +104,32 @@ struct FramebufferConfig {
96 104
97 using TransformFlags = Service::NVFlinger::BufferQueue::BufferTransformFlags; 105 using TransformFlags = Service::NVFlinger::BufferQueue::BufferTransformFlags;
98 TransformFlags transform_flags; 106 TransformFlags transform_flags;
99 MathUtil::Rectangle<int> crop_rect; 107 Common::Rectangle<int> crop_rect;
100}; 108};
101 109
102namespace Engines { 110namespace Engines {
103class Fermi2D; 111class Fermi2D;
104class Maxwell3D; 112class Maxwell3D;
105class MaxwellCompute;
106class MaxwellDMA; 113class MaxwellDMA;
114class KeplerCompute;
107class KeplerMemory; 115class KeplerMemory;
108} // namespace Engines 116} // namespace Engines
109 117
110enum class EngineID { 118enum class EngineID {
111 FERMI_TWOD_A = 0x902D, // 2D Engine 119 FERMI_TWOD_A = 0x902D, // 2D Engine
112 MAXWELL_B = 0xB197, // 3D Engine 120 MAXWELL_B = 0xB197, // 3D Engine
113 MAXWELL_COMPUTE_B = 0xB1C0, 121 KEPLER_COMPUTE_B = 0xB1C0,
114 KEPLER_INLINE_TO_MEMORY_B = 0xA140, 122 KEPLER_INLINE_TO_MEMORY_B = 0xA140,
115 MAXWELL_DMA_COPY_A = 0xB0B5, 123 MAXWELL_DMA_COPY_A = 0xB0B5,
116}; 124};
117 125
118class GPU final { 126class MemoryManager;
127
128class GPU {
119public: 129public:
120 explicit GPU(VideoCore::RasterizerInterface& rasterizer); 130 explicit GPU(Core::System& system, VideoCore::RendererBase& renderer);
121 ~GPU(); 131
132 virtual ~GPU();
122 133
123 struct MethodCall { 134 struct MethodCall {
124 u32 method{}; 135 u32 method{};
@@ -166,11 +177,11 @@ public:
166 u32 address_high; 177 u32 address_high;
167 u32 address_low; 178 u32 address_low;
168 179
169 GPUVAddr SmaphoreAddress() const { 180 GPUVAddr SemaphoreAddress() const {
170 return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address_high) << 32) | 181 return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address_high) << 32) |
171 address_low); 182 address_low);
172 } 183 }
173 } smaphore_address; 184 } semaphore_address;
174 185
175 u32 semaphore_sequence; 186 u32 semaphore_sequence;
176 u32 semaphore_trigger; 187 u32 semaphore_trigger;
@@ -196,42 +207,63 @@ public:
196 }; 207 };
197 } regs{}; 208 } regs{};
198 209
210 /// Push GPU command entries to be processed
211 virtual void PushGPUEntries(Tegra::CommandList&& entries) = 0;
212
213 /// Swap buffers (render frame)
214 virtual void SwapBuffers(
215 std::optional<std::reference_wrapper<const Tegra::FramebufferConfig>> framebuffer) = 0;
216
217 /// Notify rasterizer that any caches of the specified region should be flushed to Switch memory
218 virtual void FlushRegion(CacheAddr addr, u64 size) = 0;
219
220 /// Notify rasterizer that any caches of the specified region should be invalidated
221 virtual void InvalidateRegion(CacheAddr addr, u64 size) = 0;
222
223 /// Notify rasterizer that any caches of the specified region should be flushed and invalidated
224 virtual void FlushAndInvalidateRegion(CacheAddr addr, u64 size) = 0;
225
199private: 226private:
227 void ProcessBindMethod(const MethodCall& method_call);
228 void ProcessSemaphoreTriggerMethod();
229 void ProcessSemaphoreRelease();
230 void ProcessSemaphoreAcquire();
231
232 /// Calls a GPU puller method.
233 void CallPullerMethod(const MethodCall& method_call);
234
235 /// Calls a GPU engine method.
236 void CallEngineMethod(const MethodCall& method_call);
237
238 /// Determines where the method should be executed.
239 bool ExecuteMethodOnEngine(const MethodCall& method_call);
240
241protected:
200 std::unique_ptr<Tegra::DmaPusher> dma_pusher; 242 std::unique_ptr<Tegra::DmaPusher> dma_pusher;
243 VideoCore::RendererBase& renderer;
244
245private:
201 std::unique_ptr<Tegra::MemoryManager> memory_manager; 246 std::unique_ptr<Tegra::MemoryManager> memory_manager;
202 247
203 /// Mapping of command subchannels to their bound engine ids. 248 /// Mapping of command subchannels to their bound engine ids
204 std::array<EngineID, 8> bound_engines = {}; 249 std::array<EngineID, 8> bound_engines = {};
205
206 /// 3D engine 250 /// 3D engine
207 std::unique_ptr<Engines::Maxwell3D> maxwell_3d; 251 std::unique_ptr<Engines::Maxwell3D> maxwell_3d;
208 /// 2D engine 252 /// 2D engine
209 std::unique_ptr<Engines::Fermi2D> fermi_2d; 253 std::unique_ptr<Engines::Fermi2D> fermi_2d;
210 /// Compute engine 254 /// Compute engine
211 std::unique_ptr<Engines::MaxwellCompute> maxwell_compute; 255 std::unique_ptr<Engines::KeplerCompute> kepler_compute;
212 /// DMA engine 256 /// DMA engine
213 std::unique_ptr<Engines::MaxwellDMA> maxwell_dma; 257 std::unique_ptr<Engines::MaxwellDMA> maxwell_dma;
214 /// Inline memory engine 258 /// Inline memory engine
215 std::unique_ptr<Engines::KeplerMemory> kepler_memory; 259 std::unique_ptr<Engines::KeplerMemory> kepler_memory;
216
217 void ProcessBindMethod(const MethodCall& method_call);
218 void ProcessSemaphoreTriggerMethod();
219 void ProcessSemaphoreRelease();
220 void ProcessSemaphoreAcquire();
221
222 // Calls a GPU puller method.
223 void CallPullerMethod(const MethodCall& method_call);
224 // Calls a GPU engine method.
225 void CallEngineMethod(const MethodCall& method_call);
226 // Determines where the method should be executed.
227 bool ExecuteMethodOnEngine(const MethodCall& method_call);
228}; 260};
229 261
230#define ASSERT_REG_POSITION(field_name, position) \ 262#define ASSERT_REG_POSITION(field_name, position) \
231 static_assert(offsetof(GPU::Regs, field_name) == position * 4, \ 263 static_assert(offsetof(GPU::Regs, field_name) == position * 4, \
232 "Field " #field_name " has invalid position") 264 "Field " #field_name " has invalid position")
233 265
234ASSERT_REG_POSITION(smaphore_address, 0x4); 266ASSERT_REG_POSITION(semaphore_address, 0x4);
235ASSERT_REG_POSITION(semaphore_sequence, 0x6); 267ASSERT_REG_POSITION(semaphore_sequence, 0x6);
236ASSERT_REG_POSITION(semaphore_trigger, 0x7); 268ASSERT_REG_POSITION(semaphore_trigger, 0x7);
237ASSERT_REG_POSITION(reference_count, 0x14); 269ASSERT_REG_POSITION(reference_count, 0x14);
diff --git a/src/video_core/gpu_asynch.cpp b/src/video_core/gpu_asynch.cpp
new file mode 100644
index 000000000..8b355cf7b
--- /dev/null
+++ b/src/video_core/gpu_asynch.cpp
@@ -0,0 +1,37 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "video_core/gpu_asynch.h"
6#include "video_core/gpu_thread.h"
7#include "video_core/renderer_base.h"
8
9namespace VideoCommon {
10
11GPUAsynch::GPUAsynch(Core::System& system, VideoCore::RendererBase& renderer)
12 : Tegra::GPU(system, renderer), gpu_thread{renderer, *dma_pusher} {}
13
14GPUAsynch::~GPUAsynch() = default;
15
16void GPUAsynch::PushGPUEntries(Tegra::CommandList&& entries) {
17 gpu_thread.SubmitList(std::move(entries));
18}
19
20void GPUAsynch::SwapBuffers(
21 std::optional<std::reference_wrapper<const Tegra::FramebufferConfig>> framebuffer) {
22 gpu_thread.SwapBuffers(std::move(framebuffer));
23}
24
25void GPUAsynch::FlushRegion(CacheAddr addr, u64 size) {
26 gpu_thread.FlushRegion(addr, size);
27}
28
29void GPUAsynch::InvalidateRegion(CacheAddr addr, u64 size) {
30 gpu_thread.InvalidateRegion(addr, size);
31}
32
33void GPUAsynch::FlushAndInvalidateRegion(CacheAddr addr, u64 size) {
34 gpu_thread.FlushAndInvalidateRegion(addr, size);
35}
36
37} // namespace VideoCommon
diff --git a/src/video_core/gpu_asynch.h b/src/video_core/gpu_asynch.h
new file mode 100644
index 000000000..1dcc61a6c
--- /dev/null
+++ b/src/video_core/gpu_asynch.h
@@ -0,0 +1,37 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include "video_core/gpu.h"
8#include "video_core/gpu_thread.h"
9
10namespace VideoCore {
11class RendererBase;
12} // namespace VideoCore
13
14namespace VideoCommon {
15
16namespace GPUThread {
17class ThreadManager;
18} // namespace GPUThread
19
20/// Implementation of GPU interface that runs the GPU asynchronously
21class GPUAsynch : public Tegra::GPU {
22public:
23 explicit GPUAsynch(Core::System& system, VideoCore::RendererBase& renderer);
24 ~GPUAsynch() override;
25
26 void PushGPUEntries(Tegra::CommandList&& entries) override;
27 void SwapBuffers(
28 std::optional<std::reference_wrapper<const Tegra::FramebufferConfig>> framebuffer) override;
29 void FlushRegion(CacheAddr addr, u64 size) override;
30 void InvalidateRegion(CacheAddr addr, u64 size) override;
31 void FlushAndInvalidateRegion(CacheAddr addr, u64 size) override;
32
33private:
34 GPUThread::ThreadManager gpu_thread;
35};
36
37} // namespace VideoCommon
diff --git a/src/video_core/gpu_synch.cpp b/src/video_core/gpu_synch.cpp
new file mode 100644
index 000000000..2cfc900ed
--- /dev/null
+++ b/src/video_core/gpu_synch.cpp
@@ -0,0 +1,37 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "video_core/gpu_synch.h"
6#include "video_core/renderer_base.h"
7
8namespace VideoCommon {
9
10GPUSynch::GPUSynch(Core::System& system, VideoCore::RendererBase& renderer)
11 : Tegra::GPU(system, renderer) {}
12
13GPUSynch::~GPUSynch() = default;
14
15void GPUSynch::PushGPUEntries(Tegra::CommandList&& entries) {
16 dma_pusher->Push(std::move(entries));
17 dma_pusher->DispatchCalls();
18}
19
20void GPUSynch::SwapBuffers(
21 std::optional<std::reference_wrapper<const Tegra::FramebufferConfig>> framebuffer) {
22 renderer.SwapBuffers(std::move(framebuffer));
23}
24
25void GPUSynch::FlushRegion(CacheAddr addr, u64 size) {
26 renderer.Rasterizer().FlushRegion(addr, size);
27}
28
29void GPUSynch::InvalidateRegion(CacheAddr addr, u64 size) {
30 renderer.Rasterizer().InvalidateRegion(addr, size);
31}
32
33void GPUSynch::FlushAndInvalidateRegion(CacheAddr addr, u64 size) {
34 renderer.Rasterizer().FlushAndInvalidateRegion(addr, size);
35}
36
37} // namespace VideoCommon
diff --git a/src/video_core/gpu_synch.h b/src/video_core/gpu_synch.h
new file mode 100644
index 000000000..766b5631c
--- /dev/null
+++ b/src/video_core/gpu_synch.h
@@ -0,0 +1,29 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include "video_core/gpu.h"
8
9namespace VideoCore {
10class RendererBase;
11} // namespace VideoCore
12
13namespace VideoCommon {
14
15/// Implementation of GPU interface that runs the GPU synchronously
16class GPUSynch : public Tegra::GPU {
17public:
18 explicit GPUSynch(Core::System& system, VideoCore::RendererBase& renderer);
19 ~GPUSynch() override;
20
21 void PushGPUEntries(Tegra::CommandList&& entries) override;
22 void SwapBuffers(
23 std::optional<std::reference_wrapper<const Tegra::FramebufferConfig>> framebuffer) override;
24 void FlushRegion(CacheAddr addr, u64 size) override;
25 void InvalidateRegion(CacheAddr addr, u64 size) override;
26 void FlushAndInvalidateRegion(CacheAddr addr, u64 size) override;
27};
28
29} // namespace VideoCommon
diff --git a/src/video_core/gpu_thread.cpp b/src/video_core/gpu_thread.cpp
new file mode 100644
index 000000000..c5dc199c5
--- /dev/null
+++ b/src/video_core/gpu_thread.cpp
@@ -0,0 +1,98 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/assert.h"
6#include "common/microprofile.h"
7#include "core/frontend/scope_acquire_window_context.h"
8#include "video_core/dma_pusher.h"
9#include "video_core/gpu.h"
10#include "video_core/gpu_thread.h"
11#include "video_core/renderer_base.h"
12
13namespace VideoCommon::GPUThread {
14
15/// Runs the GPU thread
16static void RunThread(VideoCore::RendererBase& renderer, Tegra::DmaPusher& dma_pusher,
17 SynchState& state) {
18 MicroProfileOnThreadCreate("GpuThread");
19
20 // Wait for first GPU command before acquiring the window context
21 state.WaitForCommands();
22
23 // If emulation was stopped during disk shader loading, abort before trying to acquire context
24 if (!state.is_running) {
25 return;
26 }
27
28 Core::Frontend::ScopeAcquireWindowContext acquire_context{renderer.GetRenderWindow()};
29
30 CommandDataContainer next;
31 while (state.is_running) {
32 state.WaitForCommands();
33 while (!state.queue.Empty()) {
34 state.queue.Pop(next);
35 if (const auto submit_list = std::get_if<SubmitListCommand>(&next.data)) {
36 dma_pusher.Push(std::move(submit_list->entries));
37 dma_pusher.DispatchCalls();
38 } else if (const auto data = std::get_if<SwapBuffersCommand>(&next.data)) {
39 state.DecrementFramesCounter();
40 renderer.SwapBuffers(std::move(data->framebuffer));
41 } else if (const auto data = std::get_if<FlushRegionCommand>(&next.data)) {
42 renderer.Rasterizer().FlushRegion(data->addr, data->size);
43 } else if (const auto data = std::get_if<InvalidateRegionCommand>(&next.data)) {
44 renderer.Rasterizer().InvalidateRegion(data->addr, data->size);
45 } else if (const auto data = std::get_if<EndProcessingCommand>(&next.data)) {
46 return;
47 } else {
48 UNREACHABLE();
49 }
50 }
51 }
52}
53
54ThreadManager::ThreadManager(VideoCore::RendererBase& renderer, Tegra::DmaPusher& dma_pusher)
55 : renderer{renderer}, thread{RunThread, std::ref(renderer), std::ref(dma_pusher),
56 std::ref(state)} {}
57
58ThreadManager::~ThreadManager() {
59 // Notify GPU thread that a shutdown is pending
60 PushCommand(EndProcessingCommand());
61 thread.join();
62}
63
64void ThreadManager::SubmitList(Tegra::CommandList&& entries) {
65 PushCommand(SubmitListCommand(std::move(entries)));
66}
67
68void ThreadManager::SwapBuffers(
69 std::optional<std::reference_wrapper<const Tegra::FramebufferConfig>> framebuffer) {
70 state.IncrementFramesCounter();
71 PushCommand(SwapBuffersCommand(std::move(framebuffer)));
72 state.WaitForFrames();
73}
74
75void ThreadManager::FlushRegion(CacheAddr addr, u64 size) {
76 PushCommand(FlushRegionCommand(addr, size));
77}
78
79void ThreadManager::InvalidateRegion(CacheAddr addr, u64 size) {
80 if (state.queue.Empty()) {
81 // It's quicker to invalidate a single region on the CPU if the queue is already empty
82 renderer.Rasterizer().InvalidateRegion(addr, size);
83 } else {
84 PushCommand(InvalidateRegionCommand(addr, size));
85 }
86}
87
88void ThreadManager::FlushAndInvalidateRegion(CacheAddr addr, u64 size) {
89 // Skip flush on asynch mode, as FlushAndInvalidateRegion is not used for anything too important
90 InvalidateRegion(addr, size);
91}
92
93void ThreadManager::PushCommand(CommandData&& command_data) {
94 state.queue.Push(CommandDataContainer(std::move(command_data)));
95 state.SignalCommands();
96}
97
98} // namespace VideoCommon::GPUThread
diff --git a/src/video_core/gpu_thread.h b/src/video_core/gpu_thread.h
new file mode 100644
index 000000000..70acb2e79
--- /dev/null
+++ b/src/video_core/gpu_thread.h
@@ -0,0 +1,182 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <atomic>
8#include <condition_variable>
9#include <mutex>
10#include <optional>
11#include <thread>
12#include <variant>
13
14#include "common/threadsafe_queue.h"
15#include "video_core/gpu.h"
16
17namespace Tegra {
18struct FramebufferConfig;
19class DmaPusher;
20} // namespace Tegra
21
22namespace VideoCore {
23class RendererBase;
24} // namespace VideoCore
25
26namespace VideoCommon::GPUThread {
27
28/// Command to signal to the GPU thread that processing has ended
29struct EndProcessingCommand final {};
30
31/// Command to signal to the GPU thread that a command list is ready for processing
32struct SubmitListCommand final {
33 explicit SubmitListCommand(Tegra::CommandList&& entries) : entries{std::move(entries)} {}
34
35 Tegra::CommandList entries;
36};
37
38/// Command to signal to the GPU thread that a swap buffers is pending
39struct SwapBuffersCommand final {
40 explicit SwapBuffersCommand(std::optional<const Tegra::FramebufferConfig> framebuffer)
41 : framebuffer{std::move(framebuffer)} {}
42
43 std::optional<Tegra::FramebufferConfig> framebuffer;
44};
45
46/// Command to signal to the GPU thread to flush a region
47struct FlushRegionCommand final {
48 explicit constexpr FlushRegionCommand(CacheAddr addr, u64 size) : addr{addr}, size{size} {}
49
50 CacheAddr addr;
51 u64 size;
52};
53
54/// Command to signal to the GPU thread to invalidate a region
55struct InvalidateRegionCommand final {
56 explicit constexpr InvalidateRegionCommand(CacheAddr addr, u64 size) : addr{addr}, size{size} {}
57
58 CacheAddr addr;
59 u64 size;
60};
61
62/// Command to signal to the GPU thread to flush and invalidate a region
63struct FlushAndInvalidateRegionCommand final {
64 explicit constexpr FlushAndInvalidateRegionCommand(CacheAddr addr, u64 size)
65 : addr{addr}, size{size} {}
66
67 CacheAddr addr;
68 u64 size;
69};
70
71using CommandData =
72 std::variant<EndProcessingCommand, SubmitListCommand, SwapBuffersCommand, FlushRegionCommand,
73 InvalidateRegionCommand, FlushAndInvalidateRegionCommand>;
74
75struct CommandDataContainer {
76 CommandDataContainer() = default;
77
78 CommandDataContainer(CommandData&& data) : data{std::move(data)} {}
79
80 CommandDataContainer& operator=(const CommandDataContainer& t) {
81 data = std::move(t.data);
82 return *this;
83 }
84
85 CommandData data;
86};
87
88/// Struct used to synchronize the GPU thread
89struct SynchState final {
90 std::atomic_bool is_running{true};
91 std::atomic_int queued_frame_count{};
92 std::mutex frames_mutex;
93 std::mutex commands_mutex;
94 std::condition_variable commands_condition;
95 std::condition_variable frames_condition;
96
97 void IncrementFramesCounter() {
98 std::lock_guard lock{frames_mutex};
99 ++queued_frame_count;
100 }
101
102 void DecrementFramesCounter() {
103 {
104 std::lock_guard lock{frames_mutex};
105 --queued_frame_count;
106
107 if (queued_frame_count) {
108 return;
109 }
110 }
111 frames_condition.notify_one();
112 }
113
114 void WaitForFrames() {
115 {
116 std::lock_guard lock{frames_mutex};
117 if (!queued_frame_count) {
118 return;
119 }
120 }
121
122 // Wait for the GPU to be idle (all commands to be executed)
123 {
124 std::unique_lock lock{frames_mutex};
125 frames_condition.wait(lock, [this] { return !queued_frame_count; });
126 }
127 }
128
129 void SignalCommands() {
130 {
131 std::unique_lock lock{commands_mutex};
132 if (queue.Empty()) {
133 return;
134 }
135 }
136
137 commands_condition.notify_one();
138 }
139
140 void WaitForCommands() {
141 std::unique_lock lock{commands_mutex};
142 commands_condition.wait(lock, [this] { return !queue.Empty(); });
143 }
144
145 using CommandQueue = Common::SPSCQueue<CommandDataContainer>;
146 CommandQueue queue;
147};
148
149/// Class used to manage the GPU thread
150class ThreadManager final {
151public:
152 explicit ThreadManager(VideoCore::RendererBase& renderer, Tegra::DmaPusher& dma_pusher);
153 ~ThreadManager();
154
155 /// Push GPU command entries to be processed
156 void SubmitList(Tegra::CommandList&& entries);
157
158 /// Swap buffers (render frame)
159 void SwapBuffers(
160 std::optional<std::reference_wrapper<const Tegra::FramebufferConfig>> framebuffer);
161
162 /// Notify rasterizer that any caches of the specified region should be flushed to Switch memory
163 void FlushRegion(CacheAddr addr, u64 size);
164
165 /// Notify rasterizer that any caches of the specified region should be invalidated
166 void InvalidateRegion(CacheAddr addr, u64 size);
167
168 /// Notify rasterizer that any caches of the specified region should be flushed and invalidated
169 void FlushAndInvalidateRegion(CacheAddr addr, u64 size);
170
171private:
172 /// Pushes a command to be executed by the GPU thread
173 void PushCommand(CommandData&& command_data);
174
175private:
176 SynchState state;
177 VideoCore::RendererBase& renderer;
178 std::thread thread;
179 std::thread::id thread_id;
180};
181
182} // namespace VideoCommon::GPUThread
diff --git a/src/video_core/memory_manager.cpp b/src/video_core/memory_manager.cpp
index 54abe5298..e76b59842 100644
--- a/src/video_core/memory_manager.cpp
+++ b/src/video_core/memory_manager.cpp
@@ -5,181 +5,446 @@
5#include "common/alignment.h" 5#include "common/alignment.h"
6#include "common/assert.h" 6#include "common/assert.h"
7#include "common/logging/log.h" 7#include "common/logging/log.h"
8#include "core/core.h"
9#include "core/memory.h"
10#include "video_core/gpu.h"
8#include "video_core/memory_manager.h" 11#include "video_core/memory_manager.h"
12#include "video_core/rasterizer_interface.h"
13#include "video_core/renderer_base.h"
9 14
10namespace Tegra { 15namespace Tegra {
11 16
12MemoryManager::MemoryManager() { 17MemoryManager::MemoryManager() {
13 // Mark the first page as reserved, so that 0 is not a valid GPUVAddr. Otherwise, games might 18 std::fill(page_table.pointers.begin(), page_table.pointers.end(), nullptr);
14 // try to use 0 as a valid address, which is also used to mean nullptr. This fixes a bug with 19 std::fill(page_table.attributes.begin(), page_table.attributes.end(),
15 // Undertale using 0 for a render target. 20 Common::PageType::Unmapped);
16 PageSlot(0) = static_cast<u64>(PageStatus::Reserved); 21 page_table.Resize(address_space_width);
22
23 // Initialize the map with a single free region covering the entire managed space.
24 VirtualMemoryArea initial_vma;
25 initial_vma.size = address_space_end;
26 vma_map.emplace(initial_vma.base, initial_vma);
27
28 UpdatePageTableForVMA(initial_vma);
17} 29}
18 30
19GPUVAddr MemoryManager::AllocateSpace(u64 size, u64 align) { 31GPUVAddr MemoryManager::AllocateSpace(u64 size, u64 align) {
20 const std::optional<GPUVAddr> gpu_addr{FindFreeBlock(0, size, align, PageStatus::Unmapped)}; 32 const u64 aligned_size{Common::AlignUp(size, page_size)};
33 const GPUVAddr gpu_addr{FindFreeRegion(address_space_base, aligned_size)};
21 34
22 ASSERT_MSG(gpu_addr, "unable to find available GPU memory"); 35 AllocateMemory(gpu_addr, 0, aligned_size);
23 36
24 for (u64 offset{}; offset < size; offset += PAGE_SIZE) { 37 return gpu_addr;
25 VAddr& slot{PageSlot(*gpu_addr + offset)}; 38}
26 39
27 ASSERT(slot == static_cast<u64>(PageStatus::Unmapped)); 40GPUVAddr MemoryManager::AllocateSpace(GPUVAddr gpu_addr, u64 size, u64 align) {
41 const u64 aligned_size{Common::AlignUp(size, page_size)};
28 42
29 slot = static_cast<u64>(PageStatus::Allocated); 43 AllocateMemory(gpu_addr, 0, aligned_size);
30 }
31 44
32 return *gpu_addr; 45 return gpu_addr;
33} 46}
34 47
35GPUVAddr MemoryManager::AllocateSpace(GPUVAddr gpu_addr, u64 size, u64 align) { 48GPUVAddr MemoryManager::MapBufferEx(VAddr cpu_addr, u64 size) {
36 for (u64 offset{}; offset < size; offset += PAGE_SIZE) { 49 const u64 aligned_size{Common::AlignUp(size, page_size)};
37 VAddr& slot{PageSlot(gpu_addr + offset)}; 50 const GPUVAddr gpu_addr{FindFreeRegion(address_space_base, aligned_size)};
38 51
39 ASSERT(slot == static_cast<u64>(PageStatus::Unmapped)); 52 MapBackingMemory(gpu_addr, Memory::GetPointer(cpu_addr), aligned_size, cpu_addr);
40 53
41 slot = static_cast<u64>(PageStatus::Allocated); 54 return gpu_addr;
42 } 55}
56
57GPUVAddr MemoryManager::MapBufferEx(VAddr cpu_addr, GPUVAddr gpu_addr, u64 size) {
58 ASSERT((gpu_addr & page_mask) == 0);
59
60 const u64 aligned_size{Common::AlignUp(size, page_size)};
61
62 MapBackingMemory(gpu_addr, Memory::GetPointer(cpu_addr), aligned_size, cpu_addr);
43 63
44 return gpu_addr; 64 return gpu_addr;
45} 65}
46 66
47GPUVAddr MemoryManager::MapBufferEx(VAddr cpu_addr, u64 size) { 67GPUVAddr MemoryManager::UnmapBuffer(GPUVAddr gpu_addr, u64 size) {
48 const std::optional<GPUVAddr> gpu_addr{FindFreeBlock(0, size, PAGE_SIZE, PageStatus::Unmapped)}; 68 ASSERT((gpu_addr & page_mask) == 0);
49 69
50 ASSERT_MSG(gpu_addr, "unable to find available GPU memory"); 70 const u64 aligned_size{Common::AlignUp(size, page_size)};
71 const CacheAddr cache_addr{ToCacheAddr(GetPointer(gpu_addr))};
72
73 Core::System::GetInstance().Renderer().Rasterizer().FlushAndInvalidateRegion(cache_addr,
74 aligned_size);
75 UnmapRange(gpu_addr, aligned_size);
76
77 return gpu_addr;
78}
51 79
52 for (u64 offset{}; offset < size; offset += PAGE_SIZE) { 80GPUVAddr MemoryManager::FindFreeRegion(GPUVAddr region_start, u64 size) {
53 VAddr& slot{PageSlot(*gpu_addr + offset)}; 81 // Find the first Free VMA.
82 const VMAHandle vma_handle{std::find_if(vma_map.begin(), vma_map.end(), [&](const auto& vma) {
83 if (vma.second.type != VirtualMemoryArea::Type::Unmapped) {
84 return false;
85 }
54 86
55 ASSERT(slot == static_cast<u64>(PageStatus::Unmapped)); 87 const VAddr vma_end{vma.second.base + vma.second.size};
88 return vma_end > region_start && vma_end >= region_start + size;
89 })};
56 90
57 slot = cpu_addr + offset; 91 if (vma_handle == vma_map.end()) {
92 return {};
58 } 93 }
59 94
60 const MappedRegion region{cpu_addr, *gpu_addr, size}; 95 return std::max(region_start, vma_handle->second.base);
61 mapped_regions.push_back(region); 96}
62 97
63 return *gpu_addr; 98bool MemoryManager::IsAddressValid(GPUVAddr addr) const {
99 return (addr >> page_bits) < page_table.pointers.size();
64} 100}
65 101
66GPUVAddr MemoryManager::MapBufferEx(VAddr cpu_addr, GPUVAddr gpu_addr, u64 size) { 102std::optional<VAddr> MemoryManager::GpuToCpuAddress(GPUVAddr addr) {
67 ASSERT((gpu_addr & PAGE_MASK) == 0); 103 if (!IsAddressValid(addr)) {
104 return {};
105 }
68 106
69 if (PageSlot(gpu_addr) != static_cast<u64>(PageStatus::Allocated)) { 107 VAddr cpu_addr{page_table.backing_addr[addr >> page_bits]};
70 // Page has been already mapped. In this case, we must find a new area of memory to use that 108 if (cpu_addr) {
71 // is different than the specified one. Super Mario Odyssey hits this scenario when changing 109 return cpu_addr + (addr & page_mask);
72 // areas, but we do not want to overwrite the old pages. 110 }
73 // TODO(bunnei): We need to write a hardware test to confirm this behavior.
74 111
75 LOG_ERROR(HW_GPU, "attempting to map addr 0x{:016X}, which is not available!", gpu_addr); 112 return {};
113}
76 114
77 const std::optional<GPUVAddr> new_gpu_addr{ 115template <typename T>
78 FindFreeBlock(gpu_addr, size, PAGE_SIZE, PageStatus::Allocated)}; 116T MemoryManager::Read(GPUVAddr addr) {
117 if (!IsAddressValid(addr)) {
118 return {};
119 }
79 120
80 ASSERT_MSG(new_gpu_addr, "unable to find available GPU memory"); 121 const u8* page_pointer{page_table.pointers[addr >> page_bits]};
122 if (page_pointer) {
123 // NOTE: Avoid adding any extra logic to this fast-path block
124 T value;
125 std::memcpy(&value, &page_pointer[addr & page_mask], sizeof(T));
126 return value;
127 }
81 128
82 gpu_addr = *new_gpu_addr; 129 switch (page_table.attributes[addr >> page_bits]) {
130 case Common::PageType::Unmapped:
131 LOG_ERROR(HW_GPU, "Unmapped Read{} @ 0x{:08X}", sizeof(T) * 8, addr);
132 return 0;
133 case Common::PageType::Memory:
134 ASSERT_MSG(false, "Mapped memory page without a pointer @ {:016X}", addr);
135 break;
136 default:
137 UNREACHABLE();
83 } 138 }
139 return {};
140}
84 141
85 for (u64 offset{}; offset < size; offset += PAGE_SIZE) { 142template <typename T>
86 VAddr& slot{PageSlot(gpu_addr + offset)}; 143void MemoryManager::Write(GPUVAddr addr, T data) {
144 if (!IsAddressValid(addr)) {
145 return;
146 }
87 147
88 ASSERT(slot == static_cast<u64>(PageStatus::Allocated)); 148 u8* page_pointer{page_table.pointers[addr >> page_bits]};
149 if (page_pointer) {
150 // NOTE: Avoid adding any extra logic to this fast-path block
151 std::memcpy(&page_pointer[addr & page_mask], &data, sizeof(T));
152 return;
153 }
89 154
90 slot = cpu_addr + offset; 155 switch (page_table.attributes[addr >> page_bits]) {
156 case Common::PageType::Unmapped:
157 LOG_ERROR(HW_GPU, "Unmapped Write{} 0x{:08X} @ 0x{:016X}", sizeof(data) * 8,
158 static_cast<u32>(data), addr);
159 return;
160 case Common::PageType::Memory:
161 ASSERT_MSG(false, "Mapped memory page without a pointer @ {:016X}", addr);
162 break;
163 default:
164 UNREACHABLE();
91 } 165 }
166}
92 167
93 const MappedRegion region{cpu_addr, gpu_addr, size}; 168template u8 MemoryManager::Read<u8>(GPUVAddr addr);
94 mapped_regions.push_back(region); 169template u16 MemoryManager::Read<u16>(GPUVAddr addr);
170template u32 MemoryManager::Read<u32>(GPUVAddr addr);
171template u64 MemoryManager::Read<u64>(GPUVAddr addr);
172template void MemoryManager::Write<u8>(GPUVAddr addr, u8 data);
173template void MemoryManager::Write<u16>(GPUVAddr addr, u16 data);
174template void MemoryManager::Write<u32>(GPUVAddr addr, u32 data);
175template void MemoryManager::Write<u64>(GPUVAddr addr, u64 data);
176
177u8* MemoryManager::GetPointer(GPUVAddr addr) {
178 if (!IsAddressValid(addr)) {
179 return {};
180 }
95 181
96 return gpu_addr; 182 u8* page_pointer{page_table.pointers[addr >> page_bits]};
183 if (page_pointer) {
184 return page_pointer + (addr & page_mask);
185 }
186
187 LOG_ERROR(HW_GPU, "Unknown GetPointer @ 0x{:016X}", addr);
188 return {};
97} 189}
98 190
99GPUVAddr MemoryManager::UnmapBuffer(GPUVAddr gpu_addr, u64 size) { 191void MemoryManager::ReadBlock(GPUVAddr src_addr, void* dest_buffer, std::size_t size) {
100 ASSERT((gpu_addr & PAGE_MASK) == 0); 192 std::memcpy(dest_buffer, GetPointer(src_addr), size);
193}
194void MemoryManager::WriteBlock(GPUVAddr dest_addr, const void* src_buffer, std::size_t size) {
195 std::memcpy(GetPointer(dest_addr), src_buffer, size);
196}
101 197
102 for (u64 offset{}; offset < size; offset += PAGE_SIZE) { 198void MemoryManager::CopyBlock(GPUVAddr dest_addr, GPUVAddr src_addr, std::size_t size) {
103 VAddr& slot{PageSlot(gpu_addr + offset)}; 199 std::memcpy(GetPointer(dest_addr), GetPointer(src_addr), size);
200}
104 201
105 ASSERT(slot != static_cast<u64>(PageStatus::Allocated) && 202void MemoryManager::MapPages(GPUVAddr base, u64 size, u8* memory, Common::PageType type,
106 slot != static_cast<u64>(PageStatus::Unmapped)); 203 VAddr backing_addr) {
204 LOG_DEBUG(HW_GPU, "Mapping {} onto {:016X}-{:016X}", fmt::ptr(memory), base * page_size,
205 (base + size) * page_size);
206
207 const VAddr end{base + size};
208 ASSERT_MSG(end <= page_table.pointers.size(), "out of range mapping at {:016X}",
209 base + page_table.pointers.size());
210
211 std::fill(page_table.attributes.begin() + base, page_table.attributes.begin() + end, type);
212
213 if (memory == nullptr) {
214 std::fill(page_table.pointers.begin() + base, page_table.pointers.begin() + end, memory);
215 std::fill(page_table.backing_addr.begin() + base, page_table.backing_addr.begin() + end,
216 backing_addr);
217 } else {
218 while (base != end) {
219 page_table.pointers[base] = memory;
220 page_table.backing_addr[base] = backing_addr;
221
222 base += 1;
223 memory += page_size;
224 backing_addr += page_size;
225 }
226 }
227}
107 228
108 slot = static_cast<u64>(PageStatus::Unmapped); 229void MemoryManager::MapMemoryRegion(GPUVAddr base, u64 size, u8* target, VAddr backing_addr) {
230 ASSERT_MSG((size & page_mask) == 0, "non-page aligned size: {:016X}", size);
231 ASSERT_MSG((base & page_mask) == 0, "non-page aligned base: {:016X}", base);
232 MapPages(base / page_size, size / page_size, target, Common::PageType::Memory, backing_addr);
233}
234
235void MemoryManager::UnmapRegion(GPUVAddr base, u64 size) {
236 ASSERT_MSG((size & page_mask) == 0, "non-page aligned size: {:016X}", size);
237 ASSERT_MSG((base & page_mask) == 0, "non-page aligned base: {:016X}", base);
238 MapPages(base / page_size, size / page_size, nullptr, Common::PageType::Unmapped);
239}
240
241bool VirtualMemoryArea::CanBeMergedWith(const VirtualMemoryArea& next) const {
242 ASSERT(base + size == next.base);
243 if (type != next.type) {
244 return {};
245 }
246 if (type == VirtualMemoryArea::Type::Allocated && (offset + size != next.offset)) {
247 return {};
248 }
249 if (type == VirtualMemoryArea::Type::Mapped && backing_memory + size != next.backing_memory) {
250 return {};
251 }
252 return true;
253}
254
255MemoryManager::VMAHandle MemoryManager::FindVMA(GPUVAddr target) const {
256 if (target >= address_space_end) {
257 return vma_map.end();
258 } else {
259 return std::prev(vma_map.upper_bound(target));
109 } 260 }
261}
110 262
111 // Delete the region mappings that are contained within the unmapped region 263MemoryManager::VMAIter MemoryManager::Allocate(VMAIter vma_handle) {
112 mapped_regions.erase(std::remove_if(mapped_regions.begin(), mapped_regions.end(), 264 VirtualMemoryArea& vma{vma_handle->second};
113 [&](const MappedRegion& region) { 265
114 return region.gpu_addr <= gpu_addr && 266 vma.type = VirtualMemoryArea::Type::Allocated;
115 region.gpu_addr + region.size < gpu_addr + size; 267 vma.backing_addr = 0;
116 }), 268 vma.backing_memory = {};
117 mapped_regions.end()); 269 UpdatePageTableForVMA(vma);
118 return gpu_addr; 270
271 return MergeAdjacent(vma_handle);
119} 272}
120 273
121GPUVAddr MemoryManager::GetRegionEnd(GPUVAddr region_start) const { 274MemoryManager::VMAHandle MemoryManager::AllocateMemory(GPUVAddr target, std::size_t offset,
122 for (const auto& region : mapped_regions) { 275 u64 size) {
123 const GPUVAddr region_end{region.gpu_addr + region.size}; 276
124 if (region_start >= region.gpu_addr && region_start < region_end) { 277 // This is the appropriately sized VMA that will turn into our allocation.
125 return region_end; 278 VMAIter vma_handle{CarveVMA(target, size)};
126 } 279 VirtualMemoryArea& vma{vma_handle->second};
280
281 ASSERT(vma.size == size);
282
283 vma.offset = offset;
284
285 return Allocate(vma_handle);
286}
287
288MemoryManager::VMAHandle MemoryManager::MapBackingMemory(GPUVAddr target, u8* memory, u64 size,
289 VAddr backing_addr) {
290 // This is the appropriately sized VMA that will turn into our allocation.
291 VMAIter vma_handle{CarveVMA(target, size)};
292 VirtualMemoryArea& vma{vma_handle->second};
293
294 ASSERT(vma.size == size);
295
296 vma.type = VirtualMemoryArea::Type::Mapped;
297 vma.backing_memory = memory;
298 vma.backing_addr = backing_addr;
299 UpdatePageTableForVMA(vma);
300
301 return MergeAdjacent(vma_handle);
302}
303
304void MemoryManager::UnmapRange(GPUVAddr target, u64 size) {
305 VMAIter vma{CarveVMARange(target, size)};
306 const VAddr target_end{target + size};
307 const VMAIter end{vma_map.end()};
308
309 // The comparison against the end of the range must be done using addresses since VMAs can be
310 // merged during this process, causing invalidation of the iterators.
311 while (vma != end && vma->second.base < target_end) {
312 // Unmapped ranges return to allocated state and can be reused
313 // This behavior is used by Super Mario Odyssey, Sonic Forces, and likely other games
314 vma = std::next(Allocate(vma));
127 } 315 }
128 return {}; 316
317 ASSERT(FindVMA(target)->second.size >= size);
129} 318}
130 319
131std::optional<GPUVAddr> MemoryManager::FindFreeBlock(GPUVAddr region_start, u64 size, u64 align, 320MemoryManager::VMAIter MemoryManager::StripIterConstness(const VMAHandle& iter) {
132 PageStatus status) { 321 // This uses a neat C++ trick to convert a const_iterator to a regular iterator, given
133 GPUVAddr gpu_addr{region_start}; 322 // non-const access to its container.
134 u64 free_space{}; 323 return vma_map.erase(iter, iter); // Erases an empty range of elements
135 align = (align + PAGE_MASK) & ~PAGE_MASK; 324}
136 325
137 while (gpu_addr + free_space < MAX_ADDRESS) { 326MemoryManager::VMAIter MemoryManager::CarveVMA(GPUVAddr base, u64 size) {
138 if (PageSlot(gpu_addr + free_space) == static_cast<u64>(status)) { 327 ASSERT_MSG((size & page_mask) == 0, "non-page aligned size: 0x{:016X}", size);
139 free_space += PAGE_SIZE; 328 ASSERT_MSG((base & page_mask) == 0, "non-page aligned base: 0x{:016X}", base);
140 if (free_space >= size) { 329
141 return gpu_addr; 330 VMAIter vma_handle{StripIterConstness(FindVMA(base))};
142 } 331 if (vma_handle == vma_map.end()) {
143 } else { 332 // Target address is outside the managed range
144 gpu_addr += free_space + PAGE_SIZE; 333 return {};
145 free_space = 0;
146 gpu_addr = Common::AlignUp(gpu_addr, align);
147 }
148 } 334 }
149 335
150 return {}; 336 const VirtualMemoryArea& vma{vma_handle->second};
337 if (vma.type == VirtualMemoryArea::Type::Mapped) {
338 // Region is already allocated
339 return {};
340 }
341
342 const VAddr start_in_vma{base - vma.base};
343 const VAddr end_in_vma{start_in_vma + size};
344
345 ASSERT_MSG(end_in_vma <= vma.size, "region size 0x{:016X} is less than required size 0x{:016X}",
346 vma.size, end_in_vma);
347
348 if (end_in_vma < vma.size) {
349 // Split VMA at the end of the allocated region
350 SplitVMA(vma_handle, end_in_vma);
351 }
352 if (start_in_vma != 0) {
353 // Split VMA at the start of the allocated region
354 vma_handle = SplitVMA(vma_handle, start_in_vma);
355 }
356
357 return vma_handle;
151} 358}
152 359
153std::optional<VAddr> MemoryManager::GpuToCpuAddress(GPUVAddr gpu_addr) { 360MemoryManager::VMAIter MemoryManager::CarveVMARange(GPUVAddr target, u64 size) {
154 const VAddr base_addr{PageSlot(gpu_addr)}; 361 ASSERT_MSG((size & page_mask) == 0, "non-page aligned size: 0x{:016X}", size);
362 ASSERT_MSG((target & page_mask) == 0, "non-page aligned base: 0x{:016X}", target);
155 363
156 if (base_addr == static_cast<u64>(PageStatus::Allocated) || 364 const VAddr target_end{target + size};
157 base_addr == static_cast<u64>(PageStatus::Unmapped) || 365 ASSERT(target_end >= target);
158 base_addr == static_cast<u64>(PageStatus::Reserved)) { 366 ASSERT(size > 0);
367
368 VMAIter begin_vma{StripIterConstness(FindVMA(target))};
369 const VMAIter i_end{vma_map.lower_bound(target_end)};
370 if (std::any_of(begin_vma, i_end, [](const auto& entry) {
371 return entry.second.type == VirtualMemoryArea::Type::Unmapped;
372 })) {
159 return {}; 373 return {};
160 } 374 }
161 375
162 return base_addr + (gpu_addr & PAGE_MASK); 376 if (target != begin_vma->second.base) {
377 begin_vma = SplitVMA(begin_vma, target - begin_vma->second.base);
378 }
379
380 VMAIter end_vma{StripIterConstness(FindVMA(target_end))};
381 if (end_vma != vma_map.end() && target_end != end_vma->second.base) {
382 end_vma = SplitVMA(end_vma, target_end - end_vma->second.base);
383 }
384
385 return begin_vma;
163} 386}
164 387
165std::vector<GPUVAddr> MemoryManager::CpuToGpuAddress(VAddr cpu_addr) const { 388MemoryManager::VMAIter MemoryManager::SplitVMA(VMAIter vma_handle, u64 offset_in_vma) {
166 std::vector<GPUVAddr> results; 389 VirtualMemoryArea& old_vma{vma_handle->second};
167 for (const auto& region : mapped_regions) { 390 VirtualMemoryArea new_vma{old_vma}; // Make a copy of the VMA
168 if (cpu_addr >= region.cpu_addr && cpu_addr < (region.cpu_addr + region.size)) { 391
169 const u64 offset{cpu_addr - region.cpu_addr}; 392 // For now, don't allow no-op VMA splits (trying to split at a boundary) because it's probably
170 results.push_back(region.gpu_addr + offset); 393 // a bug. This restriction might be removed later.
394 ASSERT(offset_in_vma < old_vma.size);
395 ASSERT(offset_in_vma > 0);
396
397 old_vma.size = offset_in_vma;
398 new_vma.base += offset_in_vma;
399 new_vma.size -= offset_in_vma;
400
401 switch (new_vma.type) {
402 case VirtualMemoryArea::Type::Unmapped:
403 break;
404 case VirtualMemoryArea::Type::Allocated:
405 new_vma.offset += offset_in_vma;
406 break;
407 case VirtualMemoryArea::Type::Mapped:
408 new_vma.backing_memory += offset_in_vma;
409 break;
410 }
411
412 ASSERT(old_vma.CanBeMergedWith(new_vma));
413
414 return vma_map.emplace_hint(std::next(vma_handle), new_vma.base, new_vma);
415}
416
417MemoryManager::VMAIter MemoryManager::MergeAdjacent(VMAIter iter) {
418 const VMAIter next_vma{std::next(iter)};
419 if (next_vma != vma_map.end() && iter->second.CanBeMergedWith(next_vma->second)) {
420 iter->second.size += next_vma->second.size;
421 vma_map.erase(next_vma);
422 }
423
424 if (iter != vma_map.begin()) {
425 VMAIter prev_vma{std::prev(iter)};
426 if (prev_vma->second.CanBeMergedWith(iter->second)) {
427 prev_vma->second.size += iter->second.size;
428 vma_map.erase(iter);
429 iter = prev_vma;
171 } 430 }
172 } 431 }
173 return results; 432
433 return iter;
174} 434}
175 435
176VAddr& MemoryManager::PageSlot(GPUVAddr gpu_addr) { 436void MemoryManager::UpdatePageTableForVMA(const VirtualMemoryArea& vma) {
177 auto& block{page_table[(gpu_addr >> (PAGE_BITS + PAGE_TABLE_BITS)) & PAGE_TABLE_MASK]}; 437 switch (vma.type) {
178 if (!block) { 438 case VirtualMemoryArea::Type::Unmapped:
179 block = std::make_unique<PageBlock>(); 439 UnmapRegion(vma.base, vma.size);
180 block->fill(static_cast<VAddr>(PageStatus::Unmapped)); 440 break;
441 case VirtualMemoryArea::Type::Allocated:
442 MapMemoryRegion(vma.base, vma.size, nullptr, vma.backing_addr);
443 break;
444 case VirtualMemoryArea::Type::Mapped:
445 MapMemoryRegion(vma.base, vma.size, vma.backing_memory, vma.backing_addr);
446 break;
181 } 447 }
182 return (*block)[(gpu_addr >> PAGE_BITS) & PAGE_BLOCK_MASK];
183} 448}
184 449
185} // namespace Tegra 450} // namespace Tegra
diff --git a/src/video_core/memory_manager.h b/src/video_core/memory_manager.h
index fb03497ca..34744bb27 100644
--- a/src/video_core/memory_manager.h
+++ b/src/video_core/memory_manager.h
@@ -1,67 +1,148 @@
1// Copyright 2018 yuzu emulator team 1// Copyright 2018 yuzu emulator team
2// Licensed under GPLv2 or any later version 2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included. 3// Refer to the license.txt file included.
4 4
5#pragma once 5#pragma once
6 6
7#include <array> 7#include <map>
8#include <memory>
9#include <optional> 8#include <optional>
10#include <vector>
11 9
12#include "common/common_types.h" 10#include "common/common_types.h"
11#include "common/page_table.h"
13 12
14namespace Tegra { 13namespace Tegra {
15 14
16/// Virtual addresses in the GPU's memory map are 64 bit. 15/**
17using GPUVAddr = u64; 16 * Represents a VMA in an address space. A VMA is a contiguous region of virtual addressing space
17 * with homogeneous attributes across its extents. In this particular implementation each VMA is
18 * also backed by a single host memory allocation.
19 */
20struct VirtualMemoryArea {
21 enum class Type : u8 {
22 Unmapped,
23 Allocated,
24 Mapped,
25 };
26
27 /// Virtual base address of the region.
28 GPUVAddr base{};
29 /// Size of the region.
30 u64 size{};
31 /// Memory area mapping type.
32 Type type{Type::Unmapped};
33 /// CPU memory mapped address corresponding to this memory area.
34 VAddr backing_addr{};
35 /// Offset into the backing_memory the mapping starts from.
36 std::size_t offset{};
37 /// Pointer backing this VMA.
38 u8* backing_memory{};
39
40 /// Tests if this area can be merged to the right with `next`.
41 bool CanBeMergedWith(const VirtualMemoryArea& next) const;
42};
18 43
19class MemoryManager final { 44class MemoryManager final {
20public: 45public:
21 MemoryManager(); 46 MemoryManager();
22 47
23 GPUVAddr AllocateSpace(u64 size, u64 align); 48 GPUVAddr AllocateSpace(u64 size, u64 align);
24 GPUVAddr AllocateSpace(GPUVAddr gpu_addr, u64 size, u64 align); 49 GPUVAddr AllocateSpace(GPUVAddr addr, u64 size, u64 align);
25 GPUVAddr MapBufferEx(VAddr cpu_addr, u64 size); 50 GPUVAddr MapBufferEx(VAddr cpu_addr, u64 size);
26 GPUVAddr MapBufferEx(VAddr cpu_addr, GPUVAddr gpu_addr, u64 size); 51 GPUVAddr MapBufferEx(VAddr cpu_addr, GPUVAddr addr, u64 size);
27 GPUVAddr UnmapBuffer(GPUVAddr gpu_addr, u64 size); 52 GPUVAddr UnmapBuffer(GPUVAddr addr, u64 size);
28 GPUVAddr GetRegionEnd(GPUVAddr region_start) const; 53 std::optional<VAddr> GpuToCpuAddress(GPUVAddr addr);
29 std::optional<VAddr> GpuToCpuAddress(GPUVAddr gpu_addr); 54
30 std::vector<GPUVAddr> CpuToGpuAddress(VAddr cpu_addr) const; 55 template <typename T>
56 T Read(GPUVAddr addr);
57
58 template <typename T>
59 void Write(GPUVAddr addr, T data);
31 60
32 static constexpr u64 PAGE_BITS = 16; 61 u8* GetPointer(GPUVAddr addr);
33 static constexpr u64 PAGE_SIZE = 1 << PAGE_BITS; 62
34 static constexpr u64 PAGE_MASK = PAGE_SIZE - 1; 63 void ReadBlock(GPUVAddr src_addr, void* dest_buffer, std::size_t size);
64 void WriteBlock(GPUVAddr dest_addr, const void* src_buffer, std::size_t size);
65 void CopyBlock(GPUVAddr dest_addr, GPUVAddr src_addr, std::size_t size);
35 66
36private: 67private:
37 enum class PageStatus : u64 { 68 using VMAMap = std::map<GPUVAddr, VirtualMemoryArea>;
38 Unmapped = 0xFFFFFFFFFFFFFFFFULL, 69 using VMAHandle = VMAMap::const_iterator;
39 Allocated = 0xFFFFFFFFFFFFFFFEULL, 70 using VMAIter = VMAMap::iterator;
40 Reserved = 0xFFFFFFFFFFFFFFFDULL,
41 };
42 71
43 std::optional<GPUVAddr> FindFreeBlock(GPUVAddr region_start, u64 size, u64 align, 72 bool IsAddressValid(GPUVAddr addr) const;
44 PageStatus status); 73 void MapPages(GPUVAddr base, u64 size, u8* memory, Common::PageType type,
45 VAddr& PageSlot(GPUVAddr gpu_addr); 74 VAddr backing_addr = 0);
46 75 void MapMemoryRegion(GPUVAddr base, u64 size, u8* target, VAddr backing_addr);
47 static constexpr u64 MAX_ADDRESS{0x10000000000ULL}; 76 void UnmapRegion(GPUVAddr base, u64 size);
48 static constexpr u64 PAGE_TABLE_BITS{10}; 77
49 static constexpr u64 PAGE_TABLE_SIZE{1 << PAGE_TABLE_BITS}; 78 /// Finds the VMA in which the given address is included in, or `vma_map.end()`.
50 static constexpr u64 PAGE_TABLE_MASK{PAGE_TABLE_SIZE - 1}; 79 VMAHandle FindVMA(GPUVAddr target) const;
51 static constexpr u64 PAGE_BLOCK_BITS{14}; 80
52 static constexpr u64 PAGE_BLOCK_SIZE{1 << PAGE_BLOCK_BITS}; 81 VMAHandle AllocateMemory(GPUVAddr target, std::size_t offset, u64 size);
53 static constexpr u64 PAGE_BLOCK_MASK{PAGE_BLOCK_SIZE - 1}; 82
54 83 /**
55 using PageBlock = std::array<VAddr, PAGE_BLOCK_SIZE>; 84 * Maps an unmanaged host memory pointer at a given address.
56 std::array<std::unique_ptr<PageBlock>, PAGE_TABLE_SIZE> page_table{}; 85 *
57 86 * @param target The guest address to start the mapping at.
58 struct MappedRegion { 87 * @param memory The memory to be mapped.
59 VAddr cpu_addr; 88 * @param size Size of the mapping.
60 GPUVAddr gpu_addr; 89 * @param state MemoryState tag to attach to the VMA.
61 u64 size; 90 */
62 }; 91 VMAHandle MapBackingMemory(GPUVAddr target, u8* memory, u64 size, VAddr backing_addr);
92
93 /// Unmaps a range of addresses, splitting VMAs as necessary.
94 void UnmapRange(GPUVAddr target, u64 size);
95
96 /// Converts a VMAHandle to a mutable VMAIter.
97 VMAIter StripIterConstness(const VMAHandle& iter);
98
99 /// Marks as the specfied VMA as allocated.
100 VMAIter Allocate(VMAIter vma);
101
102 /**
103 * Carves a VMA of a specific size at the specified address by splitting Free VMAs while doing
104 * the appropriate error checking.
105 */
106 VMAIter CarveVMA(GPUVAddr base, u64 size);
107
108 /**
109 * Splits the edges of the given range of non-Free VMAs so that there is a VMA split at each
110 * end of the range.
111 */
112 VMAIter CarveVMARange(GPUVAddr base, u64 size);
113
114 /**
115 * Splits a VMA in two, at the specified offset.
116 * @returns the right side of the split, with the original iterator becoming the left side.
117 */
118 VMAIter SplitVMA(VMAIter vma, u64 offset_in_vma);
119
120 /**
121 * Checks for and merges the specified VMA with adjacent ones if possible.
122 * @returns the merged VMA or the original if no merging was possible.
123 */
124 VMAIter MergeAdjacent(VMAIter vma);
125
126 /// Updates the pages corresponding to this VMA so they match the VMA's attributes.
127 void UpdatePageTableForVMA(const VirtualMemoryArea& vma);
128
129 /// Finds a free (unmapped region) of the specified size starting at the specified address.
130 GPUVAddr FindFreeRegion(GPUVAddr region_start, u64 size);
131
132private:
133 static constexpr u64 page_bits{16};
134 static constexpr u64 page_size{1 << page_bits};
135 static constexpr u64 page_mask{page_size - 1};
136
137 /// Address space in bits, this is fairly arbitrary but sufficiently large.
138 static constexpr u32 address_space_width{39};
139 /// Start address for mapping, this is fairly arbitrary but must be non-zero.
140 static constexpr GPUVAddr address_space_base{0x100000};
141 /// End of address space, based on address space in bits.
142 static constexpr GPUVAddr address_space_end{1ULL << address_space_width};
63 143
64 std::vector<MappedRegion> mapped_regions; 144 Common::PageTable page_table{page_bits};
145 VMAMap vma_map;
65}; 146};
66 147
67} // namespace Tegra 148} // namespace Tegra
diff --git a/src/video_core/morton.cpp b/src/video_core/morton.cpp
index b68f4fb13..3e91cbc83 100644
--- a/src/video_core/morton.cpp
+++ b/src/video_core/morton.cpp
@@ -6,7 +6,6 @@
6#include <cstring> 6#include <cstring>
7#include "common/assert.h" 7#include "common/assert.h"
8#include "common/common_types.h" 8#include "common/common_types.h"
9#include "core/memory.h"
10#include "video_core/morton.h" 9#include "video_core/morton.h"
11#include "video_core/surface.h" 10#include "video_core/surface.h"
12#include "video_core/textures/decoders.h" 11#include "video_core/textures/decoders.h"
@@ -16,12 +15,12 @@ namespace VideoCore {
16using Surface::GetBytesPerPixel; 15using Surface::GetBytesPerPixel;
17using Surface::PixelFormat; 16using Surface::PixelFormat;
18 17
19using MortonCopyFn = void (*)(u32, u32, u32, u32, u32, u32, u8*, std::size_t, VAddr); 18using MortonCopyFn = void (*)(u32, u32, u32, u32, u32, u32, u8*, u8*);
20using ConversionArray = std::array<MortonCopyFn, Surface::MaxPixelFormat>; 19using ConversionArray = std::array<MortonCopyFn, Surface::MaxPixelFormat>;
21 20
22template <bool morton_to_linear, PixelFormat format> 21template <bool morton_to_linear, PixelFormat format>
23static void MortonCopy(u32 stride, u32 block_height, u32 height, u32 block_depth, u32 depth, 22static void MortonCopy(u32 stride, u32 block_height, u32 height, u32 block_depth, u32 depth,
24 u32 tile_width_spacing, u8* buffer, std::size_t buffer_size, VAddr addr) { 23 u32 tile_width_spacing, u8* buffer, u8* addr) {
25 constexpr u32 bytes_per_pixel = GetBytesPerPixel(format); 24 constexpr u32 bytes_per_pixel = GetBytesPerPixel(format);
26 25
27 // With the BCn formats (DXT and DXN), each 4x4 tile is swizzled instead of just individual 26 // With the BCn formats (DXT and DXN), each 4x4 tile is swizzled instead of just individual
@@ -34,150 +33,146 @@ static void MortonCopy(u32 stride, u32 block_height, u32 height, u32 block_depth
34 stride, height, depth, block_height, block_depth, 33 stride, height, depth, block_height, block_depth,
35 tile_width_spacing); 34 tile_width_spacing);
36 } else { 35 } else {
37 Tegra::Texture::CopySwizzledData( 36 Tegra::Texture::CopySwizzledData((stride + tile_size_x - 1) / tile_size_x,
38 (stride + tile_size_x - 1) / tile_size_x, (height + tile_size_y - 1) / tile_size_y, 37 (height + tile_size_y - 1) / tile_size_y, depth,
39 depth, bytes_per_pixel, bytes_per_pixel, Memory::GetPointer(addr), buffer, false, 38 bytes_per_pixel, bytes_per_pixel, addr, buffer, false,
40 block_height, block_depth, tile_width_spacing); 39 block_height, block_depth, tile_width_spacing);
41 } 40 }
42} 41}
43 42
44static constexpr ConversionArray morton_to_linear_fns = { 43static constexpr ConversionArray morton_to_linear_fns = {
45 // clang-format off 44 MortonCopy<true, PixelFormat::ABGR8U>,
46 MortonCopy<true, PixelFormat::ABGR8U>, 45 MortonCopy<true, PixelFormat::ABGR8S>,
47 MortonCopy<true, PixelFormat::ABGR8S>, 46 MortonCopy<true, PixelFormat::ABGR8UI>,
48 MortonCopy<true, PixelFormat::ABGR8UI>, 47 MortonCopy<true, PixelFormat::B5G6R5U>,
49 MortonCopy<true, PixelFormat::B5G6R5U>, 48 MortonCopy<true, PixelFormat::A2B10G10R10U>,
50 MortonCopy<true, PixelFormat::A2B10G10R10U>, 49 MortonCopy<true, PixelFormat::A1B5G5R5U>,
51 MortonCopy<true, PixelFormat::A1B5G5R5U>, 50 MortonCopy<true, PixelFormat::R8U>,
52 MortonCopy<true, PixelFormat::R8U>, 51 MortonCopy<true, PixelFormat::R8UI>,
53 MortonCopy<true, PixelFormat::R8UI>, 52 MortonCopy<true, PixelFormat::RGBA16F>,
54 MortonCopy<true, PixelFormat::RGBA16F>, 53 MortonCopy<true, PixelFormat::RGBA16U>,
55 MortonCopy<true, PixelFormat::RGBA16U>, 54 MortonCopy<true, PixelFormat::RGBA16UI>,
56 MortonCopy<true, PixelFormat::RGBA16UI>, 55 MortonCopy<true, PixelFormat::R11FG11FB10F>,
57 MortonCopy<true, PixelFormat::R11FG11FB10F>, 56 MortonCopy<true, PixelFormat::RGBA32UI>,
58 MortonCopy<true, PixelFormat::RGBA32UI>, 57 MortonCopy<true, PixelFormat::DXT1>,
59 MortonCopy<true, PixelFormat::DXT1>, 58 MortonCopy<true, PixelFormat::DXT23>,
60 MortonCopy<true, PixelFormat::DXT23>, 59 MortonCopy<true, PixelFormat::DXT45>,
61 MortonCopy<true, PixelFormat::DXT45>, 60 MortonCopy<true, PixelFormat::DXN1>,
62 MortonCopy<true, PixelFormat::DXN1>, 61 MortonCopy<true, PixelFormat::DXN2UNORM>,
63 MortonCopy<true, PixelFormat::DXN2UNORM>, 62 MortonCopy<true, PixelFormat::DXN2SNORM>,
64 MortonCopy<true, PixelFormat::DXN2SNORM>, 63 MortonCopy<true, PixelFormat::BC7U>,
65 MortonCopy<true, PixelFormat::BC7U>, 64 MortonCopy<true, PixelFormat::BC6H_UF16>,
66 MortonCopy<true, PixelFormat::BC6H_UF16>, 65 MortonCopy<true, PixelFormat::BC6H_SF16>,
67 MortonCopy<true, PixelFormat::BC6H_SF16>, 66 MortonCopy<true, PixelFormat::ASTC_2D_4X4>,
68 MortonCopy<true, PixelFormat::ASTC_2D_4X4>, 67 MortonCopy<true, PixelFormat::BGRA8>,
69 MortonCopy<true, PixelFormat::BGRA8>, 68 MortonCopy<true, PixelFormat::RGBA32F>,
70 MortonCopy<true, PixelFormat::RGBA32F>, 69 MortonCopy<true, PixelFormat::RG32F>,
71 MortonCopy<true, PixelFormat::RG32F>, 70 MortonCopy<true, PixelFormat::R32F>,
72 MortonCopy<true, PixelFormat::R32F>, 71 MortonCopy<true, PixelFormat::R16F>,
73 MortonCopy<true, PixelFormat::R16F>, 72 MortonCopy<true, PixelFormat::R16U>,
74 MortonCopy<true, PixelFormat::R16U>, 73 MortonCopy<true, PixelFormat::R16S>,
75 MortonCopy<true, PixelFormat::R16S>, 74 MortonCopy<true, PixelFormat::R16UI>,
76 MortonCopy<true, PixelFormat::R16UI>, 75 MortonCopy<true, PixelFormat::R16I>,
77 MortonCopy<true, PixelFormat::R16I>, 76 MortonCopy<true, PixelFormat::RG16>,
78 MortonCopy<true, PixelFormat::RG16>, 77 MortonCopy<true, PixelFormat::RG16F>,
79 MortonCopy<true, PixelFormat::RG16F>, 78 MortonCopy<true, PixelFormat::RG16UI>,
80 MortonCopy<true, PixelFormat::RG16UI>, 79 MortonCopy<true, PixelFormat::RG16I>,
81 MortonCopy<true, PixelFormat::RG16I>, 80 MortonCopy<true, PixelFormat::RG16S>,
82 MortonCopy<true, PixelFormat::RG16S>, 81 MortonCopy<true, PixelFormat::RGB32F>,
83 MortonCopy<true, PixelFormat::RGB32F>, 82 MortonCopy<true, PixelFormat::RGBA8_SRGB>,
84 MortonCopy<true, PixelFormat::RGBA8_SRGB>, 83 MortonCopy<true, PixelFormat::RG8U>,
85 MortonCopy<true, PixelFormat::RG8U>, 84 MortonCopy<true, PixelFormat::RG8S>,
86 MortonCopy<true, PixelFormat::RG8S>, 85 MortonCopy<true, PixelFormat::RG32UI>,
87 MortonCopy<true, PixelFormat::RG32UI>, 86 MortonCopy<true, PixelFormat::R32UI>,
88 MortonCopy<true, PixelFormat::R32UI>, 87 MortonCopy<true, PixelFormat::ASTC_2D_8X8>,
89 MortonCopy<true, PixelFormat::ASTC_2D_8X8>, 88 MortonCopy<true, PixelFormat::ASTC_2D_8X5>,
90 MortonCopy<true, PixelFormat::ASTC_2D_8X5>, 89 MortonCopy<true, PixelFormat::ASTC_2D_5X4>,
91 MortonCopy<true, PixelFormat::ASTC_2D_5X4>, 90 MortonCopy<true, PixelFormat::BGRA8_SRGB>,
92 MortonCopy<true, PixelFormat::BGRA8_SRGB>, 91 MortonCopy<true, PixelFormat::DXT1_SRGB>,
93 MortonCopy<true, PixelFormat::DXT1_SRGB>, 92 MortonCopy<true, PixelFormat::DXT23_SRGB>,
94 MortonCopy<true, PixelFormat::DXT23_SRGB>, 93 MortonCopy<true, PixelFormat::DXT45_SRGB>,
95 MortonCopy<true, PixelFormat::DXT45_SRGB>, 94 MortonCopy<true, PixelFormat::BC7U_SRGB>,
96 MortonCopy<true, PixelFormat::BC7U_SRGB>, 95 MortonCopy<true, PixelFormat::ASTC_2D_4X4_SRGB>,
97 MortonCopy<true, PixelFormat::ASTC_2D_4X4_SRGB>, 96 MortonCopy<true, PixelFormat::ASTC_2D_8X8_SRGB>,
98 MortonCopy<true, PixelFormat::ASTC_2D_8X8_SRGB>, 97 MortonCopy<true, PixelFormat::ASTC_2D_8X5_SRGB>,
99 MortonCopy<true, PixelFormat::ASTC_2D_8X5_SRGB>, 98 MortonCopy<true, PixelFormat::ASTC_2D_5X4_SRGB>,
100 MortonCopy<true, PixelFormat::ASTC_2D_5X4_SRGB>, 99 MortonCopy<true, PixelFormat::ASTC_2D_5X5>,
101 MortonCopy<true, PixelFormat::ASTC_2D_5X5>, 100 MortonCopy<true, PixelFormat::ASTC_2D_5X5_SRGB>,
102 MortonCopy<true, PixelFormat::ASTC_2D_5X5_SRGB>, 101 MortonCopy<true, PixelFormat::ASTC_2D_10X8>,
103 MortonCopy<true, PixelFormat::ASTC_2D_10X8>, 102 MortonCopy<true, PixelFormat::ASTC_2D_10X8_SRGB>,
104 MortonCopy<true, PixelFormat::ASTC_2D_10X8_SRGB>, 103 MortonCopy<true, PixelFormat::Z32F>,
105 MortonCopy<true, PixelFormat::Z32F>, 104 MortonCopy<true, PixelFormat::Z16>,
106 MortonCopy<true, PixelFormat::Z16>, 105 MortonCopy<true, PixelFormat::Z24S8>,
107 MortonCopy<true, PixelFormat::Z24S8>, 106 MortonCopy<true, PixelFormat::S8Z24>,
108 MortonCopy<true, PixelFormat::S8Z24>, 107 MortonCopy<true, PixelFormat::Z32FS8>,
109 MortonCopy<true, PixelFormat::Z32FS8>,
110 // clang-format on
111}; 108};
112 109
113static constexpr ConversionArray linear_to_morton_fns = { 110static constexpr ConversionArray linear_to_morton_fns = {
114 // clang-format off 111 MortonCopy<false, PixelFormat::ABGR8U>,
115 MortonCopy<false, PixelFormat::ABGR8U>, 112 MortonCopy<false, PixelFormat::ABGR8S>,
116 MortonCopy<false, PixelFormat::ABGR8S>, 113 MortonCopy<false, PixelFormat::ABGR8UI>,
117 MortonCopy<false, PixelFormat::ABGR8UI>, 114 MortonCopy<false, PixelFormat::B5G6R5U>,
118 MortonCopy<false, PixelFormat::B5G6R5U>, 115 MortonCopy<false, PixelFormat::A2B10G10R10U>,
119 MortonCopy<false, PixelFormat::A2B10G10R10U>, 116 MortonCopy<false, PixelFormat::A1B5G5R5U>,
120 MortonCopy<false, PixelFormat::A1B5G5R5U>, 117 MortonCopy<false, PixelFormat::R8U>,
121 MortonCopy<false, PixelFormat::R8U>, 118 MortonCopy<false, PixelFormat::R8UI>,
122 MortonCopy<false, PixelFormat::R8UI>, 119 MortonCopy<false, PixelFormat::RGBA16F>,
123 MortonCopy<false, PixelFormat::RGBA16F>, 120 MortonCopy<false, PixelFormat::RGBA16U>,
124 MortonCopy<false, PixelFormat::RGBA16U>, 121 MortonCopy<false, PixelFormat::RGBA16UI>,
125 MortonCopy<false, PixelFormat::RGBA16UI>, 122 MortonCopy<false, PixelFormat::R11FG11FB10F>,
126 MortonCopy<false, PixelFormat::R11FG11FB10F>, 123 MortonCopy<false, PixelFormat::RGBA32UI>,
127 MortonCopy<false, PixelFormat::RGBA32UI>, 124 MortonCopy<false, PixelFormat::DXT1>,
128 MortonCopy<false, PixelFormat::DXT1>, 125 MortonCopy<false, PixelFormat::DXT23>,
129 MortonCopy<false, PixelFormat::DXT23>, 126 MortonCopy<false, PixelFormat::DXT45>,
130 MortonCopy<false, PixelFormat::DXT45>, 127 MortonCopy<false, PixelFormat::DXN1>,
131 MortonCopy<false, PixelFormat::DXN1>, 128 MortonCopy<false, PixelFormat::DXN2UNORM>,
132 MortonCopy<false, PixelFormat::DXN2UNORM>, 129 MortonCopy<false, PixelFormat::DXN2SNORM>,
133 MortonCopy<false, PixelFormat::DXN2SNORM>, 130 MortonCopy<false, PixelFormat::BC7U>,
134 MortonCopy<false, PixelFormat::BC7U>, 131 MortonCopy<false, PixelFormat::BC6H_UF16>,
135 MortonCopy<false, PixelFormat::BC6H_UF16>, 132 MortonCopy<false, PixelFormat::BC6H_SF16>,
136 MortonCopy<false, PixelFormat::BC6H_SF16>, 133 // TODO(Subv): Swizzling ASTC formats are not supported
137 // TODO(Subv): Swizzling ASTC formats are not supported 134 nullptr,
138 nullptr, 135 MortonCopy<false, PixelFormat::BGRA8>,
139 MortonCopy<false, PixelFormat::BGRA8>, 136 MortonCopy<false, PixelFormat::RGBA32F>,
140 MortonCopy<false, PixelFormat::RGBA32F>, 137 MortonCopy<false, PixelFormat::RG32F>,
141 MortonCopy<false, PixelFormat::RG32F>, 138 MortonCopy<false, PixelFormat::R32F>,
142 MortonCopy<false, PixelFormat::R32F>, 139 MortonCopy<false, PixelFormat::R16F>,
143 MortonCopy<false, PixelFormat::R16F>, 140 MortonCopy<false, PixelFormat::R16U>,
144 MortonCopy<false, PixelFormat::R16U>, 141 MortonCopy<false, PixelFormat::R16S>,
145 MortonCopy<false, PixelFormat::R16S>, 142 MortonCopy<false, PixelFormat::R16UI>,
146 MortonCopy<false, PixelFormat::R16UI>, 143 MortonCopy<false, PixelFormat::R16I>,
147 MortonCopy<false, PixelFormat::R16I>, 144 MortonCopy<false, PixelFormat::RG16>,
148 MortonCopy<false, PixelFormat::RG16>, 145 MortonCopy<false, PixelFormat::RG16F>,
149 MortonCopy<false, PixelFormat::RG16F>, 146 MortonCopy<false, PixelFormat::RG16UI>,
150 MortonCopy<false, PixelFormat::RG16UI>, 147 MortonCopy<false, PixelFormat::RG16I>,
151 MortonCopy<false, PixelFormat::RG16I>, 148 MortonCopy<false, PixelFormat::RG16S>,
152 MortonCopy<false, PixelFormat::RG16S>, 149 MortonCopy<false, PixelFormat::RGB32F>,
153 MortonCopy<false, PixelFormat::RGB32F>, 150 MortonCopy<false, PixelFormat::RGBA8_SRGB>,
154 MortonCopy<false, PixelFormat::RGBA8_SRGB>, 151 MortonCopy<false, PixelFormat::RG8U>,
155 MortonCopy<false, PixelFormat::RG8U>, 152 MortonCopy<false, PixelFormat::RG8S>,
156 MortonCopy<false, PixelFormat::RG8S>, 153 MortonCopy<false, PixelFormat::RG32UI>,
157 MortonCopy<false, PixelFormat::RG32UI>, 154 MortonCopy<false, PixelFormat::R32UI>,
158 MortonCopy<false, PixelFormat::R32UI>, 155 nullptr,
159 nullptr, 156 nullptr,
160 nullptr, 157 nullptr,
161 nullptr, 158 MortonCopy<false, PixelFormat::BGRA8_SRGB>,
162 MortonCopy<false, PixelFormat::BGRA8_SRGB>, 159 MortonCopy<false, PixelFormat::DXT1_SRGB>,
163 MortonCopy<false, PixelFormat::DXT1_SRGB>, 160 MortonCopy<false, PixelFormat::DXT23_SRGB>,
164 MortonCopy<false, PixelFormat::DXT23_SRGB>, 161 MortonCopy<false, PixelFormat::DXT45_SRGB>,
165 MortonCopy<false, PixelFormat::DXT45_SRGB>, 162 MortonCopy<false, PixelFormat::BC7U_SRGB>,
166 MortonCopy<false, PixelFormat::BC7U_SRGB>, 163 nullptr,
167 nullptr, 164 nullptr,
168 nullptr, 165 nullptr,
169 nullptr, 166 nullptr,
170 nullptr, 167 nullptr,
171 nullptr, 168 nullptr,
172 nullptr, 169 nullptr,
173 nullptr, 170 nullptr,
174 nullptr, 171 MortonCopy<false, PixelFormat::Z32F>,
175 MortonCopy<false, PixelFormat::Z32F>, 172 MortonCopy<false, PixelFormat::Z16>,
176 MortonCopy<false, PixelFormat::Z16>, 173 MortonCopy<false, PixelFormat::Z24S8>,
177 MortonCopy<false, PixelFormat::Z24S8>, 174 MortonCopy<false, PixelFormat::S8Z24>,
178 MortonCopy<false, PixelFormat::S8Z24>, 175 MortonCopy<false, PixelFormat::Z32FS8>,
179 MortonCopy<false, PixelFormat::Z32FS8>,
180 // clang-format on
181}; 176};
182 177
183static MortonCopyFn GetSwizzleFunction(MortonSwizzleMode mode, Surface::PixelFormat format) { 178static MortonCopyFn GetSwizzleFunction(MortonSwizzleMode mode, Surface::PixelFormat format) {
@@ -191,45 +186,6 @@ static MortonCopyFn GetSwizzleFunction(MortonSwizzleMode mode, Surface::PixelFor
191 return morton_to_linear_fns[static_cast<std::size_t>(format)]; 186 return morton_to_linear_fns[static_cast<std::size_t>(format)];
192} 187}
193 188
194/// 8x8 Z-Order coordinate from 2D coordinates
195static u32 MortonInterleave(u32 x, u32 y) {
196 static const u32 xlut[] = {0x00, 0x01, 0x04, 0x05, 0x10, 0x11, 0x14, 0x15};
197 static const u32 ylut[] = {0x00, 0x02, 0x08, 0x0a, 0x20, 0x22, 0x28, 0x2a};
198 return xlut[x % 8] + ylut[y % 8];
199}
200
201/// Calculates the offset of the position of the pixel in Morton order
202static u32 GetMortonOffset(u32 x, u32 y, u32 bytes_per_pixel) {
203 // Images are split into 8x8 tiles. Each tile is composed of four 4x4 subtiles each
204 // of which is composed of four 2x2 subtiles each of which is composed of four texels.
205 // Each structure is embedded into the next-bigger one in a diagonal pattern, e.g.
206 // texels are laid out in a 2x2 subtile like this:
207 // 2 3
208 // 0 1
209 //
210 // The full 8x8 tile has the texels arranged like this:
211 //
212 // 42 43 46 47 58 59 62 63
213 // 40 41 44 45 56 57 60 61
214 // 34 35 38 39 50 51 54 55
215 // 32 33 36 37 48 49 52 53
216 // 10 11 14 15 26 27 30 31
217 // 08 09 12 13 24 25 28 29
218 // 02 03 06 07 18 19 22 23
219 // 00 01 04 05 16 17 20 21
220 //
221 // This pattern is what's called Z-order curve, or Morton order.
222
223 const unsigned int block_height = 8;
224 const unsigned int coarse_x = x & ~7;
225
226 u32 i = MortonInterleave(x, y);
227
228 const unsigned int offset = coarse_x * block_height;
229
230 return (i + offset) * bytes_per_pixel;
231}
232
233static u32 MortonInterleave128(u32 x, u32 y) { 189static u32 MortonInterleave128(u32 x, u32 y) {
234 // 128x128 Z-Order coordinate from 2D coordinates 190 // 128x128 Z-Order coordinate from 2D coordinates
235 static constexpr u32 xlut[] = { 191 static constexpr u32 xlut[] = {
@@ -325,14 +281,14 @@ static u32 GetMortonOffset128(u32 x, u32 y, u32 bytes_per_pixel) {
325 281
326void MortonSwizzle(MortonSwizzleMode mode, Surface::PixelFormat format, u32 stride, 282void MortonSwizzle(MortonSwizzleMode mode, Surface::PixelFormat format, u32 stride,
327 u32 block_height, u32 height, u32 block_depth, u32 depth, u32 tile_width_spacing, 283 u32 block_height, u32 height, u32 block_depth, u32 depth, u32 tile_width_spacing,
328 u8* buffer, std::size_t buffer_size, VAddr addr) { 284 u8* buffer, u8* addr) {
329
330 GetSwizzleFunction(mode, format)(stride, block_height, height, block_depth, depth, 285 GetSwizzleFunction(mode, format)(stride, block_height, height, block_depth, depth,
331 tile_width_spacing, buffer, buffer_size, addr); 286 tile_width_spacing, buffer, addr);
332} 287}
333 288
334void MortonCopyPixels128(u32 width, u32 height, u32 bytes_per_pixel, u32 linear_bytes_per_pixel, 289void MortonCopyPixels128(MortonSwizzleMode mode, u32 width, u32 height, u32 bytes_per_pixel,
335 u8* morton_data, u8* linear_data, bool morton_to_linear) { 290 u32 linear_bytes_per_pixel, u8* morton_data, u8* linear_data) {
291 const bool morton_to_linear = mode == MortonSwizzleMode::MortonToLinear;
336 u8* data_ptrs[2]; 292 u8* data_ptrs[2];
337 for (u32 y = 0; y < height; ++y) { 293 for (u32 y = 0; y < height; ++y) {
338 for (u32 x = 0; x < width; ++x) { 294 for (u32 x = 0; x < width; ++x) {
diff --git a/src/video_core/morton.h b/src/video_core/morton.h
index 065f59ce3..ee5b45555 100644
--- a/src/video_core/morton.h
+++ b/src/video_core/morton.h
@@ -13,9 +13,9 @@ enum class MortonSwizzleMode { MortonToLinear, LinearToMorton };
13 13
14void MortonSwizzle(MortonSwizzleMode mode, VideoCore::Surface::PixelFormat format, u32 stride, 14void MortonSwizzle(MortonSwizzleMode mode, VideoCore::Surface::PixelFormat format, u32 stride,
15 u32 block_height, u32 height, u32 block_depth, u32 depth, u32 tile_width_spacing, 15 u32 block_height, u32 height, u32 block_depth, u32 depth, u32 tile_width_spacing,
16 u8* buffer, std::size_t buffer_size, VAddr addr); 16 u8* buffer, u8* addr);
17 17
18void MortonCopyPixels128(u32 width, u32 height, u32 bytes_per_pixel, u32 linear_bytes_per_pixel, 18void MortonCopyPixels128(MortonSwizzleMode mode, u32 width, u32 height, u32 bytes_per_pixel,
19 u8* morton_data, u8* linear_data, bool morton_to_linear); 19 u32 linear_bytes_per_pixel, u8* morton_data, u8* linear_data);
20 20
21} // namespace VideoCore 21} // namespace VideoCore
diff --git a/src/video_core/rasterizer_cache.h b/src/video_core/rasterizer_cache.h
index bcf0c15a4..291772186 100644
--- a/src/video_core/rasterizer_cache.h
+++ b/src/video_core/rasterizer_cache.h
@@ -4,6 +4,7 @@
4 4
5#pragma once 5#pragma once
6 6
7#include <mutex>
7#include <set> 8#include <set>
8#include <unordered_map> 9#include <unordered_map>
9 10
@@ -12,14 +13,26 @@
12 13
13#include "common/common_types.h" 14#include "common/common_types.h"
14#include "core/settings.h" 15#include "core/settings.h"
16#include "video_core/gpu.h"
15#include "video_core/rasterizer_interface.h" 17#include "video_core/rasterizer_interface.h"
16 18
17class RasterizerCacheObject { 19class RasterizerCacheObject {
18public: 20public:
21 explicit RasterizerCacheObject(const u8* host_ptr)
22 : host_ptr{host_ptr}, cache_addr{ToCacheAddr(host_ptr)} {}
23
19 virtual ~RasterizerCacheObject(); 24 virtual ~RasterizerCacheObject();
20 25
26 CacheAddr GetCacheAddr() const {
27 return cache_addr;
28 }
29
30 const u8* GetHostPtr() const {
31 return host_ptr;
32 }
33
21 /// Gets the address of the shader in guest memory, required for cache management 34 /// Gets the address of the shader in guest memory, required for cache management
22 virtual VAddr GetAddr() const = 0; 35 virtual VAddr GetCpuAddr() const = 0;
23 36
24 /// Gets the size of the shader in guest memory, required for cache management 37 /// Gets the size of the shader in guest memory, required for cache management
25 virtual std::size_t GetSizeInBytes() const = 0; 38 virtual std::size_t GetSizeInBytes() const = 0;
@@ -58,6 +71,8 @@ private:
58 bool is_registered{}; ///< Whether the object is currently registered with the cache 71 bool is_registered{}; ///< Whether the object is currently registered with the cache
59 bool is_dirty{}; ///< Whether the object is dirty (out of sync with guest memory) 72 bool is_dirty{}; ///< Whether the object is dirty (out of sync with guest memory)
60 u64 last_modified_ticks{}; ///< When the object was last modified, used for in-order flushing 73 u64 last_modified_ticks{}; ///< When the object was last modified, used for in-order flushing
74 const u8* host_ptr{}; ///< Pointer to the memory backing this cached region
75 CacheAddr cache_addr{}; ///< Cache address memory, unique from emulated virtual address space
61}; 76};
62 77
63template <class T> 78template <class T>
@@ -68,7 +83,9 @@ public:
68 explicit RasterizerCache(VideoCore::RasterizerInterface& rasterizer) : rasterizer{rasterizer} {} 83 explicit RasterizerCache(VideoCore::RasterizerInterface& rasterizer) : rasterizer{rasterizer} {}
69 84
70 /// Write any cached resources overlapping the specified region back to memory 85 /// Write any cached resources overlapping the specified region back to memory
71 void FlushRegion(Tegra::GPUVAddr addr, size_t size) { 86 void FlushRegion(CacheAddr addr, std::size_t size) {
87 std::lock_guard lock{mutex};
88
72 const auto& objects{GetSortedObjectsFromRegion(addr, size)}; 89 const auto& objects{GetSortedObjectsFromRegion(addr, size)};
73 for (auto& object : objects) { 90 for (auto& object : objects) {
74 FlushObject(object); 91 FlushObject(object);
@@ -76,7 +93,9 @@ public:
76 } 93 }
77 94
78 /// Mark the specified region as being invalidated 95 /// Mark the specified region as being invalidated
79 void InvalidateRegion(VAddr addr, u64 size) { 96 void InvalidateRegion(CacheAddr addr, u64 size) {
97 std::lock_guard lock{mutex};
98
80 const auto& objects{GetSortedObjectsFromRegion(addr, size)}; 99 const auto& objects{GetSortedObjectsFromRegion(addr, size)};
81 for (auto& object : objects) { 100 for (auto& object : objects) {
82 if (!object->IsRegistered()) { 101 if (!object->IsRegistered()) {
@@ -89,49 +108,70 @@ public:
89 108
90 /// Invalidates everything in the cache 109 /// Invalidates everything in the cache
91 void InvalidateAll() { 110 void InvalidateAll() {
111 std::lock_guard lock{mutex};
112
92 while (interval_cache.begin() != interval_cache.end()) { 113 while (interval_cache.begin() != interval_cache.end()) {
93 Unregister(*interval_cache.begin()->second.begin()); 114 Unregister(*interval_cache.begin()->second.begin());
94 } 115 }
95 } 116 }
96 117
97protected: 118protected:
98 /// Tries to get an object from the cache with the specified address 119 /// Tries to get an object from the cache with the specified cache address
99 T TryGet(VAddr addr) const { 120 T TryGet(CacheAddr addr) const {
100 const auto iter = map_cache.find(addr); 121 const auto iter = map_cache.find(addr);
101 if (iter != map_cache.end()) 122 if (iter != map_cache.end())
102 return iter->second; 123 return iter->second;
103 return nullptr; 124 return nullptr;
104 } 125 }
105 126
127 T TryGet(const void* addr) const {
128 const auto iter = map_cache.find(ToCacheAddr(addr));
129 if (iter != map_cache.end())
130 return iter->second;
131 return nullptr;
132 }
133
106 /// Register an object into the cache 134 /// Register an object into the cache
107 void Register(const T& object) { 135 virtual void Register(const T& object) {
136 std::lock_guard lock{mutex};
137
108 object->SetIsRegistered(true); 138 object->SetIsRegistered(true);
109 interval_cache.add({GetInterval(object), ObjectSet{object}}); 139 interval_cache.add({GetInterval(object), ObjectSet{object}});
110 map_cache.insert({object->GetAddr(), object}); 140 map_cache.insert({object->GetCacheAddr(), object});
111 rasterizer.UpdatePagesCachedCount(object->GetAddr(), object->GetSizeInBytes(), 1); 141 rasterizer.UpdatePagesCachedCount(object->GetCpuAddr(), object->GetSizeInBytes(), 1);
112 } 142 }
113 143
114 /// Unregisters an object from the cache 144 /// Unregisters an object from the cache
115 void Unregister(const T& object) { 145 virtual void Unregister(const T& object) {
116 object->SetIsRegistered(false); 146 std::lock_guard lock{mutex};
117 rasterizer.UpdatePagesCachedCount(object->GetAddr(), object->GetSizeInBytes(), -1);
118 // Only flush if use_accurate_gpu_emulation is enabled, as it incurs a performance hit
119 if (Settings::values.use_accurate_gpu_emulation) {
120 FlushObject(object);
121 }
122 147
148 object->SetIsRegistered(false);
149 rasterizer.UpdatePagesCachedCount(object->GetCpuAddr(), object->GetSizeInBytes(), -1);
123 interval_cache.subtract({GetInterval(object), ObjectSet{object}}); 150 interval_cache.subtract({GetInterval(object), ObjectSet{object}});
124 map_cache.erase(object->GetAddr()); 151 map_cache.erase(object->GetCacheAddr());
125 } 152 }
126 153
127 /// Returns a ticks counter used for tracking when cached objects were last modified 154 /// Returns a ticks counter used for tracking when cached objects were last modified
128 u64 GetModifiedTicks() { 155 u64 GetModifiedTicks() {
156 std::lock_guard lock{mutex};
157
129 return ++modified_ticks; 158 return ++modified_ticks;
130 } 159 }
131 160
161 /// Flushes the specified object, updating appropriate cache state as needed
162 void FlushObject(const T& object) {
163 std::lock_guard lock{mutex};
164
165 if (!object->IsDirty()) {
166 return;
167 }
168 object->Flush();
169 object->MarkAsModified(false, *this);
170 }
171
132private: 172private:
133 /// Returns a list of cached objects from the specified memory region, ordered by access time 173 /// Returns a list of cached objects from the specified memory region, ordered by access time
134 std::vector<T> GetSortedObjectsFromRegion(VAddr addr, u64 size) { 174 std::vector<T> GetSortedObjectsFromRegion(CacheAddr addr, u64 size) {
135 if (size == 0) { 175 if (size == 0) {
136 return {}; 176 return {};
137 } 177 }
@@ -154,27 +194,19 @@ private:
154 return objects; 194 return objects;
155 } 195 }
156 196
157 /// Flushes the specified object, updating appropriate cache state as needed
158 void FlushObject(const T& object) {
159 if (!object->IsDirty()) {
160 return;
161 }
162 object->Flush();
163 object->MarkAsModified(false, *this);
164 }
165
166 using ObjectSet = std::set<T>; 197 using ObjectSet = std::set<T>;
167 using ObjectCache = std::unordered_map<VAddr, T>; 198 using ObjectCache = std::unordered_map<CacheAddr, T>;
168 using IntervalCache = boost::icl::interval_map<VAddr, ObjectSet>; 199 using IntervalCache = boost::icl::interval_map<CacheAddr, ObjectSet>;
169 using ObjectInterval = typename IntervalCache::interval_type; 200 using ObjectInterval = typename IntervalCache::interval_type;
170 201
171 static auto GetInterval(const T& object) { 202 static auto GetInterval(const T& object) {
172 return ObjectInterval::right_open(object->GetAddr(), 203 return ObjectInterval::right_open(object->GetCacheAddr(),
173 object->GetAddr() + object->GetSizeInBytes()); 204 object->GetCacheAddr() + object->GetSizeInBytes());
174 } 205 }
175 206
176 ObjectCache map_cache; 207 ObjectCache map_cache;
177 IntervalCache interval_cache; ///< Cache of objects 208 IntervalCache interval_cache; ///< Cache of objects
178 u64 modified_ticks{}; ///< Counter of cache state ticks, used for in-order flushing 209 u64 modified_ticks{}; ///< Counter of cache state ticks, used for in-order flushing
179 VideoCore::RasterizerInterface& rasterizer; 210 VideoCore::RasterizerInterface& rasterizer;
211 std::recursive_mutex mutex;
180}; 212};
diff --git a/src/video_core/rasterizer_interface.h b/src/video_core/rasterizer_interface.h
index 77da135a0..d7b86df38 100644
--- a/src/video_core/rasterizer_interface.h
+++ b/src/video_core/rasterizer_interface.h
@@ -9,7 +9,6 @@
9#include "common/common_types.h" 9#include "common/common_types.h"
10#include "video_core/engines/fermi_2d.h" 10#include "video_core/engines/fermi_2d.h"
11#include "video_core/gpu.h" 11#include "video_core/gpu.h"
12#include "video_core/memory_manager.h"
13 12
14namespace VideoCore { 13namespace VideoCore {
15 14
@@ -35,18 +34,20 @@ public:
35 virtual void FlushAll() = 0; 34 virtual void FlushAll() = 0;
36 35
37 /// Notify rasterizer that any caches of the specified region should be flushed to Switch memory 36 /// Notify rasterizer that any caches of the specified region should be flushed to Switch memory
38 virtual void FlushRegion(VAddr addr, u64 size) = 0; 37 virtual void FlushRegion(CacheAddr addr, u64 size) = 0;
39 38
40 /// Notify rasterizer that any caches of the specified region should be invalidated 39 /// Notify rasterizer that any caches of the specified region should be invalidated
41 virtual void InvalidateRegion(VAddr addr, u64 size) = 0; 40 virtual void InvalidateRegion(CacheAddr addr, u64 size) = 0;
42 41
43 /// Notify rasterizer that any caches of the specified region should be flushed to Switch memory 42 /// Notify rasterizer that any caches of the specified region should be flushed to Switch memory
44 /// and invalidated 43 /// and invalidated
45 virtual void FlushAndInvalidateRegion(VAddr addr, u64 size) = 0; 44 virtual void FlushAndInvalidateRegion(CacheAddr addr, u64 size) = 0;
46 45
47 /// Attempt to use a faster method to perform a surface copy 46 /// Attempt to use a faster method to perform a surface copy
48 virtual bool AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Regs::Surface& src, 47 virtual bool AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Regs::Surface& src,
49 const Tegra::Engines::Fermi2D::Regs::Surface& dst) { 48 const Tegra::Engines::Fermi2D::Regs::Surface& dst,
49 const Common::Rectangle<u32>& src_rect,
50 const Common::Rectangle<u32>& dst_rect) {
50 return false; 51 return false;
51 } 52 }
52 53
@@ -61,7 +62,7 @@ public:
61 } 62 }
62 63
63 /// Increase/decrease the number of object in pages touching the specified region 64 /// Increase/decrease the number of object in pages touching the specified region
64 virtual void UpdatePagesCachedCount(Tegra::GPUVAddr addr, u64 size, int delta) {} 65 virtual void UpdatePagesCachedCount(VAddr addr, u64 size, int delta) {}
65 66
66 /// Initialize disk cached resources for the game being emulated 67 /// Initialize disk cached resources for the game being emulated
67 virtual void LoadDiskResources(const std::atomic_bool& stop_loading = false, 68 virtual void LoadDiskResources(const std::atomic_bool& stop_loading = false,
diff --git a/src/video_core/renderer_base.cpp b/src/video_core/renderer_base.cpp
index 94223f45f..919d1f2d4 100644
--- a/src/video_core/renderer_base.cpp
+++ b/src/video_core/renderer_base.cpp
@@ -2,6 +2,7 @@
2// Licensed under GPLv2 or any later version 2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included. 3// Refer to the license.txt file included.
4 4
5#include "common/logging/log.h"
5#include "core/frontend/emu_window.h" 6#include "core/frontend/emu_window.h"
6#include "core/settings.h" 7#include "core/settings.h"
7#include "video_core/renderer_base.h" 8#include "video_core/renderer_base.h"
diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.cpp b/src/video_core/renderer_opengl/gl_buffer_cache.cpp
index b3062e5ba..fd091c84c 100644
--- a/src/video_core/renderer_opengl/gl_buffer_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_buffer_cache.cpp
@@ -13,24 +13,28 @@
13 13
14namespace OpenGL { 14namespace OpenGL {
15 15
16CachedBufferEntry::CachedBufferEntry(VAddr cpu_addr, std::size_t size, GLintptr offset,
17 std::size_t alignment, u8* host_ptr)
18 : RasterizerCacheObject{host_ptr}, cpu_addr{cpu_addr}, size{size}, offset{offset},
19 alignment{alignment} {}
20
16OGLBufferCache::OGLBufferCache(RasterizerOpenGL& rasterizer, std::size_t size) 21OGLBufferCache::OGLBufferCache(RasterizerOpenGL& rasterizer, std::size_t size)
17 : RasterizerCache{rasterizer}, stream_buffer(size, true) {} 22 : RasterizerCache{rasterizer}, stream_buffer(size, true) {}
18 23
19GLintptr OGLBufferCache::UploadMemory(Tegra::GPUVAddr gpu_addr, std::size_t size, 24GLintptr OGLBufferCache::UploadMemory(GPUVAddr gpu_addr, std::size_t size, std::size_t alignment,
20 std::size_t alignment, bool cache) { 25 bool cache) {
21 auto& memory_manager = Core::System::GetInstance().GPU().MemoryManager(); 26 auto& memory_manager = Core::System::GetInstance().GPU().MemoryManager();
22 const auto cpu_addr{memory_manager.GpuToCpuAddress(gpu_addr)};
23 ASSERT_MSG(cpu_addr, "Invalid GPU address");
24 27
25 // Cache management is a big overhead, so only cache entries with a given size. 28 // Cache management is a big overhead, so only cache entries with a given size.
26 // TODO: Figure out which size is the best for given games. 29 // TODO: Figure out which size is the best for given games.
27 cache &= size >= 2048; 30 cache &= size >= 2048;
28 31
32 const auto& host_ptr{memory_manager.GetPointer(gpu_addr)};
29 if (cache) { 33 if (cache) {
30 auto entry = TryGet(*cpu_addr); 34 auto entry = TryGet(host_ptr);
31 if (entry) { 35 if (entry) {
32 if (entry->size >= size && entry->alignment == alignment) { 36 if (entry->GetSize() >= size && entry->GetAlignment() == alignment) {
33 return entry->offset; 37 return entry->GetOffset();
34 } 38 }
35 Unregister(entry); 39 Unregister(entry);
36 } 40 }
@@ -39,17 +43,17 @@ GLintptr OGLBufferCache::UploadMemory(Tegra::GPUVAddr gpu_addr, std::size_t size
39 AlignBuffer(alignment); 43 AlignBuffer(alignment);
40 const GLintptr uploaded_offset = buffer_offset; 44 const GLintptr uploaded_offset = buffer_offset;
41 45
42 Memory::ReadBlock(*cpu_addr, buffer_ptr, size); 46 if (!host_ptr) {
47 return uploaded_offset;
48 }
43 49
50 std::memcpy(buffer_ptr, host_ptr, size);
44 buffer_ptr += size; 51 buffer_ptr += size;
45 buffer_offset += size; 52 buffer_offset += size;
46 53
47 if (cache) { 54 if (cache) {
48 auto entry = std::make_shared<CachedBufferEntry>(); 55 auto entry = std::make_shared<CachedBufferEntry>(
49 entry->offset = uploaded_offset; 56 *memory_manager.GpuToCpuAddress(gpu_addr), size, uploaded_offset, alignment, host_ptr);
50 entry->size = size;
51 entry->alignment = alignment;
52 entry->addr = *cpu_addr;
53 Register(entry); 57 Register(entry);
54 } 58 }
55 59
diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.h b/src/video_core/renderer_opengl/gl_buffer_cache.h
index c11acfb79..fc33aa433 100644
--- a/src/video_core/renderer_opengl/gl_buffer_cache.h
+++ b/src/video_core/renderer_opengl/gl_buffer_cache.h
@@ -17,22 +17,39 @@ namespace OpenGL {
17 17
18class RasterizerOpenGL; 18class RasterizerOpenGL;
19 19
20struct CachedBufferEntry final : public RasterizerCacheObject { 20class CachedBufferEntry final : public RasterizerCacheObject {
21 VAddr GetAddr() const override { 21public:
22 return addr; 22 explicit CachedBufferEntry(VAddr cpu_addr, std::size_t size, GLintptr offset,
23 std::size_t alignment, u8* host_ptr);
24
25 VAddr GetCpuAddr() const override {
26 return cpu_addr;
23 } 27 }
24 28
25 std::size_t GetSizeInBytes() const override { 29 std::size_t GetSizeInBytes() const override {
26 return size; 30 return size;
27 } 31 }
28 32
33 std::size_t GetSize() const {
34 return size;
35 }
36
37 GLintptr GetOffset() const {
38 return offset;
39 }
40
41 std::size_t GetAlignment() const {
42 return alignment;
43 }
44
29 // We do not have to flush this cache as things in it are never modified by us. 45 // We do not have to flush this cache as things in it are never modified by us.
30 void Flush() override {} 46 void Flush() override {}
31 47
32 VAddr addr; 48private:
33 std::size_t size; 49 VAddr cpu_addr{};
34 GLintptr offset; 50 std::size_t size{};
35 std::size_t alignment; 51 GLintptr offset{};
52 std::size_t alignment{};
36}; 53};
37 54
38class OGLBufferCache final : public RasterizerCache<std::shared_ptr<CachedBufferEntry>> { 55class OGLBufferCache final : public RasterizerCache<std::shared_ptr<CachedBufferEntry>> {
@@ -41,7 +58,7 @@ public:
41 58
42 /// Uploads data from a guest GPU address. Returns host's buffer offset where it's been 59 /// Uploads data from a guest GPU address. Returns host's buffer offset where it's been
43 /// allocated. 60 /// allocated.
44 GLintptr UploadMemory(Tegra::GPUVAddr gpu_addr, std::size_t size, std::size_t alignment = 4, 61 GLintptr UploadMemory(GPUVAddr gpu_addr, std::size_t size, std::size_t alignment = 4,
45 bool cache = true); 62 bool cache = true);
46 63
47 /// Uploads from a host memory. Returns host's buffer offset where it's been allocated. 64 /// Uploads from a host memory. Returns host's buffer offset where it's been allocated.
diff --git a/src/video_core/renderer_opengl/gl_global_cache.cpp b/src/video_core/renderer_opengl/gl_global_cache.cpp
index c7f32feaa..da9326253 100644
--- a/src/video_core/renderer_opengl/gl_global_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_global_cache.cpp
@@ -7,7 +7,6 @@
7#include "common/assert.h" 7#include "common/assert.h"
8#include "common/logging/log.h" 8#include "common/logging/log.h"
9#include "core/core.h" 9#include "core/core.h"
10#include "core/memory.h"
11#include "video_core/renderer_opengl/gl_global_cache.h" 10#include "video_core/renderer_opengl/gl_global_cache.h"
12#include "video_core/renderer_opengl/gl_rasterizer.h" 11#include "video_core/renderer_opengl/gl_rasterizer.h"
13#include "video_core/renderer_opengl/gl_shader_decompiler.h" 12#include "video_core/renderer_opengl/gl_shader_decompiler.h"
@@ -15,12 +14,13 @@
15 14
16namespace OpenGL { 15namespace OpenGL {
17 16
18CachedGlobalRegion::CachedGlobalRegion(VAddr addr, u32 size) : addr{addr}, size{size} { 17CachedGlobalRegion::CachedGlobalRegion(VAddr cpu_addr, u32 size, u8* host_ptr)
18 : RasterizerCacheObject{host_ptr}, cpu_addr{cpu_addr}, size{size} {
19 buffer.Create(); 19 buffer.Create();
20 // Bind and unbind the buffer so it gets allocated by the driver 20 // Bind and unbind the buffer so it gets allocated by the driver
21 glBindBuffer(GL_SHADER_STORAGE_BUFFER, buffer.handle); 21 glBindBuffer(GL_SHADER_STORAGE_BUFFER, buffer.handle);
22 glBindBuffer(GL_SHADER_STORAGE_BUFFER, 0); 22 glBindBuffer(GL_SHADER_STORAGE_BUFFER, 0);
23 LabelGLObject(GL_BUFFER, buffer.handle, addr, "GlobalMemory"); 23 LabelGLObject(GL_BUFFER, buffer.handle, cpu_addr, "GlobalMemory");
24} 24}
25 25
26void CachedGlobalRegion::Reload(u32 size_) { 26void CachedGlobalRegion::Reload(u32 size_) {
@@ -35,10 +35,10 @@ void CachedGlobalRegion::Reload(u32 size_) {
35 35
36 // TODO(Rodrigo): Get rid of Memory::GetPointer with a staging buffer 36 // TODO(Rodrigo): Get rid of Memory::GetPointer with a staging buffer
37 glBindBuffer(GL_SHADER_STORAGE_BUFFER, buffer.handle); 37 glBindBuffer(GL_SHADER_STORAGE_BUFFER, buffer.handle);
38 glBufferData(GL_SHADER_STORAGE_BUFFER, size, Memory::GetPointer(addr), GL_DYNAMIC_DRAW); 38 glBufferData(GL_SHADER_STORAGE_BUFFER, size, GetHostPtr(), GL_DYNAMIC_DRAW);
39} 39}
40 40
41GlobalRegion GlobalRegionCacheOpenGL::TryGetReservedGlobalRegion(VAddr addr, u32 size) const { 41GlobalRegion GlobalRegionCacheOpenGL::TryGetReservedGlobalRegion(CacheAddr addr, u32 size) const {
42 const auto search{reserve.find(addr)}; 42 const auto search{reserve.find(addr)};
43 if (search == reserve.end()) { 43 if (search == reserve.end()) {
44 return {}; 44 return {};
@@ -46,19 +46,22 @@ GlobalRegion GlobalRegionCacheOpenGL::TryGetReservedGlobalRegion(VAddr addr, u32
46 return search->second; 46 return search->second;
47} 47}
48 48
49GlobalRegion GlobalRegionCacheOpenGL::GetUncachedGlobalRegion(VAddr addr, u32 size) { 49GlobalRegion GlobalRegionCacheOpenGL::GetUncachedGlobalRegion(GPUVAddr addr, u32 size,
50 GlobalRegion region{TryGetReservedGlobalRegion(addr, size)}; 50 u8* host_ptr) {
51 GlobalRegion region{TryGetReservedGlobalRegion(ToCacheAddr(host_ptr), size)};
51 if (!region) { 52 if (!region) {
52 // No reserved surface available, create a new one and reserve it 53 // No reserved surface available, create a new one and reserve it
53 region = std::make_shared<CachedGlobalRegion>(addr, size); 54 auto& memory_manager{Core::System::GetInstance().GPU().MemoryManager()};
55 const auto cpu_addr = *memory_manager.GpuToCpuAddress(addr);
56 region = std::make_shared<CachedGlobalRegion>(cpu_addr, size, host_ptr);
54 ReserveGlobalRegion(region); 57 ReserveGlobalRegion(region);
55 } 58 }
56 region->Reload(size); 59 region->Reload(size);
57 return region; 60 return region;
58} 61}
59 62
60void GlobalRegionCacheOpenGL::ReserveGlobalRegion(const GlobalRegion& region) { 63void GlobalRegionCacheOpenGL::ReserveGlobalRegion(GlobalRegion region) {
61 reserve[region->GetAddr()] = region; 64 reserve.insert_or_assign(region->GetCacheAddr(), std::move(region));
62} 65}
63 66
64GlobalRegionCacheOpenGL::GlobalRegionCacheOpenGL(RasterizerOpenGL& rasterizer) 67GlobalRegionCacheOpenGL::GlobalRegionCacheOpenGL(RasterizerOpenGL& rasterizer)
@@ -69,22 +72,20 @@ GlobalRegion GlobalRegionCacheOpenGL::GetGlobalRegion(
69 Tegra::Engines::Maxwell3D::Regs::ShaderStage stage) { 72 Tegra::Engines::Maxwell3D::Regs::ShaderStage stage) {
70 73
71 auto& gpu{Core::System::GetInstance().GPU()}; 74 auto& gpu{Core::System::GetInstance().GPU()};
72 const auto cbufs = gpu.Maxwell3D().state.shader_stages[static_cast<u64>(stage)]; 75 auto& memory_manager{gpu.MemoryManager()};
73 const auto cbuf_addr = gpu.MemoryManager().GpuToCpuAddress( 76 const auto cbufs{gpu.Maxwell3D().state.shader_stages[static_cast<u64>(stage)]};
74 cbufs.const_buffers[global_region.GetCbufIndex()].address + global_region.GetCbufOffset()); 77 const auto addr{cbufs.const_buffers[global_region.GetCbufIndex()].address +
75 ASSERT(cbuf_addr); 78 global_region.GetCbufOffset()};
76 79 const auto actual_addr{memory_manager.Read<u64>(addr)};
77 const auto actual_addr_gpu = Memory::Read64(*cbuf_addr); 80 const auto size{memory_manager.Read<u32>(addr + 8)};
78 const auto size = Memory::Read32(*cbuf_addr + 8);
79 const auto actual_addr = gpu.MemoryManager().GpuToCpuAddress(actual_addr_gpu);
80 ASSERT(actual_addr);
81 81
82 // Look up global region in the cache based on address 82 // Look up global region in the cache based on address
83 GlobalRegion region = TryGet(*actual_addr); 83 const auto& host_ptr{memory_manager.GetPointer(actual_addr)};
84 GlobalRegion region{TryGet(host_ptr)};
84 85
85 if (!region) { 86 if (!region) {
86 // No global region found - create a new one 87 // No global region found - create a new one
87 region = GetUncachedGlobalRegion(*actual_addr, size); 88 region = GetUncachedGlobalRegion(actual_addr, size, host_ptr);
88 Register(region); 89 Register(region);
89 } 90 }
90 91
diff --git a/src/video_core/renderer_opengl/gl_global_cache.h b/src/video_core/renderer_opengl/gl_global_cache.h
index 37830bb7c..5a21ab66f 100644
--- a/src/video_core/renderer_opengl/gl_global_cache.h
+++ b/src/video_core/renderer_opengl/gl_global_cache.h
@@ -27,15 +27,13 @@ using GlobalRegion = std::shared_ptr<CachedGlobalRegion>;
27 27
28class CachedGlobalRegion final : public RasterizerCacheObject { 28class CachedGlobalRegion final : public RasterizerCacheObject {
29public: 29public:
30 explicit CachedGlobalRegion(VAddr addr, u32 size); 30 explicit CachedGlobalRegion(VAddr cpu_addr, u32 size, u8* host_ptr);
31 31
32 /// Gets the address of the shader in guest memory, required for cache management 32 VAddr GetCpuAddr() const override {
33 VAddr GetAddr() const { 33 return cpu_addr;
34 return addr;
35 } 34 }
36 35
37 /// Gets the size of the shader in guest memory, required for cache management 36 std::size_t GetSizeInBytes() const override {
38 std::size_t GetSizeInBytes() const {
39 return size; 37 return size;
40 } 38 }
41 39
@@ -53,9 +51,8 @@ public:
53 } 51 }
54 52
55private: 53private:
56 VAddr addr{}; 54 VAddr cpu_addr{};
57 u32 size{}; 55 u32 size{};
58
59 OGLBuffer buffer; 56 OGLBuffer buffer;
60}; 57};
61 58
@@ -68,11 +65,11 @@ public:
68 Tegra::Engines::Maxwell3D::Regs::ShaderStage stage); 65 Tegra::Engines::Maxwell3D::Regs::ShaderStage stage);
69 66
70private: 67private:
71 GlobalRegion TryGetReservedGlobalRegion(VAddr addr, u32 size) const; 68 GlobalRegion TryGetReservedGlobalRegion(CacheAddr addr, u32 size) const;
72 GlobalRegion GetUncachedGlobalRegion(VAddr addr, u32 size); 69 GlobalRegion GetUncachedGlobalRegion(GPUVAddr addr, u32 size, u8* host_ptr);
73 void ReserveGlobalRegion(const GlobalRegion& region); 70 void ReserveGlobalRegion(GlobalRegion region);
74 71
75 std::unordered_map<VAddr, GlobalRegion> reserve; 72 std::unordered_map<CacheAddr, GlobalRegion> reserve;
76}; 73};
77 74
78} // namespace OpenGL 75} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_primitive_assembler.cpp b/src/video_core/renderer_opengl/gl_primitive_assembler.cpp
index 77d5cedd2..2bcbd3da2 100644
--- a/src/video_core/renderer_opengl/gl_primitive_assembler.cpp
+++ b/src/video_core/renderer_opengl/gl_primitive_assembler.cpp
@@ -40,16 +40,12 @@ GLintptr PrimitiveAssembler::MakeQuadArray(u32 first, u32 count) {
40 return index_offset; 40 return index_offset;
41} 41}
42 42
43GLintptr PrimitiveAssembler::MakeQuadIndexed(Tegra::GPUVAddr gpu_addr, std::size_t index_size, 43GLintptr PrimitiveAssembler::MakeQuadIndexed(GPUVAddr gpu_addr, std::size_t index_size, u32 count) {
44 u32 count) {
45 const std::size_t map_size{CalculateQuadSize(count)}; 44 const std::size_t map_size{CalculateQuadSize(count)};
46 auto [dst_pointer, index_offset] = buffer_cache.ReserveMemory(map_size); 45 auto [dst_pointer, index_offset] = buffer_cache.ReserveMemory(map_size);
47 46
48 auto& memory_manager = Core::System::GetInstance().GPU().MemoryManager(); 47 auto& memory_manager = Core::System::GetInstance().GPU().MemoryManager();
49 const auto cpu_addr{memory_manager.GpuToCpuAddress(gpu_addr)}; 48 const u8* source{memory_manager.GetPointer(gpu_addr)};
50 ASSERT_MSG(cpu_addr, "Invalid GPU address");
51
52 const u8* source{Memory::GetPointer(*cpu_addr)};
53 49
54 for (u32 primitive = 0; primitive < count / 4; ++primitive) { 50 for (u32 primitive = 0; primitive < count / 4; ++primitive) {
55 for (std::size_t i = 0; i < TRIANGLES_PER_QUAD; ++i) { 51 for (std::size_t i = 0; i < TRIANGLES_PER_QUAD; ++i) {
@@ -64,4 +60,4 @@ GLintptr PrimitiveAssembler::MakeQuadIndexed(Tegra::GPUVAddr gpu_addr, std::size
64 return index_offset; 60 return index_offset;
65} 61}
66 62
67} // namespace OpenGL \ No newline at end of file 63} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_primitive_assembler.h b/src/video_core/renderer_opengl/gl_primitive_assembler.h
index a8cb88eb5..0e2e7dc36 100644
--- a/src/video_core/renderer_opengl/gl_primitive_assembler.h
+++ b/src/video_core/renderer_opengl/gl_primitive_assembler.h
@@ -24,7 +24,7 @@ public:
24 24
25 GLintptr MakeQuadArray(u32 first, u32 count); 25 GLintptr MakeQuadArray(u32 first, u32 count);
26 26
27 GLintptr MakeQuadIndexed(Tegra::GPUVAddr gpu_addr, std::size_t index_size, u32 count); 27 GLintptr MakeQuadIndexed(GPUVAddr gpu_addr, std::size_t index_size, u32 count);
28 28
29private: 29private:
30 OGLBufferCache& buffer_cache; 30 OGLBufferCache& buffer_cache;
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp
index 974ca6a20..8f012db62 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -100,10 +100,9 @@ struct FramebufferCacheKey {
100 } 100 }
101}; 101};
102 102
103RasterizerOpenGL::RasterizerOpenGL(Core::Frontend::EmuWindow& window, Core::System& system, 103RasterizerOpenGL::RasterizerOpenGL(Core::System& system, ScreenInfo& info)
104 ScreenInfo& info) 104 : res_cache{*this}, shader_cache{*this, system}, global_cache{*this}, system{system},
105 : res_cache{*this}, shader_cache{*this, system}, emu_window{window}, screen_info{info}, 105 screen_info{info}, buffer_cache(*this, STREAM_BUFFER_SIZE) {
106 buffer_cache(*this, STREAM_BUFFER_SIZE), global_cache{*this} {
107 // Create sampler objects 106 // Create sampler objects
108 for (std::size_t i = 0; i < texture_samplers.size(); ++i) { 107 for (std::size_t i = 0; i < texture_samplers.size(); ++i) {
109 texture_samplers[i].Create(); 108 texture_samplers[i].Create();
@@ -118,7 +117,7 @@ RasterizerOpenGL::RasterizerOpenGL(Core::Frontend::EmuWindow& window, Core::Syst
118 117
119 glGetIntegerv(GL_UNIFORM_BUFFER_OFFSET_ALIGNMENT, &uniform_buffer_alignment); 118 glGetIntegerv(GL_UNIFORM_BUFFER_OFFSET_ALIGNMENT, &uniform_buffer_alignment);
120 119
121 LOG_CRITICAL(Render_OpenGL, "Sync fixed function OpenGL state here!"); 120 LOG_DEBUG(Render_OpenGL, "Sync fixed function OpenGL state here");
122 CheckExtensions(); 121 CheckExtensions();
123} 122}
124 123
@@ -138,7 +137,7 @@ void RasterizerOpenGL::CheckExtensions() {
138} 137}
139 138
140GLuint RasterizerOpenGL::SetupVertexFormat() { 139GLuint RasterizerOpenGL::SetupVertexFormat() {
141 auto& gpu = Core::System::GetInstance().GPU().Maxwell3D(); 140 auto& gpu = system.GPU().Maxwell3D();
142 const auto& regs = gpu.regs; 141 const auto& regs = gpu.regs;
143 142
144 if (!gpu.dirty_flags.vertex_attrib_format) { 143 if (!gpu.dirty_flags.vertex_attrib_format) {
@@ -177,7 +176,7 @@ GLuint RasterizerOpenGL::SetupVertexFormat() {
177 continue; 176 continue;
178 177
179 const auto& buffer = regs.vertex_array[attrib.buffer]; 178 const auto& buffer = regs.vertex_array[attrib.buffer];
180 LOG_TRACE(HW_GPU, 179 LOG_TRACE(Render_OpenGL,
181 "vertex attrib {}, count={}, size={}, type={}, offset={}, normalize={}", 180 "vertex attrib {}, count={}, size={}, type={}, offset={}, normalize={}",
182 index, attrib.ComponentCount(), attrib.SizeString(), attrib.TypeString(), 181 index, attrib.ComponentCount(), attrib.SizeString(), attrib.TypeString(),
183 attrib.offset.Value(), attrib.IsNormalized()); 182 attrib.offset.Value(), attrib.IsNormalized());
@@ -200,32 +199,32 @@ GLuint RasterizerOpenGL::SetupVertexFormat() {
200 } 199 }
201 200
202 // Rebinding the VAO invalidates the vertex buffer bindings. 201 // Rebinding the VAO invalidates the vertex buffer bindings.
203 gpu.dirty_flags.vertex_array = 0xFFFFFFFF; 202 gpu.dirty_flags.vertex_array.set();
204 203
205 state.draw.vertex_array = vao_entry.handle; 204 state.draw.vertex_array = vao_entry.handle;
206 return vao_entry.handle; 205 return vao_entry.handle;
207} 206}
208 207
209void RasterizerOpenGL::SetupVertexBuffer(GLuint vao) { 208void RasterizerOpenGL::SetupVertexBuffer(GLuint vao) {
210 auto& gpu = Core::System::GetInstance().GPU().Maxwell3D(); 209 auto& gpu = system.GPU().Maxwell3D();
211 const auto& regs = gpu.regs; 210 const auto& regs = gpu.regs;
212 211
213 if (!gpu.dirty_flags.vertex_array) 212 if (gpu.dirty_flags.vertex_array.none())
214 return; 213 return;
215 214
216 MICROPROFILE_SCOPE(OpenGL_VB); 215 MICROPROFILE_SCOPE(OpenGL_VB);
217 216
218 // Upload all guest vertex arrays sequentially to our buffer 217 // Upload all guest vertex arrays sequentially to our buffer
219 for (u32 index = 0; index < Maxwell::NumVertexArrays; ++index) { 218 for (u32 index = 0; index < Maxwell::NumVertexArrays; ++index) {
220 if (~gpu.dirty_flags.vertex_array & (1u << index)) 219 if (!gpu.dirty_flags.vertex_array[index])
221 continue; 220 continue;
222 221
223 const auto& vertex_array = regs.vertex_array[index]; 222 const auto& vertex_array = regs.vertex_array[index];
224 if (!vertex_array.IsEnabled()) 223 if (!vertex_array.IsEnabled())
225 continue; 224 continue;
226 225
227 const Tegra::GPUVAddr start = vertex_array.StartAddress(); 226 const GPUVAddr start = vertex_array.StartAddress();
228 const Tegra::GPUVAddr end = regs.vertex_array_limit[index].LimitAddress(); 227 const GPUVAddr end = regs.vertex_array_limit[index].LimitAddress();
229 228
230 ASSERT(end > start); 229 ASSERT(end > start);
231 const u64 size = end - start + 1; 230 const u64 size = end - start + 1;
@@ -244,11 +243,11 @@ void RasterizerOpenGL::SetupVertexBuffer(GLuint vao) {
244 } 243 }
245 } 244 }
246 245
247 gpu.dirty_flags.vertex_array = 0; 246 gpu.dirty_flags.vertex_array.reset();
248} 247}
249 248
250DrawParameters RasterizerOpenGL::SetupDraw() { 249DrawParameters RasterizerOpenGL::SetupDraw() {
251 const auto& gpu = Core::System::GetInstance().GPU().Maxwell3D(); 250 const auto& gpu = system.GPU().Maxwell3D();
252 const auto& regs = gpu.regs; 251 const auto& regs = gpu.regs;
253 const bool is_indexed = accelerate_draw == AccelDraw::Indexed; 252 const bool is_indexed = accelerate_draw == AccelDraw::Indexed;
254 253
@@ -297,7 +296,7 @@ DrawParameters RasterizerOpenGL::SetupDraw() {
297 296
298void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) { 297void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) {
299 MICROPROFILE_SCOPE(OpenGL_Shader); 298 MICROPROFILE_SCOPE(OpenGL_Shader);
300 auto& gpu = Core::System::GetInstance().GPU().Maxwell3D(); 299 auto& gpu = system.GPU().Maxwell3D();
301 300
302 BaseBindings base_bindings; 301 BaseBindings base_bindings;
303 std::array<bool, Maxwell::NumClipDistances> clip_distances{}; 302 std::array<bool, Maxwell::NumClipDistances> clip_distances{};
@@ -319,7 +318,7 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) {
319 const std::size_t stage{index == 0 ? 0 : index - 1}; // Stage indices are 0 - 5 318 const std::size_t stage{index == 0 ? 0 : index - 1}; // Stage indices are 0 - 5
320 319
321 GLShader::MaxwellUniformData ubo{}; 320 GLShader::MaxwellUniformData ubo{};
322 ubo.SetFromRegs(gpu.state.shader_stages[stage]); 321 ubo.SetFromRegs(gpu, stage);
323 const GLintptr offset = buffer_cache.UploadHostMemory( 322 const GLintptr offset = buffer_cache.UploadHostMemory(
324 &ubo, sizeof(ubo), static_cast<std::size_t>(uniform_buffer_alignment)); 323 &ubo, sizeof(ubo), static_cast<std::size_t>(uniform_buffer_alignment));
325 324
@@ -343,9 +342,8 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) {
343 shader_program_manager->UseProgrammableFragmentShader(program_handle); 342 shader_program_manager->UseProgrammableFragmentShader(program_handle);
344 break; 343 break;
345 default: 344 default:
346 LOG_CRITICAL(HW_GPU, "Unimplemented shader index={}, enable={}, offset=0x{:08X}", index, 345 UNIMPLEMENTED_MSG("Unimplemented shader index={}, enable={}, offset=0x{:08X}", index,
347 shader_config.enable.Value(), shader_config.offset); 346 shader_config.enable.Value(), shader_config.offset);
348 UNREACHABLE();
349 } 347 }
350 348
351 const auto stage_enum = static_cast<Maxwell::ShaderStage>(stage); 349 const auto stage_enum = static_cast<Maxwell::ShaderStage>(stage);
@@ -414,15 +412,15 @@ void RasterizerOpenGL::SetupCachedFramebuffer(const FramebufferCacheKey& fbkey,
414} 412}
415 413
416std::size_t RasterizerOpenGL::CalculateVertexArraysSize() const { 414std::size_t RasterizerOpenGL::CalculateVertexArraysSize() const {
417 const auto& regs = Core::System::GetInstance().GPU().Maxwell3D().regs; 415 const auto& regs = system.GPU().Maxwell3D().regs;
418 416
419 std::size_t size = 0; 417 std::size_t size = 0;
420 for (u32 index = 0; index < Maxwell::NumVertexArrays; ++index) { 418 for (u32 index = 0; index < Maxwell::NumVertexArrays; ++index) {
421 if (!regs.vertex_array[index].IsEnabled()) 419 if (!regs.vertex_array[index].IsEnabled())
422 continue; 420 continue;
423 421
424 const Tegra::GPUVAddr start = regs.vertex_array[index].StartAddress(); 422 const GPUVAddr start = regs.vertex_array[index].StartAddress();
425 const Tegra::GPUVAddr end = regs.vertex_array_limit[index].LimitAddress(); 423 const GPUVAddr end = regs.vertex_array_limit[index].LimitAddress();
426 424
427 ASSERT(end > start); 425 ASSERT(end > start);
428 size += end - start + 1; 426 size += end - start + 1;
@@ -432,7 +430,7 @@ std::size_t RasterizerOpenGL::CalculateVertexArraysSize() const {
432} 430}
433 431
434std::size_t RasterizerOpenGL::CalculateIndexBufferSize() const { 432std::size_t RasterizerOpenGL::CalculateIndexBufferSize() const {
435 const auto& regs = Core::System::GetInstance().GPU().Maxwell3D().regs; 433 const auto& regs = system.GPU().Maxwell3D().regs;
436 434
437 return static_cast<std::size_t>(regs.index_array.count) * 435 return static_cast<std::size_t>(regs.index_array.count) *
438 static_cast<std::size_t>(regs.index_array.FormatSizeInBytes()); 436 static_cast<std::size_t>(regs.index_array.FormatSizeInBytes());
@@ -449,7 +447,7 @@ static constexpr auto RangeFromInterval(Map& map, const Interval& interval) {
449 return boost::make_iterator_range(map.equal_range(interval)); 447 return boost::make_iterator_range(map.equal_range(interval));
450} 448}
451 449
452void RasterizerOpenGL::UpdatePagesCachedCount(Tegra::GPUVAddr addr, u64 size, int delta) { 450void RasterizerOpenGL::UpdatePagesCachedCount(VAddr addr, u64 size, int delta) {
453 const u64 page_start{addr >> Memory::PAGE_BITS}; 451 const u64 page_start{addr >> Memory::PAGE_BITS};
454 const u64 page_end{(addr + size + Memory::PAGE_SIZE - 1) >> Memory::PAGE_BITS}; 452 const u64 page_end{(addr + size + Memory::PAGE_SIZE - 1) >> Memory::PAGE_BITS};
455 453
@@ -488,13 +486,13 @@ std::pair<bool, bool> RasterizerOpenGL::ConfigureFramebuffers(
488 OpenGLState& current_state, bool using_color_fb, bool using_depth_fb, bool preserve_contents, 486 OpenGLState& current_state, bool using_color_fb, bool using_depth_fb, bool preserve_contents,
489 std::optional<std::size_t> single_color_target) { 487 std::optional<std::size_t> single_color_target) {
490 MICROPROFILE_SCOPE(OpenGL_Framebuffer); 488 MICROPROFILE_SCOPE(OpenGL_Framebuffer);
491 const auto& gpu = Core::System::GetInstance().GPU().Maxwell3D(); 489 auto& gpu = system.GPU().Maxwell3D();
492 const auto& regs = gpu.regs; 490 const auto& regs = gpu.regs;
493 491
494 const FramebufferConfigState fb_config_state{using_color_fb, using_depth_fb, preserve_contents, 492 const FramebufferConfigState fb_config_state{using_color_fb, using_depth_fb, preserve_contents,
495 single_color_target}; 493 single_color_target};
496 if (fb_config_state == current_framebuffer_config_state && gpu.dirty_flags.color_buffer == 0 && 494 if (fb_config_state == current_framebuffer_config_state &&
497 !gpu.dirty_flags.zeta_buffer) { 495 gpu.dirty_flags.color_buffer.none() && !gpu.dirty_flags.zeta_buffer) {
498 // Only skip if the previous ConfigureFramebuffers call was from the same kind (multiple or 496 // Only skip if the previous ConfigureFramebuffers call was from the same kind (multiple or
499 // single color targets). This is done because the guest registers may not change but the 497 // single color targets). This is done because the guest registers may not change but the
500 // host framebuffer may contain different attachments 498 // host framebuffer may contain different attachments
@@ -582,7 +580,7 @@ void RasterizerOpenGL::Clear() {
582 const auto prev_state{state}; 580 const auto prev_state{state};
583 SCOPE_EXIT({ prev_state.Apply(); }); 581 SCOPE_EXIT({ prev_state.Apply(); });
584 582
585 const auto& regs = Core::System::GetInstance().GPU().Maxwell3D().regs; 583 const auto& regs = system.GPU().Maxwell3D().regs;
586 bool use_color{}; 584 bool use_color{};
587 bool use_depth{}; 585 bool use_depth{};
588 bool use_stencil{}; 586 bool use_stencil{};
@@ -673,7 +671,7 @@ void RasterizerOpenGL::DrawArrays() {
673 return; 671 return;
674 672
675 MICROPROFILE_SCOPE(OpenGL_Drawing); 673 MICROPROFILE_SCOPE(OpenGL_Drawing);
676 auto& gpu = Core::System::GetInstance().GPU().Maxwell3D(); 674 auto& gpu = system.GPU().Maxwell3D();
677 const auto& regs = gpu.regs; 675 const auto& regs = gpu.regs;
678 676
679 ConfigureFramebuffers(state); 677 ConfigureFramebuffers(state);
@@ -721,10 +719,10 @@ void RasterizerOpenGL::DrawArrays() {
721 // Add space for at least 18 constant buffers 719 // Add space for at least 18 constant buffers
722 buffer_size += Maxwell::MaxConstBuffers * (MaxConstbufferSize + uniform_buffer_alignment); 720 buffer_size += Maxwell::MaxConstBuffers * (MaxConstbufferSize + uniform_buffer_alignment);
723 721
724 bool invalidate = buffer_cache.Map(buffer_size); 722 const bool invalidate = buffer_cache.Map(buffer_size);
725 if (invalidate) { 723 if (invalidate) {
726 // As all cached buffers are invalidated, we need to recheck their state. 724 // As all cached buffers are invalidated, we need to recheck their state.
727 gpu.dirty_flags.vertex_array = 0xFFFFFFFF; 725 gpu.dirty_flags.vertex_array.set();
728 } 726 }
729 727
730 const GLuint vao = SetupVertexFormat(); 728 const GLuint vao = SetupVertexFormat();
@@ -738,55 +736,45 @@ void RasterizerOpenGL::DrawArrays() {
738 shader_program_manager->ApplyTo(state); 736 shader_program_manager->ApplyTo(state);
739 state.Apply(); 737 state.Apply();
740 738
741 // Execute draw call 739 res_cache.SignalPreDrawCall();
742 params.DispatchDraw(); 740 params.DispatchDraw();
743 741 res_cache.SignalPostDrawCall();
744 // Disable scissor test
745 state.viewports[0].scissor.enabled = false;
746 742
747 accelerate_draw = AccelDraw::Disabled; 743 accelerate_draw = AccelDraw::Disabled;
748
749 // Unbind textures for potential future use as framebuffer attachments
750 for (auto& texture_unit : state.texture_units) {
751 texture_unit.Unbind();
752 }
753 state.Apply();
754} 744}
755 745
756void RasterizerOpenGL::FlushAll() {} 746void RasterizerOpenGL::FlushAll() {}
757 747
758void RasterizerOpenGL::FlushRegion(VAddr addr, u64 size) { 748void RasterizerOpenGL::FlushRegion(CacheAddr addr, u64 size) {
759 MICROPROFILE_SCOPE(OpenGL_CacheManagement); 749 MICROPROFILE_SCOPE(OpenGL_CacheManagement);
760 750 if (!addr || !size) {
761 if (Settings::values.use_accurate_gpu_emulation) { 751 return;
762 // Only flush if use_accurate_gpu_emulation is enabled, as it incurs a performance hit
763 res_cache.FlushRegion(addr, size);
764 } 752 }
753 res_cache.FlushRegion(addr, size);
765} 754}
766 755
767void RasterizerOpenGL::InvalidateRegion(VAddr addr, u64 size) { 756void RasterizerOpenGL::InvalidateRegion(CacheAddr addr, u64 size) {
768 MICROPROFILE_SCOPE(OpenGL_CacheManagement); 757 MICROPROFILE_SCOPE(OpenGL_CacheManagement);
758 if (!addr || !size) {
759 return;
760 }
769 res_cache.InvalidateRegion(addr, size); 761 res_cache.InvalidateRegion(addr, size);
770 shader_cache.InvalidateRegion(addr, size); 762 shader_cache.InvalidateRegion(addr, size);
771 global_cache.InvalidateRegion(addr, size); 763 global_cache.InvalidateRegion(addr, size);
772 buffer_cache.InvalidateRegion(addr, size); 764 buffer_cache.InvalidateRegion(addr, size);
773} 765}
774 766
775void RasterizerOpenGL::FlushAndInvalidateRegion(VAddr addr, u64 size) { 767void RasterizerOpenGL::FlushAndInvalidateRegion(CacheAddr addr, u64 size) {
776 FlushRegion(addr, size); 768 FlushRegion(addr, size);
777 InvalidateRegion(addr, size); 769 InvalidateRegion(addr, size);
778} 770}
779 771
780bool RasterizerOpenGL::AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Regs::Surface& src, 772bool RasterizerOpenGL::AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Regs::Surface& src,
781 const Tegra::Engines::Fermi2D::Regs::Surface& dst) { 773 const Tegra::Engines::Fermi2D::Regs::Surface& dst,
774 const Common::Rectangle<u32>& src_rect,
775 const Common::Rectangle<u32>& dst_rect) {
782 MICROPROFILE_SCOPE(OpenGL_Blits); 776 MICROPROFILE_SCOPE(OpenGL_Blits);
783 777 res_cache.FermiCopySurface(src, dst, src_rect, dst_rect);
784 if (Settings::values.use_accurate_gpu_emulation) {
785 // Skip the accelerated copy and perform a slow but more accurate copy
786 return false;
787 }
788
789 res_cache.FermiCopySurface(src, dst);
790 return true; 778 return true;
791} 779}
792 780
@@ -798,7 +786,7 @@ bool RasterizerOpenGL::AccelerateDisplay(const Tegra::FramebufferConfig& config,
798 786
799 MICROPROFILE_SCOPE(OpenGL_CacheManagement); 787 MICROPROFILE_SCOPE(OpenGL_CacheManagement);
800 788
801 const auto& surface{res_cache.TryFindFramebufferSurface(framebuffer_addr)}; 789 const auto& surface{res_cache.TryFindFramebufferSurface(Memory::GetPointer(framebuffer_addr))};
802 if (!surface) { 790 if (!surface) {
803 return {}; 791 return {};
804 } 792 }
@@ -809,7 +797,10 @@ bool RasterizerOpenGL::AccelerateDisplay(const Tegra::FramebufferConfig& config,
809 VideoCore::Surface::PixelFormatFromGPUPixelFormat(config.pixel_format)}; 797 VideoCore::Surface::PixelFormatFromGPUPixelFormat(config.pixel_format)};
810 ASSERT_MSG(params.width == config.width, "Framebuffer width is different"); 798 ASSERT_MSG(params.width == config.width, "Framebuffer width is different");
811 ASSERT_MSG(params.height == config.height, "Framebuffer height is different"); 799 ASSERT_MSG(params.height == config.height, "Framebuffer height is different");
812 ASSERT_MSG(params.pixel_format == pixel_format, "Framebuffer pixel_format is different"); 800
801 if (params.pixel_format != pixel_format) {
802 LOG_WARNING(Render_OpenGL, "Framebuffer pixel_format is different");
803 }
813 804
814 screen_info.display_texture = surface->Texture().handle; 805 screen_info.display_texture = surface->Texture().handle;
815 806
@@ -818,104 +809,87 @@ bool RasterizerOpenGL::AccelerateDisplay(const Tegra::FramebufferConfig& config,
818 809
819void RasterizerOpenGL::SamplerInfo::Create() { 810void RasterizerOpenGL::SamplerInfo::Create() {
820 sampler.Create(); 811 sampler.Create();
821 mag_filter = min_filter = Tegra::Texture::TextureFilter::Linear; 812 mag_filter = Tegra::Texture::TextureFilter::Linear;
822 wrap_u = wrap_v = wrap_p = Tegra::Texture::WrapMode::Wrap; 813 min_filter = Tegra::Texture::TextureFilter::Linear;
823 uses_depth_compare = false; 814 wrap_u = Tegra::Texture::WrapMode::Wrap;
815 wrap_v = Tegra::Texture::WrapMode::Wrap;
816 wrap_p = Tegra::Texture::WrapMode::Wrap;
817 use_depth_compare = false;
824 depth_compare_func = Tegra::Texture::DepthCompareFunc::Never; 818 depth_compare_func = Tegra::Texture::DepthCompareFunc::Never;
825 819
826 // default is GL_LINEAR_MIPMAP_LINEAR 820 // OpenGL's default is GL_LINEAR_MIPMAP_LINEAR
827 glSamplerParameteri(sampler.handle, GL_TEXTURE_MIN_FILTER, GL_LINEAR); 821 glSamplerParameteri(sampler.handle, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
828 // Other attributes have correct defaults
829 glSamplerParameteri(sampler.handle, GL_TEXTURE_COMPARE_FUNC, GL_NEVER); 822 glSamplerParameteri(sampler.handle, GL_TEXTURE_COMPARE_FUNC, GL_NEVER);
823
824 // Other attributes have correct defaults
830} 825}
831 826
832void RasterizerOpenGL::SamplerInfo::SyncWithConfig(const Tegra::Texture::TSCEntry& config) { 827void RasterizerOpenGL::SamplerInfo::SyncWithConfig(const Tegra::Texture::TSCEntry& config) {
833 const GLuint s = sampler.handle; 828 const GLuint sampler_id = sampler.handle;
834 if (mag_filter != config.mag_filter) { 829 if (mag_filter != config.mag_filter) {
835 mag_filter = config.mag_filter; 830 mag_filter = config.mag_filter;
836 glSamplerParameteri( 831 glSamplerParameteri(
837 s, GL_TEXTURE_MAG_FILTER, 832 sampler_id, GL_TEXTURE_MAG_FILTER,
838 MaxwellToGL::TextureFilterMode(mag_filter, Tegra::Texture::TextureMipmapFilter::None)); 833 MaxwellToGL::TextureFilterMode(mag_filter, Tegra::Texture::TextureMipmapFilter::None));
839 } 834 }
840 if (min_filter != config.min_filter || mip_filter != config.mip_filter) { 835 if (min_filter != config.min_filter || mipmap_filter != config.mipmap_filter) {
841 min_filter = config.min_filter; 836 min_filter = config.min_filter;
842 mip_filter = config.mip_filter; 837 mipmap_filter = config.mipmap_filter;
843 glSamplerParameteri(s, GL_TEXTURE_MIN_FILTER, 838 glSamplerParameteri(sampler_id, GL_TEXTURE_MIN_FILTER,
844 MaxwellToGL::TextureFilterMode(min_filter, mip_filter)); 839 MaxwellToGL::TextureFilterMode(min_filter, mipmap_filter));
845 } 840 }
846 841
847 if (wrap_u != config.wrap_u) { 842 if (wrap_u != config.wrap_u) {
848 wrap_u = config.wrap_u; 843 wrap_u = config.wrap_u;
849 glSamplerParameteri(s, GL_TEXTURE_WRAP_S, MaxwellToGL::WrapMode(wrap_u)); 844 glSamplerParameteri(sampler_id, GL_TEXTURE_WRAP_S, MaxwellToGL::WrapMode(wrap_u));
850 } 845 }
851 if (wrap_v != config.wrap_v) { 846 if (wrap_v != config.wrap_v) {
852 wrap_v = config.wrap_v; 847 wrap_v = config.wrap_v;
853 glSamplerParameteri(s, GL_TEXTURE_WRAP_T, MaxwellToGL::WrapMode(wrap_v)); 848 glSamplerParameteri(sampler_id, GL_TEXTURE_WRAP_T, MaxwellToGL::WrapMode(wrap_v));
854 } 849 }
855 if (wrap_p != config.wrap_p) { 850 if (wrap_p != config.wrap_p) {
856 wrap_p = config.wrap_p; 851 wrap_p = config.wrap_p;
857 glSamplerParameteri(s, GL_TEXTURE_WRAP_R, MaxwellToGL::WrapMode(wrap_p)); 852 glSamplerParameteri(sampler_id, GL_TEXTURE_WRAP_R, MaxwellToGL::WrapMode(wrap_p));
858 } 853 }
859 854
860 if (uses_depth_compare != (config.depth_compare_enabled == 1)) { 855 if (const bool enabled = config.depth_compare_enabled == 1; use_depth_compare != enabled) {
861 uses_depth_compare = (config.depth_compare_enabled == 1); 856 use_depth_compare = enabled;
862 if (uses_depth_compare) { 857 glSamplerParameteri(sampler_id, GL_TEXTURE_COMPARE_MODE,
863 glSamplerParameteri(s, GL_TEXTURE_COMPARE_MODE, GL_COMPARE_REF_TO_TEXTURE); 858 use_depth_compare ? GL_COMPARE_REF_TO_TEXTURE : GL_NONE);
864 } else {
865 glSamplerParameteri(s, GL_TEXTURE_COMPARE_MODE, GL_NONE);
866 }
867 } 859 }
868 860
869 if (depth_compare_func != config.depth_compare_func) { 861 if (depth_compare_func != config.depth_compare_func) {
870 depth_compare_func = config.depth_compare_func; 862 depth_compare_func = config.depth_compare_func;
871 glSamplerParameteri(s, GL_TEXTURE_COMPARE_FUNC, 863 glSamplerParameteri(sampler_id, GL_TEXTURE_COMPARE_FUNC,
872 MaxwellToGL::DepthCompareFunc(depth_compare_func)); 864 MaxwellToGL::DepthCompareFunc(depth_compare_func));
873 } 865 }
874 866
875 GLvec4 new_border_color; 867 if (const auto new_border_color = config.GetBorderColor(); border_color != new_border_color) {
876 if (config.srgb_conversion) {
877 new_border_color[0] = config.srgb_border_color_r / 255.0f;
878 new_border_color[1] = config.srgb_border_color_g / 255.0f;
879 new_border_color[2] = config.srgb_border_color_g / 255.0f;
880 } else {
881 new_border_color[0] = config.border_color_r;
882 new_border_color[1] = config.border_color_g;
883 new_border_color[2] = config.border_color_b;
884 }
885 new_border_color[3] = config.border_color_a;
886
887 if (border_color != new_border_color) {
888 border_color = new_border_color; 868 border_color = new_border_color;
889 glSamplerParameterfv(s, GL_TEXTURE_BORDER_COLOR, border_color.data()); 869 glSamplerParameterfv(sampler_id, GL_TEXTURE_BORDER_COLOR, border_color.data());
890 } 870 }
891 871
892 const float anisotropic_max = static_cast<float>(1 << config.max_anisotropy.Value()); 872 if (const float anisotropic = config.GetMaxAnisotropy(); max_anisotropic != anisotropic) {
893 if (anisotropic_max != max_anisotropic) { 873 max_anisotropic = anisotropic;
894 max_anisotropic = anisotropic_max;
895 if (GLAD_GL_ARB_texture_filter_anisotropic) { 874 if (GLAD_GL_ARB_texture_filter_anisotropic) {
896 glSamplerParameterf(s, GL_TEXTURE_MAX_ANISOTROPY, max_anisotropic); 875 glSamplerParameterf(sampler_id, GL_TEXTURE_MAX_ANISOTROPY, max_anisotropic);
897 } else if (GLAD_GL_EXT_texture_filter_anisotropic) { 876 } else if (GLAD_GL_EXT_texture_filter_anisotropic) {
898 glSamplerParameterf(s, GL_TEXTURE_MAX_ANISOTROPY_EXT, max_anisotropic); 877 glSamplerParameterf(sampler_id, GL_TEXTURE_MAX_ANISOTROPY_EXT, max_anisotropic);
899 } 878 }
900 } 879 }
901 const float lod_min = static_cast<float>(config.min_lod_clamp.Value()) / 256.0f;
902 if (lod_min != min_lod) {
903 min_lod = lod_min;
904 glSamplerParameterf(s, GL_TEXTURE_MIN_LOD, min_lod);
905 }
906 880
907 const float lod_max = static_cast<float>(config.max_lod_clamp.Value()) / 256.0f; 881 if (const float min = config.GetMinLod(); min_lod != min) {
908 if (lod_max != max_lod) { 882 min_lod = min;
909 max_lod = lod_max; 883 glSamplerParameterf(sampler_id, GL_TEXTURE_MIN_LOD, min_lod);
910 glSamplerParameterf(s, GL_TEXTURE_MAX_LOD, max_lod); 884 }
885 if (const float max = config.GetMaxLod(); max_lod != max) {
886 max_lod = max;
887 glSamplerParameterf(sampler_id, GL_TEXTURE_MAX_LOD, max_lod);
911 } 888 }
912 const u32 bias = config.mip_lod_bias.Value(); 889
913 // Sign extend the 13-bit value. 890 if (const float bias = config.GetLodBias(); lod_bias != bias) {
914 constexpr u32 mask = 1U << (13 - 1); 891 lod_bias = bias;
915 const float bias_lod = static_cast<s32>((bias ^ mask) - mask) / 256.f; 892 glSamplerParameterf(sampler_id, GL_TEXTURE_LOD_BIAS, lod_bias);
916 if (lod_bias != bias_lod) {
917 lod_bias = bias_lod;
918 glSamplerParameterf(s, GL_TEXTURE_LOD_BIAS, lod_bias);
919 } 893 }
920} 894}
921 895
@@ -923,7 +897,7 @@ void RasterizerOpenGL::SetupConstBuffers(Tegra::Engines::Maxwell3D::Regs::Shader
923 const Shader& shader, GLuint program_handle, 897 const Shader& shader, GLuint program_handle,
924 BaseBindings base_bindings) { 898 BaseBindings base_bindings) {
925 MICROPROFILE_SCOPE(OpenGL_UBO); 899 MICROPROFILE_SCOPE(OpenGL_UBO);
926 const auto& gpu = Core::System::GetInstance().GPU(); 900 const auto& gpu = system.GPU();
927 const auto& maxwell3d = gpu.Maxwell3D(); 901 const auto& maxwell3d = gpu.Maxwell3D();
928 const auto& shader_stage = maxwell3d.state.shader_stages[static_cast<std::size_t>(stage)]; 902 const auto& shader_stage = maxwell3d.state.shader_stages[static_cast<std::size_t>(stage)];
929 const auto& entries = shader->GetShaderEntries().const_buffers; 903 const auto& entries = shader->GetShaderEntries().const_buffers;
@@ -955,8 +929,8 @@ void RasterizerOpenGL::SetupConstBuffers(Tegra::Engines::Maxwell3D::Regs::Shader
955 size = buffer.size; 929 size = buffer.size;
956 930
957 if (size > MaxConstbufferSize) { 931 if (size > MaxConstbufferSize) {
958 LOG_CRITICAL(HW_GPU, "indirect constbuffer size {} exceeds maximum {}", size, 932 LOG_WARNING(Render_OpenGL, "Indirect constbuffer size {} exceeds maximum {}", size,
959 MaxConstbufferSize); 933 MaxConstbufferSize);
960 size = MaxConstbufferSize; 934 size = MaxConstbufferSize;
961 } 935 }
962 } else { 936 } else {
@@ -1002,7 +976,7 @@ void RasterizerOpenGL::SetupGlobalRegions(Tegra::Engines::Maxwell3D::Regs::Shade
1002void RasterizerOpenGL::SetupTextures(Maxwell::ShaderStage stage, const Shader& shader, 976void RasterizerOpenGL::SetupTextures(Maxwell::ShaderStage stage, const Shader& shader,
1003 GLuint program_handle, BaseBindings base_bindings) { 977 GLuint program_handle, BaseBindings base_bindings) {
1004 MICROPROFILE_SCOPE(OpenGL_Texture); 978 MICROPROFILE_SCOPE(OpenGL_Texture);
1005 const auto& gpu = Core::System::GetInstance().GPU(); 979 const auto& gpu = system.GPU();
1006 const auto& maxwell3d = gpu.Maxwell3D(); 980 const auto& maxwell3d = gpu.Maxwell3D();
1007 const auto& entries = shader->GetShaderEntries().samplers; 981 const auto& entries = shader->GetShaderEntries().samplers;
1008 982
@@ -1016,10 +990,9 @@ void RasterizerOpenGL::SetupTextures(Maxwell::ShaderStage stage, const Shader& s
1016 990
1017 texture_samplers[current_bindpoint].SyncWithConfig(texture.tsc); 991 texture_samplers[current_bindpoint].SyncWithConfig(texture.tsc);
1018 992
1019 Surface surface = res_cache.GetTextureSurface(texture, entry); 993 if (Surface surface = res_cache.GetTextureSurface(texture, entry); surface) {
1020 if (surface != nullptr) {
1021 state.texture_units[current_bindpoint].texture = 994 state.texture_units[current_bindpoint].texture =
1022 entry.IsArray() ? surface->TextureLayer().handle : surface->Texture().handle; 995 surface->Texture(entry.IsArray()).handle;
1023 surface->UpdateSwizzle(texture.tic.x_source, texture.tic.y_source, texture.tic.z_source, 996 surface->UpdateSwizzle(texture.tic.x_source, texture.tic.y_source, texture.tic.z_source,
1024 texture.tic.w_source); 997 texture.tic.w_source);
1025 } else { 998 } else {
@@ -1030,7 +1003,7 @@ void RasterizerOpenGL::SetupTextures(Maxwell::ShaderStage stage, const Shader& s
1030} 1003}
1031 1004
1032void RasterizerOpenGL::SyncViewport(OpenGLState& current_state) { 1005void RasterizerOpenGL::SyncViewport(OpenGLState& current_state) {
1033 const auto& regs = Core::System::GetInstance().GPU().Maxwell3D().regs; 1006 const auto& regs = system.GPU().Maxwell3D().regs;
1034 const bool geometry_shaders_enabled = 1007 const bool geometry_shaders_enabled =
1035 regs.IsShaderConfigEnabled(static_cast<size_t>(Maxwell::ShaderProgram::Geometry)); 1008 regs.IsShaderConfigEnabled(static_cast<size_t>(Maxwell::ShaderProgram::Geometry));
1036 const std::size_t viewport_count = 1009 const std::size_t viewport_count =
@@ -1038,7 +1011,7 @@ void RasterizerOpenGL::SyncViewport(OpenGLState& current_state) {
1038 for (std::size_t i = 0; i < viewport_count; i++) { 1011 for (std::size_t i = 0; i < viewport_count; i++) {
1039 auto& viewport = current_state.viewports[i]; 1012 auto& viewport = current_state.viewports[i];
1040 const auto& src = regs.viewports[i]; 1013 const auto& src = regs.viewports[i];
1041 const MathUtil::Rectangle<s32> viewport_rect{regs.viewport_transform[i].GetRect()}; 1014 const Common::Rectangle<s32> viewport_rect{regs.viewport_transform[i].GetRect()};
1042 viewport.x = viewport_rect.left; 1015 viewport.x = viewport_rect.left;
1043 viewport.y = viewport_rect.bottom; 1016 viewport.y = viewport_rect.bottom;
1044 viewport.width = viewport_rect.GetWidth(); 1017 viewport.width = viewport_rect.GetWidth();
@@ -1053,7 +1026,7 @@ void RasterizerOpenGL::SyncViewport(OpenGLState& current_state) {
1053void RasterizerOpenGL::SyncClipEnabled( 1026void RasterizerOpenGL::SyncClipEnabled(
1054 const std::array<bool, Maxwell::Regs::NumClipDistances>& clip_mask) { 1027 const std::array<bool, Maxwell::Regs::NumClipDistances>& clip_mask) {
1055 1028
1056 const auto& regs = Core::System::GetInstance().GPU().Maxwell3D().regs; 1029 const auto& regs = system.GPU().Maxwell3D().regs;
1057 const std::array<bool, Maxwell::Regs::NumClipDistances> reg_state{ 1030 const std::array<bool, Maxwell::Regs::NumClipDistances> reg_state{
1058 regs.clip_distance_enabled.c0 != 0, regs.clip_distance_enabled.c1 != 0, 1031 regs.clip_distance_enabled.c0 != 0, regs.clip_distance_enabled.c1 != 0,
1059 regs.clip_distance_enabled.c2 != 0, regs.clip_distance_enabled.c3 != 0, 1032 regs.clip_distance_enabled.c2 != 0, regs.clip_distance_enabled.c3 != 0,
@@ -1070,7 +1043,7 @@ void RasterizerOpenGL::SyncClipCoef() {
1070} 1043}
1071 1044
1072void RasterizerOpenGL::SyncCullMode() { 1045void RasterizerOpenGL::SyncCullMode() {
1073 const auto& regs = Core::System::GetInstance().GPU().Maxwell3D().regs; 1046 const auto& regs = system.GPU().Maxwell3D().regs;
1074 1047
1075 state.cull.enabled = regs.cull.enabled != 0; 1048 state.cull.enabled = regs.cull.enabled != 0;
1076 1049
@@ -1094,14 +1067,14 @@ void RasterizerOpenGL::SyncCullMode() {
1094} 1067}
1095 1068
1096void RasterizerOpenGL::SyncPrimitiveRestart() { 1069void RasterizerOpenGL::SyncPrimitiveRestart() {
1097 const auto& regs = Core::System::GetInstance().GPU().Maxwell3D().regs; 1070 const auto& regs = system.GPU().Maxwell3D().regs;
1098 1071
1099 state.primitive_restart.enabled = regs.primitive_restart.enabled; 1072 state.primitive_restart.enabled = regs.primitive_restart.enabled;
1100 state.primitive_restart.index = regs.primitive_restart.index; 1073 state.primitive_restart.index = regs.primitive_restart.index;
1101} 1074}
1102 1075
1103void RasterizerOpenGL::SyncDepthTestState() { 1076void RasterizerOpenGL::SyncDepthTestState() {
1104 const auto& regs = Core::System::GetInstance().GPU().Maxwell3D().regs; 1077 const auto& regs = system.GPU().Maxwell3D().regs;
1105 1078
1106 state.depth.test_enabled = regs.depth_test_enable != 0; 1079 state.depth.test_enabled = regs.depth_test_enable != 0;
1107 state.depth.write_mask = regs.depth_write_enabled ? GL_TRUE : GL_FALSE; 1080 state.depth.write_mask = regs.depth_write_enabled ? GL_TRUE : GL_FALSE;
@@ -1113,7 +1086,7 @@ void RasterizerOpenGL::SyncDepthTestState() {
1113} 1086}
1114 1087
1115void RasterizerOpenGL::SyncStencilTestState() { 1088void RasterizerOpenGL::SyncStencilTestState() {
1116 const auto& regs = Core::System::GetInstance().GPU().Maxwell3D().regs; 1089 const auto& regs = system.GPU().Maxwell3D().regs;
1117 state.stencil.test_enabled = regs.stencil_enable != 0; 1090 state.stencil.test_enabled = regs.stencil_enable != 0;
1118 1091
1119 if (!regs.stencil_enable) { 1092 if (!regs.stencil_enable) {
@@ -1147,7 +1120,7 @@ void RasterizerOpenGL::SyncStencilTestState() {
1147} 1120}
1148 1121
1149void RasterizerOpenGL::SyncColorMask() { 1122void RasterizerOpenGL::SyncColorMask() {
1150 const auto& regs = Core::System::GetInstance().GPU().Maxwell3D().regs; 1123 const auto& regs = system.GPU().Maxwell3D().regs;
1151 const std::size_t count = 1124 const std::size_t count =
1152 regs.independent_blend_enable ? Tegra::Engines::Maxwell3D::Regs::NumRenderTargets : 1; 1125 regs.independent_blend_enable ? Tegra::Engines::Maxwell3D::Regs::NumRenderTargets : 1;
1153 for (std::size_t i = 0; i < count; i++) { 1126 for (std::size_t i = 0; i < count; i++) {
@@ -1161,18 +1134,18 @@ void RasterizerOpenGL::SyncColorMask() {
1161} 1134}
1162 1135
1163void RasterizerOpenGL::SyncMultiSampleState() { 1136void RasterizerOpenGL::SyncMultiSampleState() {
1164 const auto& regs = Core::System::GetInstance().GPU().Maxwell3D().regs; 1137 const auto& regs = system.GPU().Maxwell3D().regs;
1165 state.multisample_control.alpha_to_coverage = regs.multisample_control.alpha_to_coverage != 0; 1138 state.multisample_control.alpha_to_coverage = regs.multisample_control.alpha_to_coverage != 0;
1166 state.multisample_control.alpha_to_one = regs.multisample_control.alpha_to_one != 0; 1139 state.multisample_control.alpha_to_one = regs.multisample_control.alpha_to_one != 0;
1167} 1140}
1168 1141
1169void RasterizerOpenGL::SyncFragmentColorClampState() { 1142void RasterizerOpenGL::SyncFragmentColorClampState() {
1170 const auto& regs = Core::System::GetInstance().GPU().Maxwell3D().regs; 1143 const auto& regs = system.GPU().Maxwell3D().regs;
1171 state.fragment_color_clamp.enabled = regs.frag_color_clamp != 0; 1144 state.fragment_color_clamp.enabled = regs.frag_color_clamp != 0;
1172} 1145}
1173 1146
1174void RasterizerOpenGL::SyncBlendState() { 1147void RasterizerOpenGL::SyncBlendState() {
1175 const auto& regs = Core::System::GetInstance().GPU().Maxwell3D().regs; 1148 const auto& regs = system.GPU().Maxwell3D().regs;
1176 1149
1177 state.blend_color.red = regs.blend_color.r; 1150 state.blend_color.red = regs.blend_color.r;
1178 state.blend_color.green = regs.blend_color.g; 1151 state.blend_color.green = regs.blend_color.g;
@@ -1214,7 +1187,7 @@ void RasterizerOpenGL::SyncBlendState() {
1214} 1187}
1215 1188
1216void RasterizerOpenGL::SyncLogicOpState() { 1189void RasterizerOpenGL::SyncLogicOpState() {
1217 const auto& regs = Core::System::GetInstance().GPU().Maxwell3D().regs; 1190 const auto& regs = system.GPU().Maxwell3D().regs;
1218 1191
1219 state.logic_op.enabled = regs.logic_op.enable != 0; 1192 state.logic_op.enabled = regs.logic_op.enable != 0;
1220 1193
@@ -1228,7 +1201,7 @@ void RasterizerOpenGL::SyncLogicOpState() {
1228} 1201}
1229 1202
1230void RasterizerOpenGL::SyncScissorTest(OpenGLState& current_state) { 1203void RasterizerOpenGL::SyncScissorTest(OpenGLState& current_state) {
1231 const auto& regs = Core::System::GetInstance().GPU().Maxwell3D().regs; 1204 const auto& regs = system.GPU().Maxwell3D().regs;
1232 const bool geometry_shaders_enabled = 1205 const bool geometry_shaders_enabled =
1233 regs.IsShaderConfigEnabled(static_cast<size_t>(Maxwell::ShaderProgram::Geometry)); 1206 regs.IsShaderConfigEnabled(static_cast<size_t>(Maxwell::ShaderProgram::Geometry));
1234 const std::size_t viewport_count = 1207 const std::size_t viewport_count =
@@ -1250,21 +1223,17 @@ void RasterizerOpenGL::SyncScissorTest(OpenGLState& current_state) {
1250} 1223}
1251 1224
1252void RasterizerOpenGL::SyncTransformFeedback() { 1225void RasterizerOpenGL::SyncTransformFeedback() {
1253 const auto& regs = Core::System::GetInstance().GPU().Maxwell3D().regs; 1226 const auto& regs = system.GPU().Maxwell3D().regs;
1254 1227 UNIMPLEMENTED_IF_MSG(regs.tfb_enabled != 0, "Transform feedbacks are not implemented");
1255 if (regs.tfb_enabled != 0) {
1256 LOG_CRITICAL(Render_OpenGL, "Transform feedbacks are not implemented");
1257 UNREACHABLE();
1258 }
1259} 1228}
1260 1229
1261void RasterizerOpenGL::SyncPointState() { 1230void RasterizerOpenGL::SyncPointState() {
1262 const auto& regs = Core::System::GetInstance().GPU().Maxwell3D().regs; 1231 const auto& regs = system.GPU().Maxwell3D().regs;
1263 state.point.size = regs.point_size; 1232 state.point.size = regs.point_size;
1264} 1233}
1265 1234
1266void RasterizerOpenGL::SyncPolygonOffset() { 1235void RasterizerOpenGL::SyncPolygonOffset() {
1267 const auto& regs = Core::System::GetInstance().GPU().Maxwell3D().regs; 1236 const auto& regs = system.GPU().Maxwell3D().regs;
1268 state.polygon_offset.fill_enable = regs.polygon_offset_fill_enable != 0; 1237 state.polygon_offset.fill_enable = regs.polygon_offset_fill_enable != 0;
1269 state.polygon_offset.line_enable = regs.polygon_offset_line_enable != 0; 1238 state.polygon_offset.line_enable = regs.polygon_offset_line_enable != 0;
1270 state.polygon_offset.point_enable = regs.polygon_offset_point_enable != 0; 1239 state.polygon_offset.point_enable = regs.polygon_offset_point_enable != 0;
@@ -1274,13 +1243,9 @@ void RasterizerOpenGL::SyncPolygonOffset() {
1274} 1243}
1275 1244
1276void RasterizerOpenGL::CheckAlphaTests() { 1245void RasterizerOpenGL::CheckAlphaTests() {
1277 const auto& regs = Core::System::GetInstance().GPU().Maxwell3D().regs; 1246 const auto& regs = system.GPU().Maxwell3D().regs;
1278 1247 UNIMPLEMENTED_IF_MSG(regs.alpha_test_enabled != 0 && regs.rt_control.count > 1,
1279 if (regs.alpha_test_enabled != 0 && regs.rt_control.count > 1) { 1248 "Alpha Testing is enabled with more than one rendertarget");
1280 LOG_CRITICAL(Render_OpenGL, "Alpha Testing is enabled with Multiple Render Targets, "
1281 "this behavior is undefined.");
1282 UNREACHABLE();
1283 }
1284} 1249}
1285 1250
1286} // namespace OpenGL 1251} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h
index f3b607f4d..4de565321 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer.h
@@ -50,22 +50,23 @@ struct FramebufferCacheKey;
50 50
51class RasterizerOpenGL : public VideoCore::RasterizerInterface { 51class RasterizerOpenGL : public VideoCore::RasterizerInterface {
52public: 52public:
53 explicit RasterizerOpenGL(Core::Frontend::EmuWindow& window, Core::System& system, 53 explicit RasterizerOpenGL(Core::System& system, ScreenInfo& info);
54 ScreenInfo& info);
55 ~RasterizerOpenGL() override; 54 ~RasterizerOpenGL() override;
56 55
57 void DrawArrays() override; 56 void DrawArrays() override;
58 void Clear() override; 57 void Clear() override;
59 void FlushAll() override; 58 void FlushAll() override;
60 void FlushRegion(VAddr addr, u64 size) override; 59 void FlushRegion(CacheAddr addr, u64 size) override;
61 void InvalidateRegion(VAddr addr, u64 size) override; 60 void InvalidateRegion(CacheAddr addr, u64 size) override;
62 void FlushAndInvalidateRegion(VAddr addr, u64 size) override; 61 void FlushAndInvalidateRegion(CacheAddr addr, u64 size) override;
63 bool AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Regs::Surface& src, 62 bool AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Regs::Surface& src,
64 const Tegra::Engines::Fermi2D::Regs::Surface& dst) override; 63 const Tegra::Engines::Fermi2D::Regs::Surface& dst,
64 const Common::Rectangle<u32>& src_rect,
65 const Common::Rectangle<u32>& dst_rect) override;
65 bool AccelerateDisplay(const Tegra::FramebufferConfig& config, VAddr framebuffer_addr, 66 bool AccelerateDisplay(const Tegra::FramebufferConfig& config, VAddr framebuffer_addr,
66 u32 pixel_stride) override; 67 u32 pixel_stride) override;
67 bool AccelerateDrawBatch(bool is_indexed) override; 68 bool AccelerateDrawBatch(bool is_indexed) override;
68 void UpdatePagesCachedCount(Tegra::GPUVAddr addr, u64 size, int delta) override; 69 void UpdatePagesCachedCount(VAddr addr, u64 size, int delta) override;
69 void LoadDiskResources(const std::atomic_bool& stop_loading, 70 void LoadDiskResources(const std::atomic_bool& stop_loading,
70 const VideoCore::DiskResourceLoadCallback& callback) override; 71 const VideoCore::DiskResourceLoadCallback& callback) override;
71 72
@@ -92,11 +93,12 @@ private:
92 private: 93 private:
93 Tegra::Texture::TextureFilter mag_filter = Tegra::Texture::TextureFilter::Nearest; 94 Tegra::Texture::TextureFilter mag_filter = Tegra::Texture::TextureFilter::Nearest;
94 Tegra::Texture::TextureFilter min_filter = Tegra::Texture::TextureFilter::Nearest; 95 Tegra::Texture::TextureFilter min_filter = Tegra::Texture::TextureFilter::Nearest;
95 Tegra::Texture::TextureMipmapFilter mip_filter = Tegra::Texture::TextureMipmapFilter::None; 96 Tegra::Texture::TextureMipmapFilter mipmap_filter =
97 Tegra::Texture::TextureMipmapFilter::None;
96 Tegra::Texture::WrapMode wrap_u = Tegra::Texture::WrapMode::ClampToEdge; 98 Tegra::Texture::WrapMode wrap_u = Tegra::Texture::WrapMode::ClampToEdge;
97 Tegra::Texture::WrapMode wrap_v = Tegra::Texture::WrapMode::ClampToEdge; 99 Tegra::Texture::WrapMode wrap_v = Tegra::Texture::WrapMode::ClampToEdge;
98 Tegra::Texture::WrapMode wrap_p = Tegra::Texture::WrapMode::ClampToEdge; 100 Tegra::Texture::WrapMode wrap_p = Tegra::Texture::WrapMode::ClampToEdge;
99 bool uses_depth_compare = false; 101 bool use_depth_compare = false;
100 Tegra::Texture::DepthCompareFunc depth_compare_func = 102 Tegra::Texture::DepthCompareFunc depth_compare_func =
101 Tegra::Texture::DepthCompareFunc::Always; 103 Tegra::Texture::DepthCompareFunc::Always;
102 GLvec4 border_color = {}; 104 GLvec4 border_color = {};
@@ -211,7 +213,7 @@ private:
211 ShaderCacheOpenGL shader_cache; 213 ShaderCacheOpenGL shader_cache;
212 GlobalRegionCacheOpenGL global_cache; 214 GlobalRegionCacheOpenGL global_cache;
213 215
214 Core::Frontend::EmuWindow& emu_window; 216 Core::System& system;
215 217
216 ScreenInfo& screen_info; 218 ScreenInfo& screen_info;
217 219
diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
index a79eee03e..aba6ce731 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
@@ -3,6 +3,7 @@
3// Refer to the license.txt file included. 3// Refer to the license.txt file included.
4 4
5#include <algorithm> 5#include <algorithm>
6#include <optional>
6#include <glad/glad.h> 7#include <glad/glad.h>
7 8
8#include "common/alignment.h" 9#include "common/alignment.h"
@@ -20,7 +21,7 @@
20#include "video_core/renderer_opengl/gl_rasterizer_cache.h" 21#include "video_core/renderer_opengl/gl_rasterizer_cache.h"
21#include "video_core/renderer_opengl/utils.h" 22#include "video_core/renderer_opengl/utils.h"
22#include "video_core/surface.h" 23#include "video_core/surface.h"
23#include "video_core/textures/astc.h" 24#include "video_core/textures/convert.h"
24#include "video_core/textures/decoders.h" 25#include "video_core/textures/decoders.h"
25 26
26namespace OpenGL { 27namespace OpenGL {
@@ -54,12 +55,11 @@ static void ApplyTextureDefaults(GLuint texture, u32 max_mip_level) {
54 } 55 }
55} 56}
56 57
57void SurfaceParams::InitCacheParameters(Tegra::GPUVAddr gpu_addr_) { 58void SurfaceParams::InitCacheParameters(GPUVAddr gpu_addr_) {
58 auto& memory_manager{Core::System::GetInstance().GPU().MemoryManager()}; 59 auto& memory_manager{Core::System::GetInstance().GPU().MemoryManager()};
59 const auto cpu_addr{memory_manager.GpuToCpuAddress(gpu_addr_)};
60 60
61 addr = cpu_addr ? *cpu_addr : 0;
62 gpu_addr = gpu_addr_; 61 gpu_addr = gpu_addr_;
62 host_ptr = memory_manager.GetPointer(gpu_addr_);
63 size_in_bytes = SizeInBytesRaw(); 63 size_in_bytes = SizeInBytesRaw();
64 64
65 if (IsPixelFormatASTC(pixel_format)) { 65 if (IsPixelFormatASTC(pixel_format)) {
@@ -125,6 +125,9 @@ std::size_t SurfaceParams::InnerMemorySize(bool force_gl, bool layer_only,
125 125
126 params.width = Common::AlignUp(config.tic.Width(), GetCompressionFactor(params.pixel_format)); 126 params.width = Common::AlignUp(config.tic.Width(), GetCompressionFactor(params.pixel_format));
127 params.height = Common::AlignUp(config.tic.Height(), GetCompressionFactor(params.pixel_format)); 127 params.height = Common::AlignUp(config.tic.Height(), GetCompressionFactor(params.pixel_format));
128 if (!params.is_tiled) {
129 params.pitch = config.tic.Pitch();
130 }
128 params.unaligned_height = config.tic.Height(); 131 params.unaligned_height = config.tic.Height();
129 params.target = SurfaceTargetFromTextureType(config.tic.texture_type); 132 params.target = SurfaceTargetFromTextureType(config.tic.texture_type);
130 params.identity = SurfaceClass::Uploaded; 133 params.identity = SurfaceClass::Uploaded;
@@ -191,7 +194,13 @@ std::size_t SurfaceParams::InnerMemorySize(bool force_gl, bool layer_only,
191 config.format == Tegra::RenderTargetFormat::RGBA8_SRGB; 194 config.format == Tegra::RenderTargetFormat::RGBA8_SRGB;
192 params.component_type = ComponentTypeFromRenderTarget(config.format); 195 params.component_type = ComponentTypeFromRenderTarget(config.format);
193 params.type = GetFormatType(params.pixel_format); 196 params.type = GetFormatType(params.pixel_format);
194 params.width = config.width; 197 if (params.is_tiled) {
198 params.width = config.width;
199 } else {
200 params.pitch = config.width;
201 const u32 bpp = params.GetFormatBpp() / 8;
202 params.width = params.pitch / bpp;
203 }
195 params.height = config.height; 204 params.height = config.height;
196 params.unaligned_height = config.height; 205 params.unaligned_height = config.height;
197 params.target = SurfaceTarget::Texture2D; 206 params.target = SurfaceTarget::Texture2D;
@@ -213,7 +222,7 @@ std::size_t SurfaceParams::InnerMemorySize(bool force_gl, bool layer_only,
213} 222}
214 223
215/*static*/ SurfaceParams SurfaceParams::CreateForDepthBuffer( 224/*static*/ SurfaceParams SurfaceParams::CreateForDepthBuffer(
216 u32 zeta_width, u32 zeta_height, Tegra::GPUVAddr zeta_address, Tegra::DepthFormat format, 225 u32 zeta_width, u32 zeta_height, GPUVAddr zeta_address, Tegra::DepthFormat format,
217 u32 block_width, u32 block_height, u32 block_depth, 226 u32 block_width, u32 block_height, u32 block_depth,
218 Tegra::Engines::Maxwell3D::Regs::InvMemoryLayout type) { 227 Tegra::Engines::Maxwell3D::Regs::InvMemoryLayout type) {
219 SurfaceParams params{}; 228 SurfaceParams params{};
@@ -390,7 +399,28 @@ static const FormatTuple& GetFormatTuple(PixelFormat pixel_format, ComponentType
390 return format; 399 return format;
391} 400}
392 401
393MathUtil::Rectangle<u32> SurfaceParams::GetRect(u32 mip_level) const { 402/// Returns the discrepant array target
403constexpr GLenum GetArrayDiscrepantTarget(SurfaceTarget target) {
404 switch (target) {
405 case SurfaceTarget::Texture1D:
406 return GL_TEXTURE_1D_ARRAY;
407 case SurfaceTarget::Texture2D:
408 return GL_TEXTURE_2D_ARRAY;
409 case SurfaceTarget::Texture3D:
410 return GL_NONE;
411 case SurfaceTarget::Texture1DArray:
412 return GL_TEXTURE_1D;
413 case SurfaceTarget::Texture2DArray:
414 return GL_TEXTURE_2D;
415 case SurfaceTarget::TextureCubemap:
416 return GL_TEXTURE_CUBE_MAP_ARRAY;
417 case SurfaceTarget::TextureCubeArray:
418 return GL_TEXTURE_CUBE_MAP;
419 }
420 return GL_NONE;
421}
422
423Common::Rectangle<u32> SurfaceParams::GetRect(u32 mip_level) const {
394 u32 actual_height{std::max(1U, unaligned_height >> mip_level)}; 424 u32 actual_height{std::max(1U, unaligned_height >> mip_level)};
395 if (IsPixelFormatASTC(pixel_format)) { 425 if (IsPixelFormatASTC(pixel_format)) {
396 // ASTC formats must stop at the ATSC block size boundary 426 // ASTC formats must stop at the ATSC block size boundary
@@ -414,8 +444,8 @@ void SwizzleFunc(const MortonSwizzleMode& mode, const SurfaceParams& params,
414 for (u32 i = 0; i < params.depth; i++) { 444 for (u32 i = 0; i < params.depth; i++) {
415 MortonSwizzle(mode, params.pixel_format, params.MipWidth(mip_level), 445 MortonSwizzle(mode, params.pixel_format, params.MipWidth(mip_level),
416 params.MipBlockHeight(mip_level), params.MipHeight(mip_level), 446 params.MipBlockHeight(mip_level), params.MipHeight(mip_level),
417 params.MipBlockDepth(mip_level), params.tile_width_spacing, 1, 447 params.MipBlockDepth(mip_level), 1, params.tile_width_spacing,
418 gl_buffer.data() + offset_gl, gl_size, params.addr + offset); 448 gl_buffer.data() + offset_gl, params.host_ptr + offset);
419 offset += layer_size; 449 offset += layer_size;
420 offset_gl += gl_size; 450 offset_gl += gl_size;
421 } 451 }
@@ -424,11 +454,12 @@ void SwizzleFunc(const MortonSwizzleMode& mode, const SurfaceParams& params,
424 MortonSwizzle(mode, params.pixel_format, params.MipWidth(mip_level), 454 MortonSwizzle(mode, params.pixel_format, params.MipWidth(mip_level),
425 params.MipBlockHeight(mip_level), params.MipHeight(mip_level), 455 params.MipBlockHeight(mip_level), params.MipHeight(mip_level),
426 params.MipBlockDepth(mip_level), depth, params.tile_width_spacing, 456 params.MipBlockDepth(mip_level), depth, params.tile_width_spacing,
427 gl_buffer.data(), gl_buffer.size(), params.addr + offset); 457 gl_buffer.data(), params.host_ptr + offset);
428 } 458 }
429} 459}
430 460
431static void FastCopySurface(const Surface& src_surface, const Surface& dst_surface) { 461void RasterizerCacheOpenGL::FastCopySurface(const Surface& src_surface,
462 const Surface& dst_surface) {
432 const auto& src_params{src_surface->GetSurfaceParams()}; 463 const auto& src_params{src_surface->GetSurfaceParams()};
433 const auto& dst_params{dst_surface->GetSurfaceParams()}; 464 const auto& dst_params{dst_surface->GetSurfaceParams()};
434 465
@@ -438,12 +469,15 @@ static void FastCopySurface(const Surface& src_surface, const Surface& dst_surfa
438 glCopyImageSubData(src_surface->Texture().handle, SurfaceTargetToGL(src_params.target), 0, 0, 0, 469 glCopyImageSubData(src_surface->Texture().handle, SurfaceTargetToGL(src_params.target), 0, 0, 0,
439 0, dst_surface->Texture().handle, SurfaceTargetToGL(dst_params.target), 0, 0, 470 0, dst_surface->Texture().handle, SurfaceTargetToGL(dst_params.target), 0, 0,
440 0, 0, width, height, 1); 471 0, 0, width, height, 1);
472
473 dst_surface->MarkAsModified(true, *this);
441} 474}
442 475
443MICROPROFILE_DEFINE(OpenGL_CopySurface, "OpenGL", "CopySurface", MP_RGB(128, 192, 64)); 476MICROPROFILE_DEFINE(OpenGL_CopySurface, "OpenGL", "CopySurface", MP_RGB(128, 192, 64));
444static void CopySurface(const Surface& src_surface, const Surface& dst_surface, 477void RasterizerCacheOpenGL::CopySurface(const Surface& src_surface, const Surface& dst_surface,
445 const GLuint copy_pbo_handle, const GLenum src_attachment = 0, 478 const GLuint copy_pbo_handle, const GLenum src_attachment,
446 const GLenum dst_attachment = 0, const std::size_t cubemap_face = 0) { 479 const GLenum dst_attachment,
480 const std::size_t cubemap_face) {
447 MICROPROFILE_SCOPE(OpenGL_CopySurface); 481 MICROPROFILE_SCOPE(OpenGL_CopySurface);
448 ASSERT_MSG(dst_attachment == 0, "Unimplemented"); 482 ASSERT_MSG(dst_attachment == 0, "Unimplemented");
449 483
@@ -478,9 +512,9 @@ static void CopySurface(const Surface& src_surface, const Surface& dst_surface,
478 "reinterpretation but the texture is tiled."); 512 "reinterpretation but the texture is tiled.");
479 } 513 }
480 const std::size_t remaining_size = dst_params.size_in_bytes - src_params.size_in_bytes; 514 const std::size_t remaining_size = dst_params.size_in_bytes - src_params.size_in_bytes;
481 515 auto& memory_manager{Core::System::GetInstance().GPU().MemoryManager()};
482 glBufferSubData(GL_PIXEL_PACK_BUFFER, src_params.size_in_bytes, remaining_size, 516 glBufferSubData(GL_PIXEL_PACK_BUFFER, src_params.size_in_bytes, remaining_size,
483 Memory::GetPointer(dst_params.addr + src_params.size_in_bytes)); 517 memory_manager.GetPointer(dst_params.gpu_addr + src_params.size_in_bytes));
484 } 518 }
485 519
486 glBindBuffer(GL_PIXEL_PACK_BUFFER, 0); 520 glBindBuffer(GL_PIXEL_PACK_BUFFER, 0);
@@ -523,17 +557,27 @@ static void CopySurface(const Surface& src_surface, const Surface& dst_surface,
523 } 557 }
524 glBindBuffer(GL_PIXEL_UNPACK_BUFFER, 0); 558 glBindBuffer(GL_PIXEL_UNPACK_BUFFER, 0);
525 } 559 }
560
561 dst_surface->MarkAsModified(true, *this);
526} 562}
527 563
528CachedSurface::CachedSurface(const SurfaceParams& params) 564CachedSurface::CachedSurface(const SurfaceParams& params)
529 : params(params), gl_target(SurfaceTargetToGL(params.target)), 565 : RasterizerCacheObject{params.host_ptr}, params{params},
530 cached_size_in_bytes(params.size_in_bytes) { 566 gl_target{SurfaceTargetToGL(params.target)}, cached_size_in_bytes{params.size_in_bytes} {
567
568 const auto optional_cpu_addr{
569 Core::System::GetInstance().GPU().MemoryManager().GpuToCpuAddress(params.gpu_addr)};
570 ASSERT_MSG(optional_cpu_addr, "optional_cpu_addr is invalid");
571 cpu_addr = *optional_cpu_addr;
572
531 texture.Create(gl_target); 573 texture.Create(gl_target);
532 574
533 // TODO(Rodrigo): Using params.GetRect() returns a different size than using its Mip*(0) 575 // TODO(Rodrigo): Using params.GetRect() returns a different size than using its Mip*(0)
534 // alternatives. This signals a bug on those functions. 576 // alternatives. This signals a bug on those functions.
535 const auto width = static_cast<GLsizei>(params.MipWidth(0)); 577 const auto width = static_cast<GLsizei>(params.MipWidth(0));
536 const auto height = static_cast<GLsizei>(params.MipHeight(0)); 578 const auto height = static_cast<GLsizei>(params.MipHeight(0));
579 memory_size = params.MemorySize();
580 reinterpreted = false;
537 581
538 const auto& format_tuple = GetFormatTuple(params.pixel_format, params.component_type); 582 const auto& format_tuple = GetFormatTuple(params.pixel_format, params.component_type);
539 gl_internal_format = format_tuple.internal_format; 583 gl_internal_format = format_tuple.internal_format;
@@ -564,116 +608,7 @@ CachedSurface::CachedSurface(const SurfaceParams& params)
564 608
565 ApplyTextureDefaults(texture.handle, params.max_mip_level); 609 ApplyTextureDefaults(texture.handle, params.max_mip_level);
566 610
567 OpenGL::LabelGLObject(GL_TEXTURE, texture.handle, params.addr, params.IdentityString()); 611 OpenGL::LabelGLObject(GL_TEXTURE, texture.handle, params.gpu_addr, params.IdentityString());
568
569 // Clamp size to mapped GPU memory region
570 // TODO(bunnei): Super Mario Odyssey maps a 0x40000 byte region and then uses it for a 0x80000
571 // R32F render buffer. We do not yet know if this is a game bug or something else, but this
572 // check is necessary to prevent flushing from overwriting unmapped memory.
573
574 auto& memory_manager{Core::System::GetInstance().GPU().MemoryManager()};
575 const u64 max_size{memory_manager.GetRegionEnd(params.gpu_addr) - params.gpu_addr};
576 if (cached_size_in_bytes > max_size) {
577 LOG_ERROR(HW_GPU, "Surface size {} exceeds region size {}", params.size_in_bytes, max_size);
578 cached_size_in_bytes = max_size;
579 }
580}
581
582static void ConvertS8Z24ToZ24S8(std::vector<u8>& data, u32 width, u32 height, bool reverse) {
583 union S8Z24 {
584 BitField<0, 24, u32> z24;
585 BitField<24, 8, u32> s8;
586 };
587 static_assert(sizeof(S8Z24) == 4, "S8Z24 is incorrect size");
588
589 union Z24S8 {
590 BitField<0, 8, u32> s8;
591 BitField<8, 24, u32> z24;
592 };
593 static_assert(sizeof(Z24S8) == 4, "Z24S8 is incorrect size");
594
595 S8Z24 s8z24_pixel{};
596 Z24S8 z24s8_pixel{};
597 constexpr auto bpp{GetBytesPerPixel(PixelFormat::S8Z24)};
598 for (std::size_t y = 0; y < height; ++y) {
599 for (std::size_t x = 0; x < width; ++x) {
600 const std::size_t offset{bpp * (y * width + x)};
601 if (reverse) {
602 std::memcpy(&z24s8_pixel, &data[offset], sizeof(Z24S8));
603 s8z24_pixel.s8.Assign(z24s8_pixel.s8);
604 s8z24_pixel.z24.Assign(z24s8_pixel.z24);
605 std::memcpy(&data[offset], &s8z24_pixel, sizeof(S8Z24));
606 } else {
607 std::memcpy(&s8z24_pixel, &data[offset], sizeof(S8Z24));
608 z24s8_pixel.s8.Assign(s8z24_pixel.s8);
609 z24s8_pixel.z24.Assign(s8z24_pixel.z24);
610 std::memcpy(&data[offset], &z24s8_pixel, sizeof(Z24S8));
611 }
612 }
613 }
614}
615
616/**
617 * Helper function to perform software conversion (as needed) when loading a buffer from Switch
618 * memory. This is for Maxwell pixel formats that cannot be represented as-is in OpenGL or with
619 * typical desktop GPUs.
620 */
621static void ConvertFormatAsNeeded_LoadGLBuffer(std::vector<u8>& data, PixelFormat pixel_format,
622 u32 width, u32 height, u32 depth) {
623 switch (pixel_format) {
624 case PixelFormat::ASTC_2D_4X4:
625 case PixelFormat::ASTC_2D_8X8:
626 case PixelFormat::ASTC_2D_8X5:
627 case PixelFormat::ASTC_2D_5X4:
628 case PixelFormat::ASTC_2D_5X5:
629 case PixelFormat::ASTC_2D_4X4_SRGB:
630 case PixelFormat::ASTC_2D_8X8_SRGB:
631 case PixelFormat::ASTC_2D_8X5_SRGB:
632 case PixelFormat::ASTC_2D_5X4_SRGB:
633 case PixelFormat::ASTC_2D_5X5_SRGB:
634 case PixelFormat::ASTC_2D_10X8:
635 case PixelFormat::ASTC_2D_10X8_SRGB: {
636 // Convert ASTC pixel formats to RGBA8, as most desktop GPUs do not support ASTC.
637 u32 block_width{};
638 u32 block_height{};
639 std::tie(block_width, block_height) = GetASTCBlockSize(pixel_format);
640 data =
641 Tegra::Texture::ASTC::Decompress(data, width, height, depth, block_width, block_height);
642 break;
643 }
644 case PixelFormat::S8Z24:
645 // Convert the S8Z24 depth format to Z24S8, as OpenGL does not support S8Z24.
646 ConvertS8Z24ToZ24S8(data, width, height, false);
647 break;
648 }
649}
650
651/**
652 * Helper function to perform software conversion (as needed) when flushing a buffer from OpenGL to
653 * Switch memory. This is for Maxwell pixel formats that cannot be represented as-is in OpenGL or
654 * with typical desktop GPUs.
655 */
656static void ConvertFormatAsNeeded_FlushGLBuffer(std::vector<u8>& data, PixelFormat pixel_format,
657 u32 width, u32 height) {
658 switch (pixel_format) {
659 case PixelFormat::ASTC_2D_4X4:
660 case PixelFormat::ASTC_2D_8X8:
661 case PixelFormat::ASTC_2D_4X4_SRGB:
662 case PixelFormat::ASTC_2D_8X8_SRGB:
663 case PixelFormat::ASTC_2D_5X5:
664 case PixelFormat::ASTC_2D_5X5_SRGB:
665 case PixelFormat::ASTC_2D_10X8:
666 case PixelFormat::ASTC_2D_10X8_SRGB: {
667 LOG_CRITICAL(HW_GPU, "Conversion of format {} after texture flushing is not implemented",
668 static_cast<u32>(pixel_format));
669 UNREACHABLE();
670 break;
671 }
672 case PixelFormat::S8Z24:
673 // Convert the Z24S8 depth format to S8Z24, as OpenGL does not support S8Z24.
674 ConvertS8Z24ToZ24S8(data, width, height, true);
675 break;
676 }
677} 612}
678 613
679MICROPROFILE_DEFINE(OpenGL_SurfaceLoad, "OpenGL", "Surface Load", MP_RGB(128, 192, 64)); 614MICROPROFILE_DEFINE(OpenGL_SurfaceLoad, "OpenGL", "Surface Load", MP_RGB(128, 192, 64));
@@ -688,13 +623,31 @@ void CachedSurface::LoadGLBuffer() {
688 for (u32 i = 0; i < params.max_mip_level; i++) 623 for (u32 i = 0; i < params.max_mip_level; i++)
689 SwizzleFunc(MortonSwizzleMode::MortonToLinear, params, gl_buffer[i], i); 624 SwizzleFunc(MortonSwizzleMode::MortonToLinear, params, gl_buffer[i], i);
690 } else { 625 } else {
691 const auto texture_src_data{Memory::GetPointer(params.addr)}; 626 const u32 bpp = params.GetFormatBpp() / 8;
692 const auto texture_src_data_end{texture_src_data + params.size_in_bytes_gl}; 627 const u32 copy_size = params.width * bpp;
693 gl_buffer[0].assign(texture_src_data, texture_src_data_end); 628 if (params.pitch == copy_size) {
629 std::memcpy(gl_buffer[0].data(), params.host_ptr, params.size_in_bytes_gl);
630 } else {
631 const u8* start{params.host_ptr};
632 u8* write_to = gl_buffer[0].data();
633 for (u32 h = params.height; h > 0; h--) {
634 std::memcpy(write_to, start, copy_size);
635 start += params.pitch;
636 write_to += copy_size;
637 }
638 }
694 } 639 }
695 for (u32 i = 0; i < params.max_mip_level; i++) { 640 for (u32 i = 0; i < params.max_mip_level; i++) {
696 ConvertFormatAsNeeded_LoadGLBuffer(gl_buffer[i], params.pixel_format, params.MipWidth(i), 641 const u32 width = params.MipWidth(i);
697 params.MipHeight(i), params.MipDepth(i)); 642 const u32 height = params.MipHeight(i);
643 const u32 depth = params.MipDepth(i);
644 if (VideoCore::Surface::IsPixelFormatASTC(params.pixel_format)) {
645 // Reserve size for RGBA8 conversion
646 constexpr std::size_t rgba_bpp = 4;
647 gl_buffer[i].resize(std::max(gl_buffer[i].size(), width * height * depth * rgba_bpp));
648 }
649 Tegra::Texture::ConvertFromGuestToHost(gl_buffer[i].data(), params.pixel_format, width,
650 height, depth, true, true);
698 } 651 }
699} 652}
700 653
@@ -717,17 +670,27 @@ void CachedSurface::FlushGLBuffer() {
717 glGetTextureImage(texture.handle, 0, tuple.format, tuple.type, 670 glGetTextureImage(texture.handle, 0, tuple.format, tuple.type,
718 static_cast<GLsizei>(gl_buffer[0].size()), gl_buffer[0].data()); 671 static_cast<GLsizei>(gl_buffer[0].size()), gl_buffer[0].data());
719 glPixelStorei(GL_PACK_ROW_LENGTH, 0); 672 glPixelStorei(GL_PACK_ROW_LENGTH, 0);
720 ConvertFormatAsNeeded_FlushGLBuffer(gl_buffer[0], params.pixel_format, params.width, 673 Tegra::Texture::ConvertFromHostToGuest(gl_buffer[0].data(), params.pixel_format, params.width,
721 params.height); 674 params.height, params.depth, true, true);
722 const u8* const texture_src_data = Memory::GetPointer(params.addr);
723 ASSERT(texture_src_data);
724 if (params.is_tiled) { 675 if (params.is_tiled) {
725 ASSERT_MSG(params.block_width == 1, "Block width is defined as {} on texture type {}", 676 ASSERT_MSG(params.block_width == 1, "Block width is defined as {} on texture type {}",
726 params.block_width, static_cast<u32>(params.target)); 677 params.block_width, static_cast<u32>(params.target));
727 678
728 SwizzleFunc(MortonSwizzleMode::LinearToMorton, params, gl_buffer[0], 0); 679 SwizzleFunc(MortonSwizzleMode::LinearToMorton, params, gl_buffer[0], 0);
729 } else { 680 } else {
730 std::memcpy(Memory::GetPointer(GetAddr()), gl_buffer[0].data(), GetSizeInBytes()); 681 const u32 bpp = params.GetFormatBpp() / 8;
682 const u32 copy_size = params.width * bpp;
683 if (params.pitch == copy_size) {
684 std::memcpy(params.host_ptr, gl_buffer[0].data(), GetSizeInBytes());
685 } else {
686 u8* start{params.host_ptr};
687 const u8* read_to = gl_buffer[0].data();
688 for (u32 h = params.height; h > 0; h--) {
689 std::memcpy(start, read_to, copy_size);
690 start += params.pitch;
691 read_to += copy_size;
692 }
693 }
731 } 694 }
732} 695}
733 696
@@ -843,20 +806,22 @@ void CachedSurface::UploadGLMipmapTexture(u32 mip_map, GLuint read_fb_handle,
843 glPixelStorei(GL_UNPACK_ROW_LENGTH, 0); 806 glPixelStorei(GL_UNPACK_ROW_LENGTH, 0);
844} 807}
845 808
846void CachedSurface::EnsureTextureView() { 809void CachedSurface::EnsureTextureDiscrepantView() {
847 if (texture_view.handle != 0) 810 if (discrepant_view.handle != 0)
848 return; 811 return;
849 812
850 const GLenum target{TargetLayer()}; 813 const GLenum target{GetArrayDiscrepantTarget(params.target)};
814 ASSERT(target != GL_NONE);
815
851 const GLuint num_layers{target == GL_TEXTURE_CUBE_MAP_ARRAY ? 6u : 1u}; 816 const GLuint num_layers{target == GL_TEXTURE_CUBE_MAP_ARRAY ? 6u : 1u};
852 constexpr GLuint min_layer = 0; 817 constexpr GLuint min_layer = 0;
853 constexpr GLuint min_level = 0; 818 constexpr GLuint min_level = 0;
854 819
855 glGenTextures(1, &texture_view.handle); 820 glGenTextures(1, &discrepant_view.handle);
856 glTextureView(texture_view.handle, target, texture.handle, gl_internal_format, 0, 821 glTextureView(discrepant_view.handle, target, texture.handle, gl_internal_format, min_level,
857 params.max_mip_level, 0, 1); 822 params.max_mip_level, min_layer, num_layers);
858 ApplyTextureDefaults(texture_view.handle, params.max_mip_level); 823 ApplyTextureDefaults(discrepant_view.handle, params.max_mip_level);
859 glTextureParameteriv(texture_view.handle, GL_TEXTURE_SWIZZLE_RGBA, 824 glTextureParameteriv(discrepant_view.handle, GL_TEXTURE_SWIZZLE_RGBA,
860 reinterpret_cast<const GLint*>(swizzle.data())); 825 reinterpret_cast<const GLint*>(swizzle.data()));
861} 826}
862 827
@@ -882,8 +847,8 @@ void CachedSurface::UpdateSwizzle(Tegra::Texture::SwizzleSource swizzle_x,
882 swizzle = {new_x, new_y, new_z, new_w}; 847 swizzle = {new_x, new_y, new_z, new_w};
883 const auto swizzle_data = reinterpret_cast<const GLint*>(swizzle.data()); 848 const auto swizzle_data = reinterpret_cast<const GLint*>(swizzle.data());
884 glTextureParameteriv(texture.handle, GL_TEXTURE_SWIZZLE_RGBA, swizzle_data); 849 glTextureParameteriv(texture.handle, GL_TEXTURE_SWIZZLE_RGBA, swizzle_data);
885 if (texture_view.handle != 0) { 850 if (discrepant_view.handle != 0) {
886 glTextureParameteriv(texture_view.handle, GL_TEXTURE_SWIZZLE_RGBA, swizzle_data); 851 glTextureParameteriv(discrepant_view.handle, GL_TEXTURE_SWIZZLE_RGBA, swizzle_data);
887 } 852 }
888} 853}
889 854
@@ -924,42 +889,45 @@ Surface RasterizerCacheOpenGL::GetColorBufferSurface(std::size_t index, bool pre
924 auto& gpu{Core::System::GetInstance().GPU().Maxwell3D()}; 889 auto& gpu{Core::System::GetInstance().GPU().Maxwell3D()};
925 const auto& regs{gpu.regs}; 890 const auto& regs{gpu.regs};
926 891
927 if ((gpu.dirty_flags.color_buffer & (1u << static_cast<u32>(index))) == 0) { 892 if (!gpu.dirty_flags.color_buffer[index]) {
928 return last_color_buffers[index]; 893 return current_color_buffers[index];
929 } 894 }
930 gpu.dirty_flags.color_buffer &= ~(1u << static_cast<u32>(index)); 895 gpu.dirty_flags.color_buffer.reset(index);
931 896
932 ASSERT(index < Tegra::Engines::Maxwell3D::Regs::NumRenderTargets); 897 ASSERT(index < Tegra::Engines::Maxwell3D::Regs::NumRenderTargets);
933 898
934 if (index >= regs.rt_control.count) { 899 if (index >= regs.rt_control.count) {
935 return last_color_buffers[index] = {}; 900 return current_color_buffers[index] = {};
936 } 901 }
937 902
938 if (regs.rt[index].Address() == 0 || regs.rt[index].format == Tegra::RenderTargetFormat::NONE) { 903 if (regs.rt[index].Address() == 0 || regs.rt[index].format == Tegra::RenderTargetFormat::NONE) {
939 return last_color_buffers[index] = {}; 904 return current_color_buffers[index] = {};
940 } 905 }
941 906
942 const SurfaceParams color_params{SurfaceParams::CreateForFramebuffer(index)}; 907 const SurfaceParams color_params{SurfaceParams::CreateForFramebuffer(index)};
943 908
944 return last_color_buffers[index] = GetSurface(color_params, preserve_contents); 909 return current_color_buffers[index] = GetSurface(color_params, preserve_contents);
945} 910}
946 911
947void RasterizerCacheOpenGL::LoadSurface(const Surface& surface) { 912void RasterizerCacheOpenGL::LoadSurface(const Surface& surface) {
948 surface->LoadGLBuffer(); 913 surface->LoadGLBuffer();
949 surface->UploadGLTexture(read_framebuffer.handle, draw_framebuffer.handle); 914 surface->UploadGLTexture(read_framebuffer.handle, draw_framebuffer.handle);
950 surface->MarkAsModified(false, *this); 915 surface->MarkAsModified(false, *this);
916 surface->MarkForReload(false);
951} 917}
952 918
953Surface RasterizerCacheOpenGL::GetSurface(const SurfaceParams& params, bool preserve_contents) { 919Surface RasterizerCacheOpenGL::GetSurface(const SurfaceParams& params, bool preserve_contents) {
954 if (params.addr == 0 || params.height * params.width == 0) { 920 if (!params.IsValid()) {
955 return {}; 921 return {};
956 } 922 }
957 923
958 // Look up surface in the cache based on address 924 // Look up surface in the cache based on address
959 Surface surface{TryGet(params.addr)}; 925 Surface surface{TryGet(params.host_ptr)};
960 if (surface) { 926 if (surface) {
961 if (surface->GetSurfaceParams().IsCompatibleSurface(params)) { 927 if (surface->GetSurfaceParams().IsCompatibleSurface(params)) {
962 // Use the cached surface as-is 928 // Use the cached surface as-is unless it's not synced with memory
929 if (surface->MustReload())
930 LoadSurface(surface);
963 return surface; 931 return surface;
964 } else if (preserve_contents) { 932 } else if (preserve_contents) {
965 // If surface parameters changed and we care about keeping the previous data, recreate 933 // If surface parameters changed and we care about keeping the previous data, recreate
@@ -967,6 +935,9 @@ Surface RasterizerCacheOpenGL::GetSurface(const SurfaceParams& params, bool pres
967 Surface new_surface{RecreateSurface(surface, params)}; 935 Surface new_surface{RecreateSurface(surface, params)};
968 Unregister(surface); 936 Unregister(surface);
969 Register(new_surface); 937 Register(new_surface);
938 if (new_surface->IsUploaded()) {
939 RegisterReinterpretSurface(new_surface);
940 }
970 return new_surface; 941 return new_surface;
971 } else { 942 } else {
972 // Delete the old surface before creating a new one to prevent collisions. 943 // Delete the old surface before creating a new one to prevent collisions.
@@ -1000,14 +971,16 @@ void RasterizerCacheOpenGL::FastLayeredCopySurface(const Surface& src_surface,
1000 const Surface& dst_surface) { 971 const Surface& dst_surface) {
1001 const auto& init_params{src_surface->GetSurfaceParams()}; 972 const auto& init_params{src_surface->GetSurfaceParams()};
1002 const auto& dst_params{dst_surface->GetSurfaceParams()}; 973 const auto& dst_params{dst_surface->GetSurfaceParams()};
1003 VAddr address = init_params.addr; 974 auto& memory_manager{Core::System::GetInstance().GPU().MemoryManager()};
1004 const std::size_t layer_size = dst_params.LayerMemorySize(); 975 GPUVAddr address{init_params.gpu_addr};
976 const std::size_t layer_size{dst_params.LayerMemorySize()};
1005 for (u32 layer = 0; layer < dst_params.depth; layer++) { 977 for (u32 layer = 0; layer < dst_params.depth; layer++) {
1006 for (u32 mipmap = 0; mipmap < dst_params.max_mip_level; mipmap++) { 978 for (u32 mipmap = 0; mipmap < dst_params.max_mip_level; mipmap++) {
1007 const VAddr sub_address = address + dst_params.GetMipmapLevelOffset(mipmap); 979 const GPUVAddr sub_address{address + dst_params.GetMipmapLevelOffset(mipmap)};
1008 const Surface& copy = TryGet(sub_address); 980 const Surface& copy{TryGet(memory_manager.GetPointer(sub_address))};
1009 if (!copy) 981 if (!copy) {
1010 continue; 982 continue;
983 }
1011 const auto& src_params{copy->GetSurfaceParams()}; 984 const auto& src_params{copy->GetSurfaceParams()};
1012 const u32 width{std::min(src_params.width, dst_params.MipWidth(mipmap))}; 985 const u32 width{std::min(src_params.width, dst_params.MipWidth(mipmap))};
1013 const u32 height{std::min(src_params.height, dst_params.MipHeight(mipmap))}; 986 const u32 height{std::min(src_params.height, dst_params.MipHeight(mipmap))};
@@ -1019,26 +992,161 @@ void RasterizerCacheOpenGL::FastLayeredCopySurface(const Surface& src_surface,
1019 } 992 }
1020 address += layer_size; 993 address += layer_size;
1021 } 994 }
995
996 dst_surface->MarkAsModified(true, *this);
997}
998
999static bool BlitSurface(const Surface& src_surface, const Surface& dst_surface,
1000 const Common::Rectangle<u32>& src_rect,
1001 const Common::Rectangle<u32>& dst_rect, GLuint read_fb_handle,
1002 GLuint draw_fb_handle, GLenum src_attachment = 0, GLenum dst_attachment = 0,
1003 std::size_t cubemap_face = 0) {
1004
1005 const auto& src_params{src_surface->GetSurfaceParams()};
1006 const auto& dst_params{dst_surface->GetSurfaceParams()};
1007
1008 OpenGLState prev_state{OpenGLState::GetCurState()};
1009 SCOPE_EXIT({ prev_state.Apply(); });
1010
1011 OpenGLState state;
1012 state.draw.read_framebuffer = read_fb_handle;
1013 state.draw.draw_framebuffer = draw_fb_handle;
1014 state.Apply();
1015
1016 u32 buffers{};
1017
1018 if (src_params.type == SurfaceType::ColorTexture) {
1019 switch (src_params.target) {
1020 case SurfaceTarget::Texture2D:
1021 glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0 + src_attachment,
1022 GL_TEXTURE_2D, src_surface->Texture().handle, 0);
1023 glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D,
1024 0, 0);
1025 break;
1026 case SurfaceTarget::TextureCubemap:
1027 glFramebufferTexture2D(
1028 GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0 + src_attachment,
1029 static_cast<GLenum>(GL_TEXTURE_CUBE_MAP_POSITIVE_X + cubemap_face),
1030 src_surface->Texture().handle, 0);
1031 glFramebufferTexture2D(
1032 GL_READ_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT,
1033 static_cast<GLenum>(GL_TEXTURE_CUBE_MAP_POSITIVE_X + cubemap_face), 0, 0);
1034 break;
1035 case SurfaceTarget::Texture2DArray:
1036 glFramebufferTextureLayer(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0 + src_attachment,
1037 src_surface->Texture().handle, 0, 0);
1038 glFramebufferTextureLayer(GL_READ_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, 0, 0, 0);
1039 break;
1040 case SurfaceTarget::Texture3D:
1041 glFramebufferTexture3D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0 + src_attachment,
1042 SurfaceTargetToGL(src_params.target),
1043 src_surface->Texture().handle, 0, 0);
1044 glFramebufferTexture3D(GL_READ_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT,
1045 SurfaceTargetToGL(src_params.target), 0, 0, 0);
1046 break;
1047 default:
1048 glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0 + src_attachment,
1049 GL_TEXTURE_2D, src_surface->Texture().handle, 0);
1050 glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D,
1051 0, 0);
1052 break;
1053 }
1054
1055 switch (dst_params.target) {
1056 case SurfaceTarget::Texture2D:
1057 glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0 + dst_attachment,
1058 GL_TEXTURE_2D, dst_surface->Texture().handle, 0);
1059 glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D,
1060 0, 0);
1061 break;
1062 case SurfaceTarget::TextureCubemap:
1063 glFramebufferTexture2D(
1064 GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0 + dst_attachment,
1065 static_cast<GLenum>(GL_TEXTURE_CUBE_MAP_POSITIVE_X + cubemap_face),
1066 dst_surface->Texture().handle, 0);
1067 glFramebufferTexture2D(
1068 GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT,
1069 static_cast<GLenum>(GL_TEXTURE_CUBE_MAP_POSITIVE_X + cubemap_face), 0, 0);
1070 break;
1071 case SurfaceTarget::Texture2DArray:
1072 glFramebufferTextureLayer(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0 + dst_attachment,
1073 dst_surface->Texture().handle, 0, 0);
1074 glFramebufferTextureLayer(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, 0, 0, 0);
1075 break;
1076
1077 case SurfaceTarget::Texture3D:
1078 glFramebufferTexture3D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0 + dst_attachment,
1079 SurfaceTargetToGL(dst_params.target),
1080 dst_surface->Texture().handle, 0, 0);
1081 glFramebufferTexture3D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT,
1082 SurfaceTargetToGL(dst_params.target), 0, 0, 0);
1083 break;
1084 default:
1085 glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0 + dst_attachment,
1086 GL_TEXTURE_2D, dst_surface->Texture().handle, 0);
1087 glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D,
1088 0, 0);
1089 break;
1090 }
1091
1092 buffers = GL_COLOR_BUFFER_BIT;
1093 } else if (src_params.type == SurfaceType::Depth) {
1094 glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0 + src_attachment,
1095 GL_TEXTURE_2D, 0, 0);
1096 glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_TEXTURE_2D,
1097 src_surface->Texture().handle, 0);
1098 glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, 0);
1099
1100 glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0 + dst_attachment,
1101 GL_TEXTURE_2D, 0, 0);
1102 glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_TEXTURE_2D,
1103 dst_surface->Texture().handle, 0);
1104 glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, 0);
1105
1106 buffers = GL_DEPTH_BUFFER_BIT;
1107 } else if (src_params.type == SurfaceType::DepthStencil) {
1108 glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0 + src_attachment,
1109 GL_TEXTURE_2D, 0, 0);
1110 glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D,
1111 src_surface->Texture().handle, 0);
1112
1113 glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0 + dst_attachment,
1114 GL_TEXTURE_2D, 0, 0);
1115 glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D,
1116 dst_surface->Texture().handle, 0);
1117
1118 buffers = GL_DEPTH_BUFFER_BIT | GL_STENCIL_BUFFER_BIT;
1119 }
1120
1121 glBlitFramebuffer(src_rect.left, src_rect.top, src_rect.right, src_rect.bottom, dst_rect.left,
1122 dst_rect.top, dst_rect.right, dst_rect.bottom, buffers,
1123 buffers == GL_COLOR_BUFFER_BIT ? GL_LINEAR : GL_NEAREST);
1124
1125 return true;
1022} 1126}
1023 1127
1024void RasterizerCacheOpenGL::FermiCopySurface( 1128void RasterizerCacheOpenGL::FermiCopySurface(
1025 const Tegra::Engines::Fermi2D::Regs::Surface& src_config, 1129 const Tegra::Engines::Fermi2D::Regs::Surface& src_config,
1026 const Tegra::Engines::Fermi2D::Regs::Surface& dst_config) { 1130 const Tegra::Engines::Fermi2D::Regs::Surface& dst_config,
1131 const Common::Rectangle<u32>& src_rect, const Common::Rectangle<u32>& dst_rect) {
1027 1132
1028 const auto& src_params = SurfaceParams::CreateForFermiCopySurface(src_config); 1133 const auto& src_params = SurfaceParams::CreateForFermiCopySurface(src_config);
1029 const auto& dst_params = SurfaceParams::CreateForFermiCopySurface(dst_config); 1134 const auto& dst_params = SurfaceParams::CreateForFermiCopySurface(dst_config);
1030 1135
1031 ASSERT(src_params.width == dst_params.width);
1032 ASSERT(src_params.height == dst_params.height);
1033 ASSERT(src_params.pixel_format == dst_params.pixel_format); 1136 ASSERT(src_params.pixel_format == dst_params.pixel_format);
1034 ASSERT(src_params.block_height == dst_params.block_height); 1137 ASSERT(src_params.block_height == dst_params.block_height);
1035 ASSERT(src_params.is_tiled == dst_params.is_tiled); 1138 ASSERT(src_params.is_tiled == dst_params.is_tiled);
1036 ASSERT(src_params.depth == dst_params.depth); 1139 ASSERT(src_params.depth == dst_params.depth);
1037 ASSERT(src_params.depth == 1); // Currently, FastCopySurface only works with 2D surfaces
1038 ASSERT(src_params.target == dst_params.target); 1140 ASSERT(src_params.target == dst_params.target);
1039 ASSERT(src_params.rt.index == dst_params.rt.index); 1141 ASSERT(src_params.rt.index == dst_params.rt.index);
1040 1142
1041 FastCopySurface(GetSurface(src_params, true), GetSurface(dst_params, false)); 1143 auto src_surface = GetSurface(src_params, true);
1144 auto dst_surface = GetSurface(dst_params, true);
1145
1146 BlitSurface(src_surface, dst_surface, src_rect, dst_rect, read_framebuffer.handle,
1147 draw_framebuffer.handle);
1148
1149 dst_surface->MarkAsModified(true, *this);
1042} 1150}
1043 1151
1044void RasterizerCacheOpenGL::AccurateCopySurface(const Surface& src_surface, 1152void RasterizerCacheOpenGL::AccurateCopySurface(const Surface& src_surface,
@@ -1047,7 +1155,8 @@ void RasterizerCacheOpenGL::AccurateCopySurface(const Surface& src_surface,
1047 const auto& dst_params{dst_surface->GetSurfaceParams()}; 1155 const auto& dst_params{dst_surface->GetSurfaceParams()};
1048 1156
1049 // Flush enough memory for both the source and destination surface 1157 // Flush enough memory for both the source and destination surface
1050 FlushRegion(src_params.addr, std::max(src_params.MemorySize(), dst_params.MemorySize())); 1158 FlushRegion(ToCacheAddr(src_params.host_ptr),
1159 std::max(src_params.MemorySize(), dst_params.MemorySize()));
1051 1160
1052 LoadSurface(dst_surface); 1161 LoadSurface(dst_surface);
1053} 1162}
@@ -1084,7 +1193,11 @@ Surface RasterizerCacheOpenGL::RecreateSurface(const Surface& old_surface,
1084 case SurfaceTarget::TextureCubemap: 1193 case SurfaceTarget::TextureCubemap:
1085 case SurfaceTarget::Texture2DArray: 1194 case SurfaceTarget::Texture2DArray:
1086 case SurfaceTarget::TextureCubeArray: 1195 case SurfaceTarget::TextureCubeArray:
1087 FastLayeredCopySurface(old_surface, new_surface); 1196 if (old_params.pixel_format == new_params.pixel_format)
1197 FastLayeredCopySurface(old_surface, new_surface);
1198 else {
1199 AccurateCopySurface(old_surface, new_surface);
1200 }
1088 break; 1201 break;
1089 default: 1202 default:
1090 LOG_CRITICAL(Render_OpenGL, "Unimplemented surface target={}", 1203 LOG_CRITICAL(Render_OpenGL, "Unimplemented surface target={}",
@@ -1095,8 +1208,8 @@ Surface RasterizerCacheOpenGL::RecreateSurface(const Surface& old_surface,
1095 return new_surface; 1208 return new_surface;
1096} 1209}
1097 1210
1098Surface RasterizerCacheOpenGL::TryFindFramebufferSurface(VAddr addr) const { 1211Surface RasterizerCacheOpenGL::TryFindFramebufferSurface(const u8* host_ptr) const {
1099 return TryGet(addr); 1212 return TryGet(host_ptr);
1100} 1213}
1101 1214
1102void RasterizerCacheOpenGL::ReserveSurface(const Surface& surface) { 1215void RasterizerCacheOpenGL::ReserveSurface(const Surface& surface) {
@@ -1113,4 +1226,108 @@ Surface RasterizerCacheOpenGL::TryGetReservedSurface(const SurfaceParams& params
1113 return {}; 1226 return {};
1114} 1227}
1115 1228
1229static std::optional<u32> TryFindBestMipMap(std::size_t memory, const SurfaceParams params,
1230 u32 height) {
1231 for (u32 i = 0; i < params.max_mip_level; i++) {
1232 if (memory == params.GetMipmapSingleSize(i) && params.MipHeight(i) == height) {
1233 return {i};
1234 }
1235 }
1236 return {};
1237}
1238
1239static std::optional<u32> TryFindBestLayer(GPUVAddr addr, const SurfaceParams params, u32 mipmap) {
1240 const std::size_t size{params.LayerMemorySize()};
1241 GPUVAddr start{params.gpu_addr + params.GetMipmapLevelOffset(mipmap)};
1242 for (u32 i = 0; i < params.depth; i++) {
1243 if (start == addr) {
1244 return {i};
1245 }
1246 start += size;
1247 }
1248 return {};
1249}
1250
1251static bool LayerFitReinterpretSurface(RasterizerCacheOpenGL& cache, const Surface render_surface,
1252 const Surface blitted_surface) {
1253 const auto& dst_params = blitted_surface->GetSurfaceParams();
1254 const auto& src_params = render_surface->GetSurfaceParams();
1255 const std::size_t src_memory_size = src_params.size_in_bytes;
1256 const std::optional<u32> level =
1257 TryFindBestMipMap(src_memory_size, dst_params, src_params.height);
1258 if (level.has_value()) {
1259 if (src_params.width == dst_params.MipWidthGobAligned(*level) &&
1260 src_params.height == dst_params.MipHeight(*level) &&
1261 src_params.block_height >= dst_params.MipBlockHeight(*level)) {
1262 const std::optional<u32> slot =
1263 TryFindBestLayer(render_surface->GetSurfaceParams().gpu_addr, dst_params, *level);
1264 if (slot.has_value()) {
1265 glCopyImageSubData(render_surface->Texture().handle,
1266 SurfaceTargetToGL(src_params.target), 0, 0, 0, 0,
1267 blitted_surface->Texture().handle,
1268 SurfaceTargetToGL(dst_params.target), *level, 0, 0, *slot,
1269 dst_params.MipWidth(*level), dst_params.MipHeight(*level), 1);
1270 blitted_surface->MarkAsModified(true, cache);
1271 return true;
1272 }
1273 }
1274 }
1275 return false;
1276}
1277
1278static bool IsReinterpretInvalid(const Surface render_surface, const Surface blitted_surface) {
1279 const VAddr bound1 = blitted_surface->GetCpuAddr() + blitted_surface->GetMemorySize();
1280 const VAddr bound2 = render_surface->GetCpuAddr() + render_surface->GetMemorySize();
1281 if (bound2 > bound1)
1282 return true;
1283 const auto& dst_params = blitted_surface->GetSurfaceParams();
1284 const auto& src_params = render_surface->GetSurfaceParams();
1285 return (dst_params.component_type != src_params.component_type);
1286}
1287
1288static bool IsReinterpretInvalidSecond(const Surface render_surface,
1289 const Surface blitted_surface) {
1290 const auto& dst_params = blitted_surface->GetSurfaceParams();
1291 const auto& src_params = render_surface->GetSurfaceParams();
1292 return (dst_params.height > src_params.height && dst_params.width > src_params.width);
1293}
1294
1295bool RasterizerCacheOpenGL::PartialReinterpretSurface(Surface triggering_surface,
1296 Surface intersect) {
1297 if (IsReinterpretInvalid(triggering_surface, intersect)) {
1298 Unregister(intersect);
1299 return false;
1300 }
1301 if (!LayerFitReinterpretSurface(*this, triggering_surface, intersect)) {
1302 if (IsReinterpretInvalidSecond(triggering_surface, intersect)) {
1303 Unregister(intersect);
1304 return false;
1305 }
1306 FlushObject(intersect);
1307 FlushObject(triggering_surface);
1308 intersect->MarkForReload(true);
1309 }
1310 return true;
1311}
1312
1313void RasterizerCacheOpenGL::SignalPreDrawCall() {
1314 if (texception && GLAD_GL_ARB_texture_barrier) {
1315 glTextureBarrier();
1316 }
1317 texception = false;
1318}
1319
1320void RasterizerCacheOpenGL::SignalPostDrawCall() {
1321 for (u32 i = 0; i < Maxwell::NumRenderTargets; i++) {
1322 if (current_color_buffers[i] != nullptr) {
1323 Surface intersect =
1324 CollideOnReinterpretedSurface(current_color_buffers[i]->GetCacheAddr());
1325 if (intersect != nullptr) {
1326 PartialReinterpretSurface(current_color_buffers[i], intersect);
1327 texception = true;
1328 }
1329 }
1330 }
1331}
1332
1116} // namespace OpenGL 1333} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.h b/src/video_core/renderer_opengl/gl_rasterizer_cache.h
index 490b8252e..e8073579f 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer_cache.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.h
@@ -8,6 +8,7 @@
8#include <map> 8#include <map>
9#include <memory> 9#include <memory>
10#include <string> 10#include <string>
11#include <unordered_set>
11#include <vector> 12#include <vector>
12 13
13#include "common/alignment.h" 14#include "common/alignment.h"
@@ -27,15 +28,15 @@ namespace OpenGL {
27 28
28class CachedSurface; 29class CachedSurface;
29using Surface = std::shared_ptr<CachedSurface>; 30using Surface = std::shared_ptr<CachedSurface>;
30using SurfaceSurfaceRect_Tuple = std::tuple<Surface, Surface, MathUtil::Rectangle<u32>>; 31using SurfaceSurfaceRect_Tuple = std::tuple<Surface, Surface, Common::Rectangle<u32>>;
31 32
32using SurfaceTarget = VideoCore::Surface::SurfaceTarget; 33using SurfaceTarget = VideoCore::Surface::SurfaceTarget;
33using SurfaceType = VideoCore::Surface::SurfaceType; 34using SurfaceType = VideoCore::Surface::SurfaceType;
34using PixelFormat = VideoCore::Surface::PixelFormat; 35using PixelFormat = VideoCore::Surface::PixelFormat;
35using ComponentType = VideoCore::Surface::ComponentType; 36using ComponentType = VideoCore::Surface::ComponentType;
37using Maxwell = Tegra::Engines::Maxwell3D::Regs;
36 38
37struct SurfaceParams { 39struct SurfaceParams {
38
39 enum class SurfaceClass { 40 enum class SurfaceClass {
40 Uploaded, 41 Uploaded,
41 RenderTarget, 42 RenderTarget,
@@ -71,7 +72,7 @@ struct SurfaceParams {
71 } 72 }
72 73
73 /// Returns the rectangle corresponding to this surface 74 /// Returns the rectangle corresponding to this surface
74 MathUtil::Rectangle<u32> GetRect(u32 mip_level = 0) const; 75 Common::Rectangle<u32> GetRect(u32 mip_level = 0) const;
75 76
76 /// Returns the total size of this surface in bytes, adjusted for compression 77 /// Returns the total size of this surface in bytes, adjusted for compression
77 std::size_t SizeInBytesRaw(bool ignore_tiled = false) const { 78 std::size_t SizeInBytesRaw(bool ignore_tiled = false) const {
@@ -108,6 +109,11 @@ struct SurfaceParams {
108 return size; 109 return size;
109 } 110 }
110 111
112 /// Returns true if the parameters constitute a valid rasterizer surface.
113 bool IsValid() const {
114 return gpu_addr && host_ptr && height && width;
115 }
116
111 /// Returns the exact size of the memory occupied by a layer in a texture in VRAM, including 117 /// Returns the exact size of the memory occupied by a layer in a texture in VRAM, including
112 /// mipmaps. 118 /// mipmaps.
113 std::size_t LayerMemorySize() const { 119 std::size_t LayerMemorySize() const {
@@ -140,10 +146,18 @@ struct SurfaceParams {
140 return offset; 146 return offset;
141 } 147 }
142 148
149 std::size_t GetMipmapSingleSize(u32 mip_level) const {
150 return InnerMipmapMemorySize(mip_level, false, is_layered);
151 }
152
143 u32 MipWidth(u32 mip_level) const { 153 u32 MipWidth(u32 mip_level) const {
144 return std::max(1U, width >> mip_level); 154 return std::max(1U, width >> mip_level);
145 } 155 }
146 156
157 u32 MipWidthGobAligned(u32 mip_level) const {
158 return Common::AlignUp(std::max(1U, width >> mip_level), 64U * 8U / GetFormatBpp());
159 }
160
147 u32 MipHeight(u32 mip_level) const { 161 u32 MipHeight(u32 mip_level) const {
148 return std::max(1U, height >> mip_level); 162 return std::max(1U, height >> mip_level);
149 } 163 }
@@ -168,20 +182,27 @@ struct SurfaceParams {
168 } 182 }
169 183
170 u32 MipBlockDepth(u32 mip_level) const { 184 u32 MipBlockDepth(u32 mip_level) const {
171 if (mip_level == 0) 185 if (mip_level == 0) {
172 return block_depth; 186 return block_depth;
173 if (is_layered) 187 }
188
189 if (is_layered) {
174 return 1; 190 return 1;
175 u32 depth = MipDepth(mip_level); 191 }
192
193 const u32 mip_depth = MipDepth(mip_level);
176 u32 bd = 32; 194 u32 bd = 32;
177 while (bd > 1 && depth * 2 <= bd) { 195 while (bd > 1 && mip_depth * 2 <= bd) {
178 bd >>= 1; 196 bd >>= 1;
179 } 197 }
198
180 if (bd == 32) { 199 if (bd == 32) {
181 u32 bh = MipBlockHeight(mip_level); 200 const u32 bh = MipBlockHeight(mip_level);
182 if (bh >= 4) 201 if (bh >= 4) {
183 return 16; 202 return 16;
203 }
184 } 204 }
205
185 return bd; 206 return bd;
186 } 207 }
187 208
@@ -194,7 +215,7 @@ struct SurfaceParams {
194 215
195 /// Creates SurfaceParams for a depth buffer configuration 216 /// Creates SurfaceParams for a depth buffer configuration
196 static SurfaceParams CreateForDepthBuffer( 217 static SurfaceParams CreateForDepthBuffer(
197 u32 zeta_width, u32 zeta_height, Tegra::GPUVAddr zeta_address, Tegra::DepthFormat format, 218 u32 zeta_width, u32 zeta_height, GPUVAddr zeta_address, Tegra::DepthFormat format,
198 u32 block_width, u32 block_height, u32 block_depth, 219 u32 block_width, u32 block_height, u32 block_depth,
199 Tegra::Engines::Maxwell3D::Regs::InvMemoryLayout type); 220 Tegra::Engines::Maxwell3D::Regs::InvMemoryLayout type);
200 221
@@ -216,7 +237,7 @@ struct SurfaceParams {
216 } 237 }
217 238
218 /// Initializes parameters for caching, should be called after everything has been initialized 239 /// Initializes parameters for caching, should be called after everything has been initialized
219 void InitCacheParameters(Tegra::GPUVAddr gpu_addr); 240 void InitCacheParameters(GPUVAddr gpu_addr);
220 241
221 std::string TargetName() const { 242 std::string TargetName() const {
222 switch (target) { 243 switch (target) {
@@ -272,6 +293,7 @@ struct SurfaceParams {
272 u32 height; 293 u32 height;
273 u32 depth; 294 u32 depth;
274 u32 unaligned_height; 295 u32 unaligned_height;
296 u32 pitch;
275 SurfaceTarget target; 297 SurfaceTarget target;
276 SurfaceClass identity; 298 SurfaceClass identity;
277 u32 max_mip_level; 299 u32 max_mip_level;
@@ -279,8 +301,8 @@ struct SurfaceParams {
279 bool is_array; 301 bool is_array;
280 bool srgb_conversion; 302 bool srgb_conversion;
281 // Parameters used for caching 303 // Parameters used for caching
282 VAddr addr; 304 u8* host_ptr;
283 Tegra::GPUVAddr gpu_addr; 305 GPUVAddr gpu_addr;
284 std::size_t size_in_bytes; 306 std::size_t size_in_bytes;
285 std::size_t size_in_bytes_gl; 307 std::size_t size_in_bytes_gl;
286 308
@@ -328,16 +350,20 @@ class RasterizerOpenGL;
328 350
329class CachedSurface final : public RasterizerCacheObject { 351class CachedSurface final : public RasterizerCacheObject {
330public: 352public:
331 CachedSurface(const SurfaceParams& params); 353 explicit CachedSurface(const SurfaceParams& params);
332 354
333 VAddr GetAddr() const override { 355 VAddr GetCpuAddr() const override {
334 return params.addr; 356 return cpu_addr;
335 } 357 }
336 358
337 std::size_t GetSizeInBytes() const override { 359 std::size_t GetSizeInBytes() const override {
338 return cached_size_in_bytes; 360 return cached_size_in_bytes;
339 } 361 }
340 362
363 std::size_t GetMemorySize() const {
364 return memory_size;
365 }
366
341 void Flush() override { 367 void Flush() override {
342 FlushGLBuffer(); 368 FlushGLBuffer();
343 } 369 }
@@ -346,31 +372,19 @@ public:
346 return texture; 372 return texture;
347 } 373 }
348 374
349 const OGLTexture& TextureLayer() { 375 const OGLTexture& Texture(bool as_array) {
350 if (params.is_array) { 376 if (params.is_array == as_array) {
351 return Texture(); 377 return texture;
378 } else {
379 EnsureTextureDiscrepantView();
380 return discrepant_view;
352 } 381 }
353 EnsureTextureView();
354 return texture_view;
355 } 382 }
356 383
357 GLenum Target() const { 384 GLenum Target() const {
358 return gl_target; 385 return gl_target;
359 } 386 }
360 387
361 GLenum TargetLayer() const {
362 using VideoCore::Surface::SurfaceTarget;
363 switch (params.target) {
364 case SurfaceTarget::Texture1D:
365 return GL_TEXTURE_1D_ARRAY;
366 case SurfaceTarget::Texture2D:
367 return GL_TEXTURE_2D_ARRAY;
368 case SurfaceTarget::TextureCubemap:
369 return GL_TEXTURE_CUBE_MAP_ARRAY;
370 }
371 return Target();
372 }
373
374 const SurfaceParams& GetSurfaceParams() const { 388 const SurfaceParams& GetSurfaceParams() const {
375 return params; 389 return params;
376 } 390 }
@@ -387,19 +401,43 @@ public:
387 Tegra::Texture::SwizzleSource swizzle_z, 401 Tegra::Texture::SwizzleSource swizzle_z,
388 Tegra::Texture::SwizzleSource swizzle_w); 402 Tegra::Texture::SwizzleSource swizzle_w);
389 403
404 void MarkReinterpreted() {
405 reinterpreted = true;
406 }
407
408 bool IsReinterpreted() const {
409 return reinterpreted;
410 }
411
412 void MarkForReload(bool reload) {
413 must_reload = reload;
414 }
415
416 bool MustReload() const {
417 return must_reload;
418 }
419
420 bool IsUploaded() const {
421 return params.identity == SurfaceParams::SurfaceClass::Uploaded;
422 }
423
390private: 424private:
391 void UploadGLMipmapTexture(u32 mip_map, GLuint read_fb_handle, GLuint draw_fb_handle); 425 void UploadGLMipmapTexture(u32 mip_map, GLuint read_fb_handle, GLuint draw_fb_handle);
392 426
393 void EnsureTextureView(); 427 void EnsureTextureDiscrepantView();
394 428
395 OGLTexture texture; 429 OGLTexture texture;
396 OGLTexture texture_view; 430 OGLTexture discrepant_view;
397 std::vector<std::vector<u8>> gl_buffer; 431 std::vector<std::vector<u8>> gl_buffer;
398 SurfaceParams params{}; 432 SurfaceParams params{};
399 GLenum gl_target{}; 433 GLenum gl_target{};
400 GLenum gl_internal_format{}; 434 GLenum gl_internal_format{};
401 std::size_t cached_size_in_bytes{}; 435 std::size_t cached_size_in_bytes{};
402 std::array<GLenum, 4> swizzle{GL_RED, GL_GREEN, GL_BLUE, GL_ALPHA}; 436 std::array<GLenum, 4> swizzle{GL_RED, GL_GREEN, GL_BLUE, GL_ALPHA};
437 std::size_t memory_size;
438 bool reinterpreted = false;
439 bool must_reload = false;
440 VAddr cpu_addr{};
403}; 441};
404 442
405class RasterizerCacheOpenGL final : public RasterizerCache<Surface> { 443class RasterizerCacheOpenGL final : public RasterizerCache<Surface> {
@@ -417,11 +455,16 @@ public:
417 Surface GetColorBufferSurface(std::size_t index, bool preserve_contents); 455 Surface GetColorBufferSurface(std::size_t index, bool preserve_contents);
418 456
419 /// Tries to find a framebuffer using on the provided CPU address 457 /// Tries to find a framebuffer using on the provided CPU address
420 Surface TryFindFramebufferSurface(VAddr addr) const; 458 Surface TryFindFramebufferSurface(const u8* host_ptr) const;
421 459
422 /// Copies the contents of one surface to another 460 /// Copies the contents of one surface to another
423 void FermiCopySurface(const Tegra::Engines::Fermi2D::Regs::Surface& src_config, 461 void FermiCopySurface(const Tegra::Engines::Fermi2D::Regs::Surface& src_config,
424 const Tegra::Engines::Fermi2D::Regs::Surface& dst_config); 462 const Tegra::Engines::Fermi2D::Regs::Surface& dst_config,
463 const Common::Rectangle<u32>& src_rect,
464 const Common::Rectangle<u32>& dst_rect);
465
466 void SignalPreDrawCall();
467 void SignalPostDrawCall();
425 468
426private: 469private:
427 void LoadSurface(const Surface& surface); 470 void LoadSurface(const Surface& surface);
@@ -439,9 +482,17 @@ private:
439 /// Tries to get a reserved surface for the specified parameters 482 /// Tries to get a reserved surface for the specified parameters
440 Surface TryGetReservedSurface(const SurfaceParams& params); 483 Surface TryGetReservedSurface(const SurfaceParams& params);
441 484
485 // Partialy reinterpret a surface based on a triggering_surface that collides with it.
486 // returns true if the reinterpret was successful, false in case it was not.
487 bool PartialReinterpretSurface(Surface triggering_surface, Surface intersect);
488
442 /// Performs a slow but accurate surface copy, flushing to RAM and reinterpreting the data 489 /// Performs a slow but accurate surface copy, flushing to RAM and reinterpreting the data
443 void AccurateCopySurface(const Surface& src_surface, const Surface& dst_surface); 490 void AccurateCopySurface(const Surface& src_surface, const Surface& dst_surface);
444 void FastLayeredCopySurface(const Surface& src_surface, const Surface& dst_surface); 491 void FastLayeredCopySurface(const Surface& src_surface, const Surface& dst_surface);
492 void FastCopySurface(const Surface& src_surface, const Surface& dst_surface);
493 void CopySurface(const Surface& src_surface, const Surface& dst_surface,
494 const GLuint copy_pbo_handle, const GLenum src_attachment = 0,
495 const GLenum dst_attachment = 0, const std::size_t cubemap_face = 0);
445 496
446 /// The surface reserve is a "backup" cache, this is where we put unique surfaces that have 497 /// The surface reserve is a "backup" cache, this is where we put unique surfaces that have
447 /// previously been used. This is to prevent surfaces from being constantly created and 498 /// previously been used. This is to prevent surfaces from being constantly created and
@@ -451,12 +502,54 @@ private:
451 OGLFramebuffer read_framebuffer; 502 OGLFramebuffer read_framebuffer;
452 OGLFramebuffer draw_framebuffer; 503 OGLFramebuffer draw_framebuffer;
453 504
505 bool texception = false;
506
454 /// Use a Pixel Buffer Object to download the previous texture and then upload it to the new one 507 /// Use a Pixel Buffer Object to download the previous texture and then upload it to the new one
455 /// using the new format. 508 /// using the new format.
456 OGLBuffer copy_pbo; 509 OGLBuffer copy_pbo;
457 510
458 std::array<Surface, Tegra::Engines::Maxwell3D::Regs::NumRenderTargets> last_color_buffers; 511 std::array<Surface, Maxwell::NumRenderTargets> last_color_buffers;
512 std::array<Surface, Maxwell::NumRenderTargets> current_color_buffers;
459 Surface last_depth_buffer; 513 Surface last_depth_buffer;
514
515 using SurfaceIntervalCache = boost::icl::interval_map<CacheAddr, Surface>;
516 using SurfaceInterval = typename SurfaceIntervalCache::interval_type;
517
518 static auto GetReinterpretInterval(const Surface& object) {
519 return SurfaceInterval::right_open(object->GetCacheAddr() + 1,
520 object->GetCacheAddr() + object->GetMemorySize() - 1);
521 }
522
523 // Reinterpreted surfaces are very fragil as the game may keep rendering into them.
524 SurfaceIntervalCache reinterpreted_surfaces;
525
526 void RegisterReinterpretSurface(Surface reinterpret_surface) {
527 auto interval = GetReinterpretInterval(reinterpret_surface);
528 reinterpreted_surfaces.insert({interval, reinterpret_surface});
529 reinterpret_surface->MarkReinterpreted();
530 }
531
532 Surface CollideOnReinterpretedSurface(CacheAddr addr) const {
533 const SurfaceInterval interval{addr};
534 for (auto& pair :
535 boost::make_iterator_range(reinterpreted_surfaces.equal_range(interval))) {
536 return pair.second;
537 }
538 return nullptr;
539 }
540
541 void Register(const Surface& object) override {
542 RasterizerCache<Surface>::Register(object);
543 }
544
545 /// Unregisters an object from the cache
546 void Unregister(const Surface& object) override {
547 if (object->IsReinterpreted()) {
548 auto interval = GetReinterpretInterval(object);
549 reinterpreted_surfaces.erase(interval);
550 }
551 RasterizerCache<Surface>::Unregister(object);
552 }
460}; 553};
461 554
462} // namespace OpenGL 555} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp
index 4883e4f62..7030db365 100644
--- a/src/video_core/renderer_opengl/gl_shader_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp
@@ -6,13 +6,11 @@
6#include "common/assert.h" 6#include "common/assert.h"
7#include "common/hash.h" 7#include "common/hash.h"
8#include "core/core.h" 8#include "core/core.h"
9#include "core/memory.h"
10#include "video_core/engines/maxwell_3d.h" 9#include "video_core/engines/maxwell_3d.h"
11#include "video_core/renderer_opengl/gl_rasterizer.h" 10#include "video_core/renderer_opengl/gl_rasterizer.h"
12#include "video_core/renderer_opengl/gl_shader_cache.h" 11#include "video_core/renderer_opengl/gl_shader_cache.h"
13#include "video_core/renderer_opengl/gl_shader_decompiler.h" 12#include "video_core/renderer_opengl/gl_shader_decompiler.h"
14#include "video_core/renderer_opengl/gl_shader_disk_cache.h" 13#include "video_core/renderer_opengl/gl_shader_disk_cache.h"
15#include "video_core/renderer_opengl/gl_shader_manager.h"
16#include "video_core/renderer_opengl/utils.h" 14#include "video_core/renderer_opengl/utils.h"
17#include "video_core/shader/shader_ir.h" 15#include "video_core/shader/shader_ir.h"
18 16
@@ -32,19 +30,16 @@ struct UnspecializedShader {
32namespace { 30namespace {
33 31
34/// Gets the address for the specified shader stage program 32/// Gets the address for the specified shader stage program
35VAddr GetShaderAddress(Maxwell::ShaderProgram program) { 33GPUVAddr GetShaderAddress(Maxwell::ShaderProgram program) {
36 const auto& gpu = Core::System::GetInstance().GPU().Maxwell3D(); 34 const auto& gpu{Core::System::GetInstance().GPU().Maxwell3D()};
37 const auto& shader_config = gpu.regs.shader_config[static_cast<std::size_t>(program)]; 35 const auto& shader_config{gpu.regs.shader_config[static_cast<std::size_t>(program)]};
38 const auto address = gpu.memory_manager.GpuToCpuAddress(gpu.regs.code_address.CodeAddress() + 36 return gpu.regs.code_address.CodeAddress() + shader_config.offset;
39 shader_config.offset);
40 ASSERT_MSG(address, "Invalid GPU address");
41 return *address;
42} 37}
43 38
44/// Gets the shader program code from memory for the specified address 39/// Gets the shader program code from memory for the specified address
45ProgramCode GetShaderCode(VAddr addr) { 40ProgramCode GetShaderCode(const u8* host_ptr) {
46 ProgramCode program_code(VideoCommon::Shader::MAX_PROGRAM_LENGTH); 41 ProgramCode program_code(VideoCommon::Shader::MAX_PROGRAM_LENGTH);
47 Memory::ReadBlock(addr, program_code.data(), program_code.size() * sizeof(u64)); 42 std::memcpy(program_code.data(), host_ptr, program_code.size() * sizeof(u64));
48 return program_code; 43 return program_code;
49} 44}
50 45
@@ -214,12 +209,13 @@ std::set<GLenum> GetSupportedFormats() {
214 209
215} // namespace 210} // namespace
216 211
217CachedShader::CachedShader(VAddr addr, u64 unique_identifier, Maxwell::ShaderProgram program_type, 212CachedShader::CachedShader(VAddr cpu_addr, u64 unique_identifier,
218 ShaderDiskCacheOpenGL& disk_cache, 213 Maxwell::ShaderProgram program_type, ShaderDiskCacheOpenGL& disk_cache,
219 const PrecompiledPrograms& precompiled_programs, 214 const PrecompiledPrograms& precompiled_programs,
220 ProgramCode&& program_code, ProgramCode&& program_code_b) 215 ProgramCode&& program_code, ProgramCode&& program_code_b, u8* host_ptr)
221 : addr{addr}, unique_identifier{unique_identifier}, program_type{program_type}, 216 : RasterizerCacheObject{host_ptr}, host_ptr{host_ptr}, cpu_addr{cpu_addr},
222 disk_cache{disk_cache}, precompiled_programs{precompiled_programs} { 217 unique_identifier{unique_identifier}, program_type{program_type}, disk_cache{disk_cache},
218 precompiled_programs{precompiled_programs} {
223 219
224 const std::size_t code_size = CalculateProgramSize(program_code); 220 const std::size_t code_size = CalculateProgramSize(program_code);
225 const std::size_t code_size_b = 221 const std::size_t code_size_b =
@@ -243,12 +239,13 @@ CachedShader::CachedShader(VAddr addr, u64 unique_identifier, Maxwell::ShaderPro
243 disk_cache.SaveRaw(raw); 239 disk_cache.SaveRaw(raw);
244} 240}
245 241
246CachedShader::CachedShader(VAddr addr, u64 unique_identifier, Maxwell::ShaderProgram program_type, 242CachedShader::CachedShader(VAddr cpu_addr, u64 unique_identifier,
247 ShaderDiskCacheOpenGL& disk_cache, 243 Maxwell::ShaderProgram program_type, ShaderDiskCacheOpenGL& disk_cache,
248 const PrecompiledPrograms& precompiled_programs, 244 const PrecompiledPrograms& precompiled_programs,
249 GLShader::ProgramResult result) 245 GLShader::ProgramResult result, u8* host_ptr)
250 : addr{addr}, unique_identifier{unique_identifier}, program_type{program_type}, 246 : RasterizerCacheObject{host_ptr}, cpu_addr{cpu_addr}, unique_identifier{unique_identifier},
251 disk_cache{disk_cache}, precompiled_programs{precompiled_programs} { 247 program_type{program_type}, disk_cache{disk_cache}, precompiled_programs{
248 precompiled_programs} {
252 249
253 code = std::move(result.first); 250 code = std::move(result.first);
254 entries = result.second; 251 entries = result.second;
@@ -271,7 +268,7 @@ std::tuple<GLuint, BaseBindings> CachedShader::GetProgramHandle(GLenum primitive
271 disk_cache.SaveUsage(GetUsage(primitive_mode, base_bindings)); 268 disk_cache.SaveUsage(GetUsage(primitive_mode, base_bindings));
272 } 269 }
273 270
274 LabelGLObject(GL_PROGRAM, program->handle, addr); 271 LabelGLObject(GL_PROGRAM, program->handle, cpu_addr);
275 } 272 }
276 273
277 handle = program->handle; 274 handle = program->handle;
@@ -323,7 +320,7 @@ GLuint CachedShader::LazyGeometryProgram(CachedProgram& target_program, BaseBind
323 disk_cache.SaveUsage(GetUsage(primitive_mode, base_bindings)); 320 disk_cache.SaveUsage(GetUsage(primitive_mode, base_bindings));
324 } 321 }
325 322
326 LabelGLObject(GL_PROGRAM, target_program->handle, addr, debug_name); 323 LabelGLObject(GL_PROGRAM, target_program->handle, cpu_addr, debug_name);
327 324
328 return target_program->handle; 325 return target_program->handle;
329}; 326};
@@ -486,29 +483,32 @@ Shader ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program) {
486 return last_shaders[static_cast<u32>(program)]; 483 return last_shaders[static_cast<u32>(program)];
487 } 484 }
488 485
489 const VAddr program_addr{GetShaderAddress(program)}; 486 auto& memory_manager{Core::System::GetInstance().GPU().MemoryManager()};
487 const GPUVAddr program_addr{GetShaderAddress(program)};
490 488
491 // Look up shader in the cache based on address 489 // Look up shader in the cache based on address
492 Shader shader{TryGet(program_addr)}; 490 const auto& host_ptr{memory_manager.GetPointer(program_addr)};
491 Shader shader{TryGet(host_ptr)};
493 492
494 if (!shader) { 493 if (!shader) {
495 // No shader found - create a new one 494 // No shader found - create a new one
496 ProgramCode program_code = GetShaderCode(program_addr); 495 ProgramCode program_code{GetShaderCode(host_ptr)};
497 ProgramCode program_code_b; 496 ProgramCode program_code_b;
498 if (program == Maxwell::ShaderProgram::VertexA) { 497 if (program == Maxwell::ShaderProgram::VertexA) {
499 program_code_b = GetShaderCode(GetShaderAddress(Maxwell::ShaderProgram::VertexB)); 498 program_code_b = GetShaderCode(
499 memory_manager.GetPointer(GetShaderAddress(Maxwell::ShaderProgram::VertexB)));
500 } 500 }
501 const u64 unique_identifier = GetUniqueIdentifier(program, program_code, program_code_b); 501 const u64 unique_identifier = GetUniqueIdentifier(program, program_code, program_code_b);
502 502 const VAddr cpu_addr{*memory_manager.GpuToCpuAddress(program_addr)};
503 const auto found = precompiled_shaders.find(unique_identifier); 503 const auto found = precompiled_shaders.find(unique_identifier);
504 if (found != precompiled_shaders.end()) { 504 if (found != precompiled_shaders.end()) {
505 shader = 505 shader =
506 std::make_shared<CachedShader>(program_addr, unique_identifier, program, disk_cache, 506 std::make_shared<CachedShader>(cpu_addr, unique_identifier, program, disk_cache,
507 precompiled_programs, found->second); 507 precompiled_programs, found->second, host_ptr);
508 } else { 508 } else {
509 shader = std::make_shared<CachedShader>( 509 shader = std::make_shared<CachedShader>(
510 program_addr, unique_identifier, program, disk_cache, precompiled_programs, 510 cpu_addr, unique_identifier, program, disk_cache, precompiled_programs,
511 std::move(program_code), std::move(program_code_b)); 511 std::move(program_code), std::move(program_code_b), host_ptr);
512 } 512 }
513 Register(shader); 513 Register(shader);
514 } 514 }
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.h b/src/video_core/renderer_opengl/gl_shader_cache.h
index 97eed192f..fd1c85115 100644
--- a/src/video_core/renderer_opengl/gl_shader_cache.h
+++ b/src/video_core/renderer_opengl/gl_shader_cache.h
@@ -39,18 +39,18 @@ using PrecompiledShaders = std::unordered_map<u64, GLShader::ProgramResult>;
39 39
40class CachedShader final : public RasterizerCacheObject { 40class CachedShader final : public RasterizerCacheObject {
41public: 41public:
42 explicit CachedShader(VAddr addr, u64 unique_identifier, Maxwell::ShaderProgram program_type, 42 explicit CachedShader(VAddr cpu_addr, u64 unique_identifier,
43 ShaderDiskCacheOpenGL& disk_cache, 43 Maxwell::ShaderProgram program_type, ShaderDiskCacheOpenGL& disk_cache,
44 const PrecompiledPrograms& precompiled_programs, 44 const PrecompiledPrograms& precompiled_programs,
45 ProgramCode&& program_code, ProgramCode&& program_code_b); 45 ProgramCode&& program_code, ProgramCode&& program_code_b, u8* host_ptr);
46 46
47 explicit CachedShader(VAddr addr, u64 unique_identifier, Maxwell::ShaderProgram program_type, 47 explicit CachedShader(VAddr cpu_addr, u64 unique_identifier,
48 ShaderDiskCacheOpenGL& disk_cache, 48 Maxwell::ShaderProgram program_type, ShaderDiskCacheOpenGL& disk_cache,
49 const PrecompiledPrograms& precompiled_programs, 49 const PrecompiledPrograms& precompiled_programs,
50 GLShader::ProgramResult result); 50 GLShader::ProgramResult result, u8* host_ptr);
51 51
52 VAddr GetAddr() const override { 52 VAddr GetCpuAddr() const override {
53 return addr; 53 return cpu_addr;
54 } 54 }
55 55
56 std::size_t GetSizeInBytes() const override { 56 std::size_t GetSizeInBytes() const override {
@@ -91,7 +91,8 @@ private:
91 91
92 ShaderDiskCacheUsage GetUsage(GLenum primitive_mode, BaseBindings base_bindings) const; 92 ShaderDiskCacheUsage GetUsage(GLenum primitive_mode, BaseBindings base_bindings) const;
93 93
94 VAddr addr{}; 94 u8* host_ptr{};
95 VAddr cpu_addr{};
95 u64 unique_identifier{}; 96 u64 unique_identifier{};
96 Maxwell::ShaderProgram program_type{}; 97 Maxwell::ShaderProgram program_type{};
97 ShaderDiskCacheOpenGL& disk_cache; 98 ShaderDiskCacheOpenGL& disk_cache;
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
index 70e124dc4..11d1169f0 100644
--- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
@@ -5,7 +5,9 @@
5#include <array> 5#include <array>
6#include <string> 6#include <string>
7#include <string_view> 7#include <string_view>
8#include <utility>
8#include <variant> 9#include <variant>
10#include <vector>
9 11
10#include <fmt/format.h> 12#include <fmt/format.h>
11 13
@@ -20,6 +22,7 @@
20namespace OpenGL::GLShader { 22namespace OpenGL::GLShader {
21 23
22using Tegra::Shader::Attribute; 24using Tegra::Shader::Attribute;
25using Tegra::Shader::AttributeUse;
23using Tegra::Shader::Header; 26using Tegra::Shader::Header;
24using Tegra::Shader::IpaInterpMode; 27using Tegra::Shader::IpaInterpMode;
25using Tegra::Shader::IpaMode; 28using Tegra::Shader::IpaMode;
@@ -288,34 +291,22 @@ private:
288 code.AddNewLine(); 291 code.AddNewLine();
289 } 292 }
290 293
291 std::string GetInputFlags(const IpaMode& input_mode) { 294 std::string GetInputFlags(AttributeUse attribute) {
292 const IpaSampleMode sample_mode = input_mode.sampling_mode;
293 const IpaInterpMode interp_mode = input_mode.interpolation_mode;
294 std::string out; 295 std::string out;
295 296
296 switch (interp_mode) { 297 switch (attribute) {
297 case IpaInterpMode::Flat: 298 case AttributeUse::Constant:
298 out += "flat "; 299 out += "flat ";
299 break; 300 break;
300 case IpaInterpMode::Linear: 301 case AttributeUse::ScreenLinear:
301 out += "noperspective "; 302 out += "noperspective ";
302 break; 303 break;
303 case IpaInterpMode::Perspective: 304 case AttributeUse::Perspective:
304 // Default, Smooth 305 // Default, Smooth
305 break; 306 break;
306 default: 307 default:
307 UNIMPLEMENTED_MSG("Unhandled IPA interp mode: {}", static_cast<u32>(interp_mode)); 308 LOG_CRITICAL(HW_GPU, "Unused attribute being fetched");
308 } 309 UNREACHABLE();
309 switch (sample_mode) {
310 case IpaSampleMode::Centroid:
311 // It can be implemented with the "centroid " keyword in GLSL
312 UNIMPLEMENTED_MSG("Unimplemented IPA sampler mode centroid");
313 break;
314 case IpaSampleMode::Default:
315 // Default, n/a
316 break;
317 default:
318 UNIMPLEMENTED_MSG("Unimplemented IPA sampler mode: {}", static_cast<u32>(sample_mode));
319 } 310 }
320 return out; 311 return out;
321 } 312 }
@@ -324,16 +315,11 @@ private:
324 const auto& attributes = ir.GetInputAttributes(); 315 const auto& attributes = ir.GetInputAttributes();
325 for (const auto element : attributes) { 316 for (const auto element : attributes) {
326 const Attribute::Index index = element.first; 317 const Attribute::Index index = element.first;
327 const IpaMode& input_mode = *element.second.begin();
328 if (index < Attribute::Index::Attribute_0 || index > Attribute::Index::Attribute_31) { 318 if (index < Attribute::Index::Attribute_0 || index > Attribute::Index::Attribute_31) {
329 // Skip when it's not a generic attribute 319 // Skip when it's not a generic attribute
330 continue; 320 continue;
331 } 321 }
332 322
333 ASSERT(element.second.size() > 0);
334 UNIMPLEMENTED_IF_MSG(element.second.size() > 1,
335 "Multiple input flag modes are not supported in GLSL");
336
337 // TODO(bunnei): Use proper number of elements for these 323 // TODO(bunnei): Use proper number of elements for these
338 u32 idx = static_cast<u32>(index) - static_cast<u32>(Attribute::Index::Attribute_0); 324 u32 idx = static_cast<u32>(index) - static_cast<u32>(Attribute::Index::Attribute_0);
339 if (stage != ShaderStage::Vertex) { 325 if (stage != ShaderStage::Vertex) {
@@ -345,8 +331,14 @@ private:
345 if (stage == ShaderStage::Geometry) { 331 if (stage == ShaderStage::Geometry) {
346 attr = "gs_" + attr + "[]"; 332 attr = "gs_" + attr + "[]";
347 } 333 }
348 code.AddLine("layout (location = " + std::to_string(idx) + ") " + 334 std::string suffix;
349 GetInputFlags(input_mode) + "in vec4 " + attr + ';'); 335 if (stage == ShaderStage::Fragment) {
336 const auto input_mode =
337 header.ps.GetAttributeUse(idx - GENERIC_VARYING_START_LOCATION);
338 suffix = GetInputFlags(input_mode);
339 }
340 code.AddLine("layout (location = " + std::to_string(idx) + ") " + suffix + "in vec4 " +
341 attr + ';');
350 } 342 }
351 if (!attributes.empty()) 343 if (!attributes.empty())
352 code.AddNewLine(); 344 code.AddNewLine();
@@ -616,17 +608,8 @@ private:
616 608
617 std::string VisitOperand(Operation operation, std::size_t operand_index, Type type) { 609 std::string VisitOperand(Operation operation, std::size_t operand_index, Type type) {
618 std::string value = VisitOperand(operation, operand_index); 610 std::string value = VisitOperand(operation, operand_index);
619
620 switch (type) { 611 switch (type) {
621 case Type::Bool: 612 case Type::HalfFloat: {
622 case Type::Bool2:
623 case Type::Float:
624 return value;
625 case Type::Int:
626 return "ftoi(" + value + ')';
627 case Type::Uint:
628 return "ftou(" + value + ')';
629 case Type::HalfFloat:
630 const auto half_meta = std::get_if<MetaHalfArithmetic>(&operation.GetMeta()); 613 const auto half_meta = std::get_if<MetaHalfArithmetic>(&operation.GetMeta());
631 if (!half_meta) { 614 if (!half_meta) {
632 value = "toHalf2(" + value + ')'; 615 value = "toHalf2(" + value + ')';
@@ -643,6 +626,26 @@ private:
643 return "vec2(toHalf2(" + value + ")[1])"; 626 return "vec2(toHalf2(" + value + ")[1])";
644 } 627 }
645 } 628 }
629 default:
630 return CastOperand(value, type);
631 }
632 }
633
634 std::string CastOperand(const std::string& value, Type type) const {
635 switch (type) {
636 case Type::Bool:
637 case Type::Bool2:
638 case Type::Float:
639 return value;
640 case Type::Int:
641 return "ftoi(" + value + ')';
642 case Type::Uint:
643 return "ftou(" + value + ')';
644 case Type::HalfFloat:
645 // Can't be handled as a stand-alone value
646 UNREACHABLE();
647 return value;
648 }
646 UNREACHABLE(); 649 UNREACHABLE();
647 return value; 650 return value;
648 } 651 }
@@ -650,6 +653,7 @@ private:
650 std::string BitwiseCastResult(std::string value, Type type, bool needs_parenthesis = false) { 653 std::string BitwiseCastResult(std::string value, Type type, bool needs_parenthesis = false) {
651 switch (type) { 654 switch (type) {
652 case Type::Bool: 655 case Type::Bool:
656 case Type::Bool2:
653 case Type::Float: 657 case Type::Float:
654 if (needs_parenthesis) { 658 if (needs_parenthesis) {
655 return '(' + value + ')'; 659 return '(' + value + ')';
@@ -715,51 +719,68 @@ private:
715 } 719 }
716 720
717 std::string GenerateTexture(Operation operation, const std::string& func, 721 std::string GenerateTexture(Operation operation, const std::string& func,
718 bool is_extra_int = false) { 722 const std::vector<std::pair<Type, Node>>& extras) {
719 constexpr std::array<const char*, 4> coord_constructors = {"float", "vec2", "vec3", "vec4"}; 723 constexpr std::array<const char*, 4> coord_constructors = {"float", "vec2", "vec3", "vec4"};
720 724
721 const auto meta = std::get_if<MetaTexture>(&operation.GetMeta()); 725 const auto meta = std::get_if<MetaTexture>(&operation.GetMeta());
722 const auto count = static_cast<u32>(operation.GetOperandsCount());
723 ASSERT(meta); 726 ASSERT(meta);
724 727
728 const std::size_t count = operation.GetOperandsCount();
729 const bool has_array = meta->sampler.IsArray();
730 const bool has_shadow = meta->sampler.IsShadow();
731
725 std::string expr = func; 732 std::string expr = func;
726 expr += '('; 733 expr += '(';
727 expr += GetSampler(meta->sampler); 734 expr += GetSampler(meta->sampler);
728 expr += ", "; 735 expr += ", ";
729 736
730 expr += coord_constructors[meta->coords_count - 1]; 737 expr += coord_constructors.at(count + (has_array ? 1 : 0) + (has_shadow ? 1 : 0) - 1);
731 expr += '('; 738 expr += '(';
732 for (u32 i = 0; i < count; ++i) { 739 for (std::size_t i = 0; i < count; ++i) {
733 const bool is_extra = i >= meta->coords_count; 740 expr += Visit(operation[i]);
734 const bool is_array = i == meta->array_index;
735
736 std::string operand = [&]() {
737 if (is_extra && is_extra_int) {
738 if (const auto immediate = std::get_if<ImmediateNode>(operation[i])) {
739 return std::to_string(static_cast<s32>(immediate->GetValue()));
740 } else {
741 return "ftoi(" + Visit(operation[i]) + ')';
742 }
743 } else {
744 return Visit(operation[i]);
745 }
746 }();
747 if (is_array) {
748 ASSERT(!is_extra);
749 operand = "float(ftoi(" + operand + "))";
750 }
751 741
752 expr += operand; 742 const std::size_t next = i + 1;
743 if (next < count)
744 expr += ", ";
745 }
746 if (has_array) {
747 expr += ", float(ftoi(" + Visit(meta->array) + "))";
748 }
749 if (has_shadow) {
750 expr += ", " + Visit(meta->depth_compare);
751 }
752 expr += ')';
753 753
754 if (i + 1 == meta->coords_count) { 754 for (const auto& extra_pair : extras) {
755 expr += ')'; 755 const auto [type, operand] = extra_pair;
756 if (operand == nullptr) {
757 continue;
758 }
759 expr += ", ";
760
761 switch (type) {
762 case Type::Int:
763 if (const auto immediate = std::get_if<ImmediateNode>(operand)) {
764 // Inline the string as an immediate integer in GLSL (some extra arguments are
765 // required to be constant)
766 expr += std::to_string(static_cast<s32>(immediate->GetValue()));
767 } else {
768 expr += "ftoi(" + Visit(operand) + ')';
769 }
770 break;
771 case Type::Float:
772 expr += Visit(operand);
773 break;
774 default: {
775 const auto type_int = static_cast<u32>(type);
776 UNIMPLEMENTED_MSG("Unimplemented extra type={}", type_int);
777 expr += '0';
778 break;
756 } 779 }
757 if (i + 1 < count) {
758 expr += ", ";
759 } 780 }
760 } 781 }
761 expr += ')'; 782
762 return expr; 783 return expr + ')';
763 } 784 }
764 785
765 std::string Assign(Operation operation) { 786 std::string Assign(Operation operation) {
@@ -1134,37 +1155,38 @@ private:
1134 Type::HalfFloat); 1155 Type::HalfFloat);
1135 } 1156 }
1136 1157
1137 std::string F4Texture(Operation operation) { 1158 std::string Texture(Operation operation) {
1138 const auto meta = std::get_if<MetaTexture>(&operation.GetMeta()); 1159 const auto meta = std::get_if<MetaTexture>(&operation.GetMeta());
1139 ASSERT(meta); 1160 ASSERT(meta);
1140 1161
1141 std::string expr = GenerateTexture(operation, "texture"); 1162 std::string expr = GenerateTexture(operation, "texture", {{Type::Float, meta->bias}});
1142 if (meta->sampler.IsShadow()) { 1163 if (meta->sampler.IsShadow()) {
1143 expr = "vec4(" + expr + ')'; 1164 expr = "vec4(" + expr + ')';
1144 } 1165 }
1145 return expr + GetSwizzle(meta->element); 1166 return expr + GetSwizzle(meta->element);
1146 } 1167 }
1147 1168
1148 std::string F4TextureLod(Operation operation) { 1169 std::string TextureLod(Operation operation) {
1149 const auto meta = std::get_if<MetaTexture>(&operation.GetMeta()); 1170 const auto meta = std::get_if<MetaTexture>(&operation.GetMeta());
1150 ASSERT(meta); 1171 ASSERT(meta);
1151 1172
1152 std::string expr = GenerateTexture(operation, "textureLod"); 1173 std::string expr = GenerateTexture(operation, "textureLod", {{Type::Float, meta->lod}});
1153 if (meta->sampler.IsShadow()) { 1174 if (meta->sampler.IsShadow()) {
1154 expr = "vec4(" + expr + ')'; 1175 expr = "vec4(" + expr + ')';
1155 } 1176 }
1156 return expr + GetSwizzle(meta->element); 1177 return expr + GetSwizzle(meta->element);
1157 } 1178 }
1158 1179
1159 std::string F4TextureGather(Operation operation) { 1180 std::string TextureGather(Operation operation) {
1160 const auto meta = std::get_if<MetaTexture>(&operation.GetMeta()); 1181 const auto meta = std::get_if<MetaTexture>(&operation.GetMeta());
1161 ASSERT(meta); 1182 ASSERT(meta);
1162 1183
1163 return GenerateTexture(operation, "textureGather", !meta->sampler.IsShadow()) + 1184 const auto type = meta->sampler.IsShadow() ? Type::Float : Type::Int;
1185 return GenerateTexture(operation, "textureGather", {{type, meta->component}}) +
1164 GetSwizzle(meta->element); 1186 GetSwizzle(meta->element);
1165 } 1187 }
1166 1188
1167 std::string F4TextureQueryDimensions(Operation operation) { 1189 std::string TextureQueryDimensions(Operation operation) {
1168 const auto meta = std::get_if<MetaTexture>(&operation.GetMeta()); 1190 const auto meta = std::get_if<MetaTexture>(&operation.GetMeta());
1169 ASSERT(meta); 1191 ASSERT(meta);
1170 1192
@@ -1184,40 +1206,44 @@ private:
1184 return "0"; 1206 return "0";
1185 } 1207 }
1186 1208
1187 std::string F4TextureQueryLod(Operation operation) { 1209 std::string TextureQueryLod(Operation operation) {
1188 const auto meta = std::get_if<MetaTexture>(&operation.GetMeta()); 1210 const auto meta = std::get_if<MetaTexture>(&operation.GetMeta());
1189 ASSERT(meta); 1211 ASSERT(meta);
1190 1212
1191 if (meta->element < 2) { 1213 if (meta->element < 2) {
1192 return "itof(int((" + GenerateTexture(operation, "textureQueryLod") + " * vec2(256))" + 1214 return "itof(int((" + GenerateTexture(operation, "textureQueryLod", {}) +
1193 GetSwizzle(meta->element) + "))"; 1215 " * vec2(256))" + GetSwizzle(meta->element) + "))";
1194 } 1216 }
1195 return "0"; 1217 return "0";
1196 } 1218 }
1197 1219
1198 std::string F4TexelFetch(Operation operation) { 1220 std::string TexelFetch(Operation operation) {
1199 constexpr std::array<const char*, 4> constructors = {"int", "ivec2", "ivec3", "ivec4"}; 1221 constexpr std::array<const char*, 4> constructors = {"int", "ivec2", "ivec3", "ivec4"};
1200 const auto meta = std::get_if<MetaTexture>(&operation.GetMeta()); 1222 const auto meta = std::get_if<MetaTexture>(&operation.GetMeta());
1201 const auto count = static_cast<u32>(operation.GetOperandsCount());
1202 ASSERT(meta); 1223 ASSERT(meta);
1224 UNIMPLEMENTED_IF(meta->sampler.IsArray());
1225 const std::size_t count = operation.GetOperandsCount();
1203 1226
1204 std::string expr = "texelFetch("; 1227 std::string expr = "texelFetch(";
1205 expr += GetSampler(meta->sampler); 1228 expr += GetSampler(meta->sampler);
1206 expr += ", "; 1229 expr += ", ";
1207 1230
1208 expr += constructors[meta->coords_count - 1]; 1231 expr += constructors.at(operation.GetOperandsCount() - 1);
1209 expr += '('; 1232 expr += '(';
1210 for (u32 i = 0; i < count; ++i) { 1233 for (std::size_t i = 0; i < count; ++i) {
1211 expr += VisitOperand(operation, i, Type::Int); 1234 expr += VisitOperand(operation, i, Type::Int);
1212 1235 const std::size_t next = i + 1;
1213 if (i + 1 == meta->coords_count) { 1236 if (next == count)
1214 expr += ')'; 1237 expr += ')';
1215 } 1238 else if (next < count)
1216 if (i + 1 < count) {
1217 expr += ", "; 1239 expr += ", ";
1218 } 1240 }
1241 if (meta->lod) {
1242 expr += ", ";
1243 expr += CastOperand(Visit(meta->lod), Type::Int);
1219 } 1244 }
1220 expr += ')'; 1245 expr += ')';
1246
1221 return expr + GetSwizzle(meta->element); 1247 return expr + GetSwizzle(meta->element);
1222 } 1248 }
1223 1249
@@ -1454,12 +1480,12 @@ private:
1454 &GLSLDecompiler::Logical2HNotEqual, 1480 &GLSLDecompiler::Logical2HNotEqual,
1455 &GLSLDecompiler::Logical2HGreaterEqual, 1481 &GLSLDecompiler::Logical2HGreaterEqual,
1456 1482
1457 &GLSLDecompiler::F4Texture, 1483 &GLSLDecompiler::Texture,
1458 &GLSLDecompiler::F4TextureLod, 1484 &GLSLDecompiler::TextureLod,
1459 &GLSLDecompiler::F4TextureGather, 1485 &GLSLDecompiler::TextureGather,
1460 &GLSLDecompiler::F4TextureQueryDimensions, 1486 &GLSLDecompiler::TextureQueryDimensions,
1461 &GLSLDecompiler::F4TextureQueryLod, 1487 &GLSLDecompiler::TextureQueryLod,
1462 &GLSLDecompiler::F4TexelFetch, 1488 &GLSLDecompiler::TexelFetch,
1463 1489
1464 &GLSLDecompiler::Branch, 1490 &GLSLDecompiler::Branch,
1465 &GLSLDecompiler::PushFlowStack, 1491 &GLSLDecompiler::PushFlowStack,
@@ -1562,4 +1588,4 @@ ProgramResult Decompile(const ShaderIR& ir, Maxwell::ShaderStage stage, const st
1562 return {decompiler.GetResult(), decompiler.GetShaderEntries()}; 1588 return {decompiler.GetResult(), decompiler.GetShaderEntries()};
1563} 1589}
1564 1590
1565} // namespace OpenGL::GLShader \ No newline at end of file 1591} // namespace OpenGL::GLShader
diff --git a/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp b/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp
index 81882822b..d2d979997 100644
--- a/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp
@@ -2,17 +2,15 @@
2// Licensed under GPLv2 or any later version 2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included. 3// Refer to the license.txt file included.
4 4
5#pragma once
6
7#include <cstring> 5#include <cstring>
8#include <fmt/format.h> 6#include <fmt/format.h>
9#include <lz4.h>
10 7
11#include "common/assert.h" 8#include "common/assert.h"
12#include "common/common_paths.h" 9#include "common/common_paths.h"
13#include "common/common_types.h" 10#include "common/common_types.h"
14#include "common/file_util.h" 11#include "common/file_util.h"
15#include "common/logging/log.h" 12#include "common/logging/log.h"
13#include "common/lz4_compression.h"
16#include "common/scm_rev.h" 14#include "common/scm_rev.h"
17 15
18#include "core/core.h" 16#include "core/core.h"
@@ -51,39 +49,6 @@ ShaderCacheVersionHash GetShaderCacheVersionHash() {
51 return hash; 49 return hash;
52} 50}
53 51
54template <typename T>
55std::vector<u8> CompressData(const T* source, std::size_t source_size) {
56 if (source_size > LZ4_MAX_INPUT_SIZE) {
57 // Source size exceeds LZ4 maximum input size
58 return {};
59 }
60 const auto source_size_int = static_cast<int>(source_size);
61 const int max_compressed_size = LZ4_compressBound(source_size_int);
62 std::vector<u8> compressed(max_compressed_size);
63 const int compressed_size = LZ4_compress_default(reinterpret_cast<const char*>(source),
64 reinterpret_cast<char*>(compressed.data()),
65 source_size_int, max_compressed_size);
66 if (compressed_size <= 0) {
67 // Compression failed
68 return {};
69 }
70 compressed.resize(compressed_size);
71 return compressed;
72}
73
74std::vector<u8> DecompressData(const std::vector<u8>& compressed, std::size_t uncompressed_size) {
75 std::vector<u8> uncompressed(uncompressed_size);
76 const int size_check = LZ4_decompress_safe(reinterpret_cast<const char*>(compressed.data()),
77 reinterpret_cast<char*>(uncompressed.data()),
78 static_cast<int>(compressed.size()),
79 static_cast<int>(uncompressed.size()));
80 if (static_cast<int>(uncompressed_size) != size_check) {
81 // Decompression failed
82 return {};
83 }
84 return uncompressed;
85}
86
87} // namespace 52} // namespace
88 53
89ShaderDiskCacheRaw::ShaderDiskCacheRaw(u64 unique_identifier, Maxwell::ShaderProgram program_type, 54ShaderDiskCacheRaw::ShaderDiskCacheRaw(u64 unique_identifier, Maxwell::ShaderProgram program_type,
@@ -294,7 +259,7 @@ ShaderDiskCacheOpenGL::LoadPrecompiledFile(FileUtil::IOFile& file) {
294 return {}; 259 return {};
295 } 260 }
296 261
297 dump.binary = DecompressData(compressed_binary, binary_length); 262 dump.binary = Common::Compression::DecompressDataLZ4(compressed_binary, binary_length);
298 if (dump.binary.empty()) { 263 if (dump.binary.empty()) {
299 return {}; 264 return {};
300 } 265 }
@@ -323,7 +288,7 @@ std::optional<ShaderDiskCacheDecompiled> ShaderDiskCacheOpenGL::LoadDecompiledEn
323 return {}; 288 return {};
324 } 289 }
325 290
326 const std::vector<u8> code = DecompressData(compressed_code, code_size); 291 const std::vector<u8> code = Common::Compression::DecompressDataLZ4(compressed_code, code_size);
327 if (code.empty()) { 292 if (code.empty()) {
328 return {}; 293 return {};
329 } 294 }
@@ -509,7 +474,8 @@ void ShaderDiskCacheOpenGL::SaveDecompiled(u64 unique_identifier, const std::str
509 if (!IsUsable()) 474 if (!IsUsable())
510 return; 475 return;
511 476
512 const std::vector<u8> compressed_code{CompressData(code.data(), code.size())}; 477 const std::vector<u8> compressed_code{Common::Compression::CompressDataLZ4HC(
478 reinterpret_cast<const u8*>(code.data()), code.size(), 9)};
513 if (compressed_code.empty()) { 479 if (compressed_code.empty()) {
514 LOG_ERROR(Render_OpenGL, "Failed to compress GLSL code - skipping shader {:016x}", 480 LOG_ERROR(Render_OpenGL, "Failed to compress GLSL code - skipping shader {:016x}",
515 unique_identifier); 481 unique_identifier);
@@ -539,7 +505,9 @@ void ShaderDiskCacheOpenGL::SaveDump(const ShaderDiskCacheUsage& usage, GLuint p
539 std::vector<u8> binary(binary_length); 505 std::vector<u8> binary(binary_length);
540 glGetProgramBinary(program, binary_length, nullptr, &binary_format, binary.data()); 506 glGetProgramBinary(program, binary_length, nullptr, &binary_format, binary.data());
541 507
542 const std::vector<u8> compressed_binary = CompressData(binary.data(), binary.size()); 508 const std::vector<u8> compressed_binary =
509 Common::Compression::CompressDataLZ4HC(binary.data(), binary.size(), 9);
510
543 if (compressed_binary.empty()) { 511 if (compressed_binary.empty()) {
544 LOG_ERROR(Render_OpenGL, "Failed to compress binary program in shader={:016x}", 512 LOG_ERROR(Render_OpenGL, "Failed to compress binary program in shader={:016x}",
545 usage.unique_identifier); 513 usage.unique_identifier);
diff --git a/src/video_core/renderer_opengl/gl_shader_gen.cpp b/src/video_core/renderer_opengl/gl_shader_gen.cpp
index 04e1db911..7d96649af 100644
--- a/src/video_core/renderer_opengl/gl_shader_gen.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_gen.cpp
@@ -124,7 +124,7 @@ layout (location = 5) out vec4 FragColor5;
124layout (location = 6) out vec4 FragColor6; 124layout (location = 6) out vec4 FragColor6;
125layout (location = 7) out vec4 FragColor7; 125layout (location = 7) out vec4 FragColor7;
126 126
127layout (location = 0) in vec4 position; 127layout (location = 0) in noperspective vec4 position;
128 128
129layout (std140, binding = EMULATION_UBO_BINDING) uniform fs_config { 129layout (std140, binding = EMULATION_UBO_BINDING) uniform fs_config {
130 vec4 viewport_flip; 130 vec4 viewport_flip;
@@ -172,4 +172,4 @@ void main() {
172 return {out, program.second}; 172 return {out, program.second};
173} 173}
174 174
175} // namespace OpenGL::GLShader \ No newline at end of file 175} // namespace OpenGL::GLShader
diff --git a/src/video_core/renderer_opengl/gl_shader_manager.cpp b/src/video_core/renderer_opengl/gl_shader_manager.cpp
index 6a30c28d2..eaf3e03a0 100644
--- a/src/video_core/renderer_opengl/gl_shader_manager.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_manager.cpp
@@ -2,15 +2,15 @@
2// Licensed under GPLv2 or any later version 2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included. 3// Refer to the license.txt file included.
4 4
5#include "core/core.h"
6#include "video_core/renderer_opengl/gl_shader_manager.h" 5#include "video_core/renderer_opengl/gl_shader_manager.h"
7 6
8namespace OpenGL::GLShader { 7namespace OpenGL::GLShader {
9 8
10void MaxwellUniformData::SetFromRegs(const Maxwell3D::State::ShaderStageInfo& shader_stage) { 9using Tegra::Engines::Maxwell3D;
11 const auto& gpu = Core::System::GetInstance().GPU().Maxwell3D(); 10
12 const auto& regs = gpu.regs; 11void MaxwellUniformData::SetFromRegs(const Maxwell3D& maxwell, std::size_t shader_stage) {
13 const auto& state = gpu.state; 12 const auto& regs = maxwell.regs;
13 const auto& state = maxwell.state;
14 14
15 // TODO(bunnei): Support more than one viewport 15 // TODO(bunnei): Support more than one viewport
16 viewport_flip[0] = regs.viewport_transform[0].scale_x < 0.0 ? -1.0f : 1.0f; 16 viewport_flip[0] = regs.viewport_transform[0].scale_x < 0.0 ? -1.0f : 1.0f;
@@ -18,7 +18,7 @@ void MaxwellUniformData::SetFromRegs(const Maxwell3D::State::ShaderStageInfo& sh
18 18
19 u32 func = static_cast<u32>(regs.alpha_test_func); 19 u32 func = static_cast<u32>(regs.alpha_test_func);
20 // Normalize the gl variants of opCompare to be the same as the normal variants 20 // Normalize the gl variants of opCompare to be the same as the normal variants
21 u32 op_gl_variant_base = static_cast<u32>(Tegra::Engines::Maxwell3D::Regs::ComparisonOp::Never); 21 const u32 op_gl_variant_base = static_cast<u32>(Maxwell3D::Regs::ComparisonOp::Never);
22 if (func >= op_gl_variant_base) { 22 if (func >= op_gl_variant_base) {
23 func = func - op_gl_variant_base + 1U; 23 func = func - op_gl_variant_base + 1U;
24 } 24 }
@@ -31,8 +31,9 @@ void MaxwellUniformData::SetFromRegs(const Maxwell3D::State::ShaderStageInfo& sh
31 31
32 // Assign in which stage the position has to be flipped 32 // Assign in which stage the position has to be flipped
33 // (the last stage before the fragment shader). 33 // (the last stage before the fragment shader).
34 if (gpu.regs.shader_config[static_cast<u32>(Maxwell3D::Regs::ShaderProgram::Geometry)].enable) { 34 constexpr u32 geometry_index = static_cast<u32>(Maxwell3D::Regs::ShaderProgram::Geometry);
35 flip_stage = static_cast<u32>(Maxwell3D::Regs::ShaderProgram::Geometry); 35 if (maxwell.regs.shader_config[geometry_index].enable) {
36 flip_stage = geometry_index;
36 } else { 37 } else {
37 flip_stage = static_cast<u32>(Maxwell3D::Regs::ShaderProgram::VertexB); 38 flip_stage = static_cast<u32>(Maxwell3D::Regs::ShaderProgram::VertexB);
38 } 39 }
diff --git a/src/video_core/renderer_opengl/gl_shader_manager.h b/src/video_core/renderer_opengl/gl_shader_manager.h
index 4970aafed..8eef2a920 100644
--- a/src/video_core/renderer_opengl/gl_shader_manager.h
+++ b/src/video_core/renderer_opengl/gl_shader_manager.h
@@ -12,14 +12,13 @@
12 12
13namespace OpenGL::GLShader { 13namespace OpenGL::GLShader {
14 14
15using Tegra::Engines::Maxwell3D;
16
17/// Uniform structure for the Uniform Buffer Object, all vectors must be 16-byte aligned 15/// Uniform structure for the Uniform Buffer Object, all vectors must be 16-byte aligned
18// NOTE: Always keep a vec4 at the end. The GL spec is not clear whether the alignment at 16/// @note Always keep a vec4 at the end. The GL spec is not clear whether the alignment at
19// the end of a uniform block is included in UNIFORM_BLOCK_DATA_SIZE or not. 17/// the end of a uniform block is included in UNIFORM_BLOCK_DATA_SIZE or not.
20// Not following that rule will cause problems on some AMD drivers. 18/// Not following that rule will cause problems on some AMD drivers.
21struct MaxwellUniformData { 19struct MaxwellUniformData {
22 void SetFromRegs(const Maxwell3D::State::ShaderStageInfo& shader_stage); 20 void SetFromRegs(const Tegra::Engines::Maxwell3D& maxwell, std::size_t shader_stage);
21
23 alignas(16) GLvec4 viewport_flip; 22 alignas(16) GLvec4 viewport_flip;
24 struct alignas(16) { 23 struct alignas(16) {
25 GLuint instance_id; 24 GLuint instance_id;
diff --git a/src/video_core/renderer_opengl/gl_state.cpp b/src/video_core/renderer_opengl/gl_state.cpp
index 81af803bc..9419326a3 100644
--- a/src/video_core/renderer_opengl/gl_state.cpp
+++ b/src/video_core/renderer_opengl/gl_state.cpp
@@ -11,7 +11,9 @@
11namespace OpenGL { 11namespace OpenGL {
12 12
13OpenGLState OpenGLState::cur_state; 13OpenGLState OpenGLState::cur_state;
14
14bool OpenGLState::s_rgb_used; 15bool OpenGLState::s_rgb_used;
16
15OpenGLState::OpenGLState() { 17OpenGLState::OpenGLState() {
16 // These all match default OpenGL values 18 // These all match default OpenGL values
17 geometry_shaders.enabled = false; 19 geometry_shaders.enabled = false;
@@ -112,7 +114,6 @@ void OpenGLState::ApplyDefaultState() {
112} 114}
113 115
114void OpenGLState::ApplySRgb() const { 116void OpenGLState::ApplySRgb() const {
115 // sRGB
116 if (framebuffer_srgb.enabled != cur_state.framebuffer_srgb.enabled) { 117 if (framebuffer_srgb.enabled != cur_state.framebuffer_srgb.enabled) {
117 if (framebuffer_srgb.enabled) { 118 if (framebuffer_srgb.enabled) {
118 // Track if sRGB is used 119 // Track if sRGB is used
@@ -125,23 +126,20 @@ void OpenGLState::ApplySRgb() const {
125} 126}
126 127
127void OpenGLState::ApplyCulling() const { 128void OpenGLState::ApplyCulling() const {
128 // Culling 129 if (cull.enabled != cur_state.cull.enabled) {
129 const bool cull_changed = cull.enabled != cur_state.cull.enabled;
130 if (cull_changed) {
131 if (cull.enabled) { 130 if (cull.enabled) {
132 glEnable(GL_CULL_FACE); 131 glEnable(GL_CULL_FACE);
133 } else { 132 } else {
134 glDisable(GL_CULL_FACE); 133 glDisable(GL_CULL_FACE);
135 } 134 }
136 } 135 }
137 if (cull.enabled) {
138 if (cull_changed || cull.mode != cur_state.cull.mode) {
139 glCullFace(cull.mode);
140 }
141 136
142 if (cull_changed || cull.front_face != cur_state.cull.front_face) { 137 if (cull.mode != cur_state.cull.mode) {
143 glFrontFace(cull.front_face); 138 glCullFace(cull.mode);
144 } 139 }
140
141 if (cull.front_face != cur_state.cull.front_face) {
142 glFrontFace(cull.front_face);
145 } 143 }
146} 144}
147 145
@@ -172,72 +170,63 @@ void OpenGLState::ApplyColorMask() const {
172} 170}
173 171
174void OpenGLState::ApplyDepth() const { 172void OpenGLState::ApplyDepth() const {
175 // Depth test 173 if (depth.test_enabled != cur_state.depth.test_enabled) {
176 const bool depth_test_changed = depth.test_enabled != cur_state.depth.test_enabled;
177 if (depth_test_changed) {
178 if (depth.test_enabled) { 174 if (depth.test_enabled) {
179 glEnable(GL_DEPTH_TEST); 175 glEnable(GL_DEPTH_TEST);
180 } else { 176 } else {
181 glDisable(GL_DEPTH_TEST); 177 glDisable(GL_DEPTH_TEST);
182 } 178 }
183 } 179 }
184 if (depth.test_enabled && 180
185 (depth_test_changed || depth.test_func != cur_state.depth.test_func)) { 181 if (depth.test_func != cur_state.depth.test_func) {
186 glDepthFunc(depth.test_func); 182 glDepthFunc(depth.test_func);
187 } 183 }
188 // Depth mask 184
189 if (depth.write_mask != cur_state.depth.write_mask) { 185 if (depth.write_mask != cur_state.depth.write_mask) {
190 glDepthMask(depth.write_mask); 186 glDepthMask(depth.write_mask);
191 } 187 }
192} 188}
193 189
194void OpenGLState::ApplyPrimitiveRestart() const { 190void OpenGLState::ApplyPrimitiveRestart() const {
195 const bool primitive_restart_changed = 191 if (primitive_restart.enabled != cur_state.primitive_restart.enabled) {
196 primitive_restart.enabled != cur_state.primitive_restart.enabled;
197 if (primitive_restart_changed) {
198 if (primitive_restart.enabled) { 192 if (primitive_restart.enabled) {
199 glEnable(GL_PRIMITIVE_RESTART); 193 glEnable(GL_PRIMITIVE_RESTART);
200 } else { 194 } else {
201 glDisable(GL_PRIMITIVE_RESTART); 195 glDisable(GL_PRIMITIVE_RESTART);
202 } 196 }
203 } 197 }
204 if (primitive_restart_changed || 198
205 (primitive_restart.enabled && 199 if (primitive_restart.index != cur_state.primitive_restart.index) {
206 primitive_restart.index != cur_state.primitive_restart.index)) {
207 glPrimitiveRestartIndex(primitive_restart.index); 200 glPrimitiveRestartIndex(primitive_restart.index);
208 } 201 }
209} 202}
210 203
211void OpenGLState::ApplyStencilTest() const { 204void OpenGLState::ApplyStencilTest() const {
212 const bool stencil_test_changed = stencil.test_enabled != cur_state.stencil.test_enabled; 205 if (stencil.test_enabled != cur_state.stencil.test_enabled) {
213 if (stencil_test_changed) {
214 if (stencil.test_enabled) { 206 if (stencil.test_enabled) {
215 glEnable(GL_STENCIL_TEST); 207 glEnable(GL_STENCIL_TEST);
216 } else { 208 } else {
217 glDisable(GL_STENCIL_TEST); 209 glDisable(GL_STENCIL_TEST);
218 } 210 }
219 } 211 }
220 if (stencil.test_enabled) { 212
221 auto config_stencil = [stencil_test_changed](GLenum face, const auto& config, 213 const auto ConfigStencil = [](GLenum face, const auto& config, const auto& prev_config) {
222 const auto& prev_config) { 214 if (config.test_func != prev_config.test_func || config.test_ref != prev_config.test_ref ||
223 if (stencil_test_changed || config.test_func != prev_config.test_func || 215 config.test_mask != prev_config.test_mask) {
224 config.test_ref != prev_config.test_ref || 216 glStencilFuncSeparate(face, config.test_func, config.test_ref, config.test_mask);
225 config.test_mask != prev_config.test_mask) { 217 }
226 glStencilFuncSeparate(face, config.test_func, config.test_ref, config.test_mask); 218 if (config.action_depth_fail != prev_config.action_depth_fail ||
227 } 219 config.action_depth_pass != prev_config.action_depth_pass ||
228 if (stencil_test_changed || config.action_depth_fail != prev_config.action_depth_fail || 220 config.action_stencil_fail != prev_config.action_stencil_fail) {
229 config.action_depth_pass != prev_config.action_depth_pass || 221 glStencilOpSeparate(face, config.action_stencil_fail, config.action_depth_fail,
230 config.action_stencil_fail != prev_config.action_stencil_fail) { 222 config.action_depth_pass);
231 glStencilOpSeparate(face, config.action_stencil_fail, config.action_depth_fail, 223 }
232 config.action_depth_pass); 224 if (config.write_mask != prev_config.write_mask) {
233 } 225 glStencilMaskSeparate(face, config.write_mask);
234 if (config.write_mask != prev_config.write_mask) { 226 }
235 glStencilMaskSeparate(face, config.write_mask); 227 };
236 } 228 ConfigStencil(GL_FRONT, stencil.front, cur_state.stencil.front);
237 }; 229 ConfigStencil(GL_BACK, stencil.back, cur_state.stencil.back);
238 config_stencil(GL_FRONT, stencil.front, cur_state.stencil.front);
239 config_stencil(GL_BACK, stencil.back, cur_state.stencil.back);
240 }
241} 230}
242// Viewport does not affects glClearBuffer so emulate viewport using scissor test 231// Viewport does not affects glClearBuffer so emulate viewport using scissor test
243void OpenGLState::EmulateViewportWithScissor() { 232void OpenGLState::EmulateViewportWithScissor() {
@@ -278,19 +267,18 @@ void OpenGLState::ApplyViewport() const {
278 updated.depth_range_far != current.depth_range_far) { 267 updated.depth_range_far != current.depth_range_far) {
279 glDepthRangeIndexed(i, updated.depth_range_near, updated.depth_range_far); 268 glDepthRangeIndexed(i, updated.depth_range_near, updated.depth_range_far);
280 } 269 }
281 const bool scissor_changed = updated.scissor.enabled != current.scissor.enabled; 270
282 if (scissor_changed) { 271 if (updated.scissor.enabled != current.scissor.enabled) {
283 if (updated.scissor.enabled) { 272 if (updated.scissor.enabled) {
284 glEnablei(GL_SCISSOR_TEST, i); 273 glEnablei(GL_SCISSOR_TEST, i);
285 } else { 274 } else {
286 glDisablei(GL_SCISSOR_TEST, i); 275 glDisablei(GL_SCISSOR_TEST, i);
287 } 276 }
288 } 277 }
289 if (updated.scissor.enabled && 278
290 (scissor_changed || updated.scissor.x != current.scissor.x || 279 if (updated.scissor.x != current.scissor.x || updated.scissor.y != current.scissor.y ||
291 updated.scissor.y != current.scissor.y || 280 updated.scissor.width != current.scissor.width ||
292 updated.scissor.width != current.scissor.width || 281 updated.scissor.height != current.scissor.height) {
293 updated.scissor.height != current.scissor.height)) {
294 glScissorIndexed(i, updated.scissor.x, updated.scissor.y, updated.scissor.width, 282 glScissorIndexed(i, updated.scissor.x, updated.scissor.y, updated.scissor.width,
295 updated.scissor.height); 283 updated.scissor.height);
296 } 284 }
@@ -302,22 +290,23 @@ void OpenGLState::ApplyViewport() const {
302 updated.height != current.height) { 290 updated.height != current.height) {
303 glViewport(updated.x, updated.y, updated.width, updated.height); 291 glViewport(updated.x, updated.y, updated.width, updated.height);
304 } 292 }
293
305 if (updated.depth_range_near != current.depth_range_near || 294 if (updated.depth_range_near != current.depth_range_near ||
306 updated.depth_range_far != current.depth_range_far) { 295 updated.depth_range_far != current.depth_range_far) {
307 glDepthRange(updated.depth_range_near, updated.depth_range_far); 296 glDepthRange(updated.depth_range_near, updated.depth_range_far);
308 } 297 }
309 const bool scissor_changed = updated.scissor.enabled != current.scissor.enabled; 298
310 if (scissor_changed) { 299 if (updated.scissor.enabled != current.scissor.enabled) {
311 if (updated.scissor.enabled) { 300 if (updated.scissor.enabled) {
312 glEnable(GL_SCISSOR_TEST); 301 glEnable(GL_SCISSOR_TEST);
313 } else { 302 } else {
314 glDisable(GL_SCISSOR_TEST); 303 glDisable(GL_SCISSOR_TEST);
315 } 304 }
316 } 305 }
317 if (updated.scissor.enabled && (scissor_changed || updated.scissor.x != current.scissor.x || 306
318 updated.scissor.y != current.scissor.y || 307 if (updated.scissor.x != current.scissor.x || updated.scissor.y != current.scissor.y ||
319 updated.scissor.width != current.scissor.width || 308 updated.scissor.width != current.scissor.width ||
320 updated.scissor.height != current.scissor.height)) { 309 updated.scissor.height != current.scissor.height) {
321 glScissor(updated.scissor.x, updated.scissor.y, updated.scissor.width, 310 glScissor(updated.scissor.x, updated.scissor.y, updated.scissor.width,
322 updated.scissor.height); 311 updated.scissor.height);
323 } 312 }
@@ -327,8 +316,7 @@ void OpenGLState::ApplyViewport() const {
327void OpenGLState::ApplyGlobalBlending() const { 316void OpenGLState::ApplyGlobalBlending() const {
328 const Blend& current = cur_state.blend[0]; 317 const Blend& current = cur_state.blend[0];
329 const Blend& updated = blend[0]; 318 const Blend& updated = blend[0];
330 const bool blend_changed = updated.enabled != current.enabled; 319 if (updated.enabled != current.enabled) {
331 if (blend_changed) {
332 if (updated.enabled) { 320 if (updated.enabled) {
333 glEnable(GL_BLEND); 321 glEnable(GL_BLEND);
334 } else { 322 } else {
@@ -338,15 +326,14 @@ void OpenGLState::ApplyGlobalBlending() const {
338 if (!updated.enabled) { 326 if (!updated.enabled) {
339 return; 327 return;
340 } 328 }
341 if (blend_changed || updated.src_rgb_func != current.src_rgb_func || 329 if (updated.src_rgb_func != current.src_rgb_func ||
342 updated.dst_rgb_func != current.dst_rgb_func || updated.src_a_func != current.src_a_func || 330 updated.dst_rgb_func != current.dst_rgb_func || updated.src_a_func != current.src_a_func ||
343 updated.dst_a_func != current.dst_a_func) { 331 updated.dst_a_func != current.dst_a_func) {
344 glBlendFuncSeparate(updated.src_rgb_func, updated.dst_rgb_func, updated.src_a_func, 332 glBlendFuncSeparate(updated.src_rgb_func, updated.dst_rgb_func, updated.src_a_func,
345 updated.dst_a_func); 333 updated.dst_a_func);
346 } 334 }
347 335
348 if (blend_changed || updated.rgb_equation != current.rgb_equation || 336 if (updated.rgb_equation != current.rgb_equation || updated.a_equation != current.a_equation) {
349 updated.a_equation != current.a_equation) {
350 glBlendEquationSeparate(updated.rgb_equation, updated.a_equation); 337 glBlendEquationSeparate(updated.rgb_equation, updated.a_equation);
351 } 338 }
352} 339}
@@ -354,26 +341,22 @@ void OpenGLState::ApplyGlobalBlending() const {
354void OpenGLState::ApplyTargetBlending(std::size_t target, bool force) const { 341void OpenGLState::ApplyTargetBlending(std::size_t target, bool force) const {
355 const Blend& updated = blend[target]; 342 const Blend& updated = blend[target];
356 const Blend& current = cur_state.blend[target]; 343 const Blend& current = cur_state.blend[target];
357 const bool blend_changed = updated.enabled != current.enabled || force; 344 if (updated.enabled != current.enabled || force) {
358 if (blend_changed) {
359 if (updated.enabled) { 345 if (updated.enabled) {
360 glEnablei(GL_BLEND, static_cast<GLuint>(target)); 346 glEnablei(GL_BLEND, static_cast<GLuint>(target));
361 } else { 347 } else {
362 glDisablei(GL_BLEND, static_cast<GLuint>(target)); 348 glDisablei(GL_BLEND, static_cast<GLuint>(target));
363 } 349 }
364 } 350 }
365 if (!updated.enabled) { 351
366 return; 352 if (updated.src_rgb_func != current.src_rgb_func ||
367 }
368 if (blend_changed || updated.src_rgb_func != current.src_rgb_func ||
369 updated.dst_rgb_func != current.dst_rgb_func || updated.src_a_func != current.src_a_func || 353 updated.dst_rgb_func != current.dst_rgb_func || updated.src_a_func != current.src_a_func ||
370 updated.dst_a_func != current.dst_a_func) { 354 updated.dst_a_func != current.dst_a_func) {
371 glBlendFuncSeparatei(static_cast<GLuint>(target), updated.src_rgb_func, 355 glBlendFuncSeparatei(static_cast<GLuint>(target), updated.src_rgb_func,
372 updated.dst_rgb_func, updated.src_a_func, updated.dst_a_func); 356 updated.dst_rgb_func, updated.src_a_func, updated.dst_a_func);
373 } 357 }
374 358
375 if (blend_changed || updated.rgb_equation != current.rgb_equation || 359 if (updated.rgb_equation != current.rgb_equation || updated.a_equation != current.a_equation) {
376 updated.a_equation != current.a_equation) {
377 glBlendEquationSeparatei(static_cast<GLuint>(target), updated.rgb_equation, 360 glBlendEquationSeparatei(static_cast<GLuint>(target), updated.rgb_equation,
378 updated.a_equation); 361 updated.a_equation);
379 } 362 }
@@ -397,8 +380,7 @@ void OpenGLState::ApplyBlending() const {
397} 380}
398 381
399void OpenGLState::ApplyLogicOp() const { 382void OpenGLState::ApplyLogicOp() const {
400 const bool logic_op_changed = logic_op.enabled != cur_state.logic_op.enabled; 383 if (logic_op.enabled != cur_state.logic_op.enabled) {
401 if (logic_op_changed) {
402 if (logic_op.enabled) { 384 if (logic_op.enabled) {
403 glEnable(GL_COLOR_LOGIC_OP); 385 glEnable(GL_COLOR_LOGIC_OP);
404 } else { 386 } else {
@@ -406,14 +388,12 @@ void OpenGLState::ApplyLogicOp() const {
406 } 388 }
407 } 389 }
408 390
409 if (logic_op.enabled && 391 if (logic_op.operation != cur_state.logic_op.operation) {
410 (logic_op_changed || logic_op.operation != cur_state.logic_op.operation)) {
411 glLogicOp(logic_op.operation); 392 glLogicOp(logic_op.operation);
412 } 393 }
413} 394}
414 395
415void OpenGLState::ApplyPolygonOffset() const { 396void OpenGLState::ApplyPolygonOffset() const {
416
417 const bool fill_enable_changed = 397 const bool fill_enable_changed =
418 polygon_offset.fill_enable != cur_state.polygon_offset.fill_enable; 398 polygon_offset.fill_enable != cur_state.polygon_offset.fill_enable;
419 const bool line_enable_changed = 399 const bool line_enable_changed =
@@ -448,9 +428,7 @@ void OpenGLState::ApplyPolygonOffset() const {
448 } 428 }
449 } 429 }
450 430
451 if ((polygon_offset.fill_enable || polygon_offset.line_enable || polygon_offset.point_enable) && 431 if (factor_changed || units_changed || clamp_changed) {
452 (factor_changed || units_changed || clamp_changed)) {
453
454 if (GLAD_GL_EXT_polygon_offset_clamp && polygon_offset.clamp != 0) { 432 if (GLAD_GL_EXT_polygon_offset_clamp && polygon_offset.clamp != 0) {
455 glPolygonOffsetClamp(polygon_offset.factor, polygon_offset.units, polygon_offset.clamp); 433 glPolygonOffsetClamp(polygon_offset.factor, polygon_offset.units, polygon_offset.clamp);
456 } else { 434 } else {
@@ -483,7 +461,7 @@ void OpenGLState::ApplyTextures() const {
483 461
484 if (has_delta) { 462 if (has_delta) {
485 glBindTextures(static_cast<GLuint>(first), static_cast<GLsizei>(last - first + 1), 463 glBindTextures(static_cast<GLuint>(first), static_cast<GLsizei>(last - first + 1),
486 textures.data()); 464 textures.data() + first);
487 } 465 }
488} 466}
489 467
@@ -504,7 +482,7 @@ void OpenGLState::ApplySamplers() const {
504 } 482 }
505 if (has_delta) { 483 if (has_delta) {
506 glBindSamplers(static_cast<GLuint>(first), static_cast<GLsizei>(last - first + 1), 484 glBindSamplers(static_cast<GLuint>(first), static_cast<GLsizei>(last - first + 1),
507 samplers.data()); 485 samplers.data() + first);
508 } 486 }
509} 487}
510 488
@@ -528,9 +506,9 @@ void OpenGLState::ApplyDepthClamp() const {
528 depth_clamp.near_plane == cur_state.depth_clamp.near_plane) { 506 depth_clamp.near_plane == cur_state.depth_clamp.near_plane) {
529 return; 507 return;
530 } 508 }
531 if (depth_clamp.far_plane != depth_clamp.near_plane) { 509 UNIMPLEMENTED_IF_MSG(depth_clamp.far_plane != depth_clamp.near_plane,
532 UNIMPLEMENTED_MSG("Unimplemented Depth Clamp Separation!"); 510 "Unimplemented Depth Clamp Separation!");
533 } 511
534 if (depth_clamp.far_plane || depth_clamp.near_plane) { 512 if (depth_clamp.far_plane || depth_clamp.near_plane) {
535 glEnable(GL_DEPTH_CLAMP); 513 glEnable(GL_DEPTH_CLAMP);
536 } else { 514 } else {
diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp
index 6476a9e1a..a01efeb05 100644
--- a/src/video_core/renderer_opengl/renderer_opengl.cpp
+++ b/src/video_core/renderer_opengl/renderer_opengl.cpp
@@ -107,7 +107,7 @@ RendererOpenGL::~RendererOpenGL() = default;
107void RendererOpenGL::SwapBuffers( 107void RendererOpenGL::SwapBuffers(
108 std::optional<std::reference_wrapper<const Tegra::FramebufferConfig>> framebuffer) { 108 std::optional<std::reference_wrapper<const Tegra::FramebufferConfig>> framebuffer) {
109 109
110 Core::System::GetInstance().GetPerfStats().EndSystemFrame(); 110 system.GetPerfStats().EndSystemFrame();
111 111
112 // Maintain the rasterizer's state as a priority 112 // Maintain the rasterizer's state as a priority
113 OpenGLState prev_state = OpenGLState::GetCurState(); 113 OpenGLState prev_state = OpenGLState::GetCurState();
@@ -137,8 +137,8 @@ void RendererOpenGL::SwapBuffers(
137 137
138 render_window.PollEvents(); 138 render_window.PollEvents();
139 139
140 Core::System::GetInstance().FrameLimiter().DoFrameLimiting(CoreTiming::GetGlobalTimeUs()); 140 system.FrameLimiter().DoFrameLimiting(system.CoreTiming().GetGlobalTimeUs());
141 Core::System::GetInstance().GetPerfStats().BeginSystemFrame(); 141 system.GetPerfStats().BeginSystemFrame();
142 142
143 // Restore the rasterizer state 143 // Restore the rasterizer state
144 prev_state.Apply(); 144 prev_state.Apply();
@@ -164,12 +164,13 @@ void RendererOpenGL::LoadFBToScreenInfo(const Tegra::FramebufferConfig& framebuf
164 // Reset the screen info's display texture to its own permanent texture 164 // Reset the screen info's display texture to its own permanent texture
165 screen_info.display_texture = screen_info.texture.resource.handle; 165 screen_info.display_texture = screen_info.texture.resource.handle;
166 166
167 Memory::RasterizerFlushVirtualRegion(framebuffer_addr, size_in_bytes, 167 rasterizer->FlushRegion(ToCacheAddr(Memory::GetPointer(framebuffer_addr)), size_in_bytes);
168 Memory::FlushMode::Flush);
169 168
170 VideoCore::MortonCopyPixels128(framebuffer.width, framebuffer.height, bytes_per_pixel, 4, 169 constexpr u32 linear_bpp = 4;
171 Memory::GetPointer(framebuffer_addr), 170 VideoCore::MortonCopyPixels128(VideoCore::MortonSwizzleMode::MortonToLinear,
172 gl_framebuffer_data.data(), true); 171 framebuffer.width, framebuffer.height, bytes_per_pixel,
172 linear_bpp, Memory::GetPointer(framebuffer_addr),
173 gl_framebuffer_data.data());
173 174
174 glPixelStorei(GL_UNPACK_ROW_LENGTH, static_cast<GLint>(framebuffer.stride)); 175 glPixelStorei(GL_UNPACK_ROW_LENGTH, static_cast<GLint>(framebuffer.stride));
175 176
@@ -244,19 +245,35 @@ void RendererOpenGL::InitOpenGLObjects() {
244 LoadColorToActiveGLTexture(0, 0, 0, 0, screen_info.texture); 245 LoadColorToActiveGLTexture(0, 0, 0, 0, screen_info.texture);
245} 246}
246 247
248void RendererOpenGL::AddTelemetryFields() {
249 const char* const gl_version{reinterpret_cast<char const*>(glGetString(GL_VERSION))};
250 const char* const gpu_vendor{reinterpret_cast<char const*>(glGetString(GL_VENDOR))};
251 const char* const gpu_model{reinterpret_cast<char const*>(glGetString(GL_RENDERER))};
252
253 LOG_INFO(Render_OpenGL, "GL_VERSION: {}", gl_version);
254 LOG_INFO(Render_OpenGL, "GL_VENDOR: {}", gpu_vendor);
255 LOG_INFO(Render_OpenGL, "GL_RENDERER: {}", gpu_model);
256
257 auto& telemetry_session = system.TelemetrySession();
258 telemetry_session.AddField(Telemetry::FieldType::UserSystem, "GPU_Vendor", gpu_vendor);
259 telemetry_session.AddField(Telemetry::FieldType::UserSystem, "GPU_Model", gpu_model);
260 telemetry_session.AddField(Telemetry::FieldType::UserSystem, "GPU_OpenGL_Version", gl_version);
261}
262
247void RendererOpenGL::CreateRasterizer() { 263void RendererOpenGL::CreateRasterizer() {
248 if (rasterizer) { 264 if (rasterizer) {
249 return; 265 return;
250 } 266 }
251 // Initialize sRGB Usage 267 // Initialize sRGB Usage
252 OpenGLState::ClearsRGBUsed(); 268 OpenGLState::ClearsRGBUsed();
253 rasterizer = std::make_unique<RasterizerOpenGL>(render_window, system, screen_info); 269 rasterizer = std::make_unique<RasterizerOpenGL>(system, screen_info);
254} 270}
255 271
256void RendererOpenGL::ConfigureFramebufferTexture(TextureInfo& texture, 272void RendererOpenGL::ConfigureFramebufferTexture(TextureInfo& texture,
257 const Tegra::FramebufferConfig& framebuffer) { 273 const Tegra::FramebufferConfig& framebuffer) {
258 texture.width = framebuffer.width; 274 texture.width = framebuffer.width;
259 texture.height = framebuffer.height; 275 texture.height = framebuffer.height;
276 texture.pixel_format = framebuffer.pixel_format;
260 277
261 GLint internal_format; 278 GLint internal_format;
262 switch (framebuffer.pixel_format) { 279 switch (framebuffer.pixel_format) {
@@ -380,7 +397,8 @@ void RendererOpenGL::CaptureScreenshot() {
380 GLuint renderbuffer; 397 GLuint renderbuffer;
381 glGenRenderbuffers(1, &renderbuffer); 398 glGenRenderbuffers(1, &renderbuffer);
382 glBindRenderbuffer(GL_RENDERBUFFER, renderbuffer); 399 glBindRenderbuffer(GL_RENDERBUFFER, renderbuffer);
383 glRenderbufferStorage(GL_RENDERBUFFER, GL_RGB8, layout.width, layout.height); 400 glRenderbufferStorage(GL_RENDERBUFFER, state.GetsRGBUsed() ? GL_SRGB8 : GL_RGB8, layout.width,
401 layout.height);
384 glFramebufferRenderbuffer(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_RENDERBUFFER, renderbuffer); 402 glFramebufferRenderbuffer(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_RENDERBUFFER, renderbuffer);
385 403
386 DrawScreen(layout); 404 DrawScreen(layout);
@@ -464,17 +482,7 @@ bool RendererOpenGL::Init() {
464 glDebugMessageCallback(DebugHandler, nullptr); 482 glDebugMessageCallback(DebugHandler, nullptr);
465 } 483 }
466 484
467 const char* gl_version{reinterpret_cast<char const*>(glGetString(GL_VERSION))}; 485 AddTelemetryFields();
468 const char* gpu_vendor{reinterpret_cast<char const*>(glGetString(GL_VENDOR))};
469 const char* gpu_model{reinterpret_cast<char const*>(glGetString(GL_RENDERER))};
470
471 LOG_INFO(Render_OpenGL, "GL_VERSION: {}", gl_version);
472 LOG_INFO(Render_OpenGL, "GL_VENDOR: {}", gpu_vendor);
473 LOG_INFO(Render_OpenGL, "GL_RENDERER: {}", gpu_model);
474
475 Core::Telemetry().AddField(Telemetry::FieldType::UserSystem, "GPU_Vendor", gpu_vendor);
476 Core::Telemetry().AddField(Telemetry::FieldType::UserSystem, "GPU_Model", gpu_model);
477 Core::Telemetry().AddField(Telemetry::FieldType::UserSystem, "GPU_OpenGL_Version", gl_version);
478 486
479 if (!GLAD_GL_VERSION_4_3) { 487 if (!GLAD_GL_VERSION_4_3) {
480 return false; 488 return false;
diff --git a/src/video_core/renderer_opengl/renderer_opengl.h b/src/video_core/renderer_opengl/renderer_opengl.h
index 7e13e566b..6cbf9d2cb 100644
--- a/src/video_core/renderer_opengl/renderer_opengl.h
+++ b/src/video_core/renderer_opengl/renderer_opengl.h
@@ -39,7 +39,7 @@ struct TextureInfo {
39/// Structure used for storing information about the display target for the Switch screen 39/// Structure used for storing information about the display target for the Switch screen
40struct ScreenInfo { 40struct ScreenInfo {
41 GLuint display_texture; 41 GLuint display_texture;
42 const MathUtil::Rectangle<float> display_texcoords{0.0f, 0.0f, 1.0f, 1.0f}; 42 const Common::Rectangle<float> display_texcoords{0.0f, 0.0f, 1.0f, 1.0f};
43 TextureInfo texture; 43 TextureInfo texture;
44}; 44};
45 45
@@ -60,6 +60,7 @@ public:
60 60
61private: 61private:
62 void InitOpenGLObjects(); 62 void InitOpenGLObjects();
63 void AddTelemetryFields();
63 void CreateRasterizer(); 64 void CreateRasterizer();
64 65
65 void ConfigureFramebufferTexture(TextureInfo& texture, 66 void ConfigureFramebufferTexture(TextureInfo& texture,
@@ -102,7 +103,7 @@ private:
102 103
103 /// Used for transforming the framebuffer orientation 104 /// Used for transforming the framebuffer orientation
104 Tegra::FramebufferConfig::TransformFlags framebuffer_transform_flags; 105 Tegra::FramebufferConfig::TransformFlags framebuffer_transform_flags;
105 MathUtil::Rectangle<int> framebuffer_crop_rect; 106 Common::Rectangle<int> framebuffer_crop_rect;
106}; 107};
107 108
108} // namespace OpenGL 109} // namespace OpenGL
diff --git a/src/video_core/renderer_vulkan/declarations.h b/src/video_core/renderer_vulkan/declarations.h
new file mode 100644
index 000000000..ba25b5bc7
--- /dev/null
+++ b/src/video_core/renderer_vulkan/declarations.h
@@ -0,0 +1,45 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <vulkan/vulkan.hpp>
8
9namespace Vulkan {
10
11// vulkan.hpp unique handlers use DispatchLoaderStatic
12template <typename T>
13using UniqueHandle = vk::UniqueHandle<T, vk::DispatchLoaderDynamic>;
14
15using UniqueAccelerationStructureNV = UniqueHandle<vk::AccelerationStructureNV>;
16using UniqueBuffer = UniqueHandle<vk::Buffer>;
17using UniqueBufferView = UniqueHandle<vk::BufferView>;
18using UniqueCommandBuffer = UniqueHandle<vk::CommandBuffer>;
19using UniqueCommandPool = UniqueHandle<vk::CommandPool>;
20using UniqueDescriptorPool = UniqueHandle<vk::DescriptorPool>;
21using UniqueDescriptorSet = UniqueHandle<vk::DescriptorSet>;
22using UniqueDescriptorSetLayout = UniqueHandle<vk::DescriptorSetLayout>;
23using UniqueDescriptorUpdateTemplate = UniqueHandle<vk::DescriptorUpdateTemplate>;
24using UniqueDevice = UniqueHandle<vk::Device>;
25using UniqueDeviceMemory = UniqueHandle<vk::DeviceMemory>;
26using UniqueEvent = UniqueHandle<vk::Event>;
27using UniqueFence = UniqueHandle<vk::Fence>;
28using UniqueFramebuffer = UniqueHandle<vk::Framebuffer>;
29using UniqueImage = UniqueHandle<vk::Image>;
30using UniqueImageView = UniqueHandle<vk::ImageView>;
31using UniqueIndirectCommandsLayoutNVX = UniqueHandle<vk::IndirectCommandsLayoutNVX>;
32using UniqueObjectTableNVX = UniqueHandle<vk::ObjectTableNVX>;
33using UniquePipeline = UniqueHandle<vk::Pipeline>;
34using UniquePipelineCache = UniqueHandle<vk::PipelineCache>;
35using UniquePipelineLayout = UniqueHandle<vk::PipelineLayout>;
36using UniqueQueryPool = UniqueHandle<vk::QueryPool>;
37using UniqueRenderPass = UniqueHandle<vk::RenderPass>;
38using UniqueSampler = UniqueHandle<vk::Sampler>;
39using UniqueSamplerYcbcrConversion = UniqueHandle<vk::SamplerYcbcrConversion>;
40using UniqueSemaphore = UniqueHandle<vk::Semaphore>;
41using UniqueShaderModule = UniqueHandle<vk::ShaderModule>;
42using UniqueSwapchainKHR = UniqueHandle<vk::SwapchainKHR>;
43using UniqueValidationCacheEXT = UniqueHandle<vk::ValidationCacheEXT>;
44
45} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp
new file mode 100644
index 000000000..34bf26ff2
--- /dev/null
+++ b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp
@@ -0,0 +1,483 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/assert.h"
6#include "common/common_types.h"
7#include "common/logging/log.h"
8#include "video_core/engines/maxwell_3d.h"
9#include "video_core/renderer_vulkan/declarations.h"
10#include "video_core/renderer_vulkan/maxwell_to_vk.h"
11#include "video_core/renderer_vulkan/vk_device.h"
12#include "video_core/surface.h"
13
14namespace Vulkan::MaxwellToVK {
15
16namespace Sampler {
17
18vk::Filter Filter(Tegra::Texture::TextureFilter filter) {
19 switch (filter) {
20 case Tegra::Texture::TextureFilter::Linear:
21 return vk::Filter::eLinear;
22 case Tegra::Texture::TextureFilter::Nearest:
23 return vk::Filter::eNearest;
24 }
25 UNIMPLEMENTED_MSG("Unimplemented sampler filter={}", static_cast<u32>(filter));
26 return {};
27}
28
29vk::SamplerMipmapMode MipmapMode(Tegra::Texture::TextureMipmapFilter mipmap_filter) {
30 switch (mipmap_filter) {
31 case Tegra::Texture::TextureMipmapFilter::None:
32 // TODO(Rodrigo): None seems to be mapped to OpenGL's mag and min filters without mipmapping
33 // (e.g. GL_NEAREST and GL_LINEAR). Vulkan doesn't have such a thing, find out if we have to
34 // use an image view with a single mipmap level to emulate this.
35 return vk::SamplerMipmapMode::eLinear;
36 case Tegra::Texture::TextureMipmapFilter::Linear:
37 return vk::SamplerMipmapMode::eLinear;
38 case Tegra::Texture::TextureMipmapFilter::Nearest:
39 return vk::SamplerMipmapMode::eNearest;
40 }
41 UNIMPLEMENTED_MSG("Unimplemented sampler mipmap mode={}", static_cast<u32>(mipmap_filter));
42 return {};
43}
44
45vk::SamplerAddressMode WrapMode(Tegra::Texture::WrapMode wrap_mode) {
46 switch (wrap_mode) {
47 case Tegra::Texture::WrapMode::Wrap:
48 return vk::SamplerAddressMode::eRepeat;
49 case Tegra::Texture::WrapMode::Mirror:
50 return vk::SamplerAddressMode::eMirroredRepeat;
51 case Tegra::Texture::WrapMode::ClampToEdge:
52 return vk::SamplerAddressMode::eClampToEdge;
53 case Tegra::Texture::WrapMode::Border:
54 return vk::SamplerAddressMode::eClampToBorder;
55 case Tegra::Texture::WrapMode::ClampOGL:
56 // TODO(Rodrigo): GL_CLAMP was removed as of OpenGL 3.1, to implement GL_CLAMP, we can use
57 // eClampToBorder to get the border color of the texture, and then sample the edge to
58 // manually mix them. However the shader part of this is not yet implemented.
59 return vk::SamplerAddressMode::eClampToBorder;
60 case Tegra::Texture::WrapMode::MirrorOnceClampToEdge:
61 return vk::SamplerAddressMode::eMirrorClampToEdge;
62 case Tegra::Texture::WrapMode::MirrorOnceBorder:
63 UNIMPLEMENTED();
64 return vk::SamplerAddressMode::eMirrorClampToEdge;
65 }
66 UNIMPLEMENTED_MSG("Unimplemented wrap mode={}", static_cast<u32>(wrap_mode));
67 return {};
68}
69
70vk::CompareOp DepthCompareFunction(Tegra::Texture::DepthCompareFunc depth_compare_func) {
71 switch (depth_compare_func) {
72 case Tegra::Texture::DepthCompareFunc::Never:
73 return vk::CompareOp::eNever;
74 case Tegra::Texture::DepthCompareFunc::Less:
75 return vk::CompareOp::eLess;
76 case Tegra::Texture::DepthCompareFunc::LessEqual:
77 return vk::CompareOp::eLessOrEqual;
78 case Tegra::Texture::DepthCompareFunc::Equal:
79 return vk::CompareOp::eEqual;
80 case Tegra::Texture::DepthCompareFunc::NotEqual:
81 return vk::CompareOp::eNotEqual;
82 case Tegra::Texture::DepthCompareFunc::Greater:
83 return vk::CompareOp::eGreater;
84 case Tegra::Texture::DepthCompareFunc::GreaterEqual:
85 return vk::CompareOp::eGreaterOrEqual;
86 case Tegra::Texture::DepthCompareFunc::Always:
87 return vk::CompareOp::eAlways;
88 }
89 UNIMPLEMENTED_MSG("Unimplemented sampler depth compare function={}",
90 static_cast<u32>(depth_compare_func));
91 return {};
92}
93
94} // namespace Sampler
95
96struct FormatTuple {
97 vk::Format format; ///< Vulkan format
98 ComponentType component_type; ///< Abstracted component type
99 bool attachable; ///< True when this format can be used as an attachment
100};
101
102static constexpr std::array<FormatTuple, VideoCore::Surface::MaxPixelFormat> tex_format_tuples = {{
103 {vk::Format::eA8B8G8R8UnormPack32, ComponentType::UNorm, true}, // ABGR8U
104 {vk::Format::eUndefined, ComponentType::Invalid, false}, // ABGR8S
105 {vk::Format::eUndefined, ComponentType::Invalid, false}, // ABGR8UI
106 {vk::Format::eB5G6R5UnormPack16, ComponentType::UNorm, false}, // B5G6R5U
107 {vk::Format::eA2B10G10R10UnormPack32, ComponentType::UNorm, true}, // A2B10G10R10U
108 {vk::Format::eUndefined, ComponentType::Invalid, false}, // A1B5G5R5U
109 {vk::Format::eR8Unorm, ComponentType::UNorm, true}, // R8U
110 {vk::Format::eUndefined, ComponentType::Invalid, false}, // R8UI
111 {vk::Format::eUndefined, ComponentType::Invalid, false}, // RGBA16F
112 {vk::Format::eUndefined, ComponentType::Invalid, false}, // RGBA16U
113 {vk::Format::eUndefined, ComponentType::Invalid, false}, // RGBA16UI
114 {vk::Format::eUndefined, ComponentType::Invalid, false}, // R11FG11FB10F
115 {vk::Format::eUndefined, ComponentType::Invalid, false}, // RGBA32UI
116 {vk::Format::eBc1RgbaUnormBlock, ComponentType::UNorm, false}, // DXT1
117 {vk::Format::eBc2UnormBlock, ComponentType::UNorm, false}, // DXT23
118 {vk::Format::eBc3UnormBlock, ComponentType::UNorm, false}, // DXT45
119 {vk::Format::eBc4UnormBlock, ComponentType::UNorm, false}, // DXN1
120 {vk::Format::eUndefined, ComponentType::Invalid, false}, // DXN2UNORM
121 {vk::Format::eUndefined, ComponentType::Invalid, false}, // DXN2SNORM
122 {vk::Format::eUndefined, ComponentType::Invalid, false}, // BC7U
123 {vk::Format::eUndefined, ComponentType::Invalid, false}, // BC6H_UF16
124 {vk::Format::eUndefined, ComponentType::Invalid, false}, // BC6H_SF16
125 {vk::Format::eUndefined, ComponentType::Invalid, false}, // ASTC_2D_4X4
126 {vk::Format::eUndefined, ComponentType::Invalid, false}, // BGRA8
127 {vk::Format::eUndefined, ComponentType::Invalid, false}, // RGBA32F
128 {vk::Format::eUndefined, ComponentType::Invalid, false}, // RG32F
129 {vk::Format::eUndefined, ComponentType::Invalid, false}, // R32F
130 {vk::Format::eUndefined, ComponentType::Invalid, false}, // R16F
131 {vk::Format::eUndefined, ComponentType::Invalid, false}, // R16U
132 {vk::Format::eUndefined, ComponentType::Invalid, false}, // R16S
133 {vk::Format::eUndefined, ComponentType::Invalid, false}, // R16UI
134 {vk::Format::eUndefined, ComponentType::Invalid, false}, // R16I
135 {vk::Format::eUndefined, ComponentType::Invalid, false}, // RG16
136 {vk::Format::eUndefined, ComponentType::Invalid, false}, // RG16F
137 {vk::Format::eUndefined, ComponentType::Invalid, false}, // RG16UI
138 {vk::Format::eUndefined, ComponentType::Invalid, false}, // RG16I
139 {vk::Format::eUndefined, ComponentType::Invalid, false}, // RG16S
140 {vk::Format::eUndefined, ComponentType::Invalid, false}, // RGB32F
141 {vk::Format::eA8B8G8R8SrgbPack32, ComponentType::UNorm, true}, // RGBA8_SRGB
142 {vk::Format::eUndefined, ComponentType::Invalid, false}, // RG8U
143 {vk::Format::eUndefined, ComponentType::Invalid, false}, // RG8S
144 {vk::Format::eUndefined, ComponentType::Invalid, false}, // RG32UI
145 {vk::Format::eUndefined, ComponentType::Invalid, false}, // R32UI
146 {vk::Format::eUndefined, ComponentType::Invalid, false}, // ASTC_2D_8X8
147 {vk::Format::eUndefined, ComponentType::Invalid, false}, // ASTC_2D_8X5
148 {vk::Format::eUndefined, ComponentType::Invalid, false}, // ASTC_2D_5X4
149
150 // Compressed sRGB formats
151 {vk::Format::eUndefined, ComponentType::Invalid, false}, // BGRA8_SRGB
152 {vk::Format::eUndefined, ComponentType::Invalid, false}, // DXT1_SRGB
153 {vk::Format::eUndefined, ComponentType::Invalid, false}, // DXT23_SRGB
154 {vk::Format::eUndefined, ComponentType::Invalid, false}, // DXT45_SRGB
155 {vk::Format::eUndefined, ComponentType::Invalid, false}, // BC7U_SRGB
156 {vk::Format::eUndefined, ComponentType::Invalid, false}, // ASTC_2D_4X4_SRGB
157 {vk::Format::eUndefined, ComponentType::Invalid, false}, // ASTC_2D_8X8_SRGB
158 {vk::Format::eUndefined, ComponentType::Invalid, false}, // ASTC_2D_8X5_SRGB
159 {vk::Format::eUndefined, ComponentType::Invalid, false}, // ASTC_2D_5X4_SRGB
160 {vk::Format::eUndefined, ComponentType::Invalid, false}, // ASTC_2D_5X5
161 {vk::Format::eUndefined, ComponentType::Invalid, false}, // ASTC_2D_5X5_SRGB
162 {vk::Format::eUndefined, ComponentType::Invalid, false}, // ASTC_2D_10X8
163 {vk::Format::eUndefined, ComponentType::Invalid, false}, // ASTC_2D_10X8_SRGB
164
165 // Depth formats
166 {vk::Format::eD32Sfloat, ComponentType::Float, true}, // Z32F
167 {vk::Format::eD16Unorm, ComponentType::UNorm, true}, // Z16
168
169 // DepthStencil formats
170 {vk::Format::eD24UnormS8Uint, ComponentType::UNorm, true}, // Z24S8
171 {vk::Format::eD24UnormS8Uint, ComponentType::UNorm, true}, // S8Z24 (emulated)
172 {vk::Format::eUndefined, ComponentType::Invalid, false}, // Z32FS8
173}};
174
175static constexpr bool IsZetaFormat(PixelFormat pixel_format) {
176 return pixel_format >= PixelFormat::MaxColorFormat &&
177 pixel_format < PixelFormat::MaxDepthStencilFormat;
178}
179
180std::pair<vk::Format, bool> SurfaceFormat(const VKDevice& device, FormatType format_type,
181 PixelFormat pixel_format, ComponentType component_type) {
182 ASSERT(static_cast<std::size_t>(pixel_format) < tex_format_tuples.size());
183
184 const auto tuple = tex_format_tuples[static_cast<u32>(pixel_format)];
185 UNIMPLEMENTED_IF_MSG(tuple.format == vk::Format::eUndefined,
186 "Unimplemented texture format with pixel format={} and component type={}",
187 static_cast<u32>(pixel_format), static_cast<u32>(component_type));
188 ASSERT_MSG(component_type == tuple.component_type, "Component type mismatch");
189
190 auto usage = vk::FormatFeatureFlagBits::eSampledImage |
191 vk::FormatFeatureFlagBits::eTransferDst | vk::FormatFeatureFlagBits::eTransferSrc;
192 if (tuple.attachable) {
193 usage |= IsZetaFormat(pixel_format) ? vk::FormatFeatureFlagBits::eDepthStencilAttachment
194 : vk::FormatFeatureFlagBits::eColorAttachment;
195 }
196 return {device.GetSupportedFormat(tuple.format, usage, format_type), tuple.attachable};
197}
198
199vk::ShaderStageFlagBits ShaderStage(Maxwell::ShaderStage stage) {
200 switch (stage) {
201 case Maxwell::ShaderStage::Vertex:
202 return vk::ShaderStageFlagBits::eVertex;
203 case Maxwell::ShaderStage::TesselationControl:
204 return vk::ShaderStageFlagBits::eTessellationControl;
205 case Maxwell::ShaderStage::TesselationEval:
206 return vk::ShaderStageFlagBits::eTessellationEvaluation;
207 case Maxwell::ShaderStage::Geometry:
208 return vk::ShaderStageFlagBits::eGeometry;
209 case Maxwell::ShaderStage::Fragment:
210 return vk::ShaderStageFlagBits::eFragment;
211 }
212 UNIMPLEMENTED_MSG("Unimplemented shader stage={}", static_cast<u32>(stage));
213 return {};
214}
215
216vk::PrimitiveTopology PrimitiveTopology(Maxwell::PrimitiveTopology topology) {
217 switch (topology) {
218 case Maxwell::PrimitiveTopology::Points:
219 return vk::PrimitiveTopology::ePointList;
220 case Maxwell::PrimitiveTopology::Lines:
221 return vk::PrimitiveTopology::eLineList;
222 case Maxwell::PrimitiveTopology::LineStrip:
223 return vk::PrimitiveTopology::eLineStrip;
224 case Maxwell::PrimitiveTopology::Triangles:
225 return vk::PrimitiveTopology::eTriangleList;
226 case Maxwell::PrimitiveTopology::TriangleStrip:
227 return vk::PrimitiveTopology::eTriangleStrip;
228 }
229 UNIMPLEMENTED_MSG("Unimplemented topology={}", static_cast<u32>(topology));
230 return {};
231}
232
233vk::Format VertexFormat(Maxwell::VertexAttribute::Type type, Maxwell::VertexAttribute::Size size) {
234 switch (type) {
235 case Maxwell::VertexAttribute::Type::SignedNorm:
236 break;
237 case Maxwell::VertexAttribute::Type::UnsignedNorm:
238 switch (size) {
239 case Maxwell::VertexAttribute::Size::Size_8_8_8_8:
240 return vk::Format::eR8G8B8A8Unorm;
241 default:
242 break;
243 }
244 break;
245 case Maxwell::VertexAttribute::Type::SignedInt:
246 break;
247 case Maxwell::VertexAttribute::Type::UnsignedInt:
248 switch (size) {
249 case Maxwell::VertexAttribute::Size::Size_32:
250 return vk::Format::eR32Uint;
251 default:
252 break;
253 }
254 case Maxwell::VertexAttribute::Type::UnsignedScaled:
255 case Maxwell::VertexAttribute::Type::SignedScaled:
256 break;
257 case Maxwell::VertexAttribute::Type::Float:
258 switch (size) {
259 case Maxwell::VertexAttribute::Size::Size_32_32_32_32:
260 return vk::Format::eR32G32B32A32Sfloat;
261 case Maxwell::VertexAttribute::Size::Size_32_32_32:
262 return vk::Format::eR32G32B32Sfloat;
263 case Maxwell::VertexAttribute::Size::Size_32_32:
264 return vk::Format::eR32G32Sfloat;
265 case Maxwell::VertexAttribute::Size::Size_32:
266 return vk::Format::eR32Sfloat;
267 default:
268 break;
269 }
270 break;
271 }
272 UNIMPLEMENTED_MSG("Unimplemented vertex format of type={} and size={}", static_cast<u32>(type),
273 static_cast<u32>(size));
274 return {};
275}
276
277vk::CompareOp ComparisonOp(Maxwell::ComparisonOp comparison) {
278 switch (comparison) {
279 case Maxwell::ComparisonOp::Never:
280 case Maxwell::ComparisonOp::NeverOld:
281 return vk::CompareOp::eNever;
282 case Maxwell::ComparisonOp::Less:
283 case Maxwell::ComparisonOp::LessOld:
284 return vk::CompareOp::eLess;
285 case Maxwell::ComparisonOp::Equal:
286 case Maxwell::ComparisonOp::EqualOld:
287 return vk::CompareOp::eEqual;
288 case Maxwell::ComparisonOp::LessEqual:
289 case Maxwell::ComparisonOp::LessEqualOld:
290 return vk::CompareOp::eLessOrEqual;
291 case Maxwell::ComparisonOp::Greater:
292 case Maxwell::ComparisonOp::GreaterOld:
293 return vk::CompareOp::eGreater;
294 case Maxwell::ComparisonOp::NotEqual:
295 case Maxwell::ComparisonOp::NotEqualOld:
296 return vk::CompareOp::eNotEqual;
297 case Maxwell::ComparisonOp::GreaterEqual:
298 case Maxwell::ComparisonOp::GreaterEqualOld:
299 return vk::CompareOp::eGreaterOrEqual;
300 case Maxwell::ComparisonOp::Always:
301 case Maxwell::ComparisonOp::AlwaysOld:
302 return vk::CompareOp::eAlways;
303 }
304 UNIMPLEMENTED_MSG("Unimplemented comparison op={}", static_cast<u32>(comparison));
305 return {};
306}
307
308vk::IndexType IndexFormat(Maxwell::IndexFormat index_format) {
309 switch (index_format) {
310 case Maxwell::IndexFormat::UnsignedByte:
311 UNIMPLEMENTED_MSG("Vulkan does not support native u8 index format");
312 return vk::IndexType::eUint16;
313 case Maxwell::IndexFormat::UnsignedShort:
314 return vk::IndexType::eUint16;
315 case Maxwell::IndexFormat::UnsignedInt:
316 return vk::IndexType::eUint32;
317 }
318 UNIMPLEMENTED_MSG("Unimplemented index_format={}", static_cast<u32>(index_format));
319 return {};
320}
321
322vk::StencilOp StencilOp(Maxwell::StencilOp stencil_op) {
323 switch (stencil_op) {
324 case Maxwell::StencilOp::Keep:
325 case Maxwell::StencilOp::KeepOGL:
326 return vk::StencilOp::eKeep;
327 case Maxwell::StencilOp::Zero:
328 case Maxwell::StencilOp::ZeroOGL:
329 return vk::StencilOp::eZero;
330 case Maxwell::StencilOp::Replace:
331 case Maxwell::StencilOp::ReplaceOGL:
332 return vk::StencilOp::eReplace;
333 case Maxwell::StencilOp::Incr:
334 case Maxwell::StencilOp::IncrOGL:
335 return vk::StencilOp::eIncrementAndClamp;
336 case Maxwell::StencilOp::Decr:
337 case Maxwell::StencilOp::DecrOGL:
338 return vk::StencilOp::eDecrementAndClamp;
339 case Maxwell::StencilOp::Invert:
340 case Maxwell::StencilOp::InvertOGL:
341 return vk::StencilOp::eInvert;
342 case Maxwell::StencilOp::IncrWrap:
343 case Maxwell::StencilOp::IncrWrapOGL:
344 return vk::StencilOp::eIncrementAndWrap;
345 case Maxwell::StencilOp::DecrWrap:
346 case Maxwell::StencilOp::DecrWrapOGL:
347 return vk::StencilOp::eDecrementAndWrap;
348 }
349 UNIMPLEMENTED_MSG("Unimplemented stencil op={}", static_cast<u32>(stencil_op));
350 return {};
351}
352
353vk::BlendOp BlendEquation(Maxwell::Blend::Equation equation) {
354 switch (equation) {
355 case Maxwell::Blend::Equation::Add:
356 case Maxwell::Blend::Equation::AddGL:
357 return vk::BlendOp::eAdd;
358 case Maxwell::Blend::Equation::Subtract:
359 case Maxwell::Blend::Equation::SubtractGL:
360 return vk::BlendOp::eSubtract;
361 case Maxwell::Blend::Equation::ReverseSubtract:
362 case Maxwell::Blend::Equation::ReverseSubtractGL:
363 return vk::BlendOp::eReverseSubtract;
364 case Maxwell::Blend::Equation::Min:
365 case Maxwell::Blend::Equation::MinGL:
366 return vk::BlendOp::eMin;
367 case Maxwell::Blend::Equation::Max:
368 case Maxwell::Blend::Equation::MaxGL:
369 return vk::BlendOp::eMax;
370 }
371 UNIMPLEMENTED_MSG("Unimplemented blend equation={}", static_cast<u32>(equation));
372 return {};
373}
374
375vk::BlendFactor BlendFactor(Maxwell::Blend::Factor factor) {
376 switch (factor) {
377 case Maxwell::Blend::Factor::Zero:
378 case Maxwell::Blend::Factor::ZeroGL:
379 return vk::BlendFactor::eZero;
380 case Maxwell::Blend::Factor::One:
381 case Maxwell::Blend::Factor::OneGL:
382 return vk::BlendFactor::eOne;
383 case Maxwell::Blend::Factor::SourceColor:
384 case Maxwell::Blend::Factor::SourceColorGL:
385 return vk::BlendFactor::eSrcColor;
386 case Maxwell::Blend::Factor::OneMinusSourceColor:
387 case Maxwell::Blend::Factor::OneMinusSourceColorGL:
388 return vk::BlendFactor::eOneMinusSrcColor;
389 case Maxwell::Blend::Factor::SourceAlpha:
390 case Maxwell::Blend::Factor::SourceAlphaGL:
391 return vk::BlendFactor::eSrcAlpha;
392 case Maxwell::Blend::Factor::OneMinusSourceAlpha:
393 case Maxwell::Blend::Factor::OneMinusSourceAlphaGL:
394 return vk::BlendFactor::eOneMinusSrcAlpha;
395 case Maxwell::Blend::Factor::DestAlpha:
396 case Maxwell::Blend::Factor::DestAlphaGL:
397 return vk::BlendFactor::eDstAlpha;
398 case Maxwell::Blend::Factor::OneMinusDestAlpha:
399 case Maxwell::Blend::Factor::OneMinusDestAlphaGL:
400 return vk::BlendFactor::eOneMinusDstAlpha;
401 case Maxwell::Blend::Factor::DestColor:
402 case Maxwell::Blend::Factor::DestColorGL:
403 return vk::BlendFactor::eDstColor;
404 case Maxwell::Blend::Factor::OneMinusDestColor:
405 case Maxwell::Blend::Factor::OneMinusDestColorGL:
406 return vk::BlendFactor::eOneMinusDstColor;
407 case Maxwell::Blend::Factor::SourceAlphaSaturate:
408 case Maxwell::Blend::Factor::SourceAlphaSaturateGL:
409 return vk::BlendFactor::eSrcAlphaSaturate;
410 case Maxwell::Blend::Factor::Source1Color:
411 case Maxwell::Blend::Factor::Source1ColorGL:
412 return vk::BlendFactor::eSrc1Color;
413 case Maxwell::Blend::Factor::OneMinusSource1Color:
414 case Maxwell::Blend::Factor::OneMinusSource1ColorGL:
415 return vk::BlendFactor::eOneMinusSrc1Color;
416 case Maxwell::Blend::Factor::Source1Alpha:
417 case Maxwell::Blend::Factor::Source1AlphaGL:
418 return vk::BlendFactor::eSrc1Alpha;
419 case Maxwell::Blend::Factor::OneMinusSource1Alpha:
420 case Maxwell::Blend::Factor::OneMinusSource1AlphaGL:
421 return vk::BlendFactor::eOneMinusSrc1Alpha;
422 case Maxwell::Blend::Factor::ConstantColor:
423 case Maxwell::Blend::Factor::ConstantColorGL:
424 return vk::BlendFactor::eConstantColor;
425 case Maxwell::Blend::Factor::OneMinusConstantColor:
426 case Maxwell::Blend::Factor::OneMinusConstantColorGL:
427 return vk::BlendFactor::eOneMinusConstantColor;
428 case Maxwell::Blend::Factor::ConstantAlpha:
429 case Maxwell::Blend::Factor::ConstantAlphaGL:
430 return vk::BlendFactor::eConstantAlpha;
431 case Maxwell::Blend::Factor::OneMinusConstantAlpha:
432 case Maxwell::Blend::Factor::OneMinusConstantAlphaGL:
433 return vk::BlendFactor::eOneMinusConstantAlpha;
434 }
435 UNIMPLEMENTED_MSG("Unimplemented blend factor={}", static_cast<u32>(factor));
436 return {};
437}
438
439vk::FrontFace FrontFace(Maxwell::Cull::FrontFace front_face) {
440 switch (front_face) {
441 case Maxwell::Cull::FrontFace::ClockWise:
442 return vk::FrontFace::eClockwise;
443 case Maxwell::Cull::FrontFace::CounterClockWise:
444 return vk::FrontFace::eCounterClockwise;
445 }
446 UNIMPLEMENTED_MSG("Unimplemented front face={}", static_cast<u32>(front_face));
447 return {};
448}
449
450vk::CullModeFlags CullFace(Maxwell::Cull::CullFace cull_face) {
451 switch (cull_face) {
452 case Maxwell::Cull::CullFace::Front:
453 return vk::CullModeFlagBits::eFront;
454 case Maxwell::Cull::CullFace::Back:
455 return vk::CullModeFlagBits::eBack;
456 case Maxwell::Cull::CullFace::FrontAndBack:
457 return vk::CullModeFlagBits::eFrontAndBack;
458 }
459 UNIMPLEMENTED_MSG("Unimplemented cull face={}", static_cast<u32>(cull_face));
460 return {};
461}
462
463vk::ComponentSwizzle SwizzleSource(Tegra::Texture::SwizzleSource swizzle) {
464 switch (swizzle) {
465 case Tegra::Texture::SwizzleSource::Zero:
466 return vk::ComponentSwizzle::eZero;
467 case Tegra::Texture::SwizzleSource::R:
468 return vk::ComponentSwizzle::eR;
469 case Tegra::Texture::SwizzleSource::G:
470 return vk::ComponentSwizzle::eG;
471 case Tegra::Texture::SwizzleSource::B:
472 return vk::ComponentSwizzle::eB;
473 case Tegra::Texture::SwizzleSource::A:
474 return vk::ComponentSwizzle::eA;
475 case Tegra::Texture::SwizzleSource::OneInt:
476 case Tegra::Texture::SwizzleSource::OneFloat:
477 return vk::ComponentSwizzle::eOne;
478 }
479 UNIMPLEMENTED_MSG("Unimplemented swizzle source={}", static_cast<u32>(swizzle));
480 return {};
481}
482
483} // namespace Vulkan::MaxwellToVK
diff --git a/src/video_core/renderer_vulkan/maxwell_to_vk.h b/src/video_core/renderer_vulkan/maxwell_to_vk.h
new file mode 100644
index 000000000..4cadc0721
--- /dev/null
+++ b/src/video_core/renderer_vulkan/maxwell_to_vk.h
@@ -0,0 +1,58 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <utility>
8#include "common/common_types.h"
9#include "video_core/engines/maxwell_3d.h"
10#include "video_core/renderer_vulkan/declarations.h"
11#include "video_core/renderer_vulkan/vk_device.h"
12#include "video_core/surface.h"
13#include "video_core/textures/texture.h"
14
namespace Vulkan::MaxwellToVK {

using Maxwell = Tegra::Engines::Maxwell3D::Regs;
using PixelFormat = VideoCore::Surface::PixelFormat;
using ComponentType = VideoCore::Surface::ComponentType;

// Translations from guest Maxwell sampler state to Vulkan sampler state.
namespace Sampler {

vk::Filter Filter(Tegra::Texture::TextureFilter filter);

vk::SamplerMipmapMode MipmapMode(Tegra::Texture::TextureMipmapFilter mipmap_filter);

vk::SamplerAddressMode WrapMode(Tegra::Texture::WrapMode wrap_mode);

vk::CompareOp DepthCompareFunction(Tegra::Texture::DepthCompareFunc depth_compare_func);

} // namespace Sampler

/// Returns a device-supported Vulkan format for the guest pixel format together with a
/// flag telling whether the format can be used as a (color or depth-stencil) attachment.
std::pair<vk::Format, bool> SurfaceFormat(const VKDevice& device, FormatType format_type,
                                          PixelFormat pixel_format, ComponentType component_type);

vk::ShaderStageFlagBits ShaderStage(Maxwell::ShaderStage stage);

vk::PrimitiveTopology PrimitiveTopology(Maxwell::PrimitiveTopology topology);

/// Translates a vertex attribute (type, size) pair into a Vulkan vertex format.
vk::Format VertexFormat(Maxwell::VertexAttribute::Type type, Maxwell::VertexAttribute::Size size);

vk::CompareOp ComparisonOp(Maxwell::ComparisonOp comparison);

vk::IndexType IndexFormat(Maxwell::IndexFormat index_format);

vk::StencilOp StencilOp(Maxwell::StencilOp stencil_op);

vk::BlendOp BlendEquation(Maxwell::Blend::Equation equation);

vk::BlendFactor BlendFactor(Maxwell::Blend::Factor factor);

vk::FrontFace FrontFace(Maxwell::Cull::FrontFace front_face);

vk::CullModeFlags CullFace(Maxwell::Cull::CullFace cull_face);

vk::ComponentSwizzle SwizzleSource(Tegra::Texture::SwizzleSource swizzle);

} // namespace Vulkan::MaxwellToVK
diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp
new file mode 100644
index 000000000..388b5ffd5
--- /dev/null
+++ b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp
@@ -0,0 +1,122 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <cstring>
6#include <memory>
7#include <optional>
8#include <tuple>
9
10#include "common/alignment.h"
11#include "common/assert.h"
12#include "core/memory.h"
13#include "video_core/renderer_vulkan/declarations.h"
14#include "video_core/renderer_vulkan/vk_buffer_cache.h"
15#include "video_core/renderer_vulkan/vk_scheduler.h"
16#include "video_core/renderer_vulkan/vk_stream_buffer.h"
17
18namespace Vulkan {
19
// Records where in the stream buffer a guest upload landed so it can be reused later.
CachedBufferEntry::CachedBufferEntry(VAddr cpu_addr, std::size_t size, u64 offset,
                                     std::size_t alignment, u8* host_ptr)
    : RasterizerCacheObject{host_ptr}, cpu_addr{cpu_addr}, size{size}, offset{offset},
      alignment{alignment} {}
24
25VKBufferCache::VKBufferCache(Tegra::MemoryManager& tegra_memory_manager,
26 VideoCore::RasterizerInterface& rasterizer, const VKDevice& device,
27 VKMemoryManager& memory_manager, VKScheduler& scheduler, u64 size)
28 : RasterizerCache{rasterizer}, tegra_memory_manager{tegra_memory_manager} {
29 const auto usage = vk::BufferUsageFlagBits::eVertexBuffer |
30 vk::BufferUsageFlagBits::eIndexBuffer |
31 vk::BufferUsageFlagBits::eUniformBuffer;
32 const auto access = vk::AccessFlagBits::eVertexAttributeRead | vk::AccessFlagBits::eIndexRead |
33 vk::AccessFlagBits::eUniformRead;
34 stream_buffer =
35 std::make_unique<VKStreamBuffer>(device, memory_manager, scheduler, size, usage, access,
36 vk::PipelineStageFlagBits::eAllCommands);
37 buffer_handle = stream_buffer->GetBuffer();
38}
39
// Out-of-line so the unique_ptr members can be destroyed with complete types.
VKBufferCache::~VKBufferCache() = default;
41
42u64 VKBufferCache::UploadMemory(GPUVAddr gpu_addr, std::size_t size, u64 alignment, bool cache) {
43 const auto cpu_addr{tegra_memory_manager.GpuToCpuAddress(gpu_addr)};
44 ASSERT_MSG(cpu_addr, "Invalid GPU address");
45
46 // Cache management is a big overhead, so only cache entries with a given size.
47 // TODO: Figure out which size is the best for given games.
48 cache &= size >= 2048;
49
50 const auto& host_ptr{Memory::GetPointer(*cpu_addr)};
51 if (cache) {
52 auto entry = TryGet(host_ptr);
53 if (entry) {
54 if (entry->GetSize() >= size && entry->GetAlignment() == alignment) {
55 return entry->GetOffset();
56 }
57 Unregister(entry);
58 }
59 }
60
61 AlignBuffer(alignment);
62 const u64 uploaded_offset = buffer_offset;
63
64 if (!host_ptr) {
65 return uploaded_offset;
66 }
67
68 std::memcpy(buffer_ptr, host_ptr, size);
69 buffer_ptr += size;
70 buffer_offset += size;
71
72 if (cache) {
73 auto entry = std::make_shared<CachedBufferEntry>(*cpu_addr, size, uploaded_offset,
74 alignment, host_ptr);
75 Register(entry);
76 }
77
78 return uploaded_offset;
79}
80
81u64 VKBufferCache::UploadHostMemory(const u8* raw_pointer, std::size_t size, u64 alignment) {
82 AlignBuffer(alignment);
83 std::memcpy(buffer_ptr, raw_pointer, size);
84 const u64 uploaded_offset = buffer_offset;
85
86 buffer_ptr += size;
87 buffer_offset += size;
88 return uploaded_offset;
89}
90
91std::tuple<u8*, u64> VKBufferCache::ReserveMemory(std::size_t size, u64 alignment) {
92 AlignBuffer(alignment);
93 u8* const uploaded_ptr = buffer_ptr;
94 const u64 uploaded_offset = buffer_offset;
95
96 buffer_ptr += size;
97 buffer_offset += size;
98 return {uploaded_ptr, uploaded_offset};
99}
100
101void VKBufferCache::Reserve(std::size_t max_size) {
102 bool invalidate;
103 std::tie(buffer_ptr, buffer_offset_base, invalidate) = stream_buffer->Reserve(max_size);
104 buffer_offset = buffer_offset_base;
105
106 if (invalidate) {
107 InvalidateAll();
108 }
109}
110
// Flushes everything written since the last Reserve() to the device.
VKExecutionContext VKBufferCache::Send(VKExecutionContext exctx) {
    return stream_buffer->Send(exctx, buffer_offset - buffer_offset_base);
}
114
115void VKBufferCache::AlignBuffer(std::size_t alignment) {
116 // Align the offset, not the mapped pointer
117 const u64 offset_aligned = Common::AlignUp(buffer_offset, alignment);
118 buffer_ptr += offset_aligned - buffer_offset;
119 buffer_offset = offset_aligned;
120}
121
122} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.h b/src/video_core/renderer_vulkan/vk_buffer_cache.h
new file mode 100644
index 000000000..08b786aad
--- /dev/null
+++ b/src/video_core/renderer_vulkan/vk_buffer_cache.h
@@ -0,0 +1,103 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <memory>
8#include <tuple>
9
10#include "common/common_types.h"
11#include "video_core/gpu.h"
12#include "video_core/rasterizer_cache.h"
13#include "video_core/renderer_vulkan/declarations.h"
14#include "video_core/renderer_vulkan/vk_scheduler.h"
15
16namespace Tegra {
17class MemoryManager;
18}
19
20namespace Vulkan {
21
22class VKDevice;
23class VKFence;
24class VKMemoryManager;
25class VKStreamBuffer;
26
/// Tracks a single upload that lives inside the stream buffer, keyed by the host
/// pointer of the guest memory it was copied from.
class CachedBufferEntry final : public RasterizerCacheObject {
public:
    explicit CachedBufferEntry(VAddr cpu_addr, std::size_t size, u64 offset, std::size_t alignment,
                               u8* host_ptr);

    VAddr GetCpuAddr() const override {
        return cpu_addr;
    }

    std::size_t GetSizeInBytes() const override {
        return size;
    }

    std::size_t GetSize() const {
        return size;
    }

    u64 GetOffset() const {
        return offset;
    }

    std::size_t GetAlignment() const {
        return alignment;
    }

    // We do not have to flush this cache as things in it are never modified by us.
    void Flush() override {}

private:
    VAddr cpu_addr{};        // Guest CPU address the data was uploaded from
    std::size_t size{};      // Size of the upload in bytes
    u64 offset{};            // Offset of the upload inside the stream buffer
    std::size_t alignment{}; // Alignment the upload was performed with
};
61
/// Copies guest GPU data (vertex, index and uniform buffers) into a single Vulkan
/// stream buffer, caching sufficiently large uploads for reuse.
class VKBufferCache final : public RasterizerCache<std::shared_ptr<CachedBufferEntry>> {
public:
    explicit VKBufferCache(Tegra::MemoryManager& tegra_memory_manager,
                           VideoCore::RasterizerInterface& rasterizer, const VKDevice& device,
                           VKMemoryManager& memory_manager, VKScheduler& scheduler, u64 size);
    ~VKBufferCache();

    /// Uploads data from a guest GPU address. Returns host's buffer offset where it's been
    /// allocated.
    u64 UploadMemory(GPUVAddr gpu_addr, std::size_t size, u64 alignment = 4, bool cache = true);

    /// Uploads from a host memory. Returns host's buffer offset where it's been allocated.
    u64 UploadHostMemory(const u8* raw_pointer, std::size_t size, u64 alignment = 4);

    /// Reserves memory to be used by host's CPU. Returns mapped address and offset.
    std::tuple<u8*, u64> ReserveMemory(std::size_t size, u64 alignment = 4);

    /// Reserves a region of memory to be used in subsequent upload/reserve operations.
    void Reserve(std::size_t max_size);

    /// Ensures that the set data is sent to the device.
    [[nodiscard]] VKExecutionContext Send(VKExecutionContext exctx);

    /// Returns the buffer cache handle.
    vk::Buffer GetBuffer() const {
        return buffer_handle;
    }

private:
    /// Aligns the write cursor to the requested alignment before an upload.
    void AlignBuffer(std::size_t alignment);

    Tegra::MemoryManager& tegra_memory_manager;

    std::unique_ptr<VKStreamBuffer> stream_buffer; // Backing storage for all uploads
    vk::Buffer buffer_handle;                      // Handle of the stream buffer

    u8* buffer_ptr = nullptr;   // Current mapped write pointer
    u64 buffer_offset = 0;      // Current write offset inside the stream buffer
    u64 buffer_offset_base = 0; // Offset where the current Reserve() region begins
};
102
103} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_device.cpp b/src/video_core/renderer_vulkan/vk_device.cpp
new file mode 100644
index 000000000..00242ecbe
--- /dev/null
+++ b/src/video_core/renderer_vulkan/vk_device.cpp
@@ -0,0 +1,238 @@
1// Copyright 2018 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <map>
6#include <optional>
7#include <set>
8#include <vector>
9#include "common/assert.h"
10#include "video_core/renderer_vulkan/declarations.h"
11#include "video_core/renderer_vulkan/vk_device.h"
12
13namespace Vulkan {
14
namespace Alternatives {

// Ordered lists of fallback formats to try when the keyed format is unsupported by
// the host; each list is terminated by a default-constructed (undefined) format.
constexpr std::array<vk::Format, 3> Depth24UnormS8Uint = {
    vk::Format::eD32SfloatS8Uint, vk::Format::eD16UnormS8Uint, {}};
constexpr std::array<vk::Format, 3> Depth16UnormS8Uint = {
    vk::Format::eD24UnormS8Uint, vk::Format::eD32SfloatS8Uint, {}};

} // namespace Alternatives
23
24constexpr const vk::Format* GetFormatAlternatives(vk::Format format) {
25 switch (format) {
26 case vk::Format::eD24UnormS8Uint:
27 return Alternatives::Depth24UnormS8Uint.data();
28 case vk::Format::eD16UnormS8Uint:
29 return Alternatives::Depth16UnormS8Uint.data();
30 default:
31 return nullptr;
32 }
33}
34
35constexpr vk::FormatFeatureFlags GetFormatFeatures(vk::FormatProperties properties,
36 FormatType format_type) {
37 switch (format_type) {
38 case FormatType::Linear:
39 return properties.linearTilingFeatures;
40 case FormatType::Optimal:
41 return properties.optimalTilingFeatures;
42 case FormatType::Buffer:
43 return properties.bufferFeatures;
44 default:
45 return {};
46 }
47}
48
// Gathers physical-device information up front; the logical device itself is not
// created until Create() is called.
VKDevice::VKDevice(const vk::DispatchLoaderDynamic& dldi, vk::PhysicalDevice physical,
                   vk::SurfaceKHR surface)
    : physical{physical}, format_properties{GetFormatProperties(dldi, physical)} {
    SetupFamilies(dldi, surface);
    SetupProperties(dldi);
}
55
// The UniqueDevice member destroys the logical device.
VKDevice::~VKDevice() = default;
57
// Creates the logical device with the swapchain extension enabled and fetches the
// graphics and present queues. Returns false if device creation fails.
bool VKDevice::Create(const vk::DispatchLoaderDynamic& dldi, vk::Instance instance) {
    const auto queue_cis = GetDeviceQueueCreateInfos();
    vk::PhysicalDeviceFeatures device_features{};

    const std::vector<const char*> extensions = {VK_KHR_SWAPCHAIN_EXTENSION_NAME};
    const vk::DeviceCreateInfo device_ci({}, static_cast<u32>(queue_cis.size()), queue_cis.data(),
                                         0, nullptr, static_cast<u32>(extensions.size()),
                                         extensions.data(), &device_features);
    vk::Device dummy_logical;
    if (physical.createDevice(&device_ci, nullptr, &dummy_logical, dldi) != vk::Result::eSuccess) {
        LOG_CRITICAL(Render_Vulkan, "Logical device failed to be created!");
        return false;
    }

    // Device-level function pointers must be loaded before the owning handle is
    // built, because the handle's deleter is constructed from `dld`.
    dld.init(instance, dldi.vkGetInstanceProcAddr, dummy_logical, dldi.vkGetDeviceProcAddr);
    logical = UniqueDevice(
        dummy_logical, vk::ObjectDestroy<vk::NoParent, vk::DispatchLoaderDynamic>(nullptr, dld));

    graphics_queue = logical->getQueue(graphics_family, 0, dld);
    present_queue = logical->getQueue(present_family, 0, dld);
    return true;
}
80
81vk::Format VKDevice::GetSupportedFormat(vk::Format wanted_format,
82 vk::FormatFeatureFlags wanted_usage,
83 FormatType format_type) const {
84 if (IsFormatSupported(wanted_format, wanted_usage, format_type)) {
85 return wanted_format;
86 }
87 // The wanted format is not supported by hardware, search for alternatives
88 const vk::Format* alternatives = GetFormatAlternatives(wanted_format);
89 if (alternatives == nullptr) {
90 LOG_CRITICAL(Render_Vulkan,
91 "Format={} with usage={} and type={} has no defined alternatives and host "
92 "hardware does not support it",
93 static_cast<u32>(wanted_format), static_cast<u32>(wanted_usage),
94 static_cast<u32>(format_type));
95 UNREACHABLE();
96 return wanted_format;
97 }
98
99 std::size_t i = 0;
100 for (vk::Format alternative = alternatives[0]; alternative != vk::Format{};
101 alternative = alternatives[++i]) {
102 if (!IsFormatSupported(alternative, wanted_usage, format_type))
103 continue;
104 LOG_WARNING(Render_Vulkan,
105 "Emulating format={} with alternative format={} with usage={} and type={}",
106 static_cast<u32>(wanted_format), static_cast<u32>(alternative),
107 static_cast<u32>(wanted_usage), static_cast<u32>(format_type));
108 return alternative;
109 }
110
111 // No alternatives found, panic
112 LOG_CRITICAL(Render_Vulkan,
113 "Format={} with usage={} and type={} is not supported by the host hardware and "
114 "doesn't support any of the alternatives",
115 static_cast<u32>(wanted_format), static_cast<u32>(wanted_usage),
116 static_cast<u32>(format_type));
117 UNREACHABLE();
118 return wanted_format;
119}
120
121bool VKDevice::IsFormatSupported(vk::Format wanted_format, vk::FormatFeatureFlags wanted_usage,
122 FormatType format_type) const {
123 const auto it = format_properties.find(wanted_format);
124 if (it == format_properties.end()) {
125 LOG_CRITICAL(Render_Vulkan, "Unimplemented format query={}", vk::to_string(wanted_format));
126 UNREACHABLE();
127 return true;
128 }
129 const vk::FormatFeatureFlags supported_usage = GetFormatFeatures(it->second, format_type);
130 return (supported_usage & wanted_usage) == wanted_usage;
131}
132
133bool VKDevice::IsSuitable(const vk::DispatchLoaderDynamic& dldi, vk::PhysicalDevice physical,
134 vk::SurfaceKHR surface) {
135 const std::string swapchain_extension = VK_KHR_SWAPCHAIN_EXTENSION_NAME;
136
137 bool has_swapchain{};
138 for (const auto& prop : physical.enumerateDeviceExtensionProperties(nullptr, dldi)) {
139 has_swapchain |= prop.extensionName == swapchain_extension;
140 }
141 if (!has_swapchain) {
142 // The device doesn't support creating swapchains.
143 return false;
144 }
145
146 bool has_graphics{}, has_present{};
147 const auto queue_family_properties = physical.getQueueFamilyProperties(dldi);
148 for (u32 i = 0; i < static_cast<u32>(queue_family_properties.size()); ++i) {
149 const auto& family = queue_family_properties[i];
150 if (family.queueCount == 0)
151 continue;
152
153 has_graphics |=
154 (family.queueFlags & vk::QueueFlagBits::eGraphics) != static_cast<vk::QueueFlagBits>(0);
155 has_present |= physical.getSurfaceSupportKHR(i, surface, dldi) != 0;
156 }
157 if (!has_graphics || !has_present) {
158 // The device doesn't have a graphics and present queue.
159 return false;
160 }
161
162 // TODO(Rodrigo): Check if the device matches all requeriments.
163 const vk::PhysicalDeviceProperties props = physical.getProperties(dldi);
164 if (props.limits.maxUniformBufferRange < 65536) {
165 return false;
166 }
167
168 // Device is suitable.
169 return true;
170}
171
172void VKDevice::SetupFamilies(const vk::DispatchLoaderDynamic& dldi, vk::SurfaceKHR surface) {
173 std::optional<u32> graphics_family_, present_family_;
174
175 const auto queue_family_properties = physical.getQueueFamilyProperties(dldi);
176 for (u32 i = 0; i < static_cast<u32>(queue_family_properties.size()); ++i) {
177 if (graphics_family_ && present_family_)
178 break;
179
180 const auto& queue_family = queue_family_properties[i];
181 if (queue_family.queueCount == 0)
182 continue;
183
184 if (queue_family.queueFlags & vk::QueueFlagBits::eGraphics)
185 graphics_family_ = i;
186 if (physical.getSurfaceSupportKHR(i, surface, dldi))
187 present_family_ = i;
188 }
189 ASSERT(graphics_family_ && present_family_);
190
191 graphics_family = *graphics_family_;
192 present_family = *present_family_;
193}
194
195void VKDevice::SetupProperties(const vk::DispatchLoaderDynamic& dldi) {
196 const vk::PhysicalDeviceProperties props = physical.getProperties(dldi);
197 device_type = props.deviceType;
198 uniform_buffer_alignment = static_cast<u64>(props.limits.minUniformBufferOffsetAlignment);
199}
200
201std::vector<vk::DeviceQueueCreateInfo> VKDevice::GetDeviceQueueCreateInfos() const {
202 static const float QUEUE_PRIORITY = 1.f;
203
204 std::set<u32> unique_queue_families = {graphics_family, present_family};
205 std::vector<vk::DeviceQueueCreateInfo> queue_cis;
206
207 for (u32 queue_family : unique_queue_families)
208 queue_cis.push_back({{}, queue_family, 1, &QUEUE_PRIORITY});
209
210 return queue_cis;
211}
212
213std::map<vk::Format, vk::FormatProperties> VKDevice::GetFormatProperties(
214 const vk::DispatchLoaderDynamic& dldi, vk::PhysicalDevice physical) {
215 std::map<vk::Format, vk::FormatProperties> format_properties;
216
217 const auto AddFormatQuery = [&format_properties, &dldi, physical](vk::Format format) {
218 format_properties.emplace(format, physical.getFormatProperties(format, dldi));
219 };
220 AddFormatQuery(vk::Format::eA8B8G8R8UnormPack32);
221 AddFormatQuery(vk::Format::eB5G6R5UnormPack16);
222 AddFormatQuery(vk::Format::eA2B10G10R10UnormPack32);
223 AddFormatQuery(vk::Format::eR8G8B8A8Srgb);
224 AddFormatQuery(vk::Format::eR8Unorm);
225 AddFormatQuery(vk::Format::eD32Sfloat);
226 AddFormatQuery(vk::Format::eD16Unorm);
227 AddFormatQuery(vk::Format::eD16UnormS8Uint);
228 AddFormatQuery(vk::Format::eD24UnormS8Uint);
229 AddFormatQuery(vk::Format::eD32SfloatS8Uint);
230 AddFormatQuery(vk::Format::eBc1RgbaUnormBlock);
231 AddFormatQuery(vk::Format::eBc2UnormBlock);
232 AddFormatQuery(vk::Format::eBc3UnormBlock);
233 AddFormatQuery(vk::Format::eBc4UnormBlock);
234
235 return format_properties;
236}
237
238} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_device.h b/src/video_core/renderer_vulkan/vk_device.h
new file mode 100644
index 000000000..e87c7a508
--- /dev/null
+++ b/src/video_core/renderer_vulkan/vk_device.h
@@ -0,0 +1,116 @@
1// Copyright 2018 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <map>
8#include <vector>
9#include "common/common_types.h"
10#include "video_core/renderer_vulkan/declarations.h"
11
12namespace Vulkan {
13
14/// Format usage descriptor
15enum class FormatType { Linear, Optimal, Buffer };
16
17/// Handles data specific to a physical device.
/// Handles data specific to a physical device.
class VKDevice final {
public:
    explicit VKDevice(const vk::DispatchLoaderDynamic& dldi, vk::PhysicalDevice physical,
                      vk::SurfaceKHR surface);
    ~VKDevice();

    /// Initializes the device. Returns true on success.
    bool Create(const vk::DispatchLoaderDynamic& dldi, vk::Instance instance);

    /**
     * Returns a format supported by the device for the passed requirements.
     * @param wanted_format The ideal format to be returned. It may not be the returned format.
     * @param wanted_usage The usage that must be fulfilled even if the format is not supported.
     * @param format_type Format type usage.
     * @returns A format supported by the device.
     */
    vk::Format GetSupportedFormat(vk::Format wanted_format, vk::FormatFeatureFlags wanted_usage,
                                  FormatType format_type) const;

    /// Returns the dispatch loader with direct function pointers of the device
    const vk::DispatchLoaderDynamic& GetDispatchLoader() const {
        return dld;
    }

    /// Returns the logical device
    vk::Device GetLogical() const {
        return logical.get();
    }

    /// Returns the physical device.
    vk::PhysicalDevice GetPhysical() const {
        return physical;
    }

    /// Returns the main graphics queue.
    vk::Queue GetGraphicsQueue() const {
        return graphics_queue;
    }

    /// Returns the main present queue.
    vk::Queue GetPresentQueue() const {
        return present_queue;
    }

    /// Returns main graphics queue family index.
    u32 GetGraphicsFamily() const {
        return graphics_family;
    }

    /// Returns main present queue family index.
    u32 GetPresentFamily() const {
        return present_family;
    }

    /// Returns if the device is integrated with the host CPU
    bool IsIntegrated() const {
        return device_type == vk::PhysicalDeviceType::eIntegratedGpu;
    }

    /// Returns uniform buffer alignment requirement
    u64 GetUniformBufferAlignment() const {
        return uniform_buffer_alignment;
    }

    /// Checks if the physical device is suitable.
    static bool IsSuitable(const vk::DispatchLoaderDynamic& dldi, vk::PhysicalDevice physical,
                           vk::SurfaceKHR surface);

private:
    /// Sets up queue families.
    void SetupFamilies(const vk::DispatchLoaderDynamic& dldi, vk::SurfaceKHR surface);

    /// Sets up device properties.
    void SetupProperties(const vk::DispatchLoaderDynamic& dldi);

    /// Returns a list of queue initialization descriptors.
    std::vector<vk::DeviceQueueCreateInfo> GetDeviceQueueCreateInfos() const;

    /// Returns true if a format is supported.
    bool IsFormatSupported(vk::Format wanted_format, vk::FormatFeatureFlags wanted_usage,
                           FormatType format_type) const;

    /// Returns the device properties for Vulkan formats.
    static std::map<vk::Format, vk::FormatProperties> GetFormatProperties(
        const vk::DispatchLoaderDynamic& dldi, vk::PhysicalDevice physical);

    const vk::PhysicalDevice physical;    ///< Physical device
    vk::DispatchLoaderDynamic dld;        ///< Device function pointers
    UniqueDevice logical;                 ///< Logical device
    vk::Queue graphics_queue;             ///< Main graphics queue
    vk::Queue present_queue;              ///< Main present queue
    u32 graphics_family{};                ///< Main graphics queue family index
    u32 present_family{};                 ///< Main present queue family index
    vk::PhysicalDeviceType device_type;   ///< Physical device type
    u64 uniform_buffer_alignment{};       ///< Uniform buffer alignment requirement
    std::map<vk::Format, vk::FormatProperties> format_properties; ///< Format properties dictionary
};
115
116} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_memory_manager.cpp b/src/video_core/renderer_vulkan/vk_memory_manager.cpp
new file mode 100644
index 000000000..0451babbf
--- /dev/null
+++ b/src/video_core/renderer_vulkan/vk_memory_manager.cpp
@@ -0,0 +1,252 @@
1// Copyright 2018 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <algorithm>
6#include <optional>
7#include <tuple>
8#include <vector>
9#include "common/alignment.h"
10#include "common/assert.h"
11#include "common/common_types.h"
12#include "common/logging/log.h"
13#include "video_core/renderer_vulkan/declarations.h"
14#include "video_core/renderer_vulkan/vk_device.h"
15#include "video_core/renderer_vulkan/vk_memory_manager.h"
16
17namespace Vulkan {
18
19// TODO(Rodrigo): Fine tune this number
20constexpr u64 ALLOC_CHUNK_SIZE = 64 * 1024 * 1024;
21
// Wraps one large vkAllocateMemory chunk; commits are sub-allocated from it and tracked here.
22class VKMemoryAllocation final {
23public:
24 explicit VKMemoryAllocation(const VKDevice& device, vk::DeviceMemory memory,
25 vk::MemoryPropertyFlags properties, u64 alloc_size, u32 type)
26 : device{device}, memory{memory}, properties{properties}, alloc_size{alloc_size},
27 shifted_type{ShiftType(type)}, is_mappable{properties &
28 vk::MemoryPropertyFlagBits::eHostVisible} {
29 if (is_mappable) {
30 const auto dev = device.GetLogical();
31 const auto& dld = device.GetDispatchLoader();
32 base_address = static_cast<u8*>(dev.mapMemory(memory, 0, alloc_size, {}, dld));
33 }
34 }
35
36 ~VKMemoryAllocation() {
37 const auto dev = device.GetLogical();
38 const auto& dld = device.GetDispatchLoader();
39 if (is_mappable)
40 dev.unmapMemory(memory, dld);
41 dev.free(memory, nullptr, dld);
42 }
43
/// Sub-allocates "commit_size" bytes with "alignment" from this chunk. Returns nullptr when no
/// free region is large enough, signaling the caller to allocate a new chunk.
44 VKMemoryCommit Commit(vk::DeviceSize commit_size, vk::DeviceSize alignment) {
45 auto found = TryFindFreeSection(free_iterator, alloc_size, static_cast<u64>(commit_size),
46 static_cast<u64>(alignment));
47 if (!found) {
48 found = TryFindFreeSection(0, free_iterator, static_cast<u64>(commit_size),
49 static_cast<u64>(alignment));
50 if (!found) {
51 // Signal out of memory, it'll try to do more allocations.
52 return nullptr;
53 }
54 }
55 u8* address = is_mappable ? base_address + *found : nullptr;
56 auto commit = std::make_unique<VKMemoryCommitImpl>(this, memory, address, *found,
57 *found + commit_size);
58 commits.push_back(commit.get());
59
60 // The space right after the last commit is the most likely to be free.
61 free_iterator = *found + commit_size;
62
63 return commit;
64 }
65
/// Unregisters a commit; its interval becomes available for future commits.
66 void Free(const VKMemoryCommitImpl* commit) {
67 ASSERT(commit);
68 const auto it =
69 std::find_if(commits.begin(), commits.end(),
70 [&](const auto& stored_commit) { return stored_commit == commit; });
71 if (it == commits.end()) {
72 LOG_CRITICAL(Render_Vulkan, "Freeing unallocated commit!");
73 UNREACHABLE();
74 return;
75 }
76 commits.erase(it);
77 }
78
79 /// Returns whether this allocation is compatible with the arguments.
80 bool IsCompatible(vk::MemoryPropertyFlags wanted_properties, u32 type_mask) const {
81 return (wanted_properties & properties) != vk::MemoryPropertyFlagBits(0) &&
82 (type_mask & shifted_type) != 0;
83 }
84
85private:
86 static constexpr u32 ShiftType(u32 type) {
87 return 1U << type;
88 }
89
90 /// A memory allocator, it may return a free region between "start" and "end" with the requested
91 /// size and alignment requirements.
92 std::optional<u64> TryFindFreeSection(u64 start, u64 end, u64 size, u64 alignment) const {
93 u64 iterator = start;
94 while (iterator + size < end) {
95 const u64 try_left = Common::AlignUp(iterator, alignment);
96 const u64 try_right = try_left + size;
97
98 bool overlap = false;
99 for (const auto& commit : commits) {
100 const auto [commit_left, commit_right] = commit->interval;
101 if (try_left < commit_right && commit_left < try_right) {
102 // There's an overlap, continue the search where the overlapping commit ends.
103 iterator = commit_right;
104 overlap = true;
105 break;
106 }
107 }
108 if (!overlap) {
109 // A free address has been found.
110 return try_left;
111 }
112 }
113 // No free regions were found, return an empty optional.
114 return std::nullopt;
115 }
116
117 const VKDevice& device; ///< Vulkan device.
118 const vk::DeviceMemory memory; ///< Vulkan memory allocation handler.
119 const vk::MemoryPropertyFlags properties; ///< Vulkan properties.
120 const u64 alloc_size; ///< Size of this allocation.
121 const u32 shifted_type; ///< Stored Vulkan type of this allocation, shifted.
122 const bool is_mappable; ///< Whether the allocation is mappable.
123
124 /// Base address of the mapped pointer.
125 u8* base_address{};
126
127 /// Hints where the next free region is likely going to be.
128 u64 free_iterator{};
129
130 /// Stores all commits done from this allocation.
131 std::vector<const VKMemoryCommitImpl*> commits;
132};
133
// Caches the physical device's memory properties once; whether the memory model is unified is
// derived from them at construction time.
134VKMemoryManager::VKMemoryManager(const VKDevice& device)
135 : device{device}, props{device.GetPhysical().getMemoryProperties(device.GetDispatchLoader())},
136 is_memory_unified{GetMemoryUnified(props)} {}
137
138VKMemoryManager::~VKMemoryManager() = default;
139
140VKMemoryCommit VKMemoryManager::Commit(const vk::MemoryRequirements& reqs, bool host_visible) {
// Requests larger than a chunk can never be satisfied by this allocator.
141 ASSERT(reqs.size < ALLOC_CHUNK_SIZE);
142
143 // When a host visible commit is asked, search for host visible and coherent, otherwise search
144 // for a fast device local type.
145 const vk::MemoryPropertyFlags wanted_properties =
146 host_visible
147 ? vk::MemoryPropertyFlagBits::eHostVisible | vk::MemoryPropertyFlagBits::eHostCoherent
148 : vk::MemoryPropertyFlagBits::eDeviceLocal;
149
// Scans every existing allocation for a compatible one with enough free space.
150 const auto TryCommit = [&]() -> VKMemoryCommit {
151 for (auto& alloc : allocs) {
152 if (!alloc->IsCompatible(wanted_properties, reqs.memoryTypeBits))
153 continue;
154
155 if (auto commit = alloc->Commit(reqs.size, reqs.alignment); commit) {
156 return commit;
157 }
158 }
159 return {};
160 };
161
162 if (auto commit = TryCommit(); commit) {
163 return commit;
164 }
165
166 // Commit has failed, allocate more memory.
167 if (!AllocMemory(wanted_properties, reqs.memoryTypeBits, ALLOC_CHUNK_SIZE)) {
168 // TODO(Rodrigo): Try to use host memory.
169 LOG_CRITICAL(Render_Vulkan, "Ran out of memory!");
170 UNREACHABLE();
171 }
172
173 // Commit again, this time it won't fail since there's a fresh allocation above. If it does,
174 // there's a bug.
175 auto commit = TryCommit();
176 ASSERT(commit);
177 return commit;
178}
179
// Commits memory matching the buffer's requirements and binds the buffer to it.
180VKMemoryCommit VKMemoryManager::Commit(vk::Buffer buffer, bool host_visible) {
181 const auto dev = device.GetLogical();
182 const auto& dld = device.GetDispatchLoader();
183 const auto requeriments = dev.getBufferMemoryRequirements(buffer, dld);
184 auto commit = Commit(requeriments, host_visible);
185 dev.bindBufferMemory(buffer, commit->GetMemory(), commit->GetOffset(), dld);
186 return commit;
187}
188
// Commits memory matching the image's requirements and binds the image to it.
189VKMemoryCommit VKMemoryManager::Commit(vk::Image image, bool host_visible) {
190 const auto dev = device.GetLogical();
191 const auto& dld = device.GetDispatchLoader();
192 const auto requeriments = dev.getImageMemoryRequirements(image, dld);
193 auto commit = Commit(requeriments, host_visible);
194 dev.bindImageMemory(image, commit->GetMemory(), commit->GetOffset(), dld);
195 return commit;
196}
197
198bool VKMemoryManager::AllocMemory(vk::MemoryPropertyFlags wanted_properties, u32 type_mask,
199 u64 size) {
// Picks the first memory type index allowed by the mask that also has the wanted properties.
200 const u32 type = [&]() {
201 for (u32 type_index = 0; type_index < props.memoryTypeCount; ++type_index) {
202 const auto flags = props.memoryTypes[type_index].propertyFlags;
203 if ((type_mask & (1U << type_index)) && (flags & wanted_properties)) {
204 // The type index is allowed by the mask and supports the wanted properties.
205 return type_index;
206 }
207 }
208 LOG_CRITICAL(Render_Vulkan, "Couldn't find a compatible memory type!");
209 UNREACHABLE();
210 return 0u;
211 }();
212
213 const auto dev = device.GetLogical();
214 const auto& dld = device.GetDispatchLoader();
215
216 // Try to allocate found type.
217 const vk::MemoryAllocateInfo memory_ai(size, type);
218 vk::DeviceMemory memory;
219 if (const vk::Result res = dev.allocateMemory(&memory_ai, nullptr, &memory, dld);
220 res != vk::Result::eSuccess) {
221 LOG_CRITICAL(Render_Vulkan, "Device allocation failed with code {}!", vk::to_string(res));
222 return false;
223 }
224 allocs.push_back(
225 std::make_unique<VKMemoryAllocation>(device, memory, wanted_properties, size, type));
226 return true;
227}
228
229/*static*/ bool VKMemoryManager::GetMemoryUnified(const vk::PhysicalDeviceMemoryProperties& props) {
230 for (u32 heap_index = 0; heap_index < props.memoryHeapCount; ++heap_index) {
231 if (!(props.memoryHeaps[heap_index].flags & vk::MemoryHeapFlagBits::eDeviceLocal)) {
232 // Memory is considered unified only when every heap is device local.
233 return false;
234 }
235 }
236 return true;
237}
238
// Records the sub-allocated interval; "data" already carries the commit offset when mapped.
239VKMemoryCommitImpl::VKMemoryCommitImpl(VKMemoryAllocation* allocation, vk::DeviceMemory memory,
240 u8* data, u64 begin, u64 end)
241 : interval(std::make_pair(begin, end)), memory{memory}, allocation{allocation}, data{data} {}
242
// Returns the interval to the owning allocation so it can be reused.
243VKMemoryCommitImpl::~VKMemoryCommitImpl() {
244 allocation->Free(this);
245}
246
247u8* VKMemoryCommitImpl::GetData() const {
248 ASSERT_MSG(data != nullptr, "Trying to access an unmapped commit.");
249 return data;
250}
251
252} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_memory_manager.h b/src/video_core/renderer_vulkan/vk_memory_manager.h
new file mode 100644
index 000000000..073597b35
--- /dev/null
+++ b/src/video_core/renderer_vulkan/vk_memory_manager.h
@@ -0,0 +1,87 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <memory>
8#include <utility>
9#include <vector>
10#include "common/common_types.h"
11#include "video_core/renderer_vulkan/declarations.h"
12
13namespace Vulkan {
14
15class VKDevice;
16class VKMemoryAllocation;
17class VKMemoryCommitImpl;
18
19using VKMemoryCommit = std::unique_ptr<VKMemoryCommitImpl>;
20
21class VKMemoryManager final {
22public:
23 explicit VKMemoryManager(const VKDevice& device);
24 ~VKMemoryManager();
25
26 /**
27 * Commits a memory with the specified requirements.
28 * @param reqs Requirements returned from a Vulkan call.
29 * @param host_visible Signals the allocator that it *must* use host visible and coherent
30 * memory. When passing false, it will try to allocate device local memory.
31 * @returns A memory commit.
32 */
33 VKMemoryCommit Commit(const vk::MemoryRequirements& reqs, bool host_visible);
34
35 /// Commits memory required by the buffer and binds it.
36 VKMemoryCommit Commit(vk::Buffer buffer, bool host_visible);
37
38 /// Commits memory required by the image and binds it.
39 VKMemoryCommit Commit(vk::Image image, bool host_visible);
40
41 /// Returns true if the memory allocations are done always in host visible and coherent memory.
42 bool IsMemoryUnified() const {
43 return is_memory_unified;
44 }
45
46private:
47 /// Allocates a chunk of memory.
48 bool AllocMemory(vk::MemoryPropertyFlags wanted_properties, u32 type_mask, u64 size);
49
50 /// Returns true if the device uses a unified memory model.
51 static bool GetMemoryUnified(const vk::PhysicalDeviceMemoryProperties& props);
52
53 const VKDevice& device; ///< Device handler.
54 const vk::PhysicalDeviceMemoryProperties props; ///< Physical device properties.
55 const bool is_memory_unified; ///< True if memory model is unified.
56 std::vector<std::unique_ptr<VKMemoryAllocation>> allocs; ///< Current allocations.
57};
58
// A commit is a sub-region of a VKMemoryAllocation; destroying it releases the region.
59class VKMemoryCommitImpl final {
60 friend VKMemoryAllocation;
61
62public:
63 explicit VKMemoryCommitImpl(VKMemoryAllocation* allocation, vk::DeviceMemory memory, u8* data,
64 u64 begin, u64 end);
65 ~VKMemoryCommitImpl();
66
67 /// Returns the writable memory map. The commit has to be mappable.
68 u8* GetData() const;
69
70 /// Returns the Vulkan memory handler.
71 vk::DeviceMemory GetMemory() const {
72 return memory;
73 }
74
75 /// Returns the start position of the commit relative to the allocation.
76 vk::DeviceSize GetOffset() const {
77 return static_cast<vk::DeviceSize>(interval.first);
78 }
79
80private:
81 std::pair<u64, u64> interval{}; ///< Interval where the commit exists.
82 vk::DeviceMemory memory; ///< Vulkan device memory handler.
83 VKMemoryAllocation* allocation{}; ///< Pointer to the large memory allocation.
84 u8* data{}; ///< Pointer to the host mapped memory, it has the commit offset included.
85};
86
87} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_resource_manager.cpp b/src/video_core/renderer_vulkan/vk_resource_manager.cpp
new file mode 100644
index 000000000..13c46e5b8
--- /dev/null
+++ b/src/video_core/renderer_vulkan/vk_resource_manager.cpp
@@ -0,0 +1,285 @@
1// Copyright 2018 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <algorithm>
6#include <optional>
7#include "common/assert.h"
8#include "common/logging/log.h"
9#include "video_core/renderer_vulkan/declarations.h"
10#include "video_core/renderer_vulkan/vk_device.h"
11#include "video_core/renderer_vulkan/vk_resource_manager.h"
12
13namespace Vulkan {
14
15// TODO(Rodrigo): Fine tune these numbers.
16constexpr std::size_t COMMAND_BUFFER_POOL_SIZE = 0x1000;
17constexpr std::size_t FENCES_GROW_STEP = 0x40;
18
// Fenced pool of primary command buffers; a buffer is reused once its fence is signaled.
19class CommandBufferPool final : public VKFencedPool {
20public:
21 CommandBufferPool(const VKDevice& device)
22 : VKFencedPool(COMMAND_BUFFER_POOL_SIZE), device{device} {}
23
24 void Allocate(std::size_t begin, std::size_t end) override {
25 const auto dev = device.GetLogical();
26 const auto& dld = device.GetDispatchLoader();
27 const u32 graphics_family = device.GetGraphicsFamily();
28
29 auto pool = std::make_unique<Pool>();
30
31 // Command buffers are going to be committed, recorded, executed every single usage cycle.
32 // They are also going to be reset when committed.
33 const auto pool_flags = vk::CommandPoolCreateFlagBits::eTransient |
34 vk::CommandPoolCreateFlagBits::eResetCommandBuffer;
35 const vk::CommandPoolCreateInfo cmdbuf_pool_ci(pool_flags, graphics_family);
36 pool->handle = dev.createCommandPoolUnique(cmdbuf_pool_ci, nullptr, dld);
37
38 const vk::CommandBufferAllocateInfo cmdbuf_ai(*pool->handle,
39 vk::CommandBufferLevel::ePrimary,
40 static_cast<u32>(COMMAND_BUFFER_POOL_SIZE));
41 pool->cmdbufs =
42 dev.allocateCommandBuffersUnique<std::allocator<UniqueCommandBuffer>>(cmdbuf_ai, dld);
43
44 pools.push_back(std::move(pool));
45 }
46
/// Returns a free command buffer, protecting it with the passed fence.
47 vk::CommandBuffer Commit(VKFence& fence) {
48 const std::size_t index = CommitResource(fence);
49 const auto pool_index = index / COMMAND_BUFFER_POOL_SIZE;
50 const auto sub_index = index % COMMAND_BUFFER_POOL_SIZE;
51 return *pools[pool_index]->cmdbufs[sub_index];
52 }
53
54private:
55 struct Pool {
56 UniqueCommandPool handle;
57 std::vector<UniqueCommandBuffer> cmdbufs;
58 };
59
60 const VKDevice& device;
61
62 std::vector<std::unique_ptr<Pool>> pools;
63};
64
65VKResource::VKResource() = default;
66
67VKResource::~VKResource() = default;
68
69VKFence::VKFence(const VKDevice& device, UniqueFence handle)
70 : device{device}, handle{std::move(handle)} {}
71
72VKFence::~VKFence() = default;
73
// NOTE(review): std::numeric_limits is used here but <limits> is not in this file's include
// list — confirm a transitive include provides it.
74void VKFence::Wait() {
75 const auto dev = device.GetLogical();
76 const auto& dld = device.GetDispatchLoader();
77 dev.waitForFences({*handle}, true, std::numeric_limits<u64>::max(), dld);
78}
79
80void VKFence::Release() {
81 is_owned = false;
82}
83
84void VKFence::Commit() {
85 is_owned = true;
86 is_used = true;
87}
88
89bool VKFence::Tick(bool gpu_wait, bool owner_wait) {
90 if (!is_used) {
91 // If a fence is not used it's always free.
92 return true;
93 }
94 if (is_owned && !owner_wait) {
95 // The fence is still being owned (Release has not been called) and ownership wait has
96 // not been asked.
97 return false;
98 }
99
100 const auto dev = device.GetLogical();
101 const auto& dld = device.GetDispatchLoader();
102 if (gpu_wait) {
103 // Wait for the fence if it has been requested.
104 dev.waitForFences({*handle}, true, std::numeric_limits<u64>::max(), dld);
105 } else {
106 if (dev.getFenceStatus(*handle, dld) != vk::Result::eSuccess) {
107 // Vulkan fence is not ready, not much it can do here
108 return false;
109 }
110 }
111
112 // Broadcast resources their free state.
113 for (auto* resource : protected_resources) {
114 resource->OnFenceRemoval(this);
115 }
116 protected_resources.clear();
117
118 // Prepare fence for reuse.
119 dev.resetFences({*handle}, dld);
120 is_used = false;
121 return true;
122}
123
124void VKFence::Protect(VKResource* resource) {
125 protected_resources.push_back(resource);
126}
127
// Unprotecting also notifies the resource immediately, as if the fence had been signaled.
128void VKFence::Unprotect(VKResource* resource) {
129 const auto it = std::find(protected_resources.begin(), protected_resources.end(), resource);
130 ASSERT(it != protected_resources.end());
131
132 resource->OnFenceRemoval(this);
133 protected_resources.erase(it);
134}
135
136VKFenceWatch::VKFenceWatch() = default;
137
// Unprotects itself so the fence does not call back into a destroyed watch.
138VKFenceWatch::~VKFenceWatch() {
139 if (fence) {
140 fence->Unprotect(this);
141 }
142}
143
144void VKFenceWatch::Wait() {
145 if (fence == nullptr) {
146 return;
147 }
148 fence->Wait();
149 fence->Unprotect(this);
150}
151
152void VKFenceWatch::Watch(VKFence& new_fence) {
153 Wait();
154 fence = &new_fence;
155 fence->Protect(this);
156}
157
158bool VKFenceWatch::TryWatch(VKFence& new_fence) {
159 if (fence) {
160 return false;
161 }
162 fence = &new_fence;
163 fence->Protect(this);
164 return true;
165}
166
// Called by the fence (on signal or Unprotect); clears the watch so it can be reused.
167void VKFenceWatch::OnFenceRemoval(VKFence* signaling_fence) {
168 ASSERT_MSG(signaling_fence == fence, "Removing the wrong fence");
169 fence = nullptr;
170}
171
172VKFencedPool::VKFencedPool(std::size_t grow_step) : grow_step{grow_step} {}
173
174VKFencedPool::~VKFencedPool() = default;
175
176std::size_t VKFencedPool::CommitResource(VKFence& fence) {
// Returns the index of the first watch in [begin, end) that could start watching the fence.
177 const auto Search = [&](std::size_t begin, std::size_t end) -> std::optional<std::size_t> {
178 for (std::size_t iterator = begin; iterator < end; ++iterator) {
179 if (watches[iterator]->TryWatch(fence)) {
180 // The resource is now being watched, a free resource was successfully found.
181 return iterator;
182 }
183 }
184 return {};
185 };
186 // Try to find a free resource from the hinted position to the end.
187 auto found = Search(free_iterator, watches.size());
188 if (!found) {
189 // Search from beginning to the hinted position.
190 found = Search(0, free_iterator);
191 if (!found) {
192 // Both searches failed, the pool is full; handle it.
193 const std::size_t free_resource = ManageOverflow();
194
195 // Watch will wait for the resource to be free.
196 watches[free_resource]->Watch(fence);
197 found = free_resource;
198 }
199 }
200 // Free iterator is hinted to the resource after the one that's been committed.
201 free_iterator = (*found + 1) % watches.size();
202 return *found;
203}
204
205std::size_t VKFencedPool::ManageOverflow() {
206 const std::size_t old_capacity = watches.size();
207 Grow();
208
209 // The last entry is guaranteed to be free, since it's the first element of the freshly
210 // allocated resources.
211 return old_capacity;
212}
213
214void VKFencedPool::Grow() {
215 const std::size_t old_capacity = watches.size();
216 watches.resize(old_capacity + grow_step);
217 std::generate(watches.begin() + old_capacity, watches.end(),
218 []() { return std::make_unique<VKFenceWatch>(); });
219 Allocate(old_capacity, old_capacity + grow_step);
220}
221
222VKResourceManager::VKResourceManager(const VKDevice& device) : device{device} {
223 GrowFences(FENCES_GROW_STEP);
224 command_buffer_pool = std::make_unique<CommandBufferPool>(device);
225}
226
227VKResourceManager::~VKResourceManager() = default;
228
// Searches from the hint for a free fence; retries with a GPU wait and finally grows the pool.
229VKFence& VKResourceManager::CommitFence() {
230 const auto StepFences = [&](bool gpu_wait, bool owner_wait) -> VKFence* {
231 const auto Tick = [=](auto& fence) { return fence->Tick(gpu_wait, owner_wait); };
232 const auto hinted = fences.begin() + fences_iterator;
233
234 auto it = std::find_if(hinted, fences.end(), Tick);
235 if (it == fences.end()) {
236 it = std::find_if(fences.begin(), hinted, Tick);
237 if (it == hinted) {
238 return nullptr;
239 }
240 }
// Hint the next search to start right after the fence we just committed.
241 fences_iterator = std::distance(fences.begin(), it) + 1;
242 if (fences_iterator >= fences.size())
243 fences_iterator = 0;
244
245 auto& fence = *it;
246 fence->Commit();
247 return fence.get();
248 };
249
250 VKFence* found_fence = StepFences(false, false);
251 if (!found_fence) {
252 // Try again, this time waiting.
253 found_fence = StepFences(true, false);
254
255 if (!found_fence) {
256 // Allocate new fences and try again.
257 LOG_INFO(Render_Vulkan, "Allocating new fences {} -> {}", fences.size(),
258 fences.size() + FENCES_GROW_STEP);
259
260 GrowFences(FENCES_GROW_STEP);
261 found_fence = StepFences(true, false);
262 ASSERT(found_fence != nullptr);
263 }
264 }
265 return *found_fence;
266}
267
268vk::CommandBuffer VKResourceManager::CommitCommandBuffer(VKFence& fence) {
269 return command_buffer_pool->Commit(fence);
270}
271
272void VKResourceManager::GrowFences(std::size_t new_fences_count) {
273 const auto dev = device.GetLogical();
274 const auto& dld = device.GetDispatchLoader();
275 const vk::FenceCreateInfo fence_ci;
276
277 const std::size_t previous_size = fences.size();
278 fences.resize(previous_size + new_fences_count);
279
280 std::generate(fences.begin() + previous_size, fences.end(), [&]() {
281 return std::make_unique<VKFence>(device, dev.createFenceUnique(fence_ci, nullptr, dld));
282 });
283}
284
285} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_resource_manager.h b/src/video_core/renderer_vulkan/vk_resource_manager.h
new file mode 100644
index 000000000..08ee86fa6
--- /dev/null
+++ b/src/video_core/renderer_vulkan/vk_resource_manager.h
@@ -0,0 +1,180 @@
1// Copyright 2018 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <cstddef>
8#include <memory>
9#include <vector>
10#include "video_core/renderer_vulkan/declarations.h"
11
12namespace Vulkan {
13
14class VKDevice;
15class VKFence;
16class VKResourceManager;
17
18class CommandBufferPool;
19
20/// Interface for a Vulkan resource whose lifetime can be protected by a fence
21class VKResource {
22public:
23 explicit VKResource();
24 virtual ~VKResource();
25
26 /**
27 * Signals the object that an owning fence has been signaled.
28 * @param signaling_fence Fence that signals its usage end.
29 */
30 virtual void OnFenceRemoval(VKFence* signaling_fence) = 0;
31};
32
33/**
34 * Fences take ownership of objects, protecting them from GPU-side or driver-side concurrent access.
35 * They must be committed from the resource manager. Their usage flow is: commit the fence from the
36 * resource manager, protect resources with it and use them, send the fence to an execution queue
37 * and Wait for it if needed and then call Release. Used resources will automatically be signaled
38 * when they are free to be reused.
39 * @brief Protects resources for concurrent usage and signals its release.
40 */
41class VKFence {
42 friend class VKResourceManager;
43
44public:
45 explicit VKFence(const VKDevice& device, UniqueFence handle);
46 ~VKFence();
47
48 /**
49 * Waits for the fence to be signaled.
50 * @warning You must have ownership of the fence and it has to be previously sent to a queue to
51 * call this function.
52 */
53 void Wait();
54
55 /**
56 * Releases ownership of the fence. Pass after it has been sent to an execution queue.
57 * Unmanaged usage of the fence after the call will result in undefined behavior because it may
58 * be being used for something else.
59 */
60 void Release();
61
62 /// Protects a resource with this fence.
63 void Protect(VKResource* resource);
64
65 /// Removes protection for a resource.
66 void Unprotect(VKResource* resource);
67
68 /// Retrieves the fence.
69 operator vk::Fence() const {
70 return *handle;
71 }
72
73private:
74 /// Take ownership of the fence.
75 void Commit();
76
77 /**
78 * Updates the fence status.
79 * @warning Waiting for the owner might soft lock the execution.
80 * @param gpu_wait Wait for the fence to be signaled by the driver.
81 * @param owner_wait Wait for the owner to signal its freedom.
82 * @returns True if the fence is free. Waiting for gpu and owner will always return true.
83 */
84 bool Tick(bool gpu_wait, bool owner_wait);
85
86 const VKDevice& device; ///< Device handler
87 UniqueFence handle; ///< Vulkan fence
88 std::vector<VKResource*> protected_resources; ///< List of resources protected by this fence
89 bool is_owned = false; ///< The fence has been committed but not released yet.
90 bool is_used = false; ///< The fence has been committed but it has not been checked to be free.
91};
92
93/**
94 * A fence watch is used to keep track of the usage of a fence and protect a resource or set of
95 * resources without having to inherit VKResource from their handlers.
96 */
97class VKFenceWatch final : public VKResource {
98public:
99 explicit VKFenceWatch();
100 ~VKFenceWatch() override;
101
102 /// Waits for the fence to be released.
103 void Wait();
104
105 /**
106 * Waits for a previous fence and watches a new one.
107 * @param new_fence New fence to watch.
108 */
109 void Watch(VKFence& new_fence);
110
111 /**
112 * Checks if it's currently being watched and starts watching it if it's available.
113 * @returns True if the watch was acquired; false if it is already busy with another fence.
114 */
115 bool TryWatch(VKFence& new_fence);
116
117 void OnFenceRemoval(VKFence* signaling_fence) override;
118
119private:
120 VKFence* fence{}; ///< Fence watching this resource. nullptr when the watch is free.
121};
122
123/**
124 * Handles a pool of resources protected by fences. Manages resource overflow allocating more
125 * resources.
126 */
127class VKFencedPool {
128public:
129 explicit VKFencedPool(std::size_t grow_step);
130 virtual ~VKFencedPool();
131
132protected:
133 /**
134 * Commits a free resource and protects it with a fence. It may allocate new resources.
135 * @param fence Fence that protects the committed resource.
136 * @returns Index of the resource committed.
137 */
138 std::size_t CommitResource(VKFence& fence);
139
140 /// Called when a chunk of resources have to be allocated.
141 virtual void Allocate(std::size_t begin, std::size_t end) = 0;
142
143private:
144 /// Manages pool overflow allocating new resources.
145 std::size_t ManageOverflow();
146
147 /// Allocates a new page of resources.
148 void Grow();
149
150 std::size_t grow_step = 0; ///< Number of new resources created after an overflow
151 std::size_t free_iterator = 0; ///< Hint to where the next free resources is likely to be found
152 std::vector<std::unique_ptr<VKFenceWatch>> watches; ///< Set of watched resources
153};
154
155/**
156 * The resource manager handles all resources that can be protected with a fence avoiding
157 * driver-side or GPU-side concurrent usage. Usage is documented in VKFence.
158 */
159class VKResourceManager final {
160public:
161 explicit VKResourceManager(const VKDevice& device);
162 ~VKResourceManager();
163
164 /// Commits a free fence. The caller must send it to a queue and eventually call Release.
165 VKFence& CommitFence();
166
167 /// Commits an unused command buffer and protects it with a fence.
168 vk::CommandBuffer CommitCommandBuffer(VKFence& fence);
169
170private:
171 /// Allocates new fences.
172 void GrowFences(std::size_t new_fences_count);
173
174 const VKDevice& device; ///< Device handler.
175 std::size_t fences_iterator = 0; ///< Index where a free fence is likely to be found.
176 std::vector<std::unique_ptr<VKFence>> fences; ///< Pool of fences.
177 std::unique_ptr<CommandBufferPool> command_buffer_pool; ///< Pool of command buffers.
178};
179
180} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_sampler_cache.cpp b/src/video_core/renderer_vulkan/vk_sampler_cache.cpp
new file mode 100644
index 000000000..ed3178f09
--- /dev/null
+++ b/src/video_core/renderer_vulkan/vk_sampler_cache.cpp
@@ -0,0 +1,81 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <cstring>
6#include <optional>
7#include <unordered_map>
8
9#include "common/assert.h"
10#include "common/cityhash.h"
11#include "video_core/renderer_vulkan/declarations.h"
12#include "video_core/renderer_vulkan/maxwell_to_vk.h"
13#include "video_core/renderer_vulkan/vk_sampler_cache.h"
14#include "video_core/textures/texture.h"
15
16namespace Vulkan {
17
// Maps the exact float border colors that Vulkan predefines; anything else is unsupported here.
// NOTE(review): std::array is used but <array> is not in this file's include list — confirm it
// arrives transitively.
18static std::optional<vk::BorderColor> TryConvertBorderColor(std::array<float, 4> color) {
19 // TODO(Rodrigo): Manage integer border colors
20 if (color == std::array<float, 4>{0, 0, 0, 0}) {
21 return vk::BorderColor::eFloatTransparentBlack;
22 } else if (color == std::array<float, 4>{0, 0, 0, 1}) {
23 return vk::BorderColor::eFloatOpaqueBlack;
24 } else if (color == std::array<float, 4>{1, 1, 1, 1}) {
25 return vk::BorderColor::eFloatOpaqueWhite;
26 } else {
27 return {};
28 }
29}
30
31std::size_t SamplerCacheKey::Hash() const {
32 static_assert(sizeof(raw) % sizeof(u64) == 0);
// NOTE(review): CityHash64 conventionally takes a length in bytes; sizeof(raw) / sizeof(u64)
// is the u64 element count — confirm the intended length unit.
33 return static_cast<std::size_t>(
34 Common::CityHash64(reinterpret_cast<const char*>(raw.data()), sizeof(raw) / sizeof(u64)));
35}
36
37bool SamplerCacheKey::operator==(const SamplerCacheKey& rhs) const {
38 return raw == rhs.raw;
39}
40
41VKSamplerCache::VKSamplerCache(const VKDevice& device) : device{device} {}
42
43VKSamplerCache::~VKSamplerCache() = default;
44
45vk::Sampler VKSamplerCache::GetSampler(const Tegra::Texture::TSCEntry& tsc) {
// try_emplace yields the map entry and whether it was newly inserted (a cache miss).
46 const auto [entry, is_cache_miss] = cache.try_emplace(SamplerCacheKey{tsc});
47 auto& sampler = entry->second;
48 if (is_cache_miss) {
49 sampler = CreateSampler(tsc);
50 }
51 return *sampler;
52}
53
// Translates a Maxwell TSC entry into a Vulkan sampler object.
54UniqueSampler VKSamplerCache::CreateSampler(const Tegra::Texture::TSCEntry& tsc) {
// Anisotropic filtering is only enabled when the requested level exceeds 1x.
55 const float max_anisotropy = tsc.GetMaxAnisotropy();
56 const bool has_anisotropy = max_anisotropy > 1.0f;
57
58 const auto border_color = tsc.GetBorderColor();
59 const auto vk_border_color = TryConvertBorderColor(border_color);
60 UNIMPLEMENTED_IF_MSG(!vk_border_color, "Unimplemented border color {} {} {} {}",
61 border_color[0], border_color[1], border_color[2], border_color[3]);
62
63 constexpr bool unnormalized_coords = false;
64
65 const vk::SamplerCreateInfo sampler_ci(
66 {}, MaxwellToVK::Sampler::Filter(tsc.mag_filter),
67 MaxwellToVK::Sampler::Filter(tsc.min_filter),
68 MaxwellToVK::Sampler::MipmapMode(tsc.mipmap_filter),
69 MaxwellToVK::Sampler::WrapMode(tsc.wrap_u), MaxwellToVK::Sampler::WrapMode(tsc.wrap_v),
70 MaxwellToVK::Sampler::WrapMode(tsc.wrap_p), tsc.GetLodBias(), has_anisotropy,
71 max_anisotropy, tsc.depth_compare_enabled,
72 MaxwellToVK::Sampler::DepthCompareFunction(tsc.depth_compare_func), tsc.GetMinLod(),
73 tsc.GetMaxLod(), vk_border_color.value_or(vk::BorderColor::eFloatTransparentBlack),
74 unnormalized_coords);
75
76 const auto& dld = device.GetDispatchLoader();
77 const auto dev = device.GetLogical();
78 return dev.createSamplerUnique(sampler_ci, nullptr, dld);
79}
80
81} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_sampler_cache.h b/src/video_core/renderer_vulkan/vk_sampler_cache.h
new file mode 100644
index 000000000..c6394dc87
--- /dev/null
+++ b/src/video_core/renderer_vulkan/vk_sampler_cache.h
@@ -0,0 +1,56 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <unordered_map>
8
9#include "common/common_types.h"
10#include "video_core/renderer_vulkan/declarations.h"
11#include "video_core/textures/texture.h"
12
13namespace Vulkan {
14
15class VKDevice;
16
// Hashable wrapper over a TSC entry so samplers can be cached in an unordered_map.
17struct SamplerCacheKey final : public Tegra::Texture::TSCEntry {
/// Hashes the raw TSC data (implemented in vk_sampler_cache.cpp).
18 std::size_t Hash() const;
19
/// Two keys are equal when their raw TSC words match exactly.
20 bool operator==(const SamplerCacheKey& rhs) const;
21
22 bool operator!=(const SamplerCacheKey& rhs) const {
23 return !operator==(rhs);
24 }
25};
26
27} // namespace Vulkan
28
29namespace std {
30
// Enables SamplerCacheKey as a key type for standard unordered containers.
31template <>
32struct hash<Vulkan::SamplerCacheKey> {
33 std::size_t operator()(const Vulkan::SamplerCacheKey& k) const noexcept {
34 return k.Hash();
35 }
36};
37
38} // namespace std
39
40namespace Vulkan {
41
// Caches Vulkan samplers keyed by their Maxwell TSC configuration.
42class VKSamplerCache {
43public:
44 explicit VKSamplerCache(const VKDevice& device);
45 ~VKSamplerCache();
46
/// Returns a cached sampler for "tsc", creating and storing it on a cache miss.
47 vk::Sampler GetSampler(const Tegra::Texture::TSCEntry& tsc);
48
49private:
/// Builds a new Vulkan sampler from the TSC entry.
50 UniqueSampler CreateSampler(const Tegra::Texture::TSCEntry& tsc);
51
52 const VKDevice& device;
53 std::unordered_map<SamplerCacheKey, UniqueSampler> cache;
54};
55
56} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_scheduler.cpp b/src/video_core/renderer_vulkan/vk_scheduler.cpp
new file mode 100644
index 000000000..f1fea1871
--- /dev/null
+++ b/src/video_core/renderer_vulkan/vk_scheduler.cpp
@@ -0,0 +1,60 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/assert.h"
6#include "common/logging/log.h"
7#include "video_core/renderer_vulkan/declarations.h"
8#include "video_core/renderer_vulkan/vk_device.h"
9#include "video_core/renderer_vulkan/vk_resource_manager.h"
10#include "video_core/renderer_vulkan/vk_scheduler.h"
11
12namespace Vulkan {
13
14VKScheduler::VKScheduler(const VKDevice& device, VKResourceManager& resource_manager)
15 : device{device}, resource_manager{resource_manager} {
16 next_fence = &resource_manager.CommitFence();
17 AllocateNewContext();
18}
19
20VKScheduler::~VKScheduler() = default;
21
22VKExecutionContext VKScheduler::GetExecutionContext() const {
23 return VKExecutionContext(current_fence, current_cmdbuf);
24}
25
26VKExecutionContext VKScheduler::Flush(vk::Semaphore semaphore) {
27 SubmitExecution(semaphore);
28 current_fence->Release();
29 AllocateNewContext();
30 return GetExecutionContext();
31}
32
33VKExecutionContext VKScheduler::Finish(vk::Semaphore semaphore) {
34 SubmitExecution(semaphore);
35 current_fence->Wait();
36 current_fence->Release();
37 AllocateNewContext();
38 return GetExecutionContext();
39}
40
41void VKScheduler::SubmitExecution(vk::Semaphore semaphore) {
42 const auto& dld = device.GetDispatchLoader();
43 current_cmdbuf.end(dld);
44
45 const auto queue = device.GetGraphicsQueue();
46 const vk::SubmitInfo submit_info(0, nullptr, nullptr, 1, &current_cmdbuf, semaphore ? 1u : 0u,
47 &semaphore);
48 queue.submit({submit_info}, *current_fence, dld);
49}
50
51void VKScheduler::AllocateNewContext() {
52 current_fence = next_fence;
53 current_cmdbuf = resource_manager.CommitCommandBuffer(*current_fence);
54 next_fence = &resource_manager.CommitFence();
55
56 const auto& dld = device.GetDispatchLoader();
57 current_cmdbuf.begin({vk::CommandBufferUsageFlagBits::eOneTimeSubmit}, dld);
58}
59
60} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_scheduler.h b/src/video_core/renderer_vulkan/vk_scheduler.h
new file mode 100644
index 000000000..cfaf5376f
--- /dev/null
+++ b/src/video_core/renderer_vulkan/vk_scheduler.h
@@ -0,0 +1,69 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include "common/common_types.h"
8#include "video_core/renderer_vulkan/declarations.h"
9
10namespace Vulkan {
11
12class VKDevice;
13class VKExecutionContext;
14class VKFence;
15class VKResourceManager;
16
17/// The scheduler abstracts command buffer and fence management with an interface that's able to do
18/// OpenGL-like operations on Vulkan command buffers.
19class VKScheduler {
20public:
21 explicit VKScheduler(const VKDevice& device, VKResourceManager& resource_manager);
22 ~VKScheduler();
23
24 /// Gets the current execution context.
25 [[nodiscard]] VKExecutionContext GetExecutionContext() const;
26
27 /// Sends the current execution context to the GPU. It invalidates the current execution context
28 /// and returns a new one.
29 VKExecutionContext Flush(vk::Semaphore semaphore = nullptr);
30
31 /// Sends the current execution context to the GPU and waits for it to complete. It invalidates
32 /// the current execution context and returns a new one.
33 VKExecutionContext Finish(vk::Semaphore semaphore = nullptr);
34
35private:
36 void SubmitExecution(vk::Semaphore semaphore);
37
38 void AllocateNewContext();
39
40 const VKDevice& device;
41 VKResourceManager& resource_manager;
42 vk::CommandBuffer current_cmdbuf;
43 VKFence* current_fence = nullptr;
44 VKFence* next_fence = nullptr;
45};
46
47class VKExecutionContext {
48 friend class VKScheduler;
49
50public:
51 VKExecutionContext() = default;
52
53 VKFence& GetFence() const {
54 return *fence;
55 }
56
57 vk::CommandBuffer GetCommandBuffer() const {
58 return cmdbuf;
59 }
60
61private:
62 explicit VKExecutionContext(VKFence* fence, vk::CommandBuffer cmdbuf)
63 : fence{fence}, cmdbuf{cmdbuf} {}
64
65 VKFence* fence{};
66 vk::CommandBuffer cmdbuf;
67};
68
69} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_stream_buffer.cpp b/src/video_core/renderer_vulkan/vk_stream_buffer.cpp
new file mode 100644
index 000000000..58ffa42f2
--- /dev/null
+++ b/src/video_core/renderer_vulkan/vk_stream_buffer.cpp
@@ -0,0 +1,90 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <algorithm>
6#include <memory>
7#include <optional>
8#include <vector>
9
10#include "common/assert.h"
11#include "video_core/renderer_vulkan/declarations.h"
12#include "video_core/renderer_vulkan/vk_device.h"
13#include "video_core/renderer_vulkan/vk_memory_manager.h"
14#include "video_core/renderer_vulkan/vk_resource_manager.h"
15#include "video_core/renderer_vulkan/vk_scheduler.h"
16#include "video_core/renderer_vulkan/vk_stream_buffer.h"
17
18namespace Vulkan {
19
20constexpr u64 WATCHES_INITIAL_RESERVE = 0x4000;
21constexpr u64 WATCHES_RESERVE_CHUNK = 0x1000;
22
23VKStreamBuffer::VKStreamBuffer(const VKDevice& device, VKMemoryManager& memory_manager,
24 VKScheduler& scheduler, u64 size, vk::BufferUsageFlags usage,
25 vk::AccessFlags access, vk::PipelineStageFlags pipeline_stage)
26 : device{device}, scheduler{scheduler}, buffer_size{size}, access{access}, pipeline_stage{
27 pipeline_stage} {
28 CreateBuffers(memory_manager, usage);
29 ReserveWatches(WATCHES_INITIAL_RESERVE);
30}
31
32VKStreamBuffer::~VKStreamBuffer() = default;
33
34std::tuple<u8*, u64, bool> VKStreamBuffer::Reserve(u64 size) {
35 ASSERT(size <= buffer_size);
36 mapped_size = size;
37
38 if (offset + size > buffer_size) {
39 // The buffer would overflow, save the amount of used buffers, signal an invalidation and
40 // reset the state.
41 invalidation_mark = used_watches;
42 used_watches = 0;
43 offset = 0;
44 }
45
46 return {mapped_pointer + offset, offset, invalidation_mark.has_value()};
47}
48
49VKExecutionContext VKStreamBuffer::Send(VKExecutionContext exctx, u64 size) {
50 ASSERT_MSG(size <= mapped_size, "Reserved size is too small");
51
52 if (invalidation_mark) {
53 // TODO(Rodrigo): Find a better way to invalidate than waiting for all watches to finish.
54 exctx = scheduler.Flush();
55 std::for_each(watches.begin(), watches.begin() + *invalidation_mark,
56 [&](auto& resource) { resource->Wait(); });
57 invalidation_mark = std::nullopt;
58 }
59
60 if (used_watches + 1 >= watches.size()) {
61 // Ensure that there are enough watches.
62 ReserveWatches(WATCHES_RESERVE_CHUNK);
63 }
64 // Add a watch for this allocation.
65 watches[used_watches++]->Watch(exctx.GetFence());
66
67 offset += size;
68
69 return exctx;
70}
71
72void VKStreamBuffer::CreateBuffers(VKMemoryManager& memory_manager, vk::BufferUsageFlags usage) {
73 const vk::BufferCreateInfo buffer_ci({}, buffer_size, usage, vk::SharingMode::eExclusive, 0,
74 nullptr);
75
76 const auto dev = device.GetLogical();
77 const auto& dld = device.GetDispatchLoader();
78 buffer = dev.createBufferUnique(buffer_ci, nullptr, dld);
79 commit = memory_manager.Commit(*buffer, true);
80 mapped_pointer = commit->GetData();
81}
82
83void VKStreamBuffer::ReserveWatches(std::size_t grow_size) {
84 const std::size_t previous_size = watches.size();
85 watches.resize(previous_size + grow_size);
86 std::generate(watches.begin() + previous_size, watches.end(),
87 []() { return std::make_unique<VKFenceWatch>(); });
88}
89
90} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_stream_buffer.h b/src/video_core/renderer_vulkan/vk_stream_buffer.h
new file mode 100644
index 000000000..69d036ccd
--- /dev/null
+++ b/src/video_core/renderer_vulkan/vk_stream_buffer.h
@@ -0,0 +1,72 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <memory>
8#include <optional>
9#include <tuple>
10#include <vector>
11
12#include "common/common_types.h"
13#include "video_core/renderer_vulkan/declarations.h"
14#include "video_core/renderer_vulkan/vk_memory_manager.h"
15
16namespace Vulkan {
17
18class VKDevice;
19class VKFence;
20class VKFenceWatch;
21class VKResourceManager;
22class VKScheduler;
23
24class VKStreamBuffer {
25public:
26 explicit VKStreamBuffer(const VKDevice& device, VKMemoryManager& memory_manager,
27 VKScheduler& scheduler, u64 size, vk::BufferUsageFlags usage,
28 vk::AccessFlags access, vk::PipelineStageFlags pipeline_stage);
29 ~VKStreamBuffer();
30
31 /**
32 * Reserves a region of memory from the stream buffer.
33 * @param size Size to reserve.
34 * @returns A tuple in the following order: Raw memory pointer (with offset added), buffer
35 * offset and a boolean that's true when buffer has been invalidated.
36 */
37 std::tuple<u8*, u64, bool> Reserve(u64 size);
38
39 /// Ensures that "size" bytes of memory are available to the GPU, potentially recording a copy.
40 [[nodiscard]] VKExecutionContext Send(VKExecutionContext exctx, u64 size);
41
42 vk::Buffer GetBuffer() const {
43 return *buffer;
44 }
45
46private:
47 /// Creates Vulkan buffer handles committing the required the required memory.
48 void CreateBuffers(VKMemoryManager& memory_manager, vk::BufferUsageFlags usage);
49
50 /// Increases the amount of watches available.
51 void ReserveWatches(std::size_t grow_size);
52
53 const VKDevice& device; ///< Vulkan device manager.
54 VKScheduler& scheduler; ///< Command scheduler.
55 const u64 buffer_size; ///< Total size of the stream buffer.
56 const vk::AccessFlags access; ///< Access usage of this stream buffer.
57 const vk::PipelineStageFlags pipeline_stage; ///< Pipeline usage of this stream buffer.
58
59 UniqueBuffer buffer; ///< Mapped buffer.
60 VKMemoryCommit commit; ///< Memory commit.
61 u8* mapped_pointer{}; ///< Pointer to the host visible commit
62
63 u64 offset{}; ///< Buffer iterator.
64 u64 mapped_size{}; ///< Size reserved for the current copy.
65
66 std::vector<std::unique_ptr<VKFenceWatch>> watches; ///< Total watches
67 std::size_t used_watches{}; ///< Count of watches, reset on invalidation.
68 std::optional<std::size_t>
69 invalidation_mark{}; ///< Number of watches used in the current invalidation.
70};
71
72} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_swapchain.cpp b/src/video_core/renderer_vulkan/vk_swapchain.cpp
new file mode 100644
index 000000000..08279e562
--- /dev/null
+++ b/src/video_core/renderer_vulkan/vk_swapchain.cpp
@@ -0,0 +1,210 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <algorithm>
6#include <array>
7#include <limits>
8#include <vector>
9
10#include "common/assert.h"
11#include "common/logging/log.h"
12#include "core/core.h"
13#include "core/frontend/framebuffer_layout.h"
14#include "video_core/renderer_vulkan/declarations.h"
15#include "video_core/renderer_vulkan/vk_device.h"
16#include "video_core/renderer_vulkan/vk_resource_manager.h"
17#include "video_core/renderer_vulkan/vk_swapchain.h"
18
19namespace Vulkan {
20
21namespace {
22vk::SurfaceFormatKHR ChooseSwapSurfaceFormat(const std::vector<vk::SurfaceFormatKHR>& formats) {
23 if (formats.size() == 1 && formats[0].format == vk::Format::eUndefined) {
24 return {vk::Format::eB8G8R8A8Unorm, vk::ColorSpaceKHR::eSrgbNonlinear};
25 }
26 const auto& found = std::find_if(formats.begin(), formats.end(), [](const auto& format) {
27 return format.format == vk::Format::eB8G8R8A8Unorm &&
28 format.colorSpace == vk::ColorSpaceKHR::eSrgbNonlinear;
29 });
30 return found != formats.end() ? *found : formats[0];
31}
32
33vk::PresentModeKHR ChooseSwapPresentMode(const std::vector<vk::PresentModeKHR>& modes) {
34 // Mailbox doesn't lock the application like fifo (vsync), prefer it
35 const auto& found = std::find_if(modes.begin(), modes.end(), [](const auto& mode) {
36 return mode == vk::PresentModeKHR::eMailbox;
37 });
38 return found != modes.end() ? *found : vk::PresentModeKHR::eFifo;
39}
40
41vk::Extent2D ChooseSwapExtent(const vk::SurfaceCapabilitiesKHR& capabilities, u32 width,
42 u32 height) {
43 constexpr auto undefined_size{std::numeric_limits<u32>::max()};
44 if (capabilities.currentExtent.width != undefined_size) {
45 return capabilities.currentExtent;
46 }
47 vk::Extent2D extent = {width, height};
48 extent.width = std::max(capabilities.minImageExtent.width,
49 std::min(capabilities.maxImageExtent.width, extent.width));
50 extent.height = std::max(capabilities.minImageExtent.height,
51 std::min(capabilities.maxImageExtent.height, extent.height));
52 return extent;
53}
54} // namespace
55
56VKSwapchain::VKSwapchain(vk::SurfaceKHR surface, const VKDevice& device)
57 : surface{surface}, device{device} {}
58
59VKSwapchain::~VKSwapchain() = default;
60
61void VKSwapchain::Create(u32 width, u32 height) {
62 const auto dev = device.GetLogical();
63 const auto& dld = device.GetDispatchLoader();
64 const auto physical_device = device.GetPhysical();
65
66 const vk::SurfaceCapabilitiesKHR capabilities{
67 physical_device.getSurfaceCapabilitiesKHR(surface, dld)};
68 if (capabilities.maxImageExtent.width == 0 || capabilities.maxImageExtent.height == 0) {
69 return;
70 }
71
72 dev.waitIdle(dld);
73 Destroy();
74
75 CreateSwapchain(capabilities, width, height);
76 CreateSemaphores();
77 CreateImageViews();
78
79 fences.resize(image_count, nullptr);
80}
81
82void VKSwapchain::AcquireNextImage() {
83 const auto dev{device.GetLogical()};
84 const auto& dld{device.GetDispatchLoader()};
85 dev.acquireNextImageKHR(*swapchain, std::numeric_limits<u64>::max(),
86 *present_semaphores[frame_index], {}, &image_index, dld);
87
88 if (auto& fence = fences[image_index]; fence) {
89 fence->Wait();
90 fence->Release();
91 fence = nullptr;
92 }
93}
94
95bool VKSwapchain::Present(vk::Semaphore render_semaphore, VKFence& fence) {
96 const vk::Semaphore present_semaphore{*present_semaphores[frame_index]};
97 const std::array<vk::Semaphore, 2> semaphores{present_semaphore, render_semaphore};
98 const u32 wait_semaphore_count{render_semaphore ? 2U : 1U};
99 const auto& dld{device.GetDispatchLoader()};
100 const auto present_queue{device.GetPresentQueue()};
101 bool recreated = false;
102
103 const vk::PresentInfoKHR present_info(wait_semaphore_count, semaphores.data(), 1,
104 &swapchain.get(), &image_index, {});
105 switch (const auto result = present_queue.presentKHR(&present_info, dld); result) {
106 case vk::Result::eSuccess:
107 break;
108 case vk::Result::eErrorOutOfDateKHR:
109 if (current_width > 0 && current_height > 0) {
110 Create(current_width, current_height);
111 recreated = true;
112 }
113 break;
114 default:
115 LOG_CRITICAL(Render_Vulkan, "Vulkan failed to present swapchain due to {}!",
116 vk::to_string(result));
117 UNREACHABLE();
118 }
119
120 ASSERT(fences[image_index] == nullptr);
121 fences[image_index] = &fence;
122 frame_index = (frame_index + 1) % image_count;
123 return recreated;
124}
125
126bool VKSwapchain::HasFramebufferChanged(const Layout::FramebufferLayout& framebuffer) const {
127 // TODO(Rodrigo): Handle framebuffer pixel format changes
128 return framebuffer.width != current_width || framebuffer.height != current_height;
129}
130
131void VKSwapchain::CreateSwapchain(const vk::SurfaceCapabilitiesKHR& capabilities, u32 width,
132 u32 height) {
133 const auto dev{device.GetLogical()};
134 const auto& dld{device.GetDispatchLoader()};
135 const auto physical_device{device.GetPhysical()};
136
137 const std::vector<vk::SurfaceFormatKHR> formats{
138 physical_device.getSurfaceFormatsKHR(surface, dld)};
139
140 const std::vector<vk::PresentModeKHR> present_modes{
141 physical_device.getSurfacePresentModesKHR(surface, dld)};
142
143 const vk::SurfaceFormatKHR surface_format{ChooseSwapSurfaceFormat(formats)};
144 const vk::PresentModeKHR present_mode{ChooseSwapPresentMode(present_modes)};
145 extent = ChooseSwapExtent(capabilities, width, height);
146
147 current_width = extent.width;
148 current_height = extent.height;
149
150 u32 requested_image_count{capabilities.minImageCount + 1};
151 if (capabilities.maxImageCount > 0 && requested_image_count > capabilities.maxImageCount) {
152 requested_image_count = capabilities.maxImageCount;
153 }
154
155 vk::SwapchainCreateInfoKHR swapchain_ci(
156 {}, surface, requested_image_count, surface_format.format, surface_format.colorSpace,
157 extent, 1, vk::ImageUsageFlagBits::eColorAttachment, {}, {}, {},
158 capabilities.currentTransform, vk::CompositeAlphaFlagBitsKHR::eOpaque, present_mode, false,
159 {});
160
161 const u32 graphics_family{device.GetGraphicsFamily()};
162 const u32 present_family{device.GetPresentFamily()};
163 const std::array<u32, 2> queue_indices{graphics_family, present_family};
164 if (graphics_family != present_family) {
165 swapchain_ci.imageSharingMode = vk::SharingMode::eConcurrent;
166 swapchain_ci.queueFamilyIndexCount = static_cast<u32>(queue_indices.size());
167 swapchain_ci.pQueueFamilyIndices = queue_indices.data();
168 } else {
169 swapchain_ci.imageSharingMode = vk::SharingMode::eExclusive;
170 }
171
172 swapchain = dev.createSwapchainKHRUnique(swapchain_ci, nullptr, dld);
173
174 images = dev.getSwapchainImagesKHR(*swapchain, dld);
175 image_count = static_cast<u32>(images.size());
176 image_format = surface_format.format;
177}
178
179void VKSwapchain::CreateSemaphores() {
180 const auto dev{device.GetLogical()};
181 const auto& dld{device.GetDispatchLoader()};
182
183 present_semaphores.resize(image_count);
184 for (std::size_t i = 0; i < image_count; i++) {
185 present_semaphores[i] = dev.createSemaphoreUnique({}, nullptr, dld);
186 }
187}
188
189void VKSwapchain::CreateImageViews() {
190 const auto dev{device.GetLogical()};
191 const auto& dld{device.GetDispatchLoader()};
192
193 image_views.resize(image_count);
194 for (std::size_t i = 0; i < image_count; i++) {
195 const vk::ImageViewCreateInfo image_view_ci({}, images[i], vk::ImageViewType::e2D,
196 image_format, {},
197 {vk::ImageAspectFlagBits::eColor, 0, 1, 0, 1});
198 image_views[i] = dev.createImageViewUnique(image_view_ci, nullptr, dld);
199 }
200}
201
202void VKSwapchain::Destroy() {
203 frame_index = 0;
204 present_semaphores.clear();
205 framebuffers.clear();
206 image_views.clear();
207 swapchain.reset();
208}
209
210} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_swapchain.h b/src/video_core/renderer_vulkan/vk_swapchain.h
new file mode 100644
index 000000000..2ad84f185
--- /dev/null
+++ b/src/video_core/renderer_vulkan/vk_swapchain.h
@@ -0,0 +1,92 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <vector>
8
9#include "common/common_types.h"
10#include "video_core/renderer_vulkan/declarations.h"
11
12namespace Layout {
13struct FramebufferLayout;
14}
15
16namespace Vulkan {
17
18class VKDevice;
19class VKFence;
20
21class VKSwapchain {
22public:
23 explicit VKSwapchain(vk::SurfaceKHR surface, const VKDevice& device);
24 ~VKSwapchain();
25
26 /// Creates (or recreates) the swapchain with a given size.
27 void Create(u32 width, u32 height);
28
29 /// Acquires the next image in the swapchain, waits as needed.
30 void AcquireNextImage();
31
32 /// Presents the rendered image to the swapchain. Returns true when the swapchains had to be
33 /// recreated. Takes responsability for the ownership of fence.
34 bool Present(vk::Semaphore render_semaphore, VKFence& fence);
35
36 /// Returns true when the framebuffer layout has changed.
37 bool HasFramebufferChanged(const Layout::FramebufferLayout& framebuffer) const;
38
39 const vk::Extent2D& GetSize() const {
40 return extent;
41 }
42
43 u32 GetImageCount() const {
44 return image_count;
45 }
46
47 u32 GetImageIndex() const {
48 return image_index;
49 }
50
51 vk::Image GetImageIndex(u32 index) const {
52 return images[index];
53 }
54
55 vk::ImageView GetImageViewIndex(u32 index) const {
56 return *image_views[index];
57 }
58
59 vk::Format GetImageFormat() const {
60 return image_format;
61 }
62
63private:
64 void CreateSwapchain(const vk::SurfaceCapabilitiesKHR& capabilities, u32 width, u32 height);
65 void CreateSemaphores();
66 void CreateImageViews();
67
68 void Destroy();
69
70 const vk::SurfaceKHR surface;
71 const VKDevice& device;
72
73 UniqueSwapchainKHR swapchain;
74
75 u32 image_count{};
76 std::vector<vk::Image> images;
77 std::vector<UniqueImageView> image_views;
78 std::vector<UniqueFramebuffer> framebuffers;
79 std::vector<VKFence*> fences;
80 std::vector<UniqueSemaphore> present_semaphores;
81
82 u32 image_index{};
83 u32 frame_index{};
84
85 vk::Format image_format{};
86 vk::Extent2D extent{};
87
88 u32 current_width{};
89 u32 current_height{};
90};
91
92} // namespace Vulkan
diff --git a/src/video_core/shader/decode.cpp b/src/video_core/shader/decode.cpp
index 740ac3118..e4c438792 100644
--- a/src/video_core/shader/decode.cpp
+++ b/src/video_core/shader/decode.cpp
@@ -165,6 +165,7 @@ u32 ShaderIR::DecodeInstr(NodeBlock& bb, u32 pc) {
165 {OpCode::Type::Hfma2, &ShaderIR::DecodeHfma2}, 165 {OpCode::Type::Hfma2, &ShaderIR::DecodeHfma2},
166 {OpCode::Type::Conversion, &ShaderIR::DecodeConversion}, 166 {OpCode::Type::Conversion, &ShaderIR::DecodeConversion},
167 {OpCode::Type::Memory, &ShaderIR::DecodeMemory}, 167 {OpCode::Type::Memory, &ShaderIR::DecodeMemory},
168 {OpCode::Type::Texture, &ShaderIR::DecodeTexture},
168 {OpCode::Type::FloatSetPredicate, &ShaderIR::DecodeFloatSetPredicate}, 169 {OpCode::Type::FloatSetPredicate, &ShaderIR::DecodeFloatSetPredicate},
169 {OpCode::Type::IntegerSetPredicate, &ShaderIR::DecodeIntegerSetPredicate}, 170 {OpCode::Type::IntegerSetPredicate, &ShaderIR::DecodeIntegerSetPredicate},
170 {OpCode::Type::HalfSetPredicate, &ShaderIR::DecodeHalfSetPredicate}, 171 {OpCode::Type::HalfSetPredicate, &ShaderIR::DecodeHalfSetPredicate},
diff --git a/src/video_core/shader/decode/arithmetic_integer.cpp b/src/video_core/shader/decode/arithmetic_integer.cpp
index 38bb692d6..9fd4b273e 100644
--- a/src/video_core/shader/decode/arithmetic_integer.cpp
+++ b/src/video_core/shader/decode/arithmetic_integer.cpp
@@ -41,7 +41,7 @@ u32 ShaderIR::DecodeArithmeticInteger(NodeBlock& bb, u32 pc) {
41 41
42 const Node value = Operation(OperationCode::IAdd, PRECISE, op_a, op_b); 42 const Node value = Operation(OperationCode::IAdd, PRECISE, op_a, op_b);
43 43
44 SetInternalFlagsFromInteger(bb, value, instr.op_32.generates_cc); 44 SetInternalFlagsFromInteger(bb, value, instr.generates_cc);
45 SetRegister(bb, instr.gpr0, value); 45 SetRegister(bb, instr.gpr0, value);
46 break; 46 break;
47 } 47 }
@@ -284,4 +284,4 @@ void ShaderIR::WriteLop3Instruction(NodeBlock& bb, Register dest, Node op_a, Nod
284 SetRegister(bb, dest, value); 284 SetRegister(bb, dest, value);
285} 285}
286 286
287} // namespace VideoCommon::Shader \ No newline at end of file 287} // namespace VideoCommon::Shader
diff --git a/src/video_core/shader/decode/conversion.cpp b/src/video_core/shader/decode/conversion.cpp
index a992f73f8..55a6fbbf2 100644
--- a/src/video_core/shader/decode/conversion.cpp
+++ b/src/video_core/shader/decode/conversion.cpp
@@ -118,8 +118,8 @@ u32 ShaderIR::DecodeConversion(NodeBlock& bb, u32 pc) {
118 118
119 value = [&]() { 119 value = [&]() {
120 switch (instr.conversion.f2i.rounding) { 120 switch (instr.conversion.f2i.rounding) {
121 case Tegra::Shader::F2iRoundingOp::None: 121 case Tegra::Shader::F2iRoundingOp::RoundEven:
122 return value; 122 return Operation(OperationCode::FRoundEven, PRECISE, value);
123 case Tegra::Shader::F2iRoundingOp::Floor: 123 case Tegra::Shader::F2iRoundingOp::Floor:
124 return Operation(OperationCode::FFloor, PRECISE, value); 124 return Operation(OperationCode::FFloor, PRECISE, value);
125 case Tegra::Shader::F2iRoundingOp::Ceil: 125 case Tegra::Shader::F2iRoundingOp::Ceil:
@@ -146,4 +146,4 @@ u32 ShaderIR::DecodeConversion(NodeBlock& bb, u32 pc) {
146 return pc; 146 return pc;
147} 147}
148 148
149} // namespace VideoCommon::Shader \ No newline at end of file 149} // namespace VideoCommon::Shader
diff --git a/src/video_core/shader/decode/memory.cpp b/src/video_core/shader/decode/memory.cpp
index e006f8138..ea3c71eed 100644
--- a/src/video_core/shader/decode/memory.cpp
+++ b/src/video_core/shader/decode/memory.cpp
@@ -17,24 +17,6 @@ using Tegra::Shader::Attribute;
17using Tegra::Shader::Instruction; 17using Tegra::Shader::Instruction;
18using Tegra::Shader::OpCode; 18using Tegra::Shader::OpCode;
19using Tegra::Shader::Register; 19using Tegra::Shader::Register;
20using Tegra::Shader::TextureMiscMode;
21using Tegra::Shader::TextureProcessMode;
22using Tegra::Shader::TextureType;
23
24static std::size_t GetCoordCount(TextureType texture_type) {
25 switch (texture_type) {
26 case TextureType::Texture1D:
27 return 1;
28 case TextureType::Texture2D:
29 return 2;
30 case TextureType::Texture3D:
31 case TextureType::TextureCube:
32 return 3;
33 default:
34 UNIMPLEMENTED_MSG("Unhandled texture type: {}", static_cast<u32>(texture_type));
35 return 0;
36 }
37}
38 20
39u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) { 21u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) {
40 const Instruction instr = {program_code[pc]}; 22 const Instruction instr = {program_code[pc]};
@@ -48,7 +30,7 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) {
48 UNIMPLEMENTED_IF_MSG((instr.attribute.fmt20.immediate.Value() % sizeof(u32)) != 0, 30 UNIMPLEMENTED_IF_MSG((instr.attribute.fmt20.immediate.Value() % sizeof(u32)) != 0,
49 "Unaligned attribute loads are not supported"); 31 "Unaligned attribute loads are not supported");
50 32
51 Tegra::Shader::IpaMode input_mode{Tegra::Shader::IpaInterpMode::Perspective, 33 Tegra::Shader::IpaMode input_mode{Tegra::Shader::IpaInterpMode::Pass,
52 Tegra::Shader::IpaSampleMode::Default}; 34 Tegra::Shader::IpaSampleMode::Default};
53 35
54 u64 next_element = instr.attribute.fmt20.element; 36 u64 next_element = instr.attribute.fmt20.element;
@@ -247,197 +229,6 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) {
247 } 229 }
248 break; 230 break;
249 } 231 }
250 case OpCode::Id::TEX: {
251 UNIMPLEMENTED_IF_MSG(instr.tex.UsesMiscMode(TextureMiscMode::AOFFI),
252 "AOFFI is not implemented");
253
254 if (instr.tex.UsesMiscMode(TextureMiscMode::NODEP)) {
255 LOG_WARNING(HW_GPU, "TEX.NODEP implementation is incomplete");
256 }
257
258 const TextureType texture_type{instr.tex.texture_type};
259 const bool is_array = instr.tex.array != 0;
260 const bool depth_compare = instr.tex.UsesMiscMode(TextureMiscMode::DC);
261 const auto process_mode = instr.tex.GetTextureProcessMode();
262 WriteTexInstructionFloat(
263 bb, instr, GetTexCode(instr, texture_type, process_mode, depth_compare, is_array));
264 break;
265 }
266 case OpCode::Id::TEXS: {
267 const TextureType texture_type{instr.texs.GetTextureType()};
268 const bool is_array{instr.texs.IsArrayTexture()};
269 const bool depth_compare = instr.texs.UsesMiscMode(TextureMiscMode::DC);
270 const auto process_mode = instr.texs.GetTextureProcessMode();
271
272 if (instr.texs.UsesMiscMode(TextureMiscMode::NODEP)) {
273 LOG_WARNING(HW_GPU, "TEXS.NODEP implementation is incomplete");
274 }
275
276 const Node4 components =
277 GetTexsCode(instr, texture_type, process_mode, depth_compare, is_array);
278
279 if (instr.texs.fp32_flag) {
280 WriteTexsInstructionFloat(bb, instr, components);
281 } else {
282 WriteTexsInstructionHalfFloat(bb, instr, components);
283 }
284 break;
285 }
286 case OpCode::Id::TLD4: {
287 ASSERT(instr.tld4.array == 0);
288 UNIMPLEMENTED_IF_MSG(instr.tld4.UsesMiscMode(TextureMiscMode::AOFFI),
289 "AOFFI is not implemented");
290 UNIMPLEMENTED_IF_MSG(instr.tld4.UsesMiscMode(TextureMiscMode::NDV),
291 "NDV is not implemented");
292 UNIMPLEMENTED_IF_MSG(instr.tld4.UsesMiscMode(TextureMiscMode::PTP),
293 "PTP is not implemented");
294
295 if (instr.tld4.UsesMiscMode(TextureMiscMode::NODEP)) {
296 LOG_WARNING(HW_GPU, "TLD4.NODEP implementation is incomplete");
297 }
298
299 const auto texture_type = instr.tld4.texture_type.Value();
300 const bool depth_compare = instr.tld4.UsesMiscMode(TextureMiscMode::DC);
301 const bool is_array = instr.tld4.array != 0;
302 WriteTexInstructionFloat(bb, instr,
303 GetTld4Code(instr, texture_type, depth_compare, is_array));
304 break;
305 }
306 case OpCode::Id::TLD4S: {
307 UNIMPLEMENTED_IF_MSG(instr.tld4s.UsesMiscMode(TextureMiscMode::AOFFI),
308 "AOFFI is not implemented");
309
310 if (instr.tld4s.UsesMiscMode(TextureMiscMode::NODEP)) {
311 LOG_WARNING(HW_GPU, "TLD4S.NODEP implementation is incomplete");
312 }
313
314 const bool depth_compare = instr.tld4s.UsesMiscMode(TextureMiscMode::DC);
315 const Node op_a = GetRegister(instr.gpr8);
316 const Node op_b = GetRegister(instr.gpr20);
317
318 std::vector<Node> coords;
319
320 // TODO(Subv): Figure out how the sampler type is encoded in the TLD4S instruction.
321 if (depth_compare) {
322 // Note: TLD4S coordinate encoding works just like TEXS's
323 const Node op_y = GetRegister(instr.gpr8.Value() + 1);
324 coords.push_back(op_a);
325 coords.push_back(op_y);
326 coords.push_back(op_b);
327 } else {
328 coords.push_back(op_a);
329 coords.push_back(op_b);
330 }
331 const auto num_coords = static_cast<u32>(coords.size());
332 coords.push_back(Immediate(static_cast<u32>(instr.tld4s.component)));
333
334 const auto& sampler =
335 GetSampler(instr.sampler, TextureType::Texture2D, false, depth_compare);
336
337 Node4 values;
338 for (u32 element = 0; element < values.size(); ++element) {
339 auto params = coords;
340 MetaTexture meta{sampler, element, num_coords};
341 values[element] =
342 Operation(OperationCode::F4TextureGather, std::move(meta), std::move(params));
343 }
344
345 WriteTexsInstructionFloat(bb, instr, values);
346 break;
347 }
348 case OpCode::Id::TXQ: {
349 if (instr.txq.UsesMiscMode(TextureMiscMode::NODEP)) {
350 LOG_WARNING(HW_GPU, "TXQ.NODEP implementation is incomplete");
351 }
352
353 // TODO: The new commits on the texture refactor, change the way samplers work.
354 // Sadly, not all texture instructions specify the type of texture their sampler
355 // uses. This must be fixed at a later instance.
356 const auto& sampler =
357 GetSampler(instr.sampler, Tegra::Shader::TextureType::Texture2D, false, false);
358
359 u32 indexer = 0;
360 switch (instr.txq.query_type) {
361 case Tegra::Shader::TextureQueryType::Dimension: {
362 for (u32 element = 0; element < 4; ++element) {
363 if (instr.txq.IsComponentEnabled(element)) {
364 MetaTexture meta{sampler, element};
365 const Node value = Operation(OperationCode::F4TextureQueryDimensions,
366 std::move(meta), GetRegister(instr.gpr8));
367 SetTemporal(bb, indexer++, value);
368 }
369 }
370 for (u32 i = 0; i < indexer; ++i) {
371 SetRegister(bb, instr.gpr0.Value() + i, GetTemporal(i));
372 }
373 break;
374 }
375 default:
376 UNIMPLEMENTED_MSG("Unhandled texture query type: {}",
377 static_cast<u32>(instr.txq.query_type.Value()));
378 }
379 break;
380 }
381 case OpCode::Id::TMML: {
382 UNIMPLEMENTED_IF_MSG(instr.tmml.UsesMiscMode(Tegra::Shader::TextureMiscMode::NDV),
383 "NDV is not implemented");
384
385 if (instr.tmml.UsesMiscMode(TextureMiscMode::NODEP)) {
386 LOG_WARNING(HW_GPU, "TMML.NODEP implementation is incomplete");
387 }
388
389 auto texture_type = instr.tmml.texture_type.Value();
390 const bool is_array = instr.tmml.array != 0;
391 const auto& sampler = GetSampler(instr.sampler, texture_type, is_array, false);
392
393 std::vector<Node> coords;
394
395 // TODO: Add coordinates for different samplers once other texture types are implemented.
396 switch (texture_type) {
397 case TextureType::Texture1D:
398 coords.push_back(GetRegister(instr.gpr8));
399 break;
400 case TextureType::Texture2D:
401 coords.push_back(GetRegister(instr.gpr8.Value() + 0));
402 coords.push_back(GetRegister(instr.gpr8.Value() + 1));
403 break;
404 default:
405 UNIMPLEMENTED_MSG("Unhandled texture type {}", static_cast<u32>(texture_type));
406
407 // Fallback to interpreting as a 2D texture for now
408 coords.push_back(GetRegister(instr.gpr8.Value() + 0));
409 coords.push_back(GetRegister(instr.gpr8.Value() + 1));
410 texture_type = TextureType::Texture2D;
411 }
412
413 for (u32 element = 0; element < 2; ++element) {
414 auto params = coords;
415 MetaTexture meta_texture{sampler, element, static_cast<u32>(coords.size())};
416 const Node value =
417 Operation(OperationCode::F4TextureQueryLod, meta_texture, std::move(params));
418 SetTemporal(bb, element, value);
419 }
420 for (u32 element = 0; element < 2; ++element) {
421 SetRegister(bb, instr.gpr0.Value() + element, GetTemporal(element));
422 }
423
424 break;
425 }
426 case OpCode::Id::TLDS: {
427 const Tegra::Shader::TextureType texture_type{instr.tlds.GetTextureType()};
428 const bool is_array{instr.tlds.IsArrayTexture()};
429
430 UNIMPLEMENTED_IF_MSG(instr.tlds.UsesMiscMode(TextureMiscMode::AOFFI),
431 "AOFFI is not implemented");
432 UNIMPLEMENTED_IF_MSG(instr.tlds.UsesMiscMode(TextureMiscMode::MZ), "MZ is not implemented");
433
434 if (instr.tlds.UsesMiscMode(TextureMiscMode::NODEP)) {
435 LOG_WARNING(HW_GPU, "TMML.NODEP implementation is incomplete");
436 }
437
438 WriteTexsInstructionFloat(bb, instr, GetTldsCode(instr, texture_type, is_array));
439 break;
440 }
441 default: 232 default:
442 UNIMPLEMENTED_MSG("Unhandled memory instruction: {}", opcode->get().GetName()); 233 UNIMPLEMENTED_MSG("Unhandled memory instruction: {}", opcode->get().GetName());
443 } 234 }
@@ -445,327 +236,4 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) {
445 return pc; 236 return pc;
446} 237}
447 238
448const Sampler& ShaderIR::GetSampler(const Tegra::Shader::Sampler& sampler, TextureType type,
449 bool is_array, bool is_shadow) {
450 const auto offset = static_cast<std::size_t>(sampler.index.Value());
451
452 // If this sampler has already been used, return the existing mapping.
453 const auto itr =
454 std::find_if(used_samplers.begin(), used_samplers.end(),
455 [&](const Sampler& entry) { return entry.GetOffset() == offset; });
456 if (itr != used_samplers.end()) {
457 ASSERT(itr->GetType() == type && itr->IsArray() == is_array &&
458 itr->IsShadow() == is_shadow);
459 return *itr;
460 }
461
462 // Otherwise create a new mapping for this sampler
463 const std::size_t next_index = used_samplers.size();
464 const Sampler entry{offset, next_index, type, is_array, is_shadow};
465 return *used_samplers.emplace(entry).first;
466}
467
468void ShaderIR::WriteTexInstructionFloat(NodeBlock& bb, Instruction instr, const Node4& components) {
469 u32 dest_elem = 0;
470 for (u32 elem = 0; elem < 4; ++elem) {
471 if (!instr.tex.IsComponentEnabled(elem)) {
472 // Skip disabled components
473 continue;
474 }
475 SetTemporal(bb, dest_elem++, components[elem]);
476 }
477 // After writing values in temporals, move them to the real registers
478 for (u32 i = 0; i < dest_elem; ++i) {
479 SetRegister(bb, instr.gpr0.Value() + i, GetTemporal(i));
480 }
481}
482
483void ShaderIR::WriteTexsInstructionFloat(NodeBlock& bb, Instruction instr,
484 const Node4& components) {
485 // TEXS has two destination registers and a swizzle. The first two elements in the swizzle
486 // go into gpr0+0 and gpr0+1, and the rest goes into gpr28+0 and gpr28+1
487
488 u32 dest_elem = 0;
489 for (u32 component = 0; component < 4; ++component) {
490 if (!instr.texs.IsComponentEnabled(component))
491 continue;
492 SetTemporal(bb, dest_elem++, components[component]);
493 }
494
495 for (u32 i = 0; i < dest_elem; ++i) {
496 if (i < 2) {
497 // Write the first two swizzle components to gpr0 and gpr0+1
498 SetRegister(bb, instr.gpr0.Value() + i % 2, GetTemporal(i));
499 } else {
500 ASSERT(instr.texs.HasTwoDestinations());
501 // Write the rest of the swizzle components to gpr28 and gpr28+1
502 SetRegister(bb, instr.gpr28.Value() + i % 2, GetTemporal(i));
503 }
504 }
505}
506
507void ShaderIR::WriteTexsInstructionHalfFloat(NodeBlock& bb, Instruction instr,
508 const Node4& components) {
509 // TEXS.F16 destionation registers are packed in two registers in pairs (just like any half
510 // float instruction).
511
512 Node4 values;
513 u32 dest_elem = 0;
514 for (u32 component = 0; component < 4; ++component) {
515 if (!instr.texs.IsComponentEnabled(component))
516 continue;
517 values[dest_elem++] = components[component];
518 }
519 if (dest_elem == 0)
520 return;
521
522 std::generate(values.begin() + dest_elem, values.end(), [&]() { return Immediate(0); });
523
524 const Node first_value = Operation(OperationCode::HPack2, values[0], values[1]);
525 if (dest_elem <= 2) {
526 SetRegister(bb, instr.gpr0, first_value);
527 return;
528 }
529
530 SetTemporal(bb, 0, first_value);
531 SetTemporal(bb, 1, Operation(OperationCode::HPack2, values[2], values[3]));
532
533 SetRegister(bb, instr.gpr0, GetTemporal(0));
534 SetRegister(bb, instr.gpr28, GetTemporal(1));
535}
536
537Node4 ShaderIR::GetTextureCode(Instruction instr, TextureType texture_type,
538 TextureProcessMode process_mode, bool depth_compare, bool is_array,
539 std::size_t array_offset, std::size_t bias_offset,
540 std::vector<Node>&& coords) {
541 UNIMPLEMENTED_IF_MSG(
542 (texture_type == TextureType::Texture3D && (is_array || depth_compare)) ||
543 (texture_type == TextureType::TextureCube && is_array && depth_compare),
544 "This method is not supported.");
545
546 const auto& sampler = GetSampler(instr.sampler, texture_type, is_array, depth_compare);
547
548 const bool lod_needed = process_mode == TextureProcessMode::LZ ||
549 process_mode == TextureProcessMode::LL ||
550 process_mode == TextureProcessMode::LLA;
551
552 // LOD selection (either via bias or explicit textureLod) not supported in GL for
553 // sampler2DArrayShadow and samplerCubeArrayShadow.
554 const bool gl_lod_supported =
555 !((texture_type == Tegra::Shader::TextureType::Texture2D && is_array && depth_compare) ||
556 (texture_type == Tegra::Shader::TextureType::TextureCube && is_array && depth_compare));
557
558 const OperationCode read_method =
559 lod_needed && gl_lod_supported ? OperationCode::F4TextureLod : OperationCode::F4Texture;
560
561 UNIMPLEMENTED_IF(process_mode != TextureProcessMode::None && !gl_lod_supported);
562
563 std::optional<u32> array_offset_value;
564 if (is_array)
565 array_offset_value = static_cast<u32>(array_offset);
566
567 const auto coords_count = static_cast<u32>(coords.size());
568
569 if (process_mode != TextureProcessMode::None && gl_lod_supported) {
570 if (process_mode == TextureProcessMode::LZ) {
571 coords.push_back(Immediate(0.0f));
572 } else {
573 // If present, lod or bias are always stored in the register indexed by the gpr20
574 // field with an offset depending on the usage of the other registers
575 coords.push_back(GetRegister(instr.gpr20.Value() + bias_offset));
576 }
577 }
578
579 Node4 values;
580 for (u32 element = 0; element < values.size(); ++element) {
581 auto params = coords;
582 MetaTexture meta{sampler, element, coords_count, array_offset_value};
583 values[element] = Operation(read_method, std::move(meta), std::move(params));
584 }
585
586 return values;
587}
588
589Node4 ShaderIR::GetTexCode(Instruction instr, TextureType texture_type,
590 TextureProcessMode process_mode, bool depth_compare, bool is_array) {
591 const bool lod_bias_enabled =
592 (process_mode != TextureProcessMode::None && process_mode != TextureProcessMode::LZ);
593
594 const auto [coord_count, total_coord_count] = ValidateAndGetCoordinateElement(
595 texture_type, depth_compare, is_array, lod_bias_enabled, 4, 5);
596 // If enabled arrays index is always stored in the gpr8 field
597 const u64 array_register = instr.gpr8.Value();
598 // First coordinate index is the gpr8 or gpr8 + 1 when arrays are used
599 const u64 coord_register = array_register + (is_array ? 1 : 0);
600
601 std::vector<Node> coords;
602 for (std::size_t i = 0; i < coord_count; ++i) {
603 coords.push_back(GetRegister(coord_register + i));
604 }
605 // 1D.DC in opengl the 2nd component is ignored.
606 if (depth_compare && !is_array && texture_type == TextureType::Texture1D) {
607 coords.push_back(Immediate(0.0f));
608 }
609 std::size_t array_offset{};
610 if (is_array) {
611 array_offset = coords.size();
612 coords.push_back(GetRegister(array_register));
613 }
614 if (depth_compare) {
615 // Depth is always stored in the register signaled by gpr20
616 // or in the next register if lod or bias are used
617 const u64 depth_register = instr.gpr20.Value() + (lod_bias_enabled ? 1 : 0);
618 coords.push_back(GetRegister(depth_register));
619 }
620 // Fill ignored coordinates
621 while (coords.size() < total_coord_count) {
622 coords.push_back(Immediate(0));
623 }
624
625 return GetTextureCode(instr, texture_type, process_mode, depth_compare, is_array, array_offset,
626 0, std::move(coords));
627}
628
629Node4 ShaderIR::GetTexsCode(Instruction instr, TextureType texture_type,
630 TextureProcessMode process_mode, bool depth_compare, bool is_array) {
631 const bool lod_bias_enabled =
632 (process_mode != TextureProcessMode::None && process_mode != TextureProcessMode::LZ);
633
634 const auto [coord_count, total_coord_count] = ValidateAndGetCoordinateElement(
635 texture_type, depth_compare, is_array, lod_bias_enabled, 4, 4);
636 // If enabled arrays index is always stored in the gpr8 field
637 const u64 array_register = instr.gpr8.Value();
638 // First coordinate index is stored in gpr8 field or (gpr8 + 1) when arrays are used
639 const u64 coord_register = array_register + (is_array ? 1 : 0);
640 const u64 last_coord_register =
641 (is_array || !(lod_bias_enabled || depth_compare) || (coord_count > 2))
642 ? static_cast<u64>(instr.gpr20.Value())
643 : coord_register + 1;
644
645 std::vector<Node> coords;
646 for (std::size_t i = 0; i < coord_count; ++i) {
647 const bool last = (i == (coord_count - 1)) && (coord_count > 1);
648 coords.push_back(GetRegister(last ? last_coord_register : coord_register + i));
649 }
650
651 std::size_t array_offset{};
652 if (is_array) {
653 array_offset = coords.size();
654 coords.push_back(GetRegister(array_register));
655 }
656 if (depth_compare) {
657 // Depth is always stored in the register signaled by gpr20
658 // or in the next register if lod or bias are used
659 const u64 depth_register = instr.gpr20.Value() + (lod_bias_enabled ? 1 : 0);
660 coords.push_back(GetRegister(depth_register));
661 }
662 // Fill ignored coordinates
663 while (coords.size() < total_coord_count) {
664 coords.push_back(Immediate(0));
665 }
666
667 return GetTextureCode(instr, texture_type, process_mode, depth_compare, is_array, array_offset,
668 (coord_count > 2 ? 1 : 0), std::move(coords));
669}
670
671Node4 ShaderIR::GetTld4Code(Instruction instr, TextureType texture_type, bool depth_compare,
672 bool is_array) {
673 const std::size_t coord_count = GetCoordCount(texture_type);
674 const std::size_t total_coord_count = coord_count + (is_array ? 1 : 0);
675 const std::size_t total_reg_count = total_coord_count + (depth_compare ? 1 : 0);
676
677 // If enabled arrays index is always stored in the gpr8 field
678 const u64 array_register = instr.gpr8.Value();
679 // First coordinate index is the gpr8 or gpr8 + 1 when arrays are used
680 const u64 coord_register = array_register + (is_array ? 1 : 0);
681
682 std::vector<Node> coords;
683
684 for (size_t i = 0; i < coord_count; ++i) {
685 coords.push_back(GetRegister(coord_register + i));
686 }
687 std::optional<u32> array_offset;
688 if (is_array) {
689 array_offset = static_cast<u32>(coords.size());
690 coords.push_back(GetRegister(array_register));
691 }
692
693 const auto& sampler = GetSampler(instr.sampler, texture_type, is_array, depth_compare);
694
695 Node4 values;
696 for (u32 element = 0; element < values.size(); ++element) {
697 auto params = coords;
698 MetaTexture meta{sampler, element, static_cast<u32>(coords.size()), array_offset};
699 values[element] =
700 Operation(OperationCode::F4TextureGather, std::move(meta), std::move(params));
701 }
702
703 return values;
704}
705
706Node4 ShaderIR::GetTldsCode(Instruction instr, TextureType texture_type, bool is_array) {
707 const std::size_t type_coord_count = GetCoordCount(texture_type);
708 const std::size_t total_coord_count = type_coord_count + (is_array ? 1 : 0);
709 const bool lod_enabled = instr.tlds.GetTextureProcessMode() == TextureProcessMode::LL;
710
711 // If enabled arrays index is always stored in the gpr8 field
712 const u64 array_register = instr.gpr8.Value();
713 // if is array gpr20 is used
714 const u64 coord_register = is_array ? instr.gpr20.Value() : instr.gpr8.Value();
715
716 const u64 last_coord_register =
717 ((type_coord_count > 2) || (type_coord_count == 2 && !lod_enabled)) && !is_array
718 ? static_cast<u64>(instr.gpr20.Value())
719 : coord_register + 1;
720
721 std::vector<Node> coords;
722
723 for (std::size_t i = 0; i < type_coord_count; ++i) {
724 const bool last = (i == (type_coord_count - 1)) && (type_coord_count > 1);
725 coords.push_back(GetRegister(last ? last_coord_register : coord_register + i));
726 }
727 std::optional<u32> array_offset;
728 if (is_array) {
729 array_offset = static_cast<u32>(coords.size());
730 coords.push_back(GetRegister(array_register));
731 }
732 const auto coords_count = static_cast<u32>(coords.size());
733
734 if (lod_enabled) {
735 // When lod is used always is in grp20
736 coords.push_back(GetRegister(instr.gpr20));
737 } else {
738 coords.push_back(Immediate(0));
739 }
740
741 const auto& sampler = GetSampler(instr.sampler, texture_type, is_array, false);
742
743 Node4 values;
744 for (u32 element = 0; element < values.size(); ++element) {
745 auto params = coords;
746 MetaTexture meta{sampler, element, coords_count, array_offset};
747 values[element] =
748 Operation(OperationCode::F4TexelFetch, std::move(meta), std::move(params));
749 }
750 return values;
751}
752
753std::tuple<std::size_t, std::size_t> ShaderIR::ValidateAndGetCoordinateElement(
754 TextureType texture_type, bool depth_compare, bool is_array, bool lod_bias_enabled,
755 std::size_t max_coords, std::size_t max_inputs) {
756 const std::size_t coord_count = GetCoordCount(texture_type);
757
758 std::size_t total_coord_count = coord_count + (is_array ? 1 : 0) + (depth_compare ? 1 : 0);
759 const std::size_t total_reg_count = total_coord_count + (lod_bias_enabled ? 1 : 0);
760 if (total_coord_count > max_coords || total_reg_count > max_inputs) {
761 UNIMPLEMENTED_MSG("Unsupported Texture operation");
762 total_coord_count = std::min(total_coord_count, max_coords);
763 }
764 // 1D.DC OpenGL is using a vec3 but 2nd component is ignored later.
765 total_coord_count +=
766 (depth_compare && !is_array && texture_type == TextureType::Texture1D) ? 1 : 0;
767
768 return {coord_count, total_coord_count};
769}
770
771} // namespace VideoCommon::Shader 239} // namespace VideoCommon::Shader
diff --git a/src/video_core/shader/decode/other.cpp b/src/video_core/shader/decode/other.cpp
index f9502e3d0..d750a2936 100644
--- a/src/video_core/shader/decode/other.cpp
+++ b/src/video_core/shader/decode/other.cpp
@@ -135,7 +135,18 @@ u32 ShaderIR::DecodeOther(NodeBlock& bb, u32 pc) {
135 instr.ipa.sample_mode.Value()}; 135 instr.ipa.sample_mode.Value()};
136 136
137 const Node attr = GetInputAttribute(attribute.index, attribute.element, input_mode); 137 const Node attr = GetInputAttribute(attribute.index, attribute.element, input_mode);
138 const Node value = GetSaturatedFloat(attr, instr.ipa.saturate); 138 Node value = attr;
139 const Tegra::Shader::Attribute::Index index = attribute.index.Value();
140 if (index >= Tegra::Shader::Attribute::Index::Attribute_0 &&
141 index <= Tegra::Shader::Attribute::Index::Attribute_31) {
142 // TODO(Blinkhawk): There are cases where a perspective attribute use PASS.
143 // In theory by setting them as perspective, OpenGL does the perspective correction.
144 // A way must figured to reverse the last step of it.
145 if (input_mode.interpolation_mode == Tegra::Shader::IpaInterpMode::Multiply) {
146 value = Operation(OperationCode::FMul, PRECISE, value, GetRegister(instr.gpr20));
147 }
148 }
149 value = GetSaturatedFloat(value, instr.ipa.saturate);
139 150
140 SetRegister(bb, instr.gpr0, value); 151 SetRegister(bb, instr.gpr0, value);
141 break; 152 break;
@@ -175,4 +186,4 @@ u32 ShaderIR::DecodeOther(NodeBlock& bb, u32 pc) {
175 return pc; 186 return pc;
176} 187}
177 188
178} // namespace VideoCommon::Shader \ No newline at end of file 189} // namespace VideoCommon::Shader
diff --git a/src/video_core/shader/decode/texture.cpp b/src/video_core/shader/decode/texture.cpp
new file mode 100644
index 000000000..a99ae19bf
--- /dev/null
+++ b/src/video_core/shader/decode/texture.cpp
@@ -0,0 +1,534 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <algorithm>
6#include <vector>
7#include <fmt/format.h>
8
9#include "common/assert.h"
10#include "common/common_types.h"
11#include "video_core/engines/shader_bytecode.h"
12#include "video_core/shader/shader_ir.h"
13
14namespace VideoCommon::Shader {
15
16using Tegra::Shader::Instruction;
17using Tegra::Shader::OpCode;
18using Tegra::Shader::Register;
19using Tegra::Shader::TextureMiscMode;
20using Tegra::Shader::TextureProcessMode;
21using Tegra::Shader::TextureType;
22
23static std::size_t GetCoordCount(TextureType texture_type) {
24 switch (texture_type) {
25 case TextureType::Texture1D:
26 return 1;
27 case TextureType::Texture2D:
28 return 2;
29 case TextureType::Texture3D:
30 case TextureType::TextureCube:
31 return 3;
32 default:
33 UNIMPLEMENTED_MSG("Unhandled texture type: {}", static_cast<u32>(texture_type));
34 return 0;
35 }
36}
37
38u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) {
39 const Instruction instr = {program_code[pc]};
40 const auto opcode = OpCode::Decode(instr);
41
42 switch (opcode->get().GetId()) {
43 case OpCode::Id::TEX: {
44 UNIMPLEMENTED_IF_MSG(instr.tex.UsesMiscMode(TextureMiscMode::AOFFI),
45 "AOFFI is not implemented");
46
47 if (instr.tex.UsesMiscMode(TextureMiscMode::NODEP)) {
48 LOG_WARNING(HW_GPU, "TEX.NODEP implementation is incomplete");
49 }
50
51 const TextureType texture_type{instr.tex.texture_type};
52 const bool is_array = instr.tex.array != 0;
53 const bool depth_compare = instr.tex.UsesMiscMode(TextureMiscMode::DC);
54 const auto process_mode = instr.tex.GetTextureProcessMode();
55 WriteTexInstructionFloat(
56 bb, instr, GetTexCode(instr, texture_type, process_mode, depth_compare, is_array));
57 break;
58 }
59 case OpCode::Id::TEXS: {
60 const TextureType texture_type{instr.texs.GetTextureType()};
61 const bool is_array{instr.texs.IsArrayTexture()};
62 const bool depth_compare = instr.texs.UsesMiscMode(TextureMiscMode::DC);
63 const auto process_mode = instr.texs.GetTextureProcessMode();
64
65 if (instr.texs.UsesMiscMode(TextureMiscMode::NODEP)) {
66 LOG_WARNING(HW_GPU, "TEXS.NODEP implementation is incomplete");
67 }
68
69 const Node4 components =
70 GetTexsCode(instr, texture_type, process_mode, depth_compare, is_array);
71
72 if (instr.texs.fp32_flag) {
73 WriteTexsInstructionFloat(bb, instr, components);
74 } else {
75 WriteTexsInstructionHalfFloat(bb, instr, components);
76 }
77 break;
78 }
79 case OpCode::Id::TLD4: {
80 ASSERT(instr.tld4.array == 0);
81 UNIMPLEMENTED_IF_MSG(instr.tld4.UsesMiscMode(TextureMiscMode::AOFFI),
82 "AOFFI is not implemented");
83 UNIMPLEMENTED_IF_MSG(instr.tld4.UsesMiscMode(TextureMiscMode::NDV),
84 "NDV is not implemented");
85 UNIMPLEMENTED_IF_MSG(instr.tld4.UsesMiscMode(TextureMiscMode::PTP),
86 "PTP is not implemented");
87
88 if (instr.tld4.UsesMiscMode(TextureMiscMode::NODEP)) {
89 LOG_WARNING(HW_GPU, "TLD4.NODEP implementation is incomplete");
90 }
91
92 const auto texture_type = instr.tld4.texture_type.Value();
93 const bool depth_compare = instr.tld4.UsesMiscMode(TextureMiscMode::DC);
94 const bool is_array = instr.tld4.array != 0;
95 WriteTexInstructionFloat(bb, instr,
96 GetTld4Code(instr, texture_type, depth_compare, is_array));
97 break;
98 }
99 case OpCode::Id::TLD4S: {
100 UNIMPLEMENTED_IF_MSG(instr.tld4s.UsesMiscMode(TextureMiscMode::AOFFI),
101 "AOFFI is not implemented");
102 if (instr.tld4s.UsesMiscMode(TextureMiscMode::NODEP)) {
103 LOG_WARNING(HW_GPU, "TLD4S.NODEP implementation is incomplete");
104 }
105
106 const bool depth_compare = instr.tld4s.UsesMiscMode(TextureMiscMode::DC);
107 const Node op_a = GetRegister(instr.gpr8);
108 const Node op_b = GetRegister(instr.gpr20);
109
110 // TODO(Subv): Figure out how the sampler type is encoded in the TLD4S instruction.
111 std::vector<Node> coords;
112 if (depth_compare) {
113 // Note: TLD4S coordinate encoding works just like TEXS's
114 const Node op_y = GetRegister(instr.gpr8.Value() + 1);
115 coords.push_back(op_a);
116 coords.push_back(op_y);
117 coords.push_back(op_b);
118 } else {
119 coords.push_back(op_a);
120 coords.push_back(op_b);
121 }
122 const Node component = Immediate(static_cast<u32>(instr.tld4s.component));
123
124 const auto& sampler =
125 GetSampler(instr.sampler, TextureType::Texture2D, false, depth_compare);
126
127 Node4 values;
128 for (u32 element = 0; element < values.size(); ++element) {
129 auto coords_copy = coords;
130 MetaTexture meta{sampler, {}, {}, {}, {}, component, element};
131 values[element] = Operation(OperationCode::TextureGather, meta, std::move(coords_copy));
132 }
133
134 WriteTexsInstructionFloat(bb, instr, values);
135 break;
136 }
137 case OpCode::Id::TXQ: {
138 if (instr.txq.UsesMiscMode(TextureMiscMode::NODEP)) {
139 LOG_WARNING(HW_GPU, "TXQ.NODEP implementation is incomplete");
140 }
141
142 // TODO: The new commits on the texture refactor, change the way samplers work.
143 // Sadly, not all texture instructions specify the type of texture their sampler
144 // uses. This must be fixed at a later instance.
145 const auto& sampler =
146 GetSampler(instr.sampler, Tegra::Shader::TextureType::Texture2D, false, false);
147
148 u32 indexer = 0;
149 switch (instr.txq.query_type) {
150 case Tegra::Shader::TextureQueryType::Dimension: {
151 for (u32 element = 0; element < 4; ++element) {
152 if (!instr.txq.IsComponentEnabled(element)) {
153 continue;
154 }
155 MetaTexture meta{sampler, {}, {}, {}, {}, {}, element};
156 const Node value =
157 Operation(OperationCode::TextureQueryDimensions, meta, GetRegister(instr.gpr8));
158 SetTemporal(bb, indexer++, value);
159 }
160 for (u32 i = 0; i < indexer; ++i) {
161 SetRegister(bb, instr.gpr0.Value() + i, GetTemporal(i));
162 }
163 break;
164 }
165 default:
166 UNIMPLEMENTED_MSG("Unhandled texture query type: {}",
167 static_cast<u32>(instr.txq.query_type.Value()));
168 }
169 break;
170 }
171 case OpCode::Id::TMML: {
172 UNIMPLEMENTED_IF_MSG(instr.tmml.UsesMiscMode(Tegra::Shader::TextureMiscMode::NDV),
173 "NDV is not implemented");
174
175 if (instr.tmml.UsesMiscMode(TextureMiscMode::NODEP)) {
176 LOG_WARNING(HW_GPU, "TMML.NODEP implementation is incomplete");
177 }
178
179 auto texture_type = instr.tmml.texture_type.Value();
180 const bool is_array = instr.tmml.array != 0;
181 const auto& sampler = GetSampler(instr.sampler, texture_type, is_array, false);
182
183 std::vector<Node> coords;
184
185 // TODO: Add coordinates for different samplers once other texture types are implemented.
186 switch (texture_type) {
187 case TextureType::Texture1D:
188 coords.push_back(GetRegister(instr.gpr8));
189 break;
190 case TextureType::Texture2D:
191 coords.push_back(GetRegister(instr.gpr8.Value() + 0));
192 coords.push_back(GetRegister(instr.gpr8.Value() + 1));
193 break;
194 default:
195 UNIMPLEMENTED_MSG("Unhandled texture type {}", static_cast<u32>(texture_type));
196
197 // Fallback to interpreting as a 2D texture for now
198 coords.push_back(GetRegister(instr.gpr8.Value() + 0));
199 coords.push_back(GetRegister(instr.gpr8.Value() + 1));
200 texture_type = TextureType::Texture2D;
201 }
202
203 for (u32 element = 0; element < 2; ++element) {
204 auto params = coords;
205 MetaTexture meta{sampler, {}, {}, {}, {}, {}, element};
206 const Node value = Operation(OperationCode::TextureQueryLod, meta, std::move(params));
207 SetTemporal(bb, element, value);
208 }
209 for (u32 element = 0; element < 2; ++element) {
210 SetRegister(bb, instr.gpr0.Value() + element, GetTemporal(element));
211 }
212
213 break;
214 }
215 case OpCode::Id::TLDS: {
216 const Tegra::Shader::TextureType texture_type{instr.tlds.GetTextureType()};
217 const bool is_array{instr.tlds.IsArrayTexture()};
218
219 UNIMPLEMENTED_IF_MSG(instr.tlds.UsesMiscMode(TextureMiscMode::AOFFI),
220 "AOFFI is not implemented");
221 UNIMPLEMENTED_IF_MSG(instr.tlds.UsesMiscMode(TextureMiscMode::MZ), "MZ is not implemented");
222
223 if (instr.tlds.UsesMiscMode(TextureMiscMode::NODEP)) {
224 LOG_WARNING(HW_GPU, "TLDS.NODEP implementation is incomplete");
225 }
226
227 WriteTexsInstructionFloat(bb, instr, GetTldsCode(instr, texture_type, is_array));
228 break;
229 }
230 default:
231 UNIMPLEMENTED_MSG("Unhandled memory instruction: {}", opcode->get().GetName());
232 }
233
234 return pc;
235}
236
237const Sampler& ShaderIR::GetSampler(const Tegra::Shader::Sampler& sampler, TextureType type,
238 bool is_array, bool is_shadow) {
239 const auto offset = static_cast<std::size_t>(sampler.index.Value());
240
241 // If this sampler has already been used, return the existing mapping.
242 const auto itr =
243 std::find_if(used_samplers.begin(), used_samplers.end(),
244 [&](const Sampler& entry) { return entry.GetOffset() == offset; });
245 if (itr != used_samplers.end()) {
246 ASSERT(itr->GetType() == type && itr->IsArray() == is_array &&
247 itr->IsShadow() == is_shadow);
248 return *itr;
249 }
250
251 // Otherwise create a new mapping for this sampler
252 const std::size_t next_index = used_samplers.size();
253 const Sampler entry{offset, next_index, type, is_array, is_shadow};
254 return *used_samplers.emplace(entry).first;
255}
256
257void ShaderIR::WriteTexInstructionFloat(NodeBlock& bb, Instruction instr, const Node4& components) {
258 u32 dest_elem = 0;
259 for (u32 elem = 0; elem < 4; ++elem) {
260 if (!instr.tex.IsComponentEnabled(elem)) {
261 // Skip disabled components
262 continue;
263 }
264 SetTemporal(bb, dest_elem++, components[elem]);
265 }
266 // After writing values in temporals, move them to the real registers
267 for (u32 i = 0; i < dest_elem; ++i) {
268 SetRegister(bb, instr.gpr0.Value() + i, GetTemporal(i));
269 }
270}
271
272void ShaderIR::WriteTexsInstructionFloat(NodeBlock& bb, Instruction instr,
273 const Node4& components) {
274 // TEXS has two destination registers and a swizzle. The first two elements in the swizzle
275 // go into gpr0+0 and gpr0+1, and the rest goes into gpr28+0 and gpr28+1
276
277 u32 dest_elem = 0;
278 for (u32 component = 0; component < 4; ++component) {
279 if (!instr.texs.IsComponentEnabled(component))
280 continue;
281 SetTemporal(bb, dest_elem++, components[component]);
282 }
283
284 for (u32 i = 0; i < dest_elem; ++i) {
285 if (i < 2) {
286 // Write the first two swizzle components to gpr0 and gpr0+1
287 SetRegister(bb, instr.gpr0.Value() + i % 2, GetTemporal(i));
288 } else {
289 ASSERT(instr.texs.HasTwoDestinations());
290 // Write the rest of the swizzle components to gpr28 and gpr28+1
291 SetRegister(bb, instr.gpr28.Value() + i % 2, GetTemporal(i));
292 }
293 }
294}
295
296void ShaderIR::WriteTexsInstructionHalfFloat(NodeBlock& bb, Instruction instr,
297 const Node4& components) {
298 // TEXS.F16 destionation registers are packed in two registers in pairs (just like any half
299 // float instruction).
300
301 Node4 values;
302 u32 dest_elem = 0;
303 for (u32 component = 0; component < 4; ++component) {
304 if (!instr.texs.IsComponentEnabled(component))
305 continue;
306 values[dest_elem++] = components[component];
307 }
308 if (dest_elem == 0)
309 return;
310
311 std::generate(values.begin() + dest_elem, values.end(), [&]() { return Immediate(0); });
312
313 const Node first_value = Operation(OperationCode::HPack2, values[0], values[1]);
314 if (dest_elem <= 2) {
315 SetRegister(bb, instr.gpr0, first_value);
316 return;
317 }
318
319 SetTemporal(bb, 0, first_value);
320 SetTemporal(bb, 1, Operation(OperationCode::HPack2, values[2], values[3]));
321
322 SetRegister(bb, instr.gpr0, GetTemporal(0));
323 SetRegister(bb, instr.gpr28, GetTemporal(1));
324}
325
326Node4 ShaderIR::GetTextureCode(Instruction instr, TextureType texture_type,
327 TextureProcessMode process_mode, std::vector<Node> coords,
328 Node array, Node depth_compare, u32 bias_offset) {
329 const bool is_array = array;
330 const bool is_shadow = depth_compare;
331
332 UNIMPLEMENTED_IF_MSG((texture_type == TextureType::Texture3D && (is_array || is_shadow)) ||
333 (texture_type == TextureType::TextureCube && is_array && is_shadow),
334 "This method is not supported.");
335
336 const auto& sampler = GetSampler(instr.sampler, texture_type, is_array, is_shadow);
337
338 const bool lod_needed = process_mode == TextureProcessMode::LZ ||
339 process_mode == TextureProcessMode::LL ||
340 process_mode == TextureProcessMode::LLA;
341
342 // LOD selection (either via bias or explicit textureLod) not supported in GL for
343 // sampler2DArrayShadow and samplerCubeArrayShadow.
344 const bool gl_lod_supported =
345 !((texture_type == Tegra::Shader::TextureType::Texture2D && is_array && is_shadow) ||
346 (texture_type == Tegra::Shader::TextureType::TextureCube && is_array && is_shadow));
347
348 const OperationCode read_method =
349 (lod_needed && gl_lod_supported) ? OperationCode::TextureLod : OperationCode::Texture;
350
351 UNIMPLEMENTED_IF(process_mode != TextureProcessMode::None && !gl_lod_supported);
352
353 Node bias = {};
354 Node lod = {};
355 if (process_mode != TextureProcessMode::None && gl_lod_supported) {
356 switch (process_mode) {
357 case TextureProcessMode::LZ:
358 lod = Immediate(0.0f);
359 break;
360 case TextureProcessMode::LB:
361 // If present, lod or bias are always stored in the register indexed by the gpr20
362 // field with an offset depending on the usage of the other registers
363 bias = GetRegister(instr.gpr20.Value() + bias_offset);
364 break;
365 case TextureProcessMode::LL:
366 lod = GetRegister(instr.gpr20.Value() + bias_offset);
367 break;
368 default:
369 UNIMPLEMENTED_MSG("Unimplemented process mode={}", static_cast<u32>(process_mode));
370 break;
371 }
372 }
373
374 Node4 values;
375 for (u32 element = 0; element < values.size(); ++element) {
376 auto copy_coords = coords;
377 MetaTexture meta{sampler, array, depth_compare, bias, lod, {}, element};
378 values[element] = Operation(read_method, meta, std::move(copy_coords));
379 }
380
381 return values;
382}
383
384Node4 ShaderIR::GetTexCode(Instruction instr, TextureType texture_type,
385 TextureProcessMode process_mode, bool depth_compare, bool is_array) {
386 const bool lod_bias_enabled =
387 (process_mode != TextureProcessMode::None && process_mode != TextureProcessMode::LZ);
388
389 const auto [coord_count, total_coord_count] = ValidateAndGetCoordinateElement(
390 texture_type, depth_compare, is_array, lod_bias_enabled, 4, 5);
391 // If enabled arrays index is always stored in the gpr8 field
392 const u64 array_register = instr.gpr8.Value();
393 // First coordinate index is the gpr8 or gpr8 + 1 when arrays are used
394 const u64 coord_register = array_register + (is_array ? 1 : 0);
395
396 std::vector<Node> coords;
397 for (std::size_t i = 0; i < coord_count; ++i) {
398 coords.push_back(GetRegister(coord_register + i));
399 }
400 // 1D.DC in OpenGL the 2nd component is ignored.
401 if (depth_compare && !is_array && texture_type == TextureType::Texture1D) {
402 coords.push_back(Immediate(0.0f));
403 }
404
405 const Node array = is_array ? GetRegister(array_register) : nullptr;
406
407 Node dc{};
408 if (depth_compare) {
409 // Depth is always stored in the register signaled by gpr20 or in the next register if lod
410 // or bias are used
411 const u64 depth_register = instr.gpr20.Value() + (lod_bias_enabled ? 1 : 0);
412 dc = GetRegister(depth_register);
413 }
414
415 return GetTextureCode(instr, texture_type, process_mode, coords, array, dc, 0);
416}
417
418Node4 ShaderIR::GetTexsCode(Instruction instr, TextureType texture_type,
419 TextureProcessMode process_mode, bool depth_compare, bool is_array) {
420 const bool lod_bias_enabled =
421 (process_mode != TextureProcessMode::None && process_mode != TextureProcessMode::LZ);
422
423 const auto [coord_count, total_coord_count] = ValidateAndGetCoordinateElement(
424 texture_type, depth_compare, is_array, lod_bias_enabled, 4, 4);
425 // If enabled arrays index is always stored in the gpr8 field
426 const u64 array_register = instr.gpr8.Value();
427 // First coordinate index is stored in gpr8 field or (gpr8 + 1) when arrays are used
428 const u64 coord_register = array_register + (is_array ? 1 : 0);
429 const u64 last_coord_register =
430 (is_array || !(lod_bias_enabled || depth_compare) || (coord_count > 2))
431 ? static_cast<u64>(instr.gpr20.Value())
432 : coord_register + 1;
433 const u32 bias_offset = coord_count > 2 ? 1 : 0;
434
435 std::vector<Node> coords;
436 for (std::size_t i = 0; i < coord_count; ++i) {
437 const bool last = (i == (coord_count - 1)) && (coord_count > 1);
438 coords.push_back(GetRegister(last ? last_coord_register : coord_register + i));
439 }
440
441 const Node array = is_array ? GetRegister(array_register) : nullptr;
442
443 Node dc{};
444 if (depth_compare) {
445 // Depth is always stored in the register signaled by gpr20 or in the next register if lod
446 // or bias are used
447 const u64 depth_register = instr.gpr20.Value() + (lod_bias_enabled ? 1 : 0);
448 dc = GetRegister(depth_register);
449 }
450
451 return GetTextureCode(instr, texture_type, process_mode, coords, array, dc, bias_offset);
452}
453
454Node4 ShaderIR::GetTld4Code(Instruction instr, TextureType texture_type, bool depth_compare,
455 bool is_array) {
456 const std::size_t coord_count = GetCoordCount(texture_type);
457 const std::size_t total_coord_count = coord_count + (is_array ? 1 : 0);
458 const std::size_t total_reg_count = total_coord_count + (depth_compare ? 1 : 0);
459
460 // If enabled arrays index is always stored in the gpr8 field
461 const u64 array_register = instr.gpr8.Value();
462 // First coordinate index is the gpr8 or gpr8 + 1 when arrays are used
463 const u64 coord_register = array_register + (is_array ? 1 : 0);
464
465 std::vector<Node> coords;
466 for (size_t i = 0; i < coord_count; ++i)
467 coords.push_back(GetRegister(coord_register + i));
468
469 const auto& sampler = GetSampler(instr.sampler, texture_type, is_array, depth_compare);
470
471 Node4 values;
472 for (u32 element = 0; element < values.size(); ++element) {
473 auto coords_copy = coords;
474 MetaTexture meta{sampler, GetRegister(array_register), {}, {}, {}, {}, element};
475 values[element] = Operation(OperationCode::TextureGather, meta, std::move(coords_copy));
476 }
477
478 return values;
479}
480
481Node4 ShaderIR::GetTldsCode(Instruction instr, TextureType texture_type, bool is_array) {
482 const std::size_t type_coord_count = GetCoordCount(texture_type);
483 const bool lod_enabled = instr.tlds.GetTextureProcessMode() == TextureProcessMode::LL;
484
485 // If enabled arrays index is always stored in the gpr8 field
486 const u64 array_register = instr.gpr8.Value();
487 // if is array gpr20 is used
488 const u64 coord_register = is_array ? instr.gpr20.Value() : instr.gpr8.Value();
489
490 const u64 last_coord_register =
491 ((type_coord_count > 2) || (type_coord_count == 2 && !lod_enabled)) && !is_array
492 ? static_cast<u64>(instr.gpr20.Value())
493 : coord_register + 1;
494
495 std::vector<Node> coords;
496 for (std::size_t i = 0; i < type_coord_count; ++i) {
497 const bool last = (i == (type_coord_count - 1)) && (type_coord_count > 1);
498 coords.push_back(GetRegister(last ? last_coord_register : coord_register + i));
499 }
500
501 const Node array = is_array ? GetRegister(array_register) : nullptr;
502 // When lod is used always is in gpr20
503 const Node lod = lod_enabled ? GetRegister(instr.gpr20) : Immediate(0);
504
505 const auto& sampler = GetSampler(instr.sampler, texture_type, is_array, false);
506
507 Node4 values;
508 for (u32 element = 0; element < values.size(); ++element) {
509 auto coords_copy = coords;
510 MetaTexture meta{sampler, array, {}, {}, lod, {}, element};
511 values[element] = Operation(OperationCode::TexelFetch, meta, std::move(coords_copy));
512 }
513 return values;
514}
515
516std::tuple<std::size_t, std::size_t> ShaderIR::ValidateAndGetCoordinateElement(
517 TextureType texture_type, bool depth_compare, bool is_array, bool lod_bias_enabled,
518 std::size_t max_coords, std::size_t max_inputs) {
519 const std::size_t coord_count = GetCoordCount(texture_type);
520
521 std::size_t total_coord_count = coord_count + (is_array ? 1 : 0) + (depth_compare ? 1 : 0);
522 const std::size_t total_reg_count = total_coord_count + (lod_bias_enabled ? 1 : 0);
523 if (total_coord_count > max_coords || total_reg_count > max_inputs) {
524 UNIMPLEMENTED_MSG("Unsupported Texture operation");
525 total_coord_count = std::min(total_coord_count, max_coords);
526 }
527 // 1D.DC OpenGL is using a vec3 but 2nd component is ignored later.
528 total_coord_count +=
529 (depth_compare && !is_array && texture_type == TextureType::Texture1D) ? 1 : 0;
530
531 return {coord_count, total_coord_count};
532}
533
534} // namespace VideoCommon::Shader \ No newline at end of file
diff --git a/src/video_core/shader/shader_ir.h b/src/video_core/shader/shader_ir.h
index 1d4fbef53..5bc3a3900 100644
--- a/src/video_core/shader/shader_ir.h
+++ b/src/video_core/shader/shader_ir.h
@@ -156,12 +156,12 @@ enum class OperationCode {
156 Logical2HNotEqual, /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2 156 Logical2HNotEqual, /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2
157 Logical2HGreaterEqual, /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2 157 Logical2HGreaterEqual, /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2
158 158
159 F4Texture, /// (MetaTexture, float[N] coords, float[M] params) -> float4 159 Texture, /// (MetaTexture, float[N] coords) -> float4
160 F4TextureLod, /// (MetaTexture, float[N] coords, float[M] params) -> float4 160 TextureLod, /// (MetaTexture, float[N] coords) -> float4
161 F4TextureGather, /// (MetaTexture, float[N] coords, float[M] params) -> float4 161 TextureGather, /// (MetaTexture, float[N] coords) -> float4
162 F4TextureQueryDimensions, /// (MetaTexture, float a) -> float4 162 TextureQueryDimensions, /// (MetaTexture, float a) -> float4
163 F4TextureQueryLod, /// (MetaTexture, float[N] coords) -> float4 163 TextureQueryLod, /// (MetaTexture, float[N] coords) -> float4
164 F4TexelFetch, /// (MetaTexture, int[N], int) -> float4 164 TexelFetch, /// (MetaTexture, int[N], int) -> float4
165 165
166 Branch, /// (uint branch_target) -> void 166 Branch, /// (uint branch_target) -> void
167 PushFlowStack, /// (uint branch_target) -> void 167 PushFlowStack, /// (uint branch_target) -> void
@@ -288,9 +288,12 @@ struct MetaHalfArithmetic {
288 288
289struct MetaTexture { 289struct MetaTexture {
290 const Sampler& sampler; 290 const Sampler& sampler;
291 Node array{};
292 Node depth_compare{};
293 Node bias{};
294 Node lod{};
295 Node component{};
291 u32 element{}; 296 u32 element{};
292 u32 coords_count{};
293 std::optional<u32> array_index;
294}; 297};
295 298
296constexpr MetaArithmetic PRECISE = {true}; 299constexpr MetaArithmetic PRECISE = {true};
@@ -613,6 +616,7 @@ private:
613 u32 DecodeHfma2(NodeBlock& bb, u32 pc); 616 u32 DecodeHfma2(NodeBlock& bb, u32 pc);
614 u32 DecodeConversion(NodeBlock& bb, u32 pc); 617 u32 DecodeConversion(NodeBlock& bb, u32 pc);
615 u32 DecodeMemory(NodeBlock& bb, u32 pc); 618 u32 DecodeMemory(NodeBlock& bb, u32 pc);
619 u32 DecodeTexture(NodeBlock& bb, u32 pc);
616 u32 DecodeFloatSetPredicate(NodeBlock& bb, u32 pc); 620 u32 DecodeFloatSetPredicate(NodeBlock& bb, u32 pc);
617 u32 DecodeIntegerSetPredicate(NodeBlock& bb, u32 pc); 621 u32 DecodeIntegerSetPredicate(NodeBlock& bb, u32 pc);
618 u32 DecodeHalfSetPredicate(NodeBlock& bb, u32 pc); 622 u32 DecodeHalfSetPredicate(NodeBlock& bb, u32 pc);
@@ -754,9 +758,8 @@ private:
754 bool lod_bias_enabled, std::size_t max_coords, std::size_t max_inputs); 758 bool lod_bias_enabled, std::size_t max_coords, std::size_t max_inputs);
755 759
756 Node4 GetTextureCode(Tegra::Shader::Instruction instr, Tegra::Shader::TextureType texture_type, 760 Node4 GetTextureCode(Tegra::Shader::Instruction instr, Tegra::Shader::TextureType texture_type,
757 Tegra::Shader::TextureProcessMode process_mode, bool depth_compare, 761 Tegra::Shader::TextureProcessMode process_mode, std::vector<Node> coords,
758 bool is_array, std::size_t array_offset, std::size_t bias_offset, 762 Node array, Node depth_compare, u32 bias_offset);
759 std::vector<Node>&& coords);
760 763
761 Node GetVideoOperand(Node op, bool is_chunk, bool is_signed, Tegra::Shader::VideoType type, 764 Node GetVideoOperand(Node op, bool is_chunk, bool is_signed, Tegra::Shader::VideoType type,
762 u64 byte_height); 765 u64 byte_height);
diff --git a/src/video_core/shader/track.cpp b/src/video_core/shader/track.cpp
index be4635342..33b071747 100644
--- a/src/video_core/shader/track.cpp
+++ b/src/video_core/shader/track.cpp
@@ -20,9 +20,9 @@ std::pair<Node, s64> FindOperation(const NodeBlock& code, s64 cursor,
20 return {node, cursor}; 20 return {node, cursor};
21 } 21 }
22 if (const auto conditional = std::get_if<ConditionalNode>(node)) { 22 if (const auto conditional = std::get_if<ConditionalNode>(node)) {
23 const auto& code = conditional->GetCode(); 23 const auto& conditional_code = conditional->GetCode();
24 const auto [found, internal_cursor] = 24 const auto [found, internal_cursor] = FindOperation(
25 FindOperation(code, static_cast<s64>(code.size() - 1), operation_code); 25 conditional_code, static_cast<s64>(conditional_code.size() - 1), operation_code);
26 if (found) 26 if (found)
27 return {found, cursor}; 27 return {found, cursor};
28 } 28 }
@@ -58,8 +58,8 @@ Node ShaderIR::TrackCbuf(Node tracked, const NodeBlock& code, s64 cursor) {
58 return nullptr; 58 return nullptr;
59 } 59 }
60 if (const auto conditional = std::get_if<ConditionalNode>(tracked)) { 60 if (const auto conditional = std::get_if<ConditionalNode>(tracked)) {
61 const auto& code = conditional->GetCode(); 61 const auto& conditional_code = conditional->GetCode();
62 return TrackCbuf(tracked, code, static_cast<s64>(code.size())); 62 return TrackCbuf(tracked, conditional_code, static_cast<s64>(conditional_code.size()));
63 } 63 }
64 return nullptr; 64 return nullptr;
65} 65}
diff --git a/src/video_core/surface.cpp b/src/video_core/surface.cpp
index 2f6612a35..a7ac26d71 100644
--- a/src/video_core/surface.cpp
+++ b/src/video_core/surface.cpp
@@ -89,8 +89,6 @@ PixelFormat PixelFormatFromDepthFormat(Tegra::DepthFormat format) {
89 89
90PixelFormat PixelFormatFromRenderTargetFormat(Tegra::RenderTargetFormat format) { 90PixelFormat PixelFormatFromRenderTargetFormat(Tegra::RenderTargetFormat format) {
91 switch (format) { 91 switch (format) {
92 // TODO (Hexagon12): Converting SRGBA to RGBA is a hack and doesn't completely correct the
93 // gamma.
94 case Tegra::RenderTargetFormat::RGBA8_SRGB: 92 case Tegra::RenderTargetFormat::RGBA8_SRGB:
95 return PixelFormat::RGBA8_SRGB; 93 return PixelFormat::RGBA8_SRGB;
96 case Tegra::RenderTargetFormat::RGBA8_UNORM: 94 case Tegra::RenderTargetFormat::RGBA8_UNORM:
@@ -426,6 +424,8 @@ PixelFormat PixelFormatFromGPUPixelFormat(Tegra::FramebufferConfig::PixelFormat
426 switch (format) { 424 switch (format) {
427 case Tegra::FramebufferConfig::PixelFormat::ABGR8: 425 case Tegra::FramebufferConfig::PixelFormat::ABGR8:
428 return PixelFormat::ABGR8U; 426 return PixelFormat::ABGR8U;
427 case Tegra::FramebufferConfig::PixelFormat::BGRA8:
428 return PixelFormat::BGRA8;
429 default: 429 default:
430 LOG_CRITICAL(HW_GPU, "Unimplemented format={}", static_cast<u32>(format)); 430 LOG_CRITICAL(HW_GPU, "Unimplemented format={}", static_cast<u32>(format));
431 UNREACHABLE(); 431 UNREACHABLE();
diff --git a/src/video_core/textures/astc.cpp b/src/video_core/textures/astc.cpp
index bc50a4876..b508d64e9 100644
--- a/src/video_core/textures/astc.cpp
+++ b/src/video_core/textures/astc.cpp
@@ -23,28 +23,12 @@
23 23
24#include "video_core/textures/astc.h" 24#include "video_core/textures/astc.h"
25 25
26class BitStream { 26class InputBitStream {
27public: 27public:
28 explicit BitStream(unsigned char* ptr, int nBits = 0, int start_offset = 0) 28 explicit InputBitStream(const unsigned char* ptr, int nBits = 0, int start_offset = 0)
29 : m_NumBits(nBits), m_CurByte(ptr), m_NextBit(start_offset % 8) {} 29 : m_NumBits(nBits), m_CurByte(ptr), m_NextBit(start_offset % 8) {}
30 30
31 ~BitStream() = default; 31 ~InputBitStream() = default;
32
33 int GetBitsWritten() const {
34 return m_BitsWritten;
35 }
36
37 void WriteBitsR(unsigned int val, unsigned int nBits) {
38 for (unsigned int i = 0; i < nBits; i++) {
39 WriteBit((val >> (nBits - i - 1)) & 1);
40 }
41 }
42
43 void WriteBits(unsigned int val, unsigned int nBits) {
44 for (unsigned int i = 0; i < nBits; i++) {
45 WriteBit((val >> i) & 1);
46 }
47 }
48 32
49 int GetBitsRead() const { 33 int GetBitsRead() const {
50 return m_BitsRead; 34 return m_BitsRead;
@@ -71,6 +55,38 @@ public:
71 } 55 }
72 56
73private: 57private:
58 const int m_NumBits;
59 const unsigned char* m_CurByte;
60 int m_NextBit = 0;
61 int m_BitsRead = 0;
62
63 bool done = false;
64};
65
66class OutputBitStream {
67public:
68 explicit OutputBitStream(unsigned char* ptr, int nBits = 0, int start_offset = 0)
69 : m_NumBits(nBits), m_CurByte(ptr), m_NextBit(start_offset % 8) {}
70
71 ~OutputBitStream() = default;
72
73 int GetBitsWritten() const {
74 return m_BitsWritten;
75 }
76
77 void WriteBitsR(unsigned int val, unsigned int nBits) {
78 for (unsigned int i = 0; i < nBits; i++) {
79 WriteBit((val >> (nBits - i - 1)) & 1);
80 }
81 }
82
83 void WriteBits(unsigned int val, unsigned int nBits) {
84 for (unsigned int i = 0; i < nBits; i++) {
85 WriteBit((val >> i) & 1);
86 }
87 }
88
89private:
74 void WriteBit(int b) { 90 void WriteBit(int b) {
75 91
76 if (done) 92 if (done)
@@ -238,8 +254,8 @@ public:
238 // Fills result with the values that are encoded in the given 254 // Fills result with the values that are encoded in the given
239 // bitstream. We must know beforehand what the maximum possible 255 // bitstream. We must know beforehand what the maximum possible
240 // value is, and how many values we're decoding. 256 // value is, and how many values we're decoding.
241 static void DecodeIntegerSequence(std::vector<IntegerEncodedValue>& result, BitStream& bits, 257 static void DecodeIntegerSequence(std::vector<IntegerEncodedValue>& result,
242 uint32_t maxRange, uint32_t nValues) { 258 InputBitStream& bits, uint32_t maxRange, uint32_t nValues) {
243 // Determine encoding parameters 259 // Determine encoding parameters
244 IntegerEncodedValue val = IntegerEncodedValue::CreateEncoding(maxRange); 260 IntegerEncodedValue val = IntegerEncodedValue::CreateEncoding(maxRange);
245 261
@@ -267,7 +283,7 @@ public:
267 } 283 }
268 284
269private: 285private:
270 static void DecodeTritBlock(BitStream& bits, std::vector<IntegerEncodedValue>& result, 286 static void DecodeTritBlock(InputBitStream& bits, std::vector<IntegerEncodedValue>& result,
271 uint32_t nBitsPerValue) { 287 uint32_t nBitsPerValue) {
272 // Implement the algorithm in section C.2.12 288 // Implement the algorithm in section C.2.12
273 uint32_t m[5]; 289 uint32_t m[5];
@@ -327,7 +343,7 @@ private:
327 } 343 }
328 } 344 }
329 345
330 static void DecodeQuintBlock(BitStream& bits, std::vector<IntegerEncodedValue>& result, 346 static void DecodeQuintBlock(InputBitStream& bits, std::vector<IntegerEncodedValue>& result,
331 uint32_t nBitsPerValue) { 347 uint32_t nBitsPerValue) {
332 // Implement the algorithm in section C.2.12 348 // Implement the algorithm in section C.2.12
333 uint32_t m[3]; 349 uint32_t m[3];
@@ -406,7 +422,7 @@ struct TexelWeightParams {
406 } 422 }
407}; 423};
408 424
409static TexelWeightParams DecodeBlockInfo(BitStream& strm) { 425static TexelWeightParams DecodeBlockInfo(InputBitStream& strm) {
410 TexelWeightParams params; 426 TexelWeightParams params;
411 427
412 // Read the entire block mode all at once 428 // Read the entire block mode all at once
@@ -605,7 +621,7 @@ static TexelWeightParams DecodeBlockInfo(BitStream& strm) {
605 return params; 621 return params;
606} 622}
607 623
608static void FillVoidExtentLDR(BitStream& strm, uint32_t* const outBuf, uint32_t blockWidth, 624static void FillVoidExtentLDR(InputBitStream& strm, uint32_t* const outBuf, uint32_t blockWidth,
609 uint32_t blockHeight) { 625 uint32_t blockHeight) {
610 // Don't actually care about the void extent, just read the bits... 626 // Don't actually care about the void extent, just read the bits...
611 for (int i = 0; i < 4; ++i) { 627 for (int i = 0; i < 4; ++i) {
@@ -821,7 +837,7 @@ static void DecodeColorValues(uint32_t* out, uint8_t* data, const uint32_t* mode
821 837
822 // We now have enough to decode our integer sequence. 838 // We now have enough to decode our integer sequence.
823 std::vector<IntegerEncodedValue> decodedColorValues; 839 std::vector<IntegerEncodedValue> decodedColorValues;
824 BitStream colorStream(data); 840 InputBitStream colorStream(data);
825 IntegerEncodedValue::DecodeIntegerSequence(decodedColorValues, colorStream, range, nValues); 841 IntegerEncodedValue::DecodeIntegerSequence(decodedColorValues, colorStream, range, nValues);
826 842
827 // Once we have the decoded values, we need to dequantize them to the 0-255 range 843 // Once we have the decoded values, we need to dequantize them to the 0-255 range
@@ -1365,9 +1381,9 @@ static void ComputeEndpoints(Pixel& ep1, Pixel& ep2, const uint32_t*& colorValue
1365#undef READ_INT_VALUES 1381#undef READ_INT_VALUES
1366} 1382}
1367 1383
1368static void DecompressBlock(uint8_t inBuf[16], const uint32_t blockWidth, 1384static void DecompressBlock(const uint8_t inBuf[16], const uint32_t blockWidth,
1369 const uint32_t blockHeight, uint32_t* outBuf) { 1385 const uint32_t blockHeight, uint32_t* outBuf) {
1370 BitStream strm(inBuf); 1386 InputBitStream strm(inBuf);
1371 TexelWeightParams weightParams = DecodeBlockInfo(strm); 1387 TexelWeightParams weightParams = DecodeBlockInfo(strm);
1372 1388
1373 // Was there an error? 1389 // Was there an error?
@@ -1421,7 +1437,7 @@ static void DecompressBlock(uint8_t inBuf[16], const uint32_t blockWidth,
1421 // Define color data. 1437 // Define color data.
1422 uint8_t colorEndpointData[16]; 1438 uint8_t colorEndpointData[16];
1423 memset(colorEndpointData, 0, sizeof(colorEndpointData)); 1439 memset(colorEndpointData, 0, sizeof(colorEndpointData));
1424 BitStream colorEndpointStream(colorEndpointData, 16 * 8, 0); 1440 OutputBitStream colorEndpointStream(colorEndpointData, 16 * 8, 0);
1425 1441
1426 // Read extra config data... 1442 // Read extra config data...
1427 uint32_t baseCEM = 0; 1443 uint32_t baseCEM = 0;
@@ -1549,7 +1565,7 @@ static void DecompressBlock(uint8_t inBuf[16], const uint32_t blockWidth,
1549 memset(texelWeightData + clearByteStart, 0, 16 - clearByteStart); 1565 memset(texelWeightData + clearByteStart, 0, 16 - clearByteStart);
1550 1566
1551 std::vector<IntegerEncodedValue> texelWeightValues; 1567 std::vector<IntegerEncodedValue> texelWeightValues;
1552 BitStream weightStream(texelWeightData); 1568 InputBitStream weightStream(texelWeightData);
1553 1569
1554 IntegerEncodedValue::DecodeIntegerSequence(texelWeightValues, weightStream, 1570 IntegerEncodedValue::DecodeIntegerSequence(texelWeightValues, weightStream,
1555 weightParams.m_MaxWeight, 1571 weightParams.m_MaxWeight,
@@ -1597,7 +1613,7 @@ static void DecompressBlock(uint8_t inBuf[16], const uint32_t blockWidth,
1597 1613
1598namespace Tegra::Texture::ASTC { 1614namespace Tegra::Texture::ASTC {
1599 1615
1600std::vector<uint8_t> Decompress(std::vector<uint8_t>& data, uint32_t width, uint32_t height, 1616std::vector<uint8_t> Decompress(const uint8_t* data, uint32_t width, uint32_t height,
1601 uint32_t depth, uint32_t block_width, uint32_t block_height) { 1617 uint32_t depth, uint32_t block_width, uint32_t block_height) {
1602 uint32_t blockIdx = 0; 1618 uint32_t blockIdx = 0;
1603 std::vector<uint8_t> outData(height * width * depth * 4); 1619 std::vector<uint8_t> outData(height * width * depth * 4);
@@ -1605,7 +1621,7 @@ std::vector<uint8_t> Decompress(std::vector<uint8_t>& data, uint32_t width, uint
1605 for (uint32_t j = 0; j < height; j += block_height) { 1621 for (uint32_t j = 0; j < height; j += block_height) {
1606 for (uint32_t i = 0; i < width; i += block_width) { 1622 for (uint32_t i = 0; i < width; i += block_width) {
1607 1623
1608 uint8_t* blockPtr = data.data() + blockIdx * 16; 1624 const uint8_t* blockPtr = data + blockIdx * 16;
1609 1625
1610 // Blocks can be at most 12x12 1626 // Blocks can be at most 12x12
1611 uint32_t uncompData[144]; 1627 uint32_t uncompData[144];
diff --git a/src/video_core/textures/astc.h b/src/video_core/textures/astc.h
index d419dd025..991cdba72 100644
--- a/src/video_core/textures/astc.h
+++ b/src/video_core/textures/astc.h
@@ -9,7 +9,7 @@
9 9
10namespace Tegra::Texture::ASTC { 10namespace Tegra::Texture::ASTC {
11 11
12std::vector<uint8_t> Decompress(std::vector<uint8_t>& data, uint32_t width, uint32_t height, 12std::vector<uint8_t> Decompress(const uint8_t* data, uint32_t width, uint32_t height,
13 uint32_t depth, uint32_t block_width, uint32_t block_height); 13 uint32_t depth, uint32_t block_width, uint32_t block_height);
14 14
15} // namespace Tegra::Texture::ASTC 15} // namespace Tegra::Texture::ASTC
diff --git a/src/video_core/textures/convert.cpp b/src/video_core/textures/convert.cpp
new file mode 100644
index 000000000..5e439f036
--- /dev/null
+++ b/src/video_core/textures/convert.cpp
@@ -0,0 +1,92 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <algorithm>
6#include <cstring>
7#include <tuple>
8#include <vector>
9
10#include "common/assert.h"
11#include "common/common_types.h"
12#include "common/logging/log.h"
13#include "video_core/textures/astc.h"
14#include "video_core/textures/convert.h"
15
16namespace Tegra::Texture {
17
18using VideoCore::Surface::PixelFormat;
19
20template <bool reverse>
21void SwapS8Z24ToZ24S8(u8* data, u32 width, u32 height) {
22 union S8Z24 {
23 BitField<0, 24, u32> z24;
24 BitField<24, 8, u32> s8;
25 };
26 static_assert(sizeof(S8Z24) == 4, "S8Z24 is incorrect size");
27
28 union Z24S8 {
29 BitField<0, 8, u32> s8;
30 BitField<8, 24, u32> z24;
31 };
32 static_assert(sizeof(Z24S8) == 4, "Z24S8 is incorrect size");
33
34 S8Z24 s8z24_pixel{};
35 Z24S8 z24s8_pixel{};
36 constexpr auto bpp{
37 VideoCore::Surface::GetBytesPerPixel(VideoCore::Surface::PixelFormat::S8Z24)};
38 for (std::size_t y = 0; y < height; ++y) {
39 for (std::size_t x = 0; x < width; ++x) {
40 const std::size_t offset{bpp * (y * width + x)};
41 if constexpr (reverse) {
42 std::memcpy(&z24s8_pixel, &data[offset], sizeof(Z24S8));
43 s8z24_pixel.s8.Assign(z24s8_pixel.s8);
44 s8z24_pixel.z24.Assign(z24s8_pixel.z24);
45 std::memcpy(&data[offset], &s8z24_pixel, sizeof(S8Z24));
46 } else {
47 std::memcpy(&s8z24_pixel, &data[offset], sizeof(S8Z24));
48 z24s8_pixel.s8.Assign(s8z24_pixel.s8);
49 z24s8_pixel.z24.Assign(s8z24_pixel.z24);
50 std::memcpy(&data[offset], &z24s8_pixel, sizeof(Z24S8));
51 }
52 }
53 }
54}
55
56static void ConvertS8Z24ToZ24S8(u8* data, u32 width, u32 height) {
57 SwapS8Z24ToZ24S8<false>(data, width, height);
58}
59
60static void ConvertZ24S8ToS8Z24(u8* data, u32 width, u32 height) {
61 SwapS8Z24ToZ24S8<true>(data, width, height);
62}
63
64void ConvertFromGuestToHost(u8* data, PixelFormat pixel_format, u32 width, u32 height, u32 depth,
65 bool convert_astc, bool convert_s8z24) {
66 if (convert_astc && IsPixelFormatASTC(pixel_format)) {
67 // Convert ASTC pixel formats to RGBA8, as most desktop GPUs do not support ASTC.
68 u32 block_width{};
69 u32 block_height{};
70 std::tie(block_width, block_height) = GetASTCBlockSize(pixel_format);
71 const std::vector<u8> rgba8_data =
72 Tegra::Texture::ASTC::Decompress(data, width, height, depth, block_width, block_height);
73 std::copy(rgba8_data.begin(), rgba8_data.end(), data);
74
75 } else if (convert_s8z24 && pixel_format == PixelFormat::S8Z24) {
76 Tegra::Texture::ConvertS8Z24ToZ24S8(data, width, height);
77 }
78}
79
80void ConvertFromHostToGuest(u8* data, PixelFormat pixel_format, u32 width, u32 height, u32 depth,
81 bool convert_astc, bool convert_s8z24) {
82 if (convert_astc && IsPixelFormatASTC(pixel_format)) {
83 LOG_CRITICAL(HW_GPU, "Conversion of format {} after texture flushing is not implemented",
84 static_cast<u32>(pixel_format));
85 UNREACHABLE();
86
87 } else if (convert_s8z24 && pixel_format == PixelFormat::S8Z24) {
88 Tegra::Texture::ConvertZ24S8ToS8Z24(data, width, height);
89 }
90}
91
92} // namespace Tegra::Texture \ No newline at end of file
diff --git a/src/video_core/textures/convert.h b/src/video_core/textures/convert.h
new file mode 100644
index 000000000..07cd8b5da
--- /dev/null
+++ b/src/video_core/textures/convert.h
@@ -0,0 +1,18 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include "common/common_types.h"
8#include "video_core/surface.h"
9
10namespace Tegra::Texture {
11
12void ConvertFromGuestToHost(u8* data, VideoCore::Surface::PixelFormat pixel_format, u32 width,
13 u32 height, u32 depth, bool convert_astc, bool convert_s8z24);
14
15void ConvertFromHostToGuest(u8* data, VideoCore::Surface::PixelFormat pixel_format, u32 width,
16 u32 height, u32 depth, bool convert_astc, bool convert_s8z24);
17
18} // namespace Tegra::Texture \ No newline at end of file
diff --git a/src/video_core/textures/decoders.cpp b/src/video_core/textures/decoders.cpp
index 5db75de22..995d0e068 100644
--- a/src/video_core/textures/decoders.cpp
+++ b/src/video_core/textures/decoders.cpp
@@ -6,7 +6,6 @@
6#include <cstring> 6#include <cstring>
7#include "common/alignment.h" 7#include "common/alignment.h"
8#include "common/assert.h" 8#include "common/assert.h"
9#include "core/memory.h"
10#include "video_core/gpu.h" 9#include "video_core/gpu.h"
11#include "video_core/textures/decoders.h" 10#include "video_core/textures/decoders.h"
12#include "video_core/textures/texture.h" 11#include "video_core/textures/texture.h"
@@ -103,8 +102,8 @@ void FastProcessBlock(u8* const swizzled_data, u8* const unswizzled_data, const
103 const u32 swizzle_offset{y_address + table[(xb / fast_swizzle_align) % 4]}; 102 const u32 swizzle_offset{y_address + table[(xb / fast_swizzle_align) % 4]};
104 const u32 out_x = xb * out_bytes_per_pixel / bytes_per_pixel; 103 const u32 out_x = xb * out_bytes_per_pixel / bytes_per_pixel;
105 const u32 pixel_index{out_x + pixel_base}; 104 const u32 pixel_index{out_x + pixel_base};
106 data_ptrs[unswizzle] = swizzled_data + swizzle_offset; 105 data_ptrs[unswizzle ? 1 : 0] = swizzled_data + swizzle_offset;
107 data_ptrs[!unswizzle] = unswizzled_data + pixel_index; 106 data_ptrs[unswizzle ? 0 : 1] = unswizzled_data + pixel_index;
108 std::memcpy(data_ptrs[0], data_ptrs[1], fast_swizzle_align); 107 std::memcpy(data_ptrs[0], data_ptrs[1], fast_swizzle_align);
109 } 108 }
110 pixel_base += stride_x; 109 pixel_base += stride_x;
@@ -154,7 +153,7 @@ void SwizzledData(u8* const swizzled_data, u8* const unswizzled_data, const bool
154 for (u32 xb = 0; xb < blocks_on_x; xb++) { 153 for (u32 xb = 0; xb < blocks_on_x; xb++) {
155 const u32 x_start = xb * block_x_elements; 154 const u32 x_start = xb * block_x_elements;
156 const u32 x_end = std::min(width, x_start + block_x_elements); 155 const u32 x_end = std::min(width, x_start + block_x_elements);
157 if (fast) { 156 if constexpr (fast) {
158 FastProcessBlock(swizzled_data, unswizzled_data, unswizzle, x_start, y_start, 157 FastProcessBlock(swizzled_data, unswizzled_data, unswizzle, x_start, y_start,
159 z_start, x_end, y_end, z_end, tile_offset, xy_block_size, 158 z_start, x_end, y_end, z_end, tile_offset, xy_block_size,
160 layer_z, stride_x, bytes_per_pixel, out_bytes_per_pixel); 159 layer_z, stride_x, bytes_per_pixel, out_bytes_per_pixel);
@@ -230,18 +229,18 @@ u32 BytesPerPixel(TextureFormat format) {
230 } 229 }
231} 230}
232 231
233void UnswizzleTexture(u8* const unswizzled_data, VAddr address, u32 tile_size_x, u32 tile_size_y, 232void UnswizzleTexture(u8* const unswizzled_data, u8* address, u32 tile_size_x, u32 tile_size_y,
234 u32 bytes_per_pixel, u32 width, u32 height, u32 depth, u32 block_height, 233 u32 bytes_per_pixel, u32 width, u32 height, u32 depth, u32 block_height,
235 u32 block_depth, u32 width_spacing) { 234 u32 block_depth, u32 width_spacing) {
236 CopySwizzledData((width + tile_size_x - 1) / tile_size_x, 235 CopySwizzledData((width + tile_size_x - 1) / tile_size_x,
237 (height + tile_size_y - 1) / tile_size_y, depth, bytes_per_pixel, 236 (height + tile_size_y - 1) / tile_size_y, depth, bytes_per_pixel,
238 bytes_per_pixel, Memory::GetPointer(address), unswizzled_data, true, 237 bytes_per_pixel, address, unswizzled_data, true, block_height, block_depth,
239 block_height, block_depth, width_spacing); 238 width_spacing);
240} 239}
241 240
242std::vector<u8> UnswizzleTexture(VAddr address, u32 tile_size_x, u32 tile_size_y, 241std::vector<u8> UnswizzleTexture(u8* address, u32 tile_size_x, u32 tile_size_y, u32 bytes_per_pixel,
243 u32 bytes_per_pixel, u32 width, u32 height, u32 depth, 242 u32 width, u32 height, u32 depth, u32 block_height,
244 u32 block_height, u32 block_depth, u32 width_spacing) { 243 u32 block_depth, u32 width_spacing) {
245 std::vector<u8> unswizzled_data(width * height * depth * bytes_per_pixel); 244 std::vector<u8> unswizzled_data(width * height * depth * bytes_per_pixel);
246 UnswizzleTexture(unswizzled_data.data(), address, tile_size_x, tile_size_y, bytes_per_pixel, 245 UnswizzleTexture(unswizzled_data.data(), address, tile_size_x, tile_size_y, bytes_per_pixel,
247 width, height, depth, block_height, block_depth, width_spacing); 246 width, height, depth, block_height, block_depth, width_spacing);
@@ -249,8 +248,7 @@ std::vector<u8> UnswizzleTexture(VAddr address, u32 tile_size_x, u32 tile_size_y
249} 248}
250 249
251void SwizzleSubrect(u32 subrect_width, u32 subrect_height, u32 source_pitch, u32 swizzled_width, 250void SwizzleSubrect(u32 subrect_width, u32 subrect_height, u32 source_pitch, u32 swizzled_width,
252 u32 bytes_per_pixel, VAddr swizzled_data, VAddr unswizzled_data, 251 u32 bytes_per_pixel, u8* swizzled_data, u8* unswizzled_data, u32 block_height) {
253 u32 block_height) {
254 const u32 image_width_in_gobs{(swizzled_width * bytes_per_pixel + (gob_size_x - 1)) / 252 const u32 image_width_in_gobs{(swizzled_width * bytes_per_pixel + (gob_size_x - 1)) /
255 gob_size_x}; 253 gob_size_x};
256 for (u32 line = 0; line < subrect_height; ++line) { 254 for (u32 line = 0; line < subrect_height; ++line) {
@@ -262,17 +260,17 @@ void SwizzleSubrect(u32 subrect_width, u32 subrect_height, u32 source_pitch, u32
262 const u32 gob_address = 260 const u32 gob_address =
263 gob_address_y + (x * bytes_per_pixel / gob_size_x) * gob_size * block_height; 261 gob_address_y + (x * bytes_per_pixel / gob_size_x) * gob_size * block_height;
264 const u32 swizzled_offset = gob_address + table[(x * bytes_per_pixel) % gob_size_x]; 262 const u32 swizzled_offset = gob_address + table[(x * bytes_per_pixel) % gob_size_x];
265 const VAddr source_line = unswizzled_data + line * source_pitch + x * bytes_per_pixel; 263 u8* source_line = unswizzled_data + line * source_pitch + x * bytes_per_pixel;
266 const VAddr dest_addr = swizzled_data + swizzled_offset; 264 u8* dest_addr = swizzled_data + swizzled_offset;
267 265
268 Memory::CopyBlock(dest_addr, source_line, bytes_per_pixel); 266 std::memcpy(dest_addr, source_line, bytes_per_pixel);
269 } 267 }
270 } 268 }
271} 269}
272 270
273void UnswizzleSubrect(u32 subrect_width, u32 subrect_height, u32 dest_pitch, u32 swizzled_width, 271void UnswizzleSubrect(u32 subrect_width, u32 subrect_height, u32 dest_pitch, u32 swizzled_width,
274 u32 bytes_per_pixel, VAddr swizzled_data, VAddr unswizzled_data, 272 u32 bytes_per_pixel, u8* swizzled_data, u8* unswizzled_data, u32 block_height,
275 u32 block_height, u32 offset_x, u32 offset_y) { 273 u32 offset_x, u32 offset_y) {
276 for (u32 line = 0; line < subrect_height; ++line) { 274 for (u32 line = 0; line < subrect_height; ++line) {
277 const u32 y2 = line + offset_y; 275 const u32 y2 = line + offset_y;
278 const u32 gob_address_y = (y2 / (gob_size_y * block_height)) * gob_size * block_height + 276 const u32 gob_address_y = (y2 / (gob_size_y * block_height)) * gob_size * block_height +
@@ -282,10 +280,10 @@ void UnswizzleSubrect(u32 subrect_width, u32 subrect_height, u32 dest_pitch, u32
282 const u32 x2 = (x + offset_x) * bytes_per_pixel; 280 const u32 x2 = (x + offset_x) * bytes_per_pixel;
283 const u32 gob_address = gob_address_y + (x2 / gob_size_x) * gob_size * block_height; 281 const u32 gob_address = gob_address_y + (x2 / gob_size_x) * gob_size * block_height;
284 const u32 swizzled_offset = gob_address + table[x2 % gob_size_x]; 282 const u32 swizzled_offset = gob_address + table[x2 % gob_size_x];
285 const VAddr dest_line = unswizzled_data + line * dest_pitch + x * bytes_per_pixel; 283 u8* dest_line = unswizzled_data + line * dest_pitch + x * bytes_per_pixel;
286 const VAddr source_addr = swizzled_data + swizzled_offset; 284 u8* source_addr = swizzled_data + swizzled_offset;
287 285
288 Memory::CopyBlock(dest_line, source_addr, bytes_per_pixel); 286 std::memcpy(dest_line, source_addr, bytes_per_pixel);
289 } 287 }
290 } 288 }
291} 289}
diff --git a/src/video_core/textures/decoders.h b/src/video_core/textures/decoders.h
index 85b7e9f7b..e078fa274 100644
--- a/src/video_core/textures/decoders.h
+++ b/src/video_core/textures/decoders.h
@@ -16,18 +16,15 @@ inline std::size_t GetGOBSize() {
16 return 512; 16 return 512;
17} 17}
18 18
19/** 19/// Unswizzles a swizzled texture without changing its format.
20 * Unswizzles a swizzled texture without changing its format. 20void UnswizzleTexture(u8* unswizzled_data, u8* address, u32 tile_size_x, u32 tile_size_y,
21 */
22void UnswizzleTexture(u8* unswizzled_data, VAddr address, u32 tile_size_x, u32 tile_size_y,
23 u32 bytes_per_pixel, u32 width, u32 height, u32 depth, 21 u32 bytes_per_pixel, u32 width, u32 height, u32 depth,
24 u32 block_height = TICEntry::DefaultBlockHeight, 22 u32 block_height = TICEntry::DefaultBlockHeight,
25 u32 block_depth = TICEntry::DefaultBlockHeight, u32 width_spacing = 0); 23 u32 block_depth = TICEntry::DefaultBlockHeight, u32 width_spacing = 0);
26/** 24
27 * Unswizzles a swizzled texture without changing its format. 25/// Unswizzles a swizzled texture without changing its format.
28 */ 26std::vector<u8> UnswizzleTexture(u8* address, u32 tile_size_x, u32 tile_size_y, u32 bytes_per_pixel,
29std::vector<u8> UnswizzleTexture(VAddr address, u32 tile_size_x, u32 tile_size_y, 27 u32 width, u32 height, u32 depth,
30 u32 bytes_per_pixel, u32 width, u32 height, u32 depth,
31 u32 block_height = TICEntry::DefaultBlockHeight, 28 u32 block_height = TICEntry::DefaultBlockHeight,
32 u32 block_depth = TICEntry::DefaultBlockHeight, 29 u32 block_depth = TICEntry::DefaultBlockHeight,
33 u32 width_spacing = 0); 30 u32 width_spacing = 0);
@@ -37,25 +34,21 @@ void CopySwizzledData(u32 width, u32 height, u32 depth, u32 bytes_per_pixel,
37 u32 out_bytes_per_pixel, u8* swizzled_data, u8* unswizzled_data, 34 u32 out_bytes_per_pixel, u8* swizzled_data, u8* unswizzled_data,
38 bool unswizzle, u32 block_height, u32 block_depth, u32 width_spacing); 35 bool unswizzle, u32 block_height, u32 block_depth, u32 width_spacing);
39 36
40/** 37/// Decodes an unswizzled texture into a A8R8G8B8 texture.
41 * Decodes an unswizzled texture into a A8R8G8B8 texture.
42 */
43std::vector<u8> DecodeTexture(const std::vector<u8>& texture_data, TextureFormat format, u32 width, 38std::vector<u8> DecodeTexture(const std::vector<u8>& texture_data, TextureFormat format, u32 width,
44 u32 height); 39 u32 height);
45 40
46/** 41/// This function calculates the correct size of a texture depending if it's tiled or not.
47 * This function calculates the correct size of a texture depending if it's tiled or not.
48 */
49std::size_t CalculateSize(bool tiled, u32 bytes_per_pixel, u32 width, u32 height, u32 depth, 42std::size_t CalculateSize(bool tiled, u32 bytes_per_pixel, u32 width, u32 height, u32 depth,
50 u32 block_height, u32 block_depth); 43 u32 block_height, u32 block_depth);
51 44
52/// Copies an untiled subrectangle into a tiled surface. 45/// Copies an untiled subrectangle into a tiled surface.
53void SwizzleSubrect(u32 subrect_width, u32 subrect_height, u32 source_pitch, u32 swizzled_width, 46void SwizzleSubrect(u32 subrect_width, u32 subrect_height, u32 source_pitch, u32 swizzled_width,
54 u32 bytes_per_pixel, VAddr swizzled_data, VAddr unswizzled_data, 47 u32 bytes_per_pixel, u8* swizzled_data, u8* unswizzled_data, u32 block_height);
55 u32 block_height); 48
56/// Copies a tiled subrectangle into a linear surface. 49/// Copies a tiled subrectangle into a linear surface.
57void UnswizzleSubrect(u32 subrect_width, u32 subrect_height, u32 dest_pitch, u32 swizzled_width, 50void UnswizzleSubrect(u32 subrect_width, u32 subrect_height, u32 dest_pitch, u32 swizzled_width,
58 u32 bytes_per_pixel, VAddr swizzled_data, VAddr unswizzled_data, 51 u32 bytes_per_pixel, u8* swizzled_data, u8* unswizzled_data, u32 block_height,
59 u32 block_height, u32 offset_x, u32 offset_y); 52 u32 offset_x, u32 offset_y);
60 53
61} // namespace Tegra::Texture 54} // namespace Tegra::Texture
diff --git a/src/video_core/textures/texture.h b/src/video_core/textures/texture.h
index 0fc5530f2..93ecc6e31 100644
--- a/src/video_core/textures/texture.h
+++ b/src/video_core/textures/texture.h
@@ -4,6 +4,7 @@
4 4
5#pragma once 5#pragma once
6 6
7#include <array>
7#include "common/assert.h" 8#include "common/assert.h"
8#include "common/bit_field.h" 9#include "common/bit_field.h"
9#include "common/common_funcs.h" 10#include "common/common_funcs.h"
@@ -282,34 +283,62 @@ enum class TextureMipmapFilter : u32 {
282 283
283struct TSCEntry { 284struct TSCEntry {
284 union { 285 union {
285 BitField<0, 3, WrapMode> wrap_u; 286 struct {
286 BitField<3, 3, WrapMode> wrap_v; 287 union {
287 BitField<6, 3, WrapMode> wrap_p; 288 BitField<0, 3, WrapMode> wrap_u;
288 BitField<9, 1, u32> depth_compare_enabled; 289 BitField<3, 3, WrapMode> wrap_v;
289 BitField<10, 3, DepthCompareFunc> depth_compare_func; 290 BitField<6, 3, WrapMode> wrap_p;
290 BitField<13, 1, u32> srgb_conversion; 291 BitField<9, 1, u32> depth_compare_enabled;
291 BitField<20, 3, u32> max_anisotropy; 292 BitField<10, 3, DepthCompareFunc> depth_compare_func;
293 BitField<13, 1, u32> srgb_conversion;
294 BitField<20, 3, u32> max_anisotropy;
295 };
296 union {
297 BitField<0, 2, TextureFilter> mag_filter;
298 BitField<4, 2, TextureFilter> min_filter;
299 BitField<6, 2, TextureMipmapFilter> mipmap_filter;
300 BitField<9, 1, u32> cubemap_interface_filtering;
301 BitField<12, 13, u32> mip_lod_bias;
302 };
303 union {
304 BitField<0, 12, u32> min_lod_clamp;
305 BitField<12, 12, u32> max_lod_clamp;
306 BitField<24, 8, u32> srgb_border_color_r;
307 };
308 union {
309 BitField<12, 8, u32> srgb_border_color_g;
310 BitField<20, 8, u32> srgb_border_color_b;
311 };
312 std::array<f32, 4> border_color;
313 };
314 std::array<u8, 0x20> raw;
292 }; 315 };
293 union { 316
294 BitField<0, 2, TextureFilter> mag_filter; 317 float GetMaxAnisotropy() const {
295 BitField<4, 2, TextureFilter> min_filter; 318 return static_cast<float>(1U << max_anisotropy);
296 BitField<6, 2, TextureMipmapFilter> mip_filter; 319 }
297 BitField<9, 1, u32> cubemap_interface_filtering; 320
298 BitField<12, 13, u32> mip_lod_bias; 321 float GetMinLod() const {
299 }; 322 return static_cast<float>(min_lod_clamp) / 256.0f;
300 union { 323 }
301 BitField<0, 12, u32> min_lod_clamp; 324
302 BitField<12, 12, u32> max_lod_clamp; 325 float GetMaxLod() const {
303 BitField<24, 8, u32> srgb_border_color_r; 326 return static_cast<float>(max_lod_clamp) / 256.0f;
304 }; 327 }
305 union { 328
306 BitField<12, 8, u32> srgb_border_color_g; 329 float GetLodBias() const {
307 BitField<20, 8, u32> srgb_border_color_b; 330 // Sign extend the 13-bit value.
308 }; 331 constexpr u32 mask = 1U << (13 - 1);
309 float border_color_r; 332 return static_cast<s32>((mip_lod_bias ^ mask) - mask) / 256.0f;
310 float border_color_g; 333 }
311 float border_color_b; 334
312 float border_color_a; 335 std::array<float, 4> GetBorderColor() const {
336 if (srgb_conversion) {
337 return {srgb_border_color_r / 255.0f, srgb_border_color_g / 255.0f,
338 srgb_border_color_b / 255.0f, border_color[3]};
339 }
340 return border_color;
341 }
313}; 342};
314static_assert(sizeof(TSCEntry) == 0x20, "TSCEntry has wrong size"); 343static_assert(sizeof(TSCEntry) == 0x20, "TSCEntry has wrong size");
315 344
diff --git a/src/web_service/verify_login.h b/src/web_service/verify_login.h
index 39db32dbb..821b345d7 100644
--- a/src/web_service/verify_login.h
+++ b/src/web_service/verify_login.h
@@ -4,8 +4,6 @@
4 4
5#pragma once 5#pragma once
6 6
7#include <functional>
8#include <future>
9#include <string> 7#include <string>
10 8
11namespace WebService { 9namespace WebService {
diff --git a/src/web_service/web_backend.cpp b/src/web_service/web_backend.cpp
index b7737b615..dc149d2ed 100644
--- a/src/web_service/web_backend.cpp
+++ b/src/web_service/web_backend.cpp
@@ -10,7 +10,6 @@
10#include "common/common_types.h" 10#include "common/common_types.h"
11#include "common/logging/log.h" 11#include "common/logging/log.h"
12#include "common/web_result.h" 12#include "common/web_result.h"
13#include "core/settings.h"
14#include "web_service/web_backend.h" 13#include "web_service/web_backend.h"
15 14
16namespace WebService { 15namespace WebService {
@@ -25,7 +24,7 @@ constexpr u32 TIMEOUT_SECONDS = 30;
25struct Client::Impl { 24struct Client::Impl {
26 Impl(std::string host, std::string username, std::string token) 25 Impl(std::string host, std::string username, std::string token)
27 : host{std::move(host)}, username{std::move(username)}, token{std::move(token)} { 26 : host{std::move(host)}, username{std::move(username)}, token{std::move(token)} {
28 std::lock_guard<std::mutex> lock(jwt_cache.mutex); 27 std::lock_guard lock{jwt_cache.mutex};
29 if (this->username == jwt_cache.username && this->token == jwt_cache.token) { 28 if (this->username == jwt_cache.username && this->token == jwt_cache.token) {
30 jwt = jwt_cache.jwt; 29 jwt = jwt_cache.jwt;
31 } 30 }
@@ -152,7 +151,7 @@ struct Client::Impl {
152 if (result.result_code != Common::WebResult::Code::Success) { 151 if (result.result_code != Common::WebResult::Code::Success) {
153 LOG_ERROR(WebService, "UpdateJWT failed"); 152 LOG_ERROR(WebService, "UpdateJWT failed");
154 } else { 153 } else {
155 std::lock_guard<std::mutex> lock(jwt_cache.mutex); 154 std::lock_guard lock{jwt_cache.mutex};
156 jwt_cache.username = username; 155 jwt_cache.username = username;
157 jwt_cache.token = token; 156 jwt_cache.token = token;
158 jwt_cache.jwt = jwt = result.returned_data; 157 jwt_cache.jwt = jwt = result.returned_data;
diff --git a/src/yuzu/applets/profile_select.cpp b/src/yuzu/applets/profile_select.cpp
index 5c1b65a2c..f95f7fe3c 100644
--- a/src/yuzu/applets/profile_select.cpp
+++ b/src/yuzu/applets/profile_select.cpp
@@ -58,10 +58,7 @@ QtProfileSelectionDialog::QtProfileSelectionDialog(QWidget* parent)
58 58
59 scroll_area = new QScrollArea; 59 scroll_area = new QScrollArea;
60 60
61 buttons = new QDialogButtonBox; 61 buttons = new QDialogButtonBox(QDialogButtonBox::Cancel | QDialogButtonBox::Ok);
62 buttons->addButton(tr("Cancel"), QDialogButtonBox::RejectRole);
63 buttons->addButton(tr("OK"), QDialogButtonBox::AcceptRole);
64
65 connect(buttons, &QDialogButtonBox::accepted, this, &QtProfileSelectionDialog::accept); 62 connect(buttons, &QDialogButtonBox::accepted, this, &QtProfileSelectionDialog::accept);
66 connect(buttons, &QDialogButtonBox::rejected, this, &QtProfileSelectionDialog::reject); 63 connect(buttons, &QDialogButtonBox::rejected, this, &QtProfileSelectionDialog::reject);
67 64
@@ -163,6 +160,6 @@ void QtProfileSelector::SelectProfile(
163 160
164void QtProfileSelector::MainWindowFinishedSelection(std::optional<Service::Account::UUID> uuid) { 161void QtProfileSelector::MainWindowFinishedSelection(std::optional<Service::Account::UUID> uuid) {
165 // Acquire the HLE mutex 162 // Acquire the HLE mutex
166 std::lock_guard<std::recursive_mutex> lock(HLE::g_hle_lock); 163 std::lock_guard lock{HLE::g_hle_lock};
167 callback(uuid); 164 callback(uuid);
168} 165}
diff --git a/src/yuzu/applets/software_keyboard.cpp b/src/yuzu/applets/software_keyboard.cpp
index 8a26fdff1..f3eb29b25 100644
--- a/src/yuzu/applets/software_keyboard.cpp
+++ b/src/yuzu/applets/software_keyboard.cpp
@@ -75,13 +75,13 @@ QtSoftwareKeyboardDialog::QtSoftwareKeyboardDialog(
75 length_label->setText(QStringLiteral("%1/%2").arg(text.size()).arg(parameters.max_length)); 75 length_label->setText(QStringLiteral("%1/%2").arg(text.size()).arg(parameters.max_length));
76 }); 76 });
77 77
78 buttons = new QDialogButtonBox; 78 buttons = new QDialogButtonBox(QDialogButtonBox::Cancel);
79 buttons->addButton(tr("Cancel"), QDialogButtonBox::RejectRole); 79 if (parameters.submit_text.empty()) {
80 buttons->addButton(parameters.submit_text.empty() 80 buttons->addButton(QDialogButtonBox::Ok);
81 ? tr("OK") 81 } else {
82 : QString::fromStdU16String(parameters.submit_text), 82 buttons->addButton(QString::fromStdU16String(parameters.submit_text),
83 QDialogButtonBox::AcceptRole); 83 QDialogButtonBox::AcceptRole);
84 84 }
85 connect(buttons, &QDialogButtonBox::accepted, this, &QtSoftwareKeyboardDialog::accept); 85 connect(buttons, &QDialogButtonBox::accepted, this, &QtSoftwareKeyboardDialog::accept);
86 connect(buttons, &QDialogButtonBox::rejected, this, &QtSoftwareKeyboardDialog::reject); 86 connect(buttons, &QDialogButtonBox::rejected, this, &QtSoftwareKeyboardDialog::reject);
87 layout->addWidget(header_label); 87 layout->addWidget(header_label);
@@ -141,12 +141,12 @@ void QtSoftwareKeyboard::SendTextCheckDialog(std::u16string error_message,
141 141
142void QtSoftwareKeyboard::MainWindowFinishedText(std::optional<std::u16string> text) { 142void QtSoftwareKeyboard::MainWindowFinishedText(std::optional<std::u16string> text) {
143 // Acquire the HLE mutex 143 // Acquire the HLE mutex
144 std::lock_guard<std::recursive_mutex> lock(HLE::g_hle_lock); 144 std::lock_guard lock{HLE::g_hle_lock};
145 text_output(text); 145 text_output(text);
146} 146}
147 147
148void QtSoftwareKeyboard::MainWindowFinishedCheckDialog() { 148void QtSoftwareKeyboard::MainWindowFinishedCheckDialog() {
149 // Acquire the HLE mutex 149 // Acquire the HLE mutex
150 std::lock_guard<std::recursive_mutex> lock(HLE::g_hle_lock); 150 std::lock_guard lock{HLE::g_hle_lock};
151 finished_check(); 151 finished_check();
152} 152}
diff --git a/src/yuzu/applets/web_browser.cpp b/src/yuzu/applets/web_browser.cpp
index 6a9138d53..ac80b2fa2 100644
--- a/src/yuzu/applets/web_browser.cpp
+++ b/src/yuzu/applets/web_browser.cpp
@@ -56,6 +56,8 @@ constexpr char NX_SHIM_INJECT_SCRIPT[] = R"(
56 window.nx.endApplet = function() { 56 window.nx.endApplet = function() {
57 applet_done = true; 57 applet_done = true;
58 }; 58 };
59
60 window.onkeypress = function(e) { if (e.keyCode === 13) { applet_done = true; } };
59)"; 61)";
60 62
61QString GetNXShimInjectionScript() { 63QString GetNXShimInjectionScript() {
@@ -102,12 +104,12 @@ void QtWebBrowser::OpenPage(std::string_view url, std::function<void()> unpack_r
102 104
103void QtWebBrowser::MainWindowUnpackRomFS() { 105void QtWebBrowser::MainWindowUnpackRomFS() {
104 // Acquire the HLE mutex 106 // Acquire the HLE mutex
105 std::lock_guard<std::recursive_mutex> lock(HLE::g_hle_lock); 107 std::lock_guard lock{HLE::g_hle_lock};
106 unpack_romfs_callback(); 108 unpack_romfs_callback();
107} 109}
108 110
109void QtWebBrowser::MainWindowFinishedBrowsing() { 111void QtWebBrowser::MainWindowFinishedBrowsing() {
110 // Acquire the HLE mutex 112 // Acquire the HLE mutex
111 std::lock_guard<std::recursive_mutex> lock(HLE::g_hle_lock); 113 std::lock_guard lock{HLE::g_hle_lock};
112 finished_callback(); 114 finished_callback();
113} 115}
diff --git a/src/yuzu/bootmanager.cpp b/src/yuzu/bootmanager.cpp
index 73b04b749..7438fbc0a 100644
--- a/src/yuzu/bootmanager.cpp
+++ b/src/yuzu/bootmanager.cpp
@@ -20,15 +20,10 @@
20EmuThread::EmuThread(GRenderWindow* render_window) : render_window(render_window) {} 20EmuThread::EmuThread(GRenderWindow* render_window) : render_window(render_window) {}
21 21
22void EmuThread::run() { 22void EmuThread::run() {
23 if (!Settings::values.use_multi_core) { 23 render_window->MakeCurrent();
24 // Single core mode must acquire OpenGL context for entire emulation session
25 render_window->MakeCurrent();
26 }
27 24
28 MicroProfileOnThreadCreate("EmuThread"); 25 MicroProfileOnThreadCreate("EmuThread");
29 26
30 stop_run = false;
31
32 emit LoadProgress(VideoCore::LoadCallbackStage::Prepare, 0, 0); 27 emit LoadProgress(VideoCore::LoadCallbackStage::Prepare, 0, 0);
33 28
34 Core::System::GetInstance().Renderer().Rasterizer().LoadDiskResources( 29 Core::System::GetInstance().Renderer().Rasterizer().LoadDiskResources(
@@ -38,7 +33,12 @@ void EmuThread::run() {
38 33
39 emit LoadProgress(VideoCore::LoadCallbackStage::Complete, 0, 0); 34 emit LoadProgress(VideoCore::LoadCallbackStage::Complete, 0, 0);
40 35
41 // holds whether the cpu was running during the last iteration, 36 if (Settings::values.use_asynchronous_gpu_emulation) {
37 // Release OpenGL context for the GPU thread
38 render_window->DoneCurrent();
39 }
40
41 // Holds whether the cpu was running during the last iteration,
42 // so that the DebugModeLeft signal can be emitted before the 42 // so that the DebugModeLeft signal can be emitted before the
43 // next execution step 43 // next execution step
44 bool was_active = false; 44 bool was_active = false;
@@ -67,7 +67,7 @@ void EmuThread::run() {
67 67
68 was_active = false; 68 was_active = false;
69 } else { 69 } else {
70 std::unique_lock<std::mutex> lock(running_mutex); 70 std::unique_lock lock{running_mutex};
71 running_cv.wait(lock, [this] { return IsRunning() || exec_step || stop_run; }); 71 running_cv.wait(lock, [this] { return IsRunning() || exec_step || stop_run; });
72 } 72 }
73 } 73 }
@@ -121,7 +121,6 @@ GRenderWindow::GRenderWindow(QWidget* parent, EmuThread* emu_thread)
121 setAttribute(Qt::WA_AcceptTouchEvents); 121 setAttribute(Qt::WA_AcceptTouchEvents);
122 122
123 InputCommon::Init(); 123 InputCommon::Init();
124 InputCommon::StartJoystickEventHandler();
125 connect(this, &GRenderWindow::FirstFrameDisplayed, static_cast<GMainWindow*>(parent), 124 connect(this, &GRenderWindow::FirstFrameDisplayed, static_cast<GMainWindow*>(parent),
126 &GMainWindow::OnLoadComplete); 125 &GMainWindow::OnLoadComplete);
127} 126}
diff --git a/src/yuzu/bootmanager.h b/src/yuzu/bootmanager.h
index 7226e690e..3183621bc 100644
--- a/src/yuzu/bootmanager.h
+++ b/src/yuzu/bootmanager.h
@@ -53,7 +53,7 @@ public:
53 * @note This function is thread-safe 53 * @note This function is thread-safe
54 */ 54 */
55 void SetRunning(bool running) { 55 void SetRunning(bool running) {
56 std::unique_lock<std::mutex> lock(running_mutex); 56 std::unique_lock lock{running_mutex};
57 this->running = running; 57 this->running = running;
58 lock.unlock(); 58 lock.unlock();
59 running_cv.notify_all(); 59 running_cv.notify_all();
diff --git a/src/yuzu/compatdb.cpp b/src/yuzu/compatdb.cpp
index 5f0896f84..c8b0a5ec0 100644
--- a/src/yuzu/compatdb.cpp
+++ b/src/yuzu/compatdb.cpp
@@ -53,15 +53,15 @@ void CompatDB::Submit() {
53 case CompatDBPage::Final: 53 case CompatDBPage::Final:
54 back(); 54 back();
55 LOG_DEBUG(Frontend, "Compatibility Rating: {}", compatibility->checkedId()); 55 LOG_DEBUG(Frontend, "Compatibility Rating: {}", compatibility->checkedId());
56 Core::Telemetry().AddField(Telemetry::FieldType::UserFeedback, "Compatibility", 56 Core::System::GetInstance().TelemetrySession().AddField(
57 compatibility->checkedId()); 57 Telemetry::FieldType::UserFeedback, "Compatibility", compatibility->checkedId());
58 58
59 button(NextButton)->setEnabled(false); 59 button(NextButton)->setEnabled(false);
60 button(NextButton)->setText(tr("Submitting")); 60 button(NextButton)->setText(tr("Submitting"));
61 button(QWizard::CancelButton)->setVisible(false); 61 button(QWizard::CancelButton)->setVisible(false);
62 62
63 testcase_watcher.setFuture(QtConcurrent::run( 63 testcase_watcher.setFuture(QtConcurrent::run(
64 [this]() { return Core::System::GetInstance().TelemetrySession().SubmitTestcase(); })); 64 [] { return Core::System::GetInstance().TelemetrySession().SubmitTestcase(); }));
65 break; 65 break;
66 default: 66 default:
67 LOG_ERROR(Frontend, "Unexpected page: {}", currentId()); 67 LOG_ERROR(Frontend, "Unexpected page: {}", currentId());
diff --git a/src/yuzu/configuration/config.cpp b/src/yuzu/configuration/config.cpp
index e9546dadf..dead9f807 100644
--- a/src/yuzu/configuration/config.cpp
+++ b/src/yuzu/configuration/config.cpp
@@ -209,7 +209,7 @@ void Config::ReadPlayerValues() {
209 for (std::size_t p = 0; p < Settings::values.players.size(); ++p) { 209 for (std::size_t p = 0; p < Settings::values.players.size(); ++p) {
210 auto& player = Settings::values.players[p]; 210 auto& player = Settings::values.players[p];
211 211
212 player.connected = qt_config->value(QString("player_%1_connected").arg(p), false).toBool(); 212 player.connected = ReadSetting(QString("player_%1_connected").arg(p), false).toBool();
213 213
214 player.type = static_cast<Settings::ControllerType>( 214 player.type = static_cast<Settings::ControllerType>(
215 qt_config 215 qt_config
@@ -269,7 +269,7 @@ void Config::ReadPlayerValues() {
269} 269}
270 270
271void Config::ReadDebugValues() { 271void Config::ReadDebugValues() {
272 Settings::values.debug_pad_enabled = qt_config->value("debug_pad_enabled", false).toBool(); 272 Settings::values.debug_pad_enabled = ReadSetting("debug_pad_enabled", false).toBool();
273 for (int i = 0; i < Settings::NativeButton::NumButtons; ++i) { 273 for (int i = 0; i < Settings::NativeButton::NumButtons; ++i) {
274 std::string default_param = InputCommon::GenerateKeyboardParam(default_buttons[i]); 274 std::string default_param = InputCommon::GenerateKeyboardParam(default_buttons[i]);
275 Settings::values.debug_pad_buttons[i] = 275 Settings::values.debug_pad_buttons[i] =
@@ -298,7 +298,7 @@ void Config::ReadDebugValues() {
298} 298}
299 299
300void Config::ReadKeyboardValues() { 300void Config::ReadKeyboardValues() {
301 Settings::values.keyboard_enabled = qt_config->value("keyboard_enabled", false).toBool(); 301 Settings::values.keyboard_enabled = ReadSetting("keyboard_enabled", false).toBool();
302 302
303 std::transform(default_keyboard_keys.begin(), default_keyboard_keys.end(), 303 std::transform(default_keyboard_keys.begin(), default_keyboard_keys.end(),
304 Settings::values.keyboard_keys.begin(), InputCommon::GenerateKeyboardParam); 304 Settings::values.keyboard_keys.begin(), InputCommon::GenerateKeyboardParam);
@@ -311,7 +311,7 @@ void Config::ReadKeyboardValues() {
311} 311}
312 312
313void Config::ReadMouseValues() { 313void Config::ReadMouseValues() {
314 Settings::values.mouse_enabled = qt_config->value("mouse_enabled", false).toBool(); 314 Settings::values.mouse_enabled = ReadSetting("mouse_enabled", false).toBool();
315 315
316 for (int i = 0; i < Settings::NativeMouseButton::NumMouseButtons; ++i) { 316 for (int i = 0; i < Settings::NativeMouseButton::NumMouseButtons; ++i) {
317 std::string default_param = InputCommon::GenerateKeyboardParam(default_mouse_buttons[i]); 317 std::string default_param = InputCommon::GenerateKeyboardParam(default_mouse_buttons[i]);
@@ -327,16 +327,14 @@ void Config::ReadMouseValues() {
327} 327}
328 328
329void Config::ReadTouchscreenValues() { 329void Config::ReadTouchscreenValues() {
330 Settings::values.touchscreen.enabled = qt_config->value("touchscreen_enabled", true).toBool(); 330 Settings::values.touchscreen.enabled = ReadSetting("touchscreen_enabled", true).toBool();
331 Settings::values.touchscreen.device = 331 Settings::values.touchscreen.device =
332 qt_config->value("touchscreen_device", "engine:emu_window").toString().toStdString(); 332 ReadSetting("touchscreen_device", "engine:emu_window").toString().toStdString();
333 333
334 Settings::values.touchscreen.finger = qt_config->value("touchscreen_finger", 0).toUInt(); 334 Settings::values.touchscreen.finger = ReadSetting("touchscreen_finger", 0).toUInt();
335 Settings::values.touchscreen.rotation_angle = qt_config->value("touchscreen_angle", 0).toUInt(); 335 Settings::values.touchscreen.rotation_angle = ReadSetting("touchscreen_angle", 0).toUInt();
336 Settings::values.touchscreen.diameter_x = 336 Settings::values.touchscreen.diameter_x = ReadSetting("touchscreen_diameter_x", 15).toUInt();
337 qt_config->value("touchscreen_diameter_x", 15).toUInt(); 337 Settings::values.touchscreen.diameter_y = ReadSetting("touchscreen_diameter_y", 15).toUInt();
338 Settings::values.touchscreen.diameter_y =
339 qt_config->value("touchscreen_diameter_y", 15).toUInt();
340 qt_config->endGroup(); 338 qt_config->endGroup();
341} 339}
342 340
@@ -357,40 +355,41 @@ void Config::ReadValues() {
357 ReadTouchscreenValues(); 355 ReadTouchscreenValues();
358 356
359 Settings::values.motion_device = 357 Settings::values.motion_device =
360 qt_config->value("motion_device", "engine:motion_emu,update_period:100,sensitivity:0.01") 358 ReadSetting("motion_device", "engine:motion_emu,update_period:100,sensitivity:0.01")
361 .toString() 359 .toString()
362 .toStdString(); 360 .toStdString();
363 361
364 qt_config->beginGroup("Core"); 362 qt_config->beginGroup("Core");
365 Settings::values.use_cpu_jit = qt_config->value("use_cpu_jit", true).toBool(); 363 Settings::values.use_cpu_jit = ReadSetting("use_cpu_jit", true).toBool();
366 Settings::values.use_multi_core = qt_config->value("use_multi_core", false).toBool(); 364 Settings::values.use_multi_core = ReadSetting("use_multi_core", false).toBool();
367 qt_config->endGroup(); 365 qt_config->endGroup();
368 366
369 qt_config->beginGroup("Renderer"); 367 qt_config->beginGroup("Renderer");
370 Settings::values.resolution_factor = qt_config->value("resolution_factor", 1.0).toFloat(); 368 Settings::values.resolution_factor = ReadSetting("resolution_factor", 1.0).toFloat();
371 Settings::values.use_frame_limit = qt_config->value("use_frame_limit", true).toBool(); 369 Settings::values.use_frame_limit = ReadSetting("use_frame_limit", true).toBool();
372 Settings::values.frame_limit = qt_config->value("frame_limit", 100).toInt(); 370 Settings::values.frame_limit = ReadSetting("frame_limit", 100).toInt();
373 Settings::values.use_disk_shader_cache = 371 Settings::values.use_disk_shader_cache = ReadSetting("use_disk_shader_cache", true).toBool();
374 qt_config->value("use_disk_shader_cache", false).toBool();
375 Settings::values.use_accurate_gpu_emulation = 372 Settings::values.use_accurate_gpu_emulation =
376 qt_config->value("use_accurate_gpu_emulation", false).toBool(); 373 ReadSetting("use_accurate_gpu_emulation", false).toBool();
374 Settings::values.use_asynchronous_gpu_emulation =
375 ReadSetting("use_asynchronous_gpu_emulation", false).toBool();
377 376
378 Settings::values.bg_red = qt_config->value("bg_red", 0.0).toFloat(); 377 Settings::values.bg_red = ReadSetting("bg_red", 0.0).toFloat();
379 Settings::values.bg_green = qt_config->value("bg_green", 0.0).toFloat(); 378 Settings::values.bg_green = ReadSetting("bg_green", 0.0).toFloat();
380 Settings::values.bg_blue = qt_config->value("bg_blue", 0.0).toFloat(); 379 Settings::values.bg_blue = ReadSetting("bg_blue", 0.0).toFloat();
381 qt_config->endGroup(); 380 qt_config->endGroup();
382 381
383 qt_config->beginGroup("Audio"); 382 qt_config->beginGroup("Audio");
384 Settings::values.sink_id = qt_config->value("output_engine", "auto").toString().toStdString(); 383 Settings::values.sink_id = ReadSetting("output_engine", "auto").toString().toStdString();
385 Settings::values.enable_audio_stretching = 384 Settings::values.enable_audio_stretching =
386 qt_config->value("enable_audio_stretching", true).toBool(); 385 ReadSetting("enable_audio_stretching", true).toBool();
387 Settings::values.audio_device_id = 386 Settings::values.audio_device_id =
388 qt_config->value("output_device", "auto").toString().toStdString(); 387 ReadSetting("output_device", "auto").toString().toStdString();
389 Settings::values.volume = qt_config->value("volume", 1).toFloat(); 388 Settings::values.volume = ReadSetting("volume", 1).toFloat();
390 qt_config->endGroup(); 389 qt_config->endGroup();
391 390
392 qt_config->beginGroup("Data Storage"); 391 qt_config->beginGroup("Data Storage");
393 Settings::values.use_virtual_sd = qt_config->value("use_virtual_sd", true).toBool(); 392 Settings::values.use_virtual_sd = ReadSetting("use_virtual_sd", true).toBool();
394 FileUtil::GetUserPath( 393 FileUtil::GetUserPath(
395 FileUtil::UserPath::NANDDir, 394 FileUtil::UserPath::NANDDir,
396 qt_config 395 qt_config
@@ -408,30 +407,29 @@ void Config::ReadValues() {
408 qt_config->endGroup(); 407 qt_config->endGroup();
409 408
410 qt_config->beginGroup("Core"); 409 qt_config->beginGroup("Core");
411 Settings::values.use_cpu_jit = qt_config->value("use_cpu_jit", true).toBool(); 410 Settings::values.use_cpu_jit = ReadSetting("use_cpu_jit", true).toBool();
412 Settings::values.use_multi_core = qt_config->value("use_multi_core", false).toBool(); 411 Settings::values.use_multi_core = ReadSetting("use_multi_core", false).toBool();
413 qt_config->endGroup(); 412 qt_config->endGroup();
414 413
415 qt_config->beginGroup("System"); 414 qt_config->beginGroup("System");
416 Settings::values.use_docked_mode = qt_config->value("use_docked_mode", false).toBool(); 415 Settings::values.use_docked_mode = ReadSetting("use_docked_mode", false).toBool();
417 Settings::values.enable_nfc = qt_config->value("enable_nfc", true).toBool();
418 416
419 Settings::values.current_user = std::clamp<int>(qt_config->value("current_user", 0).toInt(), 0, 417 Settings::values.current_user =
420 Service::Account::MAX_USERS - 1); 418 std::clamp<int>(ReadSetting("current_user", 0).toInt(), 0, Service::Account::MAX_USERS - 1);
421 419
422 Settings::values.language_index = qt_config->value("language_index", 1).toInt(); 420 Settings::values.language_index = ReadSetting("language_index", 1).toInt();
423 421
424 const auto rng_seed_enabled = qt_config->value("rng_seed_enabled", false).toBool(); 422 const auto rng_seed_enabled = ReadSetting("rng_seed_enabled", false).toBool();
425 if (rng_seed_enabled) { 423 if (rng_seed_enabled) {
426 Settings::values.rng_seed = qt_config->value("rng_seed", 0).toULongLong(); 424 Settings::values.rng_seed = ReadSetting("rng_seed", 0).toULongLong();
427 } else { 425 } else {
428 Settings::values.rng_seed = std::nullopt; 426 Settings::values.rng_seed = std::nullopt;
429 } 427 }
430 428
431 const auto custom_rtc_enabled = qt_config->value("custom_rtc_enabled", false).toBool(); 429 const auto custom_rtc_enabled = ReadSetting("custom_rtc_enabled", false).toBool();
432 if (custom_rtc_enabled) { 430 if (custom_rtc_enabled) {
433 Settings::values.custom_rtc = 431 Settings::values.custom_rtc =
434 std::chrono::seconds(qt_config->value("custom_rtc", 0).toULongLong()); 432 std::chrono::seconds(ReadSetting("custom_rtc", 0).toULongLong());
435 } else { 433 } else {
436 Settings::values.custom_rtc = std::nullopt; 434 Settings::values.custom_rtc = std::nullopt;
437 } 435 }
@@ -439,35 +437,35 @@ void Config::ReadValues() {
439 qt_config->endGroup(); 437 qt_config->endGroup();
440 438
441 qt_config->beginGroup("Miscellaneous"); 439 qt_config->beginGroup("Miscellaneous");
442 Settings::values.log_filter = qt_config->value("log_filter", "*:Info").toString().toStdString(); 440 Settings::values.log_filter = ReadSetting("log_filter", "*:Info").toString().toStdString();
443 Settings::values.use_dev_keys = qt_config->value("use_dev_keys", false).toBool(); 441 Settings::values.use_dev_keys = ReadSetting("use_dev_keys", false).toBool();
444 qt_config->endGroup(); 442 qt_config->endGroup();
445 443
446 qt_config->beginGroup("Debugging"); 444 qt_config->beginGroup("Debugging");
447 Settings::values.use_gdbstub = qt_config->value("use_gdbstub", false).toBool(); 445 Settings::values.use_gdbstub = ReadSetting("use_gdbstub", false).toBool();
448 Settings::values.gdbstub_port = qt_config->value("gdbstub_port", 24689).toInt(); 446 Settings::values.gdbstub_port = ReadSetting("gdbstub_port", 24689).toInt();
449 Settings::values.program_args = qt_config->value("program_args", "").toString().toStdString(); 447 Settings::values.program_args = ReadSetting("program_args", "").toString().toStdString();
450 Settings::values.dump_exefs = qt_config->value("dump_exefs", false).toBool(); 448 Settings::values.dump_exefs = ReadSetting("dump_exefs", false).toBool();
451 Settings::values.dump_nso = qt_config->value("dump_nso", false).toBool(); 449 Settings::values.dump_nso = ReadSetting("dump_nso", false).toBool();
452 qt_config->endGroup(); 450 qt_config->endGroup();
453 451
454 qt_config->beginGroup("WebService"); 452 qt_config->beginGroup("WebService");
455 Settings::values.enable_telemetry = qt_config->value("enable_telemetry", true).toBool(); 453 Settings::values.enable_telemetry = ReadSetting("enable_telemetry", true).toBool();
456 Settings::values.web_api_url = 454 Settings::values.web_api_url =
457 qt_config->value("web_api_url", "https://api.yuzu-emu.org").toString().toStdString(); 455 ReadSetting("web_api_url", "https://api.yuzu-emu.org").toString().toStdString();
458 Settings::values.yuzu_username = qt_config->value("yuzu_username").toString().toStdString(); 456 Settings::values.yuzu_username = ReadSetting("yuzu_username").toString().toStdString();
459 Settings::values.yuzu_token = qt_config->value("yuzu_token").toString().toStdString(); 457 Settings::values.yuzu_token = ReadSetting("yuzu_token").toString().toStdString();
460 qt_config->endGroup(); 458 qt_config->endGroup();
461 459
462 const auto size = qt_config->beginReadArray("DisabledAddOns"); 460 const auto size = qt_config->beginReadArray("DisabledAddOns");
463 for (int i = 0; i < size; ++i) { 461 for (int i = 0; i < size; ++i) {
464 qt_config->setArrayIndex(i); 462 qt_config->setArrayIndex(i);
465 const auto title_id = qt_config->value("title_id", 0).toULongLong(); 463 const auto title_id = ReadSetting("title_id", 0).toULongLong();
466 std::vector<std::string> out; 464 std::vector<std::string> out;
467 const auto d_size = qt_config->beginReadArray("disabled"); 465 const auto d_size = qt_config->beginReadArray("disabled");
468 for (int j = 0; j < d_size; ++j) { 466 for (int j = 0; j < d_size; ++j) {
469 qt_config->setArrayIndex(j); 467 qt_config->setArrayIndex(j);
470 out.push_back(qt_config->value("d", "").toString().toStdString()); 468 out.push_back(ReadSetting("d", "").toString().toStdString());
471 } 469 }
472 qt_config->endArray(); 470 qt_config->endArray();
473 Settings::values.disabled_addons.insert_or_assign(title_id, out); 471 Settings::values.disabled_addons.insert_or_assign(title_id, out);
@@ -475,41 +473,38 @@ void Config::ReadValues() {
475 qt_config->endArray(); 473 qt_config->endArray();
476 474
477 qt_config->beginGroup("UI"); 475 qt_config->beginGroup("UI");
478 UISettings::values.theme = qt_config->value("theme", UISettings::themes[0].second).toString(); 476 UISettings::values.theme = ReadSetting("theme", UISettings::themes[0].second).toString();
479 UISettings::values.enable_discord_presence = 477 UISettings::values.enable_discord_presence =
480 qt_config->value("enable_discord_presence", true).toBool(); 478 ReadSetting("enable_discord_presence", true).toBool();
481 UISettings::values.screenshot_resolution_factor = 479 UISettings::values.screenshot_resolution_factor =
482 static_cast<u16>(qt_config->value("screenshot_resolution_factor", 0).toUInt()); 480 static_cast<u16>(ReadSetting("screenshot_resolution_factor", 0).toUInt());
483 UISettings::values.select_user_on_boot = 481 UISettings::values.select_user_on_boot = ReadSetting("select_user_on_boot", false).toBool();
484 qt_config->value("select_user_on_boot", false).toBool();
485 482
486 qt_config->beginGroup("UIGameList"); 483 qt_config->beginGroup("UIGameList");
487 UISettings::values.show_unknown = qt_config->value("show_unknown", true).toBool(); 484 UISettings::values.show_unknown = ReadSetting("show_unknown", true).toBool();
488 UISettings::values.show_add_ons = qt_config->value("show_add_ons", true).toBool(); 485 UISettings::values.show_add_ons = ReadSetting("show_add_ons", true).toBool();
489 UISettings::values.icon_size = qt_config->value("icon_size", 64).toUInt(); 486 UISettings::values.icon_size = ReadSetting("icon_size", 64).toUInt();
490 UISettings::values.row_1_text_id = qt_config->value("row_1_text_id", 3).toUInt(); 487 UISettings::values.row_1_text_id = ReadSetting("row_1_text_id", 3).toUInt();
491 UISettings::values.row_2_text_id = qt_config->value("row_2_text_id", 2).toUInt(); 488 UISettings::values.row_2_text_id = ReadSetting("row_2_text_id", 2).toUInt();
492 qt_config->endGroup(); 489 qt_config->endGroup();
493 490
494 qt_config->beginGroup("UILayout"); 491 qt_config->beginGroup("UILayout");
495 UISettings::values.geometry = qt_config->value("geometry").toByteArray(); 492 UISettings::values.geometry = ReadSetting("geometry").toByteArray();
496 UISettings::values.state = qt_config->value("state").toByteArray(); 493 UISettings::values.state = ReadSetting("state").toByteArray();
497 UISettings::values.renderwindow_geometry = 494 UISettings::values.renderwindow_geometry = ReadSetting("geometryRenderWindow").toByteArray();
498 qt_config->value("geometryRenderWindow").toByteArray(); 495 UISettings::values.gamelist_header_state = ReadSetting("gameListHeaderState").toByteArray();
499 UISettings::values.gamelist_header_state =
500 qt_config->value("gameListHeaderState").toByteArray();
501 UISettings::values.microprofile_geometry = 496 UISettings::values.microprofile_geometry =
502 qt_config->value("microProfileDialogGeometry").toByteArray(); 497 ReadSetting("microProfileDialogGeometry").toByteArray();
503 UISettings::values.microprofile_visible = 498 UISettings::values.microprofile_visible =
504 qt_config->value("microProfileDialogVisible", false).toBool(); 499 ReadSetting("microProfileDialogVisible", false).toBool();
505 qt_config->endGroup(); 500 qt_config->endGroup();
506 501
507 qt_config->beginGroup("Paths"); 502 qt_config->beginGroup("Paths");
508 UISettings::values.roms_path = qt_config->value("romsPath").toString(); 503 UISettings::values.roms_path = ReadSetting("romsPath").toString();
509 UISettings::values.symbols_path = qt_config->value("symbolsPath").toString(); 504 UISettings::values.symbols_path = ReadSetting("symbolsPath").toString();
510 UISettings::values.gamedir = qt_config->value("gameListRootDir", ".").toString(); 505 UISettings::values.gamedir = ReadSetting("gameListRootDir", ".").toString();
511 UISettings::values.gamedir_deepscan = qt_config->value("gameListDeepScan", false).toBool(); 506 UISettings::values.gamedir_deepscan = ReadSetting("gameListDeepScan", false).toBool();
512 UISettings::values.recent_files = qt_config->value("recentFiles").toStringList(); 507 UISettings::values.recent_files = ReadSetting("recentFiles").toStringList();
513 qt_config->endGroup(); 508 qt_config->endGroup();
514 509
515 qt_config->beginGroup("Shortcuts"); 510 qt_config->beginGroup("Shortcuts");
@@ -522,8 +517,8 @@ void Config::ReadValues() {
522 qt_config->beginGroup(hotkey); 517 qt_config->beginGroup(hotkey);
523 UISettings::values.shortcuts.emplace_back(UISettings::Shortcut( 518 UISettings::values.shortcuts.emplace_back(UISettings::Shortcut(
524 group + "/" + hotkey, 519 group + "/" + hotkey,
525 UISettings::ContextualShortcut(qt_config->value("KeySeq").toString(), 520 UISettings::ContextualShortcut(ReadSetting("KeySeq").toString(),
526 qt_config->value("Context").toInt()))); 521 ReadSetting("Context").toInt())));
527 qt_config->endGroup(); 522 qt_config->endGroup();
528 } 523 }
529 524
@@ -531,16 +526,16 @@ void Config::ReadValues() {
531 } 526 }
532 qt_config->endGroup(); 527 qt_config->endGroup();
533 528
534 UISettings::values.single_window_mode = qt_config->value("singleWindowMode", true).toBool(); 529 UISettings::values.single_window_mode = ReadSetting("singleWindowMode", true).toBool();
535 UISettings::values.fullscreen = qt_config->value("fullscreen", false).toBool(); 530 UISettings::values.fullscreen = ReadSetting("fullscreen", false).toBool();
536 UISettings::values.display_titlebar = qt_config->value("displayTitleBars", true).toBool(); 531 UISettings::values.display_titlebar = ReadSetting("displayTitleBars", true).toBool();
537 UISettings::values.show_filter_bar = qt_config->value("showFilterBar", true).toBool(); 532 UISettings::values.show_filter_bar = ReadSetting("showFilterBar", true).toBool();
538 UISettings::values.show_status_bar = qt_config->value("showStatusBar", true).toBool(); 533 UISettings::values.show_status_bar = ReadSetting("showStatusBar", true).toBool();
539 UISettings::values.confirm_before_closing = qt_config->value("confirmClose", true).toBool(); 534 UISettings::values.confirm_before_closing = ReadSetting("confirmClose", true).toBool();
540 UISettings::values.first_start = qt_config->value("firstStart", true).toBool(); 535 UISettings::values.first_start = ReadSetting("firstStart", true).toBool();
541 UISettings::values.callout_flags = qt_config->value("calloutFlags", 0).toUInt(); 536 UISettings::values.callout_flags = ReadSetting("calloutFlags", 0).toUInt();
542 UISettings::values.show_console = qt_config->value("showConsole", false).toBool(); 537 UISettings::values.show_console = ReadSetting("showConsole", false).toBool();
543 UISettings::values.profile_index = qt_config->value("profileIndex", 0).toUInt(); 538 UISettings::values.profile_index = ReadSetting("profileIndex", 0).toUInt();
544 539
545 ApplyDefaultProfileIfInputInvalid(); 540 ApplyDefaultProfileIfInputInvalid();
546 541
@@ -551,62 +546,79 @@ void Config::SavePlayerValues() {
551 for (std::size_t p = 0; p < Settings::values.players.size(); ++p) { 546 for (std::size_t p = 0; p < Settings::values.players.size(); ++p) {
552 const auto& player = Settings::values.players[p]; 547 const auto& player = Settings::values.players[p];
553 548
554 qt_config->setValue(QString("player_%1_connected").arg(p), player.connected); 549 WriteSetting(QString("player_%1_connected").arg(p), player.connected, false);
555 qt_config->setValue(QString("player_%1_type").arg(p), static_cast<u8>(player.type)); 550 WriteSetting(QString("player_%1_type").arg(p), static_cast<u8>(player.type),
551 static_cast<u8>(Settings::ControllerType::DualJoycon));
556 552
557 qt_config->setValue(QString("player_%1_body_color_left").arg(p), player.body_color_left); 553 WriteSetting(QString("player_%1_body_color_left").arg(p), player.body_color_left,
558 qt_config->setValue(QString("player_%1_body_color_right").arg(p), player.body_color_right); 554 Settings::JOYCON_BODY_NEON_BLUE);
559 qt_config->setValue(QString("player_%1_button_color_left").arg(p), 555 WriteSetting(QString("player_%1_body_color_right").arg(p), player.body_color_right,
560 player.button_color_left); 556 Settings::JOYCON_BODY_NEON_RED);
561 qt_config->setValue(QString("player_%1_button_color_right").arg(p), 557 WriteSetting(QString("player_%1_button_color_left").arg(p), player.button_color_left,
562 player.button_color_right); 558 Settings::JOYCON_BUTTONS_NEON_BLUE);
559 WriteSetting(QString("player_%1_button_color_right").arg(p), player.button_color_right,
560 Settings::JOYCON_BUTTONS_NEON_RED);
563 561
564 for (int i = 0; i < Settings::NativeButton::NumButtons; ++i) { 562 for (int i = 0; i < Settings::NativeButton::NumButtons; ++i) {
565 qt_config->setValue(QString("player_%1_").arg(p) + 563 std::string default_param = InputCommon::GenerateKeyboardParam(default_buttons[i]);
566 QString::fromStdString(Settings::NativeButton::mapping[i]), 564 WriteSetting(QString("player_%1_").arg(p) +
567 QString::fromStdString(player.buttons[i])); 565 QString::fromStdString(Settings::NativeButton::mapping[i]),
566 QString::fromStdString(player.buttons[i]),
567 QString::fromStdString(default_param));
568 } 568 }
569 for (int i = 0; i < Settings::NativeAnalog::NumAnalogs; ++i) { 569 for (int i = 0; i < Settings::NativeAnalog::NumAnalogs; ++i) {
570 qt_config->setValue(QString("player_%1_").arg(p) + 570 std::string default_param = InputCommon::GenerateAnalogParamFromKeys(
571 QString::fromStdString(Settings::NativeAnalog::mapping[i]), 571 default_analogs[i][0], default_analogs[i][1], default_analogs[i][2],
572 QString::fromStdString(player.analogs[i])); 572 default_analogs[i][3], default_analogs[i][4], 0.5f);
573 WriteSetting(QString("player_%1_").arg(p) +
574 QString::fromStdString(Settings::NativeAnalog::mapping[i]),
575 QString::fromStdString(player.analogs[i]),
576 QString::fromStdString(default_param));
573 } 577 }
574 } 578 }
575} 579}
576 580
577void Config::SaveDebugValues() { 581void Config::SaveDebugValues() {
578 qt_config->setValue("debug_pad_enabled", Settings::values.debug_pad_enabled); 582 WriteSetting("debug_pad_enabled", Settings::values.debug_pad_enabled, false);
579 for (int i = 0; i < Settings::NativeButton::NumButtons; ++i) { 583 for (int i = 0; i < Settings::NativeButton::NumButtons; ++i) {
580 qt_config->setValue(QString("debug_pad_") + 584 std::string default_param = InputCommon::GenerateKeyboardParam(default_buttons[i]);
581 QString::fromStdString(Settings::NativeButton::mapping[i]), 585 WriteSetting(QString("debug_pad_") +
582 QString::fromStdString(Settings::values.debug_pad_buttons[i])); 586 QString::fromStdString(Settings::NativeButton::mapping[i]),
587 QString::fromStdString(Settings::values.debug_pad_buttons[i]),
588 QString::fromStdString(default_param));
583 } 589 }
584 for (int i = 0; i < Settings::NativeAnalog::NumAnalogs; ++i) { 590 for (int i = 0; i < Settings::NativeAnalog::NumAnalogs; ++i) {
585 qt_config->setValue(QString("debug_pad_") + 591 std::string default_param = InputCommon::GenerateAnalogParamFromKeys(
586 QString::fromStdString(Settings::NativeAnalog::mapping[i]), 592 default_analogs[i][0], default_analogs[i][1], default_analogs[i][2],
587 QString::fromStdString(Settings::values.debug_pad_analogs[i])); 593 default_analogs[i][3], default_analogs[i][4], 0.5f);
594 WriteSetting(QString("debug_pad_") +
595 QString::fromStdString(Settings::NativeAnalog::mapping[i]),
596 QString::fromStdString(Settings::values.debug_pad_analogs[i]),
597 QString::fromStdString(default_param));
588 } 598 }
589} 599}
590 600
591void Config::SaveMouseValues() { 601void Config::SaveMouseValues() {
592 qt_config->setValue("mouse_enabled", Settings::values.mouse_enabled); 602 WriteSetting("mouse_enabled", Settings::values.mouse_enabled, false);
593 603
594 for (int i = 0; i < Settings::NativeMouseButton::NumMouseButtons; ++i) { 604 for (int i = 0; i < Settings::NativeMouseButton::NumMouseButtons; ++i) {
595 qt_config->setValue(QString("mouse_") + 605 std::string default_param = InputCommon::GenerateKeyboardParam(default_mouse_buttons[i]);
596 QString::fromStdString(Settings::NativeMouseButton::mapping[i]), 606 WriteSetting(QString("mouse_") +
597 QString::fromStdString(Settings::values.mouse_buttons[i])); 607 QString::fromStdString(Settings::NativeMouseButton::mapping[i]),
608 QString::fromStdString(Settings::values.mouse_buttons[i]),
609 QString::fromStdString(default_param));
598 } 610 }
599} 611}
600 612
601void Config::SaveTouchscreenValues() { 613void Config::SaveTouchscreenValues() {
602 qt_config->setValue("touchscreen_enabled", Settings::values.touchscreen.enabled); 614 WriteSetting("touchscreen_enabled", Settings::values.touchscreen.enabled, true);
603 qt_config->setValue("touchscreen_device", 615 WriteSetting("touchscreen_device", QString::fromStdString(Settings::values.touchscreen.device),
604 QString::fromStdString(Settings::values.touchscreen.device)); 616 "engine:emu_window");
605 617
606 qt_config->setValue("touchscreen_finger", Settings::values.touchscreen.finger); 618 WriteSetting("touchscreen_finger", Settings::values.touchscreen.finger, 0);
607 qt_config->setValue("touchscreen_angle", Settings::values.touchscreen.rotation_angle); 619 WriteSetting("touchscreen_angle", Settings::values.touchscreen.rotation_angle, 0);
608 qt_config->setValue("touchscreen_diameter_x", Settings::values.touchscreen.diameter_x); 620 WriteSetting("touchscreen_diameter_x", Settings::values.touchscreen.diameter_x, 15);
609 qt_config->setValue("touchscreen_diameter_y", Settings::values.touchscreen.diameter_y); 621 WriteSetting("touchscreen_diameter_y", Settings::values.touchscreen.diameter_y, 15);
610} 622}
611 623
612void Config::SaveValues() { 624void Config::SaveValues() {
@@ -617,89 +629,95 @@ void Config::SaveValues() {
617 SaveMouseValues(); 629 SaveMouseValues();
618 SaveTouchscreenValues(); 630 SaveTouchscreenValues();
619 631
620 qt_config->setValue("motion_device", QString::fromStdString(Settings::values.motion_device)); 632 WriteSetting("motion_device", QString::fromStdString(Settings::values.motion_device),
621 qt_config->setValue("keyboard_enabled", Settings::values.keyboard_enabled); 633 "engine:motion_emu,update_period:100,sensitivity:0.01");
634 WriteSetting("keyboard_enabled", Settings::values.keyboard_enabled, false);
622 635
623 qt_config->endGroup(); 636 qt_config->endGroup();
624 637
625 qt_config->beginGroup("Core"); 638 qt_config->beginGroup("Core");
626 qt_config->setValue("use_cpu_jit", Settings::values.use_cpu_jit); 639 WriteSetting("use_cpu_jit", Settings::values.use_cpu_jit, true);
627 qt_config->setValue("use_multi_core", Settings::values.use_multi_core); 640 WriteSetting("use_multi_core", Settings::values.use_multi_core, false);
628 qt_config->endGroup(); 641 qt_config->endGroup();
629 642
630 qt_config->beginGroup("Renderer"); 643 qt_config->beginGroup("Renderer");
631 qt_config->setValue("resolution_factor", (double)Settings::values.resolution_factor); 644 WriteSetting("resolution_factor", (double)Settings::values.resolution_factor, 1.0);
632 qt_config->setValue("use_frame_limit", Settings::values.use_frame_limit); 645 WriteSetting("use_frame_limit", Settings::values.use_frame_limit, true);
633 qt_config->setValue("frame_limit", Settings::values.frame_limit); 646 WriteSetting("frame_limit", Settings::values.frame_limit, 100);
634 qt_config->setValue("use_disk_shader_cache", Settings::values.use_disk_shader_cache); 647 WriteSetting("use_disk_shader_cache", Settings::values.use_disk_shader_cache, true);
635 qt_config->setValue("use_accurate_gpu_emulation", Settings::values.use_accurate_gpu_emulation); 648 WriteSetting("use_accurate_gpu_emulation", Settings::values.use_accurate_gpu_emulation, false);
649 WriteSetting("use_asynchronous_gpu_emulation", Settings::values.use_asynchronous_gpu_emulation,
650 false);
636 651
637 // Cast to double because Qt's written float values are not human-readable 652 // Cast to double because Qt's written float values are not human-readable
638 qt_config->setValue("bg_red", (double)Settings::values.bg_red); 653 WriteSetting("bg_red", (double)Settings::values.bg_red, 0.0);
639 qt_config->setValue("bg_green", (double)Settings::values.bg_green); 654 WriteSetting("bg_green", (double)Settings::values.bg_green, 0.0);
640 qt_config->setValue("bg_blue", (double)Settings::values.bg_blue); 655 WriteSetting("bg_blue", (double)Settings::values.bg_blue, 0.0);
641 qt_config->endGroup(); 656 qt_config->endGroup();
642 657
643 qt_config->beginGroup("Audio"); 658 qt_config->beginGroup("Audio");
644 qt_config->setValue("output_engine", QString::fromStdString(Settings::values.sink_id)); 659 WriteSetting("output_engine", QString::fromStdString(Settings::values.sink_id), "auto");
645 qt_config->setValue("enable_audio_stretching", Settings::values.enable_audio_stretching); 660 WriteSetting("enable_audio_stretching", Settings::values.enable_audio_stretching, true);
646 qt_config->setValue("output_device", QString::fromStdString(Settings::values.audio_device_id)); 661 WriteSetting("output_device", QString::fromStdString(Settings::values.audio_device_id), "auto");
647 qt_config->setValue("volume", Settings::values.volume); 662 WriteSetting("volume", Settings::values.volume, 1.0f);
648 qt_config->endGroup(); 663 qt_config->endGroup();
649 664
650 qt_config->beginGroup("Data Storage"); 665 qt_config->beginGroup("Data Storage");
651 qt_config->setValue("use_virtual_sd", Settings::values.use_virtual_sd); 666 WriteSetting("use_virtual_sd", Settings::values.use_virtual_sd, true);
652 qt_config->setValue("nand_directory", 667 WriteSetting("nand_directory",
653 QString::fromStdString(FileUtil::GetUserPath(FileUtil::UserPath::NANDDir))); 668 QString::fromStdString(FileUtil::GetUserPath(FileUtil::UserPath::NANDDir)),
654 qt_config->setValue("sdmc_directory", 669 QString::fromStdString(FileUtil::GetUserPath(FileUtil::UserPath::NANDDir)));
655 QString::fromStdString(FileUtil::GetUserPath(FileUtil::UserPath::SDMCDir))); 670 WriteSetting("sdmc_directory",
671 QString::fromStdString(FileUtil::GetUserPath(FileUtil::UserPath::SDMCDir)),
672 QString::fromStdString(FileUtil::GetUserPath(FileUtil::UserPath::SDMCDir)));
656 qt_config->endGroup(); 673 qt_config->endGroup();
657 674
658 qt_config->beginGroup("System"); 675 qt_config->beginGroup("System");
659 qt_config->setValue("use_docked_mode", Settings::values.use_docked_mode); 676 WriteSetting("use_docked_mode", Settings::values.use_docked_mode, false);
660 qt_config->setValue("enable_nfc", Settings::values.enable_nfc); 677 WriteSetting("current_user", Settings::values.current_user, 0);
661 qt_config->setValue("current_user", Settings::values.current_user); 678 WriteSetting("language_index", Settings::values.language_index, 1);
662 qt_config->setValue("language_index", Settings::values.language_index);
663 679
664 qt_config->setValue("rng_seed_enabled", Settings::values.rng_seed.has_value()); 680 WriteSetting("rng_seed_enabled", Settings::values.rng_seed.has_value(), false);
665 qt_config->setValue("rng_seed", Settings::values.rng_seed.value_or(0)); 681 WriteSetting("rng_seed", Settings::values.rng_seed.value_or(0), 0);
666 682
667 qt_config->setValue("custom_rtc_enabled", Settings::values.custom_rtc.has_value()); 683 WriteSetting("custom_rtc_enabled", Settings::values.custom_rtc.has_value(), false);
668 qt_config->setValue("custom_rtc", 684 WriteSetting("custom_rtc",
669 QVariant::fromValue<long long>( 685 QVariant::fromValue<long long>(
670 Settings::values.custom_rtc.value_or(std::chrono::seconds{}).count())); 686 Settings::values.custom_rtc.value_or(std::chrono::seconds{}).count()),
687 0);
671 688
672 qt_config->endGroup(); 689 qt_config->endGroup();
673 690
674 qt_config->beginGroup("Miscellaneous"); 691 qt_config->beginGroup("Miscellaneous");
675 qt_config->setValue("log_filter", QString::fromStdString(Settings::values.log_filter)); 692 WriteSetting("log_filter", QString::fromStdString(Settings::values.log_filter), "*:Info");
676 qt_config->setValue("use_dev_keys", Settings::values.use_dev_keys); 693 WriteSetting("use_dev_keys", Settings::values.use_dev_keys, false);
677 qt_config->endGroup(); 694 qt_config->endGroup();
678 695
679 qt_config->beginGroup("Debugging"); 696 qt_config->beginGroup("Debugging");
680 qt_config->setValue("use_gdbstub", Settings::values.use_gdbstub); 697 WriteSetting("use_gdbstub", Settings::values.use_gdbstub, false);
681 qt_config->setValue("gdbstub_port", Settings::values.gdbstub_port); 698 WriteSetting("gdbstub_port", Settings::values.gdbstub_port, 24689);
682 qt_config->setValue("program_args", QString::fromStdString(Settings::values.program_args)); 699 WriteSetting("program_args", QString::fromStdString(Settings::values.program_args), "");
683 qt_config->setValue("dump_exefs", Settings::values.dump_exefs); 700 WriteSetting("dump_exefs", Settings::values.dump_exefs, false);
684 qt_config->setValue("dump_nso", Settings::values.dump_nso); 701 WriteSetting("dump_nso", Settings::values.dump_nso, false);
685 qt_config->endGroup(); 702 qt_config->endGroup();
686 703
687 qt_config->beginGroup("WebService"); 704 qt_config->beginGroup("WebService");
688 qt_config->setValue("enable_telemetry", Settings::values.enable_telemetry); 705 WriteSetting("enable_telemetry", Settings::values.enable_telemetry, true);
689 qt_config->setValue("web_api_url", QString::fromStdString(Settings::values.web_api_url)); 706 WriteSetting("web_api_url", QString::fromStdString(Settings::values.web_api_url),
690 qt_config->setValue("yuzu_username", QString::fromStdString(Settings::values.yuzu_username)); 707 "https://api.yuzu-emu.org");
691 qt_config->setValue("yuzu_token", QString::fromStdString(Settings::values.yuzu_token)); 708 WriteSetting("yuzu_username", QString::fromStdString(Settings::values.yuzu_username));
709 WriteSetting("yuzu_token", QString::fromStdString(Settings::values.yuzu_token));
692 qt_config->endGroup(); 710 qt_config->endGroup();
693 711
694 qt_config->beginWriteArray("DisabledAddOns"); 712 qt_config->beginWriteArray("DisabledAddOns");
695 int i = 0; 713 int i = 0;
696 for (const auto& elem : Settings::values.disabled_addons) { 714 for (const auto& elem : Settings::values.disabled_addons) {
697 qt_config->setArrayIndex(i); 715 qt_config->setArrayIndex(i);
698 qt_config->setValue("title_id", QVariant::fromValue<u64>(elem.first)); 716 WriteSetting("title_id", QVariant::fromValue<u64>(elem.first), 0);
699 qt_config->beginWriteArray("disabled"); 717 qt_config->beginWriteArray("disabled");
700 for (std::size_t j = 0; j < elem.second.size(); ++j) { 718 for (std::size_t j = 0; j < elem.second.size(); ++j) {
701 qt_config->setArrayIndex(static_cast<int>(j)); 719 qt_config->setArrayIndex(static_cast<int>(j));
702 qt_config->setValue("d", QString::fromStdString(elem.second[j])); 720 WriteSetting("d", QString::fromStdString(elem.second[j]), "");
703 } 721 }
704 qt_config->endArray(); 722 qt_config->endArray();
705 ++i; 723 ++i;
@@ -707,60 +725,86 @@ void Config::SaveValues() {
707 qt_config->endArray(); 725 qt_config->endArray();
708 726
709 qt_config->beginGroup("UI"); 727 qt_config->beginGroup("UI");
710 qt_config->setValue("theme", UISettings::values.theme); 728 WriteSetting("theme", UISettings::values.theme, UISettings::themes[0].second);
711 qt_config->setValue("enable_discord_presence", UISettings::values.enable_discord_presence); 729 WriteSetting("enable_discord_presence", UISettings::values.enable_discord_presence, true);
712 qt_config->setValue("screenshot_resolution_factor", 730 WriteSetting("screenshot_resolution_factor", UISettings::values.screenshot_resolution_factor,
713 UISettings::values.screenshot_resolution_factor); 731 0);
714 qt_config->setValue("select_user_on_boot", UISettings::values.select_user_on_boot); 732 WriteSetting("select_user_on_boot", UISettings::values.select_user_on_boot, false);
715 733
716 qt_config->beginGroup("UIGameList"); 734 qt_config->beginGroup("UIGameList");
717 qt_config->setValue("show_unknown", UISettings::values.show_unknown); 735 WriteSetting("show_unknown", UISettings::values.show_unknown, true);
718 qt_config->setValue("show_add_ons", UISettings::values.show_add_ons); 736 WriteSetting("show_add_ons", UISettings::values.show_add_ons, true);
719 qt_config->setValue("icon_size", UISettings::values.icon_size); 737 WriteSetting("icon_size", UISettings::values.icon_size, 64);
720 qt_config->setValue("row_1_text_id", UISettings::values.row_1_text_id); 738 WriteSetting("row_1_text_id", UISettings::values.row_1_text_id, 3);
721 qt_config->setValue("row_2_text_id", UISettings::values.row_2_text_id); 739 WriteSetting("row_2_text_id", UISettings::values.row_2_text_id, 2);
722 qt_config->endGroup(); 740 qt_config->endGroup();
723 741
724 qt_config->beginGroup("UILayout"); 742 qt_config->beginGroup("UILayout");
725 qt_config->setValue("geometry", UISettings::values.geometry); 743 WriteSetting("geometry", UISettings::values.geometry);
726 qt_config->setValue("state", UISettings::values.state); 744 WriteSetting("state", UISettings::values.state);
727 qt_config->setValue("geometryRenderWindow", UISettings::values.renderwindow_geometry); 745 WriteSetting("geometryRenderWindow", UISettings::values.renderwindow_geometry);
728 qt_config->setValue("gameListHeaderState", UISettings::values.gamelist_header_state); 746 WriteSetting("gameListHeaderState", UISettings::values.gamelist_header_state);
729 qt_config->setValue("microProfileDialogGeometry", UISettings::values.microprofile_geometry); 747 WriteSetting("microProfileDialogGeometry", UISettings::values.microprofile_geometry);
730 qt_config->setValue("microProfileDialogVisible", UISettings::values.microprofile_visible); 748 WriteSetting("microProfileDialogVisible", UISettings::values.microprofile_visible, false);
731 qt_config->endGroup(); 749 qt_config->endGroup();
732 750
733 qt_config->beginGroup("Paths"); 751 qt_config->beginGroup("Paths");
734 qt_config->setValue("romsPath", UISettings::values.roms_path); 752 WriteSetting("romsPath", UISettings::values.roms_path);
735 qt_config->setValue("symbolsPath", UISettings::values.symbols_path); 753 WriteSetting("symbolsPath", UISettings::values.symbols_path);
736 qt_config->setValue("screenshotPath", UISettings::values.screenshot_path); 754 WriteSetting("screenshotPath", UISettings::values.screenshot_path);
737 qt_config->setValue("gameListRootDir", UISettings::values.gamedir); 755 WriteSetting("gameListRootDir", UISettings::values.gamedir, ".");
738 qt_config->setValue("gameListDeepScan", UISettings::values.gamedir_deepscan); 756 WriteSetting("gameListDeepScan", UISettings::values.gamedir_deepscan, false);
739 qt_config->setValue("recentFiles", UISettings::values.recent_files); 757 WriteSetting("recentFiles", UISettings::values.recent_files);
740 qt_config->endGroup(); 758 qt_config->endGroup();
741 759
742 qt_config->beginGroup("Shortcuts"); 760 qt_config->beginGroup("Shortcuts");
743 for (auto shortcut : UISettings::values.shortcuts) { 761 for (auto shortcut : UISettings::values.shortcuts) {
744 qt_config->setValue(shortcut.first + "/KeySeq", shortcut.second.first); 762 WriteSetting(shortcut.first + "/KeySeq", shortcut.second.first);
745 qt_config->setValue(shortcut.first + "/Context", shortcut.second.second); 763 WriteSetting(shortcut.first + "/Context", shortcut.second.second);
746 } 764 }
747 qt_config->endGroup(); 765 qt_config->endGroup();
748 766
749 qt_config->setValue("singleWindowMode", UISettings::values.single_window_mode); 767 WriteSetting("singleWindowMode", UISettings::values.single_window_mode, true);
750 qt_config->setValue("fullscreen", UISettings::values.fullscreen); 768 WriteSetting("fullscreen", UISettings::values.fullscreen, false);
751 qt_config->setValue("displayTitleBars", UISettings::values.display_titlebar); 769 WriteSetting("displayTitleBars", UISettings::values.display_titlebar, true);
752 qt_config->setValue("showFilterBar", UISettings::values.show_filter_bar); 770 WriteSetting("showFilterBar", UISettings::values.show_filter_bar, true);
753 qt_config->setValue("showStatusBar", UISettings::values.show_status_bar); 771 WriteSetting("showStatusBar", UISettings::values.show_status_bar, true);
754 qt_config->setValue("confirmClose", UISettings::values.confirm_before_closing); 772 WriteSetting("confirmClose", UISettings::values.confirm_before_closing, true);
755 qt_config->setValue("firstStart", UISettings::values.first_start); 773 WriteSetting("firstStart", UISettings::values.first_start, true);
756 qt_config->setValue("calloutFlags", UISettings::values.callout_flags); 774 WriteSetting("calloutFlags", UISettings::values.callout_flags, 0);
757 qt_config->setValue("showConsole", UISettings::values.show_console); 775 WriteSetting("showConsole", UISettings::values.show_console, false);
758 qt_config->setValue("profileIndex", UISettings::values.profile_index); 776 WriteSetting("profileIndex", UISettings::values.profile_index, 0);
759 qt_config->endGroup(); 777 qt_config->endGroup();
760} 778}
761 779
780QVariant Config::ReadSetting(const QString& name) const {
781 return qt_config->value(name);
782}
783
784QVariant Config::ReadSetting(const QString& name, const QVariant& default_value) const {
785 QVariant result;
786 if (qt_config->value(name + "/default", false).toBool()) {
787 result = default_value;
788 } else {
789 result = qt_config->value(name, default_value);
790 }
791 return result;
792}
793
794void Config::WriteSetting(const QString& name, const QVariant& value) {
795 qt_config->setValue(name, value);
796}
797
798void Config::WriteSetting(const QString& name, const QVariant& value,
799 const QVariant& default_value) {
800 qt_config->setValue(name + "/default", value == default_value);
801 qt_config->setValue(name, value);
802}
803
762void Config::Reload() { 804void Config::Reload() {
763 ReadValues(); 805 ReadValues();
806 // To apply default value changes
807 SaveValues();
764 Settings::Apply(); 808 Settings::Apply();
765} 809}
766 810
diff --git a/src/yuzu/configuration/config.h b/src/yuzu/configuration/config.h
index e73ad19bb..f4185db18 100644
--- a/src/yuzu/configuration/config.h
+++ b/src/yuzu/configuration/config.h
@@ -42,6 +42,11 @@ private:
42 void SaveMouseValues(); 42 void SaveMouseValues();
43 void SaveTouchscreenValues(); 43 void SaveTouchscreenValues();
44 44
45 QVariant ReadSetting(const QString& name) const;
46 QVariant ReadSetting(const QString& name, const QVariant& default_value) const;
47 void WriteSetting(const QString& name, const QVariant& value);
48 void WriteSetting(const QString& name, const QVariant& value, const QVariant& default_value);
49
45 std::unique_ptr<QSettings> qt_config; 50 std::unique_ptr<QSettings> qt_config;
46 std::string qt_config_loc; 51 std::string qt_config_loc;
47}; 52};
diff --git a/src/yuzu/configuration/configure_general.cpp b/src/yuzu/configuration/configure_general.cpp
index 4116b6cd7..389fcf667 100644
--- a/src/yuzu/configuration/configure_general.cpp
+++ b/src/yuzu/configuration/configure_general.cpp
@@ -33,7 +33,6 @@ void ConfigureGeneral::setConfiguration() {
33 ui->toggle_user_on_boot->setChecked(UISettings::values.select_user_on_boot); 33 ui->toggle_user_on_boot->setChecked(UISettings::values.select_user_on_boot);
34 ui->theme_combobox->setCurrentIndex(ui->theme_combobox->findData(UISettings::values.theme)); 34 ui->theme_combobox->setCurrentIndex(ui->theme_combobox->findData(UISettings::values.theme));
35 ui->use_cpu_jit->setChecked(Settings::values.use_cpu_jit); 35 ui->use_cpu_jit->setChecked(Settings::values.use_cpu_jit);
36 ui->enable_nfc->setChecked(Settings::values.enable_nfc);
37} 36}
38 37
39void ConfigureGeneral::PopulateHotkeyList(const HotkeyRegistry& registry) { 38void ConfigureGeneral::PopulateHotkeyList(const HotkeyRegistry& registry) {
@@ -48,5 +47,4 @@ void ConfigureGeneral::applyConfiguration() {
48 ui->theme_combobox->itemData(ui->theme_combobox->currentIndex()).toString(); 47 ui->theme_combobox->itemData(ui->theme_combobox->currentIndex()).toString();
49 48
50 Settings::values.use_cpu_jit = ui->use_cpu_jit->isChecked(); 49 Settings::values.use_cpu_jit = ui->use_cpu_jit->isChecked();
51 Settings::values.enable_nfc = ui->enable_nfc->isChecked();
52} 50}
diff --git a/src/yuzu/configuration/configure_general.ui b/src/yuzu/configuration/configure_general.ui
index dff0ad5d0..01d1c0b8e 100644
--- a/src/yuzu/configuration/configure_general.ui
+++ b/src/yuzu/configuration/configure_general.ui
@@ -71,26 +71,6 @@
71 </widget> 71 </widget>
72 </item> 72 </item>
73 <item> 73 <item>
74 <widget class="QGroupBox" name="EmulationGroupBox">
75 <property name="title">
76 <string>Emulation</string>
77 </property>
78 <layout class="QHBoxLayout" name="EmulationHorizontalLayout">
79 <item>
80 <layout class="QVBoxLayout" name="EmulationVerticalLayout">
81 <item>
82 <widget class="QCheckBox" name="enable_nfc">
83 <property name="text">
84 <string>Enable NFC</string>
85 </property>
86 </widget>
87 </item>
88 </layout>
89 </item>
90 </layout>
91 </widget>
92 </item>
93 <item>
94 <widget class="QGroupBox" name="theme_group_box"> 74 <widget class="QGroupBox" name="theme_group_box">
95 <property name="title"> 75 <property name="title">
96 <string>Theme</string> 76 <string>Theme</string>
diff --git a/src/yuzu/configuration/configure_graphics.cpp b/src/yuzu/configuration/configure_graphics.cpp
index 0f5dd534b..dd1d67488 100644
--- a/src/yuzu/configuration/configure_graphics.cpp
+++ b/src/yuzu/configuration/configure_graphics.cpp
@@ -75,6 +75,8 @@ void ConfigureGraphics::setConfiguration() {
75 ui->frame_limit->setValue(Settings::values.frame_limit); 75 ui->frame_limit->setValue(Settings::values.frame_limit);
76 ui->use_disk_shader_cache->setChecked(Settings::values.use_disk_shader_cache); 76 ui->use_disk_shader_cache->setChecked(Settings::values.use_disk_shader_cache);
77 ui->use_accurate_gpu_emulation->setChecked(Settings::values.use_accurate_gpu_emulation); 77 ui->use_accurate_gpu_emulation->setChecked(Settings::values.use_accurate_gpu_emulation);
78 ui->use_asynchronous_gpu_emulation->setEnabled(!Core::System::GetInstance().IsPoweredOn());
79 ui->use_asynchronous_gpu_emulation->setChecked(Settings::values.use_asynchronous_gpu_emulation);
78 UpdateBackgroundColorButton(QColor::fromRgbF(Settings::values.bg_red, Settings::values.bg_green, 80 UpdateBackgroundColorButton(QColor::fromRgbF(Settings::values.bg_red, Settings::values.bg_green,
79 Settings::values.bg_blue)); 81 Settings::values.bg_blue));
80} 82}
@@ -86,6 +88,8 @@ void ConfigureGraphics::applyConfiguration() {
86 Settings::values.frame_limit = ui->frame_limit->value(); 88 Settings::values.frame_limit = ui->frame_limit->value();
87 Settings::values.use_disk_shader_cache = ui->use_disk_shader_cache->isChecked(); 89 Settings::values.use_disk_shader_cache = ui->use_disk_shader_cache->isChecked();
88 Settings::values.use_accurate_gpu_emulation = ui->use_accurate_gpu_emulation->isChecked(); 90 Settings::values.use_accurate_gpu_emulation = ui->use_accurate_gpu_emulation->isChecked();
91 Settings::values.use_asynchronous_gpu_emulation =
92 ui->use_asynchronous_gpu_emulation->isChecked();
89 Settings::values.bg_red = static_cast<float>(bg_color.redF()); 93 Settings::values.bg_red = static_cast<float>(bg_color.redF());
90 Settings::values.bg_green = static_cast<float>(bg_color.greenF()); 94 Settings::values.bg_green = static_cast<float>(bg_color.greenF());
91 Settings::values.bg_blue = static_cast<float>(bg_color.blueF()); 95 Settings::values.bg_blue = static_cast<float>(bg_color.blueF());
diff --git a/src/yuzu/configuration/configure_graphics.ui b/src/yuzu/configuration/configure_graphics.ui
index 824f5810a..c6767e0ca 100644
--- a/src/yuzu/configuration/configure_graphics.ui
+++ b/src/yuzu/configuration/configure_graphics.ui
@@ -64,6 +64,13 @@
64 </widget> 64 </widget>
65 </item> 65 </item>
66 <item> 66 <item>
67 <widget class="QCheckBox" name="use_asynchronous_gpu_emulation">
68 <property name="text">
69 <string>Use asynchronous GPU emulation</string>
70 </property>
71 </widget>
72 </item>
73 <item>
67 <layout class="QHBoxLayout" name="horizontalLayout"> 74 <layout class="QHBoxLayout" name="horizontalLayout">
68 <item> 75 <item>
69 <widget class="QLabel" name="label"> 76 <widget class="QLabel" name="label">
diff --git a/src/yuzu/debugger/graphics/graphics_surface.cpp b/src/yuzu/debugger/graphics/graphics_surface.cpp
index 209798521..11023ed63 100644
--- a/src/yuzu/debugger/graphics/graphics_surface.cpp
+++ b/src/yuzu/debugger/graphics/graphics_surface.cpp
@@ -261,7 +261,7 @@ void GraphicsSurfaceWidget::OnSurfaceSourceChanged(int new_value) {
261 261
262void GraphicsSurfaceWidget::OnSurfaceAddressChanged(qint64 new_value) { 262void GraphicsSurfaceWidget::OnSurfaceAddressChanged(qint64 new_value) {
263 if (surface_address != new_value) { 263 if (surface_address != new_value) {
264 surface_address = static_cast<Tegra::GPUVAddr>(new_value); 264 surface_address = static_cast<GPUVAddr>(new_value);
265 265
266 surface_source_list->setCurrentIndex(static_cast<int>(Source::Custom)); 266 surface_source_list->setCurrentIndex(static_cast<int>(Source::Custom));
267 emit Update(); 267 emit Update();
@@ -383,13 +383,12 @@ void GraphicsSurfaceWidget::OnUpdate() {
383 // TODO: Implement a good way to visualize alpha components! 383 // TODO: Implement a good way to visualize alpha components!
384 384
385 QImage decoded_image(surface_width, surface_height, QImage::Format_ARGB32); 385 QImage decoded_image(surface_width, surface_height, QImage::Format_ARGB32);
386 std::optional<VAddr> address = gpu.MemoryManager().GpuToCpuAddress(surface_address);
387 386
388 // TODO(bunnei): Will not work with BCn formats that swizzle 4x4 tiles. 387 // TODO(bunnei): Will not work with BCn formats that swizzle 4x4 tiles.
389 // Needs to be fixed if we plan to use this feature more, otherwise we may remove it. 388 // Needs to be fixed if we plan to use this feature more, otherwise we may remove it.
390 auto unswizzled_data = Tegra::Texture::UnswizzleTexture( 389 auto unswizzled_data = Tegra::Texture::UnswizzleTexture(
391 *address, 1, 1, Tegra::Texture::BytesPerPixel(surface_format), surface_width, 390 gpu.MemoryManager().GetPointer(surface_address), 1, 1,
392 surface_height, 1U); 391 Tegra::Texture::BytesPerPixel(surface_format), surface_width, surface_height, 1U);
393 392
394 auto texture_data = Tegra::Texture::DecodeTexture(unswizzled_data, surface_format, 393 auto texture_data = Tegra::Texture::DecodeTexture(unswizzled_data, surface_format,
395 surface_width, surface_height); 394 surface_width, surface_height);
@@ -398,7 +397,7 @@ void GraphicsSurfaceWidget::OnUpdate() {
398 397
399 for (unsigned int y = 0; y < surface_height; ++y) { 398 for (unsigned int y = 0; y < surface_height; ++y) {
400 for (unsigned int x = 0; x < surface_width; ++x) { 399 for (unsigned int x = 0; x < surface_width; ++x) {
401 Math::Vec4<u8> color; 400 Common::Vec4<u8> color;
402 color[0] = texture_data[x + y * surface_width + 0]; 401 color[0] = texture_data[x + y * surface_width + 0];
403 color[1] = texture_data[x + y * surface_width + 1]; 402 color[1] = texture_data[x + y * surface_width + 1];
404 color[2] = texture_data[x + y * surface_width + 2]; 403 color[2] = texture_data[x + y * surface_width + 2];
diff --git a/src/yuzu/debugger/graphics/graphics_surface.h b/src/yuzu/debugger/graphics/graphics_surface.h
index 323e39d94..89445b18f 100644
--- a/src/yuzu/debugger/graphics/graphics_surface.h
+++ b/src/yuzu/debugger/graphics/graphics_surface.h
@@ -87,7 +87,7 @@ private:
87 QPushButton* save_surface; 87 QPushButton* save_surface;
88 88
89 Source surface_source; 89 Source surface_source;
90 Tegra::GPUVAddr surface_address; 90 GPUVAddr surface_address;
91 unsigned surface_width; 91 unsigned surface_width;
92 unsigned surface_height; 92 unsigned surface_height;
93 Tegra::Texture::TextureFormat surface_format; 93 Tegra::Texture::TextureFormat surface_format;
diff --git a/src/yuzu/debugger/profiler.cpp b/src/yuzu/debugger/profiler.cpp
index 8b30e0a85..86e03e46d 100644
--- a/src/yuzu/debugger/profiler.cpp
+++ b/src/yuzu/debugger/profiler.cpp
@@ -7,6 +7,7 @@
7#include <QMouseEvent> 7#include <QMouseEvent>
8#include <QPainter> 8#include <QPainter>
9#include <QString> 9#include <QString>
10#include <QTimer>
10#include "common/common_types.h" 11#include "common/common_types.h"
11#include "common/microprofile.h" 12#include "common/microprofile.h"
12#include "yuzu/debugger/profiler.h" 13#include "yuzu/debugger/profiler.h"
diff --git a/src/yuzu/debugger/profiler.h b/src/yuzu/debugger/profiler.h
index eae1e9e3c..8e69fdb06 100644
--- a/src/yuzu/debugger/profiler.h
+++ b/src/yuzu/debugger/profiler.h
@@ -4,10 +4,11 @@
4 4
5#pragma once 5#pragma once
6 6
7#include <QAbstractItemModel> 7#include <QWidget>
8#include <QDockWidget> 8
9#include <QTimer> 9class QAction;
10#include "common/microprofile.h" 10class QHideEvent;
11class QShowEvent;
11 12
12class MicroProfileDialog : public QWidget { 13class MicroProfileDialog : public QWidget {
13 Q_OBJECT 14 Q_OBJECT
diff --git a/src/yuzu/debugger/wait_tree.cpp b/src/yuzu/debugger/wait_tree.cpp
index f50225d5f..593bb681f 100644
--- a/src/yuzu/debugger/wait_tree.cpp
+++ b/src/yuzu/debugger/wait_tree.cpp
@@ -81,9 +81,8 @@ QString WaitTreeText::GetText() const {
81 return text; 81 return text;
82} 82}
83 83
84WaitTreeMutexInfo::WaitTreeMutexInfo(VAddr mutex_address) : mutex_address(mutex_address) { 84WaitTreeMutexInfo::WaitTreeMutexInfo(VAddr mutex_address, const Kernel::HandleTable& handle_table)
85 const auto& handle_table = Core::CurrentProcess()->GetHandleTable(); 85 : mutex_address(mutex_address) {
86
87 mutex_value = Memory::Read32(mutex_address); 86 mutex_value = Memory::Read32(mutex_address);
88 owner_handle = static_cast<Kernel::Handle>(mutex_value & Kernel::Mutex::MutexOwnerMask); 87 owner_handle = static_cast<Kernel::Handle>(mutex_value & Kernel::Mutex::MutexOwnerMask);
89 owner = handle_table.Get<Kernel::Thread>(owner_handle); 88 owner = handle_table.Get<Kernel::Thread>(owner_handle);
@@ -235,6 +234,9 @@ QString WaitTreeThread::GetText() const {
235 case Kernel::ThreadStatus::WaitMutex: 234 case Kernel::ThreadStatus::WaitMutex:
236 status = tr("waiting for mutex"); 235 status = tr("waiting for mutex");
237 break; 236 break;
237 case Kernel::ThreadStatus::WaitCondVar:
238 status = tr("waiting for condition variable");
239 break;
238 case Kernel::ThreadStatus::WaitArb: 240 case Kernel::ThreadStatus::WaitArb:
239 status = tr("waiting for address arbiter"); 241 status = tr("waiting for address arbiter");
240 break; 242 break;
@@ -270,6 +272,7 @@ QColor WaitTreeThread::GetColor() const {
270 case Kernel::ThreadStatus::WaitSynchAll: 272 case Kernel::ThreadStatus::WaitSynchAll:
271 case Kernel::ThreadStatus::WaitSynchAny: 273 case Kernel::ThreadStatus::WaitSynchAny:
272 case Kernel::ThreadStatus::WaitMutex: 274 case Kernel::ThreadStatus::WaitMutex:
275 case Kernel::ThreadStatus::WaitCondVar:
273 case Kernel::ThreadStatus::WaitArb: 276 case Kernel::ThreadStatus::WaitArb:
274 return QColor(Qt::GlobalColor::red); 277 return QColor(Qt::GlobalColor::red);
275 case Kernel::ThreadStatus::Dormant: 278 case Kernel::ThreadStatus::Dormant:
@@ -316,7 +319,8 @@ std::vector<std::unique_ptr<WaitTreeItem>> WaitTreeThread::GetChildren() const {
316 319
317 const VAddr mutex_wait_address = thread.GetMutexWaitAddress(); 320 const VAddr mutex_wait_address = thread.GetMutexWaitAddress();
318 if (mutex_wait_address != 0) { 321 if (mutex_wait_address != 0) {
319 list.push_back(std::make_unique<WaitTreeMutexInfo>(mutex_wait_address)); 322 const auto& handle_table = thread.GetOwnerProcess()->GetHandleTable();
323 list.push_back(std::make_unique<WaitTreeMutexInfo>(mutex_wait_address, handle_table));
320 } else { 324 } else {
321 list.push_back(std::make_unique<WaitTreeText>(tr("not waiting for mutex"))); 325 list.push_back(std::make_unique<WaitTreeText>(tr("not waiting for mutex")));
322 } 326 }
diff --git a/src/yuzu/debugger/wait_tree.h b/src/yuzu/debugger/wait_tree.h
index 365c3dbfe..62886609d 100644
--- a/src/yuzu/debugger/wait_tree.h
+++ b/src/yuzu/debugger/wait_tree.h
@@ -17,6 +17,7 @@
17class EmuThread; 17class EmuThread;
18 18
19namespace Kernel { 19namespace Kernel {
20class HandleTable;
20class ReadableEvent; 21class ReadableEvent;
21class WaitObject; 22class WaitObject;
22class Thread; 23class Thread;
@@ -72,7 +73,7 @@ public:
72class WaitTreeMutexInfo : public WaitTreeExpandableItem { 73class WaitTreeMutexInfo : public WaitTreeExpandableItem {
73 Q_OBJECT 74 Q_OBJECT
74public: 75public:
75 explicit WaitTreeMutexInfo(VAddr mutex_address); 76 explicit WaitTreeMutexInfo(VAddr mutex_address, const Kernel::HandleTable& handle_table);
76 ~WaitTreeMutexInfo() override; 77 ~WaitTreeMutexInfo() override;
77 78
78 QString GetText() const override; 79 QString GetText() const override;
diff --git a/src/yuzu/main.cpp b/src/yuzu/main.cpp
index beba519b4..9efe626d0 100644
--- a/src/yuzu/main.cpp
+++ b/src/yuzu/main.cpp
@@ -11,6 +11,7 @@
11#include "applets/profile_select.h" 11#include "applets/profile_select.h"
12#include "applets/software_keyboard.h" 12#include "applets/software_keyboard.h"
13#include "applets/web_browser.h" 13#include "applets/web_browser.h"
14#include "configuration/configure_input.h"
14#include "configuration/configure_per_general.h" 15#include "configuration/configure_per_general.h"
15#include "core/file_sys/vfs.h" 16#include "core/file_sys/vfs.h"
16#include "core/file_sys/vfs_real.h" 17#include "core/file_sys/vfs_real.h"
@@ -339,6 +340,11 @@ void GMainWindow::WebBrowserOpenPage(std::string_view filename, std::string_view
339 .arg(QString::fromStdString(std::to_string(key_code)))); 340 .arg(QString::fromStdString(std::to_string(key_code))));
340 }; 341 };
341 342
343 QMessageBox::information(
344 this, tr("Exit"),
345 tr("To exit the web application, use the game provided controls to select exit, select the "
346 "'Exit Web Applet' option in the menu bar, or press the 'Enter' key."));
347
342 bool running_exit_check = false; 348 bool running_exit_check = false;
343 while (!finished) { 349 while (!finished) {
344 QApplication::processEvents(); 350 QApplication::processEvents();
@@ -522,6 +528,7 @@ void GMainWindow::InitializeHotkeys() {
522 Qt::ApplicationShortcut); 528 Qt::ApplicationShortcut);
523 hotkey_registry.RegisterHotkey("Main Window", "Capture Screenshot", 529 hotkey_registry.RegisterHotkey("Main Window", "Capture Screenshot",
524 QKeySequence(QKeySequence::Print)); 530 QKeySequence(QKeySequence::Print));
531 hotkey_registry.RegisterHotkey("Main Window", "Change Docked Mode", QKeySequence(Qt::Key_F10));
525 532
526 hotkey_registry.LoadHotkeys(); 533 hotkey_registry.LoadHotkeys();
527 534
@@ -561,7 +568,10 @@ void GMainWindow::InitializeHotkeys() {
561 Settings::values.use_frame_limit = !Settings::values.use_frame_limit; 568 Settings::values.use_frame_limit = !Settings::values.use_frame_limit;
562 UpdateStatusBar(); 569 UpdateStatusBar();
563 }); 570 });
564 constexpr u16 SPEED_LIMIT_STEP = 5; 571 // TODO: Remove this comment/static whenever the next major release of
572 // MSVC occurs and we make it a requirement (see:
573 // https://developercommunity.visualstudio.com/content/problem/93922/constexprs-are-trying-to-be-captured-in-lambda-fun.html)
574 static constexpr u16 SPEED_LIMIT_STEP = 5;
565 connect(hotkey_registry.GetHotkey("Main Window", "Increase Speed Limit", this), 575 connect(hotkey_registry.GetHotkey("Main Window", "Increase Speed Limit", this),
566 &QShortcut::activated, this, [&] { 576 &QShortcut::activated, this, [&] {
567 if (Settings::values.frame_limit < 9999 - SPEED_LIMIT_STEP) { 577 if (Settings::values.frame_limit < 9999 - SPEED_LIMIT_STEP) {
@@ -588,6 +598,12 @@ void GMainWindow::InitializeHotkeys() {
588 OnCaptureScreenshot(); 598 OnCaptureScreenshot();
589 } 599 }
590 }); 600 });
601 connect(hotkey_registry.GetHotkey("Main Window", "Change Docked Mode", this),
602 &QShortcut::activated, this, [&] {
603 Settings::values.use_docked_mode = !Settings::values.use_docked_mode;
604 OnDockedModeChanged(!Settings::values.use_docked_mode,
605 Settings::values.use_docked_mode);
606 });
591} 607}
592 608
593void GMainWindow::SetDefaultUIGeometry() { 609void GMainWindow::SetDefaultUIGeometry() {
@@ -848,7 +864,7 @@ bool GMainWindow::LoadROM(const QString& filename) {
848 } 864 }
849 game_path = filename; 865 game_path = filename;
850 866
851 Core::Telemetry().AddField(Telemetry::FieldType::App, "Frontend", "Qt"); 867 system.TelemetrySession().AddField(Telemetry::FieldType::App, "Frontend", "Qt");
852 return true; 868 return true;
853} 869}
854 870
diff --git a/src/yuzu_cmd/config.cpp b/src/yuzu_cmd/config.cpp
index ff05b3179..f24cc77fe 100644
--- a/src/yuzu_cmd/config.cpp
+++ b/src/yuzu_cmd/config.cpp
@@ -319,7 +319,6 @@ void Config::ReadValues() {
319 319
320 // System 320 // System
321 Settings::values.use_docked_mode = sdl2_config->GetBoolean("System", "use_docked_mode", false); 321 Settings::values.use_docked_mode = sdl2_config->GetBoolean("System", "use_docked_mode", false);
322 Settings::values.enable_nfc = sdl2_config->GetBoolean("System", "enable_nfc", true);
323 const auto size = sdl2_config->GetInteger("System", "users_size", 0); 322 const auto size = sdl2_config->GetInteger("System", "users_size", 0);
324 323
325 Settings::values.current_user = std::clamp<int>( 324 Settings::values.current_user = std::clamp<int>(
@@ -346,7 +345,7 @@ void Config::ReadValues() {
346 345
347 // Renderer 346 // Renderer
348 Settings::values.resolution_factor = 347 Settings::values.resolution_factor =
349 (float)sdl2_config->GetReal("Renderer", "resolution_factor", 1.0); 348 static_cast<float>(sdl2_config->GetReal("Renderer", "resolution_factor", 1.0));
350 Settings::values.use_frame_limit = sdl2_config->GetBoolean("Renderer", "use_frame_limit", true); 349 Settings::values.use_frame_limit = sdl2_config->GetBoolean("Renderer", "use_frame_limit", true);
351 Settings::values.frame_limit = 350 Settings::values.frame_limit =
352 static_cast<u16>(sdl2_config->GetInteger("Renderer", "frame_limit", 100)); 351 static_cast<u16>(sdl2_config->GetInteger("Renderer", "frame_limit", 100));
@@ -354,17 +353,20 @@ void Config::ReadValues() {
354 sdl2_config->GetBoolean("Renderer", "use_disk_shader_cache", false); 353 sdl2_config->GetBoolean("Renderer", "use_disk_shader_cache", false);
355 Settings::values.use_accurate_gpu_emulation = 354 Settings::values.use_accurate_gpu_emulation =
356 sdl2_config->GetBoolean("Renderer", "use_accurate_gpu_emulation", false); 355 sdl2_config->GetBoolean("Renderer", "use_accurate_gpu_emulation", false);
356 Settings::values.use_asynchronous_gpu_emulation =
357 sdl2_config->GetBoolean("Renderer", "use_asynchronous_gpu_emulation", false);
357 358
358 Settings::values.bg_red = (float)sdl2_config->GetReal("Renderer", "bg_red", 0.0); 359 Settings::values.bg_red = static_cast<float>(sdl2_config->GetReal("Renderer", "bg_red", 0.0));
359 Settings::values.bg_green = (float)sdl2_config->GetReal("Renderer", "bg_green", 0.0); 360 Settings::values.bg_green =
360 Settings::values.bg_blue = (float)sdl2_config->GetReal("Renderer", "bg_blue", 0.0); 361 static_cast<float>(sdl2_config->GetReal("Renderer", "bg_green", 0.0));
362 Settings::values.bg_blue = static_cast<float>(sdl2_config->GetReal("Renderer", "bg_blue", 0.0));
361 363
362 // Audio 364 // Audio
363 Settings::values.sink_id = sdl2_config->Get("Audio", "output_engine", "auto"); 365 Settings::values.sink_id = sdl2_config->Get("Audio", "output_engine", "auto");
364 Settings::values.enable_audio_stretching = 366 Settings::values.enable_audio_stretching =
365 sdl2_config->GetBoolean("Audio", "enable_audio_stretching", true); 367 sdl2_config->GetBoolean("Audio", "enable_audio_stretching", true);
366 Settings::values.audio_device_id = sdl2_config->Get("Audio", "output_device", "auto"); 368 Settings::values.audio_device_id = sdl2_config->Get("Audio", "output_device", "auto");
367 Settings::values.volume = sdl2_config->GetReal("Audio", "volume", 1); 369 Settings::values.volume = static_cast<float>(sdl2_config->GetReal("Audio", "volume", 1));
368 370
369 Settings::values.language_index = sdl2_config->GetInteger("System", "language_index", 1); 371 Settings::values.language_index = sdl2_config->GetInteger("System", "language_index", 1);
370 372
diff --git a/src/yuzu_cmd/default_ini.h b/src/yuzu_cmd/default_ini.h
index a81986f8e..6538af098 100644
--- a/src/yuzu_cmd/default_ini.h
+++ b/src/yuzu_cmd/default_ini.h
@@ -118,6 +118,10 @@ use_disk_shader_cache =
118# 0 (default): Off (fast), 1 : On (slow) 118# 0 (default): Off (fast), 1 : On (slow)
119use_accurate_gpu_emulation = 119use_accurate_gpu_emulation =
120 120
121# Whether to use asynchronous GPU emulation
122# 0 : Off (slow), 1 (default): On (fast)
123use_asynchronous_gpu_emulation =
124
121# The clear color for the renderer. What shows up on the sides of the bottom screen. 125# The clear color for the renderer. What shows up on the sides of the bottom screen.
122# Must be in range of 0.0-1.0. Defaults to 1.0 for all. 126# Must be in range of 0.0-1.0. Defaults to 1.0 for all.
123bg_red = 127bg_red =
diff --git a/src/yuzu_cmd/emu_window/emu_window_sdl2.cpp b/src/yuzu_cmd/emu_window/emu_window_sdl2.cpp
index 7df8eff53..de7a26e14 100644
--- a/src/yuzu_cmd/emu_window/emu_window_sdl2.cpp
+++ b/src/yuzu_cmd/emu_window/emu_window_sdl2.cpp
@@ -135,16 +135,16 @@ bool EmuWindow_SDL2::SupportsRequiredGLExtensions() {
135} 135}
136 136
137EmuWindow_SDL2::EmuWindow_SDL2(bool fullscreen) { 137EmuWindow_SDL2::EmuWindow_SDL2(bool fullscreen) {
138 InputCommon::Init();
139
140 SDL_SetMainReady();
141
142 // Initialize the window 138 // Initialize the window
143 if (SDL_Init(SDL_INIT_VIDEO | SDL_INIT_JOYSTICK) < 0) { 139 if (SDL_Init(SDL_INIT_VIDEO | SDL_INIT_JOYSTICK) < 0) {
144 LOG_CRITICAL(Frontend, "Failed to initialize SDL2! Exiting..."); 140 LOG_CRITICAL(Frontend, "Failed to initialize SDL2! Exiting...");
145 exit(1); 141 exit(1);
146 } 142 }
147 143
144 InputCommon::Init();
145
146 SDL_SetMainReady();
147
148 SDL_GL_SetAttribute(SDL_GL_CONTEXT_MAJOR_VERSION, 4); 148 SDL_GL_SetAttribute(SDL_GL_CONTEXT_MAJOR_VERSION, 4);
149 SDL_GL_SetAttribute(SDL_GL_CONTEXT_MINOR_VERSION, 3); 149 SDL_GL_SetAttribute(SDL_GL_CONTEXT_MINOR_VERSION, 3);
150 SDL_GL_SetAttribute(SDL_GL_CONTEXT_PROFILE_MASK, SDL_GL_CONTEXT_PROFILE_CORE); 150 SDL_GL_SetAttribute(SDL_GL_CONTEXT_PROFILE_MASK, SDL_GL_CONTEXT_PROFILE_CORE);
@@ -201,11 +201,9 @@ EmuWindow_SDL2::EmuWindow_SDL2(bool fullscreen) {
201} 201}
202 202
203EmuWindow_SDL2::~EmuWindow_SDL2() { 203EmuWindow_SDL2::~EmuWindow_SDL2() {
204 InputCommon::SDL::CloseSDLJoysticks(); 204 InputCommon::Shutdown();
205 SDL_GL_DeleteContext(gl_context); 205 SDL_GL_DeleteContext(gl_context);
206 SDL_Quit(); 206 SDL_Quit();
207
208 InputCommon::Shutdown();
209} 207}
210 208
211void EmuWindow_SDL2::SwapBuffers() { 209void EmuWindow_SDL2::SwapBuffers() {
@@ -262,7 +260,6 @@ void EmuWindow_SDL2::PollEvents() {
262 is_open = false; 260 is_open = false;
263 break; 261 break;
264 default: 262 default:
265 InputCommon::SDL::HandleGameControllerEvent(event);
266 break; 263 break;
267 } 264 }
268 } 265 }
diff --git a/src/yuzu_cmd/yuzu.cpp b/src/yuzu_cmd/yuzu.cpp
index c34b5467f..245f25847 100644
--- a/src/yuzu_cmd/yuzu.cpp
+++ b/src/yuzu_cmd/yuzu.cpp
@@ -114,9 +114,9 @@ int main(int argc, char** argv) {
114 }; 114 };
115 115
116 while (optind < argc) { 116 while (optind < argc) {
117 char arg = getopt_long(argc, argv, "g:fhvp::", long_options, &option_index); 117 int arg = getopt_long(argc, argv, "g:fhvp::", long_options, &option_index);
118 if (arg != -1) { 118 if (arg != -1) {
119 switch (arg) { 119 switch (static_cast<char>(arg)) {
120 case 'g': 120 case 'g':
121 errno = 0; 121 errno = 0;
122 gdb_port = strtoul(optarg, &endarg, 0); 122 gdb_port = strtoul(optarg, &endarg, 0);
@@ -216,7 +216,7 @@ int main(int argc, char** argv) {
216 } 216 }
217 } 217 }
218 218
219 Core::Telemetry().AddField(Telemetry::FieldType::App, "Frontend", "SDL"); 219 system.TelemetrySession().AddField(Telemetry::FieldType::App, "Frontend", "SDL");
220 220
221 system.Renderer().Rasterizer().LoadDiskResources(); 221 system.Renderer().Rasterizer().LoadDiskResources();
222 222