summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--.github/ISSUE_TEMPLATE.md21
-rw-r--r--.gitmodules9
-rw-r--r--.travis.yml2
-rw-r--r--.travis/common/travis-ci.env2
-rwxr-xr-x.travis/macos/build.sh2
-rwxr-xr-x.travis/macos/deps.sh3
-rwxr-xr-x.travis/macos/upload.sh94
-rw-r--r--CMakeLists.txt115
-rw-r--r--CMakeModules/GenerateSCMRev.cmake95
-rw-r--r--README.md2
-rw-r--r--externals/CMakeLists.txt9
m---------externals/Vulkan-Headers0
m---------externals/cubeb0
m---------externals/opus0
m---------externals/sirit0
m---------externals/zstd0
-rw-r--r--src/CMakeLists.txt61
-rw-r--r--src/audio_core/audio_out.cpp5
-rw-r--r--src/audio_core/audio_out.h8
-rw-r--r--src/audio_core/audio_renderer.cpp7
-rw-r--r--src/audio_core/audio_renderer.h20
-rw-r--r--src/audio_core/buffer.h2
-rw-r--r--src/audio_core/codec.cpp4
-rw-r--r--src/audio_core/cubeb_sink.cpp23
-rw-r--r--src/audio_core/cubeb_sink.h4
-rw-r--r--src/audio_core/stream.cpp18
-rw-r--r--src/audio_core/stream.h30
-rw-r--r--src/common/CMakeLists.txt106
-rw-r--r--src/common/assert.h18
-rw-r--r--src/common/bit_field.h28
-rw-r--r--src/common/bit_util.h47
-rw-r--r--src/common/color.h40
-rw-r--r--src/common/common_paths.h1
-rw-r--r--src/common/common_types.h7
-rw-r--r--src/common/detached_tasks.cpp8
-rw-r--r--src/common/file_util.cpp1
-rw-r--r--src/common/file_util.h1
-rw-r--r--src/common/logging/backend.cpp76
-rw-r--r--src/common/logging/backend.h5
-rw-r--r--src/common/logging/log.h1
-rw-r--r--src/common/lz4_compression.cpp76
-rw-r--r--src/common/lz4_compression.h55
-rw-r--r--src/common/math_util.h4
-rw-r--r--src/common/memory_hook.cpp (renamed from src/core/memory_hook.cpp)6
-rw-r--r--src/common/memory_hook.h (renamed from src/core/memory_hook.h)4
-rw-r--r--src/common/multi_level_queue.h337
-rw-r--r--src/common/page_table.cpp31
-rw-r--r--src/common/page_table.h84
-rw-r--r--src/common/quaternion.h10
-rw-r--r--src/common/scm_rev.cpp.in2
-rw-r--r--src/common/scm_rev.h1
-rw-r--r--src/common/scope_exit.h2
-rw-r--r--src/common/swap.h276
-rw-r--r--src/common/thread.cpp37
-rw-r--r--src/common/thread.h14
-rw-r--r--src/common/thread_queue_list.h6
-rw-r--r--src/common/threadsafe_queue.h55
-rw-r--r--src/common/uint128.cpp45
-rw-r--r--src/common/uint128.h19
-rw-r--r--src/common/vector_math.h4
-rw-r--r--src/common/zstd_compression.cpp53
-rw-r--r--src/common/zstd_compression.h42
-rw-r--r--src/core/CMakeLists.txt23
-rw-r--r--src/core/arm/dynarmic/arm_dynarmic.cpp23
-rw-r--r--src/core/arm/dynarmic/arm_dynarmic.h14
-rw-r--r--src/core/arm/unicorn/arm_unicorn.cpp42
-rw-r--r--src/core/arm/unicorn/arm_unicorn.h12
-rw-r--r--src/core/core.cpp89
-rw-r--r--src/core/core.h32
-rw-r--r--src/core/core_cpu.cpp26
-rw-r--r--src/core/core_cpu.h12
-rw-r--r--src/core/core_timing.cpp201
-rw-r--r--src/core/core_timing.h215
-rw-r--r--src/core/core_timing_util.cpp10
-rw-r--r--src/core/core_timing_util.h7
-rw-r--r--src/core/cpu_core_manager.cpp2
-rw-r--r--src/core/crypto/key_manager.cpp6
-rw-r--r--src/core/file_sys/cheat_engine.cpp492
-rw-r--r--src/core/file_sys/cheat_engine.h234
-rw-r--r--src/core/file_sys/content_archive.h15
-rw-r--r--src/core/file_sys/control_metadata.cpp6
-rw-r--r--src/core/file_sys/control_metadata.h30
-rw-r--r--src/core/file_sys/errors.h3
-rw-r--r--src/core/file_sys/fsmitm_romfsbuild.cpp4
-rw-r--r--src/core/file_sys/nca_metadata.cpp8
-rw-r--r--src/core/file_sys/nca_metadata.h3
-rw-r--r--src/core/file_sys/patch_manager.cpp101
-rw-r--r--src/core/file_sys/patch_manager.h11
-rw-r--r--src/core/file_sys/program_metadata.cpp27
-rw-r--r--src/core/file_sys/program_metadata.h2
-rw-r--r--src/core/file_sys/registered_cache.cpp277
-rw-r--r--src/core/file_sys/registered_cache.h156
-rw-r--r--src/core/file_sys/romfs_factory.cpp2
-rw-r--r--src/core/file_sys/savedata_factory.cpp8
-rw-r--r--src/core/file_sys/savedata_factory.h11
-rw-r--r--src/core/file_sys/submission_package.cpp13
-rw-r--r--src/core/file_sys/submission_package.h11
-rw-r--r--src/core/file_sys/system_archive/system_archive.cpp3
-rw-r--r--src/core/file_sys/system_archive/system_version.cpp52
-rw-r--r--src/core/file_sys/system_archive/system_version.h16
-rw-r--r--src/core/file_sys/vfs_vector.cpp2
-rw-r--r--src/core/frontend/emu_window.cpp8
-rw-r--r--src/core/frontend/emu_window.h2
-rw-r--r--src/core/frontend/framebuffer_layout.cpp12
-rw-r--r--src/core/frontend/framebuffer_layout.h2
-rw-r--r--src/core/frontend/input.h2
-rw-r--r--src/core/frontend/scope_acquire_window_context.cpp18
-rw-r--r--src/core/frontend/scope_acquire_window_context.h23
-rw-r--r--src/core/gdbstub/gdbstub.cpp16
-rw-r--r--src/core/hle/ipc.h48
-rw-r--r--src/core/hle/ipc_helpers.h75
-rw-r--r--src/core/hle/kernel/address_arbiter.cpp187
-rw-r--r--src/core/hle/kernel/address_arbiter.h80
-rw-r--r--src/core/hle/kernel/client_port.cpp15
-rw-r--r--src/core/hle/kernel/client_port.h2
-rw-r--r--src/core/hle/kernel/client_session.cpp14
-rw-r--r--src/core/hle/kernel/client_session.h11
-rw-r--r--src/core/hle/kernel/code_set.cpp12
-rw-r--r--src/core/hle/kernel/code_set.h89
-rw-r--r--src/core/hle/kernel/errors.h1
-rw-r--r--src/core/hle/kernel/handle_table.cpp40
-rw-r--r--src/core/hle/kernel/handle_table.h25
-rw-r--r--src/core/hle/kernel/hle_ipc.cpp22
-rw-r--r--src/core/hle/kernel/hle_ipc.h25
-rw-r--r--src/core/hle/kernel/kernel.cpp71
-rw-r--r--src/core/hle/kernel/kernel.h36
-rw-r--r--src/core/hle/kernel/mutex.cpp35
-rw-r--r--src/core/hle/kernel/mutex.h20
-rw-r--r--src/core/hle/kernel/object.cpp3
-rw-r--r--src/core/hle/kernel/object.h3
-rw-r--r--src/core/hle/kernel/process.cpp78
-rw-r--r--src/core/hle/kernel/process.h123
-rw-r--r--src/core/hle/kernel/process_capability.cpp4
-rw-r--r--src/core/hle/kernel/process_capability.h4
-rw-r--r--src/core/hle/kernel/readable_event.cpp6
-rw-r--r--src/core/hle/kernel/readable_event.h6
-rw-r--r--src/core/hle/kernel/resource_limit.cpp7
-rw-r--r--src/core/hle/kernel/resource_limit.h13
-rw-r--r--src/core/hle/kernel/scheduler.cpp79
-rw-r--r--src/core/hle/kernel/scheduler.h12
-rw-r--r--src/core/hle/kernel/server_port.cpp13
-rw-r--r--src/core/hle/kernel/server_port.h44
-rw-r--r--src/core/hle/kernel/server_session.cpp98
-rw-r--r--src/core/hle/kernel/server_session.h64
-rw-r--r--src/core/hle/kernel/shared_memory.cpp16
-rw-r--r--src/core/hle/kernel/shared_memory.h12
-rw-r--r--src/core/hle/kernel/svc.cpp668
-rw-r--r--src/core/hle/kernel/svc.h6
-rw-r--r--src/core/hle/kernel/svc_wrap.h350
-rw-r--r--src/core/hle/kernel/thread.cpp113
-rw-r--r--src/core/hle/kernel/thread.h34
-rw-r--r--src/core/hle/kernel/timer.cpp88
-rw-r--r--src/core/hle/kernel/timer.h90
-rw-r--r--src/core/hle/kernel/transfer_memory.cpp81
-rw-r--r--src/core/hle/kernel/transfer_memory.h103
-rw-r--r--src/core/hle/kernel/vm_manager.cpp145
-rw-r--r--src/core/hle/kernel/vm_manager.h93
-rw-r--r--src/core/hle/kernel/wait_object.h8
-rw-r--r--src/core/hle/kernel/writable_event.h2
-rw-r--r--src/core/hle/result.h23
-rw-r--r--src/core/hle/service/am/am.cpp192
-rw-r--r--src/core/hle/service/am/am.h31
-rw-r--r--src/core/hle/service/am/applet_ae.cpp3
-rw-r--r--src/core/hle/service/am/applets/software_keyboard.cpp1
-rw-r--r--src/core/hle/service/am/applets/software_keyboard.h3
-rw-r--r--src/core/hle/service/am/applets/web_browser.cpp2
-rw-r--r--src/core/hle/service/aoc/aoc_u.cpp4
-rw-r--r--src/core/hle/service/audio/audin_u.cpp17
-rw-r--r--src/core/hle/service/audio/audout_u.cpp31
-rw-r--r--src/core/hle/service/audio/audrec_u.cpp10
-rw-r--r--src/core/hle/service/audio/audren_u.cpp96
-rw-r--r--src/core/hle/service/audio/audren_u.h5
-rw-r--r--src/core/hle/service/audio/errors.h15
-rw-r--r--src/core/hle/service/audio/hwopus.cpp257
-rw-r--r--src/core/hle/service/btdrv/btdrv.cpp147
-rw-r--r--src/core/hle/service/btm/btm.cpp152
-rw-r--r--src/core/hle/service/fatal/fatal.cpp89
-rw-r--r--src/core/hle/service/filesystem/filesystem.cpp30
-rw-r--r--src/core/hle/service/filesystem/filesystem.h4
-rw-r--r--src/core/hle/service/filesystem/fsp_srv.cpp126
-rw-r--r--src/core/hle/service/filesystem/fsp_srv.h11
-rw-r--r--src/core/hle/service/hid/controllers/controller_base.h7
-rw-r--r--src/core/hle/service/hid/controllers/debug_pad.cpp5
-rw-r--r--src/core/hle/service/hid/controllers/debug_pad.h32
-rw-r--r--src/core/hle/service/hid/controllers/gesture.cpp5
-rw-r--r--src/core/hle/service/hid/controllers/gesture.h2
-rw-r--r--src/core/hle/service/hid/controllers/keyboard.cpp5
-rw-r--r--src/core/hle/service/hid/controllers/keyboard.h2
-rw-r--r--src/core/hle/service/hid/controllers/mouse.cpp5
-rw-r--r--src/core/hle/service/hid/controllers/mouse.h2
-rw-r--r--src/core/hle/service/hid/controllers/npad.cpp5
-rw-r--r--src/core/hle/service/hid/controllers/npad.h104
-rw-r--r--src/core/hle/service/hid/controllers/stubbed.cpp5
-rw-r--r--src/core/hle/service/hid/controllers/stubbed.h2
-rw-r--r--src/core/hle/service/hid/controllers/touchscreen.cpp7
-rw-r--r--src/core/hle/service/hid/controllers/touchscreen.h6
-rw-r--r--src/core/hle/service/hid/controllers/xpad.cpp5
-rw-r--r--src/core/hle/service/hid/controllers/xpad.h2
-rw-r--r--src/core/hle/service/hid/hid.cpp21
-rw-r--r--src/core/hle/service/hid/hid.h11
-rw-r--r--src/core/hle/service/hid/irs.cpp2
-rw-r--r--src/core/hle/service/ldr/ldr.cpp8
-rw-r--r--src/core/hle/service/lm/lm.cpp2
-rw-r--r--src/core/hle/service/ncm/ncm.cpp8
-rw-r--r--src/core/hle/service/nfc/nfc.cpp2
-rw-r--r--src/core/hle/service/nfp/nfp.cpp2
-rw-r--r--src/core/hle/service/ns/ns.cpp34
-rw-r--r--src/core/hle/service/nvdrv/devices/nvdevice.h10
-rw-r--r--src/core/hle/service/nvdrv/devices/nvdisp_disp0.cpp10
-rw-r--r--src/core/hle/service/nvdrv/devices/nvdisp_disp0.h4
-rw-r--r--src/core/hle/service/nvdrv/devices/nvhost_as_gpu.cpp17
-rw-r--r--src/core/hle/service/nvdrv/devices/nvhost_ctrl_gpu.cpp3
-rw-r--r--src/core/hle/service/nvdrv/devices/nvhost_gpu.cpp14
-rw-r--r--src/core/hle/service/nvdrv/interface.h2
-rw-r--r--src/core/hle/service/nvdrv/nvmemp.h2
-rw-r--r--src/core/hle/service/nvflinger/buffer_queue.cpp2
-rw-r--r--src/core/hle/service/nvflinger/buffer_queue.h8
-rw-r--r--src/core/hle/service/nvflinger/nvflinger.cpp162
-rw-r--r--src/core/hle/service/nvflinger/nvflinger.h88
-rw-r--r--src/core/hle/service/pm/pm.cpp15
-rw-r--r--src/core/hle/service/pm/pm.h7
-rw-r--r--src/core/hle/service/psc/psc.cpp17
-rw-r--r--src/core/hle/service/service.cpp15
-rw-r--r--src/core/hle/service/service.h17
-rw-r--r--src/core/hle/service/set/set_cal.h2
-rw-r--r--src/core/hle/service/set/set_sys.cpp79
-rw-r--r--src/core/hle/service/set/set_sys.h2
-rw-r--r--src/core/hle/service/sm/controller.cpp2
-rw-r--r--src/core/hle/service/sm/sm.h2
-rw-r--r--src/core/hle/service/sockets/sfdnsres.cpp12
-rw-r--r--src/core/hle/service/spl/module.cpp4
-rw-r--r--src/core/hle/service/ssl/ssl.cpp10
-rw-r--r--src/core/hle/service/time/time.cpp9
-rw-r--r--src/core/hle/service/vi/display/vi_display.cpp71
-rw-r--r--src/core/hle/service/vi/display/vi_display.h98
-rw-r--r--src/core/hle/service/vi/layer/vi_layer.cpp13
-rw-r--r--src/core/hle/service/vi/layer/vi_layer.h52
-rw-r--r--src/core/hle/service/vi/vi.cpp139
-rw-r--r--src/core/hle/service/vi/vi.h40
-rw-r--r--src/core/hle/service/vi/vi_m.cpp12
-rw-r--r--src/core/hle/service/vi/vi_m.h19
-rw-r--r--src/core/hle/service/vi/vi_s.cpp12
-rw-r--r--src/core/hle/service/vi/vi_s.h19
-rw-r--r--src/core/hle/service/vi/vi_u.cpp12
-rw-r--r--src/core/hle/service/vi/vi_u.h19
-rw-r--r--src/core/loader/elf.cpp3
-rw-r--r--src/core/loader/linker.cpp147
-rw-r--r--src/core/loader/linker.h36
-rw-r--r--src/core/loader/nro.cpp3
-rw-r--r--src/core/loader/nro.h4
-rw-r--r--src/core/loader/nso.cpp121
-rw-r--r--src/core/loader/nso.h43
-rw-r--r--src/core/loader/xci.h2
-rw-r--r--src/core/memory.cpp243
-rw-r--r--src/core/memory.h94
-rw-r--r--src/core/memory_setup.h19
-rw-r--r--src/core/perf_stats.cpp10
-rw-r--r--src/core/settings.cpp31
-rw-r--r--src/core/settings.h4
-rw-r--r--src/core/telemetry_session.cpp4
-rw-r--r--src/input_common/CMakeLists.txt15
-rw-r--r--src/input_common/keyboard.cpp8
-rw-r--r--src/input_common/main.cpp23
-rw-r--r--src/input_common/main.h2
-rw-r--r--src/input_common/motion_emu.cpp38
-rw-r--r--src/input_common/sdl/sdl.cpp636
-rw-r--r--src/input_common/sdl/sdl.h53
-rw-r--r--src/input_common/sdl/sdl_impl.cpp671
-rw-r--r--src/input_common/sdl/sdl_impl.h63
-rw-r--r--src/tests/CMakeLists.txt3
-rw-r--r--src/tests/common/bit_field.cpp90
-rw-r--r--src/tests/common/bit_utils.cpp23
-rw-r--r--src/tests/common/multi_level_queue.cpp55
-rw-r--r--src/tests/core/arm/arm_test_common.cpp14
-rw-r--r--src/tests/core/arm/arm_test_common.h8
-rw-r--r--src/tests/core/core_timing.cpp220
-rw-r--r--src/video_core/CMakeLists.txt77
-rw-r--r--src/video_core/debug_utils/debug_utils.cpp4
-rw-r--r--src/video_core/debug_utils/debug_utils.h4
-rw-r--r--src/video_core/dma_pusher.cpp55
-rw-r--r--src/video_core/dma_pusher.h8
-rw-r--r--src/video_core/engines/fermi_2d.cpp70
-rw-r--r--src/video_core/engines/fermi_2d.h39
-rw-r--r--src/video_core/engines/kepler_compute.cpp33
-rw-r--r--src/video_core/engines/kepler_compute.h (renamed from src/video_core/engines/maxwell_compute.h)38
-rw-r--r--src/video_core/engines/kepler_memory.cpp23
-rw-r--r--src/video_core/engines/kepler_memory.h17
-rw-r--r--src/video_core/engines/maxwell_3d.cpp185
-rw-r--r--src/video_core/engines/maxwell_3d.h50
-rw-r--r--src/video_core/engines/maxwell_compute.cpp28
-rw-r--r--src/video_core/engines/maxwell_dma.cpp44
-rw-r--r--src/video_core/engines/maxwell_dma.h20
-rw-r--r--src/video_core/engines/shader_bytecode.h75
-rw-r--r--src/video_core/engines/shader_header.h43
-rw-r--r--src/video_core/gpu.cpp203
-rw-r--r--src/video_core/gpu.h129
-rw-r--r--src/video_core/gpu_asynch.cpp37
-rw-r--r--src/video_core/gpu_asynch.h37
-rw-r--r--src/video_core/gpu_synch.cpp37
-rw-r--r--src/video_core/gpu_synch.h29
-rw-r--r--src/video_core/gpu_thread.cpp121
-rw-r--r--src/video_core/gpu_thread.h173
-rw-r--r--src/video_core/macro_interpreter.cpp20
-rw-r--r--src/video_core/memory_manager.cpp556
-rw-r--r--src/video_core/memory_manager.h171
-rw-r--r--src/video_core/morton.cpp324
-rw-r--r--src/video_core/morton.h6
-rw-r--r--src/video_core/rasterizer_cache.h92
-rw-r--r--src/video_core/rasterizer_interface.h23
-rw-r--r--src/video_core/renderer_base.cpp1
-rw-r--r--src/video_core/renderer_opengl/gl_buffer_cache.cpp31
-rw-r--r--src/video_core/renderer_opengl/gl_buffer_cache.h33
-rw-r--r--src/video_core/renderer_opengl/gl_global_cache.cpp75
-rw-r--r--src/video_core/renderer_opengl/gl_global_cache.h31
-rw-r--r--src/video_core/renderer_opengl/gl_primitive_assembler.cpp10
-rw-r--r--src/video_core/renderer_opengl/gl_primitive_assembler.h4
-rw-r--r--src/video_core/renderer_opengl/gl_rasterizer.cpp452
-rw-r--r--src/video_core/renderer_opengl/gl_rasterizer.h76
-rw-r--r--src/video_core/renderer_opengl/gl_rasterizer_cache.cpp828
-rw-r--r--src/video_core/renderer_opengl/gl_rasterizer_cache.h238
-rw-r--r--src/video_core/renderer_opengl/gl_resource_manager.cpp9
-rw-r--r--src/video_core/renderer_opengl/gl_resource_manager.h8
-rw-r--r--src/video_core/renderer_opengl/gl_shader_cache.cpp561
-rw-r--r--src/video_core/renderer_opengl/gl_shader_cache.h144
-rw-r--r--src/video_core/renderer_opengl/gl_shader_decompiler.cpp4837
-rw-r--r--src/video_core/renderer_opengl/gl_shader_decompiler.h64
-rw-r--r--src/video_core/renderer_opengl/gl_shader_disk_cache.cpp624
-rw-r--r--src/video_core/renderer_opengl/gl_shader_disk_cache.h245
-rw-r--r--src/video_core/renderer_opengl/gl_shader_gen.cpp118
-rw-r--r--src/video_core/renderer_opengl/gl_shader_gen.h165
-rw-r--r--src/video_core/renderer_opengl/gl_shader_manager.cpp17
-rw-r--r--src/video_core/renderer_opengl/gl_shader_manager.h12
-rw-r--r--src/video_core/renderer_opengl/gl_shader_util.h5
-rw-r--r--src/video_core/renderer_opengl/gl_state.cpp686
-rw-r--r--src/video_core/renderer_opengl/gl_state.h64
-rw-r--r--src/video_core/renderer_opengl/renderer_opengl.cpp134
-rw-r--r--src/video_core/renderer_opengl/renderer_opengl.h23
-rw-r--r--src/video_core/renderer_opengl/utils.cpp28
-rw-r--r--src/video_core/renderer_opengl/utils.h20
-rw-r--r--src/video_core/renderer_vulkan/declarations.h45
-rw-r--r--src/video_core/renderer_vulkan/maxwell_to_vk.cpp483
-rw-r--r--src/video_core/renderer_vulkan/maxwell_to_vk.h58
-rw-r--r--src/video_core/renderer_vulkan/vk_buffer_cache.cpp123
-rw-r--r--src/video_core/renderer_vulkan/vk_buffer_cache.h103
-rw-r--r--src/video_core/renderer_vulkan/vk_device.cpp238
-rw-r--r--src/video_core/renderer_vulkan/vk_device.h116
-rw-r--r--src/video_core/renderer_vulkan/vk_memory_manager.cpp252
-rw-r--r--src/video_core/renderer_vulkan/vk_memory_manager.h87
-rw-r--r--src/video_core/renderer_vulkan/vk_resource_manager.cpp285
-rw-r--r--src/video_core/renderer_vulkan/vk_resource_manager.h180
-rw-r--r--src/video_core/renderer_vulkan/vk_sampler_cache.cpp81
-rw-r--r--src/video_core/renderer_vulkan/vk_sampler_cache.h56
-rw-r--r--src/video_core/renderer_vulkan/vk_scheduler.cpp60
-rw-r--r--src/video_core/renderer_vulkan/vk_scheduler.h69
-rw-r--r--src/video_core/renderer_vulkan/vk_shader_decompiler.cpp1379
-rw-r--r--src/video_core/renderer_vulkan/vk_shader_decompiler.h80
-rw-r--r--src/video_core/renderer_vulkan/vk_stream_buffer.cpp90
-rw-r--r--src/video_core/renderer_vulkan/vk_stream_buffer.h72
-rw-r--r--src/video_core/renderer_vulkan/vk_swapchain.cpp210
-rw-r--r--src/video_core/renderer_vulkan/vk_swapchain.h92
-rw-r--r--src/video_core/shader/decode.cpp209
-rw-r--r--src/video_core/shader/decode/arithmetic.cpp155
-rw-r--r--src/video_core/shader/decode/arithmetic_half.cpp70
-rw-r--r--src/video_core/shader/decode/arithmetic_half_immediate.cpp51
-rw-r--r--src/video_core/shader/decode/arithmetic_immediate.cpp52
-rw-r--r--src/video_core/shader/decode/arithmetic_integer.cpp287
-rw-r--r--src/video_core/shader/decode/arithmetic_integer_immediate.cpp96
-rw-r--r--src/video_core/shader/decode/bfe.cpp49
-rw-r--r--src/video_core/shader/decode/bfi.cpp41
-rw-r--r--src/video_core/shader/decode/conversion.cpp149
-rw-r--r--src/video_core/shader/decode/decode_integer_set.cpp0
-rw-r--r--src/video_core/shader/decode/ffma.cpp59
-rw-r--r--src/video_core/shader/decode/float_set.cpp58
-rw-r--r--src/video_core/shader/decode/float_set_predicate.cpp56
-rw-r--r--src/video_core/shader/decode/half_set.cpp67
-rw-r--r--src/video_core/shader/decode/half_set_predicate.cpp62
-rw-r--r--src/video_core/shader/decode/hfma2.cpp77
-rw-r--r--src/video_core/shader/decode/integer_set.cpp50
-rw-r--r--src/video_core/shader/decode/integer_set_predicate.cpp53
-rw-r--r--src/video_core/shader/decode/memory.cpp239
-rw-r--r--src/video_core/shader/decode/other.cpp189
-rw-r--r--src/video_core/shader/decode/predicate_set_predicate.cpp67
-rw-r--r--src/video_core/shader/decode/predicate_set_register.cpp46
-rw-r--r--src/video_core/shader/decode/register_set_predicate.cpp51
-rw-r--r--src/video_core/shader/decode/shift.cpp55
-rw-r--r--src/video_core/shader/decode/texture.cpp598
-rw-r--r--src/video_core/shader/decode/video.cpp111
-rw-r--r--src/video_core/shader/decode/xmad.cpp119
-rw-r--r--src/video_core/shader/shader_ir.cpp444
-rw-r--r--src/video_core/shader/shader_ir.h842
-rw-r--r--src/video_core/shader/track.cpp102
-rw-r--r--src/video_core/surface.cpp22
-rw-r--r--src/video_core/surface.h5
-rw-r--r--src/video_core/texture_cache.cpp386
-rw-r--r--src/video_core/texture_cache.h586
-rw-r--r--src/video_core/textures/astc.cpp80
-rw-r--r--src/video_core/textures/astc.h2
-rw-r--r--src/video_core/textures/convert.cpp93
-rw-r--r--src/video_core/textures/convert.h21
-rw-r--r--src/video_core/textures/decoders.cpp38
-rw-r--r--src/video_core/textures/decoders.h31
-rw-r--r--src/video_core/textures/texture.h88
-rw-r--r--src/video_core/video_core.cpp12
-rw-r--r--src/video_core/video_core.h7
-rw-r--r--src/web_service/verify_login.h2
-rw-r--r--src/web_service/web_backend.cpp5
-rw-r--r--src/yuzu/CMakeLists.txt8
-rw-r--r--src/yuzu/applets/profile_select.cpp8
-rw-r--r--src/yuzu/applets/profile_select.h2
-rw-r--r--src/yuzu/applets/software_keyboard.cpp18
-rw-r--r--src/yuzu/applets/web_browser.cpp6
-rw-r--r--src/yuzu/bootmanager.cpp24
-rw-r--r--src/yuzu/bootmanager.h10
-rw-r--r--src/yuzu/compatdb.cpp6
-rw-r--r--src/yuzu/configuration/config.cpp492
-rw-r--r--src/yuzu/configuration/config.h8
-rw-r--r--src/yuzu/configuration/configure.ui19
-rw-r--r--src/yuzu/configuration/configure_debug.cpp1
-rw-r--r--src/yuzu/configuration/configure_dialog.cpp17
-rw-r--r--src/yuzu/configuration/configure_dialog.h3
-rw-r--r--src/yuzu/configuration/configure_general.cpp10
-rw-r--r--src/yuzu/configuration/configure_general.h1
-rw-r--r--src/yuzu/configuration/configure_general.ui44
-rw-r--r--src/yuzu/configuration/configure_graphics.cpp26
-rw-r--r--src/yuzu/configuration/configure_graphics.h2
-rw-r--r--src/yuzu/configuration/configure_graphics.ui14
-rw-r--r--src/yuzu/configuration/configure_hotkeys.cpp121
-rw-r--r--src/yuzu/configuration/configure_hotkeys.h48
-rw-r--r--src/yuzu/configuration/configure_hotkeys.ui42
-rw-r--r--src/yuzu/configuration/configure_input_player.cpp1
-rw-r--r--src/yuzu/configuration/configure_input_player.h8
-rw-r--r--src/yuzu/configuration/configure_per_general.cpp1
-rw-r--r--src/yuzu/configuration/configure_per_general.h6
-rw-r--r--src/yuzu/configuration/configure_system.cpp12
-rw-r--r--src/yuzu/configuration/configure_touchscreen_advanced.h2
-rw-r--r--src/yuzu/debugger/graphics/graphics_surface.cpp461
-rw-r--r--src/yuzu/debugger/graphics/graphics_surface.h96
-rw-r--r--src/yuzu/debugger/profiler.cpp1
-rw-r--r--src/yuzu/debugger/profiler.h9
-rw-r--r--src/yuzu/debugger/wait_tree.cpp32
-rw-r--r--src/yuzu/debugger/wait_tree.h13
-rw-r--r--src/yuzu/game_list.cpp18
-rw-r--r--src/yuzu/game_list.h8
-rw-r--r--src/yuzu/game_list_worker.cpp125
-rw-r--r--src/yuzu/game_list_worker.h16
-rw-r--r--src/yuzu/hotkeys.cpp73
-rw-r--r--src/yuzu/hotkeys.h42
-rw-r--r--src/yuzu/hotkeys.ui46
-rw-r--r--src/yuzu/loading_screen.cpp11
-rw-r--r--src/yuzu/loading_screen.ui5
-rw-r--r--src/yuzu/main.cpp211
-rw-r--r--src/yuzu/main.h11
-rw-r--r--src/yuzu/ui_settings.cpp1
-rw-r--r--src/yuzu/ui_settings.h11
-rw-r--r--src/yuzu/util/sequence_dialog/sequence_dialog.cpp37
-rw-r--r--src/yuzu/util/sequence_dialog/sequence_dialog.h24
-rw-r--r--src/yuzu_cmd/config.cpp16
-rw-r--r--src/yuzu_cmd/default_ini.h8
-rw-r--r--src/yuzu_cmd/emu_window/emu_window_sdl2.cpp14
-rw-r--r--src/yuzu_cmd/yuzu.cpp11
460 files changed, 25584 insertions, 11774 deletions
diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md
index 986efcfb9..70e1bba67 100644
--- a/.github/ISSUE_TEMPLATE.md
+++ b/.github/ISSUE_TEMPLATE.md
@@ -1,16 +1,27 @@
1<!-- 1<!--
2Please keep in mind yuzu is EXPERIMENTAL SOFTWARE. 2Please keep in mind yuzu is EXPERIMENTAL SOFTWARE.
3 3
4Please read the FAQ: https://yuzu-emu.org/wiki/faq/ 4Please read the FAQ:
5https://yuzu-emu.org/wiki/faq/
5 6
6When submitting an issue, please do the following: 7THIS IS NOT A SUPPORT FORUM, FOR SUPPORT GO TO:
8https://community.citra-emu.org/
7 9
8- Provide the version (commit hash) of yuzu you are using. 10If the FAQ does not answer your question, please go to:
9- Provide sufficient detail for the issue to be reproduced. 11https://community.citra-emu.org/
10- Provide: 12
13When submitting an issue, please check the following:
14
15- You have read the above.
16- You have provided the version (commit hash) of yuzu you are using.
17- You have provided sufficient detail for the issue to be reproduced.
18- You have provided system specs (if relevant).
19- Please also provide:
20 - For any issues, a log file
11 - For crashes, a backtrace. 21 - For crashes, a backtrace.
12 - For graphical issues, comparison screenshots with real hardware. 22 - For graphical issues, comparison screenshots with real hardware.
13 - For emulation inaccuracies, a test-case (if able). 23 - For emulation inaccuracies, a test-case (if able).
24
14--> 25-->
15 26
16 27
diff --git a/.gitmodules b/.gitmodules
index a33a04167..3a49c4874 100644
--- a/.gitmodules
+++ b/.gitmodules
@@ -37,3 +37,12 @@
37[submodule "discord-rpc"] 37[submodule "discord-rpc"]
38 path = externals/discord-rpc 38 path = externals/discord-rpc
39 url = https://github.com/discordapp/discord-rpc.git 39 url = https://github.com/discordapp/discord-rpc.git
40[submodule "Vulkan-Headers"]
41 path = externals/Vulkan-Headers
42 url = https://github.com/KhronosGroup/Vulkan-Headers.git
43[submodule "externals/zstd"]
44 path = externals/zstd
45 url = https://github.com/facebook/zstd
46[submodule "sirit"]
47 path = externals/sirit
48 url = https://github.com/ReinUsesLisp/sirit
diff --git a/.travis.yml b/.travis.yml
index b0fbe3c5f..9512f7843 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -24,7 +24,7 @@ matrix:
24 - os: osx 24 - os: osx
25 env: NAME="macos build" 25 env: NAME="macos build"
26 sudo: false 26 sudo: false
27 osx_image: xcode10 27 osx_image: xcode10.1
28 install: "./.travis/macos/deps.sh" 28 install: "./.travis/macos/deps.sh"
29 script: "./.travis/macos/build.sh" 29 script: "./.travis/macos/build.sh"
30 after_success: "./.travis/macos/upload.sh" 30 after_success: "./.travis/macos/upload.sh"
diff --git a/.travis/common/travis-ci.env b/.travis/common/travis-ci.env
index ec8e2dd63..cffeb2e2b 100644
--- a/.travis/common/travis-ci.env
+++ b/.travis/common/travis-ci.env
@@ -6,6 +6,8 @@ TRAVIS_BRANCH
6TRAVIS_BUILD_ID 6TRAVIS_BUILD_ID
7TRAVIS_BUILD_NUMBER 7TRAVIS_BUILD_NUMBER
8TRAVIS_COMMIT 8TRAVIS_COMMIT
9TRAVIS_COMMIT_RANGE
10TRAVIS_EVENT_TYPE
9TRAVIS_JOB_ID 11TRAVIS_JOB_ID
10TRAVIS_JOB_NUMBER 12TRAVIS_JOB_NUMBER
11TRAVIS_REPO_SLUG 13TRAVIS_REPO_SLUG
diff --git a/.travis/macos/build.sh b/.travis/macos/build.sh
index 4a14837fc..b7b4c6f8c 100755
--- a/.travis/macos/build.sh
+++ b/.travis/macos/build.sh
@@ -2,7 +2,7 @@
2 2
3set -o pipefail 3set -o pipefail
4 4
5export MACOSX_DEPLOYMENT_TARGET=10.13 5export MACOSX_DEPLOYMENT_TARGET=10.14
6export Qt5_DIR=$(brew --prefix)/opt/qt5 6export Qt5_DIR=$(brew --prefix)/opt/qt5
7export UNICORNDIR=$(pwd)/externals/unicorn 7export UNICORNDIR=$(pwd)/externals/unicorn
8export PATH="/usr/local/opt/ccache/libexec:$PATH" 8export PATH="/usr/local/opt/ccache/libexec:$PATH"
diff --git a/.travis/macos/deps.sh b/.travis/macos/deps.sh
index 1a547c060..faeafa216 100755
--- a/.travis/macos/deps.sh
+++ b/.travis/macos/deps.sh
@@ -1,5 +1,6 @@
1#!/bin/sh -ex 1#!/bin/sh -ex
2 2
3brew update 3brew update
4brew install dylibbundler p7zip qt5 sdl2 ccache 4brew install p7zip qt5 sdl2 ccache
5brew outdated cmake || brew upgrade cmake 5brew outdated cmake || brew upgrade cmake
6pip3 install macpack
diff --git a/.travis/macos/upload.sh b/.travis/macos/upload.sh
index 9ba95086b..66e3455ff 100755
--- a/.travis/macos/upload.sh
+++ b/.travis/macos/upload.sh
@@ -11,92 +11,19 @@ mkdir "$REV_NAME"
11cp build/bin/yuzu-cmd "$REV_NAME" 11cp build/bin/yuzu-cmd "$REV_NAME"
12cp -r build/bin/yuzu.app "$REV_NAME" 12cp -r build/bin/yuzu.app "$REV_NAME"
13 13
14# move qt libs into app bundle for deployment 14# move libs into folder for deployment
15$(brew --prefix)/opt/qt5/bin/macdeployqt "${REV_NAME}/yuzu.app" 15macpack "${REV_NAME}/yuzu.app/Contents/MacOS/yuzu" -d "../Frameworks"
16# move qt frameworks into app bundle for deployment
17$(brew --prefix)/opt/qt5/bin/macdeployqt "${REV_NAME}/yuzu.app" -executable="${REV_NAME}/yuzu.app/Contents/MacOS/yuzu"
16 18
17# move SDL2 libs into folder for deployment 19# move libs into folder for deployment
18dylibbundler -b -x "${REV_NAME}/yuzu-cmd" -cd -d "${REV_NAME}/libs" -p "@executable_path/libs/" 20macpack "${REV_NAME}/yuzu-cmd" -d "libs"
19
20# Make the changes to make the yuzu app standalone (i.e. not dependent on the current brew installation).
21# To do this, the absolute references to each and every QT framework must be re-written to point to the local frameworks
22# (in the Contents/Frameworks folder).
23# The "install_name_tool" is used to do so.
24
25# Coreutils is a hack to coerce Homebrew to point to the absolute Cellar path (symlink dereferenced). i.e:
26# ls -l /usr/local/opt/qt5:: /usr/local/opt/qt5 -> ../Cellar/qt5/5.6.1-1
27# grealpath ../Cellar/qt5/5.6.1-1:: /usr/local/Cellar/qt5/5.6.1-1
28brew install coreutils || brew upgrade coreutils || true
29
30REV_NAME_ALT=$REV_NAME/
31# grealpath is located in coreutils, there is no "realpath" for OS X :(
32QT_BREWS_PATH=$(grealpath "$(brew --prefix qt5)")
33BREW_PATH=$(brew --prefix)
34QT_VERSION_NUM=5
35
36$BREW_PATH/opt/qt5/bin/macdeployqt "${REV_NAME_ALT}yuzu.app" \
37 -executable="${REV_NAME_ALT}yuzu.app/Contents/MacOS/yuzu"
38
39# These are the files that macdeployqt packed into Contents/Frameworks/ - we don't want those, so we replace them.
40declare -a macos_libs=("QtCore" "QtWidgets" "QtGui" "QtOpenGL" "QtPrintSupport")
41
42for macos_lib in "${macos_libs[@]}"
43do
44 SC_FRAMEWORK_PART=$macos_lib.framework/Versions/$QT_VERSION_NUM/$macos_lib
45 # Replace macdeployqt versions of the Frameworks with our own (from /usr/local/opt/qt5/lib/)
46 cp "$BREW_PATH/opt/qt5/lib/$SC_FRAMEWORK_PART" "${REV_NAME_ALT}yuzu.app/Contents/Frameworks/$SC_FRAMEWORK_PART"
47
48 # Replace references within the embedded Framework files with "internal" versions.
49 for macos_lib2 in "${macos_libs[@]}"
50 do
51 # Since brew references both the non-symlinked and symlink paths of QT5, it needs to be duplicated.
52 # /usr/local/Cellar/qt5/5.6.1-1/lib and /usr/local/opt/qt5/lib both resolve to the same files.
53 # So the two lines below are effectively duplicates when resolved as a path, but as strings, they aren't.
54 RM_FRAMEWORK_PART=$macos_lib2.framework/Versions/$QT_VERSION_NUM/$macos_lib2
55 install_name_tool -change \
56 $QT_BREWS_PATH/lib/$RM_FRAMEWORK_PART \
57 @executable_path/../Frameworks/$RM_FRAMEWORK_PART \
58 "${REV_NAME_ALT}yuzu.app/Contents/Frameworks/$SC_FRAMEWORK_PART"
59 install_name_tool -change \
60 "$BREW_PATH/opt/qt5/lib/$RM_FRAMEWORK_PART" \
61 @executable_path/../Frameworks/$RM_FRAMEWORK_PART \
62 "${REV_NAME_ALT}yuzu.app/Contents/Frameworks/$SC_FRAMEWORK_PART"
63 done
64done
65
66# Handles `This application failed to start because it could not find or load the Qt platform plugin "cocoa"`
67# Which manifests itself as:
68# "Exception Type: EXC_CRASH (SIGABRT) | Exception Codes: 0x0000000000000000, 0x0000000000000000 | Exception Note: EXC_CORPSE_NOTIFY"
69# There may be more dylibs needed to be fixed...
70declare -a macos_plugins=("Plugins/platforms/libqcocoa.dylib")
71
72for macos_lib in "${macos_plugins[@]}"
73do
74 install_name_tool -id @executable_path/../$macos_lib "${REV_NAME_ALT}yuzu.app/Contents/$macos_lib"
75 for macos_lib2 in "${macos_libs[@]}"
76 do
77 RM_FRAMEWORK_PART=$macos_lib2.framework/Versions/$QT_VERSION_NUM/$macos_lib2
78 install_name_tool -change \
79 $QT_BREWS_PATH/lib/$RM_FRAMEWORK_PART \
80 @executable_path/../Frameworks/$RM_FRAMEWORK_PART \
81 "${REV_NAME_ALT}yuzu.app/Contents/$macos_lib"
82 install_name_tool -change \
83 "$BREW_PATH/opt/qt5/lib/$RM_FRAMEWORK_PART" \
84 @executable_path/../Frameworks/$RM_FRAMEWORK_PART \
85 "${REV_NAME_ALT}yuzu.app/Contents/$macos_lib"
86 done
87done
88
89for macos_lib in "${macos_libs[@]}"
90do
91 # Debugging info for Travis-CI
92 otool -L "${REV_NAME_ALT}yuzu.app/Contents/Frameworks/$macos_lib.framework/Versions/$QT_VERSION_NUM/$macos_lib"
93done
94 21
95# Make the yuzu.app application launch a debugging terminal. 22# Make the yuzu.app application launch a debugging terminal.
96# Store away the actual binary 23# Store away the actual binary
97mv ${REV_NAME_ALT}yuzu.app/Contents/MacOS/yuzu ${REV_NAME_ALT}yuzu.app/Contents/MacOS/yuzu-bin 24mv ${REV_NAME}/yuzu.app/Contents/MacOS/yuzu ${REV_NAME}/yuzu.app/Contents/MacOS/yuzu-bin
98 25
99cat > ${REV_NAME_ALT}yuzu.app/Contents/MacOS/yuzu <<EOL 26cat > ${REV_NAME}/yuzu.app/Contents/MacOS/yuzu <<EOL
100#!/usr/bin/env bash 27#!/usr/bin/env bash
101cd "\`dirname "\$0"\`" 28cd "\`dirname "\$0"\`"
102chmod +x yuzu-bin 29chmod +x yuzu-bin
@@ -105,6 +32,9 @@ EOL
105# Content that will serve as the launching script for yuzu (within the .app folder) 32# Content that will serve as the launching script for yuzu (within the .app folder)
106 33
107# Make the launching script executable 34# Make the launching script executable
108chmod +x ${REV_NAME_ALT}yuzu.app/Contents/MacOS/yuzu 35chmod +x ${REV_NAME}/yuzu.app/Contents/MacOS/yuzu
36
37# Verify loader instructions
38find "$REV_NAME" -exec otool -L {} \;
109 39
110. .travis/common/post-upload.sh 40. .travis/common/post-upload.sh
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 871e0ca1a..6a417017c 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -23,6 +23,8 @@ option(YUZU_USE_QT_WEB_ENGINE "Use QtWebEngine for web applet implementation" OF
23 23
24option(ENABLE_CUBEB "Enables the cubeb audio backend" ON) 24option(ENABLE_CUBEB "Enables the cubeb audio backend" ON)
25 25
26option(ENABLE_VULKAN "Enables Vulkan backend" ON)
27
26option(USE_DISCORD_PRESENCE "Enables Discord Rich Presence" OFF) 28option(USE_DISCORD_PRESENCE "Enables Discord Rich Presence" OFF)
27 29
28if(NOT EXISTS ${PROJECT_SOURCE_DIR}/.git/hooks/pre-commit) 30if(NOT EXISTS ${PROJECT_SOURCE_DIR}/.git/hooks/pre-commit)
@@ -102,90 +104,18 @@ endif()
102message(STATUS "Target architecture: ${ARCHITECTURE}") 104message(STATUS "Target architecture: ${ARCHITECTURE}")
103 105
104 106
105# Configure compilation flags 107# Configure C++ standard
106# =========================== 108# ===========================
107 109
108set(CMAKE_CXX_STANDARD 17) 110set(CMAKE_CXX_STANDARD 17)
109set(CMAKE_CXX_STANDARD_REQUIRED ON) 111set(CMAKE_CXX_STANDARD_REQUIRED ON)
110 112
111if (NOT MSVC)
112 set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-attributes")
113 set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS}")
114
115 if (MINGW)
116 add_definitions(-DMINGW_HAS_SECURE_API)
117
118 if (MINGW_STATIC_BUILD)
119 add_definitions(-DQT_STATICPLUGIN)
120 set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -static")
121 set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -static")
122 endif()
123 endif()
124else()
125 # Silence "deprecation" warnings
126 add_definitions(/D_CRT_SECURE_NO_WARNINGS /D_CRT_NONSTDC_NO_DEPRECATE /D_SCL_SECURE_NO_WARNINGS)
127 # Avoid windows.h junk
128 add_definitions(/DNOMINMAX)
129 # Avoid windows.h from including some usually unused libs like winsocks.h, since this might cause some redefinition errors.
130 add_definitions(/DWIN32_LEAN_AND_MEAN)
131
132 set(CMAKE_CONFIGURATION_TYPES Debug Release CACHE STRING "" FORCE)
133
134 # Tweak optimization settings
135 # As far as I can tell, there's no way to override the CMake defaults while leaving user
136 # changes intact, so we'll just clobber everything and say sorry.
137 message(STATUS "Cache compiler flags ignored, please edit CMakeLists.txt to change the flags.")
138
139 # /W3 - Level 3 warnings
140 # /MP - Multi-threaded compilation
141 # /Zi - Output debugging information
142 # /Zo - enhanced debug info for optimized builds
143 # /permissive- - enables stricter C++ standards conformance checks
144 set(CMAKE_C_FLAGS "/W3 /MP /Zi /Zo /permissive-" CACHE STRING "" FORCE)
145 # /EHsc - C++-only exception handling semantics
146 # /Zc:throwingNew - let codegen assume `operator new` will never return null
147 # /Zc:inline - let codegen omit inline functions in object files
148 set(CMAKE_CXX_FLAGS "${CMAKE_C_FLAGS} /EHsc /std:c++latest /Zc:throwingNew,inline" CACHE STRING "" FORCE)
149
150 # /MDd - Multi-threaded Debug Runtime DLL
151 set(CMAKE_C_FLAGS_DEBUG "/Od /MDd" CACHE STRING "" FORCE)
152 set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_C_FLAGS_DEBUG}" CACHE STRING "" FORCE)
153
154 # /O2 - Optimization level 2
155 # /GS- - No stack buffer overflow checks
156 # /MD - Multi-threaded runtime DLL
157 set(CMAKE_C_FLAGS_RELEASE "/O2 /GS- /MD" CACHE STRING "" FORCE)
158 set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_C_FLAGS_RELEASE}" CACHE STRING "" FORCE)
159
160 set(CMAKE_EXE_LINKER_FLAGS_DEBUG "/DEBUG /MANIFEST:NO" CACHE STRING "" FORCE)
161 set(CMAKE_EXE_LINKER_FLAGS_RELEASE "/DEBUG /MANIFEST:NO /INCREMENTAL:NO /OPT:REF,ICF" CACHE STRING "" FORCE)
162endif()
163
164# Fix GCC C++17 and Boost.ICL incompatibility (needed to build dynarmic)
165# See https://bugzilla.redhat.com/show_bug.cgi?id=1485641#c1
166if (CMAKE_COMPILER_IS_GNUCC)
167 set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fno-new-ttp-matching")
168endif()
169
170# Set file offset size to 64 bits.
171#
172# On modern Unixes, this is typically already the case. The lone exception is
173# glibc, which may default to 32 bits. glibc allows this to be configured
174# by setting _FILE_OFFSET_BITS.
175if(CMAKE_SYSTEM_NAME STREQUAL "Linux" OR MINGW)
176 add_definitions(-D_FILE_OFFSET_BITS=64)
177endif()
178
179# CMake seems to only define _DEBUG on Windows
180set_property(DIRECTORY APPEND PROPERTY
181 COMPILE_DEFINITIONS $<$<CONFIG:Debug>:_DEBUG> $<$<NOT:$<CONFIG:Debug>>:NDEBUG>)
182
183# System imported libraries 113# System imported libraries
184# ====================== 114# ======================
185 115
186find_package(Boost 1.63.0 QUIET) 116find_package(Boost 1.66.0 QUIET)
187if (NOT Boost_FOUND) 117if (NOT Boost_FOUND)
188 message(STATUS "Boost 1.63.0 or newer not found, falling back to externals") 118 message(STATUS "Boost 1.66.0 or newer not found, falling back to externals")
189 119
190 set(BOOST_ROOT "${PROJECT_SOURCE_DIR}/externals/boost") 120 set(BOOST_ROOT "${PROJECT_SOURCE_DIR}/externals/boost")
191 set(Boost_NO_SYSTEM_PATHS OFF) 121 set(Boost_NO_SYSTEM_PATHS OFF)
@@ -330,25 +260,21 @@ endif()
330# Platform-specific library requirements 260# Platform-specific library requirements
331# ====================================== 261# ======================================
332 262
333IF (APPLE) 263if (APPLE)
334 find_library(COCOA_LIBRARY Cocoa) # Umbrella framework for everything GUI-related 264 # Umbrella framework for everything GUI-related
265 find_library(COCOA_LIBRARY Cocoa)
335 set(PLATFORM_LIBRARIES ${COCOA_LIBRARY} ${IOKIT_LIBRARY} ${COREVIDEO_LIBRARY}) 266 set(PLATFORM_LIBRARIES ${COCOA_LIBRARY} ${IOKIT_LIBRARY} ${COREVIDEO_LIBRARY})
336 267elseif (WIN32)
337 if (CMAKE_CXX_COMPILER_ID STREQUAL Clang)
338 set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -stdlib=libc++")
339 set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -stdlib=libc++")
340 endif()
341ELSEIF (WIN32)
342 # WSAPoll and SHGetKnownFolderPath (AppData/Roaming) didn't exist before WinNT 6.x (Vista) 268 # WSAPoll and SHGetKnownFolderPath (AppData/Roaming) didn't exist before WinNT 6.x (Vista)
343 add_definitions(-D_WIN32_WINNT=0x0600 -DWINVER=0x0600) 269 add_definitions(-D_WIN32_WINNT=0x0600 -DWINVER=0x0600)
344 set(PLATFORM_LIBRARIES winmm ws2_32) 270 set(PLATFORM_LIBRARIES winmm ws2_32)
345 IF (MINGW) 271 if (MINGW)
346 # PSAPI is the Process Status API 272 # PSAPI is the Process Status API
347 set(PLATFORM_LIBRARIES ${PLATFORM_LIBRARIES} psapi imm32 version) 273 set(PLATFORM_LIBRARIES ${PLATFORM_LIBRARIES} psapi imm32 version)
348 ENDIF (MINGW) 274 endif()
349ELSEIF (CMAKE_SYSTEM_NAME MATCHES "^(Linux|kFreeBSD|GNU|SunOS)$") 275elseif (CMAKE_SYSTEM_NAME MATCHES "^(Linux|kFreeBSD|GNU|SunOS)$")
350 set(PLATFORM_LIBRARIES rt) 276 set(PLATFORM_LIBRARIES rt)
351ENDIF (APPLE) 277endif()
352 278
353# Setup a custom clang-format target (if clang-format can be found) that will run 279# Setup a custom clang-format target (if clang-format can be found) that will run
354# against all the src files. This should be used before making a pull request. 280# against all the src files. This should be used before making a pull request.
@@ -383,7 +309,7 @@ if (CLANG_FORMAT)
383 set(CCOMMENT "Running clang format against all the .h and .cpp files in src/") 309 set(CCOMMENT "Running clang format against all the .h and .cpp files in src/")
384 if (WIN32) 310 if (WIN32)
385 add_custom_target(clang-format 311 add_custom_target(clang-format
386 COMMAND powershell.exe -Command "Get-ChildItem ${SRCS}/* -Include *.cpp,*.h -Recurse | Foreach {${CLANG_FORMAT} -i $_.fullname}" 312 COMMAND powershell.exe -Command "Get-ChildItem '${SRCS}/*' -Include *.cpp,*.h -Recurse | Foreach {&'${CLANG_FORMAT}' -i $_.fullname}"
387 COMMENT ${CCOMMENT}) 313 COMMENT ${CCOMMENT})
388 elseif(MINGW) 314 elseif(MINGW)
389 add_custom_target(clang-format 315 add_custom_target(clang-format
@@ -419,19 +345,6 @@ function(create_target_directory_groups target_name)
419 endforeach() 345 endforeach()
420endfunction() 346endfunction()
421 347
422# Gets a UTC timstamp and sets the provided variable to it
423function(get_timestamp _var)
424 string(TIMESTAMP timestamp UTC)
425 set(${_var} "${timestamp}" PARENT_SCOPE)
426endfunction()
427
428# generate git/build information
429include(GetGitRevisionDescription)
430get_git_head_revision(GIT_REF_SPEC GIT_REV)
431git_describe(GIT_DESC --always --long --dirty)
432git_branch_name(GIT_BRANCH)
433get_timestamp(BUILD_DATE)
434
435enable_testing() 348enable_testing()
436add_subdirectory(externals) 349add_subdirectory(externals)
437add_subdirectory(src) 350add_subdirectory(src)
diff --git a/CMakeModules/GenerateSCMRev.cmake b/CMakeModules/GenerateSCMRev.cmake
new file mode 100644
index 000000000..08315a1f1
--- /dev/null
+++ b/CMakeModules/GenerateSCMRev.cmake
@@ -0,0 +1,95 @@
1# Gets a UTC timstamp and sets the provided variable to it
2function(get_timestamp _var)
3 string(TIMESTAMP timestamp UTC)
4 set(${_var} "${timestamp}" PARENT_SCOPE)
5endfunction()
6
7list(APPEND CMAKE_MODULE_PATH "${SRC_DIR}/externals/cmake-modules")
8# generate git/build information
9include(GetGitRevisionDescription)
10get_git_head_revision(GIT_REF_SPEC GIT_REV)
11git_describe(GIT_DESC --always --long --dirty)
12git_branch_name(GIT_BRANCH)
13get_timestamp(BUILD_DATE)
14
15# Generate cpp with Git revision from template
16# Also if this is a CI build, add the build name (ie: Nightly, Canary) to the scm_rev file as well
17set(REPO_NAME "")
18set(BUILD_VERSION "0")
19if (BUILD_REPOSITORY)
20 # regex capture the string nightly or canary into CMAKE_MATCH_1
21 string(REGEX MATCH "yuzu-emu/yuzu-?(.*)" OUTVAR ${BUILD_REPOSITORY})
22 if (${CMAKE_MATCH_COUNT} GREATER 0)
23 # capitalize the first letter of each word in the repo name.
24 string(REPLACE "-" ";" REPO_NAME_LIST ${CMAKE_MATCH_1})
25 foreach(WORD ${REPO_NAME_LIST})
26 string(SUBSTRING ${WORD} 0 1 FIRST_LETTER)
27 string(SUBSTRING ${WORD} 1 -1 REMAINDER)
28 string(TOUPPER ${FIRST_LETTER} FIRST_LETTER)
29 set(REPO_NAME "${REPO_NAME}${FIRST_LETTER}${REMAINDER}")
30 endforeach()
31 if (BUILD_TAG)
32 string(REGEX MATCH "${CMAKE_MATCH_1}-([0-9]+)" OUTVAR ${BUILD_TAG})
33 if (${CMAKE_MATCH_COUNT} GREATER 0)
34 set(BUILD_VERSION ${CMAKE_MATCH_1})
35 endif()
36 if (BUILD_VERSION)
37 # This leaves a trailing space on the last word, but we actually want that
38 # because of how it's styled in the title bar.
39 set(BUILD_FULLNAME "${REPO_NAME} ${BUILD_VERSION} ")
40 else()
41 set(BUILD_FULLNAME "")
42 endif()
43 endif()
44 endif()
45endif()
46
47# The variable SRC_DIR must be passed into the script (since it uses the current build directory for all values of CMAKE_*_DIR)
48set(VIDEO_CORE "${SRC_DIR}/src/video_core")
49set(HASH_FILES
50 "${VIDEO_CORE}/renderer_opengl/gl_shader_cache.cpp"
51 "${VIDEO_CORE}/renderer_opengl/gl_shader_cache.h"
52 "${VIDEO_CORE}/renderer_opengl/gl_shader_decompiler.cpp"
53 "${VIDEO_CORE}/renderer_opengl/gl_shader_decompiler.h"
54 "${VIDEO_CORE}/renderer_opengl/gl_shader_disk_cache.cpp"
55 "${VIDEO_CORE}/renderer_opengl/gl_shader_disk_cache.h"
56 "${VIDEO_CORE}/renderer_opengl/gl_shader_gen.cpp"
57 "${VIDEO_CORE}/renderer_opengl/gl_shader_gen.h"
58 "${VIDEO_CORE}/shader/decode/arithmetic.cpp"
59 "${VIDEO_CORE}/shader/decode/arithmetic_half.cpp"
60 "${VIDEO_CORE}/shader/decode/arithmetic_half_immediate.cpp"
61 "${VIDEO_CORE}/shader/decode/arithmetic_immediate.cpp"
62 "${VIDEO_CORE}/shader/decode/arithmetic_integer.cpp"
63 "${VIDEO_CORE}/shader/decode/arithmetic_integer_immediate.cpp"
64 "${VIDEO_CORE}/shader/decode/bfe.cpp"
65 "${VIDEO_CORE}/shader/decode/bfi.cpp"
66 "${VIDEO_CORE}/shader/decode/conversion.cpp"
67 "${VIDEO_CORE}/shader/decode/ffma.cpp"
68 "${VIDEO_CORE}/shader/decode/float_set.cpp"
69 "${VIDEO_CORE}/shader/decode/float_set_predicate.cpp"
70 "${VIDEO_CORE}/shader/decode/half_set.cpp"
71 "${VIDEO_CORE}/shader/decode/half_set_predicate.cpp"
72 "${VIDEO_CORE}/shader/decode/hfma2.cpp"
73 "${VIDEO_CORE}/shader/decode/integer_set.cpp"
74 "${VIDEO_CORE}/shader/decode/integer_set_predicate.cpp"
75 "${VIDEO_CORE}/shader/decode/memory.cpp"
76 "${VIDEO_CORE}/shader/decode/texture.cpp"
77 "${VIDEO_CORE}/shader/decode/other.cpp"
78 "${VIDEO_CORE}/shader/decode/predicate_set_predicate.cpp"
79 "${VIDEO_CORE}/shader/decode/predicate_set_register.cpp"
80 "${VIDEO_CORE}/shader/decode/register_set_predicate.cpp"
81 "${VIDEO_CORE}/shader/decode/shift.cpp"
82 "${VIDEO_CORE}/shader/decode/video.cpp"
83 "${VIDEO_CORE}/shader/decode/xmad.cpp"
84 "${VIDEO_CORE}/shader/decode.cpp"
85 "${VIDEO_CORE}/shader/shader_ir.cpp"
86 "${VIDEO_CORE}/shader/shader_ir.h"
87 "${VIDEO_CORE}/shader/track.cpp"
88)
89set(COMBINED "")
90foreach (F IN LISTS HASH_FILES)
91 file(READ ${F} TMP)
92 set(COMBINED "${COMBINED}${TMP}")
93endforeach()
94string(MD5 SHADER_CACHE_VERSION "${COMBINED}")
95configure_file("${SRC_DIR}/src/common/scm_rev.cpp.in" "scm_rev.cpp" @ONLY)
diff --git a/README.md b/README.md
index 1d5ee58cc..fa4233b2a 100644
--- a/README.md
+++ b/README.md
@@ -7,7 +7,7 @@ yuzu is an experimental open-source emulator for the Nintendo Switch from the cr
7 7
8It is written in C++ with portability in mind, with builds actively maintained for Windows, Linux and macOS. The emulator is currently only useful for homebrew development and research purposes. 8It is written in C++ with portability in mind, with builds actively maintained for Windows, Linux and macOS. The emulator is currently only useful for homebrew development and research purposes.
9 9
10yuzu only emulates a subset of Switch hardware and therefore is generally only useful for running/debugging homebrew applications. At this time, yuzu cannot play any commercial games without major problems. yuzu can boot some games, to varying degrees of success, but does not implement any of the necessary GPU features to render 3D graphics. 10yuzu only emulates a subset of Switch hardware and therefore is generally only useful for running/debugging homebrew applications. At this time, yuzu cannot play any commercial games without major problems. yuzu can boot some games, to varying degrees of success.
11 11
12yuzu is licensed under the GPLv2 (or any later version). Refer to the license.txt file included. 12yuzu is licensed under the GPLv2 (or any later version). Refer to the license.txt file included.
13 13
diff --git a/externals/CMakeLists.txt b/externals/CMakeLists.txt
index e156bbece..3f8b6cda8 100644
--- a/externals/CMakeLists.txt
+++ b/externals/CMakeLists.txt
@@ -49,6 +49,10 @@ add_subdirectory(open_source_archives EXCLUDE_FROM_ALL)
49add_library(unicorn-headers INTERFACE) 49add_library(unicorn-headers INTERFACE)
50target_include_directories(unicorn-headers INTERFACE ./unicorn/include) 50target_include_directories(unicorn-headers INTERFACE ./unicorn/include)
51 51
52# Zstandard
53add_subdirectory(zstd/build/cmake EXCLUDE_FROM_ALL)
54target_include_directories(libzstd_static INTERFACE ./zstd/lib)
55
52# SoundTouch 56# SoundTouch
53add_subdirectory(soundtouch) 57add_subdirectory(soundtouch)
54 58
@@ -68,6 +72,11 @@ if (USE_DISCORD_PRESENCE)
68 target_include_directories(discord-rpc INTERFACE ./discord-rpc/include) 72 target_include_directories(discord-rpc INTERFACE ./discord-rpc/include)
69endif() 73endif()
70 74
75# Sirit
76if (ENABLE_VULKAN)
77 add_subdirectory(sirit)
78endif()
79
71if (ENABLE_WEB_SERVICE) 80if (ENABLE_WEB_SERVICE)
72 # LibreSSL 81 # LibreSSL
73 set(LIBRESSL_SKIP_INSTALL ON CACHE BOOL "") 82 set(LIBRESSL_SKIP_INSTALL ON CACHE BOOL "")
diff --git a/externals/Vulkan-Headers b/externals/Vulkan-Headers
new file mode 160000
Subproject 15e5c4db7500b936ae758236f2e72fc1aec2202
diff --git a/externals/cubeb b/externals/cubeb
Subproject 12b78c0edfa40007e41dbdcd9dfe367fbb98d01 Subproject 6f2420de8f155b10330cf973900ac7bdbfee589
diff --git a/externals/opus b/externals/opus
Subproject b2871922a12abb49579512d604cabc471a59ad9 Subproject 562f8ba555c4181e1b57e82e496e4a959b9c019
diff --git a/externals/sirit b/externals/sirit
new file mode 160000
Subproject f7c4b07a7e14edb1dcd93bc9879c823423705c2
diff --git a/externals/zstd b/externals/zstd
new file mode 160000
Subproject 470344d33e1d52a2ada75d278466da8d4ee2faf
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index f69d00a2b..6c99dd5e2 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -1,18 +1,79 @@
1# Enable modules to include each other's files 1# Enable modules to include each other's files
2include_directories(.) 2include_directories(.)
3 3
4# CMake seems to only define _DEBUG on Windows
5set_property(DIRECTORY APPEND PROPERTY
6 COMPILE_DEFINITIONS $<$<CONFIG:Debug>:_DEBUG> $<$<NOT:$<CONFIG:Debug>>:NDEBUG>)
7
8# Set compilation flags
9if (MSVC)
10 set(CMAKE_CONFIGURATION_TYPES Debug Release CACHE STRING "" FORCE)
11
12 # Silence "deprecation" warnings
13 add_definitions(-D_CRT_SECURE_NO_WARNINGS -D_CRT_NONSTDC_NO_DEPRECATE -D_SCL_SECURE_NO_WARNINGS)
14
15 # Avoid windows.h junk
16 add_definitions(-DNOMINMAX)
17
18 # Avoid windows.h from including some usually unused libs like winsocks.h, since this might cause some redefinition errors.
19 add_definitions(-DWIN32_LEAN_AND_MEAN)
20
21 # /W3 - Level 3 warnings
22 # /MP - Multi-threaded compilation
23 # /Zi - Output debugging information
24 # /Zo - enhanced debug info for optimized builds
25 # /permissive- - enables stricter C++ standards conformance checks
26 # /EHsc - C++-only exception handling semantics
27 # /Zc:throwingNew - let codegen assume `operator new` will never return null
28 # /Zc:inline - let codegen omit inline functions in object files
29 add_compile_options(/W3 /MP /Zi /Zo /permissive- /EHsc /std:c++latest /Zc:throwingNew,inline)
30
31 # /GS- - No stack buffer overflow checks
32 add_compile_options("$<$<CONFIG:Release>:/GS->")
33
34 set(CMAKE_EXE_LINKER_FLAGS_DEBUG "/DEBUG /MANIFEST:NO" CACHE STRING "" FORCE)
35 set(CMAKE_EXE_LINKER_FLAGS_RELEASE "/DEBUG /MANIFEST:NO /INCREMENTAL:NO /OPT:REF,ICF" CACHE STRING "" FORCE)
36else()
37 add_compile_options("-Wno-attributes")
38
39 if (APPLE AND CMAKE_CXX_COMPILER_ID STREQUAL Clang)
40 add_compile_options("-stdlib=libc++")
41 endif()
42
43 # Set file offset size to 64 bits.
44 #
45 # On modern Unixes, this is typically already the case. The lone exception is
46 # glibc, which may default to 32 bits. glibc allows this to be configured
47 # by setting _FILE_OFFSET_BITS.
48 if(CMAKE_SYSTEM_NAME STREQUAL "Linux" OR MINGW)
49 add_definitions(-D_FILE_OFFSET_BITS=64)
50 endif()
51
52 if (MINGW)
53 add_definitions(-DMINGW_HAS_SECURE_API)
54
55 if (MINGW_STATIC_BUILD)
56 add_definitions(-DQT_STATICPLUGIN)
57 add_compile_options("-static")
58 endif()
59 endif()
60endif()
61
4add_subdirectory(common) 62add_subdirectory(common)
5add_subdirectory(core) 63add_subdirectory(core)
6add_subdirectory(audio_core) 64add_subdirectory(audio_core)
7add_subdirectory(video_core) 65add_subdirectory(video_core)
8add_subdirectory(input_common) 66add_subdirectory(input_common)
9add_subdirectory(tests) 67add_subdirectory(tests)
68
10if (ENABLE_SDL2) 69if (ENABLE_SDL2)
11 add_subdirectory(yuzu_cmd) 70 add_subdirectory(yuzu_cmd)
12endif() 71endif()
72
13if (ENABLE_QT) 73if (ENABLE_QT)
14 add_subdirectory(yuzu) 74 add_subdirectory(yuzu)
15endif() 75endif()
76
16if (ENABLE_WEB_SERVICE) 77if (ENABLE_WEB_SERVICE)
17 add_subdirectory(web_service) 78 add_subdirectory(web_service)
18endif() 79endif()
diff --git a/src/audio_core/audio_out.cpp b/src/audio_core/audio_out.cpp
index 50d2a1ed3..8619a3f03 100644
--- a/src/audio_core/audio_out.cpp
+++ b/src/audio_core/audio_out.cpp
@@ -26,14 +26,15 @@ static Stream::Format ChannelsToStreamFormat(u32 num_channels) {
26 return {}; 26 return {};
27} 27}
28 28
29StreamPtr AudioOut::OpenStream(u32 sample_rate, u32 num_channels, std::string&& name, 29StreamPtr AudioOut::OpenStream(Core::Timing::CoreTiming& core_timing, u32 sample_rate,
30 u32 num_channels, std::string&& name,
30 Stream::ReleaseCallback&& release_callback) { 31 Stream::ReleaseCallback&& release_callback) {
31 if (!sink) { 32 if (!sink) {
32 sink = CreateSinkFromID(Settings::values.sink_id, Settings::values.audio_device_id); 33 sink = CreateSinkFromID(Settings::values.sink_id, Settings::values.audio_device_id);
33 } 34 }
34 35
35 return std::make_shared<Stream>( 36 return std::make_shared<Stream>(
36 sample_rate, ChannelsToStreamFormat(num_channels), std::move(release_callback), 37 core_timing, sample_rate, ChannelsToStreamFormat(num_channels), std::move(release_callback),
37 sink->AcquireSinkStream(sample_rate, num_channels, name), std::move(name)); 38 sink->AcquireSinkStream(sample_rate, num_channels, name), std::move(name));
38} 39}
39 40
diff --git a/src/audio_core/audio_out.h b/src/audio_core/audio_out.h
index df9607ac7..b07588287 100644
--- a/src/audio_core/audio_out.h
+++ b/src/audio_core/audio_out.h
@@ -13,6 +13,10 @@
13#include "audio_core/stream.h" 13#include "audio_core/stream.h"
14#include "common/common_types.h" 14#include "common/common_types.h"
15 15
16namespace Core::Timing {
17class CoreTiming;
18}
19
16namespace AudioCore { 20namespace AudioCore {
17 21
18/** 22/**
@@ -21,8 +25,8 @@ namespace AudioCore {
21class AudioOut { 25class AudioOut {
22public: 26public:
23 /// Opens a new audio stream 27 /// Opens a new audio stream
24 StreamPtr OpenStream(u32 sample_rate, u32 num_channels, std::string&& name, 28 StreamPtr OpenStream(Core::Timing::CoreTiming& core_timing, u32 sample_rate, u32 num_channels,
25 Stream::ReleaseCallback&& release_callback); 29 std::string&& name, Stream::ReleaseCallback&& release_callback);
26 30
27 /// Returns a vector of recently released buffers specified by tag for the specified stream 31 /// Returns a vector of recently released buffers specified by tag for the specified stream
28 std::vector<Buffer::Tag> GetTagsAndReleaseBuffers(StreamPtr stream, std::size_t max_count); 32 std::vector<Buffer::Tag> GetTagsAndReleaseBuffers(StreamPtr stream, std::size_t max_count);
diff --git a/src/audio_core/audio_renderer.cpp b/src/audio_core/audio_renderer.cpp
index 00c026511..9a0939883 100644
--- a/src/audio_core/audio_renderer.cpp
+++ b/src/audio_core/audio_renderer.cpp
@@ -8,6 +8,7 @@
8#include "audio_core/codec.h" 8#include "audio_core/codec.h"
9#include "common/assert.h" 9#include "common/assert.h"
10#include "common/logging/log.h" 10#include "common/logging/log.h"
11#include "core/core.h"
11#include "core/hle/kernel/writable_event.h" 12#include "core/hle/kernel/writable_event.h"
12#include "core/memory.h" 13#include "core/memory.h"
13 14
@@ -71,14 +72,14 @@ private:
71 EffectOutStatus out_status{}; 72 EffectOutStatus out_status{};
72 EffectInStatus info{}; 73 EffectInStatus info{};
73}; 74};
74AudioRenderer::AudioRenderer(AudioRendererParameter params, 75AudioRenderer::AudioRenderer(Core::Timing::CoreTiming& core_timing, AudioRendererParameter params,
75 Kernel::SharedPtr<Kernel::WritableEvent> buffer_event) 76 Kernel::SharedPtr<Kernel::WritableEvent> buffer_event)
76 : worker_params{params}, buffer_event{buffer_event}, voices(params.voice_count), 77 : worker_params{params}, buffer_event{buffer_event}, voices(params.voice_count),
77 effects(params.effect_count) { 78 effects(params.effect_count) {
78 79
79 audio_out = std::make_unique<AudioCore::AudioOut>(); 80 audio_out = std::make_unique<AudioCore::AudioOut>();
80 stream = audio_out->OpenStream(STREAM_SAMPLE_RATE, STREAM_NUM_CHANNELS, "AudioRenderer", 81 stream = audio_out->OpenStream(core_timing, STREAM_SAMPLE_RATE, STREAM_NUM_CHANNELS,
81 [=]() { buffer_event->Signal(); }); 82 "AudioRenderer", [=]() { buffer_event->Signal(); });
82 audio_out->StartStream(stream); 83 audio_out->StartStream(stream);
83 84
84 QueueMixedBuffer(0); 85 QueueMixedBuffer(0);
diff --git a/src/audio_core/audio_renderer.h b/src/audio_core/audio_renderer.h
index 7826881bf..b2e5d336c 100644
--- a/src/audio_core/audio_renderer.h
+++ b/src/audio_core/audio_renderer.h
@@ -14,6 +14,10 @@
14#include "common/swap.h" 14#include "common/swap.h"
15#include "core/hle/kernel/object.h" 15#include "core/hle/kernel/object.h"
16 16
17namespace Core::Timing {
18class CoreTiming;
19}
20
17namespace Kernel { 21namespace Kernel {
18class WritableEvent; 22class WritableEvent;
19} 23}
@@ -42,16 +46,18 @@ struct AudioRendererParameter {
42 u32_le sample_rate; 46 u32_le sample_rate;
43 u32_le sample_count; 47 u32_le sample_count;
44 u32_le mix_buffer_count; 48 u32_le mix_buffer_count;
45 u32_le unknown_c; 49 u32_le submix_count;
46 u32_le voice_count; 50 u32_le voice_count;
47 u32_le sink_count; 51 u32_le sink_count;
48 u32_le effect_count; 52 u32_le effect_count;
49 u32_le unknown_1c; 53 u32_le performance_frame_count;
50 u8 unknown_20; 54 u8 is_voice_drop_enabled;
51 INSERT_PADDING_BYTES(3); 55 u8 unknown_21;
56 u8 unknown_22;
57 u8 execution_mode;
52 u32_le splitter_count; 58 u32_le splitter_count;
53 u32_le unknown_2c; 59 u32_le num_splitter_send_channels;
54 INSERT_PADDING_WORDS(1); 60 u32_le unknown_30;
55 u32_le revision; 61 u32_le revision;
56}; 62};
57static_assert(sizeof(AudioRendererParameter) == 52, "AudioRendererParameter is an invalid size"); 63static_assert(sizeof(AudioRendererParameter) == 52, "AudioRendererParameter is an invalid size");
@@ -208,7 +214,7 @@ static_assert(sizeof(UpdateDataHeader) == 0x40, "UpdateDataHeader has wrong size
208 214
209class AudioRenderer { 215class AudioRenderer {
210public: 216public:
211 AudioRenderer(AudioRendererParameter params, 217 AudioRenderer(Core::Timing::CoreTiming& core_timing, AudioRendererParameter params,
212 Kernel::SharedPtr<Kernel::WritableEvent> buffer_event); 218 Kernel::SharedPtr<Kernel::WritableEvent> buffer_event);
213 ~AudioRenderer(); 219 ~AudioRenderer();
214 220
diff --git a/src/audio_core/buffer.h b/src/audio_core/buffer.h
index a323b23ec..5ee09e9aa 100644
--- a/src/audio_core/buffer.h
+++ b/src/audio_core/buffer.h
@@ -21,7 +21,7 @@ public:
21 Buffer(Tag tag, std::vector<s16>&& samples) : tag{tag}, samples{std::move(samples)} {} 21 Buffer(Tag tag, std::vector<s16>&& samples) : tag{tag}, samples{std::move(samples)} {}
22 22
23 /// Returns the raw audio data for the buffer 23 /// Returns the raw audio data for the buffer
24 std::vector<s16>& Samples() { 24 std::vector<s16>& GetSamples() {
25 return samples; 25 return samples;
26 } 26 }
27 27
diff --git a/src/audio_core/codec.cpp b/src/audio_core/codec.cpp
index 454de798b..c5a0d98ce 100644
--- a/src/audio_core/codec.cpp
+++ b/src/audio_core/codec.cpp
@@ -68,8 +68,8 @@ std::vector<s16> DecodeADPCM(const u8* const data, std::size_t size, const ADPCM
68 } 68 }
69 } 69 }
70 70
71 state.yn1 = yn1; 71 state.yn1 = static_cast<s16>(yn1);
72 state.yn2 = yn2; 72 state.yn2 = static_cast<s16>(yn2);
73 73
74 return ret; 74 return ret;
75} 75}
diff --git a/src/audio_core/cubeb_sink.cpp b/src/audio_core/cubeb_sink.cpp
index 097328901..7047ed9cf 100644
--- a/src/audio_core/cubeb_sink.cpp
+++ b/src/audio_core/cubeb_sink.cpp
@@ -12,6 +12,10 @@
12#include "common/ring_buffer.h" 12#include "common/ring_buffer.h"
13#include "core/settings.h" 13#include "core/settings.h"
14 14
15#ifdef _WIN32
16#include <objbase.h>
17#endif
18
15namespace AudioCore { 19namespace AudioCore {
16 20
17class CubebSinkStream final : public SinkStream { 21class CubebSinkStream final : public SinkStream {
@@ -46,7 +50,7 @@ public:
46 } 50 }
47 } 51 }
48 52
49 ~CubebSinkStream() { 53 ~CubebSinkStream() override {
50 if (!ctx) { 54 if (!ctx) {
51 return; 55 return;
52 } 56 }
@@ -75,11 +79,11 @@ public:
75 queue.Push(samples); 79 queue.Push(samples);
76 } 80 }
77 81
78 std::size_t SamplesInQueue(u32 num_channels) const override { 82 std::size_t SamplesInQueue(u32 channel_count) const override {
79 if (!ctx) 83 if (!ctx)
80 return 0; 84 return 0;
81 85
82 return queue.Size() / num_channels; 86 return queue.Size() / channel_count;
83 } 87 }
84 88
85 void Flush() override { 89 void Flush() override {
@@ -98,7 +102,7 @@ private:
98 u32 num_channels{}; 102 u32 num_channels{};
99 103
100 Common::RingBuffer<s16, 0x10000> queue; 104 Common::RingBuffer<s16, 0x10000> queue;
101 std::array<s16, 2> last_frame; 105 std::array<s16, 2> last_frame{};
102 std::atomic<bool> should_flush{}; 106 std::atomic<bool> should_flush{};
103 TimeStretcher time_stretch; 107 TimeStretcher time_stretch;
104 108
@@ -108,6 +112,11 @@ private:
108}; 112};
109 113
110CubebSink::CubebSink(std::string_view target_device_name) { 114CubebSink::CubebSink(std::string_view target_device_name) {
115 // Cubeb requires COM to be initialized on the thread calling cubeb_init on Windows
116#ifdef _WIN32
117 com_init_result = CoInitializeEx(nullptr, COINIT_MULTITHREADED);
118#endif
119
111 if (cubeb_init(&ctx, "yuzu", nullptr) != CUBEB_OK) { 120 if (cubeb_init(&ctx, "yuzu", nullptr) != CUBEB_OK) {
112 LOG_CRITICAL(Audio_Sink, "cubeb_init failed"); 121 LOG_CRITICAL(Audio_Sink, "cubeb_init failed");
113 return; 122 return;
@@ -142,6 +151,12 @@ CubebSink::~CubebSink() {
142 } 151 }
143 152
144 cubeb_destroy(ctx); 153 cubeb_destroy(ctx);
154
155#ifdef _WIN32
156 if (SUCCEEDED(com_init_result)) {
157 CoUninitialize();
158 }
159#endif
145} 160}
146 161
147SinkStream& CubebSink::AcquireSinkStream(u32 sample_rate, u32 num_channels, 162SinkStream& CubebSink::AcquireSinkStream(u32 sample_rate, u32 num_channels,
diff --git a/src/audio_core/cubeb_sink.h b/src/audio_core/cubeb_sink.h
index efb9d1634..7ce850f47 100644
--- a/src/audio_core/cubeb_sink.h
+++ b/src/audio_core/cubeb_sink.h
@@ -25,6 +25,10 @@ private:
25 cubeb* ctx{}; 25 cubeb* ctx{};
26 cubeb_devid output_device{}; 26 cubeb_devid output_device{};
27 std::vector<SinkStreamPtr> sink_streams; 27 std::vector<SinkStreamPtr> sink_streams;
28
29#ifdef _WIN32
30 u32 com_init_result = 0;
31#endif
28}; 32};
29 33
30std::vector<std::string> ListCubebSinkDevices(); 34std::vector<std::string> ListCubebSinkDevices();
diff --git a/src/audio_core/stream.cpp b/src/audio_core/stream.cpp
index 874673c4e..22a3f8c84 100644
--- a/src/audio_core/stream.cpp
+++ b/src/audio_core/stream.cpp
@@ -32,13 +32,13 @@ u32 Stream::GetNumChannels() const {
32 return {}; 32 return {};
33} 33}
34 34
35Stream::Stream(u32 sample_rate, Format format, ReleaseCallback&& release_callback, 35Stream::Stream(Core::Timing::CoreTiming& core_timing, u32 sample_rate, Format format,
36 SinkStream& sink_stream, std::string&& name_) 36 ReleaseCallback&& release_callback, SinkStream& sink_stream, std::string&& name_)
37 : sample_rate{sample_rate}, format{format}, release_callback{std::move(release_callback)}, 37 : sample_rate{sample_rate}, format{format}, release_callback{std::move(release_callback)},
38 sink_stream{sink_stream}, name{std::move(name_)} { 38 sink_stream{sink_stream}, core_timing{core_timing}, name{std::move(name_)} {
39 39
40 release_event = CoreTiming::RegisterEvent( 40 release_event = core_timing.RegisterEvent(
41 name, [this](u64 userdata, int cycles_late) { ReleaseActiveBuffer(); }); 41 name, [this](u64 userdata, s64 cycles_late) { ReleaseActiveBuffer(); });
42} 42}
43 43
44void Stream::Play() { 44void Stream::Play() {
@@ -57,7 +57,7 @@ Stream::State Stream::GetState() const {
57 57
58s64 Stream::GetBufferReleaseCycles(const Buffer& buffer) const { 58s64 Stream::GetBufferReleaseCycles(const Buffer& buffer) const {
59 const std::size_t num_samples{buffer.GetSamples().size() / GetNumChannels()}; 59 const std::size_t num_samples{buffer.GetSamples().size() / GetNumChannels()};
60 return CoreTiming::usToCycles((static_cast<u64>(num_samples) * 1000000) / sample_rate); 60 return Core::Timing::usToCycles((static_cast<u64>(num_samples) * 1000000) / sample_rate);
61} 61}
62 62
63static void VolumeAdjustSamples(std::vector<s16>& samples) { 63static void VolumeAdjustSamples(std::vector<s16>& samples) {
@@ -68,7 +68,7 @@ static void VolumeAdjustSamples(std::vector<s16>& samples) {
68 } 68 }
69 69
70 // Implementation of a volume slider with a dynamic range of 60 dB 70 // Implementation of a volume slider with a dynamic range of 60 dB
71 const float volume_scale_factor{std::exp(6.90775f * volume) * 0.001f}; 71 const float volume_scale_factor = volume == 0 ? 0 : std::exp(6.90775f * volume) * 0.001f;
72 for (auto& sample : samples) { 72 for (auto& sample : samples) {
73 sample = static_cast<s16>(sample * volume_scale_factor); 73 sample = static_cast<s16>(sample * volume_scale_factor);
74 } 74 }
@@ -95,11 +95,11 @@ void Stream::PlayNextBuffer() {
95 active_buffer = queued_buffers.front(); 95 active_buffer = queued_buffers.front();
96 queued_buffers.pop(); 96 queued_buffers.pop();
97 97
98 VolumeAdjustSamples(active_buffer->Samples()); 98 VolumeAdjustSamples(active_buffer->GetSamples());
99 99
100 sink_stream.EnqueueSamples(GetNumChannels(), active_buffer->GetSamples()); 100 sink_stream.EnqueueSamples(GetNumChannels(), active_buffer->GetSamples());
101 101
102 CoreTiming::ScheduleEventThreadsafe(GetBufferReleaseCycles(*active_buffer), release_event, {}); 102 core_timing.ScheduleEventThreadsafe(GetBufferReleaseCycles(*active_buffer), release_event, {});
103} 103}
104 104
105void Stream::ReleaseActiveBuffer() { 105void Stream::ReleaseActiveBuffer() {
diff --git a/src/audio_core/stream.h b/src/audio_core/stream.h
index aebfeb51d..05071243b 100644
--- a/src/audio_core/stream.h
+++ b/src/audio_core/stream.h
@@ -13,9 +13,10 @@
13#include "audio_core/buffer.h" 13#include "audio_core/buffer.h"
14#include "common/common_types.h" 14#include "common/common_types.h"
15 15
16namespace CoreTiming { 16namespace Core::Timing {
17class CoreTiming;
17struct EventType; 18struct EventType;
18} 19} // namespace Core::Timing
19 20
20namespace AudioCore { 21namespace AudioCore {
21 22
@@ -42,8 +43,8 @@ public:
42 /// Callback function type, used to change guest state on a buffer being released 43 /// Callback function type, used to change guest state on a buffer being released
43 using ReleaseCallback = std::function<void()>; 44 using ReleaseCallback = std::function<void()>;
44 45
45 Stream(u32 sample_rate, Format format, ReleaseCallback&& release_callback, 46 Stream(Core::Timing::CoreTiming& core_timing, u32 sample_rate, Format format,
46 SinkStream& sink_stream, std::string&& name_); 47 ReleaseCallback&& release_callback, SinkStream& sink_stream, std::string&& name_);
47 48
48 /// Plays the audio stream 49 /// Plays the audio stream
49 void Play(); 50 void Play();
@@ -91,16 +92,17 @@ private:
91 /// Gets the number of core cycles when the specified buffer will be released 92 /// Gets the number of core cycles when the specified buffer will be released
92 s64 GetBufferReleaseCycles(const Buffer& buffer) const; 93 s64 GetBufferReleaseCycles(const Buffer& buffer) const;
93 94
94 u32 sample_rate; ///< Sample rate of the stream 95 u32 sample_rate; ///< Sample rate of the stream
95 Format format; ///< Format of the stream 96 Format format; ///< Format of the stream
96 ReleaseCallback release_callback; ///< Buffer release callback for the stream 97 ReleaseCallback release_callback; ///< Buffer release callback for the stream
97 State state{State::Stopped}; ///< Playback state of the stream 98 State state{State::Stopped}; ///< Playback state of the stream
98 CoreTiming::EventType* release_event{}; ///< Core timing release event for the stream 99 Core::Timing::EventType* release_event{}; ///< Core timing release event for the stream
99 BufferPtr active_buffer; ///< Actively playing buffer in the stream 100 BufferPtr active_buffer; ///< Actively playing buffer in the stream
100 std::queue<BufferPtr> queued_buffers; ///< Buffers queued to be played in the stream 101 std::queue<BufferPtr> queued_buffers; ///< Buffers queued to be played in the stream
101 std::queue<BufferPtr> released_buffers; ///< Buffers recently released from the stream 102 std::queue<BufferPtr> released_buffers; ///< Buffers recently released from the stream
102 SinkStream& sink_stream; ///< Output sink for the stream 103 SinkStream& sink_stream; ///< Output sink for the stream
103 std::string name; ///< Name of the stream, must be unique 104 Core::Timing::CoreTiming& core_timing; ///< Core timing instance.
105 std::string name; ///< Name of the stream, must be unique
104}; 106};
105 107
106using StreamPtr = std::shared_ptr<Stream>; 108using StreamPtr = std::shared_ptr<Stream>;
diff --git a/src/common/CMakeLists.txt b/src/common/CMakeLists.txt
index 845626fc5..1e8e1b215 100644
--- a/src/common/CMakeLists.txt
+++ b/src/common/CMakeLists.txt
@@ -1,42 +1,70 @@
1# Generate cpp with Git revision from template 1# Add a custom command to generate a new shader_cache_version hash when any of the following files change
2# Also if this is a CI build, add the build name (ie: Nightly, Canary) to the scm_rev file as well 2# NOTE: This is an approximation of what files affect shader generation, its possible something else
3set(REPO_NAME "") 3# could affect the result, but much more unlikely than the following files. Keeping a list of files
4set(BUILD_VERSION "0") 4# like this allows for much better caching since it doesn't force the user to recompile binary shaders every update
5if ($ENV{CI}) 5set(VIDEO_CORE "${CMAKE_SOURCE_DIR}/src/video_core")
6 if ($ENV{TRAVIS}) 6if (DEFINED ENV{CI})
7 if (DEFINED ENV{TRAVIS})
7 set(BUILD_REPOSITORY $ENV{TRAVIS_REPO_SLUG}) 8 set(BUILD_REPOSITORY $ENV{TRAVIS_REPO_SLUG})
8 set(BUILD_TAG $ENV{TRAVIS_TAG}) 9 set(BUILD_TAG $ENV{TRAVIS_TAG})
9 elseif($ENV{APPVEYOR}) 10 elseif(DEFINED ENV{APPVEYOR})
10 set(BUILD_REPOSITORY $ENV{APPVEYOR_REPO_NAME}) 11 set(BUILD_REPOSITORY $ENV{APPVEYOR_REPO_NAME})
11 set(BUILD_TAG $ENV{APPVEYOR_REPO_TAG_NAME}) 12 set(BUILD_TAG $ENV{APPVEYOR_REPO_TAG_NAME})
12 endif() 13 endif()
13 # regex capture the string nightly or canary into CMAKE_MATCH_1
14 string(REGEX MATCH "yuzu-emu/yuzu-?(.*)" OUTVAR ${BUILD_REPOSITORY})
15 if (${CMAKE_MATCH_COUNT} GREATER 0)
16 # capitalize the first letter of each word in the repo name.
17 string(REPLACE "-" ";" REPO_NAME_LIST ${CMAKE_MATCH_1})
18 foreach(WORD ${REPO_NAME_LIST})
19 string(SUBSTRING ${WORD} 0 1 FIRST_LETTER)
20 string(SUBSTRING ${WORD} 1 -1 REMAINDER)
21 string(TOUPPER ${FIRST_LETTER} FIRST_LETTER)
22 set(REPO_NAME "${REPO_NAME}${FIRST_LETTER}${REMAINDER}")
23 endforeach()
24 if (BUILD_TAG)
25 string(REGEX MATCH "${CMAKE_MATCH_1}-([0-9]+)" OUTVAR ${BUILD_TAG})
26 if (${CMAKE_MATCH_COUNT} GREATER 0)
27 set(BUILD_VERSION ${CMAKE_MATCH_1})
28 endif()
29 if (BUILD_VERSION)
30 # This leaves a trailing space on the last word, but we actually want that
31 # because of how it's styled in the title bar.
32 set(BUILD_FULLNAME "${REPO_NAME} ${BUILD_VERSION} ")
33 else()
34 set(BUILD_FULLNAME "")
35 endif()
36 endif()
37 endif()
38endif() 14endif()
39configure_file("${CMAKE_CURRENT_SOURCE_DIR}/scm_rev.cpp.in" "${CMAKE_CURRENT_SOURCE_DIR}/scm_rev.cpp" @ONLY) 15add_custom_command(OUTPUT scm_rev.cpp
16 COMMAND ${CMAKE_COMMAND}
17 -DSRC_DIR="${CMAKE_SOURCE_DIR}"
18 -DBUILD_REPOSITORY="${BUILD_REPOSITORY}"
19 -DBUILD_TAG="${BUILD_TAG}"
20 -P "${CMAKE_SOURCE_DIR}/CMakeModules/GenerateSCMRev.cmake"
21 DEPENDS
22 # WARNING! It was too much work to try and make a common location for this list,
23 # so if you need to change it, please update CMakeModules/GenerateSCMRev.cmake as well
24 "${VIDEO_CORE}/renderer_opengl/gl_shader_cache.cpp"
25 "${VIDEO_CORE}/renderer_opengl/gl_shader_cache.h"
26 "${VIDEO_CORE}/renderer_opengl/gl_shader_decompiler.cpp"
27 "${VIDEO_CORE}/renderer_opengl/gl_shader_decompiler.h"
28 "${VIDEO_CORE}/renderer_opengl/gl_shader_disk_cache.cpp"
29 "${VIDEO_CORE}/renderer_opengl/gl_shader_disk_cache.h"
30 "${VIDEO_CORE}/renderer_opengl/gl_shader_gen.cpp"
31 "${VIDEO_CORE}/renderer_opengl/gl_shader_gen.h"
32 "${VIDEO_CORE}/shader/decode/arithmetic.cpp"
33 "${VIDEO_CORE}/shader/decode/arithmetic_half.cpp"
34 "${VIDEO_CORE}/shader/decode/arithmetic_half_immediate.cpp"
35 "${VIDEO_CORE}/shader/decode/arithmetic_immediate.cpp"
36 "${VIDEO_CORE}/shader/decode/arithmetic_integer.cpp"
37 "${VIDEO_CORE}/shader/decode/arithmetic_integer_immediate.cpp"
38 "${VIDEO_CORE}/shader/decode/bfe.cpp"
39 "${VIDEO_CORE}/shader/decode/bfi.cpp"
40 "${VIDEO_CORE}/shader/decode/conversion.cpp"
41 "${VIDEO_CORE}/shader/decode/ffma.cpp"
42 "${VIDEO_CORE}/shader/decode/float_set.cpp"
43 "${VIDEO_CORE}/shader/decode/float_set_predicate.cpp"
44 "${VIDEO_CORE}/shader/decode/half_set.cpp"
45 "${VIDEO_CORE}/shader/decode/half_set_predicate.cpp"
46 "${VIDEO_CORE}/shader/decode/hfma2.cpp"
47 "${VIDEO_CORE}/shader/decode/integer_set.cpp"
48 "${VIDEO_CORE}/shader/decode/integer_set_predicate.cpp"
49 "${VIDEO_CORE}/shader/decode/memory.cpp"
50 "${VIDEO_CORE}/shader/decode/texture.cpp"
51 "${VIDEO_CORE}/shader/decode/other.cpp"
52 "${VIDEO_CORE}/shader/decode/predicate_set_predicate.cpp"
53 "${VIDEO_CORE}/shader/decode/predicate_set_register.cpp"
54 "${VIDEO_CORE}/shader/decode/register_set_predicate.cpp"
55 "${VIDEO_CORE}/shader/decode/shift.cpp"
56 "${VIDEO_CORE}/shader/decode/video.cpp"
57 "${VIDEO_CORE}/shader/decode/xmad.cpp"
58 "${VIDEO_CORE}/shader/decode.cpp"
59 "${VIDEO_CORE}/shader/shader_ir.cpp"
60 "${VIDEO_CORE}/shader/shader_ir.h"
61 "${VIDEO_CORE}/shader/track.cpp"
62 # and also check that the scm_rev files haven't changed
63 "${CMAKE_CURRENT_SOURCE_DIR}/scm_rev.cpp.in"
64 "${CMAKE_CURRENT_SOURCE_DIR}/scm_rev.h"
65 # technically we should regenerate if the git version changed, but its not worth the effort imo
66 "${CMAKE_SOURCE_DIR}/CMakeModules/GenerateSCMRev.cmake"
67)
40 68
41add_library(common STATIC 69add_library(common STATIC
42 alignment.h 70 alignment.h
@@ -63,11 +91,18 @@ add_library(common STATIC
63 logging/log.h 91 logging/log.h
64 logging/text_formatter.cpp 92 logging/text_formatter.cpp
65 logging/text_formatter.h 93 logging/text_formatter.h
94 lz4_compression.cpp
95 lz4_compression.h
66 math_util.h 96 math_util.h
97 memory_hook.cpp
98 memory_hook.h
67 microprofile.cpp 99 microprofile.cpp
68 microprofile.h 100 microprofile.h
69 microprofileui.h 101 microprofileui.h
70 misc.cpp 102 misc.cpp
103 multi_level_queue.h
104 page_table.cpp
105 page_table.h
71 param_package.cpp 106 param_package.cpp
72 param_package.h 107 param_package.h
73 quaternion.h 108 quaternion.h
@@ -86,8 +121,12 @@ add_library(common STATIC
86 threadsafe_queue.h 121 threadsafe_queue.h
87 timer.cpp 122 timer.cpp
88 timer.h 123 timer.h
124 uint128.cpp
125 uint128.h
89 vector_math.h 126 vector_math.h
90 web_result.h 127 web_result.h
128 zstd_compression.cpp
129 zstd_compression.h
91) 130)
92 131
93if(ARCHITECTURE_x86_64) 132if(ARCHITECTURE_x86_64)
@@ -101,3 +140,4 @@ endif()
101create_target_directory_groups(common) 140create_target_directory_groups(common)
102 141
103target_link_libraries(common PUBLIC Boost::boost fmt microprofile) 142target_link_libraries(common PUBLIC Boost::boost fmt microprofile)
143target_link_libraries(common PRIVATE lz4_static libzstd_static)
diff --git a/src/common/assert.h b/src/common/assert.h
index 6002f7ab1..4b0e3f64e 100644
--- a/src/common/assert.h
+++ b/src/common/assert.h
@@ -57,3 +57,21 @@ __declspec(noinline, noreturn)
57 57
58#define UNIMPLEMENTED_IF(cond) ASSERT_MSG(!(cond), "Unimplemented code!") 58#define UNIMPLEMENTED_IF(cond) ASSERT_MSG(!(cond), "Unimplemented code!")
59#define UNIMPLEMENTED_IF_MSG(cond, ...) ASSERT_MSG(!(cond), __VA_ARGS__) 59#define UNIMPLEMENTED_IF_MSG(cond, ...) ASSERT_MSG(!(cond), __VA_ARGS__)
60
61// If the assert is ignored, execute _b_
62#define ASSERT_OR_EXECUTE(_a_, _b_) \
63 do { \
64 ASSERT(_a_); \
65 if (!(_a_)) { \
66 _b_ \
67 } \
68 } while (0)
69
70// If the assert is ignored, execute _b_
71#define ASSERT_OR_EXECUTE_MSG(_a_, _b_, ...) \
72 do { \
73 ASSERT_MSG(_a_, __VA_ARGS__); \
74 if (!(_a_)) { \
75 _b_ \
76 } \
77 } while (0)
diff --git a/src/common/bit_field.h b/src/common/bit_field.h
index 21e07925d..902e668e3 100644
--- a/src/common/bit_field.h
+++ b/src/common/bit_field.h
@@ -34,6 +34,7 @@
34#include <limits> 34#include <limits>
35#include <type_traits> 35#include <type_traits>
36#include "common/common_funcs.h" 36#include "common/common_funcs.h"
37#include "common/swap.h"
37 38
38/* 39/*
39 * Abstract bitfield class 40 * Abstract bitfield class
@@ -108,15 +109,9 @@
108 * symptoms. 109 * symptoms.
109 */ 110 */
110#pragma pack(1) 111#pragma pack(1)
111template <std::size_t Position, std::size_t Bits, typename T> 112template <std::size_t Position, std::size_t Bits, typename T, typename EndianTag = LETag>
112struct BitField { 113struct BitField {
113private: 114private:
114 // We hide the copy assigment operator here, because the default copy
115 // assignment would copy the full storage value, rather than just the bits
116 // relevant to this particular bit field.
117 // We don't delete it because we want BitField to be trivially copyable.
118 constexpr BitField& operator=(const BitField&) = default;
119
120 // UnderlyingType is T for non-enum types and the underlying type of T if 115 // UnderlyingType is T for non-enum types and the underlying type of T if
121 // T is an enumeration. Note that T is wrapped within an enable_if in the 116 // T is an enumeration. Note that T is wrapped within an enable_if in the
122 // former case to workaround compile errors which arise when using 117 // former case to workaround compile errors which arise when using
@@ -127,6 +122,8 @@ private:
127 // We store the value as the unsigned type to avoid undefined behaviour on value shifting 122 // We store the value as the unsigned type to avoid undefined behaviour on value shifting
128 using StorageType = std::make_unsigned_t<UnderlyingType>; 123 using StorageType = std::make_unsigned_t<UnderlyingType>;
129 124
125 using StorageTypeWithEndian = typename AddEndian<StorageType, EndianTag>::type;
126
130public: 127public:
131 /// Constants to allow limited introspection of fields if needed 128 /// Constants to allow limited introspection of fields if needed
132 static constexpr std::size_t position = Position; 129 static constexpr std::size_t position = Position;
@@ -163,16 +160,20 @@ public:
163 BitField(T val) = delete; 160 BitField(T val) = delete;
164 BitField& operator=(T val) = delete; 161 BitField& operator=(T val) = delete;
165 162
166 // Force default constructor to be created 163 constexpr BitField() noexcept = default;
167 // so that we can use this within unions 164
168 constexpr BitField() = default; 165 constexpr BitField(const BitField&) noexcept = default;
166 constexpr BitField& operator=(const BitField&) noexcept = default;
167
168 constexpr BitField(BitField&&) noexcept = default;
169 constexpr BitField& operator=(BitField&&) noexcept = default;
169 170
170 constexpr FORCE_INLINE operator T() const { 171 constexpr FORCE_INLINE operator T() const {
171 return Value(); 172 return Value();
172 } 173 }
173 174
174 constexpr FORCE_INLINE void Assign(const T& value) { 175 constexpr FORCE_INLINE void Assign(const T& value) {
175 storage = (storage & ~mask) | FormatValue(value); 176 storage = (static_cast<StorageType>(storage) & ~mask) | FormatValue(value);
176 } 177 }
177 178
178 constexpr T Value() const { 179 constexpr T Value() const {
@@ -184,7 +185,7 @@ public:
184 } 185 }
185 186
186private: 187private:
187 StorageType storage; 188 StorageTypeWithEndian storage;
188 189
189 static_assert(bits + position <= 8 * sizeof(T), "Bitfield out of range"); 190 static_assert(bits + position <= 8 * sizeof(T), "Bitfield out of range");
190 191
@@ -195,3 +196,6 @@ private:
195 static_assert(std::is_trivially_copyable_v<T>, "T must be trivially copyable in a BitField"); 196 static_assert(std::is_trivially_copyable_v<T>, "T must be trivially copyable in a BitField");
196}; 197};
197#pragma pack() 198#pragma pack()
199
200template <std::size_t Position, std::size_t Bits, typename T>
201using BitFieldBE = BitField<Position, Bits, T, BETag>;
diff --git a/src/common/bit_util.h b/src/common/bit_util.h
index 1eea17ba1..d032df413 100644
--- a/src/common/bit_util.h
+++ b/src/common/bit_util.h
@@ -32,7 +32,7 @@ inline u32 CountLeadingZeroes32(u32 value) {
32 return 32; 32 return 32;
33} 33}
34 34
35inline u64 CountLeadingZeroes64(u64 value) { 35inline u32 CountLeadingZeroes64(u64 value) {
36 unsigned long leading_zero = 0; 36 unsigned long leading_zero = 0;
37 37
38 if (_BitScanReverse64(&leading_zero, value) != 0) { 38 if (_BitScanReverse64(&leading_zero, value) != 0) {
@@ -47,15 +47,54 @@ inline u32 CountLeadingZeroes32(u32 value) {
47 return 32; 47 return 32;
48 } 48 }
49 49
50 return __builtin_clz(value); 50 return static_cast<u32>(__builtin_clz(value));
51} 51}
52 52
53inline u64 CountLeadingZeroes64(u64 value) { 53inline u32 CountLeadingZeroes64(u64 value) {
54 if (value == 0) { 54 if (value == 0) {
55 return 64; 55 return 64;
56 } 56 }
57 57
58 return __builtin_clzll(value); 58 return static_cast<u32>(__builtin_clzll(value));
59} 59}
60#endif 60#endif
61
62#ifdef _MSC_VER
63inline u32 CountTrailingZeroes32(u32 value) {
64 unsigned long trailing_zero = 0;
65
66 if (_BitScanForward(&trailing_zero, value) != 0) {
67 return trailing_zero;
68 }
69
70 return 32;
71}
72
73inline u32 CountTrailingZeroes64(u64 value) {
74 unsigned long trailing_zero = 0;
75
76 if (_BitScanForward64(&trailing_zero, value) != 0) {
77 return trailing_zero;
78 }
79
80 return 64;
81}
82#else
83inline u32 CountTrailingZeroes32(u32 value) {
84 if (value == 0) {
85 return 32;
86 }
87
88 return static_cast<u32>(__builtin_ctz(value));
89}
90
91inline u32 CountTrailingZeroes64(u64 value) {
92 if (value == 0) {
93 return 64;
94 }
95
96 return static_cast<u32>(__builtin_ctzll(value));
97}
98#endif
99
61} // namespace Common 100} // namespace Common
diff --git a/src/common/color.h b/src/common/color.h
index 0379040be..3a2222077 100644
--- a/src/common/color.h
+++ b/src/common/color.h
@@ -55,36 +55,36 @@ constexpr u8 Convert8To6(u8 value) {
55/** 55/**
56 * Decode a color stored in RGBA8 format 56 * Decode a color stored in RGBA8 format
57 * @param bytes Pointer to encoded source color 57 * @param bytes Pointer to encoded source color
58 * @return Result color decoded as Math::Vec4<u8> 58 * @return Result color decoded as Common::Vec4<u8>
59 */ 59 */
60inline Math::Vec4<u8> DecodeRGBA8(const u8* bytes) { 60inline Common::Vec4<u8> DecodeRGBA8(const u8* bytes) {
61 return {bytes[3], bytes[2], bytes[1], bytes[0]}; 61 return {bytes[3], bytes[2], bytes[1], bytes[0]};
62} 62}
63 63
64/** 64/**
65 * Decode a color stored in RGB8 format 65 * Decode a color stored in RGB8 format
66 * @param bytes Pointer to encoded source color 66 * @param bytes Pointer to encoded source color
67 * @return Result color decoded as Math::Vec4<u8> 67 * @return Result color decoded as Common::Vec4<u8>
68 */ 68 */
69inline Math::Vec4<u8> DecodeRGB8(const u8* bytes) { 69inline Common::Vec4<u8> DecodeRGB8(const u8* bytes) {
70 return {bytes[2], bytes[1], bytes[0], 255}; 70 return {bytes[2], bytes[1], bytes[0], 255};
71} 71}
72 72
73/** 73/**
74 * Decode a color stored in RG8 (aka HILO8) format 74 * Decode a color stored in RG8 (aka HILO8) format
75 * @param bytes Pointer to encoded source color 75 * @param bytes Pointer to encoded source color
76 * @return Result color decoded as Math::Vec4<u8> 76 * @return Result color decoded as Common::Vec4<u8>
77 */ 77 */
78inline Math::Vec4<u8> DecodeRG8(const u8* bytes) { 78inline Common::Vec4<u8> DecodeRG8(const u8* bytes) {
79 return {bytes[1], bytes[0], 0, 255}; 79 return {bytes[1], bytes[0], 0, 255};
80} 80}
81 81
82/** 82/**
83 * Decode a color stored in RGB565 format 83 * Decode a color stored in RGB565 format
84 * @param bytes Pointer to encoded source color 84 * @param bytes Pointer to encoded source color
85 * @return Result color decoded as Math::Vec4<u8> 85 * @return Result color decoded as Common::Vec4<u8>
86 */ 86 */
87inline Math::Vec4<u8> DecodeRGB565(const u8* bytes) { 87inline Common::Vec4<u8> DecodeRGB565(const u8* bytes) {
88 u16_le pixel; 88 u16_le pixel;
89 std::memcpy(&pixel, bytes, sizeof(pixel)); 89 std::memcpy(&pixel, bytes, sizeof(pixel));
90 return {Convert5To8((pixel >> 11) & 0x1F), Convert6To8((pixel >> 5) & 0x3F), 90 return {Convert5To8((pixel >> 11) & 0x1F), Convert6To8((pixel >> 5) & 0x3F),
@@ -94,9 +94,9 @@ inline Math::Vec4<u8> DecodeRGB565(const u8* bytes) {
94/** 94/**
95 * Decode a color stored in RGB5A1 format 95 * Decode a color stored in RGB5A1 format
96 * @param bytes Pointer to encoded source color 96 * @param bytes Pointer to encoded source color
97 * @return Result color decoded as Math::Vec4<u8> 97 * @return Result color decoded as Common::Vec4<u8>
98 */ 98 */
99inline Math::Vec4<u8> DecodeRGB5A1(const u8* bytes) { 99inline Common::Vec4<u8> DecodeRGB5A1(const u8* bytes) {
100 u16_le pixel; 100 u16_le pixel;
101 std::memcpy(&pixel, bytes, sizeof(pixel)); 101 std::memcpy(&pixel, bytes, sizeof(pixel));
102 return {Convert5To8((pixel >> 11) & 0x1F), Convert5To8((pixel >> 6) & 0x1F), 102 return {Convert5To8((pixel >> 11) & 0x1F), Convert5To8((pixel >> 6) & 0x1F),
@@ -106,9 +106,9 @@ inline Math::Vec4<u8> DecodeRGB5A1(const u8* bytes) {
106/** 106/**
107 * Decode a color stored in RGBA4 format 107 * Decode a color stored in RGBA4 format
108 * @param bytes Pointer to encoded source color 108 * @param bytes Pointer to encoded source color
109 * @return Result color decoded as Math::Vec4<u8> 109 * @return Result color decoded as Common::Vec4<u8>
110 */ 110 */
111inline Math::Vec4<u8> DecodeRGBA4(const u8* bytes) { 111inline Common::Vec4<u8> DecodeRGBA4(const u8* bytes) {
112 u16_le pixel; 112 u16_le pixel;
113 std::memcpy(&pixel, bytes, sizeof(pixel)); 113 std::memcpy(&pixel, bytes, sizeof(pixel));
114 return {Convert4To8((pixel >> 12) & 0xF), Convert4To8((pixel >> 8) & 0xF), 114 return {Convert4To8((pixel >> 12) & 0xF), Convert4To8((pixel >> 8) & 0xF),
@@ -138,9 +138,9 @@ inline u32 DecodeD24(const u8* bytes) {
138/** 138/**
139 * Decode a depth value and a stencil value stored in D24S8 format 139 * Decode a depth value and a stencil value stored in D24S8 format
140 * @param bytes Pointer to encoded source values 140 * @param bytes Pointer to encoded source values
141 * @return Resulting values stored as a Math::Vec2 141 * @return Resulting values stored as a Common::Vec2
142 */ 142 */
143inline Math::Vec2<u32> DecodeD24S8(const u8* bytes) { 143inline Common::Vec2<u32> DecodeD24S8(const u8* bytes) {
144 return {static_cast<u32>((bytes[2] << 16) | (bytes[1] << 8) | bytes[0]), bytes[3]}; 144 return {static_cast<u32>((bytes[2] << 16) | (bytes[1] << 8) | bytes[0]), bytes[3]};
145} 145}
146 146
@@ -149,7 +149,7 @@ inline Math::Vec2<u32> DecodeD24S8(const u8* bytes) {
149 * @param color Source color to encode 149 * @param color Source color to encode
150 * @param bytes Destination pointer to store encoded color 150 * @param bytes Destination pointer to store encoded color
151 */ 151 */
152inline void EncodeRGBA8(const Math::Vec4<u8>& color, u8* bytes) { 152inline void EncodeRGBA8(const Common::Vec4<u8>& color, u8* bytes) {
153 bytes[3] = color.r(); 153 bytes[3] = color.r();
154 bytes[2] = color.g(); 154 bytes[2] = color.g();
155 bytes[1] = color.b(); 155 bytes[1] = color.b();
@@ -161,7 +161,7 @@ inline void EncodeRGBA8(const Math::Vec4<u8>& color, u8* bytes) {
161 * @param color Source color to encode 161 * @param color Source color to encode
162 * @param bytes Destination pointer to store encoded color 162 * @param bytes Destination pointer to store encoded color
163 */ 163 */
164inline void EncodeRGB8(const Math::Vec4<u8>& color, u8* bytes) { 164inline void EncodeRGB8(const Common::Vec4<u8>& color, u8* bytes) {
165 bytes[2] = color.r(); 165 bytes[2] = color.r();
166 bytes[1] = color.g(); 166 bytes[1] = color.g();
167 bytes[0] = color.b(); 167 bytes[0] = color.b();
@@ -172,7 +172,7 @@ inline void EncodeRGB8(const Math::Vec4<u8>& color, u8* bytes) {
172 * @param color Source color to encode 172 * @param color Source color to encode
173 * @param bytes Destination pointer to store encoded color 173 * @param bytes Destination pointer to store encoded color
174 */ 174 */
175inline void EncodeRG8(const Math::Vec4<u8>& color, u8* bytes) { 175inline void EncodeRG8(const Common::Vec4<u8>& color, u8* bytes) {
176 bytes[1] = color.r(); 176 bytes[1] = color.r();
177 bytes[0] = color.g(); 177 bytes[0] = color.g();
178} 178}
@@ -181,7 +181,7 @@ inline void EncodeRG8(const Math::Vec4<u8>& color, u8* bytes) {
181 * @param color Source color to encode 181 * @param color Source color to encode
182 * @param bytes Destination pointer to store encoded color 182 * @param bytes Destination pointer to store encoded color
183 */ 183 */
184inline void EncodeRGB565(const Math::Vec4<u8>& color, u8* bytes) { 184inline void EncodeRGB565(const Common::Vec4<u8>& color, u8* bytes) {
185 const u16_le data = 185 const u16_le data =
186 (Convert8To5(color.r()) << 11) | (Convert8To6(color.g()) << 5) | Convert8To5(color.b()); 186 (Convert8To5(color.r()) << 11) | (Convert8To6(color.g()) << 5) | Convert8To5(color.b());
187 187
@@ -193,7 +193,7 @@ inline void EncodeRGB565(const Math::Vec4<u8>& color, u8* bytes) {
193 * @param color Source color to encode 193 * @param color Source color to encode
194 * @param bytes Destination pointer to store encoded color 194 * @param bytes Destination pointer to store encoded color
195 */ 195 */
196inline void EncodeRGB5A1(const Math::Vec4<u8>& color, u8* bytes) { 196inline void EncodeRGB5A1(const Common::Vec4<u8>& color, u8* bytes) {
197 const u16_le data = (Convert8To5(color.r()) << 11) | (Convert8To5(color.g()) << 6) | 197 const u16_le data = (Convert8To5(color.r()) << 11) | (Convert8To5(color.g()) << 6) |
198 (Convert8To5(color.b()) << 1) | Convert8To1(color.a()); 198 (Convert8To5(color.b()) << 1) | Convert8To1(color.a());
199 199
@@ -205,7 +205,7 @@ inline void EncodeRGB5A1(const Math::Vec4<u8>& color, u8* bytes) {
205 * @param color Source color to encode 205 * @param color Source color to encode
206 * @param bytes Destination pointer to store encoded color 206 * @param bytes Destination pointer to store encoded color
207 */ 207 */
208inline void EncodeRGBA4(const Math::Vec4<u8>& color, u8* bytes) { 208inline void EncodeRGBA4(const Common::Vec4<u8>& color, u8* bytes) {
209 const u16 data = (Convert8To4(color.r()) << 12) | (Convert8To4(color.g()) << 8) | 209 const u16 data = (Convert8To4(color.r()) << 12) | (Convert8To4(color.g()) << 8) |
210 (Convert8To4(color.b()) << 4) | Convert8To4(color.a()); 210 (Convert8To4(color.b()) << 4) | Convert8To4(color.a());
211 211
diff --git a/src/common/common_paths.h b/src/common/common_paths.h
index 4f88de768..076752d3b 100644
--- a/src/common/common_paths.h
+++ b/src/common/common_paths.h
@@ -35,6 +35,7 @@
35#define KEYS_DIR "keys" 35#define KEYS_DIR "keys"
36#define LOAD_DIR "load" 36#define LOAD_DIR "load"
37#define DUMP_DIR "dump" 37#define DUMP_DIR "dump"
38#define SHADER_DIR "shader"
38#define LOG_DIR "log" 39#define LOG_DIR "log"
39 40
40// Filenames 41// Filenames
diff --git a/src/common/common_types.h b/src/common/common_types.h
index 6b1766dca..4cec89fbd 100644
--- a/src/common/common_types.h
+++ b/src/common/common_types.h
@@ -40,10 +40,9 @@ using s64 = std::int64_t; ///< 64-bit signed int
40using f32 = float; ///< 32-bit floating point 40using f32 = float; ///< 32-bit floating point
41using f64 = double; ///< 64-bit floating point 41using f64 = double; ///< 64-bit floating point
42 42
43// TODO: It would be nice to eventually replace these with strong types that prevent accidental 43using VAddr = u64; ///< Represents a pointer in the userspace virtual address space.
44// conversion between each other. 44using PAddr = u64; ///< Represents a pointer in the ARM11 physical address space.
45using VAddr = u64; ///< Represents a pointer in the userspace virtual address space. 45using GPUVAddr = u64; ///< Represents a pointer in the GPU virtual address space.
46using PAddr = u64; ///< Represents a pointer in the ARM11 physical address space.
47 46
48using u128 = std::array<std::uint64_t, 2>; 47using u128 = std::array<std::uint64_t, 2>;
49static_assert(sizeof(u128) == 16, "u128 must be 128 bits wide"); 48static_assert(sizeof(u128) == 16, "u128 must be 128 bits wide");
diff --git a/src/common/detached_tasks.cpp b/src/common/detached_tasks.cpp
index a347d9e02..f268d6021 100644
--- a/src/common/detached_tasks.cpp
+++ b/src/common/detached_tasks.cpp
@@ -16,22 +16,22 @@ DetachedTasks::DetachedTasks() {
16} 16}
17 17
18void DetachedTasks::WaitForAllTasks() { 18void DetachedTasks::WaitForAllTasks() {
19 std::unique_lock<std::mutex> lock(mutex); 19 std::unique_lock lock{mutex};
20 cv.wait(lock, [this]() { return count == 0; }); 20 cv.wait(lock, [this]() { return count == 0; });
21} 21}
22 22
23DetachedTasks::~DetachedTasks() { 23DetachedTasks::~DetachedTasks() {
24 std::unique_lock<std::mutex> lock(mutex); 24 std::unique_lock lock{mutex};
25 ASSERT(count == 0); 25 ASSERT(count == 0);
26 instance = nullptr; 26 instance = nullptr;
27} 27}
28 28
29void DetachedTasks::AddTask(std::function<void()> task) { 29void DetachedTasks::AddTask(std::function<void()> task) {
30 std::unique_lock<std::mutex> lock(instance->mutex); 30 std::unique_lock lock{instance->mutex};
31 ++instance->count; 31 ++instance->count;
32 std::thread([task{std::move(task)}]() { 32 std::thread([task{std::move(task)}]() {
33 task(); 33 task();
34 std::unique_lock<std::mutex> lock(instance->mutex); 34 std::unique_lock lock{instance->mutex};
35 --instance->count; 35 --instance->count;
36 std::notify_all_at_thread_exit(instance->cv, std::move(lock)); 36 std::notify_all_at_thread_exit(instance->cv, std::move(lock));
37 }) 37 })
diff --git a/src/common/file_util.cpp b/src/common/file_util.cpp
index b52492da6..aecb66c32 100644
--- a/src/common/file_util.cpp
+++ b/src/common/file_util.cpp
@@ -710,6 +710,7 @@ const std::string& GetUserPath(UserPath path, const std::string& new_path) {
710 paths.emplace(UserPath::NANDDir, user_path + NAND_DIR DIR_SEP); 710 paths.emplace(UserPath::NANDDir, user_path + NAND_DIR DIR_SEP);
711 paths.emplace(UserPath::LoadDir, user_path + LOAD_DIR DIR_SEP); 711 paths.emplace(UserPath::LoadDir, user_path + LOAD_DIR DIR_SEP);
712 paths.emplace(UserPath::DumpDir, user_path + DUMP_DIR DIR_SEP); 712 paths.emplace(UserPath::DumpDir, user_path + DUMP_DIR DIR_SEP);
713 paths.emplace(UserPath::ShaderDir, user_path + SHADER_DIR DIR_SEP);
713 paths.emplace(UserPath::SysDataDir, user_path + SYSDATA_DIR DIR_SEP); 714 paths.emplace(UserPath::SysDataDir, user_path + SYSDATA_DIR DIR_SEP);
714 paths.emplace(UserPath::KeysDir, user_path + KEYS_DIR DIR_SEP); 715 paths.emplace(UserPath::KeysDir, user_path + KEYS_DIR DIR_SEP);
715 // TODO: Put the logs in a better location for each OS 716 // TODO: Put the logs in a better location for each OS
diff --git a/src/common/file_util.h b/src/common/file_util.h
index 571503d2a..38cc7f059 100644
--- a/src/common/file_util.h
+++ b/src/common/file_util.h
@@ -31,6 +31,7 @@ enum class UserPath {
31 SDMCDir, 31 SDMCDir,
32 LoadDir, 32 LoadDir,
33 DumpDir, 33 DumpDir,
34 ShaderDir,
34 SysDataDir, 35 SysDataDir,
35 UserDir, 36 UserDir,
36}; 37};
diff --git a/src/common/logging/backend.cpp b/src/common/logging/backend.cpp
index 12f6d0114..a03179520 100644
--- a/src/common/logging/backend.cpp
+++ b/src/common/logging/backend.cpp
@@ -39,19 +39,19 @@ public:
39 Impl(Impl const&) = delete; 39 Impl(Impl const&) = delete;
40 const Impl& operator=(Impl const&) = delete; 40 const Impl& operator=(Impl const&) = delete;
41 41
42 void PushEntry(Entry e) { 42 void PushEntry(Class log_class, Level log_level, const char* filename, unsigned int line_num,
43 std::lock_guard<std::mutex> lock(message_mutex); 43 const char* function, std::string message) {
44 message_queue.Push(std::move(e)); 44 message_queue.Push(
45 message_cv.notify_one(); 45 CreateEntry(log_class, log_level, filename, line_num, function, std::move(message)));
46 } 46 }
47 47
48 void AddBackend(std::unique_ptr<Backend> backend) { 48 void AddBackend(std::unique_ptr<Backend> backend) {
49 std::lock_guard<std::mutex> lock(writing_mutex); 49 std::lock_guard lock{writing_mutex};
50 backends.push_back(std::move(backend)); 50 backends.push_back(std::move(backend));
51 } 51 }
52 52
53 void RemoveBackend(std::string_view backend_name) { 53 void RemoveBackend(std::string_view backend_name) {
54 std::lock_guard<std::mutex> lock(writing_mutex); 54 std::lock_guard lock{writing_mutex};
55 const auto it = 55 const auto it =
56 std::remove_if(backends.begin(), backends.end(), 56 std::remove_if(backends.begin(), backends.end(),
57 [&backend_name](const auto& i) { return backend_name == i->GetName(); }); 57 [&backend_name](const auto& i) { return backend_name == i->GetName(); });
@@ -80,21 +80,19 @@ private:
80 backend_thread = std::thread([&] { 80 backend_thread = std::thread([&] {
81 Entry entry; 81 Entry entry;
82 auto write_logs = [&](Entry& e) { 82 auto write_logs = [&](Entry& e) {
83 std::lock_guard<std::mutex> lock(writing_mutex); 83 std::lock_guard lock{writing_mutex};
84 for (const auto& backend : backends) { 84 for (const auto& backend : backends) {
85 backend->Write(e); 85 backend->Write(e);
86 } 86 }
87 }; 87 };
88 while (true) { 88 while (true) {
89 { 89 entry = message_queue.PopWait();
90 std::unique_lock<std::mutex> lock(message_mutex); 90 if (entry.final_entry) {
91 message_cv.wait(lock, [&] { return !running || message_queue.Pop(entry); });
92 }
93 if (!running) {
94 break; 91 break;
95 } 92 }
96 write_logs(entry); 93 write_logs(entry);
97 } 94 }
95
98 // Drain the logging queue. Only writes out up to MAX_LOGS_TO_WRITE to prevent a case 96 // Drain the logging queue. Only writes out up to MAX_LOGS_TO_WRITE to prevent a case
99 // where a system is repeatedly spamming logs even on close. 97 // where a system is repeatedly spamming logs even on close.
100 const int MAX_LOGS_TO_WRITE = filter.IsDebug() ? INT_MAX : 100; 98 const int MAX_LOGS_TO_WRITE = filter.IsDebug() ? INT_MAX : 100;
@@ -106,18 +104,36 @@ private:
106 } 104 }
107 105
108 ~Impl() { 106 ~Impl() {
109 running = false; 107 Entry entry;
110 message_cv.notify_one(); 108 entry.final_entry = true;
109 message_queue.Push(entry);
111 backend_thread.join(); 110 backend_thread.join();
112 } 111 }
113 112
114 std::atomic_bool running{true}; 113 Entry CreateEntry(Class log_class, Level log_level, const char* filename, unsigned int line_nr,
115 std::mutex message_mutex, writing_mutex; 114 const char* function, std::string message) const {
116 std::condition_variable message_cv; 115 using std::chrono::duration_cast;
116 using std::chrono::steady_clock;
117
118 Entry entry;
119 entry.timestamp =
120 duration_cast<std::chrono::microseconds>(steady_clock::now() - time_origin);
121 entry.log_class = log_class;
122 entry.log_level = log_level;
123 entry.filename = Common::TrimSourcePath(filename);
124 entry.line_num = line_nr;
125 entry.function = function;
126 entry.message = std::move(message);
127
128 return entry;
129 }
130
131 std::mutex writing_mutex;
117 std::thread backend_thread; 132 std::thread backend_thread;
118 std::vector<std::unique_ptr<Backend>> backends; 133 std::vector<std::unique_ptr<Backend>> backends;
119 Common::MPSCQueue<Log::Entry> message_queue; 134 Common::MPSCQueue<Log::Entry> message_queue;
120 Filter filter; 135 Filter filter;
136 std::chrono::steady_clock::time_point time_origin{std::chrono::steady_clock::now()};
121}; 137};
122 138
123void ConsoleBackend::Write(const Entry& entry) { 139void ConsoleBackend::Write(const Entry& entry) {
@@ -232,6 +248,7 @@ void DebuggerBackend::Write(const Entry& entry) {
232 CLS(Render) \ 248 CLS(Render) \
233 SUB(Render, Software) \ 249 SUB(Render, Software) \
234 SUB(Render, OpenGL) \ 250 SUB(Render, OpenGL) \
251 SUB(Render, Vulkan) \
235 CLS(Audio) \ 252 CLS(Audio) \
236 SUB(Audio, DSP) \ 253 SUB(Audio, DSP) \
237 SUB(Audio, Sink) \ 254 SUB(Audio, Sink) \
@@ -275,25 +292,6 @@ const char* GetLevelName(Level log_level) {
275#undef LVL 292#undef LVL
276} 293}
277 294
278Entry CreateEntry(Class log_class, Level log_level, const char* filename, unsigned int line_nr,
279 const char* function, std::string message) {
280 using std::chrono::duration_cast;
281 using std::chrono::steady_clock;
282
283 static steady_clock::time_point time_origin = steady_clock::now();
284
285 Entry entry;
286 entry.timestamp = duration_cast<std::chrono::microseconds>(steady_clock::now() - time_origin);
287 entry.log_class = log_class;
288 entry.log_level = log_level;
289 entry.filename = Common::TrimSourcePath(filename);
290 entry.line_num = line_nr;
291 entry.function = function;
292 entry.message = std::move(message);
293
294 return entry;
295}
296
297void SetGlobalFilter(const Filter& filter) { 295void SetGlobalFilter(const Filter& filter) {
298 Impl::Instance().SetGlobalFilter(filter); 296 Impl::Instance().SetGlobalFilter(filter);
299} 297}
@@ -318,9 +316,7 @@ void FmtLogMessageImpl(Class log_class, Level log_level, const char* filename,
318 if (!filter.CheckMessage(log_class, log_level)) 316 if (!filter.CheckMessage(log_class, log_level))
319 return; 317 return;
320 318
321 Entry entry = 319 instance.PushEntry(log_class, log_level, filename, line_num, function,
322 CreateEntry(log_class, log_level, filename, line_num, function, fmt::vformat(format, args)); 320 fmt::vformat(format, args));
323
324 instance.PushEntry(std::move(entry));
325} 321}
326} // namespace Log 322} // namespace Log
diff --git a/src/common/logging/backend.h b/src/common/logging/backend.h
index 91bb0c309..fca0267a1 100644
--- a/src/common/logging/backend.h
+++ b/src/common/logging/backend.h
@@ -27,6 +27,7 @@ struct Entry {
27 unsigned int line_num; 27 unsigned int line_num;
28 std::string function; 28 std::string function;
29 std::string message; 29 std::string message;
30 bool final_entry = false;
30 31
31 Entry() = default; 32 Entry() = default;
32 Entry(Entry&& o) = default; 33 Entry(Entry&& o) = default;
@@ -134,10 +135,6 @@ const char* GetLogClassName(Class log_class);
134 */ 135 */
135const char* GetLevelName(Level log_level); 136const char* GetLevelName(Level log_level);
136 137
137/// Creates a log entry by formatting the given source location, and message.
138Entry CreateEntry(Class log_class, Level log_level, const char* filename, unsigned int line_nr,
139 const char* function, std::string message);
140
141/** 138/**
142 * The global filter will prevent any messages from even being processed if they are filtered. Each 139 * The global filter will prevent any messages from even being processed if they are filtered. Each
143 * backend can have a filter, but if the level is lower than the global filter, the backend will 140 * backend can have a filter, but if the level is lower than the global filter, the backend will
diff --git a/src/common/logging/log.h b/src/common/logging/log.h
index d4ec31ec3..8ed6d5050 100644
--- a/src/common/logging/log.h
+++ b/src/common/logging/log.h
@@ -112,6 +112,7 @@ enum class Class : ClassType {
112 Render, ///< Emulator video output and hardware acceleration 112 Render, ///< Emulator video output and hardware acceleration
113 Render_Software, ///< Software renderer backend 113 Render_Software, ///< Software renderer backend
114 Render_OpenGL, ///< OpenGL backend 114 Render_OpenGL, ///< OpenGL backend
115 Render_Vulkan, ///< Vulkan backend
115 Audio, ///< Audio emulation 116 Audio, ///< Audio emulation
116 Audio_DSP, ///< The HLE implementation of the DSP 117 Audio_DSP, ///< The HLE implementation of the DSP
117 Audio_Sink, ///< Emulator audio output backend 118 Audio_Sink, ///< Emulator audio output backend
diff --git a/src/common/lz4_compression.cpp b/src/common/lz4_compression.cpp
new file mode 100644
index 000000000..ade6759bb
--- /dev/null
+++ b/src/common/lz4_compression.cpp
@@ -0,0 +1,76 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <algorithm>
6#include <lz4hc.h>
7
8#include "common/assert.h"
9#include "common/lz4_compression.h"
10
11namespace Common::Compression {
12
13std::vector<u8> CompressDataLZ4(const u8* source, std::size_t source_size) {
14 ASSERT_MSG(source_size <= LZ4_MAX_INPUT_SIZE, "Source size exceeds LZ4 maximum input size");
15
16 const auto source_size_int = static_cast<int>(source_size);
17 const int max_compressed_size = LZ4_compressBound(source_size_int);
18 std::vector<u8> compressed(max_compressed_size);
19
20 const int compressed_size = LZ4_compress_default(reinterpret_cast<const char*>(source),
21 reinterpret_cast<char*>(compressed.data()),
22 source_size_int, max_compressed_size);
23
24 if (compressed_size <= 0) {
25 // Compression failed
26 return {};
27 }
28
29 compressed.resize(compressed_size);
30
31 return compressed;
32}
33
34std::vector<u8> CompressDataLZ4HC(const u8* source, std::size_t source_size,
35 s32 compression_level) {
36 ASSERT_MSG(source_size <= LZ4_MAX_INPUT_SIZE, "Source size exceeds LZ4 maximum input size");
37
38 compression_level = std::clamp(compression_level, LZ4HC_CLEVEL_MIN, LZ4HC_CLEVEL_MAX);
39
40 const auto source_size_int = static_cast<int>(source_size);
41 const int max_compressed_size = LZ4_compressBound(source_size_int);
42 std::vector<u8> compressed(max_compressed_size);
43
44 const int compressed_size = LZ4_compress_HC(
45 reinterpret_cast<const char*>(source), reinterpret_cast<char*>(compressed.data()),
46 source_size_int, max_compressed_size, compression_level);
47
48 if (compressed_size <= 0) {
49 // Compression failed
50 return {};
51 }
52
53 compressed.resize(compressed_size);
54
55 return compressed;
56}
57
58std::vector<u8> CompressDataLZ4HCMax(const u8* source, std::size_t source_size) {
59 return CompressDataLZ4HC(source, source_size, LZ4HC_CLEVEL_MAX);
60}
61
62std::vector<u8> DecompressDataLZ4(const std::vector<u8>& compressed,
63 std::size_t uncompressed_size) {
64 std::vector<u8> uncompressed(uncompressed_size);
65 const int size_check = LZ4_decompress_safe(reinterpret_cast<const char*>(compressed.data()),
66 reinterpret_cast<char*>(uncompressed.data()),
67 static_cast<int>(compressed.size()),
68 static_cast<int>(uncompressed.size()));
69 if (static_cast<int>(uncompressed_size) != size_check) {
70 // Decompression failed
71 return {};
72 }
73 return uncompressed;
74}
75
76} // namespace Common::Compression
diff --git a/src/common/lz4_compression.h b/src/common/lz4_compression.h
new file mode 100644
index 000000000..fe2231a6c
--- /dev/null
+++ b/src/common/lz4_compression.h
@@ -0,0 +1,55 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <vector>
6
7#include "common/common_types.h"
8
9namespace Common::Compression {
10
11/**
12 * Compresses a source memory region with LZ4 and returns the compressed data in a vector.
13 *
14 * @param source the uncompressed source memory region.
15 * @param source_size the size in bytes of the uncompressed source memory region.
16 *
17 * @return the compressed data.
18 */
19std::vector<u8> CompressDataLZ4(const u8* source, std::size_t source_size);
20
21/**
22 * Utilizes the LZ4 subalgorithm LZ4HC with the specified compression level. Higher compression
23 * levels result in a smaller compressed size, but require more CPU time for compression. The
24 * compression level has almost no impact on decompression speed. Data compressed with LZ4HC can
25 * also be decompressed with the default LZ4 decompression.
26 *
27 * @param source the uncompressed source memory region.
28 * @param source_size the size in bytes of the uncompressed source memory region.
29 * @param compression_level the used compression level. Should be between 3 and 12.
30 *
31 * @return the compressed data.
32 */
33std::vector<u8> CompressDataLZ4HC(const u8* source, std::size_t source_size, s32 compression_level);
34
35/**
36 * Utilizes the LZ4 subalgorithm LZ4HC with the highest possible compression level.
37 *
38 * @param source the uncompressed source memory region.
39 * @param source_size the size in bytes of the uncompressed source memory region.
40 *
41 * @return the compressed data.
42 */
43std::vector<u8> CompressDataLZ4HCMax(const u8* source, std::size_t source_size);
44
45/**
46 * Decompresses a source memory region with LZ4 and returns the uncompressed data in a vector.
47 *
48 * @param compressed the compressed source memory region.
49 * @param uncompressed_size the size in bytes of the uncompressed data.
50 *
51 * @return the decompressed data.
52 */
53std::vector<u8> DecompressDataLZ4(const std::vector<u8>& compressed, std::size_t uncompressed_size);
54
55} // namespace Common::Compression \ No newline at end of file
diff --git a/src/common/math_util.h b/src/common/math_util.h
index 94b4394c5..cff3d48c5 100644
--- a/src/common/math_util.h
+++ b/src/common/math_util.h
@@ -7,7 +7,7 @@
7#include <cstdlib> 7#include <cstdlib>
8#include <type_traits> 8#include <type_traits>
9 9
10namespace MathUtil { 10namespace Common {
11 11
12constexpr float PI = 3.14159265f; 12constexpr float PI = 3.14159265f;
13 13
@@ -41,4 +41,4 @@ struct Rectangle {
41 } 41 }
42}; 42};
43 43
44} // namespace MathUtil 44} // namespace Common
diff --git a/src/core/memory_hook.cpp b/src/common/memory_hook.cpp
index c61c6c1fb..3986986d6 100644
--- a/src/core/memory_hook.cpp
+++ b/src/common/memory_hook.cpp
@@ -2,10 +2,10 @@
2// Licensed under GPLv2 or any later version 2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included. 3// Refer to the license.txt file included.
4 4
5#include "core/memory_hook.h" 5#include "common/memory_hook.h"
6 6
7namespace Memory { 7namespace Common {
8 8
9MemoryHook::~MemoryHook() = default; 9MemoryHook::~MemoryHook() = default;
10 10
11} // namespace Memory 11} // namespace Common
diff --git a/src/core/memory_hook.h b/src/common/memory_hook.h
index 940777107..adaa4c2c5 100644
--- a/src/core/memory_hook.h
+++ b/src/common/memory_hook.h
@@ -9,7 +9,7 @@
9 9
10#include "common/common_types.h" 10#include "common/common_types.h"
11 11
12namespace Memory { 12namespace Common {
13 13
14/** 14/**
15 * Memory hooks have two purposes: 15 * Memory hooks have two purposes:
@@ -44,4 +44,4 @@ public:
44}; 44};
45 45
46using MemoryHookPointer = std::shared_ptr<MemoryHook>; 46using MemoryHookPointer = std::shared_ptr<MemoryHook>;
47} // namespace Memory 47} // namespace Common
diff --git a/src/common/multi_level_queue.h b/src/common/multi_level_queue.h
new file mode 100644
index 000000000..9cb448f56
--- /dev/null
+++ b/src/common/multi_level_queue.h
@@ -0,0 +1,337 @@
1// Copyright 2019 TuxSH
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <array>
8#include <iterator>
9#include <list>
10#include <utility>
11
12#include "common/bit_util.h"
13#include "common/common_types.h"
14
15namespace Common {
16
17/**
18 * A MultiLevelQueue is a type of priority queue which has the following characteristics:
19 * - iteratable through each of its elements.
20 * - back can be obtained.
21 * - O(1) add, lookup (both front and back)
22 * - discrete priorities and a max of 64 priorities (limited domain)
23 * This type of priority queue is normaly used for managing threads within an scheduler
24 */
25template <typename T, std::size_t Depth>
26class MultiLevelQueue {
27public:
28 using value_type = T;
29 using reference = value_type&;
30 using const_reference = const value_type&;
31 using pointer = value_type*;
32 using const_pointer = const value_type*;
33
34 using difference_type = typename std::pointer_traits<pointer>::difference_type;
35 using size_type = std::size_t;
36
37 template <bool is_constant>
38 class iterator_impl {
39 public:
40 using iterator_category = std::bidirectional_iterator_tag;
41 using value_type = T;
42 using pointer = std::conditional_t<is_constant, T*, const T*>;
43 using reference = std::conditional_t<is_constant, const T&, T&>;
44 using difference_type = typename std::pointer_traits<pointer>::difference_type;
45
46 friend bool operator==(const iterator_impl& lhs, const iterator_impl& rhs) {
47 if (lhs.IsEnd() && rhs.IsEnd())
48 return true;
49 return std::tie(lhs.current_priority, lhs.it) == std::tie(rhs.current_priority, rhs.it);
50 }
51
52 friend bool operator!=(const iterator_impl& lhs, const iterator_impl& rhs) {
53 return !operator==(lhs, rhs);
54 }
55
56 reference operator*() const {
57 return *it;
58 }
59
60 pointer operator->() const {
61 return it.operator->();
62 }
63
64 iterator_impl& operator++() {
65 if (IsEnd()) {
66 return *this;
67 }
68
69 ++it;
70
71 if (it == GetEndItForPrio()) {
72 u64 prios = mlq.used_priorities;
73 prios &= ~((1ULL << (current_priority + 1)) - 1);
74 if (prios == 0) {
75 current_priority = static_cast<u32>(mlq.depth());
76 } else {
77 current_priority = CountTrailingZeroes64(prios);
78 it = GetBeginItForPrio();
79 }
80 }
81 return *this;
82 }
83
84 iterator_impl& operator--() {
85 if (IsEnd()) {
86 if (mlq.used_priorities != 0) {
87 current_priority = 63 - CountLeadingZeroes64(mlq.used_priorities);
88 it = GetEndItForPrio();
89 --it;
90 }
91 } else if (it == GetBeginItForPrio()) {
92 u64 prios = mlq.used_priorities;
93 prios &= (1ULL << current_priority) - 1;
94 if (prios != 0) {
95 current_priority = CountTrailingZeroes64(prios);
96 it = GetEndItForPrio();
97 --it;
98 }
99 } else {
100 --it;
101 }
102 return *this;
103 }
104
105 iterator_impl operator++(int) {
106 const iterator_impl v{*this};
107 ++(*this);
108 return v;
109 }
110
111 iterator_impl operator--(int) {
112 const iterator_impl v{*this};
113 --(*this);
114 return v;
115 }
116
117 // allow implicit const->non-const
118 iterator_impl(const iterator_impl<false>& other)
119 : mlq(other.mlq), it(other.it), current_priority(other.current_priority) {}
120
121 iterator_impl(const iterator_impl<true>& other)
122 : mlq(other.mlq), it(other.it), current_priority(other.current_priority) {}
123
124 iterator_impl& operator=(const iterator_impl<false>& other) {
125 mlq = other.mlq;
126 it = other.it;
127 current_priority = other.current_priority;
128 return *this;
129 }
130
131 friend class iterator_impl<true>;
132 iterator_impl() = default;
133
134 private:
135 friend class MultiLevelQueue;
136 using container_ref =
137 std::conditional_t<is_constant, const MultiLevelQueue&, MultiLevelQueue&>;
138 using list_iterator = std::conditional_t<is_constant, typename std::list<T>::const_iterator,
139 typename std::list<T>::iterator>;
140
141 explicit iterator_impl(container_ref mlq, list_iterator it, u32 current_priority)
142 : mlq(mlq), it(it), current_priority(current_priority) {}
143 explicit iterator_impl(container_ref mlq, u32 current_priority)
144 : mlq(mlq), it(), current_priority(current_priority) {}
145
146 bool IsEnd() const {
147 return current_priority == mlq.depth();
148 }
149
150 list_iterator GetBeginItForPrio() const {
151 return mlq.levels[current_priority].begin();
152 }
153
154 list_iterator GetEndItForPrio() const {
155 return mlq.levels[current_priority].end();
156 }
157
158 container_ref mlq;
159 list_iterator it;
160 u32 current_priority;
161 };
162
163 using iterator = iterator_impl<false>;
164 using const_iterator = iterator_impl<true>;
165
166 void add(const T& element, u32 priority, bool send_back = true) {
167 if (send_back)
168 levels[priority].push_back(element);
169 else
170 levels[priority].push_front(element);
171 used_priorities |= 1ULL << priority;
172 }
173
174 void remove(const T& element, u32 priority) {
175 auto it = ListIterateTo(levels[priority], element);
176 if (it == levels[priority].end())
177 return;
178 levels[priority].erase(it);
179 if (levels[priority].empty()) {
180 used_priorities &= ~(1ULL << priority);
181 }
182 }
183
184 void adjust(const T& element, u32 old_priority, u32 new_priority, bool adjust_front = false) {
185 remove(element, old_priority);
186 add(element, new_priority, !adjust_front);
187 }
188 void adjust(const_iterator it, u32 old_priority, u32 new_priority, bool adjust_front = false) {
189 adjust(*it, old_priority, new_priority, adjust_front);
190 }
191
192 void transfer_to_front(const T& element, u32 priority, MultiLevelQueue& other) {
193 ListSplice(other.levels[priority], other.levels[priority].begin(), levels[priority],
194 ListIterateTo(levels[priority], element));
195
196 other.used_priorities |= 1ULL << priority;
197
198 if (levels[priority].empty()) {
199 used_priorities &= ~(1ULL << priority);
200 }
201 }
202
203 void transfer_to_front(const_iterator it, u32 priority, MultiLevelQueue& other) {
204 transfer_to_front(*it, priority, other);
205 }
206
207 void transfer_to_back(const T& element, u32 priority, MultiLevelQueue& other) {
208 ListSplice(other.levels[priority], other.levels[priority].end(), levels[priority],
209 ListIterateTo(levels[priority], element));
210
211 other.used_priorities |= 1ULL << priority;
212
213 if (levels[priority].empty()) {
214 used_priorities &= ~(1ULL << priority);
215 }
216 }
217
218 void transfer_to_back(const_iterator it, u32 priority, MultiLevelQueue& other) {
219 transfer_to_back(*it, priority, other);
220 }
221
222 void yield(u32 priority, std::size_t n = 1) {
223 ListShiftForward(levels[priority], n);
224 }
225
226 std::size_t depth() const {
227 return Depth;
228 }
229
230 std::size_t size(u32 priority) const {
231 return levels[priority].size();
232 }
233
234 std::size_t size() const {
235 u64 priorities = used_priorities;
236 std::size_t size = 0;
237 while (priorities != 0) {
238 const u64 current_priority = CountTrailingZeroes64(priorities);
239 size += levels[current_priority].size();
240 priorities &= ~(1ULL << current_priority);
241 }
242 return size;
243 }
244
245 bool empty() const {
246 return used_priorities == 0;
247 }
248
249 bool empty(u32 priority) const {
250 return (used_priorities & (1ULL << priority)) == 0;
251 }
252
253 u32 highest_priority_set(u32 max_priority = 0) const {
254 const u64 priorities =
255 max_priority == 0 ? used_priorities : (used_priorities & ~((1ULL << max_priority) - 1));
256 return priorities == 0 ? Depth : static_cast<u32>(CountTrailingZeroes64(priorities));
257 }
258
259 u32 lowest_priority_set(u32 min_priority = Depth - 1) const {
260 const u64 priorities = min_priority >= Depth - 1
261 ? used_priorities
262 : (used_priorities & ((1ULL << (min_priority + 1)) - 1));
263 return priorities == 0 ? Depth : 63 - CountLeadingZeroes64(priorities);
264 }
265
266 const_iterator cbegin(u32 max_prio = 0) const {
267 const u32 priority = highest_priority_set(max_prio);
268 return priority == Depth ? cend()
269 : const_iterator{*this, levels[priority].cbegin(), priority};
270 }
271 const_iterator begin(u32 max_prio = 0) const {
272 return cbegin(max_prio);
273 }
274 iterator begin(u32 max_prio = 0) {
275 const u32 priority = highest_priority_set(max_prio);
276 return priority == Depth ? end() : iterator{*this, levels[priority].begin(), priority};
277 }
278
279 const_iterator cend(u32 min_prio = Depth - 1) const {
280 return min_prio == Depth - 1 ? const_iterator{*this, Depth} : cbegin(min_prio + 1);
281 }
282 const_iterator end(u32 min_prio = Depth - 1) const {
283 return cend(min_prio);
284 }
285 iterator end(u32 min_prio = Depth - 1) {
286 return min_prio == Depth - 1 ? iterator{*this, Depth} : begin(min_prio + 1);
287 }
288
289 T& front(u32 max_priority = 0) {
290 const u32 priority = highest_priority_set(max_priority);
291 return levels[priority == Depth ? 0 : priority].front();
292 }
293 const T& front(u32 max_priority = 0) const {
294 const u32 priority = highest_priority_set(max_priority);
295 return levels[priority == Depth ? 0 : priority].front();
296 }
297
298 T back(u32 min_priority = Depth - 1) {
299 const u32 priority = lowest_priority_set(min_priority); // intended
300 return levels[priority == Depth ? 63 : priority].back();
301 }
302 const T& back(u32 min_priority = Depth - 1) const {
303 const u32 priority = lowest_priority_set(min_priority); // intended
304 return levels[priority == Depth ? 63 : priority].back();
305 }
306
307private:
308 using const_list_iterator = typename std::list<T>::const_iterator;
309
310 static void ListShiftForward(std::list<T>& list, const std::size_t shift = 1) {
311 if (shift >= list.size()) {
312 return;
313 }
314
315 const auto begin_range = list.begin();
316 const auto end_range = std::next(begin_range, shift);
317 list.splice(list.end(), list, begin_range, end_range);
318 }
319
320 static void ListSplice(std::list<T>& in_list, const_list_iterator position,
321 std::list<T>& out_list, const_list_iterator element) {
322 in_list.splice(position, out_list, element);
323 }
324
325 static const_list_iterator ListIterateTo(const std::list<T>& list, const T& element) {
326 auto it = list.cbegin();
327 while (it != list.cend() && *it != element) {
328 ++it;
329 }
330 return it;
331 }
332
333 std::array<std::list<T>, Depth> levels;
334 u64 used_priorities = 0;
335};
336
337} // namespace Common
diff --git a/src/common/page_table.cpp b/src/common/page_table.cpp
new file mode 100644
index 000000000..69b7abc54
--- /dev/null
+++ b/src/common/page_table.cpp
@@ -0,0 +1,31 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/page_table.h"
6
7namespace Common {
8
9PageTable::PageTable(std::size_t page_size_in_bits) : page_size_in_bits{page_size_in_bits} {}
10
11PageTable::~PageTable() = default;
12
13void PageTable::Resize(std::size_t address_space_width_in_bits) {
14 const std::size_t num_page_table_entries = 1ULL
15 << (address_space_width_in_bits - page_size_in_bits);
16
17 pointers.resize(num_page_table_entries);
18 attributes.resize(num_page_table_entries);
19 backing_addr.resize(num_page_table_entries);
20
21 // The default is a 39-bit address space, which causes an initial 1GB allocation size. If the
22 // vector size is subsequently decreased (via resize), the vector might not automatically
23 // actually reallocate/resize its underlying allocation, which wastes up to ~800 MB for
24 // 36-bit titles. Call shrink_to_fit to reduce capacity to what's actually in use.
25
26 pointers.shrink_to_fit();
27 attributes.shrink_to_fit();
28 backing_addr.shrink_to_fit();
29}
30
31} // namespace Common
diff --git a/src/common/page_table.h b/src/common/page_table.h
new file mode 100644
index 000000000..8b8ff0bb8
--- /dev/null
+++ b/src/common/page_table.h
@@ -0,0 +1,84 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <vector>
8#include <boost/icl/interval_map.hpp>
9#include "common/common_types.h"
10#include "common/memory_hook.h"
11
12namespace Common {
13
14enum class PageType : u8 {
15 /// Page is unmapped and should cause an access error.
16 Unmapped,
17 /// Page is mapped to regular memory. This is the only type you can get pointers to.
18 Memory,
19 /// Page is mapped to regular memory, but also needs to check for rasterizer cache flushing and
20 /// invalidation
21 RasterizerCachedMemory,
22 /// Page is mapped to a I/O region. Writing and reading to this page is handled by functions.
23 Special,
24 /// Page is allocated for use.
25 Allocated,
26};
27
28struct SpecialRegion {
29 enum class Type {
30 DebugHook,
31 IODevice,
32 } type;
33
34 MemoryHookPointer handler;
35
36 bool operator<(const SpecialRegion& other) const {
37 return std::tie(type, handler) < std::tie(other.type, other.handler);
38 }
39
40 bool operator==(const SpecialRegion& other) const {
41 return std::tie(type, handler) == std::tie(other.type, other.handler);
42 }
43};
44
45/**
46 * A (reasonably) fast way of allowing switchable and remappable process address spaces. It loosely
47 * mimics the way a real CPU page table works.
48 */
49struct PageTable {
50 explicit PageTable(std::size_t page_size_in_bits);
51 ~PageTable();
52
53 /**
54 * Resizes the page table to be able to accomodate enough pages within
55 * a given address space.
56 *
57 * @param address_space_width_in_bits The address size width in bits.
58 */
59 void Resize(std::size_t address_space_width_in_bits);
60
61 /**
62 * Vector of memory pointers backing each page. An entry can only be non-null if the
63 * corresponding entry in the `attributes` vector is of type `Memory`.
64 */
65 std::vector<u8*> pointers;
66
67 /**
68 * Contains MMIO handlers that back memory regions whose entries in the `attribute` vector is
69 * of type `Special`.
70 */
71 boost::icl::interval_map<u64, std::set<SpecialRegion>> special_regions;
72
73 /**
74 * Vector of fine grained page attributes. If it is set to any value other than `Memory`, then
75 * the corresponding entry in `pointers` MUST be set to null.
76 */
77 std::vector<PageType> attributes;
78
79 std::vector<u64> backing_addr;
80
81 const std::size_t page_size_in_bits{};
82};
83
84} // namespace Common
diff --git a/src/common/quaternion.h b/src/common/quaternion.h
index c528c0b68..370198ae0 100644
--- a/src/common/quaternion.h
+++ b/src/common/quaternion.h
@@ -6,12 +6,12 @@
6 6
7#include "common/vector_math.h" 7#include "common/vector_math.h"
8 8
9namespace Math { 9namespace Common {
10 10
11template <typename T> 11template <typename T>
12class Quaternion { 12class Quaternion {
13public: 13public:
14 Math::Vec3<T> xyz; 14 Vec3<T> xyz;
15 T w{}; 15 T w{};
16 16
17 Quaternion<decltype(-T{})> Inverse() const { 17 Quaternion<decltype(-T{})> Inverse() const {
@@ -38,12 +38,12 @@ public:
38}; 38};
39 39
40template <typename T> 40template <typename T>
41auto QuaternionRotate(const Quaternion<T>& q, const Math::Vec3<T>& v) { 41auto QuaternionRotate(const Quaternion<T>& q, const Vec3<T>& v) {
42 return v + 2 * Cross(q.xyz, Cross(q.xyz, v) + v * q.w); 42 return v + 2 * Cross(q.xyz, Cross(q.xyz, v) + v * q.w);
43} 43}
44 44
45inline Quaternion<float> MakeQuaternion(const Math::Vec3<float>& axis, float angle) { 45inline Quaternion<float> MakeQuaternion(const Vec3<float>& axis, float angle) {
46 return {axis * std::sin(angle / 2), std::cos(angle / 2)}; 46 return {axis * std::sin(angle / 2), std::cos(angle / 2)};
47} 47}
48 48
49} // namespace Math 49} // namespace Common
diff --git a/src/common/scm_rev.cpp.in b/src/common/scm_rev.cpp.in
index 2b1727769..d69038f65 100644
--- a/src/common/scm_rev.cpp.in
+++ b/src/common/scm_rev.cpp.in
@@ -11,6 +11,7 @@
11#define BUILD_DATE "@BUILD_DATE@" 11#define BUILD_DATE "@BUILD_DATE@"
12#define BUILD_FULLNAME "@BUILD_FULLNAME@" 12#define BUILD_FULLNAME "@BUILD_FULLNAME@"
13#define BUILD_VERSION "@BUILD_VERSION@" 13#define BUILD_VERSION "@BUILD_VERSION@"
14#define SHADER_CACHE_VERSION "@SHADER_CACHE_VERSION@"
14 15
15namespace Common { 16namespace Common {
16 17
@@ -21,6 +22,7 @@ const char g_build_name[] = BUILD_NAME;
21const char g_build_date[] = BUILD_DATE; 22const char g_build_date[] = BUILD_DATE;
22const char g_build_fullname[] = BUILD_FULLNAME; 23const char g_build_fullname[] = BUILD_FULLNAME;
23const char g_build_version[] = BUILD_VERSION; 24const char g_build_version[] = BUILD_VERSION;
25const char g_shader_cache_version[] = SHADER_CACHE_VERSION;
24 26
25} // namespace 27} // namespace
26 28
diff --git a/src/common/scm_rev.h b/src/common/scm_rev.h
index af9a9daed..666bf0367 100644
--- a/src/common/scm_rev.h
+++ b/src/common/scm_rev.h
@@ -13,5 +13,6 @@ extern const char g_build_name[];
13extern const char g_build_date[]; 13extern const char g_build_date[];
14extern const char g_build_fullname[]; 14extern const char g_build_fullname[];
15extern const char g_build_version[]; 15extern const char g_build_version[];
16extern const char g_shader_cache_version[];
16 17
17} // namespace Common 18} // namespace Common
diff --git a/src/common/scope_exit.h b/src/common/scope_exit.h
index baf1f1c9e..1176a72b1 100644
--- a/src/common/scope_exit.h
+++ b/src/common/scope_exit.h
@@ -20,7 +20,7 @@ struct ScopeExitHelper {
20 20
21template <typename Func> 21template <typename Func>
22ScopeExitHelper<Func> ScopeExit(Func&& func) { 22ScopeExitHelper<Func> ScopeExit(Func&& func) {
23 return ScopeExitHelper<Func>(std::move(func)); 23 return ScopeExitHelper<Func>(std::forward<Func>(func));
24} 24}
25} // namespace detail 25} // namespace detail
26 26
diff --git a/src/common/swap.h b/src/common/swap.h
index 32af0b6ac..71932c2bb 100644
--- a/src/common/swap.h
+++ b/src/common/swap.h
@@ -17,19 +17,16 @@
17 17
18#pragma once 18#pragma once
19 19
20#include <type_traits>
21
20#if defined(_MSC_VER) 22#if defined(_MSC_VER)
21#include <cstdlib> 23#include <cstdlib>
22#elif defined(__linux__)
23#include <byteswap.h>
24#elif defined(__Bitrig__) || defined(__DragonFly__) || defined(__FreeBSD__) || \
25 defined(__NetBSD__) || defined(__OpenBSD__)
26#include <sys/endian.h>
27#endif 24#endif
28#include <cstring> 25#include <cstring>
29#include "common/common_types.h" 26#include "common/common_types.h"
30 27
31// GCC 4.6+ 28// GCC
32#if __GNUC__ >= 5 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 6) 29#ifdef __GNUC__
33 30
34#if __BYTE_ORDER__ && (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__) && !defined(COMMON_LITTLE_ENDIAN) 31#if __BYTE_ORDER__ && (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__) && !defined(COMMON_LITTLE_ENDIAN)
35#define COMMON_LITTLE_ENDIAN 1 32#define COMMON_LITTLE_ENDIAN 1
@@ -38,7 +35,7 @@
38#endif 35#endif
39 36
40// LLVM/clang 37// LLVM/clang
41#elif __clang__ 38#elif defined(__clang__)
42 39
43#if __LITTLE_ENDIAN__ && !defined(COMMON_LITTLE_ENDIAN) 40#if __LITTLE_ENDIAN__ && !defined(COMMON_LITTLE_ENDIAN)
44#define COMMON_LITTLE_ENDIAN 1 41#define COMMON_LITTLE_ENDIAN 1
@@ -60,86 +57,49 @@
60namespace Common { 57namespace Common {
61 58
62#ifdef _MSC_VER 59#ifdef _MSC_VER
63inline u16 swap16(u16 _data) { 60[[nodiscard]] inline u16 swap16(u16 data) noexcept {
64 return _byteswap_ushort(_data); 61 return _byteswap_ushort(data);
65}
66inline u32 swap32(u32 _data) {
67 return _byteswap_ulong(_data);
68}
69inline u64 swap64(u64 _data) {
70 return _byteswap_uint64(_data);
71}
72#elif defined(ARCHITECTURE_ARM) && (__ARM_ARCH >= 6)
73inline u16 swap16(u16 _data) {
74 u32 data = _data;
75 __asm__("rev16 %0, %1\n" : "=l"(data) : "l"(data));
76 return (u16)data;
77}
78inline u32 swap32(u32 _data) {
79 __asm__("rev %0, %1\n" : "=l"(_data) : "l"(_data));
80 return _data;
81}
82inline u64 swap64(u64 _data) {
83 return ((u64)swap32(_data) << 32) | swap32(_data >> 32);
84}
85#elif __linux__
86inline u16 swap16(u16 _data) {
87 return bswap_16(_data);
88}
89inline u32 swap32(u32 _data) {
90 return bswap_32(_data);
91}
92inline u64 swap64(u64 _data) {
93 return bswap_64(_data);
94} 62}
95#elif __APPLE__ 63[[nodiscard]] inline u32 swap32(u32 data) noexcept {
96inline __attribute__((always_inline)) u16 swap16(u16 _data) { 64 return _byteswap_ulong(data);
97 return (_data >> 8) | (_data << 8);
98} 65}
99inline __attribute__((always_inline)) u32 swap32(u32 _data) { 66[[nodiscard]] inline u64 swap64(u64 data) noexcept {
100 return __builtin_bswap32(_data); 67 return _byteswap_uint64(data);
101} 68}
102inline __attribute__((always_inline)) u64 swap64(u64 _data) { 69#elif defined(__clang__) || defined(__GNUC__)
103 return __builtin_bswap64(_data); 70#if defined(__Bitrig__) || defined(__OpenBSD__)
104}
105#elif defined(__Bitrig__) || defined(__OpenBSD__)
106// redefine swap16, swap32, swap64 as inline functions 71// redefine swap16, swap32, swap64 as inline functions
107#undef swap16 72#undef swap16
108#undef swap32 73#undef swap32
109#undef swap64 74#undef swap64
110inline u16 swap16(u16 _data) { 75#endif
111 return __swap16(_data); 76[[nodiscard]] inline u16 swap16(u16 data) noexcept {
112} 77 return __builtin_bswap16(data);
113inline u32 swap32(u32 _data) {
114 return __swap32(_data);
115}
116inline u64 swap64(u64 _data) {
117 return __swap64(_data);
118}
119#elif defined(__DragonFly__) || defined(__FreeBSD__) || defined(__NetBSD__)
120inline u16 swap16(u16 _data) {
121 return bswap16(_data);
122} 78}
123inline u32 swap32(u32 _data) { 79[[nodiscard]] inline u32 swap32(u32 data) noexcept {
124 return bswap32(_data); 80 return __builtin_bswap32(data);
125} 81}
126inline u64 swap64(u64 _data) { 82[[nodiscard]] inline u64 swap64(u64 data) noexcept {
127 return bswap64(_data); 83 return __builtin_bswap64(data);
128} 84}
129#else 85#else
130// Slow generic implementation. 86// Generic implementation.
131inline u16 swap16(u16 data) { 87[[nodiscard]] inline u16 swap16(u16 data) noexcept {
132 return (data >> 8) | (data << 8); 88 return (data >> 8) | (data << 8);
133} 89}
134inline u32 swap32(u32 data) { 90[[nodiscard]] inline u32 swap32(u32 data) noexcept {
135 return (swap16(data) << 16) | swap16(data >> 16); 91 return ((data & 0xFF000000U) >> 24) | ((data & 0x00FF0000U) >> 8) |
92 ((data & 0x0000FF00U) << 8) | ((data & 0x000000FFU) << 24);
136} 93}
137inline u64 swap64(u64 data) { 94[[nodiscard]] inline u64 swap64(u64 data) noexcept {
138 return ((u64)swap32(data) << 32) | swap32(data >> 32); 95 return ((data & 0xFF00000000000000ULL) >> 56) | ((data & 0x00FF000000000000ULL) >> 40) |
96 ((data & 0x0000FF0000000000ULL) >> 24) | ((data & 0x000000FF00000000ULL) >> 8) |
97 ((data & 0x00000000FF000000ULL) << 8) | ((data & 0x0000000000FF0000ULL) << 24) |
98 ((data & 0x000000000000FF00ULL) << 40) | ((data & 0x00000000000000FFULL) << 56);
139} 99}
140#endif 100#endif
141 101
142inline float swapf(float f) { 102[[nodiscard]] inline float swapf(float f) noexcept {
143 static_assert(sizeof(u32) == sizeof(float), "float must be the same size as uint32_t."); 103 static_assert(sizeof(u32) == sizeof(float), "float must be the same size as uint32_t.");
144 104
145 u32 value; 105 u32 value;
@@ -151,7 +111,7 @@ inline float swapf(float f) {
151 return f; 111 return f;
152} 112}
153 113
154inline double swapd(double f) { 114[[nodiscard]] inline double swapd(double f) noexcept {
155 static_assert(sizeof(u64) == sizeof(double), "double must be the same size as uint64_t."); 115 static_assert(sizeof(u64) == sizeof(double), "double must be the same size as uint64_t.");
156 116
157 u64 value; 117 u64 value;
@@ -170,7 +130,7 @@ struct swap_struct_t {
170 using swapped_t = swap_struct_t; 130 using swapped_t = swap_struct_t;
171 131
172protected: 132protected:
173 T value = T(); 133 T value;
174 134
175 static T swap(T v) { 135 static T swap(T v) {
176 return F::swap(v); 136 return F::swap(v);
@@ -605,52 +565,154 @@ struct swap_double_t {
605 } 565 }
606}; 566};
607 567
608#if COMMON_LITTLE_ENDIAN 568template <typename T>
609using u16_le = u16; 569struct swap_enum_t {
610using u32_le = u32; 570 static_assert(std::is_enum_v<T>);
611using u64_le = u64; 571 using base = std::underlying_type_t<T>;
612 572
613using s16_le = s16; 573public:
614using s32_le = s32; 574 swap_enum_t() = default;
615using s64_le = s64; 575 swap_enum_t(const T& v) : value(swap(v)) {}
616 576
617using float_le = float; 577 swap_enum_t& operator=(const T& v) {
618using double_le = double; 578 value = swap(v);
579 return *this;
580 }
619 581
620using u64_be = swap_struct_t<u64, swap_64_t<u64>>; 582 operator T() const {
621using s64_be = swap_struct_t<s64, swap_64_t<s64>>; 583 return swap(value);
584 }
622 585
623using u32_be = swap_struct_t<u32, swap_32_t<u32>>; 586 explicit operator base() const {
624using s32_be = swap_struct_t<s32, swap_32_t<s32>>; 587 return static_cast<base>(swap(value));
588 }
625 589
626using u16_be = swap_struct_t<u16, swap_16_t<u16>>; 590protected:
627using s16_be = swap_struct_t<s16, swap_16_t<s16>>; 591 T value{};
592 // clang-format off
593 using swap_t = std::conditional_t<
594 std::is_same_v<base, u16>, swap_16_t<u16>, std::conditional_t<
595 std::is_same_v<base, s16>, swap_16_t<s16>, std::conditional_t<
596 std::is_same_v<base, u32>, swap_32_t<u32>, std::conditional_t<
597 std::is_same_v<base, s32>, swap_32_t<s32>, std::conditional_t<
598 std::is_same_v<base, u64>, swap_64_t<u64>, std::conditional_t<
599 std::is_same_v<base, s64>, swap_64_t<s64>, void>>>>>>;
600 // clang-format on
601 static T swap(T x) {
602 return static_cast<T>(swap_t::swap(static_cast<base>(x)));
603 }
604};
628 605
629using float_be = swap_struct_t<float, swap_float_t<float>>; 606struct SwapTag {}; // Use the different endianness from the system
630using double_be = swap_struct_t<double, swap_double_t<double>>; 607struct KeepTag {}; // Use the same endianness as the system
631#else
632 608
633using u64_le = swap_struct_t<u64, swap_64_t<u64>>; 609template <typename T, typename Tag>
634using s64_le = swap_struct_t<s64, swap_64_t<s64>>; 610struct AddEndian;
635 611
636using u32_le = swap_struct_t<u32, swap_32_t<u32>>; 612// KeepTag specializations
637using s32_le = swap_struct_t<s32, swap_32_t<s32>>;
638 613
639using u16_le = swap_struct_t<u16, swap_16_t<u16>>; 614template <typename T>
640using s16_le = swap_struct_t<s16, swap_16_t<s16>>; 615struct AddEndian<T, KeepTag> {
616 using type = T;
617};
641 618
642using float_le = swap_struct_t<float, swap_float_t<float>>; 619// SwapTag specializations
643using double_le = swap_struct_t<double, swap_double_t<double>>;
644 620
645using u16_be = u16; 621template <>
646using u32_be = u32; 622struct AddEndian<u8, SwapTag> {
647using u64_be = u64; 623 using type = u8;
624};
648 625
649using s16_be = s16; 626template <>
650using s32_be = s32; 627struct AddEndian<u16, SwapTag> {
651using s64_be = s64; 628 using type = swap_struct_t<u16, swap_16_t<u16>>;
629};
652 630
653using float_be = float; 631template <>
654using double_be = double; 632struct AddEndian<u32, SwapTag> {
633 using type = swap_struct_t<u32, swap_32_t<u32>>;
634};
635
636template <>
637struct AddEndian<u64, SwapTag> {
638 using type = swap_struct_t<u64, swap_64_t<u64>>;
639};
640
641template <>
642struct AddEndian<s8, SwapTag> {
643 using type = s8;
644};
645
646template <>
647struct AddEndian<s16, SwapTag> {
648 using type = swap_struct_t<s16, swap_16_t<s16>>;
649};
650
651template <>
652struct AddEndian<s32, SwapTag> {
653 using type = swap_struct_t<s32, swap_32_t<s32>>;
654};
655
656template <>
657struct AddEndian<s64, SwapTag> {
658 using type = swap_struct_t<s64, swap_64_t<s64>>;
659};
660
661template <>
662struct AddEndian<float, SwapTag> {
663 using type = swap_struct_t<float, swap_float_t<float>>;
664};
665
666template <>
667struct AddEndian<double, SwapTag> {
668 using type = swap_struct_t<double, swap_double_t<double>>;
669};
670
671template <typename T>
672struct AddEndian<T, SwapTag> {
673 static_assert(std::is_enum_v<T>);
674 using type = swap_enum_t<T>;
675};
676
677// Alias LETag/BETag as KeepTag/SwapTag depending on the system
678#if COMMON_LITTLE_ENDIAN
679
680using LETag = KeepTag;
681using BETag = SwapTag;
682
683#else
684
685using BETag = KeepTag;
686using LETag = SwapTag;
655 687
656#endif 688#endif
689
690// Aliases for LE types
691using u16_le = AddEndian<u16, LETag>::type;
692using u32_le = AddEndian<u32, LETag>::type;
693using u64_le = AddEndian<u64, LETag>::type;
694
695using s16_le = AddEndian<s16, LETag>::type;
696using s32_le = AddEndian<s32, LETag>::type;
697using s64_le = AddEndian<s64, LETag>::type;
698
699template <typename T>
700using enum_le = std::enable_if_t<std::is_enum_v<T>, typename AddEndian<T, LETag>::type>;
701
702using float_le = AddEndian<float, LETag>::type;
703using double_le = AddEndian<double, LETag>::type;
704
705// Aliases for BE types
706using u16_be = AddEndian<u16, BETag>::type;
707using u32_be = AddEndian<u32, BETag>::type;
708using u64_be = AddEndian<u64, BETag>::type;
709
710using s16_be = AddEndian<s16, BETag>::type;
711using s32_be = AddEndian<s32, BETag>::type;
712using s64_be = AddEndian<s64, BETag>::type;
713
714template <typename T>
715using enum_be = std::enable_if_t<std::is_enum_v<T>, typename AddEndian<T, BETag>::type>;
716
717using float_be = AddEndian<float, BETag>::type;
718using double_be = AddEndian<double, BETag>::type;
diff --git a/src/common/thread.cpp b/src/common/thread.cpp
index 5144c0d9f..fe7a420cc 100644
--- a/src/common/thread.cpp
+++ b/src/common/thread.cpp
@@ -27,18 +27,6 @@ namespace Common {
27 27
28#ifdef _MSC_VER 28#ifdef _MSC_VER
29 29
30void SetThreadAffinity(std::thread::native_handle_type thread, u32 mask) {
31 SetThreadAffinityMask(thread, mask);
32}
33
34void SetCurrentThreadAffinity(u32 mask) {
35 SetThreadAffinityMask(GetCurrentThread(), mask);
36}
37
38void SwitchCurrentThread() {
39 SwitchToThread();
40}
41
42// Sets the debugger-visible name of the current thread. 30// Sets the debugger-visible name of the current thread.
43// Uses undocumented (actually, it is now documented) trick. 31// Uses undocumented (actually, it is now documented) trick.
44// http://msdn.microsoft.com/library/default.asp?url=/library/en-us/vsdebug/html/vxtsksettingthreadname.asp 32// http://msdn.microsoft.com/library/default.asp?url=/library/en-us/vsdebug/html/vxtsksettingthreadname.asp
@@ -70,31 +58,6 @@ void SetCurrentThreadName(const char* name) {
70 58
71#else // !MSVC_VER, so must be POSIX threads 59#else // !MSVC_VER, so must be POSIX threads
72 60
73void SetThreadAffinity(std::thread::native_handle_type thread, u32 mask) {
74#ifdef __APPLE__
75 thread_policy_set(pthread_mach_thread_np(thread), THREAD_AFFINITY_POLICY, (integer_t*)&mask, 1);
76#elif (defined __linux__ || defined __FreeBSD__) && !(defined ANDROID)
77 cpu_set_t cpu_set;
78 CPU_ZERO(&cpu_set);
79
80 for (int i = 0; i != sizeof(mask) * 8; ++i)
81 if ((mask >> i) & 1)
82 CPU_SET(i, &cpu_set);
83
84 pthread_setaffinity_np(thread, sizeof(cpu_set), &cpu_set);
85#endif
86}
87
88void SetCurrentThreadAffinity(u32 mask) {
89 SetThreadAffinity(pthread_self(), mask);
90}
91
92#ifndef _WIN32
93void SwitchCurrentThread() {
94 usleep(1000 * 1);
95}
96#endif
97
98// MinGW with the POSIX threading model does not support pthread_setname_np 61// MinGW with the POSIX threading model does not support pthread_setname_np
99#if !defined(_WIN32) || defined(_MSC_VER) 62#if !defined(_WIN32) || defined(_MSC_VER)
100void SetCurrentThreadName(const char* name) { 63void SetCurrentThreadName(const char* name) {
diff --git a/src/common/thread.h b/src/common/thread.h
index 2cf74452d..0cfd98be6 100644
--- a/src/common/thread.h
+++ b/src/common/thread.h
@@ -9,14 +9,13 @@
9#include <cstddef> 9#include <cstddef>
10#include <mutex> 10#include <mutex>
11#include <thread> 11#include <thread>
12#include "common/common_types.h"
13 12
14namespace Common { 13namespace Common {
15 14
16class Event { 15class Event {
17public: 16public:
18 void Set() { 17 void Set() {
19 std::lock_guard<std::mutex> lk(mutex); 18 std::lock_guard lk{mutex};
20 if (!is_set) { 19 if (!is_set) {
21 is_set = true; 20 is_set = true;
22 condvar.notify_one(); 21 condvar.notify_one();
@@ -24,14 +23,14 @@ public:
24 } 23 }
25 24
26 void Wait() { 25 void Wait() {
27 std::unique_lock<std::mutex> lk(mutex); 26 std::unique_lock lk{mutex};
28 condvar.wait(lk, [&] { return is_set; }); 27 condvar.wait(lk, [&] { return is_set; });
29 is_set = false; 28 is_set = false;
30 } 29 }
31 30
32 template <class Clock, class Duration> 31 template <class Clock, class Duration>
33 bool WaitUntil(const std::chrono::time_point<Clock, Duration>& time) { 32 bool WaitUntil(const std::chrono::time_point<Clock, Duration>& time) {
34 std::unique_lock<std::mutex> lk(mutex); 33 std::unique_lock lk{mutex};
35 if (!condvar.wait_until(lk, time, [this] { return is_set; })) 34 if (!condvar.wait_until(lk, time, [this] { return is_set; }))
36 return false; 35 return false;
37 is_set = false; 36 is_set = false;
@@ -39,7 +38,7 @@ public:
39 } 38 }
40 39
41 void Reset() { 40 void Reset() {
42 std::unique_lock<std::mutex> lk(mutex); 41 std::unique_lock lk{mutex};
43 // no other action required, since wait loops on the predicate and any lingering signal will 42 // no other action required, since wait loops on the predicate and any lingering signal will
44 // get cleared on the first iteration 43 // get cleared on the first iteration
45 is_set = false; 44 is_set = false;
@@ -57,7 +56,7 @@ public:
57 56
58 /// Blocks until all "count" threads have called Sync() 57 /// Blocks until all "count" threads have called Sync()
59 void Sync() { 58 void Sync() {
60 std::unique_lock<std::mutex> lk(mutex); 59 std::unique_lock lk{mutex};
61 const std::size_t current_generation = generation; 60 const std::size_t current_generation = generation;
62 61
63 if (++waiting == count) { 62 if (++waiting == count) {
@@ -78,9 +77,6 @@ private:
78 std::size_t generation = 0; // Incremented once each time the barrier is used 77 std::size_t generation = 0; // Incremented once each time the barrier is used
79}; 78};
80 79
81void SetThreadAffinity(std::thread::native_handle_type thread, u32 mask);
82void SetCurrentThreadAffinity(u32 mask);
83void SwitchCurrentThread(); // On Linux, this is equal to sleep 1ms
84void SetCurrentThreadName(const char* name); 80void SetCurrentThreadName(const char* name);
85 81
86} // namespace Common 82} // namespace Common
diff --git a/src/common/thread_queue_list.h b/src/common/thread_queue_list.h
index e7594db68..791f99a8c 100644
--- a/src/common/thread_queue_list.h
+++ b/src/common/thread_queue_list.h
@@ -6,7 +6,6 @@
6 6
7#include <array> 7#include <array>
8#include <deque> 8#include <deque>
9#include <boost/range/algorithm_ext/erase.hpp>
10 9
11namespace Common { 10namespace Common {
12 11
@@ -111,8 +110,9 @@ struct ThreadQueueList {
111 } 110 }
112 111
113 void remove(Priority priority, const T& thread_id) { 112 void remove(Priority priority, const T& thread_id) {
114 Queue* cur = &queues[priority]; 113 Queue* const cur = &queues[priority];
115 boost::remove_erase(cur->data, thread_id); 114 const auto iter = std::remove(cur->data.begin(), cur->data.end(), thread_id);
115 cur->data.erase(iter, cur->data.end());
116 } 116 }
117 117
118 void rotate(Priority priority) { 118 void rotate(Priority priority) {
diff --git a/src/common/threadsafe_queue.h b/src/common/threadsafe_queue.h
index edf13bc49..e714ba5b3 100644
--- a/src/common/threadsafe_queue.h
+++ b/src/common/threadsafe_queue.h
@@ -7,17 +7,17 @@
7// a simple lockless thread-safe, 7// a simple lockless thread-safe,
8// single reader, single writer queue 8// single reader, single writer queue
9 9
10#include <algorithm>
11#include <atomic> 10#include <atomic>
11#include <condition_variable>
12#include <cstddef> 12#include <cstddef>
13#include <mutex> 13#include <mutex>
14#include "common/common_types.h" 14#include <utility>
15 15
16namespace Common { 16namespace Common {
17template <typename T, bool NeedSize = true> 17template <typename T>
18class SPSCQueue { 18class SPSCQueue {
19public: 19public:
20 SPSCQueue() : size(0) { 20 SPSCQueue() {
21 write_ptr = read_ptr = new ElementPtr(); 21 write_ptr = read_ptr = new ElementPtr();
22 } 22 }
23 ~SPSCQueue() { 23 ~SPSCQueue() {
@@ -25,13 +25,12 @@ public:
25 delete read_ptr; 25 delete read_ptr;
26 } 26 }
27 27
28 u32 Size() const { 28 std::size_t Size() const {
29 static_assert(NeedSize, "using Size() on FifoQueue without NeedSize");
30 return size.load(); 29 return size.load();
31 } 30 }
32 31
33 bool Empty() const { 32 bool Empty() const {
34 return !read_ptr->next.load(); 33 return Size() == 0;
35 } 34 }
36 35
37 T& Front() const { 36 T& Front() const {
@@ -47,13 +46,14 @@ public:
47 ElementPtr* new_ptr = new ElementPtr(); 46 ElementPtr* new_ptr = new ElementPtr();
48 write_ptr->next.store(new_ptr, std::memory_order_release); 47 write_ptr->next.store(new_ptr, std::memory_order_release);
49 write_ptr = new_ptr; 48 write_ptr = new_ptr;
50 if (NeedSize) 49 cv.notify_one();
51 size++; 50
51 ++size;
52 } 52 }
53 53
54 void Pop() { 54 void Pop() {
55 if (NeedSize) 55 --size;
56 size--; 56
57 ElementPtr* tmpptr = read_ptr; 57 ElementPtr* tmpptr = read_ptr;
58 // advance the read pointer 58 // advance the read pointer
59 read_ptr = tmpptr->next.load(); 59 read_ptr = tmpptr->next.load();
@@ -66,8 +66,7 @@ public:
66 if (Empty()) 66 if (Empty())
67 return false; 67 return false;
68 68
69 if (NeedSize) 69 --size;
70 size--;
71 70
72 ElementPtr* tmpptr = read_ptr; 71 ElementPtr* tmpptr = read_ptr;
73 read_ptr = tmpptr->next.load(std::memory_order_acquire); 72 read_ptr = tmpptr->next.load(std::memory_order_acquire);
@@ -77,6 +76,16 @@ public:
77 return true; 76 return true;
78 } 77 }
79 78
79 T PopWait() {
80 if (Empty()) {
81 std::unique_lock lock{cv_mutex};
82 cv.wait(lock, [this]() { return !Empty(); });
83 }
84 T t;
85 Pop(t);
86 return t;
87 }
88
80 // not thread-safe 89 // not thread-safe
81 void Clear() { 90 void Clear() {
82 size.store(0); 91 size.store(0);
@@ -89,7 +98,7 @@ private:
89 // and a pointer to the next ElementPtr 98 // and a pointer to the next ElementPtr
90 class ElementPtr { 99 class ElementPtr {
91 public: 100 public:
92 ElementPtr() : next(nullptr) {} 101 ElementPtr() {}
93 ~ElementPtr() { 102 ~ElementPtr() {
94 ElementPtr* next_ptr = next.load(); 103 ElementPtr* next_ptr = next.load();
95 104
@@ -98,21 +107,23 @@ private:
98 } 107 }
99 108
100 T current; 109 T current;
101 std::atomic<ElementPtr*> next; 110 std::atomic<ElementPtr*> next{nullptr};
102 }; 111 };
103 112
104 ElementPtr* write_ptr; 113 ElementPtr* write_ptr;
105 ElementPtr* read_ptr; 114 ElementPtr* read_ptr;
106 std::atomic<u32> size; 115 std::atomic_size_t size{0};
116 std::mutex cv_mutex;
117 std::condition_variable cv;
107}; 118};
108 119
109// a simple thread-safe, 120// a simple thread-safe,
110// single reader, multiple writer queue 121// single reader, multiple writer queue
111 122
112template <typename T, bool NeedSize = true> 123template <typename T>
113class MPSCQueue { 124class MPSCQueue {
114public: 125public:
115 u32 Size() const { 126 std::size_t Size() const {
116 return spsc_queue.Size(); 127 return spsc_queue.Size();
117 } 128 }
118 129
@@ -126,7 +137,7 @@ public:
126 137
127 template <typename Arg> 138 template <typename Arg>
128 void Push(Arg&& t) { 139 void Push(Arg&& t) {
129 std::lock_guard<std::mutex> lock(write_lock); 140 std::lock_guard lock{write_lock};
130 spsc_queue.Push(t); 141 spsc_queue.Push(t);
131 } 142 }
132 143
@@ -138,13 +149,17 @@ public:
138 return spsc_queue.Pop(t); 149 return spsc_queue.Pop(t);
139 } 150 }
140 151
152 T PopWait() {
153 return spsc_queue.PopWait();
154 }
155
141 // not thread-safe 156 // not thread-safe
142 void Clear() { 157 void Clear() {
143 spsc_queue.Clear(); 158 spsc_queue.Clear();
144 } 159 }
145 160
146private: 161private:
147 SPSCQueue<T, NeedSize> spsc_queue; 162 SPSCQueue<T> spsc_queue;
148 std::mutex write_lock; 163 std::mutex write_lock;
149}; 164};
150} // namespace Common 165} // namespace Common
diff --git a/src/common/uint128.cpp b/src/common/uint128.cpp
new file mode 100644
index 000000000..32bf56730
--- /dev/null
+++ b/src/common/uint128.cpp
@@ -0,0 +1,45 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#ifdef _MSC_VER
6#include <intrin.h>
7
8#pragma intrinsic(_umul128)
9#endif
10#include <cstring>
11#include "common/uint128.h"
12
13namespace Common {
14
15u128 Multiply64Into128(u64 a, u64 b) {
16 u128 result;
17#ifdef _MSC_VER
18 result[0] = _umul128(a, b, &result[1]);
19#else
20 unsigned __int128 tmp = a;
21 tmp *= b;
22 std::memcpy(&result, &tmp, sizeof(u128));
23#endif
24 return result;
25}
26
27std::pair<u64, u64> Divide128On32(u128 dividend, u32 divisor) {
28 u64 remainder = dividend[0] % divisor;
29 u64 accum = dividend[0] / divisor;
30 if (dividend[1] == 0)
31 return {accum, remainder};
32 // We ignore dividend[1] / divisor as that overflows
33 const u64 first_segment = (dividend[1] % divisor) << 32;
34 accum += (first_segment / divisor) << 32;
35 const u64 second_segment = (first_segment % divisor) << 32;
36 accum += (second_segment / divisor);
37 remainder += second_segment % divisor;
38 if (remainder >= divisor) {
39 accum++;
40 remainder -= divisor;
41 }
42 return {accum, remainder};
43}
44
45} // namespace Common
diff --git a/src/common/uint128.h b/src/common/uint128.h
new file mode 100644
index 000000000..a3be2a2cb
--- /dev/null
+++ b/src/common/uint128.h
@@ -0,0 +1,19 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <utility>
8#include "common/common_types.h"
9
10namespace Common {
11
12// This function multiplies 2 u64 values and produces a u128 value;
13u128 Multiply64Into128(u64 a, u64 b);
14
15// This function divides a u128 by a u32 value and produces two u64 values:
16// the result of division and the remainder
17std::pair<u64, u64> Divide128On32(u128 dividend, u32 divisor);
18
19} // namespace Common
diff --git a/src/common/vector_math.h b/src/common/vector_math.h
index 8feb49941..429485329 100644
--- a/src/common/vector_math.h
+++ b/src/common/vector_math.h
@@ -33,7 +33,7 @@
33#include <cmath> 33#include <cmath>
34#include <type_traits> 34#include <type_traits>
35 35
36namespace Math { 36namespace Common {
37 37
38template <typename T> 38template <typename T>
39class Vec2; 39class Vec2;
@@ -690,4 +690,4 @@ constexpr Vec4<T> MakeVec(const T& x, const Vec3<T>& yzw) {
690 return MakeVec(x, yzw[0], yzw[1], yzw[2]); 690 return MakeVec(x, yzw[0], yzw[1], yzw[2]);
691} 691}
692 692
693} // namespace Math 693} // namespace Common
diff --git a/src/common/zstd_compression.cpp b/src/common/zstd_compression.cpp
new file mode 100644
index 000000000..60a35c67c
--- /dev/null
+++ b/src/common/zstd_compression.cpp
@@ -0,0 +1,53 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <algorithm>
8#include <zstd.h>
9
10#include "common/assert.h"
11#include "common/zstd_compression.h"
12
13namespace Common::Compression {
14
15std::vector<u8> CompressDataZSTD(const u8* source, std::size_t source_size, s32 compression_level) {
16 compression_level = std::clamp(compression_level, 1, ZSTD_maxCLevel());
17
18 const std::size_t max_compressed_size = ZSTD_compressBound(source_size);
19 std::vector<u8> compressed(max_compressed_size);
20
21 const std::size_t compressed_size =
22 ZSTD_compress(compressed.data(), compressed.size(), source, source_size, compression_level);
23
24 if (ZSTD_isError(compressed_size)) {
25 // Compression failed
26 return {};
27 }
28
29 compressed.resize(compressed_size);
30
31 return compressed;
32}
33
34std::vector<u8> CompressDataZSTDDefault(const u8* source, std::size_t source_size) {
35 return CompressDataZSTD(source, source_size, ZSTD_CLEVEL_DEFAULT);
36}
37
38std::vector<u8> DecompressDataZSTD(const std::vector<u8>& compressed) {
39 const std::size_t decompressed_size =
40 ZSTD_getDecompressedSize(compressed.data(), compressed.size());
41 std::vector<u8> decompressed(decompressed_size);
42
43 const std::size_t uncompressed_result_size = ZSTD_decompress(
44 decompressed.data(), decompressed.size(), compressed.data(), compressed.size());
45
46 if (decompressed_size != uncompressed_result_size || ZSTD_isError(uncompressed_result_size)) {
47 // Decompression failed
48 return {};
49 }
50 return decompressed;
51}
52
53} // namespace Common::Compression
diff --git a/src/common/zstd_compression.h b/src/common/zstd_compression.h
new file mode 100644
index 000000000..e0a64b035
--- /dev/null
+++ b/src/common/zstd_compression.h
@@ -0,0 +1,42 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <vector>
6
7#include "common/common_types.h"
8
9namespace Common::Compression {
10
11/**
12 * Compresses a source memory region with Zstandard and returns the compressed data in a vector.
13 *
14 * @param source the uncompressed source memory region.
15 * @param source_size the size in bytes of the uncompressed source memory region.
16 * @param compression_level the used compression level. Should be between 1 and 22.
17 *
18 * @return the compressed data.
19 */
20std::vector<u8> CompressDataZSTD(const u8* source, std::size_t source_size, s32 compression_level);
21
22/**
23 * Compresses a source memory region with Zstandard with the default compression level and returns
24 * the compressed data in a vector.
25 *
26 * @param source the uncompressed source memory region.
27 * @param source_size the size in bytes of the uncompressed source memory region.
28 *
29 * @return the compressed data.
30 */
31std::vector<u8> CompressDataZSTDDefault(const u8* source, std::size_t source_size);
32
33/**
34 * Decompresses a source memory region with Zstandard and returns the uncompressed data in a vector.
35 *
36 * @param compressed the compressed source memory region.
37 *
38 * @return the decompressed data.
39 */
40std::vector<u8> DecompressDataZSTD(const std::vector<u8>& compressed);
41
42} // namespace Common::Compression \ No newline at end of file
diff --git a/src/core/CMakeLists.txt b/src/core/CMakeLists.txt
index aa9e05089..c59107102 100644
--- a/src/core/CMakeLists.txt
+++ b/src/core/CMakeLists.txt
@@ -31,6 +31,8 @@ add_library(core STATIC
31 file_sys/bis_factory.h 31 file_sys/bis_factory.h
32 file_sys/card_image.cpp 32 file_sys/card_image.cpp
33 file_sys/card_image.h 33 file_sys/card_image.h
34 file_sys/cheat_engine.cpp
35 file_sys/cheat_engine.h
34 file_sys/content_archive.cpp 36 file_sys/content_archive.cpp
35 file_sys/content_archive.h 37 file_sys/content_archive.h
36 file_sys/control_metadata.cpp 38 file_sys/control_metadata.cpp
@@ -68,6 +70,8 @@ add_library(core STATIC
68 file_sys/system_archive/ng_word.h 70 file_sys/system_archive/ng_word.h
69 file_sys/system_archive/system_archive.cpp 71 file_sys/system_archive/system_archive.cpp
70 file_sys/system_archive/system_archive.h 72 file_sys/system_archive/system_archive.h
73 file_sys/system_archive/system_version.cpp
74 file_sys/system_archive/system_version.h
71 file_sys/vfs.cpp 75 file_sys/vfs.cpp
72 file_sys/vfs.h 76 file_sys/vfs.h
73 file_sys/vfs_concat.cpp 77 file_sys/vfs_concat.cpp
@@ -95,6 +99,8 @@ add_library(core STATIC
95 frontend/framebuffer_layout.cpp 99 frontend/framebuffer_layout.cpp
96 frontend/framebuffer_layout.h 100 frontend/framebuffer_layout.h
97 frontend/input.h 101 frontend/input.h
102 frontend/scope_acquire_window_context.cpp
103 frontend/scope_acquire_window_context.h
98 gdbstub/gdbstub.cpp 104 gdbstub/gdbstub.cpp
99 gdbstub/gdbstub.h 105 gdbstub/gdbstub.h
100 hle/ipc.h 106 hle/ipc.h
@@ -105,6 +111,8 @@ add_library(core STATIC
105 hle/kernel/client_port.h 111 hle/kernel/client_port.h
106 hle/kernel/client_session.cpp 112 hle/kernel/client_session.cpp
107 hle/kernel/client_session.h 113 hle/kernel/client_session.h
114 hle/kernel/code_set.cpp
115 hle/kernel/code_set.h
108 hle/kernel/errors.h 116 hle/kernel/errors.h
109 hle/kernel/handle_table.cpp 117 hle/kernel/handle_table.cpp
110 hle/kernel/handle_table.h 118 hle/kernel/handle_table.h
@@ -138,8 +146,8 @@ add_library(core STATIC
138 hle/kernel/svc_wrap.h 146 hle/kernel/svc_wrap.h
139 hle/kernel/thread.cpp 147 hle/kernel/thread.cpp
140 hle/kernel/thread.h 148 hle/kernel/thread.h
141 hle/kernel/timer.cpp 149 hle/kernel/transfer_memory.cpp
142 hle/kernel/timer.h 150 hle/kernel/transfer_memory.h
143 hle/kernel/vm_manager.cpp 151 hle/kernel/vm_manager.cpp
144 hle/kernel/vm_manager.h 152 hle/kernel/vm_manager.h
145 hle/kernel/wait_object.cpp 153 hle/kernel/wait_object.cpp
@@ -217,6 +225,7 @@ add_library(core STATIC
217 hle/service/audio/audren_u.h 225 hle/service/audio/audren_u.h
218 hle/service/audio/codecctl.cpp 226 hle/service/audio/codecctl.cpp
219 hle/service/audio/codecctl.h 227 hle/service/audio/codecctl.h
228 hle/service/audio/errors.h
220 hle/service/audio/hwopus.cpp 229 hle/service/audio/hwopus.cpp
221 hle/service/audio/hwopus.h 230 hle/service/audio/hwopus.h
222 hle/service/bcat/bcat.cpp 231 hle/service/bcat/bcat.cpp
@@ -400,6 +409,10 @@ add_library(core STATIC
400 hle/service/time/time.h 409 hle/service/time/time.h
401 hle/service/usb/usb.cpp 410 hle/service/usb/usb.cpp
402 hle/service/usb/usb.h 411 hle/service/usb/usb.h
412 hle/service/vi/display/vi_display.cpp
413 hle/service/vi/display/vi_display.h
414 hle/service/vi/layer/vi_layer.cpp
415 hle/service/vi/layer/vi_layer.h
403 hle/service/vi/vi.cpp 416 hle/service/vi/vi.cpp
404 hle/service/vi/vi.h 417 hle/service/vi/vi.h
405 hle/service/vi/vi_m.cpp 418 hle/service/vi/vi_m.cpp
@@ -414,8 +427,6 @@ add_library(core STATIC
414 loader/deconstructed_rom_directory.h 427 loader/deconstructed_rom_directory.h
415 loader/elf.cpp 428 loader/elf.cpp
416 loader/elf.h 429 loader/elf.h
417 loader/linker.cpp
418 loader/linker.h
419 loader/loader.cpp 430 loader/loader.cpp
420 loader/loader.h 431 loader/loader.h
421 loader/nax.cpp 432 loader/nax.cpp
@@ -432,8 +443,6 @@ add_library(core STATIC
432 loader/xci.h 443 loader/xci.h
433 memory.cpp 444 memory.cpp
434 memory.h 445 memory.h
435 memory_hook.cpp
436 memory_hook.h
437 memory_setup.h 446 memory_setup.h
438 perf_stats.cpp 447 perf_stats.cpp
439 perf_stats.h 448 perf_stats.h
@@ -449,7 +458,7 @@ add_library(core STATIC
449create_target_directory_groups(core) 458create_target_directory_groups(core)
450 459
451target_link_libraries(core PUBLIC common PRIVATE audio_core video_core) 460target_link_libraries(core PUBLIC common PRIVATE audio_core video_core)
452target_link_libraries(core PUBLIC Boost::boost PRIVATE fmt lz4_static mbedtls opus unicorn open_source_archives) 461target_link_libraries(core PUBLIC Boost::boost PRIVATE fmt mbedtls opus unicorn open_source_archives)
453if (ENABLE_WEB_SERVICE) 462if (ENABLE_WEB_SERVICE)
454 target_compile_definitions(core PRIVATE -DENABLE_WEB_SERVICE) 463 target_compile_definitions(core PRIVATE -DENABLE_WEB_SERVICE)
455 target_link_libraries(core PRIVATE web_service) 464 target_link_libraries(core PRIVATE web_service)
diff --git a/src/core/arm/dynarmic/arm_dynarmic.cpp b/src/core/arm/dynarmic/arm_dynarmic.cpp
index afbda8d8b..dc96e35d5 100644
--- a/src/core/arm/dynarmic/arm_dynarmic.cpp
+++ b/src/core/arm/dynarmic/arm_dynarmic.cpp
@@ -12,7 +12,9 @@
12#include "core/core.h" 12#include "core/core.h"
13#include "core/core_cpu.h" 13#include "core/core_cpu.h"
14#include "core/core_timing.h" 14#include "core/core_timing.h"
15#include "core/core_timing_util.h"
15#include "core/gdbstub/gdbstub.h" 16#include "core/gdbstub/gdbstub.h"
17#include "core/hle/kernel/kernel.h"
16#include "core/hle/kernel/process.h" 18#include "core/hle/kernel/process.h"
17#include "core/hle/kernel/svc.h" 19#include "core/hle/kernel/svc.h"
18#include "core/hle/kernel/vm_manager.h" 20#include "core/hle/kernel/vm_manager.h"
@@ -25,7 +27,6 @@ using Vector = Dynarmic::A64::Vector;
25class ARM_Dynarmic_Callbacks : public Dynarmic::A64::UserCallbacks { 27class ARM_Dynarmic_Callbacks : public Dynarmic::A64::UserCallbacks {
26public: 28public:
27 explicit ARM_Dynarmic_Callbacks(ARM_Dynarmic& parent) : parent(parent) {} 29 explicit ARM_Dynarmic_Callbacks(ARM_Dynarmic& parent) : parent(parent) {}
28 ~ARM_Dynarmic_Callbacks() = default;
29 30
30 u8 MemoryRead8(u64 vaddr) override { 31 u8 MemoryRead8(u64 vaddr) override {
31 return Memory::Read8(vaddr); 32 return Memory::Read8(vaddr);
@@ -99,7 +100,7 @@ public:
99 } 100 }
100 101
101 void CallSVC(u32 swi) override { 102 void CallSVC(u32 swi) override {
102 Kernel::CallSVC(swi); 103 Kernel::CallSVC(parent.system, swi);
103 } 104 }
104 105
105 void AddTicks(u64 ticks) override { 106 void AddTicks(u64 ticks) override {
@@ -112,14 +113,14 @@ public:
112 // Always execute at least one tick. 113 // Always execute at least one tick.
113 amortized_ticks = std::max<u64>(amortized_ticks, 1); 114 amortized_ticks = std::max<u64>(amortized_ticks, 1);
114 115
115 CoreTiming::AddTicks(amortized_ticks); 116 parent.system.CoreTiming().AddTicks(amortized_ticks);
116 num_interpreted_instructions = 0; 117 num_interpreted_instructions = 0;
117 } 118 }
118 u64 GetTicksRemaining() override { 119 u64 GetTicksRemaining() override {
119 return std::max(CoreTiming::GetDowncount(), 0); 120 return std::max(parent.system.CoreTiming().GetDowncount(), 0);
120 } 121 }
121 u64 GetCNTPCT() override { 122 u64 GetCNTPCT() override {
122 return CoreTiming::GetTicks(); 123 return Timing::CpuCyclesToClockCycles(parent.system.CoreTiming().GetTicks());
123 } 124 }
124 125
125 ARM_Dynarmic& parent; 126 ARM_Dynarmic& parent;
@@ -129,7 +130,7 @@ public:
129}; 130};
130 131
131std::unique_ptr<Dynarmic::A64::Jit> ARM_Dynarmic::MakeJit() const { 132std::unique_ptr<Dynarmic::A64::Jit> ARM_Dynarmic::MakeJit() const {
132 auto* current_process = Core::CurrentProcess(); 133 auto* current_process = system.Kernel().CurrentProcess();
133 auto** const page_table = current_process->VMManager().page_table.pointers.data(); 134 auto** const page_table = current_process->VMManager().page_table.pointers.data();
134 135
135 Dynarmic::A64::UserConfig config; 136 Dynarmic::A64::UserConfig config;
@@ -151,7 +152,7 @@ std::unique_ptr<Dynarmic::A64::Jit> ARM_Dynarmic::MakeJit() const {
151 config.tpidr_el0 = &cb->tpidr_el0; 152 config.tpidr_el0 = &cb->tpidr_el0;
152 config.dczid_el0 = 4; 153 config.dczid_el0 = 4;
153 config.ctr_el0 = 0x8444c004; 154 config.ctr_el0 = 0x8444c004;
154 config.cntfrq_el0 = 19200000; // Value from fusee. 155 config.cntfrq_el0 = Timing::CNTFREQ;
155 156
156 // Unpredictable instructions 157 // Unpredictable instructions
157 config.define_unpredictable_behaviour = true; 158 config.define_unpredictable_behaviour = true;
@@ -163,7 +164,6 @@ MICROPROFILE_DEFINE(ARM_Jit_Dynarmic, "ARM JIT", "Dynarmic", MP_RGB(255, 64, 64)
163 164
164void ARM_Dynarmic::Run() { 165void ARM_Dynarmic::Run() {
165 MICROPROFILE_SCOPE(ARM_Jit_Dynarmic); 166 MICROPROFILE_SCOPE(ARM_Jit_Dynarmic);
166 ASSERT(Memory::GetCurrentPageTable() == current_page_table);
167 167
168 jit->Run(); 168 jit->Run();
169} 169}
@@ -172,8 +172,10 @@ void ARM_Dynarmic::Step() {
172 cb->InterpreterFallback(jit->GetPC(), 1); 172 cb->InterpreterFallback(jit->GetPC(), 1);
173} 173}
174 174
175ARM_Dynarmic::ARM_Dynarmic(ExclusiveMonitor& exclusive_monitor, std::size_t core_index) 175ARM_Dynarmic::ARM_Dynarmic(System& system, ExclusiveMonitor& exclusive_monitor,
176 : cb(std::make_unique<ARM_Dynarmic_Callbacks>(*this)), core_index{core_index}, 176 std::size_t core_index)
177 : cb(std::make_unique<ARM_Dynarmic_Callbacks>(*this)), inner_unicorn{system},
178 core_index{core_index}, system{system},
177 exclusive_monitor{dynamic_cast<DynarmicExclusiveMonitor&>(exclusive_monitor)} { 179 exclusive_monitor{dynamic_cast<DynarmicExclusiveMonitor&>(exclusive_monitor)} {
178 ThreadContext ctx{}; 180 ThreadContext ctx{};
179 inner_unicorn.SaveContext(ctx); 181 inner_unicorn.SaveContext(ctx);
@@ -276,7 +278,6 @@ void ARM_Dynarmic::ClearExclusiveState() {
276 278
277void ARM_Dynarmic::PageTableChanged() { 279void ARM_Dynarmic::PageTableChanged() {
278 jit = MakeJit(); 280 jit = MakeJit();
279 current_page_table = Memory::GetCurrentPageTable();
280} 281}
281 282
282DynarmicExclusiveMonitor::DynarmicExclusiveMonitor(std::size_t core_count) : monitor(core_count) {} 283DynarmicExclusiveMonitor::DynarmicExclusiveMonitor(std::size_t core_count) : monitor(core_count) {}
diff --git a/src/core/arm/dynarmic/arm_dynarmic.h b/src/core/arm/dynarmic/arm_dynarmic.h
index 512bf8ce9..c1db254e8 100644
--- a/src/core/arm/dynarmic/arm_dynarmic.h
+++ b/src/core/arm/dynarmic/arm_dynarmic.h
@@ -12,19 +12,16 @@
12#include "core/arm/exclusive_monitor.h" 12#include "core/arm/exclusive_monitor.h"
13#include "core/arm/unicorn/arm_unicorn.h" 13#include "core/arm/unicorn/arm_unicorn.h"
14 14
15namespace Memory {
16struct PageTable;
17}
18
19namespace Core { 15namespace Core {
20 16
21class ARM_Dynarmic_Callbacks; 17class ARM_Dynarmic_Callbacks;
22class DynarmicExclusiveMonitor; 18class DynarmicExclusiveMonitor;
19class System;
23 20
24class ARM_Dynarmic final : public ARM_Interface { 21class ARM_Dynarmic final : public ARM_Interface {
25public: 22public:
26 ARM_Dynarmic(ExclusiveMonitor& exclusive_monitor, std::size_t core_index); 23 ARM_Dynarmic(System& system, ExclusiveMonitor& exclusive_monitor, std::size_t core_index);
27 ~ARM_Dynarmic(); 24 ~ARM_Dynarmic() override;
28 25
29 void MapBackingMemory(VAddr address, std::size_t size, u8* memory, 26 void MapBackingMemory(VAddr address, std::size_t size, u8* memory,
30 Kernel::VMAPermission perms) override; 27 Kernel::VMAPermission perms) override;
@@ -62,15 +59,14 @@ private:
62 ARM_Unicorn inner_unicorn; 59 ARM_Unicorn inner_unicorn;
63 60
64 std::size_t core_index; 61 std::size_t core_index;
62 System& system;
65 DynarmicExclusiveMonitor& exclusive_monitor; 63 DynarmicExclusiveMonitor& exclusive_monitor;
66
67 Memory::PageTable* current_page_table = nullptr;
68}; 64};
69 65
70class DynarmicExclusiveMonitor final : public ExclusiveMonitor { 66class DynarmicExclusiveMonitor final : public ExclusiveMonitor {
71public: 67public:
72 explicit DynarmicExclusiveMonitor(std::size_t core_count); 68 explicit DynarmicExclusiveMonitor(std::size_t core_count);
73 ~DynarmicExclusiveMonitor(); 69 ~DynarmicExclusiveMonitor() override;
74 70
75 void SetExclusive(std::size_t core_index, VAddr addr) override; 71 void SetExclusive(std::size_t core_index, VAddr addr) override;
76 void ClearExclusive() override; 72 void ClearExclusive() override;
diff --git a/src/core/arm/unicorn/arm_unicorn.cpp b/src/core/arm/unicorn/arm_unicorn.cpp
index c455c81fb..4e07fe8b5 100644
--- a/src/core/arm/unicorn/arm_unicorn.cpp
+++ b/src/core/arm/unicorn/arm_unicorn.cpp
@@ -10,7 +10,6 @@
10#include "core/core.h" 10#include "core/core.h"
11#include "core/core_timing.h" 11#include "core/core_timing.h"
12#include "core/hle/kernel/svc.h" 12#include "core/hle/kernel/svc.h"
13#include "core/memory.h"
14 13
15namespace Core { 14namespace Core {
16 15
@@ -49,20 +48,6 @@ static void CodeHook(uc_engine* uc, uint64_t address, uint32_t size, void* user_
49 } 48 }
50} 49}
51 50
52static void InterruptHook(uc_engine* uc, u32 intNo, void* user_data) {
53 u32 esr{};
54 CHECKED(uc_reg_read(uc, UC_ARM64_REG_ESR, &esr));
55
56 auto ec = esr >> 26;
57 auto iss = esr & 0xFFFFFF;
58
59 switch (ec) {
60 case 0x15: // SVC
61 Kernel::CallSVC(iss);
62 break;
63 }
64}
65
66static bool UnmappedMemoryHook(uc_engine* uc, uc_mem_type type, u64 addr, int size, u64 value, 51static bool UnmappedMemoryHook(uc_engine* uc, uc_mem_type type, u64 addr, int size, u64 value,
67 void* user_data) { 52 void* user_data) {
68 ARM_Interface::ThreadContext ctx{}; 53 ARM_Interface::ThreadContext ctx{};
@@ -72,7 +57,7 @@ static bool UnmappedMemoryHook(uc_engine* uc, uc_mem_type type, u64 addr, int si
72 return {}; 57 return {};
73} 58}
74 59
75ARM_Unicorn::ARM_Unicorn() { 60ARM_Unicorn::ARM_Unicorn(System& system) : system{system} {
76 CHECKED(uc_open(UC_ARCH_ARM64, UC_MODE_ARM, &uc)); 61 CHECKED(uc_open(UC_ARCH_ARM64, UC_MODE_ARM, &uc));
77 62
78 auto fpv = 3 << 20; 63 auto fpv = 3 << 20;
@@ -177,7 +162,7 @@ void ARM_Unicorn::Run() {
177 if (GDBStub::IsServerEnabled()) { 162 if (GDBStub::IsServerEnabled()) {
178 ExecuteInstructions(std::max(4000000, 0)); 163 ExecuteInstructions(std::max(4000000, 0));
179 } else { 164 } else {
180 ExecuteInstructions(std::max(CoreTiming::GetDowncount(), 0)); 165 ExecuteInstructions(std::max(system.CoreTiming().GetDowncount(), 0));
181 } 166 }
182} 167}
183 168
@@ -190,14 +175,15 @@ MICROPROFILE_DEFINE(ARM_Jit_Unicorn, "ARM JIT", "Unicorn", MP_RGB(255, 64, 64));
190void ARM_Unicorn::ExecuteInstructions(int num_instructions) { 175void ARM_Unicorn::ExecuteInstructions(int num_instructions) {
191 MICROPROFILE_SCOPE(ARM_Jit_Unicorn); 176 MICROPROFILE_SCOPE(ARM_Jit_Unicorn);
192 CHECKED(uc_emu_start(uc, GetPC(), 1ULL << 63, 0, num_instructions)); 177 CHECKED(uc_emu_start(uc, GetPC(), 1ULL << 63, 0, num_instructions));
193 CoreTiming::AddTicks(num_instructions); 178 system.CoreTiming().AddTicks(num_instructions);
194 if (GDBStub::IsServerEnabled()) { 179 if (GDBStub::IsServerEnabled()) {
195 if (last_bkpt_hit) { 180 if (last_bkpt_hit && last_bkpt.type == GDBStub::BreakpointType::Execute) {
196 uc_reg_write(uc, UC_ARM64_REG_PC, &last_bkpt.address); 181 uc_reg_write(uc, UC_ARM64_REG_PC, &last_bkpt.address);
197 } 182 }
183
198 Kernel::Thread* thread = Kernel::GetCurrentThread(); 184 Kernel::Thread* thread = Kernel::GetCurrentThread();
199 SaveContext(thread->GetContext()); 185 SaveContext(thread->GetContext());
200 if (last_bkpt_hit || GDBStub::GetCpuStepFlag()) { 186 if (last_bkpt_hit || GDBStub::IsMemoryBreak() || GDBStub::GetCpuStepFlag()) {
201 last_bkpt_hit = false; 187 last_bkpt_hit = false;
202 GDBStub::Break(); 188 GDBStub::Break();
203 GDBStub::SendTrap(thread, 5); 189 GDBStub::SendTrap(thread, 5);
@@ -272,4 +258,20 @@ void ARM_Unicorn::RecordBreak(GDBStub::BreakpointAddress bkpt) {
272 last_bkpt_hit = true; 258 last_bkpt_hit = true;
273} 259}
274 260
261void ARM_Unicorn::InterruptHook(uc_engine* uc, u32 int_no, void* user_data) {
262 u32 esr{};
263 CHECKED(uc_reg_read(uc, UC_ARM64_REG_ESR, &esr));
264
265 const auto ec = esr >> 26;
266 const auto iss = esr & 0xFFFFFF;
267
268 auto* const arm_instance = static_cast<ARM_Unicorn*>(user_data);
269
270 switch (ec) {
271 case 0x15: // SVC
272 Kernel::CallSVC(arm_instance->system, iss);
273 break;
274 }
275}
276
275} // namespace Core 277} // namespace Core
diff --git a/src/core/arm/unicorn/arm_unicorn.h b/src/core/arm/unicorn/arm_unicorn.h
index 75761950b..209fc16ad 100644
--- a/src/core/arm/unicorn/arm_unicorn.h
+++ b/src/core/arm/unicorn/arm_unicorn.h
@@ -11,10 +11,13 @@
11 11
12namespace Core { 12namespace Core {
13 13
14class System;
15
14class ARM_Unicorn final : public ARM_Interface { 16class ARM_Unicorn final : public ARM_Interface {
15public: 17public:
16 ARM_Unicorn(); 18 explicit ARM_Unicorn(System& system);
17 ~ARM_Unicorn(); 19 ~ARM_Unicorn() override;
20
18 void MapBackingMemory(VAddr address, std::size_t size, u8* memory, 21 void MapBackingMemory(VAddr address, std::size_t size, u8* memory,
19 Kernel::VMAPermission perms) override; 22 Kernel::VMAPermission perms) override;
20 void UnmapMemory(VAddr address, std::size_t size) override; 23 void UnmapMemory(VAddr address, std::size_t size) override;
@@ -42,9 +45,12 @@ public:
42 void RecordBreak(GDBStub::BreakpointAddress bkpt); 45 void RecordBreak(GDBStub::BreakpointAddress bkpt);
43 46
44private: 47private:
48 static void InterruptHook(uc_engine* uc, u32 int_no, void* user_data);
49
45 uc_engine* uc{}; 50 uc_engine* uc{};
51 System& system;
46 GDBStub::BreakpointAddress last_bkpt{}; 52 GDBStub::BreakpointAddress last_bkpt{};
47 bool last_bkpt_hit; 53 bool last_bkpt_hit = false;
48}; 54};
49 55
50} // namespace Core 56} // namespace Core
diff --git a/src/core/core.cpp b/src/core/core.cpp
index 572814e4b..bc9e887b6 100644
--- a/src/core/core.cpp
+++ b/src/core/core.cpp
@@ -17,6 +17,7 @@
17#include "core/core_timing.h" 17#include "core/core_timing.h"
18#include "core/cpu_core_manager.h" 18#include "core/cpu_core_manager.h"
19#include "core/file_sys/mode.h" 19#include "core/file_sys/mode.h"
20#include "core/file_sys/registered_cache.h"
20#include "core/file_sys/vfs_concat.h" 21#include "core/file_sys/vfs_concat.h"
21#include "core/file_sys/vfs_real.h" 22#include "core/file_sys/vfs_real.h"
22#include "core/gdbstub/gdbstub.h" 23#include "core/gdbstub/gdbstub.h"
@@ -32,11 +33,13 @@
32#include "core/perf_stats.h" 33#include "core/perf_stats.h"
33#include "core/settings.h" 34#include "core/settings.h"
34#include "core/telemetry_session.h" 35#include "core/telemetry_session.h"
36#include "file_sys/cheat_engine.h"
35#include "frontend/applets/profile_select.h" 37#include "frontend/applets/profile_select.h"
36#include "frontend/applets/software_keyboard.h" 38#include "frontend/applets/software_keyboard.h"
37#include "frontend/applets/web_browser.h" 39#include "frontend/applets/web_browser.h"
38#include "video_core/debug_utils/debug_utils.h" 40#include "video_core/debug_utils/debug_utils.h"
39#include "video_core/gpu.h" 41#include "video_core/gpu_asynch.h"
42#include "video_core/gpu_synch.h"
40#include "video_core/renderer_base.h" 43#include "video_core/renderer_base.h"
41#include "video_core/video_core.h" 44#include "video_core/video_core.h"
42 45
@@ -78,6 +81,7 @@ FileSys::VirtualFile GetGameFileFromPath(const FileSys::VirtualFilesystem& vfs,
78 return vfs->OpenFile(path, FileSys::Mode::Read); 81 return vfs->OpenFile(path, FileSys::Mode::Read);
79} 82}
80struct System::Impl { 83struct System::Impl {
84 explicit Impl(System& system) : kernel{system} {}
81 85
82 Cpu& CurrentCpuCore() { 86 Cpu& CurrentCpuCore() {
83 return cpu_core_manager.GetCurrentCore(); 87 return cpu_core_manager.GetCurrentCore();
@@ -94,7 +98,7 @@ struct System::Impl {
94 ResultStatus Init(System& system, Frontend::EmuWindow& emu_window) { 98 ResultStatus Init(System& system, Frontend::EmuWindow& emu_window) {
95 LOG_DEBUG(HW_Memory, "initialized OK"); 99 LOG_DEBUG(HW_Memory, "initialized OK");
96 100
97 CoreTiming::Init(); 101 core_timing.Initialize();
98 kernel.Initialize(); 102 kernel.Initialize();
99 103
100 const auto current_time = std::chrono::duration_cast<std::chrono::seconds>( 104 const auto current_time = std::chrono::duration_cast<std::chrono::seconds>(
@@ -105,6 +109,8 @@ struct System::Impl {
105 // Create a default fs if one doesn't already exist. 109 // Create a default fs if one doesn't already exist.
106 if (virtual_filesystem == nullptr) 110 if (virtual_filesystem == nullptr)
107 virtual_filesystem = std::make_shared<FileSys::RealVfsFilesystem>(); 111 virtual_filesystem = std::make_shared<FileSys::RealVfsFilesystem>();
112 if (content_provider == nullptr)
113 content_provider = std::make_unique<FileSys::ContentProviderUnion>();
108 114
109 /// Create default implementations of applets if one is not provided. 115 /// Create default implementations of applets if one is not provided.
110 if (profile_selector == nullptr) 116 if (profile_selector == nullptr)
@@ -114,24 +120,30 @@ struct System::Impl {
114 if (web_browser == nullptr) 120 if (web_browser == nullptr)
115 web_browser = std::make_unique<Core::Frontend::DefaultWebBrowserApplet>(); 121 web_browser = std::make_unique<Core::Frontend::DefaultWebBrowserApplet>();
116 122
117 auto main_process = Kernel::Process::Create(kernel, "main"); 123 auto main_process = Kernel::Process::Create(system, "main");
118 kernel.MakeCurrentProcess(main_process.get()); 124 kernel.MakeCurrentProcess(main_process.get());
119 125
120 telemetry_session = std::make_unique<Core::TelemetrySession>(); 126 telemetry_session = std::make_unique<Core::TelemetrySession>();
121 service_manager = std::make_shared<Service::SM::ServiceManager>(); 127 service_manager = std::make_shared<Service::SM::ServiceManager>();
122 128
123 Service::Init(service_manager, *virtual_filesystem); 129 Service::Init(service_manager, system, *virtual_filesystem);
124 GDBStub::Init(); 130 GDBStub::Init();
125 131
126 renderer = VideoCore::CreateRenderer(emu_window); 132 renderer = VideoCore::CreateRenderer(emu_window, system);
127 if (!renderer->Init()) { 133 if (!renderer->Init()) {
128 return ResultStatus::ErrorVideoCore; 134 return ResultStatus::ErrorVideoCore;
129 } 135 }
130 136
131 gpu_core = std::make_unique<Tegra::GPU>(renderer->Rasterizer()); 137 is_powered_on = true;
138
139 if (Settings::values.use_asynchronous_gpu_emulation) {
140 gpu_core = std::make_unique<VideoCommon::GPUAsynch>(system, *renderer);
141 } else {
142 gpu_core = std::make_unique<VideoCommon::GPUSynch>(system, *renderer);
143 }
132 144
133 cpu_core_manager.Initialize(system); 145 cpu_core_manager.Initialize(system);
134 is_powered_on = true; 146
135 LOG_DEBUG(Core, "Initialized OK"); 147 LOG_DEBUG(Core, "Initialized OK");
136 148
137 // Reset counters and set time origin to current frame 149 // Reset counters and set time origin to current frame
@@ -175,19 +187,20 @@ struct System::Impl {
175 return static_cast<ResultStatus>(static_cast<u32>(ResultStatus::ErrorLoader) + 187 return static_cast<ResultStatus>(static_cast<u32>(ResultStatus::ErrorLoader) +
176 static_cast<u32>(load_result)); 188 static_cast<u32>(load_result));
177 } 189 }
190
178 status = ResultStatus::Success; 191 status = ResultStatus::Success;
179 return status; 192 return status;
180 } 193 }
181 194
182 void Shutdown() { 195 void Shutdown() {
183 // Log last frame performance stats 196 // Log last frame performance stats
184 auto perf_results = GetAndResetPerfStats(); 197 const auto perf_results = GetAndResetPerfStats();
185 Telemetry().AddField(Telemetry::FieldType::Performance, "Shutdown_EmulationSpeed", 198 telemetry_session->AddField(Telemetry::FieldType::Performance, "Shutdown_EmulationSpeed",
186 perf_results.emulation_speed * 100.0); 199 perf_results.emulation_speed * 100.0);
187 Telemetry().AddField(Telemetry::FieldType::Performance, "Shutdown_Framerate", 200 telemetry_session->AddField(Telemetry::FieldType::Performance, "Shutdown_Framerate",
188 perf_results.game_fps); 201 perf_results.game_fps);
189 Telemetry().AddField(Telemetry::FieldType::Performance, "Shutdown_Frametime", 202 telemetry_session->AddField(Telemetry::FieldType::Performance, "Shutdown_Frametime",
190 perf_results.frametime * 1000.0); 203 perf_results.frametime * 1000.0);
191 204
192 is_powered_on = false; 205 is_powered_on = false;
193 206
@@ -196,6 +209,7 @@ struct System::Impl {
196 GDBStub::Shutdown(); 209 GDBStub::Shutdown();
197 Service::Shutdown(); 210 Service::Shutdown();
198 service_manager.reset(); 211 service_manager.reset();
212 cheat_engine.reset();
199 telemetry_session.reset(); 213 telemetry_session.reset();
200 gpu_core.reset(); 214 gpu_core.reset();
201 215
@@ -204,7 +218,7 @@ struct System::Impl {
204 218
205 // Shutdown kernel and core timing 219 // Shutdown kernel and core timing
206 kernel.Shutdown(); 220 kernel.Shutdown();
207 CoreTiming::Shutdown(); 221 core_timing.Shutdown();
208 222
209 // Close app loader 223 // Close app loader
210 app_loader.reset(); 224 app_loader.reset();
@@ -231,12 +245,15 @@ struct System::Impl {
231 } 245 }
232 246
233 PerfStatsResults GetAndResetPerfStats() { 247 PerfStatsResults GetAndResetPerfStats() {
234 return perf_stats.GetAndResetStats(CoreTiming::GetGlobalTimeUs()); 248 return perf_stats.GetAndResetStats(core_timing.GetGlobalTimeUs());
235 } 249 }
236 250
251 Timing::CoreTiming core_timing;
237 Kernel::KernelCore kernel; 252 Kernel::KernelCore kernel;
238 /// RealVfsFilesystem instance 253 /// RealVfsFilesystem instance
239 FileSys::VirtualFilesystem virtual_filesystem; 254 FileSys::VirtualFilesystem virtual_filesystem;
255 /// ContentProviderUnion instance
256 std::unique_ptr<FileSys::ContentProviderUnion> content_provider;
240 /// AppLoader used to load the current executing application 257 /// AppLoader used to load the current executing application
241 std::unique_ptr<Loader::AppLoader> app_loader; 258 std::unique_ptr<Loader::AppLoader> app_loader;
242 std::unique_ptr<VideoCore::RendererBase> renderer; 259 std::unique_ptr<VideoCore::RendererBase> renderer;
@@ -245,6 +262,8 @@ struct System::Impl {
245 CpuCoreManager cpu_core_manager; 262 CpuCoreManager cpu_core_manager;
246 bool is_powered_on = false; 263 bool is_powered_on = false;
247 264
265 std::unique_ptr<FileSys::CheatEngine> cheat_engine;
266
248 /// Frontend applets 267 /// Frontend applets
249 std::unique_ptr<Core::Frontend::ProfileSelectApplet> profile_selector; 268 std::unique_ptr<Core::Frontend::ProfileSelectApplet> profile_selector;
250 std::unique_ptr<Core::Frontend::SoftwareKeyboardApplet> software_keyboard; 269 std::unique_ptr<Core::Frontend::SoftwareKeyboardApplet> software_keyboard;
@@ -263,7 +282,7 @@ struct System::Impl {
263 Core::FrameLimiter frame_limiter; 282 Core::FrameLimiter frame_limiter;
264}; 283};
265 284
266System::System() : impl{std::make_unique<Impl>()} {} 285System::System() : impl{std::make_unique<Impl>(*this)} {}
267System::~System() = default; 286System::~System() = default;
268 287
269Cpu& System::CurrentCpuCore() { 288Cpu& System::CurrentCpuCore() {
@@ -395,6 +414,14 @@ const Kernel::KernelCore& System::Kernel() const {
395 return impl->kernel; 414 return impl->kernel;
396} 415}
397 416
417Timing::CoreTiming& System::CoreTiming() {
418 return impl->core_timing;
419}
420
421const Timing::CoreTiming& System::CoreTiming() const {
422 return impl->core_timing;
423}
424
398Core::PerfStats& System::GetPerfStats() { 425Core::PerfStats& System::GetPerfStats() {
399 return impl->perf_stats; 426 return impl->perf_stats;
400} 427}
@@ -435,6 +462,13 @@ Tegra::DebugContext* System::GetGPUDebugContext() const {
435 return impl->debug_context.get(); 462 return impl->debug_context.get();
436} 463}
437 464
465void System::RegisterCheatList(const std::vector<FileSys::CheatList>& list,
466 const std::string& build_id, VAddr code_region_start,
467 VAddr code_region_end) {
468 impl->cheat_engine = std::make_unique<FileSys::CheatEngine>(*this, list, build_id,
469 code_region_start, code_region_end);
470}
471
438void System::SetFilesystem(std::shared_ptr<FileSys::VfsFilesystem> vfs) { 472void System::SetFilesystem(std::shared_ptr<FileSys::VfsFilesystem> vfs) {
439 impl->virtual_filesystem = std::move(vfs); 473 impl->virtual_filesystem = std::move(vfs);
440} 474}
@@ -459,6 +493,27 @@ const Frontend::SoftwareKeyboardApplet& System::GetSoftwareKeyboard() const {
459 return *impl->software_keyboard; 493 return *impl->software_keyboard;
460} 494}
461 495
496void System::SetContentProvider(std::unique_ptr<FileSys::ContentProviderUnion> provider) {
497 impl->content_provider = std::move(provider);
498}
499
500FileSys::ContentProvider& System::GetContentProvider() {
501 return *impl->content_provider;
502}
503
504const FileSys::ContentProvider& System::GetContentProvider() const {
505 return *impl->content_provider;
506}
507
508void System::RegisterContentProvider(FileSys::ContentProviderUnionSlot slot,
509 FileSys::ContentProvider* provider) {
510 impl->content_provider->SetSlot(slot, provider);
511}
512
513void System::ClearContentProvider(FileSys::ContentProviderUnionSlot slot) {
514 impl->content_provider->ClearSlot(slot);
515}
516
462void System::SetWebBrowser(std::unique_ptr<Frontend::WebBrowserApplet> applet) { 517void System::SetWebBrowser(std::unique_ptr<Frontend::WebBrowserApplet> applet) {
463 impl->web_browser = std::move(applet); 518 impl->web_browser = std::move(applet);
464} 519}
diff --git a/src/core/core.h b/src/core/core.h
index 511a5ad3a..82b2e087e 100644
--- a/src/core/core.h
+++ b/src/core/core.h
@@ -20,6 +20,10 @@ class WebBrowserApplet;
20} // namespace Core::Frontend 20} // namespace Core::Frontend
21 21
22namespace FileSys { 22namespace FileSys {
23class CheatList;
24class ContentProvider;
25class ContentProviderUnion;
26enum class ContentProviderUnionSlot;
23class VfsFilesystem; 27class VfsFilesystem;
24} // namespace FileSys 28} // namespace FileSys
25 29
@@ -47,6 +51,10 @@ namespace VideoCore {
47class RendererBase; 51class RendererBase;
48} // namespace VideoCore 52} // namespace VideoCore
49 53
54namespace Core::Timing {
55class CoreTiming;
56}
57
50namespace Core { 58namespace Core {
51 59
52class ARM_Interface; 60class ARM_Interface;
@@ -205,6 +213,12 @@ public:
205 /// Provides a constant pointer to the current process. 213 /// Provides a constant pointer to the current process.
206 const Kernel::Process* CurrentProcess() const; 214 const Kernel::Process* CurrentProcess() const;
207 215
216 /// Provides a reference to the core timing instance.
217 Timing::CoreTiming& CoreTiming();
218
219 /// Provides a constant reference to the core timing instance.
220 const Timing::CoreTiming& CoreTiming() const;
221
208 /// Provides a reference to the kernel instance. 222 /// Provides a reference to the kernel instance.
209 Kernel::KernelCore& Kernel(); 223 Kernel::KernelCore& Kernel();
210 224
@@ -243,6 +257,9 @@ public:
243 257
244 std::shared_ptr<FileSys::VfsFilesystem> GetFilesystem() const; 258 std::shared_ptr<FileSys::VfsFilesystem> GetFilesystem() const;
245 259
260 void RegisterCheatList(const std::vector<FileSys::CheatList>& list, const std::string& build_id,
261 VAddr code_region_start, VAddr code_region_end);
262
246 void SetProfileSelector(std::unique_ptr<Frontend::ProfileSelectApplet> applet); 263 void SetProfileSelector(std::unique_ptr<Frontend::ProfileSelectApplet> applet);
247 264
248 const Frontend::ProfileSelectApplet& GetProfileSelector() const; 265 const Frontend::ProfileSelectApplet& GetProfileSelector() const;
@@ -256,6 +273,17 @@ public:
256 Frontend::WebBrowserApplet& GetWebBrowser(); 273 Frontend::WebBrowserApplet& GetWebBrowser();
257 const Frontend::WebBrowserApplet& GetWebBrowser() const; 274 const Frontend::WebBrowserApplet& GetWebBrowser() const;
258 275
276 void SetContentProvider(std::unique_ptr<FileSys::ContentProviderUnion> provider);
277
278 FileSys::ContentProvider& GetContentProvider();
279
280 const FileSys::ContentProvider& GetContentProvider() const;
281
282 void RegisterContentProvider(FileSys::ContentProviderUnionSlot slot,
283 FileSys::ContentProvider* provider);
284
285 void ClearContentProvider(FileSys::ContentProviderUnionSlot slot);
286
259private: 287private:
260 System(); 288 System();
261 289
@@ -283,10 +311,6 @@ inline ARM_Interface& CurrentArmInterface() {
283 return System::GetInstance().CurrentArmInterface(); 311 return System::GetInstance().CurrentArmInterface();
284} 312}
285 313
286inline TelemetrySession& Telemetry() {
287 return System::GetInstance().TelemetrySession();
288}
289
290inline Kernel::Process* CurrentProcess() { 314inline Kernel::Process* CurrentProcess() {
291 return System::GetInstance().CurrentProcess(); 315 return System::GetInstance().CurrentProcess();
292} 316}
diff --git a/src/core/core_cpu.cpp b/src/core/core_cpu.cpp
index fffda8a99..ba63c3e61 100644
--- a/src/core/core_cpu.cpp
+++ b/src/core/core_cpu.cpp
@@ -11,6 +11,7 @@
11#endif 11#endif
12#include "core/arm/exclusive_monitor.h" 12#include "core/arm/exclusive_monitor.h"
13#include "core/arm/unicorn/arm_unicorn.h" 13#include "core/arm/unicorn/arm_unicorn.h"
14#include "core/core.h"
14#include "core/core_cpu.h" 15#include "core/core_cpu.h"
15#include "core/core_timing.h" 16#include "core/core_timing.h"
16#include "core/hle/kernel/scheduler.h" 17#include "core/hle/kernel/scheduler.h"
@@ -21,7 +22,7 @@
21namespace Core { 22namespace Core {
22 23
23void CpuBarrier::NotifyEnd() { 24void CpuBarrier::NotifyEnd() {
24 std::unique_lock<std::mutex> lock(mutex); 25 std::unique_lock lock{mutex};
25 end = true; 26 end = true;
26 condition.notify_all(); 27 condition.notify_all();
27} 28}
@@ -33,7 +34,7 @@ bool CpuBarrier::Rendezvous() {
33 } 34 }
34 35
35 if (!end) { 36 if (!end) {
36 std::unique_lock<std::mutex> lock(mutex); 37 std::unique_lock lock{mutex};
37 38
38 --cores_waiting; 39 --cores_waiting;
39 if (!cores_waiting) { 40 if (!cores_waiting) {
@@ -49,20 +50,21 @@ bool CpuBarrier::Rendezvous() {
49 return false; 50 return false;
50} 51}
51 52
52Cpu::Cpu(ExclusiveMonitor& exclusive_monitor, CpuBarrier& cpu_barrier, std::size_t core_index) 53Cpu::Cpu(System& system, ExclusiveMonitor& exclusive_monitor, CpuBarrier& cpu_barrier,
53 : cpu_barrier{cpu_barrier}, core_index{core_index} { 54 std::size_t core_index)
55 : cpu_barrier{cpu_barrier}, core_timing{system.CoreTiming()}, core_index{core_index} {
54 if (Settings::values.use_cpu_jit) { 56 if (Settings::values.use_cpu_jit) {
55#ifdef ARCHITECTURE_x86_64 57#ifdef ARCHITECTURE_x86_64
56 arm_interface = std::make_unique<ARM_Dynarmic>(exclusive_monitor, core_index); 58 arm_interface = std::make_unique<ARM_Dynarmic>(system, exclusive_monitor, core_index);
57#else 59#else
58 arm_interface = std::make_unique<ARM_Unicorn>(); 60 arm_interface = std::make_unique<ARM_Unicorn>(system);
59 LOG_WARNING(Core, "CPU JIT requested, but Dynarmic not available"); 61 LOG_WARNING(Core, "CPU JIT requested, but Dynarmic not available");
60#endif 62#endif
61 } else { 63 } else {
62 arm_interface = std::make_unique<ARM_Unicorn>(); 64 arm_interface = std::make_unique<ARM_Unicorn>(system);
63 } 65 }
64 66
65 scheduler = std::make_unique<Kernel::Scheduler>(*arm_interface); 67 scheduler = std::make_unique<Kernel::Scheduler>(system, *arm_interface);
66} 68}
67 69
68Cpu::~Cpu() = default; 70Cpu::~Cpu() = default;
@@ -93,14 +95,14 @@ void Cpu::RunLoop(bool tight_loop) {
93 95
94 if (IsMainCore()) { 96 if (IsMainCore()) {
95 // TODO(Subv): Only let CoreTiming idle if all 4 cores are idling. 97 // TODO(Subv): Only let CoreTiming idle if all 4 cores are idling.
96 CoreTiming::Idle(); 98 core_timing.Idle();
97 CoreTiming::Advance(); 99 core_timing.Advance();
98 } 100 }
99 101
100 PrepareReschedule(); 102 PrepareReschedule();
101 } else { 103 } else {
102 if (IsMainCore()) { 104 if (IsMainCore()) {
103 CoreTiming::Advance(); 105 core_timing.Advance();
104 } 106 }
105 107
106 if (tight_loop) { 108 if (tight_loop) {
@@ -129,7 +131,7 @@ void Cpu::Reschedule() {
129 131
130 reschedule_pending = false; 132 reschedule_pending = false;
131 // Lock the global kernel mutex when we manipulate the HLE state 133 // Lock the global kernel mutex when we manipulate the HLE state
132 std::lock_guard<std::recursive_mutex> lock(HLE::g_hle_lock); 134 std::lock_guard lock{HLE::g_hle_lock};
133 scheduler->Reschedule(); 135 scheduler->Reschedule();
134} 136}
135 137
diff --git a/src/core/core_cpu.h b/src/core/core_cpu.h
index 1d2bdc6cd..7589beb8c 100644
--- a/src/core/core_cpu.h
+++ b/src/core/core_cpu.h
@@ -16,6 +16,14 @@ class Scheduler;
16} 16}
17 17
18namespace Core { 18namespace Core {
19class System;
20}
21
22namespace Core::Timing {
23class CoreTiming;
24}
25
26namespace Core {
19 27
20class ARM_Interface; 28class ARM_Interface;
21class ExclusiveMonitor; 29class ExclusiveMonitor;
@@ -41,7 +49,8 @@ private:
41 49
42class Cpu { 50class Cpu {
43public: 51public:
44 Cpu(ExclusiveMonitor& exclusive_monitor, CpuBarrier& cpu_barrier, std::size_t core_index); 52 Cpu(System& system, ExclusiveMonitor& exclusive_monitor, CpuBarrier& cpu_barrier,
53 std::size_t core_index);
45 ~Cpu(); 54 ~Cpu();
46 55
47 void RunLoop(bool tight_loop = true); 56 void RunLoop(bool tight_loop = true);
@@ -82,6 +91,7 @@ private:
82 std::unique_ptr<ARM_Interface> arm_interface; 91 std::unique_ptr<ARM_Interface> arm_interface;
83 CpuBarrier& cpu_barrier; 92 CpuBarrier& cpu_barrier;
84 std::unique_ptr<Kernel::Scheduler> scheduler; 93 std::unique_ptr<Kernel::Scheduler> scheduler;
94 Timing::CoreTiming& core_timing;
85 95
86 std::atomic<bool> reschedule_pending = false; 96 std::atomic<bool> reschedule_pending = false;
87 std::size_t core_index; 97 std::size_t core_index;
diff --git a/src/core/core_timing.cpp b/src/core/core_timing.cpp
index 7953c8720..41adb2302 100644
--- a/src/core/core_timing.cpp
+++ b/src/core/core_timing.cpp
@@ -8,149 +8,98 @@
8#include <mutex> 8#include <mutex>
9#include <string> 9#include <string>
10#include <tuple> 10#include <tuple>
11#include <unordered_map> 11
12#include <vector>
13#include "common/assert.h" 12#include "common/assert.h"
14#include "common/thread.h" 13#include "common/thread.h"
15#include "common/threadsafe_queue.h"
16#include "core/core_timing_util.h" 14#include "core/core_timing_util.h"
17 15
18namespace CoreTiming { 16namespace Core::Timing {
19
20static s64 global_timer;
21static int slice_length;
22static int downcount;
23 17
24struct EventType { 18constexpr int MAX_SLICE_LENGTH = 20000;
25 TimedCallback callback;
26 const std::string* name;
27};
28 19
29struct Event { 20struct CoreTiming::Event {
30 s64 time; 21 s64 time;
31 u64 fifo_order; 22 u64 fifo_order;
32 u64 userdata; 23 u64 userdata;
33 const EventType* type; 24 const EventType* type;
34};
35
36// Sort by time, unless the times are the same, in which case sort by the order added to the queue
37static bool operator>(const Event& left, const Event& right) {
38 return std::tie(left.time, left.fifo_order) > std::tie(right.time, right.fifo_order);
39}
40
41static bool operator<(const Event& left, const Event& right) {
42 return std::tie(left.time, left.fifo_order) < std::tie(right.time, right.fifo_order);
43}
44
45// unordered_map stores each element separately as a linked list node so pointers to elements
46// remain stable regardless of rehashes/resizing.
47static std::unordered_map<std::string, EventType> event_types;
48 25
49// The queue is a min-heap using std::make_heap/push_heap/pop_heap. 26 // Sort by time, unless the times are the same, in which case sort by
50// We don't use std::priority_queue because we need to be able to serialize, unserialize and 27 // the order added to the queue
51// erase arbitrary events (RemoveEvent()) regardless of the queue order. These aren't accomodated 28 friend bool operator>(const Event& left, const Event& right) {
52// by the standard adaptor class. 29 return std::tie(left.time, left.fifo_order) > std::tie(right.time, right.fifo_order);
53static std::vector<Event> event_queue; 30 }
54static u64 event_fifo_id;
55// the queue for storing the events from other threads threadsafe until they will be added
56// to the event_queue by the emu thread
57static Common::MPSCQueue<Event, false> ts_queue;
58
59// the queue for unscheduling the events from other threads threadsafe
60static Common::MPSCQueue<std::pair<const EventType*, u64>, false> unschedule_queue;
61
62constexpr int MAX_SLICE_LENGTH = 20000;
63
64static s64 idled_cycles;
65
66// Are we in a function that has been called from Advance()
67// If events are sheduled from a function that gets called from Advance(),
68// don't change slice_length and downcount.
69static bool is_global_timer_sane;
70
71static EventType* ev_lost = nullptr;
72
73static void EmptyTimedCallback(u64 userdata, s64 cyclesLate) {}
74
75EventType* RegisterEvent(const std::string& name, TimedCallback callback) {
76 // check for existing type with same name.
77 // we want event type names to remain unique so that we can use them for serialization.
78 ASSERT_MSG(event_types.find(name) == event_types.end(),
79 "CoreTiming Event \"{}\" is already registered. Events should only be registered "
80 "during Init to avoid breaking save states.",
81 name.c_str());
82 31
83 auto info = event_types.emplace(name, EventType{callback, nullptr}); 32 friend bool operator<(const Event& left, const Event& right) {
84 EventType* event_type = &info.first->second; 33 return std::tie(left.time, left.fifo_order) < std::tie(right.time, right.fifo_order);
85 event_type->name = &info.first->first; 34 }
86 return event_type; 35};
87}
88 36
89void UnregisterAllEvents() { 37CoreTiming::CoreTiming() = default;
90 ASSERT_MSG(event_queue.empty(), "Cannot unregister events with events pending"); 38CoreTiming::~CoreTiming() = default;
91 event_types.clear();
92}
93 39
94void Init() { 40void CoreTiming::Initialize() {
95 downcount = MAX_SLICE_LENGTH; 41 downcount = MAX_SLICE_LENGTH;
96 slice_length = MAX_SLICE_LENGTH; 42 slice_length = MAX_SLICE_LENGTH;
97 global_timer = 0; 43 global_timer = 0;
98 idled_cycles = 0; 44 idled_cycles = 0;
99 45
100 // The time between CoreTiming being intialized and the first call to Advance() is considered 46 // The time between CoreTiming being initialized and the first call to Advance() is considered
101 // the slice boundary between slice -1 and slice 0. Dispatcher loops must call Advance() before 47 // the slice boundary between slice -1 and slice 0. Dispatcher loops must call Advance() before
102 // executing the first cycle of each slice to prepare the slice length and downcount for 48 // executing the first cycle of each slice to prepare the slice length and downcount for
103 // that slice. 49 // that slice.
104 is_global_timer_sane = true; 50 is_global_timer_sane = true;
105 51
106 event_fifo_id = 0; 52 event_fifo_id = 0;
107 ev_lost = RegisterEvent("_lost_event", &EmptyTimedCallback); 53
54 const auto empty_timed_callback = [](u64, s64) {};
55 ev_lost = RegisterEvent("_lost_event", empty_timed_callback);
108} 56}
109 57
110void Shutdown() { 58void CoreTiming::Shutdown() {
111 MoveEvents(); 59 MoveEvents();
112 ClearPendingEvents(); 60 ClearPendingEvents();
113 UnregisterAllEvents(); 61 UnregisterAllEvents();
114} 62}
115 63
116// This should only be called from the CPU thread. If you are calling 64EventType* CoreTiming::RegisterEvent(const std::string& name, TimedCallback callback) {
117// it from any other thread, you are doing something evil 65 // check for existing type with same name.
118u64 GetTicks() { 66 // we want event type names to remain unique so that we can use them for serialization.
119 u64 ticks = static_cast<u64>(global_timer); 67 ASSERT_MSG(event_types.find(name) == event_types.end(),
120 if (!is_global_timer_sane) { 68 "CoreTiming Event \"{}\" is already registered. Events should only be registered "
121 ticks += slice_length - downcount; 69 "during Init to avoid breaking save states.",
122 } 70 name.c_str());
123 return ticks;
124}
125
126void AddTicks(u64 ticks) {
127 downcount -= static_cast<int>(ticks);
128}
129 71
130u64 GetIdleTicks() { 72 auto info = event_types.emplace(name, EventType{callback, nullptr});
131 return static_cast<u64>(idled_cycles); 73 EventType* event_type = &info.first->second;
74 event_type->name = &info.first->first;
75 return event_type;
132} 76}
133 77
134void ClearPendingEvents() { 78void CoreTiming::UnregisterAllEvents() {
135 event_queue.clear(); 79 ASSERT_MSG(event_queue.empty(), "Cannot unregister events with events pending");
80 event_types.clear();
136} 81}
137 82
138void ScheduleEvent(s64 cycles_into_future, const EventType* event_type, u64 userdata) { 83void CoreTiming::ScheduleEvent(s64 cycles_into_future, const EventType* event_type, u64 userdata) {
139 ASSERT(event_type != nullptr); 84 ASSERT(event_type != nullptr);
140 s64 timeout = GetTicks() + cycles_into_future; 85 const s64 timeout = GetTicks() + cycles_into_future;
86
141 // If this event needs to be scheduled before the next advance(), force one early 87 // If this event needs to be scheduled before the next advance(), force one early
142 if (!is_global_timer_sane) 88 if (!is_global_timer_sane) {
143 ForceExceptionCheck(cycles_into_future); 89 ForceExceptionCheck(cycles_into_future);
90 }
91
144 event_queue.emplace_back(Event{timeout, event_fifo_id++, userdata, event_type}); 92 event_queue.emplace_back(Event{timeout, event_fifo_id++, userdata, event_type});
145 std::push_heap(event_queue.begin(), event_queue.end(), std::greater<>()); 93 std::push_heap(event_queue.begin(), event_queue.end(), std::greater<>());
146} 94}
147 95
148void ScheduleEventThreadsafe(s64 cycles_into_future, const EventType* event_type, u64 userdata) { 96void CoreTiming::ScheduleEventThreadsafe(s64 cycles_into_future, const EventType* event_type,
97 u64 userdata) {
149 ts_queue.Push(Event{global_timer + cycles_into_future, 0, userdata, event_type}); 98 ts_queue.Push(Event{global_timer + cycles_into_future, 0, userdata, event_type});
150} 99}
151 100
152void UnscheduleEvent(const EventType* event_type, u64 userdata) { 101void CoreTiming::UnscheduleEvent(const EventType* event_type, u64 userdata) {
153 auto itr = std::remove_if(event_queue.begin(), event_queue.end(), [&](const Event& e) { 102 const auto itr = std::remove_if(event_queue.begin(), event_queue.end(), [&](const Event& e) {
154 return e.type == event_type && e.userdata == userdata; 103 return e.type == event_type && e.userdata == userdata;
155 }); 104 });
156 105
@@ -161,13 +110,33 @@ void UnscheduleEvent(const EventType* event_type, u64 userdata) {
161 } 110 }
162} 111}
163 112
164void UnscheduleEventThreadsafe(const EventType* event_type, u64 userdata) { 113void CoreTiming::UnscheduleEventThreadsafe(const EventType* event_type, u64 userdata) {
165 unschedule_queue.Push(std::make_pair(event_type, userdata)); 114 unschedule_queue.Push(std::make_pair(event_type, userdata));
166} 115}
167 116
168void RemoveEvent(const EventType* event_type) { 117u64 CoreTiming::GetTicks() const {
169 auto itr = std::remove_if(event_queue.begin(), event_queue.end(), 118 u64 ticks = static_cast<u64>(global_timer);
170 [&](const Event& e) { return e.type == event_type; }); 119 if (!is_global_timer_sane) {
120 ticks += slice_length - downcount;
121 }
122 return ticks;
123}
124
125u64 CoreTiming::GetIdleTicks() const {
126 return static_cast<u64>(idled_cycles);
127}
128
129void CoreTiming::AddTicks(u64 ticks) {
130 downcount -= static_cast<int>(ticks);
131}
132
133void CoreTiming::ClearPendingEvents() {
134 event_queue.clear();
135}
136
137void CoreTiming::RemoveEvent(const EventType* event_type) {
138 const auto itr = std::remove_if(event_queue.begin(), event_queue.end(),
139 [&](const Event& e) { return e.type == event_type; });
171 140
172 // Removing random items breaks the invariant so we have to re-establish it. 141 // Removing random items breaks the invariant so we have to re-establish it.
173 if (itr != event_queue.end()) { 142 if (itr != event_queue.end()) {
@@ -176,22 +145,24 @@ void RemoveEvent(const EventType* event_type) {
176 } 145 }
177} 146}
178 147
179void RemoveNormalAndThreadsafeEvent(const EventType* event_type) { 148void CoreTiming::RemoveNormalAndThreadsafeEvent(const EventType* event_type) {
180 MoveEvents(); 149 MoveEvents();
181 RemoveEvent(event_type); 150 RemoveEvent(event_type);
182} 151}
183 152
184void ForceExceptionCheck(s64 cycles) { 153void CoreTiming::ForceExceptionCheck(s64 cycles) {
185 cycles = std::max<s64>(0, cycles); 154 cycles = std::max<s64>(0, cycles);
186 if (downcount > cycles) { 155 if (downcount <= cycles) {
187 // downcount is always (much) smaller than MAX_INT so we can safely cast cycles to an int 156 return;
188 // here. Account for cycles already executed by adjusting the g.slice_length
189 slice_length -= downcount - static_cast<int>(cycles);
190 downcount = static_cast<int>(cycles);
191 } 157 }
158
159 // downcount is always (much) smaller than MAX_INT so we can safely cast cycles to an int
160 // here. Account for cycles already executed by adjusting the g.slice_length
161 slice_length -= downcount - static_cast<int>(cycles);
162 downcount = static_cast<int>(cycles);
192} 163}
193 164
194void MoveEvents() { 165void CoreTiming::MoveEvents() {
195 for (Event ev; ts_queue.Pop(ev);) { 166 for (Event ev; ts_queue.Pop(ev);) {
196 ev.fifo_order = event_fifo_id++; 167 ev.fifo_order = event_fifo_id++;
197 event_queue.emplace_back(std::move(ev)); 168 event_queue.emplace_back(std::move(ev));
@@ -199,13 +170,13 @@ void MoveEvents() {
199 } 170 }
200} 171}
201 172
202void Advance() { 173void CoreTiming::Advance() {
203 MoveEvents(); 174 MoveEvents();
204 for (std::pair<const EventType*, u64> ev; unschedule_queue.Pop(ev);) { 175 for (std::pair<const EventType*, u64> ev; unschedule_queue.Pop(ev);) {
205 UnscheduleEvent(ev.first, ev.second); 176 UnscheduleEvent(ev.first, ev.second);
206 } 177 }
207 178
208 int cycles_executed = slice_length - downcount; 179 const int cycles_executed = slice_length - downcount;
209 global_timer += cycles_executed; 180 global_timer += cycles_executed;
210 slice_length = MAX_SLICE_LENGTH; 181 slice_length = MAX_SLICE_LENGTH;
211 182
@@ -215,7 +186,7 @@ void Advance() {
215 Event evt = std::move(event_queue.front()); 186 Event evt = std::move(event_queue.front());
216 std::pop_heap(event_queue.begin(), event_queue.end(), std::greater<>()); 187 std::pop_heap(event_queue.begin(), event_queue.end(), std::greater<>());
217 event_queue.pop_back(); 188 event_queue.pop_back();
218 evt.type->callback(evt.userdata, static_cast<int>(global_timer - evt.time)); 189 evt.type->callback(evt.userdata, global_timer - evt.time);
219 } 190 }
220 191
221 is_global_timer_sane = false; 192 is_global_timer_sane = false;
@@ -229,17 +200,17 @@ void Advance() {
229 downcount = slice_length; 200 downcount = slice_length;
230} 201}
231 202
232void Idle() { 203void CoreTiming::Idle() {
233 idled_cycles += downcount; 204 idled_cycles += downcount;
234 downcount = 0; 205 downcount = 0;
235} 206}
236 207
237std::chrono::microseconds GetGlobalTimeUs() { 208std::chrono::microseconds CoreTiming::GetGlobalTimeUs() const {
238 return std::chrono::microseconds{GetTicks() * 1000000 / BASE_CLOCK_RATE}; 209 return std::chrono::microseconds{GetTicks() * 1000000 / BASE_CLOCK_RATE};
239} 210}
240 211
241int GetDowncount() { 212int CoreTiming::GetDowncount() const {
242 return downcount; 213 return downcount;
243} 214}
244 215
245} // namespace CoreTiming 216} // namespace Core::Timing
diff --git a/src/core/core_timing.h b/src/core/core_timing.h
index 9ed757bd7..9d2efde37 100644
--- a/src/core/core_timing.h
+++ b/src/core/core_timing.h
@@ -4,6 +4,27 @@
4 4
5#pragma once 5#pragma once
6 6
7#include <chrono>
8#include <functional>
9#include <string>
10#include <unordered_map>
11#include <vector>
12#include "common/common_types.h"
13#include "common/threadsafe_queue.h"
14
15namespace Core::Timing {
16
17/// A callback that may be scheduled for a particular core timing event.
18using TimedCallback = std::function<void(u64 userdata, s64 cycles_late)>;
19
20/// Contains the characteristics of a particular event.
21struct EventType {
22 /// The event's callback function.
23 TimedCallback callback;
24 /// A pointer to the name of the event.
25 const std::string* name;
26};
27
7/** 28/**
8 * This is a system to schedule events into the emulated machine's future. Time is measured 29 * This is a system to schedule events into the emulated machine's future. Time is measured
9 * in main CPU clock cycles. 30 * in main CPU clock cycles.
@@ -16,80 +37,120 @@
16 * inside callback: 37 * inside callback:
17 * ScheduleEvent(periodInCycles - cyclesLate, callback, "whatever") 38 * ScheduleEvent(periodInCycles - cyclesLate, callback, "whatever")
18 */ 39 */
19 40class CoreTiming {
20#include <chrono> 41public:
21#include <functional> 42 CoreTiming();
22#include <string> 43 ~CoreTiming();
23#include "common/common_types.h" 44
24 45 CoreTiming(const CoreTiming&) = delete;
25namespace CoreTiming { 46 CoreTiming(CoreTiming&&) = delete;
26 47
27struct EventType; 48 CoreTiming& operator=(const CoreTiming&) = delete;
28 49 CoreTiming& operator=(CoreTiming&&) = delete;
29using TimedCallback = std::function<void(u64 userdata, int cycles_late)>; 50
30 51 /// CoreTiming begins at the boundary of timing slice -1. An initial call to Advance() is
31/** 52 /// required to end slice - 1 and start slice 0 before the first cycle of code is executed.
32 * CoreTiming begins at the boundary of timing slice -1. An initial call to Advance() is 53 void Initialize();
33 * required to end slice -1 and start slice 0 before the first cycle of code is executed. 54
34 */ 55 /// Tears down all timing related functionality.
35void Init(); 56 void Shutdown();
36void Shutdown(); 57
37 58 /// Registers a core timing event with the given name and callback.
38/** 59 ///
39 * This should only be called from the emu thread, if you are calling it any other thread, you are 60 /// @param name The name of the core timing event to register.
40 * doing something evil 61 /// @param callback The callback to execute for the event.
41 */ 62 ///
42u64 GetTicks(); 63 /// @returns An EventType instance representing the registered event.
43u64 GetIdleTicks(); 64 ///
44void AddTicks(u64 ticks); 65 /// @pre The name of the event being registered must be unique among all
45 66 /// registered events.
46/** 67 ///
47 * Returns the event_type identifier. if name is not unique, it will assert. 68 EventType* RegisterEvent(const std::string& name, TimedCallback callback);
48 */ 69
49EventType* RegisterEvent(const std::string& name, TimedCallback callback); 70 /// Unregisters all registered events thus far.
50void UnregisterAllEvents(); 71 void UnregisterAllEvents();
51 72
52/** 73 /// After the first Advance, the slice lengths and the downcount will be reduced whenever an
53 * After the first Advance, the slice lengths and the downcount will be reduced whenever an event 74 /// event is scheduled earlier than the current values.
54 * is scheduled earlier than the current values. 75 ///
55 * Scheduling from a callback will not update the downcount until the Advance() completes. 76 /// Scheduling from a callback will not update the downcount until the Advance() completes.
56 */ 77 void ScheduleEvent(s64 cycles_into_future, const EventType* event_type, u64 userdata = 0);
57void ScheduleEvent(s64 cycles_into_future, const EventType* event_type, u64 userdata = 0); 78
58 79 /// This is to be called when outside of hle threads, such as the graphics thread, wants to
59/** 80 /// schedule things to be executed on the main thread.
60 * This is to be called when outside of hle threads, such as the graphics thread, wants to 81 ///
61 * schedule things to be executed on the main thread. 82 /// @note This doesn't change slice_length and thus events scheduled by this might be
62 * Not that this doesn't change slice_length and thus events scheduled by this might be called 83 /// called with a delay of up to MAX_SLICE_LENGTH
63 * with a delay of up to MAX_SLICE_LENGTH 84 void ScheduleEventThreadsafe(s64 cycles_into_future, const EventType* event_type,
64 */ 85 u64 userdata = 0);
65void ScheduleEventThreadsafe(s64 cycles_into_future, const EventType* event_type, u64 userdata); 86
66 87 void UnscheduleEvent(const EventType* event_type, u64 userdata);
67void UnscheduleEvent(const EventType* event_type, u64 userdata); 88 void UnscheduleEventThreadsafe(const EventType* event_type, u64 userdata);
68void UnscheduleEventThreadsafe(const EventType* event_type, u64 userdata); 89
69 90 /// We only permit one event of each type in the queue at a time.
70/// We only permit one event of each type in the queue at a time. 91 void RemoveEvent(const EventType* event_type);
71void RemoveEvent(const EventType* event_type); 92 void RemoveNormalAndThreadsafeEvent(const EventType* event_type);
72void RemoveNormalAndThreadsafeEvent(const EventType* event_type); 93
73 94 void ForceExceptionCheck(s64 cycles);
74/** Advance must be called at the beginning of dispatcher loops, not the end. Advance() ends 95
75 * the previous timing slice and begins the next one, you must Advance from the previous 96 /// This should only be called from the emu thread, if you are calling it any other thread,
76 * slice to the current one before executing any cycles. CoreTiming starts in slice -1 so an 97 /// you are doing something evil
77 * Advance() is required to initialize the slice length before the first cycle of emulated 98 u64 GetTicks() const;
78 * instructions is executed. 99
79 */ 100 u64 GetIdleTicks() const;
80void Advance(); 101
81void MoveEvents(); 102 void AddTicks(u64 ticks);
82 103
83/// Pretend that the main CPU has executed enough cycles to reach the next event. 104 /// Advance must be called at the beginning of dispatcher loops, not the end. Advance() ends
84void Idle(); 105 /// the previous timing slice and begins the next one, you must Advance from the previous
85 106 /// slice to the current one before executing any cycles. CoreTiming starts in slice -1 so an
86/// Clear all pending events. This should ONLY be done on exit. 107 /// Advance() is required to initialize the slice length before the first cycle of emulated
87void ClearPendingEvents(); 108 /// instructions is executed.
88 109 void Advance();
89void ForceExceptionCheck(s64 cycles); 110
90 111 /// Pretend that the main CPU has executed enough cycles to reach the next event.
91std::chrono::microseconds GetGlobalTimeUs(); 112 void Idle();
92 113
93int GetDowncount(); 114 std::chrono::microseconds GetGlobalTimeUs() const;
94 115
95} // namespace CoreTiming 116 int GetDowncount() const;
117
118private:
119 struct Event;
120
121 /// Clear all pending events. This should ONLY be done on exit.
122 void ClearPendingEvents();
123 void MoveEvents();
124
125 s64 global_timer = 0;
126 s64 idled_cycles = 0;
127 int slice_length = 0;
128 int downcount = 0;
129
130 // Are we in a function that has been called from Advance()
131 // If events are scheduled from a function that gets called from Advance(),
132 // don't change slice_length and downcount.
133 bool is_global_timer_sane = false;
134
135 // The queue is a min-heap using std::make_heap/push_heap/pop_heap.
136 // We don't use std::priority_queue because we need to be able to serialize, unserialize and
137 // erase arbitrary events (RemoveEvent()) regardless of the queue order. These aren't
138 // accomodated by the standard adaptor class.
139 std::vector<Event> event_queue;
140 u64 event_fifo_id = 0;
141
142 // Stores each element separately as a linked list node so pointers to elements
143 // remain stable regardless of rehashes/resizing.
144 std::unordered_map<std::string, EventType> event_types;
145
146 // The queue for storing the events from other threads threadsafe until they will be added
147 // to the event_queue by the emu thread
148 Common::MPSCQueue<Event> ts_queue;
149
150 // The queue for unscheduling the events from other threads threadsafe
151 Common::MPSCQueue<std::pair<const EventType*, u64>> unschedule_queue;
152
153 EventType* ev_lost = nullptr;
154};
155
156} // namespace Core::Timing
diff --git a/src/core/core_timing_util.cpp b/src/core/core_timing_util.cpp
index 73dea4edb..7942f30d6 100644
--- a/src/core/core_timing_util.cpp
+++ b/src/core/core_timing_util.cpp
@@ -7,8 +7,9 @@
7#include <cinttypes> 7#include <cinttypes>
8#include <limits> 8#include <limits>
9#include "common/logging/log.h" 9#include "common/logging/log.h"
10#include "common/uint128.h"
10 11
11namespace CoreTiming { 12namespace Core::Timing {
12 13
13constexpr u64 MAX_VALUE_TO_MULTIPLY = std::numeric_limits<s64>::max() / BASE_CLOCK_RATE; 14constexpr u64 MAX_VALUE_TO_MULTIPLY = std::numeric_limits<s64>::max() / BASE_CLOCK_RATE;
14 15
@@ -60,4 +61,9 @@ s64 nsToCycles(u64 ns) {
60 return (BASE_CLOCK_RATE * static_cast<s64>(ns)) / 1000000000; 61 return (BASE_CLOCK_RATE * static_cast<s64>(ns)) / 1000000000;
61} 62}
62 63
63} // namespace CoreTiming 64u64 CpuCyclesToClockCycles(u64 ticks) {
65 const u128 temporal = Common::Multiply64Into128(ticks, CNTFREQ);
66 return Common::Divide128On32(temporal, static_cast<u32>(BASE_CLOCK_RATE)).first;
67}
68
69} // namespace Core::Timing
diff --git a/src/core/core_timing_util.h b/src/core/core_timing_util.h
index 5c3718782..679aa3123 100644
--- a/src/core/core_timing_util.h
+++ b/src/core/core_timing_util.h
@@ -6,11 +6,12 @@
6 6
7#include "common/common_types.h" 7#include "common/common_types.h"
8 8
9namespace CoreTiming { 9namespace Core::Timing {
10 10
11// The below clock rate is based on Switch's clockspeed being widely known as 1.020GHz 11// The below clock rate is based on Switch's clockspeed being widely known as 1.020GHz
12// The exact value used is of course unverified. 12// The exact value used is of course unverified.
13constexpr u64 BASE_CLOCK_RATE = 1019215872; // Switch clock speed is 1020MHz un/docked 13constexpr u64 BASE_CLOCK_RATE = 1019215872; // Switch clock speed is 1020MHz un/docked
14constexpr u64 CNTFREQ = 19200000; // Value from fusee.
14 15
15inline s64 msToCycles(int ms) { 16inline s64 msToCycles(int ms) {
16 // since ms is int there is no way to overflow 17 // since ms is int there is no way to overflow
@@ -61,4 +62,6 @@ inline u64 cyclesToMs(s64 cycles) {
61 return cycles * 1000 / BASE_CLOCK_RATE; 62 return cycles * 1000 / BASE_CLOCK_RATE;
62} 63}
63 64
64} // namespace CoreTiming 65u64 CpuCyclesToClockCycles(u64 ticks);
66
67} // namespace Core::Timing
diff --git a/src/core/cpu_core_manager.cpp b/src/core/cpu_core_manager.cpp
index 769a6fefa..93bc5619c 100644
--- a/src/core/cpu_core_manager.cpp
+++ b/src/core/cpu_core_manager.cpp
@@ -27,7 +27,7 @@ void CpuCoreManager::Initialize(System& system) {
27 exclusive_monitor = Cpu::MakeExclusiveMonitor(cores.size()); 27 exclusive_monitor = Cpu::MakeExclusiveMonitor(cores.size());
28 28
29 for (std::size_t index = 0; index < cores.size(); ++index) { 29 for (std::size_t index = 0; index < cores.size(); ++index) {
30 cores[index] = std::make_unique<Cpu>(*exclusive_monitor, *barrier, index); 30 cores[index] = std::make_unique<Cpu>(system, *exclusive_monitor, *barrier, index);
31 } 31 }
32 32
33 // Create threads for CPU cores 1-3, and build thread_to_cpu map 33 // Create threads for CPU cores 1-3, and build thread_to_cpu map
diff --git a/src/core/crypto/key_manager.cpp b/src/core/crypto/key_manager.cpp
index ca12fb4ab..dc006e2bb 100644
--- a/src/core/crypto/key_manager.cpp
+++ b/src/core/crypto/key_manager.cpp
@@ -22,6 +22,7 @@
22#include "common/file_util.h" 22#include "common/file_util.h"
23#include "common/hex_util.h" 23#include "common/hex_util.h"
24#include "common/logging/log.h" 24#include "common/logging/log.h"
25#include "core/core.h"
25#include "core/crypto/aes_util.h" 26#include "core/crypto/aes_util.h"
26#include "core/crypto/key_manager.h" 27#include "core/crypto/key_manager.h"
27#include "core/crypto/partition_data_manager.h" 28#include "core/crypto/partition_data_manager.h"
@@ -398,7 +399,8 @@ static bool ValidCryptoRevisionString(std::string_view base, size_t begin, size_
398} 399}
399 400
400void KeyManager::LoadFromFile(const std::string& filename, bool is_title_keys) { 401void KeyManager::LoadFromFile(const std::string& filename, bool is_title_keys) {
401 std::ifstream file(filename); 402 std::ifstream file;
403 OpenFStream(file, filename, std::ios_base::in);
402 if (!file.is_open()) 404 if (!file.is_open())
403 return; 405 return;
404 406
@@ -793,7 +795,7 @@ void KeyManager::DeriveBase() {
793 795
794void KeyManager::DeriveETicket(PartitionDataManager& data) { 796void KeyManager::DeriveETicket(PartitionDataManager& data) {
795 // ETicket keys 797 // ETicket keys
796 const auto es = Service::FileSystem::GetUnionContents().GetEntry( 798 const auto es = Core::System::GetInstance().GetContentProvider().GetEntry(
797 0x0100000000000033, FileSys::ContentRecordType::Program); 799 0x0100000000000033, FileSys::ContentRecordType::Program);
798 800
799 if (es == nullptr) 801 if (es == nullptr)
diff --git a/src/core/file_sys/cheat_engine.cpp b/src/core/file_sys/cheat_engine.cpp
new file mode 100644
index 000000000..b06c2f20a
--- /dev/null
+++ b/src/core/file_sys/cheat_engine.cpp
@@ -0,0 +1,492 @@
1// Copyright 2018 yuzu emulator team
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <locale>
6#include "common/hex_util.h"
7#include "common/microprofile.h"
8#include "common/swap.h"
9#include "core/core.h"
10#include "core/core_timing.h"
11#include "core/core_timing_util.h"
12#include "core/file_sys/cheat_engine.h"
13#include "core/hle/kernel/process.h"
14#include "core/hle/service/hid/controllers/npad.h"
15#include "core/hle/service/hid/hid.h"
16#include "core/hle/service/sm/sm.h"
17
18namespace FileSys {
19
20constexpr s64 CHEAT_ENGINE_TICKS = static_cast<s64>(Core::Timing::BASE_CLOCK_RATE / 60);
21constexpr u32 KEYPAD_BITMASK = 0x3FFFFFF;
22
23u64 Cheat::Address() const {
24 u64 out;
25 std::memcpy(&out, raw.data(), sizeof(u64));
26 return Common::swap64(out) & 0xFFFFFFFFFF;
27}
28
29u64 Cheat::ValueWidth(u64 offset) const {
30 return Value(offset, width);
31}
32
33u64 Cheat::Value(u64 offset, u64 width) const {
34 u64 out;
35 std::memcpy(&out, raw.data() + offset, sizeof(u64));
36 out = Common::swap64(out);
37 if (width == 8)
38 return out;
39 return out & ((1ull << (width * CHAR_BIT)) - 1);
40}
41
42u32 Cheat::KeypadValue() const {
43 u32 out;
44 std::memcpy(&out, raw.data(), sizeof(u32));
45 return Common::swap32(out) & 0x0FFFFFFF;
46}
47
48void CheatList::SetMemoryParameters(VAddr main_begin, VAddr heap_begin, VAddr main_end,
49 VAddr heap_end, MemoryWriter writer, MemoryReader reader) {
50 this->main_region_begin = main_begin;
51 this->main_region_end = main_end;
52 this->heap_region_begin = heap_begin;
53 this->heap_region_end = heap_end;
54 this->writer = writer;
55 this->reader = reader;
56}
57
58MICROPROFILE_DEFINE(Cheat_Engine, "Add-Ons", "Cheat Engine", MP_RGB(70, 200, 70));
59
60void CheatList::Execute() {
61 MICROPROFILE_SCOPE(Cheat_Engine);
62
63 std::fill(scratch.begin(), scratch.end(), 0);
64 in_standard = false;
65 for (std::size_t i = 0; i < master_list.size(); ++i) {
66 LOG_DEBUG(Common_Filesystem, "Executing block #{:08X} ({})", i, master_list[i].first);
67 current_block = i;
68 ExecuteBlock(master_list[i].second);
69 }
70
71 in_standard = true;
72 for (std::size_t i = 0; i < standard_list.size(); ++i) {
73 LOG_DEBUG(Common_Filesystem, "Executing block #{:08X} ({})", i, standard_list[i].first);
74 current_block = i;
75 ExecuteBlock(standard_list[i].second);
76 }
77}
78
79CheatList::CheatList(const Core::System& system_, ProgramSegment master, ProgramSegment standard)
80 : master_list{std::move(master)}, standard_list{std::move(standard)}, system{&system_} {}
81
82bool CheatList::EvaluateConditional(const Cheat& cheat) const {
83 using ComparisonFunction = bool (*)(u64, u64);
84 constexpr std::array<ComparisonFunction, 6> comparison_functions{
85 [](u64 a, u64 b) { return a > b; }, [](u64 a, u64 b) { return a >= b; },
86 [](u64 a, u64 b) { return a < b; }, [](u64 a, u64 b) { return a <= b; },
87 [](u64 a, u64 b) { return a == b; }, [](u64 a, u64 b) { return a != b; },
88 };
89
90 if (cheat.type == CodeType::ConditionalInput) {
91 const auto applet_resource =
92 system->ServiceManager().GetService<Service::HID::Hid>("hid")->GetAppletResource();
93 if (applet_resource == nullptr) {
94 LOG_WARNING(
95 Common_Filesystem,
96 "Attempted to evaluate input conditional, but applet resource is not initialized!");
97 return false;
98 }
99
100 const auto press_state =
101 applet_resource
102 ->GetController<Service::HID::Controller_NPad>(Service::HID::HidController::NPad)
103 .GetAndResetPressState();
104 return ((press_state & cheat.KeypadValue()) & KEYPAD_BITMASK) != 0;
105 }
106
107 ASSERT(cheat.type == CodeType::Conditional);
108
109 const auto offset =
110 cheat.memory_type == MemoryType::MainNSO ? main_region_begin : heap_region_begin;
111 ASSERT(static_cast<u8>(cheat.comparison_op.Value()) < 6);
112 auto* function = comparison_functions[static_cast<u8>(cheat.comparison_op.Value())];
113 const auto addr = cheat.Address() + offset;
114
115 return function(reader(cheat.width, SanitizeAddress(addr)), cheat.ValueWidth(8));
116}
117
118void CheatList::ProcessBlockPairs(const Block& block) {
119 block_pairs.clear();
120
121 u64 scope = 0;
122 std::map<u64, u64> pairs;
123
124 for (std::size_t i = 0; i < block.size(); ++i) {
125 const auto& cheat = block[i];
126
127 switch (cheat.type) {
128 case CodeType::Conditional:
129 case CodeType::ConditionalInput:
130 pairs.insert_or_assign(scope, i);
131 ++scope;
132 break;
133 case CodeType::EndConditional: {
134 --scope;
135 const auto idx = pairs.at(scope);
136 block_pairs.insert_or_assign(idx, i);
137 break;
138 }
139 case CodeType::Loop: {
140 if (cheat.end_of_loop) {
141 --scope;
142 const auto idx = pairs.at(scope);
143 block_pairs.insert_or_assign(idx, i);
144 } else {
145 pairs.insert_or_assign(scope, i);
146 ++scope;
147 }
148 break;
149 }
150 }
151 }
152}
153
154void CheatList::WriteImmediate(const Cheat& cheat) {
155 const auto offset =
156 cheat.memory_type == MemoryType::MainNSO ? main_region_begin : heap_region_begin;
157 const auto& register_3 = scratch.at(cheat.register_3);
158
159 const auto addr = cheat.Address() + offset + register_3;
160 LOG_DEBUG(Common_Filesystem, "writing value={:016X} to addr={:016X}", addr,
161 cheat.Value(8, cheat.width));
162 writer(cheat.width, SanitizeAddress(addr), cheat.ValueWidth(8));
163}
164
165void CheatList::BeginConditional(const Cheat& cheat) {
166 if (EvaluateConditional(cheat)) {
167 return;
168 }
169
170 const auto iter = block_pairs.find(current_index);
171 ASSERT(iter != block_pairs.end());
172 current_index = iter->second - 1;
173}
174
175void CheatList::EndConditional(const Cheat& cheat) {
176 LOG_DEBUG(Common_Filesystem, "Ending conditional block.");
177}
178
179void CheatList::Loop(const Cheat& cheat) {
180 if (cheat.end_of_loop.Value())
181 ASSERT(!cheat.end_of_loop.Value());
182
183 auto& register_3 = scratch.at(cheat.register_3);
184 const auto iter = block_pairs.find(current_index);
185 ASSERT(iter != block_pairs.end());
186 ASSERT(iter->first < iter->second);
187
188 const s32 initial_value = static_cast<s32>(cheat.Value(4, sizeof(s32)));
189 for (s32 i = initial_value; i >= 0; --i) {
190 register_3 = static_cast<u64>(i);
191 for (std::size_t c = iter->first + 1; c < iter->second; ++c) {
192 current_index = c;
193 ExecuteSingleCheat(
194 (in_standard ? standard_list : master_list)[current_block].second[c]);
195 }
196 }
197
198 current_index = iter->second;
199}
200
201void CheatList::LoadImmediate(const Cheat& cheat) {
202 auto& register_3 = scratch.at(cheat.register_3);
203
204 LOG_DEBUG(Common_Filesystem, "setting register={:01X} equal to value={:016X}", cheat.register_3,
205 cheat.Value(4, 8));
206 register_3 = cheat.Value(4, 8);
207}
208
209void CheatList::LoadIndexed(const Cheat& cheat) {
210 const auto offset =
211 cheat.memory_type == MemoryType::MainNSO ? main_region_begin : heap_region_begin;
212 auto& register_3 = scratch.at(cheat.register_3);
213
214 const auto addr = (cheat.load_from_register.Value() ? register_3 : offset) + cheat.Address();
215 LOG_DEBUG(Common_Filesystem, "writing indexed value to register={:01X}, addr={:016X}",
216 cheat.register_3, addr);
217 register_3 = reader(cheat.width, SanitizeAddress(addr));
218}
219
220void CheatList::StoreIndexed(const Cheat& cheat) {
221 const auto& register_3 = scratch.at(cheat.register_3);
222
223 const auto addr =
224 register_3 + (cheat.add_additional_register.Value() ? scratch.at(cheat.register_6) : 0);
225 LOG_DEBUG(Common_Filesystem, "writing value={:016X} to addr={:016X}",
226 cheat.Value(4, cheat.width), addr);
227 writer(cheat.width, SanitizeAddress(addr), cheat.ValueWidth(4));
228}
229
230void CheatList::RegisterArithmetic(const Cheat& cheat) {
231 using ArithmeticFunction = u64 (*)(u64, u64);
232 constexpr std::array<ArithmeticFunction, 5> arithmetic_functions{
233 [](u64 a, u64 b) { return a + b; }, [](u64 a, u64 b) { return a - b; },
234 [](u64 a, u64 b) { return a * b; }, [](u64 a, u64 b) { return a << b; },
235 [](u64 a, u64 b) { return a >> b; },
236 };
237
238 using ArithmeticOverflowCheck = bool (*)(u64, u64);
239 constexpr std::array<ArithmeticOverflowCheck, 5> arithmetic_overflow_checks{
240 [](u64 a, u64 b) { return a > (std::numeric_limits<u64>::max() - b); }, // a + b
241 [](u64 a, u64 b) { return a > (std::numeric_limits<u64>::max() + b); }, // a - b
242 [](u64 a, u64 b) { return a > (std::numeric_limits<u64>::max() / b); }, // a * b
243 [](u64 a, u64 b) { return b >= 64 || (a & ~((1ull << (64 - b)) - 1)) != 0; }, // a << b
244 [](u64 a, u64 b) { return b >= 64 || (a & ((1ull << b) - 1)) != 0; }, // a >> b
245 };
246
247 static_assert(sizeof(arithmetic_functions) == sizeof(arithmetic_overflow_checks),
248 "Missing or have extra arithmetic overflow checks compared to functions!");
249
250 auto& register_3 = scratch.at(cheat.register_3);
251
252 ASSERT(static_cast<u8>(cheat.arithmetic_op.Value()) < 5);
253 auto* function = arithmetic_functions[static_cast<u8>(cheat.arithmetic_op.Value())];
254 auto* overflow_function =
255 arithmetic_overflow_checks[static_cast<u8>(cheat.arithmetic_op.Value())];
256 LOG_DEBUG(Common_Filesystem, "performing arithmetic with register={:01X}, value={:016X}",
257 cheat.register_3, cheat.ValueWidth(4));
258
259 if (overflow_function(register_3, cheat.ValueWidth(4))) {
260 LOG_WARNING(Common_Filesystem,
261 "overflow will occur when performing arithmetic operation={:02X} with operands "
262 "a={:016X}, b={:016X}!",
263 static_cast<u8>(cheat.arithmetic_op.Value()), register_3, cheat.ValueWidth(4));
264 }
265
266 register_3 = function(register_3, cheat.ValueWidth(4));
267}
268
269void CheatList::BeginConditionalInput(const Cheat& cheat) {
270 if (EvaluateConditional(cheat))
271 return;
272
273 const auto iter = block_pairs.find(current_index);
274 ASSERT(iter != block_pairs.end());
275 current_index = iter->second - 1;
276}
277
278VAddr CheatList::SanitizeAddress(VAddr in) const {
279 if ((in < main_region_begin || in >= main_region_end) &&
280 (in < heap_region_begin || in >= heap_region_end)) {
281 LOG_ERROR(Common_Filesystem,
282 "Cheat attempting to access memory at invalid address={:016X}, if this persists, "
283 "the cheat may be incorrect. However, this may be normal early in execution if "
284 "the game has not properly set up yet.",
285 in);
286 return 0; ///< Invalid addresses will hard crash
287 }
288
289 return in;
290}
291
292void CheatList::ExecuteSingleCheat(const Cheat& cheat) {
293 using CheatOperationFunction = void (CheatList::*)(const Cheat&);
294 constexpr std::array<CheatOperationFunction, 9> cheat_operation_functions{
295 &CheatList::WriteImmediate, &CheatList::BeginConditional,
296 &CheatList::EndConditional, &CheatList::Loop,
297 &CheatList::LoadImmediate, &CheatList::LoadIndexed,
298 &CheatList::StoreIndexed, &CheatList::RegisterArithmetic,
299 &CheatList::BeginConditionalInput,
300 };
301
302 const auto index = static_cast<u8>(cheat.type.Value());
303 ASSERT(index < sizeof(cheat_operation_functions));
304 const auto op = cheat_operation_functions[index];
305 (this->*op)(cheat);
306}
307
308void CheatList::ExecuteBlock(const Block& block) {
309 encountered_loops.clear();
310
311 ProcessBlockPairs(block);
312 for (std::size_t i = 0; i < block.size(); ++i) {
313 current_index = i;
314 ExecuteSingleCheat(block[i]);
315 i = current_index;
316 }
317}
318
319CheatParser::~CheatParser() = default;
320
321CheatList CheatParser::MakeCheatList(const Core::System& system, CheatList::ProgramSegment master,
322 CheatList::ProgramSegment standard) const {
323 return {system, std::move(master), std::move(standard)};
324}
325
326TextCheatParser::~TextCheatParser() = default;
327
328CheatList TextCheatParser::Parse(const Core::System& system, const std::vector<u8>& data) const {
329 std::stringstream ss;
330 ss.write(reinterpret_cast<const char*>(data.data()), data.size());
331
332 std::vector<std::string> lines;
333 std::string stream_line;
334 while (std::getline(ss, stream_line)) {
335 // Remove a trailing \r
336 if (!stream_line.empty() && stream_line.back() == '\r')
337 stream_line.pop_back();
338 lines.push_back(std::move(stream_line));
339 }
340
341 CheatList::ProgramSegment master_list;
342 CheatList::ProgramSegment standard_list;
343
344 for (std::size_t i = 0; i < lines.size(); ++i) {
345 auto line = lines[i];
346
347 if (!line.empty() && (line[0] == '[' || line[0] == '{')) {
348 const auto master = line[0] == '{';
349 const auto begin = master ? line.find('{') : line.find('[');
350 const auto end = master ? line.rfind('}') : line.rfind(']');
351
352 ASSERT(begin != std::string::npos && end != std::string::npos);
353
354 const std::string patch_name{line.begin() + begin + 1, line.begin() + end};
355 CheatList::Block block{};
356
357 while (i < lines.size() - 1) {
358 line = lines[++i];
359 if (!line.empty() && (line[0] == '[' || line[0] == '{')) {
360 --i;
361 break;
362 }
363
364 if (line.size() < 8)
365 continue;
366
367 Cheat out{};
368 out.raw = ParseSingleLineCheat(line);
369 block.push_back(out);
370 }
371
372 (master ? master_list : standard_list).emplace_back(patch_name, block);
373 }
374 }
375
376 return MakeCheatList(system, master_list, standard_list);
377}
378
379std::array<u8, 16> TextCheatParser::ParseSingleLineCheat(const std::string& line) const {
380 std::array<u8, 16> out{};
381
382 if (line.size() < 8)
383 return out;
384
385 const auto word1 = Common::HexStringToArray<sizeof(u32)>(std::string_view{line.data(), 8});
386 std::memcpy(out.data(), word1.data(), sizeof(u32));
387
388 if (line.size() < 17 || line[8] != ' ')
389 return out;
390
391 const auto word2 = Common::HexStringToArray<sizeof(u32)>(std::string_view{line.data() + 9, 8});
392 std::memcpy(out.data() + sizeof(u32), word2.data(), sizeof(u32));
393
394 if (line.size() < 26 || line[17] != ' ') {
395 // Perform shifting in case value is truncated early.
396 const auto type = static_cast<CodeType>((out[0] & 0xF0) >> 4);
397 if (type == CodeType::Loop || type == CodeType::LoadImmediate ||
398 type == CodeType::StoreIndexed || type == CodeType::RegisterArithmetic) {
399 std::memcpy(out.data() + 8, out.data() + 4, sizeof(u32));
400 std::memset(out.data() + 4, 0, sizeof(u32));
401 }
402
403 return out;
404 }
405
406 const auto word3 = Common::HexStringToArray<sizeof(u32)>(std::string_view{line.data() + 18, 8});
407 std::memcpy(out.data() + 2 * sizeof(u32), word3.data(), sizeof(u32));
408
409 if (line.size() < 35 || line[26] != ' ') {
410 // Perform shifting in case value is truncated early.
411 const auto type = static_cast<CodeType>((out[0] & 0xF0) >> 4);
412 if (type == CodeType::WriteImmediate || type == CodeType::Conditional) {
413 std::memcpy(out.data() + 12, out.data() + 8, sizeof(u32));
414 std::memset(out.data() + 8, 0, sizeof(u32));
415 }
416
417 return out;
418 }
419
420 const auto word4 = Common::HexStringToArray<sizeof(u32)>(std::string_view{line.data() + 27, 8});
421 std::memcpy(out.data() + 3 * sizeof(u32), word4.data(), sizeof(u32));
422
423 return out;
424}
425
426namespace {
427u64 MemoryReadImpl(u32 width, VAddr addr) {
428 switch (width) {
429 case 1:
430 return Memory::Read8(addr);
431 case 2:
432 return Memory::Read16(addr);
433 case 4:
434 return Memory::Read32(addr);
435 case 8:
436 return Memory::Read64(addr);
437 default:
438 UNREACHABLE();
439 return 0;
440 }
441}
442
443void MemoryWriteImpl(u32 width, VAddr addr, u64 value) {
444 switch (width) {
445 case 1:
446 Memory::Write8(addr, static_cast<u8>(value));
447 break;
448 case 2:
449 Memory::Write16(addr, static_cast<u16>(value));
450 break;
451 case 4:
452 Memory::Write32(addr, static_cast<u32>(value));
453 break;
454 case 8:
455 Memory::Write64(addr, value);
456 break;
457 default:
458 UNREACHABLE();
459 }
460}
461} // Anonymous namespace
462
463CheatEngine::CheatEngine(Core::System& system, std::vector<CheatList> cheats_,
464 const std::string& build_id, VAddr code_region_start,
465 VAddr code_region_end)
466 : cheats{std::move(cheats_)}, core_timing{system.CoreTiming()} {
467 event = core_timing.RegisterEvent(
468 "CheatEngine::FrameCallback::" + build_id,
469 [this](u64 userdata, s64 cycles_late) { FrameCallback(userdata, cycles_late); });
470 core_timing.ScheduleEvent(CHEAT_ENGINE_TICKS, event);
471
472 const auto& vm_manager = system.CurrentProcess()->VMManager();
473 for (auto& list : this->cheats) {
474 list.SetMemoryParameters(code_region_start, vm_manager.GetHeapRegionBaseAddress(),
475 code_region_end, vm_manager.GetHeapRegionEndAddress(),
476 &MemoryWriteImpl, &MemoryReadImpl);
477 }
478}
479
480CheatEngine::~CheatEngine() {
481 core_timing.UnscheduleEvent(event, 0);
482}
483
484void CheatEngine::FrameCallback(u64 userdata, s64 cycles_late) {
485 for (auto& list : cheats) {
486 list.Execute();
487 }
488
489 core_timing.ScheduleEvent(CHEAT_ENGINE_TICKS - cycles_late, event);
490}
491
492} // namespace FileSys
diff --git a/src/core/file_sys/cheat_engine.h b/src/core/file_sys/cheat_engine.h
new file mode 100644
index 000000000..ac22a82cb
--- /dev/null
+++ b/src/core/file_sys/cheat_engine.h
@@ -0,0 +1,234 @@
1// Copyright 2018 yuzu emulator team
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <map>
8#include <set>
9#include <vector>
10#include "common/bit_field.h"
11#include "common/common_types.h"
12
13namespace Core {
14class System;
15}
16
17namespace Core::Timing {
18class CoreTiming;
19struct EventType;
20} // namespace Core::Timing
21
22namespace FileSys {
23
24enum class CodeType : u32 {
25 // 0TMR00AA AAAAAAAA YYYYYYYY YYYYYYYY
26 // Writes a T sized value Y to the address A added to the value of register R in memory domain M
27 WriteImmediate = 0,
28
29 // 1TMC00AA AAAAAAAA YYYYYYYY YYYYYYYY
30 // Compares the T sized value Y to the value at address A in memory domain M using the
31 // conditional function C. If success, continues execution. If failure, jumps to the matching
32 // EndConditional statement.
33 Conditional = 1,
34
35 // 20000000
36 // Terminates a Conditional or ConditionalInput block.
37 EndConditional = 2,
38
39 // 300R0000 VVVVVVVV
40 // Starts looping V times, storing the current count in register R.
41 // Loop block is terminated with a matching 310R0000.
42 Loop = 3,
43
44 // 400R0000 VVVVVVVV VVVVVVVV
45 // Sets the value of register R to the value V.
46 LoadImmediate = 4,
47
48 // 5TMRI0AA AAAAAAAA
49 // Sets the value of register R to the value of width T at address A in memory domain M, with
50 // the current value of R added to the address if I == 1.
51 LoadIndexed = 5,
52
53 // 6T0RIFG0 VVVVVVVV VVVVVVVV
54 // Writes the value V of width T to the memory address stored in register R. Adds the value of
55 // register G to the final calculation if F is nonzero. Increments the value of register R by T
56 // after operation if I is nonzero.
57 StoreIndexed = 6,
58
59 // 7T0RA000 VVVVVVVV
60 // Performs the arithmetic operation A on the value in register R and the value V of width T,
61 // storing the result in register R.
62 RegisterArithmetic = 7,
63
64 // 8KKKKKKK
65 // Checks to see if any of the buttons defined by the bitmask K are pressed. If any are,
66 // execution continues. If none are, execution skips to the next EndConditional command.
67 ConditionalInput = 8,
68};
69
70enum class MemoryType : u32 {
71 // Addressed relative to start of main NSO
72 MainNSO = 0,
73
74 // Addressed relative to start of heap
75 Heap = 1,
76};
77
78enum class ArithmeticOp : u32 {
79 Add = 0,
80 Sub = 1,
81 Mult = 2,
82 LShift = 3,
83 RShift = 4,
84};
85
86enum class ComparisonOp : u32 {
87 GreaterThan = 1,
88 GreaterThanEqual = 2,
89 LessThan = 3,
90 LessThanEqual = 4,
91 Equal = 5,
92 Inequal = 6,
93};
94
95union Cheat {
96 std::array<u8, 16> raw;
97
98 BitField<4, 4, CodeType> type;
99 BitField<0, 4, u32> width; // Can be 1, 2, 4, or 8. Measured in bytes.
100 BitField<0, 4, u32> end_of_loop;
101 BitField<12, 4, MemoryType> memory_type;
102 BitField<8, 4, u32> register_3;
103 BitField<8, 4, ComparisonOp> comparison_op;
104 BitField<20, 4, u32> load_from_register;
105 BitField<20, 4, u32> increment_register;
106 BitField<20, 4, ArithmeticOp> arithmetic_op;
107 BitField<16, 4, u32> add_additional_register;
108 BitField<28, 4, u32> register_6;
109
110 u64 Address() const;
111 u64 ValueWidth(u64 offset) const;
112 u64 Value(u64 offset, u64 width) const;
113 u32 KeypadValue() const;
114};
115
116class CheatParser;
117
118// Represents a full collection of cheats for a game. The Execute function should be called every
119// interval that all cheats should be executed. Clients should not directly instantiate this class
120// (hence private constructor), they should instead receive an instance from CheatParser, which
121// guarantees the list is always in an acceptable state.
122class CheatList {
123public:
124 friend class CheatParser;
125
126 using Block = std::vector<Cheat>;
127 using ProgramSegment = std::vector<std::pair<std::string, Block>>;
128
129 // (width in bytes, address, value)
130 using MemoryWriter = void (*)(u32, VAddr, u64);
131 // (width in bytes, address) -> value
132 using MemoryReader = u64 (*)(u32, VAddr);
133
134 void SetMemoryParameters(VAddr main_begin, VAddr heap_begin, VAddr main_end, VAddr heap_end,
135 MemoryWriter writer, MemoryReader reader);
136
137 void Execute();
138
139private:
140 CheatList(const Core::System& system_, ProgramSegment master, ProgramSegment standard);
141
142 void ProcessBlockPairs(const Block& block);
143 void ExecuteSingleCheat(const Cheat& cheat);
144
145 void ExecuteBlock(const Block& block);
146
147 bool EvaluateConditional(const Cheat& cheat) const;
148
149 // Individual cheat operations
150 void WriteImmediate(const Cheat& cheat);
151 void BeginConditional(const Cheat& cheat);
152 void EndConditional(const Cheat& cheat);
153 void Loop(const Cheat& cheat);
154 void LoadImmediate(const Cheat& cheat);
155 void LoadIndexed(const Cheat& cheat);
156 void StoreIndexed(const Cheat& cheat);
157 void RegisterArithmetic(const Cheat& cheat);
158 void BeginConditionalInput(const Cheat& cheat);
159
160 VAddr SanitizeAddress(VAddr in) const;
161
162 // Master Codes are defined as codes that cannot be disabled and are run prior to all
163 // others.
164 ProgramSegment master_list;
165 // All other codes
166 ProgramSegment standard_list;
167
168 bool in_standard = false;
169
170 // 16 (0x0-0xF) scratch registers that can be used by cheats
171 std::array<u64, 16> scratch{};
172
173 MemoryWriter writer = nullptr;
174 MemoryReader reader = nullptr;
175
176 u64 main_region_begin{};
177 u64 heap_region_begin{};
178 u64 main_region_end{};
179 u64 heap_region_end{};
180
181 u64 current_block{};
182 // The current index of the cheat within the current Block
183 u64 current_index{};
184
185 // The 'stack' of the program. When a conditional or loop statement is encountered, its index is
186 // pushed onto this queue. When a end block is encountered, the condition is checked.
187 std::map<u64, u64> block_pairs;
188
189 std::set<u64> encountered_loops;
190
191 const Core::System* system;
192};
193
194// Intermediary class that parses a text file or other disk format for storing cheats into a
195// CheatList object, that can be used for execution.
196class CheatParser {
197public:
198 virtual ~CheatParser();
199
200 virtual CheatList Parse(const Core::System& system, const std::vector<u8>& data) const = 0;
201
202protected:
203 CheatList MakeCheatList(const Core::System& system_, CheatList::ProgramSegment master,
204 CheatList::ProgramSegment standard) const;
205};
206
207// CheatParser implementation that parses text files
208class TextCheatParser final : public CheatParser {
209public:
210 ~TextCheatParser() override;
211
212 CheatList Parse(const Core::System& system, const std::vector<u8>& data) const override;
213
214private:
215 std::array<u8, 16> ParseSingleLineCheat(const std::string& line) const;
216};
217
218// Class that encapsulates a CheatList and manages its interaction with memory and CoreTiming
219class CheatEngine final {
220public:
221 CheatEngine(Core::System& system_, std::vector<CheatList> cheats_, const std::string& build_id,
222 VAddr code_region_start, VAddr code_region_end);
223 ~CheatEngine();
224
225private:
226 void FrameCallback(u64 userdata, s64 cycles_late);
227
228 std::vector<CheatList> cheats;
229
230 Core::Timing::EventType* event;
231 Core::Timing::CoreTiming& core_timing;
232};
233
234} // namespace FileSys
diff --git a/src/core/file_sys/content_archive.h b/src/core/file_sys/content_archive.h
index 5d4d05c82..15b9e6624 100644
--- a/src/core/file_sys/content_archive.h
+++ b/src/core/file_sys/content_archive.h
@@ -24,13 +24,26 @@ namespace FileSys {
24 24
25union NCASectionHeader; 25union NCASectionHeader;
26 26
27/// Describes the type of content within an NCA archive.
27enum class NCAContentType : u8 { 28enum class NCAContentType : u8 {
29 /// Executable-related data
28 Program = 0, 30 Program = 0,
31
32 /// Metadata.
29 Meta = 1, 33 Meta = 1,
34
35 /// Access control data.
30 Control = 2, 36 Control = 2,
37
38 /// Information related to the game manual
39 /// e.g. Legal information, etc.
31 Manual = 3, 40 Manual = 3,
41
42 /// System data.
32 Data = 4, 43 Data = 4,
33 Data_Unknown5 = 5, ///< Seems to be used on some system archives 44
45 /// Data that can be accessed by applications.
46 PublicData = 5,
34}; 47};
35 48
36enum class NCASectionCryptoType : u8 { 49enum class NCASectionCryptoType : u8 {
diff --git a/src/core/file_sys/control_metadata.cpp b/src/core/file_sys/control_metadata.cpp
index 83c184750..60ea9ad12 100644
--- a/src/core/file_sys/control_metadata.cpp
+++ b/src/core/file_sys/control_metadata.cpp
@@ -67,7 +67,7 @@ std::string NACP::GetDeveloperName(Language language) const {
67} 67}
68 68
69u64 NACP::GetTitleId() const { 69u64 NACP::GetTitleId() const {
70 return raw.title_id; 70 return raw.save_data_owner_id;
71} 71}
72 72
73u64 NACP::GetDLCBaseTitleId() const { 73u64 NACP::GetDLCBaseTitleId() const {
@@ -80,11 +80,11 @@ std::string NACP::GetVersionString() const {
80} 80}
81 81
82u64 NACP::GetDefaultNormalSaveSize() const { 82u64 NACP::GetDefaultNormalSaveSize() const {
83 return raw.normal_save_data_size; 83 return raw.user_account_save_data_size;
84} 84}
85 85
86u64 NACP::GetDefaultJournalSaveSize() const { 86u64 NACP::GetDefaultJournalSaveSize() const {
87 return raw.journal_sava_data_size; 87 return raw.user_account_save_data_journal_size;
88} 88}
89 89
90std::vector<u8> NACP::GetRawBytes() const { 90std::vector<u8> NACP::GetRawBytes() const {
diff --git a/src/core/file_sys/control_metadata.h b/src/core/file_sys/control_metadata.h
index 7b9cdc910..280710ddf 100644
--- a/src/core/file_sys/control_metadata.h
+++ b/src/core/file_sys/control_metadata.h
@@ -38,23 +38,35 @@ struct RawNACP {
38 u8 video_capture_mode; 38 u8 video_capture_mode;
39 bool data_loss_confirmation; 39 bool data_loss_confirmation;
40 INSERT_PADDING_BYTES(1); 40 INSERT_PADDING_BYTES(1);
41 u64_le title_id; 41 u64_le presence_group_id;
42 std::array<u8, 0x20> rating_age; 42 std::array<u8, 0x20> rating_age;
43 std::array<char, 0x10> version_string; 43 std::array<char, 0x10> version_string;
44 u64_le dlc_base_title_id; 44 u64_le dlc_base_title_id;
45 u64_le title_id_2; 45 u64_le save_data_owner_id;
46 u64_le normal_save_data_size; 46 u64_le user_account_save_data_size;
47 u64_le journal_sava_data_size; 47 u64_le user_account_save_data_journal_size;
48 INSERT_PADDING_BYTES(0x18); 48 u64_le device_save_data_size;
49 u64_le product_code; 49 u64_le device_save_data_journal_size;
50 u64_le bcat_delivery_cache_storage_size;
51 char application_error_code_category[8];
50 std::array<u64_le, 0x8> local_communication; 52 std::array<u64_le, 0x8> local_communication;
51 u8 logo_type; 53 u8 logo_type;
52 u8 logo_handling; 54 u8 logo_handling;
53 bool runtime_add_on_content_install; 55 bool runtime_add_on_content_install;
54 INSERT_PADDING_BYTES(5); 56 INSERT_PADDING_BYTES(5);
55 u64_le title_id_update; 57 u64_le seed_for_pseudo_device_id;
56 std::array<u8, 0x40> bcat_passphrase; 58 std::array<u8, 0x41> bcat_passphrase;
57 INSERT_PADDING_BYTES(0xEC0); 59 INSERT_PADDING_BYTES(7);
60 u64_le user_account_save_data_max_size;
61 u64_le user_account_save_data_max_journal_size;
62 u64_le device_save_data_max_size;
63 u64_le device_save_data_max_journal_size;
64 u64_le temporary_storage_size;
65 u64_le cache_storage_size;
66 u64_le cache_storage_journal_size;
67 u64_le cache_storage_data_and_journal_max_size;
68 u64_le cache_storage_max_index;
69 INSERT_PADDING_BYTES(0xE70);
58}; 70};
59static_assert(sizeof(RawNACP) == 0x4000, "RawNACP has incorrect size."); 71static_assert(sizeof(RawNACP) == 0x4000, "RawNACP has incorrect size.");
60 72
diff --git a/src/core/file_sys/errors.h b/src/core/file_sys/errors.h
index e4a4ee4ab..bb4654366 100644
--- a/src/core/file_sys/errors.h
+++ b/src/core/file_sys/errors.h
@@ -11,6 +11,9 @@ namespace FileSys {
11constexpr ResultCode ERROR_PATH_NOT_FOUND{ErrorModule::FS, 1}; 11constexpr ResultCode ERROR_PATH_NOT_FOUND{ErrorModule::FS, 1};
12constexpr ResultCode ERROR_ENTITY_NOT_FOUND{ErrorModule::FS, 1002}; 12constexpr ResultCode ERROR_ENTITY_NOT_FOUND{ErrorModule::FS, 1002};
13constexpr ResultCode ERROR_SD_CARD_NOT_FOUND{ErrorModule::FS, 2001}; 13constexpr ResultCode ERROR_SD_CARD_NOT_FOUND{ErrorModule::FS, 2001};
14constexpr ResultCode ERROR_OUT_OF_BOUNDS{ErrorModule::FS, 3005};
15constexpr ResultCode ERROR_FAILED_MOUNT_ARCHIVE{ErrorModule::FS, 3223};
16constexpr ResultCode ERROR_INVALID_ARGUMENT{ErrorModule::FS, 6001};
14constexpr ResultCode ERROR_INVALID_OFFSET{ErrorModule::FS, 6061}; 17constexpr ResultCode ERROR_INVALID_OFFSET{ErrorModule::FS, 6061};
15constexpr ResultCode ERROR_INVALID_SIZE{ErrorModule::FS, 6062}; 18constexpr ResultCode ERROR_INVALID_SIZE{ErrorModule::FS, 6062};
16 19
diff --git a/src/core/file_sys/fsmitm_romfsbuild.cpp b/src/core/file_sys/fsmitm_romfsbuild.cpp
index 47b7526c7..d126ae8dd 100644
--- a/src/core/file_sys/fsmitm_romfsbuild.cpp
+++ b/src/core/file_sys/fsmitm_romfsbuild.cpp
@@ -23,6 +23,7 @@
23 */ 23 */
24 24
25#include <cstring> 25#include <cstring>
26#include <string_view>
26#include "common/alignment.h" 27#include "common/alignment.h"
27#include "common/assert.h" 28#include "common/assert.h"
28#include "core/file_sys/fsmitm_romfsbuild.h" 29#include "core/file_sys/fsmitm_romfsbuild.h"
@@ -97,7 +98,8 @@ struct RomFSBuildFileContext {
97 VirtualFile source; 98 VirtualFile source;
98}; 99};
99 100
100static u32 romfs_calc_path_hash(u32 parent, std::string path, u32 start, std::size_t path_len) { 101static u32 romfs_calc_path_hash(u32 parent, std::string_view path, u32 start,
102 std::size_t path_len) {
101 u32 hash = parent ^ 123456789; 103 u32 hash = parent ^ 123456789;
102 for (u32 i = 0; i < path_len; i++) { 104 for (u32 i = 0; i < path_len; i++) {
103 hash = (hash >> 5) | (hash << 27); 105 hash = (hash >> 5) | (hash << 27);
diff --git a/src/core/file_sys/nca_metadata.cpp b/src/core/file_sys/nca_metadata.cpp
index 6f34b7836..93d0df6b9 100644
--- a/src/core/file_sys/nca_metadata.cpp
+++ b/src/core/file_sys/nca_metadata.cpp
@@ -10,14 +10,6 @@
10 10
11namespace FileSys { 11namespace FileSys {
12 12
13bool operator>=(TitleType lhs, TitleType rhs) {
14 return static_cast<std::size_t>(lhs) >= static_cast<std::size_t>(rhs);
15}
16
17bool operator<=(TitleType lhs, TitleType rhs) {
18 return static_cast<std::size_t>(lhs) <= static_cast<std::size_t>(rhs);
19}
20
21CNMT::CNMT(VirtualFile file) { 13CNMT::CNMT(VirtualFile file) {
22 if (file->ReadObject(&header) != sizeof(CNMTHeader)) 14 if (file->ReadObject(&header) != sizeof(CNMTHeader))
23 return; 15 return;
diff --git a/src/core/file_sys/nca_metadata.h b/src/core/file_sys/nca_metadata.h
index a05d155f4..50bf38471 100644
--- a/src/core/file_sys/nca_metadata.h
+++ b/src/core/file_sys/nca_metadata.h
@@ -29,9 +29,6 @@ enum class TitleType : u8 {
29 DeltaTitle = 0x83, 29 DeltaTitle = 0x83,
30}; 30};
31 31
32bool operator>=(TitleType lhs, TitleType rhs);
33bool operator<=(TitleType lhs, TitleType rhs);
34
35enum class ContentRecordType : u8 { 32enum class ContentRecordType : u8 {
36 Meta = 0, 33 Meta = 0,
37 Program = 1, 34 Program = 1,
diff --git a/src/core/file_sys/patch_manager.cpp b/src/core/file_sys/patch_manager.cpp
index 61706966e..78dbadee3 100644
--- a/src/core/file_sys/patch_manager.cpp
+++ b/src/core/file_sys/patch_manager.cpp
@@ -7,8 +7,10 @@
7#include <cstddef> 7#include <cstddef>
8#include <cstring> 8#include <cstring>
9 9
10#include "common/file_util.h"
10#include "common/hex_util.h" 11#include "common/hex_util.h"
11#include "common/logging/log.h" 12#include "common/logging/log.h"
13#include "core/core.h"
12#include "core/file_sys/content_archive.h" 14#include "core/file_sys/content_archive.h"
13#include "core/file_sys/control_metadata.h" 15#include "core/file_sys/control_metadata.h"
14#include "core/file_sys/ips_layer.h" 16#include "core/file_sys/ips_layer.h"
@@ -19,6 +21,7 @@
19#include "core/file_sys/vfs_vector.h" 21#include "core/file_sys/vfs_vector.h"
20#include "core/hle/service/filesystem/filesystem.h" 22#include "core/hle/service/filesystem/filesystem.h"
21#include "core/loader/loader.h" 23#include "core/loader/loader.h"
24#include "core/loader/nso.h"
22#include "core/settings.h" 25#include "core/settings.h"
23 26
24namespace FileSys { 27namespace FileSys {
@@ -31,14 +34,6 @@ constexpr std::array<const char*, 14> EXEFS_FILE_NAMES{
31 "subsdk3", "subsdk4", "subsdk5", "subsdk6", "subsdk7", "subsdk8", "subsdk9", 34 "subsdk3", "subsdk4", "subsdk5", "subsdk6", "subsdk7", "subsdk8", "subsdk9",
32}; 35};
33 36
34struct NSOBuildHeader {
35 u32_le magic;
36 INSERT_PADDING_BYTES(0x3C);
37 std::array<u8, 0x20> build_id;
38 INSERT_PADDING_BYTES(0xA0);
39};
40static_assert(sizeof(NSOBuildHeader) == 0x100, "NSOBuildHeader has incorrect size.");
41
42std::string FormatTitleVersion(u32 version, TitleVersionFormat format) { 37std::string FormatTitleVersion(u32 version, TitleVersionFormat format) {
43 std::array<u8, sizeof(u32)> bytes{}; 38 std::array<u8, sizeof(u32)> bytes{};
44 bytes[0] = version % SINGLE_BYTE_MODULUS; 39 bytes[0] = version % SINGLE_BYTE_MODULUS;
@@ -75,7 +70,7 @@ VirtualDir PatchManager::PatchExeFS(VirtualDir exefs) const {
75 } 70 }
76 } 71 }
77 72
78 const auto installed = Service::FileSystem::GetUnionContents(); 73 const auto& installed = Core::System::GetInstance().GetContentProvider();
79 74
80 const auto& disabled = Settings::values.disabled_addons[title_id]; 75 const auto& disabled = Settings::values.disabled_addons[title_id];
81 const auto update_disabled = 76 const auto update_disabled =
@@ -161,32 +156,35 @@ std::vector<VirtualFile> PatchManager::CollectPatches(const std::vector<VirtualD
161 return out; 156 return out;
162} 157}
163 158
164std::vector<u8> PatchManager::PatchNSO(const std::vector<u8>& nso) const { 159std::vector<u8> PatchManager::PatchNSO(const std::vector<u8>& nso, const std::string& name) const {
165 if (nso.size() < 0x100) 160 if (nso.size() < sizeof(Loader::NSOHeader)) {
166 return nso; 161 return nso;
162 }
167 163
168 NSOBuildHeader header; 164 Loader::NSOHeader header;
169 std::memcpy(&header, nso.data(), sizeof(NSOBuildHeader)); 165 std::memcpy(&header, nso.data(), sizeof(header));
170 166
171 if (header.magic != Common::MakeMagic('N', 'S', 'O', '0')) 167 if (header.magic != Common::MakeMagic('N', 'S', 'O', '0')) {
172 return nso; 168 return nso;
169 }
173 170
174 const auto build_id_raw = Common::HexArrayToString(header.build_id); 171 const auto build_id_raw = Common::HexArrayToString(header.build_id);
175 const auto build_id = build_id_raw.substr(0, build_id_raw.find_last_not_of('0') + 1); 172 const auto build_id = build_id_raw.substr(0, build_id_raw.find_last_not_of('0') + 1);
176 173
177 if (Settings::values.dump_nso) { 174 if (Settings::values.dump_nso) {
178 LOG_INFO(Loader, "Dumping NSO for build_id={}, title_id={:016X}", build_id, title_id); 175 LOG_INFO(Loader, "Dumping NSO for name={}, build_id={}, title_id={:016X}", name, build_id,
176 title_id);
179 const auto dump_dir = Service::FileSystem::GetModificationDumpRoot(title_id); 177 const auto dump_dir = Service::FileSystem::GetModificationDumpRoot(title_id);
180 if (dump_dir != nullptr) { 178 if (dump_dir != nullptr) {
181 const auto nso_dir = GetOrCreateDirectoryRelative(dump_dir, "/nso"); 179 const auto nso_dir = GetOrCreateDirectoryRelative(dump_dir, "/nso");
182 const auto file = nso_dir->CreateFile(fmt::format("{}.nso", build_id)); 180 const auto file = nso_dir->CreateFile(fmt::format("{}-{}.nso", name, build_id));
183 181
184 file->Resize(nso.size()); 182 file->Resize(nso.size());
185 file->WriteBytes(nso); 183 file->WriteBytes(nso);
186 } 184 }
187 } 185 }
188 186
189 LOG_INFO(Loader, "Patching NSO for build_id={}", build_id); 187 LOG_INFO(Loader, "Patching NSO for name={}, build_id={}", name, build_id);
190 188
191 const auto load_dir = Service::FileSystem::GetModificationLoadRoot(title_id); 189 const auto load_dir = Service::FileSystem::GetModificationLoadRoot(title_id);
192 auto patch_dirs = load_dir->GetSubdirectories(); 190 auto patch_dirs = load_dir->GetSubdirectories();
@@ -212,9 +210,11 @@ std::vector<u8> PatchManager::PatchNSO(const std::vector<u8>& nso) const {
212 } 210 }
213 } 211 }
214 212
215 if (out.size() < 0x100) 213 if (out.size() < sizeof(Loader::NSOHeader)) {
216 return nso; 214 return nso;
217 std::memcpy(out.data(), &header, sizeof(NSOBuildHeader)); 215 }
216
217 std::memcpy(out.data(), &header, sizeof(header));
218 return out; 218 return out;
219} 219}
220 220
@@ -232,6 +232,57 @@ bool PatchManager::HasNSOPatch(const std::array<u8, 32>& build_id_) const {
232 return !CollectPatches(patch_dirs, build_id).empty(); 232 return !CollectPatches(patch_dirs, build_id).empty();
233} 233}
234 234
235static std::optional<CheatList> ReadCheatFileFromFolder(const Core::System& system, u64 title_id,
236 const std::array<u8, 0x20>& build_id_,
237 const VirtualDir& base_path, bool upper) {
238 const auto build_id_raw = Common::HexArrayToString(build_id_, upper);
239 const auto build_id = build_id_raw.substr(0, sizeof(u64) * 2);
240 const auto file = base_path->GetFile(fmt::format("{}.txt", build_id));
241
242 if (file == nullptr) {
243 LOG_INFO(Common_Filesystem, "No cheats file found for title_id={:016X}, build_id={}",
244 title_id, build_id);
245 return std::nullopt;
246 }
247
248 std::vector<u8> data(file->GetSize());
249 if (file->Read(data.data(), data.size()) != data.size()) {
250 LOG_INFO(Common_Filesystem, "Failed to read cheats file for title_id={:016X}, build_id={}",
251 title_id, build_id);
252 return std::nullopt;
253 }
254
255 TextCheatParser parser;
256 return parser.Parse(system, data);
257}
258
259std::vector<CheatList> PatchManager::CreateCheatList(const Core::System& system,
260 const std::array<u8, 32>& build_id_) const {
261 const auto load_dir = Service::FileSystem::GetModificationLoadRoot(title_id);
262 auto patch_dirs = load_dir->GetSubdirectories();
263 std::sort(patch_dirs.begin(), patch_dirs.end(),
264 [](const VirtualDir& l, const VirtualDir& r) { return l->GetName() < r->GetName(); });
265
266 std::vector<CheatList> out;
267 out.reserve(patch_dirs.size());
268 for (const auto& subdir : patch_dirs) {
269 auto cheats_dir = subdir->GetSubdirectory("cheats");
270 if (cheats_dir != nullptr) {
271 auto res = ReadCheatFileFromFolder(system, title_id, build_id_, cheats_dir, true);
272 if (res.has_value()) {
273 out.push_back(std::move(*res));
274 continue;
275 }
276
277 res = ReadCheatFileFromFolder(system, title_id, build_id_, cheats_dir, false);
278 if (res.has_value())
279 out.push_back(std::move(*res));
280 }
281 }
282
283 return out;
284}
285
235static void ApplyLayeredFS(VirtualFile& romfs, u64 title_id, ContentRecordType type) { 286static void ApplyLayeredFS(VirtualFile& romfs, u64 title_id, ContentRecordType type) {
236 const auto load_dir = Service::FileSystem::GetModificationLoadRoot(title_id); 287 const auto load_dir = Service::FileSystem::GetModificationLoadRoot(title_id);
237 if ((type != ContentRecordType::Program && type != ContentRecordType::Data) || 288 if ((type != ContentRecordType::Program && type != ContentRecordType::Data) ||
@@ -296,7 +347,7 @@ VirtualFile PatchManager::PatchRomFS(VirtualFile romfs, u64 ivfc_offset, Content
296 if (romfs == nullptr) 347 if (romfs == nullptr)
297 return romfs; 348 return romfs;
298 349
299 const auto installed = Service::FileSystem::GetUnionContents(); 350 const auto& installed = Core::System::GetInstance().GetContentProvider();
300 351
301 // Game Updates 352 // Game Updates
302 const auto update_tid = GetUpdateTitleID(title_id); 353 const auto update_tid = GetUpdateTitleID(title_id);
@@ -343,7 +394,7 @@ static bool IsDirValidAndNonEmpty(const VirtualDir& dir) {
343std::map<std::string, std::string, std::less<>> PatchManager::GetPatchVersionNames( 394std::map<std::string, std::string, std::less<>> PatchManager::GetPatchVersionNames(
344 VirtualFile update_raw) const { 395 VirtualFile update_raw) const {
345 std::map<std::string, std::string, std::less<>> out; 396 std::map<std::string, std::string, std::less<>> out;
346 const auto installed = Service::FileSystem::GetUnionContents(); 397 const auto& installed = Core::System::GetInstance().GetContentProvider();
347 const auto& disabled = Settings::values.disabled_addons[title_id]; 398 const auto& disabled = Settings::values.disabled_addons[title_id];
348 399
349 // Game Updates 400 // Game Updates
@@ -403,6 +454,8 @@ std::map<std::string, std::string, std::less<>> PatchManager::GetPatchVersionNam
403 } 454 }
404 if (IsDirValidAndNonEmpty(mod->GetSubdirectory("romfs"))) 455 if (IsDirValidAndNonEmpty(mod->GetSubdirectory("romfs")))
405 AppendCommaIfNotEmpty(types, "LayeredFS"); 456 AppendCommaIfNotEmpty(types, "LayeredFS");
457 if (IsDirValidAndNonEmpty(mod->GetSubdirectory("cheats")))
458 AppendCommaIfNotEmpty(types, "Cheats");
406 459
407 if (types.empty()) 460 if (types.empty())
408 continue; 461 continue;
@@ -415,10 +468,10 @@ std::map<std::string, std::string, std::less<>> PatchManager::GetPatchVersionNam
415 468
416 // DLC 469 // DLC
417 const auto dlc_entries = installed.ListEntriesFilter(TitleType::AOC, ContentRecordType::Data); 470 const auto dlc_entries = installed.ListEntriesFilter(TitleType::AOC, ContentRecordType::Data);
418 std::vector<RegisteredCacheEntry> dlc_match; 471 std::vector<ContentProviderEntry> dlc_match;
419 dlc_match.reserve(dlc_entries.size()); 472 dlc_match.reserve(dlc_entries.size());
420 std::copy_if(dlc_entries.begin(), dlc_entries.end(), std::back_inserter(dlc_match), 473 std::copy_if(dlc_entries.begin(), dlc_entries.end(), std::back_inserter(dlc_match),
421 [this, &installed](const RegisteredCacheEntry& entry) { 474 [this, &installed](const ContentProviderEntry& entry) {
422 return (entry.title_id & DLC_BASE_TITLE_ID_MASK) == title_id && 475 return (entry.title_id & DLC_BASE_TITLE_ID_MASK) == title_id &&
423 installed.GetEntry(entry)->GetStatus() == Loader::ResultStatus::Success; 476 installed.GetEntry(entry)->GetStatus() == Loader::ResultStatus::Success;
424 }); 477 });
@@ -441,7 +494,7 @@ std::map<std::string, std::string, std::less<>> PatchManager::GetPatchVersionNam
441} 494}
442 495
443std::pair<std::unique_ptr<NACP>, VirtualFile> PatchManager::GetControlMetadata() const { 496std::pair<std::unique_ptr<NACP>, VirtualFile> PatchManager::GetControlMetadata() const {
444 const auto installed{Service::FileSystem::GetUnionContents()}; 497 const auto& installed = Core::System::GetInstance().GetContentProvider();
445 498
446 const auto base_control_nca = installed.GetEntry(title_id, ContentRecordType::Control); 499 const auto base_control_nca = installed.GetEntry(title_id, ContentRecordType::Control);
447 if (base_control_nca == nullptr) 500 if (base_control_nca == nullptr)
diff --git a/src/core/file_sys/patch_manager.h b/src/core/file_sys/patch_manager.h
index b8a1652fd..769f8c6f0 100644
--- a/src/core/file_sys/patch_manager.h
+++ b/src/core/file_sys/patch_manager.h
@@ -8,9 +8,14 @@
8#include <memory> 8#include <memory>
9#include <string> 9#include <string>
10#include "common/common_types.h" 10#include "common/common_types.h"
11#include "core/file_sys/cheat_engine.h"
11#include "core/file_sys/nca_metadata.h" 12#include "core/file_sys/nca_metadata.h"
12#include "core/file_sys/vfs.h" 13#include "core/file_sys/vfs.h"
13 14
15namespace Core {
16class System;
17}
18
14namespace FileSys { 19namespace FileSys {
15 20
16class NCA; 21class NCA;
@@ -39,12 +44,16 @@ public:
39 // Currently tracked NSO patches: 44 // Currently tracked NSO patches:
40 // - IPS 45 // - IPS
41 // - IPSwitch 46 // - IPSwitch
42 std::vector<u8> PatchNSO(const std::vector<u8>& nso) const; 47 std::vector<u8> PatchNSO(const std::vector<u8>& nso, const std::string& name) const;
43 48
44 // Checks to see if PatchNSO() will have any effect given the NSO's build ID. 49 // Checks to see if PatchNSO() will have any effect given the NSO's build ID.
45 // Used to prevent expensive copies in NSO loader. 50 // Used to prevent expensive copies in NSO loader.
46 bool HasNSOPatch(const std::array<u8, 0x20>& build_id) const; 51 bool HasNSOPatch(const std::array<u8, 0x20>& build_id) const;
47 52
53 // Creates a CheatList object with all
54 std::vector<CheatList> CreateCheatList(const Core::System& system,
55 const std::array<u8, 0x20>& build_id) const;
56
48 // Currently tracked RomFS patches: 57 // Currently tracked RomFS patches:
49 // - Game Updates 58 // - Game Updates
50 // - LayeredFS 59 // - LayeredFS
diff --git a/src/core/file_sys/program_metadata.cpp b/src/core/file_sys/program_metadata.cpp
index d3e00437f..d863253f8 100644
--- a/src/core/file_sys/program_metadata.cpp
+++ b/src/core/file_sys/program_metadata.cpp
@@ -3,7 +3,6 @@
3// Refer to the license.txt file included. 3// Refer to the license.txt file included.
4 4
5#include <cstddef> 5#include <cstddef>
6#include <cstring>
7#include <vector> 6#include <vector>
8 7
9#include "common/logging/log.h" 8#include "common/logging/log.h"
@@ -17,28 +16,30 @@ ProgramMetadata::ProgramMetadata() = default;
17ProgramMetadata::~ProgramMetadata() = default; 16ProgramMetadata::~ProgramMetadata() = default;
18 17
19Loader::ResultStatus ProgramMetadata::Load(VirtualFile file) { 18Loader::ResultStatus ProgramMetadata::Load(VirtualFile file) {
20 std::size_t total_size = static_cast<std::size_t>(file->GetSize()); 19 const std::size_t total_size = file->GetSize();
21 if (total_size < sizeof(Header)) 20 if (total_size < sizeof(Header)) {
22 return Loader::ResultStatus::ErrorBadNPDMHeader; 21 return Loader::ResultStatus::ErrorBadNPDMHeader;
22 }
23 23
24 // TODO(DarkLordZach): Use ReadObject when Header/AcidHeader becomes trivially copyable. 24 if (sizeof(Header) != file->ReadObject(&npdm_header)) {
25 std::vector<u8> npdm_header_data = file->ReadBytes(sizeof(Header));
26 if (sizeof(Header) != npdm_header_data.size())
27 return Loader::ResultStatus::ErrorBadNPDMHeader; 25 return Loader::ResultStatus::ErrorBadNPDMHeader;
28 std::memcpy(&npdm_header, npdm_header_data.data(), sizeof(Header)); 26 }
29 27
30 std::vector<u8> acid_header_data = file->ReadBytes(sizeof(AcidHeader), npdm_header.acid_offset); 28 if (sizeof(AcidHeader) != file->ReadObject(&acid_header, npdm_header.acid_offset)) {
31 if (sizeof(AcidHeader) != acid_header_data.size())
32 return Loader::ResultStatus::ErrorBadACIDHeader; 29 return Loader::ResultStatus::ErrorBadACIDHeader;
33 std::memcpy(&acid_header, acid_header_data.data(), sizeof(AcidHeader)); 30 }
34 31
35 if (sizeof(AciHeader) != file->ReadObject(&aci_header, npdm_header.aci_offset)) 32 if (sizeof(AciHeader) != file->ReadObject(&aci_header, npdm_header.aci_offset)) {
36 return Loader::ResultStatus::ErrorBadACIHeader; 33 return Loader::ResultStatus::ErrorBadACIHeader;
34 }
37 35
38 if (sizeof(FileAccessControl) != file->ReadObject(&acid_file_access, acid_header.fac_offset)) 36 if (sizeof(FileAccessControl) != file->ReadObject(&acid_file_access, acid_header.fac_offset)) {
39 return Loader::ResultStatus::ErrorBadFileAccessControl; 37 return Loader::ResultStatus::ErrorBadFileAccessControl;
40 if (sizeof(FileAccessHeader) != file->ReadObject(&aci_file_access, aci_header.fah_offset)) 38 }
39
40 if (sizeof(FileAccessHeader) != file->ReadObject(&aci_file_access, aci_header.fah_offset)) {
41 return Loader::ResultStatus::ErrorBadFileAccessHeader; 41 return Loader::ResultStatus::ErrorBadFileAccessHeader;
42 }
42 43
43 aci_kernel_capabilities.resize(aci_header.kac_size / sizeof(u32)); 44 aci_kernel_capabilities.resize(aci_header.kac_size / sizeof(u32));
44 const u64 read_size = aci_header.kac_size; 45 const u64 read_size = aci_header.kac_size;
diff --git a/src/core/file_sys/program_metadata.h b/src/core/file_sys/program_metadata.h
index 0033ba347..7de5b9cf9 100644
--- a/src/core/file_sys/program_metadata.h
+++ b/src/core/file_sys/program_metadata.h
@@ -58,7 +58,6 @@ public:
58 void Print() const; 58 void Print() const;
59 59
60private: 60private:
61 // TODO(DarkLordZach): BitField is not trivially copyable.
62 struct Header { 61 struct Header {
63 std::array<char, 4> magic; 62 std::array<char, 4> magic;
64 std::array<u8, 8> reserved; 63 std::array<u8, 8> reserved;
@@ -85,7 +84,6 @@ private:
85 84
86 static_assert(sizeof(Header) == 0x80, "NPDM header structure size is wrong"); 85 static_assert(sizeof(Header) == 0x80, "NPDM header structure size is wrong");
87 86
88 // TODO(DarkLordZach): BitField is not trivially copyable.
89 struct AcidHeader { 87 struct AcidHeader {
90 std::array<u8, 0x100> signature; 88 std::array<u8, 0x100> signature;
91 std::array<u8, 0x100> nca_modulus; 89 std::array<u8, 0x100> nca_modulus;
diff --git a/src/core/file_sys/registered_cache.cpp b/src/core/file_sys/registered_cache.cpp
index 128199063..3946ff871 100644
--- a/src/core/file_sys/registered_cache.cpp
+++ b/src/core/file_sys/registered_cache.cpp
@@ -23,19 +23,19 @@ namespace FileSys {
23// The size of blocks to use when vfs raw copying into nand. 23// The size of blocks to use when vfs raw copying into nand.
24constexpr size_t VFS_RC_LARGE_COPY_BLOCK = 0x400000; 24constexpr size_t VFS_RC_LARGE_COPY_BLOCK = 0x400000;
25 25
26std::string RegisteredCacheEntry::DebugInfo() const { 26std::string ContentProviderEntry::DebugInfo() const {
27 return fmt::format("title_id={:016X}, content_type={:02X}", title_id, static_cast<u8>(type)); 27 return fmt::format("title_id={:016X}, content_type={:02X}", title_id, static_cast<u8>(type));
28} 28}
29 29
30bool operator<(const RegisteredCacheEntry& lhs, const RegisteredCacheEntry& rhs) { 30bool operator<(const ContentProviderEntry& lhs, const ContentProviderEntry& rhs) {
31 return (lhs.title_id < rhs.title_id) || (lhs.title_id == rhs.title_id && lhs.type < rhs.type); 31 return (lhs.title_id < rhs.title_id) || (lhs.title_id == rhs.title_id && lhs.type < rhs.type);
32} 32}
33 33
34bool operator==(const RegisteredCacheEntry& lhs, const RegisteredCacheEntry& rhs) { 34bool operator==(const ContentProviderEntry& lhs, const ContentProviderEntry& rhs) {
35 return std::tie(lhs.title_id, lhs.type) == std::tie(rhs.title_id, rhs.type); 35 return std::tie(lhs.title_id, lhs.type) == std::tie(rhs.title_id, rhs.type);
36} 36}
37 37
38bool operator!=(const RegisteredCacheEntry& lhs, const RegisteredCacheEntry& rhs) { 38bool operator!=(const ContentProviderEntry& lhs, const ContentProviderEntry& rhs) {
39 return !operator==(lhs, rhs); 39 return !operator==(lhs, rhs);
40} 40}
41 41
@@ -84,7 +84,7 @@ static std::string GetCNMTName(TitleType type, u64 title_id) {
84 return fmt::format("{}_{:016x}.cnmt", TITLE_TYPE_NAMES[index], title_id); 84 return fmt::format("{}_{:016x}.cnmt", TITLE_TYPE_NAMES[index], title_id);
85} 85}
86 86
87static ContentRecordType GetCRTypeFromNCAType(NCAContentType type) { 87ContentRecordType GetCRTypeFromNCAType(NCAContentType type) {
88 switch (type) { 88 switch (type) {
89 case NCAContentType::Program: 89 case NCAContentType::Program:
90 // TODO(DarkLordZach): Differentiate between Program and Patch 90 // TODO(DarkLordZach): Differentiate between Program and Patch
@@ -94,7 +94,7 @@ static ContentRecordType GetCRTypeFromNCAType(NCAContentType type) {
94 case NCAContentType::Control: 94 case NCAContentType::Control:
95 return ContentRecordType::Control; 95 return ContentRecordType::Control;
96 case NCAContentType::Data: 96 case NCAContentType::Data:
97 case NCAContentType::Data_Unknown5: 97 case NCAContentType::PublicData:
98 return ContentRecordType::Data; 98 return ContentRecordType::Data;
99 case NCAContentType::Manual: 99 case NCAContentType::Manual:
100 // TODO(DarkLordZach): Peek at NCA contents to differentiate Manual and Legal. 100 // TODO(DarkLordZach): Peek at NCA contents to differentiate Manual and Legal.
@@ -104,6 +104,28 @@ static ContentRecordType GetCRTypeFromNCAType(NCAContentType type) {
104 } 104 }
105} 105}
106 106
107ContentProvider::~ContentProvider() = default;
108
109bool ContentProvider::HasEntry(ContentProviderEntry entry) const {
110 return HasEntry(entry.title_id, entry.type);
111}
112
113VirtualFile ContentProvider::GetEntryUnparsed(ContentProviderEntry entry) const {
114 return GetEntryUnparsed(entry.title_id, entry.type);
115}
116
117VirtualFile ContentProvider::GetEntryRaw(ContentProviderEntry entry) const {
118 return GetEntryRaw(entry.title_id, entry.type);
119}
120
121std::unique_ptr<NCA> ContentProvider::GetEntry(ContentProviderEntry entry) const {
122 return GetEntry(entry.title_id, entry.type);
123}
124
125std::vector<ContentProviderEntry> ContentProvider::ListEntries() const {
126 return ListEntriesFilter(std::nullopt, std::nullopt, std::nullopt);
127}
128
107VirtualFile RegisteredCache::OpenFileOrDirectoryConcat(const VirtualDir& dir, 129VirtualFile RegisteredCache::OpenFileOrDirectoryConcat(const VirtualDir& dir,
108 std::string_view path) const { 130 std::string_view path) const {
109 const auto file = dir->GetFileRelative(path); 131 const auto file = dir->GetFileRelative(path);
@@ -161,8 +183,8 @@ VirtualFile RegisteredCache::GetFileAtID(NcaID id) const {
161 return file; 183 return file;
162} 184}
163 185
164static std::optional<NcaID> CheckMapForContentRecord( 186static std::optional<NcaID> CheckMapForContentRecord(const std::map<u64, CNMT>& map, u64 title_id,
165 const boost::container::flat_map<u64, CNMT>& map, u64 title_id, ContentRecordType type) { 187 ContentRecordType type) {
166 if (map.find(title_id) == map.end()) 188 if (map.find(title_id) == map.end())
167 return {}; 189 return {};
168 190
@@ -268,7 +290,7 @@ void RegisteredCache::Refresh() {
268 AccumulateYuzuMeta(); 290 AccumulateYuzuMeta();
269} 291}
270 292
271RegisteredCache::RegisteredCache(VirtualDir dir_, RegisteredCacheParsingFunction parsing_function) 293RegisteredCache::RegisteredCache(VirtualDir dir_, ContentProviderParsingFunction parsing_function)
272 : dir(std::move(dir_)), parser(std::move(parsing_function)) { 294 : dir(std::move(dir_)), parser(std::move(parsing_function)) {
273 Refresh(); 295 Refresh();
274} 296}
@@ -279,19 +301,11 @@ bool RegisteredCache::HasEntry(u64 title_id, ContentRecordType type) const {
279 return GetEntryRaw(title_id, type) != nullptr; 301 return GetEntryRaw(title_id, type) != nullptr;
280} 302}
281 303
282bool RegisteredCache::HasEntry(RegisteredCacheEntry entry) const {
283 return GetEntryRaw(entry) != nullptr;
284}
285
286VirtualFile RegisteredCache::GetEntryUnparsed(u64 title_id, ContentRecordType type) const { 304VirtualFile RegisteredCache::GetEntryUnparsed(u64 title_id, ContentRecordType type) const {
287 const auto id = GetNcaIDFromMetadata(title_id, type); 305 const auto id = GetNcaIDFromMetadata(title_id, type);
288 return id ? GetFileAtID(*id) : nullptr; 306 return id ? GetFileAtID(*id) : nullptr;
289} 307}
290 308
291VirtualFile RegisteredCache::GetEntryUnparsed(RegisteredCacheEntry entry) const {
292 return GetEntryUnparsed(entry.title_id, entry.type);
293}
294
295std::optional<u32> RegisteredCache::GetEntryVersion(u64 title_id) const { 309std::optional<u32> RegisteredCache::GetEntryVersion(u64 title_id) const {
296 const auto meta_iter = meta.find(title_id); 310 const auto meta_iter = meta.find(title_id);
297 if (meta_iter != meta.end()) 311 if (meta_iter != meta.end())
@@ -309,10 +323,6 @@ VirtualFile RegisteredCache::GetEntryRaw(u64 title_id, ContentRecordType type) c
309 return id ? parser(GetFileAtID(*id), *id) : nullptr; 323 return id ? parser(GetFileAtID(*id), *id) : nullptr;
310} 324}
311 325
312VirtualFile RegisteredCache::GetEntryRaw(RegisteredCacheEntry entry) const {
313 return GetEntryRaw(entry.title_id, entry.type);
314}
315
316std::unique_ptr<NCA> RegisteredCache::GetEntry(u64 title_id, ContentRecordType type) const { 326std::unique_ptr<NCA> RegisteredCache::GetEntry(u64 title_id, ContentRecordType type) const {
317 const auto raw = GetEntryRaw(title_id, type); 327 const auto raw = GetEntryRaw(title_id, type);
318 if (raw == nullptr) 328 if (raw == nullptr)
@@ -320,10 +330,6 @@ std::unique_ptr<NCA> RegisteredCache::GetEntry(u64 title_id, ContentRecordType t
320 return std::make_unique<NCA>(raw, nullptr, 0, keys); 330 return std::make_unique<NCA>(raw, nullptr, 0, keys);
321} 331}
322 332
323std::unique_ptr<NCA> RegisteredCache::GetEntry(RegisteredCacheEntry entry) const {
324 return GetEntry(entry.title_id, entry.type);
325}
326
327template <typename T> 333template <typename T>
328void RegisteredCache::IterateAllMetadata( 334void RegisteredCache::IterateAllMetadata(
329 std::vector<T>& out, std::function<T(const CNMT&, const ContentRecord&)> proc, 335 std::vector<T>& out, std::function<T(const CNMT&, const ContentRecord&)> proc,
@@ -348,25 +354,14 @@ void RegisteredCache::IterateAllMetadata(
348 } 354 }
349} 355}
350 356
351std::vector<RegisteredCacheEntry> RegisteredCache::ListEntries() const { 357std::vector<ContentProviderEntry> RegisteredCache::ListEntriesFilter(
352 std::vector<RegisteredCacheEntry> out;
353 IterateAllMetadata<RegisteredCacheEntry>(
354 out,
355 [](const CNMT& c, const ContentRecord& r) {
356 return RegisteredCacheEntry{c.GetTitleID(), r.type};
357 },
358 [](const CNMT& c, const ContentRecord& r) { return true; });
359 return out;
360}
361
362std::vector<RegisteredCacheEntry> RegisteredCache::ListEntriesFilter(
363 std::optional<TitleType> title_type, std::optional<ContentRecordType> record_type, 358 std::optional<TitleType> title_type, std::optional<ContentRecordType> record_type,
364 std::optional<u64> title_id) const { 359 std::optional<u64> title_id) const {
365 std::vector<RegisteredCacheEntry> out; 360 std::vector<ContentProviderEntry> out;
366 IterateAllMetadata<RegisteredCacheEntry>( 361 IterateAllMetadata<ContentProviderEntry>(
367 out, 362 out,
368 [](const CNMT& c, const ContentRecord& r) { 363 [](const CNMT& c, const ContentRecord& r) {
369 return RegisteredCacheEntry{c.GetTitleID(), r.type}; 364 return ContentProviderEntry{c.GetTitleID(), r.type};
370 }, 365 },
371 [&title_type, &record_type, &title_id](const CNMT& c, const ContentRecord& r) { 366 [&title_type, &record_type, &title_id](const CNMT& c, const ContentRecord& r) {
372 if (title_type && *title_type != c.GetType()) 367 if (title_type && *title_type != c.GetType())
@@ -521,37 +516,56 @@ bool RegisteredCache::RawInstallYuzuMeta(const CNMT& cnmt) {
521 }) != yuzu_meta.end(); 516 }) != yuzu_meta.end();
522} 517}
523 518
524RegisteredCacheUnion::RegisteredCacheUnion(std::vector<RegisteredCache*> caches) 519ContentProviderUnion::~ContentProviderUnion() = default;
525 : caches(std::move(caches)) {}
526 520
527void RegisteredCacheUnion::Refresh() { 521void ContentProviderUnion::SetSlot(ContentProviderUnionSlot slot, ContentProvider* provider) {
528 for (const auto& c : caches) 522 providers[slot] = provider;
529 c->Refresh();
530} 523}
531 524
532bool RegisteredCacheUnion::HasEntry(u64 title_id, ContentRecordType type) const { 525void ContentProviderUnion::ClearSlot(ContentProviderUnionSlot slot) {
533 return std::any_of(caches.begin(), caches.end(), [title_id, type](const auto& cache) { 526 providers[slot] = nullptr;
534 return cache->HasEntry(title_id, type);
535 });
536} 527}
537 528
538bool RegisteredCacheUnion::HasEntry(RegisteredCacheEntry entry) const { 529void ContentProviderUnion::Refresh() {
539 return HasEntry(entry.title_id, entry.type); 530 for (auto& provider : providers) {
531 if (provider.second == nullptr)
532 continue;
533
534 provider.second->Refresh();
535 }
540} 536}
541 537
542std::optional<u32> RegisteredCacheUnion::GetEntryVersion(u64 title_id) const { 538bool ContentProviderUnion::HasEntry(u64 title_id, ContentRecordType type) const {
543 for (const auto& c : caches) { 539 for (const auto& provider : providers) {
544 const auto res = c->GetEntryVersion(title_id); 540 if (provider.second == nullptr)
545 if (res) 541 continue;
542
543 if (provider.second->HasEntry(title_id, type))
544 return true;
545 }
546
547 return false;
548}
549
550std::optional<u32> ContentProviderUnion::GetEntryVersion(u64 title_id) const {
551 for (const auto& provider : providers) {
552 if (provider.second == nullptr)
553 continue;
554
555 const auto res = provider.second->GetEntryVersion(title_id);
556 if (res != std::nullopt)
546 return res; 557 return res;
547 } 558 }
548 559
549 return {}; 560 return std::nullopt;
550} 561}
551 562
552VirtualFile RegisteredCacheUnion::GetEntryUnparsed(u64 title_id, ContentRecordType type) const { 563VirtualFile ContentProviderUnion::GetEntryUnparsed(u64 title_id, ContentRecordType type) const {
553 for (const auto& c : caches) { 564 for (const auto& provider : providers) {
554 const auto res = c->GetEntryUnparsed(title_id, type); 565 if (provider.second == nullptr)
566 continue;
567
568 const auto res = provider.second->GetEntryUnparsed(title_id, type);
555 if (res != nullptr) 569 if (res != nullptr)
556 return res; 570 return res;
557 } 571 }
@@ -559,13 +573,12 @@ VirtualFile RegisteredCacheUnion::GetEntryUnparsed(u64 title_id, ContentRecordTy
559 return nullptr; 573 return nullptr;
560} 574}
561 575
562VirtualFile RegisteredCacheUnion::GetEntryUnparsed(RegisteredCacheEntry entry) const { 576VirtualFile ContentProviderUnion::GetEntryRaw(u64 title_id, ContentRecordType type) const {
563 return GetEntryUnparsed(entry.title_id, entry.type); 577 for (const auto& provider : providers) {
564} 578 if (provider.second == nullptr)
579 continue;
565 580
566VirtualFile RegisteredCacheUnion::GetEntryRaw(u64 title_id, ContentRecordType type) const { 581 const auto res = provider.second->GetEntryRaw(title_id, type);
567 for (const auto& c : caches) {
568 const auto res = c->GetEntryRaw(title_id, type);
569 if (res != nullptr) 582 if (res != nullptr)
570 return res; 583 return res;
571 } 584 }
@@ -573,30 +586,56 @@ VirtualFile RegisteredCacheUnion::GetEntryRaw(u64 title_id, ContentRecordType ty
573 return nullptr; 586 return nullptr;
574} 587}
575 588
576VirtualFile RegisteredCacheUnion::GetEntryRaw(RegisteredCacheEntry entry) const { 589std::unique_ptr<NCA> ContentProviderUnion::GetEntry(u64 title_id, ContentRecordType type) const {
577 return GetEntryRaw(entry.title_id, entry.type); 590 for (const auto& provider : providers) {
578} 591 if (provider.second == nullptr)
592 continue;
579 593
580std::unique_ptr<NCA> RegisteredCacheUnion::GetEntry(u64 title_id, ContentRecordType type) const { 594 auto res = provider.second->GetEntry(title_id, type);
581 const auto raw = GetEntryRaw(title_id, type); 595 if (res != nullptr)
582 if (raw == nullptr) 596 return res;
583 return nullptr; 597 }
584 return std::make_unique<NCA>(raw); 598
599 return nullptr;
585} 600}
586 601
587std::unique_ptr<NCA> RegisteredCacheUnion::GetEntry(RegisteredCacheEntry entry) const { 602std::vector<ContentProviderEntry> ContentProviderUnion::ListEntriesFilter(
588 return GetEntry(entry.title_id, entry.type); 603 std::optional<TitleType> title_type, std::optional<ContentRecordType> record_type,
604 std::optional<u64> title_id) const {
605 std::vector<ContentProviderEntry> out;
606
607 for (const auto& provider : providers) {
608 if (provider.second == nullptr)
609 continue;
610
611 const auto vec = provider.second->ListEntriesFilter(title_type, record_type, title_id);
612 std::copy(vec.begin(), vec.end(), std::back_inserter(out));
613 }
614
615 std::sort(out.begin(), out.end());
616 out.erase(std::unique(out.begin(), out.end()), out.end());
617 return out;
589} 618}
590 619
591std::vector<RegisteredCacheEntry> RegisteredCacheUnion::ListEntries() const { 620std::vector<std::pair<ContentProviderUnionSlot, ContentProviderEntry>>
592 std::vector<RegisteredCacheEntry> out; 621ContentProviderUnion::ListEntriesFilterOrigin(std::optional<ContentProviderUnionSlot> origin,
593 for (const auto& c : caches) { 622 std::optional<TitleType> title_type,
594 c->IterateAllMetadata<RegisteredCacheEntry>( 623 std::optional<ContentRecordType> record_type,
595 out, 624 std::optional<u64> title_id) const {
596 [](const CNMT& c, const ContentRecord& r) { 625 std::vector<std::pair<ContentProviderUnionSlot, ContentProviderEntry>> out;
597 return RegisteredCacheEntry{c.GetTitleID(), r.type}; 626
598 }, 627 for (const auto& provider : providers) {
599 [](const CNMT& c, const ContentRecord& r) { return true; }); 628 if (provider.second == nullptr)
629 continue;
630
631 if (origin.has_value() && *origin != provider.first)
632 continue;
633
634 const auto vec = provider.second->ListEntriesFilter(title_type, record_type, title_id);
635 std::transform(vec.begin(), vec.end(), std::back_inserter(out),
636 [&provider](const ContentProviderEntry& entry) {
637 return std::make_pair(provider.first, entry);
638 });
600 } 639 }
601 640
602 std::sort(out.begin(), out.end()); 641 std::sort(out.begin(), out.end());
@@ -604,25 +643,61 @@ std::vector<RegisteredCacheEntry> RegisteredCacheUnion::ListEntries() const {
604 return out; 643 return out;
605} 644}
606 645
607std::vector<RegisteredCacheEntry> RegisteredCacheUnion::ListEntriesFilter( 646ManualContentProvider::~ManualContentProvider() = default;
647
648void ManualContentProvider::AddEntry(TitleType title_type, ContentRecordType content_type,
649 u64 title_id, VirtualFile file) {
650 entries.insert_or_assign({title_type, content_type, title_id}, file);
651}
652
653void ManualContentProvider::ClearAllEntries() {
654 entries.clear();
655}
656
657void ManualContentProvider::Refresh() {}
658
659bool ManualContentProvider::HasEntry(u64 title_id, ContentRecordType type) const {
660 return GetEntryRaw(title_id, type) != nullptr;
661}
662
663std::optional<u32> ManualContentProvider::GetEntryVersion(u64 title_id) const {
664 return std::nullopt;
665}
666
667VirtualFile ManualContentProvider::GetEntryUnparsed(u64 title_id, ContentRecordType type) const {
668 return GetEntryRaw(title_id, type);
669}
670
671VirtualFile ManualContentProvider::GetEntryRaw(u64 title_id, ContentRecordType type) const {
672 const auto iter =
673 std::find_if(entries.begin(), entries.end(), [title_id, type](const auto& entry) {
674 const auto [title_type, content_type, e_title_id] = entry.first;
675 return content_type == type && e_title_id == title_id;
676 });
677 if (iter == entries.end())
678 return nullptr;
679 return iter->second;
680}
681
682std::unique_ptr<NCA> ManualContentProvider::GetEntry(u64 title_id, ContentRecordType type) const {
683 const auto res = GetEntryRaw(title_id, type);
684 if (res == nullptr)
685 return nullptr;
686 return std::make_unique<NCA>(res, nullptr, 0, keys);
687}
688
689std::vector<ContentProviderEntry> ManualContentProvider::ListEntriesFilter(
608 std::optional<TitleType> title_type, std::optional<ContentRecordType> record_type, 690 std::optional<TitleType> title_type, std::optional<ContentRecordType> record_type,
609 std::optional<u64> title_id) const { 691 std::optional<u64> title_id) const {
610 std::vector<RegisteredCacheEntry> out; 692 std::vector<ContentProviderEntry> out;
611 for (const auto& c : caches) { 693
612 c->IterateAllMetadata<RegisteredCacheEntry>( 694 for (const auto& entry : entries) {
613 out, 695 const auto [e_title_type, e_content_type, e_title_id] = entry.first;
614 [](const CNMT& c, const ContentRecord& r) { 696 if ((title_type == std::nullopt || e_title_type == *title_type) &&
615 return RegisteredCacheEntry{c.GetTitleID(), r.type}; 697 (record_type == std::nullopt || e_content_type == *record_type) &&
616 }, 698 (title_id == std::nullopt || e_title_id == *title_id)) {
617 [&title_type, &record_type, &title_id](const CNMT& c, const ContentRecord& r) { 699 out.emplace_back(ContentProviderEntry{e_title_id, e_content_type});
618 if (title_type && *title_type != c.GetType()) 700 }
619 return false;
620 if (record_type && *record_type != r.type)
621 return false;
622 if (title_id && *title_id != c.GetTitleID())
623 return false;
624 return true;
625 });
626 } 701 }
627 702
628 std::sort(out.begin(), out.end()); 703 std::sort(out.begin(), out.end());
diff --git a/src/core/file_sys/registered_cache.h b/src/core/file_sys/registered_cache.h
index 3b77af4e0..ec9052653 100644
--- a/src/core/file_sys/registered_cache.h
+++ b/src/core/file_sys/registered_cache.h
@@ -21,12 +21,13 @@ class NSP;
21class XCI; 21class XCI;
22 22
23enum class ContentRecordType : u8; 23enum class ContentRecordType : u8;
24enum class NCAContentType : u8;
24enum class TitleType : u8; 25enum class TitleType : u8;
25 26
26struct ContentRecord; 27struct ContentRecord;
27 28
28using NcaID = std::array<u8, 0x10>; 29using NcaID = std::array<u8, 0x10>;
29using RegisteredCacheParsingFunction = std::function<VirtualFile(const VirtualFile&, const NcaID&)>; 30using ContentProviderParsingFunction = std::function<VirtualFile(const VirtualFile&, const NcaID&)>;
30using VfsCopyFunction = std::function<bool(const VirtualFile&, const VirtualFile&, size_t)>; 31using VfsCopyFunction = std::function<bool(const VirtualFile&, const VirtualFile&, size_t)>;
31 32
32enum class InstallResult { 33enum class InstallResult {
@@ -36,7 +37,7 @@ enum class InstallResult {
36 ErrorMetaFailed, 37 ErrorMetaFailed,
37}; 38};
38 39
39struct RegisteredCacheEntry { 40struct ContentProviderEntry {
40 u64 title_id; 41 u64 title_id;
41 ContentRecordType type; 42 ContentRecordType type;
42 43
@@ -47,12 +48,46 @@ constexpr u64 GetUpdateTitleID(u64 base_title_id) {
47 return base_title_id | 0x800; 48 return base_title_id | 0x800;
48} 49}
49 50
51ContentRecordType GetCRTypeFromNCAType(NCAContentType type);
52
50// boost flat_map requires operator< for O(log(n)) lookups. 53// boost flat_map requires operator< for O(log(n)) lookups.
51bool operator<(const RegisteredCacheEntry& lhs, const RegisteredCacheEntry& rhs); 54bool operator<(const ContentProviderEntry& lhs, const ContentProviderEntry& rhs);
52 55
53// std unique requires operator== to identify duplicates. 56// std unique requires operator== to identify duplicates.
54bool operator==(const RegisteredCacheEntry& lhs, const RegisteredCacheEntry& rhs); 57bool operator==(const ContentProviderEntry& lhs, const ContentProviderEntry& rhs);
55bool operator!=(const RegisteredCacheEntry& lhs, const RegisteredCacheEntry& rhs); 58bool operator!=(const ContentProviderEntry& lhs, const ContentProviderEntry& rhs);
59
60class ContentProvider {
61public:
62 virtual ~ContentProvider();
63
64 virtual void Refresh() = 0;
65
66 virtual bool HasEntry(u64 title_id, ContentRecordType type) const = 0;
67 virtual bool HasEntry(ContentProviderEntry entry) const;
68
69 virtual std::optional<u32> GetEntryVersion(u64 title_id) const = 0;
70
71 virtual VirtualFile GetEntryUnparsed(u64 title_id, ContentRecordType type) const = 0;
72 virtual VirtualFile GetEntryUnparsed(ContentProviderEntry entry) const;
73
74 virtual VirtualFile GetEntryRaw(u64 title_id, ContentRecordType type) const = 0;
75 virtual VirtualFile GetEntryRaw(ContentProviderEntry entry) const;
76
77 virtual std::unique_ptr<NCA> GetEntry(u64 title_id, ContentRecordType type) const = 0;
78 virtual std::unique_ptr<NCA> GetEntry(ContentProviderEntry entry) const;
79
80 virtual std::vector<ContentProviderEntry> ListEntries() const;
81
82 // If a parameter is not std::nullopt, it will be filtered for from all entries.
83 virtual std::vector<ContentProviderEntry> ListEntriesFilter(
84 std::optional<TitleType> title_type = {}, std::optional<ContentRecordType> record_type = {},
85 std::optional<u64> title_id = {}) const = 0;
86
87protected:
88 // A single instance of KeyManager to be used by GetEntry()
89 Core::Crypto::KeyManager keys;
90};
56 91
57/* 92/*
58 * A class that catalogues NCAs in the registered directory structure. 93 * A class that catalogues NCAs in the registered directory structure.
@@ -67,39 +102,32 @@ bool operator!=(const RegisteredCacheEntry& lhs, const RegisteredCacheEntry& rhs
67 * (This impl also supports substituting the nca dir for an nca file, as that's more convenient 102 * (This impl also supports substituting the nca dir for an nca file, as that's more convenient
68 * when 4GB splitting can be ignored.) 103 * when 4GB splitting can be ignored.)
69 */ 104 */
70class RegisteredCache { 105class RegisteredCache : public ContentProvider {
71 friend class RegisteredCacheUnion;
72
73public: 106public:
74 // Parsing function defines the conversion from raw file to NCA. If there are other steps 107 // Parsing function defines the conversion from raw file to NCA. If there are other steps
75 // besides creating the NCA from the file (e.g. NAX0 on SD Card), that should go in a custom 108 // besides creating the NCA from the file (e.g. NAX0 on SD Card), that should go in a custom
76 // parsing function. 109 // parsing function.
77 explicit RegisteredCache(VirtualDir dir, 110 explicit RegisteredCache(VirtualDir dir,
78 RegisteredCacheParsingFunction parsing_function = 111 ContentProviderParsingFunction parsing_function =
79 [](const VirtualFile& file, const NcaID& id) { return file; }); 112 [](const VirtualFile& file, const NcaID& id) { return file; });
80 ~RegisteredCache(); 113 ~RegisteredCache() override;
81 114
82 void Refresh(); 115 void Refresh() override;
83 116
84 bool HasEntry(u64 title_id, ContentRecordType type) const; 117 bool HasEntry(u64 title_id, ContentRecordType type) const override;
85 bool HasEntry(RegisteredCacheEntry entry) const;
86 118
87 std::optional<u32> GetEntryVersion(u64 title_id) const; 119 std::optional<u32> GetEntryVersion(u64 title_id) const override;
88 120
89 VirtualFile GetEntryUnparsed(u64 title_id, ContentRecordType type) const; 121 VirtualFile GetEntryUnparsed(u64 title_id, ContentRecordType type) const override;
90 VirtualFile GetEntryUnparsed(RegisteredCacheEntry entry) const;
91 122
92 VirtualFile GetEntryRaw(u64 title_id, ContentRecordType type) const; 123 VirtualFile GetEntryRaw(u64 title_id, ContentRecordType type) const override;
93 VirtualFile GetEntryRaw(RegisteredCacheEntry entry) const;
94 124
95 std::unique_ptr<NCA> GetEntry(u64 title_id, ContentRecordType type) const; 125 std::unique_ptr<NCA> GetEntry(u64 title_id, ContentRecordType type) const override;
96 std::unique_ptr<NCA> GetEntry(RegisteredCacheEntry entry) const;
97 126
98 std::vector<RegisteredCacheEntry> ListEntries() const;
99 // If a parameter is not std::nullopt, it will be filtered for from all entries. 127 // If a parameter is not std::nullopt, it will be filtered for from all entries.
100 std::vector<RegisteredCacheEntry> ListEntriesFilter( 128 std::vector<ContentProviderEntry> ListEntriesFilter(
101 std::optional<TitleType> title_type = {}, std::optional<ContentRecordType> record_type = {}, 129 std::optional<TitleType> title_type = {}, std::optional<ContentRecordType> record_type = {},
102 std::optional<u64> title_id = {}) const; 130 std::optional<u64> title_id = {}) const override;
103 131
104 // Raw copies all the ncas from the xci/nsp to the csache. Does some quick checks to make sure 132 // Raw copies all the ncas from the xci/nsp to the csache. Does some quick checks to make sure
105 // there is a meta NCA and all of them are accessible. 133 // there is a meta NCA and all of them are accessible.
@@ -131,46 +159,70 @@ private:
131 bool RawInstallYuzuMeta(const CNMT& cnmt); 159 bool RawInstallYuzuMeta(const CNMT& cnmt);
132 160
133 VirtualDir dir; 161 VirtualDir dir;
134 RegisteredCacheParsingFunction parser; 162 ContentProviderParsingFunction parser;
135 Core::Crypto::KeyManager keys;
136 163
137 // maps tid -> NcaID of meta 164 // maps tid -> NcaID of meta
138 boost::container::flat_map<u64, NcaID> meta_id; 165 std::map<u64, NcaID> meta_id;
139 // maps tid -> meta 166 // maps tid -> meta
140 boost::container::flat_map<u64, CNMT> meta; 167 std::map<u64, CNMT> meta;
141 // maps tid -> meta for CNMT in yuzu_meta 168 // maps tid -> meta for CNMT in yuzu_meta
142 boost::container::flat_map<u64, CNMT> yuzu_meta; 169 std::map<u64, CNMT> yuzu_meta;
143}; 170};
144 171
145// Combines multiple RegisteredCaches (i.e. SysNAND, UserNAND, SDMC) into one interface. 172enum class ContentProviderUnionSlot {
146class RegisteredCacheUnion { 173 SysNAND, ///< System NAND
147public: 174 UserNAND, ///< User NAND
148 explicit RegisteredCacheUnion(std::vector<RegisteredCache*> caches); 175 SDMC, ///< SD Card
149 176 FrontendManual, ///< Frontend-defined game list or similar
150 void Refresh(); 177};
151
152 bool HasEntry(u64 title_id, ContentRecordType type) const;
153 bool HasEntry(RegisteredCacheEntry entry) const;
154
155 std::optional<u32> GetEntryVersion(u64 title_id) const;
156
157 VirtualFile GetEntryUnparsed(u64 title_id, ContentRecordType type) const;
158 VirtualFile GetEntryUnparsed(RegisteredCacheEntry entry) const;
159
160 VirtualFile GetEntryRaw(u64 title_id, ContentRecordType type) const;
161 VirtualFile GetEntryRaw(RegisteredCacheEntry entry) const;
162
163 std::unique_ptr<NCA> GetEntry(u64 title_id, ContentRecordType type) const;
164 std::unique_ptr<NCA> GetEntry(RegisteredCacheEntry entry) const;
165 178
166 std::vector<RegisteredCacheEntry> ListEntries() const; 179// Combines multiple ContentProvider(s) (i.e. SysNAND, UserNAND, SDMC) into one interface.
167 // If a parameter is not std::nullopt, it will be filtered for from all entries. 180class ContentProviderUnion : public ContentProvider {
168 std::vector<RegisteredCacheEntry> ListEntriesFilter( 181public:
182 ~ContentProviderUnion() override;
183
184 void SetSlot(ContentProviderUnionSlot slot, ContentProvider* provider);
185 void ClearSlot(ContentProviderUnionSlot slot);
186
187 void Refresh() override;
188 bool HasEntry(u64 title_id, ContentRecordType type) const override;
189 std::optional<u32> GetEntryVersion(u64 title_id) const override;
190 VirtualFile GetEntryUnparsed(u64 title_id, ContentRecordType type) const override;
191 VirtualFile GetEntryRaw(u64 title_id, ContentRecordType type) const override;
192 std::unique_ptr<NCA> GetEntry(u64 title_id, ContentRecordType type) const override;
193 std::vector<ContentProviderEntry> ListEntriesFilter(
194 std::optional<TitleType> title_type, std::optional<ContentRecordType> record_type,
195 std::optional<u64> title_id) const override;
196
197 std::vector<std::pair<ContentProviderUnionSlot, ContentProviderEntry>> ListEntriesFilterOrigin(
198 std::optional<ContentProviderUnionSlot> origin = {},
169 std::optional<TitleType> title_type = {}, std::optional<ContentRecordType> record_type = {}, 199 std::optional<TitleType> title_type = {}, std::optional<ContentRecordType> record_type = {},
170 std::optional<u64> title_id = {}) const; 200 std::optional<u64> title_id = {}) const;
171 201
172private: 202private:
173 std::vector<RegisteredCache*> caches; 203 std::map<ContentProviderUnionSlot, ContentProvider*> providers;
204};
205
206class ManualContentProvider : public ContentProvider {
207public:
208 ~ManualContentProvider() override;
209
210 void AddEntry(TitleType title_type, ContentRecordType content_type, u64 title_id,
211 VirtualFile file);
212 void ClearAllEntries();
213
214 void Refresh() override;
215 bool HasEntry(u64 title_id, ContentRecordType type) const override;
216 std::optional<u32> GetEntryVersion(u64 title_id) const override;
217 VirtualFile GetEntryUnparsed(u64 title_id, ContentRecordType type) const override;
218 VirtualFile GetEntryRaw(u64 title_id, ContentRecordType type) const override;
219 std::unique_ptr<NCA> GetEntry(u64 title_id, ContentRecordType type) const override;
220 std::vector<ContentProviderEntry> ListEntriesFilter(
221 std::optional<TitleType> title_type, std::optional<ContentRecordType> record_type,
222 std::optional<u64> title_id) const override;
223
224private:
225 std::map<std::tuple<TitleType, ContentRecordType, u64>, VirtualFile> entries;
174}; 226};
175 227
176} // namespace FileSys 228} // namespace FileSys
diff --git a/src/core/file_sys/romfs_factory.cpp b/src/core/file_sys/romfs_factory.cpp
index 6ad1e4f86..b2ccb2926 100644
--- a/src/core/file_sys/romfs_factory.cpp
+++ b/src/core/file_sys/romfs_factory.cpp
@@ -48,7 +48,7 @@ ResultVal<VirtualFile> RomFSFactory::Open(u64 title_id, StorageId storage, Conte
48 48
49 switch (storage) { 49 switch (storage) {
50 case StorageId::None: 50 case StorageId::None:
51 res = Service::FileSystem::GetUnionContents().GetEntry(title_id, type); 51 res = Core::System::GetInstance().GetContentProvider().GetEntry(title_id, type);
52 break; 52 break;
53 case StorageId::NandSystem: 53 case StorageId::NandSystem:
54 res = Service::FileSystem::GetSystemNANDContents()->GetEntry(title_id, type); 54 res = Service::FileSystem::GetSystemNANDContents()->GetEntry(title_id, type);
diff --git a/src/core/file_sys/savedata_factory.cpp b/src/core/file_sys/savedata_factory.cpp
index 1913dc956..7974b031d 100644
--- a/src/core/file_sys/savedata_factory.cpp
+++ b/src/core/file_sys/savedata_factory.cpp
@@ -16,8 +16,10 @@ namespace FileSys {
16constexpr char SAVE_DATA_SIZE_FILENAME[] = ".yuzu_save_size"; 16constexpr char SAVE_DATA_SIZE_FILENAME[] = ".yuzu_save_size";
17 17
18std::string SaveDataDescriptor::DebugInfo() const { 18std::string SaveDataDescriptor::DebugInfo() const {
19 return fmt::format("[type={:02X}, title_id={:016X}, user_id={:016X}{:016X}, save_id={:016X}]", 19 return fmt::format("[type={:02X}, title_id={:016X}, user_id={:016X}{:016X}, save_id={:016X}, "
20 static_cast<u8>(type), title_id, user_id[1], user_id[0], save_id); 20 "rank={}, index={}]",
21 static_cast<u8>(type), title_id, user_id[1], user_id[0], save_id,
22 static_cast<u8>(rank), index);
21} 23}
22 24
23SaveDataFactory::SaveDataFactory(VirtualDir save_directory) : dir(std::move(save_directory)) { 25SaveDataFactory::SaveDataFactory(VirtualDir save_directory) : dir(std::move(save_directory)) {
@@ -28,7 +30,7 @@ SaveDataFactory::SaveDataFactory(VirtualDir save_directory) : dir(std::move(save
28 30
29SaveDataFactory::~SaveDataFactory() = default; 31SaveDataFactory::~SaveDataFactory() = default;
30 32
31ResultVal<VirtualDir> SaveDataFactory::Open(SaveDataSpaceId space, SaveDataDescriptor meta) { 33ResultVal<VirtualDir> SaveDataFactory::Open(SaveDataSpaceId space, const SaveDataDescriptor& meta) {
32 if (meta.type == SaveDataType::SystemSaveData || meta.type == SaveDataType::SaveData) { 34 if (meta.type == SaveDataType::SystemSaveData || meta.type == SaveDataType::SaveData) {
33 if (meta.zero_1 != 0) { 35 if (meta.zero_1 != 0) {
34 LOG_WARNING(Service_FS, 36 LOG_WARNING(Service_FS,
diff --git a/src/core/file_sys/savedata_factory.h b/src/core/file_sys/savedata_factory.h
index 3a1caf292..b73654571 100644
--- a/src/core/file_sys/savedata_factory.h
+++ b/src/core/file_sys/savedata_factory.h
@@ -32,12 +32,19 @@ enum class SaveDataType : u8 {
32 CacheStorage = 5, 32 CacheStorage = 5,
33}; 33};
34 34
35enum class SaveDataRank : u8 {
36 Primary,
37 Secondary,
38};
39
35struct SaveDataDescriptor { 40struct SaveDataDescriptor {
36 u64_le title_id; 41 u64_le title_id;
37 u128 user_id; 42 u128 user_id;
38 u64_le save_id; 43 u64_le save_id;
39 SaveDataType type; 44 SaveDataType type;
40 INSERT_PADDING_BYTES(7); 45 SaveDataRank rank;
46 u16_le index;
47 INSERT_PADDING_BYTES(4);
41 u64_le zero_1; 48 u64_le zero_1;
42 u64_le zero_2; 49 u64_le zero_2;
43 u64_le zero_3; 50 u64_le zero_3;
@@ -57,7 +64,7 @@ public:
57 explicit SaveDataFactory(VirtualDir dir); 64 explicit SaveDataFactory(VirtualDir dir);
58 ~SaveDataFactory(); 65 ~SaveDataFactory();
59 66
60 ResultVal<VirtualDir> Open(SaveDataSpaceId space, SaveDataDescriptor meta); 67 ResultVal<VirtualDir> Open(SaveDataSpaceId space, const SaveDataDescriptor& meta);
61 68
62 VirtualDir GetSaveDataSpaceDirectory(SaveDataSpaceId space) const; 69 VirtualDir GetSaveDataSpaceDirectory(SaveDataSpaceId space) const;
63 70
diff --git a/src/core/file_sys/submission_package.cpp b/src/core/file_sys/submission_package.cpp
index e1a4210db..c69caae0f 100644
--- a/src/core/file_sys/submission_package.cpp
+++ b/src/core/file_sys/submission_package.cpp
@@ -143,11 +143,12 @@ std::multimap<u64, std::shared_ptr<NCA>> NSP::GetNCAsByTitleID() const {
143 return out; 143 return out;
144} 144}
145 145
146std::map<u64, std::map<ContentRecordType, std::shared_ptr<NCA>>> NSP::GetNCAs() const { 146std::map<u64, std::map<std::pair<TitleType, ContentRecordType>, std::shared_ptr<NCA>>>
147NSP::GetNCAs() const {
147 return ncas; 148 return ncas;
148} 149}
149 150
150std::shared_ptr<NCA> NSP::GetNCA(u64 title_id, ContentRecordType type) const { 151std::shared_ptr<NCA> NSP::GetNCA(u64 title_id, ContentRecordType type, TitleType title_type) const {
151 if (extracted) 152 if (extracted)
152 LOG_WARNING(Service_FS, "called on an NSP that is of type extracted."); 153 LOG_WARNING(Service_FS, "called on an NSP that is of type extracted.");
153 154
@@ -155,14 +156,14 @@ std::shared_ptr<NCA> NSP::GetNCA(u64 title_id, ContentRecordType type) const {
155 if (title_id_iter == ncas.end()) 156 if (title_id_iter == ncas.end())
156 return nullptr; 157 return nullptr;
157 158
158 const auto type_iter = title_id_iter->second.find(type); 159 const auto type_iter = title_id_iter->second.find({title_type, type});
159 if (type_iter == title_id_iter->second.end()) 160 if (type_iter == title_id_iter->second.end())
160 return nullptr; 161 return nullptr;
161 162
162 return type_iter->second; 163 return type_iter->second;
163} 164}
164 165
165VirtualFile NSP::GetNCAFile(u64 title_id, ContentRecordType type) const { 166VirtualFile NSP::GetNCAFile(u64 title_id, ContentRecordType type, TitleType title_type) const {
166 if (extracted) 167 if (extracted)
167 LOG_WARNING(Service_FS, "called on an NSP that is of type extracted."); 168 LOG_WARNING(Service_FS, "called on an NSP that is of type extracted.");
168 const auto nca = GetNCA(title_id, type); 169 const auto nca = GetNCA(title_id, type);
@@ -240,7 +241,7 @@ void NSP::ReadNCAs(const std::vector<VirtualFile>& files) {
240 const CNMT cnmt(inner_file); 241 const CNMT cnmt(inner_file);
241 auto& ncas_title = ncas[cnmt.GetTitleID()]; 242 auto& ncas_title = ncas[cnmt.GetTitleID()];
242 243
243 ncas_title[ContentRecordType::Meta] = nca; 244 ncas_title[{cnmt.GetType(), ContentRecordType::Meta}] = nca;
244 for (const auto& rec : cnmt.GetContentRecords()) { 245 for (const auto& rec : cnmt.GetContentRecords()) {
245 const auto id_string = Common::HexArrayToString(rec.nca_id, false); 246 const auto id_string = Common::HexArrayToString(rec.nca_id, false);
246 const auto next_file = pfs->GetFile(fmt::format("{}.nca", id_string)); 247 const auto next_file = pfs->GetFile(fmt::format("{}.nca", id_string));
@@ -258,7 +259,7 @@ void NSP::ReadNCAs(const std::vector<VirtualFile>& files) {
258 if (next_nca->GetStatus() == Loader::ResultStatus::Success || 259 if (next_nca->GetStatus() == Loader::ResultStatus::Success ||
259 (next_nca->GetStatus() == Loader::ResultStatus::ErrorMissingBKTRBaseRomFS && 260 (next_nca->GetStatus() == Loader::ResultStatus::ErrorMissingBKTRBaseRomFS &&
260 (cnmt.GetTitleID() & 0x800) != 0)) { 261 (cnmt.GetTitleID() & 0x800) != 0)) {
261 ncas_title[rec.type] = std::move(next_nca); 262 ncas_title[{cnmt.GetType(), rec.type}] = std::move(next_nca);
262 } 263 }
263 } 264 }
264 265
diff --git a/src/core/file_sys/submission_package.h b/src/core/file_sys/submission_package.h
index 9a28ed5bb..ee9b6ce17 100644
--- a/src/core/file_sys/submission_package.h
+++ b/src/core/file_sys/submission_package.h
@@ -42,9 +42,12 @@ public:
42 // Type 0 Only (Collection of NCAs + Certificate + Ticket + Meta XML) 42 // Type 0 Only (Collection of NCAs + Certificate + Ticket + Meta XML)
43 std::vector<std::shared_ptr<NCA>> GetNCAsCollapsed() const; 43 std::vector<std::shared_ptr<NCA>> GetNCAsCollapsed() const;
44 std::multimap<u64, std::shared_ptr<NCA>> GetNCAsByTitleID() const; 44 std::multimap<u64, std::shared_ptr<NCA>> GetNCAsByTitleID() const;
45 std::map<u64, std::map<ContentRecordType, std::shared_ptr<NCA>>> GetNCAs() const; 45 std::map<u64, std::map<std::pair<TitleType, ContentRecordType>, std::shared_ptr<NCA>>> GetNCAs()
46 std::shared_ptr<NCA> GetNCA(u64 title_id, ContentRecordType type) const; 46 const;
47 VirtualFile GetNCAFile(u64 title_id, ContentRecordType type) const; 47 std::shared_ptr<NCA> GetNCA(u64 title_id, ContentRecordType type,
48 TitleType title_type = TitleType::Application) const;
49 VirtualFile GetNCAFile(u64 title_id, ContentRecordType type,
50 TitleType title_type = TitleType::Application) const;
48 std::vector<Core::Crypto::Key128> GetTitlekey() const; 51 std::vector<Core::Crypto::Key128> GetTitlekey() const;
49 52
50 std::vector<VirtualFile> GetFiles() const override; 53 std::vector<VirtualFile> GetFiles() const override;
@@ -67,7 +70,7 @@ private:
67 70
68 std::shared_ptr<PartitionFilesystem> pfs; 71 std::shared_ptr<PartitionFilesystem> pfs;
69 // Map title id -> {map type -> NCA} 72 // Map title id -> {map type -> NCA}
70 std::map<u64, std::map<ContentRecordType, std::shared_ptr<NCA>>> ncas; 73 std::map<u64, std::map<std::pair<TitleType, ContentRecordType>, std::shared_ptr<NCA>>> ncas;
71 std::vector<VirtualFile> ticket_files; 74 std::vector<VirtualFile> ticket_files;
72 75
73 Core::Crypto::KeyManager keys; 76 Core::Crypto::KeyManager keys;
diff --git a/src/core/file_sys/system_archive/system_archive.cpp b/src/core/file_sys/system_archive/system_archive.cpp
index e3e79f40a..c9722ed77 100644
--- a/src/core/file_sys/system_archive/system_archive.cpp
+++ b/src/core/file_sys/system_archive/system_archive.cpp
@@ -6,6 +6,7 @@
6#include "core/file_sys/romfs.h" 6#include "core/file_sys/romfs.h"
7#include "core/file_sys/system_archive/ng_word.h" 7#include "core/file_sys/system_archive/ng_word.h"
8#include "core/file_sys/system_archive/system_archive.h" 8#include "core/file_sys/system_archive/system_archive.h"
9#include "core/file_sys/system_archive/system_version.h"
9 10
10namespace FileSys::SystemArchive { 11namespace FileSys::SystemArchive {
11 12
@@ -30,7 +31,7 @@ constexpr std::array<SystemArchiveDescriptor, SYSTEM_ARCHIVE_COUNT> SYSTEM_ARCHI
30 {0x0100000000000806, "NgWord", &NgWord1}, 31 {0x0100000000000806, "NgWord", &NgWord1},
31 {0x0100000000000807, "SsidList", nullptr}, 32 {0x0100000000000807, "SsidList", nullptr},
32 {0x0100000000000808, "Dictionary", nullptr}, 33 {0x0100000000000808, "Dictionary", nullptr},
33 {0x0100000000000809, "SystemVersion", nullptr}, 34 {0x0100000000000809, "SystemVersion", &SystemVersion},
34 {0x010000000000080A, "AvatarImage", nullptr}, 35 {0x010000000000080A, "AvatarImage", nullptr},
35 {0x010000000000080B, "LocalNews", nullptr}, 36 {0x010000000000080B, "LocalNews", nullptr},
36 {0x010000000000080C, "Eula", nullptr}, 37 {0x010000000000080C, "Eula", nullptr},
diff --git a/src/core/file_sys/system_archive/system_version.cpp b/src/core/file_sys/system_archive/system_version.cpp
new file mode 100644
index 000000000..6e22f97b0
--- /dev/null
+++ b/src/core/file_sys/system_archive/system_version.cpp
@@ -0,0 +1,52 @@
1// Copyright 2019 yuzu emulator team
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "core/file_sys/system_archive/system_version.h"
6#include "core/file_sys/vfs_vector.h"
7
8namespace FileSys::SystemArchive {
9
10namespace SystemVersionData {
11
12// This section should reflect the best system version to describe yuzu's HLE api.
13// TODO(DarkLordZach): Update when HLE gets better.
14
15constexpr u8 VERSION_MAJOR = 5;
16constexpr u8 VERSION_MINOR = 1;
17constexpr u8 VERSION_MICRO = 0;
18
19constexpr u8 REVISION_MAJOR = 3;
20constexpr u8 REVISION_MINOR = 0;
21
22constexpr char PLATFORM_STRING[] = "NX";
23constexpr char VERSION_HASH[] = "23f9df53e25709d756e0c76effcb2473bd3447dd";
24constexpr char DISPLAY_VERSION[] = "5.1.0";
25constexpr char DISPLAY_TITLE[] = "NintendoSDK Firmware for NX 5.1.0-3.0";
26
27} // namespace SystemVersionData
28
29std::string GetLongDisplayVersion() {
30 return SystemVersionData::DISPLAY_TITLE;
31}
32
33VirtualDir SystemVersion() {
34 VirtualFile file = std::make_shared<VectorVfsFile>(std::vector<u8>(0x100), "file");
35 file->WriteObject(SystemVersionData::VERSION_MAJOR, 0);
36 file->WriteObject(SystemVersionData::VERSION_MINOR, 1);
37 file->WriteObject(SystemVersionData::VERSION_MICRO, 2);
38 file->WriteObject(SystemVersionData::REVISION_MAJOR, 4);
39 file->WriteObject(SystemVersionData::REVISION_MINOR, 5);
40 file->WriteArray(SystemVersionData::PLATFORM_STRING,
41 std::min<u64>(sizeof(SystemVersionData::PLATFORM_STRING), 0x20ULL), 0x8);
42 file->WriteArray(SystemVersionData::VERSION_HASH,
43 std::min<u64>(sizeof(SystemVersionData::VERSION_HASH), 0x40ULL), 0x28);
44 file->WriteArray(SystemVersionData::DISPLAY_VERSION,
45 std::min<u64>(sizeof(SystemVersionData::DISPLAY_VERSION), 0x18ULL), 0x68);
46 file->WriteArray(SystemVersionData::DISPLAY_TITLE,
47 std::min<u64>(sizeof(SystemVersionData::DISPLAY_TITLE), 0x80ULL), 0x80);
48 return std::make_shared<VectorVfsDirectory>(std::vector<VirtualFile>{file},
49 std::vector<VirtualDir>{}, "data");
50}
51
52} // namespace FileSys::SystemArchive
diff --git a/src/core/file_sys/system_archive/system_version.h b/src/core/file_sys/system_archive/system_version.h
new file mode 100644
index 000000000..deed79b26
--- /dev/null
+++ b/src/core/file_sys/system_archive/system_version.h
@@ -0,0 +1,16 @@
1// Copyright 2019 yuzu emulator team
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <string>
8#include "core/file_sys/vfs_types.h"
9
10namespace FileSys::SystemArchive {
11
12std::string GetLongDisplayVersion();
13
14VirtualDir SystemVersion();
15
16} // namespace FileSys::SystemArchive
diff --git a/src/core/file_sys/vfs_vector.cpp b/src/core/file_sys/vfs_vector.cpp
index 515626658..75fc04302 100644
--- a/src/core/file_sys/vfs_vector.cpp
+++ b/src/core/file_sys/vfs_vector.cpp
@@ -47,7 +47,7 @@ std::size_t VectorVfsFile::Write(const u8* data_, std::size_t length, std::size_
47 if (offset + length > data.size()) 47 if (offset + length > data.size())
48 data.resize(offset + length); 48 data.resize(offset + length);
49 const auto write = std::min(length, data.size() - offset); 49 const auto write = std::min(length, data.size() - offset);
50 std::memcpy(data.data(), data_, write); 50 std::memcpy(data.data() + offset, data_, write);
51 return write; 51 return write;
52} 52}
53 53
diff --git a/src/core/frontend/emu_window.cpp b/src/core/frontend/emu_window.cpp
index 9dd493efb..1320bbe77 100644
--- a/src/core/frontend/emu_window.cpp
+++ b/src/core/frontend/emu_window.cpp
@@ -30,7 +30,7 @@ private:
30 explicit Device(std::weak_ptr<TouchState>&& touch_state) : touch_state(touch_state) {} 30 explicit Device(std::weak_ptr<TouchState>&& touch_state) : touch_state(touch_state) {}
31 std::tuple<float, float, bool> GetStatus() const override { 31 std::tuple<float, float, bool> GetStatus() const override {
32 if (auto state = touch_state.lock()) { 32 if (auto state = touch_state.lock()) {
33 std::lock_guard<std::mutex> guard(state->mutex); 33 std::lock_guard guard{state->mutex};
34 return std::make_tuple(state->touch_x, state->touch_y, state->touch_pressed); 34 return std::make_tuple(state->touch_x, state->touch_y, state->touch_pressed);
35 } 35 }
36 return std::make_tuple(0.0f, 0.0f, false); 36 return std::make_tuple(0.0f, 0.0f, false);
@@ -67,7 +67,7 @@ static bool IsWithinTouchscreen(const Layout::FramebufferLayout& layout, unsigne
67 framebuffer_x >= layout.screen.left && framebuffer_x < layout.screen.right); 67 framebuffer_x >= layout.screen.left && framebuffer_x < layout.screen.right);
68} 68}
69 69
70std::tuple<unsigned, unsigned> EmuWindow::ClipToTouchScreen(unsigned new_x, unsigned new_y) { 70std::tuple<unsigned, unsigned> EmuWindow::ClipToTouchScreen(unsigned new_x, unsigned new_y) const {
71 new_x = std::max(new_x, framebuffer_layout.screen.left); 71 new_x = std::max(new_x, framebuffer_layout.screen.left);
72 new_x = std::min(new_x, framebuffer_layout.screen.right - 1); 72 new_x = std::min(new_x, framebuffer_layout.screen.right - 1);
73 73
@@ -81,7 +81,7 @@ void EmuWindow::TouchPressed(unsigned framebuffer_x, unsigned framebuffer_y) {
81 if (!IsWithinTouchscreen(framebuffer_layout, framebuffer_x, framebuffer_y)) 81 if (!IsWithinTouchscreen(framebuffer_layout, framebuffer_x, framebuffer_y))
82 return; 82 return;
83 83
84 std::lock_guard<std::mutex> guard(touch_state->mutex); 84 std::lock_guard guard{touch_state->mutex};
85 touch_state->touch_x = static_cast<float>(framebuffer_x - framebuffer_layout.screen.left) / 85 touch_state->touch_x = static_cast<float>(framebuffer_x - framebuffer_layout.screen.left) /
86 (framebuffer_layout.screen.right - framebuffer_layout.screen.left); 86 (framebuffer_layout.screen.right - framebuffer_layout.screen.left);
87 touch_state->touch_y = static_cast<float>(framebuffer_y - framebuffer_layout.screen.top) / 87 touch_state->touch_y = static_cast<float>(framebuffer_y - framebuffer_layout.screen.top) /
@@ -91,7 +91,7 @@ void EmuWindow::TouchPressed(unsigned framebuffer_x, unsigned framebuffer_y) {
91} 91}
92 92
93void EmuWindow::TouchReleased() { 93void EmuWindow::TouchReleased() {
94 std::lock_guard<std::mutex> guard(touch_state->mutex); 94 std::lock_guard guard{touch_state->mutex};
95 touch_state->touch_pressed = false; 95 touch_state->touch_pressed = false;
96 touch_state->touch_x = 0; 96 touch_state->touch_x = 0;
97 touch_state->touch_y = 0; 97 touch_state->touch_y = 0;
diff --git a/src/core/frontend/emu_window.h b/src/core/frontend/emu_window.h
index 75c2be4ae..70a522556 100644
--- a/src/core/frontend/emu_window.h
+++ b/src/core/frontend/emu_window.h
@@ -187,7 +187,7 @@ private:
187 /** 187 /**
188 * Clip the provided coordinates to be inside the touchscreen area. 188 * Clip the provided coordinates to be inside the touchscreen area.
189 */ 189 */
190 std::tuple<unsigned, unsigned> ClipToTouchScreen(unsigned new_x, unsigned new_y); 190 std::tuple<unsigned, unsigned> ClipToTouchScreen(unsigned new_x, unsigned new_y) const;
191}; 191};
192 192
193} // namespace Core::Frontend 193} // namespace Core::Frontend
diff --git a/src/core/frontend/framebuffer_layout.cpp b/src/core/frontend/framebuffer_layout.cpp
index f8662d193..a1357179f 100644
--- a/src/core/frontend/framebuffer_layout.cpp
+++ b/src/core/frontend/framebuffer_layout.cpp
@@ -12,12 +12,12 @@ namespace Layout {
12 12
13// Finds the largest size subrectangle contained in window area that is confined to the aspect ratio 13// Finds the largest size subrectangle contained in window area that is confined to the aspect ratio
14template <class T> 14template <class T>
15static MathUtil::Rectangle<T> maxRectangle(MathUtil::Rectangle<T> window_area, 15static Common::Rectangle<T> MaxRectangle(Common::Rectangle<T> window_area,
16 float screen_aspect_ratio) { 16 float screen_aspect_ratio) {
17 float scale = std::min(static_cast<float>(window_area.GetWidth()), 17 float scale = std::min(static_cast<float>(window_area.GetWidth()),
18 window_area.GetHeight() / screen_aspect_ratio); 18 window_area.GetHeight() / screen_aspect_ratio);
19 return MathUtil::Rectangle<T>{0, 0, static_cast<T>(std::round(scale)), 19 return Common::Rectangle<T>{0, 0, static_cast<T>(std::round(scale)),
20 static_cast<T>(std::round(scale * screen_aspect_ratio))}; 20 static_cast<T>(std::round(scale * screen_aspect_ratio))};
21} 21}
22 22
23FramebufferLayout DefaultFrameLayout(unsigned width, unsigned height) { 23FramebufferLayout DefaultFrameLayout(unsigned width, unsigned height) {
@@ -29,8 +29,8 @@ FramebufferLayout DefaultFrameLayout(unsigned width, unsigned height) {
29 29
30 const float emulation_aspect_ratio{static_cast<float>(ScreenUndocked::Height) / 30 const float emulation_aspect_ratio{static_cast<float>(ScreenUndocked::Height) /
31 ScreenUndocked::Width}; 31 ScreenUndocked::Width};
32 MathUtil::Rectangle<unsigned> screen_window_area{0, 0, width, height}; 32 Common::Rectangle<unsigned> screen_window_area{0, 0, width, height};
33 MathUtil::Rectangle<unsigned> screen = maxRectangle(screen_window_area, emulation_aspect_ratio); 33 Common::Rectangle<unsigned> screen = MaxRectangle(screen_window_area, emulation_aspect_ratio);
34 34
35 float window_aspect_ratio = static_cast<float>(height) / width; 35 float window_aspect_ratio = static_cast<float>(height) / width;
36 36
diff --git a/src/core/frontend/framebuffer_layout.h b/src/core/frontend/framebuffer_layout.h
index e06647794..c2c63d08c 100644
--- a/src/core/frontend/framebuffer_layout.h
+++ b/src/core/frontend/framebuffer_layout.h
@@ -16,7 +16,7 @@ struct FramebufferLayout {
16 unsigned width{ScreenUndocked::Width}; 16 unsigned width{ScreenUndocked::Width};
17 unsigned height{ScreenUndocked::Height}; 17 unsigned height{ScreenUndocked::Height};
18 18
19 MathUtil::Rectangle<unsigned> screen; 19 Common::Rectangle<unsigned> screen;
20 20
21 /** 21 /**
22 * Returns the ration of pixel size of the screen, compared to the native size of the undocked 22 * Returns the ration of pixel size of the screen, compared to the native size of the undocked
diff --git a/src/core/frontend/input.h b/src/core/frontend/input.h
index 16fdcd376..7c11d7546 100644
--- a/src/core/frontend/input.h
+++ b/src/core/frontend/input.h
@@ -124,7 +124,7 @@ using AnalogDevice = InputDevice<std::tuple<float, float>>;
124 * Orientation is determined by right-hand rule. 124 * Orientation is determined by right-hand rule.
125 * Units: deg/sec 125 * Units: deg/sec
126 */ 126 */
127using MotionDevice = InputDevice<std::tuple<Math::Vec3<float>, Math::Vec3<float>>>; 127using MotionDevice = InputDevice<std::tuple<Common::Vec3<float>, Common::Vec3<float>>>;
128 128
129/** 129/**
130 * A touch device is an input device that returns a tuple of two floats and a bool. The floats are 130 * A touch device is an input device that returns a tuple of two floats and a bool. The floats are
diff --git a/src/core/frontend/scope_acquire_window_context.cpp b/src/core/frontend/scope_acquire_window_context.cpp
new file mode 100644
index 000000000..3663dad17
--- /dev/null
+++ b/src/core/frontend/scope_acquire_window_context.cpp
@@ -0,0 +1,18 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "core/frontend/emu_window.h"
6#include "core/frontend/scope_acquire_window_context.h"
7
8namespace Core::Frontend {
9
10ScopeAcquireWindowContext::ScopeAcquireWindowContext(Core::Frontend::EmuWindow& emu_window_)
11 : emu_window{emu_window_} {
12 emu_window.MakeCurrent();
13}
14ScopeAcquireWindowContext::~ScopeAcquireWindowContext() {
15 emu_window.DoneCurrent();
16}
17
18} // namespace Core::Frontend
diff --git a/src/core/frontend/scope_acquire_window_context.h b/src/core/frontend/scope_acquire_window_context.h
new file mode 100644
index 000000000..2d9f6e825
--- /dev/null
+++ b/src/core/frontend/scope_acquire_window_context.h
@@ -0,0 +1,23 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include "common/common_types.h"
8
9namespace Core::Frontend {
10
11class EmuWindow;
12
13/// Helper class to acquire/release window context within a given scope
14class ScopeAcquireWindowContext : NonCopyable {
15public:
16 explicit ScopeAcquireWindowContext(Core::Frontend::EmuWindow& window);
17 ~ScopeAcquireWindowContext();
18
19private:
20 Core::Frontend::EmuWindow& emu_window;
21};
22
23} // namespace Core::Frontend
diff --git a/src/core/gdbstub/gdbstub.cpp b/src/core/gdbstub/gdbstub.cpp
index a1cad4fcb..afa812598 100644
--- a/src/core/gdbstub/gdbstub.cpp
+++ b/src/core/gdbstub/gdbstub.cpp
@@ -507,8 +507,11 @@ static void RemoveBreakpoint(BreakpointType type, VAddr addr) {
507 507
508 LOG_DEBUG(Debug_GDBStub, "gdb: removed a breakpoint: {:016X} bytes at {:016X} of type {}", 508 LOG_DEBUG(Debug_GDBStub, "gdb: removed a breakpoint: {:016X} bytes at {:016X} of type {}",
509 bp->second.len, bp->second.addr, static_cast<int>(type)); 509 bp->second.len, bp->second.addr, static_cast<int>(type));
510 Memory::WriteBlock(bp->second.addr, bp->second.inst.data(), bp->second.inst.size()); 510
511 Core::System::GetInstance().InvalidateCpuInstructionCaches(); 511 if (type == BreakpointType::Execute) {
512 Memory::WriteBlock(bp->second.addr, bp->second.inst.data(), bp->second.inst.size());
513 Core::System::GetInstance().InvalidateCpuInstructionCaches();
514 }
512 p.erase(addr); 515 p.erase(addr);
513} 516}
514 517
@@ -1027,7 +1030,7 @@ static void Step() {
1027 1030
1028/// Tell the CPU if we hit a memory breakpoint. 1031/// Tell the CPU if we hit a memory breakpoint.
1029bool IsMemoryBreak() { 1032bool IsMemoryBreak() {
1030 if (IsConnected()) { 1033 if (!IsConnected()) {
1031 return false; 1034 return false;
1032 } 1035 }
1033 1036
@@ -1057,9 +1060,12 @@ static bool CommitBreakpoint(BreakpointType type, VAddr addr, u64 len) {
1057 breakpoint.addr = addr; 1060 breakpoint.addr = addr;
1058 breakpoint.len = len; 1061 breakpoint.len = len;
1059 Memory::ReadBlock(addr, breakpoint.inst.data(), breakpoint.inst.size()); 1062 Memory::ReadBlock(addr, breakpoint.inst.data(), breakpoint.inst.size());
1063
1060 static constexpr std::array<u8, 4> btrap{0x00, 0x7d, 0x20, 0xd4}; 1064 static constexpr std::array<u8, 4> btrap{0x00, 0x7d, 0x20, 0xd4};
1061 Memory::WriteBlock(addr, btrap.data(), btrap.size()); 1065 if (type == BreakpointType::Execute) {
1062 Core::System::GetInstance().InvalidateCpuInstructionCaches(); 1066 Memory::WriteBlock(addr, btrap.data(), btrap.size());
1067 Core::System::GetInstance().InvalidateCpuInstructionCaches();
1068 }
1063 p.insert({addr, breakpoint}); 1069 p.insert({addr, breakpoint});
1064 1070
1065 LOG_DEBUG(Debug_GDBStub, "gdb: added {} breakpoint: {:016X} bytes at {:016X}", 1071 LOG_DEBUG(Debug_GDBStub, "gdb: added {} breakpoint: {:016X} bytes at {:016X}",
diff --git a/src/core/hle/ipc.h b/src/core/hle/ipc.h
index ed84197b3..fae54bcc7 100644
--- a/src/core/hle/ipc.h
+++ b/src/core/hle/ipc.h
@@ -4,10 +4,10 @@
4 4
5#pragma once 5#pragma once
6 6
7#include "common/bit_field.h"
8#include "common/common_funcs.h"
7#include "common/common_types.h" 9#include "common/common_types.h"
8#include "common/swap.h" 10#include "common/swap.h"
9#include "core/hle/kernel/errors.h"
10#include "core/memory.h"
11 11
12namespace IPC { 12namespace IPC {
13 13
@@ -39,10 +39,10 @@ struct CommandHeader {
39 union { 39 union {
40 u32_le raw_low; 40 u32_le raw_low;
41 BitField<0, 16, CommandType> type; 41 BitField<0, 16, CommandType> type;
42 BitField<16, 4, u32_le> num_buf_x_descriptors; 42 BitField<16, 4, u32> num_buf_x_descriptors;
43 BitField<20, 4, u32_le> num_buf_a_descriptors; 43 BitField<20, 4, u32> num_buf_a_descriptors;
44 BitField<24, 4, u32_le> num_buf_b_descriptors; 44 BitField<24, 4, u32> num_buf_b_descriptors;
45 BitField<28, 4, u32_le> num_buf_w_descriptors; 45 BitField<28, 4, u32> num_buf_w_descriptors;
46 }; 46 };
47 47
48 enum class BufferDescriptorCFlag : u32 { 48 enum class BufferDescriptorCFlag : u32 {
@@ -53,28 +53,28 @@ struct CommandHeader {
53 53
54 union { 54 union {
55 u32_le raw_high; 55 u32_le raw_high;
56 BitField<0, 10, u32_le> data_size; 56 BitField<0, 10, u32> data_size;
57 BitField<10, 4, BufferDescriptorCFlag> buf_c_descriptor_flags; 57 BitField<10, 4, BufferDescriptorCFlag> buf_c_descriptor_flags;
58 BitField<31, 1, u32_le> enable_handle_descriptor; 58 BitField<31, 1, u32> enable_handle_descriptor;
59 }; 59 };
60}; 60};
61static_assert(sizeof(CommandHeader) == 8, "CommandHeader size is incorrect"); 61static_assert(sizeof(CommandHeader) == 8, "CommandHeader size is incorrect");
62 62
63union HandleDescriptorHeader { 63union HandleDescriptorHeader {
64 u32_le raw_high; 64 u32_le raw_high;
65 BitField<0, 1, u32_le> send_current_pid; 65 BitField<0, 1, u32> send_current_pid;
66 BitField<1, 4, u32_le> num_handles_to_copy; 66 BitField<1, 4, u32> num_handles_to_copy;
67 BitField<5, 4, u32_le> num_handles_to_move; 67 BitField<5, 4, u32> num_handles_to_move;
68}; 68};
69static_assert(sizeof(HandleDescriptorHeader) == 4, "HandleDescriptorHeader size is incorrect"); 69static_assert(sizeof(HandleDescriptorHeader) == 4, "HandleDescriptorHeader size is incorrect");
70 70
71struct BufferDescriptorX { 71struct BufferDescriptorX {
72 union { 72 union {
73 BitField<0, 6, u32_le> counter_bits_0_5; 73 BitField<0, 6, u32> counter_bits_0_5;
74 BitField<6, 3, u32_le> address_bits_36_38; 74 BitField<6, 3, u32> address_bits_36_38;
75 BitField<9, 3, u32_le> counter_bits_9_11; 75 BitField<9, 3, u32> counter_bits_9_11;
76 BitField<12, 4, u32_le> address_bits_32_35; 76 BitField<12, 4, u32> address_bits_32_35;
77 BitField<16, 16, u32_le> size; 77 BitField<16, 16, u32> size;
78 }; 78 };
79 79
80 u32_le address_bits_0_31; 80 u32_le address_bits_0_31;
@@ -103,10 +103,10 @@ struct BufferDescriptorABW {
103 u32_le address_bits_0_31; 103 u32_le address_bits_0_31;
104 104
105 union { 105 union {
106 BitField<0, 2, u32_le> flags; 106 BitField<0, 2, u32> flags;
107 BitField<2, 3, u32_le> address_bits_36_38; 107 BitField<2, 3, u32> address_bits_36_38;
108 BitField<24, 4, u32_le> size_bits_32_35; 108 BitField<24, 4, u32> size_bits_32_35;
109 BitField<28, 4, u32_le> address_bits_32_35; 109 BitField<28, 4, u32> address_bits_32_35;
110 }; 110 };
111 111
112 VAddr Address() const { 112 VAddr Address() const {
@@ -128,8 +128,8 @@ struct BufferDescriptorC {
128 u32_le address_bits_0_31; 128 u32_le address_bits_0_31;
129 129
130 union { 130 union {
131 BitField<0, 16, u32_le> address_bits_32_47; 131 BitField<0, 16, u32> address_bits_32_47;
132 BitField<16, 16, u32_le> size; 132 BitField<16, 16, u32> size;
133 }; 133 };
134 134
135 VAddr Address() const { 135 VAddr Address() const {
@@ -167,8 +167,8 @@ struct DomainMessageHeader {
167 struct { 167 struct {
168 union { 168 union {
169 BitField<0, 8, CommandType> command; 169 BitField<0, 8, CommandType> command;
170 BitField<8, 8, u32_le> input_object_count; 170 BitField<8, 8, u32> input_object_count;
171 BitField<16, 16, u32_le> size; 171 BitField<16, 16, u32> size;
172 }; 172 };
173 u32_le object_id; 173 u32_le object_id;
174 INSERT_PADDING_WORDS(2); 174 INSERT_PADDING_WORDS(2);
diff --git a/src/core/hle/ipc_helpers.h b/src/core/hle/ipc_helpers.h
index 0d6c85aed..ac0e1d796 100644
--- a/src/core/hle/ipc_helpers.h
+++ b/src/core/hle/ipc_helpers.h
@@ -19,9 +19,12 @@
19#include "core/hle/kernel/hle_ipc.h" 19#include "core/hle/kernel/hle_ipc.h"
20#include "core/hle/kernel/object.h" 20#include "core/hle/kernel/object.h"
21#include "core/hle/kernel/server_session.h" 21#include "core/hle/kernel/server_session.h"
22#include "core/hle/result.h"
22 23
23namespace IPC { 24namespace IPC {
24 25
26constexpr ResultCode ERR_REMOTE_PROCESS_DEAD{ErrorModule::HIPC, 301};
27
25class RequestHelperBase { 28class RequestHelperBase {
26protected: 29protected:
27 Kernel::HLERequestContext* context = nullptr; 30 Kernel::HLERequestContext* context = nullptr;
@@ -136,10 +139,8 @@ public:
136 context->AddDomainObject(std::move(iface)); 139 context->AddDomainObject(std::move(iface));
137 } else { 140 } else {
138 auto& kernel = Core::System::GetInstance().Kernel(); 141 auto& kernel = Core::System::GetInstance().Kernel();
139 auto sessions = 142 auto [server, client] =
140 Kernel::ServerSession::CreateSessionPair(kernel, iface->GetServiceName()); 143 Kernel::ServerSession::CreateSessionPair(kernel, iface->GetServiceName());
141 auto server = std::get<Kernel::SharedPtr<Kernel::ServerSession>>(sessions);
142 auto client = std::get<Kernel::SharedPtr<Kernel::ClientSession>>(sessions);
143 iface->ClientConnected(server); 144 iface->ClientConnected(server);
144 context->AddMoveObject(std::move(client)); 145 context->AddMoveObject(std::move(client));
145 } 146 }
@@ -217,6 +218,11 @@ private:
217/// Push /// 218/// Push ///
218 219
219template <> 220template <>
221inline void ResponseBuilder::Push(s32 value) {
222 cmdbuf[index++] = static_cast<u32>(value);
223}
224
225template <>
220inline void ResponseBuilder::Push(u32 value) { 226inline void ResponseBuilder::Push(u32 value) {
221 cmdbuf[index++] = value; 227 cmdbuf[index++] = value;
222} 228}
@@ -235,6 +241,22 @@ inline void ResponseBuilder::Push(ResultCode value) {
235} 241}
236 242
237template <> 243template <>
244inline void ResponseBuilder::Push(s8 value) {
245 PushRaw(value);
246}
247
248template <>
249inline void ResponseBuilder::Push(s16 value) {
250 PushRaw(value);
251}
252
253template <>
254inline void ResponseBuilder::Push(s64 value) {
255 Push(static_cast<u32>(value));
256 Push(static_cast<u32>(value >> 32));
257}
258
259template <>
238inline void ResponseBuilder::Push(u8 value) { 260inline void ResponseBuilder::Push(u8 value) {
239 PushRaw(value); 261 PushRaw(value);
240} 262}
@@ -251,6 +273,20 @@ inline void ResponseBuilder::Push(u64 value) {
251} 273}
252 274
253template <> 275template <>
276inline void ResponseBuilder::Push(float value) {
277 u32 integral;
278 std::memcpy(&integral, &value, sizeof(u32));
279 Push(integral);
280}
281
282template <>
283inline void ResponseBuilder::Push(double value) {
284 u64 integral;
285 std::memcpy(&integral, &value, sizeof(u64));
286 Push(integral);
287}
288
289template <>
254inline void ResponseBuilder::Push(bool value) { 290inline void ResponseBuilder::Push(bool value) {
255 Push(static_cast<u8>(value)); 291 Push(static_cast<u8>(value));
256} 292}
@@ -329,7 +365,7 @@ public:
329 template <class T> 365 template <class T>
330 std::shared_ptr<T> PopIpcInterface() { 366 std::shared_ptr<T> PopIpcInterface() {
331 ASSERT(context->Session()->IsDomain()); 367 ASSERT(context->Session()->IsDomain());
332 ASSERT(context->GetDomainMessageHeader()->input_object_count > 0); 368 ASSERT(context->GetDomainMessageHeader().input_object_count > 0);
333 return context->GetDomainRequestHandler<T>(Pop<u32>() - 1); 369 return context->GetDomainRequestHandler<T>(Pop<u32>() - 1);
334 } 370 }
335}; 371};
@@ -341,6 +377,11 @@ inline u32 RequestParser::Pop() {
341 return cmdbuf[index++]; 377 return cmdbuf[index++];
342} 378}
343 379
380template <>
381inline s32 RequestParser::Pop() {
382 return static_cast<s32>(Pop<u32>());
383}
384
344template <typename T> 385template <typename T>
345void RequestParser::PopRaw(T& value) { 386void RequestParser::PopRaw(T& value) {
346 std::memcpy(&value, cmdbuf + index, sizeof(T)); 387 std::memcpy(&value, cmdbuf + index, sizeof(T));
@@ -372,11 +413,37 @@ inline u64 RequestParser::Pop() {
372} 413}
373 414
374template <> 415template <>
416inline s8 RequestParser::Pop() {
417 return static_cast<s8>(Pop<u8>());
418}
419
420template <>
421inline s16 RequestParser::Pop() {
422 return static_cast<s16>(Pop<u16>());
423}
424
425template <>
375inline s64 RequestParser::Pop() { 426inline s64 RequestParser::Pop() {
376 return static_cast<s64>(Pop<u64>()); 427 return static_cast<s64>(Pop<u64>());
377} 428}
378 429
379template <> 430template <>
431inline float RequestParser::Pop() {
432 const u32 value = Pop<u32>();
433 float real;
434 std::memcpy(&real, &value, sizeof(real));
435 return real;
436}
437
438template <>
439inline double RequestParser::Pop() {
440 const u64 value = Pop<u64>();
441 double real;
442 std::memcpy(&real, &value, sizeof(real));
443 return real;
444}
445
446template <>
380inline bool RequestParser::Pop() { 447inline bool RequestParser::Pop() {
381 return Pop<u8>() != 0; 448 return Pop<u8>() != 0;
382} 449}
diff --git a/src/core/hle/kernel/address_arbiter.cpp b/src/core/hle/kernel/address_arbiter.cpp
index 57157beb4..c8842410b 100644
--- a/src/core/hle/kernel/address_arbiter.cpp
+++ b/src/core/hle/kernel/address_arbiter.cpp
@@ -9,6 +9,7 @@
9#include "common/common_types.h" 9#include "common/common_types.h"
10#include "core/core.h" 10#include "core/core.h"
11#include "core/core_cpu.h" 11#include "core/core_cpu.h"
12#include "core/hle/kernel/address_arbiter.h"
12#include "core/hle/kernel/errors.h" 13#include "core/hle/kernel/errors.h"
13#include "core/hle/kernel/object.h" 14#include "core/hle/kernel/object.h"
14#include "core/hle/kernel/process.h" 15#include "core/hle/kernel/process.h"
@@ -18,58 +19,15 @@
18#include "core/memory.h" 19#include "core/memory.h"
19 20
20namespace Kernel { 21namespace Kernel {
21namespace AddressArbiter { 22namespace {
22
23// Performs actual address waiting logic.
24static ResultCode WaitForAddress(VAddr address, s64 timeout) {
25 SharedPtr<Thread> current_thread = GetCurrentThread();
26 current_thread->SetArbiterWaitAddress(address);
27 current_thread->SetStatus(ThreadStatus::WaitArb);
28 current_thread->InvalidateWakeupCallback();
29
30 current_thread->WakeAfterDelay(timeout);
31
32 Core::System::GetInstance().CpuCore(current_thread->GetProcessorID()).PrepareReschedule();
33 return RESULT_TIMEOUT;
34}
35
36// Gets the threads waiting on an address.
37static std::vector<SharedPtr<Thread>> GetThreadsWaitingOnAddress(VAddr address) {
38 const auto RetrieveWaitingThreads = [](std::size_t core_index,
39 std::vector<SharedPtr<Thread>>& waiting_threads,
40 VAddr arb_addr) {
41 const auto& scheduler = Core::System::GetInstance().Scheduler(core_index);
42 const auto& thread_list = scheduler.GetThreadList();
43
44 for (const auto& thread : thread_list) {
45 if (thread->GetArbiterWaitAddress() == arb_addr)
46 waiting_threads.push_back(thread);
47 }
48 };
49
50 // Retrieve all threads that are waiting for this address.
51 std::vector<SharedPtr<Thread>> threads;
52 RetrieveWaitingThreads(0, threads, address);
53 RetrieveWaitingThreads(1, threads, address);
54 RetrieveWaitingThreads(2, threads, address);
55 RetrieveWaitingThreads(3, threads, address);
56
57 // Sort them by priority, such that the highest priority ones come first.
58 std::sort(threads.begin(), threads.end(),
59 [](const SharedPtr<Thread>& lhs, const SharedPtr<Thread>& rhs) {
60 return lhs->GetPriority() < rhs->GetPriority();
61 });
62
63 return threads;
64}
65
66// Wake up num_to_wake (or all) threads in a vector. 23// Wake up num_to_wake (or all) threads in a vector.
67static void WakeThreads(std::vector<SharedPtr<Thread>>& waiting_threads, s32 num_to_wake) { 24void WakeThreads(const std::vector<SharedPtr<Thread>>& waiting_threads, s32 num_to_wake) {
68 // Only process up to 'target' threads, unless 'target' is <= 0, in which case process 25 // Only process up to 'target' threads, unless 'target' is <= 0, in which case process
69 // them all. 26 // them all.
70 std::size_t last = waiting_threads.size(); 27 std::size_t last = waiting_threads.size();
71 if (num_to_wake > 0) 28 if (num_to_wake > 0) {
72 last = num_to_wake; 29 last = std::min(last, static_cast<std::size_t>(num_to_wake));
30 }
73 31
74 // Signal the waiting threads. 32 // Signal the waiting threads.
75 for (std::size_t i = 0; i < last; i++) { 33 for (std::size_t i = 0; i < last; i++) {
@@ -79,88 +37,114 @@ static void WakeThreads(std::vector<SharedPtr<Thread>>& waiting_threads, s32 num
79 waiting_threads[i]->ResumeFromWait(); 37 waiting_threads[i]->ResumeFromWait();
80 } 38 }
81} 39}
40} // Anonymous namespace
41
42AddressArbiter::AddressArbiter(Core::System& system) : system{system} {}
43AddressArbiter::~AddressArbiter() = default;
44
45ResultCode AddressArbiter::SignalToAddress(VAddr address, SignalType type, s32 value,
46 s32 num_to_wake) {
47 switch (type) {
48 case SignalType::Signal:
49 return SignalToAddressOnly(address, num_to_wake);
50 case SignalType::IncrementAndSignalIfEqual:
51 return IncrementAndSignalToAddressIfEqual(address, value, num_to_wake);
52 case SignalType::ModifyByWaitingCountAndSignalIfEqual:
53 return ModifyByWaitingCountAndSignalToAddressIfEqual(address, value, num_to_wake);
54 default:
55 return ERR_INVALID_ENUM_VALUE;
56 }
57}
82 58
83// Signals an address being waited on. 59ResultCode AddressArbiter::SignalToAddressOnly(VAddr address, s32 num_to_wake) {
84ResultCode SignalToAddress(VAddr address, s32 num_to_wake) { 60 const std::vector<SharedPtr<Thread>> waiting_threads = GetThreadsWaitingOnAddress(address);
85 std::vector<SharedPtr<Thread>> waiting_threads = GetThreadsWaitingOnAddress(address);
86
87 WakeThreads(waiting_threads, num_to_wake); 61 WakeThreads(waiting_threads, num_to_wake);
88 return RESULT_SUCCESS; 62 return RESULT_SUCCESS;
89} 63}
90 64
91// Signals an address being waited on and increments its value if equal to the value argument. 65ResultCode AddressArbiter::IncrementAndSignalToAddressIfEqual(VAddr address, s32 value,
92ResultCode IncrementAndSignalToAddressIfEqual(VAddr address, s32 value, s32 num_to_wake) { 66 s32 num_to_wake) {
93 // Ensure that we can write to the address. 67 // Ensure that we can write to the address.
94 if (!Memory::IsValidVirtualAddress(address)) { 68 if (!Memory::IsValidVirtualAddress(address)) {
95 return ERR_INVALID_ADDRESS_STATE; 69 return ERR_INVALID_ADDRESS_STATE;
96 } 70 }
97 71
98 if (static_cast<s32>(Memory::Read32(address)) == value) { 72 if (static_cast<s32>(Memory::Read32(address)) != value) {
99 Memory::Write32(address, static_cast<u32>(value + 1));
100 } else {
101 return ERR_INVALID_STATE; 73 return ERR_INVALID_STATE;
102 } 74 }
103 75
104 return SignalToAddress(address, num_to_wake); 76 Memory::Write32(address, static_cast<u32>(value + 1));
77 return SignalToAddressOnly(address, num_to_wake);
105} 78}
106 79
107// Signals an address being waited on and modifies its value based on waiting thread count if equal 80ResultCode AddressArbiter::ModifyByWaitingCountAndSignalToAddressIfEqual(VAddr address, s32 value,
108// to the value argument. 81 s32 num_to_wake) {
109ResultCode ModifyByWaitingCountAndSignalToAddressIfEqual(VAddr address, s32 value,
110 s32 num_to_wake) {
111 // Ensure that we can write to the address. 82 // Ensure that we can write to the address.
112 if (!Memory::IsValidVirtualAddress(address)) { 83 if (!Memory::IsValidVirtualAddress(address)) {
113 return ERR_INVALID_ADDRESS_STATE; 84 return ERR_INVALID_ADDRESS_STATE;
114 } 85 }
115 86
116 // Get threads waiting on the address. 87 // Get threads waiting on the address.
117 std::vector<SharedPtr<Thread>> waiting_threads = GetThreadsWaitingOnAddress(address); 88 const std::vector<SharedPtr<Thread>> waiting_threads = GetThreadsWaitingOnAddress(address);
118 89
119 // Determine the modified value depending on the waiting count. 90 // Determine the modified value depending on the waiting count.
120 s32 updated_value; 91 s32 updated_value;
121 if (waiting_threads.empty()) { 92 if (waiting_threads.empty()) {
122 updated_value = value - 1;
123 } else if (num_to_wake <= 0 || waiting_threads.size() <= static_cast<u32>(num_to_wake)) {
124 updated_value = value + 1; 93 updated_value = value + 1;
94 } else if (num_to_wake <= 0 || waiting_threads.size() <= static_cast<u32>(num_to_wake)) {
95 updated_value = value - 1;
125 } else { 96 } else {
126 updated_value = value; 97 updated_value = value;
127 } 98 }
128 99
129 if (static_cast<s32>(Memory::Read32(address)) == value) { 100 if (static_cast<s32>(Memory::Read32(address)) != value) {
130 Memory::Write32(address, static_cast<u32>(updated_value));
131 } else {
132 return ERR_INVALID_STATE; 101 return ERR_INVALID_STATE;
133 } 102 }
134 103
104 Memory::Write32(address, static_cast<u32>(updated_value));
135 WakeThreads(waiting_threads, num_to_wake); 105 WakeThreads(waiting_threads, num_to_wake);
136 return RESULT_SUCCESS; 106 return RESULT_SUCCESS;
137} 107}
138 108
139// Waits on an address if the value passed is less than the argument value, optionally decrementing. 109ResultCode AddressArbiter::WaitForAddress(VAddr address, ArbitrationType type, s32 value,
140ResultCode WaitForAddressIfLessThan(VAddr address, s32 value, s64 timeout, bool should_decrement) { 110 s64 timeout_ns) {
111 switch (type) {
112 case ArbitrationType::WaitIfLessThan:
113 return WaitForAddressIfLessThan(address, value, timeout_ns, false);
114 case ArbitrationType::DecrementAndWaitIfLessThan:
115 return WaitForAddressIfLessThan(address, value, timeout_ns, true);
116 case ArbitrationType::WaitIfEqual:
117 return WaitForAddressIfEqual(address, value, timeout_ns);
118 default:
119 return ERR_INVALID_ENUM_VALUE;
120 }
121}
122
123ResultCode AddressArbiter::WaitForAddressIfLessThan(VAddr address, s32 value, s64 timeout,
124 bool should_decrement) {
141 // Ensure that we can read the address. 125 // Ensure that we can read the address.
142 if (!Memory::IsValidVirtualAddress(address)) { 126 if (!Memory::IsValidVirtualAddress(address)) {
143 return ERR_INVALID_ADDRESS_STATE; 127 return ERR_INVALID_ADDRESS_STATE;
144 } 128 }
145 129
146 s32 cur_value = static_cast<s32>(Memory::Read32(address)); 130 const s32 cur_value = static_cast<s32>(Memory::Read32(address));
147 if (cur_value < value) { 131 if (cur_value >= value) {
148 if (should_decrement) {
149 Memory::Write32(address, static_cast<u32>(cur_value - 1));
150 }
151 } else {
152 return ERR_INVALID_STATE; 132 return ERR_INVALID_STATE;
153 } 133 }
134
135 if (should_decrement) {
136 Memory::Write32(address, static_cast<u32>(cur_value - 1));
137 }
138
154 // Short-circuit without rescheduling, if timeout is zero. 139 // Short-circuit without rescheduling, if timeout is zero.
155 if (timeout == 0) { 140 if (timeout == 0) {
156 return RESULT_TIMEOUT; 141 return RESULT_TIMEOUT;
157 } 142 }
158 143
159 return WaitForAddress(address, timeout); 144 return WaitForAddressImpl(address, timeout);
160} 145}
161 146
162// Waits on an address if the value passed is equal to the argument value. 147ResultCode AddressArbiter::WaitForAddressIfEqual(VAddr address, s32 value, s64 timeout) {
163ResultCode WaitForAddressIfEqual(VAddr address, s32 value, s64 timeout) {
164 // Ensure that we can read the address. 148 // Ensure that we can read the address.
165 if (!Memory::IsValidVirtualAddress(address)) { 149 if (!Memory::IsValidVirtualAddress(address)) {
166 return ERR_INVALID_ADDRESS_STATE; 150 return ERR_INVALID_ADDRESS_STATE;
@@ -174,7 +158,48 @@ ResultCode WaitForAddressIfEqual(VAddr address, s32 value, s64 timeout) {
174 return RESULT_TIMEOUT; 158 return RESULT_TIMEOUT;
175 } 159 }
176 160
177 return WaitForAddress(address, timeout); 161 return WaitForAddressImpl(address, timeout);
162}
163
164ResultCode AddressArbiter::WaitForAddressImpl(VAddr address, s64 timeout) {
165 SharedPtr<Thread> current_thread = system.CurrentScheduler().GetCurrentThread();
166 current_thread->SetArbiterWaitAddress(address);
167 current_thread->SetStatus(ThreadStatus::WaitArb);
168 current_thread->InvalidateWakeupCallback();
169
170 current_thread->WakeAfterDelay(timeout);
171
172 system.CpuCore(current_thread->GetProcessorID()).PrepareReschedule();
173 return RESULT_TIMEOUT;
174}
175
176std::vector<SharedPtr<Thread>> AddressArbiter::GetThreadsWaitingOnAddress(VAddr address) const {
177 const auto RetrieveWaitingThreads = [this](std::size_t core_index,
178 std::vector<SharedPtr<Thread>>& waiting_threads,
179 VAddr arb_addr) {
180 const auto& scheduler = system.Scheduler(core_index);
181 const auto& thread_list = scheduler.GetThreadList();
182
183 for (const auto& thread : thread_list) {
184 if (thread->GetArbiterWaitAddress() == arb_addr) {
185 waiting_threads.push_back(thread);
186 }
187 }
188 };
189
190 // Retrieve all threads that are waiting for this address.
191 std::vector<SharedPtr<Thread>> threads;
192 RetrieveWaitingThreads(0, threads, address);
193 RetrieveWaitingThreads(1, threads, address);
194 RetrieveWaitingThreads(2, threads, address);
195 RetrieveWaitingThreads(3, threads, address);
196
197 // Sort them by priority, such that the highest priority ones come first.
198 std::sort(threads.begin(), threads.end(),
199 [](const SharedPtr<Thread>& lhs, const SharedPtr<Thread>& rhs) {
200 return lhs->GetPriority() < rhs->GetPriority();
201 });
202
203 return threads;
178} 204}
179} // namespace AddressArbiter
180} // namespace Kernel 205} // namespace Kernel
diff --git a/src/core/hle/kernel/address_arbiter.h b/src/core/hle/kernel/address_arbiter.h
index e3657b8e9..ed0d0e69f 100644
--- a/src/core/hle/kernel/address_arbiter.h
+++ b/src/core/hle/kernel/address_arbiter.h
@@ -4,31 +4,77 @@
4 4
5#pragma once 5#pragma once
6 6
7#include <vector>
8
7#include "common/common_types.h" 9#include "common/common_types.h"
10#include "core/hle/kernel/object.h"
8 11
9union ResultCode; 12union ResultCode;
10 13
14namespace Core {
15class System;
16}
17
11namespace Kernel { 18namespace Kernel {
12 19
13namespace AddressArbiter { 20class Thread;
14enum class ArbitrationType {
15 WaitIfLessThan = 0,
16 DecrementAndWaitIfLessThan = 1,
17 WaitIfEqual = 2,
18};
19 21
20enum class SignalType { 22class AddressArbiter {
21 Signal = 0, 23public:
22 IncrementAndSignalIfEqual = 1, 24 enum class ArbitrationType {
23 ModifyByWaitingCountAndSignalIfEqual = 2, 25 WaitIfLessThan = 0,
24}; 26 DecrementAndWaitIfLessThan = 1,
27 WaitIfEqual = 2,
28 };
29
30 enum class SignalType {
31 Signal = 0,
32 IncrementAndSignalIfEqual = 1,
33 ModifyByWaitingCountAndSignalIfEqual = 2,
34 };
35
36 explicit AddressArbiter(Core::System& system);
37 ~AddressArbiter();
38
39 AddressArbiter(const AddressArbiter&) = delete;
40 AddressArbiter& operator=(const AddressArbiter&) = delete;
41
42 AddressArbiter(AddressArbiter&&) = default;
43 AddressArbiter& operator=(AddressArbiter&&) = delete;
44
45 /// Signals an address being waited on with a particular signaling type.
46 ResultCode SignalToAddress(VAddr address, SignalType type, s32 value, s32 num_to_wake);
25 47
26ResultCode SignalToAddress(VAddr address, s32 num_to_wake); 48 /// Waits on an address with a particular arbitration type.
27ResultCode IncrementAndSignalToAddressIfEqual(VAddr address, s32 value, s32 num_to_wake); 49 ResultCode WaitForAddress(VAddr address, ArbitrationType type, s32 value, s64 timeout_ns);
28ResultCode ModifyByWaitingCountAndSignalToAddressIfEqual(VAddr address, s32 value, s32 num_to_wake);
29 50
30ResultCode WaitForAddressIfLessThan(VAddr address, s32 value, s64 timeout, bool should_decrement); 51private:
31ResultCode WaitForAddressIfEqual(VAddr address, s32 value, s64 timeout); 52 /// Signals an address being waited on.
32} // namespace AddressArbiter 53 ResultCode SignalToAddressOnly(VAddr address, s32 num_to_wake);
54
55 /// Signals an address being waited on and increments its value if equal to the value argument.
56 ResultCode IncrementAndSignalToAddressIfEqual(VAddr address, s32 value, s32 num_to_wake);
57
58 /// Signals an address being waited on and modifies its value based on waiting thread count if
59 /// equal to the value argument.
60 ResultCode ModifyByWaitingCountAndSignalToAddressIfEqual(VAddr address, s32 value,
61 s32 num_to_wake);
62
63 /// Waits on an address if the value passed is less than the argument value,
64 /// optionally decrementing.
65 ResultCode WaitForAddressIfLessThan(VAddr address, s32 value, s64 timeout,
66 bool should_decrement);
67
68 /// Waits on an address if the value passed is equal to the argument value.
69 ResultCode WaitForAddressIfEqual(VAddr address, s32 value, s64 timeout);
70
71 // Waits on the given address with a timeout in nanoseconds
72 ResultCode WaitForAddressImpl(VAddr address, s64 timeout);
73
74 // Gets the threads waiting on an address.
75 std::vector<SharedPtr<Thread>> GetThreadsWaitingOnAddress(VAddr address) const;
76
77 Core::System& system;
78};
33 79
34} // namespace Kernel 80} // namespace Kernel
diff --git a/src/core/hle/kernel/client_port.cpp b/src/core/hle/kernel/client_port.cpp
index d4c91d529..744b1697d 100644
--- a/src/core/hle/kernel/client_port.cpp
+++ b/src/core/hle/kernel/client_port.cpp
@@ -2,8 +2,6 @@
2// Licensed under GPLv2 or any later version 2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included. 3// Refer to the license.txt file included.
4 4
5#include <tuple>
6
7#include "core/hle/kernel/client_port.h" 5#include "core/hle/kernel/client_port.h"
8#include "core/hle/kernel/client_session.h" 6#include "core/hle/kernel/client_session.h"
9#include "core/hle/kernel/errors.h" 7#include "core/hle/kernel/errors.h"
@@ -31,17 +29,18 @@ ResultVal<SharedPtr<ClientSession>> ClientPort::Connect() {
31 active_sessions++; 29 active_sessions++;
32 30
33 // Create a new session pair, let the created sessions inherit the parent port's HLE handler. 31 // Create a new session pair, let the created sessions inherit the parent port's HLE handler.
34 auto sessions = ServerSession::CreateSessionPair(kernel, server_port->GetName(), this); 32 auto [server, client] = ServerSession::CreateSessionPair(kernel, server_port->GetName(), this);
35 33
36 if (server_port->hle_handler) 34 if (server_port->HasHLEHandler()) {
37 server_port->hle_handler->ClientConnected(std::get<SharedPtr<ServerSession>>(sessions)); 35 server_port->GetHLEHandler()->ClientConnected(server);
38 else 36 } else {
39 server_port->pending_sessions.push_back(std::get<SharedPtr<ServerSession>>(sessions)); 37 server_port->AppendPendingSession(server);
38 }
40 39
41 // Wake the threads waiting on the ServerPort 40 // Wake the threads waiting on the ServerPort
42 server_port->WakeupAllWaitingThreads(); 41 server_port->WakeupAllWaitingThreads();
43 42
44 return MakeResult(std::get<SharedPtr<ClientSession>>(sessions)); 43 return MakeResult(client);
45} 44}
46 45
47void ClientPort::ConnectionClosed() { 46void ClientPort::ConnectionClosed() {
diff --git a/src/core/hle/kernel/client_port.h b/src/core/hle/kernel/client_port.h
index 6cd607206..4921ad4f0 100644
--- a/src/core/hle/kernel/client_port.h
+++ b/src/core/hle/kernel/client_port.h
@@ -25,7 +25,7 @@ public:
25 return name; 25 return name;
26 } 26 }
27 27
28 static const HandleType HANDLE_TYPE = HandleType::ClientPort; 28 static constexpr HandleType HANDLE_TYPE = HandleType::ClientPort;
29 HandleType GetHandleType() const override { 29 HandleType GetHandleType() const override {
30 return HANDLE_TYPE; 30 return HANDLE_TYPE;
31 } 31 }
diff --git a/src/core/hle/kernel/client_session.cpp b/src/core/hle/kernel/client_session.cpp
index 704e82824..c17baa50a 100644
--- a/src/core/hle/kernel/client_session.cpp
+++ b/src/core/hle/kernel/client_session.cpp
@@ -17,21 +17,11 @@ ClientSession::~ClientSession() {
17 // This destructor will be called automatically when the last ClientSession handle is closed by 17 // This destructor will be called automatically when the last ClientSession handle is closed by
18 // the emulated application. 18 // the emulated application.
19 19
20 // Local references to ServerSession and SessionRequestHandler are necessary to guarantee they 20 // A local reference to the ServerSession is necessary to guarantee it
21 // will be kept alive until after ClientDisconnected() returns. 21 // will be kept alive until after ClientDisconnected() returns.
22 SharedPtr<ServerSession> server = parent->server; 22 SharedPtr<ServerSession> server = parent->server;
23 if (server) { 23 if (server) {
24 std::shared_ptr<SessionRequestHandler> hle_handler = server->hle_handler; 24 server->ClientDisconnected();
25 if (hle_handler)
26 hle_handler->ClientDisconnected(server);
27
28 // TODO(Subv): Force a wake up of all the ServerSession's waiting threads and set
29 // their WaitSynchronization result to 0xC920181A.
30
31 // Clean up the list of client threads with pending requests, they are unneeded now that the
32 // client endpoint is closed.
33 server->pending_requesting_threads.clear();
34 server->currently_handling = nullptr;
35 } 25 }
36 26
37 parent->client = nullptr; 27 parent->client = nullptr;
diff --git a/src/core/hle/kernel/client_session.h b/src/core/hle/kernel/client_session.h
index 4c18de69c..09cdff588 100644
--- a/src/core/hle/kernel/client_session.h
+++ b/src/core/hle/kernel/client_session.h
@@ -29,21 +29,22 @@ public:
29 return name; 29 return name;
30 } 30 }
31 31
32 static const HandleType HANDLE_TYPE = HandleType::ClientSession; 32 static constexpr HandleType HANDLE_TYPE = HandleType::ClientSession;
33 HandleType GetHandleType() const override { 33 HandleType GetHandleType() const override {
34 return HANDLE_TYPE; 34 return HANDLE_TYPE;
35 } 35 }
36 36
37 ResultCode SendSyncRequest(SharedPtr<Thread> thread); 37 ResultCode SendSyncRequest(SharedPtr<Thread> thread);
38 38
39 std::string name; ///< Name of client port (optional) 39private:
40 explicit ClientSession(KernelCore& kernel);
41 ~ClientSession() override;
40 42
41 /// The parent session, which links to the server endpoint. 43 /// The parent session, which links to the server endpoint.
42 std::shared_ptr<Session> parent; 44 std::shared_ptr<Session> parent;
43 45
44private: 46 /// Name of the client session (optional)
45 explicit ClientSession(KernelCore& kernel); 47 std::string name;
46 ~ClientSession() override;
47}; 48};
48 49
49} // namespace Kernel 50} // namespace Kernel
diff --git a/src/core/hle/kernel/code_set.cpp b/src/core/hle/kernel/code_set.cpp
new file mode 100644
index 000000000..1f434e9af
--- /dev/null
+++ b/src/core/hle/kernel/code_set.cpp
@@ -0,0 +1,12 @@
1// Copyright 2019 yuzu emulator team
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "core/hle/kernel/code_set.h"
6
7namespace Kernel {
8
9CodeSet::CodeSet() = default;
10CodeSet::~CodeSet() = default;
11
12} // namespace Kernel
diff --git a/src/core/hle/kernel/code_set.h b/src/core/hle/kernel/code_set.h
new file mode 100644
index 000000000..879957dcb
--- /dev/null
+++ b/src/core/hle/kernel/code_set.h
@@ -0,0 +1,89 @@
1// Copyright 2019 yuzu emulator team
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <cstddef>
8#include <vector>
9
10#include "common/common_types.h"
11
12namespace Kernel {
13
14/**
15 * Represents executable data that may be loaded into a kernel process.
16 *
17 * A code set consists of three basic segments:
18 * - A code (AKA text) segment,
19 * - A read-only data segment (rodata)
20 * - A data segment
21 *
22 * The code segment is the portion of the object file that contains
23 * executable instructions.
24 *
25 * The read-only data segment in the portion of the object file that
26 * contains (as one would expect) read-only data, such as fixed constant
27 * values and data structures.
28 *
29 * The data segment is similar to the read-only data segment -- it contains
30 * variables and data structures that have predefined values, however,
31 * entities within this segment can be modified.
32 */
33struct CodeSet final {
34 /// A single segment within a code set.
35 struct Segment final {
36 /// The byte offset that this segment is located at.
37 std::size_t offset = 0;
38
39 /// The address to map this segment to.
40 VAddr addr = 0;
41
42 /// The size of this segment in bytes.
43 u32 size = 0;
44 };
45
46 explicit CodeSet();
47 ~CodeSet();
48
49 CodeSet(const CodeSet&) = delete;
50 CodeSet& operator=(const CodeSet&) = delete;
51
52 CodeSet(CodeSet&&) = default;
53 CodeSet& operator=(CodeSet&&) = default;
54
55 Segment& CodeSegment() {
56 return segments[0];
57 }
58
59 const Segment& CodeSegment() const {
60 return segments[0];
61 }
62
63 Segment& RODataSegment() {
64 return segments[1];
65 }
66
67 const Segment& RODataSegment() const {
68 return segments[1];
69 }
70
71 Segment& DataSegment() {
72 return segments[2];
73 }
74
75 const Segment& DataSegment() const {
76 return segments[2];
77 }
78
79 /// The overall data that backs this code set.
80 std::vector<u8> memory;
81
82 /// The segments that comprise this code set.
83 std::array<Segment, 3> segments;
84
85 /// The entry point address for this code set.
86 VAddr entrypoint = 0;
87};
88
89} // namespace Kernel
diff --git a/src/core/hle/kernel/errors.h b/src/core/hle/kernel/errors.h
index d17eb0cb6..8097b3863 100644
--- a/src/core/hle/kernel/errors.h
+++ b/src/core/hle/kernel/errors.h
@@ -14,6 +14,7 @@ constexpr ResultCode ERR_MAX_CONNECTIONS_REACHED{ErrorModule::Kernel, 7};
14constexpr ResultCode ERR_INVALID_CAPABILITY_DESCRIPTOR{ErrorModule::Kernel, 14}; 14constexpr ResultCode ERR_INVALID_CAPABILITY_DESCRIPTOR{ErrorModule::Kernel, 14};
15constexpr ResultCode ERR_INVALID_SIZE{ErrorModule::Kernel, 101}; 15constexpr ResultCode ERR_INVALID_SIZE{ErrorModule::Kernel, 101};
16constexpr ResultCode ERR_INVALID_ADDRESS{ErrorModule::Kernel, 102}; 16constexpr ResultCode ERR_INVALID_ADDRESS{ErrorModule::Kernel, 102};
17constexpr ResultCode ERR_OUT_OF_MEMORY{ErrorModule::Kernel, 104};
17constexpr ResultCode ERR_HANDLE_TABLE_FULL{ErrorModule::Kernel, 105}; 18constexpr ResultCode ERR_HANDLE_TABLE_FULL{ErrorModule::Kernel, 105};
18constexpr ResultCode ERR_INVALID_ADDRESS_STATE{ErrorModule::Kernel, 106}; 19constexpr ResultCode ERR_INVALID_ADDRESS_STATE{ErrorModule::Kernel, 106};
19constexpr ResultCode ERR_INVALID_MEMORY_PERMISSIONS{ErrorModule::Kernel, 108}; 20constexpr ResultCode ERR_INVALID_MEMORY_PERMISSIONS{ErrorModule::Kernel, 108};
diff --git a/src/core/hle/kernel/handle_table.cpp b/src/core/hle/kernel/handle_table.cpp
index c8acde5b1..bdfaa977f 100644
--- a/src/core/hle/kernel/handle_table.cpp
+++ b/src/core/hle/kernel/handle_table.cpp
@@ -14,32 +14,47 @@
14namespace Kernel { 14namespace Kernel {
15namespace { 15namespace {
16constexpr u16 GetSlot(Handle handle) { 16constexpr u16 GetSlot(Handle handle) {
17 return handle >> 15; 17 return static_cast<u16>(handle >> 15);
18} 18}
19 19
20constexpr u16 GetGeneration(Handle handle) { 20constexpr u16 GetGeneration(Handle handle) {
21 return handle & 0x7FFF; 21 return static_cast<u16>(handle & 0x7FFF);
22} 22}
23} // Anonymous namespace 23} // Anonymous namespace
24 24
25HandleTable::HandleTable() { 25HandleTable::HandleTable() {
26 next_generation = 1;
27 Clear(); 26 Clear();
28} 27}
29 28
30HandleTable::~HandleTable() = default; 29HandleTable::~HandleTable() = default;
31 30
31ResultCode HandleTable::SetSize(s32 handle_table_size) {
32 if (static_cast<u32>(handle_table_size) > MAX_COUNT) {
33 return ERR_OUT_OF_MEMORY;
34 }
35
36 // Values less than or equal to zero indicate to use the maximum allowable
37 // size for the handle table in the actual kernel, so we ignore the given
38 // value in that case, since we assume this by default unless this function
39 // is called.
40 if (handle_table_size > 0) {
41 table_size = static_cast<u16>(handle_table_size);
42 }
43
44 return RESULT_SUCCESS;
45}
46
32ResultVal<Handle> HandleTable::Create(SharedPtr<Object> obj) { 47ResultVal<Handle> HandleTable::Create(SharedPtr<Object> obj) {
33 DEBUG_ASSERT(obj != nullptr); 48 DEBUG_ASSERT(obj != nullptr);
34 49
35 u16 slot = next_free_slot; 50 const u16 slot = next_free_slot;
36 if (slot >= generations.size()) { 51 if (slot >= table_size) {
37 LOG_ERROR(Kernel, "Unable to allocate Handle, too many slots in use."); 52 LOG_ERROR(Kernel, "Unable to allocate Handle, too many slots in use.");
38 return ERR_HANDLE_TABLE_FULL; 53 return ERR_HANDLE_TABLE_FULL;
39 } 54 }
40 next_free_slot = generations[slot]; 55 next_free_slot = generations[slot];
41 56
42 u16 generation = next_generation++; 57 const u16 generation = next_generation++;
43 58
44 // Overflow count so it fits in the 15 bits dedicated to the generation in the handle. 59 // Overflow count so it fits in the 15 bits dedicated to the generation in the handle.
45 // Horizon OS uses zero to represent an invalid handle, so skip to 1. 60 // Horizon OS uses zero to represent an invalid handle, so skip to 1.
@@ -64,10 +79,11 @@ ResultVal<Handle> HandleTable::Duplicate(Handle handle) {
64} 79}
65 80
66ResultCode HandleTable::Close(Handle handle) { 81ResultCode HandleTable::Close(Handle handle) {
67 if (!IsValid(handle)) 82 if (!IsValid(handle)) {
68 return ERR_INVALID_HANDLE; 83 return ERR_INVALID_HANDLE;
84 }
69 85
70 u16 slot = GetSlot(handle); 86 const u16 slot = GetSlot(handle);
71 87
72 objects[slot] = nullptr; 88 objects[slot] = nullptr;
73 89
@@ -77,10 +93,10 @@ ResultCode HandleTable::Close(Handle handle) {
77} 93}
78 94
79bool HandleTable::IsValid(Handle handle) const { 95bool HandleTable::IsValid(Handle handle) const {
80 std::size_t slot = GetSlot(handle); 96 const std::size_t slot = GetSlot(handle);
81 u16 generation = GetGeneration(handle); 97 const u16 generation = GetGeneration(handle);
82 98
83 return slot < MAX_COUNT && objects[slot] != nullptr && generations[slot] == generation; 99 return slot < table_size && objects[slot] != nullptr && generations[slot] == generation;
84} 100}
85 101
86SharedPtr<Object> HandleTable::GetGeneric(Handle handle) const { 102SharedPtr<Object> HandleTable::GetGeneric(Handle handle) const {
@@ -97,7 +113,7 @@ SharedPtr<Object> HandleTable::GetGeneric(Handle handle) const {
97} 113}
98 114
99void HandleTable::Clear() { 115void HandleTable::Clear() {
100 for (u16 i = 0; i < MAX_COUNT; ++i) { 116 for (u16 i = 0; i < table_size; ++i) {
101 generations[i] = i + 1; 117 generations[i] = i + 1;
102 objects[i] = nullptr; 118 objects[i] = nullptr;
103 } 119 }
diff --git a/src/core/hle/kernel/handle_table.h b/src/core/hle/kernel/handle_table.h
index 89a3bc740..44901391b 100644
--- a/src/core/hle/kernel/handle_table.h
+++ b/src/core/hle/kernel/handle_table.h
@@ -50,6 +50,20 @@ public:
50 ~HandleTable(); 50 ~HandleTable();
51 51
52 /** 52 /**
53 * Sets the number of handles that may be in use at one time
54 * for this handle table.
55 *
56 * @param handle_table_size The desired size to limit the handle table to.
57 *
58 * @returns an error code indicating if initialization was successful.
59 * If initialization was not successful, then ERR_OUT_OF_MEMORY
60 * will be returned.
61 *
62 * @pre handle_table_size must be within the range [0, 1024]
63 */
64 ResultCode SetSize(s32 handle_table_size);
65
66 /**
53 * Allocates a handle for the given object. 67 * Allocates a handle for the given object.
54 * @return The created Handle or one of the following errors: 68 * @return The created Handle or one of the following errors:
55 * - `ERR_HANDLE_TABLE_FULL`: the maximum number of handles has been exceeded. 69 * - `ERR_HANDLE_TABLE_FULL`: the maximum number of handles has been exceeded.
@@ -104,13 +118,20 @@ private:
104 std::array<u16, MAX_COUNT> generations; 118 std::array<u16, MAX_COUNT> generations;
105 119
106 /** 120 /**
121 * The limited size of the handle table. This can be specified by process
122 * capabilities in order to restrict the overall number of handles that
123 * can be created in a process instance
124 */
125 u16 table_size = static_cast<u16>(MAX_COUNT);
126
127 /**
107 * Global counter of the number of created handles. Stored in `generations` when a handle is 128 * Global counter of the number of created handles. Stored in `generations` when a handle is
108 * created, and wraps around to 1 when it hits 0x8000. 129 * created, and wraps around to 1 when it hits 0x8000.
109 */ 130 */
110 u16 next_generation; 131 u16 next_generation = 1;
111 132
112 /// Head of the free slots linked list. 133 /// Head of the free slots linked list.
113 u16 next_free_slot; 134 u16 next_free_slot = 0;
114}; 135};
115 136
116} // namespace Kernel 137} // namespace Kernel
diff --git a/src/core/hle/kernel/hle_ipc.cpp b/src/core/hle/kernel/hle_ipc.cpp
index 5dd855db8..fe710eb6e 100644
--- a/src/core/hle/kernel/hle_ipc.cpp
+++ b/src/core/hle/kernel/hle_ipc.cpp
@@ -86,7 +86,7 @@ HLERequestContext::~HLERequestContext() = default;
86void HLERequestContext::ParseCommandBuffer(const HandleTable& handle_table, u32_le* src_cmdbuf, 86void HLERequestContext::ParseCommandBuffer(const HandleTable& handle_table, u32_le* src_cmdbuf,
87 bool incoming) { 87 bool incoming) {
88 IPC::RequestParser rp(src_cmdbuf); 88 IPC::RequestParser rp(src_cmdbuf);
89 command_header = std::make_shared<IPC::CommandHeader>(rp.PopRaw<IPC::CommandHeader>()); 89 command_header = rp.PopRaw<IPC::CommandHeader>();
90 90
91 if (command_header->type == IPC::CommandType::Close) { 91 if (command_header->type == IPC::CommandType::Close) {
92 // Close does not populate the rest of the IPC header 92 // Close does not populate the rest of the IPC header
@@ -95,8 +95,7 @@ void HLERequestContext::ParseCommandBuffer(const HandleTable& handle_table, u32_
95 95
96 // If handle descriptor is present, add size of it 96 // If handle descriptor is present, add size of it
97 if (command_header->enable_handle_descriptor) { 97 if (command_header->enable_handle_descriptor) {
98 handle_descriptor_header = 98 handle_descriptor_header = rp.PopRaw<IPC::HandleDescriptorHeader>();
99 std::make_shared<IPC::HandleDescriptorHeader>(rp.PopRaw<IPC::HandleDescriptorHeader>());
100 if (handle_descriptor_header->send_current_pid) { 99 if (handle_descriptor_header->send_current_pid) {
101 rp.Skip(2, false); 100 rp.Skip(2, false);
102 } 101 }
@@ -140,16 +139,15 @@ void HLERequestContext::ParseCommandBuffer(const HandleTable& handle_table, u32_
140 // If this is an incoming message, only CommandType "Request" has a domain header 139 // If this is an incoming message, only CommandType "Request" has a domain header
141 // All outgoing domain messages have the domain header, if only incoming has it 140 // All outgoing domain messages have the domain header, if only incoming has it
142 if (incoming || domain_message_header) { 141 if (incoming || domain_message_header) {
143 domain_message_header = 142 domain_message_header = rp.PopRaw<IPC::DomainMessageHeader>();
144 std::make_shared<IPC::DomainMessageHeader>(rp.PopRaw<IPC::DomainMessageHeader>());
145 } else { 143 } else {
146 if (Session()->IsDomain()) 144 if (Session()->IsDomain()) {
147 LOG_WARNING(IPC, "Domain request has no DomainMessageHeader!"); 145 LOG_WARNING(IPC, "Domain request has no DomainMessageHeader!");
146 }
148 } 147 }
149 } 148 }
150 149
151 data_payload_header = 150 data_payload_header = rp.PopRaw<IPC::DataPayloadHeader>();
152 std::make_shared<IPC::DataPayloadHeader>(rp.PopRaw<IPC::DataPayloadHeader>());
153 151
154 data_payload_offset = rp.GetCurrentOffset(); 152 data_payload_offset = rp.GetCurrentOffset();
155 153
@@ -264,11 +262,11 @@ ResultCode HLERequestContext::WriteToOutgoingCommandBuffer(Thread& thread) {
264 // Write the domain objects to the command buffer, these go after the raw untranslated data. 262 // Write the domain objects to the command buffer, these go after the raw untranslated data.
265 // TODO(Subv): This completely ignores C buffers. 263 // TODO(Subv): This completely ignores C buffers.
266 std::size_t domain_offset = size - domain_message_header->num_objects; 264 std::size_t domain_offset = size - domain_message_header->num_objects;
267 auto& request_handlers = server_session->domain_request_handlers;
268 265
269 for (auto& object : domain_objects) { 266 for (const auto& object : domain_objects) {
270 request_handlers.emplace_back(object); 267 server_session->AppendDomainRequestHandler(object);
271 dst_cmdbuf[domain_offset++] = static_cast<u32_le>(request_handlers.size()); 268 dst_cmdbuf[domain_offset++] =
269 static_cast<u32_le>(server_session->NumDomainRequestHandlers());
272 } 270 }
273 } 271 }
274 272
diff --git a/src/core/hle/kernel/hle_ipc.h b/src/core/hle/kernel/hle_ipc.h
index cb1c5aff3..2bdd9f02c 100644
--- a/src/core/hle/kernel/hle_ipc.h
+++ b/src/core/hle/kernel/hle_ipc.h
@@ -6,6 +6,7 @@
6 6
7#include <array> 7#include <array>
8#include <memory> 8#include <memory>
9#include <optional>
9#include <string> 10#include <string>
10#include <type_traits> 11#include <type_traits>
11#include <vector> 12#include <vector>
@@ -15,6 +16,8 @@
15#include "core/hle/ipc.h" 16#include "core/hle/ipc.h"
16#include "core/hle/kernel/object.h" 17#include "core/hle/kernel/object.h"
17 18
19union ResultCode;
20
18namespace Service { 21namespace Service {
19class ServiceFrameworkBase; 22class ServiceFrameworkBase;
20} 23}
@@ -166,12 +169,12 @@ public:
166 return buffer_c_desciptors; 169 return buffer_c_desciptors;
167 } 170 }
168 171
169 const IPC::DomainMessageHeader* GetDomainMessageHeader() const { 172 const IPC::DomainMessageHeader& GetDomainMessageHeader() const {
170 return domain_message_header.get(); 173 return domain_message_header.value();
171 } 174 }
172 175
173 bool HasDomainMessageHeader() const { 176 bool HasDomainMessageHeader() const {
174 return domain_message_header != nullptr; 177 return domain_message_header.has_value();
175 } 178 }
176 179
177 /// Helper function to read a buffer using the appropriate buffer descriptor 180 /// Helper function to read a buffer using the appropriate buffer descriptor
@@ -208,14 +211,12 @@ public:
208 211
209 template <typename T> 212 template <typename T>
210 SharedPtr<T> GetCopyObject(std::size_t index) { 213 SharedPtr<T> GetCopyObject(std::size_t index) {
211 ASSERT(index < copy_objects.size()); 214 return DynamicObjectCast<T>(copy_objects.at(index));
212 return DynamicObjectCast<T>(copy_objects[index]);
213 } 215 }
214 216
215 template <typename T> 217 template <typename T>
216 SharedPtr<T> GetMoveObject(std::size_t index) { 218 SharedPtr<T> GetMoveObject(std::size_t index) {
217 ASSERT(index < move_objects.size()); 219 return DynamicObjectCast<T>(move_objects.at(index));
218 return DynamicObjectCast<T>(move_objects[index]);
219 } 220 }
220 221
221 void AddMoveObject(SharedPtr<Object> object) { 222 void AddMoveObject(SharedPtr<Object> object) {
@@ -232,7 +233,7 @@ public:
232 233
233 template <typename T> 234 template <typename T>
234 std::shared_ptr<T> GetDomainRequestHandler(std::size_t index) const { 235 std::shared_ptr<T> GetDomainRequestHandler(std::size_t index) const {
235 return std::static_pointer_cast<T>(domain_request_handlers[index]); 236 return std::static_pointer_cast<T>(domain_request_handlers.at(index));
236 } 237 }
237 238
238 void SetDomainRequestHandlers( 239 void SetDomainRequestHandlers(
@@ -272,10 +273,10 @@ private:
272 boost::container::small_vector<SharedPtr<Object>, 8> copy_objects; 273 boost::container::small_vector<SharedPtr<Object>, 8> copy_objects;
273 boost::container::small_vector<std::shared_ptr<SessionRequestHandler>, 8> domain_objects; 274 boost::container::small_vector<std::shared_ptr<SessionRequestHandler>, 8> domain_objects;
274 275
275 std::shared_ptr<IPC::CommandHeader> command_header; 276 std::optional<IPC::CommandHeader> command_header;
276 std::shared_ptr<IPC::HandleDescriptorHeader> handle_descriptor_header; 277 std::optional<IPC::HandleDescriptorHeader> handle_descriptor_header;
277 std::shared_ptr<IPC::DataPayloadHeader> data_payload_header; 278 std::optional<IPC::DataPayloadHeader> data_payload_header;
278 std::shared_ptr<IPC::DomainMessageHeader> domain_message_header; 279 std::optional<IPC::DomainMessageHeader> domain_message_header;
279 std::vector<IPC::BufferDescriptorX> buffer_x_desciptors; 280 std::vector<IPC::BufferDescriptorX> buffer_x_desciptors;
280 std::vector<IPC::BufferDescriptorABW> buffer_a_desciptors; 281 std::vector<IPC::BufferDescriptorABW> buffer_a_desciptors;
281 std::vector<IPC::BufferDescriptorABW> buffer_b_desciptors; 282 std::vector<IPC::BufferDescriptorABW> buffer_b_desciptors;
diff --git a/src/core/hle/kernel/kernel.cpp b/src/core/hle/kernel/kernel.cpp
index 67674cd47..4d58e7c69 100644
--- a/src/core/hle/kernel/kernel.cpp
+++ b/src/core/hle/kernel/kernel.cpp
@@ -12,15 +12,16 @@
12 12
13#include "core/core.h" 13#include "core/core.h"
14#include "core/core_timing.h" 14#include "core/core_timing.h"
15#include "core/hle/kernel/address_arbiter.h"
15#include "core/hle/kernel/client_port.h" 16#include "core/hle/kernel/client_port.h"
16#include "core/hle/kernel/handle_table.h" 17#include "core/hle/kernel/handle_table.h"
17#include "core/hle/kernel/kernel.h" 18#include "core/hle/kernel/kernel.h"
18#include "core/hle/kernel/process.h" 19#include "core/hle/kernel/process.h"
19#include "core/hle/kernel/resource_limit.h" 20#include "core/hle/kernel/resource_limit.h"
20#include "core/hle/kernel/thread.h" 21#include "core/hle/kernel/thread.h"
21#include "core/hle/kernel/timer.h"
22#include "core/hle/lock.h" 22#include "core/hle/lock.h"
23#include "core/hle/result.h" 23#include "core/hle/result.h"
24#include "core/memory.h"
24 25
25namespace Kernel { 26namespace Kernel {
26 27
@@ -29,12 +30,12 @@ namespace Kernel {
29 * @param thread_handle The handle of the thread that's been awoken 30 * @param thread_handle The handle of the thread that's been awoken
30 * @param cycles_late The number of CPU cycles that have passed since the desired wakeup time 31 * @param cycles_late The number of CPU cycles that have passed since the desired wakeup time
31 */ 32 */
32static void ThreadWakeupCallback(u64 thread_handle, [[maybe_unused]] int cycles_late) { 33static void ThreadWakeupCallback(u64 thread_handle, [[maybe_unused]] s64 cycles_late) {
33 const auto proper_handle = static_cast<Handle>(thread_handle); 34 const auto proper_handle = static_cast<Handle>(thread_handle);
34 const auto& system = Core::System::GetInstance(); 35 const auto& system = Core::System::GetInstance();
35 36
36 // Lock the global kernel mutex when we enter the kernel HLE. 37 // Lock the global kernel mutex when we enter the kernel HLE.
37 std::lock_guard<std::recursive_mutex> lock(HLE::g_hle_lock); 38 std::lock_guard lock{HLE::g_hle_lock};
38 39
39 SharedPtr<Thread> thread = 40 SharedPtr<Thread> thread =
40 system.Kernel().RetrieveThreadFromWakeupCallbackHandleTable(proper_handle); 41 system.Kernel().RetrieveThreadFromWakeupCallbackHandleTable(proper_handle);
@@ -62,7 +63,8 @@ static void ThreadWakeupCallback(u64 thread_handle, [[maybe_unused]] int cycles_
62 63
63 if (thread->GetMutexWaitAddress() != 0 || thread->GetCondVarWaitAddress() != 0 || 64 if (thread->GetMutexWaitAddress() != 0 || thread->GetCondVarWaitAddress() != 0 ||
64 thread->GetWaitHandle() != 0) { 65 thread->GetWaitHandle() != 0) {
65 ASSERT(thread->GetStatus() == ThreadStatus::WaitMutex); 66 ASSERT(thread->GetStatus() == ThreadStatus::WaitMutex ||
67 thread->GetStatus() == ThreadStatus::WaitCondVar);
66 thread->SetMutexWaitAddress(0); 68 thread->SetMutexWaitAddress(0);
67 thread->SetCondVarWaitAddress(0); 69 thread->SetCondVarWaitAddress(0);
68 thread->SetWaitHandle(0); 70 thread->SetWaitHandle(0);
@@ -86,27 +88,14 @@ static void ThreadWakeupCallback(u64 thread_handle, [[maybe_unused]] int cycles_
86 } 88 }
87} 89}
88 90
89/// The timer callback event, called when a timer is fired
90static void TimerCallback(u64 timer_handle, int cycles_late) {
91 const auto proper_handle = static_cast<Handle>(timer_handle);
92 const auto& system = Core::System::GetInstance();
93 SharedPtr<Timer> timer = system.Kernel().RetrieveTimerFromCallbackHandleTable(proper_handle);
94
95 if (timer == nullptr) {
96 LOG_CRITICAL(Kernel, "Callback fired for invalid timer {:016X}", timer_handle);
97 return;
98 }
99
100 timer->Signal(cycles_late);
101}
102
103struct KernelCore::Impl { 91struct KernelCore::Impl {
92 explicit Impl(Core::System& system) : system{system} {}
93
104 void Initialize(KernelCore& kernel) { 94 void Initialize(KernelCore& kernel) {
105 Shutdown(); 95 Shutdown();
106 96
107 InitializeSystemResourceLimit(kernel); 97 InitializeSystemResourceLimit(kernel);
108 InitializeThreads(); 98 InitializeThreads();
109 InitializeTimers();
110 } 99 }
111 100
112 void Shutdown() { 101 void Shutdown() {
@@ -122,15 +111,12 @@ struct KernelCore::Impl {
122 thread_wakeup_callback_handle_table.Clear(); 111 thread_wakeup_callback_handle_table.Clear();
123 thread_wakeup_event_type = nullptr; 112 thread_wakeup_event_type = nullptr;
124 113
125 timer_callback_handle_table.Clear();
126 timer_callback_event_type = nullptr;
127
128 named_ports.clear(); 114 named_ports.clear();
129 } 115 }
130 116
131 // Creates the default system resource limit 117 // Creates the default system resource limit
132 void InitializeSystemResourceLimit(KernelCore& kernel) { 118 void InitializeSystemResourceLimit(KernelCore& kernel) {
133 system_resource_limit = ResourceLimit::Create(kernel, "System"); 119 system_resource_limit = ResourceLimit::Create(kernel);
134 120
135 // If setting the default system values fails, then something seriously wrong has occurred. 121 // If setting the default system values fails, then something seriously wrong has occurred.
136 ASSERT(system_resource_limit->SetLimitValue(ResourceType::PhysicalMemory, 0x200000000) 122 ASSERT(system_resource_limit->SetLimitValue(ResourceType::PhysicalMemory, 0x200000000)
@@ -143,12 +129,7 @@ struct KernelCore::Impl {
143 129
144 void InitializeThreads() { 130 void InitializeThreads() {
145 thread_wakeup_event_type = 131 thread_wakeup_event_type =
146 CoreTiming::RegisterEvent("ThreadWakeupCallback", ThreadWakeupCallback); 132 system.CoreTiming().RegisterEvent("ThreadWakeupCallback", ThreadWakeupCallback);
147 }
148
149 void InitializeTimers() {
150 timer_callback_handle_table.Clear();
151 timer_callback_event_type = CoreTiming::RegisterEvent("TimerCallback", TimerCallback);
152 } 133 }
153 134
154 std::atomic<u32> next_object_id{0}; 135 std::atomic<u32> next_object_id{0};
@@ -161,13 +142,7 @@ struct KernelCore::Impl {
161 142
162 SharedPtr<ResourceLimit> system_resource_limit; 143 SharedPtr<ResourceLimit> system_resource_limit;
163 144
164 /// The event type of the generic timer callback event 145 Core::Timing::EventType* thread_wakeup_event_type = nullptr;
165 CoreTiming::EventType* timer_callback_event_type = nullptr;
166 // TODO(yuriks): This can be removed if Timer objects are explicitly pooled in the future,
167 // allowing us to simply use a pool index or similar.
168 Kernel::HandleTable timer_callback_handle_table;
169
170 CoreTiming::EventType* thread_wakeup_event_type = nullptr;
171 // TODO(yuriks): This can be removed if Thread objects are explicitly pooled in the future, 146 // TODO(yuriks): This can be removed if Thread objects are explicitly pooled in the future,
172 // allowing us to simply use a pool index or similar. 147 // allowing us to simply use a pool index or similar.
173 Kernel::HandleTable thread_wakeup_callback_handle_table; 148 Kernel::HandleTable thread_wakeup_callback_handle_table;
@@ -175,9 +150,12 @@ struct KernelCore::Impl {
175 /// Map of named ports managed by the kernel, which can be retrieved using 150 /// Map of named ports managed by the kernel, which can be retrieved using
176 /// the ConnectToPort SVC. 151 /// the ConnectToPort SVC.
177 NamedPortTable named_ports; 152 NamedPortTable named_ports;
153
154 // System context
155 Core::System& system;
178}; 156};
179 157
180KernelCore::KernelCore() : impl{std::make_unique<Impl>()} {} 158KernelCore::KernelCore(Core::System& system) : impl{std::make_unique<Impl>(system)} {}
181KernelCore::~KernelCore() { 159KernelCore::~KernelCore() {
182 Shutdown(); 160 Shutdown();
183} 161}
@@ -198,16 +176,13 @@ SharedPtr<Thread> KernelCore::RetrieveThreadFromWakeupCallbackHandleTable(Handle
198 return impl->thread_wakeup_callback_handle_table.Get<Thread>(handle); 176 return impl->thread_wakeup_callback_handle_table.Get<Thread>(handle);
199} 177}
200 178
201SharedPtr<Timer> KernelCore::RetrieveTimerFromCallbackHandleTable(Handle handle) const {
202 return impl->timer_callback_handle_table.Get<Timer>(handle);
203}
204
205void KernelCore::AppendNewProcess(SharedPtr<Process> process) { 179void KernelCore::AppendNewProcess(SharedPtr<Process> process) {
206 impl->process_list.push_back(std::move(process)); 180 impl->process_list.push_back(std::move(process));
207} 181}
208 182
209void KernelCore::MakeCurrentProcess(Process* process) { 183void KernelCore::MakeCurrentProcess(Process* process) {
210 impl->current_process = process; 184 impl->current_process = process;
185 Memory::SetCurrentPageTable(&process->VMManager().page_table);
211} 186}
212 187
213Process* KernelCore::CurrentProcess() { 188Process* KernelCore::CurrentProcess() {
@@ -218,6 +193,10 @@ const Process* KernelCore::CurrentProcess() const {
218 return impl->current_process; 193 return impl->current_process;
219} 194}
220 195
196const std::vector<SharedPtr<Process>>& KernelCore::GetProcessList() const {
197 return impl->process_list;
198}
199
221void KernelCore::AddNamedPort(std::string name, SharedPtr<ClientPort> port) { 200void KernelCore::AddNamedPort(std::string name, SharedPtr<ClientPort> port) {
222 impl->named_ports.emplace(std::move(name), std::move(port)); 201 impl->named_ports.emplace(std::move(name), std::move(port));
223} 202}
@@ -247,18 +226,10 @@ u64 KernelCore::CreateNewProcessID() {
247 return impl->next_process_id++; 226 return impl->next_process_id++;
248} 227}
249 228
250ResultVal<Handle> KernelCore::CreateTimerCallbackHandle(const SharedPtr<Timer>& timer) { 229Core::Timing::EventType* KernelCore::ThreadWakeupCallbackEventType() const {
251 return impl->timer_callback_handle_table.Create(timer);
252}
253
254CoreTiming::EventType* KernelCore::ThreadWakeupCallbackEventType() const {
255 return impl->thread_wakeup_event_type; 230 return impl->thread_wakeup_event_type;
256} 231}
257 232
258CoreTiming::EventType* KernelCore::TimerCallbackEventType() const {
259 return impl->timer_callback_event_type;
260}
261
262Kernel::HandleTable& KernelCore::ThreadWakeupCallbackHandleTable() { 233Kernel::HandleTable& KernelCore::ThreadWakeupCallbackHandleTable() {
263 return impl->thread_wakeup_callback_handle_table; 234 return impl->thread_wakeup_callback_handle_table;
264} 235}
diff --git a/src/core/hle/kernel/kernel.h b/src/core/hle/kernel/kernel.h
index 58c9d108b..6b8738599 100644
--- a/src/core/hle/kernel/kernel.h
+++ b/src/core/hle/kernel/kernel.h
@@ -8,21 +8,23 @@
8#include <unordered_map> 8#include <unordered_map>
9#include "core/hle/kernel/object.h" 9#include "core/hle/kernel/object.h"
10 10
11template <typename T> 11namespace Core {
12class ResultVal; 12class System;
13}
13 14
14namespace CoreTiming { 15namespace Core::Timing {
16class CoreTiming;
15struct EventType; 17struct EventType;
16} 18} // namespace Core::Timing
17 19
18namespace Kernel { 20namespace Kernel {
19 21
22class AddressArbiter;
20class ClientPort; 23class ClientPort;
21class HandleTable; 24class HandleTable;
22class Process; 25class Process;
23class ResourceLimit; 26class ResourceLimit;
24class Thread; 27class Thread;
25class Timer;
26 28
27/// Represents a single instance of the kernel. 29/// Represents a single instance of the kernel.
28class KernelCore { 30class KernelCore {
@@ -30,7 +32,14 @@ private:
30 using NamedPortTable = std::unordered_map<std::string, SharedPtr<ClientPort>>; 32 using NamedPortTable = std::unordered_map<std::string, SharedPtr<ClientPort>>;
31 33
32public: 34public:
33 KernelCore(); 35 /// Constructs an instance of the kernel using the given System
36 /// instance as a context for any necessary system-related state,
37 /// such as threads, CPU core state, etc.
38 ///
39 /// @post After execution of the constructor, the provided System
40 /// object *must* outlive the kernel instance itself.
41 ///
42 explicit KernelCore(Core::System& system);
34 ~KernelCore(); 43 ~KernelCore();
35 44
36 KernelCore(const KernelCore&) = delete; 45 KernelCore(const KernelCore&) = delete;
@@ -51,9 +60,6 @@ public:
51 /// Retrieves a shared pointer to a Thread instance within the thread wakeup handle table. 60 /// Retrieves a shared pointer to a Thread instance within the thread wakeup handle table.
52 SharedPtr<Thread> RetrieveThreadFromWakeupCallbackHandleTable(Handle handle) const; 61 SharedPtr<Thread> RetrieveThreadFromWakeupCallbackHandleTable(Handle handle) const;
53 62
54 /// Retrieves a shared pointer to a Timer instance within the timer callback handle table.
55 SharedPtr<Timer> RetrieveTimerFromCallbackHandleTable(Handle handle) const;
56
57 /// Adds the given shared pointer to an internal list of active processes. 63 /// Adds the given shared pointer to an internal list of active processes.
58 void AppendNewProcess(SharedPtr<Process> process); 64 void AppendNewProcess(SharedPtr<Process> process);
59 65
@@ -66,6 +72,9 @@ public:
66 /// Retrieves a const pointer to the current process. 72 /// Retrieves a const pointer to the current process.
67 const Process* CurrentProcess() const; 73 const Process* CurrentProcess() const;
68 74
75 /// Retrieves the list of processes.
76 const std::vector<SharedPtr<Process>>& GetProcessList() const;
77
69 /// Adds a port to the named port table 78 /// Adds a port to the named port table
70 void AddNamedPort(std::string name, SharedPtr<ClientPort> port); 79 void AddNamedPort(std::string name, SharedPtr<ClientPort> port);
71 80
@@ -82,7 +91,6 @@ private:
82 friend class Object; 91 friend class Object;
83 friend class Process; 92 friend class Process;
84 friend class Thread; 93 friend class Thread;
85 friend class Timer;
86 94
87 /// Creates a new object ID, incrementing the internal object ID counter. 95 /// Creates a new object ID, incrementing the internal object ID counter.
88 u32 CreateNewObjectID(); 96 u32 CreateNewObjectID();
@@ -93,14 +101,8 @@ private:
93 /// Creates a new thread ID, incrementing the internal thread ID counter. 101 /// Creates a new thread ID, incrementing the internal thread ID counter.
94 u64 CreateNewThreadID(); 102 u64 CreateNewThreadID();
95 103
96 /// Creates a timer callback handle for the given timer.
97 ResultVal<Handle> CreateTimerCallbackHandle(const SharedPtr<Timer>& timer);
98
99 /// Retrieves the event type used for thread wakeup callbacks. 104 /// Retrieves the event type used for thread wakeup callbacks.
100 CoreTiming::EventType* ThreadWakeupCallbackEventType() const; 105 Core::Timing::EventType* ThreadWakeupCallbackEventType() const;
101
102 /// Retrieves the event type used for timer callbacks.
103 CoreTiming::EventType* TimerCallbackEventType() const;
104 106
105 /// Provides a reference to the thread wakeup callback handle table. 107 /// Provides a reference to the thread wakeup callback handle table.
106 Kernel::HandleTable& ThreadWakeupCallbackHandleTable(); 108 Kernel::HandleTable& ThreadWakeupCallbackHandleTable();
diff --git a/src/core/hle/kernel/mutex.cpp b/src/core/hle/kernel/mutex.cpp
index 0743670ad..98e87313b 100644
--- a/src/core/hle/kernel/mutex.cpp
+++ b/src/core/hle/kernel/mutex.cpp
@@ -2,7 +2,6 @@
2// Licensed under GPLv2 or any later version 2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included. 3// Refer to the license.txt file included.
4 4
5#include <map>
6#include <utility> 5#include <utility>
7#include <vector> 6#include <vector>
8 7
@@ -10,8 +9,11 @@
10#include "core/core.h" 9#include "core/core.h"
11#include "core/hle/kernel/errors.h" 10#include "core/hle/kernel/errors.h"
12#include "core/hle/kernel/handle_table.h" 11#include "core/hle/kernel/handle_table.h"
12#include "core/hle/kernel/kernel.h"
13#include "core/hle/kernel/mutex.h" 13#include "core/hle/kernel/mutex.h"
14#include "core/hle/kernel/object.h" 14#include "core/hle/kernel/object.h"
15#include "core/hle/kernel/process.h"
16#include "core/hle/kernel/scheduler.h"
15#include "core/hle/kernel/thread.h" 17#include "core/hle/kernel/thread.h"
16#include "core/hle/result.h" 18#include "core/hle/result.h"
17#include "core/memory.h" 19#include "core/memory.h"
@@ -57,41 +59,47 @@ static void TransferMutexOwnership(VAddr mutex_addr, SharedPtr<Thread> current_t
57 } 59 }
58} 60}
59 61
60ResultCode Mutex::TryAcquire(HandleTable& handle_table, VAddr address, Handle holding_thread_handle, 62Mutex::Mutex(Core::System& system) : system{system} {}
63Mutex::~Mutex() = default;
64
65ResultCode Mutex::TryAcquire(VAddr address, Handle holding_thread_handle,
61 Handle requesting_thread_handle) { 66 Handle requesting_thread_handle) {
62 // The mutex address must be 4-byte aligned 67 // The mutex address must be 4-byte aligned
63 if ((address % sizeof(u32)) != 0) { 68 if ((address % sizeof(u32)) != 0) {
64 return ERR_INVALID_ADDRESS; 69 return ERR_INVALID_ADDRESS;
65 } 70 }
66 71
72 const auto& handle_table = system.Kernel().CurrentProcess()->GetHandleTable();
73 Thread* const current_thread = system.CurrentScheduler().GetCurrentThread();
67 SharedPtr<Thread> holding_thread = handle_table.Get<Thread>(holding_thread_handle); 74 SharedPtr<Thread> holding_thread = handle_table.Get<Thread>(holding_thread_handle);
68 SharedPtr<Thread> requesting_thread = handle_table.Get<Thread>(requesting_thread_handle); 75 SharedPtr<Thread> requesting_thread = handle_table.Get<Thread>(requesting_thread_handle);
69 76
70 // TODO(Subv): It is currently unknown if it is possible to lock a mutex in behalf of another 77 // TODO(Subv): It is currently unknown if it is possible to lock a mutex in behalf of another
71 // thread. 78 // thread.
72 ASSERT(requesting_thread == GetCurrentThread()); 79 ASSERT(requesting_thread == current_thread);
73 80
74 u32 addr_value = Memory::Read32(address); 81 const u32 addr_value = Memory::Read32(address);
75 82
76 // If the mutex isn't being held, just return success. 83 // If the mutex isn't being held, just return success.
77 if (addr_value != (holding_thread_handle | Mutex::MutexHasWaitersFlag)) { 84 if (addr_value != (holding_thread_handle | Mutex::MutexHasWaitersFlag)) {
78 return RESULT_SUCCESS; 85 return RESULT_SUCCESS;
79 } 86 }
80 87
81 if (holding_thread == nullptr) 88 if (holding_thread == nullptr) {
82 return ERR_INVALID_HANDLE; 89 return ERR_INVALID_HANDLE;
90 }
83 91
84 // Wait until the mutex is released 92 // Wait until the mutex is released
85 GetCurrentThread()->SetMutexWaitAddress(address); 93 current_thread->SetMutexWaitAddress(address);
86 GetCurrentThread()->SetWaitHandle(requesting_thread_handle); 94 current_thread->SetWaitHandle(requesting_thread_handle);
87 95
88 GetCurrentThread()->SetStatus(ThreadStatus::WaitMutex); 96 current_thread->SetStatus(ThreadStatus::WaitMutex);
89 GetCurrentThread()->InvalidateWakeupCallback(); 97 current_thread->InvalidateWakeupCallback();
90 98
91 // Update the lock holder thread's priority to prevent priority inversion. 99 // Update the lock holder thread's priority to prevent priority inversion.
92 holding_thread->AddMutexWaiter(GetCurrentThread()); 100 holding_thread->AddMutexWaiter(current_thread);
93 101
94 Core::System::GetInstance().PrepareReschedule(); 102 system.PrepareReschedule();
95 103
96 return RESULT_SUCCESS; 104 return RESULT_SUCCESS;
97} 105}
@@ -102,7 +110,8 @@ ResultCode Mutex::Release(VAddr address) {
102 return ERR_INVALID_ADDRESS; 110 return ERR_INVALID_ADDRESS;
103 } 111 }
104 112
105 auto [thread, num_waiters] = GetHighestPriorityMutexWaitingThread(GetCurrentThread(), address); 113 auto* const current_thread = system.CurrentScheduler().GetCurrentThread();
114 auto [thread, num_waiters] = GetHighestPriorityMutexWaitingThread(current_thread, address);
106 115
107 // There are no more threads waiting for the mutex, release it completely. 116 // There are no more threads waiting for the mutex, release it completely.
108 if (thread == nullptr) { 117 if (thread == nullptr) {
@@ -111,7 +120,7 @@ ResultCode Mutex::Release(VAddr address) {
111 } 120 }
112 121
113 // Transfer the ownership of the mutex from the previous owner to the new one. 122 // Transfer the ownership of the mutex from the previous owner to the new one.
114 TransferMutexOwnership(address, GetCurrentThread(), thread); 123 TransferMutexOwnership(address, current_thread, thread);
115 124
116 u32 mutex_value = thread->GetWaitHandle(); 125 u32 mutex_value = thread->GetWaitHandle();
117 126
diff --git a/src/core/hle/kernel/mutex.h b/src/core/hle/kernel/mutex.h
index 81e62d497..b904de2e8 100644
--- a/src/core/hle/kernel/mutex.h
+++ b/src/core/hle/kernel/mutex.h
@@ -5,32 +5,34 @@
5#pragma once 5#pragma once
6 6
7#include "common/common_types.h" 7#include "common/common_types.h"
8#include "core/hle/kernel/object.h"
9 8
10union ResultCode; 9union ResultCode;
11 10
12namespace Kernel { 11namespace Core {
12class System;
13}
13 14
14class HandleTable; 15namespace Kernel {
15class Thread;
16 16
17class Mutex final { 17class Mutex final {
18public: 18public:
19 explicit Mutex(Core::System& system);
20 ~Mutex();
21
19 /// Flag that indicates that a mutex still has threads waiting for it. 22 /// Flag that indicates that a mutex still has threads waiting for it.
20 static constexpr u32 MutexHasWaitersFlag = 0x40000000; 23 static constexpr u32 MutexHasWaitersFlag = 0x40000000;
21 /// Mask of the bits in a mutex address value that contain the mutex owner. 24 /// Mask of the bits in a mutex address value that contain the mutex owner.
22 static constexpr u32 MutexOwnerMask = 0xBFFFFFFF; 25 static constexpr u32 MutexOwnerMask = 0xBFFFFFFF;
23 26
24 /// Attempts to acquire a mutex at the specified address. 27 /// Attempts to acquire a mutex at the specified address.
25 static ResultCode TryAcquire(HandleTable& handle_table, VAddr address, 28 ResultCode TryAcquire(VAddr address, Handle holding_thread_handle,
26 Handle holding_thread_handle, Handle requesting_thread_handle); 29 Handle requesting_thread_handle);
27 30
28 /// Releases the mutex at the specified address. 31 /// Releases the mutex at the specified address.
29 static ResultCode Release(VAddr address); 32 ResultCode Release(VAddr address);
30 33
31private: 34private:
32 Mutex() = default; 35 Core::System& system;
33 ~Mutex() = default;
34}; 36};
35 37
36} // namespace Kernel 38} // namespace Kernel
diff --git a/src/core/hle/kernel/object.cpp b/src/core/hle/kernel/object.cpp
index 806078638..10431e94c 100644
--- a/src/core/hle/kernel/object.cpp
+++ b/src/core/hle/kernel/object.cpp
@@ -16,7 +16,6 @@ bool Object::IsWaitable() const {
16 case HandleType::ReadableEvent: 16 case HandleType::ReadableEvent:
17 case HandleType::Thread: 17 case HandleType::Thread:
18 case HandleType::Process: 18 case HandleType::Process:
19 case HandleType::Timer:
20 case HandleType::ServerPort: 19 case HandleType::ServerPort:
21 case HandleType::ServerSession: 20 case HandleType::ServerSession:
22 return true; 21 return true;
@@ -24,7 +23,7 @@ bool Object::IsWaitable() const {
24 case HandleType::Unknown: 23 case HandleType::Unknown:
25 case HandleType::WritableEvent: 24 case HandleType::WritableEvent:
26 case HandleType::SharedMemory: 25 case HandleType::SharedMemory:
27 case HandleType::AddressArbiter: 26 case HandleType::TransferMemory:
28 case HandleType::ResourceLimit: 27 case HandleType::ResourceLimit:
29 case HandleType::ClientPort: 28 case HandleType::ClientPort:
30 case HandleType::ClientSession: 29 case HandleType::ClientSession:
diff --git a/src/core/hle/kernel/object.h b/src/core/hle/kernel/object.h
index 1541b6e3c..332876c27 100644
--- a/src/core/hle/kernel/object.h
+++ b/src/core/hle/kernel/object.h
@@ -22,10 +22,9 @@ enum class HandleType : u32 {
22 WritableEvent, 22 WritableEvent,
23 ReadableEvent, 23 ReadableEvent,
24 SharedMemory, 24 SharedMemory,
25 TransferMemory,
25 Thread, 26 Thread,
26 Process, 27 Process,
27 AddressArbiter,
28 Timer,
29 ResourceLimit, 28 ResourceLimit,
30 ClientPort, 29 ClientPort,
31 ServerPort, 30 ServerPort,
diff --git a/src/core/hle/kernel/process.cpp b/src/core/hle/kernel/process.cpp
index c5aa19afa..4e94048da 100644
--- a/src/core/hle/kernel/process.cpp
+++ b/src/core/hle/kernel/process.cpp
@@ -5,10 +5,12 @@
5#include <algorithm> 5#include <algorithm>
6#include <memory> 6#include <memory>
7#include <random> 7#include <random>
8#include "common/alignment.h"
8#include "common/assert.h" 9#include "common/assert.h"
9#include "common/logging/log.h" 10#include "common/logging/log.h"
10#include "core/core.h" 11#include "core/core.h"
11#include "core/file_sys/program_metadata.h" 12#include "core/file_sys/program_metadata.h"
13#include "core/hle/kernel/code_set.h"
12#include "core/hle/kernel/errors.h" 14#include "core/hle/kernel/errors.h"
13#include "core/hle/kernel/kernel.h" 15#include "core/hle/kernel/kernel.h"
14#include "core/hle/kernel/process.h" 16#include "core/hle/kernel/process.h"
@@ -30,9 +32,6 @@ namespace {
30 * @param priority The priority to give the main thread 32 * @param priority The priority to give the main thread
31 */ 33 */
32void SetupMainThread(Process& owner_process, KernelCore& kernel, VAddr entry_point, u32 priority) { 34void SetupMainThread(Process& owner_process, KernelCore& kernel, VAddr entry_point, u32 priority) {
33 // Setup page table so we can write to memory
34 SetCurrentPageTable(&owner_process.VMManager().page_table);
35
36 // Initialize new "main" thread 35 // Initialize new "main" thread
37 const VAddr stack_top = owner_process.VMManager().GetTLSIORegionEndAddress(); 36 const VAddr stack_top = owner_process.VMManager().GetTLSIORegionEndAddress();
38 auto thread_res = Thread::Create(kernel, "main", entry_point, priority, 0, 37 auto thread_res = Thread::Create(kernel, "main", entry_point, priority, 0,
@@ -50,12 +49,10 @@ void SetupMainThread(Process& owner_process, KernelCore& kernel, VAddr entry_poi
50} 49}
51} // Anonymous namespace 50} // Anonymous namespace
52 51
53CodeSet::CodeSet() = default; 52SharedPtr<Process> Process::Create(Core::System& system, std::string&& name) {
54CodeSet::~CodeSet() = default; 53 auto& kernel = system.Kernel();
55
56SharedPtr<Process> Process::Create(KernelCore& kernel, std::string&& name) {
57 SharedPtr<Process> process(new Process(kernel));
58 54
55 SharedPtr<Process> process(new Process(system));
59 process->name = std::move(name); 56 process->name = std::move(name);
60 process->resource_limit = kernel.GetSystemResourceLimit(); 57 process->resource_limit = kernel.GetSystemResourceLimit();
61 process->status = ProcessStatus::Created; 58 process->status = ProcessStatus::Created;
@@ -76,6 +73,18 @@ SharedPtr<ResourceLimit> Process::GetResourceLimit() const {
76 return resource_limit; 73 return resource_limit;
77} 74}
78 75
76u64 Process::GetTotalPhysicalMemoryUsed() const {
77 return vm_manager.GetCurrentHeapSize() + main_thread_stack_size + code_memory_size;
78}
79
80void Process::RegisterThread(const Thread* thread) {
81 thread_list.push_back(thread);
82}
83
84void Process::UnregisterThread(const Thread* thread) {
85 thread_list.remove(thread);
86}
87
79ResultCode Process::ClearSignalState() { 88ResultCode Process::ClearSignalState() {
80 if (status == ProcessStatus::Exited) { 89 if (status == ProcessStatus::Exited) {
81 LOG_ERROR(Kernel, "called on a terminated process instance."); 90 LOG_ERROR(Kernel, "called on a terminated process instance.");
@@ -97,19 +106,30 @@ ResultCode Process::LoadFromMetadata(const FileSys::ProgramMetadata& metadata) {
97 is_64bit_process = metadata.Is64BitProgram(); 106 is_64bit_process = metadata.Is64BitProgram();
98 107
99 vm_manager.Reset(metadata.GetAddressSpaceType()); 108 vm_manager.Reset(metadata.GetAddressSpaceType());
109 // Ensure that the potentially resized page table is seen by CPU backends.
110 Memory::SetCurrentPageTable(&vm_manager.page_table);
100 111
101 const auto& caps = metadata.GetKernelCapabilities(); 112 const auto& caps = metadata.GetKernelCapabilities();
102 return capabilities.InitializeForUserProcess(caps.data(), caps.size(), vm_manager); 113 const auto capability_init_result =
114 capabilities.InitializeForUserProcess(caps.data(), caps.size(), vm_manager);
115 if (capability_init_result.IsError()) {
116 return capability_init_result;
117 }
118
119 return handle_table.SetSize(capabilities.GetHandleTableSize());
103} 120}
104 121
105void Process::Run(VAddr entry_point, s32 main_thread_priority, u32 stack_size) { 122void Process::Run(VAddr entry_point, s32 main_thread_priority, u64 stack_size) {
123 // The kernel always ensures that the given stack size is page aligned.
124 main_thread_stack_size = Common::AlignUp(stack_size, Memory::PAGE_SIZE);
125
106 // Allocate and map the main thread stack 126 // Allocate and map the main thread stack
107 // TODO(bunnei): This is heap area that should be allocated by the kernel and not mapped as part 127 // TODO(bunnei): This is heap area that should be allocated by the kernel and not mapped as part
108 // of the user address space. 128 // of the user address space.
129 const VAddr mapping_address = vm_manager.GetTLSIORegionEndAddress() - main_thread_stack_size;
109 vm_manager 130 vm_manager
110 .MapMemoryBlock(vm_manager.GetTLSIORegionEndAddress() - stack_size, 131 .MapMemoryBlock(mapping_address, std::make_shared<std::vector<u8>>(main_thread_stack_size),
111 std::make_shared<std::vector<u8>>(stack_size, 0), 0, stack_size, 132 0, main_thread_stack_size, MemoryState::Stack)
112 MemoryState::Stack)
113 .Unwrap(); 133 .Unwrap();
114 134
115 vm_manager.LogLayout(); 135 vm_manager.LogLayout();
@@ -126,7 +146,7 @@ void Process::PrepareForTermination() {
126 if (thread->GetOwnerProcess() != this) 146 if (thread->GetOwnerProcess() != this)
127 continue; 147 continue;
128 148
129 if (thread == GetCurrentThread()) 149 if (thread == system.CurrentScheduler().GetCurrentThread())
130 continue; 150 continue;
131 151
132 // TODO(Subv): When are the other running/ready threads terminated? 152 // TODO(Subv): When are the other running/ready threads terminated?
@@ -138,7 +158,6 @@ void Process::PrepareForTermination() {
138 } 158 }
139 }; 159 };
140 160
141 const auto& system = Core::System::GetInstance();
142 stop_threads(system.Scheduler(0).GetThreadList()); 161 stop_threads(system.Scheduler(0).GetThreadList());
143 stop_threads(system.Scheduler(1).GetThreadList()); 162 stop_threads(system.Scheduler(1).GetThreadList());
144 stop_threads(system.Scheduler(2).GetThreadList()); 163 stop_threads(system.Scheduler(2).GetThreadList());
@@ -206,35 +225,38 @@ void Process::FreeTLSSlot(VAddr tls_address) {
206} 225}
207 226
208void Process::LoadModule(CodeSet module_, VAddr base_addr) { 227void Process::LoadModule(CodeSet module_, VAddr base_addr) {
209 const auto MapSegment = [&](CodeSet::Segment& segment, VMAPermission permissions, 228 const auto memory = std::make_shared<std::vector<u8>>(std::move(module_.memory));
229
230 const auto MapSegment = [&](const CodeSet::Segment& segment, VMAPermission permissions,
210 MemoryState memory_state) { 231 MemoryState memory_state) {
211 const auto vma = vm_manager 232 const auto vma = vm_manager
212 .MapMemoryBlock(segment.addr + base_addr, module_.memory, 233 .MapMemoryBlock(segment.addr + base_addr, memory, segment.offset,
213 segment.offset, segment.size, memory_state) 234 segment.size, memory_state)
214 .Unwrap(); 235 .Unwrap();
215 vm_manager.Reprotect(vma, permissions); 236 vm_manager.Reprotect(vma, permissions);
216 }; 237 };
217 238
218 // Map CodeSet segments 239 // Map CodeSet segments
219 MapSegment(module_.CodeSegment(), VMAPermission::ReadExecute, MemoryState::CodeStatic); 240 MapSegment(module_.CodeSegment(), VMAPermission::ReadExecute, MemoryState::Code);
220 MapSegment(module_.RODataSegment(), VMAPermission::Read, MemoryState::CodeMutable); 241 MapSegment(module_.RODataSegment(), VMAPermission::Read, MemoryState::CodeData);
221 MapSegment(module_.DataSegment(), VMAPermission::ReadWrite, MemoryState::CodeMutable); 242 MapSegment(module_.DataSegment(), VMAPermission::ReadWrite, MemoryState::CodeData);
243
244 code_memory_size += module_.memory.size();
222 245
223 // Clear instruction cache in CPU JIT 246 // Clear instruction cache in CPU JIT
224 Core::System::GetInstance().ArmInterface(0).ClearInstructionCache(); 247 system.InvalidateCpuInstructionCaches();
225 Core::System::GetInstance().ArmInterface(1).ClearInstructionCache();
226 Core::System::GetInstance().ArmInterface(2).ClearInstructionCache();
227 Core::System::GetInstance().ArmInterface(3).ClearInstructionCache();
228} 248}
229 249
230Kernel::Process::Process(KernelCore& kernel) : WaitObject{kernel} {} 250Process::Process(Core::System& system)
231Kernel::Process::~Process() {} 251 : WaitObject{system.Kernel()}, address_arbiter{system}, mutex{system}, system{system} {}
252
253Process::~Process() = default;
232 254
233void Process::Acquire(Thread* thread) { 255void Process::Acquire(Thread* thread) {
234 ASSERT_MSG(!ShouldWait(thread), "Object unavailable!"); 256 ASSERT_MSG(!ShouldWait(thread), "Object unavailable!");
235} 257}
236 258
237bool Process::ShouldWait(Thread* thread) const { 259bool Process::ShouldWait(const Thread* thread) const {
238 return !is_signaled; 260 return !is_signaled;
239} 261}
240 262
diff --git a/src/core/hle/kernel/process.h b/src/core/hle/kernel/process.h
index dcc57ae9f..dda52f4c0 100644
--- a/src/core/hle/kernel/process.h
+++ b/src/core/hle/kernel/process.h
@@ -7,17 +7,23 @@
7#include <array> 7#include <array>
8#include <bitset> 8#include <bitset>
9#include <cstddef> 9#include <cstddef>
10#include <memory> 10#include <list>
11#include <string> 11#include <string>
12#include <vector> 12#include <vector>
13#include <boost/container/static_vector.hpp> 13#include <boost/container/static_vector.hpp>
14#include "common/common_types.h" 14#include "common/common_types.h"
15#include "core/hle/kernel/address_arbiter.h"
15#include "core/hle/kernel/handle_table.h" 16#include "core/hle/kernel/handle_table.h"
17#include "core/hle/kernel/mutex.h"
16#include "core/hle/kernel/process_capability.h" 18#include "core/hle/kernel/process_capability.h"
17#include "core/hle/kernel/vm_manager.h" 19#include "core/hle/kernel/vm_manager.h"
18#include "core/hle/kernel/wait_object.h" 20#include "core/hle/kernel/wait_object.h"
19#include "core/hle/result.h" 21#include "core/hle/result.h"
20 22
23namespace Core {
24class System;
25}
26
21namespace FileSys { 27namespace FileSys {
22class ProgramMetadata; 28class ProgramMetadata;
23} 29}
@@ -28,13 +34,7 @@ class KernelCore;
28class ResourceLimit; 34class ResourceLimit;
29class Thread; 35class Thread;
30 36
31struct AddressMapping { 37struct CodeSet;
32 // Address and size must be page-aligned
33 VAddr address;
34 u64 size;
35 bool read_only;
36 bool unk_flag;
37};
38 38
39enum class MemoryRegion : u16 { 39enum class MemoryRegion : u16 {
40 APPLICATION = 1, 40 APPLICATION = 1,
@@ -60,46 +60,6 @@ enum class ProcessStatus {
60 DebugBreak, 60 DebugBreak,
61}; 61};
62 62
63struct CodeSet final {
64 struct Segment {
65 std::size_t offset = 0;
66 VAddr addr = 0;
67 u32 size = 0;
68 };
69
70 explicit CodeSet();
71 ~CodeSet();
72
73 Segment& CodeSegment() {
74 return segments[0];
75 }
76
77 const Segment& CodeSegment() const {
78 return segments[0];
79 }
80
81 Segment& RODataSegment() {
82 return segments[1];
83 }
84
85 const Segment& RODataSegment() const {
86 return segments[1];
87 }
88
89 Segment& DataSegment() {
90 return segments[2];
91 }
92
93 const Segment& DataSegment() const {
94 return segments[2];
95 }
96
97 std::shared_ptr<std::vector<u8>> memory;
98
99 std::array<Segment, 3> segments;
100 VAddr entrypoint = 0;
101};
102
103class Process final : public WaitObject { 63class Process final : public WaitObject {
104public: 64public:
105 enum : u64 { 65 enum : u64 {
@@ -116,7 +76,7 @@ public:
116 76
117 static constexpr std::size_t RANDOM_ENTROPY_SIZE = 4; 77 static constexpr std::size_t RANDOM_ENTROPY_SIZE = 4;
118 78
119 static SharedPtr<Process> Create(KernelCore& kernel, std::string&& name); 79 static SharedPtr<Process> Create(Core::System& system, std::string&& name);
120 80
121 std::string GetTypeName() const override { 81 std::string GetTypeName() const override {
122 return "Process"; 82 return "Process";
@@ -125,7 +85,7 @@ public:
125 return name; 85 return name;
126 } 86 }
127 87
128 static const HandleType HANDLE_TYPE = HandleType::Process; 88 static constexpr HandleType HANDLE_TYPE = HandleType::Process;
129 HandleType GetHandleType() const override { 89 HandleType GetHandleType() const override {
130 return HANDLE_TYPE; 90 return HANDLE_TYPE;
131 } 91 }
@@ -150,6 +110,26 @@ public:
150 return handle_table; 110 return handle_table;
151 } 111 }
152 112
113 /// Gets a reference to the process' address arbiter.
114 AddressArbiter& GetAddressArbiter() {
115 return address_arbiter;
116 }
117
118 /// Gets a const reference to the process' address arbiter.
119 const AddressArbiter& GetAddressArbiter() const {
120 return address_arbiter;
121 }
122
123 /// Gets a reference to the process' mutex lock.
124 Mutex& GetMutex() {
125 return mutex;
126 }
127
128 /// Gets a const reference to the process' mutex lock
129 const Mutex& GetMutex() const {
130 return mutex;
131 }
132
153 /// Gets the current status of the process 133 /// Gets the current status of the process
154 ProcessStatus GetStatus() const { 134 ProcessStatus GetStatus() const {
155 return status; 135 return status;
@@ -207,6 +187,22 @@ public:
207 return random_entropy.at(index); 187 return random_entropy.at(index);
208 } 188 }
209 189
190 /// Retrieves the total physical memory used by this process in bytes.
191 u64 GetTotalPhysicalMemoryUsed() const;
192
193 /// Gets the list of all threads created with this process as their owner.
194 const std::list<const Thread*>& GetThreadList() const {
195 return thread_list;
196 }
197
198 /// Registers a thread as being created under this process,
199 /// adding it to this process' thread list.
200 void RegisterThread(const Thread* thread);
201
202 /// Unregisters a thread from this process, removing it
203 /// from this process' thread list.
204 void UnregisterThread(const Thread* thread);
205
210 /// Clears the signaled state of the process if and only if it's signaled. 206 /// Clears the signaled state of the process if and only if it's signaled.
211 /// 207 ///
212 /// @pre The process must not be already terminated. If this is called on a 208 /// @pre The process must not be already terminated. If this is called on a
@@ -231,7 +227,7 @@ public:
231 /** 227 /**
232 * Applies address space changes and launches the process main thread. 228 * Applies address space changes and launches the process main thread.
233 */ 229 */
234 void Run(VAddr entry_point, s32 main_thread_priority, u32 stack_size); 230 void Run(VAddr entry_point, s32 main_thread_priority, u64 stack_size);
235 231
236 /** 232 /**
237 * Prepares a process for termination by stopping all of its threads 233 * Prepares a process for termination by stopping all of its threads
@@ -251,11 +247,11 @@ public:
251 void FreeTLSSlot(VAddr tls_address); 247 void FreeTLSSlot(VAddr tls_address);
252 248
253private: 249private:
254 explicit Process(KernelCore& kernel); 250 explicit Process(Core::System& system);
255 ~Process() override; 251 ~Process() override;
256 252
257 /// Checks if the specified thread should wait until this process is available. 253 /// Checks if the specified thread should wait until this process is available.
258 bool ShouldWait(Thread* thread) const override; 254 bool ShouldWait(const Thread* thread) const override;
259 255
260 /// Acquires/locks this process for the specified thread if it's available. 256 /// Acquires/locks this process for the specified thread if it's available.
261 void Acquire(Thread* thread) override; 257 void Acquire(Thread* thread) override;
@@ -268,6 +264,12 @@ private:
268 /// Memory manager for this process. 264 /// Memory manager for this process.
269 Kernel::VMManager vm_manager; 265 Kernel::VMManager vm_manager;
270 266
267 /// Size of the main thread's stack in bytes.
268 u64 main_thread_stack_size = 0;
269
270 /// Size of the loaded code memory in bytes.
271 u64 code_memory_size = 0;
272
271 /// Current status of the process 273 /// Current status of the process
272 ProcessStatus status; 274 ProcessStatus status;
273 275
@@ -309,9 +311,24 @@ private:
309 /// Per-process handle table for storing created object handles in. 311 /// Per-process handle table for storing created object handles in.
310 HandleTable handle_table; 312 HandleTable handle_table;
311 313
314 /// Per-process address arbiter.
315 AddressArbiter address_arbiter;
316
317 /// The per-process mutex lock instance used for handling various
318 /// forms of services, such as lock arbitration, and condition
319 /// variable related facilities.
320 Mutex mutex;
321
312 /// Random values for svcGetInfo RandomEntropy 322 /// Random values for svcGetInfo RandomEntropy
313 std::array<u64, RANDOM_ENTROPY_SIZE> random_entropy; 323 std::array<u64, RANDOM_ENTROPY_SIZE> random_entropy;
314 324
325 /// List of threads that are running with this process as their owner.
326 std::list<const Thread*> thread_list;
327
328 /// System context
329 Core::System& system;
330
331 /// Name of this process
315 std::string name; 332 std::string name;
316}; 333};
317 334
diff --git a/src/core/hle/kernel/process_capability.cpp b/src/core/hle/kernel/process_capability.cpp
index 3a2164b25..583e35b79 100644
--- a/src/core/hle/kernel/process_capability.cpp
+++ b/src/core/hle/kernel/process_capability.cpp
@@ -96,7 +96,7 @@ void ProcessCapabilities::InitializeForMetadatalessProcess() {
96 interrupt_capabilities.set(); 96 interrupt_capabilities.set();
97 97
98 // Allow using the maximum possible amount of handles 98 // Allow using the maximum possible amount of handles
99 handle_table_size = static_cast<u32>(HandleTable::MAX_COUNT); 99 handle_table_size = static_cast<s32>(HandleTable::MAX_COUNT);
100 100
101 // Allow all debugging capabilities. 101 // Allow all debugging capabilities.
102 is_debuggable = true; 102 is_debuggable = true;
@@ -337,7 +337,7 @@ ResultCode ProcessCapabilities::HandleHandleTableFlags(u32 flags) {
337 return ERR_RESERVED_VALUE; 337 return ERR_RESERVED_VALUE;
338 } 338 }
339 339
340 handle_table_size = (flags >> 16) & 0x3FF; 340 handle_table_size = static_cast<s32>((flags >> 16) & 0x3FF);
341 return RESULT_SUCCESS; 341 return RESULT_SUCCESS;
342} 342}
343 343
diff --git a/src/core/hle/kernel/process_capability.h b/src/core/hle/kernel/process_capability.h
index fbc8812a3..5cdd80747 100644
--- a/src/core/hle/kernel/process_capability.h
+++ b/src/core/hle/kernel/process_capability.h
@@ -156,7 +156,7 @@ public:
156 } 156 }
157 157
158 /// Gets the number of total allowable handles for the process' handle table. 158 /// Gets the number of total allowable handles for the process' handle table.
159 u32 GetHandleTableSize() const { 159 s32 GetHandleTableSize() const {
160 return handle_table_size; 160 return handle_table_size;
161 } 161 }
162 162
@@ -252,7 +252,7 @@ private:
252 u64 core_mask = 0; 252 u64 core_mask = 0;
253 u64 priority_mask = 0; 253 u64 priority_mask = 0;
254 254
255 u32 handle_table_size = 0; 255 s32 handle_table_size = 0;
256 u32 kernel_version = 0; 256 u32 kernel_version = 0;
257 257
258 ProgramType program_type = ProgramType::SysModule; 258 ProgramType program_type = ProgramType::SysModule;
diff --git a/src/core/hle/kernel/readable_event.cpp b/src/core/hle/kernel/readable_event.cpp
index 6973e580c..c2b798a4e 100644
--- a/src/core/hle/kernel/readable_event.cpp
+++ b/src/core/hle/kernel/readable_event.cpp
@@ -14,7 +14,7 @@ namespace Kernel {
14ReadableEvent::ReadableEvent(KernelCore& kernel) : WaitObject{kernel} {} 14ReadableEvent::ReadableEvent(KernelCore& kernel) : WaitObject{kernel} {}
15ReadableEvent::~ReadableEvent() = default; 15ReadableEvent::~ReadableEvent() = default;
16 16
17bool ReadableEvent::ShouldWait(Thread* thread) const { 17bool ReadableEvent::ShouldWait(const Thread* thread) const {
18 return !signaled; 18 return !signaled;
19} 19}
20 20
@@ -44,8 +44,4 @@ ResultCode ReadableEvent::Reset() {
44 return RESULT_SUCCESS; 44 return RESULT_SUCCESS;
45} 45}
46 46
47void ReadableEvent::WakeupAllWaitingThreads() {
48 WaitObject::WakeupAllWaitingThreads();
49}
50
51} // namespace Kernel 47} // namespace Kernel
diff --git a/src/core/hle/kernel/readable_event.h b/src/core/hle/kernel/readable_event.h
index 80b3b0aba..84215f572 100644
--- a/src/core/hle/kernel/readable_event.h
+++ b/src/core/hle/kernel/readable_event.h
@@ -31,16 +31,14 @@ public:
31 return reset_type; 31 return reset_type;
32 } 32 }
33 33
34 static const HandleType HANDLE_TYPE = HandleType::ReadableEvent; 34 static constexpr HandleType HANDLE_TYPE = HandleType::ReadableEvent;
35 HandleType GetHandleType() const override { 35 HandleType GetHandleType() const override {
36 return HANDLE_TYPE; 36 return HANDLE_TYPE;
37 } 37 }
38 38
39 bool ShouldWait(Thread* thread) const override; 39 bool ShouldWait(const Thread* thread) const override;
40 void Acquire(Thread* thread) override; 40 void Acquire(Thread* thread) override;
41 41
42 void WakeupAllWaitingThreads() override;
43
44 /// Unconditionally clears the readable event's state. 42 /// Unconditionally clears the readable event's state.
45 void Clear(); 43 void Clear();
46 44
diff --git a/src/core/hle/kernel/resource_limit.cpp b/src/core/hle/kernel/resource_limit.cpp
index 2f9695005..173f69915 100644
--- a/src/core/hle/kernel/resource_limit.cpp
+++ b/src/core/hle/kernel/resource_limit.cpp
@@ -16,11 +16,8 @@ constexpr std::size_t ResourceTypeToIndex(ResourceType type) {
16ResourceLimit::ResourceLimit(KernelCore& kernel) : Object{kernel} {} 16ResourceLimit::ResourceLimit(KernelCore& kernel) : Object{kernel} {}
17ResourceLimit::~ResourceLimit() = default; 17ResourceLimit::~ResourceLimit() = default;
18 18
19SharedPtr<ResourceLimit> ResourceLimit::Create(KernelCore& kernel, std::string name) { 19SharedPtr<ResourceLimit> ResourceLimit::Create(KernelCore& kernel) {
20 SharedPtr<ResourceLimit> resource_limit(new ResourceLimit(kernel)); 20 return new ResourceLimit(kernel);
21
22 resource_limit->name = std::move(name);
23 return resource_limit;
24} 21}
25 22
26s64 ResourceLimit::GetCurrentResourceValue(ResourceType resource) const { 23s64 ResourceLimit::GetCurrentResourceValue(ResourceType resource) const {
diff --git a/src/core/hle/kernel/resource_limit.h b/src/core/hle/kernel/resource_limit.h
index 59dc11c22..2613a6bb5 100644
--- a/src/core/hle/kernel/resource_limit.h
+++ b/src/core/hle/kernel/resource_limit.h
@@ -31,19 +31,17 @@ constexpr bool IsValidResourceType(ResourceType type) {
31 31
32class ResourceLimit final : public Object { 32class ResourceLimit final : public Object {
33public: 33public:
34 /** 34 /// Creates a resource limit object.
35 * Creates a resource limit object. 35 static SharedPtr<ResourceLimit> Create(KernelCore& kernel);
36 */
37 static SharedPtr<ResourceLimit> Create(KernelCore& kernel, std::string name = "Unknown");
38 36
39 std::string GetTypeName() const override { 37 std::string GetTypeName() const override {
40 return "ResourceLimit"; 38 return "ResourceLimit";
41 } 39 }
42 std::string GetName() const override { 40 std::string GetName() const override {
43 return name; 41 return GetTypeName();
44 } 42 }
45 43
46 static const HandleType HANDLE_TYPE = HandleType::ResourceLimit; 44 static constexpr HandleType HANDLE_TYPE = HandleType::ResourceLimit;
47 HandleType GetHandleType() const override { 45 HandleType GetHandleType() const override {
48 return HANDLE_TYPE; 46 return HANDLE_TYPE;
49 } 47 }
@@ -95,9 +93,6 @@ private:
95 ResourceArray limits{}; 93 ResourceArray limits{};
96 /// Current resource limit values. 94 /// Current resource limit values.
97 ResourceArray values{}; 95 ResourceArray values{};
98
99 /// Name of resource limit object.
100 std::string name;
101}; 96};
102 97
103} // namespace Kernel 98} // namespace Kernel
diff --git a/src/core/hle/kernel/scheduler.cpp b/src/core/hle/kernel/scheduler.cpp
index df4d6cf0a..e8447b69a 100644
--- a/src/core/hle/kernel/scheduler.cpp
+++ b/src/core/hle/kernel/scheduler.cpp
@@ -19,7 +19,8 @@ namespace Kernel {
19 19
20std::mutex Scheduler::scheduler_mutex; 20std::mutex Scheduler::scheduler_mutex;
21 21
22Scheduler::Scheduler(Core::ARM_Interface& cpu_core) : cpu_core(cpu_core) {} 22Scheduler::Scheduler(Core::System& system, Core::ARM_Interface& cpu_core)
23 : cpu_core{cpu_core}, system{system} {}
23 24
24Scheduler::~Scheduler() { 25Scheduler::~Scheduler() {
25 for (auto& thread : thread_list) { 26 for (auto& thread : thread_list) {
@@ -28,8 +29,8 @@ Scheduler::~Scheduler() {
28} 29}
29 30
30bool Scheduler::HaveReadyThreads() const { 31bool Scheduler::HaveReadyThreads() const {
31 std::lock_guard<std::mutex> lock(scheduler_mutex); 32 std::lock_guard lock{scheduler_mutex};
32 return ready_queue.get_first() != nullptr; 33 return !ready_queue.empty();
33} 34}
34 35
35Thread* Scheduler::GetCurrentThread() const { 36Thread* Scheduler::GetCurrentThread() const {
@@ -45,23 +46,28 @@ Thread* Scheduler::PopNextReadyThread() {
45 Thread* thread = GetCurrentThread(); 46 Thread* thread = GetCurrentThread();
46 47
47 if (thread && thread->GetStatus() == ThreadStatus::Running) { 48 if (thread && thread->GetStatus() == ThreadStatus::Running) {
49 if (ready_queue.empty()) {
50 return thread;
51 }
48 // We have to do better than the current thread. 52 // We have to do better than the current thread.
49 // This call returns null when that's not possible. 53 // This call returns null when that's not possible.
50 next = ready_queue.pop_first_better(thread->GetPriority()); 54 next = ready_queue.front();
51 if (!next) { 55 if (next == nullptr || next->GetPriority() >= thread->GetPriority()) {
52 // Otherwise just keep going with the current thread
53 next = thread; 56 next = thread;
54 } 57 }
55 } else { 58 } else {
56 next = ready_queue.pop_first(); 59 if (ready_queue.empty()) {
60 return nullptr;
61 }
62 next = ready_queue.front();
57 } 63 }
58 64
59 return next; 65 return next;
60} 66}
61 67
62void Scheduler::SwitchContext(Thread* new_thread) { 68void Scheduler::SwitchContext(Thread* new_thread) {
63 Thread* const previous_thread = GetCurrentThread(); 69 Thread* previous_thread = GetCurrentThread();
64 Process* const previous_process = Core::CurrentProcess(); 70 Process* const previous_process = system.Kernel().CurrentProcess();
65 71
66 UpdateLastContextSwitchTime(previous_thread, previous_process); 72 UpdateLastContextSwitchTime(previous_thread, previous_process);
67 73
@@ -74,7 +80,7 @@ void Scheduler::SwitchContext(Thread* new_thread) {
74 if (previous_thread->GetStatus() == ThreadStatus::Running) { 80 if (previous_thread->GetStatus() == ThreadStatus::Running) {
75 // This is only the case when a reschedule is triggered without the current thread 81 // This is only the case when a reschedule is triggered without the current thread
76 // yielding execution (i.e. an event triggered, system core time-sliced, etc) 82 // yielding execution (i.e. an event triggered, system core time-sliced, etc)
77 ready_queue.push_front(previous_thread->GetPriority(), previous_thread); 83 ready_queue.add(previous_thread, previous_thread->GetPriority(), false);
78 previous_thread->SetStatus(ThreadStatus::Ready); 84 previous_thread->SetStatus(ThreadStatus::Ready);
79 } 85 }
80 } 86 }
@@ -89,13 +95,12 @@ void Scheduler::SwitchContext(Thread* new_thread) {
89 95
90 current_thread = new_thread; 96 current_thread = new_thread;
91 97
92 ready_queue.remove(new_thread->GetPriority(), new_thread); 98 ready_queue.remove(new_thread, new_thread->GetPriority());
93 new_thread->SetStatus(ThreadStatus::Running); 99 new_thread->SetStatus(ThreadStatus::Running);
94 100
95 auto* const thread_owner_process = current_thread->GetOwnerProcess(); 101 auto* const thread_owner_process = current_thread->GetOwnerProcess();
96 if (previous_process != thread_owner_process) { 102 if (previous_process != thread_owner_process) {
97 Core::System::GetInstance().Kernel().MakeCurrentProcess(thread_owner_process); 103 system.Kernel().MakeCurrentProcess(thread_owner_process);
98 SetCurrentPageTable(&Core::CurrentProcess()->VMManager().page_table);
99 } 104 }
100 105
101 cpu_core.LoadContext(new_thread->GetContext()); 106 cpu_core.LoadContext(new_thread->GetContext());
@@ -111,7 +116,7 @@ void Scheduler::SwitchContext(Thread* new_thread) {
111 116
112void Scheduler::UpdateLastContextSwitchTime(Thread* thread, Process* process) { 117void Scheduler::UpdateLastContextSwitchTime(Thread* thread, Process* process) {
113 const u64 prev_switch_ticks = last_context_switch_time; 118 const u64 prev_switch_ticks = last_context_switch_time;
114 const u64 most_recent_switch_ticks = CoreTiming::GetTicks(); 119 const u64 most_recent_switch_ticks = system.CoreTiming().GetTicks();
115 const u64 update_ticks = most_recent_switch_ticks - prev_switch_ticks; 120 const u64 update_ticks = most_recent_switch_ticks - prev_switch_ticks;
116 121
117 if (thread != nullptr) { 122 if (thread != nullptr) {
@@ -126,7 +131,7 @@ void Scheduler::UpdateLastContextSwitchTime(Thread* thread, Process* process) {
126} 131}
127 132
128void Scheduler::Reschedule() { 133void Scheduler::Reschedule() {
129 std::lock_guard<std::mutex> lock(scheduler_mutex); 134 std::lock_guard lock{scheduler_mutex};
130 135
131 Thread* cur = GetCurrentThread(); 136 Thread* cur = GetCurrentThread();
132 Thread* next = PopNextReadyThread(); 137 Thread* next = PopNextReadyThread();
@@ -142,51 +147,54 @@ void Scheduler::Reschedule() {
142 SwitchContext(next); 147 SwitchContext(next);
143} 148}
144 149
145void Scheduler::AddThread(SharedPtr<Thread> thread, u32 priority) { 150void Scheduler::AddThread(SharedPtr<Thread> thread) {
146 std::lock_guard<std::mutex> lock(scheduler_mutex); 151 std::lock_guard lock{scheduler_mutex};
147 152
148 thread_list.push_back(std::move(thread)); 153 thread_list.push_back(std::move(thread));
149 ready_queue.prepare(priority);
150} 154}
151 155
152void Scheduler::RemoveThread(Thread* thread) { 156void Scheduler::RemoveThread(Thread* thread) {
153 std::lock_guard<std::mutex> lock(scheduler_mutex); 157 std::lock_guard lock{scheduler_mutex};
154 158
155 thread_list.erase(std::remove(thread_list.begin(), thread_list.end(), thread), 159 thread_list.erase(std::remove(thread_list.begin(), thread_list.end(), thread),
156 thread_list.end()); 160 thread_list.end());
157} 161}
158 162
159void Scheduler::ScheduleThread(Thread* thread, u32 priority) { 163void Scheduler::ScheduleThread(Thread* thread, u32 priority) {
160 std::lock_guard<std::mutex> lock(scheduler_mutex); 164 std::lock_guard lock{scheduler_mutex};
161 165
162 ASSERT(thread->GetStatus() == ThreadStatus::Ready); 166 ASSERT(thread->GetStatus() == ThreadStatus::Ready);
163 ready_queue.push_back(priority, thread); 167 ready_queue.add(thread, priority);
164} 168}
165 169
166void Scheduler::UnscheduleThread(Thread* thread, u32 priority) { 170void Scheduler::UnscheduleThread(Thread* thread, u32 priority) {
167 std::lock_guard<std::mutex> lock(scheduler_mutex); 171 std::lock_guard lock{scheduler_mutex};
168 172
169 ASSERT(thread->GetStatus() == ThreadStatus::Ready); 173 ASSERT(thread->GetStatus() == ThreadStatus::Ready);
170 ready_queue.remove(priority, thread); 174 ready_queue.remove(thread, priority);
171} 175}
172 176
173void Scheduler::SetThreadPriority(Thread* thread, u32 priority) { 177void Scheduler::SetThreadPriority(Thread* thread, u32 priority) {
174 std::lock_guard<std::mutex> lock(scheduler_mutex); 178 std::lock_guard lock{scheduler_mutex};
179 if (thread->GetPriority() == priority) {
180 return;
181 }
175 182
176 // If thread was ready, adjust queues 183 // If thread was ready, adjust queues
177 if (thread->GetStatus() == ThreadStatus::Ready) 184 if (thread->GetStatus() == ThreadStatus::Ready)
178 ready_queue.move(thread, thread->GetPriority(), priority); 185 ready_queue.adjust(thread, thread->GetPriority(), priority);
179 else
180 ready_queue.prepare(priority);
181} 186}
182 187
183Thread* Scheduler::GetNextSuggestedThread(u32 core, u32 maximum_priority) const { 188Thread* Scheduler::GetNextSuggestedThread(u32 core, u32 maximum_priority) const {
184 std::lock_guard<std::mutex> lock(scheduler_mutex); 189 std::lock_guard lock{scheduler_mutex};
185 190
186 const u32 mask = 1U << core; 191 const u32 mask = 1U << core;
187 return ready_queue.get_first_filter([mask, maximum_priority](Thread const* thread) { 192 for (auto* thread : ready_queue) {
188 return (thread->GetAffinityMask() & mask) != 0 && thread->GetPriority() < maximum_priority; 193 if ((thread->GetAffinityMask() & mask) != 0 && thread->GetPriority() < maximum_priority) {
189 }); 194 return thread;
195 }
196 }
197 return nullptr;
190} 198}
191 199
192void Scheduler::YieldWithoutLoadBalancing(Thread* thread) { 200void Scheduler::YieldWithoutLoadBalancing(Thread* thread) {
@@ -198,8 +206,7 @@ void Scheduler::YieldWithoutLoadBalancing(Thread* thread) {
198 ASSERT(thread->GetPriority() < THREADPRIO_COUNT); 206 ASSERT(thread->GetPriority() < THREADPRIO_COUNT);
199 207
200 // Yield this thread -- sleep for zero time and force reschedule to different thread 208 // Yield this thread -- sleep for zero time and force reschedule to different thread
201 WaitCurrentThread_Sleep(); 209 GetCurrentThread()->Sleep(0);
202 GetCurrentThread()->WakeAfterDelay(0);
203} 210}
204 211
205void Scheduler::YieldWithLoadBalancing(Thread* thread) { 212void Scheduler::YieldWithLoadBalancing(Thread* thread) {
@@ -214,8 +221,7 @@ void Scheduler::YieldWithLoadBalancing(Thread* thread) {
214 ASSERT(priority < THREADPRIO_COUNT); 221 ASSERT(priority < THREADPRIO_COUNT);
215 222
216 // Sleep for zero time to be able to force reschedule to different thread 223 // Sleep for zero time to be able to force reschedule to different thread
217 WaitCurrentThread_Sleep(); 224 GetCurrentThread()->Sleep(0);
218 GetCurrentThread()->WakeAfterDelay(0);
219 225
220 Thread* suggested_thread = nullptr; 226 Thread* suggested_thread = nullptr;
221 227
@@ -223,8 +229,7 @@ void Scheduler::YieldWithLoadBalancing(Thread* thread) {
223 // Take the first non-nullptr one 229 // Take the first non-nullptr one
224 for (unsigned cur_core = 0; cur_core < Core::NUM_CPU_CORES; ++cur_core) { 230 for (unsigned cur_core = 0; cur_core < Core::NUM_CPU_CORES; ++cur_core) {
225 const auto res = 231 const auto res =
226 Core::System::GetInstance().CpuCore(cur_core).Scheduler().GetNextSuggestedThread( 232 system.CpuCore(cur_core).Scheduler().GetNextSuggestedThread(core, priority);
227 core, priority);
228 233
229 // If scheduler provides a suggested thread 234 // If scheduler provides a suggested thread
230 if (res != nullptr) { 235 if (res != nullptr) {
diff --git a/src/core/hle/kernel/scheduler.h b/src/core/hle/kernel/scheduler.h
index 97ced4dfc..b29bf7be8 100644
--- a/src/core/hle/kernel/scheduler.h
+++ b/src/core/hle/kernel/scheduler.h
@@ -7,13 +7,14 @@
7#include <mutex> 7#include <mutex>
8#include <vector> 8#include <vector>
9#include "common/common_types.h" 9#include "common/common_types.h"
10#include "common/thread_queue_list.h" 10#include "common/multi_level_queue.h"
11#include "core/hle/kernel/object.h" 11#include "core/hle/kernel/object.h"
12#include "core/hle/kernel/thread.h" 12#include "core/hle/kernel/thread.h"
13 13
14namespace Core { 14namespace Core {
15class ARM_Interface; 15class ARM_Interface;
16} 16class System;
17} // namespace Core
17 18
18namespace Kernel { 19namespace Kernel {
19 20
@@ -21,7 +22,7 @@ class Process;
21 22
22class Scheduler final { 23class Scheduler final {
23public: 24public:
24 explicit Scheduler(Core::ARM_Interface& cpu_core); 25 explicit Scheduler(Core::System& system, Core::ARM_Interface& cpu_core);
25 ~Scheduler(); 26 ~Scheduler();
26 27
27 /// Returns whether there are any threads that are ready to run. 28 /// Returns whether there are any threads that are ready to run.
@@ -37,7 +38,7 @@ public:
37 u64 GetLastContextSwitchTicks() const; 38 u64 GetLastContextSwitchTicks() const;
38 39
39 /// Adds a new thread to the scheduler 40 /// Adds a new thread to the scheduler
40 void AddThread(SharedPtr<Thread> thread, u32 priority); 41 void AddThread(SharedPtr<Thread> thread);
41 42
42 /// Removes a thread from the scheduler 43 /// Removes a thread from the scheduler
43 void RemoveThread(Thread* thread); 44 void RemoveThread(Thread* thread);
@@ -155,13 +156,14 @@ private:
155 std::vector<SharedPtr<Thread>> thread_list; 156 std::vector<SharedPtr<Thread>> thread_list;
156 157
157 /// Lists only ready thread ids. 158 /// Lists only ready thread ids.
158 Common::ThreadQueueList<Thread*, THREADPRIO_LOWEST + 1> ready_queue; 159 Common::MultiLevelQueue<Thread*, THREADPRIO_LOWEST + 1> ready_queue;
159 160
160 SharedPtr<Thread> current_thread = nullptr; 161 SharedPtr<Thread> current_thread = nullptr;
161 162
162 Core::ARM_Interface& cpu_core; 163 Core::ARM_Interface& cpu_core;
163 u64 last_context_switch_time = 0; 164 u64 last_context_switch_time = 0;
164 165
166 Core::System& system;
165 static std::mutex scheduler_mutex; 167 static std::mutex scheduler_mutex;
166}; 168};
167 169
diff --git a/src/core/hle/kernel/server_port.cpp b/src/core/hle/kernel/server_port.cpp
index d6ceeb2da..02e7c60e6 100644
--- a/src/core/hle/kernel/server_port.cpp
+++ b/src/core/hle/kernel/server_port.cpp
@@ -26,7 +26,11 @@ ResultVal<SharedPtr<ServerSession>> ServerPort::Accept() {
26 return MakeResult(std::move(session)); 26 return MakeResult(std::move(session));
27} 27}
28 28
29bool ServerPort::ShouldWait(Thread* thread) const { 29void ServerPort::AppendPendingSession(SharedPtr<ServerSession> pending_session) {
30 pending_sessions.push_back(std::move(pending_session));
31}
32
33bool ServerPort::ShouldWait(const Thread* thread) const {
30 // If there are no pending sessions, we wait until a new one is added. 34 // If there are no pending sessions, we wait until a new one is added.
31 return pending_sessions.empty(); 35 return pending_sessions.empty();
32} 36}
@@ -35,9 +39,8 @@ void ServerPort::Acquire(Thread* thread) {
35 ASSERT_MSG(!ShouldWait(thread), "object unavailable!"); 39 ASSERT_MSG(!ShouldWait(thread), "object unavailable!");
36} 40}
37 41
38std::tuple<SharedPtr<ServerPort>, SharedPtr<ClientPort>> ServerPort::CreatePortPair( 42ServerPort::PortPair ServerPort::CreatePortPair(KernelCore& kernel, u32 max_sessions,
39 KernelCore& kernel, u32 max_sessions, std::string name) { 43 std::string name) {
40
41 SharedPtr<ServerPort> server_port(new ServerPort(kernel)); 44 SharedPtr<ServerPort> server_port(new ServerPort(kernel));
42 SharedPtr<ClientPort> client_port(new ClientPort(kernel)); 45 SharedPtr<ClientPort> client_port(new ClientPort(kernel));
43 46
@@ -47,7 +50,7 @@ std::tuple<SharedPtr<ServerPort>, SharedPtr<ClientPort>> ServerPort::CreatePortP
47 client_port->max_sessions = max_sessions; 50 client_port->max_sessions = max_sessions;
48 client_port->active_sessions = 0; 51 client_port->active_sessions = 0;
49 52
50 return std::make_tuple(std::move(server_port), std::move(client_port)); 53 return std::make_pair(std::move(server_port), std::move(client_port));
51} 54}
52 55
53} // namespace Kernel 56} // namespace Kernel
diff --git a/src/core/hle/kernel/server_port.h b/src/core/hle/kernel/server_port.h
index e52f8245f..dc88a1ebd 100644
--- a/src/core/hle/kernel/server_port.h
+++ b/src/core/hle/kernel/server_port.h
@@ -6,7 +6,7 @@
6 6
7#include <memory> 7#include <memory>
8#include <string> 8#include <string>
9#include <tuple> 9#include <utility>
10#include <vector> 10#include <vector>
11#include "common/common_types.h" 11#include "common/common_types.h"
12#include "core/hle/kernel/object.h" 12#include "core/hle/kernel/object.h"
@@ -22,6 +22,9 @@ class SessionRequestHandler;
22 22
23class ServerPort final : public WaitObject { 23class ServerPort final : public WaitObject {
24public: 24public:
25 using HLEHandler = std::shared_ptr<SessionRequestHandler>;
26 using PortPair = std::pair<SharedPtr<ServerPort>, SharedPtr<ClientPort>>;
27
25 /** 28 /**
26 * Creates a pair of ServerPort and an associated ClientPort. 29 * Creates a pair of ServerPort and an associated ClientPort.
27 * 30 *
@@ -30,8 +33,8 @@ public:
30 * @param name Optional name of the ports 33 * @param name Optional name of the ports
31 * @return The created port tuple 34 * @return The created port tuple
32 */ 35 */
33 static std::tuple<SharedPtr<ServerPort>, SharedPtr<ClientPort>> CreatePortPair( 36 static PortPair CreatePortPair(KernelCore& kernel, u32 max_sessions,
34 KernelCore& kernel, u32 max_sessions, std::string name = "UnknownPort"); 37 std::string name = "UnknownPort");
35 38
36 std::string GetTypeName() const override { 39 std::string GetTypeName() const override {
37 return "ServerPort"; 40 return "ServerPort";
@@ -40,7 +43,7 @@ public:
40 return name; 43 return name;
41 } 44 }
42 45
43 static const HandleType HANDLE_TYPE = HandleType::ServerPort; 46 static constexpr HandleType HANDLE_TYPE = HandleType::ServerPort;
44 HandleType GetHandleType() const override { 47 HandleType GetHandleType() const override {
45 return HANDLE_TYPE; 48 return HANDLE_TYPE;
46 } 49 }
@@ -51,29 +54,44 @@ public:
51 */ 54 */
52 ResultVal<SharedPtr<ServerSession>> Accept(); 55 ResultVal<SharedPtr<ServerSession>> Accept();
53 56
57 /// Whether or not this server port has an HLE handler available.
58 bool HasHLEHandler() const {
59 return hle_handler != nullptr;
60 }
61
62 /// Gets the HLE handler for this port.
63 HLEHandler GetHLEHandler() const {
64 return hle_handler;
65 }
66
54 /** 67 /**
55 * Sets the HLE handler template for the port. ServerSessions crated by connecting to this port 68 * Sets the HLE handler template for the port. ServerSessions crated by connecting to this port
56 * will inherit a reference to this handler. 69 * will inherit a reference to this handler.
57 */ 70 */
58 void SetHleHandler(std::shared_ptr<SessionRequestHandler> hle_handler_) { 71 void SetHleHandler(HLEHandler hle_handler_) {
59 hle_handler = std::move(hle_handler_); 72 hle_handler = std::move(hle_handler_);
60 } 73 }
61 74
62 std::string name; ///< Name of port (optional) 75 /// Appends a ServerSession to the collection of ServerSessions
76 /// waiting to be accepted by this port.
77 void AppendPendingSession(SharedPtr<ServerSession> pending_session);
78
79 bool ShouldWait(const Thread* thread) const override;
80 void Acquire(Thread* thread) override;
81
82private:
83 explicit ServerPort(KernelCore& kernel);
84 ~ServerPort() override;
63 85
64 /// ServerSessions waiting to be accepted by the port 86 /// ServerSessions waiting to be accepted by the port
65 std::vector<SharedPtr<ServerSession>> pending_sessions; 87 std::vector<SharedPtr<ServerSession>> pending_sessions;
66 88
67 /// This session's HLE request handler template (optional) 89 /// This session's HLE request handler template (optional)
68 /// ServerSessions created from this port inherit a reference to this handler. 90 /// ServerSessions created from this port inherit a reference to this handler.
69 std::shared_ptr<SessionRequestHandler> hle_handler; 91 HLEHandler hle_handler;
70
71 bool ShouldWait(Thread* thread) const override;
72 void Acquire(Thread* thread) override;
73 92
74private: 93 /// Name of the port (optional)
75 explicit ServerPort(KernelCore& kernel); 94 std::string name;
76 ~ServerPort() override;
77}; 95};
78 96
79} // namespace Kernel 97} // namespace Kernel
diff --git a/src/core/hle/kernel/server_session.cpp b/src/core/hle/kernel/server_session.cpp
index 027434f92..696a82cd9 100644
--- a/src/core/hle/kernel/server_session.cpp
+++ b/src/core/hle/kernel/server_session.cpp
@@ -28,11 +28,9 @@ ServerSession::~ServerSession() {
28 // the emulated application. 28 // the emulated application.
29 29
30 // Decrease the port's connection count. 30 // Decrease the port's connection count.
31 if (parent->port) 31 if (parent->port) {
32 parent->port->ConnectionClosed(); 32 parent->port->ConnectionClosed();
33 33 }
34 // TODO(Subv): Wake up all the ClientSession's waiting threads and set
35 // the SendSyncRequest result to 0xC920181A.
36 34
37 parent->server = nullptr; 35 parent->server = nullptr;
38} 36}
@@ -46,7 +44,7 @@ ResultVal<SharedPtr<ServerSession>> ServerSession::Create(KernelCore& kernel, st
46 return MakeResult(std::move(server_session)); 44 return MakeResult(std::move(server_session));
47} 45}
48 46
49bool ServerSession::ShouldWait(Thread* thread) const { 47bool ServerSession::ShouldWait(const Thread* thread) const {
50 // Closed sessions should never wait, an error will be returned from svcReplyAndReceive. 48 // Closed sessions should never wait, an error will be returned from svcReplyAndReceive.
51 if (parent->client == nullptr) 49 if (parent->client == nullptr)
52 return false; 50 return false;
@@ -63,42 +61,68 @@ void ServerSession::Acquire(Thread* thread) {
63 pending_requesting_threads.pop_back(); 61 pending_requesting_threads.pop_back();
64} 62}
65 63
64void ServerSession::ClientDisconnected() {
65 // We keep a shared pointer to the hle handler to keep it alive throughout
66 // the call to ClientDisconnected, as ClientDisconnected invalidates the
67 // hle_handler member itself during the course of the function executing.
68 std::shared_ptr<SessionRequestHandler> handler = hle_handler;
69 if (handler) {
70 // Note that after this returns, this server session's hle_handler is
71 // invalidated (set to null).
72 handler->ClientDisconnected(this);
73 }
74
75 // Clean up the list of client threads with pending requests, they are unneeded now that the
76 // client endpoint is closed.
77 pending_requesting_threads.clear();
78 currently_handling = nullptr;
79}
80
81void ServerSession::AppendDomainRequestHandler(std::shared_ptr<SessionRequestHandler> handler) {
82 domain_request_handlers.push_back(std::move(handler));
83}
84
85std::size_t ServerSession::NumDomainRequestHandlers() const {
86 return domain_request_handlers.size();
87}
88
66ResultCode ServerSession::HandleDomainSyncRequest(Kernel::HLERequestContext& context) { 89ResultCode ServerSession::HandleDomainSyncRequest(Kernel::HLERequestContext& context) {
67 auto* const domain_message_header = context.GetDomainMessageHeader(); 90 if (!context.HasDomainMessageHeader()) {
68 if (domain_message_header) { 91 return RESULT_SUCCESS;
69 // Set domain handlers in HLE context, used for domain objects (IPC interfaces) as inputs 92 }
70 context.SetDomainRequestHandlers(domain_request_handlers); 93
71 94 // Set domain handlers in HLE context, used for domain objects (IPC interfaces) as inputs
72 // If there is a DomainMessageHeader, then this is CommandType "Request" 95 context.SetDomainRequestHandlers(domain_request_handlers);
73 const u32 object_id{context.GetDomainMessageHeader()->object_id}; 96
74 switch (domain_message_header->command) { 97 // If there is a DomainMessageHeader, then this is CommandType "Request"
75 case IPC::DomainMessageHeader::CommandType::SendMessage: 98 const auto& domain_message_header = context.GetDomainMessageHeader();
76 if (object_id > domain_request_handlers.size()) { 99 const u32 object_id{domain_message_header.object_id};
77 LOG_CRITICAL(IPC, 100 switch (domain_message_header.command) {
78 "object_id {} is too big! This probably means a recent service call " 101 case IPC::DomainMessageHeader::CommandType::SendMessage:
79 "to {} needed to return a new interface!", 102 if (object_id > domain_request_handlers.size()) {
80 object_id, name); 103 LOG_CRITICAL(IPC,
81 UNREACHABLE(); 104 "object_id {} is too big! This probably means a recent service call "
82 return RESULT_SUCCESS; // Ignore error if asserts are off 105 "to {} needed to return a new interface!",
83 } 106 object_id, name);
84 return domain_request_handlers[object_id - 1]->HandleSyncRequest(context); 107 UNREACHABLE();
85 108 return RESULT_SUCCESS; // Ignore error if asserts are off
86 case IPC::DomainMessageHeader::CommandType::CloseVirtualHandle: {
87 LOG_DEBUG(IPC, "CloseVirtualHandle, object_id=0x{:08X}", object_id);
88
89 domain_request_handlers[object_id - 1] = nullptr;
90
91 IPC::ResponseBuilder rb{context, 2};
92 rb.Push(RESULT_SUCCESS);
93 return RESULT_SUCCESS;
94 }
95 } 109 }
110 return domain_request_handlers[object_id - 1]->HandleSyncRequest(context);
111
112 case IPC::DomainMessageHeader::CommandType::CloseVirtualHandle: {
113 LOG_DEBUG(IPC, "CloseVirtualHandle, object_id=0x{:08X}", object_id);
96 114
97 LOG_CRITICAL(IPC, "Unknown domain command={}", 115 domain_request_handlers[object_id - 1] = nullptr;
98 static_cast<int>(domain_message_header->command.Value())); 116
99 ASSERT(false); 117 IPC::ResponseBuilder rb{context, 2};
118 rb.Push(RESULT_SUCCESS);
119 return RESULT_SUCCESS;
120 }
100 } 121 }
101 122
123 LOG_CRITICAL(IPC, "Unknown domain command={}",
124 static_cast<int>(domain_message_header.command.Value()));
125 ASSERT(false);
102 return RESULT_SUCCESS; 126 return RESULT_SUCCESS;
103} 127}
104 128
@@ -175,6 +199,6 @@ ServerSession::SessionPair ServerSession::CreateSessionPair(KernelCore& kernel,
175 client_session->parent = parent; 199 client_session->parent = parent;
176 server_session->parent = parent; 200 server_session->parent = parent;
177 201
178 return std::make_tuple(std::move(server_session), std::move(client_session)); 202 return std::make_pair(std::move(server_session), std::move(client_session));
179} 203}
180} // namespace Kernel 204} // namespace Kernel
diff --git a/src/core/hle/kernel/server_session.h b/src/core/hle/kernel/server_session.h
index e0e9d64c8..738df30f8 100644
--- a/src/core/hle/kernel/server_session.h
+++ b/src/core/hle/kernel/server_session.h
@@ -6,6 +6,7 @@
6 6
7#include <memory> 7#include <memory>
8#include <string> 8#include <string>
9#include <utility>
9#include <vector> 10#include <vector>
10 11
11#include "core/hle/kernel/object.h" 12#include "core/hle/kernel/object.h"
@@ -41,12 +42,24 @@ public:
41 return "ServerSession"; 42 return "ServerSession";
42 } 43 }
43 44
44 static const HandleType HANDLE_TYPE = HandleType::ServerSession; 45 std::string GetName() const override {
46 return name;
47 }
48
49 static constexpr HandleType HANDLE_TYPE = HandleType::ServerSession;
45 HandleType GetHandleType() const override { 50 HandleType GetHandleType() const override {
46 return HANDLE_TYPE; 51 return HANDLE_TYPE;
47 } 52 }
48 53
49 using SessionPair = std::tuple<SharedPtr<ServerSession>, SharedPtr<ClientSession>>; 54 Session* GetParent() {
55 return parent.get();
56 }
57
58 const Session* GetParent() const {
59 return parent.get();
60 }
61
62 using SessionPair = std::pair<SharedPtr<ServerSession>, SharedPtr<ClientSession>>;
50 63
51 /** 64 /**
52 * Creates a pair of ServerSession and an associated ClientSession. 65 * Creates a pair of ServerSession and an associated ClientSession.
@@ -74,27 +87,20 @@ public:
74 */ 87 */
75 ResultCode HandleSyncRequest(SharedPtr<Thread> thread); 88 ResultCode HandleSyncRequest(SharedPtr<Thread> thread);
76 89
77 bool ShouldWait(Thread* thread) const override; 90 bool ShouldWait(const Thread* thread) const override;
78 91
79 void Acquire(Thread* thread) override; 92 void Acquire(Thread* thread) override;
80 93
81 std::string name; ///< The name of this session (optional) 94 /// Called when a client disconnection occurs.
82 std::shared_ptr<Session> parent; ///< The parent session, which links to the client endpoint. 95 void ClientDisconnected();
83 std::shared_ptr<SessionRequestHandler>
84 hle_handler; ///< This session's HLE request handler (applicable when not a domain)
85 96
86 /// This is the list of domain request handlers (after conversion to a domain) 97 /// Adds a new domain request handler to the collection of request handlers within
87 std::vector<std::shared_ptr<SessionRequestHandler>> domain_request_handlers; 98 /// this ServerSession instance.
99 void AppendDomainRequestHandler(std::shared_ptr<SessionRequestHandler> handler);
88 100
89 /// List of threads that are pending a response after a sync request. This list is processed in 101 /// Retrieves the total number of domain request handlers that have been
90 /// a LIFO manner, thus, the last request will be dispatched first. 102 /// appended to this ServerSession instance.
91 /// TODO(Subv): Verify if this is indeed processed in LIFO using a hardware test. 103 std::size_t NumDomainRequestHandlers() const;
92 std::vector<SharedPtr<Thread>> pending_requesting_threads;
93
94 /// Thread whose request is currently being handled. A request is considered "handled" when a
95 /// response is sent via svcReplyAndReceive.
96 /// TODO(Subv): Find a better name for this.
97 SharedPtr<Thread> currently_handling;
98 104
99 /// Returns true if the session has been converted to a domain, otherwise False 105 /// Returns true if the session has been converted to a domain, otherwise False
100 bool IsDomain() const { 106 bool IsDomain() const {
@@ -129,8 +135,30 @@ private:
129 /// object handle. 135 /// object handle.
130 ResultCode HandleDomainSyncRequest(Kernel::HLERequestContext& context); 136 ResultCode HandleDomainSyncRequest(Kernel::HLERequestContext& context);
131 137
138 /// The parent session, which links to the client endpoint.
139 std::shared_ptr<Session> parent;
140
141 /// This session's HLE request handler (applicable when not a domain)
142 std::shared_ptr<SessionRequestHandler> hle_handler;
143
144 /// This is the list of domain request handlers (after conversion to a domain)
145 std::vector<std::shared_ptr<SessionRequestHandler>> domain_request_handlers;
146
147 /// List of threads that are pending a response after a sync request. This list is processed in
148 /// a LIFO manner, thus, the last request will be dispatched first.
149 /// TODO(Subv): Verify if this is indeed processed in LIFO using a hardware test.
150 std::vector<SharedPtr<Thread>> pending_requesting_threads;
151
152 /// Thread whose request is currently being handled. A request is considered "handled" when a
153 /// response is sent via svcReplyAndReceive.
154 /// TODO(Subv): Find a better name for this.
155 SharedPtr<Thread> currently_handling;
156
132 /// When set to True, converts the session to a domain at the end of the command 157 /// When set to True, converts the session to a domain at the end of the command
133 bool convert_to_domain{}; 158 bool convert_to_domain{};
159
160 /// The name of this session (optional)
161 std::string name;
134}; 162};
135 163
136} // namespace Kernel 164} // namespace Kernel
diff --git a/src/core/hle/kernel/shared_memory.cpp b/src/core/hle/kernel/shared_memory.cpp
index 22d0c1dd5..f15c5ee36 100644
--- a/src/core/hle/kernel/shared_memory.cpp
+++ b/src/core/hle/kernel/shared_memory.cpp
@@ -6,11 +6,9 @@
6 6
7#include "common/assert.h" 7#include "common/assert.h"
8#include "common/logging/log.h" 8#include "common/logging/log.h"
9#include "core/core.h"
10#include "core/hle/kernel/errors.h" 9#include "core/hle/kernel/errors.h"
11#include "core/hle/kernel/kernel.h" 10#include "core/hle/kernel/kernel.h"
12#include "core/hle/kernel/shared_memory.h" 11#include "core/hle/kernel/shared_memory.h"
13#include "core/memory.h"
14 12
15namespace Kernel { 13namespace Kernel {
16 14
@@ -34,8 +32,8 @@ SharedPtr<SharedMemory> SharedMemory::Create(KernelCore& kernel, Process* owner_
34 shared_memory->backing_block_offset = 0; 32 shared_memory->backing_block_offset = 0;
35 33
36 // Refresh the address mappings for the current process. 34 // Refresh the address mappings for the current process.
37 if (Core::CurrentProcess() != nullptr) { 35 if (kernel.CurrentProcess() != nullptr) {
38 Core::CurrentProcess()->VMManager().RefreshMemoryBlockMappings( 36 kernel.CurrentProcess()->VMManager().RefreshMemoryBlockMappings(
39 shared_memory->backing_block.get()); 37 shared_memory->backing_block.get());
40 } 38 }
41 } else { 39 } else {
@@ -120,7 +118,15 @@ ResultCode SharedMemory::Map(Process& target_process, VAddr address, MemoryPermi
120 ConvertPermissions(permissions)); 118 ConvertPermissions(permissions));
121} 119}
122 120
123ResultCode SharedMemory::Unmap(Process& target_process, VAddr address) { 121ResultCode SharedMemory::Unmap(Process& target_process, VAddr address, u64 unmap_size) {
122 if (unmap_size != size) {
123 LOG_ERROR(Kernel,
124 "Invalid size passed to Unmap. Size must be equal to the size of the "
125 "memory managed. Shared memory size=0x{:016X}, Unmap size=0x{:016X}",
126 size, unmap_size);
127 return ERR_INVALID_SIZE;
128 }
129
124 // TODO(Subv): Verify what happens if the application tries to unmap an address that is not 130 // TODO(Subv): Verify what happens if the application tries to unmap an address that is not
125 // mapped to a SharedMemory. 131 // mapped to a SharedMemory.
126 return target_process.VMManager().UnmapRange(address, size); 132 return target_process.VMManager().UnmapRange(address, size);
diff --git a/src/core/hle/kernel/shared_memory.h b/src/core/hle/kernel/shared_memory.h
index dab2a6bea..c2b6155e1 100644
--- a/src/core/hle/kernel/shared_memory.h
+++ b/src/core/hle/kernel/shared_memory.h
@@ -76,7 +76,7 @@ public:
76 return name; 76 return name;
77 } 77 }
78 78
79 static const HandleType HANDLE_TYPE = HandleType::SharedMemory; 79 static constexpr HandleType HANDLE_TYPE = HandleType::SharedMemory;
80 HandleType GetHandleType() const override { 80 HandleType GetHandleType() const override {
81 return HANDLE_TYPE; 81 return HANDLE_TYPE;
82 } 82 }
@@ -104,11 +104,17 @@ public:
104 104
105 /** 105 /**
106 * Unmaps a shared memory block from the specified address in system memory 106 * Unmaps a shared memory block from the specified address in system memory
107 *
107 * @param target_process Process from which to unmap the memory block. 108 * @param target_process Process from which to unmap the memory block.
108 * @param address Address in system memory where the shared memory block is mapped 109 * @param address Address in system memory where the shared memory block is mapped.
110 * @param unmap_size The amount of bytes to unmap from this shared memory instance.
111 *
109 * @return Result code of the unmap operation 112 * @return Result code of the unmap operation
113 *
114 * @pre The given size to unmap must be the same size as the amount of memory managed by
115 * the SharedMemory instance itself, otherwise ERR_INVALID_SIZE will be returned.
110 */ 116 */
111 ResultCode Unmap(Process& target_process, VAddr address); 117 ResultCode Unmap(Process& target_process, VAddr address, u64 unmap_size);
112 118
113 /** 119 /**
114 * Gets a pointer to the shared memory block 120 * Gets a pointer to the shared memory block
diff --git a/src/core/hle/kernel/svc.cpp b/src/core/hle/kernel/svc.cpp
index 6588bd3b8..e5d4d6b55 100644
--- a/src/core/hle/kernel/svc.cpp
+++ b/src/core/hle/kernel/svc.cpp
@@ -20,6 +20,7 @@
20#include "core/hle/kernel/address_arbiter.h" 20#include "core/hle/kernel/address_arbiter.h"
21#include "core/hle/kernel/client_port.h" 21#include "core/hle/kernel/client_port.h"
22#include "core/hle/kernel/client_session.h" 22#include "core/hle/kernel/client_session.h"
23#include "core/hle/kernel/errors.h"
23#include "core/hle/kernel/handle_table.h" 24#include "core/hle/kernel/handle_table.h"
24#include "core/hle/kernel/kernel.h" 25#include "core/hle/kernel/kernel.h"
25#include "core/hle/kernel/mutex.h" 26#include "core/hle/kernel/mutex.h"
@@ -31,6 +32,7 @@
31#include "core/hle/kernel/svc.h" 32#include "core/hle/kernel/svc.h"
32#include "core/hle/kernel/svc_wrap.h" 33#include "core/hle/kernel/svc_wrap.h"
33#include "core/hle/kernel/thread.h" 34#include "core/hle/kernel/thread.h"
35#include "core/hle/kernel/transfer_memory.h"
34#include "core/hle/kernel/writable_event.h" 36#include "core/hle/kernel/writable_event.h"
35#include "core/hle/lock.h" 37#include "core/hle/lock.h"
36#include "core/hle/result.h" 38#include "core/hle/result.h"
@@ -47,23 +49,6 @@ constexpr bool IsValidAddressRange(VAddr address, u64 size) {
47 return address + size > address; 49 return address + size > address;
48} 50}
49 51
50// Checks if a given address range lies within a larger address range.
51constexpr bool IsInsideAddressRange(VAddr address, u64 size, VAddr address_range_begin,
52 VAddr address_range_end) {
53 const VAddr end_address = address + size - 1;
54 return address_range_begin <= address && end_address <= address_range_end - 1;
55}
56
57bool IsInsideAddressSpace(const VMManager& vm, VAddr address, u64 size) {
58 return IsInsideAddressRange(address, size, vm.GetAddressSpaceBaseAddress(),
59 vm.GetAddressSpaceEndAddress());
60}
61
62bool IsInsideNewMapRegion(const VMManager& vm, VAddr address, u64 size) {
63 return IsInsideAddressRange(address, size, vm.GetNewMapRegionBaseAddress(),
64 vm.GetNewMapRegionEndAddress());
65}
66
67// 8 GiB 52// 8 GiB
68constexpr u64 MAIN_MEMORY_SIZE = 0x200000000; 53constexpr u64 MAIN_MEMORY_SIZE = 0x200000000;
69 54
@@ -105,14 +90,14 @@ ResultCode MapUnmapMemorySanityChecks(const VMManager& vm_manager, VAddr dst_add
105 return ERR_INVALID_ADDRESS_STATE; 90 return ERR_INVALID_ADDRESS_STATE;
106 } 91 }
107 92
108 if (!IsInsideAddressSpace(vm_manager, src_addr, size)) { 93 if (!vm_manager.IsWithinAddressSpace(src_addr, size)) {
109 LOG_ERROR(Kernel_SVC, 94 LOG_ERROR(Kernel_SVC,
110 "Source is not within the address space, addr=0x{:016X}, size=0x{:016X}", 95 "Source is not within the address space, addr=0x{:016X}, size=0x{:016X}",
111 src_addr, size); 96 src_addr, size);
112 return ERR_INVALID_ADDRESS_STATE; 97 return ERR_INVALID_ADDRESS_STATE;
113 } 98 }
114 99
115 if (!IsInsideNewMapRegion(vm_manager, dst_addr, size)) { 100 if (!vm_manager.IsWithinNewMapRegion(dst_addr, size)) {
116 LOG_ERROR(Kernel_SVC, 101 LOG_ERROR(Kernel_SVC,
117 "Destination is not within the new map region, addr=0x{:016X}, size=0x{:016X}", 102 "Destination is not within the new map region, addr=0x{:016X}, size=0x{:016X}",
118 dst_addr, size); 103 dst_addr, size);
@@ -146,16 +131,15 @@ enum class ResourceLimitValueType {
146 LimitValue, 131 LimitValue,
147}; 132};
148 133
149ResultVal<s64> RetrieveResourceLimitValue(Handle resource_limit, u32 resource_type, 134ResultVal<s64> RetrieveResourceLimitValue(Core::System& system, Handle resource_limit,
150 ResourceLimitValueType value_type) { 135 u32 resource_type, ResourceLimitValueType value_type) {
151 const auto type = static_cast<ResourceType>(resource_type); 136 const auto type = static_cast<ResourceType>(resource_type);
152 if (!IsValidResourceType(type)) { 137 if (!IsValidResourceType(type)) {
153 LOG_ERROR(Kernel_SVC, "Invalid resource limit type: '{}'", resource_type); 138 LOG_ERROR(Kernel_SVC, "Invalid resource limit type: '{}'", resource_type);
154 return ERR_INVALID_ENUM_VALUE; 139 return ERR_INVALID_ENUM_VALUE;
155 } 140 }
156 141
157 const auto& kernel = Core::System::GetInstance().Kernel(); 142 const auto* const current_process = system.Kernel().CurrentProcess();
158 const auto* const current_process = kernel.CurrentProcess();
159 ASSERT(current_process != nullptr); 143 ASSERT(current_process != nullptr);
160 144
161 const auto resource_limit_object = 145 const auto resource_limit_object =
@@ -175,7 +159,7 @@ ResultVal<s64> RetrieveResourceLimitValue(Handle resource_limit, u32 resource_ty
175} // Anonymous namespace 159} // Anonymous namespace
176 160
177/// Set the process heap to a given Size. It can both extend and shrink the heap. 161/// Set the process heap to a given Size. It can both extend and shrink the heap.
178static ResultCode SetHeapSize(VAddr* heap_addr, u64 heap_size) { 162static ResultCode SetHeapSize(Core::System& system, VAddr* heap_addr, u64 heap_size) {
179 LOG_TRACE(Kernel_SVC, "called, heap_size=0x{:X}", heap_size); 163 LOG_TRACE(Kernel_SVC, "called, heap_size=0x{:X}", heap_size);
180 164
181 // Size must be a multiple of 0x200000 (2MB) and be equal to or less than 8GB. 165 // Size must be a multiple of 0x200000 (2MB) and be equal to or less than 8GB.
@@ -190,11 +174,8 @@ static ResultCode SetHeapSize(VAddr* heap_addr, u64 heap_size) {
190 return ERR_INVALID_SIZE; 174 return ERR_INVALID_SIZE;
191 } 175 }
192 176
193 auto& vm_manager = Core::CurrentProcess()->VMManager(); 177 auto& vm_manager = system.Kernel().CurrentProcess()->VMManager();
194 const VAddr heap_base = vm_manager.GetHeapRegionBaseAddress(); 178 const auto alloc_result = vm_manager.SetHeapSize(heap_size);
195 const auto alloc_result =
196 vm_manager.HeapAllocate(heap_base, heap_size, VMAPermission::ReadWrite);
197
198 if (alloc_result.Failed()) { 179 if (alloc_result.Failed()) {
199 return alloc_result.Code(); 180 return alloc_result.Code();
200 } 181 }
@@ -203,7 +184,7 @@ static ResultCode SetHeapSize(VAddr* heap_addr, u64 heap_size) {
203 return RESULT_SUCCESS; 184 return RESULT_SUCCESS;
204} 185}
205 186
206static ResultCode SetMemoryPermission(VAddr addr, u64 size, u32 prot) { 187static ResultCode SetMemoryPermission(Core::System& system, VAddr addr, u64 size, u32 prot) {
207 LOG_TRACE(Kernel_SVC, "called, addr=0x{:X}, size=0x{:X}, prot=0x{:X}", addr, size, prot); 188 LOG_TRACE(Kernel_SVC, "called, addr=0x{:X}, size=0x{:X}, prot=0x{:X}", addr, size, prot);
208 189
209 if (!Common::Is4KBAligned(addr)) { 190 if (!Common::Is4KBAligned(addr)) {
@@ -235,10 +216,10 @@ static ResultCode SetMemoryPermission(VAddr addr, u64 size, u32 prot) {
235 return ERR_INVALID_MEMORY_PERMISSIONS; 216 return ERR_INVALID_MEMORY_PERMISSIONS;
236 } 217 }
237 218
238 auto* const current_process = Core::CurrentProcess(); 219 auto* const current_process = system.Kernel().CurrentProcess();
239 auto& vm_manager = current_process->VMManager(); 220 auto& vm_manager = current_process->VMManager();
240 221
241 if (!IsInsideAddressSpace(vm_manager, addr, size)) { 222 if (!vm_manager.IsWithinAddressSpace(addr, size)) {
242 LOG_ERROR(Kernel_SVC, 223 LOG_ERROR(Kernel_SVC,
243 "Source is not within the address space, addr=0x{:016X}, size=0x{:016X}", addr, 224 "Source is not within the address space, addr=0x{:016X}, size=0x{:016X}", addr,
244 size); 225 size);
@@ -260,7 +241,8 @@ static ResultCode SetMemoryPermission(VAddr addr, u64 size, u32 prot) {
260 return vm_manager.ReprotectRange(addr, size, converted_permissions); 241 return vm_manager.ReprotectRange(addr, size, converted_permissions);
261} 242}
262 243
263static ResultCode SetMemoryAttribute(VAddr address, u64 size, u32 mask, u32 attribute) { 244static ResultCode SetMemoryAttribute(Core::System& system, VAddr address, u64 size, u32 mask,
245 u32 attribute) {
264 LOG_DEBUG(Kernel_SVC, 246 LOG_DEBUG(Kernel_SVC,
265 "called, address=0x{:016X}, size=0x{:X}, mask=0x{:08X}, attribute=0x{:08X}", address, 247 "called, address=0x{:016X}, size=0x{:X}, mask=0x{:08X}, attribute=0x{:08X}", address,
266 size, mask, attribute); 248 size, mask, attribute);
@@ -298,8 +280,8 @@ static ResultCode SetMemoryAttribute(VAddr address, u64 size, u32 mask, u32 attr
298 return ERR_INVALID_COMBINATION; 280 return ERR_INVALID_COMBINATION;
299 } 281 }
300 282
301 auto& vm_manager = Core::CurrentProcess()->VMManager(); 283 auto& vm_manager = system.Kernel().CurrentProcess()->VMManager();
302 if (!IsInsideAddressSpace(vm_manager, address, size)) { 284 if (!vm_manager.IsWithinAddressSpace(address, size)) {
303 LOG_ERROR(Kernel_SVC, 285 LOG_ERROR(Kernel_SVC,
304 "Given address (0x{:016X}) is outside the bounds of the address space.", address); 286 "Given address (0x{:016X}) is outside the bounds of the address space.", address);
305 return ERR_INVALID_ADDRESS_STATE; 287 return ERR_INVALID_ADDRESS_STATE;
@@ -309,11 +291,11 @@ static ResultCode SetMemoryAttribute(VAddr address, u64 size, u32 mask, u32 attr
309} 291}
310 292
311/// Maps a memory range into a different range. 293/// Maps a memory range into a different range.
312static ResultCode MapMemory(VAddr dst_addr, VAddr src_addr, u64 size) { 294static ResultCode MapMemory(Core::System& system, VAddr dst_addr, VAddr src_addr, u64 size) {
313 LOG_TRACE(Kernel_SVC, "called, dst_addr=0x{:X}, src_addr=0x{:X}, size=0x{:X}", dst_addr, 295 LOG_TRACE(Kernel_SVC, "called, dst_addr=0x{:X}, src_addr=0x{:X}, size=0x{:X}", dst_addr,
314 src_addr, size); 296 src_addr, size);
315 297
316 auto& vm_manager = Core::CurrentProcess()->VMManager(); 298 auto& vm_manager = system.Kernel().CurrentProcess()->VMManager();
317 const auto result = MapUnmapMemorySanityChecks(vm_manager, dst_addr, src_addr, size); 299 const auto result = MapUnmapMemorySanityChecks(vm_manager, dst_addr, src_addr, size);
318 300
319 if (result.IsError()) { 301 if (result.IsError()) {
@@ -324,11 +306,11 @@ static ResultCode MapMemory(VAddr dst_addr, VAddr src_addr, u64 size) {
324} 306}
325 307
326/// Unmaps a region that was previously mapped with svcMapMemory 308/// Unmaps a region that was previously mapped with svcMapMemory
327static ResultCode UnmapMemory(VAddr dst_addr, VAddr src_addr, u64 size) { 309static ResultCode UnmapMemory(Core::System& system, VAddr dst_addr, VAddr src_addr, u64 size) {
328 LOG_TRACE(Kernel_SVC, "called, dst_addr=0x{:X}, src_addr=0x{:X}, size=0x{:X}", dst_addr, 310 LOG_TRACE(Kernel_SVC, "called, dst_addr=0x{:X}, src_addr=0x{:X}, size=0x{:X}", dst_addr,
329 src_addr, size); 311 src_addr, size);
330 312
331 auto& vm_manager = Core::CurrentProcess()->VMManager(); 313 auto& vm_manager = system.Kernel().CurrentProcess()->VMManager();
332 const auto result = MapUnmapMemorySanityChecks(vm_manager, dst_addr, src_addr, size); 314 const auto result = MapUnmapMemorySanityChecks(vm_manager, dst_addr, src_addr, size);
333 315
334 if (result.IsError()) { 316 if (result.IsError()) {
@@ -339,7 +321,8 @@ static ResultCode UnmapMemory(VAddr dst_addr, VAddr src_addr, u64 size) {
339} 321}
340 322
341/// Connect to an OS service given the port name, returns the handle to the port to out 323/// Connect to an OS service given the port name, returns the handle to the port to out
342static ResultCode ConnectToNamedPort(Handle* out_handle, VAddr port_name_address) { 324static ResultCode ConnectToNamedPort(Core::System& system, Handle* out_handle,
325 VAddr port_name_address) {
343 if (!Memory::IsValidVirtualAddress(port_name_address)) { 326 if (!Memory::IsValidVirtualAddress(port_name_address)) {
344 LOG_ERROR(Kernel_SVC, 327 LOG_ERROR(Kernel_SVC,
345 "Port Name Address is not a valid virtual address, port_name_address=0x{:016X}", 328 "Port Name Address is not a valid virtual address, port_name_address=0x{:016X}",
@@ -358,8 +341,8 @@ static ResultCode ConnectToNamedPort(Handle* out_handle, VAddr port_name_address
358 341
359 LOG_TRACE(Kernel_SVC, "called port_name={}", port_name); 342 LOG_TRACE(Kernel_SVC, "called port_name={}", port_name);
360 343
361 auto& kernel = Core::System::GetInstance().Kernel(); 344 auto& kernel = system.Kernel();
362 auto it = kernel.FindNamedPort(port_name); 345 const auto it = kernel.FindNamedPort(port_name);
363 if (!kernel.IsValidNamedPort(it)) { 346 if (!kernel.IsValidNamedPort(it)) {
364 LOG_WARNING(Kernel_SVC, "tried to connect to unknown port: {}", port_name); 347 LOG_WARNING(Kernel_SVC, "tried to connect to unknown port: {}", port_name);
365 return ERR_NOT_FOUND; 348 return ERR_NOT_FOUND;
@@ -371,14 +354,14 @@ static ResultCode ConnectToNamedPort(Handle* out_handle, VAddr port_name_address
371 CASCADE_RESULT(client_session, client_port->Connect()); 354 CASCADE_RESULT(client_session, client_port->Connect());
372 355
373 // Return the client session 356 // Return the client session
374 auto& handle_table = Core::CurrentProcess()->GetHandleTable(); 357 auto& handle_table = kernel.CurrentProcess()->GetHandleTable();
375 CASCADE_RESULT(*out_handle, handle_table.Create(client_session)); 358 CASCADE_RESULT(*out_handle, handle_table.Create(client_session));
376 return RESULT_SUCCESS; 359 return RESULT_SUCCESS;
377} 360}
378 361
379/// Makes a blocking IPC call to an OS service. 362/// Makes a blocking IPC call to an OS service.
380static ResultCode SendSyncRequest(Handle handle) { 363static ResultCode SendSyncRequest(Core::System& system, Handle handle) {
381 const auto& handle_table = Core::CurrentProcess()->GetHandleTable(); 364 const auto& handle_table = system.Kernel().CurrentProcess()->GetHandleTable();
382 SharedPtr<ClientSession> session = handle_table.Get<ClientSession>(handle); 365 SharedPtr<ClientSession> session = handle_table.Get<ClientSession>(handle);
383 if (!session) { 366 if (!session) {
384 LOG_ERROR(Kernel_SVC, "called with invalid handle=0x{:08X}", handle); 367 LOG_ERROR(Kernel_SVC, "called with invalid handle=0x{:08X}", handle);
@@ -387,18 +370,18 @@ static ResultCode SendSyncRequest(Handle handle) {
387 370
388 LOG_TRACE(Kernel_SVC, "called handle=0x{:08X}({})", handle, session->GetName()); 371 LOG_TRACE(Kernel_SVC, "called handle=0x{:08X}({})", handle, session->GetName());
389 372
390 Core::System::GetInstance().PrepareReschedule(); 373 system.PrepareReschedule();
391 374
392 // TODO(Subv): svcSendSyncRequest should put the caller thread to sleep while the server 375 // TODO(Subv): svcSendSyncRequest should put the caller thread to sleep while the server
393 // responds and cause a reschedule. 376 // responds and cause a reschedule.
394 return session->SendSyncRequest(GetCurrentThread()); 377 return session->SendSyncRequest(system.CurrentScheduler().GetCurrentThread());
395} 378}
396 379
397/// Get the ID for the specified thread. 380/// Get the ID for the specified thread.
398static ResultCode GetThreadId(u64* thread_id, Handle thread_handle) { 381static ResultCode GetThreadId(Core::System& system, u64* thread_id, Handle thread_handle) {
399 LOG_TRACE(Kernel_SVC, "called thread=0x{:08X}", thread_handle); 382 LOG_TRACE(Kernel_SVC, "called thread=0x{:08X}", thread_handle);
400 383
401 const auto& handle_table = Core::CurrentProcess()->GetHandleTable(); 384 const auto& handle_table = system.Kernel().CurrentProcess()->GetHandleTable();
402 const SharedPtr<Thread> thread = handle_table.Get<Thread>(thread_handle); 385 const SharedPtr<Thread> thread = handle_table.Get<Thread>(thread_handle);
403 if (!thread) { 386 if (!thread) {
404 LOG_ERROR(Kernel_SVC, "Thread handle does not exist, handle=0x{:08X}", thread_handle); 387 LOG_ERROR(Kernel_SVC, "Thread handle does not exist, handle=0x{:08X}", thread_handle);
@@ -410,10 +393,10 @@ static ResultCode GetThreadId(u64* thread_id, Handle thread_handle) {
410} 393}
411 394
412/// Gets the ID of the specified process or a specified thread's owning process. 395/// Gets the ID of the specified process or a specified thread's owning process.
413static ResultCode GetProcessId(u64* process_id, Handle handle) { 396static ResultCode GetProcessId(Core::System& system, u64* process_id, Handle handle) {
414 LOG_DEBUG(Kernel_SVC, "called handle=0x{:08X}", handle); 397 LOG_DEBUG(Kernel_SVC, "called handle=0x{:08X}", handle);
415 398
416 const auto& handle_table = Core::CurrentProcess()->GetHandleTable(); 399 const auto& handle_table = system.Kernel().CurrentProcess()->GetHandleTable();
417 const SharedPtr<Process> process = handle_table.Get<Process>(handle); 400 const SharedPtr<Process> process = handle_table.Get<Process>(handle);
418 if (process) { 401 if (process) {
419 *process_id = process->GetProcessID(); 402 *process_id = process->GetProcessID();
@@ -455,8 +438,8 @@ static bool DefaultThreadWakeupCallback(ThreadWakeupReason reason, SharedPtr<Thr
455}; 438};
456 439
457/// Wait for the given handles to synchronize, timeout after the specified nanoseconds 440/// Wait for the given handles to synchronize, timeout after the specified nanoseconds
458static ResultCode WaitSynchronization(Handle* index, VAddr handles_address, u64 handle_count, 441static ResultCode WaitSynchronization(Core::System& system, Handle* index, VAddr handles_address,
459 s64 nano_seconds) { 442 u64 handle_count, s64 nano_seconds) {
460 LOG_TRACE(Kernel_SVC, "called handles_address=0x{:X}, handle_count={}, nano_seconds={}", 443 LOG_TRACE(Kernel_SVC, "called handles_address=0x{:X}, handle_count={}, nano_seconds={}",
461 handles_address, handle_count, nano_seconds); 444 handles_address, handle_count, nano_seconds);
462 445
@@ -475,11 +458,11 @@ static ResultCode WaitSynchronization(Handle* index, VAddr handles_address, u64
475 return ERR_OUT_OF_RANGE; 458 return ERR_OUT_OF_RANGE;
476 } 459 }
477 460
478 auto* const thread = GetCurrentThread(); 461 auto* const thread = system.CurrentScheduler().GetCurrentThread();
479 462
480 using ObjectPtr = Thread::ThreadWaitObjects::value_type; 463 using ObjectPtr = Thread::ThreadWaitObjects::value_type;
481 Thread::ThreadWaitObjects objects(handle_count); 464 Thread::ThreadWaitObjects objects(handle_count);
482 const auto& handle_table = Core::CurrentProcess()->GetHandleTable(); 465 const auto& handle_table = system.Kernel().CurrentProcess()->GetHandleTable();
483 466
484 for (u64 i = 0; i < handle_count; ++i) { 467 for (u64 i = 0; i < handle_count; ++i) {
485 const Handle handle = Memory::Read32(handles_address + i * sizeof(Handle)); 468 const Handle handle = Memory::Read32(handles_address + i * sizeof(Handle));
@@ -525,16 +508,16 @@ static ResultCode WaitSynchronization(Handle* index, VAddr handles_address, u64
525 thread->WakeAfterDelay(nano_seconds); 508 thread->WakeAfterDelay(nano_seconds);
526 thread->SetWakeupCallback(DefaultThreadWakeupCallback); 509 thread->SetWakeupCallback(DefaultThreadWakeupCallback);
527 510
528 Core::System::GetInstance().CpuCore(thread->GetProcessorID()).PrepareReschedule(); 511 system.CpuCore(thread->GetProcessorID()).PrepareReschedule();
529 512
530 return RESULT_TIMEOUT; 513 return RESULT_TIMEOUT;
531} 514}
532 515
533/// Resumes a thread waiting on WaitSynchronization 516/// Resumes a thread waiting on WaitSynchronization
534static ResultCode CancelSynchronization(Handle thread_handle) { 517static ResultCode CancelSynchronization(Core::System& system, Handle thread_handle) {
535 LOG_TRACE(Kernel_SVC, "called thread=0x{:X}", thread_handle); 518 LOG_TRACE(Kernel_SVC, "called thread=0x{:X}", thread_handle);
536 519
537 const auto& handle_table = Core::CurrentProcess()->GetHandleTable(); 520 const auto& handle_table = system.Kernel().CurrentProcess()->GetHandleTable();
538 const SharedPtr<Thread> thread = handle_table.Get<Thread>(thread_handle); 521 const SharedPtr<Thread> thread = handle_table.Get<Thread>(thread_handle);
539 if (!thread) { 522 if (!thread) {
540 LOG_ERROR(Kernel_SVC, "Thread handle does not exist, thread_handle=0x{:08X}", 523 LOG_ERROR(Kernel_SVC, "Thread handle does not exist, thread_handle=0x{:08X}",
@@ -549,8 +532,8 @@ static ResultCode CancelSynchronization(Handle thread_handle) {
549} 532}
550 533
551/// Attempts to locks a mutex, creating it if it does not already exist 534/// Attempts to locks a mutex, creating it if it does not already exist
552static ResultCode ArbitrateLock(Handle holding_thread_handle, VAddr mutex_addr, 535static ResultCode ArbitrateLock(Core::System& system, Handle holding_thread_handle,
553 Handle requesting_thread_handle) { 536 VAddr mutex_addr, Handle requesting_thread_handle) {
554 LOG_TRACE(Kernel_SVC, 537 LOG_TRACE(Kernel_SVC,
555 "called holding_thread_handle=0x{:08X}, mutex_addr=0x{:X}, " 538 "called holding_thread_handle=0x{:08X}, mutex_addr=0x{:X}, "
556 "requesting_current_thread_handle=0x{:08X}", 539 "requesting_current_thread_handle=0x{:08X}",
@@ -567,13 +550,13 @@ static ResultCode ArbitrateLock(Handle holding_thread_handle, VAddr mutex_addr,
567 return ERR_INVALID_ADDRESS; 550 return ERR_INVALID_ADDRESS;
568 } 551 }
569 552
570 auto& handle_table = Core::CurrentProcess()->GetHandleTable(); 553 auto* const current_process = system.Kernel().CurrentProcess();
571 return Mutex::TryAcquire(handle_table, mutex_addr, holding_thread_handle, 554 return current_process->GetMutex().TryAcquire(mutex_addr, holding_thread_handle,
572 requesting_thread_handle); 555 requesting_thread_handle);
573} 556}
574 557
575/// Unlock a mutex 558/// Unlock a mutex
576static ResultCode ArbitrateUnlock(VAddr mutex_addr) { 559static ResultCode ArbitrateUnlock(Core::System& system, VAddr mutex_addr) {
577 LOG_TRACE(Kernel_SVC, "called mutex_addr=0x{:X}", mutex_addr); 560 LOG_TRACE(Kernel_SVC, "called mutex_addr=0x{:X}", mutex_addr);
578 561
579 if (Memory::IsKernelVirtualAddress(mutex_addr)) { 562 if (Memory::IsKernelVirtualAddress(mutex_addr)) {
@@ -587,7 +570,8 @@ static ResultCode ArbitrateUnlock(VAddr mutex_addr) {
587 return ERR_INVALID_ADDRESS; 570 return ERR_INVALID_ADDRESS;
588 } 571 }
589 572
590 return Mutex::Release(mutex_addr); 573 auto* const current_process = system.Kernel().CurrentProcess();
574 return current_process->GetMutex().Release(mutex_addr);
591} 575}
592 576
593enum class BreakType : u32 { 577enum class BreakType : u32 {
@@ -597,6 +581,7 @@ enum class BreakType : u32 {
597 PostNROLoad = 4, 581 PostNROLoad = 4,
598 PreNROUnload = 5, 582 PreNROUnload = 5,
599 PostNROUnload = 6, 583 PostNROUnload = 6,
584 CppException = 7,
600}; 585};
601 586
602struct BreakReason { 587struct BreakReason {
@@ -608,7 +593,7 @@ struct BreakReason {
608}; 593};
609 594
610/// Break program execution 595/// Break program execution
611static void Break(u32 reason, u64 info1, u64 info2) { 596static void Break(Core::System& system, u32 reason, u64 info1, u64 info2) {
612 BreakReason break_reason{reason}; 597 BreakReason break_reason{reason};
613 bool has_dumped_buffer{}; 598 bool has_dumped_buffer{};
614 599
@@ -669,6 +654,9 @@ static void Break(u32 reason, u64 info1, u64 info2) {
669 "Signalling debugger, Unloaded an NRO at 0x{:016X} with size 0x{:016X}", info1, 654 "Signalling debugger, Unloaded an NRO at 0x{:016X} with size 0x{:016X}", info1,
670 info2); 655 info2);
671 break; 656 break;
657 case BreakType::CppException:
658 LOG_CRITICAL(Debug_Emulated, "Signalling debugger. Uncaught C++ exception encountered.");
659 break;
672 default: 660 default:
673 LOG_WARNING( 661 LOG_WARNING(
674 Debug_Emulated, 662 Debug_Emulated,
@@ -683,22 +671,24 @@ static void Break(u32 reason, u64 info1, u64 info2) {
683 Debug_Emulated, 671 Debug_Emulated,
684 "Emulated program broke execution! reason=0x{:016X}, info1=0x{:016X}, info2=0x{:016X}", 672 "Emulated program broke execution! reason=0x{:016X}, info1=0x{:016X}, info2=0x{:016X}",
685 reason, info1, info2); 673 reason, info1, info2);
674
686 handle_debug_buffer(info1, info2); 675 handle_debug_buffer(info1, info2);
687 Core::System::GetInstance() 676
688 .ArmInterface(static_cast<std::size_t>(GetCurrentThread()->GetProcessorID())) 677 auto* const current_thread = system.CurrentScheduler().GetCurrentThread();
689 .LogBacktrace(); 678 const auto thread_processor_id = current_thread->GetProcessorID();
679 system.ArmInterface(static_cast<std::size_t>(thread_processor_id)).LogBacktrace();
690 ASSERT(false); 680 ASSERT(false);
691 681
692 Core::CurrentProcess()->PrepareForTermination(); 682 system.Kernel().CurrentProcess()->PrepareForTermination();
693 683
694 // Kill the current thread 684 // Kill the current thread
695 GetCurrentThread()->Stop(); 685 current_thread->Stop();
696 Core::System::GetInstance().PrepareReschedule(); 686 system.PrepareReschedule();
697 } 687 }
698} 688}
699 689
700/// Used to output a message on a debug hardware unit - does nothing on a retail unit 690/// Used to output a message on a debug hardware unit - does nothing on a retail unit
701static void OutputDebugString(VAddr address, u64 len) { 691static void OutputDebugString([[maybe_unused]] Core::System& system, VAddr address, u64 len) {
702 if (len == 0) { 692 if (len == 0) {
703 return; 693 return;
704 } 694 }
@@ -709,7 +699,8 @@ static void OutputDebugString(VAddr address, u64 len) {
709} 699}
710 700
711/// Gets system/memory information for the current process 701/// Gets system/memory information for the current process
712static ResultCode GetInfo(u64* result, u64 info_id, u64 handle, u64 info_sub_id) { 702static ResultCode GetInfo(Core::System& system, u64* result, u64 info_id, u64 handle,
703 u64 info_sub_id) {
713 LOG_TRACE(Kernel_SVC, "called info_id=0x{:X}, info_sub_id=0x{:X}, handle=0x{:08X}", info_id, 704 LOG_TRACE(Kernel_SVC, "called info_id=0x{:X}, info_sub_id=0x{:X}, handle=0x{:08X}", info_id,
714 info_sub_id, handle); 705 info_sub_id, handle);
715 706
@@ -722,7 +713,7 @@ static ResultCode GetInfo(u64* result, u64 info_id, u64 handle, u64 info_sub_id)
722 HeapRegionBaseAddr = 4, 713 HeapRegionBaseAddr = 4,
723 HeapRegionSize = 5, 714 HeapRegionSize = 5,
724 TotalMemoryUsage = 6, 715 TotalMemoryUsage = 6,
725 TotalHeapUsage = 7, 716 TotalPhysicalMemoryUsed = 7,
726 IsCurrentProcessBeingDebugged = 8, 717 IsCurrentProcessBeingDebugged = 8,
727 RegisterResourceLimit = 9, 718 RegisterResourceLimit = 9,
728 IdleTickCount = 10, 719 IdleTickCount = 10,
@@ -758,7 +749,7 @@ static ResultCode GetInfo(u64* result, u64 info_id, u64 handle, u64 info_sub_id)
758 case GetInfoType::NewMapRegionBaseAddr: 749 case GetInfoType::NewMapRegionBaseAddr:
759 case GetInfoType::NewMapRegionSize: 750 case GetInfoType::NewMapRegionSize:
760 case GetInfoType::TotalMemoryUsage: 751 case GetInfoType::TotalMemoryUsage:
761 case GetInfoType::TotalHeapUsage: 752 case GetInfoType::TotalPhysicalMemoryUsed:
762 case GetInfoType::IsVirtualAddressMemoryEnabled: 753 case GetInfoType::IsVirtualAddressMemoryEnabled:
763 case GetInfoType::PersonalMmHeapUsage: 754 case GetInfoType::PersonalMmHeapUsage:
764 case GetInfoType::TitleId: 755 case GetInfoType::TitleId:
@@ -767,7 +758,8 @@ static ResultCode GetInfo(u64* result, u64 info_id, u64 handle, u64 info_sub_id)
767 return ERR_INVALID_ENUM_VALUE; 758 return ERR_INVALID_ENUM_VALUE;
768 } 759 }
769 760
770 const auto& current_process_handle_table = Core::CurrentProcess()->GetHandleTable(); 761 const auto& current_process_handle_table =
762 system.Kernel().CurrentProcess()->GetHandleTable();
771 const auto process = current_process_handle_table.Get<Process>(static_cast<Handle>(handle)); 763 const auto process = current_process_handle_table.Get<Process>(static_cast<Handle>(handle));
772 if (!process) { 764 if (!process) {
773 return ERR_INVALID_HANDLE; 765 return ERR_INVALID_HANDLE;
@@ -818,8 +810,8 @@ static ResultCode GetInfo(u64* result, u64 info_id, u64 handle, u64 info_sub_id)
818 *result = process->VMManager().GetTotalMemoryUsage(); 810 *result = process->VMManager().GetTotalMemoryUsage();
819 return RESULT_SUCCESS; 811 return RESULT_SUCCESS;
820 812
821 case GetInfoType::TotalHeapUsage: 813 case GetInfoType::TotalPhysicalMemoryUsed:
822 *result = process->VMManager().GetTotalHeapUsage(); 814 *result = process->GetTotalPhysicalMemoryUsed();
823 return RESULT_SUCCESS; 815 return RESULT_SUCCESS;
824 816
825 case GetInfoType::IsVirtualAddressMemoryEnabled: 817 case GetInfoType::IsVirtualAddressMemoryEnabled:
@@ -857,7 +849,7 @@ static ResultCode GetInfo(u64* result, u64 info_id, u64 handle, u64 info_sub_id)
857 return ERR_INVALID_COMBINATION; 849 return ERR_INVALID_COMBINATION;
858 } 850 }
859 851
860 Process* const current_process = Core::CurrentProcess(); 852 Process* const current_process = system.Kernel().CurrentProcess();
861 HandleTable& handle_table = current_process->GetHandleTable(); 853 HandleTable& handle_table = current_process->GetHandleTable();
862 const auto resource_limit = current_process->GetResourceLimit(); 854 const auto resource_limit = current_process->GetResourceLimit();
863 if (!resource_limit) { 855 if (!resource_limit) {
@@ -888,7 +880,7 @@ static ResultCode GetInfo(u64* result, u64 info_id, u64 handle, u64 info_sub_id)
888 return ERR_INVALID_COMBINATION; 880 return ERR_INVALID_COMBINATION;
889 } 881 }
890 882
891 *result = Core::CurrentProcess()->GetRandomEntropy(info_sub_id); 883 *result = system.Kernel().CurrentProcess()->GetRandomEntropy(info_sub_id);
892 return RESULT_SUCCESS; 884 return RESULT_SUCCESS;
893 885
894 case GetInfoType::PrivilegedProcessId: 886 case GetInfoType::PrivilegedProcessId:
@@ -905,15 +897,15 @@ static ResultCode GetInfo(u64* result, u64 info_id, u64 handle, u64 info_sub_id)
905 return ERR_INVALID_COMBINATION; 897 return ERR_INVALID_COMBINATION;
906 } 898 }
907 899
908 const auto thread = 900 const auto thread = system.Kernel().CurrentProcess()->GetHandleTable().Get<Thread>(
909 Core::CurrentProcess()->GetHandleTable().Get<Thread>(static_cast<Handle>(handle)); 901 static_cast<Handle>(handle));
910 if (!thread) { 902 if (!thread) {
911 LOG_ERROR(Kernel_SVC, "Thread handle does not exist, handle=0x{:08X}", 903 LOG_ERROR(Kernel_SVC, "Thread handle does not exist, handle=0x{:08X}",
912 static_cast<Handle>(handle)); 904 static_cast<Handle>(handle));
913 return ERR_INVALID_HANDLE; 905 return ERR_INVALID_HANDLE;
914 } 906 }
915 907
916 const auto& system = Core::System::GetInstance(); 908 const auto& core_timing = system.CoreTiming();
917 const auto& scheduler = system.CurrentScheduler(); 909 const auto& scheduler = system.CurrentScheduler();
918 const auto* const current_thread = scheduler.GetCurrentThread(); 910 const auto* const current_thread = scheduler.GetCurrentThread();
919 const bool same_thread = current_thread == thread; 911 const bool same_thread = current_thread == thread;
@@ -923,9 +915,9 @@ static ResultCode GetInfo(u64* result, u64 info_id, u64 handle, u64 info_sub_id)
923 if (same_thread && info_sub_id == 0xFFFFFFFFFFFFFFFF) { 915 if (same_thread && info_sub_id == 0xFFFFFFFFFFFFFFFF) {
924 const u64 thread_ticks = current_thread->GetTotalCPUTimeTicks(); 916 const u64 thread_ticks = current_thread->GetTotalCPUTimeTicks();
925 917
926 out_ticks = thread_ticks + (CoreTiming::GetTicks() - prev_ctx_ticks); 918 out_ticks = thread_ticks + (core_timing.GetTicks() - prev_ctx_ticks);
927 } else if (same_thread && info_sub_id == system.CurrentCoreIndex()) { 919 } else if (same_thread && info_sub_id == system.CurrentCoreIndex()) {
928 out_ticks = CoreTiming::GetTicks() - prev_ctx_ticks; 920 out_ticks = core_timing.GetTicks() - prev_ctx_ticks;
929 } 921 }
930 922
931 *result = out_ticks; 923 *result = out_ticks;
@@ -939,13 +931,13 @@ static ResultCode GetInfo(u64* result, u64 info_id, u64 handle, u64 info_sub_id)
939} 931}
940 932
941/// Sets the thread activity 933/// Sets the thread activity
942static ResultCode SetThreadActivity(Handle handle, u32 activity) { 934static ResultCode SetThreadActivity(Core::System& system, Handle handle, u32 activity) {
943 LOG_DEBUG(Kernel_SVC, "called, handle=0x{:08X}, activity=0x{:08X}", handle, activity); 935 LOG_DEBUG(Kernel_SVC, "called, handle=0x{:08X}, activity=0x{:08X}", handle, activity);
944 if (activity > static_cast<u32>(ThreadActivity::Paused)) { 936 if (activity > static_cast<u32>(ThreadActivity::Paused)) {
945 return ERR_INVALID_ENUM_VALUE; 937 return ERR_INVALID_ENUM_VALUE;
946 } 938 }
947 939
948 const auto* current_process = Core::CurrentProcess(); 940 const auto* current_process = system.Kernel().CurrentProcess();
949 const SharedPtr<Thread> thread = current_process->GetHandleTable().Get<Thread>(handle); 941 const SharedPtr<Thread> thread = current_process->GetHandleTable().Get<Thread>(handle);
950 if (!thread) { 942 if (!thread) {
951 LOG_ERROR(Kernel_SVC, "Thread handle does not exist, handle=0x{:08X}", handle); 943 LOG_ERROR(Kernel_SVC, "Thread handle does not exist, handle=0x{:08X}", handle);
@@ -962,7 +954,7 @@ static ResultCode SetThreadActivity(Handle handle, u32 activity) {
962 return ERR_INVALID_HANDLE; 954 return ERR_INVALID_HANDLE;
963 } 955 }
964 956
965 if (thread == GetCurrentThread()) { 957 if (thread == system.CurrentScheduler().GetCurrentThread()) {
966 LOG_ERROR(Kernel_SVC, "The thread handle specified is the current running thread"); 958 LOG_ERROR(Kernel_SVC, "The thread handle specified is the current running thread");
967 return ERR_BUSY; 959 return ERR_BUSY;
968 } 960 }
@@ -972,10 +964,10 @@ static ResultCode SetThreadActivity(Handle handle, u32 activity) {
972} 964}
973 965
974/// Gets the thread context 966/// Gets the thread context
975static ResultCode GetThreadContext(VAddr thread_context, Handle handle) { 967static ResultCode GetThreadContext(Core::System& system, VAddr thread_context, Handle handle) {
976 LOG_DEBUG(Kernel_SVC, "called, context=0x{:08X}, thread=0x{:X}", thread_context, handle); 968 LOG_DEBUG(Kernel_SVC, "called, context=0x{:08X}, thread=0x{:X}", thread_context, handle);
977 969
978 const auto* current_process = Core::CurrentProcess(); 970 const auto* current_process = system.Kernel().CurrentProcess();
979 const SharedPtr<Thread> thread = current_process->GetHandleTable().Get<Thread>(handle); 971 const SharedPtr<Thread> thread = current_process->GetHandleTable().Get<Thread>(handle);
980 if (!thread) { 972 if (!thread) {
981 LOG_ERROR(Kernel_SVC, "Thread handle does not exist, handle=0x{:08X}", handle); 973 LOG_ERROR(Kernel_SVC, "Thread handle does not exist, handle=0x{:08X}", handle);
@@ -992,7 +984,7 @@ static ResultCode GetThreadContext(VAddr thread_context, Handle handle) {
992 return ERR_INVALID_HANDLE; 984 return ERR_INVALID_HANDLE;
993 } 985 }
994 986
995 if (thread == GetCurrentThread()) { 987 if (thread == system.CurrentScheduler().GetCurrentThread()) {
996 LOG_ERROR(Kernel_SVC, "The thread handle specified is the current running thread"); 988 LOG_ERROR(Kernel_SVC, "The thread handle specified is the current running thread");
997 return ERR_BUSY; 989 return ERR_BUSY;
998 } 990 }
@@ -1013,10 +1005,10 @@ static ResultCode GetThreadContext(VAddr thread_context, Handle handle) {
1013} 1005}
1014 1006
1015/// Gets the priority for the specified thread 1007/// Gets the priority for the specified thread
1016static ResultCode GetThreadPriority(u32* priority, Handle handle) { 1008static ResultCode GetThreadPriority(Core::System& system, u32* priority, Handle handle) {
1017 LOG_TRACE(Kernel_SVC, "called"); 1009 LOG_TRACE(Kernel_SVC, "called");
1018 1010
1019 const auto& handle_table = Core::CurrentProcess()->GetHandleTable(); 1011 const auto& handle_table = system.Kernel().CurrentProcess()->GetHandleTable();
1020 const SharedPtr<Thread> thread = handle_table.Get<Thread>(handle); 1012 const SharedPtr<Thread> thread = handle_table.Get<Thread>(handle);
1021 if (!thread) { 1013 if (!thread) {
1022 LOG_ERROR(Kernel_SVC, "Thread handle does not exist, handle=0x{:08X}", handle); 1014 LOG_ERROR(Kernel_SVC, "Thread handle does not exist, handle=0x{:08X}", handle);
@@ -1028,7 +1020,7 @@ static ResultCode GetThreadPriority(u32* priority, Handle handle) {
1028} 1020}
1029 1021
1030/// Sets the priority for the specified thread 1022/// Sets the priority for the specified thread
1031static ResultCode SetThreadPriority(Handle handle, u32 priority) { 1023static ResultCode SetThreadPriority(Core::System& system, Handle handle, u32 priority) {
1032 LOG_TRACE(Kernel_SVC, "called"); 1024 LOG_TRACE(Kernel_SVC, "called");
1033 1025
1034 if (priority > THREADPRIO_LOWEST) { 1026 if (priority > THREADPRIO_LOWEST) {
@@ -1039,7 +1031,7 @@ static ResultCode SetThreadPriority(Handle handle, u32 priority) {
1039 return ERR_INVALID_THREAD_PRIORITY; 1031 return ERR_INVALID_THREAD_PRIORITY;
1040 } 1032 }
1041 1033
1042 const auto* const current_process = Core::CurrentProcess(); 1034 const auto* const current_process = system.Kernel().CurrentProcess();
1043 1035
1044 SharedPtr<Thread> thread = current_process->GetHandleTable().Get<Thread>(handle); 1036 SharedPtr<Thread> thread = current_process->GetHandleTable().Get<Thread>(handle);
1045 if (!thread) { 1037 if (!thread) {
@@ -1049,18 +1041,18 @@ static ResultCode SetThreadPriority(Handle handle, u32 priority) {
1049 1041
1050 thread->SetPriority(priority); 1042 thread->SetPriority(priority);
1051 1043
1052 Core::System::GetInstance().CpuCore(thread->GetProcessorID()).PrepareReschedule(); 1044 system.CpuCore(thread->GetProcessorID()).PrepareReschedule();
1053 return RESULT_SUCCESS; 1045 return RESULT_SUCCESS;
1054} 1046}
1055 1047
1056/// Get which CPU core is executing the current thread 1048/// Get which CPU core is executing the current thread
1057static u32 GetCurrentProcessorNumber() { 1049static u32 GetCurrentProcessorNumber(Core::System& system) {
1058 LOG_TRACE(Kernel_SVC, "called"); 1050 LOG_TRACE(Kernel_SVC, "called");
1059 return GetCurrentThread()->GetProcessorID(); 1051 return system.CurrentScheduler().GetCurrentThread()->GetProcessorID();
1060} 1052}
1061 1053
1062static ResultCode MapSharedMemory(Handle shared_memory_handle, VAddr addr, u64 size, 1054static ResultCode MapSharedMemory(Core::System& system, Handle shared_memory_handle, VAddr addr,
1063 u32 permissions) { 1055 u64 size, u32 permissions) {
1064 LOG_TRACE(Kernel_SVC, 1056 LOG_TRACE(Kernel_SVC,
1065 "called, shared_memory_handle=0x{:X}, addr=0x{:X}, size=0x{:X}, permissions=0x{:08X}", 1057 "called, shared_memory_handle=0x{:X}, addr=0x{:X}, size=0x{:X}, permissions=0x{:08X}",
1066 shared_memory_handle, addr, size, permissions); 1058 shared_memory_handle, addr, size, permissions);
@@ -1094,7 +1086,7 @@ static ResultCode MapSharedMemory(Handle shared_memory_handle, VAddr addr, u64 s
1094 return ERR_INVALID_MEMORY_PERMISSIONS; 1086 return ERR_INVALID_MEMORY_PERMISSIONS;
1095 } 1087 }
1096 1088
1097 auto* const current_process = Core::CurrentProcess(); 1089 auto* const current_process = system.Kernel().CurrentProcess();
1098 auto shared_memory = current_process->GetHandleTable().Get<SharedMemory>(shared_memory_handle); 1090 auto shared_memory = current_process->GetHandleTable().Get<SharedMemory>(shared_memory_handle);
1099 if (!shared_memory) { 1091 if (!shared_memory) {
1100 LOG_ERROR(Kernel_SVC, "Shared memory does not exist, shared_memory_handle=0x{:08X}", 1092 LOG_ERROR(Kernel_SVC, "Shared memory does not exist, shared_memory_handle=0x{:08X}",
@@ -1112,7 +1104,8 @@ static ResultCode MapSharedMemory(Handle shared_memory_handle, VAddr addr, u64 s
1112 return shared_memory->Map(*current_process, addr, permissions_type, MemoryPermission::DontCare); 1104 return shared_memory->Map(*current_process, addr, permissions_type, MemoryPermission::DontCare);
1113} 1105}
1114 1106
1115static ResultCode UnmapSharedMemory(Handle shared_memory_handle, VAddr addr, u64 size) { 1107static ResultCode UnmapSharedMemory(Core::System& system, Handle shared_memory_handle, VAddr addr,
1108 u64 size) {
1116 LOG_WARNING(Kernel_SVC, "called, shared_memory_handle=0x{:08X}, addr=0x{:X}, size=0x{:X}", 1109 LOG_WARNING(Kernel_SVC, "called, shared_memory_handle=0x{:08X}, addr=0x{:X}, size=0x{:X}",
1117 shared_memory_handle, addr, size); 1110 shared_memory_handle, addr, size);
1118 1111
@@ -1137,7 +1130,7 @@ static ResultCode UnmapSharedMemory(Handle shared_memory_handle, VAddr addr, u64
1137 return ERR_INVALID_ADDRESS_STATE; 1130 return ERR_INVALID_ADDRESS_STATE;
1138 } 1131 }
1139 1132
1140 auto* const current_process = Core::CurrentProcess(); 1133 auto* const current_process = system.Kernel().CurrentProcess();
1141 auto shared_memory = current_process->GetHandleTable().Get<SharedMemory>(shared_memory_handle); 1134 auto shared_memory = current_process->GetHandleTable().Get<SharedMemory>(shared_memory_handle);
1142 if (!shared_memory) { 1135 if (!shared_memory) {
1143 LOG_ERROR(Kernel_SVC, "Shared memory does not exist, shared_memory_handle=0x{:08X}", 1136 LOG_ERROR(Kernel_SVC, "Shared memory does not exist, shared_memory_handle=0x{:08X}",
@@ -1152,13 +1145,14 @@ static ResultCode UnmapSharedMemory(Handle shared_memory_handle, VAddr addr, u64
1152 return ERR_INVALID_MEMORY_RANGE; 1145 return ERR_INVALID_MEMORY_RANGE;
1153 } 1146 }
1154 1147
1155 return shared_memory->Unmap(*current_process, addr); 1148 return shared_memory->Unmap(*current_process, addr, size);
1156} 1149}
1157 1150
1158static ResultCode QueryProcessMemory(VAddr memory_info_address, VAddr page_info_address, 1151static ResultCode QueryProcessMemory(Core::System& system, VAddr memory_info_address,
1159 Handle process_handle, VAddr address) { 1152 VAddr page_info_address, Handle process_handle,
1153 VAddr address) {
1160 LOG_TRACE(Kernel_SVC, "called process=0x{:08X} address={:X}", process_handle, address); 1154 LOG_TRACE(Kernel_SVC, "called process=0x{:08X} address={:X}", process_handle, address);
1161 const auto& handle_table = Core::CurrentProcess()->GetHandleTable(); 1155 const auto& handle_table = system.Kernel().CurrentProcess()->GetHandleTable();
1162 SharedPtr<Process> process = handle_table.Get<Process>(process_handle); 1156 SharedPtr<Process> process = handle_table.Get<Process>(process_handle);
1163 if (!process) { 1157 if (!process) {
1164 LOG_ERROR(Kernel_SVC, "Process handle does not exist, process_handle=0x{:08X}", 1158 LOG_ERROR(Kernel_SVC, "Process handle does not exist, process_handle=0x{:08X}",
@@ -1184,20 +1178,20 @@ static ResultCode QueryProcessMemory(VAddr memory_info_address, VAddr page_info_
1184 return RESULT_SUCCESS; 1178 return RESULT_SUCCESS;
1185} 1179}
1186 1180
1187static ResultCode QueryMemory(VAddr memory_info_address, VAddr page_info_address, 1181static ResultCode QueryMemory(Core::System& system, VAddr memory_info_address,
1188 VAddr query_address) { 1182 VAddr page_info_address, VAddr query_address) {
1189 LOG_TRACE(Kernel_SVC, 1183 LOG_TRACE(Kernel_SVC,
1190 "called, memory_info_address=0x{:016X}, page_info_address=0x{:016X}, " 1184 "called, memory_info_address=0x{:016X}, page_info_address=0x{:016X}, "
1191 "query_address=0x{:016X}", 1185 "query_address=0x{:016X}",
1192 memory_info_address, page_info_address, query_address); 1186 memory_info_address, page_info_address, query_address);
1193 1187
1194 return QueryProcessMemory(memory_info_address, page_info_address, CurrentProcess, 1188 return QueryProcessMemory(system, memory_info_address, page_info_address, CurrentProcess,
1195 query_address); 1189 query_address);
1196} 1190}
1197 1191
1198/// Exits the current process 1192/// Exits the current process
1199static void ExitProcess() { 1193static void ExitProcess(Core::System& system) {
1200 auto* current_process = Core::CurrentProcess(); 1194 auto* current_process = system.Kernel().CurrentProcess();
1201 1195
1202 LOG_INFO(Kernel_SVC, "Process {} exiting", current_process->GetProcessID()); 1196 LOG_INFO(Kernel_SVC, "Process {} exiting", current_process->GetProcessID());
1203 ASSERT_MSG(current_process->GetStatus() == ProcessStatus::Running, 1197 ASSERT_MSG(current_process->GetStatus() == ProcessStatus::Running,
@@ -1206,20 +1200,20 @@ static void ExitProcess() {
1206 current_process->PrepareForTermination(); 1200 current_process->PrepareForTermination();
1207 1201
1208 // Kill the current thread 1202 // Kill the current thread
1209 GetCurrentThread()->Stop(); 1203 system.CurrentScheduler().GetCurrentThread()->Stop();
1210 1204
1211 Core::System::GetInstance().PrepareReschedule(); 1205 system.PrepareReschedule();
1212} 1206}
1213 1207
1214/// Creates a new thread 1208/// Creates a new thread
1215static ResultCode CreateThread(Handle* out_handle, VAddr entry_point, u64 arg, VAddr stack_top, 1209static ResultCode CreateThread(Core::System& system, Handle* out_handle, VAddr entry_point, u64 arg,
1216 u32 priority, s32 processor_id) { 1210 VAddr stack_top, u32 priority, s32 processor_id) {
1217 LOG_TRACE(Kernel_SVC, 1211 LOG_TRACE(Kernel_SVC,
1218 "called entrypoint=0x{:08X}, arg=0x{:08X}, stacktop=0x{:08X}, " 1212 "called entrypoint=0x{:08X}, arg=0x{:08X}, stacktop=0x{:08X}, "
1219 "threadpriority=0x{:08X}, processorid=0x{:08X} : created handle=0x{:08X}", 1213 "threadpriority=0x{:08X}, processorid=0x{:08X} : created handle=0x{:08X}",
1220 entry_point, arg, stack_top, priority, processor_id, *out_handle); 1214 entry_point, arg, stack_top, priority, processor_id, *out_handle);
1221 1215
1222 auto* const current_process = Core::CurrentProcess(); 1216 auto* const current_process = system.Kernel().CurrentProcess();
1223 1217
1224 if (processor_id == THREADPROCESSORID_IDEAL) { 1218 if (processor_id == THREADPROCESSORID_IDEAL) {
1225 // Set the target CPU to the one specified by the process. 1219 // Set the target CPU to the one specified by the process.
@@ -1251,7 +1245,7 @@ static ResultCode CreateThread(Handle* out_handle, VAddr entry_point, u64 arg, V
1251 } 1245 }
1252 1246
1253 const std::string name = fmt::format("thread-{:X}", entry_point); 1247 const std::string name = fmt::format("thread-{:X}", entry_point);
1254 auto& kernel = Core::System::GetInstance().Kernel(); 1248 auto& kernel = system.Kernel();
1255 CASCADE_RESULT(SharedPtr<Thread> thread, 1249 CASCADE_RESULT(SharedPtr<Thread> thread,
1256 Thread::Create(kernel, name, entry_point, priority, arg, processor_id, stack_top, 1250 Thread::Create(kernel, name, entry_point, priority, arg, processor_id, stack_top,
1257 *current_process)); 1251 *current_process));
@@ -1265,16 +1259,16 @@ static ResultCode CreateThread(Handle* out_handle, VAddr entry_point, u64 arg, V
1265 thread->SetGuestHandle(*new_guest_handle); 1259 thread->SetGuestHandle(*new_guest_handle);
1266 *out_handle = *new_guest_handle; 1260 *out_handle = *new_guest_handle;
1267 1261
1268 Core::System::GetInstance().CpuCore(thread->GetProcessorID()).PrepareReschedule(); 1262 system.CpuCore(thread->GetProcessorID()).PrepareReschedule();
1269 1263
1270 return RESULT_SUCCESS; 1264 return RESULT_SUCCESS;
1271} 1265}
1272 1266
1273/// Starts the thread for the provided handle 1267/// Starts the thread for the provided handle
1274static ResultCode StartThread(Handle thread_handle) { 1268static ResultCode StartThread(Core::System& system, Handle thread_handle) {
1275 LOG_TRACE(Kernel_SVC, "called thread=0x{:08X}", thread_handle); 1269 LOG_TRACE(Kernel_SVC, "called thread=0x{:08X}", thread_handle);
1276 1270
1277 const auto& handle_table = Core::CurrentProcess()->GetHandleTable(); 1271 const auto& handle_table = system.Kernel().CurrentProcess()->GetHandleTable();
1278 const SharedPtr<Thread> thread = handle_table.Get<Thread>(thread_handle); 1272 const SharedPtr<Thread> thread = handle_table.Get<Thread>(thread_handle);
1279 if (!thread) { 1273 if (!thread) {
1280 LOG_ERROR(Kernel_SVC, "Thread handle does not exist, thread_handle=0x{:08X}", 1274 LOG_ERROR(Kernel_SVC, "Thread handle does not exist, thread_handle=0x{:08X}",
@@ -1287,22 +1281,24 @@ static ResultCode StartThread(Handle thread_handle) {
1287 thread->ResumeFromWait(); 1281 thread->ResumeFromWait();
1288 1282
1289 if (thread->GetStatus() == ThreadStatus::Ready) { 1283 if (thread->GetStatus() == ThreadStatus::Ready) {
1290 Core::System::GetInstance().CpuCore(thread->GetProcessorID()).PrepareReschedule(); 1284 system.CpuCore(thread->GetProcessorID()).PrepareReschedule();
1291 } 1285 }
1292 1286
1293 return RESULT_SUCCESS; 1287 return RESULT_SUCCESS;
1294} 1288}
1295 1289
1296/// Called when a thread exits 1290/// Called when a thread exits
1297static void ExitThread() { 1291static void ExitThread(Core::System& system) {
1298 LOG_TRACE(Kernel_SVC, "called, pc=0x{:08X}", Core::CurrentArmInterface().GetPC()); 1292 LOG_TRACE(Kernel_SVC, "called, pc=0x{:08X}", system.CurrentArmInterface().GetPC());
1299 1293
1300 ExitCurrentThread(); 1294 auto* const current_thread = system.CurrentScheduler().GetCurrentThread();
1301 Core::System::GetInstance().PrepareReschedule(); 1295 current_thread->Stop();
1296 system.CurrentScheduler().RemoveThread(current_thread);
1297 system.PrepareReschedule();
1302} 1298}
1303 1299
1304/// Sleep the current thread 1300/// Sleep the current thread
1305static void SleepThread(s64 nanoseconds) { 1301static void SleepThread(Core::System& system, s64 nanoseconds) {
1306 LOG_TRACE(Kernel_SVC, "called nanoseconds={}", nanoseconds); 1302 LOG_TRACE(Kernel_SVC, "called nanoseconds={}", nanoseconds);
1307 1303
1308 enum class SleepType : s64 { 1304 enum class SleepType : s64 {
@@ -1311,72 +1307,91 @@ static void SleepThread(s64 nanoseconds) {
1311 YieldAndWaitForLoadBalancing = -2, 1307 YieldAndWaitForLoadBalancing = -2,
1312 }; 1308 };
1313 1309
1310 auto& scheduler = system.CurrentScheduler();
1311 auto* const current_thread = scheduler.GetCurrentThread();
1312
1314 if (nanoseconds <= 0) { 1313 if (nanoseconds <= 0) {
1315 auto& scheduler{Core::System::GetInstance().CurrentScheduler()};
1316 switch (static_cast<SleepType>(nanoseconds)) { 1314 switch (static_cast<SleepType>(nanoseconds)) {
1317 case SleepType::YieldWithoutLoadBalancing: 1315 case SleepType::YieldWithoutLoadBalancing:
1318 scheduler.YieldWithoutLoadBalancing(GetCurrentThread()); 1316 scheduler.YieldWithoutLoadBalancing(current_thread);
1319 break; 1317 break;
1320 case SleepType::YieldWithLoadBalancing: 1318 case SleepType::YieldWithLoadBalancing:
1321 scheduler.YieldWithLoadBalancing(GetCurrentThread()); 1319 scheduler.YieldWithLoadBalancing(current_thread);
1322 break; 1320 break;
1323 case SleepType::YieldAndWaitForLoadBalancing: 1321 case SleepType::YieldAndWaitForLoadBalancing:
1324 scheduler.YieldAndWaitForLoadBalancing(GetCurrentThread()); 1322 scheduler.YieldAndWaitForLoadBalancing(current_thread);
1325 break; 1323 break;
1326 default: 1324 default:
1327 UNREACHABLE_MSG("Unimplemented sleep yield type '{:016X}'!", nanoseconds); 1325 UNREACHABLE_MSG("Unimplemented sleep yield type '{:016X}'!", nanoseconds);
1328 } 1326 }
1329 } else { 1327 } else {
1330 // Sleep current thread and check for next thread to schedule 1328 current_thread->Sleep(nanoseconds);
1331 WaitCurrentThread_Sleep();
1332
1333 // Create an event to wake the thread up after the specified nanosecond delay has passed
1334 GetCurrentThread()->WakeAfterDelay(nanoseconds);
1335 } 1329 }
1336 1330
1337 // Reschedule all CPU cores 1331 // Reschedule all CPU cores
1338 for (std::size_t i = 0; i < Core::NUM_CPU_CORES; ++i) 1332 for (std::size_t i = 0; i < Core::NUM_CPU_CORES; ++i) {
1339 Core::System::GetInstance().CpuCore(i).PrepareReschedule(); 1333 system.CpuCore(i).PrepareReschedule();
1334 }
1340} 1335}
1341 1336
1342/// Wait process wide key atomic 1337/// Wait process wide key atomic
1343static ResultCode WaitProcessWideKeyAtomic(VAddr mutex_addr, VAddr condition_variable_addr, 1338static ResultCode WaitProcessWideKeyAtomic(Core::System& system, VAddr mutex_addr,
1344 Handle thread_handle, s64 nano_seconds) { 1339 VAddr condition_variable_addr, Handle thread_handle,
1340 s64 nano_seconds) {
1345 LOG_TRACE( 1341 LOG_TRACE(
1346 Kernel_SVC, 1342 Kernel_SVC,
1347 "called mutex_addr={:X}, condition_variable_addr={:X}, thread_handle=0x{:08X}, timeout={}", 1343 "called mutex_addr={:X}, condition_variable_addr={:X}, thread_handle=0x{:08X}, timeout={}",
1348 mutex_addr, condition_variable_addr, thread_handle, nano_seconds); 1344 mutex_addr, condition_variable_addr, thread_handle, nano_seconds);
1349 1345
1350 const auto& handle_table = Core::CurrentProcess()->GetHandleTable(); 1346 if (Memory::IsKernelVirtualAddress(mutex_addr)) {
1347 LOG_ERROR(
1348 Kernel_SVC,
1349 "Given mutex address must not be within the kernel address space. address=0x{:016X}",
1350 mutex_addr);
1351 return ERR_INVALID_ADDRESS_STATE;
1352 }
1353
1354 if (!Common::IsWordAligned(mutex_addr)) {
1355 LOG_ERROR(Kernel_SVC, "Given mutex address must be word-aligned. address=0x{:016X}",
1356 mutex_addr);
1357 return ERR_INVALID_ADDRESS;
1358 }
1359
1360 auto* const current_process = system.Kernel().CurrentProcess();
1361 const auto& handle_table = current_process->GetHandleTable();
1351 SharedPtr<Thread> thread = handle_table.Get<Thread>(thread_handle); 1362 SharedPtr<Thread> thread = handle_table.Get<Thread>(thread_handle);
1352 ASSERT(thread); 1363 ASSERT(thread);
1353 1364
1354 CASCADE_CODE(Mutex::Release(mutex_addr)); 1365 const auto release_result = current_process->GetMutex().Release(mutex_addr);
1366 if (release_result.IsError()) {
1367 return release_result;
1368 }
1355 1369
1356 SharedPtr<Thread> current_thread = GetCurrentThread(); 1370 SharedPtr<Thread> current_thread = system.CurrentScheduler().GetCurrentThread();
1357 current_thread->SetCondVarWaitAddress(condition_variable_addr); 1371 current_thread->SetCondVarWaitAddress(condition_variable_addr);
1358 current_thread->SetMutexWaitAddress(mutex_addr); 1372 current_thread->SetMutexWaitAddress(mutex_addr);
1359 current_thread->SetWaitHandle(thread_handle); 1373 current_thread->SetWaitHandle(thread_handle);
1360 current_thread->SetStatus(ThreadStatus::WaitMutex); 1374 current_thread->SetStatus(ThreadStatus::WaitCondVar);
1361 current_thread->InvalidateWakeupCallback(); 1375 current_thread->InvalidateWakeupCallback();
1362 1376
1363 current_thread->WakeAfterDelay(nano_seconds); 1377 current_thread->WakeAfterDelay(nano_seconds);
1364 1378
1365 // Note: Deliberately don't attempt to inherit the lock owner's priority. 1379 // Note: Deliberately don't attempt to inherit the lock owner's priority.
1366 1380
1367 Core::System::GetInstance().CpuCore(current_thread->GetProcessorID()).PrepareReschedule(); 1381 system.CpuCore(current_thread->GetProcessorID()).PrepareReschedule();
1368 return RESULT_SUCCESS; 1382 return RESULT_SUCCESS;
1369} 1383}
1370 1384
1371/// Signal process wide key 1385/// Signal process wide key
1372static ResultCode SignalProcessWideKey(VAddr condition_variable_addr, s32 target) { 1386static ResultCode SignalProcessWideKey(Core::System& system, VAddr condition_variable_addr,
1387 s32 target) {
1373 LOG_TRACE(Kernel_SVC, "called, condition_variable_addr=0x{:X}, target=0x{:08X}", 1388 LOG_TRACE(Kernel_SVC, "called, condition_variable_addr=0x{:X}, target=0x{:08X}",
1374 condition_variable_addr, target); 1389 condition_variable_addr, target);
1375 1390
1376 const auto RetrieveWaitingThreads = [](std::size_t core_index, 1391 const auto RetrieveWaitingThreads = [&system](std::size_t core_index,
1377 std::vector<SharedPtr<Thread>>& waiting_threads, 1392 std::vector<SharedPtr<Thread>>& waiting_threads,
1378 VAddr condvar_addr) { 1393 VAddr condvar_addr) {
1379 const auto& scheduler = Core::System::GetInstance().Scheduler(core_index); 1394 const auto& scheduler = system.Scheduler(core_index);
1380 const auto& thread_list = scheduler.GetThreadList(); 1395 const auto& thread_list = scheduler.GetThreadList();
1381 1396
1382 for (const auto& thread : thread_list) { 1397 for (const auto& thread : thread_list) {
@@ -1401,10 +1416,10 @@ static ResultCode SignalProcessWideKey(VAddr condition_variable_addr, s32 target
1401 // them all. 1416 // them all.
1402 std::size_t last = waiting_threads.size(); 1417 std::size_t last = waiting_threads.size();
1403 if (target != -1) 1418 if (target != -1)
1404 last = target; 1419 last = std::min(waiting_threads.size(), static_cast<std::size_t>(target));
1405 1420
1406 // If there are no threads waiting on this condition variable, just exit 1421 // If there are no threads waiting on this condition variable, just exit
1407 if (last > waiting_threads.size()) 1422 if (last == 0)
1408 return RESULT_SUCCESS; 1423 return RESULT_SUCCESS;
1409 1424
1410 for (std::size_t index = 0; index < last; ++index) { 1425 for (std::size_t index = 0; index < last; ++index) {
@@ -1412,9 +1427,11 @@ static ResultCode SignalProcessWideKey(VAddr condition_variable_addr, s32 target
1412 1427
1413 ASSERT(thread->GetCondVarWaitAddress() == condition_variable_addr); 1428 ASSERT(thread->GetCondVarWaitAddress() == condition_variable_addr);
1414 1429
1415 std::size_t current_core = Core::System::GetInstance().CurrentCoreIndex(); 1430 // liberate Cond Var Thread.
1431 thread->SetCondVarWaitAddress(0);
1416 1432
1417 auto& monitor = Core::System::GetInstance().Monitor(); 1433 const std::size_t current_core = system.CurrentCoreIndex();
1434 auto& monitor = system.Monitor();
1418 1435
1419 // Atomically read the value of the mutex. 1436 // Atomically read the value of the mutex.
1420 u32 mutex_val = 0; 1437 u32 mutex_val = 0;
@@ -1430,10 +1447,9 @@ static ResultCode SignalProcessWideKey(VAddr condition_variable_addr, s32 target
1430 } 1447 }
1431 } while (!monitor.ExclusiveWrite32(current_core, thread->GetMutexWaitAddress(), 1448 } while (!monitor.ExclusiveWrite32(current_core, thread->GetMutexWaitAddress(),
1432 thread->GetWaitHandle())); 1449 thread->GetWaitHandle()));
1433
1434 if (mutex_val == 0) { 1450 if (mutex_val == 0) {
1435 // We were able to acquire the mutex, resume this thread. 1451 // We were able to acquire the mutex, resume this thread.
1436 ASSERT(thread->GetStatus() == ThreadStatus::WaitMutex); 1452 ASSERT(thread->GetStatus() == ThreadStatus::WaitCondVar);
1437 thread->ResumeFromWait(); 1453 thread->ResumeFromWait();
1438 1454
1439 auto* const lock_owner = thread->GetLockOwner(); 1455 auto* const lock_owner = thread->GetLockOwner();
@@ -1443,8 +1459,8 @@ static ResultCode SignalProcessWideKey(VAddr condition_variable_addr, s32 target
1443 1459
1444 thread->SetLockOwner(nullptr); 1460 thread->SetLockOwner(nullptr);
1445 thread->SetMutexWaitAddress(0); 1461 thread->SetMutexWaitAddress(0);
1446 thread->SetCondVarWaitAddress(0);
1447 thread->SetWaitHandle(0); 1462 thread->SetWaitHandle(0);
1463 system.CpuCore(thread->GetProcessorID()).PrepareReschedule();
1448 } else { 1464 } else {
1449 // Atomically signal that the mutex now has a waiting thread. 1465 // Atomically signal that the mutex now has a waiting thread.
1450 do { 1466 do {
@@ -1460,15 +1476,14 @@ static ResultCode SignalProcessWideKey(VAddr condition_variable_addr, s32 target
1460 1476
1461 // The mutex is already owned by some other thread, make this thread wait on it. 1477 // The mutex is already owned by some other thread, make this thread wait on it.
1462 const Handle owner_handle = static_cast<Handle>(mutex_val & Mutex::MutexOwnerMask); 1478 const Handle owner_handle = static_cast<Handle>(mutex_val & Mutex::MutexOwnerMask);
1463 const auto& handle_table = Core::CurrentProcess()->GetHandleTable(); 1479 const auto& handle_table = system.Kernel().CurrentProcess()->GetHandleTable();
1464 auto owner = handle_table.Get<Thread>(owner_handle); 1480 auto owner = handle_table.Get<Thread>(owner_handle);
1465 ASSERT(owner); 1481 ASSERT(owner);
1466 ASSERT(thread->GetStatus() == ThreadStatus::WaitMutex); 1482 ASSERT(thread->GetStatus() == ThreadStatus::WaitCondVar);
1467 thread->InvalidateWakeupCallback(); 1483 thread->InvalidateWakeupCallback();
1484 thread->SetStatus(ThreadStatus::WaitMutex);
1468 1485
1469 owner->AddMutexWaiter(thread); 1486 owner->AddMutexWaiter(thread);
1470
1471 Core::System::GetInstance().CpuCore(thread->GetProcessorID()).PrepareReschedule();
1472 } 1487 }
1473 } 1488 }
1474 1489
@@ -1476,93 +1491,77 @@ static ResultCode SignalProcessWideKey(VAddr condition_variable_addr, s32 target
1476} 1491}
1477 1492
1478// Wait for an address (via Address Arbiter) 1493// Wait for an address (via Address Arbiter)
1479static ResultCode WaitForAddress(VAddr address, u32 type, s32 value, s64 timeout) { 1494static ResultCode WaitForAddress(Core::System& system, VAddr address, u32 type, s32 value,
1495 s64 timeout) {
1480 LOG_WARNING(Kernel_SVC, "called, address=0x{:X}, type=0x{:X}, value=0x{:X}, timeout={}", 1496 LOG_WARNING(Kernel_SVC, "called, address=0x{:X}, type=0x{:X}, value=0x{:X}, timeout={}",
1481 address, type, value, timeout); 1497 address, type, value, timeout);
1498
1482 // If the passed address is a kernel virtual address, return invalid memory state. 1499 // If the passed address is a kernel virtual address, return invalid memory state.
1483 if (Memory::IsKernelVirtualAddress(address)) { 1500 if (Memory::IsKernelVirtualAddress(address)) {
1484 LOG_ERROR(Kernel_SVC, "Address is a kernel virtual address, address={:016X}", address); 1501 LOG_ERROR(Kernel_SVC, "Address is a kernel virtual address, address={:016X}", address);
1485 return ERR_INVALID_ADDRESS_STATE; 1502 return ERR_INVALID_ADDRESS_STATE;
1486 } 1503 }
1504
1487 // If the address is not properly aligned to 4 bytes, return invalid address. 1505 // If the address is not properly aligned to 4 bytes, return invalid address.
1488 if (!Common::IsWordAligned(address)) { 1506 if (!Common::IsWordAligned(address)) {
1489 LOG_ERROR(Kernel_SVC, "Address is not word aligned, address={:016X}", address); 1507 LOG_ERROR(Kernel_SVC, "Address is not word aligned, address={:016X}", address);
1490 return ERR_INVALID_ADDRESS; 1508 return ERR_INVALID_ADDRESS;
1491 } 1509 }
1492 1510
1493 switch (static_cast<AddressArbiter::ArbitrationType>(type)) { 1511 const auto arbitration_type = static_cast<AddressArbiter::ArbitrationType>(type);
1494 case AddressArbiter::ArbitrationType::WaitIfLessThan: 1512 auto& address_arbiter = system.Kernel().CurrentProcess()->GetAddressArbiter();
1495 return AddressArbiter::WaitForAddressIfLessThan(address, value, timeout, false); 1513 return address_arbiter.WaitForAddress(address, arbitration_type, value, timeout);
1496 case AddressArbiter::ArbitrationType::DecrementAndWaitIfLessThan:
1497 return AddressArbiter::WaitForAddressIfLessThan(address, value, timeout, true);
1498 case AddressArbiter::ArbitrationType::WaitIfEqual:
1499 return AddressArbiter::WaitForAddressIfEqual(address, value, timeout);
1500 default:
1501 LOG_ERROR(Kernel_SVC,
1502 "Invalid arbitration type, expected WaitIfLessThan, DecrementAndWaitIfLessThan "
1503 "or WaitIfEqual but got {}",
1504 type);
1505 return ERR_INVALID_ENUM_VALUE;
1506 }
1507} 1514}
1508 1515
1509// Signals to an address (via Address Arbiter) 1516// Signals to an address (via Address Arbiter)
1510static ResultCode SignalToAddress(VAddr address, u32 type, s32 value, s32 num_to_wake) { 1517static ResultCode SignalToAddress(Core::System& system, VAddr address, u32 type, s32 value,
1518 s32 num_to_wake) {
1511 LOG_WARNING(Kernel_SVC, "called, address=0x{:X}, type=0x{:X}, value=0x{:X}, num_to_wake=0x{:X}", 1519 LOG_WARNING(Kernel_SVC, "called, address=0x{:X}, type=0x{:X}, value=0x{:X}, num_to_wake=0x{:X}",
1512 address, type, value, num_to_wake); 1520 address, type, value, num_to_wake);
1521
1513 // If the passed address is a kernel virtual address, return invalid memory state. 1522 // If the passed address is a kernel virtual address, return invalid memory state.
1514 if (Memory::IsKernelVirtualAddress(address)) { 1523 if (Memory::IsKernelVirtualAddress(address)) {
1515 LOG_ERROR(Kernel_SVC, "Address is a kernel virtual address, address={:016X}", address); 1524 LOG_ERROR(Kernel_SVC, "Address is a kernel virtual address, address={:016X}", address);
1516 return ERR_INVALID_ADDRESS_STATE; 1525 return ERR_INVALID_ADDRESS_STATE;
1517 } 1526 }
1527
1518 // If the address is not properly aligned to 4 bytes, return invalid address. 1528 // If the address is not properly aligned to 4 bytes, return invalid address.
1519 if (!Common::IsWordAligned(address)) { 1529 if (!Common::IsWordAligned(address)) {
1520 LOG_ERROR(Kernel_SVC, "Address is not word aligned, address={:016X}", address); 1530 LOG_ERROR(Kernel_SVC, "Address is not word aligned, address={:016X}", address);
1521 return ERR_INVALID_ADDRESS; 1531 return ERR_INVALID_ADDRESS;
1522 } 1532 }
1523 1533
1524 switch (static_cast<AddressArbiter::SignalType>(type)) { 1534 const auto signal_type = static_cast<AddressArbiter::SignalType>(type);
1525 case AddressArbiter::SignalType::Signal: 1535 auto& address_arbiter = system.Kernel().CurrentProcess()->GetAddressArbiter();
1526 return AddressArbiter::SignalToAddress(address, num_to_wake); 1536 return address_arbiter.SignalToAddress(address, signal_type, value, num_to_wake);
1527 case AddressArbiter::SignalType::IncrementAndSignalIfEqual:
1528 return AddressArbiter::IncrementAndSignalToAddressIfEqual(address, value, num_to_wake);
1529 case AddressArbiter::SignalType::ModifyByWaitingCountAndSignalIfEqual:
1530 return AddressArbiter::ModifyByWaitingCountAndSignalToAddressIfEqual(address, value,
1531 num_to_wake);
1532 default:
1533 LOG_ERROR(Kernel_SVC,
1534 "Invalid signal type, expected Signal, IncrementAndSignalIfEqual "
1535 "or ModifyByWaitingCountAndSignalIfEqual but got {}",
1536 type);
1537 return ERR_INVALID_ENUM_VALUE;
1538 }
1539} 1537}
1540 1538
1541/// This returns the total CPU ticks elapsed since the CPU was powered-on 1539/// This returns the total CPU ticks elapsed since the CPU was powered-on
1542static u64 GetSystemTick() { 1540static u64 GetSystemTick(Core::System& system) {
1543 LOG_TRACE(Kernel_SVC, "called"); 1541 LOG_TRACE(Kernel_SVC, "called");
1544 1542
1545 const u64 result{CoreTiming::GetTicks()}; 1543 auto& core_timing = system.CoreTiming();
1544 const u64 result{core_timing.GetTicks()};
1546 1545
1547 // Advance time to defeat dumb games that busy-wait for the frame to end. 1546 // Advance time to defeat dumb games that busy-wait for the frame to end.
1548 CoreTiming::AddTicks(400); 1547 core_timing.AddTicks(400);
1549 1548
1550 return result; 1549 return result;
1551} 1550}
1552 1551
1553/// Close a handle 1552/// Close a handle
1554static ResultCode CloseHandle(Handle handle) { 1553static ResultCode CloseHandle(Core::System& system, Handle handle) {
1555 LOG_TRACE(Kernel_SVC, "Closing handle 0x{:08X}", handle); 1554 LOG_TRACE(Kernel_SVC, "Closing handle 0x{:08X}", handle);
1556 1555
1557 auto& handle_table = Core::CurrentProcess()->GetHandleTable(); 1556 auto& handle_table = system.Kernel().CurrentProcess()->GetHandleTable();
1558 return handle_table.Close(handle); 1557 return handle_table.Close(handle);
1559} 1558}
1560 1559
1561/// Clears the signaled state of an event or process. 1560/// Clears the signaled state of an event or process.
1562static ResultCode ResetSignal(Handle handle) { 1561static ResultCode ResetSignal(Core::System& system, Handle handle) {
1563 LOG_DEBUG(Kernel_SVC, "called handle 0x{:08X}", handle); 1562 LOG_DEBUG(Kernel_SVC, "called handle 0x{:08X}", handle);
1564 1563
1565 const auto& handle_table = Core::CurrentProcess()->GetHandleTable(); 1564 const auto& handle_table = system.Kernel().CurrentProcess()->GetHandleTable();
1566 1565
1567 auto event = handle_table.Get<ReadableEvent>(handle); 1566 auto event = handle_table.Get<ReadableEvent>(handle);
1568 if (event) { 1567 if (event) {
@@ -1579,7 +1578,8 @@ static ResultCode ResetSignal(Handle handle) {
1579} 1578}
1580 1579
1581/// Creates a TransferMemory object 1580/// Creates a TransferMemory object
1582static ResultCode CreateTransferMemory(Handle* handle, VAddr addr, u64 size, u32 permissions) { 1581static ResultCode CreateTransferMemory(Core::System& system, Handle* handle, VAddr addr, u64 size,
1582 u32 permissions) {
1583 LOG_DEBUG(Kernel_SVC, "called addr=0x{:X}, size=0x{:X}, perms=0x{:08X}", addr, size, 1583 LOG_DEBUG(Kernel_SVC, "called addr=0x{:X}, size=0x{:X}, perms=0x{:08X}", addr, size,
1584 permissions); 1584 permissions);
1585 1585
@@ -1607,19 +1607,129 @@ static ResultCode CreateTransferMemory(Handle* handle, VAddr addr, u64 size, u32
1607 return ERR_INVALID_MEMORY_PERMISSIONS; 1607 return ERR_INVALID_MEMORY_PERMISSIONS;
1608 } 1608 }
1609 1609
1610 auto& kernel = Core::System::GetInstance().Kernel(); 1610 auto& kernel = system.Kernel();
1611 auto process = kernel.CurrentProcess(); 1611 auto transfer_mem_handle = TransferMemory::Create(kernel, addr, size, perms);
1612 auto& handle_table = process->GetHandleTable();
1613 const auto shared_mem_handle = SharedMemory::Create(kernel, process, size, perms, perms, addr);
1614 1612
1615 CASCADE_RESULT(*handle, handle_table.Create(shared_mem_handle)); 1613 auto& handle_table = kernel.CurrentProcess()->GetHandleTable();
1614 const auto result = handle_table.Create(std::move(transfer_mem_handle));
1615 if (result.Failed()) {
1616 return result.Code();
1617 }
1618
1619 *handle = *result;
1616 return RESULT_SUCCESS; 1620 return RESULT_SUCCESS;
1617} 1621}
1618 1622
1619static ResultCode GetThreadCoreMask(Handle thread_handle, u32* core, u64* mask) { 1623static ResultCode MapTransferMemory(Core::System& system, Handle handle, VAddr address, u64 size,
1624 u32 permission_raw) {
1625 LOG_DEBUG(Kernel_SVC,
1626 "called. handle=0x{:08X}, address=0x{:016X}, size=0x{:016X}, permissions=0x{:08X}",
1627 handle, address, size, permission_raw);
1628
1629 if (!Common::Is4KBAligned(address)) {
1630 LOG_ERROR(Kernel_SVC, "Transfer memory addresses must be 4KB aligned (size=0x{:016X}).",
1631 address);
1632 return ERR_INVALID_ADDRESS;
1633 }
1634
1635 if (size == 0 || !Common::Is4KBAligned(size)) {
1636 LOG_ERROR(Kernel_SVC,
1637 "Transfer memory sizes must be 4KB aligned and not be zero (size=0x{:016X}).",
1638 size);
1639 return ERR_INVALID_SIZE;
1640 }
1641
1642 if (!IsValidAddressRange(address, size)) {
1643 LOG_ERROR(Kernel_SVC,
1644 "Given address and size overflows the 64-bit range (address=0x{:016X}, "
1645 "size=0x{:016X}).",
1646 address, size);
1647 return ERR_INVALID_ADDRESS_STATE;
1648 }
1649
1650 const auto permissions = static_cast<MemoryPermission>(permission_raw);
1651 if (permissions != MemoryPermission::None && permissions != MemoryPermission::Read &&
1652 permissions != MemoryPermission::ReadWrite) {
1653 LOG_ERROR(Kernel_SVC, "Invalid transfer memory permissions given (permissions=0x{:08X}).",
1654 permission_raw);
1655 return ERR_INVALID_STATE;
1656 }
1657
1658 const auto& kernel = system.Kernel();
1659 const auto* const current_process = kernel.CurrentProcess();
1660 const auto& handle_table = current_process->GetHandleTable();
1661
1662 auto transfer_memory = handle_table.Get<TransferMemory>(handle);
1663 if (!transfer_memory) {
1664 LOG_ERROR(Kernel_SVC, "Nonexistent transfer memory handle given (handle=0x{:08X}).",
1665 handle);
1666 return ERR_INVALID_HANDLE;
1667 }
1668
1669 if (!current_process->VMManager().IsWithinASLRRegion(address, size)) {
1670 LOG_ERROR(Kernel_SVC,
1671 "Given address and size don't fully fit within the ASLR region "
1672 "(address=0x{:016X}, size=0x{:016X}).",
1673 address, size);
1674 return ERR_INVALID_MEMORY_RANGE;
1675 }
1676
1677 return transfer_memory->MapMemory(address, size, permissions);
1678}
1679
1680static ResultCode UnmapTransferMemory(Core::System& system, Handle handle, VAddr address,
1681 u64 size) {
1682 LOG_DEBUG(Kernel_SVC, "called. handle=0x{:08X}, address=0x{:016X}, size=0x{:016X}", handle,
1683 address, size);
1684
1685 if (!Common::Is4KBAligned(address)) {
1686 LOG_ERROR(Kernel_SVC, "Transfer memory addresses must be 4KB aligned (size=0x{:016X}).",
1687 address);
1688 return ERR_INVALID_ADDRESS;
1689 }
1690
1691 if (size == 0 || !Common::Is4KBAligned(size)) {
1692 LOG_ERROR(Kernel_SVC,
1693 "Transfer memory sizes must be 4KB aligned and not be zero (size=0x{:016X}).",
1694 size);
1695 return ERR_INVALID_SIZE;
1696 }
1697
1698 if (!IsValidAddressRange(address, size)) {
1699 LOG_ERROR(Kernel_SVC,
1700 "Given address and size overflows the 64-bit range (address=0x{:016X}, "
1701 "size=0x{:016X}).",
1702 address, size);
1703 return ERR_INVALID_ADDRESS_STATE;
1704 }
1705
1706 const auto& kernel = system.Kernel();
1707 const auto* const current_process = kernel.CurrentProcess();
1708 const auto& handle_table = current_process->GetHandleTable();
1709
1710 auto transfer_memory = handle_table.Get<TransferMemory>(handle);
1711 if (!transfer_memory) {
1712 LOG_ERROR(Kernel_SVC, "Nonexistent transfer memory handle given (handle=0x{:08X}).",
1713 handle);
1714 return ERR_INVALID_HANDLE;
1715 }
1716
1717 if (!current_process->VMManager().IsWithinASLRRegion(address, size)) {
1718 LOG_ERROR(Kernel_SVC,
1719 "Given address and size don't fully fit within the ASLR region "
1720 "(address=0x{:016X}, size=0x{:016X}).",
1721 address, size);
1722 return ERR_INVALID_MEMORY_RANGE;
1723 }
1724
1725 return transfer_memory->UnmapMemory(address, size);
1726}
1727
1728static ResultCode GetThreadCoreMask(Core::System& system, Handle thread_handle, u32* core,
1729 u64* mask) {
1620 LOG_TRACE(Kernel_SVC, "called, handle=0x{:08X}", thread_handle); 1730 LOG_TRACE(Kernel_SVC, "called, handle=0x{:08X}", thread_handle);
1621 1731
1622 const auto& handle_table = Core::CurrentProcess()->GetHandleTable(); 1732 const auto& handle_table = system.Kernel().CurrentProcess()->GetHandleTable();
1623 const SharedPtr<Thread> thread = handle_table.Get<Thread>(thread_handle); 1733 const SharedPtr<Thread> thread = handle_table.Get<Thread>(thread_handle);
1624 if (!thread) { 1734 if (!thread) {
1625 LOG_ERROR(Kernel_SVC, "Thread handle does not exist, thread_handle=0x{:08X}", 1735 LOG_ERROR(Kernel_SVC, "Thread handle does not exist, thread_handle=0x{:08X}",
@@ -1633,11 +1743,12 @@ static ResultCode GetThreadCoreMask(Handle thread_handle, u32* core, u64* mask)
1633 return RESULT_SUCCESS; 1743 return RESULT_SUCCESS;
1634} 1744}
1635 1745
1636static ResultCode SetThreadCoreMask(Handle thread_handle, u32 core, u64 mask) { 1746static ResultCode SetThreadCoreMask(Core::System& system, Handle thread_handle, u32 core,
1747 u64 mask) {
1637 LOG_DEBUG(Kernel_SVC, "called, handle=0x{:08X}, mask=0x{:016X}, core=0x{:X}", thread_handle, 1748 LOG_DEBUG(Kernel_SVC, "called, handle=0x{:08X}, mask=0x{:016X}, core=0x{:X}", thread_handle,
1638 mask, core); 1749 mask, core);
1639 1750
1640 const auto& handle_table = Core::CurrentProcess()->GetHandleTable(); 1751 const auto& handle_table = system.Kernel().CurrentProcess()->GetHandleTable();
1641 const SharedPtr<Thread> thread = handle_table.Get<Thread>(thread_handle); 1752 const SharedPtr<Thread> thread = handle_table.Get<Thread>(thread_handle);
1642 if (!thread) { 1753 if (!thread) {
1643 LOG_ERROR(Kernel_SVC, "Thread handle does not exist, thread_handle=0x{:08X}", 1754 LOG_ERROR(Kernel_SVC, "Thread handle does not exist, thread_handle=0x{:08X}",
@@ -1682,8 +1793,8 @@ static ResultCode SetThreadCoreMask(Handle thread_handle, u32 core, u64 mask) {
1682 return RESULT_SUCCESS; 1793 return RESULT_SUCCESS;
1683} 1794}
1684 1795
1685static ResultCode CreateSharedMemory(Handle* handle, u64 size, u32 local_permissions, 1796static ResultCode CreateSharedMemory(Core::System& system, Handle* handle, u64 size,
1686 u32 remote_permissions) { 1797 u32 local_permissions, u32 remote_permissions) {
1687 LOG_TRACE(Kernel_SVC, "called, size=0x{:X}, localPerms=0x{:08X}, remotePerms=0x{:08X}", size, 1798 LOG_TRACE(Kernel_SVC, "called, size=0x{:X}, localPerms=0x{:08X}, remotePerms=0x{:08X}", size,
1688 local_permissions, remote_permissions); 1799 local_permissions, remote_permissions);
1689 if (size == 0) { 1800 if (size == 0) {
@@ -1719,7 +1830,7 @@ static ResultCode CreateSharedMemory(Handle* handle, u64 size, u32 local_permiss
1719 return ERR_INVALID_MEMORY_PERMISSIONS; 1830 return ERR_INVALID_MEMORY_PERMISSIONS;
1720 } 1831 }
1721 1832
1722 auto& kernel = Core::System::GetInstance().Kernel(); 1833 auto& kernel = system.Kernel();
1723 auto process = kernel.CurrentProcess(); 1834 auto process = kernel.CurrentProcess();
1724 auto& handle_table = process->GetHandleTable(); 1835 auto& handle_table = process->GetHandleTable();
1725 auto shared_mem_handle = SharedMemory::Create(kernel, process, size, local_perms, remote_perms); 1836 auto shared_mem_handle = SharedMemory::Create(kernel, process, size, local_perms, remote_perms);
@@ -1728,10 +1839,10 @@ static ResultCode CreateSharedMemory(Handle* handle, u64 size, u32 local_permiss
1728 return RESULT_SUCCESS; 1839 return RESULT_SUCCESS;
1729} 1840}
1730 1841
1731static ResultCode CreateEvent(Handle* write_handle, Handle* read_handle) { 1842static ResultCode CreateEvent(Core::System& system, Handle* write_handle, Handle* read_handle) {
1732 LOG_DEBUG(Kernel_SVC, "called"); 1843 LOG_DEBUG(Kernel_SVC, "called");
1733 1844
1734 auto& kernel = Core::System::GetInstance().Kernel(); 1845 auto& kernel = system.Kernel();
1735 const auto [readable_event, writable_event] = 1846 const auto [readable_event, writable_event] =
1736 WritableEvent::CreateEventPair(kernel, ResetType::Sticky, "CreateEvent"); 1847 WritableEvent::CreateEventPair(kernel, ResetType::Sticky, "CreateEvent");
1737 1848
@@ -1756,10 +1867,10 @@ static ResultCode CreateEvent(Handle* write_handle, Handle* read_handle) {
1756 return RESULT_SUCCESS; 1867 return RESULT_SUCCESS;
1757} 1868}
1758 1869
1759static ResultCode ClearEvent(Handle handle) { 1870static ResultCode ClearEvent(Core::System& system, Handle handle) {
1760 LOG_TRACE(Kernel_SVC, "called, event=0x{:08X}", handle); 1871 LOG_TRACE(Kernel_SVC, "called, event=0x{:08X}", handle);
1761 1872
1762 const auto& handle_table = Core::CurrentProcess()->GetHandleTable(); 1873 const auto& handle_table = system.Kernel().CurrentProcess()->GetHandleTable();
1763 1874
1764 auto writable_event = handle_table.Get<WritableEvent>(handle); 1875 auto writable_event = handle_table.Get<WritableEvent>(handle);
1765 if (writable_event) { 1876 if (writable_event) {
@@ -1777,10 +1888,10 @@ static ResultCode ClearEvent(Handle handle) {
1777 return ERR_INVALID_HANDLE; 1888 return ERR_INVALID_HANDLE;
1778} 1889}
1779 1890
1780static ResultCode SignalEvent(Handle handle) { 1891static ResultCode SignalEvent(Core::System& system, Handle handle) {
1781 LOG_DEBUG(Kernel_SVC, "called. Handle=0x{:08X}", handle); 1892 LOG_DEBUG(Kernel_SVC, "called. Handle=0x{:08X}", handle);
1782 1893
1783 HandleTable& handle_table = Core::CurrentProcess()->GetHandleTable(); 1894 HandleTable& handle_table = system.Kernel().CurrentProcess()->GetHandleTable();
1784 auto writable_event = handle_table.Get<WritableEvent>(handle); 1895 auto writable_event = handle_table.Get<WritableEvent>(handle);
1785 1896
1786 if (!writable_event) { 1897 if (!writable_event) {
@@ -1792,7 +1903,7 @@ static ResultCode SignalEvent(Handle handle) {
1792 return RESULT_SUCCESS; 1903 return RESULT_SUCCESS;
1793} 1904}
1794 1905
1795static ResultCode GetProcessInfo(u64* out, Handle process_handle, u32 type) { 1906static ResultCode GetProcessInfo(Core::System& system, u64* out, Handle process_handle, u32 type) {
1796 LOG_DEBUG(Kernel_SVC, "called, handle=0x{:08X}, type=0x{:X}", process_handle, type); 1907 LOG_DEBUG(Kernel_SVC, "called, handle=0x{:08X}, type=0x{:X}", process_handle, type);
1797 1908
1798 // This function currently only allows retrieving a process' status. 1909 // This function currently only allows retrieving a process' status.
@@ -1800,7 +1911,7 @@ static ResultCode GetProcessInfo(u64* out, Handle process_handle, u32 type) {
1800 Status, 1911 Status,
1801 }; 1912 };
1802 1913
1803 const auto& handle_table = Core::CurrentProcess()->GetHandleTable(); 1914 const auto& handle_table = system.Kernel().CurrentProcess()->GetHandleTable();
1804 const auto process = handle_table.Get<Process>(process_handle); 1915 const auto process = handle_table.Get<Process>(process_handle);
1805 if (!process) { 1916 if (!process) {
1806 LOG_ERROR(Kernel_SVC, "Process handle does not exist, process_handle=0x{:08X}", 1917 LOG_ERROR(Kernel_SVC, "Process handle does not exist, process_handle=0x{:08X}",
@@ -1818,10 +1929,10 @@ static ResultCode GetProcessInfo(u64* out, Handle process_handle, u32 type) {
1818 return RESULT_SUCCESS; 1929 return RESULT_SUCCESS;
1819} 1930}
1820 1931
1821static ResultCode CreateResourceLimit(Handle* out_handle) { 1932static ResultCode CreateResourceLimit(Core::System& system, Handle* out_handle) {
1822 LOG_DEBUG(Kernel_SVC, "called"); 1933 LOG_DEBUG(Kernel_SVC, "called");
1823 1934
1824 auto& kernel = Core::System::GetInstance().Kernel(); 1935 auto& kernel = system.Kernel();
1825 auto resource_limit = ResourceLimit::Create(kernel); 1936 auto resource_limit = ResourceLimit::Create(kernel);
1826 1937
1827 auto* const current_process = kernel.CurrentProcess(); 1938 auto* const current_process = kernel.CurrentProcess();
@@ -1836,11 +1947,11 @@ static ResultCode CreateResourceLimit(Handle* out_handle) {
1836 return RESULT_SUCCESS; 1947 return RESULT_SUCCESS;
1837} 1948}
1838 1949
1839static ResultCode GetResourceLimitLimitValue(u64* out_value, Handle resource_limit, 1950static ResultCode GetResourceLimitLimitValue(Core::System& system, u64* out_value,
1840 u32 resource_type) { 1951 Handle resource_limit, u32 resource_type) {
1841 LOG_DEBUG(Kernel_SVC, "called. Handle={:08X}, Resource type={}", resource_limit, resource_type); 1952 LOG_DEBUG(Kernel_SVC, "called. Handle={:08X}, Resource type={}", resource_limit, resource_type);
1842 1953
1843 const auto limit_value = RetrieveResourceLimitValue(resource_limit, resource_type, 1954 const auto limit_value = RetrieveResourceLimitValue(system, resource_limit, resource_type,
1844 ResourceLimitValueType::LimitValue); 1955 ResourceLimitValueType::LimitValue);
1845 if (limit_value.Failed()) { 1956 if (limit_value.Failed()) {
1846 return limit_value.Code(); 1957 return limit_value.Code();
@@ -1850,11 +1961,11 @@ static ResultCode GetResourceLimitLimitValue(u64* out_value, Handle resource_lim
1850 return RESULT_SUCCESS; 1961 return RESULT_SUCCESS;
1851} 1962}
1852 1963
1853static ResultCode GetResourceLimitCurrentValue(u64* out_value, Handle resource_limit, 1964static ResultCode GetResourceLimitCurrentValue(Core::System& system, u64* out_value,
1854 u32 resource_type) { 1965 Handle resource_limit, u32 resource_type) {
1855 LOG_DEBUG(Kernel_SVC, "called. Handle={:08X}, Resource type={}", resource_limit, resource_type); 1966 LOG_DEBUG(Kernel_SVC, "called. Handle={:08X}, Resource type={}", resource_limit, resource_type);
1856 1967
1857 const auto current_value = RetrieveResourceLimitValue(resource_limit, resource_type, 1968 const auto current_value = RetrieveResourceLimitValue(system, resource_limit, resource_type,
1858 ResourceLimitValueType::CurrentValue); 1969 ResourceLimitValueType::CurrentValue);
1859 if (current_value.Failed()) { 1970 if (current_value.Failed()) {
1860 return current_value.Code(); 1971 return current_value.Code();
@@ -1864,7 +1975,8 @@ static ResultCode GetResourceLimitCurrentValue(u64* out_value, Handle resource_l
1864 return RESULT_SUCCESS; 1975 return RESULT_SUCCESS;
1865} 1976}
1866 1977
1867static ResultCode SetResourceLimitLimitValue(Handle resource_limit, u32 resource_type, u64 value) { 1978static ResultCode SetResourceLimitLimitValue(Core::System& system, Handle resource_limit,
1979 u32 resource_type, u64 value) {
1868 LOG_DEBUG(Kernel_SVC, "called. Handle={:08X}, Resource type={}, Value={}", resource_limit, 1980 LOG_DEBUG(Kernel_SVC, "called. Handle={:08X}, Resource type={}, Value={}", resource_limit,
1869 resource_type, value); 1981 resource_type, value);
1870 1982
@@ -1874,8 +1986,7 @@ static ResultCode SetResourceLimitLimitValue(Handle resource_limit, u32 resource
1874 return ERR_INVALID_ENUM_VALUE; 1986 return ERR_INVALID_ENUM_VALUE;
1875 } 1987 }
1876 1988
1877 auto& kernel = Core::System::GetInstance().Kernel(); 1989 auto* const current_process = system.Kernel().CurrentProcess();
1878 auto* const current_process = kernel.CurrentProcess();
1879 ASSERT(current_process != nullptr); 1990 ASSERT(current_process != nullptr);
1880 1991
1881 auto resource_limit_object = 1992 auto resource_limit_object =
@@ -1899,9 +2010,86 @@ static ResultCode SetResourceLimitLimitValue(Handle resource_limit, u32 resource
1899 return RESULT_SUCCESS; 2010 return RESULT_SUCCESS;
1900} 2011}
1901 2012
2013static ResultCode GetProcessList(Core::System& system, u32* out_num_processes,
2014 VAddr out_process_ids, u32 out_process_ids_size) {
2015 LOG_DEBUG(Kernel_SVC, "called. out_process_ids=0x{:016X}, out_process_ids_size={}",
2016 out_process_ids, out_process_ids_size);
2017
2018 // If the supplied size is negative or greater than INT32_MAX / sizeof(u64), bail.
2019 if ((out_process_ids_size & 0xF0000000) != 0) {
2020 LOG_ERROR(Kernel_SVC,
2021 "Supplied size outside [0, 0x0FFFFFFF] range. out_process_ids_size={}",
2022 out_process_ids_size);
2023 return ERR_OUT_OF_RANGE;
2024 }
2025
2026 const auto& kernel = system.Kernel();
2027 const auto& vm_manager = kernel.CurrentProcess()->VMManager();
2028 const auto total_copy_size = out_process_ids_size * sizeof(u64);
2029
2030 if (out_process_ids_size > 0 &&
2031 !vm_manager.IsWithinAddressSpace(out_process_ids, total_copy_size)) {
2032 LOG_ERROR(Kernel_SVC, "Address range outside address space. begin=0x{:016X}, end=0x{:016X}",
2033 out_process_ids, out_process_ids + total_copy_size);
2034 return ERR_INVALID_ADDRESS_STATE;
2035 }
2036
2037 const auto& process_list = kernel.GetProcessList();
2038 const auto num_processes = process_list.size();
2039 const auto copy_amount = std::min(std::size_t{out_process_ids_size}, num_processes);
2040
2041 for (std::size_t i = 0; i < copy_amount; ++i) {
2042 Memory::Write64(out_process_ids, process_list[i]->GetProcessID());
2043 out_process_ids += sizeof(u64);
2044 }
2045
2046 *out_num_processes = static_cast<u32>(num_processes);
2047 return RESULT_SUCCESS;
2048}
2049
2050ResultCode GetThreadList(Core::System& system, u32* out_num_threads, VAddr out_thread_ids,
2051 u32 out_thread_ids_size, Handle debug_handle) {
2052 // TODO: Handle this case when debug events are supported.
2053 UNIMPLEMENTED_IF(debug_handle != InvalidHandle);
2054
2055 LOG_DEBUG(Kernel_SVC, "called. out_thread_ids=0x{:016X}, out_thread_ids_size={}",
2056 out_thread_ids, out_thread_ids_size);
2057
2058 // If the size is negative or larger than INT32_MAX / sizeof(u64)
2059 if ((out_thread_ids_size & 0xF0000000) != 0) {
2060 LOG_ERROR(Kernel_SVC, "Supplied size outside [0, 0x0FFFFFFF] range. size={}",
2061 out_thread_ids_size);
2062 return ERR_OUT_OF_RANGE;
2063 }
2064
2065 const auto* const current_process = system.Kernel().CurrentProcess();
2066 const auto& vm_manager = current_process->VMManager();
2067 const auto total_copy_size = out_thread_ids_size * sizeof(u64);
2068
2069 if (out_thread_ids_size > 0 &&
2070 !vm_manager.IsWithinAddressSpace(out_thread_ids, total_copy_size)) {
2071 LOG_ERROR(Kernel_SVC, "Address range outside address space. begin=0x{:016X}, end=0x{:016X}",
2072 out_thread_ids, out_thread_ids + total_copy_size);
2073 return ERR_INVALID_ADDRESS_STATE;
2074 }
2075
2076 const auto& thread_list = current_process->GetThreadList();
2077 const auto num_threads = thread_list.size();
2078 const auto copy_amount = std::min(std::size_t{out_thread_ids_size}, num_threads);
2079
2080 auto list_iter = thread_list.cbegin();
2081 for (std::size_t i = 0; i < copy_amount; ++i, ++list_iter) {
2082 Memory::Write64(out_thread_ids, (*list_iter)->GetThreadID());
2083 out_thread_ids += sizeof(u64);
2084 }
2085
2086 *out_num_threads = static_cast<u32>(num_threads);
2087 return RESULT_SUCCESS;
2088}
2089
1902namespace { 2090namespace {
1903struct FunctionDef { 2091struct FunctionDef {
1904 using Func = void(); 2092 using Func = void(Core::System&);
1905 2093
1906 u32 id; 2094 u32 id;
1907 Func* func; 2095 Func* func;
@@ -1991,8 +2179,8 @@ static const FunctionDef SVC_Table[] = {
1991 {0x4E, nullptr, "ReadWriteRegister"}, 2179 {0x4E, nullptr, "ReadWriteRegister"},
1992 {0x4F, nullptr, "SetProcessActivity"}, 2180 {0x4F, nullptr, "SetProcessActivity"},
1993 {0x50, SvcWrap<CreateSharedMemory>, "CreateSharedMemory"}, 2181 {0x50, SvcWrap<CreateSharedMemory>, "CreateSharedMemory"},
1994 {0x51, nullptr, "MapTransferMemory"}, 2182 {0x51, SvcWrap<MapTransferMemory>, "MapTransferMemory"},
1995 {0x52, nullptr, "UnmapTransferMemory"}, 2183 {0x52, SvcWrap<UnmapTransferMemory>, "UnmapTransferMemory"},
1996 {0x53, nullptr, "CreateInterruptEvent"}, 2184 {0x53, nullptr, "CreateInterruptEvent"},
1997 {0x54, nullptr, "QueryPhysicalAddress"}, 2185 {0x54, nullptr, "QueryPhysicalAddress"},
1998 {0x55, nullptr, "QueryIoMapping"}, 2186 {0x55, nullptr, "QueryIoMapping"},
@@ -2011,8 +2199,8 @@ static const FunctionDef SVC_Table[] = {
2011 {0x62, nullptr, "TerminateDebugProcess"}, 2199 {0x62, nullptr, "TerminateDebugProcess"},
2012 {0x63, nullptr, "GetDebugEvent"}, 2200 {0x63, nullptr, "GetDebugEvent"},
2013 {0x64, nullptr, "ContinueDebugEvent"}, 2201 {0x64, nullptr, "ContinueDebugEvent"},
2014 {0x65, nullptr, "GetProcessList"}, 2202 {0x65, SvcWrap<GetProcessList>, "GetProcessList"},
2015 {0x66, nullptr, "GetThreadList"}, 2203 {0x66, SvcWrap<GetThreadList>, "GetThreadList"},
2016 {0x67, nullptr, "GetDebugThreadContext"}, 2204 {0x67, nullptr, "GetDebugThreadContext"},
2017 {0x68, nullptr, "SetDebugThreadContext"}, 2205 {0x68, nullptr, "SetDebugThreadContext"},
2018 {0x69, nullptr, "QueryDebugProcessMemory"}, 2206 {0x69, nullptr, "QueryDebugProcessMemory"},
@@ -2050,16 +2238,16 @@ static const FunctionDef* GetSVCInfo(u32 func_num) {
2050 2238
2051MICROPROFILE_DEFINE(Kernel_SVC, "Kernel", "SVC", MP_RGB(70, 200, 70)); 2239MICROPROFILE_DEFINE(Kernel_SVC, "Kernel", "SVC", MP_RGB(70, 200, 70));
2052 2240
2053void CallSVC(u32 immediate) { 2241void CallSVC(Core::System& system, u32 immediate) {
2054 MICROPROFILE_SCOPE(Kernel_SVC); 2242 MICROPROFILE_SCOPE(Kernel_SVC);
2055 2243
2056 // Lock the global kernel mutex when we enter the kernel HLE. 2244 // Lock the global kernel mutex when we enter the kernel HLE.
2057 std::lock_guard<std::recursive_mutex> lock(HLE::g_hle_lock); 2245 std::lock_guard lock{HLE::g_hle_lock};
2058 2246
2059 const FunctionDef* info = GetSVCInfo(immediate); 2247 const FunctionDef* info = GetSVCInfo(immediate);
2060 if (info) { 2248 if (info) {
2061 if (info->func) { 2249 if (info->func) {
2062 info->func(); 2250 info->func(system);
2063 } else { 2251 } else {
2064 LOG_CRITICAL(Kernel_SVC, "Unimplemented SVC function {}(..)", info->name); 2252 LOG_CRITICAL(Kernel_SVC, "Unimplemented SVC function {}(..)", info->name);
2065 } 2253 }
diff --git a/src/core/hle/kernel/svc.h b/src/core/hle/kernel/svc.h
index c37ae0f98..c5539ac1c 100644
--- a/src/core/hle/kernel/svc.h
+++ b/src/core/hle/kernel/svc.h
@@ -6,8 +6,12 @@
6 6
7#include "common/common_types.h" 7#include "common/common_types.h"
8 8
9namespace Core {
10class System;
11}
12
9namespace Kernel { 13namespace Kernel {
10 14
11void CallSVC(u32 immediate); 15void CallSVC(Core::System& system, u32 immediate);
12 16
13} // namespace Kernel 17} // namespace Kernel
diff --git a/src/core/hle/kernel/svc_wrap.h b/src/core/hle/kernel/svc_wrap.h
index 2a2c2c5ea..b3690b5f3 100644
--- a/src/core/hle/kernel/svc_wrap.h
+++ b/src/core/hle/kernel/svc_wrap.h
@@ -11,270 +11,312 @@
11 11
12namespace Kernel { 12namespace Kernel {
13 13
14static inline u64 Param(int n) { 14static inline u64 Param(const Core::System& system, int n) {
15 return Core::CurrentArmInterface().GetReg(n); 15 return system.CurrentArmInterface().GetReg(n);
16} 16}
17 17
18/** 18/**
19 * HLE a function return from the current ARM userland process 19 * HLE a function return from the current ARM userland process
20 * @param res Result to return 20 * @param system System context
21 * @param result Result to return
21 */ 22 */
22static inline void FuncReturn(u64 res) { 23static inline void FuncReturn(Core::System& system, u64 result) {
23 Core::CurrentArmInterface().SetReg(0, res); 24 system.CurrentArmInterface().SetReg(0, result);
24} 25}
25 26
26//////////////////////////////////////////////////////////////////////////////////////////////////// 27////////////////////////////////////////////////////////////////////////////////////////////////////
27// Function wrappers that return type ResultCode 28// Function wrappers that return type ResultCode
28 29
29template <ResultCode func(u64)> 30template <ResultCode func(Core::System&, u64)>
30void SvcWrap() { 31void SvcWrap(Core::System& system) {
31 FuncReturn(func(Param(0)).raw); 32 FuncReturn(system, func(system, Param(system, 0)).raw);
32} 33}
33 34
34template <ResultCode func(u32)> 35template <ResultCode func(Core::System&, u32)>
35void SvcWrap() { 36void SvcWrap(Core::System& system) {
36 FuncReturn(func(static_cast<u32>(Param(0))).raw); 37 FuncReturn(system, func(system, static_cast<u32>(Param(system, 0))).raw);
37} 38}
38 39
39template <ResultCode func(u32, u32)> 40template <ResultCode func(Core::System&, u32, u32)>
40void SvcWrap() { 41void SvcWrap(Core::System& system) {
41 FuncReturn(func(static_cast<u32>(Param(0)), static_cast<u32>(Param(1))).raw); 42 FuncReturn(
43 system,
44 func(system, static_cast<u32>(Param(system, 0)), static_cast<u32>(Param(system, 1))).raw);
42} 45}
43 46
44template <ResultCode func(u32*)> 47template <ResultCode func(Core::System&, u32*)>
45void SvcWrap() { 48void SvcWrap(Core::System& system) {
46 u32 param = 0; 49 u32 param = 0;
47 const u32 retval = func(&param).raw; 50 const u32 retval = func(system, &param).raw;
48 Core::CurrentArmInterface().SetReg(1, param); 51 system.CurrentArmInterface().SetReg(1, param);
49 FuncReturn(retval); 52 FuncReturn(system, retval);
50} 53}
51 54
52template <ResultCode func(u32*, u32)> 55template <ResultCode func(Core::System&, u32*, u32)>
53void SvcWrap() { 56void SvcWrap(Core::System& system) {
54 u32 param_1 = 0; 57 u32 param_1 = 0;
55 u32 retval = func(&param_1, static_cast<u32>(Param(1))).raw; 58 const u32 retval = func(system, &param_1, static_cast<u32>(Param(system, 1))).raw;
56 Core::CurrentArmInterface().SetReg(1, param_1); 59 system.CurrentArmInterface().SetReg(1, param_1);
57 FuncReturn(retval); 60 FuncReturn(system, retval);
58} 61}
59 62
60template <ResultCode func(u32*, u32*)> 63template <ResultCode func(Core::System&, u32*, u32*)>
61void SvcWrap() { 64void SvcWrap(Core::System& system) {
62 u32 param_1 = 0; 65 u32 param_1 = 0;
63 u32 param_2 = 0; 66 u32 param_2 = 0;
64 const u32 retval = func(&param_1, &param_2).raw; 67 const u32 retval = func(system, &param_1, &param_2).raw;
65 68
66 auto& arm_interface = Core::CurrentArmInterface(); 69 auto& arm_interface = system.CurrentArmInterface();
67 arm_interface.SetReg(1, param_1); 70 arm_interface.SetReg(1, param_1);
68 arm_interface.SetReg(2, param_2); 71 arm_interface.SetReg(2, param_2);
69 72
70 FuncReturn(retval); 73 FuncReturn(system, retval);
71} 74}
72 75
73template <ResultCode func(u32*, u64)> 76template <ResultCode func(Core::System&, u32*, u64)>
74void SvcWrap() { 77void SvcWrap(Core::System& system) {
75 u32 param_1 = 0; 78 u32 param_1 = 0;
76 const u32 retval = func(&param_1, Param(1)).raw; 79 const u32 retval = func(system, &param_1, Param(system, 1)).raw;
77 Core::CurrentArmInterface().SetReg(1, param_1); 80 system.CurrentArmInterface().SetReg(1, param_1);
78 FuncReturn(retval); 81 FuncReturn(system, retval);
82}
83
84template <ResultCode func(Core::System&, u32*, u64, u32)>
85void SvcWrap(Core::System& system) {
86 u32 param_1 = 0;
87 const u32 retval =
88 func(system, &param_1, Param(system, 1), static_cast<u32>(Param(system, 2))).raw;
89
90 system.CurrentArmInterface().SetReg(1, param_1);
91 FuncReturn(system, retval);
79} 92}
80 93
81template <ResultCode func(u64*, u32)> 94template <ResultCode func(Core::System&, u64*, u32)>
82void SvcWrap() { 95void SvcWrap(Core::System& system) {
83 u64 param_1 = 0; 96 u64 param_1 = 0;
84 const u32 retval = func(&param_1, static_cast<u32>(Param(1))).raw; 97 const u32 retval = func(system, &param_1, static_cast<u32>(Param(system, 1))).raw;
85 Core::CurrentArmInterface().SetReg(1, param_1); 98
86 FuncReturn(retval); 99 system.CurrentArmInterface().SetReg(1, param_1);
100 FuncReturn(system, retval);
87} 101}
88 102
89template <ResultCode func(u64, s32)> 103template <ResultCode func(Core::System&, u64, s32)>
90void SvcWrap() { 104void SvcWrap(Core::System& system) {
91 FuncReturn(func(Param(0), static_cast<s32>(Param(1))).raw); 105 FuncReturn(system, func(system, Param(system, 0), static_cast<s32>(Param(system, 1))).raw);
92} 106}
93 107
94template <ResultCode func(u64, u32)> 108template <ResultCode func(Core::System&, u64, u32)>
95void SvcWrap() { 109void SvcWrap(Core::System& system) {
96 FuncReturn(func(Param(0), static_cast<u32>(Param(1))).raw); 110 FuncReturn(system, func(system, Param(system, 0), static_cast<u32>(Param(system, 1))).raw);
97} 111}
98 112
99template <ResultCode func(u64*, u64)> 113template <ResultCode func(Core::System&, u64*, u64)>
100void SvcWrap() { 114void SvcWrap(Core::System& system) {
101 u64 param_1 = 0; 115 u64 param_1 = 0;
102 u32 retval = func(&param_1, Param(1)).raw; 116 const u32 retval = func(system, &param_1, Param(system, 1)).raw;
103 Core::CurrentArmInterface().SetReg(1, param_1); 117
104 FuncReturn(retval); 118 system.CurrentArmInterface().SetReg(1, param_1);
119 FuncReturn(system, retval);
105} 120}
106 121
107template <ResultCode func(u64*, u32, u32)> 122template <ResultCode func(Core::System&, u64*, u32, u32)>
108void SvcWrap() { 123void SvcWrap(Core::System& system) {
109 u64 param_1 = 0; 124 u64 param_1 = 0;
110 u32 retval = func(&param_1, static_cast<u32>(Param(1)), static_cast<u32>(Param(2))).raw; 125 const u32 retval = func(system, &param_1, static_cast<u32>(Param(system, 1)),
111 Core::CurrentArmInterface().SetReg(1, param_1); 126 static_cast<u32>(Param(system, 2)))
112 FuncReturn(retval); 127 .raw;
128
129 system.CurrentArmInterface().SetReg(1, param_1);
130 FuncReturn(system, retval);
113} 131}
114 132
115template <ResultCode func(u32, u64)> 133template <ResultCode func(Core::System&, u32, u64)>
116void SvcWrap() { 134void SvcWrap(Core::System& system) {
117 FuncReturn(func(static_cast<u32>(Param(0)), Param(1)).raw); 135 FuncReturn(system, func(system, static_cast<u32>(Param(system, 0)), Param(system, 1)).raw);
118} 136}
119 137
120template <ResultCode func(u32, u32, u64)> 138template <ResultCode func(Core::System&, u32, u32, u64)>
121void SvcWrap() { 139void SvcWrap(Core::System& system) {
122 FuncReturn(func(static_cast<u32>(Param(0)), static_cast<u32>(Param(1)), Param(2)).raw); 140 FuncReturn(system, func(system, static_cast<u32>(Param(system, 0)),
141 static_cast<u32>(Param(system, 1)), Param(system, 2))
142 .raw);
123} 143}
124 144
125template <ResultCode func(u32, u32*, u64*)> 145template <ResultCode func(Core::System&, u32, u32*, u64*)>
126void SvcWrap() { 146void SvcWrap(Core::System& system) {
127 u32 param_1 = 0; 147 u32 param_1 = 0;
128 u64 param_2 = 0; 148 u64 param_2 = 0;
129 ResultCode retval = func(static_cast<u32>(Param(2)), &param_1, &param_2); 149 const ResultCode retval = func(system, static_cast<u32>(Param(system, 2)), &param_1, &param_2);
130 Core::CurrentArmInterface().SetReg(1, param_1);
131 Core::CurrentArmInterface().SetReg(2, param_2);
132 FuncReturn(retval.raw);
133}
134 150
135template <ResultCode func(u64, u64, u32, u32)> 151 system.CurrentArmInterface().SetReg(1, param_1);
136void SvcWrap() { 152 system.CurrentArmInterface().SetReg(2, param_2);
137 FuncReturn( 153 FuncReturn(system, retval.raw);
138 func(Param(0), Param(1), static_cast<u32>(Param(2)), static_cast<u32>(Param(3))).raw);
139} 154}
140 155
141template <ResultCode func(u64, u64, u32, u64)> 156template <ResultCode func(Core::System&, u64, u64, u32, u32)>
142void SvcWrap() { 157void SvcWrap(Core::System& system) {
143 FuncReturn(func(Param(0), Param(1), static_cast<u32>(Param(2)), Param(3)).raw); 158 FuncReturn(system, func(system, Param(system, 0), Param(system, 1),
159 static_cast<u32>(Param(system, 2)), static_cast<u32>(Param(system, 3)))
160 .raw);
144} 161}
145 162
146template <ResultCode func(u32, u64, u32)> 163template <ResultCode func(Core::System&, u64, u64, u32, u64)>
147void SvcWrap() { 164void SvcWrap(Core::System& system) {
148 FuncReturn(func(static_cast<u32>(Param(0)), Param(1), static_cast<u32>(Param(2))).raw); 165 FuncReturn(system, func(system, Param(system, 0), Param(system, 1),
166 static_cast<u32>(Param(system, 2)), Param(system, 3))
167 .raw);
149} 168}
150 169
151template <ResultCode func(u64, u64, u64)> 170template <ResultCode func(Core::System&, u32, u64, u32)>
152void SvcWrap() { 171void SvcWrap(Core::System& system) {
153 FuncReturn(func(Param(0), Param(1), Param(2)).raw); 172 FuncReturn(system, func(system, static_cast<u32>(Param(system, 0)), Param(system, 1),
173 static_cast<u32>(Param(system, 2)))
174 .raw);
154} 175}
155 176
156template <ResultCode func(u64, u64, u32)> 177template <ResultCode func(Core::System&, u64, u64, u64)>
157void SvcWrap() { 178void SvcWrap(Core::System& system) {
158 FuncReturn(func(Param(0), Param(1), static_cast<u32>(Param(2))).raw); 179 FuncReturn(system, func(system, Param(system, 0), Param(system, 1), Param(system, 2)).raw);
159} 180}
160 181
161template <ResultCode func(u32, u64, u64, u32)> 182template <ResultCode func(Core::System&, u64, u64, u32)>
162void SvcWrap() { 183void SvcWrap(Core::System& system) {
163 FuncReturn( 184 FuncReturn(
164 func(static_cast<u32>(Param(0)), Param(1), Param(2), static_cast<u32>(Param(3))).raw); 185 system,
186 func(system, Param(system, 0), Param(system, 1), static_cast<u32>(Param(system, 2))).raw);
165} 187}
166 188
167template <ResultCode func(u32, u64, u64)> 189template <ResultCode func(Core::System&, u32, u64, u64, u32)>
168void SvcWrap() { 190void SvcWrap(Core::System& system) {
169 FuncReturn(func(static_cast<u32>(Param(0)), Param(1), Param(2)).raw); 191 FuncReturn(system, func(system, static_cast<u32>(Param(system, 0)), Param(system, 1),
192 Param(system, 2), static_cast<u32>(Param(system, 3)))
193 .raw);
170} 194}
171 195
172template <ResultCode func(u32*, u64, u64, s64)> 196template <ResultCode func(Core::System&, u32, u64, u64)>
173void SvcWrap() { 197void SvcWrap(Core::System& system) {
198 FuncReturn(
199 system,
200 func(system, static_cast<u32>(Param(system, 0)), Param(system, 1), Param(system, 2)).raw);
201}
202
203template <ResultCode func(Core::System&, u32*, u64, u64, s64)>
204void SvcWrap(Core::System& system) {
174 u32 param_1 = 0; 205 u32 param_1 = 0;
175 ResultCode retval = 206 const u32 retval = func(system, &param_1, Param(system, 1), static_cast<u32>(Param(system, 2)),
176 func(&param_1, Param(1), static_cast<u32>(Param(2)), static_cast<s64>(Param(3))); 207 static_cast<s64>(Param(system, 3)))
177 Core::CurrentArmInterface().SetReg(1, param_1); 208 .raw;
178 FuncReturn(retval.raw); 209
210 system.CurrentArmInterface().SetReg(1, param_1);
211 FuncReturn(system, retval);
179} 212}
180 213
181template <ResultCode func(u64, u64, u32, s64)> 214template <ResultCode func(Core::System&, u64, u64, u32, s64)>
182void SvcWrap() { 215void SvcWrap(Core::System& system) {
183 FuncReturn( 216 FuncReturn(system, func(system, Param(system, 0), Param(system, 1),
184 func(Param(0), Param(1), static_cast<u32>(Param(2)), static_cast<s64>(Param(3))).raw); 217 static_cast<u32>(Param(system, 2)), static_cast<s64>(Param(system, 3)))
218 .raw);
185} 219}
186 220
187template <ResultCode func(u64*, u64, u64, u64)> 221template <ResultCode func(Core::System&, u64*, u64, u64, u64)>
188void SvcWrap() { 222void SvcWrap(Core::System& system) {
189 u64 param_1 = 0; 223 u64 param_1 = 0;
190 u32 retval = func(&param_1, Param(1), Param(2), Param(3)).raw; 224 const u32 retval =
191 Core::CurrentArmInterface().SetReg(1, param_1); 225 func(system, &param_1, Param(system, 1), Param(system, 2), Param(system, 3)).raw;
192 FuncReturn(retval); 226
227 system.CurrentArmInterface().SetReg(1, param_1);
228 FuncReturn(system, retval);
193} 229}
194 230
195template <ResultCode func(u32*, u64, u64, u64, u32, s32)> 231template <ResultCode func(Core::System&, u32*, u64, u64, u64, u32, s32)>
196void SvcWrap() { 232void SvcWrap(Core::System& system) {
197 u32 param_1 = 0; 233 u32 param_1 = 0;
198 u32 retval = func(&param_1, Param(1), Param(2), Param(3), static_cast<u32>(Param(4)), 234 const u32 retval = func(system, &param_1, Param(system, 1), Param(system, 2), Param(system, 3),
199 static_cast<s32>(Param(5))) 235 static_cast<u32>(Param(system, 4)), static_cast<s32>(Param(system, 5)))
200 .raw; 236 .raw;
201 Core::CurrentArmInterface().SetReg(1, param_1); 237
202 FuncReturn(retval); 238 system.CurrentArmInterface().SetReg(1, param_1);
239 FuncReturn(system, retval);
203} 240}
204 241
205template <ResultCode func(u32*, u64, u64, u32)> 242template <ResultCode func(Core::System&, u32*, u64, u64, u32)>
206void SvcWrap() { 243void SvcWrap(Core::System& system) {
207 u32 param_1 = 0; 244 u32 param_1 = 0;
208 u32 retval = func(&param_1, Param(1), Param(2), static_cast<u32>(Param(3))).raw; 245 const u32 retval = func(system, &param_1, Param(system, 1), Param(system, 2),
209 Core::CurrentArmInterface().SetReg(1, param_1); 246 static_cast<u32>(Param(system, 3)))
210 FuncReturn(retval); 247 .raw;
248
249 system.CurrentArmInterface().SetReg(1, param_1);
250 FuncReturn(system, retval);
211} 251}
212 252
213template <ResultCode func(Handle*, u64, u32, u32)> 253template <ResultCode func(Core::System&, Handle*, u64, u32, u32)>
214void SvcWrap() { 254void SvcWrap(Core::System& system) {
215 u32 param_1 = 0; 255 u32 param_1 = 0;
216 u32 retval = 256 const u32 retval = func(system, &param_1, Param(system, 1), static_cast<u32>(Param(system, 2)),
217 func(&param_1, Param(1), static_cast<u32>(Param(2)), static_cast<u32>(Param(3))).raw; 257 static_cast<u32>(Param(system, 3)))
218 Core::CurrentArmInterface().SetReg(1, param_1); 258 .raw;
219 FuncReturn(retval); 259
260 system.CurrentArmInterface().SetReg(1, param_1);
261 FuncReturn(system, retval);
220} 262}
221 263
222template <ResultCode func(u64, u32, s32, s64)> 264template <ResultCode func(Core::System&, u64, u32, s32, s64)>
223void SvcWrap() { 265void SvcWrap(Core::System& system) {
224 FuncReturn(func(Param(0), static_cast<u32>(Param(1)), static_cast<s32>(Param(2)), 266 FuncReturn(system, func(system, Param(system, 0), static_cast<u32>(Param(system, 1)),
225 static_cast<s64>(Param(3))) 267 static_cast<s32>(Param(system, 2)), static_cast<s64>(Param(system, 3)))
226 .raw); 268 .raw);
227} 269}
228 270
229template <ResultCode func(u64, u32, s32, s32)> 271template <ResultCode func(Core::System&, u64, u32, s32, s32)>
230void SvcWrap() { 272void SvcWrap(Core::System& system) {
231 FuncReturn(func(Param(0), static_cast<u32>(Param(1)), static_cast<s32>(Param(2)), 273 FuncReturn(system, func(system, Param(system, 0), static_cast<u32>(Param(system, 1)),
232 static_cast<s32>(Param(3))) 274 static_cast<s32>(Param(system, 2)), static_cast<s32>(Param(system, 3)))
233 .raw); 275 .raw);
234} 276}
235 277
236//////////////////////////////////////////////////////////////////////////////////////////////////// 278////////////////////////////////////////////////////////////////////////////////////////////////////
237// Function wrappers that return type u32 279// Function wrappers that return type u32
238 280
239template <u32 func()> 281template <u32 func(Core::System&)>
240void SvcWrap() { 282void SvcWrap(Core::System& system) {
241 FuncReturn(func()); 283 FuncReturn(system, func(system));
242} 284}
243 285
244//////////////////////////////////////////////////////////////////////////////////////////////////// 286////////////////////////////////////////////////////////////////////////////////////////////////////
245// Function wrappers that return type u64 287// Function wrappers that return type u64
246 288
247template <u64 func()> 289template <u64 func(Core::System&)>
248void SvcWrap() { 290void SvcWrap(Core::System& system) {
249 FuncReturn(func()); 291 FuncReturn(system, func(system));
250} 292}
251 293
252//////////////////////////////////////////////////////////////////////////////////////////////////// 294////////////////////////////////////////////////////////////////////////////////////////////////////
253/// Function wrappers that return type void 295/// Function wrappers that return type void
254 296
255template <void func()> 297template <void func(Core::System&)>
256void SvcWrap() { 298void SvcWrap(Core::System& system) {
257 func(); 299 func(system);
258} 300}
259 301
260template <void func(s64)> 302template <void func(Core::System&, s64)>
261void SvcWrap() { 303void SvcWrap(Core::System& system) {
262 func(static_cast<s64>(Param(0))); 304 func(system, static_cast<s64>(Param(system, 0)));
263} 305}
264 306
265template <void func(u64, u64 len)> 307template <void func(Core::System&, u64, u64)>
266void SvcWrap() { 308void SvcWrap(Core::System& system) {
267 func(Param(0), Param(1)); 309 func(system, Param(system, 0), Param(system, 1));
268} 310}
269 311
270template <void func(u64, u64, u64)> 312template <void func(Core::System&, u64, u64, u64)>
271void SvcWrap() { 313void SvcWrap(Core::System& system) {
272 func(Param(0), Param(1), Param(2)); 314 func(system, Param(system, 0), Param(system, 1), Param(system, 2));
273} 315}
274 316
275template <void func(u32, u64, u64)> 317template <void func(Core::System&, u32, u64, u64)>
276void SvcWrap() { 318void SvcWrap(Core::System& system) {
277 func(static_cast<u32>(Param(0)), Param(1), Param(2)); 319 func(system, static_cast<u32>(Param(system, 0)), Param(system, 1), Param(system, 2));
278} 320}
279 321
280} // namespace Kernel 322} // namespace Kernel
diff --git a/src/core/hle/kernel/thread.cpp b/src/core/hle/kernel/thread.cpp
index d3984dfc4..1b891f632 100644
--- a/src/core/hle/kernel/thread.cpp
+++ b/src/core/hle/kernel/thread.cpp
@@ -7,8 +7,6 @@
7#include <optional> 7#include <optional>
8#include <vector> 8#include <vector>
9 9
10#include <boost/range/algorithm_ext/erase.hpp>
11
12#include "common/assert.h" 10#include "common/assert.h"
13#include "common/common_types.h" 11#include "common/common_types.h"
14#include "common/logging/log.h" 12#include "common/logging/log.h"
@@ -30,7 +28,7 @@
30 28
31namespace Kernel { 29namespace Kernel {
32 30
33bool Thread::ShouldWait(Thread* thread) const { 31bool Thread::ShouldWait(const Thread* thread) const {
34 return status != ThreadStatus::Dead; 32 return status != ThreadStatus::Dead;
35} 33}
36 34
@@ -43,7 +41,8 @@ Thread::~Thread() = default;
43 41
44void Thread::Stop() { 42void Thread::Stop() {
45 // Cancel any outstanding wakeup events for this thread 43 // Cancel any outstanding wakeup events for this thread
46 CoreTiming::UnscheduleEvent(kernel.ThreadWakeupCallbackEventType(), callback_handle); 44 Core::System::GetInstance().CoreTiming().UnscheduleEvent(kernel.ThreadWakeupCallbackEventType(),
45 callback_handle);
47 kernel.ThreadWakeupCallbackHandleTable().Close(callback_handle); 46 kernel.ThreadWakeupCallbackHandleTable().Close(callback_handle);
48 callback_handle = 0; 47 callback_handle = 0;
49 48
@@ -63,21 +62,12 @@ void Thread::Stop() {
63 } 62 }
64 wait_objects.clear(); 63 wait_objects.clear();
65 64
65 owner_process->UnregisterThread(this);
66
66 // Mark the TLS slot in the thread's page as free. 67 // Mark the TLS slot in the thread's page as free.
67 owner_process->FreeTLSSlot(tls_address); 68 owner_process->FreeTLSSlot(tls_address);
68} 69}
69 70
70void WaitCurrentThread_Sleep() {
71 Thread* thread = GetCurrentThread();
72 thread->SetStatus(ThreadStatus::WaitSleep);
73}
74
75void ExitCurrentThread() {
76 Thread* thread = GetCurrentThread();
77 thread->Stop();
78 Core::System::GetInstance().CurrentScheduler().RemoveThread(thread);
79}
80
81void Thread::WakeAfterDelay(s64 nanoseconds) { 71void Thread::WakeAfterDelay(s64 nanoseconds) {
82 // Don't schedule a wakeup if the thread wants to wait forever 72 // Don't schedule a wakeup if the thread wants to wait forever
83 if (nanoseconds == -1) 73 if (nanoseconds == -1)
@@ -85,12 +75,14 @@ void Thread::WakeAfterDelay(s64 nanoseconds) {
85 75
86 // This function might be called from any thread so we have to be cautious and use the 76 // This function might be called from any thread so we have to be cautious and use the
87 // thread-safe version of ScheduleEvent. 77 // thread-safe version of ScheduleEvent.
88 CoreTiming::ScheduleEventThreadsafe(CoreTiming::nsToCycles(nanoseconds), 78 Core::System::GetInstance().CoreTiming().ScheduleEventThreadsafe(
89 kernel.ThreadWakeupCallbackEventType(), callback_handle); 79 Core::Timing::nsToCycles(nanoseconds), kernel.ThreadWakeupCallbackEventType(),
80 callback_handle);
90} 81}
91 82
92void Thread::CancelWakeupTimer() { 83void Thread::CancelWakeupTimer() {
93 CoreTiming::UnscheduleEventThreadsafe(kernel.ThreadWakeupCallbackEventType(), callback_handle); 84 Core::System::GetInstance().CoreTiming().UnscheduleEventThreadsafe(
85 kernel.ThreadWakeupCallbackEventType(), callback_handle);
94} 86}
95 87
96static std::optional<s32> GetNextProcessorId(u64 mask) { 88static std::optional<s32> GetNextProcessorId(u64 mask) {
@@ -115,6 +107,7 @@ void Thread::ResumeFromWait() {
115 case ThreadStatus::WaitSleep: 107 case ThreadStatus::WaitSleep:
116 case ThreadStatus::WaitIPC: 108 case ThreadStatus::WaitIPC:
117 case ThreadStatus::WaitMutex: 109 case ThreadStatus::WaitMutex:
110 case ThreadStatus::WaitCondVar:
118 case ThreadStatus::WaitArb: 111 case ThreadStatus::WaitArb:
119 break; 112 break;
120 113
@@ -181,14 +174,13 @@ ResultVal<SharedPtr<Thread>> Thread::Create(KernelCore& kernel, std::string name
181 return ERR_INVALID_PROCESSOR_ID; 174 return ERR_INVALID_PROCESSOR_ID;
182 } 175 }
183 176
184 // TODO(yuriks): Other checks, returning 0xD9001BEA
185
186 if (!Memory::IsValidVirtualAddress(owner_process, entry_point)) { 177 if (!Memory::IsValidVirtualAddress(owner_process, entry_point)) {
187 LOG_ERROR(Kernel_SVC, "(name={}): invalid entry {:016X}", name, entry_point); 178 LOG_ERROR(Kernel_SVC, "(name={}): invalid entry {:016X}", name, entry_point);
188 // TODO (bunnei): Find the correct error code to use here 179 // TODO (bunnei): Find the correct error code to use here
189 return ResultCode(-1); 180 return ResultCode(-1);
190 } 181 }
191 182
183 auto& system = Core::System::GetInstance();
192 SharedPtr<Thread> thread(new Thread(kernel)); 184 SharedPtr<Thread> thread(new Thread(kernel));
193 185
194 thread->thread_id = kernel.CreateNewThreadID(); 186 thread->thread_id = kernel.CreateNewThreadID();
@@ -197,7 +189,7 @@ ResultVal<SharedPtr<Thread>> Thread::Create(KernelCore& kernel, std::string name
197 thread->stack_top = stack_top; 189 thread->stack_top = stack_top;
198 thread->tpidr_el0 = 0; 190 thread->tpidr_el0 = 0;
199 thread->nominal_priority = thread->current_priority = priority; 191 thread->nominal_priority = thread->current_priority = priority;
200 thread->last_running_ticks = CoreTiming::GetTicks(); 192 thread->last_running_ticks = system.CoreTiming().GetTicks();
201 thread->processor_id = processor_id; 193 thread->processor_id = processor_id;
202 thread->ideal_core = processor_id; 194 thread->ideal_core = processor_id;
203 thread->affinity_mask = 1ULL << processor_id; 195 thread->affinity_mask = 1ULL << processor_id;
@@ -208,10 +200,12 @@ ResultVal<SharedPtr<Thread>> Thread::Create(KernelCore& kernel, std::string name
208 thread->name = std::move(name); 200 thread->name = std::move(name);
209 thread->callback_handle = kernel.ThreadWakeupCallbackHandleTable().Create(thread).Unwrap(); 201 thread->callback_handle = kernel.ThreadWakeupCallbackHandleTable().Create(thread).Unwrap();
210 thread->owner_process = &owner_process; 202 thread->owner_process = &owner_process;
211 thread->scheduler = &Core::System::GetInstance().Scheduler(processor_id); 203 thread->scheduler = &system.Scheduler(processor_id);
212 thread->scheduler->AddThread(thread, priority); 204 thread->scheduler->AddThread(thread);
213 thread->tls_address = thread->owner_process->MarkNextAvailableTLSSlotAsUsed(*thread); 205 thread->tls_address = thread->owner_process->MarkNextAvailableTLSSlotAsUsed(*thread);
214 206
207 thread->owner_process->RegisterThread(thread.get());
208
215 // TODO(peachum): move to ScheduleThread() when scheduler is added so selected core is used 209 // TODO(peachum): move to ScheduleThread() when scheduler is added so selected core is used
216 // to initialize the context 210 // to initialize the context
217 ResetThreadContext(thread->context, stack_top, entry_point, arg); 211 ResetThreadContext(thread->context, stack_top, entry_point, arg);
@@ -239,16 +233,16 @@ void Thread::SetWaitSynchronizationOutput(s32 output) {
239 context.cpu_registers[1] = output; 233 context.cpu_registers[1] = output;
240} 234}
241 235
242s32 Thread::GetWaitObjectIndex(WaitObject* object) const { 236s32 Thread::GetWaitObjectIndex(const WaitObject* object) const {
243 ASSERT_MSG(!wait_objects.empty(), "Thread is not waiting for anything"); 237 ASSERT_MSG(!wait_objects.empty(), "Thread is not waiting for anything");
244 auto match = std::find(wait_objects.rbegin(), wait_objects.rend(), object); 238 const auto match = std::find(wait_objects.rbegin(), wait_objects.rend(), object);
245 return static_cast<s32>(std::distance(match, wait_objects.rend()) - 1); 239 return static_cast<s32>(std::distance(match, wait_objects.rend()) - 1);
246} 240}
247 241
248VAddr Thread::GetCommandBufferAddress() const { 242VAddr Thread::GetCommandBufferAddress() const {
249 // Offset from the start of TLS at which the IPC command buffer begins. 243 // Offset from the start of TLS at which the IPC command buffer begins.
250 static constexpr int CommandHeaderOffset = 0x80; 244 constexpr u64 command_header_offset = 0x80;
251 return GetTLSAddress() + CommandHeaderOffset; 245 return GetTLSAddress() + command_header_offset;
252} 246}
253 247
254void Thread::SetStatus(ThreadStatus new_status) { 248void Thread::SetStatus(ThreadStatus new_status) {
@@ -257,7 +251,7 @@ void Thread::SetStatus(ThreadStatus new_status) {
257 } 251 }
258 252
259 if (status == ThreadStatus::Running) { 253 if (status == ThreadStatus::Running) {
260 last_running_ticks = CoreTiming::GetTicks(); 254 last_running_ticks = Core::System::GetInstance().CoreTiming().GetTicks();
261 } 255 }
262 256
263 status = new_status; 257 status = new_status;
@@ -267,8 +261,8 @@ void Thread::AddMutexWaiter(SharedPtr<Thread> thread) {
267 if (thread->lock_owner == this) { 261 if (thread->lock_owner == this) {
268 // If the thread is already waiting for this thread to release the mutex, ensure that the 262 // If the thread is already waiting for this thread to release the mutex, ensure that the
269 // waiters list is consistent and return without doing anything. 263 // waiters list is consistent and return without doing anything.
270 auto itr = std::find(wait_mutex_threads.begin(), wait_mutex_threads.end(), thread); 264 const auto iter = std::find(wait_mutex_threads.begin(), wait_mutex_threads.end(), thread);
271 ASSERT(itr != wait_mutex_threads.end()); 265 ASSERT(iter != wait_mutex_threads.end());
272 return; 266 return;
273 } 267 }
274 268
@@ -276,11 +270,16 @@ void Thread::AddMutexWaiter(SharedPtr<Thread> thread) {
276 ASSERT(thread->lock_owner == nullptr); 270 ASSERT(thread->lock_owner == nullptr);
277 271
278 // Ensure that the thread is not already in the list of mutex waiters 272 // Ensure that the thread is not already in the list of mutex waiters
279 auto itr = std::find(wait_mutex_threads.begin(), wait_mutex_threads.end(), thread); 273 const auto iter = std::find(wait_mutex_threads.begin(), wait_mutex_threads.end(), thread);
280 ASSERT(itr == wait_mutex_threads.end()); 274 ASSERT(iter == wait_mutex_threads.end());
281 275
276 // Keep the list in an ordered fashion
277 const auto insertion_point = std::find_if(
278 wait_mutex_threads.begin(), wait_mutex_threads.end(),
279 [&thread](const auto& entry) { return entry->GetPriority() > thread->GetPriority(); });
280 wait_mutex_threads.insert(insertion_point, thread);
282 thread->lock_owner = this; 281 thread->lock_owner = this;
283 wait_mutex_threads.emplace_back(std::move(thread)); 282
284 UpdatePriority(); 283 UpdatePriority();
285} 284}
286 285
@@ -288,32 +287,44 @@ void Thread::RemoveMutexWaiter(SharedPtr<Thread> thread) {
288 ASSERT(thread->lock_owner == this); 287 ASSERT(thread->lock_owner == this);
289 288
290 // Ensure that the thread is in the list of mutex waiters 289 // Ensure that the thread is in the list of mutex waiters
291 auto itr = std::find(wait_mutex_threads.begin(), wait_mutex_threads.end(), thread); 290 const auto iter = std::find(wait_mutex_threads.begin(), wait_mutex_threads.end(), thread);
292 ASSERT(itr != wait_mutex_threads.end()); 291 ASSERT(iter != wait_mutex_threads.end());
292
293 wait_mutex_threads.erase(iter);
293 294
294 boost::remove_erase(wait_mutex_threads, thread);
295 thread->lock_owner = nullptr; 295 thread->lock_owner = nullptr;
296 UpdatePriority(); 296 UpdatePriority();
297} 297}
298 298
299void Thread::UpdatePriority() { 299void Thread::UpdatePriority() {
300 // Find the highest priority among all the threads that are waiting for this thread's lock 300 // If any of the threads waiting on the mutex have a higher priority
301 // (taking into account priority inheritance), then this thread inherits
302 // that thread's priority.
301 u32 new_priority = nominal_priority; 303 u32 new_priority = nominal_priority;
302 for (const auto& thread : wait_mutex_threads) { 304 if (!wait_mutex_threads.empty()) {
303 if (thread->nominal_priority < new_priority) 305 if (wait_mutex_threads.front()->current_priority < new_priority) {
304 new_priority = thread->nominal_priority; 306 new_priority = wait_mutex_threads.front()->current_priority;
307 }
305 } 308 }
306 309
307 if (new_priority == current_priority) 310 if (new_priority == current_priority) {
308 return; 311 return;
312 }
309 313
310 scheduler->SetThreadPriority(this, new_priority); 314 scheduler->SetThreadPriority(this, new_priority);
311
312 current_priority = new_priority; 315 current_priority = new_priority;
313 316
317 if (!lock_owner) {
318 return;
319 }
320
321 // Ensure that the thread is within the correct location in the waiting list.
322 auto old_owner = lock_owner;
323 lock_owner->RemoveMutexWaiter(this);
324 old_owner->AddMutexWaiter(this);
325
314 // Recursively update the priority of the thread that depends on the priority of this one. 326 // Recursively update the priority of the thread that depends on the priority of this one.
315 if (lock_owner) 327 lock_owner->UpdatePriority();
316 lock_owner->UpdatePriority();
317} 328}
318 329
319void Thread::ChangeCore(u32 core, u64 mask) { 330void Thread::ChangeCore(u32 core, u64 mask) {
@@ -345,7 +356,7 @@ void Thread::ChangeScheduler() {
345 if (*new_processor_id != processor_id) { 356 if (*new_processor_id != processor_id) {
346 // Remove thread from previous core's scheduler 357 // Remove thread from previous core's scheduler
347 scheduler->RemoveThread(this); 358 scheduler->RemoveThread(this);
348 next_scheduler.AddThread(this, current_priority); 359 next_scheduler.AddThread(this);
349 } 360 }
350 361
351 processor_id = *new_processor_id; 362 processor_id = *new_processor_id;
@@ -360,7 +371,7 @@ void Thread::ChangeScheduler() {
360 system.CpuCore(processor_id).PrepareReschedule(); 371 system.CpuCore(processor_id).PrepareReschedule();
361} 372}
362 373
363bool Thread::AllWaitObjectsReady() { 374bool Thread::AllWaitObjectsReady() const {
364 return std::none_of( 375 return std::none_of(
365 wait_objects.begin(), wait_objects.end(), 376 wait_objects.begin(), wait_objects.end(),
366 [this](const SharedPtr<WaitObject>& object) { return object->ShouldWait(this); }); 377 [this](const SharedPtr<WaitObject>& object) { return object->ShouldWait(this); });
@@ -389,6 +400,14 @@ void Thread::SetActivity(ThreadActivity value) {
389 } 400 }
390} 401}
391 402
403void Thread::Sleep(s64 nanoseconds) {
404 // Sleep current thread and check for next thread to schedule
405 SetStatus(ThreadStatus::WaitSleep);
406
407 // Create an event to wake the thread up after the specified nanosecond delay has passed
408 WakeAfterDelay(nanoseconds);
409}
410
392//////////////////////////////////////////////////////////////////////////////////////////////////// 411////////////////////////////////////////////////////////////////////////////////////////////////////
393 412
394/** 413/**
diff --git a/src/core/hle/kernel/thread.h b/src/core/hle/kernel/thread.h
index c48b21aba..83c83e45a 100644
--- a/src/core/hle/kernel/thread.h
+++ b/src/core/hle/kernel/thread.h
@@ -51,7 +51,8 @@ enum class ThreadStatus {
51 WaitIPC, ///< Waiting for the reply from an IPC request 51 WaitIPC, ///< Waiting for the reply from an IPC request
52 WaitSynchAny, ///< Waiting due to WaitSynch1 or WaitSynchN with wait_all = false 52 WaitSynchAny, ///< Waiting due to WaitSynch1 or WaitSynchN with wait_all = false
53 WaitSynchAll, ///< Waiting due to WaitSynchronizationN with wait_all = true 53 WaitSynchAll, ///< Waiting due to WaitSynchronizationN with wait_all = true
54 WaitMutex, ///< Waiting due to an ArbitrateLock/WaitProcessWideKey svc 54 WaitMutex, ///< Waiting due to an ArbitrateLock svc
55 WaitCondVar, ///< Waiting due to an WaitProcessWideKey svc
55 WaitArb, ///< Waiting due to a SignalToAddress/WaitForAddress svc 56 WaitArb, ///< Waiting due to a SignalToAddress/WaitForAddress svc
56 Dormant, ///< Created but not yet made ready 57 Dormant, ///< Created but not yet made ready
57 Dead ///< Run to completion, or forcefully terminated 58 Dead ///< Run to completion, or forcefully terminated
@@ -105,12 +106,12 @@ public:
105 return "Thread"; 106 return "Thread";
106 } 107 }
107 108
108 static const HandleType HANDLE_TYPE = HandleType::Thread; 109 static constexpr HandleType HANDLE_TYPE = HandleType::Thread;
109 HandleType GetHandleType() const override { 110 HandleType GetHandleType() const override {
110 return HANDLE_TYPE; 111 return HANDLE_TYPE;
111 } 112 }
112 113
113 bool ShouldWait(Thread* thread) const override; 114 bool ShouldWait(const Thread* thread) const override;
114 void Acquire(Thread* thread) override; 115 void Acquire(Thread* thread) override;
115 116
116 /** 117 /**
@@ -204,7 +205,7 @@ public:
204 * object in the list. 205 * object in the list.
205 * @param object Object to query the index of. 206 * @param object Object to query the index of.
206 */ 207 */
207 s32 GetWaitObjectIndex(WaitObject* object) const; 208 s32 GetWaitObjectIndex(const WaitObject* object) const;
208 209
209 /** 210 /**
210 * Stops a thread, invalidating it from further use 211 * Stops a thread, invalidating it from further use
@@ -298,7 +299,7 @@ public:
298 } 299 }
299 300
300 /// Determines whether all the objects this thread is waiting on are ready. 301 /// Determines whether all the objects this thread is waiting on are ready.
301 bool AllWaitObjectsReady(); 302 bool AllWaitObjectsReady() const;
302 303
303 const MutexWaitingThreads& GetMutexWaitingThreads() const { 304 const MutexWaitingThreads& GetMutexWaitingThreads() const {
304 return wait_mutex_threads; 305 return wait_mutex_threads;
@@ -383,6 +384,9 @@ public:
383 384
384 void SetActivity(ThreadActivity value); 385 void SetActivity(ThreadActivity value);
385 386
387 /// Sleeps this thread for the given amount of nanoseconds.
388 void Sleep(s64 nanoseconds);
389
386private: 390private:
387 explicit Thread(KernelCore& kernel); 391 explicit Thread(KernelCore& kernel);
388 ~Thread() override; 392 ~Thread() override;
@@ -398,8 +402,14 @@ private:
398 VAddr entry_point = 0; 402 VAddr entry_point = 0;
399 VAddr stack_top = 0; 403 VAddr stack_top = 0;
400 404
401 u32 nominal_priority = 0; ///< Nominal thread priority, as set by the emulated application 405 /// Nominal thread priority, as set by the emulated application.
402 u32 current_priority = 0; ///< Current thread priority, can be temporarily changed 406 /// The nominal priority is the thread priority without priority
407 /// inheritance taken into account.
408 u32 nominal_priority = 0;
409
410 /// Current thread priority. This may change over the course of the
411 /// thread's lifetime in order to facilitate priority inheritance.
412 u32 current_priority = 0;
403 413
404 u64 total_cpu_time_ticks = 0; ///< Total CPU running ticks. 414 u64 total_cpu_time_ticks = 0; ///< Total CPU running ticks.
405 u64 last_running_ticks = 0; ///< CPU tick when thread was last running 415 u64 last_running_ticks = 0; ///< CPU tick when thread was last running
@@ -460,14 +470,4 @@ private:
460 */ 470 */
461Thread* GetCurrentThread(); 471Thread* GetCurrentThread();
462 472
463/**
464 * Waits the current thread on a sleep
465 */
466void WaitCurrentThread_Sleep();
467
468/**
469 * Stops the current thread and removes it from the thread_list
470 */
471void ExitCurrentThread();
472
473} // namespace Kernel 473} // namespace Kernel
diff --git a/src/core/hle/kernel/timer.cpp b/src/core/hle/kernel/timer.cpp
deleted file mode 100644
index 2c4f50e2b..000000000
--- a/src/core/hle/kernel/timer.cpp
+++ /dev/null
@@ -1,88 +0,0 @@
1// Copyright 2015 Citra Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/assert.h"
6#include "common/logging/log.h"
7#include "core/core.h"
8#include "core/core_timing.h"
9#include "core/core_timing_util.h"
10#include "core/hle/kernel/handle_table.h"
11#include "core/hle/kernel/kernel.h"
12#include "core/hle/kernel/object.h"
13#include "core/hle/kernel/thread.h"
14#include "core/hle/kernel/timer.h"
15
16namespace Kernel {
17
18Timer::Timer(KernelCore& kernel) : WaitObject{kernel} {}
19Timer::~Timer() = default;
20
21SharedPtr<Timer> Timer::Create(KernelCore& kernel, ResetType reset_type, std::string name) {
22 SharedPtr<Timer> timer(new Timer(kernel));
23
24 timer->reset_type = reset_type;
25 timer->signaled = false;
26 timer->name = std::move(name);
27 timer->initial_delay = 0;
28 timer->interval_delay = 0;
29 timer->callback_handle = kernel.CreateTimerCallbackHandle(timer).Unwrap();
30
31 return timer;
32}
33
34bool Timer::ShouldWait(Thread* thread) const {
35 return !signaled;
36}
37
38void Timer::Acquire(Thread* thread) {
39 ASSERT_MSG(!ShouldWait(thread), "object unavailable!");
40
41 if (reset_type == ResetType::OneShot)
42 signaled = false;
43}
44
45void Timer::Set(s64 initial, s64 interval) {
46 // Ensure we get rid of any previous scheduled event
47 Cancel();
48
49 initial_delay = initial;
50 interval_delay = interval;
51
52 if (initial == 0) {
53 // Immediately invoke the callback
54 Signal(0);
55 } else {
56 CoreTiming::ScheduleEvent(CoreTiming::nsToCycles(initial), kernel.TimerCallbackEventType(),
57 callback_handle);
58 }
59}
60
61void Timer::Cancel() {
62 CoreTiming::UnscheduleEvent(kernel.TimerCallbackEventType(), callback_handle);
63}
64
65void Timer::Clear() {
66 signaled = false;
67}
68
69void Timer::WakeupAllWaitingThreads() {
70 WaitObject::WakeupAllWaitingThreads();
71}
72
73void Timer::Signal(int cycles_late) {
74 LOG_TRACE(Kernel, "Timer {} fired", GetObjectId());
75
76 signaled = true;
77
78 // Resume all waiting threads
79 WakeupAllWaitingThreads();
80
81 if (interval_delay != 0) {
82 // Reschedule the timer with the interval delay
83 CoreTiming::ScheduleEvent(CoreTiming::nsToCycles(interval_delay) - cycles_late,
84 kernel.TimerCallbackEventType(), callback_handle);
85 }
86}
87
88} // namespace Kernel
diff --git a/src/core/hle/kernel/timer.h b/src/core/hle/kernel/timer.h
deleted file mode 100644
index 12915c1b1..000000000
--- a/src/core/hle/kernel/timer.h
+++ /dev/null
@@ -1,90 +0,0 @@
1// Copyright 2015 Citra Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include "common/common_types.h"
8#include "core/hle/kernel/object.h"
9#include "core/hle/kernel/wait_object.h"
10
11namespace Kernel {
12
13class KernelCore;
14
15class Timer final : public WaitObject {
16public:
17 /**
18 * Creates a timer
19 * @param kernel The kernel instance to create the timer callback handle for.
20 * @param reset_type ResetType describing how to create the timer
21 * @param name Optional name of timer
22 * @return The created Timer
23 */
24 static SharedPtr<Timer> Create(KernelCore& kernel, ResetType reset_type,
25 std::string name = "Unknown");
26
27 std::string GetTypeName() const override {
28 return "Timer";
29 }
30 std::string GetName() const override {
31 return name;
32 }
33
34 static const HandleType HANDLE_TYPE = HandleType::Timer;
35 HandleType GetHandleType() const override {
36 return HANDLE_TYPE;
37 }
38
39 ResetType GetResetType() const {
40 return reset_type;
41 }
42
43 u64 GetInitialDelay() const {
44 return initial_delay;
45 }
46
47 u64 GetIntervalDelay() const {
48 return interval_delay;
49 }
50
51 bool ShouldWait(Thread* thread) const override;
52 void Acquire(Thread* thread) override;
53
54 void WakeupAllWaitingThreads() override;
55
56 /**
57 * Starts the timer, with the specified initial delay and interval.
58 * @param initial Delay until the timer is first fired
59 * @param interval Delay until the timer is fired after the first time
60 */
61 void Set(s64 initial, s64 interval);
62
63 void Cancel();
64 void Clear();
65
66 /**
67 * Signals the timer, waking up any waiting threads and rescheduling it
68 * for the next interval.
69 * This method should not be called from outside the timer callback handler,
70 * lest multiple callback events get scheduled.
71 */
72 void Signal(int cycles_late);
73
74private:
75 explicit Timer(KernelCore& kernel);
76 ~Timer() override;
77
78 ResetType reset_type; ///< The ResetType of this timer
79
80 u64 initial_delay; ///< The delay until the timer fires for the first time
81 u64 interval_delay; ///< The delay until the timer fires after the first time
82
83 bool signaled; ///< Whether the timer has been signaled or not
84 std::string name; ///< Name of timer (optional)
85
86 /// Handle used as userdata to reference this object when inserting into the CoreTiming queue.
87 Handle callback_handle;
88};
89
90} // namespace Kernel
diff --git a/src/core/hle/kernel/transfer_memory.cpp b/src/core/hle/kernel/transfer_memory.cpp
new file mode 100644
index 000000000..26c4e5e67
--- /dev/null
+++ b/src/core/hle/kernel/transfer_memory.cpp
@@ -0,0 +1,81 @@
1// Copyright 2019 yuzu emulator team
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "core/hle/kernel/errors.h"
6#include "core/hle/kernel/kernel.h"
7#include "core/hle/kernel/process.h"
8#include "core/hle/kernel/shared_memory.h"
9#include "core/hle/kernel/transfer_memory.h"
10#include "core/hle/result.h"
11
12namespace Kernel {
13
14TransferMemory::TransferMemory(KernelCore& kernel) : Object{kernel} {}
15TransferMemory::~TransferMemory() = default;
16
17SharedPtr<TransferMemory> TransferMemory::Create(KernelCore& kernel, VAddr base_address, u64 size,
18 MemoryPermission permissions) {
19 SharedPtr<TransferMemory> transfer_memory{new TransferMemory(kernel)};
20
21 transfer_memory->base_address = base_address;
22 transfer_memory->memory_size = size;
23 transfer_memory->owner_permissions = permissions;
24 transfer_memory->owner_process = kernel.CurrentProcess();
25
26 return transfer_memory;
27}
28
29const u8* TransferMemory::GetPointer() const {
30 return backing_block.get()->data();
31}
32
33u64 TransferMemory::GetSize() const {
34 return memory_size;
35}
36
37ResultCode TransferMemory::MapMemory(VAddr address, u64 size, MemoryPermission permissions) {
38 if (memory_size != size) {
39 return ERR_INVALID_SIZE;
40 }
41
42 if (owner_permissions != permissions) {
43 return ERR_INVALID_STATE;
44 }
45
46 if (is_mapped) {
47 return ERR_INVALID_STATE;
48 }
49
50 backing_block = std::make_shared<std::vector<u8>>(size);
51
52 const auto map_state = owner_permissions == MemoryPermission::None
53 ? MemoryState::TransferMemoryIsolated
54 : MemoryState::TransferMemory;
55 auto& vm_manager = owner_process->VMManager();
56 const auto map_result = vm_manager.MapMemoryBlock(address, backing_block, 0, size, map_state);
57 if (map_result.Failed()) {
58 return map_result.Code();
59 }
60
61 is_mapped = true;
62 return RESULT_SUCCESS;
63}
64
65ResultCode TransferMemory::UnmapMemory(VAddr address, u64 size) {
66 if (memory_size != size) {
67 return ERR_INVALID_SIZE;
68 }
69
70 auto& vm_manager = owner_process->VMManager();
71 const auto result = vm_manager.UnmapRange(address, size);
72
73 if (result.IsError()) {
74 return result;
75 }
76
77 is_mapped = false;
78 return RESULT_SUCCESS;
79}
80
81} // namespace Kernel
diff --git a/src/core/hle/kernel/transfer_memory.h b/src/core/hle/kernel/transfer_memory.h
new file mode 100644
index 000000000..a140b1e2b
--- /dev/null
+++ b/src/core/hle/kernel/transfer_memory.h
@@ -0,0 +1,103 @@
1// Copyright 2019 yuzu emulator team
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <memory>
8#include <vector>
9
10#include "core/hle/kernel/object.h"
11
12union ResultCode;
13
14namespace Kernel {
15
16class KernelCore;
17class Process;
18
19enum class MemoryPermission : u32;
20
21/// Defines the interface for transfer memory objects.
22///
23/// Transfer memory is typically used for the purpose of
24/// transferring memory between separate process instances,
25/// thus the name.
26///
27class TransferMemory final : public Object {
28public:
29 static constexpr HandleType HANDLE_TYPE = HandleType::TransferMemory;
30
31 static SharedPtr<TransferMemory> Create(KernelCore& kernel, VAddr base_address, u64 size,
32 MemoryPermission permissions);
33
34 TransferMemory(const TransferMemory&) = delete;
35 TransferMemory& operator=(const TransferMemory&) = delete;
36
37 TransferMemory(TransferMemory&&) = delete;
38 TransferMemory& operator=(TransferMemory&&) = delete;
39
40 std::string GetTypeName() const override {
41 return "TransferMemory";
42 }
43
44 std::string GetName() const override {
45 return GetTypeName();
46 }
47
48 HandleType GetHandleType() const override {
49 return HANDLE_TYPE;
50 }
51
52 /// Gets a pointer to the backing block of this instance.
53 const u8* GetPointer() const;
54
55 /// Gets the size of the memory backing this instance in bytes.
56 u64 GetSize() const;
57
58 /// Attempts to map transfer memory with the given range and memory permissions.
59 ///
60 /// @param address The base address to being mapping memory at.
61 /// @param size The size of the memory to map, in bytes.
62 /// @param permissions The memory permissions to check against when mapping memory.
63 ///
64 /// @pre The given address, size, and memory permissions must all match
65 /// the same values that were given when creating the transfer memory
66 /// instance.
67 ///
68 ResultCode MapMemory(VAddr address, u64 size, MemoryPermission permissions);
69
70 /// Unmaps the transfer memory with the given range
71 ///
72 /// @param address The base address to begin unmapping memory at.
73 /// @param size The size of the memory to unmap, in bytes.
74 ///
75 /// @pre The given address and size must be the same as the ones used
76 /// to create the transfer memory instance.
77 ///
78 ResultCode UnmapMemory(VAddr address, u64 size);
79
80private:
81 explicit TransferMemory(KernelCore& kernel);
82 ~TransferMemory() override;
83
84 /// Memory block backing this instance.
85 std::shared_ptr<std::vector<u8>> backing_block;
86
87 /// The base address for the memory managed by this instance.
88 VAddr base_address = 0;
89
90 /// Size of the memory, in bytes, that this instance manages.
91 u64 memory_size = 0;
92
93 /// The memory permissions that are applied to this instance.
94 MemoryPermission owner_permissions{};
95
96 /// The process that this transfer memory instance was created under.
97 Process* owner_process = nullptr;
98
99 /// Whether or not this transfer memory instance has mapped memory.
100 bool is_mapped = false;
101};
102
103} // namespace Kernel
diff --git a/src/core/hle/kernel/vm_manager.cpp b/src/core/hle/kernel/vm_manager.cpp
index 10ad94aa6..ec0a480ce 100644
--- a/src/core/hle/kernel/vm_manager.cpp
+++ b/src/core/hle/kernel/vm_manager.cpp
@@ -7,34 +7,42 @@
7#include <utility> 7#include <utility>
8#include "common/assert.h" 8#include "common/assert.h"
9#include "common/logging/log.h" 9#include "common/logging/log.h"
10#include "common/memory_hook.h"
10#include "core/arm/arm_interface.h" 11#include "core/arm/arm_interface.h"
11#include "core/core.h" 12#include "core/core.h"
12#include "core/file_sys/program_metadata.h" 13#include "core/file_sys/program_metadata.h"
13#include "core/hle/kernel/errors.h" 14#include "core/hle/kernel/errors.h"
14#include "core/hle/kernel/vm_manager.h" 15#include "core/hle/kernel/vm_manager.h"
15#include "core/memory.h" 16#include "core/memory.h"
16#include "core/memory_hook.h"
17#include "core/memory_setup.h" 17#include "core/memory_setup.h"
18 18
19namespace Kernel { 19namespace Kernel {
20 20namespace {
21static const char* GetMemoryStateName(MemoryState state) { 21const char* GetMemoryStateName(MemoryState state) {
22 static constexpr const char* names[] = { 22 static constexpr const char* names[] = {
23 "Unmapped", "Io", 23 "Unmapped", "Io",
24 "Normal", "CodeStatic", 24 "Normal", "Code",
25 "CodeMutable", "Heap", 25 "CodeData", "Heap",
26 "Shared", "Unknown1", 26 "Shared", "Unknown1",
27 "ModuleCodeStatic", "ModuleCodeMutable", 27 "ModuleCode", "ModuleCodeData",
28 "IpcBuffer0", "Stack", 28 "IpcBuffer0", "Stack",
29 "ThreadLocal", "TransferMemoryIsolated", 29 "ThreadLocal", "TransferMemoryIsolated",
30 "TransferMemory", "ProcessMemory", 30 "TransferMemory", "ProcessMemory",
31 "Inaccessible", "IpcBuffer1", 31 "Inaccessible", "IpcBuffer1",
32 "IpcBuffer3", "KernelStack", 32 "IpcBuffer3", "KernelStack",
33 }; 33 };
34 34
35 return names[ToSvcMemoryState(state)]; 35 return names[ToSvcMemoryState(state)];
36} 36}
37 37
38// Checks if a given address range lies within a larger address range.
39constexpr bool IsInsideAddressRange(VAddr address, u64 size, VAddr address_range_begin,
40 VAddr address_range_end) {
41 const VAddr end_address = address + size - 1;
42 return address_range_begin <= address && end_address <= address_range_end - 1;
43}
44} // Anonymous namespace
45
38bool VirtualMemoryArea::CanBeMergedWith(const VirtualMemoryArea& next) const { 46bool VirtualMemoryArea::CanBeMergedWith(const VirtualMemoryArea& next) const {
39 ASSERT(base + size == next.base); 47 ASSERT(base + size == next.base);
40 if (permissions != next.permissions || state != next.state || attribute != next.attribute || 48 if (permissions != next.permissions || state != next.state || attribute != next.attribute ||
@@ -169,7 +177,7 @@ ResultVal<VAddr> VMManager::FindFreeRegion(u64 size) const {
169 177
170ResultVal<VMManager::VMAHandle> VMManager::MapMMIO(VAddr target, PAddr paddr, u64 size, 178ResultVal<VMManager::VMAHandle> VMManager::MapMMIO(VAddr target, PAddr paddr, u64 size,
171 MemoryState state, 179 MemoryState state,
172 Memory::MemoryHookPointer mmio_handler) { 180 Common::MemoryHookPointer mmio_handler) {
173 // This is the appropriately sized VMA that will turn into our allocation. 181 // This is the appropriately sized VMA that will turn into our allocation.
174 CASCADE_RESULT(VMAIter vma_handle, CarveVMA(target, size)); 182 CASCADE_RESULT(VMAIter vma_handle, CarveVMA(target, size));
175 VirtualMemoryArea& final_vma = vma_handle->second; 183 VirtualMemoryArea& final_vma = vma_handle->second;
@@ -248,59 +256,50 @@ ResultCode VMManager::ReprotectRange(VAddr target, u64 size, VMAPermission new_p
248 return RESULT_SUCCESS; 256 return RESULT_SUCCESS;
249} 257}
250 258
251ResultVal<VAddr> VMManager::HeapAllocate(VAddr target, u64 size, VMAPermission perms) { 259ResultVal<VAddr> VMManager::SetHeapSize(u64 size) {
252 if (target < GetHeapRegionBaseAddress() || target + size > GetHeapRegionEndAddress() || 260 if (size > GetHeapRegionSize()) {
253 target + size < target) { 261 return ERR_OUT_OF_MEMORY;
254 return ERR_INVALID_ADDRESS; 262 }
263
264 // No need to do any additional work if the heap is already the given size.
265 if (size == GetCurrentHeapSize()) {
266 return MakeResult(heap_region_base);
255 } 267 }
256 268
257 if (heap_memory == nullptr) { 269 if (heap_memory == nullptr) {
258 // Initialize heap 270 // Initialize heap
259 heap_memory = std::make_shared<std::vector<u8>>(); 271 heap_memory = std::make_shared<std::vector<u8>>(size);
260 heap_start = heap_end = target; 272 heap_end = heap_region_base + size;
261 } else { 273 } else {
262 UnmapRange(heap_start, heap_end - heap_start); 274 UnmapRange(heap_region_base, GetCurrentHeapSize());
263 }
264
265 // If necessary, expand backing vector to cover new heap extents.
266 if (target < heap_start) {
267 heap_memory->insert(begin(*heap_memory), heap_start - target, 0);
268 heap_start = target;
269 RefreshMemoryBlockMappings(heap_memory.get());
270 }
271 if (target + size > heap_end) {
272 heap_memory->insert(end(*heap_memory), (target + size) - heap_end, 0);
273 heap_end = target + size;
274 RefreshMemoryBlockMappings(heap_memory.get());
275 } 275 }
276 ASSERT(heap_end - heap_start == heap_memory->size());
277 276
278 CASCADE_RESULT(auto vma, MapMemoryBlock(target, heap_memory, target - heap_start, size, 277 // If necessary, expand backing vector to cover new heap extents in
279 MemoryState::Heap)); 278 // the case of allocating. Otherwise, shrink the backing memory,
280 Reprotect(vma, perms); 279 // if a smaller heap has been requested.
280 const u64 old_heap_size = GetCurrentHeapSize();
281 if (size > old_heap_size) {
282 const u64 alloc_size = size - old_heap_size;
281 283
282 heap_used = size; 284 heap_memory->insert(heap_memory->end(), alloc_size, 0);
283 285 RefreshMemoryBlockMappings(heap_memory.get());
284 return MakeResult<VAddr>(heap_end - size); 286 } else if (size < old_heap_size) {
285} 287 heap_memory->resize(size);
288 heap_memory->shrink_to_fit();
286 289
287ResultCode VMManager::HeapFree(VAddr target, u64 size) { 290 RefreshMemoryBlockMappings(heap_memory.get());
288 if (target < GetHeapRegionBaseAddress() || target + size > GetHeapRegionEndAddress() ||
289 target + size < target) {
290 return ERR_INVALID_ADDRESS;
291 } 291 }
292 292
293 if (size == 0) { 293 heap_end = heap_region_base + size;
294 return RESULT_SUCCESS; 294 ASSERT(GetCurrentHeapSize() == heap_memory->size());
295 }
296 295
297 const ResultCode result = UnmapRange(target, size); 296 const auto mapping_result =
298 if (result.IsError()) { 297 MapMemoryBlock(heap_region_base, heap_memory, 0, size, MemoryState::Heap);
299 return result; 298 if (mapping_result.Failed()) {
299 return mapping_result.Code();
300 } 300 }
301 301
302 heap_used -= size; 302 return MakeResult<VAddr>(heap_region_base);
303 return RESULT_SUCCESS;
304} 303}
305 304
306MemoryInfo VMManager::QueryMemory(VAddr address) const { 305MemoryInfo VMManager::QueryMemory(VAddr address) const {
@@ -592,6 +591,7 @@ void VMManager::InitializeMemoryRegionRanges(FileSys::ProgramAddressSpaceType ty
592 591
593 heap_region_base = map_region_end; 592 heap_region_base = map_region_end;
594 heap_region_end = heap_region_base + heap_region_size; 593 heap_region_end = heap_region_base + heap_region_size;
594 heap_end = heap_region_base;
595 595
596 new_map_region_base = heap_region_end; 596 new_map_region_base = heap_region_end;
597 new_map_region_end = new_map_region_base + new_map_region_size; 597 new_map_region_end = new_map_region_base + new_map_region_size;
@@ -618,7 +618,7 @@ void VMManager::ClearPageTable() {
618 std::fill(page_table.pointers.begin(), page_table.pointers.end(), nullptr); 618 std::fill(page_table.pointers.begin(), page_table.pointers.end(), nullptr);
619 page_table.special_regions.clear(); 619 page_table.special_regions.clear();
620 std::fill(page_table.attributes.begin(), page_table.attributes.end(), 620 std::fill(page_table.attributes.begin(), page_table.attributes.end(),
621 Memory::PageType::Unmapped); 621 Common::PageType::Unmapped);
622} 622}
623 623
624VMManager::CheckResults VMManager::CheckRangeState(VAddr address, u64 size, MemoryState state_mask, 624VMManager::CheckResults VMManager::CheckRangeState(VAddr address, u64 size, MemoryState state_mask,
@@ -686,10 +686,6 @@ u64 VMManager::GetTotalMemoryUsage() const {
686 return 0xF8000000; 686 return 0xF8000000;
687} 687}
688 688
689u64 VMManager::GetTotalHeapUsage() const {
690 return heap_used;
691}
692
693VAddr VMManager::GetAddressSpaceBaseAddress() const { 689VAddr VMManager::GetAddressSpaceBaseAddress() const {
694 return address_space_base; 690 return address_space_base;
695} 691}
@@ -706,6 +702,11 @@ u64 VMManager::GetAddressSpaceWidth() const {
706 return address_space_width; 702 return address_space_width;
707} 703}
708 704
705bool VMManager::IsWithinAddressSpace(VAddr address, u64 size) const {
706 return IsInsideAddressRange(address, size, GetAddressSpaceBaseAddress(),
707 GetAddressSpaceEndAddress());
708}
709
709VAddr VMManager::GetASLRRegionBaseAddress() const { 710VAddr VMManager::GetASLRRegionBaseAddress() const {
710 return aslr_region_base; 711 return aslr_region_base;
711} 712}
@@ -750,6 +751,11 @@ u64 VMManager::GetCodeRegionSize() const {
750 return code_region_end - code_region_base; 751 return code_region_end - code_region_base;
751} 752}
752 753
754bool VMManager::IsWithinCodeRegion(VAddr address, u64 size) const {
755 return IsInsideAddressRange(address, size, GetCodeRegionBaseAddress(),
756 GetCodeRegionEndAddress());
757}
758
753VAddr VMManager::GetHeapRegionBaseAddress() const { 759VAddr VMManager::GetHeapRegionBaseAddress() const {
754 return heap_region_base; 760 return heap_region_base;
755} 761}
@@ -762,6 +768,15 @@ u64 VMManager::GetHeapRegionSize() const {
762 return heap_region_end - heap_region_base; 768 return heap_region_end - heap_region_base;
763} 769}
764 770
771u64 VMManager::GetCurrentHeapSize() const {
772 return heap_end - heap_region_base;
773}
774
775bool VMManager::IsWithinHeapRegion(VAddr address, u64 size) const {
776 return IsInsideAddressRange(address, size, GetHeapRegionBaseAddress(),
777 GetHeapRegionEndAddress());
778}
779
765VAddr VMManager::GetMapRegionBaseAddress() const { 780VAddr VMManager::GetMapRegionBaseAddress() const {
766 return map_region_base; 781 return map_region_base;
767} 782}
@@ -774,6 +789,10 @@ u64 VMManager::GetMapRegionSize() const {
774 return map_region_end - map_region_base; 789 return map_region_end - map_region_base;
775} 790}
776 791
792bool VMManager::IsWithinMapRegion(VAddr address, u64 size) const {
793 return IsInsideAddressRange(address, size, GetMapRegionBaseAddress(), GetMapRegionEndAddress());
794}
795
777VAddr VMManager::GetNewMapRegionBaseAddress() const { 796VAddr VMManager::GetNewMapRegionBaseAddress() const {
778 return new_map_region_base; 797 return new_map_region_base;
779} 798}
@@ -786,6 +805,11 @@ u64 VMManager::GetNewMapRegionSize() const {
786 return new_map_region_end - new_map_region_base; 805 return new_map_region_end - new_map_region_base;
787} 806}
788 807
808bool VMManager::IsWithinNewMapRegion(VAddr address, u64 size) const {
809 return IsInsideAddressRange(address, size, GetNewMapRegionBaseAddress(),
810 GetNewMapRegionEndAddress());
811}
812
789VAddr VMManager::GetTLSIORegionBaseAddress() const { 813VAddr VMManager::GetTLSIORegionBaseAddress() const {
790 return tls_io_region_base; 814 return tls_io_region_base;
791} 815}
@@ -798,4 +822,9 @@ u64 VMManager::GetTLSIORegionSize() const {
798 return tls_io_region_end - tls_io_region_base; 822 return tls_io_region_end - tls_io_region_base;
799} 823}
800 824
825bool VMManager::IsWithinTLSIORegion(VAddr address, u64 size) const {
826 return IsInsideAddressRange(address, size, GetTLSIORegionBaseAddress(),
827 GetTLSIORegionEndAddress());
828}
829
801} // namespace Kernel 830} // namespace Kernel
diff --git a/src/core/hle/kernel/vm_manager.h b/src/core/hle/kernel/vm_manager.h
index 6091533bc..6f484b7bf 100644
--- a/src/core/hle/kernel/vm_manager.h
+++ b/src/core/hle/kernel/vm_manager.h
@@ -9,9 +9,10 @@
9#include <tuple> 9#include <tuple>
10#include <vector> 10#include <vector>
11#include "common/common_types.h" 11#include "common/common_types.h"
12#include "common/memory_hook.h"
13#include "common/page_table.h"
12#include "core/hle/result.h" 14#include "core/hle/result.h"
13#include "core/memory.h" 15#include "core/memory.h"
14#include "core/memory_hook.h"
15 16
16namespace FileSys { 17namespace FileSys {
17enum class ProgramAddressSpaceType : u8; 18enum class ProgramAddressSpaceType : u8;
@@ -164,12 +165,12 @@ enum class MemoryState : u32 {
164 Unmapped = 0x00, 165 Unmapped = 0x00,
165 Io = 0x01 | FlagMapped, 166 Io = 0x01 | FlagMapped,
166 Normal = 0x02 | FlagMapped | FlagQueryPhysicalAddressAllowed, 167 Normal = 0x02 | FlagMapped | FlagQueryPhysicalAddressAllowed,
167 CodeStatic = 0x03 | CodeFlags | FlagMapProcess, 168 Code = 0x03 | CodeFlags | FlagMapProcess,
168 CodeMutable = 0x04 | CodeFlags | FlagMapProcess | FlagCodeMemory, 169 CodeData = 0x04 | DataFlags | FlagMapProcess | FlagCodeMemory,
169 Heap = 0x05 | DataFlags | FlagCodeMemory, 170 Heap = 0x05 | DataFlags | FlagCodeMemory,
170 Shared = 0x06 | FlagMapped | FlagMemoryPoolAllocated, 171 Shared = 0x06 | FlagMapped | FlagMemoryPoolAllocated,
171 ModuleCodeStatic = 0x08 | CodeFlags | FlagModule | FlagMapProcess, 172 ModuleCode = 0x08 | CodeFlags | FlagModule | FlagMapProcess,
172 ModuleCodeMutable = 0x09 | DataFlags | FlagModule | FlagMapProcess | FlagCodeMemory, 173 ModuleCodeData = 0x09 | DataFlags | FlagModule | FlagMapProcess | FlagCodeMemory,
173 174
174 IpcBuffer0 = 0x0A | FlagMapped | FlagQueryPhysicalAddressAllowed | FlagMemoryPoolAllocated | 175 IpcBuffer0 = 0x0A | FlagMapped | FlagQueryPhysicalAddressAllowed | FlagMemoryPoolAllocated |
175 IPCFlags | FlagSharedDevice | FlagSharedDeviceAligned, 176 IPCFlags | FlagSharedDevice | FlagSharedDeviceAligned,
@@ -290,7 +291,7 @@ struct VirtualMemoryArea {
290 // Settings for type = MMIO 291 // Settings for type = MMIO
291 /// Physical address of the register area this VMA maps to. 292 /// Physical address of the register area this VMA maps to.
292 PAddr paddr = 0; 293 PAddr paddr = 0;
293 Memory::MemoryHookPointer mmio_handler = nullptr; 294 Common::MemoryHookPointer mmio_handler = nullptr;
294 295
295 /// Tests if this area can be merged to the right with `next`. 296 /// Tests if this area can be merged to the right with `next`.
296 bool CanBeMergedWith(const VirtualMemoryArea& next) const; 297 bool CanBeMergedWith(const VirtualMemoryArea& next) const;
@@ -368,7 +369,7 @@ public:
368 * @param mmio_handler The handler that will implement read and write for this MMIO region. 369 * @param mmio_handler The handler that will implement read and write for this MMIO region.
369 */ 370 */
370 ResultVal<VMAHandle> MapMMIO(VAddr target, PAddr paddr, u64 size, MemoryState state, 371 ResultVal<VMAHandle> MapMMIO(VAddr target, PAddr paddr, u64 size, MemoryState state,
371 Memory::MemoryHookPointer mmio_handler); 372 Common::MemoryHookPointer mmio_handler);
372 373
373 /// Unmaps a range of addresses, splitting VMAs as necessary. 374 /// Unmaps a range of addresses, splitting VMAs as necessary.
374 ResultCode UnmapRange(VAddr target, u64 size); 375 ResultCode UnmapRange(VAddr target, u64 size);
@@ -379,11 +380,41 @@ public:
379 /// Changes the permissions of a range of addresses, splitting VMAs as necessary. 380 /// Changes the permissions of a range of addresses, splitting VMAs as necessary.
380 ResultCode ReprotectRange(VAddr target, u64 size, VMAPermission new_perms); 381 ResultCode ReprotectRange(VAddr target, u64 size, VMAPermission new_perms);
381 382
382 ResultVal<VAddr> HeapAllocate(VAddr target, u64 size, VMAPermission perms);
383 ResultCode HeapFree(VAddr target, u64 size);
384
385 ResultCode MirrorMemory(VAddr dst_addr, VAddr src_addr, u64 size, MemoryState state); 383 ResultCode MirrorMemory(VAddr dst_addr, VAddr src_addr, u64 size, MemoryState state);
386 384
385 /// Attempts to allocate a heap with the given size.
386 ///
387 /// @param size The size of the heap to allocate in bytes.
388 ///
389 /// @note If a heap is currently allocated, and this is called
390 /// with a size that is equal to the size of the current heap,
391 /// then this function will do nothing and return the current
392 /// heap's starting address, as there's no need to perform
393 /// any additional heap allocation work.
394 ///
395 /// @note If a heap is currently allocated, and this is called
396 /// with a size less than the current heap's size, then
397 /// this function will attempt to shrink the heap.
398 ///
399 /// @note If a heap is currently allocated, and this is called
400 /// with a size larger than the current heap's size, then
401 /// this function will attempt to extend the size of the heap.
402 ///
403 /// @returns A result indicating either success or failure.
404 /// <p>
405 /// If successful, this function will return a result
406 /// containing the starting address to the allocated heap.
407 /// <p>
408 /// If unsuccessful, this function will return a result
409 /// containing an error code.
410 ///
411 /// @pre The given size must lie within the allowable heap
412 /// memory region managed by this VMManager instance.
413 /// Failure to abide by this will result in ERR_OUT_OF_MEMORY
414 /// being returned as the result.
415 ///
416 ResultVal<VAddr> SetHeapSize(u64 size);
417
387 /// Queries the memory manager for information about the given address. 418 /// Queries the memory manager for information about the given address.
388 /// 419 ///
389 /// @param address The address to query the memory manager about for information. 420 /// @param address The address to query the memory manager about for information.
@@ -417,9 +448,6 @@ public:
417 /// Gets the total memory usage, used by svcGetInfo 448 /// Gets the total memory usage, used by svcGetInfo
418 u64 GetTotalMemoryUsage() const; 449 u64 GetTotalMemoryUsage() const;
419 450
420 /// Gets the total heap usage, used by svcGetInfo
421 u64 GetTotalHeapUsage() const;
422
423 /// Gets the address space base address 451 /// Gets the address space base address
424 VAddr GetAddressSpaceBaseAddress() const; 452 VAddr GetAddressSpaceBaseAddress() const;
425 453
@@ -432,18 +460,21 @@ public:
432 /// Gets the address space width in bits. 460 /// Gets the address space width in bits.
433 u64 GetAddressSpaceWidth() const; 461 u64 GetAddressSpaceWidth() const;
434 462
463 /// Determines whether or not the given address range lies within the address space.
464 bool IsWithinAddressSpace(VAddr address, u64 size) const;
465
435 /// Gets the base address of the ASLR region. 466 /// Gets the base address of the ASLR region.
436 VAddr GetASLRRegionBaseAddress() const; 467 VAddr GetASLRRegionBaseAddress() const;
437 468
438 /// Gets the end address of the ASLR region. 469 /// Gets the end address of the ASLR region.
439 VAddr GetASLRRegionEndAddress() const; 470 VAddr GetASLRRegionEndAddress() const;
440 471
441 /// Determines whether or not the specified address range is within the ASLR region.
442 bool IsWithinASLRRegion(VAddr address, u64 size) const;
443
444 /// Gets the size of the ASLR region 472 /// Gets the size of the ASLR region
445 u64 GetASLRRegionSize() const; 473 u64 GetASLRRegionSize() const;
446 474
475 /// Determines whether or not the specified address range is within the ASLR region.
476 bool IsWithinASLRRegion(VAddr address, u64 size) const;
477
447 /// Gets the base address of the code region. 478 /// Gets the base address of the code region.
448 VAddr GetCodeRegionBaseAddress() const; 479 VAddr GetCodeRegionBaseAddress() const;
449 480
@@ -453,6 +484,9 @@ public:
453 /// Gets the total size of the code region in bytes. 484 /// Gets the total size of the code region in bytes.
454 u64 GetCodeRegionSize() const; 485 u64 GetCodeRegionSize() const;
455 486
487 /// Determines whether or not the specified range is within the code region.
488 bool IsWithinCodeRegion(VAddr address, u64 size) const;
489
456 /// Gets the base address of the heap region. 490 /// Gets the base address of the heap region.
457 VAddr GetHeapRegionBaseAddress() const; 491 VAddr GetHeapRegionBaseAddress() const;
458 492
@@ -462,6 +496,16 @@ public:
462 /// Gets the total size of the heap region in bytes. 496 /// Gets the total size of the heap region in bytes.
463 u64 GetHeapRegionSize() const; 497 u64 GetHeapRegionSize() const;
464 498
499 /// Gets the total size of the current heap in bytes.
500 ///
501 /// @note This is the current allocated heap size, not the size
502 /// of the region it's allowed to exist within.
503 ///
504 u64 GetCurrentHeapSize() const;
505
506 /// Determines whether or not the specified range is within the heap region.
507 bool IsWithinHeapRegion(VAddr address, u64 size) const;
508
465 /// Gets the base address of the map region. 509 /// Gets the base address of the map region.
466 VAddr GetMapRegionBaseAddress() const; 510 VAddr GetMapRegionBaseAddress() const;
467 511
@@ -471,6 +515,9 @@ public:
471 /// Gets the total size of the map region in bytes. 515 /// Gets the total size of the map region in bytes.
472 u64 GetMapRegionSize() const; 516 u64 GetMapRegionSize() const;
473 517
518 /// Determines whether or not the specified range is within the map region.
519 bool IsWithinMapRegion(VAddr address, u64 size) const;
520
474 /// Gets the base address of the new map region. 521 /// Gets the base address of the new map region.
475 VAddr GetNewMapRegionBaseAddress() const; 522 VAddr GetNewMapRegionBaseAddress() const;
476 523
@@ -480,6 +527,9 @@ public:
480 /// Gets the total size of the new map region in bytes. 527 /// Gets the total size of the new map region in bytes.
481 u64 GetNewMapRegionSize() const; 528 u64 GetNewMapRegionSize() const;
482 529
530 /// Determines whether or not the given address range is within the new map region
531 bool IsWithinNewMapRegion(VAddr address, u64 size) const;
532
483 /// Gets the base address of the TLS IO region. 533 /// Gets the base address of the TLS IO region.
484 VAddr GetTLSIORegionBaseAddress() const; 534 VAddr GetTLSIORegionBaseAddress() const;
485 535
@@ -489,9 +539,12 @@ public:
489 /// Gets the total size of the TLS IO region in bytes. 539 /// Gets the total size of the TLS IO region in bytes.
490 u64 GetTLSIORegionSize() const; 540 u64 GetTLSIORegionSize() const;
491 541
542 /// Determines if the given address range is within the TLS IO region.
543 bool IsWithinTLSIORegion(VAddr address, u64 size) const;
544
492 /// Each VMManager has its own page table, which is set as the main one when the owning process 545 /// Each VMManager has its own page table, which is set as the main one when the owning process
493 /// is scheduled. 546 /// is scheduled.
494 Memory::PageTable page_table; 547 Common::PageTable page_table{Memory::PAGE_BITS};
495 548
496private: 549private:
497 using VMAIter = VMAMap::iterator; 550 using VMAIter = VMAMap::iterator;
@@ -606,9 +659,9 @@ private:
606 // This makes deallocation and reallocation of holes fast and keeps process memory contiguous 659 // This makes deallocation and reallocation of holes fast and keeps process memory contiguous
607 // in the emulator address space, allowing Memory::GetPointer to be reasonably safe. 660 // in the emulator address space, allowing Memory::GetPointer to be reasonably safe.
608 std::shared_ptr<std::vector<u8>> heap_memory; 661 std::shared_ptr<std::vector<u8>> heap_memory;
609 // The left/right bounds of the address space covered by heap_memory. 662
610 VAddr heap_start = 0; 663 // The end of the currently allocated heap. This is not an inclusive
664 // end of the range. This is essentially 'base_address + current_size'.
611 VAddr heap_end = 0; 665 VAddr heap_end = 0;
612 u64 heap_used = 0;
613}; 666};
614} // namespace Kernel 667} // namespace Kernel
diff --git a/src/core/hle/kernel/wait_object.h b/src/core/hle/kernel/wait_object.h
index d70b67893..04464a51a 100644
--- a/src/core/hle/kernel/wait_object.h
+++ b/src/core/hle/kernel/wait_object.h
@@ -24,7 +24,7 @@ public:
24 * @param thread The thread about which we're deciding. 24 * @param thread The thread about which we're deciding.
25 * @return True if the current thread should wait due to this object being unavailable 25 * @return True if the current thread should wait due to this object being unavailable
26 */ 26 */
27 virtual bool ShouldWait(Thread* thread) const = 0; 27 virtual bool ShouldWait(const Thread* thread) const = 0;
28 28
29 /// Acquire/lock the object for the specified thread if it is available 29 /// Acquire/lock the object for the specified thread if it is available
30 virtual void Acquire(Thread* thread) = 0; 30 virtual void Acquire(Thread* thread) = 0;
@@ -33,19 +33,19 @@ public:
33 * Add a thread to wait on this object 33 * Add a thread to wait on this object
34 * @param thread Pointer to thread to add 34 * @param thread Pointer to thread to add
35 */ 35 */
36 virtual void AddWaitingThread(SharedPtr<Thread> thread); 36 void AddWaitingThread(SharedPtr<Thread> thread);
37 37
38 /** 38 /**
39 * Removes a thread from waiting on this object (e.g. if it was resumed already) 39 * Removes a thread from waiting on this object (e.g. if it was resumed already)
40 * @param thread Pointer to thread to remove 40 * @param thread Pointer to thread to remove
41 */ 41 */
42 virtual void RemoveWaitingThread(Thread* thread); 42 void RemoveWaitingThread(Thread* thread);
43 43
44 /** 44 /**
45 * Wake up all threads waiting on this object that can be awoken, in priority order, 45 * Wake up all threads waiting on this object that can be awoken, in priority order,
46 * and set the synchronization result and output of the thread. 46 * and set the synchronization result and output of the thread.
47 */ 47 */
48 virtual void WakeupAllWaitingThreads(); 48 void WakeupAllWaitingThreads();
49 49
50 /** 50 /**
51 * Wakes up a single thread waiting on this object. 51 * Wakes up a single thread waiting on this object.
diff --git a/src/core/hle/kernel/writable_event.h b/src/core/hle/kernel/writable_event.h
index c9068dd3d..d00c92a6b 100644
--- a/src/core/hle/kernel/writable_event.h
+++ b/src/core/hle/kernel/writable_event.h
@@ -37,7 +37,7 @@ public:
37 return name; 37 return name;
38 } 38 }
39 39
40 static const HandleType HANDLE_TYPE = HandleType::WritableEvent; 40 static constexpr HandleType HANDLE_TYPE = HandleType::WritableEvent;
41 HandleType GetHandleType() const override { 41 HandleType GetHandleType() const override {
42 return HANDLE_TYPE; 42 return HANDLE_TYPE;
43 } 43 }
diff --git a/src/core/hle/result.h b/src/core/hle/result.h
index bfb77cc31..8a3701151 100644
--- a/src/core/hle/result.h
+++ b/src/core/hle/result.h
@@ -8,20 +8,11 @@
8#include <utility> 8#include <utility>
9#include "common/assert.h" 9#include "common/assert.h"
10#include "common/bit_field.h" 10#include "common/bit_field.h"
11#include "common/common_funcs.h"
12#include "common/common_types.h" 11#include "common/common_types.h"
13 12
14// All the constants in this file come from http://switchbrew.org/index.php?title=Error_codes 13// All the constants in this file come from http://switchbrew.org/index.php?title=Error_codes
15 14
16/** 15/**
17 * Detailed description of the error. Code 0 always means success.
18 */
19enum class ErrorDescription : u32 {
20 Success = 0,
21 RemoteProcessDead = 301,
22};
23
24/**
25 * Identifies the module which caused the error. Error codes can be propagated through a call 16 * Identifies the module which caused the error. Error codes can be propagated through a call
26 * chain, meaning that this doesn't always correspond to the module where the API call made is 17 * chain, meaning that this doesn't always correspond to the module where the API call made is
27 * contained. 18 * contained.
@@ -121,30 +112,18 @@ enum class ErrorModule : u32 {
121 ShopN = 811, 112 ShopN = 811,
122}; 113};
123 114
124/// Encapsulates a CTR-OS error code, allowing it to be separated into its constituent fields. 115/// Encapsulates a Horizon OS error code, allowing it to be separated into its constituent fields.
125union ResultCode { 116union ResultCode {
126 u32 raw; 117 u32 raw;
127 118
128 BitField<0, 9, ErrorModule> module; 119 BitField<0, 9, ErrorModule> module;
129 BitField<9, 13, u32> description; 120 BitField<9, 13, u32> description;
130 121
131 // The last bit of `level` is checked by apps and the kernel to determine if a result code is an
132 // error
133 BitField<31, 1, u32> is_error;
134
135 constexpr explicit ResultCode(u32 raw) : raw(raw) {} 122 constexpr explicit ResultCode(u32 raw) : raw(raw) {}
136 123
137 constexpr ResultCode(ErrorModule module, ErrorDescription description)
138 : ResultCode(module, static_cast<u32>(description)) {}
139
140 constexpr ResultCode(ErrorModule module_, u32 description_) 124 constexpr ResultCode(ErrorModule module_, u32 description_)
141 : raw(module.FormatValue(module_) | description.FormatValue(description_)) {} 125 : raw(module.FormatValue(module_) | description.FormatValue(description_)) {}
142 126
143 constexpr ResultCode& operator=(const ResultCode& o) {
144 raw = o.raw;
145 return *this;
146 }
147
148 constexpr bool IsSuccess() const { 127 constexpr bool IsSuccess() const {
149 return raw == 0; 128 return raw == 0;
150 } 129 }
diff --git a/src/core/hle/service/am/am.cpp b/src/core/hle/service/am/am.cpp
index d1cbe0e44..85271d418 100644
--- a/src/core/hle/service/am/am.cpp
+++ b/src/core/hle/service/am/am.cpp
@@ -2,10 +2,10 @@
2// Licensed under GPLv2 or any later version 2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included. 3// Refer to the license.txt file included.
4 4
5#include <algorithm>
5#include <array> 6#include <array>
6#include <cinttypes> 7#include <cinttypes>
7#include <cstring> 8#include <cstring>
8#include <stack>
9#include "audio_core/audio_renderer.h" 9#include "audio_core/audio_renderer.h"
10#include "core/core.h" 10#include "core/core.h"
11#include "core/file_sys/savedata_factory.h" 11#include "core/file_sys/savedata_factory.h"
@@ -13,7 +13,7 @@
13#include "core/hle/kernel/kernel.h" 13#include "core/hle/kernel/kernel.h"
14#include "core/hle/kernel/process.h" 14#include "core/hle/kernel/process.h"
15#include "core/hle/kernel/readable_event.h" 15#include "core/hle/kernel/readable_event.h"
16#include "core/hle/kernel/shared_memory.h" 16#include "core/hle/kernel/transfer_memory.h"
17#include "core/hle/kernel/writable_event.h" 17#include "core/hle/kernel/writable_event.h"
18#include "core/hle/service/acc/profile_manager.h" 18#include "core/hle/service/acc/profile_manager.h"
19#include "core/hle/service/am/am.h" 19#include "core/hle/service/am/am.h"
@@ -93,38 +93,84 @@ void IWindowController::AcquireForegroundRights(Kernel::HLERequestContext& ctx)
93} 93}
94 94
95IAudioController::IAudioController() : ServiceFramework("IAudioController") { 95IAudioController::IAudioController() : ServiceFramework("IAudioController") {
96 // clang-format off
96 static const FunctionInfo functions[] = { 97 static const FunctionInfo functions[] = {
97 {0, &IAudioController::SetExpectedMasterVolume, "SetExpectedMasterVolume"}, 98 {0, &IAudioController::SetExpectedMasterVolume, "SetExpectedMasterVolume"},
98 {1, &IAudioController::GetMainAppletExpectedMasterVolume, 99 {1, &IAudioController::GetMainAppletExpectedMasterVolume, "GetMainAppletExpectedMasterVolume"},
99 "GetMainAppletExpectedMasterVolume"}, 100 {2, &IAudioController::GetLibraryAppletExpectedMasterVolume, "GetLibraryAppletExpectedMasterVolume"},
100 {2, &IAudioController::GetLibraryAppletExpectedMasterVolume, 101 {3, &IAudioController::ChangeMainAppletMasterVolume, "ChangeMainAppletMasterVolume"},
101 "GetLibraryAppletExpectedMasterVolume"}, 102 {4, &IAudioController::SetTransparentAudioRate, "SetTransparentVolumeRate"},
102 {3, nullptr, "ChangeMainAppletMasterVolume"},
103 {4, nullptr, "SetTransparentVolumeRate"},
104 }; 103 };
104 // clang-format on
105
105 RegisterHandlers(functions); 106 RegisterHandlers(functions);
106} 107}
107 108
108IAudioController::~IAudioController() = default; 109IAudioController::~IAudioController() = default;
109 110
110void IAudioController::SetExpectedMasterVolume(Kernel::HLERequestContext& ctx) { 111void IAudioController::SetExpectedMasterVolume(Kernel::HLERequestContext& ctx) {
111 LOG_WARNING(Service_AM, "(STUBBED) called"); 112 IPC::RequestParser rp{ctx};
113 const float main_applet_volume_tmp = rp.Pop<float>();
114 const float library_applet_volume_tmp = rp.Pop<float>();
115
116 LOG_DEBUG(Service_AM, "called. main_applet_volume={}, library_applet_volume={}",
117 main_applet_volume_tmp, library_applet_volume_tmp);
118
119 // Ensure the volume values remain within the 0-100% range
120 main_applet_volume = std::clamp(main_applet_volume_tmp, min_allowed_volume, max_allowed_volume);
121 library_applet_volume =
122 std::clamp(library_applet_volume_tmp, min_allowed_volume, max_allowed_volume);
123
112 IPC::ResponseBuilder rb{ctx, 2}; 124 IPC::ResponseBuilder rb{ctx, 2};
113 rb.Push(RESULT_SUCCESS); 125 rb.Push(RESULT_SUCCESS);
114} 126}
115 127
116void IAudioController::GetMainAppletExpectedMasterVolume(Kernel::HLERequestContext& ctx) { 128void IAudioController::GetMainAppletExpectedMasterVolume(Kernel::HLERequestContext& ctx) {
117 LOG_WARNING(Service_AM, "(STUBBED) called"); 129 LOG_DEBUG(Service_AM, "called. main_applet_volume={}", main_applet_volume);
118 IPC::ResponseBuilder rb{ctx, 3}; 130 IPC::ResponseBuilder rb{ctx, 3};
119 rb.Push(RESULT_SUCCESS); 131 rb.Push(RESULT_SUCCESS);
120 rb.Push(volume); 132 rb.Push(main_applet_volume);
121} 133}
122 134
123void IAudioController::GetLibraryAppletExpectedMasterVolume(Kernel::HLERequestContext& ctx) { 135void IAudioController::GetLibraryAppletExpectedMasterVolume(Kernel::HLERequestContext& ctx) {
124 LOG_WARNING(Service_AM, "(STUBBED) called"); 136 LOG_DEBUG(Service_AM, "called. library_applet_volume={}", library_applet_volume);
125 IPC::ResponseBuilder rb{ctx, 3}; 137 IPC::ResponseBuilder rb{ctx, 3};
126 rb.Push(RESULT_SUCCESS); 138 rb.Push(RESULT_SUCCESS);
127 rb.Push(volume); 139 rb.Push(library_applet_volume);
140}
141
142void IAudioController::ChangeMainAppletMasterVolume(Kernel::HLERequestContext& ctx) {
143 struct Parameters {
144 float volume;
145 s64 fade_time_ns;
146 };
147 static_assert(sizeof(Parameters) == 16);
148
149 IPC::RequestParser rp{ctx};
150 const auto parameters = rp.PopRaw<Parameters>();
151
152 LOG_DEBUG(Service_AM, "called. volume={}, fade_time_ns={}", parameters.volume,
153 parameters.fade_time_ns);
154
155 main_applet_volume = std::clamp(parameters.volume, min_allowed_volume, max_allowed_volume);
156 fade_time_ns = std::chrono::nanoseconds{parameters.fade_time_ns};
157
158 IPC::ResponseBuilder rb{ctx, 2};
159 rb.Push(RESULT_SUCCESS);
160}
161
162void IAudioController::SetTransparentAudioRate(Kernel::HLERequestContext& ctx) {
163 IPC::RequestParser rp{ctx};
164 const float transparent_volume_rate_tmp = rp.Pop<float>();
165
166 LOG_DEBUG(Service_AM, "called. transparent_volume_rate={}", transparent_volume_rate_tmp);
167
168 // Clamp volume range to 0-100%.
169 transparent_volume_rate =
170 std::clamp(transparent_volume_rate_tmp, min_allowed_volume, max_allowed_volume);
171
172 IPC::ResponseBuilder rb{ctx, 2};
173 rb.Push(RESULT_SUCCESS);
128} 174}
129 175
130IDisplayController::IDisplayController() : ServiceFramework("IDisplayController") { 176IDisplayController::IDisplayController() : ServiceFramework("IDisplayController") {
@@ -169,7 +215,21 @@ IDisplayController::IDisplayController() : ServiceFramework("IDisplayController"
169 215
170IDisplayController::~IDisplayController() = default; 216IDisplayController::~IDisplayController() = default;
171 217
172IDebugFunctions::IDebugFunctions() : ServiceFramework("IDebugFunctions") {} 218IDebugFunctions::IDebugFunctions() : ServiceFramework{"IDebugFunctions"} {
219 // clang-format off
220 static const FunctionInfo functions[] = {
221 {0, nullptr, "NotifyMessageToHomeMenuForDebug"},
222 {1, nullptr, "OpenMainApplication"},
223 {10, nullptr, "EmulateButtonEvent"},
224 {20, nullptr, "InvalidateTransitionLayer"},
225 {30, nullptr, "RequestLaunchApplicationWithUserAndArgumentForDebug"},
226 {40, nullptr, "GetAppletResourceUsageInfo"},
227 };
228 // clang-format on
229
230 RegisterHandlers(functions);
231}
232
173IDebugFunctions::~IDebugFunctions() = default; 233IDebugFunctions::~IDebugFunctions() = default;
174 234
175ISelfController::ISelfController(std::shared_ptr<NVFlinger::NVFlinger> nvflinger) 235ISelfController::ISelfController(std::shared_ptr<NVFlinger::NVFlinger> nvflinger)
@@ -179,8 +239,8 @@ ISelfController::ISelfController(std::shared_ptr<NVFlinger::NVFlinger> nvflinger
179 {0, nullptr, "Exit"}, 239 {0, nullptr, "Exit"},
180 {1, &ISelfController::LockExit, "LockExit"}, 240 {1, &ISelfController::LockExit, "LockExit"},
181 {2, &ISelfController::UnlockExit, "UnlockExit"}, 241 {2, &ISelfController::UnlockExit, "UnlockExit"},
182 {3, nullptr, "EnterFatalSection"}, 242 {3, &ISelfController::EnterFatalSection, "EnterFatalSection"},
183 {4, nullptr, "LeaveFatalSection"}, 243 {4, &ISelfController::LeaveFatalSection, "LeaveFatalSection"},
184 {9, &ISelfController::GetLibraryAppletLaunchableEvent, "GetLibraryAppletLaunchableEvent"}, 244 {9, &ISelfController::GetLibraryAppletLaunchableEvent, "GetLibraryAppletLaunchableEvent"},
185 {10, &ISelfController::SetScreenShotPermission, "SetScreenShotPermission"}, 245 {10, &ISelfController::SetScreenShotPermission, "SetScreenShotPermission"},
186 {11, &ISelfController::SetOperationModeChangedNotification, "SetOperationModeChangedNotification"}, 246 {11, &ISelfController::SetOperationModeChangedNotification, "SetOperationModeChangedNotification"},
@@ -225,41 +285,54 @@ ISelfController::ISelfController(std::shared_ptr<NVFlinger::NVFlinger> nvflinger
225 285
226ISelfController::~ISelfController() = default; 286ISelfController::~ISelfController() = default;
227 287
228void ISelfController::SetFocusHandlingMode(Kernel::HLERequestContext& ctx) { 288void ISelfController::LockExit(Kernel::HLERequestContext& ctx) {
229 // Takes 3 input u8s with each field located immediately after the previous
230 // u8, these are bool flags. No output.
231 LOG_WARNING(Service_AM, "(STUBBED) called"); 289 LOG_WARNING(Service_AM, "(STUBBED) called");
232 290
233 IPC::RequestParser rp{ctx}; 291 IPC::ResponseBuilder rb{ctx, 2};
292 rb.Push(RESULT_SUCCESS);
293}
234 294
235 struct FocusHandlingModeParams { 295void ISelfController::UnlockExit(Kernel::HLERequestContext& ctx) {
236 u8 unknown0; 296 LOG_WARNING(Service_AM, "(STUBBED) called");
237 u8 unknown1;
238 u8 unknown2;
239 };
240 auto flags = rp.PopRaw<FocusHandlingModeParams>();
241 297
242 IPC::ResponseBuilder rb{ctx, 2}; 298 IPC::ResponseBuilder rb{ctx, 2};
243 rb.Push(RESULT_SUCCESS); 299 rb.Push(RESULT_SUCCESS);
244} 300}
245 301
246void ISelfController::SetRestartMessageEnabled(Kernel::HLERequestContext& ctx) { 302void ISelfController::EnterFatalSection(Kernel::HLERequestContext& ctx) {
247 LOG_WARNING(Service_AM, "(STUBBED) called"); 303 ++num_fatal_sections_entered;
304 LOG_DEBUG(Service_AM, "called. Num fatal sections entered: {}", num_fatal_sections_entered);
248 305
249 IPC::ResponseBuilder rb{ctx, 2}; 306 IPC::ResponseBuilder rb{ctx, 2};
250 rb.Push(RESULT_SUCCESS); 307 rb.Push(RESULT_SUCCESS);
251} 308}
252 309
253void ISelfController::SetPerformanceModeChangedNotification(Kernel::HLERequestContext& ctx) { 310void ISelfController::LeaveFatalSection(Kernel::HLERequestContext& ctx) {
254 IPC::RequestParser rp{ctx}; 311 LOG_DEBUG(Service_AM, "called.");
255 312
256 bool flag = rp.Pop<bool>(); 313 // Entry and exit of fatal sections must be balanced.
257 LOG_WARNING(Service_AM, "(STUBBED) called flag={}", flag); 314 if (num_fatal_sections_entered == 0) {
315 IPC::ResponseBuilder rb{ctx, 2};
316 rb.Push(ResultCode{ErrorModule::AM, 512});
317 return;
318 }
319
320 --num_fatal_sections_entered;
258 321
259 IPC::ResponseBuilder rb{ctx, 2}; 322 IPC::ResponseBuilder rb{ctx, 2};
260 rb.Push(RESULT_SUCCESS); 323 rb.Push(RESULT_SUCCESS);
261} 324}
262 325
326void ISelfController::GetLibraryAppletLaunchableEvent(Kernel::HLERequestContext& ctx) {
327 LOG_WARNING(Service_AM, "(STUBBED) called");
328
329 launchable_event.writable->Signal();
330
331 IPC::ResponseBuilder rb{ctx, 2, 1};
332 rb.Push(RESULT_SUCCESS);
333 rb.PushCopyObjects(launchable_event.readable);
334}
335
263void ISelfController::SetScreenShotPermission(Kernel::HLERequestContext& ctx) { 336void ISelfController::SetScreenShotPermission(Kernel::HLERequestContext& ctx) {
264 LOG_WARNING(Service_AM, "(STUBBED) called"); 337 LOG_WARNING(Service_AM, "(STUBBED) called");
265 338
@@ -277,40 +350,52 @@ void ISelfController::SetOperationModeChangedNotification(Kernel::HLERequestCont
277 rb.Push(RESULT_SUCCESS); 350 rb.Push(RESULT_SUCCESS);
278} 351}
279 352
280void ISelfController::SetOutOfFocusSuspendingEnabled(Kernel::HLERequestContext& ctx) { 353void ISelfController::SetPerformanceModeChangedNotification(Kernel::HLERequestContext& ctx) {
281 // Takes 3 input u8s with each field located immediately after the previous
282 // u8, these are bool flags. No output.
283 IPC::RequestParser rp{ctx}; 354 IPC::RequestParser rp{ctx};
284 355
285 bool enabled = rp.Pop<bool>(); 356 bool flag = rp.Pop<bool>();
286 LOG_WARNING(Service_AM, "(STUBBED) called enabled={}", enabled); 357 LOG_WARNING(Service_AM, "(STUBBED) called flag={}", flag);
287 358
288 IPC::ResponseBuilder rb{ctx, 2}; 359 IPC::ResponseBuilder rb{ctx, 2};
289 rb.Push(RESULT_SUCCESS); 360 rb.Push(RESULT_SUCCESS);
290} 361}
291 362
292void ISelfController::LockExit(Kernel::HLERequestContext& ctx) { 363void ISelfController::SetFocusHandlingMode(Kernel::HLERequestContext& ctx) {
293 LOG_WARNING(Service_AM, "(STUBBED) called"); 364 // Takes 3 input u8s with each field located immediately after the previous
365 // u8, these are bool flags. No output.
366 IPC::RequestParser rp{ctx};
367
368 struct FocusHandlingModeParams {
369 u8 unknown0;
370 u8 unknown1;
371 u8 unknown2;
372 };
373 const auto flags = rp.PopRaw<FocusHandlingModeParams>();
374
375 LOG_WARNING(Service_AM, "(STUBBED) called. unknown0={}, unknown1={}, unknown2={}",
376 flags.unknown0, flags.unknown1, flags.unknown2);
294 377
295 IPC::ResponseBuilder rb{ctx, 2}; 378 IPC::ResponseBuilder rb{ctx, 2};
296 rb.Push(RESULT_SUCCESS); 379 rb.Push(RESULT_SUCCESS);
297} 380}
298 381
299void ISelfController::UnlockExit(Kernel::HLERequestContext& ctx) { 382void ISelfController::SetRestartMessageEnabled(Kernel::HLERequestContext& ctx) {
300 LOG_WARNING(Service_AM, "(STUBBED) called"); 383 LOG_WARNING(Service_AM, "(STUBBED) called");
301 384
302 IPC::ResponseBuilder rb{ctx, 2}; 385 IPC::ResponseBuilder rb{ctx, 2};
303 rb.Push(RESULT_SUCCESS); 386 rb.Push(RESULT_SUCCESS);
304} 387}
305 388
306void ISelfController::GetLibraryAppletLaunchableEvent(Kernel::HLERequestContext& ctx) { 389void ISelfController::SetOutOfFocusSuspendingEnabled(Kernel::HLERequestContext& ctx) {
307 LOG_WARNING(Service_AM, "(STUBBED) called"); 390 // Takes 3 input u8s with each field located immediately after the previous
391 // u8, these are bool flags. No output.
392 IPC::RequestParser rp{ctx};
308 393
309 launchable_event.writable->Signal(); 394 bool enabled = rp.Pop<bool>();
395 LOG_WARNING(Service_AM, "(STUBBED) called enabled={}", enabled);
310 396
311 IPC::ResponseBuilder rb{ctx, 2, 1}; 397 IPC::ResponseBuilder rb{ctx, 2};
312 rb.Push(RESULT_SUCCESS); 398 rb.Push(RESULT_SUCCESS);
313 rb.PushCopyObjects(launchable_event.readable);
314} 399}
315 400
316void ISelfController::SetScreenShotImageOrientation(Kernel::HLERequestContext& ctx) { 401void ISelfController::SetScreenShotImageOrientation(Kernel::HLERequestContext& ctx) {
@@ -322,14 +407,15 @@ void ISelfController::SetScreenShotImageOrientation(Kernel::HLERequestContext& c
322 407
323void ISelfController::CreateManagedDisplayLayer(Kernel::HLERequestContext& ctx) { 408void ISelfController::CreateManagedDisplayLayer(Kernel::HLERequestContext& ctx) {
324 LOG_WARNING(Service_AM, "(STUBBED) called"); 409 LOG_WARNING(Service_AM, "(STUBBED) called");
410
325 // TODO(Subv): Find out how AM determines the display to use, for now just 411 // TODO(Subv): Find out how AM determines the display to use, for now just
326 // create the layer in the Default display. 412 // create the layer in the Default display.
327 u64 display_id = nvflinger->OpenDisplay("Default"); 413 const auto display_id = nvflinger->OpenDisplay("Default");
328 u64 layer_id = nvflinger->CreateLayer(display_id); 414 const auto layer_id = nvflinger->CreateLayer(*display_id);
329 415
330 IPC::ResponseBuilder rb{ctx, 4}; 416 IPC::ResponseBuilder rb{ctx, 4};
331 rb.Push(RESULT_SUCCESS); 417 rb.Push(RESULT_SUCCESS);
332 rb.Push(layer_id); 418 rb.Push(*layer_id);
333} 419}
334 420
335void ISelfController::SetHandlesRequestToDisplay(Kernel::HLERequestContext& ctx) { 421void ISelfController::SetHandlesRequestToDisplay(Kernel::HLERequestContext& ctx) {
@@ -846,19 +932,19 @@ void ILibraryAppletCreator::CreateTransferMemoryStorage(Kernel::HLERequestContex
846 rp.SetCurrentOffset(3); 932 rp.SetCurrentOffset(3);
847 const auto handle{rp.Pop<Kernel::Handle>()}; 933 const auto handle{rp.Pop<Kernel::Handle>()};
848 934
849 const auto shared_mem = 935 const auto transfer_mem =
850 Core::System::GetInstance().CurrentProcess()->GetHandleTable().Get<Kernel::SharedMemory>( 936 Core::System::GetInstance().CurrentProcess()->GetHandleTable().Get<Kernel::TransferMemory>(
851 handle); 937 handle);
852 938
853 if (shared_mem == nullptr) { 939 if (transfer_mem == nullptr) {
854 LOG_ERROR(Service_AM, "shared_mem is a nullpr for handle={:08X}", handle); 940 LOG_ERROR(Service_AM, "shared_mem is a nullpr for handle={:08X}", handle);
855 IPC::ResponseBuilder rb{ctx, 2}; 941 IPC::ResponseBuilder rb{ctx, 2};
856 rb.Push(ResultCode(-1)); 942 rb.Push(ResultCode(-1));
857 return; 943 return;
858 } 944 }
859 945
860 const u8* mem_begin = shared_mem->GetPointer(); 946 const u8* const mem_begin = transfer_mem->GetPointer();
861 const u8* mem_end = mem_begin + shared_mem->GetSize(); 947 const u8* const mem_end = mem_begin + transfer_mem->GetSize();
862 std::vector<u8> memory{mem_begin, mem_end}; 948 std::vector<u8> memory{mem_begin, mem_end};
863 949
864 IPC::ResponseBuilder rb{ctx, 2, 0, 1}; 950 IPC::ResponseBuilder rb{ctx, 2, 0, 1};
diff --git a/src/core/hle/service/am/am.h b/src/core/hle/service/am/am.h
index b6113cfdd..991b7d47c 100644
--- a/src/core/hle/service/am/am.h
+++ b/src/core/hle/service/am/am.h
@@ -4,6 +4,7 @@
4 4
5#pragma once 5#pragma once
6 6
7#include <chrono>
7#include <memory> 8#include <memory>
8#include <queue> 9#include <queue>
9#include "core/hle/kernel/writable_event.h" 10#include "core/hle/kernel/writable_event.h"
@@ -81,8 +82,21 @@ private:
81 void SetExpectedMasterVolume(Kernel::HLERequestContext& ctx); 82 void SetExpectedMasterVolume(Kernel::HLERequestContext& ctx);
82 void GetMainAppletExpectedMasterVolume(Kernel::HLERequestContext& ctx); 83 void GetMainAppletExpectedMasterVolume(Kernel::HLERequestContext& ctx);
83 void GetLibraryAppletExpectedMasterVolume(Kernel::HLERequestContext& ctx); 84 void GetLibraryAppletExpectedMasterVolume(Kernel::HLERequestContext& ctx);
85 void ChangeMainAppletMasterVolume(Kernel::HLERequestContext& ctx);
86 void SetTransparentAudioRate(Kernel::HLERequestContext& ctx);
84 87
85 u32 volume{100}; 88 static constexpr float min_allowed_volume = 0.0f;
89 static constexpr float max_allowed_volume = 1.0f;
90
91 float main_applet_volume{0.25f};
92 float library_applet_volume{max_allowed_volume};
93 float transparent_volume_rate{min_allowed_volume};
94
95 // Volume transition fade time in nanoseconds.
96 // e.g. If the main applet volume was 0% and was changed to 50%
97 // with a fade of 50ns, then over the course of 50ns,
98 // the volume will gradually fade up to 50%
99 std::chrono::nanoseconds fade_time_ns{0};
86}; 100};
87 101
88class IDisplayController final : public ServiceFramework<IDisplayController> { 102class IDisplayController final : public ServiceFramework<IDisplayController> {
@@ -103,17 +117,19 @@ public:
103 ~ISelfController() override; 117 ~ISelfController() override;
104 118
105private: 119private:
106 void SetFocusHandlingMode(Kernel::HLERequestContext& ctx);
107 void SetRestartMessageEnabled(Kernel::HLERequestContext& ctx);
108 void SetPerformanceModeChangedNotification(Kernel::HLERequestContext& ctx);
109 void SetOperationModeChangedNotification(Kernel::HLERequestContext& ctx);
110 void SetOutOfFocusSuspendingEnabled(Kernel::HLERequestContext& ctx);
111 void LockExit(Kernel::HLERequestContext& ctx); 120 void LockExit(Kernel::HLERequestContext& ctx);
112 void UnlockExit(Kernel::HLERequestContext& ctx); 121 void UnlockExit(Kernel::HLERequestContext& ctx);
122 void EnterFatalSection(Kernel::HLERequestContext& ctx);
123 void LeaveFatalSection(Kernel::HLERequestContext& ctx);
113 void GetLibraryAppletLaunchableEvent(Kernel::HLERequestContext& ctx); 124 void GetLibraryAppletLaunchableEvent(Kernel::HLERequestContext& ctx);
125 void SetScreenShotPermission(Kernel::HLERequestContext& ctx);
126 void SetOperationModeChangedNotification(Kernel::HLERequestContext& ctx);
127 void SetPerformanceModeChangedNotification(Kernel::HLERequestContext& ctx);
128 void SetFocusHandlingMode(Kernel::HLERequestContext& ctx);
129 void SetRestartMessageEnabled(Kernel::HLERequestContext& ctx);
130 void SetOutOfFocusSuspendingEnabled(Kernel::HLERequestContext& ctx);
114 void SetScreenShotImageOrientation(Kernel::HLERequestContext& ctx); 131 void SetScreenShotImageOrientation(Kernel::HLERequestContext& ctx);
115 void CreateManagedDisplayLayer(Kernel::HLERequestContext& ctx); 132 void CreateManagedDisplayLayer(Kernel::HLERequestContext& ctx);
116 void SetScreenShotPermission(Kernel::HLERequestContext& ctx);
117 void SetHandlesRequestToDisplay(Kernel::HLERequestContext& ctx); 133 void SetHandlesRequestToDisplay(Kernel::HLERequestContext& ctx);
118 void SetIdleTimeDetectionExtension(Kernel::HLERequestContext& ctx); 134 void SetIdleTimeDetectionExtension(Kernel::HLERequestContext& ctx);
119 void GetIdleTimeDetectionExtension(Kernel::HLERequestContext& ctx); 135 void GetIdleTimeDetectionExtension(Kernel::HLERequestContext& ctx);
@@ -121,6 +137,7 @@ private:
121 std::shared_ptr<NVFlinger::NVFlinger> nvflinger; 137 std::shared_ptr<NVFlinger::NVFlinger> nvflinger;
122 Kernel::EventPair launchable_event; 138 Kernel::EventPair launchable_event;
123 u32 idle_time_detection_extension = 0; 139 u32 idle_time_detection_extension = 0;
140 u64 num_fatal_sections_entered = 0;
124}; 141};
125 142
126class ICommonStateGetter final : public ServiceFramework<ICommonStateGetter> { 143class ICommonStateGetter final : public ServiceFramework<ICommonStateGetter> {
diff --git a/src/core/hle/service/am/applet_ae.cpp b/src/core/hle/service/am/applet_ae.cpp
index 41a573a91..b888f861d 100644
--- a/src/core/hle/service/am/applet_ae.cpp
+++ b/src/core/hle/service/am/applet_ae.cpp
@@ -249,7 +249,8 @@ AppletAE::AppletAE(std::shared_ptr<NVFlinger::NVFlinger> nvflinger,
249 {300, nullptr, "OpenOverlayAppletProxy"}, 249 {300, nullptr, "OpenOverlayAppletProxy"},
250 {350, nullptr, "OpenSystemApplicationProxy"}, 250 {350, nullptr, "OpenSystemApplicationProxy"},
251 {400, nullptr, "CreateSelfLibraryAppletCreatorForDevelop"}, 251 {400, nullptr, "CreateSelfLibraryAppletCreatorForDevelop"},
252 {401, nullptr, "GetSystemAppletControllerForDebug"}, 252 {410, nullptr, "GetSystemAppletControllerForDebug"},
253 {1000, nullptr, "GetDebugFunctions"},
253 }; 254 };
254 // clang-format on 255 // clang-format on
255 256
diff --git a/src/core/hle/service/am/applets/software_keyboard.cpp b/src/core/hle/service/am/applets/software_keyboard.cpp
index f255f74b5..8c5bd6059 100644
--- a/src/core/hle/service/am/applets/software_keyboard.cpp
+++ b/src/core/hle/service/am/applets/software_keyboard.cpp
@@ -7,6 +7,7 @@
7#include "common/string_util.h" 7#include "common/string_util.h"
8#include "core/core.h" 8#include "core/core.h"
9#include "core/frontend/applets/software_keyboard.h" 9#include "core/frontend/applets/software_keyboard.h"
10#include "core/hle/result.h"
10#include "core/hle/service/am/am.h" 11#include "core/hle/service/am/am.h"
11#include "core/hle/service/am/applets/software_keyboard.h" 12#include "core/hle/service/am/applets/software_keyboard.h"
12 13
diff --git a/src/core/hle/service/am/applets/software_keyboard.h b/src/core/hle/service/am/applets/software_keyboard.h
index efd5753a1..b93a30d28 100644
--- a/src/core/hle/service/am/applets/software_keyboard.h
+++ b/src/core/hle/service/am/applets/software_keyboard.h
@@ -9,10 +9,13 @@
9#include <vector> 9#include <vector>
10 10
11#include "common/common_funcs.h" 11#include "common/common_funcs.h"
12#include "common/common_types.h"
12#include "common/swap.h" 13#include "common/swap.h"
13#include "core/hle/service/am/am.h" 14#include "core/hle/service/am/am.h"
14#include "core/hle/service/am/applets/applets.h" 15#include "core/hle/service/am/applets/applets.h"
15 16
17union ResultCode;
18
16namespace Service::AM::Applets { 19namespace Service::AM::Applets {
17 20
18enum class KeysetDisable : u32 { 21enum class KeysetDisable : u32 {
diff --git a/src/core/hle/service/am/applets/web_browser.cpp b/src/core/hle/service/am/applets/web_browser.cpp
index 9b0aa7f5f..7e17df98a 100644
--- a/src/core/hle/service/am/applets/web_browser.cpp
+++ b/src/core/hle/service/am/applets/web_browser.cpp
@@ -86,7 +86,7 @@ static FileSys::VirtualFile GetManualRomFS() {
86 if (loader.ReadManualRomFS(out) == Loader::ResultStatus::Success) 86 if (loader.ReadManualRomFS(out) == Loader::ResultStatus::Success)
87 return out; 87 return out;
88 88
89 const auto& installed{FileSystem::GetUnionContents()}; 89 const auto& installed{Core::System::GetInstance().GetContentProvider()};
90 const auto res = installed.GetEntry(Core::System::GetInstance().CurrentProcess()->GetTitleID(), 90 const auto res = installed.GetEntry(Core::System::GetInstance().CurrentProcess()->GetTitleID(),
91 FileSys::ContentRecordType::Manual); 91 FileSys::ContentRecordType::Manual);
92 92
diff --git a/src/core/hle/service/aoc/aoc_u.cpp b/src/core/hle/service/aoc/aoc_u.cpp
index b506bc3dd..2d768d9fc 100644
--- a/src/core/hle/service/aoc/aoc_u.cpp
+++ b/src/core/hle/service/aoc/aoc_u.cpp
@@ -33,11 +33,11 @@ static bool CheckAOCTitleIDMatchesBase(u64 title_id, u64 base) {
33 33
34static std::vector<u64> AccumulateAOCTitleIDs() { 34static std::vector<u64> AccumulateAOCTitleIDs() {
35 std::vector<u64> add_on_content; 35 std::vector<u64> add_on_content;
36 const auto rcu = FileSystem::GetUnionContents(); 36 const auto& rcu = Core::System::GetInstance().GetContentProvider();
37 const auto list = 37 const auto list =
38 rcu.ListEntriesFilter(FileSys::TitleType::AOC, FileSys::ContentRecordType::Data); 38 rcu.ListEntriesFilter(FileSys::TitleType::AOC, FileSys::ContentRecordType::Data);
39 std::transform(list.begin(), list.end(), std::back_inserter(add_on_content), 39 std::transform(list.begin(), list.end(), std::back_inserter(add_on_content),
40 [](const FileSys::RegisteredCacheEntry& rce) { return rce.title_id; }); 40 [](const FileSys::ContentProviderEntry& rce) { return rce.title_id; });
41 add_on_content.erase( 41 add_on_content.erase(
42 std::remove_if( 42 std::remove_if(
43 add_on_content.begin(), add_on_content.end(), 43 add_on_content.begin(), add_on_content.end(),
diff --git a/src/core/hle/service/audio/audin_u.cpp b/src/core/hle/service/audio/audin_u.cpp
index 657010312..e5daefdde 100644
--- a/src/core/hle/service/audio/audin_u.cpp
+++ b/src/core/hle/service/audio/audin_u.cpp
@@ -2,9 +2,6 @@
2// Licensed under GPLv2 or any later version 2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included. 3// Refer to the license.txt file included.
4 4
5#include "common/logging/log.h"
6#include "core/hle/ipc_helpers.h"
7#include "core/hle/kernel/hle_ipc.h"
8#include "core/hle/service/audio/audin_u.h" 5#include "core/hle/service/audio/audin_u.h"
9 6
10namespace Service::Audio { 7namespace Service::Audio {
@@ -12,6 +9,7 @@ namespace Service::Audio {
12class IAudioIn final : public ServiceFramework<IAudioIn> { 9class IAudioIn final : public ServiceFramework<IAudioIn> {
13public: 10public:
14 IAudioIn() : ServiceFramework("IAudioIn") { 11 IAudioIn() : ServiceFramework("IAudioIn") {
12 // clang-format off
15 static const FunctionInfo functions[] = { 13 static const FunctionInfo functions[] = {
16 {0, nullptr, "GetAudioInState"}, 14 {0, nullptr, "GetAudioInState"},
17 {1, nullptr, "StartAudioIn"}, 15 {1, nullptr, "StartAudioIn"},
@@ -28,16 +26,23 @@ public:
28 {12, nullptr, "SetAudioInDeviceGain"}, 26 {12, nullptr, "SetAudioInDeviceGain"},
29 {13, nullptr, "GetAudioInDeviceGain"}, 27 {13, nullptr, "GetAudioInDeviceGain"},
30 }; 28 };
29 // clang-format on
30
31 RegisterHandlers(functions); 31 RegisterHandlers(functions);
32 } 32 }
33 ~IAudioIn() = default;
34}; 33};
35 34
36AudInU::AudInU() : ServiceFramework("audin:u") { 35AudInU::AudInU() : ServiceFramework("audin:u") {
36 // clang-format off
37 static const FunctionInfo functions[] = { 37 static const FunctionInfo functions[] = {
38 {0, nullptr, "ListAudioIns"}, {1, nullptr, "OpenAudioIn"}, {2, nullptr, "Unknown"}, 38 {0, nullptr, "ListAudioIns"},
39 {3, nullptr, "OpenAudioInAuto"}, {4, nullptr, "ListAudioInsAuto"}, 39 {1, nullptr, "OpenAudioIn"},
40 {2, nullptr, "Unknown"},
41 {3, nullptr, "OpenAudioInAuto"},
42 {4, nullptr, "ListAudioInsAuto"},
40 }; 43 };
44 // clang-format on
45
41 RegisterHandlers(functions); 46 RegisterHandlers(functions);
42} 47}
43 48
diff --git a/src/core/hle/service/audio/audout_u.cpp b/src/core/hle/service/audio/audout_u.cpp
index dc6a6b188..39acb7b23 100644
--- a/src/core/hle/service/audio/audout_u.cpp
+++ b/src/core/hle/service/audio/audout_u.cpp
@@ -18,17 +18,11 @@
18#include "core/hle/kernel/readable_event.h" 18#include "core/hle/kernel/readable_event.h"
19#include "core/hle/kernel/writable_event.h" 19#include "core/hle/kernel/writable_event.h"
20#include "core/hle/service/audio/audout_u.h" 20#include "core/hle/service/audio/audout_u.h"
21#include "core/hle/service/audio/errors.h"
21#include "core/memory.h" 22#include "core/memory.h"
22 23
23namespace Service::Audio { 24namespace Service::Audio {
24 25
25namespace ErrCodes {
26enum {
27 ErrorUnknown = 2,
28 BufferCountExceeded = 8,
29};
30}
31
32constexpr std::array<char, 10> DefaultDevice{{"DeviceOut"}}; 26constexpr std::array<char, 10> DefaultDevice{{"DeviceOut"}};
33constexpr int DefaultSampleRate{48000}; 27constexpr int DefaultSampleRate{48000};
34 28
@@ -68,12 +62,12 @@ public:
68 RegisterHandlers(functions); 62 RegisterHandlers(functions);
69 63
70 // This is the event handle used to check if the audio buffer was released 64 // This is the event handle used to check if the audio buffer was released
71 auto& kernel = Core::System::GetInstance().Kernel(); 65 auto& system = Core::System::GetInstance();
72 buffer_event = Kernel::WritableEvent::CreateEventPair(kernel, Kernel::ResetType::Sticky, 66 buffer_event = Kernel::WritableEvent::CreateEventPair(
73 "IAudioOutBufferReleased"); 67 system.Kernel(), Kernel::ResetType::Sticky, "IAudioOutBufferReleased");
74 68
75 stream = audio_core.OpenStream(audio_params.sample_rate, audio_params.channel_count, 69 stream = audio_core.OpenStream(system.CoreTiming(), audio_params.sample_rate,
76 std::move(unique_name), 70 audio_params.channel_count, std::move(unique_name),
77 [=]() { buffer_event.writable->Signal(); }); 71 [=]() { buffer_event.writable->Signal(); });
78 } 72 }
79 73
@@ -100,7 +94,7 @@ private:
100 94
101 if (stream->IsPlaying()) { 95 if (stream->IsPlaying()) {
102 IPC::ResponseBuilder rb{ctx, 2}; 96 IPC::ResponseBuilder rb{ctx, 2};
103 rb.Push(ResultCode(ErrorModule::Audio, ErrCodes::ErrorUnknown)); 97 rb.Push(ERR_OPERATION_FAILED);
104 return; 98 return;
105 } 99 }
106 100
@@ -113,7 +107,9 @@ private:
113 void StopAudioOut(Kernel::HLERequestContext& ctx) { 107 void StopAudioOut(Kernel::HLERequestContext& ctx) {
114 LOG_DEBUG(Service_Audio, "called"); 108 LOG_DEBUG(Service_Audio, "called");
115 109
116 audio_core.StopStream(stream); 110 if (stream->IsPlaying()) {
111 audio_core.StopStream(stream);
112 }
117 113
118 IPC::ResponseBuilder rb{ctx, 2}; 114 IPC::ResponseBuilder rb{ctx, 2};
119 rb.Push(RESULT_SUCCESS); 115 rb.Push(RESULT_SUCCESS);
@@ -143,7 +139,8 @@ private:
143 139
144 if (!audio_core.QueueBuffer(stream, tag, std::move(samples))) { 140 if (!audio_core.QueueBuffer(stream, tag, std::move(samples))) {
145 IPC::ResponseBuilder rb{ctx, 2}; 141 IPC::ResponseBuilder rb{ctx, 2};
146 rb.Push(ResultCode(ErrorModule::Audio, ErrCodes::BufferCountExceeded)); 142 rb.Push(ERR_BUFFER_COUNT_EXCEEDED);
143 return;
147 } 144 }
148 145
149 IPC::ResponseBuilder rb{ctx, 2}; 146 IPC::ResponseBuilder rb{ctx, 2};
@@ -153,7 +150,6 @@ private:
153 void GetReleasedAudioOutBufferImpl(Kernel::HLERequestContext& ctx) { 150 void GetReleasedAudioOutBufferImpl(Kernel::HLERequestContext& ctx) {
154 LOG_DEBUG(Service_Audio, "called {}", ctx.Description()); 151 LOG_DEBUG(Service_Audio, "called {}", ctx.Description());
155 152
156 IPC::RequestParser rp{ctx};
157 const u64 max_count{ctx.GetWriteBufferSize() / sizeof(u64)}; 153 const u64 max_count{ctx.GetWriteBufferSize() / sizeof(u64)};
158 const auto released_buffers{audio_core.GetTagsAndReleaseBuffers(stream, max_count)}; 154 const auto released_buffers{audio_core.GetTagsAndReleaseBuffers(stream, max_count)};
159 155
@@ -197,12 +193,9 @@ private:
197void AudOutU::ListAudioOutsImpl(Kernel::HLERequestContext& ctx) { 193void AudOutU::ListAudioOutsImpl(Kernel::HLERequestContext& ctx) {
198 LOG_DEBUG(Service_Audio, "called"); 194 LOG_DEBUG(Service_Audio, "called");
199 195
200 IPC::RequestParser rp{ctx};
201
202 ctx.WriteBuffer(DefaultDevice); 196 ctx.WriteBuffer(DefaultDevice);
203 197
204 IPC::ResponseBuilder rb{ctx, 3}; 198 IPC::ResponseBuilder rb{ctx, 3};
205
206 rb.Push(RESULT_SUCCESS); 199 rb.Push(RESULT_SUCCESS);
207 rb.Push<u32>(1); // Amount of audio devices 200 rb.Push<u32>(1); // Amount of audio devices
208} 201}
diff --git a/src/core/hle/service/audio/audrec_u.cpp b/src/core/hle/service/audio/audrec_u.cpp
index 34974afa9..1a5aed9ed 100644
--- a/src/core/hle/service/audio/audrec_u.cpp
+++ b/src/core/hle/service/audio/audrec_u.cpp
@@ -2,9 +2,6 @@
2// Licensed under GPLv2 or any later version 2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included. 3// Refer to the license.txt file included.
4 4
5#include "common/logging/log.h"
6#include "core/hle/ipc_helpers.h"
7#include "core/hle/kernel/hle_ipc.h"
8#include "core/hle/service/audio/audrec_u.h" 5#include "core/hle/service/audio/audrec_u.h"
9 6
10namespace Service::Audio { 7namespace Service::Audio {
@@ -12,6 +9,7 @@ namespace Service::Audio {
12class IFinalOutputRecorder final : public ServiceFramework<IFinalOutputRecorder> { 9class IFinalOutputRecorder final : public ServiceFramework<IFinalOutputRecorder> {
13public: 10public:
14 IFinalOutputRecorder() : ServiceFramework("IFinalOutputRecorder") { 11 IFinalOutputRecorder() : ServiceFramework("IFinalOutputRecorder") {
12 // clang-format off
15 static const FunctionInfo functions[] = { 13 static const FunctionInfo functions[] = {
16 {0, nullptr, "GetFinalOutputRecorderState"}, 14 {0, nullptr, "GetFinalOutputRecorderState"},
17 {1, nullptr, "StartFinalOutputRecorder"}, 15 {1, nullptr, "StartFinalOutputRecorder"},
@@ -20,13 +18,15 @@ public:
20 {4, nullptr, "RegisterBufferEvent"}, 18 {4, nullptr, "RegisterBufferEvent"},
21 {5, nullptr, "GetReleasedFinalOutputRecorderBuffer"}, 19 {5, nullptr, "GetReleasedFinalOutputRecorderBuffer"},
22 {6, nullptr, "ContainsFinalOutputRecorderBuffer"}, 20 {6, nullptr, "ContainsFinalOutputRecorderBuffer"},
23 {7, nullptr, "Unknown"}, 21 {7, nullptr, "GetFinalOutputRecorderBufferEndTime"},
24 {8, nullptr, "AppendFinalOutputRecorderBufferAuto"}, 22 {8, nullptr, "AppendFinalOutputRecorderBufferAuto"},
25 {9, nullptr, "GetReleasedFinalOutputRecorderBufferAuto"}, 23 {9, nullptr, "GetReleasedFinalOutputRecorderBufferAuto"},
24 {10, nullptr, "FlushFinalOutputRecorderBuffers"},
26 }; 25 };
26 // clang-format on
27
27 RegisterHandlers(functions); 28 RegisterHandlers(functions);
28 } 29 }
29 ~IFinalOutputRecorder() = default;
30}; 30};
31 31
32AudRecU::AudRecU() : ServiceFramework("audrec:u") { 32AudRecU::AudRecU() : ServiceFramework("audrec:u") {
diff --git a/src/core/hle/service/audio/audren_u.cpp b/src/core/hle/service/audio/audren_u.cpp
index 945259c7d..1dde6edb7 100644
--- a/src/core/hle/service/audio/audren_u.cpp
+++ b/src/core/hle/service/audio/audren_u.cpp
@@ -10,6 +10,7 @@
10#include "common/alignment.h" 10#include "common/alignment.h"
11#include "common/common_funcs.h" 11#include "common/common_funcs.h"
12#include "common/logging/log.h" 12#include "common/logging/log.h"
13#include "common/string_util.h"
13#include "core/core.h" 14#include "core/core.h"
14#include "core/hle/ipc_helpers.h" 15#include "core/hle/ipc_helpers.h"
15#include "core/hle/kernel/hle_ipc.h" 16#include "core/hle/kernel/hle_ipc.h"
@@ -17,6 +18,7 @@
17#include "core/hle/kernel/readable_event.h" 18#include "core/hle/kernel/readable_event.h"
18#include "core/hle/kernel/writable_event.h" 19#include "core/hle/kernel/writable_event.h"
19#include "core/hle/service/audio/audren_u.h" 20#include "core/hle/service/audio/audren_u.h"
21#include "core/hle/service/audio/errors.h"
20 22
21namespace Service::Audio { 23namespace Service::Audio {
22 24
@@ -37,15 +39,16 @@ public:
37 {8, &IAudioRenderer::SetRenderingTimeLimit, "SetRenderingTimeLimit"}, 39 {8, &IAudioRenderer::SetRenderingTimeLimit, "SetRenderingTimeLimit"},
38 {9, &IAudioRenderer::GetRenderingTimeLimit, "GetRenderingTimeLimit"}, 40 {9, &IAudioRenderer::GetRenderingTimeLimit, "GetRenderingTimeLimit"},
39 {10, &IAudioRenderer::RequestUpdateImpl, "RequestUpdateAuto"}, 41 {10, &IAudioRenderer::RequestUpdateImpl, "RequestUpdateAuto"},
40 {11, nullptr, "ExecuteAudioRendererRendering"}, 42 {11, &IAudioRenderer::ExecuteAudioRendererRendering, "ExecuteAudioRendererRendering"},
41 }; 43 };
42 // clang-format on 44 // clang-format on
43 RegisterHandlers(functions); 45 RegisterHandlers(functions);
44 46
45 auto& kernel = Core::System::GetInstance().Kernel(); 47 auto& system = Core::System::GetInstance();
46 system_event = Kernel::WritableEvent::CreateEventPair(kernel, Kernel::ResetType::Sticky, 48 system_event = Kernel::WritableEvent::CreateEventPair(
47 "IAudioRenderer:SystemEvent"); 49 system.Kernel(), Kernel::ResetType::Sticky, "IAudioRenderer:SystemEvent");
48 renderer = std::make_unique<AudioCore::AudioRenderer>(audren_params, system_event.writable); 50 renderer = std::make_unique<AudioCore::AudioRenderer>(system.CoreTiming(), audren_params,
51 system_event.writable);
49 } 52 }
50 53
51private: 54private:
@@ -137,6 +140,17 @@ private:
137 rb.Push(rendering_time_limit_percent); 140 rb.Push(rendering_time_limit_percent);
138 } 141 }
139 142
143 void ExecuteAudioRendererRendering(Kernel::HLERequestContext& ctx) {
144 LOG_DEBUG(Service_Audio, "called");
145
146 // This service command currently only reports an unsupported operation
147 // error code, or aborts. Given that, we just always return an error
148 // code in this case.
149
150 IPC::ResponseBuilder rb{ctx, 2};
151 rb.Push(ERR_NOT_SUPPORTED);
152 }
153
140 Kernel::EventPair system_event; 154 Kernel::EventPair system_event;
141 std::unique_ptr<AudioCore::AudioRenderer> renderer; 155 std::unique_ptr<AudioCore::AudioRenderer> renderer;
142 u32 rendering_time_limit_percent = 100; 156 u32 rendering_time_limit_percent = 100;
@@ -171,7 +185,6 @@ public:
171private: 185private:
172 void ListAudioDeviceName(Kernel::HLERequestContext& ctx) { 186 void ListAudioDeviceName(Kernel::HLERequestContext& ctx) {
173 LOG_WARNING(Service_Audio, "(STUBBED) called"); 187 LOG_WARNING(Service_Audio, "(STUBBED) called");
174 IPC::RequestParser rp{ctx};
175 188
176 constexpr std::array<char, 15> audio_interface{{"AudioInterface"}}; 189 constexpr std::array<char, 15> audio_interface{{"AudioInterface"}};
177 ctx.WriteBuffer(audio_interface); 190 ctx.WriteBuffer(audio_interface);
@@ -182,13 +195,13 @@ private:
182 } 195 }
183 196
184 void SetAudioDeviceOutputVolume(Kernel::HLERequestContext& ctx) { 197 void SetAudioDeviceOutputVolume(Kernel::HLERequestContext& ctx) {
185 LOG_WARNING(Service_Audio, "(STUBBED) called");
186
187 IPC::RequestParser rp{ctx}; 198 IPC::RequestParser rp{ctx};
188 f32 volume = static_cast<f32>(rp.Pop<u32>()); 199 const f32 volume = rp.Pop<f32>();
200
201 const auto device_name_buffer = ctx.ReadBuffer();
202 const std::string name = Common::StringFromBuffer(device_name_buffer);
189 203
190 auto file_buffer = ctx.ReadBuffer(); 204 LOG_WARNING(Service_Audio, "(STUBBED) called. name={}, volume={}", name, volume);
191 auto end = std::find(file_buffer.begin(), file_buffer.end(), '\0');
192 205
193 IPC::ResponseBuilder rb{ctx, 2}; 206 IPC::ResponseBuilder rb{ctx, 2};
194 rb.Push(RESULT_SUCCESS); 207 rb.Push(RESULT_SUCCESS);
@@ -196,7 +209,6 @@ private:
196 209
197 void GetActiveAudioDeviceName(Kernel::HLERequestContext& ctx) { 210 void GetActiveAudioDeviceName(Kernel::HLERequestContext& ctx) {
198 LOG_WARNING(Service_Audio, "(STUBBED) called"); 211 LOG_WARNING(Service_Audio, "(STUBBED) called");
199 IPC::RequestParser rp{ctx};
200 212
201 constexpr std::array<char, 12> audio_interface{{"AudioDevice"}}; 213 constexpr std::array<char, 12> audio_interface{{"AudioDevice"}};
202 ctx.WriteBuffer(audio_interface); 214 ctx.WriteBuffer(audio_interface);
@@ -229,14 +241,16 @@ private:
229}; // namespace Audio 241}; // namespace Audio
230 242
231AudRenU::AudRenU() : ServiceFramework("audren:u") { 243AudRenU::AudRenU() : ServiceFramework("audren:u") {
244 // clang-format off
232 static const FunctionInfo functions[] = { 245 static const FunctionInfo functions[] = {
233 {0, &AudRenU::OpenAudioRenderer, "OpenAudioRenderer"}, 246 {0, &AudRenU::OpenAudioRenderer, "OpenAudioRenderer"},
234 {1, &AudRenU::GetAudioRendererWorkBufferSize, "GetAudioRendererWorkBufferSize"}, 247 {1, &AudRenU::GetAudioRendererWorkBufferSize, "GetAudioRendererWorkBufferSize"},
235 {2, &AudRenU::GetAudioDevice, "GetAudioDevice"}, 248 {2, &AudRenU::GetAudioDeviceService, "GetAudioDeviceService"},
236 {3, nullptr, "OpenAudioRendererAuto"}, 249 {3, &AudRenU::OpenAudioRendererAuto, "OpenAudioRendererAuto"},
237 {4, &AudRenU::GetAudioDeviceServiceWithRevisionInfo, 250 {4, &AudRenU::GetAudioDeviceServiceWithRevisionInfo, "GetAudioDeviceServiceWithRevisionInfo"},
238 "GetAudioDeviceServiceWithRevisionInfo"},
239 }; 251 };
252 // clang-format on
253
240 RegisterHandlers(functions); 254 RegisterHandlers(functions);
241} 255}
242 256
@@ -245,12 +259,7 @@ AudRenU::~AudRenU() = default;
245void AudRenU::OpenAudioRenderer(Kernel::HLERequestContext& ctx) { 259void AudRenU::OpenAudioRenderer(Kernel::HLERequestContext& ctx) {
246 LOG_DEBUG(Service_Audio, "called"); 260 LOG_DEBUG(Service_Audio, "called");
247 261
248 IPC::RequestParser rp{ctx}; 262 OpenAudioRendererImpl(ctx);
249 auto params = rp.PopRaw<AudioCore::AudioRendererParameter>();
250 IPC::ResponseBuilder rb{ctx, 2, 0, 1};
251
252 rb.Push(RESULT_SUCCESS);
253 rb.PushIpcInterface<Audio::IAudioRenderer>(std::move(params));
254} 263}
255 264
256void AudRenU::GetAudioRendererWorkBufferSize(Kernel::HLERequestContext& ctx) { 265void AudRenU::GetAudioRendererWorkBufferSize(Kernel::HLERequestContext& ctx) {
@@ -259,20 +268,20 @@ void AudRenU::GetAudioRendererWorkBufferSize(Kernel::HLERequestContext& ctx) {
259 LOG_DEBUG(Service_Audio, "called"); 268 LOG_DEBUG(Service_Audio, "called");
260 269
261 u64 buffer_sz = Common::AlignUp(4 * params.mix_buffer_count, 0x40); 270 u64 buffer_sz = Common::AlignUp(4 * params.mix_buffer_count, 0x40);
262 buffer_sz += params.unknown_c * 1024; 271 buffer_sz += params.submix_count * 1024;
263 buffer_sz += 0x940 * (params.unknown_c + 1); 272 buffer_sz += 0x940 * (params.submix_count + 1);
264 buffer_sz += 0x3F0 * params.voice_count; 273 buffer_sz += 0x3F0 * params.voice_count;
265 buffer_sz += Common::AlignUp(8 * (params.unknown_c + 1), 0x10); 274 buffer_sz += Common::AlignUp(8 * (params.submix_count + 1), 0x10);
266 buffer_sz += Common::AlignUp(8 * params.voice_count, 0x10); 275 buffer_sz += Common::AlignUp(8 * params.voice_count, 0x10);
267 buffer_sz += 276 buffer_sz += Common::AlignUp(
268 Common::AlignUp((0x3C0 * (params.sink_count + params.unknown_c) + 4 * params.sample_count) * 277 (0x3C0 * (params.sink_count + params.submix_count) + 4 * params.sample_count) *
269 (params.mix_buffer_count + 6), 278 (params.mix_buffer_count + 6),
270 0x40); 279 0x40);
271 280
272 if (IsFeatureSupported(AudioFeatures::Splitter, params.revision)) { 281 if (IsFeatureSupported(AudioFeatures::Splitter, params.revision)) {
273 u32 count = params.unknown_c + 1; 282 const u32 count = params.submix_count + 1;
274 u64 node_count = Common::AlignUp(count, 0x40); 283 u64 node_count = Common::AlignUp(count, 0x40);
275 u64 node_state_buffer_sz = 284 const u64 node_state_buffer_sz =
276 4 * (node_count * node_count) + 0xC * node_count + 2 * (node_count / 8); 285 4 * (node_count * node_count) + 0xC * node_count + 2 * (node_count / 8);
277 u64 edge_matrix_buffer_sz = 0; 286 u64 edge_matrix_buffer_sz = 0;
278 node_count = Common::AlignUp(count * count, 0x40); 287 node_count = Common::AlignUp(count * count, 0x40);
@@ -286,19 +295,19 @@ void AudRenU::GetAudioRendererWorkBufferSize(Kernel::HLERequestContext& ctx) {
286 295
287 buffer_sz += 0x20 * (params.effect_count + 4 * params.voice_count) + 0x50; 296 buffer_sz += 0x20 * (params.effect_count + 4 * params.voice_count) + 0x50;
288 if (IsFeatureSupported(AudioFeatures::Splitter, params.revision)) { 297 if (IsFeatureSupported(AudioFeatures::Splitter, params.revision)) {
289 buffer_sz += 0xE0 * params.unknown_2c; 298 buffer_sz += 0xE0 * params.num_splitter_send_channels;
290 buffer_sz += 0x20 * params.splitter_count; 299 buffer_sz += 0x20 * params.splitter_count;
291 buffer_sz += Common::AlignUp(4 * params.unknown_2c, 0x10); 300 buffer_sz += Common::AlignUp(4 * params.num_splitter_send_channels, 0x10);
292 } 301 }
293 buffer_sz = Common::AlignUp(buffer_sz, 0x40) + 0x170 * params.sink_count; 302 buffer_sz = Common::AlignUp(buffer_sz, 0x40) + 0x170 * params.sink_count;
294 u64 output_sz = buffer_sz + 0x280 * params.sink_count + 0x4B0 * params.effect_count + 303 u64 output_sz = buffer_sz + 0x280 * params.sink_count + 0x4B0 * params.effect_count +
295 ((params.voice_count * 256) | 0x40); 304 ((params.voice_count * 256) | 0x40);
296 305
297 if (params.unknown_1c >= 1) { 306 if (params.performance_frame_count >= 1) {
298 output_sz = Common::AlignUp(((16 * params.sink_count + 16 * params.effect_count + 307 output_sz = Common::AlignUp(((16 * params.sink_count + 16 * params.effect_count +
299 16 * params.voice_count + 16) + 308 16 * params.voice_count + 16) +
300 0x658) * 309 0x658) *
301 (params.unknown_1c + 1) + 310 (params.performance_frame_count + 1) +
302 0xc0, 311 0xc0,
303 0x40) + 312 0x40) +
304 output_sz; 313 output_sz;
@@ -313,7 +322,7 @@ void AudRenU::GetAudioRendererWorkBufferSize(Kernel::HLERequestContext& ctx) {
313 LOG_DEBUG(Service_Audio, "buffer_size=0x{:X}", output_sz); 322 LOG_DEBUG(Service_Audio, "buffer_size=0x{:X}", output_sz);
314} 323}
315 324
316void AudRenU::GetAudioDevice(Kernel::HLERequestContext& ctx) { 325void AudRenU::GetAudioDeviceService(Kernel::HLERequestContext& ctx) {
317 LOG_DEBUG(Service_Audio, "called"); 326 LOG_DEBUG(Service_Audio, "called");
318 327
319 IPC::ResponseBuilder rb{ctx, 2, 0, 1}; 328 IPC::ResponseBuilder rb{ctx, 2, 0, 1};
@@ -322,6 +331,12 @@ void AudRenU::GetAudioDevice(Kernel::HLERequestContext& ctx) {
322 rb.PushIpcInterface<Audio::IAudioDevice>(); 331 rb.PushIpcInterface<Audio::IAudioDevice>();
323} 332}
324 333
334void AudRenU::OpenAudioRendererAuto(Kernel::HLERequestContext& ctx) {
335 LOG_DEBUG(Service_Audio, "called");
336
337 OpenAudioRendererImpl(ctx);
338}
339
325void AudRenU::GetAudioDeviceServiceWithRevisionInfo(Kernel::HLERequestContext& ctx) { 340void AudRenU::GetAudioDeviceServiceWithRevisionInfo(Kernel::HLERequestContext& ctx) {
326 LOG_WARNING(Service_Audio, "(STUBBED) called"); 341 LOG_WARNING(Service_Audio, "(STUBBED) called");
327 342
@@ -332,6 +347,15 @@ void AudRenU::GetAudioDeviceServiceWithRevisionInfo(Kernel::HLERequestContext& c
332 // based on the current revision 347 // based on the current revision
333} 348}
334 349
350void AudRenU::OpenAudioRendererImpl(Kernel::HLERequestContext& ctx) {
351 IPC::RequestParser rp{ctx};
352 const auto params = rp.PopRaw<AudioCore::AudioRendererParameter>();
353 IPC::ResponseBuilder rb{ctx, 2, 0, 1};
354
355 rb.Push(RESULT_SUCCESS);
356 rb.PushIpcInterface<IAudioRenderer>(params);
357}
358
335bool AudRenU::IsFeatureSupported(AudioFeatures feature, u32_le revision) const { 359bool AudRenU::IsFeatureSupported(AudioFeatures feature, u32_le revision) const {
336 u32_be version_num = (revision - Common::MakeMagic('R', 'E', 'V', '0')); // Byte swap 360 u32_be version_num = (revision - Common::MakeMagic('R', 'E', 'V', '0')); // Byte swap
337 switch (feature) { 361 switch (feature) {
diff --git a/src/core/hle/service/audio/audren_u.h b/src/core/hle/service/audio/audren_u.h
index c6bc3a90a..e55d25973 100644
--- a/src/core/hle/service/audio/audren_u.h
+++ b/src/core/hle/service/audio/audren_u.h
@@ -20,9 +20,12 @@ public:
20private: 20private:
21 void OpenAudioRenderer(Kernel::HLERequestContext& ctx); 21 void OpenAudioRenderer(Kernel::HLERequestContext& ctx);
22 void GetAudioRendererWorkBufferSize(Kernel::HLERequestContext& ctx); 22 void GetAudioRendererWorkBufferSize(Kernel::HLERequestContext& ctx);
23 void GetAudioDevice(Kernel::HLERequestContext& ctx); 23 void GetAudioDeviceService(Kernel::HLERequestContext& ctx);
24 void OpenAudioRendererAuto(Kernel::HLERequestContext& ctx);
24 void GetAudioDeviceServiceWithRevisionInfo(Kernel::HLERequestContext& ctx); 25 void GetAudioDeviceServiceWithRevisionInfo(Kernel::HLERequestContext& ctx);
25 26
27 void OpenAudioRendererImpl(Kernel::HLERequestContext& ctx);
28
26 enum class AudioFeatures : u32 { 29 enum class AudioFeatures : u32 {
27 Splitter, 30 Splitter,
28 }; 31 };
diff --git a/src/core/hle/service/audio/errors.h b/src/core/hle/service/audio/errors.h
new file mode 100644
index 000000000..6f8c09bcf
--- /dev/null
+++ b/src/core/hle/service/audio/errors.h
@@ -0,0 +1,15 @@
1// Copyright 2019 yuzu emulator team
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include "core/hle/result.h"
8
9namespace Service::Audio {
10
11constexpr ResultCode ERR_OPERATION_FAILED{ErrorModule::Audio, 2};
12constexpr ResultCode ERR_BUFFER_COUNT_EXCEEDED{ErrorModule::Audio, 8};
13constexpr ResultCode ERR_NOT_SUPPORTED{ErrorModule::Audio, 513};
14
15} // namespace Service::Audio
diff --git a/src/core/hle/service/audio/hwopus.cpp b/src/core/hle/service/audio/hwopus.cpp
index a850cadc8..cb4a1160d 100644
--- a/src/core/hle/service/audio/hwopus.cpp
+++ b/src/core/hle/service/audio/hwopus.cpp
@@ -5,110 +5,117 @@
5#include <chrono> 5#include <chrono>
6#include <cstring> 6#include <cstring>
7#include <memory> 7#include <memory>
8#include <optional>
9#include <vector> 8#include <vector>
10 9
11#include <opus.h> 10#include <opus.h>
11#include <opus_multistream.h>
12 12
13#include "common/common_funcs.h" 13#include "common/assert.h"
14#include "common/logging/log.h" 14#include "common/logging/log.h"
15#include "core/hle/ipc_helpers.h" 15#include "core/hle/ipc_helpers.h"
16#include "core/hle/kernel/hle_ipc.h" 16#include "core/hle/kernel/hle_ipc.h"
17#include "core/hle/service/audio/hwopus.h" 17#include "core/hle/service/audio/hwopus.h"
18 18
19namespace Service::Audio { 19namespace Service::Audio {
20 20namespace {
21struct OpusDeleter { 21struct OpusDeleter {
22 void operator()(void* ptr) const { 22 void operator()(OpusMSDecoder* ptr) const {
23 operator delete(ptr); 23 opus_multistream_decoder_destroy(ptr);
24 } 24 }
25}; 25};
26 26
27class IHardwareOpusDecoderManager final : public ServiceFramework<IHardwareOpusDecoderManager> { 27using OpusDecoderPtr = std::unique_ptr<OpusMSDecoder, OpusDeleter>;
28
29struct OpusPacketHeader {
30 // Packet size in bytes.
31 u32_be size;
32 // Indicates the final range of the codec's entropy coder.
33 u32_be final_range;
34};
35static_assert(sizeof(OpusPacketHeader) == 0x8, "OpusHeader is an invalid size");
36
37class OpusDecoderState {
28public: 38public:
29 IHardwareOpusDecoderManager(std::unique_ptr<OpusDecoder, OpusDeleter> decoder, u32 sample_rate, 39 /// Describes extra behavior that may be asked of the decoding context.
30 u32 channel_count) 40 enum class ExtraBehavior {
31 : ServiceFramework("IHardwareOpusDecoderManager"), decoder(std::move(decoder)), 41 /// No extra behavior.
32 sample_rate(sample_rate), channel_count(channel_count) { 42 None,
33 static const FunctionInfo functions[] = {
34 {0, &IHardwareOpusDecoderManager::DecodeInterleaved, "DecodeInterleaved"},
35 {1, nullptr, "SetContext"},
36 {2, nullptr, "DecodeInterleavedForMultiStream"},
37 {3, nullptr, "SetContextForMultiStream"},
38 {4, &IHardwareOpusDecoderManager::DecodeInterleavedWithPerformance,
39 "DecodeInterleavedWithPerformance"},
40 {5, nullptr, "Unknown5"},
41 {6, nullptr, "Unknown6"},
42 {7, nullptr, "Unknown7"},
43 };
44 RegisterHandlers(functions);
45 }
46 43
47private: 44 /// Resets the decoder context back to a freshly initialized state.
48 void DecodeInterleaved(Kernel::HLERequestContext& ctx) { 45 ResetContext,
49 LOG_DEBUG(Audio, "called"); 46 };
50 47
51 u32 consumed = 0; 48 enum class PerfTime {
52 u32 sample_count = 0; 49 Disabled,
53 std::vector<opus_int16> samples(ctx.GetWriteBufferSize() / sizeof(opus_int16)); 50 Enabled,
54 if (!Decoder_DecodeInterleaved(consumed, sample_count, ctx.ReadBuffer(), samples)) { 51 };
55 LOG_ERROR(Audio, "Failed to decode opus data"); 52
56 IPC::ResponseBuilder rb{ctx, 2}; 53 explicit OpusDecoderState(OpusDecoderPtr decoder, u32 sample_rate, u32 channel_count)
57 // TODO(ogniK): Use correct error code 54 : decoder{std::move(decoder)}, sample_rate{sample_rate}, channel_count{channel_count} {}
58 rb.Push(ResultCode(-1)); 55
59 return; 56 // Decodes interleaved Opus packets. Optionally allows reporting time taken to
57 // perform the decoding, as well as any relevant extra behavior.
58 void DecodeInterleaved(Kernel::HLERequestContext& ctx, PerfTime perf_time,
59 ExtraBehavior extra_behavior) {
60 if (perf_time == PerfTime::Disabled) {
61 DecodeInterleavedHelper(ctx, nullptr, extra_behavior);
62 } else {
63 u64 performance = 0;
64 DecodeInterleavedHelper(ctx, &performance, extra_behavior);
60 } 65 }
61 IPC::ResponseBuilder rb{ctx, 4};
62 rb.Push(RESULT_SUCCESS);
63 rb.Push<u32>(consumed);
64 rb.Push<u32>(sample_count);
65 ctx.WriteBuffer(samples.data(), samples.size() * sizeof(s16));
66 } 66 }
67 67
68 void DecodeInterleavedWithPerformance(Kernel::HLERequestContext& ctx) { 68private:
69 LOG_DEBUG(Audio, "called"); 69 void DecodeInterleavedHelper(Kernel::HLERequestContext& ctx, u64* performance,
70 70 ExtraBehavior extra_behavior) {
71 u32 consumed = 0; 71 u32 consumed = 0;
72 u32 sample_count = 0; 72 u32 sample_count = 0;
73 u64 performance = 0;
74 std::vector<opus_int16> samples(ctx.GetWriteBufferSize() / sizeof(opus_int16)); 73 std::vector<opus_int16> samples(ctx.GetWriteBufferSize() / sizeof(opus_int16));
75 if (!Decoder_DecodeInterleaved(consumed, sample_count, ctx.ReadBuffer(), samples, 74
76 performance)) { 75 if (extra_behavior == ExtraBehavior::ResetContext) {
76 ResetDecoderContext();
77 }
78
79 if (!DecodeOpusData(consumed, sample_count, ctx.ReadBuffer(), samples, performance)) {
77 LOG_ERROR(Audio, "Failed to decode opus data"); 80 LOG_ERROR(Audio, "Failed to decode opus data");
78 IPC::ResponseBuilder rb{ctx, 2}; 81 IPC::ResponseBuilder rb{ctx, 2};
79 // TODO(ogniK): Use correct error code 82 // TODO(ogniK): Use correct error code
80 rb.Push(ResultCode(-1)); 83 rb.Push(ResultCode(-1));
81 return; 84 return;
82 } 85 }
83 IPC::ResponseBuilder rb{ctx, 6}; 86
87 const u32 param_size = performance != nullptr ? 6 : 4;
88 IPC::ResponseBuilder rb{ctx, param_size};
84 rb.Push(RESULT_SUCCESS); 89 rb.Push(RESULT_SUCCESS);
85 rb.Push<u32>(consumed); 90 rb.Push<u32>(consumed);
86 rb.Push<u32>(sample_count); 91 rb.Push<u32>(sample_count);
87 rb.Push<u64>(performance); 92 if (performance) {
93 rb.Push<u64>(*performance);
94 }
88 ctx.WriteBuffer(samples.data(), samples.size() * sizeof(s16)); 95 ctx.WriteBuffer(samples.data(), samples.size() * sizeof(s16));
89 } 96 }
90 97
91 bool Decoder_DecodeInterleaved( 98 bool DecodeOpusData(u32& consumed, u32& sample_count, const std::vector<u8>& input,
92 u32& consumed, u32& sample_count, const std::vector<u8>& input, 99 std::vector<opus_int16>& output, u64* out_performance_time) const {
93 std::vector<opus_int16>& output,
94 std::optional<std::reference_wrapper<u64>> performance_time = std::nullopt) {
95 const auto start_time = std::chrono::high_resolution_clock::now(); 100 const auto start_time = std::chrono::high_resolution_clock::now();
96 std::size_t raw_output_sz = output.size() * sizeof(opus_int16); 101 const std::size_t raw_output_sz = output.size() * sizeof(opus_int16);
97 if (sizeof(OpusHeader) > input.size()) { 102 if (sizeof(OpusPacketHeader) > input.size()) {
98 LOG_ERROR(Audio, "Input is smaller than the header size, header_sz={}, input_sz={}", 103 LOG_ERROR(Audio, "Input is smaller than the header size, header_sz={}, input_sz={}",
99 sizeof(OpusHeader), input.size()); 104 sizeof(OpusPacketHeader), input.size());
100 return false; 105 return false;
101 } 106 }
102 OpusHeader hdr{}; 107
103 std::memcpy(&hdr, input.data(), sizeof(OpusHeader)); 108 OpusPacketHeader hdr{};
104 if (sizeof(OpusHeader) + static_cast<u32>(hdr.sz) > input.size()) { 109 std::memcpy(&hdr, input.data(), sizeof(OpusPacketHeader));
110 if (sizeof(OpusPacketHeader) + static_cast<u32>(hdr.size) > input.size()) {
105 LOG_ERROR(Audio, "Input does not fit in the opus header size. data_sz={}, input_sz={}", 111 LOG_ERROR(Audio, "Input does not fit in the opus header size. data_sz={}, input_sz={}",
106 sizeof(OpusHeader) + static_cast<u32>(hdr.sz), input.size()); 112 sizeof(OpusPacketHeader) + static_cast<u32>(hdr.size), input.size());
107 return false; 113 return false;
108 } 114 }
109 auto frame = input.data() + sizeof(OpusHeader); 115
110 auto decoded_sample_count = opus_packet_get_nb_samples( 116 const auto frame = input.data() + sizeof(OpusPacketHeader);
111 frame, static_cast<opus_int32>(input.size() - sizeof(OpusHeader)), 117 const auto decoded_sample_count = opus_packet_get_nb_samples(
118 frame, static_cast<opus_int32>(input.size() - sizeof(OpusPacketHeader)),
112 static_cast<opus_int32>(sample_rate)); 119 static_cast<opus_int32>(sample_rate));
113 if (decoded_sample_count * channel_count * sizeof(u16) > raw_output_sz) { 120 if (decoded_sample_count * channel_count * sizeof(u16) > raw_output_sz) {
114 LOG_ERROR( 121 LOG_ERROR(
@@ -117,46 +124,117 @@ private:
117 decoded_sample_count * channel_count * sizeof(u16), raw_output_sz); 124 decoded_sample_count * channel_count * sizeof(u16), raw_output_sz);
118 return false; 125 return false;
119 } 126 }
127
120 const int frame_size = (static_cast<int>(raw_output_sz / sizeof(s16) / channel_count)); 128 const int frame_size = (static_cast<int>(raw_output_sz / sizeof(s16) / channel_count));
121 auto out_sample_count = 129 const auto out_sample_count =
122 opus_decode(decoder.get(), frame, hdr.sz, output.data(), frame_size, 0); 130 opus_multistream_decode(decoder.get(), frame, hdr.size, output.data(), frame_size, 0);
123 if (out_sample_count < 0) { 131 if (out_sample_count < 0) {
124 LOG_ERROR(Audio, 132 LOG_ERROR(Audio,
125 "Incorrect sample count received from opus_decode, " 133 "Incorrect sample count received from opus_decode, "
126 "output_sample_count={}, frame_size={}, data_sz_from_hdr={}", 134 "output_sample_count={}, frame_size={}, data_sz_from_hdr={}",
127 out_sample_count, frame_size, static_cast<u32>(hdr.sz)); 135 out_sample_count, frame_size, static_cast<u32>(hdr.size));
128 return false; 136 return false;
129 } 137 }
138
130 const auto end_time = std::chrono::high_resolution_clock::now() - start_time; 139 const auto end_time = std::chrono::high_resolution_clock::now() - start_time;
131 sample_count = out_sample_count; 140 sample_count = out_sample_count;
132 consumed = static_cast<u32>(sizeof(OpusHeader) + hdr.sz); 141 consumed = static_cast<u32>(sizeof(OpusPacketHeader) + hdr.size);
133 if (performance_time.has_value()) { 142 if (out_performance_time != nullptr) {
134 performance_time->get() = 143 *out_performance_time =
135 std::chrono::duration_cast<std::chrono::milliseconds>(end_time).count(); 144 std::chrono::duration_cast<std::chrono::milliseconds>(end_time).count();
136 } 145 }
146
137 return true; 147 return true;
138 } 148 }
139 149
140 struct OpusHeader { 150 void ResetDecoderContext() {
141 u32_be sz; // Needs to be BE for some odd reason 151 ASSERT(decoder != nullptr);
142 INSERT_PADDING_WORDS(1); 152
143 }; 153 opus_multistream_decoder_ctl(decoder.get(), OPUS_RESET_STATE);
144 static_assert(sizeof(OpusHeader) == 0x8, "OpusHeader is an invalid size"); 154 }
145 155
146 std::unique_ptr<OpusDecoder, OpusDeleter> decoder; 156 OpusDecoderPtr decoder;
147 u32 sample_rate; 157 u32 sample_rate;
148 u32 channel_count; 158 u32 channel_count;
149}; 159};
150 160
151static std::size_t WorkerBufferSize(u32 channel_count) { 161class IHardwareOpusDecoderManager final : public ServiceFramework<IHardwareOpusDecoderManager> {
162public:
163 explicit IHardwareOpusDecoderManager(OpusDecoderState decoder_state)
164 : ServiceFramework("IHardwareOpusDecoderManager"), decoder_state{std::move(decoder_state)} {
165 // clang-format off
166 static const FunctionInfo functions[] = {
167 {0, &IHardwareOpusDecoderManager::DecodeInterleavedOld, "DecodeInterleavedOld"},
168 {1, nullptr, "SetContext"},
169 {2, nullptr, "DecodeInterleavedForMultiStreamOld"},
170 {3, nullptr, "SetContextForMultiStream"},
171 {4, &IHardwareOpusDecoderManager::DecodeInterleavedWithPerfOld, "DecodeInterleavedWithPerfOld"},
172 {5, nullptr, "DecodeInterleavedForMultiStreamWithPerfOld"},
173 {6, &IHardwareOpusDecoderManager::DecodeInterleaved, "DecodeInterleaved"},
174 {7, nullptr, "DecodeInterleavedForMultiStream"},
175 };
176 // clang-format on
177
178 RegisterHandlers(functions);
179 }
180
181private:
182 void DecodeInterleavedOld(Kernel::HLERequestContext& ctx) {
183 LOG_DEBUG(Audio, "called");
184
185 decoder_state.DecodeInterleaved(ctx, OpusDecoderState::PerfTime::Disabled,
186 OpusDecoderState::ExtraBehavior::None);
187 }
188
189 void DecodeInterleavedWithPerfOld(Kernel::HLERequestContext& ctx) {
190 LOG_DEBUG(Audio, "called");
191
192 decoder_state.DecodeInterleaved(ctx, OpusDecoderState::PerfTime::Enabled,
193 OpusDecoderState::ExtraBehavior::None);
194 }
195
196 void DecodeInterleaved(Kernel::HLERequestContext& ctx) {
197 LOG_DEBUG(Audio, "called");
198
199 IPC::RequestParser rp{ctx};
200 const auto extra_behavior = rp.Pop<bool>() ? OpusDecoderState::ExtraBehavior::ResetContext
201 : OpusDecoderState::ExtraBehavior::None;
202
203 decoder_state.DecodeInterleaved(ctx, OpusDecoderState::PerfTime::Enabled, extra_behavior);
204 }
205
206 OpusDecoderState decoder_state;
207};
208
209std::size_t WorkerBufferSize(u32 channel_count) {
152 ASSERT_MSG(channel_count == 1 || channel_count == 2, "Invalid channel count"); 210 ASSERT_MSG(channel_count == 1 || channel_count == 2, "Invalid channel count");
153 return opus_decoder_get_size(static_cast<int>(channel_count)); 211 constexpr int num_streams = 1;
212 const int num_stereo_streams = channel_count == 2 ? 1 : 0;
213 return opus_multistream_decoder_get_size(num_streams, num_stereo_streams);
154} 214}
155 215
216// Creates the mapping table that maps the input channels to the particular
217// output channels. In the stereo case, we map the left and right input channels
218// to the left and right output channels respectively.
219//
220// However, in the monophonic case, we only map the one available channel
221// to the sole output channel. We specify 255 for the would-be right channel
222// as this is a special value defined by Opus to indicate to the decoder to
223// ignore that channel.
224std::array<u8, 2> CreateMappingTable(u32 channel_count) {
225 if (channel_count == 2) {
226 return {{0, 1}};
227 }
228
229 return {{0, 255}};
230}
231} // Anonymous namespace
232
156void HwOpus::GetWorkBufferSize(Kernel::HLERequestContext& ctx) { 233void HwOpus::GetWorkBufferSize(Kernel::HLERequestContext& ctx) {
157 IPC::RequestParser rp{ctx}; 234 IPC::RequestParser rp{ctx};
158 const auto sample_rate = rp.Pop<u32>(); 235 const auto sample_rate = rp.Pop<u32>();
159 const auto channel_count = rp.Pop<u32>(); 236 const auto channel_count = rp.Pop<u32>();
237
160 LOG_DEBUG(Audio, "called with sample_rate={}, channel_count={}", sample_rate, channel_count); 238 LOG_DEBUG(Audio, "called with sample_rate={}, channel_count={}", sample_rate, channel_count);
161 239
162 ASSERT_MSG(sample_rate == 48000 || sample_rate == 24000 || sample_rate == 16000 || 240 ASSERT_MSG(sample_rate == 48000 || sample_rate == 24000 || sample_rate == 16000 ||
@@ -174,9 +252,10 @@ void HwOpus::GetWorkBufferSize(Kernel::HLERequestContext& ctx) {
174 252
175void HwOpus::OpenOpusDecoder(Kernel::HLERequestContext& ctx) { 253void HwOpus::OpenOpusDecoder(Kernel::HLERequestContext& ctx) {
176 IPC::RequestParser rp{ctx}; 254 IPC::RequestParser rp{ctx};
177 auto sample_rate = rp.Pop<u32>(); 255 const auto sample_rate = rp.Pop<u32>();
178 auto channel_count = rp.Pop<u32>(); 256 const auto channel_count = rp.Pop<u32>();
179 auto buffer_sz = rp.Pop<u32>(); 257 const auto buffer_sz = rp.Pop<u32>();
258
180 LOG_DEBUG(Audio, "called sample_rate={}, channel_count={}, buffer_size={}", sample_rate, 259 LOG_DEBUG(Audio, "called sample_rate={}, channel_count={}, buffer_size={}", sample_rate,
181 channel_count, buffer_sz); 260 channel_count, buffer_sz);
182 261
@@ -185,12 +264,18 @@ void HwOpus::OpenOpusDecoder(Kernel::HLERequestContext& ctx) {
185 "Invalid sample rate"); 264 "Invalid sample rate");
186 ASSERT_MSG(channel_count == 1 || channel_count == 2, "Invalid channel count"); 265 ASSERT_MSG(channel_count == 1 || channel_count == 2, "Invalid channel count");
187 266
188 std::size_t worker_sz = WorkerBufferSize(channel_count); 267 const std::size_t worker_sz = WorkerBufferSize(channel_count);
189 ASSERT_MSG(buffer_sz >= worker_sz, "Worker buffer too large"); 268 ASSERT_MSG(buffer_sz >= worker_sz, "Worker buffer too large");
190 std::unique_ptr<OpusDecoder, OpusDeleter> decoder{ 269
191 static_cast<OpusDecoder*>(operator new(worker_sz))}; 270 const int num_stereo_streams = channel_count == 2 ? 1 : 0;
192 if (const int err = opus_decoder_init(decoder.get(), sample_rate, channel_count)) { 271 const auto mapping_table = CreateMappingTable(channel_count);
193 LOG_ERROR(Audio, "Failed to init opus decoder with error={}", err); 272
273 int error = 0;
274 OpusDecoderPtr decoder{
275 opus_multistream_decoder_create(sample_rate, static_cast<int>(channel_count), 1,
276 num_stereo_streams, mapping_table.data(), &error)};
277 if (error != OPUS_OK || decoder == nullptr) {
278 LOG_ERROR(Audio, "Failed to create Opus decoder (error={}).", error);
194 IPC::ResponseBuilder rb{ctx, 2}; 279 IPC::ResponseBuilder rb{ctx, 2};
195 // TODO(ogniK): Use correct error code 280 // TODO(ogniK): Use correct error code
196 rb.Push(ResultCode(-1)); 281 rb.Push(ResultCode(-1));
@@ -199,8 +284,8 @@ void HwOpus::OpenOpusDecoder(Kernel::HLERequestContext& ctx) {
199 284
200 IPC::ResponseBuilder rb{ctx, 2, 0, 1}; 285 IPC::ResponseBuilder rb{ctx, 2, 0, 1};
201 rb.Push(RESULT_SUCCESS); 286 rb.Push(RESULT_SUCCESS);
202 rb.PushIpcInterface<IHardwareOpusDecoderManager>(std::move(decoder), sample_rate, 287 rb.PushIpcInterface<IHardwareOpusDecoderManager>(
203 channel_count); 288 OpusDecoderState{std::move(decoder), sample_rate, channel_count});
204} 289}
205 290
206HwOpus::HwOpus() : ServiceFramework("hwopus") { 291HwOpus::HwOpus() : ServiceFramework("hwopus") {
diff --git a/src/core/hle/service/btdrv/btdrv.cpp b/src/core/hle/service/btdrv/btdrv.cpp
index 5704ca0ab..59ef603e1 100644
--- a/src/core/hle/service/btdrv/btdrv.cpp
+++ b/src/core/hle/service/btdrv/btdrv.cpp
@@ -19,16 +19,16 @@ public:
19 explicit Bt() : ServiceFramework{"bt"} { 19 explicit Bt() : ServiceFramework{"bt"} {
20 // clang-format off 20 // clang-format off
21 static const FunctionInfo functions[] = { 21 static const FunctionInfo functions[] = {
22 {0, nullptr, "Unknown0"}, 22 {0, nullptr, "LeClientReadCharacteristic"},
23 {1, nullptr, "Unknown1"}, 23 {1, nullptr, "LeClientReadDescriptor"},
24 {2, nullptr, "Unknown2"}, 24 {2, nullptr, "LeClientWriteCharacteristic"},
25 {3, nullptr, "Unknown3"}, 25 {3, nullptr, "LeClientWriteDescriptor"},
26 {4, nullptr, "Unknown4"}, 26 {4, nullptr, "LeClientRegisterNotification"},
27 {5, nullptr, "Unknown5"}, 27 {5, nullptr, "LeClientDeregisterNotification"},
28 {6, nullptr, "Unknown6"}, 28 {6, nullptr, "SetLeResponse"},
29 {7, nullptr, "Unknown7"}, 29 {7, nullptr, "LeSendIndication"},
30 {8, nullptr, "Unknown8"}, 30 {8, nullptr, "GetLeEventInfo"},
31 {9, &Bt::RegisterEvent, "RegisterEvent"}, 31 {9, &Bt::RegisterBleEvent, "RegisterBleEvent"},
32 }; 32 };
33 // clang-format on 33 // clang-format on
34 RegisterHandlers(functions); 34 RegisterHandlers(functions);
@@ -39,7 +39,7 @@ public:
39 } 39 }
40 40
41private: 41private:
42 void RegisterEvent(Kernel::HLERequestContext& ctx) { 42 void RegisterBleEvent(Kernel::HLERequestContext& ctx) {
43 LOG_WARNING(Service_BTM, "(STUBBED) called"); 43 LOG_WARNING(Service_BTM, "(STUBBED) called");
44 44
45 IPC::ResponseBuilder rb{ctx, 2, 1}; 45 IPC::ResponseBuilder rb{ctx, 2, 1};
@@ -55,11 +55,11 @@ public:
55 explicit BtDrv() : ServiceFramework{"btdrv"} { 55 explicit BtDrv() : ServiceFramework{"btdrv"} {
56 // clang-format off 56 // clang-format off
57 static const FunctionInfo functions[] = { 57 static const FunctionInfo functions[] = {
58 {0, nullptr, "Unknown"}, 58 {0, nullptr, "InitializeBluetoothDriver"},
59 {1, nullptr, "Init"}, 59 {1, nullptr, "InitializeBluetooth"},
60 {2, nullptr, "Enable"}, 60 {2, nullptr, "EnableBluetooth"},
61 {3, nullptr, "Disable"}, 61 {3, nullptr, "DisableBluetooth"},
62 {4, nullptr, "CleanupAndShutdown"}, 62 {4, nullptr, "CleanupBluetooth"},
63 {5, nullptr, "GetAdapterProperties"}, 63 {5, nullptr, "GetAdapterProperties"},
64 {6, nullptr, "GetAdapterProperty"}, 64 {6, nullptr, "GetAdapterProperty"},
65 {7, nullptr, "SetAdapterProperty"}, 65 {7, nullptr, "SetAdapterProperty"},
@@ -70,36 +70,91 @@ public:
70 {12, nullptr, "CancelBond"}, 70 {12, nullptr, "CancelBond"},
71 {13, nullptr, "PinReply"}, 71 {13, nullptr, "PinReply"},
72 {14, nullptr, "SspReply"}, 72 {14, nullptr, "SspReply"},
73 {15, nullptr, "Unknown2"}, 73 {15, nullptr, "GetEventInfo"},
74 {16, nullptr, "InitInterfaces"}, 74 {16, nullptr, "InitializeHid"},
75 {17, nullptr, "HidHostInterface_Connect"}, 75 {17, nullptr, "HidConnect"},
76 {18, nullptr, "HidHostInterface_Disconnect"}, 76 {18, nullptr, "HidDisconnect"},
77 {19, nullptr, "HidHostInterface_SendData"}, 77 {19, nullptr, "HidSendData"},
78 {20, nullptr, "HidHostInterface_SendData2"}, 78 {20, nullptr, "HidSendData2"},
79 {21, nullptr, "HidHostInterface_SetReport"}, 79 {21, nullptr, "HidSetReport"},
80 {22, nullptr, "HidHostInterface_GetReport"}, 80 {22, nullptr, "HidGetReport"},
81 {23, nullptr, "HidHostInterface_WakeController"}, 81 {23, nullptr, "HidWakeController"},
82 {24, nullptr, "HidHostInterface_AddPairedDevice"}, 82 {24, nullptr, "HidAddPairedDevice"},
83 {25, nullptr, "HidHostInterface_GetPairedDevice"}, 83 {25, nullptr, "HidGetPairedDevice"},
84 {26, nullptr, "HidHostInterface_CleanupAndShutdown"}, 84 {26, nullptr, "CleanupHid"},
85 {27, nullptr, "Unknown3"}, 85 {27, nullptr, "HidGetEventInfo"},
86 {28, nullptr, "ExtInterface_SetTSI"}, 86 {28, nullptr, "ExtSetTsi"},
87 {29, nullptr, "ExtInterface_SetBurstMode"}, 87 {29, nullptr, "ExtSetBurstMode"},
88 {30, nullptr, "ExtInterface_SetZeroRetran"}, 88 {30, nullptr, "ExtSetZeroRetran"},
89 {31, nullptr, "ExtInterface_SetMcMode"}, 89 {31, nullptr, "ExtSetMcMode"},
90 {32, nullptr, "ExtInterface_StartLlrMode"}, 90 {32, nullptr, "ExtStartLlrMode"},
91 {33, nullptr, "ExtInterface_ExitLlrMode"}, 91 {33, nullptr, "ExtExitLlrMode"},
92 {34, nullptr, "ExtInterface_SetRadio"}, 92 {34, nullptr, "ExtSetRadio"},
93 {35, nullptr, "ExtInterface_SetVisibility"}, 93 {35, nullptr, "ExtSetVisibility"},
94 {36, nullptr, "Unknown4"}, 94 {36, nullptr, "ExtSetTbfcScan"},
95 {37, nullptr, "Unknown5"}, 95 {37, nullptr, "RegisterHidReportEvent"},
96 {38, nullptr, "HidHostInterface_GetLatestPlr"}, 96 {38, nullptr, "HidGetReportEventInfo"},
97 {39, nullptr, "ExtInterface_GetPendingConnections"}, 97 {39, nullptr, "GetLatestPlr"},
98 {40, nullptr, "HidHostInterface_GetChannelMap"}, 98 {40, nullptr, "ExtGetPendingConnections"},
99 {41, nullptr, "SetIsBluetoothBoostEnabled"}, 99 {41, nullptr, "GetChannelMap"},
100 {42, nullptr, "GetIsBluetoothBoostEnabled"}, 100 {42, nullptr, "EnableBluetoothBoostSetting"},
101 {43, nullptr, "SetIsBluetoothAfhEnabled"}, 101 {43, nullptr, "IsBluetoothBoostSettingEnabled"},
102 {44, nullptr, "GetIsBluetoothAfhEnabled"}, 102 {44, nullptr, "EnableBluetoothAfhSetting"},
103 {45, nullptr, "IsBluetoothAfhSettingEnabled"},
104 {46, nullptr, "InitializeBluetoothLe"},
105 {47, nullptr, "EnableBluetoothLe"},
106 {48, nullptr, "DisableBluetoothLe"},
107 {49, nullptr, "CleanupBluetoothLe"},
108 {50, nullptr, "SetLeVisibility"},
109 {51, nullptr, "SetLeConnectionParameter"},
110 {52, nullptr, "SetLeDefaultConnectionParameter"},
111 {53, nullptr, "SetLeAdvertiseData"},
112 {54, nullptr, "SetLeAdvertiseParameter"},
113 {55, nullptr, "StartLeScan"},
114 {56, nullptr, "StopLeScan"},
115 {57, nullptr, "AddLeScanFilterCondition"},
116 {58, nullptr, "DeleteLeScanFilterCondition"},
117 {59, nullptr, "DeleteLeScanFilter"},
118 {60, nullptr, "ClearLeScanFilters"},
119 {61, nullptr, "EnableLeScanFilter"},
120 {62, nullptr, "RegisterLeClient"},
121 {63, nullptr, "UnregisterLeClient"},
122 {64, nullptr, "UnregisterLeClientAll"},
123 {65, nullptr, "LeClientConnect"},
124 {66, nullptr, "LeClientCancelConnection"},
125 {67, nullptr, "LeClientDisconnect"},
126 {68, nullptr, "LeClientGetAttributes"},
127 {69, nullptr, "LeClientDiscoverService"},
128 {70, nullptr, "LeClientConfigureMtu"},
129 {71, nullptr, "RegisterLeServer"},
130 {72, nullptr, "UnregisterLeServer"},
131 {73, nullptr, "LeServerConnect"},
132 {74, nullptr, "LeServerDisconnect"},
133 {75, nullptr, "CreateLeService"},
134 {76, nullptr, "StartLeService"},
135 {77, nullptr, "AddLeCharacteristic"},
136 {78, nullptr, "AddLeDescriptor"},
137 {79, nullptr, "GetLeCoreEventInfo"},
138 {80, nullptr, "LeGetFirstCharacteristic"},
139 {81, nullptr, "LeGetNextCharacteristic"},
140 {82, nullptr, "LeGetFirstDescriptor"},
141 {83, nullptr, "LeGetNextDescriptor"},
142 {84, nullptr, "RegisterLeCoreDataPath"},
143 {85, nullptr, "UnregisterLeCoreDataPath"},
144 {86, nullptr, "RegisterLeHidDataPath"},
145 {87, nullptr, "UnregisterLeHidDataPath"},
146 {88, nullptr, "RegisterLeDataPath"},
147 {89, nullptr, "UnregisterLeDataPath"},
148 {90, nullptr, "LeClientReadCharacteristic"},
149 {91, nullptr, "LeClientReadDescriptor"},
150 {92, nullptr, "LeClientWriteCharacteristic"},
151 {93, nullptr, "LeClientWriteDescriptor"},
152 {94, nullptr, "LeClientRegisterNotification"},
153 {95, nullptr, "LeClientDeregisterNotification"},
154 {96, nullptr, "GetLeHidEventInfo"},
155 {97, nullptr, "RegisterBleHidEvent"},
156 {98, nullptr, "SetLeScanParameter"},
157 {256, nullptr, "GetIsManufacturingMode"}
103 }; 158 };
104 // clang-format on 159 // clang-format on
105 160
diff --git a/src/core/hle/service/btm/btm.cpp b/src/core/hle/service/btm/btm.cpp
index ef7398a23..4f15c3f19 100644
--- a/src/core/hle/service/btm/btm.cpp
+++ b/src/core/hle/service/btm/btm.cpp
@@ -20,38 +20,38 @@ public:
20 explicit IBtmUserCore() : ServiceFramework{"IBtmUserCore"} { 20 explicit IBtmUserCore() : ServiceFramework{"IBtmUserCore"} {
21 // clang-format off 21 // clang-format off
22 static const FunctionInfo functions[] = { 22 static const FunctionInfo functions[] = {
23 {0, &IBtmUserCore::GetScanEvent, "GetScanEvent"}, 23 {0, &IBtmUserCore::AcquireBleScanEvent, "AcquireBleScanEvent"},
24 {1, nullptr, "Unknown1"}, 24 {1, nullptr, "GetBleScanFilterParameter"},
25 {2, nullptr, "Unknown2"}, 25 {2, nullptr, "GetBleScanFilterParameter2"},
26 {3, nullptr, "Unknown3"}, 26 {3, nullptr, "StartBleScanForGeneral"},
27 {4, nullptr, "Unknown4"}, 27 {4, nullptr, "StopBleScanForGeneral"},
28 {5, nullptr, "Unknown5"}, 28 {5, nullptr, "GetBleScanResultsForGeneral"},
29 {6, nullptr, "Unknown6"}, 29 {6, nullptr, "StartBleScanForPaired"},
30 {7, nullptr, "Unknown7"}, 30 {7, nullptr, "StopBleScanForPaired"},
31 {8, nullptr, "Unknown8"}, 31 {8, nullptr, "StartBleScanForSmartDevice"},
32 {9, nullptr, "Unknown9"}, 32 {9, nullptr, "StopBleScanForSmartDevice"},
33 {10, nullptr, "Unknown10"}, 33 {10, nullptr, "GetBleScanResultsForSmartDevice"},
34 {17, &IBtmUserCore::GetConnectionEvent, "GetConnectionEvent"}, 34 {17, &IBtmUserCore::AcquireBleConnectionEvent, "AcquireBleConnectionEvent"},
35 {18, nullptr, "Unknown18"}, 35 {18, nullptr, "BleConnect"},
36 {19, nullptr, "Unknown19"}, 36 {19, nullptr, "BleDisconnect"},
37 {20, nullptr, "Unknown20"}, 37 {20, nullptr, "BleGetConnectionState"},
38 {21, nullptr, "Unknown21"}, 38 {21, nullptr, "AcquireBlePairingEvent"},
39 {22, nullptr, "Unknown22"}, 39 {22, nullptr, "BlePairDevice"},
40 {23, nullptr, "Unknown23"}, 40 {23, nullptr, "BleUnPairDevice"},
41 {24, nullptr, "Unknown24"}, 41 {24, nullptr, "BleUnPairDevice2"},
42 {25, nullptr, "Unknown25"}, 42 {25, nullptr, "BleGetPairedDevices"},
43 {26, &IBtmUserCore::GetDiscoveryEvent, "AcquireBleServiceDiscoveryEventImpl"}, 43 {26, &IBtmUserCore::AcquireBleServiceDiscoveryEvent, "AcquireBleServiceDiscoveryEvent"},
44 {27, nullptr, "Unknown27"}, 44 {27, nullptr, "GetGattServices"},
45 {28, nullptr, "Unknown28"}, 45 {28, nullptr, "GetGattService"},
46 {29, nullptr, "Unknown29"}, 46 {29, nullptr, "GetGattIncludedServices"},
47 {30, nullptr, "Unknown30"}, 47 {30, nullptr, "GetBelongingGattService"},
48 {31, nullptr, "Unknown31"}, 48 {31, nullptr, "GetGattCharacteristics"},
49 {32, nullptr, "Unknown32"}, 49 {32, nullptr, "GetGattDescriptors"},
50 {33, &IBtmUserCore::GetConfigEvent, "GetConfigEvent"}, 50 {33, &IBtmUserCore::AcquireBleMtuConfigEvent, "AcquireBleMtuConfigEvent"},
51 {34, nullptr, "Unknown34"}, 51 {34, nullptr, "ConfigureBleMtu"},
52 {35, nullptr, "Unknown35"}, 52 {35, nullptr, "GetBleMtu"},
53 {36, nullptr, "Unknown36"}, 53 {36, nullptr, "RegisterBleGattDataPath"},
54 {37, nullptr, "Unknown37"}, 54 {37, nullptr, "UnregisterBleGattDataPath"},
55 }; 55 };
56 // clang-format on 56 // clang-format on
57 RegisterHandlers(functions); 57 RegisterHandlers(functions);
@@ -68,7 +68,7 @@ public:
68 } 68 }
69 69
70private: 70private:
71 void GetScanEvent(Kernel::HLERequestContext& ctx) { 71 void AcquireBleScanEvent(Kernel::HLERequestContext& ctx) {
72 LOG_WARNING(Service_BTM, "(STUBBED) called"); 72 LOG_WARNING(Service_BTM, "(STUBBED) called");
73 73
74 IPC::ResponseBuilder rb{ctx, 2, 1}; 74 IPC::ResponseBuilder rb{ctx, 2, 1};
@@ -76,7 +76,7 @@ private:
76 rb.PushCopyObjects(scan_event.readable); 76 rb.PushCopyObjects(scan_event.readable);
77 } 77 }
78 78
79 void GetConnectionEvent(Kernel::HLERequestContext& ctx) { 79 void AcquireBleConnectionEvent(Kernel::HLERequestContext& ctx) {
80 LOG_WARNING(Service_BTM, "(STUBBED) called"); 80 LOG_WARNING(Service_BTM, "(STUBBED) called");
81 81
82 IPC::ResponseBuilder rb{ctx, 2, 1}; 82 IPC::ResponseBuilder rb{ctx, 2, 1};
@@ -84,7 +84,7 @@ private:
84 rb.PushCopyObjects(connection_event.readable); 84 rb.PushCopyObjects(connection_event.readable);
85 } 85 }
86 86
87 void GetDiscoveryEvent(Kernel::HLERequestContext& ctx) { 87 void AcquireBleServiceDiscoveryEvent(Kernel::HLERequestContext& ctx) {
88 LOG_WARNING(Service_BTM, "(STUBBED) called"); 88 LOG_WARNING(Service_BTM, "(STUBBED) called");
89 89
90 IPC::ResponseBuilder rb{ctx, 2, 1}; 90 IPC::ResponseBuilder rb{ctx, 2, 1};
@@ -92,7 +92,7 @@ private:
92 rb.PushCopyObjects(service_discovery.readable); 92 rb.PushCopyObjects(service_discovery.readable);
93 } 93 }
94 94
95 void GetConfigEvent(Kernel::HLERequestContext& ctx) { 95 void AcquireBleMtuConfigEvent(Kernel::HLERequestContext& ctx) {
96 LOG_WARNING(Service_BTM, "(STUBBED) called"); 96 LOG_WARNING(Service_BTM, "(STUBBED) called");
97 97
98 IPC::ResponseBuilder rb{ctx, 2, 1}; 98 IPC::ResponseBuilder rb{ctx, 2, 1};
@@ -111,14 +111,14 @@ public:
111 explicit BTM_USR() : ServiceFramework{"btm:u"} { 111 explicit BTM_USR() : ServiceFramework{"btm:u"} {
112 // clang-format off 112 // clang-format off
113 static const FunctionInfo functions[] = { 113 static const FunctionInfo functions[] = {
114 {0, &BTM_USR::GetCoreImpl, "GetCoreImpl"}, 114 {0, &BTM_USR::GetCore, "GetCore"},
115 }; 115 };
116 // clang-format on 116 // clang-format on
117 RegisterHandlers(functions); 117 RegisterHandlers(functions);
118 } 118 }
119 119
120private: 120private:
121 void GetCoreImpl(Kernel::HLERequestContext& ctx) { 121 void GetCore(Kernel::HLERequestContext& ctx) {
122 LOG_DEBUG(Service_BTM, "called"); 122 LOG_DEBUG(Service_BTM, "called");
123 123
124 IPC::ResponseBuilder rb{ctx, 2, 0, 1}; 124 IPC::ResponseBuilder rb{ctx, 2, 0, 1};
@@ -134,26 +134,64 @@ public:
134 static const FunctionInfo functions[] = { 134 static const FunctionInfo functions[] = {
135 {0, nullptr, "Unknown1"}, 135 {0, nullptr, "Unknown1"},
136 {1, nullptr, "Unknown2"}, 136 {1, nullptr, "Unknown2"},
137 {2, nullptr, "RegisterSystemEventForConnectedDeviceConditionImpl"}, 137 {2, nullptr, "RegisterSystemEventForConnectedDeviceCondition"},
138 {3, nullptr, "Unknown3"}, 138 {3, nullptr, "Unknown3"},
139 {4, nullptr, "Unknown4"}, 139 {4, nullptr, "Unknown4"},
140 {5, nullptr, "Unknown5"}, 140 {5, nullptr, "Unknown5"},
141 {6, nullptr, "Unknown6"}, 141 {6, nullptr, "Unknown6"},
142 {7, nullptr, "Unknown7"}, 142 {7, nullptr, "Unknown7"},
143 {8, nullptr, "RegisterSystemEventForRegisteredDeviceInfoImpl"}, 143 {8, nullptr, "RegisterSystemEventForRegisteredDeviceInfo"},
144 {9, nullptr, "Unknown8"}, 144 {9, nullptr, "Unknown8"},
145 {10, nullptr, "Unknown9"}, 145 {10, nullptr, "Unknown9"},
146 {11, nullptr, "Unknown10"}, 146 {11, nullptr, "Unknown10"},
147 {12, nullptr, "Unknown11"}, 147 {12, nullptr, "Unknown11"},
148 {13, nullptr, "Unknown12"}, 148 {13, nullptr, "Unknown12"},
149 {14, nullptr, "EnableRadioImpl"}, 149 {14, nullptr, "EnableRadio"},
150 {15, nullptr, "DisableRadioImpl"}, 150 {15, nullptr, "DisableRadio"},
151 {16, nullptr, "Unknown13"}, 151 {16, nullptr, "Unknown13"},
152 {17, nullptr, "Unknown14"}, 152 {17, nullptr, "Unknown14"},
153 {18, nullptr, "Unknown15"}, 153 {18, nullptr, "Unknown15"},
154 {19, nullptr, "Unknown16"}, 154 {19, nullptr, "Unknown16"},
155 {20, nullptr, "Unknown17"}, 155 {20, nullptr, "Unknown17"},
156 {21, nullptr, "Unknown18"}, 156 {21, nullptr, "Unknown18"},
157 {22, nullptr, "Unknown19"},
158 {23, nullptr, "Unknown20"},
159 {24, nullptr, "Unknown21"},
160 {25, nullptr, "Unknown22"},
161 {26, nullptr, "Unknown23"},
162 {27, nullptr, "Unknown24"},
163 {28, nullptr, "Unknown25"},
164 {29, nullptr, "Unknown26"},
165 {30, nullptr, "Unknown27"},
166 {31, nullptr, "Unknown28"},
167 {32, nullptr, "Unknown29"},
168 {33, nullptr, "Unknown30"},
169 {34, nullptr, "Unknown31"},
170 {35, nullptr, "Unknown32"},
171 {36, nullptr, "Unknown33"},
172 {37, nullptr, "Unknown34"},
173 {38, nullptr, "Unknown35"},
174 {39, nullptr, "Unknown36"},
175 {40, nullptr, "Unknown37"},
176 {41, nullptr, "Unknown38"},
177 {42, nullptr, "Unknown39"},
178 {43, nullptr, "Unknown40"},
179 {44, nullptr, "Unknown41"},
180 {45, nullptr, "Unknown42"},
181 {46, nullptr, "Unknown43"},
182 {47, nullptr, "Unknown44"},
183 {48, nullptr, "Unknown45"},
184 {49, nullptr, "Unknown46"},
185 {50, nullptr, "Unknown47"},
186 {51, nullptr, "Unknown48"},
187 {52, nullptr, "Unknown49"},
188 {53, nullptr, "Unknown50"},
189 {54, nullptr, "Unknown51"},
190 {55, nullptr, "Unknown52"},
191 {56, nullptr, "Unknown53"},
192 {57, nullptr, "Unknown54"},
193 {58, nullptr, "Unknown55"},
194 {59, nullptr, "Unknown56"},
157 }; 195 };
158 // clang-format on 196 // clang-format on
159 197
@@ -166,7 +204,7 @@ public:
166 explicit BTM_DBG() : ServiceFramework{"btm:dbg"} { 204 explicit BTM_DBG() : ServiceFramework{"btm:dbg"} {
167 // clang-format off 205 // clang-format off
168 static const FunctionInfo functions[] = { 206 static const FunctionInfo functions[] = {
169 {0, nullptr, "RegisterSystemEventForDiscoveryImpl"}, 207 {0, nullptr, "RegisterSystemEventForDiscovery"},
170 {1, nullptr, "Unknown1"}, 208 {1, nullptr, "Unknown1"},
171 {2, nullptr, "Unknown2"}, 209 {2, nullptr, "Unknown2"},
172 {3, nullptr, "Unknown3"}, 210 {3, nullptr, "Unknown3"},
@@ -175,6 +213,10 @@ public:
175 {6, nullptr, "Unknown6"}, 213 {6, nullptr, "Unknown6"},
176 {7, nullptr, "Unknown7"}, 214 {7, nullptr, "Unknown7"},
177 {8, nullptr, "Unknown8"}, 215 {8, nullptr, "Unknown8"},
216 {9, nullptr, "Unknown9"},
217 {10, nullptr, "Unknown10"},
218 {11, nullptr, "Unknown11"},
219 {12, nullptr, "Unknown11"},
178 }; 220 };
179 // clang-format on 221 // clang-format on
180 222
@@ -187,16 +229,16 @@ public:
187 explicit IBtmSystemCore() : ServiceFramework{"IBtmSystemCore"} { 229 explicit IBtmSystemCore() : ServiceFramework{"IBtmSystemCore"} {
188 // clang-format off 230 // clang-format off
189 static const FunctionInfo functions[] = { 231 static const FunctionInfo functions[] = {
190 {0, nullptr, "StartGamepadPairingImpl"}, 232 {0, nullptr, "StartGamepadPairing"},
191 {1, nullptr, "CancelGamepadPairingImpl"}, 233 {1, nullptr, "CancelGamepadPairing"},
192 {2, nullptr, "ClearGamepadPairingDatabaseImpl"}, 234 {2, nullptr, "ClearGamepadPairingDatabase"},
193 {3, nullptr, "GetPairedGamepadCountImpl"}, 235 {3, nullptr, "GetPairedGamepadCount"},
194 {4, nullptr, "EnableRadioImpl"}, 236 {4, nullptr, "EnableRadio"},
195 {5, nullptr, "DisableRadioImpl"}, 237 {5, nullptr, "DisableRadio"},
196 {6, nullptr, "GetRadioOnOffImpl"}, 238 {6, nullptr, "GetRadioOnOff"},
197 {7, nullptr, "AcquireRadioEventImpl"}, 239 {7, nullptr, "AcquireRadioEvent"},
198 {8, nullptr, "AcquireGamepadPairingEventImpl"}, 240 {8, nullptr, "AcquireGamepadPairingEvent"},
199 {9, nullptr, "IsGamepadPairingStartedImpl"}, 241 {9, nullptr, "IsGamepadPairingStarted"},
200 }; 242 };
201 // clang-format on 243 // clang-format on
202 244
@@ -209,7 +251,7 @@ public:
209 explicit BTM_SYS() : ServiceFramework{"btm:sys"} { 251 explicit BTM_SYS() : ServiceFramework{"btm:sys"} {
210 // clang-format off 252 // clang-format off
211 static const FunctionInfo functions[] = { 253 static const FunctionInfo functions[] = {
212 {0, &BTM_SYS::GetCoreImpl, "GetCoreImpl"}, 254 {0, &BTM_SYS::GetCore, "GetCore"},
213 }; 255 };
214 // clang-format on 256 // clang-format on
215 257
@@ -217,7 +259,7 @@ public:
217 } 259 }
218 260
219private: 261private:
220 void GetCoreImpl(Kernel::HLERequestContext& ctx) { 262 void GetCore(Kernel::HLERequestContext& ctx) {
221 LOG_DEBUG(Service_BTM, "called"); 263 LOG_DEBUG(Service_BTM, "called");
222 264
223 IPC::ResponseBuilder rb{ctx, 2, 0, 1}; 265 IPC::ResponseBuilder rb{ctx, 2, 0, 1};
diff --git a/src/core/hle/service/fatal/fatal.cpp b/src/core/hle/service/fatal/fatal.cpp
index 770590d0b..2c229bcad 100644
--- a/src/core/hle/service/fatal/fatal.cpp
+++ b/src/core/hle/service/fatal/fatal.cpp
@@ -25,21 +25,34 @@ Module::Interface::Interface(std::shared_ptr<Module> module, const char* name)
25Module::Interface::~Interface() = default; 25Module::Interface::~Interface() = default;
26 26
27struct FatalInfo { 27struct FatalInfo {
28 std::array<u64_le, 31> registers{}; // TODO(ogniK): See if this actually is registers or 28 enum class Architecture : s32 {
29 // not(find a game which has non zero valeus) 29 AArch64,
30 u64_le unk0{}; 30 AArch32,
31 u64_le unk1{}; 31 };
32 u64_le unk2{}; 32
33 u64_le unk3{}; 33 const char* ArchAsString() const {
34 u64_le unk4{}; 34 return arch == Architecture::AArch64 ? "AArch64" : "AArch32";
35 u64_le unk5{}; 35 }
36 u64_le unk6{}; 36
37 std::array<u64_le, 31> registers{};
38 u64_le sp{};
39 u64_le pc{};
40 u64_le pstate{};
41 u64_le afsr0{};
42 u64_le afsr1{};
43 u64_le esr{};
44 u64_le far{};
37 45
38 std::array<u64_le, 32> backtrace{}; 46 std::array<u64_le, 32> backtrace{};
39 u64_le unk7{}; 47 u64_le program_entry_point{};
40 u64_le unk8{}; 48
49 // Bit flags that indicate which registers have been set with values
50 // for this context. The service itself uses these to determine which
51 // registers to specifically print out.
52 u64_le set_flags{};
53
41 u32_le backtrace_size{}; 54 u32_le backtrace_size{};
42 u32_le unk9{}; 55 Architecture arch{};
43 u32_le unk10{}; // TODO(ogniK): Is this even used or is it just padding? 56 u32_le unk10{}; // TODO(ogniK): Is this even used or is it just padding?
44}; 57};
45static_assert(sizeof(FatalInfo) == 0x250, "FatalInfo is an invalid size"); 58static_assert(sizeof(FatalInfo) == 0x250, "FatalInfo is an invalid size");
@@ -52,36 +65,36 @@ enum class FatalType : u32 {
52 65
53static void GenerateErrorReport(ResultCode error_code, const FatalInfo& info) { 66static void GenerateErrorReport(ResultCode error_code, const FatalInfo& info) {
54 const auto title_id = Core::CurrentProcess()->GetTitleID(); 67 const auto title_id = Core::CurrentProcess()->GetTitleID();
55 std::string crash_report = 68 std::string crash_report = fmt::format(
56 fmt::format("Yuzu {}-{} crash report\n" 69 "Yuzu {}-{} crash report\n"
57 "Title ID: {:016x}\n" 70 "Title ID: {:016x}\n"
58 "Result: 0x{:X} ({:04}-{:04d})\n" 71 "Result: 0x{:X} ({:04}-{:04d})\n"
59 "\n", 72 "Set flags: 0x{:16X}\n"
60 Common::g_scm_branch, Common::g_scm_desc, title_id, error_code.raw, 73 "Program entry point: 0x{:16X}\n"
61 2000 + static_cast<u32>(error_code.module.Value()), 74 "\n",
62 static_cast<u32>(error_code.description.Value()), info.unk8, info.unk7); 75 Common::g_scm_branch, Common::g_scm_desc, title_id, error_code.raw,
76 2000 + static_cast<u32>(error_code.module.Value()),
77 static_cast<u32>(error_code.description.Value()), info.set_flags, info.program_entry_point);
63 if (info.backtrace_size != 0x0) { 78 if (info.backtrace_size != 0x0) {
64 crash_report += "Registers:\n"; 79 crash_report += "Registers:\n";
65 // TODO(ogniK): This is just a guess, find a game which actually has non zero values
66 for (size_t i = 0; i < info.registers.size(); i++) { 80 for (size_t i = 0; i < info.registers.size(); i++) {
67 crash_report += 81 crash_report +=
68 fmt::format(" X[{:02d}]: {:016x}\n", i, info.registers[i]); 82 fmt::format(" X[{:02d}]: {:016x}\n", i, info.registers[i]);
69 } 83 }
70 crash_report += fmt::format(" Unknown 0: {:016x}\n", info.unk0); 84 crash_report += fmt::format(" SP: {:016x}\n", info.sp);
71 crash_report += fmt::format(" Unknown 1: {:016x}\n", info.unk1); 85 crash_report += fmt::format(" PC: {:016x}\n", info.pc);
72 crash_report += fmt::format(" Unknown 2: {:016x}\n", info.unk2); 86 crash_report += fmt::format(" PSTATE: {:016x}\n", info.pstate);
73 crash_report += fmt::format(" Unknown 3: {:016x}\n", info.unk3); 87 crash_report += fmt::format(" AFSR0: {:016x}\n", info.afsr0);
74 crash_report += fmt::format(" Unknown 4: {:016x}\n", info.unk4); 88 crash_report += fmt::format(" AFSR1: {:016x}\n", info.afsr1);
75 crash_report += fmt::format(" Unknown 5: {:016x}\n", info.unk5); 89 crash_report += fmt::format(" ESR: {:016x}\n", info.esr);
76 crash_report += fmt::format(" Unknown 6: {:016x}\n", info.unk6); 90 crash_report += fmt::format(" FAR: {:016x}\n", info.far);
77 crash_report += "\nBacktrace:\n"; 91 crash_report += "\nBacktrace:\n";
78 for (size_t i = 0; i < info.backtrace_size; i++) { 92 for (size_t i = 0; i < info.backtrace_size; i++) {
79 crash_report += 93 crash_report +=
80 fmt::format(" Backtrace[{:02d}]: {:016x}\n", i, info.backtrace[i]); 94 fmt::format(" Backtrace[{:02d}]: {:016x}\n", i, info.backtrace[i]);
81 } 95 }
82 crash_report += fmt::format("\nUnknown 7: 0x{:016x}\n", info.unk7); 96
83 crash_report += fmt::format("Unknown 8: 0x{:016x}\n", info.unk8); 97 crash_report += fmt::format("Architecture: {}\n", info.ArchAsString());
84 crash_report += fmt::format("Unknown 9: 0x{:016x}\n", info.unk9);
85 crash_report += fmt::format("Unknown 10: 0x{:016x}\n", info.unk10); 98 crash_report += fmt::format("Unknown 10: 0x{:016x}\n", info.unk10);
86 } 99 }
87 100
@@ -125,13 +138,13 @@ static void ThrowFatalError(ResultCode error_code, FatalType fatal_type, const F
125 case FatalType::ErrorReport: 138 case FatalType::ErrorReport:
126 GenerateErrorReport(error_code, info); 139 GenerateErrorReport(error_code, info);
127 break; 140 break;
128 }; 141 }
129} 142}
130 143
131void Module::Interface::ThrowFatal(Kernel::HLERequestContext& ctx) { 144void Module::Interface::ThrowFatal(Kernel::HLERequestContext& ctx) {
132 LOG_ERROR(Service_Fatal, "called"); 145 LOG_ERROR(Service_Fatal, "called");
133 IPC::RequestParser rp{ctx}; 146 IPC::RequestParser rp{ctx};
134 auto error_code = rp.Pop<ResultCode>(); 147 const auto error_code = rp.Pop<ResultCode>();
135 148
136 ThrowFatalError(error_code, FatalType::ErrorScreen, {}); 149 ThrowFatalError(error_code, FatalType::ErrorScreen, {});
137 IPC::ResponseBuilder rb{ctx, 2}; 150 IPC::ResponseBuilder rb{ctx, 2};
@@ -141,8 +154,8 @@ void Module::Interface::ThrowFatal(Kernel::HLERequestContext& ctx) {
141void Module::Interface::ThrowFatalWithPolicy(Kernel::HLERequestContext& ctx) { 154void Module::Interface::ThrowFatalWithPolicy(Kernel::HLERequestContext& ctx) {
142 LOG_ERROR(Service_Fatal, "called"); 155 LOG_ERROR(Service_Fatal, "called");
143 IPC::RequestParser rp(ctx); 156 IPC::RequestParser rp(ctx);
144 auto error_code = rp.Pop<ResultCode>(); 157 const auto error_code = rp.Pop<ResultCode>();
145 auto fatal_type = rp.PopEnum<FatalType>(); 158 const auto fatal_type = rp.PopEnum<FatalType>();
146 159
147 ThrowFatalError(error_code, fatal_type, {}); // No info is passed with ThrowFatalWithPolicy 160 ThrowFatalError(error_code, fatal_type, {}); // No info is passed with ThrowFatalWithPolicy
148 IPC::ResponseBuilder rb{ctx, 2}; 161 IPC::ResponseBuilder rb{ctx, 2};
@@ -152,9 +165,9 @@ void Module::Interface::ThrowFatalWithPolicy(Kernel::HLERequestContext& ctx) {
152void Module::Interface::ThrowFatalWithCpuContext(Kernel::HLERequestContext& ctx) { 165void Module::Interface::ThrowFatalWithCpuContext(Kernel::HLERequestContext& ctx) {
153 LOG_ERROR(Service_Fatal, "called"); 166 LOG_ERROR(Service_Fatal, "called");
154 IPC::RequestParser rp(ctx); 167 IPC::RequestParser rp(ctx);
155 auto error_code = rp.Pop<ResultCode>(); 168 const auto error_code = rp.Pop<ResultCode>();
156 auto fatal_type = rp.PopEnum<FatalType>(); 169 const auto fatal_type = rp.PopEnum<FatalType>();
157 auto fatal_info = ctx.ReadBuffer(); 170 const auto fatal_info = ctx.ReadBuffer();
158 FatalInfo info{}; 171 FatalInfo info{};
159 172
160 ASSERT_MSG(fatal_info.size() == sizeof(FatalInfo), "Invalid fatal info buffer size!"); 173 ASSERT_MSG(fatal_info.size() == sizeof(FatalInfo), "Invalid fatal info buffer size!");
diff --git a/src/core/hle/service/filesystem/filesystem.cpp b/src/core/hle/service/filesystem/filesystem.cpp
index c6da2df43..1ebfeb4bf 100644
--- a/src/core/hle/service/filesystem/filesystem.cpp
+++ b/src/core/hle/service/filesystem/filesystem.cpp
@@ -197,13 +197,16 @@ ResultCode VfsDirectoryServiceWrapper::RenameDirectory(const std::string& src_pa
197 197
198ResultVal<FileSys::VirtualFile> VfsDirectoryServiceWrapper::OpenFile(const std::string& path_, 198ResultVal<FileSys::VirtualFile> VfsDirectoryServiceWrapper::OpenFile(const std::string& path_,
199 FileSys::Mode mode) const { 199 FileSys::Mode mode) const {
200 std::string path(FileUtil::SanitizePath(path_)); 200 const std::string path(FileUtil::SanitizePath(path_));
201 auto npath = path; 201 std::string_view npath = path;
202 while (npath.size() > 0 && (npath[0] == '/' || npath[0] == '\\')) 202 while (!npath.empty() && (npath[0] == '/' || npath[0] == '\\')) {
203 npath = npath.substr(1); 203 npath.remove_prefix(1);
204 }
205
204 auto file = backing->GetFileRelative(npath); 206 auto file = backing->GetFileRelative(npath);
205 if (file == nullptr) 207 if (file == nullptr) {
206 return FileSys::ERROR_PATH_NOT_FOUND; 208 return FileSys::ERROR_PATH_NOT_FOUND;
209 }
207 210
208 if (mode == FileSys::Mode::Append) { 211 if (mode == FileSys::Mode::Append) {
209 return MakeResult<FileSys::VirtualFile>( 212 return MakeResult<FileSys::VirtualFile>(
@@ -319,15 +322,15 @@ ResultVal<FileSys::VirtualFile> OpenRomFS(u64 title_id, FileSys::StorageId stora
319} 322}
320 323
321ResultVal<FileSys::VirtualDir> OpenSaveData(FileSys::SaveDataSpaceId space, 324ResultVal<FileSys::VirtualDir> OpenSaveData(FileSys::SaveDataSpaceId space,
322 FileSys::SaveDataDescriptor save_struct) { 325 const FileSys::SaveDataDescriptor& descriptor) {
323 LOG_TRACE(Service_FS, "Opening Save Data for space_id={:01X}, save_struct={}", 326 LOG_TRACE(Service_FS, "Opening Save Data for space_id={:01X}, save_struct={}",
324 static_cast<u8>(space), save_struct.DebugInfo()); 327 static_cast<u8>(space), descriptor.DebugInfo());
325 328
326 if (save_data_factory == nullptr) { 329 if (save_data_factory == nullptr) {
327 return FileSys::ERROR_ENTITY_NOT_FOUND; 330 return FileSys::ERROR_ENTITY_NOT_FOUND;
328 } 331 }
329 332
330 return save_data_factory->Open(space, save_struct); 333 return save_data_factory->Open(space, descriptor);
331} 334}
332 335
333ResultVal<FileSys::VirtualDir> OpenSaveDataSpace(FileSys::SaveDataSpaceId space) { 336ResultVal<FileSys::VirtualDir> OpenSaveDataSpace(FileSys::SaveDataSpaceId space) {
@@ -388,11 +391,6 @@ void WriteSaveDataSize(FileSys::SaveDataType type, u64 title_id, u128 user_id,
388 save_data_factory->WriteSaveDataSize(type, title_id, user_id, new_value); 391 save_data_factory->WriteSaveDataSize(type, title_id, user_id, new_value);
389} 392}
390 393
391FileSys::RegisteredCacheUnion GetUnionContents() {
392 return FileSys::RegisteredCacheUnion{
393 {GetSystemNANDContents(), GetUserNANDContents(), GetSDMCContents()}};
394}
395
396FileSys::RegisteredCache* GetSystemNANDContents() { 394FileSys::RegisteredCache* GetSystemNANDContents() {
397 LOG_TRACE(Service_FS, "Opening System NAND Contents"); 395 LOG_TRACE(Service_FS, "Opening System NAND Contents");
398 396
@@ -457,6 +455,10 @@ void CreateFactories(FileSys::VfsFilesystem& vfs, bool overwrite) {
457 if (bis_factory == nullptr) { 455 if (bis_factory == nullptr) {
458 bis_factory = 456 bis_factory =
459 std::make_unique<FileSys::BISFactory>(nand_directory, load_directory, dump_directory); 457 std::make_unique<FileSys::BISFactory>(nand_directory, load_directory, dump_directory);
458 Core::System::GetInstance().RegisterContentProvider(
459 FileSys::ContentProviderUnionSlot::SysNAND, bis_factory->GetSystemNANDContents());
460 Core::System::GetInstance().RegisterContentProvider(
461 FileSys::ContentProviderUnionSlot::UserNAND, bis_factory->GetUserNANDContents());
460 } 462 }
461 463
462 if (save_data_factory == nullptr) { 464 if (save_data_factory == nullptr) {
@@ -465,6 +467,8 @@ void CreateFactories(FileSys::VfsFilesystem& vfs, bool overwrite) {
465 467
466 if (sdmc_factory == nullptr) { 468 if (sdmc_factory == nullptr) {
467 sdmc_factory = std::make_unique<FileSys::SDMCFactory>(std::move(sd_directory)); 469 sdmc_factory = std::make_unique<FileSys::SDMCFactory>(std::move(sd_directory));
470 Core::System::GetInstance().RegisterContentProvider(FileSys::ContentProviderUnionSlot::SDMC,
471 sdmc_factory->GetSDMCContents());
468 } 472 }
469} 473}
470 474
diff --git a/src/core/hle/service/filesystem/filesystem.h b/src/core/hle/service/filesystem/filesystem.h
index 6fd5e7b23..6481f237c 100644
--- a/src/core/hle/service/filesystem/filesystem.h
+++ b/src/core/hle/service/filesystem/filesystem.h
@@ -46,7 +46,7 @@ ResultVal<FileSys::VirtualFile> OpenRomFSCurrentProcess();
46ResultVal<FileSys::VirtualFile> OpenRomFS(u64 title_id, FileSys::StorageId storage_id, 46ResultVal<FileSys::VirtualFile> OpenRomFS(u64 title_id, FileSys::StorageId storage_id,
47 FileSys::ContentRecordType type); 47 FileSys::ContentRecordType type);
48ResultVal<FileSys::VirtualDir> OpenSaveData(FileSys::SaveDataSpaceId space, 48ResultVal<FileSys::VirtualDir> OpenSaveData(FileSys::SaveDataSpaceId space,
49 FileSys::SaveDataDescriptor save_struct); 49 const FileSys::SaveDataDescriptor& descriptor);
50ResultVal<FileSys::VirtualDir> OpenSaveDataSpace(FileSys::SaveDataSpaceId space); 50ResultVal<FileSys::VirtualDir> OpenSaveDataSpace(FileSys::SaveDataSpaceId space);
51ResultVal<FileSys::VirtualDir> OpenSDMC(); 51ResultVal<FileSys::VirtualDir> OpenSDMC();
52 52
@@ -54,8 +54,6 @@ FileSys::SaveDataSize ReadSaveDataSize(FileSys::SaveDataType type, u64 title_id,
54void WriteSaveDataSize(FileSys::SaveDataType type, u64 title_id, u128 user_id, 54void WriteSaveDataSize(FileSys::SaveDataType type, u64 title_id, u128 user_id,
55 FileSys::SaveDataSize new_value); 55 FileSys::SaveDataSize new_value);
56 56
57FileSys::RegisteredCacheUnion GetUnionContents();
58
59FileSys::RegisteredCache* GetSystemNANDContents(); 57FileSys::RegisteredCache* GetSystemNANDContents();
60FileSys::RegisteredCache* GetUserNANDContents(); 58FileSys::RegisteredCache* GetUserNANDContents();
61FileSys::RegisteredCache* GetSDMCContents(); 59FileSys::RegisteredCache* GetSDMCContents();
diff --git a/src/core/hle/service/filesystem/fsp_srv.cpp b/src/core/hle/service/filesystem/fsp_srv.cpp
index 74c4e583b..657baddb8 100644
--- a/src/core/hle/service/filesystem/fsp_srv.cpp
+++ b/src/core/hle/service/filesystem/fsp_srv.cpp
@@ -315,61 +315,53 @@ public:
315 void CreateFile(Kernel::HLERequestContext& ctx) { 315 void CreateFile(Kernel::HLERequestContext& ctx) {
316 IPC::RequestParser rp{ctx}; 316 IPC::RequestParser rp{ctx};
317 317
318 auto file_buffer = ctx.ReadBuffer(); 318 const auto file_buffer = ctx.ReadBuffer();
319 std::string name = Common::StringFromBuffer(file_buffer); 319 const std::string name = Common::StringFromBuffer(file_buffer);
320 320
321 u64 mode = rp.Pop<u64>(); 321 const u64 mode = rp.Pop<u64>();
322 u32 size = rp.Pop<u32>(); 322 const u32 size = rp.Pop<u32>();
323 323
324 LOG_DEBUG(Service_FS, "called file {} mode 0x{:X} size 0x{:08X}", name, mode, size); 324 LOG_DEBUG(Service_FS, "called. file={}, mode=0x{:X}, size=0x{:08X}", name, mode, size);
325 325
326 IPC::ResponseBuilder rb{ctx, 2}; 326 IPC::ResponseBuilder rb{ctx, 2};
327 rb.Push(backend.CreateFile(name, size)); 327 rb.Push(backend.CreateFile(name, size));
328 } 328 }
329 329
330 void DeleteFile(Kernel::HLERequestContext& ctx) { 330 void DeleteFile(Kernel::HLERequestContext& ctx) {
331 IPC::RequestParser rp{ctx}; 331 const auto file_buffer = ctx.ReadBuffer();
332 332 const std::string name = Common::StringFromBuffer(file_buffer);
333 auto file_buffer = ctx.ReadBuffer();
334 std::string name = Common::StringFromBuffer(file_buffer);
335 333
336 LOG_DEBUG(Service_FS, "called file {}", name); 334 LOG_DEBUG(Service_FS, "called. file={}", name);
337 335
338 IPC::ResponseBuilder rb{ctx, 2}; 336 IPC::ResponseBuilder rb{ctx, 2};
339 rb.Push(backend.DeleteFile(name)); 337 rb.Push(backend.DeleteFile(name));
340 } 338 }
341 339
342 void CreateDirectory(Kernel::HLERequestContext& ctx) { 340 void CreateDirectory(Kernel::HLERequestContext& ctx) {
343 IPC::RequestParser rp{ctx}; 341 const auto file_buffer = ctx.ReadBuffer();
344 342 const std::string name = Common::StringFromBuffer(file_buffer);
345 auto file_buffer = ctx.ReadBuffer();
346 std::string name = Common::StringFromBuffer(file_buffer);
347 343
348 LOG_DEBUG(Service_FS, "called directory {}", name); 344 LOG_DEBUG(Service_FS, "called. directory={}", name);
349 345
350 IPC::ResponseBuilder rb{ctx, 2}; 346 IPC::ResponseBuilder rb{ctx, 2};
351 rb.Push(backend.CreateDirectory(name)); 347 rb.Push(backend.CreateDirectory(name));
352 } 348 }
353 349
354 void DeleteDirectory(Kernel::HLERequestContext& ctx) { 350 void DeleteDirectory(Kernel::HLERequestContext& ctx) {
355 const IPC::RequestParser rp{ctx};
356
357 const auto file_buffer = ctx.ReadBuffer(); 351 const auto file_buffer = ctx.ReadBuffer();
358 std::string name = Common::StringFromBuffer(file_buffer); 352 const std::string name = Common::StringFromBuffer(file_buffer);
359 353
360 LOG_DEBUG(Service_FS, "called directory {}", name); 354 LOG_DEBUG(Service_FS, "called. directory={}", name);
361 355
362 IPC::ResponseBuilder rb{ctx, 2}; 356 IPC::ResponseBuilder rb{ctx, 2};
363 rb.Push(backend.DeleteDirectory(name)); 357 rb.Push(backend.DeleteDirectory(name));
364 } 358 }
365 359
366 void DeleteDirectoryRecursively(Kernel::HLERequestContext& ctx) { 360 void DeleteDirectoryRecursively(Kernel::HLERequestContext& ctx) {
367 const IPC::RequestParser rp{ctx};
368
369 const auto file_buffer = ctx.ReadBuffer(); 361 const auto file_buffer = ctx.ReadBuffer();
370 std::string name = Common::StringFromBuffer(file_buffer); 362 const std::string name = Common::StringFromBuffer(file_buffer);
371 363
372 LOG_DEBUG(Service_FS, "called directory {}", name); 364 LOG_DEBUG(Service_FS, "called. directory={}", name);
373 365
374 IPC::ResponseBuilder rb{ctx, 2}; 366 IPC::ResponseBuilder rb{ctx, 2};
375 rb.Push(backend.DeleteDirectoryRecursively(name)); 367 rb.Push(backend.DeleteDirectoryRecursively(name));
@@ -386,18 +378,16 @@ public:
386 } 378 }
387 379
388 void RenameFile(Kernel::HLERequestContext& ctx) { 380 void RenameFile(Kernel::HLERequestContext& ctx) {
389 IPC::RequestParser rp{ctx};
390
391 std::vector<u8> buffer; 381 std::vector<u8> buffer;
392 buffer.resize(ctx.BufferDescriptorX()[0].Size()); 382 buffer.resize(ctx.BufferDescriptorX()[0].Size());
393 Memory::ReadBlock(ctx.BufferDescriptorX()[0].Address(), buffer.data(), buffer.size()); 383 Memory::ReadBlock(ctx.BufferDescriptorX()[0].Address(), buffer.data(), buffer.size());
394 std::string src_name = Common::StringFromBuffer(buffer); 384 const std::string src_name = Common::StringFromBuffer(buffer);
395 385
396 buffer.resize(ctx.BufferDescriptorX()[1].Size()); 386 buffer.resize(ctx.BufferDescriptorX()[1].Size());
397 Memory::ReadBlock(ctx.BufferDescriptorX()[1].Address(), buffer.data(), buffer.size()); 387 Memory::ReadBlock(ctx.BufferDescriptorX()[1].Address(), buffer.data(), buffer.size());
398 std::string dst_name = Common::StringFromBuffer(buffer); 388 const std::string dst_name = Common::StringFromBuffer(buffer);
399 389
400 LOG_DEBUG(Service_FS, "called file '{}' to file '{}'", src_name, dst_name); 390 LOG_DEBUG(Service_FS, "called. file '{}' to file '{}'", src_name, dst_name);
401 391
402 IPC::ResponseBuilder rb{ctx, 2}; 392 IPC::ResponseBuilder rb{ctx, 2};
403 rb.Push(backend.RenameFile(src_name, dst_name)); 393 rb.Push(backend.RenameFile(src_name, dst_name));
@@ -406,12 +396,12 @@ public:
406 void OpenFile(Kernel::HLERequestContext& ctx) { 396 void OpenFile(Kernel::HLERequestContext& ctx) {
407 IPC::RequestParser rp{ctx}; 397 IPC::RequestParser rp{ctx};
408 398
409 auto file_buffer = ctx.ReadBuffer(); 399 const auto file_buffer = ctx.ReadBuffer();
410 std::string name = Common::StringFromBuffer(file_buffer); 400 const std::string name = Common::StringFromBuffer(file_buffer);
411 401
412 auto mode = static_cast<FileSys::Mode>(rp.Pop<u32>()); 402 const auto mode = static_cast<FileSys::Mode>(rp.Pop<u32>());
413 403
414 LOG_DEBUG(Service_FS, "called file {} mode {}", name, static_cast<u32>(mode)); 404 LOG_DEBUG(Service_FS, "called. file={}, mode={}", name, static_cast<u32>(mode));
415 405
416 auto result = backend.OpenFile(name, mode); 406 auto result = backend.OpenFile(name, mode);
417 if (result.Failed()) { 407 if (result.Failed()) {
@@ -430,13 +420,13 @@ public:
430 void OpenDirectory(Kernel::HLERequestContext& ctx) { 420 void OpenDirectory(Kernel::HLERequestContext& ctx) {
431 IPC::RequestParser rp{ctx}; 421 IPC::RequestParser rp{ctx};
432 422
433 auto file_buffer = ctx.ReadBuffer(); 423 const auto file_buffer = ctx.ReadBuffer();
434 std::string name = Common::StringFromBuffer(file_buffer); 424 const std::string name = Common::StringFromBuffer(file_buffer);
435 425
436 // TODO(Subv): Implement this filter. 426 // TODO(Subv): Implement this filter.
437 u32 filter_flags = rp.Pop<u32>(); 427 const u32 filter_flags = rp.Pop<u32>();
438 428
439 LOG_DEBUG(Service_FS, "called directory {} filter {}", name, filter_flags); 429 LOG_DEBUG(Service_FS, "called. directory={}, filter={}", name, filter_flags);
440 430
441 auto result = backend.OpenDirectory(name); 431 auto result = backend.OpenDirectory(name);
442 if (result.Failed()) { 432 if (result.Failed()) {
@@ -453,12 +443,10 @@ public:
453 } 443 }
454 444
455 void GetEntryType(Kernel::HLERequestContext& ctx) { 445 void GetEntryType(Kernel::HLERequestContext& ctx) {
456 IPC::RequestParser rp{ctx}; 446 const auto file_buffer = ctx.ReadBuffer();
457 447 const std::string name = Common::StringFromBuffer(file_buffer);
458 auto file_buffer = ctx.ReadBuffer();
459 std::string name = Common::StringFromBuffer(file_buffer);
460 448
461 LOG_DEBUG(Service_FS, "called file {}", name); 449 LOG_DEBUG(Service_FS, "called. file={}", name);
462 450
463 auto result = backend.GetEntryType(name); 451 auto result = backend.GetEntryType(name);
464 if (result.Failed()) { 452 if (result.Failed()) {
@@ -616,7 +604,9 @@ private:
616 u64_le save_id; 604 u64_le save_id;
617 u64_le title_id; 605 u64_le title_id;
618 u64_le save_image_size; 606 u64_le save_image_size;
619 INSERT_PADDING_BYTES(0x28); 607 u16_le index;
608 FileSys::SaveDataRank rank;
609 INSERT_PADDING_BYTES(0x25);
620 }; 610 };
621 static_assert(sizeof(SaveDataInfo) == 0x60, "SaveDataInfo has incorrect size."); 611 static_assert(sizeof(SaveDataInfo) == 0x60, "SaveDataInfo has incorrect size.");
622 612
@@ -627,8 +617,8 @@ private:
627FSP_SRV::FSP_SRV() : ServiceFramework("fsp-srv") { 617FSP_SRV::FSP_SRV() : ServiceFramework("fsp-srv") {
628 // clang-format off 618 // clang-format off
629 static const FunctionInfo functions[] = { 619 static const FunctionInfo functions[] = {
630 {0, nullptr, "MountContent"}, 620 {0, nullptr, "OpenFileSystem"},
631 {1, &FSP_SRV::Initialize, "Initialize"}, 621 {1, &FSP_SRV::SetCurrentProcess, "SetCurrentProcess"},
632 {2, nullptr, "OpenDataFileSystemByCurrentProcess"}, 622 {2, nullptr, "OpenDataFileSystemByCurrentProcess"},
633 {7, &FSP_SRV::OpenFileSystemWithPatch, "OpenFileSystemWithPatch"}, 623 {7, &FSP_SRV::OpenFileSystemWithPatch, "OpenFileSystemWithPatch"},
634 {8, nullptr, "OpenFileSystemWithId"}, 624 {8, nullptr, "OpenFileSystemWithId"},
@@ -637,10 +627,10 @@ FSP_SRV::FSP_SRV() : ServiceFramework("fsp-srv") {
637 {12, nullptr, "OpenBisStorage"}, 627 {12, nullptr, "OpenBisStorage"},
638 {13, nullptr, "InvalidateBisCache"}, 628 {13, nullptr, "InvalidateBisCache"},
639 {17, nullptr, "OpenHostFileSystem"}, 629 {17, nullptr, "OpenHostFileSystem"},
640 {18, &FSP_SRV::MountSdCard, "MountSdCard"}, 630 {18, &FSP_SRV::OpenSdCardFileSystem, "OpenSdCardFileSystem"},
641 {19, nullptr, "FormatSdCardFileSystem"}, 631 {19, nullptr, "FormatSdCardFileSystem"},
642 {21, nullptr, "DeleteSaveDataFileSystem"}, 632 {21, nullptr, "DeleteSaveDataFileSystem"},
643 {22, &FSP_SRV::CreateSaveData, "CreateSaveData"}, 633 {22, &FSP_SRV::CreateSaveDataFileSystem, "CreateSaveDataFileSystem"},
644 {23, nullptr, "CreateSaveDataFileSystemBySystemSaveDataId"}, 634 {23, nullptr, "CreateSaveDataFileSystemBySystemSaveDataId"},
645 {24, nullptr, "RegisterSaveDataFileSystemAtomicDeletion"}, 635 {24, nullptr, "RegisterSaveDataFileSystemAtomicDeletion"},
646 {25, nullptr, "DeleteSaveDataFileSystemBySaveDataSpaceId"}, 636 {25, nullptr, "DeleteSaveDataFileSystemBySaveDataSpaceId"},
@@ -652,7 +642,8 @@ FSP_SRV::FSP_SRV() : ServiceFramework("fsp-srv") {
652 {32, nullptr, "ExtendSaveDataFileSystem"}, 642 {32, nullptr, "ExtendSaveDataFileSystem"},
653 {33, nullptr, "DeleteCacheStorage"}, 643 {33, nullptr, "DeleteCacheStorage"},
654 {34, nullptr, "GetCacheStorageSize"}, 644 {34, nullptr, "GetCacheStorageSize"},
655 {51, &FSP_SRV::MountSaveData, "MountSaveData"}, 645 {35, nullptr, "CreateSaveDataFileSystemByHashSalt"},
646 {51, &FSP_SRV::OpenSaveDataFileSystem, "OpenSaveDataFileSystem"},
656 {52, nullptr, "OpenSaveDataFileSystemBySystemSaveDataId"}, 647 {52, nullptr, "OpenSaveDataFileSystemBySystemSaveDataId"},
657 {53, &FSP_SRV::OpenReadOnlySaveDataFileSystem, "OpenReadOnlySaveDataFileSystem"}, 648 {53, &FSP_SRV::OpenReadOnlySaveDataFileSystem, "OpenReadOnlySaveDataFileSystem"},
658 {57, nullptr, "ReadSaveDataFileSystemExtraDataBySaveDataSpaceId"}, 649 {57, nullptr, "ReadSaveDataFileSystemExtraDataBySaveDataSpaceId"},
@@ -664,21 +655,26 @@ FSP_SRV::FSP_SRV() : ServiceFramework("fsp-srv") {
664 {64, nullptr, "OpenSaveDataInternalStorageFileSystem"}, 655 {64, nullptr, "OpenSaveDataInternalStorageFileSystem"},
665 {65, nullptr, "UpdateSaveDataMacForDebug"}, 656 {65, nullptr, "UpdateSaveDataMacForDebug"},
666 {66, nullptr, "WriteSaveDataFileSystemExtraData2"}, 657 {66, nullptr, "WriteSaveDataFileSystemExtraData2"},
658 {67, nullptr, "FindSaveDataWithFilter"},
659 {68, nullptr, "OpenSaveDataInfoReaderBySaveDataFilter"},
667 {80, nullptr, "OpenSaveDataMetaFile"}, 660 {80, nullptr, "OpenSaveDataMetaFile"},
668 {81, nullptr, "OpenSaveDataTransferManager"}, 661 {81, nullptr, "OpenSaveDataTransferManager"},
669 {82, nullptr, "OpenSaveDataTransferManagerVersion2"}, 662 {82, nullptr, "OpenSaveDataTransferManagerVersion2"},
670 {83, nullptr, "OpenSaveDataTransferProhibiterForCloudBackUp"}, 663 {83, nullptr, "OpenSaveDataTransferProhibiterForCloudBackUp"},
664 {84, nullptr, "ListApplicationAccessibleSaveDataOwnerId"},
671 {100, nullptr, "OpenImageDirectoryFileSystem"}, 665 {100, nullptr, "OpenImageDirectoryFileSystem"},
672 {110, nullptr, "OpenContentStorageFileSystem"}, 666 {110, nullptr, "OpenContentStorageFileSystem"},
667 {120, nullptr, "OpenCloudBackupWorkStorageFileSystem"},
673 {200, &FSP_SRV::OpenDataStorageByCurrentProcess, "OpenDataStorageByCurrentProcess"}, 668 {200, &FSP_SRV::OpenDataStorageByCurrentProcess, "OpenDataStorageByCurrentProcess"},
674 {201, nullptr, "OpenDataStorageByProgramId"}, 669 {201, nullptr, "OpenDataStorageByProgramId"},
675 {202, &FSP_SRV::OpenDataStorageByDataId, "OpenDataStorageByDataId"}, 670 {202, &FSP_SRV::OpenDataStorageByDataId, "OpenDataStorageByDataId"},
676 {203, &FSP_SRV::OpenRomStorage, "OpenRomStorage"}, 671 {203, &FSP_SRV::OpenPatchDataStorageByCurrentProcess, "OpenPatchDataStorageByCurrentProcess"},
677 {400, nullptr, "OpenDeviceOperator"}, 672 {400, nullptr, "OpenDeviceOperator"},
678 {500, nullptr, "OpenSdCardDetectionEventNotifier"}, 673 {500, nullptr, "OpenSdCardDetectionEventNotifier"},
679 {501, nullptr, "OpenGameCardDetectionEventNotifier"}, 674 {501, nullptr, "OpenGameCardDetectionEventNotifier"},
680 {510, nullptr, "OpenSystemDataUpdateEventNotifier"}, 675 {510, nullptr, "OpenSystemDataUpdateEventNotifier"},
681 {511, nullptr, "NotifySystemDataUpdateEvent"}, 676 {511, nullptr, "NotifySystemDataUpdateEvent"},
677 {520, nullptr, "SimulateGameCardDetectionEvent"},
682 {600, nullptr, "SetCurrentPosixTime"}, 678 {600, nullptr, "SetCurrentPosixTime"},
683 {601, nullptr, "QuerySaveDataTotalSize"}, 679 {601, nullptr, "QuerySaveDataTotalSize"},
684 {602, nullptr, "VerifySaveDataFileSystem"}, 680 {602, nullptr, "VerifySaveDataFileSystem"},
@@ -717,6 +713,8 @@ FSP_SRV::FSP_SRV() : ServiceFramework("fsp-srv") {
717 {1008, nullptr, "OpenRegisteredUpdatePartition"}, 713 {1008, nullptr, "OpenRegisteredUpdatePartition"},
718 {1009, nullptr, "GetAndClearMemoryReportInfo"}, 714 {1009, nullptr, "GetAndClearMemoryReportInfo"},
719 {1100, nullptr, "OverrideSaveDataTransferTokenSignVerificationKey"}, 715 {1100, nullptr, "OverrideSaveDataTransferTokenSignVerificationKey"},
716 {1110, nullptr, "CorruptSaveDataFileSystemBySaveDataSpaceId2"},
717 {1200, nullptr, "OpenMultiCommitManager"},
720 }; 718 };
721 // clang-format on 719 // clang-format on
722 RegisterHandlers(functions); 720 RegisterHandlers(functions);
@@ -724,8 +722,11 @@ FSP_SRV::FSP_SRV() : ServiceFramework("fsp-srv") {
724 722
725FSP_SRV::~FSP_SRV() = default; 723FSP_SRV::~FSP_SRV() = default;
726 724
727void FSP_SRV::Initialize(Kernel::HLERequestContext& ctx) { 725void FSP_SRV::SetCurrentProcess(Kernel::HLERequestContext& ctx) {
728 LOG_WARNING(Service_FS, "(STUBBED) called"); 726 IPC::RequestParser rp{ctx};
727 current_process_id = rp.Pop<u64>();
728
729 LOG_DEBUG(Service_FS, "called. current_process_id=0x{:016X}", current_process_id);
729 730
730 IPC::ResponseBuilder rb{ctx, 2}; 731 IPC::ResponseBuilder rb{ctx, 2};
731 rb.Push(RESULT_SUCCESS); 732 rb.Push(RESULT_SUCCESS);
@@ -743,7 +744,7 @@ void FSP_SRV::OpenFileSystemWithPatch(Kernel::HLERequestContext& ctx) {
743 rb.Push(ResultCode(-1)); 744 rb.Push(ResultCode(-1));
744} 745}
745 746
746void FSP_SRV::MountSdCard(Kernel::HLERequestContext& ctx) { 747void FSP_SRV::OpenSdCardFileSystem(Kernel::HLERequestContext& ctx) {
747 LOG_DEBUG(Service_FS, "called"); 748 LOG_DEBUG(Service_FS, "called");
748 749
749 IFileSystem filesystem(OpenSDMC().Unwrap()); 750 IFileSystem filesystem(OpenSDMC().Unwrap());
@@ -753,7 +754,7 @@ void FSP_SRV::MountSdCard(Kernel::HLERequestContext& ctx) {
753 rb.PushIpcInterface<IFileSystem>(std::move(filesystem)); 754 rb.PushIpcInterface<IFileSystem>(std::move(filesystem));
754} 755}
755 756
756void FSP_SRV::CreateSaveData(Kernel::HLERequestContext& ctx) { 757void FSP_SRV::CreateSaveDataFileSystem(Kernel::HLERequestContext& ctx) {
757 IPC::RequestParser rp{ctx}; 758 IPC::RequestParser rp{ctx};
758 759
759 auto save_struct = rp.PopRaw<FileSys::SaveDataDescriptor>(); 760 auto save_struct = rp.PopRaw<FileSys::SaveDataDescriptor>();
@@ -767,17 +768,18 @@ void FSP_SRV::CreateSaveData(Kernel::HLERequestContext& ctx) {
767 rb.Push(RESULT_SUCCESS); 768 rb.Push(RESULT_SUCCESS);
768} 769}
769 770
770void FSP_SRV::MountSaveData(Kernel::HLERequestContext& ctx) { 771void FSP_SRV::OpenSaveDataFileSystem(Kernel::HLERequestContext& ctx) {
771 IPC::RequestParser rp{ctx}; 772 LOG_INFO(Service_FS, "called.");
772
773 auto space_id = rp.PopRaw<FileSys::SaveDataSpaceId>();
774 auto unk = rp.Pop<u32>();
775 LOG_INFO(Service_FS, "called with unknown={:08X}", unk);
776 773
777 auto save_struct = rp.PopRaw<FileSys::SaveDataDescriptor>(); 774 struct Parameters {
775 FileSys::SaveDataSpaceId save_data_space_id;
776 FileSys::SaveDataDescriptor descriptor;
777 };
778 778
779 auto dir = OpenSaveData(space_id, save_struct); 779 IPC::RequestParser rp{ctx};
780 const auto parameters = rp.PopRaw<Parameters>();
780 781
782 auto dir = OpenSaveData(parameters.save_data_space_id, parameters.descriptor);
781 if (dir.Failed()) { 783 if (dir.Failed()) {
782 IPC::ResponseBuilder rb{ctx, 2, 0, 0}; 784 IPC::ResponseBuilder rb{ctx, 2, 0, 0};
783 rb.Push(FileSys::ERROR_ENTITY_NOT_FOUND); 785 rb.Push(FileSys::ERROR_ENTITY_NOT_FOUND);
@@ -793,7 +795,7 @@ void FSP_SRV::MountSaveData(Kernel::HLERequestContext& ctx) {
793 795
794void FSP_SRV::OpenReadOnlySaveDataFileSystem(Kernel::HLERequestContext& ctx) { 796void FSP_SRV::OpenReadOnlySaveDataFileSystem(Kernel::HLERequestContext& ctx) {
795 LOG_WARNING(Service_FS, "(STUBBED) called, delegating to 51 OpenSaveDataFilesystem"); 797 LOG_WARNING(Service_FS, "(STUBBED) called, delegating to 51 OpenSaveDataFilesystem");
796 MountSaveData(ctx); 798 OpenSaveDataFileSystem(ctx);
797} 799}
798 800
799void FSP_SRV::OpenSaveDataInfoReaderBySaveDataSpaceId(Kernel::HLERequestContext& ctx) { 801void FSP_SRV::OpenSaveDataInfoReaderBySaveDataSpaceId(Kernel::HLERequestContext& ctx) {
@@ -881,7 +883,7 @@ void FSP_SRV::OpenDataStorageByDataId(Kernel::HLERequestContext& ctx) {
881 rb.PushIpcInterface<IStorage>(std::move(storage)); 883 rb.PushIpcInterface<IStorage>(std::move(storage));
882} 884}
883 885
884void FSP_SRV::OpenRomStorage(Kernel::HLERequestContext& ctx) { 886void FSP_SRV::OpenPatchDataStorageByCurrentProcess(Kernel::HLERequestContext& ctx) {
885 IPC::RequestParser rp{ctx}; 887 IPC::RequestParser rp{ctx};
886 888
887 auto storage_id = rp.PopRaw<FileSys::StorageId>(); 889 auto storage_id = rp.PopRaw<FileSys::StorageId>();
diff --git a/src/core/hle/service/filesystem/fsp_srv.h b/src/core/hle/service/filesystem/fsp_srv.h
index e7abec0a3..d7572ba7a 100644
--- a/src/core/hle/service/filesystem/fsp_srv.h
+++ b/src/core/hle/service/filesystem/fsp_srv.h
@@ -19,19 +19,20 @@ public:
19 ~FSP_SRV() override; 19 ~FSP_SRV() override;
20 20
21private: 21private:
22 void Initialize(Kernel::HLERequestContext& ctx); 22 void SetCurrentProcess(Kernel::HLERequestContext& ctx);
23 void OpenFileSystemWithPatch(Kernel::HLERequestContext& ctx); 23 void OpenFileSystemWithPatch(Kernel::HLERequestContext& ctx);
24 void MountSdCard(Kernel::HLERequestContext& ctx); 24 void OpenSdCardFileSystem(Kernel::HLERequestContext& ctx);
25 void CreateSaveData(Kernel::HLERequestContext& ctx); 25 void CreateSaveDataFileSystem(Kernel::HLERequestContext& ctx);
26 void MountSaveData(Kernel::HLERequestContext& ctx); 26 void OpenSaveDataFileSystem(Kernel::HLERequestContext& ctx);
27 void OpenReadOnlySaveDataFileSystem(Kernel::HLERequestContext& ctx); 27 void OpenReadOnlySaveDataFileSystem(Kernel::HLERequestContext& ctx);
28 void OpenSaveDataInfoReaderBySaveDataSpaceId(Kernel::HLERequestContext& ctx); 28 void OpenSaveDataInfoReaderBySaveDataSpaceId(Kernel::HLERequestContext& ctx);
29 void GetGlobalAccessLogMode(Kernel::HLERequestContext& ctx); 29 void GetGlobalAccessLogMode(Kernel::HLERequestContext& ctx);
30 void OpenDataStorageByCurrentProcess(Kernel::HLERequestContext& ctx); 30 void OpenDataStorageByCurrentProcess(Kernel::HLERequestContext& ctx);
31 void OpenDataStorageByDataId(Kernel::HLERequestContext& ctx); 31 void OpenDataStorageByDataId(Kernel::HLERequestContext& ctx);
32 void OpenRomStorage(Kernel::HLERequestContext& ctx); 32 void OpenPatchDataStorageByCurrentProcess(Kernel::HLERequestContext& ctx);
33 33
34 FileSys::VirtualFile romfs; 34 FileSys::VirtualFile romfs;
35 u64 current_process_id = 0;
35}; 36};
36 37
37} // namespace Service::FileSystem 38} // namespace Service::FileSystem
diff --git a/src/core/hle/service/hid/controllers/controller_base.h b/src/core/hle/service/hid/controllers/controller_base.h
index f0e092b1b..5e5097a03 100644
--- a/src/core/hle/service/hid/controllers/controller_base.h
+++ b/src/core/hle/service/hid/controllers/controller_base.h
@@ -7,6 +7,10 @@
7#include "common/common_types.h" 7#include "common/common_types.h"
8#include "common/swap.h" 8#include "common/swap.h"
9 9
10namespace Core::Timing {
11class CoreTiming;
12}
13
10namespace Service::HID { 14namespace Service::HID {
11class ControllerBase { 15class ControllerBase {
12public: 16public:
@@ -20,7 +24,8 @@ public:
20 virtual void OnRelease() = 0; 24 virtual void OnRelease() = 0;
21 25
22 // When the controller is requesting an update for the shared memory 26 // When the controller is requesting an update for the shared memory
23 virtual void OnUpdate(u8* data, std::size_t size) = 0; 27 virtual void OnUpdate(const Core::Timing::CoreTiming& core_timing, u8* data,
28 std::size_t size) = 0;
24 29
25 // Called when input devices should be loaded 30 // Called when input devices should be loaded
26 virtual void OnLoadInputDevices() = 0; 31 virtual void OnLoadInputDevices() = 0;
diff --git a/src/core/hle/service/hid/controllers/debug_pad.cpp b/src/core/hle/service/hid/controllers/debug_pad.cpp
index c22357d8c..c5c2e032a 100644
--- a/src/core/hle/service/hid/controllers/debug_pad.cpp
+++ b/src/core/hle/service/hid/controllers/debug_pad.cpp
@@ -21,8 +21,9 @@ void Controller_DebugPad::OnInit() {}
21 21
22void Controller_DebugPad::OnRelease() {} 22void Controller_DebugPad::OnRelease() {}
23 23
24void Controller_DebugPad::OnUpdate(u8* data, std::size_t size) { 24void Controller_DebugPad::OnUpdate(const Core::Timing::CoreTiming& core_timing, u8* data,
25 shared_memory.header.timestamp = CoreTiming::GetTicks(); 25 std::size_t size) {
26 shared_memory.header.timestamp = core_timing.GetTicks();
26 shared_memory.header.total_entry_count = 17; 27 shared_memory.header.total_entry_count = 17;
27 28
28 if (!IsControllerActivated()) { 29 if (!IsControllerActivated()) {
diff --git a/src/core/hle/service/hid/controllers/debug_pad.h b/src/core/hle/service/hid/controllers/debug_pad.h
index 68b734248..e584b92ec 100644
--- a/src/core/hle/service/hid/controllers/debug_pad.h
+++ b/src/core/hle/service/hid/controllers/debug_pad.h
@@ -26,7 +26,7 @@ public:
26 void OnRelease() override; 26 void OnRelease() override;
27 27
28 // When the controller is requesting an update for the shared memory 28 // When the controller is requesting an update for the shared memory
29 void OnUpdate(u8* data, std::size_t size) override; 29 void OnUpdate(const Core::Timing::CoreTiming& core_timing, u8* data, std::size_t size) override;
30 30
31 // Called when input devices should be loaded 31 // Called when input devices should be loaded
32 void OnLoadInputDevices() override; 32 void OnLoadInputDevices() override;
@@ -41,20 +41,20 @@ private:
41 struct PadState { 41 struct PadState {
42 union { 42 union {
43 u32_le raw{}; 43 u32_le raw{};
44 BitField<0, 1, u32_le> a; 44 BitField<0, 1, u32> a;
45 BitField<1, 1, u32_le> b; 45 BitField<1, 1, u32> b;
46 BitField<2, 1, u32_le> x; 46 BitField<2, 1, u32> x;
47 BitField<3, 1, u32_le> y; 47 BitField<3, 1, u32> y;
48 BitField<4, 1, u32_le> l; 48 BitField<4, 1, u32> l;
49 BitField<5, 1, u32_le> r; 49 BitField<5, 1, u32> r;
50 BitField<6, 1, u32_le> zl; 50 BitField<6, 1, u32> zl;
51 BitField<7, 1, u32_le> zr; 51 BitField<7, 1, u32> zr;
52 BitField<8, 1, u32_le> plus; 52 BitField<8, 1, u32> plus;
53 BitField<9, 1, u32_le> minus; 53 BitField<9, 1, u32> minus;
54 BitField<10, 1, u32_le> d_left; 54 BitField<10, 1, u32> d_left;
55 BitField<11, 1, u32_le> d_up; 55 BitField<11, 1, u32> d_up;
56 BitField<12, 1, u32_le> d_right; 56 BitField<12, 1, u32> d_right;
57 BitField<13, 1, u32_le> d_down; 57 BitField<13, 1, u32> d_down;
58 }; 58 };
59 }; 59 };
60 static_assert(sizeof(PadState) == 0x4, "PadState is an invalid size"); 60 static_assert(sizeof(PadState) == 0x4, "PadState is an invalid size");
@@ -62,7 +62,7 @@ private:
62 struct Attributes { 62 struct Attributes {
63 union { 63 union {
64 u32_le raw{}; 64 u32_le raw{};
65 BitField<0, 1, u32_le> connected; 65 BitField<0, 1, u32> connected;
66 }; 66 };
67 }; 67 };
68 static_assert(sizeof(Attributes) == 0x4, "Attributes is an invalid size"); 68 static_assert(sizeof(Attributes) == 0x4, "Attributes is an invalid size");
diff --git a/src/core/hle/service/hid/controllers/gesture.cpp b/src/core/hle/service/hid/controllers/gesture.cpp
index 898572277..a179252e3 100644
--- a/src/core/hle/service/hid/controllers/gesture.cpp
+++ b/src/core/hle/service/hid/controllers/gesture.cpp
@@ -17,8 +17,9 @@ void Controller_Gesture::OnInit() {}
17 17
18void Controller_Gesture::OnRelease() {} 18void Controller_Gesture::OnRelease() {}
19 19
20void Controller_Gesture::OnUpdate(u8* data, std::size_t size) { 20void Controller_Gesture::OnUpdate(const Core::Timing::CoreTiming& core_timing, u8* data,
21 shared_memory.header.timestamp = CoreTiming::GetTicks(); 21 std::size_t size) {
22 shared_memory.header.timestamp = core_timing.GetTicks();
22 shared_memory.header.total_entry_count = 17; 23 shared_memory.header.total_entry_count = 17;
23 24
24 if (!IsControllerActivated()) { 25 if (!IsControllerActivated()) {
diff --git a/src/core/hle/service/hid/controllers/gesture.h b/src/core/hle/service/hid/controllers/gesture.h
index 1056ffbcd..f305fe90f 100644
--- a/src/core/hle/service/hid/controllers/gesture.h
+++ b/src/core/hle/service/hid/controllers/gesture.h
@@ -22,7 +22,7 @@ public:
22 void OnRelease() override; 22 void OnRelease() override;
23 23
24 // When the controller is requesting an update for the shared memory 24 // When the controller is requesting an update for the shared memory
25 void OnUpdate(u8* data, size_t size) override; 25 void OnUpdate(const Core::Timing::CoreTiming& core_timing, u8* data, size_t size) override;
26 26
27 // Called when input devices should be loaded 27 // Called when input devices should be loaded
28 void OnLoadInputDevices() override; 28 void OnLoadInputDevices() override;
diff --git a/src/core/hle/service/hid/controllers/keyboard.cpp b/src/core/hle/service/hid/controllers/keyboard.cpp
index ca75adc2b..92d7bfb52 100644
--- a/src/core/hle/service/hid/controllers/keyboard.cpp
+++ b/src/core/hle/service/hid/controllers/keyboard.cpp
@@ -19,8 +19,9 @@ void Controller_Keyboard::OnInit() {}
19 19
20void Controller_Keyboard::OnRelease() {} 20void Controller_Keyboard::OnRelease() {}
21 21
22void Controller_Keyboard::OnUpdate(u8* data, std::size_t size) { 22void Controller_Keyboard::OnUpdate(const Core::Timing::CoreTiming& core_timing, u8* data,
23 shared_memory.header.timestamp = CoreTiming::GetTicks(); 23 std::size_t size) {
24 shared_memory.header.timestamp = core_timing.GetTicks();
24 shared_memory.header.total_entry_count = 17; 25 shared_memory.header.total_entry_count = 17;
25 26
26 if (!IsControllerActivated()) { 27 if (!IsControllerActivated()) {
diff --git a/src/core/hle/service/hid/controllers/keyboard.h b/src/core/hle/service/hid/controllers/keyboard.h
index f52775456..73cd2c7bb 100644
--- a/src/core/hle/service/hid/controllers/keyboard.h
+++ b/src/core/hle/service/hid/controllers/keyboard.h
@@ -25,7 +25,7 @@ public:
25 void OnRelease() override; 25 void OnRelease() override;
26 26
27 // When the controller is requesting an update for the shared memory 27 // When the controller is requesting an update for the shared memory
28 void OnUpdate(u8* data, std::size_t size) override; 28 void OnUpdate(const Core::Timing::CoreTiming& core_timing, u8* data, std::size_t size) override;
29 29
30 // Called when input devices should be loaded 30 // Called when input devices should be loaded
31 void OnLoadInputDevices() override; 31 void OnLoadInputDevices() override;
diff --git a/src/core/hle/service/hid/controllers/mouse.cpp b/src/core/hle/service/hid/controllers/mouse.cpp
index 63391dbe9..11ab096d9 100644
--- a/src/core/hle/service/hid/controllers/mouse.cpp
+++ b/src/core/hle/service/hid/controllers/mouse.cpp
@@ -17,8 +17,9 @@ Controller_Mouse::~Controller_Mouse() = default;
17void Controller_Mouse::OnInit() {} 17void Controller_Mouse::OnInit() {}
18void Controller_Mouse::OnRelease() {} 18void Controller_Mouse::OnRelease() {}
19 19
20void Controller_Mouse::OnUpdate(u8* data, std::size_t size) { 20void Controller_Mouse::OnUpdate(const Core::Timing::CoreTiming& core_timing, u8* data,
21 shared_memory.header.timestamp = CoreTiming::GetTicks(); 21 std::size_t size) {
22 shared_memory.header.timestamp = core_timing.GetTicks();
22 shared_memory.header.total_entry_count = 17; 23 shared_memory.header.total_entry_count = 17;
23 24
24 if (!IsControllerActivated()) { 25 if (!IsControllerActivated()) {
diff --git a/src/core/hle/service/hid/controllers/mouse.h b/src/core/hle/service/hid/controllers/mouse.h
index 70b654d07..9d46eecbe 100644
--- a/src/core/hle/service/hid/controllers/mouse.h
+++ b/src/core/hle/service/hid/controllers/mouse.h
@@ -24,7 +24,7 @@ public:
24 void OnRelease() override; 24 void OnRelease() override;
25 25
26 // When the controller is requesting an update for the shared memory 26 // When the controller is requesting an update for the shared memory
27 void OnUpdate(u8* data, std::size_t size) override; 27 void OnUpdate(const Core::Timing::CoreTiming& core_timing, u8* data, std::size_t size) override;
28 28
29 // Called when input devices should be loaded 29 // Called when input devices should be loaded
30 void OnLoadInputDevices() override; 30 void OnLoadInputDevices() override;
diff --git a/src/core/hle/service/hid/controllers/npad.cpp b/src/core/hle/service/hid/controllers/npad.cpp
index 04c8c35a8..e7fc7a619 100644
--- a/src/core/hle/service/hid/controllers/npad.cpp
+++ b/src/core/hle/service/hid/controllers/npad.cpp
@@ -288,7 +288,8 @@ void Controller_NPad::RequestPadStateUpdate(u32 npad_id) {
288 rstick_entry.y = static_cast<s32>(stick_r_y_f * HID_JOYSTICK_MAX); 288 rstick_entry.y = static_cast<s32>(stick_r_y_f * HID_JOYSTICK_MAX);
289} 289}
290 290
291void Controller_NPad::OnUpdate(u8* data, std::size_t data_len) { 291void Controller_NPad::OnUpdate(const Core::Timing::CoreTiming& core_timing, u8* data,
292 std::size_t data_len) {
292 if (!IsControllerActivated()) 293 if (!IsControllerActivated())
293 return; 294 return;
294 for (std::size_t i = 0; i < shared_memory_entries.size(); i++) { 295 for (std::size_t i = 0; i < shared_memory_entries.size(); i++) {
@@ -308,7 +309,7 @@ void Controller_NPad::OnUpdate(u8* data, std::size_t data_len) {
308 const auto& last_entry = 309 const auto& last_entry =
309 main_controller->npad[main_controller->common.last_entry_index]; 310 main_controller->npad[main_controller->common.last_entry_index];
310 311
311 main_controller->common.timestamp = CoreTiming::GetTicks(); 312 main_controller->common.timestamp = core_timing.GetTicks();
312 main_controller->common.last_entry_index = 313 main_controller->common.last_entry_index =
313 (main_controller->common.last_entry_index + 1) % 17; 314 (main_controller->common.last_entry_index + 1) % 17;
314 315
diff --git a/src/core/hle/service/hid/controllers/npad.h b/src/core/hle/service/hid/controllers/npad.h
index 106cf58c8..4ff50b3cd 100644
--- a/src/core/hle/service/hid/controllers/npad.h
+++ b/src/core/hle/service/hid/controllers/npad.h
@@ -30,7 +30,7 @@ public:
30 void OnRelease() override; 30 void OnRelease() override;
31 31
32 // When the controller is requesting an update for the shared memory 32 // When the controller is requesting an update for the shared memory
33 void OnUpdate(u8* data, std::size_t size) override; 33 void OnUpdate(const Core::Timing::CoreTiming& core_timing, u8* data, std::size_t size) override;
34 34
35 // Called when input devices should be loaded 35 // Called when input devices should be loaded
36 void OnLoadInputDevices() override; 36 void OnLoadInputDevices() override;
@@ -39,13 +39,13 @@ public:
39 union { 39 union {
40 u32_le raw{}; 40 u32_le raw{};
41 41
42 BitField<0, 1, u32_le> pro_controller; 42 BitField<0, 1, u32> pro_controller;
43 BitField<1, 1, u32_le> handheld; 43 BitField<1, 1, u32> handheld;
44 BitField<2, 1, u32_le> joycon_dual; 44 BitField<2, 1, u32> joycon_dual;
45 BitField<3, 1, u32_le> joycon_left; 45 BitField<3, 1, u32> joycon_left;
46 BitField<4, 1, u32_le> joycon_right; 46 BitField<4, 1, u32> joycon_right;
47 47
48 BitField<6, 1, u32_le> pokeball; // TODO(ogniK): Confirm when possible 48 BitField<6, 1, u32> pokeball; // TODO(ogniK): Confirm when possible
49 }; 49 };
50 }; 50 };
51 static_assert(sizeof(NPadType) == 4, "NPadType is an invalid size"); 51 static_assert(sizeof(NPadType) == 4, "NPadType is an invalid size");
@@ -150,43 +150,43 @@ private:
150 union { 150 union {
151 u64_le raw{}; 151 u64_le raw{};
152 // Button states 152 // Button states
153 BitField<0, 1, u64_le> a; 153 BitField<0, 1, u64> a;
154 BitField<1, 1, u64_le> b; 154 BitField<1, 1, u64> b;
155 BitField<2, 1, u64_le> x; 155 BitField<2, 1, u64> x;
156 BitField<3, 1, u64_le> y; 156 BitField<3, 1, u64> y;
157 BitField<4, 1, u64_le> l_stick; 157 BitField<4, 1, u64> l_stick;
158 BitField<5, 1, u64_le> r_stick; 158 BitField<5, 1, u64> r_stick;
159 BitField<6, 1, u64_le> l; 159 BitField<6, 1, u64> l;
160 BitField<7, 1, u64_le> r; 160 BitField<7, 1, u64> r;
161 BitField<8, 1, u64_le> zl; 161 BitField<8, 1, u64> zl;
162 BitField<9, 1, u64_le> zr; 162 BitField<9, 1, u64> zr;
163 BitField<10, 1, u64_le> plus; 163 BitField<10, 1, u64> plus;
164 BitField<11, 1, u64_le> minus; 164 BitField<11, 1, u64> minus;
165 165
166 // D-Pad 166 // D-Pad
167 BitField<12, 1, u64_le> d_left; 167 BitField<12, 1, u64> d_left;
168 BitField<13, 1, u64_le> d_up; 168 BitField<13, 1, u64> d_up;
169 BitField<14, 1, u64_le> d_right; 169 BitField<14, 1, u64> d_right;
170 BitField<15, 1, u64_le> d_down; 170 BitField<15, 1, u64> d_down;
171 171
172 // Left JoyStick 172 // Left JoyStick
173 BitField<16, 1, u64_le> l_stick_left; 173 BitField<16, 1, u64> l_stick_left;
174 BitField<17, 1, u64_le> l_stick_up; 174 BitField<17, 1, u64> l_stick_up;
175 BitField<18, 1, u64_le> l_stick_right; 175 BitField<18, 1, u64> l_stick_right;
176 BitField<19, 1, u64_le> l_stick_down; 176 BitField<19, 1, u64> l_stick_down;
177 177
178 // Right JoyStick 178 // Right JoyStick
179 BitField<20, 1, u64_le> r_stick_left; 179 BitField<20, 1, u64> r_stick_left;
180 BitField<21, 1, u64_le> r_stick_up; 180 BitField<21, 1, u64> r_stick_up;
181 BitField<22, 1, u64_le> r_stick_right; 181 BitField<22, 1, u64> r_stick_right;
182 BitField<23, 1, u64_le> r_stick_down; 182 BitField<23, 1, u64> r_stick_down;
183 183
184 // Not always active? 184 // Not always active?
185 BitField<24, 1, u64_le> left_sl; 185 BitField<24, 1, u64> left_sl;
186 BitField<25, 1, u64_le> left_sr; 186 BitField<25, 1, u64> left_sr;
187 187
188 BitField<26, 1, u64_le> right_sl; 188 BitField<26, 1, u64> right_sl;
189 BitField<27, 1, u64_le> right_sr; 189 BitField<27, 1, u64> right_sr;
190 }; 190 };
191 }; 191 };
192 static_assert(sizeof(ControllerPadState) == 8, "ControllerPadState is an invalid size"); 192 static_assert(sizeof(ControllerPadState) == 8, "ControllerPadState is an invalid size");
@@ -200,12 +200,12 @@ private:
200 struct ConnectionState { 200 struct ConnectionState {
201 union { 201 union {
202 u32_le raw{}; 202 u32_le raw{};
203 BitField<0, 1, u32_le> IsConnected; 203 BitField<0, 1, u32> IsConnected;
204 BitField<1, 1, u32_le> IsWired; 204 BitField<1, 1, u32> IsWired;
205 BitField<2, 1, u32_le> IsLeftJoyConnected; 205 BitField<2, 1, u32> IsLeftJoyConnected;
206 BitField<3, 1, u32_le> IsLeftJoyWired; 206 BitField<3, 1, u32> IsLeftJoyWired;
207 BitField<4, 1, u32_le> IsRightJoyConnected; 207 BitField<4, 1, u32> IsRightJoyConnected;
208 BitField<5, 1, u32_le> IsRightJoyWired; 208 BitField<5, 1, u32> IsRightJoyWired;
209 }; 209 };
210 }; 210 };
211 static_assert(sizeof(ConnectionState) == 4, "ConnectionState is an invalid size"); 211 static_assert(sizeof(ConnectionState) == 4, "ConnectionState is an invalid size");
@@ -240,23 +240,23 @@ private:
240 struct NPadProperties { 240 struct NPadProperties {
241 union { 241 union {
242 s64_le raw{}; 242 s64_le raw{};
243 BitField<11, 1, s64_le> is_vertical; 243 BitField<11, 1, s64> is_vertical;
244 BitField<12, 1, s64_le> is_horizontal; 244 BitField<12, 1, s64> is_horizontal;
245 BitField<13, 1, s64_le> use_plus; 245 BitField<13, 1, s64> use_plus;
246 BitField<14, 1, s64_le> use_minus; 246 BitField<14, 1, s64> use_minus;
247 }; 247 };
248 }; 248 };
249 249
250 struct NPadDevice { 250 struct NPadDevice {
251 union { 251 union {
252 u32_le raw{}; 252 u32_le raw{};
253 BitField<0, 1, s32_le> pro_controller; 253 BitField<0, 1, s32> pro_controller;
254 BitField<1, 1, s32_le> handheld; 254 BitField<1, 1, s32> handheld;
255 BitField<2, 1, s32_le> handheld_left; 255 BitField<2, 1, s32> handheld_left;
256 BitField<3, 1, s32_le> handheld_right; 256 BitField<3, 1, s32> handheld_right;
257 BitField<4, 1, s32_le> joycon_left; 257 BitField<4, 1, s32> joycon_left;
258 BitField<5, 1, s32_le> joycon_right; 258 BitField<5, 1, s32> joycon_right;
259 BitField<6, 1, s32_le> pokeball; 259 BitField<6, 1, s32> pokeball;
260 }; 260 };
261 }; 261 };
262 262
diff --git a/src/core/hle/service/hid/controllers/stubbed.cpp b/src/core/hle/service/hid/controllers/stubbed.cpp
index 02fcfadd9..946948f5e 100644
--- a/src/core/hle/service/hid/controllers/stubbed.cpp
+++ b/src/core/hle/service/hid/controllers/stubbed.cpp
@@ -16,13 +16,14 @@ void Controller_Stubbed::OnInit() {}
16 16
17void Controller_Stubbed::OnRelease() {} 17void Controller_Stubbed::OnRelease() {}
18 18
19void Controller_Stubbed::OnUpdate(u8* data, std::size_t size) { 19void Controller_Stubbed::OnUpdate(const Core::Timing::CoreTiming& core_timing, u8* data,
20 std::size_t size) {
20 if (!smart_update) { 21 if (!smart_update) {
21 return; 22 return;
22 } 23 }
23 24
24 CommonHeader header{}; 25 CommonHeader header{};
25 header.timestamp = CoreTiming::GetTicks(); 26 header.timestamp = core_timing.GetTicks();
26 header.total_entry_count = 17; 27 header.total_entry_count = 17;
27 header.entry_count = 0; 28 header.entry_count = 0;
28 header.last_entry_index = 0; 29 header.last_entry_index = 0;
diff --git a/src/core/hle/service/hid/controllers/stubbed.h b/src/core/hle/service/hid/controllers/stubbed.h
index 4a21c643e..24469f03e 100644
--- a/src/core/hle/service/hid/controllers/stubbed.h
+++ b/src/core/hle/service/hid/controllers/stubbed.h
@@ -20,7 +20,7 @@ public:
20 void OnRelease() override; 20 void OnRelease() override;
21 21
22 // When the controller is requesting an update for the shared memory 22 // When the controller is requesting an update for the shared memory
23 void OnUpdate(u8* data, std::size_t size) override; 23 void OnUpdate(const Core::Timing::CoreTiming& core_timing, u8* data, std::size_t size) override;
24 24
25 // Called when input devices should be loaded 25 // Called when input devices should be loaded
26 void OnLoadInputDevices() override; 26 void OnLoadInputDevices() override;
diff --git a/src/core/hle/service/hid/controllers/touchscreen.cpp b/src/core/hle/service/hid/controllers/touchscreen.cpp
index f666b1bd8..1a8445a43 100644
--- a/src/core/hle/service/hid/controllers/touchscreen.cpp
+++ b/src/core/hle/service/hid/controllers/touchscreen.cpp
@@ -20,8 +20,9 @@ void Controller_Touchscreen::OnInit() {}
20 20
21void Controller_Touchscreen::OnRelease() {} 21void Controller_Touchscreen::OnRelease() {}
22 22
23void Controller_Touchscreen::OnUpdate(u8* data, std::size_t size) { 23void Controller_Touchscreen::OnUpdate(const Core::Timing::CoreTiming& core_timing, u8* data,
24 shared_memory.header.timestamp = CoreTiming::GetTicks(); 24 std::size_t size) {
25 shared_memory.header.timestamp = core_timing.GetTicks();
25 shared_memory.header.total_entry_count = 17; 26 shared_memory.header.total_entry_count = 17;
26 27
27 if (!IsControllerActivated()) { 28 if (!IsControllerActivated()) {
@@ -48,7 +49,7 @@ void Controller_Touchscreen::OnUpdate(u8* data, std::size_t size) {
48 touch_entry.diameter_x = Settings::values.touchscreen.diameter_x; 49 touch_entry.diameter_x = Settings::values.touchscreen.diameter_x;
49 touch_entry.diameter_y = Settings::values.touchscreen.diameter_y; 50 touch_entry.diameter_y = Settings::values.touchscreen.diameter_y;
50 touch_entry.rotation_angle = Settings::values.touchscreen.rotation_angle; 51 touch_entry.rotation_angle = Settings::values.touchscreen.rotation_angle;
51 const u64 tick = CoreTiming::GetTicks(); 52 const u64 tick = core_timing.GetTicks();
52 touch_entry.delta_time = tick - last_touch; 53 touch_entry.delta_time = tick - last_touch;
53 last_touch = tick; 54 last_touch = tick;
54 touch_entry.finger = Settings::values.touchscreen.finger; 55 touch_entry.finger = Settings::values.touchscreen.finger;
diff --git a/src/core/hle/service/hid/controllers/touchscreen.h b/src/core/hle/service/hid/controllers/touchscreen.h
index 94cd0eba9..76fc340e9 100644
--- a/src/core/hle/service/hid/controllers/touchscreen.h
+++ b/src/core/hle/service/hid/controllers/touchscreen.h
@@ -24,7 +24,7 @@ public:
24 void OnRelease() override; 24 void OnRelease() override;
25 25
26 // When the controller is requesting an update for the shared memory 26 // When the controller is requesting an update for the shared memory
27 void OnUpdate(u8* data, std::size_t size) override; 27 void OnUpdate(const Core::Timing::CoreTiming& core_timing, u8* data, std::size_t size) override;
28 28
29 // Called when input devices should be loaded 29 // Called when input devices should be loaded
30 void OnLoadInputDevices() override; 30 void OnLoadInputDevices() override;
@@ -33,8 +33,8 @@ private:
33 struct Attributes { 33 struct Attributes {
34 union { 34 union {
35 u32 raw{}; 35 u32 raw{};
36 BitField<0, 1, u32_le> start_touch; 36 BitField<0, 1, u32> start_touch;
37 BitField<1, 1, u32_le> end_touch; 37 BitField<1, 1, u32> end_touch;
38 }; 38 };
39 }; 39 };
40 static_assert(sizeof(Attributes) == 0x4, "Attributes is an invalid size"); 40 static_assert(sizeof(Attributes) == 0x4, "Attributes is an invalid size");
diff --git a/src/core/hle/service/hid/controllers/xpad.cpp b/src/core/hle/service/hid/controllers/xpad.cpp
index cd397c70b..1a9da9576 100644
--- a/src/core/hle/service/hid/controllers/xpad.cpp
+++ b/src/core/hle/service/hid/controllers/xpad.cpp
@@ -17,9 +17,10 @@ void Controller_XPad::OnInit() {}
17 17
18void Controller_XPad::OnRelease() {} 18void Controller_XPad::OnRelease() {}
19 19
20void Controller_XPad::OnUpdate(u8* data, std::size_t size) { 20void Controller_XPad::OnUpdate(const Core::Timing::CoreTiming& core_timing, u8* data,
21 std::size_t size) {
21 for (auto& xpad_entry : shared_memory.shared_memory_entries) { 22 for (auto& xpad_entry : shared_memory.shared_memory_entries) {
22 xpad_entry.header.timestamp = CoreTiming::GetTicks(); 23 xpad_entry.header.timestamp = core_timing.GetTicks();
23 xpad_entry.header.total_entry_count = 17; 24 xpad_entry.header.total_entry_count = 17;
24 25
25 if (!IsControllerActivated()) { 26 if (!IsControllerActivated()) {
diff --git a/src/core/hle/service/hid/controllers/xpad.h b/src/core/hle/service/hid/controllers/xpad.h
index ff836989f..2864e6617 100644
--- a/src/core/hle/service/hid/controllers/xpad.h
+++ b/src/core/hle/service/hid/controllers/xpad.h
@@ -22,7 +22,7 @@ public:
22 void OnRelease() override; 22 void OnRelease() override;
23 23
24 // When the controller is requesting an update for the shared memory 24 // When the controller is requesting an update for the shared memory
25 void OnUpdate(u8* data, std::size_t size) override; 25 void OnUpdate(const Core::Timing::CoreTiming& core_timing, u8* data, std::size_t size) override;
26 26
27 // Called when input devices should be loaded 27 // Called when input devices should be loaded
28 void OnLoadInputDevices() override; 28 void OnLoadInputDevices() override;
diff --git a/src/core/hle/service/hid/hid.cpp b/src/core/hle/service/hid/hid.cpp
index 008bf3f02..63b55758b 100644
--- a/src/core/hle/service/hid/hid.cpp
+++ b/src/core/hle/service/hid/hid.cpp
@@ -36,9 +36,9 @@ namespace Service::HID {
36 36
37// Updating period for each HID device. 37// Updating period for each HID device.
38// TODO(ogniK): Find actual polling rate of hid 38// TODO(ogniK): Find actual polling rate of hid
39constexpr u64 pad_update_ticks = CoreTiming::BASE_CLOCK_RATE / 66; 39constexpr s64 pad_update_ticks = static_cast<s64>(Core::Timing::BASE_CLOCK_RATE / 66);
40constexpr u64 accelerometer_update_ticks = CoreTiming::BASE_CLOCK_RATE / 100; 40constexpr s64 accelerometer_update_ticks = static_cast<s64>(Core::Timing::BASE_CLOCK_RATE / 100);
41constexpr u64 gyroscope_update_ticks = CoreTiming::BASE_CLOCK_RATE / 100; 41constexpr s64 gyroscope_update_ticks = static_cast<s64>(Core::Timing::BASE_CLOCK_RATE / 100);
42constexpr std::size_t SHARED_MEMORY_SIZE = 0x40000; 42constexpr std::size_t SHARED_MEMORY_SIZE = 0x40000;
43 43
44IAppletResource::IAppletResource() : ServiceFramework("IAppletResource") { 44IAppletResource::IAppletResource() : ServiceFramework("IAppletResource") {
@@ -73,14 +73,15 @@ IAppletResource::IAppletResource() : ServiceFramework("IAppletResource") {
73 GetController<Controller_Stubbed>(HidController::Unknown3).SetCommonHeaderOffset(0x5000); 73 GetController<Controller_Stubbed>(HidController::Unknown3).SetCommonHeaderOffset(0x5000);
74 74
75 // Register update callbacks 75 // Register update callbacks
76 auto& core_timing = Core::System::GetInstance().CoreTiming();
76 pad_update_event = 77 pad_update_event =
77 CoreTiming::RegisterEvent("HID::UpdatePadCallback", [this](u64 userdata, int cycles_late) { 78 core_timing.RegisterEvent("HID::UpdatePadCallback", [this](u64 userdata, s64 cycles_late) {
78 UpdateControllers(userdata, cycles_late); 79 UpdateControllers(userdata, cycles_late);
79 }); 80 });
80 81
81 // TODO(shinyquagsire23): Other update callbacks? (accel, gyro?) 82 // TODO(shinyquagsire23): Other update callbacks? (accel, gyro?)
82 83
83 CoreTiming::ScheduleEvent(pad_update_ticks, pad_update_event); 84 core_timing.ScheduleEvent(pad_update_ticks, pad_update_event);
84 85
85 ReloadInputDevices(); 86 ReloadInputDevices();
86} 87}
@@ -94,7 +95,7 @@ void IAppletResource::DeactivateController(HidController controller) {
94} 95}
95 96
96IAppletResource ::~IAppletResource() { 97IAppletResource ::~IAppletResource() {
97 CoreTiming::UnscheduleEvent(pad_update_event, 0); 98 Core::System::GetInstance().CoreTiming().UnscheduleEvent(pad_update_event, 0);
98} 99}
99 100
100void IAppletResource::GetSharedMemoryHandle(Kernel::HLERequestContext& ctx) { 101void IAppletResource::GetSharedMemoryHandle(Kernel::HLERequestContext& ctx) {
@@ -105,16 +106,18 @@ void IAppletResource::GetSharedMemoryHandle(Kernel::HLERequestContext& ctx) {
105 rb.PushCopyObjects(shared_mem); 106 rb.PushCopyObjects(shared_mem);
106} 107}
107 108
108void IAppletResource::UpdateControllers(u64 userdata, int cycles_late) { 109void IAppletResource::UpdateControllers(u64 userdata, s64 cycles_late) {
110 auto& core_timing = Core::System::GetInstance().CoreTiming();
111
109 const bool should_reload = Settings::values.is_device_reload_pending.exchange(false); 112 const bool should_reload = Settings::values.is_device_reload_pending.exchange(false);
110 for (const auto& controller : controllers) { 113 for (const auto& controller : controllers) {
111 if (should_reload) { 114 if (should_reload) {
112 controller->OnLoadInputDevices(); 115 controller->OnLoadInputDevices();
113 } 116 }
114 controller->OnUpdate(shared_mem->GetPointer(), SHARED_MEMORY_SIZE); 117 controller->OnUpdate(core_timing, shared_mem->GetPointer(), SHARED_MEMORY_SIZE);
115 } 118 }
116 119
117 CoreTiming::ScheduleEvent(pad_update_ticks - cycles_late, pad_update_event); 120 core_timing.ScheduleEvent(pad_update_ticks - cycles_late, pad_update_event);
118} 121}
119 122
120class IActiveVibrationDeviceList final : public ServiceFramework<IActiveVibrationDeviceList> { 123class IActiveVibrationDeviceList final : public ServiceFramework<IActiveVibrationDeviceList> {
diff --git a/src/core/hle/service/hid/hid.h b/src/core/hle/service/hid/hid.h
index eca27c056..d3660cad2 100644
--- a/src/core/hle/service/hid/hid.h
+++ b/src/core/hle/service/hid/hid.h
@@ -4,10 +4,13 @@
4 4
5#pragma once 5#pragma once
6 6
7#include "core/hle/service/hid/controllers/controller_base.h"
8#include "core/hle/service/service.h"
9
7#include "controllers/controller_base.h" 10#include "controllers/controller_base.h"
8#include "core/hle/service/service.h" 11#include "core/hle/service/service.h"
9 12
10namespace CoreTiming { 13namespace Core::Timing {
11struct EventType; 14struct EventType;
12} 15}
13 16
@@ -15,7 +18,7 @@ namespace Kernel {
15class SharedMemory; 18class SharedMemory;
16} 19}
17 20
18namespace SM { 21namespace Service::SM {
19class ServiceManager; 22class ServiceManager;
20} 23}
21 24
@@ -62,11 +65,11 @@ private:
62 } 65 }
63 66
64 void GetSharedMemoryHandle(Kernel::HLERequestContext& ctx); 67 void GetSharedMemoryHandle(Kernel::HLERequestContext& ctx);
65 void UpdateControllers(u64 userdata, int cycles_late); 68 void UpdateControllers(u64 userdata, s64 cycles_late);
66 69
67 Kernel::SharedPtr<Kernel::SharedMemory> shared_mem; 70 Kernel::SharedPtr<Kernel::SharedMemory> shared_mem;
68 71
69 CoreTiming::EventType* pad_update_event; 72 Core::Timing::EventType* pad_update_event;
70 73
71 std::array<std::unique_ptr<ControllerBase>, static_cast<size_t>(HidController::MaxControllers)> 74 std::array<std::unique_ptr<ControllerBase>, static_cast<size_t>(HidController::MaxControllers)>
72 controllers{}; 75 controllers{};
diff --git a/src/core/hle/service/hid/irs.cpp b/src/core/hle/service/hid/irs.cpp
index 3c7f8b1ee..2c4625c99 100644
--- a/src/core/hle/service/hid/irs.cpp
+++ b/src/core/hle/service/hid/irs.cpp
@@ -98,7 +98,7 @@ void IRS::GetImageTransferProcessorState(Kernel::HLERequestContext& ctx) {
98 98
99 IPC::ResponseBuilder rb{ctx, 5}; 99 IPC::ResponseBuilder rb{ctx, 5};
100 rb.Push(RESULT_SUCCESS); 100 rb.Push(RESULT_SUCCESS);
101 rb.PushRaw<u64>(CoreTiming::GetTicks()); 101 rb.PushRaw<u64>(Core::System::GetInstance().CoreTiming().GetTicks());
102 rb.PushRaw<u32>(0); 102 rb.PushRaw<u32>(0);
103} 103}
104 104
diff --git a/src/core/hle/service/ldr/ldr.cpp b/src/core/hle/service/ldr/ldr.cpp
index 9df7ac50f..d65693fc7 100644
--- a/src/core/hle/service/ldr/ldr.cpp
+++ b/src/core/hle/service/ldr/ldr.cpp
@@ -319,15 +319,14 @@ public:
319 } 319 }
320 320
321 ASSERT(vm_manager 321 ASSERT(vm_manager
322 .MirrorMemory(*map_address, nro_addr, nro_size, 322 .MirrorMemory(*map_address, nro_addr, nro_size, Kernel::MemoryState::ModuleCode)
323 Kernel::MemoryState::ModuleCodeStatic)
324 .IsSuccess()); 323 .IsSuccess());
325 ASSERT(vm_manager.UnmapRange(nro_addr, nro_size).IsSuccess()); 324 ASSERT(vm_manager.UnmapRange(nro_addr, nro_size).IsSuccess());
326 325
327 if (bss_size > 0) { 326 if (bss_size > 0) {
328 ASSERT(vm_manager 327 ASSERT(vm_manager
329 .MirrorMemory(*map_address + nro_size, bss_addr, bss_size, 328 .MirrorMemory(*map_address + nro_size, bss_addr, bss_size,
330 Kernel::MemoryState::ModuleCodeStatic) 329 Kernel::MemoryState::ModuleCode)
331 .IsSuccess()); 330 .IsSuccess());
332 ASSERT(vm_manager.UnmapRange(bss_addr, bss_size).IsSuccess()); 331 ASSERT(vm_manager.UnmapRange(bss_addr, bss_size).IsSuccess());
333 } 332 }
@@ -388,8 +387,7 @@ public:
388 const auto& nro_size = iter->second.size; 387 const auto& nro_size = iter->second.size;
389 388
390 ASSERT(vm_manager 389 ASSERT(vm_manager
391 .MirrorMemory(heap_addr, mapped_addr, nro_size, 390 .MirrorMemory(heap_addr, mapped_addr, nro_size, Kernel::MemoryState::ModuleCode)
392 Kernel::MemoryState::ModuleCodeStatic)
393 .IsSuccess()); 391 .IsSuccess());
394 ASSERT(vm_manager.UnmapRange(mapped_addr, nro_size).IsSuccess()); 392 ASSERT(vm_manager.UnmapRange(mapped_addr, nro_size).IsSuccess());
395 393
diff --git a/src/core/hle/service/lm/lm.cpp b/src/core/hle/service/lm/lm.cpp
index 1f462e087..2a61593e2 100644
--- a/src/core/hle/service/lm/lm.cpp
+++ b/src/core/hle/service/lm/lm.cpp
@@ -42,7 +42,7 @@ private:
42 union { 42 union {
43 BitField<0, 16, Flags> flags; 43 BitField<0, 16, Flags> flags;
44 BitField<16, 8, Severity> severity; 44 BitField<16, 8, Severity> severity;
45 BitField<24, 8, u32_le> verbosity; 45 BitField<24, 8, u32> verbosity;
46 }; 46 };
47 u32_le payload_size; 47 u32_le payload_size;
48 48
diff --git a/src/core/hle/service/ncm/ncm.cpp b/src/core/hle/service/ncm/ncm.cpp
index 0297edca0..5d31f638f 100644
--- a/src/core/hle/service/ncm/ncm.cpp
+++ b/src/core/hle/service/ncm/ncm.cpp
@@ -40,10 +40,10 @@ public:
40 {6, nullptr, "CloseContentStorageForcibly"}, 40 {6, nullptr, "CloseContentStorageForcibly"},
41 {7, nullptr, "CloseContentMetaDatabaseForcibly"}, 41 {7, nullptr, "CloseContentMetaDatabaseForcibly"},
42 {8, nullptr, "CleanupContentMetaDatabase"}, 42 {8, nullptr, "CleanupContentMetaDatabase"},
43 {9, nullptr, "OpenContentStorage2"}, 43 {9, nullptr, "ActivateContentStorage"},
44 {10, nullptr, "CloseContentStorage"}, 44 {10, nullptr, "InactivateContentStorage"},
45 {11, nullptr, "OpenContentMetaDatabase2"}, 45 {11, nullptr, "ActivateContentMetaDatabase"},
46 {12, nullptr, "CloseContentMetaDatabase"}, 46 {12, nullptr, "InactivateContentMetaDatabase"},
47 }; 47 };
48 // clang-format on 48 // clang-format on
49 49
diff --git a/src/core/hle/service/nfc/nfc.cpp b/src/core/hle/service/nfc/nfc.cpp
index 5c62d42ba..ca88bf97f 100644
--- a/src/core/hle/service/nfc/nfc.cpp
+++ b/src/core/hle/service/nfc/nfc.cpp
@@ -150,7 +150,7 @@ private:
150 150
151 IPC::ResponseBuilder rb{ctx, 3}; 151 IPC::ResponseBuilder rb{ctx, 3};
152 rb.Push(RESULT_SUCCESS); 152 rb.Push(RESULT_SUCCESS);
153 rb.PushRaw<u8>(Settings::values.enable_nfc); 153 rb.PushRaw<u8>(true);
154 } 154 }
155 155
156 void GetStateOld(Kernel::HLERequestContext& ctx) { 156 void GetStateOld(Kernel::HLERequestContext& ctx) {
diff --git a/src/core/hle/service/nfp/nfp.cpp b/src/core/hle/service/nfp/nfp.cpp
index 1c4482e47..c6babdd4d 100644
--- a/src/core/hle/service/nfp/nfp.cpp
+++ b/src/core/hle/service/nfp/nfp.cpp
@@ -335,7 +335,7 @@ void Module::Interface::CreateUserInterface(Kernel::HLERequestContext& ctx) {
335} 335}
336 336
337bool Module::Interface::LoadAmiibo(const std::vector<u8>& buffer) { 337bool Module::Interface::LoadAmiibo(const std::vector<u8>& buffer) {
338 std::lock_guard<std::recursive_mutex> lock(HLE::g_hle_lock); 338 std::lock_guard lock{HLE::g_hle_lock};
339 if (buffer.size() < sizeof(AmiiboFile)) { 339 if (buffer.size() < sizeof(AmiiboFile)) {
340 return false; 340 return false;
341 } 341 }
diff --git a/src/core/hle/service/ns/ns.cpp b/src/core/hle/service/ns/ns.cpp
index 2663f56b1..0eb04037a 100644
--- a/src/core/hle/service/ns/ns.cpp
+++ b/src/core/hle/service/ns/ns.cpp
@@ -43,7 +43,7 @@ public:
43 {11, nullptr, "CalculateApplicationOccupiedSize"}, 43 {11, nullptr, "CalculateApplicationOccupiedSize"},
44 {16, nullptr, "PushApplicationRecord"}, 44 {16, nullptr, "PushApplicationRecord"},
45 {17, nullptr, "ListApplicationRecordContentMeta"}, 45 {17, nullptr, "ListApplicationRecordContentMeta"},
46 {19, nullptr, "LaunchApplication"}, 46 {19, nullptr, "LaunchApplicationOld"},
47 {21, nullptr, "GetApplicationContentPath"}, 47 {21, nullptr, "GetApplicationContentPath"},
48 {22, nullptr, "TerminateApplication"}, 48 {22, nullptr, "TerminateApplication"},
49 {23, nullptr, "ResolveApplicationContentPath"}, 49 {23, nullptr, "ResolveApplicationContentPath"},
@@ -96,10 +96,10 @@ public:
96 {86, nullptr, "EnableApplicationCrashReport"}, 96 {86, nullptr, "EnableApplicationCrashReport"},
97 {87, nullptr, "IsApplicationCrashReportEnabled"}, 97 {87, nullptr, "IsApplicationCrashReportEnabled"},
98 {90, nullptr, "BoostSystemMemoryResourceLimit"}, 98 {90, nullptr, "BoostSystemMemoryResourceLimit"},
99 {91, nullptr, "Unknown1"}, 99 {91, nullptr, "DeprecatedLaunchApplication"},
100 {92, nullptr, "Unknown2"}, 100 {92, nullptr, "GetRunningApplicationProgramId"},
101 {93, nullptr, "GetMainApplicationProgramIndex"}, 101 {93, nullptr, "GetMainApplicationProgramIndex"},
102 {94, nullptr, "LaunchApplication2"}, 102 {94, nullptr, "LaunchApplication"},
103 {95, nullptr, "GetApplicationLaunchInfo"}, 103 {95, nullptr, "GetApplicationLaunchInfo"},
104 {96, nullptr, "AcquireApplicationLaunchInfo"}, 104 {96, nullptr, "AcquireApplicationLaunchInfo"},
105 {97, nullptr, "GetMainApplicationProgramIndex2"}, 105 {97, nullptr, "GetMainApplicationProgramIndex2"},
@@ -163,7 +163,7 @@ public:
163 {907, nullptr, "WithdrawApplicationUpdateRequest"}, 163 {907, nullptr, "WithdrawApplicationUpdateRequest"},
164 {908, nullptr, "ListApplicationRecordInstalledContentMeta"}, 164 {908, nullptr, "ListApplicationRecordInstalledContentMeta"},
165 {909, nullptr, "WithdrawCleanupAddOnContentsWithNoRightsRecommendation"}, 165 {909, nullptr, "WithdrawCleanupAddOnContentsWithNoRightsRecommendation"},
166 {910, nullptr, "Unknown3"}, 166 {910, nullptr, "HasApplicationRecord"},
167 {911, nullptr, "SetPreInstalledApplication"}, 167 {911, nullptr, "SetPreInstalledApplication"},
168 {912, nullptr, "ClearPreInstalledApplicationFlag"}, 168 {912, nullptr, "ClearPreInstalledApplicationFlag"},
169 {1000, nullptr, "RequestVerifyApplicationDeprecated"}, 169 {1000, nullptr, "RequestVerifyApplicationDeprecated"},
@@ -219,10 +219,10 @@ public:
219 {2015, nullptr, "CompareSystemDeliveryInfo"}, 219 {2015, nullptr, "CompareSystemDeliveryInfo"},
220 {2016, nullptr, "ListNotCommittedContentMeta"}, 220 {2016, nullptr, "ListNotCommittedContentMeta"},
221 {2017, nullptr, "CreateDownloadTask"}, 221 {2017, nullptr, "CreateDownloadTask"},
222 {2018, nullptr, "Unknown4"}, 222 {2018, nullptr, "GetApplicationDeliveryInfoHash"},
223 {2050, nullptr, "Unknown5"}, 223 {2050, nullptr, "GetApplicationRightsOnClient"},
224 {2100, nullptr, "Unknown6"}, 224 {2100, nullptr, "GetApplicationTerminateResult"},
225 {2101, nullptr, "Unknown7"}, 225 {2101, nullptr, "GetRawApplicationTerminateResult"},
226 {2150, nullptr, "CreateRightsEnvironment"}, 226 {2150, nullptr, "CreateRightsEnvironment"},
227 {2151, nullptr, "DestroyRightsEnvironment"}, 227 {2151, nullptr, "DestroyRightsEnvironment"},
228 {2152, nullptr, "ActivateRightsEnvironment"}, 228 {2152, nullptr, "ActivateRightsEnvironment"},
@@ -237,10 +237,10 @@ public:
237 {2182, nullptr, "SetActiveRightsContextUsingStateToRightsEnvironment"}, 237 {2182, nullptr, "SetActiveRightsContextUsingStateToRightsEnvironment"},
238 {2190, nullptr, "GetRightsEnvironmentHandleForApplication"}, 238 {2190, nullptr, "GetRightsEnvironmentHandleForApplication"},
239 {2199, nullptr, "GetRightsEnvironmentCountForDebug"}, 239 {2199, nullptr, "GetRightsEnvironmentCountForDebug"},
240 {2200, nullptr, "Unknown8"}, 240 {2200, nullptr, "GetGameCardApplicationCopyIdentifier"},
241 {2201, nullptr, "Unknown9"}, 241 {2201, nullptr, "GetInstalledApplicationCopyIdentifier"},
242 {2250, nullptr, "Unknown10"}, 242 {2250, nullptr, "RequestReportActiveELicence"},
243 {2300, nullptr, "Unknown11"}, 243 {2300, nullptr, "ListEventLog"},
244 }; 244 };
245 // clang-format on 245 // clang-format on
246 246
@@ -355,6 +355,7 @@ public:
355 static const FunctionInfo functions[] = { 355 static const FunctionInfo functions[] = {
356 {21, nullptr, "GetApplicationContentPath"}, 356 {21, nullptr, "GetApplicationContentPath"},
357 {23, nullptr, "ResolveApplicationContentPath"}, 357 {23, nullptr, "ResolveApplicationContentPath"},
358 {93, nullptr, "GetRunningApplicationProgramId"},
358 }; 359 };
359 // clang-format on 360 // clang-format on
360 361
@@ -389,6 +390,11 @@ public:
389 // clang-format off 390 // clang-format off
390 static const FunctionInfo functions[] = { 391 static const FunctionInfo functions[] = {
391 {0, nullptr, "RequestLinkDevice"}, 392 {0, nullptr, "RequestLinkDevice"},
393 {1, nullptr, "RequestCleanupAllPreInstalledApplications"},
394 {2, nullptr, "RequestCleanupPreInstalledApplication"},
395 {3, nullptr, "RequestSyncRights"},
396 {4, nullptr, "RequestUnlinkDevice"},
397 {5, nullptr, "RequestRevokeAllELicense"},
392 }; 398 };
393 // clang-format on 399 // clang-format on
394 400
@@ -403,7 +409,7 @@ public:
403 static const FunctionInfo functions[] = { 409 static const FunctionInfo functions[] = {
404 {100, nullptr, "ResetToFactorySettings"}, 410 {100, nullptr, "ResetToFactorySettings"},
405 {101, nullptr, "ResetToFactorySettingsWithoutUserSaveData"}, 411 {101, nullptr, "ResetToFactorySettingsWithoutUserSaveData"},
406 {102, nullptr, "ResetToFactorySettingsForRefurbishment "}, 412 {102, nullptr, "ResetToFactorySettingsForRefurbishment"},
407 }; 413 };
408 // clang-format on 414 // clang-format on
409 415
diff --git a/src/core/hle/service/nvdrv/devices/nvdevice.h b/src/core/hle/service/nvdrv/devices/nvdevice.h
index 0f02a1a18..4f6042b00 100644
--- a/src/core/hle/service/nvdrv/devices/nvdevice.h
+++ b/src/core/hle/service/nvdrv/devices/nvdevice.h
@@ -19,11 +19,11 @@ public:
19 virtual ~nvdevice() = default; 19 virtual ~nvdevice() = default;
20 union Ioctl { 20 union Ioctl {
21 u32_le raw; 21 u32_le raw;
22 BitField<0, 8, u32_le> cmd; 22 BitField<0, 8, u32> cmd;
23 BitField<8, 8, u32_le> group; 23 BitField<8, 8, u32> group;
24 BitField<16, 14, u32_le> length; 24 BitField<16, 14, u32> length;
25 BitField<30, 1, u32_le> is_in; 25 BitField<30, 1, u32> is_in;
26 BitField<31, 1, u32_le> is_out; 26 BitField<31, 1, u32> is_out;
27 }; 27 };
28 28
29 /** 29 /**
diff --git a/src/core/hle/service/nvdrv/devices/nvdisp_disp0.cpp b/src/core/hle/service/nvdrv/devices/nvdisp_disp0.cpp
index 92acc57b1..20c7c39aa 100644
--- a/src/core/hle/service/nvdrv/devices/nvdisp_disp0.cpp
+++ b/src/core/hle/service/nvdrv/devices/nvdisp_disp0.cpp
@@ -23,11 +23,11 @@ u32 nvdisp_disp0::ioctl(Ioctl command, const std::vector<u8>& input, std::vector
23 23
24void nvdisp_disp0::flip(u32 buffer_handle, u32 offset, u32 format, u32 width, u32 height, 24void nvdisp_disp0::flip(u32 buffer_handle, u32 offset, u32 format, u32 width, u32 height,
25 u32 stride, NVFlinger::BufferQueue::BufferTransformFlags transform, 25 u32 stride, NVFlinger::BufferQueue::BufferTransformFlags transform,
26 const MathUtil::Rectangle<int>& crop_rect) { 26 const Common::Rectangle<int>& crop_rect) {
27 VAddr addr = nvmap_dev->GetObjectAddress(buffer_handle); 27 VAddr addr = nvmap_dev->GetObjectAddress(buffer_handle);
28 LOG_WARNING(Service, 28 LOG_TRACE(Service,
29 "Drawing from address {:X} offset {:08X} Width {} Height {} Stride {} Format {}", 29 "Drawing from address {:X} offset {:08X} Width {} Height {} Stride {} Format {}",
30 addr, offset, width, height, stride, format); 30 addr, offset, width, height, stride, format);
31 31
32 using PixelFormat = Tegra::FramebufferConfig::PixelFormat; 32 using PixelFormat = Tegra::FramebufferConfig::PixelFormat;
33 const Tegra::FramebufferConfig framebuffer{ 33 const Tegra::FramebufferConfig framebuffer{
@@ -36,7 +36,7 @@ void nvdisp_disp0::flip(u32 buffer_handle, u32 offset, u32 format, u32 width, u3
36 36
37 auto& instance = Core::System::GetInstance(); 37 auto& instance = Core::System::GetInstance();
38 instance.GetPerfStats().EndGameFrame(); 38 instance.GetPerfStats().EndGameFrame();
39 instance.Renderer().SwapBuffers(framebuffer); 39 instance.GPU().SwapBuffers(framebuffer);
40} 40}
41 41
42} // namespace Service::Nvidia::Devices 42} // namespace Service::Nvidia::Devices
diff --git a/src/core/hle/service/nvdrv/devices/nvdisp_disp0.h b/src/core/hle/service/nvdrv/devices/nvdisp_disp0.h
index a45086e45..12f3ef825 100644
--- a/src/core/hle/service/nvdrv/devices/nvdisp_disp0.h
+++ b/src/core/hle/service/nvdrv/devices/nvdisp_disp0.h
@@ -18,14 +18,14 @@ class nvmap;
18class nvdisp_disp0 final : public nvdevice { 18class nvdisp_disp0 final : public nvdevice {
19public: 19public:
20 explicit nvdisp_disp0(std::shared_ptr<nvmap> nvmap_dev); 20 explicit nvdisp_disp0(std::shared_ptr<nvmap> nvmap_dev);
21 ~nvdisp_disp0(); 21 ~nvdisp_disp0() override;
22 22
23 u32 ioctl(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output) override; 23 u32 ioctl(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output) override;
24 24
25 /// Performs a screen flip, drawing the buffer pointed to by the handle. 25 /// Performs a screen flip, drawing the buffer pointed to by the handle.
26 void flip(u32 buffer_handle, u32 offset, u32 format, u32 width, u32 height, u32 stride, 26 void flip(u32 buffer_handle, u32 offset, u32 format, u32 width, u32 height, u32 stride,
27 NVFlinger::BufferQueue::BufferTransformFlags transform, 27 NVFlinger::BufferQueue::BufferTransformFlags transform,
28 const MathUtil::Rectangle<int>& crop_rect); 28 const Common::Rectangle<int>& crop_rect);
29 29
30private: 30private:
31 std::shared_ptr<nvmap> nvmap_dev; 31 std::shared_ptr<nvmap> nvmap_dev;
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.cpp b/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.cpp
index 466db7ccd..af62d33d2 100644
--- a/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.cpp
+++ b/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.cpp
@@ -10,6 +10,7 @@
10#include "core/core.h" 10#include "core/core.h"
11#include "core/hle/service/nvdrv/devices/nvhost_as_gpu.h" 11#include "core/hle/service/nvdrv/devices/nvhost_as_gpu.h"
12#include "core/hle/service/nvdrv/devices/nvmap.h" 12#include "core/hle/service/nvdrv/devices/nvmap.h"
13#include "core/memory.h"
13#include "video_core/memory_manager.h" 14#include "video_core/memory_manager.h"
14#include "video_core/rasterizer_interface.h" 15#include "video_core/rasterizer_interface.h"
15#include "video_core/renderer_base.h" 16#include "video_core/renderer_base.h"
@@ -88,7 +89,7 @@ u32 nvhost_as_gpu::Remap(const std::vector<u8>& input, std::vector<u8>& output)
88 for (const auto& entry : entries) { 89 for (const auto& entry : entries) {
89 LOG_WARNING(Service_NVDRV, "remap entry, offset=0x{:X} handle=0x{:X} pages=0x{:X}", 90 LOG_WARNING(Service_NVDRV, "remap entry, offset=0x{:X} handle=0x{:X} pages=0x{:X}",
90 entry.offset, entry.nvmap_handle, entry.pages); 91 entry.offset, entry.nvmap_handle, entry.pages);
91 Tegra::GPUVAddr offset = static_cast<Tegra::GPUVAddr>(entry.offset) << 0x10; 92 GPUVAddr offset = static_cast<GPUVAddr>(entry.offset) << 0x10;
92 auto object = nvmap_dev->GetObject(entry.nvmap_handle); 93 auto object = nvmap_dev->GetObject(entry.nvmap_handle);
93 if (!object) { 94 if (!object) {
94 LOG_CRITICAL(Service_NVDRV, "nvmap {} is an invalid handle!", entry.nvmap_handle); 95 LOG_CRITICAL(Service_NVDRV, "nvmap {} is an invalid handle!", entry.nvmap_handle);
@@ -101,7 +102,7 @@ u32 nvhost_as_gpu::Remap(const std::vector<u8>& input, std::vector<u8>& output)
101 u64 size = static_cast<u64>(entry.pages) << 0x10; 102 u64 size = static_cast<u64>(entry.pages) << 0x10;
102 ASSERT(size <= object->size); 103 ASSERT(size <= object->size);
103 104
104 Tegra::GPUVAddr returned = gpu.MemoryManager().MapBufferEx(object->addr, offset, size); 105 GPUVAddr returned = gpu.MemoryManager().MapBufferEx(object->addr, offset, size);
105 ASSERT(returned == offset); 106 ASSERT(returned == offset);
106 } 107 }
107 std::memcpy(output.data(), entries.data(), output.size()); 108 std::memcpy(output.data(), entries.data(), output.size());
@@ -172,16 +173,8 @@ u32 nvhost_as_gpu::UnmapBuffer(const std::vector<u8>& input, std::vector<u8>& ou
172 return 0; 173 return 0;
173 } 174 }
174 175
175 auto& system_instance = Core::System::GetInstance(); 176 params.offset = Core::System::GetInstance().GPU().MemoryManager().UnmapBuffer(params.offset,
176 177 itr->second.size);
177 // Remove this memory region from the rasterizer cache.
178 auto& gpu = system_instance.GPU();
179 auto cpu_addr = gpu.MemoryManager().GpuToCpuAddress(params.offset);
180 ASSERT(cpu_addr);
181 system_instance.Renderer().Rasterizer().FlushAndInvalidateRegion(*cpu_addr, itr->second.size);
182
183 params.offset = gpu.MemoryManager().UnmapBuffer(params.offset, itr->second.size);
184
185 buffer_mappings.erase(itr->second.offset); 178 buffer_mappings.erase(itr->second.offset);
186 179
187 std::memcpy(output.data(), &params, output.size()); 180 std::memcpy(output.data(), &params, output.size());
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_ctrl_gpu.cpp b/src/core/hle/service/nvdrv/devices/nvhost_ctrl_gpu.cpp
index d57a54ee8..45812d238 100644
--- a/src/core/hle/service/nvdrv/devices/nvhost_ctrl_gpu.cpp
+++ b/src/core/hle/service/nvdrv/devices/nvhost_ctrl_gpu.cpp
@@ -5,6 +5,7 @@
5#include <cstring> 5#include <cstring>
6#include "common/assert.h" 6#include "common/assert.h"
7#include "common/logging/log.h" 7#include "common/logging/log.h"
8#include "core/core.h"
8#include "core/core_timing.h" 9#include "core/core_timing.h"
9#include "core/core_timing_util.h" 10#include "core/core_timing_util.h"
10#include "core/hle/service/nvdrv/devices/nvhost_ctrl_gpu.h" 11#include "core/hle/service/nvdrv/devices/nvhost_ctrl_gpu.h"
@@ -184,7 +185,7 @@ u32 nvhost_ctrl_gpu::GetGpuTime(const std::vector<u8>& input, std::vector<u8>& o
184 185
185 IoctlGetGpuTime params{}; 186 IoctlGetGpuTime params{};
186 std::memcpy(&params, input.data(), input.size()); 187 std::memcpy(&params, input.data(), input.size());
187 params.gpu_time = CoreTiming::cyclesToNs(CoreTiming::GetTicks()); 188 params.gpu_time = Core::Timing::cyclesToNs(Core::System::GetInstance().CoreTiming().GetTicks());
188 std::memcpy(output.data(), &params, output.size()); 189 std::memcpy(output.data(), &params, output.size());
189 return 0; 190 return 0;
190} 191}
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_gpu.cpp b/src/core/hle/service/nvdrv/devices/nvhost_gpu.cpp
index 0a650f36c..8ce7bc7a5 100644
--- a/src/core/hle/service/nvdrv/devices/nvhost_gpu.cpp
+++ b/src/core/hle/service/nvdrv/devices/nvhost_gpu.cpp
@@ -136,16 +136,6 @@ u32 nvhost_gpu::AllocateObjectContext(const std::vector<u8>& input, std::vector<
136 return 0; 136 return 0;
137} 137}
138 138
139static void PushGPUEntries(Tegra::CommandList&& entries) {
140 if (entries.empty()) {
141 return;
142 }
143
144 auto& dma_pusher{Core::System::GetInstance().GPU().DmaPusher()};
145 dma_pusher.Push(std::move(entries));
146 dma_pusher.DispatchCalls();
147}
148
149u32 nvhost_gpu::SubmitGPFIFO(const std::vector<u8>& input, std::vector<u8>& output) { 139u32 nvhost_gpu::SubmitGPFIFO(const std::vector<u8>& input, std::vector<u8>& output) {
150 if (input.size() < sizeof(IoctlSubmitGpfifo)) { 140 if (input.size() < sizeof(IoctlSubmitGpfifo)) {
151 UNIMPLEMENTED(); 141 UNIMPLEMENTED();
@@ -163,7 +153,7 @@ u32 nvhost_gpu::SubmitGPFIFO(const std::vector<u8>& input, std::vector<u8>& outp
163 std::memcpy(entries.data(), &input[sizeof(IoctlSubmitGpfifo)], 153 std::memcpy(entries.data(), &input[sizeof(IoctlSubmitGpfifo)],
164 params.num_entries * sizeof(Tegra::CommandListHeader)); 154 params.num_entries * sizeof(Tegra::CommandListHeader));
165 155
166 PushGPUEntries(std::move(entries)); 156 Core::System::GetInstance().GPU().PushGPUEntries(std::move(entries));
167 157
168 params.fence_out.id = 0; 158 params.fence_out.id = 0;
169 params.fence_out.value = 0; 159 params.fence_out.value = 0;
@@ -184,7 +174,7 @@ u32 nvhost_gpu::KickoffPB(const std::vector<u8>& input, std::vector<u8>& output)
184 Memory::ReadBlock(params.address, entries.data(), 174 Memory::ReadBlock(params.address, entries.data(),
185 params.num_entries * sizeof(Tegra::CommandListHeader)); 175 params.num_entries * sizeof(Tegra::CommandListHeader));
186 176
187 PushGPUEntries(std::move(entries)); 177 Core::System::GetInstance().GPU().PushGPUEntries(std::move(entries));
188 178
189 params.fence_out.id = 0; 179 params.fence_out.id = 0;
190 params.fence_out.value = 0; 180 params.fence_out.value = 0;
diff --git a/src/core/hle/service/nvdrv/interface.h b/src/core/hle/service/nvdrv/interface.h
index fe311b069..5b4889910 100644
--- a/src/core/hle/service/nvdrv/interface.h
+++ b/src/core/hle/service/nvdrv/interface.h
@@ -17,7 +17,7 @@ namespace Service::Nvidia {
17class NVDRV final : public ServiceFramework<NVDRV> { 17class NVDRV final : public ServiceFramework<NVDRV> {
18public: 18public:
19 NVDRV(std::shared_ptr<Module> nvdrv, const char* name); 19 NVDRV(std::shared_ptr<Module> nvdrv, const char* name);
20 ~NVDRV(); 20 ~NVDRV() override;
21 21
22private: 22private:
23 void Open(Kernel::HLERequestContext& ctx); 23 void Open(Kernel::HLERequestContext& ctx);
diff --git a/src/core/hle/service/nvdrv/nvmemp.h b/src/core/hle/service/nvdrv/nvmemp.h
index 5a4dfc1f9..6eafb1346 100644
--- a/src/core/hle/service/nvdrv/nvmemp.h
+++ b/src/core/hle/service/nvdrv/nvmemp.h
@@ -11,7 +11,7 @@ namespace Service::Nvidia {
11class NVMEMP final : public ServiceFramework<NVMEMP> { 11class NVMEMP final : public ServiceFramework<NVMEMP> {
12public: 12public:
13 NVMEMP(); 13 NVMEMP();
14 ~NVMEMP(); 14 ~NVMEMP() override;
15 15
16private: 16private:
17 void Cmd0(Kernel::HLERequestContext& ctx); 17 void Cmd0(Kernel::HLERequestContext& ctx);
diff --git a/src/core/hle/service/nvflinger/buffer_queue.cpp b/src/core/hle/service/nvflinger/buffer_queue.cpp
index fc07d9bb8..4d150fc71 100644
--- a/src/core/hle/service/nvflinger/buffer_queue.cpp
+++ b/src/core/hle/service/nvflinger/buffer_queue.cpp
@@ -63,7 +63,7 @@ const IGBPBuffer& BufferQueue::RequestBuffer(u32 slot) const {
63} 63}
64 64
65void BufferQueue::QueueBuffer(u32 slot, BufferTransformFlags transform, 65void BufferQueue::QueueBuffer(u32 slot, BufferTransformFlags transform,
66 const MathUtil::Rectangle<int>& crop_rect) { 66 const Common::Rectangle<int>& crop_rect) {
67 auto itr = std::find_if(queue.begin(), queue.end(), 67 auto itr = std::find_if(queue.begin(), queue.end(),
68 [&](const Buffer& buffer) { return buffer.slot == slot; }); 68 [&](const Buffer& buffer) { return buffer.slot == slot; });
69 ASSERT(itr != queue.end()); 69 ASSERT(itr != queue.end());
diff --git a/src/core/hle/service/nvflinger/buffer_queue.h b/src/core/hle/service/nvflinger/buffer_queue.h
index b171f256c..e1ccb6171 100644
--- a/src/core/hle/service/nvflinger/buffer_queue.h
+++ b/src/core/hle/service/nvflinger/buffer_queue.h
@@ -13,10 +13,6 @@
13#include "core/hle/kernel/object.h" 13#include "core/hle/kernel/object.h"
14#include "core/hle/kernel/writable_event.h" 14#include "core/hle/kernel/writable_event.h"
15 15
16namespace CoreTiming {
17struct EventType;
18}
19
20namespace Service::NVFlinger { 16namespace Service::NVFlinger {
21 17
22struct IGBPBuffer { 18struct IGBPBuffer {
@@ -71,14 +67,14 @@ public:
71 Status status = Status::Free; 67 Status status = Status::Free;
72 IGBPBuffer igbp_buffer; 68 IGBPBuffer igbp_buffer;
73 BufferTransformFlags transform; 69 BufferTransformFlags transform;
74 MathUtil::Rectangle<int> crop_rect; 70 Common::Rectangle<int> crop_rect;
75 }; 71 };
76 72
77 void SetPreallocatedBuffer(u32 slot, const IGBPBuffer& igbp_buffer); 73 void SetPreallocatedBuffer(u32 slot, const IGBPBuffer& igbp_buffer);
78 std::optional<u32> DequeueBuffer(u32 width, u32 height); 74 std::optional<u32> DequeueBuffer(u32 width, u32 height);
79 const IGBPBuffer& RequestBuffer(u32 slot) const; 75 const IGBPBuffer& RequestBuffer(u32 slot) const;
80 void QueueBuffer(u32 slot, BufferTransformFlags transform, 76 void QueueBuffer(u32 slot, BufferTransformFlags transform,
81 const MathUtil::Rectangle<int>& crop_rect); 77 const Common::Rectangle<int>& crop_rect);
82 std::optional<std::reference_wrapper<const Buffer>> AcquireBuffer(); 78 std::optional<std::reference_wrapper<const Buffer>> AcquireBuffer();
83 void ReleaseBuffer(u32 slot); 79 void ReleaseBuffer(u32 slot);
84 u32 Query(QueryType type); 80 u32 Query(QueryType type);
diff --git a/src/core/hle/service/nvflinger/nvflinger.cpp b/src/core/hle/service/nvflinger/nvflinger.cpp
index 6a613aeab..c7f5bbf28 100644
--- a/src/core/hle/service/nvflinger/nvflinger.cpp
+++ b/src/core/hle/service/nvflinger/nvflinger.cpp
@@ -5,7 +5,6 @@
5#include <algorithm> 5#include <algorithm>
6#include <optional> 6#include <optional>
7 7
8#include "common/alignment.h"
9#include "common/assert.h" 8#include "common/assert.h"
10#include "common/logging/log.h" 9#include "common/logging/log.h"
11#include "common/microprofile.h" 10#include "common/microprofile.h"
@@ -15,124 +14,170 @@
15#include "core/core_timing_util.h" 14#include "core/core_timing_util.h"
16#include "core/hle/kernel/kernel.h" 15#include "core/hle/kernel/kernel.h"
17#include "core/hle/kernel/readable_event.h" 16#include "core/hle/kernel/readable_event.h"
18#include "core/hle/kernel/writable_event.h"
19#include "core/hle/service/nvdrv/devices/nvdisp_disp0.h" 17#include "core/hle/service/nvdrv/devices/nvdisp_disp0.h"
20#include "core/hle/service/nvdrv/nvdrv.h" 18#include "core/hle/service/nvdrv/nvdrv.h"
21#include "core/hle/service/nvflinger/buffer_queue.h" 19#include "core/hle/service/nvflinger/buffer_queue.h"
22#include "core/hle/service/nvflinger/nvflinger.h" 20#include "core/hle/service/nvflinger/nvflinger.h"
21#include "core/hle/service/vi/display/vi_display.h"
22#include "core/hle/service/vi/layer/vi_layer.h"
23#include "core/perf_stats.h" 23#include "core/perf_stats.h"
24#include "video_core/renderer_base.h" 24#include "video_core/renderer_base.h"
25#include "video_core/video_core.h"
26 25
27namespace Service::NVFlinger { 26namespace Service::NVFlinger {
28 27
29constexpr std::size_t SCREEN_REFRESH_RATE = 60; 28constexpr std::size_t SCREEN_REFRESH_RATE = 60;
30constexpr u64 frame_ticks = static_cast<u64>(CoreTiming::BASE_CLOCK_RATE / SCREEN_REFRESH_RATE); 29constexpr s64 frame_ticks = static_cast<s64>(Core::Timing::BASE_CLOCK_RATE / SCREEN_REFRESH_RATE);
31 30
32NVFlinger::NVFlinger() { 31NVFlinger::NVFlinger(Core::Timing::CoreTiming& core_timing) : core_timing{core_timing} {
33 // Add the different displays to the list of displays.
34 displays.emplace_back(0, "Default"); 32 displays.emplace_back(0, "Default");
35 displays.emplace_back(1, "External"); 33 displays.emplace_back(1, "External");
36 displays.emplace_back(2, "Edid"); 34 displays.emplace_back(2, "Edid");
37 displays.emplace_back(3, "Internal"); 35 displays.emplace_back(3, "Internal");
36 displays.emplace_back(4, "Null");
38 37
39 // Schedule the screen composition events 38 // Schedule the screen composition events
40 composition_event = 39 composition_event =
41 CoreTiming::RegisterEvent("ScreenComposition", [this](u64 userdata, int cycles_late) { 40 core_timing.RegisterEvent("ScreenComposition", [this](u64 userdata, s64 cycles_late) {
42 Compose(); 41 Compose();
43 CoreTiming::ScheduleEvent(frame_ticks - cycles_late, composition_event); 42 this->core_timing.ScheduleEvent(frame_ticks - cycles_late, composition_event);
44 }); 43 });
45 44
46 CoreTiming::ScheduleEvent(frame_ticks, composition_event); 45 core_timing.ScheduleEvent(frame_ticks, composition_event);
47} 46}
48 47
49NVFlinger::~NVFlinger() { 48NVFlinger::~NVFlinger() {
50 CoreTiming::UnscheduleEvent(composition_event, 0); 49 core_timing.UnscheduleEvent(composition_event, 0);
51} 50}
52 51
53void NVFlinger::SetNVDrvInstance(std::shared_ptr<Nvidia::Module> instance) { 52void NVFlinger::SetNVDrvInstance(std::shared_ptr<Nvidia::Module> instance) {
54 nvdrv = std::move(instance); 53 nvdrv = std::move(instance);
55} 54}
56 55
57u64 NVFlinger::OpenDisplay(std::string_view name) { 56std::optional<u64> NVFlinger::OpenDisplay(std::string_view name) {
58 LOG_WARNING(Service, "Opening display {}", name); 57 LOG_DEBUG(Service, "Opening \"{}\" display", name);
59 58
60 // TODO(Subv): Currently we only support the Default display. 59 // TODO(Subv): Currently we only support the Default display.
61 ASSERT(name == "Default"); 60 ASSERT(name == "Default");
62 61
63 auto itr = std::find_if(displays.begin(), displays.end(), 62 const auto itr =
64 [&](const Display& display) { return display.name == name; }); 63 std::find_if(displays.begin(), displays.end(),
65 64 [&](const VI::Display& display) { return display.GetName() == name; });
66 ASSERT(itr != displays.end()); 65 if (itr == displays.end()) {
66 return {};
67 }
67 68
68 return itr->id; 69 return itr->GetID();
69} 70}
70 71
71u64 NVFlinger::CreateLayer(u64 display_id) { 72std::optional<u64> NVFlinger::CreateLayer(u64 display_id) {
72 auto& display = GetDisplay(display_id); 73 auto* const display = FindDisplay(display_id);
73 74
74 ASSERT_MSG(display.layers.empty(), "Only one layer is supported per display at the moment"); 75 if (display == nullptr) {
76 return {};
77 }
75 78
76 u64 layer_id = next_layer_id++; 79 const u64 layer_id = next_layer_id++;
77 u32 buffer_queue_id = next_buffer_queue_id++; 80 const u32 buffer_queue_id = next_buffer_queue_id++;
78 auto buffer_queue = std::make_shared<BufferQueue>(buffer_queue_id, layer_id); 81 buffer_queues.emplace_back(buffer_queue_id, layer_id);
79 display.layers.emplace_back(layer_id, buffer_queue); 82 display->CreateLayer(layer_id, buffer_queues.back());
80 buffer_queues.emplace_back(std::move(buffer_queue));
81 return layer_id; 83 return layer_id;
82} 84}
83 85
84u32 NVFlinger::GetBufferQueueId(u64 display_id, u64 layer_id) { 86std::optional<u32> NVFlinger::FindBufferQueueId(u64 display_id, u64 layer_id) const {
85 const auto& layer = GetLayer(display_id, layer_id); 87 const auto* const layer = FindLayer(display_id, layer_id);
86 return layer.buffer_queue->GetId(); 88
89 if (layer == nullptr) {
90 return {};
91 }
92
93 return layer->GetBufferQueue().GetId();
87} 94}
88 95
89Kernel::SharedPtr<Kernel::ReadableEvent> NVFlinger::GetVsyncEvent(u64 display_id) { 96Kernel::SharedPtr<Kernel::ReadableEvent> NVFlinger::FindVsyncEvent(u64 display_id) const {
90 return GetDisplay(display_id).vsync_event.readable; 97 auto* const display = FindDisplay(display_id);
98
99 if (display == nullptr) {
100 return nullptr;
101 }
102
103 return display->GetVSyncEvent();
91} 104}
92 105
93std::shared_ptr<BufferQueue> NVFlinger::GetBufferQueue(u32 id) const { 106BufferQueue& NVFlinger::FindBufferQueue(u32 id) {
94 auto itr = std::find_if(buffer_queues.begin(), buffer_queues.end(), 107 const auto itr = std::find_if(buffer_queues.begin(), buffer_queues.end(),
95 [&](const auto& queue) { return queue->GetId() == id; }); 108 [id](const auto& queue) { return queue.GetId() == id; });
96 109
97 ASSERT(itr != buffer_queues.end()); 110 ASSERT(itr != buffer_queues.end());
98 return *itr; 111 return *itr;
99} 112}
100 113
101Display& NVFlinger::GetDisplay(u64 display_id) { 114const BufferQueue& NVFlinger::FindBufferQueue(u32 id) const {
102 auto itr = std::find_if(displays.begin(), displays.end(), 115 const auto itr = std::find_if(buffer_queues.begin(), buffer_queues.end(),
103 [&](const Display& display) { return display.id == display_id; }); 116 [id](const auto& queue) { return queue.GetId() == id; });
104 117
105 ASSERT(itr != displays.end()); 118 ASSERT(itr != buffer_queues.end());
106 return *itr; 119 return *itr;
107} 120}
108 121
109Layer& NVFlinger::GetLayer(u64 display_id, u64 layer_id) { 122VI::Display* NVFlinger::FindDisplay(u64 display_id) {
110 auto& display = GetDisplay(display_id); 123 const auto itr =
124 std::find_if(displays.begin(), displays.end(),
125 [&](const VI::Display& display) { return display.GetID() == display_id; });
111 126
112 auto itr = std::find_if(display.layers.begin(), display.layers.end(), 127 if (itr == displays.end()) {
113 [&](const Layer& layer) { return layer.id == layer_id; }); 128 return nullptr;
129 }
114 130
115 ASSERT(itr != display.layers.end()); 131 return &*itr;
116 return *itr; 132}
133
134const VI::Display* NVFlinger::FindDisplay(u64 display_id) const {
135 const auto itr =
136 std::find_if(displays.begin(), displays.end(),
137 [&](const VI::Display& display) { return display.GetID() == display_id; });
138
139 if (itr == displays.end()) {
140 return nullptr;
141 }
142
143 return &*itr;
144}
145
146VI::Layer* NVFlinger::FindLayer(u64 display_id, u64 layer_id) {
147 auto* const display = FindDisplay(display_id);
148
149 if (display == nullptr) {
150 return nullptr;
151 }
152
153 return display->FindLayer(layer_id);
154}
155
156const VI::Layer* NVFlinger::FindLayer(u64 display_id, u64 layer_id) const {
157 const auto* const display = FindDisplay(display_id);
158
159 if (display == nullptr) {
160 return nullptr;
161 }
162
163 return display->FindLayer(layer_id);
117} 164}
118 165
119void NVFlinger::Compose() { 166void NVFlinger::Compose() {
120 for (auto& display : displays) { 167 for (auto& display : displays) {
121 // Trigger vsync for this display at the end of drawing 168 // Trigger vsync for this display at the end of drawing
122 SCOPE_EXIT({ display.vsync_event.writable->Signal(); }); 169 SCOPE_EXIT({ display.SignalVSyncEvent(); });
123 170
124 // Don't do anything for displays without layers. 171 // Don't do anything for displays without layers.
125 if (display.layers.empty()) 172 if (!display.HasLayers())
126 continue; 173 continue;
127 174
128 // TODO(Subv): Support more than 1 layer. 175 // TODO(Subv): Support more than 1 layer.
129 ASSERT_MSG(display.layers.size() == 1, "Max 1 layer per display is supported"); 176 VI::Layer& layer = display.GetLayer(0);
130 177 auto& buffer_queue = layer.GetBufferQueue();
131 Layer& layer = display.layers[0];
132 auto& buffer_queue = layer.buffer_queue;
133 178
134 // Search for a queued buffer and acquire it 179 // Search for a queued buffer and acquire it
135 auto buffer = buffer_queue->AcquireBuffer(); 180 auto buffer = buffer_queue.AcquireBuffer();
136 181
137 MicroProfileFlip(); 182 MicroProfileFlip();
138 183
@@ -141,11 +186,11 @@ void NVFlinger::Compose() {
141 186
142 // There was no queued buffer to draw, render previous frame 187 // There was no queued buffer to draw, render previous frame
143 system_instance.GetPerfStats().EndGameFrame(); 188 system_instance.GetPerfStats().EndGameFrame();
144 system_instance.Renderer().SwapBuffers({}); 189 system_instance.GPU().SwapBuffers({});
145 continue; 190 continue;
146 } 191 }
147 192
148 auto& igbp_buffer = buffer->get().igbp_buffer; 193 const auto& igbp_buffer = buffer->get().igbp_buffer;
149 194
150 // Now send the buffer to the GPU for drawing. 195 // Now send the buffer to the GPU for drawing.
151 // TODO(Subv): Support more than just disp0. The display device selection is probably based 196 // TODO(Subv): Support more than just disp0. The display device selection is probably based
@@ -157,19 +202,8 @@ void NVFlinger::Compose() {
157 igbp_buffer.width, igbp_buffer.height, igbp_buffer.stride, 202 igbp_buffer.width, igbp_buffer.height, igbp_buffer.stride,
158 buffer->get().transform, buffer->get().crop_rect); 203 buffer->get().transform, buffer->get().crop_rect);
159 204
160 buffer_queue->ReleaseBuffer(buffer->get().slot); 205 buffer_queue.ReleaseBuffer(buffer->get().slot);
161 } 206 }
162} 207}
163 208
164Layer::Layer(u64 id, std::shared_ptr<BufferQueue> queue) : id(id), buffer_queue(std::move(queue)) {}
165Layer::~Layer() = default;
166
167Display::Display(u64 id, std::string name) : id(id), name(std::move(name)) {
168 auto& kernel = Core::System::GetInstance().Kernel();
169 vsync_event = Kernel::WritableEvent::CreateEventPair(kernel, Kernel::ResetType::Sticky,
170 fmt::format("Display VSync Event {}", id));
171}
172
173Display::~Display() = default;
174
175} // namespace Service::NVFlinger 209} // namespace Service::NVFlinger
diff --git a/src/core/hle/service/nvflinger/nvflinger.h b/src/core/hle/service/nvflinger/nvflinger.h
index 9abba555b..c0a83fffb 100644
--- a/src/core/hle/service/nvflinger/nvflinger.h
+++ b/src/core/hle/service/nvflinger/nvflinger.h
@@ -5,6 +5,7 @@
5#pragma once 5#pragma once
6 6
7#include <memory> 7#include <memory>
8#include <optional>
8#include <string> 9#include <string>
9#include <string_view> 10#include <string_view>
10#include <vector> 11#include <vector>
@@ -12,9 +13,10 @@
12#include "common/common_types.h" 13#include "common/common_types.h"
13#include "core/hle/kernel/object.h" 14#include "core/hle/kernel/object.h"
14 15
15namespace CoreTiming { 16namespace Core::Timing {
17class CoreTiming;
16struct EventType; 18struct EventType;
17} 19} // namespace Core::Timing
18 20
19namespace Kernel { 21namespace Kernel {
20class ReadableEvent; 22class ReadableEvent;
@@ -23,69 +25,72 @@ class WritableEvent;
23 25
24namespace Service::Nvidia { 26namespace Service::Nvidia {
25class Module; 27class Module;
26} 28} // namespace Service::Nvidia
29
30namespace Service::VI {
31class Display;
32class Layer;
33} // namespace Service::VI
27 34
28namespace Service::NVFlinger { 35namespace Service::NVFlinger {
29 36
30class BufferQueue; 37class BufferQueue;
31 38
32struct Layer {
33 Layer(u64 id, std::shared_ptr<BufferQueue> queue);
34 ~Layer();
35
36 u64 id;
37 std::shared_ptr<BufferQueue> buffer_queue;
38};
39
40struct Display {
41 Display(u64 id, std::string name);
42 ~Display();
43
44 u64 id;
45 std::string name;
46
47 std::vector<Layer> layers;
48 Kernel::EventPair vsync_event;
49};
50
51class NVFlinger final { 39class NVFlinger final {
52public: 40public:
53 NVFlinger(); 41 explicit NVFlinger(Core::Timing::CoreTiming& core_timing);
54 ~NVFlinger(); 42 ~NVFlinger();
55 43
56 /// Sets the NVDrv module instance to use to send buffers to the GPU. 44 /// Sets the NVDrv module instance to use to send buffers to the GPU.
57 void SetNVDrvInstance(std::shared_ptr<Nvidia::Module> instance); 45 void SetNVDrvInstance(std::shared_ptr<Nvidia::Module> instance);
58 46
59 /// Opens the specified display and returns the id. 47 /// Opens the specified display and returns the ID.
60 u64 OpenDisplay(std::string_view name); 48 ///
49 /// If an invalid display name is provided, then an empty optional is returned.
50 std::optional<u64> OpenDisplay(std::string_view name);
61 51
62 /// Creates a layer on the specified display and returns the layer id. 52 /// Creates a layer on the specified display and returns the layer ID.
63 u64 CreateLayer(u64 display_id); 53 ///
54 /// If an invalid display ID is specified, then an empty optional is returned.
55 std::optional<u64> CreateLayer(u64 display_id);
64 56
65 /// Gets the buffer queue id of the specified layer in the specified display. 57 /// Finds the buffer queue ID of the specified layer in the specified display.
66 u32 GetBufferQueueId(u64 display_id, u64 layer_id); 58 ///
59 /// If an invalid display ID or layer ID is provided, then an empty optional is returned.
60 std::optional<u32> FindBufferQueueId(u64 display_id, u64 layer_id) const;
67 61
68 /// Gets the vsync event for the specified display. 62 /// Gets the vsync event for the specified display.
69 Kernel::SharedPtr<Kernel::ReadableEvent> GetVsyncEvent(u64 display_id); 63 ///
64 /// If an invalid display ID is provided, then nullptr is returned.
65 Kernel::SharedPtr<Kernel::ReadableEvent> FindVsyncEvent(u64 display_id) const;
66
67 /// Obtains a buffer queue identified by the ID.
68 BufferQueue& FindBufferQueue(u32 id);
70 69
71 /// Obtains a buffer queue identified by the id. 70 /// Obtains a buffer queue identified by the ID.
72 std::shared_ptr<BufferQueue> GetBufferQueue(u32 id) const; 71 const BufferQueue& FindBufferQueue(u32 id) const;
73 72
74 /// Performs a composition request to the emulated nvidia GPU and triggers the vsync events when 73 /// Performs a composition request to the emulated nvidia GPU and triggers the vsync events when
75 /// finished. 74 /// finished.
76 void Compose(); 75 void Compose();
77 76
78private: 77private:
79 /// Returns the display identified by the specified id. 78 /// Finds the display identified by the specified ID.
80 Display& GetDisplay(u64 display_id); 79 VI::Display* FindDisplay(u64 display_id);
81 80
82 /// Returns the layer identified by the specified id in the desired display. 81 /// Finds the display identified by the specified ID.
83 Layer& GetLayer(u64 display_id, u64 layer_id); 82 const VI::Display* FindDisplay(u64 display_id) const;
83
84 /// Finds the layer identified by the specified ID in the desired display.
85 VI::Layer* FindLayer(u64 display_id, u64 layer_id);
86
87 /// Finds the layer identified by the specified ID in the desired display.
88 const VI::Layer* FindLayer(u64 display_id, u64 layer_id) const;
84 89
85 std::shared_ptr<Nvidia::Module> nvdrv; 90 std::shared_ptr<Nvidia::Module> nvdrv;
86 91
87 std::vector<Display> displays; 92 std::vector<VI::Display> displays;
88 std::vector<std::shared_ptr<BufferQueue>> buffer_queues; 93 std::vector<BufferQueue> buffer_queues;
89 94
90 /// Id to use for the next layer that is created, this counter is shared among all displays. 95 /// Id to use for the next layer that is created, this counter is shared among all displays.
91 u64 next_layer_id = 1; 96 u64 next_layer_id = 1;
@@ -93,8 +98,11 @@ private:
93 /// layers. 98 /// layers.
94 u32 next_buffer_queue_id = 1; 99 u32 next_buffer_queue_id = 1;
95 100
96 /// CoreTiming event that handles screen composition. 101 /// Event that handles screen composition.
97 CoreTiming::EventType* composition_event; 102 Core::Timing::EventType* composition_event;
103
104 /// Core timing instance for registering/unregistering the composition event.
105 Core::Timing::CoreTiming& core_timing;
98}; 106};
99 107
100} // namespace Service::NVFlinger 108} // namespace Service::NVFlinger
diff --git a/src/core/hle/service/pm/pm.cpp b/src/core/hle/service/pm/pm.cpp
index 53e7da9c3..6b27dc4a3 100644
--- a/src/core/hle/service/pm/pm.cpp
+++ b/src/core/hle/service/pm/pm.cpp
@@ -13,7 +13,7 @@ public:
13 explicit BootMode() : ServiceFramework{"pm:bm"} { 13 explicit BootMode() : ServiceFramework{"pm:bm"} {
14 static const FunctionInfo functions[] = { 14 static const FunctionInfo functions[] = {
15 {0, &BootMode::GetBootMode, "GetBootMode"}, 15 {0, &BootMode::GetBootMode, "GetBootMode"},
16 {1, nullptr, "SetMaintenanceBoot"}, 16 {1, &BootMode::SetMaintenanceBoot, "SetMaintenanceBoot"},
17 }; 17 };
18 RegisterHandlers(functions); 18 RegisterHandlers(functions);
19 } 19 }
@@ -24,8 +24,19 @@ private:
24 24
25 IPC::ResponseBuilder rb{ctx, 3}; 25 IPC::ResponseBuilder rb{ctx, 3};
26 rb.Push(RESULT_SUCCESS); 26 rb.Push(RESULT_SUCCESS);
27 rb.Push<u32>(static_cast<u32>(SystemBootMode::Normal)); // Normal boot mode 27 rb.PushEnum(boot_mode);
28 } 28 }
29
30 void SetMaintenanceBoot(Kernel::HLERequestContext& ctx) {
31 LOG_DEBUG(Service_PM, "called");
32
33 boot_mode = SystemBootMode::Maintenance;
34
35 IPC::ResponseBuilder rb{ctx, 2};
36 rb.Push(RESULT_SUCCESS);
37 }
38
39 SystemBootMode boot_mode = SystemBootMode::Normal;
29}; 40};
30 41
31class DebugMonitor final : public ServiceFramework<DebugMonitor> { 42class DebugMonitor final : public ServiceFramework<DebugMonitor> {
diff --git a/src/core/hle/service/pm/pm.h b/src/core/hle/service/pm/pm.h
index 370f2ed72..cc8d3f215 100644
--- a/src/core/hle/service/pm/pm.h
+++ b/src/core/hle/service/pm/pm.h
@@ -9,7 +9,12 @@ class ServiceManager;
9} 9}
10 10
11namespace Service::PM { 11namespace Service::PM {
12enum class SystemBootMode : u32 { Normal = 0, Maintenance = 1 }; 12
13enum class SystemBootMode {
14 Normal,
15 Maintenance,
16};
17
13/// Registers all PM services with the specified service manager. 18/// Registers all PM services with the specified service manager.
14void InstallInterfaces(SM::ServiceManager& service_manager); 19void InstallInterfaces(SM::ServiceManager& service_manager);
15 20
diff --git a/src/core/hle/service/psc/psc.cpp b/src/core/hle/service/psc/psc.cpp
index 0ba0a4076..53ec6b031 100644
--- a/src/core/hle/service/psc/psc.cpp
+++ b/src/core/hle/service/psc/psc.cpp
@@ -17,13 +17,13 @@ public:
17 explicit PSC_C() : ServiceFramework{"psc:c"} { 17 explicit PSC_C() : ServiceFramework{"psc:c"} {
18 // clang-format off 18 // clang-format off
19 static const FunctionInfo functions[] = { 19 static const FunctionInfo functions[] = {
20 {0, nullptr, "Unknown1"}, 20 {0, nullptr, "Initialize"},
21 {1, nullptr, "Unknown2"}, 21 {1, nullptr, "DispatchRequest"},
22 {2, nullptr, "Unknown3"}, 22 {2, nullptr, "GetResult"},
23 {3, nullptr, "Unknown4"}, 23 {3, nullptr, "GetState"},
24 {4, nullptr, "Unknown5"}, 24 {4, nullptr, "Cancel"},
25 {5, nullptr, "Unknown6"}, 25 {5, nullptr, "PrintModuleInformation"},
26 {6, nullptr, "Unknown7"}, 26 {6, nullptr, "GetModuleInformation"},
27 }; 27 };
28 // clang-format on 28 // clang-format on
29 29
@@ -39,7 +39,8 @@ public:
39 {0, nullptr, "Initialize"}, 39 {0, nullptr, "Initialize"},
40 {1, nullptr, "GetRequest"}, 40 {1, nullptr, "GetRequest"},
41 {2, nullptr, "Acknowledge"}, 41 {2, nullptr, "Acknowledge"},
42 {3, nullptr, "Unknown1"}, 42 {3, nullptr, "Finalize"},
43 {4, nullptr, "AcknowledgeEx"},
43 }; 44 };
44 // clang-format on 45 // clang-format on
45 46
diff --git a/src/core/hle/service/service.cpp b/src/core/hle/service/service.cpp
index d25b80ab0..00806b0ed 100644
--- a/src/core/hle/service/service.cpp
+++ b/src/core/hle/service/service.cpp
@@ -11,7 +11,6 @@
11#include "core/hle/ipc.h" 11#include "core/hle/ipc.h"
12#include "core/hle/ipc_helpers.h" 12#include "core/hle/ipc_helpers.h"
13#include "core/hle/kernel/client_port.h" 13#include "core/hle/kernel/client_port.h"
14#include "core/hle/kernel/handle_table.h"
15#include "core/hle/kernel/kernel.h" 14#include "core/hle/kernel/kernel.h"
16#include "core/hle/kernel/process.h" 15#include "core/hle/kernel/process.h"
17#include "core/hle/kernel/server_port.h" 16#include "core/hle/kernel/server_port.h"
@@ -76,7 +75,8 @@ namespace Service {
76 * Creates a function string for logging, complete with the name (or header code, depending 75 * Creates a function string for logging, complete with the name (or header code, depending
77 * on what's passed in) the port name, and all the cmd_buff arguments. 76 * on what's passed in) the port name, and all the cmd_buff arguments.
78 */ 77 */
79[[maybe_unused]] static std::string MakeFunctionString(const char* name, const char* port_name, 78[[maybe_unused]] static std::string MakeFunctionString(std::string_view name,
79 std::string_view port_name,
80 const u32* cmd_buff) { 80 const u32* cmd_buff) {
81 // Number of params == bits 0-5 + bits 6-11 81 // Number of params == bits 0-5 + bits 6-11
82 int num_params = (cmd_buff[0] & 0x3F) + ((cmd_buff[0] >> 6) & 0x3F); 82 int num_params = (cmd_buff[0] & 0x3F) + ((cmd_buff[0] >> 6) & 0x3F);
@@ -158,9 +158,7 @@ void ServiceFrameworkBase::InvokeRequest(Kernel::HLERequestContext& ctx) {
158 return ReportUnimplementedFunction(ctx, info); 158 return ReportUnimplementedFunction(ctx, info);
159 } 159 }
160 160
161 LOG_TRACE( 161 LOG_TRACE(Service, "{}", MakeFunctionString(info->name, GetServiceName(), ctx.CommandBuffer()));
162 Service, "{}",
163 MakeFunctionString(info->name, GetServiceName().c_str(), ctx.CommandBuffer()).c_str());
164 handler_invoker(this, info->handler_callback, ctx); 162 handler_invoker(this, info->handler_callback, ctx);
165} 163}
166 164
@@ -169,7 +167,7 @@ ResultCode ServiceFrameworkBase::HandleSyncRequest(Kernel::HLERequestContext& co
169 case IPC::CommandType::Close: { 167 case IPC::CommandType::Close: {
170 IPC::ResponseBuilder rb{context, 2}; 168 IPC::ResponseBuilder rb{context, 2};
171 rb.Push(RESULT_SUCCESS); 169 rb.Push(RESULT_SUCCESS);
172 return ResultCode(ErrorModule::HIPC, ErrorDescription::RemoteProcessDead); 170 return IPC::ERR_REMOTE_PROCESS_DEAD;
173 } 171 }
174 case IPC::CommandType::ControlWithContext: 172 case IPC::CommandType::ControlWithContext:
175 case IPC::CommandType::Control: { 173 case IPC::CommandType::Control: {
@@ -194,10 +192,11 @@ ResultCode ServiceFrameworkBase::HandleSyncRequest(Kernel::HLERequestContext& co
194// Module interface 192// Module interface
195 193
196/// Initialize ServiceManager 194/// Initialize ServiceManager
197void Init(std::shared_ptr<SM::ServiceManager>& sm, FileSys::VfsFilesystem& vfs) { 195void Init(std::shared_ptr<SM::ServiceManager>& sm, Core::System& system,
196 FileSys::VfsFilesystem& vfs) {
198 // NVFlinger needs to be accessed by several services like Vi and AppletOE so we instantiate it 197 // NVFlinger needs to be accessed by several services like Vi and AppletOE so we instantiate it
199 // here and pass it into the respective InstallInterfaces functions. 198 // here and pass it into the respective InstallInterfaces functions.
200 auto nv_flinger = std::make_shared<NVFlinger::NVFlinger>(); 199 auto nv_flinger = std::make_shared<NVFlinger::NVFlinger>(system.CoreTiming());
201 200
202 SM::ServiceManager::InstallInterfaces(sm); 201 SM::ServiceManager::InstallInterfaces(sm);
203 202
diff --git a/src/core/hle/service/service.h b/src/core/hle/service/service.h
index 029533628..abbfe5524 100644
--- a/src/core/hle/service/service.h
+++ b/src/core/hle/service/service.h
@@ -14,6 +14,14 @@
14//////////////////////////////////////////////////////////////////////////////////////////////////// 14////////////////////////////////////////////////////////////////////////////////////////////////////
15// Namespace Service 15// Namespace Service
16 16
17namespace Core {
18class System;
19}
20
21namespace FileSys {
22class VfsFilesystem;
23}
24
17namespace Kernel { 25namespace Kernel {
18class ClientPort; 26class ClientPort;
19class ServerPort; 27class ServerPort;
@@ -21,10 +29,6 @@ class ServerSession;
21class HLERequestContext; 29class HLERequestContext;
22} // namespace Kernel 30} // namespace Kernel
23 31
24namespace FileSys {
25class VfsFilesystem;
26}
27
28namespace Service { 32namespace Service {
29 33
30namespace SM { 34namespace SM {
@@ -86,7 +90,7 @@ private:
86 Kernel::HLERequestContext& ctx); 90 Kernel::HLERequestContext& ctx);
87 91
88 ServiceFrameworkBase(const char* service_name, u32 max_sessions, InvokerFn* handler_invoker); 92 ServiceFrameworkBase(const char* service_name, u32 max_sessions, InvokerFn* handler_invoker);
89 ~ServiceFrameworkBase(); 93 ~ServiceFrameworkBase() override;
90 94
91 void RegisterHandlersBase(const FunctionInfoBase* functions, std::size_t n); 95 void RegisterHandlersBase(const FunctionInfoBase* functions, std::size_t n);
92 void ReportUnimplementedFunction(Kernel::HLERequestContext& ctx, const FunctionInfoBase* info); 96 void ReportUnimplementedFunction(Kernel::HLERequestContext& ctx, const FunctionInfoBase* info);
@@ -178,7 +182,8 @@ private:
178}; 182};
179 183
180/// Initialize ServiceManager 184/// Initialize ServiceManager
181void Init(std::shared_ptr<SM::ServiceManager>& sm, FileSys::VfsFilesystem& vfs); 185void Init(std::shared_ptr<SM::ServiceManager>& sm, Core::System& system,
186 FileSys::VfsFilesystem& vfs);
182 187
183/// Shutdown ServiceManager 188/// Shutdown ServiceManager
184void Shutdown(); 189void Shutdown();
diff --git a/src/core/hle/service/set/set_cal.h b/src/core/hle/service/set/set_cal.h
index 583036eac..a0677e815 100644
--- a/src/core/hle/service/set/set_cal.h
+++ b/src/core/hle/service/set/set_cal.h
@@ -11,7 +11,7 @@ namespace Service::Set {
11class SET_CAL final : public ServiceFramework<SET_CAL> { 11class SET_CAL final : public ServiceFramework<SET_CAL> {
12public: 12public:
13 explicit SET_CAL(); 13 explicit SET_CAL();
14 ~SET_CAL(); 14 ~SET_CAL() override;
15}; 15};
16 16
17} // namespace Service::Set 17} // namespace Service::Set
diff --git a/src/core/hle/service/set/set_sys.cpp b/src/core/hle/service/set/set_sys.cpp
index c9b4da5b0..ecee554bf 100644
--- a/src/core/hle/service/set/set_sys.cpp
+++ b/src/core/hle/service/set/set_sys.cpp
@@ -2,13 +2,88 @@
2// Licensed under GPLv2 or any later version 2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included. 3// Refer to the license.txt file included.
4 4
5#include "common/assert.h"
5#include "common/logging/log.h" 6#include "common/logging/log.h"
7#include "core/file_sys/errors.h"
8#include "core/file_sys/system_archive/system_version.h"
6#include "core/hle/ipc_helpers.h" 9#include "core/hle/ipc_helpers.h"
7#include "core/hle/kernel/client_port.h" 10#include "core/hle/kernel/client_port.h"
11#include "core/hle/service/filesystem/filesystem.h"
8#include "core/hle/service/set/set_sys.h" 12#include "core/hle/service/set/set_sys.h"
9 13
10namespace Service::Set { 14namespace Service::Set {
11 15
16namespace {
17constexpr u64 SYSTEM_VERSION_FILE_MINOR_REVISION_OFFSET = 0x05;
18
19enum class GetFirmwareVersionType {
20 Version1,
21 Version2,
22};
23
24void GetFirmwareVersionImpl(Kernel::HLERequestContext& ctx, GetFirmwareVersionType type) {
25 LOG_WARNING(Service_SET, "called - Using hardcoded firmware version '{}'",
26 FileSys::SystemArchive::GetLongDisplayVersion());
27
28 ASSERT_MSG(ctx.GetWriteBufferSize() == 0x100,
29 "FirmwareVersion output buffer must be 0x100 bytes in size!");
30
31 // Instead of using the normal procedure of checking for the real system archive and if it
32 // doesn't exist, synthesizing one, I feel that that would lead to strange bugs because a
33 // used is using a really old or really new SystemVersion title. The synthesized one ensures
34 // consistence (currently reports as 5.1.0-0.0)
35 const auto archive = FileSys::SystemArchive::SystemVersion();
36
37 const auto early_exit_failure = [&ctx](const std::string& desc, ResultCode code) {
38 LOG_ERROR(Service_SET, "General failure while attempting to resolve firmware version ({}).",
39 desc.c_str());
40 IPC::ResponseBuilder rb{ctx, 2};
41 rb.Push(code);
42 };
43
44 if (archive == nullptr) {
45 early_exit_failure("The system version archive couldn't be synthesized.",
46 FileSys::ERROR_FAILED_MOUNT_ARCHIVE);
47 return;
48 }
49
50 const auto ver_file = archive->GetFile("file");
51 if (ver_file == nullptr) {
52 early_exit_failure("The system version archive didn't contain the file 'file'.",
53 FileSys::ERROR_INVALID_ARGUMENT);
54 return;
55 }
56
57 auto data = ver_file->ReadAllBytes();
58 if (data.size() != 0x100) {
59 early_exit_failure("The system version file 'file' was not the correct size.",
60 FileSys::ERROR_OUT_OF_BOUNDS);
61 return;
62 }
63
64 // If the command is GetFirmwareVersion (as opposed to GetFirmwareVersion2), hardware will
65 // zero out the REVISION_MINOR field.
66 if (type == GetFirmwareVersionType::Version1) {
67 data[SYSTEM_VERSION_FILE_MINOR_REVISION_OFFSET] = 0;
68 }
69
70 ctx.WriteBuffer(data);
71
72 IPC::ResponseBuilder rb{ctx, 2};
73 rb.Push(RESULT_SUCCESS);
74}
75} // Anonymous namespace
76
77void SET_SYS::GetFirmwareVersion(Kernel::HLERequestContext& ctx) {
78 LOG_DEBUG(Service_SET, "called");
79 GetFirmwareVersionImpl(ctx, GetFirmwareVersionType::Version1);
80}
81
82void SET_SYS::GetFirmwareVersion2(Kernel::HLERequestContext& ctx) {
83 LOG_DEBUG(Service_SET, "called");
84 GetFirmwareVersionImpl(ctx, GetFirmwareVersionType::Version2);
85}
86
12void SET_SYS::GetColorSetId(Kernel::HLERequestContext& ctx) { 87void SET_SYS::GetColorSetId(Kernel::HLERequestContext& ctx) {
13 LOG_DEBUG(Service_SET, "called"); 88 LOG_DEBUG(Service_SET, "called");
14 89
@@ -33,8 +108,8 @@ SET_SYS::SET_SYS() : ServiceFramework("set:sys") {
33 {0, nullptr, "SetLanguageCode"}, 108 {0, nullptr, "SetLanguageCode"},
34 {1, nullptr, "SetNetworkSettings"}, 109 {1, nullptr, "SetNetworkSettings"},
35 {2, nullptr, "GetNetworkSettings"}, 110 {2, nullptr, "GetNetworkSettings"},
36 {3, nullptr, "GetFirmwareVersion"}, 111 {3, &SET_SYS::GetFirmwareVersion, "GetFirmwareVersion"},
37 {4, nullptr, "GetFirmwareVersion2"}, 112 {4, &SET_SYS::GetFirmwareVersion2, "GetFirmwareVersion2"},
38 {5, nullptr, "GetFirmwareVersionDigest"}, 113 {5, nullptr, "GetFirmwareVersionDigest"},
39 {7, nullptr, "GetLockScreenFlag"}, 114 {7, nullptr, "GetLockScreenFlag"},
40 {8, nullptr, "SetLockScreenFlag"}, 115 {8, nullptr, "SetLockScreenFlag"},
diff --git a/src/core/hle/service/set/set_sys.h b/src/core/hle/service/set/set_sys.h
index f602f3c77..13ee2cf46 100644
--- a/src/core/hle/service/set/set_sys.h
+++ b/src/core/hle/service/set/set_sys.h
@@ -20,6 +20,8 @@ private:
20 BasicBlack = 1, 20 BasicBlack = 1,
21 }; 21 };
22 22
23 void GetFirmwareVersion(Kernel::HLERequestContext& ctx);
24 void GetFirmwareVersion2(Kernel::HLERequestContext& ctx);
23 void GetColorSetId(Kernel::HLERequestContext& ctx); 25 void GetColorSetId(Kernel::HLERequestContext& ctx);
24 void SetColorSetId(Kernel::HLERequestContext& ctx); 26 void SetColorSetId(Kernel::HLERequestContext& ctx);
25 27
diff --git a/src/core/hle/service/sm/controller.cpp b/src/core/hle/service/sm/controller.cpp
index 74da4d5e6..e9ee73710 100644
--- a/src/core/hle/service/sm/controller.cpp
+++ b/src/core/hle/service/sm/controller.cpp
@@ -30,7 +30,7 @@ void Controller::DuplicateSession(Kernel::HLERequestContext& ctx) {
30 30
31 IPC::ResponseBuilder rb{ctx, 2, 0, 1, IPC::ResponseBuilder::Flags::AlwaysMoveHandles}; 31 IPC::ResponseBuilder rb{ctx, 2, 0, 1, IPC::ResponseBuilder::Flags::AlwaysMoveHandles};
32 rb.Push(RESULT_SUCCESS); 32 rb.Push(RESULT_SUCCESS);
33 Kernel::SharedPtr<Kernel::ClientSession> session{ctx.Session()->parent->client}; 33 Kernel::SharedPtr<Kernel::ClientSession> session{ctx.Session()->GetParent()->client};
34 rb.PushMoveObjects(session); 34 rb.PushMoveObjects(session);
35 35
36 LOG_DEBUG(Service, "session={}", session->GetObjectId()); 36 LOG_DEBUG(Service, "session={}", session->GetObjectId());
diff --git a/src/core/hle/service/sm/sm.h b/src/core/hle/service/sm/sm.h
index bef25433e..b9d6381b4 100644
--- a/src/core/hle/service/sm/sm.h
+++ b/src/core/hle/service/sm/sm.h
@@ -67,7 +67,7 @@ public:
67 if (port == nullptr) { 67 if (port == nullptr) {
68 return nullptr; 68 return nullptr;
69 } 69 }
70 return std::static_pointer_cast<T>(port->hle_handler); 70 return std::static_pointer_cast<T>(port->GetHLEHandler());
71 } 71 }
72 72
73 void InvokeControlRequest(Kernel::HLERequestContext& context); 73 void InvokeControlRequest(Kernel::HLERequestContext& context);
diff --git a/src/core/hle/service/sockets/sfdnsres.cpp b/src/core/hle/service/sockets/sfdnsres.cpp
index 13ab1d31e..852e71e4b 100644
--- a/src/core/hle/service/sockets/sfdnsres.cpp
+++ b/src/core/hle/service/sockets/sfdnsres.cpp
@@ -8,12 +8,20 @@
8namespace Service::Sockets { 8namespace Service::Sockets {
9 9
10void SFDNSRES::GetAddrInfo(Kernel::HLERequestContext& ctx) { 10void SFDNSRES::GetAddrInfo(Kernel::HLERequestContext& ctx) {
11 struct Parameters {
12 u8 use_nsd_resolve;
13 u32 unknown;
14 u64 process_id;
15 };
16
11 IPC::RequestParser rp{ctx}; 17 IPC::RequestParser rp{ctx};
18 const auto parameters = rp.PopRaw<Parameters>();
12 19
13 LOG_WARNING(Service, "(STUBBED) called"); 20 LOG_WARNING(Service,
21 "(STUBBED) called. use_nsd_resolve={}, unknown=0x{:08X}, process_id=0x{:016X}",
22 parameters.use_nsd_resolve, parameters.unknown, parameters.process_id);
14 23
15 IPC::ResponseBuilder rb{ctx, 2}; 24 IPC::ResponseBuilder rb{ctx, 2};
16
17 rb.Push(RESULT_SUCCESS); 25 rb.Push(RESULT_SUCCESS);
18} 26}
19 27
diff --git a/src/core/hle/service/spl/module.cpp b/src/core/hle/service/spl/module.cpp
index 8db0c2f13..e724d4ab8 100644
--- a/src/core/hle/service/spl/module.cpp
+++ b/src/core/hle/service/spl/module.cpp
@@ -26,9 +26,7 @@ Module::Interface::~Interface() = default;
26void Module::Interface::GetRandomBytes(Kernel::HLERequestContext& ctx) { 26void Module::Interface::GetRandomBytes(Kernel::HLERequestContext& ctx) {
27 LOG_DEBUG(Service_SPL, "called"); 27 LOG_DEBUG(Service_SPL, "called");
28 28
29 IPC::RequestParser rp{ctx}; 29 const std::size_t size = ctx.GetWriteBufferSize();
30
31 std::size_t size = ctx.GetWriteBufferSize();
32 30
33 std::uniform_int_distribution<u16> distribution(0, std::numeric_limits<u8>::max()); 31 std::uniform_int_distribution<u16> distribution(0, std::numeric_limits<u8>::max());
34 std::vector<u8> data(size); 32 std::vector<u8> data(size);
diff --git a/src/core/hle/service/ssl/ssl.cpp b/src/core/hle/service/ssl/ssl.cpp
index af40a1815..f7f87a958 100644
--- a/src/core/hle/service/ssl/ssl.cpp
+++ b/src/core/hle/service/ssl/ssl.cpp
@@ -64,13 +64,19 @@ public:
64 }; 64 };
65 RegisterHandlers(functions); 65 RegisterHandlers(functions);
66 } 66 }
67 ~ISslContext() = default;
68 67
69private: 68private:
70 void SetOption(Kernel::HLERequestContext& ctx) { 69 void SetOption(Kernel::HLERequestContext& ctx) {
71 LOG_WARNING(Service_SSL, "(STUBBED) called"); 70 struct Parameters {
71 u8 enable;
72 u32 option;
73 };
72 74
73 IPC::RequestParser rp{ctx}; 75 IPC::RequestParser rp{ctx};
76 const auto parameters = rp.PopRaw<Parameters>();
77
78 LOG_WARNING(Service_SSL, "(STUBBED) called. enable={}, option={}", parameters.enable,
79 parameters.option);
74 80
75 IPC::ResponseBuilder rb{ctx, 2}; 81 IPC::ResponseBuilder rb{ctx, 2};
76 rb.Push(RESULT_SUCCESS); 82 rb.Push(RESULT_SUCCESS);
diff --git a/src/core/hle/service/time/time.cpp b/src/core/hle/service/time/time.cpp
index c13640ad8..aa115935d 100644
--- a/src/core/hle/service/time/time.cpp
+++ b/src/core/hle/service/time/time.cpp
@@ -5,6 +5,7 @@
5#include <chrono> 5#include <chrono>
6#include <ctime> 6#include <ctime>
7#include "common/logging/log.h" 7#include "common/logging/log.h"
8#include "core/core.h"
8#include "core/core_timing.h" 9#include "core/core_timing.h"
9#include "core/core_timing_util.h" 10#include "core/core_timing_util.h"
10#include "core/hle/ipc_helpers.h" 11#include "core/hle/ipc_helpers.h"
@@ -106,8 +107,9 @@ private:
106 void GetCurrentTimePoint(Kernel::HLERequestContext& ctx) { 107 void GetCurrentTimePoint(Kernel::HLERequestContext& ctx) {
107 LOG_DEBUG(Service_Time, "called"); 108 LOG_DEBUG(Service_Time, "called");
108 109
109 SteadyClockTimePoint steady_clock_time_point{ 110 const auto& core_timing = Core::System::GetInstance().CoreTiming();
110 CoreTiming::cyclesToMs(CoreTiming::GetTicks()) / 1000}; 111 const SteadyClockTimePoint steady_clock_time_point{
112 Core::Timing::cyclesToMs(core_timing.GetTicks()) / 1000};
111 IPC::ResponseBuilder rb{ctx, (sizeof(SteadyClockTimePoint) / 4) + 2}; 113 IPC::ResponseBuilder rb{ctx, (sizeof(SteadyClockTimePoint) / 4) + 2};
112 rb.Push(RESULT_SUCCESS); 114 rb.Push(RESULT_SUCCESS);
113 rb.PushRaw(steady_clock_time_point); 115 rb.PushRaw(steady_clock_time_point);
@@ -281,8 +283,9 @@ void Module::Interface::GetClockSnapshot(Kernel::HLERequestContext& ctx) {
281 return; 283 return;
282 } 284 }
283 285
286 const auto& core_timing = Core::System::GetInstance().CoreTiming();
284 const SteadyClockTimePoint steady_clock_time_point{ 287 const SteadyClockTimePoint steady_clock_time_point{
285 CoreTiming::cyclesToMs(CoreTiming::GetTicks()) / 1000, {}}; 288 Core::Timing::cyclesToMs(core_timing.GetTicks()) / 1000, {}};
286 289
287 CalendarTime calendar_time{}; 290 CalendarTime calendar_time{};
288 calendar_time.year = tm->tm_year + 1900; 291 calendar_time.year = tm->tm_year + 1900;
diff --git a/src/core/hle/service/vi/display/vi_display.cpp b/src/core/hle/service/vi/display/vi_display.cpp
new file mode 100644
index 000000000..01d80311b
--- /dev/null
+++ b/src/core/hle/service/vi/display/vi_display.cpp
@@ -0,0 +1,71 @@
1// Copyright 2019 yuzu emulator team
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <algorithm>
6#include <utility>
7
8#include <fmt/format.h>
9
10#include "common/assert.h"
11#include "core/core.h"
12#include "core/hle/kernel/readable_event.h"
13#include "core/hle/service/vi/display/vi_display.h"
14#include "core/hle/service/vi/layer/vi_layer.h"
15
16namespace Service::VI {
17
18Display::Display(u64 id, std::string name) : id{id}, name{std::move(name)} {
19 auto& kernel = Core::System::GetInstance().Kernel();
20 vsync_event = Kernel::WritableEvent::CreateEventPair(kernel, Kernel::ResetType::Sticky,
21 fmt::format("Display VSync Event {}", id));
22}
23
24Display::~Display() = default;
25
26Layer& Display::GetLayer(std::size_t index) {
27 return layers.at(index);
28}
29
30const Layer& Display::GetLayer(std::size_t index) const {
31 return layers.at(index);
32}
33
34Kernel::SharedPtr<Kernel::ReadableEvent> Display::GetVSyncEvent() const {
35 return vsync_event.readable;
36}
37
38void Display::SignalVSyncEvent() {
39 vsync_event.writable->Signal();
40}
41
42void Display::CreateLayer(u64 id, NVFlinger::BufferQueue& buffer_queue) {
43 // TODO(Subv): Support more than 1 layer.
44 ASSERT_MSG(layers.empty(), "Only one layer is supported per display at the moment");
45
46 layers.emplace_back(id, buffer_queue);
47}
48
49Layer* Display::FindLayer(u64 id) {
50 const auto itr = std::find_if(layers.begin(), layers.end(),
51 [id](const VI::Layer& layer) { return layer.GetID() == id; });
52
53 if (itr == layers.end()) {
54 return nullptr;
55 }
56
57 return &*itr;
58}
59
60const Layer* Display::FindLayer(u64 id) const {
61 const auto itr = std::find_if(layers.begin(), layers.end(),
62 [id](const VI::Layer& layer) { return layer.GetID() == id; });
63
64 if (itr == layers.end()) {
65 return nullptr;
66 }
67
68 return &*itr;
69}
70
71} // namespace Service::VI
diff --git a/src/core/hle/service/vi/display/vi_display.h b/src/core/hle/service/vi/display/vi_display.h
new file mode 100644
index 000000000..2acd46ff8
--- /dev/null
+++ b/src/core/hle/service/vi/display/vi_display.h
@@ -0,0 +1,98 @@
1// Copyright 2019 yuzu emulator team
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <string>
8#include <vector>
9
10#include "common/common_types.h"
11#include "core/hle/kernel/writable_event.h"
12
13namespace Service::NVFlinger {
14class BufferQueue;
15}
16
17namespace Service::VI {
18
19class Layer;
20
21/// Represents a single display type
22class Display {
23public:
24 /// Constructs a display with a given unique ID and name.
25 ///
26 /// @param id The unique ID for this display.
27 /// @param name The name for this display.
28 ///
29 Display(u64 id, std::string name);
30 ~Display();
31
32 Display(const Display&) = delete;
33 Display& operator=(const Display&) = delete;
34
35 Display(Display&&) = default;
36 Display& operator=(Display&&) = default;
37
38 /// Gets the unique ID assigned to this display.
39 u64 GetID() const {
40 return id;
41 }
42
43 /// Gets the name of this display
44 const std::string& GetName() const {
45 return name;
46 }
47
48 /// Whether or not this display has any layers added to it.
49 bool HasLayers() const {
50 return !layers.empty();
51 }
52
53 /// Gets a layer for this display based off an index.
54 Layer& GetLayer(std::size_t index);
55
56 /// Gets a layer for this display based off an index.
57 const Layer& GetLayer(std::size_t index) const;
58
59 /// Gets the readable vsync event.
60 Kernel::SharedPtr<Kernel::ReadableEvent> GetVSyncEvent() const;
61
62 /// Signals the internal vsync event.
63 void SignalVSyncEvent();
64
65 /// Creates and adds a layer to this display with the given ID.
66 ///
67 /// @param id The ID to assign to the created layer.
68 /// @param buffer_queue The buffer queue for the layer instance to use.
69 ///
70 void CreateLayer(u64 id, NVFlinger::BufferQueue& buffer_queue);
71
72 /// Attempts to find a layer with the given ID.
73 ///
74 /// @param id The layer ID.
75 ///
76 /// @returns If found, the Layer instance with the given ID.
77 /// If not found, then nullptr is returned.
78 ///
79 Layer* FindLayer(u64 id);
80
81 /// Attempts to find a layer with the given ID.
82 ///
83 /// @param id The layer ID.
84 ///
85 /// @returns If found, the Layer instance with the given ID.
86 /// If not found, then nullptr is returned.
87 ///
88 const Layer* FindLayer(u64 id) const;
89
90private:
91 u64 id;
92 std::string name;
93
94 std::vector<Layer> layers;
95 Kernel::EventPair vsync_event;
96};
97
98} // namespace Service::VI
diff --git a/src/core/hle/service/vi/layer/vi_layer.cpp b/src/core/hle/service/vi/layer/vi_layer.cpp
new file mode 100644
index 000000000..954225c26
--- /dev/null
+++ b/src/core/hle/service/vi/layer/vi_layer.cpp
@@ -0,0 +1,13 @@
1// Copyright 2019 yuzu emulator team
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "core/hle/service/vi/layer/vi_layer.h"
6
7namespace Service::VI {
8
9Layer::Layer(u64 id, NVFlinger::BufferQueue& queue) : id{id}, buffer_queue{queue} {}
10
11Layer::~Layer() = default;
12
13} // namespace Service::VI
diff --git a/src/core/hle/service/vi/layer/vi_layer.h b/src/core/hle/service/vi/layer/vi_layer.h
new file mode 100644
index 000000000..c6bfd01f6
--- /dev/null
+++ b/src/core/hle/service/vi/layer/vi_layer.h
@@ -0,0 +1,52 @@
1// Copyright 2019 yuzu emulator team
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include "common/common_types.h"
8
9namespace Service::NVFlinger {
10class BufferQueue;
11}
12
13namespace Service::VI {
14
15/// Represents a single display layer.
16class Layer {
17public:
18 /// Constructs a layer with a given ID and buffer queue.
19 ///
20 /// @param id The ID to assign to this layer.
21 /// @param queue The buffer queue for this layer to use.
22 ///
23 Layer(u64 id, NVFlinger::BufferQueue& queue);
24 ~Layer();
25
26 Layer(const Layer&) = delete;
27 Layer& operator=(const Layer&) = delete;
28
29 Layer(Layer&&) = default;
30 Layer& operator=(Layer&&) = delete;
31
32 /// Gets the ID for this layer.
33 u64 GetID() const {
34 return id;
35 }
36
37 /// Gets a reference to the buffer queue this layer is using.
38 NVFlinger::BufferQueue& GetBufferQueue() {
39 return buffer_queue;
40 }
41
42 /// Gets a const reference to the buffer queue this layer is using.
43 const NVFlinger::BufferQueue& GetBufferQueue() const {
44 return buffer_queue;
45 }
46
47private:
48 u64 id;
49 NVFlinger::BufferQueue& buffer_queue;
50};
51
52} // namespace Service::VI
diff --git a/src/core/hle/service/vi/vi.cpp b/src/core/hle/service/vi/vi.cpp
index 70c933934..4e17249a9 100644
--- a/src/core/hle/service/vi/vi.cpp
+++ b/src/core/hle/service/vi/vi.cpp
@@ -24,6 +24,7 @@
24#include "core/hle/service/nvdrv/nvdrv.h" 24#include "core/hle/service/nvdrv/nvdrv.h"
25#include "core/hle/service/nvflinger/buffer_queue.h" 25#include "core/hle/service/nvflinger/buffer_queue.h"
26#include "core/hle/service/nvflinger/nvflinger.h" 26#include "core/hle/service/nvflinger/nvflinger.h"
27#include "core/hle/service/service.h"
27#include "core/hle/service/vi/vi.h" 28#include "core/hle/service/vi/vi.h"
28#include "core/hle/service/vi/vi_m.h" 29#include "core/hle/service/vi/vi_m.h"
29#include "core/hle/service/vi/vi_s.h" 30#include "core/hle/service/vi/vi_s.h"
@@ -33,7 +34,9 @@
33namespace Service::VI { 34namespace Service::VI {
34 35
35constexpr ResultCode ERR_OPERATION_FAILED{ErrorModule::VI, 1}; 36constexpr ResultCode ERR_OPERATION_FAILED{ErrorModule::VI, 1};
37constexpr ResultCode ERR_PERMISSION_DENIED{ErrorModule::VI, 5};
36constexpr ResultCode ERR_UNSUPPORTED{ErrorModule::VI, 6}; 38constexpr ResultCode ERR_UNSUPPORTED{ErrorModule::VI, 6};
39constexpr ResultCode ERR_NOT_FOUND{ErrorModule::VI, 7};
37 40
38struct DisplayInfo { 41struct DisplayInfo {
39 /// The name of this particular display. 42 /// The name of this particular display.
@@ -419,7 +422,7 @@ public:
419 u32_le fence_is_valid; 422 u32_le fence_is_valid;
420 std::array<Fence, 2> fences; 423 std::array<Fence, 2> fences;
421 424
422 MathUtil::Rectangle<int> GetCropRect() const { 425 Common::Rectangle<int> GetCropRect() const {
423 return {crop_left, crop_top, crop_right, crop_bottom}; 426 return {crop_left, crop_top, crop_right, crop_bottom};
424 } 427 }
425 }; 428 };
@@ -495,7 +498,6 @@ public:
495 }; 498 };
496 RegisterHandlers(functions); 499 RegisterHandlers(functions);
497 } 500 }
498 ~IHOSBinderDriver() = default;
499 501
500private: 502private:
501 enum class TransactionId { 503 enum class TransactionId {
@@ -524,7 +526,7 @@ private:
524 LOG_DEBUG(Service_VI, "called. id=0x{:08X} transaction={:X}, flags=0x{:08X}", id, 526 LOG_DEBUG(Service_VI, "called. id=0x{:08X} transaction={:X}, flags=0x{:08X}", id,
525 static_cast<u32>(transaction), flags); 527 static_cast<u32>(transaction), flags);
526 528
527 auto buffer_queue = nv_flinger->GetBufferQueue(id); 529 auto& buffer_queue = nv_flinger->FindBufferQueue(id);
528 530
529 if (transaction == TransactionId::Connect) { 531 if (transaction == TransactionId::Connect) {
530 IGBPConnectRequestParcel request{ctx.ReadBuffer()}; 532 IGBPConnectRequestParcel request{ctx.ReadBuffer()};
@@ -537,7 +539,7 @@ private:
537 } else if (transaction == TransactionId::SetPreallocatedBuffer) { 539 } else if (transaction == TransactionId::SetPreallocatedBuffer) {
538 IGBPSetPreallocatedBufferRequestParcel request{ctx.ReadBuffer()}; 540 IGBPSetPreallocatedBufferRequestParcel request{ctx.ReadBuffer()};
539 541
540 buffer_queue->SetPreallocatedBuffer(request.data.slot, request.buffer); 542 buffer_queue.SetPreallocatedBuffer(request.data.slot, request.buffer);
541 543
542 IGBPSetPreallocatedBufferResponseParcel response{}; 544 IGBPSetPreallocatedBufferResponseParcel response{};
543 ctx.WriteBuffer(response.Serialize()); 545 ctx.WriteBuffer(response.Serialize());
@@ -545,7 +547,7 @@ private:
545 IGBPDequeueBufferRequestParcel request{ctx.ReadBuffer()}; 547 IGBPDequeueBufferRequestParcel request{ctx.ReadBuffer()};
546 const u32 width{request.data.width}; 548 const u32 width{request.data.width};
547 const u32 height{request.data.height}; 549 const u32 height{request.data.height};
548 std::optional<u32> slot = buffer_queue->DequeueBuffer(width, height); 550 std::optional<u32> slot = buffer_queue.DequeueBuffer(width, height);
549 551
550 if (slot) { 552 if (slot) {
551 // Buffer is available 553 // Buffer is available
@@ -558,8 +560,8 @@ private:
558 [=](Kernel::SharedPtr<Kernel::Thread> thread, Kernel::HLERequestContext& ctx, 560 [=](Kernel::SharedPtr<Kernel::Thread> thread, Kernel::HLERequestContext& ctx,
559 Kernel::ThreadWakeupReason reason) { 561 Kernel::ThreadWakeupReason reason) {
560 // Repeat TransactParcel DequeueBuffer when a buffer is available 562 // Repeat TransactParcel DequeueBuffer when a buffer is available
561 auto buffer_queue = nv_flinger->GetBufferQueue(id); 563 auto& buffer_queue = nv_flinger->FindBufferQueue(id);
562 std::optional<u32> slot = buffer_queue->DequeueBuffer(width, height); 564 std::optional<u32> slot = buffer_queue.DequeueBuffer(width, height);
563 ASSERT_MSG(slot != std::nullopt, "Could not dequeue buffer."); 565 ASSERT_MSG(slot != std::nullopt, "Could not dequeue buffer.");
564 566
565 IGBPDequeueBufferResponseParcel response{*slot}; 567 IGBPDequeueBufferResponseParcel response{*slot};
@@ -567,28 +569,28 @@ private:
567 IPC::ResponseBuilder rb{ctx, 2}; 569 IPC::ResponseBuilder rb{ctx, 2};
568 rb.Push(RESULT_SUCCESS); 570 rb.Push(RESULT_SUCCESS);
569 }, 571 },
570 buffer_queue->GetWritableBufferWaitEvent()); 572 buffer_queue.GetWritableBufferWaitEvent());
571 } 573 }
572 } else if (transaction == TransactionId::RequestBuffer) { 574 } else if (transaction == TransactionId::RequestBuffer) {
573 IGBPRequestBufferRequestParcel request{ctx.ReadBuffer()}; 575 IGBPRequestBufferRequestParcel request{ctx.ReadBuffer()};
574 576
575 auto& buffer = buffer_queue->RequestBuffer(request.slot); 577 auto& buffer = buffer_queue.RequestBuffer(request.slot);
576 578
577 IGBPRequestBufferResponseParcel response{buffer}; 579 IGBPRequestBufferResponseParcel response{buffer};
578 ctx.WriteBuffer(response.Serialize()); 580 ctx.WriteBuffer(response.Serialize());
579 } else if (transaction == TransactionId::QueueBuffer) { 581 } else if (transaction == TransactionId::QueueBuffer) {
580 IGBPQueueBufferRequestParcel request{ctx.ReadBuffer()}; 582 IGBPQueueBufferRequestParcel request{ctx.ReadBuffer()};
581 583
582 buffer_queue->QueueBuffer(request.data.slot, request.data.transform, 584 buffer_queue.QueueBuffer(request.data.slot, request.data.transform,
583 request.data.GetCropRect()); 585 request.data.GetCropRect());
584 586
585 IGBPQueueBufferResponseParcel response{1280, 720}; 587 IGBPQueueBufferResponseParcel response{1280, 720};
586 ctx.WriteBuffer(response.Serialize()); 588 ctx.WriteBuffer(response.Serialize());
587 } else if (transaction == TransactionId::Query) { 589 } else if (transaction == TransactionId::Query) {
588 IGBPQueryRequestParcel request{ctx.ReadBuffer()}; 590 IGBPQueryRequestParcel request{ctx.ReadBuffer()};
589 591
590 u32 value = 592 const u32 value =
591 buffer_queue->Query(static_cast<NVFlinger::BufferQueue::QueryType>(request.type)); 593 buffer_queue.Query(static_cast<NVFlinger::BufferQueue::QueryType>(request.type));
592 594
593 IGBPQueryResponseParcel response{value}; 595 IGBPQueryResponseParcel response{value};
594 ctx.WriteBuffer(response.Serialize()); 596 ctx.WriteBuffer(response.Serialize());
@@ -628,12 +630,12 @@ private:
628 630
629 LOG_WARNING(Service_VI, "(STUBBED) called id={}, unknown={:08X}", id, unknown); 631 LOG_WARNING(Service_VI, "(STUBBED) called id={}, unknown={:08X}", id, unknown);
630 632
631 const auto buffer_queue = nv_flinger->GetBufferQueue(id); 633 const auto& buffer_queue = nv_flinger->FindBufferQueue(id);
632 634
633 // TODO(Subv): Find out what this actually is. 635 // TODO(Subv): Find out what this actually is.
634 IPC::ResponseBuilder rb{ctx, 2, 1}; 636 IPC::ResponseBuilder rb{ctx, 2, 1};
635 rb.Push(RESULT_SUCCESS); 637 rb.Push(RESULT_SUCCESS);
636 rb.PushCopyObjects(buffer_queue->GetBufferWaitEvent()); 638 rb.PushCopyObjects(buffer_queue.GetBufferWaitEvent());
637 } 639 }
638 640
639 std::shared_ptr<NVFlinger::NVFlinger> nv_flinger; 641 std::shared_ptr<NVFlinger::NVFlinger> nv_flinger;
@@ -689,7 +691,6 @@ public:
689 }; 691 };
690 RegisterHandlers(functions); 692 RegisterHandlers(functions);
691 } 693 }
692 ~ISystemDisplayService() = default;
693 694
694private: 695private:
695 void SetLayerZ(Kernel::HLERequestContext& ctx) { 696 void SetLayerZ(Kernel::HLERequestContext& ctx) {
@@ -704,13 +705,14 @@ private:
704 rb.Push(RESULT_SUCCESS); 705 rb.Push(RESULT_SUCCESS);
705 } 706 }
706 707
708 // This function currently does nothing but return a success error code in
709 // the vi library itself, so do the same thing, but log out the passed in values.
707 void SetLayerVisibility(Kernel::HLERequestContext& ctx) { 710 void SetLayerVisibility(Kernel::HLERequestContext& ctx) {
708 IPC::RequestParser rp{ctx}; 711 IPC::RequestParser rp{ctx};
709 const u64 layer_id = rp.Pop<u64>(); 712 const u64 layer_id = rp.Pop<u64>();
710 const bool visibility = rp.Pop<bool>(); 713 const bool visibility = rp.Pop<bool>();
711 714
712 LOG_WARNING(Service_VI, "(STUBBED) called, layer_id=0x{:08X}, visibility={}", layer_id, 715 LOG_DEBUG(Service_VI, "called, layer_id=0x{:08X}, visibility={}", layer_id, visibility);
713 visibility);
714 716
715 IPC::ResponseBuilder rb{ctx, 2}; 717 IPC::ResponseBuilder rb{ctx, 2};
716 rb.Push(RESULT_SUCCESS); 718 rb.Push(RESULT_SUCCESS);
@@ -750,6 +752,7 @@ public:
750 {1102, nullptr, "GetDisplayResolution"}, 752 {1102, nullptr, "GetDisplayResolution"},
751 {2010, &IManagerDisplayService::CreateManagedLayer, "CreateManagedLayer"}, 753 {2010, &IManagerDisplayService::CreateManagedLayer, "CreateManagedLayer"},
752 {2011, nullptr, "DestroyManagedLayer"}, 754 {2011, nullptr, "DestroyManagedLayer"},
755 {2012, nullptr, "CreateStrayLayer"},
753 {2050, nullptr, "CreateIndirectLayer"}, 756 {2050, nullptr, "CreateIndirectLayer"},
754 {2051, nullptr, "DestroyIndirectLayer"}, 757 {2051, nullptr, "DestroyIndirectLayer"},
755 {2052, nullptr, "CreateIndirectProducerEndPoint"}, 758 {2052, nullptr, "CreateIndirectProducerEndPoint"},
@@ -813,7 +816,6 @@ public:
813 }; 816 };
814 RegisterHandlers(functions); 817 RegisterHandlers(functions);
815 } 818 }
816 ~IManagerDisplayService() = default;
817 819
818private: 820private:
819 void CloseDisplay(Kernel::HLERequestContext& ctx) { 821 void CloseDisplay(Kernel::HLERequestContext& ctx) {
@@ -837,11 +839,16 @@ private:
837 "(STUBBED) called. unknown=0x{:08X}, display=0x{:016X}, aruid=0x{:016X}", 839 "(STUBBED) called. unknown=0x{:08X}, display=0x{:016X}, aruid=0x{:016X}",
838 unknown, display, aruid); 840 unknown, display, aruid);
839 841
840 const u64 layer_id = nv_flinger->CreateLayer(display); 842 const auto layer_id = nv_flinger->CreateLayer(display);
843 if (!layer_id) {
844 IPC::ResponseBuilder rb{ctx, 2};
845 rb.Push(ERR_NOT_FOUND);
846 return;
847 }
841 848
842 IPC::ResponseBuilder rb{ctx, 4}; 849 IPC::ResponseBuilder rb{ctx, 4};
843 rb.Push(RESULT_SUCCESS); 850 rb.Push(RESULT_SUCCESS);
844 rb.Push(layer_id); 851 rb.Push(*layer_id);
845 } 852 }
846 853
847 void AddToLayerStack(Kernel::HLERequestContext& ctx) { 854 void AddToLayerStack(Kernel::HLERequestContext& ctx) {
@@ -874,7 +881,6 @@ private:
874class IApplicationDisplayService final : public ServiceFramework<IApplicationDisplayService> { 881class IApplicationDisplayService final : public ServiceFramework<IApplicationDisplayService> {
875public: 882public:
876 explicit IApplicationDisplayService(std::shared_ptr<NVFlinger::NVFlinger> nv_flinger); 883 explicit IApplicationDisplayService(std::shared_ptr<NVFlinger::NVFlinger> nv_flinger);
877 ~IApplicationDisplayService() = default;
878 884
879private: 885private:
880 enum class ConvertedScaleMode : u64 { 886 enum class ConvertedScaleMode : u64 {
@@ -949,9 +955,16 @@ private:
949 955
950 ASSERT_MSG(name == "Default", "Non-default displays aren't supported yet"); 956 ASSERT_MSG(name == "Default", "Non-default displays aren't supported yet");
951 957
958 const auto display_id = nv_flinger->OpenDisplay(name);
959 if (!display_id) {
960 IPC::ResponseBuilder rb{ctx, 2};
961 rb.Push(ERR_NOT_FOUND);
962 return;
963 }
964
952 IPC::ResponseBuilder rb{ctx, 4}; 965 IPC::ResponseBuilder rb{ctx, 4};
953 rb.Push(RESULT_SUCCESS); 966 rb.Push(RESULT_SUCCESS);
954 rb.Push<u64>(nv_flinger->OpenDisplay(name)); 967 rb.Push<u64>(*display_id);
955 } 968 }
956 969
957 void CloseDisplay(Kernel::HLERequestContext& ctx) { 970 void CloseDisplay(Kernel::HLERequestContext& ctx) {
@@ -1020,7 +1033,6 @@ private:
1020 void ListDisplays(Kernel::HLERequestContext& ctx) { 1033 void ListDisplays(Kernel::HLERequestContext& ctx) {
1021 LOG_WARNING(Service_VI, "(STUBBED) called"); 1034 LOG_WARNING(Service_VI, "(STUBBED) called");
1022 1035
1023 IPC::RequestParser rp{ctx};
1024 DisplayInfo display_info; 1036 DisplayInfo display_info;
1025 display_info.width *= static_cast<u64>(Settings::values.resolution_factor); 1037 display_info.width *= static_cast<u64>(Settings::values.resolution_factor);
1026 display_info.height *= static_cast<u64>(Settings::values.resolution_factor); 1038 display_info.height *= static_cast<u64>(Settings::values.resolution_factor);
@@ -1042,10 +1054,21 @@ private:
1042 1054
1043 LOG_DEBUG(Service_VI, "called. layer_id=0x{:016X}, aruid=0x{:016X}", layer_id, aruid); 1055 LOG_DEBUG(Service_VI, "called. layer_id=0x{:016X}, aruid=0x{:016X}", layer_id, aruid);
1044 1056
1045 const u64 display_id = nv_flinger->OpenDisplay(display_name); 1057 const auto display_id = nv_flinger->OpenDisplay(display_name);
1046 const u32 buffer_queue_id = nv_flinger->GetBufferQueueId(display_id, layer_id); 1058 if (!display_id) {
1059 IPC::ResponseBuilder rb{ctx, 2};
1060 rb.Push(ERR_NOT_FOUND);
1061 return;
1062 }
1063
1064 const auto buffer_queue_id = nv_flinger->FindBufferQueueId(*display_id, layer_id);
1065 if (!buffer_queue_id) {
1066 IPC::ResponseBuilder rb{ctx, 2};
1067 rb.Push(ERR_NOT_FOUND);
1068 return;
1069 }
1047 1070
1048 NativeWindow native_window{buffer_queue_id}; 1071 NativeWindow native_window{*buffer_queue_id};
1049 IPC::ResponseBuilder rb{ctx, 4}; 1072 IPC::ResponseBuilder rb{ctx, 4};
1050 rb.Push(RESULT_SUCCESS); 1073 rb.Push(RESULT_SUCCESS);
1051 rb.Push<u64>(ctx.WriteBuffer(native_window.Serialize())); 1074 rb.Push<u64>(ctx.WriteBuffer(native_window.Serialize()));
@@ -1061,13 +1084,24 @@ private:
1061 1084
1062 // TODO(Subv): What's the difference between a Stray and a Managed layer? 1085 // TODO(Subv): What's the difference between a Stray and a Managed layer?
1063 1086
1064 const u64 layer_id = nv_flinger->CreateLayer(display_id); 1087 const auto layer_id = nv_flinger->CreateLayer(display_id);
1065 const u32 buffer_queue_id = nv_flinger->GetBufferQueueId(display_id, layer_id); 1088 if (!layer_id) {
1089 IPC::ResponseBuilder rb{ctx, 2};
1090 rb.Push(ERR_NOT_FOUND);
1091 return;
1092 }
1093
1094 const auto buffer_queue_id = nv_flinger->FindBufferQueueId(display_id, *layer_id);
1095 if (!buffer_queue_id) {
1096 IPC::ResponseBuilder rb{ctx, 2};
1097 rb.Push(ERR_NOT_FOUND);
1098 return;
1099 }
1066 1100
1067 NativeWindow native_window{buffer_queue_id}; 1101 NativeWindow native_window{*buffer_queue_id};
1068 IPC::ResponseBuilder rb{ctx, 6}; 1102 IPC::ResponseBuilder rb{ctx, 6};
1069 rb.Push(RESULT_SUCCESS); 1103 rb.Push(RESULT_SUCCESS);
1070 rb.Push(layer_id); 1104 rb.Push(*layer_id);
1071 rb.Push<u64>(ctx.WriteBuffer(native_window.Serialize())); 1105 rb.Push<u64>(ctx.WriteBuffer(native_window.Serialize()));
1072 } 1106 }
1073 1107
@@ -1087,7 +1121,12 @@ private:
1087 1121
1088 LOG_WARNING(Service_VI, "(STUBBED) called. display_id=0x{:016X}", display_id); 1122 LOG_WARNING(Service_VI, "(STUBBED) called. display_id=0x{:016X}", display_id);
1089 1123
1090 const auto vsync_event = nv_flinger->GetVsyncEvent(display_id); 1124 const auto vsync_event = nv_flinger->FindVsyncEvent(display_id);
1125 if (!vsync_event) {
1126 IPC::ResponseBuilder rb{ctx, 2};
1127 rb.Push(ERR_NOT_FOUND);
1128 return;
1129 }
1091 1130
1092 IPC::ResponseBuilder rb{ctx, 2, 1}; 1131 IPC::ResponseBuilder rb{ctx, 2, 1};
1093 rb.Push(RESULT_SUCCESS); 1132 rb.Push(RESULT_SUCCESS);
@@ -1161,26 +1200,40 @@ IApplicationDisplayService::IApplicationDisplayService(
1161 RegisterHandlers(functions); 1200 RegisterHandlers(functions);
1162} 1201}
1163 1202
1164Module::Interface::Interface(std::shared_ptr<Module> module, const char* name, 1203static bool IsValidServiceAccess(Permission permission, Policy policy) {
1165 std::shared_ptr<NVFlinger::NVFlinger> nv_flinger) 1204 if (permission == Permission::User) {
1166 : ServiceFramework(name), module(std::move(module)), nv_flinger(std::move(nv_flinger)) {} 1205 return policy == Policy::User;
1206 }
1207
1208 if (permission == Permission::System || permission == Permission::Manager) {
1209 return policy == Policy::User || policy == Policy::Compositor;
1210 }
1211
1212 return false;
1213}
1167 1214
1168Module::Interface::~Interface() = default; 1215void detail::GetDisplayServiceImpl(Kernel::HLERequestContext& ctx,
1216 std::shared_ptr<NVFlinger::NVFlinger> nv_flinger,
1217 Permission permission) {
1218 IPC::RequestParser rp{ctx};
1219 const auto policy = rp.PopEnum<Policy>();
1169 1220
1170void Module::Interface::GetDisplayService(Kernel::HLERequestContext& ctx) { 1221 if (!IsValidServiceAccess(permission, policy)) {
1171 LOG_WARNING(Service_VI, "(STUBBED) called"); 1222 IPC::ResponseBuilder rb{ctx, 2};
1223 rb.Push(ERR_PERMISSION_DENIED);
1224 return;
1225 }
1172 1226
1173 IPC::ResponseBuilder rb{ctx, 2, 0, 1}; 1227 IPC::ResponseBuilder rb{ctx, 2, 0, 1};
1174 rb.Push(RESULT_SUCCESS); 1228 rb.Push(RESULT_SUCCESS);
1175 rb.PushIpcInterface<IApplicationDisplayService>(nv_flinger); 1229 rb.PushIpcInterface<IApplicationDisplayService>(std::move(nv_flinger));
1176} 1230}
1177 1231
1178void InstallInterfaces(SM::ServiceManager& service_manager, 1232void InstallInterfaces(SM::ServiceManager& service_manager,
1179 std::shared_ptr<NVFlinger::NVFlinger> nv_flinger) { 1233 std::shared_ptr<NVFlinger::NVFlinger> nv_flinger) {
1180 auto module = std::make_shared<Module>(); 1234 std::make_shared<VI_M>(nv_flinger)->InstallAsService(service_manager);
1181 std::make_shared<VI_M>(module, nv_flinger)->InstallAsService(service_manager); 1235 std::make_shared<VI_S>(nv_flinger)->InstallAsService(service_manager);
1182 std::make_shared<VI_S>(module, nv_flinger)->InstallAsService(service_manager); 1236 std::make_shared<VI_U>(nv_flinger)->InstallAsService(service_manager);
1183 std::make_shared<VI_U>(module, nv_flinger)->InstallAsService(service_manager);
1184} 1237}
1185 1238
1186} // namespace Service::VI 1239} // namespace Service::VI
diff --git a/src/core/hle/service/vi/vi.h b/src/core/hle/service/vi/vi.h
index e3963502a..6b66f8b81 100644
--- a/src/core/hle/service/vi/vi.h
+++ b/src/core/hle/service/vi/vi.h
@@ -4,12 +4,21 @@
4 4
5#pragma once 5#pragma once
6 6
7#include "core/hle/service/service.h" 7#include <memory>
8#include "common/common_types.h"
9
10namespace Kernel {
11class HLERequestContext;
12}
8 13
9namespace Service::NVFlinger { 14namespace Service::NVFlinger {
10class NVFlinger; 15class NVFlinger;
11} 16}
12 17
18namespace Service::SM {
19class ServiceManager;
20}
21
13namespace Service::VI { 22namespace Service::VI {
14 23
15enum class DisplayResolution : u32 { 24enum class DisplayResolution : u32 {
@@ -19,22 +28,25 @@ enum class DisplayResolution : u32 {
19 UndockedHeight = 720, 28 UndockedHeight = 720,
20}; 29};
21 30
22class Module final { 31/// Permission level for a particular VI service instance
23public: 32enum class Permission {
24 class Interface : public ServiceFramework<Interface> { 33 User,
25 public: 34 System,
26 explicit Interface(std::shared_ptr<Module> module, const char* name, 35 Manager,
27 std::shared_ptr<NVFlinger::NVFlinger> nv_flinger); 36};
28 ~Interface() override;
29
30 void GetDisplayService(Kernel::HLERequestContext& ctx);
31 37
32 protected: 38/// A policy type that may be requested via GetDisplayService and
33 std::shared_ptr<Module> module; 39/// GetDisplayServiceWithProxyNameExchange
34 std::shared_ptr<NVFlinger::NVFlinger> nv_flinger; 40enum class Policy {
35 }; 41 User,
42 Compositor,
36}; 43};
37 44
45namespace detail {
46void GetDisplayServiceImpl(Kernel::HLERequestContext& ctx,
47 std::shared_ptr<NVFlinger::NVFlinger> nv_flinger, Permission permission);
48} // namespace detail
49
38/// Registers all VI services with the specified service manager. 50/// Registers all VI services with the specified service manager.
39void InstallInterfaces(SM::ServiceManager& service_manager, 51void InstallInterfaces(SM::ServiceManager& service_manager,
40 std::shared_ptr<NVFlinger::NVFlinger> nv_flinger); 52 std::shared_ptr<NVFlinger::NVFlinger> nv_flinger);
diff --git a/src/core/hle/service/vi/vi_m.cpp b/src/core/hle/service/vi/vi_m.cpp
index 207c06b16..06070087f 100644
--- a/src/core/hle/service/vi/vi_m.cpp
+++ b/src/core/hle/service/vi/vi_m.cpp
@@ -2,12 +2,14 @@
2// Licensed under GPLv2 or any later version 2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included. 3// Refer to the license.txt file included.
4 4
5#include "common/logging/log.h"
6#include "core/hle/service/vi/vi.h"
5#include "core/hle/service/vi/vi_m.h" 7#include "core/hle/service/vi/vi_m.h"
6 8
7namespace Service::VI { 9namespace Service::VI {
8 10
9VI_M::VI_M(std::shared_ptr<Module> module, std::shared_ptr<NVFlinger::NVFlinger> nv_flinger) 11VI_M::VI_M(std::shared_ptr<NVFlinger::NVFlinger> nv_flinger)
10 : Module::Interface(std::move(module), "vi:m", std::move(nv_flinger)) { 12 : ServiceFramework{"vi:m"}, nv_flinger{std::move(nv_flinger)} {
11 static const FunctionInfo functions[] = { 13 static const FunctionInfo functions[] = {
12 {2, &VI_M::GetDisplayService, "GetDisplayService"}, 14 {2, &VI_M::GetDisplayService, "GetDisplayService"},
13 {3, nullptr, "GetDisplayServiceWithProxyNameExchange"}, 15 {3, nullptr, "GetDisplayServiceWithProxyNameExchange"},
@@ -17,4 +19,10 @@ VI_M::VI_M(std::shared_ptr<Module> module, std::shared_ptr<NVFlinger::NVFlinger>
17 19
18VI_M::~VI_M() = default; 20VI_M::~VI_M() = default;
19 21
22void VI_M::GetDisplayService(Kernel::HLERequestContext& ctx) {
23 LOG_DEBUG(Service_VI, "called");
24
25 detail::GetDisplayServiceImpl(ctx, nv_flinger, Permission::Manager);
26}
27
20} // namespace Service::VI 28} // namespace Service::VI
diff --git a/src/core/hle/service/vi/vi_m.h b/src/core/hle/service/vi/vi_m.h
index 487d58d50..290e06689 100644
--- a/src/core/hle/service/vi/vi_m.h
+++ b/src/core/hle/service/vi/vi_m.h
@@ -4,14 +4,27 @@
4 4
5#pragma once 5#pragma once
6 6
7#include "core/hle/service/vi/vi.h" 7#include "core/hle/service/service.h"
8
9namespace Kernel {
10class HLERequestContext;
11}
12
13namespace Service::NVFlinger {
14class NVFlinger;
15}
8 16
9namespace Service::VI { 17namespace Service::VI {
10 18
11class VI_M final : public Module::Interface { 19class VI_M final : public ServiceFramework<VI_M> {
12public: 20public:
13 explicit VI_M(std::shared_ptr<Module> module, std::shared_ptr<NVFlinger::NVFlinger> nv_flinger); 21 explicit VI_M(std::shared_ptr<NVFlinger::NVFlinger> nv_flinger);
14 ~VI_M() override; 22 ~VI_M() override;
23
24private:
25 void GetDisplayService(Kernel::HLERequestContext& ctx);
26
27 std::shared_ptr<NVFlinger::NVFlinger> nv_flinger;
15}; 28};
16 29
17} // namespace Service::VI 30} // namespace Service::VI
diff --git a/src/core/hle/service/vi/vi_s.cpp b/src/core/hle/service/vi/vi_s.cpp
index 920e6a1f6..57c596cc4 100644
--- a/src/core/hle/service/vi/vi_s.cpp
+++ b/src/core/hle/service/vi/vi_s.cpp
@@ -2,12 +2,14 @@
2// Licensed under GPLv2 or any later version 2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included. 3// Refer to the license.txt file included.
4 4
5#include "common/logging/log.h"
6#include "core/hle/service/vi/vi.h"
5#include "core/hle/service/vi/vi_s.h" 7#include "core/hle/service/vi/vi_s.h"
6 8
7namespace Service::VI { 9namespace Service::VI {
8 10
9VI_S::VI_S(std::shared_ptr<Module> module, std::shared_ptr<NVFlinger::NVFlinger> nv_flinger) 11VI_S::VI_S(std::shared_ptr<NVFlinger::NVFlinger> nv_flinger)
10 : Module::Interface(std::move(module), "vi:s", std::move(nv_flinger)) { 12 : ServiceFramework{"vi:s"}, nv_flinger{std::move(nv_flinger)} {
11 static const FunctionInfo functions[] = { 13 static const FunctionInfo functions[] = {
12 {1, &VI_S::GetDisplayService, "GetDisplayService"}, 14 {1, &VI_S::GetDisplayService, "GetDisplayService"},
13 {3, nullptr, "GetDisplayServiceWithProxyNameExchange"}, 15 {3, nullptr, "GetDisplayServiceWithProxyNameExchange"},
@@ -17,4 +19,10 @@ VI_S::VI_S(std::shared_ptr<Module> module, std::shared_ptr<NVFlinger::NVFlinger>
17 19
18VI_S::~VI_S() = default; 20VI_S::~VI_S() = default;
19 21
22void VI_S::GetDisplayService(Kernel::HLERequestContext& ctx) {
23 LOG_DEBUG(Service_VI, "called");
24
25 detail::GetDisplayServiceImpl(ctx, nv_flinger, Permission::System);
26}
27
20} // namespace Service::VI 28} // namespace Service::VI
diff --git a/src/core/hle/service/vi/vi_s.h b/src/core/hle/service/vi/vi_s.h
index bbc31148f..47804dc0b 100644
--- a/src/core/hle/service/vi/vi_s.h
+++ b/src/core/hle/service/vi/vi_s.h
@@ -4,14 +4,27 @@
4 4
5#pragma once 5#pragma once
6 6
7#include "core/hle/service/vi/vi.h" 7#include "core/hle/service/service.h"
8
9namespace Kernel {
10class HLERequestContext;
11}
12
13namespace Service::NVFlinger {
14class NVFlinger;
15}
8 16
9namespace Service::VI { 17namespace Service::VI {
10 18
11class VI_S final : public Module::Interface { 19class VI_S final : public ServiceFramework<VI_S> {
12public: 20public:
13 explicit VI_S(std::shared_ptr<Module> module, std::shared_ptr<NVFlinger::NVFlinger> nv_flinger); 21 explicit VI_S(std::shared_ptr<NVFlinger::NVFlinger> nv_flinger);
14 ~VI_S() override; 22 ~VI_S() override;
23
24private:
25 void GetDisplayService(Kernel::HLERequestContext& ctx);
26
27 std::shared_ptr<NVFlinger::NVFlinger> nv_flinger;
15}; 28};
16 29
17} // namespace Service::VI 30} // namespace Service::VI
diff --git a/src/core/hle/service/vi/vi_u.cpp b/src/core/hle/service/vi/vi_u.cpp
index d81e410d6..9d5ceb608 100644
--- a/src/core/hle/service/vi/vi_u.cpp
+++ b/src/core/hle/service/vi/vi_u.cpp
@@ -2,12 +2,14 @@
2// Licensed under GPLv2 or any later version 2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included. 3// Refer to the license.txt file included.
4 4
5#include "common/logging/log.h"
6#include "core/hle/service/vi/vi.h"
5#include "core/hle/service/vi/vi_u.h" 7#include "core/hle/service/vi/vi_u.h"
6 8
7namespace Service::VI { 9namespace Service::VI {
8 10
9VI_U::VI_U(std::shared_ptr<Module> module, std::shared_ptr<NVFlinger::NVFlinger> nv_flinger) 11VI_U::VI_U(std::shared_ptr<NVFlinger::NVFlinger> nv_flinger)
10 : Module::Interface(std::move(module), "vi:u", std::move(nv_flinger)) { 12 : ServiceFramework{"vi:u"}, nv_flinger{std::move(nv_flinger)} {
11 static const FunctionInfo functions[] = { 13 static const FunctionInfo functions[] = {
12 {0, &VI_U::GetDisplayService, "GetDisplayService"}, 14 {0, &VI_U::GetDisplayService, "GetDisplayService"},
13 }; 15 };
@@ -16,4 +18,10 @@ VI_U::VI_U(std::shared_ptr<Module> module, std::shared_ptr<NVFlinger::NVFlinger>
16 18
17VI_U::~VI_U() = default; 19VI_U::~VI_U() = default;
18 20
21void VI_U::GetDisplayService(Kernel::HLERequestContext& ctx) {
22 LOG_DEBUG(Service_VI, "called");
23
24 detail::GetDisplayServiceImpl(ctx, nv_flinger, Permission::User);
25}
26
19} // namespace Service::VI 27} // namespace Service::VI
diff --git a/src/core/hle/service/vi/vi_u.h b/src/core/hle/service/vi/vi_u.h
index b92f28c92..19bdb73b0 100644
--- a/src/core/hle/service/vi/vi_u.h
+++ b/src/core/hle/service/vi/vi_u.h
@@ -4,14 +4,27 @@
4 4
5#pragma once 5#pragma once
6 6
7#include "core/hle/service/vi/vi.h" 7#include "core/hle/service/service.h"
8
9namespace Kernel {
10class HLERequestContext;
11}
12
13namespace Service::NVFlinger {
14class NVFlinger;
15}
8 16
9namespace Service::VI { 17namespace Service::VI {
10 18
11class VI_U final : public Module::Interface { 19class VI_U final : public ServiceFramework<VI_U> {
12public: 20public:
13 explicit VI_U(std::shared_ptr<Module> module, std::shared_ptr<NVFlinger::NVFlinger> nv_flinger); 21 explicit VI_U(std::shared_ptr<NVFlinger::NVFlinger> nv_flinger);
14 ~VI_U() override; 22 ~VI_U() override;
23
24private:
25 void GetDisplayService(Kernel::HLERequestContext& ctx);
26
27 std::shared_ptr<NVFlinger::NVFlinger> nv_flinger;
15}; 28};
16 29
17} // namespace Service::VI 30} // namespace Service::VI
diff --git a/src/core/loader/elf.cpp b/src/core/loader/elf.cpp
index 6057c7f26..46ac372f6 100644
--- a/src/core/loader/elf.cpp
+++ b/src/core/loader/elf.cpp
@@ -9,6 +9,7 @@
9#include "common/common_types.h" 9#include "common/common_types.h"
10#include "common/file_util.h" 10#include "common/file_util.h"
11#include "common/logging/log.h" 11#include "common/logging/log.h"
12#include "core/hle/kernel/code_set.h"
12#include "core/hle/kernel/process.h" 13#include "core/hle/kernel/process.h"
13#include "core/hle/kernel/vm_manager.h" 14#include "core/hle/kernel/vm_manager.h"
14#include "core/loader/elf.h" 15#include "core/loader/elf.h"
@@ -340,7 +341,7 @@ Kernel::CodeSet ElfReader::LoadInto(VAddr vaddr) {
340 } 341 }
341 342
342 codeset.entrypoint = base_addr + header->e_entry; 343 codeset.entrypoint = base_addr + header->e_entry;
343 codeset.memory = std::make_shared<std::vector<u8>>(std::move(program_image)); 344 codeset.memory = std::move(program_image);
344 345
345 LOG_DEBUG(Loader, "Done loading."); 346 LOG_DEBUG(Loader, "Done loading.");
346 347
diff --git a/src/core/loader/linker.cpp b/src/core/loader/linker.cpp
deleted file mode 100644
index 57ca8c3ee..000000000
--- a/src/core/loader/linker.cpp
+++ /dev/null
@@ -1,147 +0,0 @@
1// Copyright 2018 yuzu emulator team
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <vector>
6
7#include "common/common_funcs.h"
8#include "common/logging/log.h"
9#include "common/swap.h"
10#include "core/loader/linker.h"
11#include "core/memory.h"
12
13namespace Loader {
14
15enum class RelocationType : u32 { ABS64 = 257, GLOB_DAT = 1025, JUMP_SLOT = 1026, RELATIVE = 1027 };
16
17enum DynamicType : u32 {
18 DT_NULL = 0,
19 DT_PLTRELSZ = 2,
20 DT_STRTAB = 5,
21 DT_SYMTAB = 6,
22 DT_RELA = 7,
23 DT_RELASZ = 8,
24 DT_STRSZ = 10,
25 DT_JMPREL = 23,
26};
27
28struct Elf64_Rela {
29 u64_le offset;
30 RelocationType type;
31 u32_le symbol;
32 s64_le addend;
33};
34static_assert(sizeof(Elf64_Rela) == 0x18, "Elf64_Rela has incorrect size.");
35
36struct Elf64_Dyn {
37 u64_le tag;
38 u64_le value;
39};
40static_assert(sizeof(Elf64_Dyn) == 0x10, "Elf64_Dyn has incorrect size.");
41
42struct Elf64_Sym {
43 u32_le name;
44 INSERT_PADDING_BYTES(0x2);
45 u16_le shndx;
46 u64_le value;
47 u64_le size;
48};
49static_assert(sizeof(Elf64_Sym) == 0x18, "Elf64_Sym has incorrect size.");
50
51void Linker::WriteRelocations(std::vector<u8>& program_image, const std::vector<Symbol>& symbols,
52 u64 relocation_offset, u64 size, VAddr load_base) {
53 for (u64 i = 0; i < size; i += sizeof(Elf64_Rela)) {
54 Elf64_Rela rela;
55 std::memcpy(&rela, &program_image[relocation_offset + i], sizeof(Elf64_Rela));
56
57 const Symbol& symbol = symbols[rela.symbol];
58 switch (rela.type) {
59 case RelocationType::RELATIVE: {
60 const u64 value = load_base + rela.addend;
61 if (!symbol.name.empty()) {
62 exports[symbol.name] = value;
63 }
64 std::memcpy(&program_image[rela.offset], &value, sizeof(u64));
65 break;
66 }
67 case RelocationType::JUMP_SLOT:
68 case RelocationType::GLOB_DAT:
69 if (!symbol.value) {
70 imports[symbol.name] = {rela.offset + load_base, 0};
71 } else {
72 exports[symbol.name] = symbol.value;
73 std::memcpy(&program_image[rela.offset], &symbol.value, sizeof(u64));
74 }
75 break;
76 case RelocationType::ABS64:
77 if (!symbol.value) {
78 imports[symbol.name] = {rela.offset + load_base, rela.addend};
79 } else {
80 const u64 value = symbol.value + rela.addend;
81 exports[symbol.name] = value;
82 std::memcpy(&program_image[rela.offset], &value, sizeof(u64));
83 }
84 break;
85 default:
86 LOG_CRITICAL(Loader, "Unknown relocation type: {}", static_cast<int>(rela.type));
87 break;
88 }
89 }
90}
91
92void Linker::Relocate(std::vector<u8>& program_image, u32 dynamic_section_offset, VAddr load_base) {
93 std::map<u64, u64> dynamic;
94 while (dynamic_section_offset < program_image.size()) {
95 Elf64_Dyn dyn;
96 std::memcpy(&dyn, &program_image[dynamic_section_offset], sizeof(Elf64_Dyn));
97 dynamic_section_offset += sizeof(Elf64_Dyn);
98
99 if (dyn.tag == DT_NULL) {
100 break;
101 }
102 dynamic[dyn.tag] = dyn.value;
103 }
104
105 u64 offset = dynamic[DT_SYMTAB];
106 std::vector<Symbol> symbols;
107 while (offset < program_image.size()) {
108 Elf64_Sym sym;
109 std::memcpy(&sym, &program_image[offset], sizeof(Elf64_Sym));
110 offset += sizeof(Elf64_Sym);
111
112 if (sym.name >= dynamic[DT_STRSZ]) {
113 break;
114 }
115
116 std::string name = reinterpret_cast<char*>(&program_image[dynamic[DT_STRTAB] + sym.name]);
117 if (sym.value) {
118 exports[name] = load_base + sym.value;
119 symbols.emplace_back(std::move(name), load_base + sym.value);
120 } else {
121 symbols.emplace_back(std::move(name), 0);
122 }
123 }
124
125 if (dynamic.find(DT_RELA) != dynamic.end()) {
126 WriteRelocations(program_image, symbols, dynamic[DT_RELA], dynamic[DT_RELASZ], load_base);
127 }
128
129 if (dynamic.find(DT_JMPREL) != dynamic.end()) {
130 WriteRelocations(program_image, symbols, dynamic[DT_JMPREL], dynamic[DT_PLTRELSZ],
131 load_base);
132 }
133}
134
135void Linker::ResolveImports() {
136 // Resolve imports
137 for (const auto& import : imports) {
138 const auto& search = exports.find(import.first);
139 if (search != exports.end()) {
140 Memory::Write64(import.second.ea, search->second + import.second.addend);
141 } else {
142 LOG_ERROR(Loader, "Unresolved import: {}", import.first);
143 }
144 }
145}
146
147} // namespace Loader
diff --git a/src/core/loader/linker.h b/src/core/loader/linker.h
deleted file mode 100644
index 107625837..000000000
--- a/src/core/loader/linker.h
+++ /dev/null
@@ -1,36 +0,0 @@
1// Copyright 2018 yuzu emulator team
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <map>
8#include <string>
9#include "common/common_types.h"
10
11namespace Loader {
12
13class Linker {
14protected:
15 struct Symbol {
16 Symbol(std::string&& name, u64 value) : name(std::move(name)), value(value) {}
17 std::string name;
18 u64 value;
19 };
20
21 struct Import {
22 VAddr ea;
23 s64 addend;
24 };
25
26 void WriteRelocations(std::vector<u8>& program_image, const std::vector<Symbol>& symbols,
27 u64 relocation_offset, u64 size, VAddr load_base);
28 void Relocate(std::vector<u8>& program_image, u32 dynamic_section_offset, VAddr load_base);
29
30 void ResolveImports();
31
32 std::map<std::string, Import> imports;
33 std::map<std::string, VAddr> exports;
34};
35
36} // namespace Loader
diff --git a/src/core/loader/nro.cpp b/src/core/loader/nro.cpp
index 4fad0c0dd..31e4a0c84 100644
--- a/src/core/loader/nro.cpp
+++ b/src/core/loader/nro.cpp
@@ -14,6 +14,7 @@
14#include "core/file_sys/romfs_factory.h" 14#include "core/file_sys/romfs_factory.h"
15#include "core/file_sys/vfs_offset.h" 15#include "core/file_sys/vfs_offset.h"
16#include "core/gdbstub/gdbstub.h" 16#include "core/gdbstub/gdbstub.h"
17#include "core/hle/kernel/code_set.h"
17#include "core/hle/kernel/process.h" 18#include "core/hle/kernel/process.h"
18#include "core/hle/kernel/vm_manager.h" 19#include "core/hle/kernel/vm_manager.h"
19#include "core/hle/service/filesystem/filesystem.h" 20#include "core/hle/service/filesystem/filesystem.h"
@@ -186,7 +187,7 @@ static bool LoadNroImpl(Kernel::Process& process, const std::vector<u8>& data,
186 program_image.resize(static_cast<u32>(program_image.size()) + bss_size); 187 program_image.resize(static_cast<u32>(program_image.size()) + bss_size);
187 188
188 // Load codeset for current process 189 // Load codeset for current process
189 codeset.memory = std::make_shared<std::vector<u8>>(std::move(program_image)); 190 codeset.memory = std::move(program_image);
190 process.LoadModule(std::move(codeset), load_base); 191 process.LoadModule(std::move(codeset), load_base);
191 192
192 // Register module with GDBStub 193 // Register module with GDBStub
diff --git a/src/core/loader/nro.h b/src/core/loader/nro.h
index 013d629c0..85b0ed644 100644
--- a/src/core/loader/nro.h
+++ b/src/core/loader/nro.h
@@ -4,10 +4,10 @@
4 4
5#pragma once 5#pragma once
6 6
7#include <memory>
7#include <string> 8#include <string>
8#include <vector> 9#include <vector>
9#include "common/common_types.h" 10#include "common/common_types.h"
10#include "core/loader/linker.h"
11#include "core/loader/loader.h" 11#include "core/loader/loader.h"
12 12
13namespace FileSys { 13namespace FileSys {
@@ -21,7 +21,7 @@ class Process;
21namespace Loader { 21namespace Loader {
22 22
23/// Loads an NRO file 23/// Loads an NRO file
24class AppLoader_NRO final : public AppLoader, Linker { 24class AppLoader_NRO final : public AppLoader {
25public: 25public:
26 explicit AppLoader_NRO(FileSys::VirtualFile file); 26 explicit AppLoader_NRO(FileSys::VirtualFile file);
27 ~AppLoader_NRO() override; 27 ~AppLoader_NRO() override;
diff --git a/src/core/loader/nso.cpp b/src/core/loader/nso.cpp
index 6ded0b707..d7c47c197 100644
--- a/src/core/loader/nso.cpp
+++ b/src/core/loader/nso.cpp
@@ -4,50 +4,28 @@
4 4
5#include <cinttypes> 5#include <cinttypes>
6#include <vector> 6#include <vector>
7#include <lz4.h> 7
8#include "common/common_funcs.h" 8#include "common/common_funcs.h"
9#include "common/file_util.h" 9#include "common/file_util.h"
10#include "common/hex_util.h"
10#include "common/logging/log.h" 11#include "common/logging/log.h"
12#include "common/lz4_compression.h"
11#include "common/swap.h" 13#include "common/swap.h"
14#include "core/core.h"
12#include "core/file_sys/patch_manager.h" 15#include "core/file_sys/patch_manager.h"
13#include "core/gdbstub/gdbstub.h" 16#include "core/gdbstub/gdbstub.h"
17#include "core/hle/kernel/code_set.h"
14#include "core/hle/kernel/process.h" 18#include "core/hle/kernel/process.h"
15#include "core/hle/kernel/vm_manager.h" 19#include "core/hle/kernel/vm_manager.h"
16#include "core/loader/nso.h" 20#include "core/loader/nso.h"
17#include "core/memory.h" 21#include "core/memory.h"
18#include "core/settings.h" 22#include "core/settings.h"
19 23
20namespace Loader { 24#pragma optimize("", off)
21 25
22struct NsoSegmentHeader { 26namespace Loader {
23 u32_le offset; 27namespace {
24 u32_le location; 28struct MODHeader {
25 u32_le size;
26 union {
27 u32_le alignment;
28 u32_le bss_size;
29 };
30};
31static_assert(sizeof(NsoSegmentHeader) == 0x10, "NsoSegmentHeader has incorrect size.");
32
33struct NsoHeader {
34 u32_le magic;
35 u32_le version;
36 INSERT_PADDING_WORDS(1);
37 u8 flags;
38 std::array<NsoSegmentHeader, 3> segments; // Text, RoData, Data (in that order)
39 std::array<u8, 0x20> build_id;
40 std::array<u32_le, 3> segments_compressed_size;
41
42 bool IsSegmentCompressed(size_t segment_num) const {
43 ASSERT_MSG(segment_num < 3, "Invalid segment {}", segment_num);
44 return ((flags >> segment_num) & 1);
45 }
46};
47static_assert(sizeof(NsoHeader) == 0x6c, "NsoHeader has incorrect size.");
48static_assert(std::is_trivially_copyable_v<NsoHeader>, "NsoHeader isn't trivially copyable.");
49
50struct ModHeader {
51 u32_le magic; 29 u32_le magic;
52 u32_le dynamic_offset; 30 u32_le dynamic_offset;
53 u32_le bss_start_offset; 31 u32_le bss_start_offset;
@@ -56,7 +34,28 @@ struct ModHeader {
56 u32_le eh_frame_hdr_end_offset; 34 u32_le eh_frame_hdr_end_offset;
57 u32_le module_offset; // Offset to runtime-generated module object. typically equal to .bss base 35 u32_le module_offset; // Offset to runtime-generated module object. typically equal to .bss base
58}; 36};
59static_assert(sizeof(ModHeader) == 0x1c, "ModHeader has incorrect size."); 37static_assert(sizeof(MODHeader) == 0x1c, "MODHeader has incorrect size.");
38
39std::vector<u8> DecompressSegment(const std::vector<u8>& compressed_data,
40 const NSOSegmentHeader& header) {
41 const std::vector<u8> uncompressed_data =
42 Common::Compression::DecompressDataLZ4(compressed_data, header.size);
43
44 ASSERT_MSG(uncompressed_data.size() == static_cast<int>(header.size), "{} != {}", header.size,
45 uncompressed_data.size());
46
47 return uncompressed_data;
48}
49
50constexpr u32 PageAlignSize(u32 size) {
51 return (size + Memory::PAGE_MASK) & ~Memory::PAGE_MASK;
52}
53} // Anonymous namespace
54
55bool NSOHeader::IsSegmentCompressed(size_t segment_num) const {
56 ASSERT_MSG(segment_num < 3, "Invalid segment {}", segment_num);
57 return ((flags >> segment_num) & 1) != 0;
58}
60 59
61AppLoader_NSO::AppLoader_NSO(FileSys::VirtualFile file) : AppLoader(std::move(file)) {} 60AppLoader_NSO::AppLoader_NSO(FileSys::VirtualFile file) : AppLoader(std::move(file)) {}
62 61
@@ -73,38 +72,22 @@ FileType AppLoader_NSO::IdentifyType(const FileSys::VirtualFile& file) {
73 return FileType::NSO; 72 return FileType::NSO;
74} 73}
75 74
76static std::vector<u8> DecompressSegment(const std::vector<u8>& compressed_data,
77 const NsoSegmentHeader& header) {
78 std::vector<u8> uncompressed_data(header.size);
79 const int bytes_uncompressed =
80 LZ4_decompress_safe(reinterpret_cast<const char*>(compressed_data.data()),
81 reinterpret_cast<char*>(uncompressed_data.data()),
82 static_cast<int>(compressed_data.size()), header.size);
83
84 ASSERT_MSG(bytes_uncompressed == static_cast<int>(header.size) &&
85 bytes_uncompressed == static_cast<int>(uncompressed_data.size()),
86 "{} != {} != {}", bytes_uncompressed, header.size, uncompressed_data.size());
87
88 return uncompressed_data;
89}
90
91static constexpr u32 PageAlignSize(u32 size) {
92 return (size + Memory::PAGE_MASK) & ~Memory::PAGE_MASK;
93}
94
95std::optional<VAddr> AppLoader_NSO::LoadModule(Kernel::Process& process, 75std::optional<VAddr> AppLoader_NSO::LoadModule(Kernel::Process& process,
96 const FileSys::VfsFile& file, VAddr load_base, 76 const FileSys::VfsFile& file, VAddr load_base,
97 bool should_pass_arguments, 77 bool should_pass_arguments,
98 std::optional<FileSys::PatchManager> pm) { 78 std::optional<FileSys::PatchManager> pm) {
99 if (file.GetSize() < sizeof(NsoHeader)) 79 if (file.GetSize() < sizeof(NSOHeader)) {
100 return {}; 80 return {};
81 }
101 82
102 NsoHeader nso_header{}; 83 NSOHeader nso_header{};
103 if (sizeof(NsoHeader) != file.ReadObject(&nso_header)) 84 if (sizeof(NSOHeader) != file.ReadObject(&nso_header)) {
104 return {}; 85 return {};
86 }
105 87
106 if (nso_header.magic != Common::MakeMagic('N', 'S', 'O', '0')) 88 if (nso_header.magic != Common::MakeMagic('N', 'S', 'O', '0')) {
107 return {}; 89 return {};
90 }
108 91
109 // Build program image 92 // Build program image
110 Kernel::CodeSet codeset; 93 Kernel::CodeSet codeset;
@@ -140,10 +123,10 @@ std::optional<VAddr> AppLoader_NSO::LoadModule(Kernel::Process& process,
140 std::memcpy(&module_offset, program_image.data() + 4, sizeof(u32)); 123 std::memcpy(&module_offset, program_image.data() + 4, sizeof(u32));
141 124
142 // Read MOD header 125 // Read MOD header
143 ModHeader mod_header{}; 126 MODHeader mod_header{};
144 // Default .bss to size in segment header if MOD0 section doesn't exist 127 // Default .bss to size in segment header if MOD0 section doesn't exist
145 u32 bss_size{PageAlignSize(nso_header.segments[2].bss_size)}; 128 u32 bss_size{PageAlignSize(nso_header.segments[2].bss_size)};
146 std::memcpy(&mod_header, program_image.data() + module_offset, sizeof(ModHeader)); 129 std::memcpy(&mod_header, program_image.data() + module_offset, sizeof(MODHeader));
147 const bool has_mod_header{mod_header.magic == Common::MakeMagic('M', 'O', 'D', '0')}; 130 const bool has_mod_header{mod_header.magic == Common::MakeMagic('M', 'O', 'D', '0')};
148 if (has_mod_header) { 131 if (has_mod_header) {
149 // Resize program image to include .bss section and page align each section 132 // Resize program image to include .bss section and page align each section
@@ -155,17 +138,29 @@ std::optional<VAddr> AppLoader_NSO::LoadModule(Kernel::Process& process,
155 138
156 // Apply patches if necessary 139 // Apply patches if necessary
157 if (pm && (pm->HasNSOPatch(nso_header.build_id) || Settings::values.dump_nso)) { 140 if (pm && (pm->HasNSOPatch(nso_header.build_id) || Settings::values.dump_nso)) {
158 std::vector<u8> pi_header(program_image.size() + 0x100); 141 std::vector<u8> pi_header;
159 std::memcpy(pi_header.data(), &nso_header, sizeof(NsoHeader)); 142 pi_header.insert(pi_header.begin(), reinterpret_cast<u8*>(&nso_header),
160 std::memcpy(pi_header.data() + 0x100, program_image.data(), program_image.size()); 143 reinterpret_cast<u8*>(&nso_header) + sizeof(NSOHeader));
144 pi_header.insert(pi_header.begin() + sizeof(NSOHeader), program_image.begin(),
145 program_image.end());
146
147 pi_header = pm->PatchNSO(pi_header, file.GetName());
161 148
162 pi_header = pm->PatchNSO(pi_header); 149 std::copy(pi_header.begin() + sizeof(NSOHeader), pi_header.end(), program_image.begin());
150 }
163 151
164 std::memcpy(program_image.data(), pi_header.data() + 0x100, program_image.size()); 152 // Apply cheats if they exist and the program has a valid title ID
153 if (pm) {
154 auto& system = Core::System::GetInstance();
155 const auto cheats = pm->CreateCheatList(system, nso_header.build_id);
156 if (!cheats.empty()) {
157 system.RegisterCheatList(cheats, Common::HexArrayToString(nso_header.build_id),
158 load_base, load_base + program_image.size());
159 }
165 } 160 }
166 161
167 // Load codeset for current process 162 // Load codeset for current process
168 codeset.memory = std::make_shared<std::vector<u8>>(std::move(program_image)); 163 codeset.memory = std::move(program_image);
169 process.LoadModule(std::move(codeset), load_base); 164 process.LoadModule(std::move(codeset), load_base);
170 165
171 // Register module with GDBStub 166 // Register module with GDBStub
diff --git a/src/core/loader/nso.h b/src/core/loader/nso.h
index 135b6ea5a..4674c3724 100644
--- a/src/core/loader/nso.h
+++ b/src/core/loader/nso.h
@@ -4,10 +4,12 @@
4 4
5#pragma once 5#pragma once
6 6
7#include <array>
7#include <optional> 8#include <optional>
9#include <type_traits>
8#include "common/common_types.h" 10#include "common/common_types.h"
11#include "common/swap.h"
9#include "core/file_sys/patch_manager.h" 12#include "core/file_sys/patch_manager.h"
10#include "core/loader/linker.h"
11#include "core/loader/loader.h" 13#include "core/loader/loader.h"
12 14
13namespace Kernel { 15namespace Kernel {
@@ -16,6 +18,43 @@ class Process;
16 18
17namespace Loader { 19namespace Loader {
18 20
21struct NSOSegmentHeader {
22 u32_le offset;
23 u32_le location;
24 u32_le size;
25 union {
26 u32_le alignment;
27 u32_le bss_size;
28 };
29};
30static_assert(sizeof(NSOSegmentHeader) == 0x10, "NsoSegmentHeader has incorrect size.");
31
32struct NSOHeader {
33 using SHA256Hash = std::array<u8, 0x20>;
34
35 struct RODataRelativeExtent {
36 u32_le data_offset;
37 u32_le size;
38 };
39
40 u32_le magic;
41 u32_le version;
42 u32 reserved;
43 u32_le flags;
44 std::array<NSOSegmentHeader, 3> segments; // Text, RoData, Data (in that order)
45 std::array<u8, 0x20> build_id;
46 std::array<u32_le, 3> segments_compressed_size;
47 std::array<u8, 0x1C> padding;
48 RODataRelativeExtent api_info_extent;
49 RODataRelativeExtent dynstr_extent;
50 RODataRelativeExtent dynsyn_extent;
51 std::array<SHA256Hash, 3> segment_hashes;
52
53 bool IsSegmentCompressed(size_t segment_num) const;
54};
55static_assert(sizeof(NSOHeader) == 0x100, "NSOHeader has incorrect size.");
56static_assert(std::is_trivially_copyable_v<NSOHeader>, "NSOHeader must be trivially copyable.");
57
19constexpr u64 NSO_ARGUMENT_DATA_ALLOCATION_SIZE = 0x9000; 58constexpr u64 NSO_ARGUMENT_DATA_ALLOCATION_SIZE = 0x9000;
20 59
21struct NSOArgumentHeader { 60struct NSOArgumentHeader {
@@ -26,7 +65,7 @@ struct NSOArgumentHeader {
26static_assert(sizeof(NSOArgumentHeader) == 0x20, "NSOArgumentHeader has incorrect size."); 65static_assert(sizeof(NSOArgumentHeader) == 0x20, "NSOArgumentHeader has incorrect size.");
27 66
28/// Loads an NSO file 67/// Loads an NSO file
29class AppLoader_NSO final : public AppLoader, Linker { 68class AppLoader_NSO final : public AppLoader {
30public: 69public:
31 explicit AppLoader_NSO(FileSys::VirtualFile file); 70 explicit AppLoader_NSO(FileSys::VirtualFile file);
32 71
diff --git a/src/core/loader/xci.h b/src/core/loader/xci.h
index d6995b61e..436f7387c 100644
--- a/src/core/loader/xci.h
+++ b/src/core/loader/xci.h
@@ -22,7 +22,7 @@ class AppLoader_NCA;
22class AppLoader_XCI final : public AppLoader { 22class AppLoader_XCI final : public AppLoader {
23public: 23public:
24 explicit AppLoader_XCI(FileSys::VirtualFile file); 24 explicit AppLoader_XCI(FileSys::VirtualFile file);
25 ~AppLoader_XCI(); 25 ~AppLoader_XCI() override;
26 26
27 /** 27 /**
28 * Returns the type of the file 28 * Returns the type of the file
diff --git a/src/core/memory.cpp b/src/core/memory.cpp
index e9166dbd9..4e0538bc2 100644
--- a/src/core/memory.cpp
+++ b/src/core/memory.cpp
@@ -10,6 +10,7 @@
10#include "common/assert.h" 10#include "common/assert.h"
11#include "common/common_types.h" 11#include "common/common_types.h"
12#include "common/logging/log.h" 12#include "common/logging/log.h"
13#include "common/page_table.h"
13#include "common/swap.h" 14#include "common/swap.h"
14#include "core/arm/arm_interface.h" 15#include "core/arm/arm_interface.h"
15#include "core/core.h" 16#include "core/core.h"
@@ -18,13 +19,14 @@
18#include "core/hle/lock.h" 19#include "core/hle/lock.h"
19#include "core/memory.h" 20#include "core/memory.h"
20#include "core/memory_setup.h" 21#include "core/memory_setup.h"
22#include "video_core/gpu.h"
21#include "video_core/renderer_base.h" 23#include "video_core/renderer_base.h"
22 24
23namespace Memory { 25namespace Memory {
24 26
25static PageTable* current_page_table = nullptr; 27static Common::PageTable* current_page_table = nullptr;
26 28
27void SetCurrentPageTable(PageTable* page_table) { 29void SetCurrentPageTable(Common::PageTable* page_table) {
28 current_page_table = page_table; 30 current_page_table = page_table;
29 31
30 auto& system = Core::System::GetInstance(); 32 auto& system = Core::System::GetInstance();
@@ -36,88 +38,76 @@ void SetCurrentPageTable(PageTable* page_table) {
36 } 38 }
37} 39}
38 40
39PageTable* GetCurrentPageTable() { 41static void MapPages(Common::PageTable& page_table, VAddr base, u64 size, u8* memory,
40 return current_page_table; 42 Common::PageType type) {
41}
42
43PageTable::PageTable() = default;
44
45PageTable::PageTable(std::size_t address_space_width_in_bits) {
46 Resize(address_space_width_in_bits);
47}
48
49PageTable::~PageTable() = default;
50
51void PageTable::Resize(std::size_t address_space_width_in_bits) {
52 const std::size_t num_page_table_entries = 1ULL << (address_space_width_in_bits - PAGE_BITS);
53
54 pointers.resize(num_page_table_entries);
55 attributes.resize(num_page_table_entries);
56
57 // The default is a 39-bit address space, which causes an initial 1GB allocation size. If the
58 // vector size is subsequently decreased (via resize), the vector might not automatically
59 // actually reallocate/resize its underlying allocation, which wastes up to ~800 MB for
60 // 36-bit titles. Call shrink_to_fit to reduce capacity to what's actually in use.
61
62 pointers.shrink_to_fit();
63 attributes.shrink_to_fit();
64}
65
66static void MapPages(PageTable& page_table, VAddr base, u64 size, u8* memory, PageType type) {
67 LOG_DEBUG(HW_Memory, "Mapping {} onto {:016X}-{:016X}", fmt::ptr(memory), base * PAGE_SIZE, 43 LOG_DEBUG(HW_Memory, "Mapping {} onto {:016X}-{:016X}", fmt::ptr(memory), base * PAGE_SIZE,
68 (base + size) * PAGE_SIZE); 44 (base + size) * PAGE_SIZE);
69 45
70 RasterizerFlushVirtualRegion(base << PAGE_BITS, size * PAGE_SIZE, 46 // During boot, current_page_table might not be set yet, in which case we need not flush
71 FlushMode::FlushAndInvalidate); 47 if (Core::System::GetInstance().IsPoweredOn()) {
48 Core::System::GetInstance().GPU().FlushAndInvalidateRegion(base << PAGE_BITS,
49 size * PAGE_SIZE);
50 }
72 51
73 VAddr end = base + size; 52 VAddr end = base + size;
74 while (base != end) { 53 ASSERT_MSG(end <= page_table.pointers.size(), "out of range mapping at {:016X}",
75 ASSERT_MSG(base < page_table.pointers.size(), "out of range mapping at {:016X}", base); 54 base + page_table.pointers.size());
55
56 std::fill(page_table.attributes.begin() + base, page_table.attributes.begin() + end, type);
76 57
77 page_table.attributes[base] = type; 58 if (memory == nullptr) {
78 page_table.pointers[base] = memory; 59 std::fill(page_table.pointers.begin() + base, page_table.pointers.begin() + end, memory);
60 } else {
61 while (base != end) {
62 page_table.pointers[base] = memory;
79 63
80 base += 1; 64 base += 1;
81 if (memory != nullptr)
82 memory += PAGE_SIZE; 65 memory += PAGE_SIZE;
66 }
83 } 67 }
84} 68}
85 69
86void MapMemoryRegion(PageTable& page_table, VAddr base, u64 size, u8* target) { 70void MapMemoryRegion(Common::PageTable& page_table, VAddr base, u64 size, u8* target) {
87 ASSERT_MSG((size & PAGE_MASK) == 0, "non-page aligned size: {:016X}", size); 71 ASSERT_MSG((size & PAGE_MASK) == 0, "non-page aligned size: {:016X}", size);
88 ASSERT_MSG((base & PAGE_MASK) == 0, "non-page aligned base: {:016X}", base); 72 ASSERT_MSG((base & PAGE_MASK) == 0, "non-page aligned base: {:016X}", base);
89 MapPages(page_table, base / PAGE_SIZE, size / PAGE_SIZE, target, PageType::Memory); 73 MapPages(page_table, base / PAGE_SIZE, size / PAGE_SIZE, target, Common::PageType::Memory);
90} 74}
91 75
92void MapIoRegion(PageTable& page_table, VAddr base, u64 size, MemoryHookPointer mmio_handler) { 76void MapIoRegion(Common::PageTable& page_table, VAddr base, u64 size,
77 Common::MemoryHookPointer mmio_handler) {
93 ASSERT_MSG((size & PAGE_MASK) == 0, "non-page aligned size: {:016X}", size); 78 ASSERT_MSG((size & PAGE_MASK) == 0, "non-page aligned size: {:016X}", size);
94 ASSERT_MSG((base & PAGE_MASK) == 0, "non-page aligned base: {:016X}", base); 79 ASSERT_MSG((base & PAGE_MASK) == 0, "non-page aligned base: {:016X}", base);
95 MapPages(page_table, base / PAGE_SIZE, size / PAGE_SIZE, nullptr, PageType::Special); 80 MapPages(page_table, base / PAGE_SIZE, size / PAGE_SIZE, nullptr, Common::PageType::Special);
96 81
97 auto interval = boost::icl::discrete_interval<VAddr>::closed(base, base + size - 1); 82 auto interval = boost::icl::discrete_interval<VAddr>::closed(base, base + size - 1);
98 SpecialRegion region{SpecialRegion::Type::IODevice, std::move(mmio_handler)}; 83 Common::SpecialRegion region{Common::SpecialRegion::Type::IODevice, std::move(mmio_handler)};
99 page_table.special_regions.add(std::make_pair(interval, std::set<SpecialRegion>{region})); 84 page_table.special_regions.add(
85 std::make_pair(interval, std::set<Common::SpecialRegion>{region}));
100} 86}
101 87
102void UnmapRegion(PageTable& page_table, VAddr base, u64 size) { 88void UnmapRegion(Common::PageTable& page_table, VAddr base, u64 size) {
103 ASSERT_MSG((size & PAGE_MASK) == 0, "non-page aligned size: {:016X}", size); 89 ASSERT_MSG((size & PAGE_MASK) == 0, "non-page aligned size: {:016X}", size);
104 ASSERT_MSG((base & PAGE_MASK) == 0, "non-page aligned base: {:016X}", base); 90 ASSERT_MSG((base & PAGE_MASK) == 0, "non-page aligned base: {:016X}", base);
105 MapPages(page_table, base / PAGE_SIZE, size / PAGE_SIZE, nullptr, PageType::Unmapped); 91 MapPages(page_table, base / PAGE_SIZE, size / PAGE_SIZE, nullptr, Common::PageType::Unmapped);
106 92
107 auto interval = boost::icl::discrete_interval<VAddr>::closed(base, base + size - 1); 93 auto interval = boost::icl::discrete_interval<VAddr>::closed(base, base + size - 1);
108 page_table.special_regions.erase(interval); 94 page_table.special_regions.erase(interval);
109} 95}
110 96
111void AddDebugHook(PageTable& page_table, VAddr base, u64 size, MemoryHookPointer hook) { 97void AddDebugHook(Common::PageTable& page_table, VAddr base, u64 size,
98 Common::MemoryHookPointer hook) {
112 auto interval = boost::icl::discrete_interval<VAddr>::closed(base, base + size - 1); 99 auto interval = boost::icl::discrete_interval<VAddr>::closed(base, base + size - 1);
113 SpecialRegion region{SpecialRegion::Type::DebugHook, std::move(hook)}; 100 Common::SpecialRegion region{Common::SpecialRegion::Type::DebugHook, std::move(hook)};
114 page_table.special_regions.add(std::make_pair(interval, std::set<SpecialRegion>{region})); 101 page_table.special_regions.add(
102 std::make_pair(interval, std::set<Common::SpecialRegion>{region}));
115} 103}
116 104
117void RemoveDebugHook(PageTable& page_table, VAddr base, u64 size, MemoryHookPointer hook) { 105void RemoveDebugHook(Common::PageTable& page_table, VAddr base, u64 size,
106 Common::MemoryHookPointer hook) {
118 auto interval = boost::icl::discrete_interval<VAddr>::closed(base, base + size - 1); 107 auto interval = boost::icl::discrete_interval<VAddr>::closed(base, base + size - 1);
119 SpecialRegion region{SpecialRegion::Type::DebugHook, std::move(hook)}; 108 Common::SpecialRegion region{Common::SpecialRegion::Type::DebugHook, std::move(hook)};
120 page_table.special_regions.subtract(std::make_pair(interval, std::set<SpecialRegion>{region})); 109 page_table.special_regions.subtract(
110 std::make_pair(interval, std::set<Common::SpecialRegion>{region}));
121} 111}
122 112
123/** 113/**
@@ -166,22 +156,19 @@ T Read(const VAddr vaddr) {
166 return value; 156 return value;
167 } 157 }
168 158
169 // The memory access might do an MMIO or cached access, so we have to lock the HLE kernel state 159 Common::PageType type = current_page_table->attributes[vaddr >> PAGE_BITS];
170 std::lock_guard<std::recursive_mutex> lock(HLE::g_hle_lock);
171
172 PageType type = current_page_table->attributes[vaddr >> PAGE_BITS];
173 switch (type) { 160 switch (type) {
174 case PageType::Unmapped: 161 case Common::PageType::Unmapped:
175 LOG_ERROR(HW_Memory, "Unmapped Read{} @ 0x{:08X}", sizeof(T) * 8, vaddr); 162 LOG_ERROR(HW_Memory, "Unmapped Read{} @ 0x{:08X}", sizeof(T) * 8, vaddr);
176 return 0; 163 return 0;
177 case PageType::Memory: 164 case Common::PageType::Memory:
178 ASSERT_MSG(false, "Mapped memory page without a pointer @ {:016X}", vaddr); 165 ASSERT_MSG(false, "Mapped memory page without a pointer @ {:016X}", vaddr);
179 break; 166 break;
180 case PageType::RasterizerCachedMemory: { 167 case Common::PageType::RasterizerCachedMemory: {
181 RasterizerFlushVirtualRegion(vaddr, sizeof(T), FlushMode::Flush); 168 auto host_ptr{GetPointerFromVMA(vaddr)};
182 169 Core::System::GetInstance().GPU().FlushRegion(ToCacheAddr(host_ptr), sizeof(T));
183 T value; 170 T value;
184 std::memcpy(&value, GetPointerFromVMA(vaddr), sizeof(T)); 171 std::memcpy(&value, host_ptr, sizeof(T));
185 return value; 172 return value;
186 } 173 }
187 default: 174 default:
@@ -199,21 +186,19 @@ void Write(const VAddr vaddr, const T data) {
199 return; 186 return;
200 } 187 }
201 188
202 // The memory access might do an MMIO or cached access, so we have to lock the HLE kernel state 189 Common::PageType type = current_page_table->attributes[vaddr >> PAGE_BITS];
203 std::lock_guard<std::recursive_mutex> lock(HLE::g_hle_lock);
204
205 PageType type = current_page_table->attributes[vaddr >> PAGE_BITS];
206 switch (type) { 190 switch (type) {
207 case PageType::Unmapped: 191 case Common::PageType::Unmapped:
208 LOG_ERROR(HW_Memory, "Unmapped Write{} 0x{:08X} @ 0x{:016X}", sizeof(data) * 8, 192 LOG_ERROR(HW_Memory, "Unmapped Write{} 0x{:08X} @ 0x{:016X}", sizeof(data) * 8,
209 static_cast<u32>(data), vaddr); 193 static_cast<u32>(data), vaddr);
210 return; 194 return;
211 case PageType::Memory: 195 case Common::PageType::Memory:
212 ASSERT_MSG(false, "Mapped memory page without a pointer @ {:016X}", vaddr); 196 ASSERT_MSG(false, "Mapped memory page without a pointer @ {:016X}", vaddr);
213 break; 197 break;
214 case PageType::RasterizerCachedMemory: { 198 case Common::PageType::RasterizerCachedMemory: {
215 RasterizerFlushVirtualRegion(vaddr, sizeof(T), FlushMode::Invalidate); 199 auto host_ptr{GetPointerFromVMA(vaddr)};
216 std::memcpy(GetPointerFromVMA(vaddr), &data, sizeof(T)); 200 Core::System::GetInstance().GPU().InvalidateRegion(ToCacheAddr(host_ptr), sizeof(T));
201 std::memcpy(host_ptr, &data, sizeof(T));
217 break; 202 break;
218 } 203 }
219 default: 204 default:
@@ -228,10 +213,10 @@ bool IsValidVirtualAddress(const Kernel::Process& process, const VAddr vaddr) {
228 if (page_pointer) 213 if (page_pointer)
229 return true; 214 return true;
230 215
231 if (page_table.attributes[vaddr >> PAGE_BITS] == PageType::RasterizerCachedMemory) 216 if (page_table.attributes[vaddr >> PAGE_BITS] == Common::PageType::RasterizerCachedMemory)
232 return true; 217 return true;
233 218
234 if (page_table.attributes[vaddr >> PAGE_BITS] != PageType::Special) 219 if (page_table.attributes[vaddr >> PAGE_BITS] != Common::PageType::Special)
235 return false; 220 return false;
236 221
237 return false; 222 return false;
@@ -251,7 +236,8 @@ u8* GetPointer(const VAddr vaddr) {
251 return page_pointer + (vaddr & PAGE_MASK); 236 return page_pointer + (vaddr & PAGE_MASK);
252 } 237 }
253 238
254 if (current_page_table->attributes[vaddr >> PAGE_BITS] == PageType::RasterizerCachedMemory) { 239 if (current_page_table->attributes[vaddr >> PAGE_BITS] ==
240 Common::PageType::RasterizerCachedMemory) {
255 return GetPointerFromVMA(vaddr); 241 return GetPointerFromVMA(vaddr);
256 } 242 }
257 243
@@ -285,20 +271,20 @@ void RasterizerMarkRegionCached(VAddr vaddr, u64 size, bool cached) {
285 271
286 u64 num_pages = ((vaddr + size - 1) >> PAGE_BITS) - (vaddr >> PAGE_BITS) + 1; 272 u64 num_pages = ((vaddr + size - 1) >> PAGE_BITS) - (vaddr >> PAGE_BITS) + 1;
287 for (unsigned i = 0; i < num_pages; ++i, vaddr += PAGE_SIZE) { 273 for (unsigned i = 0; i < num_pages; ++i, vaddr += PAGE_SIZE) {
288 PageType& page_type = current_page_table->attributes[vaddr >> PAGE_BITS]; 274 Common::PageType& page_type = current_page_table->attributes[vaddr >> PAGE_BITS];
289 275
290 if (cached) { 276 if (cached) {
291 // Switch page type to cached if now cached 277 // Switch page type to cached if now cached
292 switch (page_type) { 278 switch (page_type) {
293 case PageType::Unmapped: 279 case Common::PageType::Unmapped:
294 // It is not necessary for a process to have this region mapped into its address 280 // It is not necessary for a process to have this region mapped into its address
295 // space, for example, a system module need not have a VRAM mapping. 281 // space, for example, a system module need not have a VRAM mapping.
296 break; 282 break;
297 case PageType::Memory: 283 case Common::PageType::Memory:
298 page_type = PageType::RasterizerCachedMemory; 284 page_type = Common::PageType::RasterizerCachedMemory;
299 current_page_table->pointers[vaddr >> PAGE_BITS] = nullptr; 285 current_page_table->pointers[vaddr >> PAGE_BITS] = nullptr;
300 break; 286 break;
301 case PageType::RasterizerCachedMemory: 287 case Common::PageType::RasterizerCachedMemory:
302 // There can be more than one GPU region mapped per CPU region, so it's common that 288 // There can be more than one GPU region mapped per CPU region, so it's common that
303 // this area is already marked as cached. 289 // this area is already marked as cached.
304 break; 290 break;
@@ -308,23 +294,23 @@ void RasterizerMarkRegionCached(VAddr vaddr, u64 size, bool cached) {
308 } else { 294 } else {
309 // Switch page type to uncached if now uncached 295 // Switch page type to uncached if now uncached
310 switch (page_type) { 296 switch (page_type) {
311 case PageType::Unmapped: 297 case Common::PageType::Unmapped:
312 // It is not necessary for a process to have this region mapped into its address 298 // It is not necessary for a process to have this region mapped into its address
313 // space, for example, a system module need not have a VRAM mapping. 299 // space, for example, a system module need not have a VRAM mapping.
314 break; 300 break;
315 case PageType::Memory: 301 case Common::PageType::Memory:
316 // There can be more than one GPU region mapped per CPU region, so it's common that 302 // There can be more than one GPU region mapped per CPU region, so it's common that
317 // this area is already unmarked as cached. 303 // this area is already unmarked as cached.
318 break; 304 break;
319 case PageType::RasterizerCachedMemory: { 305 case Common::PageType::RasterizerCachedMemory: {
320 u8* pointer = GetPointerFromVMA(vaddr & ~PAGE_MASK); 306 u8* pointer = GetPointerFromVMA(vaddr & ~PAGE_MASK);
321 if (pointer == nullptr) { 307 if (pointer == nullptr) {
322 // It's possible that this function has been called while updating the pagetable 308 // It's possible that this function has been called while updating the pagetable
323 // after unmapping a VMA. In that case the underlying VMA will no longer exist, 309 // after unmapping a VMA. In that case the underlying VMA will no longer exist,
324 // and we should just leave the pagetable entry blank. 310 // and we should just leave the pagetable entry blank.
325 page_type = PageType::Unmapped; 311 page_type = Common::PageType::Unmapped;
326 } else { 312 } else {
327 page_type = PageType::Memory; 313 page_type = Common::PageType::Memory;
328 current_page_table->pointers[vaddr >> PAGE_BITS] = pointer; 314 current_page_table->pointers[vaddr >> PAGE_BITS] = pointer;
329 } 315 }
330 break; 316 break;
@@ -336,47 +322,6 @@ void RasterizerMarkRegionCached(VAddr vaddr, u64 size, bool cached) {
336 } 322 }
337} 323}
338 324
339void RasterizerFlushVirtualRegion(VAddr start, u64 size, FlushMode mode) {
340 auto& system_instance = Core::System::GetInstance();
341
342 // Since pages are unmapped on shutdown after video core is shutdown, the renderer may be
343 // null here
344 if (!system_instance.IsPoweredOn()) {
345 return;
346 }
347
348 const VAddr end = start + size;
349
350 const auto CheckRegion = [&](VAddr region_start, VAddr region_end) {
351 if (start >= region_end || end <= region_start) {
352 // No overlap with region
353 return;
354 }
355
356 const VAddr overlap_start = std::max(start, region_start);
357 const VAddr overlap_end = std::min(end, region_end);
358 const VAddr overlap_size = overlap_end - overlap_start;
359
360 auto& rasterizer = system_instance.Renderer().Rasterizer();
361 switch (mode) {
362 case FlushMode::Flush:
363 rasterizer.FlushRegion(overlap_start, overlap_size);
364 break;
365 case FlushMode::Invalidate:
366 rasterizer.InvalidateRegion(overlap_start, overlap_size);
367 break;
368 case FlushMode::FlushAndInvalidate:
369 rasterizer.FlushAndInvalidateRegion(overlap_start, overlap_size);
370 break;
371 }
372 };
373
374 const auto& vm_manager = Core::CurrentProcess()->VMManager();
375
376 CheckRegion(vm_manager.GetCodeRegionBaseAddress(), vm_manager.GetCodeRegionEndAddress());
377 CheckRegion(vm_manager.GetHeapRegionBaseAddress(), vm_manager.GetHeapRegionEndAddress());
378}
379
380u8 Read8(const VAddr addr) { 325u8 Read8(const VAddr addr) {
381 return Read<u8>(addr); 326 return Read<u8>(addr);
382} 327}
@@ -407,24 +352,24 @@ void ReadBlock(const Kernel::Process& process, const VAddr src_addr, void* dest_
407 const VAddr current_vaddr = static_cast<VAddr>((page_index << PAGE_BITS) + page_offset); 352 const VAddr current_vaddr = static_cast<VAddr>((page_index << PAGE_BITS) + page_offset);
408 353
409 switch (page_table.attributes[page_index]) { 354 switch (page_table.attributes[page_index]) {
410 case PageType::Unmapped: { 355 case Common::PageType::Unmapped: {
411 LOG_ERROR(HW_Memory, 356 LOG_ERROR(HW_Memory,
412 "Unmapped ReadBlock @ 0x{:016X} (start address = 0x{:016X}, size = {})", 357 "Unmapped ReadBlock @ 0x{:016X} (start address = 0x{:016X}, size = {})",
413 current_vaddr, src_addr, size); 358 current_vaddr, src_addr, size);
414 std::memset(dest_buffer, 0, copy_amount); 359 std::memset(dest_buffer, 0, copy_amount);
415 break; 360 break;
416 } 361 }
417 case PageType::Memory: { 362 case Common::PageType::Memory: {
418 DEBUG_ASSERT(page_table.pointers[page_index]); 363 DEBUG_ASSERT(page_table.pointers[page_index]);
419 364
420 const u8* src_ptr = page_table.pointers[page_index] + page_offset; 365 const u8* src_ptr = page_table.pointers[page_index] + page_offset;
421 std::memcpy(dest_buffer, src_ptr, copy_amount); 366 std::memcpy(dest_buffer, src_ptr, copy_amount);
422 break; 367 break;
423 } 368 }
424 case PageType::RasterizerCachedMemory: { 369 case Common::PageType::RasterizerCachedMemory: {
425 RasterizerFlushVirtualRegion(current_vaddr, static_cast<u32>(copy_amount), 370 const auto& host_ptr{GetPointerFromVMA(process, current_vaddr)};
426 FlushMode::Flush); 371 Core::System::GetInstance().GPU().FlushRegion(ToCacheAddr(host_ptr), copy_amount);
427 std::memcpy(dest_buffer, GetPointerFromVMA(process, current_vaddr), copy_amount); 372 std::memcpy(dest_buffer, host_ptr, copy_amount);
428 break; 373 break;
429 } 374 }
430 default: 375 default:
@@ -471,23 +416,23 @@ void WriteBlock(const Kernel::Process& process, const VAddr dest_addr, const voi
471 const VAddr current_vaddr = static_cast<VAddr>((page_index << PAGE_BITS) + page_offset); 416 const VAddr current_vaddr = static_cast<VAddr>((page_index << PAGE_BITS) + page_offset);
472 417
473 switch (page_table.attributes[page_index]) { 418 switch (page_table.attributes[page_index]) {
474 case PageType::Unmapped: { 419 case Common::PageType::Unmapped: {
475 LOG_ERROR(HW_Memory, 420 LOG_ERROR(HW_Memory,
476 "Unmapped WriteBlock @ 0x{:016X} (start address = 0x{:016X}, size = {})", 421 "Unmapped WriteBlock @ 0x{:016X} (start address = 0x{:016X}, size = {})",
477 current_vaddr, dest_addr, size); 422 current_vaddr, dest_addr, size);
478 break; 423 break;
479 } 424 }
480 case PageType::Memory: { 425 case Common::PageType::Memory: {
481 DEBUG_ASSERT(page_table.pointers[page_index]); 426 DEBUG_ASSERT(page_table.pointers[page_index]);
482 427
483 u8* dest_ptr = page_table.pointers[page_index] + page_offset; 428 u8* dest_ptr = page_table.pointers[page_index] + page_offset;
484 std::memcpy(dest_ptr, src_buffer, copy_amount); 429 std::memcpy(dest_ptr, src_buffer, copy_amount);
485 break; 430 break;
486 } 431 }
487 case PageType::RasterizerCachedMemory: { 432 case Common::PageType::RasterizerCachedMemory: {
488 RasterizerFlushVirtualRegion(current_vaddr, static_cast<u32>(copy_amount), 433 const auto& host_ptr{GetPointerFromVMA(process, current_vaddr)};
489 FlushMode::Invalidate); 434 Core::System::GetInstance().GPU().InvalidateRegion(ToCacheAddr(host_ptr), copy_amount);
490 std::memcpy(GetPointerFromVMA(process, current_vaddr), src_buffer, copy_amount); 435 std::memcpy(host_ptr, src_buffer, copy_amount);
491 break; 436 break;
492 } 437 }
493 default: 438 default:
@@ -517,23 +462,23 @@ void ZeroBlock(const Kernel::Process& process, const VAddr dest_addr, const std:
517 const VAddr current_vaddr = static_cast<VAddr>((page_index << PAGE_BITS) + page_offset); 462 const VAddr current_vaddr = static_cast<VAddr>((page_index << PAGE_BITS) + page_offset);
518 463
519 switch (page_table.attributes[page_index]) { 464 switch (page_table.attributes[page_index]) {
520 case PageType::Unmapped: { 465 case Common::PageType::Unmapped: {
521 LOG_ERROR(HW_Memory, 466 LOG_ERROR(HW_Memory,
522 "Unmapped ZeroBlock @ 0x{:016X} (start address = 0x{:016X}, size = {})", 467 "Unmapped ZeroBlock @ 0x{:016X} (start address = 0x{:016X}, size = {})",
523 current_vaddr, dest_addr, size); 468 current_vaddr, dest_addr, size);
524 break; 469 break;
525 } 470 }
526 case PageType::Memory: { 471 case Common::PageType::Memory: {
527 DEBUG_ASSERT(page_table.pointers[page_index]); 472 DEBUG_ASSERT(page_table.pointers[page_index]);
528 473
529 u8* dest_ptr = page_table.pointers[page_index] + page_offset; 474 u8* dest_ptr = page_table.pointers[page_index] + page_offset;
530 std::memset(dest_ptr, 0, copy_amount); 475 std::memset(dest_ptr, 0, copy_amount);
531 break; 476 break;
532 } 477 }
533 case PageType::RasterizerCachedMemory: { 478 case Common::PageType::RasterizerCachedMemory: {
534 RasterizerFlushVirtualRegion(current_vaddr, static_cast<u32>(copy_amount), 479 const auto& host_ptr{GetPointerFromVMA(process, current_vaddr)};
535 FlushMode::Invalidate); 480 Core::System::GetInstance().GPU().InvalidateRegion(ToCacheAddr(host_ptr), copy_amount);
536 std::memset(GetPointerFromVMA(process, current_vaddr), 0, copy_amount); 481 std::memset(host_ptr, 0, copy_amount);
537 break; 482 break;
538 } 483 }
539 default: 484 default:
@@ -559,23 +504,23 @@ void CopyBlock(const Kernel::Process& process, VAddr dest_addr, VAddr src_addr,
559 const VAddr current_vaddr = static_cast<VAddr>((page_index << PAGE_BITS) + page_offset); 504 const VAddr current_vaddr = static_cast<VAddr>((page_index << PAGE_BITS) + page_offset);
560 505
561 switch (page_table.attributes[page_index]) { 506 switch (page_table.attributes[page_index]) {
562 case PageType::Unmapped: { 507 case Common::PageType::Unmapped: {
563 LOG_ERROR(HW_Memory, 508 LOG_ERROR(HW_Memory,
564 "Unmapped CopyBlock @ 0x{:016X} (start address = 0x{:016X}, size = {})", 509 "Unmapped CopyBlock @ 0x{:016X} (start address = 0x{:016X}, size = {})",
565 current_vaddr, src_addr, size); 510 current_vaddr, src_addr, size);
566 ZeroBlock(process, dest_addr, copy_amount); 511 ZeroBlock(process, dest_addr, copy_amount);
567 break; 512 break;
568 } 513 }
569 case PageType::Memory: { 514 case Common::PageType::Memory: {
570 DEBUG_ASSERT(page_table.pointers[page_index]); 515 DEBUG_ASSERT(page_table.pointers[page_index]);
571 const u8* src_ptr = page_table.pointers[page_index] + page_offset; 516 const u8* src_ptr = page_table.pointers[page_index] + page_offset;
572 WriteBlock(process, dest_addr, src_ptr, copy_amount); 517 WriteBlock(process, dest_addr, src_ptr, copy_amount);
573 break; 518 break;
574 } 519 }
575 case PageType::RasterizerCachedMemory: { 520 case Common::PageType::RasterizerCachedMemory: {
576 RasterizerFlushVirtualRegion(current_vaddr, static_cast<u32>(copy_amount), 521 const auto& host_ptr{GetPointerFromVMA(process, current_vaddr)};
577 FlushMode::Flush); 522 Core::System::GetInstance().GPU().FlushRegion(ToCacheAddr(host_ptr), copy_amount);
578 WriteBlock(process, dest_addr, GetPointerFromVMA(process, current_vaddr), copy_amount); 523 WriteBlock(process, dest_addr, host_ptr, copy_amount);
579 break; 524 break;
580 } 525 }
581 default: 526 default:
diff --git a/src/core/memory.h b/src/core/memory.h
index 1acf5ce8c..6845f5fe1 100644
--- a/src/core/memory.h
+++ b/src/core/memory.h
@@ -6,11 +6,11 @@
6 6
7#include <cstddef> 7#include <cstddef>
8#include <string> 8#include <string>
9#include <tuple>
10#include <vector>
11#include <boost/icl/interval_map.hpp>
12#include "common/common_types.h" 9#include "common/common_types.h"
13#include "core/memory_hook.h" 10
11namespace Common {
12struct PageTable;
13}
14 14
15namespace Kernel { 15namespace Kernel {
16class Process; 16class Process;
@@ -26,83 +26,8 @@ constexpr std::size_t PAGE_BITS = 12;
26constexpr u64 PAGE_SIZE = 1ULL << PAGE_BITS; 26constexpr u64 PAGE_SIZE = 1ULL << PAGE_BITS;
27constexpr u64 PAGE_MASK = PAGE_SIZE - 1; 27constexpr u64 PAGE_MASK = PAGE_SIZE - 1;
28 28
29enum class PageType : u8 {
30 /// Page is unmapped and should cause an access error.
31 Unmapped,
32 /// Page is mapped to regular memory. This is the only type you can get pointers to.
33 Memory,
34 /// Page is mapped to regular memory, but also needs to check for rasterizer cache flushing and
35 /// invalidation
36 RasterizerCachedMemory,
37 /// Page is mapped to a I/O region. Writing and reading to this page is handled by functions.
38 Special,
39};
40
41struct SpecialRegion {
42 enum class Type {
43 DebugHook,
44 IODevice,
45 } type;
46
47 MemoryHookPointer handler;
48
49 bool operator<(const SpecialRegion& other) const {
50 return std::tie(type, handler) < std::tie(other.type, other.handler);
51 }
52
53 bool operator==(const SpecialRegion& other) const {
54 return std::tie(type, handler) == std::tie(other.type, other.handler);
55 }
56};
57
58/**
59 * A (reasonably) fast way of allowing switchable and remappable process address spaces. It loosely
60 * mimics the way a real CPU page table works.
61 */
62struct PageTable {
63 explicit PageTable();
64 explicit PageTable(std::size_t address_space_width_in_bits);
65 ~PageTable();
66
67 /**
68 * Resizes the page table to be able to accomodate enough pages within
69 * a given address space.
70 *
71 * @param address_space_width_in_bits The address size width in bits.
72 */
73 void Resize(std::size_t address_space_width_in_bits);
74
75 /**
76 * Vector of memory pointers backing each page. An entry can only be non-null if the
77 * corresponding entry in the `attributes` vector is of type `Memory`.
78 */
79 std::vector<u8*> pointers;
80
81 /**
82 * Contains MMIO handlers that back memory regions whose entries in the `attribute` vector is
83 * of type `Special`.
84 */
85 boost::icl::interval_map<VAddr, std::set<SpecialRegion>> special_regions;
86
87 /**
88 * Vector of fine grained page attributes. If it is set to any value other than `Memory`, then
89 * the corresponding entry in `pointers` MUST be set to null.
90 */
91 std::vector<PageType> attributes;
92};
93
94/// Virtual user-space memory regions 29/// Virtual user-space memory regions
95enum : VAddr { 30enum : VAddr {
96 /// Read-only page containing kernel and system configuration values.
97 CONFIG_MEMORY_VADDR = 0x1FF80000,
98 CONFIG_MEMORY_SIZE = 0x00001000,
99 CONFIG_MEMORY_VADDR_END = CONFIG_MEMORY_VADDR + CONFIG_MEMORY_SIZE,
100
101 /// Usually read-only page containing mostly values read from hardware.
102 SHARED_PAGE_VADDR = 0x1FF81000,
103 SHARED_PAGE_SIZE = 0x00001000,
104 SHARED_PAGE_VADDR_END = SHARED_PAGE_VADDR + SHARED_PAGE_SIZE,
105
106 /// TLS (Thread-Local Storage) related. 31 /// TLS (Thread-Local Storage) related.
107 TLS_ENTRY_SIZE = 0x200, 32 TLS_ENTRY_SIZE = 0x200,
108 33
@@ -115,9 +40,8 @@ enum : VAddr {
115 KERNEL_REGION_END = KERNEL_REGION_VADDR + KERNEL_REGION_SIZE, 40 KERNEL_REGION_END = KERNEL_REGION_VADDR + KERNEL_REGION_SIZE,
116}; 41};
117 42
118/// Currently active page table 43/// Changes the currently active page table.
119void SetCurrentPageTable(PageTable* page_table); 44void SetCurrentPageTable(Common::PageTable* page_table);
120PageTable* GetCurrentPageTable();
121 45
122/// Determines if the given VAddr is valid for the specified process. 46/// Determines if the given VAddr is valid for the specified process.
123bool IsValidVirtualAddress(const Kernel::Process& process, VAddr vaddr); 47bool IsValidVirtualAddress(const Kernel::Process& process, VAddr vaddr);
@@ -161,10 +85,4 @@ enum class FlushMode {
161 */ 85 */
162void RasterizerMarkRegionCached(VAddr vaddr, u64 size, bool cached); 86void RasterizerMarkRegionCached(VAddr vaddr, u64 size, bool cached);
163 87
164/**
165 * Flushes and invalidates any externally cached rasterizer resources touching the given virtual
166 * address region.
167 */
168void RasterizerFlushVirtualRegion(VAddr start, u64 size, FlushMode mode);
169
170} // namespace Memory 88} // namespace Memory
diff --git a/src/core/memory_setup.h b/src/core/memory_setup.h
index 9a1a4f4be..5225ee8e2 100644
--- a/src/core/memory_setup.h
+++ b/src/core/memory_setup.h
@@ -5,7 +5,11 @@
5#pragma once 5#pragma once
6 6
7#include "common/common_types.h" 7#include "common/common_types.h"
8#include "core/memory_hook.h" 8#include "common/memory_hook.h"
9
10namespace Common {
11struct PageTable;
12}
9 13
10namespace Memory { 14namespace Memory {
11 15
@@ -17,7 +21,7 @@ namespace Memory {
17 * @param size The amount of bytes to map. Must be page-aligned. 21 * @param size The amount of bytes to map. Must be page-aligned.
18 * @param target Buffer with the memory backing the mapping. Must be of length at least `size`. 22 * @param target Buffer with the memory backing the mapping. Must be of length at least `size`.
19 */ 23 */
20void MapMemoryRegion(PageTable& page_table, VAddr base, u64 size, u8* target); 24void MapMemoryRegion(Common::PageTable& page_table, VAddr base, u64 size, u8* target);
21 25
22/** 26/**
23 * Maps a region of the emulated process address space as a IO region. 27 * Maps a region of the emulated process address space as a IO region.
@@ -26,11 +30,14 @@ void MapMemoryRegion(PageTable& page_table, VAddr base, u64 size, u8* target);
26 * @param size The amount of bytes to map. Must be page-aligned. 30 * @param size The amount of bytes to map. Must be page-aligned.
27 * @param mmio_handler The handler that backs the mapping. 31 * @param mmio_handler The handler that backs the mapping.
28 */ 32 */
29void MapIoRegion(PageTable& page_table, VAddr base, u64 size, MemoryHookPointer mmio_handler); 33void MapIoRegion(Common::PageTable& page_table, VAddr base, u64 size,
34 Common::MemoryHookPointer mmio_handler);
30 35
31void UnmapRegion(PageTable& page_table, VAddr base, u64 size); 36void UnmapRegion(Common::PageTable& page_table, VAddr base, u64 size);
32 37
33void AddDebugHook(PageTable& page_table, VAddr base, u64 size, MemoryHookPointer hook); 38void AddDebugHook(Common::PageTable& page_table, VAddr base, u64 size,
34void RemoveDebugHook(PageTable& page_table, VAddr base, u64 size, MemoryHookPointer hook); 39 Common::MemoryHookPointer hook);
40void RemoveDebugHook(Common::PageTable& page_table, VAddr base, u64 size,
41 Common::MemoryHookPointer hook);
35 42
36} // namespace Memory 43} // namespace Memory
diff --git a/src/core/perf_stats.cpp b/src/core/perf_stats.cpp
index c716a462b..4afd6c8a3 100644
--- a/src/core/perf_stats.cpp
+++ b/src/core/perf_stats.cpp
@@ -18,13 +18,13 @@ using std::chrono::microseconds;
18namespace Core { 18namespace Core {
19 19
20void PerfStats::BeginSystemFrame() { 20void PerfStats::BeginSystemFrame() {
21 std::lock_guard<std::mutex> lock(object_mutex); 21 std::lock_guard lock{object_mutex};
22 22
23 frame_begin = Clock::now(); 23 frame_begin = Clock::now();
24} 24}
25 25
26void PerfStats::EndSystemFrame() { 26void PerfStats::EndSystemFrame() {
27 std::lock_guard<std::mutex> lock(object_mutex); 27 std::lock_guard lock{object_mutex};
28 28
29 auto frame_end = Clock::now(); 29 auto frame_end = Clock::now();
30 accumulated_frametime += frame_end - frame_begin; 30 accumulated_frametime += frame_end - frame_begin;
@@ -35,13 +35,13 @@ void PerfStats::EndSystemFrame() {
35} 35}
36 36
37void PerfStats::EndGameFrame() { 37void PerfStats::EndGameFrame() {
38 std::lock_guard<std::mutex> lock(object_mutex); 38 std::lock_guard lock{object_mutex};
39 39
40 game_frames += 1; 40 game_frames += 1;
41} 41}
42 42
43PerfStatsResults PerfStats::GetAndResetStats(microseconds current_system_time_us) { 43PerfStatsResults PerfStats::GetAndResetStats(microseconds current_system_time_us) {
44 std::lock_guard<std::mutex> lock(object_mutex); 44 std::lock_guard lock{object_mutex};
45 45
46 const auto now = Clock::now(); 46 const auto now = Clock::now();
47 // Walltime elapsed since stats were reset 47 // Walltime elapsed since stats were reset
@@ -67,7 +67,7 @@ PerfStatsResults PerfStats::GetAndResetStats(microseconds current_system_time_us
67} 67}
68 68
69double PerfStats::GetLastFrameTimeScale() { 69double PerfStats::GetLastFrameTimeScale() {
70 std::lock_guard<std::mutex> lock(object_mutex); 70 std::lock_guard lock{object_mutex};
71 71
72 constexpr double FRAME_LENGTH = 1.0 / 60; 72 constexpr double FRAME_LENGTH = 1.0 / 60;
73 return duration_cast<DoubleSecs>(previous_frame_length).count() / FRAME_LENGTH; 73 return duration_cast<DoubleSecs>(previous_frame_length).count() / FRAME_LENGTH;
diff --git a/src/core/settings.cpp b/src/core/settings.cpp
index 26fcd3405..6d32ebea3 100644
--- a/src/core/settings.cpp
+++ b/src/core/settings.cpp
@@ -74,4 +74,35 @@ void Apply() {
74 Service::HID::ReloadInputDevices(); 74 Service::HID::ReloadInputDevices();
75} 75}
76 76
77template <typename T>
78void LogSetting(const std::string& name, const T& value) {
79 LOG_INFO(Config, "{}: {}", name, value);
80}
81
82void LogSettings() {
83 LOG_INFO(Config, "yuzu Configuration:");
84 LogSetting("System_UseDockedMode", Settings::values.use_docked_mode);
85 LogSetting("System_RngSeed", Settings::values.rng_seed.value_or(0));
86 LogSetting("System_CurrentUser", Settings::values.current_user);
87 LogSetting("System_LanguageIndex", Settings::values.language_index);
88 LogSetting("Core_UseCpuJit", Settings::values.use_cpu_jit);
89 LogSetting("Core_UseMultiCore", Settings::values.use_multi_core);
90 LogSetting("Renderer_UseResolutionFactor", Settings::values.resolution_factor);
91 LogSetting("Renderer_UseFrameLimit", Settings::values.use_frame_limit);
92 LogSetting("Renderer_FrameLimit", Settings::values.frame_limit);
93 LogSetting("Renderer_UseDiskShaderCache", Settings::values.use_disk_shader_cache);
94 LogSetting("Renderer_UseAccurateGpuEmulation", Settings::values.use_accurate_gpu_emulation);
95 LogSetting("Renderer_UseAsynchronousGpuEmulation",
96 Settings::values.use_asynchronous_gpu_emulation);
97 LogSetting("Audio_OutputEngine", Settings::values.sink_id);
98 LogSetting("Audio_EnableAudioStretching", Settings::values.enable_audio_stretching);
99 LogSetting("Audio_OutputDevice", Settings::values.audio_device_id);
100 LogSetting("DataStorage_UseVirtualSd", Settings::values.use_virtual_sd);
101 LogSetting("DataStorage_NandDir", Settings::values.nand_dir);
102 LogSetting("DataStorage_SdmcDir", Settings::values.sdmc_dir);
103 LogSetting("Debugging_UseGdbstub", Settings::values.use_gdbstub);
104 LogSetting("Debugging_GdbstubPort", Settings::values.gdbstub_port);
105 LogSetting("Debugging_ProgramArgs", Settings::values.program_args);
106}
107
77} // namespace Settings 108} // namespace Settings
diff --git a/src/core/settings.h b/src/core/settings.h
index 29ce98983..d543eb32f 100644
--- a/src/core/settings.h
+++ b/src/core/settings.h
@@ -349,7 +349,6 @@ struct TouchscreenInput {
349struct Values { 349struct Values {
350 // System 350 // System
351 bool use_docked_mode; 351 bool use_docked_mode;
352 bool enable_nfc;
353 std::optional<u32> rng_seed; 352 std::optional<u32> rng_seed;
354 // Measured in seconds since epoch 353 // Measured in seconds since epoch
355 std::optional<std::chrono::seconds> custom_rtc; 354 std::optional<std::chrono::seconds> custom_rtc;
@@ -391,7 +390,9 @@ struct Values {
391 float resolution_factor; 390 float resolution_factor;
392 bool use_frame_limit; 391 bool use_frame_limit;
393 u16 frame_limit; 392 u16 frame_limit;
393 bool use_disk_shader_cache;
394 bool use_accurate_gpu_emulation; 394 bool use_accurate_gpu_emulation;
395 bool use_asynchronous_gpu_emulation;
395 396
396 float bg_red; 397 float bg_red;
397 float bg_green; 398 float bg_green;
@@ -425,4 +426,5 @@ struct Values {
425} extern values; 426} extern values;
426 427
427void Apply(); 428void Apply();
429void LogSettings();
428} // namespace Settings 430} // namespace Settings
diff --git a/src/core/telemetry_session.cpp b/src/core/telemetry_session.cpp
index 09ed74d78..e1db06811 100644
--- a/src/core/telemetry_session.cpp
+++ b/src/core/telemetry_session.cpp
@@ -158,8 +158,12 @@ TelemetrySession::TelemetrySession() {
158 AddField(Telemetry::FieldType::UserConfig, "Renderer_UseFrameLimit", 158 AddField(Telemetry::FieldType::UserConfig, "Renderer_UseFrameLimit",
159 Settings::values.use_frame_limit); 159 Settings::values.use_frame_limit);
160 AddField(Telemetry::FieldType::UserConfig, "Renderer_FrameLimit", Settings::values.frame_limit); 160 AddField(Telemetry::FieldType::UserConfig, "Renderer_FrameLimit", Settings::values.frame_limit);
161 AddField(Telemetry::FieldType::UserConfig, "Renderer_UseDiskShaderCache",
162 Settings::values.use_disk_shader_cache);
161 AddField(Telemetry::FieldType::UserConfig, "Renderer_UseAccurateGpuEmulation", 163 AddField(Telemetry::FieldType::UserConfig, "Renderer_UseAccurateGpuEmulation",
162 Settings::values.use_accurate_gpu_emulation); 164 Settings::values.use_accurate_gpu_emulation);
165 AddField(Telemetry::FieldType::UserConfig, "Renderer_UseAsynchronousGpuEmulation",
166 Settings::values.use_asynchronous_gpu_emulation);
163 AddField(Telemetry::FieldType::UserConfig, "System_UseDockedMode", 167 AddField(Telemetry::FieldType::UserConfig, "System_UseDockedMode",
164 Settings::values.use_docked_mode); 168 Settings::values.use_docked_mode);
165} 169}
diff --git a/src/input_common/CMakeLists.txt b/src/input_common/CMakeLists.txt
index 1c7db28c0..5b4e032bd 100644
--- a/src/input_common/CMakeLists.txt
+++ b/src/input_common/CMakeLists.txt
@@ -7,15 +7,18 @@ add_library(input_common STATIC
7 main.h 7 main.h
8 motion_emu.cpp 8 motion_emu.cpp
9 motion_emu.h 9 motion_emu.h
10 10 sdl/sdl.cpp
11 $<$<BOOL:${SDL2_FOUND}>:sdl/sdl.cpp sdl/sdl.h> 11 sdl/sdl.h
12) 12)
13 13
14create_target_directory_groups(input_common)
15
16target_link_libraries(input_common PUBLIC core PRIVATE common)
17
18if(SDL2_FOUND) 14if(SDL2_FOUND)
15 target_sources(input_common PRIVATE
16 sdl/sdl_impl.cpp
17 sdl/sdl_impl.h
18 )
19 target_link_libraries(input_common PRIVATE SDL2) 19 target_link_libraries(input_common PRIVATE SDL2)
20 target_compile_definitions(input_common PRIVATE HAVE_SDL2) 20 target_compile_definitions(input_common PRIVATE HAVE_SDL2)
21endif() 21endif()
22
23create_target_directory_groups(input_common)
24target_link_libraries(input_common PUBLIC core PRIVATE common)
diff --git a/src/input_common/keyboard.cpp b/src/input_common/keyboard.cpp
index 525fe6abc..078374be5 100644
--- a/src/input_common/keyboard.cpp
+++ b/src/input_common/keyboard.cpp
@@ -36,18 +36,18 @@ struct KeyButtonPair {
36class KeyButtonList { 36class KeyButtonList {
37public: 37public:
38 void AddKeyButton(int key_code, KeyButton* key_button) { 38 void AddKeyButton(int key_code, KeyButton* key_button) {
39 std::lock_guard<std::mutex> guard(mutex); 39 std::lock_guard guard{mutex};
40 list.push_back(KeyButtonPair{key_code, key_button}); 40 list.push_back(KeyButtonPair{key_code, key_button});
41 } 41 }
42 42
43 void RemoveKeyButton(const KeyButton* key_button) { 43 void RemoveKeyButton(const KeyButton* key_button) {
44 std::lock_guard<std::mutex> guard(mutex); 44 std::lock_guard guard{mutex};
45 list.remove_if( 45 list.remove_if(
46 [key_button](const KeyButtonPair& pair) { return pair.key_button == key_button; }); 46 [key_button](const KeyButtonPair& pair) { return pair.key_button == key_button; });
47 } 47 }
48 48
49 void ChangeKeyStatus(int key_code, bool pressed) { 49 void ChangeKeyStatus(int key_code, bool pressed) {
50 std::lock_guard<std::mutex> guard(mutex); 50 std::lock_guard guard{mutex};
51 for (const KeyButtonPair& pair : list) { 51 for (const KeyButtonPair& pair : list) {
52 if (pair.key_code == key_code) 52 if (pair.key_code == key_code)
53 pair.key_button->status.store(pressed); 53 pair.key_button->status.store(pressed);
@@ -55,7 +55,7 @@ public:
55 } 55 }
56 56
57 void ChangeAllKeyStatus(bool pressed) { 57 void ChangeAllKeyStatus(bool pressed) {
58 std::lock_guard<std::mutex> guard(mutex); 58 std::lock_guard guard{mutex};
59 for (const KeyButtonPair& pair : list) { 59 for (const KeyButtonPair& pair : list) {
60 pair.key_button->status.store(pressed); 60 pair.key_button->status.store(pressed);
61 } 61 }
diff --git a/src/input_common/main.cpp b/src/input_common/main.cpp
index 37f572853..8e66c1b15 100644
--- a/src/input_common/main.cpp
+++ b/src/input_common/main.cpp
@@ -17,10 +17,7 @@ namespace InputCommon {
17 17
18static std::shared_ptr<Keyboard> keyboard; 18static std::shared_ptr<Keyboard> keyboard;
19static std::shared_ptr<MotionEmu> motion_emu; 19static std::shared_ptr<MotionEmu> motion_emu;
20 20static std::unique_ptr<SDL::State> sdl;
21#ifdef HAVE_SDL2
22static std::thread poll_thread;
23#endif
24 21
25void Init() { 22void Init() {
26 keyboard = std::make_shared<Keyboard>(); 23 keyboard = std::make_shared<Keyboard>();
@@ -30,15 +27,7 @@ void Init() {
30 motion_emu = std::make_shared<MotionEmu>(); 27 motion_emu = std::make_shared<MotionEmu>();
31 Input::RegisterFactory<Input::MotionDevice>("motion_emu", motion_emu); 28 Input::RegisterFactory<Input::MotionDevice>("motion_emu", motion_emu);
32 29
33#ifdef HAVE_SDL2 30 sdl = SDL::Init();
34 SDL::Init();
35#endif
36}
37
38void StartJoystickEventHandler() {
39#ifdef HAVE_SDL2
40 poll_thread = std::thread(SDL::PollLoop);
41#endif
42} 31}
43 32
44void Shutdown() { 33void Shutdown() {
@@ -47,11 +36,7 @@ void Shutdown() {
47 Input::UnregisterFactory<Input::AnalogDevice>("analog_from_button"); 36 Input::UnregisterFactory<Input::AnalogDevice>("analog_from_button");
48 Input::UnregisterFactory<Input::MotionDevice>("motion_emu"); 37 Input::UnregisterFactory<Input::MotionDevice>("motion_emu");
49 motion_emu.reset(); 38 motion_emu.reset();
50 39 sdl.reset();
51#ifdef HAVE_SDL2
52 SDL::Shutdown();
53 poll_thread.join();
54#endif
55} 40}
56 41
57Keyboard* GetKeyboard() { 42Keyboard* GetKeyboard() {
@@ -88,7 +73,7 @@ namespace Polling {
88 73
89std::vector<std::unique_ptr<DevicePoller>> GetPollers(DeviceType type) { 74std::vector<std::unique_ptr<DevicePoller>> GetPollers(DeviceType type) {
90#ifdef HAVE_SDL2 75#ifdef HAVE_SDL2
91 return SDL::Polling::GetPollers(type); 76 return sdl->GetPollers(type);
92#else 77#else
93 return {}; 78 return {};
94#endif 79#endif
diff --git a/src/input_common/main.h b/src/input_common/main.h
index 9eb13106e..77a0ce90b 100644
--- a/src/input_common/main.h
+++ b/src/input_common/main.h
@@ -20,8 +20,6 @@ void Init();
20/// Deregisters all built-in input device factories and shuts them down. 20/// Deregisters all built-in input device factories and shuts them down.
21void Shutdown(); 21void Shutdown();
22 22
23void StartJoystickEventHandler();
24
25class Keyboard; 23class Keyboard;
26 24
27/// Gets the keyboard button device factory. 25/// Gets the keyboard button device factory.
diff --git a/src/input_common/motion_emu.cpp b/src/input_common/motion_emu.cpp
index 9570c060e..868251628 100644
--- a/src/input_common/motion_emu.cpp
+++ b/src/input_common/motion_emu.cpp
@@ -32,32 +32,32 @@ public:
32 } 32 }
33 33
34 void BeginTilt(int x, int y) { 34 void BeginTilt(int x, int y) {
35 mouse_origin = Math::MakeVec(x, y); 35 mouse_origin = Common::MakeVec(x, y);
36 is_tilting = true; 36 is_tilting = true;
37 } 37 }
38 38
39 void Tilt(int x, int y) { 39 void Tilt(int x, int y) {
40 auto mouse_move = Math::MakeVec(x, y) - mouse_origin; 40 auto mouse_move = Common::MakeVec(x, y) - mouse_origin;
41 if (is_tilting) { 41 if (is_tilting) {
42 std::lock_guard<std::mutex> guard(tilt_mutex); 42 std::lock_guard guard{tilt_mutex};
43 if (mouse_move.x == 0 && mouse_move.y == 0) { 43 if (mouse_move.x == 0 && mouse_move.y == 0) {
44 tilt_angle = 0; 44 tilt_angle = 0;
45 } else { 45 } else {
46 tilt_direction = mouse_move.Cast<float>(); 46 tilt_direction = mouse_move.Cast<float>();
47 tilt_angle = 47 tilt_angle =
48 std::clamp(tilt_direction.Normalize() * sensitivity, 0.0f, MathUtil::PI * 0.5f); 48 std::clamp(tilt_direction.Normalize() * sensitivity, 0.0f, Common::PI * 0.5f);
49 } 49 }
50 } 50 }
51 } 51 }
52 52
53 void EndTilt() { 53 void EndTilt() {
54 std::lock_guard<std::mutex> guard(tilt_mutex); 54 std::lock_guard guard{tilt_mutex};
55 tilt_angle = 0; 55 tilt_angle = 0;
56 is_tilting = false; 56 is_tilting = false;
57 } 57 }
58 58
59 std::tuple<Math::Vec3<float>, Math::Vec3<float>> GetStatus() { 59 std::tuple<Common::Vec3<float>, Common::Vec3<float>> GetStatus() {
60 std::lock_guard<std::mutex> guard(status_mutex); 60 std::lock_guard guard{status_mutex};
61 return status; 61 return status;
62 } 62 }
63 63
@@ -66,17 +66,17 @@ private:
66 const std::chrono::steady_clock::duration update_duration; 66 const std::chrono::steady_clock::duration update_duration;
67 const float sensitivity; 67 const float sensitivity;
68 68
69 Math::Vec2<int> mouse_origin; 69 Common::Vec2<int> mouse_origin;
70 70
71 std::mutex tilt_mutex; 71 std::mutex tilt_mutex;
72 Math::Vec2<float> tilt_direction; 72 Common::Vec2<float> tilt_direction;
73 float tilt_angle = 0; 73 float tilt_angle = 0;
74 74
75 bool is_tilting = false; 75 bool is_tilting = false;
76 76
77 Common::Event shutdown_event; 77 Common::Event shutdown_event;
78 78
79 std::tuple<Math::Vec3<float>, Math::Vec3<float>> status; 79 std::tuple<Common::Vec3<float>, Common::Vec3<float>> status;
80 std::mutex status_mutex; 80 std::mutex status_mutex;
81 81
82 // Note: always keep the thread declaration at the end so that other objects are initialized 82 // Note: always keep the thread declaration at the end so that other objects are initialized
@@ -85,29 +85,29 @@ private:
85 85
86 void MotionEmuThread() { 86 void MotionEmuThread() {
87 auto update_time = std::chrono::steady_clock::now(); 87 auto update_time = std::chrono::steady_clock::now();
88 Math::Quaternion<float> q = MakeQuaternion(Math::Vec3<float>(), 0); 88 Common::Quaternion<float> q = Common::MakeQuaternion(Common::Vec3<float>(), 0);
89 Math::Quaternion<float> old_q; 89 Common::Quaternion<float> old_q;
90 90
91 while (!shutdown_event.WaitUntil(update_time)) { 91 while (!shutdown_event.WaitUntil(update_time)) {
92 update_time += update_duration; 92 update_time += update_duration;
93 old_q = q; 93 old_q = q;
94 94
95 { 95 {
96 std::lock_guard<std::mutex> guard(tilt_mutex); 96 std::lock_guard guard{tilt_mutex};
97 97
98 // Find the quaternion describing current 3DS tilting 98 // Find the quaternion describing current 3DS tilting
99 q = MakeQuaternion(Math::MakeVec(-tilt_direction.y, 0.0f, tilt_direction.x), 99 q = Common::MakeQuaternion(
100 tilt_angle); 100 Common::MakeVec(-tilt_direction.y, 0.0f, tilt_direction.x), tilt_angle);
101 } 101 }
102 102
103 auto inv_q = q.Inverse(); 103 auto inv_q = q.Inverse();
104 104
105 // Set the gravity vector in world space 105 // Set the gravity vector in world space
106 auto gravity = Math::MakeVec(0.0f, -1.0f, 0.0f); 106 auto gravity = Common::MakeVec(0.0f, -1.0f, 0.0f);
107 107
108 // Find the angular rate vector in world space 108 // Find the angular rate vector in world space
109 auto angular_rate = ((q - old_q) * inv_q).xyz * 2; 109 auto angular_rate = ((q - old_q) * inv_q).xyz * 2;
110 angular_rate *= 1000 / update_millisecond / MathUtil::PI * 180; 110 angular_rate *= 1000 / update_millisecond / Common::PI * 180;
111 111
112 // Transform the two vectors from world space to 3DS space 112 // Transform the two vectors from world space to 3DS space
113 gravity = QuaternionRotate(inv_q, gravity); 113 gravity = QuaternionRotate(inv_q, gravity);
@@ -115,7 +115,7 @@ private:
115 115
116 // Update the sensor state 116 // Update the sensor state
117 { 117 {
118 std::lock_guard<std::mutex> guard(status_mutex); 118 std::lock_guard guard{status_mutex};
119 status = std::make_tuple(gravity, angular_rate); 119 status = std::make_tuple(gravity, angular_rate);
120 } 120 }
121 } 121 }
@@ -131,7 +131,7 @@ public:
131 device = std::make_shared<MotionEmuDevice>(update_millisecond, sensitivity); 131 device = std::make_shared<MotionEmuDevice>(update_millisecond, sensitivity);
132 } 132 }
133 133
134 std::tuple<Math::Vec3<float>, Math::Vec3<float>> GetStatus() const override { 134 std::tuple<Common::Vec3<float>, Common::Vec3<float>> GetStatus() const override {
135 return device->GetStatus(); 135 return device->GetStatus();
136 } 136 }
137 137
diff --git a/src/input_common/sdl/sdl.cpp b/src/input_common/sdl/sdl.cpp
index faf3c1fa3..644db3448 100644
--- a/src/input_common/sdl/sdl.cpp
+++ b/src/input_common/sdl/sdl.cpp
@@ -1,631 +1,19 @@
1// Copyright 2017 Citra Emulator Project 1// Copyright 2018 Citra Emulator Project
2// Licensed under GPLv2 or any later version 2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included. 3// Refer to the license.txt file included.
4 4
5#include <algorithm>
6#include <atomic>
7#include <cmath>
8#include <functional>
9#include <iterator>
10#include <mutex>
11#include <string>
12#include <thread>
13#include <tuple>
14#include <unordered_map>
15#include <utility>
16#include <vector>
17#include <SDL.h>
18#include "common/assert.h"
19#include "common/logging/log.h"
20#include "common/math_util.h"
21#include "common/param_package.h"
22#include "common/threadsafe_queue.h"
23#include "input_common/main.h"
24#include "input_common/sdl/sdl.h" 5#include "input_common/sdl/sdl.h"
6#ifdef HAVE_SDL2
7#include "input_common/sdl/sdl_impl.h"
8#endif
25 9
26namespace InputCommon { 10namespace InputCommon::SDL {
27 11
28namespace SDL { 12std::unique_ptr<State> Init() {
29 13#ifdef HAVE_SDL2
30class SDLJoystick; 14 return std::make_unique<SDLState>();
31class SDLButtonFactory; 15#else
32class SDLAnalogFactory; 16 return std::make_unique<NullState>();
33 17#endif
34/// Map of GUID of a list of corresponding virtual Joysticks
35static std::unordered_map<std::string, std::vector<std::shared_ptr<SDLJoystick>>> joystick_map;
36static std::mutex joystick_map_mutex;
37
38static std::shared_ptr<SDLButtonFactory> button_factory;
39static std::shared_ptr<SDLAnalogFactory> analog_factory;
40
41/// Used by the Pollers during config
42static std::atomic<bool> polling;
43static Common::SPSCQueue<SDL_Event> event_queue;
44
45static std::atomic<bool> initialized = false;
46
47static std::string GetGUID(SDL_Joystick* joystick) {
48 SDL_JoystickGUID guid = SDL_JoystickGetGUID(joystick);
49 char guid_str[33];
50 SDL_JoystickGetGUIDString(guid, guid_str, sizeof(guid_str));
51 return guid_str;
52}
53
54class SDLJoystick {
55public:
56 SDLJoystick(std::string guid_, int port_, SDL_Joystick* joystick,
57 decltype(&SDL_JoystickClose) deleter = &SDL_JoystickClose)
58 : guid{std::move(guid_)}, port{port_}, sdl_joystick{joystick, deleter} {}
59
60 void SetButton(int button, bool value) {
61 std::lock_guard<std::mutex> lock(mutex);
62 state.buttons[button] = value;
63 }
64
65 bool GetButton(int button) const {
66 std::lock_guard<std::mutex> lock(mutex);
67 return state.buttons.at(button);
68 }
69
70 void SetAxis(int axis, Sint16 value) {
71 std::lock_guard<std::mutex> lock(mutex);
72 state.axes[axis] = value;
73 }
74
75 float GetAxis(int axis) const {
76 std::lock_guard<std::mutex> lock(mutex);
77 return state.axes.at(axis) / 32767.0f;
78 }
79
80 std::tuple<float, float> GetAnalog(int axis_x, int axis_y) const {
81 float x = GetAxis(axis_x);
82 float y = GetAxis(axis_y);
83 y = -y; // 3DS uses an y-axis inverse from SDL
84
85 // Make sure the coordinates are in the unit circle,
86 // otherwise normalize it.
87 float r = x * x + y * y;
88 if (r > 1.0f) {
89 r = std::sqrt(r);
90 x /= r;
91 y /= r;
92 }
93
94 return std::make_tuple(x, y);
95 }
96
97 void SetHat(int hat, Uint8 direction) {
98 std::lock_guard<std::mutex> lock(mutex);
99 state.hats[hat] = direction;
100 }
101
102 bool GetHatDirection(int hat, Uint8 direction) const {
103 std::lock_guard<std::mutex> lock(mutex);
104 return (state.hats.at(hat) & direction) != 0;
105 }
106 /**
107 * The guid of the joystick
108 */
109 const std::string& GetGUID() const {
110 return guid;
111 }
112
113 /**
114 * The number of joystick from the same type that were connected before this joystick
115 */
116 int GetPort() const {
117 return port;
118 }
119
120 SDL_Joystick* GetSDLJoystick() const {
121 return sdl_joystick.get();
122 }
123
124 void SetSDLJoystick(SDL_Joystick* joystick,
125 decltype(&SDL_JoystickClose) deleter = &SDL_JoystickClose) {
126 sdl_joystick =
127 std::unique_ptr<SDL_Joystick, decltype(&SDL_JoystickClose)>(joystick, deleter);
128 }
129
130private:
131 struct State {
132 std::unordered_map<int, bool> buttons;
133 std::unordered_map<int, Sint16> axes;
134 std::unordered_map<int, Uint8> hats;
135 } state;
136 std::string guid;
137 int port;
138 std::unique_ptr<SDL_Joystick, decltype(&SDL_JoystickClose)> sdl_joystick;
139 mutable std::mutex mutex;
140};
141
142/**
143 * Get the nth joystick with the corresponding GUID
144 */
145static std::shared_ptr<SDLJoystick> GetSDLJoystickByGUID(const std::string& guid, int port) {
146 std::lock_guard<std::mutex> lock(joystick_map_mutex);
147 const auto it = joystick_map.find(guid);
148 if (it != joystick_map.end()) {
149 while (it->second.size() <= port) {
150 auto joystick = std::make_shared<SDLJoystick>(guid, it->second.size(), nullptr,
151 [](SDL_Joystick*) {});
152 it->second.emplace_back(std::move(joystick));
153 }
154 return it->second[port];
155 }
156 auto joystick = std::make_shared<SDLJoystick>(guid, 0, nullptr, [](SDL_Joystick*) {});
157 return joystick_map[guid].emplace_back(std::move(joystick));
158}
159
160/**
161 * Check how many identical joysticks (by guid) were connected before the one with sdl_id and so tie
162 * it to a SDLJoystick with the same guid and that port
163 */
164static std::shared_ptr<SDLJoystick> GetSDLJoystickBySDLID(SDL_JoystickID sdl_id) {
165 std::lock_guard<std::mutex> lock(joystick_map_mutex);
166 auto sdl_joystick = SDL_JoystickFromInstanceID(sdl_id);
167 const std::string guid = GetGUID(sdl_joystick);
168 auto map_it = joystick_map.find(guid);
169 if (map_it != joystick_map.end()) {
170 auto vec_it = std::find_if(map_it->second.begin(), map_it->second.end(),
171 [&sdl_joystick](const std::shared_ptr<SDLJoystick>& joystick) {
172 return sdl_joystick == joystick->GetSDLJoystick();
173 });
174 if (vec_it != map_it->second.end()) {
175 // This is the common case: There is already an existing SDL_Joystick maped to a
176 // SDLJoystick. return the SDLJoystick
177 return *vec_it;
178 }
179 // Search for a SDLJoystick without a mapped SDL_Joystick...
180 auto nullptr_it = std::find_if(map_it->second.begin(), map_it->second.end(),
181 [](const std::shared_ptr<SDLJoystick>& joystick) {
182 return !joystick->GetSDLJoystick();
183 });
184 if (nullptr_it != map_it->second.end()) {
185 // ... and map it
186 (*nullptr_it)->SetSDLJoystick(sdl_joystick);
187 return *nullptr_it;
188 }
189 // There is no SDLJoystick without a mapped SDL_Joystick
190 // Create a new SDLJoystick
191 auto joystick = std::make_shared<SDLJoystick>(guid, map_it->second.size(), sdl_joystick);
192 return map_it->second.emplace_back(std::move(joystick));
193 }
194 auto joystick = std::make_shared<SDLJoystick>(guid, 0, sdl_joystick);
195 return joystick_map[guid].emplace_back(std::move(joystick));
196}
197
198void InitJoystick(int joystick_index) {
199 std::lock_guard<std::mutex> lock(joystick_map_mutex);
200 SDL_Joystick* sdl_joystick = SDL_JoystickOpen(joystick_index);
201 if (!sdl_joystick) {
202 LOG_ERROR(Input, "failed to open joystick {}", joystick_index);
203 return;
204 }
205 std::string guid = GetGUID(sdl_joystick);
206 if (joystick_map.find(guid) == joystick_map.end()) {
207 auto joystick = std::make_shared<SDLJoystick>(guid, 0, sdl_joystick);
208 joystick_map[guid].emplace_back(std::move(joystick));
209 return;
210 }
211 auto& joystick_guid_list = joystick_map[guid];
212 const auto it = std::find_if(
213 joystick_guid_list.begin(), joystick_guid_list.end(),
214 [](const std::shared_ptr<SDLJoystick>& joystick) { return !joystick->GetSDLJoystick(); });
215 if (it != joystick_guid_list.end()) {
216 (*it)->SetSDLJoystick(sdl_joystick);
217 return;
218 }
219 auto joystick = std::make_shared<SDLJoystick>(guid, joystick_guid_list.size(), sdl_joystick);
220 joystick_guid_list.emplace_back(std::move(joystick));
221}
222
223void CloseJoystick(SDL_Joystick* sdl_joystick) {
224 std::lock_guard<std::mutex> lock(joystick_map_mutex);
225 std::string guid = GetGUID(sdl_joystick);
226 // This call to guid is save since the joystick is guranteed to be in that map
227 auto& joystick_guid_list = joystick_map[guid];
228 const auto joystick_it =
229 std::find_if(joystick_guid_list.begin(), joystick_guid_list.end(),
230 [&sdl_joystick](const std::shared_ptr<SDLJoystick>& joystick) {
231 return joystick->GetSDLJoystick() == sdl_joystick;
232 });
233 (*joystick_it)->SetSDLJoystick(nullptr, [](SDL_Joystick*) {});
234}
235
236void HandleGameControllerEvent(const SDL_Event& event) {
237 switch (event.type) {
238 case SDL_JOYBUTTONUP: {
239 auto joystick = GetSDLJoystickBySDLID(event.jbutton.which);
240 if (joystick) {
241 joystick->SetButton(event.jbutton.button, false);
242 }
243 break;
244 }
245 case SDL_JOYBUTTONDOWN: {
246 auto joystick = GetSDLJoystickBySDLID(event.jbutton.which);
247 if (joystick) {
248 joystick->SetButton(event.jbutton.button, true);
249 }
250 break;
251 }
252 case SDL_JOYHATMOTION: {
253 auto joystick = GetSDLJoystickBySDLID(event.jhat.which);
254 if (joystick) {
255 joystick->SetHat(event.jhat.hat, event.jhat.value);
256 }
257 break;
258 }
259 case SDL_JOYAXISMOTION: {
260 auto joystick = GetSDLJoystickBySDLID(event.jaxis.which);
261 if (joystick) {
262 joystick->SetAxis(event.jaxis.axis, event.jaxis.value);
263 }
264 break;
265 }
266 case SDL_JOYDEVICEREMOVED:
267 LOG_DEBUG(Input, "Controller removed with Instance_ID {}", event.jdevice.which);
268 CloseJoystick(SDL_JoystickFromInstanceID(event.jdevice.which));
269 break;
270 case SDL_JOYDEVICEADDED:
271 LOG_DEBUG(Input, "Controller connected with device index {}", event.jdevice.which);
272 InitJoystick(event.jdevice.which);
273 break;
274 }
275}
276
277void CloseSDLJoysticks() {
278 std::lock_guard<std::mutex> lock(joystick_map_mutex);
279 joystick_map.clear();
280}
281
282void PollLoop() {
283 if (SDL_Init(SDL_INIT_JOYSTICK) < 0) {
284 LOG_CRITICAL(Input, "SDL_Init(SDL_INIT_JOYSTICK) failed with: {}", SDL_GetError());
285 return;
286 }
287
288 SDL_Event event;
289 while (initialized) {
290 // Wait for 10 ms or until an event happens
291 if (SDL_WaitEventTimeout(&event, 10)) {
292 // Don't handle the event if we are configuring
293 if (polling) {
294 event_queue.Push(event);
295 } else {
296 HandleGameControllerEvent(event);
297 }
298 }
299 }
300 CloseSDLJoysticks();
301 SDL_QuitSubSystem(SDL_INIT_JOYSTICK);
302}
303
304class SDLButton final : public Input::ButtonDevice {
305public:
306 explicit SDLButton(std::shared_ptr<SDLJoystick> joystick_, int button_)
307 : joystick(std::move(joystick_)), button(button_) {}
308
309 bool GetStatus() const override {
310 return joystick->GetButton(button);
311 }
312
313private:
314 std::shared_ptr<SDLJoystick> joystick;
315 int button;
316};
317
318class SDLDirectionButton final : public Input::ButtonDevice {
319public:
320 explicit SDLDirectionButton(std::shared_ptr<SDLJoystick> joystick_, int hat_, Uint8 direction_)
321 : joystick(std::move(joystick_)), hat(hat_), direction(direction_) {}
322
323 bool GetStatus() const override {
324 return joystick->GetHatDirection(hat, direction);
325 }
326
327private:
328 std::shared_ptr<SDLJoystick> joystick;
329 int hat;
330 Uint8 direction;
331};
332
333class SDLAxisButton final : public Input::ButtonDevice {
334public:
335 explicit SDLAxisButton(std::shared_ptr<SDLJoystick> joystick_, int axis_, float threshold_,
336 bool trigger_if_greater_)
337 : joystick(std::move(joystick_)), axis(axis_), threshold(threshold_),
338 trigger_if_greater(trigger_if_greater_) {}
339
340 bool GetStatus() const override {
341 float axis_value = joystick->GetAxis(axis);
342 if (trigger_if_greater)
343 return axis_value > threshold;
344 return axis_value < threshold;
345 }
346
347private:
348 std::shared_ptr<SDLJoystick> joystick;
349 int axis;
350 float threshold;
351 bool trigger_if_greater;
352};
353
354class SDLAnalog final : public Input::AnalogDevice {
355public:
356 SDLAnalog(std::shared_ptr<SDLJoystick> joystick_, int axis_x_, int axis_y_)
357 : joystick(std::move(joystick_)), axis_x(axis_x_), axis_y(axis_y_) {}
358
359 std::tuple<float, float> GetStatus() const override {
360 return joystick->GetAnalog(axis_x, axis_y);
361 }
362
363private:
364 std::shared_ptr<SDLJoystick> joystick;
365 int axis_x;
366 int axis_y;
367};
368
369/// A button device factory that creates button devices from SDL joystick
370class SDLButtonFactory final : public Input::Factory<Input::ButtonDevice> {
371public:
372 /**
373 * Creates a button device from a joystick button
374 * @param params contains parameters for creating the device:
375 * - "guid": the guid of the joystick to bind
376 * - "port": the nth joystick of the same type to bind
377 * - "button"(optional): the index of the button to bind
378 * - "hat"(optional): the index of the hat to bind as direction buttons
379 * - "axis"(optional): the index of the axis to bind
380 * - "direction"(only used for hat): the direction name of the hat to bind. Can be "up",
381 * "down", "left" or "right"
382 * - "threshold"(only used for axis): a float value in (-1.0, 1.0) which the button is
383 * triggered if the axis value crosses
384 * - "direction"(only used for axis): "+" means the button is triggered when the axis
385 * value is greater than the threshold; "-" means the button is triggered when the axis
386 * value is smaller than the threshold
387 */
388 std::unique_ptr<Input::ButtonDevice> Create(const Common::ParamPackage& params) override {
389 const std::string guid = params.Get("guid", "0");
390 const int port = params.Get("port", 0);
391
392 auto joystick = GetSDLJoystickByGUID(guid, port);
393
394 if (params.Has("hat")) {
395 const int hat = params.Get("hat", 0);
396 const std::string direction_name = params.Get("direction", "");
397 Uint8 direction;
398 if (direction_name == "up") {
399 direction = SDL_HAT_UP;
400 } else if (direction_name == "down") {
401 direction = SDL_HAT_DOWN;
402 } else if (direction_name == "left") {
403 direction = SDL_HAT_LEFT;
404 } else if (direction_name == "right") {
405 direction = SDL_HAT_RIGHT;
406 } else {
407 direction = 0;
408 }
409 // This is necessary so accessing GetHat with hat won't crash
410 joystick->SetHat(hat, SDL_HAT_CENTERED);
411 return std::make_unique<SDLDirectionButton>(joystick, hat, direction);
412 }
413
414 if (params.Has("axis")) {
415 const int axis = params.Get("axis", 0);
416 const float threshold = params.Get("threshold", 0.5f);
417 const std::string direction_name = params.Get("direction", "");
418 bool trigger_if_greater;
419 if (direction_name == "+") {
420 trigger_if_greater = true;
421 } else if (direction_name == "-") {
422 trigger_if_greater = false;
423 } else {
424 trigger_if_greater = true;
425 LOG_ERROR(Input, "Unknown direction '{}'", direction_name);
426 }
427 // This is necessary so accessing GetAxis with axis won't crash
428 joystick->SetAxis(axis, 0);
429 return std::make_unique<SDLAxisButton>(joystick, axis, threshold, trigger_if_greater);
430 }
431
432 const int button = params.Get("button", 0);
433 // This is necessary so accessing GetButton with button won't crash
434 joystick->SetButton(button, false);
435 return std::make_unique<SDLButton>(joystick, button);
436 }
437};
438
439/// An analog device factory that creates analog devices from SDL joystick
440class SDLAnalogFactory final : public Input::Factory<Input::AnalogDevice> {
441public:
442 /**
443 * Creates analog device from joystick axes
444 * @param params contains parameters for creating the device:
445 * - "guid": the guid of the joystick to bind
446 * - "port": the nth joystick of the same type
447 * - "axis_x": the index of the axis to be bind as x-axis
448 * - "axis_y": the index of the axis to be bind as y-axis
449 */
450 std::unique_ptr<Input::AnalogDevice> Create(const Common::ParamPackage& params) override {
451 const std::string guid = params.Get("guid", "0");
452 const int port = params.Get("port", 0);
453 const int axis_x = params.Get("axis_x", 0);
454 const int axis_y = params.Get("axis_y", 1);
455
456 auto joystick = GetSDLJoystickByGUID(guid, port);
457
458 // This is necessary so accessing GetAxis with axis_x and axis_y won't crash
459 joystick->SetAxis(axis_x, 0);
460 joystick->SetAxis(axis_y, 0);
461 return std::make_unique<SDLAnalog>(joystick, axis_x, axis_y);
462 }
463};
464
465void Init() {
466 using namespace Input;
467 RegisterFactory<ButtonDevice>("sdl", std::make_shared<SDLButtonFactory>());
468 RegisterFactory<AnalogDevice>("sdl", std::make_shared<SDLAnalogFactory>());
469 polling = false;
470 initialized = true;
471}
472
473void Shutdown() {
474 if (initialized) {
475 using namespace Input;
476 UnregisterFactory<ButtonDevice>("sdl");
477 UnregisterFactory<AnalogDevice>("sdl");
478 initialized = false;
479 }
480}
481
482Common::ParamPackage SDLEventToButtonParamPackage(const SDL_Event& event) {
483 Common::ParamPackage params({{"engine", "sdl"}});
484 switch (event.type) {
485 case SDL_JOYAXISMOTION: {
486 auto joystick = GetSDLJoystickBySDLID(event.jaxis.which);
487 params.Set("port", joystick->GetPort());
488 params.Set("guid", joystick->GetGUID());
489 params.Set("axis", event.jaxis.axis);
490 if (event.jaxis.value > 0) {
491 params.Set("direction", "+");
492 params.Set("threshold", "0.5");
493 } else {
494 params.Set("direction", "-");
495 params.Set("threshold", "-0.5");
496 }
497 break;
498 }
499 case SDL_JOYBUTTONUP: {
500 auto joystick = GetSDLJoystickBySDLID(event.jbutton.which);
501 params.Set("port", joystick->GetPort());
502 params.Set("guid", joystick->GetGUID());
503 params.Set("button", event.jbutton.button);
504 break;
505 }
506 case SDL_JOYHATMOTION: {
507 auto joystick = GetSDLJoystickBySDLID(event.jhat.which);
508 params.Set("port", joystick->GetPort());
509 params.Set("guid", joystick->GetGUID());
510 params.Set("hat", event.jhat.hat);
511 switch (event.jhat.value) {
512 case SDL_HAT_UP:
513 params.Set("direction", "up");
514 break;
515 case SDL_HAT_DOWN:
516 params.Set("direction", "down");
517 break;
518 case SDL_HAT_LEFT:
519 params.Set("direction", "left");
520 break;
521 case SDL_HAT_RIGHT:
522 params.Set("direction", "right");
523 break;
524 default:
525 return {};
526 }
527 break;
528 }
529 }
530 return params;
531}
532
533namespace Polling {
534
535class SDLPoller : public InputCommon::Polling::DevicePoller {
536public:
537 void Start() override {
538 event_queue.Clear();
539 polling = true;
540 }
541
542 void Stop() override {
543 polling = false;
544 }
545};
546
547class SDLButtonPoller final : public SDLPoller {
548public:
549 Common::ParamPackage GetNextInput() override {
550 SDL_Event event;
551 while (event_queue.Pop(event)) {
552 switch (event.type) {
553 case SDL_JOYAXISMOTION:
554 if (std::abs(event.jaxis.value / 32767.0) < 0.5) {
555 break;
556 }
557 case SDL_JOYBUTTONUP:
558 case SDL_JOYHATMOTION:
559 return SDLEventToButtonParamPackage(event);
560 }
561 }
562 return {};
563 }
564};
565
566class SDLAnalogPoller final : public SDLPoller {
567public:
568 void Start() override {
569 SDLPoller::Start();
570
571 // Reset stored axes
572 analog_xaxis = -1;
573 analog_yaxis = -1;
574 analog_axes_joystick = -1;
575 }
576
577 Common::ParamPackage GetNextInput() override {
578 SDL_Event event;
579 while (event_queue.Pop(event)) {
580 if (event.type != SDL_JOYAXISMOTION || std::abs(event.jaxis.value / 32767.0) < 0.5) {
581 continue;
582 }
583 // An analog device needs two axes, so we need to store the axis for later and wait for
584 // a second SDL event. The axes also must be from the same joystick.
585 int axis = event.jaxis.axis;
586 if (analog_xaxis == -1) {
587 analog_xaxis = axis;
588 analog_axes_joystick = event.jaxis.which;
589 } else if (analog_yaxis == -1 && analog_xaxis != axis &&
590 analog_axes_joystick == event.jaxis.which) {
591 analog_yaxis = axis;
592 }
593 }
594 Common::ParamPackage params;
595 if (analog_xaxis != -1 && analog_yaxis != -1) {
596 auto joystick = GetSDLJoystickBySDLID(event.jaxis.which);
597 params.Set("engine", "sdl");
598 params.Set("port", joystick->GetPort());
599 params.Set("guid", joystick->GetGUID());
600 params.Set("axis_x", analog_xaxis);
601 params.Set("axis_y", analog_yaxis);
602 analog_xaxis = -1;
603 analog_yaxis = -1;
604 analog_axes_joystick = -1;
605 return params;
606 }
607 return params;
608 }
609
610private:
611 int analog_xaxis = -1;
612 int analog_yaxis = -1;
613 SDL_JoystickID analog_axes_joystick = -1;
614};
615
616std::vector<std::unique_ptr<InputCommon::Polling::DevicePoller>> GetPollers(
617 InputCommon::Polling::DeviceType type) {
618 std::vector<std::unique_ptr<InputCommon::Polling::DevicePoller>> pollers;
619 switch (type) {
620 case InputCommon::Polling::DeviceType::Analog:
621 pollers.push_back(std::make_unique<SDLAnalogPoller>());
622 break;
623 case InputCommon::Polling::DeviceType::Button:
624 pollers.push_back(std::make_unique<SDLButtonPoller>());
625 break;
626 }
627 return pollers;
628} 18}
629} // namespace Polling 19} // namespace InputCommon::SDL
630} // namespace SDL
631} // namespace InputCommon
diff --git a/src/input_common/sdl/sdl.h b/src/input_common/sdl/sdl.h
index 0206860d3..d7f24c68a 100644
--- a/src/input_common/sdl/sdl.h
+++ b/src/input_common/sdl/sdl.h
@@ -1,4 +1,4 @@
1// Copyright 2017 Citra Emulator Project 1// Copyright 2018 Citra Emulator Project
2// Licensed under GPLv2 or any later version 2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included. 3// Refer to the license.txt file included.
4 4
@@ -7,45 +7,38 @@
7#include <memory> 7#include <memory>
8#include <vector> 8#include <vector>
9#include "core/frontend/input.h" 9#include "core/frontend/input.h"
10#include "input_common/main.h"
10 11
11union SDL_Event; 12union SDL_Event;
13
12namespace Common { 14namespace Common {
13class ParamPackage; 15class ParamPackage;
14} 16} // namespace Common
15namespace InputCommon { 17
16namespace Polling { 18namespace InputCommon::Polling {
17class DevicePoller; 19class DevicePoller;
18enum class DeviceType; 20enum class DeviceType;
19} // namespace Polling 21} // namespace InputCommon::Polling
20} // namespace InputCommon
21
22namespace InputCommon {
23namespace SDL {
24
25/// Initializes and registers SDL device factories
26void Init();
27
28/// Unresisters SDL device factories and shut them down.
29void Shutdown();
30 22
31/// Needs to be called before SDL_QuitSubSystem. 23namespace InputCommon::SDL {
32void CloseSDLJoysticks();
33 24
34/// Handle SDL_Events for joysticks from SDL_PollEvent 25class State {
35void HandleGameControllerEvent(const SDL_Event& event); 26public:
27 using Pollers = std::vector<std::unique_ptr<Polling::DevicePoller>>;
36 28
37/// A Loop that calls HandleGameControllerEvent until Shutdown is called 29 /// Unregisters SDL device factories and shut them down.
38void PollLoop(); 30 virtual ~State() = default;
39 31
40/// Creates a ParamPackage from an SDL_Event that can directly be used to create a ButtonDevice 32 virtual Pollers GetPollers(Polling::DeviceType type) = 0;
41Common::ParamPackage SDLEventToButtonParamPackage(const SDL_Event& event); 33};
42 34
43namespace Polling { 35class NullState : public State {
36public:
37 Pollers GetPollers(Polling::DeviceType type) override {
38 return {};
39 }
40};
44 41
45/// Get all DevicePoller that use the SDL backend for a specific device type 42std::unique_ptr<State> Init();
46std::vector<std::unique_ptr<InputCommon::Polling::DevicePoller>> GetPollers(
47 InputCommon::Polling::DeviceType type);
48 43
49} // namespace Polling 44} // namespace InputCommon::SDL
50} // namespace SDL
51} // namespace InputCommon
diff --git a/src/input_common/sdl/sdl_impl.cpp b/src/input_common/sdl/sdl_impl.cpp
new file mode 100644
index 000000000..5949ecbae
--- /dev/null
+++ b/src/input_common/sdl/sdl_impl.cpp
@@ -0,0 +1,671 @@
1// Copyright 2018 Citra Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <algorithm>
6#include <atomic>
7#include <cmath>
8#include <functional>
9#include <iterator>
10#include <mutex>
11#include <string>
12#include <thread>
13#include <tuple>
14#include <unordered_map>
15#include <utility>
16#include <vector>
17#include <SDL.h>
18#include "common/assert.h"
19#include "common/logging/log.h"
20#include "common/math_util.h"
21#include "common/param_package.h"
22#include "common/threadsafe_queue.h"
23#include "core/frontend/input.h"
24#include "input_common/sdl/sdl_impl.h"
25
26namespace InputCommon {
27
28namespace SDL {
29
30static std::string GetGUID(SDL_Joystick* joystick) {
31 SDL_JoystickGUID guid = SDL_JoystickGetGUID(joystick);
32 char guid_str[33];
33 SDL_JoystickGetGUIDString(guid, guid_str, sizeof(guid_str));
34 return guid_str;
35}
36
37/// Creates a ParamPackage from an SDL_Event that can directly be used to create a ButtonDevice
38static Common::ParamPackage SDLEventToButtonParamPackage(SDLState& state, const SDL_Event& event);
39
40static int SDLEventWatcher(void* userdata, SDL_Event* event) {
41 SDLState* sdl_state = reinterpret_cast<SDLState*>(userdata);
42 // Don't handle the event if we are configuring
43 if (sdl_state->polling) {
44 sdl_state->event_queue.Push(*event);
45 } else {
46 sdl_state->HandleGameControllerEvent(*event);
47 }
48 return 0;
49}
50
51class SDLJoystick {
52public:
53 SDLJoystick(std::string guid_, int port_, SDL_Joystick* joystick,
54 decltype(&SDL_JoystickClose) deleter = &SDL_JoystickClose)
55 : guid{std::move(guid_)}, port{port_}, sdl_joystick{joystick, deleter} {}
56
57 void SetButton(int button, bool value) {
58 std::lock_guard lock{mutex};
59 state.buttons[button] = value;
60 }
61
62 bool GetButton(int button) const {
63 std::lock_guard lock{mutex};
64 return state.buttons.at(button);
65 }
66
67 void SetAxis(int axis, Sint16 value) {
68 std::lock_guard lock{mutex};
69 state.axes[axis] = value;
70 }
71
72 float GetAxis(int axis) const {
73 std::lock_guard lock{mutex};
74 return state.axes.at(axis) / 32767.0f;
75 }
76
77 std::tuple<float, float> GetAnalog(int axis_x, int axis_y) const {
78 float x = GetAxis(axis_x);
79 float y = GetAxis(axis_y);
80 y = -y; // 3DS uses an y-axis inverse from SDL
81
82 // Make sure the coordinates are in the unit circle,
83 // otherwise normalize it.
84 float r = x * x + y * y;
85 if (r > 1.0f) {
86 r = std::sqrt(r);
87 x /= r;
88 y /= r;
89 }
90
91 return std::make_tuple(x, y);
92 }
93
94 void SetHat(int hat, Uint8 direction) {
95 std::lock_guard lock{mutex};
96 state.hats[hat] = direction;
97 }
98
99 bool GetHatDirection(int hat, Uint8 direction) const {
100 std::lock_guard lock{mutex};
101 return (state.hats.at(hat) & direction) != 0;
102 }
103 /**
104 * The guid of the joystick
105 */
106 const std::string& GetGUID() const {
107 return guid;
108 }
109
110 /**
111 * The number of joystick from the same type that were connected before this joystick
112 */
113 int GetPort() const {
114 return port;
115 }
116
117 SDL_Joystick* GetSDLJoystick() const {
118 return sdl_joystick.get();
119 }
120
121 void SetSDLJoystick(SDL_Joystick* joystick,
122 decltype(&SDL_JoystickClose) deleter = &SDL_JoystickClose) {
123 sdl_joystick =
124 std::unique_ptr<SDL_Joystick, decltype(&SDL_JoystickClose)>(joystick, deleter);
125 }
126
127private:
128 struct State {
129 std::unordered_map<int, bool> buttons;
130 std::unordered_map<int, Sint16> axes;
131 std::unordered_map<int, Uint8> hats;
132 } state;
133 std::string guid;
134 int port;
135 std::unique_ptr<SDL_Joystick, decltype(&SDL_JoystickClose)> sdl_joystick;
136 mutable std::mutex mutex;
137};
138
139/**
140 * Get the nth joystick with the corresponding GUID
141 */
142std::shared_ptr<SDLJoystick> SDLState::GetSDLJoystickByGUID(const std::string& guid, int port) {
143 std::lock_guard lock{joystick_map_mutex};
144 const auto it = joystick_map.find(guid);
145 if (it != joystick_map.end()) {
146 while (it->second.size() <= port) {
147 auto joystick = std::make_shared<SDLJoystick>(guid, it->second.size(), nullptr,
148 [](SDL_Joystick*) {});
149 it->second.emplace_back(std::move(joystick));
150 }
151 return it->second[port];
152 }
153 auto joystick = std::make_shared<SDLJoystick>(guid, 0, nullptr, [](SDL_Joystick*) {});
154 return joystick_map[guid].emplace_back(std::move(joystick));
155}
156
157/**
158 * Check how many identical joysticks (by guid) were connected before the one with sdl_id and so tie
159 * it to a SDLJoystick with the same guid and that port
160 */
161std::shared_ptr<SDLJoystick> SDLState::GetSDLJoystickBySDLID(SDL_JoystickID sdl_id) {
162 auto sdl_joystick = SDL_JoystickFromInstanceID(sdl_id);
163 const std::string guid = GetGUID(sdl_joystick);
164
165 std::lock_guard lock{joystick_map_mutex};
166 auto map_it = joystick_map.find(guid);
167 if (map_it != joystick_map.end()) {
168 auto vec_it = std::find_if(map_it->second.begin(), map_it->second.end(),
169 [&sdl_joystick](const std::shared_ptr<SDLJoystick>& joystick) {
170 return sdl_joystick == joystick->GetSDLJoystick();
171 });
172 if (vec_it != map_it->second.end()) {
173 // This is the common case: There is already an existing SDL_Joystick maped to a
174 // SDLJoystick. return the SDLJoystick
175 return *vec_it;
176 }
177 // Search for a SDLJoystick without a mapped SDL_Joystick...
178 auto nullptr_it = std::find_if(map_it->second.begin(), map_it->second.end(),
179 [](const std::shared_ptr<SDLJoystick>& joystick) {
180 return !joystick->GetSDLJoystick();
181 });
182 if (nullptr_it != map_it->second.end()) {
183 // ... and map it
184 (*nullptr_it)->SetSDLJoystick(sdl_joystick);
185 return *nullptr_it;
186 }
187 // There is no SDLJoystick without a mapped SDL_Joystick
188 // Create a new SDLJoystick
189 auto joystick = std::make_shared<SDLJoystick>(guid, map_it->second.size(), sdl_joystick);
190 return map_it->second.emplace_back(std::move(joystick));
191 }
192 auto joystick = std::make_shared<SDLJoystick>(guid, 0, sdl_joystick);
193 return joystick_map[guid].emplace_back(std::move(joystick));
194}
195
196void SDLState::InitJoystick(int joystick_index) {
197 SDL_Joystick* sdl_joystick = SDL_JoystickOpen(joystick_index);
198 if (!sdl_joystick) {
199 LOG_ERROR(Input, "failed to open joystick {}", joystick_index);
200 return;
201 }
202 const std::string guid = GetGUID(sdl_joystick);
203
204 std::lock_guard lock{joystick_map_mutex};
205 if (joystick_map.find(guid) == joystick_map.end()) {
206 auto joystick = std::make_shared<SDLJoystick>(guid, 0, sdl_joystick);
207 joystick_map[guid].emplace_back(std::move(joystick));
208 return;
209 }
210 auto& joystick_guid_list = joystick_map[guid];
211 const auto it = std::find_if(
212 joystick_guid_list.begin(), joystick_guid_list.end(),
213 [](const std::shared_ptr<SDLJoystick>& joystick) { return !joystick->GetSDLJoystick(); });
214 if (it != joystick_guid_list.end()) {
215 (*it)->SetSDLJoystick(sdl_joystick);
216 return;
217 }
218 auto joystick = std::make_shared<SDLJoystick>(guid, joystick_guid_list.size(), sdl_joystick);
219 joystick_guid_list.emplace_back(std::move(joystick));
220}
221
222void SDLState::CloseJoystick(SDL_Joystick* sdl_joystick) {
223 std::string guid = GetGUID(sdl_joystick);
224 std::shared_ptr<SDLJoystick> joystick;
225 {
226 std::lock_guard lock{joystick_map_mutex};
227 // This call to guid is safe since the joystick is guaranteed to be in the map
228 auto& joystick_guid_list = joystick_map[guid];
229 const auto joystick_it =
230 std::find_if(joystick_guid_list.begin(), joystick_guid_list.end(),
231 [&sdl_joystick](const std::shared_ptr<SDLJoystick>& joystick) {
232 return joystick->GetSDLJoystick() == sdl_joystick;
233 });
234 joystick = *joystick_it;
235 }
236 // Destruct SDL_Joystick outside the lock guard because SDL can internally call event calback
237 // which locks the mutex again
238 joystick->SetSDLJoystick(nullptr, [](SDL_Joystick*) {});
239}
240
void SDLState::HandleGameControllerEvent(const SDL_Event& event) {
    // Routes one SDL joystick event to the matching virtual joystick, or
    // opens/closes joysticks on hotplug notifications.
    switch (event.type) {
    case SDL_JOYBUTTONUP:
    case SDL_JOYBUTTONDOWN:
        // Both button events carry the same payload; the pressed state is
        // derived directly from the event type.
        if (const auto joystick = GetSDLJoystickBySDLID(event.jbutton.which)) {
            joystick->SetButton(event.jbutton.button, event.type == SDL_JOYBUTTONDOWN);
        }
        break;
    case SDL_JOYHATMOTION:
        if (const auto joystick = GetSDLJoystickBySDLID(event.jhat.which)) {
            joystick->SetHat(event.jhat.hat, event.jhat.value);
        }
        break;
    case SDL_JOYAXISMOTION:
        if (const auto joystick = GetSDLJoystickBySDLID(event.jaxis.which)) {
            joystick->SetAxis(event.jaxis.axis, event.jaxis.value);
        }
        break;
    case SDL_JOYDEVICEREMOVED:
        LOG_DEBUG(Input, "Controller removed with Instance_ID {}", event.jdevice.which);
        CloseJoystick(SDL_JoystickFromInstanceID(event.jdevice.which));
        break;
    case SDL_JOYDEVICEADDED:
        LOG_DEBUG(Input, "Controller connected with device index {}", event.jdevice.which);
        InitJoystick(event.jdevice.which);
        break;
    }
}
277
278void SDLState::CloseJoysticks() {
279 std::lock_guard lock{joystick_map_mutex};
280 joystick_map.clear();
281}
282
283class SDLButton final : public Input::ButtonDevice {
284public:
285 explicit SDLButton(std::shared_ptr<SDLJoystick> joystick_, int button_)
286 : joystick(std::move(joystick_)), button(button_) {}
287
288 bool GetStatus() const override {
289 return joystick->GetButton(button);
290 }
291
292private:
293 std::shared_ptr<SDLJoystick> joystick;
294 int button;
295};
296
297class SDLDirectionButton final : public Input::ButtonDevice {
298public:
299 explicit SDLDirectionButton(std::shared_ptr<SDLJoystick> joystick_, int hat_, Uint8 direction_)
300 : joystick(std::move(joystick_)), hat(hat_), direction(direction_) {}
301
302 bool GetStatus() const override {
303 return joystick->GetHatDirection(hat, direction);
304 }
305
306private:
307 std::shared_ptr<SDLJoystick> joystick;
308 int hat;
309 Uint8 direction;
310};
311
312class SDLAxisButton final : public Input::ButtonDevice {
313public:
314 explicit SDLAxisButton(std::shared_ptr<SDLJoystick> joystick_, int axis_, float threshold_,
315 bool trigger_if_greater_)
316 : joystick(std::move(joystick_)), axis(axis_), threshold(threshold_),
317 trigger_if_greater(trigger_if_greater_) {}
318
319 bool GetStatus() const override {
320 float axis_value = joystick->GetAxis(axis);
321 if (trigger_if_greater)
322 return axis_value > threshold;
323 return axis_value < threshold;
324 }
325
326private:
327 std::shared_ptr<SDLJoystick> joystick;
328 int axis;
329 float threshold;
330 bool trigger_if_greater;
331};
332
333class SDLAnalog final : public Input::AnalogDevice {
334public:
335 SDLAnalog(std::shared_ptr<SDLJoystick> joystick_, int axis_x_, int axis_y_, float deadzone_)
336 : joystick(std::move(joystick_)), axis_x(axis_x_), axis_y(axis_y_), deadzone(deadzone_) {}
337
338 std::tuple<float, float> GetStatus() const override {
339 const auto [x, y] = joystick->GetAnalog(axis_x, axis_y);
340 const float r = std::sqrt((x * x) + (y * y));
341 if (r > deadzone) {
342 return std::make_tuple(x / r * (r - deadzone) / (1 - deadzone),
343 y / r * (r - deadzone) / (1 - deadzone));
344 }
345 return std::make_tuple<float, float>(0.0f, 0.0f);
346 }
347
348private:
349 std::shared_ptr<SDLJoystick> joystick;
350 const int axis_x;
351 const int axis_y;
352 const float deadzone;
353};
354
355/// A button device factory that creates button devices from SDL joystick
356class SDLButtonFactory final : public Input::Factory<Input::ButtonDevice> {
357public:
358 explicit SDLButtonFactory(SDLState& state_) : state(state_) {}
359
360 /**
361 * Creates a button device from a joystick button
362 * @param params contains parameters for creating the device:
363 * - "guid": the guid of the joystick to bind
364 * - "port": the nth joystick of the same type to bind
365 * - "button"(optional): the index of the button to bind
366 * - "hat"(optional): the index of the hat to bind as direction buttons
367 * - "axis"(optional): the index of the axis to bind
368 * - "direction"(only used for hat): the direction name of the hat to bind. Can be "up",
369 * "down", "left" or "right"
370 * - "threshold"(only used for axis): a float value in (-1.0, 1.0) which the button is
371 * triggered if the axis value crosses
372 * - "direction"(only used for axis): "+" means the button is triggered when the axis
373 * value is greater than the threshold; "-" means the button is triggered when the axis
374 * value is smaller than the threshold
375 */
376 std::unique_ptr<Input::ButtonDevice> Create(const Common::ParamPackage& params) override {
377 const std::string guid = params.Get("guid", "0");
378 const int port = params.Get("port", 0);
379
380 auto joystick = state.GetSDLJoystickByGUID(guid, port);
381
382 if (params.Has("hat")) {
383 const int hat = params.Get("hat", 0);
384 const std::string direction_name = params.Get("direction", "");
385 Uint8 direction;
386 if (direction_name == "up") {
387 direction = SDL_HAT_UP;
388 } else if (direction_name == "down") {
389 direction = SDL_HAT_DOWN;
390 } else if (direction_name == "left") {
391 direction = SDL_HAT_LEFT;
392 } else if (direction_name == "right") {
393 direction = SDL_HAT_RIGHT;
394 } else {
395 direction = 0;
396 }
397 // This is necessary so accessing GetHat with hat won't crash
398 joystick->SetHat(hat, SDL_HAT_CENTERED);
399 return std::make_unique<SDLDirectionButton>(joystick, hat, direction);
400 }
401
402 if (params.Has("axis")) {
403 const int axis = params.Get("axis", 0);
404 const float threshold = params.Get("threshold", 0.5f);
405 const std::string direction_name = params.Get("direction", "");
406 bool trigger_if_greater;
407 if (direction_name == "+") {
408 trigger_if_greater = true;
409 } else if (direction_name == "-") {
410 trigger_if_greater = false;
411 } else {
412 trigger_if_greater = true;
413 LOG_ERROR(Input, "Unknown direction {}", direction_name);
414 }
415 // This is necessary so accessing GetAxis with axis won't crash
416 joystick->SetAxis(axis, 0);
417 return std::make_unique<SDLAxisButton>(joystick, axis, threshold, trigger_if_greater);
418 }
419
420 const int button = params.Get("button", 0);
421 // This is necessary so accessing GetButton with button won't crash
422 joystick->SetButton(button, false);
423 return std::make_unique<SDLButton>(joystick, button);
424 }
425
426private:
427 SDLState& state;
428};
429
430/// An analog device factory that creates analog devices from SDL joystick
431class SDLAnalogFactory final : public Input::Factory<Input::AnalogDevice> {
432public:
433 explicit SDLAnalogFactory(SDLState& state_) : state(state_) {}
434 /**
435 * Creates analog device from joystick axes
436 * @param params contains parameters for creating the device:
437 * - "guid": the guid of the joystick to bind
438 * - "port": the nth joystick of the same type
439 * - "axis_x": the index of the axis to be bind as x-axis
440 * - "axis_y": the index of the axis to be bind as y-axis
441 */
442 std::unique_ptr<Input::AnalogDevice> Create(const Common::ParamPackage& params) override {
443 const std::string guid = params.Get("guid", "0");
444 const int port = params.Get("port", 0);
445 const int axis_x = params.Get("axis_x", 0);
446 const int axis_y = params.Get("axis_y", 1);
447 float deadzone = std::clamp(params.Get("deadzone", 0.0f), 0.0f, .99f);
448
449 auto joystick = state.GetSDLJoystickByGUID(guid, port);
450
451 // This is necessary so accessing GetAxis with axis_x and axis_y won't crash
452 joystick->SetAxis(axis_x, 0);
453 joystick->SetAxis(axis_y, 0);
454 return std::make_unique<SDLAnalog>(joystick, axis_x, axis_y, deadzone);
455 }
456
457private:
458 SDLState& state;
459};
460
461SDLState::SDLState() {
462 using namespace Input;
463 RegisterFactory<ButtonDevice>("sdl", std::make_shared<SDLButtonFactory>(*this));
464 RegisterFactory<AnalogDevice>("sdl", std::make_shared<SDLAnalogFactory>(*this));
465
466 // If the frontend is going to manage the event loop, then we dont start one here
467 start_thread = !SDL_WasInit(SDL_INIT_JOYSTICK);
468 if (start_thread && SDL_Init(SDL_INIT_JOYSTICK) < 0) {
469 LOG_CRITICAL(Input, "SDL_Init(SDL_INIT_JOYSTICK) failed with: {}", SDL_GetError());
470 return;
471 }
472 if (SDL_SetHint(SDL_HINT_JOYSTICK_ALLOW_BACKGROUND_EVENTS, "1") == SDL_FALSE) {
473 LOG_ERROR(Input, "Failed to set Hint for background events", SDL_GetError());
474 }
475
476 SDL_AddEventWatch(&SDLEventWatcher, this);
477
478 initialized = true;
479 if (start_thread) {
480 poll_thread = std::thread([this] {
481 using namespace std::chrono_literals;
482 while (initialized) {
483 SDL_PumpEvents();
484 std::this_thread::sleep_for(10ms);
485 }
486 });
487 }
488 // Because the events for joystick connection happens before we have our event watcher added, we
489 // can just open all the joysticks right here
490 for (int i = 0; i < SDL_NumJoysticks(); ++i) {
491 InitJoystick(i);
492 }
493}
494
SDLState::~SDLState() {
    using namespace Input;
    // Unregister our factories first so no new devices can be created while
    // the backend is tearing down.
    UnregisterFactory<ButtonDevice>("sdl");
    UnregisterFactory<AnalogDevice>("sdl");

    // Order matters here: joysticks must be closed before SDL_QuitSubSystem
    // (see the note on CloseJoysticks in the header).
    CloseJoysticks();
    SDL_DelEventWatch(&SDLEventWatcher, this);

    // Clearing the flag stops the poll thread's loop; only then is join safe.
    initialized = false;
    if (start_thread) {
        poll_thread.join();
        // Only shut the subsystem down if we were the ones who started it.
        SDL_QuitSubSystem(SDL_INIT_JOYSTICK);
    }
}
509
510Common::ParamPackage SDLEventToButtonParamPackage(SDLState& state, const SDL_Event& event) {
511 Common::ParamPackage params({{"engine", "sdl"}});
512
513 switch (event.type) {
514 case SDL_JOYAXISMOTION: {
515 auto joystick = state.GetSDLJoystickBySDLID(event.jaxis.which);
516 params.Set("port", joystick->GetPort());
517 params.Set("guid", joystick->GetGUID());
518 params.Set("axis", event.jaxis.axis);
519 if (event.jaxis.value > 0) {
520 params.Set("direction", "+");
521 params.Set("threshold", "0.5");
522 } else {
523 params.Set("direction", "-");
524 params.Set("threshold", "-0.5");
525 }
526 break;
527 }
528 case SDL_JOYBUTTONUP: {
529 auto joystick = state.GetSDLJoystickBySDLID(event.jbutton.which);
530 params.Set("port", joystick->GetPort());
531 params.Set("guid", joystick->GetGUID());
532 params.Set("button", event.jbutton.button);
533 break;
534 }
535 case SDL_JOYHATMOTION: {
536 auto joystick = state.GetSDLJoystickBySDLID(event.jhat.which);
537 params.Set("port", joystick->GetPort());
538 params.Set("guid", joystick->GetGUID());
539 params.Set("hat", event.jhat.hat);
540 switch (event.jhat.value) {
541 case SDL_HAT_UP:
542 params.Set("direction", "up");
543 break;
544 case SDL_HAT_DOWN:
545 params.Set("direction", "down");
546 break;
547 case SDL_HAT_LEFT:
548 params.Set("direction", "left");
549 break;
550 case SDL_HAT_RIGHT:
551 params.Set("direction", "right");
552 break;
553 default:
554 return {};
555 }
556 break;
557 }
558 }
559 return params;
560}
561
562namespace Polling {
563
564class SDLPoller : public InputCommon::Polling::DevicePoller {
565public:
566 explicit SDLPoller(SDLState& state_) : state(state_) {}
567
568 void Start() override {
569 state.event_queue.Clear();
570 state.polling = true;
571 }
572
573 void Stop() override {
574 state.polling = false;
575 }
576
577protected:
578 SDLState& state;
579};
580
581class SDLButtonPoller final : public SDLPoller {
582public:
583 explicit SDLButtonPoller(SDLState& state_) : SDLPoller(state_) {}
584
585 Common::ParamPackage GetNextInput() override {
586 SDL_Event event;
587 while (state.event_queue.Pop(event)) {
588 switch (event.type) {
589 case SDL_JOYAXISMOTION:
590 if (std::abs(event.jaxis.value / 32767.0) < 0.5) {
591 break;
592 }
593 case SDL_JOYBUTTONUP:
594 case SDL_JOYHATMOTION:
595 return SDLEventToButtonParamPackage(state, event);
596 }
597 }
598 return {};
599 }
600};
601
602class SDLAnalogPoller final : public SDLPoller {
603public:
604 explicit SDLAnalogPoller(SDLState& state_) : SDLPoller(state_) {}
605
606 void Start() override {
607 SDLPoller::Start();
608
609 // Reset stored axes
610 analog_xaxis = -1;
611 analog_yaxis = -1;
612 analog_axes_joystick = -1;
613 }
614
615 Common::ParamPackage GetNextInput() override {
616 SDL_Event event;
617 while (state.event_queue.Pop(event)) {
618 if (event.type != SDL_JOYAXISMOTION || std::abs(event.jaxis.value / 32767.0) < 0.5) {
619 continue;
620 }
621 // An analog device needs two axes, so we need to store the axis for later and wait for
622 // a second SDL event. The axes also must be from the same joystick.
623 int axis = event.jaxis.axis;
624 if (analog_xaxis == -1) {
625 analog_xaxis = axis;
626 analog_axes_joystick = event.jaxis.which;
627 } else if (analog_yaxis == -1 && analog_xaxis != axis &&
628 analog_axes_joystick == event.jaxis.which) {
629 analog_yaxis = axis;
630 }
631 }
632 Common::ParamPackage params;
633 if (analog_xaxis != -1 && analog_yaxis != -1) {
634 auto joystick = state.GetSDLJoystickBySDLID(event.jaxis.which);
635 params.Set("engine", "sdl");
636 params.Set("port", joystick->GetPort());
637 params.Set("guid", joystick->GetGUID());
638 params.Set("axis_x", analog_xaxis);
639 params.Set("axis_y", analog_yaxis);
640 analog_xaxis = -1;
641 analog_yaxis = -1;
642 analog_axes_joystick = -1;
643 return params;
644 }
645 return params;
646 }
647
648private:
649 int analog_xaxis = -1;
650 int analog_yaxis = -1;
651 SDL_JoystickID analog_axes_joystick = -1;
652};
653} // namespace Polling
654
655SDLState::Pollers SDLState::GetPollers(InputCommon::Polling::DeviceType type) {
656 Pollers pollers;
657
658 switch (type) {
659 case InputCommon::Polling::DeviceType::Analog:
660 pollers.emplace_back(std::make_unique<Polling::SDLAnalogPoller>(*this));
661 break;
662 case InputCommon::Polling::DeviceType::Button:
663 pollers.emplace_back(std::make_unique<Polling::SDLButtonPoller>(*this));
664 break;
665 }
666
667 return pollers;
668}
669
670} // namespace SDL
671} // namespace InputCommon
diff --git a/src/input_common/sdl/sdl_impl.h b/src/input_common/sdl/sdl_impl.h
new file mode 100644
index 000000000..2579741d6
--- /dev/null
+++ b/src/input_common/sdl/sdl_impl.h
@@ -0,0 +1,63 @@
1// Copyright 2018 Citra Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <atomic>
8#include <memory>
9#include <thread>
10#include "common/threadsafe_queue.h"
11#include "input_common/sdl/sdl.h"
12
13union SDL_Event;
14using SDL_Joystick = struct _SDL_Joystick;
15using SDL_JoystickID = s32;
16
17namespace InputCommon::SDL {
18
19class SDLJoystick;
20class SDLButtonFactory;
21class SDLAnalogFactory;
22
23class SDLState : public State {
24public:
25 /// Initializes and registers SDL device factories
26 SDLState();
27
28 /// Unregisters SDL device factories and shut them down.
29 ~SDLState() override;
30
31 /// Handle SDL_Events for joysticks from SDL_PollEvent
32 void HandleGameControllerEvent(const SDL_Event& event);
33
34 std::shared_ptr<SDLJoystick> GetSDLJoystickBySDLID(SDL_JoystickID sdl_id);
35 std::shared_ptr<SDLJoystick> GetSDLJoystickByGUID(const std::string& guid, int port);
36
37 /// Get all DevicePoller that use the SDL backend for a specific device type
38 Pollers GetPollers(Polling::DeviceType type) override;
39
40 /// Used by the Pollers during config
41 std::atomic<bool> polling = false;
42 Common::SPSCQueue<SDL_Event> event_queue;
43
44private:
45 void InitJoystick(int joystick_index);
46 void CloseJoystick(SDL_Joystick* sdl_joystick);
47
48 /// Needs to be called before SDL_QuitSubSystem.
49 void CloseJoysticks();
50
51 /// Map of GUID of a list of corresponding virtual Joysticks
52 std::unordered_map<std::string, std::vector<std::shared_ptr<SDLJoystick>>> joystick_map;
53 std::mutex joystick_map_mutex;
54
55 std::shared_ptr<SDLButtonFactory> button_factory;
56 std::shared_ptr<SDLAnalogFactory> analog_factory;
57
58 bool start_thread = false;
59 std::atomic<bool> initialized = false;
60
61 std::thread poll_thread;
62};
63} // namespace InputCommon::SDL
diff --git a/src/tests/CMakeLists.txt b/src/tests/CMakeLists.txt
index 37f09ce5f..c7038b217 100644
--- a/src/tests/CMakeLists.txt
+++ b/src/tests/CMakeLists.txt
@@ -1,4 +1,7 @@
1add_executable(tests 1add_executable(tests
2 common/bit_field.cpp
3 common/bit_utils.cpp
4 common/multi_level_queue.cpp
2 common/param_package.cpp 5 common/param_package.cpp
3 common/ring_buffer.cpp 6 common/ring_buffer.cpp
4 core/arm/arm_test_common.cpp 7 core/arm/arm_test_common.cpp
diff --git a/src/tests/common/bit_field.cpp b/src/tests/common/bit_field.cpp
new file mode 100644
index 000000000..8ca1889f9
--- /dev/null
+++ b/src/tests/common/bit_field.cpp
@@ -0,0 +1,90 @@
1// Copyright 2019 Citra Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <array>
6#include <cstring>
7#include <type_traits>
8#include <catch2/catch.hpp>
9#include "common/bit_field.h"
10
11TEST_CASE("BitField", "[common]") {
12 enum class TestEnum : u32 {
13 A = 0b10111101,
14 B = 0b10101110,
15 C = 0b00001111,
16 };
17
18 union LEBitField {
19 u32_le raw;
20 BitField<0, 6, u32> a;
21 BitField<6, 4, s32> b;
22 BitField<10, 8, TestEnum> c;
23 BitField<18, 14, u32> d;
24 } le_bitfield;
25
26 union BEBitField {
27 u32_be raw;
28 BitFieldBE<0, 6, u32> a;
29 BitFieldBE<6, 4, s32> b;
30 BitFieldBE<10, 8, TestEnum> c;
31 BitFieldBE<18, 14, u32> d;
32 } be_bitfield;
33
34 static_assert(sizeof(LEBitField) == sizeof(u32));
35 static_assert(sizeof(BEBitField) == sizeof(u32));
36 static_assert(std::is_trivially_copyable_v<LEBitField>);
37 static_assert(std::is_trivially_copyable_v<BEBitField>);
38
39 std::array<u8, 4> raw{{
40 0b01101100,
41 0b11110110,
42 0b10111010,
43 0b11101100,
44 }};
45
46 std::memcpy(&le_bitfield, &raw, sizeof(raw));
47 std::memcpy(&be_bitfield, &raw, sizeof(raw));
48
49 // bit fields: 11101100101110'10111101'1001'101100
50 REQUIRE(le_bitfield.raw == 0b11101100'10111010'11110110'01101100);
51 REQUIRE(le_bitfield.a == 0b101100);
52 REQUIRE(le_bitfield.b == -7); // 1001 as two's complement
53 REQUIRE(le_bitfield.c == TestEnum::A);
54 REQUIRE(le_bitfield.d == 0b11101100101110);
55
56 le_bitfield.a.Assign(0b000111);
57 le_bitfield.b.Assign(-1);
58 le_bitfield.c.Assign(TestEnum::C);
59 le_bitfield.d.Assign(0b01010101010101);
60 std::memcpy(&raw, &le_bitfield, sizeof(raw));
61 // bit fields: 01010101010101'00001111'1111'000111
62 REQUIRE(le_bitfield.raw == 0b01010101'01010100'00111111'11000111);
63 REQUIRE(raw == std::array<u8, 4>{{
64 0b11000111,
65 0b00111111,
66 0b01010100,
67 0b01010101,
68 }});
69
70 // bit fields: 01101100111101'10101110'1011'101100
71 REQUIRE(be_bitfield.raw == 0b01101100'11110110'10111010'11101100);
72 REQUIRE(be_bitfield.a == 0b101100);
73 REQUIRE(be_bitfield.b == -5); // 1011 as two's complement
74 REQUIRE(be_bitfield.c == TestEnum::B);
75 REQUIRE(be_bitfield.d == 0b01101100111101);
76
77 be_bitfield.a.Assign(0b000111);
78 be_bitfield.b.Assign(-1);
79 be_bitfield.c.Assign(TestEnum::C);
80 be_bitfield.d.Assign(0b01010101010101);
81 std::memcpy(&raw, &be_bitfield, sizeof(raw));
82 // bit fields: 01010101010101'00001111'1111'000111
83 REQUIRE(be_bitfield.raw == 0b01010101'01010100'00111111'11000111);
84 REQUIRE(raw == std::array<u8, 4>{{
85 0b01010101,
86 0b01010100,
87 0b00111111,
88 0b11000111,
89 }});
90}
diff --git a/src/tests/common/bit_utils.cpp b/src/tests/common/bit_utils.cpp
new file mode 100644
index 000000000..479b5995a
--- /dev/null
+++ b/src/tests/common/bit_utils.cpp
@@ -0,0 +1,23 @@
1// Copyright 2017 Citra Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <catch2/catch.hpp>
6#include <math.h>
7#include "common/bit_util.h"
8
9namespace Common {
10
11TEST_CASE("BitUtils::CountTrailingZeroes", "[common]") {
12 REQUIRE(Common::CountTrailingZeroes32(0) == 32);
13 REQUIRE(Common::CountTrailingZeroes64(0) == 64);
14 REQUIRE(Common::CountTrailingZeroes32(9) == 0);
15 REQUIRE(Common::CountTrailingZeroes32(8) == 3);
16 REQUIRE(Common::CountTrailingZeroes32(0x801000) == 12);
17 REQUIRE(Common::CountTrailingZeroes64(9) == 0);
18 REQUIRE(Common::CountTrailingZeroes64(8) == 3);
19 REQUIRE(Common::CountTrailingZeroes64(0x801000) == 12);
20 REQUIRE(Common::CountTrailingZeroes64(0x801000000000UL) == 36);
21}
22
23} // namespace Common
diff --git a/src/tests/common/multi_level_queue.cpp b/src/tests/common/multi_level_queue.cpp
new file mode 100644
index 000000000..cca7ec7da
--- /dev/null
+++ b/src/tests/common/multi_level_queue.cpp
@@ -0,0 +1,55 @@
1// Copyright 2019 Yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <catch2/catch.hpp>
6#include <math.h>
7#include "common/common_types.h"
8#include "common/multi_level_queue.h"
9
10namespace Common {
11
12TEST_CASE("MultiLevelQueue", "[common]") {
13 std::array<f32, 8> values = {0.0, 5.0, 1.0, 9.0, 8.0, 2.0, 6.0, 7.0};
14 Common::MultiLevelQueue<f32, 64> mlq;
15 REQUIRE(mlq.empty());
16 mlq.add(values[2], 2);
17 mlq.add(values[7], 7);
18 mlq.add(values[3], 3);
19 mlq.add(values[4], 4);
20 mlq.add(values[0], 0);
21 mlq.add(values[5], 5);
22 mlq.add(values[6], 6);
23 mlq.add(values[1], 1);
24 u32 index = 0;
25 bool all_set = true;
26 for (auto& f : mlq) {
27 all_set &= (f == values[index]);
28 index++;
29 }
30 REQUIRE(all_set);
31 REQUIRE(!mlq.empty());
32 f32 v = 8.0;
33 mlq.add(v, 2);
34 v = -7.0;
35 mlq.add(v, 2, false);
36 REQUIRE(mlq.front(2) == -7.0);
37 mlq.yield(2);
38 REQUIRE(mlq.front(2) == values[2]);
39 REQUIRE(mlq.back(2) == -7.0);
40 REQUIRE(mlq.empty(8));
41 v = 10.0;
42 mlq.add(v, 8);
43 mlq.adjust(v, 8, 9);
44 REQUIRE(mlq.front(9) == v);
45 REQUIRE(mlq.empty(8));
46 REQUIRE(!mlq.empty(9));
47 mlq.adjust(values[0], 0, 9);
48 REQUIRE(mlq.highest_priority_set() == 1);
49 REQUIRE(mlq.lowest_priority_set() == 9);
50 mlq.remove(values[1], 1);
51 REQUIRE(mlq.highest_priority_set() == 2);
52 REQUIRE(mlq.empty(1));
53}
54
55} // namespace Common
diff --git a/src/tests/core/arm/arm_test_common.cpp b/src/tests/core/arm/arm_test_common.cpp
index 9b8a44fa1..58af41f6e 100644
--- a/src/tests/core/arm/arm_test_common.cpp
+++ b/src/tests/core/arm/arm_test_common.cpp
@@ -4,6 +4,7 @@
4 4
5#include <algorithm> 5#include <algorithm>
6 6
7#include "common/page_table.h"
7#include "core/core.h" 8#include "core/core.h"
8#include "core/hle/kernel/process.h" 9#include "core/hle/kernel/process.h"
9#include "core/memory.h" 10#include "core/memory.h"
@@ -13,21 +14,20 @@
13namespace ArmTests { 14namespace ArmTests {
14 15
15TestEnvironment::TestEnvironment(bool mutable_memory_) 16TestEnvironment::TestEnvironment(bool mutable_memory_)
16 : mutable_memory(mutable_memory_), test_memory(std::make_shared<TestMemory>(this)) { 17 : mutable_memory(mutable_memory_),
17 18 test_memory(std::make_shared<TestMemory>(this)), kernel{Core::System::GetInstance()} {
18 auto process = Kernel::Process::Create(kernel, ""); 19 auto process = Kernel::Process::Create(Core::System::GetInstance(), "");
19 kernel.MakeCurrentProcess(process.get()); 20 page_table = &process->VMManager().page_table;
20 page_table = &Core::CurrentProcess()->VMManager().page_table;
21 21
22 std::fill(page_table->pointers.begin(), page_table->pointers.end(), nullptr); 22 std::fill(page_table->pointers.begin(), page_table->pointers.end(), nullptr);
23 page_table->special_regions.clear(); 23 page_table->special_regions.clear();
24 std::fill(page_table->attributes.begin(), page_table->attributes.end(), 24 std::fill(page_table->attributes.begin(), page_table->attributes.end(),
25 Memory::PageType::Unmapped); 25 Common::PageType::Unmapped);
26 26
27 Memory::MapIoRegion(*page_table, 0x00000000, 0x80000000, test_memory); 27 Memory::MapIoRegion(*page_table, 0x00000000, 0x80000000, test_memory);
28 Memory::MapIoRegion(*page_table, 0x80000000, 0x80000000, test_memory); 28 Memory::MapIoRegion(*page_table, 0x80000000, 0x80000000, test_memory);
29 29
30 Memory::SetCurrentPageTable(page_table); 30 kernel.MakeCurrentProcess(process.get());
31} 31}
32 32
33TestEnvironment::~TestEnvironment() { 33TestEnvironment::~TestEnvironment() {
diff --git a/src/tests/core/arm/arm_test_common.h b/src/tests/core/arm/arm_test_common.h
index 0b7539601..d145dbfcc 100644
--- a/src/tests/core/arm/arm_test_common.h
+++ b/src/tests/core/arm/arm_test_common.h
@@ -9,10 +9,10 @@
9#include <vector> 9#include <vector>
10 10
11#include "common/common_types.h" 11#include "common/common_types.h"
12#include "common/memory_hook.h"
12#include "core/hle/kernel/kernel.h" 13#include "core/hle/kernel/kernel.h"
13#include "core/memory_hook.h"
14 14
15namespace Memory { 15namespace Common {
16struct PageTable; 16struct PageTable;
17} 17}
18 18
@@ -58,7 +58,7 @@ public:
58 58
59private: 59private:
60 friend struct TestMemory; 60 friend struct TestMemory;
61 struct TestMemory final : Memory::MemoryHook { 61 struct TestMemory final : Common::MemoryHook {
62 explicit TestMemory(TestEnvironment* env_) : env(env_) {} 62 explicit TestMemory(TestEnvironment* env_) : env(env_) {}
63 TestEnvironment* env; 63 TestEnvironment* env;
64 64
@@ -86,7 +86,7 @@ private:
86 bool mutable_memory; 86 bool mutable_memory;
87 std::shared_ptr<TestMemory> test_memory; 87 std::shared_ptr<TestMemory> test_memory;
88 std::vector<WriteRecord> write_records; 88 std::vector<WriteRecord> write_records;
89 Memory::PageTable* page_table = nullptr; 89 Common::PageTable* page_table = nullptr;
90 Kernel::KernelCore kernel; 90 Kernel::KernelCore kernel;
91}; 91};
92 92
diff --git a/src/tests/core/core_timing.cpp b/src/tests/core/core_timing.cpp
index 2242c14cf..340d6a272 100644
--- a/src/tests/core/core_timing.cpp
+++ b/src/tests/core/core_timing.cpp
@@ -28,100 +28,103 @@ void CallbackTemplate(u64 userdata, s64 cycles_late) {
28 REQUIRE(lateness == cycles_late); 28 REQUIRE(lateness == cycles_late);
29} 29}
30 30
31class ScopeInit final { 31struct ScopeInit final {
32public:
33 ScopeInit() { 32 ScopeInit() {
34 CoreTiming::Init(); 33 core_timing.Initialize();
35 } 34 }
36 ~ScopeInit() { 35 ~ScopeInit() {
37 CoreTiming::Shutdown(); 36 core_timing.Shutdown();
38 } 37 }
38
39 Core::Timing::CoreTiming core_timing;
39}; 40};
40 41
41static void AdvanceAndCheck(u32 idx, int downcount, int expected_lateness = 0, 42static void AdvanceAndCheck(Core::Timing::CoreTiming& core_timing, u32 idx, int downcount,
42 int cpu_downcount = 0) { 43 int expected_lateness = 0, int cpu_downcount = 0) {
43 callbacks_ran_flags = 0; 44 callbacks_ran_flags = 0;
44 expected_callback = CB_IDS[idx]; 45 expected_callback = CB_IDS[idx];
45 lateness = expected_lateness; 46 lateness = expected_lateness;
46 47
47 CoreTiming::AddTicks(CoreTiming::GetDowncount() - 48 // Pretend we executed X cycles of instructions.
48 cpu_downcount); // Pretend we executed X cycles of instructions. 49 core_timing.AddTicks(core_timing.GetDowncount() - cpu_downcount);
49 CoreTiming::Advance(); 50 core_timing.Advance();
50 51
51 REQUIRE(decltype(callbacks_ran_flags)().set(idx) == callbacks_ran_flags); 52 REQUIRE(decltype(callbacks_ran_flags)().set(idx) == callbacks_ran_flags);
52 REQUIRE(downcount == CoreTiming::GetDowncount()); 53 REQUIRE(downcount == core_timing.GetDowncount());
53} 54}
54 55
55TEST_CASE("CoreTiming[BasicOrder]", "[core]") { 56TEST_CASE("CoreTiming[BasicOrder]", "[core]") {
56 ScopeInit guard; 57 ScopeInit guard;
58 auto& core_timing = guard.core_timing;
57 59
58 CoreTiming::EventType* cb_a = CoreTiming::RegisterEvent("callbackA", CallbackTemplate<0>); 60 Core::Timing::EventType* cb_a = core_timing.RegisterEvent("callbackA", CallbackTemplate<0>);
59 CoreTiming::EventType* cb_b = CoreTiming::RegisterEvent("callbackB", CallbackTemplate<1>); 61 Core::Timing::EventType* cb_b = core_timing.RegisterEvent("callbackB", CallbackTemplate<1>);
60 CoreTiming::EventType* cb_c = CoreTiming::RegisterEvent("callbackC", CallbackTemplate<2>); 62 Core::Timing::EventType* cb_c = core_timing.RegisterEvent("callbackC", CallbackTemplate<2>);
61 CoreTiming::EventType* cb_d = CoreTiming::RegisterEvent("callbackD", CallbackTemplate<3>); 63 Core::Timing::EventType* cb_d = core_timing.RegisterEvent("callbackD", CallbackTemplate<3>);
62 CoreTiming::EventType* cb_e = CoreTiming::RegisterEvent("callbackE", CallbackTemplate<4>); 64 Core::Timing::EventType* cb_e = core_timing.RegisterEvent("callbackE", CallbackTemplate<4>);
63 65
64 // Enter slice 0 66 // Enter slice 0
65 CoreTiming::Advance(); 67 core_timing.Advance();
66 68
67 // D -> B -> C -> A -> E 69 // D -> B -> C -> A -> E
68 CoreTiming::ScheduleEvent(1000, cb_a, CB_IDS[0]); 70 core_timing.ScheduleEvent(1000, cb_a, CB_IDS[0]);
69 REQUIRE(1000 == CoreTiming::GetDowncount()); 71 REQUIRE(1000 == core_timing.GetDowncount());
70 CoreTiming::ScheduleEvent(500, cb_b, CB_IDS[1]); 72 core_timing.ScheduleEvent(500, cb_b, CB_IDS[1]);
71 REQUIRE(500 == CoreTiming::GetDowncount()); 73 REQUIRE(500 == core_timing.GetDowncount());
72 CoreTiming::ScheduleEvent(800, cb_c, CB_IDS[2]); 74 core_timing.ScheduleEvent(800, cb_c, CB_IDS[2]);
73 REQUIRE(500 == CoreTiming::GetDowncount()); 75 REQUIRE(500 == core_timing.GetDowncount());
74 CoreTiming::ScheduleEvent(100, cb_d, CB_IDS[3]); 76 core_timing.ScheduleEvent(100, cb_d, CB_IDS[3]);
75 REQUIRE(100 == CoreTiming::GetDowncount()); 77 REQUIRE(100 == core_timing.GetDowncount());
76 CoreTiming::ScheduleEvent(1200, cb_e, CB_IDS[4]); 78 core_timing.ScheduleEvent(1200, cb_e, CB_IDS[4]);
77 REQUIRE(100 == CoreTiming::GetDowncount()); 79 REQUIRE(100 == core_timing.GetDowncount());
78 80
79 AdvanceAndCheck(3, 400); 81 AdvanceAndCheck(core_timing, 3, 400);
80 AdvanceAndCheck(1, 300); 82 AdvanceAndCheck(core_timing, 1, 300);
81 AdvanceAndCheck(2, 200); 83 AdvanceAndCheck(core_timing, 2, 200);
82 AdvanceAndCheck(0, 200); 84 AdvanceAndCheck(core_timing, 0, 200);
83 AdvanceAndCheck(4, MAX_SLICE_LENGTH); 85 AdvanceAndCheck(core_timing, 4, MAX_SLICE_LENGTH);
84} 86}
85 87
86TEST_CASE("CoreTiming[Threadsave]", "[core]") { 88TEST_CASE("CoreTiming[Threadsave]", "[core]") {
87 ScopeInit guard; 89 ScopeInit guard;
90 auto& core_timing = guard.core_timing;
88 91
89 CoreTiming::EventType* cb_a = CoreTiming::RegisterEvent("callbackA", CallbackTemplate<0>); 92 Core::Timing::EventType* cb_a = core_timing.RegisterEvent("callbackA", CallbackTemplate<0>);
90 CoreTiming::EventType* cb_b = CoreTiming::RegisterEvent("callbackB", CallbackTemplate<1>); 93 Core::Timing::EventType* cb_b = core_timing.RegisterEvent("callbackB", CallbackTemplate<1>);
91 CoreTiming::EventType* cb_c = CoreTiming::RegisterEvent("callbackC", CallbackTemplate<2>); 94 Core::Timing::EventType* cb_c = core_timing.RegisterEvent("callbackC", CallbackTemplate<2>);
92 CoreTiming::EventType* cb_d = CoreTiming::RegisterEvent("callbackD", CallbackTemplate<3>); 95 Core::Timing::EventType* cb_d = core_timing.RegisterEvent("callbackD", CallbackTemplate<3>);
93 CoreTiming::EventType* cb_e = CoreTiming::RegisterEvent("callbackE", CallbackTemplate<4>); 96 Core::Timing::EventType* cb_e = core_timing.RegisterEvent("callbackE", CallbackTemplate<4>);
94 97
95 // Enter slice 0 98 // Enter slice 0
96 CoreTiming::Advance(); 99 core_timing.Advance();
97 100
98 // D -> B -> C -> A -> E 101 // D -> B -> C -> A -> E
99 CoreTiming::ScheduleEventThreadsafe(1000, cb_a, CB_IDS[0]); 102 core_timing.ScheduleEventThreadsafe(1000, cb_a, CB_IDS[0]);
100 // Manually force since ScheduleEventThreadsafe doesn't call it 103 // Manually force since ScheduleEventThreadsafe doesn't call it
101 CoreTiming::ForceExceptionCheck(1000); 104 core_timing.ForceExceptionCheck(1000);
102 REQUIRE(1000 == CoreTiming::GetDowncount()); 105 REQUIRE(1000 == core_timing.GetDowncount());
103 CoreTiming::ScheduleEventThreadsafe(500, cb_b, CB_IDS[1]); 106 core_timing.ScheduleEventThreadsafe(500, cb_b, CB_IDS[1]);
104 // Manually force since ScheduleEventThreadsafe doesn't call it 107 // Manually force since ScheduleEventThreadsafe doesn't call it
105 CoreTiming::ForceExceptionCheck(500); 108 core_timing.ForceExceptionCheck(500);
106 REQUIRE(500 == CoreTiming::GetDowncount()); 109 REQUIRE(500 == core_timing.GetDowncount());
107 CoreTiming::ScheduleEventThreadsafe(800, cb_c, CB_IDS[2]); 110 core_timing.ScheduleEventThreadsafe(800, cb_c, CB_IDS[2]);
108 // Manually force since ScheduleEventThreadsafe doesn't call it 111 // Manually force since ScheduleEventThreadsafe doesn't call it
109 CoreTiming::ForceExceptionCheck(800); 112 core_timing.ForceExceptionCheck(800);
110 REQUIRE(500 == CoreTiming::GetDowncount()); 113 REQUIRE(500 == core_timing.GetDowncount());
111 CoreTiming::ScheduleEventThreadsafe(100, cb_d, CB_IDS[3]); 114 core_timing.ScheduleEventThreadsafe(100, cb_d, CB_IDS[3]);
112 // Manually force since ScheduleEventThreadsafe doesn't call it 115 // Manually force since ScheduleEventThreadsafe doesn't call it
113 CoreTiming::ForceExceptionCheck(100); 116 core_timing.ForceExceptionCheck(100);
114 REQUIRE(100 == CoreTiming::GetDowncount()); 117 REQUIRE(100 == core_timing.GetDowncount());
115 CoreTiming::ScheduleEventThreadsafe(1200, cb_e, CB_IDS[4]); 118 core_timing.ScheduleEventThreadsafe(1200, cb_e, CB_IDS[4]);
116 // Manually force since ScheduleEventThreadsafe doesn't call it 119 // Manually force since ScheduleEventThreadsafe doesn't call it
117 CoreTiming::ForceExceptionCheck(1200); 120 core_timing.ForceExceptionCheck(1200);
118 REQUIRE(100 == CoreTiming::GetDowncount()); 121 REQUIRE(100 == core_timing.GetDowncount());
119 122
120 AdvanceAndCheck(3, 400); 123 AdvanceAndCheck(core_timing, 3, 400);
121 AdvanceAndCheck(1, 300); 124 AdvanceAndCheck(core_timing, 1, 300);
122 AdvanceAndCheck(2, 200); 125 AdvanceAndCheck(core_timing, 2, 200);
123 AdvanceAndCheck(0, 200); 126 AdvanceAndCheck(core_timing, 0, 200);
124 AdvanceAndCheck(4, MAX_SLICE_LENGTH); 127 AdvanceAndCheck(core_timing, 4, MAX_SLICE_LENGTH);
125} 128}
126 129
127namespace SharedSlotTest { 130namespace SharedSlotTest {
@@ -142,59 +145,63 @@ TEST_CASE("CoreTiming[SharedSlot]", "[core]") {
142 using namespace SharedSlotTest; 145 using namespace SharedSlotTest;
143 146
144 ScopeInit guard; 147 ScopeInit guard;
148 auto& core_timing = guard.core_timing;
145 149
146 CoreTiming::EventType* cb_a = CoreTiming::RegisterEvent("callbackA", FifoCallback<0>); 150 Core::Timing::EventType* cb_a = core_timing.RegisterEvent("callbackA", FifoCallback<0>);
147 CoreTiming::EventType* cb_b = CoreTiming::RegisterEvent("callbackB", FifoCallback<1>); 151 Core::Timing::EventType* cb_b = core_timing.RegisterEvent("callbackB", FifoCallback<1>);
148 CoreTiming::EventType* cb_c = CoreTiming::RegisterEvent("callbackC", FifoCallback<2>); 152 Core::Timing::EventType* cb_c = core_timing.RegisterEvent("callbackC", FifoCallback<2>);
149 CoreTiming::EventType* cb_d = CoreTiming::RegisterEvent("callbackD", FifoCallback<3>); 153 Core::Timing::EventType* cb_d = core_timing.RegisterEvent("callbackD", FifoCallback<3>);
150 CoreTiming::EventType* cb_e = CoreTiming::RegisterEvent("callbackE", FifoCallback<4>); 154 Core::Timing::EventType* cb_e = core_timing.RegisterEvent("callbackE", FifoCallback<4>);
151 155
152 CoreTiming::ScheduleEvent(1000, cb_a, CB_IDS[0]); 156 core_timing.ScheduleEvent(1000, cb_a, CB_IDS[0]);
153 CoreTiming::ScheduleEvent(1000, cb_b, CB_IDS[1]); 157 core_timing.ScheduleEvent(1000, cb_b, CB_IDS[1]);
154 CoreTiming::ScheduleEvent(1000, cb_c, CB_IDS[2]); 158 core_timing.ScheduleEvent(1000, cb_c, CB_IDS[2]);
155 CoreTiming::ScheduleEvent(1000, cb_d, CB_IDS[3]); 159 core_timing.ScheduleEvent(1000, cb_d, CB_IDS[3]);
156 CoreTiming::ScheduleEvent(1000, cb_e, CB_IDS[4]); 160 core_timing.ScheduleEvent(1000, cb_e, CB_IDS[4]);
157 161
158 // Enter slice 0 162 // Enter slice 0
159 CoreTiming::Advance(); 163 core_timing.Advance();
160 REQUIRE(1000 == CoreTiming::GetDowncount()); 164 REQUIRE(1000 == core_timing.GetDowncount());
161 165
162 callbacks_ran_flags = 0; 166 callbacks_ran_flags = 0;
163 counter = 0; 167 counter = 0;
164 lateness = 0; 168 lateness = 0;
165 CoreTiming::AddTicks(CoreTiming::GetDowncount()); 169 core_timing.AddTicks(core_timing.GetDowncount());
166 CoreTiming::Advance(); 170 core_timing.Advance();
167 REQUIRE(MAX_SLICE_LENGTH == CoreTiming::GetDowncount()); 171 REQUIRE(MAX_SLICE_LENGTH == core_timing.GetDowncount());
168 REQUIRE(0x1FULL == callbacks_ran_flags.to_ullong()); 172 REQUIRE(0x1FULL == callbacks_ran_flags.to_ullong());
169} 173}
170 174
171TEST_CASE("CoreTiming[PredictableLateness]", "[core]") { 175TEST_CASE("Core::Timing[PredictableLateness]", "[core]") {
172 ScopeInit guard; 176 ScopeInit guard;
177 auto& core_timing = guard.core_timing;
173 178
174 CoreTiming::EventType* cb_a = CoreTiming::RegisterEvent("callbackA", CallbackTemplate<0>); 179 Core::Timing::EventType* cb_a = core_timing.RegisterEvent("callbackA", CallbackTemplate<0>);
175 CoreTiming::EventType* cb_b = CoreTiming::RegisterEvent("callbackB", CallbackTemplate<1>); 180 Core::Timing::EventType* cb_b = core_timing.RegisterEvent("callbackB", CallbackTemplate<1>);
176 181
177 // Enter slice 0 182 // Enter slice 0
178 CoreTiming::Advance(); 183 core_timing.Advance();
179 184
180 CoreTiming::ScheduleEvent(100, cb_a, CB_IDS[0]); 185 core_timing.ScheduleEvent(100, cb_a, CB_IDS[0]);
181 CoreTiming::ScheduleEvent(200, cb_b, CB_IDS[1]); 186 core_timing.ScheduleEvent(200, cb_b, CB_IDS[1]);
182 187
183 AdvanceAndCheck(0, 90, 10, -10); // (100 - 10) 188 AdvanceAndCheck(core_timing, 0, 90, 10, -10); // (100 - 10)
184 AdvanceAndCheck(1, MAX_SLICE_LENGTH, 50, -50); 189 AdvanceAndCheck(core_timing, 1, MAX_SLICE_LENGTH, 50, -50);
185} 190}
186 191
187namespace ChainSchedulingTest { 192namespace ChainSchedulingTest {
188static int reschedules = 0; 193static int reschedules = 0;
189 194
190static void RescheduleCallback(u64 userdata, s64 cycles_late) { 195static void RescheduleCallback(Core::Timing::CoreTiming& core_timing, u64 userdata,
196 s64 cycles_late) {
191 --reschedules; 197 --reschedules;
192 REQUIRE(reschedules >= 0); 198 REQUIRE(reschedules >= 0);
193 REQUIRE(lateness == cycles_late); 199 REQUIRE(lateness == cycles_late);
194 200
195 if (reschedules > 0) 201 if (reschedules > 0) {
196 CoreTiming::ScheduleEvent(1000, reinterpret_cast<CoreTiming::EventType*>(userdata), 202 core_timing.ScheduleEvent(1000, reinterpret_cast<Core::Timing::EventType*>(userdata),
197 userdata); 203 userdata);
204 }
198} 205}
199} // namespace ChainSchedulingTest 206} // namespace ChainSchedulingTest
200 207
@@ -202,36 +209,39 @@ TEST_CASE("CoreTiming[ChainScheduling]", "[core]") {
202 using namespace ChainSchedulingTest; 209 using namespace ChainSchedulingTest;
203 210
204 ScopeInit guard; 211 ScopeInit guard;
212 auto& core_timing = guard.core_timing;
205 213
206 CoreTiming::EventType* cb_a = CoreTiming::RegisterEvent("callbackA", CallbackTemplate<0>); 214 Core::Timing::EventType* cb_a = core_timing.RegisterEvent("callbackA", CallbackTemplate<0>);
207 CoreTiming::EventType* cb_b = CoreTiming::RegisterEvent("callbackB", CallbackTemplate<1>); 215 Core::Timing::EventType* cb_b = core_timing.RegisterEvent("callbackB", CallbackTemplate<1>);
208 CoreTiming::EventType* cb_c = CoreTiming::RegisterEvent("callbackC", CallbackTemplate<2>); 216 Core::Timing::EventType* cb_c = core_timing.RegisterEvent("callbackC", CallbackTemplate<2>);
209 CoreTiming::EventType* cb_rs = 217 Core::Timing::EventType* cb_rs = core_timing.RegisterEvent(
210 CoreTiming::RegisterEvent("callbackReschedule", RescheduleCallback); 218 "callbackReschedule", [&core_timing](u64 userdata, s64 cycles_late) {
219 RescheduleCallback(core_timing, userdata, cycles_late);
220 });
211 221
212 // Enter slice 0 222 // Enter slice 0
213 CoreTiming::Advance(); 223 core_timing.Advance();
214 224
215 CoreTiming::ScheduleEvent(800, cb_a, CB_IDS[0]); 225 core_timing.ScheduleEvent(800, cb_a, CB_IDS[0]);
216 CoreTiming::ScheduleEvent(1000, cb_b, CB_IDS[1]); 226 core_timing.ScheduleEvent(1000, cb_b, CB_IDS[1]);
217 CoreTiming::ScheduleEvent(2200, cb_c, CB_IDS[2]); 227 core_timing.ScheduleEvent(2200, cb_c, CB_IDS[2]);
218 CoreTiming::ScheduleEvent(1000, cb_rs, reinterpret_cast<u64>(cb_rs)); 228 core_timing.ScheduleEvent(1000, cb_rs, reinterpret_cast<u64>(cb_rs));
219 REQUIRE(800 == CoreTiming::GetDowncount()); 229 REQUIRE(800 == core_timing.GetDowncount());
220 230
221 reschedules = 3; 231 reschedules = 3;
222 AdvanceAndCheck(0, 200); // cb_a 232 AdvanceAndCheck(core_timing, 0, 200); // cb_a
223 AdvanceAndCheck(1, 1000); // cb_b, cb_rs 233 AdvanceAndCheck(core_timing, 1, 1000); // cb_b, cb_rs
224 REQUIRE(2 == reschedules); 234 REQUIRE(2 == reschedules);
225 235
226 CoreTiming::AddTicks(CoreTiming::GetDowncount()); 236 core_timing.AddTicks(core_timing.GetDowncount());
227 CoreTiming::Advance(); // cb_rs 237 core_timing.Advance(); // cb_rs
228 REQUIRE(1 == reschedules); 238 REQUIRE(1 == reschedules);
229 REQUIRE(200 == CoreTiming::GetDowncount()); 239 REQUIRE(200 == core_timing.GetDowncount());
230 240
231 AdvanceAndCheck(2, 800); // cb_c 241 AdvanceAndCheck(core_timing, 2, 800); // cb_c
232 242
233 CoreTiming::AddTicks(CoreTiming::GetDowncount()); 243 core_timing.AddTicks(core_timing.GetDowncount());
234 CoreTiming::Advance(); // cb_rs 244 core_timing.Advance(); // cb_rs
235 REQUIRE(0 == reschedules); 245 REQUIRE(0 == reschedules);
236 REQUIRE(MAX_SLICE_LENGTH == CoreTiming::GetDowncount()); 246 REQUIRE(MAX_SLICE_LENGTH == core_timing.GetDowncount());
237} 247}
diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt
index 327db68a5..114bed20d 100644
--- a/src/video_core/CMakeLists.txt
+++ b/src/video_core/CMakeLists.txt
@@ -5,18 +5,24 @@ add_library(video_core STATIC
5 debug_utils/debug_utils.h 5 debug_utils/debug_utils.h
6 engines/fermi_2d.cpp 6 engines/fermi_2d.cpp
7 engines/fermi_2d.h 7 engines/fermi_2d.h
8 engines/kepler_compute.cpp
9 engines/kepler_compute.h
8 engines/kepler_memory.cpp 10 engines/kepler_memory.cpp
9 engines/kepler_memory.h 11 engines/kepler_memory.h
10 engines/maxwell_3d.cpp 12 engines/maxwell_3d.cpp
11 engines/maxwell_3d.h 13 engines/maxwell_3d.h
12 engines/maxwell_compute.cpp
13 engines/maxwell_compute.h
14 engines/maxwell_dma.cpp 14 engines/maxwell_dma.cpp
15 engines/maxwell_dma.h 15 engines/maxwell_dma.h
16 engines/shader_bytecode.h 16 engines/shader_bytecode.h
17 engines/shader_header.h 17 engines/shader_header.h
18 gpu.cpp 18 gpu.cpp
19 gpu.h 19 gpu.h
20 gpu_asynch.cpp
21 gpu_asynch.h
22 gpu_synch.cpp
23 gpu_synch.h
24 gpu_thread.cpp
25 gpu_thread.h
20 macro_interpreter.cpp 26 macro_interpreter.cpp
21 macro_interpreter.h 27 macro_interpreter.h
22 memory_manager.cpp 28 memory_manager.cpp
@@ -44,6 +50,8 @@ add_library(video_core STATIC
44 renderer_opengl/gl_shader_cache.h 50 renderer_opengl/gl_shader_cache.h
45 renderer_opengl/gl_shader_decompiler.cpp 51 renderer_opengl/gl_shader_decompiler.cpp
46 renderer_opengl/gl_shader_decompiler.h 52 renderer_opengl/gl_shader_decompiler.h
53 renderer_opengl/gl_shader_disk_cache.cpp
54 renderer_opengl/gl_shader_disk_cache.h
47 renderer_opengl/gl_shader_gen.cpp 55 renderer_opengl/gl_shader_gen.cpp
48 renderer_opengl/gl_shader_gen.h 56 renderer_opengl/gl_shader_gen.h
49 renderer_opengl/gl_shader_manager.cpp 57 renderer_opengl/gl_shader_manager.cpp
@@ -59,18 +67,83 @@ add_library(video_core STATIC
59 renderer_opengl/renderer_opengl.h 67 renderer_opengl/renderer_opengl.h
60 renderer_opengl/utils.cpp 68 renderer_opengl/utils.cpp
61 renderer_opengl/utils.h 69 renderer_opengl/utils.h
70 shader/decode/arithmetic.cpp
71 shader/decode/arithmetic_immediate.cpp
72 shader/decode/bfe.cpp
73 shader/decode/bfi.cpp
74 shader/decode/shift.cpp
75 shader/decode/arithmetic_integer.cpp
76 shader/decode/arithmetic_integer_immediate.cpp
77 shader/decode/arithmetic_half.cpp
78 shader/decode/arithmetic_half_immediate.cpp
79 shader/decode/ffma.cpp
80 shader/decode/hfma2.cpp
81 shader/decode/conversion.cpp
82 shader/decode/memory.cpp
83 shader/decode/texture.cpp
84 shader/decode/float_set_predicate.cpp
85 shader/decode/integer_set_predicate.cpp
86 shader/decode/half_set_predicate.cpp
87 shader/decode/predicate_set_register.cpp
88 shader/decode/predicate_set_predicate.cpp
89 shader/decode/register_set_predicate.cpp
90 shader/decode/float_set.cpp
91 shader/decode/integer_set.cpp
92 shader/decode/half_set.cpp
93 shader/decode/video.cpp
94 shader/decode/xmad.cpp
95 shader/decode/other.cpp
96 shader/decode.cpp
97 shader/shader_ir.cpp
98 shader/shader_ir.h
99 shader/track.cpp
62 surface.cpp 100 surface.cpp
63 surface.h 101 surface.h
64 textures/astc.cpp 102 textures/astc.cpp
65 textures/astc.h 103 textures/astc.h
104 textures/convert.cpp
105 textures/convert.h
66 textures/decoders.cpp 106 textures/decoders.cpp
67 textures/decoders.h 107 textures/decoders.h
68 textures/texture.h 108 textures/texture.h
109 texture_cache.cpp
110 texture_cache.h
69 video_core.cpp 111 video_core.cpp
70 video_core.h 112 video_core.h
71) 113)
72 114
115if (ENABLE_VULKAN)
116 target_sources(video_core PRIVATE
117 renderer_vulkan/declarations.h
118 renderer_vulkan/maxwell_to_vk.cpp
119 renderer_vulkan/maxwell_to_vk.h
120 renderer_vulkan/vk_buffer_cache.cpp
121 renderer_vulkan/vk_buffer_cache.h
122 renderer_vulkan/vk_device.cpp
123 renderer_vulkan/vk_device.h
124 renderer_vulkan/vk_memory_manager.cpp
125 renderer_vulkan/vk_memory_manager.h
126 renderer_vulkan/vk_resource_manager.cpp
127 renderer_vulkan/vk_resource_manager.h
128 renderer_vulkan/vk_sampler_cache.cpp
129 renderer_vulkan/vk_sampler_cache.h
130 renderer_vulkan/vk_scheduler.cpp
131 renderer_vulkan/vk_scheduler.h
132 renderer_vulkan/vk_shader_decompiler.cpp
133 renderer_vulkan/vk_shader_decompiler.h
134 renderer_vulkan/vk_stream_buffer.cpp
135 renderer_vulkan/vk_stream_buffer.h
136 renderer_vulkan/vk_swapchain.cpp
137 renderer_vulkan/vk_swapchain.h)
138
139 target_include_directories(video_core PRIVATE sirit ../../externals/Vulkan-Headers/include)
140 target_compile_definitions(video_core PRIVATE HAS_VULKAN)
141endif()
142
73create_target_directory_groups(video_core) 143create_target_directory_groups(video_core)
74 144
75target_link_libraries(video_core PUBLIC common core) 145target_link_libraries(video_core PUBLIC common core)
76target_link_libraries(video_core PRIVATE glad) 146target_link_libraries(video_core PRIVATE glad)
147if (ENABLE_VULKAN)
148 target_link_libraries(video_core PRIVATE sirit)
149endif()
diff --git a/src/video_core/debug_utils/debug_utils.cpp b/src/video_core/debug_utils/debug_utils.cpp
index 5ffb492ea..f0ef67535 100644
--- a/src/video_core/debug_utils/debug_utils.cpp
+++ b/src/video_core/debug_utils/debug_utils.cpp
@@ -10,7 +10,7 @@ namespace Tegra {
10 10
11void DebugContext::DoOnEvent(Event event, void* data) { 11void DebugContext::DoOnEvent(Event event, void* data) {
12 { 12 {
13 std::unique_lock<std::mutex> lock(breakpoint_mutex); 13 std::unique_lock lock{breakpoint_mutex};
14 14
15 // TODO(Subv): Commit the rasterizer's caches so framebuffers, render targets, etc. will 15 // TODO(Subv): Commit the rasterizer's caches so framebuffers, render targets, etc. will
16 // show on debug widgets 16 // show on debug widgets
@@ -32,7 +32,7 @@ void DebugContext::DoOnEvent(Event event, void* data) {
32 32
33void DebugContext::Resume() { 33void DebugContext::Resume() {
34 { 34 {
35 std::lock_guard<std::mutex> lock(breakpoint_mutex); 35 std::lock_guard lock{breakpoint_mutex};
36 36
37 // Tell all observers that we are about to resume 37 // Tell all observers that we are about to resume
38 for (auto& breakpoint_observer : breakpoint_observers) { 38 for (auto& breakpoint_observer : breakpoint_observers) {
diff --git a/src/video_core/debug_utils/debug_utils.h b/src/video_core/debug_utils/debug_utils.h
index c235faf46..ac3a2eb01 100644
--- a/src/video_core/debug_utils/debug_utils.h
+++ b/src/video_core/debug_utils/debug_utils.h
@@ -40,7 +40,7 @@ public:
40 /// Constructs the object such that it observes events of the given DebugContext. 40 /// Constructs the object such that it observes events of the given DebugContext.
41 explicit BreakPointObserver(std::shared_ptr<DebugContext> debug_context) 41 explicit BreakPointObserver(std::shared_ptr<DebugContext> debug_context)
42 : context_weak(debug_context) { 42 : context_weak(debug_context) {
43 std::unique_lock<std::mutex> lock(debug_context->breakpoint_mutex); 43 std::unique_lock lock{debug_context->breakpoint_mutex};
44 debug_context->breakpoint_observers.push_back(this); 44 debug_context->breakpoint_observers.push_back(this);
45 } 45 }
46 46
@@ -48,7 +48,7 @@ public:
48 auto context = context_weak.lock(); 48 auto context = context_weak.lock();
49 if (context) { 49 if (context) {
50 { 50 {
51 std::unique_lock<std::mutex> lock(context->breakpoint_mutex); 51 std::unique_lock lock{context->breakpoint_mutex};
52 context->breakpoint_observers.remove(this); 52 context->breakpoint_observers.remove(this);
53 } 53 }
54 54
diff --git a/src/video_core/dma_pusher.cpp b/src/video_core/dma_pusher.cpp
index 63a958f11..046d047cb 100644
--- a/src/video_core/dma_pusher.cpp
+++ b/src/video_core/dma_pusher.cpp
@@ -8,6 +8,7 @@
8#include "video_core/dma_pusher.h" 8#include "video_core/dma_pusher.h"
9#include "video_core/engines/maxwell_3d.h" 9#include "video_core/engines/maxwell_3d.h"
10#include "video_core/gpu.h" 10#include "video_core/gpu.h"
11#include "video_core/memory_manager.h"
11 12
12namespace Tegra { 13namespace Tegra {
13 14
@@ -33,16 +34,33 @@ void DmaPusher::DispatchCalls() {
33} 34}
34 35
35bool DmaPusher::Step() { 36bool DmaPusher::Step() {
36 if (dma_get != dma_put) { 37 if (!ib_enable || dma_pushbuffer.empty()) {
37 // Push buffer non-empty, read a word 38 // pushbuffer empty and IB empty or nonexistent - nothing to do
38 const CommandHeader command_header{ 39 return false;
39 Memory::Read32(*gpu.MemoryManager().GpuToCpuAddress(dma_get))}; 40 }
40 41
41 dma_get += sizeof(u32); 42 const CommandList& command_list{dma_pushbuffer.front()};
43 const CommandListHeader command_list_header{command_list[dma_pushbuffer_subindex++]};
44 GPUVAddr dma_get = command_list_header.addr;
45 GPUVAddr dma_put = dma_get + command_list_header.size * sizeof(u32);
46 bool non_main = command_list_header.is_non_main;
42 47
43 if (!non_main) { 48 if (dma_pushbuffer_subindex >= command_list.size()) {
44 dma_mget = dma_get; 49 // We've gone through the current list, remove it from the queue
45 } 50 dma_pushbuffer.pop();
51 dma_pushbuffer_subindex = 0;
52 }
53
54 if (command_list_header.size == 0) {
55 return true;
56 }
57
58 // Push buffer non-empty, read a word
59 command_headers.resize(command_list_header.size);
60 gpu.MemoryManager().ReadBlock(dma_get, command_headers.data(),
61 command_list_header.size * sizeof(u32));
62
63 for (const CommandHeader& command_header : command_headers) {
46 64
47 // now, see if we're in the middle of a command 65 // now, see if we're in the middle of a command
48 if (dma_state.length_pending) { 66 if (dma_state.length_pending) {
@@ -89,22 +107,11 @@ bool DmaPusher::Step() {
89 break; 107 break;
90 } 108 }
91 } 109 }
92 } else if (ib_enable && !dma_pushbuffer.empty()) { 110 }
93 // Current pushbuffer empty, but we have more IB entries to read 111
94 const CommandList& command_list{dma_pushbuffer.front()}; 112 if (!non_main) {
95 const CommandListHeader& command_list_header{command_list[dma_pushbuffer_subindex++]}; 113 // TODO (degasus): This is dead code, as dma_mget is never read.
96 dma_get = command_list_header.addr; 114 dma_mget = dma_put;
97 dma_put = dma_get + command_list_header.size * sizeof(u32);
98 non_main = command_list_header.is_non_main;
99
100 if (dma_pushbuffer_subindex >= command_list.size()) {
101 // We've gone through the current list, remove it from the queue
102 dma_pushbuffer.pop();
103 dma_pushbuffer_subindex = 0;
104 }
105 } else {
106 // Otherwise, pushbuffer empty and IB empty or nonexistent - nothing to do
107 return {};
108 } 115 }
109 116
110 return true; 117 return true;
diff --git a/src/video_core/dma_pusher.h b/src/video_core/dma_pusher.h
index 16e0697c4..6ab06518f 100644
--- a/src/video_core/dma_pusher.h
+++ b/src/video_core/dma_pusher.h
@@ -9,7 +9,6 @@
9 9
10#include "common/bit_field.h" 10#include "common/bit_field.h"
11#include "common/common_types.h" 11#include "common/common_types.h"
12#include "video_core/memory_manager.h"
13 12
14namespace Tegra { 13namespace Tegra {
15 14
@@ -75,6 +74,8 @@ private:
75 74
76 GPU& gpu; 75 GPU& gpu;
77 76
77 std::vector<CommandHeader> command_headers; ///< Buffer for list of commands fetched at once
78
78 std::queue<CommandList> dma_pushbuffer; ///< Queue of command lists to be processed 79 std::queue<CommandList> dma_pushbuffer; ///< Queue of command lists to be processed
79 std::size_t dma_pushbuffer_subindex{}; ///< Index within a command list within the pushbuffer 80 std::size_t dma_pushbuffer_subindex{}; ///< Index within a command list within the pushbuffer
80 81
@@ -83,17 +84,14 @@ private:
83 u32 subchannel; ///< Current subchannel 84 u32 subchannel; ///< Current subchannel
84 u32 method_count; ///< Current method count 85 u32 method_count; ///< Current method count
85 u32 length_pending; ///< Large NI command length pending 86 u32 length_pending; ///< Large NI command length pending
86 bool non_incrementing; ///< Current commands NI flag 87 bool non_incrementing; ///< Current command's NI flag
87 }; 88 };
88 89
89 DmaState dma_state{}; 90 DmaState dma_state{};
90 bool dma_increment_once{}; 91 bool dma_increment_once{};
91 92
92 GPUVAddr dma_put{}; ///< pushbuffer current end address
93 GPUVAddr dma_get{}; ///< pushbuffer current read address
94 GPUVAddr dma_mget{}; ///< main pushbuffer last read address 93 GPUVAddr dma_mget{}; ///< main pushbuffer last read address
95 bool ib_enable{true}; ///< IB mode enabled 94 bool ib_enable{true}; ///< IB mode enabled
96 bool non_main{}; ///< non-main pushbuffer active
97}; 95};
98 96
99} // namespace Tegra 97} // namespace Tegra
diff --git a/src/video_core/engines/fermi_2d.cpp b/src/video_core/engines/fermi_2d.cpp
index 80f70e332..55966eef1 100644
--- a/src/video_core/engines/fermi_2d.cpp
+++ b/src/video_core/engines/fermi_2d.cpp
@@ -2,17 +2,17 @@
2// Licensed under GPLv2 or any later version 2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included. 3// Refer to the license.txt file included.
4 4
5#include "core/core.h" 5#include "common/assert.h"
6#include "core/memory.h" 6#include "common/logging/log.h"
7#include "common/math_util.h"
7#include "video_core/engines/fermi_2d.h" 8#include "video_core/engines/fermi_2d.h"
8#include "video_core/engines/maxwell_3d.h" 9#include "video_core/memory_manager.h"
9#include "video_core/rasterizer_interface.h" 10#include "video_core/rasterizer_interface.h"
10#include "video_core/textures/decoders.h"
11 11
12namespace Tegra::Engines { 12namespace Tegra::Engines {
13 13
14Fermi2D::Fermi2D(VideoCore::RasterizerInterface& rasterizer, MemoryManager& memory_manager) 14Fermi2D::Fermi2D(VideoCore::RasterizerInterface& rasterizer, MemoryManager& memory_manager)
15 : memory_manager(memory_manager), rasterizer{rasterizer} {} 15 : rasterizer{rasterizer}, memory_manager{memory_manager} {}
16 16
17void Fermi2D::CallMethod(const GPU::MethodCall& method_call) { 17void Fermi2D::CallMethod(const GPU::MethodCall& method_call) {
18 ASSERT_MSG(method_call.method < Regs::NUM_REGS, 18 ASSERT_MSG(method_call.method < Regs::NUM_REGS,
@@ -21,7 +21,9 @@ void Fermi2D::CallMethod(const GPU::MethodCall& method_call) {
21 regs.reg_array[method_call.method] = method_call.argument; 21 regs.reg_array[method_call.method] = method_call.argument;
22 22
23 switch (method_call.method) { 23 switch (method_call.method) {
24 case FERMI2D_REG_INDEX(trigger): { 24 // Trigger the surface copy on the last register write. This is blit_src_y, but this is 64-bit,
25 // so trigger on the second 32-bit write.
26 case FERMI2D_REG_INDEX(blit_src_y) + 1: {
25 HandleSurfaceCopy(); 27 HandleSurfaceCopy();
26 break; 28 break;
27 } 29 }
@@ -32,55 +34,23 @@ void Fermi2D::HandleSurfaceCopy() {
32 LOG_WARNING(HW_GPU, "Requested a surface copy with operation {}", 34 LOG_WARNING(HW_GPU, "Requested a surface copy with operation {}",
33 static_cast<u32>(regs.operation)); 35 static_cast<u32>(regs.operation));
34 36
35 const GPUVAddr source = regs.src.Address();
36 const GPUVAddr dest = regs.dst.Address();
37
38 // TODO(Subv): Only same-format and same-size copies are allowed for now.
39 ASSERT(regs.src.format == regs.dst.format);
40 ASSERT(regs.src.width * regs.src.height == regs.dst.width * regs.dst.height);
41
42 // TODO(Subv): Only raw copies are implemented. 37 // TODO(Subv): Only raw copies are implemented.
43 ASSERT(regs.operation == Regs::Operation::SrcCopy); 38 ASSERT(regs.operation == Regs::Operation::SrcCopy);
44 39
45 const VAddr source_cpu = *memory_manager.GpuToCpuAddress(source); 40 const u32 src_blit_x1{static_cast<u32>(regs.blit_src_x >> 32)};
46 const VAddr dest_cpu = *memory_manager.GpuToCpuAddress(dest); 41 const u32 src_blit_y1{static_cast<u32>(regs.blit_src_y >> 32)};
47 42 const u32 src_blit_x2{
48 u32 src_bytes_per_pixel = RenderTargetBytesPerPixel(regs.src.format); 43 static_cast<u32>((regs.blit_src_x + (regs.blit_dst_width * regs.blit_du_dx)) >> 32)};
49 u32 dst_bytes_per_pixel = RenderTargetBytesPerPixel(regs.dst.format); 44 const u32 src_blit_y2{
50 45 static_cast<u32>((regs.blit_src_y + (regs.blit_dst_height * regs.blit_dv_dy)) >> 32)};
51 if (!rasterizer.AccelerateSurfaceCopy(regs.src, regs.dst)) {
52 // All copies here update the main memory, so mark all rasterizer states as invalid.
53 Core::System::GetInstance().GPU().Maxwell3D().dirty_flags.OnMemoryWrite();
54 46
55 rasterizer.FlushRegion(source_cpu, src_bytes_per_pixel * regs.src.width * regs.src.height); 47 const Common::Rectangle<u32> src_rect{src_blit_x1, src_blit_y1, src_blit_x2, src_blit_y2};
56 // We have to invalidate the destination region to evict any outdated surfaces from the 48 const Common::Rectangle<u32> dst_rect{regs.blit_dst_x, regs.blit_dst_y,
57 // cache. We do this before actually writing the new data because the destination address 49 regs.blit_dst_x + regs.blit_dst_width,
58 // might contain a dirty surface that will have to be written back to memory. 50 regs.blit_dst_y + regs.blit_dst_height};
59 rasterizer.InvalidateRegion(dest_cpu,
60 dst_bytes_per_pixel * regs.dst.width * regs.dst.height);
61 51
62 if (regs.src.linear == regs.dst.linear) { 52 if (!rasterizer.AccelerateSurfaceCopy(regs.src, regs.dst, src_rect, dst_rect)) {
63 // If the input layout and the output layout are the same, just perform a raw copy. 53 UNIMPLEMENTED();
64 ASSERT(regs.src.BlockHeight() == regs.dst.BlockHeight());
65 Memory::CopyBlock(dest_cpu, source_cpu,
66 src_bytes_per_pixel * regs.dst.width * regs.dst.height);
67 return;
68 }
69 u8* src_buffer = Memory::GetPointer(source_cpu);
70 u8* dst_buffer = Memory::GetPointer(dest_cpu);
71 if (!regs.src.linear && regs.dst.linear) {
72 // If the input is tiled and the output is linear, deswizzle the input and copy it over.
73 Texture::CopySwizzledData(regs.src.width, regs.src.height, regs.src.depth,
74 src_bytes_per_pixel, dst_bytes_per_pixel, src_buffer,
75 dst_buffer, true, regs.src.BlockHeight(),
76 regs.src.BlockDepth(), 0);
77 } else {
78 // If the input is linear and the output is tiled, swizzle the input and copy it over.
79 Texture::CopySwizzledData(regs.src.width, regs.src.height, regs.src.depth,
80 src_bytes_per_pixel, dst_bytes_per_pixel, dst_buffer,
81 src_buffer, false, regs.dst.BlockHeight(),
82 regs.dst.BlockDepth(), 0);
83 }
84 } 54 }
85} 55}
86 56
diff --git a/src/video_core/engines/fermi_2d.h b/src/video_core/engines/fermi_2d.h
index 50009bf75..2e51b7f13 100644
--- a/src/video_core/engines/fermi_2d.h
+++ b/src/video_core/engines/fermi_2d.h
@@ -5,12 +5,15 @@
5#pragma once 5#pragma once
6 6
7#include <array> 7#include <array>
8#include "common/assert.h" 8#include <cstddef>
9#include "common/bit_field.h" 9#include "common/bit_field.h"
10#include "common/common_funcs.h" 10#include "common/common_funcs.h"
11#include "common/common_types.h" 11#include "common/common_types.h"
12#include "video_core/gpu.h" 12#include "video_core/gpu.h"
13#include "video_core/memory_manager.h" 13
14namespace Tegra {
15class MemoryManager;
16}
14 17
15namespace VideoCore { 18namespace VideoCore {
16class RasterizerInterface; 19class RasterizerInterface;
@@ -94,21 +97,30 @@ public:
94 97
95 Operation operation; 98 Operation operation;
96 99
97 INSERT_PADDING_WORDS(0x9); 100 INSERT_PADDING_WORDS(0x177);
101
102 u32 blit_control;
103
104 INSERT_PADDING_WORDS(0x8);
98 105
99 // TODO(Subv): This is only a guess. 106 u32 blit_dst_x;
100 u32 trigger; 107 u32 blit_dst_y;
108 u32 blit_dst_width;
109 u32 blit_dst_height;
110 u64 blit_du_dx;
111 u64 blit_dv_dy;
112 u64 blit_src_x;
113 u64 blit_src_y;
101 114
102 INSERT_PADDING_WORDS(0x1A3); 115 INSERT_PADDING_WORDS(0x21);
103 }; 116 };
104 std::array<u32, NUM_REGS> reg_array; 117 std::array<u32, NUM_REGS> reg_array;
105 }; 118 };
106 } regs{}; 119 } regs{};
107 120
108 MemoryManager& memory_manager;
109
110private: 121private:
111 VideoCore::RasterizerInterface& rasterizer; 122 VideoCore::RasterizerInterface& rasterizer;
123 MemoryManager& memory_manager;
112 124
113 /// Performs the copy from the source surface to the destination surface as configured in the 125 /// Performs the copy from the source surface to the destination surface as configured in the
114 /// registers. 126 /// registers.
@@ -122,7 +134,16 @@ private:
122ASSERT_REG_POSITION(dst, 0x80); 134ASSERT_REG_POSITION(dst, 0x80);
123ASSERT_REG_POSITION(src, 0x8C); 135ASSERT_REG_POSITION(src, 0x8C);
124ASSERT_REG_POSITION(operation, 0xAB); 136ASSERT_REG_POSITION(operation, 0xAB);
125ASSERT_REG_POSITION(trigger, 0xB5); 137ASSERT_REG_POSITION(blit_control, 0x223);
138ASSERT_REG_POSITION(blit_dst_x, 0x22c);
139ASSERT_REG_POSITION(blit_dst_y, 0x22d);
140ASSERT_REG_POSITION(blit_dst_width, 0x22e);
141ASSERT_REG_POSITION(blit_dst_height, 0x22f);
142ASSERT_REG_POSITION(blit_du_dx, 0x230);
143ASSERT_REG_POSITION(blit_dv_dy, 0x232);
144ASSERT_REG_POSITION(blit_src_x, 0x234);
145ASSERT_REG_POSITION(blit_src_y, 0x236);
146
126#undef ASSERT_REG_POSITION 147#undef ASSERT_REG_POSITION
127 148
128} // namespace Tegra::Engines 149} // namespace Tegra::Engines
diff --git a/src/video_core/engines/kepler_compute.cpp b/src/video_core/engines/kepler_compute.cpp
new file mode 100644
index 000000000..b1d950460
--- /dev/null
+++ b/src/video_core/engines/kepler_compute.cpp
@@ -0,0 +1,33 @@
1// Copyright 2018 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/assert.h"
6#include "common/logging/log.h"
7#include "video_core/engines/kepler_compute.h"
8#include "video_core/memory_manager.h"
9
10namespace Tegra::Engines {
11
12KeplerCompute::KeplerCompute(MemoryManager& memory_manager) : memory_manager{memory_manager} {}
13
14KeplerCompute::~KeplerCompute() = default;
15
16void KeplerCompute::CallMethod(const GPU::MethodCall& method_call) {
17 ASSERT_MSG(method_call.method < Regs::NUM_REGS,
18 "Invalid KeplerCompute register, increase the size of the Regs structure");
19
20 regs.reg_array[method_call.method] = method_call.argument;
21
22 switch (method_call.method) {
23 case KEPLER_COMPUTE_REG_INDEX(launch):
24 // Abort execution since compute shaders can be used to alter game memory (e.g. CUDA
25 // kernels)
26 UNREACHABLE_MSG("Compute shaders are not implemented");
27 break;
28 default:
29 break;
30 }
31}
32
33} // namespace Tegra::Engines
diff --git a/src/video_core/engines/maxwell_compute.h b/src/video_core/engines/kepler_compute.h
index 1d71f11bd..fb6cdf432 100644
--- a/src/video_core/engines/maxwell_compute.h
+++ b/src/video_core/engines/kepler_compute.h
@@ -5,52 +5,56 @@
5#pragma once 5#pragma once
6 6
7#include <array> 7#include <array>
8#include "common/assert.h" 8#include <cstddef>
9#include "common/bit_field.h"
10#include "common/common_funcs.h" 9#include "common/common_funcs.h"
11#include "common/common_types.h" 10#include "common/common_types.h"
12#include "video_core/gpu.h" 11#include "video_core/gpu.h"
13 12
13namespace Tegra {
14class MemoryManager;
15}
16
14namespace Tegra::Engines { 17namespace Tegra::Engines {
15 18
16#define MAXWELL_COMPUTE_REG_INDEX(field_name) \ 19#define KEPLER_COMPUTE_REG_INDEX(field_name) \
17 (offsetof(Tegra::Engines::MaxwellCompute::Regs, field_name) / sizeof(u32)) 20 (offsetof(Tegra::Engines::KeplerCompute::Regs, field_name) / sizeof(u32))
18 21
19class MaxwellCompute final { 22class KeplerCompute final {
20public: 23public:
21 MaxwellCompute() = default; 24 explicit KeplerCompute(MemoryManager& memory_manager);
22 ~MaxwellCompute() = default; 25 ~KeplerCompute();
26
27 static constexpr std::size_t NumConstBuffers = 8;
23 28
24 struct Regs { 29 struct Regs {
25 static constexpr std::size_t NUM_REGS = 0xCF8; 30 static constexpr std::size_t NUM_REGS = 0xCF8;
26 31
27 union { 32 union {
28 struct { 33 struct {
29 INSERT_PADDING_WORDS(0x281); 34 INSERT_PADDING_WORDS(0xAF);
30 35
31 union { 36 u32 launch;
32 u32 compute_end;
33 BitField<0, 1, u32> unknown;
34 } compute;
35 37
36 INSERT_PADDING_WORDS(0xA76); 38 INSERT_PADDING_WORDS(0xC48);
37 }; 39 };
38 std::array<u32, NUM_REGS> reg_array; 40 std::array<u32, NUM_REGS> reg_array;
39 }; 41 };
40 } regs{}; 42 } regs{};
41
42 static_assert(sizeof(Regs) == Regs::NUM_REGS * sizeof(u32), 43 static_assert(sizeof(Regs) == Regs::NUM_REGS * sizeof(u32),
43 "MaxwellCompute Regs has wrong size"); 44 "KeplerCompute Regs has wrong size");
44 45
45 /// Write the value to the register identified by method. 46 /// Write the value to the register identified by method.
46 void CallMethod(const GPU::MethodCall& method_call); 47 void CallMethod(const GPU::MethodCall& method_call);
48
49private:
50 MemoryManager& memory_manager;
47}; 51};
48 52
49#define ASSERT_REG_POSITION(field_name, position) \ 53#define ASSERT_REG_POSITION(field_name, position) \
50 static_assert(offsetof(MaxwellCompute::Regs, field_name) == position * 4, \ 54 static_assert(offsetof(KeplerCompute::Regs, field_name) == position * 4, \
51 "Field " #field_name " has invalid position") 55 "Field " #field_name " has invalid position")
52 56
53ASSERT_REG_POSITION(compute, 0x281); 57ASSERT_REG_POSITION(launch, 0xAF);
54 58
55#undef ASSERT_REG_POSITION 59#undef ASSERT_REG_POSITION
56 60
diff --git a/src/video_core/engines/kepler_memory.cpp b/src/video_core/engines/kepler_memory.cpp
index 4880191fc..cd51a31d7 100644
--- a/src/video_core/engines/kepler_memory.cpp
+++ b/src/video_core/engines/kepler_memory.cpp
@@ -2,18 +2,20 @@
2// Licensed under GPLv2 or any later version 2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included. 3// Refer to the license.txt file included.
4 4
5#include "common/assert.h"
5#include "common/logging/log.h" 6#include "common/logging/log.h"
6#include "core/core.h" 7#include "core/core.h"
7#include "core/memory.h"
8#include "video_core/engines/kepler_memory.h" 8#include "video_core/engines/kepler_memory.h"
9#include "video_core/engines/maxwell_3d.h" 9#include "video_core/engines/maxwell_3d.h"
10#include "video_core/memory_manager.h"
10#include "video_core/rasterizer_interface.h" 11#include "video_core/rasterizer_interface.h"
12#include "video_core/renderer_base.h"
11 13
12namespace Tegra::Engines { 14namespace Tegra::Engines {
13 15
14KeplerMemory::KeplerMemory(VideoCore::RasterizerInterface& rasterizer, 16KeplerMemory::KeplerMemory(Core::System& system, VideoCore::RasterizerInterface& rasterizer,
15 MemoryManager& memory_manager) 17 MemoryManager& memory_manager)
16 : memory_manager(memory_manager), rasterizer{rasterizer} {} 18 : system{system}, rasterizer{rasterizer}, memory_manager{memory_manager} {}
17 19
18KeplerMemory::~KeplerMemory() = default; 20KeplerMemory::~KeplerMemory() = default;
19 21
@@ -39,17 +41,14 @@ void KeplerMemory::ProcessData(u32 data) {
39 ASSERT_MSG(regs.exec.linear, "Non-linear uploads are not supported"); 41 ASSERT_MSG(regs.exec.linear, "Non-linear uploads are not supported");
40 ASSERT(regs.dest.x == 0 && regs.dest.y == 0 && regs.dest.z == 0); 42 ASSERT(regs.dest.x == 0 && regs.dest.y == 0 && regs.dest.z == 0);
41 43
42 GPUVAddr address = regs.dest.Address();
43 VAddr dest_address =
44 *memory_manager.GpuToCpuAddress(address + state.write_offset * sizeof(u32));
45
46 // We have to invalidate the destination region to evict any outdated surfaces from the cache. 44 // We have to invalidate the destination region to evict any outdated surfaces from the cache.
47 // We do this before actually writing the new data because the destination address might contain 45 // We do this before actually writing the new data because the destination address might
48 // a dirty surface that will have to be written back to memory. 46 // contain a dirty surface that will have to be written back to memory.
49 rasterizer.InvalidateRegion(dest_address, sizeof(u32)); 47 const GPUVAddr address{regs.dest.Address() + state.write_offset * sizeof(u32)};
48 rasterizer.InvalidateRegion(ToCacheAddr(memory_manager.GetPointer(address)), sizeof(u32));
49 memory_manager.Write<u32>(address, data);
50 50
51 Memory::Write32(dest_address, data); 51 system.GPU().Maxwell3D().dirty_flags.OnMemoryWrite();
52 Core::System::GetInstance().GPU().Maxwell3D().dirty_flags.OnMemoryWrite();
53 52
54 state.write_offset++; 53 state.write_offset++;
55} 54}
diff --git a/src/video_core/engines/kepler_memory.h b/src/video_core/engines/kepler_memory.h
index fe9ebc5b9..78b6c3e45 100644
--- a/src/video_core/engines/kepler_memory.h
+++ b/src/video_core/engines/kepler_memory.h
@@ -5,12 +5,19 @@
5#pragma once 5#pragma once
6 6
7#include <array> 7#include <array>
8#include "common/assert.h" 8#include <cstddef>
9#include "common/bit_field.h" 9#include "common/bit_field.h"
10#include "common/common_funcs.h" 10#include "common/common_funcs.h"
11#include "common/common_types.h" 11#include "common/common_types.h"
12#include "video_core/gpu.h" 12#include "video_core/gpu.h"
13#include "video_core/memory_manager.h" 13
14namespace Core {
15class System;
16}
17
18namespace Tegra {
19class MemoryManager;
20}
14 21
15namespace VideoCore { 22namespace VideoCore {
16class RasterizerInterface; 23class RasterizerInterface;
@@ -23,7 +30,8 @@ namespace Tegra::Engines {
23 30
24class KeplerMemory final { 31class KeplerMemory final {
25public: 32public:
26 KeplerMemory(VideoCore::RasterizerInterface& rasterizer, MemoryManager& memory_manager); 33 KeplerMemory(Core::System& system, VideoCore::RasterizerInterface& rasterizer,
34 MemoryManager& memory_manager);
27 ~KeplerMemory(); 35 ~KeplerMemory();
28 36
29 /// Write the value to the register identified by method. 37 /// Write the value to the register identified by method.
@@ -76,8 +84,9 @@ public:
76 } state{}; 84 } state{};
77 85
78private: 86private:
79 MemoryManager& memory_manager; 87 Core::System& system;
80 VideoCore::RasterizerInterface& rasterizer; 88 VideoCore::RasterizerInterface& rasterizer;
89 MemoryManager& memory_manager;
81 90
82 void ProcessData(u32 data); 91 void ProcessData(u32 data);
83}; 92};
diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp
index d64a5080b..74403eed4 100644
--- a/src/video_core/engines/maxwell_3d.cpp
+++ b/src/video_core/engines/maxwell_3d.cpp
@@ -7,11 +7,10 @@
7#include "common/assert.h" 7#include "common/assert.h"
8#include "core/core.h" 8#include "core/core.h"
9#include "core/core_timing.h" 9#include "core/core_timing.h"
10#include "core/memory.h"
11#include "video_core/debug_utils/debug_utils.h" 10#include "video_core/debug_utils/debug_utils.h"
12#include "video_core/engines/maxwell_3d.h" 11#include "video_core/engines/maxwell_3d.h"
12#include "video_core/memory_manager.h"
13#include "video_core/rasterizer_interface.h" 13#include "video_core/rasterizer_interface.h"
14#include "video_core/renderer_base.h"
15#include "video_core/textures/texture.h" 14#include "video_core/textures/texture.h"
16 15
17namespace Tegra::Engines { 16namespace Tegra::Engines {
@@ -19,8 +18,10 @@ namespace Tegra::Engines {
19/// First register id that is actually a Macro call. 18/// First register id that is actually a Macro call.
20constexpr u32 MacroRegistersStart = 0xE00; 19constexpr u32 MacroRegistersStart = 0xE00;
21 20
22Maxwell3D::Maxwell3D(VideoCore::RasterizerInterface& rasterizer, MemoryManager& memory_manager) 21Maxwell3D::Maxwell3D(Core::System& system, VideoCore::RasterizerInterface& rasterizer,
23 : memory_manager(memory_manager), rasterizer{rasterizer}, macro_interpreter(*this) { 22 MemoryManager& memory_manager)
23 : system{system}, rasterizer{rasterizer}, memory_manager{memory_manager}, macro_interpreter{
24 *this} {
24 InitializeRegisterDefaults(); 25 InitializeRegisterDefaults();
25} 26}
26 27
@@ -37,6 +38,7 @@ void Maxwell3D::InitializeRegisterDefaults() {
37 regs.viewports[viewport].depth_range_near = 0.0f; 38 regs.viewports[viewport].depth_range_near = 0.0f;
38 regs.viewports[viewport].depth_range_far = 1.0f; 39 regs.viewports[viewport].depth_range_far = 1.0f;
39 } 40 }
41
40 // Doom and Bomberman seems to use the uninitialized registers and just enable blend 42 // Doom and Bomberman seems to use the uninitialized registers and just enable blend
41 // so initialize blend registers with sane values 43 // so initialize blend registers with sane values
42 regs.blend.equation_rgb = Regs::Blend::Equation::Add; 44 regs.blend.equation_rgb = Regs::Blend::Equation::Add;
@@ -66,6 +68,7 @@ void Maxwell3D::InitializeRegisterDefaults() {
66 regs.stencil_back_func_func = Regs::ComparisonOp::Always; 68 regs.stencil_back_func_func = Regs::ComparisonOp::Always;
67 regs.stencil_back_func_mask = 0xFFFFFFFF; 69 regs.stencil_back_func_mask = 0xFFFFFFFF;
68 regs.stencil_back_mask = 0xFFFFFFFF; 70 regs.stencil_back_mask = 0xFFFFFFFF;
71
69 // TODO(Rodrigo): Most games do not set a point size. I think this is a case of a 72 // TODO(Rodrigo): Most games do not set a point size. I think this is a case of a
70 // register carrying a default value. Assume it's OpenGL's default (1). 73 // register carrying a default value. Assume it's OpenGL's default (1).
71 regs.point_size = 1.0f; 74 regs.point_size = 1.0f;
@@ -78,6 +81,9 @@ void Maxwell3D::InitializeRegisterDefaults() {
78 regs.color_mask[color_mask].B.Assign(1); 81 regs.color_mask[color_mask].B.Assign(1);
79 regs.color_mask[color_mask].A.Assign(1); 82 regs.color_mask[color_mask].A.Assign(1);
80 } 83 }
84
85 // Commercial games seem to assume this value is enabled and nouveau sets this value manually.
86 regs.rt_separate_frag_data = 1;
81} 87}
82 88
83void Maxwell3D::CallMacroMethod(u32 method, std::vector<u32> parameters) { 89void Maxwell3D::CallMacroMethod(u32 method, std::vector<u32> parameters) {
@@ -98,23 +104,25 @@ void Maxwell3D::CallMacroMethod(u32 method, std::vector<u32> parameters) {
98} 104}
99 105
100void Maxwell3D::CallMethod(const GPU::MethodCall& method_call) { 106void Maxwell3D::CallMethod(const GPU::MethodCall& method_call) {
101 auto debug_context = Core::System::GetInstance().GetGPUDebugContext(); 107 auto debug_context = system.GetGPUDebugContext();
108
109 const u32 method = method_call.method;
102 110
103 // It is an error to write to a register other than the current macro's ARG register before it 111 // It is an error to write to a register other than the current macro's ARG register before it
104 // has finished execution. 112 // has finished execution.
105 if (executing_macro != 0) { 113 if (executing_macro != 0) {
106 ASSERT(method_call.method == executing_macro + 1); 114 ASSERT(method == executing_macro + 1);
107 } 115 }
108 116
109 // Methods after 0xE00 are special, they're actually triggers for some microcode that was 117 // Methods after 0xE00 are special, they're actually triggers for some microcode that was
110 // uploaded to the GPU during initialization. 118 // uploaded to the GPU during initialization.
111 if (method_call.method >= MacroRegistersStart) { 119 if (method >= MacroRegistersStart) {
112 // We're trying to execute a macro 120 // We're trying to execute a macro
113 if (executing_macro == 0) { 121 if (executing_macro == 0) {
114 // A macro call must begin by writing the macro method's register, not its argument. 122 // A macro call must begin by writing the macro method's register, not its argument.
115 ASSERT_MSG((method_call.method % 2) == 0, 123 ASSERT_MSG((method % 2) == 0,
116 "Can't start macro execution by writing to the ARGS register"); 124 "Can't start macro execution by writing to the ARGS register");
117 executing_macro = method_call.method; 125 executing_macro = method;
118 } 126 }
119 127
120 macro_params.push_back(method_call.argument); 128 macro_params.push_back(method_call.argument);
@@ -126,66 +134,62 @@ void Maxwell3D::CallMethod(const GPU::MethodCall& method_call) {
126 return; 134 return;
127 } 135 }
128 136
129 ASSERT_MSG(method_call.method < Regs::NUM_REGS, 137 ASSERT_MSG(method < Regs::NUM_REGS,
130 "Invalid Maxwell3D register, increase the size of the Regs structure"); 138 "Invalid Maxwell3D register, increase the size of the Regs structure");
131 139
132 if (debug_context) { 140 if (debug_context) {
133 debug_context->OnEvent(Tegra::DebugContext::Event::MaxwellCommandLoaded, nullptr); 141 debug_context->OnEvent(Tegra::DebugContext::Event::MaxwellCommandLoaded, nullptr);
134 } 142 }
135 143
136 if (regs.reg_array[method_call.method] != method_call.argument) { 144 if (regs.reg_array[method] != method_call.argument) {
137 regs.reg_array[method_call.method] = method_call.argument; 145 regs.reg_array[method] = method_call.argument;
138 // Color buffers 146 // Color buffers
139 constexpr u32 first_rt_reg = MAXWELL3D_REG_INDEX(rt); 147 constexpr u32 first_rt_reg = MAXWELL3D_REG_INDEX(rt);
140 constexpr u32 registers_per_rt = sizeof(regs.rt[0]) / sizeof(u32); 148 constexpr u32 registers_per_rt = sizeof(regs.rt[0]) / sizeof(u32);
141 if (method_call.method >= first_rt_reg && 149 if (method >= first_rt_reg &&
142 method_call.method < first_rt_reg + registers_per_rt * Regs::NumRenderTargets) { 150 method < first_rt_reg + registers_per_rt * Regs::NumRenderTargets) {
143 const std::size_t rt_index = (method_call.method - first_rt_reg) / registers_per_rt; 151 const std::size_t rt_index = (method - first_rt_reg) / registers_per_rt;
144 dirty_flags.color_buffer |= 1u << static_cast<u32>(rt_index); 152 dirty_flags.color_buffer.set(rt_index);
145 } 153 }
146 154
147 // Zeta buffer 155 // Zeta buffer
148 constexpr u32 registers_in_zeta = sizeof(regs.zeta) / sizeof(u32); 156 constexpr u32 registers_in_zeta = sizeof(regs.zeta) / sizeof(u32);
149 if (method_call.method == MAXWELL3D_REG_INDEX(zeta_enable) || 157 if (method == MAXWELL3D_REG_INDEX(zeta_enable) ||
150 method_call.method == MAXWELL3D_REG_INDEX(zeta_width) || 158 method == MAXWELL3D_REG_INDEX(zeta_width) ||
151 method_call.method == MAXWELL3D_REG_INDEX(zeta_height) || 159 method == MAXWELL3D_REG_INDEX(zeta_height) ||
152 (method_call.method >= MAXWELL3D_REG_INDEX(zeta) && 160 (method >= MAXWELL3D_REG_INDEX(zeta) &&
153 method_call.method < MAXWELL3D_REG_INDEX(zeta) + registers_in_zeta)) { 161 method < MAXWELL3D_REG_INDEX(zeta) + registers_in_zeta)) {
154 dirty_flags.zeta_buffer = true; 162 dirty_flags.zeta_buffer = true;
155 } 163 }
156 164
157 // Shader 165 // Shader
158 constexpr u32 shader_registers_count = 166 constexpr u32 shader_registers_count =
159 sizeof(regs.shader_config[0]) * Regs::MaxShaderProgram / sizeof(u32); 167 sizeof(regs.shader_config[0]) * Regs::MaxShaderProgram / sizeof(u32);
160 if (method_call.method >= MAXWELL3D_REG_INDEX(shader_config[0]) && 168 if (method >= MAXWELL3D_REG_INDEX(shader_config[0]) &&
161 method_call.method < MAXWELL3D_REG_INDEX(shader_config[0]) + shader_registers_count) { 169 method < MAXWELL3D_REG_INDEX(shader_config[0]) + shader_registers_count) {
162 dirty_flags.shaders = true; 170 dirty_flags.shaders = true;
163 } 171 }
164 172
165 // Vertex format 173 // Vertex format
166 if (method_call.method >= MAXWELL3D_REG_INDEX(vertex_attrib_format) && 174 if (method >= MAXWELL3D_REG_INDEX(vertex_attrib_format) &&
167 method_call.method < 175 method < MAXWELL3D_REG_INDEX(vertex_attrib_format) + regs.vertex_attrib_format.size()) {
168 MAXWELL3D_REG_INDEX(vertex_attrib_format) + regs.vertex_attrib_format.size()) {
169 dirty_flags.vertex_attrib_format = true; 176 dirty_flags.vertex_attrib_format = true;
170 } 177 }
171 178
172 // Vertex buffer 179 // Vertex buffer
173 if (method_call.method >= MAXWELL3D_REG_INDEX(vertex_array) && 180 if (method >= MAXWELL3D_REG_INDEX(vertex_array) &&
174 method_call.method < MAXWELL3D_REG_INDEX(vertex_array) + 4 * 32) { 181 method < MAXWELL3D_REG_INDEX(vertex_array) + 4 * 32) {
175 dirty_flags.vertex_array |= 182 dirty_flags.vertex_array.set((method - MAXWELL3D_REG_INDEX(vertex_array)) >> 2);
176 1u << ((method_call.method - MAXWELL3D_REG_INDEX(vertex_array)) >> 2); 183 } else if (method >= MAXWELL3D_REG_INDEX(vertex_array_limit) &&
177 } else if (method_call.method >= MAXWELL3D_REG_INDEX(vertex_array_limit) && 184 method < MAXWELL3D_REG_INDEX(vertex_array_limit) + 2 * 32) {
178 method_call.method < MAXWELL3D_REG_INDEX(vertex_array_limit) + 2 * 32) { 185 dirty_flags.vertex_array.set((method - MAXWELL3D_REG_INDEX(vertex_array_limit)) >> 1);
179 dirty_flags.vertex_array |= 186 } else if (method >= MAXWELL3D_REG_INDEX(instanced_arrays) &&
180 1u << ((method_call.method - MAXWELL3D_REG_INDEX(vertex_array_limit)) >> 1); 187 method < MAXWELL3D_REG_INDEX(instanced_arrays) + 32) {
181 } else if (method_call.method >= MAXWELL3D_REG_INDEX(instanced_arrays) && 188 dirty_flags.vertex_array.set(method - MAXWELL3D_REG_INDEX(instanced_arrays));
182 method_call.method < MAXWELL3D_REG_INDEX(instanced_arrays) + 32) {
183 dirty_flags.vertex_array |=
184 1u << (method_call.method - MAXWELL3D_REG_INDEX(instanced_arrays));
185 } 189 }
186 } 190 }
187 191
188 switch (method_call.method) { 192 switch (method) {
189 case MAXWELL3D_REG_INDEX(macros.data): { 193 case MAXWELL3D_REG_INDEX(macros.data): {
190 ProcessMacroUpload(method_call.argument); 194 ProcessMacroUpload(method_call.argument);
191 break; 195 break;
@@ -245,6 +249,10 @@ void Maxwell3D::CallMethod(const GPU::MethodCall& method_call) {
245 ProcessQueryGet(); 249 ProcessQueryGet();
246 break; 250 break;
247 } 251 }
252 case MAXWELL3D_REG_INDEX(sync_info): {
253 ProcessSyncPoint();
254 break;
255 }
248 default: 256 default:
249 break; 257 break;
250 } 258 }
@@ -265,10 +273,9 @@ void Maxwell3D::ProcessMacroBind(u32 data) {
265} 273}
266 274
267void Maxwell3D::ProcessQueryGet() { 275void Maxwell3D::ProcessQueryGet() {
268 GPUVAddr sequence_address = regs.query.QueryAddress(); 276 const GPUVAddr sequence_address{regs.query.QueryAddress()};
269 // Since the sequence address is given as a GPU VAddr, we have to convert it to an application 277 // Since the sequence address is given as a GPU VAddr, we have to convert it to an application
270 // VAddr before writing. 278 // VAddr before writing.
271 std::optional<VAddr> address = memory_manager.GpuToCpuAddress(sequence_address);
272 279
273 // TODO(Subv): Support the other query units. 280 // TODO(Subv): Support the other query units.
274 ASSERT_MSG(regs.query.query_get.unit == Regs::QueryUnit::Crop, 281 ASSERT_MSG(regs.query.query_get.unit == Regs::QueryUnit::Crop,
@@ -303,7 +310,7 @@ void Maxwell3D::ProcessQueryGet() {
303 // Write the current query sequence to the sequence address. 310 // Write the current query sequence to the sequence address.
304 // TODO(Subv): Find out what happens if you use a long query type but mark it as a short 311 // TODO(Subv): Find out what happens if you use a long query type but mark it as a short
305 // query. 312 // query.
306 Memory::Write32(*address, sequence); 313 memory_manager.Write<u32>(sequence_address, sequence);
307 } else { 314 } else {
308 // Write the 128-bit result structure in long mode. Note: We emulate an infinitely fast 315 // Write the 128-bit result structure in long mode. Note: We emulate an infinitely fast
309 // GPU, this command may actually take a while to complete in real hardware due to GPU 316 // GPU, this command may actually take a while to complete in real hardware due to GPU
@@ -311,8 +318,8 @@ void Maxwell3D::ProcessQueryGet() {
311 LongQueryResult query_result{}; 318 LongQueryResult query_result{};
312 query_result.value = result; 319 query_result.value = result;
313 // TODO(Subv): Generate a real GPU timestamp and write it here instead of CoreTiming 320 // TODO(Subv): Generate a real GPU timestamp and write it here instead of CoreTiming
314 query_result.timestamp = CoreTiming::GetTicks(); 321 query_result.timestamp = system.CoreTiming().GetTicks();
315 Memory::WriteBlock(*address, &query_result, sizeof(query_result)); 322 memory_manager.WriteBlock(sequence_address, &query_result, sizeof(query_result));
316 } 323 }
317 dirty_flags.OnMemoryWrite(); 324 dirty_flags.OnMemoryWrite();
318 break; 325 break;
@@ -323,12 +330,20 @@ void Maxwell3D::ProcessQueryGet() {
323 } 330 }
324} 331}
325 332
333void Maxwell3D::ProcessSyncPoint() {
334 const u32 sync_point = regs.sync_info.sync_point.Value();
335 const u32 increment = regs.sync_info.increment.Value();
336 const u32 cache_flush = regs.sync_info.unknown.Value();
337 LOG_DEBUG(HW_GPU, "Syncpoint set {}, increment: {}, unk: {}", sync_point, increment,
338 cache_flush);
339}
340
326void Maxwell3D::DrawArrays() { 341void Maxwell3D::DrawArrays() {
327 LOG_DEBUG(HW_GPU, "called, topology={}, count={}", static_cast<u32>(regs.draw.topology.Value()), 342 LOG_DEBUG(HW_GPU, "called, topology={}, count={}", static_cast<u32>(regs.draw.topology.Value()),
328 regs.vertex_buffer.count); 343 regs.vertex_buffer.count);
329 ASSERT_MSG(!(regs.index_array.count && regs.vertex_buffer.count), "Both indexed and direct?"); 344 ASSERT_MSG(!(regs.index_array.count && regs.vertex_buffer.count), "Both indexed and direct?");
330 345
331 auto debug_context = Core::System::GetInstance().GetGPUDebugContext(); 346 auto debug_context = system.GetGPUDebugContext();
332 347
333 if (debug_context) { 348 if (debug_context) {
334 debug_context->OnEvent(Tegra::DebugContext::Event::IncomingPrimitiveBatch, nullptr); 349 debug_context->OnEvent(Tegra::DebugContext::Event::IncomingPrimitiveBatch, nullptr);
@@ -381,16 +396,18 @@ void Maxwell3D::ProcessCBBind(Regs::ShaderStage stage) {
381 396
382void Maxwell3D::ProcessCBData(u32 value) { 397void Maxwell3D::ProcessCBData(u32 value) {
383 // Write the input value to the current const buffer at the current position. 398 // Write the input value to the current const buffer at the current position.
384 GPUVAddr buffer_address = regs.const_buffer.BufferAddress(); 399 const GPUVAddr buffer_address = regs.const_buffer.BufferAddress();
385 ASSERT(buffer_address != 0); 400 ASSERT(buffer_address != 0);
386 401
387 // Don't allow writing past the end of the buffer. 402 // Don't allow writing past the end of the buffer.
388 ASSERT(regs.const_buffer.cb_pos + sizeof(u32) <= regs.const_buffer.cb_size); 403 ASSERT(regs.const_buffer.cb_pos + sizeof(u32) <= regs.const_buffer.cb_size);
389 404
390 std::optional<VAddr> address = 405 const GPUVAddr address{buffer_address + regs.const_buffer.cb_pos};
391 memory_manager.GpuToCpuAddress(buffer_address + regs.const_buffer.cb_pos); 406
407 u8* ptr{memory_manager.GetPointer(address)};
408 rasterizer.InvalidateRegion(ToCacheAddr(ptr), sizeof(u32));
409 memory_manager.Write<u32>(address, value);
392 410
393 Memory::Write32(*address, value);
394 dirty_flags.OnMemoryWrite(); 411 dirty_flags.OnMemoryWrite();
395 412
396 // Increment the current buffer position. 413 // Increment the current buffer position.
@@ -398,22 +415,19 @@ void Maxwell3D::ProcessCBData(u32 value) {
398} 415}
399 416
400Texture::TICEntry Maxwell3D::GetTICEntry(u32 tic_index) const { 417Texture::TICEntry Maxwell3D::GetTICEntry(u32 tic_index) const {
401 GPUVAddr tic_base_address = regs.tic.TICAddress(); 418 const GPUVAddr tic_address_gpu{regs.tic.TICAddress() + tic_index * sizeof(Texture::TICEntry)};
402
403 GPUVAddr tic_address_gpu = tic_base_address + tic_index * sizeof(Texture::TICEntry);
404 std::optional<VAddr> tic_address_cpu = memory_manager.GpuToCpuAddress(tic_address_gpu);
405 419
406 Texture::TICEntry tic_entry; 420 Texture::TICEntry tic_entry;
407 Memory::ReadBlock(*tic_address_cpu, &tic_entry, sizeof(Texture::TICEntry)); 421 memory_manager.ReadBlock(tic_address_gpu, &tic_entry, sizeof(Texture::TICEntry));
408 422
409 ASSERT_MSG(tic_entry.header_version == Texture::TICHeaderVersion::BlockLinear || 423 ASSERT_MSG(tic_entry.header_version == Texture::TICHeaderVersion::BlockLinear ||
410 tic_entry.header_version == Texture::TICHeaderVersion::Pitch, 424 tic_entry.header_version == Texture::TICHeaderVersion::Pitch,
411 "TIC versions other than BlockLinear or Pitch are unimplemented"); 425 "TIC versions other than BlockLinear or Pitch are unimplemented");
412 426
413 auto r_type = tic_entry.r_type.Value(); 427 const auto r_type = tic_entry.r_type.Value();
414 auto g_type = tic_entry.g_type.Value(); 428 const auto g_type = tic_entry.g_type.Value();
415 auto b_type = tic_entry.b_type.Value(); 429 const auto b_type = tic_entry.b_type.Value();
416 auto a_type = tic_entry.a_type.Value(); 430 const auto a_type = tic_entry.a_type.Value();
417 431
418 // TODO(Subv): Different data types for separate components are not supported 432 // TODO(Subv): Different data types for separate components are not supported
419 ASSERT(r_type == g_type && r_type == b_type && r_type == a_type); 433 ASSERT(r_type == g_type && r_type == b_type && r_type == a_type);
@@ -422,13 +436,10 @@ Texture::TICEntry Maxwell3D::GetTICEntry(u32 tic_index) const {
422} 436}
423 437
424Texture::TSCEntry Maxwell3D::GetTSCEntry(u32 tsc_index) const { 438Texture::TSCEntry Maxwell3D::GetTSCEntry(u32 tsc_index) const {
425 GPUVAddr tsc_base_address = regs.tsc.TSCAddress(); 439 const GPUVAddr tsc_address_gpu{regs.tsc.TSCAddress() + tsc_index * sizeof(Texture::TSCEntry)};
426
427 GPUVAddr tsc_address_gpu = tsc_base_address + tsc_index * sizeof(Texture::TSCEntry);
428 std::optional<VAddr> tsc_address_cpu = memory_manager.GpuToCpuAddress(tsc_address_gpu);
429 440
430 Texture::TSCEntry tsc_entry; 441 Texture::TSCEntry tsc_entry;
431 Memory::ReadBlock(*tsc_address_cpu, &tsc_entry, sizeof(Texture::TSCEntry)); 442 memory_manager.ReadBlock(tsc_address_gpu, &tsc_entry, sizeof(Texture::TSCEntry));
432 return tsc_entry; 443 return tsc_entry;
433} 444}
434 445
@@ -447,8 +458,7 @@ std::vector<Texture::FullTextureInfo> Maxwell3D::GetStageTextures(Regs::ShaderSt
447 for (GPUVAddr current_texture = tex_info_buffer.address + TextureInfoOffset; 458 for (GPUVAddr current_texture = tex_info_buffer.address + TextureInfoOffset;
448 current_texture < tex_info_buffer_end; current_texture += sizeof(Texture::TextureHandle)) { 459 current_texture < tex_info_buffer_end; current_texture += sizeof(Texture::TextureHandle)) {
449 460
450 Texture::TextureHandle tex_handle{ 461 const Texture::TextureHandle tex_handle{memory_manager.Read<u32>(current_texture)};
451 Memory::Read32(*memory_manager.GpuToCpuAddress(current_texture))};
452 462
453 Texture::FullTextureInfo tex_info{}; 463 Texture::FullTextureInfo tex_info{};
454 // TODO(Subv): Use the shader to determine which textures are actually accessed. 464 // TODO(Subv): Use the shader to determine which textures are actually accessed.
@@ -457,23 +467,16 @@ std::vector<Texture::FullTextureInfo> Maxwell3D::GetStageTextures(Regs::ShaderSt
457 sizeof(Texture::TextureHandle); 467 sizeof(Texture::TextureHandle);
458 468
459 // Load the TIC data. 469 // Load the TIC data.
460 if (tex_handle.tic_id != 0) { 470 auto tic_entry = GetTICEntry(tex_handle.tic_id);
461 tex_info.enabled = true; 471 // TODO(Subv): Workaround for BitField's move constructor being deleted.
462 472 std::memcpy(&tex_info.tic, &tic_entry, sizeof(tic_entry));
463 auto tic_entry = GetTICEntry(tex_handle.tic_id);
464 // TODO(Subv): Workaround for BitField's move constructor being deleted.
465 std::memcpy(&tex_info.tic, &tic_entry, sizeof(tic_entry));
466 }
467 473
468 // Load the TSC data 474 // Load the TSC data
469 if (tex_handle.tsc_id != 0) { 475 auto tsc_entry = GetTSCEntry(tex_handle.tsc_id);
470 auto tsc_entry = GetTSCEntry(tex_handle.tsc_id); 476 // TODO(Subv): Workaround for BitField's move constructor being deleted.
471 // TODO(Subv): Workaround for BitField's move constructor being deleted. 477 std::memcpy(&tex_info.tsc, &tsc_entry, sizeof(tsc_entry));
472 std::memcpy(&tex_info.tsc, &tsc_entry, sizeof(tsc_entry));
473 }
474 478
475 if (tex_info.enabled) 479 textures.push_back(tex_info);
476 textures.push_back(tex_info);
477 } 480 }
478 481
479 return textures; 482 return textures;
@@ -485,31 +488,25 @@ Texture::FullTextureInfo Maxwell3D::GetStageTexture(Regs::ShaderStage stage,
485 auto& tex_info_buffer = shader.const_buffers[regs.tex_cb_index]; 488 auto& tex_info_buffer = shader.const_buffers[regs.tex_cb_index];
486 ASSERT(tex_info_buffer.enabled && tex_info_buffer.address != 0); 489 ASSERT(tex_info_buffer.enabled && tex_info_buffer.address != 0);
487 490
488 GPUVAddr tex_info_address = tex_info_buffer.address + offset * sizeof(Texture::TextureHandle); 491 const GPUVAddr tex_info_address =
492 tex_info_buffer.address + offset * sizeof(Texture::TextureHandle);
489 493
490 ASSERT(tex_info_address < tex_info_buffer.address + tex_info_buffer.size); 494 ASSERT(tex_info_address < tex_info_buffer.address + tex_info_buffer.size);
491 495
492 std::optional<VAddr> tex_address_cpu = memory_manager.GpuToCpuAddress(tex_info_address); 496 const Texture::TextureHandle tex_handle{memory_manager.Read<u32>(tex_info_address)};
493 Texture::TextureHandle tex_handle{Memory::Read32(*tex_address_cpu)};
494 497
495 Texture::FullTextureInfo tex_info{}; 498 Texture::FullTextureInfo tex_info{};
496 tex_info.index = static_cast<u32>(offset); 499 tex_info.index = static_cast<u32>(offset);
497 500
498 // Load the TIC data. 501 // Load the TIC data.
499 if (tex_handle.tic_id != 0) { 502 auto tic_entry = GetTICEntry(tex_handle.tic_id);
500 tex_info.enabled = true; 503 // TODO(Subv): Workaround for BitField's move constructor being deleted.
501 504 std::memcpy(&tex_info.tic, &tic_entry, sizeof(tic_entry));
502 auto tic_entry = GetTICEntry(tex_handle.tic_id);
503 // TODO(Subv): Workaround for BitField's move constructor being deleted.
504 std::memcpy(&tex_info.tic, &tic_entry, sizeof(tic_entry));
505 }
506 505
507 // Load the TSC data 506 // Load the TSC data
508 if (tex_handle.tsc_id != 0) { 507 auto tsc_entry = GetTSCEntry(tex_handle.tsc_id);
509 auto tsc_entry = GetTSCEntry(tex_handle.tsc_id); 508 // TODO(Subv): Workaround for BitField's move constructor being deleted.
510 // TODO(Subv): Workaround for BitField's move constructor being deleted. 509 std::memcpy(&tex_info.tsc, &tsc_entry, sizeof(tsc_entry));
511 std::memcpy(&tex_info.tsc, &tsc_entry, sizeof(tsc_entry));
512 }
513 510
514 return tex_info; 511 return tex_info;
515} 512}
diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h
index 1f76aa670..321af3297 100644
--- a/src/video_core/engines/maxwell_3d.h
+++ b/src/video_core/engines/maxwell_3d.h
@@ -5,8 +5,10 @@
5#pragma once 5#pragma once
6 6
7#include <array> 7#include <array>
8#include <bitset>
8#include <unordered_map> 9#include <unordered_map>
9#include <vector> 10#include <vector>
11
10#include "common/assert.h" 12#include "common/assert.h"
11#include "common/bit_field.h" 13#include "common/bit_field.h"
12#include "common/common_funcs.h" 14#include "common/common_funcs.h"
@@ -14,9 +16,16 @@
14#include "common/math_util.h" 16#include "common/math_util.h"
15#include "video_core/gpu.h" 17#include "video_core/gpu.h"
16#include "video_core/macro_interpreter.h" 18#include "video_core/macro_interpreter.h"
17#include "video_core/memory_manager.h"
18#include "video_core/textures/texture.h" 19#include "video_core/textures/texture.h"
19 20
21namespace Core {
22class System;
23}
24
25namespace Tegra {
26class MemoryManager;
27}
28
20namespace VideoCore { 29namespace VideoCore {
21class RasterizerInterface; 30class RasterizerInterface;
22} 31}
@@ -28,7 +37,8 @@ namespace Tegra::Engines {
28 37
29class Maxwell3D final { 38class Maxwell3D final {
30public: 39public:
31 explicit Maxwell3D(VideoCore::RasterizerInterface& rasterizer, MemoryManager& memory_manager); 40 explicit Maxwell3D(Core::System& system, VideoCore::RasterizerInterface& rasterizer,
41 MemoryManager& memory_manager);
32 ~Maxwell3D() = default; 42 ~Maxwell3D() = default;
33 43
34 /// Register structure of the Maxwell3D engine. 44 /// Register structure of the Maxwell3D engine.
@@ -498,7 +508,7 @@ public:
498 f32 translate_z; 508 f32 translate_z;
499 INSERT_PADDING_WORDS(2); 509 INSERT_PADDING_WORDS(2);
500 510
501 MathUtil::Rectangle<s32> GetRect() const { 511 Common::Rectangle<s32> GetRect() const {
502 return { 512 return {
503 GetX(), // left 513 GetX(), // left
504 GetY() + GetHeight(), // top 514 GetY() + GetHeight(), // top
@@ -569,7 +579,17 @@ public:
569 u32 bind; 579 u32 bind;
570 } macros; 580 } macros;
571 581
572 INSERT_PADDING_WORDS(0x188); 582 INSERT_PADDING_WORDS(0x69);
583
584 struct {
585 union {
586 BitField<0, 16, u32> sync_point;
587 BitField<16, 1, u32> unknown;
588 BitField<20, 1, u32> increment;
589 };
590 } sync_info;
591
592 INSERT_PADDING_WORDS(0x11E);
573 593
574 u32 tfb_enabled; 594 u32 tfb_enabled;
575 595
@@ -1086,22 +1106,20 @@ public:
1086 }; 1106 };
1087 1107
1088 State state{}; 1108 State state{};
1089 MemoryManager& memory_manager;
1090 1109
1091 struct DirtyFlags { 1110 struct DirtyFlags {
1092 u8 color_buffer = 0xFF; 1111 std::bitset<8> color_buffer{0xFF};
1093 bool zeta_buffer = true; 1112 std::bitset<32> vertex_array{0xFFFFFFFF};
1094
1095 bool shaders = true;
1096 1113
1097 bool vertex_attrib_format = true; 1114 bool vertex_attrib_format = true;
1098 u32 vertex_array = 0xFFFFFFFF; 1115 bool zeta_buffer = true;
1116 bool shaders = true;
1099 1117
1100 void OnMemoryWrite() { 1118 void OnMemoryWrite() {
1101 color_buffer = 0xFF;
1102 zeta_buffer = true; 1119 zeta_buffer = true;
1103 shaders = true; 1120 shaders = true;
1104 vertex_array = 0xFFFFFFFF; 1121 color_buffer.set();
1122 vertex_array.set();
1105 } 1123 }
1106 }; 1124 };
1107 1125
@@ -1131,8 +1149,12 @@ public:
1131private: 1149private:
1132 void InitializeRegisterDefaults(); 1150 void InitializeRegisterDefaults();
1133 1151
1152 Core::System& system;
1153
1134 VideoCore::RasterizerInterface& rasterizer; 1154 VideoCore::RasterizerInterface& rasterizer;
1135 1155
1156 MemoryManager& memory_manager;
1157
1136 /// Start offsets of each macro in macro_memory 1158 /// Start offsets of each macro in macro_memory
1137 std::unordered_map<u32, u32> macro_offsets; 1159 std::unordered_map<u32, u32> macro_offsets;
1138 1160
@@ -1172,6 +1194,9 @@ private:
1172 /// Handles a write to the QUERY_GET register. 1194 /// Handles a write to the QUERY_GET register.
1173 void ProcessQueryGet(); 1195 void ProcessQueryGet();
1174 1196
1197 /// Handles writes to syncing register.
1198 void ProcessSyncPoint();
1199
1175 /// Handles a write to the CB_DATA[i] register. 1200 /// Handles a write to the CB_DATA[i] register.
1176 void ProcessCBData(u32 value); 1201 void ProcessCBData(u32 value);
1177 1202
@@ -1187,6 +1212,7 @@ private:
1187 "Field " #field_name " has invalid position") 1212 "Field " #field_name " has invalid position")
1188 1213
1189ASSERT_REG_POSITION(macros, 0x45); 1214ASSERT_REG_POSITION(macros, 0x45);
1215ASSERT_REG_POSITION(sync_info, 0xB2);
1190ASSERT_REG_POSITION(tfb_enabled, 0x1D1); 1216ASSERT_REG_POSITION(tfb_enabled, 0x1D1);
1191ASSERT_REG_POSITION(rt, 0x200); 1217ASSERT_REG_POSITION(rt, 0x200);
1192ASSERT_REG_POSITION(viewport_transform, 0x280); 1218ASSERT_REG_POSITION(viewport_transform, 0x280);
diff --git a/src/video_core/engines/maxwell_compute.cpp b/src/video_core/engines/maxwell_compute.cpp
deleted file mode 100644
index 656db6a61..000000000
--- a/src/video_core/engines/maxwell_compute.cpp
+++ /dev/null
@@ -1,28 +0,0 @@
1// Copyright 2018 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/logging/log.h"
6#include "core/core.h"
7#include "video_core/engines/maxwell_compute.h"
8
9namespace Tegra::Engines {
10
11void MaxwellCompute::CallMethod(const GPU::MethodCall& method_call) {
12 ASSERT_MSG(method_call.method < Regs::NUM_REGS,
13 "Invalid MaxwellCompute register, increase the size of the Regs structure");
14
15 regs.reg_array[method_call.method] = method_call.argument;
16
17 switch (method_call.method) {
18 case MAXWELL_COMPUTE_REG_INDEX(compute): {
19 LOG_CRITICAL(HW_GPU, "Compute shaders are not implemented");
20 UNREACHABLE();
21 break;
22 }
23 default:
24 break;
25 }
26}
27
28} // namespace Tegra::Engines
diff --git a/src/video_core/engines/maxwell_dma.cpp b/src/video_core/engines/maxwell_dma.cpp
index 06462f570..2426d0067 100644
--- a/src/video_core/engines/maxwell_dma.cpp
+++ b/src/video_core/engines/maxwell_dma.cpp
@@ -2,17 +2,21 @@
2// Licensed under GPLv2 or any later version 2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included. 3// Refer to the license.txt file included.
4 4
5#include "common/assert.h"
6#include "common/logging/log.h"
5#include "core/core.h" 7#include "core/core.h"
6#include "core/memory.h"
7#include "video_core/engines/maxwell_3d.h" 8#include "video_core/engines/maxwell_3d.h"
8#include "video_core/engines/maxwell_dma.h" 9#include "video_core/engines/maxwell_dma.h"
10#include "video_core/memory_manager.h"
9#include "video_core/rasterizer_interface.h" 11#include "video_core/rasterizer_interface.h"
12#include "video_core/renderer_base.h"
10#include "video_core/textures/decoders.h" 13#include "video_core/textures/decoders.h"
11 14
12namespace Tegra::Engines { 15namespace Tegra::Engines {
13 16
14MaxwellDMA::MaxwellDMA(VideoCore::RasterizerInterface& rasterizer, MemoryManager& memory_manager) 17MaxwellDMA::MaxwellDMA(Core::System& system, VideoCore::RasterizerInterface& rasterizer,
15 : memory_manager(memory_manager), rasterizer{rasterizer} {} 18 MemoryManager& memory_manager)
19 : system{system}, rasterizer{rasterizer}, memory_manager{memory_manager} {}
16 20
17void MaxwellDMA::CallMethod(const GPU::MethodCall& method_call) { 21void MaxwellDMA::CallMethod(const GPU::MethodCall& method_call) {
18 ASSERT_MSG(method_call.method < Regs::NUM_REGS, 22 ASSERT_MSG(method_call.method < Regs::NUM_REGS,
@@ -39,9 +43,6 @@ void MaxwellDMA::HandleCopy() {
39 const GPUVAddr source = regs.src_address.Address(); 43 const GPUVAddr source = regs.src_address.Address();
40 const GPUVAddr dest = regs.dst_address.Address(); 44 const GPUVAddr dest = regs.dst_address.Address();
41 45
42 const VAddr source_cpu = *memory_manager.GpuToCpuAddress(source);
43 const VAddr dest_cpu = *memory_manager.GpuToCpuAddress(dest);
44
45 // TODO(Subv): Perform more research and implement all features of this engine. 46 // TODO(Subv): Perform more research and implement all features of this engine.
46 ASSERT(regs.exec.enable_swizzle == 0); 47 ASSERT(regs.exec.enable_swizzle == 0);
47 ASSERT(regs.exec.query_mode == Regs::QueryMode::None); 48 ASSERT(regs.exec.query_mode == Regs::QueryMode::None);
@@ -57,14 +58,14 @@ void MaxwellDMA::HandleCopy() {
57 } 58 }
58 59
59 // All copies here update the main memory, so mark all rasterizer states as invalid. 60 // All copies here update the main memory, so mark all rasterizer states as invalid.
60 Core::System::GetInstance().GPU().Maxwell3D().dirty_flags.OnMemoryWrite(); 61 system.GPU().Maxwell3D().dirty_flags.OnMemoryWrite();
61 62
62 if (regs.exec.is_dst_linear && regs.exec.is_src_linear) { 63 if (regs.exec.is_dst_linear && regs.exec.is_src_linear) {
63 // When the enable_2d bit is disabled, the copy is performed as if we were copying a 1D 64 // When the enable_2d bit is disabled, the copy is performed as if we were copying a 1D
64 // buffer of length `x_count`, otherwise we copy a 2D image of dimensions (x_count, 65 // buffer of length `x_count`, otherwise we copy a 2D image of dimensions (x_count,
65 // y_count). 66 // y_count).
66 if (!regs.exec.enable_2d) { 67 if (!regs.exec.enable_2d) {
67 Memory::CopyBlock(dest_cpu, source_cpu, regs.x_count); 68 memory_manager.CopyBlock(dest, source, regs.x_count);
68 return; 69 return;
69 } 70 }
70 71
@@ -73,9 +74,9 @@ void MaxwellDMA::HandleCopy() {
73 // rectangle. There is no need to manually flush/invalidate the regions because 74 // rectangle. There is no need to manually flush/invalidate the regions because
74 // CopyBlock does that for us. 75 // CopyBlock does that for us.
75 for (u32 line = 0; line < regs.y_count; ++line) { 76 for (u32 line = 0; line < regs.y_count; ++line) {
76 const VAddr source_line = source_cpu + line * regs.src_pitch; 77 const GPUVAddr source_line = source + line * regs.src_pitch;
77 const VAddr dest_line = dest_cpu + line * regs.dst_pitch; 78 const GPUVAddr dest_line = dest + line * regs.dst_pitch;
78 Memory::CopyBlock(dest_line, source_line, regs.x_count); 79 memory_manager.CopyBlock(dest_line, source_line, regs.x_count);
79 } 80 }
80 return; 81 return;
81 } 82 }
@@ -84,15 +85,28 @@ void MaxwellDMA::HandleCopy() {
84 85
85 const std::size_t copy_size = regs.x_count * regs.y_count; 86 const std::size_t copy_size = regs.x_count * regs.y_count;
86 87
88 auto source_ptr{memory_manager.GetPointer(source)};
89 auto dst_ptr{memory_manager.GetPointer(dest)};
90
91 if (!source_ptr) {
92 LOG_ERROR(HW_GPU, "source_ptr is invalid");
93 return;
94 }
95
96 if (!dst_ptr) {
97 LOG_ERROR(HW_GPU, "dst_ptr is invalid");
98 return;
99 }
100
87 const auto FlushAndInvalidate = [&](u32 src_size, u64 dst_size) { 101 const auto FlushAndInvalidate = [&](u32 src_size, u64 dst_size) {
88 // TODO(Subv): For now, manually flush the regions until we implement GPU-accelerated 102 // TODO(Subv): For now, manually flush the regions until we implement GPU-accelerated
89 // copying. 103 // copying.
90 rasterizer.FlushRegion(source_cpu, src_size); 104 rasterizer.FlushRegion(ToCacheAddr(source_ptr), src_size);
91 105
92 // We have to invalidate the destination region to evict any outdated surfaces from the 106 // We have to invalidate the destination region to evict any outdated surfaces from the
93 // cache. We do this before actually writing the new data because the destination address 107 // cache. We do this before actually writing the new data because the destination address
94 // might contain a dirty surface that will have to be written back to memory. 108 // might contain a dirty surface that will have to be written back to memory.
95 rasterizer.InvalidateRegion(dest_cpu, dst_size); 109 rasterizer.InvalidateRegion(ToCacheAddr(dst_ptr), dst_size);
96 }; 110 };
97 111
98 if (regs.exec.is_dst_linear && !regs.exec.is_src_linear) { 112 if (regs.exec.is_dst_linear && !regs.exec.is_src_linear) {
@@ -105,7 +119,7 @@ void MaxwellDMA::HandleCopy() {
105 copy_size * src_bytes_per_pixel); 119 copy_size * src_bytes_per_pixel);
106 120
107 Texture::UnswizzleSubrect(regs.x_count, regs.y_count, regs.dst_pitch, 121 Texture::UnswizzleSubrect(regs.x_count, regs.y_count, regs.dst_pitch,
108 regs.src_params.size_x, src_bytes_per_pixel, source_cpu, dest_cpu, 122 regs.src_params.size_x, src_bytes_per_pixel, source_ptr, dst_ptr,
109 regs.src_params.BlockHeight(), regs.src_params.pos_x, 123 regs.src_params.BlockHeight(), regs.src_params.pos_x,
110 regs.src_params.pos_y); 124 regs.src_params.pos_y);
111 } else { 125 } else {
@@ -119,7 +133,7 @@ void MaxwellDMA::HandleCopy() {
119 133
120 // If the input is linear and the output is tiled, swizzle the input and copy it over. 134 // If the input is linear and the output is tiled, swizzle the input and copy it over.
121 Texture::SwizzleSubrect(regs.x_count, regs.y_count, regs.src_pitch, regs.dst_params.size_x, 135 Texture::SwizzleSubrect(regs.x_count, regs.y_count, regs.src_pitch, regs.dst_params.size_x,
122 src_bpp, dest_cpu, source_cpu, regs.dst_params.BlockHeight()); 136 src_bpp, dst_ptr, source_ptr, regs.dst_params.BlockHeight());
123 } 137 }
124} 138}
125 139
diff --git a/src/video_core/engines/maxwell_dma.h b/src/video_core/engines/maxwell_dma.h
index 1f8cd65d2..c6b649842 100644
--- a/src/video_core/engines/maxwell_dma.h
+++ b/src/video_core/engines/maxwell_dma.h
@@ -5,12 +5,19 @@
5#pragma once 5#pragma once
6 6
7#include <array> 7#include <array>
8#include "common/assert.h" 8#include <cstddef>
9#include "common/bit_field.h" 9#include "common/bit_field.h"
10#include "common/common_funcs.h" 10#include "common/common_funcs.h"
11#include "common/common_types.h" 11#include "common/common_types.h"
12#include "video_core/gpu.h" 12#include "video_core/gpu.h"
13#include "video_core/memory_manager.h" 13
14namespace Core {
15class System;
16}
17
18namespace Tegra {
19class MemoryManager;
20}
14 21
15namespace VideoCore { 22namespace VideoCore {
16class RasterizerInterface; 23class RasterizerInterface;
@@ -20,7 +27,8 @@ namespace Tegra::Engines {
20 27
21class MaxwellDMA final { 28class MaxwellDMA final {
22public: 29public:
23 explicit MaxwellDMA(VideoCore::RasterizerInterface& rasterizer, MemoryManager& memory_manager); 30 explicit MaxwellDMA(Core::System& system, VideoCore::RasterizerInterface& rasterizer,
31 MemoryManager& memory_manager);
24 ~MaxwellDMA() = default; 32 ~MaxwellDMA() = default;
25 33
26 /// Write the value to the register identified by method. 34 /// Write the value to the register identified by method.
@@ -134,11 +142,13 @@ public:
134 }; 142 };
135 } regs{}; 143 } regs{};
136 144
137 MemoryManager& memory_manager;
138
139private: 145private:
146 Core::System& system;
147
140 VideoCore::RasterizerInterface& rasterizer; 148 VideoCore::RasterizerInterface& rasterizer;
141 149
150 MemoryManager& memory_manager;
151
142 /// Performs the copy from the source buffer to the destination buffer as configured in the 152 /// Performs the copy from the source buffer to the destination buffer as configured in the
143 /// registers. 153 /// registers.
144 void HandleCopy(); 154 void HandleCopy();
diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h
index e53c77f2b..2e1e96c81 100644
--- a/src/video_core/engines/shader_bytecode.h
+++ b/src/video_core/engines/shader_bytecode.h
@@ -6,7 +6,6 @@
6 6
7#include <bitset> 7#include <bitset>
8#include <optional> 8#include <optional>
9#include <string>
10#include <tuple> 9#include <tuple>
11#include <vector> 10#include <vector>
12 11
@@ -186,7 +185,7 @@ enum class SubOp : u64 {
186}; 185};
187 186
188enum class F2iRoundingOp : u64 { 187enum class F2iRoundingOp : u64 {
189 None = 0, 188 RoundEven = 0,
190 Floor = 1, 189 Floor = 1,
191 Ceil = 2, 190 Ceil = 2,
192 Trunc = 3, 191 Trunc = 3,
@@ -208,6 +207,8 @@ enum class UniformType : u64 {
208 SignedShort = 3, 207 SignedShort = 3,
209 Single = 4, 208 Single = 4,
210 Double = 5, 209 Double = 5,
210 Quad = 6,
211 UnsignedQuad = 7,
211}; 212};
212 213
213enum class StoreType : u64 { 214enum class StoreType : u64 {
@@ -215,9 +216,9 @@ enum class StoreType : u64 {
215 Signed8 = 1, 216 Signed8 = 1,
216 Unsigned16 = 2, 217 Unsigned16 = 2,
217 Signed16 = 3, 218 Signed16 = 3,
218 Bytes32 = 4, 219 Bits32 = 4,
219 Bytes64 = 5, 220 Bits64 = 5,
220 Bytes128 = 6, 221 Bits128 = 6,
221}; 222};
222 223
223enum class IMinMaxExchange : u64 { 224enum class IMinMaxExchange : u64 {
@@ -323,11 +324,11 @@ enum class TextureQueryType : u64 {
323 324
324enum class TextureProcessMode : u64 { 325enum class TextureProcessMode : u64 {
325 None = 0, 326 None = 0,
326 LZ = 1, // Unknown, appears to be the same as none. 327 LZ = 1, // Load LOD of zero.
327 LB = 2, // Load Bias. 328 LB = 2, // Load Bias.
328 LL = 3, // Load LOD (LevelOfDetail) 329 LL = 3, // Load LOD.
329 LBA = 6, // Load Bias. The A is unknown, does not appear to differ with LB 330 LBA = 6, // Load Bias. The A is unknown, does not appear to differ with LB.
330 LLA = 7 // Load LOD. The A is unknown, does not appear to differ with LL 331 LLA = 7 // Load LOD. The A is unknown, does not appear to differ with LL.
331}; 332};
332 333
333enum class TextureMiscMode : u64 { 334enum class TextureMiscMode : u64 {
@@ -374,9 +375,9 @@ enum class R2pMode : u64 {
374}; 375};
375 376
376enum class IpaInterpMode : u64 { 377enum class IpaInterpMode : u64 {
377 Linear = 0, 378 Pass = 0,
378 Perspective = 1, 379 Multiply = 1,
379 Flat = 2, 380 Constant = 2,
380 Sc = 3, 381 Sc = 3,
381}; 382};
382 383
@@ -397,6 +398,10 @@ struct IpaMode {
397 bool operator!=(const IpaMode& a) const { 398 bool operator!=(const IpaMode& a) const {
398 return !operator==(a); 399 return !operator==(a);
399 } 400 }
401 bool operator<(const IpaMode& a) const {
402 return std::tie(interpolation_mode, sampling_mode) <
403 std::tie(a.interpolation_mode, a.sampling_mode);
404 }
400}; 405};
401 406
402enum class SystemVariable : u64 { 407enum class SystemVariable : u64 {
@@ -644,6 +649,7 @@ union Instruction {
644 BitField<37, 2, HalfPrecision> precision; 649 BitField<37, 2, HalfPrecision> precision;
645 BitField<32, 1, u64> saturate; 650 BitField<32, 1, u64> saturate;
646 651
652 BitField<31, 1, u64> negate_b;
647 BitField<30, 1, u64> negate_c; 653 BitField<30, 1, u64> negate_c;
648 BitField<35, 2, HalfType> type_c; 654 BitField<35, 2, HalfType> type_c;
649 } rr; 655 } rr;
@@ -780,6 +786,12 @@ union Instruction {
780 } st_l; 786 } st_l;
781 787
782 union { 788 union {
789 BitField<48, 3, UniformType> type;
790 BitField<46, 2, u64> cache_mode;
791 BitField<20, 24, s64> immediate_offset;
792 } ldg;
793
794 union {
783 BitField<0, 3, u64> pred0; 795 BitField<0, 3, u64> pred0;
784 BitField<3, 3, u64> pred3; 796 BitField<3, 3, u64> pred3;
785 BitField<7, 1, u64> abs_a; 797 BitField<7, 1, u64> abs_a;
@@ -968,6 +980,10 @@ union Instruction {
968 } 980 }
969 return false; 981 return false;
970 } 982 }
983
984 bool IsComponentEnabled(std::size_t component) const {
985 return ((1ULL << component) & component_mask) != 0;
986 }
971 } txq; 987 } txq;
972 988
973 union { 989 union {
@@ -1222,24 +1238,35 @@ union Instruction {
1222 1238
1223 union { 1239 union {
1224 BitField<20, 16, u64> imm20_16; 1240 BitField<20, 16, u64> imm20_16;
1241 BitField<35, 1, u64> high_b_rr; // used on RR
1225 BitField<36, 1, u64> product_shift_left; 1242 BitField<36, 1, u64> product_shift_left;
1226 BitField<37, 1, u64> merge_37; 1243 BitField<37, 1, u64> merge_37;
1227 BitField<48, 1, u64> sign_a; 1244 BitField<48, 1, u64> sign_a;
1228 BitField<49, 1, u64> sign_b; 1245 BitField<49, 1, u64> sign_b;
1246 BitField<50, 2, XmadMode> mode_cbf; // used by CR, RC
1229 BitField<50, 3, XmadMode> mode; 1247 BitField<50, 3, XmadMode> mode;
1230 BitField<52, 1, u64> high_b; 1248 BitField<52, 1, u64> high_b;
1231 BitField<53, 1, u64> high_a; 1249 BitField<53, 1, u64> high_a;
1250 BitField<55, 1, u64> product_shift_left_second; // used on CR
1232 BitField<56, 1, u64> merge_56; 1251 BitField<56, 1, u64> merge_56;
1233 } xmad; 1252 } xmad;
1234 1253
1235 union { 1254 union {
1236 BitField<20, 14, u64> offset; 1255 BitField<20, 14, u64> offset;
1237 BitField<34, 5, u64> index; 1256 BitField<34, 5, u64> index;
1257
1258 u64 GetOffset() const {
1259 return offset * 4;
1260 }
1238 } cbuf34; 1261 } cbuf34;
1239 1262
1240 union { 1263 union {
1241 BitField<20, 16, s64> offset; 1264 BitField<20, 16, s64> offset;
1242 BitField<36, 5, u64> index; 1265 BitField<36, 5, u64> index;
1266
1267 s64 GetOffset() const {
1268 return offset;
1269 }
1243 } cbuf36; 1270 } cbuf36;
1244 1271
1245 // Unsure about the size of this one. 1272 // Unsure about the size of this one.
@@ -1421,6 +1448,7 @@ public:
1421 Flow, 1448 Flow,
1422 Synch, 1449 Synch,
1423 Memory, 1450 Memory,
1451 Texture,
1424 FloatSet, 1452 FloatSet,
1425 FloatSetPredicate, 1453 FloatSetPredicate,
1426 IntegerSet, 1454 IntegerSet,
@@ -1431,6 +1459,7 @@ public:
1431 PredicateSetRegister, 1459 PredicateSetRegister,
1432 RegisterSetPredicate, 1460 RegisterSetPredicate,
1433 Conversion, 1461 Conversion,
1462 Video,
1434 Xmad, 1463 Xmad,
1435 Unknown, 1464 Unknown,
1436 }; 1465 };
@@ -1550,20 +1579,20 @@ private:
1550 INST("1110111101010---", Id::ST_L, Type::Memory, "ST_L"), 1579 INST("1110111101010---", Id::ST_L, Type::Memory, "ST_L"),
1551 INST("1110111011010---", Id::LDG, Type::Memory, "LDG"), 1580 INST("1110111011010---", Id::LDG, Type::Memory, "LDG"),
1552 INST("1110111011011---", Id::STG, Type::Memory, "STG"), 1581 INST("1110111011011---", Id::STG, Type::Memory, "STG"),
1553 INST("110000----111---", Id::TEX, Type::Memory, "TEX"), 1582 INST("110000----111---", Id::TEX, Type::Texture, "TEX"),
1554 INST("1101111101001---", Id::TXQ, Type::Memory, "TXQ"), 1583 INST("1101111101001---", Id::TXQ, Type::Texture, "TXQ"),
1555 INST("1101-00---------", Id::TEXS, Type::Memory, "TEXS"), 1584 INST("1101-00---------", Id::TEXS, Type::Texture, "TEXS"),
1556 INST("1101101---------", Id::TLDS, Type::Memory, "TLDS"), 1585 INST("1101101---------", Id::TLDS, Type::Texture, "TLDS"),
1557 INST("110010----111---", Id::TLD4, Type::Memory, "TLD4"), 1586 INST("110010----111---", Id::TLD4, Type::Texture, "TLD4"),
1558 INST("1101111100------", Id::TLD4S, Type::Memory, "TLD4S"), 1587 INST("1101111100------", Id::TLD4S, Type::Texture, "TLD4S"),
1559 INST("110111110110----", Id::TMML_B, Type::Memory, "TMML_B"), 1588 INST("110111110110----", Id::TMML_B, Type::Texture, "TMML_B"),
1560 INST("1101111101011---", Id::TMML, Type::Memory, "TMML"), 1589 INST("1101111101011---", Id::TMML, Type::Texture, "TMML"),
1561 INST("111000110000----", Id::EXIT, Type::Trivial, "EXIT"), 1590 INST("111000110000----", Id::EXIT, Type::Trivial, "EXIT"),
1562 INST("11100000--------", Id::IPA, Type::Trivial, "IPA"), 1591 INST("11100000--------", Id::IPA, Type::Trivial, "IPA"),
1563 INST("1111101111100---", Id::OUT_R, Type::Trivial, "OUT_R"), 1592 INST("1111101111100---", Id::OUT_R, Type::Trivial, "OUT_R"),
1564 INST("1110111111010---", Id::ISBERD, Type::Trivial, "ISBERD"), 1593 INST("1110111111010---", Id::ISBERD, Type::Trivial, "ISBERD"),
1565 INST("01011111--------", Id::VMAD, Type::Trivial, "VMAD"), 1594 INST("01011111--------", Id::VMAD, Type::Video, "VMAD"),
1566 INST("0101000011110---", Id::VSETP, Type::Trivial, "VSETP"), 1595 INST("0101000011110---", Id::VSETP, Type::Video, "VSETP"),
1567 INST("0011001-1-------", Id::FFMA_IMM, Type::Ffma, "FFMA_IMM"), 1596 INST("0011001-1-------", Id::FFMA_IMM, Type::Ffma, "FFMA_IMM"),
1568 INST("010010011-------", Id::FFMA_CR, Type::Ffma, "FFMA_CR"), 1597 INST("010010011-------", Id::FFMA_CR, Type::Ffma, "FFMA_CR"),
1569 INST("010100011-------", Id::FFMA_RC, Type::Ffma, "FFMA_RC"), 1598 INST("010100011-------", Id::FFMA_RC, Type::Ffma, "FFMA_RC"),
@@ -1636,7 +1665,7 @@ private:
1636 INST("0011011-11110---", Id::BFI_IMM_R, Type::Bfi, "BFI_IMM_R"), 1665 INST("0011011-11110---", Id::BFI_IMM_R, Type::Bfi, "BFI_IMM_R"),
1637 INST("0100110001000---", Id::LOP_C, Type::ArithmeticInteger, "LOP_C"), 1666 INST("0100110001000---", Id::LOP_C, Type::ArithmeticInteger, "LOP_C"),
1638 INST("0101110001000---", Id::LOP_R, Type::ArithmeticInteger, "LOP_R"), 1667 INST("0101110001000---", Id::LOP_R, Type::ArithmeticInteger, "LOP_R"),
1639 INST("0011100001000---", Id::LOP_IMM, Type::ArithmeticInteger, "LOP_IMM"), 1668 INST("0011100-01000---", Id::LOP_IMM, Type::ArithmeticInteger, "LOP_IMM"),
1640 INST("000001----------", Id::LOP32I, Type::ArithmeticIntegerImmediate, "LOP32I"), 1669 INST("000001----------", Id::LOP32I, Type::ArithmeticIntegerImmediate, "LOP32I"),
1641 INST("0000001---------", Id::LOP3_C, Type::ArithmeticInteger, "LOP3_C"), 1670 INST("0000001---------", Id::LOP3_C, Type::ArithmeticInteger, "LOP3_C"),
1642 INST("0101101111100---", Id::LOP3_R, Type::ArithmeticInteger, "LOP3_R"), 1671 INST("0101101111100---", Id::LOP3_R, Type::ArithmeticInteger, "LOP3_R"),
diff --git a/src/video_core/engines/shader_header.h b/src/video_core/engines/shader_header.h
index 99c34649f..e86a7f04a 100644
--- a/src/video_core/engines/shader_header.h
+++ b/src/video_core/engines/shader_header.h
@@ -16,6 +16,13 @@ enum class OutputTopology : u32 {
16 TriangleStrip = 7, 16 TriangleStrip = 7,
17}; 17};
18 18
19enum class AttributeUse : u8 {
20 Unused = 0,
21 Constant = 1,
22 Perspective = 2,
23 ScreenLinear = 3,
24};
25
19// Documentation in: 26// Documentation in:
20// http://download.nvidia.com/open-gpu-doc/Shader-Program-Header/1/Shader-Program-Header.html#ImapTexture 27// http://download.nvidia.com/open-gpu-doc/Shader-Program-Header/1/Shader-Program-Header.html#ImapTexture
21struct Header { 28struct Header {
@@ -84,9 +91,15 @@ struct Header {
84 } vtg; 91 } vtg;
85 92
86 struct { 93 struct {
87 INSERT_PADDING_BYTES(3); // ImapSystemValuesA 94 INSERT_PADDING_BYTES(3); // ImapSystemValuesA
88 INSERT_PADDING_BYTES(1); // ImapSystemValuesB 95 INSERT_PADDING_BYTES(1); // ImapSystemValuesB
89 INSERT_PADDING_BYTES(32); // ImapGenericVector[32] 96 union {
97 BitField<0, 2, AttributeUse> x;
98 BitField<2, 2, AttributeUse> y;
99 BitField<4, 2, AttributeUse> w;
100 BitField<6, 2, AttributeUse> z;
101 u8 raw;
102 } imap_generic_vector[32];
90 INSERT_PADDING_BYTES(2); // ImapColor 103 INSERT_PADDING_BYTES(2); // ImapColor
91 INSERT_PADDING_BYTES(2); // ImapSystemValuesC 104 INSERT_PADDING_BYTES(2); // ImapSystemValuesC
92 INSERT_PADDING_BYTES(10); // ImapFixedFncTexture[10] 105 INSERT_PADDING_BYTES(10); // ImapFixedFncTexture[10]
@@ -103,10 +116,32 @@ struct Header {
103 const u32 bit = render_target * 4 + component; 116 const u32 bit = render_target * 4 + component;
104 return omap.target & (1 << bit); 117 return omap.target & (1 << bit);
105 } 118 }
119 AttributeUse GetAttributeIndexUse(u32 attribute, u32 index) const {
120 return static_cast<AttributeUse>(
121 (imap_generic_vector[attribute].raw >> (index * 2)) & 0x03);
122 }
123 AttributeUse GetAttributeUse(u32 attribute) const {
124 AttributeUse result = AttributeUse::Unused;
125 for (u32 i = 0; i < 4; i++) {
126 const auto index = GetAttributeIndexUse(attribute, i);
127 if (index == AttributeUse::Unused) {
128 continue;
129 }
130 if (result == AttributeUse::Unused || result == index) {
131 result = index;
132 continue;
133 }
134 LOG_CRITICAL(HW_GPU, "Generic Attribute Conflict in Interpolation Mode");
135 if (index == AttributeUse::Perspective) {
136 result = index;
137 }
138 }
139 return result;
140 }
106 } ps; 141 } ps;
107 }; 142 };
108 143
109 u64 GetLocalMemorySize() { 144 u64 GetLocalMemorySize() const {
110 return (common1.shader_local_memory_low_size | 145 return (common1.shader_local_memory_low_size |
111 (common2.shader_local_memory_high_size << 24)); 146 (common2.shader_local_memory_high_size << 24));
112 } 147 }
diff --git a/src/video_core/gpu.cpp b/src/video_core/gpu.cpp
index 08cf6268f..4461083ff 100644
--- a/src/video_core/gpu.cpp
+++ b/src/video_core/gpu.cpp
@@ -3,19 +3,24 @@
3// Refer to the license.txt file included. 3// Refer to the license.txt file included.
4 4
5#include "common/assert.h" 5#include "common/assert.h"
6#include "core/core.h"
7#include "core/core_timing.h"
8#include "core/memory.h"
6#include "video_core/engines/fermi_2d.h" 9#include "video_core/engines/fermi_2d.h"
10#include "video_core/engines/kepler_compute.h"
7#include "video_core/engines/kepler_memory.h" 11#include "video_core/engines/kepler_memory.h"
8#include "video_core/engines/maxwell_3d.h" 12#include "video_core/engines/maxwell_3d.h"
9#include "video_core/engines/maxwell_compute.h"
10#include "video_core/engines/maxwell_dma.h" 13#include "video_core/engines/maxwell_dma.h"
11#include "video_core/gpu.h" 14#include "video_core/gpu.h"
12#include "video_core/rasterizer_interface.h" 15#include "video_core/memory_manager.h"
16#include "video_core/renderer_base.h"
13 17
14namespace Tegra { 18namespace Tegra {
15 19
16u32 FramebufferConfig::BytesPerPixel(PixelFormat format) { 20u32 FramebufferConfig::BytesPerPixel(PixelFormat format) {
17 switch (format) { 21 switch (format) {
18 case PixelFormat::ABGR8: 22 case PixelFormat::ABGR8:
23 case PixelFormat::BGRA8:
19 return 4; 24 return 4;
20 default: 25 default:
21 return 4; 26 return 4;
@@ -24,14 +29,15 @@ u32 FramebufferConfig::BytesPerPixel(PixelFormat format) {
24 UNREACHABLE(); 29 UNREACHABLE();
25} 30}
26 31
27GPU::GPU(VideoCore::RasterizerInterface& rasterizer) { 32GPU::GPU(Core::System& system, VideoCore::RendererBase& renderer) : renderer{renderer} {
28 memory_manager = std::make_unique<Tegra::MemoryManager>(); 33 auto& rasterizer{renderer.Rasterizer()};
34 memory_manager = std::make_unique<Tegra::MemoryManager>(rasterizer);
29 dma_pusher = std::make_unique<Tegra::DmaPusher>(*this); 35 dma_pusher = std::make_unique<Tegra::DmaPusher>(*this);
30 maxwell_3d = std::make_unique<Engines::Maxwell3D>(rasterizer, *memory_manager); 36 maxwell_3d = std::make_unique<Engines::Maxwell3D>(system, rasterizer, *memory_manager);
31 fermi_2d = std::make_unique<Engines::Fermi2D>(rasterizer, *memory_manager); 37 fermi_2d = std::make_unique<Engines::Fermi2D>(rasterizer, *memory_manager);
32 maxwell_compute = std::make_unique<Engines::MaxwellCompute>(); 38 kepler_compute = std::make_unique<Engines::KeplerCompute>(*memory_manager);
33 maxwell_dma = std::make_unique<Engines::MaxwellDMA>(rasterizer, *memory_manager); 39 maxwell_dma = std::make_unique<Engines::MaxwellDMA>(system, rasterizer, *memory_manager);
34 kepler_memory = std::make_unique<Engines::KeplerMemory>(rasterizer, *memory_manager); 40 kepler_memory = std::make_unique<Engines::KeplerMemory>(system, rasterizer, *memory_manager);
35} 41}
36 42
37GPU::~GPU() = default; 43GPU::~GPU() = default;
@@ -124,9 +130,36 @@ u32 DepthFormatBytesPerPixel(DepthFormat format) {
124 } 130 }
125} 131}
126 132
133// Note that, traditionally, methods are treated as 4-byte addressable locations, and hence
134// their numbers are written down multiplied by 4 in Docs. Here we are not multiply by 4.
135// So the values you see in docs might be multiplied by 4.
127enum class BufferMethods { 136enum class BufferMethods {
128 BindObject = 0, 137 BindObject = 0x0,
129 CountBufferMethods = 0x40, 138 Nop = 0x2,
139 SemaphoreAddressHigh = 0x4,
140 SemaphoreAddressLow = 0x5,
141 SemaphoreSequence = 0x6,
142 SemaphoreTrigger = 0x7,
143 NotifyIntr = 0x8,
144 WrcacheFlush = 0x9,
145 Unk28 = 0xA,
146 Unk2c = 0xB,
147 RefCnt = 0x14,
148 SemaphoreAcquire = 0x1A,
149 SemaphoreRelease = 0x1B,
150 Unk70 = 0x1C,
151 Unk74 = 0x1D,
152 Unk78 = 0x1E,
153 Unk7c = 0x1F,
154 Yield = 0x20,
155 NonPullerMethods = 0x40,
156};
157
158enum class GpuSemaphoreOperation {
159 AcquireEqual = 0x1,
160 WriteLong = 0x2,
161 AcquireGequal = 0x4,
162 AcquireMask = 0x8,
130}; 163};
131 164
132void GPU::CallMethod(const MethodCall& method_call) { 165void GPU::CallMethod(const MethodCall& method_call) {
@@ -135,20 +168,78 @@ void GPU::CallMethod(const MethodCall& method_call) {
135 168
136 ASSERT(method_call.subchannel < bound_engines.size()); 169 ASSERT(method_call.subchannel < bound_engines.size());
137 170
138 if (method_call.method == static_cast<u32>(BufferMethods::BindObject)) { 171 if (ExecuteMethodOnEngine(method_call)) {
139 // Bind the current subchannel to the desired engine id. 172 CallEngineMethod(method_call);
140 LOG_DEBUG(HW_GPU, "Binding subchannel {} to engine {}", method_call.subchannel, 173 } else {
141 method_call.argument); 174 CallPullerMethod(method_call);
142 bound_engines[method_call.subchannel] = static_cast<EngineID>(method_call.argument);
143 return;
144 } 175 }
176}
177
178bool GPU::ExecuteMethodOnEngine(const MethodCall& method_call) {
179 const auto method = static_cast<BufferMethods>(method_call.method);
180 return method >= BufferMethods::NonPullerMethods;
181}
145 182
146 if (method_call.method < static_cast<u32>(BufferMethods::CountBufferMethods)) { 183void GPU::CallPullerMethod(const MethodCall& method_call) {
147 // TODO(Subv): Research and implement these methods. 184 regs.reg_array[method_call.method] = method_call.argument;
148 LOG_ERROR(HW_GPU, "Special buffer methods other than Bind are not implemented"); 185 const auto method = static_cast<BufferMethods>(method_call.method);
149 return; 186
187 switch (method) {
188 case BufferMethods::BindObject: {
189 ProcessBindMethod(method_call);
190 break;
191 }
192 case BufferMethods::Nop:
193 case BufferMethods::SemaphoreAddressHigh:
194 case BufferMethods::SemaphoreAddressLow:
195 case BufferMethods::SemaphoreSequence:
196 case BufferMethods::RefCnt:
197 break;
198 case BufferMethods::SemaphoreTrigger: {
199 ProcessSemaphoreTriggerMethod();
200 break;
201 }
202 case BufferMethods::NotifyIntr: {
203 // TODO(Kmather73): Research and implement this method.
204 LOG_ERROR(HW_GPU, "Special puller engine method NotifyIntr not implemented");
205 break;
206 }
207 case BufferMethods::WrcacheFlush: {
208 // TODO(Kmather73): Research and implement this method.
209 LOG_ERROR(HW_GPU, "Special puller engine method WrcacheFlush not implemented");
210 break;
211 }
212 case BufferMethods::Unk28: {
213 // TODO(Kmather73): Research and implement this method.
214 LOG_ERROR(HW_GPU, "Special puller engine method Unk28 not implemented");
215 break;
216 }
217 case BufferMethods::Unk2c: {
218 // TODO(Kmather73): Research and implement this method.
219 LOG_ERROR(HW_GPU, "Special puller engine method Unk2c not implemented");
220 break;
221 }
222 case BufferMethods::SemaphoreAcquire: {
223 ProcessSemaphoreAcquire();
224 break;
150 } 225 }
226 case BufferMethods::SemaphoreRelease: {
227 ProcessSemaphoreRelease();
228 break;
229 }
230 case BufferMethods::Yield: {
231 // TODO(Kmather73): Research and implement this method.
232 LOG_ERROR(HW_GPU, "Special puller engine method Yield not implemented");
233 break;
234 }
235 default:
236 LOG_ERROR(HW_GPU, "Special puller engine method {:X} not implemented",
237 static_cast<u32>(method));
238 break;
239 }
240}
151 241
242void GPU::CallEngineMethod(const MethodCall& method_call) {
152 const EngineID engine = bound_engines[method_call.subchannel]; 243 const EngineID engine = bound_engines[method_call.subchannel];
153 244
154 switch (engine) { 245 switch (engine) {
@@ -158,8 +249,8 @@ void GPU::CallMethod(const MethodCall& method_call) {
158 case EngineID::MAXWELL_B: 249 case EngineID::MAXWELL_B:
159 maxwell_3d->CallMethod(method_call); 250 maxwell_3d->CallMethod(method_call);
160 break; 251 break;
161 case EngineID::MAXWELL_COMPUTE_B: 252 case EngineID::KEPLER_COMPUTE_B:
162 maxwell_compute->CallMethod(method_call); 253 kepler_compute->CallMethod(method_call);
163 break; 254 break;
164 case EngineID::MAXWELL_DMA_COPY_A: 255 case EngineID::MAXWELL_DMA_COPY_A:
165 maxwell_dma->CallMethod(method_call); 256 maxwell_dma->CallMethod(method_call);
@@ -172,4 +263,72 @@ void GPU::CallMethod(const MethodCall& method_call) {
172 } 263 }
173} 264}
174 265
266void GPU::ProcessBindMethod(const MethodCall& method_call) {
267 // Bind the current subchannel to the desired engine id.
268 LOG_DEBUG(HW_GPU, "Binding subchannel {} to engine {}", method_call.subchannel,
269 method_call.argument);
270 bound_engines[method_call.subchannel] = static_cast<EngineID>(method_call.argument);
271}
272
273void GPU::ProcessSemaphoreTriggerMethod() {
274 const auto semaphoreOperationMask = 0xF;
275 const auto op =
276 static_cast<GpuSemaphoreOperation>(regs.semaphore_trigger & semaphoreOperationMask);
277 if (op == GpuSemaphoreOperation::WriteLong) {
278 struct Block {
279 u32 sequence;
280 u32 zeros = 0;
281 u64 timestamp;
282 };
283
284 Block block{};
285 block.sequence = regs.semaphore_sequence;
286 // TODO(Kmather73): Generate a real GPU timestamp and write it here instead of
287 // CoreTiming
288 block.timestamp = Core::System::GetInstance().CoreTiming().GetTicks();
289 memory_manager->WriteBlock(regs.semaphore_address.SemaphoreAddress(), &block,
290 sizeof(block));
291 } else {
292 const u32 word{memory_manager->Read<u32>(regs.semaphore_address.SemaphoreAddress())};
293 if ((op == GpuSemaphoreOperation::AcquireEqual && word == regs.semaphore_sequence) ||
294 (op == GpuSemaphoreOperation::AcquireGequal &&
295 static_cast<s32>(word - regs.semaphore_sequence) > 0) ||
296 (op == GpuSemaphoreOperation::AcquireMask && (word & regs.semaphore_sequence))) {
297 // Nothing to do in this case
298 } else {
299 regs.acquire_source = true;
300 regs.acquire_value = regs.semaphore_sequence;
301 if (op == GpuSemaphoreOperation::AcquireEqual) {
302 regs.acquire_active = true;
303 regs.acquire_mode = false;
304 } else if (op == GpuSemaphoreOperation::AcquireGequal) {
305 regs.acquire_active = true;
306 regs.acquire_mode = true;
307 } else if (op == GpuSemaphoreOperation::AcquireMask) {
308 // TODO(kemathe) The acquire mask operation waits for a value that, ANDed with
309 // semaphore_sequence, gives a non-0 result
310 LOG_ERROR(HW_GPU, "Invalid semaphore operation AcquireMask not implemented");
311 } else {
312 LOG_ERROR(HW_GPU, "Invalid semaphore operation");
313 }
314 }
315 }
316}
317
318void GPU::ProcessSemaphoreRelease() {
319 memory_manager->Write<u32>(regs.semaphore_address.SemaphoreAddress(), regs.semaphore_release);
320}
321
322void GPU::ProcessSemaphoreAcquire() {
323 const u32 word = memory_manager->Read<u32>(regs.semaphore_address.SemaphoreAddress());
324 const auto value = regs.semaphore_acquire;
325 if (word != value) {
326 regs.acquire_active = true;
327 regs.acquire_value = value;
328 // TODO(kemathe73) figure out how to do the acquire_timeout
329 regs.acquire_mode = false;
330 regs.acquire_source = false;
331 }
332}
333
175} // namespace Tegra 334} // namespace Tegra
diff --git a/src/video_core/gpu.h b/src/video_core/gpu.h
index af5ccd1e9..de30ea354 100644
--- a/src/video_core/gpu.h
+++ b/src/video_core/gpu.h
@@ -6,16 +6,23 @@
6 6
7#include <array> 7#include <array>
8#include <memory> 8#include <memory>
9#include <vector>
10#include "common/common_types.h" 9#include "common/common_types.h"
11#include "core/hle/service/nvflinger/buffer_queue.h" 10#include "core/hle/service/nvflinger/buffer_queue.h"
12#include "video_core/dma_pusher.h" 11#include "video_core/dma_pusher.h"
13#include "video_core/memory_manager.h"
14 12
15namespace VideoCore { 13using CacheAddr = std::uintptr_t;
16class RasterizerInterface; 14inline CacheAddr ToCacheAddr(const void* host_ptr) {
15 return reinterpret_cast<CacheAddr>(host_ptr);
16}
17
18namespace Core {
19class System;
17} 20}
18 21
22namespace VideoCore {
23class RendererBase;
24} // namespace VideoCore
25
19namespace Tegra { 26namespace Tegra {
20 27
21enum class RenderTargetFormat : u32 { 28enum class RenderTargetFormat : u32 {
@@ -80,6 +87,7 @@ class DebugContext;
80struct FramebufferConfig { 87struct FramebufferConfig {
81 enum class PixelFormat : u32 { 88 enum class PixelFormat : u32 {
82 ABGR8 = 1, 89 ABGR8 = 1,
90 BGRA8 = 5,
83 }; 91 };
84 92
85 /** 93 /**
@@ -96,29 +104,32 @@ struct FramebufferConfig {
96 104
97 using TransformFlags = Service::NVFlinger::BufferQueue::BufferTransformFlags; 105 using TransformFlags = Service::NVFlinger::BufferQueue::BufferTransformFlags;
98 TransformFlags transform_flags; 106 TransformFlags transform_flags;
99 MathUtil::Rectangle<int> crop_rect; 107 Common::Rectangle<int> crop_rect;
100}; 108};
101 109
102namespace Engines { 110namespace Engines {
103class Fermi2D; 111class Fermi2D;
104class Maxwell3D; 112class Maxwell3D;
105class MaxwellCompute;
106class MaxwellDMA; 113class MaxwellDMA;
114class KeplerCompute;
107class KeplerMemory; 115class KeplerMemory;
108} // namespace Engines 116} // namespace Engines
109 117
110enum class EngineID { 118enum class EngineID {
111 FERMI_TWOD_A = 0x902D, // 2D Engine 119 FERMI_TWOD_A = 0x902D, // 2D Engine
112 MAXWELL_B = 0xB197, // 3D Engine 120 MAXWELL_B = 0xB197, // 3D Engine
113 MAXWELL_COMPUTE_B = 0xB1C0, 121 KEPLER_COMPUTE_B = 0xB1C0,
114 KEPLER_INLINE_TO_MEMORY_B = 0xA140, 122 KEPLER_INLINE_TO_MEMORY_B = 0xA140,
115 MAXWELL_DMA_COPY_A = 0xB0B5, 123 MAXWELL_DMA_COPY_A = 0xB0B5,
116}; 124};
117 125
118class GPU final { 126class MemoryManager;
127
128class GPU {
119public: 129public:
120 explicit GPU(VideoCore::RasterizerInterface& rasterizer); 130 explicit GPU(Core::System& system, VideoCore::RendererBase& renderer);
121 ~GPU(); 131
132 virtual ~GPU();
122 133
123 struct MethodCall { 134 struct MethodCall {
124 u32 method{}; 135 u32 method{};
@@ -156,23 +167,115 @@ public:
156 /// Returns a const reference to the GPU DMA pusher. 167 /// Returns a const reference to the GPU DMA pusher.
157 const Tegra::DmaPusher& DmaPusher() const; 168 const Tegra::DmaPusher& DmaPusher() const;
158 169
170 struct Regs {
171 static constexpr size_t NUM_REGS = 0x100;
172
173 union {
174 struct {
175 INSERT_PADDING_WORDS(0x4);
176 struct {
177 u32 address_high;
178 u32 address_low;
179
180 GPUVAddr SemaphoreAddress() const {
181 return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address_high) << 32) |
182 address_low);
183 }
184 } semaphore_address;
185
186 u32 semaphore_sequence;
187 u32 semaphore_trigger;
188 INSERT_PADDING_WORDS(0xC);
189
190 // The puser and the puller share the reference counter, the pusher only has read
191 // access
192 u32 reference_count;
193 INSERT_PADDING_WORDS(0x5);
194
195 u32 semaphore_acquire;
196 u32 semaphore_release;
197 INSERT_PADDING_WORDS(0xE4);
198
199 // Puller state
200 u32 acquire_mode;
201 u32 acquire_source;
202 u32 acquire_active;
203 u32 acquire_timeout;
204 u32 acquire_value;
205 };
206 std::array<u32, NUM_REGS> reg_array;
207 };
208 } regs{};
209
210 /// Push GPU command entries to be processed
211 virtual void PushGPUEntries(Tegra::CommandList&& entries) = 0;
212
213 /// Swap buffers (render frame)
214 virtual void SwapBuffers(
215 std::optional<std::reference_wrapper<const Tegra::FramebufferConfig>> framebuffer) = 0;
216
217 /// Notify rasterizer that any caches of the specified region should be flushed to Switch memory
218 virtual void FlushRegion(CacheAddr addr, u64 size) = 0;
219
220 /// Notify rasterizer that any caches of the specified region should be invalidated
221 virtual void InvalidateRegion(CacheAddr addr, u64 size) = 0;
222
223 /// Notify rasterizer that any caches of the specified region should be flushed and invalidated
224 virtual void FlushAndInvalidateRegion(CacheAddr addr, u64 size) = 0;
225
159private: 226private:
227 void ProcessBindMethod(const MethodCall& method_call);
228 void ProcessSemaphoreTriggerMethod();
229 void ProcessSemaphoreRelease();
230 void ProcessSemaphoreAcquire();
231
232 /// Calls a GPU puller method.
233 void CallPullerMethod(const MethodCall& method_call);
234
235 /// Calls a GPU engine method.
236 void CallEngineMethod(const MethodCall& method_call);
237
238 /// Determines where the method should be executed.
239 bool ExecuteMethodOnEngine(const MethodCall& method_call);
240
241protected:
160 std::unique_ptr<Tegra::DmaPusher> dma_pusher; 242 std::unique_ptr<Tegra::DmaPusher> dma_pusher;
243 VideoCore::RendererBase& renderer;
244
245private:
161 std::unique_ptr<Tegra::MemoryManager> memory_manager; 246 std::unique_ptr<Tegra::MemoryManager> memory_manager;
162 247
163 /// Mapping of command subchannels to their bound engine ids. 248 /// Mapping of command subchannels to their bound engine ids
164 std::array<EngineID, 8> bound_engines = {}; 249 std::array<EngineID, 8> bound_engines = {};
165
166 /// 3D engine 250 /// 3D engine
167 std::unique_ptr<Engines::Maxwell3D> maxwell_3d; 251 std::unique_ptr<Engines::Maxwell3D> maxwell_3d;
168 /// 2D engine 252 /// 2D engine
169 std::unique_ptr<Engines::Fermi2D> fermi_2d; 253 std::unique_ptr<Engines::Fermi2D> fermi_2d;
170 /// Compute engine 254 /// Compute engine
171 std::unique_ptr<Engines::MaxwellCompute> maxwell_compute; 255 std::unique_ptr<Engines::KeplerCompute> kepler_compute;
172 /// DMA engine 256 /// DMA engine
173 std::unique_ptr<Engines::MaxwellDMA> maxwell_dma; 257 std::unique_ptr<Engines::MaxwellDMA> maxwell_dma;
174 /// Inline memory engine 258 /// Inline memory engine
175 std::unique_ptr<Engines::KeplerMemory> kepler_memory; 259 std::unique_ptr<Engines::KeplerMemory> kepler_memory;
176}; 260};
177 261
262#define ASSERT_REG_POSITION(field_name, position) \
263 static_assert(offsetof(GPU::Regs, field_name) == position * 4, \
264 "Field " #field_name " has invalid position")
265
266ASSERT_REG_POSITION(semaphore_address, 0x4);
267ASSERT_REG_POSITION(semaphore_sequence, 0x6);
268ASSERT_REG_POSITION(semaphore_trigger, 0x7);
269ASSERT_REG_POSITION(reference_count, 0x14);
270ASSERT_REG_POSITION(semaphore_acquire, 0x1A);
271ASSERT_REG_POSITION(semaphore_release, 0x1B);
272
273ASSERT_REG_POSITION(acquire_mode, 0x100);
274ASSERT_REG_POSITION(acquire_source, 0x101);
275ASSERT_REG_POSITION(acquire_active, 0x102);
276ASSERT_REG_POSITION(acquire_timeout, 0x103);
277ASSERT_REG_POSITION(acquire_value, 0x104);
278
279#undef ASSERT_REG_POSITION
280
178} // namespace Tegra 281} // namespace Tegra
diff --git a/src/video_core/gpu_asynch.cpp b/src/video_core/gpu_asynch.cpp
new file mode 100644
index 000000000..db507cf04
--- /dev/null
+++ b/src/video_core/gpu_asynch.cpp
@@ -0,0 +1,37 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "video_core/gpu_asynch.h"
6#include "video_core/gpu_thread.h"
7#include "video_core/renderer_base.h"
8
9namespace VideoCommon {
10
11GPUAsynch::GPUAsynch(Core::System& system, VideoCore::RendererBase& renderer)
12 : Tegra::GPU(system, renderer), gpu_thread{system, renderer, *dma_pusher} {}
13
14GPUAsynch::~GPUAsynch() = default;
15
16void GPUAsynch::PushGPUEntries(Tegra::CommandList&& entries) {
17 gpu_thread.SubmitList(std::move(entries));
18}
19
20void GPUAsynch::SwapBuffers(
21 std::optional<std::reference_wrapper<const Tegra::FramebufferConfig>> framebuffer) {
22 gpu_thread.SwapBuffers(std::move(framebuffer));
23}
24
25void GPUAsynch::FlushRegion(CacheAddr addr, u64 size) {
26 gpu_thread.FlushRegion(addr, size);
27}
28
29void GPUAsynch::InvalidateRegion(CacheAddr addr, u64 size) {
30 gpu_thread.InvalidateRegion(addr, size);
31}
32
33void GPUAsynch::FlushAndInvalidateRegion(CacheAddr addr, u64 size) {
34 gpu_thread.FlushAndInvalidateRegion(addr, size);
35}
36
37} // namespace VideoCommon
diff --git a/src/video_core/gpu_asynch.h b/src/video_core/gpu_asynch.h
new file mode 100644
index 000000000..1dcc61a6c
--- /dev/null
+++ b/src/video_core/gpu_asynch.h
@@ -0,0 +1,37 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include "video_core/gpu.h"
8#include "video_core/gpu_thread.h"
9
10namespace VideoCore {
11class RendererBase;
12} // namespace VideoCore
13
14namespace VideoCommon {
15
16namespace GPUThread {
17class ThreadManager;
18} // namespace GPUThread
19
20/// Implementation of GPU interface that runs the GPU asynchronously
21class GPUAsynch : public Tegra::GPU {
22public:
23 explicit GPUAsynch(Core::System& system, VideoCore::RendererBase& renderer);
24 ~GPUAsynch() override;
25
26 void PushGPUEntries(Tegra::CommandList&& entries) override;
27 void SwapBuffers(
28 std::optional<std::reference_wrapper<const Tegra::FramebufferConfig>> framebuffer) override;
29 void FlushRegion(CacheAddr addr, u64 size) override;
30 void InvalidateRegion(CacheAddr addr, u64 size) override;
31 void FlushAndInvalidateRegion(CacheAddr addr, u64 size) override;
32
33private:
34 GPUThread::ThreadManager gpu_thread;
35};
36
37} // namespace VideoCommon
diff --git a/src/video_core/gpu_synch.cpp b/src/video_core/gpu_synch.cpp
new file mode 100644
index 000000000..2cfc900ed
--- /dev/null
+++ b/src/video_core/gpu_synch.cpp
@@ -0,0 +1,37 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "video_core/gpu_synch.h"
6#include "video_core/renderer_base.h"
7
8namespace VideoCommon {
9
10GPUSynch::GPUSynch(Core::System& system, VideoCore::RendererBase& renderer)
11 : Tegra::GPU(system, renderer) {}
12
13GPUSynch::~GPUSynch() = default;
14
15void GPUSynch::PushGPUEntries(Tegra::CommandList&& entries) {
16 dma_pusher->Push(std::move(entries));
17 dma_pusher->DispatchCalls();
18}
19
20void GPUSynch::SwapBuffers(
21 std::optional<std::reference_wrapper<const Tegra::FramebufferConfig>> framebuffer) {
22 renderer.SwapBuffers(std::move(framebuffer));
23}
24
25void GPUSynch::FlushRegion(CacheAddr addr, u64 size) {
26 renderer.Rasterizer().FlushRegion(addr, size);
27}
28
29void GPUSynch::InvalidateRegion(CacheAddr addr, u64 size) {
30 renderer.Rasterizer().InvalidateRegion(addr, size);
31}
32
33void GPUSynch::FlushAndInvalidateRegion(CacheAddr addr, u64 size) {
34 renderer.Rasterizer().FlushAndInvalidateRegion(addr, size);
35}
36
37} // namespace VideoCommon
diff --git a/src/video_core/gpu_synch.h b/src/video_core/gpu_synch.h
new file mode 100644
index 000000000..766b5631c
--- /dev/null
+++ b/src/video_core/gpu_synch.h
@@ -0,0 +1,29 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include "video_core/gpu.h"
8
9namespace VideoCore {
10class RendererBase;
11} // namespace VideoCore
12
13namespace VideoCommon {
14
15/// Implementation of GPU interface that runs the GPU synchronously
16class GPUSynch : public Tegra::GPU {
17public:
18 explicit GPUSynch(Core::System& system, VideoCore::RendererBase& renderer);
19 ~GPUSynch() override;
20
21 void PushGPUEntries(Tegra::CommandList&& entries) override;
22 void SwapBuffers(
23 std::optional<std::reference_wrapper<const Tegra::FramebufferConfig>> framebuffer) override;
24 void FlushRegion(CacheAddr addr, u64 size) override;
25 void InvalidateRegion(CacheAddr addr, u64 size) override;
26 void FlushAndInvalidateRegion(CacheAddr addr, u64 size) override;
27};
28
29} // namespace VideoCommon
diff --git a/src/video_core/gpu_thread.cpp b/src/video_core/gpu_thread.cpp
new file mode 100644
index 000000000..cc56cf467
--- /dev/null
+++ b/src/video_core/gpu_thread.cpp
@@ -0,0 +1,121 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/assert.h"
6#include "common/microprofile.h"
7#include "core/core.h"
8#include "core/core_timing.h"
9#include "core/core_timing_util.h"
10#include "core/frontend/scope_acquire_window_context.h"
11#include "video_core/dma_pusher.h"
12#include "video_core/gpu.h"
13#include "video_core/gpu_thread.h"
14#include "video_core/renderer_base.h"
15
16namespace VideoCommon::GPUThread {
17
18/// Runs the GPU thread
19static void RunThread(VideoCore::RendererBase& renderer, Tegra::DmaPusher& dma_pusher,
20 SynchState& state) {
21 MicroProfileOnThreadCreate("GpuThread");
22
23 // Wait for first GPU command before acquiring the window context
24 state.WaitForCommands();
25
26 // If emulation was stopped during disk shader loading, abort before trying to acquire context
27 if (!state.is_running) {
28 return;
29 }
30
31 Core::Frontend::ScopeAcquireWindowContext acquire_context{renderer.GetRenderWindow()};
32
33 CommandDataContainer next;
34 while (state.is_running) {
35 state.WaitForCommands();
36 while (!state.queue.Empty()) {
37 state.queue.Pop(next);
38 if (const auto submit_list = std::get_if<SubmitListCommand>(&next.data)) {
39 dma_pusher.Push(std::move(submit_list->entries));
40 dma_pusher.DispatchCalls();
41 } else if (const auto data = std::get_if<SwapBuffersCommand>(&next.data)) {
42 renderer.SwapBuffers(std::move(data->framebuffer));
43 } else if (const auto data = std::get_if<FlushRegionCommand>(&next.data)) {
44 renderer.Rasterizer().FlushRegion(data->addr, data->size);
45 } else if (const auto data = std::get_if<InvalidateRegionCommand>(&next.data)) {
46 renderer.Rasterizer().InvalidateRegion(data->addr, data->size);
47 } else if (const auto data = std::get_if<EndProcessingCommand>(&next.data)) {
48 return;
49 } else {
50 UNREACHABLE();
51 }
52 state.signaled_fence = next.fence;
53 state.TrySynchronize();
54 }
55 }
56}
57
58ThreadManager::ThreadManager(Core::System& system, VideoCore::RendererBase& renderer,
59 Tegra::DmaPusher& dma_pusher)
60 : system{system}, thread{RunThread, std::ref(renderer), std::ref(dma_pusher), std::ref(state)} {
61 synchronization_event = system.CoreTiming().RegisterEvent(
62 "GPUThreadSynch", [this](u64 fence, s64) { state.WaitForSynchronization(fence); });
63}
64
65ThreadManager::~ThreadManager() {
66 // Notify GPU thread that a shutdown is pending
67 PushCommand(EndProcessingCommand());
68 thread.join();
69}
70
71void ThreadManager::SubmitList(Tegra::CommandList&& entries) {
72 const u64 fence{PushCommand(SubmitListCommand(std::move(entries)))};
73 const s64 synchronization_ticks{Core::Timing::usToCycles(9000)};
74 system.CoreTiming().ScheduleEvent(synchronization_ticks, synchronization_event, fence);
75}
76
77void ThreadManager::SwapBuffers(
78 std::optional<std::reference_wrapper<const Tegra::FramebufferConfig>> framebuffer) {
79 PushCommand(SwapBuffersCommand(std::move(framebuffer)));
80}
81
82void ThreadManager::FlushRegion(CacheAddr addr, u64 size) {
83 PushCommand(FlushRegionCommand(addr, size));
84}
85
86void ThreadManager::InvalidateRegion(CacheAddr addr, u64 size) {
87 if (state.queue.Empty()) {
88 // It's quicker to invalidate a single region on the CPU if the queue is already empty
89 system.Renderer().Rasterizer().InvalidateRegion(addr, size);
90 } else {
91 PushCommand(InvalidateRegionCommand(addr, size));
92 }
93}
94
95void ThreadManager::FlushAndInvalidateRegion(CacheAddr addr, u64 size) {
96 // Skip flush on asynch mode, as FlushAndInvalidateRegion is not used for anything too important
97 InvalidateRegion(addr, size);
98}
99
100u64 ThreadManager::PushCommand(CommandData&& command_data) {
101 const u64 fence{++state.last_fence};
102 state.queue.Push(CommandDataContainer(std::move(command_data), fence));
103 state.SignalCommands();
104 return fence;
105}
106
107MICROPROFILE_DEFINE(GPU_wait, "GPU", "Wait for the GPU", MP_RGB(128, 128, 192));
108void SynchState::WaitForSynchronization(u64 fence) {
109 if (signaled_fence >= fence) {
110 return;
111 }
112
113 // Wait for the GPU to be idle (all commands to be executed)
114 {
115 MICROPROFILE_SCOPE(GPU_wait);
116 std::unique_lock<std::mutex> lock{synchronization_mutex};
117 synchronization_condition.wait(lock, [this, fence] { return signaled_fence >= fence; });
118 }
119}
120
121} // namespace VideoCommon::GPUThread
diff --git a/src/video_core/gpu_thread.h b/src/video_core/gpu_thread.h
new file mode 100644
index 000000000..62bcea5bb
--- /dev/null
+++ b/src/video_core/gpu_thread.h
@@ -0,0 +1,173 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <atomic>
8#include <condition_variable>
9#include <mutex>
10#include <optional>
11#include <thread>
12#include <variant>
13
14#include "common/threadsafe_queue.h"
15#include "video_core/gpu.h"
16
17namespace Tegra {
18struct FramebufferConfig;
19class DmaPusher;
20} // namespace Tegra
21
22namespace Core {
23class System;
24namespace Timing {
25struct EventType;
26} // namespace Timing
27} // namespace Core
28
29namespace VideoCommon::GPUThread {
30
31/// Command to signal to the GPU thread that processing has ended
32struct EndProcessingCommand final {};
33
34/// Command to signal to the GPU thread that a command list is ready for processing
35struct SubmitListCommand final {
36 explicit SubmitListCommand(Tegra::CommandList&& entries) : entries{std::move(entries)} {}
37
38 Tegra::CommandList entries;
39};
40
41/// Command to signal to the GPU thread that a swap buffers is pending
42struct SwapBuffersCommand final {
43 explicit SwapBuffersCommand(std::optional<const Tegra::FramebufferConfig> framebuffer)
44 : framebuffer{std::move(framebuffer)} {}
45
46 std::optional<Tegra::FramebufferConfig> framebuffer;
47};
48
49/// Command to signal to the GPU thread to flush a region
50struct FlushRegionCommand final {
51 explicit constexpr FlushRegionCommand(CacheAddr addr, u64 size) : addr{addr}, size{size} {}
52
53 CacheAddr addr;
54 u64 size;
55};
56
57/// Command to signal to the GPU thread to invalidate a region
58struct InvalidateRegionCommand final {
59 explicit constexpr InvalidateRegionCommand(CacheAddr addr, u64 size) : addr{addr}, size{size} {}
60
61 CacheAddr addr;
62 u64 size;
63};
64
65/// Command to signal to the GPU thread to flush and invalidate a region
66struct FlushAndInvalidateRegionCommand final {
67 explicit constexpr FlushAndInvalidateRegionCommand(CacheAddr addr, u64 size)
68 : addr{addr}, size{size} {}
69
70 CacheAddr addr;
71 u64 size;
72};
73
74using CommandData =
75 std::variant<EndProcessingCommand, SubmitListCommand, SwapBuffersCommand, FlushRegionCommand,
76 InvalidateRegionCommand, FlushAndInvalidateRegionCommand>;
77
78struct CommandDataContainer {
79 CommandDataContainer() = default;
80
81 CommandDataContainer(CommandData&& data, u64 next_fence)
82 : data{std::move(data)}, fence{next_fence} {}
83
84 CommandDataContainer& operator=(const CommandDataContainer& t) {
85 data = std::move(t.data);
86 fence = t.fence;
87 return *this;
88 }
89
90 CommandData data;
91 u64 fence{};
92};
93
94/// Struct used to synchronize the GPU thread
95struct SynchState final {
96 std::atomic_bool is_running{true};
97 std::atomic_int queued_frame_count{};
98 std::mutex synchronization_mutex;
99 std::mutex commands_mutex;
100 std::condition_variable commands_condition;
101 std::condition_variable synchronization_condition;
102
103 /// Returns true if the gap in GPU commands is small enough that we can consider the CPU and GPU
104 /// synchronized. This is entirely empirical.
105 bool IsSynchronized() const {
106 constexpr std::size_t max_queue_gap{5};
107 return queue.Size() <= max_queue_gap;
108 }
109
110 void TrySynchronize() {
111 if (IsSynchronized()) {
112 std::lock_guard<std::mutex> lock{synchronization_mutex};
113 synchronization_condition.notify_one();
114 }
115 }
116
117 void WaitForSynchronization(u64 fence);
118
119 void SignalCommands() {
120 if (queue.Empty()) {
121 return;
122 }
123
124 commands_condition.notify_one();
125 }
126
127 void WaitForCommands() {
128 std::unique_lock lock{commands_mutex};
129 commands_condition.wait(lock, [this] { return !queue.Empty(); });
130 }
131
132 using CommandQueue = Common::SPSCQueue<CommandDataContainer>;
133 CommandQueue queue;
134 u64 last_fence{};
135 std::atomic<u64> signaled_fence{};
136};
137
138/// Class used to manage the GPU thread
139class ThreadManager final {
140public:
141 explicit ThreadManager(Core::System& system, VideoCore::RendererBase& renderer,
142 Tegra::DmaPusher& dma_pusher);
143 ~ThreadManager();
144
145 /// Push GPU command entries to be processed
146 void SubmitList(Tegra::CommandList&& entries);
147
148 /// Swap buffers (render frame)
149 void SwapBuffers(
150 std::optional<std::reference_wrapper<const Tegra::FramebufferConfig>> framebuffer);
151
152 /// Notify rasterizer that any caches of the specified region should be flushed to Switch memory
153 void FlushRegion(CacheAddr addr, u64 size);
154
155 /// Notify rasterizer that any caches of the specified region should be invalidated
156 void InvalidateRegion(CacheAddr addr, u64 size);
157
158 /// Notify rasterizer that any caches of the specified region should be flushed and invalidated
159 void FlushAndInvalidateRegion(CacheAddr addr, u64 size);
160
161private:
162 /// Pushes a command to be executed by the GPU thread
163 u64 PushCommand(CommandData&& command_data);
164
165private:
166 SynchState state;
167 Core::System& system;
168 Core::Timing::EventType* synchronization_event{};
169 std::thread thread;
170 std::thread::id thread_id;
171};
172
173} // namespace VideoCommon::GPUThread
diff --git a/src/video_core/macro_interpreter.cpp b/src/video_core/macro_interpreter.cpp
index 64f75db43..524d9ea5a 100644
--- a/src/video_core/macro_interpreter.cpp
+++ b/src/video_core/macro_interpreter.cpp
@@ -223,27 +223,21 @@ void MacroInterpreter::ProcessResult(ResultOperation operation, u32 reg, u32 res
223} 223}
224 224
225u32 MacroInterpreter::FetchParameter() { 225u32 MacroInterpreter::FetchParameter() {
226 ASSERT(next_parameter_index < parameters.size()); 226 return parameters.at(next_parameter_index++);
227 return parameters[next_parameter_index++];
228} 227}
229 228
230u32 MacroInterpreter::GetRegister(u32 register_id) const { 229u32 MacroInterpreter::GetRegister(u32 register_id) const {
231 // Register 0 is supposed to always return 0. 230 return registers.at(register_id);
232 if (register_id == 0)
233 return 0;
234
235 ASSERT(register_id < registers.size());
236 return registers[register_id];
237} 231}
238 232
239void MacroInterpreter::SetRegister(u32 register_id, u32 value) { 233void MacroInterpreter::SetRegister(u32 register_id, u32 value) {
240 // Register 0 is supposed to always return 0. NOP is implemented as a store to the zero 234 // Register 0 is hardwired as the zero register.
241 // register. 235 // Ensure no writes to it actually occur.
242 if (register_id == 0) 236 if (register_id == 0) {
243 return; 237 return;
238 }
244 239
245 ASSERT(register_id < registers.size()); 240 registers.at(register_id) = value;
246 registers[register_id] = value;
247} 241}
248 242
249void MacroInterpreter::SetMethodAddress(u32 address) { 243void MacroInterpreter::SetMethodAddress(u32 address) {
diff --git a/src/video_core/memory_manager.cpp b/src/video_core/memory_manager.cpp
index 47247f097..0f4e820aa 100644
--- a/src/video_core/memory_manager.cpp
+++ b/src/video_core/memory_manager.cpp
@@ -5,180 +5,528 @@
5#include "common/alignment.h" 5#include "common/alignment.h"
6#include "common/assert.h" 6#include "common/assert.h"
7#include "common/logging/log.h" 7#include "common/logging/log.h"
8#include "core/memory.h"
8#include "video_core/memory_manager.h" 9#include "video_core/memory_manager.h"
10#include "video_core/rasterizer_interface.h"
9 11
10namespace Tegra { 12namespace Tegra {
11 13
12MemoryManager::MemoryManager() { 14MemoryManager::MemoryManager(VideoCore::RasterizerInterface& rasterizer) : rasterizer{rasterizer} {
13 // Mark the first page as reserved, so that 0 is not a valid GPUVAddr. Otherwise, games might 15 std::fill(page_table.pointers.begin(), page_table.pointers.end(), nullptr);
14 // try to use 0 as a valid address, which is also used to mean nullptr. This fixes a bug with 16 std::fill(page_table.attributes.begin(), page_table.attributes.end(),
15 // Undertale using 0 for a render target. 17 Common::PageType::Unmapped);
16 PageSlot(0) = static_cast<u64>(PageStatus::Reserved); 18 page_table.Resize(address_space_width);
19
20 // Initialize the map with a single free region covering the entire managed space.
21 VirtualMemoryArea initial_vma;
22 initial_vma.size = address_space_end;
23 vma_map.emplace(initial_vma.base, initial_vma);
24
25 UpdatePageTableForVMA(initial_vma);
17} 26}
18 27
19GPUVAddr MemoryManager::AllocateSpace(u64 size, u64 align) { 28GPUVAddr MemoryManager::AllocateSpace(u64 size, u64 align) {
20 const std::optional<GPUVAddr> gpu_addr{FindFreeBlock(0, size, align, PageStatus::Unmapped)}; 29 const u64 aligned_size{Common::AlignUp(size, page_size)};
30 const GPUVAddr gpu_addr{FindFreeRegion(address_space_base, aligned_size)};
21 31
22 ASSERT_MSG(gpu_addr, "unable to find available GPU memory"); 32 AllocateMemory(gpu_addr, 0, aligned_size);
23 33
24 for (u64 offset{}; offset < size; offset += PAGE_SIZE) { 34 return gpu_addr;
25 VAddr& slot{PageSlot(*gpu_addr + offset)}; 35}
26 36
27 ASSERT(slot == static_cast<u64>(PageStatus::Unmapped)); 37GPUVAddr MemoryManager::AllocateSpace(GPUVAddr gpu_addr, u64 size, u64 align) {
38 const u64 aligned_size{Common::AlignUp(size, page_size)};
28 39
29 slot = static_cast<u64>(PageStatus::Allocated); 40 AllocateMemory(gpu_addr, 0, aligned_size);
30 }
31 41
32 return *gpu_addr; 42 return gpu_addr;
33} 43}
34 44
35GPUVAddr MemoryManager::AllocateSpace(GPUVAddr gpu_addr, u64 size, u64 align) { 45GPUVAddr MemoryManager::MapBufferEx(VAddr cpu_addr, u64 size) {
36 for (u64 offset{}; offset < size; offset += PAGE_SIZE) { 46 const u64 aligned_size{Common::AlignUp(size, page_size)};
37 VAddr& slot{PageSlot(gpu_addr + offset)}; 47 const GPUVAddr gpu_addr{FindFreeRegion(address_space_base, aligned_size)};
38 48
39 ASSERT(slot == static_cast<u64>(PageStatus::Unmapped)); 49 MapBackingMemory(gpu_addr, Memory::GetPointer(cpu_addr), aligned_size, cpu_addr);
40 50
41 slot = static_cast<u64>(PageStatus::Allocated); 51 return gpu_addr;
42 } 52}
53
54GPUVAddr MemoryManager::MapBufferEx(VAddr cpu_addr, GPUVAddr gpu_addr, u64 size) {
55 ASSERT((gpu_addr & page_mask) == 0);
56
57 const u64 aligned_size{Common::AlignUp(size, page_size)};
58
59 MapBackingMemory(gpu_addr, Memory::GetPointer(cpu_addr), aligned_size, cpu_addr);
43 60
44 return gpu_addr; 61 return gpu_addr;
45} 62}
46 63
47GPUVAddr MemoryManager::MapBufferEx(VAddr cpu_addr, u64 size) { 64GPUVAddr MemoryManager::UnmapBuffer(GPUVAddr gpu_addr, u64 size) {
48 const std::optional<GPUVAddr> gpu_addr{FindFreeBlock(0, size, PAGE_SIZE, PageStatus::Unmapped)}; 65 ASSERT((gpu_addr & page_mask) == 0);
66
67 const u64 aligned_size{Common::AlignUp(size, page_size)};
68 const CacheAddr cache_addr{ToCacheAddr(GetPointer(gpu_addr))};
69
70 rasterizer.FlushAndInvalidateRegion(cache_addr, aligned_size);
71 UnmapRange(gpu_addr, aligned_size);
49 72
50 ASSERT_MSG(gpu_addr, "unable to find available GPU memory"); 73 return gpu_addr;
74}
51 75
52 for (u64 offset{}; offset < size; offset += PAGE_SIZE) { 76GPUVAddr MemoryManager::FindFreeRegion(GPUVAddr region_start, u64 size) const {
53 VAddr& slot{PageSlot(*gpu_addr + offset)}; 77 // Find the first Free VMA.
78 const VMAHandle vma_handle{
79 std::find_if(vma_map.begin(), vma_map.end(), [region_start, size](const auto& vma) {
80 if (vma.second.type != VirtualMemoryArea::Type::Unmapped) {
81 return false;
82 }
54 83
55 ASSERT(slot == static_cast<u64>(PageStatus::Unmapped)); 84 const VAddr vma_end{vma.second.base + vma.second.size};
85 return vma_end > region_start && vma_end >= region_start + size;
86 })};
56 87
57 slot = cpu_addr + offset; 88 if (vma_handle == vma_map.end()) {
89 return {};
58 } 90 }
59 91
60 const MappedRegion region{cpu_addr, *gpu_addr, size}; 92 return std::max(region_start, vma_handle->second.base);
61 mapped_regions.push_back(region); 93}
62 94
63 return *gpu_addr; 95bool MemoryManager::IsAddressValid(GPUVAddr addr) const {
96 return (addr >> page_bits) < page_table.pointers.size();
64} 97}
65 98
66GPUVAddr MemoryManager::MapBufferEx(VAddr cpu_addr, GPUVAddr gpu_addr, u64 size) { 99std::optional<VAddr> MemoryManager::GpuToCpuAddress(GPUVAddr addr) const {
67 ASSERT((gpu_addr & PAGE_MASK) == 0); 100 if (!IsAddressValid(addr)) {
101 return {};
102 }
68 103
69 if (PageSlot(gpu_addr) != static_cast<u64>(PageStatus::Allocated)) { 104 const VAddr cpu_addr{page_table.backing_addr[addr >> page_bits]};
70 // Page has been already mapped. In this case, we must find a new area of memory to use that 105 if (cpu_addr) {
71 // is different than the specified one. Super Mario Odyssey hits this scenario when changing 106 return cpu_addr + (addr & page_mask);
72 // areas, but we do not want to overwrite the old pages. 107 }
73 // TODO(bunnei): We need to write a hardware test to confirm this behavior. 108
109 return {};
110}
74 111
75 LOG_ERROR(HW_GPU, "attempting to map addr 0x{:016X}, which is not available!", gpu_addr); 112template <typename T>
113T MemoryManager::Read(GPUVAddr addr) const {
114 if (!IsAddressValid(addr)) {
115 return {};
116 }
76 117
77 const std::optional<GPUVAddr> new_gpu_addr{ 118 const u8* page_pointer{page_table.pointers[addr >> page_bits]};
78 FindFreeBlock(gpu_addr, size, PAGE_SIZE, PageStatus::Allocated)}; 119 if (page_pointer) {
120 // NOTE: Avoid adding any extra logic to this fast-path block
121 T value;
122 std::memcpy(&value, &page_pointer[addr & page_mask], sizeof(T));
123 return value;
124 }
79 125
80 ASSERT_MSG(new_gpu_addr, "unable to find available GPU memory"); 126 switch (page_table.attributes[addr >> page_bits]) {
127 case Common::PageType::Unmapped:
128 LOG_ERROR(HW_GPU, "Unmapped Read{} @ 0x{:08X}", sizeof(T) * 8, addr);
129 return 0;
130 case Common::PageType::Memory:
131 ASSERT_MSG(false, "Mapped memory page without a pointer @ {:016X}", addr);
132 break;
133 default:
134 UNREACHABLE();
135 }
136 return {};
137}
81 138
82 gpu_addr = *new_gpu_addr; 139template <typename T>
140void MemoryManager::Write(GPUVAddr addr, T data) {
141 if (!IsAddressValid(addr)) {
142 return;
83 } 143 }
84 144
85 for (u64 offset{}; offset < size; offset += PAGE_SIZE) { 145 u8* page_pointer{page_table.pointers[addr >> page_bits]};
86 VAddr& slot{PageSlot(gpu_addr + offset)}; 146 if (page_pointer) {
147 // NOTE: Avoid adding any extra logic to this fast-path block
148 std::memcpy(&page_pointer[addr & page_mask], &data, sizeof(T));
149 return;
150 }
87 151
88 ASSERT(slot == static_cast<u64>(PageStatus::Allocated)); 152 switch (page_table.attributes[addr >> page_bits]) {
153 case Common::PageType::Unmapped:
154 LOG_ERROR(HW_GPU, "Unmapped Write{} 0x{:08X} @ 0x{:016X}", sizeof(data) * 8,
155 static_cast<u32>(data), addr);
156 return;
157 case Common::PageType::Memory:
158 ASSERT_MSG(false, "Mapped memory page without a pointer @ {:016X}", addr);
159 break;
160 default:
161 UNREACHABLE();
162 }
163}
89 164
90 slot = cpu_addr + offset; 165template u8 MemoryManager::Read<u8>(GPUVAddr addr) const;
166template u16 MemoryManager::Read<u16>(GPUVAddr addr) const;
167template u32 MemoryManager::Read<u32>(GPUVAddr addr) const;
168template u64 MemoryManager::Read<u64>(GPUVAddr addr) const;
169template void MemoryManager::Write<u8>(GPUVAddr addr, u8 data);
170template void MemoryManager::Write<u16>(GPUVAddr addr, u16 data);
171template void MemoryManager::Write<u32>(GPUVAddr addr, u32 data);
172template void MemoryManager::Write<u64>(GPUVAddr addr, u64 data);
173
174u8* MemoryManager::GetPointer(GPUVAddr addr) {
175 if (!IsAddressValid(addr)) {
176 return {};
91 } 177 }
92 178
93 const MappedRegion region{cpu_addr, gpu_addr, size}; 179 u8* const page_pointer{page_table.pointers[addr >> page_bits]};
94 mapped_regions.push_back(region); 180 if (page_pointer != nullptr) {
181 return page_pointer + (addr & page_mask);
182 }
95 183
96 return gpu_addr; 184 LOG_ERROR(HW_GPU, "Unknown GetPointer @ 0x{:016X}", addr);
185 return {};
97} 186}
98 187
99GPUVAddr MemoryManager::UnmapBuffer(GPUVAddr gpu_addr, u64 size) { 188const u8* MemoryManager::GetPointer(GPUVAddr addr) const {
100 ASSERT((gpu_addr & PAGE_MASK) == 0); 189 if (!IsAddressValid(addr)) {
190 return {};
191 }
101 192
102 for (u64 offset{}; offset < size; offset += PAGE_SIZE) { 193 const u8* const page_pointer{page_table.pointers[addr >> page_bits]};
103 VAddr& slot{PageSlot(gpu_addr + offset)}; 194 if (page_pointer != nullptr) {
195 return page_pointer + (addr & page_mask);
196 }
104 197
105 ASSERT(slot != static_cast<u64>(PageStatus::Allocated) && 198 LOG_ERROR(HW_GPU, "Unknown GetPointer @ 0x{:016X}", addr);
106 slot != static_cast<u64>(PageStatus::Unmapped)); 199 return {};
200}
107 201
108 slot = static_cast<u64>(PageStatus::Unmapped); 202void MemoryManager::ReadBlock(GPUVAddr src_addr, void* dest_buffer, std::size_t size) const {
109 } 203 std::size_t remaining_size{size};
204 std::size_t page_index{src_addr >> page_bits};
205 std::size_t page_offset{src_addr & page_mask};
206
207 while (remaining_size > 0) {
208 const std::size_t copy_amount{
209 std::min(static_cast<std::size_t>(page_size) - page_offset, remaining_size)};
210
211 switch (page_table.attributes[page_index]) {
212 case Common::PageType::Memory: {
213 const u8* src_ptr{page_table.pointers[page_index] + page_offset};
214 rasterizer.FlushRegion(ToCacheAddr(src_ptr), copy_amount);
215 std::memcpy(dest_buffer, src_ptr, copy_amount);
216 break;
217 }
218 default:
219 UNREACHABLE();
220 }
110 221
111 // Delete the region mappings that are contained within the unmapped region 222 page_index++;
112 mapped_regions.erase(std::remove_if(mapped_regions.begin(), mapped_regions.end(), 223 page_offset = 0;
113 [&](const MappedRegion& region) { 224 dest_buffer = static_cast<u8*>(dest_buffer) + copy_amount;
114 return region.gpu_addr <= gpu_addr && 225 remaining_size -= copy_amount;
115 region.gpu_addr + region.size < gpu_addr + size; 226 }
116 }),
117 mapped_regions.end());
118 return gpu_addr;
119} 227}
120 228
121GPUVAddr MemoryManager::GetRegionEnd(GPUVAddr region_start) const { 229void MemoryManager::WriteBlock(GPUVAddr dest_addr, const void* src_buffer, std::size_t size) {
122 for (const auto& region : mapped_regions) { 230 std::size_t remaining_size{size};
123 const GPUVAddr region_end{region.gpu_addr + region.size}; 231 std::size_t page_index{dest_addr >> page_bits};
124 if (region_start >= region.gpu_addr && region_start < region_end) { 232 std::size_t page_offset{dest_addr & page_mask};
125 return region_end; 233
234 while (remaining_size > 0) {
235 const std::size_t copy_amount{
236 std::min(static_cast<std::size_t>(page_size) - page_offset, remaining_size)};
237
238 switch (page_table.attributes[page_index]) {
239 case Common::PageType::Memory: {
240 u8* dest_ptr{page_table.pointers[page_index] + page_offset};
241 rasterizer.InvalidateRegion(ToCacheAddr(dest_ptr), copy_amount);
242 std::memcpy(dest_ptr, src_buffer, copy_amount);
243 break;
126 } 244 }
245 default:
246 UNREACHABLE();
247 }
248
249 page_index++;
250 page_offset = 0;
251 src_buffer = static_cast<const u8*>(src_buffer) + copy_amount;
252 remaining_size -= copy_amount;
127 } 253 }
128 return {};
129} 254}
130 255
131std::optional<GPUVAddr> MemoryManager::FindFreeBlock(GPUVAddr region_start, u64 size, u64 align, 256void MemoryManager::CopyBlock(GPUVAddr dest_addr, GPUVAddr src_addr, std::size_t size) {
132 PageStatus status) { 257 std::size_t remaining_size{size};
133 GPUVAddr gpu_addr{region_start}; 258 std::size_t page_index{src_addr >> page_bits};
134 u64 free_space{}; 259 std::size_t page_offset{src_addr & page_mask};
135 align = (align + PAGE_MASK) & ~PAGE_MASK; 260
261 while (remaining_size > 0) {
262 const std::size_t copy_amount{
263 std::min(static_cast<std::size_t>(page_size) - page_offset, remaining_size)};
264
265 switch (page_table.attributes[page_index]) {
266 case Common::PageType::Memory: {
267 const u8* src_ptr{page_table.pointers[page_index] + page_offset};
268 rasterizer.FlushRegion(ToCacheAddr(src_ptr), copy_amount);
269 WriteBlock(dest_addr, src_ptr, copy_amount);
270 break;
271 }
272 default:
273 UNREACHABLE();
274 }
136 275
137 while (gpu_addr + free_space < MAX_ADDRESS) { 276 page_index++;
138 if (PageSlot(gpu_addr + free_space) == static_cast<u64>(status)) { 277 page_offset = 0;
139 free_space += PAGE_SIZE; 278 dest_addr += static_cast<VAddr>(copy_amount);
140 if (free_space >= size) { 279 src_addr += static_cast<VAddr>(copy_amount);
141 return gpu_addr; 280 remaining_size -= copy_amount;
142 } 281 }
143 } else { 282}
144 gpu_addr += free_space + PAGE_SIZE; 283
145 free_space = 0; 284void MemoryManager::MapPages(GPUVAddr base, u64 size, u8* memory, Common::PageType type,
146 gpu_addr = Common::AlignUp(gpu_addr, align); 285 VAddr backing_addr) {
286 LOG_DEBUG(HW_GPU, "Mapping {} onto {:016X}-{:016X}", fmt::ptr(memory), base * page_size,
287 (base + size) * page_size);
288
289 const VAddr end{base + size};
290 ASSERT_MSG(end <= page_table.pointers.size(), "out of range mapping at {:016X}",
291 base + page_table.pointers.size());
292
293 std::fill(page_table.attributes.begin() + base, page_table.attributes.begin() + end, type);
294
295 if (memory == nullptr) {
296 std::fill(page_table.pointers.begin() + base, page_table.pointers.begin() + end, memory);
297 std::fill(page_table.backing_addr.begin() + base, page_table.backing_addr.begin() + end,
298 backing_addr);
299 } else {
300 while (base != end) {
301 page_table.pointers[base] = memory;
302 page_table.backing_addr[base] = backing_addr;
303
304 base += 1;
305 memory += page_size;
306 backing_addr += page_size;
147 } 307 }
148 } 308 }
309}
149 310
150 return {}; 311void MemoryManager::MapMemoryRegion(GPUVAddr base, u64 size, u8* target, VAddr backing_addr) {
312 ASSERT_MSG((size & page_mask) == 0, "non-page aligned size: {:016X}", size);
313 ASSERT_MSG((base & page_mask) == 0, "non-page aligned base: {:016X}", base);
314 MapPages(base / page_size, size / page_size, target, Common::PageType::Memory, backing_addr);
151} 315}
152 316
153std::optional<VAddr> MemoryManager::GpuToCpuAddress(GPUVAddr gpu_addr) { 317void MemoryManager::UnmapRegion(GPUVAddr base, u64 size) {
154 const VAddr base_addr{PageSlot(gpu_addr)}; 318 ASSERT_MSG((size & page_mask) == 0, "non-page aligned size: {:016X}", size);
319 ASSERT_MSG((base & page_mask) == 0, "non-page aligned base: {:016X}", base);
320 MapPages(base / page_size, size / page_size, nullptr, Common::PageType::Unmapped);
321}
155 322
156 if (base_addr == static_cast<u64>(PageStatus::Allocated) || 323bool VirtualMemoryArea::CanBeMergedWith(const VirtualMemoryArea& next) const {
157 base_addr == static_cast<u64>(PageStatus::Unmapped)) { 324 ASSERT(base + size == next.base);
325 if (type != next.type) {
326 return {};
327 }
328 if (type == VirtualMemoryArea::Type::Allocated && (offset + size != next.offset)) {
329 return {};
330 }
331 if (type == VirtualMemoryArea::Type::Mapped && backing_memory + size != next.backing_memory) {
158 return {}; 332 return {};
159 } 333 }
334 return true;
335}
336
337MemoryManager::VMAHandle MemoryManager::FindVMA(GPUVAddr target) const {
338 if (target >= address_space_end) {
339 return vma_map.end();
340 } else {
341 return std::prev(vma_map.upper_bound(target));
342 }
343}
344
345MemoryManager::VMAIter MemoryManager::Allocate(VMAIter vma_handle) {
346 VirtualMemoryArea& vma{vma_handle->second};
347
348 vma.type = VirtualMemoryArea::Type::Allocated;
349 vma.backing_addr = 0;
350 vma.backing_memory = {};
351 UpdatePageTableForVMA(vma);
352
353 return MergeAdjacent(vma_handle);
354}
355
356MemoryManager::VMAHandle MemoryManager::AllocateMemory(GPUVAddr target, std::size_t offset,
357 u64 size) {
358
359 // This is the appropriately sized VMA that will turn into our allocation.
360 VMAIter vma_handle{CarveVMA(target, size)};
361 VirtualMemoryArea& vma{vma_handle->second};
362
363 ASSERT(vma.size == size);
364
365 vma.offset = offset;
366
367 return Allocate(vma_handle);
368}
369
370MemoryManager::VMAHandle MemoryManager::MapBackingMemory(GPUVAddr target, u8* memory, u64 size,
371 VAddr backing_addr) {
372 // This is the appropriately sized VMA that will turn into our allocation.
373 VMAIter vma_handle{CarveVMA(target, size)};
374 VirtualMemoryArea& vma{vma_handle->second};
375
376 ASSERT(vma.size == size);
377
378 vma.type = VirtualMemoryArea::Type::Mapped;
379 vma.backing_memory = memory;
380 vma.backing_addr = backing_addr;
381 UpdatePageTableForVMA(vma);
382
383 return MergeAdjacent(vma_handle);
384}
385
386void MemoryManager::UnmapRange(GPUVAddr target, u64 size) {
387 VMAIter vma{CarveVMARange(target, size)};
388 const VAddr target_end{target + size};
389 const VMAIter end{vma_map.end()};
390
391 // The comparison against the end of the range must be done using addresses since VMAs can be
392 // merged during this process, causing invalidation of the iterators.
393 while (vma != end && vma->second.base < target_end) {
394 // Unmapped ranges return to allocated state and can be reused
395 // This behavior is used by Super Mario Odyssey, Sonic Forces, and likely other games
396 vma = std::next(Allocate(vma));
397 }
398
399 ASSERT(FindVMA(target)->second.size >= size);
400}
160 401
161 return base_addr + (gpu_addr & PAGE_MASK); 402MemoryManager::VMAIter MemoryManager::StripIterConstness(const VMAHandle& iter) {
403 // This uses a neat C++ trick to convert a const_iterator to a regular iterator, given
404 // non-const access to its container.
405 return vma_map.erase(iter, iter); // Erases an empty range of elements
162} 406}
163 407
164std::vector<GPUVAddr> MemoryManager::CpuToGpuAddress(VAddr cpu_addr) const { 408MemoryManager::VMAIter MemoryManager::CarveVMA(GPUVAddr base, u64 size) {
165 std::vector<GPUVAddr> results; 409 ASSERT_MSG((size & page_mask) == 0, "non-page aligned size: 0x{:016X}", size);
166 for (const auto& region : mapped_regions) { 410 ASSERT_MSG((base & page_mask) == 0, "non-page aligned base: 0x{:016X}", base);
167 if (cpu_addr >= region.cpu_addr && cpu_addr < (region.cpu_addr + region.size)) { 411
168 const u64 offset{cpu_addr - region.cpu_addr}; 412 VMAIter vma_handle{StripIterConstness(FindVMA(base))};
169 results.push_back(region.gpu_addr + offset); 413 if (vma_handle == vma_map.end()) {
414 // Target address is outside the managed range
415 return {};
416 }
417
418 const VirtualMemoryArea& vma{vma_handle->second};
419 if (vma.type == VirtualMemoryArea::Type::Mapped) {
420 // Region is already allocated
421 return vma_handle;
422 }
423
424 const VAddr start_in_vma{base - vma.base};
425 const VAddr end_in_vma{start_in_vma + size};
426
427 ASSERT_MSG(end_in_vma <= vma.size, "region size 0x{:016X} is less than required size 0x{:016X}",
428 vma.size, end_in_vma);
429
430 if (end_in_vma < vma.size) {
431 // Split VMA at the end of the allocated region
432 SplitVMA(vma_handle, end_in_vma);
433 }
434 if (start_in_vma != 0) {
435 // Split VMA at the start of the allocated region
436 vma_handle = SplitVMA(vma_handle, start_in_vma);
437 }
438
439 return vma_handle;
440}
441
442MemoryManager::VMAIter MemoryManager::CarveVMARange(GPUVAddr target, u64 size) {
443 ASSERT_MSG((size & page_mask) == 0, "non-page aligned size: 0x{:016X}", size);
444 ASSERT_MSG((target & page_mask) == 0, "non-page aligned base: 0x{:016X}", target);
445
446 const VAddr target_end{target + size};
447 ASSERT(target_end >= target);
448 ASSERT(size > 0);
449
450 VMAIter begin_vma{StripIterConstness(FindVMA(target))};
451 const VMAIter i_end{vma_map.lower_bound(target_end)};
452 if (std::any_of(begin_vma, i_end, [](const auto& entry) {
453 return entry.second.type == VirtualMemoryArea::Type::Unmapped;
454 })) {
455 return {};
456 }
457
458 if (target != begin_vma->second.base) {
459 begin_vma = SplitVMA(begin_vma, target - begin_vma->second.base);
460 }
461
462 VMAIter end_vma{StripIterConstness(FindVMA(target_end))};
463 if (end_vma != vma_map.end() && target_end != end_vma->second.base) {
464 end_vma = SplitVMA(end_vma, target_end - end_vma->second.base);
465 }
466
467 return begin_vma;
468}
469
470MemoryManager::VMAIter MemoryManager::SplitVMA(VMAIter vma_handle, u64 offset_in_vma) {
471 VirtualMemoryArea& old_vma{vma_handle->second};
472 VirtualMemoryArea new_vma{old_vma}; // Make a copy of the VMA
473
474 // For now, don't allow no-op VMA splits (trying to split at a boundary) because it's probably
475 // a bug. This restriction might be removed later.
476 ASSERT(offset_in_vma < old_vma.size);
477 ASSERT(offset_in_vma > 0);
478
479 old_vma.size = offset_in_vma;
480 new_vma.base += offset_in_vma;
481 new_vma.size -= offset_in_vma;
482
483 switch (new_vma.type) {
484 case VirtualMemoryArea::Type::Unmapped:
485 break;
486 case VirtualMemoryArea::Type::Allocated:
487 new_vma.offset += offset_in_vma;
488 break;
489 case VirtualMemoryArea::Type::Mapped:
490 new_vma.backing_memory += offset_in_vma;
491 break;
492 }
493
494 ASSERT(old_vma.CanBeMergedWith(new_vma));
495
496 return vma_map.emplace_hint(std::next(vma_handle), new_vma.base, new_vma);
497}
498
499MemoryManager::VMAIter MemoryManager::MergeAdjacent(VMAIter iter) {
500 const VMAIter next_vma{std::next(iter)};
501 if (next_vma != vma_map.end() && iter->second.CanBeMergedWith(next_vma->second)) {
502 iter->second.size += next_vma->second.size;
503 vma_map.erase(next_vma);
504 }
505
506 if (iter != vma_map.begin()) {
507 VMAIter prev_vma{std::prev(iter)};
508 if (prev_vma->second.CanBeMergedWith(iter->second)) {
509 prev_vma->second.size += iter->second.size;
510 vma_map.erase(iter);
511 iter = prev_vma;
170 } 512 }
171 } 513 }
172 return results; 514
515 return iter;
173} 516}
174 517
175VAddr& MemoryManager::PageSlot(GPUVAddr gpu_addr) { 518void MemoryManager::UpdatePageTableForVMA(const VirtualMemoryArea& vma) {
176 auto& block{page_table[(gpu_addr >> (PAGE_BITS + PAGE_TABLE_BITS)) & PAGE_TABLE_MASK]}; 519 switch (vma.type) {
177 if (!block) { 520 case VirtualMemoryArea::Type::Unmapped:
178 block = std::make_unique<PageBlock>(); 521 UnmapRegion(vma.base, vma.size);
179 block->fill(static_cast<VAddr>(PageStatus::Unmapped)); 522 break;
523 case VirtualMemoryArea::Type::Allocated:
524 MapMemoryRegion(vma.base, vma.size, nullptr, vma.backing_addr);
525 break;
526 case VirtualMemoryArea::Type::Mapped:
527 MapMemoryRegion(vma.base, vma.size, vma.backing_memory, vma.backing_addr);
528 break;
180 } 529 }
181 return (*block)[(gpu_addr >> PAGE_BITS) & PAGE_BLOCK_MASK];
182} 530}
183 531
184} // namespace Tegra 532} // namespace Tegra
diff --git a/src/video_core/memory_manager.h b/src/video_core/memory_manager.h
index fb03497ca..647cbf93a 100644
--- a/src/video_core/memory_manager.h
+++ b/src/video_core/memory_manager.h
@@ -1,67 +1,154 @@
1// Copyright 2018 yuzu emulator team 1// Copyright 2018 yuzu emulator team
2// Licensed under GPLv2 or any later version 2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included. 3// Refer to the license.txt file included.
4 4
5#pragma once 5#pragma once
6 6
7#include <array> 7#include <map>
8#include <memory>
9#include <optional> 8#include <optional>
10#include <vector>
11 9
12#include "common/common_types.h" 10#include "common/common_types.h"
11#include "common/page_table.h"
12
13namespace VideoCore {
14class RasterizerInterface;
15}
13 16
14namespace Tegra { 17namespace Tegra {
15 18
16/// Virtual addresses in the GPU's memory map are 64 bit. 19/**
17using GPUVAddr = u64; 20 * Represents a VMA in an address space. A VMA is a contiguous region of virtual addressing space
21 * with homogeneous attributes across its extents. In this particular implementation each VMA is
22 * also backed by a single host memory allocation.
23 */
24struct VirtualMemoryArea {
25 enum class Type : u8 {
26 Unmapped,
27 Allocated,
28 Mapped,
29 };
30
31 /// Virtual base address of the region.
32 GPUVAddr base{};
33 /// Size of the region.
34 u64 size{};
35 /// Memory area mapping type.
36 Type type{Type::Unmapped};
37 /// CPU memory mapped address corresponding to this memory area.
38 VAddr backing_addr{};
39 /// Offset into the backing_memory the mapping starts from.
40 std::size_t offset{};
41 /// Pointer backing this VMA.
42 u8* backing_memory{};
43
44 /// Tests if this area can be merged to the right with `next`.
45 bool CanBeMergedWith(const VirtualMemoryArea& next) const;
46};
18 47
19class MemoryManager final { 48class MemoryManager final {
20public: 49public:
21 MemoryManager(); 50 MemoryManager(VideoCore::RasterizerInterface& rasterizer);
22 51
23 GPUVAddr AllocateSpace(u64 size, u64 align); 52 GPUVAddr AllocateSpace(u64 size, u64 align);
24 GPUVAddr AllocateSpace(GPUVAddr gpu_addr, u64 size, u64 align); 53 GPUVAddr AllocateSpace(GPUVAddr addr, u64 size, u64 align);
25 GPUVAddr MapBufferEx(VAddr cpu_addr, u64 size); 54 GPUVAddr MapBufferEx(VAddr cpu_addr, u64 size);
26 GPUVAddr MapBufferEx(VAddr cpu_addr, GPUVAddr gpu_addr, u64 size); 55 GPUVAddr MapBufferEx(VAddr cpu_addr, GPUVAddr addr, u64 size);
27 GPUVAddr UnmapBuffer(GPUVAddr gpu_addr, u64 size); 56 GPUVAddr UnmapBuffer(GPUVAddr addr, u64 size);
28 GPUVAddr GetRegionEnd(GPUVAddr region_start) const; 57 std::optional<VAddr> GpuToCpuAddress(GPUVAddr addr) const;
29 std::optional<VAddr> GpuToCpuAddress(GPUVAddr gpu_addr); 58
30 std::vector<GPUVAddr> CpuToGpuAddress(VAddr cpu_addr) const; 59 template <typename T>
60 T Read(GPUVAddr addr) const;
61
62 template <typename T>
63 void Write(GPUVAddr addr, T data);
31 64
32 static constexpr u64 PAGE_BITS = 16; 65 u8* GetPointer(GPUVAddr addr);
33 static constexpr u64 PAGE_SIZE = 1 << PAGE_BITS; 66 const u8* GetPointer(GPUVAddr addr) const;
34 static constexpr u64 PAGE_MASK = PAGE_SIZE - 1; 67
68 void ReadBlock(GPUVAddr src_addr, void* dest_buffer, std::size_t size) const;
69 void WriteBlock(GPUVAddr dest_addr, const void* src_buffer, std::size_t size);
70 void CopyBlock(GPUVAddr dest_addr, GPUVAddr src_addr, std::size_t size);
35 71
36private: 72private:
37 enum class PageStatus : u64 { 73 using VMAMap = std::map<GPUVAddr, VirtualMemoryArea>;
38 Unmapped = 0xFFFFFFFFFFFFFFFFULL, 74 using VMAHandle = VMAMap::const_iterator;
39 Allocated = 0xFFFFFFFFFFFFFFFEULL, 75 using VMAIter = VMAMap::iterator;
40 Reserved = 0xFFFFFFFFFFFFFFFDULL,
41 };
42 76
43 std::optional<GPUVAddr> FindFreeBlock(GPUVAddr region_start, u64 size, u64 align, 77 bool IsAddressValid(GPUVAddr addr) const;
44 PageStatus status); 78 void MapPages(GPUVAddr base, u64 size, u8* memory, Common::PageType type,
45 VAddr& PageSlot(GPUVAddr gpu_addr); 79 VAddr backing_addr = 0);
46 80 void MapMemoryRegion(GPUVAddr base, u64 size, u8* target, VAddr backing_addr);
47 static constexpr u64 MAX_ADDRESS{0x10000000000ULL}; 81 void UnmapRegion(GPUVAddr base, u64 size);
48 static constexpr u64 PAGE_TABLE_BITS{10}; 82
49 static constexpr u64 PAGE_TABLE_SIZE{1 << PAGE_TABLE_BITS}; 83 /// Finds the VMA in which the given address is included in, or `vma_map.end()`.
50 static constexpr u64 PAGE_TABLE_MASK{PAGE_TABLE_SIZE - 1}; 84 VMAHandle FindVMA(GPUVAddr target) const;
51 static constexpr u64 PAGE_BLOCK_BITS{14}; 85
52 static constexpr u64 PAGE_BLOCK_SIZE{1 << PAGE_BLOCK_BITS}; 86 VMAHandle AllocateMemory(GPUVAddr target, std::size_t offset, u64 size);
53 static constexpr u64 PAGE_BLOCK_MASK{PAGE_BLOCK_SIZE - 1}; 87
54 88 /**
55 using PageBlock = std::array<VAddr, PAGE_BLOCK_SIZE>; 89 * Maps an unmanaged host memory pointer at a given address.
56 std::array<std::unique_ptr<PageBlock>, PAGE_TABLE_SIZE> page_table{}; 90 *
57 91 * @param target The guest address to start the mapping at.
58 struct MappedRegion { 92 * @param memory The memory to be mapped.
59 VAddr cpu_addr; 93 * @param size Size of the mapping.
60 GPUVAddr gpu_addr; 94 * @param state MemoryState tag to attach to the VMA.
61 u64 size; 95 */
62 }; 96 VMAHandle MapBackingMemory(GPUVAddr target, u8* memory, u64 size, VAddr backing_addr);
97
98 /// Unmaps a range of addresses, splitting VMAs as necessary.
99 void UnmapRange(GPUVAddr target, u64 size);
100
101 /// Converts a VMAHandle to a mutable VMAIter.
102 VMAIter StripIterConstness(const VMAHandle& iter);
103
104 /// Marks as the specfied VMA as allocated.
105 VMAIter Allocate(VMAIter vma);
106
107 /**
108 * Carves a VMA of a specific size at the specified address by splitting Free VMAs while doing
109 * the appropriate error checking.
110 */
111 VMAIter CarveVMA(GPUVAddr base, u64 size);
112
113 /**
114 * Splits the edges of the given range of non-Free VMAs so that there is a VMA split at each
115 * end of the range.
116 */
117 VMAIter CarveVMARange(GPUVAddr base, u64 size);
118
119 /**
120 * Splits a VMA in two, at the specified offset.
121 * @returns the right side of the split, with the original iterator becoming the left side.
122 */
123 VMAIter SplitVMA(VMAIter vma, u64 offset_in_vma);
124
125 /**
126 * Checks for and merges the specified VMA with adjacent ones if possible.
127 * @returns the merged VMA or the original if no merging was possible.
128 */
129 VMAIter MergeAdjacent(VMAIter vma);
130
131 /// Updates the pages corresponding to this VMA so they match the VMA's attributes.
132 void UpdatePageTableForVMA(const VirtualMemoryArea& vma);
133
134 /// Finds a free (unmapped region) of the specified size starting at the specified address.
135 GPUVAddr FindFreeRegion(GPUVAddr region_start, u64 size) const;
136
137private:
138 static constexpr u64 page_bits{16};
139 static constexpr u64 page_size{1 << page_bits};
140 static constexpr u64 page_mask{page_size - 1};
141
142 /// Address space in bits, this is fairly arbitrary but sufficiently large.
143 static constexpr u32 address_space_width{39};
144 /// Start address for mapping, this is fairly arbitrary but must be non-zero.
145 static constexpr GPUVAddr address_space_base{0x100000};
146 /// End of address space, based on address space in bits.
147 static constexpr GPUVAddr address_space_end{1ULL << address_space_width};
63 148
64 std::vector<MappedRegion> mapped_regions; 149 Common::PageTable page_table{page_bits};
150 VMAMap vma_map;
151 VideoCore::RasterizerInterface& rasterizer;
65}; 152};
66 153
67} // namespace Tegra 154} // namespace Tegra
diff --git a/src/video_core/morton.cpp b/src/video_core/morton.cpp
index b68f4fb13..3e91cbc83 100644
--- a/src/video_core/morton.cpp
+++ b/src/video_core/morton.cpp
@@ -6,7 +6,6 @@
6#include <cstring> 6#include <cstring>
7#include "common/assert.h" 7#include "common/assert.h"
8#include "common/common_types.h" 8#include "common/common_types.h"
9#include "core/memory.h"
10#include "video_core/morton.h" 9#include "video_core/morton.h"
11#include "video_core/surface.h" 10#include "video_core/surface.h"
12#include "video_core/textures/decoders.h" 11#include "video_core/textures/decoders.h"
@@ -16,12 +15,12 @@ namespace VideoCore {
16using Surface::GetBytesPerPixel; 15using Surface::GetBytesPerPixel;
17using Surface::PixelFormat; 16using Surface::PixelFormat;
18 17
19using MortonCopyFn = void (*)(u32, u32, u32, u32, u32, u32, u8*, std::size_t, VAddr); 18using MortonCopyFn = void (*)(u32, u32, u32, u32, u32, u32, u8*, u8*);
20using ConversionArray = std::array<MortonCopyFn, Surface::MaxPixelFormat>; 19using ConversionArray = std::array<MortonCopyFn, Surface::MaxPixelFormat>;
21 20
22template <bool morton_to_linear, PixelFormat format> 21template <bool morton_to_linear, PixelFormat format>
23static void MortonCopy(u32 stride, u32 block_height, u32 height, u32 block_depth, u32 depth, 22static void MortonCopy(u32 stride, u32 block_height, u32 height, u32 block_depth, u32 depth,
24 u32 tile_width_spacing, u8* buffer, std::size_t buffer_size, VAddr addr) { 23 u32 tile_width_spacing, u8* buffer, u8* addr) {
25 constexpr u32 bytes_per_pixel = GetBytesPerPixel(format); 24 constexpr u32 bytes_per_pixel = GetBytesPerPixel(format);
26 25
27 // With the BCn formats (DXT and DXN), each 4x4 tile is swizzled instead of just individual 26 // With the BCn formats (DXT and DXN), each 4x4 tile is swizzled instead of just individual
@@ -34,150 +33,146 @@ static void MortonCopy(u32 stride, u32 block_height, u32 height, u32 block_depth
34 stride, height, depth, block_height, block_depth, 33 stride, height, depth, block_height, block_depth,
35 tile_width_spacing); 34 tile_width_spacing);
36 } else { 35 } else {
37 Tegra::Texture::CopySwizzledData( 36 Tegra::Texture::CopySwizzledData((stride + tile_size_x - 1) / tile_size_x,
38 (stride + tile_size_x - 1) / tile_size_x, (height + tile_size_y - 1) / tile_size_y, 37 (height + tile_size_y - 1) / tile_size_y, depth,
39 depth, bytes_per_pixel, bytes_per_pixel, Memory::GetPointer(addr), buffer, false, 38 bytes_per_pixel, bytes_per_pixel, addr, buffer, false,
40 block_height, block_depth, tile_width_spacing); 39 block_height, block_depth, tile_width_spacing);
41 } 40 }
42} 41}
43 42
44static constexpr ConversionArray morton_to_linear_fns = { 43static constexpr ConversionArray morton_to_linear_fns = {
45 // clang-format off 44 MortonCopy<true, PixelFormat::ABGR8U>,
46 MortonCopy<true, PixelFormat::ABGR8U>, 45 MortonCopy<true, PixelFormat::ABGR8S>,
47 MortonCopy<true, PixelFormat::ABGR8S>, 46 MortonCopy<true, PixelFormat::ABGR8UI>,
48 MortonCopy<true, PixelFormat::ABGR8UI>, 47 MortonCopy<true, PixelFormat::B5G6R5U>,
49 MortonCopy<true, PixelFormat::B5G6R5U>, 48 MortonCopy<true, PixelFormat::A2B10G10R10U>,
50 MortonCopy<true, PixelFormat::A2B10G10R10U>, 49 MortonCopy<true, PixelFormat::A1B5G5R5U>,
51 MortonCopy<true, PixelFormat::A1B5G5R5U>, 50 MortonCopy<true, PixelFormat::R8U>,
52 MortonCopy<true, PixelFormat::R8U>, 51 MortonCopy<true, PixelFormat::R8UI>,
53 MortonCopy<true, PixelFormat::R8UI>, 52 MortonCopy<true, PixelFormat::RGBA16F>,
54 MortonCopy<true, PixelFormat::RGBA16F>, 53 MortonCopy<true, PixelFormat::RGBA16U>,
55 MortonCopy<true, PixelFormat::RGBA16U>, 54 MortonCopy<true, PixelFormat::RGBA16UI>,
56 MortonCopy<true, PixelFormat::RGBA16UI>, 55 MortonCopy<true, PixelFormat::R11FG11FB10F>,
57 MortonCopy<true, PixelFormat::R11FG11FB10F>, 56 MortonCopy<true, PixelFormat::RGBA32UI>,
58 MortonCopy<true, PixelFormat::RGBA32UI>, 57 MortonCopy<true, PixelFormat::DXT1>,
59 MortonCopy<true, PixelFormat::DXT1>, 58 MortonCopy<true, PixelFormat::DXT23>,
60 MortonCopy<true, PixelFormat::DXT23>, 59 MortonCopy<true, PixelFormat::DXT45>,
61 MortonCopy<true, PixelFormat::DXT45>, 60 MortonCopy<true, PixelFormat::DXN1>,
62 MortonCopy<true, PixelFormat::DXN1>, 61 MortonCopy<true, PixelFormat::DXN2UNORM>,
63 MortonCopy<true, PixelFormat::DXN2UNORM>, 62 MortonCopy<true, PixelFormat::DXN2SNORM>,
64 MortonCopy<true, PixelFormat::DXN2SNORM>, 63 MortonCopy<true, PixelFormat::BC7U>,
65 MortonCopy<true, PixelFormat::BC7U>, 64 MortonCopy<true, PixelFormat::BC6H_UF16>,
66 MortonCopy<true, PixelFormat::BC6H_UF16>, 65 MortonCopy<true, PixelFormat::BC6H_SF16>,
67 MortonCopy<true, PixelFormat::BC6H_SF16>, 66 MortonCopy<true, PixelFormat::ASTC_2D_4X4>,
68 MortonCopy<true, PixelFormat::ASTC_2D_4X4>, 67 MortonCopy<true, PixelFormat::BGRA8>,
69 MortonCopy<true, PixelFormat::BGRA8>, 68 MortonCopy<true, PixelFormat::RGBA32F>,
70 MortonCopy<true, PixelFormat::RGBA32F>, 69 MortonCopy<true, PixelFormat::RG32F>,
71 MortonCopy<true, PixelFormat::RG32F>, 70 MortonCopy<true, PixelFormat::R32F>,
72 MortonCopy<true, PixelFormat::R32F>, 71 MortonCopy<true, PixelFormat::R16F>,
73 MortonCopy<true, PixelFormat::R16F>, 72 MortonCopy<true, PixelFormat::R16U>,
74 MortonCopy<true, PixelFormat::R16U>, 73 MortonCopy<true, PixelFormat::R16S>,
75 MortonCopy<true, PixelFormat::R16S>, 74 MortonCopy<true, PixelFormat::R16UI>,
76 MortonCopy<true, PixelFormat::R16UI>, 75 MortonCopy<true, PixelFormat::R16I>,
77 MortonCopy<true, PixelFormat::R16I>, 76 MortonCopy<true, PixelFormat::RG16>,
78 MortonCopy<true, PixelFormat::RG16>, 77 MortonCopy<true, PixelFormat::RG16F>,
79 MortonCopy<true, PixelFormat::RG16F>, 78 MortonCopy<true, PixelFormat::RG16UI>,
80 MortonCopy<true, PixelFormat::RG16UI>, 79 MortonCopy<true, PixelFormat::RG16I>,
81 MortonCopy<true, PixelFormat::RG16I>, 80 MortonCopy<true, PixelFormat::RG16S>,
82 MortonCopy<true, PixelFormat::RG16S>, 81 MortonCopy<true, PixelFormat::RGB32F>,
83 MortonCopy<true, PixelFormat::RGB32F>, 82 MortonCopy<true, PixelFormat::RGBA8_SRGB>,
84 MortonCopy<true, PixelFormat::RGBA8_SRGB>, 83 MortonCopy<true, PixelFormat::RG8U>,
85 MortonCopy<true, PixelFormat::RG8U>, 84 MortonCopy<true, PixelFormat::RG8S>,
86 MortonCopy<true, PixelFormat::RG8S>, 85 MortonCopy<true, PixelFormat::RG32UI>,
87 MortonCopy<true, PixelFormat::RG32UI>, 86 MortonCopy<true, PixelFormat::R32UI>,
88 MortonCopy<true, PixelFormat::R32UI>, 87 MortonCopy<true, PixelFormat::ASTC_2D_8X8>,
89 MortonCopy<true, PixelFormat::ASTC_2D_8X8>, 88 MortonCopy<true, PixelFormat::ASTC_2D_8X5>,
90 MortonCopy<true, PixelFormat::ASTC_2D_8X5>, 89 MortonCopy<true, PixelFormat::ASTC_2D_5X4>,
91 MortonCopy<true, PixelFormat::ASTC_2D_5X4>, 90 MortonCopy<true, PixelFormat::BGRA8_SRGB>,
92 MortonCopy<true, PixelFormat::BGRA8_SRGB>, 91 MortonCopy<true, PixelFormat::DXT1_SRGB>,
93 MortonCopy<true, PixelFormat::DXT1_SRGB>, 92 MortonCopy<true, PixelFormat::DXT23_SRGB>,
94 MortonCopy<true, PixelFormat::DXT23_SRGB>, 93 MortonCopy<true, PixelFormat::DXT45_SRGB>,
95 MortonCopy<true, PixelFormat::DXT45_SRGB>, 94 MortonCopy<true, PixelFormat::BC7U_SRGB>,
96 MortonCopy<true, PixelFormat::BC7U_SRGB>, 95 MortonCopy<true, PixelFormat::ASTC_2D_4X4_SRGB>,
97 MortonCopy<true, PixelFormat::ASTC_2D_4X4_SRGB>, 96 MortonCopy<true, PixelFormat::ASTC_2D_8X8_SRGB>,
98 MortonCopy<true, PixelFormat::ASTC_2D_8X8_SRGB>, 97 MortonCopy<true, PixelFormat::ASTC_2D_8X5_SRGB>,
99 MortonCopy<true, PixelFormat::ASTC_2D_8X5_SRGB>, 98 MortonCopy<true, PixelFormat::ASTC_2D_5X4_SRGB>,
100 MortonCopy<true, PixelFormat::ASTC_2D_5X4_SRGB>, 99 MortonCopy<true, PixelFormat::ASTC_2D_5X5>,
101 MortonCopy<true, PixelFormat::ASTC_2D_5X5>, 100 MortonCopy<true, PixelFormat::ASTC_2D_5X5_SRGB>,
102 MortonCopy<true, PixelFormat::ASTC_2D_5X5_SRGB>, 101 MortonCopy<true, PixelFormat::ASTC_2D_10X8>,
103 MortonCopy<true, PixelFormat::ASTC_2D_10X8>, 102 MortonCopy<true, PixelFormat::ASTC_2D_10X8_SRGB>,
104 MortonCopy<true, PixelFormat::ASTC_2D_10X8_SRGB>, 103 MortonCopy<true, PixelFormat::Z32F>,
105 MortonCopy<true, PixelFormat::Z32F>, 104 MortonCopy<true, PixelFormat::Z16>,
106 MortonCopy<true, PixelFormat::Z16>, 105 MortonCopy<true, PixelFormat::Z24S8>,
107 MortonCopy<true, PixelFormat::Z24S8>, 106 MortonCopy<true, PixelFormat::S8Z24>,
108 MortonCopy<true, PixelFormat::S8Z24>, 107 MortonCopy<true, PixelFormat::Z32FS8>,
109 MortonCopy<true, PixelFormat::Z32FS8>,
110 // clang-format on
111}; 108};
112 109
113static constexpr ConversionArray linear_to_morton_fns = { 110static constexpr ConversionArray linear_to_morton_fns = {
114 // clang-format off 111 MortonCopy<false, PixelFormat::ABGR8U>,
115 MortonCopy<false, PixelFormat::ABGR8U>, 112 MortonCopy<false, PixelFormat::ABGR8S>,
116 MortonCopy<false, PixelFormat::ABGR8S>, 113 MortonCopy<false, PixelFormat::ABGR8UI>,
117 MortonCopy<false, PixelFormat::ABGR8UI>, 114 MortonCopy<false, PixelFormat::B5G6R5U>,
118 MortonCopy<false, PixelFormat::B5G6R5U>, 115 MortonCopy<false, PixelFormat::A2B10G10R10U>,
119 MortonCopy<false, PixelFormat::A2B10G10R10U>, 116 MortonCopy<false, PixelFormat::A1B5G5R5U>,
120 MortonCopy<false, PixelFormat::A1B5G5R5U>, 117 MortonCopy<false, PixelFormat::R8U>,
121 MortonCopy<false, PixelFormat::R8U>, 118 MortonCopy<false, PixelFormat::R8UI>,
122 MortonCopy<false, PixelFormat::R8UI>, 119 MortonCopy<false, PixelFormat::RGBA16F>,
123 MortonCopy<false, PixelFormat::RGBA16F>, 120 MortonCopy<false, PixelFormat::RGBA16U>,
124 MortonCopy<false, PixelFormat::RGBA16U>, 121 MortonCopy<false, PixelFormat::RGBA16UI>,
125 MortonCopy<false, PixelFormat::RGBA16UI>, 122 MortonCopy<false, PixelFormat::R11FG11FB10F>,
126 MortonCopy<false, PixelFormat::R11FG11FB10F>, 123 MortonCopy<false, PixelFormat::RGBA32UI>,
127 MortonCopy<false, PixelFormat::RGBA32UI>, 124 MortonCopy<false, PixelFormat::DXT1>,
128 MortonCopy<false, PixelFormat::DXT1>, 125 MortonCopy<false, PixelFormat::DXT23>,
129 MortonCopy<false, PixelFormat::DXT23>, 126 MortonCopy<false, PixelFormat::DXT45>,
130 MortonCopy<false, PixelFormat::DXT45>, 127 MortonCopy<false, PixelFormat::DXN1>,
131 MortonCopy<false, PixelFormat::DXN1>, 128 MortonCopy<false, PixelFormat::DXN2UNORM>,
132 MortonCopy<false, PixelFormat::DXN2UNORM>, 129 MortonCopy<false, PixelFormat::DXN2SNORM>,
133 MortonCopy<false, PixelFormat::DXN2SNORM>, 130 MortonCopy<false, PixelFormat::BC7U>,
134 MortonCopy<false, PixelFormat::BC7U>, 131 MortonCopy<false, PixelFormat::BC6H_UF16>,
135 MortonCopy<false, PixelFormat::BC6H_UF16>, 132 MortonCopy<false, PixelFormat::BC6H_SF16>,
136 MortonCopy<false, PixelFormat::BC6H_SF16>, 133 // TODO(Subv): Swizzling ASTC formats are not supported
137 // TODO(Subv): Swizzling ASTC formats are not supported 134 nullptr,
138 nullptr, 135 MortonCopy<false, PixelFormat::BGRA8>,
139 MortonCopy<false, PixelFormat::BGRA8>, 136 MortonCopy<false, PixelFormat::RGBA32F>,
140 MortonCopy<false, PixelFormat::RGBA32F>, 137 MortonCopy<false, PixelFormat::RG32F>,
141 MortonCopy<false, PixelFormat::RG32F>, 138 MortonCopy<false, PixelFormat::R32F>,
142 MortonCopy<false, PixelFormat::R32F>, 139 MortonCopy<false, PixelFormat::R16F>,
143 MortonCopy<false, PixelFormat::R16F>, 140 MortonCopy<false, PixelFormat::R16U>,
144 MortonCopy<false, PixelFormat::R16U>, 141 MortonCopy<false, PixelFormat::R16S>,
145 MortonCopy<false, PixelFormat::R16S>, 142 MortonCopy<false, PixelFormat::R16UI>,
146 MortonCopy<false, PixelFormat::R16UI>, 143 MortonCopy<false, PixelFormat::R16I>,
147 MortonCopy<false, PixelFormat::R16I>, 144 MortonCopy<false, PixelFormat::RG16>,
148 MortonCopy<false, PixelFormat::RG16>, 145 MortonCopy<false, PixelFormat::RG16F>,
149 MortonCopy<false, PixelFormat::RG16F>, 146 MortonCopy<false, PixelFormat::RG16UI>,
150 MortonCopy<false, PixelFormat::RG16UI>, 147 MortonCopy<false, PixelFormat::RG16I>,
151 MortonCopy<false, PixelFormat::RG16I>, 148 MortonCopy<false, PixelFormat::RG16S>,
152 MortonCopy<false, PixelFormat::RG16S>, 149 MortonCopy<false, PixelFormat::RGB32F>,
153 MortonCopy<false, PixelFormat::RGB32F>, 150 MortonCopy<false, PixelFormat::RGBA8_SRGB>,
154 MortonCopy<false, PixelFormat::RGBA8_SRGB>, 151 MortonCopy<false, PixelFormat::RG8U>,
155 MortonCopy<false, PixelFormat::RG8U>, 152 MortonCopy<false, PixelFormat::RG8S>,
156 MortonCopy<false, PixelFormat::RG8S>, 153 MortonCopy<false, PixelFormat::RG32UI>,
157 MortonCopy<false, PixelFormat::RG32UI>, 154 MortonCopy<false, PixelFormat::R32UI>,
158 MortonCopy<false, PixelFormat::R32UI>, 155 nullptr,
159 nullptr, 156 nullptr,
160 nullptr, 157 nullptr,
161 nullptr, 158 MortonCopy<false, PixelFormat::BGRA8_SRGB>,
162 MortonCopy<false, PixelFormat::BGRA8_SRGB>, 159 MortonCopy<false, PixelFormat::DXT1_SRGB>,
163 MortonCopy<false, PixelFormat::DXT1_SRGB>, 160 MortonCopy<false, PixelFormat::DXT23_SRGB>,
164 MortonCopy<false, PixelFormat::DXT23_SRGB>, 161 MortonCopy<false, PixelFormat::DXT45_SRGB>,
165 MortonCopy<false, PixelFormat::DXT45_SRGB>, 162 MortonCopy<false, PixelFormat::BC7U_SRGB>,
166 MortonCopy<false, PixelFormat::BC7U_SRGB>, 163 nullptr,
167 nullptr, 164 nullptr,
168 nullptr, 165 nullptr,
169 nullptr, 166 nullptr,
170 nullptr, 167 nullptr,
171 nullptr, 168 nullptr,
172 nullptr, 169 nullptr,
173 nullptr, 170 nullptr,
174 nullptr, 171 MortonCopy<false, PixelFormat::Z32F>,
175 MortonCopy<false, PixelFormat::Z32F>, 172 MortonCopy<false, PixelFormat::Z16>,
176 MortonCopy<false, PixelFormat::Z16>, 173 MortonCopy<false, PixelFormat::Z24S8>,
177 MortonCopy<false, PixelFormat::Z24S8>, 174 MortonCopy<false, PixelFormat::S8Z24>,
178 MortonCopy<false, PixelFormat::S8Z24>, 175 MortonCopy<false, PixelFormat::Z32FS8>,
179 MortonCopy<false, PixelFormat::Z32FS8>,
180 // clang-format on
181}; 176};
182 177
183static MortonCopyFn GetSwizzleFunction(MortonSwizzleMode mode, Surface::PixelFormat format) { 178static MortonCopyFn GetSwizzleFunction(MortonSwizzleMode mode, Surface::PixelFormat format) {
@@ -191,45 +186,6 @@ static MortonCopyFn GetSwizzleFunction(MortonSwizzleMode mode, Surface::PixelFor
191 return morton_to_linear_fns[static_cast<std::size_t>(format)]; 186 return morton_to_linear_fns[static_cast<std::size_t>(format)];
192} 187}
193 188
194/// 8x8 Z-Order coordinate from 2D coordinates
195static u32 MortonInterleave(u32 x, u32 y) {
196 static const u32 xlut[] = {0x00, 0x01, 0x04, 0x05, 0x10, 0x11, 0x14, 0x15};
197 static const u32 ylut[] = {0x00, 0x02, 0x08, 0x0a, 0x20, 0x22, 0x28, 0x2a};
198 return xlut[x % 8] + ylut[y % 8];
199}
200
201/// Calculates the offset of the position of the pixel in Morton order
202static u32 GetMortonOffset(u32 x, u32 y, u32 bytes_per_pixel) {
203 // Images are split into 8x8 tiles. Each tile is composed of four 4x4 subtiles each
204 // of which is composed of four 2x2 subtiles each of which is composed of four texels.
205 // Each structure is embedded into the next-bigger one in a diagonal pattern, e.g.
206 // texels are laid out in a 2x2 subtile like this:
207 // 2 3
208 // 0 1
209 //
210 // The full 8x8 tile has the texels arranged like this:
211 //
212 // 42 43 46 47 58 59 62 63
213 // 40 41 44 45 56 57 60 61
214 // 34 35 38 39 50 51 54 55
215 // 32 33 36 37 48 49 52 53
216 // 10 11 14 15 26 27 30 31
217 // 08 09 12 13 24 25 28 29
218 // 02 03 06 07 18 19 22 23
219 // 00 01 04 05 16 17 20 21
220 //
221 // This pattern is what's called Z-order curve, or Morton order.
222
223 const unsigned int block_height = 8;
224 const unsigned int coarse_x = x & ~7;
225
226 u32 i = MortonInterleave(x, y);
227
228 const unsigned int offset = coarse_x * block_height;
229
230 return (i + offset) * bytes_per_pixel;
231}
232
233static u32 MortonInterleave128(u32 x, u32 y) { 189static u32 MortonInterleave128(u32 x, u32 y) {
234 // 128x128 Z-Order coordinate from 2D coordinates 190 // 128x128 Z-Order coordinate from 2D coordinates
235 static constexpr u32 xlut[] = { 191 static constexpr u32 xlut[] = {
@@ -325,14 +281,14 @@ static u32 GetMortonOffset128(u32 x, u32 y, u32 bytes_per_pixel) {
325 281
326void MortonSwizzle(MortonSwizzleMode mode, Surface::PixelFormat format, u32 stride, 282void MortonSwizzle(MortonSwizzleMode mode, Surface::PixelFormat format, u32 stride,
327 u32 block_height, u32 height, u32 block_depth, u32 depth, u32 tile_width_spacing, 283 u32 block_height, u32 height, u32 block_depth, u32 depth, u32 tile_width_spacing,
328 u8* buffer, std::size_t buffer_size, VAddr addr) { 284 u8* buffer, u8* addr) {
329
330 GetSwizzleFunction(mode, format)(stride, block_height, height, block_depth, depth, 285 GetSwizzleFunction(mode, format)(stride, block_height, height, block_depth, depth,
331 tile_width_spacing, buffer, buffer_size, addr); 286 tile_width_spacing, buffer, addr);
332} 287}
333 288
334void MortonCopyPixels128(u32 width, u32 height, u32 bytes_per_pixel, u32 linear_bytes_per_pixel, 289void MortonCopyPixels128(MortonSwizzleMode mode, u32 width, u32 height, u32 bytes_per_pixel,
335 u8* morton_data, u8* linear_data, bool morton_to_linear) { 290 u32 linear_bytes_per_pixel, u8* morton_data, u8* linear_data) {
291 const bool morton_to_linear = mode == MortonSwizzleMode::MortonToLinear;
336 u8* data_ptrs[2]; 292 u8* data_ptrs[2];
337 for (u32 y = 0; y < height; ++y) { 293 for (u32 y = 0; y < height; ++y) {
338 for (u32 x = 0; x < width; ++x) { 294 for (u32 x = 0; x < width; ++x) {
diff --git a/src/video_core/morton.h b/src/video_core/morton.h
index 065f59ce3..ee5b45555 100644
--- a/src/video_core/morton.h
+++ b/src/video_core/morton.h
@@ -13,9 +13,9 @@ enum class MortonSwizzleMode { MortonToLinear, LinearToMorton };
13 13
14void MortonSwizzle(MortonSwizzleMode mode, VideoCore::Surface::PixelFormat format, u32 stride, 14void MortonSwizzle(MortonSwizzleMode mode, VideoCore::Surface::PixelFormat format, u32 stride,
15 u32 block_height, u32 height, u32 block_depth, u32 depth, u32 tile_width_spacing, 15 u32 block_height, u32 height, u32 block_depth, u32 depth, u32 tile_width_spacing,
16 u8* buffer, std::size_t buffer_size, VAddr addr); 16 u8* buffer, u8* addr);
17 17
18void MortonCopyPixels128(u32 width, u32 height, u32 bytes_per_pixel, u32 linear_bytes_per_pixel, 18void MortonCopyPixels128(MortonSwizzleMode mode, u32 width, u32 height, u32 bytes_per_pixel,
19 u8* morton_data, u8* linear_data, bool morton_to_linear); 19 u32 linear_bytes_per_pixel, u8* morton_data, u8* linear_data);
20 20
21} // namespace VideoCore 21} // namespace VideoCore
diff --git a/src/video_core/rasterizer_cache.h b/src/video_core/rasterizer_cache.h
index bcf0c15a4..291772186 100644
--- a/src/video_core/rasterizer_cache.h
+++ b/src/video_core/rasterizer_cache.h
@@ -4,6 +4,7 @@
4 4
5#pragma once 5#pragma once
6 6
7#include <mutex>
7#include <set> 8#include <set>
8#include <unordered_map> 9#include <unordered_map>
9 10
@@ -12,14 +13,26 @@
12 13
13#include "common/common_types.h" 14#include "common/common_types.h"
14#include "core/settings.h" 15#include "core/settings.h"
16#include "video_core/gpu.h"
15#include "video_core/rasterizer_interface.h" 17#include "video_core/rasterizer_interface.h"
16 18
17class RasterizerCacheObject { 19class RasterizerCacheObject {
18public: 20public:
21 explicit RasterizerCacheObject(const u8* host_ptr)
22 : host_ptr{host_ptr}, cache_addr{ToCacheAddr(host_ptr)} {}
23
19 virtual ~RasterizerCacheObject(); 24 virtual ~RasterizerCacheObject();
20 25
26 CacheAddr GetCacheAddr() const {
27 return cache_addr;
28 }
29
30 const u8* GetHostPtr() const {
31 return host_ptr;
32 }
33
21 /// Gets the address of the shader in guest memory, required for cache management 34 /// Gets the address of the shader in guest memory, required for cache management
22 virtual VAddr GetAddr() const = 0; 35 virtual VAddr GetCpuAddr() const = 0;
23 36
24 /// Gets the size of the shader in guest memory, required for cache management 37 /// Gets the size of the shader in guest memory, required for cache management
25 virtual std::size_t GetSizeInBytes() const = 0; 38 virtual std::size_t GetSizeInBytes() const = 0;
@@ -58,6 +71,8 @@ private:
58 bool is_registered{}; ///< Whether the object is currently registered with the cache 71 bool is_registered{}; ///< Whether the object is currently registered with the cache
59 bool is_dirty{}; ///< Whether the object is dirty (out of sync with guest memory) 72 bool is_dirty{}; ///< Whether the object is dirty (out of sync with guest memory)
60 u64 last_modified_ticks{}; ///< When the object was last modified, used for in-order flushing 73 u64 last_modified_ticks{}; ///< When the object was last modified, used for in-order flushing
74 const u8* host_ptr{}; ///< Pointer to the memory backing this cached region
75 CacheAddr cache_addr{}; ///< Cache address memory, unique from emulated virtual address space
61}; 76};
62 77
63template <class T> 78template <class T>
@@ -68,7 +83,9 @@ public:
68 explicit RasterizerCache(VideoCore::RasterizerInterface& rasterizer) : rasterizer{rasterizer} {} 83 explicit RasterizerCache(VideoCore::RasterizerInterface& rasterizer) : rasterizer{rasterizer} {}
69 84
70 /// Write any cached resources overlapping the specified region back to memory 85 /// Write any cached resources overlapping the specified region back to memory
71 void FlushRegion(Tegra::GPUVAddr addr, size_t size) { 86 void FlushRegion(CacheAddr addr, std::size_t size) {
87 std::lock_guard lock{mutex};
88
72 const auto& objects{GetSortedObjectsFromRegion(addr, size)}; 89 const auto& objects{GetSortedObjectsFromRegion(addr, size)};
73 for (auto& object : objects) { 90 for (auto& object : objects) {
74 FlushObject(object); 91 FlushObject(object);
@@ -76,7 +93,9 @@ public:
76 } 93 }
77 94
78 /// Mark the specified region as being invalidated 95 /// Mark the specified region as being invalidated
79 void InvalidateRegion(VAddr addr, u64 size) { 96 void InvalidateRegion(CacheAddr addr, u64 size) {
97 std::lock_guard lock{mutex};
98
80 const auto& objects{GetSortedObjectsFromRegion(addr, size)}; 99 const auto& objects{GetSortedObjectsFromRegion(addr, size)};
81 for (auto& object : objects) { 100 for (auto& object : objects) {
82 if (!object->IsRegistered()) { 101 if (!object->IsRegistered()) {
@@ -89,49 +108,70 @@ public:
89 108
90 /// Invalidates everything in the cache 109 /// Invalidates everything in the cache
91 void InvalidateAll() { 110 void InvalidateAll() {
111 std::lock_guard lock{mutex};
112
92 while (interval_cache.begin() != interval_cache.end()) { 113 while (interval_cache.begin() != interval_cache.end()) {
93 Unregister(*interval_cache.begin()->second.begin()); 114 Unregister(*interval_cache.begin()->second.begin());
94 } 115 }
95 } 116 }
96 117
97protected: 118protected:
98 /// Tries to get an object from the cache with the specified address 119 /// Tries to get an object from the cache with the specified cache address
99 T TryGet(VAddr addr) const { 120 T TryGet(CacheAddr addr) const {
100 const auto iter = map_cache.find(addr); 121 const auto iter = map_cache.find(addr);
101 if (iter != map_cache.end()) 122 if (iter != map_cache.end())
102 return iter->second; 123 return iter->second;
103 return nullptr; 124 return nullptr;
104 } 125 }
105 126
127 T TryGet(const void* addr) const {
128 const auto iter = map_cache.find(ToCacheAddr(addr));
129 if (iter != map_cache.end())
130 return iter->second;
131 return nullptr;
132 }
133
106 /// Register an object into the cache 134 /// Register an object into the cache
107 void Register(const T& object) { 135 virtual void Register(const T& object) {
136 std::lock_guard lock{mutex};
137
108 object->SetIsRegistered(true); 138 object->SetIsRegistered(true);
109 interval_cache.add({GetInterval(object), ObjectSet{object}}); 139 interval_cache.add({GetInterval(object), ObjectSet{object}});
110 map_cache.insert({object->GetAddr(), object}); 140 map_cache.insert({object->GetCacheAddr(), object});
111 rasterizer.UpdatePagesCachedCount(object->GetAddr(), object->GetSizeInBytes(), 1); 141 rasterizer.UpdatePagesCachedCount(object->GetCpuAddr(), object->GetSizeInBytes(), 1);
112 } 142 }
113 143
114 /// Unregisters an object from the cache 144 /// Unregisters an object from the cache
115 void Unregister(const T& object) { 145 virtual void Unregister(const T& object) {
116 object->SetIsRegistered(false); 146 std::lock_guard lock{mutex};
117 rasterizer.UpdatePagesCachedCount(object->GetAddr(), object->GetSizeInBytes(), -1);
118 // Only flush if use_accurate_gpu_emulation is enabled, as it incurs a performance hit
119 if (Settings::values.use_accurate_gpu_emulation) {
120 FlushObject(object);
121 }
122 147
148 object->SetIsRegistered(false);
149 rasterizer.UpdatePagesCachedCount(object->GetCpuAddr(), object->GetSizeInBytes(), -1);
123 interval_cache.subtract({GetInterval(object), ObjectSet{object}}); 150 interval_cache.subtract({GetInterval(object), ObjectSet{object}});
124 map_cache.erase(object->GetAddr()); 151 map_cache.erase(object->GetCacheAddr());
125 } 152 }
126 153
127 /// Returns a ticks counter used for tracking when cached objects were last modified 154 /// Returns a ticks counter used for tracking when cached objects were last modified
128 u64 GetModifiedTicks() { 155 u64 GetModifiedTicks() {
156 std::lock_guard lock{mutex};
157
129 return ++modified_ticks; 158 return ++modified_ticks;
130 } 159 }
131 160
161 /// Flushes the specified object, updating appropriate cache state as needed
162 void FlushObject(const T& object) {
163 std::lock_guard lock{mutex};
164
165 if (!object->IsDirty()) {
166 return;
167 }
168 object->Flush();
169 object->MarkAsModified(false, *this);
170 }
171
132private: 172private:
133 /// Returns a list of cached objects from the specified memory region, ordered by access time 173 /// Returns a list of cached objects from the specified memory region, ordered by access time
134 std::vector<T> GetSortedObjectsFromRegion(VAddr addr, u64 size) { 174 std::vector<T> GetSortedObjectsFromRegion(CacheAddr addr, u64 size) {
135 if (size == 0) { 175 if (size == 0) {
136 return {}; 176 return {};
137 } 177 }
@@ -154,27 +194,19 @@ private:
154 return objects; 194 return objects;
155 } 195 }
156 196
157 /// Flushes the specified object, updating appropriate cache state as needed
158 void FlushObject(const T& object) {
159 if (!object->IsDirty()) {
160 return;
161 }
162 object->Flush();
163 object->MarkAsModified(false, *this);
164 }
165
166 using ObjectSet = std::set<T>; 197 using ObjectSet = std::set<T>;
167 using ObjectCache = std::unordered_map<VAddr, T>; 198 using ObjectCache = std::unordered_map<CacheAddr, T>;
168 using IntervalCache = boost::icl::interval_map<VAddr, ObjectSet>; 199 using IntervalCache = boost::icl::interval_map<CacheAddr, ObjectSet>;
169 using ObjectInterval = typename IntervalCache::interval_type; 200 using ObjectInterval = typename IntervalCache::interval_type;
170 201
171 static auto GetInterval(const T& object) { 202 static auto GetInterval(const T& object) {
172 return ObjectInterval::right_open(object->GetAddr(), 203 return ObjectInterval::right_open(object->GetCacheAddr(),
173 object->GetAddr() + object->GetSizeInBytes()); 204 object->GetCacheAddr() + object->GetSizeInBytes());
174 } 205 }
175 206
176 ObjectCache map_cache; 207 ObjectCache map_cache;
177 IntervalCache interval_cache; ///< Cache of objects 208 IntervalCache interval_cache; ///< Cache of objects
178 u64 modified_ticks{}; ///< Counter of cache state ticks, used for in-order flushing 209 u64 modified_ticks{}; ///< Counter of cache state ticks, used for in-order flushing
179 VideoCore::RasterizerInterface& rasterizer; 210 VideoCore::RasterizerInterface& rasterizer;
211 std::recursive_mutex mutex;
180}; 212};
diff --git a/src/video_core/rasterizer_interface.h b/src/video_core/rasterizer_interface.h
index ff5310848..d7b86df38 100644
--- a/src/video_core/rasterizer_interface.h
+++ b/src/video_core/rasterizer_interface.h
@@ -4,11 +4,11 @@
4 4
5#pragma once 5#pragma once
6 6
7#include <atomic>
7#include <functional> 8#include <functional>
8#include "common/common_types.h" 9#include "common/common_types.h"
9#include "video_core/engines/fermi_2d.h" 10#include "video_core/engines/fermi_2d.h"
10#include "video_core/gpu.h" 11#include "video_core/gpu.h"
11#include "video_core/memory_manager.h"
12 12
13namespace VideoCore { 13namespace VideoCore {
14 14
@@ -34,23 +34,20 @@ public:
34 virtual void FlushAll() = 0; 34 virtual void FlushAll() = 0;
35 35
36 /// Notify rasterizer that any caches of the specified region should be flushed to Switch memory 36 /// Notify rasterizer that any caches of the specified region should be flushed to Switch memory
37 virtual void FlushRegion(VAddr addr, u64 size) = 0; 37 virtual void FlushRegion(CacheAddr addr, u64 size) = 0;
38 38
39 /// Notify rasterizer that any caches of the specified region should be invalidated 39 /// Notify rasterizer that any caches of the specified region should be invalidated
40 virtual void InvalidateRegion(VAddr addr, u64 size) = 0; 40 virtual void InvalidateRegion(CacheAddr addr, u64 size) = 0;
41 41
42 /// Notify rasterizer that any caches of the specified region should be flushed to Switch memory 42 /// Notify rasterizer that any caches of the specified region should be flushed to Switch memory
43 /// and invalidated 43 /// and invalidated
44 virtual void FlushAndInvalidateRegion(VAddr addr, u64 size) = 0; 44 virtual void FlushAndInvalidateRegion(CacheAddr addr, u64 size) = 0;
45 45
46 /// Attempt to use a faster method to perform a surface copy 46 /// Attempt to use a faster method to perform a surface copy
47 virtual bool AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Regs::Surface& src, 47 virtual bool AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Regs::Surface& src,
48 const Tegra::Engines::Fermi2D::Regs::Surface& dst) { 48 const Tegra::Engines::Fermi2D::Regs::Surface& dst,
49 return false; 49 const Common::Rectangle<u32>& src_rect,
50 } 50 const Common::Rectangle<u32>& dst_rect) {
51
52 /// Attempt to use a faster method to fill a region
53 virtual bool AccelerateFill(const void* config) {
54 return false; 51 return false;
55 } 52 }
56 53
@@ -65,6 +62,10 @@ public:
65 } 62 }
66 63
67 /// Increase/decrease the number of object in pages touching the specified region 64 /// Increase/decrease the number of object in pages touching the specified region
68 virtual void UpdatePagesCachedCount(Tegra::GPUVAddr addr, u64 size, int delta) {} 65 virtual void UpdatePagesCachedCount(VAddr addr, u64 size, int delta) {}
66
67 /// Initialize disk cached resources for the game being emulated
68 virtual void LoadDiskResources(const std::atomic_bool& stop_loading = false,
69 const DiskResourceLoadCallback& callback = {}) {}
69}; 70};
70} // namespace VideoCore 71} // namespace VideoCore
diff --git a/src/video_core/renderer_base.cpp b/src/video_core/renderer_base.cpp
index 94223f45f..919d1f2d4 100644
--- a/src/video_core/renderer_base.cpp
+++ b/src/video_core/renderer_base.cpp
@@ -2,6 +2,7 @@
2// Licensed under GPLv2 or any later version 2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included. 3// Refer to the license.txt file included.
4 4
5#include "common/logging/log.h"
5#include "core/frontend/emu_window.h" 6#include "core/frontend/emu_window.h"
6#include "core/settings.h" 7#include "core/settings.h"
7#include "video_core/renderer_base.h" 8#include "video_core/renderer_base.h"
diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.cpp b/src/video_core/renderer_opengl/gl_buffer_cache.cpp
index bd2b30e77..25652e794 100644
--- a/src/video_core/renderer_opengl/gl_buffer_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_buffer_cache.cpp
@@ -7,29 +7,34 @@
7 7
8#include "common/alignment.h" 8#include "common/alignment.h"
9#include "core/core.h" 9#include "core/core.h"
10#include "core/memory.h" 10#include "video_core/memory_manager.h"
11#include "video_core/renderer_opengl/gl_buffer_cache.h" 11#include "video_core/renderer_opengl/gl_buffer_cache.h"
12#include "video_core/renderer_opengl/gl_rasterizer.h" 12#include "video_core/renderer_opengl/gl_rasterizer.h"
13 13
14namespace OpenGL { 14namespace OpenGL {
15 15
16CachedBufferEntry::CachedBufferEntry(VAddr cpu_addr, std::size_t size, GLintptr offset,
17 std::size_t alignment, u8* host_ptr)
18 : RasterizerCacheObject{host_ptr}, cpu_addr{cpu_addr}, size{size}, offset{offset},
19 alignment{alignment} {}
20
16OGLBufferCache::OGLBufferCache(RasterizerOpenGL& rasterizer, std::size_t size) 21OGLBufferCache::OGLBufferCache(RasterizerOpenGL& rasterizer, std::size_t size)
17 : RasterizerCache{rasterizer}, stream_buffer(size, true) {} 22 : RasterizerCache{rasterizer}, stream_buffer(size, true) {}
18 23
19GLintptr OGLBufferCache::UploadMemory(Tegra::GPUVAddr gpu_addr, std::size_t size, 24GLintptr OGLBufferCache::UploadMemory(GPUVAddr gpu_addr, std::size_t size, std::size_t alignment,
20 std::size_t alignment, bool cache) { 25 bool cache) {
21 auto& memory_manager = Core::System::GetInstance().GPU().MemoryManager(); 26 auto& memory_manager = Core::System::GetInstance().GPU().MemoryManager();
22 const std::optional<VAddr> cpu_addr{memory_manager.GpuToCpuAddress(gpu_addr)};
23 27
24 // Cache management is a big overhead, so only cache entries with a given size. 28 // Cache management is a big overhead, so only cache entries with a given size.
25 // TODO: Figure out which size is the best for given games. 29 // TODO: Figure out which size is the best for given games.
26 cache &= size >= 2048; 30 cache &= size >= 2048;
27 31
32 const auto& host_ptr{memory_manager.GetPointer(gpu_addr)};
28 if (cache) { 33 if (cache) {
29 auto entry = TryGet(*cpu_addr); 34 auto entry = TryGet(host_ptr);
30 if (entry) { 35 if (entry) {
31 if (entry->size >= size && entry->alignment == alignment) { 36 if (entry->GetSize() >= size && entry->GetAlignment() == alignment) {
32 return entry->offset; 37 return entry->GetOffset();
33 } 38 }
34 Unregister(entry); 39 Unregister(entry);
35 } 40 }
@@ -38,17 +43,17 @@ GLintptr OGLBufferCache::UploadMemory(Tegra::GPUVAddr gpu_addr, std::size_t size
38 AlignBuffer(alignment); 43 AlignBuffer(alignment);
39 const GLintptr uploaded_offset = buffer_offset; 44 const GLintptr uploaded_offset = buffer_offset;
40 45
41 Memory::ReadBlock(*cpu_addr, buffer_ptr, size); 46 if (!host_ptr) {
47 return uploaded_offset;
48 }
42 49
50 std::memcpy(buffer_ptr, host_ptr, size);
43 buffer_ptr += size; 51 buffer_ptr += size;
44 buffer_offset += size; 52 buffer_offset += size;
45 53
46 if (cache) { 54 if (cache) {
47 auto entry = std::make_shared<CachedBufferEntry>(); 55 auto entry = std::make_shared<CachedBufferEntry>(
48 entry->offset = uploaded_offset; 56 *memory_manager.GpuToCpuAddress(gpu_addr), size, uploaded_offset, alignment, host_ptr);
49 entry->size = size;
50 entry->alignment = alignment;
51 entry->addr = *cpu_addr;
52 Register(entry); 57 Register(entry);
53 } 58 }
54 59
diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.h b/src/video_core/renderer_opengl/gl_buffer_cache.h
index c11acfb79..fc33aa433 100644
--- a/src/video_core/renderer_opengl/gl_buffer_cache.h
+++ b/src/video_core/renderer_opengl/gl_buffer_cache.h
@@ -17,22 +17,39 @@ namespace OpenGL {
17 17
18class RasterizerOpenGL; 18class RasterizerOpenGL;
19 19
20struct CachedBufferEntry final : public RasterizerCacheObject { 20class CachedBufferEntry final : public RasterizerCacheObject {
21 VAddr GetAddr() const override { 21public:
22 return addr; 22 explicit CachedBufferEntry(VAddr cpu_addr, std::size_t size, GLintptr offset,
23 std::size_t alignment, u8* host_ptr);
24
25 VAddr GetCpuAddr() const override {
26 return cpu_addr;
23 } 27 }
24 28
25 std::size_t GetSizeInBytes() const override { 29 std::size_t GetSizeInBytes() const override {
26 return size; 30 return size;
27 } 31 }
28 32
33 std::size_t GetSize() const {
34 return size;
35 }
36
37 GLintptr GetOffset() const {
38 return offset;
39 }
40
41 std::size_t GetAlignment() const {
42 return alignment;
43 }
44
29 // We do not have to flush this cache as things in it are never modified by us. 45 // We do not have to flush this cache as things in it are never modified by us.
30 void Flush() override {} 46 void Flush() override {}
31 47
32 VAddr addr; 48private:
33 std::size_t size; 49 VAddr cpu_addr{};
34 GLintptr offset; 50 std::size_t size{};
35 std::size_t alignment; 51 GLintptr offset{};
52 std::size_t alignment{};
36}; 53};
37 54
38class OGLBufferCache final : public RasterizerCache<std::shared_ptr<CachedBufferEntry>> { 55class OGLBufferCache final : public RasterizerCache<std::shared_ptr<CachedBufferEntry>> {
@@ -41,7 +58,7 @@ public:
41 58
42 /// Uploads data from a guest GPU address. Returns host's buffer offset where it's been 59 /// Uploads data from a guest GPU address. Returns host's buffer offset where it's been
43 /// allocated. 60 /// allocated.
44 GLintptr UploadMemory(Tegra::GPUVAddr gpu_addr, std::size_t size, std::size_t alignment = 4, 61 GLintptr UploadMemory(GPUVAddr gpu_addr, std::size_t size, std::size_t alignment = 4,
45 bool cache = true); 62 bool cache = true);
46 63
47 /// Uploads from a host memory. Returns host's buffer offset where it's been allocated. 64 /// Uploads from a host memory. Returns host's buffer offset where it's been allocated.
diff --git a/src/video_core/renderer_opengl/gl_global_cache.cpp b/src/video_core/renderer_opengl/gl_global_cache.cpp
index 7992b82c4..8d9ee81f1 100644
--- a/src/video_core/renderer_opengl/gl_global_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_global_cache.cpp
@@ -4,21 +4,92 @@
4 4
5#include <glad/glad.h> 5#include <glad/glad.h>
6 6
7#include "common/logging/log.h"
8#include "core/core.h"
9#include "video_core/memory_manager.h"
7#include "video_core/renderer_opengl/gl_global_cache.h" 10#include "video_core/renderer_opengl/gl_global_cache.h"
8#include "video_core/renderer_opengl/gl_rasterizer.h" 11#include "video_core/renderer_opengl/gl_rasterizer.h"
12#include "video_core/renderer_opengl/gl_shader_decompiler.h"
9#include "video_core/renderer_opengl/utils.h" 13#include "video_core/renderer_opengl/utils.h"
10 14
11namespace OpenGL { 15namespace OpenGL {
12 16
13CachedGlobalRegion::CachedGlobalRegion(VAddr addr, u32 size) : addr{addr}, size{size} { 17CachedGlobalRegion::CachedGlobalRegion(VAddr cpu_addr, u32 size, u8* host_ptr)
18 : RasterizerCacheObject{host_ptr}, cpu_addr{cpu_addr}, size{size} {
14 buffer.Create(); 19 buffer.Create();
15 // Bind and unbind the buffer so it gets allocated by the driver 20 // Bind and unbind the buffer so it gets allocated by the driver
16 glBindBuffer(GL_SHADER_STORAGE_BUFFER, buffer.handle); 21 glBindBuffer(GL_SHADER_STORAGE_BUFFER, buffer.handle);
17 glBindBuffer(GL_SHADER_STORAGE_BUFFER, 0); 22 glBindBuffer(GL_SHADER_STORAGE_BUFFER, 0);
18 LabelGLObject(GL_BUFFER, buffer.handle, addr, "GlobalMemory"); 23 LabelGLObject(GL_BUFFER, buffer.handle, cpu_addr, "GlobalMemory");
24}
25
26void CachedGlobalRegion::Reload(u32 size_) {
27 constexpr auto max_size = static_cast<u32>(RasterizerOpenGL::MaxGlobalMemorySize);
28
29 size = size_;
30 if (size > max_size) {
31 size = max_size;
32 LOG_CRITICAL(HW_GPU, "Global region size {} exceeded the expected size {}!", size_,
33 max_size);
34 }
35
36 // TODO(Rodrigo): Get rid of Memory::GetPointer with a staging buffer
37 glBindBuffer(GL_SHADER_STORAGE_BUFFER, buffer.handle);
38 glBufferData(GL_SHADER_STORAGE_BUFFER, size, GetHostPtr(), GL_DYNAMIC_DRAW);
39}
40
41GlobalRegion GlobalRegionCacheOpenGL::TryGetReservedGlobalRegion(CacheAddr addr, u32 size) const {
42 const auto search{reserve.find(addr)};
43 if (search == reserve.end()) {
44 return {};
45 }
46 return search->second;
47}
48
49GlobalRegion GlobalRegionCacheOpenGL::GetUncachedGlobalRegion(GPUVAddr addr, u32 size,
50 u8* host_ptr) {
51 GlobalRegion region{TryGetReservedGlobalRegion(ToCacheAddr(host_ptr), size)};
52 if (!region) {
53 // No reserved surface available, create a new one and reserve it
54 auto& memory_manager{Core::System::GetInstance().GPU().MemoryManager()};
55 const auto cpu_addr = *memory_manager.GpuToCpuAddress(addr);
56 region = std::make_shared<CachedGlobalRegion>(cpu_addr, size, host_ptr);
57 ReserveGlobalRegion(region);
58 }
59 region->Reload(size);
60 return region;
61}
62
63void GlobalRegionCacheOpenGL::ReserveGlobalRegion(GlobalRegion region) {
64 reserve.insert_or_assign(region->GetCacheAddr(), std::move(region));
19} 65}
20 66
21GlobalRegionCacheOpenGL::GlobalRegionCacheOpenGL(RasterizerOpenGL& rasterizer) 67GlobalRegionCacheOpenGL::GlobalRegionCacheOpenGL(RasterizerOpenGL& rasterizer)
22 : RasterizerCache{rasterizer} {} 68 : RasterizerCache{rasterizer} {}
23 69
70GlobalRegion GlobalRegionCacheOpenGL::GetGlobalRegion(
71 const GLShader::GlobalMemoryEntry& global_region,
72 Tegra::Engines::Maxwell3D::Regs::ShaderStage stage) {
73
74 auto& gpu{Core::System::GetInstance().GPU()};
75 auto& memory_manager{gpu.MemoryManager()};
76 const auto cbufs{gpu.Maxwell3D().state.shader_stages[static_cast<u64>(stage)]};
77 const auto addr{cbufs.const_buffers[global_region.GetCbufIndex()].address +
78 global_region.GetCbufOffset()};
79 const auto actual_addr{memory_manager.Read<u64>(addr)};
80 const auto size{memory_manager.Read<u32>(addr + 8)};
81
82 // Look up global region in the cache based on address
83 const auto& host_ptr{memory_manager.GetPointer(actual_addr)};
84 GlobalRegion region{TryGet(host_ptr)};
85
86 if (!region) {
87 // No global region found - create a new one
88 region = GetUncachedGlobalRegion(actual_addr, size, host_ptr);
89 Register(region);
90 }
91
92 return region;
93}
94
24} // namespace OpenGL 95} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_global_cache.h b/src/video_core/renderer_opengl/gl_global_cache.h
index 406a735bc..5a21ab66f 100644
--- a/src/video_core/renderer_opengl/gl_global_cache.h
+++ b/src/video_core/renderer_opengl/gl_global_cache.h
@@ -5,9 +5,13 @@
5#pragma once 5#pragma once
6 6
7#include <memory> 7#include <memory>
8#include <unordered_map>
9
8#include <glad/glad.h> 10#include <glad/glad.h>
9 11
12#include "common/assert.h"
10#include "common/common_types.h" 13#include "common/common_types.h"
14#include "video_core/engines/maxwell_3d.h"
11#include "video_core/rasterizer_cache.h" 15#include "video_core/rasterizer_cache.h"
12#include "video_core/renderer_opengl/gl_resource_manager.h" 16#include "video_core/renderer_opengl/gl_resource_manager.h"
13 17
@@ -23,15 +27,13 @@ using GlobalRegion = std::shared_ptr<CachedGlobalRegion>;
23 27
24class CachedGlobalRegion final : public RasterizerCacheObject { 28class CachedGlobalRegion final : public RasterizerCacheObject {
25public: 29public:
26 explicit CachedGlobalRegion(VAddr addr, u32 size); 30 explicit CachedGlobalRegion(VAddr cpu_addr, u32 size, u8* host_ptr);
27 31
28 /// Gets the address of the shader in guest memory, required for cache management 32 VAddr GetCpuAddr() const override {
29 VAddr GetAddr() const { 33 return cpu_addr;
30 return addr;
31 } 34 }
32 35
33 /// Gets the size of the shader in guest memory, required for cache management 36 std::size_t GetSizeInBytes() const override {
34 std::size_t GetSizeInBytes() const {
35 return size; 37 return size;
36 } 38 }
37 39
@@ -40,21 +42,34 @@ public:
40 return buffer.handle; 42 return buffer.handle;
41 } 43 }
42 44
45 /// Reloads the global region from guest memory
46 void Reload(u32 size_);
47
43 // TODO(Rodrigo): When global memory is written (STG), implement flushing 48 // TODO(Rodrigo): When global memory is written (STG), implement flushing
44 void Flush() override { 49 void Flush() override {
45 UNIMPLEMENTED(); 50 UNIMPLEMENTED();
46 } 51 }
47 52
48private: 53private:
49 VAddr addr{}; 54 VAddr cpu_addr{};
50 u32 size{}; 55 u32 size{};
51
52 OGLBuffer buffer; 56 OGLBuffer buffer;
53}; 57};
54 58
55class GlobalRegionCacheOpenGL final : public RasterizerCache<GlobalRegion> { 59class GlobalRegionCacheOpenGL final : public RasterizerCache<GlobalRegion> {
56public: 60public:
57 explicit GlobalRegionCacheOpenGL(RasterizerOpenGL& rasterizer); 61 explicit GlobalRegionCacheOpenGL(RasterizerOpenGL& rasterizer);
62
63 /// Gets the current specified shader stage program
64 GlobalRegion GetGlobalRegion(const GLShader::GlobalMemoryEntry& descriptor,
65 Tegra::Engines::Maxwell3D::Regs::ShaderStage stage);
66
67private:
68 GlobalRegion TryGetReservedGlobalRegion(CacheAddr addr, u32 size) const;
69 GlobalRegion GetUncachedGlobalRegion(GPUVAddr addr, u32 size, u8* host_ptr);
70 void ReserveGlobalRegion(GlobalRegion region);
71
72 std::unordered_map<CacheAddr, GlobalRegion> reserve;
58}; 73};
59 74
60} // namespace OpenGL 75} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_primitive_assembler.cpp b/src/video_core/renderer_opengl/gl_primitive_assembler.cpp
index d9ed08437..c3e94d917 100644
--- a/src/video_core/renderer_opengl/gl_primitive_assembler.cpp
+++ b/src/video_core/renderer_opengl/gl_primitive_assembler.cpp
@@ -7,7 +7,7 @@
7#include "common/assert.h" 7#include "common/assert.h"
8#include "common/common_types.h" 8#include "common/common_types.h"
9#include "core/core.h" 9#include "core/core.h"
10#include "core/memory.h" 10#include "video_core/memory_manager.h"
11#include "video_core/renderer_opengl/gl_buffer_cache.h" 11#include "video_core/renderer_opengl/gl_buffer_cache.h"
12#include "video_core/renderer_opengl/gl_primitive_assembler.h" 12#include "video_core/renderer_opengl/gl_primitive_assembler.h"
13 13
@@ -40,14 +40,12 @@ GLintptr PrimitiveAssembler::MakeQuadArray(u32 first, u32 count) {
40 return index_offset; 40 return index_offset;
41} 41}
42 42
43GLintptr PrimitiveAssembler::MakeQuadIndexed(Tegra::GPUVAddr gpu_addr, std::size_t index_size, 43GLintptr PrimitiveAssembler::MakeQuadIndexed(GPUVAddr gpu_addr, std::size_t index_size, u32 count) {
44 u32 count) {
45 const std::size_t map_size{CalculateQuadSize(count)}; 44 const std::size_t map_size{CalculateQuadSize(count)};
46 auto [dst_pointer, index_offset] = buffer_cache.ReserveMemory(map_size); 45 auto [dst_pointer, index_offset] = buffer_cache.ReserveMemory(map_size);
47 46
48 auto& memory_manager = Core::System::GetInstance().GPU().MemoryManager(); 47 auto& memory_manager = Core::System::GetInstance().GPU().MemoryManager();
49 const std::optional<VAddr> cpu_addr{memory_manager.GpuToCpuAddress(gpu_addr)}; 48 const u8* source{memory_manager.GetPointer(gpu_addr)};
50 const u8* source{Memory::GetPointer(*cpu_addr)};
51 49
52 for (u32 primitive = 0; primitive < count / 4; ++primitive) { 50 for (u32 primitive = 0; primitive < count / 4; ++primitive) {
53 for (std::size_t i = 0; i < TRIANGLES_PER_QUAD; ++i) { 51 for (std::size_t i = 0; i < TRIANGLES_PER_QUAD; ++i) {
@@ -62,4 +60,4 @@ GLintptr PrimitiveAssembler::MakeQuadIndexed(Tegra::GPUVAddr gpu_addr, std::size
62 return index_offset; 60 return index_offset;
63} 61}
64 62
65} // namespace OpenGL \ No newline at end of file 63} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_primitive_assembler.h b/src/video_core/renderer_opengl/gl_primitive_assembler.h
index a8cb88eb5..4e87ce4d6 100644
--- a/src/video_core/renderer_opengl/gl_primitive_assembler.h
+++ b/src/video_core/renderer_opengl/gl_primitive_assembler.h
@@ -4,11 +4,9 @@
4 4
5#pragma once 5#pragma once
6 6
7#include <vector>
8#include <glad/glad.h> 7#include <glad/glad.h>
9 8
10#include "common/common_types.h" 9#include "common/common_types.h"
11#include "video_core/memory_manager.h"
12 10
13namespace OpenGL { 11namespace OpenGL {
14 12
@@ -24,7 +22,7 @@ public:
24 22
25 GLintptr MakeQuadArray(u32 first, u32 count); 23 GLintptr MakeQuadArray(u32 first, u32 count);
26 24
27 GLintptr MakeQuadIndexed(Tegra::GPUVAddr gpu_addr, std::size_t index_size, u32 count); 25 GLintptr MakeQuadIndexed(GPUVAddr gpu_addr, std::size_t index_size, u32 count);
28 26
29private: 27private:
30 OGLBufferCache& buffer_cache; 28 OGLBufferCache& buffer_cache;
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp
index 2bf086902..d250d5cbb 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -17,15 +17,14 @@
17#include "common/microprofile.h" 17#include "common/microprofile.h"
18#include "common/scope_exit.h" 18#include "common/scope_exit.h"
19#include "core/core.h" 19#include "core/core.h"
20#include "core/frontend/emu_window.h"
21#include "core/hle/kernel/process.h" 20#include "core/hle/kernel/process.h"
22#include "core/settings.h" 21#include "core/settings.h"
23#include "video_core/engines/maxwell_3d.h" 22#include "video_core/engines/maxwell_3d.h"
24#include "video_core/renderer_opengl/gl_rasterizer.h" 23#include "video_core/renderer_opengl/gl_rasterizer.h"
24#include "video_core/renderer_opengl/gl_shader_cache.h"
25#include "video_core/renderer_opengl/gl_shader_gen.h" 25#include "video_core/renderer_opengl/gl_shader_gen.h"
26#include "video_core/renderer_opengl/maxwell_to_gl.h" 26#include "video_core/renderer_opengl/maxwell_to_gl.h"
27#include "video_core/renderer_opengl/renderer_opengl.h" 27#include "video_core/renderer_opengl/renderer_opengl.h"
28#include "video_core/video_core.h"
29 28
30namespace OpenGL { 29namespace OpenGL {
31 30
@@ -99,9 +98,9 @@ struct FramebufferCacheKey {
99 } 98 }
100}; 99};
101 100
102RasterizerOpenGL::RasterizerOpenGL(Core::Frontend::EmuWindow& window, ScreenInfo& info) 101RasterizerOpenGL::RasterizerOpenGL(Core::System& system, ScreenInfo& info)
103 : res_cache{*this}, shader_cache{*this}, emu_window{window}, screen_info{info}, 102 : res_cache{*this}, shader_cache{*this, system}, global_cache{*this}, system{system},
104 buffer_cache(*this, STREAM_BUFFER_SIZE), global_cache{*this} { 103 screen_info{info}, buffer_cache(*this, STREAM_BUFFER_SIZE) {
105 // Create sampler objects 104 // Create sampler objects
106 for (std::size_t i = 0; i < texture_samplers.size(); ++i) { 105 for (std::size_t i = 0; i < texture_samplers.size(); ++i) {
107 texture_samplers[i].Create(); 106 texture_samplers[i].Create();
@@ -116,7 +115,7 @@ RasterizerOpenGL::RasterizerOpenGL(Core::Frontend::EmuWindow& window, ScreenInfo
116 115
117 glGetIntegerv(GL_UNIFORM_BUFFER_OFFSET_ALIGNMENT, &uniform_buffer_alignment); 116 glGetIntegerv(GL_UNIFORM_BUFFER_OFFSET_ALIGNMENT, &uniform_buffer_alignment);
118 117
119 LOG_CRITICAL(Render_OpenGL, "Sync fixed function OpenGL state here!"); 118 LOG_DEBUG(Render_OpenGL, "Sync fixed function OpenGL state here");
120 CheckExtensions(); 119 CheckExtensions();
121} 120}
122 121
@@ -136,7 +135,7 @@ void RasterizerOpenGL::CheckExtensions() {
136} 135}
137 136
138GLuint RasterizerOpenGL::SetupVertexFormat() { 137GLuint RasterizerOpenGL::SetupVertexFormat() {
139 auto& gpu = Core::System::GetInstance().GPU().Maxwell3D(); 138 auto& gpu = system.GPU().Maxwell3D();
140 const auto& regs = gpu.regs; 139 const auto& regs = gpu.regs;
141 140
142 if (!gpu.dirty_flags.vertex_attrib_format) { 141 if (!gpu.dirty_flags.vertex_attrib_format) {
@@ -175,7 +174,7 @@ GLuint RasterizerOpenGL::SetupVertexFormat() {
175 continue; 174 continue;
176 175
177 const auto& buffer = regs.vertex_array[attrib.buffer]; 176 const auto& buffer = regs.vertex_array[attrib.buffer];
178 LOG_TRACE(HW_GPU, 177 LOG_TRACE(Render_OpenGL,
179 "vertex attrib {}, count={}, size={}, type={}, offset={}, normalize={}", 178 "vertex attrib {}, count={}, size={}, type={}, offset={}, normalize={}",
180 index, attrib.ComponentCount(), attrib.SizeString(), attrib.TypeString(), 179 index, attrib.ComponentCount(), attrib.SizeString(), attrib.TypeString(),
181 attrib.offset.Value(), attrib.IsNormalized()); 180 attrib.offset.Value(), attrib.IsNormalized());
@@ -198,32 +197,32 @@ GLuint RasterizerOpenGL::SetupVertexFormat() {
198 } 197 }
199 198
200 // Rebinding the VAO invalidates the vertex buffer bindings. 199 // Rebinding the VAO invalidates the vertex buffer bindings.
201 gpu.dirty_flags.vertex_array = 0xFFFFFFFF; 200 gpu.dirty_flags.vertex_array.set();
202 201
203 state.draw.vertex_array = vao_entry.handle; 202 state.draw.vertex_array = vao_entry.handle;
204 return vao_entry.handle; 203 return vao_entry.handle;
205} 204}
206 205
207void RasterizerOpenGL::SetupVertexBuffer(GLuint vao) { 206void RasterizerOpenGL::SetupVertexBuffer(GLuint vao) {
208 auto& gpu = Core::System::GetInstance().GPU().Maxwell3D(); 207 auto& gpu = system.GPU().Maxwell3D();
209 const auto& regs = gpu.regs; 208 const auto& regs = gpu.regs;
210 209
211 if (!gpu.dirty_flags.vertex_array) 210 if (gpu.dirty_flags.vertex_array.none())
212 return; 211 return;
213 212
214 MICROPROFILE_SCOPE(OpenGL_VB); 213 MICROPROFILE_SCOPE(OpenGL_VB);
215 214
216 // Upload all guest vertex arrays sequentially to our buffer 215 // Upload all guest vertex arrays sequentially to our buffer
217 for (u32 index = 0; index < Maxwell::NumVertexArrays; ++index) { 216 for (u32 index = 0; index < Maxwell::NumVertexArrays; ++index) {
218 if (~gpu.dirty_flags.vertex_array & (1u << index)) 217 if (!gpu.dirty_flags.vertex_array[index])
219 continue; 218 continue;
220 219
221 const auto& vertex_array = regs.vertex_array[index]; 220 const auto& vertex_array = regs.vertex_array[index];
222 if (!vertex_array.IsEnabled()) 221 if (!vertex_array.IsEnabled())
223 continue; 222 continue;
224 223
225 const Tegra::GPUVAddr start = vertex_array.StartAddress(); 224 const GPUVAddr start = vertex_array.StartAddress();
226 const Tegra::GPUVAddr end = regs.vertex_array_limit[index].LimitAddress(); 225 const GPUVAddr end = regs.vertex_array_limit[index].LimitAddress();
227 226
228 ASSERT(end > start); 227 ASSERT(end > start);
229 const u64 size = end - start + 1; 228 const u64 size = end - start + 1;
@@ -242,11 +241,11 @@ void RasterizerOpenGL::SetupVertexBuffer(GLuint vao) {
242 } 241 }
243 } 242 }
244 243
245 gpu.dirty_flags.vertex_array = 0; 244 gpu.dirty_flags.vertex_array.reset();
246} 245}
247 246
248DrawParameters RasterizerOpenGL::SetupDraw() { 247DrawParameters RasterizerOpenGL::SetupDraw() {
249 const auto& gpu = Core::System::GetInstance().GPU().Maxwell3D(); 248 const auto& gpu = system.GPU().Maxwell3D();
250 const auto& regs = gpu.regs; 249 const auto& regs = gpu.regs;
251 const bool is_indexed = accelerate_draw == AccelDraw::Indexed; 250 const bool is_indexed = accelerate_draw == AccelDraw::Indexed;
252 251
@@ -295,14 +294,15 @@ DrawParameters RasterizerOpenGL::SetupDraw() {
295 294
296void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) { 295void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) {
297 MICROPROFILE_SCOPE(OpenGL_Shader); 296 MICROPROFILE_SCOPE(OpenGL_Shader);
298 auto& gpu = Core::System::GetInstance().GPU().Maxwell3D(); 297 auto& gpu = system.GPU().Maxwell3D();
299 298
300 // Next available bindpoints to use when uploading the const buffers and textures to the GLSL 299 BaseBindings base_bindings;
301 // shaders. The constbuffer bindpoint starts after the shader stage configuration bind points.
302 u32 current_constbuffer_bindpoint = Tegra::Engines::Maxwell3D::Regs::MaxShaderStage;
303 u32 current_texture_bindpoint = 0;
304 std::array<bool, Maxwell::NumClipDistances> clip_distances{}; 300 std::array<bool, Maxwell::NumClipDistances> clip_distances{};
305 301
302 // Prepare packed bindings
303 bind_ubo_pushbuffer.Setup(base_bindings.cbuf);
304 bind_ssbo_pushbuffer.Setup(base_bindings.gmem);
305
306 for (std::size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) { 306 for (std::size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) {
307 const auto& shader_config = gpu.regs.shader_config[index]; 307 const auto& shader_config = gpu.regs.shader_config[index];
308 const Maxwell::ShaderProgram program{static_cast<Maxwell::ShaderProgram>(index)}; 308 const Maxwell::ShaderProgram program{static_cast<Maxwell::ShaderProgram>(index)};
@@ -320,47 +320,38 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) {
320 const std::size_t stage{index == 0 ? 0 : index - 1}; // Stage indices are 0 - 5 320 const std::size_t stage{index == 0 ? 0 : index - 1}; // Stage indices are 0 - 5
321 321
322 GLShader::MaxwellUniformData ubo{}; 322 GLShader::MaxwellUniformData ubo{};
323 ubo.SetFromRegs(gpu.state.shader_stages[stage]); 323 ubo.SetFromRegs(gpu, stage);
324 const GLintptr offset = buffer_cache.UploadHostMemory( 324 const GLintptr offset = buffer_cache.UploadHostMemory(
325 &ubo, sizeof(ubo), static_cast<std::size_t>(uniform_buffer_alignment)); 325 &ubo, sizeof(ubo), static_cast<std::size_t>(uniform_buffer_alignment));
326 326
327 // Bind the buffer 327 // Bind the emulation info buffer
328 glBindBufferRange(GL_UNIFORM_BUFFER, static_cast<GLuint>(stage), buffer_cache.GetHandle(), 328 bind_ubo_pushbuffer.Push(buffer_cache.GetHandle(), offset,
329 offset, static_cast<GLsizeiptr>(sizeof(ubo))); 329 static_cast<GLsizeiptr>(sizeof(ubo)));
330 330
331 Shader shader{shader_cache.GetStageProgram(program)}; 331 Shader shader{shader_cache.GetStageProgram(program)};
332 const auto [program_handle, next_bindings] =
333 shader->GetProgramHandle(primitive_mode, base_bindings);
332 334
333 switch (program) { 335 switch (program) {
334 case Maxwell::ShaderProgram::VertexA: 336 case Maxwell::ShaderProgram::VertexA:
335 case Maxwell::ShaderProgram::VertexB: { 337 case Maxwell::ShaderProgram::VertexB:
336 shader_program_manager->UseProgrammableVertexShader( 338 shader_program_manager->UseProgrammableVertexShader(program_handle);
337 shader->GetProgramHandle(primitive_mode));
338 break; 339 break;
339 } 340 case Maxwell::ShaderProgram::Geometry:
340 case Maxwell::ShaderProgram::Geometry: { 341 shader_program_manager->UseProgrammableGeometryShader(program_handle);
341 shader_program_manager->UseProgrammableGeometryShader(
342 shader->GetProgramHandle(primitive_mode));
343 break; 342 break;
344 } 343 case Maxwell::ShaderProgram::Fragment:
345 case Maxwell::ShaderProgram::Fragment: { 344 shader_program_manager->UseProgrammableFragmentShader(program_handle);
346 shader_program_manager->UseProgrammableFragmentShader(
347 shader->GetProgramHandle(primitive_mode));
348 break; 345 break;
349 }
350 default: 346 default:
351 LOG_CRITICAL(HW_GPU, "Unimplemented shader index={}, enable={}, offset=0x{:08X}", index, 347 UNIMPLEMENTED_MSG("Unimplemented shader index={}, enable={}, offset=0x{:08X}", index,
352 shader_config.enable.Value(), shader_config.offset); 348 shader_config.enable.Value(), shader_config.offset);
353 UNREACHABLE();
354 } 349 }
355 350
356 // Configure the const buffers for this shader stage. 351 const auto stage_enum = static_cast<Maxwell::ShaderStage>(stage);
357 current_constbuffer_bindpoint = 352 SetupConstBuffers(stage_enum, shader, program_handle, base_bindings);
358 SetupConstBuffers(static_cast<Maxwell::ShaderStage>(stage), shader, primitive_mode, 353 SetupGlobalRegions(stage_enum, shader, program_handle, base_bindings);
359 current_constbuffer_bindpoint); 354 SetupTextures(stage_enum, shader, program_handle, base_bindings);
360
361 // Configure the textures for this shader stage.
362 current_texture_bindpoint = SetupTextures(static_cast<Maxwell::ShaderStage>(stage), shader,
363 primitive_mode, current_texture_bindpoint);
364 355
365 // Workaround for Intel drivers. 356 // Workaround for Intel drivers.
366 // When a clip distance is enabled but not set in the shader it crops parts of the screen 357 // When a clip distance is enabled but not set in the shader it crops parts of the screen
@@ -375,8 +366,13 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) {
375 // VertexB was combined with VertexA, so we skip the VertexB iteration 366 // VertexB was combined with VertexA, so we skip the VertexB iteration
376 index++; 367 index++;
377 } 368 }
369
370 base_bindings = next_bindings;
378 } 371 }
379 372
373 bind_ubo_pushbuffer.Bind();
374 bind_ssbo_pushbuffer.Bind();
375
380 SyncClipEnabled(clip_distances); 376 SyncClipEnabled(clip_distances);
381 377
382 gpu.dirty_flags.shaders = false; 378 gpu.dirty_flags.shaders = false;
@@ -421,15 +417,15 @@ void RasterizerOpenGL::SetupCachedFramebuffer(const FramebufferCacheKey& fbkey,
421} 417}
422 418
423std::size_t RasterizerOpenGL::CalculateVertexArraysSize() const { 419std::size_t RasterizerOpenGL::CalculateVertexArraysSize() const {
424 const auto& regs = Core::System::GetInstance().GPU().Maxwell3D().regs; 420 const auto& regs = system.GPU().Maxwell3D().regs;
425 421
426 std::size_t size = 0; 422 std::size_t size = 0;
427 for (u32 index = 0; index < Maxwell::NumVertexArrays; ++index) { 423 for (u32 index = 0; index < Maxwell::NumVertexArrays; ++index) {
428 if (!regs.vertex_array[index].IsEnabled()) 424 if (!regs.vertex_array[index].IsEnabled())
429 continue; 425 continue;
430 426
431 const Tegra::GPUVAddr start = regs.vertex_array[index].StartAddress(); 427 const GPUVAddr start = regs.vertex_array[index].StartAddress();
432 const Tegra::GPUVAddr end = regs.vertex_array_limit[index].LimitAddress(); 428 const GPUVAddr end = regs.vertex_array_limit[index].LimitAddress();
433 429
434 ASSERT(end > start); 430 ASSERT(end > start);
435 size += end - start + 1; 431 size += end - start + 1;
@@ -439,7 +435,7 @@ std::size_t RasterizerOpenGL::CalculateVertexArraysSize() const {
439} 435}
440 436
441std::size_t RasterizerOpenGL::CalculateIndexBufferSize() const { 437std::size_t RasterizerOpenGL::CalculateIndexBufferSize() const {
442 const auto& regs = Core::System::GetInstance().GPU().Maxwell3D().regs; 438 const auto& regs = system.GPU().Maxwell3D().regs;
443 439
444 return static_cast<std::size_t>(regs.index_array.count) * 440 return static_cast<std::size_t>(regs.index_array.count) *
445 static_cast<std::size_t>(regs.index_array.FormatSizeInBytes()); 441 static_cast<std::size_t>(regs.index_array.FormatSizeInBytes());
@@ -486,21 +482,26 @@ void RasterizerOpenGL::UpdatePagesCachedCount(VAddr addr, u64 size, int delta) {
486 cached_pages.add({pages_interval, delta}); 482 cached_pages.add({pages_interval, delta});
487} 483}
488 484
489void RasterizerOpenGL::ConfigureFramebuffers(OpenGLState& current_state, bool using_color_fb, 485void RasterizerOpenGL::LoadDiskResources(const std::atomic_bool& stop_loading,
490 bool using_depth_fb, bool preserve_contents, 486 const VideoCore::DiskResourceLoadCallback& callback) {
491 std::optional<std::size_t> single_color_target) { 487 shader_cache.LoadDiskCache(stop_loading, callback);
488}
489
490std::pair<bool, bool> RasterizerOpenGL::ConfigureFramebuffers(
491 OpenGLState& current_state, bool using_color_fb, bool using_depth_fb, bool preserve_contents,
492 std::optional<std::size_t> single_color_target) {
492 MICROPROFILE_SCOPE(OpenGL_Framebuffer); 493 MICROPROFILE_SCOPE(OpenGL_Framebuffer);
493 const auto& gpu = Core::System::GetInstance().GPU().Maxwell3D(); 494 auto& gpu = system.GPU().Maxwell3D();
494 const auto& regs = gpu.regs; 495 const auto& regs = gpu.regs;
495 496
496 const FramebufferConfigState fb_config_state{using_color_fb, using_depth_fb, preserve_contents, 497 const FramebufferConfigState fb_config_state{using_color_fb, using_depth_fb, preserve_contents,
497 single_color_target}; 498 single_color_target};
498 if (fb_config_state == current_framebuffer_config_state && gpu.dirty_flags.color_buffer == 0 && 499 if (fb_config_state == current_framebuffer_config_state &&
499 !gpu.dirty_flags.zeta_buffer) { 500 gpu.dirty_flags.color_buffer.none() && !gpu.dirty_flags.zeta_buffer) {
500 // Only skip if the previous ConfigureFramebuffers call was from the same kind (multiple or 501 // Only skip if the previous ConfigureFramebuffers call was from the same kind (multiple or
501 // single color targets). This is done because the guest registers may not change but the 502 // single color targets). This is done because the guest registers may not change but the
502 // host framebuffer may contain different attachments 503 // host framebuffer may contain different attachments
503 return; 504 return current_depth_stencil_usage;
504 } 505 }
505 current_framebuffer_config_state = fb_config_state; 506 current_framebuffer_config_state = fb_config_state;
506 507
@@ -509,10 +510,7 @@ void RasterizerOpenGL::ConfigureFramebuffers(OpenGLState& current_state, bool us
509 depth_surface = res_cache.GetDepthBufferSurface(preserve_contents); 510 depth_surface = res_cache.GetDepthBufferSurface(preserve_contents);
510 } 511 }
511 512
512 // TODO(bunnei): Figure out how the below register works. According to envytools, this should be 513 UNIMPLEMENTED_IF(regs.rt_separate_frag_data == 0);
513 // used to enable multiple render targets. However, it is left unset on all games that I have
514 // tested.
515 UNIMPLEMENTED_IF(regs.rt_separate_frag_data != 0);
516 514
517 // Bind the framebuffer surfaces 515 // Bind the framebuffer surfaces
518 current_state.framebuffer_srgb.enabled = regs.framebuffer_srgb != 0; 516 current_state.framebuffer_srgb.enabled = regs.framebuffer_srgb != 0;
@@ -573,19 +571,21 @@ void RasterizerOpenGL::ConfigureFramebuffers(OpenGLState& current_state, bool us
573 depth_surface->MarkAsModified(true, res_cache); 571 depth_surface->MarkAsModified(true, res_cache);
574 572
575 fbkey.zeta = depth_surface->Texture().handle; 573 fbkey.zeta = depth_surface->Texture().handle;
576 fbkey.stencil_enable = regs.stencil_enable; 574 fbkey.stencil_enable = regs.stencil_enable &&
575 depth_surface->GetSurfaceParams().type == SurfaceType::DepthStencil;
577 } 576 }
578 577
579 SetupCachedFramebuffer(fbkey, current_state); 578 SetupCachedFramebuffer(fbkey, current_state);
580
581 SyncViewport(current_state); 579 SyncViewport(current_state);
580
581 return current_depth_stencil_usage = {static_cast<bool>(depth_surface), fbkey.stencil_enable};
582} 582}
583 583
584void RasterizerOpenGL::Clear() { 584void RasterizerOpenGL::Clear() {
585 const auto prev_state{state}; 585 const auto prev_state{state};
586 SCOPE_EXIT({ prev_state.Apply(); }); 586 SCOPE_EXIT({ prev_state.Apply(); });
587 587
588 const auto& regs = Core::System::GetInstance().GPU().Maxwell3D().regs; 588 const auto& regs = system.GPU().Maxwell3D().regs;
589 bool use_color{}; 589 bool use_color{};
590 bool use_depth{}; 590 bool use_depth{};
591 bool use_stencil{}; 591 bool use_stencil{};
@@ -646,10 +646,8 @@ void RasterizerOpenGL::Clear() {
646 return; 646 return;
647 } 647 }
648 648
649 ScopeAcquireGLContext acquire_context{emu_window}; 649 const auto [clear_depth, clear_stencil] = ConfigureFramebuffers(
650 650 clear_state, use_color, use_depth || use_stencil, false, regs.clear_buffers.RT.Value());
651 ConfigureFramebuffers(clear_state, use_color, use_depth || use_stencil, false,
652 regs.clear_buffers.RT.Value());
653 if (regs.clear_flags.scissor) { 651 if (regs.clear_flags.scissor) {
654 SyncScissorTest(clear_state); 652 SyncScissorTest(clear_state);
655 } 653 }
@@ -664,11 +662,11 @@ void RasterizerOpenGL::Clear() {
664 glClearBufferfv(GL_COLOR, regs.clear_buffers.RT, regs.clear_color); 662 glClearBufferfv(GL_COLOR, regs.clear_buffers.RT, regs.clear_color);
665 } 663 }
666 664
667 if (use_depth && use_stencil) { 665 if (clear_depth && clear_stencil) {
668 glClearBufferfi(GL_DEPTH_STENCIL, 0, regs.clear_depth, regs.clear_stencil); 666 glClearBufferfi(GL_DEPTH_STENCIL, 0, regs.clear_depth, regs.clear_stencil);
669 } else if (use_depth) { 667 } else if (clear_depth) {
670 glClearBufferfv(GL_DEPTH, 0, &regs.clear_depth); 668 glClearBufferfv(GL_DEPTH, 0, &regs.clear_depth);
671 } else if (use_stencil) { 669 } else if (clear_stencil) {
672 glClearBufferiv(GL_STENCIL, 0, &regs.clear_stencil); 670 glClearBufferiv(GL_STENCIL, 0, &regs.clear_stencil);
673 } 671 }
674} 672}
@@ -678,11 +676,9 @@ void RasterizerOpenGL::DrawArrays() {
678 return; 676 return;
679 677
680 MICROPROFILE_SCOPE(OpenGL_Drawing); 678 MICROPROFILE_SCOPE(OpenGL_Drawing);
681 auto& gpu = Core::System::GetInstance().GPU().Maxwell3D(); 679 auto& gpu = system.GPU().Maxwell3D();
682 const auto& regs = gpu.regs; 680 const auto& regs = gpu.regs;
683 681
684 ScopeAcquireGLContext acquire_context{emu_window};
685
686 ConfigureFramebuffers(state); 682 ConfigureFramebuffers(state);
687 SyncColorMask(); 683 SyncColorMask();
688 SyncFragmentColorClampState(); 684 SyncFragmentColorClampState();
@@ -728,10 +724,10 @@ void RasterizerOpenGL::DrawArrays() {
728 // Add space for at least 18 constant buffers 724 // Add space for at least 18 constant buffers
729 buffer_size += Maxwell::MaxConstBuffers * (MaxConstbufferSize + uniform_buffer_alignment); 725 buffer_size += Maxwell::MaxConstBuffers * (MaxConstbufferSize + uniform_buffer_alignment);
730 726
731 bool invalidate = buffer_cache.Map(buffer_size); 727 const bool invalidate = buffer_cache.Map(buffer_size);
732 if (invalidate) { 728 if (invalidate) {
733 // As all cached buffers are invalidated, we need to recheck their state. 729 // As all cached buffers are invalidated, we need to recheck their state.
734 gpu.dirty_flags.vertex_array = 0xFFFFFFFF; 730 gpu.dirty_flags.vertex_array.set();
735 } 731 }
736 732
737 const GLuint vao = SetupVertexFormat(); 733 const GLuint vao = SetupVertexFormat();
@@ -745,60 +741,45 @@ void RasterizerOpenGL::DrawArrays() {
745 shader_program_manager->ApplyTo(state); 741 shader_program_manager->ApplyTo(state);
746 state.Apply(); 742 state.Apply();
747 743
748 // Execute draw call 744 res_cache.SignalPreDrawCall();
749 params.DispatchDraw(); 745 params.DispatchDraw();
750 746 res_cache.SignalPostDrawCall();
751 // Disable scissor test
752 state.viewports[0].scissor.enabled = false;
753 747
754 accelerate_draw = AccelDraw::Disabled; 748 accelerate_draw = AccelDraw::Disabled;
755
756 // Unbind textures for potential future use as framebuffer attachments
757 for (auto& texture_unit : state.texture_units) {
758 texture_unit.Unbind();
759 }
760 state.Apply();
761} 749}
762 750
763void RasterizerOpenGL::FlushAll() {} 751void RasterizerOpenGL::FlushAll() {}
764 752
765void RasterizerOpenGL::FlushRegion(VAddr addr, u64 size) { 753void RasterizerOpenGL::FlushRegion(CacheAddr addr, u64 size) {
766 MICROPROFILE_SCOPE(OpenGL_CacheManagement); 754 MICROPROFILE_SCOPE(OpenGL_CacheManagement);
767 755 if (!addr || !size) {
768 if (Settings::values.use_accurate_gpu_emulation) { 756 return;
769 // Only flush if use_accurate_gpu_emulation is enabled, as it incurs a performance hit
770 res_cache.FlushRegion(addr, size);
771 } 757 }
758 res_cache.FlushRegion(addr, size);
772} 759}
773 760
774void RasterizerOpenGL::InvalidateRegion(VAddr addr, u64 size) { 761void RasterizerOpenGL::InvalidateRegion(CacheAddr addr, u64 size) {
775 MICROPROFILE_SCOPE(OpenGL_CacheManagement); 762 MICROPROFILE_SCOPE(OpenGL_CacheManagement);
763 if (!addr || !size) {
764 return;
765 }
776 res_cache.InvalidateRegion(addr, size); 766 res_cache.InvalidateRegion(addr, size);
777 shader_cache.InvalidateRegion(addr, size); 767 shader_cache.InvalidateRegion(addr, size);
778 global_cache.InvalidateRegion(addr, size); 768 global_cache.InvalidateRegion(addr, size);
779 buffer_cache.InvalidateRegion(addr, size); 769 buffer_cache.InvalidateRegion(addr, size);
780} 770}
781 771
782void RasterizerOpenGL::FlushAndInvalidateRegion(VAddr addr, u64 size) { 772void RasterizerOpenGL::FlushAndInvalidateRegion(CacheAddr addr, u64 size) {
783 FlushRegion(addr, size); 773 FlushRegion(addr, size);
784 InvalidateRegion(addr, size); 774 InvalidateRegion(addr, size);
785} 775}
786 776
787bool RasterizerOpenGL::AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Regs::Surface& src, 777bool RasterizerOpenGL::AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Regs::Surface& src,
788 const Tegra::Engines::Fermi2D::Regs::Surface& dst) { 778 const Tegra::Engines::Fermi2D::Regs::Surface& dst,
779 const Common::Rectangle<u32>& src_rect,
780 const Common::Rectangle<u32>& dst_rect) {
789 MICROPROFILE_SCOPE(OpenGL_Blits); 781 MICROPROFILE_SCOPE(OpenGL_Blits);
790 782 res_cache.FermiCopySurface(src, dst, src_rect, dst_rect);
791 if (Settings::values.use_accurate_gpu_emulation) {
792 // Skip the accelerated copy and perform a slow but more accurate copy
793 return false;
794 }
795
796 res_cache.FermiCopySurface(src, dst);
797 return true;
798}
799
800bool RasterizerOpenGL::AccelerateFill(const void* config) {
801 UNREACHABLE();
802 return true; 783 return true;
803} 784}
804 785
@@ -810,7 +791,7 @@ bool RasterizerOpenGL::AccelerateDisplay(const Tegra::FramebufferConfig& config,
810 791
811 MICROPROFILE_SCOPE(OpenGL_CacheManagement); 792 MICROPROFILE_SCOPE(OpenGL_CacheManagement);
812 793
813 const auto& surface{res_cache.TryFindFramebufferSurface(framebuffer_addr)}; 794 const auto& surface{res_cache.TryFindFramebufferSurface(Memory::GetPointer(framebuffer_addr))};
814 if (!surface) { 795 if (!surface) {
815 return {}; 796 return {};
816 } 797 }
@@ -821,7 +802,10 @@ bool RasterizerOpenGL::AccelerateDisplay(const Tegra::FramebufferConfig& config,
821 VideoCore::Surface::PixelFormatFromGPUPixelFormat(config.pixel_format)}; 802 VideoCore::Surface::PixelFormatFromGPUPixelFormat(config.pixel_format)};
822 ASSERT_MSG(params.width == config.width, "Framebuffer width is different"); 803 ASSERT_MSG(params.width == config.width, "Framebuffer width is different");
823 ASSERT_MSG(params.height == config.height, "Framebuffer height is different"); 804 ASSERT_MSG(params.height == config.height, "Framebuffer height is different");
824 ASSERT_MSG(params.pixel_format == pixel_format, "Framebuffer pixel_format is different"); 805
806 if (params.pixel_format != pixel_format) {
807 LOG_WARNING(Render_OpenGL, "Framebuffer pixel_format is different");
808 }
825 809
826 screen_info.display_texture = surface->Texture().handle; 810 screen_info.display_texture = surface->Texture().handle;
827 811
@@ -830,121 +814,98 @@ bool RasterizerOpenGL::AccelerateDisplay(const Tegra::FramebufferConfig& config,
830 814
831void RasterizerOpenGL::SamplerInfo::Create() { 815void RasterizerOpenGL::SamplerInfo::Create() {
832 sampler.Create(); 816 sampler.Create();
833 mag_filter = min_filter = Tegra::Texture::TextureFilter::Linear; 817 mag_filter = Tegra::Texture::TextureFilter::Linear;
834 wrap_u = wrap_v = wrap_p = Tegra::Texture::WrapMode::Wrap; 818 min_filter = Tegra::Texture::TextureFilter::Linear;
835 uses_depth_compare = false; 819 wrap_u = Tegra::Texture::WrapMode::Wrap;
820 wrap_v = Tegra::Texture::WrapMode::Wrap;
821 wrap_p = Tegra::Texture::WrapMode::Wrap;
822 use_depth_compare = false;
836 depth_compare_func = Tegra::Texture::DepthCompareFunc::Never; 823 depth_compare_func = Tegra::Texture::DepthCompareFunc::Never;
837 824
838 // default is GL_LINEAR_MIPMAP_LINEAR 825 // OpenGL's default is GL_LINEAR_MIPMAP_LINEAR
839 glSamplerParameteri(sampler.handle, GL_TEXTURE_MIN_FILTER, GL_LINEAR); 826 glSamplerParameteri(sampler.handle, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
840 // Other attributes have correct defaults
841 glSamplerParameteri(sampler.handle, GL_TEXTURE_COMPARE_FUNC, GL_NEVER); 827 glSamplerParameteri(sampler.handle, GL_TEXTURE_COMPARE_FUNC, GL_NEVER);
828
829 // Other attributes have correct defaults
842} 830}
843 831
844void RasterizerOpenGL::SamplerInfo::SyncWithConfig(const Tegra::Texture::TSCEntry& config) { 832void RasterizerOpenGL::SamplerInfo::SyncWithConfig(const Tegra::Texture::TSCEntry& config) {
845 const GLuint s = sampler.handle; 833 const GLuint sampler_id = sampler.handle;
846 if (mag_filter != config.mag_filter) { 834 if (mag_filter != config.mag_filter) {
847 mag_filter = config.mag_filter; 835 mag_filter = config.mag_filter;
848 glSamplerParameteri( 836 glSamplerParameteri(
849 s, GL_TEXTURE_MAG_FILTER, 837 sampler_id, GL_TEXTURE_MAG_FILTER,
850 MaxwellToGL::TextureFilterMode(mag_filter, Tegra::Texture::TextureMipmapFilter::None)); 838 MaxwellToGL::TextureFilterMode(mag_filter, Tegra::Texture::TextureMipmapFilter::None));
851 } 839 }
852 if (min_filter != config.min_filter || mip_filter != config.mip_filter) { 840 if (min_filter != config.min_filter || mipmap_filter != config.mipmap_filter) {
853 min_filter = config.min_filter; 841 min_filter = config.min_filter;
854 mip_filter = config.mip_filter; 842 mipmap_filter = config.mipmap_filter;
855 glSamplerParameteri(s, GL_TEXTURE_MIN_FILTER, 843 glSamplerParameteri(sampler_id, GL_TEXTURE_MIN_FILTER,
856 MaxwellToGL::TextureFilterMode(min_filter, mip_filter)); 844 MaxwellToGL::TextureFilterMode(min_filter, mipmap_filter));
857 } 845 }
858 846
859 if (wrap_u != config.wrap_u) { 847 if (wrap_u != config.wrap_u) {
860 wrap_u = config.wrap_u; 848 wrap_u = config.wrap_u;
861 glSamplerParameteri(s, GL_TEXTURE_WRAP_S, MaxwellToGL::WrapMode(wrap_u)); 849 glSamplerParameteri(sampler_id, GL_TEXTURE_WRAP_S, MaxwellToGL::WrapMode(wrap_u));
862 } 850 }
863 if (wrap_v != config.wrap_v) { 851 if (wrap_v != config.wrap_v) {
864 wrap_v = config.wrap_v; 852 wrap_v = config.wrap_v;
865 glSamplerParameteri(s, GL_TEXTURE_WRAP_T, MaxwellToGL::WrapMode(wrap_v)); 853 glSamplerParameteri(sampler_id, GL_TEXTURE_WRAP_T, MaxwellToGL::WrapMode(wrap_v));
866 } 854 }
867 if (wrap_p != config.wrap_p) { 855 if (wrap_p != config.wrap_p) {
868 wrap_p = config.wrap_p; 856 wrap_p = config.wrap_p;
869 glSamplerParameteri(s, GL_TEXTURE_WRAP_R, MaxwellToGL::WrapMode(wrap_p)); 857 glSamplerParameteri(sampler_id, GL_TEXTURE_WRAP_R, MaxwellToGL::WrapMode(wrap_p));
870 } 858 }
871 859
872 if (uses_depth_compare != (config.depth_compare_enabled == 1)) { 860 if (const bool enabled = config.depth_compare_enabled == 1; use_depth_compare != enabled) {
873 uses_depth_compare = (config.depth_compare_enabled == 1); 861 use_depth_compare = enabled;
874 if (uses_depth_compare) { 862 glSamplerParameteri(sampler_id, GL_TEXTURE_COMPARE_MODE,
875 glSamplerParameteri(s, GL_TEXTURE_COMPARE_MODE, GL_COMPARE_REF_TO_TEXTURE); 863 use_depth_compare ? GL_COMPARE_REF_TO_TEXTURE : GL_NONE);
876 } else {
877 glSamplerParameteri(s, GL_TEXTURE_COMPARE_MODE, GL_NONE);
878 }
879 } 864 }
880 865
881 if (depth_compare_func != config.depth_compare_func) { 866 if (depth_compare_func != config.depth_compare_func) {
882 depth_compare_func = config.depth_compare_func; 867 depth_compare_func = config.depth_compare_func;
883 glSamplerParameteri(s, GL_TEXTURE_COMPARE_FUNC, 868 glSamplerParameteri(sampler_id, GL_TEXTURE_COMPARE_FUNC,
884 MaxwellToGL::DepthCompareFunc(depth_compare_func)); 869 MaxwellToGL::DepthCompareFunc(depth_compare_func));
885 } 870 }
886 871
887 GLvec4 new_border_color; 872 if (const auto new_border_color = config.GetBorderColor(); border_color != new_border_color) {
888 if (config.srgb_conversion) {
889 new_border_color[0] = config.srgb_border_color_r / 255.0f;
890 new_border_color[1] = config.srgb_border_color_g / 255.0f;
891 new_border_color[2] = config.srgb_border_color_g / 255.0f;
892 } else {
893 new_border_color[0] = config.border_color_r;
894 new_border_color[1] = config.border_color_g;
895 new_border_color[2] = config.border_color_b;
896 }
897 new_border_color[3] = config.border_color_a;
898
899 if (border_color != new_border_color) {
900 border_color = new_border_color; 873 border_color = new_border_color;
901 glSamplerParameterfv(s, GL_TEXTURE_BORDER_COLOR, border_color.data()); 874 glSamplerParameterfv(sampler_id, GL_TEXTURE_BORDER_COLOR, border_color.data());
902 } 875 }
903 876
904 const float anisotropic_max = static_cast<float>(1 << config.max_anisotropy.Value()); 877 if (const float anisotropic = config.GetMaxAnisotropy(); max_anisotropic != anisotropic) {
905 if (anisotropic_max != max_anisotropic) { 878 max_anisotropic = anisotropic;
906 max_anisotropic = anisotropic_max;
907 if (GLAD_GL_ARB_texture_filter_anisotropic) { 879 if (GLAD_GL_ARB_texture_filter_anisotropic) {
908 glSamplerParameterf(s, GL_TEXTURE_MAX_ANISOTROPY, max_anisotropic); 880 glSamplerParameterf(sampler_id, GL_TEXTURE_MAX_ANISOTROPY, max_anisotropic);
909 } else if (GLAD_GL_EXT_texture_filter_anisotropic) { 881 } else if (GLAD_GL_EXT_texture_filter_anisotropic) {
910 glSamplerParameterf(s, GL_TEXTURE_MAX_ANISOTROPY_EXT, max_anisotropic); 882 glSamplerParameterf(sampler_id, GL_TEXTURE_MAX_ANISOTROPY_EXT, max_anisotropic);
911 } 883 }
912 } 884 }
913 const float lod_min = static_cast<float>(config.min_lod_clamp.Value()) / 256.0f;
914 if (lod_min != min_lod) {
915 min_lod = lod_min;
916 glSamplerParameterf(s, GL_TEXTURE_MIN_LOD, min_lod);
917 }
918 885
919 const float lod_max = static_cast<float>(config.max_lod_clamp.Value()) / 256.0f; 886 if (const float min = config.GetMinLod(); min_lod != min) {
920 if (lod_max != max_lod) { 887 min_lod = min;
921 max_lod = lod_max; 888 glSamplerParameterf(sampler_id, GL_TEXTURE_MIN_LOD, min_lod);
922 glSamplerParameterf(s, GL_TEXTURE_MAX_LOD, max_lod); 889 }
890 if (const float max = config.GetMaxLod(); max_lod != max) {
891 max_lod = max;
892 glSamplerParameterf(sampler_id, GL_TEXTURE_MAX_LOD, max_lod);
923 } 893 }
924 const u32 bias = config.mip_lod_bias.Value(); 894
925 // Sign extend the 13-bit value. 895 if (const float bias = config.GetLodBias(); lod_bias != bias) {
926 constexpr u32 mask = 1U << (13 - 1); 896 lod_bias = bias;
927 const float bias_lod = static_cast<s32>((bias ^ mask) - mask) / 256.f; 897 glSamplerParameterf(sampler_id, GL_TEXTURE_LOD_BIAS, lod_bias);
928 if (lod_bias != bias_lod) {
929 lod_bias = bias_lod;
930 glSamplerParameterf(s, GL_TEXTURE_LOD_BIAS, lod_bias);
931 } 898 }
932} 899}
933 900
934u32 RasterizerOpenGL::SetupConstBuffers(Maxwell::ShaderStage stage, Shader& shader, 901void RasterizerOpenGL::SetupConstBuffers(Tegra::Engines::Maxwell3D::Regs::ShaderStage stage,
935 GLenum primitive_mode, u32 current_bindpoint) { 902 const Shader& shader, GLuint program_handle,
903 BaseBindings base_bindings) {
936 MICROPROFILE_SCOPE(OpenGL_UBO); 904 MICROPROFILE_SCOPE(OpenGL_UBO);
937 const auto& gpu = Core::System::GetInstance().GPU(); 905 const auto& gpu = system.GPU();
938 const auto& maxwell3d = gpu.Maxwell3D(); 906 const auto& maxwell3d = gpu.Maxwell3D();
939 const auto& shader_stage = maxwell3d.state.shader_stages[static_cast<std::size_t>(stage)]; 907 const auto& shader_stage = maxwell3d.state.shader_stages[static_cast<std::size_t>(stage)];
940 const auto& entries = shader->GetShaderEntries().const_buffer_entries; 908 const auto& entries = shader->GetShaderEntries().const_buffers;
941
942 constexpr u64 max_binds = Tegra::Engines::Maxwell3D::Regs::MaxConstBuffers;
943 std::array<GLuint, max_binds> bind_buffers;
944 std::array<GLintptr, max_binds> bind_offsets;
945 std::array<GLsizeiptr, max_binds> bind_sizes;
946
947 ASSERT_MSG(entries.size() <= max_binds, "Exceeded expected number of binding points.");
948 909
949 // Upload only the enabled buffers from the 16 constbuffers of each shader stage 910 // Upload only the enabled buffers from the 16 constbuffers of each shader stage
950 for (u32 bindpoint = 0; bindpoint < entries.size(); ++bindpoint) { 911 for (u32 bindpoint = 0; bindpoint < entries.size(); ++bindpoint) {
@@ -952,10 +913,8 @@ u32 RasterizerOpenGL::SetupConstBuffers(Maxwell::ShaderStage stage, Shader& shad
952 const auto& buffer = shader_stage.const_buffers[used_buffer.GetIndex()]; 913 const auto& buffer = shader_stage.const_buffers[used_buffer.GetIndex()];
953 914
954 if (!buffer.enabled) { 915 if (!buffer.enabled) {
955 // With disabled buffers set values as zero to unbind them 916 // Set values to zero to unbind buffers
956 bind_buffers[bindpoint] = 0; 917 bind_ubo_pushbuffer.Push(0, 0, 0);
957 bind_offsets[bindpoint] = 0;
958 bind_sizes[bindpoint] = 0;
959 continue; 918 continue;
960 } 919 }
961 920
@@ -966,13 +925,13 @@ u32 RasterizerOpenGL::SetupConstBuffers(Maxwell::ShaderStage stage, Shader& shad
966 size = buffer.size; 925 size = buffer.size;
967 926
968 if (size > MaxConstbufferSize) { 927 if (size > MaxConstbufferSize) {
969 LOG_CRITICAL(HW_GPU, "indirect constbuffer size {} exceeds maximum {}", size, 928 LOG_WARNING(Render_OpenGL, "Indirect constbuffer size {} exceeds maximum {}", size,
970 MaxConstbufferSize); 929 MaxConstbufferSize);
971 size = MaxConstbufferSize; 930 size = MaxConstbufferSize;
972 } 931 }
973 } else { 932 } else {
974 // Buffer is accessed directly, upload just what we use 933 // Buffer is accessed directly, upload just what we use
975 size = used_buffer.GetSize() * sizeof(float); 934 size = used_buffer.GetSize();
976 } 935 }
977 936
978 // Align the actual size so it ends up being a multiple of vec4 to meet the OpenGL std140 937 // Align the actual size so it ends up being a multiple of vec4 to meet the OpenGL std140
@@ -980,79 +939,56 @@ u32 RasterizerOpenGL::SetupConstBuffers(Maxwell::ShaderStage stage, Shader& shad
980 size = Common::AlignUp(size, sizeof(GLvec4)); 939 size = Common::AlignUp(size, sizeof(GLvec4));
981 ASSERT_MSG(size <= MaxConstbufferSize, "Constbuffer too big"); 940 ASSERT_MSG(size <= MaxConstbufferSize, "Constbuffer too big");
982 941
983 GLintptr const_buffer_offset = buffer_cache.UploadMemory( 942 const GLintptr const_buffer_offset = buffer_cache.UploadMemory(
984 buffer.address, size, static_cast<std::size_t>(uniform_buffer_alignment)); 943 buffer.address, size, static_cast<std::size_t>(uniform_buffer_alignment));
985 944
986 // Now configure the bindpoint of the buffer inside the shader 945 bind_ubo_pushbuffer.Push(buffer_cache.GetHandle(), const_buffer_offset, size);
987 glUniformBlockBinding(shader->GetProgramHandle(primitive_mode),
988 shader->GetProgramResourceIndex(used_buffer),
989 current_bindpoint + bindpoint);
990
991 // Prepare values for multibind
992 bind_buffers[bindpoint] = buffer_cache.GetHandle();
993 bind_offsets[bindpoint] = const_buffer_offset;
994 bind_sizes[bindpoint] = size;
995 } 946 }
947}
996 948
997 glBindBuffersRange(GL_UNIFORM_BUFFER, current_bindpoint, static_cast<GLsizei>(entries.size()), 949void RasterizerOpenGL::SetupGlobalRegions(Tegra::Engines::Maxwell3D::Regs::ShaderStage stage,
998 bind_buffers.data(), bind_offsets.data(), bind_sizes.data()); 950 const Shader& shader, GLenum primitive_mode,
999 951 BaseBindings base_bindings) {
1000 return current_bindpoint + static_cast<u32>(entries.size()); 952 const auto& entries = shader->GetShaderEntries().global_memory_entries;
953 for (std::size_t bindpoint = 0; bindpoint < entries.size(); ++bindpoint) {
954 const auto& entry{entries[bindpoint]};
955 const auto& region{global_cache.GetGlobalRegion(entry, stage)};
956 bind_ssbo_pushbuffer.Push(region->GetBufferHandle(), 0,
957 static_cast<GLsizeiptr>(region->GetSizeInBytes()));
958 }
1001} 959}
1002 960
1003u32 RasterizerOpenGL::SetupTextures(Maxwell::ShaderStage stage, Shader& shader, 961void RasterizerOpenGL::SetupTextures(Maxwell::ShaderStage stage, const Shader& shader,
1004 GLenum primitive_mode, u32 current_unit) { 962 GLuint program_handle, BaseBindings base_bindings) {
1005 MICROPROFILE_SCOPE(OpenGL_Texture); 963 MICROPROFILE_SCOPE(OpenGL_Texture);
1006 const auto& gpu = Core::System::GetInstance().GPU(); 964 const auto& gpu = system.GPU();
1007 const auto& maxwell3d = gpu.Maxwell3D(); 965 const auto& maxwell3d = gpu.Maxwell3D();
1008 const auto& entries = shader->GetShaderEntries().texture_samplers; 966 const auto& entries = shader->GetShaderEntries().samplers;
1009 967
1010 ASSERT_MSG(current_unit + entries.size() <= std::size(state.texture_units), 968 ASSERT_MSG(base_bindings.sampler + entries.size() <= std::size(state.texture_units),
1011 "Exceeded the number of active textures."); 969 "Exceeded the number of active textures.");
1012 970
1013 for (u32 bindpoint = 0; bindpoint < entries.size(); ++bindpoint) { 971 for (u32 bindpoint = 0; bindpoint < entries.size(); ++bindpoint) {
1014 const auto& entry = entries[bindpoint]; 972 const auto& entry = entries[bindpoint];
1015 const u32 current_bindpoint = current_unit + bindpoint; 973 const auto texture = maxwell3d.GetStageTexture(stage, entry.GetOffset());
1016 974 const u32 current_bindpoint = base_bindings.sampler + bindpoint;
1017 // Bind the uniform to the sampler.
1018
1019 glProgramUniform1i(shader->GetProgramHandle(primitive_mode),
1020 shader->GetUniformLocation(entry), current_bindpoint);
1021
1022 const auto texture = maxwell3d.GetStageTexture(entry.GetStage(), entry.GetOffset());
1023
1024 if (!texture.enabled) {
1025 state.texture_units[current_bindpoint].texture = 0;
1026 continue;
1027 }
1028 975
1029 texture_samplers[current_bindpoint].SyncWithConfig(texture.tsc); 976 texture_samplers[current_bindpoint].SyncWithConfig(texture.tsc);
1030 Surface surface = res_cache.GetTextureSurface(texture, entry); 977
1031 if (surface != nullptr) { 978 if (Surface surface = res_cache.GetTextureSurface(texture, entry); surface) {
1032 const GLuint handle = 979 state.texture_units[current_bindpoint].texture =
1033 entry.IsArray() ? surface->TextureLayer().handle : surface->Texture().handle; 980 surface->Texture(entry.IsArray()).handle;
1034 const GLenum target = entry.IsArray() ? surface->TargetLayer() : surface->Target(); 981 surface->UpdateSwizzle(texture.tic.x_source, texture.tic.y_source, texture.tic.z_source,
1035 state.texture_units[current_bindpoint].texture = handle; 982 texture.tic.w_source);
1036 state.texture_units[current_bindpoint].target = target;
1037 state.texture_units[current_bindpoint].swizzle.r =
1038 MaxwellToGL::SwizzleSource(texture.tic.x_source);
1039 state.texture_units[current_bindpoint].swizzle.g =
1040 MaxwellToGL::SwizzleSource(texture.tic.y_source);
1041 state.texture_units[current_bindpoint].swizzle.b =
1042 MaxwellToGL::SwizzleSource(texture.tic.z_source);
1043 state.texture_units[current_bindpoint].swizzle.a =
1044 MaxwellToGL::SwizzleSource(texture.tic.w_source);
1045 } else { 983 } else {
1046 // Can occur when texture addr is null or its memory is unmapped/invalid 984 // Can occur when texture addr is null or its memory is unmapped/invalid
1047 state.texture_units[current_bindpoint].texture = 0; 985 state.texture_units[current_bindpoint].texture = 0;
1048 } 986 }
1049 } 987 }
1050
1051 return current_unit + static_cast<u32>(entries.size());
1052} 988}
1053 989
1054void RasterizerOpenGL::SyncViewport(OpenGLState& current_state) { 990void RasterizerOpenGL::SyncViewport(OpenGLState& current_state) {
1055 const auto& regs = Core::System::GetInstance().GPU().Maxwell3D().regs; 991 const auto& regs = system.GPU().Maxwell3D().regs;
1056 const bool geometry_shaders_enabled = 992 const bool geometry_shaders_enabled =
1057 regs.IsShaderConfigEnabled(static_cast<size_t>(Maxwell::ShaderProgram::Geometry)); 993 regs.IsShaderConfigEnabled(static_cast<size_t>(Maxwell::ShaderProgram::Geometry));
1058 const std::size_t viewport_count = 994 const std::size_t viewport_count =
@@ -1060,7 +996,7 @@ void RasterizerOpenGL::SyncViewport(OpenGLState& current_state) {
1060 for (std::size_t i = 0; i < viewport_count; i++) { 996 for (std::size_t i = 0; i < viewport_count; i++) {
1061 auto& viewport = current_state.viewports[i]; 997 auto& viewport = current_state.viewports[i];
1062 const auto& src = regs.viewports[i]; 998 const auto& src = regs.viewports[i];
1063 const MathUtil::Rectangle<s32> viewport_rect{regs.viewport_transform[i].GetRect()}; 999 const Common::Rectangle<s32> viewport_rect{regs.viewport_transform[i].GetRect()};
1064 viewport.x = viewport_rect.left; 1000 viewport.x = viewport_rect.left;
1065 viewport.y = viewport_rect.bottom; 1001 viewport.y = viewport_rect.bottom;
1066 viewport.width = viewport_rect.GetWidth(); 1002 viewport.width = viewport_rect.GetWidth();
@@ -1075,7 +1011,7 @@ void RasterizerOpenGL::SyncViewport(OpenGLState& current_state) {
1075void RasterizerOpenGL::SyncClipEnabled( 1011void RasterizerOpenGL::SyncClipEnabled(
1076 const std::array<bool, Maxwell::Regs::NumClipDistances>& clip_mask) { 1012 const std::array<bool, Maxwell::Regs::NumClipDistances>& clip_mask) {
1077 1013
1078 const auto& regs = Core::System::GetInstance().GPU().Maxwell3D().regs; 1014 const auto& regs = system.GPU().Maxwell3D().regs;
1079 const std::array<bool, Maxwell::Regs::NumClipDistances> reg_state{ 1015 const std::array<bool, Maxwell::Regs::NumClipDistances> reg_state{
1080 regs.clip_distance_enabled.c0 != 0, regs.clip_distance_enabled.c1 != 0, 1016 regs.clip_distance_enabled.c0 != 0, regs.clip_distance_enabled.c1 != 0,
1081 regs.clip_distance_enabled.c2 != 0, regs.clip_distance_enabled.c3 != 0, 1017 regs.clip_distance_enabled.c2 != 0, regs.clip_distance_enabled.c3 != 0,
@@ -1092,7 +1028,7 @@ void RasterizerOpenGL::SyncClipCoef() {
1092} 1028}
1093 1029
1094void RasterizerOpenGL::SyncCullMode() { 1030void RasterizerOpenGL::SyncCullMode() {
1095 const auto& regs = Core::System::GetInstance().GPU().Maxwell3D().regs; 1031 const auto& regs = system.GPU().Maxwell3D().regs;
1096 1032
1097 state.cull.enabled = regs.cull.enabled != 0; 1033 state.cull.enabled = regs.cull.enabled != 0;
1098 1034
@@ -1116,14 +1052,14 @@ void RasterizerOpenGL::SyncCullMode() {
1116} 1052}
1117 1053
1118void RasterizerOpenGL::SyncPrimitiveRestart() { 1054void RasterizerOpenGL::SyncPrimitiveRestart() {
1119 const auto& regs = Core::System::GetInstance().GPU().Maxwell3D().regs; 1055 const auto& regs = system.GPU().Maxwell3D().regs;
1120 1056
1121 state.primitive_restart.enabled = regs.primitive_restart.enabled; 1057 state.primitive_restart.enabled = regs.primitive_restart.enabled;
1122 state.primitive_restart.index = regs.primitive_restart.index; 1058 state.primitive_restart.index = regs.primitive_restart.index;
1123} 1059}
1124 1060
1125void RasterizerOpenGL::SyncDepthTestState() { 1061void RasterizerOpenGL::SyncDepthTestState() {
1126 const auto& regs = Core::System::GetInstance().GPU().Maxwell3D().regs; 1062 const auto& regs = system.GPU().Maxwell3D().regs;
1127 1063
1128 state.depth.test_enabled = regs.depth_test_enable != 0; 1064 state.depth.test_enabled = regs.depth_test_enable != 0;
1129 state.depth.write_mask = regs.depth_write_enabled ? GL_TRUE : GL_FALSE; 1065 state.depth.write_mask = regs.depth_write_enabled ? GL_TRUE : GL_FALSE;
@@ -1135,7 +1071,7 @@ void RasterizerOpenGL::SyncDepthTestState() {
1135} 1071}
1136 1072
1137void RasterizerOpenGL::SyncStencilTestState() { 1073void RasterizerOpenGL::SyncStencilTestState() {
1138 const auto& regs = Core::System::GetInstance().GPU().Maxwell3D().regs; 1074 const auto& regs = system.GPU().Maxwell3D().regs;
1139 state.stencil.test_enabled = regs.stencil_enable != 0; 1075 state.stencil.test_enabled = regs.stencil_enable != 0;
1140 1076
1141 if (!regs.stencil_enable) { 1077 if (!regs.stencil_enable) {
@@ -1169,7 +1105,7 @@ void RasterizerOpenGL::SyncStencilTestState() {
1169} 1105}
1170 1106
1171void RasterizerOpenGL::SyncColorMask() { 1107void RasterizerOpenGL::SyncColorMask() {
1172 const auto& regs = Core::System::GetInstance().GPU().Maxwell3D().regs; 1108 const auto& regs = system.GPU().Maxwell3D().regs;
1173 const std::size_t count = 1109 const std::size_t count =
1174 regs.independent_blend_enable ? Tegra::Engines::Maxwell3D::Regs::NumRenderTargets : 1; 1110 regs.independent_blend_enable ? Tegra::Engines::Maxwell3D::Regs::NumRenderTargets : 1;
1175 for (std::size_t i = 0; i < count; i++) { 1111 for (std::size_t i = 0; i < count; i++) {
@@ -1183,18 +1119,18 @@ void RasterizerOpenGL::SyncColorMask() {
1183} 1119}
1184 1120
1185void RasterizerOpenGL::SyncMultiSampleState() { 1121void RasterizerOpenGL::SyncMultiSampleState() {
1186 const auto& regs = Core::System::GetInstance().GPU().Maxwell3D().regs; 1122 const auto& regs = system.GPU().Maxwell3D().regs;
1187 state.multisample_control.alpha_to_coverage = regs.multisample_control.alpha_to_coverage != 0; 1123 state.multisample_control.alpha_to_coverage = regs.multisample_control.alpha_to_coverage != 0;
1188 state.multisample_control.alpha_to_one = regs.multisample_control.alpha_to_one != 0; 1124 state.multisample_control.alpha_to_one = regs.multisample_control.alpha_to_one != 0;
1189} 1125}
1190 1126
1191void RasterizerOpenGL::SyncFragmentColorClampState() { 1127void RasterizerOpenGL::SyncFragmentColorClampState() {
1192 const auto& regs = Core::System::GetInstance().GPU().Maxwell3D().regs; 1128 const auto& regs = system.GPU().Maxwell3D().regs;
1193 state.fragment_color_clamp.enabled = regs.frag_color_clamp != 0; 1129 state.fragment_color_clamp.enabled = regs.frag_color_clamp != 0;
1194} 1130}
1195 1131
1196void RasterizerOpenGL::SyncBlendState() { 1132void RasterizerOpenGL::SyncBlendState() {
1197 const auto& regs = Core::System::GetInstance().GPU().Maxwell3D().regs; 1133 const auto& regs = system.GPU().Maxwell3D().regs;
1198 1134
1199 state.blend_color.red = regs.blend_color.r; 1135 state.blend_color.red = regs.blend_color.r;
1200 state.blend_color.green = regs.blend_color.g; 1136 state.blend_color.green = regs.blend_color.g;
@@ -1236,7 +1172,7 @@ void RasterizerOpenGL::SyncBlendState() {
1236} 1172}
1237 1173
1238void RasterizerOpenGL::SyncLogicOpState() { 1174void RasterizerOpenGL::SyncLogicOpState() {
1239 const auto& regs = Core::System::GetInstance().GPU().Maxwell3D().regs; 1175 const auto& regs = system.GPU().Maxwell3D().regs;
1240 1176
1241 state.logic_op.enabled = regs.logic_op.enable != 0; 1177 state.logic_op.enabled = regs.logic_op.enable != 0;
1242 1178
@@ -1250,7 +1186,7 @@ void RasterizerOpenGL::SyncLogicOpState() {
1250} 1186}
1251 1187
1252void RasterizerOpenGL::SyncScissorTest(OpenGLState& current_state) { 1188void RasterizerOpenGL::SyncScissorTest(OpenGLState& current_state) {
1253 const auto& regs = Core::System::GetInstance().GPU().Maxwell3D().regs; 1189 const auto& regs = system.GPU().Maxwell3D().regs;
1254 const bool geometry_shaders_enabled = 1190 const bool geometry_shaders_enabled =
1255 regs.IsShaderConfigEnabled(static_cast<size_t>(Maxwell::ShaderProgram::Geometry)); 1191 regs.IsShaderConfigEnabled(static_cast<size_t>(Maxwell::ShaderProgram::Geometry));
1256 const std::size_t viewport_count = 1192 const std::size_t viewport_count =
@@ -1272,21 +1208,17 @@ void RasterizerOpenGL::SyncScissorTest(OpenGLState& current_state) {
1272} 1208}
1273 1209
1274void RasterizerOpenGL::SyncTransformFeedback() { 1210void RasterizerOpenGL::SyncTransformFeedback() {
1275 const auto& regs = Core::System::GetInstance().GPU().Maxwell3D().regs; 1211 const auto& regs = system.GPU().Maxwell3D().regs;
1276 1212 UNIMPLEMENTED_IF_MSG(regs.tfb_enabled != 0, "Transform feedbacks are not implemented");
1277 if (regs.tfb_enabled != 0) {
1278 LOG_CRITICAL(Render_OpenGL, "Transform feedbacks are not implemented");
1279 UNREACHABLE();
1280 }
1281} 1213}
1282 1214
1283void RasterizerOpenGL::SyncPointState() { 1215void RasterizerOpenGL::SyncPointState() {
1284 const auto& regs = Core::System::GetInstance().GPU().Maxwell3D().regs; 1216 const auto& regs = system.GPU().Maxwell3D().regs;
1285 state.point.size = regs.point_size; 1217 state.point.size = regs.point_size;
1286} 1218}
1287 1219
1288void RasterizerOpenGL::SyncPolygonOffset() { 1220void RasterizerOpenGL::SyncPolygonOffset() {
1289 const auto& regs = Core::System::GetInstance().GPU().Maxwell3D().regs; 1221 const auto& regs = system.GPU().Maxwell3D().regs;
1290 state.polygon_offset.fill_enable = regs.polygon_offset_fill_enable != 0; 1222 state.polygon_offset.fill_enable = regs.polygon_offset_fill_enable != 0;
1291 state.polygon_offset.line_enable = regs.polygon_offset_line_enable != 0; 1223 state.polygon_offset.line_enable = regs.polygon_offset_line_enable != 0;
1292 state.polygon_offset.point_enable = regs.polygon_offset_point_enable != 0; 1224 state.polygon_offset.point_enable = regs.polygon_offset_point_enable != 0;
@@ -1296,13 +1228,9 @@ void RasterizerOpenGL::SyncPolygonOffset() {
1296} 1228}
1297 1229
1298void RasterizerOpenGL::CheckAlphaTests() { 1230void RasterizerOpenGL::CheckAlphaTests() {
1299 const auto& regs = Core::System::GetInstance().GPU().Maxwell3D().regs; 1231 const auto& regs = system.GPU().Maxwell3D().regs;
1300 1232 UNIMPLEMENTED_IF_MSG(regs.alpha_test_enabled != 0 && regs.rt_control.count > 1,
1301 if (regs.alpha_test_enabled != 0 && regs.rt_control.count > 1) { 1233 "Alpha Testing is enabled with more than one rendertarget");
1302 LOG_CRITICAL(Render_OpenGL, "Alpha Testing is enabled with Multiple Render Targets, "
1303 "this behavior is undefined.");
1304 UNREACHABLE();
1305 }
1306} 1234}
1307 1235
1308} // namespace OpenGL 1236} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h
index 21c51f874..e4c64ae71 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer.h
@@ -5,21 +5,19 @@
5#pragma once 5#pragma once
6 6
7#include <array> 7#include <array>
8#include <atomic>
8#include <cstddef> 9#include <cstddef>
9#include <map> 10#include <map>
10#include <memory> 11#include <memory>
11#include <optional> 12#include <optional>
12#include <tuple> 13#include <tuple>
13#include <utility> 14#include <utility>
14#include <vector>
15 15
16#include <boost/icl/interval_map.hpp> 16#include <boost/icl/interval_map.hpp>
17#include <boost/range/iterator_range.hpp>
18#include <glad/glad.h> 17#include <glad/glad.h>
19 18
20#include "common/common_types.h" 19#include "common/common_types.h"
21#include "video_core/engines/maxwell_3d.h" 20#include "video_core/engines/maxwell_3d.h"
22#include "video_core/memory_manager.h"
23#include "video_core/rasterizer_cache.h" 21#include "video_core/rasterizer_cache.h"
24#include "video_core/rasterizer_interface.h" 22#include "video_core/rasterizer_interface.h"
25#include "video_core/renderer_opengl/gl_buffer_cache.h" 23#include "video_core/renderer_opengl/gl_buffer_cache.h"
@@ -28,10 +26,13 @@
28#include "video_core/renderer_opengl/gl_rasterizer_cache.h" 26#include "video_core/renderer_opengl/gl_rasterizer_cache.h"
29#include "video_core/renderer_opengl/gl_resource_manager.h" 27#include "video_core/renderer_opengl/gl_resource_manager.h"
30#include "video_core/renderer_opengl/gl_shader_cache.h" 28#include "video_core/renderer_opengl/gl_shader_cache.h"
31#include "video_core/renderer_opengl/gl_shader_gen.h"
32#include "video_core/renderer_opengl/gl_shader_manager.h" 29#include "video_core/renderer_opengl/gl_shader_manager.h"
33#include "video_core/renderer_opengl/gl_state.h" 30#include "video_core/renderer_opengl/gl_state.h"
34#include "video_core/renderer_opengl/gl_stream_buffer.h" 31#include "video_core/renderer_opengl/utils.h"
32
33namespace Core {
34class System;
35}
35 36
36namespace Core::Frontend { 37namespace Core::Frontend {
37class EmuWindow; 38class EmuWindow;
@@ -45,22 +46,25 @@ struct FramebufferCacheKey;
45 46
46class RasterizerOpenGL : public VideoCore::RasterizerInterface { 47class RasterizerOpenGL : public VideoCore::RasterizerInterface {
47public: 48public:
48 explicit RasterizerOpenGL(Core::Frontend::EmuWindow& renderer, ScreenInfo& info); 49 explicit RasterizerOpenGL(Core::System& system, ScreenInfo& info);
49 ~RasterizerOpenGL() override; 50 ~RasterizerOpenGL() override;
50 51
51 void DrawArrays() override; 52 void DrawArrays() override;
52 void Clear() override; 53 void Clear() override;
53 void FlushAll() override; 54 void FlushAll() override;
54 void FlushRegion(VAddr addr, u64 size) override; 55 void FlushRegion(CacheAddr addr, u64 size) override;
55 void InvalidateRegion(VAddr addr, u64 size) override; 56 void InvalidateRegion(CacheAddr addr, u64 size) override;
56 void FlushAndInvalidateRegion(VAddr addr, u64 size) override; 57 void FlushAndInvalidateRegion(CacheAddr addr, u64 size) override;
57 bool AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Regs::Surface& src, 58 bool AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Regs::Surface& src,
58 const Tegra::Engines::Fermi2D::Regs::Surface& dst) override; 59 const Tegra::Engines::Fermi2D::Regs::Surface& dst,
59 bool AccelerateFill(const void* config) override; 60 const Common::Rectangle<u32>& src_rect,
61 const Common::Rectangle<u32>& dst_rect) override;
60 bool AccelerateDisplay(const Tegra::FramebufferConfig& config, VAddr framebuffer_addr, 62 bool AccelerateDisplay(const Tegra::FramebufferConfig& config, VAddr framebuffer_addr,
61 u32 pixel_stride) override; 63 u32 pixel_stride) override;
62 bool AccelerateDrawBatch(bool is_indexed) override; 64 bool AccelerateDrawBatch(bool is_indexed) override;
63 void UpdatePagesCachedCount(Tegra::GPUVAddr addr, u64 size, int delta) override; 65 void UpdatePagesCachedCount(VAddr addr, u64 size, int delta) override;
66 void LoadDiskResources(const std::atomic_bool& stop_loading,
67 const VideoCore::DiskResourceLoadCallback& callback) override;
64 68
65 /// Maximum supported size that a constbuffer can have in bytes. 69 /// Maximum supported size that a constbuffer can have in bytes.
66 static constexpr std::size_t MaxConstbufferSize = 0x10000; 70 static constexpr std::size_t MaxConstbufferSize = 0x10000;
@@ -85,11 +89,12 @@ private:
85 private: 89 private:
86 Tegra::Texture::TextureFilter mag_filter = Tegra::Texture::TextureFilter::Nearest; 90 Tegra::Texture::TextureFilter mag_filter = Tegra::Texture::TextureFilter::Nearest;
87 Tegra::Texture::TextureFilter min_filter = Tegra::Texture::TextureFilter::Nearest; 91 Tegra::Texture::TextureFilter min_filter = Tegra::Texture::TextureFilter::Nearest;
88 Tegra::Texture::TextureMipmapFilter mip_filter = Tegra::Texture::TextureMipmapFilter::None; 92 Tegra::Texture::TextureMipmapFilter mipmap_filter =
93 Tegra::Texture::TextureMipmapFilter::None;
89 Tegra::Texture::WrapMode wrap_u = Tegra::Texture::WrapMode::ClampToEdge; 94 Tegra::Texture::WrapMode wrap_u = Tegra::Texture::WrapMode::ClampToEdge;
90 Tegra::Texture::WrapMode wrap_v = Tegra::Texture::WrapMode::ClampToEdge; 95 Tegra::Texture::WrapMode wrap_v = Tegra::Texture::WrapMode::ClampToEdge;
91 Tegra::Texture::WrapMode wrap_p = Tegra::Texture::WrapMode::ClampToEdge; 96 Tegra::Texture::WrapMode wrap_p = Tegra::Texture::WrapMode::ClampToEdge;
92 bool uses_depth_compare = false; 97 bool use_depth_compare = false;
93 Tegra::Texture::DepthCompareFunc depth_compare_func = 98 Tegra::Texture::DepthCompareFunc depth_compare_func =
94 Tegra::Texture::DepthCompareFunc::Always; 99 Tegra::Texture::DepthCompareFunc::Always;
95 GLvec4 border_color = {}; 100 GLvec4 border_color = {};
@@ -122,30 +127,25 @@ private:
122 * @param using_depth_fb If true, configure the depth/stencil framebuffer. 127 * @param using_depth_fb If true, configure the depth/stencil framebuffer.
123 * @param preserve_contents If true, tries to preserve data from a previously used framebuffer. 128 * @param preserve_contents If true, tries to preserve data from a previously used framebuffer.
124 * @param single_color_target Specifies if a single color buffer target should be used. 129 * @param single_color_target Specifies if a single color buffer target should be used.
130 * @returns If depth (first) or stencil (second) are being stored in the bound zeta texture
131 * (requires using_depth_fb to be true)
125 */ 132 */
126 void ConfigureFramebuffers(OpenGLState& current_state, bool use_color_fb = true, 133 std::pair<bool, bool> ConfigureFramebuffers(
127 bool using_depth_fb = true, bool preserve_contents = true, 134 OpenGLState& current_state, bool use_color_fb = true, bool using_depth_fb = true,
128 std::optional<std::size_t> single_color_target = {}); 135 bool preserve_contents = true, std::optional<std::size_t> single_color_target = {});
129 136
130 /** 137 /// Configures the current constbuffers to use for the draw command.
131 * Configures the current constbuffers to use for the draw command. 138 void SetupConstBuffers(Tegra::Engines::Maxwell3D::Regs::ShaderStage stage, const Shader& shader,
132 * @param stage The shader stage to configure buffers for. 139 GLuint program_handle, BaseBindings base_bindings);
133 * @param shader The shader object that contains the specified stage.
134 * @param current_bindpoint The offset at which to start counting new buffer bindpoints.
135 * @returns The next available bindpoint for use in the next shader stage.
136 */
137 u32 SetupConstBuffers(Tegra::Engines::Maxwell3D::Regs::ShaderStage stage, Shader& shader,
138 GLenum primitive_mode, u32 current_bindpoint);
139 140
140 /** 141 /// Configures the current global memory entries to use for the draw command.
141 * Configures the current textures to use for the draw command. 142 void SetupGlobalRegions(Tegra::Engines::Maxwell3D::Regs::ShaderStage stage,
142 * @param stage The shader stage to configure textures for. 143 const Shader& shader, GLenum primitive_mode,
143 * @param shader The shader object that contains the specified stage. 144 BaseBindings base_bindings);
144 * @param current_unit The offset at which to start counting unused texture units. 145
145 * @returns The next available bindpoint for use in the next shader stage. 146 /// Configures the current textures to use for the draw command.
146 */ 147 void SetupTextures(Tegra::Engines::Maxwell3D::Regs::ShaderStage stage, const Shader& shader,
147 u32 SetupTextures(Tegra::Engines::Maxwell3D::Regs::ShaderStage stage, Shader& shader, 148 GLuint program_handle, BaseBindings base_bindings);
148 GLenum primitive_mode, u32 current_unit);
149 149
150 /// Syncs the viewport and depth range to match the guest state 150 /// Syncs the viewport and depth range to match the guest state
151 void SyncViewport(OpenGLState& current_state); 151 void SyncViewport(OpenGLState& current_state);
@@ -209,7 +209,7 @@ private:
209 ShaderCacheOpenGL shader_cache; 209 ShaderCacheOpenGL shader_cache;
210 GlobalRegionCacheOpenGL global_cache; 210 GlobalRegionCacheOpenGL global_cache;
211 211
212 Core::Frontend::EmuWindow& emu_window; 212 Core::System& system;
213 213
214 ScreenInfo& screen_info; 214 ScreenInfo& screen_info;
215 215
@@ -221,6 +221,7 @@ private:
221 221
222 std::map<FramebufferCacheKey, OGLFramebuffer> framebuffer_cache; 222 std::map<FramebufferCacheKey, OGLFramebuffer> framebuffer_cache;
223 FramebufferConfigState current_framebuffer_config_state; 223 FramebufferConfigState current_framebuffer_config_state;
224 std::pair<bool, bool> current_depth_stencil_usage{};
224 225
225 std::array<SamplerInfo, Tegra::Engines::Maxwell3D::Regs::NumTextureSamplers> texture_samplers; 226 std::array<SamplerInfo, Tegra::Engines::Maxwell3D::Regs::NumTextureSamplers> texture_samplers;
226 227
@@ -229,6 +230,9 @@ private:
229 PrimitiveAssembler primitive_assembler{buffer_cache}; 230 PrimitiveAssembler primitive_assembler{buffer_cache};
230 GLint uniform_buffer_alignment; 231 GLint uniform_buffer_alignment;
231 232
233 BindBuffersRangePushBuffer bind_ubo_pushbuffer{GL_UNIFORM_BUFFER};
234 BindBuffersRangePushBuffer bind_ssbo_pushbuffer{GL_SHADER_STORAGE_BUFFER};
235
232 std::size_t CalculateVertexArraysSize() const; 236 std::size_t CalculateVertexArraysSize() const;
233 237
234 std::size_t CalculateIndexBufferSize() const; 238 std::size_t CalculateIndexBufferSize() const;
diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
index a05b8b936..55b6d8591 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
@@ -3,6 +3,7 @@
3// Refer to the license.txt file included. 3// Refer to the license.txt file included.
4 4
5#include <algorithm> 5#include <algorithm>
6#include <optional>
6#include <glad/glad.h> 7#include <glad/glad.h>
7 8
8#include "common/alignment.h" 9#include "common/alignment.h"
@@ -12,16 +13,15 @@
12#include "common/scope_exit.h" 13#include "common/scope_exit.h"
13#include "core/core.h" 14#include "core/core.h"
14#include "core/hle/kernel/process.h" 15#include "core/hle/kernel/process.h"
15#include "core/memory.h"
16#include "core/settings.h" 16#include "core/settings.h"
17#include "video_core/engines/maxwell_3d.h" 17#include "video_core/engines/maxwell_3d.h"
18#include "video_core/memory_manager.h"
18#include "video_core/morton.h" 19#include "video_core/morton.h"
19#include "video_core/renderer_opengl/gl_rasterizer.h" 20#include "video_core/renderer_opengl/gl_rasterizer.h"
20#include "video_core/renderer_opengl/gl_rasterizer_cache.h" 21#include "video_core/renderer_opengl/gl_rasterizer_cache.h"
21#include "video_core/renderer_opengl/gl_state.h"
22#include "video_core/renderer_opengl/utils.h" 22#include "video_core/renderer_opengl/utils.h"
23#include "video_core/surface.h" 23#include "video_core/surface.h"
24#include "video_core/textures/astc.h" 24#include "video_core/textures/convert.h"
25#include "video_core/textures/decoders.h" 25#include "video_core/textures/decoders.h"
26 26
27namespace OpenGL { 27namespace OpenGL {
@@ -44,23 +44,22 @@ struct FormatTuple {
44 bool compressed; 44 bool compressed;
45}; 45};
46 46
47static void ApplyTextureDefaults(GLenum target, u32 max_mip_level) { 47static void ApplyTextureDefaults(GLuint texture, u32 max_mip_level) {
48 glTexParameteri(target, GL_TEXTURE_MIN_FILTER, GL_LINEAR); 48 glTextureParameteri(texture, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
49 glTexParameteri(target, GL_TEXTURE_MAG_FILTER, GL_LINEAR); 49 glTextureParameteri(texture, GL_TEXTURE_MAG_FILTER, GL_LINEAR);
50 glTexParameteri(target, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE); 50 glTextureParameteri(texture, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
51 glTexParameteri(target, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE); 51 glTextureParameteri(texture, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
52 glTexParameteri(target, GL_TEXTURE_MAX_LEVEL, max_mip_level - 1); 52 glTextureParameteri(texture, GL_TEXTURE_MAX_LEVEL, max_mip_level - 1);
53 if (max_mip_level == 1) { 53 if (max_mip_level == 1) {
54 glTexParameterf(target, GL_TEXTURE_LOD_BIAS, 1000.0); 54 glTextureParameterf(texture, GL_TEXTURE_LOD_BIAS, 1000.0);
55 } 55 }
56} 56}
57 57
58void SurfaceParams::InitCacheParameters(Tegra::GPUVAddr gpu_addr_) { 58void SurfaceParams::InitCacheParameters(GPUVAddr gpu_addr_) {
59 auto& memory_manager{Core::System::GetInstance().GPU().MemoryManager()}; 59 auto& memory_manager{Core::System::GetInstance().GPU().MemoryManager()};
60 const auto cpu_addr{memory_manager.GpuToCpuAddress(gpu_addr_)};
61 60
62 addr = cpu_addr ? *cpu_addr : 0;
63 gpu_addr = gpu_addr_; 61 gpu_addr = gpu_addr_;
62 host_ptr = memory_manager.GetPointer(gpu_addr_);
64 size_in_bytes = SizeInBytesRaw(); 63 size_in_bytes = SizeInBytesRaw();
65 64
66 if (IsPixelFormatASTC(pixel_format)) { 65 if (IsPixelFormatASTC(pixel_format)) {
@@ -126,8 +125,12 @@ std::size_t SurfaceParams::InnerMemorySize(bool force_gl, bool layer_only,
126 125
127 params.width = Common::AlignUp(config.tic.Width(), GetCompressionFactor(params.pixel_format)); 126 params.width = Common::AlignUp(config.tic.Width(), GetCompressionFactor(params.pixel_format));
128 params.height = Common::AlignUp(config.tic.Height(), GetCompressionFactor(params.pixel_format)); 127 params.height = Common::AlignUp(config.tic.Height(), GetCompressionFactor(params.pixel_format));
128 if (!params.is_tiled) {
129 params.pitch = config.tic.Pitch();
130 }
129 params.unaligned_height = config.tic.Height(); 131 params.unaligned_height = config.tic.Height();
130 params.target = SurfaceTargetFromTextureType(config.tic.texture_type); 132 params.target = SurfaceTargetFromTextureType(config.tic.texture_type);
133 params.identity = SurfaceClass::Uploaded;
131 134
132 switch (params.target) { 135 switch (params.target) {
133 case SurfaceTarget::Texture1D: 136 case SurfaceTarget::Texture1D:
@@ -167,6 +170,7 @@ std::size_t SurfaceParams::InnerMemorySize(bool force_gl, bool layer_only,
167 } 170 }
168 171
169 params.is_layered = SurfaceTargetIsLayered(params.target); 172 params.is_layered = SurfaceTargetIsLayered(params.target);
173 params.is_array = SurfaceTargetIsArray(params.target);
170 params.max_mip_level = config.tic.max_mip_level + 1; 174 params.max_mip_level = config.tic.max_mip_level + 1;
171 params.rt = {}; 175 params.rt = {};
172 176
@@ -190,10 +194,17 @@ std::size_t SurfaceParams::InnerMemorySize(bool force_gl, bool layer_only,
190 config.format == Tegra::RenderTargetFormat::RGBA8_SRGB; 194 config.format == Tegra::RenderTargetFormat::RGBA8_SRGB;
191 params.component_type = ComponentTypeFromRenderTarget(config.format); 195 params.component_type = ComponentTypeFromRenderTarget(config.format);
192 params.type = GetFormatType(params.pixel_format); 196 params.type = GetFormatType(params.pixel_format);
193 params.width = config.width; 197 if (params.is_tiled) {
198 params.width = config.width;
199 } else {
200 params.pitch = config.width;
201 const u32 bpp = params.GetFormatBpp() / 8;
202 params.width = params.pitch / bpp;
203 }
194 params.height = config.height; 204 params.height = config.height;
195 params.unaligned_height = config.height; 205 params.unaligned_height = config.height;
196 params.target = SurfaceTarget::Texture2D; 206 params.target = SurfaceTarget::Texture2D;
207 params.identity = SurfaceClass::RenderTarget;
197 params.depth = 1; 208 params.depth = 1;
198 params.max_mip_level = 1; 209 params.max_mip_level = 1;
199 params.is_layered = false; 210 params.is_layered = false;
@@ -211,7 +222,7 @@ std::size_t SurfaceParams::InnerMemorySize(bool force_gl, bool layer_only,
211} 222}
212 223
213/*static*/ SurfaceParams SurfaceParams::CreateForDepthBuffer( 224/*static*/ SurfaceParams SurfaceParams::CreateForDepthBuffer(
214 u32 zeta_width, u32 zeta_height, Tegra::GPUVAddr zeta_address, Tegra::DepthFormat format, 225 u32 zeta_width, u32 zeta_height, GPUVAddr zeta_address, Tegra::DepthFormat format,
215 u32 block_width, u32 block_height, u32 block_depth, 226 u32 block_width, u32 block_height, u32 block_depth,
216 Tegra::Engines::Maxwell3D::Regs::InvMemoryLayout type) { 227 Tegra::Engines::Maxwell3D::Regs::InvMemoryLayout type) {
217 SurfaceParams params{}; 228 SurfaceParams params{};
@@ -229,6 +240,7 @@ std::size_t SurfaceParams::InnerMemorySize(bool force_gl, bool layer_only,
229 params.height = zeta_height; 240 params.height = zeta_height;
230 params.unaligned_height = zeta_height; 241 params.unaligned_height = zeta_height;
231 params.target = SurfaceTarget::Texture2D; 242 params.target = SurfaceTarget::Texture2D;
243 params.identity = SurfaceClass::DepthBuffer;
232 params.depth = 1; 244 params.depth = 1;
233 params.max_mip_level = 1; 245 params.max_mip_level = 1;
234 params.is_layered = false; 246 params.is_layered = false;
@@ -254,9 +266,14 @@ std::size_t SurfaceParams::InnerMemorySize(bool force_gl, bool layer_only,
254 params.component_type = ComponentTypeFromRenderTarget(config.format); 266 params.component_type = ComponentTypeFromRenderTarget(config.format);
255 params.type = GetFormatType(params.pixel_format); 267 params.type = GetFormatType(params.pixel_format);
256 params.width = config.width; 268 params.width = config.width;
269 if (!params.is_tiled) {
270 const u32 bpp = params.GetFormatBpp() / 8;
271 params.pitch = config.width * bpp;
272 }
257 params.height = config.height; 273 params.height = config.height;
258 params.unaligned_height = config.height; 274 params.unaligned_height = config.height;
259 params.target = SurfaceTarget::Texture2D; 275 params.target = SurfaceTarget::Texture2D;
276 params.identity = SurfaceClass::Copy;
260 params.depth = 1; 277 params.depth = 1;
261 params.max_mip_level = 1; 278 params.max_mip_level = 1;
262 params.rt = {}; 279 params.rt = {};
@@ -386,7 +403,28 @@ static const FormatTuple& GetFormatTuple(PixelFormat pixel_format, ComponentType
386 return format; 403 return format;
387} 404}
388 405
389MathUtil::Rectangle<u32> SurfaceParams::GetRect(u32 mip_level) const { 406/// Returns the discrepant array target
407constexpr GLenum GetArrayDiscrepantTarget(SurfaceTarget target) {
408 switch (target) {
409 case SurfaceTarget::Texture1D:
410 return GL_TEXTURE_1D_ARRAY;
411 case SurfaceTarget::Texture2D:
412 return GL_TEXTURE_2D_ARRAY;
413 case SurfaceTarget::Texture3D:
414 return GL_NONE;
415 case SurfaceTarget::Texture1DArray:
416 return GL_TEXTURE_1D;
417 case SurfaceTarget::Texture2DArray:
418 return GL_TEXTURE_2D;
419 case SurfaceTarget::TextureCubemap:
420 return GL_TEXTURE_CUBE_MAP_ARRAY;
421 case SurfaceTarget::TextureCubeArray:
422 return GL_TEXTURE_CUBE_MAP;
423 }
424 return GL_NONE;
425}
426
427Common::Rectangle<u32> SurfaceParams::GetRect(u32 mip_level) const {
390 u32 actual_height{std::max(1U, unaligned_height >> mip_level)}; 428 u32 actual_height{std::max(1U, unaligned_height >> mip_level)};
391 if (IsPixelFormatASTC(pixel_format)) { 429 if (IsPixelFormatASTC(pixel_format)) {
392 // ASTC formats must stop at the ATSC block size boundary 430 // ASTC formats must stop at the ATSC block size boundary
@@ -410,8 +448,8 @@ void SwizzleFunc(const MortonSwizzleMode& mode, const SurfaceParams& params,
410 for (u32 i = 0; i < params.depth; i++) { 448 for (u32 i = 0; i < params.depth; i++) {
411 MortonSwizzle(mode, params.pixel_format, params.MipWidth(mip_level), 449 MortonSwizzle(mode, params.pixel_format, params.MipWidth(mip_level),
412 params.MipBlockHeight(mip_level), params.MipHeight(mip_level), 450 params.MipBlockHeight(mip_level), params.MipHeight(mip_level),
413 params.MipBlockDepth(mip_level), params.tile_width_spacing, 1, 451 params.MipBlockDepth(mip_level), 1, params.tile_width_spacing,
414 gl_buffer.data() + offset_gl, gl_size, params.addr + offset); 452 gl_buffer.data() + offset_gl, params.host_ptr + offset);
415 offset += layer_size; 453 offset += layer_size;
416 offset_gl += gl_size; 454 offset_gl += gl_size;
417 } 455 }
@@ -420,11 +458,12 @@ void SwizzleFunc(const MortonSwizzleMode& mode, const SurfaceParams& params,
420 MortonSwizzle(mode, params.pixel_format, params.MipWidth(mip_level), 458 MortonSwizzle(mode, params.pixel_format, params.MipWidth(mip_level),
421 params.MipBlockHeight(mip_level), params.MipHeight(mip_level), 459 params.MipBlockHeight(mip_level), params.MipHeight(mip_level),
422 params.MipBlockDepth(mip_level), depth, params.tile_width_spacing, 460 params.MipBlockDepth(mip_level), depth, params.tile_width_spacing,
423 gl_buffer.data(), gl_buffer.size(), params.addr + offset); 461 gl_buffer.data(), params.host_ptr + offset);
424 } 462 }
425} 463}
426 464
427static void FastCopySurface(const Surface& src_surface, const Surface& dst_surface) { 465void RasterizerCacheOpenGL::FastCopySurface(const Surface& src_surface,
466 const Surface& dst_surface) {
428 const auto& src_params{src_surface->GetSurfaceParams()}; 467 const auto& src_params{src_surface->GetSurfaceParams()};
429 const auto& dst_params{dst_surface->GetSurfaceParams()}; 468 const auto& dst_params{dst_surface->GetSurfaceParams()};
430 469
@@ -434,12 +473,15 @@ static void FastCopySurface(const Surface& src_surface, const Surface& dst_surfa
434 glCopyImageSubData(src_surface->Texture().handle, SurfaceTargetToGL(src_params.target), 0, 0, 0, 473 glCopyImageSubData(src_surface->Texture().handle, SurfaceTargetToGL(src_params.target), 0, 0, 0,
435 0, dst_surface->Texture().handle, SurfaceTargetToGL(dst_params.target), 0, 0, 474 0, dst_surface->Texture().handle, SurfaceTargetToGL(dst_params.target), 0, 0,
436 0, 0, width, height, 1); 475 0, 0, width, height, 1);
476
477 dst_surface->MarkAsModified(true, *this);
437} 478}
438 479
439MICROPROFILE_DEFINE(OpenGL_CopySurface, "OpenGL", "CopySurface", MP_RGB(128, 192, 64)); 480MICROPROFILE_DEFINE(OpenGL_CopySurface, "OpenGL", "CopySurface", MP_RGB(128, 192, 64));
440static void CopySurface(const Surface& src_surface, const Surface& dst_surface, 481void RasterizerCacheOpenGL::CopySurface(const Surface& src_surface, const Surface& dst_surface,
441 const GLuint copy_pbo_handle, const GLenum src_attachment = 0, 482 const GLuint copy_pbo_handle, const GLenum src_attachment,
442 const GLenum dst_attachment = 0, const std::size_t cubemap_face = 0) { 483 const GLenum dst_attachment,
484 const std::size_t cubemap_face) {
443 MICROPROFILE_SCOPE(OpenGL_CopySurface); 485 MICROPROFILE_SCOPE(OpenGL_CopySurface);
444 ASSERT_MSG(dst_attachment == 0, "Unimplemented"); 486 ASSERT_MSG(dst_attachment == 0, "Unimplemented");
445 487
@@ -474,9 +516,9 @@ static void CopySurface(const Surface& src_surface, const Surface& dst_surface,
474 "reinterpretation but the texture is tiled."); 516 "reinterpretation but the texture is tiled.");
475 } 517 }
476 const std::size_t remaining_size = dst_params.size_in_bytes - src_params.size_in_bytes; 518 const std::size_t remaining_size = dst_params.size_in_bytes - src_params.size_in_bytes;
477 519 auto& memory_manager{Core::System::GetInstance().GPU().MemoryManager()};
478 glBufferSubData(GL_PIXEL_PACK_BUFFER, src_params.size_in_bytes, remaining_size, 520 glBufferSubData(GL_PIXEL_PACK_BUFFER, src_params.size_in_bytes, remaining_size,
479 Memory::GetPointer(dst_params.addr + src_params.size_in_bytes)); 521 memory_manager.GetPointer(dst_params.gpu_addr + src_params.size_in_bytes));
480 } 522 }
481 523
482 glBindBuffer(GL_PIXEL_PACK_BUFFER, 0); 524 glBindBuffer(GL_PIXEL_PACK_BUFFER, 0);
@@ -519,172 +561,58 @@ static void CopySurface(const Surface& src_surface, const Surface& dst_surface,
519 } 561 }
520 glBindBuffer(GL_PIXEL_UNPACK_BUFFER, 0); 562 glBindBuffer(GL_PIXEL_UNPACK_BUFFER, 0);
521 } 563 }
564
565 dst_surface->MarkAsModified(true, *this);
522} 566}
523 567
524CachedSurface::CachedSurface(const SurfaceParams& params) 568CachedSurface::CachedSurface(const SurfaceParams& params)
525 : params(params), gl_target(SurfaceTargetToGL(params.target)), 569 : RasterizerCacheObject{params.host_ptr}, params{params},
526 cached_size_in_bytes(params.size_in_bytes) { 570 gl_target{SurfaceTargetToGL(params.target)}, cached_size_in_bytes{params.size_in_bytes} {
527 texture.Create();
528 const auto& rect{params.GetRect()};
529
530 // Keep track of previous texture bindings
531 OpenGLState cur_state = OpenGLState::GetCurState();
532 const auto& old_tex = cur_state.texture_units[0];
533 SCOPE_EXIT({
534 cur_state.texture_units[0] = old_tex;
535 cur_state.Apply();
536 });
537
538 cur_state.texture_units[0].texture = texture.handle;
539 cur_state.texture_units[0].target = SurfaceTargetToGL(params.target);
540 cur_state.Apply();
541 glActiveTexture(GL_TEXTURE0);
542
543 const auto& format_tuple = GetFormatTuple(params.pixel_format, params.component_type);
544 gl_internal_format = format_tuple.internal_format;
545 gl_is_compressed = format_tuple.compressed;
546 571
547 if (!format_tuple.compressed) { 572 const auto optional_cpu_addr{
548 // Only pre-create the texture for non-compressed textures. 573 Core::System::GetInstance().GPU().MemoryManager().GpuToCpuAddress(params.gpu_addr)};
549 switch (params.target) { 574 ASSERT_MSG(optional_cpu_addr, "optional_cpu_addr is invalid");
550 case SurfaceTarget::Texture1D: 575 cpu_addr = *optional_cpu_addr;
551 glTexStorage1D(SurfaceTargetToGL(params.target), params.max_mip_level,
552 format_tuple.internal_format, rect.GetWidth());
553 break;
554 case SurfaceTarget::Texture2D:
555 case SurfaceTarget::TextureCubemap:
556 glTexStorage2D(SurfaceTargetToGL(params.target), params.max_mip_level,
557 format_tuple.internal_format, rect.GetWidth(), rect.GetHeight());
558 break;
559 case SurfaceTarget::Texture3D:
560 case SurfaceTarget::Texture2DArray:
561 case SurfaceTarget::TextureCubeArray:
562 glTexStorage3D(SurfaceTargetToGL(params.target), params.max_mip_level,
563 format_tuple.internal_format, rect.GetWidth(), rect.GetHeight(),
564 params.depth);
565 break;
566 default:
567 LOG_CRITICAL(Render_OpenGL, "Unimplemented surface target={}",
568 static_cast<u32>(params.target));
569 UNREACHABLE();
570 glTexStorage2D(GL_TEXTURE_2D, params.max_mip_level, format_tuple.internal_format,
571 rect.GetWidth(), rect.GetHeight());
572 }
573 }
574 576
575 ApplyTextureDefaults(SurfaceTargetToGL(params.target), params.max_mip_level); 577 texture.Create(gl_target);
576 578
577 LabelGLObject(GL_TEXTURE, texture.handle, params.addr, 579 // TODO(Rodrigo): Using params.GetRect() returns a different size than using its Mip*(0)
578 SurfaceParams::SurfaceTargetName(params.target)); 580 // alternatives. This signals a bug on those functions.
581 const auto width = static_cast<GLsizei>(params.MipWidth(0));
582 const auto height = static_cast<GLsizei>(params.MipHeight(0));
583 memory_size = params.MemorySize();
584 reinterpreted = false;
579 585
580 // Clamp size to mapped GPU memory region 586 const auto& format_tuple = GetFormatTuple(params.pixel_format, params.component_type);
581 // TODO(bunnei): Super Mario Odyssey maps a 0x40000 byte region and then uses it for a 0x80000 587 gl_internal_format = format_tuple.internal_format;
582 // R32F render buffer. We do not yet know if this is a game bug or something else, but this
583 // check is necessary to prevent flushing from overwriting unmapped memory.
584
585 auto& memory_manager{Core::System::GetInstance().GPU().MemoryManager()};
586 const u64 max_size{memory_manager.GetRegionEnd(params.gpu_addr) - params.gpu_addr};
587 if (cached_size_in_bytes > max_size) {
588 LOG_ERROR(HW_GPU, "Surface size {} exceeds region size {}", params.size_in_bytes, max_size);
589 cached_size_in_bytes = max_size;
590 }
591}
592
593static void ConvertS8Z24ToZ24S8(std::vector<u8>& data, u32 width, u32 height, bool reverse) {
594 union S8Z24 {
595 BitField<0, 24, u32> z24;
596 BitField<24, 8, u32> s8;
597 };
598 static_assert(sizeof(S8Z24) == 4, "S8Z24 is incorrect size");
599
600 union Z24S8 {
601 BitField<0, 8, u32> s8;
602 BitField<8, 24, u32> z24;
603 };
604 static_assert(sizeof(Z24S8) == 4, "Z24S8 is incorrect size");
605
606 S8Z24 s8z24_pixel{};
607 Z24S8 z24s8_pixel{};
608 constexpr auto bpp{GetBytesPerPixel(PixelFormat::S8Z24)};
609 for (std::size_t y = 0; y < height; ++y) {
610 for (std::size_t x = 0; x < width; ++x) {
611 const std::size_t offset{bpp * (y * width + x)};
612 if (reverse) {
613 std::memcpy(&z24s8_pixel, &data[offset], sizeof(Z24S8));
614 s8z24_pixel.s8.Assign(z24s8_pixel.s8);
615 s8z24_pixel.z24.Assign(z24s8_pixel.z24);
616 std::memcpy(&data[offset], &s8z24_pixel, sizeof(S8Z24));
617 } else {
618 std::memcpy(&s8z24_pixel, &data[offset], sizeof(S8Z24));
619 z24s8_pixel.s8.Assign(s8z24_pixel.s8);
620 z24s8_pixel.z24.Assign(s8z24_pixel.z24);
621 std::memcpy(&data[offset], &z24s8_pixel, sizeof(Z24S8));
622 }
623 }
624 }
625}
626 588
627/** 589 switch (params.target) {
628 * Helper function to perform software conversion (as needed) when loading a buffer from Switch 590 case SurfaceTarget::Texture1D:
629 * memory. This is for Maxwell pixel formats that cannot be represented as-is in OpenGL or with 591 glTextureStorage1D(texture.handle, params.max_mip_level, format_tuple.internal_format,
630 * typical desktop GPUs. 592 width);
631 */
632static void ConvertFormatAsNeeded_LoadGLBuffer(std::vector<u8>& data, PixelFormat pixel_format,
633 u32 width, u32 height, u32 depth) {
634 switch (pixel_format) {
635 case PixelFormat::ASTC_2D_4X4:
636 case PixelFormat::ASTC_2D_8X8:
637 case PixelFormat::ASTC_2D_8X5:
638 case PixelFormat::ASTC_2D_5X4:
639 case PixelFormat::ASTC_2D_5X5:
640 case PixelFormat::ASTC_2D_4X4_SRGB:
641 case PixelFormat::ASTC_2D_8X8_SRGB:
642 case PixelFormat::ASTC_2D_8X5_SRGB:
643 case PixelFormat::ASTC_2D_5X4_SRGB:
644 case PixelFormat::ASTC_2D_5X5_SRGB:
645 case PixelFormat::ASTC_2D_10X8:
646 case PixelFormat::ASTC_2D_10X8_SRGB: {
647 // Convert ASTC pixel formats to RGBA8, as most desktop GPUs do not support ASTC.
648 u32 block_width{};
649 u32 block_height{};
650 std::tie(block_width, block_height) = GetASTCBlockSize(pixel_format);
651 data =
652 Tegra::Texture::ASTC::Decompress(data, width, height, depth, block_width, block_height);
653 break;
654 }
655 case PixelFormat::S8Z24:
656 // Convert the S8Z24 depth format to Z24S8, as OpenGL does not support S8Z24.
657 ConvertS8Z24ToZ24S8(data, width, height, false);
658 break; 593 break;
659 } 594 case SurfaceTarget::Texture2D:
660} 595 case SurfaceTarget::TextureCubemap:
661 596 glTextureStorage2D(texture.handle, params.max_mip_level, format_tuple.internal_format,
662/** 597 width, height);
663 * Helper function to perform software conversion (as needed) when flushing a buffer from OpenGL to
664 * Switch memory. This is for Maxwell pixel formats that cannot be represented as-is in OpenGL or
665 * with typical desktop GPUs.
666 */
667static void ConvertFormatAsNeeded_FlushGLBuffer(std::vector<u8>& data, PixelFormat pixel_format,
668 u32 width, u32 height) {
669 switch (pixel_format) {
670 case PixelFormat::ASTC_2D_4X4:
671 case PixelFormat::ASTC_2D_8X8:
672 case PixelFormat::ASTC_2D_4X4_SRGB:
673 case PixelFormat::ASTC_2D_8X8_SRGB:
674 case PixelFormat::ASTC_2D_5X5:
675 case PixelFormat::ASTC_2D_5X5_SRGB:
676 case PixelFormat::ASTC_2D_10X8:
677 case PixelFormat::ASTC_2D_10X8_SRGB: {
678 LOG_CRITICAL(HW_GPU, "Conversion of format {} after texture flushing is not implemented",
679 static_cast<u32>(pixel_format));
680 UNREACHABLE();
681 break; 598 break;
682 } 599 case SurfaceTarget::Texture3D:
683 case PixelFormat::S8Z24: 600 case SurfaceTarget::Texture2DArray:
684 // Convert the Z24S8 depth format to S8Z24, as OpenGL does not support S8Z24. 601 case SurfaceTarget::TextureCubeArray:
685 ConvertS8Z24ToZ24S8(data, width, height, true); 602 glTextureStorage3D(texture.handle, params.max_mip_level, format_tuple.internal_format,
603 width, height, params.depth);
686 break; 604 break;
605 default:
606 LOG_CRITICAL(Render_OpenGL, "Unimplemented surface target={}",
607 static_cast<u32>(params.target));
608 UNREACHABLE();
609 glTextureStorage2D(texture.handle, params.max_mip_level, format_tuple.internal_format,
610 width, height);
687 } 611 }
612
613 ApplyTextureDefaults(texture.handle, params.max_mip_level);
614
615 OpenGL::LabelGLObject(GL_TEXTURE, texture.handle, params.gpu_addr, params.IdentityString());
688} 616}
689 617
690MICROPROFILE_DEFINE(OpenGL_SurfaceLoad, "OpenGL", "Surface Load", MP_RGB(128, 192, 64)); 618MICROPROFILE_DEFINE(OpenGL_SurfaceLoad, "OpenGL", "Surface Load", MP_RGB(128, 192, 64));
@@ -699,13 +627,31 @@ void CachedSurface::LoadGLBuffer() {
699 for (u32 i = 0; i < params.max_mip_level; i++) 627 for (u32 i = 0; i < params.max_mip_level; i++)
700 SwizzleFunc(MortonSwizzleMode::MortonToLinear, params, gl_buffer[i], i); 628 SwizzleFunc(MortonSwizzleMode::MortonToLinear, params, gl_buffer[i], i);
701 } else { 629 } else {
702 const auto texture_src_data{Memory::GetPointer(params.addr)}; 630 const u32 bpp = params.GetFormatBpp() / 8;
703 const auto texture_src_data_end{texture_src_data + params.size_in_bytes_gl}; 631 const u32 copy_size = params.width * bpp;
704 gl_buffer[0].assign(texture_src_data, texture_src_data_end); 632 if (params.pitch == copy_size) {
633 std::memcpy(gl_buffer[0].data(), params.host_ptr, params.size_in_bytes_gl);
634 } else {
635 const u8* start{params.host_ptr};
636 u8* write_to = gl_buffer[0].data();
637 for (u32 h = params.height; h > 0; h--) {
638 std::memcpy(write_to, start, copy_size);
639 start += params.pitch;
640 write_to += copy_size;
641 }
642 }
705 } 643 }
706 for (u32 i = 0; i < params.max_mip_level; i++) { 644 for (u32 i = 0; i < params.max_mip_level; i++) {
707 ConvertFormatAsNeeded_LoadGLBuffer(gl_buffer[i], params.pixel_format, params.MipWidth(i), 645 const u32 width = params.MipWidth(i);
708 params.MipHeight(i), params.MipDepth(i)); 646 const u32 height = params.MipHeight(i);
647 const u32 depth = params.MipDepth(i);
648 if (VideoCore::Surface::IsPixelFormatASTC(params.pixel_format)) {
649 // Reserve size for RGBA8 conversion
650 constexpr std::size_t rgba_bpp = 4;
651 gl_buffer[i].resize(std::max(gl_buffer[i].size(), width * height * depth * rgba_bpp));
652 }
653 Tegra::Texture::ConvertFromGuestToHost(gl_buffer[i].data(), params.pixel_format, width,
654 height, depth, true, true);
709 } 655 }
710} 656}
711 657
@@ -720,26 +666,35 @@ void CachedSurface::FlushGLBuffer() {
720 gl_buffer[0].resize(GetSizeInBytes()); 666 gl_buffer[0].resize(GetSizeInBytes());
721 667
722 const FormatTuple& tuple = GetFormatTuple(params.pixel_format, params.component_type); 668 const FormatTuple& tuple = GetFormatTuple(params.pixel_format, params.component_type);
723 // Ensure no bad interactions with GL_UNPACK_ALIGNMENT 669 const u32 align = std::clamp(params.RowAlign(0), 1U, 8U);
724 ASSERT(params.width * GetBytesPerPixel(params.pixel_format) % 4 == 0); 670 glPixelStorei(GL_PACK_ALIGNMENT, align);
725 glPixelStorei(GL_PACK_ROW_LENGTH, static_cast<GLint>(params.width)); 671 glPixelStorei(GL_PACK_ROW_LENGTH, static_cast<GLint>(params.width));
726 ASSERT(!tuple.compressed); 672 ASSERT(!tuple.compressed);
727 glBindBuffer(GL_PIXEL_PACK_BUFFER, 0); 673 glBindBuffer(GL_PIXEL_PACK_BUFFER, 0);
728 glGetTextureImage(texture.handle, 0, tuple.format, tuple.type, 674 glGetTextureImage(texture.handle, 0, tuple.format, tuple.type,
729 static_cast<GLsizei>(gl_buffer[0].size()), gl_buffer[0].data()); 675 static_cast<GLsizei>(gl_buffer[0].size()), gl_buffer[0].data());
730 glPixelStorei(GL_PACK_ROW_LENGTH, 0); 676 glPixelStorei(GL_PACK_ROW_LENGTH, 0);
731 ConvertFormatAsNeeded_FlushGLBuffer(gl_buffer[0], params.pixel_format, params.width, 677 Tegra::Texture::ConvertFromHostToGuest(gl_buffer[0].data(), params.pixel_format, params.width,
732 params.height); 678 params.height, params.depth, true, true);
733 ASSERT(params.type != SurfaceType::Fill);
734 const u8* const texture_src_data = Memory::GetPointer(params.addr);
735 ASSERT(texture_src_data);
736 if (params.is_tiled) { 679 if (params.is_tiled) {
737 ASSERT_MSG(params.block_width == 1, "Block width is defined as {} on texture type {}", 680 ASSERT_MSG(params.block_width == 1, "Block width is defined as {} on texture type {}",
738 params.block_width, static_cast<u32>(params.target)); 681 params.block_width, static_cast<u32>(params.target));
739 682
740 SwizzleFunc(MortonSwizzleMode::LinearToMorton, params, gl_buffer[0], 0); 683 SwizzleFunc(MortonSwizzleMode::LinearToMorton, params, gl_buffer[0], 0);
741 } else { 684 } else {
742 std::memcpy(Memory::GetPointer(GetAddr()), gl_buffer[0].data(), GetSizeInBytes()); 685 const u32 bpp = params.GetFormatBpp() / 8;
686 const u32 copy_size = params.width * bpp;
687 if (params.pitch == copy_size) {
688 std::memcpy(params.host_ptr, gl_buffer[0].data(), GetSizeInBytes());
689 } else {
690 u8* start{params.host_ptr};
691 const u8* read_to = gl_buffer[0].data();
692 for (u32 h = params.height; h > 0; h--) {
693 std::memcpy(start, read_to, copy_size);
694 start += params.pitch;
695 read_to += copy_size;
696 }
697 }
743 } 698 }
744} 699}
745 700
@@ -748,63 +703,50 @@ void CachedSurface::UploadGLMipmapTexture(u32 mip_map, GLuint read_fb_handle,
748 const auto& rect{params.GetRect(mip_map)}; 703 const auto& rect{params.GetRect(mip_map)};
749 704
750 // Load data from memory to the surface 705 // Load data from memory to the surface
751 const GLint x0 = static_cast<GLint>(rect.left); 706 const auto x0 = static_cast<GLint>(rect.left);
752 const GLint y0 = static_cast<GLint>(rect.bottom); 707 const auto y0 = static_cast<GLint>(rect.bottom);
753 std::size_t buffer_offset = 708 auto buffer_offset =
754 static_cast<std::size_t>(static_cast<std::size_t>(y0) * params.MipWidth(mip_map) + 709 static_cast<std::size_t>(static_cast<std::size_t>(y0) * params.MipWidth(mip_map) +
755 static_cast<std::size_t>(x0)) * 710 static_cast<std::size_t>(x0)) *
756 GetBytesPerPixel(params.pixel_format); 711 GetBytesPerPixel(params.pixel_format);
757 712
758 const FormatTuple& tuple = GetFormatTuple(params.pixel_format, params.component_type); 713 const FormatTuple& tuple = GetFormatTuple(params.pixel_format, params.component_type);
759 const GLuint target_tex = texture.handle; 714
760 OpenGLState cur_state = OpenGLState::GetCurState(); 715 const u32 align = std::clamp(params.RowAlign(mip_map), 1U, 8U);
761 716 glPixelStorei(GL_UNPACK_ALIGNMENT, align);
762 const auto& old_tex = cur_state.texture_units[0];
763 SCOPE_EXIT({
764 cur_state.texture_units[0] = old_tex;
765 cur_state.Apply();
766 });
767 cur_state.texture_units[0].texture = target_tex;
768 cur_state.texture_units[0].target = SurfaceTargetToGL(params.target);
769 cur_state.Apply();
770
771 // Ensure no bad interactions with GL_UNPACK_ALIGNMENT
772 ASSERT(params.MipWidth(mip_map) * GetBytesPerPixel(params.pixel_format) % 4 == 0);
773 glPixelStorei(GL_UNPACK_ROW_LENGTH, static_cast<GLint>(params.MipWidth(mip_map))); 717 glPixelStorei(GL_UNPACK_ROW_LENGTH, static_cast<GLint>(params.MipWidth(mip_map)));
774 718
775 GLsizei image_size = static_cast<GLsizei>(params.GetMipmapSizeGL(mip_map, false)); 719 const auto image_size = static_cast<GLsizei>(params.GetMipmapSizeGL(mip_map, false));
776 glActiveTexture(GL_TEXTURE0);
777 if (tuple.compressed) { 720 if (tuple.compressed) {
778 switch (params.target) { 721 switch (params.target) {
779 case SurfaceTarget::Texture2D: 722 case SurfaceTarget::Texture2D:
780 glCompressedTexImage2D(SurfaceTargetToGL(params.target), mip_map, tuple.internal_format, 723 glCompressedTextureSubImage2D(
781 static_cast<GLsizei>(params.MipWidth(mip_map)), 724 texture.handle, mip_map, 0, 0, static_cast<GLsizei>(params.MipWidth(mip_map)),
782 static_cast<GLsizei>(params.MipHeight(mip_map)), 0, image_size, 725 static_cast<GLsizei>(params.MipHeight(mip_map)), tuple.internal_format, image_size,
783 &gl_buffer[mip_map][buffer_offset]); 726 &gl_buffer[mip_map][buffer_offset]);
784 break; 727 break;
785 case SurfaceTarget::Texture3D: 728 case SurfaceTarget::Texture3D:
786 glCompressedTexImage3D(SurfaceTargetToGL(params.target), mip_map, tuple.internal_format, 729 glCompressedTextureSubImage3D(
787 static_cast<GLsizei>(params.MipWidth(mip_map)), 730 texture.handle, mip_map, 0, 0, 0, static_cast<GLsizei>(params.MipWidth(mip_map)),
788 static_cast<GLsizei>(params.MipHeight(mip_map)), 731 static_cast<GLsizei>(params.MipHeight(mip_map)),
789 static_cast<GLsizei>(params.MipDepth(mip_map)), 0, image_size, 732 static_cast<GLsizei>(params.MipDepth(mip_map)), tuple.internal_format, image_size,
790 &gl_buffer[mip_map][buffer_offset]); 733 &gl_buffer[mip_map][buffer_offset]);
791 break; 734 break;
792 case SurfaceTarget::Texture2DArray: 735 case SurfaceTarget::Texture2DArray:
793 case SurfaceTarget::TextureCubeArray: 736 case SurfaceTarget::TextureCubeArray:
794 glCompressedTexImage3D(SurfaceTargetToGL(params.target), mip_map, tuple.internal_format, 737 glCompressedTextureSubImage3D(
795 static_cast<GLsizei>(params.MipWidth(mip_map)), 738 texture.handle, mip_map, 0, 0, 0, static_cast<GLsizei>(params.MipWidth(mip_map)),
796 static_cast<GLsizei>(params.MipHeight(mip_map)), 739 static_cast<GLsizei>(params.MipHeight(mip_map)), static_cast<GLsizei>(params.depth),
797 static_cast<GLsizei>(params.depth), 0, image_size, 740 tuple.internal_format, image_size, &gl_buffer[mip_map][buffer_offset]);
798 &gl_buffer[mip_map][buffer_offset]);
799 break; 741 break;
800 case SurfaceTarget::TextureCubemap: { 742 case SurfaceTarget::TextureCubemap: {
801 GLsizei layer_size = static_cast<GLsizei>(params.LayerSizeGL(mip_map)); 743 const auto layer_size = static_cast<GLsizei>(params.LayerSizeGL(mip_map));
802 for (std::size_t face = 0; face < params.depth; ++face) { 744 for (std::size_t face = 0; face < params.depth; ++face) {
803 glCompressedTexImage2D(static_cast<GLenum>(GL_TEXTURE_CUBE_MAP_POSITIVE_X + face), 745 glCompressedTextureSubImage3D(
804 mip_map, tuple.internal_format, 746 texture.handle, mip_map, 0, 0, static_cast<GLint>(face),
805 static_cast<GLsizei>(params.MipWidth(mip_map)), 747 static_cast<GLsizei>(params.MipWidth(mip_map)),
806 static_cast<GLsizei>(params.MipHeight(mip_map)), 0, 748 static_cast<GLsizei>(params.MipHeight(mip_map)), 1, tuple.internal_format,
807 layer_size, &gl_buffer[mip_map][buffer_offset]); 749 layer_size, &gl_buffer[mip_map][buffer_offset]);
808 buffer_offset += layer_size; 750 buffer_offset += layer_size;
809 } 751 }
810 break; 752 break;
@@ -813,46 +755,43 @@ void CachedSurface::UploadGLMipmapTexture(u32 mip_map, GLuint read_fb_handle,
813 LOG_CRITICAL(Render_OpenGL, "Unimplemented surface target={}", 755 LOG_CRITICAL(Render_OpenGL, "Unimplemented surface target={}",
814 static_cast<u32>(params.target)); 756 static_cast<u32>(params.target));
815 UNREACHABLE(); 757 UNREACHABLE();
816 glCompressedTexImage2D(GL_TEXTURE_2D, mip_map, tuple.internal_format, 758 glCompressedTextureSubImage2D(
817 static_cast<GLsizei>(params.MipWidth(mip_map)), 759 texture.handle, mip_map, 0, 0, static_cast<GLsizei>(params.MipWidth(mip_map)),
818 static_cast<GLsizei>(params.MipHeight(mip_map)), 0, 760 static_cast<GLsizei>(params.MipHeight(mip_map)), tuple.internal_format,
819 static_cast<GLsizei>(params.size_in_bytes_gl), 761 static_cast<GLsizei>(params.size_in_bytes_gl), &gl_buffer[mip_map][buffer_offset]);
820 &gl_buffer[mip_map][buffer_offset]);
821 } 762 }
822 } else { 763 } else {
823
824 switch (params.target) { 764 switch (params.target) {
825 case SurfaceTarget::Texture1D: 765 case SurfaceTarget::Texture1D:
826 glTexSubImage1D(SurfaceTargetToGL(params.target), mip_map, x0, 766 glTextureSubImage1D(texture.handle, mip_map, x0, static_cast<GLsizei>(rect.GetWidth()),
827 static_cast<GLsizei>(rect.GetWidth()), tuple.format, tuple.type, 767 tuple.format, tuple.type, &gl_buffer[mip_map][buffer_offset]);
828 &gl_buffer[mip_map][buffer_offset]);
829 break; 768 break;
830 case SurfaceTarget::Texture2D: 769 case SurfaceTarget::Texture2D:
831 glTexSubImage2D(SurfaceTargetToGL(params.target), mip_map, x0, y0, 770 glTextureSubImage2D(texture.handle, mip_map, x0, y0,
832 static_cast<GLsizei>(rect.GetWidth()), 771 static_cast<GLsizei>(rect.GetWidth()),
833 static_cast<GLsizei>(rect.GetHeight()), tuple.format, tuple.type, 772 static_cast<GLsizei>(rect.GetHeight()), tuple.format, tuple.type,
834 &gl_buffer[mip_map][buffer_offset]); 773 &gl_buffer[mip_map][buffer_offset]);
835 break; 774 break;
836 case SurfaceTarget::Texture3D: 775 case SurfaceTarget::Texture3D:
837 glTexSubImage3D(SurfaceTargetToGL(params.target), mip_map, x0, y0, 0, 776 glTextureSubImage3D(texture.handle, mip_map, x0, y0, 0,
838 static_cast<GLsizei>(rect.GetWidth()), 777 static_cast<GLsizei>(rect.GetWidth()),
839 static_cast<GLsizei>(rect.GetHeight()), params.MipDepth(mip_map), 778 static_cast<GLsizei>(rect.GetHeight()), params.MipDepth(mip_map),
840 tuple.format, tuple.type, &gl_buffer[mip_map][buffer_offset]); 779 tuple.format, tuple.type, &gl_buffer[mip_map][buffer_offset]);
841 break; 780 break;
842 case SurfaceTarget::Texture2DArray: 781 case SurfaceTarget::Texture2DArray:
843 case SurfaceTarget::TextureCubeArray: 782 case SurfaceTarget::TextureCubeArray:
844 glTexSubImage3D(SurfaceTargetToGL(params.target), mip_map, x0, y0, 0, 783 glTextureSubImage3D(texture.handle, mip_map, x0, y0, 0,
845 static_cast<GLsizei>(rect.GetWidth()), 784 static_cast<GLsizei>(rect.GetWidth()),
846 static_cast<GLsizei>(rect.GetHeight()), params.depth, tuple.format, 785 static_cast<GLsizei>(rect.GetHeight()), params.depth, tuple.format,
847 tuple.type, &gl_buffer[mip_map][buffer_offset]); 786 tuple.type, &gl_buffer[mip_map][buffer_offset]);
848 break; 787 break;
849 case SurfaceTarget::TextureCubemap: { 788 case SurfaceTarget::TextureCubemap: {
850 std::size_t start = buffer_offset; 789 std::size_t start = buffer_offset;
851 for (std::size_t face = 0; face < params.depth; ++face) { 790 for (std::size_t face = 0; face < params.depth; ++face) {
852 glTexSubImage2D(static_cast<GLenum>(GL_TEXTURE_CUBE_MAP_POSITIVE_X + face), mip_map, 791 glTextureSubImage3D(texture.handle, mip_map, x0, y0, static_cast<GLint>(face),
853 x0, y0, static_cast<GLsizei>(rect.GetWidth()), 792 static_cast<GLsizei>(rect.GetWidth()),
854 static_cast<GLsizei>(rect.GetHeight()), tuple.format, tuple.type, 793 static_cast<GLsizei>(rect.GetHeight()), 1, tuple.format,
855 &gl_buffer[mip_map][buffer_offset]); 794 tuple.type, &gl_buffer[mip_map][buffer_offset]);
856 buffer_offset += params.LayerSizeGL(mip_map); 795 buffer_offset += params.LayerSizeGL(mip_map);
857 } 796 }
858 break; 797 break;
@@ -861,51 +800,62 @@ void CachedSurface::UploadGLMipmapTexture(u32 mip_map, GLuint read_fb_handle,
861 LOG_CRITICAL(Render_OpenGL, "Unimplemented surface target={}", 800 LOG_CRITICAL(Render_OpenGL, "Unimplemented surface target={}",
862 static_cast<u32>(params.target)); 801 static_cast<u32>(params.target));
863 UNREACHABLE(); 802 UNREACHABLE();
864 glTexSubImage2D(GL_TEXTURE_2D, mip_map, x0, y0, static_cast<GLsizei>(rect.GetWidth()), 803 glTextureSubImage2D(texture.handle, mip_map, x0, y0,
865 static_cast<GLsizei>(rect.GetHeight()), tuple.format, tuple.type, 804 static_cast<GLsizei>(rect.GetWidth()),
866 &gl_buffer[mip_map][buffer_offset]); 805 static_cast<GLsizei>(rect.GetHeight()), tuple.format, tuple.type,
806 &gl_buffer[mip_map][buffer_offset]);
867 } 807 }
868 } 808 }
869 809
870 glPixelStorei(GL_UNPACK_ROW_LENGTH, 0); 810 glPixelStorei(GL_UNPACK_ROW_LENGTH, 0);
871} 811}
872 812
873void CachedSurface::EnsureTextureView() { 813void CachedSurface::EnsureTextureDiscrepantView() {
874 if (texture_view.handle != 0) 814 if (discrepant_view.handle != 0)
875 return; 815 return;
876 // Compressed texture are not being created with immutable storage 816
877 UNIMPLEMENTED_IF(gl_is_compressed); 817 const GLenum target{GetArrayDiscrepantTarget(params.target)};
878 818 ASSERT(target != GL_NONE);
879 const GLenum target{TargetLayer()}; 819
880 820 const GLuint num_layers{target == GL_TEXTURE_CUBE_MAP_ARRAY ? 6u : 1u};
881 texture_view.Create(); 821 constexpr GLuint min_layer = 0;
882 glTextureView(texture_view.handle, target, texture.handle, gl_internal_format, 0, 822 constexpr GLuint min_level = 0;
883 params.max_mip_level, 0, 1); 823
884 824 glGenTextures(1, &discrepant_view.handle);
885 OpenGLState cur_state = OpenGLState::GetCurState(); 825 glTextureView(discrepant_view.handle, target, texture.handle, gl_internal_format, min_level,
886 const auto& old_tex = cur_state.texture_units[0]; 826 params.max_mip_level, min_layer, num_layers);
887 SCOPE_EXIT({ 827 ApplyTextureDefaults(discrepant_view.handle, params.max_mip_level);
888 cur_state.texture_units[0] = old_tex; 828 glTextureParameteriv(discrepant_view.handle, GL_TEXTURE_SWIZZLE_RGBA,
889 cur_state.Apply(); 829 reinterpret_cast<const GLint*>(swizzle.data()));
890 });
891 cur_state.texture_units[0].texture = texture_view.handle;
892 cur_state.texture_units[0].target = target;
893 cur_state.Apply();
894
895 ApplyTextureDefaults(target, params.max_mip_level);
896} 830}
897 831
898MICROPROFILE_DEFINE(OpenGL_TextureUL, "OpenGL", "Texture Upload", MP_RGB(128, 192, 64)); 832MICROPROFILE_DEFINE(OpenGL_TextureUL, "OpenGL", "Texture Upload", MP_RGB(128, 192, 64));
899void CachedSurface::UploadGLTexture(GLuint read_fb_handle, GLuint draw_fb_handle) { 833void CachedSurface::UploadGLTexture(GLuint read_fb_handle, GLuint draw_fb_handle) {
900 if (params.type == SurfaceType::Fill)
901 return;
902
903 MICROPROFILE_SCOPE(OpenGL_TextureUL); 834 MICROPROFILE_SCOPE(OpenGL_TextureUL);
904 835
905 for (u32 i = 0; i < params.max_mip_level; i++) 836 for (u32 i = 0; i < params.max_mip_level; i++)
906 UploadGLMipmapTexture(i, read_fb_handle, draw_fb_handle); 837 UploadGLMipmapTexture(i, read_fb_handle, draw_fb_handle);
907} 838}
908 839
840void CachedSurface::UpdateSwizzle(Tegra::Texture::SwizzleSource swizzle_x,
841 Tegra::Texture::SwizzleSource swizzle_y,
842 Tegra::Texture::SwizzleSource swizzle_z,
843 Tegra::Texture::SwizzleSource swizzle_w) {
844 const GLenum new_x = MaxwellToGL::SwizzleSource(swizzle_x);
845 const GLenum new_y = MaxwellToGL::SwizzleSource(swizzle_y);
846 const GLenum new_z = MaxwellToGL::SwizzleSource(swizzle_z);
847 const GLenum new_w = MaxwellToGL::SwizzleSource(swizzle_w);
848 if (swizzle[0] == new_x && swizzle[1] == new_y && swizzle[2] == new_z && swizzle[3] == new_w) {
849 return;
850 }
851 swizzle = {new_x, new_y, new_z, new_w};
852 const auto swizzle_data = reinterpret_cast<const GLint*>(swizzle.data());
853 glTextureParameteriv(texture.handle, GL_TEXTURE_SWIZZLE_RGBA, swizzle_data);
854 if (discrepant_view.handle != 0) {
855 glTextureParameteriv(discrepant_view.handle, GL_TEXTURE_SWIZZLE_RGBA, swizzle_data);
856 }
857}
858
909RasterizerCacheOpenGL::RasterizerCacheOpenGL(RasterizerOpenGL& rasterizer) 859RasterizerCacheOpenGL::RasterizerCacheOpenGL(RasterizerOpenGL& rasterizer)
910 : RasterizerCache{rasterizer} { 860 : RasterizerCache{rasterizer} {
911 read_framebuffer.Create(); 861 read_framebuffer.Create();
@@ -943,42 +893,45 @@ Surface RasterizerCacheOpenGL::GetColorBufferSurface(std::size_t index, bool pre
943 auto& gpu{Core::System::GetInstance().GPU().Maxwell3D()}; 893 auto& gpu{Core::System::GetInstance().GPU().Maxwell3D()};
944 const auto& regs{gpu.regs}; 894 const auto& regs{gpu.regs};
945 895
946 if ((gpu.dirty_flags.color_buffer & (1u << static_cast<u32>(index))) == 0) { 896 if (!gpu.dirty_flags.color_buffer[index]) {
947 return last_color_buffers[index]; 897 return current_color_buffers[index];
948 } 898 }
949 gpu.dirty_flags.color_buffer &= ~(1u << static_cast<u32>(index)); 899 gpu.dirty_flags.color_buffer.reset(index);
950 900
951 ASSERT(index < Tegra::Engines::Maxwell3D::Regs::NumRenderTargets); 901 ASSERT(index < Tegra::Engines::Maxwell3D::Regs::NumRenderTargets);
952 902
953 if (index >= regs.rt_control.count) { 903 if (index >= regs.rt_control.count) {
954 return last_color_buffers[index] = {}; 904 return current_color_buffers[index] = {};
955 } 905 }
956 906
957 if (regs.rt[index].Address() == 0 || regs.rt[index].format == Tegra::RenderTargetFormat::NONE) { 907 if (regs.rt[index].Address() == 0 || regs.rt[index].format == Tegra::RenderTargetFormat::NONE) {
958 return last_color_buffers[index] = {}; 908 return current_color_buffers[index] = {};
959 } 909 }
960 910
961 const SurfaceParams color_params{SurfaceParams::CreateForFramebuffer(index)}; 911 const SurfaceParams color_params{SurfaceParams::CreateForFramebuffer(index)};
962 912
963 return last_color_buffers[index] = GetSurface(color_params, preserve_contents); 913 return current_color_buffers[index] = GetSurface(color_params, preserve_contents);
964} 914}
965 915
966void RasterizerCacheOpenGL::LoadSurface(const Surface& surface) { 916void RasterizerCacheOpenGL::LoadSurface(const Surface& surface) {
967 surface->LoadGLBuffer(); 917 surface->LoadGLBuffer();
968 surface->UploadGLTexture(read_framebuffer.handle, draw_framebuffer.handle); 918 surface->UploadGLTexture(read_framebuffer.handle, draw_framebuffer.handle);
969 surface->MarkAsModified(false, *this); 919 surface->MarkAsModified(false, *this);
920 surface->MarkForReload(false);
970} 921}
971 922
972Surface RasterizerCacheOpenGL::GetSurface(const SurfaceParams& params, bool preserve_contents) { 923Surface RasterizerCacheOpenGL::GetSurface(const SurfaceParams& params, bool preserve_contents) {
973 if (params.addr == 0 || params.height * params.width == 0) { 924 if (!params.IsValid()) {
974 return {}; 925 return {};
975 } 926 }
976 927
977 // Look up surface in the cache based on address 928 // Look up surface in the cache based on address
978 Surface surface{TryGet(params.addr)}; 929 Surface surface{TryGet(params.host_ptr)};
979 if (surface) { 930 if (surface) {
980 if (surface->GetSurfaceParams().IsCompatibleSurface(params)) { 931 if (surface->GetSurfaceParams().IsCompatibleSurface(params)) {
981 // Use the cached surface as-is 932 // Use the cached surface as-is unless it's not synced with memory
933 if (surface->MustReload())
934 LoadSurface(surface);
982 return surface; 935 return surface;
983 } else if (preserve_contents) { 936 } else if (preserve_contents) {
984 // If surface parameters changed and we care about keeping the previous data, recreate 937 // If surface parameters changed and we care about keeping the previous data, recreate
@@ -986,6 +939,9 @@ Surface RasterizerCacheOpenGL::GetSurface(const SurfaceParams& params, bool pres
986 Surface new_surface{RecreateSurface(surface, params)}; 939 Surface new_surface{RecreateSurface(surface, params)};
987 Unregister(surface); 940 Unregister(surface);
988 Register(new_surface); 941 Register(new_surface);
942 if (new_surface->IsUploaded()) {
943 RegisterReinterpretSurface(new_surface);
944 }
989 return new_surface; 945 return new_surface;
990 } else { 946 } else {
991 // Delete the old surface before creating a new one to prevent collisions. 947 // Delete the old surface before creating a new one to prevent collisions.
@@ -1019,14 +975,16 @@ void RasterizerCacheOpenGL::FastLayeredCopySurface(const Surface& src_surface,
1019 const Surface& dst_surface) { 975 const Surface& dst_surface) {
1020 const auto& init_params{src_surface->GetSurfaceParams()}; 976 const auto& init_params{src_surface->GetSurfaceParams()};
1021 const auto& dst_params{dst_surface->GetSurfaceParams()}; 977 const auto& dst_params{dst_surface->GetSurfaceParams()};
1022 VAddr address = init_params.addr; 978 auto& memory_manager{Core::System::GetInstance().GPU().MemoryManager()};
1023 const std::size_t layer_size = dst_params.LayerMemorySize(); 979 GPUVAddr address{init_params.gpu_addr};
980 const std::size_t layer_size{dst_params.LayerMemorySize()};
1024 for (u32 layer = 0; layer < dst_params.depth; layer++) { 981 for (u32 layer = 0; layer < dst_params.depth; layer++) {
1025 for (u32 mipmap = 0; mipmap < dst_params.max_mip_level; mipmap++) { 982 for (u32 mipmap = 0; mipmap < dst_params.max_mip_level; mipmap++) {
1026 const VAddr sub_address = address + dst_params.GetMipmapLevelOffset(mipmap); 983 const GPUVAddr sub_address{address + dst_params.GetMipmapLevelOffset(mipmap)};
1027 const Surface& copy = TryGet(sub_address); 984 const Surface& copy{TryGet(memory_manager.GetPointer(sub_address))};
1028 if (!copy) 985 if (!copy) {
1029 continue; 986 continue;
987 }
1030 const auto& src_params{copy->GetSurfaceParams()}; 988 const auto& src_params{copy->GetSurfaceParams()};
1031 const u32 width{std::min(src_params.width, dst_params.MipWidth(mipmap))}; 989 const u32 width{std::min(src_params.width, dst_params.MipWidth(mipmap))};
1032 const u32 height{std::min(src_params.height, dst_params.MipHeight(mipmap))}; 990 const u32 height{std::min(src_params.height, dst_params.MipHeight(mipmap))};
@@ -1038,26 +996,161 @@ void RasterizerCacheOpenGL::FastLayeredCopySurface(const Surface& src_surface,
1038 } 996 }
1039 address += layer_size; 997 address += layer_size;
1040 } 998 }
999
1000 dst_surface->MarkAsModified(true, *this);
1001}
1002
1003static bool BlitSurface(const Surface& src_surface, const Surface& dst_surface,
1004 const Common::Rectangle<u32>& src_rect,
1005 const Common::Rectangle<u32>& dst_rect, GLuint read_fb_handle,
1006 GLuint draw_fb_handle, GLenum src_attachment = 0, GLenum dst_attachment = 0,
1007 std::size_t cubemap_face = 0) {
1008
1009 const auto& src_params{src_surface->GetSurfaceParams()};
1010 const auto& dst_params{dst_surface->GetSurfaceParams()};
1011
1012 OpenGLState prev_state{OpenGLState::GetCurState()};
1013 SCOPE_EXIT({ prev_state.Apply(); });
1014
1015 OpenGLState state;
1016 state.draw.read_framebuffer = read_fb_handle;
1017 state.draw.draw_framebuffer = draw_fb_handle;
1018 state.Apply();
1019
1020 u32 buffers{};
1021
1022 if (src_params.type == SurfaceType::ColorTexture) {
1023 switch (src_params.target) {
1024 case SurfaceTarget::Texture2D:
1025 glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0 + src_attachment,
1026 GL_TEXTURE_2D, src_surface->Texture().handle, 0);
1027 glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D,
1028 0, 0);
1029 break;
1030 case SurfaceTarget::TextureCubemap:
1031 glFramebufferTexture2D(
1032 GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0 + src_attachment,
1033 static_cast<GLenum>(GL_TEXTURE_CUBE_MAP_POSITIVE_X + cubemap_face),
1034 src_surface->Texture().handle, 0);
1035 glFramebufferTexture2D(
1036 GL_READ_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT,
1037 static_cast<GLenum>(GL_TEXTURE_CUBE_MAP_POSITIVE_X + cubemap_face), 0, 0);
1038 break;
1039 case SurfaceTarget::Texture2DArray:
1040 glFramebufferTextureLayer(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0 + src_attachment,
1041 src_surface->Texture().handle, 0, 0);
1042 glFramebufferTextureLayer(GL_READ_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, 0, 0, 0);
1043 break;
1044 case SurfaceTarget::Texture3D:
1045 glFramebufferTexture3D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0 + src_attachment,
1046 SurfaceTargetToGL(src_params.target),
1047 src_surface->Texture().handle, 0, 0);
1048 glFramebufferTexture3D(GL_READ_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT,
1049 SurfaceTargetToGL(src_params.target), 0, 0, 0);
1050 break;
1051 default:
1052 glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0 + src_attachment,
1053 GL_TEXTURE_2D, src_surface->Texture().handle, 0);
1054 glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D,
1055 0, 0);
1056 break;
1057 }
1058
1059 switch (dst_params.target) {
1060 case SurfaceTarget::Texture2D:
1061 glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0 + dst_attachment,
1062 GL_TEXTURE_2D, dst_surface->Texture().handle, 0);
1063 glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D,
1064 0, 0);
1065 break;
1066 case SurfaceTarget::TextureCubemap:
1067 glFramebufferTexture2D(
1068 GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0 + dst_attachment,
1069 static_cast<GLenum>(GL_TEXTURE_CUBE_MAP_POSITIVE_X + cubemap_face),
1070 dst_surface->Texture().handle, 0);
1071 glFramebufferTexture2D(
1072 GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT,
1073 static_cast<GLenum>(GL_TEXTURE_CUBE_MAP_POSITIVE_X + cubemap_face), 0, 0);
1074 break;
1075 case SurfaceTarget::Texture2DArray:
1076 glFramebufferTextureLayer(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0 + dst_attachment,
1077 dst_surface->Texture().handle, 0, 0);
1078 glFramebufferTextureLayer(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, 0, 0, 0);
1079 break;
1080
1081 case SurfaceTarget::Texture3D:
1082 glFramebufferTexture3D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0 + dst_attachment,
1083 SurfaceTargetToGL(dst_params.target),
1084 dst_surface->Texture().handle, 0, 0);
1085 glFramebufferTexture3D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT,
1086 SurfaceTargetToGL(dst_params.target), 0, 0, 0);
1087 break;
1088 default:
1089 glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0 + dst_attachment,
1090 GL_TEXTURE_2D, dst_surface->Texture().handle, 0);
1091 glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D,
1092 0, 0);
1093 break;
1094 }
1095
1096 buffers = GL_COLOR_BUFFER_BIT;
1097 } else if (src_params.type == SurfaceType::Depth) {
1098 glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0 + src_attachment,
1099 GL_TEXTURE_2D, 0, 0);
1100 glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_TEXTURE_2D,
1101 src_surface->Texture().handle, 0);
1102 glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, 0);
1103
1104 glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0 + dst_attachment,
1105 GL_TEXTURE_2D, 0, 0);
1106 glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_TEXTURE_2D,
1107 dst_surface->Texture().handle, 0);
1108 glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, 0);
1109
1110 buffers = GL_DEPTH_BUFFER_BIT;
1111 } else if (src_params.type == SurfaceType::DepthStencil) {
1112 glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0 + src_attachment,
1113 GL_TEXTURE_2D, 0, 0);
1114 glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D,
1115 src_surface->Texture().handle, 0);
1116
1117 glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0 + dst_attachment,
1118 GL_TEXTURE_2D, 0, 0);
1119 glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D,
1120 dst_surface->Texture().handle, 0);
1121
1122 buffers = GL_DEPTH_BUFFER_BIT | GL_STENCIL_BUFFER_BIT;
1123 }
1124
1125 glBlitFramebuffer(src_rect.left, src_rect.top, src_rect.right, src_rect.bottom, dst_rect.left,
1126 dst_rect.top, dst_rect.right, dst_rect.bottom, buffers,
1127 buffers == GL_COLOR_BUFFER_BIT ? GL_LINEAR : GL_NEAREST);
1128
1129 return true;
1041} 1130}
1042 1131
1043void RasterizerCacheOpenGL::FermiCopySurface( 1132void RasterizerCacheOpenGL::FermiCopySurface(
1044 const Tegra::Engines::Fermi2D::Regs::Surface& src_config, 1133 const Tegra::Engines::Fermi2D::Regs::Surface& src_config,
1045 const Tegra::Engines::Fermi2D::Regs::Surface& dst_config) { 1134 const Tegra::Engines::Fermi2D::Regs::Surface& dst_config,
1135 const Common::Rectangle<u32>& src_rect, const Common::Rectangle<u32>& dst_rect) {
1046 1136
1047 const auto& src_params = SurfaceParams::CreateForFermiCopySurface(src_config); 1137 const auto& src_params = SurfaceParams::CreateForFermiCopySurface(src_config);
1048 const auto& dst_params = SurfaceParams::CreateForFermiCopySurface(dst_config); 1138 const auto& dst_params = SurfaceParams::CreateForFermiCopySurface(dst_config);
1049 1139
1050 ASSERT(src_params.width == dst_params.width);
1051 ASSERT(src_params.height == dst_params.height);
1052 ASSERT(src_params.pixel_format == dst_params.pixel_format); 1140 ASSERT(src_params.pixel_format == dst_params.pixel_format);
1053 ASSERT(src_params.block_height == dst_params.block_height); 1141 ASSERT(src_params.block_height == dst_params.block_height);
1054 ASSERT(src_params.is_tiled == dst_params.is_tiled); 1142 ASSERT(src_params.is_tiled == dst_params.is_tiled);
1055 ASSERT(src_params.depth == dst_params.depth); 1143 ASSERT(src_params.depth == dst_params.depth);
1056 ASSERT(src_params.depth == 1); // Currently, FastCopySurface only works with 2D surfaces
1057 ASSERT(src_params.target == dst_params.target); 1144 ASSERT(src_params.target == dst_params.target);
1058 ASSERT(src_params.rt.index == dst_params.rt.index); 1145 ASSERT(src_params.rt.index == dst_params.rt.index);
1059 1146
1060 FastCopySurface(GetSurface(src_params, true), GetSurface(dst_params, false)); 1147 auto src_surface = GetSurface(src_params, true);
1148 auto dst_surface = GetSurface(dst_params, true);
1149
1150 BlitSurface(src_surface, dst_surface, src_rect, dst_rect, read_framebuffer.handle,
1151 draw_framebuffer.handle);
1152
1153 dst_surface->MarkAsModified(true, *this);
1061} 1154}
1062 1155
1063void RasterizerCacheOpenGL::AccurateCopySurface(const Surface& src_surface, 1156void RasterizerCacheOpenGL::AccurateCopySurface(const Surface& src_surface,
@@ -1066,7 +1159,8 @@ void RasterizerCacheOpenGL::AccurateCopySurface(const Surface& src_surface,
1066 const auto& dst_params{dst_surface->GetSurfaceParams()}; 1159 const auto& dst_params{dst_surface->GetSurfaceParams()};
1067 1160
1068 // Flush enough memory for both the source and destination surface 1161 // Flush enough memory for both the source and destination surface
1069 FlushRegion(src_params.addr, std::max(src_params.MemorySize(), dst_params.MemorySize())); 1162 FlushRegion(ToCacheAddr(src_params.host_ptr),
1163 std::max(src_params.MemorySize(), dst_params.MemorySize()));
1070 1164
1071 LoadSurface(dst_surface); 1165 LoadSurface(dst_surface);
1072} 1166}
@@ -1085,10 +1179,16 @@ Surface RasterizerCacheOpenGL::RecreateSurface(const Surface& old_surface,
1085 return new_surface; 1179 return new_surface;
1086 } 1180 }
1087 1181
1182 const bool old_compressed =
1183 GetFormatTuple(old_params.pixel_format, old_params.component_type).compressed;
1184 const bool new_compressed =
1185 GetFormatTuple(new_params.pixel_format, new_params.component_type).compressed;
1186 const bool compatible_formats =
1187 GetFormatBpp(old_params.pixel_format) == GetFormatBpp(new_params.pixel_format) &&
1188 !(old_compressed || new_compressed);
1088 // For compatible surfaces, we can just do fast glCopyImageSubData based copy 1189 // For compatible surfaces, we can just do fast glCopyImageSubData based copy
1089 if (old_params.target == new_params.target && old_params.type == new_params.type && 1190 if (old_params.target == new_params.target && old_params.depth == new_params.depth &&
1090 old_params.depth == new_params.depth && old_params.depth == 1 && 1191 old_params.depth == 1 && compatible_formats) {
1091 GetFormatBpp(old_params.pixel_format) == GetFormatBpp(new_params.pixel_format)) {
1092 FastCopySurface(old_surface, new_surface); 1192 FastCopySurface(old_surface, new_surface);
1093 return new_surface; 1193 return new_surface;
1094 } 1194 }
@@ -1103,7 +1203,11 @@ Surface RasterizerCacheOpenGL::RecreateSurface(const Surface& old_surface,
1103 case SurfaceTarget::TextureCubemap: 1203 case SurfaceTarget::TextureCubemap:
1104 case SurfaceTarget::Texture2DArray: 1204 case SurfaceTarget::Texture2DArray:
1105 case SurfaceTarget::TextureCubeArray: 1205 case SurfaceTarget::TextureCubeArray:
1106 FastLayeredCopySurface(old_surface, new_surface); 1206 if (compatible_formats)
1207 FastLayeredCopySurface(old_surface, new_surface);
1208 else {
1209 AccurateCopySurface(old_surface, new_surface);
1210 }
1107 break; 1211 break;
1108 default: 1212 default:
1109 LOG_CRITICAL(Render_OpenGL, "Unimplemented surface target={}", 1213 LOG_CRITICAL(Render_OpenGL, "Unimplemented surface target={}",
@@ -1114,8 +1218,8 @@ Surface RasterizerCacheOpenGL::RecreateSurface(const Surface& old_surface,
1114 return new_surface; 1218 return new_surface;
1115} 1219}
1116 1220
1117Surface RasterizerCacheOpenGL::TryFindFramebufferSurface(VAddr addr) const { 1221Surface RasterizerCacheOpenGL::TryFindFramebufferSurface(const u8* host_ptr) const {
1118 return TryGet(addr); 1222 return TryGet(host_ptr);
1119} 1223}
1120 1224
1121void RasterizerCacheOpenGL::ReserveSurface(const Surface& surface) { 1225void RasterizerCacheOpenGL::ReserveSurface(const Surface& surface) {
@@ -1132,4 +1236,108 @@ Surface RasterizerCacheOpenGL::TryGetReservedSurface(const SurfaceParams& params
1132 return {}; 1236 return {};
1133} 1237}
1134 1238
1239static std::optional<u32> TryFindBestMipMap(std::size_t memory, const SurfaceParams params,
1240 u32 height) {
1241 for (u32 i = 0; i < params.max_mip_level; i++) {
1242 if (memory == params.GetMipmapSingleSize(i) && params.MipHeight(i) == height) {
1243 return {i};
1244 }
1245 }
1246 return {};
1247}
1248
1249static std::optional<u32> TryFindBestLayer(GPUVAddr addr, const SurfaceParams params, u32 mipmap) {
1250 const std::size_t size{params.LayerMemorySize()};
1251 GPUVAddr start{params.gpu_addr + params.GetMipmapLevelOffset(mipmap)};
1252 for (u32 i = 0; i < params.depth; i++) {
1253 if (start == addr) {
1254 return {i};
1255 }
1256 start += size;
1257 }
1258 return {};
1259}
1260
1261static bool LayerFitReinterpretSurface(RasterizerCacheOpenGL& cache, const Surface render_surface,
1262 const Surface blitted_surface) {
1263 const auto& dst_params = blitted_surface->GetSurfaceParams();
1264 const auto& src_params = render_surface->GetSurfaceParams();
1265 const std::size_t src_memory_size = src_params.size_in_bytes;
1266 const std::optional<u32> level =
1267 TryFindBestMipMap(src_memory_size, dst_params, src_params.height);
1268 if (level.has_value()) {
1269 if (src_params.width == dst_params.MipWidthGobAligned(*level) &&
1270 src_params.height == dst_params.MipHeight(*level) &&
1271 src_params.block_height >= dst_params.MipBlockHeight(*level)) {
1272 const std::optional<u32> slot =
1273 TryFindBestLayer(render_surface->GetSurfaceParams().gpu_addr, dst_params, *level);
1274 if (slot.has_value()) {
1275 glCopyImageSubData(render_surface->Texture().handle,
1276 SurfaceTargetToGL(src_params.target), 0, 0, 0, 0,
1277 blitted_surface->Texture().handle,
1278 SurfaceTargetToGL(dst_params.target), *level, 0, 0, *slot,
1279 dst_params.MipWidth(*level), dst_params.MipHeight(*level), 1);
1280 blitted_surface->MarkAsModified(true, cache);
1281 return true;
1282 }
1283 }
1284 }
1285 return false;
1286}
1287
1288static bool IsReinterpretInvalid(const Surface render_surface, const Surface blitted_surface) {
1289 const VAddr bound1 = blitted_surface->GetCpuAddr() + blitted_surface->GetMemorySize();
1290 const VAddr bound2 = render_surface->GetCpuAddr() + render_surface->GetMemorySize();
1291 if (bound2 > bound1)
1292 return true;
1293 const auto& dst_params = blitted_surface->GetSurfaceParams();
1294 const auto& src_params = render_surface->GetSurfaceParams();
1295 return (dst_params.component_type != src_params.component_type);
1296}
1297
1298static bool IsReinterpretInvalidSecond(const Surface render_surface,
1299 const Surface blitted_surface) {
1300 const auto& dst_params = blitted_surface->GetSurfaceParams();
1301 const auto& src_params = render_surface->GetSurfaceParams();
1302 return (dst_params.height > src_params.height && dst_params.width > src_params.width);
1303}
1304
1305bool RasterizerCacheOpenGL::PartialReinterpretSurface(Surface triggering_surface,
1306 Surface intersect) {
1307 if (IsReinterpretInvalid(triggering_surface, intersect)) {
1308 Unregister(intersect);
1309 return false;
1310 }
1311 if (!LayerFitReinterpretSurface(*this, triggering_surface, intersect)) {
1312 if (IsReinterpretInvalidSecond(triggering_surface, intersect)) {
1313 Unregister(intersect);
1314 return false;
1315 }
1316 FlushObject(intersect);
1317 FlushObject(triggering_surface);
1318 intersect->MarkForReload(true);
1319 }
1320 return true;
1321}
1322
1323void RasterizerCacheOpenGL::SignalPreDrawCall() {
1324 if (texception && GLAD_GL_ARB_texture_barrier) {
1325 glTextureBarrier();
1326 }
1327 texception = false;
1328}
1329
1330void RasterizerCacheOpenGL::SignalPostDrawCall() {
1331 for (u32 i = 0; i < Maxwell::NumRenderTargets; i++) {
1332 if (current_color_buffers[i] != nullptr) {
1333 Surface intersect =
1334 CollideOnReinterpretedSurface(current_color_buffers[i]->GetCacheAddr());
1335 if (intersect != nullptr) {
1336 PartialReinterpretSurface(current_color_buffers[i], intersect);
1337 texception = true;
1338 }
1339 }
1340 }
1341}
1342
1135} // namespace OpenGL 1343} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.h b/src/video_core/renderer_opengl/gl_rasterizer_cache.h
index 37611c4fc..db280dbb3 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer_cache.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.h
@@ -5,12 +5,13 @@
5#pragma once 5#pragma once
6 6
7#include <array> 7#include <array>
8#include <map>
9#include <memory> 8#include <memory>
10#include <string> 9#include <string>
10#include <tuple>
11#include <vector> 11#include <vector>
12 12
13#include "common/alignment.h" 13#include "common/alignment.h"
14#include "common/bit_util.h"
14#include "common/common_types.h" 15#include "common/common_types.h"
15#include "common/hash.h" 16#include "common/hash.h"
16#include "common/math_util.h" 17#include "common/math_util.h"
@@ -27,14 +28,22 @@ namespace OpenGL {
27 28
28class CachedSurface; 29class CachedSurface;
29using Surface = std::shared_ptr<CachedSurface>; 30using Surface = std::shared_ptr<CachedSurface>;
30using SurfaceSurfaceRect_Tuple = std::tuple<Surface, Surface, MathUtil::Rectangle<u32>>; 31using SurfaceSurfaceRect_Tuple = std::tuple<Surface, Surface, Common::Rectangle<u32>>;
31 32
32using SurfaceTarget = VideoCore::Surface::SurfaceTarget; 33using SurfaceTarget = VideoCore::Surface::SurfaceTarget;
33using SurfaceType = VideoCore::Surface::SurfaceType; 34using SurfaceType = VideoCore::Surface::SurfaceType;
34using PixelFormat = VideoCore::Surface::PixelFormat; 35using PixelFormat = VideoCore::Surface::PixelFormat;
35using ComponentType = VideoCore::Surface::ComponentType; 36using ComponentType = VideoCore::Surface::ComponentType;
37using Maxwell = Tegra::Engines::Maxwell3D::Regs;
36 38
37struct SurfaceParams { 39struct SurfaceParams {
40 enum class SurfaceClass {
41 Uploaded,
42 RenderTarget,
43 DepthBuffer,
44 Copy,
45 };
46
38 static std::string SurfaceTargetName(SurfaceTarget target) { 47 static std::string SurfaceTargetName(SurfaceTarget target) {
39 switch (target) { 48 switch (target) {
40 case SurfaceTarget::Texture1D: 49 case SurfaceTarget::Texture1D:
@@ -63,7 +72,7 @@ struct SurfaceParams {
63 } 72 }
64 73
65 /// Returns the rectangle corresponding to this surface 74 /// Returns the rectangle corresponding to this surface
66 MathUtil::Rectangle<u32> GetRect(u32 mip_level = 0) const; 75 Common::Rectangle<u32> GetRect(u32 mip_level = 0) const;
67 76
68 /// Returns the total size of this surface in bytes, adjusted for compression 77 /// Returns the total size of this surface in bytes, adjusted for compression
69 std::size_t SizeInBytesRaw(bool ignore_tiled = false) const { 78 std::size_t SizeInBytesRaw(bool ignore_tiled = false) const {
@@ -100,6 +109,11 @@ struct SurfaceParams {
100 return size; 109 return size;
101 } 110 }
102 111
112 /// Returns true if the parameters constitute a valid rasterizer surface.
113 bool IsValid() const {
114 return gpu_addr && host_ptr && height && width;
115 }
116
103 /// Returns the exact size of the memory occupied by a layer in a texture in VRAM, including 117 /// Returns the exact size of the memory occupied by a layer in a texture in VRAM, including
104 /// mipmaps. 118 /// mipmaps.
105 std::size_t LayerMemorySize() const { 119 std::size_t LayerMemorySize() const {
@@ -132,10 +146,18 @@ struct SurfaceParams {
132 return offset; 146 return offset;
133 } 147 }
134 148
149 std::size_t GetMipmapSingleSize(u32 mip_level) const {
150 return InnerMipmapMemorySize(mip_level, false, is_layered);
151 }
152
135 u32 MipWidth(u32 mip_level) const { 153 u32 MipWidth(u32 mip_level) const {
136 return std::max(1U, width >> mip_level); 154 return std::max(1U, width >> mip_level);
137 } 155 }
138 156
157 u32 MipWidthGobAligned(u32 mip_level) const {
158 return Common::AlignUp(std::max(1U, width >> mip_level), 64U * 8U / GetFormatBpp());
159 }
160
139 u32 MipHeight(u32 mip_level) const { 161 u32 MipHeight(u32 mip_level) const {
140 return std::max(1U, height >> mip_level); 162 return std::max(1U, height >> mip_level);
141 } 163 }
@@ -160,23 +182,37 @@ struct SurfaceParams {
160 } 182 }
161 183
162 u32 MipBlockDepth(u32 mip_level) const { 184 u32 MipBlockDepth(u32 mip_level) const {
163 if (mip_level == 0) 185 if (mip_level == 0) {
164 return block_depth; 186 return block_depth;
165 if (is_layered) 187 }
188
189 if (is_layered) {
166 return 1; 190 return 1;
167 u32 depth = MipDepth(mip_level); 191 }
192
193 const u32 mip_depth = MipDepth(mip_level);
168 u32 bd = 32; 194 u32 bd = 32;
169 while (bd > 1 && depth * 2 <= bd) { 195 while (bd > 1 && mip_depth * 2 <= bd) {
170 bd >>= 1; 196 bd >>= 1;
171 } 197 }
198
172 if (bd == 32) { 199 if (bd == 32) {
173 u32 bh = MipBlockHeight(mip_level); 200 const u32 bh = MipBlockHeight(mip_level);
174 if (bh >= 4) 201 if (bh >= 4) {
175 return 16; 202 return 16;
203 }
176 } 204 }
205
177 return bd; 206 return bd;
178 } 207 }
179 208
209 u32 RowAlign(u32 mip_level) const {
210 const u32 m_width = MipWidth(mip_level);
211 const u32 bytes_per_pixel = GetBytesPerPixel(pixel_format);
212 const u32 l2 = Common::CountTrailingZeroes32(m_width * bytes_per_pixel);
213 return (1U << l2);
214 }
215
180 /// Creates SurfaceParams from a texture configuration 216 /// Creates SurfaceParams from a texture configuration
181 static SurfaceParams CreateForTexture(const Tegra::Texture::FullTextureInfo& config, 217 static SurfaceParams CreateForTexture(const Tegra::Texture::FullTextureInfo& config,
182 const GLShader::SamplerEntry& entry); 218 const GLShader::SamplerEntry& entry);
@@ -186,7 +222,7 @@ struct SurfaceParams {
186 222
187 /// Creates SurfaceParams for a depth buffer configuration 223 /// Creates SurfaceParams for a depth buffer configuration
188 static SurfaceParams CreateForDepthBuffer( 224 static SurfaceParams CreateForDepthBuffer(
189 u32 zeta_width, u32 zeta_height, Tegra::GPUVAddr zeta_address, Tegra::DepthFormat format, 225 u32 zeta_width, u32 zeta_height, GPUVAddr zeta_address, Tegra::DepthFormat format,
190 u32 block_width, u32 block_height, u32 block_depth, 226 u32 block_width, u32 block_height, u32 block_depth,
191 Tegra::Engines::Maxwell3D::Regs::InvMemoryLayout type); 227 Tegra::Engines::Maxwell3D::Regs::InvMemoryLayout type);
192 228
@@ -208,7 +244,49 @@ struct SurfaceParams {
208 } 244 }
209 245
210 /// Initializes parameters for caching, should be called after everything has been initialized 246 /// Initializes parameters for caching, should be called after everything has been initialized
211 void InitCacheParameters(Tegra::GPUVAddr gpu_addr); 247 void InitCacheParameters(GPUVAddr gpu_addr);
248
249 std::string TargetName() const {
250 switch (target) {
251 case SurfaceTarget::Texture1D:
252 return "1D";
253 case SurfaceTarget::Texture2D:
254 return "2D";
255 case SurfaceTarget::Texture3D:
256 return "3D";
257 case SurfaceTarget::Texture1DArray:
258 return "1DArray";
259 case SurfaceTarget::Texture2DArray:
260 return "2DArray";
261 case SurfaceTarget::TextureCubemap:
262 return "Cube";
263 default:
264 LOG_CRITICAL(HW_GPU, "Unimplemented surface_target={}", static_cast<u32>(target));
265 UNREACHABLE();
266 return fmt::format("TUK({})", static_cast<u32>(target));
267 }
268 }
269
270 std::string ClassName() const {
271 switch (identity) {
272 case SurfaceClass::Uploaded:
273 return "UP";
274 case SurfaceClass::RenderTarget:
275 return "RT";
276 case SurfaceClass::DepthBuffer:
277 return "DB";
278 case SurfaceClass::Copy:
279 return "CP";
280 default:
281 LOG_CRITICAL(HW_GPU, "Unimplemented surface_class={}", static_cast<u32>(identity));
282 UNREACHABLE();
283 return fmt::format("CUK({})", static_cast<u32>(identity));
284 }
285 }
286
287 std::string IdentityString() const {
288 return ClassName() + '_' + TargetName() + '_' + (is_tiled ? 'T' : 'L');
289 }
212 290
213 bool is_tiled; 291 bool is_tiled;
214 u32 block_width; 292 u32 block_width;
@@ -222,13 +300,16 @@ struct SurfaceParams {
222 u32 height; 300 u32 height;
223 u32 depth; 301 u32 depth;
224 u32 unaligned_height; 302 u32 unaligned_height;
303 u32 pitch;
225 SurfaceTarget target; 304 SurfaceTarget target;
305 SurfaceClass identity;
226 u32 max_mip_level; 306 u32 max_mip_level;
227 bool is_layered; 307 bool is_layered;
308 bool is_array;
228 bool srgb_conversion; 309 bool srgb_conversion;
229 // Parameters used for caching 310 // Parameters used for caching
230 VAddr addr; 311 u8* host_ptr;
231 Tegra::GPUVAddr gpu_addr; 312 GPUVAddr gpu_addr;
232 std::size_t size_in_bytes; 313 std::size_t size_in_bytes;
233 std::size_t size_in_bytes_gl; 314 std::size_t size_in_bytes_gl;
234 315
@@ -255,6 +336,7 @@ struct SurfaceReserveKey : Common::HashableStruct<OpenGL::SurfaceParams> {
255 static SurfaceReserveKey Create(const OpenGL::SurfaceParams& params) { 336 static SurfaceReserveKey Create(const OpenGL::SurfaceParams& params) {
256 SurfaceReserveKey res; 337 SurfaceReserveKey res;
257 res.state = params; 338 res.state = params;
339 res.state.identity = {}; // Ignore the origin of the texture
258 res.state.gpu_addr = {}; // Ignore GPU vaddr in caching 340 res.state.gpu_addr = {}; // Ignore GPU vaddr in caching
259 res.state.rt = {}; // Ignore rt config in caching 341 res.state.rt = {}; // Ignore rt config in caching
260 return res; 342 return res;
@@ -275,16 +357,20 @@ class RasterizerOpenGL;
275 357
276class CachedSurface final : public RasterizerCacheObject { 358class CachedSurface final : public RasterizerCacheObject {
277public: 359public:
278 CachedSurface(const SurfaceParams& params); 360 explicit CachedSurface(const SurfaceParams& params);
279 361
280 VAddr GetAddr() const override { 362 VAddr GetCpuAddr() const override {
281 return params.addr; 363 return cpu_addr;
282 } 364 }
283 365
284 std::size_t GetSizeInBytes() const override { 366 std::size_t GetSizeInBytes() const override {
285 return cached_size_in_bytes; 367 return cached_size_in_bytes;
286 } 368 }
287 369
370 std::size_t GetMemorySize() const {
371 return memory_size;
372 }
373
288 void Flush() override { 374 void Flush() override {
289 FlushGLBuffer(); 375 FlushGLBuffer();
290 } 376 }
@@ -293,31 +379,19 @@ public:
293 return texture; 379 return texture;
294 } 380 }
295 381
296 const OGLTexture& TextureLayer() { 382 const OGLTexture& Texture(bool as_array) {
297 if (params.is_layered) { 383 if (params.is_array == as_array) {
298 return Texture(); 384 return texture;
385 } else {
386 EnsureTextureDiscrepantView();
387 return discrepant_view;
299 } 388 }
300 EnsureTextureView();
301 return texture_view;
302 } 389 }
303 390
304 GLenum Target() const { 391 GLenum Target() const {
305 return gl_target; 392 return gl_target;
306 } 393 }
307 394
308 GLenum TargetLayer() const {
309 using VideoCore::Surface::SurfaceTarget;
310 switch (params.target) {
311 case SurfaceTarget::Texture1D:
312 return GL_TEXTURE_1D_ARRAY;
313 case SurfaceTarget::Texture2D:
314 return GL_TEXTURE_2D_ARRAY;
315 case SurfaceTarget::TextureCubemap:
316 return GL_TEXTURE_CUBE_MAP_ARRAY;
317 }
318 return Target();
319 }
320
321 const SurfaceParams& GetSurfaceParams() const { 395 const SurfaceParams& GetSurfaceParams() const {
322 return params; 396 return params;
323 } 397 }
@@ -329,19 +403,48 @@ public:
329 // Upload data in gl_buffer to this surface's texture 403 // Upload data in gl_buffer to this surface's texture
330 void UploadGLTexture(GLuint read_fb_handle, GLuint draw_fb_handle); 404 void UploadGLTexture(GLuint read_fb_handle, GLuint draw_fb_handle);
331 405
406 void UpdateSwizzle(Tegra::Texture::SwizzleSource swizzle_x,
407 Tegra::Texture::SwizzleSource swizzle_y,
408 Tegra::Texture::SwizzleSource swizzle_z,
409 Tegra::Texture::SwizzleSource swizzle_w);
410
411 void MarkReinterpreted() {
412 reinterpreted = true;
413 }
414
415 bool IsReinterpreted() const {
416 return reinterpreted;
417 }
418
419 void MarkForReload(bool reload) {
420 must_reload = reload;
421 }
422
423 bool MustReload() const {
424 return must_reload;
425 }
426
427 bool IsUploaded() const {
428 return params.identity == SurfaceParams::SurfaceClass::Uploaded;
429 }
430
332private: 431private:
333 void UploadGLMipmapTexture(u32 mip_map, GLuint read_fb_handle, GLuint draw_fb_handle); 432 void UploadGLMipmapTexture(u32 mip_map, GLuint read_fb_handle, GLuint draw_fb_handle);
334 433
335 void EnsureTextureView(); 434 void EnsureTextureDiscrepantView();
336 435
337 OGLTexture texture; 436 OGLTexture texture;
338 OGLTexture texture_view; 437 OGLTexture discrepant_view;
339 std::vector<std::vector<u8>> gl_buffer; 438 std::vector<std::vector<u8>> gl_buffer;
340 SurfaceParams params{}; 439 SurfaceParams params{};
341 GLenum gl_target{}; 440 GLenum gl_target{};
342 GLenum gl_internal_format{}; 441 GLenum gl_internal_format{};
343 bool gl_is_compressed{};
344 std::size_t cached_size_in_bytes{}; 442 std::size_t cached_size_in_bytes{};
443 std::array<GLenum, 4> swizzle{GL_RED, GL_GREEN, GL_BLUE, GL_ALPHA};
444 std::size_t memory_size;
445 bool reinterpreted = false;
446 bool must_reload = false;
447 VAddr cpu_addr{};
345}; 448};
346 449
347class RasterizerCacheOpenGL final : public RasterizerCache<Surface> { 450class RasterizerCacheOpenGL final : public RasterizerCache<Surface> {
@@ -359,11 +462,16 @@ public:
359 Surface GetColorBufferSurface(std::size_t index, bool preserve_contents); 462 Surface GetColorBufferSurface(std::size_t index, bool preserve_contents);
360 463
361 /// Tries to find a framebuffer using on the provided CPU address 464 /// Tries to find a framebuffer using on the provided CPU address
362 Surface TryFindFramebufferSurface(VAddr addr) const; 465 Surface TryFindFramebufferSurface(const u8* host_ptr) const;
363 466
364 /// Copies the contents of one surface to another 467 /// Copies the contents of one surface to another
365 void FermiCopySurface(const Tegra::Engines::Fermi2D::Regs::Surface& src_config, 468 void FermiCopySurface(const Tegra::Engines::Fermi2D::Regs::Surface& src_config,
366 const Tegra::Engines::Fermi2D::Regs::Surface& dst_config); 469 const Tegra::Engines::Fermi2D::Regs::Surface& dst_config,
470 const Common::Rectangle<u32>& src_rect,
471 const Common::Rectangle<u32>& dst_rect);
472
473 void SignalPreDrawCall();
474 void SignalPostDrawCall();
367 475
368private: 476private:
369 void LoadSurface(const Surface& surface); 477 void LoadSurface(const Surface& surface);
@@ -381,9 +489,17 @@ private:
381 /// Tries to get a reserved surface for the specified parameters 489 /// Tries to get a reserved surface for the specified parameters
382 Surface TryGetReservedSurface(const SurfaceParams& params); 490 Surface TryGetReservedSurface(const SurfaceParams& params);
383 491
492 // Partialy reinterpret a surface based on a triggering_surface that collides with it.
493 // returns true if the reinterpret was successful, false in case it was not.
494 bool PartialReinterpretSurface(Surface triggering_surface, Surface intersect);
495
384 /// Performs a slow but accurate surface copy, flushing to RAM and reinterpreting the data 496 /// Performs a slow but accurate surface copy, flushing to RAM and reinterpreting the data
385 void AccurateCopySurface(const Surface& src_surface, const Surface& dst_surface); 497 void AccurateCopySurface(const Surface& src_surface, const Surface& dst_surface);
386 void FastLayeredCopySurface(const Surface& src_surface, const Surface& dst_surface); 498 void FastLayeredCopySurface(const Surface& src_surface, const Surface& dst_surface);
499 void FastCopySurface(const Surface& src_surface, const Surface& dst_surface);
500 void CopySurface(const Surface& src_surface, const Surface& dst_surface,
501 const GLuint copy_pbo_handle, const GLenum src_attachment = 0,
502 const GLenum dst_attachment = 0, const std::size_t cubemap_face = 0);
387 503
388 /// The surface reserve is a "backup" cache, this is where we put unique surfaces that have 504 /// The surface reserve is a "backup" cache, this is where we put unique surfaces that have
389 /// previously been used. This is to prevent surfaces from being constantly created and 505 /// previously been used. This is to prevent surfaces from being constantly created and
@@ -393,12 +509,54 @@ private:
393 OGLFramebuffer read_framebuffer; 509 OGLFramebuffer read_framebuffer;
394 OGLFramebuffer draw_framebuffer; 510 OGLFramebuffer draw_framebuffer;
395 511
512 bool texception = false;
513
396 /// Use a Pixel Buffer Object to download the previous texture and then upload it to the new one 514 /// Use a Pixel Buffer Object to download the previous texture and then upload it to the new one
397 /// using the new format. 515 /// using the new format.
398 OGLBuffer copy_pbo; 516 OGLBuffer copy_pbo;
399 517
400 std::array<Surface, Tegra::Engines::Maxwell3D::Regs::NumRenderTargets> last_color_buffers; 518 std::array<Surface, Maxwell::NumRenderTargets> last_color_buffers;
519 std::array<Surface, Maxwell::NumRenderTargets> current_color_buffers;
401 Surface last_depth_buffer; 520 Surface last_depth_buffer;
521
522 using SurfaceIntervalCache = boost::icl::interval_map<CacheAddr, Surface>;
523 using SurfaceInterval = typename SurfaceIntervalCache::interval_type;
524
525 static auto GetReinterpretInterval(const Surface& object) {
526 return SurfaceInterval::right_open(object->GetCacheAddr() + 1,
527 object->GetCacheAddr() + object->GetMemorySize() - 1);
528 }
529
530 // Reinterpreted surfaces are very fragil as the game may keep rendering into them.
531 SurfaceIntervalCache reinterpreted_surfaces;
532
533 void RegisterReinterpretSurface(Surface reinterpret_surface) {
534 auto interval = GetReinterpretInterval(reinterpret_surface);
535 reinterpreted_surfaces.insert({interval, reinterpret_surface});
536 reinterpret_surface->MarkReinterpreted();
537 }
538
539 Surface CollideOnReinterpretedSurface(CacheAddr addr) const {
540 const SurfaceInterval interval{addr};
541 for (auto& pair :
542 boost::make_iterator_range(reinterpreted_surfaces.equal_range(interval))) {
543 return pair.second;
544 }
545 return nullptr;
546 }
547
548 void Register(const Surface& object) override {
549 RasterizerCache<Surface>::Register(object);
550 }
551
552 /// Unregisters an object from the cache
553 void Unregister(const Surface& object) override {
554 if (object->IsReinterpreted()) {
555 auto interval = GetReinterpretInterval(object);
556 reinterpreted_surfaces.erase(interval);
557 }
558 RasterizerCache<Surface>::Unregister(object);
559 }
402}; 560};
403 561
404} // namespace OpenGL 562} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_resource_manager.cpp b/src/video_core/renderer_opengl/gl_resource_manager.cpp
index 1da744158..bfe666a73 100644
--- a/src/video_core/renderer_opengl/gl_resource_manager.cpp
+++ b/src/video_core/renderer_opengl/gl_resource_manager.cpp
@@ -15,12 +15,12 @@ MICROPROFILE_DEFINE(OpenGL_ResourceDeletion, "OpenGL", "Resource Deletion", MP_R
15 15
16namespace OpenGL { 16namespace OpenGL {
17 17
18void OGLTexture::Create() { 18void OGLTexture::Create(GLenum target) {
19 if (handle != 0) 19 if (handle != 0)
20 return; 20 return;
21 21
22 MICROPROFILE_SCOPE(OpenGL_ResourceCreation); 22 MICROPROFILE_SCOPE(OpenGL_ResourceCreation);
23 glGenTextures(1, &handle); 23 glCreateTextures(target, 1, &handle);
24} 24}
25 25
26void OGLTexture::Release() { 26void OGLTexture::Release() {
@@ -71,7 +71,8 @@ void OGLShader::Release() {
71} 71}
72 72
73void OGLProgram::CreateFromSource(const char* vert_shader, const char* geo_shader, 73void OGLProgram::CreateFromSource(const char* vert_shader, const char* geo_shader,
74 const char* frag_shader, bool separable_program) { 74 const char* frag_shader, bool separable_program,
75 bool hint_retrievable) {
75 OGLShader vert, geo, frag; 76 OGLShader vert, geo, frag;
76 if (vert_shader) 77 if (vert_shader)
77 vert.Create(vert_shader, GL_VERTEX_SHADER); 78 vert.Create(vert_shader, GL_VERTEX_SHADER);
@@ -81,7 +82,7 @@ void OGLProgram::CreateFromSource(const char* vert_shader, const char* geo_shade
81 frag.Create(frag_shader, GL_FRAGMENT_SHADER); 82 frag.Create(frag_shader, GL_FRAGMENT_SHADER);
82 83
83 MICROPROFILE_SCOPE(OpenGL_ResourceCreation); 84 MICROPROFILE_SCOPE(OpenGL_ResourceCreation);
84 Create(separable_program, vert.handle, geo.handle, frag.handle); 85 Create(separable_program, hint_retrievable, vert.handle, geo.handle, frag.handle);
85} 86}
86 87
87void OGLProgram::Release() { 88void OGLProgram::Release() {
diff --git a/src/video_core/renderer_opengl/gl_resource_manager.h b/src/video_core/renderer_opengl/gl_resource_manager.h
index e33f1e973..fbb93ee49 100644
--- a/src/video_core/renderer_opengl/gl_resource_manager.h
+++ b/src/video_core/renderer_opengl/gl_resource_manager.h
@@ -28,7 +28,7 @@ public:
28 } 28 }
29 29
30 /// Creates a new internal OpenGL resource and stores the handle 30 /// Creates a new internal OpenGL resource and stores the handle
31 void Create(); 31 void Create(GLenum target);
32 32
33 /// Deletes the internal OpenGL resource 33 /// Deletes the internal OpenGL resource
34 void Release(); 34 void Release();
@@ -101,15 +101,15 @@ public:
101 } 101 }
102 102
103 template <typename... T> 103 template <typename... T>
104 void Create(bool separable_program, T... shaders) { 104 void Create(bool separable_program, bool hint_retrievable, T... shaders) {
105 if (handle != 0) 105 if (handle != 0)
106 return; 106 return;
107 handle = GLShader::LoadProgram(separable_program, shaders...); 107 handle = GLShader::LoadProgram(separable_program, hint_retrievable, shaders...);
108 } 108 }
109 109
110 /// Creates a new internal OpenGL resource and stores the handle 110 /// Creates a new internal OpenGL resource and stores the handle
111 void CreateFromSource(const char* vert_shader, const char* geo_shader, const char* frag_shader, 111 void CreateFromSource(const char* vert_shader, const char* geo_shader, const char* frag_shader,
112 bool separable_program = false); 112 bool separable_program = false, bool hint_retrievable = false);
113 113
114 /// Deletes the internal OpenGL resource 114 /// Deletes the internal OpenGL resource
115 void Release(); 115 void Release();
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp
index c785fffa3..99f67494c 100644
--- a/src/video_core/renderer_opengl/gl_shader_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp
@@ -6,200 +6,515 @@
6#include "common/assert.h" 6#include "common/assert.h"
7#include "common/hash.h" 7#include "common/hash.h"
8#include "core/core.h" 8#include "core/core.h"
9#include "core/memory.h"
10#include "video_core/engines/maxwell_3d.h" 9#include "video_core/engines/maxwell_3d.h"
10#include "video_core/memory_manager.h"
11#include "video_core/renderer_opengl/gl_rasterizer.h" 11#include "video_core/renderer_opengl/gl_rasterizer.h"
12#include "video_core/renderer_opengl/gl_shader_cache.h" 12#include "video_core/renderer_opengl/gl_shader_cache.h"
13#include "video_core/renderer_opengl/gl_shader_manager.h" 13#include "video_core/renderer_opengl/gl_shader_decompiler.h"
14#include "video_core/renderer_opengl/gl_shader_disk_cache.h"
14#include "video_core/renderer_opengl/utils.h" 15#include "video_core/renderer_opengl/utils.h"
16#include "video_core/shader/shader_ir.h"
15 17
16namespace OpenGL { 18namespace OpenGL {
17 19
20using VideoCommon::Shader::ProgramCode;
21
22// One UBO is always reserved for emulation values
23constexpr u32 RESERVED_UBOS = 1;
24
25struct UnspecializedShader {
26 std::string code;
27 GLShader::ShaderEntries entries;
28 Maxwell::ShaderProgram program_type;
29};
30
31namespace {
32
18/// Gets the address for the specified shader stage program 33/// Gets the address for the specified shader stage program
19static VAddr GetShaderAddress(Maxwell::ShaderProgram program) { 34GPUVAddr GetShaderAddress(Maxwell::ShaderProgram program) {
20 const auto& gpu = Core::System::GetInstance().GPU().Maxwell3D(); 35 const auto& gpu{Core::System::GetInstance().GPU().Maxwell3D()};
21 const auto& shader_config = gpu.regs.shader_config[static_cast<std::size_t>(program)]; 36 const auto& shader_config{gpu.regs.shader_config[static_cast<std::size_t>(program)]};
22 return *gpu.memory_manager.GpuToCpuAddress(gpu.regs.code_address.CodeAddress() + 37 return gpu.regs.code_address.CodeAddress() + shader_config.offset;
23 shader_config.offset);
24} 38}
25 39
26/// Gets the shader program code from memory for the specified address 40/// Gets the shader program code from memory for the specified address
27static GLShader::ProgramCode GetShaderCode(VAddr addr) { 41ProgramCode GetShaderCode(const u8* host_ptr) {
28 GLShader::ProgramCode program_code(GLShader::MAX_PROGRAM_CODE_LENGTH); 42 ProgramCode program_code(VideoCommon::Shader::MAX_PROGRAM_LENGTH);
29 Memory::ReadBlock(addr, program_code.data(), program_code.size() * sizeof(u64)); 43 ASSERT_OR_EXECUTE(host_ptr != nullptr, {
44 std::fill(program_code.begin(), program_code.end(), 0);
45 return program_code;
46 });
47 std::memcpy(program_code.data(), host_ptr, program_code.size() * sizeof(u64));
30 return program_code; 48 return program_code;
31} 49}
32 50
33/// Helper function to set shader uniform block bindings for a single shader stage 51/// Gets the shader type from a Maxwell program type
34static void SetShaderUniformBlockBinding(GLuint shader, const char* name, 52constexpr GLenum GetShaderType(Maxwell::ShaderProgram program_type) {
35 Maxwell::ShaderStage binding, std::size_t expected_size) { 53 switch (program_type) {
36 const GLuint ub_index = glGetUniformBlockIndex(shader, name); 54 case Maxwell::ShaderProgram::VertexA:
37 if (ub_index == GL_INVALID_INDEX) { 55 case Maxwell::ShaderProgram::VertexB:
38 return; 56 return GL_VERTEX_SHADER;
57 case Maxwell::ShaderProgram::Geometry:
58 return GL_GEOMETRY_SHADER;
59 case Maxwell::ShaderProgram::Fragment:
60 return GL_FRAGMENT_SHADER;
61 default:
62 return GL_NONE;
39 } 63 }
64}
40 65
41 GLint ub_size = 0; 66/// Gets if the current instruction offset is a scheduler instruction
42 glGetActiveUniformBlockiv(shader, ub_index, GL_UNIFORM_BLOCK_DATA_SIZE, &ub_size); 67constexpr bool IsSchedInstruction(std::size_t offset, std::size_t main_offset) {
43 ASSERT_MSG(static_cast<std::size_t>(ub_size) == expected_size, 68 // Sched instructions appear once every 4 instructions.
44 "Uniform block size did not match! Got {}, expected {}", ub_size, expected_size); 69 constexpr std::size_t SchedPeriod = 4;
45 glUniformBlockBinding(shader, ub_index, static_cast<GLuint>(binding)); 70 const std::size_t absolute_offset = offset - main_offset;
71 return (absolute_offset % SchedPeriod) == 0;
46} 72}
47 73
48/// Sets shader uniform block bindings for an entire shader program 74/// Describes primitive behavior on geometry shaders
49static void SetShaderUniformBlockBindings(GLuint shader) { 75constexpr std::tuple<const char*, const char*, u32> GetPrimitiveDescription(GLenum primitive_mode) {
50 SetShaderUniformBlockBinding(shader, "vs_config", Maxwell::ShaderStage::Vertex, 76 switch (primitive_mode) {
51 sizeof(GLShader::MaxwellUniformData)); 77 case GL_POINTS:
52 SetShaderUniformBlockBinding(shader, "gs_config", Maxwell::ShaderStage::Geometry, 78 return {"points", "Points", 1};
53 sizeof(GLShader::MaxwellUniformData)); 79 case GL_LINES:
54 SetShaderUniformBlockBinding(shader, "fs_config", Maxwell::ShaderStage::Fragment, 80 case GL_LINE_STRIP:
55 sizeof(GLShader::MaxwellUniformData)); 81 return {"lines", "Lines", 2};
82 case GL_LINES_ADJACENCY:
83 case GL_LINE_STRIP_ADJACENCY:
84 return {"lines_adjacency", "LinesAdj", 4};
85 case GL_TRIANGLES:
86 case GL_TRIANGLE_STRIP:
87 case GL_TRIANGLE_FAN:
88 return {"triangles", "Triangles", 3};
89 case GL_TRIANGLES_ADJACENCY:
90 case GL_TRIANGLE_STRIP_ADJACENCY:
91 return {"triangles_adjacency", "TrianglesAdj", 6};
92 default:
93 return {"points", "Invalid", 1};
94 }
56} 95}
57 96
58CachedShader::CachedShader(VAddr addr, Maxwell::ShaderProgram program_type) 97/// Calculates the size of a program stream
59 : addr{addr}, program_type{program_type}, setup{GetShaderCode(addr)} { 98std::size_t CalculateProgramSize(const GLShader::ProgramCode& program) {
99 constexpr std::size_t start_offset = 10;
100 std::size_t offset = start_offset;
101 std::size_t size = start_offset * sizeof(u64);
102 while (offset < program.size()) {
103 const u64 instruction = program[offset];
104 if (!IsSchedInstruction(offset, start_offset)) {
105 if (instruction == 0 || (instruction >> 52) == 0x50b) {
106 // End on Maxwell's "nop" instruction
107 break;
108 }
109 }
110 size += sizeof(u64);
111 offset++;
112 }
113 // The last instruction is included in the program size
114 return std::min(size + sizeof(u64), program.size() * sizeof(u64));
115}
116
117/// Hashes one (or two) program streams
118u64 GetUniqueIdentifier(Maxwell::ShaderProgram program_type, const ProgramCode& code,
119 const ProgramCode& code_b) {
120 u64 unique_identifier =
121 Common::CityHash64(reinterpret_cast<const char*>(code.data()), CalculateProgramSize(code));
122 if (program_type != Maxwell::ShaderProgram::VertexA) {
123 return unique_identifier;
124 }
125 // VertexA programs include two programs
60 126
61 GLShader::ProgramResult program_result; 127 std::size_t seed = 0;
62 GLenum gl_type{}; 128 boost::hash_combine(seed, unique_identifier);
63 129
64 switch (program_type) { 130 const u64 identifier_b = Common::CityHash64(reinterpret_cast<const char*>(code_b.data()),
65 case Maxwell::ShaderProgram::VertexA: 131 CalculateProgramSize(code_b));
132 boost::hash_combine(seed, identifier_b);
133 return static_cast<u64>(seed);
134}
135
136/// Creates an unspecialized program from code streams
137GLShader::ProgramResult CreateProgram(Maxwell::ShaderProgram program_type, ProgramCode program_code,
138 ProgramCode program_code_b) {
139 GLShader::ShaderSetup setup(program_code);
140 if (program_type == Maxwell::ShaderProgram::VertexA) {
66 // VertexB is always enabled, so when VertexA is enabled, we have two vertex shaders. 141 // VertexB is always enabled, so when VertexA is enabled, we have two vertex shaders.
67 // Conventional HW does not support this, so we combine VertexA and VertexB into one 142 // Conventional HW does not support this, so we combine VertexA and VertexB into one
68 // stage here. 143 // stage here.
69 setup.SetProgramB(GetShaderCode(GetShaderAddress(Maxwell::ShaderProgram::VertexB))); 144 setup.SetProgramB(program_code_b);
145 }
146 setup.program.unique_identifier =
147 GetUniqueIdentifier(program_type, program_code, program_code_b);
148
149 switch (program_type) {
150 case Maxwell::ShaderProgram::VertexA:
70 case Maxwell::ShaderProgram::VertexB: 151 case Maxwell::ShaderProgram::VertexB:
71 CalculateProperties(); 152 return GLShader::GenerateVertexShader(setup);
72 program_result = GLShader::GenerateVertexShader(setup);
73 gl_type = GL_VERTEX_SHADER;
74 break;
75 case Maxwell::ShaderProgram::Geometry: 153 case Maxwell::ShaderProgram::Geometry:
76 CalculateProperties(); 154 return GLShader::GenerateGeometryShader(setup);
77 program_result = GLShader::GenerateGeometryShader(setup);
78 gl_type = GL_GEOMETRY_SHADER;
79 break;
80 case Maxwell::ShaderProgram::Fragment: 155 case Maxwell::ShaderProgram::Fragment:
81 CalculateProperties(); 156 return GLShader::GenerateFragmentShader(setup);
82 program_result = GLShader::GenerateFragmentShader(setup);
83 gl_type = GL_FRAGMENT_SHADER;
84 break;
85 default: 157 default:
86 LOG_CRITICAL(HW_GPU, "Unimplemented program_type={}", static_cast<u32>(program_type)); 158 LOG_CRITICAL(HW_GPU, "Unimplemented program_type={}", static_cast<u32>(program_type));
87 UNREACHABLE(); 159 UNREACHABLE();
160 return {};
161 }
162}
163
164CachedProgram SpecializeShader(const std::string& code, const GLShader::ShaderEntries& entries,
165 Maxwell::ShaderProgram program_type, BaseBindings base_bindings,
166 GLenum primitive_mode, bool hint_retrievable = false) {
167 std::string source = "#version 430 core\n";
168 source += fmt::format("#define EMULATION_UBO_BINDING {}\n", base_bindings.cbuf++);
169
170 for (const auto& cbuf : entries.const_buffers) {
171 source +=
172 fmt::format("#define CBUF_BINDING_{} {}\n", cbuf.GetIndex(), base_bindings.cbuf++);
173 }
174 for (const auto& gmem : entries.global_memory_entries) {
175 source += fmt::format("#define GMEM_BINDING_{}_{} {}\n", gmem.GetCbufIndex(),
176 gmem.GetCbufOffset(), base_bindings.gmem++);
177 }
178 for (const auto& sampler : entries.samplers) {
179 source += fmt::format("#define SAMPLER_BINDING_{} {}\n", sampler.GetIndex(),
180 base_bindings.sampler++);
181 }
182
183 if (program_type == Maxwell::ShaderProgram::Geometry) {
184 const auto [glsl_topology, debug_name, max_vertices] =
185 GetPrimitiveDescription(primitive_mode);
186
187 source += "layout (" + std::string(glsl_topology) + ") in;\n";
188 source += "#define MAX_VERTEX_INPUT " + std::to_string(max_vertices) + '\n';
189 }
190
191 source += code;
192
193 OGLShader shader;
194 shader.Create(source.c_str(), GetShaderType(program_type));
195
196 auto program = std::make_shared<OGLProgram>();
197 program->Create(true, hint_retrievable, shader.handle);
198 return program;
199}
200
201std::set<GLenum> GetSupportedFormats() {
202 std::set<GLenum> supported_formats;
203
204 GLint num_formats{};
205 glGetIntegerv(GL_NUM_PROGRAM_BINARY_FORMATS, &num_formats);
206
207 std::vector<GLint> formats(num_formats);
208 glGetIntegerv(GL_PROGRAM_BINARY_FORMATS, formats.data());
209
210 for (const GLint format : formats)
211 supported_formats.insert(static_cast<GLenum>(format));
212 return supported_formats;
213}
214
215} // namespace
216
217CachedShader::CachedShader(VAddr cpu_addr, u64 unique_identifier,
218 Maxwell::ShaderProgram program_type, ShaderDiskCacheOpenGL& disk_cache,
219 const PrecompiledPrograms& precompiled_programs,
220 ProgramCode&& program_code, ProgramCode&& program_code_b, u8* host_ptr)
221 : RasterizerCacheObject{host_ptr}, host_ptr{host_ptr}, cpu_addr{cpu_addr},
222 unique_identifier{unique_identifier}, program_type{program_type}, disk_cache{disk_cache},
223 precompiled_programs{precompiled_programs} {
224
225 const std::size_t code_size = CalculateProgramSize(program_code);
226 const std::size_t code_size_b =
227 program_code_b.empty() ? 0 : CalculateProgramSize(program_code_b);
228
229 GLShader::ProgramResult program_result =
230 CreateProgram(program_type, program_code, program_code_b);
231 if (program_result.first.empty()) {
232 // TODO(Rodrigo): Unimplemented shader stages hit here, avoid using these for now
88 return; 233 return;
89 } 234 }
90 235
236 code = program_result.first;
91 entries = program_result.second; 237 entries = program_result.second;
92 shader_length = entries.shader_length; 238 shader_length = entries.shader_length;
93 239
94 if (program_type != Maxwell::ShaderProgram::Geometry) { 240 const ShaderDiskCacheRaw raw(unique_identifier, program_type,
95 OGLShader shader; 241 static_cast<u32>(code_size / sizeof(u64)),
96 shader.Create(program_result.first.c_str(), gl_type); 242 static_cast<u32>(code_size_b / sizeof(u64)),
97 program.Create(true, shader.handle); 243 std::move(program_code), std::move(program_code_b));
98 SetShaderUniformBlockBindings(program.handle); 244 disk_cache.SaveRaw(raw);
99 LabelGLObject(GL_PROGRAM, program.handle, addr);
100 } else {
101 // Store shader's code to lazily build it on draw
102 geometry_programs.code = program_result.first;
103 }
104} 245}
105 246
106GLuint CachedShader::GetProgramResourceIndex(const GLShader::ConstBufferEntry& buffer) { 247CachedShader::CachedShader(VAddr cpu_addr, u64 unique_identifier,
107 const auto search{resource_cache.find(buffer.GetHash())}; 248 Maxwell::ShaderProgram program_type, ShaderDiskCacheOpenGL& disk_cache,
108 if (search == resource_cache.end()) { 249 const PrecompiledPrograms& precompiled_programs,
109 const GLuint index{ 250 GLShader::ProgramResult result, u8* host_ptr)
110 glGetProgramResourceIndex(program.handle, GL_UNIFORM_BLOCK, buffer.GetName().c_str())}; 251 : RasterizerCacheObject{host_ptr}, cpu_addr{cpu_addr}, unique_identifier{unique_identifier},
111 resource_cache[buffer.GetHash()] = index; 252 program_type{program_type}, disk_cache{disk_cache}, precompiled_programs{
112 return index; 253 precompiled_programs} {
113 }
114 254
115 return search->second; 255 code = std::move(result.first);
256 entries = result.second;
257 shader_length = entries.shader_length;
116} 258}
117 259
118GLint CachedShader::GetUniformLocation(const GLShader::SamplerEntry& sampler) { 260std::tuple<GLuint, BaseBindings> CachedShader::GetProgramHandle(GLenum primitive_mode,
119 const auto search{uniform_cache.find(sampler.GetHash())}; 261 BaseBindings base_bindings) {
120 if (search == uniform_cache.end()) { 262 GLuint handle{};
121 const GLint index{glGetUniformLocation(program.handle, sampler.GetName().c_str())}; 263 if (program_type == Maxwell::ShaderProgram::Geometry) {
122 uniform_cache[sampler.GetHash()] = index; 264 handle = GetGeometryShader(primitive_mode, base_bindings);
123 return index; 265 } else {
266 const auto [entry, is_cache_miss] = programs.try_emplace(base_bindings);
267 auto& program = entry->second;
268 if (is_cache_miss) {
269 program = TryLoadProgram(primitive_mode, base_bindings);
270 if (!program) {
271 program =
272 SpecializeShader(code, entries, program_type, base_bindings, primitive_mode);
273 disk_cache.SaveUsage(GetUsage(primitive_mode, base_bindings));
274 }
275
276 LabelGLObject(GL_PROGRAM, program->handle, cpu_addr);
277 }
278
279 handle = program->handle;
124 } 280 }
125 281
126 return search->second; 282 base_bindings.cbuf += static_cast<u32>(entries.const_buffers.size()) + RESERVED_UBOS;
283 base_bindings.gmem += static_cast<u32>(entries.global_memory_entries.size());
284 base_bindings.sampler += static_cast<u32>(entries.samplers.size());
285
286 return {handle, base_bindings};
127} 287}
128 288
129GLuint CachedShader::LazyGeometryProgram(OGLProgram& target_program, 289GLuint CachedShader::GetGeometryShader(GLenum primitive_mode, BaseBindings base_bindings) {
130 const std::string& glsl_topology, u32 max_vertices, 290 const auto [entry, is_cache_miss] = geometry_programs.try_emplace(base_bindings);
131 const std::string& debug_name) { 291 auto& programs = entry->second;
132 if (target_program.handle != 0) { 292
133 return target_program.handle; 293 switch (primitive_mode) {
294 case GL_POINTS:
295 return LazyGeometryProgram(programs.points, base_bindings, primitive_mode);
296 case GL_LINES:
297 case GL_LINE_STRIP:
298 return LazyGeometryProgram(programs.lines, base_bindings, primitive_mode);
299 case GL_LINES_ADJACENCY:
300 case GL_LINE_STRIP_ADJACENCY:
301 return LazyGeometryProgram(programs.lines_adjacency, base_bindings, primitive_mode);
302 case GL_TRIANGLES:
303 case GL_TRIANGLE_STRIP:
304 case GL_TRIANGLE_FAN:
305 return LazyGeometryProgram(programs.triangles, base_bindings, primitive_mode);
306 case GL_TRIANGLES_ADJACENCY:
307 case GL_TRIANGLE_STRIP_ADJACENCY:
308 return LazyGeometryProgram(programs.triangles_adjacency, base_bindings, primitive_mode);
309 default:
310 UNREACHABLE_MSG("Unknown primitive mode.");
311 return LazyGeometryProgram(programs.points, base_bindings, primitive_mode);
134 } 312 }
135 std::string source = "#version 430 core\n"; 313}
136 source += "layout (" + glsl_topology + ") in;\n";
137 source += "#define MAX_VERTEX_INPUT " + std::to_string(max_vertices) + '\n';
138 source += geometry_programs.code;
139 314
140 OGLShader shader; 315GLuint CachedShader::LazyGeometryProgram(CachedProgram& target_program, BaseBindings base_bindings,
141 shader.Create(source.c_str(), GL_GEOMETRY_SHADER); 316 GLenum primitive_mode) {
142 target_program.Create(true, shader.handle); 317 if (target_program) {
143 SetShaderUniformBlockBindings(target_program.handle); 318 return target_program->handle;
144 LabelGLObject(GL_PROGRAM, target_program.handle, addr, debug_name); 319 }
145 return target_program.handle; 320 const auto [glsl_name, debug_name, vertices] = GetPrimitiveDescription(primitive_mode);
321 target_program = TryLoadProgram(primitive_mode, base_bindings);
322 if (!target_program) {
323 target_program =
324 SpecializeShader(code, entries, program_type, base_bindings, primitive_mode);
325 disk_cache.SaveUsage(GetUsage(primitive_mode, base_bindings));
326 }
327
328 LabelGLObject(GL_PROGRAM, target_program->handle, cpu_addr, debug_name);
329
330 return target_program->handle;
146}; 331};
147 332
148static bool IsSchedInstruction(std::size_t offset, std::size_t main_offset) { 333CachedProgram CachedShader::TryLoadProgram(GLenum primitive_mode,
149 // sched instructions appear once every 4 instructions. 334 BaseBindings base_bindings) const {
150 static constexpr std::size_t SchedPeriod = 4; 335 const auto found = precompiled_programs.find(GetUsage(primitive_mode, base_bindings));
151 const std::size_t absolute_offset = offset - main_offset; 336 if (found == precompiled_programs.end()) {
152 return (absolute_offset % SchedPeriod) == 0; 337 return {};
338 }
339 return found->second;
153} 340}
154 341
155static std::size_t CalculateProgramSize(const GLShader::ProgramCode& program) { 342ShaderDiskCacheUsage CachedShader::GetUsage(GLenum primitive_mode,
156 constexpr std::size_t start_offset = 10; 343 BaseBindings base_bindings) const {
157 std::size_t offset = start_offset; 344 return {unique_identifier, base_bindings, primitive_mode};
158 std::size_t size = start_offset * sizeof(u64); 345}
159 while (offset < program.size()) { 346
160 const u64 inst = program[offset]; 347ShaderCacheOpenGL::ShaderCacheOpenGL(RasterizerOpenGL& rasterizer, Core::System& system)
161 if (!IsSchedInstruction(offset, start_offset)) { 348 : RasterizerCache{rasterizer}, disk_cache{system} {}
162 if (inst == 0 || (inst >> 52) == 0x50b) { 349
163 break; 350void ShaderCacheOpenGL::LoadDiskCache(const std::atomic_bool& stop_loading,
351 const VideoCore::DiskResourceLoadCallback& callback) {
352 const auto transferable = disk_cache.LoadTransferable();
353 if (!transferable) {
354 return;
355 }
356 const auto [raws, usages] = *transferable;
357
358 auto [decompiled, dumps] = disk_cache.LoadPrecompiled();
359
360 const auto supported_formats{GetSupportedFormats()};
361 const auto unspecialized{
362 GenerateUnspecializedShaders(stop_loading, callback, raws, decompiled)};
363 if (stop_loading)
364 return;
365
366 // Build shaders
367 if (callback)
368 callback(VideoCore::LoadCallbackStage::Build, 0, usages.size());
369 for (std::size_t i = 0; i < usages.size(); ++i) {
370 if (stop_loading)
371 return;
372
373 const auto& usage{usages[i]};
374 LOG_INFO(Render_OpenGL, "Building shader {:016x} ({} of {})", usage.unique_identifier,
375 i + 1, usages.size());
376
377 const auto& unspec{unspecialized.at(usage.unique_identifier)};
378 const auto dump_it = dumps.find(usage);
379
380 CachedProgram shader;
381 if (dump_it != dumps.end()) {
382 // If the shader is dumped, attempt to load it with
383 shader = GeneratePrecompiledProgram(dump_it->second, supported_formats);
384 if (!shader) {
385 // Invalidate the precompiled cache if a shader dumped shader was rejected
386 disk_cache.InvalidatePrecompiled();
387 dumps.clear();
164 } 388 }
165 } 389 }
166 size += sizeof(inst); 390 if (!shader) {
167 offset++; 391 shader = SpecializeShader(unspec.code, unspec.entries, unspec.program_type,
392 usage.bindings, usage.primitive, true);
393 }
394 precompiled_programs.insert({usage, std::move(shader)});
395
396 if (callback)
397 callback(VideoCore::LoadCallbackStage::Build, i + 1, usages.size());
398 }
399
400 // TODO(Rodrigo): Do state tracking for transferable shaders and do a dummy draw before
401 // precompiling them
402
403 for (std::size_t i = 0; i < usages.size(); ++i) {
404 const auto& usage{usages[i]};
405 if (dumps.find(usage) == dumps.end()) {
406 const auto& program = precompiled_programs.at(usage);
407 disk_cache.SaveDump(usage, program->handle);
408 }
168 } 409 }
169 return size;
170} 410}
171 411
172void CachedShader::CalculateProperties() { 412CachedProgram ShaderCacheOpenGL::GeneratePrecompiledProgram(
173 setup.program.real_size = CalculateProgramSize(setup.program.code); 413 const ShaderDiskCacheDump& dump, const std::set<GLenum>& supported_formats) {
174 setup.program.real_size_b = 0; 414
175 setup.program.unique_identifier = Common::CityHash64( 415 if (supported_formats.find(dump.binary_format) == supported_formats.end()) {
176 reinterpret_cast<const char*>(setup.program.code.data()), setup.program.real_size); 416 LOG_INFO(Render_OpenGL, "Precompiled cache entry with unsupported format - removing");
177 if (program_type == Maxwell::ShaderProgram::VertexA) { 417 return {};
178 std::size_t seed = 0; 418 }
179 boost::hash_combine(seed, setup.program.unique_identifier); 419
180 setup.program.real_size_b = CalculateProgramSize(setup.program.code_b); 420 CachedProgram shader = std::make_shared<OGLProgram>();
181 const u64 identifier_b = Common::CityHash64( 421 shader->handle = glCreateProgram();
182 reinterpret_cast<const char*>(setup.program.code_b.data()), setup.program.real_size_b); 422 glProgramParameteri(shader->handle, GL_PROGRAM_SEPARABLE, GL_TRUE);
183 boost::hash_combine(seed, identifier_b); 423 glProgramBinary(shader->handle, dump.binary_format, dump.binary.data(),
184 setup.program.unique_identifier = static_cast<u64>(seed); 424 static_cast<GLsizei>(dump.binary.size()));
425
426 GLint link_status{};
427 glGetProgramiv(shader->handle, GL_LINK_STATUS, &link_status);
428 if (link_status == GL_FALSE) {
429 LOG_INFO(Render_OpenGL, "Precompiled cache rejected by the driver - removing");
430 return {};
185 } 431 }
432
433 return shader;
186} 434}
187 435
188ShaderCacheOpenGL::ShaderCacheOpenGL(RasterizerOpenGL& rasterizer) : RasterizerCache{rasterizer} {} 436std::unordered_map<u64, UnspecializedShader> ShaderCacheOpenGL::GenerateUnspecializedShaders(
437 const std::atomic_bool& stop_loading, const VideoCore::DiskResourceLoadCallback& callback,
438 const std::vector<ShaderDiskCacheRaw>& raws,
439 const std::unordered_map<u64, ShaderDiskCacheDecompiled>& decompiled) {
440 std::unordered_map<u64, UnspecializedShader> unspecialized;
441
442 if (callback)
443 callback(VideoCore::LoadCallbackStage::Decompile, 0, raws.size());
444
445 for (std::size_t i = 0; i < raws.size(); ++i) {
446 if (stop_loading)
447 return {};
448
449 const auto& raw{raws[i]};
450 const u64 unique_identifier = raw.GetUniqueIdentifier();
451 const u64 calculated_hash =
452 GetUniqueIdentifier(raw.GetProgramType(), raw.GetProgramCode(), raw.GetProgramCodeB());
453 if (unique_identifier != calculated_hash) {
454 LOG_ERROR(
455 Render_OpenGL,
456 "Invalid hash in entry={:016x} (obtained hash={:016x}) - removing shader cache",
457 raw.GetUniqueIdentifier(), calculated_hash);
458 disk_cache.InvalidateTransferable();
459 return {};
460 }
461
462 GLShader::ProgramResult result;
463 if (const auto it = decompiled.find(unique_identifier); it != decompiled.end()) {
464 // If it's stored in the precompiled file, avoid decompiling it here
465 const auto& stored_decompiled{it->second};
466 result = {stored_decompiled.code, stored_decompiled.entries};
467 } else {
468 // Otherwise decompile the shader at boot and save the result to the decompiled file
469 result =
470 CreateProgram(raw.GetProgramType(), raw.GetProgramCode(), raw.GetProgramCodeB());
471 disk_cache.SaveDecompiled(unique_identifier, result.first, result.second);
472 }
473
474 precompiled_shaders.insert({unique_identifier, result});
475
476 unspecialized.insert(
477 {raw.GetUniqueIdentifier(),
478 {std::move(result.first), std::move(result.second), raw.GetProgramType()}});
479
480 if (callback)
481 callback(VideoCore::LoadCallbackStage::Decompile, i, raws.size());
482 }
483 return unspecialized;
484}
189 485
190Shader ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program) { 486Shader ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program) {
191 if (!Core::System::GetInstance().GPU().Maxwell3D().dirty_flags.shaders) { 487 if (!Core::System::GetInstance().GPU().Maxwell3D().dirty_flags.shaders) {
192 return last_shaders[static_cast<u32>(program)]; 488 return last_shaders[static_cast<u32>(program)];
193 } 489 }
194 490
195 const VAddr program_addr{GetShaderAddress(program)}; 491 auto& memory_manager{Core::System::GetInstance().GPU().MemoryManager()};
492 const GPUVAddr program_addr{GetShaderAddress(program)};
196 493
197 // Look up shader in the cache based on address 494 // Look up shader in the cache based on address
198 Shader shader{TryGet(program_addr)}; 495 const auto& host_ptr{memory_manager.GetPointer(program_addr)};
496 Shader shader{TryGet(host_ptr)};
199 497
200 if (!shader) { 498 if (!shader) {
201 // No shader found - create a new one 499 // No shader found - create a new one
202 shader = std::make_shared<CachedShader>(program_addr, program); 500 ProgramCode program_code{GetShaderCode(host_ptr)};
501 ProgramCode program_code_b;
502 if (program == Maxwell::ShaderProgram::VertexA) {
503 program_code_b = GetShaderCode(
504 memory_manager.GetPointer(GetShaderAddress(Maxwell::ShaderProgram::VertexB)));
505 }
506 const u64 unique_identifier = GetUniqueIdentifier(program, program_code, program_code_b);
507 const VAddr cpu_addr{*memory_manager.GpuToCpuAddress(program_addr)};
508 const auto found = precompiled_shaders.find(unique_identifier);
509 if (found != precompiled_shaders.end()) {
510 shader =
511 std::make_shared<CachedShader>(cpu_addr, unique_identifier, program, disk_cache,
512 precompiled_programs, found->second, host_ptr);
513 } else {
514 shader = std::make_shared<CachedShader>(
515 cpu_addr, unique_identifier, program, disk_cache, precompiled_programs,
516 std::move(program_code), std::move(program_code_b), host_ptr);
517 }
203 Register(shader); 518 Register(shader);
204 } 519 }
205 520
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.h b/src/video_core/renderer_opengl/gl_shader_cache.h
index 768747968..0cf8e0b3d 100644
--- a/src/video_core/renderer_opengl/gl_shader_cache.h
+++ b/src/video_core/renderer_opengl/gl_shader_cache.h
@@ -5,29 +5,51 @@
5#pragma once 5#pragma once
6 6
7#include <array> 7#include <array>
8#include <map> 8#include <atomic>
9#include <memory> 9#include <memory>
10#include <set>
11#include <tuple>
12#include <unordered_map>
13#include <vector>
14
15#include <glad/glad.h>
10 16
11#include "common/assert.h"
12#include "common/common_types.h" 17#include "common/common_types.h"
13#include "video_core/rasterizer_cache.h" 18#include "video_core/rasterizer_cache.h"
14#include "video_core/renderer_opengl/gl_resource_manager.h" 19#include "video_core/renderer_opengl/gl_resource_manager.h"
15#include "video_core/renderer_opengl/gl_shader_gen.h" 20#include "video_core/renderer_opengl/gl_shader_decompiler.h"
21#include "video_core/renderer_opengl/gl_shader_disk_cache.h"
22
23namespace Core {
24class System;
25} // namespace Core
16 26
17namespace OpenGL { 27namespace OpenGL {
18 28
19class CachedShader; 29class CachedShader;
20class RasterizerOpenGL; 30class RasterizerOpenGL;
31struct UnspecializedShader;
21 32
22using Shader = std::shared_ptr<CachedShader>; 33using Shader = std::shared_ptr<CachedShader>;
34using CachedProgram = std::shared_ptr<OGLProgram>;
23using Maxwell = Tegra::Engines::Maxwell3D::Regs; 35using Maxwell = Tegra::Engines::Maxwell3D::Regs;
36using PrecompiledPrograms = std::unordered_map<ShaderDiskCacheUsage, CachedProgram>;
37using PrecompiledShaders = std::unordered_map<u64, GLShader::ProgramResult>;
24 38
25class CachedShader final : public RasterizerCacheObject { 39class CachedShader final : public RasterizerCacheObject {
26public: 40public:
27 CachedShader(VAddr addr, Maxwell::ShaderProgram program_type); 41 explicit CachedShader(VAddr cpu_addr, u64 unique_identifier,
28 42 Maxwell::ShaderProgram program_type, ShaderDiskCacheOpenGL& disk_cache,
29 VAddr GetAddr() const override { 43 const PrecompiledPrograms& precompiled_programs,
30 return addr; 44 ProgramCode&& program_code, ProgramCode&& program_code_b, u8* host_ptr);
45
46 explicit CachedShader(VAddr cpu_addr, u64 unique_identifier,
47 Maxwell::ShaderProgram program_type, ShaderDiskCacheOpenGL& disk_cache,
48 const PrecompiledPrograms& precompiled_programs,
49 GLShader::ProgramResult result, u8* host_ptr);
50
51 VAddr GetCpuAddr() const override {
52 return cpu_addr;
31 } 53 }
32 54
33 std::size_t GetSizeInBytes() const override { 55 std::size_t GetSizeInBytes() const override {
@@ -43,82 +65,76 @@ public:
43 } 65 }
44 66
45 /// Gets the GL program handle for the shader 67 /// Gets the GL program handle for the shader
46 GLuint GetProgramHandle(GLenum primitive_mode) { 68 std::tuple<GLuint, BaseBindings> GetProgramHandle(GLenum primitive_mode,
47 if (program_type != Maxwell::ShaderProgram::Geometry) { 69 BaseBindings base_bindings);
48 return program.handle;
49 }
50 switch (primitive_mode) {
51 case GL_POINTS:
52 return LazyGeometryProgram(geometry_programs.points, "points", 1, "ShaderPoints");
53 case GL_LINES:
54 case GL_LINE_STRIP:
55 return LazyGeometryProgram(geometry_programs.lines, "lines", 2, "ShaderLines");
56 case GL_LINES_ADJACENCY:
57 case GL_LINE_STRIP_ADJACENCY:
58 return LazyGeometryProgram(geometry_programs.lines_adjacency, "lines_adjacency", 4,
59 "ShaderLinesAdjacency");
60 case GL_TRIANGLES:
61 case GL_TRIANGLE_STRIP:
62 case GL_TRIANGLE_FAN:
63 return LazyGeometryProgram(geometry_programs.triangles, "triangles", 3,
64 "ShaderTriangles");
65 case GL_TRIANGLES_ADJACENCY:
66 case GL_TRIANGLE_STRIP_ADJACENCY:
67 return LazyGeometryProgram(geometry_programs.triangles_adjacency, "triangles_adjacency",
68 6, "ShaderTrianglesAdjacency");
69 default:
70 UNREACHABLE_MSG("Unknown primitive mode.");
71 return LazyGeometryProgram(geometry_programs.points, "points", 1, "ShaderPoints");
72 }
73 }
74 70
75 /// Gets the GL program resource location for the specified resource, caching as needed 71private:
76 GLuint GetProgramResourceIndex(const GLShader::ConstBufferEntry& buffer); 72 // Geometry programs. These are needed because GLSL needs an input topology but it's not
73 // declared by the hardware. Workaround this issue by generating a different shader per input
74 // topology class.
75 struct GeometryPrograms {
76 CachedProgram points;
77 CachedProgram lines;
78 CachedProgram lines_adjacency;
79 CachedProgram triangles;
80 CachedProgram triangles_adjacency;
81 };
77 82
78 /// Gets the GL uniform location for the specified resource, caching as needed 83 GLuint GetGeometryShader(GLenum primitive_mode, BaseBindings base_bindings);
79 GLint GetUniformLocation(const GLShader::SamplerEntry& sampler);
80 84
81private:
82 /// Generates a geometry shader or returns one that already exists. 85 /// Generates a geometry shader or returns one that already exists.
83 GLuint LazyGeometryProgram(OGLProgram& target_program, const std::string& glsl_topology, 86 GLuint LazyGeometryProgram(CachedProgram& target_program, BaseBindings base_bindings,
84 u32 max_vertices, const std::string& debug_name); 87 GLenum primitive_mode);
88
89 CachedProgram TryLoadProgram(GLenum primitive_mode, BaseBindings base_bindings) const;
85 90
86 void CalculateProperties(); 91 ShaderDiskCacheUsage GetUsage(GLenum primitive_mode, BaseBindings base_bindings) const;
87 92
88 VAddr addr; 93 u8* host_ptr{};
89 std::size_t shader_length; 94 VAddr cpu_addr{};
90 Maxwell::ShaderProgram program_type; 95 u64 unique_identifier{};
91 GLShader::ShaderSetup setup; 96 Maxwell::ShaderProgram program_type{};
97 ShaderDiskCacheOpenGL& disk_cache;
98 const PrecompiledPrograms& precompiled_programs;
99
100 std::size_t shader_length{};
92 GLShader::ShaderEntries entries; 101 GLShader::ShaderEntries entries;
93 102
94 // Non-geometry program. 103 std::string code;
95 OGLProgram program;
96 104
97 // Geometry programs. These are needed because GLSL needs an input topology but it's not 105 std::unordered_map<BaseBindings, CachedProgram> programs;
98 // declared by the hardware. Workaround this issue by generating a different shader per input 106 std::unordered_map<BaseBindings, GeometryPrograms> geometry_programs;
99 // topology class. 107
100 struct { 108 std::unordered_map<u32, GLuint> cbuf_resource_cache;
101 std::string code; 109 std::unordered_map<u32, GLuint> gmem_resource_cache;
102 OGLProgram points; 110 std::unordered_map<u32, GLint> uniform_cache;
103 OGLProgram lines;
104 OGLProgram lines_adjacency;
105 OGLProgram triangles;
106 OGLProgram triangles_adjacency;
107 } geometry_programs;
108
109 std::map<u32, GLuint> resource_cache;
110 std::map<u32, GLint> uniform_cache;
111}; 111};
112 112
113class ShaderCacheOpenGL final : public RasterizerCache<Shader> { 113class ShaderCacheOpenGL final : public RasterizerCache<Shader> {
114public: 114public:
115 explicit ShaderCacheOpenGL(RasterizerOpenGL& rasterizer); 115 explicit ShaderCacheOpenGL(RasterizerOpenGL& rasterizer, Core::System& system);
116
117 /// Loads disk cache for the current game
118 void LoadDiskCache(const std::atomic_bool& stop_loading,
119 const VideoCore::DiskResourceLoadCallback& callback);
116 120
117 /// Gets the current specified shader stage program 121 /// Gets the current specified shader stage program
118 Shader GetStageProgram(Maxwell::ShaderProgram program); 122 Shader GetStageProgram(Maxwell::ShaderProgram program);
119 123
120private: 124private:
125 std::unordered_map<u64, UnspecializedShader> GenerateUnspecializedShaders(
126 const std::atomic_bool& stop_loading, const VideoCore::DiskResourceLoadCallback& callback,
127 const std::vector<ShaderDiskCacheRaw>& raws,
128 const std::unordered_map<u64, ShaderDiskCacheDecompiled>& decompiled);
129
130 CachedProgram GeneratePrecompiledProgram(const ShaderDiskCacheDump& dump,
131 const std::set<GLenum>& supported_formats);
132
121 std::array<Shader, Maxwell::MaxShaderProgram> last_shaders; 133 std::array<Shader, Maxwell::MaxShaderProgram> last_shaders;
134
135 ShaderDiskCacheOpenGL disk_cache;
136 PrecompiledShaders precompiled_shaders;
137 PrecompiledPrograms precompiled_programs;
122}; 138};
123 139
124} // namespace OpenGL 140} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
index 1bb09e61b..28e490b3c 100644
--- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
@@ -2,247 +2,51 @@
2// Licensed under GPLv2 or any later version 2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included. 3// Refer to the license.txt file included.
4 4
5#include <map> 5#include <array>
6#include <optional>
7#include <set>
8#include <string> 6#include <string>
9#include <string_view> 7#include <string_view>
10#include <unordered_set> 8#include <utility>
9#include <variant>
10#include <vector>
11 11
12#include <fmt/format.h> 12#include <fmt/format.h>
13 13
14#include "common/alignment.h"
14#include "common/assert.h" 15#include "common/assert.h"
15#include "common/common_types.h" 16#include "common/common_types.h"
16#include "video_core/engines/shader_bytecode.h" 17#include "video_core/engines/maxwell_3d.h"
17#include "video_core/engines/shader_header.h"
18#include "video_core/renderer_opengl/gl_rasterizer.h" 18#include "video_core/renderer_opengl/gl_rasterizer.h"
19#include "video_core/renderer_opengl/gl_shader_decompiler.h" 19#include "video_core/renderer_opengl/gl_shader_decompiler.h"
20#include "video_core/shader/shader_ir.h"
20 21
21namespace OpenGL::GLShader::Decompiler { 22namespace OpenGL::GLShader {
23
24namespace {
22 25
23using Tegra::Shader::Attribute; 26using Tegra::Shader::Attribute;
24using Tegra::Shader::Instruction; 27using Tegra::Shader::AttributeUse;
25using Tegra::Shader::LogicOperation; 28using Tegra::Shader::Header;
26using Tegra::Shader::OpCode; 29using Tegra::Shader::IpaInterpMode;
30using Tegra::Shader::IpaMode;
31using Tegra::Shader::IpaSampleMode;
27using Tegra::Shader::Register; 32using Tegra::Shader::Register;
28using Tegra::Shader::Sampler; 33using namespace VideoCommon::Shader;
29using Tegra::Shader::SubOp;
30
31constexpr u32 PROGRAM_END = MAX_PROGRAM_CODE_LENGTH;
32constexpr u32 PROGRAM_HEADER_SIZE = sizeof(Tegra::Shader::Header);
33
34constexpr u32 MAX_GEOMETRY_BUFFERS = 6;
35constexpr u32 MAX_ATTRIBUTES = 0x100; // Size in vec4s, this value is untested
36
37static const char* INTERNAL_FLAG_NAMES[] = {"zero_flag", "sign_flag", "carry_flag",
38 "overflow_flag"};
39
40enum class InternalFlag : u64 {
41 ZeroFlag = 0,
42 SignFlag = 1,
43 CarryFlag = 2,
44 OverflowFlag = 3,
45 Amount
46};
47
48class DecompileFail : public std::runtime_error {
49public:
50 using std::runtime_error::runtime_error;
51};
52
53/// Generates code to use for a swizzle operation.
54static std::string GetSwizzle(u64 elem) {
55 ASSERT(elem <= 3);
56 std::string swizzle = ".";
57 swizzle += "xyzw"[elem];
58 return swizzle;
59}
60
61/// Translate topology
62static std::string GetTopologyName(Tegra::Shader::OutputTopology topology) {
63 switch (topology) {
64 case Tegra::Shader::OutputTopology::PointList:
65 return "points";
66 case Tegra::Shader::OutputTopology::LineStrip:
67 return "line_strip";
68 case Tegra::Shader::OutputTopology::TriangleStrip:
69 return "triangle_strip";
70 default:
71 UNIMPLEMENTED_MSG("Unknown output topology: {}", static_cast<u32>(topology));
72 return "points";
73 }
74}
75
76/// Describes the behaviour of code path of a given entry point and a return point.
77enum class ExitMethod {
78 Undetermined, ///< Internal value. Only occur when analyzing JMP loop.
79 AlwaysReturn, ///< All code paths reach the return point.
80 Conditional, ///< Code path reaches the return point or an END instruction conditionally.
81 AlwaysEnd, ///< All code paths reach a END instruction.
82};
83 34
84/// A subroutine is a range of code refereced by a CALL, IF or LOOP instruction. 35using Maxwell = Tegra::Engines::Maxwell3D::Regs;
85struct Subroutine { 36using ShaderStage = Tegra::Engines::Maxwell3D::Regs::ShaderStage;
86 /// Generates a name suitable for GLSL source code. 37using Operation = const OperationNode&;
87 std::string GetName() const {
88 return "sub_" + std::to_string(begin) + '_' + std::to_string(end) + '_' + suffix;
89 }
90
91 u32 begin; ///< Entry point of the subroutine.
92 u32 end; ///< Return point of the subroutine.
93 const std::string& suffix; ///< Suffix of the shader, used to make a unique subroutine name
94 ExitMethod exit_method; ///< Exit method of the subroutine.
95 std::set<u32> labels; ///< Addresses refereced by JMP instructions.
96
97 bool operator<(const Subroutine& rhs) const {
98 return std::tie(begin, end) < std::tie(rhs.begin, rhs.end);
99 }
100};
101
102/// Analyzes shader code and produces a set of subroutines.
103class ControlFlowAnalyzer {
104public:
105 ControlFlowAnalyzer(const ProgramCode& program_code, u32 main_offset, const std::string& suffix)
106 : program_code(program_code), shader_coverage_begin(main_offset),
107 shader_coverage_end(main_offset + 1) {
108
109 // Recursively finds all subroutines.
110 const Subroutine& program_main = AddSubroutine(main_offset, PROGRAM_END, suffix);
111 if (program_main.exit_method != ExitMethod::AlwaysEnd)
112 throw DecompileFail("Program does not always end");
113 }
114
115 std::set<Subroutine> GetSubroutines() {
116 return std::move(subroutines);
117 }
118
119 std::size_t GetShaderLength() const {
120 return shader_coverage_end * sizeof(u64);
121 }
122
123private:
124 const ProgramCode& program_code;
125 std::set<Subroutine> subroutines;
126 std::map<std::pair<u32, u32>, ExitMethod> exit_method_map;
127 u32 shader_coverage_begin;
128 u32 shader_coverage_end;
129
130 /// Adds and analyzes a new subroutine if it is not added yet.
131 const Subroutine& AddSubroutine(u32 begin, u32 end, const std::string& suffix) {
132 Subroutine subroutine{begin, end, suffix, ExitMethod::Undetermined, {}};
133
134 const auto iter = subroutines.find(subroutine);
135 if (iter != subroutines.end()) {
136 return *iter;
137 }
138 38
139 subroutine.exit_method = Scan(begin, end, subroutine.labels); 39enum class Type { Bool, Bool2, Float, Int, Uint, HalfFloat };
140 if (subroutine.exit_method == ExitMethod::Undetermined) {
141 throw DecompileFail("Recursive function detected");
142 }
143
144 return *subroutines.insert(std::move(subroutine)).first;
145 }
146 40
147 /// Merges exit method of two parallel branches. 41struct TextureAoffi {};
148 static ExitMethod ParallelExit(ExitMethod a, ExitMethod b) { 42using TextureArgument = std::pair<Type, Node>;
149 if (a == ExitMethod::Undetermined) { 43using TextureIR = std::variant<TextureAoffi, TextureArgument>;
150 return b;
151 }
152 if (b == ExitMethod::Undetermined) {
153 return a;
154 }
155 if (a == b) {
156 return a;
157 }
158 return ExitMethod::Conditional;
159 }
160
161 /// Scans a range of code for labels and determines the exit method.
162 ExitMethod Scan(u32 begin, u32 end, std::set<u32>& labels) {
163 const auto [iter, inserted] =
164 exit_method_map.emplace(std::make_pair(begin, end), ExitMethod::Undetermined);
165 ExitMethod& exit_method = iter->second;
166 if (!inserted)
167 return exit_method;
168
169 for (u32 offset = begin; offset != end && offset != PROGRAM_END; ++offset) {
170 shader_coverage_begin = std::min(shader_coverage_begin, offset);
171 shader_coverage_end = std::max(shader_coverage_end, offset + 1);
172
173 const Instruction instr = {program_code[offset]};
174 if (const auto opcode = OpCode::Decode(instr)) {
175 switch (opcode->get().GetId()) {
176 case OpCode::Id::EXIT: {
177 // The EXIT instruction can be predicated, which means that the shader can
178 // conditionally end on this instruction. We have to consider the case where the
179 // condition is not met and check the exit method of that other basic block.
180 using Tegra::Shader::Pred;
181 if (instr.pred.pred_index == static_cast<u64>(Pred::UnusedIndex)) {
182 return exit_method = ExitMethod::AlwaysEnd;
183 } else {
184 const ExitMethod not_met = Scan(offset + 1, end, labels);
185 return exit_method = ParallelExit(ExitMethod::AlwaysEnd, not_met);
186 }
187 }
188 case OpCode::Id::BRA: {
189 const u32 target = offset + instr.bra.GetBranchTarget();
190 labels.insert(target);
191 const ExitMethod no_jmp = Scan(offset + 1, end, labels);
192 const ExitMethod jmp = Scan(target, end, labels);
193 return exit_method = ParallelExit(no_jmp, jmp);
194 }
195 case OpCode::Id::SSY:
196 case OpCode::Id::PBK: {
197 // The SSY and PBK use a similar encoding as the BRA instruction.
198 UNIMPLEMENTED_IF_MSG(instr.bra.constant_buffer != 0,
199 "Constant buffer branching is not supported");
200 const u32 target = offset + instr.bra.GetBranchTarget();
201 labels.insert(target);
202 // Continue scanning for an exit method.
203 break;
204 }
205 }
206 }
207 }
208 return exit_method = ExitMethod::AlwaysReturn;
209 }
210};
211 44
212template <typename T> 45enum : u32 { POSITION_VARYING_LOCATION = 0, GENERIC_VARYING_START_LOCATION = 1 };
213class ShaderScopedScope { 46constexpr u32 MAX_CONSTBUFFER_ELEMENTS =
214public: 47 static_cast<u32>(RasterizerOpenGL::MaxConstbufferSize) / (4 * sizeof(float));
215 explicit ShaderScopedScope(T& writer, std::string_view begin_expr, std::string end_expr) 48constexpr u32 MAX_GLOBALMEMORY_ELEMENTS =
216 : writer(writer), end_expr(std::move(end_expr)) { 49 static_cast<u32>(RasterizerOpenGL::MaxGlobalMemorySize) / sizeof(float);
217
218 if (begin_expr.empty()) {
219 writer.AddLine('{');
220 } else {
221 writer.AddExpression(begin_expr);
222 writer.AddLine(" {");
223 }
224 ++writer.scope;
225 }
226
227 ShaderScopedScope(const ShaderScopedScope&) = delete;
228
229 ~ShaderScopedScope() {
230 --writer.scope;
231 if (end_expr.empty()) {
232 writer.AddLine('}');
233 } else {
234 writer.AddExpression("} ");
235 writer.AddExpression(end_expr);
236 writer.AddLine(';');
237 }
238 }
239
240 ShaderScopedScope& operator=(const ShaderScopedScope&) = delete;
241
242private:
243 T& writer;
244 std::string end_expr;
245};
246 50
247class ShaderWriter { 51class ShaderWriter {
248public: 52public:
@@ -271,16 +75,17 @@ public:
271 shader_source += '\n'; 75 shader_source += '\n';
272 } 76 }
273 77
274 std::string GetResult() { 78 std::string GenerateTemporary() {
275 return std::move(shader_source); 79 std::string temporary = "tmp";
80 temporary += std::to_string(temporary_index++);
81 return temporary;
276 } 82 }
277 83
278 ShaderScopedScope<ShaderWriter> Scope(std::string_view begin_expr = {}, 84 std::string GetResult() {
279 std::string end_expr = {}) { 85 return std::move(shader_source);
280 return ShaderScopedScope(*this, begin_expr, end_expr);
281 } 86 }
282 87
283 int scope = 0; 88 s32 scope = 0;
284 89
285private: 90private:
286 void AppendIndentation() { 91 void AppendIndentation() {
@@ -288,3663 +93,1553 @@ private:
288 } 93 }
289 94
290 std::string shader_source; 95 std::string shader_source;
96 u32 temporary_index = 1;
291}; 97};
292 98
293/** 99/// Generates code to use for a swizzle operation.
294 * Represents an emulated shader register, used to track the state of that register for emulation 100std::string GetSwizzle(u32 elem) {
295 * with GLSL. At this time, a register can be used as a float or an integer. This class is used for 101 ASSERT(elem <= 3);
296 * bookkeeping within the GLSL program. 102 std::string swizzle = ".";
297 */ 103 swizzle += "xyzw"[elem];
298class GLSLRegister { 104 return swizzle;
299public: 105}
300 enum class Type {
301 Float,
302 Integer,
303 UnsignedInteger,
304 };
305
306 GLSLRegister(std::size_t index, const std::string& suffix) : index{index}, suffix{suffix} {}
307 106
308 /// Gets the GLSL type string for a register 107/// Translate topology
309 static std::string GetTypeString() { 108std::string GetTopologyName(Tegra::Shader::OutputTopology topology) {
310 return "float"; 109 switch (topology) {
110 case Tegra::Shader::OutputTopology::PointList:
111 return "points";
112 case Tegra::Shader::OutputTopology::LineStrip:
113 return "line_strip";
114 case Tegra::Shader::OutputTopology::TriangleStrip:
115 return "triangle_strip";
116 default:
117 UNIMPLEMENTED_MSG("Unknown output topology: {}", static_cast<u32>(topology));
118 return "points";
311 } 119 }
120}
312 121
313 /// Gets the GLSL register prefix string, used for declarations and referencing 122/// Returns true if an object has to be treated as precise
314 static std::string GetPrefixString() { 123bool IsPrecise(Operation operand) {
315 return "reg_"; 124 const auto& meta = operand.GetMeta();
316 }
317 125
318 /// Returns a GLSL string representing the current state of the register 126 if (const auto arithmetic = std::get_if<MetaArithmetic>(&meta)) {
319 std::string GetString() const { 127 return arithmetic->precise;
320 return GetPrefixString() + std::to_string(index) + '_' + suffix;
321 } 128 }
322 129 if (const auto half_arithmetic = std::get_if<MetaHalfArithmetic>(&meta)) {
323 /// Returns the index of the register 130 return half_arithmetic->precise;
324 std::size_t GetIndex() const {
325 return index;
326 } 131 }
132 return false;
133}
327 134
328private: 135bool IsPrecise(Node node) {
329 const std::size_t index; 136 if (const auto operation = std::get_if<OperationNode>(node)) {
330 const std::string& suffix; 137 return IsPrecise(*operation);
331}; 138 }
139 return false;
140}
332 141
333/** 142class GLSLDecompiler final {
334 * Used to manage shader registers that are emulated with GLSL. This class keeps track of the state
335 * of all registers (e.g. whether they are currently being used as Floats or Integers), and
336 * generates the necessary GLSL code to perform conversions as needed. This class is used for
337 * bookkeeping within the GLSL program.
338 */
339class GLSLRegisterManager {
340public: 143public:
341 GLSLRegisterManager(ShaderWriter& shader, ShaderWriter& declarations, 144 explicit GLSLDecompiler(const ShaderIR& ir, ShaderStage stage, std::string suffix)
342 const Maxwell3D::Regs::ShaderStage& stage, const std::string& suffix, 145 : ir{ir}, stage{stage}, suffix{suffix}, header{ir.GetHeader()} {}
343 const Tegra::Shader::Header& header)
344 : shader{shader}, declarations{declarations}, stage{stage}, suffix{suffix}, header{header},
345 fixed_pipeline_output_attributes_used{}, local_memory_size{0} {
346 BuildRegisterList();
347 BuildInputList();
348 }
349
350 void SetConditionalCodesFromExpression(const std::string& expresion) {
351 SetInternalFlag(InternalFlag::ZeroFlag, "(" + expresion + ") == 0");
352 LOG_WARNING(HW_GPU, "Condition codes implementation is incomplete.");
353 }
354
355 void SetConditionalCodesFromRegister(const Register& reg, u64 dest_elem = 0) {
356 SetConditionalCodesFromExpression(GetRegister(reg, static_cast<u32>(dest_elem)));
357 }
358
359 /**
360 * Returns code that does an integer size conversion for the specified size.
361 * @param value Value to perform integer size conversion on.
362 * @param size Register size to use for conversion instructions.
363 * @returns GLSL string corresponding to the value converted to the specified size.
364 */
365 static std::string ConvertIntegerSize(const std::string& value, Register::Size size) {
366 switch (size) {
367 case Register::Size::Byte:
368 return "((" + value + " << 24) >> 24)";
369 case Register::Size::Short:
370 return "((" + value + " << 16) >> 16)";
371 case Register::Size::Word:
372 // Default - do nothing
373 return value;
374 default:
375 UNREACHABLE_MSG("Unimplemented conversion size: {}", static_cast<u32>(size));
376 return value;
377 }
378 }
379 146
380 /** 147 void Decompile() {
381 * Gets a register as an float. 148 DeclareVertex();
382 * @param reg The register to get. 149 DeclareGeometry();
383 * @param elem The element to use for the operation. 150 DeclareRegisters();
384 * @returns GLSL string corresponding to the register as a float. 151 DeclarePredicates();
385 */ 152 DeclareLocalMemory();
386 std::string GetRegisterAsFloat(const Register& reg, unsigned elem = 0) { 153 DeclareInternalFlags();
387 return GetRegister(reg, elem); 154 DeclareInputAttributes();
388 } 155 DeclareOutputAttributes();
389 156 DeclareConstantBuffers();
390 /** 157 DeclareGlobalMemory();
391 * Gets a register as an integer. 158 DeclareSamplers();
392 * @param reg The register to get.
393 * @param elem The element to use for the operation.
394 * @param is_signed Whether to get the register as a signed (or unsigned) integer.
395 * @param size Register size to use for conversion instructions.
396 * @returns GLSL string corresponding to the register as an integer.
397 */
398 std::string GetRegisterAsInteger(const Register& reg, unsigned elem = 0, bool is_signed = true,
399 Register::Size size = Register::Size::Word) {
400 const std::string func{is_signed ? "floatBitsToInt" : "floatBitsToUint"};
401 const std::string value{func + '(' + GetRegister(reg, elem) + ')'};
402 return ConvertIntegerSize(value, size);
403 }
404
405 /**
406 * Writes code that does a register assignment to float value operation.
407 * @param reg The destination register to use.
408 * @param elem The element to use for the operation.
409 * @param value The code representing the value to assign.
410 * @param dest_num_components Number of components in the destination.
411 * @param value_num_components Number of components in the value.
412 * @param is_saturated Optional, when True, saturates the provided value.
413 * @param sets_cc Optional, when True, sets the corresponding values to the implemented
414 * condition flags.
415 * @param dest_elem Optional, the destination element to use for the operation.
416 */
417 void SetRegisterToFloat(const Register& reg, u64 elem, const std::string& value,
418 u64 dest_num_components, u64 value_num_components,
419 bool is_saturated = false, bool sets_cc = false, u64 dest_elem = 0,
420 bool precise = false) {
421 const std::string clamped_value = is_saturated ? "clamp(" + value + ", 0.0, 1.0)" : value;
422 SetRegister(reg, elem, clamped_value, dest_num_components, value_num_components, dest_elem,
423 precise);
424 if (sets_cc) {
425 if (reg == Register::ZeroIndex) {
426 SetConditionalCodesFromExpression(clamped_value);
427 } else {
428 SetConditionalCodesFromRegister(reg, dest_elem);
429 }
430 }
431 }
432 159
433 /** 160 code.AddLine("void execute_" + suffix + "() {");
434 * Writes code that does a register assignment to integer value operation. 161 ++code.scope;
435 * @param reg The destination register to use.
436 * @param elem The element to use for the operation.
437 * @param value The code representing the value to assign.
438 * @param dest_num_components Number of components in the destination.
439 * @param value_num_components Number of components in the value.
440 * @param is_saturated Optional, when True, saturates the provided value.
441 * @param sets_cc Optional, when True, sets the corresponding values to the implemented
442 * condition flags.
443 * @param dest_elem Optional, the destination element to use for the operation.
444 * @param size Register size to use for conversion instructions.
445 */
446 void SetRegisterToInteger(const Register& reg, bool is_signed, u64 elem,
447 const std::string& value, u64 dest_num_components,
448 u64 value_num_components, bool is_saturated = false,
449 bool sets_cc = false, u64 dest_elem = 0,
450 Register::Size size = Register::Size::Word) {
451 UNIMPLEMENTED_IF(is_saturated);
452 const std::string final_value = ConvertIntegerSize(value, size);
453 const std::string func{is_signed ? "intBitsToFloat" : "uintBitsToFloat"};
454
455 SetRegister(reg, elem, func + '(' + final_value + ')', dest_num_components,
456 value_num_components, dest_elem, false);
457
458 if (sets_cc) {
459 if (reg == Register::ZeroIndex) {
460 SetConditionalCodesFromExpression(final_value);
461 } else {
462 SetConditionalCodesFromRegister(reg, dest_elem);
463 }
464 }
465 }
466 162
467 /** 163 // VM's program counter
468 * Writes code that does a register assignment to a half float value operation. 164 const auto first_address = ir.GetBasicBlocks().begin()->first;
469 * @param reg The destination register to use. 165 code.AddLine("uint jmp_to = " + std::to_string(first_address) + "u;");
470 * @param elem The element to use for the operation.
471 * @param value The code representing the value to assign. Type has to be half float.
472 * @param merge Half float kind of assignment.
473 * @param dest_num_components Number of components in the destination.
474 * @param value_num_components Number of components in the value.
475 * @param is_saturated Optional, when True, saturates the provided value.
476 * @param dest_elem Optional, the destination element to use for the operation.
477 */
478 void SetRegisterToHalfFloat(const Register& reg, u64 elem, const std::string& value,
479 Tegra::Shader::HalfMerge merge, u64 dest_num_components,
480 u64 value_num_components, bool is_saturated = false,
481 u64 dest_elem = 0) {
482 UNIMPLEMENTED_IF(is_saturated);
483
484 const std::string result = [&]() {
485 switch (merge) {
486 case Tegra::Shader::HalfMerge::H0_H1:
487 return "uintBitsToFloat(packHalf2x16(" + value + "))";
488 case Tegra::Shader::HalfMerge::F32:
489 // Half float instructions take the first component when doing a float cast.
490 return "float(" + value + ".x)";
491 case Tegra::Shader::HalfMerge::Mrg_H0:
492 // TODO(Rodrigo): I guess Mrg_H0 and Mrg_H1 take their respective component from the
493 // pack. I couldn't test this on hardware but it shouldn't really matter since most
494 // of the time when a Mrg_* flag is used both components will be mirrored. That
495 // being said, it deserves a test.
496 return "uintBitsToFloat((" + GetRegisterAsInteger(reg, 0, false) +
497 " & 0xffff0000) | (packHalf2x16(" + value + ") & 0x0000ffff))";
498 case Tegra::Shader::HalfMerge::Mrg_H1:
499 return "uintBitsToFloat((" + GetRegisterAsInteger(reg, 0, false) +
500 " & 0x0000ffff) | (packHalf2x16(" + value + ") & 0xffff0000))";
501 default:
502 UNREACHABLE();
503 return std::string("0");
504 }
505 }();
506 166
507 SetRegister(reg, elem, result, dest_num_components, value_num_components, dest_elem, false); 167 // TODO(Subv): Figure out the actual depth of the flow stack, for now it seems
508 } 168 // unlikely that shaders will use 20 nested SSYs and PBKs.
169 constexpr u32 FLOW_STACK_SIZE = 20;
170 code.AddLine(fmt::format("uint flow_stack[{}];", FLOW_STACK_SIZE));
171 code.AddLine("uint flow_stack_top = 0u;");
509 172
510 /** 173 code.AddLine("while (true) {");
511 * Writes code that does a register assignment to input attribute operation. Input attributes 174 ++code.scope;
512 * are stored as floats, so this may require conversion.
513 * @param reg The destination register to use.
514 * @param elem The element to use for the operation.
515 * @param attribute The input attribute to use as the source value.
516 * @param input_mode The input mode.
517 * @param vertex The register that decides which vertex to read from (used in GS).
518 */
519 void SetRegisterToInputAttibute(const Register& reg, u64 elem, Attribute::Index attribute,
520 const Tegra::Shader::IpaMode& input_mode,
521 std::optional<Register> vertex = {}) {
522 const std::string dest = GetRegisterAsFloat(reg);
523 const std::string src = GetInputAttribute(attribute, input_mode, vertex) + GetSwizzle(elem);
524 shader.AddLine(dest + " = " + src + ';');
525 }
526 175
527 std::string GetLocalMemoryAsFloat(const std::string& index) { 176 code.AddLine("switch (jmp_to) {");
528 return "lmem[" + index + ']';
529 }
530
531 std::string GetLocalMemoryAsInteger(const std::string& index, bool is_signed = false) {
532 const std::string func{is_signed ? "floatToIntBits" : "floatBitsToUint"};
533 return func + "(lmem[" + index + "])";
534 }
535 177
536 void SetLocalMemoryAsFloat(const std::string& index, const std::string& value) { 178 for (const auto& pair : ir.GetBasicBlocks()) {
537 shader.AddLine("lmem[" + index + "] = " + value + ';'); 179 const auto [address, bb] = pair;
538 } 180 code.AddLine(fmt::format("case 0x{:x}u: {{", address));
181 ++code.scope;
539 182
540 void SetLocalMemoryAsInteger(const std::string& index, const std::string& value, 183 VisitBlock(bb);
541 bool is_signed = false) {
542 const std::string func{is_signed ? "intBitsToFloat" : "uintBitsToFloat"};
543 shader.AddLine("lmem[" + index + "] = " + func + '(' + value + ");");
544 }
545 184
546 std::string GetConditionCode(const Tegra::Shader::ConditionCode cc) const { 185 --code.scope;
547 switch (cc) { 186 code.AddLine('}');
548 case Tegra::Shader::ConditionCode::NEU:
549 return "!(" + GetInternalFlag(InternalFlag::ZeroFlag) + ')';
550 default:
551 UNIMPLEMENTED_MSG("Unimplemented condition code: {}", static_cast<u32>(cc));
552 return "false";
553 } 187 }
554 }
555 188
556 std::string GetInternalFlag(const InternalFlag flag) const { 189 code.AddLine("default: return;");
557 const auto index = static_cast<u32>(flag); 190 code.AddLine('}');
558 ASSERT(index < static_cast<u32>(InternalFlag::Amount));
559 191
560 return std::string(INTERNAL_FLAG_NAMES[index]) + '_' + suffix; 192 for (std::size_t i = 0; i < 2; ++i) {
193 --code.scope;
194 code.AddLine('}');
195 }
561 } 196 }
562 197
563 void SetInternalFlag(const InternalFlag flag, const std::string& value) const { 198 std::string GetResult() {
564 shader.AddLine(GetInternalFlag(flag) + " = " + value + ';'); 199 return code.GetResult();
565 } 200 }
566 201
567 /** 202 ShaderEntries GetShaderEntries() const {
568 * Writes code that does a output attribute assignment to register operation. Output attributes 203 ShaderEntries entries;
569 * are stored as floats, so this may require conversion. 204 for (const auto& cbuf : ir.GetConstantBuffers()) {
570 * @param attribute The destination output attribute. 205 entries.const_buffers.emplace_back(cbuf.second.GetMaxOffset(), cbuf.second.IsIndirect(),
571 * @param elem The element to use for the operation. 206 cbuf.first);
572 * @param val_reg The register to use as the source value.
573 * @param buf_reg The register that tells which buffer to write to (used in geometry shaders).
574 */
575 void SetOutputAttributeToRegister(Attribute::Index attribute, u64 elem, const Register& val_reg,
576 const Register& buf_reg) {
577 const std::string dest = GetOutputAttribute(attribute);
578 const std::string src = GetRegisterAsFloat(val_reg);
579 if (dest.empty())
580 return;
581
582 // Can happen with unknown/unimplemented output attributes, in which case we ignore the
583 // instruction for now.
584 if (stage == Maxwell3D::Regs::ShaderStage::Geometry) {
585 // TODO(Rodrigo): nouveau sets some attributes after setting emitting a geometry
586 // shader. These instructions use a dirty register as buffer index, to avoid some
587 // drivers from complaining about out of boundary writes, guard them.
588 const std::string buf_index{"((" + GetRegisterAsInteger(buf_reg) + ") % " +
589 std::to_string(MAX_GEOMETRY_BUFFERS) + ')'};
590 shader.AddLine("amem[" + buf_index + "][" +
591 std::to_string(static_cast<u32>(attribute)) + ']' + GetSwizzle(elem) +
592 " = " + src + ';');
593 return;
594 } 207 }
595 208 for (const auto& sampler : ir.GetSamplers()) {
596 switch (attribute) { 209 entries.samplers.emplace_back(sampler);
597 case Attribute::Index::ClipDistances0123:
598 case Attribute::Index::ClipDistances4567: {
599 const u64 index = (attribute == Attribute::Index::ClipDistances4567 ? 4 : 0) + elem;
600 UNIMPLEMENTED_IF_MSG(
601 ((header.vtg.clip_distances >> index) & 1) == 0,
602 "Shader is setting gl_ClipDistance{} without enabling it in the header", index);
603
604 clip_distances[index] = true;
605 fixed_pipeline_output_attributes_used.insert(attribute);
606 shader.AddLine(dest + '[' + std::to_string(index) + "] = " + src + ';');
607 break;
608 } 210 }
609 case Attribute::Index::PointSize: 211 for (const auto& gmem : ir.GetGlobalMemoryBases()) {
610 fixed_pipeline_output_attributes_used.insert(attribute); 212 entries.global_memory_entries.emplace_back(gmem.cbuf_index, gmem.cbuf_offset);
611 shader.AddLine(dest + " = " + src + ';');
612 break;
613 default:
614 shader.AddLine(dest + GetSwizzle(elem) + " = " + src + ';');
615 break;
616 } 213 }
214 entries.clip_distances = ir.GetClipDistances();
215 entries.shader_length = ir.GetLength();
216 return entries;
617 } 217 }
618 218
619 /// Generates code representing a uniform (C buffer) register, interpreted as the input type. 219private:
620 std::string GetUniform(u64 index, u64 offset, GLSLRegister::Type type, 220 using OperationDecompilerFn = std::string (GLSLDecompiler::*)(Operation);
621 Register::Size size = Register::Size::Word) { 221 using OperationDecompilersArray =
622 declr_const_buffers[index].MarkAsUsed(index, offset, stage); 222 std::array<OperationDecompilerFn, static_cast<std::size_t>(OperationCode::Amount)>;
623 std::string value = 'c' + std::to_string(index) + '[' + std::to_string(offset / 4) + "][" +
624 std::to_string(offset % 4) + ']';
625 223
626 if (type == GLSLRegister::Type::Float) { 224 void DeclareVertex() {
627 // Do nothing, default 225 if (stage != ShaderStage::Vertex)
628 } else if (type == GLSLRegister::Type::Integer) { 226 return;
629 value = "floatBitsToInt(" + value + ')';
630 } else if (type == GLSLRegister::Type::UnsignedInteger) {
631 value = "floatBitsToUint(" + value + ')';
632 } else {
633 UNREACHABLE();
634 }
635 227
636 return ConvertIntegerSize(value, size); 228 DeclareVertexRedeclarations();
637 } 229 }
638 230
639 std::string GetUniformIndirect(u64 cbuf_index, s64 offset, const std::string& index_str, 231 void DeclareGeometry() {
640 GLSLRegister::Type type) { 232 if (stage != ShaderStage::Geometry)
641 declr_const_buffers[cbuf_index].MarkAsUsedIndirect(cbuf_index, stage); 233 return;
642
643 const std::string final_offset = fmt::format("({} + {})", index_str, offset / 4);
644 const std::string value = 'c' + std::to_string(cbuf_index) + '[' + final_offset + " / 4][" +
645 final_offset + " % 4]";
646
647 if (type == GLSLRegister::Type::Float) {
648 return value;
649 } else if (type == GLSLRegister::Type::Integer) {
650 return "floatBitsToInt(" + value + ')';
651 } else {
652 UNREACHABLE();
653 return value;
654 }
655 }
656
657 /// Add declarations.
658 void GenerateDeclarations(const std::string& suffix) {
659 GenerateVertex();
660 GenerateRegisters(suffix);
661 GenerateLocalMemory();
662 GenerateInternalFlags();
663 GenerateInputAttrs();
664 GenerateOutputAttrs();
665 GenerateConstBuffers();
666 GenerateSamplers();
667 GenerateGeometry();
668 }
669 234
670 /// Returns a list of constant buffer declarations. 235 const auto topology = GetTopologyName(header.common3.output_topology);
671 std::vector<ConstBufferEntry> GetConstBuffersDeclarations() const { 236 const auto max_vertices = std::to_string(header.common4.max_output_vertices);
672 std::vector<ConstBufferEntry> result; 237 code.AddLine("layout (" + topology + ", max_vertices = " + max_vertices + ") out;");
673 std::copy_if(declr_const_buffers.begin(), declr_const_buffers.end(), 238 code.AddNewLine();
674 std::back_inserter(result), [](const auto& entry) { return entry.IsUsed(); });
675 return result;
676 }
677 239
678 /// Returns a list of samplers used in the shader. 240 DeclareVertexRedeclarations();
679 const std::vector<SamplerEntry>& GetSamplers() const {
680 return used_samplers;
681 } 241 }
682 242
683 /// Returns an array of the used clip distances. 243 void DeclareVertexRedeclarations() {
684 const std::array<bool, Maxwell::NumClipDistances>& GetClipDistances() const { 244 bool clip_distances_declared = false;
685 return clip_distances;
686 }
687 245
688 /// Returns the GLSL sampler used for the input shader sampler, and creates a new one if 246 code.AddLine("out gl_PerVertex {");
689 /// necessary. 247 ++code.scope;
690 std::string AccessSampler(const Sampler& sampler, Tegra::Shader::TextureType type,
691 bool is_array, bool is_shadow) {
692 const auto offset = static_cast<std::size_t>(sampler.index.Value());
693 248
694 // If this sampler has already been used, return the existing mapping. 249 code.AddLine("vec4 gl_Position;");
695 const auto itr =
696 std::find_if(used_samplers.begin(), used_samplers.end(),
697 [&](const SamplerEntry& entry) { return entry.GetOffset() == offset; });
698 250
699 if (itr != used_samplers.end()) { 251 for (const auto o : ir.GetOutputAttributes()) {
700 ASSERT(itr->GetType() == type && itr->IsArray() == is_array && 252 if (o == Attribute::Index::PointSize)
701 itr->IsShadow() == is_shadow); 253 code.AddLine("float gl_PointSize;");
702 return itr->GetName(); 254 if (!clip_distances_declared && (o == Attribute::Index::ClipDistances0123 ||
255 o == Attribute::Index::ClipDistances4567)) {
256 code.AddLine("float gl_ClipDistance[];");
257 clip_distances_declared = true;
258 }
703 } 259 }
704 260
705 // Otherwise create a new mapping for this sampler 261 --code.scope;
706 const std::size_t next_index = used_samplers.size(); 262 code.AddLine("};");
707 const SamplerEntry entry{stage, offset, next_index, type, is_array, is_shadow}; 263 code.AddNewLine();
708 used_samplers.emplace_back(entry);
709 return entry.GetName();
710 } 264 }
711 265
712 void SetLocalMemory(u64 lmem) { 266 void DeclareRegisters() {
713 local_memory_size = lmem; 267 const auto& registers = ir.GetRegisters();
268 for (const u32 gpr : registers) {
269 code.AddLine("float " + GetRegister(gpr) + " = 0;");
270 }
271 if (!registers.empty())
272 code.AddNewLine();
714 } 273 }
715 274
716private: 275 void DeclarePredicates() {
717 /// Generates declarations for registers. 276 const auto& predicates = ir.GetPredicates();
718 void GenerateRegisters(const std::string& suffix) { 277 for (const auto pred : predicates) {
719 for (const auto& reg : regs) { 278 code.AddLine("bool " + GetPredicate(pred) + " = false;");
720 declarations.AddLine(GLSLRegister::GetTypeString() + ' ' + reg.GetPrefixString() +
721 std::to_string(reg.GetIndex()) + '_' + suffix + " = 0;");
722 } 279 }
723 declarations.AddNewLine(); 280 if (!predicates.empty())
281 code.AddNewLine();
724 } 282 }
725 283
726 /// Generates declarations for local memory. 284 void DeclareLocalMemory() {
727 void GenerateLocalMemory() { 285 if (const u64 local_memory_size = header.GetLocalMemorySize(); local_memory_size > 0) {
728 if (local_memory_size > 0) { 286 const auto element_count = Common::AlignUp(local_memory_size, 4) / 4;
729 declarations.AddLine("float lmem[" + std::to_string((local_memory_size - 1 + 4) / 4) + 287 code.AddLine("float " + GetLocalMemory() + '[' + std::to_string(element_count) + "];");
730 "];"); 288 code.AddNewLine();
731 declarations.AddNewLine();
732 } 289 }
733 } 290 }
734 291
735 /// Generates declarations for internal flags. 292 void DeclareInternalFlags() {
736 void GenerateInternalFlags() {
737 for (u32 flag = 0; flag < static_cast<u32>(InternalFlag::Amount); flag++) { 293 for (u32 flag = 0; flag < static_cast<u32>(InternalFlag::Amount); flag++) {
738 const InternalFlag code = static_cast<InternalFlag>(flag); 294 const InternalFlag flag_code = static_cast<InternalFlag>(flag);
739 declarations.AddLine("bool " + GetInternalFlag(code) + " = false;"); 295 code.AddLine("bool " + GetInternalFlag(flag_code) + " = false;");
740 } 296 }
741 declarations.AddNewLine(); 297 code.AddNewLine();
742 } 298 }
743 299
744 /// Generates declarations for input attributes. 300 std::string GetInputFlags(AttributeUse attribute) {
745 void GenerateInputAttrs() { 301 std::string out;
746 for (const auto element : declr_input_attribute) { 302
303 switch (attribute) {
304 case AttributeUse::Constant:
305 out += "flat ";
306 break;
307 case AttributeUse::ScreenLinear:
308 out += "noperspective ";
309 break;
310 case AttributeUse::Perspective:
311 // Default, Smooth
312 break;
313 default:
314 LOG_CRITICAL(HW_GPU, "Unused attribute being fetched");
315 UNREACHABLE();
316 }
317 return out;
318 }
319
320 void DeclareInputAttributes() {
321 const auto& attributes = ir.GetInputAttributes();
322 for (const auto element : attributes) {
323 const Attribute::Index index = element.first;
324 if (index < Attribute::Index::Attribute_0 || index > Attribute::Index::Attribute_31) {
325 // Skip when it's not a generic attribute
326 continue;
327 }
328
747 // TODO(bunnei): Use proper number of elements for these 329 // TODO(bunnei): Use proper number of elements for these
748 u32 idx = 330 u32 idx = static_cast<u32>(index) - static_cast<u32>(Attribute::Index::Attribute_0);
749 static_cast<u32>(element.first) - static_cast<u32>(Attribute::Index::Attribute_0); 331 if (stage != ShaderStage::Vertex) {
750 if (stage != Maxwell3D::Regs::ShaderStage::Vertex) {
751 // If inputs are varyings, add an offset 332 // If inputs are varyings, add an offset
752 idx += GENERIC_VARYING_START_LOCATION; 333 idx += GENERIC_VARYING_START_LOCATION;
753 } 334 }
754 335
755 std::string attr{GetInputAttribute(element.first, element.second)}; 336 std::string attr = GetInputAttribute(index);
756 if (stage == Maxwell3D::Regs::ShaderStage::Geometry) { 337 if (stage == ShaderStage::Geometry) {
757 attr = "gs_" + attr + "[]"; 338 attr = "gs_" + attr + "[]";
758 } 339 }
759 declarations.AddLine("layout (location = " + std::to_string(idx) + ") " + 340 std::string suffix;
760 GetInputFlags(element.first) + "in vec4 " + attr + ';'); 341 if (stage == ShaderStage::Fragment) {
342 const auto input_mode =
343 header.ps.GetAttributeUse(idx - GENERIC_VARYING_START_LOCATION);
344 suffix = GetInputFlags(input_mode);
345 }
346 code.AddLine("layout (location = " + std::to_string(idx) + ") " + suffix + "in vec4 " +
347 attr + ';');
761 } 348 }
762 349 if (!attributes.empty())
763 declarations.AddNewLine(); 350 code.AddNewLine();
764 } 351 }
765 352
766 /// Generates declarations for output attributes. 353 void DeclareOutputAttributes() {
767 void GenerateOutputAttrs() { 354 const auto& attributes = ir.GetOutputAttributes();
768 for (const auto& index : declr_output_attribute) { 355 for (const auto index : attributes) {
356 if (index < Attribute::Index::Attribute_0 || index > Attribute::Index::Attribute_31) {
357 // Skip when it's not a generic attribute
358 continue;
359 }
769 // TODO(bunnei): Use proper number of elements for these 360 // TODO(bunnei): Use proper number of elements for these
770 const u32 idx = static_cast<u32>(index) - 361 const auto idx = static_cast<u32>(index) -
771 static_cast<u32>(Attribute::Index::Attribute_0) + 362 static_cast<u32>(Attribute::Index::Attribute_0) +
772 GENERIC_VARYING_START_LOCATION; 363 GENERIC_VARYING_START_LOCATION;
773 declarations.AddLine("layout (location = " + std::to_string(idx) + ") out vec4 " + 364 code.AddLine("layout (location = " + std::to_string(idx) + ") out vec4 " +
774 GetOutputAttribute(index) + ';'); 365 GetOutputAttribute(index) + ';');
775 }
776 declarations.AddNewLine();
777 }
778
779 /// Generates declarations for constant buffers.
780 void GenerateConstBuffers() {
781 for (const auto& entry : GetConstBuffersDeclarations()) {
782 declarations.AddLine("layout (std140) uniform " + entry.GetName());
783 declarations.AddLine('{');
784 declarations.AddLine(" vec4 c" + std::to_string(entry.GetIndex()) +
785 "[MAX_CONSTBUFFER_ELEMENTS];");
786 declarations.AddLine("};");
787 declarations.AddNewLine();
788 } 366 }
789 declarations.AddNewLine(); 367 if (!attributes.empty())
368 code.AddNewLine();
790 } 369 }
791 370
792 /// Generates declarations for samplers. 371 void DeclareConstantBuffers() {
793 void GenerateSamplers() { 372 for (const auto& entry : ir.GetConstantBuffers()) {
794 const auto& samplers = GetSamplers(); 373 const auto [index, size] = entry;
795 for (const auto& sampler : samplers) { 374 code.AddLine("layout (std140, binding = CBUF_BINDING_" + std::to_string(index) +
796 declarations.AddLine("uniform " + sampler.GetTypeString() + ' ' + sampler.GetName() + 375 ") uniform " + GetConstBufferBlock(index) + " {");
797 ';'); 376 code.AddLine(" vec4 " + GetConstBuffer(index) + "[MAX_CONSTBUFFER_ELEMENTS];");
377 code.AddLine("};");
378 code.AddNewLine();
798 } 379 }
799 declarations.AddNewLine();
800 } 380 }
801 381
802 /// Generates declarations used for geometry shaders. 382 void DeclareGlobalMemory() {
803 void GenerateGeometry() { 383 for (const auto& entry : ir.GetGlobalMemoryBases()) {
804 if (stage != Maxwell3D::Regs::ShaderStage::Geometry) 384 const std::string binding =
805 return; 385 fmt::format("GMEM_BINDING_{}_{}", entry.cbuf_index, entry.cbuf_offset);
806 386 code.AddLine("layout (std430, binding = " + binding + ") buffer " +
807 declarations.AddLine( 387 GetGlobalMemoryBlock(entry) + " {");
808 "layout (" + GetTopologyName(header.common3.output_topology) + 388 code.AddLine(" float " + GetGlobalMemory(entry) + "[MAX_GLOBALMEMORY_ELEMENTS];");
809 ", max_vertices = " + std::to_string(header.common4.max_output_vertices) + ") out;"); 389 code.AddLine("};");
810 declarations.AddNewLine(); 390 code.AddNewLine();
811
812 declarations.AddLine("vec4 amem[" + std::to_string(MAX_GEOMETRY_BUFFERS) + "][" +
813 std::to_string(MAX_ATTRIBUTES) + "];");
814 declarations.AddNewLine();
815
816 constexpr char buffer[] = "amem[output_buffer]";
817 declarations.AddLine("void emit_vertex(uint output_buffer) {");
818 ++declarations.scope;
819 for (const auto element : declr_output_attribute) {
820 declarations.AddLine(GetOutputAttribute(element) + " = " + buffer + '[' +
821 std::to_string(static_cast<u32>(element)) + "];");
822 } 391 }
823
824 declarations.AddLine("position = " + std::string(buffer) + '[' +
825 std::to_string(static_cast<u32>(Attribute::Index::Position)) + "];");
826
827 // If a geometry shader is attached, it will always flip (it's the last stage before
828 // fragment). For more info about flipping, refer to gl_shader_gen.cpp.
829 declarations.AddLine("position.xy *= viewport_flip.xy;");
830 declarations.AddLine("gl_Position = position;");
831 declarations.AddLine("position.w = 1.0;");
832 declarations.AddLine("EmitVertex();");
833 --declarations.scope;
834 declarations.AddLine('}');
835 declarations.AddNewLine();
836 } 392 }
837 393
838 void GenerateVertex() { 394 void DeclareSamplers() {
839 if (stage != Maxwell3D::Regs::ShaderStage::Vertex) 395 const auto& samplers = ir.GetSamplers();
840 return; 396 for (const auto& sampler : samplers) {
841 bool clip_distances_declared = false; 397 std::string sampler_type = [&]() {
398 switch (sampler.GetType()) {
399 case Tegra::Shader::TextureType::Texture1D:
400 return "sampler1D";
401 case Tegra::Shader::TextureType::Texture2D:
402 return "sampler2D";
403 case Tegra::Shader::TextureType::Texture3D:
404 return "sampler3D";
405 case Tegra::Shader::TextureType::TextureCube:
406 return "samplerCube";
407 default:
408 UNREACHABLE();
409 return "sampler2D";
410 }
411 }();
412 if (sampler.IsArray())
413 sampler_type += "Array";
414 if (sampler.IsShadow())
415 sampler_type += "Shadow";
842 416
843 declarations.AddLine("out gl_PerVertex {"); 417 code.AddLine("layout (binding = SAMPLER_BINDING_" + std::to_string(sampler.GetIndex()) +
844 ++declarations.scope; 418 ") uniform " + sampler_type + ' ' + GetSampler(sampler) + ';');
845 declarations.AddLine("vec4 gl_Position;");
846 for (auto& o : fixed_pipeline_output_attributes_used) {
847 if (o == Attribute::Index::PointSize)
848 declarations.AddLine("float gl_PointSize;");
849 if (!clip_distances_declared && (o == Attribute::Index::ClipDistances0123 ||
850 o == Attribute::Index::ClipDistances4567)) {
851 declarations.AddLine("float gl_ClipDistance[];");
852 clip_distances_declared = true;
853 }
854 } 419 }
855 --declarations.scope; 420 if (!samplers.empty())
856 declarations.AddLine("};"); 421 code.AddNewLine();
857 } 422 }
858 423
859 /// Generates code representing a temporary (GPR) register. 424 void VisitBlock(const NodeBlock& bb) {
860 std::string GetRegister(const Register& reg, unsigned elem) { 425 for (const Node node : bb) {
861 if (reg == Register::ZeroIndex) { 426 if (const std::string expr = Visit(node); !expr.empty()) {
862 return "0"; 427 code.AddLine(expr);
863 } 428 }
864
865 return regs[reg.GetSwizzledIndex(elem)].GetString();
866 }
867
868 /**
869 * Writes code that does a register assignment to value operation.
870 * @param reg The destination register to use.
871 * @param elem The element to use for the operation.
872 * @param value The code representing the value to assign.
873 * @param dest_num_components Number of components in the destination.
874 * @param value_num_components Number of components in the value.
875 * @param dest_elem Optional, the destination element to use for the operation.
876 */
877 void SetRegister(const Register& reg, u64 elem, const std::string& value,
878 u64 dest_num_components, u64 value_num_components, u64 dest_elem,
879 bool precise) {
880 if (reg == Register::ZeroIndex) {
881 // Setting RZ is a nop in hardware.
882 return;
883 }
884
885 std::string dest = GetRegister(reg, static_cast<u32>(dest_elem));
886 if (dest_num_components > 1) {
887 dest += GetSwizzle(elem);
888 }
889
890 std::string src = '(' + value + ')';
891 if (value_num_components > 1) {
892 src += GetSwizzle(elem);
893 }
894
895 if (precise && stage != Maxwell3D::Regs::ShaderStage::Fragment) {
896 const auto scope = shader.Scope();
897
898 // This avoids optimizations of constant propagation and keeps the code as the original
899 // Sadly using the precise keyword causes "linking" errors on fragment shaders.
900 shader.AddLine("precise float tmp = " + src + ';');
901 shader.AddLine(dest + " = tmp;");
902 } else {
903 shader.AddLine(dest + " = " + src + ';');
904 } 429 }
905 } 430 }
906 431
907 /// Build the GLSL register list. 432 std::string Visit(Node node) {
908 void BuildRegisterList() { 433 if (const auto operation = std::get_if<OperationNode>(node)) {
909 regs.reserve(Register::NumRegisters); 434 const auto operation_index = static_cast<std::size_t>(operation->GetCode());
910 435 if (operation_index >= operation_decompilers.size()) {
911 for (std::size_t index = 0; index < Register::NumRegisters; ++index) { 436 UNREACHABLE_MSG("Out of bounds operation: {}", operation_index);
912 regs.emplace_back(index, suffix); 437 return {};
913 } 438 }
914 } 439 const auto decompiler = operation_decompilers[operation_index];
440 if (decompiler == nullptr) {
441 UNREACHABLE_MSG("Undefined operation: {}", operation_index);
442 return {};
443 }
444 return (this->*decompiler)(*operation);
915 445
916 void BuildInputList() { 446 } else if (const auto gpr = std::get_if<GprNode>(node)) {
917 const u32 size = static_cast<u32>(Attribute::Index::Attribute_31) - 447 const u32 index = gpr->GetIndex();
918 static_cast<u32>(Attribute::Index::Attribute_0) + 1; 448 if (index == Register::ZeroIndex) {
919 declr_input_attribute.reserve(size); 449 return "0";
920 } 450 }
451 return GetRegister(index);
921 452
922 /// Generates code representing an input attribute register. 453 } else if (const auto immediate = std::get_if<ImmediateNode>(node)) {
923 std::string GetInputAttribute(Attribute::Index attribute, 454 const u32 value = immediate->GetValue();
924 const Tegra::Shader::IpaMode& input_mode, 455 if (value < 10) {
925 std::optional<Register> vertex = {}) { 456 // For eyecandy avoid using hex numbers on single digits
926 auto GeometryPass = [&](const std::string& name) { 457 return fmt::format("utof({}u)", immediate->GetValue());
927 if (stage == Maxwell3D::Regs::ShaderStage::Geometry && vertex) {
928 // TODO(Rodrigo): Guard geometry inputs against out of bound reads. Some games set
929 // an 0x80000000 index for those and the shader fails to build. Find out why this
930 // happens and what's its intent.
931 return "gs_" + name + '[' + GetRegisterAsInteger(*vertex, 0, false) +
932 " % MAX_VERTEX_INPUT]";
933 } 458 }
934 return name; 459 return fmt::format("utof(0x{:x}u)", immediate->GetValue());
935 };
936 460
937 switch (attribute) { 461 } else if (const auto predicate = std::get_if<PredicateNode>(node)) {
938 case Attribute::Index::Position: 462 const auto value = [&]() -> std::string {
939 if (stage != Maxwell3D::Regs::ShaderStage::Fragment) { 463 switch (const auto index = predicate->GetIndex(); index) {
940 return GeometryPass("position"); 464 case Tegra::Shader::Pred::UnusedIndex:
941 } else { 465 return "true";
942 return "vec4(gl_FragCoord.x, gl_FragCoord.y, gl_FragCoord.z, 1.0)"; 466 case Tegra::Shader::Pred::NeverExecute:
467 return "false";
468 default:
469 return GetPredicate(index);
470 }
471 }();
472 if (predicate->IsNegated()) {
473 return "!(" + value + ')';
943 } 474 }
944 case Attribute::Index::PointCoord: 475 return value;
945 return "vec4(gl_PointCoord.x, gl_PointCoord.y, 0, 0)"; 476
946 case Attribute::Index::TessCoordInstanceIDVertexID: 477 } else if (const auto abuf = std::get_if<AbufNode>(node)) {
947 // TODO(Subv): Find out what the values are for the first two elements when inside a 478 const auto attribute = abuf->GetIndex();
948 // vertex shader, and what's the value of the fourth element when inside a Tess Eval 479 const auto element = abuf->GetElement();
949 // shader. 480
950 ASSERT(stage == Maxwell3D::Regs::ShaderStage::Vertex); 481 const auto GeometryPass = [&](const std::string& name) {
951 // Config pack's first value is instance_id. 482 if (stage == ShaderStage::Geometry && abuf->GetBuffer()) {
952 return "vec4(0, 0, uintBitsToFloat(config_pack[0]), uintBitsToFloat(gl_VertexID))"; 483 // TODO(Rodrigo): Guard geometry inputs against out of bound reads. Some games
953 case Attribute::Index::FrontFacing: 484 // set an 0x80000000 index for those and the shader fails to build. Find out why
954 // TODO(Subv): Find out what the values are for the other elements. 485 // this happens and what's its intent.
955 ASSERT(stage == Maxwell3D::Regs::ShaderStage::Fragment); 486 return "gs_" + name + "[ftou(" + Visit(abuf->GetBuffer()) +
956 return "vec4(0, 0, 0, intBitsToFloat(gl_FrontFacing ? -1 : 0))"; 487 ") % MAX_VERTEX_INPUT]";
957 default: 488 }
958 const u32 index{static_cast<u32>(attribute) - 489 return name;
959 static_cast<u32>(Attribute::Index::Attribute_0)}; 490 };
960 if (attribute >= Attribute::Index::Attribute_0 && 491
961 attribute <= Attribute::Index::Attribute_31) { 492 switch (attribute) {
962 if (declr_input_attribute.count(attribute) == 0) { 493 case Attribute::Index::Position:
963 declr_input_attribute[attribute] = input_mode; 494 if (stage != ShaderStage::Fragment) {
495 return GeometryPass("position") + GetSwizzle(element);
964 } else { 496 } else {
965 UNIMPLEMENTED_IF_MSG(declr_input_attribute[attribute] != input_mode, 497 return element == 3 ? "1.0f" : "gl_FragCoord" + GetSwizzle(element);
966 "Multiple input modes for the same attribute"); 498 }
499 case Attribute::Index::PointCoord:
500 switch (element) {
501 case 0:
502 return "gl_PointCoord.x";
503 case 1:
504 return "gl_PointCoord.y";
505 case 2:
506 case 3:
507 return "0";
967 } 508 }
968 return GeometryPass("input_attribute_" + std::to_string(index)); 509 UNREACHABLE();
510 return "0";
511 case Attribute::Index::TessCoordInstanceIDVertexID:
512 // TODO(Subv): Find out what the values are for the first two elements when inside a
513 // vertex shader, and what's the value of the fourth element when inside a Tess Eval
514 // shader.
515 ASSERT(stage == ShaderStage::Vertex);
516 switch (element) {
517 case 2:
518 // Config pack's first value is instance_id.
519 return "uintBitsToFloat(config_pack[0])";
520 case 3:
521 return "uintBitsToFloat(gl_VertexID)";
522 }
523 UNIMPLEMENTED_MSG("Unmanaged TessCoordInstanceIDVertexID element={}", element);
524 return "0";
525 case Attribute::Index::FrontFacing:
526 // TODO(Subv): Find out what the values are for the other elements.
527 ASSERT(stage == ShaderStage::Fragment);
528 switch (element) {
529 case 3:
530 return "itof(gl_FrontFacing ? -1 : 0)";
531 }
532 UNIMPLEMENTED_MSG("Unmanaged FrontFacing element={}", element);
533 return "0";
534 default:
535 if (attribute >= Attribute::Index::Attribute_0 &&
536 attribute <= Attribute::Index::Attribute_31) {
537 return GeometryPass(GetInputAttribute(attribute)) + GetSwizzle(element);
538 }
539 break;
969 } 540 }
970
971 UNIMPLEMENTED_MSG("Unhandled input attribute: {}", static_cast<u32>(attribute)); 541 UNIMPLEMENTED_MSG("Unhandled input attribute: {}", static_cast<u32>(attribute));
972 }
973 542
974 return "vec4(0, 0, 0, 0)"; 543 } else if (const auto cbuf = std::get_if<CbufNode>(node)) {
975 } 544 const Node offset = cbuf->GetOffset();
976 545 if (const auto immediate = std::get_if<ImmediateNode>(offset)) {
977 std::string GetInputFlags(const Attribute::Index attribute) { 546 // Direct access
978 const Tegra::Shader::IpaSampleMode sample_mode = 547 const u32 offset_imm = immediate->GetValue();
979 declr_input_attribute[attribute].sampling_mode; 548 ASSERT_MSG(offset_imm % 4 == 0, "Unaligned cbuf direct access");
980 const Tegra::Shader::IpaInterpMode interp_mode = 549 return fmt::format("{}[{}][{}]", GetConstBuffer(cbuf->GetIndex()),
981 declr_input_attribute[attribute].interpolation_mode; 550 offset_imm / (4 * 4), (offset_imm / 4) % 4);
982 std::string out; 551
983 switch (interp_mode) { 552 } else if (std::holds_alternative<OperationNode>(*offset)) {
984 case Tegra::Shader::IpaInterpMode::Flat: { 553 // Indirect access
985 out += "flat "; 554 const std::string final_offset = code.GenerateTemporary();
986 break; 555 code.AddLine("uint " + final_offset + " = (ftou(" + Visit(offset) + ") / 4);");
987 } 556 return fmt::format("{}[{} / 4][{} % 4]", GetConstBuffer(cbuf->GetIndex()),
988 case Tegra::Shader::IpaInterpMode::Linear: { 557 final_offset, final_offset);
989 out += "noperspective ";
990 break;
991 }
992 case Tegra::Shader::IpaInterpMode::Perspective: {
993 // Default, Smooth
994 break;
995 }
996 default: {
997 UNIMPLEMENTED_MSG("Unhandled IPA interp mode: {}", static_cast<u32>(interp_mode));
998 }
999 }
1000 switch (sample_mode) {
1001 case Tegra::Shader::IpaSampleMode::Centroid:
1002 // It can be implemented with the "centroid " keyword in glsl
1003 UNIMPLEMENTED_MSG("Unimplemented IPA sampler mode centroid");
1004 break;
1005 case Tegra::Shader::IpaSampleMode::Default:
1006 // Default, n/a
1007 break;
1008 default: {
1009 UNIMPLEMENTED_MSG("Unimplemented IPA sampler mode: {}", static_cast<u32>(sample_mode));
1010 break;
1011 }
1012 }
1013 return out;
1014 }
1015 558
1016 /// Generates code representing the declaration name of an output attribute register. 559 } else {
1017 std::string GetOutputAttribute(Attribute::Index attribute) { 560 UNREACHABLE_MSG("Unmanaged offset node type");
1018 switch (attribute) {
1019 case Attribute::Index::PointSize:
1020 return "gl_PointSize";
1021 case Attribute::Index::Position:
1022 return "position";
1023 case Attribute::Index::ClipDistances0123:
1024 case Attribute::Index::ClipDistances4567: {
1025 return "gl_ClipDistance";
1026 }
1027 default:
1028 const u32 index{static_cast<u32>(attribute) -
1029 static_cast<u32>(Attribute::Index::Attribute_0)};
1030 if (attribute >= Attribute::Index::Attribute_0) {
1031 declr_output_attribute.insert(attribute);
1032 return "output_attribute_" + std::to_string(index);
1033 } 561 }
1034 562
1035 UNIMPLEMENTED_MSG("Unhandled output attribute={}", index); 563 } else if (const auto gmem = std::get_if<GmemNode>(node)) {
1036 return {}; 564 const std::string real = Visit(gmem->GetRealAddress());
1037 } 565 const std::string base = Visit(gmem->GetBaseAddress());
1038 } 566 const std::string final_offset = "(ftou(" + real + ") - ftou(" + base + ")) / 4";
1039 567 return fmt::format("{}[{}]", GetGlobalMemory(gmem->GetDescriptor()), final_offset);
1040 ShaderWriter& shader;
1041 ShaderWriter& declarations;
1042 std::vector<GLSLRegister> regs;
1043 std::unordered_map<Attribute::Index, Tegra::Shader::IpaMode> declr_input_attribute;
1044 std::set<Attribute::Index> declr_output_attribute;
1045 std::array<ConstBufferEntry, Maxwell3D::Regs::MaxConstBuffers> declr_const_buffers;
1046 std::vector<SamplerEntry> used_samplers;
1047 const Maxwell3D::Regs::ShaderStage& stage;
1048 const std::string& suffix;
1049 const Tegra::Shader::Header& header;
1050 std::unordered_set<Attribute::Index> fixed_pipeline_output_attributes_used;
1051 std::array<bool, Maxwell::NumClipDistances> clip_distances{};
1052 u64 local_memory_size;
1053};
1054
1055class GLSLGenerator {
1056public:
1057 GLSLGenerator(const std::set<Subroutine>& subroutines, const ProgramCode& program_code,
1058 u32 main_offset, Maxwell3D::Regs::ShaderStage stage, const std::string& suffix,
1059 std::size_t shader_length)
1060 : subroutines(subroutines), program_code(program_code), main_offset(main_offset),
1061 stage(stage), suffix(suffix), shader_length(shader_length) {
1062 std::memcpy(&header, program_code.data(), sizeof(Tegra::Shader::Header));
1063 local_memory_size = header.GetLocalMemorySize();
1064 regs.SetLocalMemory(local_memory_size);
1065 Generate(suffix);
1066 }
1067 568
1068 std::string GetShaderCode() { 569 } else if (const auto lmem = std::get_if<LmemNode>(node)) {
1069 return declarations.GetResult() + shader.GetResult(); 570 return fmt::format("{}[ftou({}) / 4]", GetLocalMemory(), Visit(lmem->GetAddress()));
1070 }
1071 571
1072 /// Returns entries in the shader that are useful for external functions 572 } else if (const auto internal_flag = std::get_if<InternalFlagNode>(node)) {
1073 ShaderEntries GetEntries() const { 573 return GetInternalFlag(internal_flag->GetFlag());
1074 return {regs.GetConstBuffersDeclarations(), regs.GetSamplers(), regs.GetClipDistances(),
1075 shader_length};
1076 }
1077 574
1078private: 575 } else if (const auto conditional = std::get_if<ConditionalNode>(node)) {
1079 /// Gets the Subroutine object corresponding to the specified address. 576 // It's invalid to call conditional on nested nodes, use an operation instead
1080 const Subroutine& GetSubroutine(u32 begin, u32 end) const { 577 code.AddLine("if (" + Visit(conditional->GetCondition()) + ") {");
1081 const auto iter = subroutines.find(Subroutine{begin, end, suffix}); 578 ++code.scope;
1082 ASSERT(iter != subroutines.end());
1083 return *iter;
1084 }
1085 579
1086 /// Generates code representing a 19-bit immediate value 580 VisitBlock(conditional->GetCode());
1087 static std::string GetImmediate19(const Instruction& instr) {
1088 return fmt::format("uintBitsToFloat({})", instr.alu.GetImm20_19());
1089 }
1090 581
1091 /// Generates code representing a 32-bit immediate value 582 --code.scope;
1092 static std::string GetImmediate32(const Instruction& instr) { 583 code.AddLine('}');
1093 return fmt::format("uintBitsToFloat({})", instr.alu.GetImm20_32()); 584 return {};
1094 }
1095 585
1096 /// Generates code representing a vec2 pair unpacked from a half float immediate 586 } else if (const auto comment = std::get_if<CommentNode>(node)) {
1097 static std::string UnpackHalfImmediate(const Instruction& instr, bool negate) { 587 return "// " + comment->GetText();
1098 const std::string immediate = GetHalfFloat(std::to_string(instr.half_imm.PackImmediates()));
1099 if (!negate) {
1100 return immediate;
1101 } 588 }
1102 const std::string negate_first = instr.half_imm.first_negate != 0 ? "-" : ""; 589 UNREACHABLE();
1103 const std::string negate_second = instr.half_imm.second_negate != 0 ? "-" : ""; 590 return {};
1104 const std::string negate_vec = "vec2(" + negate_first + "1, " + negate_second + "1)";
1105
1106 return '(' + immediate + " * " + negate_vec + ')';
1107 } 591 }
1108 592
1109 /// Generates code representing a texture sampler. 593 std::string ApplyPrecise(Operation operation, const std::string& value) {
1110 std::string GetSampler(const Sampler& sampler, Tegra::Shader::TextureType type, bool is_array, 594 if (!IsPrecise(operation)) {
1111 bool is_shadow) { 595 return value;
1112 return regs.AccessSampler(sampler, type, is_array, is_shadow);
1113 }
1114
1115 /**
1116 * Adds code that calls a subroutine.
1117 * @param subroutine the subroutine to call.
1118 */
1119 void CallSubroutine(const Subroutine& subroutine) {
1120 if (subroutine.exit_method == ExitMethod::AlwaysEnd) {
1121 shader.AddLine(subroutine.GetName() + "();");
1122 shader.AddLine("return true;");
1123 } else if (subroutine.exit_method == ExitMethod::Conditional) {
1124 shader.AddLine("if (" + subroutine.GetName() + "()) { return true; }");
1125 } else {
1126 shader.AddLine(subroutine.GetName() + "();");
1127 } 596 }
1128 } 597 // There's a bug in NVidia's proprietary drivers that makes precise fail on fragment shaders
598 const std::string precise = stage != ShaderStage::Fragment ? "precise " : "";
1129 599
1130 /* 600 const std::string temporary = code.GenerateTemporary();
1131 * Writes code that assigns a predicate boolean variable. 601 code.AddLine(precise + "float " + temporary + " = " + value + ';');
1132 * @param pred The id of the predicate to write to. 602 return temporary;
1133 * @param value The expression value to assign to the predicate. 603 }
1134 */
1135 void SetPredicate(u64 pred, const std::string& value) {
1136 using Tegra::Shader::Pred;
1137 // Can't assign to the constant predicate.
1138 ASSERT(pred != static_cast<u64>(Pred::UnusedIndex));
1139
1140 std::string variable = 'p' + std::to_string(pred) + '_' + suffix;
1141 shader.AddLine(variable + " = " + value + ';');
1142 declr_predicates.insert(std::move(variable));
1143 }
1144
1145 /*
1146 * Returns the condition to use in the 'if' for a predicated instruction.
1147 * @param instr Instruction to generate the if condition for.
1148 * @returns string containing the predicate condition.
1149 */
1150 std::string GetPredicateCondition(u64 index, bool negate) {
1151 using Tegra::Shader::Pred;
1152 std::string variable;
1153
1154 // Index 7 is used as an 'Always True' condition.
1155 if (index == static_cast<u64>(Pred::UnusedIndex)) {
1156 variable = "true";
1157 } else {
1158 variable = 'p' + std::to_string(index) + '_' + suffix;
1159 declr_predicates.insert(variable);
1160 }
1161 if (negate) {
1162 return "!(" + variable + ')';
1163 }
1164 604
1165 return variable; 605 std::string VisitOperand(Operation operation, std::size_t operand_index) {
1166 } 606 const auto& operand = operation[operand_index];
1167 607 const bool parent_precise = IsPrecise(operation);
1168 /** 608 const bool child_precise = IsPrecise(operand);
1169 * Returns the comparison string to use to compare two values in the 'set' family of 609 const bool child_trivial = !std::holds_alternative<OperationNode>(*operand);
1170 * instructions. 610 if (!parent_precise || child_precise || child_trivial) {
1171 * @param condition The condition used in the 'set'-family instruction. 611 return Visit(operand);
1172 * @param op_a First operand to use for the comparison.
1173 * @param op_b Second operand to use for the comparison.
1174 * @returns String corresponding to the GLSL operator that matches the desired comparison.
1175 */
1176 std::string GetPredicateComparison(Tegra::Shader::PredCondition condition,
1177 const std::string& op_a, const std::string& op_b) const {
1178 using Tegra::Shader::PredCondition;
1179 static const std::unordered_map<PredCondition, const char*> PredicateComparisonStrings = {
1180 {PredCondition::LessThan, "<"},
1181 {PredCondition::Equal, "=="},
1182 {PredCondition::LessEqual, "<="},
1183 {PredCondition::GreaterThan, ">"},
1184 {PredCondition::NotEqual, "!="},
1185 {PredCondition::GreaterEqual, ">="},
1186 {PredCondition::LessThanWithNan, "<"},
1187 {PredCondition::NotEqualWithNan, "!="},
1188 {PredCondition::LessEqualWithNan, "<="},
1189 {PredCondition::GreaterThanWithNan, ">"},
1190 {PredCondition::GreaterEqualWithNan, ">="}};
1191
1192 const auto& comparison{PredicateComparisonStrings.find(condition)};
1193 UNIMPLEMENTED_IF_MSG(comparison == PredicateComparisonStrings.end(),
1194 "Unknown predicate comparison operation");
1195
1196 std::string predicate{'(' + op_a + ") " + comparison->second + " (" + op_b + ')'};
1197 if (condition == PredCondition::LessThanWithNan ||
1198 condition == PredCondition::NotEqualWithNan ||
1199 condition == PredCondition::LessEqualWithNan ||
1200 condition == PredCondition::GreaterThanWithNan ||
1201 condition == PredCondition::GreaterEqualWithNan) {
1202 predicate += " || isnan(" + op_a + ") || isnan(" + op_b + ')';
1203 } 612 }
1204 613
1205 return predicate; 614 const std::string temporary = code.GenerateTemporary();
1206 } 615 code.AddLine("float " + temporary + " = " + Visit(operand) + ';');
1207 616 return temporary;
1208 /** 617 }
1209 * Returns the operator string to use to combine two predicates in the 'setp' family of
1210 * instructions.
1211 * @params operation The operator used in the 'setp'-family instruction.
1212 * @returns String corresponding to the GLSL operator that matches the desired operator.
1213 */
1214 std::string GetPredicateCombiner(Tegra::Shader::PredOperation operation) const {
1215 using Tegra::Shader::PredOperation;
1216 static const std::unordered_map<PredOperation, const char*> PredicateOperationStrings = {
1217 {PredOperation::And, "&&"},
1218 {PredOperation::Or, "||"},
1219 {PredOperation::Xor, "^^"},
1220 };
1221
1222 auto op = PredicateOperationStrings.find(operation);
1223 UNIMPLEMENTED_IF_MSG(op == PredicateOperationStrings.end(), "Unknown predicate operation");
1224 return op->second;
1225 }
1226
1227 /**
1228 * Transforms the input string GLSL operand into one that applies the abs() function and negates
1229 * the output if necessary. When both abs and neg are true, the negation will be applied after
1230 * taking the absolute value.
1231 * @param operand The input operand to take the abs() of, negate, or both.
1232 * @param abs Whether to apply the abs() function to the input operand.
1233 * @param neg Whether to negate the input operand.
1234 * @returns String corresponding to the operand after being transformed by the abs() and
1235 * negation operations.
1236 */
1237 static std::string GetOperandAbsNeg(const std::string& operand, bool abs, bool neg) {
1238 std::string result = operand;
1239
1240 if (abs) {
1241 result = "abs(" + result + ')';
1242 }
1243 618
1244 if (neg) { 619 std::string VisitOperand(Operation operation, std::size_t operand_index, Type type) {
1245 result = "-(" + result + ')'; 620 std::string value = VisitOperand(operation, operand_index);
1246 } 621 switch (type) {
622 case Type::HalfFloat: {
623 const auto half_meta = std::get_if<MetaHalfArithmetic>(&operation.GetMeta());
624 if (!half_meta) {
625 value = "toHalf2(" + value + ')';
626 }
1247 627
1248 return result; 628 switch (half_meta->types.at(operand_index)) {
1249 }
1250
1251 /*
1252 * Transforms the input string GLSL operand into an unpacked half float pair.
1253 * @note This function returns a float type pair instead of a half float pair. This is because
1254 * real half floats are not standardized in GLSL but unpackHalf2x16 (which returns a vec2) is.
1255 * @param operand Input operand. It has to be an unsigned integer.
1256 * @param type How to unpack the unsigned integer to a half float pair.
1257 * @param abs Get the absolute value of unpacked half floats.
1258 * @param neg Get the negative value of unpacked half floats.
1259 * @returns String corresponding to a half float pair.
1260 */
1261 static std::string GetHalfFloat(const std::string& operand,
1262 Tegra::Shader::HalfType type = Tegra::Shader::HalfType::H0_H1,
1263 bool abs = false, bool neg = false) {
1264 // "vec2" calls emitted in this function are intended to alias components.
1265 const std::string value = [&]() {
1266 switch (type) {
1267 case Tegra::Shader::HalfType::H0_H1: 629 case Tegra::Shader::HalfType::H0_H1:
1268 return "unpackHalf2x16(" + operand + ')'; 630 return "toHalf2(" + value + ')';
1269 case Tegra::Shader::HalfType::F32: 631 case Tegra::Shader::HalfType::F32:
1270 return "vec2(uintBitsToFloat(" + operand + "))"; 632 return "vec2(" + value + ')';
1271 case Tegra::Shader::HalfType::H0_H0: 633 case Tegra::Shader::HalfType::H0_H0:
1272 case Tegra::Shader::HalfType::H1_H1: { 634 return "vec2(toHalf2(" + value + ")[0])";
1273 const bool high = type == Tegra::Shader::HalfType::H1_H1; 635 case Tegra::Shader::HalfType::H1_H1:
1274 const char unpack_index = "xy"[high ? 1 : 0]; 636 return "vec2(toHalf2(" + value + ")[1])";
1275 return "vec2(unpackHalf2x16(" + operand + ")." + unpack_index + ')';
1276 }
1277 default:
1278 UNREACHABLE();
1279 return std::string("vec2(0)");
1280 } 637 }
1281 }();
1282
1283 return GetOperandAbsNeg(value, abs, neg);
1284 }
1285
1286 /*
1287 * Returns whether the instruction at the specified offset is a 'sched' instruction.
1288 * Sched instructions always appear before a sequence of 3 instructions.
1289 */
1290 bool IsSchedInstruction(u32 offset) const {
1291 // sched instructions appear once every 4 instructions.
1292 static constexpr std::size_t SchedPeriod = 4;
1293 u32 absolute_offset = offset - main_offset;
1294
1295 return (absolute_offset % SchedPeriod) == 0;
1296 }
1297
1298 void WriteLogicOperation(Register dest, LogicOperation logic_op, const std::string& op_a,
1299 const std::string& op_b,
1300 Tegra::Shader::PredicateResultMode predicate_mode,
1301 Tegra::Shader::Pred predicate, const bool set_cc) {
1302 std::string result{};
1303 switch (logic_op) {
1304 case LogicOperation::And: {
1305 result = '(' + op_a + " & " + op_b + ')';
1306 break;
1307 }
1308 case LogicOperation::Or: {
1309 result = '(' + op_a + " | " + op_b + ')';
1310 break;
1311 }
1312 case LogicOperation::Xor: {
1313 result = '(' + op_a + " ^ " + op_b + ')';
1314 break;
1315 }
1316 case LogicOperation::PassB: {
1317 result = op_b;
1318 break;
1319 }
1320 default:
1321 UNIMPLEMENTED_MSG("Unimplemented logic operation={}", static_cast<u32>(logic_op));
1322 }
1323
1324 if (dest != Tegra::Shader::Register::ZeroIndex) {
1325 regs.SetRegisterToInteger(dest, true, 0, result, 1, 1, false, set_cc);
1326 } 638 }
1327
1328 using Tegra::Shader::PredicateResultMode;
1329 // Write the predicate value depending on the predicate mode.
1330 switch (predicate_mode) {
1331 case PredicateResultMode::None:
1332 // Do nothing.
1333 return;
1334 case PredicateResultMode::NotZero:
1335 // Set the predicate to true if the result is not zero.
1336 SetPredicate(static_cast<u64>(predicate), '(' + result + ") != 0");
1337 break;
1338 default: 639 default:
1339 UNIMPLEMENTED_MSG("Unimplemented predicate result mode: {}", 640 return CastOperand(value, type);
1340 static_cast<u32>(predicate_mode));
1341 } 641 }
1342 } 642 }
1343 643
1344 void WriteLop3Instruction(Register dest, const std::string& op_a, const std::string& op_b, 644 std::string CastOperand(const std::string& value, Type type) const {
1345 const std::string& op_c, const std::string& imm_lut, 645 switch (type) {
1346 const bool set_cc) { 646 case Type::Bool:
1347 if (dest == Tegra::Shader::Register::ZeroIndex) { 647 case Type::Bool2:
1348 return; 648 case Type::Float:
1349 } 649 return value;
1350 650 case Type::Int:
1351 static constexpr std::array<const char*, 32> shift_amounts = { 651 return "ftoi(" + value + ')';
1352 "0", "1", "2", "3", "4", "5", "6", "7", "8", "9", "10", 652 case Type::Uint:
1353 "11", "12", "13", "14", "15", "16", "17", "18", "19", "20", "21", 653 return "ftou(" + value + ')';
1354 "22", "23", "24", "25", "26", "27", "28", "29", "30", "31"}; 654 case Type::HalfFloat:
1355 655 // Can't be handled as a stand-alone value
1356 std::string result; 656 UNREACHABLE();
1357 result += '('; 657 return value;
1358
1359 for (std::size_t i = 0; i < shift_amounts.size(); ++i) {
1360 if (i)
1361 result += '|';
1362 result += "(((" + imm_lut + " >> (((" + op_c + " >> " + shift_amounts[i] +
1363 ") & 1) | ((" + op_b + " >> " + shift_amounts[i] + ") & 1) << 1 | ((" + op_a +
1364 " >> " + shift_amounts[i] + ") & 1) << 2)) & 1) << " + shift_amounts[i] + ")";
1365 } 658 }
1366 659 UNREACHABLE();
1367 result += ')'; 660 return value;
1368
1369 regs.SetRegisterToInteger(dest, true, 0, result, 1, 1, false, set_cc);
1370 } 661 }
1371 662
1372 void WriteTexsInstructionFloat(const Instruction& instr, const std::string& texture) { 663 std::string BitwiseCastResult(std::string value, Type type, bool needs_parenthesis = false) {
1373 // TEXS has two destination registers and a swizzle. The first two elements in the swizzle 664 switch (type) {
1374 // go into gpr0+0 and gpr0+1, and the rest goes into gpr28+0 and gpr28+1 665 case Type::Bool:
1375 666 case Type::Bool2:
1376 std::size_t written_components = 0; 667 case Type::Float:
1377 for (u32 component = 0; component < 4; ++component) { 668 if (needs_parenthesis) {
1378 if (!instr.texs.IsComponentEnabled(component)) { 669 return '(' + value + ')';
1379 continue;
1380 }
1381
1382 if (written_components < 2) {
1383 // Write the first two swizzle components to gpr0 and gpr0+1
1384 regs.SetRegisterToFloat(instr.gpr0, component, texture, 1, 4, false, false,
1385 written_components % 2);
1386 } else {
1387 ASSERT(instr.texs.HasTwoDestinations());
1388 // Write the rest of the swizzle components to gpr28 and gpr28+1
1389 regs.SetRegisterToFloat(instr.gpr28, component, texture, 1, 4, false, false,
1390 written_components % 2);
1391 } 670 }
1392 671 return value;
1393 ++written_components; 672 case Type::Int:
1394 } 673 return "itof(" + value + ')';
1395 } 674 case Type::Uint:
1396 675 return "utof(" + value + ')';
1397 void WriteTexsInstructionHalfFloat(const Instruction& instr, const std::string& texture) { 676 case Type::HalfFloat:
1398 // TEXS.F16 destionation registers are packed in two registers in pairs (just like any half 677 return "fromHalf2(" + value + ')';
1399 // float instruction). 678 }
1400 679 UNREACHABLE();
1401 std::array<std::string, 4> components; 680 return value;
1402 u32 written_components = 0;
1403
1404 for (u32 component = 0; component < 4; ++component) {
1405 if (!instr.texs.IsComponentEnabled(component))
1406 continue;
1407 components[written_components++] = texture + GetSwizzle(component);
1408 }
1409 if (written_components == 0)
1410 return;
1411
1412 const auto BuildComponent = [&](std::string low, std::string high, bool high_enabled) {
1413 return "vec2(" + low + ", " + (high_enabled ? high : "0") + ')';
1414 };
1415
1416 regs.SetRegisterToHalfFloat(
1417 instr.gpr0, 0, BuildComponent(components[0], components[1], written_components > 1),
1418 Tegra::Shader::HalfMerge::H0_H1, 1, 1);
1419
1420 if (written_components > 2) {
1421 ASSERT(instr.texs.HasTwoDestinations());
1422 regs.SetRegisterToHalfFloat(
1423 instr.gpr28, 0,
1424 BuildComponent(components[2], components[3], written_components > 3),
1425 Tegra::Shader::HalfMerge::H0_H1, 1, 1);
1426 }
1427 } 681 }
1428 682
1429 static u32 TextureCoordinates(Tegra::Shader::TextureType texture_type) { 683 std::string GenerateUnary(Operation operation, const std::string& func, Type result_type,
1430 switch (texture_type) { 684 Type type_a, bool needs_parenthesis = true) {
1431 case Tegra::Shader::TextureType::Texture1D: 685 return ApplyPrecise(operation,
1432 return 1; 686 BitwiseCastResult(func + '(' + VisitOperand(operation, 0, type_a) + ')',
1433 case Tegra::Shader::TextureType::Texture2D: 687 result_type, needs_parenthesis));
1434 return 2;
1435 case Tegra::Shader::TextureType::Texture3D:
1436 case Tegra::Shader::TextureType::TextureCube:
1437 return 3;
1438 default:
1439 UNIMPLEMENTED_MSG("Unhandled texture type: {}", static_cast<u32>(texture_type));
1440 return 0;
1441 }
1442 } 688 }
1443 689
1444 /* 690 std::string GenerateBinaryInfix(Operation operation, const std::string& func, Type result_type,
1445 * Emits code to push the input target address to the flow address stack, incrementing the stack 691 Type type_a, Type type_b) {
1446 * top. 692 const std::string op_a = VisitOperand(operation, 0, type_a);
1447 */ 693 const std::string op_b = VisitOperand(operation, 1, type_b);
1448 void EmitPushToFlowStack(u32 target) {
1449 const auto scope = shader.Scope();
1450 694
1451 shader.AddLine("flow_stack[flow_stack_top] = " + std::to_string(target) + "u;"); 695 return ApplyPrecise(
1452 shader.AddLine("flow_stack_top++;"); 696 operation, BitwiseCastResult('(' + op_a + ' ' + func + ' ' + op_b + ')', result_type));
1453 } 697 }
1454 698
1455 /* 699 std::string GenerateBinaryCall(Operation operation, const std::string& func, Type result_type,
1456 * Emits code to pop an address from the flow address stack, setting the jump address to the 700 Type type_a, Type type_b) {
1457 * popped address and decrementing the stack top. 701 const std::string op_a = VisitOperand(operation, 0, type_a);
1458 */ 702 const std::string op_b = VisitOperand(operation, 1, type_b);
1459 void EmitPopFromFlowStack() {
1460 const auto scope = shader.Scope();
1461 703
1462 shader.AddLine("flow_stack_top--;"); 704 return ApplyPrecise(operation,
1463 shader.AddLine("jmp_to = flow_stack[flow_stack_top];"); 705 BitwiseCastResult(func + '(' + op_a + ", " + op_b + ')', result_type));
1464 shader.AddLine("break;");
1465 } 706 }
1466 707
1467 /// Writes the output values from a fragment shader to the corresponding GLSL output variables. 708 std::string GenerateTernary(Operation operation, const std::string& func, Type result_type,
1468 void EmitFragmentOutputsWrite() { 709 Type type_a, Type type_b, Type type_c) {
1469 ASSERT(stage == Maxwell3D::Regs::ShaderStage::Fragment); 710 const std::string op_a = VisitOperand(operation, 0, type_a);
711 const std::string op_b = VisitOperand(operation, 1, type_b);
712 const std::string op_c = VisitOperand(operation, 2, type_c);
1470 713
1471 UNIMPLEMENTED_IF_MSG(header.ps.omap.sample_mask != 0, "Samplemask write is unimplemented"); 714 return ApplyPrecise(
1472 715 operation,
1473 shader.AddLine("if (alpha_test[0] != 0) {"); 716 BitwiseCastResult(func + '(' + op_a + ", " + op_b + ", " + op_c + ')', result_type));
1474 ++shader.scope;
1475 // We start on the register containing the alpha value in the first RT.
1476 u32 current_reg = 3;
1477 for (u32 render_target = 0; render_target < Maxwell3D::Regs::NumRenderTargets;
1478 ++render_target) {
1479 // TODO(Blinkhawk): verify the behavior of alpha testing on hardware when
1480 // multiple render targets are used.
1481 if (header.ps.IsColorComponentOutputEnabled(render_target, 0) ||
1482 header.ps.IsColorComponentOutputEnabled(render_target, 1) ||
1483 header.ps.IsColorComponentOutputEnabled(render_target, 2) ||
1484 header.ps.IsColorComponentOutputEnabled(render_target, 3)) {
1485 shader.AddLine(fmt::format("if (!AlphaFunc({})) discard;",
1486 regs.GetRegisterAsFloat(current_reg)));
1487 current_reg += 4;
1488 }
1489 }
1490 --shader.scope;
1491 shader.AddLine('}');
1492
1493 // Write the color outputs using the data in the shader registers, disabled
1494 // rendertargets/components are skipped in the register assignment.
1495 current_reg = 0;
1496 for (u32 render_target = 0; render_target < Maxwell3D::Regs::NumRenderTargets;
1497 ++render_target) {
1498 // TODO(Subv): Figure out how dual-source blending is configured in the Switch.
1499 for (u32 component = 0; component < 4; ++component) {
1500 if (header.ps.IsColorComponentOutputEnabled(render_target, component)) {
1501 shader.AddLine(fmt::format("FragColor{}[{}] = {};", render_target, component,
1502 regs.GetRegisterAsFloat(current_reg)));
1503 ++current_reg;
1504 }
1505 }
1506 }
1507
1508 if (header.ps.omap.depth) {
1509 // The depth output is always 2 registers after the last color output, and current_reg
1510 // already contains one past the last color register.
1511
1512 shader.AddLine(
1513 "gl_FragDepth = " +
1514 regs.GetRegisterAsFloat(static_cast<Tegra::Shader::Register>(current_reg) + 1) +
1515 ';');
1516 }
1517 } 717 }
1518 718
1519 /// Unpacks a video instruction operand (e.g. VMAD). 719 std::string GenerateQuaternary(Operation operation, const std::string& func, Type result_type,
1520 std::string GetVideoOperand(const std::string& op, bool is_chunk, bool is_signed, 720 Type type_a, Type type_b, Type type_c, Type type_d) {
1521 Tegra::Shader::VideoType type, u64 byte_height) { 721 const std::string op_a = VisitOperand(operation, 0, type_a);
1522 const std::string value = [&]() { 722 const std::string op_b = VisitOperand(operation, 1, type_b);
1523 if (!is_chunk) { 723 const std::string op_c = VisitOperand(operation, 2, type_c);
1524 const auto offset = static_cast<u32>(byte_height * 8); 724 const std::string op_d = VisitOperand(operation, 3, type_d);
1525 return "((" + op + " >> " + std::to_string(offset) + ") & 0xff)";
1526 }
1527 const std::string zero = "0";
1528
1529 switch (type) {
1530 case Tegra::Shader::VideoType::Size16_Low:
1531 return '(' + op + " & 0xffff)";
1532 case Tegra::Shader::VideoType::Size16_High:
1533 return '(' + op + " >> 16)";
1534 case Tegra::Shader::VideoType::Size32:
1535 // TODO(Rodrigo): From my hardware tests it becomes a bit "mad" when
1536 // this type is used (1 * 1 + 0 == 0x5b800000). Until a better
1537 // explanation is found: abort.
1538 UNIMPLEMENTED();
1539 return zero;
1540 case Tegra::Shader::VideoType::Invalid:
1541 UNREACHABLE_MSG("Invalid instruction encoding");
1542 return zero;
1543 default:
1544 UNREACHABLE();
1545 return zero;
1546 }
1547 }();
1548
1549 if (is_signed) {
1550 return "int(" + value + ')';
1551 }
1552 return value;
1553 };
1554
1555 /// Gets the A operand for a video instruction.
1556 std::string GetVideoOperandA(Instruction instr) {
1557 return GetVideoOperand(regs.GetRegisterAsInteger(instr.gpr8, 0, false),
1558 instr.video.is_byte_chunk_a != 0, instr.video.signed_a,
1559 instr.video.type_a, instr.video.byte_height_a);
1560 }
1561 725
1562 /// Gets the B operand for a video instruction. 726 return ApplyPrecise(operation, BitwiseCastResult(func + '(' + op_a + ", " + op_b + ", " +
1563 std::string GetVideoOperandB(Instruction instr) { 727 op_c + ", " + op_d + ')',
1564 if (instr.video.use_register_b) { 728 result_type));
1565 return GetVideoOperand(regs.GetRegisterAsInteger(instr.gpr20, 0, false),
1566 instr.video.is_byte_chunk_b != 0, instr.video.signed_b,
1567 instr.video.type_b, instr.video.byte_height_b);
1568 } else {
1569 return '(' +
1570 std::to_string(instr.video.signed_b ? static_cast<s16>(instr.alu.GetImm20_16())
1571 : instr.alu.GetImm20_16()) +
1572 ')';
1573 }
1574 } 729 }
1575 730
1576 std::pair<size_t, std::string> ValidateAndGetCoordinateElement( 731 std::string GenerateTexture(Operation operation, const std::string& function_suffix,
1577 const Tegra::Shader::TextureType texture_type, const bool depth_compare, 732 const std::vector<TextureIR>& extras) {
1578 const bool is_array, const bool lod_bias_enabled, size_t max_coords, size_t max_inputs) { 733 constexpr std::array<const char*, 4> coord_constructors = {"float", "vec2", "vec3", "vec4"};
1579 const size_t coord_count = TextureCoordinates(texture_type);
1580
1581 size_t total_coord_count = coord_count + (is_array ? 1 : 0) + (depth_compare ? 1 : 0);
1582 const size_t total_reg_count = total_coord_count + (lod_bias_enabled ? 1 : 0);
1583 if (total_coord_count > max_coords || total_reg_count > max_inputs) {
1584 UNIMPLEMENTED_MSG("Unsupported Texture operation");
1585 total_coord_count = std::min(total_coord_count, max_coords);
1586 }
1587 // 1D.DC opengl is using a vec3 but 2nd component is ignored later.
1588 total_coord_count +=
1589 (depth_compare && !is_array && texture_type == Tegra::Shader::TextureType::Texture1D)
1590 ? 1
1591 : 0;
1592
1593 constexpr std::array<const char*, 5> coord_container{
1594 {"", "float coord = (", "vec2 coord = vec2(", "vec3 coord = vec3(",
1595 "vec4 coord = vec4("}};
1596
1597 return std::pair<size_t, std::string>(coord_count, coord_container[total_coord_count]);
1598 }
1599
1600 std::string GetTextureCode(const Tegra::Shader::Instruction& instr,
1601 const Tegra::Shader::TextureType texture_type,
1602 const Tegra::Shader::TextureProcessMode process_mode,
1603 const bool depth_compare, const bool is_array,
1604 const size_t bias_offset) {
1605
1606 if ((texture_type == Tegra::Shader::TextureType::Texture3D &&
1607 (is_array || depth_compare)) ||
1608 (texture_type == Tegra::Shader::TextureType::TextureCube && is_array &&
1609 depth_compare)) {
1610 UNIMPLEMENTED_MSG("This method is not supported.");
1611 }
1612
1613 const std::string sampler =
1614 GetSampler(instr.sampler, texture_type, is_array, depth_compare);
1615
1616 const bool lod_needed = process_mode == Tegra::Shader::TextureProcessMode::LZ ||
1617 process_mode == Tegra::Shader::TextureProcessMode::LL ||
1618 process_mode == Tegra::Shader::TextureProcessMode::LLA;
1619
1620 // LOD selection (either via bias or explicit textureLod) not supported in GL for
1621 // sampler2DArrayShadow and samplerCubeArrayShadow.
1622 const bool gl_lod_supported = !(
1623 (texture_type == Tegra::Shader::TextureType::Texture2D && is_array && depth_compare) ||
1624 (texture_type == Tegra::Shader::TextureType::TextureCube && is_array && depth_compare));
1625 734
1626 const std::string read_method = lod_needed && gl_lod_supported ? "textureLod(" : "texture("; 735 const auto meta = std::get_if<MetaTexture>(&operation.GetMeta());
1627 std::string texture = read_method + sampler + ", coord"; 736 ASSERT(meta);
1628 737
1629 UNIMPLEMENTED_IF(process_mode != Tegra::Shader::TextureProcessMode::None && 738 const std::size_t count = operation.GetOperandsCount();
1630 !gl_lod_supported); 739 const bool has_array = meta->sampler.IsArray();
740 const bool has_shadow = meta->sampler.IsShadow();
1631 741
1632 if (process_mode != Tegra::Shader::TextureProcessMode::None && gl_lod_supported) { 742 std::string expr = "texture" + function_suffix;
1633 if (process_mode == Tegra::Shader::TextureProcessMode::LZ) { 743 if (!meta->aoffi.empty()) {
1634 texture += ", 0.0"; 744 expr += "Offset";
1635 } else {
1636 // If present, lod or bias are always stored in the register indexed by the
1637 // gpr20
1638 // field with an offset depending on the usage of the other registers
1639 texture += ',' + regs.GetRegisterAsFloat(instr.gpr20.Value() + bias_offset);
1640 }
1641 }
1642 texture += ")";
1643 return texture;
1644 }
1645
1646 std::pair<std::string, std::string> GetTEXCode(
1647 const Instruction& instr, const Tegra::Shader::TextureType texture_type,
1648 const Tegra::Shader::TextureProcessMode process_mode, const bool depth_compare,
1649 const bool is_array) {
1650 const bool lod_bias_enabled = (process_mode != Tegra::Shader::TextureProcessMode::None &&
1651 process_mode != Tegra::Shader::TextureProcessMode::LZ);
1652
1653 const auto [coord_count, coord_dcl] = ValidateAndGetCoordinateElement(
1654 texture_type, depth_compare, is_array, lod_bias_enabled, 4, 5);
1655 // If enabled arrays index is always stored in the gpr8 field
1656 const u64 array_register = instr.gpr8.Value();
1657 // First coordinate index is the gpr8 or gpr8 + 1 when arrays are used
1658 const u64 coord_register = array_register + (is_array ? 1 : 0);
1659
1660 std::string coord = coord_dcl;
1661 for (size_t i = 0; i < coord_count;) {
1662 coord += regs.GetRegisterAsFloat(coord_register + i);
1663 ++i;
1664 if (i != coord_count) {
1665 coord += ',';
1666 }
1667 }
1668 // 1D.DC in opengl the 2nd component is ignored.
1669 if (depth_compare && !is_array && texture_type == Tegra::Shader::TextureType::Texture1D) {
1670 coord += ",0.0";
1671 }
1672 if (is_array) {
1673 coord += ',' + regs.GetRegisterAsInteger(array_register);
1674 }
1675 if (depth_compare) {
1676 // Depth is always stored in the register signaled by gpr20
1677 // or in the next register if lod or bias are used
1678 const u64 depth_register = instr.gpr20.Value() + (lod_bias_enabled ? 1 : 0);
1679 coord += ',' + regs.GetRegisterAsFloat(depth_register);
1680 }
1681 coord += ");";
1682 return std::make_pair(
1683 coord, GetTextureCode(instr, texture_type, process_mode, depth_compare, is_array, 0));
1684 }
1685
1686 std::pair<std::string, std::string> GetTEXSCode(
1687 const Instruction& instr, const Tegra::Shader::TextureType texture_type,
1688 const Tegra::Shader::TextureProcessMode process_mode, const bool depth_compare,
1689 const bool is_array) {
1690 const bool lod_bias_enabled = (process_mode != Tegra::Shader::TextureProcessMode::None &&
1691 process_mode != Tegra::Shader::TextureProcessMode::LZ);
1692
1693 const auto [coord_count, coord_dcl] = ValidateAndGetCoordinateElement(
1694 texture_type, depth_compare, is_array, lod_bias_enabled, 4, 4);
1695 // If enabled arrays index is always stored in the gpr8 field
1696 const u64 array_register = instr.gpr8.Value();
1697 // First coordinate index is stored in gpr8 field or (gpr8 + 1) when arrays are used
1698 const u64 coord_register = array_register + (is_array ? 1 : 0);
1699 const u64 last_coord_register =
1700 (is_array || !(lod_bias_enabled || depth_compare) || (coord_count > 2))
1701 ? static_cast<u64>(instr.gpr20.Value())
1702 : coord_register + 1;
1703
1704 std::string coord = coord_dcl;
1705 for (size_t i = 0; i < coord_count; ++i) {
1706 const bool last = (i == (coord_count - 1)) && (coord_count > 1);
1707 coord += regs.GetRegisterAsFloat(last ? last_coord_register : coord_register + i);
1708 if (i < coord_count - 1) {
1709 coord += ',';
1710 }
1711 } 745 }
746 expr += '(' + GetSampler(meta->sampler) + ", ";
747 expr += coord_constructors.at(count + (has_array ? 1 : 0) + (has_shadow ? 1 : 0) - 1);
748 expr += '(';
749 for (std::size_t i = 0; i < count; ++i) {
750 expr += Visit(operation[i]);
1712 751
1713 if (is_array) { 752 const std::size_t next = i + 1;
1714 coord += ',' + regs.GetRegisterAsInteger(array_register); 753 if (next < count)
754 expr += ", ";
1715 } 755 }
1716 if (depth_compare) { 756 if (has_array) {
1717 // Depth is always stored in the register signaled by gpr20 757 expr += ", float(ftoi(" + Visit(meta->array) + "))";
1718 // or in the next register if lod or bias are used
1719 const u64 depth_register = instr.gpr20.Value() + (lod_bias_enabled ? 1 : 0);
1720 coord += ',' + regs.GetRegisterAsFloat(depth_register);
1721 } 758 }
1722 coord += ");"; 759 if (has_shadow) {
1723 760 expr += ", " + Visit(meta->depth_compare);
1724 return std::make_pair(coord,
1725 GetTextureCode(instr, texture_type, process_mode, depth_compare,
1726 is_array, (coord_count > 2 ? 1 : 0)));
1727 }
1728
1729 std::pair<std::string, std::string> GetTLD4Code(const Instruction& instr,
1730 const Tegra::Shader::TextureType texture_type,
1731 const bool depth_compare, const bool is_array) {
1732
1733 const size_t coord_count = TextureCoordinates(texture_type);
1734 const size_t total_coord_count = coord_count + (is_array ? 1 : 0);
1735 const size_t total_reg_count = total_coord_count + (depth_compare ? 1 : 0);
1736
1737 constexpr std::array<const char*, 5> coord_container{
1738 {"", "", "vec2 coord = vec2(", "vec3 coord = vec3(", "vec4 coord = vec4("}};
1739
1740 // If enabled arrays index is always stored in the gpr8 field
1741 const u64 array_register = instr.gpr8.Value();
1742 // First coordinate index is the gpr8 or gpr8 + 1 when arrays are used
1743 const u64 coord_register = array_register + (is_array ? 1 : 0);
1744
1745 std::string coord = coord_container[total_coord_count];
1746 for (size_t i = 0; i < coord_count;) {
1747 coord += regs.GetRegisterAsFloat(coord_register + i);
1748 ++i;
1749 if (i != coord_count) {
1750 coord += ',';
1751 }
1752 }
1753
1754 if (is_array) {
1755 coord += ',' + regs.GetRegisterAsInteger(array_register);
1756 }
1757 coord += ");";
1758
1759 const std::string sampler =
1760 GetSampler(instr.sampler, texture_type, is_array, depth_compare);
1761
1762 std::string texture = "textureGather(" + sampler + ", coord, ";
1763 if (depth_compare) {
1764 // Depth is always stored in the register signaled by gpr20
1765 texture += regs.GetRegisterAsFloat(instr.gpr20.Value()) + ')';
1766 } else {
1767 texture += std::to_string(instr.tld4.component) + ')';
1768 } 761 }
1769 return std::make_pair(coord, texture); 762 expr += ')';
1770 }
1771
1772 std::pair<std::string, std::string> GetTLDSCode(const Instruction& instr,
1773 const Tegra::Shader::TextureType texture_type,
1774 const bool is_array) {
1775
1776 const size_t coord_count = TextureCoordinates(texture_type);
1777 const size_t total_coord_count = coord_count + (is_array ? 1 : 0);
1778 const bool lod_enabled =
1779 instr.tlds.GetTextureProcessMode() == Tegra::Shader::TextureProcessMode::LL;
1780
1781 constexpr std::array<const char*, 4> coord_container{
1782 {"", "int coords = (", "ivec2 coords = ivec2(", "ivec3 coords = ivec3("}};
1783
1784 std::string coord = coord_container[total_coord_count];
1785
1786 // If enabled arrays index is always stored in the gpr8 field
1787 const u64 array_register = instr.gpr8.Value();
1788
1789 // if is array gpr20 is used
1790 const u64 coord_register = is_array ? instr.gpr20.Value() : instr.gpr8.Value();
1791
1792 const u64 last_coord_register =
1793 ((coord_count > 2) || (coord_count == 2 && !lod_enabled)) && !is_array
1794 ? static_cast<u64>(instr.gpr20.Value())
1795 : coord_register + 1;
1796 763
1797 for (size_t i = 0; i < coord_count; ++i) { 764 for (const auto& variant : extras) {
1798 const bool last = (i == (coord_count - 1)) && (coord_count > 1); 765 if (const auto argument = std::get_if<TextureArgument>(&variant)) {
1799 coord += regs.GetRegisterAsInteger(last ? last_coord_register : coord_register + i); 766 expr += GenerateTextureArgument(*argument);
1800 if (i < coord_count - 1) { 767 } else if (std::get_if<TextureAoffi>(&variant)) {
1801 coord += ','; 768 expr += GenerateTextureAoffi(meta->aoffi);
769 } else {
770 UNREACHABLE();
1802 } 771 }
1803 } 772 }
1804 if (is_array) {
1805 coord += ',' + regs.GetRegisterAsInteger(array_register);
1806 }
1807 coord += ");";
1808
1809 const std::string sampler = GetSampler(instr.sampler, texture_type, is_array, false);
1810
1811 std::string texture = "texelFetch(" + sampler + ", coords";
1812
1813 if (lod_enabled) {
1814 // When lod is used always is in grp20
1815 texture += ", " + regs.GetRegisterAsInteger(instr.gpr20) + ')';
1816 } else {
1817 texture += ", 0)";
1818 }
1819 return std::make_pair(coord, texture);
1820 }
1821
1822 /**
1823 * Compiles a single instruction from Tegra to GLSL.
1824 * @param offset the offset of the Tegra shader instruction.
1825 * @return the offset of the next instruction to execute. Usually it is the current offset
1826 * + 1. If the current instruction always terminates the program, returns PROGRAM_END.
1827 */
1828 u32 CompileInstr(u32 offset) {
1829 // Ignore sched instructions when generating code.
1830 if (IsSchedInstruction(offset)) {
1831 return offset + 1;
1832 }
1833
1834 const Instruction instr = {program_code[offset]};
1835 const auto opcode = OpCode::Decode(instr);
1836
1837 // Decoding failure
1838 if (!opcode) {
1839 UNIMPLEMENTED_MSG("Unhandled instruction: {0:x}", instr.value);
1840 return offset + 1;
1841 }
1842 773
1843 shader.AddLine( 774 return expr + ')';
1844 fmt::format("// {}: {} (0x{:016x})", offset, opcode->get().GetName(), instr.value)); 775 }
1845
1846 using Tegra::Shader::Pred;
1847 UNIMPLEMENTED_IF_MSG(instr.pred.full_pred == Pred::NeverExecute,
1848 "NeverExecute predicate not implemented");
1849
1850 // Some instructions (like SSY) don't have a predicate field, they are always
1851 // unconditionally executed.
1852 bool can_be_predicated = OpCode::IsPredicatedInstruction(opcode->get().GetId());
1853 776
1854 if (can_be_predicated && instr.pred.pred_index != static_cast<u64>(Pred::UnusedIndex)) { 777 std::string GenerateTextureArgument(TextureArgument argument) {
1855 shader.AddLine("if (" + 778 const auto [type, operand] = argument;
1856 GetPredicateCondition(instr.pred.pred_index, instr.negate_pred != 0) + 779 if (operand == nullptr) {
1857 ')'); 780 return {};
1858 shader.AddLine('{');
1859 ++shader.scope;
1860 } 781 }
1861 782
1862 switch (opcode->get().GetType()) { 783 std::string expr = ", ";
1863 case OpCode::Type::Arithmetic: { 784 switch (type) {
1864 std::string op_a = regs.GetRegisterAsFloat(instr.gpr8); 785 case Type::Int:
1865 786 if (const auto immediate = std::get_if<ImmediateNode>(operand)) {
1866 std::string op_b; 787 // Inline the string as an immediate integer in GLSL (some extra arguments are
1867 788 // required to be constant)
1868 if (instr.is_b_imm) { 789 expr += std::to_string(static_cast<s32>(immediate->GetValue()));
1869 op_b = GetImmediate19(instr);
1870 } else { 790 } else {
1871 if (instr.is_b_gpr) { 791 expr += "ftoi(" + Visit(operand) + ')';
1872 op_b = regs.GetRegisterAsFloat(instr.gpr20);
1873 } else {
1874 op_b = regs.GetUniform(instr.cbuf34.index, instr.cbuf34.offset,
1875 GLSLRegister::Type::Float);
1876 }
1877 }
1878
1879 switch (opcode->get().GetId()) {
1880 case OpCode::Id::MOV_C:
1881 case OpCode::Id::MOV_R: {
1882 // MOV does not have neither 'abs' nor 'neg' bits.
1883 regs.SetRegisterToFloat(instr.gpr0, 0, op_b, 1, 1);
1884 break;
1885 }
1886
1887 case OpCode::Id::FMUL_C:
1888 case OpCode::Id::FMUL_R:
1889 case OpCode::Id::FMUL_IMM: {
1890 // FMUL does not have 'abs' bits and only the second operand has a 'neg' bit.
1891 UNIMPLEMENTED_IF_MSG(instr.fmul.tab5cb8_2 != 0,
1892 "FMUL tab5cb8_2({}) is not implemented",
1893 instr.fmul.tab5cb8_2.Value());
1894 UNIMPLEMENTED_IF_MSG(
1895 instr.fmul.tab5c68_0 != 1, "FMUL tab5cb8_0({}) is not implemented",
1896 instr.fmul.tab5c68_0
1897 .Value()); // SMO typical sends 1 here which seems to be the default
1898
1899 op_b = GetOperandAbsNeg(op_b, false, instr.fmul.negate_b);
1900
1901 std::string postfactor_op;
1902 if (instr.fmul.postfactor != 0) {
1903 s8 postfactor = static_cast<s8>(instr.fmul.postfactor);
1904
1905 // postfactor encoded as 3-bit 1's complement in instruction,
1906 // interpreted with below logic.
1907 if (postfactor >= 4) {
1908 postfactor = 7 - postfactor;
1909 } else {
1910 postfactor = 0 - postfactor;
1911 }
1912
1913 if (postfactor > 0) {
1914 postfactor_op = " * " + std::to_string(1 << postfactor);
1915 } else {
1916 postfactor_op = " / " + std::to_string(1 << -postfactor);
1917 }
1918 }
1919
1920 regs.SetRegisterToFloat(instr.gpr0, 0, op_a + " * " + op_b + postfactor_op, 1, 1,
1921 instr.alu.saturate_d, instr.generates_cc, 0, true);
1922 break;
1923 }
1924 case OpCode::Id::FADD_C:
1925 case OpCode::Id::FADD_R:
1926 case OpCode::Id::FADD_IMM: {
1927 op_a = GetOperandAbsNeg(op_a, instr.alu.abs_a, instr.alu.negate_a);
1928 op_b = GetOperandAbsNeg(op_b, instr.alu.abs_b, instr.alu.negate_b);
1929
1930 regs.SetRegisterToFloat(instr.gpr0, 0, op_a + " + " + op_b, 1, 1,
1931 instr.alu.saturate_d, instr.generates_cc, 0, true);
1932 break;
1933 }
1934 case OpCode::Id::MUFU: {
1935 op_a = GetOperandAbsNeg(op_a, instr.alu.abs_a, instr.alu.negate_a);
1936 switch (instr.sub_op) {
1937 case SubOp::Cos:
1938 regs.SetRegisterToFloat(instr.gpr0, 0, "cos(" + op_a + ')', 1, 1,
1939 instr.alu.saturate_d, false, 0, true);
1940 break;
1941 case SubOp::Sin:
1942 regs.SetRegisterToFloat(instr.gpr0, 0, "sin(" + op_a + ')', 1, 1,
1943 instr.alu.saturate_d, false, 0, true);
1944 break;
1945 case SubOp::Ex2:
1946 regs.SetRegisterToFloat(instr.gpr0, 0, "exp2(" + op_a + ')', 1, 1,
1947 instr.alu.saturate_d, false, 0, true);
1948 break;
1949 case SubOp::Lg2:
1950 regs.SetRegisterToFloat(instr.gpr0, 0, "log2(" + op_a + ')', 1, 1,
1951 instr.alu.saturate_d, false, 0, true);
1952 break;
1953 case SubOp::Rcp:
1954 regs.SetRegisterToFloat(instr.gpr0, 0, "1.0 / " + op_a, 1, 1,
1955 instr.alu.saturate_d, false, 0, true);
1956 break;
1957 case SubOp::Rsq:
1958 regs.SetRegisterToFloat(instr.gpr0, 0, "inversesqrt(" + op_a + ')', 1, 1,
1959 instr.alu.saturate_d, false, 0, true);
1960 break;
1961 case SubOp::Sqrt:
1962 regs.SetRegisterToFloat(instr.gpr0, 0, "sqrt(" + op_a + ')', 1, 1,
1963 instr.alu.saturate_d, false, 0, true);
1964 break;
1965 default:
1966 UNIMPLEMENTED_MSG("Unhandled MUFU sub op={0:x}",
1967 static_cast<unsigned>(instr.sub_op.Value()));
1968 }
1969 break;
1970 }
1971 case OpCode::Id::FMNMX_C:
1972 case OpCode::Id::FMNMX_R:
1973 case OpCode::Id::FMNMX_IMM: {
1974 UNIMPLEMENTED_IF_MSG(
1975 instr.generates_cc,
1976 "Condition codes generation in FMNMX is partially implemented");
1977
1978 op_a = GetOperandAbsNeg(op_a, instr.alu.abs_a, instr.alu.negate_a);
1979 op_b = GetOperandAbsNeg(op_b, instr.alu.abs_b, instr.alu.negate_b);
1980
1981 std::string condition =
1982 GetPredicateCondition(instr.alu.fmnmx.pred, instr.alu.fmnmx.negate_pred != 0);
1983 std::string parameters = op_a + ',' + op_b;
1984 regs.SetRegisterToFloat(instr.gpr0, 0,
1985 '(' + condition + ") ? min(" + parameters + ") : max(" +
1986 parameters + ')',
1987 1, 1, false, instr.generates_cc, 0, true);
1988 break;
1989 }
1990 case OpCode::Id::RRO_C:
1991 case OpCode::Id::RRO_R:
1992 case OpCode::Id::RRO_IMM: {
1993 // Currently RRO is only implemented as a register move.
1994 op_b = GetOperandAbsNeg(op_b, instr.alu.abs_b, instr.alu.negate_b);
1995 regs.SetRegisterToFloat(instr.gpr0, 0, op_b, 1, 1);
1996 LOG_WARNING(HW_GPU, "RRO instruction is incomplete");
1997 break;
1998 }
1999 default: {
2000 UNIMPLEMENTED_MSG("Unhandled arithmetic instruction: {}", opcode->get().GetName());
2001 }
2002 } 792 }
2003 break; 793 break;
2004 } 794 case Type::Float:
2005 case OpCode::Type::ArithmeticImmediate: { 795 expr += Visit(operand);
2006 switch (opcode->get().GetId()) {
2007 case OpCode::Id::MOV32_IMM: {
2008 regs.SetRegisterToFloat(instr.gpr0, 0, GetImmediate32(instr), 1, 1);
2009 break;
2010 }
2011 case OpCode::Id::FMUL32_IMM: {
2012 regs.SetRegisterToFloat(
2013 instr.gpr0, 0,
2014 regs.GetRegisterAsFloat(instr.gpr8) + " * " + GetImmediate32(instr), 1, 1,
2015 instr.fmul32.saturate, instr.op_32.generates_cc, 0, true);
2016 break;
2017 }
2018 case OpCode::Id::FADD32I: {
2019 UNIMPLEMENTED_IF_MSG(
2020 instr.op_32.generates_cc,
2021 "Condition codes generation in FADD32I is partially implemented");
2022
2023 std::string op_a = regs.GetRegisterAsFloat(instr.gpr8);
2024 std::string op_b = GetImmediate32(instr);
2025
2026 if (instr.fadd32i.abs_a) {
2027 op_a = "abs(" + op_a + ')';
2028 }
2029
2030 if (instr.fadd32i.negate_a) {
2031 op_a = "-(" + op_a + ')';
2032 }
2033
2034 if (instr.fadd32i.abs_b) {
2035 op_b = "abs(" + op_b + ')';
2036 }
2037
2038 if (instr.fadd32i.negate_b) {
2039 op_b = "-(" + op_b + ')';
2040 }
2041
2042 regs.SetRegisterToFloat(instr.gpr0, 0, op_a + " + " + op_b, 1, 1, false,
2043 instr.op_32.generates_cc, 0, true);
2044 break;
2045 }
2046 }
2047 break;
2048 }
2049 case OpCode::Type::Bfe: {
2050 UNIMPLEMENTED_IF(instr.bfe.negate_b);
2051
2052 std::string op_a = instr.bfe.negate_a ? "-" : "";
2053 op_a += regs.GetRegisterAsInteger(instr.gpr8);
2054
2055 switch (opcode->get().GetId()) {
2056 case OpCode::Id::BFE_IMM: {
2057 std::string inner_shift =
2058 '(' + op_a + " << " + std::to_string(instr.bfe.GetLeftShiftValue()) + ')';
2059 std::string outer_shift =
2060 '(' + inner_shift + " >> " +
2061 std::to_string(instr.bfe.GetLeftShiftValue() + instr.bfe.shift_position) + ')';
2062
2063 regs.SetRegisterToInteger(instr.gpr0, true, 0, outer_shift, 1, 1, false,
2064 instr.generates_cc);
2065 break;
2066 }
2067 default: {
2068 UNIMPLEMENTED_MSG("Unhandled BFE instruction: {}", opcode->get().GetName());
2069 }
2070 }
2071
2072 break; 796 break;
2073 } 797 default: {
2074 case OpCode::Type::Bfi: { 798 const auto type_int = static_cast<u32>(type);
2075 const auto [base, packed_shift] = [&]() -> std::tuple<std::string, std::string> { 799 UNIMPLEMENTED_MSG("Unimplemented extra type={}", type_int);
2076 switch (opcode->get().GetId()) { 800 expr += '0';
2077 case OpCode::Id::BFI_IMM_R:
2078 return {regs.GetRegisterAsInteger(instr.gpr39, 0, false),
2079 std::to_string(instr.alu.GetSignedImm20_20())};
2080 default:
2081 UNREACHABLE();
2082 return {regs.GetRegisterAsInteger(instr.gpr39, 0, false),
2083 std::to_string(instr.alu.GetSignedImm20_20())};
2084 }
2085 }();
2086 const std::string offset = '(' + packed_shift + " & 0xff)";
2087 const std::string bits = "((" + packed_shift + " >> 8) & 0xff)";
2088 const std::string insert = regs.GetRegisterAsInteger(instr.gpr8, 0, false);
2089 regs.SetRegisterToInteger(instr.gpr0, false, 0,
2090 "bitfieldInsert(" + base + ", " + insert + ", " + offset +
2091 ", " + bits + ')',
2092 1, 1, false, instr.generates_cc);
2093 break; 801 break;
2094 } 802 }
2095 case OpCode::Type::Shift: {
2096 std::string op_a = regs.GetRegisterAsInteger(instr.gpr8, 0, true);
2097 std::string op_b;
2098
2099 if (instr.is_b_imm) {
2100 op_b += '(' + std::to_string(instr.alu.GetSignedImm20_20()) + ')';
2101 } else {
2102 if (instr.is_b_gpr) {
2103 op_b += regs.GetRegisterAsInteger(instr.gpr20);
2104 } else {
2105 op_b += regs.GetUniform(instr.cbuf34.index, instr.cbuf34.offset,
2106 GLSLRegister::Type::Integer);
2107 }
2108 }
2109
2110 switch (opcode->get().GetId()) {
2111 case OpCode::Id::SHR_C:
2112 case OpCode::Id::SHR_R:
2113 case OpCode::Id::SHR_IMM: {
2114 if (!instr.shift.is_signed) {
2115 // Logical shift right
2116 op_a = "uint(" + op_a + ')';
2117 }
2118
2119 // Cast to int is superfluous for arithmetic shift, it's only for a logical shift
2120 regs.SetRegisterToInteger(instr.gpr0, true, 0, "int(" + op_a + " >> " + op_b + ')',
2121 1, 1, false, instr.generates_cc);
2122 break;
2123 }
2124 case OpCode::Id::SHL_C:
2125 case OpCode::Id::SHL_R:
2126 case OpCode::Id::SHL_IMM:
2127 UNIMPLEMENTED_IF_MSG(instr.generates_cc,
2128 "Condition codes generation in SHL is not implemented");
2129 regs.SetRegisterToInteger(instr.gpr0, true, 0, op_a + " << " + op_b, 1, 1, false,
2130 instr.generates_cc);
2131 break;
2132 default: {
2133 UNIMPLEMENTED_MSG("Unhandled shift instruction: {}", opcode->get().GetName());
2134 }
2135 }
2136 break;
2137 } 803 }
2138 case OpCode::Type::ArithmeticIntegerImmediate: { 804 return expr;
2139 std::string op_a = regs.GetRegisterAsInteger(instr.gpr8); 805 }
2140 std::string op_b = std::to_string(instr.alu.imm20_32.Value());
2141
2142 switch (opcode->get().GetId()) {
2143 case OpCode::Id::IADD32I:
2144 UNIMPLEMENTED_IF_MSG(
2145 instr.op_32.generates_cc,
2146 "Condition codes generation in IADD32I is partially implemented");
2147
2148 if (instr.iadd32i.negate_a)
2149 op_a = "-(" + op_a + ')';
2150
2151 regs.SetRegisterToInteger(instr.gpr0, true, 0, op_a + " + " + op_b, 1, 1,
2152 instr.iadd32i.saturate, instr.op_32.generates_cc);
2153 break;
2154 case OpCode::Id::LOP32I: {
2155
2156 if (instr.alu.lop32i.invert_a)
2157 op_a = "~(" + op_a + ')';
2158
2159 if (instr.alu.lop32i.invert_b)
2160 op_b = "~(" + op_b + ')';
2161 806
2162 WriteLogicOperation(instr.gpr0, instr.alu.lop32i.operation, op_a, op_b, 807 std::string GenerateTextureAoffi(const std::vector<Node>& aoffi) {
2163 Tegra::Shader::PredicateResultMode::None, 808 if (aoffi.empty()) {
2164 Tegra::Shader::Pred::UnusedIndex, instr.op_32.generates_cc); 809 return {};
2165 break;
2166 }
2167 default: {
2168 UNIMPLEMENTED_MSG("Unhandled ArithmeticIntegerImmediate instruction: {}",
2169 opcode->get().GetName());
2170 }
2171 }
2172 break;
2173 } 810 }
2174 case OpCode::Type::ArithmeticInteger: { 811 constexpr std::array<const char*, 3> coord_constructors = {"int", "ivec2", "ivec3"};
2175 std::string op_a = regs.GetRegisterAsInteger(instr.gpr8); 812 std::string expr = ", ";
2176 std::string op_b; 813 expr += coord_constructors.at(aoffi.size() - 1);
2177 if (instr.is_b_imm) { 814 expr += '(';
2178 op_b += '(' + std::to_string(instr.alu.GetSignedImm20_20()) + ')'; 815
816 for (std::size_t index = 0; index < aoffi.size(); ++index) {
817 const auto operand{aoffi.at(index)};
818 if (const auto immediate = std::get_if<ImmediateNode>(operand)) {
819 // Inline the string as an immediate integer in GLSL (AOFFI arguments are required
820 // to be constant by the standard).
821 expr += std::to_string(static_cast<s32>(immediate->GetValue()));
2179 } else { 822 } else {
2180 if (instr.is_b_gpr) { 823 expr += "ftoi(" + Visit(operand) + ')';
2181 op_b += regs.GetRegisterAsInteger(instr.gpr20);
2182 } else {
2183 op_b += regs.GetUniform(instr.cbuf34.index, instr.cbuf34.offset,
2184 GLSLRegister::Type::Integer);
2185 }
2186 }
2187
2188 switch (opcode->get().GetId()) {
2189 case OpCode::Id::IADD_C:
2190 case OpCode::Id::IADD_R:
2191 case OpCode::Id::IADD_IMM: {
2192 UNIMPLEMENTED_IF_MSG(instr.generates_cc,
2193 "Condition codes generation in IADD is partially implemented");
2194
2195 if (instr.alu_integer.negate_a)
2196 op_a = "-(" + op_a + ')';
2197
2198 if (instr.alu_integer.negate_b)
2199 op_b = "-(" + op_b + ')';
2200
2201 regs.SetRegisterToInteger(instr.gpr0, true, 0, op_a + " + " + op_b, 1, 1,
2202 instr.alu.saturate_d, instr.generates_cc);
2203 break;
2204 }
2205 case OpCode::Id::IADD3_C:
2206 case OpCode::Id::IADD3_R:
2207 case OpCode::Id::IADD3_IMM: {
2208 UNIMPLEMENTED_IF_MSG(
2209 instr.generates_cc,
2210 "Condition codes generation in IADD3 is partially implemented");
2211
2212 std::string op_c = regs.GetRegisterAsInteger(instr.gpr39);
2213
2214 auto apply_height = [](auto height, auto& oprand) {
2215 switch (height) {
2216 case Tegra::Shader::IAdd3Height::None:
2217 break;
2218 case Tegra::Shader::IAdd3Height::LowerHalfWord:
2219 oprand = "((" + oprand + ") & 0xFFFF)";
2220 break;
2221 case Tegra::Shader::IAdd3Height::UpperHalfWord:
2222 oprand = "((" + oprand + ") >> 16)";
2223 break;
2224 default:
2225 UNIMPLEMENTED_MSG("Unhandled IADD3 height: {}",
2226 static_cast<u32>(height.Value()));
2227 }
2228 };
2229
2230 if (opcode->get().GetId() == OpCode::Id::IADD3_R) {
2231 apply_height(instr.iadd3.height_a, op_a);
2232 apply_height(instr.iadd3.height_b, op_b);
2233 apply_height(instr.iadd3.height_c, op_c);
2234 }
2235
2236 if (instr.iadd3.neg_a)
2237 op_a = "-(" + op_a + ')';
2238
2239 if (instr.iadd3.neg_b)
2240 op_b = "-(" + op_b + ')';
2241
2242 if (instr.iadd3.neg_c)
2243 op_c = "-(" + op_c + ')';
2244
2245 std::string result;
2246 if (opcode->get().GetId() == OpCode::Id::IADD3_R) {
2247 switch (instr.iadd3.mode) {
2248 case Tegra::Shader::IAdd3Mode::RightShift:
2249 // TODO(tech4me): According to
2250 // https://envytools.readthedocs.io/en/latest/hw/graph/maxwell/cuda/int.html?highlight=iadd3
2251 // The addition between op_a and op_b should be done in uint33, more
2252 // investigation required
2253 result = "(((" + op_a + " + " + op_b + ") >> 16) + " + op_c + ')';
2254 break;
2255 case Tegra::Shader::IAdd3Mode::LeftShift:
2256 result = "(((" + op_a + " + " + op_b + ") << 16) + " + op_c + ')';
2257 break;
2258 default:
2259 result = '(' + op_a + " + " + op_b + " + " + op_c + ')';
2260 break;
2261 }
2262 } else {
2263 result = '(' + op_a + " + " + op_b + " + " + op_c + ')';
2264 }
2265
2266 regs.SetRegisterToInteger(instr.gpr0, true, 0, result, 1, 1, false,
2267 instr.generates_cc);
2268 break;
2269 } 824 }
2270 case OpCode::Id::ISCADD_C: 825 if (index + 1 < aoffi.size()) {
2271 case OpCode::Id::ISCADD_R: 826 expr += ", ";
2272 case OpCode::Id::ISCADD_IMM: {
2273 UNIMPLEMENTED_IF_MSG(
2274 instr.generates_cc,
2275 "Condition codes generation in ISCADD is partially implemented");
2276
2277 if (instr.alu_integer.negate_a)
2278 op_a = "-(" + op_a + ')';
2279
2280 if (instr.alu_integer.negate_b)
2281 op_b = "-(" + op_b + ')';
2282
2283 const std::string shift = std::to_string(instr.alu_integer.shift_amount.Value());
2284
2285 regs.SetRegisterToInteger(instr.gpr0, true, 0,
2286 "((" + op_a + " << " + shift + ") + " + op_b + ')', 1, 1,
2287 false, instr.generates_cc);
2288 break;
2289 }
2290 case OpCode::Id::POPC_C:
2291 case OpCode::Id::POPC_R:
2292 case OpCode::Id::POPC_IMM: {
2293 if (instr.popc.invert) {
2294 op_b = "~(" + op_b + ')';
2295 }
2296 regs.SetRegisterToInteger(instr.gpr0, true, 0, "bitCount(" + op_b + ')', 1, 1);
2297 break;
2298 } 827 }
2299 case OpCode::Id::SEL_C:
2300 case OpCode::Id::SEL_R:
2301 case OpCode::Id::SEL_IMM: {
2302 const std::string condition =
2303 GetPredicateCondition(instr.sel.pred, instr.sel.neg_pred != 0);
2304 regs.SetRegisterToInteger(instr.gpr0, true, 0,
2305 '(' + condition + ") ? " + op_a + " : " + op_b, 1, 1);
2306 break;
2307 }
2308 case OpCode::Id::LOP_C:
2309 case OpCode::Id::LOP_R:
2310 case OpCode::Id::LOP_IMM: {
2311
2312 if (instr.alu.lop.invert_a)
2313 op_a = "~(" + op_a + ')';
2314
2315 if (instr.alu.lop.invert_b)
2316 op_b = "~(" + op_b + ')';
2317
2318 WriteLogicOperation(instr.gpr0, instr.alu.lop.operation, op_a, op_b,
2319 instr.alu.lop.pred_result_mode, instr.alu.lop.pred48,
2320 instr.generates_cc);
2321 break;
2322 }
2323 case OpCode::Id::LOP3_C:
2324 case OpCode::Id::LOP3_R:
2325 case OpCode::Id::LOP3_IMM: {
2326 const std::string op_c = regs.GetRegisterAsInteger(instr.gpr39);
2327 std::string lut;
2328
2329 if (opcode->get().GetId() == OpCode::Id::LOP3_R) {
2330 lut = '(' + std::to_string(instr.alu.lop3.GetImmLut28()) + ')';
2331 } else {
2332 lut = '(' + std::to_string(instr.alu.lop3.GetImmLut48()) + ')';
2333 }
2334
2335 WriteLop3Instruction(instr.gpr0, op_a, op_b, op_c, lut, instr.generates_cc);
2336 break;
2337 }
2338 case OpCode::Id::IMNMX_C:
2339 case OpCode::Id::IMNMX_R:
2340 case OpCode::Id::IMNMX_IMM: {
2341 UNIMPLEMENTED_IF(instr.imnmx.exchange != Tegra::Shader::IMinMaxExchange::None);
2342 UNIMPLEMENTED_IF_MSG(
2343 instr.generates_cc,
2344 "Condition codes generation in IMNMX is partially implemented");
2345
2346 const std::string condition =
2347 GetPredicateCondition(instr.imnmx.pred, instr.imnmx.negate_pred != 0);
2348 const std::string parameters = op_a + ',' + op_b;
2349 regs.SetRegisterToInteger(instr.gpr0, instr.imnmx.is_signed, 0,
2350 '(' + condition + ") ? min(" + parameters + ") : max(" +
2351 parameters + ')',
2352 1, 1, false, instr.generates_cc);
2353 break;
2354 }
2355 case OpCode::Id::LEA_R2:
2356 case OpCode::Id::LEA_R1:
2357 case OpCode::Id::LEA_IMM:
2358 case OpCode::Id::LEA_RZ:
2359 case OpCode::Id::LEA_HI: {
2360 std::string op_c;
2361
2362 switch (opcode->get().GetId()) {
2363 case OpCode::Id::LEA_R2: {
2364 op_a = regs.GetRegisterAsInteger(instr.gpr20);
2365 op_b = regs.GetRegisterAsInteger(instr.gpr39);
2366 op_c = std::to_string(instr.lea.r2.entry_a);
2367 break;
2368 }
2369
2370 case OpCode::Id::LEA_R1: {
2371 const bool neg = instr.lea.r1.neg != 0;
2372 op_a = regs.GetRegisterAsInteger(instr.gpr8);
2373 if (neg)
2374 op_a = "-(" + op_a + ')';
2375 op_b = regs.GetRegisterAsInteger(instr.gpr20);
2376 op_c = std::to_string(instr.lea.r1.entry_a);
2377 break;
2378 }
2379
2380 case OpCode::Id::LEA_IMM: {
2381 const bool neg = instr.lea.imm.neg != 0;
2382 op_b = regs.GetRegisterAsInteger(instr.gpr8);
2383 if (neg)
2384 op_b = "-(" + op_b + ')';
2385 op_a = std::to_string(instr.lea.imm.entry_a);
2386 op_c = std::to_string(instr.lea.imm.entry_b);
2387 break;
2388 }
2389
2390 case OpCode::Id::LEA_RZ: {
2391 const bool neg = instr.lea.rz.neg != 0;
2392 op_b = regs.GetRegisterAsInteger(instr.gpr8);
2393 if (neg)
2394 op_b = "-(" + op_b + ')';
2395 op_a = regs.GetUniform(instr.lea.rz.cb_index, instr.lea.rz.cb_offset,
2396 GLSLRegister::Type::Integer);
2397 op_c = std::to_string(instr.lea.rz.entry_a);
2398
2399 break;
2400 }
2401
2402 case OpCode::Id::LEA_HI:
2403 default: {
2404 op_b = regs.GetRegisterAsInteger(instr.gpr8);
2405 op_a = std::to_string(instr.lea.imm.entry_a);
2406 op_c = std::to_string(instr.lea.imm.entry_b);
2407 UNIMPLEMENTED_MSG("Unhandled LEA subinstruction: {}", opcode->get().GetName());
2408 }
2409 }
2410 UNIMPLEMENTED_IF_MSG(instr.lea.pred48 != static_cast<u64>(Pred::UnusedIndex),
2411 "Unhandled LEA Predicate");
2412 const std::string value = '(' + op_a + " + (" + op_b + "*(1 << " + op_c + ")))";
2413 regs.SetRegisterToInteger(instr.gpr0, true, 0, value, 1, 1, false,
2414 instr.generates_cc);
2415
2416 break;
2417 }
2418 default: {
2419 UNIMPLEMENTED_MSG("Unhandled ArithmeticInteger instruction: {}",
2420 opcode->get().GetName());
2421 }
2422 }
2423
2424 break;
2425 } 828 }
2426 case OpCode::Type::ArithmeticHalf: { 829 expr += ')';
2427 if (opcode->get().GetId() == OpCode::Id::HADD2_C ||
2428 opcode->get().GetId() == OpCode::Id::HADD2_R) {
2429 UNIMPLEMENTED_IF(instr.alu_half.ftz != 0);
2430 }
2431 const bool negate_a =
2432 opcode->get().GetId() != OpCode::Id::HMUL2_R && instr.alu_half.negate_a != 0;
2433 const bool negate_b =
2434 opcode->get().GetId() != OpCode::Id::HMUL2_C && instr.alu_half.negate_b != 0;
2435
2436 const std::string op_a =
2437 GetHalfFloat(regs.GetRegisterAsInteger(instr.gpr8, 0, false), instr.alu_half.type_a,
2438 instr.alu_half.abs_a != 0, negate_a);
2439
2440 std::string op_b;
2441 switch (opcode->get().GetId()) {
2442 case OpCode::Id::HADD2_C:
2443 case OpCode::Id::HMUL2_C:
2444 op_b = regs.GetUniform(instr.cbuf34.index, instr.cbuf34.offset,
2445 GLSLRegister::Type::UnsignedInteger);
2446 break;
2447 case OpCode::Id::HADD2_R:
2448 case OpCode::Id::HMUL2_R:
2449 op_b = regs.GetRegisterAsInteger(instr.gpr20, 0, false);
2450 break;
2451 default:
2452 UNREACHABLE();
2453 op_b = "0";
2454 break;
2455 }
2456 op_b = GetHalfFloat(op_b, instr.alu_half.type_b, instr.alu_half.abs_b != 0, negate_b);
2457
2458 const std::string result = [&]() {
2459 switch (opcode->get().GetId()) {
2460 case OpCode::Id::HADD2_C:
2461 case OpCode::Id::HADD2_R:
2462 return '(' + op_a + " + " + op_b + ')';
2463 case OpCode::Id::HMUL2_C:
2464 case OpCode::Id::HMUL2_R:
2465 return '(' + op_a + " * " + op_b + ')';
2466 default:
2467 UNIMPLEMENTED_MSG("Unhandled half float instruction: {}",
2468 opcode->get().GetName());
2469 return std::string("0");
2470 }
2471 }();
2472 830
2473 regs.SetRegisterToHalfFloat(instr.gpr0, 0, result, instr.alu_half.merge, 1, 1, 831 return expr;
2474 instr.alu_half.saturate != 0); 832 }
2475 break;
2476 }
2477 case OpCode::Type::ArithmeticHalfImmediate: {
2478 if (opcode->get().GetId() == OpCode::Id::HADD2_IMM) {
2479 UNIMPLEMENTED_IF(instr.alu_half_imm.ftz != 0);
2480 } else {
2481 UNIMPLEMENTED_IF(instr.alu_half_imm.precision !=
2482 Tegra::Shader::HalfPrecision::None);
2483 }
2484 833
2485 const std::string op_a = GetHalfFloat( 834 std::string Assign(Operation operation) {
2486 regs.GetRegisterAsInteger(instr.gpr8, 0, false), instr.alu_half_imm.type_a, 835 const Node dest = operation[0];
2487 instr.alu_half_imm.abs_a != 0, instr.alu_half_imm.negate_a != 0); 836 const Node src = operation[1];
2488 837
2489 const std::string op_b = UnpackHalfImmediate(instr, true); 838 std::string target;
839 if (const auto gpr = std::get_if<GprNode>(dest)) {
840 if (gpr->GetIndex() == Register::ZeroIndex) {
841 // Writing to Register::ZeroIndex is a no op
842 return {};
843 }
844 target = GetRegister(gpr->GetIndex());
2490 845
2491 const std::string result = [&]() { 846 } else if (const auto abuf = std::get_if<AbufNode>(dest)) {
2492 switch (opcode->get().GetId()) { 847 target = [&]() -> std::string {
2493 case OpCode::Id::HADD2_IMM: 848 switch (const auto attribute = abuf->GetIndex(); abuf->GetIndex()) {
2494 return op_a + " + " + op_b; 849 case Attribute::Index::Position:
2495 case OpCode::Id::HMUL2_IMM: 850 return "position" + GetSwizzle(abuf->GetElement());
2496 return op_a + " * " + op_b; 851 case Attribute::Index::PointSize:
852 return "gl_PointSize";
853 case Attribute::Index::ClipDistances0123:
854 return "gl_ClipDistance[" + std::to_string(abuf->GetElement()) + ']';
855 case Attribute::Index::ClipDistances4567:
856 return "gl_ClipDistance[" + std::to_string(abuf->GetElement() + 4) + ']';
2497 default: 857 default:
2498 UNREACHABLE(); 858 if (attribute >= Attribute::Index::Attribute_0 &&
2499 return std::string("0"); 859 attribute <= Attribute::Index::Attribute_31) {
860 return GetOutputAttribute(attribute) + GetSwizzle(abuf->GetElement());
861 }
862 UNIMPLEMENTED_MSG("Unhandled output attribute: {}",
863 static_cast<u32>(attribute));
864 return "0";
2500 } 865 }
2501 }(); 866 }();
2502 867
2503 regs.SetRegisterToHalfFloat(instr.gpr0, 0, result, instr.alu_half_imm.merge, 1, 1, 868 } else if (const auto lmem = std::get_if<LmemNode>(dest)) {
2504 instr.alu_half_imm.saturate != 0); 869 target = GetLocalMemory() + "[ftou(" + Visit(lmem->GetAddress()) + ") / 4]";
2505 break;
2506 }
2507 case OpCode::Type::Ffma: {
2508 const std::string op_a = regs.GetRegisterAsFloat(instr.gpr8);
2509 std::string op_b = instr.ffma.negate_b ? "-" : "";
2510 std::string op_c = instr.ffma.negate_c ? "-" : "";
2511
2512 UNIMPLEMENTED_IF_MSG(instr.ffma.cc != 0, "FFMA cc not implemented");
2513 UNIMPLEMENTED_IF_MSG(
2514 instr.ffma.tab5980_0 != 1, "FFMA tab5980_0({}) not implemented",
2515 instr.ffma.tab5980_0.Value()); // Seems to be 1 by default based on SMO
2516 UNIMPLEMENTED_IF_MSG(instr.ffma.tab5980_1 != 0, "FFMA tab5980_1({}) not implemented",
2517 instr.ffma.tab5980_1.Value());
2518 UNIMPLEMENTED_IF_MSG(instr.generates_cc,
2519 "Condition codes generation in FFMA is partially implemented");
2520
2521 switch (opcode->get().GetId()) {
2522 case OpCode::Id::FFMA_CR: {
2523 op_b += regs.GetUniform(instr.cbuf34.index, instr.cbuf34.offset,
2524 GLSLRegister::Type::Float);
2525 op_c += regs.GetRegisterAsFloat(instr.gpr39);
2526 break;
2527 }
2528 case OpCode::Id::FFMA_RR: {
2529 op_b += regs.GetRegisterAsFloat(instr.gpr20);
2530 op_c += regs.GetRegisterAsFloat(instr.gpr39);
2531 break;
2532 }
2533 case OpCode::Id::FFMA_RC: {
2534 op_b += regs.GetRegisterAsFloat(instr.gpr39);
2535 op_c += regs.GetUniform(instr.cbuf34.index, instr.cbuf34.offset,
2536 GLSLRegister::Type::Float);
2537 break;
2538 }
2539 case OpCode::Id::FFMA_IMM: {
2540 op_b += GetImmediate19(instr);
2541 op_c += regs.GetRegisterAsFloat(instr.gpr39);
2542 break;
2543 }
2544 default: {
2545 UNIMPLEMENTED_MSG("Unhandled FFMA instruction: {}", opcode->get().GetName());
2546 }
2547 }
2548 870
2549 regs.SetRegisterToFloat(instr.gpr0, 0, "fma(" + op_a + ", " + op_b + ", " + op_c + ')', 871 } else {
2550 1, 1, instr.alu.saturate_d, instr.generates_cc, 0, true); 872 UNREACHABLE_MSG("Assign called without a proper target");
2551 break;
2552 } 873 }
2553 case OpCode::Type::Hfma2: {
2554 if (opcode->get().GetId() == OpCode::Id::HFMA2_RR) {
2555 UNIMPLEMENTED_IF(instr.hfma2.rr.precision != Tegra::Shader::HalfPrecision::None);
2556 } else {
2557 UNIMPLEMENTED_IF(instr.hfma2.precision != Tegra::Shader::HalfPrecision::None);
2558 }
2559 const bool saturate = opcode->get().GetId() == OpCode::Id::HFMA2_RR
2560 ? instr.hfma2.rr.saturate != 0
2561 : instr.hfma2.saturate != 0;
2562
2563 const std::string op_a =
2564 GetHalfFloat(regs.GetRegisterAsInteger(instr.gpr8, 0, false), instr.hfma2.type_a);
2565 std::string op_b, op_c;
2566
2567 switch (opcode->get().GetId()) {
2568 case OpCode::Id::HFMA2_CR:
2569 op_b = GetHalfFloat(regs.GetUniform(instr.cbuf34.index, instr.cbuf34.offset,
2570 GLSLRegister::Type::UnsignedInteger),
2571 instr.hfma2.type_b, false, instr.hfma2.negate_b);
2572 op_c = GetHalfFloat(regs.GetRegisterAsInteger(instr.gpr39, 0, false),
2573 instr.hfma2.type_reg39, false, instr.hfma2.negate_c);
2574 break;
2575 case OpCode::Id::HFMA2_RC:
2576 op_b = GetHalfFloat(regs.GetRegisterAsInteger(instr.gpr39, 0, false),
2577 instr.hfma2.type_reg39, false, instr.hfma2.negate_b);
2578 op_c = GetHalfFloat(regs.GetUniform(instr.cbuf34.index, instr.cbuf34.offset,
2579 GLSLRegister::Type::UnsignedInteger),
2580 instr.hfma2.type_b, false, instr.hfma2.negate_c);
2581 break;
2582 case OpCode::Id::HFMA2_RR:
2583 op_b = GetHalfFloat(regs.GetRegisterAsInteger(instr.gpr20, 0, false),
2584 instr.hfma2.type_b, false, instr.hfma2.negate_b);
2585 op_c = GetHalfFloat(regs.GetRegisterAsInteger(instr.gpr39, 0, false),
2586 instr.hfma2.rr.type_c, false, instr.hfma2.rr.negate_c);
2587 break;
2588 case OpCode::Id::HFMA2_IMM_R:
2589 op_b = UnpackHalfImmediate(instr, true);
2590 op_c = GetHalfFloat(regs.GetRegisterAsInteger(instr.gpr39, 0, false),
2591 instr.hfma2.type_reg39, false, instr.hfma2.negate_c);
2592 break;
2593 default:
2594 UNREACHABLE();
2595 op_c = op_b = "vec2(0)";
2596 break;
2597 }
2598 874
2599 const std::string result = '(' + op_a + " * " + op_b + " + " + op_c + ')'; 875 code.AddLine(target + " = " + Visit(src) + ';');
876 return {};
877 }
2600 878
2601 regs.SetRegisterToHalfFloat(instr.gpr0, 0, result, instr.hfma2.merge, 1, 1, saturate); 879 std::string Composite(Operation operation) {
2602 break; 880 std::string value = "vec4(";
881 for (std::size_t i = 0; i < 4; ++i) {
882 value += Visit(operation[i]);
883 if (i < 3)
884 value += ", ";
2603 } 885 }
2604 case OpCode::Type::Conversion: { 886 value += ')';
2605 switch (opcode->get().GetId()) { 887 return value;
2606 case OpCode::Id::I2I_R: { 888 }
2607 UNIMPLEMENTED_IF(instr.conversion.selector);
2608
2609 std::string op_a = regs.GetRegisterAsInteger(
2610 instr.gpr20, 0, instr.conversion.is_input_signed, instr.conversion.src_size);
2611 889
2612 if (instr.conversion.abs_a) { 890 template <Type type>
2613 op_a = "abs(" + op_a + ')'; 891 std::string Add(Operation operation) {
2614 } 892 return GenerateBinaryInfix(operation, "+", type, type, type);
893 }
2615 894
2616 if (instr.conversion.negate_a) { 895 template <Type type>
2617 op_a = "-(" + op_a + ')'; 896 std::string Mul(Operation operation) {
2618 } 897 return GenerateBinaryInfix(operation, "*", type, type, type);
898 }
2619 899
2620 regs.SetRegisterToInteger(instr.gpr0, instr.conversion.is_output_signed, 0, op_a, 1, 900 template <Type type>
2621 1, instr.alu.saturate_d, instr.generates_cc, 0, 901 std::string Div(Operation operation) {
2622 instr.conversion.dest_size); 902 return GenerateBinaryInfix(operation, "/", type, type, type);
2623 break; 903 }
2624 }
2625 case OpCode::Id::I2F_R:
2626 case OpCode::Id::I2F_C: {
2627 UNIMPLEMENTED_IF(instr.conversion.dest_size != Register::Size::Word);
2628 UNIMPLEMENTED_IF(instr.conversion.selector);
2629 std::string op_a;
2630
2631 if (instr.is_b_gpr) {
2632 op_a =
2633 regs.GetRegisterAsInteger(instr.gpr20, 0, instr.conversion.is_input_signed,
2634 instr.conversion.src_size);
2635 } else {
2636 op_a = regs.GetUniform(instr.cbuf34.index, instr.cbuf34.offset,
2637 instr.conversion.is_input_signed
2638 ? GLSLRegister::Type::Integer
2639 : GLSLRegister::Type::UnsignedInteger,
2640 instr.conversion.src_size);
2641 }
2642 904
2643 if (instr.conversion.abs_a) { 905 template <Type type>
2644 op_a = "abs(" + op_a + ')'; 906 std::string Fma(Operation operation) {
2645 } 907 return GenerateTernary(operation, "fma", type, type, type, type);
908 }
2646 909
2647 if (instr.conversion.negate_a) { 910 template <Type type>
2648 op_a = "-(" + op_a + ')'; 911 std::string Negate(Operation operation) {
2649 } 912 return GenerateUnary(operation, "-", type, type, true);
913 }
2650 914
2651 regs.SetRegisterToFloat(instr.gpr0, 0, op_a, 1, 1, false, instr.generates_cc); 915 template <Type type>
2652 break; 916 std::string Absolute(Operation operation) {
2653 } 917 return GenerateUnary(operation, "abs", type, type, false);
2654 case OpCode::Id::F2F_R: { 918 }
2655 UNIMPLEMENTED_IF(instr.conversion.dest_size != Register::Size::Word);
2656 UNIMPLEMENTED_IF(instr.conversion.src_size != Register::Size::Word);
2657 std::string op_a = regs.GetRegisterAsFloat(instr.gpr20);
2658 919
2659 if (instr.conversion.abs_a) { 920 std::string FClamp(Operation operation) {
2660 op_a = "abs(" + op_a + ')'; 921 return GenerateTernary(operation, "clamp", Type::Float, Type::Float, Type::Float,
2661 } 922 Type::Float);
923 }
2662 924
2663 if (instr.conversion.negate_a) { 925 template <Type type>
2664 op_a = "-(" + op_a + ')'; 926 std::string Min(Operation operation) {
2665 } 927 return GenerateBinaryCall(operation, "min", type, type, type);
928 }
2666 929
2667 switch (instr.conversion.f2f.rounding) { 930 template <Type type>
2668 case Tegra::Shader::F2fRoundingOp::None: 931 std::string Max(Operation operation) {
2669 break; 932 return GenerateBinaryCall(operation, "max", type, type, type);
2670 case Tegra::Shader::F2fRoundingOp::Round: 933 }
2671 op_a = "roundEven(" + op_a + ')';
2672 break;
2673 case Tegra::Shader::F2fRoundingOp::Floor:
2674 op_a = "floor(" + op_a + ')';
2675 break;
2676 case Tegra::Shader::F2fRoundingOp::Ceil:
2677 op_a = "ceil(" + op_a + ')';
2678 break;
2679 case Tegra::Shader::F2fRoundingOp::Trunc:
2680 op_a = "trunc(" + op_a + ')';
2681 break;
2682 default:
2683 UNIMPLEMENTED_MSG("Unimplemented F2F rounding mode {}",
2684 static_cast<u32>(instr.conversion.f2f.rounding.Value()));
2685 break;
2686 }
2687 934
2688 regs.SetRegisterToFloat(instr.gpr0, 0, op_a, 1, 1, instr.alu.saturate_d, 935 std::string Select(Operation operation) {
2689 instr.generates_cc); 936 const std::string condition = Visit(operation[0]);
2690 break; 937 const std::string true_case = Visit(operation[1]);
2691 } 938 const std::string false_case = Visit(operation[2]);
2692 case OpCode::Id::F2I_R: 939 return ApplyPrecise(operation,
2693 case OpCode::Id::F2I_C: { 940 '(' + condition + " ? " + true_case + " : " + false_case + ')');
2694 UNIMPLEMENTED_IF(instr.conversion.src_size != Register::Size::Word); 941 }
2695 std::string op_a{};
2696 942
2697 if (instr.is_b_gpr) { 943 std::string FCos(Operation operation) {
2698 op_a = regs.GetRegisterAsFloat(instr.gpr20); 944 return GenerateUnary(operation, "cos", Type::Float, Type::Float, false);
2699 } else { 945 }
2700 op_a = regs.GetUniform(instr.cbuf34.index, instr.cbuf34.offset,
2701 GLSLRegister::Type::Float);
2702 }
2703 946
2704 if (instr.conversion.abs_a) { 947 std::string FSin(Operation operation) {
2705 op_a = "abs(" + op_a + ')'; 948 return GenerateUnary(operation, "sin", Type::Float, Type::Float, false);
2706 } 949 }
2707 950
2708 if (instr.conversion.negate_a) { 951 std::string FExp2(Operation operation) {
2709 op_a = "-(" + op_a + ')'; 952 return GenerateUnary(operation, "exp2", Type::Float, Type::Float, false);
2710 } 953 }
2711 954
2712 switch (instr.conversion.f2i.rounding) { 955 std::string FLog2(Operation operation) {
2713 case Tegra::Shader::F2iRoundingOp::None: 956 return GenerateUnary(operation, "log2", Type::Float, Type::Float, false);
2714 break; 957 }
2715 case Tegra::Shader::F2iRoundingOp::Floor:
2716 op_a = "floor(" + op_a + ')';
2717 break;
2718 case Tegra::Shader::F2iRoundingOp::Ceil:
2719 op_a = "ceil(" + op_a + ')';
2720 break;
2721 case Tegra::Shader::F2iRoundingOp::Trunc:
2722 op_a = "trunc(" + op_a + ')';
2723 break;
2724 default:
2725 UNIMPLEMENTED_MSG("Unimplemented F2I rounding mode {}",
2726 static_cast<u32>(instr.conversion.f2i.rounding.Value()));
2727 break;
2728 }
2729 958
2730 if (instr.conversion.is_output_signed) { 959 std::string FInverseSqrt(Operation operation) {
2731 op_a = "int(" + op_a + ')'; 960 return GenerateUnary(operation, "inversesqrt", Type::Float, Type::Float, false);
2732 } else { 961 }
2733 op_a = "uint(" + op_a + ')';
2734 }
2735 962
2736 regs.SetRegisterToInteger(instr.gpr0, instr.conversion.is_output_signed, 0, op_a, 1, 963 std::string FSqrt(Operation operation) {
2737 1, false, instr.generates_cc, 0, 964 return GenerateUnary(operation, "sqrt", Type::Float, Type::Float, false);
2738 instr.conversion.dest_size); 965 }
2739 break;
2740 }
2741 default: {
2742 UNIMPLEMENTED_MSG("Unhandled conversion instruction: {}", opcode->get().GetName());
2743 }
2744 }
2745 break;
2746 }
2747 case OpCode::Type::Memory: {
2748 switch (opcode->get().GetId()) {
2749 case OpCode::Id::LD_A: {
2750 // Note: Shouldn't this be interp mode flat? As in no interpolation made.
2751 UNIMPLEMENTED_IF_MSG(instr.gpr8.Value() != Register::ZeroIndex,
2752 "Indirect attribute loads are not supported");
2753 UNIMPLEMENTED_IF_MSG((instr.attribute.fmt20.immediate.Value() % sizeof(u32)) != 0,
2754 "Unaligned attribute loads are not supported");
2755
2756 Tegra::Shader::IpaMode input_mode{Tegra::Shader::IpaInterpMode::Perspective,
2757 Tegra::Shader::IpaSampleMode::Default};
2758
2759 u64 next_element = instr.attribute.fmt20.element;
2760 u64 next_index = static_cast<u64>(instr.attribute.fmt20.index.Value());
2761
2762 const auto LoadNextElement = [&](u32 reg_offset) {
2763 regs.SetRegisterToInputAttibute(instr.gpr0.Value() + reg_offset, next_element,
2764 static_cast<Attribute::Index>(next_index),
2765 input_mode, instr.gpr39.Value());
2766
2767 // Load the next attribute element into the following register. If the element
2768 // to load goes beyond the vec4 size, load the first element of the next
2769 // attribute.
2770 next_element = (next_element + 1) % 4;
2771 next_index = next_index + (next_element == 0 ? 1 : 0);
2772 };
2773
2774 const u32 num_words = static_cast<u32>(instr.attribute.fmt20.size.Value()) + 1;
2775 for (u32 reg_offset = 0; reg_offset < num_words; ++reg_offset) {
2776 LoadNextElement(reg_offset);
2777 }
2778 break;
2779 }
2780 case OpCode::Id::LD_C: {
2781 UNIMPLEMENTED_IF(instr.ld_c.unknown != 0);
2782
2783 const auto scope = shader.Scope();
2784
2785 shader.AddLine("uint index = (" + regs.GetRegisterAsInteger(instr.gpr8, 0, false) +
2786 " / 4) & (MAX_CONSTBUFFER_ELEMENTS - 1);");
2787
2788 const std::string op_a =
2789 regs.GetUniformIndirect(instr.cbuf36.index, instr.cbuf36.offset + 0, "index",
2790 GLSLRegister::Type::Float);
2791
2792 switch (instr.ld_c.type.Value()) {
2793 case Tegra::Shader::UniformType::Single:
2794 regs.SetRegisterToFloat(instr.gpr0, 0, op_a, 1, 1);
2795 break;
2796
2797 case Tegra::Shader::UniformType::Double: {
2798 const std::string op_b =
2799 regs.GetUniformIndirect(instr.cbuf36.index, instr.cbuf36.offset + 4,
2800 "index", GLSLRegister::Type::Float);
2801 regs.SetRegisterToFloat(instr.gpr0, 0, op_a, 1, 1);
2802 regs.SetRegisterToFloat(instr.gpr0.Value() + 1, 0, op_b, 1, 1);
2803 break;
2804 }
2805 default:
2806 UNIMPLEMENTED_MSG("Unhandled type: {}",
2807 static_cast<unsigned>(instr.ld_c.type.Value()));
2808 }
2809 break;
2810 }
2811 case OpCode::Id::LD_L: {
2812 UNIMPLEMENTED_IF_MSG(instr.ld_l.unknown == 1, "LD_L Unhandled mode: {}",
2813 static_cast<unsigned>(instr.ld_l.unknown.Value()));
2814 966
2815 const auto scope = shader.Scope(); 967 std::string FRoundEven(Operation operation) {
968 return GenerateUnary(operation, "roundEven", Type::Float, Type::Float, false);
969 }
2816 970
2817 std::string op = '(' + regs.GetRegisterAsInteger(instr.gpr8, 0, false) + " + " + 971 std::string FFloor(Operation operation) {
2818 std::to_string(instr.smem_imm.Value()) + ')'; 972 return GenerateUnary(operation, "floor", Type::Float, Type::Float, false);
973 }
2819 974
2820 shader.AddLine("uint index = (" + op + " / 4);"); 975 std::string FCeil(Operation operation) {
976 return GenerateUnary(operation, "ceil", Type::Float, Type::Float, false);
977 }
2821 978
2822 const std::string op_a = regs.GetLocalMemoryAsFloat("index"); 979 std::string FTrunc(Operation operation) {
980 return GenerateUnary(operation, "trunc", Type::Float, Type::Float, false);
981 }
2823 982
2824 switch (instr.ldst_sl.type.Value()) { 983 template <Type type>
2825 case Tegra::Shader::StoreType::Bytes32: 984 std::string FCastInteger(Operation operation) {
2826 regs.SetRegisterToFloat(instr.gpr0, 0, op_a, 1, 1); 985 return GenerateUnary(operation, "float", Type::Float, type, false);
2827 break; 986 }
2828 default:
2829 UNIMPLEMENTED_MSG("LD_L Unhandled type: {}",
2830 static_cast<unsigned>(instr.ldst_sl.type.Value()));
2831 }
2832 break;
2833 }
2834 case OpCode::Id::ST_A: {
2835 UNIMPLEMENTED_IF_MSG(instr.gpr8.Value() != Register::ZeroIndex,
2836 "Indirect attribute loads are not supported");
2837 UNIMPLEMENTED_IF_MSG((instr.attribute.fmt20.immediate.Value() % sizeof(u32)) != 0,
2838 "Unaligned attribute loads are not supported");
2839
2840 u64 next_element = instr.attribute.fmt20.element;
2841 u64 next_index = static_cast<u64>(instr.attribute.fmt20.index.Value());
2842
2843 const auto StoreNextElement = [&](u32 reg_offset) {
2844 regs.SetOutputAttributeToRegister(static_cast<Attribute::Index>(next_index),
2845 next_element, instr.gpr0.Value() + reg_offset,
2846 instr.gpr39.Value());
2847
2848 // Load the next attribute element into the following register. If the element
2849 // to load goes beyond the vec4 size, load the first element of the next
2850 // attribute.
2851 next_element = (next_element + 1) % 4;
2852 next_index = next_index + (next_element == 0 ? 1 : 0);
2853 };
2854
2855 const u32 num_words = static_cast<u32>(instr.attribute.fmt20.size.Value()) + 1;
2856 for (u32 reg_offset = 0; reg_offset < num_words; ++reg_offset) {
2857 StoreNextElement(reg_offset);
2858 }
2859 987
2860 break; 988 std::string ICastFloat(Operation operation) {
2861 } 989 return GenerateUnary(operation, "int", Type::Int, Type::Float, false);
2862 case OpCode::Id::ST_L: { 990 }
2863 UNIMPLEMENTED_IF_MSG(instr.st_l.unknown == 0, "ST_L Unhandled mode: {}",
2864 static_cast<unsigned>(instr.st_l.unknown.Value()));
2865 991
2866 const auto scope = shader.Scope(); 992 std::string ICastUnsigned(Operation operation) {
993 return GenerateUnary(operation, "int", Type::Int, Type::Uint, false);
994 }
2867 995
2868 std::string op = '(' + regs.GetRegisterAsInteger(instr.gpr8, 0, false) + " + " + 996 template <Type type>
2869 std::to_string(instr.smem_imm.Value()) + ')'; 997 std::string LogicalShiftLeft(Operation operation) {
998 return GenerateBinaryInfix(operation, "<<", type, type, Type::Uint);
999 }
2870 1000
2871 shader.AddLine("uint index = (" + op + " / 4);"); 1001 std::string ILogicalShiftRight(Operation operation) {
1002 const std::string op_a = VisitOperand(operation, 0, Type::Uint);
1003 const std::string op_b = VisitOperand(operation, 1, Type::Uint);
2872 1004
2873 switch (instr.ldst_sl.type.Value()) { 1005 return ApplyPrecise(operation,
2874 case Tegra::Shader::StoreType::Bytes32: 1006 BitwiseCastResult("int(" + op_a + " >> " + op_b + ')', Type::Int));
2875 regs.SetLocalMemoryAsFloat("index", regs.GetRegisterAsFloat(instr.gpr0)); 1007 }
2876 break;
2877 default:
2878 UNIMPLEMENTED_MSG("ST_L Unhandled type: {}",
2879 static_cast<unsigned>(instr.ldst_sl.type.Value()));
2880 }
2881 break;
2882 }
2883 case OpCode::Id::TEX: {
2884 Tegra::Shader::TextureType texture_type{instr.tex.texture_type};
2885 const bool is_array = instr.tex.array != 0;
2886 const bool depth_compare =
2887 instr.tex.UsesMiscMode(Tegra::Shader::TextureMiscMode::DC);
2888 const auto process_mode = instr.tex.GetTextureProcessMode();
2889 UNIMPLEMENTED_IF_MSG(instr.tex.UsesMiscMode(Tegra::Shader::TextureMiscMode::NODEP),
2890 "NODEP is not implemented");
2891 UNIMPLEMENTED_IF_MSG(instr.tex.UsesMiscMode(Tegra::Shader::TextureMiscMode::AOFFI),
2892 "AOFFI is not implemented");
2893
2894 const auto [coord, texture] =
2895 GetTEXCode(instr, texture_type, process_mode, depth_compare, is_array);
2896
2897 const auto scope = shader.Scope();
2898 shader.AddLine(coord);
2899
2900 if (depth_compare) {
2901 regs.SetRegisterToFloat(instr.gpr0, 0, texture, 1, 1);
2902 } else {
2903 shader.AddLine("vec4 texture_tmp = " + texture + ';');
2904 std::size_t dest_elem{};
2905 for (std::size_t elem = 0; elem < 4; ++elem) {
2906 if (!instr.tex.IsComponentEnabled(elem)) {
2907 // Skip disabled components
2908 continue;
2909 }
2910 regs.SetRegisterToFloat(instr.gpr0, elem, "texture_tmp", 1, 4, false, false,
2911 dest_elem);
2912 ++dest_elem;
2913 }
2914 }
2915 break;
2916 }
2917 case OpCode::Id::TEXS: {
2918 Tegra::Shader::TextureType texture_type{instr.texs.GetTextureType()};
2919 const bool is_array{instr.texs.IsArrayTexture()};
2920 const bool depth_compare =
2921 instr.texs.UsesMiscMode(Tegra::Shader::TextureMiscMode::DC);
2922 const auto process_mode = instr.texs.GetTextureProcessMode();
2923 1008
2924 UNIMPLEMENTED_IF_MSG(instr.texs.UsesMiscMode(Tegra::Shader::TextureMiscMode::NODEP), 1009 std::string IArithmeticShiftRight(Operation operation) {
2925 "NODEP is not implemented"); 1010 return GenerateBinaryInfix(operation, ">>", Type::Int, Type::Int, Type::Uint);
1011 }
2926 1012
2927 const auto scope = shader.Scope(); 1013 template <Type type>
1014 std::string BitwiseAnd(Operation operation) {
1015 return GenerateBinaryInfix(operation, "&", type, type, type);
1016 }
2928 1017
2929 auto [coord, texture] = 1018 template <Type type>
2930 GetTEXSCode(instr, texture_type, process_mode, depth_compare, is_array); 1019 std::string BitwiseOr(Operation operation) {
1020 return GenerateBinaryInfix(operation, "|", type, type, type);
1021 }
2931 1022
2932 shader.AddLine(coord); 1023 template <Type type>
1024 std::string BitwiseXor(Operation operation) {
1025 return GenerateBinaryInfix(operation, "^", type, type, type);
1026 }
2933 1027
2934 if (depth_compare) { 1028 template <Type type>
2935 texture = "vec4(" + texture + ')'; 1029 std::string BitwiseNot(Operation operation) {
2936 } 1030 return GenerateUnary(operation, "~", type, type, false);
2937 shader.AddLine("vec4 texture_tmp = " + texture + ';'); 1031 }
2938 1032
2939 if (instr.texs.fp32_flag) { 1033 std::string UCastFloat(Operation operation) {
2940 WriteTexsInstructionFloat(instr, "texture_tmp"); 1034 return GenerateUnary(operation, "uint", Type::Uint, Type::Float, false);
2941 } else { 1035 }
2942 WriteTexsInstructionHalfFloat(instr, "texture_tmp");
2943 }
2944 break;
2945 }
2946 case OpCode::Id::TLDS: {
2947 const Tegra::Shader::TextureType texture_type{instr.tlds.GetTextureType()};
2948 const bool is_array{instr.tlds.IsArrayTexture()};
2949 1036
2950 UNIMPLEMENTED_IF_MSG(instr.tlds.UsesMiscMode(Tegra::Shader::TextureMiscMode::NODEP), 1037 std::string UCastSigned(Operation operation) {
2951 "NODEP is not implemented"); 1038 return GenerateUnary(operation, "uint", Type::Uint, Type::Int, false);
2952 UNIMPLEMENTED_IF_MSG(instr.tlds.UsesMiscMode(Tegra::Shader::TextureMiscMode::AOFFI), 1039 }
2953 "AOFFI is not implemented");
2954 UNIMPLEMENTED_IF_MSG(instr.tlds.UsesMiscMode(Tegra::Shader::TextureMiscMode::MZ),
2955 "MZ is not implemented");
2956 1040
2957 const auto [coord, texture] = GetTLDSCode(instr, texture_type, is_array); 1041 std::string UShiftRight(Operation operation) {
1042 return GenerateBinaryInfix(operation, ">>", Type::Uint, Type::Uint, Type::Uint);
1043 }
2958 1044
2959 const auto scope = shader.Scope(); 1045 template <Type type>
1046 std::string BitfieldInsert(Operation operation) {
1047 return GenerateQuaternary(operation, "bitfieldInsert", type, type, type, Type::Int,
1048 Type::Int);
1049 }
2960 1050
2961 shader.AddLine(coord); 1051 template <Type type>
2962 shader.AddLine("vec4 texture_tmp = " + texture + ';'); 1052 std::string BitfieldExtract(Operation operation) {
2963 WriteTexsInstructionFloat(instr, "texture_tmp"); 1053 return GenerateTernary(operation, "bitfieldExtract", type, type, Type::Int, Type::Int);
2964 break; 1054 }
2965 }
2966 case OpCode::Id::TLD4: {
2967
2968 UNIMPLEMENTED_IF_MSG(instr.tld4.UsesMiscMode(Tegra::Shader::TextureMiscMode::NODEP),
2969 "NODEP is not implemented");
2970 UNIMPLEMENTED_IF_MSG(instr.tld4.UsesMiscMode(Tegra::Shader::TextureMiscMode::AOFFI),
2971 "AOFFI is not implemented");
2972 UNIMPLEMENTED_IF_MSG(instr.tld4.UsesMiscMode(Tegra::Shader::TextureMiscMode::NDV),
2973 "NDV is not implemented");
2974 UNIMPLEMENTED_IF_MSG(instr.tld4.UsesMiscMode(Tegra::Shader::TextureMiscMode::PTP),
2975 "PTP is not implemented");
2976
2977 auto texture_type = instr.tld4.texture_type.Value();
2978 const bool depth_compare =
2979 instr.tld4.UsesMiscMode(Tegra::Shader::TextureMiscMode::DC);
2980 const bool is_array = instr.tld4.array != 0;
2981
2982 const auto [coord, texture] =
2983 GetTLD4Code(instr, texture_type, depth_compare, is_array);
2984
2985 const auto scope = shader.Scope();
2986
2987 shader.AddLine(coord);
2988 std::size_t dest_elem{};
2989
2990 shader.AddLine("vec4 texture_tmp = " + texture + ';');
2991 for (std::size_t elem = 0; elem < 4; ++elem) {
2992 if (!instr.tex.IsComponentEnabled(elem)) {
2993 // Skip disabled components
2994 continue;
2995 }
2996 regs.SetRegisterToFloat(instr.gpr0, elem, "texture_tmp", 1, 4, false, false,
2997 dest_elem);
2998 ++dest_elem;
2999 }
3000 break;
3001 }
3002 case OpCode::Id::TLD4S: {
3003 UNIMPLEMENTED_IF_MSG(
3004 instr.tld4s.UsesMiscMode(Tegra::Shader::TextureMiscMode::NODEP),
3005 "NODEP is not implemented");
3006 UNIMPLEMENTED_IF_MSG(
3007 instr.tld4s.UsesMiscMode(Tegra::Shader::TextureMiscMode::AOFFI),
3008 "AOFFI is not implemented");
3009 1055
3010 const auto scope = shader.Scope(); 1056 template <Type type>
1057 std::string BitCount(Operation operation) {
1058 return GenerateUnary(operation, "bitCount", type, type, false);
1059 }
3011 1060
3012 std::string coords; 1061 std::string HNegate(Operation operation) {
1062 const auto GetNegate = [&](std::size_t index) -> std::string {
1063 return VisitOperand(operation, index, Type::Bool) + " ? -1 : 1";
1064 };
1065 const std::string value = '(' + VisitOperand(operation, 0, Type::HalfFloat) + " * vec2(" +
1066 GetNegate(1) + ", " + GetNegate(2) + "))";
1067 return BitwiseCastResult(value, Type::HalfFloat);
1068 }
3013 1069
3014 const bool depth_compare = 1070 std::string HMergeF32(Operation operation) {
3015 instr.tld4s.UsesMiscMode(Tegra::Shader::TextureMiscMode::DC); 1071 return "float(toHalf2(" + Visit(operation[0]) + ")[0])";
1072 }
3016 1073
3017 const std::string sampler = GetSampler( 1074 std::string HMergeH0(Operation operation) {
3018 instr.sampler, Tegra::Shader::TextureType::Texture2D, false, depth_compare); 1075 return "fromHalf2(vec2(toHalf2(" + Visit(operation[0]) + ")[1], toHalf2(" +
1076 Visit(operation[1]) + ")[0]))";
1077 }
3019 1078
3020 const std::string op_a = regs.GetRegisterAsFloat(instr.gpr8); 1079 std::string HMergeH1(Operation operation) {
3021 coords = "vec2 coords = vec2(" + op_a + ", "; 1080 return "fromHalf2(vec2(toHalf2(" + Visit(operation[0]) + ")[0], toHalf2(" +
3022 std::string texture = "textureGather(" + sampler + ", coords, "; 1081 Visit(operation[1]) + ")[1]))";
1082 }
3023 1083
3024 if (!depth_compare) { 1084 std::string HPack2(Operation operation) {
3025 const std::string op_b = regs.GetRegisterAsFloat(instr.gpr20); 1085 return "utof(packHalf2x16(vec2(" + Visit(operation[0]) + ", " + Visit(operation[1]) + ")))";
3026 coords += op_b + ");"; 1086 }
3027 texture += std::to_string(instr.tld4s.component) + ')';
3028 } else {
3029 const std::string op_b = regs.GetRegisterAsFloat(instr.gpr8.Value() + 1);
3030 const std::string op_c = regs.GetRegisterAsFloat(instr.gpr20);
3031 coords += op_b + ");";
3032 texture += op_c + ')';
3033 }
3034 shader.AddLine(coords);
3035 shader.AddLine("vec4 texture_tmp = " + texture + ';');
3036 WriteTexsInstructionFloat(instr, "texture_tmp");
3037 break;
3038 }
3039 case OpCode::Id::TXQ: {
3040 UNIMPLEMENTED_IF_MSG(instr.txq.UsesMiscMode(Tegra::Shader::TextureMiscMode::NODEP),
3041 "NODEP is not implemented");
3042
3043 const auto scope = shader.Scope();
3044
3045 // TODO: The new commits on the texture refactor, change the way samplers work.
3046 // Sadly, not all texture instructions specify the type of texture their sampler
3047 // uses. This must be fixed at a later instance.
3048 const std::string sampler =
3049 GetSampler(instr.sampler, Tegra::Shader::TextureType::Texture2D, false, false);
3050 switch (instr.txq.query_type) {
3051 case Tegra::Shader::TextureQueryType::Dimension: {
3052 const std::string texture = "textureSize(" + sampler + ", " +
3053 regs.GetRegisterAsInteger(instr.gpr8) + ')';
3054 const std::string mip_level = "textureQueryLevels(" + sampler + ')';
3055 shader.AddLine("ivec2 sizes = " + texture + ';');
3056
3057 regs.SetRegisterToInteger(instr.gpr0.Value() + 0, true, 0, "sizes.x", 1, 1);
3058 regs.SetRegisterToInteger(instr.gpr0.Value() + 1, true, 0, "sizes.y", 1, 1);
3059 regs.SetRegisterToInteger(instr.gpr0.Value() + 2, true, 0, "0", 1, 1);
3060 regs.SetRegisterToInteger(instr.gpr0.Value() + 3, true, 0, mip_level, 1, 1);
3061 break;
3062 }
3063 default: {
3064 UNIMPLEMENTED_MSG("Unhandled texture query type: {}",
3065 static_cast<u32>(instr.txq.query_type.Value()));
3066 }
3067 }
3068 break;
3069 }
3070 case OpCode::Id::TMML: {
3071 UNIMPLEMENTED_IF_MSG(instr.tmml.UsesMiscMode(Tegra::Shader::TextureMiscMode::NODEP),
3072 "NODEP is not implemented");
3073 UNIMPLEMENTED_IF_MSG(instr.tmml.UsesMiscMode(Tegra::Shader::TextureMiscMode::NDV),
3074 "NDV is not implemented");
3075
3076 const std::string x = regs.GetRegisterAsFloat(instr.gpr8);
3077 const bool is_array = instr.tmml.array != 0;
3078 auto texture_type = instr.tmml.texture_type.Value();
3079 const std::string sampler =
3080 GetSampler(instr.sampler, texture_type, is_array, false);
3081
3082 const auto scope = shader.Scope();
3083
3084 // TODO: Add coordinates for different samplers once other texture types are
3085 // implemented.
3086 switch (texture_type) {
3087 case Tegra::Shader::TextureType::Texture1D: {
3088 shader.AddLine("float coords = " + x + ';');
3089 break;
3090 }
3091 case Tegra::Shader::TextureType::Texture2D: {
3092 const std::string y = regs.GetRegisterAsFloat(instr.gpr8.Value() + 1);
3093 shader.AddLine("vec2 coords = vec2(" + x + ", " + y + ");");
3094 break;
3095 }
3096 default:
3097 UNIMPLEMENTED_MSG("Unhandled texture type {}", static_cast<u32>(texture_type));
3098 1087
3099 // Fallback to interpreting as a 2D texture for now 1088 template <Type type>
3100 const std::string y = regs.GetRegisterAsFloat(instr.gpr8.Value() + 1); 1089 std::string LogicalLessThan(Operation operation) {
3101 shader.AddLine("vec2 coords = vec2(" + x + ", " + y + ");"); 1090 return GenerateBinaryInfix(operation, "<", Type::Bool, type, type);
3102 texture_type = Tegra::Shader::TextureType::Texture2D; 1091 }
3103 }
3104 1092
3105 const std::string texture = "textureQueryLod(" + sampler + ", coords)"; 1093 template <Type type>
3106 shader.AddLine("vec2 tmp = " + texture + " * vec2(256.0, 256.0);"); 1094 std::string LogicalEqual(Operation operation) {
1095 return GenerateBinaryInfix(operation, "==", Type::Bool, type, type);
1096 }
3107 1097
3108 regs.SetRegisterToInteger(instr.gpr0, true, 0, "int(tmp.y)", 1, 1); 1098 template <Type type>
3109 regs.SetRegisterToInteger(instr.gpr0.Value() + 1, false, 0, "uint(tmp.x)", 1, 1); 1099 std::string LogicalLessEqual(Operation operation) {
3110 break; 1100 return GenerateBinaryInfix(operation, "<=", Type::Bool, type, type);
3111 } 1101 }
3112 default: {
3113 UNIMPLEMENTED_MSG("Unhandled memory instruction: {}", opcode->get().GetName());
3114 }
3115 }
3116 break;
3117 }
3118 case OpCode::Type::FloatSetPredicate: {
3119 const std::string op_a =
3120 GetOperandAbsNeg(regs.GetRegisterAsFloat(instr.gpr8), instr.fsetp.abs_a != 0,
3121 instr.fsetp.neg_a != 0);
3122 1102
3123 std::string op_b; 1103 template <Type type>
1104 std::string LogicalGreaterThan(Operation operation) {
1105 return GenerateBinaryInfix(operation, ">", Type::Bool, type, type);
1106 }
3124 1107
3125 if (instr.is_b_imm) { 1108 template <Type type>
3126 op_b += '(' + GetImmediate19(instr) + ')'; 1109 std::string LogicalNotEqual(Operation operation) {
3127 } else { 1110 return GenerateBinaryInfix(operation, "!=", Type::Bool, type, type);
3128 if (instr.is_b_gpr) { 1111 }
3129 op_b += regs.GetRegisterAsFloat(instr.gpr20);
3130 } else {
3131 op_b += regs.GetUniform(instr.cbuf34.index, instr.cbuf34.offset,
3132 GLSLRegister::Type::Float);
3133 }
3134 }
3135 1112
3136 if (instr.fsetp.abs_b) { 1113 template <Type type>
3137 op_b = "abs(" + op_b + ')'; 1114 std::string LogicalGreaterEqual(Operation operation) {
3138 } 1115 return GenerateBinaryInfix(operation, ">=", Type::Bool, type, type);
1116 }
3139 1117
3140 // We can't use the constant predicate as destination. 1118 std::string LogicalFIsNan(Operation operation) {
3141 ASSERT(instr.fsetp.pred3 != static_cast<u64>(Pred::UnusedIndex)); 1119 return GenerateUnary(operation, "isnan", Type::Bool, Type::Float, false);
1120 }
3142 1121
3143 const std::string second_pred = 1122 std::string LogicalAssign(Operation operation) {
3144 GetPredicateCondition(instr.fsetp.pred39, instr.fsetp.neg_pred != 0); 1123 const Node dest = operation[0];
1124 const Node src = operation[1];
3145 1125
3146 const std::string combiner = GetPredicateCombiner(instr.fsetp.op); 1126 std::string target;
3147 1127
3148 const std::string predicate = GetPredicateComparison(instr.fsetp.cond, op_a, op_b); 1128 if (const auto pred = std::get_if<PredicateNode>(dest)) {
3149 // Set the primary predicate to the result of Predicate OP SecondPredicate 1129 ASSERT_MSG(!pred->IsNegated(), "Negating logical assignment");
3150 SetPredicate(instr.fsetp.pred3,
3151 '(' + predicate + ") " + combiner + " (" + second_pred + ')');
3152 1130
3153 if (instr.fsetp.pred0 != static_cast<u64>(Pred::UnusedIndex)) { 1131 const auto index = pred->GetIndex();
3154 // Set the secondary predicate to the result of !Predicate OP SecondPredicate, 1132 switch (index) {
3155 // if enabled 1133 case Tegra::Shader::Pred::NeverExecute:
3156 SetPredicate(instr.fsetp.pred0, 1134 case Tegra::Shader::Pred::UnusedIndex:
3157 "!(" + predicate + ") " + combiner + " (" + second_pred + ')'); 1135 // Writing to these predicates is a no-op
1136 return {};
3158 } 1137 }
3159 break; 1138 target = GetPredicate(index);
1139 } else if (const auto flag = std::get_if<InternalFlagNode>(dest)) {
1140 target = GetInternalFlag(flag->GetFlag());
3160 } 1141 }
3161 case OpCode::Type::IntegerSetPredicate: {
3162 const std::string op_a =
3163 regs.GetRegisterAsInteger(instr.gpr8, 0, instr.isetp.is_signed);
3164 std::string op_b;
3165
3166 if (instr.is_b_imm) {
3167 op_b += '(' + std::to_string(instr.alu.GetSignedImm20_20()) + ')';
3168 } else {
3169 if (instr.is_b_gpr) {
3170 op_b += regs.GetRegisterAsInteger(instr.gpr20, 0, instr.isetp.is_signed);
3171 } else {
3172 op_b += regs.GetUniform(instr.cbuf34.index, instr.cbuf34.offset,
3173 GLSLRegister::Type::Integer);
3174 }
3175 }
3176 1142
3177 // We can't use the constant predicate as destination. 1143 code.AddLine(target + " = " + Visit(src) + ';');
3178 ASSERT(instr.isetp.pred3 != static_cast<u64>(Pred::UnusedIndex)); 1144 return {};
1145 }
3179 1146
3180 const std::string second_pred = 1147 std::string LogicalAnd(Operation operation) {
3181 GetPredicateCondition(instr.isetp.pred39, instr.isetp.neg_pred != 0); 1148 return GenerateBinaryInfix(operation, "&&", Type::Bool, Type::Bool, Type::Bool);
1149 }
3182 1150
3183 const std::string combiner = GetPredicateCombiner(instr.isetp.op); 1151 std::string LogicalOr(Operation operation) {
1152 return GenerateBinaryInfix(operation, "||", Type::Bool, Type::Bool, Type::Bool);
1153 }
3184 1154
3185 const std::string predicate = GetPredicateComparison(instr.isetp.cond, op_a, op_b); 1155 std::string LogicalXor(Operation operation) {
3186 // Set the primary predicate to the result of Predicate OP SecondPredicate 1156 return GenerateBinaryInfix(operation, "^^", Type::Bool, Type::Bool, Type::Bool);
3187 SetPredicate(instr.isetp.pred3, 1157 }
3188 '(' + predicate + ") " + combiner + " (" + second_pred + ')');
3189 1158
3190 if (instr.isetp.pred0 != static_cast<u64>(Pred::UnusedIndex)) { 1159 std::string LogicalNegate(Operation operation) {
3191 // Set the secondary predicate to the result of !Predicate OP SecondPredicate, 1160 return GenerateUnary(operation, "!", Type::Bool, Type::Bool, false);
3192 // if enabled 1161 }
3193 SetPredicate(instr.isetp.pred0,
3194 "!(" + predicate + ") " + combiner + " (" + second_pred + ')');
3195 }
3196 break;
3197 }
3198 case OpCode::Type::HalfSetPredicate: {
3199 UNIMPLEMENTED_IF(instr.hsetp2.ftz != 0);
3200
3201 const std::string op_a =
3202 GetHalfFloat(regs.GetRegisterAsInteger(instr.gpr8, 0, false), instr.hsetp2.type_a,
3203 instr.hsetp2.abs_a, instr.hsetp2.negate_a);
3204
3205 const std::string op_b = [&]() {
3206 switch (opcode->get().GetId()) {
3207 case OpCode::Id::HSETP2_R:
3208 return GetHalfFloat(regs.GetRegisterAsInteger(instr.gpr20, 0, false),
3209 instr.hsetp2.type_b, instr.hsetp2.abs_a,
3210 instr.hsetp2.negate_b);
3211 default:
3212 UNREACHABLE();
3213 return std::string("vec2(0)");
3214 }
3215 }();
3216 1162
3217 // We can't use the constant predicate as destination. 1163 std::string LogicalPick2(Operation operation) {
3218 ASSERT(instr.hsetp2.pred3 != static_cast<u64>(Pred::UnusedIndex)); 1164 const std::string pair = VisitOperand(operation, 0, Type::Bool2);
1165 return pair + '[' + VisitOperand(operation, 1, Type::Uint) + ']';
1166 }
3219 1167
3220 const std::string second_pred = 1168 std::string LogicalAll2(Operation operation) {
3221 GetPredicateCondition(instr.hsetp2.pred39, instr.hsetp2.neg_pred != 0); 1169 return GenerateUnary(operation, "all", Type::Bool, Type::Bool2);
1170 }
3222 1171
3223 const std::string combiner = GetPredicateCombiner(instr.hsetp2.op); 1172 std::string LogicalAny2(Operation operation) {
1173 return GenerateUnary(operation, "any", Type::Bool, Type::Bool2);
1174 }
3224 1175
3225 const std::string component_combiner = instr.hsetp2.h_and ? "&&" : "||"; 1176 std::string Logical2HLessThan(Operation operation) {
3226 const std::string predicate = 1177 return GenerateBinaryCall(operation, "lessThan", Type::Bool2, Type::HalfFloat,
3227 '(' + GetPredicateComparison(instr.hsetp2.cond, op_a + ".x", op_b + ".x") + ' ' + 1178 Type::HalfFloat);
3228 component_combiner + ' ' + 1179 }
3229 GetPredicateComparison(instr.hsetp2.cond, op_a + ".y", op_b + ".y") + ')';
3230 1180
3231 // Set the primary predicate to the result of Predicate OP SecondPredicate 1181 std::string Logical2HEqual(Operation operation) {
3232 SetPredicate(instr.hsetp2.pred3, 1182 return GenerateBinaryCall(operation, "equal", Type::Bool2, Type::HalfFloat,
3233 '(' + predicate + ") " + combiner + " (" + second_pred + ')'); 1183 Type::HalfFloat);
1184 }
3234 1185
3235 if (instr.hsetp2.pred0 != static_cast<u64>(Pred::UnusedIndex)) { 1186 std::string Logical2HLessEqual(Operation operation) {
3236 // Set the secondary predicate to the result of !Predicate OP SecondPredicate, 1187 return GenerateBinaryCall(operation, "lessThanEqual", Type::Bool2, Type::HalfFloat,
3237 // if enabled 1188 Type::HalfFloat);
3238 SetPredicate(instr.hsetp2.pred0, 1189 }
3239 "!(" + predicate + ") " + combiner + " (" + second_pred + ')');
3240 }
3241 break;
3242 }
3243 case OpCode::Type::PredicateSetRegister: {
3244 UNIMPLEMENTED_IF_MSG(instr.generates_cc,
3245 "Condition codes generation in PSET is partially implemented");
3246
3247 const std::string op_a =
3248 GetPredicateCondition(instr.pset.pred12, instr.pset.neg_pred12 != 0);
3249 const std::string op_b =
3250 GetPredicateCondition(instr.pset.pred29, instr.pset.neg_pred29 != 0);
3251
3252 const std::string second_pred =
3253 GetPredicateCondition(instr.pset.pred39, instr.pset.neg_pred39 != 0);
3254
3255 const std::string combiner = GetPredicateCombiner(instr.pset.op);
3256
3257 const std::string predicate =
3258 '(' + op_a + ") " + GetPredicateCombiner(instr.pset.cond) + " (" + op_b + ')';
3259 const std::string result = '(' + predicate + ") " + combiner + " (" + second_pred + ')';
3260 if (instr.pset.bf == 0) {
3261 const std::string value = '(' + result + ") ? 0xFFFFFFFF : 0";
3262 regs.SetRegisterToInteger(instr.gpr0, false, 0, value, 1, 1, false,
3263 instr.generates_cc);
3264 } else {
3265 const std::string value = '(' + result + ") ? 1.0 : 0.0";
3266 regs.SetRegisterToFloat(instr.gpr0, 0, value, 1, 1, false, instr.generates_cc);
3267 }
3268 break;
3269 }
3270 case OpCode::Type::PredicateSetPredicate: {
3271 switch (opcode->get().GetId()) {
3272 case OpCode::Id::PSETP: {
3273 const std::string op_a =
3274 GetPredicateCondition(instr.psetp.pred12, instr.psetp.neg_pred12 != 0);
3275 const std::string op_b =
3276 GetPredicateCondition(instr.psetp.pred29, instr.psetp.neg_pred29 != 0);
3277
3278 // We can't use the constant predicate as destination.
3279 ASSERT(instr.psetp.pred3 != static_cast<u64>(Pred::UnusedIndex));
3280
3281 const std::string second_pred =
3282 GetPredicateCondition(instr.psetp.pred39, instr.psetp.neg_pred39 != 0);
3283
3284 const std::string combiner = GetPredicateCombiner(instr.psetp.op);
3285
3286 const std::string predicate =
3287 '(' + op_a + ") " + GetPredicateCombiner(instr.psetp.cond) + " (" + op_b + ')';
3288
3289 // Set the primary predicate to the result of Predicate OP SecondPredicate
3290 SetPredicate(instr.psetp.pred3,
3291 '(' + predicate + ") " + combiner + " (" + second_pred + ')');
3292
3293 if (instr.psetp.pred0 != static_cast<u64>(Pred::UnusedIndex)) {
3294 // Set the secondary predicate to the result of !Predicate OP SecondPredicate,
3295 // if enabled
3296 SetPredicate(instr.psetp.pred0,
3297 "!(" + predicate + ") " + combiner + " (" + second_pred + ')');
3298 }
3299 break;
3300 }
3301 case OpCode::Id::CSETP: {
3302 const std::string pred =
3303 GetPredicateCondition(instr.csetp.pred39, instr.csetp.neg_pred39 != 0);
3304 const std::string combiner = GetPredicateCombiner(instr.csetp.op);
3305 const std::string condition_code = regs.GetConditionCode(instr.csetp.cc);
3306 if (instr.csetp.pred3 != static_cast<u64>(Pred::UnusedIndex)) {
3307 SetPredicate(instr.csetp.pred3,
3308 '(' + condition_code + ") " + combiner + " (" + pred + ')');
3309 }
3310 if (instr.csetp.pred0 != static_cast<u64>(Pred::UnusedIndex)) {
3311 SetPredicate(instr.csetp.pred0,
3312 "!(" + condition_code + ") " + combiner + " (" + pred + ')');
3313 }
3314 break;
3315 }
3316 default: {
3317 UNIMPLEMENTED_MSG("Unhandled predicate instruction: {}", opcode->get().GetName());
3318 }
3319 }
3320 break;
3321 }
3322 case OpCode::Type::RegisterSetPredicate: {
3323 UNIMPLEMENTED_IF(instr.r2p.mode != Tegra::Shader::R2pMode::Pr);
3324 1190
3325 const std::string apply_mask = [&]() { 1191 std::string Logical2HGreaterThan(Operation operation) {
3326 switch (opcode->get().GetId()) { 1192 return GenerateBinaryCall(operation, "greaterThan", Type::Bool2, Type::HalfFloat,
3327 case OpCode::Id::R2P_IMM: 1193 Type::HalfFloat);
3328 return std::to_string(instr.r2p.immediate_mask); 1194 }
3329 default:
3330 UNREACHABLE();
3331 return std::to_string(instr.r2p.immediate_mask);
3332 }
3333 }();
3334 const std::string mask = '(' + regs.GetRegisterAsInteger(instr.gpr8, 0, false) +
3335 " >> " + std::to_string(instr.r2p.byte) + ')';
3336 1195
3337 constexpr u64 programmable_preds = 7; 1196 std::string Logical2HNotEqual(Operation operation) {
3338 for (u64 pred = 0; pred < programmable_preds; ++pred) { 1197 return GenerateBinaryCall(operation, "notEqual", Type::Bool2, Type::HalfFloat,
3339 const auto shift = std::to_string(1 << pred); 1198 Type::HalfFloat);
1199 }
3340 1200
3341 shader.AddLine("if ((" + apply_mask + " & " + shift + ") != 0) {"); 1201 std::string Logical2HGreaterEqual(Operation operation) {
3342 ++shader.scope; 1202 return GenerateBinaryCall(operation, "greaterThanEqual", Type::Bool2, Type::HalfFloat,
1203 Type::HalfFloat);
1204 }
3343 1205
3344 SetPredicate(pred, '(' + mask + " & " + shift + ") != 0"); 1206 std::string Texture(Operation operation) {
1207 const auto meta = std::get_if<MetaTexture>(&operation.GetMeta());
1208 ASSERT(meta);
3345 1209
3346 --shader.scope; 1210 std::string expr = GenerateTexture(
3347 shader.AddLine('}'); 1211 operation, "", {TextureAoffi{}, TextureArgument{Type::Float, meta->bias}});
3348 } 1212 if (meta->sampler.IsShadow()) {
3349 break; 1213 expr = "vec4(" + expr + ')';
3350 } 1214 }
3351 case OpCode::Type::FloatSet: { 1215 return expr + GetSwizzle(meta->element);
3352 const std::string op_a = GetOperandAbsNeg(regs.GetRegisterAsFloat(instr.gpr8), 1216 }
3353 instr.fset.abs_a != 0, instr.fset.neg_a != 0);
3354
3355 std::string op_b;
3356
3357 if (instr.is_b_imm) {
3358 const std::string imm = GetImmediate19(instr);
3359 op_b = imm;
3360 } else {
3361 if (instr.is_b_gpr) {
3362 op_b = regs.GetRegisterAsFloat(instr.gpr20);
3363 } else {
3364 op_b = regs.GetUniform(instr.cbuf34.index, instr.cbuf34.offset,
3365 GLSLRegister::Type::Float);
3366 }
3367 }
3368
3369 op_b = GetOperandAbsNeg(op_b, instr.fset.abs_b != 0, instr.fset.neg_b != 0);
3370
3371 // The fset instruction sets a register to 1.0 or -1 (depending on the bf bit) if the
3372 // condition is true, and to 0 otherwise.
3373 const std::string second_pred =
3374 GetPredicateCondition(instr.fset.pred39, instr.fset.neg_pred != 0);
3375
3376 const std::string combiner = GetPredicateCombiner(instr.fset.op);
3377 1217
3378 const std::string predicate = "((" + 1218 std::string TextureLod(Operation operation) {
3379 GetPredicateComparison(instr.fset.cond, op_a, op_b) + 1219 const auto meta = std::get_if<MetaTexture>(&operation.GetMeta());
3380 ") " + combiner + " (" + second_pred + "))"; 1220 ASSERT(meta);
3381 1221
3382 if (instr.fset.bf) { 1222 std::string expr = GenerateTexture(
3383 regs.SetRegisterToFloat(instr.gpr0, 0, predicate + " ? 1.0 : 0.0", 1, 1, false, 1223 operation, "Lod", {TextureArgument{Type::Float, meta->lod}, TextureAoffi{}});
3384 instr.generates_cc); 1224 if (meta->sampler.IsShadow()) {
3385 } else { 1225 expr = "vec4(" + expr + ')';
3386 regs.SetRegisterToInteger(instr.gpr0, false, 0, predicate + " ? 0xFFFFFFFF : 0", 1,
3387 1, false, instr.generates_cc);
3388 }
3389 break;
3390 } 1226 }
3391 case OpCode::Type::IntegerSet: { 1227 return expr + GetSwizzle(meta->element);
3392 const std::string op_a = regs.GetRegisterAsInteger(instr.gpr8, 0, instr.iset.is_signed); 1228 }
3393 1229
3394 std::string op_b; 1230 std::string TextureGather(Operation operation) {
1231 const auto meta = std::get_if<MetaTexture>(&operation.GetMeta());
1232 ASSERT(meta);
3395 1233
3396 if (instr.is_b_imm) { 1234 const auto type = meta->sampler.IsShadow() ? Type::Float : Type::Int;
3397 op_b = std::to_string(instr.alu.GetSignedImm20_20()); 1235 return GenerateTexture(operation, "Gather",
3398 } else { 1236 {TextureArgument{type, meta->component}, TextureAoffi{}}) +
3399 if (instr.is_b_gpr) { 1237 GetSwizzle(meta->element);
3400 op_b = regs.GetRegisterAsInteger(instr.gpr20, 0, instr.iset.is_signed); 1238 }
3401 } else {
3402 op_b = regs.GetUniform(instr.cbuf34.index, instr.cbuf34.offset,
3403 GLSLRegister::Type::Integer);
3404 }
3405 }
3406
3407 // The iset instruction sets a register to 1.0 or -1 (depending on the bf bit) if the
3408 // condition is true, and to 0 otherwise.
3409 const std::string second_pred =
3410 GetPredicateCondition(instr.iset.pred39, instr.iset.neg_pred != 0);
3411 1239
3412 const std::string combiner = GetPredicateCombiner(instr.iset.op); 1240 std::string TextureQueryDimensions(Operation operation) {
1241 const auto meta = std::get_if<MetaTexture>(&operation.GetMeta());
1242 ASSERT(meta);
3413 1243
3414 const std::string predicate = "((" + 1244 const std::string sampler = GetSampler(meta->sampler);
3415 GetPredicateComparison(instr.iset.cond, op_a, op_b) + 1245 const std::string lod = VisitOperand(operation, 0, Type::Int);
3416 ") " + combiner + " (" + second_pred + "))";
3417 1246
3418 if (instr.iset.bf) { 1247 switch (meta->element) {
3419 regs.SetRegisterToFloat(instr.gpr0, 0, predicate + " ? 1.0 : 0.0", 1, 1); 1248 case 0:
3420 } else { 1249 case 1:
3421 regs.SetRegisterToInteger(instr.gpr0, false, 0, predicate + " ? 0xFFFFFFFF : 0", 1, 1250 return "itof(int(textureSize(" + sampler + ", " + lod + ')' +
3422 1); 1251 GetSwizzle(meta->element) + "))";
3423 } 1252 case 2:
3424 break; 1253 return "0";
1254 case 3:
1255 return "itof(textureQueryLevels(" + sampler + "))";
3425 } 1256 }
3426 case OpCode::Type::HalfSet: { 1257 UNREACHABLE();
3427 UNIMPLEMENTED_IF(instr.hset2.ftz != 0); 1258 return "0";
3428 1259 }
3429 const std::string op_a =
3430 GetHalfFloat(regs.GetRegisterAsInteger(instr.gpr8, 0, false), instr.hset2.type_a,
3431 instr.hset2.abs_a != 0, instr.hset2.negate_a != 0);
3432
3433 const std::string op_b = [&]() {
3434 switch (opcode->get().GetId()) {
3435 case OpCode::Id::HSET2_R:
3436 return GetHalfFloat(regs.GetRegisterAsInteger(instr.gpr20, 0, false),
3437 instr.hset2.type_b, instr.hset2.abs_b != 0,
3438 instr.hset2.negate_b != 0);
3439 default:
3440 UNREACHABLE();
3441 return std::string("vec2(0)");
3442 }
3443 }();
3444
3445 const std::string second_pred =
3446 GetPredicateCondition(instr.hset2.pred39, instr.hset2.neg_pred != 0);
3447
3448 const std::string combiner = GetPredicateCombiner(instr.hset2.op);
3449
3450 // HSET2 operates on each half float in the pack.
3451 std::string result;
3452 for (int i = 0; i < 2; ++i) {
3453 const std::string float_value = i == 0 ? "0x00003c00" : "0x3c000000";
3454 const std::string integer_value = i == 0 ? "0x0000ffff" : "0xffff0000";
3455 const std::string value = instr.hset2.bf == 1 ? float_value : integer_value;
3456 1260
3457 const std::string comp = std::string(".") + "xy"[i]; 1261 std::string TextureQueryLod(Operation operation) {
3458 const std::string predicate = 1262 const auto meta = std::get_if<MetaTexture>(&operation.GetMeta());
3459 "((" + GetPredicateComparison(instr.hset2.cond, op_a + comp, op_b + comp) + 1263 ASSERT(meta);
3460 ") " + combiner + " (" + second_pred + "))";
3461 1264
3462 result += '(' + predicate + " ? " + value + " : 0)"; 1265 if (meta->element < 2) {
3463 if (i == 0) { 1266 return "itof(int((" + GenerateTexture(operation, "QueryLod", {}) + " * vec2(256))" +
3464 result += " | "; 1267 GetSwizzle(meta->element) + "))";
3465 }
3466 }
3467 regs.SetRegisterToInteger(instr.gpr0, false, 0, '(' + result + ')', 1, 1);
3468 break;
3469 } 1268 }
3470 case OpCode::Type::Xmad: { 1269 return "0";
3471 UNIMPLEMENTED_IF(instr.xmad.sign_a); 1270 }
3472 UNIMPLEMENTED_IF(instr.xmad.sign_b);
3473 UNIMPLEMENTED_IF_MSG(instr.generates_cc,
3474 "Condition codes generation in XMAD is partially implemented");
3475
3476 std::string op_a{regs.GetRegisterAsInteger(instr.gpr8, 0, instr.xmad.sign_a)};
3477 std::string op_b;
3478 std::string op_c;
3479
3480 // TODO(bunnei): Needs to be fixed once op_a or op_b is signed
3481 UNIMPLEMENTED_IF(instr.xmad.sign_a != instr.xmad.sign_b);
3482 const bool is_signed{instr.xmad.sign_a == 1};
3483
3484 bool is_merge{};
3485 switch (opcode->get().GetId()) {
3486 case OpCode::Id::XMAD_CR: {
3487 is_merge = instr.xmad.merge_56;
3488 op_b += regs.GetUniform(instr.cbuf34.index, instr.cbuf34.offset,
3489 instr.xmad.sign_b ? GLSLRegister::Type::Integer
3490 : GLSLRegister::Type::UnsignedInteger);
3491 op_c += regs.GetRegisterAsInteger(instr.gpr39, 0, is_signed);
3492 break;
3493 }
3494 case OpCode::Id::XMAD_RR: {
3495 is_merge = instr.xmad.merge_37;
3496 op_b += regs.GetRegisterAsInteger(instr.gpr20, 0, instr.xmad.sign_b);
3497 op_c += regs.GetRegisterAsInteger(instr.gpr39, 0, is_signed);
3498 break;
3499 }
3500 case OpCode::Id::XMAD_RC: {
3501 op_b += regs.GetRegisterAsInteger(instr.gpr39, 0, instr.xmad.sign_b);
3502 op_c += regs.GetUniform(instr.cbuf34.index, instr.cbuf34.offset,
3503 is_signed ? GLSLRegister::Type::Integer
3504 : GLSLRegister::Type::UnsignedInteger);
3505 break;
3506 }
3507 case OpCode::Id::XMAD_IMM: {
3508 is_merge = instr.xmad.merge_37;
3509 op_b += std::to_string(instr.xmad.imm20_16);
3510 op_c += regs.GetRegisterAsInteger(instr.gpr39, 0, is_signed);
3511 break;
3512 }
3513 default: {
3514 UNIMPLEMENTED_MSG("Unhandled XMAD instruction: {}", opcode->get().GetName());
3515 }
3516 }
3517
3518 // TODO(bunnei): Ensure this is right with signed operands
3519 if (instr.xmad.high_a) {
3520 op_a = "((" + op_a + ") >> 16)";
3521 } else {
3522 op_a = "((" + op_a + ") & 0xFFFF)";
3523 }
3524
3525 std::string src2 = '(' + op_b + ')'; // Preserve original source 2
3526 if (instr.xmad.high_b) {
3527 op_b = '(' + src2 + " >> 16)";
3528 } else {
3529 op_b = '(' + src2 + " & 0xFFFF)";
3530 }
3531
3532 std::string product = '(' + op_a + " * " + op_b + ')';
3533 if (instr.xmad.product_shift_left) {
3534 product = '(' + product + " << 16)";
3535 }
3536 1271
3537 switch (instr.xmad.mode) { 1272 std::string TexelFetch(Operation operation) {
3538 case Tegra::Shader::XmadMode::None: 1273 constexpr std::array<const char*, 4> constructors = {"int", "ivec2", "ivec3", "ivec4"};
3539 break; 1274 const auto meta = std::get_if<MetaTexture>(&operation.GetMeta());
3540 case Tegra::Shader::XmadMode::CLo: 1275 ASSERT(meta);
3541 op_c = "((" + op_c + ") & 0xFFFF)"; 1276 UNIMPLEMENTED_IF(meta->sampler.IsArray());
3542 break; 1277 const std::size_t count = operation.GetOperandsCount();
3543 case Tegra::Shader::XmadMode::CHi:
3544 op_c = "((" + op_c + ") >> 16)";
3545 break;
3546 case Tegra::Shader::XmadMode::CBcc:
3547 op_c = "((" + op_c + ") + (" + src2 + "<< 16))";
3548 break;
3549 default: {
3550 UNIMPLEMENTED_MSG("Unhandled XMAD mode: {}",
3551 static_cast<u32>(instr.xmad.mode.Value()));
3552 }
3553 }
3554 1278
3555 std::string sum{'(' + product + " + " + op_c + ')'}; 1279 std::string expr = "texelFetch(";
3556 if (is_merge) { 1280 expr += GetSampler(meta->sampler);
3557 sum = "((" + sum + " & 0xFFFF) | (" + src2 + "<< 16))"; 1281 expr += ", ";
3558 }
3559 1282
3560 regs.SetRegisterToInteger(instr.gpr0, is_signed, 0, sum, 1, 1, false, 1283 expr += constructors.at(operation.GetOperandsCount() - 1);
3561 instr.generates_cc); 1284 expr += '(';
3562 break; 1285 for (std::size_t i = 0; i < count; ++i) {
1286 expr += VisitOperand(operation, i, Type::Int);
1287 const std::size_t next = i + 1;
1288 if (next == count)
1289 expr += ')';
1290 else if (next < count)
1291 expr += ", ";
3563 } 1292 }
3564 default: { 1293 if (meta->lod) {
3565 switch (opcode->get().GetId()) { 1294 expr += ", ";
3566 case OpCode::Id::EXIT: { 1295 expr += CastOperand(Visit(meta->lod), Type::Int);
3567 const Tegra::Shader::ConditionCode cc = instr.flow_condition_code; 1296 }
3568 UNIMPLEMENTED_IF_MSG(cc != Tegra::Shader::ConditionCode::T, 1297 expr += ')';
3569 "EXIT condition code used: {}", static_cast<u32>(cc));
3570
3571 if (stage == Maxwell3D::Regs::ShaderStage::Fragment) {
3572 EmitFragmentOutputsWrite();
3573 }
3574
3575 switch (instr.flow.cond) {
3576 case Tegra::Shader::FlowCondition::Always:
3577 shader.AddLine("return true;");
3578 if (instr.pred.pred_index == static_cast<u64>(Pred::UnusedIndex)) {
3579 // If this is an unconditional exit then just end processing here,
3580 // otherwise we have to account for the possibility of the condition
3581 // not being met, so continue processing the next instruction.
3582 offset = PROGRAM_END - 1;
3583 }
3584 break;
3585
3586 case Tegra::Shader::FlowCondition::Fcsm_Tr:
3587 // TODO(bunnei): What is this used for? If we assume this conditon is not
3588 // satisifed, dual vertex shaders in Farming Simulator make more sense
3589 UNIMPLEMENTED_MSG("Skipping unknown FlowCondition::Fcsm_Tr");
3590 break;
3591 1298
3592 default: 1299 return expr + GetSwizzle(meta->element);
3593 UNIMPLEMENTED_MSG("Unhandled flow condition: {}", 1300 }
3594 static_cast<u32>(instr.flow.cond.Value()));
3595 }
3596 break;
3597 }
3598 case OpCode::Id::KIL: {
3599 UNIMPLEMENTED_IF(instr.flow.cond != Tegra::Shader::FlowCondition::Always);
3600 1301
3601 const Tegra::Shader::ConditionCode cc = instr.flow_condition_code; 1302 std::string Branch(Operation operation) {
3602 UNIMPLEMENTED_IF_MSG(cc != Tegra::Shader::ConditionCode::T, 1303 const auto target = std::get_if<ImmediateNode>(operation[0]);
3603 "KIL condition code used: {}", static_cast<u32>(cc)); 1304 UNIMPLEMENTED_IF(!target);
3604 1305
3605 // Enclose "discard" in a conditional, so that GLSL compilation does not complain 1306 code.AddLine(fmt::format("jmp_to = 0x{:x}u;", target->GetValue()));
3606 // about unexecuted instructions that may follow this. 1307 code.AddLine("break;");
3607 shader.AddLine("if (true) {"); 1308 return {};
3608 ++shader.scope; 1309 }
3609 shader.AddLine("discard;");
3610 --shader.scope;
3611 shader.AddLine("}");
3612 1310
3613 break; 1311 std::string PushFlowStack(Operation operation) {
3614 } 1312 const auto target = std::get_if<ImmediateNode>(operation[0]);
3615 case OpCode::Id::OUT_R: { 1313 UNIMPLEMENTED_IF(!target);
3616 UNIMPLEMENTED_IF_MSG(instr.gpr20.Value() != Register::ZeroIndex,
3617 "Stream buffer is not supported");
3618 ASSERT_MSG(stage == Maxwell3D::Regs::ShaderStage::Geometry,
3619 "OUT is expected to be used in a geometry shader.");
3620
3621 if (instr.out.emit) {
3622 // gpr0 is used to store the next address. Hardware returns a pointer but
3623 // we just return the next index with a cyclic cap.
3624 const std::string current{regs.GetRegisterAsInteger(instr.gpr8, 0, false)};
3625 const std::string next = "((" + current + " + 1" + ") % " +
3626 std::to_string(MAX_GEOMETRY_BUFFERS) + ')';
3627 shader.AddLine("emit_vertex(" + current + ");");
3628 regs.SetRegisterToInteger(instr.gpr0, false, 0, next, 1, 1);
3629 }
3630 if (instr.out.cut) {
3631 shader.AddLine("EndPrimitive();");
3632 }
3633 1314
3634 break; 1315 code.AddLine(fmt::format("flow_stack[flow_stack_top++] = 0x{:x}u;", target->GetValue()));
3635 } 1316 return {};
3636 case OpCode::Id::MOV_SYS: { 1317 }
3637 switch (instr.sys20) {
3638 case Tegra::Shader::SystemVariable::InvocationInfo: {
3639 LOG_WARNING(HW_GPU, "MOV_SYS instruction with InvocationInfo is incomplete");
3640 regs.SetRegisterToInteger(instr.gpr0, false, 0, "0u", 1, 1);
3641 break;
3642 }
3643 case Tegra::Shader::SystemVariable::Ydirection: {
3644 // Config pack's third value is Y_NEGATE's state.
3645 regs.SetRegisterToFloat(instr.gpr0, 0, "uintBitsToFloat(config_pack[2])", 1, 1);
3646 break;
3647 }
3648 default: {
3649 UNIMPLEMENTED_MSG("Unhandled system move: {}",
3650 static_cast<u32>(instr.sys20.Value()));
3651 }
3652 }
3653 break;
3654 }
3655 case OpCode::Id::ISBERD: {
3656 UNIMPLEMENTED_IF(instr.isberd.o != 0);
3657 UNIMPLEMENTED_IF(instr.isberd.skew != 0);
3658 UNIMPLEMENTED_IF(instr.isberd.shift != Tegra::Shader::IsberdShift::None);
3659 UNIMPLEMENTED_IF(instr.isberd.mode != Tegra::Shader::IsberdMode::None);
3660 ASSERT_MSG(stage == Maxwell3D::Regs::ShaderStage::Geometry,
3661 "ISBERD is expected to be used in a geometry shader.");
3662 LOG_WARNING(HW_GPU, "ISBERD instruction is incomplete");
3663 regs.SetRegisterToFloat(instr.gpr0, 0, regs.GetRegisterAsFloat(instr.gpr8), 1, 1);
3664 break;
3665 }
3666 case OpCode::Id::BRA: {
3667 UNIMPLEMENTED_IF_MSG(instr.bra.constant_buffer != 0,
3668 "BRA with constant buffers are not implemented");
3669
3670 const Tegra::Shader::ConditionCode cc = instr.flow_condition_code;
3671 const u32 target = offset + instr.bra.GetBranchTarget();
3672 if (cc != Tegra::Shader::ConditionCode::T) {
3673 const std::string condition_code = regs.GetConditionCode(cc);
3674 shader.AddLine("if (" + condition_code + "){");
3675 shader.scope++;
3676 shader.AddLine("{ jmp_to = " + std::to_string(target) + "u; break; }");
3677 shader.scope--;
3678 shader.AddLine('}');
3679 } else {
3680 shader.AddLine("{ jmp_to = " + std::to_string(target) + "u; break; }");
3681 }
3682 break;
3683 }
3684 case OpCode::Id::IPA: {
3685 const auto& attribute = instr.attribute.fmt28;
3686 const auto& reg = instr.gpr0;
3687
3688 Tegra::Shader::IpaMode input_mode{instr.ipa.interp_mode.Value(),
3689 instr.ipa.sample_mode.Value()};
3690 regs.SetRegisterToInputAttibute(reg, attribute.element, attribute.index,
3691 input_mode);
3692 1318
3693 if (instr.ipa.saturate) { 1319 std::string PopFlowStack(Operation operation) {
3694 regs.SetRegisterToFloat(reg, 0, regs.GetRegisterAsFloat(reg), 1, 1, true); 1320 code.AddLine("jmp_to = flow_stack[--flow_stack_top];");
3695 } 1321 code.AddLine("break;");
3696 break; 1322 return {};
3697 } 1323 }
3698 case OpCode::Id::SSY: {
3699 // The SSY opcode tells the GPU where to re-converge divergent execution paths, it
3700 // sets the target of the jump that the SYNC instruction will make. The SSY opcode
3701 // has a similar structure to the BRA opcode.
3702 UNIMPLEMENTED_IF_MSG(instr.bra.constant_buffer != 0,
3703 "Constant buffer flow is not supported");
3704
3705 const u32 target = offset + instr.bra.GetBranchTarget();
3706 EmitPushToFlowStack(target);
3707 break;
3708 }
3709 case OpCode::Id::PBK: {
3710 // PBK pushes to a stack the address where BRK will jump to. This shares stack with
3711 // SSY but using SYNC on a PBK address will kill the shader execution. We don't
3712 // emulate this because it's very unlikely a driver will emit such invalid shader.
3713 UNIMPLEMENTED_IF_MSG(instr.bra.constant_buffer != 0,
3714 "Constant buffer PBK is not supported");
3715
3716 const u32 target = offset + instr.bra.GetBranchTarget();
3717 EmitPushToFlowStack(target);
3718 break;
3719 }
3720 case OpCode::Id::SYNC: {
3721 const Tegra::Shader::ConditionCode cc = instr.flow_condition_code;
3722 UNIMPLEMENTED_IF_MSG(cc != Tegra::Shader::ConditionCode::T,
3723 "SYNC condition code used: {}", static_cast<u32>(cc));
3724 1324
3725 // The SYNC opcode jumps to the address previously set by the SSY opcode 1325 std::string Exit(Operation operation) {
3726 EmitPopFromFlowStack(); 1326 if (stage != ShaderStage::Fragment) {
3727 break; 1327 code.AddLine("return;");
1328 return {};
1329 }
1330 const auto& used_registers = ir.GetRegisters();
1331 const auto SafeGetRegister = [&](u32 reg) -> std::string {
1332 // TODO(Rodrigo): Replace with contains once C++20 releases
1333 if (used_registers.find(reg) != used_registers.end()) {
1334 return GetRegister(reg);
3728 } 1335 }
3729 case OpCode::Id::BRK: { 1336 return "0.0f";
3730 // The BRK opcode jumps to the address previously set by the PBK opcode 1337 };
3731 const Tegra::Shader::ConditionCode cc = instr.flow_condition_code;
3732 UNIMPLEMENTED_IF_MSG(cc != Tegra::Shader::ConditionCode::T,
3733 "BRK condition code used: {}", static_cast<u32>(cc));
3734 1338
3735 EmitPopFromFlowStack(); 1339 UNIMPLEMENTED_IF_MSG(header.ps.omap.sample_mask != 0, "Sample mask write is unimplemented");
3736 break;
3737 }
3738 case OpCode::Id::DEPBAR: {
3739 // TODO(Subv): Find out if we actually have to care about this instruction or if
3740 // the GLSL compiler takes care of that for us.
3741 LOG_WARNING(HW_GPU, "DEPBAR instruction is stubbed");
3742 break;
3743 }
3744 case OpCode::Id::VMAD: {
3745 UNIMPLEMENTED_IF_MSG(instr.generates_cc,
3746 "Condition codes generation in VMAD is not implemented");
3747
3748 const bool result_signed = instr.video.signed_a == 1 || instr.video.signed_b == 1;
3749 const std::string op_a = GetVideoOperandA(instr);
3750 const std::string op_b = GetVideoOperandB(instr);
3751 const std::string op_c = regs.GetRegisterAsInteger(instr.gpr39, 0, result_signed);
3752
3753 std::string result = '(' + op_a + " * " + op_b + " + " + op_c + ')';
3754
3755 switch (instr.vmad.shr) {
3756 case Tegra::Shader::VmadShr::Shr7:
3757 result = '(' + result + " >> 7)";
3758 break;
3759 case Tegra::Shader::VmadShr::Shr15:
3760 result = '(' + result + " >> 15)";
3761 break;
3762 }
3763 1340
3764 regs.SetRegisterToInteger(instr.gpr0, result_signed, 1, result, 1, 1, 1341 code.AddLine("if (alpha_test[0] != 0) {");
3765 instr.vmad.saturate, instr.vmad.cc); 1342 ++code.scope;
3766 break; 1343 // We start on the register containing the alpha value in the first RT.
1344 u32 current_reg = 3;
1345 for (u32 render_target = 0; render_target < Maxwell::NumRenderTargets; ++render_target) {
1346 // TODO(Blinkhawk): verify the behavior of alpha testing on hardware when
1347 // multiple render targets are used.
1348 if (header.ps.IsColorComponentOutputEnabled(render_target, 0) ||
1349 header.ps.IsColorComponentOutputEnabled(render_target, 1) ||
1350 header.ps.IsColorComponentOutputEnabled(render_target, 2) ||
1351 header.ps.IsColorComponentOutputEnabled(render_target, 3)) {
1352 code.AddLine(
1353 fmt::format("if (!AlphaFunc({})) discard;", SafeGetRegister(current_reg)));
1354 current_reg += 4;
3767 } 1355 }
3768 case OpCode::Id::VSETP: { 1356 }
3769 const std::string op_a = GetVideoOperandA(instr); 1357 --code.scope;
3770 const std::string op_b = GetVideoOperandB(instr); 1358 code.AddLine('}');
3771
3772 // We can't use the constant predicate as destination.
3773 ASSERT(instr.vsetp.pred3 != static_cast<u64>(Pred::UnusedIndex));
3774
3775 const std::string second_pred = GetPredicateCondition(instr.vsetp.pred39, false);
3776
3777 const std::string combiner = GetPredicateCombiner(instr.vsetp.op);
3778
3779 const std::string predicate = GetPredicateComparison(instr.vsetp.cond, op_a, op_b);
3780 // Set the primary predicate to the result of Predicate OP SecondPredicate
3781 SetPredicate(instr.vsetp.pred3,
3782 '(' + predicate + ") " + combiner + " (" + second_pred + ')');
3783 1359
3784 if (instr.vsetp.pred0 != static_cast<u64>(Pred::UnusedIndex)) { 1360 // Write the color outputs using the data in the shader registers, disabled
3785 // Set the secondary predicate to the result of !Predicate OP SecondPredicate, 1361 // rendertargets/components are skipped in the register assignment.
3786 // if enabled 1362 current_reg = 0;
3787 SetPredicate(instr.vsetp.pred0, 1363 for (u32 render_target = 0; render_target < Maxwell::NumRenderTargets; ++render_target) {
3788 "!(" + predicate + ") " + combiner + " (" + second_pred + ')'); 1364 // TODO(Subv): Figure out how dual-source blending is configured in the Switch.
1365 for (u32 component = 0; component < 4; ++component) {
1366 if (header.ps.IsColorComponentOutputEnabled(render_target, component)) {
1367 code.AddLine(fmt::format("FragColor{}[{}] = {};", render_target, component,
1368 SafeGetRegister(current_reg)));
1369 ++current_reg;
3789 } 1370 }
3790 break;
3791 }
3792 default: {
3793 UNIMPLEMENTED_MSG("Unhandled instruction: {}", opcode->get().GetName());
3794 break;
3795 }
3796 } 1371 }
3797
3798 break;
3799 }
3800 } 1372 }
3801 1373
3802 // Close the predicate condition scope. 1374 if (header.ps.omap.depth) {
3803 if (can_be_predicated && instr.pred.pred_index != static_cast<u64>(Pred::UnusedIndex)) { 1375 // The depth output is always 2 registers after the last color output, and current_reg
3804 --shader.scope; 1376 // already contains one past the last color register.
3805 shader.AddLine('}'); 1377 code.AddLine("gl_FragDepth = " + SafeGetRegister(current_reg + 1) + ';');
3806 } 1378 }
3807 1379
3808 return offset + 1; 1380 code.AddLine("return;");
1381 return {};
3809 } 1382 }
3810 1383
3811 /** 1384 std::string Discard(Operation operation) {
3812 * Compiles a range of instructions from Tegra to GLSL. 1385 // Enclose "discard" in a conditional, so that GLSL compilation does not complain
3813 * @param begin the offset of the starting instruction. 1386 // about unexecuted instructions that may follow this.
3814 * @param end the offset where the compilation should stop (exclusive). 1387 code.AddLine("if (true) {");
3815 * @return the offset of the next instruction to compile. PROGRAM_END if the program 1388 ++code.scope;
3816 * terminates. 1389 code.AddLine("discard;");
3817 */ 1390 --code.scope;
3818 u32 CompileRange(u32 begin, u32 end) { 1391 code.AddLine("}");
3819 u32 program_counter; 1392 return {};
3820 for (program_counter = begin; program_counter < (begin > end ? PROGRAM_END : end);) {
3821 program_counter = CompileInstr(program_counter);
3822 }
3823 return program_counter;
3824 } 1393 }
3825 1394
3826 void Generate(const std::string& suffix) { 1395 std::string EmitVertex(Operation operation) {
3827 // Add declarations for all subroutines 1396 ASSERT_MSG(stage == ShaderStage::Geometry,
3828 for (const auto& subroutine : subroutines) { 1397 "EmitVertex is expected to be used in a geometry shader.");
3829 shader.AddLine("bool " + subroutine.GetName() + "();");
3830 }
3831 shader.AddNewLine();
3832
3833 // Add the main entry point
3834 shader.AddLine("bool exec_" + suffix + "() {");
3835 ++shader.scope;
3836 CallSubroutine(GetSubroutine(main_offset, PROGRAM_END));
3837 --shader.scope;
3838 shader.AddLine("}\n");
3839
3840 // Add definitions for all subroutines
3841 for (const auto& subroutine : subroutines) {
3842 std::set<u32> labels = subroutine.labels;
3843 1398
3844 shader.AddLine("bool " + subroutine.GetName() + "() {"); 1399 // If a geometry shader is attached, it will always flip (it's the last stage before
3845 ++shader.scope; 1400 // fragment). For more info about flipping, refer to gl_shader_gen.cpp.
3846 1401 code.AddLine("position.xy *= viewport_flip.xy;");
3847 if (labels.empty()) { 1402 code.AddLine("gl_Position = position;");
3848 if (CompileRange(subroutine.begin, subroutine.end) != PROGRAM_END) { 1403 code.AddLine("position.w = 1.0;");
3849 shader.AddLine("return false;"); 1404 code.AddLine("EmitVertex();");
3850 } 1405 return {};
3851 } else { 1406 }
3852 labels.insert(subroutine.begin); 1407
3853 shader.AddLine("uint jmp_to = " + std::to_string(subroutine.begin) + "u;"); 1408 std::string EndPrimitive(Operation operation) {
1409 ASSERT_MSG(stage == ShaderStage::Geometry,
1410 "EndPrimitive is expected to be used in a geometry shader.");
1411
1412 code.AddLine("EndPrimitive();");
1413 return {};
1414 }
1415
1416 std::string YNegate(Operation operation) {
1417 // Config pack's third value is Y_NEGATE's state.
1418 return "uintBitsToFloat(config_pack[2])";
1419 }
1420
1421 static constexpr OperationDecompilersArray operation_decompilers = {
1422 &GLSLDecompiler::Assign,
1423
1424 &GLSLDecompiler::Select,
1425
1426 &GLSLDecompiler::Add<Type::Float>,
1427 &GLSLDecompiler::Mul<Type::Float>,
1428 &GLSLDecompiler::Div<Type::Float>,
1429 &GLSLDecompiler::Fma<Type::Float>,
1430 &GLSLDecompiler::Negate<Type::Float>,
1431 &GLSLDecompiler::Absolute<Type::Float>,
1432 &GLSLDecompiler::FClamp,
1433 &GLSLDecompiler::Min<Type::Float>,
1434 &GLSLDecompiler::Max<Type::Float>,
1435 &GLSLDecompiler::FCos,
1436 &GLSLDecompiler::FSin,
1437 &GLSLDecompiler::FExp2,
1438 &GLSLDecompiler::FLog2,
1439 &GLSLDecompiler::FInverseSqrt,
1440 &GLSLDecompiler::FSqrt,
1441 &GLSLDecompiler::FRoundEven,
1442 &GLSLDecompiler::FFloor,
1443 &GLSLDecompiler::FCeil,
1444 &GLSLDecompiler::FTrunc,
1445 &GLSLDecompiler::FCastInteger<Type::Int>,
1446 &GLSLDecompiler::FCastInteger<Type::Uint>,
1447
1448 &GLSLDecompiler::Add<Type::Int>,
1449 &GLSLDecompiler::Mul<Type::Int>,
1450 &GLSLDecompiler::Div<Type::Int>,
1451 &GLSLDecompiler::Negate<Type::Int>,
1452 &GLSLDecompiler::Absolute<Type::Int>,
1453 &GLSLDecompiler::Min<Type::Int>,
1454 &GLSLDecompiler::Max<Type::Int>,
1455
1456 &GLSLDecompiler::ICastFloat,
1457 &GLSLDecompiler::ICastUnsigned,
1458 &GLSLDecompiler::LogicalShiftLeft<Type::Int>,
1459 &GLSLDecompiler::ILogicalShiftRight,
1460 &GLSLDecompiler::IArithmeticShiftRight,
1461 &GLSLDecompiler::BitwiseAnd<Type::Int>,
1462 &GLSLDecompiler::BitwiseOr<Type::Int>,
1463 &GLSLDecompiler::BitwiseXor<Type::Int>,
1464 &GLSLDecompiler::BitwiseNot<Type::Int>,
1465 &GLSLDecompiler::BitfieldInsert<Type::Int>,
1466 &GLSLDecompiler::BitfieldExtract<Type::Int>,
1467 &GLSLDecompiler::BitCount<Type::Int>,
1468
1469 &GLSLDecompiler::Add<Type::Uint>,
1470 &GLSLDecompiler::Mul<Type::Uint>,
1471 &GLSLDecompiler::Div<Type::Uint>,
1472 &GLSLDecompiler::Min<Type::Uint>,
1473 &GLSLDecompiler::Max<Type::Uint>,
1474 &GLSLDecompiler::UCastFloat,
1475 &GLSLDecompiler::UCastSigned,
1476 &GLSLDecompiler::LogicalShiftLeft<Type::Uint>,
1477 &GLSLDecompiler::UShiftRight,
1478 &GLSLDecompiler::UShiftRight,
1479 &GLSLDecompiler::BitwiseAnd<Type::Uint>,
1480 &GLSLDecompiler::BitwiseOr<Type::Uint>,
1481 &GLSLDecompiler::BitwiseXor<Type::Uint>,
1482 &GLSLDecompiler::BitwiseNot<Type::Uint>,
1483 &GLSLDecompiler::BitfieldInsert<Type::Uint>,
1484 &GLSLDecompiler::BitfieldExtract<Type::Uint>,
1485 &GLSLDecompiler::BitCount<Type::Uint>,
1486
1487 &GLSLDecompiler::Add<Type::HalfFloat>,
1488 &GLSLDecompiler::Mul<Type::HalfFloat>,
1489 &GLSLDecompiler::Fma<Type::HalfFloat>,
1490 &GLSLDecompiler::Absolute<Type::HalfFloat>,
1491 &GLSLDecompiler::HNegate,
1492 &GLSLDecompiler::HMergeF32,
1493 &GLSLDecompiler::HMergeH0,
1494 &GLSLDecompiler::HMergeH1,
1495 &GLSLDecompiler::HPack2,
1496
1497 &GLSLDecompiler::LogicalAssign,
1498 &GLSLDecompiler::LogicalAnd,
1499 &GLSLDecompiler::LogicalOr,
1500 &GLSLDecompiler::LogicalXor,
1501 &GLSLDecompiler::LogicalNegate,
1502 &GLSLDecompiler::LogicalPick2,
1503 &GLSLDecompiler::LogicalAll2,
1504 &GLSLDecompiler::LogicalAny2,
1505
1506 &GLSLDecompiler::LogicalLessThan<Type::Float>,
1507 &GLSLDecompiler::LogicalEqual<Type::Float>,
1508 &GLSLDecompiler::LogicalLessEqual<Type::Float>,
1509 &GLSLDecompiler::LogicalGreaterThan<Type::Float>,
1510 &GLSLDecompiler::LogicalNotEqual<Type::Float>,
1511 &GLSLDecompiler::LogicalGreaterEqual<Type::Float>,
1512 &GLSLDecompiler::LogicalFIsNan,
1513
1514 &GLSLDecompiler::LogicalLessThan<Type::Int>,
1515 &GLSLDecompiler::LogicalEqual<Type::Int>,
1516 &GLSLDecompiler::LogicalLessEqual<Type::Int>,
1517 &GLSLDecompiler::LogicalGreaterThan<Type::Int>,
1518 &GLSLDecompiler::LogicalNotEqual<Type::Int>,
1519 &GLSLDecompiler::LogicalGreaterEqual<Type::Int>,
1520
1521 &GLSLDecompiler::LogicalLessThan<Type::Uint>,
1522 &GLSLDecompiler::LogicalEqual<Type::Uint>,
1523 &GLSLDecompiler::LogicalLessEqual<Type::Uint>,
1524 &GLSLDecompiler::LogicalGreaterThan<Type::Uint>,
1525 &GLSLDecompiler::LogicalNotEqual<Type::Uint>,
1526 &GLSLDecompiler::LogicalGreaterEqual<Type::Uint>,
1527
1528 &GLSLDecompiler::Logical2HLessThan,
1529 &GLSLDecompiler::Logical2HEqual,
1530 &GLSLDecompiler::Logical2HLessEqual,
1531 &GLSLDecompiler::Logical2HGreaterThan,
1532 &GLSLDecompiler::Logical2HNotEqual,
1533 &GLSLDecompiler::Logical2HGreaterEqual,
1534
1535 &GLSLDecompiler::Texture,
1536 &GLSLDecompiler::TextureLod,
1537 &GLSLDecompiler::TextureGather,
1538 &GLSLDecompiler::TextureQueryDimensions,
1539 &GLSLDecompiler::TextureQueryLod,
1540 &GLSLDecompiler::TexelFetch,
1541
1542 &GLSLDecompiler::Branch,
1543 &GLSLDecompiler::PushFlowStack,
1544 &GLSLDecompiler::PopFlowStack,
1545 &GLSLDecompiler::Exit,
1546 &GLSLDecompiler::Discard,
1547
1548 &GLSLDecompiler::EmitVertex,
1549 &GLSLDecompiler::EndPrimitive,
1550
1551 &GLSLDecompiler::YNegate,
1552 };
3854 1553
3855 // TODO(Subv): Figure out the actual depth of the flow stack, for now it seems 1554 std::string GetRegister(u32 index) const {
3856 // unlikely that shaders will use 20 nested SSYs and PBKs. 1555 return GetDeclarationWithSuffix(index, "gpr");
3857 constexpr u32 FLOW_STACK_SIZE = 20; 1556 }
3858 shader.AddLine("uint flow_stack[" + std::to_string(FLOW_STACK_SIZE) + "];");
3859 shader.AddLine("uint flow_stack_top = 0u;");
3860 1557
3861 shader.AddLine("while (true) {"); 1558 std::string GetPredicate(Tegra::Shader::Pred pred) const {
3862 ++shader.scope; 1559 return GetDeclarationWithSuffix(static_cast<u32>(pred), "pred");
1560 }
3863 1561
3864 shader.AddLine("switch (jmp_to) {"); 1562 std::string GetInputAttribute(Attribute::Index attribute) const {
1563 const auto index{static_cast<u32>(attribute) -
1564 static_cast<u32>(Attribute::Index::Attribute_0)};
1565 return GetDeclarationWithSuffix(index, "input_attr");
1566 }
3865 1567
3866 for (auto label : labels) { 1568 std::string GetOutputAttribute(Attribute::Index attribute) const {
3867 shader.AddLine("case " + std::to_string(label) + "u: {"); 1569 const auto index{static_cast<u32>(attribute) -
3868 ++shader.scope; 1570 static_cast<u32>(Attribute::Index::Attribute_0)};
1571 return GetDeclarationWithSuffix(index, "output_attr");
1572 }
3869 1573
3870 const auto next_it = labels.lower_bound(label + 1); 1574 std::string GetConstBuffer(u32 index) const {
3871 const u32 next_label = next_it == labels.end() ? subroutine.end : *next_it; 1575 return GetDeclarationWithSuffix(index, "cbuf");
1576 }
3872 1577
3873 const u32 compile_end = CompileRange(label, next_label); 1578 std::string GetGlobalMemory(const GlobalMemoryBase& descriptor) const {
3874 if (compile_end > next_label && compile_end != PROGRAM_END) { 1579 return fmt::format("gmem_{}_{}_{}", descriptor.cbuf_index, descriptor.cbuf_offset, suffix);
3875 // This happens only when there is a label inside a IF/LOOP block 1580 }
3876 shader.AddLine(" jmp_to = " + std::to_string(compile_end) + "u; break; }");
3877 labels.emplace(compile_end);
3878 }
3879 1581
3880 --shader.scope; 1582 std::string GetGlobalMemoryBlock(const GlobalMemoryBase& descriptor) const {
3881 shader.AddLine('}'); 1583 return fmt::format("gmem_block_{}_{}_{}", descriptor.cbuf_index, descriptor.cbuf_offset,
3882 } 1584 suffix);
1585 }
3883 1586
3884 shader.AddLine("default: return false;"); 1587 std::string GetConstBufferBlock(u32 index) const {
3885 shader.AddLine('}'); 1588 return GetDeclarationWithSuffix(index, "cbuf_block");
1589 }
3886 1590
3887 --shader.scope; 1591 std::string GetLocalMemory() const {
3888 shader.AddLine('}'); 1592 return "lmem_" + suffix;
1593 }
3889 1594
3890 shader.AddLine("return false;"); 1595 std::string GetInternalFlag(InternalFlag flag) const {
3891 } 1596 constexpr std::array<const char*, 4> InternalFlagNames = {"zero_flag", "sign_flag",
1597 "carry_flag", "overflow_flag"};
1598 const auto index = static_cast<u32>(flag);
1599 ASSERT(index < static_cast<u32>(InternalFlag::Amount));
3892 1600
3893 --shader.scope; 1601 return std::string(InternalFlagNames[index]) + '_' + suffix;
3894 shader.AddLine("}\n"); 1602 }
3895 1603
3896 DEBUG_ASSERT(shader.scope == 0); 1604 std::string GetSampler(const Sampler& sampler) const {
3897 } 1605 return GetDeclarationWithSuffix(static_cast<u32>(sampler.GetIndex()), "sampler");
1606 }
3898 1607
3899 GenerateDeclarations(); 1608 std::string GetDeclarationWithSuffix(u32 index, const std::string& name) const {
1609 return name + '_' + std::to_string(index) + '_' + suffix;
3900 } 1610 }
3901 1611
3902 /// Add declarations for registers 1612 const ShaderIR& ir;
3903 void GenerateDeclarations() { 1613 const ShaderStage stage;
3904 regs.GenerateDeclarations(suffix); 1614 const std::string suffix;
1615 const Header header;
3905 1616
3906 for (const auto& pred : declr_predicates) { 1617 ShaderWriter code;
3907 declarations.AddLine("bool " + pred + " = false;"); 1618};
3908 }
3909 declarations.AddNewLine();
3910 }
3911 1619
3912private: 1620} // Anonymous namespace
3913 const std::set<Subroutine>& subroutines;
3914 const ProgramCode& program_code;
3915 Tegra::Shader::Header header;
3916 const u32 main_offset;
3917 Maxwell3D::Regs::ShaderStage stage;
3918 const std::string& suffix;
3919 u64 local_memory_size;
3920 std::size_t shader_length;
3921
3922 ShaderWriter shader;
3923 ShaderWriter declarations;
3924 GLSLRegisterManager regs{shader, declarations, stage, suffix, header};
3925
3926 // Declarations
3927 std::set<std::string> declr_predicates;
3928}; // namespace OpenGL::GLShader::Decompiler
3929 1621
3930std::string GetCommonDeclarations() { 1622std::string GetCommonDeclarations() {
3931 return fmt::format("#define MAX_CONSTBUFFER_ELEMENTS {}\n", 1623 const auto cbuf = std::to_string(MAX_CONSTBUFFER_ELEMENTS);
3932 RasterizerOpenGL::MaxConstbufferSize / sizeof(GLvec4)); 1624 const auto gmem = std::to_string(MAX_GLOBALMEMORY_ELEMENTS);
1625 return "#define MAX_CONSTBUFFER_ELEMENTS " + cbuf + "\n" +
1626 "#define MAX_GLOBALMEMORY_ELEMENTS " + gmem + "\n" +
1627 "#define ftoi floatBitsToInt\n"
1628 "#define ftou floatBitsToUint\n"
1629 "#define itof intBitsToFloat\n"
1630 "#define utof uintBitsToFloat\n\n"
1631 "float fromHalf2(vec2 pair) {\n"
1632 " return utof(packHalf2x16(pair));\n"
1633 "}\n\n"
1634 "vec2 toHalf2(float value) {\n"
1635 " return unpackHalf2x16(ftou(value));\n"
1636 "}\n";
3933} 1637}
3934 1638
3935std::optional<ProgramResult> DecompileProgram(const ProgramCode& program_code, u32 main_offset, 1639ProgramResult Decompile(const ShaderIR& ir, Maxwell::ShaderStage stage, const std::string& suffix) {
3936 Maxwell3D::Regs::ShaderStage stage, 1640 GLSLDecompiler decompiler(ir, stage, suffix);
3937 const std::string& suffix) { 1641 decompiler.Decompile();
3938 try { 1642 return {decompiler.GetResult(), decompiler.GetShaderEntries()};
3939 ControlFlowAnalyzer analyzer(program_code, main_offset, suffix);
3940 const auto subroutines = analyzer.GetSubroutines();
3941 GLSLGenerator generator(subroutines, program_code, main_offset, stage, suffix,
3942 analyzer.GetShaderLength());
3943 return ProgramResult{generator.GetShaderCode(), generator.GetEntries()};
3944 } catch (const DecompileFail& exception) {
3945 LOG_ERROR(HW_GPU, "Shader decompilation failed: {}", exception.what());
3946 }
3947 return {};
3948} 1643}
3949 1644
3950} // namespace OpenGL::GLShader::Decompiler 1645} // namespace OpenGL::GLShader
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.h b/src/video_core/renderer_opengl/gl_shader_decompiler.h
index d01a4a7ee..4e04ab2f8 100644
--- a/src/video_core/renderer_opengl/gl_shader_decompiler.h
+++ b/src/video_core/renderer_opengl/gl_shader_decompiler.h
@@ -5,21 +5,67 @@
5#pragma once 5#pragma once
6 6
7#include <array> 7#include <array>
8#include <functional>
9#include <optional>
10#include <string> 8#include <string>
9#include <utility>
10#include <vector>
11#include "common/common_types.h" 11#include "common/common_types.h"
12#include "video_core/engines/maxwell_3d.h" 12#include "video_core/engines/maxwell_3d.h"
13#include "video_core/renderer_opengl/gl_shader_gen.h" 13#include "video_core/shader/shader_ir.h"
14 14
15namespace OpenGL::GLShader::Decompiler { 15namespace VideoCommon::Shader {
16class ShaderIR;
17}
16 18
17using Tegra::Engines::Maxwell3D; 19namespace OpenGL::GLShader {
20
21struct ShaderEntries;
22
23using Maxwell = Tegra::Engines::Maxwell3D::Regs;
24using ProgramResult = std::pair<std::string, ShaderEntries>;
25using SamplerEntry = VideoCommon::Shader::Sampler;
26
27class ConstBufferEntry : public VideoCommon::Shader::ConstBuffer {
28public:
29 explicit ConstBufferEntry(u32 max_offset, bool is_indirect, u32 index)
30 : VideoCommon::Shader::ConstBuffer{max_offset, is_indirect}, index{index} {}
31
32 u32 GetIndex() const {
33 return index;
34 }
35
36private:
37 u32 index{};
38};
39
40class GlobalMemoryEntry {
41public:
42 explicit GlobalMemoryEntry(u32 cbuf_index, u32 cbuf_offset)
43 : cbuf_index{cbuf_index}, cbuf_offset{cbuf_offset} {}
44
45 u32 GetCbufIndex() const {
46 return cbuf_index;
47 }
48
49 u32 GetCbufOffset() const {
50 return cbuf_offset;
51 }
52
53private:
54 u32 cbuf_index{};
55 u32 cbuf_offset{};
56};
57
58struct ShaderEntries {
59 std::vector<ConstBufferEntry> const_buffers;
60 std::vector<SamplerEntry> samplers;
61 std::vector<GlobalMemoryEntry> global_memory_entries;
62 std::array<bool, Maxwell::NumClipDistances> clip_distances{};
63 std::size_t shader_length{};
64};
18 65
19std::string GetCommonDeclarations(); 66std::string GetCommonDeclarations();
20 67
21std::optional<ProgramResult> DecompileProgram(const ProgramCode& program_code, u32 main_offset, 68ProgramResult Decompile(const VideoCommon::Shader::ShaderIR& ir, Maxwell::ShaderStage stage,
22 Maxwell3D::Regs::ShaderStage stage, 69 const std::string& suffix);
23 const std::string& suffix);
24 70
25} // namespace OpenGL::GLShader::Decompiler 71} // namespace OpenGL::GLShader \ No newline at end of file
diff --git a/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp b/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp
new file mode 100644
index 000000000..8a43eb157
--- /dev/null
+++ b/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp
@@ -0,0 +1,624 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <cstring>
6#include <fmt/format.h>
7
8#include "common/assert.h"
9#include "common/common_paths.h"
10#include "common/common_types.h"
11#include "common/file_util.h"
12#include "common/logging/log.h"
13#include "common/scm_rev.h"
14#include "common/zstd_compression.h"
15
16#include "core/core.h"
17#include "core/hle/kernel/process.h"
18#include "core/settings.h"
19
20#include "video_core/renderer_opengl/gl_shader_cache.h"
21#include "video_core/renderer_opengl/gl_shader_disk_cache.h"
22
23namespace OpenGL {
24
25using ShaderCacheVersionHash = std::array<u8, 64>;
26
27enum class TransferableEntryKind : u32 {
28 Raw,
29 Usage,
30};
31
32enum class PrecompiledEntryKind : u32 {
33 Decompiled,
34 Dump,
35};
36
37constexpr u32 NativeVersion = 1;
38
39// Making sure sizes doesn't change by accident
40static_assert(sizeof(BaseBindings) == 12);
41static_assert(sizeof(ShaderDiskCacheUsage) == 24);
42
43namespace {
44
45ShaderCacheVersionHash GetShaderCacheVersionHash() {
46 ShaderCacheVersionHash hash{};
47 const std::size_t length = std::min(std::strlen(Common::g_shader_cache_version), hash.size());
48 std::memcpy(hash.data(), Common::g_shader_cache_version, length);
49 return hash;
50}
51
52} // namespace
53
54ShaderDiskCacheRaw::ShaderDiskCacheRaw(u64 unique_identifier, Maxwell::ShaderProgram program_type,
55 u32 program_code_size, u32 program_code_size_b,
56 ProgramCode program_code, ProgramCode program_code_b)
57 : unique_identifier{unique_identifier}, program_type{program_type},
58 program_code_size{program_code_size}, program_code_size_b{program_code_size_b},
59 program_code{std::move(program_code)}, program_code_b{std::move(program_code_b)} {}
60
61ShaderDiskCacheRaw::ShaderDiskCacheRaw() = default;
62
63ShaderDiskCacheRaw::~ShaderDiskCacheRaw() = default;
64
65bool ShaderDiskCacheRaw::Load(FileUtil::IOFile& file) {
66 if (file.ReadBytes(&unique_identifier, sizeof(u64)) != sizeof(u64) ||
67 file.ReadBytes(&program_type, sizeof(u32)) != sizeof(u32)) {
68 return false;
69 }
70 u32 program_code_size{};
71 u32 program_code_size_b{};
72 if (file.ReadBytes(&program_code_size, sizeof(u32)) != sizeof(u32) ||
73 file.ReadBytes(&program_code_size_b, sizeof(u32)) != sizeof(u32)) {
74 return false;
75 }
76
77 program_code.resize(program_code_size);
78 program_code_b.resize(program_code_size_b);
79
80 if (file.ReadArray(program_code.data(), program_code_size) != program_code_size)
81 return false;
82
83 if (HasProgramA() &&
84 file.ReadArray(program_code_b.data(), program_code_size_b) != program_code_size_b) {
85 return false;
86 }
87 return true;
88}
89
90bool ShaderDiskCacheRaw::Save(FileUtil::IOFile& file) const {
91 if (file.WriteObject(unique_identifier) != 1 ||
92 file.WriteObject(static_cast<u32>(program_type)) != 1 ||
93 file.WriteObject(program_code_size) != 1 || file.WriteObject(program_code_size_b) != 1) {
94 return false;
95 }
96
97 if (file.WriteArray(program_code.data(), program_code_size) != program_code_size)
98 return false;
99
100 if (HasProgramA() &&
101 file.WriteArray(program_code_b.data(), program_code_size_b) != program_code_size_b) {
102 return false;
103 }
104 return true;
105}
106
107ShaderDiskCacheOpenGL::ShaderDiskCacheOpenGL(Core::System& system) : system{system} {}
108
109std::optional<std::pair<std::vector<ShaderDiskCacheRaw>, std::vector<ShaderDiskCacheUsage>>>
110ShaderDiskCacheOpenGL::LoadTransferable() {
111 // Skip games without title id
112 const bool has_title_id = system.CurrentProcess()->GetTitleID() != 0;
113 if (!Settings::values.use_disk_shader_cache || !has_title_id)
114 return {};
115 tried_to_load = true;
116
117 FileUtil::IOFile file(GetTransferablePath(), "rb");
118 if (!file.IsOpen()) {
119 LOG_INFO(Render_OpenGL, "No transferable shader cache found for game with title id={}",
120 GetTitleID());
121 return {};
122 }
123
124 u32 version{};
125 if (file.ReadBytes(&version, sizeof(version)) != sizeof(version)) {
126 LOG_ERROR(Render_OpenGL,
127 "Failed to get transferable cache version for title id={} - skipping",
128 GetTitleID());
129 return {};
130 }
131
132 if (version < NativeVersion) {
133 LOG_INFO(Render_OpenGL, "Transferable shader cache is old - removing");
134 file.Close();
135 InvalidateTransferable();
136 return {};
137 }
138 if (version > NativeVersion) {
139 LOG_WARNING(Render_OpenGL, "Transferable shader cache was generated with a newer version "
140 "of the emulator - skipping");
141 return {};
142 }
143
144 // Version is valid, load the shaders
145 std::vector<ShaderDiskCacheRaw> raws;
146 std::vector<ShaderDiskCacheUsage> usages;
147 while (file.Tell() < file.GetSize()) {
148 TransferableEntryKind kind{};
149 if (file.ReadBytes(&kind, sizeof(u32)) != sizeof(u32)) {
150 LOG_ERROR(Render_OpenGL, "Failed to read transferable file - skipping");
151 return {};
152 }
153
154 switch (kind) {
155 case TransferableEntryKind::Raw: {
156 ShaderDiskCacheRaw entry;
157 if (!entry.Load(file)) {
158 LOG_ERROR(Render_OpenGL, "Failed to load transferable raw entry - skipping");
159 return {};
160 }
161 transferable.insert({entry.GetUniqueIdentifier(), {}});
162 raws.push_back(std::move(entry));
163 break;
164 }
165 case TransferableEntryKind::Usage: {
166 ShaderDiskCacheUsage usage{};
167 if (file.ReadBytes(&usage, sizeof(usage)) != sizeof(usage)) {
168 LOG_ERROR(Render_OpenGL, "Failed to load transferable usage entry - skipping");
169 return {};
170 }
171 usages.push_back(std::move(usage));
172 break;
173 }
174 default:
175 LOG_ERROR(Render_OpenGL, "Unknown transferable shader cache entry kind={} - skipping",
176 static_cast<u32>(kind));
177 return {};
178 }
179 }
180 return {{raws, usages}};
181}
182
183std::pair<std::unordered_map<u64, ShaderDiskCacheDecompiled>,
184 std::unordered_map<ShaderDiskCacheUsage, ShaderDiskCacheDump>>
185ShaderDiskCacheOpenGL::LoadPrecompiled() {
186 if (!IsUsable())
187 return {};
188
189 FileUtil::IOFile file(GetPrecompiledPath(), "rb");
190 if (!file.IsOpen()) {
191 LOG_INFO(Render_OpenGL, "No precompiled shader cache found for game with title id={}",
192 GetTitleID());
193 return {};
194 }
195
196 const auto result = LoadPrecompiledFile(file);
197 if (!result) {
198 LOG_INFO(Render_OpenGL,
199 "Failed to load precompiled cache for game with title id={} - removing",
200 GetTitleID());
201 file.Close();
202 InvalidatePrecompiled();
203 return {};
204 }
205 return *result;
206}
207
208std::optional<std::pair<std::unordered_map<u64, ShaderDiskCacheDecompiled>,
209 std::unordered_map<ShaderDiskCacheUsage, ShaderDiskCacheDump>>>
210ShaderDiskCacheOpenGL::LoadPrecompiledFile(FileUtil::IOFile& file) {
211 ShaderCacheVersionHash file_hash{};
212 if (file.ReadArray(file_hash.data(), file_hash.size()) != file_hash.size()) {
213 return {};
214 }
215 if (GetShaderCacheVersionHash() != file_hash) {
216 LOG_INFO(Render_OpenGL, "Precompiled cache is from another version of the emulator");
217 return {};
218 }
219
220 std::unordered_map<u64, ShaderDiskCacheDecompiled> decompiled;
221 std::unordered_map<ShaderDiskCacheUsage, ShaderDiskCacheDump> dumps;
222 while (file.Tell() < file.GetSize()) {
223 PrecompiledEntryKind kind{};
224 if (file.ReadBytes(&kind, sizeof(u32)) != sizeof(u32)) {
225 return {};
226 }
227
228 switch (kind) {
229 case PrecompiledEntryKind::Decompiled: {
230 u64 unique_identifier{};
231 if (file.ReadBytes(&unique_identifier, sizeof(u64)) != sizeof(u64))
232 return {};
233
234 const auto entry = LoadDecompiledEntry(file);
235 if (!entry)
236 return {};
237 decompiled.insert({unique_identifier, std::move(*entry)});
238 break;
239 }
240 case PrecompiledEntryKind::Dump: {
241 ShaderDiskCacheUsage usage;
242 if (file.ReadBytes(&usage, sizeof(usage)) != sizeof(usage))
243 return {};
244
245 ShaderDiskCacheDump dump;
246 if (file.ReadBytes(&dump.binary_format, sizeof(u32)) != sizeof(u32))
247 return {};
248
249 u32 binary_length{};
250 u32 compressed_size{};
251 if (file.ReadBytes(&binary_length, sizeof(u32)) != sizeof(u32) ||
252 file.ReadBytes(&compressed_size, sizeof(u32)) != sizeof(u32)) {
253 return {};
254 }
255
256 std::vector<u8> compressed_binary(compressed_size);
257 if (file.ReadArray(compressed_binary.data(), compressed_binary.size()) !=
258 compressed_binary.size()) {
259 return {};
260 }
261
262 dump.binary = Common::Compression::DecompressDataZSTD(compressed_binary);
263 if (dump.binary.empty()) {
264 return {};
265 }
266
267 dumps.insert({usage, dump});
268 break;
269 }
270 default:
271 return {};
272 }
273 }
274 return {{decompiled, dumps}};
275}
276
277std::optional<ShaderDiskCacheDecompiled> ShaderDiskCacheOpenGL::LoadDecompiledEntry(
278 FileUtil::IOFile& file) {
279 u32 code_size{};
280 u32 compressed_code_size{};
281 if (file.ReadBytes(&code_size, sizeof(u32)) != sizeof(u32) ||
282 file.ReadBytes(&compressed_code_size, sizeof(u32)) != sizeof(u32)) {
283 return {};
284 }
285
286 std::vector<u8> compressed_code(compressed_code_size);
287 if (file.ReadArray(compressed_code.data(), compressed_code.size()) != compressed_code.size()) {
288 return {};
289 }
290
291 const std::vector<u8> code = Common::Compression::DecompressDataZSTD(compressed_code);
292 if (code.empty()) {
293 return {};
294 }
295 ShaderDiskCacheDecompiled entry;
296 entry.code = std::string(reinterpret_cast<const char*>(code.data()), code_size);
297
298 u32 const_buffers_count{};
299 if (file.ReadBytes(&const_buffers_count, sizeof(u32)) != sizeof(u32))
300 return {};
301 for (u32 i = 0; i < const_buffers_count; ++i) {
302 u32 max_offset{};
303 u32 index{};
304 u8 is_indirect{};
305 if (file.ReadBytes(&max_offset, sizeof(u32)) != sizeof(u32) ||
306 file.ReadBytes(&index, sizeof(u32)) != sizeof(u32) ||
307 file.ReadBytes(&is_indirect, sizeof(u8)) != sizeof(u8)) {
308 return {};
309 }
310 entry.entries.const_buffers.emplace_back(max_offset, is_indirect != 0, index);
311 }
312
313 u32 samplers_count{};
314 if (file.ReadBytes(&samplers_count, sizeof(u32)) != sizeof(u32))
315 return {};
316 for (u32 i = 0; i < samplers_count; ++i) {
317 u64 offset{};
318 u64 index{};
319 u32 type{};
320 u8 is_array{};
321 u8 is_shadow{};
322 if (file.ReadBytes(&offset, sizeof(u64)) != sizeof(u64) ||
323 file.ReadBytes(&index, sizeof(u64)) != sizeof(u64) ||
324 file.ReadBytes(&type, sizeof(u32)) != sizeof(u32) ||
325 file.ReadBytes(&is_array, sizeof(u8)) != sizeof(u8) ||
326 file.ReadBytes(&is_shadow, sizeof(u8)) != sizeof(u8)) {
327 return {};
328 }
329 entry.entries.samplers.emplace_back(
330 static_cast<std::size_t>(offset), static_cast<std::size_t>(index),
331 static_cast<Tegra::Shader::TextureType>(type), is_array != 0, is_shadow != 0);
332 }
333
334 u32 global_memory_count{};
335 if (file.ReadBytes(&global_memory_count, sizeof(u32)) != sizeof(u32))
336 return {};
337 for (u32 i = 0; i < global_memory_count; ++i) {
338 u32 cbuf_index{};
339 u32 cbuf_offset{};
340 if (file.ReadBytes(&cbuf_index, sizeof(u32)) != sizeof(u32) ||
341 file.ReadBytes(&cbuf_offset, sizeof(u32)) != sizeof(u32)) {
342 return {};
343 }
344 entry.entries.global_memory_entries.emplace_back(cbuf_index, cbuf_offset);
345 }
346
347 for (auto& clip_distance : entry.entries.clip_distances) {
348 u8 clip_distance_raw{};
349 if (file.ReadBytes(&clip_distance_raw, sizeof(u8)) != sizeof(u8))
350 return {};
351 clip_distance = clip_distance_raw != 0;
352 }
353
354 u64 shader_length{};
355 if (file.ReadBytes(&shader_length, sizeof(u64)) != sizeof(u64))
356 return {};
357 entry.entries.shader_length = static_cast<std::size_t>(shader_length);
358
359 return entry;
360}
361
362bool ShaderDiskCacheOpenGL::SaveDecompiledFile(FileUtil::IOFile& file, u64 unique_identifier,
363 const std::string& code,
364 const std::vector<u8>& compressed_code,
365 const GLShader::ShaderEntries& entries) {
366 if (file.WriteObject(static_cast<u32>(PrecompiledEntryKind::Decompiled)) != 1 ||
367 file.WriteObject(unique_identifier) != 1 ||
368 file.WriteObject(static_cast<u32>(code.size())) != 1 ||
369 file.WriteObject(static_cast<u32>(compressed_code.size())) != 1 ||
370 file.WriteArray(compressed_code.data(), compressed_code.size()) != compressed_code.size()) {
371 return false;
372 }
373
374 if (file.WriteObject(static_cast<u32>(entries.const_buffers.size())) != 1)
375 return false;
376 for (const auto& cbuf : entries.const_buffers) {
377 if (file.WriteObject(static_cast<u32>(cbuf.GetMaxOffset())) != 1 ||
378 file.WriteObject(static_cast<u32>(cbuf.GetIndex())) != 1 ||
379 file.WriteObject(static_cast<u8>(cbuf.IsIndirect() ? 1 : 0)) != 1) {
380 return false;
381 }
382 }
383
384 if (file.WriteObject(static_cast<u32>(entries.samplers.size())) != 1)
385 return false;
386 for (const auto& sampler : entries.samplers) {
387 if (file.WriteObject(static_cast<u64>(sampler.GetOffset())) != 1 ||
388 file.WriteObject(static_cast<u64>(sampler.GetIndex())) != 1 ||
389 file.WriteObject(static_cast<u32>(sampler.GetType())) != 1 ||
390 file.WriteObject(static_cast<u8>(sampler.IsArray() ? 1 : 0)) != 1 ||
391 file.WriteObject(static_cast<u8>(sampler.IsShadow() ? 1 : 0)) != 1) {
392 return false;
393 }
394 }
395
396 if (file.WriteObject(static_cast<u32>(entries.global_memory_entries.size())) != 1)
397 return false;
398 for (const auto& gmem : entries.global_memory_entries) {
399 if (file.WriteObject(static_cast<u32>(gmem.GetCbufIndex())) != 1 ||
400 file.WriteObject(static_cast<u32>(gmem.GetCbufOffset())) != 1) {
401 return false;
402 }
403 }
404
405 for (const bool clip_distance : entries.clip_distances) {
406 if (file.WriteObject(static_cast<u8>(clip_distance ? 1 : 0)) != 1)
407 return false;
408 }
409
410 return file.WriteObject(static_cast<u64>(entries.shader_length)) == 1;
411}
412
413void ShaderDiskCacheOpenGL::InvalidateTransferable() const {
414 if (!FileUtil::Delete(GetTransferablePath())) {
415 LOG_ERROR(Render_OpenGL, "Failed to invalidate transferable file={}",
416 GetTransferablePath());
417 }
418 InvalidatePrecompiled();
419}
420
421void ShaderDiskCacheOpenGL::InvalidatePrecompiled() const {
422 if (!FileUtil::Delete(GetPrecompiledPath())) {
423 LOG_ERROR(Render_OpenGL, "Failed to invalidate precompiled file={}", GetPrecompiledPath());
424 }
425}
426
427void ShaderDiskCacheOpenGL::SaveRaw(const ShaderDiskCacheRaw& entry) {
428 if (!IsUsable())
429 return;
430
431 const u64 id = entry.GetUniqueIdentifier();
432 if (transferable.find(id) != transferable.end()) {
433 // The shader already exists
434 return;
435 }
436
437 FileUtil::IOFile file = AppendTransferableFile();
438 if (!file.IsOpen())
439 return;
440 if (file.WriteObject(TransferableEntryKind::Raw) != 1 || !entry.Save(file)) {
441 LOG_ERROR(Render_OpenGL, "Failed to save raw transferable cache entry - removing");
442 file.Close();
443 InvalidateTransferable();
444 return;
445 }
446 transferable.insert({id, {}});
447}
448
449void ShaderDiskCacheOpenGL::SaveUsage(const ShaderDiskCacheUsage& usage) {
450 if (!IsUsable())
451 return;
452
453 const auto it = transferable.find(usage.unique_identifier);
454 ASSERT_MSG(it != transferable.end(), "Saving shader usage without storing raw previously");
455
456 auto& usages{it->second};
457 ASSERT(usages.find(usage) == usages.end());
458 usages.insert(usage);
459
460 FileUtil::IOFile file = AppendTransferableFile();
461 if (!file.IsOpen())
462 return;
463
464 if (file.WriteObject(TransferableEntryKind::Usage) != 1 || file.WriteObject(usage) != 1) {
465 LOG_ERROR(Render_OpenGL, "Failed to save usage transferable cache entry - removing");
466 file.Close();
467 InvalidateTransferable();
468 return;
469 }
470}
471
472void ShaderDiskCacheOpenGL::SaveDecompiled(u64 unique_identifier, const std::string& code,
473 const GLShader::ShaderEntries& entries) {
474 if (!IsUsable())
475 return;
476
477 const std::vector<u8> compressed_code{Common::Compression::CompressDataZSTDDefault(
478 reinterpret_cast<const u8*>(code.data()), code.size())};
479 if (compressed_code.empty()) {
480 LOG_ERROR(Render_OpenGL, "Failed to compress GLSL code - skipping shader {:016x}",
481 unique_identifier);
482 return;
483 }
484
485 FileUtil::IOFile file = AppendPrecompiledFile();
486 if (!file.IsOpen())
487 return;
488
489 if (!SaveDecompiledFile(file, unique_identifier, code, compressed_code, entries)) {
490 LOG_ERROR(Render_OpenGL,
491 "Failed to save decompiled entry to the precompiled file - removing");
492 file.Close();
493 InvalidatePrecompiled();
494 }
495}
496
497void ShaderDiskCacheOpenGL::SaveDump(const ShaderDiskCacheUsage& usage, GLuint program) {
498 if (!IsUsable())
499 return;
500
501 GLint binary_length{};
502 glGetProgramiv(program, GL_PROGRAM_BINARY_LENGTH, &binary_length);
503
504 GLenum binary_format{};
505 std::vector<u8> binary(binary_length);
506 glGetProgramBinary(program, binary_length, nullptr, &binary_format, binary.data());
507
508 const std::vector<u8> compressed_binary =
509 Common::Compression::CompressDataZSTDDefault(binary.data(), binary.size());
510
511 if (compressed_binary.empty()) {
512 LOG_ERROR(Render_OpenGL, "Failed to compress binary program in shader={:016x}",
513 usage.unique_identifier);
514 return;
515 }
516
517 FileUtil::IOFile file = AppendPrecompiledFile();
518 if (!file.IsOpen())
519 return;
520
521 if (file.WriteObject(static_cast<u32>(PrecompiledEntryKind::Dump)) != 1 ||
522 file.WriteObject(usage) != 1 || file.WriteObject(static_cast<u32>(binary_format)) != 1 ||
523 file.WriteObject(static_cast<u32>(binary_length)) != 1 ||
524 file.WriteObject(static_cast<u32>(compressed_binary.size())) != 1 ||
525 file.WriteArray(compressed_binary.data(), compressed_binary.size()) !=
526 compressed_binary.size()) {
527 LOG_ERROR(Render_OpenGL, "Failed to save binary program file in shader={:016x} - removing",
528 usage.unique_identifier);
529 file.Close();
530 InvalidatePrecompiled();
531 return;
532 }
533}
534
535bool ShaderDiskCacheOpenGL::IsUsable() const {
536 return tried_to_load && Settings::values.use_disk_shader_cache;
537}
538
539FileUtil::IOFile ShaderDiskCacheOpenGL::AppendTransferableFile() const {
540 if (!EnsureDirectories())
541 return {};
542
543 const auto transferable_path{GetTransferablePath()};
544 const bool existed = FileUtil::Exists(transferable_path);
545
546 FileUtil::IOFile file(transferable_path, "ab");
547 if (!file.IsOpen()) {
548 LOG_ERROR(Render_OpenGL, "Failed to open transferable cache in path={}", transferable_path);
549 return {};
550 }
551 if (!existed || file.GetSize() == 0) {
552 // If the file didn't exist, write its version
553 if (file.WriteObject(NativeVersion) != 1) {
554 LOG_ERROR(Render_OpenGL, "Failed to write transferable cache version in path={}",
555 transferable_path);
556 return {};
557 }
558 }
559 return file;
560}
561
562FileUtil::IOFile ShaderDiskCacheOpenGL::AppendPrecompiledFile() const {
563 if (!EnsureDirectories())
564 return {};
565
566 const auto precompiled_path{GetPrecompiledPath()};
567 const bool existed = FileUtil::Exists(precompiled_path);
568
569 FileUtil::IOFile file(precompiled_path, "ab");
570 if (!file.IsOpen()) {
571 LOG_ERROR(Render_OpenGL, "Failed to open precompiled cache in path={}", precompiled_path);
572 return {};
573 }
574
575 if (!existed || file.GetSize() == 0) {
576 const auto hash{GetShaderCacheVersionHash()};
577 if (file.WriteArray(hash.data(), hash.size()) != hash.size()) {
578 LOG_ERROR(Render_OpenGL, "Failed to write precompiled cache version hash in path={}",
579 precompiled_path);
580 return {};
581 }
582 }
583 return file;
584}
585
586bool ShaderDiskCacheOpenGL::EnsureDirectories() const {
587 const auto CreateDir = [](const std::string& dir) {
588 if (!FileUtil::CreateDir(dir)) {
589 LOG_ERROR(Render_OpenGL, "Failed to create directory={}", dir);
590 return false;
591 }
592 return true;
593 };
594
595 return CreateDir(FileUtil::GetUserPath(FileUtil::UserPath::ShaderDir)) &&
596 CreateDir(GetBaseDir()) && CreateDir(GetTransferableDir()) &&
597 CreateDir(GetPrecompiledDir());
598}
599
600std::string ShaderDiskCacheOpenGL::GetTransferablePath() const {
601 return FileUtil::SanitizePath(GetTransferableDir() + DIR_SEP_CHR + GetTitleID() + ".bin");
602}
603
604std::string ShaderDiskCacheOpenGL::GetPrecompiledPath() const {
605 return FileUtil::SanitizePath(GetPrecompiledDir() + DIR_SEP_CHR + GetTitleID() + ".bin");
606}
607
608std::string ShaderDiskCacheOpenGL::GetTransferableDir() const {
609 return GetBaseDir() + DIR_SEP "transferable";
610}
611
612std::string ShaderDiskCacheOpenGL::GetPrecompiledDir() const {
613 return GetBaseDir() + DIR_SEP "precompiled";
614}
615
616std::string ShaderDiskCacheOpenGL::GetBaseDir() const {
617 return FileUtil::GetUserPath(FileUtil::UserPath::ShaderDir) + DIR_SEP "opengl";
618}
619
620std::string ShaderDiskCacheOpenGL::GetTitleID() const {
621 return fmt::format("{:016X}", system.CurrentProcess()->GetTitleID());
622}
623
624} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_shader_disk_cache.h b/src/video_core/renderer_opengl/gl_shader_disk_cache.h
new file mode 100644
index 000000000..6be0c0547
--- /dev/null
+++ b/src/video_core/renderer_opengl/gl_shader_disk_cache.h
@@ -0,0 +1,245 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
#include <map>
#include <optional>
#include <string>
#include <tuple>
#include <unordered_map>
#include <unordered_set>
#include <utility>
#include <vector>

#include <glad/glad.h>

#include "common/assert.h"
#include "common/common_types.h"
#include "video_core/engines/maxwell_3d.h"
#include "video_core/renderer_opengl/gl_shader_gen.h"
21
22namespace Core {
23class System;
24}
25
26namespace FileUtil {
27class IOFile;
28}
29
30namespace OpenGL {
31
32using ProgramCode = std::vector<u64>;
33using Maxwell = Tegra::Engines::Maxwell3D::Regs;
34
35/// Allocated bindings used by an OpenGL shader program
36struct BaseBindings {
37 u32 cbuf{};
38 u32 gmem{};
39 u32 sampler{};
40
41 bool operator==(const BaseBindings& rhs) const {
42 return std::tie(cbuf, gmem, sampler) == std::tie(rhs.cbuf, rhs.gmem, rhs.sampler);
43 }
44
45 bool operator!=(const BaseBindings& rhs) const {
46 return !operator==(rhs);
47 }
48};
49
50/// Describes how a shader is used
51struct ShaderDiskCacheUsage {
52 u64 unique_identifier{};
53 BaseBindings bindings;
54 GLenum primitive{};
55
56 bool operator==(const ShaderDiskCacheUsage& rhs) const {
57 return std::tie(unique_identifier, bindings, primitive) ==
58 std::tie(rhs.unique_identifier, rhs.bindings, rhs.primitive);
59 }
60
61 bool operator!=(const ShaderDiskCacheUsage& rhs) const {
62 return !operator==(rhs);
63 }
64};
65
66} // namespace OpenGL
67
68namespace std {
69
70template <>
71struct hash<OpenGL::BaseBindings> {
72 std::size_t operator()(const OpenGL::BaseBindings& bindings) const {
73 return bindings.cbuf | bindings.gmem << 8 | bindings.sampler << 16;
74 }
75};
76
77template <>
78struct hash<OpenGL::ShaderDiskCacheUsage> {
79 std::size_t operator()(const OpenGL::ShaderDiskCacheUsage& usage) const {
80 return static_cast<std::size_t>(usage.unique_identifier) ^
81 std::hash<OpenGL::BaseBindings>()(usage.bindings) ^ usage.primitive << 16;
82 }
83};
84
85} // namespace std
86
87namespace OpenGL {
88
89/// Describes a shader how it's used by the guest GPU
90class ShaderDiskCacheRaw {
91public:
92 explicit ShaderDiskCacheRaw(u64 unique_identifier, Maxwell::ShaderProgram program_type,
93 u32 program_code_size, u32 program_code_size_b,
94 ProgramCode program_code, ProgramCode program_code_b);
95 ShaderDiskCacheRaw();
96 ~ShaderDiskCacheRaw();
97
98 bool Load(FileUtil::IOFile& file);
99
100 bool Save(FileUtil::IOFile& file) const;
101
102 u64 GetUniqueIdentifier() const {
103 return unique_identifier;
104 }
105
106 bool HasProgramA() const {
107 return program_type == Maxwell::ShaderProgram::VertexA;
108 }
109
110 Maxwell::ShaderProgram GetProgramType() const {
111 return program_type;
112 }
113
114 Maxwell::ShaderStage GetProgramStage() const {
115 switch (program_type) {
116 case Maxwell::ShaderProgram::VertexA:
117 case Maxwell::ShaderProgram::VertexB:
118 return Maxwell::ShaderStage::Vertex;
119 case Maxwell::ShaderProgram::TesselationControl:
120 return Maxwell::ShaderStage::TesselationControl;
121 case Maxwell::ShaderProgram::TesselationEval:
122 return Maxwell::ShaderStage::TesselationEval;
123 case Maxwell::ShaderProgram::Geometry:
124 return Maxwell::ShaderStage::Geometry;
125 case Maxwell::ShaderProgram::Fragment:
126 return Maxwell::ShaderStage::Fragment;
127 }
128 UNREACHABLE();
129 }
130
131 const ProgramCode& GetProgramCode() const {
132 return program_code;
133 }
134
135 const ProgramCode& GetProgramCodeB() const {
136 return program_code_b;
137 }
138
139private:
140 u64 unique_identifier{};
141 Maxwell::ShaderProgram program_type{};
142 u32 program_code_size{};
143 u32 program_code_size_b{};
144
145 ProgramCode program_code;
146 ProgramCode program_code_b;
147};
148
/// Contains decompiled data from a shader
struct ShaderDiskCacheDecompiled {
    // Decompiled GLSL source code
    std::string code;
    // Shader entries associated with the decompiled code
    GLShader::ShaderEntries entries;
};
154
/// Contains an OpenGL dumped binary program
struct ShaderDiskCacheDump {
    // Binary format token as returned by glGetProgramBinary
    GLenum binary_format;
    // Raw (uncompressed) program binary blob
    std::vector<u8> binary;
};
160
class ShaderDiskCacheOpenGL {
public:
    explicit ShaderDiskCacheOpenGL(Core::System& system);

    /// Loads the transferable cache. If the file has an old version or loading fails, the file is
    /// deleted.
    std::optional<std::pair<std::vector<ShaderDiskCacheRaw>, std::vector<ShaderDiskCacheUsage>>>
    LoadTransferable();

    /// Loads the current game's precompiled cache. Invalidates it on failure.
    std::pair<std::unordered_map<u64, ShaderDiskCacheDecompiled>,
              std::unordered_map<ShaderDiskCacheUsage, ShaderDiskCacheDump>>
    LoadPrecompiled();

    /// Removes the transferable (and precompiled) cache file.
    void InvalidateTransferable() const;

    /// Removes the precompiled cache file.
    void InvalidatePrecompiled() const;

    /// Saves a raw dump to the transferable file. Checks for collisions.
    void SaveRaw(const ShaderDiskCacheRaw& entry);

    /// Saves shader usage to the transferable file. Does not check for collisions.
    void SaveUsage(const ShaderDiskCacheUsage& usage);

    /// Saves a decompiled entry to the precompiled file. Does not check for collisions.
    void SaveDecompiled(u64 unique_identifier, const std::string& code,
                        const GLShader::ShaderEntries& entries);

    /// Saves a dump entry to the precompiled file. Does not check for collisions.
    void SaveDump(const ShaderDiskCacheUsage& usage, GLuint program);

private:
    /// Loads the precompiled cache from the passed file. Returns empty on failure.
    std::optional<std::pair<std::unordered_map<u64, ShaderDiskCacheDecompiled>,
                            std::unordered_map<ShaderDiskCacheUsage, ShaderDiskCacheDump>>>
    LoadPrecompiledFile(FileUtil::IOFile& file);

    /// Loads a decompiled cache entry from the passed file. Returns empty on failure.
    std::optional<ShaderDiskCacheDecompiled> LoadDecompiledEntry(FileUtil::IOFile& file);

    /// Saves a decompiled entry to the passed file. Returns true on success.
    bool SaveDecompiledFile(FileUtil::IOFile& file, u64 unique_identifier, const std::string& code,
                            const std::vector<u8>& compressed_code,
                            const GLShader::ShaderEntries& entries);

    /// Returns whether the cache can be used
    bool IsUsable() const;

    /// Opens the current game's transferable file and writes its header if it doesn't exist
    FileUtil::IOFile AppendTransferableFile() const;

    /// Opens the current game's precompiled file and writes its header if it doesn't exist
    FileUtil::IOFile AppendPrecompiledFile() const;

    /// Creates the shader disk cache directories. Returns true on success.
    bool EnsureDirectories() const;

    /// Gets the current game's transferable file path
    std::string GetTransferablePath() const;

    /// Gets the current game's precompiled file path
    std::string GetPrecompiledPath() const;

    /// Gets the user's transferable directory path
    std::string GetTransferableDir() const;

    /// Gets the user's precompiled directory path
    std::string GetPrecompiledDir() const;

    /// Gets the user's shader directory path
    std::string GetBaseDir() const;

    /// Gets the current game's title id, formatted as hex
    std::string GetTitleID() const;

    // Core system
    Core::System& system;
    // Stored transferable shaders, keyed by unique identifier
    std::map<u64, std::unordered_set<ShaderDiskCacheUsage>> transferable;
    // Whether a load attempt has been made at boot (required before the cache is usable)
    bool tried_to_load{};
};
244
245} // namespace OpenGL \ No newline at end of file
diff --git a/src/video_core/renderer_opengl/gl_shader_gen.cpp b/src/video_core/renderer_opengl/gl_shader_gen.cpp
index 5d0819dc5..8763d9c71 100644
--- a/src/video_core/renderer_opengl/gl_shader_gen.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_gen.cpp
@@ -3,67 +3,60 @@
3// Refer to the license.txt file included. 3// Refer to the license.txt file included.
4 4
5#include <fmt/format.h> 5#include <fmt/format.h>
6#include "common/assert.h"
7#include "video_core/engines/maxwell_3d.h" 6#include "video_core/engines/maxwell_3d.h"
8#include "video_core/renderer_opengl/gl_shader_decompiler.h" 7#include "video_core/renderer_opengl/gl_shader_decompiler.h"
9#include "video_core/renderer_opengl/gl_shader_gen.h" 8#include "video_core/renderer_opengl/gl_shader_gen.h"
9#include "video_core/shader/shader_ir.h"
10 10
11namespace OpenGL::GLShader { 11namespace OpenGL::GLShader {
12 12
13using Tegra::Engines::Maxwell3D; 13using Tegra::Engines::Maxwell3D;
14using VideoCommon::Shader::ProgramCode;
15using VideoCommon::Shader::ShaderIR;
14 16
15static constexpr u32 PROGRAM_OFFSET{10}; 17static constexpr u32 PROGRAM_OFFSET{10};
16 18
17ProgramResult GenerateVertexShader(const ShaderSetup& setup) { 19ProgramResult GenerateVertexShader(const ShaderSetup& setup) {
18 std::string out = "#version 430 core\n";
19 out += "#extension GL_ARB_separate_shader_objects : enable\n\n";
20 const std::string id = fmt::format("{:016x}", setup.program.unique_identifier); 20 const std::string id = fmt::format("{:016x}", setup.program.unique_identifier);
21
22 std::string out = "#extension GL_ARB_separate_shader_objects : enable\n\n";
21 out += "// Shader Unique Id: VS" + id + "\n\n"; 23 out += "// Shader Unique Id: VS" + id + "\n\n";
22 out += Decompiler::GetCommonDeclarations(); 24 out += GetCommonDeclarations();
23 25
24 out += R"( 26 out += R"(
25
26layout (location = 0) out vec4 position; 27layout (location = 0) out vec4 position;
27 28
28layout(std140) uniform vs_config { 29layout (std140, binding = EMULATION_UBO_BINDING) uniform vs_config {
29 vec4 viewport_flip; 30 vec4 viewport_flip;
30 uvec4 config_pack; // instance_id, flip_stage, y_direction, padding 31 uvec4 config_pack; // instance_id, flip_stage, y_direction, padding
31 uvec4 alpha_test; 32 uvec4 alpha_test;
32}; 33};
33)";
34
35 if (setup.IsDualProgram()) {
36 out += "bool exec_vertex_b();\n";
37 }
38 34
39 ProgramResult program = 35)";
40 Decompiler::DecompileProgram(setup.program.code, PROGRAM_OFFSET, 36 ShaderIR program_ir(setup.program.code, PROGRAM_OFFSET);
41 Maxwell3D::Regs::ShaderStage::Vertex, "vertex") 37 ProgramResult program = Decompile(program_ir, Maxwell3D::Regs::ShaderStage::Vertex, "vertex");
42 .value_or(ProgramResult());
43 38
44 out += program.first; 39 out += program.first;
45 40
46 if (setup.IsDualProgram()) { 41 if (setup.IsDualProgram()) {
42 ShaderIR program_ir_b(setup.program.code_b, PROGRAM_OFFSET);
47 ProgramResult program_b = 43 ProgramResult program_b =
48 Decompiler::DecompileProgram(setup.program.code_b, PROGRAM_OFFSET, 44 Decompile(program_ir_b, Maxwell3D::Regs::ShaderStage::Vertex, "vertex_b");
49 Maxwell3D::Regs::ShaderStage::Vertex, "vertex_b") 45
50 .value_or(ProgramResult());
51 out += program_b.first; 46 out += program_b.first;
52 } 47 }
53 48
54 out += R"( 49 out += R"(
55
56void main() { 50void main() {
57 position = vec4(0.0, 0.0, 0.0, 0.0); 51 position = vec4(0.0, 0.0, 0.0, 0.0);
58 exec_vertex(); 52 execute_vertex();
59)"; 53)";
60 54
61 if (setup.IsDualProgram()) { 55 if (setup.IsDualProgram()) {
62 out += " exec_vertex_b();"; 56 out += " execute_vertex_b();";
63 } 57 }
64 58
65 out += R"( 59 out += R"(
66
67 // Check if the flip stage is VertexB 60 // Check if the flip stage is VertexB
68 // Config pack's second value is flip_stage 61 // Config pack's second value is flip_stage
69 if (config_pack[1] == 1) { 62 if (config_pack[1] == 1) {
@@ -77,73 +70,62 @@ void main() {
77 if (config_pack[1] == 1) { 70 if (config_pack[1] == 1) {
78 position.w = 1.0; 71 position.w = 1.0;
79 } 72 }
80} 73})";
81
82)";
83 74
84 return {out, program.second}; 75 return {out, program.second};
85} 76}
86 77
87ProgramResult GenerateGeometryShader(const ShaderSetup& setup) { 78ProgramResult GenerateGeometryShader(const ShaderSetup& setup) {
88 // Version is intentionally skipped in shader generation, it's added by the lazy compilation.
89 std::string out = "#extension GL_ARB_separate_shader_objects : enable\n\n";
90 const std::string id = fmt::format("{:016x}", setup.program.unique_identifier); 79 const std::string id = fmt::format("{:016x}", setup.program.unique_identifier);
80
81 std::string out = "#extension GL_ARB_separate_shader_objects : enable\n\n";
91 out += "// Shader Unique Id: GS" + id + "\n\n"; 82 out += "// Shader Unique Id: GS" + id + "\n\n";
92 out += Decompiler::GetCommonDeclarations(); 83 out += GetCommonDeclarations();
93 out += "bool exec_geometry();\n";
94 84
95 ProgramResult program =
96 Decompiler::DecompileProgram(setup.program.code, PROGRAM_OFFSET,
97 Maxwell3D::Regs::ShaderStage::Geometry, "geometry")
98 .value_or(ProgramResult());
99 out += R"( 85 out += R"(
100out gl_PerVertex {
101 vec4 gl_Position;
102};
103
104layout (location = 0) in vec4 gs_position[]; 86layout (location = 0) in vec4 gs_position[];
105layout (location = 0) out vec4 position; 87layout (location = 0) out vec4 position;
106 88
107layout (std140) uniform gs_config { 89layout (std140, binding = EMULATION_UBO_BINDING) uniform gs_config {
108 vec4 viewport_flip; 90 vec4 viewport_flip;
109 uvec4 config_pack; // instance_id, flip_stage, y_direction, padding 91 uvec4 config_pack; // instance_id, flip_stage, y_direction, padding
110 uvec4 alpha_test; 92 uvec4 alpha_test;
111}; 93};
112 94
113void main() {
114 exec_geometry();
115}
116
117)"; 95)";
96 ShaderIR program_ir(setup.program.code, PROGRAM_OFFSET);
97 ProgramResult program =
98 Decompile(program_ir, Maxwell3D::Regs::ShaderStage::Geometry, "geometry");
118 out += program.first; 99 out += program.first;
100
101 out += R"(
102void main() {
103 execute_geometry();
104};)";
105
119 return {out, program.second}; 106 return {out, program.second};
120} 107}
121 108
122ProgramResult GenerateFragmentShader(const ShaderSetup& setup) { 109ProgramResult GenerateFragmentShader(const ShaderSetup& setup) {
123 std::string out = "#version 430 core\n";
124 out += "#extension GL_ARB_separate_shader_objects : enable\n\n";
125 const std::string id = fmt::format("{:016x}", setup.program.unique_identifier); 110 const std::string id = fmt::format("{:016x}", setup.program.unique_identifier);
111
112 std::string out = "#extension GL_ARB_separate_shader_objects : enable\n\n";
126 out += "// Shader Unique Id: FS" + id + "\n\n"; 113 out += "// Shader Unique Id: FS" + id + "\n\n";
127 out += Decompiler::GetCommonDeclarations(); 114 out += GetCommonDeclarations();
128 out += "bool exec_fragment();\n";
129 115
130 ProgramResult program =
131 Decompiler::DecompileProgram(setup.program.code, PROGRAM_OFFSET,
132 Maxwell3D::Regs::ShaderStage::Fragment, "fragment")
133 .value_or(ProgramResult());
134 out += R"( 116 out += R"(
135layout(location = 0) out vec4 FragColor0; 117layout (location = 0) out vec4 FragColor0;
136layout(location = 1) out vec4 FragColor1; 118layout (location = 1) out vec4 FragColor1;
137layout(location = 2) out vec4 FragColor2; 119layout (location = 2) out vec4 FragColor2;
138layout(location = 3) out vec4 FragColor3; 120layout (location = 3) out vec4 FragColor3;
139layout(location = 4) out vec4 FragColor4; 121layout (location = 4) out vec4 FragColor4;
140layout(location = 5) out vec4 FragColor5; 122layout (location = 5) out vec4 FragColor5;
141layout(location = 6) out vec4 FragColor6; 123layout (location = 6) out vec4 FragColor6;
142layout(location = 7) out vec4 FragColor7; 124layout (location = 7) out vec4 FragColor7;
143 125
144layout (location = 0) in vec4 position; 126layout (location = 0) in noperspective vec4 position;
145 127
146layout (std140) uniform fs_config { 128layout (std140, binding = EMULATION_UBO_BINDING) uniform fs_config {
147 vec4 viewport_flip; 129 vec4 viewport_flip;
148 uvec4 config_pack; // instance_id, flip_stage, y_direction, padding 130 uvec4 config_pack; // instance_id, flip_stage, y_direction, padding
149 uvec4 alpha_test; 131 uvec4 alpha_test;
@@ -173,12 +155,20 @@ bool AlphaFunc(in float value) {
173 } 155 }
174} 156}
175 157
158)";
159 ShaderIR program_ir(setup.program.code, PROGRAM_OFFSET);
160 ProgramResult program =
161 Decompile(program_ir, Maxwell3D::Regs::ShaderStage::Fragment, "fragment");
162
163 out += program.first;
164
165 out += R"(
176void main() { 166void main() {
177 exec_fragment(); 167 execute_fragment();
178} 168}
179 169
180)"; 170)";
181 out += program.first;
182 return {out, program.second}; 171 return {out, program.second};
183} 172}
173
184} // namespace OpenGL::GLShader 174} // namespace OpenGL::GLShader
diff --git a/src/video_core/renderer_opengl/gl_shader_gen.h b/src/video_core/renderer_opengl/gl_shader_gen.h
index fcc20d3b4..fad346b48 100644
--- a/src/video_core/renderer_opengl/gl_shader_gen.h
+++ b/src/video_core/renderer_opengl/gl_shader_gen.h
@@ -4,170 +4,15 @@
4 4
5#pragma once 5#pragma once
6 6
7#include <array>
8#include <string>
9#include <vector> 7#include <vector>
10 8
11#include "common/common_types.h" 9#include "common/common_types.h"
12#include "video_core/engines/shader_bytecode.h" 10#include "video_core/renderer_opengl/gl_shader_decompiler.h"
11#include "video_core/shader/shader_ir.h"
13 12
14namespace OpenGL::GLShader { 13namespace OpenGL::GLShader {
15 14
16constexpr std::size_t MAX_PROGRAM_CODE_LENGTH{0x1000}; 15using VideoCommon::Shader::ProgramCode;
17using ProgramCode = std::vector<u64>;
18
19enum : u32 { POSITION_VARYING_LOCATION = 0, GENERIC_VARYING_START_LOCATION = 1 };
20
21class ConstBufferEntry {
22 using Maxwell = Tegra::Engines::Maxwell3D::Regs;
23
24public:
25 void MarkAsUsed(u64 index, u64 offset, Maxwell::ShaderStage stage) {
26 is_used = true;
27 this->index = static_cast<unsigned>(index);
28 this->stage = stage;
29 max_offset = std::max(max_offset, static_cast<unsigned>(offset));
30 }
31
32 void MarkAsUsedIndirect(u64 index, Maxwell::ShaderStage stage) {
33 is_used = true;
34 is_indirect = true;
35 this->index = static_cast<unsigned>(index);
36 this->stage = stage;
37 }
38
39 bool IsUsed() const {
40 return is_used;
41 }
42
43 bool IsIndirect() const {
44 return is_indirect;
45 }
46
47 unsigned GetIndex() const {
48 return index;
49 }
50
51 unsigned GetSize() const {
52 return max_offset + 1;
53 }
54
55 std::string GetName() const {
56 return BufferBaseNames[static_cast<std::size_t>(stage)] + std::to_string(index);
57 }
58
59 u32 GetHash() const {
60 return (static_cast<u32>(stage) << 16) | index;
61 }
62
63private:
64 static constexpr std::array<const char*, Maxwell::MaxShaderStage> BufferBaseNames = {
65 "buffer_vs_c", "buffer_tessc_c", "buffer_tesse_c", "buffer_gs_c", "buffer_fs_c",
66 };
67
68 bool is_used{};
69 bool is_indirect{};
70 unsigned index{};
71 unsigned max_offset{};
72 Maxwell::ShaderStage stage;
73};
74
75class SamplerEntry {
76 using Maxwell = Tegra::Engines::Maxwell3D::Regs;
77
78public:
79 SamplerEntry(Maxwell::ShaderStage stage, std::size_t offset, std::size_t index,
80 Tegra::Shader::TextureType type, bool is_array, bool is_shadow)
81 : offset(offset), stage(stage), sampler_index(index), type(type), is_array(is_array),
82 is_shadow(is_shadow) {}
83
84 std::size_t GetOffset() const {
85 return offset;
86 }
87
88 std::size_t GetIndex() const {
89 return sampler_index;
90 }
91
92 Maxwell::ShaderStage GetStage() const {
93 return stage;
94 }
95
96 std::string GetName() const {
97 return std::string(TextureSamplerNames[static_cast<std::size_t>(stage)]) + '_' +
98 std::to_string(sampler_index);
99 }
100
101 std::string GetTypeString() const {
102 using Tegra::Shader::TextureType;
103 std::string glsl_type;
104
105 switch (type) {
106 case TextureType::Texture1D:
107 glsl_type = "sampler1D";
108 break;
109 case TextureType::Texture2D:
110 glsl_type = "sampler2D";
111 break;
112 case TextureType::Texture3D:
113 glsl_type = "sampler3D";
114 break;
115 case TextureType::TextureCube:
116 glsl_type = "samplerCube";
117 break;
118 default:
119 UNIMPLEMENTED();
120 }
121 if (is_array)
122 glsl_type += "Array";
123 if (is_shadow)
124 glsl_type += "Shadow";
125 return glsl_type;
126 }
127
128 Tegra::Shader::TextureType GetType() const {
129 return type;
130 }
131
132 bool IsArray() const {
133 return is_array;
134 }
135
136 bool IsShadow() const {
137 return is_shadow;
138 }
139
140 u32 GetHash() const {
141 return (static_cast<u32>(stage) << 16) | static_cast<u32>(sampler_index);
142 }
143
144 static std::string GetArrayName(Maxwell::ShaderStage stage) {
145 return TextureSamplerNames[static_cast<std::size_t>(stage)];
146 }
147
148private:
149 static constexpr std::array<const char*, Maxwell::MaxShaderStage> TextureSamplerNames = {
150 "tex_vs", "tex_tessc", "tex_tesse", "tex_gs", "tex_fs",
151 };
152
153 /// Offset in TSC memory from which to read the sampler object, as specified by the sampling
154 /// instruction.
155 std::size_t offset;
156 Maxwell::ShaderStage stage; ///< Shader stage where this sampler was used.
157 std::size_t sampler_index; ///< Value used to index into the generated GLSL sampler array.
158 Tegra::Shader::TextureType type; ///< The type used to sample this texture (Texture2D, etc)
159 bool is_array; ///< Whether the texture is being sampled as an array texture or not.
160 bool is_shadow; ///< Whether the texture is being sampled as a depth texture or not.
161};
162
163struct ShaderEntries {
164 std::vector<ConstBufferEntry> const_buffer_entries;
165 std::vector<SamplerEntry> texture_samplers;
166 std::array<bool, Tegra::Engines::Maxwell3D::Regs::NumClipDistances> clip_distances;
167 std::size_t shader_length;
168};
169
170using ProgramResult = std::pair<std::string, ShaderEntries>;
171 16
172struct ShaderSetup { 17struct ShaderSetup {
173 explicit ShaderSetup(ProgramCode program_code) { 18 explicit ShaderSetup(ProgramCode program_code) {
@@ -178,12 +23,10 @@ struct ShaderSetup {
178 ProgramCode code; 23 ProgramCode code;
179 ProgramCode code_b; // Used for dual vertex shaders 24 ProgramCode code_b; // Used for dual vertex shaders
180 u64 unique_identifier; 25 u64 unique_identifier;
181 std::size_t real_size;
182 std::size_t real_size_b;
183 } program; 26 } program;
184 27
185 /// Used in scenarios where we have a dual vertex shaders 28 /// Used in scenarios where we have a dual vertex shaders
186 void SetProgramB(ProgramCode&& program_b) { 29 void SetProgramB(ProgramCode program_b) {
187 program.code_b = std::move(program_b); 30 program.code_b = std::move(program_b);
188 has_program_b = true; 31 has_program_b = true;
189 } 32 }
diff --git a/src/video_core/renderer_opengl/gl_shader_manager.cpp b/src/video_core/renderer_opengl/gl_shader_manager.cpp
index 6a30c28d2..eaf3e03a0 100644
--- a/src/video_core/renderer_opengl/gl_shader_manager.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_manager.cpp
@@ -2,15 +2,15 @@
2// Licensed under GPLv2 or any later version 2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included. 3// Refer to the license.txt file included.
4 4
5#include "core/core.h"
6#include "video_core/renderer_opengl/gl_shader_manager.h" 5#include "video_core/renderer_opengl/gl_shader_manager.h"
7 6
8namespace OpenGL::GLShader { 7namespace OpenGL::GLShader {
9 8
10void MaxwellUniformData::SetFromRegs(const Maxwell3D::State::ShaderStageInfo& shader_stage) { 9using Tegra::Engines::Maxwell3D;
11 const auto& gpu = Core::System::GetInstance().GPU().Maxwell3D(); 10
12 const auto& regs = gpu.regs; 11void MaxwellUniformData::SetFromRegs(const Maxwell3D& maxwell, std::size_t shader_stage) {
13 const auto& state = gpu.state; 12 const auto& regs = maxwell.regs;
13 const auto& state = maxwell.state;
14 14
15 // TODO(bunnei): Support more than one viewport 15 // TODO(bunnei): Support more than one viewport
16 viewport_flip[0] = regs.viewport_transform[0].scale_x < 0.0 ? -1.0f : 1.0f; 16 viewport_flip[0] = regs.viewport_transform[0].scale_x < 0.0 ? -1.0f : 1.0f;
@@ -18,7 +18,7 @@ void MaxwellUniformData::SetFromRegs(const Maxwell3D::State::ShaderStageInfo& sh
18 18
19 u32 func = static_cast<u32>(regs.alpha_test_func); 19 u32 func = static_cast<u32>(regs.alpha_test_func);
20 // Normalize the gl variants of opCompare to be the same as the normal variants 20 // Normalize the gl variants of opCompare to be the same as the normal variants
21 u32 op_gl_variant_base = static_cast<u32>(Tegra::Engines::Maxwell3D::Regs::ComparisonOp::Never); 21 const u32 op_gl_variant_base = static_cast<u32>(Maxwell3D::Regs::ComparisonOp::Never);
22 if (func >= op_gl_variant_base) { 22 if (func >= op_gl_variant_base) {
23 func = func - op_gl_variant_base + 1U; 23 func = func - op_gl_variant_base + 1U;
24 } 24 }
@@ -31,8 +31,9 @@ void MaxwellUniformData::SetFromRegs(const Maxwell3D::State::ShaderStageInfo& sh
31 31
32 // Assign in which stage the position has to be flipped 32 // Assign in which stage the position has to be flipped
33 // (the last stage before the fragment shader). 33 // (the last stage before the fragment shader).
34 if (gpu.regs.shader_config[static_cast<u32>(Maxwell3D::Regs::ShaderProgram::Geometry)].enable) { 34 constexpr u32 geometry_index = static_cast<u32>(Maxwell3D::Regs::ShaderProgram::Geometry);
35 flip_stage = static_cast<u32>(Maxwell3D::Regs::ShaderProgram::Geometry); 35 if (maxwell.regs.shader_config[geometry_index].enable) {
36 flip_stage = geometry_index;
36 } else { 37 } else {
37 flip_stage = static_cast<u32>(Maxwell3D::Regs::ShaderProgram::VertexB); 38 flip_stage = static_cast<u32>(Maxwell3D::Regs::ShaderProgram::VertexB);
38 } 39 }
diff --git a/src/video_core/renderer_opengl/gl_shader_manager.h b/src/video_core/renderer_opengl/gl_shader_manager.h
index 4970aafed..37dcfefdb 100644
--- a/src/video_core/renderer_opengl/gl_shader_manager.h
+++ b/src/video_core/renderer_opengl/gl_shader_manager.h
@@ -12,14 +12,13 @@
12 12
13namespace OpenGL::GLShader { 13namespace OpenGL::GLShader {
14 14
15using Tegra::Engines::Maxwell3D;
16
17/// Uniform structure for the Uniform Buffer Object, all vectors must be 16-byte aligned 15/// Uniform structure for the Uniform Buffer Object, all vectors must be 16-byte aligned
18// NOTE: Always keep a vec4 at the end. The GL spec is not clear whether the alignment at 16/// @note Always keep a vec4 at the end. The GL spec is not clear whether the alignment at
19// the end of a uniform block is included in UNIFORM_BLOCK_DATA_SIZE or not. 17/// the end of a uniform block is included in UNIFORM_BLOCK_DATA_SIZE or not.
20// Not following that rule will cause problems on some AMD drivers. 18/// Not following that rule will cause problems on some AMD drivers.
21struct MaxwellUniformData { 19struct MaxwellUniformData {
22 void SetFromRegs(const Maxwell3D::State::ShaderStageInfo& shader_stage); 20 void SetFromRegs(const Tegra::Engines::Maxwell3D& maxwell, std::size_t shader_stage);
21
23 alignas(16) GLvec4 viewport_flip; 22 alignas(16) GLvec4 viewport_flip;
24 struct alignas(16) { 23 struct alignas(16) {
25 GLuint instance_id; 24 GLuint instance_id;
@@ -63,7 +62,6 @@ public:
63 UpdatePipeline(); 62 UpdatePipeline();
64 state.draw.shader_program = 0; 63 state.draw.shader_program = 0;
65 state.draw.program_pipeline = pipeline.handle; 64 state.draw.program_pipeline = pipeline.handle;
66 state.geometry_shaders.enabled = (gs != 0);
67 } 65 }
68 66
69private: 67private:
diff --git a/src/video_core/renderer_opengl/gl_shader_util.h b/src/video_core/renderer_opengl/gl_shader_util.h
index 285594f50..03b7548c2 100644
--- a/src/video_core/renderer_opengl/gl_shader_util.h
+++ b/src/video_core/renderer_opengl/gl_shader_util.h
@@ -47,7 +47,7 @@ GLuint LoadShader(const char* source, GLenum type);
47 * @returns Handle of the newly created OpenGL program object 47 * @returns Handle of the newly created OpenGL program object
48 */ 48 */
49template <typename... T> 49template <typename... T>
50GLuint LoadProgram(bool separable_program, T... shaders) { 50GLuint LoadProgram(bool separable_program, bool hint_retrievable, T... shaders) {
51 // Link the program 51 // Link the program
52 LOG_DEBUG(Render_OpenGL, "Linking program..."); 52 LOG_DEBUG(Render_OpenGL, "Linking program...");
53 53
@@ -58,6 +58,9 @@ GLuint LoadProgram(bool separable_program, T... shaders) {
58 if (separable_program) { 58 if (separable_program) {
59 glProgramParameteri(program_id, GL_PROGRAM_SEPARABLE, GL_TRUE); 59 glProgramParameteri(program_id, GL_PROGRAM_SEPARABLE, GL_TRUE);
60 } 60 }
61 if (hint_retrievable) {
62 glProgramParameteri(program_id, GL_PROGRAM_BINARY_RETRIEVABLE_HINT, GL_TRUE);
63 }
61 64
62 glLinkProgram(program_id); 65 glLinkProgram(program_id);
63 66
diff --git a/src/video_core/renderer_opengl/gl_state.cpp b/src/video_core/renderer_opengl/gl_state.cpp
index b7ba59350..52d569a1b 100644
--- a/src/video_core/renderer_opengl/gl_state.cpp
+++ b/src/video_core/renderer_opengl/gl_state.cpp
@@ -10,14 +10,62 @@
10 10
11namespace OpenGL { 11namespace OpenGL {
12 12
13using Maxwell = Tegra::Engines::Maxwell3D::Regs;
14
13OpenGLState OpenGLState::cur_state; 15OpenGLState OpenGLState::cur_state;
14bool OpenGLState::s_rgb_used; 16bool OpenGLState::s_rgb_used;
17
18namespace {
19
20template <typename T>
21bool UpdateValue(T& current_value, const T new_value) {
22 const bool changed = current_value != new_value;
23 current_value = new_value;
24 return changed;
25}
26
27template <typename T1, typename T2>
28bool UpdateTie(T1 current_value, const T2 new_value) {
29 const bool changed = current_value != new_value;
30 current_value = new_value;
31 return changed;
32}
33
34void Enable(GLenum cap, bool enable) {
35 if (enable) {
36 glEnable(cap);
37 } else {
38 glDisable(cap);
39 }
40}
41
42void Enable(GLenum cap, GLuint index, bool enable) {
43 if (enable) {
44 glEnablei(cap, index);
45 } else {
46 glDisablei(cap, index);
47 }
48}
49
50void Enable(GLenum cap, bool& current_value, bool new_value) {
51 if (UpdateValue(current_value, new_value))
52 Enable(cap, new_value);
53}
54
55void Enable(GLenum cap, GLuint index, bool& current_value, bool new_value) {
56 if (UpdateValue(current_value, new_value))
57 Enable(cap, index, new_value);
58}
59
60} // namespace
61
15OpenGLState::OpenGLState() { 62OpenGLState::OpenGLState() {
16 // These all match default OpenGL values 63 // These all match default OpenGL values
17 geometry_shaders.enabled = false;
18 framebuffer_srgb.enabled = false; 64 framebuffer_srgb.enabled = false;
65
19 multisample_control.alpha_to_coverage = false; 66 multisample_control.alpha_to_coverage = false;
20 multisample_control.alpha_to_one = false; 67 multisample_control.alpha_to_one = false;
68
21 cull.enabled = false; 69 cull.enabled = false;
22 cull.mode = GL_BACK; 70 cull.mode = GL_BACK;
23 cull.front_face = GL_CCW; 71 cull.front_face = GL_CCW;
@@ -28,14 +76,15 @@ OpenGLState::OpenGLState() {
28 76
29 primitive_restart.enabled = false; 77 primitive_restart.enabled = false;
30 primitive_restart.index = 0; 78 primitive_restart.index = 0;
79
31 for (auto& item : color_mask) { 80 for (auto& item : color_mask) {
32 item.red_enabled = GL_TRUE; 81 item.red_enabled = GL_TRUE;
33 item.green_enabled = GL_TRUE; 82 item.green_enabled = GL_TRUE;
34 item.blue_enabled = GL_TRUE; 83 item.blue_enabled = GL_TRUE;
35 item.alpha_enabled = GL_TRUE; 84 item.alpha_enabled = GL_TRUE;
36 } 85 }
37 stencil.test_enabled = false; 86
38 auto reset_stencil = [](auto& config) { 87 const auto ResetStencil = [](auto& config) {
39 config.test_func = GL_ALWAYS; 88 config.test_func = GL_ALWAYS;
40 config.test_ref = 0; 89 config.test_ref = 0;
41 config.test_mask = 0xFFFFFFFF; 90 config.test_mask = 0xFFFFFFFF;
@@ -44,8 +93,10 @@ OpenGLState::OpenGLState() {
44 config.action_depth_pass = GL_KEEP; 93 config.action_depth_pass = GL_KEEP;
45 config.action_stencil_fail = GL_KEEP; 94 config.action_stencil_fail = GL_KEEP;
46 }; 95 };
47 reset_stencil(stencil.front); 96 stencil.test_enabled = false;
48 reset_stencil(stencil.back); 97 ResetStencil(stencil.front);
98 ResetStencil(stencil.back);
99
49 for (auto& item : viewports) { 100 for (auto& item : viewports) {
50 item.x = 0; 101 item.x = 0;
51 item.y = 0; 102 item.y = 0;
@@ -59,6 +110,7 @@ OpenGLState::OpenGLState() {
59 item.scissor.width = 0; 110 item.scissor.width = 0;
60 item.scissor.height = 0; 111 item.scissor.height = 0;
61 } 112 }
113
62 for (auto& item : blend) { 114 for (auto& item : blend) {
63 item.enabled = true; 115 item.enabled = true;
64 item.rgb_equation = GL_FUNC_ADD; 116 item.rgb_equation = GL_FUNC_ADD;
@@ -68,11 +120,14 @@ OpenGLState::OpenGLState() {
68 item.src_a_func = GL_ONE; 120 item.src_a_func = GL_ONE;
69 item.dst_a_func = GL_ZERO; 121 item.dst_a_func = GL_ZERO;
70 } 122 }
123
71 independant_blend.enabled = false; 124 independant_blend.enabled = false;
125
72 blend_color.red = 0.0f; 126 blend_color.red = 0.0f;
73 blend_color.green = 0.0f; 127 blend_color.green = 0.0f;
74 blend_color.blue = 0.0f; 128 blend_color.blue = 0.0f;
75 blend_color.alpha = 0.0f; 129 blend_color.alpha = 0.0f;
130
76 logic_op.enabled = false; 131 logic_op.enabled = false;
77 logic_op.operation = GL_COPY; 132 logic_op.operation = GL_COPY;
78 133
@@ -89,9 +144,12 @@ OpenGLState::OpenGLState() {
89 clip_distance = {}; 144 clip_distance = {};
90 145
91 point.size = 1; 146 point.size = 1;
147
92 fragment_color_clamp.enabled = false; 148 fragment_color_clamp.enabled = false;
149
93 depth_clamp.far_plane = false; 150 depth_clamp.far_plane = false;
94 depth_clamp.near_plane = false; 151 depth_clamp.near_plane = false;
152
95 polygon_offset.fill_enable = false; 153 polygon_offset.fill_enable = false;
96 polygon_offset.line_enable = false; 154 polygon_offset.line_enable = false;
97 polygon_offset.point_enable = false; 155 polygon_offset.point_enable = false;
@@ -101,279 +159,255 @@ OpenGLState::OpenGLState() {
101} 159}
102 160
103void OpenGLState::ApplyDefaultState() { 161void OpenGLState::ApplyDefaultState() {
162 glEnable(GL_BLEND);
104 glDisable(GL_FRAMEBUFFER_SRGB); 163 glDisable(GL_FRAMEBUFFER_SRGB);
105 glDisable(GL_CULL_FACE); 164 glDisable(GL_CULL_FACE);
106 glDisable(GL_DEPTH_TEST); 165 glDisable(GL_DEPTH_TEST);
107 glDisable(GL_PRIMITIVE_RESTART); 166 glDisable(GL_PRIMITIVE_RESTART);
108 glDisable(GL_STENCIL_TEST); 167 glDisable(GL_STENCIL_TEST);
109 glEnable(GL_BLEND);
110 glDisable(GL_COLOR_LOGIC_OP); 168 glDisable(GL_COLOR_LOGIC_OP);
111 glDisable(GL_SCISSOR_TEST); 169 glDisable(GL_SCISSOR_TEST);
112} 170}
113 171
172void OpenGLState::ApplyFramebufferState() const {
173 if (UpdateValue(cur_state.draw.read_framebuffer, draw.read_framebuffer)) {
174 glBindFramebuffer(GL_READ_FRAMEBUFFER, draw.read_framebuffer);
175 }
176 if (UpdateValue(cur_state.draw.draw_framebuffer, draw.draw_framebuffer)) {
177 glBindFramebuffer(GL_DRAW_FRAMEBUFFER, draw.draw_framebuffer);
178 }
179}
180
181void OpenGLState::ApplyVertexArrayState() const {
182 if (UpdateValue(cur_state.draw.vertex_array, draw.vertex_array)) {
183 glBindVertexArray(draw.vertex_array);
184 }
185}
186
187void OpenGLState::ApplyShaderProgram() const {
188 if (UpdateValue(cur_state.draw.shader_program, draw.shader_program)) {
189 glUseProgram(draw.shader_program);
190 }
191}
192
193void OpenGLState::ApplyProgramPipeline() const {
194 if (UpdateValue(cur_state.draw.program_pipeline, draw.program_pipeline)) {
195 glBindProgramPipeline(draw.program_pipeline);
196 }
197}
198
199void OpenGLState::ApplyClipDistances() const {
200 for (std::size_t i = 0; i < clip_distance.size(); ++i) {
201 Enable(GL_CLIP_DISTANCE0 + static_cast<GLenum>(i), cur_state.clip_distance[i],
202 clip_distance[i]);
203 }
204}
205
206void OpenGLState::ApplyPointSize() const {
207 if (UpdateValue(cur_state.point.size, point.size)) {
208 glPointSize(point.size);
209 }
210}
211
212void OpenGLState::ApplyFragmentColorClamp() const {
213 if (UpdateValue(cur_state.fragment_color_clamp.enabled, fragment_color_clamp.enabled)) {
214 glClampColor(GL_CLAMP_FRAGMENT_COLOR_ARB,
215 fragment_color_clamp.enabled ? GL_TRUE : GL_FALSE);
216 }
217}
218
219void OpenGLState::ApplyMultisample() const {
220 Enable(GL_SAMPLE_ALPHA_TO_COVERAGE, cur_state.multisample_control.alpha_to_coverage,
221 multisample_control.alpha_to_coverage);
222 Enable(GL_SAMPLE_ALPHA_TO_ONE, cur_state.multisample_control.alpha_to_one,
223 multisample_control.alpha_to_one);
224}
225
226void OpenGLState::ApplyDepthClamp() const {
227 if (depth_clamp.far_plane == cur_state.depth_clamp.far_plane &&
228 depth_clamp.near_plane == cur_state.depth_clamp.near_plane) {
229 return;
230 }
231 cur_state.depth_clamp = depth_clamp;
232
233 UNIMPLEMENTED_IF_MSG(depth_clamp.far_plane != depth_clamp.near_plane,
234 "Unimplemented Depth Clamp Separation!");
235
236 Enable(GL_DEPTH_CLAMP, depth_clamp.far_plane || depth_clamp.near_plane);
237}
238
114void OpenGLState::ApplySRgb() const { 239void OpenGLState::ApplySRgb() const {
115 // sRGB 240 if (cur_state.framebuffer_srgb.enabled == framebuffer_srgb.enabled)
116 if (framebuffer_srgb.enabled != cur_state.framebuffer_srgb.enabled) { 241 return;
117 if (framebuffer_srgb.enabled) { 242 cur_state.framebuffer_srgb.enabled = framebuffer_srgb.enabled;
118 // Track if sRGB is used 243 if (framebuffer_srgb.enabled) {
119 s_rgb_used = true; 244 // Track if sRGB is used
120 glEnable(GL_FRAMEBUFFER_SRGB); 245 s_rgb_used = true;
121 } else { 246 glEnable(GL_FRAMEBUFFER_SRGB);
122 glDisable(GL_FRAMEBUFFER_SRGB); 247 } else {
123 } 248 glDisable(GL_FRAMEBUFFER_SRGB);
124 } 249 }
125} 250}
126 251
127void OpenGLState::ApplyCulling() const { 252void OpenGLState::ApplyCulling() const {
128 // Culling 253 Enable(GL_CULL_FACE, cur_state.cull.enabled, cull.enabled);
129 const bool cull_changed = cull.enabled != cur_state.cull.enabled; 254
130 if (cull_changed) { 255 if (UpdateValue(cur_state.cull.mode, cull.mode)) {
131 if (cull.enabled) { 256 glCullFace(cull.mode);
132 glEnable(GL_CULL_FACE);
133 } else {
134 glDisable(GL_CULL_FACE);
135 }
136 } 257 }
137 if (cull.enabled) {
138 if (cull_changed || cull.mode != cur_state.cull.mode) {
139 glCullFace(cull.mode);
140 }
141 258
142 if (cull_changed || cull.front_face != cur_state.cull.front_face) { 259 if (UpdateValue(cur_state.cull.front_face, cull.front_face)) {
143 glFrontFace(cull.front_face); 260 glFrontFace(cull.front_face);
144 }
145 } 261 }
146} 262}
147 263
148void OpenGLState::ApplyColorMask() const { 264void OpenGLState::ApplyColorMask() const {
149 if (independant_blend.enabled) { 265 for (std::size_t i = 0; i < Maxwell::NumRenderTargets; ++i) {
150 for (size_t i = 0; i < Tegra::Engines::Maxwell3D::Regs::NumRenderTargets; i++) { 266 const auto& updated = color_mask[i];
151 const auto& updated = color_mask[i]; 267 auto& current = cur_state.color_mask[i];
152 const auto& current = cur_state.color_mask[i];
153 if (updated.red_enabled != current.red_enabled ||
154 updated.green_enabled != current.green_enabled ||
155 updated.blue_enabled != current.blue_enabled ||
156 updated.alpha_enabled != current.alpha_enabled) {
157 glColorMaski(static_cast<GLuint>(i), updated.red_enabled, updated.green_enabled,
158 updated.blue_enabled, updated.alpha_enabled);
159 }
160 }
161 } else {
162 const auto& updated = color_mask[0];
163 const auto& current = cur_state.color_mask[0];
164 if (updated.red_enabled != current.red_enabled || 268 if (updated.red_enabled != current.red_enabled ||
165 updated.green_enabled != current.green_enabled || 269 updated.green_enabled != current.green_enabled ||
166 updated.blue_enabled != current.blue_enabled || 270 updated.blue_enabled != current.blue_enabled ||
167 updated.alpha_enabled != current.alpha_enabled) { 271 updated.alpha_enabled != current.alpha_enabled) {
168 glColorMask(updated.red_enabled, updated.green_enabled, updated.blue_enabled, 272 current = updated;
169 updated.alpha_enabled); 273 glColorMaski(static_cast<GLuint>(i), updated.red_enabled, updated.green_enabled,
274 updated.blue_enabled, updated.alpha_enabled);
170 } 275 }
171 } 276 }
172} 277}
173 278
174void OpenGLState::ApplyDepth() const { 279void OpenGLState::ApplyDepth() const {
175 // Depth test 280 Enable(GL_DEPTH_TEST, cur_state.depth.test_enabled, depth.test_enabled);
176 const bool depth_test_changed = depth.test_enabled != cur_state.depth.test_enabled; 281
177 if (depth_test_changed) { 282 if (cur_state.depth.test_func != depth.test_func) {
178 if (depth.test_enabled) { 283 cur_state.depth.test_func = depth.test_func;
179 glEnable(GL_DEPTH_TEST);
180 } else {
181 glDisable(GL_DEPTH_TEST);
182 }
183 }
184 if (depth.test_enabled &&
185 (depth_test_changed || depth.test_func != cur_state.depth.test_func)) {
186 glDepthFunc(depth.test_func); 284 glDepthFunc(depth.test_func);
187 } 285 }
188 // Depth mask 286
189 if (depth.write_mask != cur_state.depth.write_mask) { 287 if (cur_state.depth.write_mask != depth.write_mask) {
288 cur_state.depth.write_mask = depth.write_mask;
190 glDepthMask(depth.write_mask); 289 glDepthMask(depth.write_mask);
191 } 290 }
192} 291}
193 292
194void OpenGLState::ApplyPrimitiveRestart() const { 293void OpenGLState::ApplyPrimitiveRestart() const {
195 const bool primitive_restart_changed = 294 Enable(GL_PRIMITIVE_RESTART, cur_state.primitive_restart.enabled, primitive_restart.enabled);
196 primitive_restart.enabled != cur_state.primitive_restart.enabled; 295
197 if (primitive_restart_changed) { 296 if (cur_state.primitive_restart.index != primitive_restart.index) {
198 if (primitive_restart.enabled) { 297 cur_state.primitive_restart.index = primitive_restart.index;
199 glEnable(GL_PRIMITIVE_RESTART);
200 } else {
201 glDisable(GL_PRIMITIVE_RESTART);
202 }
203 }
204 if (primitive_restart_changed ||
205 (primitive_restart.enabled &&
206 primitive_restart.index != cur_state.primitive_restart.index)) {
207 glPrimitiveRestartIndex(primitive_restart.index); 298 glPrimitiveRestartIndex(primitive_restart.index);
208 } 299 }
209} 300}
210 301
211void OpenGLState::ApplyStencilTest() const { 302void OpenGLState::ApplyStencilTest() const {
212 const bool stencil_test_changed = stencil.test_enabled != cur_state.stencil.test_enabled; 303 Enable(GL_STENCIL_TEST, cur_state.stencil.test_enabled, stencil.test_enabled);
213 if (stencil_test_changed) { 304
214 if (stencil.test_enabled) { 305 const auto ConfigStencil = [](GLenum face, const auto& config, auto& current) {
215 glEnable(GL_STENCIL_TEST); 306 if (current.test_func != config.test_func || current.test_ref != config.test_ref ||
216 } else { 307 current.test_mask != config.test_mask) {
217 glDisable(GL_STENCIL_TEST); 308 current.test_func = config.test_func;
309 current.test_ref = config.test_ref;
310 current.test_mask = config.test_mask;
311 glStencilFuncSeparate(face, config.test_func, config.test_ref, config.test_mask);
218 } 312 }
219 } 313 if (current.action_depth_fail != config.action_depth_fail ||
220 if (stencil.test_enabled) { 314 current.action_depth_pass != config.action_depth_pass ||
221 auto config_stencil = [stencil_test_changed](GLenum face, const auto& config, 315 current.action_stencil_fail != config.action_stencil_fail) {
222 const auto& prev_config) { 316 current.action_depth_fail = config.action_depth_fail;
223 if (stencil_test_changed || config.test_func != prev_config.test_func || 317 current.action_depth_pass = config.action_depth_pass;
224 config.test_ref != prev_config.test_ref || 318 current.action_stencil_fail = config.action_stencil_fail;
225 config.test_mask != prev_config.test_mask) { 319 glStencilOpSeparate(face, config.action_stencil_fail, config.action_depth_fail,
226 glStencilFuncSeparate(face, config.test_func, config.test_ref, config.test_mask); 320 config.action_depth_pass);
227 } 321 }
228 if (stencil_test_changed || config.action_depth_fail != prev_config.action_depth_fail || 322 if (current.write_mask != config.write_mask) {
229 config.action_depth_pass != prev_config.action_depth_pass || 323 current.write_mask = config.write_mask;
230 config.action_stencil_fail != prev_config.action_stencil_fail) { 324 glStencilMaskSeparate(face, config.write_mask);
231 glStencilOpSeparate(face, config.action_stencil_fail, config.action_depth_fail, 325 }
232 config.action_depth_pass); 326 };
233 } 327 ConfigStencil(GL_FRONT, stencil.front, cur_state.stencil.front);
234 if (config.write_mask != prev_config.write_mask) { 328 ConfigStencil(GL_BACK, stencil.back, cur_state.stencil.back);
235 glStencilMaskSeparate(face, config.write_mask);
236 }
237 };
238 config_stencil(GL_FRONT, stencil.front, cur_state.stencil.front);
239 config_stencil(GL_BACK, stencil.back, cur_state.stencil.back);
240 }
241}
242// Viewport does not affects glClearBuffer so emulate viewport using scissor test
243void OpenGLState::EmulateViewportWithScissor() {
244 auto& current = viewports[0];
245 if (current.scissor.enabled) {
246 const GLint left = std::max(current.x, current.scissor.x);
247 const GLint right =
248 std::max(current.x + current.width, current.scissor.x + current.scissor.width);
249 const GLint bottom = std::max(current.y, current.scissor.y);
250 const GLint top =
251 std::max(current.y + current.height, current.scissor.y + current.scissor.height);
252 current.scissor.x = std::max(left, 0);
253 current.scissor.y = std::max(bottom, 0);
254 current.scissor.width = std::max(right - left, 0);
255 current.scissor.height = std::max(top - bottom, 0);
256 } else {
257 current.scissor.enabled = true;
258 current.scissor.x = current.x;
259 current.scissor.y = current.y;
260 current.scissor.width = current.width;
261 current.scissor.height = current.height;
262 }
263} 329}
264 330
265void OpenGLState::ApplyViewport() const { 331void OpenGLState::ApplyViewport() const {
266 if (geometry_shaders.enabled) { 332 for (GLuint i = 0; i < static_cast<GLuint>(Maxwell::NumViewports); ++i) {
267 for (GLuint i = 0; i < static_cast<GLuint>(Tegra::Engines::Maxwell3D::Regs::NumViewports); 333 const auto& updated = viewports[i];
268 i++) { 334 auto& current = cur_state.viewports[i];
269 const auto& current = cur_state.viewports[i]; 335
270 const auto& updated = viewports[i]; 336 if (current.x != updated.x || current.y != updated.y || current.width != updated.width ||
271 if (updated.x != current.x || updated.y != current.y || 337 current.height != updated.height) {
272 updated.width != current.width || updated.height != current.height) { 338 current.x = updated.x;
273 glViewportIndexedf( 339 current.y = updated.y;
274 i, static_cast<GLfloat>(updated.x), static_cast<GLfloat>(updated.y), 340 current.width = updated.width;
275 static_cast<GLfloat>(updated.width), static_cast<GLfloat>(updated.height)); 341 current.height = updated.height;
276 } 342 glViewportIndexedf(i, static_cast<GLfloat>(updated.x), static_cast<GLfloat>(updated.y),
277 if (updated.depth_range_near != current.depth_range_near || 343 static_cast<GLfloat>(updated.width),
278 updated.depth_range_far != current.depth_range_far) { 344 static_cast<GLfloat>(updated.height));
279 glDepthRangeIndexed(i, updated.depth_range_near, updated.depth_range_far);
280 }
281 const bool scissor_changed = updated.scissor.enabled != current.scissor.enabled;
282 if (scissor_changed) {
283 if (updated.scissor.enabled) {
284 glEnablei(GL_SCISSOR_TEST, i);
285 } else {
286 glDisablei(GL_SCISSOR_TEST, i);
287 }
288 }
289 if (updated.scissor.enabled &&
290 (scissor_changed || updated.scissor.x != current.scissor.x ||
291 updated.scissor.y != current.scissor.y ||
292 updated.scissor.width != current.scissor.width ||
293 updated.scissor.height != current.scissor.height)) {
294 glScissorIndexed(i, updated.scissor.x, updated.scissor.y, updated.scissor.width,
295 updated.scissor.height);
296 }
297 }
298 } else {
299 const auto& current = cur_state.viewports[0];
300 const auto& updated = viewports[0];
301 if (updated.x != current.x || updated.y != current.y || updated.width != current.width ||
302 updated.height != current.height) {
303 glViewport(updated.x, updated.y, updated.width, updated.height);
304 }
305 if (updated.depth_range_near != current.depth_range_near ||
306 updated.depth_range_far != current.depth_range_far) {
307 glDepthRange(updated.depth_range_near, updated.depth_range_far);
308 } 345 }
309 const bool scissor_changed = updated.scissor.enabled != current.scissor.enabled; 346 if (current.depth_range_near != updated.depth_range_near ||
310 if (scissor_changed) { 347 current.depth_range_far != updated.depth_range_far) {
311 if (updated.scissor.enabled) { 348 current.depth_range_near = updated.depth_range_near;
312 glEnable(GL_SCISSOR_TEST); 349 current.depth_range_far = updated.depth_range_far;
313 } else { 350 glDepthRangeIndexed(i, updated.depth_range_near, updated.depth_range_far);
314 glDisable(GL_SCISSOR_TEST);
315 }
316 } 351 }
317 if (updated.scissor.enabled && (scissor_changed || updated.scissor.x != current.scissor.x || 352
318 updated.scissor.y != current.scissor.y || 353 Enable(GL_SCISSOR_TEST, i, current.scissor.enabled, updated.scissor.enabled);
319 updated.scissor.width != current.scissor.width || 354
320 updated.scissor.height != current.scissor.height)) { 355 if (current.scissor.x != updated.scissor.x || current.scissor.y != updated.scissor.y ||
321 glScissor(updated.scissor.x, updated.scissor.y, updated.scissor.width, 356 current.scissor.width != updated.scissor.width ||
322 updated.scissor.height); 357 current.scissor.height != updated.scissor.height) {
358 current.scissor.x = updated.scissor.x;
359 current.scissor.y = updated.scissor.y;
360 current.scissor.width = updated.scissor.width;
361 current.scissor.height = updated.scissor.height;
362 glScissorIndexed(i, updated.scissor.x, updated.scissor.y, updated.scissor.width,
363 updated.scissor.height);
323 } 364 }
324 } 365 }
325} 366}
326 367
327void OpenGLState::ApplyGlobalBlending() const { 368void OpenGLState::ApplyGlobalBlending() const {
328 const Blend& current = cur_state.blend[0];
329 const Blend& updated = blend[0]; 369 const Blend& updated = blend[0];
330 const bool blend_changed = updated.enabled != current.enabled; 370 Blend& current = cur_state.blend[0];
331 if (blend_changed) { 371
332 if (updated.enabled) { 372 Enable(GL_BLEND, current.enabled, updated.enabled);
333 glEnable(GL_BLEND); 373
334 } else { 374 if (current.src_rgb_func != updated.src_rgb_func ||
335 glDisable(GL_BLEND); 375 current.dst_rgb_func != updated.dst_rgb_func || current.src_a_func != updated.src_a_func ||
336 } 376 current.dst_a_func != updated.dst_a_func) {
337 } 377 current.src_rgb_func = updated.src_rgb_func;
338 if (!updated.enabled) { 378 current.dst_rgb_func = updated.dst_rgb_func;
339 return; 379 current.src_a_func = updated.src_a_func;
340 } 380 current.dst_a_func = updated.dst_a_func;
341 if (blend_changed || updated.src_rgb_func != current.src_rgb_func ||
342 updated.dst_rgb_func != current.dst_rgb_func || updated.src_a_func != current.src_a_func ||
343 updated.dst_a_func != current.dst_a_func) {
344 glBlendFuncSeparate(updated.src_rgb_func, updated.dst_rgb_func, updated.src_a_func, 381 glBlendFuncSeparate(updated.src_rgb_func, updated.dst_rgb_func, updated.src_a_func,
345 updated.dst_a_func); 382 updated.dst_a_func);
346 } 383 }
347 384
348 if (blend_changed || updated.rgb_equation != current.rgb_equation || 385 if (current.rgb_equation != updated.rgb_equation || current.a_equation != updated.a_equation) {
349 updated.a_equation != current.a_equation) { 386 current.rgb_equation = updated.rgb_equation;
387 current.a_equation = updated.a_equation;
350 glBlendEquationSeparate(updated.rgb_equation, updated.a_equation); 388 glBlendEquationSeparate(updated.rgb_equation, updated.a_equation);
351 } 389 }
352} 390}
353 391
354void OpenGLState::ApplyTargetBlending(std::size_t target, bool force) const { 392void OpenGLState::ApplyTargetBlending(std::size_t target, bool force) const {
355 const Blend& updated = blend[target]; 393 const Blend& updated = blend[target];
356 const Blend& current = cur_state.blend[target]; 394 Blend& current = cur_state.blend[target];
357 const bool blend_changed = updated.enabled != current.enabled || force; 395
358 if (blend_changed) { 396 if (current.enabled != updated.enabled || force) {
359 if (updated.enabled) { 397 current.enabled = updated.enabled;
360 glEnablei(GL_BLEND, static_cast<GLuint>(target)); 398 Enable(GL_BLEND, static_cast<GLuint>(target), updated.enabled);
361 } else {
362 glDisablei(GL_BLEND, static_cast<GLuint>(target));
363 }
364 }
365 if (!updated.enabled) {
366 return;
367 } 399 }
368 if (blend_changed || updated.src_rgb_func != current.src_rgb_func || 400
369 updated.dst_rgb_func != current.dst_rgb_func || updated.src_a_func != current.src_a_func || 401 if (UpdateTie(std::tie(current.src_rgb_func, current.dst_rgb_func, current.src_a_func,
370 updated.dst_a_func != current.dst_a_func) { 402 current.dst_a_func),
403 std::tie(updated.src_rgb_func, updated.dst_rgb_func, updated.src_a_func,
404 updated.dst_a_func))) {
371 glBlendFuncSeparatei(static_cast<GLuint>(target), updated.src_rgb_func, 405 glBlendFuncSeparatei(static_cast<GLuint>(target), updated.src_rgb_func,
372 updated.dst_rgb_func, updated.src_a_func, updated.dst_a_func); 406 updated.dst_rgb_func, updated.src_a_func, updated.dst_a_func);
373 } 407 }
374 408
375 if (blend_changed || updated.rgb_equation != current.rgb_equation || 409 if (UpdateTie(std::tie(current.rgb_equation, current.a_equation),
376 updated.a_equation != current.a_equation) { 410 std::tie(updated.rgb_equation, updated.a_equation))) {
377 glBlendEquationSeparatei(static_cast<GLuint>(target), updated.rgb_equation, 411 glBlendEquationSeparatei(static_cast<GLuint>(target), updated.rgb_equation,
378 updated.a_equation); 412 updated.a_equation);
379 } 413 }
@@ -381,202 +415,109 @@ void OpenGLState::ApplyTargetBlending(std::size_t target, bool force) const {
381 415
382void OpenGLState::ApplyBlending() const { 416void OpenGLState::ApplyBlending() const {
383 if (independant_blend.enabled) { 417 if (independant_blend.enabled) {
384 for (size_t i = 0; i < Tegra::Engines::Maxwell3D::Regs::NumRenderTargets; i++) { 418 const bool force = independant_blend.enabled != cur_state.independant_blend.enabled;
385 ApplyTargetBlending(i, 419 for (std::size_t target = 0; target < Maxwell::NumRenderTargets; ++target) {
386 independant_blend.enabled != cur_state.independant_blend.enabled); 420 ApplyTargetBlending(target, force);
387 } 421 }
388 } else { 422 } else {
389 ApplyGlobalBlending(); 423 ApplyGlobalBlending();
390 } 424 }
391 if (blend_color.red != cur_state.blend_color.red || 425 cur_state.independant_blend.enabled = independant_blend.enabled;
392 blend_color.green != cur_state.blend_color.green || 426
393 blend_color.blue != cur_state.blend_color.blue || 427 if (UpdateTie(
394 blend_color.alpha != cur_state.blend_color.alpha) { 428 std::tie(cur_state.blend_color.red, cur_state.blend_color.green,
429 cur_state.blend_color.blue, cur_state.blend_color.alpha),
430 std::tie(blend_color.red, blend_color.green, blend_color.blue, blend_color.alpha))) {
395 glBlendColor(blend_color.red, blend_color.green, blend_color.blue, blend_color.alpha); 431 glBlendColor(blend_color.red, blend_color.green, blend_color.blue, blend_color.alpha);
396 } 432 }
397} 433}
398 434
399void OpenGLState::ApplyLogicOp() const { 435void OpenGLState::ApplyLogicOp() const {
400 const bool logic_op_changed = logic_op.enabled != cur_state.logic_op.enabled; 436 Enable(GL_COLOR_LOGIC_OP, cur_state.logic_op.enabled, logic_op.enabled);
401 if (logic_op_changed) {
402 if (logic_op.enabled) {
403 glEnable(GL_COLOR_LOGIC_OP);
404 } else {
405 glDisable(GL_COLOR_LOGIC_OP);
406 }
407 }
408 437
409 if (logic_op.enabled && 438 if (UpdateValue(cur_state.logic_op.operation, logic_op.operation)) {
410 (logic_op_changed || logic_op.operation != cur_state.logic_op.operation)) {
411 glLogicOp(logic_op.operation); 439 glLogicOp(logic_op.operation);
412 } 440 }
413} 441}
414 442
415void OpenGLState::ApplyPolygonOffset() const { 443void OpenGLState::ApplyPolygonOffset() const {
416 444 Enable(GL_POLYGON_OFFSET_FILL, cur_state.polygon_offset.fill_enable,
417 const bool fill_enable_changed = 445 polygon_offset.fill_enable);
418 polygon_offset.fill_enable != cur_state.polygon_offset.fill_enable; 446 Enable(GL_POLYGON_OFFSET_LINE, cur_state.polygon_offset.line_enable,
419 const bool line_enable_changed = 447 polygon_offset.line_enable);
420 polygon_offset.line_enable != cur_state.polygon_offset.line_enable; 448 Enable(GL_POLYGON_OFFSET_POINT, cur_state.polygon_offset.point_enable,
421 const bool point_enable_changed = 449 polygon_offset.point_enable);
422 polygon_offset.point_enable != cur_state.polygon_offset.point_enable; 450
423 const bool factor_changed = polygon_offset.factor != cur_state.polygon_offset.factor; 451 if (UpdateTie(std::tie(cur_state.polygon_offset.factor, cur_state.polygon_offset.units,
424 const bool units_changed = polygon_offset.units != cur_state.polygon_offset.units; 452 cur_state.polygon_offset.clamp),
425 const bool clamp_changed = polygon_offset.clamp != cur_state.polygon_offset.clamp; 453 std::tie(polygon_offset.factor, polygon_offset.units, polygon_offset.clamp))) {
426
427 if (fill_enable_changed) {
428 if (polygon_offset.fill_enable) {
429 glEnable(GL_POLYGON_OFFSET_FILL);
430 } else {
431 glDisable(GL_POLYGON_OFFSET_FILL);
432 }
433 }
434
435 if (line_enable_changed) {
436 if (polygon_offset.line_enable) {
437 glEnable(GL_POLYGON_OFFSET_LINE);
438 } else {
439 glDisable(GL_POLYGON_OFFSET_LINE);
440 }
441 }
442
443 if (point_enable_changed) {
444 if (polygon_offset.point_enable) {
445 glEnable(GL_POLYGON_OFFSET_POINT);
446 } else {
447 glDisable(GL_POLYGON_OFFSET_POINT);
448 }
449 }
450
451 if ((polygon_offset.fill_enable || polygon_offset.line_enable || polygon_offset.point_enable) &&
452 (factor_changed || units_changed || clamp_changed)) {
453
454 if (GLAD_GL_EXT_polygon_offset_clamp && polygon_offset.clamp != 0) { 454 if (GLAD_GL_EXT_polygon_offset_clamp && polygon_offset.clamp != 0) {
455 glPolygonOffsetClamp(polygon_offset.factor, polygon_offset.units, polygon_offset.clamp); 455 glPolygonOffsetClamp(polygon_offset.factor, polygon_offset.units, polygon_offset.clamp);
456 } else { 456 } else {
457 glPolygonOffset(polygon_offset.factor, polygon_offset.units);
458 UNIMPLEMENTED_IF_MSG(polygon_offset.clamp != 0, 457 UNIMPLEMENTED_IF_MSG(polygon_offset.clamp != 0,
459 "Unimplemented Depth polygon offset clamp."); 458 "Unimplemented Depth polygon offset clamp.");
459 glPolygonOffset(polygon_offset.factor, polygon_offset.units);
460 } 460 }
461 } 461 }
462} 462}
463 463
464void OpenGLState::ApplyTextures() const { 464void OpenGLState::ApplyTextures() const {
465 bool has_delta{};
466 std::size_t first{};
467 std::size_t last{};
468 std::array<GLuint, Maxwell::NumTextureSamplers> textures;
469
465 for (std::size_t i = 0; i < std::size(texture_units); ++i) { 470 for (std::size_t i = 0; i < std::size(texture_units); ++i) {
466 const auto& texture_unit = texture_units[i]; 471 const auto& texture_unit = texture_units[i];
467 const auto& cur_state_texture_unit = cur_state.texture_units[i]; 472 auto& cur_state_texture_unit = cur_state.texture_units[i];
468 473 textures[i] = texture_unit.texture;
469 if (texture_unit.texture != cur_state_texture_unit.texture) { 474 if (cur_state_texture_unit.texture == textures[i])
470 glActiveTexture(TextureUnits::MaxwellTexture(static_cast<int>(i)).Enum()); 475 continue;
471 glBindTexture(texture_unit.target, texture_unit.texture); 476 cur_state_texture_unit.texture = textures[i];
472 } 477 if (!has_delta) {
473 // Update the texture swizzle 478 first = i;
474 if (texture_unit.swizzle.r != cur_state_texture_unit.swizzle.r || 479 has_delta = true;
475 texture_unit.swizzle.g != cur_state_texture_unit.swizzle.g ||
476 texture_unit.swizzle.b != cur_state_texture_unit.swizzle.b ||
477 texture_unit.swizzle.a != cur_state_texture_unit.swizzle.a) {
478 std::array<GLint, 4> mask = {texture_unit.swizzle.r, texture_unit.swizzle.g,
479 texture_unit.swizzle.b, texture_unit.swizzle.a};
480 glTexParameteriv(texture_unit.target, GL_TEXTURE_SWIZZLE_RGBA, mask.data());
481 } 480 }
481 last = i;
482 }
483 if (has_delta) {
484 glBindTextures(static_cast<GLuint>(first), static_cast<GLsizei>(last - first + 1),
485 textures.data() + first);
482 } 486 }
483} 487}
484 488
485void OpenGLState::ApplySamplers() const { 489void OpenGLState::ApplySamplers() const {
486 bool has_delta{}; 490 bool has_delta{};
487 std::size_t first{}, last{}; 491 std::size_t first{};
488 std::array<GLuint, Tegra::Engines::Maxwell3D::Regs::NumTextureSamplers> samplers; 492 std::size_t last{};
493 std::array<GLuint, Maxwell::NumTextureSamplers> samplers;
494
489 for (std::size_t i = 0; i < std::size(samplers); ++i) { 495 for (std::size_t i = 0; i < std::size(samplers); ++i) {
496 if (cur_state.texture_units[i].sampler == texture_units[i].sampler)
497 continue;
498 cur_state.texture_units[i].sampler = texture_units[i].sampler;
490 samplers[i] = texture_units[i].sampler; 499 samplers[i] = texture_units[i].sampler;
491 if (samplers[i] != cur_state.texture_units[i].sampler) { 500 if (!has_delta) {
492 if (!has_delta) { 501 first = i;
493 first = i; 502 has_delta = true;
494 has_delta = true;
495 }
496 last = i;
497 } 503 }
504 last = i;
498 } 505 }
499 if (has_delta) { 506 if (has_delta) {
500 glBindSamplers(static_cast<GLuint>(first), static_cast<GLsizei>(last - first + 1), 507 glBindSamplers(static_cast<GLuint>(first), static_cast<GLsizei>(last - first + 1),
501 samplers.data()); 508 samplers.data() + first);
502 }
503}
504
505void OpenGLState::ApplyFramebufferState() const {
506 if (draw.read_framebuffer != cur_state.draw.read_framebuffer) {
507 glBindFramebuffer(GL_READ_FRAMEBUFFER, draw.read_framebuffer);
508 }
509 if (draw.draw_framebuffer != cur_state.draw.draw_framebuffer) {
510 glBindFramebuffer(GL_DRAW_FRAMEBUFFER, draw.draw_framebuffer);
511 }
512}
513
514void OpenGLState::ApplyVertexArrayState() const {
515 if (draw.vertex_array != cur_state.draw.vertex_array) {
516 glBindVertexArray(draw.vertex_array);
517 }
518}
519
520void OpenGLState::ApplyDepthClamp() const {
521 if (depth_clamp.far_plane == cur_state.depth_clamp.far_plane &&
522 depth_clamp.near_plane == cur_state.depth_clamp.near_plane) {
523 return;
524 }
525 if (depth_clamp.far_plane != depth_clamp.near_plane) {
526 UNIMPLEMENTED_MSG("Unimplemented Depth Clamp Separation!");
527 }
528 if (depth_clamp.far_plane || depth_clamp.near_plane) {
529 glEnable(GL_DEPTH_CLAMP);
530 } else {
531 glDisable(GL_DEPTH_CLAMP);
532 } 509 }
533} 510}
534 511
535void OpenGLState::Apply() const { 512void OpenGLState::Apply() const {
536 ApplyFramebufferState(); 513 ApplyFramebufferState();
537 ApplyVertexArrayState(); 514 ApplyVertexArrayState();
538 515 ApplyShaderProgram();
539 // Shader program 516 ApplyProgramPipeline();
540 if (draw.shader_program != cur_state.draw.shader_program) { 517 ApplyClipDistances();
541 glUseProgram(draw.shader_program); 518 ApplyPointSize();
542 } 519 ApplyFragmentColorClamp();
543 520 ApplyMultisample();
544 // Program pipeline
545 if (draw.program_pipeline != cur_state.draw.program_pipeline) {
546 glBindProgramPipeline(draw.program_pipeline);
547 }
548 // Clip distance
549 for (std::size_t i = 0; i < clip_distance.size(); ++i) {
550 if (clip_distance[i] != cur_state.clip_distance[i]) {
551 if (clip_distance[i]) {
552 glEnable(GL_CLIP_DISTANCE0 + static_cast<GLenum>(i));
553 } else {
554 glDisable(GL_CLIP_DISTANCE0 + static_cast<GLenum>(i));
555 }
556 }
557 }
558 // Point
559 if (point.size != cur_state.point.size) {
560 glPointSize(point.size);
561 }
562 if (fragment_color_clamp.enabled != cur_state.fragment_color_clamp.enabled) {
563 glClampColor(GL_CLAMP_FRAGMENT_COLOR_ARB,
564 fragment_color_clamp.enabled ? GL_TRUE : GL_FALSE);
565 }
566 if (multisample_control.alpha_to_coverage != cur_state.multisample_control.alpha_to_coverage) {
567 if (multisample_control.alpha_to_coverage) {
568 glEnable(GL_SAMPLE_ALPHA_TO_COVERAGE);
569 } else {
570 glDisable(GL_SAMPLE_ALPHA_TO_COVERAGE);
571 }
572 }
573 if (multisample_control.alpha_to_one != cur_state.multisample_control.alpha_to_one) {
574 if (multisample_control.alpha_to_one) {
575 glEnable(GL_SAMPLE_ALPHA_TO_ONE);
576 } else {
577 glDisable(GL_SAMPLE_ALPHA_TO_ONE);
578 }
579 }
580 ApplyDepthClamp(); 521 ApplyDepthClamp();
581 ApplyColorMask(); 522 ApplyColorMask();
582 ApplyViewport(); 523 ApplyViewport();
@@ -590,7 +531,28 @@ void OpenGLState::Apply() const {
590 ApplyTextures(); 531 ApplyTextures();
591 ApplySamplers(); 532 ApplySamplers();
592 ApplyPolygonOffset(); 533 ApplyPolygonOffset();
593 cur_state = *this; 534}
535
536void OpenGLState::EmulateViewportWithScissor() {
537 auto& current = viewports[0];
538 if (current.scissor.enabled) {
539 const GLint left = std::max(current.x, current.scissor.x);
540 const GLint right =
541 std::max(current.x + current.width, current.scissor.x + current.scissor.width);
542 const GLint bottom = std::max(current.y, current.scissor.y);
543 const GLint top =
544 std::max(current.y + current.height, current.scissor.y + current.scissor.height);
545 current.scissor.x = std::max(left, 0);
546 current.scissor.y = std::max(bottom, 0);
547 current.scissor.width = std::max(right - left, 0);
548 current.scissor.height = std::max(top - bottom, 0);
549 } else {
550 current.scissor.enabled = true;
551 current.scissor.x = current.x;
552 current.scissor.y = current.y;
553 current.scissor.width = current.width;
554 current.scissor.height = current.height;
555 }
594} 556}
595 557
596OpenGLState& OpenGLState::UnbindTexture(GLuint handle) { 558OpenGLState& OpenGLState::UnbindTexture(GLuint handle) {
diff --git a/src/video_core/renderer_opengl/gl_state.h b/src/video_core/renderer_opengl/gl_state.h
index a5a7c0920..41418a7b8 100644
--- a/src/video_core/renderer_opengl/gl_state.h
+++ b/src/video_core/renderer_opengl/gl_state.h
@@ -54,10 +54,6 @@ public:
54 } depth_clamp; // GL_DEPTH_CLAMP 54 } depth_clamp; // GL_DEPTH_CLAMP
55 55
56 struct { 56 struct {
57 bool enabled; // viewports arrays are only supported when geometry shaders are enabled.
58 } geometry_shaders;
59
60 struct {
61 bool enabled; // GL_CULL_FACE 57 bool enabled; // GL_CULL_FACE
62 GLenum mode; // GL_CULL_FACE_MODE 58 GLenum mode; // GL_CULL_FACE_MODE
63 GLenum front_face; // GL_FRONT_FACE 59 GLenum front_face; // GL_FRONT_FACE
@@ -126,26 +122,14 @@ public:
126 struct TextureUnit { 122 struct TextureUnit {
127 GLuint texture; // GL_TEXTURE_BINDING_2D 123 GLuint texture; // GL_TEXTURE_BINDING_2D
128 GLuint sampler; // GL_SAMPLER_BINDING 124 GLuint sampler; // GL_SAMPLER_BINDING
129 GLenum target;
130 struct {
131 GLint r; // GL_TEXTURE_SWIZZLE_R
132 GLint g; // GL_TEXTURE_SWIZZLE_G
133 GLint b; // GL_TEXTURE_SWIZZLE_B
134 GLint a; // GL_TEXTURE_SWIZZLE_A
135 } swizzle;
136 125
137 void Unbind() { 126 void Unbind() {
138 texture = 0; 127 texture = 0;
139 swizzle.r = GL_RED;
140 swizzle.g = GL_GREEN;
141 swizzle.b = GL_BLUE;
142 swizzle.a = GL_ALPHA;
143 } 128 }
144 129
145 void Reset() { 130 void Reset() {
146 Unbind(); 131 Unbind();
147 sampler = 0; 132 sampler = 0;
148 target = GL_TEXTURE_2D;
149 } 133 }
150 }; 134 };
151 std::array<TextureUnit, Tegra::Engines::Maxwell3D::Regs::NumTextureSamplers> texture_units; 135 std::array<TextureUnit, Tegra::Engines::Maxwell3D::Regs::NumTextureSamplers> texture_units;
@@ -196,34 +180,26 @@ public:
196 static OpenGLState GetCurState() { 180 static OpenGLState GetCurState() {
197 return cur_state; 181 return cur_state;
198 } 182 }
183
199 static bool GetsRGBUsed() { 184 static bool GetsRGBUsed() {
200 return s_rgb_used; 185 return s_rgb_used;
201 } 186 }
187
202 static void ClearsRGBUsed() { 188 static void ClearsRGBUsed() {
203 s_rgb_used = false; 189 s_rgb_used = false;
204 } 190 }
191
205 /// Apply this state as the current OpenGL state 192 /// Apply this state as the current OpenGL state
206 void Apply() const; 193 void Apply() const;
207 /// Apply only the state affecting the framebuffer 194
208 void ApplyFramebufferState() const; 195 void ApplyFramebufferState() const;
209 /// Apply only the state affecting the vertex array
210 void ApplyVertexArrayState() const; 196 void ApplyVertexArrayState() const;
211 /// Set the initial OpenGL state 197 void ApplyShaderProgram() const;
212 static void ApplyDefaultState(); 198 void ApplyProgramPipeline() const;
213 /// Resets any references to the given resource 199 void ApplyClipDistances() const;
214 OpenGLState& UnbindTexture(GLuint handle); 200 void ApplyPointSize() const;
215 OpenGLState& ResetSampler(GLuint handle); 201 void ApplyFragmentColorClamp() const;
216 OpenGLState& ResetProgram(GLuint handle); 202 void ApplyMultisample() const;
217 OpenGLState& ResetPipeline(GLuint handle);
218 OpenGLState& ResetVertexArray(GLuint handle);
219 OpenGLState& ResetFramebuffer(GLuint handle);
220 void EmulateViewportWithScissor();
221
222private:
223 static OpenGLState cur_state;
224 // Workaround for sRGB problems caused by
225 // QT not supporting srgb output
226 static bool s_rgb_used;
227 void ApplySRgb() const; 203 void ApplySRgb() const;
228 void ApplyCulling() const; 204 void ApplyCulling() const;
229 void ApplyColorMask() const; 205 void ApplyColorMask() const;
@@ -239,6 +215,26 @@ private:
239 void ApplySamplers() const; 215 void ApplySamplers() const;
240 void ApplyDepthClamp() const; 216 void ApplyDepthClamp() const;
241 void ApplyPolygonOffset() const; 217 void ApplyPolygonOffset() const;
218
219 /// Set the initial OpenGL state
220 static void ApplyDefaultState();
221
222 /// Resets any references to the given resource
223 OpenGLState& UnbindTexture(GLuint handle);
224 OpenGLState& ResetSampler(GLuint handle);
225 OpenGLState& ResetProgram(GLuint handle);
226 OpenGLState& ResetPipeline(GLuint handle);
227 OpenGLState& ResetVertexArray(GLuint handle);
228 OpenGLState& ResetFramebuffer(GLuint handle);
229
230 /// Viewport does not affects glClearBuffer so emulate viewport using scissor test
231 void EmulateViewportWithScissor();
232
233private:
234 static OpenGLState cur_state;
235
236 // Workaround for sRGB problems caused by QT not supporting srgb output
237 static bool s_rgb_used;
242}; 238};
243 239
244} // namespace OpenGL 240} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp
index c268c9686..d69cba9c3 100644
--- a/src/video_core/renderer_opengl/renderer_opengl.cpp
+++ b/src/video_core/renderer_opengl/renderer_opengl.cpp
@@ -5,7 +5,6 @@
5#include <algorithm> 5#include <algorithm>
6#include <cstddef> 6#include <cstddef>
7#include <cstdlib> 7#include <cstdlib>
8#include <cstring>
9#include <memory> 8#include <memory>
10#include <glad/glad.h> 9#include <glad/glad.h>
11#include "common/assert.h" 10#include "common/assert.h"
@@ -14,6 +13,7 @@
14#include "core/core.h" 13#include "core/core.h"
15#include "core/core_timing.h" 14#include "core/core_timing.h"
16#include "core/frontend/emu_window.h" 15#include "core/frontend/emu_window.h"
16#include "core/frontend/scope_acquire_window_context.h"
17#include "core/memory.h" 17#include "core/memory.h"
18#include "core/perf_stats.h" 18#include "core/perf_stats.h"
19#include "core/settings.h" 19#include "core/settings.h"
@@ -97,29 +97,16 @@ static std::array<GLfloat, 3 * 2> MakeOrthographicMatrix(const float width, cons
97 return matrix; 97 return matrix;
98} 98}
99 99
100ScopeAcquireGLContext::ScopeAcquireGLContext(Core::Frontend::EmuWindow& emu_window_) 100RendererOpenGL::RendererOpenGL(Core::Frontend::EmuWindow& window, Core::System& system)
101 : emu_window{emu_window_} { 101 : VideoCore::RendererBase{window}, system{system} {}
102 if (Settings::values.use_multi_core) {
103 emu_window.MakeCurrent();
104 }
105}
106ScopeAcquireGLContext::~ScopeAcquireGLContext() {
107 if (Settings::values.use_multi_core) {
108 emu_window.DoneCurrent();
109 }
110}
111
112RendererOpenGL::RendererOpenGL(Core::Frontend::EmuWindow& window)
113 : VideoCore::RendererBase{window} {}
114 102
115RendererOpenGL::~RendererOpenGL() = default; 103RendererOpenGL::~RendererOpenGL() = default;
116 104
117/// Swap buffers (render frame) 105/// Swap buffers (render frame)
118void RendererOpenGL::SwapBuffers( 106void RendererOpenGL::SwapBuffers(
119 std::optional<std::reference_wrapper<const Tegra::FramebufferConfig>> framebuffer) { 107 std::optional<std::reference_wrapper<const Tegra::FramebufferConfig>> framebuffer) {
120 ScopeAcquireGLContext acquire_context{render_window};
121 108
122 Core::System::GetInstance().GetPerfStats().EndSystemFrame(); 109 system.GetPerfStats().EndSystemFrame();
123 110
124 // Maintain the rasterizer's state as a priority 111 // Maintain the rasterizer's state as a priority
125 OpenGLState prev_state = OpenGLState::GetCurState(); 112 OpenGLState prev_state = OpenGLState::GetCurState();
@@ -149,8 +136,8 @@ void RendererOpenGL::SwapBuffers(
149 136
150 render_window.PollEvents(); 137 render_window.PollEvents();
151 138
152 Core::System::GetInstance().FrameLimiter().DoFrameLimiting(CoreTiming::GetGlobalTimeUs()); 139 system.FrameLimiter().DoFrameLimiting(system.CoreTiming().GetGlobalTimeUs());
153 Core::System::GetInstance().GetPerfStats().BeginSystemFrame(); 140 system.GetPerfStats().BeginSystemFrame();
154 141
155 // Restore the rasterizer state 142 // Restore the rasterizer state
156 prev_state.Apply(); 143 prev_state.Apply();
@@ -176,17 +163,14 @@ void RendererOpenGL::LoadFBToScreenInfo(const Tegra::FramebufferConfig& framebuf
176 // Reset the screen info's display texture to its own permanent texture 163 // Reset the screen info's display texture to its own permanent texture
177 screen_info.display_texture = screen_info.texture.resource.handle; 164 screen_info.display_texture = screen_info.texture.resource.handle;
178 165
179 Memory::RasterizerFlushVirtualRegion(framebuffer_addr, size_in_bytes, 166 rasterizer->FlushRegion(ToCacheAddr(Memory::GetPointer(framebuffer_addr)), size_in_bytes);
180 Memory::FlushMode::Flush);
181 167
182 VideoCore::MortonCopyPixels128(framebuffer.width, framebuffer.height, bytes_per_pixel, 4, 168 constexpr u32 linear_bpp = 4;
183 Memory::GetPointer(framebuffer_addr), 169 VideoCore::MortonCopyPixels128(VideoCore::MortonSwizzleMode::MortonToLinear,
184 gl_framebuffer_data.data(), true); 170 framebuffer.width, framebuffer.height, bytes_per_pixel,
171 linear_bpp, Memory::GetPointer(framebuffer_addr),
172 gl_framebuffer_data.data());
185 173
186 state.texture_units[0].texture = screen_info.texture.resource.handle;
187 state.Apply();
188
189 glActiveTexture(GL_TEXTURE0);
190 glPixelStorei(GL_UNPACK_ROW_LENGTH, static_cast<GLint>(framebuffer.stride)); 174 glPixelStorei(GL_UNPACK_ROW_LENGTH, static_cast<GLint>(framebuffer.stride));
191 175
192 // Update existing texture 176 // Update existing texture
@@ -194,14 +178,11 @@ void RendererOpenGL::LoadFBToScreenInfo(const Tegra::FramebufferConfig& framebuf
194 // they differ from the LCD resolution. 178 // they differ from the LCD resolution.
195 // TODO: Applications could theoretically crash yuzu here by specifying too large 179 // TODO: Applications could theoretically crash yuzu here by specifying too large
196 // framebuffer sizes. We should make sure that this cannot happen. 180 // framebuffer sizes. We should make sure that this cannot happen.
197 glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, framebuffer.width, framebuffer.height, 181 glTextureSubImage2D(screen_info.texture.resource.handle, 0, 0, 0, framebuffer.width,
198 screen_info.texture.gl_format, screen_info.texture.gl_type, 182 framebuffer.height, screen_info.texture.gl_format,
199 gl_framebuffer_data.data()); 183 screen_info.texture.gl_type, gl_framebuffer_data.data());
200 184
201 glPixelStorei(GL_UNPACK_ROW_LENGTH, 0); 185 glPixelStorei(GL_UNPACK_ROW_LENGTH, 0);
202
203 state.texture_units[0].texture = 0;
204 state.Apply();
205 } 186 }
206} 187}
207 188
@@ -211,17 +192,8 @@ void RendererOpenGL::LoadFBToScreenInfo(const Tegra::FramebufferConfig& framebuf
211 */ 192 */
212void RendererOpenGL::LoadColorToActiveGLTexture(u8 color_r, u8 color_g, u8 color_b, u8 color_a, 193void RendererOpenGL::LoadColorToActiveGLTexture(u8 color_r, u8 color_g, u8 color_b, u8 color_a,
213 const TextureInfo& texture) { 194 const TextureInfo& texture) {
214 state.texture_units[0].texture = texture.resource.handle; 195 const u8 framebuffer_data[4] = {color_a, color_b, color_g, color_r};
215 state.Apply(); 196 glClearTexImage(texture.resource.handle, 0, GL_RGBA, GL_UNSIGNED_BYTE, framebuffer_data);
216
217 glActiveTexture(GL_TEXTURE0);
218 u8 framebuffer_data[4] = {color_a, color_b, color_g, color_r};
219
220 // Update existing texture
221 glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, 1, 1, 0, GL_RGBA, GL_UNSIGNED_BYTE, framebuffer_data);
222
223 state.texture_units[0].texture = 0;
224 state.Apply();
225} 197}
226 198
227/** 199/**
@@ -261,55 +233,57 @@ void RendererOpenGL::InitOpenGLObjects() {
261 sizeof(ScreenRectVertex)); 233 sizeof(ScreenRectVertex));
262 234
263 // Allocate textures for the screen 235 // Allocate textures for the screen
264 screen_info.texture.resource.Create(); 236 screen_info.texture.resource.Create(GL_TEXTURE_2D);
265
266 // Allocation of storage is deferred until the first frame, when we
267 // know the framebuffer size.
268 237
269 state.texture_units[0].texture = screen_info.texture.resource.handle; 238 const GLuint texture = screen_info.texture.resource.handle;
270 state.Apply(); 239 glTextureStorage2D(texture, 1, GL_RGBA8, 1, 1);
271
272 glActiveTexture(GL_TEXTURE0);
273 glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAX_LEVEL, 0);
274 glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR);
275 glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
276 glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
277 glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
278 240
279 screen_info.display_texture = screen_info.texture.resource.handle; 241 screen_info.display_texture = screen_info.texture.resource.handle;
280 242
281 state.texture_units[0].texture = 0;
282 state.Apply();
283
284 // Clear screen to black 243 // Clear screen to black
285 LoadColorToActiveGLTexture(0, 0, 0, 0, screen_info.texture); 244 LoadColorToActiveGLTexture(0, 0, 0, 0, screen_info.texture);
286} 245}
287 246
247void RendererOpenGL::AddTelemetryFields() {
248 const char* const gl_version{reinterpret_cast<char const*>(glGetString(GL_VERSION))};
249 const char* const gpu_vendor{reinterpret_cast<char const*>(glGetString(GL_VENDOR))};
250 const char* const gpu_model{reinterpret_cast<char const*>(glGetString(GL_RENDERER))};
251
252 LOG_INFO(Render_OpenGL, "GL_VERSION: {}", gl_version);
253 LOG_INFO(Render_OpenGL, "GL_VENDOR: {}", gpu_vendor);
254 LOG_INFO(Render_OpenGL, "GL_RENDERER: {}", gpu_model);
255
256 auto& telemetry_session = system.TelemetrySession();
257 telemetry_session.AddField(Telemetry::FieldType::UserSystem, "GPU_Vendor", gpu_vendor);
258 telemetry_session.AddField(Telemetry::FieldType::UserSystem, "GPU_Model", gpu_model);
259 telemetry_session.AddField(Telemetry::FieldType::UserSystem, "GPU_OpenGL_Version", gl_version);
260}
261
288void RendererOpenGL::CreateRasterizer() { 262void RendererOpenGL::CreateRasterizer() {
289 if (rasterizer) { 263 if (rasterizer) {
290 return; 264 return;
291 } 265 }
292 // Initialize sRGB Usage 266 // Initialize sRGB Usage
293 OpenGLState::ClearsRGBUsed(); 267 OpenGLState::ClearsRGBUsed();
294 rasterizer = std::make_unique<RasterizerOpenGL>(render_window, screen_info); 268 rasterizer = std::make_unique<RasterizerOpenGL>(system, screen_info);
295} 269}
296 270
297void RendererOpenGL::ConfigureFramebufferTexture(TextureInfo& texture, 271void RendererOpenGL::ConfigureFramebufferTexture(TextureInfo& texture,
298 const Tegra::FramebufferConfig& framebuffer) { 272 const Tegra::FramebufferConfig& framebuffer) {
299
300 texture.width = framebuffer.width; 273 texture.width = framebuffer.width;
301 texture.height = framebuffer.height; 274 texture.height = framebuffer.height;
275 texture.pixel_format = framebuffer.pixel_format;
302 276
303 GLint internal_format; 277 GLint internal_format;
304 switch (framebuffer.pixel_format) { 278 switch (framebuffer.pixel_format) {
305 case Tegra::FramebufferConfig::PixelFormat::ABGR8: 279 case Tegra::FramebufferConfig::PixelFormat::ABGR8:
306 internal_format = GL_RGBA; 280 internal_format = GL_RGBA8;
307 texture.gl_format = GL_RGBA; 281 texture.gl_format = GL_RGBA;
308 texture.gl_type = GL_UNSIGNED_INT_8_8_8_8_REV; 282 texture.gl_type = GL_UNSIGNED_INT_8_8_8_8_REV;
309 gl_framebuffer_data.resize(texture.width * texture.height * 4); 283 gl_framebuffer_data.resize(texture.width * texture.height * 4);
310 break; 284 break;
311 default: 285 default:
312 internal_format = GL_RGBA; 286 internal_format = GL_RGBA8;
313 texture.gl_format = GL_RGBA; 287 texture.gl_format = GL_RGBA;
314 texture.gl_type = GL_UNSIGNED_INT_8_8_8_8_REV; 288 texture.gl_type = GL_UNSIGNED_INT_8_8_8_8_REV;
315 gl_framebuffer_data.resize(texture.width * texture.height * 4); 289 gl_framebuffer_data.resize(texture.width * texture.height * 4);
@@ -318,15 +292,9 @@ void RendererOpenGL::ConfigureFramebufferTexture(TextureInfo& texture,
318 UNREACHABLE(); 292 UNREACHABLE();
319 } 293 }
320 294
321 state.texture_units[0].texture = texture.resource.handle; 295 texture.resource.Release();
322 state.Apply(); 296 texture.resource.Create(GL_TEXTURE_2D);
323 297 glTextureStorage2D(texture.resource.handle, 1, internal_format, texture.width, texture.height);
324 glActiveTexture(GL_TEXTURE0);
325 glTexImage2D(GL_TEXTURE_2D, 0, internal_format, texture.width, texture.height, 0,
326 texture.gl_format, texture.gl_type, nullptr);
327
328 state.texture_units[0].texture = 0;
329 state.Apply();
330} 298}
331 299
332void RendererOpenGL::DrawScreenTriangles(const ScreenInfo& screen_info, float x, float y, float w, 300void RendererOpenGL::DrawScreenTriangles(const ScreenInfo& screen_info, float x, float y, float w,
@@ -368,7 +336,6 @@ void RendererOpenGL::DrawScreenTriangles(const ScreenInfo& screen_info, float x,
368 }}; 336 }};
369 337
370 state.texture_units[0].texture = screen_info.display_texture; 338 state.texture_units[0].texture = screen_info.display_texture;
371 state.texture_units[0].swizzle = {GL_RED, GL_GREEN, GL_BLUE, GL_ALPHA};
372 // Workaround brigthness problems in SMO by enabling sRGB in the final output 339 // Workaround brigthness problems in SMO by enabling sRGB in the final output
373 // if it has been used in the frame. Needed because of this bug in QT: QTBUG-50987 340 // if it has been used in the frame. Needed because of this bug in QT: QTBUG-50987
374 state.framebuffer_srgb.enabled = OpenGLState::GetsRGBUsed(); 341 state.framebuffer_srgb.enabled = OpenGLState::GetsRGBUsed();
@@ -429,7 +396,8 @@ void RendererOpenGL::CaptureScreenshot() {
429 GLuint renderbuffer; 396 GLuint renderbuffer;
430 glGenRenderbuffers(1, &renderbuffer); 397 glGenRenderbuffers(1, &renderbuffer);
431 glBindRenderbuffer(GL_RENDERBUFFER, renderbuffer); 398 glBindRenderbuffer(GL_RENDERBUFFER, renderbuffer);
432 glRenderbufferStorage(GL_RENDERBUFFER, GL_RGB8, layout.width, layout.height); 399 glRenderbufferStorage(GL_RENDERBUFFER, state.GetsRGBUsed() ? GL_SRGB8 : GL_RGB8, layout.width,
400 layout.height);
433 glFramebufferRenderbuffer(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_RENDERBUFFER, renderbuffer); 401 glFramebufferRenderbuffer(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_RENDERBUFFER, renderbuffer);
434 402
435 DrawScreen(layout); 403 DrawScreen(layout);
@@ -506,24 +474,14 @@ static void APIENTRY DebugHandler(GLenum source, GLenum type, GLuint id, GLenum
506 474
507/// Initialize the renderer 475/// Initialize the renderer
508bool RendererOpenGL::Init() { 476bool RendererOpenGL::Init() {
509 ScopeAcquireGLContext acquire_context{render_window}; 477 Core::Frontend::ScopeAcquireWindowContext acquire_context{render_window};
510 478
511 if (GLAD_GL_KHR_debug) { 479 if (GLAD_GL_KHR_debug) {
512 glEnable(GL_DEBUG_OUTPUT); 480 glEnable(GL_DEBUG_OUTPUT);
513 glDebugMessageCallback(DebugHandler, nullptr); 481 glDebugMessageCallback(DebugHandler, nullptr);
514 } 482 }
515 483
516 const char* gl_version{reinterpret_cast<char const*>(glGetString(GL_VERSION))}; 484 AddTelemetryFields();
517 const char* gpu_vendor{reinterpret_cast<char const*>(glGetString(GL_VENDOR))};
518 const char* gpu_model{reinterpret_cast<char const*>(glGetString(GL_RENDERER))};
519
520 LOG_INFO(Render_OpenGL, "GL_VERSION: {}", gl_version);
521 LOG_INFO(Render_OpenGL, "GL_VENDOR: {}", gpu_vendor);
522 LOG_INFO(Render_OpenGL, "GL_RENDERER: {}", gpu_model);
523
524 Core::Telemetry().AddField(Telemetry::FieldType::UserSystem, "GPU_Vendor", gpu_vendor);
525 Core::Telemetry().AddField(Telemetry::FieldType::UserSystem, "GPU_Model", gpu_model);
526 Core::Telemetry().AddField(Telemetry::FieldType::UserSystem, "GPU_OpenGL_Version", gl_version);
527 485
528 if (!GLAD_GL_VERSION_4_3) { 486 if (!GLAD_GL_VERSION_4_3) {
529 return false; 487 return false;
diff --git a/src/video_core/renderer_opengl/renderer_opengl.h b/src/video_core/renderer_opengl/renderer_opengl.h
index b85cc262f..6cbf9d2cb 100644
--- a/src/video_core/renderer_opengl/renderer_opengl.h
+++ b/src/video_core/renderer_opengl/renderer_opengl.h
@@ -12,6 +12,10 @@
12#include "video_core/renderer_opengl/gl_resource_manager.h" 12#include "video_core/renderer_opengl/gl_resource_manager.h"
13#include "video_core/renderer_opengl/gl_state.h" 13#include "video_core/renderer_opengl/gl_state.h"
14 14
15namespace Core {
16class System;
17}
18
15namespace Core::Frontend { 19namespace Core::Frontend {
16class EmuWindow; 20class EmuWindow;
17} 21}
@@ -35,23 +39,13 @@ struct TextureInfo {
35/// Structure used for storing information about the display target for the Switch screen 39/// Structure used for storing information about the display target for the Switch screen
36struct ScreenInfo { 40struct ScreenInfo {
37 GLuint display_texture; 41 GLuint display_texture;
38 const MathUtil::Rectangle<float> display_texcoords{0.0f, 0.0f, 1.0f, 1.0f}; 42 const Common::Rectangle<float> display_texcoords{0.0f, 0.0f, 1.0f, 1.0f};
39 TextureInfo texture; 43 TextureInfo texture;
40}; 44};
41 45
42/// Helper class to acquire/release OpenGL context within a given scope
43class ScopeAcquireGLContext : NonCopyable {
44public:
45 explicit ScopeAcquireGLContext(Core::Frontend::EmuWindow& window);
46 ~ScopeAcquireGLContext();
47
48private:
49 Core::Frontend::EmuWindow& emu_window;
50};
51
52class RendererOpenGL : public VideoCore::RendererBase { 46class RendererOpenGL : public VideoCore::RendererBase {
53public: 47public:
54 explicit RendererOpenGL(Core::Frontend::EmuWindow& window); 48 explicit RendererOpenGL(Core::Frontend::EmuWindow& window, Core::System& system);
55 ~RendererOpenGL() override; 49 ~RendererOpenGL() override;
56 50
57 /// Swap buffers (render frame) 51 /// Swap buffers (render frame)
@@ -66,6 +60,7 @@ public:
66 60
67private: 61private:
68 void InitOpenGLObjects(); 62 void InitOpenGLObjects();
63 void AddTelemetryFields();
69 void CreateRasterizer(); 64 void CreateRasterizer();
70 65
71 void ConfigureFramebufferTexture(TextureInfo& texture, 66 void ConfigureFramebufferTexture(TextureInfo& texture,
@@ -82,6 +77,8 @@ private:
82 void LoadColorToActiveGLTexture(u8 color_r, u8 color_g, u8 color_b, u8 color_a, 77 void LoadColorToActiveGLTexture(u8 color_r, u8 color_g, u8 color_b, u8 color_a,
83 const TextureInfo& texture); 78 const TextureInfo& texture);
84 79
80 Core::System& system;
81
85 OpenGLState state; 82 OpenGLState state;
86 83
87 // OpenGL object IDs 84 // OpenGL object IDs
@@ -106,7 +103,7 @@ private:
106 103
107 /// Used for transforming the framebuffer orientation 104 /// Used for transforming the framebuffer orientation
108 Tegra::FramebufferConfig::TransformFlags framebuffer_transform_flags; 105 Tegra::FramebufferConfig::TransformFlags framebuffer_transform_flags;
109 MathUtil::Rectangle<int> framebuffer_crop_rect; 106 Common::Rectangle<int> framebuffer_crop_rect;
110}; 107};
111 108
112} // namespace OpenGL 109} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/utils.cpp b/src/video_core/renderer_opengl/utils.cpp
index d84634cb3..84a987371 100644
--- a/src/video_core/renderer_opengl/utils.cpp
+++ b/src/video_core/renderer_opengl/utils.cpp
@@ -5,11 +5,39 @@
5#include <string> 5#include <string>
6#include <fmt/format.h> 6#include <fmt/format.h>
7#include <glad/glad.h> 7#include <glad/glad.h>
8#include "common/assert.h"
8#include "common/common_types.h" 9#include "common/common_types.h"
9#include "video_core/renderer_opengl/utils.h" 10#include "video_core/renderer_opengl/utils.h"
10 11
11namespace OpenGL { 12namespace OpenGL {
12 13
14BindBuffersRangePushBuffer::BindBuffersRangePushBuffer(GLenum target) : target{target} {}
15
16BindBuffersRangePushBuffer::~BindBuffersRangePushBuffer() = default;
17
18void BindBuffersRangePushBuffer::Setup(GLuint first_) {
19 first = first_;
20 buffers.clear();
21 offsets.clear();
22 sizes.clear();
23}
24
25void BindBuffersRangePushBuffer::Push(GLuint buffer, GLintptr offset, GLsizeiptr size) {
26 buffers.push_back(buffer);
27 offsets.push_back(offset);
28 sizes.push_back(size);
29}
30
31void BindBuffersRangePushBuffer::Bind() const {
32 const std::size_t count{buffers.size()};
33 DEBUG_ASSERT(count == offsets.size() && count == sizes.size());
34 if (count == 0) {
35 return;
36 }
37 glBindBuffersRange(target, first, static_cast<GLsizei>(count), buffers.data(), offsets.data(),
38 sizes.data());
39}
40
13void LabelGLObject(GLenum identifier, GLuint handle, VAddr addr, std::string extra_info) { 41void LabelGLObject(GLenum identifier, GLuint handle, VAddr addr, std::string extra_info) {
14 if (!GLAD_GL_KHR_debug) { 42 if (!GLAD_GL_KHR_debug) {
15 return; // We don't need to throw an error as this is just for debugging 43 return; // We don't need to throw an error as this is just for debugging
diff --git a/src/video_core/renderer_opengl/utils.h b/src/video_core/renderer_opengl/utils.h
index 1fcb6fc11..aef45c9dc 100644
--- a/src/video_core/renderer_opengl/utils.h
+++ b/src/video_core/renderer_opengl/utils.h
@@ -5,11 +5,31 @@
5#pragma once 5#pragma once
6 6
7#include <string> 7#include <string>
8#include <vector>
8#include <glad/glad.h> 9#include <glad/glad.h>
9#include "common/common_types.h" 10#include "common/common_types.h"
10 11
11namespace OpenGL { 12namespace OpenGL {
12 13
14class BindBuffersRangePushBuffer {
15public:
16 BindBuffersRangePushBuffer(GLenum target);
17 ~BindBuffersRangePushBuffer();
18
19 void Setup(GLuint first_);
20
21 void Push(GLuint buffer, GLintptr offset, GLsizeiptr size);
22
23 void Bind() const;
24
25private:
26 GLenum target;
27 GLuint first;
28 std::vector<GLuint> buffers;
29 std::vector<GLintptr> offsets;
30 std::vector<GLsizeiptr> sizes;
31};
32
13void LabelGLObject(GLenum identifier, GLuint handle, VAddr addr, std::string extra_info = ""); 33void LabelGLObject(GLenum identifier, GLuint handle, VAddr addr, std::string extra_info = "");
14 34
15} // namespace OpenGL \ No newline at end of file 35} // namespace OpenGL \ No newline at end of file
diff --git a/src/video_core/renderer_vulkan/declarations.h b/src/video_core/renderer_vulkan/declarations.h
new file mode 100644
index 000000000..ba25b5bc7
--- /dev/null
+++ b/src/video_core/renderer_vulkan/declarations.h
@@ -0,0 +1,45 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <vulkan/vulkan.hpp>
8
9namespace Vulkan {
10
11// vulkan.hpp unique handlers use DispatchLoaderStatic
12template <typename T>
13using UniqueHandle = vk::UniqueHandle<T, vk::DispatchLoaderDynamic>;
14
15using UniqueAccelerationStructureNV = UniqueHandle<vk::AccelerationStructureNV>;
16using UniqueBuffer = UniqueHandle<vk::Buffer>;
17using UniqueBufferView = UniqueHandle<vk::BufferView>;
18using UniqueCommandBuffer = UniqueHandle<vk::CommandBuffer>;
19using UniqueCommandPool = UniqueHandle<vk::CommandPool>;
20using UniqueDescriptorPool = UniqueHandle<vk::DescriptorPool>;
21using UniqueDescriptorSet = UniqueHandle<vk::DescriptorSet>;
22using UniqueDescriptorSetLayout = UniqueHandle<vk::DescriptorSetLayout>;
23using UniqueDescriptorUpdateTemplate = UniqueHandle<vk::DescriptorUpdateTemplate>;
24using UniqueDevice = UniqueHandle<vk::Device>;
25using UniqueDeviceMemory = UniqueHandle<vk::DeviceMemory>;
26using UniqueEvent = UniqueHandle<vk::Event>;
27using UniqueFence = UniqueHandle<vk::Fence>;
28using UniqueFramebuffer = UniqueHandle<vk::Framebuffer>;
29using UniqueImage = UniqueHandle<vk::Image>;
30using UniqueImageView = UniqueHandle<vk::ImageView>;
31using UniqueIndirectCommandsLayoutNVX = UniqueHandle<vk::IndirectCommandsLayoutNVX>;
32using UniqueObjectTableNVX = UniqueHandle<vk::ObjectTableNVX>;
33using UniquePipeline = UniqueHandle<vk::Pipeline>;
34using UniquePipelineCache = UniqueHandle<vk::PipelineCache>;
35using UniquePipelineLayout = UniqueHandle<vk::PipelineLayout>;
36using UniqueQueryPool = UniqueHandle<vk::QueryPool>;
37using UniqueRenderPass = UniqueHandle<vk::RenderPass>;
38using UniqueSampler = UniqueHandle<vk::Sampler>;
39using UniqueSamplerYcbcrConversion = UniqueHandle<vk::SamplerYcbcrConversion>;
40using UniqueSemaphore = UniqueHandle<vk::Semaphore>;
41using UniqueShaderModule = UniqueHandle<vk::ShaderModule>;
42using UniqueSwapchainKHR = UniqueHandle<vk::SwapchainKHR>;
43using UniqueValidationCacheEXT = UniqueHandle<vk::ValidationCacheEXT>;
44
45} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp
new file mode 100644
index 000000000..34bf26ff2
--- /dev/null
+++ b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp
@@ -0,0 +1,483 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/assert.h"
6#include "common/common_types.h"
7#include "common/logging/log.h"
8#include "video_core/engines/maxwell_3d.h"
9#include "video_core/renderer_vulkan/declarations.h"
10#include "video_core/renderer_vulkan/maxwell_to_vk.h"
11#include "video_core/renderer_vulkan/vk_device.h"
12#include "video_core/surface.h"
13
14namespace Vulkan::MaxwellToVK {
15
16namespace Sampler {
17
18vk::Filter Filter(Tegra::Texture::TextureFilter filter) {
19 switch (filter) {
20 case Tegra::Texture::TextureFilter::Linear:
21 return vk::Filter::eLinear;
22 case Tegra::Texture::TextureFilter::Nearest:
23 return vk::Filter::eNearest;
24 }
25 UNIMPLEMENTED_MSG("Unimplemented sampler filter={}", static_cast<u32>(filter));
26 return {};
27}
28
29vk::SamplerMipmapMode MipmapMode(Tegra::Texture::TextureMipmapFilter mipmap_filter) {
30 switch (mipmap_filter) {
31 case Tegra::Texture::TextureMipmapFilter::None:
32 // TODO(Rodrigo): None seems to be mapped to OpenGL's mag and min filters without mipmapping
33 // (e.g. GL_NEAREST and GL_LINEAR). Vulkan doesn't have such a thing, find out if we have to
34 // use an image view with a single mipmap level to emulate this.
35 return vk::SamplerMipmapMode::eLinear;
36 case Tegra::Texture::TextureMipmapFilter::Linear:
37 return vk::SamplerMipmapMode::eLinear;
38 case Tegra::Texture::TextureMipmapFilter::Nearest:
39 return vk::SamplerMipmapMode::eNearest;
40 }
41 UNIMPLEMENTED_MSG("Unimplemented sampler mipmap mode={}", static_cast<u32>(mipmap_filter));
42 return {};
43}
44
45vk::SamplerAddressMode WrapMode(Tegra::Texture::WrapMode wrap_mode) {
46 switch (wrap_mode) {
47 case Tegra::Texture::WrapMode::Wrap:
48 return vk::SamplerAddressMode::eRepeat;
49 case Tegra::Texture::WrapMode::Mirror:
50 return vk::SamplerAddressMode::eMirroredRepeat;
51 case Tegra::Texture::WrapMode::ClampToEdge:
52 return vk::SamplerAddressMode::eClampToEdge;
53 case Tegra::Texture::WrapMode::Border:
54 return vk::SamplerAddressMode::eClampToBorder;
55 case Tegra::Texture::WrapMode::ClampOGL:
56 // TODO(Rodrigo): GL_CLAMP was removed as of OpenGL 3.1, to implement GL_CLAMP, we can use
57 // eClampToBorder to get the border color of the texture, and then sample the edge to
58 // manually mix them. However the shader part of this is not yet implemented.
59 return vk::SamplerAddressMode::eClampToBorder;
60 case Tegra::Texture::WrapMode::MirrorOnceClampToEdge:
61 return vk::SamplerAddressMode::eMirrorClampToEdge;
62 case Tegra::Texture::WrapMode::MirrorOnceBorder:
63 UNIMPLEMENTED();
64 return vk::SamplerAddressMode::eMirrorClampToEdge;
65 }
66 UNIMPLEMENTED_MSG("Unimplemented wrap mode={}", static_cast<u32>(wrap_mode));
67 return {};
68}
69
70vk::CompareOp DepthCompareFunction(Tegra::Texture::DepthCompareFunc depth_compare_func) {
71 switch (depth_compare_func) {
72 case Tegra::Texture::DepthCompareFunc::Never:
73 return vk::CompareOp::eNever;
74 case Tegra::Texture::DepthCompareFunc::Less:
75 return vk::CompareOp::eLess;
76 case Tegra::Texture::DepthCompareFunc::LessEqual:
77 return vk::CompareOp::eLessOrEqual;
78 case Tegra::Texture::DepthCompareFunc::Equal:
79 return vk::CompareOp::eEqual;
80 case Tegra::Texture::DepthCompareFunc::NotEqual:
81 return vk::CompareOp::eNotEqual;
82 case Tegra::Texture::DepthCompareFunc::Greater:
83 return vk::CompareOp::eGreater;
84 case Tegra::Texture::DepthCompareFunc::GreaterEqual:
85 return vk::CompareOp::eGreaterOrEqual;
86 case Tegra::Texture::DepthCompareFunc::Always:
87 return vk::CompareOp::eAlways;
88 }
89 UNIMPLEMENTED_MSG("Unimplemented sampler depth compare function={}",
90 static_cast<u32>(depth_compare_func));
91 return {};
92}
93
94} // namespace Sampler
95
96struct FormatTuple {
97 vk::Format format; ///< Vulkan format
98 ComponentType component_type; ///< Abstracted component type
99 bool attachable; ///< True when this format can be used as an attachment
100};
101
102static constexpr std::array<FormatTuple, VideoCore::Surface::MaxPixelFormat> tex_format_tuples = {{
103 {vk::Format::eA8B8G8R8UnormPack32, ComponentType::UNorm, true}, // ABGR8U
104 {vk::Format::eUndefined, ComponentType::Invalid, false}, // ABGR8S
105 {vk::Format::eUndefined, ComponentType::Invalid, false}, // ABGR8UI
106 {vk::Format::eB5G6R5UnormPack16, ComponentType::UNorm, false}, // B5G6R5U
107 {vk::Format::eA2B10G10R10UnormPack32, ComponentType::UNorm, true}, // A2B10G10R10U
108 {vk::Format::eUndefined, ComponentType::Invalid, false}, // A1B5G5R5U
109 {vk::Format::eR8Unorm, ComponentType::UNorm, true}, // R8U
110 {vk::Format::eUndefined, ComponentType::Invalid, false}, // R8UI
111 {vk::Format::eUndefined, ComponentType::Invalid, false}, // RGBA16F
112 {vk::Format::eUndefined, ComponentType::Invalid, false}, // RGBA16U
113 {vk::Format::eUndefined, ComponentType::Invalid, false}, // RGBA16UI
114 {vk::Format::eUndefined, ComponentType::Invalid, false}, // R11FG11FB10F
115 {vk::Format::eUndefined, ComponentType::Invalid, false}, // RGBA32UI
116 {vk::Format::eBc1RgbaUnormBlock, ComponentType::UNorm, false}, // DXT1
117 {vk::Format::eBc2UnormBlock, ComponentType::UNorm, false}, // DXT23
118 {vk::Format::eBc3UnormBlock, ComponentType::UNorm, false}, // DXT45
119 {vk::Format::eBc4UnormBlock, ComponentType::UNorm, false}, // DXN1
120 {vk::Format::eUndefined, ComponentType::Invalid, false}, // DXN2UNORM
121 {vk::Format::eUndefined, ComponentType::Invalid, false}, // DXN2SNORM
122 {vk::Format::eUndefined, ComponentType::Invalid, false}, // BC7U
123 {vk::Format::eUndefined, ComponentType::Invalid, false}, // BC6H_UF16
124 {vk::Format::eUndefined, ComponentType::Invalid, false}, // BC6H_SF16
125 {vk::Format::eUndefined, ComponentType::Invalid, false}, // ASTC_2D_4X4
126 {vk::Format::eUndefined, ComponentType::Invalid, false}, // BGRA8
127 {vk::Format::eUndefined, ComponentType::Invalid, false}, // RGBA32F
128 {vk::Format::eUndefined, ComponentType::Invalid, false}, // RG32F
129 {vk::Format::eUndefined, ComponentType::Invalid, false}, // R32F
130 {vk::Format::eUndefined, ComponentType::Invalid, false}, // R16F
131 {vk::Format::eUndefined, ComponentType::Invalid, false}, // R16U
132 {vk::Format::eUndefined, ComponentType::Invalid, false}, // R16S
133 {vk::Format::eUndefined, ComponentType::Invalid, false}, // R16UI
134 {vk::Format::eUndefined, ComponentType::Invalid, false}, // R16I
135 {vk::Format::eUndefined, ComponentType::Invalid, false}, // RG16
136 {vk::Format::eUndefined, ComponentType::Invalid, false}, // RG16F
137 {vk::Format::eUndefined, ComponentType::Invalid, false}, // RG16UI
138 {vk::Format::eUndefined, ComponentType::Invalid, false}, // RG16I
139 {vk::Format::eUndefined, ComponentType::Invalid, false}, // RG16S
140 {vk::Format::eUndefined, ComponentType::Invalid, false}, // RGB32F
141 {vk::Format::eA8B8G8R8SrgbPack32, ComponentType::UNorm, true}, // RGBA8_SRGB
142 {vk::Format::eUndefined, ComponentType::Invalid, false}, // RG8U
143 {vk::Format::eUndefined, ComponentType::Invalid, false}, // RG8S
144 {vk::Format::eUndefined, ComponentType::Invalid, false}, // RG32UI
145 {vk::Format::eUndefined, ComponentType::Invalid, false}, // R32UI
146 {vk::Format::eUndefined, ComponentType::Invalid, false}, // ASTC_2D_8X8
147 {vk::Format::eUndefined, ComponentType::Invalid, false}, // ASTC_2D_8X5
148 {vk::Format::eUndefined, ComponentType::Invalid, false}, // ASTC_2D_5X4
149
150 // Compressed sRGB formats
151 {vk::Format::eUndefined, ComponentType::Invalid, false}, // BGRA8_SRGB
152 {vk::Format::eUndefined, ComponentType::Invalid, false}, // DXT1_SRGB
153 {vk::Format::eUndefined, ComponentType::Invalid, false}, // DXT23_SRGB
154 {vk::Format::eUndefined, ComponentType::Invalid, false}, // DXT45_SRGB
155 {vk::Format::eUndefined, ComponentType::Invalid, false}, // BC7U_SRGB
156 {vk::Format::eUndefined, ComponentType::Invalid, false}, // ASTC_2D_4X4_SRGB
157 {vk::Format::eUndefined, ComponentType::Invalid, false}, // ASTC_2D_8X8_SRGB
158 {vk::Format::eUndefined, ComponentType::Invalid, false}, // ASTC_2D_8X5_SRGB
159 {vk::Format::eUndefined, ComponentType::Invalid, false}, // ASTC_2D_5X4_SRGB
160 {vk::Format::eUndefined, ComponentType::Invalid, false}, // ASTC_2D_5X5
161 {vk::Format::eUndefined, ComponentType::Invalid, false}, // ASTC_2D_5X5_SRGB
162 {vk::Format::eUndefined, ComponentType::Invalid, false}, // ASTC_2D_10X8
163 {vk::Format::eUndefined, ComponentType::Invalid, false}, // ASTC_2D_10X8_SRGB
164
165 // Depth formats
166 {vk::Format::eD32Sfloat, ComponentType::Float, true}, // Z32F
167 {vk::Format::eD16Unorm, ComponentType::UNorm, true}, // Z16
168
169 // DepthStencil formats
170 {vk::Format::eD24UnormS8Uint, ComponentType::UNorm, true}, // Z24S8
171 {vk::Format::eD24UnormS8Uint, ComponentType::UNorm, true}, // S8Z24 (emulated)
172 {vk::Format::eUndefined, ComponentType::Invalid, false}, // Z32FS8
173}};
174
175static constexpr bool IsZetaFormat(PixelFormat pixel_format) {
176 return pixel_format >= PixelFormat::MaxColorFormat &&
177 pixel_format < PixelFormat::MaxDepthStencilFormat;
178}
179
180std::pair<vk::Format, bool> SurfaceFormat(const VKDevice& device, FormatType format_type,
181 PixelFormat pixel_format, ComponentType component_type) {
182 ASSERT(static_cast<std::size_t>(pixel_format) < tex_format_tuples.size());
183
184 const auto tuple = tex_format_tuples[static_cast<u32>(pixel_format)];
185 UNIMPLEMENTED_IF_MSG(tuple.format == vk::Format::eUndefined,
186 "Unimplemented texture format with pixel format={} and component type={}",
187 static_cast<u32>(pixel_format), static_cast<u32>(component_type));
188 ASSERT_MSG(component_type == tuple.component_type, "Component type mismatch");
189
190 auto usage = vk::FormatFeatureFlagBits::eSampledImage |
191 vk::FormatFeatureFlagBits::eTransferDst | vk::FormatFeatureFlagBits::eTransferSrc;
192 if (tuple.attachable) {
193 usage |= IsZetaFormat(pixel_format) ? vk::FormatFeatureFlagBits::eDepthStencilAttachment
194 : vk::FormatFeatureFlagBits::eColorAttachment;
195 }
196 return {device.GetSupportedFormat(tuple.format, usage, format_type), tuple.attachable};
197}
198
199vk::ShaderStageFlagBits ShaderStage(Maxwell::ShaderStage stage) {
200 switch (stage) {
201 case Maxwell::ShaderStage::Vertex:
202 return vk::ShaderStageFlagBits::eVertex;
203 case Maxwell::ShaderStage::TesselationControl:
204 return vk::ShaderStageFlagBits::eTessellationControl;
205 case Maxwell::ShaderStage::TesselationEval:
206 return vk::ShaderStageFlagBits::eTessellationEvaluation;
207 case Maxwell::ShaderStage::Geometry:
208 return vk::ShaderStageFlagBits::eGeometry;
209 case Maxwell::ShaderStage::Fragment:
210 return vk::ShaderStageFlagBits::eFragment;
211 }
212 UNIMPLEMENTED_MSG("Unimplemented shader stage={}", static_cast<u32>(stage));
213 return {};
214}
215
216vk::PrimitiveTopology PrimitiveTopology(Maxwell::PrimitiveTopology topology) {
217 switch (topology) {
218 case Maxwell::PrimitiveTopology::Points:
219 return vk::PrimitiveTopology::ePointList;
220 case Maxwell::PrimitiveTopology::Lines:
221 return vk::PrimitiveTopology::eLineList;
222 case Maxwell::PrimitiveTopology::LineStrip:
223 return vk::PrimitiveTopology::eLineStrip;
224 case Maxwell::PrimitiveTopology::Triangles:
225 return vk::PrimitiveTopology::eTriangleList;
226 case Maxwell::PrimitiveTopology::TriangleStrip:
227 return vk::PrimitiveTopology::eTriangleStrip;
228 }
229 UNIMPLEMENTED_MSG("Unimplemented topology={}", static_cast<u32>(topology));
230 return {};
231}
232
233vk::Format VertexFormat(Maxwell::VertexAttribute::Type type, Maxwell::VertexAttribute::Size size) {
234 switch (type) {
235 case Maxwell::VertexAttribute::Type::SignedNorm:
236 break;
237 case Maxwell::VertexAttribute::Type::UnsignedNorm:
238 switch (size) {
239 case Maxwell::VertexAttribute::Size::Size_8_8_8_8:
240 return vk::Format::eR8G8B8A8Unorm;
241 default:
242 break;
243 }
244 break;
245 case Maxwell::VertexAttribute::Type::SignedInt:
246 break;
247 case Maxwell::VertexAttribute::Type::UnsignedInt:
248 switch (size) {
249 case Maxwell::VertexAttribute::Size::Size_32:
250 return vk::Format::eR32Uint;
251 default:
252 break;
253 }
254 case Maxwell::VertexAttribute::Type::UnsignedScaled:
255 case Maxwell::VertexAttribute::Type::SignedScaled:
256 break;
257 case Maxwell::VertexAttribute::Type::Float:
258 switch (size) {
259 case Maxwell::VertexAttribute::Size::Size_32_32_32_32:
260 return vk::Format::eR32G32B32A32Sfloat;
261 case Maxwell::VertexAttribute::Size::Size_32_32_32:
262 return vk::Format::eR32G32B32Sfloat;
263 case Maxwell::VertexAttribute::Size::Size_32_32:
264 return vk::Format::eR32G32Sfloat;
265 case Maxwell::VertexAttribute::Size::Size_32:
266 return vk::Format::eR32Sfloat;
267 default:
268 break;
269 }
270 break;
271 }
272 UNIMPLEMENTED_MSG("Unimplemented vertex format of type={} and size={}", static_cast<u32>(type),
273 static_cast<u32>(size));
274 return {};
275}
276
277vk::CompareOp ComparisonOp(Maxwell::ComparisonOp comparison) {
278 switch (comparison) {
279 case Maxwell::ComparisonOp::Never:
280 case Maxwell::ComparisonOp::NeverOld:
281 return vk::CompareOp::eNever;
282 case Maxwell::ComparisonOp::Less:
283 case Maxwell::ComparisonOp::LessOld:
284 return vk::CompareOp::eLess;
285 case Maxwell::ComparisonOp::Equal:
286 case Maxwell::ComparisonOp::EqualOld:
287 return vk::CompareOp::eEqual;
288 case Maxwell::ComparisonOp::LessEqual:
289 case Maxwell::ComparisonOp::LessEqualOld:
290 return vk::CompareOp::eLessOrEqual;
291 case Maxwell::ComparisonOp::Greater:
292 case Maxwell::ComparisonOp::GreaterOld:
293 return vk::CompareOp::eGreater;
294 case Maxwell::ComparisonOp::NotEqual:
295 case Maxwell::ComparisonOp::NotEqualOld:
296 return vk::CompareOp::eNotEqual;
297 case Maxwell::ComparisonOp::GreaterEqual:
298 case Maxwell::ComparisonOp::GreaterEqualOld:
299 return vk::CompareOp::eGreaterOrEqual;
300 case Maxwell::ComparisonOp::Always:
301 case Maxwell::ComparisonOp::AlwaysOld:
302 return vk::CompareOp::eAlways;
303 }
304 UNIMPLEMENTED_MSG("Unimplemented comparison op={}", static_cast<u32>(comparison));
305 return {};
306}
307
308vk::IndexType IndexFormat(Maxwell::IndexFormat index_format) {
309 switch (index_format) {
310 case Maxwell::IndexFormat::UnsignedByte:
311 UNIMPLEMENTED_MSG("Vulkan does not support native u8 index format");
312 return vk::IndexType::eUint16;
313 case Maxwell::IndexFormat::UnsignedShort:
314 return vk::IndexType::eUint16;
315 case Maxwell::IndexFormat::UnsignedInt:
316 return vk::IndexType::eUint32;
317 }
318 UNIMPLEMENTED_MSG("Unimplemented index_format={}", static_cast<u32>(index_format));
319 return {};
320}
321
322vk::StencilOp StencilOp(Maxwell::StencilOp stencil_op) {
323 switch (stencil_op) {
324 case Maxwell::StencilOp::Keep:
325 case Maxwell::StencilOp::KeepOGL:
326 return vk::StencilOp::eKeep;
327 case Maxwell::StencilOp::Zero:
328 case Maxwell::StencilOp::ZeroOGL:
329 return vk::StencilOp::eZero;
330 case Maxwell::StencilOp::Replace:
331 case Maxwell::StencilOp::ReplaceOGL:
332 return vk::StencilOp::eReplace;
333 case Maxwell::StencilOp::Incr:
334 case Maxwell::StencilOp::IncrOGL:
335 return vk::StencilOp::eIncrementAndClamp;
336 case Maxwell::StencilOp::Decr:
337 case Maxwell::StencilOp::DecrOGL:
338 return vk::StencilOp::eDecrementAndClamp;
339 case Maxwell::StencilOp::Invert:
340 case Maxwell::StencilOp::InvertOGL:
341 return vk::StencilOp::eInvert;
342 case Maxwell::StencilOp::IncrWrap:
343 case Maxwell::StencilOp::IncrWrapOGL:
344 return vk::StencilOp::eIncrementAndWrap;
345 case Maxwell::StencilOp::DecrWrap:
346 case Maxwell::StencilOp::DecrWrapOGL:
347 return vk::StencilOp::eDecrementAndWrap;
348 }
349 UNIMPLEMENTED_MSG("Unimplemented stencil op={}", static_cast<u32>(stencil_op));
350 return {};
351}
352
353vk::BlendOp BlendEquation(Maxwell::Blend::Equation equation) {
354 switch (equation) {
355 case Maxwell::Blend::Equation::Add:
356 case Maxwell::Blend::Equation::AddGL:
357 return vk::BlendOp::eAdd;
358 case Maxwell::Blend::Equation::Subtract:
359 case Maxwell::Blend::Equation::SubtractGL:
360 return vk::BlendOp::eSubtract;
361 case Maxwell::Blend::Equation::ReverseSubtract:
362 case Maxwell::Blend::Equation::ReverseSubtractGL:
363 return vk::BlendOp::eReverseSubtract;
364 case Maxwell::Blend::Equation::Min:
365 case Maxwell::Blend::Equation::MinGL:
366 return vk::BlendOp::eMin;
367 case Maxwell::Blend::Equation::Max:
368 case Maxwell::Blend::Equation::MaxGL:
369 return vk::BlendOp::eMax;
370 }
371 UNIMPLEMENTED_MSG("Unimplemented blend equation={}", static_cast<u32>(equation));
372 return {};
373}
374
375vk::BlendFactor BlendFactor(Maxwell::Blend::Factor factor) {
376 switch (factor) {
377 case Maxwell::Blend::Factor::Zero:
378 case Maxwell::Blend::Factor::ZeroGL:
379 return vk::BlendFactor::eZero;
380 case Maxwell::Blend::Factor::One:
381 case Maxwell::Blend::Factor::OneGL:
382 return vk::BlendFactor::eOne;
383 case Maxwell::Blend::Factor::SourceColor:
384 case Maxwell::Blend::Factor::SourceColorGL:
385 return vk::BlendFactor::eSrcColor;
386 case Maxwell::Blend::Factor::OneMinusSourceColor:
387 case Maxwell::Blend::Factor::OneMinusSourceColorGL:
388 return vk::BlendFactor::eOneMinusSrcColor;
389 case Maxwell::Blend::Factor::SourceAlpha:
390 case Maxwell::Blend::Factor::SourceAlphaGL:
391 return vk::BlendFactor::eSrcAlpha;
392 case Maxwell::Blend::Factor::OneMinusSourceAlpha:
393 case Maxwell::Blend::Factor::OneMinusSourceAlphaGL:
394 return vk::BlendFactor::eOneMinusSrcAlpha;
395 case Maxwell::Blend::Factor::DestAlpha:
396 case Maxwell::Blend::Factor::DestAlphaGL:
397 return vk::BlendFactor::eDstAlpha;
398 case Maxwell::Blend::Factor::OneMinusDestAlpha:
399 case Maxwell::Blend::Factor::OneMinusDestAlphaGL:
400 return vk::BlendFactor::eOneMinusDstAlpha;
401 case Maxwell::Blend::Factor::DestColor:
402 case Maxwell::Blend::Factor::DestColorGL:
403 return vk::BlendFactor::eDstColor;
404 case Maxwell::Blend::Factor::OneMinusDestColor:
405 case Maxwell::Blend::Factor::OneMinusDestColorGL:
406 return vk::BlendFactor::eOneMinusDstColor;
407 case Maxwell::Blend::Factor::SourceAlphaSaturate:
408 case Maxwell::Blend::Factor::SourceAlphaSaturateGL:
409 return vk::BlendFactor::eSrcAlphaSaturate;
410 case Maxwell::Blend::Factor::Source1Color:
411 case Maxwell::Blend::Factor::Source1ColorGL:
412 return vk::BlendFactor::eSrc1Color;
413 case Maxwell::Blend::Factor::OneMinusSource1Color:
414 case Maxwell::Blend::Factor::OneMinusSource1ColorGL:
415 return vk::BlendFactor::eOneMinusSrc1Color;
416 case Maxwell::Blend::Factor::Source1Alpha:
417 case Maxwell::Blend::Factor::Source1AlphaGL:
418 return vk::BlendFactor::eSrc1Alpha;
419 case Maxwell::Blend::Factor::OneMinusSource1Alpha:
420 case Maxwell::Blend::Factor::OneMinusSource1AlphaGL:
421 return vk::BlendFactor::eOneMinusSrc1Alpha;
422 case Maxwell::Blend::Factor::ConstantColor:
423 case Maxwell::Blend::Factor::ConstantColorGL:
424 return vk::BlendFactor::eConstantColor;
425 case Maxwell::Blend::Factor::OneMinusConstantColor:
426 case Maxwell::Blend::Factor::OneMinusConstantColorGL:
427 return vk::BlendFactor::eOneMinusConstantColor;
428 case Maxwell::Blend::Factor::ConstantAlpha:
429 case Maxwell::Blend::Factor::ConstantAlphaGL:
430 return vk::BlendFactor::eConstantAlpha;
431 case Maxwell::Blend::Factor::OneMinusConstantAlpha:
432 case Maxwell::Blend::Factor::OneMinusConstantAlphaGL:
433 return vk::BlendFactor::eOneMinusConstantAlpha;
434 }
435 UNIMPLEMENTED_MSG("Unimplemented blend factor={}", static_cast<u32>(factor));
436 return {};
437}
438
439vk::FrontFace FrontFace(Maxwell::Cull::FrontFace front_face) {
440 switch (front_face) {
441 case Maxwell::Cull::FrontFace::ClockWise:
442 return vk::FrontFace::eClockwise;
443 case Maxwell::Cull::FrontFace::CounterClockWise:
444 return vk::FrontFace::eCounterClockwise;
445 }
446 UNIMPLEMENTED_MSG("Unimplemented front face={}", static_cast<u32>(front_face));
447 return {};
448}
449
450vk::CullModeFlags CullFace(Maxwell::Cull::CullFace cull_face) {
451 switch (cull_face) {
452 case Maxwell::Cull::CullFace::Front:
453 return vk::CullModeFlagBits::eFront;
454 case Maxwell::Cull::CullFace::Back:
455 return vk::CullModeFlagBits::eBack;
456 case Maxwell::Cull::CullFace::FrontAndBack:
457 return vk::CullModeFlagBits::eFrontAndBack;
458 }
459 UNIMPLEMENTED_MSG("Unimplemented cull face={}", static_cast<u32>(cull_face));
460 return {};
461}
462
463vk::ComponentSwizzle SwizzleSource(Tegra::Texture::SwizzleSource swizzle) {
464 switch (swizzle) {
465 case Tegra::Texture::SwizzleSource::Zero:
466 return vk::ComponentSwizzle::eZero;
467 case Tegra::Texture::SwizzleSource::R:
468 return vk::ComponentSwizzle::eR;
469 case Tegra::Texture::SwizzleSource::G:
470 return vk::ComponentSwizzle::eG;
471 case Tegra::Texture::SwizzleSource::B:
472 return vk::ComponentSwizzle::eB;
473 case Tegra::Texture::SwizzleSource::A:
474 return vk::ComponentSwizzle::eA;
475 case Tegra::Texture::SwizzleSource::OneInt:
476 case Tegra::Texture::SwizzleSource::OneFloat:
477 return vk::ComponentSwizzle::eOne;
478 }
479 UNIMPLEMENTED_MSG("Unimplemented swizzle source={}", static_cast<u32>(swizzle));
480 return {};
481}
482
483} // namespace Vulkan::MaxwellToVK
diff --git a/src/video_core/renderer_vulkan/maxwell_to_vk.h b/src/video_core/renderer_vulkan/maxwell_to_vk.h
new file mode 100644
index 000000000..4cadc0721
--- /dev/null
+++ b/src/video_core/renderer_vulkan/maxwell_to_vk.h
@@ -0,0 +1,58 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <utility>
8#include "common/common_types.h"
9#include "video_core/engines/maxwell_3d.h"
10#include "video_core/renderer_vulkan/declarations.h"
11#include "video_core/renderer_vulkan/vk_device.h"
12#include "video_core/surface.h"
13#include "video_core/textures/texture.h"
14
15namespace Vulkan::MaxwellToVK {
16
17using Maxwell = Tegra::Engines::Maxwell3D::Regs;
18using PixelFormat = VideoCore::Surface::PixelFormat;
19using ComponentType = VideoCore::Surface::ComponentType;
20
21namespace Sampler {
22
23vk::Filter Filter(Tegra::Texture::TextureFilter filter);
24
25vk::SamplerMipmapMode MipmapMode(Tegra::Texture::TextureMipmapFilter mipmap_filter);
26
27vk::SamplerAddressMode WrapMode(Tegra::Texture::WrapMode wrap_mode);
28
29vk::CompareOp DepthCompareFunction(Tegra::Texture::DepthCompareFunc depth_compare_func);
30
31} // namespace Sampler
32
33std::pair<vk::Format, bool> SurfaceFormat(const VKDevice& device, FormatType format_type,
34 PixelFormat pixel_format, ComponentType component_type);
35
36vk::ShaderStageFlagBits ShaderStage(Maxwell::ShaderStage stage);
37
38vk::PrimitiveTopology PrimitiveTopology(Maxwell::PrimitiveTopology topology);
39
40vk::Format VertexFormat(Maxwell::VertexAttribute::Type type, Maxwell::VertexAttribute::Size size);
41
42vk::CompareOp ComparisonOp(Maxwell::ComparisonOp comparison);
43
44vk::IndexType IndexFormat(Maxwell::IndexFormat index_format);
45
46vk::StencilOp StencilOp(Maxwell::StencilOp stencil_op);
47
48vk::BlendOp BlendEquation(Maxwell::Blend::Equation equation);
49
50vk::BlendFactor BlendFactor(Maxwell::Blend::Factor factor);
51
52vk::FrontFace FrontFace(Maxwell::Cull::FrontFace front_face);
53
54vk::CullModeFlags CullFace(Maxwell::Cull::CullFace cull_face);
55
56vk::ComponentSwizzle SwizzleSource(Tegra::Texture::SwizzleSource swizzle);
57
58} // namespace Vulkan::MaxwellToVK
diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp
new file mode 100644
index 000000000..02a9f5ecb
--- /dev/null
+++ b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp
@@ -0,0 +1,123 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <cstring>
6#include <memory>
7#include <optional>
8#include <tuple>
9
10#include "common/alignment.h"
11#include "common/assert.h"
12#include "core/memory.h"
13#include "video_core/memory_manager.h"
14#include "video_core/renderer_vulkan/declarations.h"
15#include "video_core/renderer_vulkan/vk_buffer_cache.h"
16#include "video_core/renderer_vulkan/vk_scheduler.h"
17#include "video_core/renderer_vulkan/vk_stream_buffer.h"
18
19namespace Vulkan {
20
21CachedBufferEntry::CachedBufferEntry(VAddr cpu_addr, std::size_t size, u64 offset,
22 std::size_t alignment, u8* host_ptr)
23 : RasterizerCacheObject{host_ptr}, cpu_addr{cpu_addr}, size{size}, offset{offset},
24 alignment{alignment} {}
25
26VKBufferCache::VKBufferCache(Tegra::MemoryManager& tegra_memory_manager,
27 VideoCore::RasterizerInterface& rasterizer, const VKDevice& device,
28 VKMemoryManager& memory_manager, VKScheduler& scheduler, u64 size)
29 : RasterizerCache{rasterizer}, tegra_memory_manager{tegra_memory_manager} {
30 const auto usage = vk::BufferUsageFlagBits::eVertexBuffer |
31 vk::BufferUsageFlagBits::eIndexBuffer |
32 vk::BufferUsageFlagBits::eUniformBuffer;
33 const auto access = vk::AccessFlagBits::eVertexAttributeRead | vk::AccessFlagBits::eIndexRead |
34 vk::AccessFlagBits::eUniformRead;
35 stream_buffer =
36 std::make_unique<VKStreamBuffer>(device, memory_manager, scheduler, size, usage, access,
37 vk::PipelineStageFlagBits::eAllCommands);
38 buffer_handle = stream_buffer->GetBuffer();
39}
40
41VKBufferCache::~VKBufferCache() = default;
42
43u64 VKBufferCache::UploadMemory(GPUVAddr gpu_addr, std::size_t size, u64 alignment, bool cache) {
44 const auto cpu_addr{tegra_memory_manager.GpuToCpuAddress(gpu_addr)};
45 ASSERT_MSG(cpu_addr, "Invalid GPU address");
46
47 // Cache management is a big overhead, so only cache entries with a given size.
48 // TODO: Figure out which size is the best for given games.
49 cache &= size >= 2048;
50
51 const auto& host_ptr{Memory::GetPointer(*cpu_addr)};
52 if (cache) {
53 auto entry = TryGet(host_ptr);
54 if (entry) {
55 if (entry->GetSize() >= size && entry->GetAlignment() == alignment) {
56 return entry->GetOffset();
57 }
58 Unregister(entry);
59 }
60 }
61
62 AlignBuffer(alignment);
63 const u64 uploaded_offset = buffer_offset;
64
65 if (!host_ptr) {
66 return uploaded_offset;
67 }
68
69 std::memcpy(buffer_ptr, host_ptr, size);
70 buffer_ptr += size;
71 buffer_offset += size;
72
73 if (cache) {
74 auto entry = std::make_shared<CachedBufferEntry>(*cpu_addr, size, uploaded_offset,
75 alignment, host_ptr);
76 Register(entry);
77 }
78
79 return uploaded_offset;
80}
81
82u64 VKBufferCache::UploadHostMemory(const u8* raw_pointer, std::size_t size, u64 alignment) {
83 AlignBuffer(alignment);
84 std::memcpy(buffer_ptr, raw_pointer, size);
85 const u64 uploaded_offset = buffer_offset;
86
87 buffer_ptr += size;
88 buffer_offset += size;
89 return uploaded_offset;
90}
91
92std::tuple<u8*, u64> VKBufferCache::ReserveMemory(std::size_t size, u64 alignment) {
93 AlignBuffer(alignment);
94 u8* const uploaded_ptr = buffer_ptr;
95 const u64 uploaded_offset = buffer_offset;
96
97 buffer_ptr += size;
98 buffer_offset += size;
99 return {uploaded_ptr, uploaded_offset};
100}
101
102void VKBufferCache::Reserve(std::size_t max_size) {
103 bool invalidate;
104 std::tie(buffer_ptr, buffer_offset_base, invalidate) = stream_buffer->Reserve(max_size);
105 buffer_offset = buffer_offset_base;
106
107 if (invalidate) {
108 InvalidateAll();
109 }
110}
111
112VKExecutionContext VKBufferCache::Send(VKExecutionContext exctx) {
113 return stream_buffer->Send(exctx, buffer_offset - buffer_offset_base);
114}
115
116void VKBufferCache::AlignBuffer(std::size_t alignment) {
117 // Align the offset, not the mapped pointer
118 const u64 offset_aligned = Common::AlignUp(buffer_offset, alignment);
119 buffer_ptr += offset_aligned - buffer_offset;
120 buffer_offset = offset_aligned;
121}
122
123} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.h b/src/video_core/renderer_vulkan/vk_buffer_cache.h
new file mode 100644
index 000000000..08b786aad
--- /dev/null
+++ b/src/video_core/renderer_vulkan/vk_buffer_cache.h
@@ -0,0 +1,103 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <memory>
8#include <tuple>
9
10#include "common/common_types.h"
11#include "video_core/gpu.h"
12#include "video_core/rasterizer_cache.h"
13#include "video_core/renderer_vulkan/declarations.h"
14#include "video_core/renderer_vulkan/vk_scheduler.h"
15
16namespace Tegra {
17class MemoryManager;
18}
19
20namespace Vulkan {
21
22class VKDevice;
23class VKFence;
24class VKMemoryManager;
25class VKStreamBuffer;
26
27class CachedBufferEntry final : public RasterizerCacheObject {
28public:
29 explicit CachedBufferEntry(VAddr cpu_addr, std::size_t size, u64 offset, std::size_t alignment,
30 u8* host_ptr);
31
32 VAddr GetCpuAddr() const override {
33 return cpu_addr;
34 }
35
36 std::size_t GetSizeInBytes() const override {
37 return size;
38 }
39
40 std::size_t GetSize() const {
41 return size;
42 }
43
44 u64 GetOffset() const {
45 return offset;
46 }
47
48 std::size_t GetAlignment() const {
49 return alignment;
50 }
51
52 // We do not have to flush this cache as things in it are never modified by us.
53 void Flush() override {}
54
55private:
56 VAddr cpu_addr{};
57 std::size_t size{};
58 u64 offset{};
59 std::size_t alignment{};
60};
61
62class VKBufferCache final : public RasterizerCache<std::shared_ptr<CachedBufferEntry>> {
63public:
64 explicit VKBufferCache(Tegra::MemoryManager& tegra_memory_manager,
65 VideoCore::RasterizerInterface& rasterizer, const VKDevice& device,
66 VKMemoryManager& memory_manager, VKScheduler& scheduler, u64 size);
67 ~VKBufferCache();
68
69 /// Uploads data from a guest GPU address. Returns host's buffer offset where it's been
70 /// allocated.
71 u64 UploadMemory(GPUVAddr gpu_addr, std::size_t size, u64 alignment = 4, bool cache = true);
72
73 /// Uploads from a host memory. Returns host's buffer offset where it's been allocated.
74 u64 UploadHostMemory(const u8* raw_pointer, std::size_t size, u64 alignment = 4);
75
76 /// Reserves memory to be used by host's CPU. Returns mapped address and offset.
77 std::tuple<u8*, u64> ReserveMemory(std::size_t size, u64 alignment = 4);
78
79 /// Reserves a region of memory to be used in subsequent upload/reserve operations.
80 void Reserve(std::size_t max_size);
81
82 /// Ensures that the set data is sent to the device.
83 [[nodiscard]] VKExecutionContext Send(VKExecutionContext exctx);
84
85 /// Returns the buffer cache handle.
86 vk::Buffer GetBuffer() const {
87 return buffer_handle;
88 }
89
90private:
91 void AlignBuffer(std::size_t alignment);
92
93 Tegra::MemoryManager& tegra_memory_manager;
94
95 std::unique_ptr<VKStreamBuffer> stream_buffer;
96 vk::Buffer buffer_handle;
97
98 u8* buffer_ptr = nullptr;
99 u64 buffer_offset = 0;
100 u64 buffer_offset_base = 0;
101};
102
103} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_device.cpp b/src/video_core/renderer_vulkan/vk_device.cpp
new file mode 100644
index 000000000..00242ecbe
--- /dev/null
+++ b/src/video_core/renderer_vulkan/vk_device.cpp
@@ -0,0 +1,238 @@
1// Copyright 2018 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <map>
6#include <optional>
7#include <set>
8#include <vector>
9#include "common/assert.h"
10#include "video_core/renderer_vulkan/declarations.h"
11#include "video_core/renderer_vulkan/vk_device.h"
12
13namespace Vulkan {
14
15namespace Alternatives {
16
17constexpr std::array<vk::Format, 3> Depth24UnormS8Uint = {
18 vk::Format::eD32SfloatS8Uint, vk::Format::eD16UnormS8Uint, {}};
19constexpr std::array<vk::Format, 3> Depth16UnormS8Uint = {
20 vk::Format::eD24UnormS8Uint, vk::Format::eD32SfloatS8Uint, {}};
21
22} // namespace Alternatives
23
24constexpr const vk::Format* GetFormatAlternatives(vk::Format format) {
25 switch (format) {
26 case vk::Format::eD24UnormS8Uint:
27 return Alternatives::Depth24UnormS8Uint.data();
28 case vk::Format::eD16UnormS8Uint:
29 return Alternatives::Depth16UnormS8Uint.data();
30 default:
31 return nullptr;
32 }
33}
34
35constexpr vk::FormatFeatureFlags GetFormatFeatures(vk::FormatProperties properties,
36 FormatType format_type) {
37 switch (format_type) {
38 case FormatType::Linear:
39 return properties.linearTilingFeatures;
40 case FormatType::Optimal:
41 return properties.optimalTilingFeatures;
42 case FormatType::Buffer:
43 return properties.bufferFeatures;
44 default:
45 return {};
46 }
47}
48
49VKDevice::VKDevice(const vk::DispatchLoaderDynamic& dldi, vk::PhysicalDevice physical,
50 vk::SurfaceKHR surface)
51 : physical{physical}, format_properties{GetFormatProperties(dldi, physical)} {
52 SetupFamilies(dldi, surface);
53 SetupProperties(dldi);
54}
55
56VKDevice::~VKDevice() = default;
57
58bool VKDevice::Create(const vk::DispatchLoaderDynamic& dldi, vk::Instance instance) {
59 const auto queue_cis = GetDeviceQueueCreateInfos();
60 vk::PhysicalDeviceFeatures device_features{};
61
62 const std::vector<const char*> extensions = {VK_KHR_SWAPCHAIN_EXTENSION_NAME};
63 const vk::DeviceCreateInfo device_ci({}, static_cast<u32>(queue_cis.size()), queue_cis.data(),
64 0, nullptr, static_cast<u32>(extensions.size()),
65 extensions.data(), &device_features);
66 vk::Device dummy_logical;
67 if (physical.createDevice(&device_ci, nullptr, &dummy_logical, dldi) != vk::Result::eSuccess) {
68 LOG_CRITICAL(Render_Vulkan, "Logical device failed to be created!");
69 return false;
70 }
71
72 dld.init(instance, dldi.vkGetInstanceProcAddr, dummy_logical, dldi.vkGetDeviceProcAddr);
73 logical = UniqueDevice(
74 dummy_logical, vk::ObjectDestroy<vk::NoParent, vk::DispatchLoaderDynamic>(nullptr, dld));
75
76 graphics_queue = logical->getQueue(graphics_family, 0, dld);
77 present_queue = logical->getQueue(present_family, 0, dld);
78 return true;
79}
80
81vk::Format VKDevice::GetSupportedFormat(vk::Format wanted_format,
82 vk::FormatFeatureFlags wanted_usage,
83 FormatType format_type) const {
84 if (IsFormatSupported(wanted_format, wanted_usage, format_type)) {
85 return wanted_format;
86 }
87 // The wanted format is not supported by hardware, search for alternatives
88 const vk::Format* alternatives = GetFormatAlternatives(wanted_format);
89 if (alternatives == nullptr) {
90 LOG_CRITICAL(Render_Vulkan,
91 "Format={} with usage={} and type={} has no defined alternatives and host "
92 "hardware does not support it",
93 static_cast<u32>(wanted_format), static_cast<u32>(wanted_usage),
94 static_cast<u32>(format_type));
95 UNREACHABLE();
96 return wanted_format;
97 }
98
99 std::size_t i = 0;
100 for (vk::Format alternative = alternatives[0]; alternative != vk::Format{};
101 alternative = alternatives[++i]) {
102 if (!IsFormatSupported(alternative, wanted_usage, format_type))
103 continue;
104 LOG_WARNING(Render_Vulkan,
105 "Emulating format={} with alternative format={} with usage={} and type={}",
106 static_cast<u32>(wanted_format), static_cast<u32>(alternative),
107 static_cast<u32>(wanted_usage), static_cast<u32>(format_type));
108 return alternative;
109 }
110
111 // No alternatives found, panic
112 LOG_CRITICAL(Render_Vulkan,
113 "Format={} with usage={} and type={} is not supported by the host hardware and "
114 "doesn't support any of the alternatives",
115 static_cast<u32>(wanted_format), static_cast<u32>(wanted_usage),
116 static_cast<u32>(format_type));
117 UNREACHABLE();
118 return wanted_format;
119}
120
121bool VKDevice::IsFormatSupported(vk::Format wanted_format, vk::FormatFeatureFlags wanted_usage,
122 FormatType format_type) const {
123 const auto it = format_properties.find(wanted_format);
124 if (it == format_properties.end()) {
125 LOG_CRITICAL(Render_Vulkan, "Unimplemented format query={}", vk::to_string(wanted_format));
126 UNREACHABLE();
127 return true;
128 }
129 const vk::FormatFeatureFlags supported_usage = GetFormatFeatures(it->second, format_type);
130 return (supported_usage & wanted_usage) == wanted_usage;
131}
132
133bool VKDevice::IsSuitable(const vk::DispatchLoaderDynamic& dldi, vk::PhysicalDevice physical,
134 vk::SurfaceKHR surface) {
135 const std::string swapchain_extension = VK_KHR_SWAPCHAIN_EXTENSION_NAME;
136
137 bool has_swapchain{};
138 for (const auto& prop : physical.enumerateDeviceExtensionProperties(nullptr, dldi)) {
139 has_swapchain |= prop.extensionName == swapchain_extension;
140 }
141 if (!has_swapchain) {
142 // The device doesn't support creating swapchains.
143 return false;
144 }
145
146 bool has_graphics{}, has_present{};
147 const auto queue_family_properties = physical.getQueueFamilyProperties(dldi);
148 for (u32 i = 0; i < static_cast<u32>(queue_family_properties.size()); ++i) {
149 const auto& family = queue_family_properties[i];
150 if (family.queueCount == 0)
151 continue;
152
153 has_graphics |=
154 (family.queueFlags & vk::QueueFlagBits::eGraphics) != static_cast<vk::QueueFlagBits>(0);
155 has_present |= physical.getSurfaceSupportKHR(i, surface, dldi) != 0;
156 }
157 if (!has_graphics || !has_present) {
158 // The device doesn't have a graphics and present queue.
159 return false;
160 }
161
162 // TODO(Rodrigo): Check if the device matches all requeriments.
163 const vk::PhysicalDeviceProperties props = physical.getProperties(dldi);
164 if (props.limits.maxUniformBufferRange < 65536) {
165 return false;
166 }
167
168 // Device is suitable.
169 return true;
170}
171
172void VKDevice::SetupFamilies(const vk::DispatchLoaderDynamic& dldi, vk::SurfaceKHR surface) {
173 std::optional<u32> graphics_family_, present_family_;
174
175 const auto queue_family_properties = physical.getQueueFamilyProperties(dldi);
176 for (u32 i = 0; i < static_cast<u32>(queue_family_properties.size()); ++i) {
177 if (graphics_family_ && present_family_)
178 break;
179
180 const auto& queue_family = queue_family_properties[i];
181 if (queue_family.queueCount == 0)
182 continue;
183
184 if (queue_family.queueFlags & vk::QueueFlagBits::eGraphics)
185 graphics_family_ = i;
186 if (physical.getSurfaceSupportKHR(i, surface, dldi))
187 present_family_ = i;
188 }
189 ASSERT(graphics_family_ && present_family_);
190
191 graphics_family = *graphics_family_;
192 present_family = *present_family_;
193}
194
195void VKDevice::SetupProperties(const vk::DispatchLoaderDynamic& dldi) {
196 const vk::PhysicalDeviceProperties props = physical.getProperties(dldi);
197 device_type = props.deviceType;
198 uniform_buffer_alignment = static_cast<u64>(props.limits.minUniformBufferOffsetAlignment);
199}
200
201std::vector<vk::DeviceQueueCreateInfo> VKDevice::GetDeviceQueueCreateInfos() const {
202 static const float QUEUE_PRIORITY = 1.f;
203
204 std::set<u32> unique_queue_families = {graphics_family, present_family};
205 std::vector<vk::DeviceQueueCreateInfo> queue_cis;
206
207 for (u32 queue_family : unique_queue_families)
208 queue_cis.push_back({{}, queue_family, 1, &QUEUE_PRIORITY});
209
210 return queue_cis;
211}
212
213std::map<vk::Format, vk::FormatProperties> VKDevice::GetFormatProperties(
214 const vk::DispatchLoaderDynamic& dldi, vk::PhysicalDevice physical) {
215 std::map<vk::Format, vk::FormatProperties> format_properties;
216
217 const auto AddFormatQuery = [&format_properties, &dldi, physical](vk::Format format) {
218 format_properties.emplace(format, physical.getFormatProperties(format, dldi));
219 };
220 AddFormatQuery(vk::Format::eA8B8G8R8UnormPack32);
221 AddFormatQuery(vk::Format::eB5G6R5UnormPack16);
222 AddFormatQuery(vk::Format::eA2B10G10R10UnormPack32);
223 AddFormatQuery(vk::Format::eR8G8B8A8Srgb);
224 AddFormatQuery(vk::Format::eR8Unorm);
225 AddFormatQuery(vk::Format::eD32Sfloat);
226 AddFormatQuery(vk::Format::eD16Unorm);
227 AddFormatQuery(vk::Format::eD16UnormS8Uint);
228 AddFormatQuery(vk::Format::eD24UnormS8Uint);
229 AddFormatQuery(vk::Format::eD32SfloatS8Uint);
230 AddFormatQuery(vk::Format::eBc1RgbaUnormBlock);
231 AddFormatQuery(vk::Format::eBc2UnormBlock);
232 AddFormatQuery(vk::Format::eBc3UnormBlock);
233 AddFormatQuery(vk::Format::eBc4UnormBlock);
234
235 return format_properties;
236}
237
238} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_device.h b/src/video_core/renderer_vulkan/vk_device.h
new file mode 100644
index 000000000..e87c7a508
--- /dev/null
+++ b/src/video_core/renderer_vulkan/vk_device.h
@@ -0,0 +1,116 @@
1// Copyright 2018 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <map>
8#include <vector>
9#include "common/common_types.h"
10#include "video_core/renderer_vulkan/declarations.h"
11
12namespace Vulkan {
13
14/// Format usage descriptor
15enum class FormatType { Linear, Optimal, Buffer };
16
17/// Handles data specific to a physical device.
18class VKDevice final {
19public:
20 explicit VKDevice(const vk::DispatchLoaderDynamic& dldi, vk::PhysicalDevice physical,
21 vk::SurfaceKHR surface);
22 ~VKDevice();
23
24 /// Initializes the device. Returns true on success.
25 bool Create(const vk::DispatchLoaderDynamic& dldi, vk::Instance instance);
26
27 /**
28 * Returns a format supported by the device for the passed requeriments.
29 * @param wanted_format The ideal format to be returned. It may not be the returned format.
30 * @param wanted_usage The usage that must be fulfilled even if the format is not supported.
31 * @param format_type Format type usage.
32 * @returns A format supported by the device.
33 */
34 vk::Format GetSupportedFormat(vk::Format wanted_format, vk::FormatFeatureFlags wanted_usage,
35 FormatType format_type) const;
36
37 /// Returns the dispatch loader with direct function pointers of the device
38 const vk::DispatchLoaderDynamic& GetDispatchLoader() const {
39 return dld;
40 }
41
42 /// Returns the logical device
43 vk::Device GetLogical() const {
44 return logical.get();
45 }
46
47 /// Returns the physical device.
48 vk::PhysicalDevice GetPhysical() const {
49 return physical;
50 }
51
52 /// Returns the main graphics queue.
53 vk::Queue GetGraphicsQueue() const {
54 return graphics_queue;
55 }
56
57 /// Returns the main present queue.
58 vk::Queue GetPresentQueue() const {
59 return present_queue;
60 }
61
62 /// Returns main graphics queue family index.
63 u32 GetGraphicsFamily() const {
64 return graphics_family;
65 }
66
67 /// Returns main present queue family index.
68 u32 GetPresentFamily() const {
69 return present_family;
70 }
71
72 /// Returns if the device is integrated with the host CPU
73 bool IsIntegrated() const {
74 return device_type == vk::PhysicalDeviceType::eIntegratedGpu;
75 }
76
77 /// Returns uniform buffer alignment requeriment
78 u64 GetUniformBufferAlignment() const {
79 return uniform_buffer_alignment;
80 }
81
82 /// Checks if the physical device is suitable.
83 static bool IsSuitable(const vk::DispatchLoaderDynamic& dldi, vk::PhysicalDevice physical,
84 vk::SurfaceKHR surface);
85
86private:
87 /// Sets up queue families.
88 void SetupFamilies(const vk::DispatchLoaderDynamic& dldi, vk::SurfaceKHR surface);
89
90 /// Sets up device properties.
91 void SetupProperties(const vk::DispatchLoaderDynamic& dldi);
92
93 /// Returns a list of queue initialization descriptors.
94 std::vector<vk::DeviceQueueCreateInfo> GetDeviceQueueCreateInfos() const;
95
96 /// Returns true if a format is supported.
97 bool IsFormatSupported(vk::Format wanted_format, vk::FormatFeatureFlags wanted_usage,
98 FormatType format_type) const;
99
100 /// Returns the device properties for Vulkan formats.
101 static std::map<vk::Format, vk::FormatProperties> GetFormatProperties(
102 const vk::DispatchLoaderDynamic& dldi, vk::PhysicalDevice physical);
103
104 const vk::PhysicalDevice physical; ///< Physical device
105 vk::DispatchLoaderDynamic dld; ///< Device function pointers
106 UniqueDevice logical; ///< Logical device
107 vk::Queue graphics_queue; ///< Main graphics queue
108 vk::Queue present_queue; ///< Main present queue
109 u32 graphics_family{}; ///< Main graphics queue family index
110 u32 present_family{}; ///< Main present queue family index
111 vk::PhysicalDeviceType device_type; ///< Physical device type
112 u64 uniform_buffer_alignment{}; ///< Uniform buffer alignment requeriment
113 std::map<vk::Format, vk::FormatProperties> format_properties; ///< Format properties dictionary
114};
115
116} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_memory_manager.cpp b/src/video_core/renderer_vulkan/vk_memory_manager.cpp
new file mode 100644
index 000000000..0451babbf
--- /dev/null
+++ b/src/video_core/renderer_vulkan/vk_memory_manager.cpp
@@ -0,0 +1,252 @@
1// Copyright 2018 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <algorithm>
6#include <optional>
7#include <tuple>
8#include <vector>
9#include "common/alignment.h"
10#include "common/assert.h"
11#include "common/common_types.h"
12#include "common/logging/log.h"
13#include "video_core/renderer_vulkan/declarations.h"
14#include "video_core/renderer_vulkan/vk_device.h"
15#include "video_core/renderer_vulkan/vk_memory_manager.h"
16
17namespace Vulkan {
18
19// TODO(Rodrigo): Fine tune this number
20constexpr u64 ALLOC_CHUNK_SIZE = 64 * 1024 * 1024;
21
22class VKMemoryAllocation final {
23public:
24 explicit VKMemoryAllocation(const VKDevice& device, vk::DeviceMemory memory,
25 vk::MemoryPropertyFlags properties, u64 alloc_size, u32 type)
26 : device{device}, memory{memory}, properties{properties}, alloc_size{alloc_size},
27 shifted_type{ShiftType(type)}, is_mappable{properties &
28 vk::MemoryPropertyFlagBits::eHostVisible} {
29 if (is_mappable) {
30 const auto dev = device.GetLogical();
31 const auto& dld = device.GetDispatchLoader();
32 base_address = static_cast<u8*>(dev.mapMemory(memory, 0, alloc_size, {}, dld));
33 }
34 }
35
36 ~VKMemoryAllocation() {
37 const auto dev = device.GetLogical();
38 const auto& dld = device.GetDispatchLoader();
39 if (is_mappable)
40 dev.unmapMemory(memory, dld);
41 dev.free(memory, nullptr, dld);
42 }
43
44 VKMemoryCommit Commit(vk::DeviceSize commit_size, vk::DeviceSize alignment) {
45 auto found = TryFindFreeSection(free_iterator, alloc_size, static_cast<u64>(commit_size),
46 static_cast<u64>(alignment));
47 if (!found) {
48 found = TryFindFreeSection(0, free_iterator, static_cast<u64>(commit_size),
49 static_cast<u64>(alignment));
50 if (!found) {
51 // Signal out of memory, it'll try to do more allocations.
52 return nullptr;
53 }
54 }
55 u8* address = is_mappable ? base_address + *found : nullptr;
56 auto commit = std::make_unique<VKMemoryCommitImpl>(this, memory, address, *found,
57 *found + commit_size);
58 commits.push_back(commit.get());
59
60 // Last commit's address is highly probable to be free.
61 free_iterator = *found + commit_size;
62
63 return commit;
64 }
65
66 void Free(const VKMemoryCommitImpl* commit) {
67 ASSERT(commit);
68 const auto it =
69 std::find_if(commits.begin(), commits.end(),
70 [&](const auto& stored_commit) { return stored_commit == commit; });
71 if (it == commits.end()) {
72 LOG_CRITICAL(Render_Vulkan, "Freeing unallocated commit!");
73 UNREACHABLE();
74 return;
75 }
76 commits.erase(it);
77 }
78
79 /// Returns whether this allocation is compatible with the arguments.
80 bool IsCompatible(vk::MemoryPropertyFlags wanted_properties, u32 type_mask) const {
81 return (wanted_properties & properties) != vk::MemoryPropertyFlagBits(0) &&
82 (type_mask & shifted_type) != 0;
83 }
84
85private:
86 static constexpr u32 ShiftType(u32 type) {
87 return 1U << type;
88 }
89
90 /// A memory allocator, it may return a free region between "start" and "end" with the solicited
91 /// requeriments.
92 std::optional<u64> TryFindFreeSection(u64 start, u64 end, u64 size, u64 alignment) const {
93 u64 iterator = start;
94 while (iterator + size < end) {
95 const u64 try_left = Common::AlignUp(iterator, alignment);
96 const u64 try_right = try_left + size;
97
98 bool overlap = false;
99 for (const auto& commit : commits) {
100 const auto [commit_left, commit_right] = commit->interval;
101 if (try_left < commit_right && commit_left < try_right) {
102 // There's an overlap, continue the search where the overlapping commit ends.
103 iterator = commit_right;
104 overlap = true;
105 break;
106 }
107 }
108 if (!overlap) {
109 // A free address has been found.
110 return try_left;
111 }
112 }
113 // No free regions where found, return an empty optional.
114 return std::nullopt;
115 }
116
117 const VKDevice& device; ///< Vulkan device.
118 const vk::DeviceMemory memory; ///< Vulkan memory allocation handler.
119 const vk::MemoryPropertyFlags properties; ///< Vulkan properties.
120 const u64 alloc_size; ///< Size of this allocation.
121 const u32 shifted_type; ///< Stored Vulkan type of this allocation, shifted.
122 const bool is_mappable; ///< Whether the allocation is mappable.
123
124 /// Base address of the mapped pointer.
125 u8* base_address{};
126
127 /// Hints where the next free region is likely going to be.
128 u64 free_iterator{};
129
130 /// Stores all commits done from this allocation.
131 std::vector<const VKMemoryCommitImpl*> commits;
132};
133
134VKMemoryManager::VKMemoryManager(const VKDevice& device)
135 : device{device}, props{device.GetPhysical().getMemoryProperties(device.GetDispatchLoader())},
136 is_memory_unified{GetMemoryUnified(props)} {}
137
138VKMemoryManager::~VKMemoryManager() = default;
139
140VKMemoryCommit VKMemoryManager::Commit(const vk::MemoryRequirements& reqs, bool host_visible) {
141 ASSERT(reqs.size < ALLOC_CHUNK_SIZE);
142
143 // When a host visible commit is asked, search for host visible and coherent, otherwise search
144 // for a fast device local type.
145 const vk::MemoryPropertyFlags wanted_properties =
146 host_visible
147 ? vk::MemoryPropertyFlagBits::eHostVisible | vk::MemoryPropertyFlagBits::eHostCoherent
148 : vk::MemoryPropertyFlagBits::eDeviceLocal;
149
150 const auto TryCommit = [&]() -> VKMemoryCommit {
151 for (auto& alloc : allocs) {
152 if (!alloc->IsCompatible(wanted_properties, reqs.memoryTypeBits))
153 continue;
154
155 if (auto commit = alloc->Commit(reqs.size, reqs.alignment); commit) {
156 return commit;
157 }
158 }
159 return {};
160 };
161
162 if (auto commit = TryCommit(); commit) {
163 return commit;
164 }
165
166 // Commit has failed, allocate more memory.
167 if (!AllocMemory(wanted_properties, reqs.memoryTypeBits, ALLOC_CHUNK_SIZE)) {
168 // TODO(Rodrigo): Try to use host memory.
169 LOG_CRITICAL(Render_Vulkan, "Ran out of memory!");
170 UNREACHABLE();
171 }
172
173 // Commit again, this time it won't fail since there's a fresh allocation above. If it does,
174 // there's a bug.
175 auto commit = TryCommit();
176 ASSERT(commit);
177 return commit;
178}
179
180VKMemoryCommit VKMemoryManager::Commit(vk::Buffer buffer, bool host_visible) {
181 const auto dev = device.GetLogical();
182 const auto& dld = device.GetDispatchLoader();
183 const auto requeriments = dev.getBufferMemoryRequirements(buffer, dld);
184 auto commit = Commit(requeriments, host_visible);
185 dev.bindBufferMemory(buffer, commit->GetMemory(), commit->GetOffset(), dld);
186 return commit;
187}
188
189VKMemoryCommit VKMemoryManager::Commit(vk::Image image, bool host_visible) {
190 const auto dev = device.GetLogical();
191 const auto& dld = device.GetDispatchLoader();
192 const auto requeriments = dev.getImageMemoryRequirements(image, dld);
193 auto commit = Commit(requeriments, host_visible);
194 dev.bindImageMemory(image, commit->GetMemory(), commit->GetOffset(), dld);
195 return commit;
196}
197
198bool VKMemoryManager::AllocMemory(vk::MemoryPropertyFlags wanted_properties, u32 type_mask,
199 u64 size) {
200 const u32 type = [&]() {
201 for (u32 type_index = 0; type_index < props.memoryTypeCount; ++type_index) {
202 const auto flags = props.memoryTypes[type_index].propertyFlags;
203 if ((type_mask & (1U << type_index)) && (flags & wanted_properties)) {
204 // The type matches in type and in the wanted properties.
205 return type_index;
206 }
207 }
208 LOG_CRITICAL(Render_Vulkan, "Couldn't find a compatible memory type!");
209 UNREACHABLE();
210 return 0u;
211 }();
212
213 const auto dev = device.GetLogical();
214 const auto& dld = device.GetDispatchLoader();
215
216 // Try to allocate found type.
217 const vk::MemoryAllocateInfo memory_ai(size, type);
218 vk::DeviceMemory memory;
219 if (const vk::Result res = dev.allocateMemory(&memory_ai, nullptr, &memory, dld);
220 res != vk::Result::eSuccess) {
221 LOG_CRITICAL(Render_Vulkan, "Device allocation failed with code {}!", vk::to_string(res));
222 return false;
223 }
224 allocs.push_back(
225 std::make_unique<VKMemoryAllocation>(device, memory, wanted_properties, size, type));
226 return true;
227}
228
229/*static*/ bool VKMemoryManager::GetMemoryUnified(const vk::PhysicalDeviceMemoryProperties& props) {
230 for (u32 heap_index = 0; heap_index < props.memoryHeapCount; ++heap_index) {
231 if (!(props.memoryHeaps[heap_index].flags & vk::MemoryHeapFlagBits::eDeviceLocal)) {
232 // Memory is considered unified when heaps are device local only.
233 return false;
234 }
235 }
236 return true;
237}
238
239VKMemoryCommitImpl::VKMemoryCommitImpl(VKMemoryAllocation* allocation, vk::DeviceMemory memory,
240 u8* data, u64 begin, u64 end)
241 : interval(std::make_pair(begin, end)), memory{memory}, allocation{allocation}, data{data} {}
242
243VKMemoryCommitImpl::~VKMemoryCommitImpl() {
244 allocation->Free(this);
245}
246
247u8* VKMemoryCommitImpl::GetData() const {
248 ASSERT_MSG(data != nullptr, "Trying to access an unmapped commit.");
249 return data;
250}
251
252} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_memory_manager.h b/src/video_core/renderer_vulkan/vk_memory_manager.h
new file mode 100644
index 000000000..073597b35
--- /dev/null
+++ b/src/video_core/renderer_vulkan/vk_memory_manager.h
@@ -0,0 +1,87 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <memory>
8#include <utility>
9#include <vector>
10#include "common/common_types.h"
11#include "video_core/renderer_vulkan/declarations.h"
12
13namespace Vulkan {
14
15class VKDevice;
16class VKMemoryAllocation;
17class VKMemoryCommitImpl;
18
19using VKMemoryCommit = std::unique_ptr<VKMemoryCommitImpl>;
20
21class VKMemoryManager final {
22public:
23 explicit VKMemoryManager(const VKDevice& device);
24 ~VKMemoryManager();
25
26 /**
27 * Commits a memory with the specified requeriments.
28 * @param reqs Requeriments returned from a Vulkan call.
29 * @param host_visible Signals the allocator that it *must* use host visible and coherent
30 * memory. When passing false, it will try to allocate device local memory.
31 * @returns A memory commit.
32 */
33 VKMemoryCommit Commit(const vk::MemoryRequirements& reqs, bool host_visible);
34
35 /// Commits memory required by the buffer and binds it.
36 VKMemoryCommit Commit(vk::Buffer buffer, bool host_visible);
37
38 /// Commits memory required by the image and binds it.
39 VKMemoryCommit Commit(vk::Image image, bool host_visible);
40
41 /// Returns true if the memory allocations are done always in host visible and coherent memory.
42 bool IsMemoryUnified() const {
43 return is_memory_unified;
44 }
45
46private:
47 /// Allocates a chunk of memory.
48 bool AllocMemory(vk::MemoryPropertyFlags wanted_properties, u32 type_mask, u64 size);
49
50 /// Returns true if the device uses an unified memory model.
51 static bool GetMemoryUnified(const vk::PhysicalDeviceMemoryProperties& props);
52
53 const VKDevice& device; ///< Device handler.
54 const vk::PhysicalDeviceMemoryProperties props; ///< Physical device properties.
55 const bool is_memory_unified; ///< True if memory model is unified.
56 std::vector<std::unique_ptr<VKMemoryAllocation>> allocs; ///< Current allocations.
57};
58
59class VKMemoryCommitImpl final {
60 friend VKMemoryAllocation;
61
62public:
63 explicit VKMemoryCommitImpl(VKMemoryAllocation* allocation, vk::DeviceMemory memory, u8* data,
64 u64 begin, u64 end);
65 ~VKMemoryCommitImpl();
66
67 /// Returns the writeable memory map. The commit has to be mappable.
68 u8* GetData() const;
69
70 /// Returns the Vulkan memory handler.
71 vk::DeviceMemory GetMemory() const {
72 return memory;
73 }
74
75 /// Returns the start position of the commit relative to the allocation.
76 vk::DeviceSize GetOffset() const {
77 return static_cast<vk::DeviceSize>(interval.first);
78 }
79
80private:
81 std::pair<u64, u64> interval{}; ///< Interval where the commit exists.
82 vk::DeviceMemory memory; ///< Vulkan device memory handler.
83 VKMemoryAllocation* allocation{}; ///< Pointer to the large memory allocation.
84 u8* data{}; ///< Pointer to the host mapped memory, it has the commit offset included.
85};
86
87} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_resource_manager.cpp b/src/video_core/renderer_vulkan/vk_resource_manager.cpp
new file mode 100644
index 000000000..13c46e5b8
--- /dev/null
+++ b/src/video_core/renderer_vulkan/vk_resource_manager.cpp
@@ -0,0 +1,285 @@
1// Copyright 2018 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <algorithm>
6#include <optional>
7#include "common/assert.h"
8#include "common/logging/log.h"
9#include "video_core/renderer_vulkan/declarations.h"
10#include "video_core/renderer_vulkan/vk_device.h"
11#include "video_core/renderer_vulkan/vk_resource_manager.h"
12
13namespace Vulkan {
14
// TODO(Rodrigo): Fine tune these numbers.
constexpr std::size_t COMMAND_BUFFER_POOL_SIZE = 0x1000; ///< Command buffers per pool chunk.
constexpr std::size_t FENCES_GROW_STEP = 0x40; ///< Fences added each time the pool grows.
18
19class CommandBufferPool final : public VKFencedPool {
20public:
21 CommandBufferPool(const VKDevice& device)
22 : VKFencedPool(COMMAND_BUFFER_POOL_SIZE), device{device} {}
23
24 void Allocate(std::size_t begin, std::size_t end) override {
25 const auto dev = device.GetLogical();
26 const auto& dld = device.GetDispatchLoader();
27 const u32 graphics_family = device.GetGraphicsFamily();
28
29 auto pool = std::make_unique<Pool>();
30
31 // Command buffers are going to be commited, recorded, executed every single usage cycle.
32 // They are also going to be reseted when commited.
33 const auto pool_flags = vk::CommandPoolCreateFlagBits::eTransient |
34 vk::CommandPoolCreateFlagBits::eResetCommandBuffer;
35 const vk::CommandPoolCreateInfo cmdbuf_pool_ci(pool_flags, graphics_family);
36 pool->handle = dev.createCommandPoolUnique(cmdbuf_pool_ci, nullptr, dld);
37
38 const vk::CommandBufferAllocateInfo cmdbuf_ai(*pool->handle,
39 vk::CommandBufferLevel::ePrimary,
40 static_cast<u32>(COMMAND_BUFFER_POOL_SIZE));
41 pool->cmdbufs =
42 dev.allocateCommandBuffersUnique<std::allocator<UniqueCommandBuffer>>(cmdbuf_ai, dld);
43
44 pools.push_back(std::move(pool));
45 }
46
47 vk::CommandBuffer Commit(VKFence& fence) {
48 const std::size_t index = CommitResource(fence);
49 const auto pool_index = index / COMMAND_BUFFER_POOL_SIZE;
50 const auto sub_index = index % COMMAND_BUFFER_POOL_SIZE;
51 return *pools[pool_index]->cmdbufs[sub_index];
52 }
53
54private:
55 struct Pool {
56 UniqueCommandPool handle;
57 std::vector<UniqueCommandBuffer> cmdbufs;
58 };
59
60 const VKDevice& device;
61
62 std::vector<std::unique_ptr<Pool>> pools;
63};
64
VKResource::VKResource() = default;

VKResource::~VKResource() = default;

VKFence::VKFence(const VKDevice& device, UniqueFence handle)
    : device{device}, handle{std::move(handle)} {}

VKFence::~VKFence() = default;

void VKFence::Wait() {
    // Block the host until the driver signals the fence. Per the header's warning, the fence
    // must have been submitted to a queue before calling this.
    const auto dev = device.GetLogical();
    const auto& dld = device.GetDispatchLoader();
    dev.waitForFences({*handle}, true, std::numeric_limits<u64>::max(), dld);
}

void VKFence::Release() {
    // Gives up ownership; the fence stays "used" until Tick observes it signaled.
    is_owned = false;
}

void VKFence::Commit() {
    // Called by VKResourceManager when handing the fence out to a new owner.
    is_owned = true;
    is_used = true;
}
88
bool VKFence::Tick(bool gpu_wait, bool owner_wait) {
    if (!is_used) {
        // If a fence is not used it's always free.
        return true;
    }
    if (is_owned && !owner_wait) {
        // The fence is still being owned (Release has not been called) and ownership wait has
        // not been asked.
        return false;
    }

    const auto dev = device.GetLogical();
    const auto& dld = device.GetDispatchLoader();
    if (gpu_wait) {
        // Wait for the fence if it has been requested.
        dev.waitForFences({*handle}, true, std::numeric_limits<u64>::max(), dld);
    } else {
        if (dev.getFenceStatus(*handle, dld) != vk::Result::eSuccess) {
            // Vulkan fence is not ready, not much it can do here
            return false;
        }
    }

    // Broadcast resources their free state.
    for (auto* resource : protected_resources) {
        resource->OnFenceRemoval(this);
    }
    protected_resources.clear();

    // Prepare fence for reuse: reset it so it can be submitted again.
    dev.resetFences({*handle}, dld);
    is_used = false;
    return true;
}

void VKFence::Protect(VKResource* resource) {
    // The resource will be notified through OnFenceRemoval once this fence is freed.
    protected_resources.push_back(resource);
}

void VKFence::Unprotect(VKResource* resource) {
    const auto it = std::find(protected_resources.begin(), protected_resources.end(), resource);
    ASSERT(it != protected_resources.end());

    // Notify the resource before dropping it from the protection list.
    resource->OnFenceRemoval(this);
    protected_resources.erase(it);
}
135
VKFenceWatch::VKFenceWatch() = default;

VKFenceWatch::~VKFenceWatch() {
    // Detach from the fence so it does not call OnFenceRemoval on a destroyed object.
    if (fence) {
        fence->Unprotect(this);
    }
}

void VKFenceWatch::Wait() {
    if (fence == nullptr) {
        // Nothing is watching this resource; it is already free.
        return;
    }
    fence->Wait();
    // Unprotect invokes OnFenceRemoval, which resets `fence` back to nullptr.
    fence->Unprotect(this);
}

void VKFenceWatch::Watch(VKFence& new_fence) {
    // Release any previous watch (blocking if needed) before switching to the new fence.
    Wait();
    fence = &new_fence;
    fence->Protect(this);
}

bool VKFenceWatch::TryWatch(VKFence& new_fence) {
    if (fence) {
        // Already being watched; the caller has to wait or pick another resource.
        return false;
    }
    fence = &new_fence;
    fence->Protect(this);
    return true;
}

void VKFenceWatch::OnFenceRemoval(VKFence* signaling_fence) {
    ASSERT_MSG(signaling_fence == fence, "Removing the wrong fence");
    fence = nullptr;
}
171
VKFencedPool::VKFencedPool(std::size_t grow_step) : grow_step{grow_step} {}

VKFencedPool::~VKFencedPool() = default;

std::size_t VKFencedPool::CommitResource(VKFence& fence) {
    // Returns the index of the first free resource in [begin, end), or empty when none is free.
    const auto Search = [&](std::size_t begin, std::size_t end) -> std::optional<std::size_t> {
        for (std::size_t iterator = begin; iterator < end; ++iterator) {
            if (watches[iterator]->TryWatch(fence)) {
                // The resource is now being watched, a free resource was successfully found.
                return iterator;
            }
        }
        return {};
    };
    // Try to find a free resource from the hinted position to the end.
    auto found = Search(free_iterator, watches.size());
    if (!found) {
        // Search from beginning to the hinted position.
        found = Search(0, free_iterator);
        if (!found) {
            // Both searches failed, the pool is full; handle it.
            const std::size_t free_resource = ManageOverflow();

            // Watch will wait for the resource to be free.
            watches[free_resource]->Watch(fence);
            found = free_resource;
        }
    }
    // Free iterator is hinted to the resource after the one that's been committed.
    free_iterator = (*found + 1) % watches.size();
    return *found;
}

std::size_t VKFencedPool::ManageOverflow() {
    const std::size_t old_capacity = watches.size();
    Grow();

    // The last entry is guaranteed to be free, since it's the first element of the freshly
    // allocated resources.
    return old_capacity;
}
213
214void VKFencedPool::Grow() {
215 const std::size_t old_capacity = watches.size();
216 watches.resize(old_capacity + grow_step);
217 std::generate(watches.begin() + old_capacity, watches.end(),
218 []() { return std::make_unique<VKFenceWatch>(); });
219 Allocate(old_capacity, old_capacity + grow_step);
220}
221
VKResourceManager::VKResourceManager(const VKDevice& device) : device{device} {
    // Seed the fence pool and create the command buffer pool up front.
    GrowFences(FENCES_GROW_STEP);
    command_buffer_pool = std::make_unique<CommandBufferPool>(device);
}

VKResourceManager::~VKResourceManager() = default;

VKFence& VKResourceManager::CommitFence() {
    // Ticks fences starting at the hinted index (wrapping around) and returns the first free
    // one, or nullptr when every fence is still busy.
    const auto StepFences = [&](bool gpu_wait, bool owner_wait) -> VKFence* {
        const auto Tick = [=](auto& fence) { return fence->Tick(gpu_wait, owner_wait); };
        const auto hinted = fences.begin() + fences_iterator;

        auto it = std::find_if(hinted, fences.end(), Tick);
        if (it == fences.end()) {
            it = std::find_if(fences.begin(), hinted, Tick);
            if (it == hinted) {
                return nullptr;
            }
        }
        // Hint the next search to start right after the fence just committed.
        fences_iterator = std::distance(fences.begin(), it) + 1;
        if (fences_iterator >= fences.size())
            fences_iterator = 0;

        auto& fence = *it;
        fence->Commit();
        return fence.get();
    };

    VKFence* found_fence = StepFences(false, false);
    if (!found_fence) {
        // Try again, this time waiting.
        found_fence = StepFences(true, false);

        if (!found_fence) {
            // Allocate new fences and try again.
            LOG_INFO(Render_Vulkan, "Allocating new fences {} -> {}", fences.size(),
                     fences.size() + FENCES_GROW_STEP);

            GrowFences(FENCES_GROW_STEP);
            found_fence = StepFences(true, false);
            ASSERT(found_fence != nullptr);
        }
    }
    return *found_fence;
}

vk::CommandBuffer VKResourceManager::CommitCommandBuffer(VKFence& fence) {
    // Delegates to the fenced command buffer pool.
    return command_buffer_pool->Commit(fence);
}
271
272void VKResourceManager::GrowFences(std::size_t new_fences_count) {
273 const auto dev = device.GetLogical();
274 const auto& dld = device.GetDispatchLoader();
275 const vk::FenceCreateInfo fence_ci;
276
277 const std::size_t previous_size = fences.size();
278 fences.resize(previous_size + new_fences_count);
279
280 std::generate(fences.begin() + previous_size, fences.end(), [&]() {
281 return std::make_unique<VKFence>(device, dev.createFenceUnique(fence_ci, nullptr, dld));
282 });
283}
284
285} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_resource_manager.h b/src/video_core/renderer_vulkan/vk_resource_manager.h
new file mode 100644
index 000000000..08ee86fa6
--- /dev/null
+++ b/src/video_core/renderer_vulkan/vk_resource_manager.h
@@ -0,0 +1,180 @@
1// Copyright 2018 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <cstddef>
8#include <memory>
9#include <vector>
10#include "video_core/renderer_vulkan/declarations.h"
11
12namespace Vulkan {
13
14class VKDevice;
15class VKFence;
16class VKResourceManager;
17
18class CommandBufferPool;
19
/// Interface for a Vulkan resource
class VKResource {
public:
    explicit VKResource();
    virtual ~VKResource();

    /**
     * Signals the object that an owning fence has been signaled.
     * @param signaling_fence Fence that signals its usage end.
     */
    virtual void OnFenceRemoval(VKFence* signaling_fence) = 0;
};

/**
 * Fences take ownership of objects, protecting them from GPU-side or driver-side concurrent access.
 * They must be committed from the resource manager. Their usage flow is: commit the fence from the
 * resource manager, protect resources with it and use them, send the fence to an execution queue
 * and Wait for it if needed and then call Release. Used resources will automatically be signaled
 * when they are free to be reused.
 * @brief Protects resources for concurrent usage and signals its release.
 */
class VKFence {
    friend class VKResourceManager;

public:
    explicit VKFence(const VKDevice& device, UniqueFence handle);
    ~VKFence();

    /**
     * Waits for the fence to be signaled.
     * @warning You must have ownership of the fence and it has to be previously sent to a queue to
     * call this function.
     */
    void Wait();

    /**
     * Releases ownership of the fence. Pass after it has been sent to an execution queue.
     * Unmanaged usage of the fence after the call will result in undefined behavior because it may
     * be being used for something else.
     */
    void Release();

    /// Protects a resource with this fence.
    void Protect(VKResource* resource);

    /// Removes protection for a resource.
    void Unprotect(VKResource* resource);

    /// Retrieves the fence.
    operator vk::Fence() const {
        return *handle;
    }

private:
    /// Take ownership of the fence.
    void Commit();

    /**
     * Updates the fence status.
     * @warning Waiting for the owner might soft lock the execution.
     * @param gpu_wait Wait for the fence to be signaled by the driver.
     * @param owner_wait Wait for the owner to signal its freedom.
     * @returns True if the fence is free. Waiting for gpu and owner will always return true.
     */
    bool Tick(bool gpu_wait, bool owner_wait);

    const VKDevice& device;                       ///< Device handler
    UniqueFence handle;                           ///< Vulkan fence
    std::vector<VKResource*> protected_resources; ///< List of resources protected by this fence
    bool is_owned = false; ///< The fence has been committed but not released yet.
    bool is_used = false;  ///< The fence has been committed but it has not been checked to be free.
};
92
/**
 * A fence watch is used to keep track of the usage of a fence and protect a resource or set of
 * resources without having to inherit VKResource from their handlers.
 */
class VKFenceWatch final : public VKResource {
public:
    explicit VKFenceWatch();
    ~VKFenceWatch() override;

    /// Waits for the fence to be released.
    void Wait();

    /**
     * Waits for a previous fence and watches a new one.
     * @param new_fence New fence to wait to.
     */
    void Watch(VKFence& new_fence);

    /**
     * Checks if it's currently being watched and starts watching it if it's available.
     * @returns True if a watch has started, false if it's being watched.
     */
    bool TryWatch(VKFence& new_fence);

    /// Clears the watched fence; invoked by the fence when it is signaled or unprotected.
    void OnFenceRemoval(VKFence* signaling_fence) override;

private:
    VKFence* fence{}; ///< Fence watching this resource. nullptr when the watch is free.
};

/**
 * Handles a pool of resources protected by fences. Manages resource overflow allocating more
 * resources.
 */
class VKFencedPool {
public:
    explicit VKFencedPool(std::size_t grow_step);
    virtual ~VKFencedPool();

protected:
    /**
     * Commits a free resource and protects it with a fence. It may allocate new resources.
     * @param fence Fence that protects the committed resource.
     * @returns Index of the resource committed.
     */
    std::size_t CommitResource(VKFence& fence);

    /// Called when a chunk of resources have to be allocated.
    virtual void Allocate(std::size_t begin, std::size_t end) = 0;

private:
    /// Manages pool overflow allocating new resources.
    std::size_t ManageOverflow();

    /// Allocates a new page of resources.
    void Grow();

    std::size_t grow_step = 0;     ///< Number of new resources created after an overflow
    std::size_t free_iterator = 0; ///< Hint to where the next free resources is likely to be found
    std::vector<std::unique_ptr<VKFenceWatch>> watches; ///< Set of watched resources
};

/**
 * The resource manager handles all resources that can be protected with a fence avoiding
 * driver-side or GPU-side concurrent usage. Usage is documented in VKFence.
 */
class VKResourceManager final {
public:
    explicit VKResourceManager(const VKDevice& device);
    ~VKResourceManager();

    /// Commits a fence. It has to be sent to a queue and released.
    VKFence& CommitFence();

    /// Commits an unused command buffer and protects it with a fence.
    vk::CommandBuffer CommitCommandBuffer(VKFence& fence);

private:
    /// Allocates new fences.
    void GrowFences(std::size_t new_fences_count);

    const VKDevice& device;          ///< Device handler.
    std::size_t fences_iterator = 0; ///< Index where a free fence is likely to be found.
    std::vector<std::unique_ptr<VKFence>> fences; ///< Pool of fences.
    std::unique_ptr<CommandBufferPool> command_buffer_pool; ///< Pool of command buffers.
};
179
180} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_sampler_cache.cpp b/src/video_core/renderer_vulkan/vk_sampler_cache.cpp
new file mode 100644
index 000000000..ed3178f09
--- /dev/null
+++ b/src/video_core/renderer_vulkan/vk_sampler_cache.cpp
@@ -0,0 +1,81 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <cstring>
6#include <optional>
7#include <unordered_map>
8
9#include "common/assert.h"
10#include "common/cityhash.h"
11#include "video_core/renderer_vulkan/declarations.h"
12#include "video_core/renderer_vulkan/maxwell_to_vk.h"
13#include "video_core/renderer_vulkan/vk_sampler_cache.h"
14#include "video_core/textures/texture.h"
15
16namespace Vulkan {
17
18static std::optional<vk::BorderColor> TryConvertBorderColor(std::array<float, 4> color) {
19 // TODO(Rodrigo): Manage integer border colors
20 if (color == std::array<float, 4>{0, 0, 0, 0}) {
21 return vk::BorderColor::eFloatTransparentBlack;
22 } else if (color == std::array<float, 4>{0, 0, 0, 1}) {
23 return vk::BorderColor::eFloatOpaqueBlack;
24 } else if (color == std::array<float, 4>{1, 1, 1, 1}) {
25 return vk::BorderColor::eFloatOpaqueWhite;
26 } else {
27 return {};
28 }
29}
30
31std::size_t SamplerCacheKey::Hash() const {
32 static_assert(sizeof(raw) % sizeof(u64) == 0);
33 return static_cast<std::size_t>(
34 Common::CityHash64(reinterpret_cast<const char*>(raw.data()), sizeof(raw) / sizeof(u64)));
35}
36
bool SamplerCacheKey::operator==(const SamplerCacheKey& rhs) const {
    // Two keys are equal iff every raw TSC word matches.
    return raw == rhs.raw;
}

VKSamplerCache::VKSamplerCache(const VKDevice& device) : device{device} {}

VKSamplerCache::~VKSamplerCache() = default;
44
45vk::Sampler VKSamplerCache::GetSampler(const Tegra::Texture::TSCEntry& tsc) {
46 const auto [entry, is_cache_miss] = cache.try_emplace(SamplerCacheKey{tsc});
47 auto& sampler = entry->second;
48 if (is_cache_miss) {
49 sampler = CreateSampler(tsc);
50 }
51 return *sampler;
52}
53
UniqueSampler VKSamplerCache::CreateSampler(const Tegra::Texture::TSCEntry& tsc) {
    const float max_anisotropy = tsc.GetMaxAnisotropy();
    // Anisotropic filtering is only enabled when the TSC asks for more than 1x.
    const bool has_anisotropy = max_anisotropy > 1.0f;

    const auto border_color = tsc.GetBorderColor();
    const auto vk_border_color = TryConvertBorderColor(border_color);
    UNIMPLEMENTED_IF_MSG(!vk_border_color, "Unimplemented border color {} {} {} {}",
                         border_color[0], border_color[1], border_color[2], border_color[3]);

    constexpr bool unnormalized_coords = false;

    // Positional arguments follow vk::SamplerCreateInfo's constructor order; the value_or
    // fallback only applies when the TSC color has no direct Vulkan equivalent (see above).
    const vk::SamplerCreateInfo sampler_ci(
        {}, MaxwellToVK::Sampler::Filter(tsc.mag_filter),
        MaxwellToVK::Sampler::Filter(tsc.min_filter),
        MaxwellToVK::Sampler::MipmapMode(tsc.mipmap_filter),
        MaxwellToVK::Sampler::WrapMode(tsc.wrap_u), MaxwellToVK::Sampler::WrapMode(tsc.wrap_v),
        MaxwellToVK::Sampler::WrapMode(tsc.wrap_p), tsc.GetLodBias(), has_anisotropy,
        max_anisotropy, tsc.depth_compare_enabled,
        MaxwellToVK::Sampler::DepthCompareFunction(tsc.depth_compare_func), tsc.GetMinLod(),
        tsc.GetMaxLod(), vk_border_color.value_or(vk::BorderColor::eFloatTransparentBlack),
        unnormalized_coords);

    const auto& dld = device.GetDispatchLoader();
    const auto dev = device.GetLogical();
    return dev.createSamplerUnique(sampler_ci, nullptr, dld);
}
80
81} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_sampler_cache.h b/src/video_core/renderer_vulkan/vk_sampler_cache.h
new file mode 100644
index 000000000..c6394dc87
--- /dev/null
+++ b/src/video_core/renderer_vulkan/vk_sampler_cache.h
@@ -0,0 +1,56 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <unordered_map>
8
9#include "common/common_types.h"
10#include "video_core/renderer_vulkan/declarations.h"
11#include "video_core/textures/texture.h"
12
13namespace Vulkan {
14
15class VKDevice;
16
/// Hashable wrapper over a TSC entry so it can key an unordered_map.
struct SamplerCacheKey final : public Tegra::Texture::TSCEntry {
    /// Hashes the raw TSC words.
    std::size_t Hash() const;

    /// Compares the raw TSC words for equality.
    bool operator==(const SamplerCacheKey& rhs) const;

    bool operator!=(const SamplerCacheKey& rhs) const {
        return !operator==(rhs);
    }
};

} // namespace Vulkan

namespace std {

/// std::hash specialization forwarding to SamplerCacheKey::Hash.
template <>
struct hash<Vulkan::SamplerCacheKey> {
    std::size_t operator()(const Vulkan::SamplerCacheKey& k) const noexcept {
        return k.Hash();
    }
};

} // namespace std
39
40namespace Vulkan {
41
/// Caches Vulkan samplers, lazily creating them from Maxwell TSC entries.
class VKSamplerCache {
public:
    explicit VKSamplerCache(const VKDevice& device);
    ~VKSamplerCache();

    /// Returns a sampler matching the TSC entry, creating and caching it on a miss.
    vk::Sampler GetSampler(const Tegra::Texture::TSCEntry& tsc);

private:
    /// Builds a new Vulkan sampler from a TSC entry.
    UniqueSampler CreateSampler(const Tegra::Texture::TSCEntry& tsc);

    const VKDevice& device;                                   ///< Device handler.
    std::unordered_map<SamplerCacheKey, UniqueSampler> cache; ///< Sampler cache keyed by TSC.
};
55
56} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_scheduler.cpp b/src/video_core/renderer_vulkan/vk_scheduler.cpp
new file mode 100644
index 000000000..f1fea1871
--- /dev/null
+++ b/src/video_core/renderer_vulkan/vk_scheduler.cpp
@@ -0,0 +1,60 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/assert.h"
6#include "common/logging/log.h"
7#include "video_core/renderer_vulkan/declarations.h"
8#include "video_core/renderer_vulkan/vk_device.h"
9#include "video_core/renderer_vulkan/vk_resource_manager.h"
10#include "video_core/renderer_vulkan/vk_scheduler.h"
11
12namespace Vulkan {
13
VKScheduler::VKScheduler(const VKDevice& device, VKResourceManager& resource_manager)
    : device{device}, resource_manager{resource_manager} {
    // Commit the first fence ahead of time so AllocateNewContext always has a next_fence.
    next_fence = &resource_manager.CommitFence();
    AllocateNewContext();
}

VKScheduler::~VKScheduler() = default;

VKExecutionContext VKScheduler::GetExecutionContext() const {
    return VKExecutionContext(current_fence, current_cmdbuf);
}

VKExecutionContext VKScheduler::Flush(vk::Semaphore semaphore) {
    // Submit the recorded work without blocking the host.
    SubmitExecution(semaphore);
    current_fence->Release();
    AllocateNewContext();
    return GetExecutionContext();
}

VKExecutionContext VKScheduler::Finish(vk::Semaphore semaphore) {
    // Submit the recorded work and block the host until the GPU signals the fence.
    SubmitExecution(semaphore);
    current_fence->Wait();
    current_fence->Release();
    AllocateNewContext();
    return GetExecutionContext();
}
40
void VKScheduler::SubmitExecution(vk::Semaphore semaphore) {
    // End recording and submit to the graphics queue, signaling current_fence on completion.
    const auto& dld = device.GetDispatchLoader();
    current_cmdbuf.end(dld);

    // The semaphore is only signaled when the caller passed one in.
    const auto queue = device.GetGraphicsQueue();
    const vk::SubmitInfo submit_info(0, nullptr, nullptr, 1, &current_cmdbuf, semaphore ? 1u : 0u,
                                     &semaphore);
    queue.submit({submit_info}, *current_fence, dld);
}

void VKScheduler::AllocateNewContext() {
    // Rotate fences: the pre-committed next_fence becomes current, and a fresh fence is
    // committed for the following rotation.
    current_fence = next_fence;
    current_cmdbuf = resource_manager.CommitCommandBuffer(*current_fence);
    next_fence = &resource_manager.CommitFence();

    const auto& dld = device.GetDispatchLoader();
    current_cmdbuf.begin({vk::CommandBufferUsageFlagBits::eOneTimeSubmit}, dld);
}
59
60} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_scheduler.h b/src/video_core/renderer_vulkan/vk_scheduler.h
new file mode 100644
index 000000000..cfaf5376f
--- /dev/null
+++ b/src/video_core/renderer_vulkan/vk_scheduler.h
@@ -0,0 +1,69 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include "common/common_types.h"
8#include "video_core/renderer_vulkan/declarations.h"
9
10namespace Vulkan {
11
12class VKDevice;
13class VKExecutionContext;
14class VKFence;
15class VKResourceManager;
16
/// The scheduler abstracts command buffer and fence management with an interface that's able to do
/// OpenGL-like operations on Vulkan command buffers.
class VKScheduler {
public:
    explicit VKScheduler(const VKDevice& device, VKResourceManager& resource_manager);
    ~VKScheduler();

    /// Gets the current execution context.
    [[nodiscard]] VKExecutionContext GetExecutionContext() const;

    /// Sends the current execution context to the GPU. It invalidates the current execution context
    /// and returns a new one.
    VKExecutionContext Flush(vk::Semaphore semaphore = nullptr);

    /// Sends the current execution context to the GPU and waits for it to complete. It invalidates
    /// the current execution context and returns a new one.
    VKExecutionContext Finish(vk::Semaphore semaphore = nullptr);

private:
    /// Ends the current command buffer and submits it to the graphics queue.
    void SubmitExecution(vk::Semaphore semaphore);

    /// Commits a new fence/command buffer pair and begins recording.
    void AllocateNewContext();

    const VKDevice& device;              ///< Device handler.
    VKResourceManager& resource_manager; ///< Source of fences and command buffers.
    vk::CommandBuffer current_cmdbuf;    ///< Command buffer being recorded.
    VKFence* current_fence = nullptr;    ///< Fence protecting the current context.
    VKFence* next_fence = nullptr;       ///< Fence pre-committed for the next context.
};
46
/// Snapshot of a scheduler context: the fence and command buffer a caller records with.
class VKExecutionContext {
    friend class VKScheduler;

public:
    VKExecutionContext() = default;

    /// Returns the fence protecting this context.
    VKFence& GetFence() const {
        return *fence;
    }

    /// Returns the command buffer of this context.
    vk::CommandBuffer GetCommandBuffer() const {
        return cmdbuf;
    }

private:
    explicit VKExecutionContext(VKFence* fence, vk::CommandBuffer cmdbuf)
        : fence{fence}, cmdbuf{cmdbuf} {}

    VKFence* fence{}; ///< Fence of the context; nullptr when default constructed.
    vk::CommandBuffer cmdbuf;
};
68
69} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp
new file mode 100644
index 000000000..e0a6f5e87
--- /dev/null
+++ b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp
@@ -0,0 +1,1379 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <functional>
6#include <map>
7#include <set>
8
9#include <fmt/format.h>
10
11#include <sirit/sirit.h>
12
13#include "common/alignment.h"
14#include "common/assert.h"
15#include "common/common_types.h"
16#include "common/logging/log.h"
17#include "video_core/engines/maxwell_3d.h"
18#include "video_core/engines/shader_bytecode.h"
19#include "video_core/engines/shader_header.h"
20#include "video_core/renderer_vulkan/vk_shader_decompiler.h"
21#include "video_core/shader/shader_ir.h"
22
23namespace Vulkan::VKShader {
24
25using Sirit::Id;
26using Tegra::Shader::Attribute;
27using Tegra::Shader::AttributeUse;
28using Tegra::Shader::Register;
29using namespace VideoCommon::Shader;
30
31using Maxwell = Tegra::Engines::Maxwell3D::Regs;
32using ShaderStage = Tegra::Engines::Maxwell3D::Regs::ShaderStage;
33using Operation = const OperationNode&;
34
// TODO(Rodrigo): Use rasterizer's value
constexpr u32 MAX_CONSTBUFFER_ELEMENTS = 0x1000;
constexpr u32 STAGE_BINDING_STRIDE = 0x100; ///< Descriptor bindings reserved per shader stage.

/// Base value categories used when declaring and converting SPIR-V values.
enum class Type { Bool, Bool2, Float, Int, Uint, HalfFloat };

/// SPIR-V ids describing a declared sampler.
struct SamplerImage {
    Id image_type;         ///< Id of the image type.
    Id sampled_image_type; ///< Id of the sampled image type.
    Id sampler;            ///< Id of the sampler variable.
};
46
namespace {

/// Maps a shader sampler texture type to the corresponding SPIR-V image dimension.
spv::Dim GetSamplerDim(const Sampler& sampler) {
    switch (sampler.GetType()) {
    case Tegra::Shader::TextureType::Texture1D:
        return spv::Dim::Dim1D;
    case Tegra::Shader::TextureType::Texture2D:
        return spv::Dim::Dim2D;
    case Tegra::Shader::TextureType::Texture3D:
        return spv::Dim::Dim3D;
    case Tegra::Shader::TextureType::TextureCube:
        return spv::Dim::Cube;
    default:
        // Fall back to 2D so decompilation can continue after reporting the gap.
        UNIMPLEMENTED_MSG("Unimplemented sampler type={}", static_cast<u32>(sampler.GetType()));
        return spv::Dim::Dim2D;
    }
}

/// Returns true if an attribute index is one of the 32 generic attributes
constexpr bool IsGenericAttribute(Attribute::Index attribute) {
    return attribute >= Attribute::Index::Attribute_0 &&
           attribute <= Attribute::Index::Attribute_31;
}

/// Returns the location of a generic attribute
constexpr u32 GetGenericAttributeLocation(Attribute::Index attribute) {
    ASSERT(IsGenericAttribute(attribute));
    return static_cast<u32>(attribute) - static_cast<u32>(Attribute::Index::Attribute_0);
}

/// Returns true if an object has to be treated as precise
bool IsPrecise(Operation operand) {
    const auto& meta = operand.GetMeta();

    // Only arithmetic metadata carries a "precise" flag; everything else is imprecise.
    if (std::holds_alternative<MetaArithmetic>(meta)) {
        return std::get<MetaArithmetic>(meta).precise;
    }
    if (std::holds_alternative<MetaHalfArithmetic>(meta)) {
        return std::get<MetaHalfArithmetic>(meta).precise;
    }
    return false;
}

} // namespace
91
92class SPIRVDecompiler : public Sirit::Module {
93public:
    /// Builds a decompiler for a single shader stage over the given IR.
    explicit SPIRVDecompiler(const ShaderIR& ir, ShaderStage stage)
        : Module(0x00010300), ir{ir}, stage{stage}, header{ir.GetHeader()} {
        // 0x00010300 encodes the SPIR-V version (1.3). The extensions below provide the
        // StorageBuffer storage class and variable pointers used by the decompiler.
        AddCapability(spv::Capability::Shader);
        AddExtension("SPV_KHR_storage_buffer_storage_class");
        AddExtension("SPV_KHR_variable_pointers");
    }
100
    /// Emits the whole SPIR-V module: declarations plus the execute function that drives the
    /// shader's basic blocks through a switch-in-a-loop dispatcher.
    void Decompile() {
        AllocateBindings();
        AllocateLabels();

        DeclareVertex();
        DeclareGeometry();
        DeclareFragment();
        DeclareRegisters();
        DeclarePredicates();
        DeclareLocalMemory();
        DeclareInternalFlags();
        DeclareInputAttributes();
        DeclareOutputAttributes();
        DeclareConstantBuffers();
        DeclareGlobalBuffers();
        DeclareSamplers();

        execute_function =
            Emit(OpFunction(t_void, spv::FunctionControlMask::Inline, TypeFunction(t_void)));
        Emit(OpLabel());

        const u32 first_address = ir.GetBasicBlocks().begin()->first;
        const Id loop_label = OpLabel("loop");
        const Id merge_label = OpLabel("merge");
        const Id dummy_label = OpLabel();
        const Id jump_label = OpLabel();
        continue_label = OpLabel("continue");

        // Collect the (address, label) pairs feeding the OpSwitch below.
        std::vector<Sirit::Literal> literals;
        std::vector<Id> branch_labels;
        for (const auto& pair : labels) {
            const auto [literal, label] = pair;
            literals.push_back(literal);
            branch_labels.push_back(label);
        }

        // TODO(Rodrigo): Figure out the actual depth of the flow stack, for now it seems unlikely
        // that shaders will use 20 nested SSYs and PBKs.
        constexpr u32 FLOW_STACK_SIZE = 20;
        const Id flow_stack_type = TypeArray(t_uint, Constant(t_uint, FLOW_STACK_SIZE));
        // jmp_to emulates the shader's program counter; the flow stack backs SSY/PBK nesting.
        jmp_to = Emit(OpVariable(TypePointer(spv::StorageClass::Function, t_uint),
                                 spv::StorageClass::Function, Constant(t_uint, first_address)));
        flow_stack = Emit(OpVariable(TypePointer(spv::StorageClass::Function, flow_stack_type),
                                     spv::StorageClass::Function, ConstantNull(flow_stack_type)));
        flow_stack_top =
            Emit(OpVariable(t_func_uint, spv::StorageClass::Function, Constant(t_uint, 0)));

        Name(jmp_to, "jmp_to");
        Name(flow_stack, "flow_stack");
        Name(flow_stack_top, "flow_stack_top");

        // Infinite loop; every iteration switches on jmp_to to pick the basic block to execute.
        Emit(OpBranch(loop_label));
        Emit(loop_label);
        Emit(OpLoopMerge(merge_label, continue_label, spv::LoopControlMask::Unroll));
        Emit(OpBranch(dummy_label));

        Emit(dummy_label);
        const Id default_branch = OpLabel();
        const Id jmp_to_load = Emit(OpLoad(t_uint, jmp_to));
        Emit(OpSelectionMerge(jump_label, spv::SelectionControlMask::MaskNone));
        Emit(OpSwitch(jmp_to_load, default_branch, literals, branch_labels));

        // An unknown jump target ends execution.
        Emit(default_branch);
        Emit(OpReturn());

        for (const auto& pair : ir.GetBasicBlocks()) {
            const auto& [address, bb] = pair;
            Emit(labels.at(address));

            VisitBasicBlock(bb);

            // Fall through to the next block in address order when the block doesn't branch.
            const auto next_it = labels.lower_bound(address + 1);
            const Id next_label = next_it != labels.end() ? next_it->second : default_branch;
            Emit(OpBranch(next_label));
        }

        Emit(jump_label);
        Emit(OpBranch(continue_label));
        Emit(continue_label);
        Emit(OpBranch(loop_label));
        Emit(merge_label);
        Emit(OpReturn());
        Emit(OpFunctionEnd());
    }
185
    /// Collects the bindings, attributes and metadata the renderer needs to bind this shader.
    ShaderEntries GetShaderEntries() const {
        ShaderEntries entries;
        entries.const_buffers_base_binding = const_buffers_base_binding;
        entries.global_buffers_base_binding = global_buffers_base_binding;
        entries.samplers_base_binding = samplers_base_binding;
        for (const auto& cbuf : ir.GetConstantBuffers()) {
            entries.const_buffers.emplace_back(cbuf.second, cbuf.first);
        }
        for (const auto& gmem : ir.GetGlobalMemoryBases()) {
            entries.global_buffers.emplace_back(gmem.cbuf_index, gmem.cbuf_offset);
        }
        for (const auto& sampler : ir.GetSamplers()) {
            entries.samplers.emplace_back(sampler);
        }
        for (const auto& attr : ir.GetInputAttributes()) {
            entries.attributes.insert(GetGenericAttributeLocation(attr.first));
        }
        entries.clip_distances = ir.GetClipDistances();
        entries.shader_length = ir.GetLength();
        entries.entry_function = execute_function;
        entries.interfaces = interfaces;
        return entries;
    }
209
private:
    /// Pointer-to-member type of the per-operation decompiler methods.
    using OperationDecompilerFn = Id (SPIRVDecompiler::*)(Operation);
    /// Dispatch table indexed by OperationCode.
    using OperationDecompilersArray =
        std::array<OperationDecompilerFn, static_cast<std::size_t>(OperationCode::Amount)>;

    static constexpr auto INTERNAL_FLAGS_COUNT = static_cast<std::size_t>(InternalFlag::Amount);
    // NOTE(review): presumably the byte stride between constant buffer elements — confirm at
    // the use sites (not visible in this chunk).
    static constexpr u32 CBUF_STRIDE = 16;
217
    /// Assigns descriptor binding numbers for this stage's cbufs, global buffers and samplers.
    void AllocateBindings() {
        // Each stage owns a disjoint window of STAGE_BINDING_STRIDE bindings.
        const u32 binding_base = static_cast<u32>(stage) * STAGE_BINDING_STRIDE;
        u32 binding_iterator = binding_base;

        // Reserves `count` consecutive bindings and returns the first one.
        const auto Allocate = [&binding_iterator](std::size_t count) {
            const u32 current_binding = binding_iterator;
            binding_iterator += static_cast<u32>(count);
            return current_binding;
        };
        const_buffers_base_binding = Allocate(ir.GetConstantBuffers().size());
        global_buffers_base_binding = Allocate(ir.GetGlobalMemoryBases().size());
        samplers_base_binding = Allocate(ir.GetSamplers().size());

        ASSERT_MSG(binding_iterator - binding_base < STAGE_BINDING_STRIDE,
                   "Stage binding stride is too small");
    }
234
235 void AllocateLabels() {
236 for (const auto& pair : ir.GetBasicBlocks()) {
237 const u32 address = pair.first;
238 labels.emplace(address, OpLabel(fmt::format("label_0x{:x}", address)));
239 }
240 }
241
242 void DeclareVertex() {
243 if (stage != ShaderStage::Vertex)
244 return;
245
246 DeclareVertexRedeclarations();
247 }
248
249 void DeclareGeometry() {
250 if (stage != ShaderStage::Geometry)
251 return;
252
253 UNIMPLEMENTED();
254 }
255
256 void DeclareFragment() {
257 if (stage != ShaderStage::Fragment)
258 return;
259
260 for (u32 rt = 0; rt < static_cast<u32>(frag_colors.size()); ++rt) {
261 if (!IsRenderTargetUsed(rt)) {
262 continue;
263 }
264
265 const Id id = AddGlobalVariable(OpVariable(t_out_float4, spv::StorageClass::Output));
266 Name(id, fmt::format("frag_color{}", rt));
267 Decorate(id, spv::Decoration::Location, rt);
268
269 frag_colors[rt] = id;
270 interfaces.push_back(id);
271 }
272
273 if (header.ps.omap.depth) {
274 frag_depth = AddGlobalVariable(OpVariable(t_out_float, spv::StorageClass::Output));
275 Name(frag_depth, "frag_depth");
276 Decorate(frag_depth, spv::Decoration::BuiltIn,
277 static_cast<u32>(spv::BuiltIn::FragDepth));
278
279 interfaces.push_back(frag_depth);
280 }
281
282 frag_coord = DeclareBuiltIn(spv::BuiltIn::FragCoord, spv::StorageClass::Input, t_in_float4,
283 "frag_coord");
284 front_facing = DeclareBuiltIn(spv::BuiltIn::FrontFacing, spv::StorageClass::Input,
285 t_in_bool, "front_facing");
286 }
287
288 void DeclareRegisters() {
289 for (const u32 gpr : ir.GetRegisters()) {
290 const Id id = OpVariable(t_prv_float, spv::StorageClass::Private, v_float_zero);
291 Name(id, fmt::format("gpr_{}", gpr));
292 registers.emplace(gpr, AddGlobalVariable(id));
293 }
294 }
295
296 void DeclarePredicates() {
297 for (const auto pred : ir.GetPredicates()) {
298 const Id id = OpVariable(t_prv_bool, spv::StorageClass::Private, v_false);
299 Name(id, fmt::format("pred_{}", static_cast<u32>(pred)));
300 predicates.emplace(pred, AddGlobalVariable(id));
301 }
302 }
303
304 void DeclareLocalMemory() {
305 if (const u64 local_memory_size = header.GetLocalMemorySize(); local_memory_size > 0) {
306 const auto element_count = static_cast<u32>(Common::AlignUp(local_memory_size, 4) / 4);
307 const Id type_array = TypeArray(t_float, Constant(t_uint, element_count));
308 const Id type_pointer = TypePointer(spv::StorageClass::Private, type_array);
309 Name(type_pointer, "LocalMemory");
310
311 local_memory =
312 OpVariable(type_pointer, spv::StorageClass::Private, ConstantNull(type_array));
313 AddGlobalVariable(Name(local_memory, "local_memory"));
314 }
315 }
316
317 void DeclareInternalFlags() {
318 constexpr std::array<const char*, INTERNAL_FLAGS_COUNT> names = {"zero", "sign", "carry",
319 "overflow"};
320 for (std::size_t flag = 0; flag < INTERNAL_FLAGS_COUNT; ++flag) {
321 const auto flag_code = static_cast<InternalFlag>(flag);
322 const Id id = OpVariable(t_prv_bool, spv::StorageClass::Private, v_false);
323 internal_flags[flag] = AddGlobalVariable(Name(id, names[flag]));
324 }
325 }
326
    /// Declares an input variable per generic input attribute and applies the
    /// interpolation decoration the fragment shader header requests for it.
    void DeclareInputAttributes() {
        for (const auto element : ir.GetInputAttributes()) {
            const Attribute::Index index = element.first;
            // Built-in (non-generic) attributes are handled elsewhere.
            if (!IsGenericAttribute(index)) {
                continue;
            }

            UNIMPLEMENTED_IF(stage == ShaderStage::Geometry);

            const u32 location = GetGenericAttributeLocation(index);
            const Id id = OpVariable(t_in_float4, spv::StorageClass::Input);
            Name(AddGlobalVariable(id), fmt::format("in_attr{}", location));
            input_attributes.emplace(index, id);
            interfaces.push_back(id);

            Decorate(id, spv::Decoration::Location, location);

            // Interpolation qualifiers only make sense on fragment inputs.
            if (stage != ShaderStage::Fragment) {
                continue;
            }
            switch (header.ps.GetAttributeUse(location)) {
            case AttributeUse::Constant:
                // Flat: no interpolation across the primitive.
                Decorate(id, spv::Decoration::Flat);
                break;
            case AttributeUse::ScreenLinear:
                Decorate(id, spv::Decoration::NoPerspective);
                break;
            case AttributeUse::Perspective:
                // Default
                break;
            default:
                UNREACHABLE_MSG("Unused attribute being fetched");
            }
        }
    }
362
363 void DeclareOutputAttributes() {
364 for (const auto index : ir.GetOutputAttributes()) {
365 if (!IsGenericAttribute(index)) {
366 continue;
367 }
368 const auto location = GetGenericAttributeLocation(index);
369 const Id id = OpVariable(t_out_float4, spv::StorageClass::Output);
370 Name(AddGlobalVariable(id), fmt::format("out_attr{}", location));
371 output_attributes.emplace(index, id);
372 interfaces.push_back(id);
373
374 Decorate(id, spv::Decoration::Location, location);
375 }
376 }
377
378 void DeclareConstantBuffers() {
379 u32 binding = const_buffers_base_binding;
380 for (const auto& entry : ir.GetConstantBuffers()) {
381 const auto [index, size] = entry;
382 const Id id = OpVariable(t_cbuf_ubo, spv::StorageClass::Uniform);
383 AddGlobalVariable(Name(id, fmt::format("cbuf_{}", index)));
384
385 Decorate(id, spv::Decoration::Binding, binding++);
386 Decorate(id, spv::Decoration::DescriptorSet, DESCRIPTOR_SET);
387 constant_buffers.emplace(index, id);
388 }
389 }
390
391 void DeclareGlobalBuffers() {
392 u32 binding = global_buffers_base_binding;
393 for (const auto& entry : ir.GetGlobalMemoryBases()) {
394 const Id id = OpVariable(t_gmem_ssbo, spv::StorageClass::StorageBuffer);
395 AddGlobalVariable(
396 Name(id, fmt::format("gmem_{}_{}", entry.cbuf_index, entry.cbuf_offset)));
397
398 Decorate(id, spv::Decoration::Binding, binding++);
399 Decorate(id, spv::Decoration::DescriptorSet, DESCRIPTOR_SET);
400 global_buffers.emplace(entry, id);
401 }
402 }
403
    /// Declares a combined image/sampler variable per used sampler, recording
    /// the image and sampled-image type ids so texture operations can reuse
    /// them, and assigns sequential bindings from samplers_base_binding.
    void DeclareSamplers() {
        u32 binding = samplers_base_binding;
        for (const auto& sampler : ir.GetSamplers()) {
            const auto dim = GetSamplerDim(sampler);
            const int depth = sampler.IsShadow() ? 1 : 0;
            const int arrayed = sampler.IsArray() ? 1 : 0;
            // TODO(Rodrigo): Sampled 1 indicates that the image will be used with a sampler. When
            // SULD and SUST instructions are implemented, replace this value.
            const int sampled = 1;
            const Id image_type =
                TypeImage(t_float, dim, depth, arrayed, false, sampled, spv::ImageFormat::Unknown);
            const Id sampled_image_type = TypeSampledImage(image_type);
            const Id pointer_type =
                TypePointer(spv::StorageClass::UniformConstant, sampled_image_type);
            const Id id = OpVariable(pointer_type, spv::StorageClass::UniformConstant);
            AddGlobalVariable(Name(id, fmt::format("sampler_{}", sampler.GetIndex())));

            // Keep both type ids around: texture ops need the sampled-image type
            // for OpLoad and the bare image type for OpImage queries.
            sampler_images.insert(
                {static_cast<u32>(sampler.GetIndex()), {image_type, sampled_image_type, id}});

            Decorate(id, spv::Decoration::Binding, binding++);
            Decorate(id, spv::Decoration::DescriptorSet, DESCRIPTOR_SET);
        }
    }
428
    /// Declares the vertex built-ins (vertex/instance index inputs) and builds
    /// the PerVertex output block, whose member layout depends on which output
    /// attributes (point size, clip distances) the shader actually writes.
    void DeclareVertexRedeclarations() {
        vertex_index = DeclareBuiltIn(spv::BuiltIn::VertexIndex, spv::StorageClass::Input,
                                      t_in_uint, "vertex_index");
        instance_index = DeclareBuiltIn(spv::BuiltIn::InstanceIndex, spv::StorageClass::Input,
                                        t_in_uint, "instance_index");

        // Scan outputs to decide which optional PerVertex members are needed.
        bool is_point_size_declared = false;
        bool is_clip_distances_declared = false;
        for (const auto index : ir.GetOutputAttributes()) {
            if (index == Attribute::Index::PointSize) {
                is_point_size_declared = true;
            } else if (index == Attribute::Index::ClipDistances0123 ||
                       index == Attribute::Index::ClipDistances4567) {
                is_clip_distances_declared = true;
            }
        }

        // Member 0 is always the position; optional members follow in order.
        std::vector<Id> members;
        members.push_back(t_float4);
        if (is_point_size_declared) {
            members.push_back(t_float);
        }
        if (is_clip_distances_declared) {
            members.push_back(TypeArray(t_float, Constant(t_uint, 8)));
        }

        const Id gl_per_vertex_struct = Name(TypeStruct(members), "PerVertex");
        Decorate(gl_per_vertex_struct, spv::Decoration::Block);

        // Assigns the next struct member index to a built-in, or a dummy zero
        // when the member was not declared.
        u32 declaration_index = 0;
        const auto MemberDecorateBuiltIn = [&](spv::BuiltIn builtin, std::string name,
                                               bool condition) {
            if (!condition)
                return u32{};
            MemberName(gl_per_vertex_struct, declaration_index, name);
            MemberDecorate(gl_per_vertex_struct, declaration_index, spv::Decoration::BuiltIn,
                           static_cast<u32>(builtin));
            return declaration_index++;
        };

        position_index = MemberDecorateBuiltIn(spv::BuiltIn::Position, "position", true);
        point_size_index =
            MemberDecorateBuiltIn(spv::BuiltIn::PointSize, "point_size", is_point_size_declared);
        clip_distances_index = MemberDecorateBuiltIn(spv::BuiltIn::ClipDistance, "clip_distances",
                                                     is_clip_distances_declared);

        const Id type_pointer = TypePointer(spv::StorageClass::Output, gl_per_vertex_struct);
        per_vertex = OpVariable(type_pointer, spv::StorageClass::Output);
        AddGlobalVariable(Name(per_vertex, "per_vertex"));
        interfaces.push_back(per_vertex);
    }
480
481 void VisitBasicBlock(const NodeBlock& bb) {
482 for (const Node node : bb) {
483 static_cast<void>(Visit(node));
484 }
485 }
486
    /// Emits SPIR-V for a single IR node and returns the id of its value.
    /// Side-effect-only nodes (conditionals, comments) return a null id.
    Id Visit(Node node) {
        if (const auto operation = std::get_if<OperationNode>(node)) {
            // Dispatch through the per-OperationCode handler table.
            const auto operation_index = static_cast<std::size_t>(operation->GetCode());
            const auto decompiler = operation_decompilers[operation_index];
            if (decompiler == nullptr) {
                UNREACHABLE_MSG("Operation decompiler {} not defined", operation_index);
            }
            return (this->*decompiler)(*operation);

        } else if (const auto gpr = std::get_if<GprNode>(node)) {
            const u32 index = gpr->GetIndex();
            if (index == Register::ZeroIndex) {
                // The zero register always reads as zero.
                return Constant(t_float, 0.0f);
            }
            return Emit(OpLoad(t_float, registers.at(index)));

        } else if (const auto immediate = std::get_if<ImmediateNode>(node)) {
            // Immediates are stored as raw u32 bits and reinterpreted as float.
            return BitcastTo<Type::Float>(Constant(t_uint, immediate->GetValue()));

        } else if (const auto predicate = std::get_if<PredicateNode>(node)) {
            // Special predicate indices have fixed values; others are loaded
            // from their declared private variables.
            const auto value = [&]() -> Id {
                switch (const auto index = predicate->GetIndex(); index) {
                case Tegra::Shader::Pred::UnusedIndex:
                    return v_true;
                case Tegra::Shader::Pred::NeverExecute:
                    return v_false;
                default:
                    return Emit(OpLoad(t_bool, predicates.at(index)));
                }
            }();
            if (predicate->IsNegated()) {
                return Emit(OpLogicalNot(t_bool, value));
            }
            return value;

        } else if (const auto abuf = std::get_if<AbufNode>(node)) {
            const auto attribute = abuf->GetIndex();
            const auto element = abuf->GetElement();

            switch (attribute) {
            case Attribute::Index::Position:
                if (stage != ShaderStage::Fragment) {
                    UNIMPLEMENTED();
                    break;
                } else {
                    if (element == 3) {
                        // Position.w reads as 1 in the fragment stage here.
                        return Constant(t_float, 1.0f);
                    }
                    return Emit(OpLoad(t_float, AccessElement(t_in_float, frag_coord, element)));
                }
            case Attribute::Index::TessCoordInstanceIDVertexID:
                // TODO(Subv): Find out what the values are for the first two elements when inside a
                // vertex shader, and what's the value of the fourth element when inside a Tess Eval
                // shader.
                ASSERT(stage == ShaderStage::Vertex);
                switch (element) {
                case 2:
                    return BitcastFrom<Type::Uint>(Emit(OpLoad(t_uint, instance_index)));
                case 3:
                    return BitcastFrom<Type::Uint>(Emit(OpLoad(t_uint, vertex_index)));
                }
                UNIMPLEMENTED_MSG("Unmanaged TessCoordInstanceIDVertexID element={}", element);
                return Constant(t_float, 0);
            case Attribute::Index::FrontFacing:
                // TODO(Subv): Find out what the values are for the other elements.
                ASSERT(stage == ShaderStage::Fragment);
                if (element == 3) {
                    // Front-facing reads as -1 (all bits set) or 0, as a float
                    // bit pattern.
                    const Id is_front_facing = Emit(OpLoad(t_bool, front_facing));
                    const Id true_value =
                        BitcastTo<Type::Float>(Constant(t_int, static_cast<s32>(-1)));
                    const Id false_value = BitcastTo<Type::Float>(Constant(t_int, 0));
                    return Emit(OpSelect(t_float, is_front_facing, true_value, false_value));
                }
                UNIMPLEMENTED_MSG("Unmanaged FrontFacing element={}", element);
                return Constant(t_float, 0.0f);
            default:
                if (IsGenericAttribute(attribute)) {
                    const Id pointer =
                        AccessElement(t_in_float, input_attributes.at(attribute), element);
                    return Emit(OpLoad(t_float, pointer));
                }
                break;
            }
            UNIMPLEMENTED_MSG("Unhandled input attribute: {}", static_cast<u32>(attribute));

        } else if (const auto cbuf = std::get_if<CbufNode>(node)) {
            const Node offset = cbuf->GetOffset();
            const Id buffer_id = constant_buffers.at(cbuf->GetIndex());

            Id buffer_index{};
            Id buffer_element{};

            if (const auto immediate = std::get_if<ImmediateNode>(offset)) {
                // Direct access
                const u32 offset_imm = immediate->GetValue();
                ASSERT(offset_imm % 4 == 0);
                // Split the byte offset into a vec4 index and a component index.
                buffer_index = Constant(t_uint, offset_imm / 16);
                buffer_element = Constant(t_uint, (offset_imm / 4) % 4);

            } else if (std::holds_alternative<OperationNode>(*offset)) {
                // Indirect access
                // TODO(Rodrigo): Use a uniform buffer stride of 4 and drop this slow math (which
                // emits sub-optimal code on GLSL from my testing).
                const Id offset_id = BitcastTo<Type::Uint>(Visit(offset));
                const Id unsafe_offset = Emit(OpUDiv(t_uint, offset_id, Constant(t_uint, 4)));
                // Clamp the offset into the buffer's bounds via modulo.
                const Id final_offset = Emit(
                    OpUMod(t_uint, unsafe_offset, Constant(t_uint, MAX_CONSTBUFFER_ELEMENTS - 1)));
                buffer_index = Emit(OpUDiv(t_uint, final_offset, Constant(t_uint, 4)));
                buffer_element = Emit(OpUMod(t_uint, final_offset, Constant(t_uint, 4)));

            } else {
                UNREACHABLE_MSG("Unmanaged offset node type");
            }

            const Id pointer = Emit(OpAccessChain(t_cbuf_float, buffer_id, Constant(t_uint, 0),
                                                  buffer_index, buffer_element));
            return Emit(OpLoad(t_float, pointer));

        } else if (const auto gmem = std::get_if<GmemNode>(node)) {
            // Global memory: index the SSBO by (real - base) / 4 words.
            const Id gmem_buffer = global_buffers.at(gmem->GetDescriptor());
            const Id real = BitcastTo<Type::Uint>(Visit(gmem->GetRealAddress()));
            const Id base = BitcastTo<Type::Uint>(Visit(gmem->GetBaseAddress()));

            Id offset = Emit(OpISub(t_uint, real, base));
            offset = Emit(OpUDiv(t_uint, offset, Constant(t_uint, 4u)));
            return Emit(OpLoad(t_float, Emit(OpAccessChain(t_gmem_float, gmem_buffer,
                                                           Constant(t_uint, 0u), offset))));

        } else if (const auto conditional = std::get_if<ConditionalNode>(node)) {
            // It's invalid to call conditional on nested nodes, use an operation instead
            const Id true_label = OpLabel();
            const Id skip_label = OpLabel();
            Emit(OpBranchConditional(Visit(conditional->GetCondition()), true_label, skip_label));
            Emit(true_label);

            VisitBasicBlock(conditional->GetCode());

            Emit(OpBranch(skip_label));
            Emit(skip_label);
            return {};

        } else if (const auto comment = std::get_if<CommentNode>(node)) {
            // Comments are kept as named OpUndef values for debugging.
            Name(Emit(OpUndef(t_void)), comment->GetText());
            return {};
        }

        UNREACHABLE();
        return {};
    }
636
637 template <Id (Module::*func)(Id, Id), Type result_type, Type type_a = result_type>
638 Id Unary(Operation operation) {
639 const Id type_def = GetTypeDefinition(result_type);
640 const Id op_a = VisitOperand<type_a>(operation, 0);
641
642 const Id value = BitcastFrom<result_type>(Emit((this->*func)(type_def, op_a)));
643 if (IsPrecise(operation)) {
644 Decorate(value, spv::Decoration::NoContraction);
645 }
646 return value;
647 }
648
649 template <Id (Module::*func)(Id, Id, Id), Type result_type, Type type_a = result_type,
650 Type type_b = type_a>
651 Id Binary(Operation operation) {
652 const Id type_def = GetTypeDefinition(result_type);
653 const Id op_a = VisitOperand<type_a>(operation, 0);
654 const Id op_b = VisitOperand<type_b>(operation, 1);
655
656 const Id value = BitcastFrom<result_type>(Emit((this->*func)(type_def, op_a, op_b)));
657 if (IsPrecise(operation)) {
658 Decorate(value, spv::Decoration::NoContraction);
659 }
660 return value;
661 }
662
663 template <Id (Module::*func)(Id, Id, Id, Id), Type result_type, Type type_a = result_type,
664 Type type_b = type_a, Type type_c = type_b>
665 Id Ternary(Operation operation) {
666 const Id type_def = GetTypeDefinition(result_type);
667 const Id op_a = VisitOperand<type_a>(operation, 0);
668 const Id op_b = VisitOperand<type_b>(operation, 1);
669 const Id op_c = VisitOperand<type_c>(operation, 2);
670
671 const Id value = BitcastFrom<result_type>(Emit((this->*func)(type_def, op_a, op_b, op_c)));
672 if (IsPrecise(operation)) {
673 Decorate(value, spv::Decoration::NoContraction);
674 }
675 return value;
676 }
677
678 template <Id (Module::*func)(Id, Id, Id, Id, Id), Type result_type, Type type_a = result_type,
679 Type type_b = type_a, Type type_c = type_b, Type type_d = type_c>
680 Id Quaternary(Operation operation) {
681 const Id type_def = GetTypeDefinition(result_type);
682 const Id op_a = VisitOperand<type_a>(operation, 0);
683 const Id op_b = VisitOperand<type_b>(operation, 1);
684 const Id op_c = VisitOperand<type_c>(operation, 2);
685 const Id op_d = VisitOperand<type_d>(operation, 3);
686
687 const Id value =
688 BitcastFrom<result_type>(Emit((this->*func)(type_def, op_a, op_b, op_c, op_d)));
689 if (IsPrecise(operation)) {
690 Decorate(value, spv::Decoration::NoContraction);
691 }
692 return value;
693 }
694
    /// Stores the value of operand 1 into the location named by operand 0
    /// (a GPR, an output attribute, or local memory).
    Id Assign(Operation operation) {
        const Node dest = operation[0];
        const Node src = operation[1];

        // NOTE(review): if `dest` is none of the handled node types, `target`
        // stays null and is still passed to OpStore — confirm upstream intent.
        Id target{};
        if (const auto gpr = std::get_if<GprNode>(dest)) {
            if (gpr->GetIndex() == Register::ZeroIndex) {
                // Writing to Register::ZeroIndex is a no op
                return {};
            }
            target = registers.at(gpr->GetIndex());

        } else if (const auto abuf = std::get_if<AbufNode>(dest)) {
            // Output attributes: map the IR attribute index onto the matching
            // member of the PerVertex block or a generic output variable.
            target = [&]() -> Id {
                switch (const auto attribute = abuf->GetIndex(); attribute) {
                case Attribute::Index::Position:
                    return AccessElement(t_out_float, per_vertex, position_index,
                                         abuf->GetElement());
                case Attribute::Index::PointSize:
                    return AccessElement(t_out_float, per_vertex, point_size_index);
                case Attribute::Index::ClipDistances0123:
                    return AccessElement(t_out_float, per_vertex, clip_distances_index,
                                         abuf->GetElement());
                case Attribute::Index::ClipDistances4567:
                    // Second clip-distance attribute covers elements 4..7.
                    return AccessElement(t_out_float, per_vertex, clip_distances_index,
                                         abuf->GetElement() + 4);
                default:
                    if (IsGenericAttribute(attribute)) {
                        return AccessElement(t_out_float, output_attributes.at(attribute),
                                             abuf->GetElement());
                    }
                    UNIMPLEMENTED_MSG("Unhandled output attribute: {}",
                                      static_cast<u32>(attribute));
                    return {};
                }
            }();

        } else if (const auto lmem = std::get_if<LmemNode>(dest)) {
            // Local memory is word-addressed: divide the byte address by 4.
            Id address = BitcastTo<Type::Uint>(Visit(lmem->GetAddress()));
            address = Emit(OpUDiv(t_uint, address, Constant(t_uint, 4)));
            target = Emit(OpAccessChain(t_prv_float, local_memory, {address}));
        }

        Emit(OpStore(target, Visit(src)));
        return {};
    }
741
    /// Half-float negation: not implemented in this backend yet.
    Id HNegate(Operation operation) {
        UNIMPLEMENTED();
        return {};
    }
746
    /// Half-float merge to f32: not implemented in this backend yet.
    Id HMergeF32(Operation operation) {
        UNIMPLEMENTED();
        return {};
    }
751
    /// Half-float merge into the low half: not implemented in this backend yet.
    Id HMergeH0(Operation operation) {
        UNIMPLEMENTED();
        return {};
    }
756
    /// Half-float merge into the high half: not implemented in this backend yet.
    Id HMergeH1(Operation operation) {
        UNIMPLEMENTED();
        return {};
    }
761
    /// Packing two halves into one value: not implemented in this backend yet.
    Id HPack2(Operation operation) {
        UNIMPLEMENTED();
        return {};
    }
766
    /// Stores a boolean value into a predicate register or an internal flag.
    Id LogicalAssign(Operation operation) {
        const Node dest = operation[0];
        const Node src = operation[1];

        Id target{};
        if (const auto pred = std::get_if<PredicateNode>(dest)) {
            ASSERT_MSG(!pred->IsNegated(), "Negating logical assignment");

            const auto index = pred->GetIndex();
            switch (index) {
            case Tegra::Shader::Pred::NeverExecute:
            case Tegra::Shader::Pred::UnusedIndex:
                // Writing to these predicates is a no-op
                return {};
            }
            target = predicates.at(index);

        } else if (const auto flag = std::get_if<InternalFlagNode>(dest)) {
            target = internal_flags.at(static_cast<u32>(flag->GetFlag()));
        }

        Emit(OpStore(target, Visit(src)));
        return {};
    }
791
    /// Component pick from a bool2: not implemented in this backend yet.
    Id LogicalPick2(Operation operation) {
        UNIMPLEMENTED();
        return {};
    }
796
    /// Logical AND across a bool2: not implemented in this backend yet.
    Id LogicalAll2(Operation operation) {
        UNIMPLEMENTED();
        return {};
    }
801
    /// Logical OR across a bool2: not implemented in this backend yet.
    Id LogicalAny2(Operation operation) {
        UNIMPLEMENTED();
        return {};
    }
806
807 Id GetTextureSampler(Operation operation) {
808 const auto meta = std::get_if<MetaTexture>(&operation.GetMeta());
809 const auto entry = sampler_images.at(static_cast<u32>(meta->sampler.GetIndex()));
810 return Emit(OpLoad(entry.sampled_image_type, entry.sampler));
811 }
812
813 Id GetTextureImage(Operation operation) {
814 const auto meta = std::get_if<MetaTexture>(&operation.GetMeta());
815 const auto entry = sampler_images.at(static_cast<u32>(meta->sampler.GetIndex()));
816 return Emit(OpImage(entry.image_type, GetTextureSampler(operation)));
817 }
818
819 Id GetTextureCoordinates(Operation operation) {
820 const auto meta = std::get_if<MetaTexture>(&operation.GetMeta());
821 std::vector<Id> coords;
822 for (std::size_t i = 0; i < operation.GetOperandsCount(); ++i) {
823 coords.push_back(Visit(operation[i]));
824 }
825 if (meta->sampler.IsArray()) {
826 const Id array_integer = BitcastTo<Type::Int>(Visit(meta->array));
827 coords.push_back(Emit(OpConvertSToF(t_float, array_integer)));
828 }
829 if (meta->sampler.IsShadow()) {
830 coords.push_back(Visit(meta->depth_compare));
831 }
832
833 const std::array<Id, 4> t_float_lut = {nullptr, t_float2, t_float3, t_float4};
834 return coords.size() == 1
835 ? coords[0]
836 : Emit(OpCompositeConstruct(t_float_lut.at(coords.size() - 1), coords));
837 }
838
839 Id GetTextureElement(Operation operation, Id sample_value) {
840 const auto meta = std::get_if<MetaTexture>(&operation.GetMeta());
841 ASSERT(meta);
842 return Emit(OpCompositeExtract(t_float, sample_value, meta->element));
843 }
844
845 Id Texture(Operation operation) {
846 const Id texture = Emit(OpImageSampleImplicitLod(t_float4, GetTextureSampler(operation),
847 GetTextureCoordinates(operation)));
848 return GetTextureElement(operation, texture);
849 }
850
851 Id TextureLod(Operation operation) {
852 const auto meta = std::get_if<MetaTexture>(&operation.GetMeta());
853 const Id texture = Emit(OpImageSampleExplicitLod(
854 t_float4, GetTextureSampler(operation), GetTextureCoordinates(operation),
855 spv::ImageOperandsMask::Lod, Visit(meta->lod)));
856 return GetTextureElement(operation, texture);
857 }
858
859 Id TextureGather(Operation operation) {
860 const auto meta = std::get_if<MetaTexture>(&operation.GetMeta());
861 const auto coords = GetTextureCoordinates(operation);
862
863 Id texture;
864 if (meta->sampler.IsShadow()) {
865 texture = Emit(OpImageDrefGather(t_float4, GetTextureSampler(operation), coords,
866 Visit(meta->component)));
867 } else {
868 u32 component_value = 0;
869 if (meta->component) {
870 const auto component = std::get_if<ImmediateNode>(meta->component);
871 ASSERT_MSG(component, "Component is not an immediate value");
872 component_value = component->GetValue();
873 }
874 texture = Emit(OpImageGather(t_float4, GetTextureSampler(operation), coords,
875 Constant(t_uint, component_value)));
876 }
877
878 return GetTextureElement(operation, texture);
879 }
880
881 Id TextureQueryDimensions(Operation operation) {
882 const auto meta = std::get_if<MetaTexture>(&operation.GetMeta());
883 const auto image_id = GetTextureImage(operation);
884 AddCapability(spv::Capability::ImageQuery);
885
886 if (meta->element == 3) {
887 return BitcastTo<Type::Float>(Emit(OpImageQueryLevels(t_int, image_id)));
888 }
889
890 const Id lod = VisitOperand<Type::Uint>(operation, 0);
891 const std::size_t coords_count = [&]() {
892 switch (const auto type = meta->sampler.GetType(); type) {
893 case Tegra::Shader::TextureType::Texture1D:
894 return 1;
895 case Tegra::Shader::TextureType::Texture2D:
896 case Tegra::Shader::TextureType::TextureCube:
897 return 2;
898 case Tegra::Shader::TextureType::Texture3D:
899 return 3;
900 default:
901 UNREACHABLE_MSG("Invalid texture type={}", static_cast<u32>(type));
902 return 2;
903 }
904 }();
905
906 if (meta->element >= coords_count) {
907 return Constant(t_float, 0.0f);
908 }
909
910 const std::array<Id, 3> types = {t_int, t_int2, t_int3};
911 const Id sizes = Emit(OpImageQuerySizeLod(types.at(coords_count - 1), image_id, lod));
912 const Id size = Emit(OpCompositeExtract(t_int, sizes, meta->element));
913 return BitcastTo<Type::Float>(size);
914 }
915
    /// LOD query: not implemented in this backend yet.
    Id TextureQueryLod(Operation operation) {
        UNIMPLEMENTED();
        return {};
    }
920
    /// Direct texel fetch: not implemented in this backend yet.
    Id TexelFetch(Operation operation) {
        UNIMPLEMENTED();
        return {};
    }
925
    /// Records the branch target address and jumps back to the dispatcher
    /// loop's continue block. Only immediate branch targets are supported.
    Id Branch(Operation operation) {
        const auto target = std::get_if<ImmediateNode>(operation[0]);
        // NOTE(review): `target` is still dereferenced below when null in a
        // non-asserting build — confirm UNIMPLEMENTED_IF aborts here.
        UNIMPLEMENTED_IF(!target);

        Emit(OpStore(jmp_to, Constant(t_uint, target->GetValue())));
        BranchingOp([&]() { Emit(OpBranch(continue_label)); });
        return {};
    }
934
935 Id PushFlowStack(Operation operation) {
936 const auto target = std::get_if<ImmediateNode>(operation[0]);
937 ASSERT(target);
938
939 const Id current = Emit(OpLoad(t_uint, flow_stack_top));
940 const Id next = Emit(OpIAdd(t_uint, current, Constant(t_uint, 1)));
941 const Id access = Emit(OpAccessChain(t_func_uint, flow_stack, current));
942
943 Emit(OpStore(access, Constant(t_uint, target->GetValue())));
944 Emit(OpStore(flow_stack_top, next));
945 return {};
946 }
947
948 Id PopFlowStack(Operation operation) {
949 const Id current = Emit(OpLoad(t_uint, flow_stack_top));
950 const Id previous = Emit(OpISub(t_uint, current, Constant(t_uint, 1)));
951 const Id access = Emit(OpAccessChain(t_func_uint, flow_stack, previous));
952 const Id target = Emit(OpLoad(t_uint, access));
953
954 Emit(OpStore(flow_stack_top, previous));
955 Emit(OpStore(jmp_to, target));
956 BranchingOp([&]() { Emit(OpBranch(continue_label)); });
957 return {};
958 }
959
    /// Handles shader exit: vertex shaders remap depth from [-1, 1] to [0, 1],
    /// fragment shaders copy the output registers into the declared color and
    /// depth outputs, then control returns through the dispatcher.
    Id Exit(Operation operation) {
        switch (stage) {
        case ShaderStage::Vertex: {
            // TODO(Rodrigo): We should use VK_EXT_depth_range_unrestricted instead, but it doesn't
            // seem to be working on Nvidia's drivers and Intel (mesa and blob) doesn't support it.
            const Id position = AccessElement(t_float4, per_vertex, position_index);
            Id depth = Emit(OpLoad(t_float, AccessElement(t_out_float, position, 2)));
            // z' = (z + 1) * 0.5 maps GL-style depth onto Vulkan's range.
            depth = Emit(OpFAdd(t_float, depth, Constant(t_float, 1.0f)));
            depth = Emit(OpFMul(t_float, depth, Constant(t_float, 0.5f)));
            Emit(OpStore(AccessElement(t_out_float, position, 2), depth));
            break;
        }
        case ShaderStage::Fragment: {
            // Reads a GPR if it was declared, otherwise yields 0.0f.
            const auto SafeGetRegister = [&](u32 reg) {
                // TODO(Rodrigo): Replace with contains once C++20 releases
                if (const auto it = registers.find(reg); it != registers.end()) {
                    return Emit(OpLoad(t_float, it->second));
                }
                return Constant(t_float, 0.0f);
            };

            UNIMPLEMENTED_IF_MSG(header.ps.omap.sample_mask != 0,
                                 "Sample mask write is unimplemented");

            // TODO(Rodrigo): Alpha testing

            // Write the color outputs using the data in the shader registers, disabled
            // rendertargets/components are skipped in the register assignment.
            u32 current_reg = 0;
            for (u32 rt = 0; rt < Maxwell::NumRenderTargets; ++rt) {
                // TODO(Subv): Figure out how dual-source blending is configured in the Switch.
                for (u32 component = 0; component < 4; ++component) {
                    if (header.ps.IsColorComponentOutputEnabled(rt, component)) {
                        Emit(OpStore(AccessElement(t_out_float, frag_colors.at(rt), component),
                                     SafeGetRegister(current_reg)));
                        ++current_reg;
                    }
                }
            }
            if (header.ps.omap.depth) {
                // The depth output is always 2 registers after the last color output, and
                // current_reg already contains one past the last color register.
                Emit(OpStore(frag_depth, SafeGetRegister(current_reg + 1)));
            }
            break;
        }
        }

        BranchingOp([&]() { Emit(OpReturn()); });
        return {};
    }
1011
    /// Discards the current fragment by emitting OpKill through BranchingOp.
    Id Discard(Operation operation) {
        BranchingOp([&]() { Emit(OpKill()); });
        return {};
    }
1016
    /// Geometry-stage vertex emission: not implemented in this backend yet.
    Id EmitVertex(Operation operation) {
        UNIMPLEMENTED();
        return {};
    }
1021
    /// Geometry-stage primitive termination: not implemented in this backend yet.
    Id EndPrimitive(Operation operation) {
        UNIMPLEMENTED();
        return {};
    }
1026
    /// Y-axis negation: not implemented in this backend yet.
    Id YNegate(Operation operation) {
        UNIMPLEMENTED();
        return {};
    }
1031
1032 Id DeclareBuiltIn(spv::BuiltIn builtin, spv::StorageClass storage, Id type,
1033 const std::string& name) {
1034 const Id id = OpVariable(type, storage);
1035 Decorate(id, spv::Decoration::BuiltIn, static_cast<u32>(builtin));
1036 AddGlobalVariable(Name(id, name));
1037 interfaces.push_back(id);
1038 return id;
1039 }
1040
1041 bool IsRenderTargetUsed(u32 rt) const {
1042 for (u32 component = 0; component < 4; ++component) {
1043 if (header.ps.IsColorComponentOutputEnabled(rt, component)) {
1044 return true;
1045 }
1046 }
1047 return false;
1048 }
1049
1050 template <typename... Args>
1051 Id AccessElement(Id pointer_type, Id composite, Args... elements_) {
1052 std::vector<Id> members;
1053 auto elements = {elements_...};
1054 for (const auto element : elements) {
1055 members.push_back(Constant(t_uint, element));
1056 }
1057
1058 return Emit(OpAccessChain(pointer_type, composite, members));
1059 }
1060
    /// Visits an operand and bitcasts its float-typed value into the
    /// representation `type` expected by the consuming instruction.
    template <Type type>
    Id VisitOperand(Operation operation, std::size_t operand_index) {
        const Id value = Visit(operation[operand_index]);

        switch (type) {
        case Type::Bool:
        case Type::Bool2:
        case Type::Float:
            // Already in the representation the IR produces.
            return value;
        case Type::Int:
            return Emit(OpBitcast(t_int, value));
        case Type::Uint:
            return Emit(OpBitcast(t_uint, value));
        case Type::HalfFloat:
            UNIMPLEMENTED();
        }
        UNREACHABLE();
        return value;
    }
1080
    /// Bitcasts a value of representation `type` back into the IR's default
    /// float representation.
    template <Type type>
    Id BitcastFrom(Id value) {
        switch (type) {
        case Type::Bool:
        case Type::Bool2:
        case Type::Float:
            // No conversion needed for these representations.
            return value;
        case Type::Int:
        case Type::Uint:
            return Emit(OpBitcast(t_float, value));
        case Type::HalfFloat:
            UNIMPLEMENTED();
        }
        UNREACHABLE();
        return value;
    }
1097
    /// Bitcasts a value into the representation `type`; booleans cannot be
    /// bitcast and trip UNREACHABLE.
    template <Type type>
    Id BitcastTo(Id value) {
        switch (type) {
        case Type::Bool:
        case Type::Bool2:
            UNREACHABLE();
        case Type::Float:
            return Emit(OpBitcast(t_float, value));
        case Type::Int:
            return Emit(OpBitcast(t_int, value));
        case Type::Uint:
            return Emit(OpBitcast(t_uint, value));
        case Type::HalfFloat:
            UNIMPLEMENTED();
        }
        UNREACHABLE();
        return value;
    }
1116
    /// Maps an IR Type to its declared SPIR-V type id.
    Id GetTypeDefinition(Type type) {
        switch (type) {
        case Type::Bool:
            return t_bool;
        case Type::Bool2:
            return t_bool2;
        case Type::Float:
            return t_float;
        case Type::Int:
            return t_int;
        case Type::Uint:
            return t_uint;
        case Type::HalfFloat:
            UNIMPLEMENTED();
        }
        UNREACHABLE();
        return {};
    }
1135
1136 void BranchingOp(std::function<void()> call) {
1137 const Id true_label = OpLabel();
1138 const Id skip_label = OpLabel();
1139 Emit(OpSelectionMerge(skip_label, spv::SelectionControlMask::Flatten));
1140 Emit(OpBranchConditional(v_true, true_label, skip_label, 1, 0));
1141 Emit(true_label);
1142 call();
1143
1144 Emit(skip_label);
1145 }
1146
    // Dispatch table mapping IR operations to their decompiler handlers. The entry
    // order must match the IR operation enumeration -- TODO confirm against
    // shader_ir.h, since a mismatch silently dispatches the wrong handler.
    static constexpr OperationDecompilersArray operation_decompilers = {
        // Assignment
        &SPIRVDecompiler::Assign,

        // Float select
        &SPIRVDecompiler::Ternary<&Module::OpSelect, Type::Float, Type::Bool, Type::Float,
                                  Type::Float>,

        // Float arithmetic
        &SPIRVDecompiler::Binary<&Module::OpFAdd, Type::Float>,
        &SPIRVDecompiler::Binary<&Module::OpFMul, Type::Float>,
        &SPIRVDecompiler::Binary<&Module::OpFDiv, Type::Float>,
        &SPIRVDecompiler::Ternary<&Module::OpFma, Type::Float>,
        &SPIRVDecompiler::Unary<&Module::OpFNegate, Type::Float>,
        &SPIRVDecompiler::Unary<&Module::OpFAbs, Type::Float>,
        &SPIRVDecompiler::Ternary<&Module::OpFClamp, Type::Float>,
        &SPIRVDecompiler::Binary<&Module::OpFMin, Type::Float>,
        &SPIRVDecompiler::Binary<&Module::OpFMax, Type::Float>,
        &SPIRVDecompiler::Unary<&Module::OpCos, Type::Float>,
        &SPIRVDecompiler::Unary<&Module::OpSin, Type::Float>,
        &SPIRVDecompiler::Unary<&Module::OpExp2, Type::Float>,
        &SPIRVDecompiler::Unary<&Module::OpLog2, Type::Float>,
        &SPIRVDecompiler::Unary<&Module::OpInverseSqrt, Type::Float>,
        &SPIRVDecompiler::Unary<&Module::OpSqrt, Type::Float>,
        &SPIRVDecompiler::Unary<&Module::OpRoundEven, Type::Float>,
        &SPIRVDecompiler::Unary<&Module::OpFloor, Type::Float>,
        &SPIRVDecompiler::Unary<&Module::OpCeil, Type::Float>,
        &SPIRVDecompiler::Unary<&Module::OpTrunc, Type::Float>,
        &SPIRVDecompiler::Unary<&Module::OpConvertSToF, Type::Float, Type::Int>,
        &SPIRVDecompiler::Unary<&Module::OpConvertUToF, Type::Float, Type::Uint>,

        // Signed integer arithmetic
        &SPIRVDecompiler::Binary<&Module::OpIAdd, Type::Int>,
        &SPIRVDecompiler::Binary<&Module::OpIMul, Type::Int>,
        &SPIRVDecompiler::Binary<&Module::OpSDiv, Type::Int>,
        &SPIRVDecompiler::Unary<&Module::OpSNegate, Type::Int>,
        &SPIRVDecompiler::Unary<&Module::OpSAbs, Type::Int>,
        &SPIRVDecompiler::Binary<&Module::OpSMin, Type::Int>,
        &SPIRVDecompiler::Binary<&Module::OpSMax, Type::Int>,

        // Signed integer conversions and bit manipulation
        &SPIRVDecompiler::Unary<&Module::OpConvertFToS, Type::Int, Type::Float>,
        &SPIRVDecompiler::Unary<&Module::OpBitcast, Type::Int, Type::Uint>,
        &SPIRVDecompiler::Binary<&Module::OpShiftLeftLogical, Type::Int, Type::Int, Type::Uint>,
        &SPIRVDecompiler::Binary<&Module::OpShiftRightLogical, Type::Int, Type::Int, Type::Uint>,
        &SPIRVDecompiler::Binary<&Module::OpShiftRightArithmetic, Type::Int, Type::Int, Type::Uint>,
        &SPIRVDecompiler::Binary<&Module::OpBitwiseAnd, Type::Int>,
        &SPIRVDecompiler::Binary<&Module::OpBitwiseOr, Type::Int>,
        &SPIRVDecompiler::Binary<&Module::OpBitwiseXor, Type::Int>,
        &SPIRVDecompiler::Unary<&Module::OpNot, Type::Int>,
        &SPIRVDecompiler::Quaternary<&Module::OpBitFieldInsert, Type::Int>,
        &SPIRVDecompiler::Ternary<&Module::OpBitFieldSExtract, Type::Int>,
        &SPIRVDecompiler::Unary<&Module::OpBitCount, Type::Int>,

        // Unsigned integer arithmetic and bit manipulation
        &SPIRVDecompiler::Binary<&Module::OpIAdd, Type::Uint>,
        &SPIRVDecompiler::Binary<&Module::OpIMul, Type::Uint>,
        &SPIRVDecompiler::Binary<&Module::OpUDiv, Type::Uint>,
        &SPIRVDecompiler::Binary<&Module::OpUMin, Type::Uint>,
        &SPIRVDecompiler::Binary<&Module::OpUMax, Type::Uint>,
        &SPIRVDecompiler::Unary<&Module::OpConvertFToU, Type::Uint, Type::Float>,
        &SPIRVDecompiler::Unary<&Module::OpBitcast, Type::Uint, Type::Int>,
        &SPIRVDecompiler::Binary<&Module::OpShiftLeftLogical, Type::Uint>,
        &SPIRVDecompiler::Binary<&Module::OpShiftRightLogical, Type::Uint>,
        &SPIRVDecompiler::Binary<&Module::OpShiftRightArithmetic, Type::Uint>,
        &SPIRVDecompiler::Binary<&Module::OpBitwiseAnd, Type::Uint>,
        &SPIRVDecompiler::Binary<&Module::OpBitwiseOr, Type::Uint>,
        &SPIRVDecompiler::Binary<&Module::OpBitwiseXor, Type::Uint>,
        &SPIRVDecompiler::Unary<&Module::OpNot, Type::Uint>,
        &SPIRVDecompiler::Quaternary<&Module::OpBitFieldInsert, Type::Uint>,
        &SPIRVDecompiler::Ternary<&Module::OpBitFieldUExtract, Type::Uint>,
        &SPIRVDecompiler::Unary<&Module::OpBitCount, Type::Uint>,

        // Half-float arithmetic and packing helpers
        &SPIRVDecompiler::Binary<&Module::OpFAdd, Type::HalfFloat>,
        &SPIRVDecompiler::Binary<&Module::OpFMul, Type::HalfFloat>,
        &SPIRVDecompiler::Ternary<&Module::OpFma, Type::HalfFloat>,
        &SPIRVDecompiler::Unary<&Module::OpFAbs, Type::HalfFloat>,
        &SPIRVDecompiler::HNegate,
        &SPIRVDecompiler::HMergeF32,
        &SPIRVDecompiler::HMergeH0,
        &SPIRVDecompiler::HMergeH1,
        &SPIRVDecompiler::HPack2,

        // Boolean logic
        &SPIRVDecompiler::LogicalAssign,
        &SPIRVDecompiler::Binary<&Module::OpLogicalAnd, Type::Bool>,
        &SPIRVDecompiler::Binary<&Module::OpLogicalOr, Type::Bool>,
        &SPIRVDecompiler::Binary<&Module::OpLogicalNotEqual, Type::Bool>,
        &SPIRVDecompiler::Unary<&Module::OpLogicalNot, Type::Bool>,
        &SPIRVDecompiler::LogicalPick2,
        &SPIRVDecompiler::LogicalAll2,
        &SPIRVDecompiler::LogicalAny2,

        // Float comparisons
        &SPIRVDecompiler::Binary<&Module::OpFOrdLessThan, Type::Bool, Type::Float>,
        &SPIRVDecompiler::Binary<&Module::OpFOrdEqual, Type::Bool, Type::Float>,
        &SPIRVDecompiler::Binary<&Module::OpFOrdLessThanEqual, Type::Bool, Type::Float>,
        &SPIRVDecompiler::Binary<&Module::OpFOrdGreaterThan, Type::Bool, Type::Float>,
        &SPIRVDecompiler::Binary<&Module::OpFOrdNotEqual, Type::Bool, Type::Float>,
        &SPIRVDecompiler::Binary<&Module::OpFOrdGreaterThanEqual, Type::Bool, Type::Float>,
        &SPIRVDecompiler::Unary<&Module::OpIsNan, Type::Bool>,

        // Signed integer comparisons
        &SPIRVDecompiler::Binary<&Module::OpSLessThan, Type::Bool, Type::Int>,
        &SPIRVDecompiler::Binary<&Module::OpIEqual, Type::Bool, Type::Int>,
        &SPIRVDecompiler::Binary<&Module::OpSLessThanEqual, Type::Bool, Type::Int>,
        &SPIRVDecompiler::Binary<&Module::OpSGreaterThan, Type::Bool, Type::Int>,
        &SPIRVDecompiler::Binary<&Module::OpINotEqual, Type::Bool, Type::Int>,
        &SPIRVDecompiler::Binary<&Module::OpSGreaterThanEqual, Type::Bool, Type::Int>,

        // Unsigned integer comparisons
        &SPIRVDecompiler::Binary<&Module::OpULessThan, Type::Bool, Type::Uint>,
        &SPIRVDecompiler::Binary<&Module::OpIEqual, Type::Bool, Type::Uint>,
        &SPIRVDecompiler::Binary<&Module::OpULessThanEqual, Type::Bool, Type::Uint>,
        &SPIRVDecompiler::Binary<&Module::OpUGreaterThan, Type::Bool, Type::Uint>,
        &SPIRVDecompiler::Binary<&Module::OpINotEqual, Type::Bool, Type::Uint>,
        &SPIRVDecompiler::Binary<&Module::OpUGreaterThanEqual, Type::Bool, Type::Uint>,

        // Half-float comparisons
        &SPIRVDecompiler::Binary<&Module::OpFOrdLessThan, Type::Bool, Type::HalfFloat>,
        &SPIRVDecompiler::Binary<&Module::OpFOrdEqual, Type::Bool, Type::HalfFloat>,
        &SPIRVDecompiler::Binary<&Module::OpFOrdLessThanEqual, Type::Bool, Type::HalfFloat>,
        &SPIRVDecompiler::Binary<&Module::OpFOrdGreaterThan, Type::Bool, Type::HalfFloat>,
        &SPIRVDecompiler::Binary<&Module::OpFOrdNotEqual, Type::Bool, Type::HalfFloat>,
        &SPIRVDecompiler::Binary<&Module::OpFOrdGreaterThanEqual, Type::Bool, Type::HalfFloat>,

        // Texture sampling and queries
        &SPIRVDecompiler::Texture,
        &SPIRVDecompiler::TextureLod,
        &SPIRVDecompiler::TextureGather,
        &SPIRVDecompiler::TextureQueryDimensions,
        &SPIRVDecompiler::TextureQueryLod,
        &SPIRVDecompiler::TexelFetch,

        // Control flow
        &SPIRVDecompiler::Branch,
        &SPIRVDecompiler::PushFlowStack,
        &SPIRVDecompiler::PopFlowStack,
        &SPIRVDecompiler::Exit,
        &SPIRVDecompiler::Discard,

        // Geometry stage
        &SPIRVDecompiler::EmitVertex,
        &SPIRVDecompiler::EndPrimitive,

        // Miscellaneous
        &SPIRVDecompiler::YNegate,
    };
1280
    const ShaderIR& ir;
    const ShaderStage stage;
    const Tegra::Shader::Header header;

    // Named SPIR-V scalar and vector type ids, declared once per module.
    const Id t_void = Name(TypeVoid(), "void");

    const Id t_bool = Name(TypeBool(), "bool");
    const Id t_bool2 = Name(TypeVector(t_bool, 2), "bool2");

    const Id t_int = Name(TypeInt(32, true), "int");
    const Id t_int2 = Name(TypeVector(t_int, 2), "int2");
    const Id t_int3 = Name(TypeVector(t_int, 3), "int3");
    const Id t_int4 = Name(TypeVector(t_int, 4), "int4");

    const Id t_uint = Name(TypeInt(32, false), "uint");
    const Id t_uint2 = Name(TypeVector(t_uint, 2), "uint2");
    const Id t_uint3 = Name(TypeVector(t_uint, 3), "uint3");
    const Id t_uint4 = Name(TypeVector(t_uint, 4), "uint4");

    const Id t_float = Name(TypeFloat(32), "float");
    const Id t_float2 = Name(TypeVector(t_float, 2), "float2");
    const Id t_float3 = Name(TypeVector(t_float, 3), "float3");
    const Id t_float4 = Name(TypeVector(t_float, 4), "float4");

    // Pointer types, one per storage class actually used by generated code.
    const Id t_prv_bool = Name(TypePointer(spv::StorageClass::Private, t_bool), "prv_bool");
    const Id t_prv_float = Name(TypePointer(spv::StorageClass::Private, t_float), "prv_float");

    const Id t_func_uint = Name(TypePointer(spv::StorageClass::Function, t_uint), "func_uint");

    const Id t_in_bool = Name(TypePointer(spv::StorageClass::Input, t_bool), "in_bool");
    const Id t_in_uint = Name(TypePointer(spv::StorageClass::Input, t_uint), "in_uint");
    const Id t_in_float = Name(TypePointer(spv::StorageClass::Input, t_float), "in_float");
    const Id t_in_float4 = Name(TypePointer(spv::StorageClass::Input, t_float4), "in_float4");

    const Id t_out_float = Name(TypePointer(spv::StorageClass::Output, t_float), "out_float");
    const Id t_out_float4 = Name(TypePointer(spv::StorageClass::Output, t_float4), "out_float4");

    // Constant buffer UBO: fixed-size float4 array with explicit stride, wrapped
    // in a Block-decorated struct at offset 0.
    const Id t_cbuf_float = TypePointer(spv::StorageClass::Uniform, t_float);
    const Id t_cbuf_array =
        Decorate(Name(TypeArray(t_float4, Constant(t_uint, MAX_CONSTBUFFER_ELEMENTS)), "CbufArray"),
                 spv::Decoration::ArrayStride, CBUF_STRIDE);
    const Id t_cbuf_struct = MemberDecorate(
        Decorate(TypeStruct(t_cbuf_array), spv::Decoration::Block), 0, spv::Decoration::Offset, 0);
    const Id t_cbuf_ubo = TypePointer(spv::StorageClass::Uniform, t_cbuf_struct);

    // Global memory SSBO: runtime float array with 4-byte stride, wrapped in a
    // Block-decorated struct at offset 0.
    const Id t_gmem_float = TypePointer(spv::StorageClass::StorageBuffer, t_float);
    const Id t_gmem_array =
        Name(Decorate(TypeRuntimeArray(t_float), spv::Decoration::ArrayStride, 4u), "GmemArray");
    const Id t_gmem_struct = MemberDecorate(
        Decorate(TypeStruct(t_gmem_array), spv::Decoration::Block), 0, spv::Decoration::Offset, 0);
    const Id t_gmem_ssbo = TypePointer(spv::StorageClass::StorageBuffer, t_gmem_struct);

    // Frequently used constants.
    const Id v_float_zero = Constant(t_float, 0.0f);
    const Id v_true = ConstantTrue(t_bool);
    const Id v_false = ConstantFalse(t_bool);

    // Declared shader objects, keyed by their IR identifiers.
    Id per_vertex{};
    std::map<u32, Id> registers;
    std::map<Tegra::Shader::Pred, Id> predicates;
    Id local_memory{};
    std::array<Id, INTERNAL_FLAGS_COUNT> internal_flags{};
    std::map<Attribute::Index, Id> input_attributes;
    std::map<Attribute::Index, Id> output_attributes;
    std::map<u32, Id> constant_buffers;
    std::map<GlobalMemoryBase, Id> global_buffers;
    std::map<u32, SamplerImage> sampler_images;

    // Built-in inputs/outputs used by the current stage.
    Id instance_index{};
    Id vertex_index{};
    std::array<Id, Maxwell::NumRenderTargets> frag_colors{};
    Id frag_depth{};
    Id frag_coord{};
    Id front_facing{};

    // Member indices inside the per-vertex structure.
    u32 position_index{};
    u32 point_size_index{};
    u32 clip_distances_index{};

    // Interface ids handed to the entry point declaration.
    std::vector<Id> interfaces;

    // First descriptor binding assigned to each resource kind.
    u32 const_buffers_base_binding{};
    u32 global_buffers_base_binding{};
    u32 samplers_base_binding{};

    // Control-flow bookkeeping for the emulated jump/flow stack.
    Id execute_function{};
    Id jmp_to{};
    Id flow_stack_top{};
    Id flow_stack{};
    Id continue_label{};
    std::map<u32, Id> labels;
};
1372
1373DecompilerResult Decompile(const VideoCommon::Shader::ShaderIR& ir, Maxwell::ShaderStage stage) {
1374 auto decompiler = std::make_unique<SPIRVDecompiler>(ir, stage);
1375 decompiler->Decompile();
1376 return {std::move(decompiler), decompiler->GetShaderEntries()};
1377}
1378
1379} // namespace Vulkan::VKShader
diff --git a/src/video_core/renderer_vulkan/vk_shader_decompiler.h b/src/video_core/renderer_vulkan/vk_shader_decompiler.h
new file mode 100644
index 000000000..329d8fa38
--- /dev/null
+++ b/src/video_core/renderer_vulkan/vk_shader_decompiler.h
@@ -0,0 +1,80 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <array>
8#include <memory>
9#include <set>
10#include <utility>
11#include <vector>
12
13#include <sirit/sirit.h>
14
15#include "common/common_types.h"
16#include "video_core/engines/maxwell_3d.h"
17#include "video_core/shader/shader_ir.h"
18
19namespace VideoCommon::Shader {
20class ShaderIR;
21}
22
23namespace Vulkan::VKShader {
24
25using Maxwell = Tegra::Engines::Maxwell3D::Regs;
26
27using SamplerEntry = VideoCommon::Shader::Sampler;
28
29constexpr u32 DESCRIPTOR_SET = 0;
30
/// Constant buffer usage entry extended with the index it was assigned in the stage.
class ConstBufferEntry : public VideoCommon::Shader::ConstBuffer {
public:
    explicit constexpr ConstBufferEntry(const VideoCommon::Shader::ConstBuffer& entry, u32 index)
        : VideoCommon::Shader::ConstBuffer{entry}, index{index} {}

    /// Returns the constant buffer index assigned to this entry.
    constexpr u32 GetIndex() const {
        return index;
    }

private:
    u32 index{};
};
43
44class GlobalBufferEntry {
45public:
46 explicit GlobalBufferEntry(u32 cbuf_index, u32 cbuf_offset)
47 : cbuf_index{cbuf_index}, cbuf_offset{cbuf_offset} {}
48
49 u32 GetCbufIndex() const {
50 return cbuf_index;
51 }
52
53 u32 GetCbufOffset() const {
54 return cbuf_offset;
55 }
56
57private:
58 u32 cbuf_index{};
59 u32 cbuf_offset{};
60};
61
/// Aggregates everything the pipeline needs to know about a decompiled shader.
struct ShaderEntries {
    u32 const_buffers_base_binding{};  ///< First binding used by constant buffers.
    u32 global_buffers_base_binding{}; ///< First binding used by global buffers.
    u32 samplers_base_binding{};       ///< First binding used by samplers.
    std::vector<ConstBufferEntry> const_buffers;
    std::vector<GlobalBufferEntry> global_buffers;
    std::vector<SamplerEntry> samplers;
    std::set<u32> attributes; ///< Input attribute indices read by the shader.
    std::array<bool, Maxwell::NumClipDistances> clip_distances{};
    std::size_t shader_length{}; ///< Program length in bytes.
    Sirit::Id entry_function{};  ///< Id of the function OpEntryPoint refers to.
    std::vector<Sirit::Id> interfaces;
};
75
76using DecompilerResult = std::pair<std::unique_ptr<Sirit::Module>, ShaderEntries>;
77
78DecompilerResult Decompile(const VideoCommon::Shader::ShaderIR& ir, Maxwell::ShaderStage stage);
79
80} // namespace Vulkan::VKShader
diff --git a/src/video_core/renderer_vulkan/vk_stream_buffer.cpp b/src/video_core/renderer_vulkan/vk_stream_buffer.cpp
new file mode 100644
index 000000000..58ffa42f2
--- /dev/null
+++ b/src/video_core/renderer_vulkan/vk_stream_buffer.cpp
@@ -0,0 +1,90 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <algorithm>
6#include <memory>
7#include <optional>
8#include <vector>
9
10#include "common/assert.h"
11#include "video_core/renderer_vulkan/declarations.h"
12#include "video_core/renderer_vulkan/vk_device.h"
13#include "video_core/renderer_vulkan/vk_memory_manager.h"
14#include "video_core/renderer_vulkan/vk_resource_manager.h"
15#include "video_core/renderer_vulkan/vk_scheduler.h"
16#include "video_core/renderer_vulkan/vk_stream_buffer.h"
17
18namespace Vulkan {
19
20constexpr u64 WATCHES_INITIAL_RESERVE = 0x4000;
21constexpr u64 WATCHES_RESERVE_CHUNK = 0x1000;
22
// Creates the stream buffer: allocates the backing Vulkan buffer and
// pre-populates the fence-watch pool so the hot path rarely needs to grow it.
VKStreamBuffer::VKStreamBuffer(const VKDevice& device, VKMemoryManager& memory_manager,
                               VKScheduler& scheduler, u64 size, vk::BufferUsageFlags usage,
                               vk::AccessFlags access, vk::PipelineStageFlags pipeline_stage)
    : device{device}, scheduler{scheduler}, buffer_size{size}, access{access}, pipeline_stage{
                                                                                   pipeline_stage} {
    CreateBuffers(memory_manager, usage);
    ReserveWatches(WATCHES_INITIAL_RESERVE);
}

VKStreamBuffer::~VKStreamBuffer() = default;
33
// Reserves `size` bytes at the current buffer offset. When the request does not
// fit, the buffer wraps to offset 0 and an invalidation is flagged; the flag
// stays set until the next Send() processes it.
std::tuple<u8*, u64, bool> VKStreamBuffer::Reserve(u64 size) {
    ASSERT(size <= buffer_size);
    mapped_size = size;

    if (offset + size > buffer_size) {
        // The buffer would overflow, save the amount of used buffers, signal an invalidation and
        // reset the state.
        invalidation_mark = used_watches;
        used_watches = 0;
        offset = 0;
    }

    return {mapped_pointer + offset, offset, invalidation_mark.has_value()};
}
48
// Commits `size` bytes of the last reservation: handles a pending invalidation
// by waiting on the watches that guarded the overwritten region, then registers
// a fence watch for this allocation and advances the buffer offset.
VKExecutionContext VKStreamBuffer::Send(VKExecutionContext exctx, u64 size) {
    ASSERT_MSG(size <= mapped_size, "Reserved size is too small");

    if (invalidation_mark) {
        // TODO(Rodrigo): Find a better way to invalidate than waiting for all watches to finish.
        exctx = scheduler.Flush();
        std::for_each(watches.begin(), watches.begin() + *invalidation_mark,
                      [&](auto& resource) { resource->Wait(); });
        invalidation_mark = std::nullopt;
    }

    if (used_watches + 1 >= watches.size()) {
        // Ensure that there are enough watches.
        ReserveWatches(WATCHES_RESERVE_CHUNK);
    }
    // Add a watch for this allocation.
    watches[used_watches++]->Watch(exctx.GetFence());

    offset += size;

    return exctx;
}
71
// Creates the backing buffer and commits memory for it, keeping the commit
// persistently mapped for CPU writes.
void VKStreamBuffer::CreateBuffers(VKMemoryManager& memory_manager, vk::BufferUsageFlags usage) {
    const vk::BufferCreateInfo buffer_ci({}, buffer_size, usage, vk::SharingMode::eExclusive, 0,
                                         nullptr);

    const auto dev = device.GetLogical();
    const auto& dld = device.GetDispatchLoader();
    buffer = dev.createBufferUnique(buffer_ci, nullptr, dld);
    // NOTE(review): the `true` presumably requests host-visible memory, since the
    // commit's data pointer is mapped below -- confirm against Commit's signature.
    commit = memory_manager.Commit(*buffer, true);
    mapped_pointer = commit->GetData();
}
82
83void VKStreamBuffer::ReserveWatches(std::size_t grow_size) {
84 const std::size_t previous_size = watches.size();
85 watches.resize(previous_size + grow_size);
86 std::generate(watches.begin() + previous_size, watches.end(),
87 []() { return std::make_unique<VKFenceWatch>(); });
88}
89
90} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_stream_buffer.h b/src/video_core/renderer_vulkan/vk_stream_buffer.h
new file mode 100644
index 000000000..69d036ccd
--- /dev/null
+++ b/src/video_core/renderer_vulkan/vk_stream_buffer.h
@@ -0,0 +1,72 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <memory>
8#include <optional>
9#include <tuple>
10#include <vector>
11
12#include "common/common_types.h"
13#include "video_core/renderer_vulkan/declarations.h"
14#include "video_core/renderer_vulkan/vk_memory_manager.h"
15
16namespace Vulkan {
17
18class VKDevice;
19class VKFence;
20class VKFenceWatch;
21class VKResourceManager;
22class VKScheduler;
23
/// Ring-style staging buffer used to stream data to the GPU. Usage is a
/// Reserve/Send pair per upload; reuse of in-flight regions is guarded by fence
/// watches.
class VKStreamBuffer {
public:
    explicit VKStreamBuffer(const VKDevice& device, VKMemoryManager& memory_manager,
                            VKScheduler& scheduler, u64 size, vk::BufferUsageFlags usage,
                            vk::AccessFlags access, vk::PipelineStageFlags pipeline_stage);
    ~VKStreamBuffer();

    /**
     * Reserves a region of memory from the stream buffer.
     * @param size Size to reserve.
     * @returns A tuple in the following order: Raw memory pointer (with offset added), buffer
     * offset and a boolean that's true when buffer has been invalidated.
     */
    std::tuple<u8*, u64, bool> Reserve(u64 size);

    /// Ensures that "size" bytes of memory are available to the GPU, potentially recording a copy.
    [[nodiscard]] VKExecutionContext Send(VKExecutionContext exctx, u64 size);

    /// Returns the underlying Vulkan buffer handle.
    vk::Buffer GetBuffer() const {
        return *buffer;
    }

private:
    /// Creates Vulkan buffer handles committing the required memory.
    void CreateBuffers(VKMemoryManager& memory_manager, vk::BufferUsageFlags usage);

    /// Increases the amount of watches available.
    void ReserveWatches(std::size_t grow_size);

    const VKDevice& device;                      ///< Vulkan device manager.
    VKScheduler& scheduler;                      ///< Command scheduler.
    const u64 buffer_size;                       ///< Total size of the stream buffer.
    const vk::AccessFlags access;                ///< Access usage of this stream buffer.
    const vk::PipelineStageFlags pipeline_stage; ///< Pipeline usage of this stream buffer.

    UniqueBuffer buffer;   ///< Mapped buffer.
    VKMemoryCommit commit; ///< Memory commit.
    u8* mapped_pointer{};  ///< Pointer to the host visible commit.

    u64 offset{};      ///< Buffer iterator.
    u64 mapped_size{}; ///< Size reserved for the current copy.

    std::vector<std::unique_ptr<VKFenceWatch>> watches; ///< Total watches.
    std::size_t used_watches{}; ///< Count of watches, reset on invalidation.
    std::optional<std::size_t>
        invalidation_mark{}; ///< Number of watches used in the current invalidation.
};
71
72} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_swapchain.cpp b/src/video_core/renderer_vulkan/vk_swapchain.cpp
new file mode 100644
index 000000000..08279e562
--- /dev/null
+++ b/src/video_core/renderer_vulkan/vk_swapchain.cpp
@@ -0,0 +1,210 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <algorithm>
6#include <array>
7#include <limits>
8#include <vector>
9
10#include "common/assert.h"
11#include "common/logging/log.h"
12#include "core/core.h"
13#include "core/frontend/framebuffer_layout.h"
14#include "video_core/renderer_vulkan/declarations.h"
15#include "video_core/renderer_vulkan/vk_device.h"
16#include "video_core/renderer_vulkan/vk_resource_manager.h"
17#include "video_core/renderer_vulkan/vk_swapchain.h"
18
19namespace Vulkan {
20
21namespace {
22vk::SurfaceFormatKHR ChooseSwapSurfaceFormat(const std::vector<vk::SurfaceFormatKHR>& formats) {
23 if (formats.size() == 1 && formats[0].format == vk::Format::eUndefined) {
24 return {vk::Format::eB8G8R8A8Unorm, vk::ColorSpaceKHR::eSrgbNonlinear};
25 }
26 const auto& found = std::find_if(formats.begin(), formats.end(), [](const auto& format) {
27 return format.format == vk::Format::eB8G8R8A8Unorm &&
28 format.colorSpace == vk::ColorSpaceKHR::eSrgbNonlinear;
29 });
30 return found != formats.end() ? *found : formats[0];
31}
32
33vk::PresentModeKHR ChooseSwapPresentMode(const std::vector<vk::PresentModeKHR>& modes) {
34 // Mailbox doesn't lock the application like fifo (vsync), prefer it
35 const auto& found = std::find_if(modes.begin(), modes.end(), [](const auto& mode) {
36 return mode == vk::PresentModeKHR::eMailbox;
37 });
38 return found != modes.end() ? *found : vk::PresentModeKHR::eFifo;
39}
40
41vk::Extent2D ChooseSwapExtent(const vk::SurfaceCapabilitiesKHR& capabilities, u32 width,
42 u32 height) {
43 constexpr auto undefined_size{std::numeric_limits<u32>::max()};
44 if (capabilities.currentExtent.width != undefined_size) {
45 return capabilities.currentExtent;
46 }
47 vk::Extent2D extent = {width, height};
48 extent.width = std::max(capabilities.minImageExtent.width,
49 std::min(capabilities.maxImageExtent.width, extent.width));
50 extent.height = std::max(capabilities.minImageExtent.height,
51 std::min(capabilities.maxImageExtent.height, extent.height));
52 return extent;
53}
54} // namespace
55
// The constructor only stores the handles; the swapchain itself is built lazily
// through Create().
VKSwapchain::VKSwapchain(vk::SurfaceKHR surface, const VKDevice& device)
    : surface{surface}, device{device} {}

VKSwapchain::~VKSwapchain() = default;
60
// Creates (or recreates) the swapchain, its semaphores and image views for the
// given framebuffer size.
void VKSwapchain::Create(u32 width, u32 height) {
    const auto dev = device.GetLogical();
    const auto& dld = device.GetDispatchLoader();
    const auto physical_device = device.GetPhysical();

    const vk::SurfaceCapabilitiesKHR capabilities{
        physical_device.getSurfaceCapabilitiesKHR(surface, dld)};
    if (capabilities.maxImageExtent.width == 0 || capabilities.maxImageExtent.height == 0) {
        // A zero-sized surface (e.g. a minimized window) cannot back a swapchain.
        return;
    }

    // Wait for the device to go idle before destroying the previous swapchain.
    dev.waitIdle(dld);
    Destroy();

    CreateSwapchain(capabilities, width, height);
    CreateSemaphores();
    CreateImageViews();

    // One fence slot per image; filled by Present() and consumed by AcquireNextImage().
    fences.resize(image_count, nullptr);
}
81
// Acquires the next swapchain image, then waits on and releases the fence of a
// previous present still tracking that image before it is reused.
void VKSwapchain::AcquireNextImage() {
    const auto dev{device.GetLogical()};
    const auto& dld{device.GetDispatchLoader()};
    // NOTE(review): the vk::Result of acquireNextImageKHR is ignored, so an
    // out-of-date surface is only detected later in Present() -- confirm this is
    // intentional.
    dev.acquireNextImageKHR(*swapchain, std::numeric_limits<u64>::max(),
                            *present_semaphores[frame_index], {}, &image_index, dld);

    if (auto& fence = fences[image_index]; fence) {
        fence->Wait();
        fence->Release();
        fence = nullptr;
    }
}
94
// Presents the current image. Returns true when the swapchain had to be
// recreated (out-of-date surface). Takes ownership of `fence`, which guards the
// image slot until it is reacquired.
bool VKSwapchain::Present(vk::Semaphore render_semaphore, VKFence& fence) {
    const vk::Semaphore present_semaphore{*present_semaphores[frame_index]};
    const std::array<vk::Semaphore, 2> semaphores{present_semaphore, render_semaphore};
    // Only wait on the render semaphore when the caller actually supplied one.
    const u32 wait_semaphore_count{render_semaphore ? 2U : 1U};
    const auto& dld{device.GetDispatchLoader()};
    const auto present_queue{device.GetPresentQueue()};
    bool recreated = false;

    const vk::PresentInfoKHR present_info(wait_semaphore_count, semaphores.data(), 1,
                                          &swapchain.get(), &image_index, {});
    switch (const auto result = present_queue.presentKHR(&present_info, dld); result) {
    case vk::Result::eSuccess:
        break;
    case vk::Result::eErrorOutOfDateKHR:
        // The surface no longer matches; recreate using the last known size.
        if (current_width > 0 && current_height > 0) {
            Create(current_width, current_height);
            recreated = true;
        }
        break;
    default:
        LOG_CRITICAL(Render_Vulkan, "Vulkan failed to present swapchain due to {}!",
                     vk::to_string(result));
        UNREACHABLE();
    }

    // Track the fence so AcquireNextImage can wait before reusing this image.
    ASSERT(fences[image_index] == nullptr);
    fences[image_index] = &fence;
    frame_index = (frame_index + 1) % image_count;
    return recreated;
}
125
126bool VKSwapchain::HasFramebufferChanged(const Layout::FramebufferLayout& framebuffer) const {
127 // TODO(Rodrigo): Handle framebuffer pixel format changes
128 return framebuffer.width != current_width || framebuffer.height != current_height;
129}
130
// Builds the swapchain object itself: queries surface support, negotiates
// format/mode/extent and fetches the resulting image handles.
void VKSwapchain::CreateSwapchain(const vk::SurfaceCapabilitiesKHR& capabilities, u32 width,
                                  u32 height) {
    const auto dev{device.GetLogical()};
    const auto& dld{device.GetDispatchLoader()};
    const auto physical_device{device.GetPhysical()};

    const std::vector<vk::SurfaceFormatKHR> formats{
        physical_device.getSurfaceFormatsKHR(surface, dld)};

    const std::vector<vk::PresentModeKHR> present_modes{
        physical_device.getSurfacePresentModesKHR(surface, dld)};

    const vk::SurfaceFormatKHR surface_format{ChooseSwapSurfaceFormat(formats)};
    const vk::PresentModeKHR present_mode{ChooseSwapPresentMode(present_modes)};
    extent = ChooseSwapExtent(capabilities, width, height);

    current_width = extent.width;
    current_height = extent.height;

    // Ask for one image above the minimum, clamped to the maximum when the
    // implementation imposes one (maxImageCount == 0 means unlimited).
    u32 requested_image_count{capabilities.minImageCount + 1};
    if (capabilities.maxImageCount > 0 && requested_image_count > capabilities.maxImageCount) {
        requested_image_count = capabilities.maxImageCount;
    }

    vk::SwapchainCreateInfoKHR swapchain_ci(
        {}, surface, requested_image_count, surface_format.format, surface_format.colorSpace,
        extent, 1, vk::ImageUsageFlagBits::eColorAttachment, {}, {}, {},
        capabilities.currentTransform, vk::CompositeAlphaFlagBitsKHR::eOpaque, present_mode, false,
        {});

    // Distinct graphics/present queue families must share the images concurrently;
    // a single family can use cheaper exclusive ownership.
    const u32 graphics_family{device.GetGraphicsFamily()};
    const u32 present_family{device.GetPresentFamily()};
    const std::array<u32, 2> queue_indices{graphics_family, present_family};
    if (graphics_family != present_family) {
        swapchain_ci.imageSharingMode = vk::SharingMode::eConcurrent;
        swapchain_ci.queueFamilyIndexCount = static_cast<u32>(queue_indices.size());
        swapchain_ci.pQueueFamilyIndices = queue_indices.data();
    } else {
        swapchain_ci.imageSharingMode = vk::SharingMode::eExclusive;
    }

    swapchain = dev.createSwapchainKHRUnique(swapchain_ci, nullptr, dld);

    // The implementation may create more images than requested; use the real count.
    images = dev.getSwapchainImagesKHR(*swapchain, dld);
    image_count = static_cast<u32>(images.size());
    image_format = surface_format.format;
}
178
179void VKSwapchain::CreateSemaphores() {
180 const auto dev{device.GetLogical()};
181 const auto& dld{device.GetDispatchLoader()};
182
183 present_semaphores.resize(image_count);
184 for (std::size_t i = 0; i < image_count; i++) {
185 present_semaphores[i] = dev.createSemaphoreUnique({}, nullptr, dld);
186 }
187}
188
// Creates a 2D color view per swapchain image, covering one mip level and one
// array layer.
void VKSwapchain::CreateImageViews() {
    const auto dev{device.GetLogical()};
    const auto& dld{device.GetDispatchLoader()};

    image_views.resize(image_count);
    for (std::size_t i = 0; i < image_count; i++) {
        const vk::ImageViewCreateInfo image_view_ci({}, images[i], vk::ImageViewType::e2D,
                                                    image_format, {},
                                                    {vk::ImageAspectFlagBits::eColor, 0, 1, 0, 1});
        image_views[i] = dev.createImageViewUnique(image_view_ci, nullptr, dld);
    }
}
201
// Releases all swapchain-owned objects; dependents are destroyed before the
// swapchain handle itself.
void VKSwapchain::Destroy() {
    frame_index = 0;
    present_semaphores.clear();
    framebuffers.clear();
    image_views.clear();
    swapchain.reset();
}
209
210} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_swapchain.h b/src/video_core/renderer_vulkan/vk_swapchain.h
new file mode 100644
index 000000000..2ad84f185
--- /dev/null
+++ b/src/video_core/renderer_vulkan/vk_swapchain.h
@@ -0,0 +1,92 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <vector>
8
9#include "common/common_types.h"
10#include "video_core/renderer_vulkan/declarations.h"
11
12namespace Layout {
13struct FramebufferLayout;
14}
15
16namespace Vulkan {
17
18class VKDevice;
19class VKFence;
20
/// Owns the Vulkan swapchain and its per-image resources (views, semaphores and
/// the fences guarding image reuse).
class VKSwapchain {
public:
    explicit VKSwapchain(vk::SurfaceKHR surface, const VKDevice& device);
    ~VKSwapchain();

    /// Creates (or recreates) the swapchain with a given size.
    void Create(u32 width, u32 height);

    /// Acquires the next image in the swapchain, waits as needed.
    void AcquireNextImage();

    /// Presents the rendered image to the swapchain. Returns true when the swapchain had to be
    /// recreated. Takes responsibility for the ownership of fence.
    bool Present(vk::Semaphore render_semaphore, VKFence& fence);

    /// Returns true when the framebuffer layout has changed.
    bool HasFramebufferChanged(const Layout::FramebufferLayout& framebuffer) const;

    /// Returns the current swapchain extent.
    const vk::Extent2D& GetSize() const {
        return extent;
    }

    /// Returns the number of images in the swapchain.
    u32 GetImageCount() const {
        return image_count;
    }

    /// Returns the index of the currently acquired image.
    u32 GetImageIndex() const {
        return image_index;
    }

    /// Returns the image at the given index.
    /// NOTE(review): despite the name, this overload returns an image, not an
    /// index -- a rename (e.g. GetImage) would break callers, so it is only noted here.
    vk::Image GetImageIndex(u32 index) const {
        return images[index];
    }

    /// Returns the image view at the given index.
    vk::ImageView GetImageViewIndex(u32 index) const {
        return *image_views[index];
    }

    /// Returns the format of the swapchain images.
    vk::Format GetImageFormat() const {
        return image_format;
    }

private:
    void CreateSwapchain(const vk::SurfaceCapabilitiesKHR& capabilities, u32 width, u32 height);
    void CreateSemaphores();
    void CreateImageViews();

    void Destroy();

    const vk::SurfaceKHR surface;
    const VKDevice& device;

    UniqueSwapchainKHR swapchain;

    u32 image_count{};
    std::vector<vk::Image> images;
    std::vector<UniqueImageView> image_views;
    std::vector<UniqueFramebuffer> framebuffers;
    std::vector<VKFence*> fences; ///< One slot per image; set by Present, cleared on reacquire.
    std::vector<UniqueSemaphore> present_semaphores;

    u32 image_index{}; ///< Index of the currently acquired image.
    u32 frame_index{}; ///< Rotating index selecting the present semaphore.

    vk::Format image_format{};
    vk::Extent2D extent{};

    u32 current_width{};
    u32 current_height{};
};
91
92} // namespace Vulkan
diff --git a/src/video_core/shader/decode.cpp b/src/video_core/shader/decode.cpp
new file mode 100644
index 000000000..e4c438792
--- /dev/null
+++ b/src/video_core/shader/decode.cpp
@@ -0,0 +1,209 @@
1// Copyright 2018 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <cstring>
6#include <set>
7
8#include <fmt/format.h>
9
10#include "common/assert.h"
11#include "common/common_types.h"
12#include "video_core/engines/shader_bytecode.h"
13#include "video_core/engines/shader_header.h"
14#include "video_core/shader/shader_ir.h"
15
16namespace VideoCommon::Shader {
17
18using Tegra::Shader::Instruction;
19using Tegra::Shader::OpCode;
20
21namespace {
22
23/// Merges exit method of two parallel branches.
24constexpr ExitMethod ParallelExit(ExitMethod a, ExitMethod b) {
25 if (a == ExitMethod::Undetermined) {
26 return b;
27 }
28 if (b == ExitMethod::Undetermined) {
29 return a;
30 }
31 if (a == b) {
32 return a;
33 }
34 return ExitMethod::Conditional;
35}
36
37/**
38 * Returns whether the instruction at the specified offset is a 'sched' instruction.
39 * Sched instructions always appear before a sequence of 3 instructions.
40 */
41constexpr bool IsSchedInstruction(u32 offset, u32 main_offset) {
42 constexpr u32 SchedPeriod = 4;
43 u32 absolute_offset = offset - main_offset;
44
45 return (absolute_offset % SchedPeriod) == 0;
46}
47
48} // namespace
49
50void ShaderIR::Decode() {
51 std::memcpy(&header, program_code.data(), sizeof(Tegra::Shader::Header));
52
53 std::set<u32> labels;
54 const ExitMethod exit_method = Scan(main_offset, MAX_PROGRAM_LENGTH, labels);
55 if (exit_method != ExitMethod::AlwaysEnd) {
56 UNREACHABLE_MSG("Program does not always end");
57 }
58
59 if (labels.empty()) {
60 basic_blocks.insert({main_offset, DecodeRange(main_offset, MAX_PROGRAM_LENGTH)});
61 return;
62 }
63
64 labels.insert(main_offset);
65
66 for (const u32 label : labels) {
67 const auto next_it = labels.lower_bound(label + 1);
68 const u32 next_label = next_it == labels.end() ? MAX_PROGRAM_LENGTH : *next_it;
69
70 basic_blocks.insert({label, DecodeRange(label, next_label)});
71 }
72}
73
/// Scans the range [begin, end) and determines how it exits, collecting branch targets into
/// labels. Results are memoized per (begin, end) pair in exit_method_map; the entry is created
/// as Undetermined up front so recursive re-entry on the same range returns early instead of
/// looping forever.
ExitMethod ShaderIR::Scan(u32 begin, u32 end, std::set<u32>& labels) {
    const auto [iter, inserted] =
        exit_method_map.emplace(std::make_pair(begin, end), ExitMethod::Undetermined);
    ExitMethod& exit_method = iter->second;
    if (!inserted)
        return exit_method;

    for (u32 offset = begin; offset != end && offset != MAX_PROGRAM_LENGTH; ++offset) {
        // Track the overall range of code touched by scanning.
        coverage_begin = std::min(coverage_begin, offset);
        coverage_end = std::max(coverage_end, offset + 1);

        const Instruction instr = {program_code[offset]};
        const auto opcode = OpCode::Decode(instr);
        if (!opcode)
            continue;
        switch (opcode->get().GetId()) {
        case OpCode::Id::EXIT: {
            // The EXIT instruction can be predicated, which means that the shader can conditionally
            // end on this instruction. We have to consider the case where the condition is not met
            // and check the exit method of that other basic block.
            using Tegra::Shader::Pred;
            if (instr.pred.pred_index == static_cast<u64>(Pred::UnusedIndex)) {
                return exit_method = ExitMethod::AlwaysEnd;
            } else {
                const ExitMethod not_met = Scan(offset + 1, end, labels);
                return exit_method = ParallelExit(ExitMethod::AlwaysEnd, not_met);
            }
        }
        case OpCode::Id::BRA: {
            // Both the fallthrough path and the branch target path contribute to the exit method.
            const u32 target = offset + instr.bra.GetBranchTarget();
            labels.insert(target);
            const ExitMethod no_jmp = Scan(offset + 1, end, labels);
            const ExitMethod jmp = Scan(target, end, labels);
            return exit_method = ParallelExit(no_jmp, jmp);
        }
        case OpCode::Id::SSY:
        case OpCode::Id::PBK: {
            // The SSY and PBK use a similar encoding as the BRA instruction.
            UNIMPLEMENTED_IF_MSG(instr.bra.constant_buffer != 0,
                                 "Constant buffer branching is not supported");
            const u32 target = offset + instr.bra.GetBranchTarget();
            labels.insert(target);
            // Continue scanning for an exit method.
            break;
        }
        }
    }
    // Fell off the end of the range without a terminating instruction.
    return exit_method = ExitMethod::AlwaysReturn;
}
123
124NodeBlock ShaderIR::DecodeRange(u32 begin, u32 end) {
125 NodeBlock basic_block;
126 for (u32 pc = begin; pc < (begin > end ? MAX_PROGRAM_LENGTH : end);) {
127 pc = DecodeInstr(basic_block, pc);
128 }
129 return basic_block;
130}
131
/// Decodes a single instruction at pc into bb (and global_code), dispatching by opcode type.
/// Returns the program counter of the next instruction to decode.
u32 ShaderIR::DecodeInstr(NodeBlock& bb, u32 pc) {
    // Ignore sched instructions when generating code.
    if (IsSchedInstruction(pc, main_offset)) {
        return pc + 1;
    }

    const Instruction instr = {program_code[pc]};
    const auto opcode = OpCode::Decode(instr);

    // Decoding failure
    if (!opcode) {
        UNIMPLEMENTED_MSG("Unhandled instruction: {0:x}", instr.value);
        return pc + 1;
    }

    // Emit a disassembly-style comment node to aid debugging of generated code.
    bb.push_back(
        Comment(fmt::format("{}: {} (0x{:016x})", pc, opcode->get().GetName(), instr.value)));

    using Tegra::Shader::Pred;
    UNIMPLEMENTED_IF_MSG(instr.pred.full_pred == Pred::NeverExecute,
                         "NeverExecute predicate not implemented");

    // Table mapping each opcode category to its decoder member function. Built once on first
    // call (function-local static).
    static const std::map<OpCode::Type, u32 (ShaderIR::*)(NodeBlock&, u32)> decoders = {
        {OpCode::Type::Arithmetic, &ShaderIR::DecodeArithmetic},
        {OpCode::Type::ArithmeticImmediate, &ShaderIR::DecodeArithmeticImmediate},
        {OpCode::Type::Bfe, &ShaderIR::DecodeBfe},
        {OpCode::Type::Bfi, &ShaderIR::DecodeBfi},
        {OpCode::Type::Shift, &ShaderIR::DecodeShift},
        {OpCode::Type::ArithmeticInteger, &ShaderIR::DecodeArithmeticInteger},
        {OpCode::Type::ArithmeticIntegerImmediate, &ShaderIR::DecodeArithmeticIntegerImmediate},
        {OpCode::Type::ArithmeticHalf, &ShaderIR::DecodeArithmeticHalf},
        {OpCode::Type::ArithmeticHalfImmediate, &ShaderIR::DecodeArithmeticHalfImmediate},
        {OpCode::Type::Ffma, &ShaderIR::DecodeFfma},
        {OpCode::Type::Hfma2, &ShaderIR::DecodeHfma2},
        {OpCode::Type::Conversion, &ShaderIR::DecodeConversion},
        {OpCode::Type::Memory, &ShaderIR::DecodeMemory},
        {OpCode::Type::Texture, &ShaderIR::DecodeTexture},
        {OpCode::Type::FloatSetPredicate, &ShaderIR::DecodeFloatSetPredicate},
        {OpCode::Type::IntegerSetPredicate, &ShaderIR::DecodeIntegerSetPredicate},
        {OpCode::Type::HalfSetPredicate, &ShaderIR::DecodeHalfSetPredicate},
        {OpCode::Type::PredicateSetRegister, &ShaderIR::DecodePredicateSetRegister},
        {OpCode::Type::PredicateSetPredicate, &ShaderIR::DecodePredicateSetPredicate},
        {OpCode::Type::RegisterSetPredicate, &ShaderIR::DecodeRegisterSetPredicate},
        {OpCode::Type::FloatSet, &ShaderIR::DecodeFloatSet},
        {OpCode::Type::IntegerSet, &ShaderIR::DecodeIntegerSet},
        {OpCode::Type::HalfSet, &ShaderIR::DecodeHalfSet},
        {OpCode::Type::Video, &ShaderIR::DecodeVideo},
        {OpCode::Type::Xmad, &ShaderIR::DecodeXmad},
    };

    // Decode into a temporary block first so the result can optionally be wrapped in a
    // conditional below.
    std::vector<Node> tmp_block;
    if (const auto decoder = decoders.find(opcode->get().GetType()); decoder != decoders.end()) {
        pc = (this->*decoder->second)(tmp_block, pc);
    } else {
        pc = DecodeOther(tmp_block, pc);
    }

    // Some instructions (like SSY) don't have a predicate field, they are always unconditionally
    // executed.
    const bool can_be_predicated = OpCode::IsPredicatedInstruction(opcode->get().GetId());
    const auto pred_index = static_cast<u32>(instr.pred.pred_index);

    if (can_be_predicated && pred_index != static_cast<u32>(Pred::UnusedIndex)) {
        // Wrap the decoded nodes in a conditional guarded by the instruction's predicate.
        const Node conditional =
            Conditional(GetPredicate(pred_index, instr.negate_pred != 0), std::move(tmp_block));
        global_code.push_back(conditional);
        bb.push_back(conditional);
    } else {
        for (auto& node : tmp_block) {
            global_code.push_back(node);
            bb.push_back(node);
        }
    }

    return pc + 1;
}
208
209} // namespace VideoCommon::Shader \ No newline at end of file
diff --git a/src/video_core/shader/decode/arithmetic.cpp b/src/video_core/shader/decode/arithmetic.cpp
new file mode 100644
index 000000000..3190e2d7c
--- /dev/null
+++ b/src/video_core/shader/decode/arithmetic.cpp
@@ -0,0 +1,155 @@
1// Copyright 2018 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/assert.h"
6#include "common/common_types.h"
7#include "video_core/engines/shader_bytecode.h"
8#include "video_core/shader/shader_ir.h"
9
10namespace VideoCommon::Shader {
11
12using Tegra::Shader::Instruction;
13using Tegra::Shader::OpCode;
14using Tegra::Shader::SubOp;
15
16u32 ShaderIR::DecodeArithmetic(NodeBlock& bb, u32 pc) {
17 const Instruction instr = {program_code[pc]};
18 const auto opcode = OpCode::Decode(instr);
19
20 Node op_a = GetRegister(instr.gpr8);
21
22 Node op_b = [&]() -> Node {
23 if (instr.is_b_imm) {
24 return GetImmediate19(instr);
25 } else if (instr.is_b_gpr) {
26 return GetRegister(instr.gpr20);
27 } else {
28 return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset());
29 }
30 }();
31
32 switch (opcode->get().GetId()) {
33 case OpCode::Id::MOV_C:
34 case OpCode::Id::MOV_R: {
35 // MOV does not have neither 'abs' nor 'neg' bits.
36 SetRegister(bb, instr.gpr0, op_b);
37 break;
38 }
39 case OpCode::Id::FMUL_C:
40 case OpCode::Id::FMUL_R:
41 case OpCode::Id::FMUL_IMM: {
42 // FMUL does not have 'abs' bits and only the second operand has a 'neg' bit.
43 UNIMPLEMENTED_IF_MSG(instr.fmul.tab5cb8_2 != 0, "FMUL tab5cb8_2({}) is not implemented",
44 instr.fmul.tab5cb8_2.Value());
45 UNIMPLEMENTED_IF_MSG(
46 instr.fmul.tab5c68_0 != 1, "FMUL tab5cb8_0({}) is not implemented",
47 instr.fmul.tab5c68_0.Value()); // SMO typical sends 1 here which seems to be the default
48
49 op_b = GetOperandAbsNegFloat(op_b, false, instr.fmul.negate_b);
50
51 // TODO(Rodrigo): Should precise be used when there's a postfactor?
52 Node value = Operation(OperationCode::FMul, PRECISE, op_a, op_b);
53
54 if (instr.fmul.postfactor != 0) {
55 auto postfactor = static_cast<s32>(instr.fmul.postfactor);
56
57 // Postfactor encoded as 3-bit 1's complement in instruction, interpreted with below
58 // logic.
59 if (postfactor >= 4) {
60 postfactor = 7 - postfactor;
61 } else {
62 postfactor = 0 - postfactor;
63 }
64
65 if (postfactor > 0) {
66 value = Operation(OperationCode::FMul, NO_PRECISE, value,
67 Immediate(static_cast<f32>(1 << postfactor)));
68 } else {
69 value = Operation(OperationCode::FDiv, NO_PRECISE, value,
70 Immediate(static_cast<f32>(1 << -postfactor)));
71 }
72 }
73
74 value = GetSaturatedFloat(value, instr.alu.saturate_d);
75
76 SetInternalFlagsFromFloat(bb, value, instr.generates_cc);
77 SetRegister(bb, instr.gpr0, value);
78 break;
79 }
80 case OpCode::Id::FADD_C:
81 case OpCode::Id::FADD_R:
82 case OpCode::Id::FADD_IMM: {
83 op_a = GetOperandAbsNegFloat(op_a, instr.alu.abs_a, instr.alu.negate_a);
84 op_b = GetOperandAbsNegFloat(op_b, instr.alu.abs_b, instr.alu.negate_b);
85
86 Node value = Operation(OperationCode::FAdd, PRECISE, op_a, op_b);
87 value = GetSaturatedFloat(value, instr.alu.saturate_d);
88
89 SetInternalFlagsFromFloat(bb, value, instr.generates_cc);
90 SetRegister(bb, instr.gpr0, value);
91 break;
92 }
93 case OpCode::Id::MUFU: {
94 op_a = GetOperandAbsNegFloat(op_a, instr.alu.abs_a, instr.alu.negate_a);
95
96 Node value = [&]() {
97 switch (instr.sub_op) {
98 case SubOp::Cos:
99 return Operation(OperationCode::FCos, PRECISE, op_a);
100 case SubOp::Sin:
101 return Operation(OperationCode::FSin, PRECISE, op_a);
102 case SubOp::Ex2:
103 return Operation(OperationCode::FExp2, PRECISE, op_a);
104 case SubOp::Lg2:
105 return Operation(OperationCode::FLog2, PRECISE, op_a);
106 case SubOp::Rcp:
107 return Operation(OperationCode::FDiv, PRECISE, Immediate(1.0f), op_a);
108 case SubOp::Rsq:
109 return Operation(OperationCode::FInverseSqrt, PRECISE, op_a);
110 case SubOp::Sqrt:
111 return Operation(OperationCode::FSqrt, PRECISE, op_a);
112 default:
113 UNIMPLEMENTED_MSG("Unhandled MUFU sub op={0:x}",
114 static_cast<unsigned>(instr.sub_op.Value()));
115 return Immediate(0);
116 }
117 }();
118 value = GetSaturatedFloat(value, instr.alu.saturate_d);
119
120 SetRegister(bb, instr.gpr0, value);
121 break;
122 }
123 case OpCode::Id::FMNMX_C:
124 case OpCode::Id::FMNMX_R:
125 case OpCode::Id::FMNMX_IMM: {
126 op_a = GetOperandAbsNegFloat(op_a, instr.alu.abs_a, instr.alu.negate_a);
127 op_b = GetOperandAbsNegFloat(op_b, instr.alu.abs_b, instr.alu.negate_b);
128
129 const Node condition = GetPredicate(instr.alu.fmnmx.pred, instr.alu.fmnmx.negate_pred != 0);
130
131 const Node min = Operation(OperationCode::FMin, NO_PRECISE, op_a, op_b);
132 const Node max = Operation(OperationCode::FMax, NO_PRECISE, op_a, op_b);
133 const Node value = Operation(OperationCode::Select, NO_PRECISE, condition, min, max);
134
135 SetInternalFlagsFromFloat(bb, value, instr.generates_cc);
136 SetRegister(bb, instr.gpr0, value);
137 break;
138 }
139 case OpCode::Id::RRO_C:
140 case OpCode::Id::RRO_R:
141 case OpCode::Id::RRO_IMM: {
142 // Currently RRO is only implemented as a register move.
143 op_b = GetOperandAbsNegFloat(op_b, instr.alu.abs_b, instr.alu.negate_b);
144 SetRegister(bb, instr.gpr0, op_b);
145 LOG_WARNING(HW_GPU, "RRO instruction is incomplete");
146 break;
147 }
148 default:
149 UNIMPLEMENTED_MSG("Unhandled arithmetic instruction: {}", opcode->get().GetName());
150 }
151
152 return pc;
153}
154
155} // namespace VideoCommon::Shader \ No newline at end of file
diff --git a/src/video_core/shader/decode/arithmetic_half.cpp b/src/video_core/shader/decode/arithmetic_half.cpp
new file mode 100644
index 000000000..baee89107
--- /dev/null
+++ b/src/video_core/shader/decode/arithmetic_half.cpp
@@ -0,0 +1,70 @@
1// Copyright 2018 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/assert.h"
6#include "common/common_types.h"
7#include "video_core/engines/shader_bytecode.h"
8#include "video_core/shader/shader_ir.h"
9
10namespace VideoCommon::Shader {
11
12using Tegra::Shader::Instruction;
13using Tegra::Shader::OpCode;
14
/// Decodes a packed half-float arithmetic instruction (HADD2/HMUL2, register or cbuf operand
/// forms) at pc into bb. Returns the same pc; the caller advances it.
u32 ShaderIR::DecodeArithmeticHalf(NodeBlock& bb, u32 pc) {
    const Instruction instr = {program_code[pc]};
    const auto opcode = OpCode::Decode(instr);

    if (opcode->get().GetId() == OpCode::Id::HADD2_C ||
        opcode->get().GetId() == OpCode::Id::HADD2_R) {
        UNIMPLEMENTED_IF(instr.alu_half.ftz != 0);
    }
    UNIMPLEMENTED_IF_MSG(instr.alu_half.saturate != 0, "Half float saturation not implemented");

    // HMUL2 folds negation into one operand only, so the corresponding 'neg' bit is ignored
    // for the variant that cannot carry it.
    const bool negate_a =
        opcode->get().GetId() != OpCode::Id::HMUL2_R && instr.alu_half.negate_a != 0;
    const bool negate_b =
        opcode->get().GetId() != OpCode::Id::HMUL2_C && instr.alu_half.negate_b != 0;

    const Node op_a = GetOperandAbsNegHalf(GetRegister(instr.gpr8), instr.alu_half.abs_a, negate_a);

    // instr.alu_half.type_a

    // Operand B comes from a constant buffer for *_C variants and a register for *_R variants.
    Node op_b = [&]() {
        switch (opcode->get().GetId()) {
        case OpCode::Id::HADD2_C:
        case OpCode::Id::HMUL2_C:
            return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset());
        case OpCode::Id::HADD2_R:
        case OpCode::Id::HMUL2_R:
            return GetRegister(instr.gpr20);
        default:
            UNREACHABLE();
            return Immediate(0);
        }
    }();
    op_b = GetOperandAbsNegHalf(op_b, instr.alu_half.abs_b, negate_b);

    Node value = [&]() {
        // NOTE(review): type_a is read from the immediate-form encoding (alu_half_imm) while
        // type_b comes from alu_half — looks like a copy-paste slip; confirm whether the two
        // bitfields alias the same bits before changing.
        MetaHalfArithmetic meta{true, {instr.alu_half_imm.type_a, instr.alu_half.type_b}};
        switch (opcode->get().GetId()) {
        case OpCode::Id::HADD2_C:
        case OpCode::Id::HADD2_R:
            return Operation(OperationCode::HAdd, meta, op_a, op_b);
        case OpCode::Id::HMUL2_C:
        case OpCode::Id::HMUL2_R:
            return Operation(OperationCode::HMul, meta, op_a, op_b);
        default:
            UNIMPLEMENTED_MSG("Unhandled half float instruction: {}", opcode->get().GetName());
            return Immediate(0);
        }
    }();
    // Merge the result with the destination register according to the 'merge' mode.
    value = HalfMerge(GetRegister(instr.gpr0), value, instr.alu_half.merge);

    SetRegister(bb, instr.gpr0, value);

    return pc;
}
69
70} // namespace VideoCommon::Shader \ No newline at end of file
diff --git a/src/video_core/shader/decode/arithmetic_half_immediate.cpp b/src/video_core/shader/decode/arithmetic_half_immediate.cpp
new file mode 100644
index 000000000..c2164ba50
--- /dev/null
+++ b/src/video_core/shader/decode/arithmetic_half_immediate.cpp
@@ -0,0 +1,51 @@
1// Copyright 2018 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/assert.h"
6#include "common/common_types.h"
7#include "video_core/engines/shader_bytecode.h"
8#include "video_core/shader/shader_ir.h"
9
10namespace VideoCommon::Shader {
11
12using Tegra::Shader::Instruction;
13using Tegra::Shader::OpCode;
14
15u32 ShaderIR::DecodeArithmeticHalfImmediate(NodeBlock& bb, u32 pc) {
16 const Instruction instr = {program_code[pc]};
17 const auto opcode = OpCode::Decode(instr);
18
19 if (opcode->get().GetId() == OpCode::Id::HADD2_IMM) {
20 UNIMPLEMENTED_IF(instr.alu_half_imm.ftz != 0);
21 } else {
22 UNIMPLEMENTED_IF(instr.alu_half_imm.precision != Tegra::Shader::HalfPrecision::None);
23 }
24 UNIMPLEMENTED_IF_MSG(instr.alu_half_imm.saturate != 0,
25 "Half float immediate saturation not implemented");
26
27 Node op_a = GetRegister(instr.gpr8);
28 op_a = GetOperandAbsNegHalf(op_a, instr.alu_half_imm.abs_a, instr.alu_half_imm.negate_a);
29
30 const Node op_b = UnpackHalfImmediate(instr, true);
31
32 Node value = [&]() {
33 MetaHalfArithmetic meta{true, {instr.alu_half_imm.type_a}};
34 switch (opcode->get().GetId()) {
35 case OpCode::Id::HADD2_IMM:
36 return Operation(OperationCode::HAdd, meta, op_a, op_b);
37 case OpCode::Id::HMUL2_IMM:
38 return Operation(OperationCode::HMul, meta, op_a, op_b);
39 default:
40 UNREACHABLE();
41 return Immediate(0);
42 }
43 }();
44 value = HalfMerge(GetRegister(instr.gpr0), value, instr.alu_half_imm.merge);
45
46 SetRegister(bb, instr.gpr0, value);
47
48 return pc;
49}
50
51} // namespace VideoCommon::Shader \ No newline at end of file
diff --git a/src/video_core/shader/decode/arithmetic_immediate.cpp b/src/video_core/shader/decode/arithmetic_immediate.cpp
new file mode 100644
index 000000000..0d139c0d2
--- /dev/null
+++ b/src/video_core/shader/decode/arithmetic_immediate.cpp
@@ -0,0 +1,52 @@
1// Copyright 2018 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/assert.h"
6#include "common/common_types.h"
7#include "video_core/engines/shader_bytecode.h"
8#include "video_core/shader/shader_ir.h"
9
10namespace VideoCommon::Shader {
11
12using Tegra::Shader::Instruction;
13using Tegra::Shader::OpCode;
14
15u32 ShaderIR::DecodeArithmeticImmediate(NodeBlock& bb, u32 pc) {
16 const Instruction instr = {program_code[pc]};
17 const auto opcode = OpCode::Decode(instr);
18
19 switch (opcode->get().GetId()) {
20 case OpCode::Id::MOV32_IMM: {
21 SetRegister(bb, instr.gpr0, GetImmediate32(instr));
22 break;
23 }
24 case OpCode::Id::FMUL32_IMM: {
25 Node value =
26 Operation(OperationCode::FMul, PRECISE, GetRegister(instr.gpr8), GetImmediate32(instr));
27 value = GetSaturatedFloat(value, instr.fmul32.saturate);
28
29 SetInternalFlagsFromFloat(bb, value, instr.op_32.generates_cc);
30 SetRegister(bb, instr.gpr0, value);
31 break;
32 }
33 case OpCode::Id::FADD32I: {
34 const Node op_a = GetOperandAbsNegFloat(GetRegister(instr.gpr8), instr.fadd32i.abs_a,
35 instr.fadd32i.negate_a);
36 const Node op_b = GetOperandAbsNegFloat(GetImmediate32(instr), instr.fadd32i.abs_b,
37 instr.fadd32i.negate_b);
38
39 const Node value = Operation(OperationCode::FAdd, PRECISE, op_a, op_b);
40 SetInternalFlagsFromFloat(bb, value, instr.op_32.generates_cc);
41 SetRegister(bb, instr.gpr0, value);
42 break;
43 }
44 default:
45 UNIMPLEMENTED_MSG("Unhandled arithmetic immediate instruction: {}",
46 opcode->get().GetName());
47 }
48
49 return pc;
50}
51
52} // namespace VideoCommon::Shader \ No newline at end of file
diff --git a/src/video_core/shader/decode/arithmetic_integer.cpp b/src/video_core/shader/decode/arithmetic_integer.cpp
new file mode 100644
index 000000000..9fd4b273e
--- /dev/null
+++ b/src/video_core/shader/decode/arithmetic_integer.cpp
@@ -0,0 +1,287 @@
1// Copyright 2018 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/assert.h"
6#include "common/common_types.h"
7#include "video_core/engines/shader_bytecode.h"
8#include "video_core/shader/shader_ir.h"
9
10namespace VideoCommon::Shader {
11
12using Tegra::Shader::IAdd3Height;
13using Tegra::Shader::Instruction;
14using Tegra::Shader::OpCode;
15using Tegra::Shader::Pred;
16using Tegra::Shader::Register;
17
/// Decodes an integer arithmetic instruction (IADD, IADD3, ISCADD, POPC, SEL, LOP, LOP3,
/// IMNMX, LEA) at pc into bb. Returns the same pc; the caller advances it.
u32 ShaderIR::DecodeArithmeticInteger(NodeBlock& bb, u32 pc) {
    const Instruction instr = {program_code[pc]};
    const auto opcode = OpCode::Decode(instr);

    Node op_a = GetRegister(instr.gpr8);
    // Operand B can come from a signed 20-bit immediate, a register or a constant buffer.
    Node op_b = [&]() {
        if (instr.is_b_imm) {
            return Immediate(instr.alu.GetSignedImm20_20());
        } else if (instr.is_b_gpr) {
            return GetRegister(instr.gpr20);
        } else {
            return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset());
        }
    }();

    switch (opcode->get().GetId()) {
    case OpCode::Id::IADD_C:
    case OpCode::Id::IADD_R:
    case OpCode::Id::IADD_IMM: {
        UNIMPLEMENTED_IF_MSG(instr.alu.saturate_d, "IADD saturation not implemented");

        op_a = GetOperandAbsNegInteger(op_a, false, instr.alu_integer.negate_a, true);
        op_b = GetOperandAbsNegInteger(op_b, false, instr.alu_integer.negate_b, true);

        const Node value = Operation(OperationCode::IAdd, PRECISE, op_a, op_b);

        SetInternalFlagsFromInteger(bb, value, instr.generates_cc);
        SetRegister(bb, instr.gpr0, value);
        break;
    }
    case OpCode::Id::IADD3_C:
    case OpCode::Id::IADD3_R:
    case OpCode::Id::IADD3_IMM: {
        // Three-operand add; the register form can additionally select half-words of each
        // operand and shift the intermediate sum.
        Node op_c = GetRegister(instr.gpr39);

        // Extracts the selected half-word (or passes the value through) for IADD3_R operands.
        const auto ApplyHeight = [&](IAdd3Height height, Node value) {
            switch (height) {
            case IAdd3Height::None:
                return value;
            case IAdd3Height::LowerHalfWord:
                return BitfieldExtract(value, 0, 16);
            case IAdd3Height::UpperHalfWord:
                return BitfieldExtract(value, 16, 16);
            default:
                UNIMPLEMENTED_MSG("Unhandled IADD3 height: {}", static_cast<u32>(height));
                return Immediate(0);
            }
        };

        if (opcode->get().GetId() == OpCode::Id::IADD3_R) {
            op_a = ApplyHeight(instr.iadd3.height_a, op_a);
            op_b = ApplyHeight(instr.iadd3.height_b, op_b);
            op_c = ApplyHeight(instr.iadd3.height_c, op_c);
        }

        op_a = GetOperandAbsNegInteger(op_a, false, instr.iadd3.neg_a, true);
        op_b = GetOperandAbsNegInteger(op_b, false, instr.iadd3.neg_b, true);
        op_c = GetOperandAbsNegInteger(op_c, false, instr.iadd3.neg_c, true);

        const Node value = [&]() {
            const Node add_ab = Operation(OperationCode::IAdd, NO_PRECISE, op_a, op_b);
            if (opcode->get().GetId() != OpCode::Id::IADD3_R) {
                return Operation(OperationCode::IAdd, NO_PRECISE, add_ab, op_c);
            }
            // IADD3_R may shift the partial sum before adding the third operand.
            const Node shifted = [&]() {
                switch (instr.iadd3.mode) {
                case Tegra::Shader::IAdd3Mode::RightShift:
                    // TODO(tech4me): According to
                    // https://envytools.readthedocs.io/en/latest/hw/graph/maxwell/cuda/int.html?highlight=iadd3
                    // The addition between op_a and op_b should be done in uint33, more
                    // investigation required
                    return Operation(OperationCode::ILogicalShiftRight, NO_PRECISE, add_ab,
                                     Immediate(16));
                case Tegra::Shader::IAdd3Mode::LeftShift:
                    return Operation(OperationCode::ILogicalShiftLeft, NO_PRECISE, add_ab,
                                     Immediate(16));
                default:
                    return add_ab;
                }
            }();
            return Operation(OperationCode::IAdd, NO_PRECISE, shifted, op_c);
        }();

        SetInternalFlagsFromInteger(bb, value, instr.generates_cc);
        SetRegister(bb, instr.gpr0, value);
        break;
    }
    case OpCode::Id::ISCADD_C:
    case OpCode::Id::ISCADD_R:
    case OpCode::Id::ISCADD_IMM: {
        // Scaled add: (op_a << shift) + op_b.
        UNIMPLEMENTED_IF_MSG(instr.generates_cc,
                             "Condition codes generation in ISCADD is not implemented");

        op_a = GetOperandAbsNegInteger(op_a, false, instr.alu_integer.negate_a, true);
        op_b = GetOperandAbsNegInteger(op_b, false, instr.alu_integer.negate_b, true);

        const Node shift = Immediate(static_cast<u32>(instr.alu_integer.shift_amount));
        const Node shifted_a = Operation(OperationCode::ILogicalShiftLeft, NO_PRECISE, op_a, shift);
        const Node value = Operation(OperationCode::IAdd, NO_PRECISE, shifted_a, op_b);

        SetInternalFlagsFromInteger(bb, value, instr.generates_cc);
        SetRegister(bb, instr.gpr0, value);
        break;
    }
    case OpCode::Id::POPC_C:
    case OpCode::Id::POPC_R:
    case OpCode::Id::POPC_IMM: {
        // Population count of op_b, optionally of its bitwise complement.
        if (instr.popc.invert) {
            op_b = Operation(OperationCode::IBitwiseNot, NO_PRECISE, op_b);
        }
        const Node value = Operation(OperationCode::IBitCount, PRECISE, op_b);
        SetRegister(bb, instr.gpr0, value);
        break;
    }
    case OpCode::Id::SEL_C:
    case OpCode::Id::SEL_R:
    case OpCode::Id::SEL_IMM: {
        // Predicate-driven select between the two operands.
        const Node condition = GetPredicate(instr.sel.pred, instr.sel.neg_pred != 0);
        const Node value = Operation(OperationCode::Select, PRECISE, condition, op_a, op_b);
        SetRegister(bb, instr.gpr0, value);
        break;
    }
    case OpCode::Id::LOP_C:
    case OpCode::Id::LOP_R:
    case OpCode::Id::LOP_IMM: {
        // Two-operand logic op; each input may be complemented before the operation.
        if (instr.alu.lop.invert_a)
            op_a = Operation(OperationCode::IBitwiseNot, NO_PRECISE, op_a);
        if (instr.alu.lop.invert_b)
            op_b = Operation(OperationCode::IBitwiseNot, NO_PRECISE, op_b);

        WriteLogicOperation(bb, instr.gpr0, instr.alu.lop.operation, op_a, op_b,
                            instr.alu.lop.pred_result_mode, instr.alu.lop.pred48,
                            instr.generates_cc);
        break;
    }
    case OpCode::Id::LOP3_C:
    case OpCode::Id::LOP3_R:
    case OpCode::Id::LOP3_IMM: {
        // Three-operand logic op driven by an 8-bit truth-table LUT; the LUT's encoding
        // position differs between the register form and the other forms.
        const Node op_c = GetRegister(instr.gpr39);
        const Node lut = [&]() {
            if (opcode->get().GetId() == OpCode::Id::LOP3_R) {
                return Immediate(instr.alu.lop3.GetImmLut28());
            } else {
                return Immediate(instr.alu.lop3.GetImmLut48());
            }
        }();

        WriteLop3Instruction(bb, instr.gpr0, op_a, op_b, op_c, lut, instr.generates_cc);
        break;
    }
    case OpCode::Id::IMNMX_C:
    case OpCode::Id::IMNMX_R:
    case OpCode::Id::IMNMX_IMM: {
        // Predicate-driven integer min/max, signed or unsigned.
        UNIMPLEMENTED_IF(instr.imnmx.exchange != Tegra::Shader::IMinMaxExchange::None);

        const bool is_signed = instr.imnmx.is_signed;

        const Node condition = GetPredicate(instr.imnmx.pred, instr.imnmx.negate_pred != 0);
        const Node min = SignedOperation(OperationCode::IMin, is_signed, NO_PRECISE, op_a, op_b);
        const Node max = SignedOperation(OperationCode::IMax, is_signed, NO_PRECISE, op_a, op_b);
        const Node value = Operation(OperationCode::Select, NO_PRECISE, condition, min, max);

        SetInternalFlagsFromInteger(bb, value, instr.generates_cc);
        SetRegister(bb, instr.gpr0, value);
        break;
    }
    case OpCode::Id::LEA_R2:
    case OpCode::Id::LEA_R1:
    case OpCode::Id::LEA_IMM:
    case OpCode::Id::LEA_RZ:
    case OpCode::Id::LEA_HI: {
        // LEA computes op_a + op_b * (1 << op_c). Note: this structured binding deliberately
        // shadows the outer op_a/op_b with the variant-specific operand selection.
        const auto [op_a, op_b, op_c] = [&]() -> std::tuple<Node, Node, Node> {
            switch (opcode->get().GetId()) {
            case OpCode::Id::LEA_R2: {
                return {GetRegister(instr.gpr20), GetRegister(instr.gpr39),
                        Immediate(static_cast<u32>(instr.lea.r2.entry_a))};
            }

            case OpCode::Id::LEA_R1: {
                const bool neg = instr.lea.r1.neg != 0;
                return {GetOperandAbsNegInteger(GetRegister(instr.gpr8), false, neg, true),
                        GetRegister(instr.gpr20),
                        Immediate(static_cast<u32>(instr.lea.r1.entry_a))};
            }

            case OpCode::Id::LEA_IMM: {
                const bool neg = instr.lea.imm.neg != 0;
                return {Immediate(static_cast<u32>(instr.lea.imm.entry_a)),
                        GetOperandAbsNegInteger(GetRegister(instr.gpr8), false, neg, true),
                        Immediate(static_cast<u32>(instr.lea.imm.entry_b))};
            }

            case OpCode::Id::LEA_RZ: {
                const bool neg = instr.lea.rz.neg != 0;
                return {GetConstBuffer(instr.lea.rz.cb_index, instr.lea.rz.cb_offset),
                        GetOperandAbsNegInteger(GetRegister(instr.gpr8), false, neg, true),
                        Immediate(static_cast<u32>(instr.lea.rz.entry_a))};
            }

            case OpCode::Id::LEA_HI:
            default:
                UNIMPLEMENTED_MSG("Unhandled LEA subinstruction: {}", opcode->get().GetName());

                return {Immediate(static_cast<u32>(instr.lea.imm.entry_a)), GetRegister(instr.gpr8),
                        Immediate(static_cast<u32>(instr.lea.imm.entry_b))};
            }
        }();

        UNIMPLEMENTED_IF_MSG(instr.lea.pred48 != static_cast<u64>(Pred::UnusedIndex),
                             "Unhandled LEA Predicate");

        const Node shifted_c =
            Operation(OperationCode::ILogicalShiftLeft, NO_PRECISE, Immediate(1), op_c);
        const Node mul_bc = Operation(OperationCode::IMul, NO_PRECISE, op_b, shifted_c);
        const Node value = Operation(OperationCode::IAdd, NO_PRECISE, op_a, mul_bc);

        SetRegister(bb, instr.gpr0, value);

        break;
    }
    default:
        UNIMPLEMENTED_MSG("Unhandled ArithmeticInteger instruction: {}", opcode->get().GetName());
    }

    return pc;
}
244
245void ShaderIR::WriteLop3Instruction(NodeBlock& bb, Register dest, Node op_a, Node op_b, Node op_c,
246 Node imm_lut, bool sets_cc) {
247 constexpr u32 lop_iterations = 32;
248 const Node one = Immediate(1);
249 const Node two = Immediate(2);
250
251 Node value{};
252 for (u32 i = 0; i < lop_iterations; ++i) {
253 const Node shift_amount = Immediate(i);
254
255 const Node a = Operation(OperationCode::ILogicalShiftRight, NO_PRECISE, op_c, shift_amount);
256 const Node pack_0 = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, a, one);
257
258 const Node b = Operation(OperationCode::ILogicalShiftRight, NO_PRECISE, op_b, shift_amount);
259 const Node c = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, b, one);
260 const Node pack_1 = Operation(OperationCode::ILogicalShiftLeft, NO_PRECISE, c, one);
261
262 const Node d = Operation(OperationCode::ILogicalShiftRight, NO_PRECISE, op_a, shift_amount);
263 const Node e = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, d, one);
264 const Node pack_2 = Operation(OperationCode::ILogicalShiftLeft, NO_PRECISE, e, two);
265
266 const Node pack_01 = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, pack_0, pack_1);
267 const Node pack_012 = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, pack_01, pack_2);
268
269 const Node shifted_bit =
270 Operation(OperationCode::ILogicalShiftRight, NO_PRECISE, imm_lut, pack_012);
271 const Node bit = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, shifted_bit, one);
272
273 const Node right =
274 Operation(OperationCode::ILogicalShiftLeft, NO_PRECISE, bit, shift_amount);
275
276 if (i > 0) {
277 value = Operation(OperationCode::IBitwiseOr, NO_PRECISE, value, right);
278 } else {
279 value = right;
280 }
281 }
282
283 SetInternalFlagsFromInteger(bb, value, sets_cc);
284 SetRegister(bb, dest, value);
285}
286
287} // namespace VideoCommon::Shader
diff --git a/src/video_core/shader/decode/arithmetic_integer_immediate.cpp b/src/video_core/shader/decode/arithmetic_integer_immediate.cpp
new file mode 100644
index 000000000..3ed5ccc5a
--- /dev/null
+++ b/src/video_core/shader/decode/arithmetic_integer_immediate.cpp
@@ -0,0 +1,96 @@
1// Copyright 2018 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/assert.h"
6#include "common/common_types.h"
7#include "video_core/engines/shader_bytecode.h"
8#include "video_core/shader/shader_ir.h"
9
10namespace VideoCommon::Shader {
11
12using Tegra::Shader::Instruction;
13using Tegra::Shader::LogicOperation;
14using Tegra::Shader::OpCode;
15using Tegra::Shader::Pred;
16using Tegra::Shader::PredicateResultMode;
17using Tegra::Shader::Register;
18
19u32 ShaderIR::DecodeArithmeticIntegerImmediate(NodeBlock& bb, u32 pc) {
20 const Instruction instr = {program_code[pc]};
21 const auto opcode = OpCode::Decode(instr);
22
23 Node op_a = GetRegister(instr.gpr8);
24 Node op_b = Immediate(static_cast<s32>(instr.alu.imm20_32));
25
26 switch (opcode->get().GetId()) {
27 case OpCode::Id::IADD32I: {
28 UNIMPLEMENTED_IF_MSG(instr.iadd32i.saturate, "IADD32I saturation is not implemented");
29
30 op_a = GetOperandAbsNegInteger(op_a, false, instr.iadd32i.negate_a, true);
31
32 const Node value = Operation(OperationCode::IAdd, PRECISE, op_a, op_b);
33
34 SetInternalFlagsFromInteger(bb, value, instr.op_32.generates_cc);
35 SetRegister(bb, instr.gpr0, value);
36 break;
37 }
38 case OpCode::Id::LOP32I: {
39 if (instr.alu.lop32i.invert_a)
40 op_a = Operation(OperationCode::IBitwiseNot, NO_PRECISE, op_a);
41
42 if (instr.alu.lop32i.invert_b)
43 op_b = Operation(OperationCode::IBitwiseNot, NO_PRECISE, op_b);
44
45 WriteLogicOperation(bb, instr.gpr0, instr.alu.lop32i.operation, op_a, op_b,
46 PredicateResultMode::None, Pred::UnusedIndex, instr.op_32.generates_cc);
47 break;
48 }
49 default:
50 UNIMPLEMENTED_MSG("Unhandled ArithmeticIntegerImmediate instruction: {}",
51 opcode->get().GetName());
52 }
53
54 return pc;
55}
56
57void ShaderIR::WriteLogicOperation(NodeBlock& bb, Register dest, LogicOperation logic_op, Node op_a,
58 Node op_b, PredicateResultMode predicate_mode, Pred predicate,
59 bool sets_cc) {
60 const Node result = [&]() {
61 switch (logic_op) {
62 case LogicOperation::And:
63 return Operation(OperationCode::IBitwiseAnd, PRECISE, op_a, op_b);
64 case LogicOperation::Or:
65 return Operation(OperationCode::IBitwiseOr, PRECISE, op_a, op_b);
66 case LogicOperation::Xor:
67 return Operation(OperationCode::IBitwiseXor, PRECISE, op_a, op_b);
68 case LogicOperation::PassB:
69 return op_b;
70 default:
71 UNIMPLEMENTED_MSG("Unimplemented logic operation={}", static_cast<u32>(logic_op));
72 return Immediate(0);
73 }
74 }();
75
76 SetInternalFlagsFromInteger(bb, result, sets_cc);
77 SetRegister(bb, dest, result);
78
79 // Write the predicate value depending on the predicate mode.
80 switch (predicate_mode) {
81 case PredicateResultMode::None:
82 // Do nothing.
83 return;
84 case PredicateResultMode::NotZero: {
85 // Set the predicate to true if the result is not zero.
86 const Node compare = Operation(OperationCode::LogicalINotEqual, result, Immediate(0));
87 SetPredicate(bb, static_cast<u64>(predicate), compare);
88 break;
89 }
90 default:
91 UNIMPLEMENTED_MSG("Unimplemented predicate result mode: {}",
92 static_cast<u32>(predicate_mode));
93 }
94}
95
96} // namespace VideoCommon::Shader \ No newline at end of file
diff --git a/src/video_core/shader/decode/bfe.cpp b/src/video_core/shader/decode/bfe.cpp
new file mode 100644
index 000000000..6a95dc928
--- /dev/null
+++ b/src/video_core/shader/decode/bfe.cpp
@@ -0,0 +1,49 @@
1// Copyright 2018 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/assert.h"
6#include "common/common_types.h"
7#include "video_core/engines/shader_bytecode.h"
8#include "video_core/shader/shader_ir.h"
9
10namespace VideoCommon::Shader {
11
12using Tegra::Shader::Instruction;
13using Tegra::Shader::OpCode;
14
15u32 ShaderIR::DecodeBfe(NodeBlock& bb, u32 pc) {
16 const Instruction instr = {program_code[pc]};
17 const auto opcode = OpCode::Decode(instr);
18
19 UNIMPLEMENTED_IF(instr.bfe.negate_b);
20
21 Node op_a = GetRegister(instr.gpr8);
22 op_a = GetOperandAbsNegInteger(op_a, false, instr.bfe.negate_a, false);
23
24 switch (opcode->get().GetId()) {
25 case OpCode::Id::BFE_IMM: {
26 UNIMPLEMENTED_IF_MSG(instr.generates_cc,
27 "Condition codes generation in BFE is not implemented");
28
29 const Node inner_shift_imm = Immediate(static_cast<u32>(instr.bfe.GetLeftShiftValue()));
30 const Node outer_shift_imm =
31 Immediate(static_cast<u32>(instr.bfe.GetLeftShiftValue() + instr.bfe.shift_position));
32
33 const Node inner_shift =
34 Operation(OperationCode::ILogicalShiftLeft, NO_PRECISE, op_a, inner_shift_imm);
35 const Node outer_shift =
36 Operation(OperationCode::ILogicalShiftRight, NO_PRECISE, inner_shift, outer_shift_imm);
37
38 SetInternalFlagsFromInteger(bb, outer_shift, instr.generates_cc);
39 SetRegister(bb, instr.gpr0, outer_shift);
40 break;
41 }
42 default:
43 UNIMPLEMENTED_MSG("Unhandled BFE instruction: {}", opcode->get().GetName());
44 }
45
46 return pc;
47}
48
49} // namespace VideoCommon::Shader \ No newline at end of file
diff --git a/src/video_core/shader/decode/bfi.cpp b/src/video_core/shader/decode/bfi.cpp
new file mode 100644
index 000000000..601d66f1f
--- /dev/null
+++ b/src/video_core/shader/decode/bfi.cpp
@@ -0,0 +1,41 @@
1// Copyright 2018 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/assert.h"
6#include "common/common_types.h"
7#include "video_core/engines/shader_bytecode.h"
8#include "video_core/shader/shader_ir.h"
9
10namespace VideoCommon::Shader {
11
12using Tegra::Shader::Instruction;
13using Tegra::Shader::OpCode;
14
15u32 ShaderIR::DecodeBfi(NodeBlock& bb, u32 pc) {
16 const Instruction instr = {program_code[pc]};
17 const auto opcode = OpCode::Decode(instr);
18
19 const auto [base, packed_shift] = [&]() -> std::tuple<Node, Node> {
20 switch (opcode->get().GetId()) {
21 case OpCode::Id::BFI_IMM_R:
22 return {GetRegister(instr.gpr39), Immediate(instr.alu.GetSignedImm20_20())};
23 default:
24 UNREACHABLE();
25 return {Immediate(0), Immediate(0)};
26 }
27 }();
28 const Node insert = GetRegister(instr.gpr8);
29 const Node offset = BitfieldExtract(packed_shift, 0, 8);
30 const Node bits = BitfieldExtract(packed_shift, 8, 8);
31
32 const Node value =
33 Operation(OperationCode::UBitfieldInsert, PRECISE, base, insert, offset, bits);
34
35 SetInternalFlagsFromInteger(bb, value, instr.generates_cc);
36 SetRegister(bb, instr.gpr0, value);
37
38 return pc;
39}
40
41} // namespace VideoCommon::Shader \ No newline at end of file
diff --git a/src/video_core/shader/decode/conversion.cpp b/src/video_core/shader/decode/conversion.cpp
new file mode 100644
index 000000000..55a6fbbf2
--- /dev/null
+++ b/src/video_core/shader/decode/conversion.cpp
@@ -0,0 +1,149 @@
1// Copyright 2018 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/assert.h"
6#include "common/common_types.h"
7#include "video_core/engines/shader_bytecode.h"
8#include "video_core/shader/shader_ir.h"
9
10namespace VideoCommon::Shader {
11
12using Tegra::Shader::Instruction;
13using Tegra::Shader::OpCode;
14using Tegra::Shader::Register;
15
// Decodes data conversion instructions (I2I, I2F, F2F, F2I) into IR nodes
// written to the destination register. Returns the program counter unchanged.
u32 ShaderIR::DecodeConversion(NodeBlock& bb, u32 pc) {
    const Instruction instr = {program_code[pc]};
    const auto opcode = OpCode::Decode(instr);

    switch (opcode->get().GetId()) {
    case OpCode::Id::I2I_R: {
        // Integer-to-integer conversion from a register source.
        UNIMPLEMENTED_IF(instr.conversion.selector);

        const bool input_signed = instr.conversion.is_input_signed;
        const bool output_signed = instr.conversion.is_output_signed;

        // Resize the source value to the encoded source width first.
        Node value = GetRegister(instr.gpr20);
        value = ConvertIntegerSize(value, instr.conversion.src_size, input_signed);

        value = GetOperandAbsNegInteger(value, instr.conversion.abs_a, instr.conversion.negate_a,
                                        input_signed);
        if (input_signed != output_signed) {
            // Cast to the output signedness when it differs from the input's.
            value = SignedOperation(OperationCode::ICastUnsigned, output_signed, NO_PRECISE, value);
        }

        SetInternalFlagsFromInteger(bb, value, instr.generates_cc);
        SetRegister(bb, instr.gpr0, value);
        break;
    }
    case OpCode::Id::I2F_R:
    case OpCode::Id::I2F_C: {
        // Integer-to-float conversion; operand B comes from a register or a
        // constant buffer depending on the variant.
        UNIMPLEMENTED_IF(instr.conversion.dest_size != Register::Size::Word);
        UNIMPLEMENTED_IF(instr.conversion.selector);
        UNIMPLEMENTED_IF_MSG(instr.generates_cc,
                             "Condition codes generation in I2F is not implemented");

        Node value = [&]() {
            if (instr.is_b_gpr) {
                return GetRegister(instr.gpr20);
            } else {
                return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset());
            }
        }();
        const bool input_signed = instr.conversion.is_input_signed;
        value = ConvertIntegerSize(value, instr.conversion.src_size, input_signed);
        // abs is applied on the integer input; negation is applied after the
        // cast, on the float result.
        value = GetOperandAbsNegInteger(value, instr.conversion.abs_a, false, input_signed);
        value = SignedOperation(OperationCode::FCastInteger, input_signed, PRECISE, value);
        value = GetOperandAbsNegFloat(value, false, instr.conversion.negate_a);

        SetInternalFlagsFromFloat(bb, value, instr.generates_cc);
        SetRegister(bb, instr.gpr0, value);
        break;
    }
    case OpCode::Id::F2F_R:
    case OpCode::Id::F2F_C: {
        // Float-to-float conversion with an optional rounding operation and
        // optional saturation.
        UNIMPLEMENTED_IF(instr.conversion.dest_size != Register::Size::Word);
        UNIMPLEMENTED_IF(instr.conversion.src_size != Register::Size::Word);
        UNIMPLEMENTED_IF_MSG(instr.generates_cc,
                             "Condition codes generation in F2F is not implemented");

        Node value = [&]() {
            if (instr.is_b_gpr) {
                return GetRegister(instr.gpr20);
            } else {
                return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset());
            }
        }();

        value = GetOperandAbsNegFloat(value, instr.conversion.abs_a, instr.conversion.negate_a);

        // Apply the encoded rounding operation, if any.
        value = [&]() {
            switch (instr.conversion.f2f.rounding) {
            case Tegra::Shader::F2fRoundingOp::None:
                return value;
            case Tegra::Shader::F2fRoundingOp::Round:
                return Operation(OperationCode::FRoundEven, PRECISE, value);
            case Tegra::Shader::F2fRoundingOp::Floor:
                return Operation(OperationCode::FFloor, PRECISE, value);
            case Tegra::Shader::F2fRoundingOp::Ceil:
                return Operation(OperationCode::FCeil, PRECISE, value);
            case Tegra::Shader::F2fRoundingOp::Trunc:
                return Operation(OperationCode::FTrunc, PRECISE, value);
            }
            UNIMPLEMENTED_MSG("Unimplemented F2F rounding mode {}",
                              static_cast<u32>(instr.conversion.f2f.rounding.Value()));
            return Immediate(0);
        }();
        value = GetSaturatedFloat(value, instr.alu.saturate_d);

        SetInternalFlagsFromFloat(bb, value, instr.generates_cc);
        SetRegister(bb, instr.gpr0, value);
        break;
    }
    case OpCode::Id::F2I_R:
    case OpCode::Id::F2I_C: {
        // Float-to-integer conversion with an explicit rounding mode.
        UNIMPLEMENTED_IF(instr.conversion.src_size != Register::Size::Word);
        UNIMPLEMENTED_IF_MSG(instr.generates_cc,
                             "Condition codes generation in F2I is not implemented");
        Node value = [&]() {
            if (instr.is_b_gpr) {
                return GetRegister(instr.gpr20);
            } else {
                return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset());
            }
        }();

        value = GetOperandAbsNegFloat(value, instr.conversion.abs_a, instr.conversion.negate_a);

        value = [&]() {
            switch (instr.conversion.f2i.rounding) {
            case Tegra::Shader::F2iRoundingOp::RoundEven:
                return Operation(OperationCode::FRoundEven, PRECISE, value);
            case Tegra::Shader::F2iRoundingOp::Floor:
                return Operation(OperationCode::FFloor, PRECISE, value);
            case Tegra::Shader::F2iRoundingOp::Ceil:
                return Operation(OperationCode::FCeil, PRECISE, value);
            case Tegra::Shader::F2iRoundingOp::Trunc:
                return Operation(OperationCode::FTrunc, PRECISE, value);
            default:
                UNIMPLEMENTED_MSG("Unimplemented F2I rounding mode {}",
                                  static_cast<u32>(instr.conversion.f2i.rounding.Value()));
                return Immediate(0);
            }
        }();
        // Cast the rounded float to integer and resize to the destination
        // width with the output signedness.
        const bool is_signed = instr.conversion.is_output_signed;
        value = SignedOperation(OperationCode::ICastFloat, is_signed, PRECISE, value);
        value = ConvertIntegerSize(value, instr.conversion.dest_size, is_signed);

        SetRegister(bb, instr.gpr0, value);
        break;
    }
    default:
        UNIMPLEMENTED_MSG("Unhandled conversion instruction: {}", opcode->get().GetName());
    }

    return pc;
}
148
149} // namespace VideoCommon::Shader
diff --git a/src/video_core/shader/decode/decode_integer_set.cpp b/src/video_core/shader/decode/decode_integer_set.cpp
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/src/video_core/shader/decode/decode_integer_set.cpp
diff --git a/src/video_core/shader/decode/ffma.cpp b/src/video_core/shader/decode/ffma.cpp
new file mode 100644
index 000000000..0559cc8de
--- /dev/null
+++ b/src/video_core/shader/decode/ffma.cpp
@@ -0,0 +1,59 @@
1// Copyright 2018 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/assert.h"
6#include "common/common_types.h"
7#include "video_core/engines/shader_bytecode.h"
8#include "video_core/shader/shader_ir.h"
9
10namespace VideoCommon::Shader {
11
12using Tegra::Shader::Instruction;
13using Tegra::Shader::OpCode;
14
15u32 ShaderIR::DecodeFfma(NodeBlock& bb, u32 pc) {
16 const Instruction instr = {program_code[pc]};
17 const auto opcode = OpCode::Decode(instr);
18
19 UNIMPLEMENTED_IF_MSG(instr.ffma.cc != 0, "FFMA cc not implemented");
20 UNIMPLEMENTED_IF_MSG(instr.ffma.tab5980_0 != 1, "FFMA tab5980_0({}) not implemented",
21 instr.ffma.tab5980_0.Value()); // Seems to be 1 by default based on SMO
22 UNIMPLEMENTED_IF_MSG(instr.ffma.tab5980_1 != 0, "FFMA tab5980_1({}) not implemented",
23 instr.ffma.tab5980_1.Value());
24
25 const Node op_a = GetRegister(instr.gpr8);
26
27 auto [op_b, op_c] = [&]() -> std::tuple<Node, Node> {
28 switch (opcode->get().GetId()) {
29 case OpCode::Id::FFMA_CR: {
30 return {GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset()),
31 GetRegister(instr.gpr39)};
32 }
33 case OpCode::Id::FFMA_RR:
34 return {GetRegister(instr.gpr20), GetRegister(instr.gpr39)};
35 case OpCode::Id::FFMA_RC: {
36 return {GetRegister(instr.gpr39),
37 GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset())};
38 }
39 case OpCode::Id::FFMA_IMM:
40 return {GetImmediate19(instr), GetRegister(instr.gpr39)};
41 default:
42 UNIMPLEMENTED_MSG("Unhandled FFMA instruction: {}", opcode->get().GetName());
43 return {Immediate(0), Immediate(0)};
44 }
45 }();
46
47 op_b = GetOperandAbsNegFloat(op_b, false, instr.ffma.negate_b);
48 op_c = GetOperandAbsNegFloat(op_c, false, instr.ffma.negate_c);
49
50 Node value = Operation(OperationCode::FFma, PRECISE, op_a, op_b, op_c);
51 value = GetSaturatedFloat(value, instr.alu.saturate_d);
52
53 SetInternalFlagsFromFloat(bb, value, instr.generates_cc);
54 SetRegister(bb, instr.gpr0, value);
55
56 return pc;
57}
58
59} // namespace VideoCommon::Shader \ No newline at end of file
diff --git a/src/video_core/shader/decode/float_set.cpp b/src/video_core/shader/decode/float_set.cpp
new file mode 100644
index 000000000..1bd6755dd
--- /dev/null
+++ b/src/video_core/shader/decode/float_set.cpp
@@ -0,0 +1,58 @@
1// Copyright 2018 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/assert.h"
6#include "common/common_types.h"
7#include "video_core/engines/shader_bytecode.h"
8#include "video_core/shader/shader_ir.h"
9
10namespace VideoCommon::Shader {
11
12using Tegra::Shader::Instruction;
13using Tegra::Shader::OpCode;
14
15u32 ShaderIR::DecodeFloatSet(NodeBlock& bb, u32 pc) {
16 const Instruction instr = {program_code[pc]};
17 const auto opcode = OpCode::Decode(instr);
18
19 const Node op_a = GetOperandAbsNegFloat(GetRegister(instr.gpr8), instr.fset.abs_a != 0,
20 instr.fset.neg_a != 0);
21
22 Node op_b = [&]() {
23 if (instr.is_b_imm) {
24 return GetImmediate19(instr);
25 } else if (instr.is_b_gpr) {
26 return GetRegister(instr.gpr20);
27 } else {
28 return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset());
29 }
30 }();
31
32 op_b = GetOperandAbsNegFloat(op_b, instr.fset.abs_b != 0, instr.fset.neg_b != 0);
33
34 // The fset instruction sets a register to 1.0 or -1 (depending on the bf bit) if the
35 // condition is true, and to 0 otherwise.
36 const Node second_pred = GetPredicate(instr.fset.pred39, instr.fset.neg_pred != 0);
37
38 const OperationCode combiner = GetPredicateCombiner(instr.fset.op);
39 const Node first_pred = GetPredicateComparisonFloat(instr.fset.cond, op_a, op_b);
40
41 const Node predicate = Operation(combiner, first_pred, second_pred);
42
43 const Node true_value = instr.fset.bf ? Immediate(1.0f) : Immediate(-1);
44 const Node false_value = instr.fset.bf ? Immediate(0.0f) : Immediate(0);
45 const Node value =
46 Operation(OperationCode::Select, PRECISE, predicate, true_value, false_value);
47
48 if (instr.fset.bf) {
49 SetInternalFlagsFromFloat(bb, value, instr.generates_cc);
50 } else {
51 SetInternalFlagsFromInteger(bb, value, instr.generates_cc);
52 }
53 SetRegister(bb, instr.gpr0, value);
54
55 return pc;
56}
57
58} // namespace VideoCommon::Shader \ No newline at end of file
diff --git a/src/video_core/shader/decode/float_set_predicate.cpp b/src/video_core/shader/decode/float_set_predicate.cpp
new file mode 100644
index 000000000..9285b8d05
--- /dev/null
+++ b/src/video_core/shader/decode/float_set_predicate.cpp
@@ -0,0 +1,56 @@
1// Copyright 2018 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/assert.h"
6#include "common/common_types.h"
7#include "video_core/engines/shader_bytecode.h"
8#include "video_core/shader/shader_ir.h"
9
10namespace VideoCommon::Shader {
11
12using Tegra::Shader::Instruction;
13using Tegra::Shader::OpCode;
14using Tegra::Shader::Pred;
15
// Decodes FSETP: compares two float operands, combines the comparison with a
// second predicate and writes one (optionally two) destination predicates.
u32 ShaderIR::DecodeFloatSetPredicate(NodeBlock& bb, u32 pc) {
    const Instruction instr = {program_code[pc]};
    const auto opcode = OpCode::Decode(instr);

    const Node op_a = GetOperandAbsNegFloat(GetRegister(instr.gpr8), instr.fsetp.abs_a != 0,
                                            instr.fsetp.neg_a != 0);
    // Operand B may come from an immediate, a register or a constant buffer.
    Node op_b = [&]() {
        if (instr.is_b_imm) {
            return GetImmediate19(instr);
        } else if (instr.is_b_gpr) {
            return GetRegister(instr.gpr20);
        } else {
            return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset());
        }
    }();
    // NOTE(review): only abs_b is applied to operand B; the negate flag is
    // passed as false here — confirm this matches the hardware encoding.
    op_b = GetOperandAbsNegFloat(op_b, instr.fsetp.abs_b, false);

    // We can't use the constant predicate as destination.
    ASSERT(instr.fsetp.pred3 != static_cast<u64>(Pred::UnusedIndex));

    const Node predicate = GetPredicateComparisonFloat(instr.fsetp.cond, op_a, op_b);
    const Node second_pred = GetPredicate(instr.fsetp.pred39, instr.fsetp.neg_pred != 0);

    const OperationCode combiner = GetPredicateCombiner(instr.fsetp.op);
    const Node value = Operation(combiner, predicate, second_pred);

    // Set the primary predicate to the result of Predicate OP SecondPredicate
    SetPredicate(bb, instr.fsetp.pred3, value);

    if (instr.fsetp.pred0 != static_cast<u64>(Pred::UnusedIndex)) {
        // Set the secondary predicate to the result of !Predicate OP SecondPredicate,
        // if enabled
        const Node negated_pred = Operation(OperationCode::LogicalNegate, predicate);
        const Node second_value = Operation(combiner, negated_pred, second_pred);
        SetPredicate(bb, instr.fsetp.pred0, second_value);
    }

    return pc;
}
55
56} // namespace VideoCommon::Shader \ No newline at end of file
diff --git a/src/video_core/shader/decode/half_set.cpp b/src/video_core/shader/decode/half_set.cpp
new file mode 100644
index 000000000..748368555
--- /dev/null
+++ b/src/video_core/shader/decode/half_set.cpp
@@ -0,0 +1,67 @@
1// Copyright 2018 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <array>
6
7#include "common/assert.h"
8#include "common/common_types.h"
9#include "video_core/engines/shader_bytecode.h"
10#include "video_core/shader/shader_ir.h"
11
12namespace VideoCommon::Shader {
13
14using Tegra::Shader::Instruction;
15using Tegra::Shader::OpCode;
16
// Decodes HSET2: compares two packed half-float operands component-wise and
// writes a pair of 16-bit results packed into the destination register.
u32 ShaderIR::DecodeHalfSet(NodeBlock& bb, u32 pc) {
    const Instruction instr = {program_code[pc]};
    const auto opcode = OpCode::Decode(instr);

    UNIMPLEMENTED_IF(instr.hset2.ftz != 0);

    // instr.hset2.type_a
    // instr.hset2.type_b
    Node op_a = GetRegister(instr.gpr8);
    Node op_b = [&]() {
        switch (opcode->get().GetId()) {
        case OpCode::Id::HSET2_R:
            return GetRegister(instr.gpr20);
        default:
            UNREACHABLE();
            return Immediate(0);
        }
    }();

    op_a = GetOperandAbsNegHalf(op_a, instr.hset2.abs_a, instr.hset2.negate_a);
    op_b = GetOperandAbsNegHalf(op_b, instr.hset2.abs_b, instr.hset2.negate_b);

    const Node second_pred = GetPredicate(instr.hset2.pred39, instr.hset2.neg_pred);

    MetaHalfArithmetic meta{false, {instr.hset2.type_a, instr.hset2.type_b}};
    const Node comparison_pair = GetPredicateComparisonHalf(instr.hset2.cond, meta, op_a, op_b);

    const OperationCode combiner = GetPredicateCombiner(instr.hset2.op);

    // HSET2 operates on each half float in the pack.
    std::array<Node, 2> values;
    for (u32 i = 0; i < 2; ++i) {
        // 0x3c00 is 1.0 encoded as an IEEE 754 half float; 0xffff is the
        // all-ones integer "true" pattern. Shifted into the i-th 16-bit lane.
        const u32 raw_value = instr.hset2.bf ? 0x3c00 : 0xffff;
        const Node true_value = Immediate(raw_value << (i * 16));
        const Node false_value = Immediate(0);

        // Select the i-th component of the packed comparison result and
        // combine it with the auxiliary predicate.
        const Node comparison =
            Operation(OperationCode::LogicalPick2, comparison_pair, Immediate(i));
        const Node predicate = Operation(combiner, comparison, second_pred);

        values[i] =
            Operation(OperationCode::Select, NO_PRECISE, predicate, true_value, false_value);
    }

    // Merge both 16-bit lanes into the final 32-bit register value.
    const Node value = Operation(OperationCode::UBitwiseOr, NO_PRECISE, values[0], values[1]);
    SetRegister(bb, instr.gpr0, value);

    return pc;
}
66
67} // namespace VideoCommon::Shader \ No newline at end of file
diff --git a/src/video_core/shader/decode/half_set_predicate.cpp b/src/video_core/shader/decode/half_set_predicate.cpp
new file mode 100644
index 000000000..e68512692
--- /dev/null
+++ b/src/video_core/shader/decode/half_set_predicate.cpp
@@ -0,0 +1,62 @@
1// Copyright 2018 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/assert.h"
6#include "common/common_types.h"
7#include "video_core/engines/shader_bytecode.h"
8#include "video_core/shader/shader_ir.h"
9
10namespace VideoCommon::Shader {
11
12using Tegra::Shader::Instruction;
13using Tegra::Shader::OpCode;
14using Tegra::Shader::Pred;
15
// Decodes HSETP2: compares two packed half-float operands, reduces the pair of
// per-component results (all/any), combines with a second predicate and
// writes up to two destination predicates.
u32 ShaderIR::DecodeHalfSetPredicate(NodeBlock& bb, u32 pc) {
    const Instruction instr = {program_code[pc]};
    const auto opcode = OpCode::Decode(instr);

    UNIMPLEMENTED_IF(instr.hsetp2.ftz != 0);

    Node op_a = GetRegister(instr.gpr8);
    op_a = GetOperandAbsNegHalf(op_a, instr.hsetp2.abs_a, instr.hsetp2.negate_a);

    const Node op_b = [&]() {
        switch (opcode->get().GetId()) {
        case OpCode::Id::HSETP2_R:
            // NOTE(review): abs_a is reused for operand B here — looks like it
            // may be intended to be a separate abs_b field; confirm against
            // the instruction encoding.
            return GetOperandAbsNegHalf(GetRegister(instr.gpr20), instr.hsetp2.abs_a,
                                        instr.hsetp2.negate_b);
        default:
            UNREACHABLE();
            return Immediate(0);
        }
    }();

    // We can't use the constant predicate as destination.
    ASSERT(instr.hsetp2.pred3 != static_cast<u64>(Pred::UnusedIndex));

    const Node second_pred = GetPredicate(instr.hsetp2.pred39, instr.hsetp2.neg_pred != 0);

    const OperationCode combiner = GetPredicateCombiner(instr.hsetp2.op);
    // h_and selects whether both packed comparisons must pass or any one
    // suffices.
    const OperationCode pair_combiner =
        instr.hsetp2.h_and ? OperationCode::LogicalAll2 : OperationCode::LogicalAny2;

    MetaHalfArithmetic meta = {false, {instr.hsetp2.type_a, instr.hsetp2.type_b}};
    const Node comparison = GetPredicateComparisonHalf(instr.hsetp2.cond, meta, op_a, op_b);
    const Node first_pred = Operation(pair_combiner, comparison);

    // Set the primary predicate to the result of Predicate OP SecondPredicate
    const Node value = Operation(combiner, first_pred, second_pred);
    SetPredicate(bb, instr.hsetp2.pred3, value);

    if (instr.hsetp2.pred0 != static_cast<u64>(Pred::UnusedIndex)) {
        // Set the secondary predicate to the result of !Predicate OP SecondPredicate, if enabled
        const Node negated_pred = Operation(OperationCode::LogicalNegate, first_pred);
        SetPredicate(bb, instr.hsetp2.pred0, Operation(combiner, negated_pred, second_pred));
    }

    return pc;
}
61
62} // namespace VideoCommon::Shader \ No newline at end of file
diff --git a/src/video_core/shader/decode/hfma2.cpp b/src/video_core/shader/decode/hfma2.cpp
new file mode 100644
index 000000000..7a07c5ec6
--- /dev/null
+++ b/src/video_core/shader/decode/hfma2.cpp
@@ -0,0 +1,77 @@
1// Copyright 2018 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <tuple>
6
7#include "common/assert.h"
8#include "common/common_types.h"
9#include "video_core/engines/shader_bytecode.h"
10#include "video_core/shader/shader_ir.h"
11
12namespace VideoCommon::Shader {
13
14using Tegra::Shader::HalfPrecision;
15using Tegra::Shader::HalfType;
16using Tegra::Shader::Instruction;
17using Tegra::Shader::OpCode;
18
// Decodes HFMA2 (packed half-float fused multiply-add): selects operands B/C
// per instruction variant, applies negations, computes a * b + c over the
// packed halves and merges the result into the destination register.
u32 ShaderIR::DecodeHfma2(NodeBlock& bb, u32 pc) {
    const Instruction instr = {program_code[pc]};
    const auto opcode = OpCode::Decode(instr);

    // The RR variant encodes its precision field in a different location.
    if (opcode->get().GetId() == OpCode::Id::HFMA2_RR) {
        UNIMPLEMENTED_IF(instr.hfma2.rr.precision != HalfPrecision::None);
    } else {
        UNIMPLEMENTED_IF(instr.hfma2.precision != HalfPrecision::None);
    }

    constexpr auto identity = HalfType::H0_H1;

    const HalfType type_a = instr.hfma2.type_a;
    const Node op_a = GetRegister(instr.gpr8);

    // Each variant encodes saturation, operand types and operand sources in
    // different fields; the lambda gathers them all at once.
    bool neg_b{}, neg_c{};
    auto [saturate, type_b, op_b, type_c,
          op_c] = [&]() -> std::tuple<bool, HalfType, Node, HalfType, Node> {
        switch (opcode->get().GetId()) {
        case OpCode::Id::HFMA2_CR:
            neg_b = instr.hfma2.negate_b;
            neg_c = instr.hfma2.negate_c;
            return {instr.hfma2.saturate, instr.hfma2.type_b,
                    GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset()),
                    instr.hfma2.type_reg39, GetRegister(instr.gpr39)};
        case OpCode::Id::HFMA2_RC:
            neg_b = instr.hfma2.negate_b;
            neg_c = instr.hfma2.negate_c;
            return {instr.hfma2.saturate, instr.hfma2.type_reg39, GetRegister(instr.gpr39),
                    instr.hfma2.type_b,
                    GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset())};
        case OpCode::Id::HFMA2_RR:
            neg_b = instr.hfma2.rr.negate_b;
            neg_c = instr.hfma2.rr.negate_c;
            return {instr.hfma2.rr.saturate, instr.hfma2.type_b, GetRegister(instr.gpr20),
                    instr.hfma2.rr.type_c, GetRegister(instr.gpr39)};
        case OpCode::Id::HFMA2_IMM_R:
            neg_c = instr.hfma2.negate_c;
            return {instr.hfma2.saturate, identity, UnpackHalfImmediate(instr, true),
                    instr.hfma2.type_reg39, GetRegister(instr.gpr39)};
        default:
            return {false, identity, Immediate(0), identity, Immediate(0)};
        }
    }();
    UNIMPLEMENTED_IF_MSG(saturate, "HFMA2 saturation is not implemented");

    op_b = GetOperandAbsNegHalf(op_b, false, neg_b);
    op_c = GetOperandAbsNegHalf(op_c, false, neg_c);

    // Fused multiply-add over the packed halves, then merge with the previous
    // destination contents according to the merge mode.
    MetaHalfArithmetic meta{true, {type_a, type_b, type_c}};
    Node value = Operation(OperationCode::HFma, meta, op_a, op_b, op_c);
    value = HalfMerge(GetRegister(instr.gpr0), value, instr.hfma2.merge);

    SetRegister(bb, instr.gpr0, value);

    return pc;
}
76
77} // namespace VideoCommon::Shader \ No newline at end of file
diff --git a/src/video_core/shader/decode/integer_set.cpp b/src/video_core/shader/decode/integer_set.cpp
new file mode 100644
index 000000000..a3bf17eba
--- /dev/null
+++ b/src/video_core/shader/decode/integer_set.cpp
@@ -0,0 +1,50 @@
1// Copyright 2018 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/assert.h"
6#include "common/common_types.h"
7#include "video_core/engines/shader_bytecode.h"
8#include "video_core/shader/shader_ir.h"
9
10namespace VideoCommon::Shader {
11
12using Tegra::Shader::Instruction;
13using Tegra::Shader::OpCode;
14
15u32 ShaderIR::DecodeIntegerSet(NodeBlock& bb, u32 pc) {
16 const Instruction instr = {program_code[pc]};
17 const auto opcode = OpCode::Decode(instr);
18
19 const Node op_a = GetRegister(instr.gpr8);
20 const Node op_b = [&]() {
21 if (instr.is_b_imm) {
22 return Immediate(instr.alu.GetSignedImm20_20());
23 } else if (instr.is_b_gpr) {
24 return GetRegister(instr.gpr20);
25 } else {
26 return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset());
27 }
28 }();
29
30 // The iset instruction sets a register to 1.0 or -1 (depending on the bf bit) if the condition
31 // is true, and to 0 otherwise.
32 const Node second_pred = GetPredicate(instr.iset.pred39, instr.iset.neg_pred != 0);
33 const Node first_pred =
34 GetPredicateComparisonInteger(instr.iset.cond, instr.iset.is_signed, op_a, op_b);
35
36 const OperationCode combiner = GetPredicateCombiner(instr.iset.op);
37
38 const Node predicate = Operation(combiner, first_pred, second_pred);
39
40 const Node true_value = instr.iset.bf ? Immediate(1.0f) : Immediate(-1);
41 const Node false_value = instr.iset.bf ? Immediate(0.0f) : Immediate(0);
42 const Node value =
43 Operation(OperationCode::Select, PRECISE, predicate, true_value, false_value);
44
45 SetRegister(bb, instr.gpr0, value);
46
47 return pc;
48}
49
50} // namespace VideoCommon::Shader \ No newline at end of file
diff --git a/src/video_core/shader/decode/integer_set_predicate.cpp b/src/video_core/shader/decode/integer_set_predicate.cpp
new file mode 100644
index 000000000..aad836d24
--- /dev/null
+++ b/src/video_core/shader/decode/integer_set_predicate.cpp
@@ -0,0 +1,53 @@
1// Copyright 2018 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/assert.h"
6#include "common/common_types.h"
7#include "video_core/engines/shader_bytecode.h"
8#include "video_core/shader/shader_ir.h"
9
10namespace VideoCommon::Shader {
11
12using Tegra::Shader::Instruction;
13using Tegra::Shader::OpCode;
14using Tegra::Shader::Pred;
15
16u32 ShaderIR::DecodeIntegerSetPredicate(NodeBlock& bb, u32 pc) {
17 const Instruction instr = {program_code[pc]};
18 const auto opcode = OpCode::Decode(instr);
19
20 const Node op_a = GetRegister(instr.gpr8);
21
22 const Node op_b = [&]() {
23 if (instr.is_b_imm) {
24 return Immediate(instr.alu.GetSignedImm20_20());
25 } else if (instr.is_b_gpr) {
26 return GetRegister(instr.gpr20);
27 } else {
28 return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset());
29 }
30 }();
31
32 // We can't use the constant predicate as destination.
33 ASSERT(instr.isetp.pred3 != static_cast<u64>(Pred::UnusedIndex));
34
35 const Node second_pred = GetPredicate(instr.isetp.pred39, instr.isetp.neg_pred != 0);
36 const Node predicate =
37 GetPredicateComparisonInteger(instr.isetp.cond, instr.isetp.is_signed, op_a, op_b);
38
39 // Set the primary predicate to the result of Predicate OP SecondPredicate
40 const OperationCode combiner = GetPredicateCombiner(instr.isetp.op);
41 const Node value = Operation(combiner, predicate, second_pred);
42 SetPredicate(bb, instr.isetp.pred3, value);
43
44 if (instr.isetp.pred0 != static_cast<u64>(Pred::UnusedIndex)) {
45 // Set the secondary predicate to the result of !Predicate OP SecondPredicate, if enabled
46 const Node negated_pred = Operation(OperationCode::LogicalNegate, predicate);
47 SetPredicate(bb, instr.isetp.pred0, Operation(combiner, negated_pred, second_pred));
48 }
49
50 return pc;
51}
52
53} // namespace VideoCommon::Shader \ No newline at end of file
diff --git a/src/video_core/shader/decode/memory.cpp b/src/video_core/shader/decode/memory.cpp
new file mode 100644
index 000000000..ea3c71eed
--- /dev/null
+++ b/src/video_core/shader/decode/memory.cpp
@@ -0,0 +1,239 @@
1// Copyright 2018 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <algorithm>
6#include <vector>
7#include <fmt/format.h>
8
9#include "common/assert.h"
10#include "common/common_types.h"
11#include "video_core/engines/shader_bytecode.h"
12#include "video_core/shader/shader_ir.h"
13
14namespace VideoCommon::Shader {
15
16using Tegra::Shader::Attribute;
17using Tegra::Shader::Instruction;
18using Tegra::Shader::OpCode;
19using Tegra::Shader::Register;
20
21u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) {
22 const Instruction instr = {program_code[pc]};
23 const auto opcode = OpCode::Decode(instr);
24
25 switch (opcode->get().GetId()) {
26 case OpCode::Id::LD_A: {
27 // Note: Shouldn't this be interp mode flat? As in no interpolation made.
28 UNIMPLEMENTED_IF_MSG(instr.gpr8.Value() != Register::ZeroIndex,
29 "Indirect attribute loads are not supported");
30 UNIMPLEMENTED_IF_MSG((instr.attribute.fmt20.immediate.Value() % sizeof(u32)) != 0,
31 "Unaligned attribute loads are not supported");
32
33 Tegra::Shader::IpaMode input_mode{Tegra::Shader::IpaInterpMode::Pass,
34 Tegra::Shader::IpaSampleMode::Default};
35
36 u64 next_element = instr.attribute.fmt20.element;
37 auto next_index = static_cast<u64>(instr.attribute.fmt20.index.Value());
38
39 const auto LoadNextElement = [&](u32 reg_offset) {
40 const Node buffer = GetRegister(instr.gpr39);
41 const Node attribute = GetInputAttribute(static_cast<Attribute::Index>(next_index),
42 next_element, input_mode, buffer);
43
44 SetRegister(bb, instr.gpr0.Value() + reg_offset, attribute);
45
46 // Load the next attribute element into the following register. If the element
47 // to load goes beyond the vec4 size, load the first element of the next
48 // attribute.
49 next_element = (next_element + 1) % 4;
50 next_index = next_index + (next_element == 0 ? 1 : 0);
51 };
52
53 const u32 num_words = static_cast<u32>(instr.attribute.fmt20.size.Value()) + 1;
54 for (u32 reg_offset = 0; reg_offset < num_words; ++reg_offset) {
55 LoadNextElement(reg_offset);
56 }
57 break;
58 }
59 case OpCode::Id::LD_C: {
60 UNIMPLEMENTED_IF(instr.ld_c.unknown != 0);
61
62 Node index = GetRegister(instr.gpr8);
63
64 const Node op_a =
65 GetConstBufferIndirect(instr.cbuf36.index, instr.cbuf36.GetOffset() + 0, index);
66
67 switch (instr.ld_c.type.Value()) {
68 case Tegra::Shader::UniformType::Single:
69 SetRegister(bb, instr.gpr0, op_a);
70 break;
71
72 case Tegra::Shader::UniformType::Double: {
73 const Node op_b =
74 GetConstBufferIndirect(instr.cbuf36.index, instr.cbuf36.GetOffset() + 4, index);
75
76 SetTemporal(bb, 0, op_a);
77 SetTemporal(bb, 1, op_b);
78 SetRegister(bb, instr.gpr0, GetTemporal(0));
79 SetRegister(bb, instr.gpr0.Value() + 1, GetTemporal(1));
80 break;
81 }
82 default:
83 UNIMPLEMENTED_MSG("Unhandled type: {}", static_cast<unsigned>(instr.ld_c.type.Value()));
84 }
85 break;
86 }
87 case OpCode::Id::LD_L: {
88 UNIMPLEMENTED_IF_MSG(instr.ld_l.unknown == 1, "LD_L Unhandled mode: {}",
89 static_cast<u32>(instr.ld_l.unknown.Value()));
90
91 const auto GetLmem = [&](s32 offset) {
92 ASSERT(offset % 4 == 0);
93 const Node immediate_offset = Immediate(static_cast<s32>(instr.smem_imm) + offset);
94 const Node address = Operation(OperationCode::IAdd, NO_PRECISE, GetRegister(instr.gpr8),
95 immediate_offset);
96 return GetLocalMemory(address);
97 };
98
99 switch (instr.ldst_sl.type.Value()) {
100 case Tegra::Shader::StoreType::Bits32:
101 case Tegra::Shader::StoreType::Bits64:
102 case Tegra::Shader::StoreType::Bits128: {
103 const u32 count = [&]() {
104 switch (instr.ldst_sl.type.Value()) {
105 case Tegra::Shader::StoreType::Bits32:
106 return 1;
107 case Tegra::Shader::StoreType::Bits64:
108 return 2;
109 case Tegra::Shader::StoreType::Bits128:
110 return 4;
111 default:
112 UNREACHABLE();
113 return 0;
114 }
115 }();
116 for (u32 i = 0; i < count; ++i)
117 SetTemporal(bb, i, GetLmem(i * 4));
118 for (u32 i = 0; i < count; ++i)
119 SetRegister(bb, instr.gpr0.Value() + i, GetTemporal(i));
120 break;
121 }
122 default:
123 UNIMPLEMENTED_MSG("LD_L Unhandled type: {}",
124 static_cast<u32>(instr.ldst_sl.type.Value()));
125 }
126 break;
127 }
128 case OpCode::Id::LDG: {
129 const u32 count = [&]() {
130 switch (instr.ldg.type) {
131 case Tegra::Shader::UniformType::Single:
132 return 1;
133 case Tegra::Shader::UniformType::Double:
134 return 2;
135 case Tegra::Shader::UniformType::Quad:
136 case Tegra::Shader::UniformType::UnsignedQuad:
137 return 4;
138 default:
139 UNIMPLEMENTED_MSG("Unimplemented LDG size!");
140 return 1;
141 }
142 }();
143
144 const Node addr_register = GetRegister(instr.gpr8);
145 const Node base_address =
146 TrackCbuf(addr_register, global_code, static_cast<s64>(global_code.size()));
147 const auto cbuf = std::get_if<CbufNode>(base_address);
148 ASSERT(cbuf != nullptr);
149 const auto cbuf_offset_imm = std::get_if<ImmediateNode>(cbuf->GetOffset());
150 ASSERT(cbuf_offset_imm != nullptr);
151 const auto cbuf_offset = cbuf_offset_imm->GetValue();
152
153 bb.push_back(Comment(
154 fmt::format("Base address is c[0x{:x}][0x{:x}]", cbuf->GetIndex(), cbuf_offset)));
155
156 const GlobalMemoryBase descriptor{cbuf->GetIndex(), cbuf_offset};
157 used_global_memory_bases.insert(descriptor);
158
159 const Node immediate_offset =
160 Immediate(static_cast<u32>(instr.ldg.immediate_offset.Value()));
161 const Node base_real_address =
162 Operation(OperationCode::UAdd, NO_PRECISE, immediate_offset, addr_register);
163
164 for (u32 i = 0; i < count; ++i) {
165 const Node it_offset = Immediate(i * 4);
166 const Node real_address =
167 Operation(OperationCode::UAdd, NO_PRECISE, base_real_address, it_offset);
168 const Node gmem = StoreNode(GmemNode(real_address, base_address, descriptor));
169
170 SetTemporal(bb, i, gmem);
171 }
172 for (u32 i = 0; i < count; ++i) {
173 SetRegister(bb, instr.gpr0.Value() + i, GetTemporal(i));
174 }
175 break;
176 }
177 case OpCode::Id::ST_A: {
178 UNIMPLEMENTED_IF_MSG(instr.gpr8.Value() != Register::ZeroIndex,
179 "Indirect attribute loads are not supported");
180 UNIMPLEMENTED_IF_MSG((instr.attribute.fmt20.immediate.Value() % sizeof(u32)) != 0,
181 "Unaligned attribute loads are not supported");
182
183 u64 next_element = instr.attribute.fmt20.element;
184 auto next_index = static_cast<u64>(instr.attribute.fmt20.index.Value());
185
186 const auto StoreNextElement = [&](u32 reg_offset) {
187 const auto dest = GetOutputAttribute(static_cast<Attribute::Index>(next_index),
188 next_element, GetRegister(instr.gpr39));
189 const auto src = GetRegister(instr.gpr0.Value() + reg_offset);
190
191 bb.push_back(Operation(OperationCode::Assign, dest, src));
192
193 // Load the next attribute element into the following register. If the element
194 // to load goes beyond the vec4 size, load the first element of the next
195 // attribute.
196 next_element = (next_element + 1) % 4;
197 next_index = next_index + (next_element == 0 ? 1 : 0);
198 };
199
200 const u32 num_words = static_cast<u32>(instr.attribute.fmt20.size.Value()) + 1;
201 for (u32 reg_offset = 0; reg_offset < num_words; ++reg_offset) {
202 StoreNextElement(reg_offset);
203 }
204
205 break;
206 }
207 case OpCode::Id::ST_L: {
208 UNIMPLEMENTED_IF_MSG(instr.st_l.unknown == 0, "ST_L Unhandled mode: {}",
209 static_cast<u32>(instr.st_l.unknown.Value()));
210
211 const auto GetLmemAddr = [&](s32 offset) {
212 ASSERT(offset % 4 == 0);
213 const Node immediate = Immediate(static_cast<s32>(instr.smem_imm) + offset);
214 return Operation(OperationCode::IAdd, NO_PRECISE, GetRegister(instr.gpr8), immediate);
215 };
216
217 switch (instr.ldst_sl.type.Value()) {
218 case Tegra::Shader::StoreType::Bits128:
219 SetLocalMemory(bb, GetLmemAddr(12), GetRegister(instr.gpr0.Value() + 3));
220 SetLocalMemory(bb, GetLmemAddr(8), GetRegister(instr.gpr0.Value() + 2));
221 case Tegra::Shader::StoreType::Bits64:
222 SetLocalMemory(bb, GetLmemAddr(4), GetRegister(instr.gpr0.Value() + 1));
223 case Tegra::Shader::StoreType::Bits32:
224 SetLocalMemory(bb, GetLmemAddr(0), GetRegister(instr.gpr0));
225 break;
226 default:
227 UNIMPLEMENTED_MSG("ST_L Unhandled type: {}",
228 static_cast<u32>(instr.ldst_sl.type.Value()));
229 }
230 break;
231 }
232 default:
233 UNIMPLEMENTED_MSG("Unhandled memory instruction: {}", opcode->get().GetName());
234 }
235
236 return pc;
237}
238
239} // namespace VideoCommon::Shader
diff --git a/src/video_core/shader/decode/other.cpp b/src/video_core/shader/decode/other.cpp
new file mode 100644
index 000000000..d750a2936
--- /dev/null
+++ b/src/video_core/shader/decode/other.cpp
@@ -0,0 +1,189 @@
1// Copyright 2018 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/assert.h"
6#include "common/common_types.h"
7#include "video_core/engines/shader_bytecode.h"
8#include "video_core/shader/shader_ir.h"
9
10namespace VideoCommon::Shader {
11
12using Tegra::Shader::ConditionCode;
13using Tegra::Shader::Instruction;
14using Tegra::Shader::OpCode;
15using Tegra::Shader::Register;
16
/// Decodes control-flow and miscellaneous instructions: EXIT, KIL, MOV_SYS, BRA,
/// SSY/PBK (flow stack push), SYNC/BRK (flow stack pop), IPA, OUT_R, ISBERD, DEPBAR.
/// Returns the program counter, which EXIT may advance to end decoding early.
u32 ShaderIR::DecodeOther(NodeBlock& bb, u32 pc) {
    const Instruction instr = {program_code[pc]};
    const auto opcode = OpCode::Decode(instr);

    switch (opcode->get().GetId()) {
    case OpCode::Id::EXIT: {
        const Tegra::Shader::ConditionCode cc = instr.flow_condition_code;
        UNIMPLEMENTED_IF_MSG(cc != Tegra::Shader::ConditionCode::T, "EXIT condition code used: {}",
                             static_cast<u32>(cc));

        switch (instr.flow.cond) {
        case Tegra::Shader::FlowCondition::Always:
            bb.push_back(Operation(OperationCode::Exit));
            if (instr.pred.pred_index == static_cast<u64>(Tegra::Shader::Pred::UnusedIndex)) {
                // If this is an unconditional exit then just end processing here,
                // otherwise we have to account for the possibility of the condition
                // not being met, so continue processing the next instruction.
                pc = MAX_PROGRAM_LENGTH - 1;
            }
            break;

        case Tegra::Shader::FlowCondition::Fcsm_Tr:
            // TODO(bunnei): What is this used for? If we assume this condition is not
            // satisfied, dual vertex shaders in Farming Simulator make more sense
            UNIMPLEMENTED_MSG("Skipping unknown FlowCondition::Fcsm_Tr");
            break;

        default:
            UNIMPLEMENTED_MSG("Unhandled flow condition: {}",
                              static_cast<u32>(instr.flow.cond.Value()));
        }
        break;
    }
    case OpCode::Id::KIL: {
        UNIMPLEMENTED_IF(instr.flow.cond != Tegra::Shader::FlowCondition::Always);

        const Tegra::Shader::ConditionCode cc = instr.flow_condition_code;
        UNIMPLEMENTED_IF_MSG(cc != Tegra::Shader::ConditionCode::T, "KIL condition code used: {}",
                             static_cast<u32>(cc));

        // Emit a discard of the current invocation.
        bb.push_back(Operation(OperationCode::Discard));
        break;
    }
    case OpCode::Id::MOV_SYS: {
        switch (instr.sys20) {
        case Tegra::Shader::SystemVariable::InvocationInfo: {
            // Stubbed: writes zero instead of real invocation information.
            LOG_WARNING(HW_GPU, "MOV_SYS instruction with InvocationInfo is incomplete");
            SetRegister(bb, instr.gpr0, Immediate(0u));
            break;
        }
        case Tegra::Shader::SystemVariable::Ydirection: {
            // Config pack's third value is Y_NEGATE's state.
            SetRegister(bb, instr.gpr0, Operation(OperationCode::YNegate));
            break;
        }
        default:
            UNIMPLEMENTED_MSG("Unhandled system move: {}", static_cast<u32>(instr.sys20.Value()));
        }
        break;
    }
    case OpCode::Id::BRA: {
        UNIMPLEMENTED_IF_MSG(instr.bra.constant_buffer != 0,
                             "BRA with constant buffers are not implemented");

        // Relative branch; wrap it in a conditional when a condition code is in use.
        const u32 target = pc + instr.bra.GetBranchTarget();
        const Node branch = Operation(OperationCode::Branch, Immediate(target));

        const Tegra::Shader::ConditionCode cc = instr.flow_condition_code;
        if (cc != Tegra::Shader::ConditionCode::T) {
            bb.push_back(Conditional(GetConditionCode(cc), {branch}));
        } else {
            bb.push_back(branch);
        }
        break;
    }
    case OpCode::Id::SSY: {
        UNIMPLEMENTED_IF_MSG(instr.bra.constant_buffer != 0,
                             "Constant buffer flow is not supported");

        // The SSY opcode tells the GPU where to re-converge divergent execution paths, it sets the
        // target of the jump that the SYNC instruction will make. The SSY opcode has a similar
        // structure to the BRA opcode.
        const u32 target = pc + instr.bra.GetBranchTarget();
        bb.push_back(Operation(OperationCode::PushFlowStack, Immediate(target)));
        break;
    }
    case OpCode::Id::PBK: {
        UNIMPLEMENTED_IF_MSG(instr.bra.constant_buffer != 0,
                             "Constant buffer PBK is not supported");

        // PBK pushes to a stack the address where BRK will jump to. This shares stack with SSY but
        // using SYNC on a PBK address will kill the shader execution. We don't emulate this because
        // it's very unlikely a driver will emit such invalid shader.
        const u32 target = pc + instr.bra.GetBranchTarget();
        bb.push_back(Operation(OperationCode::PushFlowStack, Immediate(target)));
        break;
    }
    case OpCode::Id::SYNC: {
        const Tegra::Shader::ConditionCode cc = instr.flow_condition_code;
        UNIMPLEMENTED_IF_MSG(cc != Tegra::Shader::ConditionCode::T, "SYNC condition code used: {}",
                             static_cast<u32>(cc));

        // The SYNC opcode jumps to the address previously set by the SSY opcode
        bb.push_back(Operation(OperationCode::PopFlowStack));
        break;
    }
    case OpCode::Id::BRK: {
        const Tegra::Shader::ConditionCode cc = instr.flow_condition_code;
        UNIMPLEMENTED_IF_MSG(cc != Tegra::Shader::ConditionCode::T, "BRK condition code used: {}",
                             static_cast<u32>(cc));

        // The BRK opcode jumps to the address previously set by the PBK opcode
        bb.push_back(Operation(OperationCode::PopFlowStack));
        break;
    }
    case OpCode::Id::IPA: {
        // Interpolates an input attribute; Multiply mode scales by gpr20.
        const auto& attribute = instr.attribute.fmt28;
        const Tegra::Shader::IpaMode input_mode{instr.ipa.interp_mode.Value(),
                                                instr.ipa.sample_mode.Value()};

        const Node attr = GetInputAttribute(attribute.index, attribute.element, input_mode);
        Node value = attr;
        const Tegra::Shader::Attribute::Index index = attribute.index.Value();
        if (index >= Tegra::Shader::Attribute::Index::Attribute_0 &&
            index <= Tegra::Shader::Attribute::Index::Attribute_31) {
            // TODO(Blinkhawk): There are cases where a perspective attribute use PASS.
            // In theory by setting them as perspective, OpenGL does the perspective correction.
            // A way must be figured out to reverse the last step of it.
            if (input_mode.interpolation_mode == Tegra::Shader::IpaInterpMode::Multiply) {
                value = Operation(OperationCode::FMul, PRECISE, value, GetRegister(instr.gpr20));
            }
        }
        value = GetSaturatedFloat(value, instr.ipa.saturate);

        SetRegister(bb, instr.gpr0, value);
        break;
    }
    case OpCode::Id::OUT_R: {
        UNIMPLEMENTED_IF_MSG(instr.gpr20.Value() != Register::ZeroIndex,
                             "Stream buffer is not supported");

        if (instr.out.emit) {
            // gpr0 is used to store the next address and gpr8 contains the address to emit.
            // Hardware uses pointers here but we just ignore it
            bb.push_back(Operation(OperationCode::EmitVertex));
            SetRegister(bb, instr.gpr0, Immediate(0));
        }
        if (instr.out.cut) {
            bb.push_back(Operation(OperationCode::EndPrimitive));
        }
        break;
    }
    case OpCode::Id::ISBERD: {
        UNIMPLEMENTED_IF(instr.isberd.o != 0);
        UNIMPLEMENTED_IF(instr.isberd.skew != 0);
        UNIMPLEMENTED_IF(instr.isberd.shift != Tegra::Shader::IsberdShift::None);
        UNIMPLEMENTED_IF(instr.isberd.mode != Tegra::Shader::IsberdMode::None);
        LOG_WARNING(HW_GPU, "ISBERD instruction is incomplete");
        // Stubbed as a plain register move from gpr8 to gpr0.
        SetRegister(bb, instr.gpr0, GetRegister(instr.gpr8));
        break;
    }
    case OpCode::Id::DEPBAR: {
        // Stubbed: no IR is emitted for the dependency barrier.
        LOG_WARNING(HW_GPU, "DEPBAR instruction is stubbed");
        break;
    }
    default:
        UNIMPLEMENTED_MSG("Unhandled instruction: {}", opcode->get().GetName());
    }

    return pc;
}
188
189} // namespace VideoCommon::Shader
diff --git a/src/video_core/shader/decode/predicate_set_predicate.cpp b/src/video_core/shader/decode/predicate_set_predicate.cpp
new file mode 100644
index 000000000..83c61680e
--- /dev/null
+++ b/src/video_core/shader/decode/predicate_set_predicate.cpp
@@ -0,0 +1,67 @@
1// Copyright 2018 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/assert.h"
6#include "common/common_types.h"
7#include "video_core/engines/shader_bytecode.h"
8#include "video_core/shader/shader_ir.h"
9
10namespace VideoCommon::Shader {
11
12using Tegra::Shader::Instruction;
13using Tegra::Shader::OpCode;
14using Tegra::Shader::Pred;
15
16u32 ShaderIR::DecodePredicateSetPredicate(NodeBlock& bb, u32 pc) {
17 const Instruction instr = {program_code[pc]};
18 const auto opcode = OpCode::Decode(instr);
19
20 switch (opcode->get().GetId()) {
21 case OpCode::Id::PSETP: {
22 const Node op_a = GetPredicate(instr.psetp.pred12, instr.psetp.neg_pred12 != 0);
23 const Node op_b = GetPredicate(instr.psetp.pred29, instr.psetp.neg_pred29 != 0);
24
25 // We can't use the constant predicate as destination.
26 ASSERT(instr.psetp.pred3 != static_cast<u64>(Pred::UnusedIndex));
27
28 const Node second_pred = GetPredicate(instr.psetp.pred39, instr.psetp.neg_pred39 != 0);
29
30 const OperationCode combiner = GetPredicateCombiner(instr.psetp.op);
31 const Node predicate = Operation(combiner, op_a, op_b);
32
33 // Set the primary predicate to the result of Predicate OP SecondPredicate
34 SetPredicate(bb, instr.psetp.pred3, Operation(combiner, predicate, second_pred));
35
36 if (instr.psetp.pred0 != static_cast<u64>(Pred::UnusedIndex)) {
37 // Set the secondary predicate to the result of !Predicate OP SecondPredicate, if
38 // enabled
39 SetPredicate(bb, instr.psetp.pred0,
40 Operation(combiner, Operation(OperationCode::LogicalNegate, predicate),
41 second_pred));
42 }
43 break;
44 }
45 case OpCode::Id::CSETP: {
46 const Node pred = GetPredicate(instr.csetp.pred39, instr.csetp.neg_pred39 != 0);
47 const Node condition_code = GetConditionCode(instr.csetp.cc);
48
49 const OperationCode combiner = GetPredicateCombiner(instr.csetp.op);
50
51 if (instr.csetp.pred3 != static_cast<u64>(Pred::UnusedIndex)) {
52 SetPredicate(bb, instr.csetp.pred3, Operation(combiner, condition_code, pred));
53 }
54 if (instr.csetp.pred0 != static_cast<u64>(Pred::UnusedIndex)) {
55 const Node neg_cc = Operation(OperationCode::LogicalNegate, condition_code);
56 SetPredicate(bb, instr.csetp.pred0, Operation(combiner, neg_cc, pred));
57 }
58 break;
59 }
60 default:
61 UNIMPLEMENTED_MSG("Unhandled predicate instruction: {}", opcode->get().GetName());
62 }
63
64 return pc;
65}
66
67} // namespace VideoCommon::Shader \ No newline at end of file
diff --git a/src/video_core/shader/decode/predicate_set_register.cpp b/src/video_core/shader/decode/predicate_set_register.cpp
new file mode 100644
index 000000000..d0495995d
--- /dev/null
+++ b/src/video_core/shader/decode/predicate_set_register.cpp
@@ -0,0 +1,46 @@
1// Copyright 2018 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/assert.h"
6#include "common/common_types.h"
7#include "video_core/engines/shader_bytecode.h"
8#include "video_core/shader/shader_ir.h"
9
10namespace VideoCommon::Shader {
11
12using Tegra::Shader::Instruction;
13using Tegra::Shader::OpCode;
14
15u32 ShaderIR::DecodePredicateSetRegister(NodeBlock& bb, u32 pc) {
16 const Instruction instr = {program_code[pc]};
17 const auto opcode = OpCode::Decode(instr);
18
19 UNIMPLEMENTED_IF_MSG(instr.generates_cc,
20 "Condition codes generation in PSET is not implemented");
21
22 const Node op_a = GetPredicate(instr.pset.pred12, instr.pset.neg_pred12 != 0);
23 const Node op_b = GetPredicate(instr.pset.pred29, instr.pset.neg_pred29 != 0);
24 const Node first_pred = Operation(GetPredicateCombiner(instr.pset.cond), op_a, op_b);
25
26 const Node second_pred = GetPredicate(instr.pset.pred39, instr.pset.neg_pred39 != 0);
27
28 const OperationCode combiner = GetPredicateCombiner(instr.pset.op);
29 const Node predicate = Operation(combiner, first_pred, second_pred);
30
31 const Node true_value = instr.pset.bf ? Immediate(1.0f) : Immediate(0xffffffff);
32 const Node false_value = instr.pset.bf ? Immediate(0.0f) : Immediate(0);
33 const Node value =
34 Operation(OperationCode::Select, PRECISE, predicate, true_value, false_value);
35
36 if (instr.pset.bf) {
37 SetInternalFlagsFromFloat(bb, value, instr.generates_cc);
38 } else {
39 SetInternalFlagsFromInteger(bb, value, instr.generates_cc);
40 }
41 SetRegister(bb, instr.gpr0, value);
42
43 return pc;
44}
45
46} // namespace VideoCommon::Shader \ No newline at end of file
diff --git a/src/video_core/shader/decode/register_set_predicate.cpp b/src/video_core/shader/decode/register_set_predicate.cpp
new file mode 100644
index 000000000..f070e8912
--- /dev/null
+++ b/src/video_core/shader/decode/register_set_predicate.cpp
@@ -0,0 +1,51 @@
1// Copyright 2018 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/assert.h"
6#include "common/common_types.h"
7#include "video_core/engines/shader_bytecode.h"
8#include "video_core/shader/shader_ir.h"
9
10namespace VideoCommon::Shader {
11
12using Tegra::Shader::Instruction;
13using Tegra::Shader::OpCode;
14
15u32 ShaderIR::DecodeRegisterSetPredicate(NodeBlock& bb, u32 pc) {
16 const Instruction instr = {program_code[pc]};
17 const auto opcode = OpCode::Decode(instr);
18
19 UNIMPLEMENTED_IF(instr.r2p.mode != Tegra::Shader::R2pMode::Pr);
20
21 const Node apply_mask = [&]() {
22 switch (opcode->get().GetId()) {
23 case OpCode::Id::R2P_IMM:
24 return Immediate(static_cast<u32>(instr.r2p.immediate_mask));
25 default:
26 UNREACHABLE();
27 return Immediate(static_cast<u32>(instr.r2p.immediate_mask));
28 }
29 }();
30 const Node mask = GetRegister(instr.gpr8);
31 const auto offset = static_cast<u32>(instr.r2p.byte) * 8;
32
33 constexpr u32 programmable_preds = 7;
34 for (u64 pred = 0; pred < programmable_preds; ++pred) {
35 const auto shift = static_cast<u32>(pred);
36
37 const Node apply_compare = BitfieldExtract(apply_mask, shift, 1);
38 const Node condition =
39 Operation(OperationCode::LogicalUNotEqual, apply_compare, Immediate(0));
40
41 const Node value_compare = BitfieldExtract(mask, offset + shift, 1);
42 const Node value = Operation(OperationCode::LogicalUNotEqual, value_compare, Immediate(0));
43
44 const Node code = Operation(OperationCode::LogicalAssign, GetPredicate(pred), value);
45 bb.push_back(Conditional(condition, {code}));
46 }
47
48 return pc;
49}
50
51} // namespace VideoCommon::Shader \ No newline at end of file
diff --git a/src/video_core/shader/decode/shift.cpp b/src/video_core/shader/decode/shift.cpp
new file mode 100644
index 000000000..951e85f44
--- /dev/null
+++ b/src/video_core/shader/decode/shift.cpp
@@ -0,0 +1,55 @@
1// Copyright 2018 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/assert.h"
6#include "common/common_types.h"
7#include "video_core/engines/shader_bytecode.h"
8#include "video_core/shader/shader_ir.h"
9
10namespace VideoCommon::Shader {
11
12using Tegra::Shader::Instruction;
13using Tegra::Shader::OpCode;
14
15u32 ShaderIR::DecodeShift(NodeBlock& bb, u32 pc) {
16 const Instruction instr = {program_code[pc]};
17 const auto opcode = OpCode::Decode(instr);
18
19 const Node op_a = GetRegister(instr.gpr8);
20 const Node op_b = [&]() {
21 if (instr.is_b_imm) {
22 return Immediate(instr.alu.GetSignedImm20_20());
23 } else if (instr.is_b_gpr) {
24 return GetRegister(instr.gpr20);
25 } else {
26 return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset());
27 }
28 }();
29
30 switch (opcode->get().GetId()) {
31 case OpCode::Id::SHR_C:
32 case OpCode::Id::SHR_R:
33 case OpCode::Id::SHR_IMM: {
34 const Node value = SignedOperation(OperationCode::IArithmeticShiftRight,
35 instr.shift.is_signed, PRECISE, op_a, op_b);
36 SetInternalFlagsFromInteger(bb, value, instr.generates_cc);
37 SetRegister(bb, instr.gpr0, value);
38 break;
39 }
40 case OpCode::Id::SHL_C:
41 case OpCode::Id::SHL_R:
42 case OpCode::Id::SHL_IMM: {
43 const Node value = Operation(OperationCode::ILogicalShiftLeft, PRECISE, op_a, op_b);
44 SetInternalFlagsFromInteger(bb, value, instr.generates_cc);
45 SetRegister(bb, instr.gpr0, value);
46 break;
47 }
48 default:
49 UNIMPLEMENTED_MSG("Unhandled shift instruction: {}", opcode->get().GetName());
50 }
51
52 return pc;
53}
54
55} // namespace VideoCommon::Shader \ No newline at end of file
diff --git a/src/video_core/shader/decode/texture.cpp b/src/video_core/shader/decode/texture.cpp
new file mode 100644
index 000000000..a775b402b
--- /dev/null
+++ b/src/video_core/shader/decode/texture.cpp
@@ -0,0 +1,598 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <algorithm>
6#include <vector>
7#include <fmt/format.h>
8
9#include "common/assert.h"
10#include "common/bit_field.h"
11#include "common/common_types.h"
12#include "common/logging/log.h"
13#include "video_core/engines/shader_bytecode.h"
14#include "video_core/shader/shader_ir.h"
15
16namespace VideoCommon::Shader {
17
18using Tegra::Shader::Instruction;
19using Tegra::Shader::OpCode;
20using Tegra::Shader::Register;
21using Tegra::Shader::TextureMiscMode;
22using Tegra::Shader::TextureProcessMode;
23using Tegra::Shader::TextureType;
24
25static std::size_t GetCoordCount(TextureType texture_type) {
26 switch (texture_type) {
27 case TextureType::Texture1D:
28 return 1;
29 case TextureType::Texture2D:
30 return 2;
31 case TextureType::Texture3D:
32 case TextureType::TextureCube:
33 return 3;
34 default:
35 UNIMPLEMENTED_MSG("Unhandled texture type: {}", static_cast<u32>(texture_type));
36 return 0;
37 }
38}
39
40u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) {
41 const Instruction instr = {program_code[pc]};
42 const auto opcode = OpCode::Decode(instr);
43
44 switch (opcode->get().GetId()) {
45 case OpCode::Id::TEX: {
46 if (instr.tex.UsesMiscMode(TextureMiscMode::NODEP)) {
47 LOG_WARNING(HW_GPU, "TEX.NODEP implementation is incomplete");
48 }
49
50 const TextureType texture_type{instr.tex.texture_type};
51 const bool is_array = instr.tex.array != 0;
52 const bool is_aoffi = instr.tex.UsesMiscMode(TextureMiscMode::AOFFI);
53 const bool depth_compare = instr.tex.UsesMiscMode(TextureMiscMode::DC);
54 const auto process_mode = instr.tex.GetTextureProcessMode();
55 WriteTexInstructionFloat(
56 bb, instr,
57 GetTexCode(instr, texture_type, process_mode, depth_compare, is_array, is_aoffi));
58 break;
59 }
60 case OpCode::Id::TEXS: {
61 const TextureType texture_type{instr.texs.GetTextureType()};
62 const bool is_array{instr.texs.IsArrayTexture()};
63 const bool depth_compare = instr.texs.UsesMiscMode(TextureMiscMode::DC);
64 const auto process_mode = instr.texs.GetTextureProcessMode();
65
66 if (instr.texs.UsesMiscMode(TextureMiscMode::NODEP)) {
67 LOG_WARNING(HW_GPU, "TEXS.NODEP implementation is incomplete");
68 }
69
70 const Node4 components =
71 GetTexsCode(instr, texture_type, process_mode, depth_compare, is_array);
72
73 if (instr.texs.fp32_flag) {
74 WriteTexsInstructionFloat(bb, instr, components);
75 } else {
76 WriteTexsInstructionHalfFloat(bb, instr, components);
77 }
78 break;
79 }
80 case OpCode::Id::TLD4: {
81 ASSERT(instr.tld4.array == 0);
82 UNIMPLEMENTED_IF_MSG(instr.tld4.UsesMiscMode(TextureMiscMode::NDV),
83 "NDV is not implemented");
84 UNIMPLEMENTED_IF_MSG(instr.tld4.UsesMiscMode(TextureMiscMode::PTP),
85 "PTP is not implemented");
86
87 if (instr.tld4.UsesMiscMode(TextureMiscMode::NODEP)) {
88 LOG_WARNING(HW_GPU, "TLD4.NODEP implementation is incomplete");
89 }
90
91 const auto texture_type = instr.tld4.texture_type.Value();
92 const bool depth_compare = instr.tld4.UsesMiscMode(TextureMiscMode::DC);
93 const bool is_array = instr.tld4.array != 0;
94 const bool is_aoffi = instr.tld4.UsesMiscMode(TextureMiscMode::AOFFI);
95 WriteTexInstructionFloat(
96 bb, instr, GetTld4Code(instr, texture_type, depth_compare, is_array, is_aoffi));
97 break;
98 }
99 case OpCode::Id::TLD4S: {
100 UNIMPLEMENTED_IF_MSG(instr.tld4s.UsesMiscMode(TextureMiscMode::AOFFI),
101 "AOFFI is not implemented");
102 if (instr.tld4s.UsesMiscMode(TextureMiscMode::NODEP)) {
103 LOG_WARNING(HW_GPU, "TLD4S.NODEP implementation is incomplete");
104 }
105
106 const bool depth_compare = instr.tld4s.UsesMiscMode(TextureMiscMode::DC);
107 const Node op_a = GetRegister(instr.gpr8);
108 const Node op_b = GetRegister(instr.gpr20);
109
110 // TODO(Subv): Figure out how the sampler type is encoded in the TLD4S instruction.
111 std::vector<Node> coords;
112 if (depth_compare) {
113 // Note: TLD4S coordinate encoding works just like TEXS's
114 const Node op_y = GetRegister(instr.gpr8.Value() + 1);
115 coords.push_back(op_a);
116 coords.push_back(op_y);
117 coords.push_back(op_b);
118 } else {
119 coords.push_back(op_a);
120 coords.push_back(op_b);
121 }
122 const Node component = Immediate(static_cast<u32>(instr.tld4s.component));
123
124 const auto& sampler =
125 GetSampler(instr.sampler, TextureType::Texture2D, false, depth_compare);
126
127 Node4 values;
128 for (u32 element = 0; element < values.size(); ++element) {
129 auto coords_copy = coords;
130 MetaTexture meta{sampler, {}, {}, {}, {}, {}, component, element};
131 values[element] = Operation(OperationCode::TextureGather, meta, std::move(coords_copy));
132 }
133
134 WriteTexsInstructionFloat(bb, instr, values);
135 break;
136 }
137 case OpCode::Id::TXQ: {
138 if (instr.txq.UsesMiscMode(TextureMiscMode::NODEP)) {
139 LOG_WARNING(HW_GPU, "TXQ.NODEP implementation is incomplete");
140 }
141
142 // TODO: The new commits on the texture refactor, change the way samplers work.
143 // Sadly, not all texture instructions specify the type of texture their sampler
144 // uses. This must be fixed at a later instance.
145 const auto& sampler =
146 GetSampler(instr.sampler, Tegra::Shader::TextureType::Texture2D, false, false);
147
148 u32 indexer = 0;
149 switch (instr.txq.query_type) {
150 case Tegra::Shader::TextureQueryType::Dimension: {
151 for (u32 element = 0; element < 4; ++element) {
152 if (!instr.txq.IsComponentEnabled(element)) {
153 continue;
154 }
155 MetaTexture meta{sampler, {}, {}, {}, {}, {}, {}, element};
156 const Node value =
157 Operation(OperationCode::TextureQueryDimensions, meta, GetRegister(instr.gpr8));
158 SetTemporal(bb, indexer++, value);
159 }
160 for (u32 i = 0; i < indexer; ++i) {
161 SetRegister(bb, instr.gpr0.Value() + i, GetTemporal(i));
162 }
163 break;
164 }
165 default:
166 UNIMPLEMENTED_MSG("Unhandled texture query type: {}",
167 static_cast<u32>(instr.txq.query_type.Value()));
168 }
169 break;
170 }
171 case OpCode::Id::TMML: {
172 UNIMPLEMENTED_IF_MSG(instr.tmml.UsesMiscMode(Tegra::Shader::TextureMiscMode::NDV),
173 "NDV is not implemented");
174
175 if (instr.tmml.UsesMiscMode(TextureMiscMode::NODEP)) {
176 LOG_WARNING(HW_GPU, "TMML.NODEP implementation is incomplete");
177 }
178
179 auto texture_type = instr.tmml.texture_type.Value();
180 const bool is_array = instr.tmml.array != 0;
181 const auto& sampler = GetSampler(instr.sampler, texture_type, is_array, false);
182
183 std::vector<Node> coords;
184
185 // TODO: Add coordinates for different samplers once other texture types are implemented.
186 switch (texture_type) {
187 case TextureType::Texture1D:
188 coords.push_back(GetRegister(instr.gpr8));
189 break;
190 case TextureType::Texture2D:
191 coords.push_back(GetRegister(instr.gpr8.Value() + 0));
192 coords.push_back(GetRegister(instr.gpr8.Value() + 1));
193 break;
194 default:
195 UNIMPLEMENTED_MSG("Unhandled texture type {}", static_cast<u32>(texture_type));
196
197 // Fallback to interpreting as a 2D texture for now
198 coords.push_back(GetRegister(instr.gpr8.Value() + 0));
199 coords.push_back(GetRegister(instr.gpr8.Value() + 1));
200 texture_type = TextureType::Texture2D;
201 }
202
203 for (u32 element = 0; element < 2; ++element) {
204 auto params = coords;
205 MetaTexture meta{sampler, {}, {}, {}, {}, {}, {}, element};
206 const Node value = Operation(OperationCode::TextureQueryLod, meta, std::move(params));
207 SetTemporal(bb, element, value);
208 }
209 for (u32 element = 0; element < 2; ++element) {
210 SetRegister(bb, instr.gpr0.Value() + element, GetTemporal(element));
211 }
212
213 break;
214 }
215 case OpCode::Id::TLDS: {
216 const Tegra::Shader::TextureType texture_type{instr.tlds.GetTextureType()};
217 const bool is_array{instr.tlds.IsArrayTexture()};
218
219 UNIMPLEMENTED_IF_MSG(instr.tlds.UsesMiscMode(TextureMiscMode::AOFFI),
220 "AOFFI is not implemented");
221 UNIMPLEMENTED_IF_MSG(instr.tlds.UsesMiscMode(TextureMiscMode::MZ), "MZ is not implemented");
222
223 if (instr.tlds.UsesMiscMode(TextureMiscMode::NODEP)) {
224 LOG_WARNING(HW_GPU, "TLDS.NODEP implementation is incomplete");
225 }
226
227 WriteTexsInstructionFloat(bb, instr, GetTldsCode(instr, texture_type, is_array));
228 break;
229 }
230 default:
231 UNIMPLEMENTED_MSG("Unhandled memory instruction: {}", opcode->get().GetName());
232 }
233
234 return pc;
235}
236
237const Sampler& ShaderIR::GetSampler(const Tegra::Shader::Sampler& sampler, TextureType type,
238 bool is_array, bool is_shadow) {
239 const auto offset = static_cast<std::size_t>(sampler.index.Value());
240
241 // If this sampler has already been used, return the existing mapping.
242 const auto itr =
243 std::find_if(used_samplers.begin(), used_samplers.end(),
244 [&](const Sampler& entry) { return entry.GetOffset() == offset; });
245 if (itr != used_samplers.end()) {
246 ASSERT(itr->GetType() == type && itr->IsArray() == is_array &&
247 itr->IsShadow() == is_shadow);
248 return *itr;
249 }
250
251 // Otherwise create a new mapping for this sampler
252 const std::size_t next_index = used_samplers.size();
253 const Sampler entry{offset, next_index, type, is_array, is_shadow};
254 return *used_samplers.emplace(entry).first;
255}
256
257void ShaderIR::WriteTexInstructionFloat(NodeBlock& bb, Instruction instr, const Node4& components) {
258 u32 dest_elem = 0;
259 for (u32 elem = 0; elem < 4; ++elem) {
260 if (!instr.tex.IsComponentEnabled(elem)) {
261 // Skip disabled components
262 continue;
263 }
264 SetTemporal(bb, dest_elem++, components[elem]);
265 }
266 // After writing values in temporals, move them to the real registers
267 for (u32 i = 0; i < dest_elem; ++i) {
268 SetRegister(bb, instr.gpr0.Value() + i, GetTemporal(i));
269 }
270}
271
272void ShaderIR::WriteTexsInstructionFloat(NodeBlock& bb, Instruction instr,
273 const Node4& components) {
274 // TEXS has two destination registers and a swizzle. The first two elements in the swizzle
275 // go into gpr0+0 and gpr0+1, and the rest goes into gpr28+0 and gpr28+1
276
277 u32 dest_elem = 0;
278 for (u32 component = 0; component < 4; ++component) {
279 if (!instr.texs.IsComponentEnabled(component))
280 continue;
281 SetTemporal(bb, dest_elem++, components[component]);
282 }
283
284 for (u32 i = 0; i < dest_elem; ++i) {
285 if (i < 2) {
286 // Write the first two swizzle components to gpr0 and gpr0+1
287 SetRegister(bb, instr.gpr0.Value() + i % 2, GetTemporal(i));
288 } else {
289 ASSERT(instr.texs.HasTwoDestinations());
290 // Write the rest of the swizzle components to gpr28 and gpr28+1
291 SetRegister(bb, instr.gpr28.Value() + i % 2, GetTemporal(i));
292 }
293 }
294}
295
296void ShaderIR::WriteTexsInstructionHalfFloat(NodeBlock& bb, Instruction instr,
297 const Node4& components) {
298 // TEXS.F16 destionation registers are packed in two registers in pairs (just like any half
299 // float instruction).
300
301 Node4 values;
302 u32 dest_elem = 0;
303 for (u32 component = 0; component < 4; ++component) {
304 if (!instr.texs.IsComponentEnabled(component))
305 continue;
306 values[dest_elem++] = components[component];
307 }
308 if (dest_elem == 0)
309 return;
310
311 std::generate(values.begin() + dest_elem, values.end(), [&]() { return Immediate(0); });
312
313 const Node first_value = Operation(OperationCode::HPack2, values[0], values[1]);
314 if (dest_elem <= 2) {
315 SetRegister(bb, instr.gpr0, first_value);
316 return;
317 }
318
319 SetTemporal(bb, 0, first_value);
320 SetTemporal(bb, 1, Operation(OperationCode::HPack2, values[2], values[3]));
321
322 SetRegister(bb, instr.gpr0, GetTemporal(0));
323 SetRegister(bb, instr.gpr28, GetTemporal(1));
324}
325
// Builds the four per-component sample operations for a TEX-style instruction.
// `bias_offset` is added to gpr20 to locate the LOD/bias operand when one is present.
Node4 ShaderIR::GetTextureCode(Instruction instr, TextureType texture_type,
                               TextureProcessMode process_mode, std::vector<Node> coords,
                               Node array, Node depth_compare, u32 bias_offset,
                               std::vector<Node> aoffi) {
    // A non-null array/depth-compare node signals that the corresponding feature is used.
    const bool is_array = array;
    const bool is_shadow = depth_compare;

    UNIMPLEMENTED_IF_MSG((texture_type == TextureType::Texture3D && (is_array || is_shadow)) ||
                             (texture_type == TextureType::TextureCube && is_array && is_shadow),
                         "This method is not supported.");

    const auto& sampler = GetSampler(instr.sampler, texture_type, is_array, is_shadow);

    // LZ, LL and LLA all supply an explicit (or implicit zero) level of detail.
    const bool lod_needed = process_mode == TextureProcessMode::LZ ||
                            process_mode == TextureProcessMode::LL ||
                            process_mode == TextureProcessMode::LLA;

    // LOD selection (either via bias or explicit textureLod) not supported in GL for
    // sampler2DArrayShadow and samplerCubeArrayShadow.
    const bool gl_lod_supported =
        !((texture_type == Tegra::Shader::TextureType::Texture2D && is_array && is_shadow) ||
          (texture_type == Tegra::Shader::TextureType::TextureCube && is_array && is_shadow));

    // Fall back to an implicit-LOD sample when GL cannot express the explicit LOD.
    const OperationCode read_method =
        (lod_needed && gl_lod_supported) ? OperationCode::TextureLod : OperationCode::Texture;

    UNIMPLEMENTED_IF(process_mode != TextureProcessMode::None && !gl_lod_supported);

    Node bias = {};
    Node lod = {};
    if (process_mode != TextureProcessMode::None && gl_lod_supported) {
        switch (process_mode) {
        case TextureProcessMode::LZ:
            // LZ forces level zero without reading any extra operand.
            lod = Immediate(0.0f);
            break;
        case TextureProcessMode::LB:
            // If present, lod or bias are always stored in the register indexed by the gpr20
            // field with an offset depending on the usage of the other registers
            bias = GetRegister(instr.gpr20.Value() + bias_offset);
            break;
        case TextureProcessMode::LL:
            lod = GetRegister(instr.gpr20.Value() + bias_offset);
            break;
        default:
            UNIMPLEMENTED_MSG("Unimplemented process mode={}", static_cast<u32>(process_mode));
            break;
        }
    }

    // Emit one sample operation per output component; each one selects its own element.
    Node4 values;
    for (u32 element = 0; element < values.size(); ++element) {
        auto copy_coords = coords;
        MetaTexture meta{sampler, array, depth_compare, aoffi, bias, lod, {}, element};
        values[element] = Operation(read_method, meta, std::move(copy_coords));
    }

    return values;
}
384
// Decodes the operands of a TEX instruction and forwards them to GetTextureCode.
// Register layout: gpr8 holds the array index (if any) followed by coordinates;
// gpr20 holds LOD/bias, then AOFFI, then depth-compare, in that order.
Node4 ShaderIR::GetTexCode(Instruction instr, TextureType texture_type,
                           TextureProcessMode process_mode, bool depth_compare, bool is_array,
                           bool is_aoffi) {
    const bool lod_bias_enabled{
        (process_mode != TextureProcessMode::None && process_mode != TextureProcessMode::LZ)};

    // Parameters after gpr20 shift by one register when a LOD or bias operand is present.
    u64 parameter_register = instr.gpr20.Value();
    if (lod_bias_enabled) {
        ++parameter_register;
    }

    const auto [coord_count, total_coord_count] = ValidateAndGetCoordinateElement(
        texture_type, depth_compare, is_array, lod_bias_enabled, 4, 5);
    // If enabled arrays index is always stored in the gpr8 field
    const u64 array_register = instr.gpr8.Value();
    // First coordinate index is the gpr8 or gpr8 + 1 when arrays are used
    const u64 coord_register = array_register + (is_array ? 1 : 0);

    std::vector<Node> coords;
    for (std::size_t i = 0; i < coord_count; ++i) {
        coords.push_back(GetRegister(coord_register + i));
    }
    // 1D.DC in OpenGL the 2nd component is ignored.
    if (depth_compare && !is_array && texture_type == TextureType::Texture1D) {
        coords.push_back(Immediate(0.0f));
    }

    const Node array = is_array ? GetRegister(array_register) : nullptr;

    std::vector<Node> aoffi;
    if (is_aoffi) {
        aoffi = GetAoffiCoordinates(GetRegister(parameter_register++), coord_count, false);
    }

    Node dc{};
    if (depth_compare) {
        // Depth is always stored in the register signaled by gpr20 or in the next register if lod
        // or bias are used
        dc = GetRegister(parameter_register++);
    }

    return GetTextureCode(instr, texture_type, process_mode, coords, array, dc, 0, aoffi);
}
428
// Decodes the operands of a TEXS instruction. TEXS packs its operands tighter than
// TEX: the last coordinate may live in gpr20 depending on how many other operands
// (array index, LOD/bias, depth compare) are in use.
Node4 ShaderIR::GetTexsCode(Instruction instr, TextureType texture_type,
                            TextureProcessMode process_mode, bool depth_compare, bool is_array) {
    const bool lod_bias_enabled =
        (process_mode != TextureProcessMode::None && process_mode != TextureProcessMode::LZ);

    const auto [coord_count, total_coord_count] = ValidateAndGetCoordinateElement(
        texture_type, depth_compare, is_array, lod_bias_enabled, 4, 4);
    // If enabled arrays index is always stored in the gpr8 field
    const u64 array_register = instr.gpr8.Value();
    // First coordinate index is stored in gpr8 field or (gpr8 + 1) when arrays are used
    const u64 coord_register = array_register + (is_array ? 1 : 0);
    // The final coordinate spills into gpr20 unless gpr20 is already needed for
    // LOD/bias or depth compare (and the coordinates are few enough to stay in gpr8).
    const u64 last_coord_register =
        (is_array || !(lod_bias_enabled || depth_compare) || (coord_count > 2))
            ? static_cast<u64>(instr.gpr20.Value())
            : coord_register + 1;
    // With three coordinates in play, the LOD/bias operand moves one register up.
    const u32 bias_offset = coord_count > 2 ? 1 : 0;

    std::vector<Node> coords;
    for (std::size_t i = 0; i < coord_count; ++i) {
        const bool last = (i == (coord_count - 1)) && (coord_count > 1);
        coords.push_back(GetRegister(last ? last_coord_register : coord_register + i));
    }

    const Node array = is_array ? GetRegister(array_register) : nullptr;

    Node dc{};
    if (depth_compare) {
        // Depth is always stored in the register signaled by gpr20 or in the next register if lod
        // or bias are used
        const u64 depth_register = instr.gpr20.Value() + (lod_bias_enabled ? 1 : 0);
        dc = GetRegister(depth_register);
    }

    return GetTextureCode(instr, texture_type, process_mode, coords, array, dc, bias_offset, {});
}
464
465Node4 ShaderIR::GetTld4Code(Instruction instr, TextureType texture_type, bool depth_compare,
466 bool is_array, bool is_aoffi) {
467 const std::size_t coord_count = GetCoordCount(texture_type);
468 const std::size_t total_coord_count = coord_count + (is_array ? 1 : 0);
469 const std::size_t total_reg_count = total_coord_count + (depth_compare ? 1 : 0);
470
471 // If enabled arrays index is always stored in the gpr8 field
472 const u64 array_register = instr.gpr8.Value();
473 // First coordinate index is the gpr8 or gpr8 + 1 when arrays are used
474 const u64 coord_register = array_register + (is_array ? 1 : 0);
475
476 std::vector<Node> coords;
477 for (std::size_t i = 0; i < coord_count; ++i) {
478 coords.push_back(GetRegister(coord_register + i));
479 }
480
481 u64 parameter_register = instr.gpr20.Value();
482 std::vector<Node> aoffi;
483 if (is_aoffi) {
484 aoffi = GetAoffiCoordinates(GetRegister(parameter_register++), coord_count, true);
485 }
486
487 Node dc{};
488 if (depth_compare) {
489 dc = GetRegister(parameter_register++);
490 }
491
492 const auto& sampler = GetSampler(instr.sampler, texture_type, is_array, depth_compare);
493
494 Node4 values;
495 for (u32 element = 0; element < values.size(); ++element) {
496 auto coords_copy = coords;
497 MetaTexture meta{sampler, GetRegister(array_register), dc, aoffi, {}, {}, {}, element};
498 values[element] = Operation(OperationCode::TextureGather, meta, std::move(coords_copy));
499 }
500
501 return values;
502}
503
// Decodes a TLDS (texel fetch) instruction into four TexelFetch operations.
Node4 ShaderIR::GetTldsCode(Instruction instr, TextureType texture_type, bool is_array) {
    const std::size_t type_coord_count = GetCoordCount(texture_type);
    const bool lod_enabled = instr.tlds.GetTextureProcessMode() == TextureProcessMode::LL;

    // If enabled arrays index is always stored in the gpr8 field
    const u64 array_register = instr.gpr8.Value();
    // if is array gpr20 is used
    const u64 coord_register = is_array ? instr.gpr20.Value() : instr.gpr8.Value();

    // When the operands do not all fit after the first register, the final
    // coordinate spills into gpr20 (unless gpr20 is already taken by the array index).
    const u64 last_coord_register =
        ((type_coord_count > 2) || (type_coord_count == 2 && !lod_enabled)) && !is_array
            ? static_cast<u64>(instr.gpr20.Value())
            : coord_register + 1;

    std::vector<Node> coords;
    for (std::size_t i = 0; i < type_coord_count; ++i) {
        const bool last = (i == (type_coord_count - 1)) && (type_coord_count > 1);
        coords.push_back(GetRegister(last ? last_coord_register : coord_register + i));
    }

    const Node array = is_array ? GetRegister(array_register) : nullptr;
    // When lod is used always is in gpr20
    const Node lod = lod_enabled ? GetRegister(instr.gpr20) : Immediate(0);

    const auto& sampler = GetSampler(instr.sampler, texture_type, is_array, false);

    // Emit one fetch per output component.
    Node4 values;
    for (u32 element = 0; element < values.size(); ++element) {
        auto coords_copy = coords;
        MetaTexture meta{sampler, array, {}, {}, {}, lod, {}, element};
        values[element] = Operation(OperationCode::TexelFetch, meta, std::move(coords_copy));
    }
    return values;
}
538
539std::tuple<std::size_t, std::size_t> ShaderIR::ValidateAndGetCoordinateElement(
540 TextureType texture_type, bool depth_compare, bool is_array, bool lod_bias_enabled,
541 std::size_t max_coords, std::size_t max_inputs) {
542 const std::size_t coord_count = GetCoordCount(texture_type);
543
544 std::size_t total_coord_count = coord_count + (is_array ? 1 : 0) + (depth_compare ? 1 : 0);
545 const std::size_t total_reg_count = total_coord_count + (lod_bias_enabled ? 1 : 0);
546 if (total_coord_count > max_coords || total_reg_count > max_inputs) {
547 UNIMPLEMENTED_MSG("Unsupported Texture operation");
548 total_coord_count = std::min(total_coord_count, max_coords);
549 }
550 // 1D.DC OpenGL is using a vec3 but 2nd component is ignored later.
551 total_coord_count +=
552 (depth_compare && !is_array && texture_type == TextureType::Texture1D) ? 1 : 0;
553
554 return {coord_count, total_coord_count};
555}
556
557std::vector<Node> ShaderIR::GetAoffiCoordinates(Node aoffi_reg, std::size_t coord_count,
558 bool is_tld4) {
559 const auto [coord_offsets, size, wrap_value,
560 diff_value] = [is_tld4]() -> std::tuple<std::array<u32, 3>, u32, s32, s32> {
561 if (is_tld4) {
562 return {{0, 8, 16}, 6, 32, 64};
563 } else {
564 return {{0, 4, 8}, 4, 8, 16};
565 }
566 }();
567 const u32 mask = (1U << size) - 1;
568
569 std::vector<Node> aoffi;
570 aoffi.reserve(coord_count);
571
572 const auto aoffi_immediate{
573 TrackImmediate(aoffi_reg, global_code, static_cast<s64>(global_code.size()))};
574 if (!aoffi_immediate) {
575 // Variable access, not supported on AMD.
576 LOG_WARNING(HW_GPU,
577 "AOFFI constant folding failed, some hardware might have graphical issues");
578 for (std::size_t coord = 0; coord < coord_count; ++coord) {
579 const Node value = BitfieldExtract(aoffi_reg, coord_offsets.at(coord), size);
580 const Node condition =
581 Operation(OperationCode::LogicalIGreaterEqual, value, Immediate(wrap_value));
582 const Node negative = Operation(OperationCode::IAdd, value, Immediate(-diff_value));
583 aoffi.push_back(Operation(OperationCode::Select, condition, negative, value));
584 }
585 return aoffi;
586 }
587
588 for (std::size_t coord = 0; coord < coord_count; ++coord) {
589 s32 value = (*aoffi_immediate >> coord_offsets.at(coord)) & mask;
590 if (value >= wrap_value) {
591 value -= diff_value;
592 }
593 aoffi.push_back(Immediate(value));
594 }
595 return aoffi;
596}
597
598} // namespace VideoCommon::Shader
diff --git a/src/video_core/shader/decode/video.cpp b/src/video_core/shader/decode/video.cpp
new file mode 100644
index 000000000..956c01d9b
--- /dev/null
+++ b/src/video_core/shader/decode/video.cpp
@@ -0,0 +1,111 @@
1// Copyright 2018 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/assert.h"
6#include "common/common_types.h"
7#include "video_core/engines/shader_bytecode.h"
8#include "video_core/shader/shader_ir.h"
9
10namespace VideoCommon::Shader {
11
12using Tegra::Shader::Instruction;
13using Tegra::Shader::OpCode;
14using Tegra::Shader::Pred;
15using Tegra::Shader::VideoType;
16using Tegra::Shader::VmadShr;
17
// Decodes one video (VMAD/VSETP) instruction at `pc` into IR nodes appended to `bb`.
// Returns the program counter of the last consumed word.
u32 ShaderIR::DecodeVideo(NodeBlock& bb, u32 pc) {
    const Instruction instr = {program_code[pc]};
    const auto opcode = OpCode::Decode(instr);

    // Operand A is always a register, narrowed/extracted per the video encoding.
    const Node op_a =
        GetVideoOperand(GetRegister(instr.gpr8), instr.video.is_byte_chunk_a, instr.video.signed_a,
                        instr.video.type_a, instr.video.byte_height_a);
    // Operand B is either a register (same treatment as A) or a 16-bit immediate,
    // sign-extended when the encoding marks it signed.
    const Node op_b = [&]() {
        if (instr.video.use_register_b) {
            return GetVideoOperand(GetRegister(instr.gpr20), instr.video.is_byte_chunk_b,
                                   instr.video.signed_b, instr.video.type_b,
                                   instr.video.byte_height_b);
        }
        if (instr.video.signed_b) {
            // Round-trip through s16 to sign-extend the immediate.
            const auto imm = static_cast<s16>(instr.alu.GetImm20_16());
            return Immediate(static_cast<u32>(imm));
        } else {
            return Immediate(instr.alu.GetImm20_16());
        }
    }();

    switch (opcode->get().GetId()) {
    case OpCode::Id::VMAD: {
        // The multiply-add is signed if either input operand is signed.
        const bool result_signed = instr.video.signed_a == 1 || instr.video.signed_b == 1;
        const Node op_c = GetRegister(instr.gpr39);

        Node value = SignedOperation(OperationCode::IMul, result_signed, NO_PRECISE, op_a, op_b);
        value = SignedOperation(OperationCode::IAdd, result_signed, NO_PRECISE, value, op_c);

        // Optional post-shift of the accumulated result by 7 or 15 bits.
        if (instr.vmad.shr == VmadShr::Shr7 || instr.vmad.shr == VmadShr::Shr15) {
            const Node shift = Immediate(instr.vmad.shr == VmadShr::Shr7 ? 7 : 15);
            value =
                SignedOperation(OperationCode::IArithmeticShiftRight, result_signed, value, shift);
        }

        SetInternalFlagsFromInteger(bb, value, instr.generates_cc);
        SetRegister(bb, instr.gpr0, value);
        break;
    }
    case OpCode::Id::VSETP: {
        // We can't use the constant predicate as destination.
        ASSERT(instr.vsetp.pred3 != static_cast<u64>(Pred::UnusedIndex));

        const bool sign = instr.video.signed_a == 1 || instr.video.signed_b == 1;
        const Node first_pred = GetPredicateComparisonInteger(instr.vsetp.cond, sign, op_a, op_b);
        const Node second_pred = GetPredicate(instr.vsetp.pred39, false);

        const OperationCode combiner = GetPredicateCombiner(instr.vsetp.op);

        // Set the primary predicate to the result of Predicate OP SecondPredicate
        SetPredicate(bb, instr.vsetp.pred3, Operation(combiner, first_pred, second_pred));

        if (instr.vsetp.pred0 != static_cast<u64>(Pred::UnusedIndex)) {
            // Set the secondary predicate to the result of !Predicate OP SecondPredicate,
            // if enabled
            const Node negate_pred = Operation(OperationCode::LogicalNegate, first_pred);
            SetPredicate(bb, instr.vsetp.pred0, Operation(combiner, negate_pred, second_pred));
        }
        break;
    }
    default:
        UNIMPLEMENTED_MSG("Unhandled video instruction: {}", opcode->get().GetName());
    }

    return pc;
}
84
// Extracts the selected byte or half-word chunk of a video operand register.
// NOTE(review): `is_signed` is currently unused -- extraction is unsigned in all
// paths; presumably sign extension is handled (or missing) elsewhere. Confirm
// against hardware behavior before relying on signed operands.
Node ShaderIR::GetVideoOperand(Node op, bool is_chunk, bool is_signed,
                               Tegra::Shader::VideoType type, u64 byte_height) {
    if (!is_chunk) {
        // Non-chunk mode selects a single byte at byte_height.
        return BitfieldExtract(op, static_cast<u32>(byte_height * 8), 8);
    }
    const Node zero = Immediate(0);

    switch (type) {
    case Tegra::Shader::VideoType::Size16_Low:
        return BitfieldExtract(op, 0, 16);
    case Tegra::Shader::VideoType::Size16_High:
        return BitfieldExtract(op, 16, 16);
    case Tegra::Shader::VideoType::Size32:
        // TODO(Rodrigo): From my hardware tests it becomes a bit "mad" when this type is used
        // (1 * 1 + 0 == 0x5b800000). Until a better explanation is found: abort.
        UNIMPLEMENTED();
        return zero;
    case Tegra::Shader::VideoType::Invalid:
        UNREACHABLE_MSG("Invalid instruction encoding");
        return zero;
    default:
        UNREACHABLE();
        return zero;
    }
}
110
111} // namespace VideoCommon::Shader \ No newline at end of file
diff --git a/src/video_core/shader/decode/xmad.cpp b/src/video_core/shader/decode/xmad.cpp
new file mode 100644
index 000000000..db15c0718
--- /dev/null
+++ b/src/video_core/shader/decode/xmad.cpp
@@ -0,0 +1,119 @@
1// Copyright 2018 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/assert.h"
6#include "common/common_types.h"
7#include "video_core/engines/shader_bytecode.h"
8#include "video_core/shader/shader_ir.h"
9
10namespace VideoCommon::Shader {
11
12using Tegra::Shader::Instruction;
13using Tegra::Shader::OpCode;
14
// Decodes one XMAD (16x16 multiply-add) instruction at `pc` into IR nodes in `bb`.
// Returns the program counter of the last consumed word.
u32 ShaderIR::DecodeXmad(NodeBlock& bb, u32 pc) {
    const Instruction instr = {program_code[pc]};
    const auto opcode = OpCode::Decode(instr);

    UNIMPLEMENTED_IF(instr.xmad.sign_a);
    UNIMPLEMENTED_IF(instr.xmad.sign_b);
    UNIMPLEMENTED_IF_MSG(instr.generates_cc,
                         "Condition codes generation in XMAD is not implemented");

    Node op_a = GetRegister(instr.gpr8);

    // TODO(bunnei): Needs to be fixed once op_a or op_b is signed
    UNIMPLEMENTED_IF(instr.xmad.sign_a != instr.xmad.sign_b);
    const bool is_signed_a = instr.xmad.sign_a == 1;
    const bool is_signed_b = instr.xmad.sign_b == 1;
    const bool is_signed_c = is_signed_a;

    // The operand sources and the modifier flag positions differ per XMAD variant,
    // so gather them all in one place before the shared arithmetic below.
    auto [is_merge, is_psl, is_high_b, mode, op_b,
          op_c] = [&]() -> std::tuple<bool, bool, bool, Tegra::Shader::XmadMode, Node, Node> {
        switch (opcode->get().GetId()) {
        case OpCode::Id::XMAD_CR:
            return {instr.xmad.merge_56,
                    instr.xmad.product_shift_left_second,
                    instr.xmad.high_b,
                    instr.xmad.mode_cbf,
                    GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset()),
                    GetRegister(instr.gpr39)};
        case OpCode::Id::XMAD_RR:
            return {instr.xmad.merge_37, instr.xmad.product_shift_left, instr.xmad.high_b_rr,
                    instr.xmad.mode, GetRegister(instr.gpr20), GetRegister(instr.gpr39)};
        case OpCode::Id::XMAD_RC:
            return {false,
                    false,
                    instr.xmad.high_b,
                    instr.xmad.mode_cbf,
                    GetRegister(instr.gpr39),
                    GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset())};
        case OpCode::Id::XMAD_IMM:
            return {instr.xmad.merge_37,
                    instr.xmad.product_shift_left,
                    false,
                    instr.xmad.mode,
                    Immediate(static_cast<u32>(instr.xmad.imm20_16)),
                    GetRegister(instr.gpr39)};
        }
        UNIMPLEMENTED_MSG("Unhandled XMAD instruction: {}", opcode->get().GetName());
        return {false, false, false, Tegra::Shader::XmadMode::None, Immediate(0), Immediate(0)};
    }();

    // Select the low or high 16-bit half of each multiplicand.
    op_a = BitfieldExtract(op_a, instr.xmad.high_a ? 16 : 0, 16);

    const Node original_b = op_b;
    op_b = BitfieldExtract(op_b, is_high_b ? 16 : 0, 16);

    // TODO(Rodrigo): Use an appropiate sign for this operation
    Node product = Operation(OperationCode::IMul, NO_PRECISE, op_a, op_b);
    if (is_psl) {
        // PSL shifts the 16x16 product left before the accumulate.
        product = Operation(OperationCode::ILogicalShiftLeft, NO_PRECISE, product, Immediate(16));
    }
    // Materialize intermediates through temporaries so later reads see a stable value.
    SetTemporal(bb, 0, product);
    product = GetTemporal(0);

    const Node original_c = op_c;
    const Tegra::Shader::XmadMode set_mode = mode; // Workaround to clang compile error
    // Apply the C-operand mode: pass through, take a 16-bit half, or CBcc
    // (add B shifted into the high half).
    op_c = [&]() {
        switch (set_mode) {
        case Tegra::Shader::XmadMode::None:
            return original_c;
        case Tegra::Shader::XmadMode::CLo:
            return BitfieldExtract(original_c, 0, 16);
        case Tegra::Shader::XmadMode::CHi:
            return BitfieldExtract(original_c, 16, 16);
        case Tegra::Shader::XmadMode::CBcc: {
            const Node shifted_b = SignedOperation(OperationCode::ILogicalShiftLeft, is_signed_b,
                                                   NO_PRECISE, original_b, Immediate(16));
            return SignedOperation(OperationCode::IAdd, is_signed_c, NO_PRECISE, original_c,
                                   shifted_b);
        }
        default:
            UNIMPLEMENTED_MSG("Unhandled XMAD mode: {}", static_cast<u32>(instr.xmad.mode.Value()));
            return Immediate(0);
        }
    }();

    SetTemporal(bb, 1, op_c);
    op_c = GetTemporal(1);

    // TODO(Rodrigo): Use an appropiate sign for this operation
    Node sum = Operation(OperationCode::IAdd, product, op_c);
    SetTemporal(bb, 2, sum);
    sum = GetTemporal(2);
    if (is_merge) {
        // Merge mode keeps the low 16 bits of the sum and fills the high 16 bits
        // with the (unextracted) B operand.
        const Node a = BitfieldExtract(sum, 0, 16);
        const Node b =
            Operation(OperationCode::ILogicalShiftLeft, NO_PRECISE, original_b, Immediate(16));
        sum = Operation(OperationCode::IBitwiseOr, NO_PRECISE, a, b);
    }

    SetInternalFlagsFromInteger(bb, sum, instr.generates_cc);
    SetRegister(bb, instr.gpr0, sum);

    return pc;
}
118
119} // namespace VideoCommon::Shader
diff --git a/src/video_core/shader/shader_ir.cpp b/src/video_core/shader/shader_ir.cpp
new file mode 100644
index 000000000..ac5112d78
--- /dev/null
+++ b/src/video_core/shader/shader_ir.cpp
@@ -0,0 +1,444 @@
1// Copyright 2018 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <cmath>
6#include <unordered_map>
7
8#include "common/assert.h"
9#include "common/common_types.h"
10#include "common/logging/log.h"
11#include "video_core/engines/shader_bytecode.h"
12#include "video_core/shader/shader_ir.h"
13
14namespace VideoCommon::Shader {
15
16using Tegra::Shader::Attribute;
17using Tegra::Shader::Instruction;
18using Tegra::Shader::IpaMode;
19using Tegra::Shader::Pred;
20using Tegra::Shader::PredCondition;
21using Tegra::Shader::PredOperation;
22using Tegra::Shader::Register;
23
24Node ShaderIR::StoreNode(NodeData&& node_data) {
25 auto store = std::make_unique<NodeData>(node_data);
26 const Node node = store.get();
27 stored_nodes.push_back(std::move(store));
28 return node;
29}
30
31Node ShaderIR::Conditional(Node condition, std::vector<Node>&& code) {
32 return StoreNode(ConditionalNode(condition, std::move(code)));
33}
34
35Node ShaderIR::Comment(const std::string& text) {
36 return StoreNode(CommentNode(text));
37}
38
39Node ShaderIR::Immediate(u32 value) {
40 return StoreNode(ImmediateNode(value));
41}
42
43Node ShaderIR::GetRegister(Register reg) {
44 if (reg != Register::ZeroIndex) {
45 used_registers.insert(static_cast<u32>(reg));
46 }
47 return StoreNode(GprNode(reg));
48}
49
50Node ShaderIR::GetImmediate19(Instruction instr) {
51 return Immediate(instr.alu.GetImm20_19());
52}
53
54Node ShaderIR::GetImmediate32(Instruction instr) {
55 return Immediate(instr.alu.GetImm20_32());
56}
57
58Node ShaderIR::GetConstBuffer(u64 index_, u64 offset_) {
59 const auto index = static_cast<u32>(index_);
60 const auto offset = static_cast<u32>(offset_);
61
62 const auto [entry, is_new] = used_cbufs.try_emplace(index);
63 entry->second.MarkAsUsed(offset);
64
65 return StoreNode(CbufNode(index, Immediate(offset)));
66}
67
68Node ShaderIR::GetConstBufferIndirect(u64 index_, u64 offset_, Node node) {
69 const auto index = static_cast<u32>(index_);
70 const auto offset = static_cast<u32>(offset_);
71
72 const auto [entry, is_new] = used_cbufs.try_emplace(index);
73 entry->second.MarkAsUsedIndirect();
74
75 const Node final_offset = Operation(OperationCode::UAdd, NO_PRECISE, node, Immediate(offset));
76 return StoreNode(CbufNode(index, final_offset));
77}
78
79Node ShaderIR::GetPredicate(u64 pred_, bool negated) {
80 const auto pred = static_cast<Pred>(pred_);
81 if (pred != Pred::UnusedIndex && pred != Pred::NeverExecute) {
82 used_predicates.insert(pred);
83 }
84
85 return StoreNode(PredicateNode(pred, negated));
86}
87
88Node ShaderIR::GetPredicate(bool immediate) {
89 return GetPredicate(static_cast<u64>(immediate ? Pred::UnusedIndex : Pred::NeverExecute));
90}
91
92Node ShaderIR::GetInputAttribute(Attribute::Index index, u64 element,
93 const Tegra::Shader::IpaMode& input_mode, Node buffer) {
94 const auto [entry, is_new] =
95 used_input_attributes.emplace(std::make_pair(index, std::set<Tegra::Shader::IpaMode>{}));
96 entry->second.insert(input_mode);
97
98 return StoreNode(AbufNode(index, static_cast<u32>(element), input_mode, buffer));
99}
100
101Node ShaderIR::GetOutputAttribute(Attribute::Index index, u64 element, Node buffer) {
102 if (index == Attribute::Index::ClipDistances0123 ||
103 index == Attribute::Index::ClipDistances4567) {
104 const auto clip_index =
105 static_cast<u32>((index == Attribute::Index::ClipDistances4567 ? 1 : 0) + element);
106 used_clip_distances.at(clip_index) = true;
107 }
108 used_output_attributes.insert(index);
109
110 return StoreNode(AbufNode(index, static_cast<u32>(element), buffer));
111}
112
113Node ShaderIR::GetInternalFlag(InternalFlag flag, bool negated) {
114 const Node node = StoreNode(InternalFlagNode(flag));
115 if (negated) {
116 return Operation(OperationCode::LogicalNegate, node);
117 }
118 return node;
119}
120
121Node ShaderIR::GetLocalMemory(Node address) {
122 return StoreNode(LmemNode(address));
123}
124
125Node ShaderIR::GetTemporal(u32 id) {
126 return GetRegister(Register::ZeroIndex + 1 + id);
127}
128
129Node ShaderIR::GetOperandAbsNegFloat(Node value, bool absolute, bool negate) {
130 if (absolute) {
131 value = Operation(OperationCode::FAbsolute, NO_PRECISE, value);
132 }
133 if (negate) {
134 value = Operation(OperationCode::FNegate, NO_PRECISE, value);
135 }
136 return value;
137}
138
139Node ShaderIR::GetSaturatedFloat(Node value, bool saturate) {
140 if (!saturate) {
141 return value;
142 }
143 const Node positive_zero = Immediate(std::copysignf(0, 1));
144 const Node positive_one = Immediate(1.0f);
145 return Operation(OperationCode::FClamp, NO_PRECISE, value, positive_zero, positive_one);
146}
147
148Node ShaderIR::ConvertIntegerSize(Node value, Tegra::Shader::Register::Size size, bool is_signed) {
149 switch (size) {
150 case Register::Size::Byte:
151 value = SignedOperation(OperationCode::ILogicalShiftLeft, is_signed, NO_PRECISE, value,
152 Immediate(24));
153 value = SignedOperation(OperationCode::IArithmeticShiftRight, is_signed, NO_PRECISE, value,
154 Immediate(24));
155 return value;
156 case Register::Size::Short:
157 value = SignedOperation(OperationCode::ILogicalShiftLeft, is_signed, NO_PRECISE, value,
158 Immediate(16));
159 value = SignedOperation(OperationCode::IArithmeticShiftRight, is_signed, NO_PRECISE, value,
160 Immediate(16));
161 case Register::Size::Word:
162 // Default - do nothing
163 return value;
164 default:
165 UNREACHABLE_MSG("Unimplemented conversion size: {}", static_cast<u32>(size));
166 return value;
167 }
168}
169
170Node ShaderIR::GetOperandAbsNegInteger(Node value, bool absolute, bool negate, bool is_signed) {
171 if (!is_signed) {
172 // Absolute or negate on an unsigned is pointless
173 return value;
174 }
175 if (absolute) {
176 value = Operation(OperationCode::IAbsolute, NO_PRECISE, value);
177 }
178 if (negate) {
179 value = Operation(OperationCode::INegate, NO_PRECISE, value);
180 }
181 return value;
182}
183
184Node ShaderIR::UnpackHalfImmediate(Instruction instr, bool has_negation) {
185 const Node value = Immediate(instr.half_imm.PackImmediates());
186 if (!has_negation) {
187 return value;
188 }
189 const Node first_negate = GetPredicate(instr.half_imm.first_negate != 0);
190 const Node second_negate = GetPredicate(instr.half_imm.second_negate != 0);
191
192 return Operation(OperationCode::HNegate, HALF_NO_PRECISE, value, first_negate, second_negate);
193}
194
// Combines a half-float result with the destination according to the merge mode:
// take both halves of the source, reinterpret as f32, or keep one half of dest.
Node ShaderIR::HalfMerge(Node dest, Node src, Tegra::Shader::HalfMerge merge) {
    switch (merge) {
    case Tegra::Shader::HalfMerge::H0_H1:
        // Both halves come from the source; the destination is ignored.
        return src;
    case Tegra::Shader::HalfMerge::F32:
        return Operation(OperationCode::HMergeF32, src);
    case Tegra::Shader::HalfMerge::Mrg_H0:
        return Operation(OperationCode::HMergeH0, dest, src);
    case Tegra::Shader::HalfMerge::Mrg_H1:
        return Operation(OperationCode::HMergeH1, dest, src);
    }
    UNREACHABLE();
    return src;
}
209
210Node ShaderIR::GetOperandAbsNegHalf(Node value, bool absolute, bool negate) {
211 if (absolute) {
212 value = Operation(OperationCode::HAbsolute, HALF_NO_PRECISE, value);
213 }
214 if (negate) {
215 value = Operation(OperationCode::HNegate, HALF_NO_PRECISE, value, GetPredicate(true),
216 GetPredicate(true));
217 }
218 return value;
219}
220
// Builds the float comparison for a predicate condition. The *WithNan variants
// map to the same base comparison and then OR in explicit NaN checks on both
// operands, making the predicate true whenever either input is NaN.
Node ShaderIR::GetPredicateComparisonFloat(PredCondition condition, Node op_a, Node op_b) {
    static const std::unordered_map<PredCondition, OperationCode> PredicateComparisonTable = {
        {PredCondition::LessThan, OperationCode::LogicalFLessThan},
        {PredCondition::Equal, OperationCode::LogicalFEqual},
        {PredCondition::LessEqual, OperationCode::LogicalFLessEqual},
        {PredCondition::GreaterThan, OperationCode::LogicalFGreaterThan},
        {PredCondition::NotEqual, OperationCode::LogicalFNotEqual},
        {PredCondition::GreaterEqual, OperationCode::LogicalFGreaterEqual},
        {PredCondition::LessThanWithNan, OperationCode::LogicalFLessThan},
        {PredCondition::NotEqualWithNan, OperationCode::LogicalFNotEqual},
        {PredCondition::LessEqualWithNan, OperationCode::LogicalFLessEqual},
        {PredCondition::GreaterThanWithNan, OperationCode::LogicalFGreaterThan},
        {PredCondition::GreaterEqualWithNan, OperationCode::LogicalFGreaterEqual}};

    const auto comparison{PredicateComparisonTable.find(condition)};
    UNIMPLEMENTED_IF_MSG(comparison == PredicateComparisonTable.end(),
                         "Unknown predicate comparison operation");

    Node predicate = Operation(comparison->second, NO_PRECISE, op_a, op_b);

    if (condition == PredCondition::LessThanWithNan ||
        condition == PredCondition::NotEqualWithNan ||
        condition == PredCondition::LessEqualWithNan ||
        condition == PredCondition::GreaterThanWithNan ||
        condition == PredCondition::GreaterEqualWithNan) {

        // NaN makes the base comparison false; OR in IsNan on each operand so the
        // WithNan variants are true for unordered inputs.
        predicate = Operation(OperationCode::LogicalOr, predicate,
                              Operation(OperationCode::LogicalFIsNan, op_a));
        predicate = Operation(OperationCode::LogicalOr, predicate,
                              Operation(OperationCode::LogicalFIsNan, op_b));
    }

    return predicate;
}
255
256Node ShaderIR::GetPredicateComparisonInteger(PredCondition condition, bool is_signed, Node op_a,
257 Node op_b) {
258 static const std::unordered_map<PredCondition, OperationCode> PredicateComparisonTable = {
259 {PredCondition::LessThan, OperationCode::LogicalILessThan},
260 {PredCondition::Equal, OperationCode::LogicalIEqual},
261 {PredCondition::LessEqual, OperationCode::LogicalILessEqual},
262 {PredCondition::GreaterThan, OperationCode::LogicalIGreaterThan},
263 {PredCondition::NotEqual, OperationCode::LogicalINotEqual},
264 {PredCondition::GreaterEqual, OperationCode::LogicalIGreaterEqual},
265 {PredCondition::LessThanWithNan, OperationCode::LogicalILessThan},
266 {PredCondition::NotEqualWithNan, OperationCode::LogicalINotEqual},
267 {PredCondition::LessEqualWithNan, OperationCode::LogicalILessEqual},
268 {PredCondition::GreaterThanWithNan, OperationCode::LogicalIGreaterThan},
269 {PredCondition::GreaterEqualWithNan, OperationCode::LogicalIGreaterEqual}};
270
271 const auto comparison{PredicateComparisonTable.find(condition)};
272 UNIMPLEMENTED_IF_MSG(comparison == PredicateComparisonTable.end(),
273 "Unknown predicate comparison operation");
274
275 Node predicate = SignedOperation(comparison->second, is_signed, NO_PRECISE, op_a, op_b);
276
277 UNIMPLEMENTED_IF_MSG(condition == PredCondition::LessThanWithNan ||
278 condition == PredCondition::NotEqualWithNan ||
279 condition == PredCondition::LessEqualWithNan ||
280 condition == PredCondition::GreaterThanWithNan ||
281 condition == PredCondition::GreaterEqualWithNan,
282 "NaN comparisons for integers are not implemented");
283 return predicate;
284}
285
286Node ShaderIR::GetPredicateComparisonHalf(Tegra::Shader::PredCondition condition,
287 const MetaHalfArithmetic& meta, Node op_a, Node op_b) {
288
289 UNIMPLEMENTED_IF_MSG(condition == PredCondition::LessThanWithNan ||
290 condition == PredCondition::NotEqualWithNan ||
291 condition == PredCondition::LessEqualWithNan ||
292 condition == PredCondition::GreaterThanWithNan ||
293 condition == PredCondition::GreaterEqualWithNan,
294 "Unimplemented NaN comparison for half floats");
295
296 static const std::unordered_map<PredCondition, OperationCode> PredicateComparisonTable = {
297 {PredCondition::LessThan, OperationCode::Logical2HLessThan},
298 {PredCondition::Equal, OperationCode::Logical2HEqual},
299 {PredCondition::LessEqual, OperationCode::Logical2HLessEqual},
300 {PredCondition::GreaterThan, OperationCode::Logical2HGreaterThan},
301 {PredCondition::NotEqual, OperationCode::Logical2HNotEqual},
302 {PredCondition::GreaterEqual, OperationCode::Logical2HGreaterEqual},
303 {PredCondition::LessThanWithNan, OperationCode::Logical2HLessThan},
304 {PredCondition::NotEqualWithNan, OperationCode::Logical2HNotEqual},
305 {PredCondition::LessEqualWithNan, OperationCode::Logical2HLessEqual},
306 {PredCondition::GreaterThanWithNan, OperationCode::Logical2HGreaterThan},
307 {PredCondition::GreaterEqualWithNan, OperationCode::Logical2HGreaterEqual}};
308
309 const auto comparison{PredicateComparisonTable.find(condition)};
310 UNIMPLEMENTED_IF_MSG(comparison == PredicateComparisonTable.end(),
311 "Unknown predicate comparison operation");
312
313 const Node predicate = Operation(comparison->second, meta, op_a, op_b);
314
315 return predicate;
316}
317
318OperationCode ShaderIR::GetPredicateCombiner(PredOperation operation) {
319 static const std::unordered_map<PredOperation, OperationCode> PredicateOperationTable = {
320 {PredOperation::And, OperationCode::LogicalAnd},
321 {PredOperation::Or, OperationCode::LogicalOr},
322 {PredOperation::Xor, OperationCode::LogicalXor},
323 };
324
325 const auto op = PredicateOperationTable.find(operation);
326 UNIMPLEMENTED_IF_MSG(op == PredicateOperationTable.end(), "Unknown predicate operation");
327 return op->second;
328}
329
330Node ShaderIR::GetConditionCode(Tegra::Shader::ConditionCode cc) {
331 switch (cc) {
332 case Tegra::Shader::ConditionCode::NEU:
333 return GetInternalFlag(InternalFlag::Zero, true);
334 default:
335 UNIMPLEMENTED_MSG("Unimplemented condition code: {}", static_cast<u32>(cc));
336 return GetPredicate(static_cast<u64>(Pred::NeverExecute));
337 }
338}
339
340void ShaderIR::SetRegister(NodeBlock& bb, Register dest, Node src) {
341 bb.push_back(Operation(OperationCode::Assign, GetRegister(dest), src));
342}
343
344void ShaderIR::SetPredicate(NodeBlock& bb, u64 dest, Node src) {
345 bb.push_back(Operation(OperationCode::LogicalAssign, GetPredicate(dest), src));
346}
347
348void ShaderIR::SetInternalFlag(NodeBlock& bb, InternalFlag flag, Node value) {
349 bb.push_back(Operation(OperationCode::LogicalAssign, GetInternalFlag(flag), value));
350}
351
352void ShaderIR::SetLocalMemory(NodeBlock& bb, Node address, Node value) {
353 bb.push_back(Operation(OperationCode::Assign, GetLocalMemory(address), value));
354}
355
356void ShaderIR::SetTemporal(NodeBlock& bb, u32 id, Node value) {
357 SetRegister(bb, Register::ZeroIndex + 1 + id, value);
358}
359
360void ShaderIR::SetInternalFlagsFromFloat(NodeBlock& bb, Node value, bool sets_cc) {
361 if (!sets_cc) {
362 return;
363 }
364 const Node zerop = Operation(OperationCode::LogicalFEqual, value, Immediate(0.0f));
365 SetInternalFlag(bb, InternalFlag::Zero, zerop);
366 LOG_WARNING(HW_GPU, "Condition codes implementation is incomplete");
367}
368
369void ShaderIR::SetInternalFlagsFromInteger(NodeBlock& bb, Node value, bool sets_cc) {
370 if (!sets_cc) {
371 return;
372 }
373 const Node zerop = Operation(OperationCode::LogicalIEqual, value, Immediate(0));
374 SetInternalFlag(bb, InternalFlag::Zero, zerop);
375 LOG_WARNING(HW_GPU, "Condition codes implementation is incomplete");
376}
377
378Node ShaderIR::BitfieldExtract(Node value, u32 offset, u32 bits) {
379 return Operation(OperationCode::UBitfieldExtract, NO_PRECISE, value, Immediate(offset),
380 Immediate(bits));
381}
382
// Translates a signed integer opcode into its unsigned counterpart when is_signed
// is false; signed opcodes pass through unchanged. Negate/absolute have no
// unsigned equivalent and are reported as unreachable.
/*static*/ OperationCode ShaderIR::SignedToUnsignedCode(OperationCode operation_code,
                                                        bool is_signed) {
    if (is_signed) {
        return operation_code;
    }
    switch (operation_code) {
    case OperationCode::FCastInteger:
        return OperationCode::FCastUInteger;
    case OperationCode::IAdd:
        return OperationCode::UAdd;
    case OperationCode::IMul:
        return OperationCode::UMul;
    case OperationCode::IDiv:
        return OperationCode::UDiv;
    case OperationCode::IMin:
        return OperationCode::UMin;
    case OperationCode::IMax:
        return OperationCode::UMax;
    case OperationCode::ICastFloat:
        return OperationCode::UCastFloat;
    case OperationCode::ICastUnsigned:
        return OperationCode::UCastSigned;
    case OperationCode::ILogicalShiftLeft:
        return OperationCode::ULogicalShiftLeft;
    case OperationCode::ILogicalShiftRight:
        return OperationCode::ULogicalShiftRight;
    case OperationCode::IArithmeticShiftRight:
        return OperationCode::UArithmeticShiftRight;
    case OperationCode::IBitwiseAnd:
        return OperationCode::UBitwiseAnd;
    case OperationCode::IBitwiseOr:
        return OperationCode::UBitwiseOr;
    case OperationCode::IBitwiseXor:
        return OperationCode::UBitwiseXor;
    case OperationCode::IBitwiseNot:
        return OperationCode::UBitwiseNot;
    case OperationCode::IBitfieldInsert:
        return OperationCode::UBitfieldInsert;
    case OperationCode::IBitCount:
        return OperationCode::UBitCount;
    case OperationCode::LogicalILessThan:
        return OperationCode::LogicalULessThan;
    case OperationCode::LogicalIEqual:
        return OperationCode::LogicalUEqual;
    case OperationCode::LogicalILessEqual:
        return OperationCode::LogicalULessEqual;
    case OperationCode::LogicalIGreaterThan:
        return OperationCode::LogicalUGreaterThan;
    case OperationCode::LogicalINotEqual:
        return OperationCode::LogicalUNotEqual;
    case OperationCode::LogicalIGreaterEqual:
        return OperationCode::LogicalUGreaterEqual;
    // No unsigned equivalents exist for the following; in release builds these
    // intentionally fall through to the final UNREACHABLE_MSG below.
    case OperationCode::INegate:
        UNREACHABLE_MSG("Can't negate an unsigned integer");
    case OperationCode::IAbsolute:
        UNREACHABLE_MSG("Can't apply absolute to an unsigned integer");
    }
    UNREACHABLE_MSG("Unknown signed operation with code={}", static_cast<u32>(operation_code));
    return {};
}
443
444} // namespace VideoCommon::Shader \ No newline at end of file
diff --git a/src/video_core/shader/shader_ir.h b/src/video_core/shader/shader_ir.h
new file mode 100644
index 000000000..4888998d3
--- /dev/null
+++ b/src/video_core/shader/shader_ir.h
@@ -0,0 +1,842 @@
1// Copyright 2018 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <array>
8#include <cstring>
9#include <map>
10#include <optional>
11#include <set>
12#include <string>
13#include <tuple>
14#include <variant>
15#include <vector>
16
17#include "common/common_types.h"
18#include "video_core/engines/maxwell_3d.h"
19#include "video_core/engines/shader_bytecode.h"
20#include "video_core/engines/shader_header.h"
21
22namespace VideoCommon::Shader {
23
class OperationNode;
class ConditionalNode;
class GprNode;
class ImmediateNode;
class InternalFlagNode;
class PredicateNode;
class AbufNode; ///< Attribute buffer
class CbufNode; ///< Constant buffer
class LmemNode; ///< Local memory
class GmemNode; ///< Global memory
class CommentNode;

/// Raw shader bytecode, stored as 64-bit words.
using ProgramCode = std::vector<u64>;

using NodeData =
    std::variant<OperationNode, ConditionalNode, GprNode, ImmediateNode, InternalFlagNode,
                 PredicateNode, AbufNode, CbufNode, LmemNode, GmemNode, CommentNode>;
/// Non-owning pointer to node data internalized by ShaderIR::StoreNode.
using Node = const NodeData*;
using Node4 = std::array<Node, 4>;
using NodeBlock = std::vector<Node>;

// NOTE(review): appears to be measured in 64-bit bytecode words — confirm against Decode().
constexpr u32 MAX_PROGRAM_LENGTH = 0x1000;
46
/// Every IR operation; the trailing comment documents each opcode's signature.
enum class OperationCode {
    Assign, /// (float& dest, float src) -> void

    Select, /// (MetaArithmetic, bool pred, float a, float b) -> float

    FAdd,          /// (MetaArithmetic, float a, float b) -> float
    FMul,          /// (MetaArithmetic, float a, float b) -> float
    FDiv,          /// (MetaArithmetic, float a, float b) -> float
    FFma,          /// (MetaArithmetic, float a, float b, float c) -> float
    FNegate,       /// (MetaArithmetic, float a) -> float
    FAbsolute,     /// (MetaArithmetic, float a) -> float
    FClamp,        /// (MetaArithmetic, float value, float min, float max) -> float
    FMin,          /// (MetaArithmetic, float a, float b) -> float
    FMax,          /// (MetaArithmetic, float a, float b) -> float
    FCos,          /// (MetaArithmetic, float a) -> float
    FSin,          /// (MetaArithmetic, float a) -> float
    FExp2,         /// (MetaArithmetic, float a) -> float
    FLog2,         /// (MetaArithmetic, float a) -> float
    FInverseSqrt,  /// (MetaArithmetic, float a) -> float
    FSqrt,         /// (MetaArithmetic, float a) -> float
    FRoundEven,    /// (MetaArithmetic, float a) -> float
    FFloor,        /// (MetaArithmetic, float a) -> float
    FCeil,         /// (MetaArithmetic, float a) -> float
    FTrunc,        /// (MetaArithmetic, float a) -> float
    FCastInteger,  /// (MetaArithmetic, int a) -> float
    FCastUInteger, /// (MetaArithmetic, uint a) -> float

    IAdd,                  /// (MetaArithmetic, int a, int b) -> int
    IMul,                  /// (MetaArithmetic, int a, int b) -> int
    IDiv,                  /// (MetaArithmetic, int a, int b) -> int
    INegate,               /// (MetaArithmetic, int a) -> int
    IAbsolute,             /// (MetaArithmetic, int a) -> int
    IMin,                  /// (MetaArithmetic, int a, int b) -> int
    IMax,                  /// (MetaArithmetic, int a, int b) -> int
    ICastFloat,            /// (MetaArithmetic, float a) -> int
    ICastUnsigned,         /// (MetaArithmetic, uint a) -> int
    ILogicalShiftLeft,     /// (MetaArithmetic, int a, uint b) -> int
    ILogicalShiftRight,    /// (MetaArithmetic, int a, uint b) -> int
    IArithmeticShiftRight, /// (MetaArithmetic, int a, uint b) -> int
    IBitwiseAnd,           /// (MetaArithmetic, int a, int b) -> int
    IBitwiseOr,            /// (MetaArithmetic, int a, int b) -> int
    IBitwiseXor,           /// (MetaArithmetic, int a, int b) -> int
    IBitwiseNot,           /// (MetaArithmetic, int a) -> int
    IBitfieldInsert,       /// (MetaArithmetic, int base, int insert, int offset, int bits) -> int
    IBitfieldExtract,      /// (MetaArithmetic, int value, int offset, int bits) -> int
    IBitCount,             /// (MetaArithmetic, int) -> int

    UAdd,                  /// (MetaArithmetic, uint a, uint b) -> uint
    UMul,                  /// (MetaArithmetic, uint a, uint b) -> uint
    UDiv,                  /// (MetaArithmetic, uint a, uint b) -> uint
    UMin,                  /// (MetaArithmetic, uint a, uint b) -> uint
    UMax,                  /// (MetaArithmetic, uint a, uint b) -> uint
    UCastFloat,            /// (MetaArithmetic, float a) -> uint
    UCastSigned,           /// (MetaArithmetic, int a) -> uint
    ULogicalShiftLeft,     /// (MetaArithmetic, uint a, uint b) -> uint
    ULogicalShiftRight,    /// (MetaArithmetic, uint a, uint b) -> uint
    UArithmeticShiftRight, /// (MetaArithmetic, uint a, uint b) -> uint
    UBitwiseAnd,           /// (MetaArithmetic, uint a, uint b) -> uint
    UBitwiseOr,            /// (MetaArithmetic, uint a, uint b) -> uint
    UBitwiseXor,           /// (MetaArithmetic, uint a, uint b) -> uint
    UBitwiseNot,           /// (MetaArithmetic, uint a) -> uint
    UBitfieldInsert,       /// (MetaArithmetic, uint base, uint insert, int offset, int bits) -> uint
    UBitfieldExtract,      /// (MetaArithmetic, uint value, int offset, int bits) -> uint
    UBitCount,             /// (MetaArithmetic, uint) -> uint

    HAdd,      /// (MetaHalfArithmetic, f16vec2 a, f16vec2 b) -> f16vec2
    HMul,      /// (MetaHalfArithmetic, f16vec2 a, f16vec2 b) -> f16vec2
    HFma,      /// (MetaHalfArithmetic, f16vec2 a, f16vec2 b, f16vec2 c) -> f16vec2
    HAbsolute, /// (f16vec2 a) -> f16vec2
    HNegate,   /// (f16vec2 a, bool first, bool second) -> f16vec2
    HMergeF32, /// (f16vec2 src) -> float
    HMergeH0,  /// (f16vec2 dest, f16vec2 src) -> f16vec2
    HMergeH1,  /// (f16vec2 dest, f16vec2 src) -> f16vec2
    HPack2,    /// (float a, float b) -> f16vec2

    LogicalAssign, /// (bool& dst, bool src) -> void
    LogicalAnd,    /// (bool a, bool b) -> bool
    LogicalOr,     /// (bool a, bool b) -> bool
    LogicalXor,    /// (bool a, bool b) -> bool
    LogicalNegate, /// (bool a) -> bool
    LogicalPick2,  /// (bool2 pair, uint index) -> bool
    LogicalAll2,   /// (bool2 a) -> bool
    LogicalAny2,   /// (bool2 a) -> bool

    LogicalFLessThan,     /// (float a, float b) -> bool
    LogicalFEqual,        /// (float a, float b) -> bool
    LogicalFLessEqual,    /// (float a, float b) -> bool
    LogicalFGreaterThan,  /// (float a, float b) -> bool
    LogicalFNotEqual,     /// (float a, float b) -> bool
    LogicalFGreaterEqual, /// (float a, float b) -> bool
    LogicalFIsNan,        /// (float a) -> bool

    LogicalILessThan,     /// (int a, int b) -> bool
    LogicalIEqual,        /// (int a, int b) -> bool
    LogicalILessEqual,    /// (int a, int b) -> bool
    LogicalIGreaterThan,  /// (int a, int b) -> bool
    LogicalINotEqual,     /// (int a, int b) -> bool
    LogicalIGreaterEqual, /// (int a, int b) -> bool

    LogicalULessThan,     /// (uint a, uint b) -> bool
    LogicalUEqual,        /// (uint a, uint b) -> bool
    LogicalULessEqual,    /// (uint a, uint b) -> bool
    LogicalUGreaterThan,  /// (uint a, uint b) -> bool
    LogicalUNotEqual,     /// (uint a, uint b) -> bool
    LogicalUGreaterEqual, /// (uint a, uint b) -> bool

    Logical2HLessThan,     /// (MetaHalfArithmetic, f16vec2 a, f16vec2 b) -> bool2
    Logical2HEqual,        /// (MetaHalfArithmetic, f16vec2 a, f16vec2 b) -> bool2
    Logical2HLessEqual,    /// (MetaHalfArithmetic, f16vec2 a, f16vec2 b) -> bool2
    Logical2HGreaterThan,  /// (MetaHalfArithmetic, f16vec2 a, f16vec2 b) -> bool2
    Logical2HNotEqual,     /// (MetaHalfArithmetic, f16vec2 a, f16vec2 b) -> bool2
    Logical2HGreaterEqual, /// (MetaHalfArithmetic, f16vec2 a, f16vec2 b) -> bool2

    Texture,                /// (MetaTexture, float[N] coords) -> float4
    TextureLod,             /// (MetaTexture, float[N] coords) -> float4
    TextureGather,          /// (MetaTexture, float[N] coords) -> float4
    TextureQueryDimensions, /// (MetaTexture, float a) -> float4
    TextureQueryLod,        /// (MetaTexture, float[N] coords) -> float4
    TexelFetch,             /// (MetaTexture, int[N], int) -> float4

    Branch,        /// (uint branch_target) -> void
    PushFlowStack, /// (uint branch_target) -> void
    PopFlowStack,  /// () -> void
    Exit,          /// () -> void
    Discard,       /// () -> void

    EmitVertex,   /// () -> void
    EndPrimitive, /// () -> void

    YNegate, /// () -> float

    Amount,
};
180
/// Maxwell internal condition flags. Only Zero is written by the visible helpers
/// (SetInternalFlagsFromFloat/Integer); the rest are placeholders.
enum class InternalFlag {
    Zero = 0,
    Sign = 1,
    Carry = 2,
    Overflow = 3,
    Amount = 4, ///< Number of flags; not a real flag.
};
188
/// Describes the behaviour of code path of a given entry point and a return point.
enum class ExitMethod {
    Undetermined, ///< Internal value. Only occurs when analyzing a JMP loop.
    AlwaysReturn, ///< All code paths reach the return point.
    Conditional,  ///< Code path reaches the return point or an END instruction conditionally.
    AlwaysEnd,    ///< All code paths reach an END instruction.
};
196
/// Immutable description of a texture sampler used by the shader; collected so the
/// backend can declare and bind the corresponding sampler objects.
class Sampler {
public:
    explicit Sampler(std::size_t offset, std::size_t index, Tegra::Shader::TextureType type,
                     bool is_array, bool is_shadow)
        : offset{offset}, index{index}, type{type}, is_array{is_array}, is_shadow{is_shadow} {}

    std::size_t GetOffset() const {
        return offset;
    }

    std::size_t GetIndex() const {
        return index;
    }

    Tegra::Shader::TextureType GetType() const {
        return type;
    }

    bool IsArray() const {
        return is_array;
    }

    bool IsShadow() const {
        return is_shadow;
    }

    /// Lexicographic ordering over all identifying fields; required for std::set storage.
    bool operator<(const Sampler& rhs) const {
        return std::tie(offset, index, type, is_array, is_shadow) <
               std::tie(rhs.offset, rhs.index, rhs.type, rhs.is_array, rhs.is_shadow);
    }

private:
    /// Offset in TSC memory from which to read the sampler object, as specified by the sampling
    /// instruction.
    std::size_t offset{};
    std::size_t index{}; ///< Value used to index into the generated GLSL sampler array.
    Tegra::Shader::TextureType type{}; ///< The type used to sample this texture (Texture2D, etc)
    bool is_array{};  ///< Whether the texture is being sampled as an array texture or not.
    bool is_shadow{}; ///< Whether the texture is being sampled as a depth texture or not.
};
237
/// Tracks how much of a constant buffer a shader uses and whether it is accessed
/// through a runtime-computed (indirect) offset.
class ConstBuffer {
public:
    explicit ConstBuffer(u32 max_offset, bool is_indirect)
        : max_offset{max_offset}, is_indirect{is_indirect} {}

    ConstBuffer() = default;

    /// Grows the tracked size to cover a direct access at the given byte offset.
    void MarkAsUsed(u64 offset) {
        max_offset = std::max(max_offset, static_cast<u32>(offset));
    }

    /// Flags the buffer as indirectly addressed; its true extent cannot be known statically.
    void MarkAsUsedIndirect() {
        is_indirect = true;
    }

    bool IsIndirect() const {
        return is_indirect;
    }

    /// Size in bytes; sizeof(float) is added so the element at max_offset is fully covered.
    u32 GetSize() const {
        return max_offset + sizeof(float);
    }

    u32 GetMaxOffset() const {
        return max_offset;
    }

private:
    u32 max_offset{};    ///< Highest byte offset seen in a direct access.
    bool is_indirect{};  ///< True when addressed with a runtime-computed offset.
};
269
/// Identifies a global memory base address by the constant buffer slot and offset
/// that store it. Ordered so it can be kept in a std::set.
struct GlobalMemoryBase {
    u32 cbuf_index{};
    u32 cbuf_offset{};

    bool operator<(const GlobalMemoryBase& rhs) const {
        return std::tie(cbuf_index, cbuf_offset) < std::tie(rhs.cbuf_index, rhs.cbuf_offset);
    }
};
278
/// Metadata for arithmetic operations.
/// NOTE(review): `precise` presumably requests IEEE-strict evaluation in the
/// backend — confirm against the code generators.
struct MetaArithmetic {
    bool precise{};
};
282
/// Metadata for packed half-float operations.
/// NOTE(review): `types` appears to select the half-swizzle for up to three source
/// operands (default: use both halves as-is) — confirm against the decoders.
struct MetaHalfArithmetic {
    bool precise{};
    std::array<Tegra::Shader::HalfType, 3> types = {Tegra::Shader::HalfType::H0_H1,
                                                    Tegra::Shader::HalfType::H0_H1,
                                                    Tegra::Shader::HalfType::H0_H1};
};
289
/// Metadata for texture operations. Optional operands are null/empty when the
/// instruction does not use them.
struct MetaTexture {
    const Sampler& sampler;
    Node array{};          ///< Array layer operand, if any.
    Node depth_compare{};  ///< Depth comparison reference, if any.
    std::vector<Node> aoffi; ///< Texel offsets, if any.
    Node bias{};           ///< LOD bias operand, if any.
    Node lod{};            ///< Explicit LOD operand, if any.
    Node component{};      ///< Gather component selector, if any.
    u32 element{};         ///< Result element to extract from the float4 result.
};
300
/// Shared metadata instances for the common precise/imprecise cases.
constexpr MetaArithmetic PRECISE = {true};
constexpr MetaArithmetic NO_PRECISE = {false};
constexpr MetaHalfArithmetic HALF_NO_PRECISE = {false};

/// Metadata payload attached to an OperationNode.
using Meta = std::variant<MetaArithmetic, MetaHalfArithmetic, MetaTexture>;
306
307/// Holds any kind of operation that can be done in the IR
308class OperationNode final {
309public:
310 template <typename... T>
311 explicit constexpr OperationNode(OperationCode code) : code{code}, meta{} {}
312
313 template <typename... T>
314 explicit constexpr OperationNode(OperationCode code, Meta&& meta)
315 : code{code}, meta{std::move(meta)} {}
316
317 template <typename... T>
318 explicit constexpr OperationNode(OperationCode code, const T*... operands)
319 : OperationNode(code, {}, operands...) {}
320
321 template <typename... T>
322 explicit constexpr OperationNode(OperationCode code, Meta&& meta, const T*... operands_)
323 : code{code}, meta{std::move(meta)} {
324
325 auto operands_list = {operands_...};
326 for (auto& operand : operands_list) {
327 operands.push_back(operand);
328 }
329 }
330
331 explicit OperationNode(OperationCode code, Meta&& meta, std::vector<Node>&& operands)
332 : code{code}, meta{meta}, operands{std::move(operands)} {}
333
334 explicit OperationNode(OperationCode code, std::vector<Node>&& operands)
335 : code{code}, meta{}, operands{std::move(operands)} {}
336
337 OperationCode GetCode() const {
338 return code;
339 }
340
341 const Meta& GetMeta() const {
342 return meta;
343 }
344
345 std::size_t GetOperandsCount() const {
346 return operands.size();
347 }
348
349 Node operator[](std::size_t operand_index) const {
350 return operands.at(operand_index);
351 }
352
353private:
354 const OperationCode code;
355 const Meta meta;
356 std::vector<Node> operands;
357};
358
/// Encloses inside any kind of node that returns a boolean conditionally-executed code
class ConditionalNode final {
public:
    explicit ConditionalNode(Node condition, std::vector<Node>&& code)
        : condition{condition}, code{std::move(code)} {}

    Node GetCondition() const {
        return condition;
    }

    const std::vector<Node>& GetCode() const {
        return code;
    }

private:
    const Node condition;   ///< Condition to be satisfied
    std::vector<Node> code; ///< Code to execute
};
377
/// A general purpose register
class GprNode final {
public:
    explicit constexpr GprNode(Tegra::Shader::Register index) : index{index} {}

    /// Register index, widened to a plain u32 for backend consumption.
    u32 GetIndex() const {
        return static_cast<u32>(index);
    }

private:
    const Tegra::Shader::Register index;
};
390
/// A 32-bits value that represents an immediate value
class ImmediateNode final {
public:
    explicit constexpr ImmediateNode(u32 value) : value{value} {}

    /// Raw 32-bit payload; may encode an integer or the bits of a float.
    u32 GetValue() const {
        return value;
    }

private:
    const u32 value;
};
403
/// One of Maxwell's internal flags
class InternalFlagNode final {
public:
    explicit constexpr InternalFlagNode(InternalFlag flag) : flag{flag} {}

    InternalFlag GetFlag() const {
        return flag;
    }

private:
    const InternalFlag flag;
};
416
/// A predicate register, it can be negated without additional nodes
class PredicateNode final {
public:
    explicit constexpr PredicateNode(Tegra::Shader::Pred index, bool negated)
        : index{index}, negated{negated} {}

    Tegra::Shader::Pred GetIndex() const {
        return index;
    }

    /// True when the predicate's value is to be read inverted.
    bool IsNegated() const {
        return negated;
    }

private:
    const Tegra::Shader::Pred index;
    const bool negated;
};
435
/// Attribute buffer memory (known as attributes or varyings in GLSL terms)
class AbufNode final {
public:
    explicit constexpr AbufNode(Tegra::Shader::Attribute::Index index, u32 element,
                                const Tegra::Shader::IpaMode& input_mode, Node buffer = {})
        : input_mode{input_mode}, buffer{buffer}, index{index}, element{element} {}

    // Overload for accesses with no interpolation mode; input_mode is value-initialized.
    explicit constexpr AbufNode(Tegra::Shader::Attribute::Index index, u32 element,
                                Node buffer = {})
        : input_mode{}, buffer{buffer}, index{index}, element{element} {}

    Tegra::Shader::IpaMode GetInputMode() const {
        return input_mode;
    }

    Tegra::Shader::Attribute::Index GetIndex() const {
        return index;
    }

    u32 GetElement() const {
        return element;
    }

    /// Optional indexing buffer node; null for direct attribute accesses.
    Node GetBuffer() const {
        return buffer;
    }

private:
    const Tegra::Shader::IpaMode input_mode;
    const Node buffer;
    const Tegra::Shader::Attribute::Index index;
    const u32 element;
};
469
/// Constant buffer node, usually mapped to uniform buffers in GLSL
class CbufNode final {
public:
    explicit constexpr CbufNode(u32 index, Node offset) : index{index}, offset{offset} {}

    u32 GetIndex() const {
        return index;
    }

    /// Offset node: may be an immediate or a computed (indirect) expression.
    Node GetOffset() const {
        return offset;
    }

private:
    const u32 index;
    const Node offset;
};
487
/// Local memory node
class LmemNode final {
public:
    explicit constexpr LmemNode(Node address) : address{address} {}

    Node GetAddress() const {
        return address;
    }

private:
    const Node address;
};
500
/// Global memory node
class GmemNode final {
public:
    explicit constexpr GmemNode(Node real_address, Node base_address,
                                const GlobalMemoryBase& descriptor)
        : real_address{real_address}, base_address{base_address}, descriptor{descriptor} {}

    /// Full effective address of the access.
    Node GetRealAddress() const {
        return real_address;
    }

    /// Base address the access is relative to.
    Node GetBaseAddress() const {
        return base_address;
    }

    /// Constant-buffer location that supplied the base address.
    const GlobalMemoryBase& GetDescriptor() const {
        return descriptor;
    }

private:
    const Node real_address;
    const Node base_address;
    const GlobalMemoryBase descriptor;
};
525
/// Commentary, can be dropped. Carries free-form text for debugging the IR.
class CommentNode final {
public:
    explicit CommentNode(std::string text) : text{std::move(text)} {}

    const std::string& GetText() const {
        return text;
    }

private:
    std::string text;
};
538
539class ShaderIR final {
540public:
541 explicit ShaderIR(const ProgramCode& program_code, u32 main_offset)
542 : program_code{program_code}, main_offset{main_offset} {
543
544 Decode();
545 }
546
547 const std::map<u32, NodeBlock>& GetBasicBlocks() const {
548 return basic_blocks;
549 }
550
551 const std::set<u32>& GetRegisters() const {
552 return used_registers;
553 }
554
555 const std::set<Tegra::Shader::Pred>& GetPredicates() const {
556 return used_predicates;
557 }
558
559 const std::map<Tegra::Shader::Attribute::Index, std::set<Tegra::Shader::IpaMode>>&
560 GetInputAttributes() const {
561 return used_input_attributes;
562 }
563
564 const std::set<Tegra::Shader::Attribute::Index>& GetOutputAttributes() const {
565 return used_output_attributes;
566 }
567
568 const std::map<u32, ConstBuffer>& GetConstantBuffers() const {
569 return used_cbufs;
570 }
571
572 const std::set<Sampler>& GetSamplers() const {
573 return used_samplers;
574 }
575
576 const std::array<bool, Tegra::Engines::Maxwell3D::Regs::NumClipDistances>& GetClipDistances()
577 const {
578 return used_clip_distances;
579 }
580
581 const std::set<GlobalMemoryBase>& GetGlobalMemoryBases() const {
582 return used_global_memory_bases;
583 }
584
585 std::size_t GetLength() const {
586 return static_cast<std::size_t>(coverage_end * sizeof(u64));
587 }
588
589 const Tegra::Shader::Header& GetHeader() const {
590 return header;
591 }
592
593private:
594 void Decode();
595
596 ExitMethod Scan(u32 begin, u32 end, std::set<u32>& labels);
597
598 NodeBlock DecodeRange(u32 begin, u32 end);
599
600 /**
601 * Decodes a single instruction from Tegra to IR.
602 * @param bb Basic block where the nodes will be written to.
603 * @param pc Program counter. Offset to decode.
604 * @return Next address to decode.
605 */
606 u32 DecodeInstr(NodeBlock& bb, u32 pc);
607
608 u32 DecodeArithmetic(NodeBlock& bb, u32 pc);
609 u32 DecodeArithmeticImmediate(NodeBlock& bb, u32 pc);
610 u32 DecodeBfe(NodeBlock& bb, u32 pc);
611 u32 DecodeBfi(NodeBlock& bb, u32 pc);
612 u32 DecodeShift(NodeBlock& bb, u32 pc);
613 u32 DecodeArithmeticInteger(NodeBlock& bb, u32 pc);
614 u32 DecodeArithmeticIntegerImmediate(NodeBlock& bb, u32 pc);
615 u32 DecodeArithmeticHalf(NodeBlock& bb, u32 pc);
616 u32 DecodeArithmeticHalfImmediate(NodeBlock& bb, u32 pc);
617 u32 DecodeFfma(NodeBlock& bb, u32 pc);
618 u32 DecodeHfma2(NodeBlock& bb, u32 pc);
619 u32 DecodeConversion(NodeBlock& bb, u32 pc);
620 u32 DecodeMemory(NodeBlock& bb, u32 pc);
621 u32 DecodeTexture(NodeBlock& bb, u32 pc);
622 u32 DecodeFloatSetPredicate(NodeBlock& bb, u32 pc);
623 u32 DecodeIntegerSetPredicate(NodeBlock& bb, u32 pc);
624 u32 DecodeHalfSetPredicate(NodeBlock& bb, u32 pc);
625 u32 DecodePredicateSetRegister(NodeBlock& bb, u32 pc);
626 u32 DecodePredicateSetPredicate(NodeBlock& bb, u32 pc);
627 u32 DecodeRegisterSetPredicate(NodeBlock& bb, u32 pc);
628 u32 DecodeFloatSet(NodeBlock& bb, u32 pc);
629 u32 DecodeIntegerSet(NodeBlock& bb, u32 pc);
630 u32 DecodeHalfSet(NodeBlock& bb, u32 pc);
631 u32 DecodeVideo(NodeBlock& bb, u32 pc);
632 u32 DecodeXmad(NodeBlock& bb, u32 pc);
633 u32 DecodeOther(NodeBlock& bb, u32 pc);
634
635 /// Internalizes node's data and returns a managed pointer to a clone of that node
636 Node StoreNode(NodeData&& node_data);
637
638 /// Creates a conditional node
639 Node Conditional(Node condition, std::vector<Node>&& code);
640 /// Creates a commentary
641 Node Comment(const std::string& text);
642 /// Creates an u32 immediate
643 Node Immediate(u32 value);
644 /// Creates a s32 immediate
645 Node Immediate(s32 value) {
646 return Immediate(static_cast<u32>(value));
647 }
648 /// Creates a f32 immediate
649 Node Immediate(f32 value) {
650 u32 integral;
651 std::memcpy(&integral, &value, sizeof(u32));
652 return Immediate(integral);
653 }
654
655 /// Generates a node for a passed register.
656 Node GetRegister(Tegra::Shader::Register reg);
657 /// Generates a node representing a 19-bit immediate value
658 Node GetImmediate19(Tegra::Shader::Instruction instr);
659 /// Generates a node representing a 32-bit immediate value
660 Node GetImmediate32(Tegra::Shader::Instruction instr);
661 /// Generates a node representing a constant buffer
662 Node GetConstBuffer(u64 index, u64 offset);
663 /// Generates a node representing a constant buffer with a variadic offset
664 Node GetConstBufferIndirect(u64 index, u64 offset, Node node);
665 /// Generates a node for a passed predicate. It can be optionally negated
666 Node GetPredicate(u64 pred, bool negated = false);
667 /// Generates a predicate node for an immediate true or false value
668 Node GetPredicate(bool immediate);
669 /// Generates a node representing an input attribute. Keeps track of used attributes.
670 Node GetInputAttribute(Tegra::Shader::Attribute::Index index, u64 element,
671 const Tegra::Shader::IpaMode& input_mode, Node buffer = {});
672 /// Generates a node representing an output attribute. Keeps track of used attributes.
673 Node GetOutputAttribute(Tegra::Shader::Attribute::Index index, u64 element, Node buffer);
674 /// Generates a node representing an internal flag
675 Node GetInternalFlag(InternalFlag flag, bool negated = false);
676 /// Generates a node representing a local memory address
677 Node GetLocalMemory(Node address);
678 /// Generates a temporal, internally it uses a post-RZ register
679 Node GetTemporal(u32 id);
680
681 /// Sets a register. src value must be a number-evaluated node.
682 void SetRegister(NodeBlock& bb, Tegra::Shader::Register dest, Node src);
683 /// Sets a predicate. src value must be a bool-evaluated node
684 void SetPredicate(NodeBlock& bb, u64 dest, Node src);
685 /// Sets an internal flag. src value must be a bool-evaluated node
686 void SetInternalFlag(NodeBlock& bb, InternalFlag flag, Node value);
687 /// Sets a local memory address. address and value must be a number-evaluated node
688 void SetLocalMemory(NodeBlock& bb, Node address, Node value);
689 /// Sets a temporal. Internally it uses a post-RZ register
690 void SetTemporal(NodeBlock& bb, u32 id, Node value);
691
692 /// Sets internal flags from a float
693 void SetInternalFlagsFromFloat(NodeBlock& bb, Node value, bool sets_cc = true);
694 /// Sets internal flags from an integer
695 void SetInternalFlagsFromInteger(NodeBlock& bb, Node value, bool sets_cc = true);
696
697 /// Conditionally absolute/negated float. Absolute is applied first
698 Node GetOperandAbsNegFloat(Node value, bool absolute, bool negate);
699 /// Conditionally saturates a float
700 Node GetSaturatedFloat(Node value, bool saturate = true);
701
702 /// Converts an integer to different sizes.
703 Node ConvertIntegerSize(Node value, Tegra::Shader::Register::Size size, bool is_signed);
704 /// Conditionally absolute/negated integer. Absolute is applied first
705 Node GetOperandAbsNegInteger(Node value, bool absolute, bool negate, bool is_signed);
706
707 /// Unpacks a half immediate from an instruction
708 Node UnpackHalfImmediate(Tegra::Shader::Instruction instr, bool has_negation);
709 /// Merges a half pair into another value
710 Node HalfMerge(Node dest, Node src, Tegra::Shader::HalfMerge merge);
711 /// Conditionally absolute/negated half float pair. Absolute is applied first
712 Node GetOperandAbsNegHalf(Node value, bool absolute, bool negate);
713
714 /// Returns a predicate comparing two floats
715 Node GetPredicateComparisonFloat(Tegra::Shader::PredCondition condition, Node op_a, Node op_b);
716 /// Returns a predicate comparing two integers
717 Node GetPredicateComparisonInteger(Tegra::Shader::PredCondition condition, bool is_signed,
718 Node op_a, Node op_b);
719 /// Returns a predicate comparing two half floats. meta consumes how both pairs will be compared
720 Node GetPredicateComparisonHalf(Tegra::Shader::PredCondition condition,
721 const MetaHalfArithmetic& meta, Node op_a, Node op_b);
722
723 /// Returns a predicate combiner operation
724 OperationCode GetPredicateCombiner(Tegra::Shader::PredOperation operation);
725
726 /// Returns a condition code evaluated from internal flags
727 Node GetConditionCode(Tegra::Shader::ConditionCode cc);
728
729 /// Accesses a texture sampler
730 const Sampler& GetSampler(const Tegra::Shader::Sampler& sampler,
731 Tegra::Shader::TextureType type, bool is_array, bool is_shadow);
732
733 /// Extracts a sequence of bits from a node
734 Node BitfieldExtract(Node value, u32 offset, u32 bits);
735
736 void WriteTexInstructionFloat(NodeBlock& bb, Tegra::Shader::Instruction instr,
737 const Node4& components);
738
739 void WriteTexsInstructionFloat(NodeBlock& bb, Tegra::Shader::Instruction instr,
740 const Node4& components);
741 void WriteTexsInstructionHalfFloat(NodeBlock& bb, Tegra::Shader::Instruction instr,
742 const Node4& components);
743
744 Node4 GetTexCode(Tegra::Shader::Instruction instr, Tegra::Shader::TextureType texture_type,
745 Tegra::Shader::TextureProcessMode process_mode, bool depth_compare,
746 bool is_array, bool is_aoffi);
747
748 Node4 GetTexsCode(Tegra::Shader::Instruction instr, Tegra::Shader::TextureType texture_type,
749 Tegra::Shader::TextureProcessMode process_mode, bool depth_compare,
750 bool is_array);
751
752 Node4 GetTld4Code(Tegra::Shader::Instruction instr, Tegra::Shader::TextureType texture_type,
753 bool depth_compare, bool is_array, bool is_aoffi);
754
755 Node4 GetTldsCode(Tegra::Shader::Instruction instr, Tegra::Shader::TextureType texture_type,
756 bool is_array);
757
758 std::tuple<std::size_t, std::size_t> ValidateAndGetCoordinateElement(
759 Tegra::Shader::TextureType texture_type, bool depth_compare, bool is_array,
760 bool lod_bias_enabled, std::size_t max_coords, std::size_t max_inputs);
761
762 std::vector<Node> GetAoffiCoordinates(Node aoffi_reg, std::size_t coord_count, bool is_tld4);
763
764 Node4 GetTextureCode(Tegra::Shader::Instruction instr, Tegra::Shader::TextureType texture_type,
765 Tegra::Shader::TextureProcessMode process_mode, std::vector<Node> coords,
766 Node array, Node depth_compare, u32 bias_offset, std::vector<Node> aoffi);
767
768 Node GetVideoOperand(Node op, bool is_chunk, bool is_signed, Tegra::Shader::VideoType type,
769 u64 byte_height);
770
771 void WriteLogicOperation(NodeBlock& bb, Tegra::Shader::Register dest,
772 Tegra::Shader::LogicOperation logic_op, Node op_a, Node op_b,
773 Tegra::Shader::PredicateResultMode predicate_mode,
774 Tegra::Shader::Pred predicate, bool sets_cc);
775 void WriteLop3Instruction(NodeBlock& bb, Tegra::Shader::Register dest, Node op_a, Node op_b,
776 Node op_c, Node imm_lut, bool sets_cc);
777
778 Node TrackCbuf(Node tracked, const NodeBlock& code, s64 cursor);
779
780 std::optional<u32> TrackImmediate(Node tracked, const NodeBlock& code, s64 cursor);
781
782 std::pair<Node, s64> TrackRegister(const GprNode* tracked, const NodeBlock& code, s64 cursor);
783
/// Creates an operation node from variadic operand pointers
784 template <typename... T>
785 Node Operation(OperationCode code, const T*... operands) {
786 return StoreNode(OperationNode(code, operands...));
787 }
788
/// Creates an operation node carrying metadata, from variadic operand pointers
789 template <typename... T>
790 Node Operation(OperationCode code, Meta&& meta, const T*... operands) {
791 return StoreNode(OperationNode(code, std::move(meta), operands...));
792 }
793
/// Creates an operation node from an already-built operand vector
/// NOTE(review): the template parameter pack is unused in this overload
794 template <typename... T>
795 Node Operation(OperationCode code, std::vector<Node>&& operands) {
796 return StoreNode(OperationNode(code, std::move(operands)));
797 }
798
/// Creates an operation node carrying metadata, from an operand vector
/// NOTE(review): the template parameter pack is unused in this overload
799 template <typename... T>
800 Node Operation(OperationCode code, Meta&& meta, std::vector<Node>&& operands) {
801 return StoreNode(OperationNode(code, std::move(meta), std::move(operands)));
802 }
803
/// Creates an operation node, mapping the opcode to its unsigned form when !is_signed
804 template <typename... T>
805 Node SignedOperation(OperationCode code, bool is_signed, const T*... operands) {
806 return StoreNode(OperationNode(SignedToUnsignedCode(code, is_signed), operands...));
807 }
808
/// Creates an operation node with metadata, mapping the opcode to its unsigned form
/// when !is_signed
809 template <typename... T>
810 Node SignedOperation(OperationCode code, bool is_signed, Meta&& meta, const T*... operands) {
811 return StoreNode(
812 OperationNode(SignedToUnsignedCode(code, is_signed), std::move(meta), operands...));
813 }
814
815 static OperationCode SignedToUnsignedCode(OperationCode operation_code, bool is_signed);
816
817 const ProgramCode& program_code;
818 const u32 main_offset;
819
820 u32 coverage_begin{};
821 u32 coverage_end{};
822 std::map<std::pair<u32, u32>, ExitMethod> exit_method_map;
823
824 std::map<u32, NodeBlock> basic_blocks;
825 NodeBlock global_code;
826
827 std::vector<std::unique_ptr<NodeData>> stored_nodes;
828
829 std::set<u32> used_registers;
830 std::set<Tegra::Shader::Pred> used_predicates;
831 std::map<Tegra::Shader::Attribute::Index, std::set<Tegra::Shader::IpaMode>>
832 used_input_attributes;
833 std::set<Tegra::Shader::Attribute::Index> used_output_attributes;
834 std::map<u32, ConstBuffer> used_cbufs;
835 std::set<Sampler> used_samplers;
836 std::array<bool, Tegra::Engines::Maxwell3D::Regs::NumClipDistances> used_clip_distances{};
837 std::set<GlobalMemoryBase> used_global_memory_bases;
838
839 Tegra::Shader::Header header;
840};
841
842} // namespace VideoCommon::Shader
diff --git a/src/video_core/shader/track.cpp b/src/video_core/shader/track.cpp
new file mode 100644
index 000000000..4505667ff
--- /dev/null
+++ b/src/video_core/shader/track.cpp
@@ -0,0 +1,102 @@
1// Copyright 2018 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <algorithm>
6#include <utility>
7#include <variant>
8
9#include "common/common_types.h"
10#include "video_core/shader/shader_ir.h"
11
12namespace VideoCommon::Shader {
13
14namespace {
15std::pair<Node, s64> FindOperation(const NodeBlock& code, s64 cursor,
16 OperationCode operation_code) {
17 for (; cursor >= 0; --cursor) {
18 const Node node = code.at(cursor);
19 if (const auto operation = std::get_if<OperationNode>(node)) {
20 if (operation->GetCode() == operation_code)
21 return {node, cursor};
22 }
23 if (const auto conditional = std::get_if<ConditionalNode>(node)) {
24 const auto& conditional_code = conditional->GetCode();
25 const auto [found, internal_cursor] = FindOperation(
26 conditional_code, static_cast<s64>(conditional_code.size() - 1), operation_code);
27 if (found)
28 return {found, cursor};
29 }
30 }
31 return {};
32}
33} // namespace
34
35Node ShaderIR::TrackCbuf(Node tracked, const NodeBlock& code, s64 cursor) {
36 if (const auto cbuf = std::get_if<CbufNode>(tracked)) {
37 // Cbuf found, but it has to be immediate
38 return std::holds_alternative<ImmediateNode>(*cbuf->GetOffset()) ? tracked : nullptr;
39 }
40 if (const auto gpr = std::get_if<GprNode>(tracked)) {
41 if (gpr->GetIndex() == Tegra::Shader::Register::ZeroIndex) {
42 return nullptr;
43 }
44 // Reduce the cursor in one to avoid infinite loops when the instruction sets the same
45 // register that it uses as operand
46 const auto [source, new_cursor] = TrackRegister(gpr, code, cursor - 1);
47 if (!source) {
48 return nullptr;
49 }
50 return TrackCbuf(source, code, new_cursor);
51 }
52 if (const auto operation = std::get_if<OperationNode>(tracked)) {
53 for (std::size_t i = 0; i < operation->GetOperandsCount(); ++i) {
54 if (const auto found = TrackCbuf((*operation)[i], code, cursor)) {
55 // Cbuf found in operand
56 return found;
57 }
58 }
59 return nullptr;
60 }
61 if (const auto conditional = std::get_if<ConditionalNode>(tracked)) {
62 const auto& conditional_code = conditional->GetCode();
63 return TrackCbuf(tracked, conditional_code, static_cast<s64>(conditional_code.size()));
64 }
65 return nullptr;
66}
67
68std::optional<u32> ShaderIR::TrackImmediate(Node tracked, const NodeBlock& code, s64 cursor) {
69 // Reduce the cursor in one to avoid infinite loops when the instruction sets the same register
70 // that it uses as operand
71 const auto [found, found_cursor] =
72 TrackRegister(&std::get<GprNode>(*tracked), code, cursor - 1);
73 if (!found) {
74 return {};
75 }
76 if (const auto immediate = std::get_if<ImmediateNode>(found)) {
77 return immediate->GetValue();
78 }
79 return {};
80}
81
82std::pair<Node, s64> ShaderIR::TrackRegister(const GprNode* tracked, const NodeBlock& code,
83 s64 cursor) {
84 for (; cursor >= 0; --cursor) {
85 const auto [found_node, new_cursor] = FindOperation(code, cursor, OperationCode::Assign);
86 if (!found_node) {
87 return {};
88 }
89 const auto operation = std::get_if<OperationNode>(found_node);
90 ASSERT(operation);
91
92 const auto& target = (*operation)[0];
93 if (const auto gpr_target = std::get_if<GprNode>(target)) {
94 if (gpr_target->GetIndex() == tracked->GetIndex()) {
95 return {(*operation)[1], new_cursor};
96 }
97 }
98 }
99 return {};
100}
101
102} // namespace VideoCommon::Shader
diff --git a/src/video_core/surface.cpp b/src/video_core/surface.cpp
index 1a344229f..a7ac26d71 100644
--- a/src/video_core/surface.cpp
+++ b/src/video_core/surface.cpp
@@ -50,6 +50,24 @@ bool SurfaceTargetIsLayered(SurfaceTarget target) {
50 } 50 }
51} 51}
52 52
/// Returns true when the given surface target is an array texture type.
53bool SurfaceTargetIsArray(SurfaceTarget target) {
54 switch (target) {
55 case SurfaceTarget::Texture1D:
56 case SurfaceTarget::Texture2D:
57 case SurfaceTarget::Texture3D:
58 case SurfaceTarget::TextureCubemap:
59 return false;
60 case SurfaceTarget::Texture1DArray:
61 case SurfaceTarget::Texture2DArray:
62 case SurfaceTarget::TextureCubeArray:
63 return true;
64 default:
// Unknown targets are logged and treated as non-array
65 LOG_CRITICAL(HW_GPU, "Unimplemented surface_target={}", static_cast<u32>(target));
66 UNREACHABLE();
67 return false;
68 }
69}
70
53PixelFormat PixelFormatFromDepthFormat(Tegra::DepthFormat format) { 71PixelFormat PixelFormatFromDepthFormat(Tegra::DepthFormat format) {
54 switch (format) { 72 switch (format) {
55 case Tegra::DepthFormat::S8_Z24_UNORM: 73 case Tegra::DepthFormat::S8_Z24_UNORM:
@@ -71,8 +89,6 @@ PixelFormat PixelFormatFromDepthFormat(Tegra::DepthFormat format) {
71 89
72PixelFormat PixelFormatFromRenderTargetFormat(Tegra::RenderTargetFormat format) { 90PixelFormat PixelFormatFromRenderTargetFormat(Tegra::RenderTargetFormat format) {
73 switch (format) { 91 switch (format) {
74 // TODO (Hexagon12): Converting SRGBA to RGBA is a hack and doesn't completely correct the
75 // gamma.
76 case Tegra::RenderTargetFormat::RGBA8_SRGB: 92 case Tegra::RenderTargetFormat::RGBA8_SRGB:
77 return PixelFormat::RGBA8_SRGB; 93 return PixelFormat::RGBA8_SRGB;
78 case Tegra::RenderTargetFormat::RGBA8_UNORM: 94 case Tegra::RenderTargetFormat::RGBA8_UNORM:
@@ -408,6 +424,8 @@ PixelFormat PixelFormatFromGPUPixelFormat(Tegra::FramebufferConfig::PixelFormat
408 switch (format) { 424 switch (format) {
409 case Tegra::FramebufferConfig::PixelFormat::ABGR8: 425 case Tegra::FramebufferConfig::PixelFormat::ABGR8:
410 return PixelFormat::ABGR8U; 426 return PixelFormat::ABGR8U;
427 case Tegra::FramebufferConfig::PixelFormat::BGRA8:
428 return PixelFormat::BGRA8;
411 default: 429 default:
412 LOG_CRITICAL(HW_GPU, "Unimplemented format={}", static_cast<u32>(format)); 430 LOG_CRITICAL(HW_GPU, "Unimplemented format={}", static_cast<u32>(format));
413 UNREACHABLE(); 431 UNREACHABLE();
diff --git a/src/video_core/surface.h b/src/video_core/surface.h
index c2259c3c2..b783e4b27 100644
--- a/src/video_core/surface.h
+++ b/src/video_core/surface.h
@@ -109,8 +109,7 @@ enum class SurfaceType {
109 ColorTexture = 0, 109 ColorTexture = 0,
110 Depth = 1, 110 Depth = 1,
111 DepthStencil = 2, 111 DepthStencil = 2,
112 Fill = 3, 112 Invalid = 3,
113 Invalid = 4,
114}; 113};
115 114
116enum class SurfaceTarget { 115enum class SurfaceTarget {
@@ -441,6 +440,8 @@ SurfaceTarget SurfaceTargetFromTextureType(Tegra::Texture::TextureType texture_t
441 440
442bool SurfaceTargetIsLayered(SurfaceTarget target); 441bool SurfaceTargetIsLayered(SurfaceTarget target);
443 442
443bool SurfaceTargetIsArray(SurfaceTarget target);
444
444PixelFormat PixelFormatFromDepthFormat(Tegra::DepthFormat format); 445PixelFormat PixelFormatFromDepthFormat(Tegra::DepthFormat format);
445 446
446PixelFormat PixelFormatFromRenderTargetFormat(Tegra::RenderTargetFormat format); 447PixelFormat PixelFormatFromRenderTargetFormat(Tegra::RenderTargetFormat format);
diff --git a/src/video_core/texture_cache.cpp b/src/video_core/texture_cache.cpp
new file mode 100644
index 000000000..e96eba7cc
--- /dev/null
+++ b/src/video_core/texture_cache.cpp
@@ -0,0 +1,386 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/alignment.h"
6#include "common/assert.h"
7#include "common/cityhash.h"
8#include "common/common_types.h"
9#include "core/core.h"
10#include "video_core/surface.h"
11#include "video_core/texture_cache.h"
12#include "video_core/textures/decoders.h"
13#include "video_core/textures/texture.h"
14
15namespace VideoCommon {
16
17using VideoCore::Surface::SurfaceTarget;
18
19using VideoCore::Surface::ComponentTypeFromDepthFormat;
20using VideoCore::Surface::ComponentTypeFromRenderTarget;
21using VideoCore::Surface::ComponentTypeFromTexture;
22using VideoCore::Surface::PixelFormatFromDepthFormat;
23using VideoCore::Surface::PixelFormatFromRenderTargetFormat;
24using VideoCore::Surface::PixelFormatFromTextureFormat;
25using VideoCore::Surface::SurfaceTargetFromTextureType;
26
27constexpr u32 GetMipmapSize(bool uncompressed, u32 mip_size, u32 tile) {
28 return uncompressed ? mip_size : std::max(1U, (mip_size + tile - 1) / tile);
29}
30
31SurfaceParams SurfaceParams::CreateForTexture(Core::System& system,
32 const Tegra::Texture::FullTextureInfo& config) {
33 SurfaceParams params;
34 params.is_tiled = config.tic.IsTiled();
35 params.block_width = params.is_tiled ? config.tic.BlockWidth() : 0,
36 params.block_height = params.is_tiled ? config.tic.BlockHeight() : 0,
37 params.block_depth = params.is_tiled ? config.tic.BlockDepth() : 0,
38 params.tile_width_spacing = params.is_tiled ? (1 << config.tic.tile_width_spacing.Value()) : 1;
39 params.pixel_format =
40 PixelFormatFromTextureFormat(config.tic.format, config.tic.r_type.Value(), false);
41 params.component_type = ComponentTypeFromTexture(config.tic.r_type.Value());
42 params.type = GetFormatType(params.pixel_format);
43 params.target = SurfaceTargetFromTextureType(config.tic.texture_type);
44 params.width = Common::AlignUp(config.tic.Width(), GetCompressionFactor(params.pixel_format));
45 params.height = Common::AlignUp(config.tic.Height(), GetCompressionFactor(params.pixel_format));
46 params.depth = config.tic.Depth();
47 if (params.target == SurfaceTarget::TextureCubemap ||
48 params.target == SurfaceTarget::TextureCubeArray) {
49 params.depth *= 6;
50 }
51 params.pitch = params.is_tiled ? 0 : config.tic.Pitch();
52 params.unaligned_height = config.tic.Height();
53 params.num_levels = config.tic.max_mip_level + 1;
54
55 params.CalculateCachedValues();
56 return params;
57}
58
59SurfaceParams SurfaceParams::CreateForDepthBuffer(
60 Core::System& system, u32 zeta_width, u32 zeta_height, Tegra::DepthFormat format,
61 u32 block_width, u32 block_height, u32 block_depth,
62 Tegra::Engines::Maxwell3D::Regs::InvMemoryLayout type) {
63 SurfaceParams params;
64 params.is_tiled = type == Tegra::Engines::Maxwell3D::Regs::InvMemoryLayout::BlockLinear;
65 params.block_width = 1 << std::min(block_width, 5U);
66 params.block_height = 1 << std::min(block_height, 5U);
67 params.block_depth = 1 << std::min(block_depth, 5U);
68 params.tile_width_spacing = 1;
69 params.pixel_format = PixelFormatFromDepthFormat(format);
70 params.component_type = ComponentTypeFromDepthFormat(format);
71 params.type = GetFormatType(params.pixel_format);
72 params.width = zeta_width;
73 params.height = zeta_height;
74 params.unaligned_height = zeta_height;
75 params.target = SurfaceTarget::Texture2D;
76 params.depth = 1;
77 params.num_levels = 1;
78
79 params.CalculateCachedValues();
80 return params;
81}
82
83SurfaceParams SurfaceParams::CreateForFramebuffer(Core::System& system, std::size_t index) {
84 const auto& config{system.GPU().Maxwell3D().regs.rt[index]};
85 SurfaceParams params;
86 params.is_tiled =
87 config.memory_layout.type == Tegra::Engines::Maxwell3D::Regs::InvMemoryLayout::BlockLinear;
88 params.block_width = 1 << config.memory_layout.block_width;
89 params.block_height = 1 << config.memory_layout.block_height;
90 params.block_depth = 1 << config.memory_layout.block_depth;
91 params.tile_width_spacing = 1;
92 params.pixel_format = PixelFormatFromRenderTargetFormat(config.format);
93 params.component_type = ComponentTypeFromRenderTarget(config.format);
94 params.type = GetFormatType(params.pixel_format);
95 if (params.is_tiled) {
96 params.width = config.width;
97 } else {
98 const u32 bpp = GetFormatBpp(params.pixel_format) / CHAR_BIT;
99 params.pitch = config.width;
100 params.width = params.pitch / bpp;
101 }
102 params.height = config.height;
103 params.depth = 1;
104 params.unaligned_height = config.height;
105 params.target = SurfaceTarget::Texture2D;
106 params.num_levels = 1;
107
108 params.CalculateCachedValues();
109 return params;
110}
111
112SurfaceParams SurfaceParams::CreateForFermiCopySurface(
113 const Tegra::Engines::Fermi2D::Regs::Surface& config) {
114 SurfaceParams params{};
115 params.is_tiled = !config.linear;
116 params.block_width = params.is_tiled ? std::min(config.BlockWidth(), 32U) : 0,
117 params.block_height = params.is_tiled ? std::min(config.BlockHeight(), 32U) : 0,
118 params.block_depth = params.is_tiled ? std::min(config.BlockDepth(), 32U) : 0,
119 params.tile_width_spacing = 1;
120 params.pixel_format = PixelFormatFromRenderTargetFormat(config.format);
121 params.component_type = ComponentTypeFromRenderTarget(config.format);
122 params.type = GetFormatType(params.pixel_format);
123 params.width = config.width;
124 params.height = config.height;
125 params.unaligned_height = config.height;
126 // TODO(Rodrigo): Try to guess the surface target from depth and layer parameters
127 params.target = SurfaceTarget::Texture2D;
128 params.depth = 1;
129 params.num_levels = 1;
130
131 params.CalculateCachedValues();
132 return params;
133}
134
135u32 SurfaceParams::GetMipWidth(u32 level) const {
136 return std::max(1U, width >> level);
137}
138
139u32 SurfaceParams::GetMipHeight(u32 level) const {
140 return std::max(1U, height >> level);
141}
142
143u32 SurfaceParams::GetMipDepth(u32 level) const {
144 return IsLayered() ? depth : std::max(1U, depth >> level);
145}
146
147bool SurfaceParams::IsLayered() const {
148 switch (target) {
149 case SurfaceTarget::Texture1DArray:
150 case SurfaceTarget::Texture2DArray:
151 case SurfaceTarget::TextureCubeArray:
152 case SurfaceTarget::TextureCubemap:
153 return true;
154 default:
155 return false;
156 }
157}
158
159u32 SurfaceParams::GetMipBlockHeight(u32 level) const {
160 // Auto block resizing algorithm from:
161 // https://cgit.freedesktop.org/mesa/mesa/tree/src/gallium/drivers/nouveau/nv50/nv50_miptree.c
162 if (level == 0) {
163 return block_height;
164 }
165 const u32 height{GetMipHeight(level)};
166 const u32 default_block_height{GetDefaultBlockHeight(pixel_format)};
167 const u32 blocks_in_y{(height + default_block_height - 1) / default_block_height};
168 u32 block_height = 16;
169 while (block_height > 1 && blocks_in_y <= block_height * 4) {
170 block_height >>= 1;
171 }
172 return block_height;
173}
174
175u32 SurfaceParams::GetMipBlockDepth(u32 level) const {
176 if (level == 0)
177 return block_depth;
178 if (target != SurfaceTarget::Texture3D)
179 return 1;
180
181 const u32 depth{GetMipDepth(level)};
182 u32 block_depth = 32;
183 while (block_depth > 1 && depth * 2 <= block_depth) {
184 block_depth >>= 1;
185 }
186 if (block_depth == 32 && GetMipBlockHeight(level) >= 4) {
187 return 16;
188 }
189 return block_depth;
190}
191
192std::size_t SurfaceParams::GetGuestMipmapLevelOffset(u32 level) const {
193 std::size_t offset = 0;
194 for (u32 i = 0; i < level; i++) {
195 offset += GetInnerMipmapMemorySize(i, false, IsLayered(), false);
196 }
197 return offset;
198}
199
200std::size_t SurfaceParams::GetHostMipmapLevelOffset(u32 level) const {
201 std::size_t offset = 0;
202 for (u32 i = 0; i < level; i++) {
203 offset += GetInnerMipmapMemorySize(i, true, false, false);
204 }
205 return offset;
206}
207
/// Guest (tiled) size in bytes of a single layer, covering all mip levels.
208std::size_t SurfaceParams::GetGuestLayerSize() const {
209 return GetInnerMemorySize(false, true, false);
210}
211
/// Host (linear) size in bytes of one mip level; for layered targets only a single
/// layer/slice is counted (layer_only is IsLayered()).
212std::size_t SurfaceParams::GetHostLayerSize(u32 level) const {
213 return GetInnerMipmapMemorySize(level, true, IsLayered(), false);
214}
215
216bool SurfaceParams::IsFamiliar(const SurfaceParams& view_params) const {
217 if (std::tie(is_tiled, tile_width_spacing, pixel_format, component_type, type) !=
218 std::tie(view_params.is_tiled, view_params.tile_width_spacing, view_params.pixel_format,
219 view_params.component_type, view_params.type)) {
220 return false;
221 }
222
223 const SurfaceTarget view_target{view_params.target};
224 if (view_target == target) {
225 return true;
226 }
227
228 switch (target) {
229 case SurfaceTarget::Texture1D:
230 case SurfaceTarget::Texture2D:
231 case SurfaceTarget::Texture3D:
232 return false;
233 case SurfaceTarget::Texture1DArray:
234 return view_target == SurfaceTarget::Texture1D;
235 case SurfaceTarget::Texture2DArray:
236 return view_target == SurfaceTarget::Texture2D;
237 case SurfaceTarget::TextureCubemap:
238 return view_target == SurfaceTarget::Texture2D ||
239 view_target == SurfaceTarget::Texture2DArray;
240 case SurfaceTarget::TextureCubeArray:
241 return view_target == SurfaceTarget::Texture2D ||
242 view_target == SurfaceTarget::Texture2DArray ||
243 view_target == SurfaceTarget::TextureCubemap;
244 default:
245 UNIMPLEMENTED_MSG("Unimplemented texture family={}", static_cast<u32>(target));
246 return false;
247 }
248}
249
/// True when the pixel format is a depth or depth-stencil (zeta) format, i.e. it lies in
/// the range between the last color format and the last depth-stencil format.
250bool SurfaceParams::IsPixelFormatZeta() const {
251 return pixel_format >= VideoCore::Surface::PixelFormat::MaxColorFormat &&
252 pixel_format < VideoCore::Surface::PixelFormat::MaxDepthStencilFormat;
253}
254
/// Computes and stores the values derived from the core parameters: guest/host byte sizes
/// and the layer count.
255void SurfaceParams::CalculateCachedValues() {
256 guest_size_in_bytes = GetInnerMemorySize(false, false, false);
257
258 // ASTC is decompressed in software and emulated as RGBA8 (4 bytes per texel)
259 if (IsPixelFormatASTC(pixel_format)) {
260 host_size_in_bytes = width * height * depth * 4;
261 } else {
262 host_size_in_bytes = GetInnerMemorySize(true, false, false);
263 }
264
265 switch (target) {
266 case SurfaceTarget::Texture1D:
267 case SurfaceTarget::Texture2D:
268 case SurfaceTarget::Texture3D:
269 num_layers = 1;
270 break;
271 case SurfaceTarget::Texture1DArray:
272 case SurfaceTarget::Texture2DArray:
273 case SurfaceTarget::TextureCubemap:
274 case SurfaceTarget::TextureCubeArray:
// Layered targets store their layer count in the depth dimension
275 num_layers = depth;
276 break;
277 default:
278 UNREACHABLE();
279 }
280}
281
282std::size_t SurfaceParams::GetInnerMipmapMemorySize(u32 level, bool as_host_size, bool layer_only,
283 bool uncompressed) const {
284 const bool tiled{as_host_size ? false : is_tiled};
285 const u32 tile_x{GetDefaultBlockWidth(pixel_format)};
286 const u32 tile_y{GetDefaultBlockHeight(pixel_format)};
287 const u32 width{GetMipmapSize(uncompressed, GetMipWidth(level), tile_x)};
288 const u32 height{GetMipmapSize(uncompressed, GetMipHeight(level), tile_y)};
289 const u32 depth{layer_only ? 1U : GetMipDepth(level)};
290 return Tegra::Texture::CalculateSize(tiled, GetBytesPerPixel(pixel_format), width, height,
291 depth, GetMipBlockHeight(level), GetMipBlockDepth(level));
292}
293
294std::size_t SurfaceParams::GetInnerMemorySize(bool as_host_size, bool layer_only,
295 bool uncompressed) const {
296 std::size_t size = 0;
297 for (u32 level = 0; level < num_levels; ++level) {
298 size += GetInnerMipmapMemorySize(level, as_host_size, layer_only, uncompressed);
299 }
300 if (!as_host_size && is_tiled) {
301 size = Common::AlignUp(size, Tegra::Texture::GetGOBSize() * block_height * block_depth);
302 }
303 return size;
304}
305
306std::map<u64, std::pair<u32, u32>> SurfaceParams::CreateViewOffsetMap() const {
307 std::map<u64, std::pair<u32, u32>> view_offset_map;
308 switch (target) {
309 case SurfaceTarget::Texture1D:
310 case SurfaceTarget::Texture2D:
311 case SurfaceTarget::Texture3D: {
312 constexpr u32 layer = 0;
313 for (u32 level = 0; level < num_levels; ++level) {
314 const std::size_t offset{GetGuestMipmapLevelOffset(level)};
315 view_offset_map.insert({offset, {layer, level}});
316 }
317 break;
318 }
319 case SurfaceTarget::Texture1DArray:
320 case SurfaceTarget::Texture2DArray:
321 case SurfaceTarget::TextureCubemap:
322 case SurfaceTarget::TextureCubeArray: {
323 const std::size_t layer_size{GetGuestLayerSize()};
324 for (u32 level = 0; level < num_levels; ++level) {
325 const std::size_t level_offset{GetGuestMipmapLevelOffset(level)};
326 for (u32 layer = 0; layer < num_layers; ++layer) {
327 const auto layer_offset{static_cast<std::size_t>(layer_size * layer)};
328 const std::size_t offset{level_offset + layer_offset};
329 view_offset_map.insert({offset, {layer, level}});
330 }
331 }
332 break;
333 }
334 default:
335 UNIMPLEMENTED_MSG("Unimplemented surface target {}", static_cast<u32>(target));
336 }
337 return view_offset_map;
338}
339
340bool SurfaceParams::IsViewValid(const SurfaceParams& view_params, u32 layer, u32 level) const {
341 return IsDimensionValid(view_params, level) && IsDepthValid(view_params, level) &&
342 IsInBounds(view_params, layer, level);
343}
344
345bool SurfaceParams::IsDimensionValid(const SurfaceParams& view_params, u32 level) const {
346 return view_params.width == GetMipWidth(level) && view_params.height == GetMipHeight(level);
347}
348
349bool SurfaceParams::IsDepthValid(const SurfaceParams& view_params, u32 level) const {
350 if (view_params.target != SurfaceTarget::Texture3D) {
351 return true;
352 }
353 return view_params.depth == GetMipDepth(level);
354}
355
356bool SurfaceParams::IsInBounds(const SurfaceParams& view_params, u32 layer, u32 level) const {
357 return layer + view_params.num_layers <= num_layers &&
358 level + view_params.num_levels <= num_levels;
359}
360
/// Hashes the parameter struct by its raw bytes with CityHash64.
/// NOTE(review): this digests padding bytes as well, so every construction path must
/// deterministically initialize the whole struct — confirm all creators zero it first.
361std::size_t HasheableSurfaceParams::Hash() const {
362 return static_cast<std::size_t>(
363 Common::CityHash64(reinterpret_cast<const char*>(this), sizeof(*this)));
364}
365
/// Member-wise equality over every parameter (unlike Hash(), padding bytes are ignored).
366bool HasheableSurfaceParams::operator==(const HasheableSurfaceParams& rhs) const {
367 return std::tie(is_tiled, block_width, block_height, block_depth, tile_width_spacing, width,
368 height, depth, pitch, unaligned_height, num_levels, pixel_format,
369 component_type, type, target) ==
370 std::tie(rhs.is_tiled, rhs.block_width, rhs.block_height, rhs.block_depth,
371 rhs.tile_width_spacing, rhs.width, rhs.height, rhs.depth, rhs.pitch,
372 rhs.unaligned_height, rhs.num_levels, rhs.pixel_format, rhs.component_type,
373 rhs.type, rhs.target);
374}
375
/// Hashes the view key by its raw bytes with CityHash64.
/// NOTE(review): padding bytes are hashed too; all members must be initialized — confirm.
376std::size_t ViewKey::Hash() const {
377 return static_cast<std::size_t>(
378 Common::CityHash64(reinterpret_cast<const char*>(this), sizeof(*this)));
379}
380
/// Member-wise equality of the view window (base layer/level and their counts).
381bool ViewKey::operator==(const ViewKey& rhs) const {
382 return std::tie(base_layer, num_layers, base_level, num_levels) ==
383 std::tie(rhs.base_layer, rhs.num_layers, rhs.base_level, rhs.num_levels);
384}
385
386} // namespace VideoCommon
diff --git a/src/video_core/texture_cache.h b/src/video_core/texture_cache.h
new file mode 100644
index 000000000..041551691
--- /dev/null
+++ b/src/video_core/texture_cache.h
@@ -0,0 +1,586 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
#include <list>
#include <map>
#include <memory>
#include <set>
#include <tuple>
#include <type_traits>
#include <unordered_map>
#include <utility>
#include <vector>

#include <boost/icl/interval_map.hpp>
#include <boost/range/iterator_range.hpp>

#include "common/assert.h"
#include "common/common_types.h"
#include "core/memory.h"
#include "video_core/engines/fermi_2d.h"
#include "video_core/engines/maxwell_3d.h"
#include "video_core/gpu.h"
#include "video_core/rasterizer_interface.h"
#include "video_core/surface.h"
25
26namespace Core {
27class System;
28}
29
30namespace Tegra::Texture {
31struct FullTextureInfo;
32}
33
34namespace VideoCore {
35class RasterizerInterface;
36}
37
38namespace VideoCommon {
39
/// Flat aggregate of the guest surface parameters that participate in cache
/// lookups. Hash() digests the raw object bytes and operator== compares every
/// field, so instances are only created through managed factory paths.
class HasheableSurfaceParams {
public:
    /// CityHash64 over the raw bytes of this object.
    std::size_t Hash() const;

    /// Field-by-field equality.
    bool operator==(const HasheableSurfaceParams& rhs) const;

protected:
    // Avoid creation outside of a managed environment.
    HasheableSurfaceParams() = default;

    bool is_tiled;  // Guest memory layout selector — tiled vs linear, presumably; see factories
    u32 block_width;
    u32 block_height;
    u32 block_depth;
    u32 tile_width_spacing;
    u32 width;  // Base-level dimensions
    u32 height;
    u32 depth;
    u32 pitch;
    u32 unaligned_height;
    u32 num_levels;  // Number of mipmap levels
    VideoCore::Surface::PixelFormat pixel_format;
    VideoCore::Surface::ComponentType component_type;
    VideoCore::Surface::SurfaceType type;
    VideoCore::Surface::SurfaceTarget target;
};
66
/// Complete surface description: the hashed guest parameters plus values
/// derived from them (guest/host byte sizes, layer count). Instances are built
/// through the CreateFor* factories so the cached values are always computed.
class SurfaceParams final : public HasheableSurfaceParams {
public:
    /// Creates SurfaceCachedParams from a texture configuration.
    static SurfaceParams CreateForTexture(Core::System& system,
                                          const Tegra::Texture::FullTextureInfo& config);

    /// Creates SurfaceCachedParams for a depth buffer configuration.
    static SurfaceParams CreateForDepthBuffer(
        Core::System& system, u32 zeta_width, u32 zeta_height, Tegra::DepthFormat format,
        u32 block_width, u32 block_height, u32 block_depth,
        Tegra::Engines::Maxwell3D::Regs::InvMemoryLayout type);

    /// Creates SurfaceCachedParams from a framebuffer configuration.
    static SurfaceParams CreateForFramebuffer(Core::System& system, std::size_t index);

    /// Creates SurfaceCachedParams from a Fermi2D surface configuration.
    static SurfaceParams CreateForFermiCopySurface(
        const Tegra::Engines::Fermi2D::Regs::Surface& config);

    bool IsTiled() const {
        return is_tiled;
    }

    u32 GetBlockWidth() const {
        return block_width;
    }

    u32 GetTileWidthSpacing() const {
        return tile_width_spacing;
    }

    u32 GetWidth() const {
        return width;
    }

    u32 GetHeight() const {
        return height;
    }

    u32 GetDepth() const {
        return depth;
    }

    u32 GetPitch() const {
        return pitch;
    }

    u32 GetNumLevels() const {
        return num_levels;
    }

    VideoCore::Surface::PixelFormat GetPixelFormat() const {
        return pixel_format;
    }

    VideoCore::Surface::ComponentType GetComponentType() const {
        return component_type;
    }

    VideoCore::Surface::SurfaceTarget GetTarget() const {
        return target;
    }

    VideoCore::Surface::SurfaceType GetType() const {
        return type;
    }

    std::size_t GetGuestSizeInBytes() const {
        return guest_size_in_bytes;
    }

    std::size_t GetHostSizeInBytes() const {
        return host_size_in_bytes;
    }

    u32 GetNumLayers() const {
        return num_layers;
    }

    /// Returns the width of a given mipmap level.
    u32 GetMipWidth(u32 level) const;

    /// Returns the height of a given mipmap level.
    u32 GetMipHeight(u32 level) const;

    /// Returns the depth of a given mipmap level.
    u32 GetMipDepth(u32 level) const;

    /// Returns true if these parameters are from a layered surface.
    bool IsLayered() const;

    /// Returns the block height of a given mipmap level.
    u32 GetMipBlockHeight(u32 level) const;

    /// Returns the block depth of a given mipmap level.
    u32 GetMipBlockDepth(u32 level) const;

    /// Returns the offset in bytes in guest memory of a given mipmap level.
    std::size_t GetGuestMipmapLevelOffset(u32 level) const;

    /// Returns the offset in bytes in host memory (linear) of a given mipmap level.
    std::size_t GetHostMipmapLevelOffset(u32 level) const;

    /// Returns the size of a layer in bytes in guest memory.
    std::size_t GetGuestLayerSize() const;

    /// Returns the size of a layer in bytes in host memory for a given mipmap level.
    std::size_t GetHostLayerSize(u32 level) const;

    /// Returns true if another surface can be familiar with this. This is a loosely defined term
    /// that reflects the possibility of these two surface parameters potentially being part of a
    /// bigger superset.
    bool IsFamiliar(const SurfaceParams& view_params) const;

    /// Returns true if the pixel format is a depth and/or stencil format.
    bool IsPixelFormatZeta() const;

    /// Creates a map that redirects an address difference to a layer and mipmap level.
    std::map<u64, std::pair<u32, u32>> CreateViewOffsetMap() const;

    /// Returns true if the passed surface view parameters is equal or a valid subset of this.
    bool IsViewValid(const SurfaceParams& view_params, u32 layer, u32 level) const;

private:
    /// Calculates values that can be deduced from HasheableSurfaceParams.
    void CalculateCachedValues();

    /// Returns the size of a given mipmap level.
    std::size_t GetInnerMipmapMemorySize(u32 level, bool as_host_size, bool layer_only,
                                         bool uncompressed) const;

    /// Returns the size of all mipmap levels and aligns as needed.
    std::size_t GetInnerMemorySize(bool as_host_size, bool layer_only, bool uncompressed) const;

    /// Returns true if the passed view width and height match the size of this params in a given
    /// mipmap level.
    bool IsDimensionValid(const SurfaceParams& view_params, u32 level) const;

    /// Returns true if the passed view depth match the size of this params in a given mipmap level.
    bool IsDepthValid(const SurfaceParams& view_params, u32 level) const;

    /// Returns true if the passed view layers and mipmap levels are in bounds.
    bool IsInBounds(const SurfaceParams& view_params, u32 layer, u32 level) const;

    // Derived values, filled in by CalculateCachedValues().
    std::size_t guest_size_in_bytes;
    std::size_t host_size_in_bytes;
    u32 num_layers;
};
215
/// Identifies a view inside a surface: a contiguous window of layers and
/// mipmap levels. All members are u32 and value-initialized, so the raw byte
/// image hashed by Hash() is fully deterministic.
struct ViewKey {
    std::size_t Hash() const;

    bool operator==(const ViewKey& rhs) const;

    u32 base_layer{};
    u32 num_layers{};
    u32 base_level{};
    u32 num_levels{};
};
226
227} // namespace VideoCommon
228
namespace std {

// Allow SurfaceParams and ViewKey to be used as unordered_map keys by
// delegating to their member Hash() implementations.
template <>
struct hash<VideoCommon::SurfaceParams> {
    std::size_t operator()(const VideoCommon::SurfaceParams& k) const noexcept {
        return k.Hash();
    }
};

template <>
struct hash<VideoCommon::ViewKey> {
    std::size_t operator()(const VideoCommon::ViewKey& k) const noexcept {
        return k.Hash();
    }
};

} // namespace std
246
247namespace VideoCommon {
248
/// Common base for cached surfaces. Tracks the guest (CPU) and host addresses
/// the surface is registered at, its modification state, and a cache of views
/// (layer/level windows) created over it. TView is the backend's view type;
/// TExecutionContext is a backend token threaded through flush/upload calls.
template <typename TView, typename TExecutionContext>
class SurfaceBase {
    static_assert(std::is_trivially_copyable_v<TExecutionContext>);

public:
    /// Backend hook: reads the surface's data into a staging buffer ahead of
    /// UploadTexture (see TextureCache::LoadSurface for the call sequence).
    virtual void LoadBuffer() = 0;

    /// Backend hook: writes the surface's contents back to guest memory.
    virtual TExecutionContext FlushBuffer(TExecutionContext exctx) = 0;

    /// Backend hook: uploads the loaded data to the host GPU texture.
    virtual TExecutionContext UploadTexture(TExecutionContext exctx) = 0;

    /// Returns a view matching the given address and parameters, or null when
    /// this surface cannot serve as a superset of the requested view.
    TView* TryGetView(VAddr view_addr, const SurfaceParams& view_params) {
        if (view_addr < cpu_addr || !params.IsFamiliar(view_params)) {
            // It can't be a view if it's in a prior address.
            return {};
        }

        const auto relative_offset{static_cast<u64>(view_addr - cpu_addr)};
        const auto it{view_offset_map.find(relative_offset)};
        if (it == view_offset_map.end()) {
            // Couldn't find an aligned view.
            return {};
        }
        const auto [layer, level] = it->second;

        if (!params.IsViewValid(view_params, layer, level)) {
            return {};
        }

        return GetView(layer, view_params.GetNumLayers(), level, view_params.GetNumLevels());
    }

    VAddr GetCpuAddr() const {
        ASSERT(is_registered);
        return cpu_addr;
    }

    u8* GetHostPtr() const {
        ASSERT(is_registered);
        return host_ptr;
    }

    CacheAddr GetCacheAddr() const {
        ASSERT(is_registered);
        return cache_addr;
    }

    std::size_t GetSizeInBytes() const {
        return params.GetGuestSizeInBytes();
    }

    void MarkAsModified(bool is_modified_) {
        is_modified = is_modified_;
    }

    const SurfaceParams& GetSurfaceParams() const {
        return params;
    }

    /// Like TryGetView, but the requested view is required to exist.
    TView* GetView(VAddr view_addr, const SurfaceParams& view_params) {
        TView* view{TryGetView(view_addr, view_params)};
        ASSERT(view != nullptr);
        return view;
    }

    /// Binds the surface to a guest CPU address and matching host pointer.
    void Register(VAddr cpu_addr_, u8* host_ptr_) {
        ASSERT(!is_registered);
        is_registered = true;
        cpu_addr = cpu_addr_;
        host_ptr = host_ptr_;
        cache_addr = ToCacheAddr(host_ptr_);
    }

    /// Convenience overload that resolves the host pointer through Memory.
    void Register(VAddr cpu_addr_) {
        Register(cpu_addr_, Memory::GetPointer(cpu_addr_));
    }

    void Unregister() {
        ASSERT(is_registered);
        is_registered = false;
    }

    bool IsRegistered() const {
        return is_registered;
    }

protected:
    explicit SurfaceBase(const SurfaceParams& params)
        : params{params}, view_offset_map{params.CreateViewOffsetMap()} {}

    // Non-virtual on purpose: surfaces are not deleted through this base.
    ~SurfaceBase() = default;

    /// Backend hook that instantiates a concrete view for the given key.
    virtual std::unique_ptr<TView> CreateView(const ViewKey& view_key) = 0;

    bool IsModified() const {
        return is_modified;
    }

    const SurfaceParams params;

private:
    /// Returns (creating and caching on first use) the view for the window.
    TView* GetView(u32 base_layer, u32 num_layers, u32 base_level, u32 num_levels) {
        const ViewKey key{base_layer, num_layers, base_level, num_levels};
        const auto [entry, is_cache_miss] = views.try_emplace(key);
        auto& view{entry->second};
        if (is_cache_miss) {
            view = CreateView(key);
        }
        return view.get();
    }

    // Maps a byte offset relative to cpu_addr to the (layer, level) starting there.
    const std::map<u64, std::pair<u32, u32>> view_offset_map;

    VAddr cpu_addr{};
    u8* host_ptr{};
    CacheAddr cache_addr{};
    bool is_modified{};
    bool is_registered{};
    std::unordered_map<ViewKey, std::unique_ptr<TView>> views;
};
369
/// Generic texture cache shared by backends. Surfaces are indexed by host
/// cache address ranges in an interval map; unused surfaces are parked in a
/// per-parameter reserve for reuse. TSurface/TView are the backend's surface
/// and view types; TExecutionContext is a backend token threaded through.
template <typename TSurface, typename TView, typename TExecutionContext>
class TextureCache {
    static_assert(std::is_trivially_copyable_v<TExecutionContext>);
    using ResultType = std::tuple<TView*, TExecutionContext>;
    using IntervalMap = boost::icl::interval_map<CacheAddr, std::set<TSurface*>>;
    using IntervalType = typename IntervalMap::interval_type;

public:
    /// Unregisters every surface overlapping the given host memory region.
    void InvalidateRegion(CacheAddr addr, std::size_t size) {
        for (TSurface* surface : GetSurfacesInRegion(addr, size)) {
            if (!surface->IsRegistered()) {
                // Skip duplicates
                continue;
            }
            Unregister(surface);
        }
    }

    /// Returns a view for the surface described by a guest texture config.
    /// Returns a null view when the GPU address does not map to CPU memory.
    ResultType GetTextureSurface(TExecutionContext exctx,
                                 const Tegra::Texture::FullTextureInfo& config) {
        auto& memory_manager{system.GPU().MemoryManager()};
        const auto cpu_addr{memory_manager.GpuToCpuAddress(config.tic.Address())};
        if (!cpu_addr) {
            return {{}, exctx};
        }
        const auto params{SurfaceParams::CreateForTexture(system, config)};
        return GetSurfaceView(exctx, *cpu_addr, params, true);
    }

    /// Returns a view for the currently bound depth buffer, or a null view
    /// when the zeta buffer is disabled or unmapped.
    ResultType GetDepthBufferSurface(TExecutionContext exctx, bool preserve_contents) {
        const auto& regs{system.GPU().Maxwell3D().regs};
        if (!regs.zeta.Address() || !regs.zeta_enable) {
            return {{}, exctx};
        }

        auto& memory_manager{system.GPU().MemoryManager()};
        const auto cpu_addr{memory_manager.GpuToCpuAddress(regs.zeta.Address())};
        if (!cpu_addr) {
            return {{}, exctx};
        }

        const auto depth_params{SurfaceParams::CreateForDepthBuffer(
            system, regs.zeta_width, regs.zeta_height, regs.zeta.format,
            regs.zeta.memory_layout.block_width, regs.zeta.memory_layout.block_height,
            regs.zeta.memory_layout.block_depth, regs.zeta.memory_layout.type)};
        return GetSurfaceView(exctx, *cpu_addr, depth_params, preserve_contents);
    }

    /// Returns a view for render target `index`, or a null view when that
    /// target is disabled, out of rt_control.count, or unmapped.
    ResultType GetColorBufferSurface(TExecutionContext exctx, std::size_t index,
                                     bool preserve_contents) {
        ASSERT(index < Tegra::Engines::Maxwell3D::Regs::NumRenderTargets);

        const auto& regs{system.GPU().Maxwell3D().regs};
        if (index >= regs.rt_control.count || regs.rt[index].Address() == 0 ||
            regs.rt[index].format == Tegra::RenderTargetFormat::NONE) {
            return {{}, exctx};
        }

        auto& memory_manager{system.GPU().MemoryManager()};
        const auto& config{system.GPU().Maxwell3D().regs.rt[index]};
        const auto cpu_addr{memory_manager.GpuToCpuAddress(
            config.Address() + config.base_layer * config.layer_stride * sizeof(u32))};
        if (!cpu_addr) {
            return {{}, exctx};
        }

        return GetSurfaceView(exctx, *cpu_addr, SurfaceParams::CreateForFramebuffer(system, index),
                              preserve_contents);
    }

    /// Returns a view for a Fermi2D copy-engine surface. The address is
    /// asserted to be mapped (copies always target valid memory).
    ResultType GetFermiSurface(TExecutionContext exctx,
                               const Tegra::Engines::Fermi2D::Regs::Surface& config) {
        const auto cpu_addr{system.GPU().MemoryManager().GpuToCpuAddress(config.Address())};
        ASSERT(cpu_addr);
        return GetSurfaceView(exctx, *cpu_addr, SurfaceParams::CreateForFermiCopySurface(config),
                              true);
    }

    /// Looks up a registered surface starting at the given host pointer,
    /// without creating one. Returns null on miss.
    TSurface* TryFindFramebufferSurface(const u8* host_ptr) const {
        const auto it{registered_surfaces.find(ToCacheAddr(host_ptr))};
        return it != registered_surfaces.end() ? *it->second.begin() : nullptr;
    }

protected:
    TextureCache(Core::System& system, VideoCore::RasterizerInterface& rasterizer)
        : system{system}, rasterizer{rasterizer} {}

    ~TextureCache() = default;

    /// Backend hook: tries to recycle `overlaps` into a view for `params`
    /// (e.g. via GPU-side copies) without a round trip through guest memory.
    /// Returns a null view when the fast path is not possible.
    virtual ResultType TryFastGetSurfaceView(TExecutionContext exctx, VAddr cpu_addr, u8* host_ptr,
                                             const SurfaceParams& params, bool preserve_contents,
                                             const std::vector<TSurface*>& overlaps) = 0;

    /// Backend hook that allocates a concrete surface for the parameters.
    virtual std::unique_ptr<TSurface> CreateSurface(const SurfaceParams& params) = 0;

    /// Registers a surface in the interval map and page-tracking.
    void Register(TSurface* surface, VAddr cpu_addr, u8* host_ptr) {
        surface->Register(cpu_addr, host_ptr);
        registered_surfaces.add({GetSurfaceInterval(surface), {surface}});
        rasterizer.UpdatePagesCachedCount(surface->GetCpuAddr(), surface->GetSizeInBytes(), 1);
    }

    /// Removes a surface from the interval map and page-tracking; the surface
    /// itself stays alive in the reserve for potential reuse.
    void Unregister(TSurface* surface) {
        registered_surfaces.subtract({GetSurfaceInterval(surface), {surface}});
        rasterizer.UpdatePagesCachedCount(surface->GetCpuAddr(), surface->GetSizeInBytes(), -1);
        surface->Unregister();
    }

    /// Returns an unregistered surface for the parameters, reusing a reserved
    /// one when available and creating (and reserving) a new one otherwise.
    TSurface* GetUncachedSurface(const SurfaceParams& params) {
        if (TSurface* surface = TryGetReservedSurface(params); surface)
            return surface;
        // No reserved surface available, create a new one and reserve it
        auto new_surface{CreateSurface(params)};
        TSurface* surface{new_surface.get()};
        ReserveSurface(params, std::move(new_surface));
        return surface;
    }

    Core::System& system;

private:
    /// Core lookup: resolves (address, params) to a view, handling the empty,
    /// single-overlap, fast-recycle and slow (flush + reload) cases in order.
    ResultType GetSurfaceView(TExecutionContext exctx, VAddr cpu_addr, const SurfaceParams& params,
                              bool preserve_contents) {
        const auto host_ptr{Memory::GetPointer(cpu_addr)};
        const auto cache_addr{ToCacheAddr(host_ptr)};
        const auto overlaps{GetSurfacesInRegion(cache_addr, params.GetGuestSizeInBytes())};
        if (overlaps.empty()) {
            return LoadSurfaceView(exctx, cpu_addr, host_ptr, params, preserve_contents);
        }

        if (overlaps.size() == 1) {
            if (TView* view = overlaps[0]->TryGetView(cpu_addr, params); view)
                return {view, exctx};
        }

        TView* fast_view;
        std::tie(fast_view, exctx) =
            TryFastGetSurfaceView(exctx, cpu_addr, host_ptr, params, preserve_contents, overlaps);

        for (TSurface* surface : overlaps) {
            if (!fast_view) {
                // Flush even when we don't care about the contents, to preserve memory not written
                // by the new surface.
                exctx = surface->FlushBuffer(exctx);
            }
            Unregister(surface);
        }

        if (fast_view) {
            return {fast_view, exctx};
        }

        return LoadSurfaceView(exctx, cpu_addr, host_ptr, params, preserve_contents);
    }

    /// Registers a fresh surface and, when requested, fills it from guest
    /// memory before handing back the matching view.
    ResultType LoadSurfaceView(TExecutionContext exctx, VAddr cpu_addr, u8* host_ptr,
                               const SurfaceParams& params, bool preserve_contents) {
        TSurface* new_surface{GetUncachedSurface(params)};
        Register(new_surface, cpu_addr, host_ptr);
        if (preserve_contents) {
            exctx = LoadSurface(exctx, new_surface);
        }
        return {new_surface->GetView(cpu_addr, params), exctx};
    }

    /// Loads guest data into the surface and uploads it to the host GPU.
    TExecutionContext LoadSurface(TExecutionContext exctx, TSurface* surface) {
        surface->LoadBuffer();
        exctx = surface->UploadTexture(exctx);
        surface->MarkAsModified(false);
        return exctx;
    }

    /// Collects the registered surfaces overlapping [cache_addr, +size).
    /// May return the same surface more than once; callers must tolerate it.
    std::vector<TSurface*> GetSurfacesInRegion(CacheAddr cache_addr, std::size_t size) const {
        if (size == 0) {
            return {};
        }
        const IntervalType interval{cache_addr, cache_addr + size};

        std::vector<TSurface*> surfaces;
        for (auto& pair : boost::make_iterator_range(registered_surfaces.equal_range(interval))) {
            surfaces.push_back(*pair.second.begin());
        }
        return surfaces;
    }

    /// Parks a surface in the reserve, keyed by its parameters.
    void ReserveSurface(const SurfaceParams& params, std::unique_ptr<TSurface> surface) {
        surface_reserve[params].push_back(std::move(surface));
    }

    /// Returns an unregistered reserved surface matching `params`, or null.
    TSurface* TryGetReservedSurface(const SurfaceParams& params) {
        auto search{surface_reserve.find(params)};
        if (search == surface_reserve.end()) {
            return {};
        }
        for (auto& surface : search->second) {
            if (!surface->IsRegistered()) {
                return surface.get();
            }
        }
        return {};
    }

    /// Half-open cache-address interval covered by a registered surface.
    IntervalType GetSurfaceInterval(TSurface* surface) const {
        return IntervalType::right_open(surface->GetCacheAddr(),
                                        surface->GetCacheAddr() + surface->GetSizeInBytes());
    }

    VideoCore::RasterizerInterface& rasterizer;

    IntervalMap registered_surfaces;

    /// The surface reserve is a "backup" cache, this is where we put unique surfaces that have
    /// previously been used. This is to prevent surfaces from being constantly created and
    /// destroyed when used with different surface parameters.
    std::unordered_map<SurfaceParams, std::list<std::unique_ptr<TSurface>>> surface_reserve;
};
585
586} // namespace VideoCommon
diff --git a/src/video_core/textures/astc.cpp b/src/video_core/textures/astc.cpp
index bc50a4876..b508d64e9 100644
--- a/src/video_core/textures/astc.cpp
+++ b/src/video_core/textures/astc.cpp
@@ -23,28 +23,12 @@
23 23
24#include "video_core/textures/astc.h" 24#include "video_core/textures/astc.h"
25 25
26class BitStream { 26class InputBitStream {
27public: 27public:
28 explicit BitStream(unsigned char* ptr, int nBits = 0, int start_offset = 0) 28 explicit InputBitStream(const unsigned char* ptr, int nBits = 0, int start_offset = 0)
29 : m_NumBits(nBits), m_CurByte(ptr), m_NextBit(start_offset % 8) {} 29 : m_NumBits(nBits), m_CurByte(ptr), m_NextBit(start_offset % 8) {}
30 30
31 ~BitStream() = default; 31 ~InputBitStream() = default;
32
33 int GetBitsWritten() const {
34 return m_BitsWritten;
35 }
36
37 void WriteBitsR(unsigned int val, unsigned int nBits) {
38 for (unsigned int i = 0; i < nBits; i++) {
39 WriteBit((val >> (nBits - i - 1)) & 1);
40 }
41 }
42
43 void WriteBits(unsigned int val, unsigned int nBits) {
44 for (unsigned int i = 0; i < nBits; i++) {
45 WriteBit((val >> i) & 1);
46 }
47 }
48 32
49 int GetBitsRead() const { 33 int GetBitsRead() const {
50 return m_BitsRead; 34 return m_BitsRead;
@@ -71,6 +55,38 @@ public:
71 } 55 }
72 56
73private: 57private:
58 const int m_NumBits;
59 const unsigned char* m_CurByte;
60 int m_NextBit = 0;
61 int m_BitsRead = 0;
62
63 bool done = false;
64};
65
66class OutputBitStream {
67public:
68 explicit OutputBitStream(unsigned char* ptr, int nBits = 0, int start_offset = 0)
69 : m_NumBits(nBits), m_CurByte(ptr), m_NextBit(start_offset % 8) {}
70
71 ~OutputBitStream() = default;
72
73 int GetBitsWritten() const {
74 return m_BitsWritten;
75 }
76
77 void WriteBitsR(unsigned int val, unsigned int nBits) {
78 for (unsigned int i = 0; i < nBits; i++) {
79 WriteBit((val >> (nBits - i - 1)) & 1);
80 }
81 }
82
83 void WriteBits(unsigned int val, unsigned int nBits) {
84 for (unsigned int i = 0; i < nBits; i++) {
85 WriteBit((val >> i) & 1);
86 }
87 }
88
89private:
74 void WriteBit(int b) { 90 void WriteBit(int b) {
75 91
76 if (done) 92 if (done)
@@ -238,8 +254,8 @@ public:
238 // Fills result with the values that are encoded in the given 254 // Fills result with the values that are encoded in the given
239 // bitstream. We must know beforehand what the maximum possible 255 // bitstream. We must know beforehand what the maximum possible
240 // value is, and how many values we're decoding. 256 // value is, and how many values we're decoding.
241 static void DecodeIntegerSequence(std::vector<IntegerEncodedValue>& result, BitStream& bits, 257 static void DecodeIntegerSequence(std::vector<IntegerEncodedValue>& result,
242 uint32_t maxRange, uint32_t nValues) { 258 InputBitStream& bits, uint32_t maxRange, uint32_t nValues) {
243 // Determine encoding parameters 259 // Determine encoding parameters
244 IntegerEncodedValue val = IntegerEncodedValue::CreateEncoding(maxRange); 260 IntegerEncodedValue val = IntegerEncodedValue::CreateEncoding(maxRange);
245 261
@@ -267,7 +283,7 @@ public:
267 } 283 }
268 284
269private: 285private:
270 static void DecodeTritBlock(BitStream& bits, std::vector<IntegerEncodedValue>& result, 286 static void DecodeTritBlock(InputBitStream& bits, std::vector<IntegerEncodedValue>& result,
271 uint32_t nBitsPerValue) { 287 uint32_t nBitsPerValue) {
272 // Implement the algorithm in section C.2.12 288 // Implement the algorithm in section C.2.12
273 uint32_t m[5]; 289 uint32_t m[5];
@@ -327,7 +343,7 @@ private:
327 } 343 }
328 } 344 }
329 345
330 static void DecodeQuintBlock(BitStream& bits, std::vector<IntegerEncodedValue>& result, 346 static void DecodeQuintBlock(InputBitStream& bits, std::vector<IntegerEncodedValue>& result,
331 uint32_t nBitsPerValue) { 347 uint32_t nBitsPerValue) {
332 // Implement the algorithm in section C.2.12 348 // Implement the algorithm in section C.2.12
333 uint32_t m[3]; 349 uint32_t m[3];
@@ -406,7 +422,7 @@ struct TexelWeightParams {
406 } 422 }
407}; 423};
408 424
409static TexelWeightParams DecodeBlockInfo(BitStream& strm) { 425static TexelWeightParams DecodeBlockInfo(InputBitStream& strm) {
410 TexelWeightParams params; 426 TexelWeightParams params;
411 427
412 // Read the entire block mode all at once 428 // Read the entire block mode all at once
@@ -605,7 +621,7 @@ static TexelWeightParams DecodeBlockInfo(BitStream& strm) {
605 return params; 621 return params;
606} 622}
607 623
608static void FillVoidExtentLDR(BitStream& strm, uint32_t* const outBuf, uint32_t blockWidth, 624static void FillVoidExtentLDR(InputBitStream& strm, uint32_t* const outBuf, uint32_t blockWidth,
609 uint32_t blockHeight) { 625 uint32_t blockHeight) {
610 // Don't actually care about the void extent, just read the bits... 626 // Don't actually care about the void extent, just read the bits...
611 for (int i = 0; i < 4; ++i) { 627 for (int i = 0; i < 4; ++i) {
@@ -821,7 +837,7 @@ static void DecodeColorValues(uint32_t* out, uint8_t* data, const uint32_t* mode
821 837
822 // We now have enough to decode our integer sequence. 838 // We now have enough to decode our integer sequence.
823 std::vector<IntegerEncodedValue> decodedColorValues; 839 std::vector<IntegerEncodedValue> decodedColorValues;
824 BitStream colorStream(data); 840 InputBitStream colorStream(data);
825 IntegerEncodedValue::DecodeIntegerSequence(decodedColorValues, colorStream, range, nValues); 841 IntegerEncodedValue::DecodeIntegerSequence(decodedColorValues, colorStream, range, nValues);
826 842
827 // Once we have the decoded values, we need to dequantize them to the 0-255 range 843 // Once we have the decoded values, we need to dequantize them to the 0-255 range
@@ -1365,9 +1381,9 @@ static void ComputeEndpoints(Pixel& ep1, Pixel& ep2, const uint32_t*& colorValue
1365#undef READ_INT_VALUES 1381#undef READ_INT_VALUES
1366} 1382}
1367 1383
1368static void DecompressBlock(uint8_t inBuf[16], const uint32_t blockWidth, 1384static void DecompressBlock(const uint8_t inBuf[16], const uint32_t blockWidth,
1369 const uint32_t blockHeight, uint32_t* outBuf) { 1385 const uint32_t blockHeight, uint32_t* outBuf) {
1370 BitStream strm(inBuf); 1386 InputBitStream strm(inBuf);
1371 TexelWeightParams weightParams = DecodeBlockInfo(strm); 1387 TexelWeightParams weightParams = DecodeBlockInfo(strm);
1372 1388
1373 // Was there an error? 1389 // Was there an error?
@@ -1421,7 +1437,7 @@ static void DecompressBlock(uint8_t inBuf[16], const uint32_t blockWidth,
1421 // Define color data. 1437 // Define color data.
1422 uint8_t colorEndpointData[16]; 1438 uint8_t colorEndpointData[16];
1423 memset(colorEndpointData, 0, sizeof(colorEndpointData)); 1439 memset(colorEndpointData, 0, sizeof(colorEndpointData));
1424 BitStream colorEndpointStream(colorEndpointData, 16 * 8, 0); 1440 OutputBitStream colorEndpointStream(colorEndpointData, 16 * 8, 0);
1425 1441
1426 // Read extra config data... 1442 // Read extra config data...
1427 uint32_t baseCEM = 0; 1443 uint32_t baseCEM = 0;
@@ -1549,7 +1565,7 @@ static void DecompressBlock(uint8_t inBuf[16], const uint32_t blockWidth,
1549 memset(texelWeightData + clearByteStart, 0, 16 - clearByteStart); 1565 memset(texelWeightData + clearByteStart, 0, 16 - clearByteStart);
1550 1566
1551 std::vector<IntegerEncodedValue> texelWeightValues; 1567 std::vector<IntegerEncodedValue> texelWeightValues;
1552 BitStream weightStream(texelWeightData); 1568 InputBitStream weightStream(texelWeightData);
1553 1569
1554 IntegerEncodedValue::DecodeIntegerSequence(texelWeightValues, weightStream, 1570 IntegerEncodedValue::DecodeIntegerSequence(texelWeightValues, weightStream,
1555 weightParams.m_MaxWeight, 1571 weightParams.m_MaxWeight,
@@ -1597,7 +1613,7 @@ static void DecompressBlock(uint8_t inBuf[16], const uint32_t blockWidth,
1597 1613
1598namespace Tegra::Texture::ASTC { 1614namespace Tegra::Texture::ASTC {
1599 1615
1600std::vector<uint8_t> Decompress(std::vector<uint8_t>& data, uint32_t width, uint32_t height, 1616std::vector<uint8_t> Decompress(const uint8_t* data, uint32_t width, uint32_t height,
1601 uint32_t depth, uint32_t block_width, uint32_t block_height) { 1617 uint32_t depth, uint32_t block_width, uint32_t block_height) {
1602 uint32_t blockIdx = 0; 1618 uint32_t blockIdx = 0;
1603 std::vector<uint8_t> outData(height * width * depth * 4); 1619 std::vector<uint8_t> outData(height * width * depth * 4);
@@ -1605,7 +1621,7 @@ std::vector<uint8_t> Decompress(std::vector<uint8_t>& data, uint32_t width, uint
1605 for (uint32_t j = 0; j < height; j += block_height) { 1621 for (uint32_t j = 0; j < height; j += block_height) {
1606 for (uint32_t i = 0; i < width; i += block_width) { 1622 for (uint32_t i = 0; i < width; i += block_width) {
1607 1623
1608 uint8_t* blockPtr = data.data() + blockIdx * 16; 1624 const uint8_t* blockPtr = data + blockIdx * 16;
1609 1625
1610 // Blocks can be at most 12x12 1626 // Blocks can be at most 12x12
1611 uint32_t uncompData[144]; 1627 uint32_t uncompData[144];
diff --git a/src/video_core/textures/astc.h b/src/video_core/textures/astc.h
index d419dd025..991cdba72 100644
--- a/src/video_core/textures/astc.h
+++ b/src/video_core/textures/astc.h
@@ -9,7 +9,7 @@
9 9
10namespace Tegra::Texture::ASTC { 10namespace Tegra::Texture::ASTC {
11 11
12std::vector<uint8_t> Decompress(std::vector<uint8_t>& data, uint32_t width, uint32_t height, 12std::vector<uint8_t> Decompress(const uint8_t* data, uint32_t width, uint32_t height,
13 uint32_t depth, uint32_t block_width, uint32_t block_height); 13 uint32_t depth, uint32_t block_width, uint32_t block_height);
14 14
15} // namespace Tegra::Texture::ASTC 15} // namespace Tegra::Texture::ASTC
diff --git a/src/video_core/textures/convert.cpp b/src/video_core/textures/convert.cpp
new file mode 100644
index 000000000..82050bd51
--- /dev/null
+++ b/src/video_core/textures/convert.cpp
@@ -0,0 +1,93 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
#include <algorithm>
#include <cstring>
#include <tuple>
#include <vector>

#include "common/assert.h"
#include "common/bit_field.h"
#include "common/common_types.h"
#include "common/logging/log.h"
#include "video_core/surface.h"
#include "video_core/textures/astc.h"
#include "video_core/textures/convert.h"
16
17namespace Tegra::Texture {
18
19using VideoCore::Surface::PixelFormat;
20
21template <bool reverse>
22void SwapS8Z24ToZ24S8(u8* data, u32 width, u32 height) {
23 union S8Z24 {
24 BitField<0, 24, u32> z24;
25 BitField<24, 8, u32> s8;
26 };
27 static_assert(sizeof(S8Z24) == 4, "S8Z24 is incorrect size");
28
29 union Z24S8 {
30 BitField<0, 8, u32> s8;
31 BitField<8, 24, u32> z24;
32 };
33 static_assert(sizeof(Z24S8) == 4, "Z24S8 is incorrect size");
34
35 S8Z24 s8z24_pixel{};
36 Z24S8 z24s8_pixel{};
37 constexpr auto bpp{
38 VideoCore::Surface::GetBytesPerPixel(VideoCore::Surface::PixelFormat::S8Z24)};
39 for (std::size_t y = 0; y < height; ++y) {
40 for (std::size_t x = 0; x < width; ++x) {
41 const std::size_t offset{bpp * (y * width + x)};
42 if constexpr (reverse) {
43 std::memcpy(&z24s8_pixel, &data[offset], sizeof(Z24S8));
44 s8z24_pixel.s8.Assign(z24s8_pixel.s8);
45 s8z24_pixel.z24.Assign(z24s8_pixel.z24);
46 std::memcpy(&data[offset], &s8z24_pixel, sizeof(S8Z24));
47 } else {
48 std::memcpy(&s8z24_pixel, &data[offset], sizeof(S8Z24));
49 z24s8_pixel.s8.Assign(s8z24_pixel.s8);
50 z24s8_pixel.z24.Assign(s8z24_pixel.z24);
51 std::memcpy(&data[offset], &z24s8_pixel, sizeof(Z24S8));
52 }
53 }
54 }
55}
56
// Swizzles S8Z24 pixel data in place to Z24S8 (guest -> host direction).
static void ConvertS8Z24ToZ24S8(u8* data, u32 width, u32 height) {
    SwapS8Z24ToZ24S8<false>(data, width, height);
}
60
// Swizzles Z24S8 pixel data in place back to S8Z24 (host -> guest direction).
static void ConvertZ24S8ToS8Z24(u8* data, u32 width, u32 height) {
    SwapS8Z24ToZ24S8<true>(data, width, height);
}
64
65void ConvertFromGuestToHost(u8* data, PixelFormat pixel_format, u32 width, u32 height, u32 depth,
66 bool convert_astc, bool convert_s8z24) {
67 if (convert_astc && IsPixelFormatASTC(pixel_format)) {
68 // Convert ASTC pixel formats to RGBA8, as most desktop GPUs do not support ASTC.
69 u32 block_width{};
70 u32 block_height{};
71 std::tie(block_width, block_height) = GetASTCBlockSize(pixel_format);
72 const std::vector<u8> rgba8_data =
73 Tegra::Texture::ASTC::Decompress(data, width, height, depth, block_width, block_height);
74 std::copy(rgba8_data.begin(), rgba8_data.end(), data);
75
76 } else if (convert_s8z24 && pixel_format == PixelFormat::S8Z24) {
77 Tegra::Texture::ConvertS8Z24ToZ24S8(data, width, height);
78 }
79}
80
// Converts host-format texture data in place back to the guest representation
// before it is flushed to guest memory (inverse of ConvertFromGuestToHost).
void ConvertFromHostToGuest(u8* data, PixelFormat pixel_format, u32 width, u32 height, u32 depth,
                            bool convert_astc, bool convert_s8z24) {
    if (convert_astc && IsPixelFormatASTC(pixel_format)) {
        // Re-compressing RGBA8 back to ASTC is not implemented, so flushing an
        // ASTC texture is fatal.
        LOG_CRITICAL(HW_GPU, "Conversion of format {} after texture flushing is not implemented",
                     static_cast<u32>(pixel_format));
        UNREACHABLE();

    } else if (convert_s8z24 && pixel_format == PixelFormat::S8Z24) {
        Tegra::Texture::ConvertZ24S8ToS8Z24(data, width, height);
    }
}
92
93} // namespace Tegra::Texture \ No newline at end of file
diff --git a/src/video_core/textures/convert.h b/src/video_core/textures/convert.h
new file mode 100644
index 000000000..12542e71c
--- /dev/null
+++ b/src/video_core/textures/convert.h
@@ -0,0 +1,21 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include "common/common_types.h"
8
9namespace VideoCore::Surface {
10enum class PixelFormat;
11}
12
namespace Tegra::Texture {

/// Converts guest texture data in place to a host-consumable format
/// (optionally decompressing ASTC to RGBA8 and/or swizzling S8Z24 to Z24S8).
void ConvertFromGuestToHost(u8* data, VideoCore::Surface::PixelFormat pixel_format, u32 width,
                            u32 height, u32 depth, bool convert_astc, bool convert_s8z24);

/// Inverse of ConvertFromGuestToHost: converts host data back to the guest
/// representation before flushing it to guest memory.
void ConvertFromHostToGuest(u8* data, VideoCore::Surface::PixelFormat pixel_format, u32 width,
                            u32 height, u32 depth, bool convert_astc, bool convert_s8z24);

} // namespace Tegra::Texture
diff --git a/src/video_core/textures/decoders.cpp b/src/video_core/textures/decoders.cpp
index 5db75de22..995d0e068 100644
--- a/src/video_core/textures/decoders.cpp
+++ b/src/video_core/textures/decoders.cpp
@@ -6,7 +6,6 @@
6#include <cstring> 6#include <cstring>
7#include "common/alignment.h" 7#include "common/alignment.h"
8#include "common/assert.h" 8#include "common/assert.h"
9#include "core/memory.h"
10#include "video_core/gpu.h" 9#include "video_core/gpu.h"
11#include "video_core/textures/decoders.h" 10#include "video_core/textures/decoders.h"
12#include "video_core/textures/texture.h" 11#include "video_core/textures/texture.h"
@@ -103,8 +102,8 @@ void FastProcessBlock(u8* const swizzled_data, u8* const unswizzled_data, const
103 const u32 swizzle_offset{y_address + table[(xb / fast_swizzle_align) % 4]}; 102 const u32 swizzle_offset{y_address + table[(xb / fast_swizzle_align) % 4]};
104 const u32 out_x = xb * out_bytes_per_pixel / bytes_per_pixel; 103 const u32 out_x = xb * out_bytes_per_pixel / bytes_per_pixel;
105 const u32 pixel_index{out_x + pixel_base}; 104 const u32 pixel_index{out_x + pixel_base};
106 data_ptrs[unswizzle] = swizzled_data + swizzle_offset; 105 data_ptrs[unswizzle ? 1 : 0] = swizzled_data + swizzle_offset;
107 data_ptrs[!unswizzle] = unswizzled_data + pixel_index; 106 data_ptrs[unswizzle ? 0 : 1] = unswizzled_data + pixel_index;
108 std::memcpy(data_ptrs[0], data_ptrs[1], fast_swizzle_align); 107 std::memcpy(data_ptrs[0], data_ptrs[1], fast_swizzle_align);
109 } 108 }
110 pixel_base += stride_x; 109 pixel_base += stride_x;
@@ -154,7 +153,7 @@ void SwizzledData(u8* const swizzled_data, u8* const unswizzled_data, const bool
154 for (u32 xb = 0; xb < blocks_on_x; xb++) { 153 for (u32 xb = 0; xb < blocks_on_x; xb++) {
155 const u32 x_start = xb * block_x_elements; 154 const u32 x_start = xb * block_x_elements;
156 const u32 x_end = std::min(width, x_start + block_x_elements); 155 const u32 x_end = std::min(width, x_start + block_x_elements);
157 if (fast) { 156 if constexpr (fast) {
158 FastProcessBlock(swizzled_data, unswizzled_data, unswizzle, x_start, y_start, 157 FastProcessBlock(swizzled_data, unswizzled_data, unswizzle, x_start, y_start,
159 z_start, x_end, y_end, z_end, tile_offset, xy_block_size, 158 z_start, x_end, y_end, z_end, tile_offset, xy_block_size,
160 layer_z, stride_x, bytes_per_pixel, out_bytes_per_pixel); 159 layer_z, stride_x, bytes_per_pixel, out_bytes_per_pixel);
@@ -230,18 +229,18 @@ u32 BytesPerPixel(TextureFormat format) {
230 } 229 }
231} 230}
232 231
233void UnswizzleTexture(u8* const unswizzled_data, VAddr address, u32 tile_size_x, u32 tile_size_y, 232void UnswizzleTexture(u8* const unswizzled_data, u8* address, u32 tile_size_x, u32 tile_size_y,
234 u32 bytes_per_pixel, u32 width, u32 height, u32 depth, u32 block_height, 233 u32 bytes_per_pixel, u32 width, u32 height, u32 depth, u32 block_height,
235 u32 block_depth, u32 width_spacing) { 234 u32 block_depth, u32 width_spacing) {
236 CopySwizzledData((width + tile_size_x - 1) / tile_size_x, 235 CopySwizzledData((width + tile_size_x - 1) / tile_size_x,
237 (height + tile_size_y - 1) / tile_size_y, depth, bytes_per_pixel, 236 (height + tile_size_y - 1) / tile_size_y, depth, bytes_per_pixel,
238 bytes_per_pixel, Memory::GetPointer(address), unswizzled_data, true, 237 bytes_per_pixel, address, unswizzled_data, true, block_height, block_depth,
239 block_height, block_depth, width_spacing); 238 width_spacing);
240} 239}
241 240
242std::vector<u8> UnswizzleTexture(VAddr address, u32 tile_size_x, u32 tile_size_y, 241std::vector<u8> UnswizzleTexture(u8* address, u32 tile_size_x, u32 tile_size_y, u32 bytes_per_pixel,
243 u32 bytes_per_pixel, u32 width, u32 height, u32 depth, 242 u32 width, u32 height, u32 depth, u32 block_height,
244 u32 block_height, u32 block_depth, u32 width_spacing) { 243 u32 block_depth, u32 width_spacing) {
245 std::vector<u8> unswizzled_data(width * height * depth * bytes_per_pixel); 244 std::vector<u8> unswizzled_data(width * height * depth * bytes_per_pixel);
246 UnswizzleTexture(unswizzled_data.data(), address, tile_size_x, tile_size_y, bytes_per_pixel, 245 UnswizzleTexture(unswizzled_data.data(), address, tile_size_x, tile_size_y, bytes_per_pixel,
247 width, height, depth, block_height, block_depth, width_spacing); 246 width, height, depth, block_height, block_depth, width_spacing);
@@ -249,8 +248,7 @@ std::vector<u8> UnswizzleTexture(VAddr address, u32 tile_size_x, u32 tile_size_y
249} 248}
250 249
251void SwizzleSubrect(u32 subrect_width, u32 subrect_height, u32 source_pitch, u32 swizzled_width, 250void SwizzleSubrect(u32 subrect_width, u32 subrect_height, u32 source_pitch, u32 swizzled_width,
252 u32 bytes_per_pixel, VAddr swizzled_data, VAddr unswizzled_data, 251 u32 bytes_per_pixel, u8* swizzled_data, u8* unswizzled_data, u32 block_height) {
253 u32 block_height) {
254 const u32 image_width_in_gobs{(swizzled_width * bytes_per_pixel + (gob_size_x - 1)) / 252 const u32 image_width_in_gobs{(swizzled_width * bytes_per_pixel + (gob_size_x - 1)) /
255 gob_size_x}; 253 gob_size_x};
256 for (u32 line = 0; line < subrect_height; ++line) { 254 for (u32 line = 0; line < subrect_height; ++line) {
@@ -262,17 +260,17 @@ void SwizzleSubrect(u32 subrect_width, u32 subrect_height, u32 source_pitch, u32
262 const u32 gob_address = 260 const u32 gob_address =
263 gob_address_y + (x * bytes_per_pixel / gob_size_x) * gob_size * block_height; 261 gob_address_y + (x * bytes_per_pixel / gob_size_x) * gob_size * block_height;
264 const u32 swizzled_offset = gob_address + table[(x * bytes_per_pixel) % gob_size_x]; 262 const u32 swizzled_offset = gob_address + table[(x * bytes_per_pixel) % gob_size_x];
265 const VAddr source_line = unswizzled_data + line * source_pitch + x * bytes_per_pixel; 263 u8* source_line = unswizzled_data + line * source_pitch + x * bytes_per_pixel;
266 const VAddr dest_addr = swizzled_data + swizzled_offset; 264 u8* dest_addr = swizzled_data + swizzled_offset;
267 265
268 Memory::CopyBlock(dest_addr, source_line, bytes_per_pixel); 266 std::memcpy(dest_addr, source_line, bytes_per_pixel);
269 } 267 }
270 } 268 }
271} 269}
272 270
273void UnswizzleSubrect(u32 subrect_width, u32 subrect_height, u32 dest_pitch, u32 swizzled_width, 271void UnswizzleSubrect(u32 subrect_width, u32 subrect_height, u32 dest_pitch, u32 swizzled_width,
274 u32 bytes_per_pixel, VAddr swizzled_data, VAddr unswizzled_data, 272 u32 bytes_per_pixel, u8* swizzled_data, u8* unswizzled_data, u32 block_height,
275 u32 block_height, u32 offset_x, u32 offset_y) { 273 u32 offset_x, u32 offset_y) {
276 for (u32 line = 0; line < subrect_height; ++line) { 274 for (u32 line = 0; line < subrect_height; ++line) {
277 const u32 y2 = line + offset_y; 275 const u32 y2 = line + offset_y;
278 const u32 gob_address_y = (y2 / (gob_size_y * block_height)) * gob_size * block_height + 276 const u32 gob_address_y = (y2 / (gob_size_y * block_height)) * gob_size * block_height +
@@ -282,10 +280,10 @@ void UnswizzleSubrect(u32 subrect_width, u32 subrect_height, u32 dest_pitch, u32
282 const u32 x2 = (x + offset_x) * bytes_per_pixel; 280 const u32 x2 = (x + offset_x) * bytes_per_pixel;
283 const u32 gob_address = gob_address_y + (x2 / gob_size_x) * gob_size * block_height; 281 const u32 gob_address = gob_address_y + (x2 / gob_size_x) * gob_size * block_height;
284 const u32 swizzled_offset = gob_address + table[x2 % gob_size_x]; 282 const u32 swizzled_offset = gob_address + table[x2 % gob_size_x];
285 const VAddr dest_line = unswizzled_data + line * dest_pitch + x * bytes_per_pixel; 283 u8* dest_line = unswizzled_data + line * dest_pitch + x * bytes_per_pixel;
286 const VAddr source_addr = swizzled_data + swizzled_offset; 284 u8* source_addr = swizzled_data + swizzled_offset;
287 285
288 Memory::CopyBlock(dest_line, source_addr, bytes_per_pixel); 286 std::memcpy(dest_line, source_addr, bytes_per_pixel);
289 } 287 }
290 } 288 }
291} 289}
diff --git a/src/video_core/textures/decoders.h b/src/video_core/textures/decoders.h
index 85b7e9f7b..e078fa274 100644
--- a/src/video_core/textures/decoders.h
+++ b/src/video_core/textures/decoders.h
@@ -16,18 +16,15 @@ inline std::size_t GetGOBSize() {
16 return 512; 16 return 512;
17} 17}
18 18
19/** 19/// Unswizzles a swizzled texture without changing its format.
20 * Unswizzles a swizzled texture without changing its format. 20void UnswizzleTexture(u8* unswizzled_data, u8* address, u32 tile_size_x, u32 tile_size_y,
21 */
22void UnswizzleTexture(u8* unswizzled_data, VAddr address, u32 tile_size_x, u32 tile_size_y,
23 u32 bytes_per_pixel, u32 width, u32 height, u32 depth, 21 u32 bytes_per_pixel, u32 width, u32 height, u32 depth,
24 u32 block_height = TICEntry::DefaultBlockHeight, 22 u32 block_height = TICEntry::DefaultBlockHeight,
25 u32 block_depth = TICEntry::DefaultBlockHeight, u32 width_spacing = 0); 23 u32 block_depth = TICEntry::DefaultBlockHeight, u32 width_spacing = 0);
26/** 24
27 * Unswizzles a swizzled texture without changing its format. 25/// Unswizzles a swizzled texture without changing its format.
28 */ 26std::vector<u8> UnswizzleTexture(u8* address, u32 tile_size_x, u32 tile_size_y, u32 bytes_per_pixel,
29std::vector<u8> UnswizzleTexture(VAddr address, u32 tile_size_x, u32 tile_size_y, 27 u32 width, u32 height, u32 depth,
30 u32 bytes_per_pixel, u32 width, u32 height, u32 depth,
31 u32 block_height = TICEntry::DefaultBlockHeight, 28 u32 block_height = TICEntry::DefaultBlockHeight,
32 u32 block_depth = TICEntry::DefaultBlockHeight, 29 u32 block_depth = TICEntry::DefaultBlockHeight,
33 u32 width_spacing = 0); 30 u32 width_spacing = 0);
@@ -37,25 +34,21 @@ void CopySwizzledData(u32 width, u32 height, u32 depth, u32 bytes_per_pixel,
37 u32 out_bytes_per_pixel, u8* swizzled_data, u8* unswizzled_data, 34 u32 out_bytes_per_pixel, u8* swizzled_data, u8* unswizzled_data,
38 bool unswizzle, u32 block_height, u32 block_depth, u32 width_spacing); 35 bool unswizzle, u32 block_height, u32 block_depth, u32 width_spacing);
39 36
40/** 37/// Decodes an unswizzled texture into a A8R8G8B8 texture.
41 * Decodes an unswizzled texture into a A8R8G8B8 texture.
42 */
43std::vector<u8> DecodeTexture(const std::vector<u8>& texture_data, TextureFormat format, u32 width, 38std::vector<u8> DecodeTexture(const std::vector<u8>& texture_data, TextureFormat format, u32 width,
44 u32 height); 39 u32 height);
45 40
46/** 41/// This function calculates the correct size of a texture depending if it's tiled or not.
47 * This function calculates the correct size of a texture depending if it's tiled or not.
48 */
49std::size_t CalculateSize(bool tiled, u32 bytes_per_pixel, u32 width, u32 height, u32 depth, 42std::size_t CalculateSize(bool tiled, u32 bytes_per_pixel, u32 width, u32 height, u32 depth,
50 u32 block_height, u32 block_depth); 43 u32 block_height, u32 block_depth);
51 44
52/// Copies an untiled subrectangle into a tiled surface. 45/// Copies an untiled subrectangle into a tiled surface.
53void SwizzleSubrect(u32 subrect_width, u32 subrect_height, u32 source_pitch, u32 swizzled_width, 46void SwizzleSubrect(u32 subrect_width, u32 subrect_height, u32 source_pitch, u32 swizzled_width,
54 u32 bytes_per_pixel, VAddr swizzled_data, VAddr unswizzled_data, 47 u32 bytes_per_pixel, u8* swizzled_data, u8* unswizzled_data, u32 block_height);
55 u32 block_height); 48
56/// Copies a tiled subrectangle into a linear surface. 49/// Copies a tiled subrectangle into a linear surface.
57void UnswizzleSubrect(u32 subrect_width, u32 subrect_height, u32 dest_pitch, u32 swizzled_width, 50void UnswizzleSubrect(u32 subrect_width, u32 subrect_height, u32 dest_pitch, u32 swizzled_width,
58 u32 bytes_per_pixel, VAddr swizzled_data, VAddr unswizzled_data, 51 u32 bytes_per_pixel, u8* swizzled_data, u8* unswizzled_data, u32 block_height,
59 u32 block_height, u32 offset_x, u32 offset_y); 52 u32 offset_x, u32 offset_y);
60 53
61} // namespace Tegra::Texture 54} // namespace Tegra::Texture
diff --git a/src/video_core/textures/texture.h b/src/video_core/textures/texture.h
index e7c78bee2..bea0d5bc2 100644
--- a/src/video_core/textures/texture.h
+++ b/src/video_core/textures/texture.h
@@ -4,11 +4,10 @@
4 4
5#pragma once 5#pragma once
6 6
7#include <array>
7#include "common/assert.h" 8#include "common/assert.h"
8#include "common/bit_field.h" 9#include "common/bit_field.h"
9#include "common/common_funcs.h"
10#include "common/common_types.h" 10#include "common/common_types.h"
11#include "video_core/memory_manager.h"
12 11
13namespace Tegra::Texture { 12namespace Tegra::Texture {
14 13
@@ -182,7 +181,7 @@ struct TICEntry {
182 }; 181 };
183 union { 182 union {
184 BitField<0, 16, u32> height_minus_1; 183 BitField<0, 16, u32> height_minus_1;
185 BitField<16, 15, u32> depth_minus_1; 184 BitField<16, 14, u32> depth_minus_1;
186 }; 185 };
187 union { 186 union {
188 BitField<6, 13, u32> mip_lod_bias; 187 BitField<6, 13, u32> mip_lod_bias;
@@ -282,34 +281,62 @@ enum class TextureMipmapFilter : u32 {
282 281
283struct TSCEntry { 282struct TSCEntry {
284 union { 283 union {
285 BitField<0, 3, WrapMode> wrap_u; 284 struct {
286 BitField<3, 3, WrapMode> wrap_v; 285 union {
287 BitField<6, 3, WrapMode> wrap_p; 286 BitField<0, 3, WrapMode> wrap_u;
288 BitField<9, 1, u32> depth_compare_enabled; 287 BitField<3, 3, WrapMode> wrap_v;
289 BitField<10, 3, DepthCompareFunc> depth_compare_func; 288 BitField<6, 3, WrapMode> wrap_p;
290 BitField<13, 1, u32> srgb_conversion; 289 BitField<9, 1, u32> depth_compare_enabled;
291 BitField<20, 3, u32> max_anisotropy; 290 BitField<10, 3, DepthCompareFunc> depth_compare_func;
291 BitField<13, 1, u32> srgb_conversion;
292 BitField<20, 3, u32> max_anisotropy;
293 };
294 union {
295 BitField<0, 2, TextureFilter> mag_filter;
296 BitField<4, 2, TextureFilter> min_filter;
297 BitField<6, 2, TextureMipmapFilter> mipmap_filter;
298 BitField<9, 1, u32> cubemap_interface_filtering;
299 BitField<12, 13, u32> mip_lod_bias;
300 };
301 union {
302 BitField<0, 12, u32> min_lod_clamp;
303 BitField<12, 12, u32> max_lod_clamp;
304 BitField<24, 8, u32> srgb_border_color_r;
305 };
306 union {
307 BitField<12, 8, u32> srgb_border_color_g;
308 BitField<20, 8, u32> srgb_border_color_b;
309 };
310 std::array<f32, 4> border_color;
311 };
312 std::array<u8, 0x20> raw;
292 }; 313 };
293 union { 314
294 BitField<0, 2, TextureFilter> mag_filter; 315 float GetMaxAnisotropy() const {
295 BitField<4, 2, TextureFilter> min_filter; 316 return static_cast<float>(1U << max_anisotropy);
296 BitField<6, 2, TextureMipmapFilter> mip_filter; 317 }
297 BitField<9, 1, u32> cubemap_interface_filtering; 318
298 BitField<12, 13, u32> mip_lod_bias; 319 float GetMinLod() const {
299 }; 320 return static_cast<float>(min_lod_clamp) / 256.0f;
300 union { 321 }
301 BitField<0, 12, u32> min_lod_clamp; 322
302 BitField<12, 12, u32> max_lod_clamp; 323 float GetMaxLod() const {
303 BitField<24, 8, u32> srgb_border_color_r; 324 return static_cast<float>(max_lod_clamp) / 256.0f;
304 }; 325 }
305 union { 326
306 BitField<12, 8, u32> srgb_border_color_g; 327 float GetLodBias() const {
307 BitField<20, 8, u32> srgb_border_color_b; 328 // Sign extend the 13-bit value.
308 }; 329 constexpr u32 mask = 1U << (13 - 1);
309 float border_color_r; 330 return static_cast<s32>((mip_lod_bias ^ mask) - mask) / 256.0f;
310 float border_color_g; 331 }
311 float border_color_b; 332
312 float border_color_a; 333 std::array<float, 4> GetBorderColor() const {
334 if (srgb_conversion) {
335 return {srgb_border_color_r / 255.0f, srgb_border_color_g / 255.0f,
336 srgb_border_color_b / 255.0f, border_color[3]};
337 }
338 return border_color;
339 }
313}; 340};
314static_assert(sizeof(TSCEntry) == 0x20, "TSCEntry has wrong size"); 341static_assert(sizeof(TSCEntry) == 0x20, "TSCEntry has wrong size");
315 342
@@ -317,7 +344,6 @@ struct FullTextureInfo {
317 u32 index; 344 u32 index;
318 TICEntry tic; 345 TICEntry tic;
319 TSCEntry tsc; 346 TSCEntry tsc;
320 bool enabled;
321}; 347};
322 348
323/// Returns the number of bytes per pixel of the input texture format. 349/// Returns the number of bytes per pixel of the input texture format.
diff --git a/src/video_core/video_core.cpp b/src/video_core/video_core.cpp
index f7de3471b..cb82ecf3f 100644
--- a/src/video_core/video_core.cpp
+++ b/src/video_core/video_core.cpp
@@ -11,14 +11,16 @@
11 11
12namespace VideoCore { 12namespace VideoCore {
13 13
14std::unique_ptr<RendererBase> CreateRenderer(Core::Frontend::EmuWindow& emu_window) { 14std::unique_ptr<RendererBase> CreateRenderer(Core::Frontend::EmuWindow& emu_window,
15 return std::make_unique<OpenGL::RendererOpenGL>(emu_window); 15 Core::System& system) {
16 return std::make_unique<OpenGL::RendererOpenGL>(emu_window, system);
16} 17}
17 18
18u16 GetResolutionScaleFactor(const RendererBase& renderer) { 19u16 GetResolutionScaleFactor(const RendererBase& renderer) {
19 return !Settings::values.resolution_factor 20 return static_cast<u16>(
20 ? renderer.GetRenderWindow().GetFramebufferLayout().GetScalingRatio() 21 Settings::values.resolution_factor
21 : Settings::values.resolution_factor; 22 ? Settings::values.resolution_factor
23 : renderer.GetRenderWindow().GetFramebufferLayout().GetScalingRatio());
22} 24}
23 25
24} // namespace VideoCore 26} // namespace VideoCore
diff --git a/src/video_core/video_core.h b/src/video_core/video_core.h
index 5b373bcb1..3c583f195 100644
--- a/src/video_core/video_core.h
+++ b/src/video_core/video_core.h
@@ -6,6 +6,10 @@
6 6
7#include <memory> 7#include <memory>
8 8
9namespace Core {
10class System;
11}
12
9namespace Core::Frontend { 13namespace Core::Frontend {
10class EmuWindow; 14class EmuWindow;
11} 15}
@@ -20,7 +24,8 @@ class RendererBase;
20 * @note The returned renderer instance is simply allocated. Its Init() 24 * @note The returned renderer instance is simply allocated. Its Init()
21 * function still needs to be called to fully complete its setup. 25 * function still needs to be called to fully complete its setup.
22 */ 26 */
23std::unique_ptr<RendererBase> CreateRenderer(Core::Frontend::EmuWindow& emu_window); 27std::unique_ptr<RendererBase> CreateRenderer(Core::Frontend::EmuWindow& emu_window,
28 Core::System& system);
24 29
25u16 GetResolutionScaleFactor(const RendererBase& renderer); 30u16 GetResolutionScaleFactor(const RendererBase& renderer);
26 31
diff --git a/src/web_service/verify_login.h b/src/web_service/verify_login.h
index 39db32dbb..821b345d7 100644
--- a/src/web_service/verify_login.h
+++ b/src/web_service/verify_login.h
@@ -4,8 +4,6 @@
4 4
5#pragma once 5#pragma once
6 6
7#include <functional>
8#include <future>
9#include <string> 7#include <string>
10 8
11namespace WebService { 9namespace WebService {
diff --git a/src/web_service/web_backend.cpp b/src/web_service/web_backend.cpp
index b7737b615..dc149d2ed 100644
--- a/src/web_service/web_backend.cpp
+++ b/src/web_service/web_backend.cpp
@@ -10,7 +10,6 @@
10#include "common/common_types.h" 10#include "common/common_types.h"
11#include "common/logging/log.h" 11#include "common/logging/log.h"
12#include "common/web_result.h" 12#include "common/web_result.h"
13#include "core/settings.h"
14#include "web_service/web_backend.h" 13#include "web_service/web_backend.h"
15 14
16namespace WebService { 15namespace WebService {
@@ -25,7 +24,7 @@ constexpr u32 TIMEOUT_SECONDS = 30;
25struct Client::Impl { 24struct Client::Impl {
26 Impl(std::string host, std::string username, std::string token) 25 Impl(std::string host, std::string username, std::string token)
27 : host{std::move(host)}, username{std::move(username)}, token{std::move(token)} { 26 : host{std::move(host)}, username{std::move(username)}, token{std::move(token)} {
28 std::lock_guard<std::mutex> lock(jwt_cache.mutex); 27 std::lock_guard lock{jwt_cache.mutex};
29 if (this->username == jwt_cache.username && this->token == jwt_cache.token) { 28 if (this->username == jwt_cache.username && this->token == jwt_cache.token) {
30 jwt = jwt_cache.jwt; 29 jwt = jwt_cache.jwt;
31 } 30 }
@@ -152,7 +151,7 @@ struct Client::Impl {
152 if (result.result_code != Common::WebResult::Code::Success) { 151 if (result.result_code != Common::WebResult::Code::Success) {
153 LOG_ERROR(WebService, "UpdateJWT failed"); 152 LOG_ERROR(WebService, "UpdateJWT failed");
154 } else { 153 } else {
155 std::lock_guard<std::mutex> lock(jwt_cache.mutex); 154 std::lock_guard lock{jwt_cache.mutex};
156 jwt_cache.username = username; 155 jwt_cache.username = username;
157 jwt_cache.token = token; 156 jwt_cache.token = token;
158 jwt_cache.jwt = jwt = result.returned_data; 157 jwt_cache.jwt = jwt = result.returned_data;
diff --git a/src/yuzu/CMakeLists.txt b/src/yuzu/CMakeLists.txt
index 4cab599b4..2eb86d6e5 100644
--- a/src/yuzu/CMakeLists.txt
+++ b/src/yuzu/CMakeLists.txt
@@ -31,6 +31,8 @@ add_executable(yuzu
31 configuration/configure_general.h 31 configuration/configure_general.h
32 configuration/configure_graphics.cpp 32 configuration/configure_graphics.cpp
33 configuration/configure_graphics.h 33 configuration/configure_graphics.h
34 configuration/configure_hotkeys.cpp
35 configuration/configure_hotkeys.h
34 configuration/configure_input.cpp 36 configuration/configure_input.cpp
35 configuration/configure_input.h 37 configuration/configure_input.h
36 configuration/configure_input_player.cpp 38 configuration/configure_input_player.cpp
@@ -54,8 +56,6 @@ add_executable(yuzu
54 debugger/graphics/graphics_breakpoints.cpp 56 debugger/graphics/graphics_breakpoints.cpp
55 debugger/graphics/graphics_breakpoints.h 57 debugger/graphics/graphics_breakpoints.h
56 debugger/graphics/graphics_breakpoints_p.h 58 debugger/graphics/graphics_breakpoints_p.h
57 debugger/graphics/graphics_surface.cpp
58 debugger/graphics/graphics_surface.h
59 debugger/console.cpp 59 debugger/console.cpp
60 debugger/console.h 60 debugger/console.h
61 debugger/profiler.cpp 61 debugger/profiler.cpp
@@ -78,6 +78,8 @@ add_executable(yuzu
78 ui_settings.h 78 ui_settings.h
79 util/limitable_input_dialog.cpp 79 util/limitable_input_dialog.cpp
80 util/limitable_input_dialog.h 80 util/limitable_input_dialog.h
81 util/sequence_dialog/sequence_dialog.cpp
82 util/sequence_dialog/sequence_dialog.h
81 util/spinbox.cpp 83 util/spinbox.cpp
82 util/spinbox.h 84 util/spinbox.h
83 util/util.cpp 85 util/util.cpp
@@ -95,6 +97,7 @@ set(UIS
95 configuration/configure_gamelist.ui 97 configuration/configure_gamelist.ui
96 configuration/configure_general.ui 98 configuration/configure_general.ui
97 configuration/configure_graphics.ui 99 configuration/configure_graphics.ui
100 configuration/configure_hotkeys.ui
98 configuration/configure_input.ui 101 configuration/configure_input.ui
99 configuration/configure_input_player.ui 102 configuration/configure_input_player.ui
100 configuration/configure_input_simple.ui 103 configuration/configure_input_simple.ui
@@ -105,7 +108,6 @@ set(UIS
105 configuration/configure_touchscreen_advanced.ui 108 configuration/configure_touchscreen_advanced.ui
106 configuration/configure_web.ui 109 configuration/configure_web.ui
107 compatdb.ui 110 compatdb.ui
108 hotkeys.ui
109 loading_screen.ui 111 loading_screen.ui
110 main.ui 112 main.ui
111) 113)
diff --git a/src/yuzu/applets/profile_select.cpp b/src/yuzu/applets/profile_select.cpp
index 5c1b65a2c..743b24d76 100644
--- a/src/yuzu/applets/profile_select.cpp
+++ b/src/yuzu/applets/profile_select.cpp
@@ -4,6 +4,7 @@
4 4
5#include <mutex> 5#include <mutex>
6#include <QDialogButtonBox> 6#include <QDialogButtonBox>
7#include <QHeaderView>
7#include <QLabel> 8#include <QLabel>
8#include <QLineEdit> 9#include <QLineEdit>
9#include <QScrollArea> 10#include <QScrollArea>
@@ -58,10 +59,7 @@ QtProfileSelectionDialog::QtProfileSelectionDialog(QWidget* parent)
58 59
59 scroll_area = new QScrollArea; 60 scroll_area = new QScrollArea;
60 61
61 buttons = new QDialogButtonBox; 62 buttons = new QDialogButtonBox(QDialogButtonBox::Cancel | QDialogButtonBox::Ok);
62 buttons->addButton(tr("Cancel"), QDialogButtonBox::RejectRole);
63 buttons->addButton(tr("OK"), QDialogButtonBox::AcceptRole);
64
65 connect(buttons, &QDialogButtonBox::accepted, this, &QtProfileSelectionDialog::accept); 63 connect(buttons, &QDialogButtonBox::accepted, this, &QtProfileSelectionDialog::accept);
66 connect(buttons, &QDialogButtonBox::rejected, this, &QtProfileSelectionDialog::reject); 64 connect(buttons, &QDialogButtonBox::rejected, this, &QtProfileSelectionDialog::reject);
67 65
@@ -163,6 +161,6 @@ void QtProfileSelector::SelectProfile(
163 161
164void QtProfileSelector::MainWindowFinishedSelection(std::optional<Service::Account::UUID> uuid) { 162void QtProfileSelector::MainWindowFinishedSelection(std::optional<Service::Account::UUID> uuid) {
165 // Acquire the HLE mutex 163 // Acquire the HLE mutex
166 std::lock_guard<std::recursive_mutex> lock(HLE::g_hle_lock); 164 std::lock_guard lock{HLE::g_hle_lock};
167 callback(uuid); 165 callback(uuid);
168} 166}
diff --git a/src/yuzu/applets/profile_select.h b/src/yuzu/applets/profile_select.h
index 868573324..1c2922e54 100644
--- a/src/yuzu/applets/profile_select.h
+++ b/src/yuzu/applets/profile_select.h
@@ -7,6 +7,7 @@
7#include <vector> 7#include <vector>
8#include <QDialog> 8#include <QDialog>
9#include <QList> 9#include <QList>
10#include <QTreeView>
10#include "core/frontend/applets/profile_select.h" 11#include "core/frontend/applets/profile_select.h"
11 12
12class GMainWindow; 13class GMainWindow;
@@ -16,7 +17,6 @@ class QLabel;
16class QScrollArea; 17class QScrollArea;
17class QStandardItem; 18class QStandardItem;
18class QStandardItemModel; 19class QStandardItemModel;
19class QTreeView;
20class QVBoxLayout; 20class QVBoxLayout;
21 21
22class QtProfileSelectionDialog final : public QDialog { 22class QtProfileSelectionDialog final : public QDialog {
diff --git a/src/yuzu/applets/software_keyboard.cpp b/src/yuzu/applets/software_keyboard.cpp
index 8a26fdff1..f3eb29b25 100644
--- a/src/yuzu/applets/software_keyboard.cpp
+++ b/src/yuzu/applets/software_keyboard.cpp
@@ -75,13 +75,13 @@ QtSoftwareKeyboardDialog::QtSoftwareKeyboardDialog(
75 length_label->setText(QStringLiteral("%1/%2").arg(text.size()).arg(parameters.max_length)); 75 length_label->setText(QStringLiteral("%1/%2").arg(text.size()).arg(parameters.max_length));
76 }); 76 });
77 77
78 buttons = new QDialogButtonBox; 78 buttons = new QDialogButtonBox(QDialogButtonBox::Cancel);
79 buttons->addButton(tr("Cancel"), QDialogButtonBox::RejectRole); 79 if (parameters.submit_text.empty()) {
80 buttons->addButton(parameters.submit_text.empty() 80 buttons->addButton(QDialogButtonBox::Ok);
81 ? tr("OK") 81 } else {
82 : QString::fromStdU16String(parameters.submit_text), 82 buttons->addButton(QString::fromStdU16String(parameters.submit_text),
83 QDialogButtonBox::AcceptRole); 83 QDialogButtonBox::AcceptRole);
84 84 }
85 connect(buttons, &QDialogButtonBox::accepted, this, &QtSoftwareKeyboardDialog::accept); 85 connect(buttons, &QDialogButtonBox::accepted, this, &QtSoftwareKeyboardDialog::accept);
86 connect(buttons, &QDialogButtonBox::rejected, this, &QtSoftwareKeyboardDialog::reject); 86 connect(buttons, &QDialogButtonBox::rejected, this, &QtSoftwareKeyboardDialog::reject);
87 layout->addWidget(header_label); 87 layout->addWidget(header_label);
@@ -141,12 +141,12 @@ void QtSoftwareKeyboard::SendTextCheckDialog(std::u16string error_message,
141 141
142void QtSoftwareKeyboard::MainWindowFinishedText(std::optional<std::u16string> text) { 142void QtSoftwareKeyboard::MainWindowFinishedText(std::optional<std::u16string> text) {
143 // Acquire the HLE mutex 143 // Acquire the HLE mutex
144 std::lock_guard<std::recursive_mutex> lock(HLE::g_hle_lock); 144 std::lock_guard lock{HLE::g_hle_lock};
145 text_output(text); 145 text_output(text);
146} 146}
147 147
148void QtSoftwareKeyboard::MainWindowFinishedCheckDialog() { 148void QtSoftwareKeyboard::MainWindowFinishedCheckDialog() {
149 // Acquire the HLE mutex 149 // Acquire the HLE mutex
150 std::lock_guard<std::recursive_mutex> lock(HLE::g_hle_lock); 150 std::lock_guard lock{HLE::g_hle_lock};
151 finished_check(); 151 finished_check();
152} 152}
diff --git a/src/yuzu/applets/web_browser.cpp b/src/yuzu/applets/web_browser.cpp
index 6a9138d53..ac80b2fa2 100644
--- a/src/yuzu/applets/web_browser.cpp
+++ b/src/yuzu/applets/web_browser.cpp
@@ -56,6 +56,8 @@ constexpr char NX_SHIM_INJECT_SCRIPT[] = R"(
56 window.nx.endApplet = function() { 56 window.nx.endApplet = function() {
57 applet_done = true; 57 applet_done = true;
58 }; 58 };
59
60 window.onkeypress = function(e) { if (e.keyCode === 13) { applet_done = true; } };
59)"; 61)";
60 62
61QString GetNXShimInjectionScript() { 63QString GetNXShimInjectionScript() {
@@ -102,12 +104,12 @@ void QtWebBrowser::OpenPage(std::string_view url, std::function<void()> unpack_r
102 104
103void QtWebBrowser::MainWindowUnpackRomFS() { 105void QtWebBrowser::MainWindowUnpackRomFS() {
104 // Acquire the HLE mutex 106 // Acquire the HLE mutex
105 std::lock_guard<std::recursive_mutex> lock(HLE::g_hle_lock); 107 std::lock_guard lock{HLE::g_hle_lock};
106 unpack_romfs_callback(); 108 unpack_romfs_callback();
107} 109}
108 110
109void QtWebBrowser::MainWindowFinishedBrowsing() { 111void QtWebBrowser::MainWindowFinishedBrowsing() {
110 // Acquire the HLE mutex 112 // Acquire the HLE mutex
111 std::lock_guard<std::recursive_mutex> lock(HLE::g_hle_lock); 113 std::lock_guard lock{HLE::g_hle_lock};
112 finished_callback(); 114 finished_callback();
113} 115}
diff --git a/src/yuzu/bootmanager.cpp b/src/yuzu/bootmanager.cpp
index e1825e607..c29f2d2dc 100644
--- a/src/yuzu/bootmanager.cpp
+++ b/src/yuzu/bootmanager.cpp
@@ -27,16 +27,25 @@
27EmuThread::EmuThread(GRenderWindow* render_window) : render_window(render_window) {} 27EmuThread::EmuThread(GRenderWindow* render_window) : render_window(render_window) {}
28 28
29void EmuThread::run() { 29void EmuThread::run() {
30 if (!Settings::values.use_multi_core) { 30 render_window->MakeCurrent();
31 // Single core mode must acquire OpenGL context for entire emulation session
32 render_window->MakeCurrent();
33 }
34 31
35 MicroProfileOnThreadCreate("EmuThread"); 32 MicroProfileOnThreadCreate("EmuThread");
36 33
37 stop_run = false; 34 emit LoadProgress(VideoCore::LoadCallbackStage::Prepare, 0, 0);
35
36 Core::System::GetInstance().Renderer().Rasterizer().LoadDiskResources(
37 stop_run, [this](VideoCore::LoadCallbackStage stage, std::size_t value, std::size_t total) {
38 emit LoadProgress(stage, value, total);
39 });
40
41 emit LoadProgress(VideoCore::LoadCallbackStage::Complete, 0, 0);
42
43 if (Settings::values.use_asynchronous_gpu_emulation) {
44 // Release OpenGL context for the GPU thread
45 render_window->DoneCurrent();
46 }
38 47
39 // holds whether the cpu was running during the last iteration, 48 // Holds whether the cpu was running during the last iteration,
40 // so that the DebugModeLeft signal can be emitted before the 49 // so that the DebugModeLeft signal can be emitted before the
41 // next execution step 50 // next execution step
42 bool was_active = false; 51 bool was_active = false;
@@ -65,7 +74,7 @@ void EmuThread::run() {
65 74
66 was_active = false; 75 was_active = false;
67 } else { 76 } else {
68 std::unique_lock<std::mutex> lock(running_mutex); 77 std::unique_lock lock{running_mutex};
69 running_cv.wait(lock, [this] { return IsRunning() || exec_step || stop_run; }); 78 running_cv.wait(lock, [this] { return IsRunning() || exec_step || stop_run; });
70 } 79 }
71 } 80 }
@@ -184,7 +193,6 @@ GRenderWindow::GRenderWindow(QWidget* parent, EmuThread* emu_thread)
184 setAttribute(Qt::WA_AcceptTouchEvents); 193 setAttribute(Qt::WA_AcceptTouchEvents);
185 194
186 InputCommon::Init(); 195 InputCommon::Init();
187 InputCommon::StartJoystickEventHandler();
188 connect(this, &GRenderWindow::FirstFrameDisplayed, static_cast<GMainWindow*>(parent), 196 connect(this, &GRenderWindow::FirstFrameDisplayed, static_cast<GMainWindow*>(parent),
189 &GMainWindow::OnLoadComplete); 197 &GMainWindow::OnLoadComplete);
190} 198}
diff --git a/src/yuzu/bootmanager.h b/src/yuzu/bootmanager.h
index 288ce1572..9608b959f 100644
--- a/src/yuzu/bootmanager.h
+++ b/src/yuzu/bootmanager.h
@@ -24,6 +24,10 @@ class GRenderWindow;
24class QSurface; 24class QSurface;
25class QOpenGLContext; 25class QOpenGLContext;
26 26
27namespace VideoCore {
28enum class LoadCallbackStage;
29}
30
27class EmuThread : public QThread { 31class EmuThread : public QThread {
28 Q_OBJECT 32 Q_OBJECT
29 33
@@ -51,7 +55,7 @@ public:
51 * @note This function is thread-safe 55 * @note This function is thread-safe
52 */ 56 */
53 void SetRunning(bool running) { 57 void SetRunning(bool running) {
54 std::unique_lock<std::mutex> lock(running_mutex); 58 std::unique_lock lock{running_mutex};
55 this->running = running; 59 this->running = running;
56 lock.unlock(); 60 lock.unlock();
57 running_cv.notify_all(); 61 running_cv.notify_all();
@@ -77,7 +81,7 @@ public:
77private: 81private:
78 bool exec_step = false; 82 bool exec_step = false;
79 bool running = false; 83 bool running = false;
80 std::atomic<bool> stop_run{false}; 84 std::atomic_bool stop_run{false};
81 std::mutex running_mutex; 85 std::mutex running_mutex;
82 std::condition_variable running_cv; 86 std::condition_variable running_cv;
83 87
@@ -103,6 +107,8 @@ signals:
103 void DebugModeLeft(); 107 void DebugModeLeft();
104 108
105 void ErrorThrown(Core::System::ResultStatus, std::string); 109 void ErrorThrown(Core::System::ResultStatus, std::string);
110
111 void LoadProgress(VideoCore::LoadCallbackStage stage, std::size_t value, std::size_t total);
106}; 112};
107 113
108class GRenderWindow : public QWidget, public Core::Frontend::EmuWindow { 114class GRenderWindow : public QWidget, public Core::Frontend::EmuWindow {
diff --git a/src/yuzu/compatdb.cpp b/src/yuzu/compatdb.cpp
index 5f0896f84..c8b0a5ec0 100644
--- a/src/yuzu/compatdb.cpp
+++ b/src/yuzu/compatdb.cpp
@@ -53,15 +53,15 @@ void CompatDB::Submit() {
53 case CompatDBPage::Final: 53 case CompatDBPage::Final:
54 back(); 54 back();
55 LOG_DEBUG(Frontend, "Compatibility Rating: {}", compatibility->checkedId()); 55 LOG_DEBUG(Frontend, "Compatibility Rating: {}", compatibility->checkedId());
56 Core::Telemetry().AddField(Telemetry::FieldType::UserFeedback, "Compatibility", 56 Core::System::GetInstance().TelemetrySession().AddField(
57 compatibility->checkedId()); 57 Telemetry::FieldType::UserFeedback, "Compatibility", compatibility->checkedId());
58 58
59 button(NextButton)->setEnabled(false); 59 button(NextButton)->setEnabled(false);
60 button(NextButton)->setText(tr("Submitting")); 60 button(NextButton)->setText(tr("Submitting"));
61 button(QWizard::CancelButton)->setVisible(false); 61 button(QWizard::CancelButton)->setVisible(false);
62 62
63 testcase_watcher.setFuture(QtConcurrent::run( 63 testcase_watcher.setFuture(QtConcurrent::run(
64 [this]() { return Core::System::GetInstance().TelemetrySession().SubmitTestcase(); })); 64 [] { return Core::System::GetInstance().TelemetrySession().SubmitTestcase(); }));
65 break; 65 break;
66 default: 66 default:
67 LOG_ERROR(Frontend, "Unexpected page: {}", currentId()); 67 LOG_ERROR(Frontend, "Unexpected page: {}", currentId());
diff --git a/src/yuzu/configuration/config.cpp b/src/yuzu/configuration/config.cpp
index ddf4cf552..ca60bc0c9 100644
--- a/src/yuzu/configuration/config.cpp
+++ b/src/yuzu/configuration/config.cpp
@@ -2,6 +2,8 @@
2// Licensed under GPLv2 or any later version 2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included. 3// Refer to the license.txt file included.
4 4
5#include <array>
6#include <QKeySequence>
5#include <QSettings> 7#include <QSettings>
6#include "common/file_util.h" 8#include "common/file_util.h"
7#include "configure_input_simple.h" 9#include "configure_input_simple.h"
@@ -9,7 +11,6 @@
9#include "core/hle/service/hid/controllers/npad.h" 11#include "core/hle/service/hid/controllers/npad.h"
10#include "input_common/main.h" 12#include "input_common/main.h"
11#include "yuzu/configuration/config.h" 13#include "yuzu/configuration/config.h"
12#include "yuzu/ui_settings.h"
13 14
14Config::Config() { 15Config::Config() {
15 // TODO: Don't hardcode the path; let the frontend decide where to put the config files. 16 // TODO: Don't hardcode the path; let the frontend decide where to put the config files.
@@ -17,7 +18,6 @@ Config::Config() {
17 FileUtil::CreateFullPath(qt_config_loc); 18 FileUtil::CreateFullPath(qt_config_loc);
18 qt_config = 19 qt_config =
19 std::make_unique<QSettings>(QString::fromStdString(qt_config_loc), QSettings::IniFormat); 20 std::make_unique<QSettings>(QString::fromStdString(qt_config_loc), QSettings::IniFormat);
20
21 Reload(); 21 Reload();
22} 22}
23 23
@@ -205,11 +205,32 @@ const std::array<int, Settings::NativeKeyboard::NumKeyboardMods> Config::default
205 Qt::Key_Control, Qt::Key_Shift, Qt::Key_AltGr, Qt::Key_ApplicationRight, 205 Qt::Key_Control, Qt::Key_Shift, Qt::Key_AltGr, Qt::Key_ApplicationRight,
206}; 206};
207 207
208// This shouldn't have anything except static initializers (no functions). So
209// QKeySequnce(...).toString() is NOT ALLOWED HERE.
210// This must be in alphabetical order according to action name as it must have the same order as
211// UISetting::values.shortcuts, which is alphabetically ordered.
212const std::array<UISettings::Shortcut, 15> Config::default_hotkeys{
213 {{"Capture Screenshot", "Main Window", {"Ctrl+P", Qt::ApplicationShortcut}},
214 {"Continue/Pause Emulation", "Main Window", {"F4", Qt::WindowShortcut}},
215 {"Decrease Speed Limit", "Main Window", {"-", Qt::ApplicationShortcut}},
216 {"Exit yuzu", "Main Window", {"Ctrl+Q", Qt::WindowShortcut}},
217 {"Exit Fullscreen", "Main Window", {"Esc", Qt::WindowShortcut}},
218 {"Fullscreen", "Main Window", {"F11", Qt::WindowShortcut}},
219 {"Increase Speed Limit", "Main Window", {"+", Qt::ApplicationShortcut}},
220 {"Load Amiibo", "Main Window", {"F2", Qt::ApplicationShortcut}},
221 {"Load File", "Main Window", {"Ctrl+O", Qt::WindowShortcut}},
222 {"Restart Emulation", "Main Window", {"F6", Qt::WindowShortcut}},
223 {"Stop Emulation", "Main Window", {"F5", Qt::WindowShortcut}},
224 {"Toggle Filter Bar", "Main Window", {"Ctrl+F", Qt::WindowShortcut}},
225 {"Toggle Speed Limit", "Main Window", {"Ctrl+Z", Qt::ApplicationShortcut}},
226 {"Toggle Status Bar", "Main Window", {"Ctrl+S", Qt::WindowShortcut}},
227 {"Change Docked Mode", "Main Window", {"F10", Qt::ApplicationShortcut}}}};
228
208void Config::ReadPlayerValues() { 229void Config::ReadPlayerValues() {
209 for (std::size_t p = 0; p < Settings::values.players.size(); ++p) { 230 for (std::size_t p = 0; p < Settings::values.players.size(); ++p) {
210 auto& player = Settings::values.players[p]; 231 auto& player = Settings::values.players[p];
211 232
212 player.connected = qt_config->value(QString("player_%1_connected").arg(p), false).toBool(); 233 player.connected = ReadSetting(QString("player_%1_connected").arg(p), false).toBool();
213 234
214 player.type = static_cast<Settings::ControllerType>( 235 player.type = static_cast<Settings::ControllerType>(
215 qt_config 236 qt_config
@@ -269,7 +290,7 @@ void Config::ReadPlayerValues() {
269} 290}
270 291
271void Config::ReadDebugValues() { 292void Config::ReadDebugValues() {
272 Settings::values.debug_pad_enabled = qt_config->value("debug_pad_enabled", false).toBool(); 293 Settings::values.debug_pad_enabled = ReadSetting("debug_pad_enabled", false).toBool();
273 for (int i = 0; i < Settings::NativeButton::NumButtons; ++i) { 294 for (int i = 0; i < Settings::NativeButton::NumButtons; ++i) {
274 std::string default_param = InputCommon::GenerateKeyboardParam(default_buttons[i]); 295 std::string default_param = InputCommon::GenerateKeyboardParam(default_buttons[i]);
275 Settings::values.debug_pad_buttons[i] = 296 Settings::values.debug_pad_buttons[i] =
@@ -298,7 +319,7 @@ void Config::ReadDebugValues() {
298} 319}
299 320
300void Config::ReadKeyboardValues() { 321void Config::ReadKeyboardValues() {
301 Settings::values.keyboard_enabled = qt_config->value("keyboard_enabled", false).toBool(); 322 Settings::values.keyboard_enabled = ReadSetting("keyboard_enabled", false).toBool();
302 323
303 std::transform(default_keyboard_keys.begin(), default_keyboard_keys.end(), 324 std::transform(default_keyboard_keys.begin(), default_keyboard_keys.end(),
304 Settings::values.keyboard_keys.begin(), InputCommon::GenerateKeyboardParam); 325 Settings::values.keyboard_keys.begin(), InputCommon::GenerateKeyboardParam);
@@ -311,7 +332,7 @@ void Config::ReadKeyboardValues() {
311} 332}
312 333
313void Config::ReadMouseValues() { 334void Config::ReadMouseValues() {
314 Settings::values.mouse_enabled = qt_config->value("mouse_enabled", false).toBool(); 335 Settings::values.mouse_enabled = ReadSetting("mouse_enabled", false).toBool();
315 336
316 for (int i = 0; i < Settings::NativeMouseButton::NumMouseButtons; ++i) { 337 for (int i = 0; i < Settings::NativeMouseButton::NumMouseButtons; ++i) {
317 std::string default_param = InputCommon::GenerateKeyboardParam(default_mouse_buttons[i]); 338 std::string default_param = InputCommon::GenerateKeyboardParam(default_mouse_buttons[i]);
@@ -327,16 +348,14 @@ void Config::ReadMouseValues() {
327} 348}
328 349
329void Config::ReadTouchscreenValues() { 350void Config::ReadTouchscreenValues() {
330 Settings::values.touchscreen.enabled = qt_config->value("touchscreen_enabled", true).toBool(); 351 Settings::values.touchscreen.enabled = ReadSetting("touchscreen_enabled", true).toBool();
331 Settings::values.touchscreen.device = 352 Settings::values.touchscreen.device =
332 qt_config->value("touchscreen_device", "engine:emu_window").toString().toStdString(); 353 ReadSetting("touchscreen_device", "engine:emu_window").toString().toStdString();
333 354
334 Settings::values.touchscreen.finger = qt_config->value("touchscreen_finger", 0).toUInt(); 355 Settings::values.touchscreen.finger = ReadSetting("touchscreen_finger", 0).toUInt();
335 Settings::values.touchscreen.rotation_angle = qt_config->value("touchscreen_angle", 0).toUInt(); 356 Settings::values.touchscreen.rotation_angle = ReadSetting("touchscreen_angle", 0).toUInt();
336 Settings::values.touchscreen.diameter_x = 357 Settings::values.touchscreen.diameter_x = ReadSetting("touchscreen_diameter_x", 15).toUInt();
337 qt_config->value("touchscreen_diameter_x", 15).toUInt(); 358 Settings::values.touchscreen.diameter_y = ReadSetting("touchscreen_diameter_y", 15).toUInt();
338 Settings::values.touchscreen.diameter_y =
339 qt_config->value("touchscreen_diameter_y", 15).toUInt();
340 qt_config->endGroup(); 359 qt_config->endGroup();
341} 360}
342 361
@@ -357,38 +376,41 @@ void Config::ReadValues() {
357 ReadTouchscreenValues(); 376 ReadTouchscreenValues();
358 377
359 Settings::values.motion_device = 378 Settings::values.motion_device =
360 qt_config->value("motion_device", "engine:motion_emu,update_period:100,sensitivity:0.01") 379 ReadSetting("motion_device", "engine:motion_emu,update_period:100,sensitivity:0.01")
361 .toString() 380 .toString()
362 .toStdString(); 381 .toStdString();
363 382
364 qt_config->beginGroup("Core"); 383 qt_config->beginGroup("Core");
365 Settings::values.use_cpu_jit = qt_config->value("use_cpu_jit", true).toBool(); 384 Settings::values.use_cpu_jit = ReadSetting("use_cpu_jit", true).toBool();
366 Settings::values.use_multi_core = qt_config->value("use_multi_core", false).toBool(); 385 Settings::values.use_multi_core = ReadSetting("use_multi_core", false).toBool();
367 qt_config->endGroup(); 386 qt_config->endGroup();
368 387
369 qt_config->beginGroup("Renderer"); 388 qt_config->beginGroup("Renderer");
370 Settings::values.resolution_factor = qt_config->value("resolution_factor", 1.0).toFloat(); 389 Settings::values.resolution_factor = ReadSetting("resolution_factor", 1.0).toFloat();
371 Settings::values.use_frame_limit = qt_config->value("use_frame_limit", true).toBool(); 390 Settings::values.use_frame_limit = ReadSetting("use_frame_limit", true).toBool();
372 Settings::values.frame_limit = qt_config->value("frame_limit", 100).toInt(); 391 Settings::values.frame_limit = ReadSetting("frame_limit", 100).toInt();
392 Settings::values.use_disk_shader_cache = ReadSetting("use_disk_shader_cache", true).toBool();
373 Settings::values.use_accurate_gpu_emulation = 393 Settings::values.use_accurate_gpu_emulation =
374 qt_config->value("use_accurate_gpu_emulation", false).toBool(); 394 ReadSetting("use_accurate_gpu_emulation", false).toBool();
395 Settings::values.use_asynchronous_gpu_emulation =
396 ReadSetting("use_asynchronous_gpu_emulation", false).toBool();
375 397
376 Settings::values.bg_red = qt_config->value("bg_red", 0.0).toFloat(); 398 Settings::values.bg_red = ReadSetting("bg_red", 0.0).toFloat();
377 Settings::values.bg_green = qt_config->value("bg_green", 0.0).toFloat(); 399 Settings::values.bg_green = ReadSetting("bg_green", 0.0).toFloat();
378 Settings::values.bg_blue = qt_config->value("bg_blue", 0.0).toFloat(); 400 Settings::values.bg_blue = ReadSetting("bg_blue", 0.0).toFloat();
379 qt_config->endGroup(); 401 qt_config->endGroup();
380 402
381 qt_config->beginGroup("Audio"); 403 qt_config->beginGroup("Audio");
382 Settings::values.sink_id = qt_config->value("output_engine", "auto").toString().toStdString(); 404 Settings::values.sink_id = ReadSetting("output_engine", "auto").toString().toStdString();
383 Settings::values.enable_audio_stretching = 405 Settings::values.enable_audio_stretching =
384 qt_config->value("enable_audio_stretching", true).toBool(); 406 ReadSetting("enable_audio_stretching", true).toBool();
385 Settings::values.audio_device_id = 407 Settings::values.audio_device_id =
386 qt_config->value("output_device", "auto").toString().toStdString(); 408 ReadSetting("output_device", "auto").toString().toStdString();
387 Settings::values.volume = qt_config->value("volume", 1).toFloat(); 409 Settings::values.volume = ReadSetting("volume", 1).toFloat();
388 qt_config->endGroup(); 410 qt_config->endGroup();
389 411
390 qt_config->beginGroup("Data Storage"); 412 qt_config->beginGroup("Data Storage");
391 Settings::values.use_virtual_sd = qt_config->value("use_virtual_sd", true).toBool(); 413 Settings::values.use_virtual_sd = ReadSetting("use_virtual_sd", true).toBool();
392 FileUtil::GetUserPath( 414 FileUtil::GetUserPath(
393 FileUtil::UserPath::NANDDir, 415 FileUtil::UserPath::NANDDir,
394 qt_config 416 qt_config
@@ -406,30 +428,29 @@ void Config::ReadValues() {
406 qt_config->endGroup(); 428 qt_config->endGroup();
407 429
408 qt_config->beginGroup("Core"); 430 qt_config->beginGroup("Core");
409 Settings::values.use_cpu_jit = qt_config->value("use_cpu_jit", true).toBool(); 431 Settings::values.use_cpu_jit = ReadSetting("use_cpu_jit", true).toBool();
410 Settings::values.use_multi_core = qt_config->value("use_multi_core", false).toBool(); 432 Settings::values.use_multi_core = ReadSetting("use_multi_core", false).toBool();
411 qt_config->endGroup(); 433 qt_config->endGroup();
412 434
413 qt_config->beginGroup("System"); 435 qt_config->beginGroup("System");
414 Settings::values.use_docked_mode = qt_config->value("use_docked_mode", false).toBool(); 436 Settings::values.use_docked_mode = ReadSetting("use_docked_mode", false).toBool();
415 Settings::values.enable_nfc = qt_config->value("enable_nfc", true).toBool();
416 437
417 Settings::values.current_user = std::clamp<int>(qt_config->value("current_user", 0).toInt(), 0, 438 Settings::values.current_user =
418 Service::Account::MAX_USERS - 1); 439 std::clamp<int>(ReadSetting("current_user", 0).toInt(), 0, Service::Account::MAX_USERS - 1);
419 440
420 Settings::values.language_index = qt_config->value("language_index", 1).toInt(); 441 Settings::values.language_index = ReadSetting("language_index", 1).toInt();
421 442
422 const auto rng_seed_enabled = qt_config->value("rng_seed_enabled", false).toBool(); 443 const auto rng_seed_enabled = ReadSetting("rng_seed_enabled", false).toBool();
423 if (rng_seed_enabled) { 444 if (rng_seed_enabled) {
424 Settings::values.rng_seed = qt_config->value("rng_seed", 0).toULongLong(); 445 Settings::values.rng_seed = ReadSetting("rng_seed", 0).toULongLong();
425 } else { 446 } else {
426 Settings::values.rng_seed = std::nullopt; 447 Settings::values.rng_seed = std::nullopt;
427 } 448 }
428 449
429 const auto custom_rtc_enabled = qt_config->value("custom_rtc_enabled", false).toBool(); 450 const auto custom_rtc_enabled = ReadSetting("custom_rtc_enabled", false).toBool();
430 if (custom_rtc_enabled) { 451 if (custom_rtc_enabled) {
431 Settings::values.custom_rtc = 452 Settings::values.custom_rtc =
432 std::chrono::seconds(qt_config->value("custom_rtc", 0).toULongLong()); 453 std::chrono::seconds(ReadSetting("custom_rtc", 0).toULongLong());
433 } else { 454 } else {
434 Settings::values.custom_rtc = std::nullopt; 455 Settings::values.custom_rtc = std::nullopt;
435 } 456 }
@@ -437,35 +458,35 @@ void Config::ReadValues() {
437 qt_config->endGroup(); 458 qt_config->endGroup();
438 459
439 qt_config->beginGroup("Miscellaneous"); 460 qt_config->beginGroup("Miscellaneous");
440 Settings::values.log_filter = qt_config->value("log_filter", "*:Info").toString().toStdString(); 461 Settings::values.log_filter = ReadSetting("log_filter", "*:Info").toString().toStdString();
441 Settings::values.use_dev_keys = qt_config->value("use_dev_keys", false).toBool(); 462 Settings::values.use_dev_keys = ReadSetting("use_dev_keys", false).toBool();
442 qt_config->endGroup(); 463 qt_config->endGroup();
443 464
444 qt_config->beginGroup("Debugging"); 465 qt_config->beginGroup("Debugging");
445 Settings::values.use_gdbstub = qt_config->value("use_gdbstub", false).toBool(); 466 Settings::values.use_gdbstub = ReadSetting("use_gdbstub", false).toBool();
446 Settings::values.gdbstub_port = qt_config->value("gdbstub_port", 24689).toInt(); 467 Settings::values.gdbstub_port = ReadSetting("gdbstub_port", 24689).toInt();
447 Settings::values.program_args = qt_config->value("program_args", "").toString().toStdString(); 468 Settings::values.program_args = ReadSetting("program_args", "").toString().toStdString();
448 Settings::values.dump_exefs = qt_config->value("dump_exefs", false).toBool(); 469 Settings::values.dump_exefs = ReadSetting("dump_exefs", false).toBool();
449 Settings::values.dump_nso = qt_config->value("dump_nso", false).toBool(); 470 Settings::values.dump_nso = ReadSetting("dump_nso", false).toBool();
450 qt_config->endGroup(); 471 qt_config->endGroup();
451 472
452 qt_config->beginGroup("WebService"); 473 qt_config->beginGroup("WebService");
453 Settings::values.enable_telemetry = qt_config->value("enable_telemetry", true).toBool(); 474 Settings::values.enable_telemetry = ReadSetting("enable_telemetry", true).toBool();
454 Settings::values.web_api_url = 475 Settings::values.web_api_url =
455 qt_config->value("web_api_url", "https://api.yuzu-emu.org").toString().toStdString(); 476 ReadSetting("web_api_url", "https://api.yuzu-emu.org").toString().toStdString();
456 Settings::values.yuzu_username = qt_config->value("yuzu_username").toString().toStdString(); 477 Settings::values.yuzu_username = ReadSetting("yuzu_username").toString().toStdString();
457 Settings::values.yuzu_token = qt_config->value("yuzu_token").toString().toStdString(); 478 Settings::values.yuzu_token = ReadSetting("yuzu_token").toString().toStdString();
458 qt_config->endGroup(); 479 qt_config->endGroup();
459 480
460 const auto size = qt_config->beginReadArray("DisabledAddOns"); 481 const auto size = qt_config->beginReadArray("DisabledAddOns");
461 for (int i = 0; i < size; ++i) { 482 for (int i = 0; i < size; ++i) {
462 qt_config->setArrayIndex(i); 483 qt_config->setArrayIndex(i);
463 const auto title_id = qt_config->value("title_id", 0).toULongLong(); 484 const auto title_id = ReadSetting("title_id", 0).toULongLong();
464 std::vector<std::string> out; 485 std::vector<std::string> out;
465 const auto d_size = qt_config->beginReadArray("disabled"); 486 const auto d_size = qt_config->beginReadArray("disabled");
466 for (int j = 0; j < d_size; ++j) { 487 for (int j = 0; j < d_size; ++j) {
467 qt_config->setArrayIndex(j); 488 qt_config->setArrayIndex(j);
468 out.push_back(qt_config->value("d", "").toString().toStdString()); 489 out.push_back(ReadSetting("d", "").toString().toStdString());
469 } 490 }
470 qt_config->endArray(); 491 qt_config->endArray();
471 Settings::values.disabled_addons.insert_or_assign(title_id, out); 492 Settings::values.disabled_addons.insert_or_assign(title_id, out);
@@ -473,72 +494,64 @@ void Config::ReadValues() {
473 qt_config->endArray(); 494 qt_config->endArray();
474 495
475 qt_config->beginGroup("UI"); 496 qt_config->beginGroup("UI");
476 UISettings::values.theme = qt_config->value("theme", UISettings::themes[0].second).toString(); 497 UISettings::values.theme = ReadSetting("theme", UISettings::themes[0].second).toString();
477 UISettings::values.enable_discord_presence = 498 UISettings::values.enable_discord_presence =
478 qt_config->value("enable_discord_presence", true).toBool(); 499 ReadSetting("enable_discord_presence", true).toBool();
479 UISettings::values.screenshot_resolution_factor = 500 UISettings::values.screenshot_resolution_factor =
480 static_cast<u16>(qt_config->value("screenshot_resolution_factor", 0).toUInt()); 501 static_cast<u16>(ReadSetting("screenshot_resolution_factor", 0).toUInt());
481 UISettings::values.select_user_on_boot = 502 UISettings::values.select_user_on_boot = ReadSetting("select_user_on_boot", false).toBool();
482 qt_config->value("select_user_on_boot", false).toBool();
483 503
484 qt_config->beginGroup("UIGameList"); 504 qt_config->beginGroup("UIGameList");
485 UISettings::values.show_unknown = qt_config->value("show_unknown", true).toBool(); 505 UISettings::values.show_unknown = ReadSetting("show_unknown", true).toBool();
486 UISettings::values.show_add_ons = qt_config->value("show_add_ons", true).toBool(); 506 UISettings::values.show_add_ons = ReadSetting("show_add_ons", true).toBool();
487 UISettings::values.icon_size = qt_config->value("icon_size", 64).toUInt(); 507 UISettings::values.icon_size = ReadSetting("icon_size", 64).toUInt();
488 UISettings::values.row_1_text_id = qt_config->value("row_1_text_id", 3).toUInt(); 508 UISettings::values.row_1_text_id = ReadSetting("row_1_text_id", 3).toUInt();
489 UISettings::values.row_2_text_id = qt_config->value("row_2_text_id", 2).toUInt(); 509 UISettings::values.row_2_text_id = ReadSetting("row_2_text_id", 2).toUInt();
490 qt_config->endGroup(); 510 qt_config->endGroup();
491 511
492 qt_config->beginGroup("UILayout"); 512 qt_config->beginGroup("UILayout");
493 UISettings::values.geometry = qt_config->value("geometry").toByteArray(); 513 UISettings::values.geometry = ReadSetting("geometry").toByteArray();
494 UISettings::values.state = qt_config->value("state").toByteArray(); 514 UISettings::values.state = ReadSetting("state").toByteArray();
495 UISettings::values.renderwindow_geometry = 515 UISettings::values.renderwindow_geometry = ReadSetting("geometryRenderWindow").toByteArray();
496 qt_config->value("geometryRenderWindow").toByteArray(); 516 UISettings::values.gamelist_header_state = ReadSetting("gameListHeaderState").toByteArray();
497 UISettings::values.gamelist_header_state =
498 qt_config->value("gameListHeaderState").toByteArray();
499 UISettings::values.microprofile_geometry = 517 UISettings::values.microprofile_geometry =
500 qt_config->value("microProfileDialogGeometry").toByteArray(); 518 ReadSetting("microProfileDialogGeometry").toByteArray();
501 UISettings::values.microprofile_visible = 519 UISettings::values.microprofile_visible =
502 qt_config->value("microProfileDialogVisible", false).toBool(); 520 ReadSetting("microProfileDialogVisible", false).toBool();
503 qt_config->endGroup(); 521 qt_config->endGroup();
504 522
505 qt_config->beginGroup("Paths"); 523 qt_config->beginGroup("Paths");
506 UISettings::values.roms_path = qt_config->value("romsPath").toString(); 524 UISettings::values.roms_path = ReadSetting("romsPath").toString();
507 UISettings::values.symbols_path = qt_config->value("symbolsPath").toString(); 525 UISettings::values.symbols_path = ReadSetting("symbolsPath").toString();
508 UISettings::values.gamedir = qt_config->value("gameListRootDir", ".").toString(); 526 UISettings::values.game_directory_path = ReadSetting("gameListRootDir", ".").toString();
509 UISettings::values.gamedir_deepscan = qt_config->value("gameListDeepScan", false).toBool(); 527 UISettings::values.game_directory_deepscan = ReadSetting("gameListDeepScan", false).toBool();
510 UISettings::values.recent_files = qt_config->value("recentFiles").toStringList(); 528 UISettings::values.recent_files = ReadSetting("recentFiles").toStringList();
511 qt_config->endGroup(); 529 qt_config->endGroup();
512 530
513 qt_config->beginGroup("Shortcuts"); 531 qt_config->beginGroup("Shortcuts");
514 QStringList groups = qt_config->childGroups(); 532 for (auto [name, group, shortcut] : default_hotkeys) {
515 for (auto group : groups) { 533 auto [keyseq, context] = shortcut;
516 qt_config->beginGroup(group); 534 qt_config->beginGroup(group);
517 535 qt_config->beginGroup(name);
518 QStringList hotkeys = qt_config->childGroups(); 536 UISettings::values.shortcuts.push_back(
519 for (auto hotkey : hotkeys) { 537 {name,
520 qt_config->beginGroup(hotkey); 538 group,
521 UISettings::values.shortcuts.emplace_back(UISettings::Shortcut( 539 {ReadSetting("KeySeq", keyseq).toString(), ReadSetting("Context", context).toInt()}});
522 group + "/" + hotkey, 540 qt_config->endGroup();
523 UISettings::ContextualShortcut(qt_config->value("KeySeq").toString(),
524 qt_config->value("Context").toInt())));
525 qt_config->endGroup();
526 }
527
528 qt_config->endGroup(); 541 qt_config->endGroup();
529 } 542 }
530 qt_config->endGroup(); 543 qt_config->endGroup();
531 544
532 UISettings::values.single_window_mode = qt_config->value("singleWindowMode", true).toBool(); 545 UISettings::values.single_window_mode = ReadSetting("singleWindowMode", true).toBool();
533 UISettings::values.fullscreen = qt_config->value("fullscreen", false).toBool(); 546 UISettings::values.fullscreen = ReadSetting("fullscreen", false).toBool();
534 UISettings::values.display_titlebar = qt_config->value("displayTitleBars", true).toBool(); 547 UISettings::values.display_titlebar = ReadSetting("displayTitleBars", true).toBool();
535 UISettings::values.show_filter_bar = qt_config->value("showFilterBar", true).toBool(); 548 UISettings::values.show_filter_bar = ReadSetting("showFilterBar", true).toBool();
536 UISettings::values.show_status_bar = qt_config->value("showStatusBar", true).toBool(); 549 UISettings::values.show_status_bar = ReadSetting("showStatusBar", true).toBool();
537 UISettings::values.confirm_before_closing = qt_config->value("confirmClose", true).toBool(); 550 UISettings::values.confirm_before_closing = ReadSetting("confirmClose", true).toBool();
538 UISettings::values.first_start = qt_config->value("firstStart", true).toBool(); 551 UISettings::values.first_start = ReadSetting("firstStart", true).toBool();
539 UISettings::values.callout_flags = qt_config->value("calloutFlags", 0).toUInt(); 552 UISettings::values.callout_flags = ReadSetting("calloutFlags", 0).toUInt();
540 UISettings::values.show_console = qt_config->value("showConsole", false).toBool(); 553 UISettings::values.show_console = ReadSetting("showConsole", false).toBool();
541 UISettings::values.profile_index = qt_config->value("profileIndex", 0).toUInt(); 554 UISettings::values.profile_index = ReadSetting("profileIndex", 0).toUInt();
542 555
543 ApplyDefaultProfileIfInputInvalid(); 556 ApplyDefaultProfileIfInputInvalid();
544 557
@@ -549,62 +562,79 @@ void Config::SavePlayerValues() {
549 for (std::size_t p = 0; p < Settings::values.players.size(); ++p) { 562 for (std::size_t p = 0; p < Settings::values.players.size(); ++p) {
550 const auto& player = Settings::values.players[p]; 563 const auto& player = Settings::values.players[p];
551 564
552 qt_config->setValue(QString("player_%1_connected").arg(p), player.connected); 565 WriteSetting(QString("player_%1_connected").arg(p), player.connected, false);
553 qt_config->setValue(QString("player_%1_type").arg(p), static_cast<u8>(player.type)); 566 WriteSetting(QString("player_%1_type").arg(p), static_cast<u8>(player.type),
567 static_cast<u8>(Settings::ControllerType::DualJoycon));
554 568
555 qt_config->setValue(QString("player_%1_body_color_left").arg(p), player.body_color_left); 569 WriteSetting(QString("player_%1_body_color_left").arg(p), player.body_color_left,
556 qt_config->setValue(QString("player_%1_body_color_right").arg(p), player.body_color_right); 570 Settings::JOYCON_BODY_NEON_BLUE);
557 qt_config->setValue(QString("player_%1_button_color_left").arg(p), 571 WriteSetting(QString("player_%1_body_color_right").arg(p), player.body_color_right,
558 player.button_color_left); 572 Settings::JOYCON_BODY_NEON_RED);
559 qt_config->setValue(QString("player_%1_button_color_right").arg(p), 573 WriteSetting(QString("player_%1_button_color_left").arg(p), player.button_color_left,
560 player.button_color_right); 574 Settings::JOYCON_BUTTONS_NEON_BLUE);
575 WriteSetting(QString("player_%1_button_color_right").arg(p), player.button_color_right,
576 Settings::JOYCON_BUTTONS_NEON_RED);
561 577
562 for (int i = 0; i < Settings::NativeButton::NumButtons; ++i) { 578 for (int i = 0; i < Settings::NativeButton::NumButtons; ++i) {
563 qt_config->setValue(QString("player_%1_").arg(p) + 579 std::string default_param = InputCommon::GenerateKeyboardParam(default_buttons[i]);
564 QString::fromStdString(Settings::NativeButton::mapping[i]), 580 WriteSetting(QString("player_%1_").arg(p) +
565 QString::fromStdString(player.buttons[i])); 581 QString::fromStdString(Settings::NativeButton::mapping[i]),
582 QString::fromStdString(player.buttons[i]),
583 QString::fromStdString(default_param));
566 } 584 }
567 for (int i = 0; i < Settings::NativeAnalog::NumAnalogs; ++i) { 585 for (int i = 0; i < Settings::NativeAnalog::NumAnalogs; ++i) {
568 qt_config->setValue(QString("player_%1_").arg(p) + 586 std::string default_param = InputCommon::GenerateAnalogParamFromKeys(
569 QString::fromStdString(Settings::NativeAnalog::mapping[i]), 587 default_analogs[i][0], default_analogs[i][1], default_analogs[i][2],
570 QString::fromStdString(player.analogs[i])); 588 default_analogs[i][3], default_analogs[i][4], 0.5f);
589 WriteSetting(QString("player_%1_").arg(p) +
590 QString::fromStdString(Settings::NativeAnalog::mapping[i]),
591 QString::fromStdString(player.analogs[i]),
592 QString::fromStdString(default_param));
571 } 593 }
572 } 594 }
573} 595}
574 596
575void Config::SaveDebugValues() { 597void Config::SaveDebugValues() {
576 qt_config->setValue("debug_pad_enabled", Settings::values.debug_pad_enabled); 598 WriteSetting("debug_pad_enabled", Settings::values.debug_pad_enabled, false);
577 for (int i = 0; i < Settings::NativeButton::NumButtons; ++i) { 599 for (int i = 0; i < Settings::NativeButton::NumButtons; ++i) {
578 qt_config->setValue(QString("debug_pad_") + 600 std::string default_param = InputCommon::GenerateKeyboardParam(default_buttons[i]);
579 QString::fromStdString(Settings::NativeButton::mapping[i]), 601 WriteSetting(QString("debug_pad_") +
580 QString::fromStdString(Settings::values.debug_pad_buttons[i])); 602 QString::fromStdString(Settings::NativeButton::mapping[i]),
603 QString::fromStdString(Settings::values.debug_pad_buttons[i]),
604 QString::fromStdString(default_param));
581 } 605 }
582 for (int i = 0; i < Settings::NativeAnalog::NumAnalogs; ++i) { 606 for (int i = 0; i < Settings::NativeAnalog::NumAnalogs; ++i) {
583 qt_config->setValue(QString("debug_pad_") + 607 std::string default_param = InputCommon::GenerateAnalogParamFromKeys(
584 QString::fromStdString(Settings::NativeAnalog::mapping[i]), 608 default_analogs[i][0], default_analogs[i][1], default_analogs[i][2],
585 QString::fromStdString(Settings::values.debug_pad_analogs[i])); 609 default_analogs[i][3], default_analogs[i][4], 0.5f);
610 WriteSetting(QString("debug_pad_") +
611 QString::fromStdString(Settings::NativeAnalog::mapping[i]),
612 QString::fromStdString(Settings::values.debug_pad_analogs[i]),
613 QString::fromStdString(default_param));
586 } 614 }
587} 615}
588 616
589void Config::SaveMouseValues() { 617void Config::SaveMouseValues() {
590 qt_config->setValue("mouse_enabled", Settings::values.mouse_enabled); 618 WriteSetting("mouse_enabled", Settings::values.mouse_enabled, false);
591 619
592 for (int i = 0; i < Settings::NativeMouseButton::NumMouseButtons; ++i) { 620 for (int i = 0; i < Settings::NativeMouseButton::NumMouseButtons; ++i) {
593 qt_config->setValue(QString("mouse_") + 621 std::string default_param = InputCommon::GenerateKeyboardParam(default_mouse_buttons[i]);
594 QString::fromStdString(Settings::NativeMouseButton::mapping[i]), 622 WriteSetting(QString("mouse_") +
595 QString::fromStdString(Settings::values.mouse_buttons[i])); 623 QString::fromStdString(Settings::NativeMouseButton::mapping[i]),
624 QString::fromStdString(Settings::values.mouse_buttons[i]),
625 QString::fromStdString(default_param));
596 } 626 }
597} 627}
598 628
599void Config::SaveTouchscreenValues() { 629void Config::SaveTouchscreenValues() {
600 qt_config->setValue("touchscreen_enabled", Settings::values.touchscreen.enabled); 630 WriteSetting("touchscreen_enabled", Settings::values.touchscreen.enabled, true);
601 qt_config->setValue("touchscreen_device", 631 WriteSetting("touchscreen_device", QString::fromStdString(Settings::values.touchscreen.device),
602 QString::fromStdString(Settings::values.touchscreen.device)); 632 "engine:emu_window");
603 633
604 qt_config->setValue("touchscreen_finger", Settings::values.touchscreen.finger); 634 WriteSetting("touchscreen_finger", Settings::values.touchscreen.finger, 0);
605 qt_config->setValue("touchscreen_angle", Settings::values.touchscreen.rotation_angle); 635 WriteSetting("touchscreen_angle", Settings::values.touchscreen.rotation_angle, 0);
606 qt_config->setValue("touchscreen_diameter_x", Settings::values.touchscreen.diameter_x); 636 WriteSetting("touchscreen_diameter_x", Settings::values.touchscreen.diameter_x, 15);
607 qt_config->setValue("touchscreen_diameter_y", Settings::values.touchscreen.diameter_y); 637 WriteSetting("touchscreen_diameter_y", Settings::values.touchscreen.diameter_y, 15);
608} 638}
609 639
610void Config::SaveValues() { 640void Config::SaveValues() {
@@ -615,88 +645,95 @@ void Config::SaveValues() {
615 SaveMouseValues(); 645 SaveMouseValues();
616 SaveTouchscreenValues(); 646 SaveTouchscreenValues();
617 647
618 qt_config->setValue("motion_device", QString::fromStdString(Settings::values.motion_device)); 648 WriteSetting("motion_device", QString::fromStdString(Settings::values.motion_device),
619 qt_config->setValue("keyboard_enabled", Settings::values.keyboard_enabled); 649 "engine:motion_emu,update_period:100,sensitivity:0.01");
650 WriteSetting("keyboard_enabled", Settings::values.keyboard_enabled, false);
620 651
621 qt_config->endGroup(); 652 qt_config->endGroup();
622 653
623 qt_config->beginGroup("Core"); 654 qt_config->beginGroup("Core");
624 qt_config->setValue("use_cpu_jit", Settings::values.use_cpu_jit); 655 WriteSetting("use_cpu_jit", Settings::values.use_cpu_jit, true);
625 qt_config->setValue("use_multi_core", Settings::values.use_multi_core); 656 WriteSetting("use_multi_core", Settings::values.use_multi_core, false);
626 qt_config->endGroup(); 657 qt_config->endGroup();
627 658
628 qt_config->beginGroup("Renderer"); 659 qt_config->beginGroup("Renderer");
629 qt_config->setValue("resolution_factor", (double)Settings::values.resolution_factor); 660 WriteSetting("resolution_factor", (double)Settings::values.resolution_factor, 1.0);
630 qt_config->setValue("use_frame_limit", Settings::values.use_frame_limit); 661 WriteSetting("use_frame_limit", Settings::values.use_frame_limit, true);
631 qt_config->setValue("frame_limit", Settings::values.frame_limit); 662 WriteSetting("frame_limit", Settings::values.frame_limit, 100);
632 qt_config->setValue("use_accurate_gpu_emulation", Settings::values.use_accurate_gpu_emulation); 663 WriteSetting("use_disk_shader_cache", Settings::values.use_disk_shader_cache, true);
664 WriteSetting("use_accurate_gpu_emulation", Settings::values.use_accurate_gpu_emulation, false);
665 WriteSetting("use_asynchronous_gpu_emulation", Settings::values.use_asynchronous_gpu_emulation,
666 false);
633 667
634 // Cast to double because Qt's written float values are not human-readable 668 // Cast to double because Qt's written float values are not human-readable
635 qt_config->setValue("bg_red", (double)Settings::values.bg_red); 669 WriteSetting("bg_red", (double)Settings::values.bg_red, 0.0);
636 qt_config->setValue("bg_green", (double)Settings::values.bg_green); 670 WriteSetting("bg_green", (double)Settings::values.bg_green, 0.0);
637 qt_config->setValue("bg_blue", (double)Settings::values.bg_blue); 671 WriteSetting("bg_blue", (double)Settings::values.bg_blue, 0.0);
638 qt_config->endGroup(); 672 qt_config->endGroup();
639 673
640 qt_config->beginGroup("Audio"); 674 qt_config->beginGroup("Audio");
641 qt_config->setValue("output_engine", QString::fromStdString(Settings::values.sink_id)); 675 WriteSetting("output_engine", QString::fromStdString(Settings::values.sink_id), "auto");
642 qt_config->setValue("enable_audio_stretching", Settings::values.enable_audio_stretching); 676 WriteSetting("enable_audio_stretching", Settings::values.enable_audio_stretching, true);
643 qt_config->setValue("output_device", QString::fromStdString(Settings::values.audio_device_id)); 677 WriteSetting("output_device", QString::fromStdString(Settings::values.audio_device_id), "auto");
644 qt_config->setValue("volume", Settings::values.volume); 678 WriteSetting("volume", Settings::values.volume, 1.0f);
645 qt_config->endGroup(); 679 qt_config->endGroup();
646 680
647 qt_config->beginGroup("Data Storage"); 681 qt_config->beginGroup("Data Storage");
648 qt_config->setValue("use_virtual_sd", Settings::values.use_virtual_sd); 682 WriteSetting("use_virtual_sd", Settings::values.use_virtual_sd, true);
649 qt_config->setValue("nand_directory", 683 WriteSetting("nand_directory",
650 QString::fromStdString(FileUtil::GetUserPath(FileUtil::UserPath::NANDDir))); 684 QString::fromStdString(FileUtil::GetUserPath(FileUtil::UserPath::NANDDir)),
651 qt_config->setValue("sdmc_directory", 685 QString::fromStdString(FileUtil::GetUserPath(FileUtil::UserPath::NANDDir)));
652 QString::fromStdString(FileUtil::GetUserPath(FileUtil::UserPath::SDMCDir))); 686 WriteSetting("sdmc_directory",
687 QString::fromStdString(FileUtil::GetUserPath(FileUtil::UserPath::SDMCDir)),
688 QString::fromStdString(FileUtil::GetUserPath(FileUtil::UserPath::SDMCDir)));
653 qt_config->endGroup(); 689 qt_config->endGroup();
654 690
655 qt_config->beginGroup("System"); 691 qt_config->beginGroup("System");
656 qt_config->setValue("use_docked_mode", Settings::values.use_docked_mode); 692 WriteSetting("use_docked_mode", Settings::values.use_docked_mode, false);
657 qt_config->setValue("enable_nfc", Settings::values.enable_nfc); 693 WriteSetting("current_user", Settings::values.current_user, 0);
658 qt_config->setValue("current_user", Settings::values.current_user); 694 WriteSetting("language_index", Settings::values.language_index, 1);
659 qt_config->setValue("language_index", Settings::values.language_index);
660 695
661 qt_config->setValue("rng_seed_enabled", Settings::values.rng_seed.has_value()); 696 WriteSetting("rng_seed_enabled", Settings::values.rng_seed.has_value(), false);
662 qt_config->setValue("rng_seed", Settings::values.rng_seed.value_or(0)); 697 WriteSetting("rng_seed", Settings::values.rng_seed.value_or(0), 0);
663 698
664 qt_config->setValue("custom_rtc_enabled", Settings::values.custom_rtc.has_value()); 699 WriteSetting("custom_rtc_enabled", Settings::values.custom_rtc.has_value(), false);
665 qt_config->setValue("custom_rtc", 700 WriteSetting("custom_rtc",
666 QVariant::fromValue<long long>( 701 QVariant::fromValue<long long>(
667 Settings::values.custom_rtc.value_or(std::chrono::seconds{}).count())); 702 Settings::values.custom_rtc.value_or(std::chrono::seconds{}).count()),
703 0);
668 704
669 qt_config->endGroup(); 705 qt_config->endGroup();
670 706
671 qt_config->beginGroup("Miscellaneous"); 707 qt_config->beginGroup("Miscellaneous");
672 qt_config->setValue("log_filter", QString::fromStdString(Settings::values.log_filter)); 708 WriteSetting("log_filter", QString::fromStdString(Settings::values.log_filter), "*:Info");
673 qt_config->setValue("use_dev_keys", Settings::values.use_dev_keys); 709 WriteSetting("use_dev_keys", Settings::values.use_dev_keys, false);
674 qt_config->endGroup(); 710 qt_config->endGroup();
675 711
676 qt_config->beginGroup("Debugging"); 712 qt_config->beginGroup("Debugging");
677 qt_config->setValue("use_gdbstub", Settings::values.use_gdbstub); 713 WriteSetting("use_gdbstub", Settings::values.use_gdbstub, false);
678 qt_config->setValue("gdbstub_port", Settings::values.gdbstub_port); 714 WriteSetting("gdbstub_port", Settings::values.gdbstub_port, 24689);
679 qt_config->setValue("program_args", QString::fromStdString(Settings::values.program_args)); 715 WriteSetting("program_args", QString::fromStdString(Settings::values.program_args), "");
680 qt_config->setValue("dump_exefs", Settings::values.dump_exefs); 716 WriteSetting("dump_exefs", Settings::values.dump_exefs, false);
681 qt_config->setValue("dump_nso", Settings::values.dump_nso); 717 WriteSetting("dump_nso", Settings::values.dump_nso, false);
682 qt_config->endGroup(); 718 qt_config->endGroup();
683 719
684 qt_config->beginGroup("WebService"); 720 qt_config->beginGroup("WebService");
685 qt_config->setValue("enable_telemetry", Settings::values.enable_telemetry); 721 WriteSetting("enable_telemetry", Settings::values.enable_telemetry, true);
686 qt_config->setValue("web_api_url", QString::fromStdString(Settings::values.web_api_url)); 722 WriteSetting("web_api_url", QString::fromStdString(Settings::values.web_api_url),
687 qt_config->setValue("yuzu_username", QString::fromStdString(Settings::values.yuzu_username)); 723 "https://api.yuzu-emu.org");
688 qt_config->setValue("yuzu_token", QString::fromStdString(Settings::values.yuzu_token)); 724 WriteSetting("yuzu_username", QString::fromStdString(Settings::values.yuzu_username));
725 WriteSetting("yuzu_token", QString::fromStdString(Settings::values.yuzu_token));
689 qt_config->endGroup(); 726 qt_config->endGroup();
690 727
691 qt_config->beginWriteArray("DisabledAddOns"); 728 qt_config->beginWriteArray("DisabledAddOns");
692 int i = 0; 729 int i = 0;
693 for (const auto& elem : Settings::values.disabled_addons) { 730 for (const auto& elem : Settings::values.disabled_addons) {
694 qt_config->setArrayIndex(i); 731 qt_config->setArrayIndex(i);
695 qt_config->setValue("title_id", QVariant::fromValue<u64>(elem.first)); 732 WriteSetting("title_id", QVariant::fromValue<u64>(elem.first), 0);
696 qt_config->beginWriteArray("disabled"); 733 qt_config->beginWriteArray("disabled");
697 for (std::size_t j = 0; j < elem.second.size(); ++j) { 734 for (std::size_t j = 0; j < elem.second.size(); ++j) {
698 qt_config->setArrayIndex(static_cast<int>(j)); 735 qt_config->setArrayIndex(static_cast<int>(j));
699 qt_config->setValue("d", QString::fromStdString(elem.second[j])); 736 WriteSetting("d", QString::fromStdString(elem.second[j]), "");
700 } 737 }
701 qt_config->endArray(); 738 qt_config->endArray();
702 ++i; 739 ++i;
@@ -704,60 +741,93 @@ void Config::SaveValues() {
704 qt_config->endArray(); 741 qt_config->endArray();
705 742
706 qt_config->beginGroup("UI"); 743 qt_config->beginGroup("UI");
707 qt_config->setValue("theme", UISettings::values.theme); 744 WriteSetting("theme", UISettings::values.theme, UISettings::themes[0].second);
708 qt_config->setValue("enable_discord_presence", UISettings::values.enable_discord_presence); 745 WriteSetting("enable_discord_presence", UISettings::values.enable_discord_presence, true);
709 qt_config->setValue("screenshot_resolution_factor", 746 WriteSetting("screenshot_resolution_factor", UISettings::values.screenshot_resolution_factor,
710 UISettings::values.screenshot_resolution_factor); 747 0);
711 qt_config->setValue("select_user_on_boot", UISettings::values.select_user_on_boot); 748 WriteSetting("select_user_on_boot", UISettings::values.select_user_on_boot, false);
712 749
713 qt_config->beginGroup("UIGameList"); 750 qt_config->beginGroup("UIGameList");
714 qt_config->setValue("show_unknown", UISettings::values.show_unknown); 751 WriteSetting("show_unknown", UISettings::values.show_unknown, true);
715 qt_config->setValue("show_add_ons", UISettings::values.show_add_ons); 752 WriteSetting("show_add_ons", UISettings::values.show_add_ons, true);
716 qt_config->setValue("icon_size", UISettings::values.icon_size); 753 WriteSetting("icon_size", UISettings::values.icon_size, 64);
717 qt_config->setValue("row_1_text_id", UISettings::values.row_1_text_id); 754 WriteSetting("row_1_text_id", UISettings::values.row_1_text_id, 3);
718 qt_config->setValue("row_2_text_id", UISettings::values.row_2_text_id); 755 WriteSetting("row_2_text_id", UISettings::values.row_2_text_id, 2);
719 qt_config->endGroup(); 756 qt_config->endGroup();
720 757
721 qt_config->beginGroup("UILayout"); 758 qt_config->beginGroup("UILayout");
722 qt_config->setValue("geometry", UISettings::values.geometry); 759 WriteSetting("geometry", UISettings::values.geometry);
723 qt_config->setValue("state", UISettings::values.state); 760 WriteSetting("state", UISettings::values.state);
724 qt_config->setValue("geometryRenderWindow", UISettings::values.renderwindow_geometry); 761 WriteSetting("geometryRenderWindow", UISettings::values.renderwindow_geometry);
725 qt_config->setValue("gameListHeaderState", UISettings::values.gamelist_header_state); 762 WriteSetting("gameListHeaderState", UISettings::values.gamelist_header_state);
726 qt_config->setValue("microProfileDialogGeometry", UISettings::values.microprofile_geometry); 763 WriteSetting("microProfileDialogGeometry", UISettings::values.microprofile_geometry);
727 qt_config->setValue("microProfileDialogVisible", UISettings::values.microprofile_visible); 764 WriteSetting("microProfileDialogVisible", UISettings::values.microprofile_visible, false);
728 qt_config->endGroup(); 765 qt_config->endGroup();
729 766
730 qt_config->beginGroup("Paths"); 767 qt_config->beginGroup("Paths");
731 qt_config->setValue("romsPath", UISettings::values.roms_path); 768 WriteSetting("romsPath", UISettings::values.roms_path);
732 qt_config->setValue("symbolsPath", UISettings::values.symbols_path); 769 WriteSetting("symbolsPath", UISettings::values.symbols_path);
733 qt_config->setValue("screenshotPath", UISettings::values.screenshot_path); 770 WriteSetting("screenshotPath", UISettings::values.screenshot_path);
734 qt_config->setValue("gameListRootDir", UISettings::values.gamedir); 771 WriteSetting("gameListRootDir", UISettings::values.game_directory_path, ".");
735 qt_config->setValue("gameListDeepScan", UISettings::values.gamedir_deepscan); 772 WriteSetting("gameListDeepScan", UISettings::values.game_directory_deepscan, false);
736 qt_config->setValue("recentFiles", UISettings::values.recent_files); 773 WriteSetting("recentFiles", UISettings::values.recent_files);
737 qt_config->endGroup(); 774 qt_config->endGroup();
738 775
739 qt_config->beginGroup("Shortcuts"); 776 qt_config->beginGroup("Shortcuts");
740 for (auto shortcut : UISettings::values.shortcuts) { 777 // Lengths of UISettings::values.shortcuts & default_hotkeys are same.
741 qt_config->setValue(shortcut.first + "/KeySeq", shortcut.second.first); 778 // However, their ordering must also be the same.
742 qt_config->setValue(shortcut.first + "/Context", shortcut.second.second); 779 for (std::size_t i = 0; i < default_hotkeys.size(); i++) {
780 auto [name, group, shortcut] = UISettings::values.shortcuts[i];
781 qt_config->beginGroup(group);
782 qt_config->beginGroup(name);
783 WriteSetting("KeySeq", shortcut.first, default_hotkeys[i].shortcut.first);
784 WriteSetting("Context", shortcut.second, default_hotkeys[i].shortcut.second);
785 qt_config->endGroup();
786 qt_config->endGroup();
743 } 787 }
744 qt_config->endGroup(); 788 qt_config->endGroup();
745 789
746 qt_config->setValue("singleWindowMode", UISettings::values.single_window_mode); 790 WriteSetting("singleWindowMode", UISettings::values.single_window_mode, true);
747 qt_config->setValue("fullscreen", UISettings::values.fullscreen); 791 WriteSetting("fullscreen", UISettings::values.fullscreen, false);
748 qt_config->setValue("displayTitleBars", UISettings::values.display_titlebar); 792 WriteSetting("displayTitleBars", UISettings::values.display_titlebar, true);
749 qt_config->setValue("showFilterBar", UISettings::values.show_filter_bar); 793 WriteSetting("showFilterBar", UISettings::values.show_filter_bar, true);
750 qt_config->setValue("showStatusBar", UISettings::values.show_status_bar); 794 WriteSetting("showStatusBar", UISettings::values.show_status_bar, true);
751 qt_config->setValue("confirmClose", UISettings::values.confirm_before_closing); 795 WriteSetting("confirmClose", UISettings::values.confirm_before_closing, true);
752 qt_config->setValue("firstStart", UISettings::values.first_start); 796 WriteSetting("firstStart", UISettings::values.first_start, true);
753 qt_config->setValue("calloutFlags", UISettings::values.callout_flags); 797 WriteSetting("calloutFlags", UISettings::values.callout_flags, 0);
754 qt_config->setValue("showConsole", UISettings::values.show_console); 798 WriteSetting("showConsole", UISettings::values.show_console, false);
755 qt_config->setValue("profileIndex", UISettings::values.profile_index); 799 WriteSetting("profileIndex", UISettings::values.profile_index, 0);
756 qt_config->endGroup(); 800 qt_config->endGroup();
757} 801}
758 802
803QVariant Config::ReadSetting(const QString& name) const {
804 return qt_config->value(name);
805}
806
807QVariant Config::ReadSetting(const QString& name, const QVariant& default_value) const {
808 QVariant result;
809 if (qt_config->value(name + "/default", false).toBool()) {
810 result = default_value;
811 } else {
812 result = qt_config->value(name, default_value);
813 }
814 return result;
815}
816
817void Config::WriteSetting(const QString& name, const QVariant& value) {
818 qt_config->setValue(name, value);
819}
820
821void Config::WriteSetting(const QString& name, const QVariant& value,
822 const QVariant& default_value) {
823 qt_config->setValue(name + "/default", value == default_value);
824 qt_config->setValue(name, value);
825}
826
759void Config::Reload() { 827void Config::Reload() {
760 ReadValues(); 828 ReadValues();
829 // To apply default value changes
830 SaveValues();
761 Settings::Apply(); 831 Settings::Apply();
762} 832}
763 833
diff --git a/src/yuzu/configuration/config.h b/src/yuzu/configuration/config.h
index e73ad19bb..221d2364c 100644
--- a/src/yuzu/configuration/config.h
+++ b/src/yuzu/configuration/config.h
@@ -9,6 +9,7 @@
9#include <string> 9#include <string>
10#include <QVariant> 10#include <QVariant>
11#include "core/settings.h" 11#include "core/settings.h"
12#include "yuzu/ui_settings.h"
12 13
13class QSettings; 14class QSettings;
14 15
@@ -42,6 +43,13 @@ private:
42 void SaveMouseValues(); 43 void SaveMouseValues();
43 void SaveTouchscreenValues(); 44 void SaveTouchscreenValues();
44 45
46 QVariant ReadSetting(const QString& name) const;
47 QVariant ReadSetting(const QString& name, const QVariant& default_value) const;
48 void WriteSetting(const QString& name, const QVariant& value);
49 void WriteSetting(const QString& name, const QVariant& value, const QVariant& default_value);
50
51 static const std::array<UISettings::Shortcut, 15> default_hotkeys;
52
45 std::unique_ptr<QSettings> qt_config; 53 std::unique_ptr<QSettings> qt_config;
46 std::string qt_config_loc; 54 std::string qt_config_loc;
47}; 55};
diff --git a/src/yuzu/configuration/configure.ui b/src/yuzu/configuration/configure.ui
index 3f03f0b77..267717bc9 100644
--- a/src/yuzu/configuration/configure.ui
+++ b/src/yuzu/configuration/configure.ui
@@ -7,9 +7,15 @@
7 <x>0</x> 7 <x>0</x>
8 <y>0</y> 8 <y>0</y>
9 <width>382</width> 9 <width>382</width>
10 <height>241</height> 10 <height>650</height>
11 </rect> 11 </rect>
12 </property> 12 </property>
13 <property name="minimumSize">
14 <size>
15 <width>0</width>
16 <height>650</height>
17 </size>
18 </property>
13 <property name="windowTitle"> 19 <property name="windowTitle">
14 <string>yuzu Configuration</string> 20 <string>yuzu Configuration</string>
15 </property> 21 </property>
@@ -62,6 +68,11 @@
62 <string>Input</string> 68 <string>Input</string>
63 </attribute> 69 </attribute>
64 </widget> 70 </widget>
71 <widget class="ConfigureHotkeys" name="hotkeysTab">
72 <attribute name="title">
73 <string>Hotkeys</string>
74 </attribute>
75 </widget>
65 <widget class="ConfigureGraphics" name="graphicsTab"> 76 <widget class="ConfigureGraphics" name="graphicsTab">
66 <attribute name="title"> 77 <attribute name="title">
67 <string>Graphics</string> 78 <string>Graphics</string>
@@ -150,6 +161,12 @@
150 <header>configuration/configure_input_simple.h</header> 161 <header>configuration/configure_input_simple.h</header>
151 <container>1</container> 162 <container>1</container>
152 </customwidget> 163 </customwidget>
164 <customwidget>
165 <class>ConfigureHotkeys</class>
166 <extends>QWidget</extends>
167 <header>configuration/configure_hotkeys.h</header>
168 <container>1</container>
169 </customwidget>
153 </customwidgets> 170 </customwidgets>
154 <resources/> 171 <resources/>
155 <connections> 172 <connections>
diff --git a/src/yuzu/configuration/configure_debug.cpp b/src/yuzu/configuration/configure_debug.cpp
index aa7de7b54..550cf9dca 100644
--- a/src/yuzu/configuration/configure_debug.cpp
+++ b/src/yuzu/configuration/configure_debug.cpp
@@ -7,7 +7,6 @@
7#include "common/file_util.h" 7#include "common/file_util.h"
8#include "common/logging/backend.h" 8#include "common/logging/backend.h"
9#include "common/logging/filter.h" 9#include "common/logging/filter.h"
10#include "common/logging/log.h"
11#include "core/core.h" 10#include "core/core.h"
12#include "core/settings.h" 11#include "core/settings.h"
13#include "ui_configure_debug.h" 12#include "ui_configure_debug.h"
diff --git a/src/yuzu/configuration/configure_dialog.cpp b/src/yuzu/configuration/configure_dialog.cpp
index d802443d0..51bd1f121 100644
--- a/src/yuzu/configuration/configure_dialog.cpp
+++ b/src/yuzu/configuration/configure_dialog.cpp
@@ -8,20 +8,22 @@
8#include "ui_configure.h" 8#include "ui_configure.h"
9#include "yuzu/configuration/config.h" 9#include "yuzu/configuration/config.h"
10#include "yuzu/configuration/configure_dialog.h" 10#include "yuzu/configuration/configure_dialog.h"
11#include "yuzu/configuration/configure_input_player.h"
11#include "yuzu/hotkeys.h" 12#include "yuzu/hotkeys.h"
12 13
13ConfigureDialog::ConfigureDialog(QWidget* parent, const HotkeyRegistry& registry) 14ConfigureDialog::ConfigureDialog(QWidget* parent, HotkeyRegistry& registry)
14 : QDialog(parent), ui(new Ui::ConfigureDialog) { 15 : QDialog(parent), registry(registry), ui(new Ui::ConfigureDialog) {
15 ui->setupUi(this); 16 ui->setupUi(this);
16 ui->generalTab->PopulateHotkeyList(registry); 17 ui->hotkeysTab->Populate(registry);
17 this->setConfiguration(); 18 this->setConfiguration();
18 this->PopulateSelectionList(); 19 this->PopulateSelectionList();
19 connect(ui->selectorList, &QListWidget::itemSelectionChanged, this, 20 connect(ui->selectorList, &QListWidget::itemSelectionChanged, this,
20 &ConfigureDialog::UpdateVisibleTabs); 21 &ConfigureDialog::UpdateVisibleTabs);
21
22 adjustSize(); 22 adjustSize();
23
24 ui->selectorList->setCurrentRow(0); 23 ui->selectorList->setCurrentRow(0);
24
25 // Synchronise lists upon initialisation
26 ui->hotkeysTab->EmitHotkeysChanged();
25} 27}
26 28
27ConfigureDialog::~ConfigureDialog() = default; 29ConfigureDialog::~ConfigureDialog() = default;
@@ -34,11 +36,13 @@ void ConfigureDialog::applyConfiguration() {
34 ui->systemTab->applyConfiguration(); 36 ui->systemTab->applyConfiguration();
35 ui->profileManagerTab->applyConfiguration(); 37 ui->profileManagerTab->applyConfiguration();
36 ui->inputTab->applyConfiguration(); 38 ui->inputTab->applyConfiguration();
39 ui->hotkeysTab->applyConfiguration(registry);
37 ui->graphicsTab->applyConfiguration(); 40 ui->graphicsTab->applyConfiguration();
38 ui->audioTab->applyConfiguration(); 41 ui->audioTab->applyConfiguration();
39 ui->debugTab->applyConfiguration(); 42 ui->debugTab->applyConfiguration();
40 ui->webTab->applyConfiguration(); 43 ui->webTab->applyConfiguration();
41 Settings::Apply(); 44 Settings::Apply();
45 Settings::LogSettings();
42} 46}
43 47
44void ConfigureDialog::PopulateSelectionList() { 48void ConfigureDialog::PopulateSelectionList() {
@@ -46,7 +50,7 @@ void ConfigureDialog::PopulateSelectionList() {
46 {{tr("General"), {tr("General"), tr("Web"), tr("Debug"), tr("Game List")}}, 50 {{tr("General"), {tr("General"), tr("Web"), tr("Debug"), tr("Game List")}},
47 {tr("System"), {tr("System"), tr("Profiles"), tr("Audio")}}, 51 {tr("System"), {tr("System"), tr("Profiles"), tr("Audio")}},
48 {tr("Graphics"), {tr("Graphics")}}, 52 {tr("Graphics"), {tr("Graphics")}},
49 {tr("Controls"), {tr("Input")}}}}; 53 {tr("Controls"), {tr("Input"), tr("Hotkeys")}}}};
50 54
51 for (const auto& entry : items) { 55 for (const auto& entry : items) {
52 auto* const item = new QListWidgetItem(entry.first); 56 auto* const item = new QListWidgetItem(entry.first);
@@ -65,6 +69,7 @@ void ConfigureDialog::UpdateVisibleTabs() {
65 {tr("System"), ui->systemTab}, 69 {tr("System"), ui->systemTab},
66 {tr("Profiles"), ui->profileManagerTab}, 70 {tr("Profiles"), ui->profileManagerTab},
67 {tr("Input"), ui->inputTab}, 71 {tr("Input"), ui->inputTab},
72 {tr("Hotkeys"), ui->hotkeysTab},
68 {tr("Graphics"), ui->graphicsTab}, 73 {tr("Graphics"), ui->graphicsTab},
69 {tr("Audio"), ui->audioTab}, 74 {tr("Audio"), ui->audioTab},
70 {tr("Debug"), ui->debugTab}, 75 {tr("Debug"), ui->debugTab},
diff --git a/src/yuzu/configuration/configure_dialog.h b/src/yuzu/configuration/configure_dialog.h
index 243d9fa09..2363ba584 100644
--- a/src/yuzu/configuration/configure_dialog.h
+++ b/src/yuzu/configuration/configure_dialog.h
@@ -17,7 +17,7 @@ class ConfigureDialog : public QDialog {
17 Q_OBJECT 17 Q_OBJECT
18 18
19public: 19public:
20 explicit ConfigureDialog(QWidget* parent, const HotkeyRegistry& registry); 20 explicit ConfigureDialog(QWidget* parent, HotkeyRegistry& registry);
21 ~ConfigureDialog() override; 21 ~ConfigureDialog() override;
22 22
23 void applyConfiguration(); 23 void applyConfiguration();
@@ -28,4 +28,5 @@ private:
28 void PopulateSelectionList(); 28 void PopulateSelectionList();
29 29
30 std::unique_ptr<Ui::ConfigureDialog> ui; 30 std::unique_ptr<Ui::ConfigureDialog> ui;
31 HotkeyRegistry& registry;
31}; 32};
diff --git a/src/yuzu/configuration/configure_general.cpp b/src/yuzu/configuration/configure_general.cpp
index 4116b6cd7..e48f4f5a3 100644
--- a/src/yuzu/configuration/configure_general.cpp
+++ b/src/yuzu/configuration/configure_general.cpp
@@ -28,25 +28,19 @@ ConfigureGeneral::ConfigureGeneral(QWidget* parent)
28ConfigureGeneral::~ConfigureGeneral() = default; 28ConfigureGeneral::~ConfigureGeneral() = default;
29 29
30void ConfigureGeneral::setConfiguration() { 30void ConfigureGeneral::setConfiguration() {
31 ui->toggle_deepscan->setChecked(UISettings::values.gamedir_deepscan); 31 ui->toggle_deepscan->setChecked(UISettings::values.game_directory_deepscan);
32 ui->toggle_check_exit->setChecked(UISettings::values.confirm_before_closing); 32 ui->toggle_check_exit->setChecked(UISettings::values.confirm_before_closing);
33 ui->toggle_user_on_boot->setChecked(UISettings::values.select_user_on_boot); 33 ui->toggle_user_on_boot->setChecked(UISettings::values.select_user_on_boot);
34 ui->theme_combobox->setCurrentIndex(ui->theme_combobox->findData(UISettings::values.theme)); 34 ui->theme_combobox->setCurrentIndex(ui->theme_combobox->findData(UISettings::values.theme));
35 ui->use_cpu_jit->setChecked(Settings::values.use_cpu_jit); 35 ui->use_cpu_jit->setChecked(Settings::values.use_cpu_jit);
36 ui->enable_nfc->setChecked(Settings::values.enable_nfc);
37}
38
39void ConfigureGeneral::PopulateHotkeyList(const HotkeyRegistry& registry) {
40 ui->widget->Populate(registry);
41} 36}
42 37
43void ConfigureGeneral::applyConfiguration() { 38void ConfigureGeneral::applyConfiguration() {
44 UISettings::values.gamedir_deepscan = ui->toggle_deepscan->isChecked(); 39 UISettings::values.game_directory_deepscan = ui->toggle_deepscan->isChecked();
45 UISettings::values.confirm_before_closing = ui->toggle_check_exit->isChecked(); 40 UISettings::values.confirm_before_closing = ui->toggle_check_exit->isChecked();
46 UISettings::values.select_user_on_boot = ui->toggle_user_on_boot->isChecked(); 41 UISettings::values.select_user_on_boot = ui->toggle_user_on_boot->isChecked();
47 UISettings::values.theme = 42 UISettings::values.theme =
48 ui->theme_combobox->itemData(ui->theme_combobox->currentIndex()).toString(); 43 ui->theme_combobox->itemData(ui->theme_combobox->currentIndex()).toString();
49 44
50 Settings::values.use_cpu_jit = ui->use_cpu_jit->isChecked(); 45 Settings::values.use_cpu_jit = ui->use_cpu_jit->isChecked();
51 Settings::values.enable_nfc = ui->enable_nfc->isChecked();
52} 46}
diff --git a/src/yuzu/configuration/configure_general.h b/src/yuzu/configuration/configure_general.h
index 59738af40..df41d995b 100644
--- a/src/yuzu/configuration/configure_general.h
+++ b/src/yuzu/configuration/configure_general.h
@@ -20,7 +20,6 @@ public:
20 explicit ConfigureGeneral(QWidget* parent = nullptr); 20 explicit ConfigureGeneral(QWidget* parent = nullptr);
21 ~ConfigureGeneral() override; 21 ~ConfigureGeneral() override;
22 22
23 void PopulateHotkeyList(const HotkeyRegistry& registry);
24 void applyConfiguration(); 23 void applyConfiguration();
25 24
26private: 25private:
diff --git a/src/yuzu/configuration/configure_general.ui b/src/yuzu/configuration/configure_general.ui
index dff0ad5d0..1a5721fe7 100644
--- a/src/yuzu/configuration/configure_general.ui
+++ b/src/yuzu/configuration/configure_general.ui
@@ -71,26 +71,6 @@
71 </widget> 71 </widget>
72 </item> 72 </item>
73 <item> 73 <item>
74 <widget class="QGroupBox" name="EmulationGroupBox">
75 <property name="title">
76 <string>Emulation</string>
77 </property>
78 <layout class="QHBoxLayout" name="EmulationHorizontalLayout">
79 <item>
80 <layout class="QVBoxLayout" name="EmulationVerticalLayout">
81 <item>
82 <widget class="QCheckBox" name="enable_nfc">
83 <property name="text">
84 <string>Enable NFC</string>
85 </property>
86 </widget>
87 </item>
88 </layout>
89 </item>
90 </layout>
91 </widget>
92 </item>
93 <item>
94 <widget class="QGroupBox" name="theme_group_box"> 74 <widget class="QGroupBox" name="theme_group_box">
95 <property name="title"> 75 <property name="title">
96 <string>Theme</string> 76 <string>Theme</string>
@@ -118,22 +98,6 @@
118 </widget> 98 </widget>
119 </item> 99 </item>
120 <item> 100 <item>
121 <widget class="QGroupBox" name="HotKeysGroupBox">
122 <property name="title">
123 <string>Hotkeys</string>
124 </property>
125 <layout class="QHBoxLayout" name="HotKeysHorizontalLayout">
126 <item>
127 <layout class="QVBoxLayout" name="HotKeysVerticalLayout">
128 <item>
129 <widget class="GHotkeysDialog" name="widget" native="true"/>
130 </item>
131 </layout>
132 </item>
133 </layout>
134 </widget>
135 </item>
136 <item>
137 <spacer name="verticalSpacer"> 101 <spacer name="verticalSpacer">
138 <property name="orientation"> 102 <property name="orientation">
139 <enum>Qt::Vertical</enum> 103 <enum>Qt::Vertical</enum>
@@ -150,14 +114,6 @@
150 </item> 114 </item>
151 </layout> 115 </layout>
152 </widget> 116 </widget>
153 <customwidgets>
154 <customwidget>
155 <class>GHotkeysDialog</class>
156 <extends>QWidget</extends>
157 <header>hotkeys.h</header>
158 <container>1</container>
159 </customwidget>
160 </customwidgets>
161 <resources/> 117 <resources/>
162 <connections/> 118 <connections/>
163</ui> 119</ui>
diff --git a/src/yuzu/configuration/configure_graphics.cpp b/src/yuzu/configuration/configure_graphics.cpp
index 8290b4384..dd1d67488 100644
--- a/src/yuzu/configuration/configure_graphics.cpp
+++ b/src/yuzu/configuration/configure_graphics.cpp
@@ -62,9 +62,7 @@ ConfigureGraphics::ConfigureGraphics(QWidget* parent)
62 const QColor new_bg_color = QColorDialog::getColor(bg_color); 62 const QColor new_bg_color = QColorDialog::getColor(bg_color);
63 if (!new_bg_color.isValid()) 63 if (!new_bg_color.isValid())
64 return; 64 return;
65 bg_color = new_bg_color; 65 UpdateBackgroundColorButton(new_bg_color);
66 ui->bg_button->setStyleSheet(
67 QString("QPushButton { background-color: %1 }").arg(bg_color.name()));
68 }); 66 });
69} 67}
70 68
@@ -75,11 +73,12 @@ void ConfigureGraphics::setConfiguration() {
75 static_cast<int>(FromResolutionFactor(Settings::values.resolution_factor))); 73 static_cast<int>(FromResolutionFactor(Settings::values.resolution_factor)));
76 ui->toggle_frame_limit->setChecked(Settings::values.use_frame_limit); 74 ui->toggle_frame_limit->setChecked(Settings::values.use_frame_limit);
77 ui->frame_limit->setValue(Settings::values.frame_limit); 75 ui->frame_limit->setValue(Settings::values.frame_limit);
76 ui->use_disk_shader_cache->setChecked(Settings::values.use_disk_shader_cache);
78 ui->use_accurate_gpu_emulation->setChecked(Settings::values.use_accurate_gpu_emulation); 77 ui->use_accurate_gpu_emulation->setChecked(Settings::values.use_accurate_gpu_emulation);
79 bg_color = QColor::fromRgbF(Settings::values.bg_red, Settings::values.bg_green, 78 ui->use_asynchronous_gpu_emulation->setEnabled(!Core::System::GetInstance().IsPoweredOn());
80 Settings::values.bg_blue); 79 ui->use_asynchronous_gpu_emulation->setChecked(Settings::values.use_asynchronous_gpu_emulation);
81 ui->bg_button->setStyleSheet( 80 UpdateBackgroundColorButton(QColor::fromRgbF(Settings::values.bg_red, Settings::values.bg_green,
82 QString("QPushButton { background-color: %1 }").arg(bg_color.name())); 81 Settings::values.bg_blue));
83} 82}
84 83
85void ConfigureGraphics::applyConfiguration() { 84void ConfigureGraphics::applyConfiguration() {
@@ -87,8 +86,21 @@ void ConfigureGraphics::applyConfiguration() {
87 ToResolutionFactor(static_cast<Resolution>(ui->resolution_factor_combobox->currentIndex())); 86 ToResolutionFactor(static_cast<Resolution>(ui->resolution_factor_combobox->currentIndex()));
88 Settings::values.use_frame_limit = ui->toggle_frame_limit->isChecked(); 87 Settings::values.use_frame_limit = ui->toggle_frame_limit->isChecked();
89 Settings::values.frame_limit = ui->frame_limit->value(); 88 Settings::values.frame_limit = ui->frame_limit->value();
89 Settings::values.use_disk_shader_cache = ui->use_disk_shader_cache->isChecked();
90 Settings::values.use_accurate_gpu_emulation = ui->use_accurate_gpu_emulation->isChecked(); 90 Settings::values.use_accurate_gpu_emulation = ui->use_accurate_gpu_emulation->isChecked();
91 Settings::values.use_asynchronous_gpu_emulation =
92 ui->use_asynchronous_gpu_emulation->isChecked();
91 Settings::values.bg_red = static_cast<float>(bg_color.redF()); 93 Settings::values.bg_red = static_cast<float>(bg_color.redF());
92 Settings::values.bg_green = static_cast<float>(bg_color.greenF()); 94 Settings::values.bg_green = static_cast<float>(bg_color.greenF());
93 Settings::values.bg_blue = static_cast<float>(bg_color.blueF()); 95 Settings::values.bg_blue = static_cast<float>(bg_color.blueF());
94} 96}
97
98void ConfigureGraphics::UpdateBackgroundColorButton(QColor color) {
99 bg_color = color;
100
101 QPixmap pixmap(ui->bg_button->size());
102 pixmap.fill(bg_color);
103
104 const QIcon color_icon(pixmap);
105 ui->bg_button->setIcon(color_icon);
106}
diff --git a/src/yuzu/configuration/configure_graphics.h b/src/yuzu/configuration/configure_graphics.h
index d6ffc6fde..f2799822d 100644
--- a/src/yuzu/configuration/configure_graphics.h
+++ b/src/yuzu/configuration/configure_graphics.h
@@ -23,6 +23,8 @@ public:
23private: 23private:
24 void setConfiguration(); 24 void setConfiguration();
25 25
26 void UpdateBackgroundColorButton(QColor color);
27
26 std::unique_ptr<Ui::ConfigureGraphics> ui; 28 std::unique_ptr<Ui::ConfigureGraphics> ui;
27 QColor bg_color; 29 QColor bg_color;
28}; 30};
diff --git a/src/yuzu/configuration/configure_graphics.ui b/src/yuzu/configuration/configure_graphics.ui
index e278cdd05..c6767e0ca 100644
--- a/src/yuzu/configuration/configure_graphics.ui
+++ b/src/yuzu/configuration/configure_graphics.ui
@@ -50,6 +50,13 @@
50 </layout> 50 </layout>
51 </item> 51 </item>
52 <item> 52 <item>
53 <widget class="QCheckBox" name="use_disk_shader_cache">
54 <property name="text">
55 <string>Use disk shader cache</string>
56 </property>
57 </widget>
58 </item>
59 <item>
53 <widget class="QCheckBox" name="use_accurate_gpu_emulation"> 60 <widget class="QCheckBox" name="use_accurate_gpu_emulation">
54 <property name="text"> 61 <property name="text">
55 <string>Use accurate GPU emulation (slow)</string> 62 <string>Use accurate GPU emulation (slow)</string>
@@ -57,6 +64,13 @@
57 </widget> 64 </widget>
58 </item> 65 </item>
59 <item> 66 <item>
67 <widget class="QCheckBox" name="use_asynchronous_gpu_emulation">
68 <property name="text">
69 <string>Use asynchronous GPU emulation</string>
70 </property>
71 </widget>
72 </item>
73 <item>
60 <layout class="QHBoxLayout" name="horizontalLayout"> 74 <layout class="QHBoxLayout" name="horizontalLayout">
61 <item> 75 <item>
62 <widget class="QLabel" name="label"> 76 <widget class="QLabel" name="label">
diff --git a/src/yuzu/configuration/configure_hotkeys.cpp b/src/yuzu/configuration/configure_hotkeys.cpp
new file mode 100644
index 000000000..bfb562535
--- /dev/null
+++ b/src/yuzu/configuration/configure_hotkeys.cpp
@@ -0,0 +1,121 @@
1// Copyright 2017 Citra Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <QMessageBox>
6#include <QStandardItemModel>
7#include "core/settings.h"
8#include "ui_configure_hotkeys.h"
9#include "yuzu/configuration/configure_hotkeys.h"
10#include "yuzu/hotkeys.h"
11#include "yuzu/util/sequence_dialog/sequence_dialog.h"
12
13ConfigureHotkeys::ConfigureHotkeys(QWidget* parent)
14 : QWidget(parent), ui(std::make_unique<Ui::ConfigureHotkeys>()) {
15 ui->setupUi(this);
16 setFocusPolicy(Qt::ClickFocus);
17
18 model = new QStandardItemModel(this);
19 model->setColumnCount(3);
20 model->setHorizontalHeaderLabels({tr("Action"), tr("Hotkey"), tr("Context")});
21
22 connect(ui->hotkey_list, &QTreeView::doubleClicked, this, &ConfigureHotkeys::Configure);
23 ui->hotkey_list->setModel(model);
24
25 // TODO(Kloen): Make context configurable as well (hiding the column for now)
26 ui->hotkey_list->hideColumn(2);
27
28 ui->hotkey_list->setColumnWidth(0, 200);
29 ui->hotkey_list->resizeColumnToContents(1);
30}
31
32ConfigureHotkeys::~ConfigureHotkeys() = default;
33
34void ConfigureHotkeys::EmitHotkeysChanged() {
35 emit HotkeysChanged(GetUsedKeyList());
36}
37
38QList<QKeySequence> ConfigureHotkeys::GetUsedKeyList() const {
39 QList<QKeySequence> list;
40 for (int r = 0; r < model->rowCount(); r++) {
41 const QStandardItem* parent = model->item(r, 0);
42 for (int r2 = 0; r2 < parent->rowCount(); r2++) {
43 const QStandardItem* keyseq = parent->child(r2, 1);
44 list << QKeySequence::fromString(keyseq->text(), QKeySequence::NativeText);
45 }
46 }
47 return list;
48}
49
50void ConfigureHotkeys::Populate(const HotkeyRegistry& registry) {
51 for (const auto& group : registry.hotkey_groups) {
52 auto* parent_item = new QStandardItem(group.first);
53 parent_item->setEditable(false);
54 for (const auto& hotkey : group.second) {
55 auto* action = new QStandardItem(hotkey.first);
56 auto* keyseq =
57 new QStandardItem(hotkey.second.keyseq.toString(QKeySequence::NativeText));
58 action->setEditable(false);
59 keyseq->setEditable(false);
60 parent_item->appendRow({action, keyseq});
61 }
62 model->appendRow(parent_item);
63 }
64
65 ui->hotkey_list->expandAll();
66}
67
68void ConfigureHotkeys::Configure(QModelIndex index) {
69 if (index.parent() == QModelIndex())
70 return;
71
72 index = index.sibling(index.row(), 1);
73 auto* model = ui->hotkey_list->model();
74 auto previous_key = model->data(index);
75
76 auto* hotkey_dialog = new SequenceDialog;
77 int return_code = hotkey_dialog->exec();
78
79 auto key_sequence = hotkey_dialog->GetSequence();
80
81 if (return_code == QDialog::Rejected || key_sequence.isEmpty())
82 return;
83
84 if (IsUsedKey(key_sequence) && key_sequence != QKeySequence(previous_key.toString())) {
85 QMessageBox::critical(this, tr("Error in inputted key"),
86 tr("You're using a key that's already bound."));
87 } else {
88 model->setData(index, key_sequence.toString(QKeySequence::NativeText));
89 EmitHotkeysChanged();
90 }
91}
92
93bool ConfigureHotkeys::IsUsedKey(QKeySequence key_sequence) {
94 return GetUsedKeyList().contains(key_sequence);
95}
96
97void ConfigureHotkeys::applyConfiguration(HotkeyRegistry& registry) {
98 for (int key_id = 0; key_id < model->rowCount(); key_id++) {
99 const QStandardItem* parent = model->item(key_id, 0);
100 for (int key_column_id = 0; key_column_id < parent->rowCount(); key_column_id++) {
101 const QStandardItem* action = parent->child(key_column_id, 0);
102 const QStandardItem* keyseq = parent->child(key_column_id, 1);
103 for (auto& [group, sub_actions] : registry.hotkey_groups) {
104 if (group != parent->text())
105 continue;
106 for (auto& [action_name, hotkey] : sub_actions) {
107 if (action_name != action->text())
108 continue;
109 hotkey.keyseq = QKeySequence(keyseq->text());
110 }
111 }
112 }
113 }
114
115 registry.SaveHotkeys();
116 Settings::Apply();
117}
118
119void ConfigureHotkeys::retranslateUi() {
120 ui->retranslateUi(this);
121}
diff --git a/src/yuzu/configuration/configure_hotkeys.h b/src/yuzu/configuration/configure_hotkeys.h
new file mode 100644
index 000000000..cd203aad6
--- /dev/null
+++ b/src/yuzu/configuration/configure_hotkeys.h
@@ -0,0 +1,48 @@
1// Copyright 2017 Citra Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <memory>
8#include <QWidget>
9#include "core/settings.h"
10
11namespace Ui {
12class ConfigureHotkeys;
13}
14
15class HotkeyRegistry;
16class QStandardItemModel;
17
18class ConfigureHotkeys : public QWidget {
19 Q_OBJECT
20
21public:
22 explicit ConfigureHotkeys(QWidget* parent = nullptr);
23 ~ConfigureHotkeys() override;
24
25 void applyConfiguration(HotkeyRegistry& registry);
26 void retranslateUi();
27
28 void EmitHotkeysChanged();
29
30 /**
31 * Populates the hotkey list widget using data from the provided registry.
32 * Called everytime the Configure dialog is opened.
33 * @param registry The HotkeyRegistry whose data is used to populate the list.
34 */
35 void Populate(const HotkeyRegistry& registry);
36
37signals:
38 void HotkeysChanged(QList<QKeySequence> new_key_list);
39
40private:
41 void Configure(QModelIndex index);
42 bool IsUsedKey(QKeySequence key_sequence);
43 QList<QKeySequence> GetUsedKeyList() const;
44
45 std::unique_ptr<Ui::ConfigureHotkeys> ui;
46
47 QStandardItemModel* model;
48};
diff --git a/src/yuzu/configuration/configure_hotkeys.ui b/src/yuzu/configuration/configure_hotkeys.ui
new file mode 100644
index 000000000..0d0b70f38
--- /dev/null
+++ b/src/yuzu/configuration/configure_hotkeys.ui
@@ -0,0 +1,42 @@
1<?xml version="1.0" encoding="UTF-8"?>
2<ui version="4.0">
3 <class>ConfigureHotkeys</class>
4 <widget class="QWidget" name="ConfigureHotkeys">
5 <property name="geometry">
6 <rect>
7 <x>0</x>
8 <y>0</y>
9 <width>363</width>
10 <height>388</height>
11 </rect>
12 </property>
13 <property name="windowTitle">
14 <string>Hotkey Settings</string>
15 </property>
16 <layout class="QVBoxLayout" name="verticalLayout">
17 <item>
18 <layout class="QVBoxLayout" name="verticalLayout_2">
19 <item>
20 <widget class="QLabel" name="label_2">
21 <property name="text">
22 <string>Double-click on a binding to change it.</string>
23 </property>
24 </widget>
25 </item>
26 <item>
27 <widget class="QTreeView" name="hotkey_list">
28 <property name="editTriggers">
29 <set>QAbstractItemView::NoEditTriggers</set>
30 </property>
31 <property name="sortingEnabled">
32 <bool>false</bool>
33 </property>
34 </widget>
35 </item>
36 </layout>
37 </item>
38 </layout>
39 </widget>
40 <resources/>
41 <connections/>
42</ui> \ No newline at end of file
diff --git a/src/yuzu/configuration/configure_input_player.cpp b/src/yuzu/configuration/configure_input_player.cpp
index ba2b32c4f..c5a245ebe 100644
--- a/src/yuzu/configuration/configure_input_player.cpp
+++ b/src/yuzu/configuration/configure_input_player.cpp
@@ -7,6 +7,7 @@
7#include <utility> 7#include <utility>
8#include <QColorDialog> 8#include <QColorDialog>
9#include <QGridLayout> 9#include <QGridLayout>
10#include <QKeyEvent>
10#include <QMenu> 11#include <QMenu>
11#include <QMessageBox> 12#include <QMessageBox>
12#include <QTimer> 13#include <QTimer>
diff --git a/src/yuzu/configuration/configure_input_player.h b/src/yuzu/configuration/configure_input_player.h
index 7a53f6715..ade8d4435 100644
--- a/src/yuzu/configuration/configure_input_player.h
+++ b/src/yuzu/configuration/configure_input_player.h
@@ -11,17 +11,21 @@
11#include <string> 11#include <string>
12 12
13#include <QDialog> 13#include <QDialog>
14#include <QKeyEvent>
15 14
16#include "common/param_package.h" 15#include "common/param_package.h"
17#include "core/settings.h" 16#include "core/settings.h"
18#include "input_common/main.h"
19#include "ui_configure_input.h" 17#include "ui_configure_input.h"
20 18
19class QKeyEvent;
21class QPushButton; 20class QPushButton;
22class QString; 21class QString;
23class QTimer; 22class QTimer;
24 23
24namespace InputCommon::Polling {
25class DevicePoller;
26enum class DeviceType;
27} // namespace InputCommon::Polling
28
25namespace Ui { 29namespace Ui {
26class ConfigureInputPlayer; 30class ConfigureInputPlayer;
27} 31}
diff --git a/src/yuzu/configuration/configure_per_general.cpp b/src/yuzu/configuration/configure_per_general.cpp
index e13d2eac8..022b94609 100644
--- a/src/yuzu/configuration/configure_per_general.cpp
+++ b/src/yuzu/configuration/configure_per_general.cpp
@@ -8,7 +8,6 @@
8 8
9#include <QHeaderView> 9#include <QHeaderView>
10#include <QMenu> 10#include <QMenu>
11#include <QMessageBox>
12#include <QStandardItemModel> 11#include <QStandardItemModel>
13#include <QString> 12#include <QString>
14#include <QTimer> 13#include <QTimer>
diff --git a/src/yuzu/configuration/configure_per_general.h b/src/yuzu/configuration/configure_per_general.h
index a4494446c..f8a7d5326 100644
--- a/src/yuzu/configuration/configure_per_general.h
+++ b/src/yuzu/configuration/configure_per_general.h
@@ -7,16 +7,16 @@
7#include <memory> 7#include <memory>
8#include <vector> 8#include <vector>
9 9
10#include <QKeyEvent> 10#include <QDialog>
11#include <QList> 11#include <QList>
12#include <QWidget>
13 12
14#include "core/file_sys/vfs_types.h" 13#include "core/file_sys/vfs_types.h"
15 14
16class QTreeView;
17class QGraphicsScene; 15class QGraphicsScene;
18class QStandardItem; 16class QStandardItem;
19class QStandardItemModel; 17class QStandardItemModel;
18class QTreeView;
19class QVBoxLayout;
20 20
21namespace Ui { 21namespace Ui {
22class ConfigurePerGameGeneral; 22class ConfigurePerGameGeneral;
diff --git a/src/yuzu/configuration/configure_system.cpp b/src/yuzu/configuration/configure_system.cpp
index 94e27349d..10645a2b3 100644
--- a/src/yuzu/configuration/configure_system.cpp
+++ b/src/yuzu/configuration/configure_system.cpp
@@ -2,23 +2,19 @@
2// Licensed under GPLv2 or any later version 2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included. 3// Refer to the license.txt file included.
4 4
5#include <algorithm> 5#include <array>
6#include <chrono>
7#include <optional>
8
6#include <QFileDialog> 9#include <QFileDialog>
7#include <QGraphicsItem> 10#include <QGraphicsItem>
8#include <QGraphicsScene>
9#include <QHeaderView>
10#include <QMessageBox> 11#include <QMessageBox>
11#include <QStandardItemModel>
12#include <QTreeView>
13#include <QVBoxLayout>
14#include "common/assert.h" 12#include "common/assert.h"
15#include "common/file_util.h" 13#include "common/file_util.h"
16#include "common/string_util.h"
17#include "core/core.h" 14#include "core/core.h"
18#include "core/settings.h" 15#include "core/settings.h"
19#include "ui_configure_system.h" 16#include "ui_configure_system.h"
20#include "yuzu/configuration/configure_system.h" 17#include "yuzu/configuration/configure_system.h"
21#include "yuzu/util/limitable_input_dialog.h"
22 18
23namespace { 19namespace {
24constexpr std::array<int, 12> days_in_month = {{ 20constexpr std::array<int, 12> days_in_month = {{
diff --git a/src/yuzu/configuration/configure_touchscreen_advanced.h b/src/yuzu/configuration/configure_touchscreen_advanced.h
index 41cd255fb..3d0772c87 100644
--- a/src/yuzu/configuration/configure_touchscreen_advanced.h
+++ b/src/yuzu/configuration/configure_touchscreen_advanced.h
@@ -6,8 +6,6 @@
6 6
7#include <memory> 7#include <memory>
8#include <QDialog> 8#include <QDialog>
9#include <QWidget>
10#include "yuzu/configuration/config.h"
11 9
12namespace Ui { 10namespace Ui {
13class ConfigureTouchscreenAdvanced; 11class ConfigureTouchscreenAdvanced;
diff --git a/src/yuzu/debugger/graphics/graphics_surface.cpp b/src/yuzu/debugger/graphics/graphics_surface.cpp
deleted file mode 100644
index 209798521..000000000
--- a/src/yuzu/debugger/graphics/graphics_surface.cpp
+++ /dev/null
@@ -1,461 +0,0 @@
1// Copyright 2014 Citra Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <QBoxLayout>
6#include <QComboBox>
7#include <QDebug>
8#include <QFileDialog>
9#include <QLabel>
10#include <QMouseEvent>
11#include <QPushButton>
12#include <QScrollArea>
13#include <QSpinBox>
14#include "common/vector_math.h"
15#include "core/core.h"
16#include "core/memory.h"
17#include "video_core/engines/maxwell_3d.h"
18#include "video_core/gpu.h"
19#include "video_core/textures/decoders.h"
20#include "video_core/textures/texture.h"
21#include "yuzu/debugger/graphics/graphics_surface.h"
22#include "yuzu/util/spinbox.h"
23
24static Tegra::Texture::TextureFormat ConvertToTextureFormat(
25 Tegra::RenderTargetFormat render_target_format) {
26 switch (render_target_format) {
27 case Tegra::RenderTargetFormat::RGBA8_UNORM:
28 return Tegra::Texture::TextureFormat::A8R8G8B8;
29 case Tegra::RenderTargetFormat::RGB10_A2_UNORM:
30 return Tegra::Texture::TextureFormat::A2B10G10R10;
31 default:
32 UNIMPLEMENTED_MSG("Unimplemented RT format");
33 return Tegra::Texture::TextureFormat::A8R8G8B8;
34 }
35}
36
37SurfacePicture::SurfacePicture(QWidget* parent, GraphicsSurfaceWidget* surface_widget_)
38 : QLabel(parent), surface_widget(surface_widget_) {}
39
40SurfacePicture::~SurfacePicture() = default;
41
42void SurfacePicture::mousePressEvent(QMouseEvent* event) {
43 // Only do something while the left mouse button is held down
44 if (!(event->buttons() & Qt::LeftButton))
45 return;
46
47 if (pixmap() == nullptr)
48 return;
49
50 if (surface_widget)
51 surface_widget->Pick(event->x() * pixmap()->width() / width(),
52 event->y() * pixmap()->height() / height());
53}
54
55void SurfacePicture::mouseMoveEvent(QMouseEvent* event) {
56 // We also want to handle the event if the user moves the mouse while holding down the LMB
57 mousePressEvent(event);
58}
59
60GraphicsSurfaceWidget::GraphicsSurfaceWidget(std::shared_ptr<Tegra::DebugContext> debug_context,
61 QWidget* parent)
62 : BreakPointObserverDock(debug_context, tr("Maxwell Surface Viewer"), parent),
63 surface_source(Source::RenderTarget0) {
64 setObjectName("MaxwellSurface");
65
66 surface_source_list = new QComboBox;
67 surface_source_list->addItem(tr("Render Target 0"));
68 surface_source_list->addItem(tr("Render Target 1"));
69 surface_source_list->addItem(tr("Render Target 2"));
70 surface_source_list->addItem(tr("Render Target 3"));
71 surface_source_list->addItem(tr("Render Target 4"));
72 surface_source_list->addItem(tr("Render Target 5"));
73 surface_source_list->addItem(tr("Render Target 6"));
74 surface_source_list->addItem(tr("Render Target 7"));
75 surface_source_list->addItem(tr("Z Buffer"));
76 surface_source_list->addItem(tr("Custom"));
77 surface_source_list->setCurrentIndex(static_cast<int>(surface_source));
78
79 surface_address_control = new CSpinBox;
80 surface_address_control->SetBase(16);
81 surface_address_control->SetRange(0, 0x7FFFFFFFFFFFFFFF);
82 surface_address_control->SetPrefix("0x");
83
84 unsigned max_dimension = 16384; // TODO: Find actual maximum
85
86 surface_width_control = new QSpinBox;
87 surface_width_control->setRange(0, max_dimension);
88
89 surface_height_control = new QSpinBox;
90 surface_height_control->setRange(0, max_dimension);
91
92 surface_picker_x_control = new QSpinBox;
93 surface_picker_x_control->setRange(0, max_dimension - 1);
94
95 surface_picker_y_control = new QSpinBox;
96 surface_picker_y_control->setRange(0, max_dimension - 1);
97
98 surface_format_control = new QComboBox;
99
100 // Color formats sorted by Maxwell texture format index
101 surface_format_control->addItem(tr("None"));
102 surface_format_control->addItem(tr("Unknown"));
103 surface_format_control->addItem(tr("Unknown"));
104 surface_format_control->addItem(tr("Unknown"));
105 surface_format_control->addItem(tr("Unknown"));
106 surface_format_control->addItem(tr("Unknown"));
107 surface_format_control->addItem(tr("Unknown"));
108 surface_format_control->addItem(tr("Unknown"));
109 surface_format_control->addItem(tr("A8R8G8B8"));
110 surface_format_control->addItem(tr("Unknown"));
111 surface_format_control->addItem(tr("Unknown"));
112 surface_format_control->addItem(tr("Unknown"));
113 surface_format_control->addItem(tr("Unknown"));
114 surface_format_control->addItem(tr("Unknown"));
115 surface_format_control->addItem(tr("Unknown"));
116 surface_format_control->addItem(tr("Unknown"));
117 surface_format_control->addItem(tr("Unknown"));
118 surface_format_control->addItem(tr("Unknown"));
119 surface_format_control->addItem(tr("Unknown"));
120 surface_format_control->addItem(tr("Unknown"));
121 surface_format_control->addItem(tr("Unknown"));
122 surface_format_control->addItem(tr("Unknown"));
123 surface_format_control->addItem(tr("Unknown"));
124 surface_format_control->addItem(tr("Unknown"));
125 surface_format_control->addItem(tr("Unknown"));
126 surface_format_control->addItem(tr("Unknown"));
127 surface_format_control->addItem(tr("Unknown"));
128 surface_format_control->addItem(tr("Unknown"));
129 surface_format_control->addItem(tr("Unknown"));
130 surface_format_control->addItem(tr("Unknown"));
131 surface_format_control->addItem(tr("Unknown"));
132 surface_format_control->addItem(tr("Unknown"));
133 surface_format_control->addItem(tr("Unknown"));
134 surface_format_control->addItem(tr("Unknown"));
135 surface_format_control->addItem(tr("Unknown"));
136 surface_format_control->addItem(tr("Unknown"));
137 surface_format_control->addItem(tr("DXT1"));
138 surface_format_control->addItem(tr("DXT23"));
139 surface_format_control->addItem(tr("DXT45"));
140 surface_format_control->addItem(tr("DXN1"));
141 surface_format_control->addItem(tr("DXN2"));
142
143 surface_info_label = new QLabel();
144 surface_info_label->setWordWrap(true);
145
146 surface_picture_label = new SurfacePicture(0, this);
147 surface_picture_label->setSizePolicy(QSizePolicy::Fixed, QSizePolicy::Fixed);
148 surface_picture_label->setAlignment(Qt::AlignLeft | Qt::AlignTop);
149 surface_picture_label->setScaledContents(false);
150
151 auto scroll_area = new QScrollArea();
152 scroll_area->setBackgroundRole(QPalette::Dark);
153 scroll_area->setWidgetResizable(false);
154 scroll_area->setWidget(surface_picture_label);
155
156 save_surface = new QPushButton(QIcon::fromTheme("document-save"), tr("Save"));
157
158 // Connections
159 connect(this, &GraphicsSurfaceWidget::Update, this, &GraphicsSurfaceWidget::OnUpdate);
160 connect(surface_source_list,
161 static_cast<void (QComboBox::*)(int)>(&QComboBox::currentIndexChanged), this,
162 &GraphicsSurfaceWidget::OnSurfaceSourceChanged);
163 connect(surface_address_control, &CSpinBox::ValueChanged, this,
164 &GraphicsSurfaceWidget::OnSurfaceAddressChanged);
165 connect(surface_width_control, static_cast<void (QSpinBox::*)(int)>(&QSpinBox::valueChanged),
166 this, &GraphicsSurfaceWidget::OnSurfaceWidthChanged);
167 connect(surface_height_control, static_cast<void (QSpinBox::*)(int)>(&QSpinBox::valueChanged),
168 this, &GraphicsSurfaceWidget::OnSurfaceHeightChanged);
169 connect(surface_format_control,
170 static_cast<void (QComboBox::*)(int)>(&QComboBox::currentIndexChanged), this,
171 &GraphicsSurfaceWidget::OnSurfaceFormatChanged);
172 connect(surface_picker_x_control, static_cast<void (QSpinBox::*)(int)>(&QSpinBox::valueChanged),
173 this, &GraphicsSurfaceWidget::OnSurfacePickerXChanged);
174 connect(surface_picker_y_control, static_cast<void (QSpinBox::*)(int)>(&QSpinBox::valueChanged),
175 this, &GraphicsSurfaceWidget::OnSurfacePickerYChanged);
176 connect(save_surface, &QPushButton::clicked, this, &GraphicsSurfaceWidget::SaveSurface);
177
178 auto main_widget = new QWidget;
179 auto main_layout = new QVBoxLayout;
180 {
181 auto sub_layout = new QHBoxLayout;
182 sub_layout->addWidget(new QLabel(tr("Source:")));
183 sub_layout->addWidget(surface_source_list);
184 main_layout->addLayout(sub_layout);
185 }
186 {
187 auto sub_layout = new QHBoxLayout;
188 sub_layout->addWidget(new QLabel(tr("GPU Address:")));
189 sub_layout->addWidget(surface_address_control);
190 main_layout->addLayout(sub_layout);
191 }
192 {
193 auto sub_layout = new QHBoxLayout;
194 sub_layout->addWidget(new QLabel(tr("Width:")));
195 sub_layout->addWidget(surface_width_control);
196 main_layout->addLayout(sub_layout);
197 }
198 {
199 auto sub_layout = new QHBoxLayout;
200 sub_layout->addWidget(new QLabel(tr("Height:")));
201 sub_layout->addWidget(surface_height_control);
202 main_layout->addLayout(sub_layout);
203 }
204 {
205 auto sub_layout = new QHBoxLayout;
206 sub_layout->addWidget(new QLabel(tr("Format:")));
207 sub_layout->addWidget(surface_format_control);
208 main_layout->addLayout(sub_layout);
209 }
210 main_layout->addWidget(scroll_area);
211
212 auto info_layout = new QHBoxLayout;
213 {
214 auto xy_layout = new QVBoxLayout;
215 {
216 {
217 auto sub_layout = new QHBoxLayout;
218 sub_layout->addWidget(new QLabel(tr("X:")));
219 sub_layout->addWidget(surface_picker_x_control);
220 xy_layout->addLayout(sub_layout);
221 }
222 {
223 auto sub_layout = new QHBoxLayout;
224 sub_layout->addWidget(new QLabel(tr("Y:")));
225 sub_layout->addWidget(surface_picker_y_control);
226 xy_layout->addLayout(sub_layout);
227 }
228 }
229 info_layout->addLayout(xy_layout);
230 surface_info_label->setSizePolicy(QSizePolicy::Expanding, QSizePolicy::Minimum);
231 info_layout->addWidget(surface_info_label);
232 }
233 main_layout->addLayout(info_layout);
234
235 main_layout->addWidget(save_surface);
236 main_widget->setLayout(main_layout);
237 setWidget(main_widget);
238
239 // Load current data - TODO: Make sure this works when emulation is not running
240 if (debug_context && debug_context->at_breakpoint) {
241 emit Update();
242 widget()->setEnabled(debug_context->at_breakpoint);
243 } else {
244 widget()->setEnabled(false);
245 }
246}
247
248void GraphicsSurfaceWidget::OnBreakPointHit(Tegra::DebugContext::Event event, void* data) {
249 emit Update();
250 widget()->setEnabled(true);
251}
252
253void GraphicsSurfaceWidget::OnResumed() {
254 widget()->setEnabled(false);
255}
256
257void GraphicsSurfaceWidget::OnSurfaceSourceChanged(int new_value) {
258 surface_source = static_cast<Source>(new_value);
259 emit Update();
260}
261
262void GraphicsSurfaceWidget::OnSurfaceAddressChanged(qint64 new_value) {
263 if (surface_address != new_value) {
264 surface_address = static_cast<Tegra::GPUVAddr>(new_value);
265
266 surface_source_list->setCurrentIndex(static_cast<int>(Source::Custom));
267 emit Update();
268 }
269}
270
271void GraphicsSurfaceWidget::OnSurfaceWidthChanged(int new_value) {
272 if (surface_width != static_cast<unsigned>(new_value)) {
273 surface_width = static_cast<unsigned>(new_value);
274
275 surface_source_list->setCurrentIndex(static_cast<int>(Source::Custom));
276 emit Update();
277 }
278}
279
280void GraphicsSurfaceWidget::OnSurfaceHeightChanged(int new_value) {
281 if (surface_height != static_cast<unsigned>(new_value)) {
282 surface_height = static_cast<unsigned>(new_value);
283
284 surface_source_list->setCurrentIndex(static_cast<int>(Source::Custom));
285 emit Update();
286 }
287}
288
289void GraphicsSurfaceWidget::OnSurfaceFormatChanged(int new_value) {
290 if (surface_format != static_cast<Tegra::Texture::TextureFormat>(new_value)) {
291 surface_format = static_cast<Tegra::Texture::TextureFormat>(new_value);
292
293 surface_source_list->setCurrentIndex(static_cast<int>(Source::Custom));
294 emit Update();
295 }
296}
297
298void GraphicsSurfaceWidget::OnSurfacePickerXChanged(int new_value) {
299 if (surface_picker_x != new_value) {
300 surface_picker_x = new_value;
301 Pick(surface_picker_x, surface_picker_y);
302 }
303}
304
305void GraphicsSurfaceWidget::OnSurfacePickerYChanged(int new_value) {
306 if (surface_picker_y != new_value) {
307 surface_picker_y = new_value;
308 Pick(surface_picker_x, surface_picker_y);
309 }
310}
311
312void GraphicsSurfaceWidget::Pick(int x, int y) {
313 surface_picker_x_control->setValue(x);
314 surface_picker_y_control->setValue(y);
315
316 if (x < 0 || x >= static_cast<int>(surface_width) || y < 0 ||
317 y >= static_cast<int>(surface_height)) {
318 surface_info_label->setText(tr("Pixel out of bounds"));
319 surface_info_label->setAlignment(Qt::AlignLeft | Qt::AlignVCenter);
320 return;
321 }
322
323 surface_info_label->setText(QString("Raw: <Unimplemented>\n(%1)").arg("<Unimplemented>"));
324 surface_info_label->setAlignment(Qt::AlignLeft | Qt::AlignVCenter);
325}
326
327void GraphicsSurfaceWidget::OnUpdate() {
328 auto& gpu = Core::System::GetInstance().GPU();
329
330 QPixmap pixmap;
331
332 switch (surface_source) {
333 case Source::RenderTarget0:
334 case Source::RenderTarget1:
335 case Source::RenderTarget2:
336 case Source::RenderTarget3:
337 case Source::RenderTarget4:
338 case Source::RenderTarget5:
339 case Source::RenderTarget6:
340 case Source::RenderTarget7: {
341 // TODO: Store a reference to the registers in the debug context instead of accessing them
342 // directly...
343
344 const auto& registers = gpu.Maxwell3D().regs;
345 const auto& rt = registers.rt[static_cast<std::size_t>(surface_source) -
346 static_cast<std::size_t>(Source::RenderTarget0)];
347
348 surface_address = rt.Address();
349 surface_width = rt.width;
350 surface_height = rt.height;
351 if (rt.format != Tegra::RenderTargetFormat::NONE) {
352 surface_format = ConvertToTextureFormat(rt.format);
353 }
354
355 break;
356 }
357
358 case Source::Custom: {
359 // Keep user-specified values
360 break;
361 }
362
363 default:
364 qDebug() << "Unknown surface source " << static_cast<int>(surface_source);
365 break;
366 }
367
368 surface_address_control->SetValue(surface_address);
369 surface_width_control->setValue(surface_width);
370 surface_height_control->setValue(surface_height);
371 surface_format_control->setCurrentIndex(static_cast<int>(surface_format));
372
373 if (surface_address == 0) {
374 surface_picture_label->hide();
375 surface_info_label->setText(tr("(invalid surface address)"));
376 surface_info_label->setAlignment(Qt::AlignCenter);
377 surface_picker_x_control->setEnabled(false);
378 surface_picker_y_control->setEnabled(false);
379 save_surface->setEnabled(false);
380 return;
381 }
382
383 // TODO: Implement a good way to visualize alpha components!
384
385 QImage decoded_image(surface_width, surface_height, QImage::Format_ARGB32);
386 std::optional<VAddr> address = gpu.MemoryManager().GpuToCpuAddress(surface_address);
387
388 // TODO(bunnei): Will not work with BCn formats that swizzle 4x4 tiles.
389 // Needs to be fixed if we plan to use this feature more, otherwise we may remove it.
390 auto unswizzled_data = Tegra::Texture::UnswizzleTexture(
391 *address, 1, 1, Tegra::Texture::BytesPerPixel(surface_format), surface_width,
392 surface_height, 1U);
393
394 auto texture_data = Tegra::Texture::DecodeTexture(unswizzled_data, surface_format,
395 surface_width, surface_height);
396
397 surface_picture_label->show();
398
399 for (unsigned int y = 0; y < surface_height; ++y) {
400 for (unsigned int x = 0; x < surface_width; ++x) {
401 Math::Vec4<u8> color;
402 color[0] = texture_data[x + y * surface_width + 0];
403 color[1] = texture_data[x + y * surface_width + 1];
404 color[2] = texture_data[x + y * surface_width + 2];
405 color[3] = texture_data[x + y * surface_width + 3];
406 decoded_image.setPixel(x, y, qRgba(color.r(), color.g(), color.b(), color.a()));
407 }
408 }
409
410 pixmap = QPixmap::fromImage(decoded_image);
411 surface_picture_label->setPixmap(pixmap);
412 surface_picture_label->resize(pixmap.size());
413
414 // Update the info with pixel data
415 surface_picker_x_control->setEnabled(true);
416 surface_picker_y_control->setEnabled(true);
417 Pick(surface_picker_x, surface_picker_y);
418
419 // Enable saving the converted pixmap to file
420 save_surface->setEnabled(true);
421}
422
423void GraphicsSurfaceWidget::SaveSurface() {
424 QString png_filter = tr("Portable Network Graphic (*.png)");
425 QString bin_filter = tr("Binary data (*.bin)");
426
427 QString selectedFilter;
428 QString filename = QFileDialog::getSaveFileName(
429 this, tr("Save Surface"),
430 QString("texture-0x%1.png").arg(QString::number(surface_address, 16)),
431 QString("%1;;%2").arg(png_filter, bin_filter), &selectedFilter);
432
433 if (filename.isEmpty()) {
434 // If the user canceled the dialog, don't save anything.
435 return;
436 }
437
438 if (selectedFilter == png_filter) {
439 const QPixmap* pixmap = surface_picture_label->pixmap();
440 ASSERT_MSG(pixmap != nullptr, "No pixmap set");
441
442 QFile file(filename);
443 file.open(QIODevice::WriteOnly);
444 if (pixmap)
445 pixmap->save(&file, "PNG");
446 } else if (selectedFilter == bin_filter) {
447 auto& gpu = Core::System::GetInstance().GPU();
448 std::optional<VAddr> address = gpu.MemoryManager().GpuToCpuAddress(surface_address);
449
450 const u8* buffer = Memory::GetPointer(*address);
451 ASSERT_MSG(buffer != nullptr, "Memory not accessible");
452
453 QFile file(filename);
454 file.open(QIODevice::WriteOnly);
455 int size = surface_width * surface_height * Tegra::Texture::BytesPerPixel(surface_format);
456 QByteArray data(reinterpret_cast<const char*>(buffer), size);
457 file.write(data);
458 } else {
459 UNREACHABLE_MSG("Unhandled filter selected");
460 }
461}
diff --git a/src/yuzu/debugger/graphics/graphics_surface.h b/src/yuzu/debugger/graphics/graphics_surface.h
deleted file mode 100644
index 323e39d94..000000000
--- a/src/yuzu/debugger/graphics/graphics_surface.h
+++ /dev/null
@@ -1,96 +0,0 @@
1// Copyright 2014 Citra Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <QLabel>
8#include <QPushButton>
9#include "video_core/memory_manager.h"
10#include "video_core/textures/texture.h"
11#include "yuzu/debugger/graphics/graphics_breakpoint_observer.h"
12
13class QComboBox;
14class QSpinBox;
15class CSpinBox;
16
17class GraphicsSurfaceWidget;
18
19class SurfacePicture : public QLabel {
20 Q_OBJECT
21
22public:
23 explicit SurfacePicture(QWidget* parent = nullptr,
24 GraphicsSurfaceWidget* surface_widget = nullptr);
25 ~SurfacePicture() override;
26
27protected slots:
28 void mouseMoveEvent(QMouseEvent* event) override;
29 void mousePressEvent(QMouseEvent* event) override;
30
31private:
32 GraphicsSurfaceWidget* surface_widget;
33};
34
35class GraphicsSurfaceWidget : public BreakPointObserverDock {
36 Q_OBJECT
37
38 using Event = Tegra::DebugContext::Event;
39
40 enum class Source {
41 RenderTarget0 = 0,
42 RenderTarget1 = 1,
43 RenderTarget2 = 2,
44 RenderTarget3 = 3,
45 RenderTarget4 = 4,
46 RenderTarget5 = 5,
47 RenderTarget6 = 6,
48 RenderTarget7 = 7,
49 ZBuffer = 8,
50 Custom = 9,
51 };
52
53public:
54 explicit GraphicsSurfaceWidget(std::shared_ptr<Tegra::DebugContext> debug_context,
55 QWidget* parent = nullptr);
56 void Pick(int x, int y);
57
58public slots:
59 void OnSurfaceSourceChanged(int new_value);
60 void OnSurfaceAddressChanged(qint64 new_value);
61 void OnSurfaceWidthChanged(int new_value);
62 void OnSurfaceHeightChanged(int new_value);
63 void OnSurfaceFormatChanged(int new_value);
64 void OnSurfacePickerXChanged(int new_value);
65 void OnSurfacePickerYChanged(int new_value);
66 void OnUpdate();
67
68signals:
69 void Update();
70
71private:
72 void OnBreakPointHit(Tegra::DebugContext::Event event, void* data) override;
73 void OnResumed() override;
74
75 void SaveSurface();
76
77 QComboBox* surface_source_list;
78 CSpinBox* surface_address_control;
79 QSpinBox* surface_width_control;
80 QSpinBox* surface_height_control;
81 QComboBox* surface_format_control;
82
83 SurfacePicture* surface_picture_label;
84 QSpinBox* surface_picker_x_control;
85 QSpinBox* surface_picker_y_control;
86 QLabel* surface_info_label;
87 QPushButton* save_surface;
88
89 Source surface_source;
90 Tegra::GPUVAddr surface_address;
91 unsigned surface_width;
92 unsigned surface_height;
93 Tegra::Texture::TextureFormat surface_format;
94 int surface_picker_x = 0;
95 int surface_picker_y = 0;
96};
diff --git a/src/yuzu/debugger/profiler.cpp b/src/yuzu/debugger/profiler.cpp
index 8b30e0a85..86e03e46d 100644
--- a/src/yuzu/debugger/profiler.cpp
+++ b/src/yuzu/debugger/profiler.cpp
@@ -7,6 +7,7 @@
7#include <QMouseEvent> 7#include <QMouseEvent>
8#include <QPainter> 8#include <QPainter>
9#include <QString> 9#include <QString>
10#include <QTimer>
10#include "common/common_types.h" 11#include "common/common_types.h"
11#include "common/microprofile.h" 12#include "common/microprofile.h"
12#include "yuzu/debugger/profiler.h" 13#include "yuzu/debugger/profiler.h"
diff --git a/src/yuzu/debugger/profiler.h b/src/yuzu/debugger/profiler.h
index eae1e9e3c..8e69fdb06 100644
--- a/src/yuzu/debugger/profiler.h
+++ b/src/yuzu/debugger/profiler.h
@@ -4,10 +4,11 @@
4 4
5#pragma once 5#pragma once
6 6
7#include <QAbstractItemModel> 7#include <QWidget>
8#include <QDockWidget> 8
9#include <QTimer> 9class QAction;
10#include "common/microprofile.h" 10class QHideEvent;
11class QShowEvent;
11 12
12class MicroProfileDialog : public QWidget { 13class MicroProfileDialog : public QWidget {
13 Q_OBJECT 14 Q_OBJECT
diff --git a/src/yuzu/debugger/wait_tree.cpp b/src/yuzu/debugger/wait_tree.cpp
index 0c0864742..593bb681f 100644
--- a/src/yuzu/debugger/wait_tree.cpp
+++ b/src/yuzu/debugger/wait_tree.cpp
@@ -13,7 +13,6 @@
13#include "core/hle/kernel/readable_event.h" 13#include "core/hle/kernel/readable_event.h"
14#include "core/hle/kernel/scheduler.h" 14#include "core/hle/kernel/scheduler.h"
15#include "core/hle/kernel/thread.h" 15#include "core/hle/kernel/thread.h"
16#include "core/hle/kernel/timer.h"
17#include "core/hle/kernel/wait_object.h" 16#include "core/hle/kernel/wait_object.h"
18#include "core/memory.h" 17#include "core/memory.h"
19 18
@@ -82,9 +81,8 @@ QString WaitTreeText::GetText() const {
82 return text; 81 return text;
83} 82}
84 83
85WaitTreeMutexInfo::WaitTreeMutexInfo(VAddr mutex_address) : mutex_address(mutex_address) { 84WaitTreeMutexInfo::WaitTreeMutexInfo(VAddr mutex_address, const Kernel::HandleTable& handle_table)
86 const auto& handle_table = Core::CurrentProcess()->GetHandleTable(); 85 : mutex_address(mutex_address) {
87
88 mutex_value = Memory::Read32(mutex_address); 86 mutex_value = Memory::Read32(mutex_address);
89 owner_handle = static_cast<Kernel::Handle>(mutex_value & Kernel::Mutex::MutexOwnerMask); 87 owner_handle = static_cast<Kernel::Handle>(mutex_value & Kernel::Mutex::MutexOwnerMask);
90 owner = handle_table.Get<Kernel::Thread>(owner_handle); 88 owner = handle_table.Get<Kernel::Thread>(owner_handle);
@@ -155,8 +153,6 @@ std::unique_ptr<WaitTreeWaitObject> WaitTreeWaitObject::make(const Kernel::WaitO
155 switch (object.GetHandleType()) { 153 switch (object.GetHandleType()) {
156 case Kernel::HandleType::ReadableEvent: 154 case Kernel::HandleType::ReadableEvent:
157 return std::make_unique<WaitTreeEvent>(static_cast<const Kernel::ReadableEvent&>(object)); 155 return std::make_unique<WaitTreeEvent>(static_cast<const Kernel::ReadableEvent&>(object));
158 case Kernel::HandleType::Timer:
159 return std::make_unique<WaitTreeTimer>(static_cast<const Kernel::Timer&>(object));
160 case Kernel::HandleType::Thread: 156 case Kernel::HandleType::Thread:
161 return std::make_unique<WaitTreeThread>(static_cast<const Kernel::Thread&>(object)); 157 return std::make_unique<WaitTreeThread>(static_cast<const Kernel::Thread&>(object));
162 default: 158 default:
@@ -238,6 +234,9 @@ QString WaitTreeThread::GetText() const {
238 case Kernel::ThreadStatus::WaitMutex: 234 case Kernel::ThreadStatus::WaitMutex:
239 status = tr("waiting for mutex"); 235 status = tr("waiting for mutex");
240 break; 236 break;
237 case Kernel::ThreadStatus::WaitCondVar:
238 status = tr("waiting for condition variable");
239 break;
241 case Kernel::ThreadStatus::WaitArb: 240 case Kernel::ThreadStatus::WaitArb:
242 status = tr("waiting for address arbiter"); 241 status = tr("waiting for address arbiter");
243 break; 242 break;
@@ -273,6 +272,7 @@ QColor WaitTreeThread::GetColor() const {
273 case Kernel::ThreadStatus::WaitSynchAll: 272 case Kernel::ThreadStatus::WaitSynchAll:
274 case Kernel::ThreadStatus::WaitSynchAny: 273 case Kernel::ThreadStatus::WaitSynchAny:
275 case Kernel::ThreadStatus::WaitMutex: 274 case Kernel::ThreadStatus::WaitMutex:
275 case Kernel::ThreadStatus::WaitCondVar:
276 case Kernel::ThreadStatus::WaitArb: 276 case Kernel::ThreadStatus::WaitArb:
277 return QColor(Qt::GlobalColor::red); 277 return QColor(Qt::GlobalColor::red);
278 case Kernel::ThreadStatus::Dormant: 278 case Kernel::ThreadStatus::Dormant:
@@ -319,7 +319,8 @@ std::vector<std::unique_ptr<WaitTreeItem>> WaitTreeThread::GetChildren() const {
319 319
320 const VAddr mutex_wait_address = thread.GetMutexWaitAddress(); 320 const VAddr mutex_wait_address = thread.GetMutexWaitAddress();
321 if (mutex_wait_address != 0) { 321 if (mutex_wait_address != 0) {
322 list.push_back(std::make_unique<WaitTreeMutexInfo>(mutex_wait_address)); 322 const auto& handle_table = thread.GetOwnerProcess()->GetHandleTable();
323 list.push_back(std::make_unique<WaitTreeMutexInfo>(mutex_wait_address, handle_table));
323 } else { 324 } else {
324 list.push_back(std::make_unique<WaitTreeText>(tr("not waiting for mutex"))); 325 list.push_back(std::make_unique<WaitTreeText>(tr("not waiting for mutex")));
325 } 326 }
@@ -348,23 +349,6 @@ std::vector<std::unique_ptr<WaitTreeItem>> WaitTreeEvent::GetChildren() const {
348 return list; 349 return list;
349} 350}
350 351
351WaitTreeTimer::WaitTreeTimer(const Kernel::Timer& object) : WaitTreeWaitObject(object) {}
352WaitTreeTimer::~WaitTreeTimer() = default;
353
354std::vector<std::unique_ptr<WaitTreeItem>> WaitTreeTimer::GetChildren() const {
355 std::vector<std::unique_ptr<WaitTreeItem>> list(WaitTreeWaitObject::GetChildren());
356
357 const auto& timer = static_cast<const Kernel::Timer&>(object);
358
359 list.push_back(std::make_unique<WaitTreeText>(
360 tr("reset type = %1").arg(GetResetTypeQString(timer.GetResetType()))));
361 list.push_back(
362 std::make_unique<WaitTreeText>(tr("initial delay = %1").arg(timer.GetInitialDelay())));
363 list.push_back(
364 std::make_unique<WaitTreeText>(tr("interval delay = %1").arg(timer.GetIntervalDelay())));
365 return list;
366}
367
368WaitTreeThreadList::WaitTreeThreadList(const std::vector<Kernel::SharedPtr<Kernel::Thread>>& list) 352WaitTreeThreadList::WaitTreeThreadList(const std::vector<Kernel::SharedPtr<Kernel::Thread>>& list)
369 : thread_list(list) {} 353 : thread_list(list) {}
370WaitTreeThreadList::~WaitTreeThreadList() = default; 354WaitTreeThreadList::~WaitTreeThreadList() = default;
diff --git a/src/yuzu/debugger/wait_tree.h b/src/yuzu/debugger/wait_tree.h
index e639ef412..62886609d 100644
--- a/src/yuzu/debugger/wait_tree.h
+++ b/src/yuzu/debugger/wait_tree.h
@@ -17,10 +17,10 @@
17class EmuThread; 17class EmuThread;
18 18
19namespace Kernel { 19namespace Kernel {
20class HandleTable;
20class ReadableEvent; 21class ReadableEvent;
21class WaitObject; 22class WaitObject;
22class Thread; 23class Thread;
23class Timer;
24} // namespace Kernel 24} // namespace Kernel
25 25
26class WaitTreeThread; 26class WaitTreeThread;
@@ -73,7 +73,7 @@ public:
73class WaitTreeMutexInfo : public WaitTreeExpandableItem { 73class WaitTreeMutexInfo : public WaitTreeExpandableItem {
74 Q_OBJECT 74 Q_OBJECT
75public: 75public:
76 explicit WaitTreeMutexInfo(VAddr mutex_address); 76 explicit WaitTreeMutexInfo(VAddr mutex_address, const Kernel::HandleTable& handle_table);
77 ~WaitTreeMutexInfo() override; 77 ~WaitTreeMutexInfo() override;
78 78
79 QString GetText() const override; 79 QString GetText() const override;
@@ -150,15 +150,6 @@ public:
150 std::vector<std::unique_ptr<WaitTreeItem>> GetChildren() const override; 150 std::vector<std::unique_ptr<WaitTreeItem>> GetChildren() const override;
151}; 151};
152 152
153class WaitTreeTimer : public WaitTreeWaitObject {
154 Q_OBJECT
155public:
156 explicit WaitTreeTimer(const Kernel::Timer& object);
157 ~WaitTreeTimer() override;
158
159 std::vector<std::unique_ptr<WaitTreeItem>> GetChildren() const override;
160};
161
162class WaitTreeThreadList : public WaitTreeExpandableItem { 153class WaitTreeThreadList : public WaitTreeExpandableItem {
163 Q_OBJECT 154 Q_OBJECT
164public: 155public:
diff --git a/src/yuzu/game_list.cpp b/src/yuzu/game_list.cpp
index c0e3c5fa9..b0ca766ec 100644
--- a/src/yuzu/game_list.cpp
+++ b/src/yuzu/game_list.cpp
@@ -18,6 +18,7 @@
18#include "common/common_types.h" 18#include "common/common_types.h"
19#include "common/logging/log.h" 19#include "common/logging/log.h"
20#include "core/file_sys/patch_manager.h" 20#include "core/file_sys/patch_manager.h"
21#include "core/file_sys/registered_cache.h"
21#include "yuzu/compatibility_list.h" 22#include "yuzu/compatibility_list.h"
22#include "yuzu/game_list.h" 23#include "yuzu/game_list.h"
23#include "yuzu/game_list_p.h" 24#include "yuzu/game_list_p.h"
@@ -193,8 +194,9 @@ void GameList::onFilterCloseClicked() {
193 main_window->filterBarSetChecked(false); 194 main_window->filterBarSetChecked(false);
194} 195}
195 196
196GameList::GameList(FileSys::VirtualFilesystem vfs, GMainWindow* parent) 197GameList::GameList(FileSys::VirtualFilesystem vfs, FileSys::ManualContentProvider* provider,
197 : QWidget{parent}, vfs(std::move(vfs)) { 198 GMainWindow* parent)
199 : QWidget{parent}, vfs(std::move(vfs)), provider(provider) {
198 watcher = new QFileSystemWatcher(this); 200 watcher = new QFileSystemWatcher(this);
199 connect(watcher, &QFileSystemWatcher::directoryChanged, this, &GameList::RefreshGameDirectory); 201 connect(watcher, &QFileSystemWatcher::directoryChanged, this, &GameList::RefreshGameDirectory);
200 202
@@ -329,6 +331,8 @@ void GameList::PopupContextMenu(const QPoint& menu_location) {
329 QMenu context_menu; 331 QMenu context_menu;
330 QAction* open_save_location = context_menu.addAction(tr("Open Save Data Location")); 332 QAction* open_save_location = context_menu.addAction(tr("Open Save Data Location"));
331 QAction* open_lfs_location = context_menu.addAction(tr("Open Mod Data Location")); 333 QAction* open_lfs_location = context_menu.addAction(tr("Open Mod Data Location"));
334 QAction* open_transferable_shader_cache =
335 context_menu.addAction(tr("Open Transferable Shader Cache"));
332 context_menu.addSeparator(); 336 context_menu.addSeparator();
333 QAction* dump_romfs = context_menu.addAction(tr("Dump RomFS")); 337 QAction* dump_romfs = context_menu.addAction(tr("Dump RomFS"));
334 QAction* copy_tid = context_menu.addAction(tr("Copy Title ID to Clipboard")); 338 QAction* copy_tid = context_menu.addAction(tr("Copy Title ID to Clipboard"));
@@ -344,6 +348,8 @@ void GameList::PopupContextMenu(const QPoint& menu_location) {
344 [&]() { emit OpenFolderRequested(program_id, GameListOpenTarget::SaveData); }); 348 [&]() { emit OpenFolderRequested(program_id, GameListOpenTarget::SaveData); });
345 connect(open_lfs_location, &QAction::triggered, 349 connect(open_lfs_location, &QAction::triggered,
346 [&]() { emit OpenFolderRequested(program_id, GameListOpenTarget::ModData); }); 350 [&]() { emit OpenFolderRequested(program_id, GameListOpenTarget::ModData); });
351 connect(open_transferable_shader_cache, &QAction::triggered,
352 [&]() { emit OpenTransferableShaderCacheRequested(program_id); });
347 connect(dump_romfs, &QAction::triggered, [&]() { emit DumpRomFSRequested(program_id, path); }); 353 connect(dump_romfs, &QAction::triggered, [&]() { emit DumpRomFSRequested(program_id, path); });
348 connect(copy_tid, &QAction::triggered, [&]() { emit CopyTIDRequested(program_id); }); 354 connect(copy_tid, &QAction::triggered, [&]() { emit CopyTIDRequested(program_id); });
349 connect(navigate_to_gamedb_entry, &QAction::triggered, 355 connect(navigate_to_gamedb_entry, &QAction::triggered,
@@ -428,7 +434,8 @@ void GameList::PopulateAsync(const QString& dir_path, bool deep_scan) {
428 434
429 emit ShouldCancelWorker(); 435 emit ShouldCancelWorker();
430 436
431 GameListWorker* worker = new GameListWorker(vfs, dir_path, deep_scan, compatibility_list); 437 GameListWorker* worker =
438 new GameListWorker(vfs, provider, dir_path, deep_scan, compatibility_list);
432 439
433 connect(worker, &GameListWorker::EntryReady, this, &GameList::AddEntry, Qt::QueuedConnection); 440 connect(worker, &GameListWorker::EntryReady, this, &GameList::AddEntry, Qt::QueuedConnection);
434 connect(worker, &GameListWorker::Finished, this, &GameList::DonePopulating, 441 connect(worker, &GameListWorker::Finished, this, &GameList::DonePopulating,
@@ -460,9 +467,10 @@ void GameList::LoadInterfaceLayout() {
460const QStringList GameList::supported_file_extensions = {"nso", "nro", "nca", "xci", "nsp"}; 467const QStringList GameList::supported_file_extensions = {"nso", "nro", "nca", "xci", "nsp"};
461 468
462void GameList::RefreshGameDirectory() { 469void GameList::RefreshGameDirectory() {
463 if (!UISettings::values.gamedir.isEmpty() && current_worker != nullptr) { 470 if (!UISettings::values.game_directory_path.isEmpty() && current_worker != nullptr) {
464 LOG_INFO(Frontend, "Change detected in the games directory. Reloading game list."); 471 LOG_INFO(Frontend, "Change detected in the games directory. Reloading game list.");
465 search_field->clear(); 472 search_field->clear();
466 PopulateAsync(UISettings::values.gamedir, UISettings::values.gamedir_deepscan); 473 PopulateAsync(UISettings::values.game_directory_path,
474 UISettings::values.game_directory_deepscan);
467 } 475 }
468} 476}
diff --git a/src/yuzu/game_list.h b/src/yuzu/game_list.h
index b317eb2fc..56007eef8 100644
--- a/src/yuzu/game_list.h
+++ b/src/yuzu/game_list.h
@@ -26,8 +26,9 @@ class GameListSearchField;
26class GMainWindow; 26class GMainWindow;
27 27
28namespace FileSys { 28namespace FileSys {
29class ManualContentProvider;
29class VfsFilesystem; 30class VfsFilesystem;
30} 31} // namespace FileSys
31 32
32enum class GameListOpenTarget { 33enum class GameListOpenTarget {
33 SaveData, 34 SaveData,
@@ -47,7 +48,8 @@ public:
47 COLUMN_COUNT, // Number of columns 48 COLUMN_COUNT, // Number of columns
48 }; 49 };
49 50
50 explicit GameList(std::shared_ptr<FileSys::VfsFilesystem> vfs, GMainWindow* parent = nullptr); 51 explicit GameList(std::shared_ptr<FileSys::VfsFilesystem> vfs,
52 FileSys::ManualContentProvider* provider, GMainWindow* parent = nullptr);
51 ~GameList() override; 53 ~GameList() override;
52 54
53 void clearFilter(); 55 void clearFilter();
@@ -66,6 +68,7 @@ signals:
66 void GameChosen(QString game_path); 68 void GameChosen(QString game_path);
67 void ShouldCancelWorker(); 69 void ShouldCancelWorker();
68 void OpenFolderRequested(u64 program_id, GameListOpenTarget target); 70 void OpenFolderRequested(u64 program_id, GameListOpenTarget target);
71 void OpenTransferableShaderCacheRequested(u64 program_id);
69 void DumpRomFSRequested(u64 program_id, const std::string& game_path); 72 void DumpRomFSRequested(u64 program_id, const std::string& game_path);
70 void CopyTIDRequested(u64 program_id); 73 void CopyTIDRequested(u64 program_id);
71 void NavigateToGamedbEntryRequested(u64 program_id, 74 void NavigateToGamedbEntryRequested(u64 program_id,
@@ -85,6 +88,7 @@ private:
85 void RefreshGameDirectory(); 88 void RefreshGameDirectory();
86 89
87 std::shared_ptr<FileSys::VfsFilesystem> vfs; 90 std::shared_ptr<FileSys::VfsFilesystem> vfs;
91 FileSys::ManualContentProvider* provider;
88 GameListSearchField* search_field; 92 GameListSearchField* search_field;
89 GMainWindow* main_window = nullptr; 93 GMainWindow* main_window = nullptr;
90 QVBoxLayout* layout = nullptr; 94 QVBoxLayout* layout = nullptr;
diff --git a/src/yuzu/game_list_worker.cpp b/src/yuzu/game_list_worker.cpp
index b37710f59..8687e7c5a 100644
--- a/src/yuzu/game_list_worker.cpp
+++ b/src/yuzu/game_list_worker.cpp
@@ -12,12 +12,15 @@
12 12
13#include "common/common_paths.h" 13#include "common/common_paths.h"
14#include "common/file_util.h" 14#include "common/file_util.h"
15#include "core/core.h"
16#include "core/file_sys/card_image.h"
15#include "core/file_sys/content_archive.h" 17#include "core/file_sys/content_archive.h"
16#include "core/file_sys/control_metadata.h" 18#include "core/file_sys/control_metadata.h"
17#include "core/file_sys/mode.h" 19#include "core/file_sys/mode.h"
18#include "core/file_sys/nca_metadata.h" 20#include "core/file_sys/nca_metadata.h"
19#include "core/file_sys/patch_manager.h" 21#include "core/file_sys/patch_manager.h"
20#include "core/file_sys/registered_cache.h" 22#include "core/file_sys/registered_cache.h"
23#include "core/file_sys/submission_package.h"
21#include "core/hle/service/filesystem/filesystem.h" 24#include "core/hle/service/filesystem/filesystem.h"
22#include "core/loader/loader.h" 25#include "core/loader/loader.h"
23#include "yuzu/compatibility_list.h" 26#include "yuzu/compatibility_list.h"
@@ -119,20 +122,25 @@ QList<QStandardItem*> MakeGameListEntry(const std::string& path, const std::stri
119} 122}
120} // Anonymous namespace 123} // Anonymous namespace
121 124
122GameListWorker::GameListWorker(FileSys::VirtualFilesystem vfs, QString dir_path, bool deep_scan, 125GameListWorker::GameListWorker(FileSys::VirtualFilesystem vfs,
123 const CompatibilityList& compatibility_list) 126 FileSys::ManualContentProvider* provider, QString dir_path,
124 : vfs(std::move(vfs)), dir_path(std::move(dir_path)), deep_scan(deep_scan), 127 bool deep_scan, const CompatibilityList& compatibility_list)
128 : vfs(std::move(vfs)), provider(provider), dir_path(std::move(dir_path)), deep_scan(deep_scan),
125 compatibility_list(compatibility_list) {} 129 compatibility_list(compatibility_list) {}
126 130
127GameListWorker::~GameListWorker() = default; 131GameListWorker::~GameListWorker() = default;
128 132
129void GameListWorker::AddInstalledTitlesToGameList() { 133void GameListWorker::AddTitlesToGameList() {
130 const auto cache = Service::FileSystem::GetUnionContents(); 134 const auto& cache = dynamic_cast<FileSys::ContentProviderUnion&>(
131 const auto installed_games = cache.ListEntriesFilter(FileSys::TitleType::Application, 135 Core::System::GetInstance().GetContentProvider());
132 FileSys::ContentRecordType::Program); 136 const auto installed_games = cache.ListEntriesFilterOrigin(
137 std::nullopt, FileSys::TitleType::Application, FileSys::ContentRecordType::Program);
133 138
134 for (const auto& game : installed_games) { 139 for (const auto& [slot, game] : installed_games) {
135 const auto file = cache.GetEntryUnparsed(game); 140 if (slot == FileSys::ContentProviderUnionSlot::FrontendManual)
141 continue;
142
143 const auto file = cache.GetEntryUnparsed(game.title_id, game.type);
136 std::unique_ptr<Loader::AppLoader> loader = Loader::GetLoader(file); 144 std::unique_ptr<Loader::AppLoader> loader = Loader::GetLoader(file);
137 if (!loader) 145 if (!loader)
138 continue; 146 continue;
@@ -150,45 +158,13 @@ void GameListWorker::AddInstalledTitlesToGameList() {
150 emit EntryReady(MakeGameListEntry(file->GetFullPath(), name, icon, *loader, program_id, 158 emit EntryReady(MakeGameListEntry(file->GetFullPath(), name, icon, *loader, program_id,
151 compatibility_list, patch)); 159 compatibility_list, patch));
152 } 160 }
153
154 const auto control_data = cache.ListEntriesFilter(FileSys::TitleType::Application,
155 FileSys::ContentRecordType::Control);
156
157 for (const auto& entry : control_data) {
158 auto nca = cache.GetEntry(entry);
159 if (nca != nullptr) {
160 nca_control_map.insert_or_assign(entry.title_id, std::move(nca));
161 }
162 }
163} 161}
164 162
165void GameListWorker::FillControlMap(const std::string& dir_path) { 163void GameListWorker::ScanFileSystem(ScanTarget target, const std::string& dir_path,
166 const auto nca_control_callback = [this](u64* num_entries_out, const std::string& directory, 164 unsigned int recursion) {
167 const std::string& virtual_name) -> bool { 165 const auto callback = [this, target, recursion](u64* num_entries_out,
168 if (stop_processing) { 166 const std::string& directory,
169 // Breaks the callback loop 167 const std::string& virtual_name) -> bool {
170 return false;
171 }
172
173 const std::string physical_name = directory + DIR_SEP + virtual_name;
174 const QFileInfo file_info(QString::fromStdString(physical_name));
175 if (!file_info.isDir() && file_info.suffix() == QStringLiteral("nca")) {
176 auto nca =
177 std::make_unique<FileSys::NCA>(vfs->OpenFile(physical_name, FileSys::Mode::Read));
178 if (nca->GetType() == FileSys::NCAContentType::Control) {
179 const u64 title_id = nca->GetTitleId();
180 nca_control_map.insert_or_assign(title_id, std::move(nca));
181 }
182 }
183 return true;
184 };
185
186 FileUtil::ForeachDirectoryEntry(nullptr, dir_path, nca_control_callback);
187}
188
189void GameListWorker::AddFstEntriesToGameList(const std::string& dir_path, unsigned int recursion) {
190 const auto callback = [this, recursion](u64* num_entries_out, const std::string& directory,
191 const std::string& virtual_name) -> bool {
192 if (stop_processing) { 168 if (stop_processing) {
193 // Breaks the callback loop. 169 // Breaks the callback loop.
194 return false; 170 return false;
@@ -198,7 +174,8 @@ void GameListWorker::AddFstEntriesToGameList(const std::string& dir_path, unsign
198 const bool is_dir = FileUtil::IsDirectory(physical_name); 174 const bool is_dir = FileUtil::IsDirectory(physical_name);
199 if (!is_dir && 175 if (!is_dir &&
200 (HasSupportedFileExtension(physical_name) || IsExtractedNCAMain(physical_name))) { 176 (HasSupportedFileExtension(physical_name) || IsExtractedNCAMain(physical_name))) {
201 auto loader = Loader::GetLoader(vfs->OpenFile(physical_name, FileSys::Mode::Read)); 177 const auto file = vfs->OpenFile(physical_name, FileSys::Mode::Read);
178 auto loader = Loader::GetLoader(file);
202 if (!loader) { 179 if (!loader) {
203 return true; 180 return true;
204 } 181 }
@@ -209,31 +186,42 @@ void GameListWorker::AddFstEntriesToGameList(const std::string& dir_path, unsign
209 return true; 186 return true;
210 } 187 }
211 188
212 std::vector<u8> icon;
213 const auto res1 = loader->ReadIcon(icon);
214
215 u64 program_id = 0; 189 u64 program_id = 0;
216 const auto res2 = loader->ReadProgramId(program_id); 190 const auto res2 = loader->ReadProgramId(program_id);
217 191
218 std::string name = " "; 192 if (target == ScanTarget::FillManualContentProvider) {
219 const auto res3 = loader->ReadTitle(name); 193 if (res2 == Loader::ResultStatus::Success && file_type == Loader::FileType::NCA) {
194 provider->AddEntry(FileSys::TitleType::Application,
195 FileSys::GetCRTypeFromNCAType(FileSys::NCA{file}.GetType()),
196 program_id, file);
197 } else if (res2 == Loader::ResultStatus::Success &&
198 (file_type == Loader::FileType::XCI ||
199 file_type == Loader::FileType::NSP)) {
200 const auto nsp = file_type == Loader::FileType::NSP
201 ? std::make_shared<FileSys::NSP>(file)
202 : FileSys::XCI{file}.GetSecurePartitionNSP();
203 for (const auto& title : nsp->GetNCAs()) {
204 for (const auto& entry : title.second) {
205 provider->AddEntry(entry.first.first, entry.first.second, title.first,
206 entry.second->GetBaseFile());
207 }
208 }
209 }
210 } else {
211 std::vector<u8> icon;
212 const auto res1 = loader->ReadIcon(icon);
220 213
221 const FileSys::PatchManager patch{program_id}; 214 std::string name = " ";
215 const auto res3 = loader->ReadTitle(name);
222 216
223 if (res1 != Loader::ResultStatus::Success && res3 != Loader::ResultStatus::Success && 217 const FileSys::PatchManager patch{program_id};
224 res2 == Loader::ResultStatus::Success) {
225 // Use from metadata pool.
226 if (nca_control_map.find(program_id) != nca_control_map.end()) {
227 const auto& nca = nca_control_map[program_id];
228 GetMetadataFromControlNCA(patch, *nca, icon, name);
229 }
230 }
231 218
232 emit EntryReady(MakeGameListEntry(physical_name, name, icon, *loader, program_id, 219 emit EntryReady(MakeGameListEntry(physical_name, name, icon, *loader, program_id,
233 compatibility_list, patch)); 220 compatibility_list, patch));
221 }
234 } else if (is_dir && recursion > 0) { 222 } else if (is_dir && recursion > 0) {
235 watch_list.append(QString::fromStdString(physical_name)); 223 watch_list.append(QString::fromStdString(physical_name));
236 AddFstEntriesToGameList(physical_name, recursion - 1); 224 ScanFileSystem(target, physical_name, recursion - 1);
237 } 225 }
238 226
239 return true; 227 return true;
@@ -245,10 +233,11 @@ void GameListWorker::AddFstEntriesToGameList(const std::string& dir_path, unsign
245void GameListWorker::run() { 233void GameListWorker::run() {
246 stop_processing = false; 234 stop_processing = false;
247 watch_list.append(dir_path); 235 watch_list.append(dir_path);
248 FillControlMap(dir_path.toStdString()); 236 provider->ClearAllEntries();
249 AddInstalledTitlesToGameList(); 237 ScanFileSystem(ScanTarget::FillManualContentProvider, dir_path.toStdString(),
250 AddFstEntriesToGameList(dir_path.toStdString(), deep_scan ? 256 : 0); 238 deep_scan ? 256 : 0);
251 nca_control_map.clear(); 239 AddTitlesToGameList();
240 ScanFileSystem(ScanTarget::PopulateGameList, dir_path.toStdString(), deep_scan ? 256 : 0);
252 emit Finished(watch_list); 241 emit Finished(watch_list);
253} 242}
254 243
diff --git a/src/yuzu/game_list_worker.h b/src/yuzu/game_list_worker.h
index 0e42d0bde..7c3074af9 100644
--- a/src/yuzu/game_list_worker.h
+++ b/src/yuzu/game_list_worker.h
@@ -33,7 +33,8 @@ class GameListWorker : public QObject, public QRunnable {
33 Q_OBJECT 33 Q_OBJECT
34 34
35public: 35public:
36 GameListWorker(std::shared_ptr<FileSys::VfsFilesystem> vfs, QString dir_path, bool deep_scan, 36 GameListWorker(std::shared_ptr<FileSys::VfsFilesystem> vfs,
37 FileSys::ManualContentProvider* provider, QString dir_path, bool deep_scan,
37 const CompatibilityList& compatibility_list); 38 const CompatibilityList& compatibility_list);
38 ~GameListWorker() override; 39 ~GameListWorker() override;
39 40
@@ -58,12 +59,17 @@ signals:
58 void Finished(QStringList watch_list); 59 void Finished(QStringList watch_list);
59 60
60private: 61private:
61 void AddInstalledTitlesToGameList(); 62 void AddTitlesToGameList();
62 void FillControlMap(const std::string& dir_path); 63
63 void AddFstEntriesToGameList(const std::string& dir_path, unsigned int recursion = 0); 64 enum class ScanTarget {
65 FillManualContentProvider,
66 PopulateGameList,
67 };
68
69 void ScanFileSystem(ScanTarget target, const std::string& dir_path, unsigned int recursion = 0);
64 70
65 std::shared_ptr<FileSys::VfsFilesystem> vfs; 71 std::shared_ptr<FileSys::VfsFilesystem> vfs;
66 std::map<u64, std::unique_ptr<FileSys::NCA>> nca_control_map; 72 FileSys::ManualContentProvider* provider;
67 QStringList watch_list; 73 QStringList watch_list;
68 QString dir_path; 74 QString dir_path;
69 bool deep_scan; 75 bool deep_scan;
diff --git a/src/yuzu/hotkeys.cpp b/src/yuzu/hotkeys.cpp
index dce399774..4582e7f21 100644
--- a/src/yuzu/hotkeys.cpp
+++ b/src/yuzu/hotkeys.cpp
@@ -2,7 +2,6 @@
2// Licensed under GPLv2 or any later version 2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included. 3// Refer to the license.txt file included.
4 4
5#include <map>
6#include <QKeySequence> 5#include <QKeySequence>
7#include <QShortcut> 6#include <QShortcut>
8#include <QTreeWidgetItem> 7#include <QTreeWidgetItem>
@@ -13,47 +12,32 @@
13HotkeyRegistry::HotkeyRegistry() = default; 12HotkeyRegistry::HotkeyRegistry() = default;
14HotkeyRegistry::~HotkeyRegistry() = default; 13HotkeyRegistry::~HotkeyRegistry() = default;
15 14
16void HotkeyRegistry::LoadHotkeys() {
17 // Make sure NOT to use a reference here because it would become invalid once we call
18 // beginGroup()
19 for (auto shortcut : UISettings::values.shortcuts) {
20 const QStringList cat = shortcut.first.split('/');
21 Q_ASSERT(cat.size() >= 2);
22
23 // RegisterHotkey assigns default keybindings, so use old values as default parameters
24 Hotkey& hk = hotkey_groups[cat[0]][cat[1]];
25 if (!shortcut.second.first.isEmpty()) {
26 hk.keyseq = QKeySequence::fromString(shortcut.second.first);
27 hk.context = static_cast<Qt::ShortcutContext>(shortcut.second.second);
28 }
29 if (hk.shortcut)
30 hk.shortcut->setKey(hk.keyseq);
31 }
32}
33
34void HotkeyRegistry::SaveHotkeys() { 15void HotkeyRegistry::SaveHotkeys() {
35 UISettings::values.shortcuts.clear(); 16 UISettings::values.shortcuts.clear();
36 for (const auto& group : hotkey_groups) { 17 for (const auto& group : hotkey_groups) {
37 for (const auto& hotkey : group.second) { 18 for (const auto& hotkey : group.second) {
38 UISettings::values.shortcuts.emplace_back( 19 UISettings::values.shortcuts.push_back(
39 UISettings::Shortcut(group.first + '/' + hotkey.first, 20 {hotkey.first, group.first,
40 UISettings::ContextualShortcut(hotkey.second.keyseq.toString(), 21 UISettings::ContextualShortcut(hotkey.second.keyseq.toString(),
41 hotkey.second.context))); 22 hotkey.second.context)});
42 } 23 }
43 } 24 }
44} 25}
45 26
46void HotkeyRegistry::RegisterHotkey(const QString& group, const QString& action, 27void HotkeyRegistry::LoadHotkeys() {
47 const QKeySequence& default_keyseq, 28 // Make sure NOT to use a reference here because it would become invalid once we call
48 Qt::ShortcutContext default_context) { 29 // beginGroup()
49 auto& hotkey_group = hotkey_groups[group]; 30 for (auto shortcut : UISettings::values.shortcuts) {
50 if (hotkey_group.find(action) != hotkey_group.end()) { 31 Hotkey& hk = hotkey_groups[shortcut.group][shortcut.name];
51 return; 32 if (!shortcut.shortcut.first.isEmpty()) {
33 hk.keyseq = QKeySequence::fromString(shortcut.shortcut.first, QKeySequence::NativeText);
34 hk.context = static_cast<Qt::ShortcutContext>(shortcut.shortcut.second);
35 }
36 if (hk.shortcut) {
37 hk.shortcut->disconnect();
38 hk.shortcut->setKey(hk.keyseq);
39 }
52 } 40 }
53
54 auto& hotkey_action = hotkey_groups[group][action];
55 hotkey_action.keyseq = default_keyseq;
56 hotkey_action.context = default_context;
57} 41}
58 42
59QShortcut* HotkeyRegistry::GetHotkey(const QString& group, const QString& action, QWidget* widget) { 43QShortcut* HotkeyRegistry::GetHotkey(const QString& group, const QString& action, QWidget* widget) {
@@ -65,24 +49,11 @@ QShortcut* HotkeyRegistry::GetHotkey(const QString& group, const QString& action
65 return hk.shortcut; 49 return hk.shortcut;
66} 50}
67 51
68GHotkeysDialog::GHotkeysDialog(QWidget* parent) : QWidget(parent) { 52QKeySequence HotkeyRegistry::GetKeySequence(const QString& group, const QString& action) {
69 ui.setupUi(this); 53 return hotkey_groups[group][action].keyseq;
70} 54}
71 55
72void GHotkeysDialog::Populate(const HotkeyRegistry& registry) { 56Qt::ShortcutContext HotkeyRegistry::GetShortcutContext(const QString& group,
73 for (const auto& group : registry.hotkey_groups) { 57 const QString& action) {
74 QTreeWidgetItem* toplevel_item = new QTreeWidgetItem(QStringList(group.first)); 58 return hotkey_groups[group][action].context;
75 for (const auto& hotkey : group.second) {
76 QStringList columns;
77 columns << hotkey.first << hotkey.second.keyseq.toString();
78 QTreeWidgetItem* item = new QTreeWidgetItem(columns);
79 toplevel_item->addChild(item);
80 }
81 ui.treeWidget->addTopLevelItem(toplevel_item);
82 }
83 // TODO: Make context configurable as well (hiding the column for now)
84 ui.treeWidget->setColumnCount(2);
85
86 ui.treeWidget->resizeColumnToContents(0);
87 ui.treeWidget->resizeColumnToContents(1);
88} 59}
diff --git a/src/yuzu/hotkeys.h b/src/yuzu/hotkeys.h
index f38e6c002..4f526dc7e 100644
--- a/src/yuzu/hotkeys.h
+++ b/src/yuzu/hotkeys.h
@@ -5,7 +5,6 @@
5#pragma once 5#pragma once
6 6
7#include <map> 7#include <map>
8#include "ui_hotkeys.h"
9 8
10class QDialog; 9class QDialog;
11class QKeySequence; 10class QKeySequence;
@@ -14,7 +13,7 @@ class QShortcut;
14 13
15class HotkeyRegistry final { 14class HotkeyRegistry final {
16public: 15public:
17 friend class GHotkeysDialog; 16 friend class ConfigureHotkeys;
18 17
19 explicit HotkeyRegistry(); 18 explicit HotkeyRegistry();
20 ~HotkeyRegistry(); 19 ~HotkeyRegistry();
@@ -49,22 +48,27 @@ public:
49 QShortcut* GetHotkey(const QString& group, const QString& action, QWidget* widget); 48 QShortcut* GetHotkey(const QString& group, const QString& action, QWidget* widget);
50 49
51 /** 50 /**
52 * Register a hotkey. 51 * Returns a QKeySequence object whose signal can be connected to QAction::setShortcut.
53 * 52 *
54 * @param group General group this hotkey belongs to (e.g. "Main Window", "Debugger") 53 * @param group General group this hotkey belongs to (e.g. "Main Window", "Debugger").
55 * @param action Name of the action (e.g. "Start Emulation", "Load Image") 54 * @param action Name of the action (e.g. "Start Emulation", "Load Image").
56 * @param default_keyseq Default key sequence to assign if the hotkey wasn't present in the 55 */
57 * settings file before 56 QKeySequence GetKeySequence(const QString& group, const QString& action);
58 * @param default_context Default context to assign if the hotkey wasn't present in the settings 57
59 * file before 58 /**
60 * @warning Both the group and action strings will be displayed in the hotkey settings dialog 59 * Returns a Qt::ShortcutContext object who can be connected to other
60 * QAction::setShortcutContext.
61 *
62 * @param group General group this shortcut context belongs to (e.g. "Main Window",
63 * "Debugger").
64 * @param action Name of the action (e.g. "Start Emulation", "Load Image").
61 */ 65 */
62 void RegisterHotkey(const QString& group, const QString& action, 66 Qt::ShortcutContext GetShortcutContext(const QString& group, const QString& action);
63 const QKeySequence& default_keyseq = {},
64 Qt::ShortcutContext default_context = Qt::WindowShortcut);
65 67
66private: 68private:
67 struct Hotkey { 69 struct Hotkey {
70 Hotkey() : shortcut(nullptr), context(Qt::WindowShortcut) {}
71
68 QKeySequence keyseq; 72 QKeySequence keyseq;
69 QShortcut* shortcut = nullptr; 73 QShortcut* shortcut = nullptr;
70 Qt::ShortcutContext context = Qt::WindowShortcut; 74 Qt::ShortcutContext context = Qt::WindowShortcut;
@@ -75,15 +79,3 @@ private:
75 79
76 HotkeyGroupMap hotkey_groups; 80 HotkeyGroupMap hotkey_groups;
77}; 81};
78
79class GHotkeysDialog : public QWidget {
80 Q_OBJECT
81
82public:
83 explicit GHotkeysDialog(QWidget* parent = nullptr);
84
85 void Populate(const HotkeyRegistry& registry);
86
87private:
88 Ui::hotkeys ui;
89};
diff --git a/src/yuzu/hotkeys.ui b/src/yuzu/hotkeys.ui
deleted file mode 100644
index 050fe064e..000000000
--- a/src/yuzu/hotkeys.ui
+++ /dev/null
@@ -1,46 +0,0 @@
1<?xml version="1.0" encoding="UTF-8"?>
2<ui version="4.0">
3 <class>hotkeys</class>
4 <widget class="QWidget" name="hotkeys">
5 <property name="geometry">
6 <rect>
7 <x>0</x>
8 <y>0</y>
9 <width>363</width>
10 <height>388</height>
11 </rect>
12 </property>
13 <property name="windowTitle">
14 <string>Hotkey Settings</string>
15 </property>
16 <layout class="QVBoxLayout" name="verticalLayout">
17 <item>
18 <widget class="QTreeWidget" name="treeWidget">
19 <property name="selectionBehavior">
20 <enum>QAbstractItemView::SelectItems</enum>
21 </property>
22 <property name="headerHidden">
23 <bool>false</bool>
24 </property>
25 <column>
26 <property name="text">
27 <string>Action</string>
28 </property>
29 </column>
30 <column>
31 <property name="text">
32 <string>Hotkey</string>
33 </property>
34 </column>
35 <column>
36 <property name="text">
37 <string>Context</string>
38 </property>
39 </column>
40 </widget>
41 </item>
42 </layout>
43 </widget>
44 <resources/>
45 <connections/>
46</ui>
diff --git a/src/yuzu/loading_screen.cpp b/src/yuzu/loading_screen.cpp
index 907aac4f1..4e2d988cd 100644
--- a/src/yuzu/loading_screen.cpp
+++ b/src/yuzu/loading_screen.cpp
@@ -43,6 +43,7 @@ QProgressBar {
43} 43}
44QProgressBar::chunk { 44QProgressBar::chunk {
45 background-color: #0ab9e6; 45 background-color: #0ab9e6;
46 width: 1px;
46})"; 47})";
47 48
48constexpr const char PROGRESSBAR_STYLE_BUILD[] = R"( 49constexpr const char PROGRESSBAR_STYLE_BUILD[] = R"(
@@ -53,7 +54,8 @@ QProgressBar {
53 padding: 2px; 54 padding: 2px;
54} 55}
55QProgressBar::chunk { 56QProgressBar::chunk {
56 background-color: #ff3c28; 57 background-color: #ff3c28;
58 width: 1px;
57})"; 59})";
58 60
59constexpr const char PROGRESSBAR_STYLE_COMPLETE[] = R"( 61constexpr const char PROGRESSBAR_STYLE_COMPLETE[] = R"(
@@ -190,7 +192,12 @@ void LoadingScreen::OnLoadProgress(VideoCore::LoadCallbackStage stage, std::size
190 } 192 }
191 193
192 // update labels and progress bar 194 // update labels and progress bar
193 ui->stage->setText(stage_translations[stage].arg(value).arg(total)); 195 if (stage == VideoCore::LoadCallbackStage::Decompile ||
196 stage == VideoCore::LoadCallbackStage::Build) {
197 ui->stage->setText(stage_translations[stage].arg(value).arg(total));
198 } else {
199 ui->stage->setText(stage_translations[stage]);
200 }
194 ui->value->setText(estimate); 201 ui->value->setText(estimate);
195 ui->progress_bar->setValue(static_cast<int>(value)); 202 ui->progress_bar->setValue(static_cast<int>(value));
196 previous_time = now; 203 previous_time = now;
diff --git a/src/yuzu/loading_screen.ui b/src/yuzu/loading_screen.ui
index a67d273fd..820b47536 100644
--- a/src/yuzu/loading_screen.ui
+++ b/src/yuzu/loading_screen.ui
@@ -132,7 +132,7 @@ border-radius: 15px;
132font: 75 15pt &quot;Arial&quot;;</string> 132font: 75 15pt &quot;Arial&quot;;</string>
133 </property> 133 </property>
134 <property name="text"> 134 <property name="text">
135 <string>Stage 1 of 2. Estimate Time 5m 4s</string> 135 <string>Estimated Time 5m 4s</string>
136 </property> 136 </property>
137 </widget> 137 </widget>
138 </item> 138 </item>
@@ -146,6 +146,9 @@ font: 75 15pt &quot;Arial&quot;;</string>
146 <property name="text"> 146 <property name="text">
147 <string/> 147 <string/>
148 </property> 148 </property>
149 <property name="alignment">
150 <set>Qt::AlignCenter</set>
151 </property>
149 <property name="margin"> 152 <property name="margin">
150 <number>30</number> 153 <number>30</number>
151 </property> 154 </property>
diff --git a/src/yuzu/main.cpp b/src/yuzu/main.cpp
index ae3b49709..bdee44b04 100644
--- a/src/yuzu/main.cpp
+++ b/src/yuzu/main.cpp
@@ -11,9 +11,11 @@
11#include "applets/profile_select.h" 11#include "applets/profile_select.h"
12#include "applets/software_keyboard.h" 12#include "applets/software_keyboard.h"
13#include "applets/web_browser.h" 13#include "applets/web_browser.h"
14#include "configuration/configure_input.h"
14#include "configuration/configure_per_general.h" 15#include "configuration/configure_per_general.h"
15#include "core/file_sys/vfs.h" 16#include "core/file_sys/vfs.h"
16#include "core/file_sys/vfs_real.h" 17#include "core/file_sys/vfs_real.h"
18#include "core/frontend/scope_acquire_window_context.h"
17#include "core/hle/service/acc/profile_manager.h" 19#include "core/hle/service/acc/profile_manager.h"
18#include "core/hle/service/am/applets/applets.h" 20#include "core/hle/service/am/applets/applets.h"
19#include "core/hle/service/hid/controllers/npad.h" 21#include "core/hle/service/hid/controllers/npad.h"
@@ -35,14 +37,20 @@ static FileSys::VirtualFile VfsDirectoryCreateFileWrapper(const FileSys::Virtual
35#include <glad/glad.h> 37#include <glad/glad.h>
36 38
37#define QT_NO_OPENGL 39#define QT_NO_OPENGL
40#include <QClipboard>
41#include <QDesktopServices>
38#include <QDesktopWidget> 42#include <QDesktopWidget>
39#include <QDialogButtonBox> 43#include <QDialogButtonBox>
40#include <QFile> 44#include <QFile>
41#include <QFileDialog> 45#include <QFileDialog>
46#include <QInputDialog>
42#include <QMessageBox> 47#include <QMessageBox>
48#include <QProgressBar>
49#include <QProgressDialog>
50#include <QShortcut>
51#include <QStatusBar>
43#include <QtConcurrent/QtConcurrent> 52#include <QtConcurrent/QtConcurrent>
44#include <QtGui> 53
45#include <QtWidgets>
46#include <fmt/format.h> 54#include <fmt/format.h>
47#include "common/common_paths.h" 55#include "common/common_paths.h"
48#include "common/detached_tasks.h" 56#include "common/detached_tasks.h"
@@ -53,11 +61,9 @@ static FileSys::VirtualFile VfsDirectoryCreateFileWrapper(const FileSys::Virtual
53#include "common/microprofile.h" 61#include "common/microprofile.h"
54#include "common/scm_rev.h" 62#include "common/scm_rev.h"
55#include "common/scope_exit.h" 63#include "common/scope_exit.h"
56#include "common/string_util.h"
57#include "common/telemetry.h" 64#include "common/telemetry.h"
58#include "core/core.h" 65#include "core/core.h"
59#include "core/crypto/key_manager.h" 66#include "core/crypto/key_manager.h"
60#include "core/file_sys/bis_factory.h"
61#include "core/file_sys/card_image.h" 67#include "core/file_sys/card_image.h"
62#include "core/file_sys/content_archive.h" 68#include "core/file_sys/content_archive.h"
63#include "core/file_sys/control_metadata.h" 69#include "core/file_sys/control_metadata.h"
@@ -69,7 +75,6 @@ static FileSys::VirtualFile VfsDirectoryCreateFileWrapper(const FileSys::Virtual
69#include "core/frontend/applets/software_keyboard.h" 75#include "core/frontend/applets/software_keyboard.h"
70#include "core/hle/kernel/process.h" 76#include "core/hle/kernel/process.h"
71#include "core/hle/service/filesystem/filesystem.h" 77#include "core/hle/service/filesystem/filesystem.h"
72#include "core/hle/service/filesystem/fsp_ldr.h"
73#include "core/hle/service/nfp/nfp.h" 78#include "core/hle/service/nfp/nfp.h"
74#include "core/hle/service/sm/sm.h" 79#include "core/hle/service/sm/sm.h"
75#include "core/loader/loader.h" 80#include "core/loader/loader.h"
@@ -85,7 +90,6 @@ static FileSys::VirtualFile VfsDirectoryCreateFileWrapper(const FileSys::Virtual
85#include "yuzu/configuration/configure_dialog.h" 90#include "yuzu/configuration/configure_dialog.h"
86#include "yuzu/debugger/console.h" 91#include "yuzu/debugger/console.h"
87#include "yuzu/debugger/graphics/graphics_breakpoints.h" 92#include "yuzu/debugger/graphics/graphics_breakpoints.h"
88#include "yuzu/debugger/graphics/graphics_surface.h"
89#include "yuzu/debugger/profiler.h" 93#include "yuzu/debugger/profiler.h"
90#include "yuzu/debugger/wait_tree.h" 94#include "yuzu/debugger/wait_tree.h"
91#include "yuzu/discord.h" 95#include "yuzu/discord.h"
@@ -166,7 +170,8 @@ static void InitializeLogging() {
166 170
167GMainWindow::GMainWindow() 171GMainWindow::GMainWindow()
168 : config(new Config()), emu_thread(nullptr), 172 : config(new Config()), emu_thread(nullptr),
169 vfs(std::make_shared<FileSys::RealVfsFilesystem>()) { 173 vfs(std::make_shared<FileSys::RealVfsFilesystem>()),
174 provider(std::make_unique<FileSys::ManualContentProvider>()) {
170 InitializeLogging(); 175 InitializeLogging();
171 176
172 debug_context = Tegra::DebugContext::Construct(); 177 debug_context = Tegra::DebugContext::Construct();
@@ -198,13 +203,18 @@ GMainWindow::GMainWindow()
198 .arg(Common::g_build_fullname, Common::g_scm_branch, Common::g_scm_desc)); 203 .arg(Common::g_build_fullname, Common::g_scm_branch, Common::g_scm_desc));
199 show(); 204 show();
200 205
206 Core::System::GetInstance().SetContentProvider(
207 std::make_unique<FileSys::ContentProviderUnion>());
208 Core::System::GetInstance().RegisterContentProvider(
209 FileSys::ContentProviderUnionSlot::FrontendManual, provider.get());
210 Service::FileSystem::CreateFactories(*vfs);
211
201 // Gen keys if necessary 212 // Gen keys if necessary
202 OnReinitializeKeys(ReinitializeKeyBehavior::NoWarning); 213 OnReinitializeKeys(ReinitializeKeyBehavior::NoWarning);
203 214
204 // Necessary to load titles from nand in gamelist.
205 Service::FileSystem::CreateFactories(*vfs);
206 game_list->LoadCompatibilityList(); 215 game_list->LoadCompatibilityList();
207 game_list->PopulateAsync(UISettings::values.gamedir, UISettings::values.gamedir_deepscan); 216 game_list->PopulateAsync(UISettings::values.game_directory_path,
217 UISettings::values.game_directory_deepscan);
208 218
209 // Show one-time "callout" messages to the user 219 // Show one-time "callout" messages to the user
210 ShowTelemetryCallout(); 220 ShowTelemetryCallout();
@@ -338,6 +348,11 @@ void GMainWindow::WebBrowserOpenPage(std::string_view filename, std::string_view
338 .arg(QString::fromStdString(std::to_string(key_code)))); 348 .arg(QString::fromStdString(std::to_string(key_code))));
339 }; 349 };
340 350
351 QMessageBox::information(
352 this, tr("Exit"),
353 tr("To exit the web application, use the game provided controls to select exit, select the "
354 "'Exit Web Applet' option in the menu bar, or press the 'Enter' key."));
355
341 bool running_exit_check = false; 356 bool running_exit_check = false;
342 while (!finished) { 357 while (!finished) {
343 QApplication::processEvents(); 358 QApplication::processEvents();
@@ -409,7 +424,7 @@ void GMainWindow::InitializeWidgets() {
409 render_window = new GRenderWindow(this, emu_thread.get()); 424 render_window = new GRenderWindow(this, emu_thread.get());
410 render_window->hide(); 425 render_window->hide();
411 426
412 game_list = new GameList(vfs, this); 427 game_list = new GameList(vfs, provider.get(), this);
413 ui.horizontalLayout->addWidget(game_list); 428 ui.horizontalLayout->addWidget(game_list);
414 429
415 loading_screen = new LoadingScreen(this); 430 loading_screen = new LoadingScreen(this);
@@ -468,11 +483,6 @@ void GMainWindow::InitializeDebugWidgets() {
468 graphicsBreakpointsWidget->hide(); 483 graphicsBreakpointsWidget->hide();
469 debug_menu->addAction(graphicsBreakpointsWidget->toggleViewAction()); 484 debug_menu->addAction(graphicsBreakpointsWidget->toggleViewAction());
470 485
471 graphicsSurfaceWidget = new GraphicsSurfaceWidget(debug_context, this);
472 addDockWidget(Qt::RightDockWidgetArea, graphicsSurfaceWidget);
473 graphicsSurfaceWidget->hide();
474 debug_menu->addAction(graphicsSurfaceWidget->toggleViewAction());
475
476 waitTreeWidget = new WaitTreeWidget(this); 486 waitTreeWidget = new WaitTreeWidget(this);
477 addDockWidget(Qt::LeftDockWidgetArea, waitTreeWidget); 487 addDockWidget(Qt::LeftDockWidgetArea, waitTreeWidget);
478 waitTreeWidget->hide(); 488 waitTreeWidget->hide();
@@ -504,32 +514,34 @@ void GMainWindow::InitializeRecentFileMenuActions() {
504} 514}
505 515
506void GMainWindow::InitializeHotkeys() { 516void GMainWindow::InitializeHotkeys() {
507 hotkey_registry.RegisterHotkey("Main Window", "Load File", QKeySequence::Open);
508 hotkey_registry.RegisterHotkey("Main Window", "Start Emulation");
509 hotkey_registry.RegisterHotkey("Main Window", "Continue/Pause", QKeySequence(Qt::Key_F4));
510 hotkey_registry.RegisterHotkey("Main Window", "Restart", QKeySequence(Qt::Key_F5));
511 hotkey_registry.RegisterHotkey("Main Window", "Fullscreen", QKeySequence::FullScreen);
512 hotkey_registry.RegisterHotkey("Main Window", "Exit Fullscreen", QKeySequence(Qt::Key_Escape),
513 Qt::ApplicationShortcut);
514 hotkey_registry.RegisterHotkey("Main Window", "Toggle Speed Limit", QKeySequence("CTRL+Z"),
515 Qt::ApplicationShortcut);
516 hotkey_registry.RegisterHotkey("Main Window", "Increase Speed Limit", QKeySequence("+"),
517 Qt::ApplicationShortcut);
518 hotkey_registry.RegisterHotkey("Main Window", "Decrease Speed Limit", QKeySequence("-"),
519 Qt::ApplicationShortcut);
520 hotkey_registry.RegisterHotkey("Main Window", "Load Amiibo", QKeySequence(Qt::Key_F2),
521 Qt::ApplicationShortcut);
522 hotkey_registry.RegisterHotkey("Main Window", "Capture Screenshot",
523 QKeySequence(QKeySequence::Print));
524
525 hotkey_registry.LoadHotkeys(); 517 hotkey_registry.LoadHotkeys();
526 518
519 ui.action_Load_File->setShortcut(hotkey_registry.GetKeySequence("Main Window", "Load File"));
520 ui.action_Load_File->setShortcutContext(
521 hotkey_registry.GetShortcutContext("Main Window", "Load File"));
522
523 ui.action_Exit->setShortcut(hotkey_registry.GetKeySequence("Main Window", "Exit yuzu"));
524 ui.action_Exit->setShortcutContext(
525 hotkey_registry.GetShortcutContext("Main Window", "Exit yuzu"));
526
527 ui.action_Stop->setShortcut(hotkey_registry.GetKeySequence("Main Window", "Stop Emulation"));
528 ui.action_Stop->setShortcutContext(
529 hotkey_registry.GetShortcutContext("Main Window", "Stop Emulation"));
530
531 ui.action_Show_Filter_Bar->setShortcut(
532 hotkey_registry.GetKeySequence("Main Window", "Toggle Filter Bar"));
533 ui.action_Show_Filter_Bar->setShortcutContext(
534 hotkey_registry.GetShortcutContext("Main Window", "Toggle Filter Bar"));
535
536 ui.action_Show_Status_Bar->setShortcut(
537 hotkey_registry.GetKeySequence("Main Window", "Toggle Status Bar"));
538 ui.action_Show_Status_Bar->setShortcutContext(
539 hotkey_registry.GetShortcutContext("Main Window", "Toggle Status Bar"));
540
527 connect(hotkey_registry.GetHotkey("Main Window", "Load File", this), &QShortcut::activated, 541 connect(hotkey_registry.GetHotkey("Main Window", "Load File", this), &QShortcut::activated,
528 this, &GMainWindow::OnMenuLoadFile); 542 this, &GMainWindow::OnMenuLoadFile);
529 connect(hotkey_registry.GetHotkey("Main Window", "Start Emulation", this), 543 connect(hotkey_registry.GetHotkey("Main Window", "Continue/Pause Emulation", this),
530 &QShortcut::activated, this, &GMainWindow::OnStartGame); 544 &QShortcut::activated, this, [&] {
531 connect(hotkey_registry.GetHotkey("Main Window", "Continue/Pause", this), &QShortcut::activated,
532 this, [&] {
533 if (emulation_running) { 545 if (emulation_running) {
534 if (emu_thread->IsRunning()) { 546 if (emu_thread->IsRunning()) {
535 OnPauseGame(); 547 OnPauseGame();
@@ -538,8 +550,8 @@ void GMainWindow::InitializeHotkeys() {
538 } 550 }
539 } 551 }
540 }); 552 });
541 connect(hotkey_registry.GetHotkey("Main Window", "Restart", this), &QShortcut::activated, this, 553 connect(hotkey_registry.GetHotkey("Main Window", "Restart Emulation", this),
542 [this] { 554 &QShortcut::activated, this, [this] {
543 if (!Core::System::GetInstance().IsPoweredOn()) 555 if (!Core::System::GetInstance().IsPoweredOn())
544 return; 556 return;
545 BootGame(QString(game_path)); 557 BootGame(QString(game_path));
@@ -560,7 +572,10 @@ void GMainWindow::InitializeHotkeys() {
560 Settings::values.use_frame_limit = !Settings::values.use_frame_limit; 572 Settings::values.use_frame_limit = !Settings::values.use_frame_limit;
561 UpdateStatusBar(); 573 UpdateStatusBar();
562 }); 574 });
563 constexpr u16 SPEED_LIMIT_STEP = 5; 575 // TODO: Remove this comment/static whenever the next major release of
576 // MSVC occurs and we make it a requirement (see:
577 // https://developercommunity.visualstudio.com/content/problem/93922/constexprs-are-trying-to-be-captured-in-lambda-fun.html)
578 static constexpr u16 SPEED_LIMIT_STEP = 5;
564 connect(hotkey_registry.GetHotkey("Main Window", "Increase Speed Limit", this), 579 connect(hotkey_registry.GetHotkey("Main Window", "Increase Speed Limit", this),
565 &QShortcut::activated, this, [&] { 580 &QShortcut::activated, this, [&] {
566 if (Settings::values.frame_limit < 9999 - SPEED_LIMIT_STEP) { 581 if (Settings::values.frame_limit < 9999 - SPEED_LIMIT_STEP) {
@@ -587,6 +602,12 @@ void GMainWindow::InitializeHotkeys() {
587 OnCaptureScreenshot(); 602 OnCaptureScreenshot();
588 } 603 }
589 }); 604 });
605 connect(hotkey_registry.GetHotkey("Main Window", "Change Docked Mode", this),
606 &QShortcut::activated, this, [&] {
607 Settings::values.use_docked_mode = !Settings::values.use_docked_mode;
608 OnDockedModeChanged(!Settings::values.use_docked_mode,
609 Settings::values.use_docked_mode);
610 });
590} 611}
591 612
592void GMainWindow::SetDefaultUIGeometry() { 613void GMainWindow::SetDefaultUIGeometry() {
@@ -631,6 +652,8 @@ void GMainWindow::RestoreUIState() {
631void GMainWindow::ConnectWidgetEvents() { 652void GMainWindow::ConnectWidgetEvents() {
632 connect(game_list, &GameList::GameChosen, this, &GMainWindow::OnGameListLoadFile); 653 connect(game_list, &GameList::GameChosen, this, &GMainWindow::OnGameListLoadFile);
633 connect(game_list, &GameList::OpenFolderRequested, this, &GMainWindow::OnGameListOpenFolder); 654 connect(game_list, &GameList::OpenFolderRequested, this, &GMainWindow::OnGameListOpenFolder);
655 connect(game_list, &GameList::OpenTransferableShaderCacheRequested, this,
656 &GMainWindow::OnTransferableShaderCacheOpenFile);
634 connect(game_list, &GameList::DumpRomFSRequested, this, &GMainWindow::OnGameListDumpRomFS); 657 connect(game_list, &GameList::DumpRomFSRequested, this, &GMainWindow::OnGameListDumpRomFS);
635 connect(game_list, &GameList::CopyTIDRequested, this, &GMainWindow::OnGameListCopyTID); 658 connect(game_list, &GameList::CopyTIDRequested, this, &GMainWindow::OnGameListCopyTID);
636 connect(game_list, &GameList::NavigateToGamedbEntryRequested, this, 659 connect(game_list, &GameList::NavigateToGamedbEntryRequested, this,
@@ -675,7 +698,6 @@ void GMainWindow::ConnectMenuEvents() {
675 &GMainWindow::ToggleWindowMode); 698 &GMainWindow::ToggleWindowMode);
676 connect(ui.action_Display_Dock_Widget_Headers, &QAction::triggered, this, 699 connect(ui.action_Display_Dock_Widget_Headers, &QAction::triggered, this,
677 &GMainWindow::OnDisplayTitleBars); 700 &GMainWindow::OnDisplayTitleBars);
678 ui.action_Show_Filter_Bar->setShortcut(tr("CTRL+F"));
679 connect(ui.action_Show_Filter_Bar, &QAction::triggered, this, &GMainWindow::OnToggleFilterBar); 701 connect(ui.action_Show_Filter_Bar, &QAction::triggered, this, &GMainWindow::OnToggleFilterBar);
680 connect(ui.action_Show_Status_Bar, &QAction::triggered, statusBar(), &QStatusBar::setVisible); 702 connect(ui.action_Show_Status_Bar, &QAction::triggered, statusBar(), &QStatusBar::setVisible);
681 703
@@ -747,13 +769,15 @@ bool GMainWindow::LoadROM(const QString& filename) {
747 ShutdownGame(); 769 ShutdownGame();
748 770
749 render_window->InitRenderTarget(); 771 render_window->InitRenderTarget();
750 render_window->MakeCurrent();
751 772
752 if (!gladLoadGL()) { 773 {
753 QMessageBox::critical(this, tr("Error while initializing OpenGL 4.3 Core!"), 774 Core::Frontend::ScopeAcquireWindowContext acquire_context{*render_window};
754 tr("Your GPU may not support OpenGL 4.3, or you do not " 775 if (!gladLoadGL()) {
755 "have the latest graphics driver.")); 776 QMessageBox::critical(this, tr("Error while initializing OpenGL 4.3 Core!"),
756 return false; 777 tr("Your GPU may not support OpenGL 4.3, or you do not "
778 "have the latest graphics driver."));
779 return false;
780 }
757 } 781 }
758 782
759 QStringList unsupported_gl_extensions = GetUnsupportedGLExtensions(); 783 QStringList unsupported_gl_extensions = GetUnsupportedGLExtensions();
@@ -794,8 +818,6 @@ bool GMainWindow::LoadROM(const QString& filename) {
794 "wiki</a>. This message will not be shown again.")); 818 "wiki</a>. This message will not be shown again."));
795 } 819 }
796 820
797 render_window->DoneCurrent();
798
799 if (result != Core::System::ResultStatus::Success) { 821 if (result != Core::System::ResultStatus::Success) {
800 switch (result) { 822 switch (result) {
801 case Core::System::ResultStatus::ErrorGetLoader: 823 case Core::System::ResultStatus::ErrorGetLoader:
@@ -845,7 +867,7 @@ bool GMainWindow::LoadROM(const QString& filename) {
845 } 867 }
846 game_path = filename; 868 game_path = filename;
847 869
848 Core::Telemetry().AddField(Telemetry::FieldType::App, "Frontend", "Qt"); 870 system.TelemetrySession().AddField(Telemetry::FieldType::App, "Frontend", "Qt");
849 return true; 871 return true;
850} 872}
851 873
@@ -886,6 +908,9 @@ void GMainWindow::BootGame(const QString& filename) {
886 connect(emu_thread.get(), &EmuThread::DebugModeLeft, waitTreeWidget, 908 connect(emu_thread.get(), &EmuThread::DebugModeLeft, waitTreeWidget,
887 &WaitTreeWidget::OnDebugModeLeft, Qt::BlockingQueuedConnection); 909 &WaitTreeWidget::OnDebugModeLeft, Qt::BlockingQueuedConnection);
888 910
911 connect(emu_thread.get(), &EmuThread::LoadProgress, loading_screen,
912 &LoadingScreen::OnLoadProgress, Qt::QueuedConnection);
913
889 // Update the GUI 914 // Update the GUI
890 if (ui.action_Single_Window_Mode->isChecked()) { 915 if (ui.action_Single_Window_Mode->isChecked()) {
891 game_list->hide(); 916 game_list->hide();
@@ -1062,6 +1087,39 @@ void GMainWindow::OnGameListOpenFolder(u64 program_id, GameListOpenTarget target
1062 QDesktopServices::openUrl(QUrl::fromLocalFile(qpath)); 1087 QDesktopServices::openUrl(QUrl::fromLocalFile(qpath));
1063} 1088}
1064 1089
1090void GMainWindow::OnTransferableShaderCacheOpenFile(u64 program_id) {
1091 ASSERT(program_id != 0);
1092
1093 const QString tranferable_shader_cache_folder_path =
1094 QString::fromStdString(FileUtil::GetUserPath(FileUtil::UserPath::ShaderDir)) + "opengl" +
1095 DIR_SEP + "transferable";
1096
1097 const QString transferable_shader_cache_file_path =
1098 tranferable_shader_cache_folder_path + DIR_SEP +
1099 QString::fromStdString(fmt::format("{:016X}.bin", program_id));
1100
1101 if (!QFile::exists(transferable_shader_cache_file_path)) {
1102 QMessageBox::warning(this, tr("Error Opening Transferable Shader Cache"),
1103 tr("A shader cache for this title does not exist."));
1104 return;
1105 }
1106
1107 // Windows supports opening a folder with selecting a specified file in explorer. On every other
1108 // OS we just open the transferable shader cache folder without preselecting the transferable
1109 // shader cache file for the selected game.
1110#if defined(Q_OS_WIN)
1111 const QString explorer = QStringLiteral("explorer");
1112 QStringList param;
1113 if (!QFileInfo(transferable_shader_cache_file_path).isDir()) {
1114 param << QStringLiteral("/select,");
1115 }
1116 param << QDir::toNativeSeparators(transferable_shader_cache_file_path);
1117 QProcess::startDetached(explorer, param);
1118#else
1119 QDesktopServices::openUrl(QUrl::fromLocalFile(tranferable_shader_cache_folder_path));
1120#endif
1121}
1122
1065static std::size_t CalculateRomFSEntrySize(const FileSys::VirtualDir& dir, bool full) { 1123static std::size_t CalculateRomFSEntrySize(const FileSys::VirtualDir& dir, bool full) {
1066 std::size_t out = 0; 1124 std::size_t out = 0;
1067 1125
@@ -1121,7 +1179,7 @@ void GMainWindow::OnGameListDumpRomFS(u64 program_id, const std::string& game_pa
1121 return; 1179 return;
1122 } 1180 }
1123 1181
1124 const auto installed = Service::FileSystem::GetUnionContents(); 1182 const auto& installed = Core::System::GetInstance().GetContentProvider();
1125 const auto romfs_title_id = SelectRomFSDumpTarget(installed, program_id); 1183 const auto romfs_title_id = SelectRomFSDumpTarget(installed, program_id);
1126 1184
1127 if (!romfs_title_id) { 1185 if (!romfs_title_id) {
@@ -1221,8 +1279,8 @@ void GMainWindow::OnGameListOpenPerGameProperties(const std::string& file) {
1221 1279
1222 const auto reload = UISettings::values.is_game_list_reload_pending.exchange(false); 1280 const auto reload = UISettings::values.is_game_list_reload_pending.exchange(false);
1223 if (reload) { 1281 if (reload) {
1224 game_list->PopulateAsync(UISettings::values.gamedir, 1282 game_list->PopulateAsync(UISettings::values.game_directory_path,
1225 UISettings::values.gamedir_deepscan); 1283 UISettings::values.game_directory_deepscan);
1226 } 1284 }
1227 1285
1228 config->Save(); 1286 config->Save();
@@ -1310,7 +1368,8 @@ void GMainWindow::OnMenuInstallToNAND() {
1310 const auto success = [this]() { 1368 const auto success = [this]() {
1311 QMessageBox::information(this, tr("Successfully Installed"), 1369 QMessageBox::information(this, tr("Successfully Installed"),
1312 tr("The file was successfully installed.")); 1370 tr("The file was successfully installed."));
1313 game_list->PopulateAsync(UISettings::values.gamedir, UISettings::values.gamedir_deepscan); 1371 game_list->PopulateAsync(UISettings::values.game_directory_path,
1372 UISettings::values.game_directory_deepscan);
1314 }; 1373 };
1315 1374
1316 const auto failed = [this]() { 1375 const auto failed = [this]() {
@@ -1437,8 +1496,8 @@ void GMainWindow::OnMenuInstallToNAND() {
1437void GMainWindow::OnMenuSelectGameListRoot() { 1496void GMainWindow::OnMenuSelectGameListRoot() {
1438 QString dir_path = QFileDialog::getExistingDirectory(this, tr("Select Directory")); 1497 QString dir_path = QFileDialog::getExistingDirectory(this, tr("Select Directory"));
1439 if (!dir_path.isEmpty()) { 1498 if (!dir_path.isEmpty()) {
1440 UISettings::values.gamedir = dir_path; 1499 UISettings::values.game_directory_path = dir_path;
1441 game_list->PopulateAsync(dir_path, UISettings::values.gamedir_deepscan); 1500 game_list->PopulateAsync(dir_path, UISettings::values.game_directory_deepscan);
1442 } 1501 }
1443} 1502}
1444 1503
@@ -1460,7 +1519,8 @@ void GMainWindow::OnMenuSelectEmulatedDirectory(EmulatedDirectoryTarget target)
1460 : FileUtil::UserPath::NANDDir, 1519 : FileUtil::UserPath::NANDDir,
1461 dir_path.toStdString()); 1520 dir_path.toStdString());
1462 Service::FileSystem::CreateFactories(*vfs); 1521 Service::FileSystem::CreateFactories(*vfs);
1463 game_list->PopulateAsync(UISettings::values.gamedir, UISettings::values.gamedir_deepscan); 1522 game_list->PopulateAsync(UISettings::values.game_directory_path,
1523 UISettings::values.game_directory_deepscan);
1464 } 1524 }
1465} 1525}
1466 1526
@@ -1604,6 +1664,7 @@ void GMainWindow::OnConfigure() {
1604 auto result = configureDialog.exec(); 1664 auto result = configureDialog.exec();
1605 if (result == QDialog::Accepted) { 1665 if (result == QDialog::Accepted) {
1606 configureDialog.applyConfiguration(); 1666 configureDialog.applyConfiguration();
1667 InitializeHotkeys();
1607 if (UISettings::values.theme != old_theme) 1668 if (UISettings::values.theme != old_theme)
1608 UpdateUITheme(); 1669 UpdateUITheme();
1609 if (UISettings::values.enable_discord_presence != old_discord_presence) 1670 if (UISettings::values.enable_discord_presence != old_discord_presence)
@@ -1611,8 +1672,8 @@ void GMainWindow::OnConfigure() {
1611 1672
1612 const auto reload = UISettings::values.is_game_list_reload_pending.exchange(false); 1673 const auto reload = UISettings::values.is_game_list_reload_pending.exchange(false);
1613 if (reload) { 1674 if (reload) {
1614 game_list->PopulateAsync(UISettings::values.gamedir, 1675 game_list->PopulateAsync(UISettings::values.game_directory_path,
1615 UISettings::values.gamedir_deepscan); 1676 UISettings::values.game_directory_deepscan);
1616 } 1677 }
1617 1678
1618 config->Save(); 1679 config->Save();
@@ -1681,12 +1742,16 @@ void GMainWindow::OnToggleFilterBar() {
1681 1742
1682void GMainWindow::OnCaptureScreenshot() { 1743void GMainWindow::OnCaptureScreenshot() {
1683 OnPauseGame(); 1744 OnPauseGame();
1684 const QString path = 1745 QFileDialog png_dialog(this, tr("Capture Screenshot"), UISettings::values.screenshot_path,
1685 QFileDialog::getSaveFileName(this, tr("Capture Screenshot"), 1746 tr("PNG Image (*.png)"));
1686 UISettings::values.screenshot_path, tr("PNG Image (*.png)")); 1747 png_dialog.setAcceptMode(QFileDialog::AcceptSave);
1687 if (!path.isEmpty()) { 1748 png_dialog.setDefaultSuffix("png");
1688 UISettings::values.screenshot_path = QFileInfo(path).path(); 1749 if (png_dialog.exec()) {
1689 render_window->CaptureScreenshot(UISettings::values.screenshot_resolution_factor, path); 1750 const QString path = png_dialog.selectedFiles().first();
1751 if (!path.isEmpty()) {
1752 UISettings::values.screenshot_path = QFileInfo(path).path();
1753 render_window->CaptureScreenshot(UISettings::values.screenshot_resolution_factor, path);
1754 }
1690 } 1755 }
1691 OnStartGame(); 1756 OnStartGame();
1692} 1757}
@@ -1858,18 +1923,19 @@ void GMainWindow::OnReinitializeKeys(ReinitializeKeyBehavior behavior) {
1858 Service::FileSystem::CreateFactories(*vfs); 1923 Service::FileSystem::CreateFactories(*vfs);
1859 1924
1860 if (behavior == ReinitializeKeyBehavior::Warning) { 1925 if (behavior == ReinitializeKeyBehavior::Warning) {
1861 game_list->PopulateAsync(UISettings::values.gamedir, UISettings::values.gamedir_deepscan); 1926 game_list->PopulateAsync(UISettings::values.game_directory_path,
1927 UISettings::values.game_directory_deepscan);
1862 } 1928 }
1863} 1929}
1864 1930
1865std::optional<u64> GMainWindow::SelectRomFSDumpTarget( 1931std::optional<u64> GMainWindow::SelectRomFSDumpTarget(const FileSys::ContentProvider& installed,
1866 const FileSys::RegisteredCacheUnion& installed, u64 program_id) { 1932 u64 program_id) {
1867 const auto dlc_entries = 1933 const auto dlc_entries =
1868 installed.ListEntriesFilter(FileSys::TitleType::AOC, FileSys::ContentRecordType::Data); 1934 installed.ListEntriesFilter(FileSys::TitleType::AOC, FileSys::ContentRecordType::Data);
1869 std::vector<FileSys::RegisteredCacheEntry> dlc_match; 1935 std::vector<FileSys::ContentProviderEntry> dlc_match;
1870 dlc_match.reserve(dlc_entries.size()); 1936 dlc_match.reserve(dlc_entries.size());
1871 std::copy_if(dlc_entries.begin(), dlc_entries.end(), std::back_inserter(dlc_match), 1937 std::copy_if(dlc_entries.begin(), dlc_entries.end(), std::back_inserter(dlc_match),
1872 [&program_id, &installed](const FileSys::RegisteredCacheEntry& entry) { 1938 [&program_id, &installed](const FileSys::ContentProviderEntry& entry) {
1873 return (entry.title_id & DLC_BASE_TITLE_ID_MASK) == program_id && 1939 return (entry.title_id & DLC_BASE_TITLE_ID_MASK) == program_id &&
1874 installed.GetEntry(entry)->GetStatus() == Loader::ResultStatus::Success; 1940 installed.GetEntry(entry)->GetStatus() == Loader::ResultStatus::Success;
1875 }); 1941 });
@@ -2055,6 +2121,9 @@ int main(int argc, char* argv[]) {
2055 GMainWindow main_window; 2121 GMainWindow main_window;
2056 // After settings have been loaded by GMainWindow, apply the filter 2122 // After settings have been loaded by GMainWindow, apply the filter
2057 main_window.show(); 2123 main_window.show();
2124
2125 Settings::LogSettings();
2126
2058 int result = app.exec(); 2127 int result = app.exec();
2059 detached_tasks.WaitForAllTasks(); 2128 detached_tasks.WaitForAllTasks();
2060 return result; 2129 return result;
diff --git a/src/yuzu/main.h b/src/yuzu/main.h
index 080484995..ce5045819 100644
--- a/src/yuzu/main.h
+++ b/src/yuzu/main.h
@@ -23,7 +23,6 @@ class EmuThread;
23class GameList; 23class GameList;
24class GImageInfo; 24class GImageInfo;
25class GraphicsBreakPointsWidget; 25class GraphicsBreakPointsWidget;
26class GraphicsSurfaceWidget;
27class GRenderWindow; 26class GRenderWindow;
28class LoadingScreen; 27class LoadingScreen;
29class MicroProfileDialog; 28class MicroProfileDialog;
@@ -37,7 +36,8 @@ struct SoftwareKeyboardParameters;
37} // namespace Core::Frontend 36} // namespace Core::Frontend
38 37
39namespace FileSys { 38namespace FileSys {
40class RegisteredCacheUnion; 39class ContentProvider;
40class ManualContentProvider;
41class VfsFilesystem; 41class VfsFilesystem;
42} // namespace FileSys 42} // namespace FileSys
43 43
@@ -120,7 +120,6 @@ private:
120 void InitializeWidgets(); 120 void InitializeWidgets();
121 void InitializeDebugWidgets(); 121 void InitializeDebugWidgets();
122 void InitializeRecentFileMenuActions(); 122 void InitializeRecentFileMenuActions();
123 void InitializeHotkeys();
124 123
125 void SetDefaultUIGeometry(); 124 void SetDefaultUIGeometry();
126 void RestoreUIState(); 125 void RestoreUIState();
@@ -176,6 +175,7 @@ private slots:
176 /// Called whenever a user selects a game in the game list widget. 175 /// Called whenever a user selects a game in the game list widget.
177 void OnGameListLoadFile(QString game_path); 176 void OnGameListLoadFile(QString game_path);
178 void OnGameListOpenFolder(u64 program_id, GameListOpenTarget target); 177 void OnGameListOpenFolder(u64 program_id, GameListOpenTarget target);
178 void OnTransferableShaderCacheOpenFile(u64 program_id);
179 void OnGameListDumpRomFS(u64 program_id, const std::string& game_path); 179 void OnGameListDumpRomFS(u64 program_id, const std::string& game_path);
180 void OnGameListCopyTID(u64 program_id); 180 void OnGameListCopyTID(u64 program_id);
181 void OnGameListNavigateToGamedbEntry(u64 program_id, 181 void OnGameListNavigateToGamedbEntry(u64 program_id,
@@ -195,6 +195,7 @@ private slots:
195 void OnAbout(); 195 void OnAbout();
196 void OnToggleFilterBar(); 196 void OnToggleFilterBar();
197 void OnDisplayTitleBars(bool); 197 void OnDisplayTitleBars(bool);
198 void InitializeHotkeys();
198 void ToggleFullscreen(); 199 void ToggleFullscreen();
199 void ShowFullscreen(); 200 void ShowFullscreen();
200 void HideFullscreen(); 201 void HideFullscreen();
@@ -204,7 +205,7 @@ private slots:
204 void OnReinitializeKeys(ReinitializeKeyBehavior behavior); 205 void OnReinitializeKeys(ReinitializeKeyBehavior behavior);
205 206
206private: 207private:
207 std::optional<u64> SelectRomFSDumpTarget(const FileSys::RegisteredCacheUnion&, u64 program_id); 208 std::optional<u64> SelectRomFSDumpTarget(const FileSys::ContentProvider&, u64 program_id);
208 void UpdateStatusBar(); 209 void UpdateStatusBar();
209 210
210 Ui::MainWindow ui; 211 Ui::MainWindow ui;
@@ -232,12 +233,12 @@ private:
232 233
233 // FS 234 // FS
234 std::shared_ptr<FileSys::VfsFilesystem> vfs; 235 std::shared_ptr<FileSys::VfsFilesystem> vfs;
236 std::unique_ptr<FileSys::ManualContentProvider> provider;
235 237
236 // Debugger panes 238 // Debugger panes
237 ProfilerWidget* profilerWidget; 239 ProfilerWidget* profilerWidget;
238 MicroProfileDialog* microProfileDialog; 240 MicroProfileDialog* microProfileDialog;
239 GraphicsBreakPointsWidget* graphicsBreakpointsWidget; 241 GraphicsBreakPointsWidget* graphicsBreakpointsWidget;
240 GraphicsSurfaceWidget* graphicsSurfaceWidget;
241 WaitTreeWidget* waitTreeWidget; 242 WaitTreeWidget* waitTreeWidget;
242 243
243 QAction* actions_recent_files[max_recent_files_item]; 244 QAction* actions_recent_files[max_recent_files_item];
diff --git a/src/yuzu/ui_settings.cpp b/src/yuzu/ui_settings.cpp
index a314493fc..4bdc302e0 100644
--- a/src/yuzu/ui_settings.cpp
+++ b/src/yuzu/ui_settings.cpp
@@ -12,5 +12,4 @@ const Themes themes{{
12}}; 12}};
13 13
14Values values = {}; 14Values values = {};
15
16} // namespace UISettings 15} // namespace UISettings
diff --git a/src/yuzu/ui_settings.h b/src/yuzu/ui_settings.h
index 82aaeedb0..dbd318e20 100644
--- a/src/yuzu/ui_settings.h
+++ b/src/yuzu/ui_settings.h
@@ -15,7 +15,12 @@
15namespace UISettings { 15namespace UISettings {
16 16
17using ContextualShortcut = std::pair<QString, int>; 17using ContextualShortcut = std::pair<QString, int>;
18using Shortcut = std::pair<QString, ContextualShortcut>; 18
19struct Shortcut {
20 QString name;
21 QString group;
22 ContextualShortcut shortcut;
23};
19 24
20using Themes = std::array<std::pair<const char*, const char*>, 2>; 25using Themes = std::array<std::pair<const char*, const char*>, 2>;
21extern const Themes themes; 26extern const Themes themes;
@@ -50,8 +55,8 @@ struct Values {
50 QString roms_path; 55 QString roms_path;
51 QString symbols_path; 56 QString symbols_path;
52 QString screenshot_path; 57 QString screenshot_path;
53 QString gamedir; 58 QString game_directory_path;
54 bool gamedir_deepscan; 59 bool game_directory_deepscan;
55 QStringList recent_files; 60 QStringList recent_files;
56 61
57 QString theme; 62 QString theme;
diff --git a/src/yuzu/util/sequence_dialog/sequence_dialog.cpp b/src/yuzu/util/sequence_dialog/sequence_dialog.cpp
new file mode 100644
index 000000000..d3edf6ec3
--- /dev/null
+++ b/src/yuzu/util/sequence_dialog/sequence_dialog.cpp
@@ -0,0 +1,37 @@
1// Copyright 2018 Citra Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <QDialogButtonBox>
6#include <QKeySequenceEdit>
7#include <QVBoxLayout>
8#include "yuzu/util/sequence_dialog/sequence_dialog.h"
9
10SequenceDialog::SequenceDialog(QWidget* parent) : QDialog(parent) {
11 setWindowTitle(tr("Enter a hotkey"));
12 auto* layout = new QVBoxLayout(this);
13 key_sequence = new QKeySequenceEdit;
14 layout->addWidget(key_sequence);
15 auto* buttons =
16 new QDialogButtonBox(QDialogButtonBox::Ok | QDialogButtonBox::Cancel, Qt::Horizontal);
17 buttons->setCenterButtons(true);
18 layout->addWidget(buttons);
19 connect(buttons, &QDialogButtonBox::accepted, this, &QDialog::accept);
20 connect(buttons, &QDialogButtonBox::rejected, this, &QDialog::reject);
21 setWindowFlags(windowFlags() & ~Qt::WindowContextHelpButtonHint);
22}
23
24SequenceDialog::~SequenceDialog() = default;
25
26QKeySequence SequenceDialog::GetSequence() const {
27 // Only the first key is returned. The other 3, if present, are ignored.
28 return QKeySequence(key_sequence->keySequence()[0]);
29}
30
31bool SequenceDialog::focusNextPrevChild(bool next) {
32 return false;
33}
34
35void SequenceDialog::closeEvent(QCloseEvent*) {
36 reject();
37}
diff --git a/src/yuzu/util/sequence_dialog/sequence_dialog.h b/src/yuzu/util/sequence_dialog/sequence_dialog.h
new file mode 100644
index 000000000..969c77740
--- /dev/null
+++ b/src/yuzu/util/sequence_dialog/sequence_dialog.h
@@ -0,0 +1,24 @@
1// Copyright 2018 Citra Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <QDialog>
8
9class QKeySequenceEdit;
10
11class SequenceDialog : public QDialog {
12 Q_OBJECT
13
14public:
15 explicit SequenceDialog(QWidget* parent = nullptr);
16 ~SequenceDialog() override;
17
18 QKeySequence GetSequence() const;
19 void closeEvent(QCloseEvent*) override;
20
21private:
22 QKeySequenceEdit* key_sequence;
23 bool focusNextPrevChild(bool next) override;
24};
diff --git a/src/yuzu_cmd/config.cpp b/src/yuzu_cmd/config.cpp
index 7a77f76e8..f24cc77fe 100644
--- a/src/yuzu_cmd/config.cpp
+++ b/src/yuzu_cmd/config.cpp
@@ -319,7 +319,6 @@ void Config::ReadValues() {
319 319
320 // System 320 // System
321 Settings::values.use_docked_mode = sdl2_config->GetBoolean("System", "use_docked_mode", false); 321 Settings::values.use_docked_mode = sdl2_config->GetBoolean("System", "use_docked_mode", false);
322 Settings::values.enable_nfc = sdl2_config->GetBoolean("System", "enable_nfc", true);
323 const auto size = sdl2_config->GetInteger("System", "users_size", 0); 322 const auto size = sdl2_config->GetInteger("System", "users_size", 0);
324 323
325 Settings::values.current_user = std::clamp<int>( 324 Settings::values.current_user = std::clamp<int>(
@@ -346,23 +345,28 @@ void Config::ReadValues() {
346 345
347 // Renderer 346 // Renderer
348 Settings::values.resolution_factor = 347 Settings::values.resolution_factor =
349 (float)sdl2_config->GetReal("Renderer", "resolution_factor", 1.0); 348 static_cast<float>(sdl2_config->GetReal("Renderer", "resolution_factor", 1.0));
350 Settings::values.use_frame_limit = sdl2_config->GetBoolean("Renderer", "use_frame_limit", true); 349 Settings::values.use_frame_limit = sdl2_config->GetBoolean("Renderer", "use_frame_limit", true);
351 Settings::values.frame_limit = 350 Settings::values.frame_limit =
352 static_cast<u16>(sdl2_config->GetInteger("Renderer", "frame_limit", 100)); 351 static_cast<u16>(sdl2_config->GetInteger("Renderer", "frame_limit", 100));
352 Settings::values.use_disk_shader_cache =
353 sdl2_config->GetBoolean("Renderer", "use_disk_shader_cache", false);
353 Settings::values.use_accurate_gpu_emulation = 354 Settings::values.use_accurate_gpu_emulation =
354 sdl2_config->GetBoolean("Renderer", "use_accurate_gpu_emulation", false); 355 sdl2_config->GetBoolean("Renderer", "use_accurate_gpu_emulation", false);
356 Settings::values.use_asynchronous_gpu_emulation =
357 sdl2_config->GetBoolean("Renderer", "use_asynchronous_gpu_emulation", false);
355 358
356 Settings::values.bg_red = (float)sdl2_config->GetReal("Renderer", "bg_red", 0.0); 359 Settings::values.bg_red = static_cast<float>(sdl2_config->GetReal("Renderer", "bg_red", 0.0));
357 Settings::values.bg_green = (float)sdl2_config->GetReal("Renderer", "bg_green", 0.0); 360 Settings::values.bg_green =
358 Settings::values.bg_blue = (float)sdl2_config->GetReal("Renderer", "bg_blue", 0.0); 361 static_cast<float>(sdl2_config->GetReal("Renderer", "bg_green", 0.0));
362 Settings::values.bg_blue = static_cast<float>(sdl2_config->GetReal("Renderer", "bg_blue", 0.0));
359 363
360 // Audio 364 // Audio
361 Settings::values.sink_id = sdl2_config->Get("Audio", "output_engine", "auto"); 365 Settings::values.sink_id = sdl2_config->Get("Audio", "output_engine", "auto");
362 Settings::values.enable_audio_stretching = 366 Settings::values.enable_audio_stretching =
363 sdl2_config->GetBoolean("Audio", "enable_audio_stretching", true); 367 sdl2_config->GetBoolean("Audio", "enable_audio_stretching", true);
364 Settings::values.audio_device_id = sdl2_config->Get("Audio", "output_device", "auto"); 368 Settings::values.audio_device_id = sdl2_config->Get("Audio", "output_device", "auto");
365 Settings::values.volume = sdl2_config->GetReal("Audio", "volume", 1); 369 Settings::values.volume = static_cast<float>(sdl2_config->GetReal("Audio", "volume", 1));
366 370
367 Settings::values.language_index = sdl2_config->GetInteger("System", "language_index", 1); 371 Settings::values.language_index = sdl2_config->GetInteger("System", "language_index", 1);
368 372
diff --git a/src/yuzu_cmd/default_ini.h b/src/yuzu_cmd/default_ini.h
index ba51a4a51..6538af098 100644
--- a/src/yuzu_cmd/default_ini.h
+++ b/src/yuzu_cmd/default_ini.h
@@ -110,10 +110,18 @@ use_frame_limit =
110# 1 - 9999: Speed limit as a percentage of target game speed. 100 (default) 110# 1 - 9999: Speed limit as a percentage of target game speed. 100 (default)
111frame_limit = 111frame_limit =
112 112
113# Whether to use disk based shader cache
114# 0 (default): Off, 1 : On
115use_disk_shader_cache =
116
113# Whether to use accurate GPU emulation 117# Whether to use accurate GPU emulation
114# 0 (default): Off (fast), 1 : On (slow) 118# 0 (default): Off (fast), 1 : On (slow)
115use_accurate_gpu_emulation = 119use_accurate_gpu_emulation =
116 120
121# Whether to use asynchronous GPU emulation
122# 0 : Off (slow), 1 (default): On (fast)
123use_asynchronous_gpu_emulation =
124
117# The clear color for the renderer. What shows up on the sides of the bottom screen. 125# The clear color for the renderer. What shows up on the sides of the bottom screen.
118# Must be in range of 0.0-1.0. Defaults to 1.0 for all. 126# Must be in range of 0.0-1.0. Defaults to 1.0 for all.
119bg_red = 127bg_red =
diff --git a/src/yuzu_cmd/emu_window/emu_window_sdl2.cpp b/src/yuzu_cmd/emu_window/emu_window_sdl2.cpp
index d246389fa..68a176032 100644
--- a/src/yuzu_cmd/emu_window/emu_window_sdl2.cpp
+++ b/src/yuzu_cmd/emu_window/emu_window_sdl2.cpp
@@ -166,16 +166,16 @@ bool EmuWindow_SDL2::SupportsRequiredGLExtensions() {
166} 166}
167 167
168EmuWindow_SDL2::EmuWindow_SDL2(bool fullscreen) { 168EmuWindow_SDL2::EmuWindow_SDL2(bool fullscreen) {
169 InputCommon::Init();
170
171 SDL_SetMainReady();
172
173 // Initialize the window 169 // Initialize the window
174 if (SDL_Init(SDL_INIT_VIDEO | SDL_INIT_JOYSTICK) < 0) { 170 if (SDL_Init(SDL_INIT_VIDEO | SDL_INIT_JOYSTICK) < 0) {
175 LOG_CRITICAL(Frontend, "Failed to initialize SDL2! Exiting..."); 171 LOG_CRITICAL(Frontend, "Failed to initialize SDL2! Exiting...");
176 exit(1); 172 exit(1);
177 } 173 }
178 174
175 InputCommon::Init();
176
177 SDL_SetMainReady();
178
179 SDL_GL_SetAttribute(SDL_GL_CONTEXT_MAJOR_VERSION, 4); 179 SDL_GL_SetAttribute(SDL_GL_CONTEXT_MAJOR_VERSION, 4);
180 SDL_GL_SetAttribute(SDL_GL_CONTEXT_MINOR_VERSION, 3); 180 SDL_GL_SetAttribute(SDL_GL_CONTEXT_MINOR_VERSION, 3);
181 SDL_GL_SetAttribute(SDL_GL_CONTEXT_PROFILE_MASK, SDL_GL_CONTEXT_PROFILE_CORE); 181 SDL_GL_SetAttribute(SDL_GL_CONTEXT_PROFILE_MASK, SDL_GL_CONTEXT_PROFILE_CORE);
@@ -226,16 +226,15 @@ EmuWindow_SDL2::EmuWindow_SDL2(bool fullscreen) {
226 SDL_GL_SetSwapInterval(false); 226 SDL_GL_SetSwapInterval(false);
227 LOG_INFO(Frontend, "yuzu Version: {} | {}-{}", Common::g_build_fullname, Common::g_scm_branch, 227 LOG_INFO(Frontend, "yuzu Version: {} | {}-{}", Common::g_build_fullname, Common::g_scm_branch,
228 Common::g_scm_desc); 228 Common::g_scm_desc);
229 Settings::LogSettings();
229 230
230 DoneCurrent(); 231 DoneCurrent();
231} 232}
232 233
233EmuWindow_SDL2::~EmuWindow_SDL2() { 234EmuWindow_SDL2::~EmuWindow_SDL2() {
234 InputCommon::SDL::CloseSDLJoysticks(); 235 InputCommon::Shutdown();
235 SDL_GL_DeleteContext(gl_context); 236 SDL_GL_DeleteContext(gl_context);
236 SDL_Quit(); 237 SDL_Quit();
237
238 InputCommon::Shutdown();
239} 238}
240 239
241void EmuWindow_SDL2::SwapBuffers() { 240void EmuWindow_SDL2::SwapBuffers() {
@@ -292,7 +291,6 @@ void EmuWindow_SDL2::PollEvents() {
292 is_open = false; 291 is_open = false;
293 break; 292 break;
294 default: 293 default:
295 InputCommon::SDL::HandleGameControllerEvent(event);
296 break; 294 break;
297 } 295 }
298 } 296 }
diff --git a/src/yuzu_cmd/yuzu.cpp b/src/yuzu_cmd/yuzu.cpp
index 806127b12..7ea4a1b18 100644
--- a/src/yuzu_cmd/yuzu.cpp
+++ b/src/yuzu_cmd/yuzu.cpp
@@ -28,10 +28,12 @@
28#include "core/loader/loader.h" 28#include "core/loader/loader.h"
29#include "core/settings.h" 29#include "core/settings.h"
30#include "core/telemetry_session.h" 30#include "core/telemetry_session.h"
31#include "video_core/renderer_base.h"
31#include "yuzu_cmd/config.h" 32#include "yuzu_cmd/config.h"
32#include "yuzu_cmd/emu_window/emu_window_sdl2.h" 33#include "yuzu_cmd/emu_window/emu_window_sdl2.h"
33 34
34#include <getopt.h> 35#include <getopt.h>
36#include "core/file_sys/registered_cache.h"
35#ifndef _MSC_VER 37#ifndef _MSC_VER
36#include <unistd.h> 38#include <unistd.h>
37#endif 39#endif
@@ -113,9 +115,9 @@ int main(int argc, char** argv) {
113 }; 115 };
114 116
115 while (optind < argc) { 117 while (optind < argc) {
116 char arg = getopt_long(argc, argv, "g:fhvp::", long_options, &option_index); 118 int arg = getopt_long(argc, argv, "g:fhvp::", long_options, &option_index);
117 if (arg != -1) { 119 if (arg != -1) {
118 switch (arg) { 120 switch (static_cast<char>(arg)) {
119 case 'g': 121 case 'g':
120 errno = 0; 122 errno = 0;
121 gdb_port = strtoul(optarg, &endarg, 0); 123 gdb_port = strtoul(optarg, &endarg, 0);
@@ -177,6 +179,7 @@ int main(int argc, char** argv) {
177 } 179 }
178 180
179 Core::System& system{Core::System::GetInstance()}; 181 Core::System& system{Core::System::GetInstance()};
182 system.SetContentProvider(std::make_unique<FileSys::ContentProviderUnion>());
180 system.SetFilesystem(std::make_shared<FileSys::RealVfsFilesystem>()); 183 system.SetFilesystem(std::make_shared<FileSys::RealVfsFilesystem>());
181 Service::FileSystem::CreateFactories(*system.GetFilesystem()); 184 Service::FileSystem::CreateFactories(*system.GetFilesystem());
182 185
@@ -215,7 +218,9 @@ int main(int argc, char** argv) {
215 } 218 }
216 } 219 }
217 220
218 Core::Telemetry().AddField(Telemetry::FieldType::App, "Frontend", "SDL"); 221 system.TelemetrySession().AddField(Telemetry::FieldType::App, "Frontend", "SDL");
222
223 system.Renderer().Rasterizer().LoadDiskResources();
219 224
220 while (emu_window->IsOpen()) { 225 while (emu_window->IsOpen()) {
221 system.RunLoop(); 226 system.RunLoop();