summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--.gitmodules3
-rw-r--r--.travis.yml2
-rwxr-xr-x.travis/macos/build.sh2
-rw-r--r--CMakeLists.txt25
-rw-r--r--CMakeModules/GenerateSCMRev.cmake95
-rw-r--r--README.md2
m---------externals/Vulkan-Headers0
m---------externals/cubeb0
m---------externals/opus0
-rw-r--r--src/audio_core/audio_out.cpp5
-rw-r--r--src/audio_core/audio_out.h8
-rw-r--r--src/audio_core/audio_renderer.cpp7
-rw-r--r--src/audio_core/audio_renderer.h20
-rw-r--r--src/audio_core/buffer.h2
-rw-r--r--src/audio_core/codec.cpp4
-rw-r--r--src/audio_core/cubeb_sink.cpp23
-rw-r--r--src/audio_core/cubeb_sink.h4
-rw-r--r--src/audio_core/stream.cpp14
-rw-r--r--src/audio_core/stream.h30
-rw-r--r--src/common/CMakeLists.txt100
-rw-r--r--src/common/bit_field.h10
-rw-r--r--src/common/color.h40
-rw-r--r--src/common/common_paths.h1
-rw-r--r--src/common/file_util.cpp1
-rw-r--r--src/common/file_util.h1
-rw-r--r--src/common/logging/backend.cpp70
-rw-r--r--src/common/logging/backend.h5
-rw-r--r--src/common/logging/log.h1
-rw-r--r--src/common/math_util.h4
-rw-r--r--src/common/memory_hook.cpp (renamed from src/core/memory_hook.cpp)6
-rw-r--r--src/common/memory_hook.h (renamed from src/core/memory_hook.h)4
-rw-r--r--src/common/page_table.cpp29
-rw-r--r--src/common/page_table.h80
-rw-r--r--src/common/quaternion.h10
-rw-r--r--src/common/scm_rev.cpp.in2
-rw-r--r--src/common/scm_rev.h1
-rw-r--r--src/common/swap.h6
-rw-r--r--src/common/thread_queue_list.h6
-rw-r--r--src/common/threadsafe_queue.h53
-rw-r--r--src/common/uint128.cpp45
-rw-r--r--src/common/uint128.h19
-rw-r--r--src/common/vector_math.h4
-rw-r--r--src/core/CMakeLists.txt11
-rw-r--r--src/core/arm/dynarmic/arm_dynarmic.cpp15
-rw-r--r--src/core/arm/dynarmic/arm_dynarmic.h12
-rw-r--r--src/core/arm/unicorn/arm_unicorn.cpp6
-rw-r--r--src/core/arm/unicorn/arm_unicorn.h8
-rw-r--r--src/core/core.cpp52
-rw-r--r--src/core/core.h14
-rw-r--r--src/core/core_cpu.cpp18
-rw-r--r--src/core/core_cpu.h12
-rw-r--r--src/core/core_timing.cpp199
-rw-r--r--src/core/core_timing.h215
-rw-r--r--src/core/core_timing_util.cpp10
-rw-r--r--src/core/core_timing_util.h7
-rw-r--r--src/core/cpu_core_manager.cpp2
-rw-r--r--src/core/crypto/key_manager.cpp3
-rw-r--r--src/core/file_sys/content_archive.h15
-rw-r--r--src/core/file_sys/registered_cache.cpp2
-rw-r--r--src/core/file_sys/vfs_vector.cpp2
-rw-r--r--src/core/frontend/emu_window.cpp2
-rw-r--r--src/core/frontend/emu_window.h2
-rw-r--r--src/core/frontend/framebuffer_layout.cpp12
-rw-r--r--src/core/frontend/framebuffer_layout.h2
-rw-r--r--src/core/frontend/input.h2
-rw-r--r--src/core/gdbstub/gdbstub.cpp14
-rw-r--r--src/core/hle/ipc.h4
-rw-r--r--src/core/hle/ipc_helpers.h50
-rw-r--r--src/core/hle/kernel/address_arbiter.cpp181
-rw-r--r--src/core/hle/kernel/address_arbiter.h80
-rw-r--r--src/core/hle/kernel/client_port.cpp9
-rw-r--r--src/core/hle/kernel/client_session.cpp14
-rw-r--r--src/core/hle/kernel/client_session.h9
-rw-r--r--src/core/hle/kernel/code_set.cpp12
-rw-r--r--src/core/hle/kernel/code_set.h90
-rw-r--r--src/core/hle/kernel/errors.h1
-rw-r--r--src/core/hle/kernel/handle_table.cpp40
-rw-r--r--src/core/hle/kernel/handle_table.h25
-rw-r--r--src/core/hle/kernel/hle_ipc.cpp22
-rw-r--r--src/core/hle/kernel/hle_ipc.h25
-rw-r--r--src/core/hle/kernel/kernel.cpp14
-rw-r--r--src/core/hle/kernel/kernel.h21
-rw-r--r--src/core/hle/kernel/process.cpp34
-rw-r--r--src/core/hle/kernel/process.h69
-rw-r--r--src/core/hle/kernel/process_capability.cpp4
-rw-r--r--src/core/hle/kernel/process_capability.h4
-rw-r--r--src/core/hle/kernel/scheduler.cpp20
-rw-r--r--src/core/hle/kernel/scheduler.h6
-rw-r--r--src/core/hle/kernel/server_port.cpp4
-rw-r--r--src/core/hle/kernel/server_port.h35
-rw-r--r--src/core/hle/kernel/server_session.cpp91
-rw-r--r--src/core/hle/kernel/server_session.h53
-rw-r--r--src/core/hle/kernel/shared_memory.cpp5
-rw-r--r--src/core/hle/kernel/svc.cpp105
-rw-r--r--src/core/hle/kernel/thread.cpp92
-rw-r--r--src/core/hle/kernel/thread.h23
-rw-r--r--src/core/hle/kernel/vm_manager.cpp53
-rw-r--r--src/core/hle/kernel/vm_manager.h33
-rw-r--r--src/core/hle/result.h19
-rw-r--r--src/core/hle/service/am/am.cpp77
-rw-r--r--src/core/hle/service/am/am.h16
-rw-r--r--src/core/hle/service/am/applets/software_keyboard.cpp1
-rw-r--r--src/core/hle/service/am/applets/software_keyboard.h3
-rw-r--r--src/core/hle/service/audio/audout_u.cpp27
-rw-r--r--src/core/hle/service/audio/audren_u.cpp73
-rw-r--r--src/core/hle/service/audio/audren_u.h3
-rw-r--r--src/core/hle/service/audio/errors.h15
-rw-r--r--src/core/hle/service/audio/hwopus.cpp206
-rw-r--r--src/core/hle/service/filesystem/fsp_srv.cpp5
-rw-r--r--src/core/hle/service/filesystem/fsp_srv.h1
-rw-r--r--src/core/hle/service/hid/controllers/controller_base.h7
-rw-r--r--src/core/hle/service/hid/controllers/debug_pad.cpp5
-rw-r--r--src/core/hle/service/hid/controllers/debug_pad.h2
-rw-r--r--src/core/hle/service/hid/controllers/gesture.cpp5
-rw-r--r--src/core/hle/service/hid/controllers/gesture.h2
-rw-r--r--src/core/hle/service/hid/controllers/keyboard.cpp5
-rw-r--r--src/core/hle/service/hid/controllers/keyboard.h2
-rw-r--r--src/core/hle/service/hid/controllers/mouse.cpp5
-rw-r--r--src/core/hle/service/hid/controllers/mouse.h2
-rw-r--r--src/core/hle/service/hid/controllers/npad.cpp5
-rw-r--r--src/core/hle/service/hid/controllers/npad.h2
-rw-r--r--src/core/hle/service/hid/controllers/stubbed.cpp5
-rw-r--r--src/core/hle/service/hid/controllers/stubbed.h2
-rw-r--r--src/core/hle/service/hid/controllers/touchscreen.cpp7
-rw-r--r--src/core/hle/service/hid/controllers/touchscreen.h2
-rw-r--r--src/core/hle/service/hid/controllers/xpad.cpp5
-rw-r--r--src/core/hle/service/hid/controllers/xpad.h2
-rw-r--r--src/core/hle/service/hid/hid.cpp19
-rw-r--r--src/core/hle/service/hid/hid.h6
-rw-r--r--src/core/hle/service/hid/irs.cpp2
-rw-r--r--src/core/hle/service/nvdrv/devices/nvdisp_disp0.cpp10
-rw-r--r--src/core/hle/service/nvdrv/devices/nvdisp_disp0.h2
-rw-r--r--src/core/hle/service/nvdrv/devices/nvhost_as_gpu.cpp3
-rw-r--r--src/core/hle/service/nvdrv/devices/nvhost_ctrl_gpu.cpp3
-rw-r--r--src/core/hle/service/nvdrv/devices/nvhost_gpu.cpp14
-rw-r--r--src/core/hle/service/nvflinger/buffer_queue.cpp2
-rw-r--r--src/core/hle/service/nvflinger/buffer_queue.h8
-rw-r--r--src/core/hle/service/nvflinger/nvflinger.cpp158
-rw-r--r--src/core/hle/service/nvflinger/nvflinger.h81
-rw-r--r--src/core/hle/service/service.cpp15
-rw-r--r--src/core/hle/service/service.h15
-rw-r--r--src/core/hle/service/sm/controller.cpp2
-rw-r--r--src/core/hle/service/sm/sm.h2
-rw-r--r--src/core/hle/service/time/time.cpp9
-rw-r--r--src/core/hle/service/vi/display/vi_display.cpp71
-rw-r--r--src/core/hle/service/vi/display/vi_display.h98
-rw-r--r--src/core/hle/service/vi/layer/vi_layer.cpp13
-rw-r--r--src/core/hle/service/vi/layer/vi_layer.h52
-rw-r--r--src/core/hle/service/vi/vi.cpp129
-rw-r--r--src/core/hle/service/vi/vi.h40
-rw-r--r--src/core/hle/service/vi/vi_m.cpp12
-rw-r--r--src/core/hle/service/vi/vi_m.h19
-rw-r--r--src/core/hle/service/vi/vi_s.cpp12
-rw-r--r--src/core/hle/service/vi/vi_s.h19
-rw-r--r--src/core/hle/service/vi/vi_u.cpp12
-rw-r--r--src/core/hle/service/vi/vi_u.h19
-rw-r--r--src/core/loader/elf.cpp1
-rw-r--r--src/core/loader/linker.cpp147
-rw-r--r--src/core/loader/linker.h36
-rw-r--r--src/core/loader/nro.cpp1
-rw-r--r--src/core/loader/nro.h4
-rw-r--r--src/core/loader/nso.cpp1
-rw-r--r--src/core/loader/nso.h4
-rw-r--r--src/core/memory.cpp241
-rw-r--r--src/core/memory.h80
-rw-r--r--src/core/memory_setup.h19
-rw-r--r--src/core/settings.cpp3
-rw-r--r--src/core/settings.h2
-rw-r--r--src/core/telemetry_session.cpp4
-rw-r--r--src/input_common/CMakeLists.txt15
-rw-r--r--src/input_common/main.cpp23
-rw-r--r--src/input_common/main.h2
-rw-r--r--src/input_common/motion_emu.cpp28
-rw-r--r--src/input_common/sdl/sdl.cpp636
-rw-r--r--src/input_common/sdl/sdl.h53
-rw-r--r--src/input_common/sdl/sdl_impl.cpp668
-rw-r--r--src/input_common/sdl/sdl_impl.h64
-rw-r--r--src/tests/core/arm/arm_test_common.cpp11
-rw-r--r--src/tests/core/arm/arm_test_common.h8
-rw-r--r--src/tests/core/core_timing.cpp220
-rw-r--r--src/video_core/CMakeLists.txt41
-rw-r--r--src/video_core/dma_pusher.cpp54
-rw-r--r--src/video_core/dma_pusher.h5
-rw-r--r--src/video_core/engines/fermi_2d.cpp67
-rw-r--r--src/video_core/engines/fermi_2d.h31
-rw-r--r--src/video_core/engines/kepler_compute.cpp33
-rw-r--r--src/video_core/engines/kepler_compute.h (renamed from src/video_core/engines/maxwell_compute.h)34
-rw-r--r--src/video_core/engines/kepler_memory.cpp21
-rw-r--r--src/video_core/engines/kepler_memory.h10
-rw-r--r--src/video_core/engines/maxwell_3d.cpp165
-rw-r--r--src/video_core/engines/maxwell_3d.h26
-rw-r--r--src/video_core/engines/maxwell_compute.cpp28
-rw-r--r--src/video_core/engines/maxwell_dma.cpp32
-rw-r--r--src/video_core/engines/maxwell_dma.h11
-rw-r--r--src/video_core/engines/shader_bytecode.h34
-rw-r--r--src/video_core/engines/shader_header.h41
-rw-r--r--src/video_core/gpu.cpp36
-rw-r--r--src/video_core/gpu.h76
-rw-r--r--src/video_core/gpu_asynch.cpp37
-rw-r--r--src/video_core/gpu_asynch.h37
-rw-r--r--src/video_core/gpu_synch.cpp37
-rw-r--r--src/video_core/gpu_synch.h29
-rw-r--r--src/video_core/gpu_thread.cpp98
-rw-r--r--src/video_core/gpu_thread.h185
-rw-r--r--src/video_core/memory_manager.cpp58
-rw-r--r--src/video_core/memory_manager.h17
-rw-r--r--src/video_core/morton.cpp324
-rw-r--r--src/video_core/morton.h6
-rw-r--r--src/video_core/rasterizer_cache.h88
-rw-r--r--src/video_core/rasterizer_interface.h17
-rw-r--r--src/video_core/renderer_base.cpp1
-rw-r--r--src/video_core/renderer_opengl/gl_buffer_cache.cpp25
-rw-r--r--src/video_core/renderer_opengl/gl_buffer_cache.h31
-rw-r--r--src/video_core/renderer_opengl/gl_global_cache.cpp43
-rw-r--r--src/video_core/renderer_opengl/gl_global_cache.h21
-rw-r--r--src/video_core/renderer_opengl/gl_primitive_assembler.cpp5
-rw-r--r--src/video_core/renderer_opengl/gl_rasterizer.cpp281
-rw-r--r--src/video_core/renderer_opengl/gl_rasterizer.h28
-rw-r--r--src/video_core/renderer_opengl/gl_rasterizer_cache.cpp768
-rw-r--r--src/video_core/renderer_opengl/gl_rasterizer_cache.h163
-rw-r--r--src/video_core/renderer_opengl/gl_resource_manager.cpp9
-rw-r--r--src/video_core/renderer_opengl/gl_resource_manager.h8
-rw-r--r--src/video_core/renderer_opengl/gl_shader_cache.cpp496
-rw-r--r--src/video_core/renderer_opengl/gl_shader_cache.h98
-rw-r--r--src/video_core/renderer_opengl/gl_shader_decompiler.cpp221
-rw-r--r--src/video_core/renderer_opengl/gl_shader_decompiler.h56
-rw-r--r--src/video_core/renderer_opengl/gl_shader_disk_cache.cpp654
-rw-r--r--src/video_core/renderer_opengl/gl_shader_disk_cache.h245
-rw-r--r--src/video_core/renderer_opengl/gl_shader_gen.cpp4
-rw-r--r--src/video_core/renderer_opengl/gl_shader_gen.h4
-rw-r--r--src/video_core/renderer_opengl/gl_shader_util.h5
-rw-r--r--src/video_core/renderer_opengl/gl_state.cpp178
-rw-r--r--src/video_core/renderer_opengl/gl_state.h12
-rw-r--r--src/video_core/renderer_opengl/renderer_opengl.cpp117
-rw-r--r--src/video_core/renderer_opengl/renderer_opengl.h13
-rw-r--r--src/video_core/renderer_vulkan/declarations.h45
-rw-r--r--src/video_core/renderer_vulkan/maxwell_to_vk.cpp483
-rw-r--r--src/video_core/renderer_vulkan/maxwell_to_vk.h58
-rw-r--r--src/video_core/renderer_vulkan/vk_buffer_cache.cpp123
-rw-r--r--src/video_core/renderer_vulkan/vk_buffer_cache.h104
-rw-r--r--src/video_core/renderer_vulkan/vk_device.cpp238
-rw-r--r--src/video_core/renderer_vulkan/vk_device.h116
-rw-r--r--src/video_core/renderer_vulkan/vk_memory_manager.cpp252
-rw-r--r--src/video_core/renderer_vulkan/vk_memory_manager.h87
-rw-r--r--src/video_core/renderer_vulkan/vk_resource_manager.cpp285
-rw-r--r--src/video_core/renderer_vulkan/vk_resource_manager.h180
-rw-r--r--src/video_core/renderer_vulkan/vk_sampler_cache.cpp81
-rw-r--r--src/video_core/renderer_vulkan/vk_sampler_cache.h56
-rw-r--r--src/video_core/renderer_vulkan/vk_scheduler.cpp60
-rw-r--r--src/video_core/renderer_vulkan/vk_scheduler.h69
-rw-r--r--src/video_core/renderer_vulkan/vk_stream_buffer.cpp90
-rw-r--r--src/video_core/renderer_vulkan/vk_stream_buffer.h72
-rw-r--r--src/video_core/shader/decode.cpp73
-rw-r--r--src/video_core/shader/decode/arithmetic.cpp2
-rw-r--r--src/video_core/shader/decode/arithmetic_half.cpp2
-rw-r--r--src/video_core/shader/decode/arithmetic_half_immediate.cpp2
-rw-r--r--src/video_core/shader/decode/arithmetic_immediate.cpp2
-rw-r--r--src/video_core/shader/decode/arithmetic_integer.cpp8
-rw-r--r--src/video_core/shader/decode/arithmetic_integer_immediate.cpp8
-rw-r--r--src/video_core/shader/decode/bfe.cpp2
-rw-r--r--src/video_core/shader/decode/bfi.cpp2
-rw-r--r--src/video_core/shader/decode/conversion.cpp8
-rw-r--r--src/video_core/shader/decode/ffma.cpp2
-rw-r--r--src/video_core/shader/decode/float_set.cpp2
-rw-r--r--src/video_core/shader/decode/float_set_predicate.cpp2
-rw-r--r--src/video_core/shader/decode/half_set.cpp2
-rw-r--r--src/video_core/shader/decode/half_set_predicate.cpp2
-rw-r--r--src/video_core/shader/decode/hfma2.cpp2
-rw-r--r--src/video_core/shader/decode/integer_set.cpp2
-rw-r--r--src/video_core/shader/decode/integer_set_predicate.cpp2
-rw-r--r--src/video_core/shader/decode/memory.cpp540
-rw-r--r--src/video_core/shader/decode/other.cpp17
-rw-r--r--src/video_core/shader/decode/predicate_set_predicate.cpp2
-rw-r--r--src/video_core/shader/decode/predicate_set_register.cpp2
-rw-r--r--src/video_core/shader/decode/register_set_predicate.cpp2
-rw-r--r--src/video_core/shader/decode/shift.cpp2
-rw-r--r--src/video_core/shader/decode/texture.cpp534
-rw-r--r--src/video_core/shader/decode/video.cpp2
-rw-r--r--src/video_core/shader/decode/xmad.cpp2
-rw-r--r--src/video_core/shader/shader_ir.cpp14
-rw-r--r--src/video_core/shader/shader_ir.h125
-rw-r--r--src/video_core/shader/track.cpp17
-rw-r--r--src/video_core/surface.cpp4
-rw-r--r--src/video_core/textures/astc.cpp80
-rw-r--r--src/video_core/textures/astc.h2
-rw-r--r--src/video_core/textures/convert.cpp92
-rw-r--r--src/video_core/textures/convert.h18
-rw-r--r--src/video_core/textures/decoders.cpp38
-rw-r--r--src/video_core/textures/decoders.h31
-rw-r--r--src/video_core/textures/texture.h86
-rw-r--r--src/video_core/video_core.cpp5
-rw-r--r--src/video_core/video_core.h7
-rw-r--r--src/web_service/verify_login.h2
-rw-r--r--src/web_service/web_backend.cpp1
-rw-r--r--src/yuzu/applets/web_browser.cpp2
-rw-r--r--src/yuzu/bootmanager.cpp22
-rw-r--r--src/yuzu/bootmanager.h8
-rw-r--r--src/yuzu/compatdb.cpp6
-rw-r--r--src/yuzu/configuration/config.cpp443
-rw-r--r--src/yuzu/configuration/config.h5
-rw-r--r--src/yuzu/configuration/configure_graphics.cpp26
-rw-r--r--src/yuzu/configuration/configure_graphics.h2
-rw-r--r--src/yuzu/configuration/configure_graphics.ui14
-rw-r--r--src/yuzu/debugger/graphics/graphics_surface.cpp7
-rw-r--r--src/yuzu/debugger/wait_tree.cpp8
-rw-r--r--src/yuzu/debugger/wait_tree.h3
-rw-r--r--src/yuzu/loading_screen.cpp4
-rw-r--r--src/yuzu/loading_screen.ui5
-rw-r--r--src/yuzu/main.cpp39
-rw-r--r--src/yuzu_cmd/config.cpp15
-rw-r--r--src/yuzu_cmd/default_ini.h8
-rw-r--r--src/yuzu_cmd/emu_window/emu_window_sdl2.cpp13
-rw-r--r--src/yuzu_cmd/yuzu.cpp9
313 files changed, 11051 insertions, 5139 deletions
diff --git a/.gitmodules b/.gitmodules
index a33a04167..2558a5ebc 100644
--- a/.gitmodules
+++ b/.gitmodules
@@ -37,3 +37,6 @@
37[submodule "discord-rpc"] 37[submodule "discord-rpc"]
38 path = externals/discord-rpc 38 path = externals/discord-rpc
39 url = https://github.com/discordapp/discord-rpc.git 39 url = https://github.com/discordapp/discord-rpc.git
40[submodule "Vulkan-Headers"]
41 path = externals/Vulkan-Headers
42 url = https://github.com/KhronosGroup/Vulkan-Headers.git
diff --git a/.travis.yml b/.travis.yml
index b0fbe3c5f..9512f7843 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -24,7 +24,7 @@ matrix:
24 - os: osx 24 - os: osx
25 env: NAME="macos build" 25 env: NAME="macos build"
26 sudo: false 26 sudo: false
27 osx_image: xcode10 27 osx_image: xcode10.1
28 install: "./.travis/macos/deps.sh" 28 install: "./.travis/macos/deps.sh"
29 script: "./.travis/macos/build.sh" 29 script: "./.travis/macos/build.sh"
30 after_success: "./.travis/macos/upload.sh" 30 after_success: "./.travis/macos/upload.sh"
diff --git a/.travis/macos/build.sh b/.travis/macos/build.sh
index 4a14837fc..b7b4c6f8c 100755
--- a/.travis/macos/build.sh
+++ b/.travis/macos/build.sh
@@ -2,7 +2,7 @@
2 2
3set -o pipefail 3set -o pipefail
4 4
5export MACOSX_DEPLOYMENT_TARGET=10.13 5export MACOSX_DEPLOYMENT_TARGET=10.14
6export Qt5_DIR=$(brew --prefix)/opt/qt5 6export Qt5_DIR=$(brew --prefix)/opt/qt5
7export UNICORNDIR=$(pwd)/externals/unicorn 7export UNICORNDIR=$(pwd)/externals/unicorn
8export PATH="/usr/local/opt/ccache/libexec:$PATH" 8export PATH="/usr/local/opt/ccache/libexec:$PATH"
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 871e0ca1a..a4914f37d 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -23,6 +23,8 @@ option(YUZU_USE_QT_WEB_ENGINE "Use QtWebEngine for web applet implementation" OF
23 23
24option(ENABLE_CUBEB "Enables the cubeb audio backend" ON) 24option(ENABLE_CUBEB "Enables the cubeb audio backend" ON)
25 25
26option(ENABLE_VULKAN "Enables Vulkan backend" ON)
27
26option(USE_DISCORD_PRESENCE "Enables Discord Rich Presence" OFF) 28option(USE_DISCORD_PRESENCE "Enables Discord Rich Presence" OFF)
27 29
28if(NOT EXISTS ${PROJECT_SOURCE_DIR}/.git/hooks/pre-commit) 30if(NOT EXISTS ${PROJECT_SOURCE_DIR}/.git/hooks/pre-commit)
@@ -161,12 +163,6 @@ else()
161 set(CMAKE_EXE_LINKER_FLAGS_RELEASE "/DEBUG /MANIFEST:NO /INCREMENTAL:NO /OPT:REF,ICF" CACHE STRING "" FORCE) 163 set(CMAKE_EXE_LINKER_FLAGS_RELEASE "/DEBUG /MANIFEST:NO /INCREMENTAL:NO /OPT:REF,ICF" CACHE STRING "" FORCE)
162endif() 164endif()
163 165
164# Fix GCC C++17 and Boost.ICL incompatibility (needed to build dynarmic)
165# See https://bugzilla.redhat.com/show_bug.cgi?id=1485641#c1
166if (CMAKE_COMPILER_IS_GNUCC)
167 set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fno-new-ttp-matching")
168endif()
169
170# Set file offset size to 64 bits. 166# Set file offset size to 64 bits.
171# 167#
172# On modern Unixes, this is typically already the case. The lone exception is 168# On modern Unixes, this is typically already the case. The lone exception is
@@ -183,9 +179,9 @@ set_property(DIRECTORY APPEND PROPERTY
183# System imported libraries 179# System imported libraries
184# ====================== 180# ======================
185 181
186find_package(Boost 1.63.0 QUIET) 182find_package(Boost 1.66.0 QUIET)
187if (NOT Boost_FOUND) 183if (NOT Boost_FOUND)
188 message(STATUS "Boost 1.63.0 or newer not found, falling back to externals") 184 message(STATUS "Boost 1.66.0 or newer not found, falling back to externals")
189 185
190 set(BOOST_ROOT "${PROJECT_SOURCE_DIR}/externals/boost") 186 set(BOOST_ROOT "${PROJECT_SOURCE_DIR}/externals/boost")
191 set(Boost_NO_SYSTEM_PATHS OFF) 187 set(Boost_NO_SYSTEM_PATHS OFF)
@@ -419,19 +415,6 @@ function(create_target_directory_groups target_name)
419 endforeach() 415 endforeach()
420endfunction() 416endfunction()
421 417
422# Gets a UTC timstamp and sets the provided variable to it
423function(get_timestamp _var)
424 string(TIMESTAMP timestamp UTC)
425 set(${_var} "${timestamp}" PARENT_SCOPE)
426endfunction()
427
428# generate git/build information
429include(GetGitRevisionDescription)
430get_git_head_revision(GIT_REF_SPEC GIT_REV)
431git_describe(GIT_DESC --always --long --dirty)
432git_branch_name(GIT_BRANCH)
433get_timestamp(BUILD_DATE)
434
435enable_testing() 418enable_testing()
436add_subdirectory(externals) 419add_subdirectory(externals)
437add_subdirectory(src) 420add_subdirectory(src)
diff --git a/CMakeModules/GenerateSCMRev.cmake b/CMakeModules/GenerateSCMRev.cmake
new file mode 100644
index 000000000..08315a1f1
--- /dev/null
+++ b/CMakeModules/GenerateSCMRev.cmake
@@ -0,0 +1,95 @@
1# Gets a UTC timstamp and sets the provided variable to it
2function(get_timestamp _var)
3 string(TIMESTAMP timestamp UTC)
4 set(${_var} "${timestamp}" PARENT_SCOPE)
5endfunction()
6
7list(APPEND CMAKE_MODULE_PATH "${SRC_DIR}/externals/cmake-modules")
8# generate git/build information
9include(GetGitRevisionDescription)
10get_git_head_revision(GIT_REF_SPEC GIT_REV)
11git_describe(GIT_DESC --always --long --dirty)
12git_branch_name(GIT_BRANCH)
13get_timestamp(BUILD_DATE)
14
15# Generate cpp with Git revision from template
16# Also if this is a CI build, add the build name (ie: Nightly, Canary) to the scm_rev file as well
17set(REPO_NAME "")
18set(BUILD_VERSION "0")
19if (BUILD_REPOSITORY)
20 # regex capture the string nightly or canary into CMAKE_MATCH_1
21 string(REGEX MATCH "yuzu-emu/yuzu-?(.*)" OUTVAR ${BUILD_REPOSITORY})
22 if (${CMAKE_MATCH_COUNT} GREATER 0)
23 # capitalize the first letter of each word in the repo name.
24 string(REPLACE "-" ";" REPO_NAME_LIST ${CMAKE_MATCH_1})
25 foreach(WORD ${REPO_NAME_LIST})
26 string(SUBSTRING ${WORD} 0 1 FIRST_LETTER)
27 string(SUBSTRING ${WORD} 1 -1 REMAINDER)
28 string(TOUPPER ${FIRST_LETTER} FIRST_LETTER)
29 set(REPO_NAME "${REPO_NAME}${FIRST_LETTER}${REMAINDER}")
30 endforeach()
31 if (BUILD_TAG)
32 string(REGEX MATCH "${CMAKE_MATCH_1}-([0-9]+)" OUTVAR ${BUILD_TAG})
33 if (${CMAKE_MATCH_COUNT} GREATER 0)
34 set(BUILD_VERSION ${CMAKE_MATCH_1})
35 endif()
36 if (BUILD_VERSION)
37 # This leaves a trailing space on the last word, but we actually want that
38 # because of how it's styled in the title bar.
39 set(BUILD_FULLNAME "${REPO_NAME} ${BUILD_VERSION} ")
40 else()
41 set(BUILD_FULLNAME "")
42 endif()
43 endif()
44 endif()
45endif()
46
47# The variable SRC_DIR must be passed into the script (since it uses the current build directory for all values of CMAKE_*_DIR)
48set(VIDEO_CORE "${SRC_DIR}/src/video_core")
49set(HASH_FILES
50 "${VIDEO_CORE}/renderer_opengl/gl_shader_cache.cpp"
51 "${VIDEO_CORE}/renderer_opengl/gl_shader_cache.h"
52 "${VIDEO_CORE}/renderer_opengl/gl_shader_decompiler.cpp"
53 "${VIDEO_CORE}/renderer_opengl/gl_shader_decompiler.h"
54 "${VIDEO_CORE}/renderer_opengl/gl_shader_disk_cache.cpp"
55 "${VIDEO_CORE}/renderer_opengl/gl_shader_disk_cache.h"
56 "${VIDEO_CORE}/renderer_opengl/gl_shader_gen.cpp"
57 "${VIDEO_CORE}/renderer_opengl/gl_shader_gen.h"
58 "${VIDEO_CORE}/shader/decode/arithmetic.cpp"
59 "${VIDEO_CORE}/shader/decode/arithmetic_half.cpp"
60 "${VIDEO_CORE}/shader/decode/arithmetic_half_immediate.cpp"
61 "${VIDEO_CORE}/shader/decode/arithmetic_immediate.cpp"
62 "${VIDEO_CORE}/shader/decode/arithmetic_integer.cpp"
63 "${VIDEO_CORE}/shader/decode/arithmetic_integer_immediate.cpp"
64 "${VIDEO_CORE}/shader/decode/bfe.cpp"
65 "${VIDEO_CORE}/shader/decode/bfi.cpp"
66 "${VIDEO_CORE}/shader/decode/conversion.cpp"
67 "${VIDEO_CORE}/shader/decode/ffma.cpp"
68 "${VIDEO_CORE}/shader/decode/float_set.cpp"
69 "${VIDEO_CORE}/shader/decode/float_set_predicate.cpp"
70 "${VIDEO_CORE}/shader/decode/half_set.cpp"
71 "${VIDEO_CORE}/shader/decode/half_set_predicate.cpp"
72 "${VIDEO_CORE}/shader/decode/hfma2.cpp"
73 "${VIDEO_CORE}/shader/decode/integer_set.cpp"
74 "${VIDEO_CORE}/shader/decode/integer_set_predicate.cpp"
75 "${VIDEO_CORE}/shader/decode/memory.cpp"
76 "${VIDEO_CORE}/shader/decode/texture.cpp"
77 "${VIDEO_CORE}/shader/decode/other.cpp"
78 "${VIDEO_CORE}/shader/decode/predicate_set_predicate.cpp"
79 "${VIDEO_CORE}/shader/decode/predicate_set_register.cpp"
80 "${VIDEO_CORE}/shader/decode/register_set_predicate.cpp"
81 "${VIDEO_CORE}/shader/decode/shift.cpp"
82 "${VIDEO_CORE}/shader/decode/video.cpp"
83 "${VIDEO_CORE}/shader/decode/xmad.cpp"
84 "${VIDEO_CORE}/shader/decode.cpp"
85 "${VIDEO_CORE}/shader/shader_ir.cpp"
86 "${VIDEO_CORE}/shader/shader_ir.h"
87 "${VIDEO_CORE}/shader/track.cpp"
88)
89set(COMBINED "")
90foreach (F IN LISTS HASH_FILES)
91 file(READ ${F} TMP)
92 set(COMBINED "${COMBINED}${TMP}")
93endforeach()
94string(MD5 SHADER_CACHE_VERSION "${COMBINED}")
95configure_file("${SRC_DIR}/src/common/scm_rev.cpp.in" "scm_rev.cpp" @ONLY)
diff --git a/README.md b/README.md
index 1d5ee58cc..fa4233b2a 100644
--- a/README.md
+++ b/README.md
@@ -7,7 +7,7 @@ yuzu is an experimental open-source emulator for the Nintendo Switch from the cr
7 7
8It is written in C++ with portability in mind, with builds actively maintained for Windows, Linux and macOS. The emulator is currently only useful for homebrew development and research purposes. 8It is written in C++ with portability in mind, with builds actively maintained for Windows, Linux and macOS. The emulator is currently only useful for homebrew development and research purposes.
9 9
10yuzu only emulates a subset of Switch hardware and therefore is generally only useful for running/debugging homebrew applications. At this time, yuzu cannot play any commercial games without major problems. yuzu can boot some games, to varying degrees of success, but does not implement any of the necessary GPU features to render 3D graphics. 10yuzu only emulates a subset of Switch hardware and therefore is generally only useful for running/debugging homebrew applications. At this time, yuzu cannot play any commercial games without major problems. yuzu can boot some games, to varying degrees of success.
11 11
12yuzu is licensed under the GPLv2 (or any later version). Refer to the license.txt file included. 12yuzu is licensed under the GPLv2 (or any later version). Refer to the license.txt file included.
13 13
diff --git a/externals/Vulkan-Headers b/externals/Vulkan-Headers
new file mode 160000
Subproject 15e5c4db7500b936ae758236f2e72fc1aec2202
diff --git a/externals/cubeb b/externals/cubeb
Subproject 12b78c0edfa40007e41dbdcd9dfe367fbb98d01 Subproject 6f2420de8f155b10330cf973900ac7bdbfee589
diff --git a/externals/opus b/externals/opus
Subproject b2871922a12abb49579512d604cabc471a59ad9 Subproject 562f8ba555c4181e1b57e82e496e4a959b9c019
diff --git a/src/audio_core/audio_out.cpp b/src/audio_core/audio_out.cpp
index 50d2a1ed3..8619a3f03 100644
--- a/src/audio_core/audio_out.cpp
+++ b/src/audio_core/audio_out.cpp
@@ -26,14 +26,15 @@ static Stream::Format ChannelsToStreamFormat(u32 num_channels) {
26 return {}; 26 return {};
27} 27}
28 28
29StreamPtr AudioOut::OpenStream(u32 sample_rate, u32 num_channels, std::string&& name, 29StreamPtr AudioOut::OpenStream(Core::Timing::CoreTiming& core_timing, u32 sample_rate,
30 u32 num_channels, std::string&& name,
30 Stream::ReleaseCallback&& release_callback) { 31 Stream::ReleaseCallback&& release_callback) {
31 if (!sink) { 32 if (!sink) {
32 sink = CreateSinkFromID(Settings::values.sink_id, Settings::values.audio_device_id); 33 sink = CreateSinkFromID(Settings::values.sink_id, Settings::values.audio_device_id);
33 } 34 }
34 35
35 return std::make_shared<Stream>( 36 return std::make_shared<Stream>(
36 sample_rate, ChannelsToStreamFormat(num_channels), std::move(release_callback), 37 core_timing, sample_rate, ChannelsToStreamFormat(num_channels), std::move(release_callback),
37 sink->AcquireSinkStream(sample_rate, num_channels, name), std::move(name)); 38 sink->AcquireSinkStream(sample_rate, num_channels, name), std::move(name));
38} 39}
39 40
diff --git a/src/audio_core/audio_out.h b/src/audio_core/audio_out.h
index df9607ac7..b07588287 100644
--- a/src/audio_core/audio_out.h
+++ b/src/audio_core/audio_out.h
@@ -13,6 +13,10 @@
13#include "audio_core/stream.h" 13#include "audio_core/stream.h"
14#include "common/common_types.h" 14#include "common/common_types.h"
15 15
16namespace Core::Timing {
17class CoreTiming;
18}
19
16namespace AudioCore { 20namespace AudioCore {
17 21
18/** 22/**
@@ -21,8 +25,8 @@ namespace AudioCore {
21class AudioOut { 25class AudioOut {
22public: 26public:
23 /// Opens a new audio stream 27 /// Opens a new audio stream
24 StreamPtr OpenStream(u32 sample_rate, u32 num_channels, std::string&& name, 28 StreamPtr OpenStream(Core::Timing::CoreTiming& core_timing, u32 sample_rate, u32 num_channels,
25 Stream::ReleaseCallback&& release_callback); 29 std::string&& name, Stream::ReleaseCallback&& release_callback);
26 30
27 /// Returns a vector of recently released buffers specified by tag for the specified stream 31 /// Returns a vector of recently released buffers specified by tag for the specified stream
28 std::vector<Buffer::Tag> GetTagsAndReleaseBuffers(StreamPtr stream, std::size_t max_count); 32 std::vector<Buffer::Tag> GetTagsAndReleaseBuffers(StreamPtr stream, std::size_t max_count);
diff --git a/src/audio_core/audio_renderer.cpp b/src/audio_core/audio_renderer.cpp
index 00c026511..9a0939883 100644
--- a/src/audio_core/audio_renderer.cpp
+++ b/src/audio_core/audio_renderer.cpp
@@ -8,6 +8,7 @@
8#include "audio_core/codec.h" 8#include "audio_core/codec.h"
9#include "common/assert.h" 9#include "common/assert.h"
10#include "common/logging/log.h" 10#include "common/logging/log.h"
11#include "core/core.h"
11#include "core/hle/kernel/writable_event.h" 12#include "core/hle/kernel/writable_event.h"
12#include "core/memory.h" 13#include "core/memory.h"
13 14
@@ -71,14 +72,14 @@ private:
71 EffectOutStatus out_status{}; 72 EffectOutStatus out_status{};
72 EffectInStatus info{}; 73 EffectInStatus info{};
73}; 74};
74AudioRenderer::AudioRenderer(AudioRendererParameter params, 75AudioRenderer::AudioRenderer(Core::Timing::CoreTiming& core_timing, AudioRendererParameter params,
75 Kernel::SharedPtr<Kernel::WritableEvent> buffer_event) 76 Kernel::SharedPtr<Kernel::WritableEvent> buffer_event)
76 : worker_params{params}, buffer_event{buffer_event}, voices(params.voice_count), 77 : worker_params{params}, buffer_event{buffer_event}, voices(params.voice_count),
77 effects(params.effect_count) { 78 effects(params.effect_count) {
78 79
79 audio_out = std::make_unique<AudioCore::AudioOut>(); 80 audio_out = std::make_unique<AudioCore::AudioOut>();
80 stream = audio_out->OpenStream(STREAM_SAMPLE_RATE, STREAM_NUM_CHANNELS, "AudioRenderer", 81 stream = audio_out->OpenStream(core_timing, STREAM_SAMPLE_RATE, STREAM_NUM_CHANNELS,
81 [=]() { buffer_event->Signal(); }); 82 "AudioRenderer", [=]() { buffer_event->Signal(); });
82 audio_out->StartStream(stream); 83 audio_out->StartStream(stream);
83 84
84 QueueMixedBuffer(0); 85 QueueMixedBuffer(0);
diff --git a/src/audio_core/audio_renderer.h b/src/audio_core/audio_renderer.h
index 7826881bf..b2e5d336c 100644
--- a/src/audio_core/audio_renderer.h
+++ b/src/audio_core/audio_renderer.h
@@ -14,6 +14,10 @@
14#include "common/swap.h" 14#include "common/swap.h"
15#include "core/hle/kernel/object.h" 15#include "core/hle/kernel/object.h"
16 16
17namespace Core::Timing {
18class CoreTiming;
19}
20
17namespace Kernel { 21namespace Kernel {
18class WritableEvent; 22class WritableEvent;
19} 23}
@@ -42,16 +46,18 @@ struct AudioRendererParameter {
42 u32_le sample_rate; 46 u32_le sample_rate;
43 u32_le sample_count; 47 u32_le sample_count;
44 u32_le mix_buffer_count; 48 u32_le mix_buffer_count;
45 u32_le unknown_c; 49 u32_le submix_count;
46 u32_le voice_count; 50 u32_le voice_count;
47 u32_le sink_count; 51 u32_le sink_count;
48 u32_le effect_count; 52 u32_le effect_count;
49 u32_le unknown_1c; 53 u32_le performance_frame_count;
50 u8 unknown_20; 54 u8 is_voice_drop_enabled;
51 INSERT_PADDING_BYTES(3); 55 u8 unknown_21;
56 u8 unknown_22;
57 u8 execution_mode;
52 u32_le splitter_count; 58 u32_le splitter_count;
53 u32_le unknown_2c; 59 u32_le num_splitter_send_channels;
54 INSERT_PADDING_WORDS(1); 60 u32_le unknown_30;
55 u32_le revision; 61 u32_le revision;
56}; 62};
57static_assert(sizeof(AudioRendererParameter) == 52, "AudioRendererParameter is an invalid size"); 63static_assert(sizeof(AudioRendererParameter) == 52, "AudioRendererParameter is an invalid size");
@@ -208,7 +214,7 @@ static_assert(sizeof(UpdateDataHeader) == 0x40, "UpdateDataHeader has wrong size
208 214
209class AudioRenderer { 215class AudioRenderer {
210public: 216public:
211 AudioRenderer(AudioRendererParameter params, 217 AudioRenderer(Core::Timing::CoreTiming& core_timing, AudioRendererParameter params,
212 Kernel::SharedPtr<Kernel::WritableEvent> buffer_event); 218 Kernel::SharedPtr<Kernel::WritableEvent> buffer_event);
213 ~AudioRenderer(); 219 ~AudioRenderer();
214 220
diff --git a/src/audio_core/buffer.h b/src/audio_core/buffer.h
index a323b23ec..5ee09e9aa 100644
--- a/src/audio_core/buffer.h
+++ b/src/audio_core/buffer.h
@@ -21,7 +21,7 @@ public:
21 Buffer(Tag tag, std::vector<s16>&& samples) : tag{tag}, samples{std::move(samples)} {} 21 Buffer(Tag tag, std::vector<s16>&& samples) : tag{tag}, samples{std::move(samples)} {}
22 22
23 /// Returns the raw audio data for the buffer 23 /// Returns the raw audio data for the buffer
24 std::vector<s16>& Samples() { 24 std::vector<s16>& GetSamples() {
25 return samples; 25 return samples;
26 } 26 }
27 27
diff --git a/src/audio_core/codec.cpp b/src/audio_core/codec.cpp
index 454de798b..c5a0d98ce 100644
--- a/src/audio_core/codec.cpp
+++ b/src/audio_core/codec.cpp
@@ -68,8 +68,8 @@ std::vector<s16> DecodeADPCM(const u8* const data, std::size_t size, const ADPCM
68 } 68 }
69 } 69 }
70 70
71 state.yn1 = yn1; 71 state.yn1 = static_cast<s16>(yn1);
72 state.yn2 = yn2; 72 state.yn2 = static_cast<s16>(yn2);
73 73
74 return ret; 74 return ret;
75} 75}
diff --git a/src/audio_core/cubeb_sink.cpp b/src/audio_core/cubeb_sink.cpp
index 097328901..7047ed9cf 100644
--- a/src/audio_core/cubeb_sink.cpp
+++ b/src/audio_core/cubeb_sink.cpp
@@ -12,6 +12,10 @@
12#include "common/ring_buffer.h" 12#include "common/ring_buffer.h"
13#include "core/settings.h" 13#include "core/settings.h"
14 14
15#ifdef _WIN32
16#include <objbase.h>
17#endif
18
15namespace AudioCore { 19namespace AudioCore {
16 20
17class CubebSinkStream final : public SinkStream { 21class CubebSinkStream final : public SinkStream {
@@ -46,7 +50,7 @@ public:
46 } 50 }
47 } 51 }
48 52
49 ~CubebSinkStream() { 53 ~CubebSinkStream() override {
50 if (!ctx) { 54 if (!ctx) {
51 return; 55 return;
52 } 56 }
@@ -75,11 +79,11 @@ public:
75 queue.Push(samples); 79 queue.Push(samples);
76 } 80 }
77 81
78 std::size_t SamplesInQueue(u32 num_channels) const override { 82 std::size_t SamplesInQueue(u32 channel_count) const override {
79 if (!ctx) 83 if (!ctx)
80 return 0; 84 return 0;
81 85
82 return queue.Size() / num_channels; 86 return queue.Size() / channel_count;
83 } 87 }
84 88
85 void Flush() override { 89 void Flush() override {
@@ -98,7 +102,7 @@ private:
98 u32 num_channels{}; 102 u32 num_channels{};
99 103
100 Common::RingBuffer<s16, 0x10000> queue; 104 Common::RingBuffer<s16, 0x10000> queue;
101 std::array<s16, 2> last_frame; 105 std::array<s16, 2> last_frame{};
102 std::atomic<bool> should_flush{}; 106 std::atomic<bool> should_flush{};
103 TimeStretcher time_stretch; 107 TimeStretcher time_stretch;
104 108
@@ -108,6 +112,11 @@ private:
108}; 112};
109 113
110CubebSink::CubebSink(std::string_view target_device_name) { 114CubebSink::CubebSink(std::string_view target_device_name) {
115 // Cubeb requires COM to be initialized on the thread calling cubeb_init on Windows
116#ifdef _WIN32
117 com_init_result = CoInitializeEx(nullptr, COINIT_MULTITHREADED);
118#endif
119
111 if (cubeb_init(&ctx, "yuzu", nullptr) != CUBEB_OK) { 120 if (cubeb_init(&ctx, "yuzu", nullptr) != CUBEB_OK) {
112 LOG_CRITICAL(Audio_Sink, "cubeb_init failed"); 121 LOG_CRITICAL(Audio_Sink, "cubeb_init failed");
113 return; 122 return;
@@ -142,6 +151,12 @@ CubebSink::~CubebSink() {
142 } 151 }
143 152
144 cubeb_destroy(ctx); 153 cubeb_destroy(ctx);
154
155#ifdef _WIN32
156 if (SUCCEEDED(com_init_result)) {
157 CoUninitialize();
158 }
159#endif
145} 160}
146 161
147SinkStream& CubebSink::AcquireSinkStream(u32 sample_rate, u32 num_channels, 162SinkStream& CubebSink::AcquireSinkStream(u32 sample_rate, u32 num_channels,
diff --git a/src/audio_core/cubeb_sink.h b/src/audio_core/cubeb_sink.h
index efb9d1634..7ce850f47 100644
--- a/src/audio_core/cubeb_sink.h
+++ b/src/audio_core/cubeb_sink.h
@@ -25,6 +25,10 @@ private:
25 cubeb* ctx{}; 25 cubeb* ctx{};
26 cubeb_devid output_device{}; 26 cubeb_devid output_device{};
27 std::vector<SinkStreamPtr> sink_streams; 27 std::vector<SinkStreamPtr> sink_streams;
28
29#ifdef _WIN32
30 u32 com_init_result = 0;
31#endif
28}; 32};
29 33
30std::vector<std::string> ListCubebSinkDevices(); 34std::vector<std::string> ListCubebSinkDevices();
diff --git a/src/audio_core/stream.cpp b/src/audio_core/stream.cpp
index 4ce2d374e..4b66a6786 100644
--- a/src/audio_core/stream.cpp
+++ b/src/audio_core/stream.cpp
@@ -32,12 +32,12 @@ u32 Stream::GetNumChannels() const {
32 return {}; 32 return {};
33} 33}
34 34
35Stream::Stream(u32 sample_rate, Format format, ReleaseCallback&& release_callback, 35Stream::Stream(Core::Timing::CoreTiming& core_timing, u32 sample_rate, Format format,
36 SinkStream& sink_stream, std::string&& name_) 36 ReleaseCallback&& release_callback, SinkStream& sink_stream, std::string&& name_)
37 : sample_rate{sample_rate}, format{format}, release_callback{std::move(release_callback)}, 37 : sample_rate{sample_rate}, format{format}, release_callback{std::move(release_callback)},
38 sink_stream{sink_stream}, name{std::move(name_)} { 38 sink_stream{sink_stream}, core_timing{core_timing}, name{std::move(name_)} {
39 39
40 release_event = CoreTiming::RegisterEvent( 40 release_event = core_timing.RegisterEvent(
41 name, [this](u64 userdata, int cycles_late) { ReleaseActiveBuffer(); }); 41 name, [this](u64 userdata, int cycles_late) { ReleaseActiveBuffer(); });
42} 42}
43 43
@@ -57,7 +57,7 @@ Stream::State Stream::GetState() const {
57 57
58s64 Stream::GetBufferReleaseCycles(const Buffer& buffer) const { 58s64 Stream::GetBufferReleaseCycles(const Buffer& buffer) const {
59 const std::size_t num_samples{buffer.GetSamples().size() / GetNumChannels()}; 59 const std::size_t num_samples{buffer.GetSamples().size() / GetNumChannels()};
60 return CoreTiming::usToCycles((static_cast<u64>(num_samples) * 1000000) / sample_rate); 60 return Core::Timing::usToCycles((static_cast<u64>(num_samples) * 1000000) / sample_rate);
61} 61}
62 62
63static void VolumeAdjustSamples(std::vector<s16>& samples) { 63static void VolumeAdjustSamples(std::vector<s16>& samples) {
@@ -95,11 +95,11 @@ void Stream::PlayNextBuffer() {
95 active_buffer = queued_buffers.front(); 95 active_buffer = queued_buffers.front();
96 queued_buffers.pop(); 96 queued_buffers.pop();
97 97
98 VolumeAdjustSamples(active_buffer->Samples()); 98 VolumeAdjustSamples(active_buffer->GetSamples());
99 99
100 sink_stream.EnqueueSamples(GetNumChannels(), active_buffer->GetSamples()); 100 sink_stream.EnqueueSamples(GetNumChannels(), active_buffer->GetSamples());
101 101
102 CoreTiming::ScheduleEventThreadsafe(GetBufferReleaseCycles(*active_buffer), release_event, {}); 102 core_timing.ScheduleEventThreadsafe(GetBufferReleaseCycles(*active_buffer), release_event, {});
103} 103}
104 104
105void Stream::ReleaseActiveBuffer() { 105void Stream::ReleaseActiveBuffer() {
diff --git a/src/audio_core/stream.h b/src/audio_core/stream.h
index aebfeb51d..05071243b 100644
--- a/src/audio_core/stream.h
+++ b/src/audio_core/stream.h
@@ -13,9 +13,10 @@
13#include "audio_core/buffer.h" 13#include "audio_core/buffer.h"
14#include "common/common_types.h" 14#include "common/common_types.h"
15 15
16namespace CoreTiming { 16namespace Core::Timing {
17class CoreTiming;
17struct EventType; 18struct EventType;
18} 19} // namespace Core::Timing
19 20
20namespace AudioCore { 21namespace AudioCore {
21 22
@@ -42,8 +43,8 @@ public:
42 /// Callback function type, used to change guest state on a buffer being released 43 /// Callback function type, used to change guest state on a buffer being released
43 using ReleaseCallback = std::function<void()>; 44 using ReleaseCallback = std::function<void()>;
44 45
45 Stream(u32 sample_rate, Format format, ReleaseCallback&& release_callback, 46 Stream(Core::Timing::CoreTiming& core_timing, u32 sample_rate, Format format,
46 SinkStream& sink_stream, std::string&& name_); 47 ReleaseCallback&& release_callback, SinkStream& sink_stream, std::string&& name_);
47 48
48 /// Plays the audio stream 49 /// Plays the audio stream
49 void Play(); 50 void Play();
@@ -91,16 +92,17 @@ private:
91 /// Gets the number of core cycles when the specified buffer will be released 92 /// Gets the number of core cycles when the specified buffer will be released
92 s64 GetBufferReleaseCycles(const Buffer& buffer) const; 93 s64 GetBufferReleaseCycles(const Buffer& buffer) const;
93 94
94 u32 sample_rate; ///< Sample rate of the stream 95 u32 sample_rate; ///< Sample rate of the stream
95 Format format; ///< Format of the stream 96 Format format; ///< Format of the stream
96 ReleaseCallback release_callback; ///< Buffer release callback for the stream 97 ReleaseCallback release_callback; ///< Buffer release callback for the stream
97 State state{State::Stopped}; ///< Playback state of the stream 98 State state{State::Stopped}; ///< Playback state of the stream
98 CoreTiming::EventType* release_event{}; ///< Core timing release event for the stream 99 Core::Timing::EventType* release_event{}; ///< Core timing release event for the stream
99 BufferPtr active_buffer; ///< Actively playing buffer in the stream 100 BufferPtr active_buffer; ///< Actively playing buffer in the stream
100 std::queue<BufferPtr> queued_buffers; ///< Buffers queued to be played in the stream 101 std::queue<BufferPtr> queued_buffers; ///< Buffers queued to be played in the stream
101 std::queue<BufferPtr> released_buffers; ///< Buffers recently released from the stream 102 std::queue<BufferPtr> released_buffers; ///< Buffers recently released from the stream
102 SinkStream& sink_stream; ///< Output sink for the stream 103 SinkStream& sink_stream; ///< Output sink for the stream
103 std::string name; ///< Name of the stream, must be unique 104 Core::Timing::CoreTiming& core_timing; ///< Core timing instance.
105 std::string name; ///< Name of the stream, must be unique
104}; 106};
105 107
106using StreamPtr = std::shared_ptr<Stream>; 108using StreamPtr = std::shared_ptr<Stream>;
diff --git a/src/common/CMakeLists.txt b/src/common/CMakeLists.txt
index 845626fc5..43ae8a9e7 100644
--- a/src/common/CMakeLists.txt
+++ b/src/common/CMakeLists.txt
@@ -1,42 +1,70 @@
1# Generate cpp with Git revision from template 1# Add a custom command to generate a new shader_cache_version hash when any of the following files change
2# Also if this is a CI build, add the build name (ie: Nightly, Canary) to the scm_rev file as well 2# NOTE: This is an approximation of what files affect shader generation, its possible something else
3set(REPO_NAME "") 3# could affect the result, but much more unlikely than the following files. Keeping a list of files
4set(BUILD_VERSION "0") 4# like this allows for much better caching since it doesn't force the user to recompile binary shaders every update
5if ($ENV{CI}) 5set(VIDEO_CORE "${CMAKE_SOURCE_DIR}/src/video_core")
6 if ($ENV{TRAVIS}) 6if (DEFINED ENV{CI})
7 if (DEFINED ENV{TRAVIS})
7 set(BUILD_REPOSITORY $ENV{TRAVIS_REPO_SLUG}) 8 set(BUILD_REPOSITORY $ENV{TRAVIS_REPO_SLUG})
8 set(BUILD_TAG $ENV{TRAVIS_TAG}) 9 set(BUILD_TAG $ENV{TRAVIS_TAG})
9 elseif($ENV{APPVEYOR}) 10 elseif(DEFINED ENV{APPVEYOR})
10 set(BUILD_REPOSITORY $ENV{APPVEYOR_REPO_NAME}) 11 set(BUILD_REPOSITORY $ENV{APPVEYOR_REPO_NAME})
11 set(BUILD_TAG $ENV{APPVEYOR_REPO_TAG_NAME}) 12 set(BUILD_TAG $ENV{APPVEYOR_REPO_TAG_NAME})
12 endif() 13 endif()
13 # regex capture the string nightly or canary into CMAKE_MATCH_1
14 string(REGEX MATCH "yuzu-emu/yuzu-?(.*)" OUTVAR ${BUILD_REPOSITORY})
15 if (${CMAKE_MATCH_COUNT} GREATER 0)
16 # capitalize the first letter of each word in the repo name.
17 string(REPLACE "-" ";" REPO_NAME_LIST ${CMAKE_MATCH_1})
18 foreach(WORD ${REPO_NAME_LIST})
19 string(SUBSTRING ${WORD} 0 1 FIRST_LETTER)
20 string(SUBSTRING ${WORD} 1 -1 REMAINDER)
21 string(TOUPPER ${FIRST_LETTER} FIRST_LETTER)
22 set(REPO_NAME "${REPO_NAME}${FIRST_LETTER}${REMAINDER}")
23 endforeach()
24 if (BUILD_TAG)
25 string(REGEX MATCH "${CMAKE_MATCH_1}-([0-9]+)" OUTVAR ${BUILD_TAG})
26 if (${CMAKE_MATCH_COUNT} GREATER 0)
27 set(BUILD_VERSION ${CMAKE_MATCH_1})
28 endif()
29 if (BUILD_VERSION)
30 # This leaves a trailing space on the last word, but we actually want that
31 # because of how it's styled in the title bar.
32 set(BUILD_FULLNAME "${REPO_NAME} ${BUILD_VERSION} ")
33 else()
34 set(BUILD_FULLNAME "")
35 endif()
36 endif()
37 endif()
38endif() 14endif()
39configure_file("${CMAKE_CURRENT_SOURCE_DIR}/scm_rev.cpp.in" "${CMAKE_CURRENT_SOURCE_DIR}/scm_rev.cpp" @ONLY) 15add_custom_command(OUTPUT scm_rev.cpp
16 COMMAND ${CMAKE_COMMAND}
17 -DSRC_DIR="${CMAKE_SOURCE_DIR}"
18 -DBUILD_REPOSITORY="${BUILD_REPOSITORY}"
19 -DBUILD_TAG="${BUILD_TAG}"
20 -P "${CMAKE_SOURCE_DIR}/CMakeModules/GenerateSCMRev.cmake"
21 DEPENDS
22 # WARNING! It was too much work to try and make a common location for this list,
23 # so if you need to change it, please update CMakeModules/GenerateSCMRev.cmake as well
24 "${VIDEO_CORE}/renderer_opengl/gl_shader_cache.cpp"
25 "${VIDEO_CORE}/renderer_opengl/gl_shader_cache.h"
26 "${VIDEO_CORE}/renderer_opengl/gl_shader_decompiler.cpp"
27 "${VIDEO_CORE}/renderer_opengl/gl_shader_decompiler.h"
28 "${VIDEO_CORE}/renderer_opengl/gl_shader_disk_cache.cpp"
29 "${VIDEO_CORE}/renderer_opengl/gl_shader_disk_cache.h"
30 "${VIDEO_CORE}/renderer_opengl/gl_shader_gen.cpp"
31 "${VIDEO_CORE}/renderer_opengl/gl_shader_gen.h"
32 "${VIDEO_CORE}/shader/decode/arithmetic.cpp"
33 "${VIDEO_CORE}/shader/decode/arithmetic_half.cpp"
34 "${VIDEO_CORE}/shader/decode/arithmetic_half_immediate.cpp"
35 "${VIDEO_CORE}/shader/decode/arithmetic_immediate.cpp"
36 "${VIDEO_CORE}/shader/decode/arithmetic_integer.cpp"
37 "${VIDEO_CORE}/shader/decode/arithmetic_integer_immediate.cpp"
38 "${VIDEO_CORE}/shader/decode/bfe.cpp"
39 "${VIDEO_CORE}/shader/decode/bfi.cpp"
40 "${VIDEO_CORE}/shader/decode/conversion.cpp"
41 "${VIDEO_CORE}/shader/decode/ffma.cpp"
42 "${VIDEO_CORE}/shader/decode/float_set.cpp"
43 "${VIDEO_CORE}/shader/decode/float_set_predicate.cpp"
44 "${VIDEO_CORE}/shader/decode/half_set.cpp"
45 "${VIDEO_CORE}/shader/decode/half_set_predicate.cpp"
46 "${VIDEO_CORE}/shader/decode/hfma2.cpp"
47 "${VIDEO_CORE}/shader/decode/integer_set.cpp"
48 "${VIDEO_CORE}/shader/decode/integer_set_predicate.cpp"
49 "${VIDEO_CORE}/shader/decode/memory.cpp"
50 "${VIDEO_CORE}/shader/decode/texture.cpp"
51 "${VIDEO_CORE}/shader/decode/other.cpp"
52 "${VIDEO_CORE}/shader/decode/predicate_set_predicate.cpp"
53 "${VIDEO_CORE}/shader/decode/predicate_set_register.cpp"
54 "${VIDEO_CORE}/shader/decode/register_set_predicate.cpp"
55 "${VIDEO_CORE}/shader/decode/shift.cpp"
56 "${VIDEO_CORE}/shader/decode/video.cpp"
57 "${VIDEO_CORE}/shader/decode/xmad.cpp"
58 "${VIDEO_CORE}/shader/decode.cpp"
59 "${VIDEO_CORE}/shader/shader_ir.cpp"
60 "${VIDEO_CORE}/shader/shader_ir.h"
61 "${VIDEO_CORE}/shader/track.cpp"
62 # and also check that the scm_rev files haven't changed
63 "${CMAKE_CURRENT_SOURCE_DIR}/scm_rev.cpp.in"
64 "${CMAKE_CURRENT_SOURCE_DIR}/scm_rev.h"
65 # technically we should regenerate if the git version changed, but its not worth the effort imo
66 "${CMAKE_SOURCE_DIR}/CMakeModules/GenerateSCMRev.cmake"
67)
40 68
41add_library(common STATIC 69add_library(common STATIC
42 alignment.h 70 alignment.h
@@ -64,10 +92,14 @@ add_library(common STATIC
64 logging/text_formatter.cpp 92 logging/text_formatter.cpp
65 logging/text_formatter.h 93 logging/text_formatter.h
66 math_util.h 94 math_util.h
95 memory_hook.cpp
96 memory_hook.h
67 microprofile.cpp 97 microprofile.cpp
68 microprofile.h 98 microprofile.h
69 microprofileui.h 99 microprofileui.h
70 misc.cpp 100 misc.cpp
101 page_table.cpp
102 page_table.h
71 param_package.cpp 103 param_package.cpp
72 param_package.h 104 param_package.h
73 quaternion.h 105 quaternion.h
@@ -86,6 +118,8 @@ add_library(common STATIC
86 threadsafe_queue.h 118 threadsafe_queue.h
87 timer.cpp 119 timer.cpp
88 timer.h 120 timer.h
121 uint128.cpp
122 uint128.h
89 vector_math.h 123 vector_math.h
90 web_result.h 124 web_result.h
91) 125)
diff --git a/src/common/bit_field.h b/src/common/bit_field.h
index 639efe22d..8e35c463f 100644
--- a/src/common/bit_field.h
+++ b/src/common/bit_field.h
@@ -162,9 +162,13 @@ public:
162 BitField(T val) = delete; 162 BitField(T val) = delete;
163 BitField& operator=(T val) = delete; 163 BitField& operator=(T val) = delete;
164 164
165 // Force default constructor to be created 165 constexpr BitField() noexcept = default;
166 // so that we can use this within unions 166
167 constexpr BitField() = default; 167 constexpr BitField(const BitField&) noexcept = default;
168 constexpr BitField& operator=(const BitField&) noexcept = default;
169
170 constexpr BitField(BitField&&) noexcept = default;
171 constexpr BitField& operator=(BitField&&) noexcept = default;
168 172
169 constexpr FORCE_INLINE operator T() const { 173 constexpr FORCE_INLINE operator T() const {
170 return Value(); 174 return Value();
diff --git a/src/common/color.h b/src/common/color.h
index 0379040be..3a2222077 100644
--- a/src/common/color.h
+++ b/src/common/color.h
@@ -55,36 +55,36 @@ constexpr u8 Convert8To6(u8 value) {
55/** 55/**
56 * Decode a color stored in RGBA8 format 56 * Decode a color stored in RGBA8 format
57 * @param bytes Pointer to encoded source color 57 * @param bytes Pointer to encoded source color
58 * @return Result color decoded as Math::Vec4<u8> 58 * @return Result color decoded as Common::Vec4<u8>
59 */ 59 */
60inline Math::Vec4<u8> DecodeRGBA8(const u8* bytes) { 60inline Common::Vec4<u8> DecodeRGBA8(const u8* bytes) {
61 return {bytes[3], bytes[2], bytes[1], bytes[0]}; 61 return {bytes[3], bytes[2], bytes[1], bytes[0]};
62} 62}
63 63
64/** 64/**
65 * Decode a color stored in RGB8 format 65 * Decode a color stored in RGB8 format
66 * @param bytes Pointer to encoded source color 66 * @param bytes Pointer to encoded source color
67 * @return Result color decoded as Math::Vec4<u8> 67 * @return Result color decoded as Common::Vec4<u8>
68 */ 68 */
69inline Math::Vec4<u8> DecodeRGB8(const u8* bytes) { 69inline Common::Vec4<u8> DecodeRGB8(const u8* bytes) {
70 return {bytes[2], bytes[1], bytes[0], 255}; 70 return {bytes[2], bytes[1], bytes[0], 255};
71} 71}
72 72
73/** 73/**
74 * Decode a color stored in RG8 (aka HILO8) format 74 * Decode a color stored in RG8 (aka HILO8) format
75 * @param bytes Pointer to encoded source color 75 * @param bytes Pointer to encoded source color
76 * @return Result color decoded as Math::Vec4<u8> 76 * @return Result color decoded as Common::Vec4<u8>
77 */ 77 */
78inline Math::Vec4<u8> DecodeRG8(const u8* bytes) { 78inline Common::Vec4<u8> DecodeRG8(const u8* bytes) {
79 return {bytes[1], bytes[0], 0, 255}; 79 return {bytes[1], bytes[0], 0, 255};
80} 80}
81 81
82/** 82/**
83 * Decode a color stored in RGB565 format 83 * Decode a color stored in RGB565 format
84 * @param bytes Pointer to encoded source color 84 * @param bytes Pointer to encoded source color
85 * @return Result color decoded as Math::Vec4<u8> 85 * @return Result color decoded as Common::Vec4<u8>
86 */ 86 */
87inline Math::Vec4<u8> DecodeRGB565(const u8* bytes) { 87inline Common::Vec4<u8> DecodeRGB565(const u8* bytes) {
88 u16_le pixel; 88 u16_le pixel;
89 std::memcpy(&pixel, bytes, sizeof(pixel)); 89 std::memcpy(&pixel, bytes, sizeof(pixel));
90 return {Convert5To8((pixel >> 11) & 0x1F), Convert6To8((pixel >> 5) & 0x3F), 90 return {Convert5To8((pixel >> 11) & 0x1F), Convert6To8((pixel >> 5) & 0x3F),
@@ -94,9 +94,9 @@ inline Math::Vec4<u8> DecodeRGB565(const u8* bytes) {
94/** 94/**
95 * Decode a color stored in RGB5A1 format 95 * Decode a color stored in RGB5A1 format
96 * @param bytes Pointer to encoded source color 96 * @param bytes Pointer to encoded source color
97 * @return Result color decoded as Math::Vec4<u8> 97 * @return Result color decoded as Common::Vec4<u8>
98 */ 98 */
99inline Math::Vec4<u8> DecodeRGB5A1(const u8* bytes) { 99inline Common::Vec4<u8> DecodeRGB5A1(const u8* bytes) {
100 u16_le pixel; 100 u16_le pixel;
101 std::memcpy(&pixel, bytes, sizeof(pixel)); 101 std::memcpy(&pixel, bytes, sizeof(pixel));
102 return {Convert5To8((pixel >> 11) & 0x1F), Convert5To8((pixel >> 6) & 0x1F), 102 return {Convert5To8((pixel >> 11) & 0x1F), Convert5To8((pixel >> 6) & 0x1F),
@@ -106,9 +106,9 @@ inline Math::Vec4<u8> DecodeRGB5A1(const u8* bytes) {
106/** 106/**
107 * Decode a color stored in RGBA4 format 107 * Decode a color stored in RGBA4 format
108 * @param bytes Pointer to encoded source color 108 * @param bytes Pointer to encoded source color
109 * @return Result color decoded as Math::Vec4<u8> 109 * @return Result color decoded as Common::Vec4<u8>
110 */ 110 */
111inline Math::Vec4<u8> DecodeRGBA4(const u8* bytes) { 111inline Common::Vec4<u8> DecodeRGBA4(const u8* bytes) {
112 u16_le pixel; 112 u16_le pixel;
113 std::memcpy(&pixel, bytes, sizeof(pixel)); 113 std::memcpy(&pixel, bytes, sizeof(pixel));
114 return {Convert4To8((pixel >> 12) & 0xF), Convert4To8((pixel >> 8) & 0xF), 114 return {Convert4To8((pixel >> 12) & 0xF), Convert4To8((pixel >> 8) & 0xF),
@@ -138,9 +138,9 @@ inline u32 DecodeD24(const u8* bytes) {
138/** 138/**
139 * Decode a depth value and a stencil value stored in D24S8 format 139 * Decode a depth value and a stencil value stored in D24S8 format
140 * @param bytes Pointer to encoded source values 140 * @param bytes Pointer to encoded source values
141 * @return Resulting values stored as a Math::Vec2 141 * @return Resulting values stored as a Common::Vec2
142 */ 142 */
143inline Math::Vec2<u32> DecodeD24S8(const u8* bytes) { 143inline Common::Vec2<u32> DecodeD24S8(const u8* bytes) {
144 return {static_cast<u32>((bytes[2] << 16) | (bytes[1] << 8) | bytes[0]), bytes[3]}; 144 return {static_cast<u32>((bytes[2] << 16) | (bytes[1] << 8) | bytes[0]), bytes[3]};
145} 145}
146 146
@@ -149,7 +149,7 @@ inline Math::Vec2<u32> DecodeD24S8(const u8* bytes) {
149 * @param color Source color to encode 149 * @param color Source color to encode
150 * @param bytes Destination pointer to store encoded color 150 * @param bytes Destination pointer to store encoded color
151 */ 151 */
152inline void EncodeRGBA8(const Math::Vec4<u8>& color, u8* bytes) { 152inline void EncodeRGBA8(const Common::Vec4<u8>& color, u8* bytes) {
153 bytes[3] = color.r(); 153 bytes[3] = color.r();
154 bytes[2] = color.g(); 154 bytes[2] = color.g();
155 bytes[1] = color.b(); 155 bytes[1] = color.b();
@@ -161,7 +161,7 @@ inline void EncodeRGBA8(const Math::Vec4<u8>& color, u8* bytes) {
161 * @param color Source color to encode 161 * @param color Source color to encode
162 * @param bytes Destination pointer to store encoded color 162 * @param bytes Destination pointer to store encoded color
163 */ 163 */
164inline void EncodeRGB8(const Math::Vec4<u8>& color, u8* bytes) { 164inline void EncodeRGB8(const Common::Vec4<u8>& color, u8* bytes) {
165 bytes[2] = color.r(); 165 bytes[2] = color.r();
166 bytes[1] = color.g(); 166 bytes[1] = color.g();
167 bytes[0] = color.b(); 167 bytes[0] = color.b();
@@ -172,7 +172,7 @@ inline void EncodeRGB8(const Math::Vec4<u8>& color, u8* bytes) {
172 * @param color Source color to encode 172 * @param color Source color to encode
173 * @param bytes Destination pointer to store encoded color 173 * @param bytes Destination pointer to store encoded color
174 */ 174 */
175inline void EncodeRG8(const Math::Vec4<u8>& color, u8* bytes) { 175inline void EncodeRG8(const Common::Vec4<u8>& color, u8* bytes) {
176 bytes[1] = color.r(); 176 bytes[1] = color.r();
177 bytes[0] = color.g(); 177 bytes[0] = color.g();
178} 178}
@@ -181,7 +181,7 @@ inline void EncodeRG8(const Math::Vec4<u8>& color, u8* bytes) {
181 * @param color Source color to encode 181 * @param color Source color to encode
182 * @param bytes Destination pointer to store encoded color 182 * @param bytes Destination pointer to store encoded color
183 */ 183 */
184inline void EncodeRGB565(const Math::Vec4<u8>& color, u8* bytes) { 184inline void EncodeRGB565(const Common::Vec4<u8>& color, u8* bytes) {
185 const u16_le data = 185 const u16_le data =
186 (Convert8To5(color.r()) << 11) | (Convert8To6(color.g()) << 5) | Convert8To5(color.b()); 186 (Convert8To5(color.r()) << 11) | (Convert8To6(color.g()) << 5) | Convert8To5(color.b());
187 187
@@ -193,7 +193,7 @@ inline void EncodeRGB565(const Math::Vec4<u8>& color, u8* bytes) {
193 * @param color Source color to encode 193 * @param color Source color to encode
194 * @param bytes Destination pointer to store encoded color 194 * @param bytes Destination pointer to store encoded color
195 */ 195 */
196inline void EncodeRGB5A1(const Math::Vec4<u8>& color, u8* bytes) { 196inline void EncodeRGB5A1(const Common::Vec4<u8>& color, u8* bytes) {
197 const u16_le data = (Convert8To5(color.r()) << 11) | (Convert8To5(color.g()) << 6) | 197 const u16_le data = (Convert8To5(color.r()) << 11) | (Convert8To5(color.g()) << 6) |
198 (Convert8To5(color.b()) << 1) | Convert8To1(color.a()); 198 (Convert8To5(color.b()) << 1) | Convert8To1(color.a());
199 199
@@ -205,7 +205,7 @@ inline void EncodeRGB5A1(const Math::Vec4<u8>& color, u8* bytes) {
205 * @param color Source color to encode 205 * @param color Source color to encode
206 * @param bytes Destination pointer to store encoded color 206 * @param bytes Destination pointer to store encoded color
207 */ 207 */
208inline void EncodeRGBA4(const Math::Vec4<u8>& color, u8* bytes) { 208inline void EncodeRGBA4(const Common::Vec4<u8>& color, u8* bytes) {
209 const u16 data = (Convert8To4(color.r()) << 12) | (Convert8To4(color.g()) << 8) | 209 const u16 data = (Convert8To4(color.r()) << 12) | (Convert8To4(color.g()) << 8) |
210 (Convert8To4(color.b()) << 4) | Convert8To4(color.a()); 210 (Convert8To4(color.b()) << 4) | Convert8To4(color.a());
211 211
diff --git a/src/common/common_paths.h b/src/common/common_paths.h
index 4f88de768..076752d3b 100644
--- a/src/common/common_paths.h
+++ b/src/common/common_paths.h
@@ -35,6 +35,7 @@
35#define KEYS_DIR "keys" 35#define KEYS_DIR "keys"
36#define LOAD_DIR "load" 36#define LOAD_DIR "load"
37#define DUMP_DIR "dump" 37#define DUMP_DIR "dump"
38#define SHADER_DIR "shader"
38#define LOG_DIR "log" 39#define LOG_DIR "log"
39 40
40// Filenames 41// Filenames
diff --git a/src/common/file_util.cpp b/src/common/file_util.cpp
index b52492da6..aecb66c32 100644
--- a/src/common/file_util.cpp
+++ b/src/common/file_util.cpp
@@ -710,6 +710,7 @@ const std::string& GetUserPath(UserPath path, const std::string& new_path) {
710 paths.emplace(UserPath::NANDDir, user_path + NAND_DIR DIR_SEP); 710 paths.emplace(UserPath::NANDDir, user_path + NAND_DIR DIR_SEP);
711 paths.emplace(UserPath::LoadDir, user_path + LOAD_DIR DIR_SEP); 711 paths.emplace(UserPath::LoadDir, user_path + LOAD_DIR DIR_SEP);
712 paths.emplace(UserPath::DumpDir, user_path + DUMP_DIR DIR_SEP); 712 paths.emplace(UserPath::DumpDir, user_path + DUMP_DIR DIR_SEP);
713 paths.emplace(UserPath::ShaderDir, user_path + SHADER_DIR DIR_SEP);
713 paths.emplace(UserPath::SysDataDir, user_path + SYSDATA_DIR DIR_SEP); 714 paths.emplace(UserPath::SysDataDir, user_path + SYSDATA_DIR DIR_SEP);
714 paths.emplace(UserPath::KeysDir, user_path + KEYS_DIR DIR_SEP); 715 paths.emplace(UserPath::KeysDir, user_path + KEYS_DIR DIR_SEP);
715 // TODO: Put the logs in a better location for each OS 716 // TODO: Put the logs in a better location for each OS
diff --git a/src/common/file_util.h b/src/common/file_util.h
index 571503d2a..38cc7f059 100644
--- a/src/common/file_util.h
+++ b/src/common/file_util.h
@@ -31,6 +31,7 @@ enum class UserPath {
31 SDMCDir, 31 SDMCDir,
32 LoadDir, 32 LoadDir,
33 DumpDir, 33 DumpDir,
34 ShaderDir,
34 SysDataDir, 35 SysDataDir,
35 UserDir, 36 UserDir,
36}; 37};
diff --git a/src/common/logging/backend.cpp b/src/common/logging/backend.cpp
index 12f6d0114..4462ff3fb 100644
--- a/src/common/logging/backend.cpp
+++ b/src/common/logging/backend.cpp
@@ -39,10 +39,10 @@ public:
39 Impl(Impl const&) = delete; 39 Impl(Impl const&) = delete;
40 const Impl& operator=(Impl const&) = delete; 40 const Impl& operator=(Impl const&) = delete;
41 41
42 void PushEntry(Entry e) { 42 void PushEntry(Class log_class, Level log_level, const char* filename, unsigned int line_num,
43 std::lock_guard<std::mutex> lock(message_mutex); 43 const char* function, std::string message) {
44 message_queue.Push(std::move(e)); 44 message_queue.Push(
45 message_cv.notify_one(); 45 CreateEntry(log_class, log_level, filename, line_num, function, std::move(message)));
46 } 46 }
47 47
48 void AddBackend(std::unique_ptr<Backend> backend) { 48 void AddBackend(std::unique_ptr<Backend> backend) {
@@ -86,15 +86,13 @@ private:
86 } 86 }
87 }; 87 };
88 while (true) { 88 while (true) {
89 { 89 entry = message_queue.PopWait();
90 std::unique_lock<std::mutex> lock(message_mutex); 90 if (entry.final_entry) {
91 message_cv.wait(lock, [&] { return !running || message_queue.Pop(entry); });
92 }
93 if (!running) {
94 break; 91 break;
95 } 92 }
96 write_logs(entry); 93 write_logs(entry);
97 } 94 }
95
98 // Drain the logging queue. Only writes out up to MAX_LOGS_TO_WRITE to prevent a case 96 // Drain the logging queue. Only writes out up to MAX_LOGS_TO_WRITE to prevent a case
99 // where a system is repeatedly spamming logs even on close. 97 // where a system is repeatedly spamming logs even on close.
100 const int MAX_LOGS_TO_WRITE = filter.IsDebug() ? INT_MAX : 100; 98 const int MAX_LOGS_TO_WRITE = filter.IsDebug() ? INT_MAX : 100;
@@ -106,18 +104,36 @@ private:
106 } 104 }
107 105
108 ~Impl() { 106 ~Impl() {
109 running = false; 107 Entry entry;
110 message_cv.notify_one(); 108 entry.final_entry = true;
109 message_queue.Push(entry);
111 backend_thread.join(); 110 backend_thread.join();
112 } 111 }
113 112
114 std::atomic_bool running{true}; 113 Entry CreateEntry(Class log_class, Level log_level, const char* filename, unsigned int line_nr,
115 std::mutex message_mutex, writing_mutex; 114 const char* function, std::string message) const {
116 std::condition_variable message_cv; 115 using std::chrono::duration_cast;
116 using std::chrono::steady_clock;
117
118 Entry entry;
119 entry.timestamp =
120 duration_cast<std::chrono::microseconds>(steady_clock::now() - time_origin);
121 entry.log_class = log_class;
122 entry.log_level = log_level;
123 entry.filename = Common::TrimSourcePath(filename);
124 entry.line_num = line_nr;
125 entry.function = function;
126 entry.message = std::move(message);
127
128 return entry;
129 }
130
131 std::mutex writing_mutex;
117 std::thread backend_thread; 132 std::thread backend_thread;
118 std::vector<std::unique_ptr<Backend>> backends; 133 std::vector<std::unique_ptr<Backend>> backends;
119 Common::MPSCQueue<Log::Entry> message_queue; 134 Common::MPSCQueue<Log::Entry> message_queue;
120 Filter filter; 135 Filter filter;
136 std::chrono::steady_clock::time_point time_origin{std::chrono::steady_clock::now()};
121}; 137};
122 138
123void ConsoleBackend::Write(const Entry& entry) { 139void ConsoleBackend::Write(const Entry& entry) {
@@ -232,6 +248,7 @@ void DebuggerBackend::Write(const Entry& entry) {
232 CLS(Render) \ 248 CLS(Render) \
233 SUB(Render, Software) \ 249 SUB(Render, Software) \
234 SUB(Render, OpenGL) \ 250 SUB(Render, OpenGL) \
251 SUB(Render, Vulkan) \
235 CLS(Audio) \ 252 CLS(Audio) \
236 SUB(Audio, DSP) \ 253 SUB(Audio, DSP) \
237 SUB(Audio, Sink) \ 254 SUB(Audio, Sink) \
@@ -275,25 +292,6 @@ const char* GetLevelName(Level log_level) {
275#undef LVL 292#undef LVL
276} 293}
277 294
278Entry CreateEntry(Class log_class, Level log_level, const char* filename, unsigned int line_nr,
279 const char* function, std::string message) {
280 using std::chrono::duration_cast;
281 using std::chrono::steady_clock;
282
283 static steady_clock::time_point time_origin = steady_clock::now();
284
285 Entry entry;
286 entry.timestamp = duration_cast<std::chrono::microseconds>(steady_clock::now() - time_origin);
287 entry.log_class = log_class;
288 entry.log_level = log_level;
289 entry.filename = Common::TrimSourcePath(filename);
290 entry.line_num = line_nr;
291 entry.function = function;
292 entry.message = std::move(message);
293
294 return entry;
295}
296
297void SetGlobalFilter(const Filter& filter) { 295void SetGlobalFilter(const Filter& filter) {
298 Impl::Instance().SetGlobalFilter(filter); 296 Impl::Instance().SetGlobalFilter(filter);
299} 297}
@@ -318,9 +316,7 @@ void FmtLogMessageImpl(Class log_class, Level log_level, const char* filename,
318 if (!filter.CheckMessage(log_class, log_level)) 316 if (!filter.CheckMessage(log_class, log_level))
319 return; 317 return;
320 318
321 Entry entry = 319 instance.PushEntry(log_class, log_level, filename, line_num, function,
322 CreateEntry(log_class, log_level, filename, line_num, function, fmt::vformat(format, args)); 320 fmt::vformat(format, args));
323
324 instance.PushEntry(std::move(entry));
325} 321}
326} // namespace Log 322} // namespace Log
diff --git a/src/common/logging/backend.h b/src/common/logging/backend.h
index 91bb0c309..fca0267a1 100644
--- a/src/common/logging/backend.h
+++ b/src/common/logging/backend.h
@@ -27,6 +27,7 @@ struct Entry {
27 unsigned int line_num; 27 unsigned int line_num;
28 std::string function; 28 std::string function;
29 std::string message; 29 std::string message;
30 bool final_entry = false;
30 31
31 Entry() = default; 32 Entry() = default;
32 Entry(Entry&& o) = default; 33 Entry(Entry&& o) = default;
@@ -134,10 +135,6 @@ const char* GetLogClassName(Class log_class);
134 */ 135 */
135const char* GetLevelName(Level log_level); 136const char* GetLevelName(Level log_level);
136 137
137/// Creates a log entry by formatting the given source location, and message.
138Entry CreateEntry(Class log_class, Level log_level, const char* filename, unsigned int line_nr,
139 const char* function, std::string message);
140
141/** 138/**
142 * The global filter will prevent any messages from even being processed if they are filtered. Each 139 * The global filter will prevent any messages from even being processed if they are filtered. Each
143 * backend can have a filter, but if the level is lower than the global filter, the backend will 140 * backend can have a filter, but if the level is lower than the global filter, the backend will
diff --git a/src/common/logging/log.h b/src/common/logging/log.h
index d4ec31ec3..8ed6d5050 100644
--- a/src/common/logging/log.h
+++ b/src/common/logging/log.h
@@ -112,6 +112,7 @@ enum class Class : ClassType {
112 Render, ///< Emulator video output and hardware acceleration 112 Render, ///< Emulator video output and hardware acceleration
113 Render_Software, ///< Software renderer backend 113 Render_Software, ///< Software renderer backend
114 Render_OpenGL, ///< OpenGL backend 114 Render_OpenGL, ///< OpenGL backend
115 Render_Vulkan, ///< Vulkan backend
115 Audio, ///< Audio emulation 116 Audio, ///< Audio emulation
116 Audio_DSP, ///< The HLE implementation of the DSP 117 Audio_DSP, ///< The HLE implementation of the DSP
117 Audio_Sink, ///< Emulator audio output backend 118 Audio_Sink, ///< Emulator audio output backend
diff --git a/src/common/math_util.h b/src/common/math_util.h
index 94b4394c5..cff3d48c5 100644
--- a/src/common/math_util.h
+++ b/src/common/math_util.h
@@ -7,7 +7,7 @@
7#include <cstdlib> 7#include <cstdlib>
8#include <type_traits> 8#include <type_traits>
9 9
10namespace MathUtil { 10namespace Common {
11 11
12constexpr float PI = 3.14159265f; 12constexpr float PI = 3.14159265f;
13 13
@@ -41,4 +41,4 @@ struct Rectangle {
41 } 41 }
42}; 42};
43 43
44} // namespace MathUtil 44} // namespace Common
diff --git a/src/core/memory_hook.cpp b/src/common/memory_hook.cpp
index c61c6c1fb..3986986d6 100644
--- a/src/core/memory_hook.cpp
+++ b/src/common/memory_hook.cpp
@@ -2,10 +2,10 @@
2// Licensed under GPLv2 or any later version 2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included. 3// Refer to the license.txt file included.
4 4
5#include "core/memory_hook.h" 5#include "common/memory_hook.h"
6 6
7namespace Memory { 7namespace Common {
8 8
9MemoryHook::~MemoryHook() = default; 9MemoryHook::~MemoryHook() = default;
10 10
11} // namespace Memory 11} // namespace Common
diff --git a/src/core/memory_hook.h b/src/common/memory_hook.h
index 940777107..adaa4c2c5 100644
--- a/src/core/memory_hook.h
+++ b/src/common/memory_hook.h
@@ -9,7 +9,7 @@
9 9
10#include "common/common_types.h" 10#include "common/common_types.h"
11 11
12namespace Memory { 12namespace Common {
13 13
14/** 14/**
15 * Memory hooks have two purposes: 15 * Memory hooks have two purposes:
@@ -44,4 +44,4 @@ public:
44}; 44};
45 45
46using MemoryHookPointer = std::shared_ptr<MemoryHook>; 46using MemoryHookPointer = std::shared_ptr<MemoryHook>;
47} // namespace Memory 47} // namespace Common
diff --git a/src/common/page_table.cpp b/src/common/page_table.cpp
new file mode 100644
index 000000000..8eba1c3f1
--- /dev/null
+++ b/src/common/page_table.cpp
@@ -0,0 +1,29 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/page_table.h"
6
7namespace Common {
8
9PageTable::PageTable(std::size_t page_size_in_bits) : page_size_in_bits{page_size_in_bits} {}
10
11PageTable::~PageTable() = default;
12
13void PageTable::Resize(std::size_t address_space_width_in_bits) {
14 const std::size_t num_page_table_entries = 1ULL
15 << (address_space_width_in_bits - page_size_in_bits);
16
17 pointers.resize(num_page_table_entries);
18 attributes.resize(num_page_table_entries);
19
20 // The default is a 39-bit address space, which causes an initial 1GB allocation size. If the
21 // vector size is subsequently decreased (via resize), the vector might not automatically
22 // actually reallocate/resize its underlying allocation, which wastes up to ~800 MB for
23 // 36-bit titles. Call shrink_to_fit to reduce capacity to what's actually in use.
24
25 pointers.shrink_to_fit();
26 attributes.shrink_to_fit();
27}
28
29} // namespace Common
diff --git a/src/common/page_table.h b/src/common/page_table.h
new file mode 100644
index 000000000..8339f2890
--- /dev/null
+++ b/src/common/page_table.h
@@ -0,0 +1,80 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <vector>
8#include <boost/icl/interval_map.hpp>
9#include "common/common_types.h"
10#include "common/memory_hook.h"
11
12namespace Common {
13
14enum class PageType : u8 {
15 /// Page is unmapped and should cause an access error.
16 Unmapped,
17 /// Page is mapped to regular memory. This is the only type you can get pointers to.
18 Memory,
19 /// Page is mapped to regular memory, but also needs to check for rasterizer cache flushing and
20 /// invalidation
21 RasterizerCachedMemory,
22 /// Page is mapped to a I/O region. Writing and reading to this page is handled by functions.
23 Special,
24};
25
26struct SpecialRegion {
27 enum class Type {
28 DebugHook,
29 IODevice,
30 } type;
31
32 MemoryHookPointer handler;
33
34 bool operator<(const SpecialRegion& other) const {
35 return std::tie(type, handler) < std::tie(other.type, other.handler);
36 }
37
38 bool operator==(const SpecialRegion& other) const {
39 return std::tie(type, handler) == std::tie(other.type, other.handler);
40 }
41};
42
43/**
44 * A (reasonably) fast way of allowing switchable and remappable process address spaces. It loosely
45 * mimics the way a real CPU page table works.
46 */
47struct PageTable {
48 explicit PageTable(std::size_t page_size_in_bits);
49 ~PageTable();
50
51 /**
 52 * Resizes the page table to be able to accommodate enough pages within
53 * a given address space.
54 *
55 * @param address_space_width_in_bits The address size width in bits.
56 */
57 void Resize(std::size_t address_space_width_in_bits);
58
59 /**
60 * Vector of memory pointers backing each page. An entry can only be non-null if the
61 * corresponding entry in the `attributes` vector is of type `Memory`.
62 */
63 std::vector<u8*> pointers;
64
65 /**
 66 * Contains MMIO handlers that back memory regions whose entries in the `attributes` vector are
67 * of type `Special`.
68 */
69 boost::icl::interval_map<VAddr, std::set<SpecialRegion>> special_regions;
70
71 /**
72 * Vector of fine grained page attributes. If it is set to any value other than `Memory`, then
73 * the corresponding entry in `pointers` MUST be set to null.
74 */
75 std::vector<PageType> attributes;
76
77 const std::size_t page_size_in_bits{};
78};
79
80} // namespace Common
diff --git a/src/common/quaternion.h b/src/common/quaternion.h
index c528c0b68..370198ae0 100644
--- a/src/common/quaternion.h
+++ b/src/common/quaternion.h
@@ -6,12 +6,12 @@
6 6
7#include "common/vector_math.h" 7#include "common/vector_math.h"
8 8
9namespace Math { 9namespace Common {
10 10
11template <typename T> 11template <typename T>
12class Quaternion { 12class Quaternion {
13public: 13public:
14 Math::Vec3<T> xyz; 14 Vec3<T> xyz;
15 T w{}; 15 T w{};
16 16
17 Quaternion<decltype(-T{})> Inverse() const { 17 Quaternion<decltype(-T{})> Inverse() const {
@@ -38,12 +38,12 @@ public:
38}; 38};
39 39
40template <typename T> 40template <typename T>
41auto QuaternionRotate(const Quaternion<T>& q, const Math::Vec3<T>& v) { 41auto QuaternionRotate(const Quaternion<T>& q, const Vec3<T>& v) {
42 return v + 2 * Cross(q.xyz, Cross(q.xyz, v) + v * q.w); 42 return v + 2 * Cross(q.xyz, Cross(q.xyz, v) + v * q.w);
43} 43}
44 44
45inline Quaternion<float> MakeQuaternion(const Math::Vec3<float>& axis, float angle) { 45inline Quaternion<float> MakeQuaternion(const Vec3<float>& axis, float angle) {
46 return {axis * std::sin(angle / 2), std::cos(angle / 2)}; 46 return {axis * std::sin(angle / 2), std::cos(angle / 2)};
47} 47}
48 48
49} // namespace Math 49} // namespace Common
diff --git a/src/common/scm_rev.cpp.in b/src/common/scm_rev.cpp.in
index 2b1727769..d69038f65 100644
--- a/src/common/scm_rev.cpp.in
+++ b/src/common/scm_rev.cpp.in
@@ -11,6 +11,7 @@
11#define BUILD_DATE "@BUILD_DATE@" 11#define BUILD_DATE "@BUILD_DATE@"
12#define BUILD_FULLNAME "@BUILD_FULLNAME@" 12#define BUILD_FULLNAME "@BUILD_FULLNAME@"
13#define BUILD_VERSION "@BUILD_VERSION@" 13#define BUILD_VERSION "@BUILD_VERSION@"
14#define SHADER_CACHE_VERSION "@SHADER_CACHE_VERSION@"
14 15
15namespace Common { 16namespace Common {
16 17
@@ -21,6 +22,7 @@ const char g_build_name[] = BUILD_NAME;
21const char g_build_date[] = BUILD_DATE; 22const char g_build_date[] = BUILD_DATE;
22const char g_build_fullname[] = BUILD_FULLNAME; 23const char g_build_fullname[] = BUILD_FULLNAME;
23const char g_build_version[] = BUILD_VERSION; 24const char g_build_version[] = BUILD_VERSION;
25const char g_shader_cache_version[] = SHADER_CACHE_VERSION;
24 26
25} // namespace 27} // namespace
26 28
diff --git a/src/common/scm_rev.h b/src/common/scm_rev.h
index af9a9daed..666bf0367 100644
--- a/src/common/scm_rev.h
+++ b/src/common/scm_rev.h
@@ -13,5 +13,6 @@ extern const char g_build_name[];
13extern const char g_build_date[]; 13extern const char g_build_date[];
14extern const char g_build_fullname[]; 14extern const char g_build_fullname[];
15extern const char g_build_version[]; 15extern const char g_build_version[];
16extern const char g_shader_cache_version[];
16 17
17} // namespace Common 18} // namespace Common
diff --git a/src/common/swap.h b/src/common/swap.h
index 4b82865fe..b3eab1324 100644
--- a/src/common/swap.h
+++ b/src/common/swap.h
@@ -30,8 +30,8 @@
30#include <cstring> 30#include <cstring>
31#include "common/common_types.h" 31#include "common/common_types.h"
32 32
33// GCC 4.6+ 33// GCC
34#if __GNUC__ >= 5 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 6) 34#ifdef __GNUC__
35 35
36#if __BYTE_ORDER__ && (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__) && !defined(COMMON_LITTLE_ENDIAN) 36#if __BYTE_ORDER__ && (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__) && !defined(COMMON_LITTLE_ENDIAN)
37#define COMMON_LITTLE_ENDIAN 1 37#define COMMON_LITTLE_ENDIAN 1
@@ -40,7 +40,7 @@
40#endif 40#endif
41 41
42// LLVM/clang 42// LLVM/clang
43#elif __clang__ 43#elif defined(__clang__)
44 44
45#if __LITTLE_ENDIAN__ && !defined(COMMON_LITTLE_ENDIAN) 45#if __LITTLE_ENDIAN__ && !defined(COMMON_LITTLE_ENDIAN)
46#define COMMON_LITTLE_ENDIAN 1 46#define COMMON_LITTLE_ENDIAN 1
diff --git a/src/common/thread_queue_list.h b/src/common/thread_queue_list.h
index e7594db68..791f99a8c 100644
--- a/src/common/thread_queue_list.h
+++ b/src/common/thread_queue_list.h
@@ -6,7 +6,6 @@
6 6
7#include <array> 7#include <array>
8#include <deque> 8#include <deque>
9#include <boost/range/algorithm_ext/erase.hpp>
10 9
11namespace Common { 10namespace Common {
12 11
@@ -111,8 +110,9 @@ struct ThreadQueueList {
111 } 110 }
112 111
113 void remove(Priority priority, const T& thread_id) { 112 void remove(Priority priority, const T& thread_id) {
114 Queue* cur = &queues[priority]; 113 Queue* const cur = &queues[priority];
115 boost::remove_erase(cur->data, thread_id); 114 const auto iter = std::remove(cur->data.begin(), cur->data.end(), thread_id);
115 cur->data.erase(iter, cur->data.end());
116 } 116 }
117 117
118 void rotate(Priority priority) { 118 void rotate(Priority priority) {
diff --git a/src/common/threadsafe_queue.h b/src/common/threadsafe_queue.h
index edf13bc49..821e8536a 100644
--- a/src/common/threadsafe_queue.h
+++ b/src/common/threadsafe_queue.h
@@ -7,17 +7,17 @@
7// a simple lockless thread-safe, 7// a simple lockless thread-safe,
8// single reader, single writer queue 8// single reader, single writer queue
9 9
10#include <algorithm>
11#include <atomic> 10#include <atomic>
11#include <condition_variable>
12#include <cstddef> 12#include <cstddef>
13#include <mutex> 13#include <mutex>
14#include "common/common_types.h" 14#include <utility>
15 15
16namespace Common { 16namespace Common {
17template <typename T, bool NeedSize = true> 17template <typename T>
18class SPSCQueue { 18class SPSCQueue {
19public: 19public:
20 SPSCQueue() : size(0) { 20 SPSCQueue() {
21 write_ptr = read_ptr = new ElementPtr(); 21 write_ptr = read_ptr = new ElementPtr();
22 } 22 }
23 ~SPSCQueue() { 23 ~SPSCQueue() {
@@ -25,13 +25,12 @@ public:
25 delete read_ptr; 25 delete read_ptr;
26 } 26 }
27 27
28 u32 Size() const { 28 std::size_t Size() const {
29 static_assert(NeedSize, "using Size() on FifoQueue without NeedSize");
30 return size.load(); 29 return size.load();
31 } 30 }
32 31
33 bool Empty() const { 32 bool Empty() const {
34 return !read_ptr->next.load(); 33 return Size() == 0;
35 } 34 }
36 35
37 T& Front() const { 36 T& Front() const {
@@ -47,13 +46,14 @@ public:
47 ElementPtr* new_ptr = new ElementPtr(); 46 ElementPtr* new_ptr = new ElementPtr();
48 write_ptr->next.store(new_ptr, std::memory_order_release); 47 write_ptr->next.store(new_ptr, std::memory_order_release);
49 write_ptr = new_ptr; 48 write_ptr = new_ptr;
50 if (NeedSize) 49 cv.notify_one();
51 size++; 50
51 ++size;
52 } 52 }
53 53
54 void Pop() { 54 void Pop() {
55 if (NeedSize) 55 --size;
56 size--; 56
57 ElementPtr* tmpptr = read_ptr; 57 ElementPtr* tmpptr = read_ptr;
58 // advance the read pointer 58 // advance the read pointer
59 read_ptr = tmpptr->next.load(); 59 read_ptr = tmpptr->next.load();
@@ -66,8 +66,7 @@ public:
66 if (Empty()) 66 if (Empty())
67 return false; 67 return false;
68 68
69 if (NeedSize) 69 --size;
70 size--;
71 70
72 ElementPtr* tmpptr = read_ptr; 71 ElementPtr* tmpptr = read_ptr;
73 read_ptr = tmpptr->next.load(std::memory_order_acquire); 72 read_ptr = tmpptr->next.load(std::memory_order_acquire);
@@ -77,6 +76,16 @@ public:
77 return true; 76 return true;
78 } 77 }
79 78
79 T PopWait() {
80 if (Empty()) {
81 std::unique_lock<std::mutex> lock(cv_mutex);
82 cv.wait(lock, [this]() { return !Empty(); });
83 }
84 T t;
85 Pop(t);
86 return t;
87 }
88
80 // not thread-safe 89 // not thread-safe
81 void Clear() { 90 void Clear() {
82 size.store(0); 91 size.store(0);
@@ -89,7 +98,7 @@ private:
89 // and a pointer to the next ElementPtr 98 // and a pointer to the next ElementPtr
90 class ElementPtr { 99 class ElementPtr {
91 public: 100 public:
92 ElementPtr() : next(nullptr) {} 101 ElementPtr() {}
93 ~ElementPtr() { 102 ~ElementPtr() {
94 ElementPtr* next_ptr = next.load(); 103 ElementPtr* next_ptr = next.load();
95 104
@@ -98,21 +107,23 @@ private:
98 } 107 }
99 108
100 T current; 109 T current;
101 std::atomic<ElementPtr*> next; 110 std::atomic<ElementPtr*> next{nullptr};
102 }; 111 };
103 112
104 ElementPtr* write_ptr; 113 ElementPtr* write_ptr;
105 ElementPtr* read_ptr; 114 ElementPtr* read_ptr;
106 std::atomic<u32> size; 115 std::atomic_size_t size{0};
116 std::mutex cv_mutex;
117 std::condition_variable cv;
107}; 118};
108 119
109// a simple thread-safe, 120// a simple thread-safe,
110// single reader, multiple writer queue 121// single reader, multiple writer queue
111 122
112template <typename T, bool NeedSize = true> 123template <typename T>
113class MPSCQueue { 124class MPSCQueue {
114public: 125public:
115 u32 Size() const { 126 std::size_t Size() const {
116 return spsc_queue.Size(); 127 return spsc_queue.Size();
117 } 128 }
118 129
@@ -138,13 +149,17 @@ public:
138 return spsc_queue.Pop(t); 149 return spsc_queue.Pop(t);
139 } 150 }
140 151
152 T PopWait() {
153 return spsc_queue.PopWait();
154 }
155
141 // not thread-safe 156 // not thread-safe
142 void Clear() { 157 void Clear() {
143 spsc_queue.Clear(); 158 spsc_queue.Clear();
144 } 159 }
145 160
146private: 161private:
147 SPSCQueue<T, NeedSize> spsc_queue; 162 SPSCQueue<T> spsc_queue;
148 std::mutex write_lock; 163 std::mutex write_lock;
149}; 164};
150} // namespace Common 165} // namespace Common
diff --git a/src/common/uint128.cpp b/src/common/uint128.cpp
new file mode 100644
index 000000000..32bf56730
--- /dev/null
+++ b/src/common/uint128.cpp
@@ -0,0 +1,45 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#ifdef _MSC_VER
6#include <intrin.h>
7
8#pragma intrinsic(_umul128)
9#endif
10#include <cstring>
11#include "common/uint128.h"
12
13namespace Common {
14
15u128 Multiply64Into128(u64 a, u64 b) {
16 u128 result;
17#ifdef _MSC_VER
18 result[0] = _umul128(a, b, &result[1]);
19#else
20 unsigned __int128 tmp = a;
21 tmp *= b;
22 std::memcpy(&result, &tmp, sizeof(u128));
23#endif
24 return result;
25}
26
27std::pair<u64, u64> Divide128On32(u128 dividend, u32 divisor) {
28 u64 remainder = dividend[0] % divisor;
29 u64 accum = dividend[0] / divisor;
30 if (dividend[1] == 0)
31 return {accum, remainder};
32 // We ignore dividend[1] / divisor as that overflows
33 const u64 first_segment = (dividend[1] % divisor) << 32;
34 accum += (first_segment / divisor) << 32;
35 const u64 second_segment = (first_segment % divisor) << 32;
36 accum += (second_segment / divisor);
37 remainder += second_segment % divisor;
38 if (remainder >= divisor) {
39 accum++;
40 remainder -= divisor;
41 }
42 return {accum, remainder};
43}
44
45} // namespace Common
diff --git a/src/common/uint128.h b/src/common/uint128.h
new file mode 100644
index 000000000..a3be2a2cb
--- /dev/null
+++ b/src/common/uint128.h
@@ -0,0 +1,19 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <utility>
8#include "common/common_types.h"
9
10namespace Common {
11
 12// This function multiplies 2 u64 values and produces a u128 value.
13u128 Multiply64Into128(u64 a, u64 b);
14
15// This function divides a u128 by a u32 value and produces two u64 values:
16// the result of division and the remainder
17std::pair<u64, u64> Divide128On32(u128 dividend, u32 divisor);
18
19} // namespace Common
diff --git a/src/common/vector_math.h b/src/common/vector_math.h
index 8feb49941..429485329 100644
--- a/src/common/vector_math.h
+++ b/src/common/vector_math.h
@@ -33,7 +33,7 @@
33#include <cmath> 33#include <cmath>
34#include <type_traits> 34#include <type_traits>
35 35
36namespace Math { 36namespace Common {
37 37
38template <typename T> 38template <typename T>
39class Vec2; 39class Vec2;
@@ -690,4 +690,4 @@ constexpr Vec4<T> MakeVec(const T& x, const Vec3<T>& yzw) {
690 return MakeVec(x, yzw[0], yzw[1], yzw[2]); 690 return MakeVec(x, yzw[0], yzw[1], yzw[2]);
691} 691}
692 692
693} // namespace Math 693} // namespace Common
diff --git a/src/core/CMakeLists.txt b/src/core/CMakeLists.txt
index f61bcd40d..16920e2e9 100644
--- a/src/core/CMakeLists.txt
+++ b/src/core/CMakeLists.txt
@@ -107,6 +107,8 @@ add_library(core STATIC
107 hle/kernel/client_port.h 107 hle/kernel/client_port.h
108 hle/kernel/client_session.cpp 108 hle/kernel/client_session.cpp
109 hle/kernel/client_session.h 109 hle/kernel/client_session.h
110 hle/kernel/code_set.cpp
111 hle/kernel/code_set.h
110 hle/kernel/errors.h 112 hle/kernel/errors.h
111 hle/kernel/handle_table.cpp 113 hle/kernel/handle_table.cpp
112 hle/kernel/handle_table.h 114 hle/kernel/handle_table.h
@@ -217,6 +219,7 @@ add_library(core STATIC
217 hle/service/audio/audren_u.h 219 hle/service/audio/audren_u.h
218 hle/service/audio/codecctl.cpp 220 hle/service/audio/codecctl.cpp
219 hle/service/audio/codecctl.h 221 hle/service/audio/codecctl.h
222 hle/service/audio/errors.h
220 hle/service/audio/hwopus.cpp 223 hle/service/audio/hwopus.cpp
221 hle/service/audio/hwopus.h 224 hle/service/audio/hwopus.h
222 hle/service/bcat/bcat.cpp 225 hle/service/bcat/bcat.cpp
@@ -400,6 +403,10 @@ add_library(core STATIC
400 hle/service/time/time.h 403 hle/service/time/time.h
401 hle/service/usb/usb.cpp 404 hle/service/usb/usb.cpp
402 hle/service/usb/usb.h 405 hle/service/usb/usb.h
406 hle/service/vi/display/vi_display.cpp
407 hle/service/vi/display/vi_display.h
408 hle/service/vi/layer/vi_layer.cpp
409 hle/service/vi/layer/vi_layer.h
403 hle/service/vi/vi.cpp 410 hle/service/vi/vi.cpp
404 hle/service/vi/vi.h 411 hle/service/vi/vi.h
405 hle/service/vi/vi_m.cpp 412 hle/service/vi/vi_m.cpp
@@ -414,8 +421,6 @@ add_library(core STATIC
414 loader/deconstructed_rom_directory.h 421 loader/deconstructed_rom_directory.h
415 loader/elf.cpp 422 loader/elf.cpp
416 loader/elf.h 423 loader/elf.h
417 loader/linker.cpp
418 loader/linker.h
419 loader/loader.cpp 424 loader/loader.cpp
420 loader/loader.h 425 loader/loader.h
421 loader/nax.cpp 426 loader/nax.cpp
@@ -432,8 +437,6 @@ add_library(core STATIC
432 loader/xci.h 437 loader/xci.h
433 memory.cpp 438 memory.cpp
434 memory.h 439 memory.h
435 memory_hook.cpp
436 memory_hook.h
437 memory_setup.h 440 memory_setup.h
438 perf_stats.cpp 441 perf_stats.cpp
439 perf_stats.h 442 perf_stats.h
diff --git a/src/core/arm/dynarmic/arm_dynarmic.cpp b/src/core/arm/dynarmic/arm_dynarmic.cpp
index afbda8d8b..4fdc12f11 100644
--- a/src/core/arm/dynarmic/arm_dynarmic.cpp
+++ b/src/core/arm/dynarmic/arm_dynarmic.cpp
@@ -12,6 +12,7 @@
12#include "core/core.h" 12#include "core/core.h"
13#include "core/core_cpu.h" 13#include "core/core_cpu.h"
14#include "core/core_timing.h" 14#include "core/core_timing.h"
15#include "core/core_timing_util.h"
15#include "core/gdbstub/gdbstub.h" 16#include "core/gdbstub/gdbstub.h"
16#include "core/hle/kernel/process.h" 17#include "core/hle/kernel/process.h"
17#include "core/hle/kernel/svc.h" 18#include "core/hle/kernel/svc.h"
@@ -112,14 +113,14 @@ public:
112 // Always execute at least one tick. 113 // Always execute at least one tick.
113 amortized_ticks = std::max<u64>(amortized_ticks, 1); 114 amortized_ticks = std::max<u64>(amortized_ticks, 1);
114 115
115 CoreTiming::AddTicks(amortized_ticks); 116 parent.core_timing.AddTicks(amortized_ticks);
116 num_interpreted_instructions = 0; 117 num_interpreted_instructions = 0;
117 } 118 }
118 u64 GetTicksRemaining() override { 119 u64 GetTicksRemaining() override {
119 return std::max(CoreTiming::GetDowncount(), 0); 120 return std::max(parent.core_timing.GetDowncount(), 0);
120 } 121 }
121 u64 GetCNTPCT() override { 122 u64 GetCNTPCT() override {
122 return CoreTiming::GetTicks(); 123 return Timing::CpuCyclesToClockCycles(parent.core_timing.GetTicks());
123 } 124 }
124 125
125 ARM_Dynarmic& parent; 126 ARM_Dynarmic& parent;
@@ -151,7 +152,7 @@ std::unique_ptr<Dynarmic::A64::Jit> ARM_Dynarmic::MakeJit() const {
151 config.tpidr_el0 = &cb->tpidr_el0; 152 config.tpidr_el0 = &cb->tpidr_el0;
152 config.dczid_el0 = 4; 153 config.dczid_el0 = 4;
153 config.ctr_el0 = 0x8444c004; 154 config.ctr_el0 = 0x8444c004;
154 config.cntfrq_el0 = 19200000; // Value from fusee. 155 config.cntfrq_el0 = Timing::CNTFREQ;
155 156
156 // Unpredictable instructions 157 // Unpredictable instructions
157 config.define_unpredictable_behaviour = true; 158 config.define_unpredictable_behaviour = true;
@@ -172,8 +173,10 @@ void ARM_Dynarmic::Step() {
172 cb->InterpreterFallback(jit->GetPC(), 1); 173 cb->InterpreterFallback(jit->GetPC(), 1);
173} 174}
174 175
175ARM_Dynarmic::ARM_Dynarmic(ExclusiveMonitor& exclusive_monitor, std::size_t core_index) 176ARM_Dynarmic::ARM_Dynarmic(Timing::CoreTiming& core_timing, ExclusiveMonitor& exclusive_monitor,
176 : cb(std::make_unique<ARM_Dynarmic_Callbacks>(*this)), core_index{core_index}, 177 std::size_t core_index)
178 : cb(std::make_unique<ARM_Dynarmic_Callbacks>(*this)), inner_unicorn{core_timing},
179 core_index{core_index}, core_timing{core_timing},
177 exclusive_monitor{dynamic_cast<DynarmicExclusiveMonitor&>(exclusive_monitor)} { 180 exclusive_monitor{dynamic_cast<DynarmicExclusiveMonitor&>(exclusive_monitor)} {
178 ThreadContext ctx{}; 181 ThreadContext ctx{};
179 inner_unicorn.SaveContext(ctx); 182 inner_unicorn.SaveContext(ctx);
diff --git a/src/core/arm/dynarmic/arm_dynarmic.h b/src/core/arm/dynarmic/arm_dynarmic.h
index 512bf8ce9..aada1e862 100644
--- a/src/core/arm/dynarmic/arm_dynarmic.h
+++ b/src/core/arm/dynarmic/arm_dynarmic.h
@@ -12,10 +12,14 @@
12#include "core/arm/exclusive_monitor.h" 12#include "core/arm/exclusive_monitor.h"
13#include "core/arm/unicorn/arm_unicorn.h" 13#include "core/arm/unicorn/arm_unicorn.h"
14 14
15namespace Memory { 15namespace Common {
16struct PageTable; 16struct PageTable;
17} 17}
18 18
19namespace Core::Timing {
20class CoreTiming;
21}
22
19namespace Core { 23namespace Core {
20 24
21class ARM_Dynarmic_Callbacks; 25class ARM_Dynarmic_Callbacks;
@@ -23,7 +27,8 @@ class DynarmicExclusiveMonitor;
23 27
24class ARM_Dynarmic final : public ARM_Interface { 28class ARM_Dynarmic final : public ARM_Interface {
25public: 29public:
26 ARM_Dynarmic(ExclusiveMonitor& exclusive_monitor, std::size_t core_index); 30 ARM_Dynarmic(Timing::CoreTiming& core_timing, ExclusiveMonitor& exclusive_monitor,
31 std::size_t core_index);
27 ~ARM_Dynarmic(); 32 ~ARM_Dynarmic();
28 33
29 void MapBackingMemory(VAddr address, std::size_t size, u8* memory, 34 void MapBackingMemory(VAddr address, std::size_t size, u8* memory,
@@ -62,9 +67,10 @@ private:
62 ARM_Unicorn inner_unicorn; 67 ARM_Unicorn inner_unicorn;
63 68
64 std::size_t core_index; 69 std::size_t core_index;
70 Timing::CoreTiming& core_timing;
65 DynarmicExclusiveMonitor& exclusive_monitor; 71 DynarmicExclusiveMonitor& exclusive_monitor;
66 72
67 Memory::PageTable* current_page_table = nullptr; 73 Common::PageTable* current_page_table = nullptr;
68}; 74};
69 75
70class DynarmicExclusiveMonitor final : public ExclusiveMonitor { 76class DynarmicExclusiveMonitor final : public ExclusiveMonitor {
diff --git a/src/core/arm/unicorn/arm_unicorn.cpp b/src/core/arm/unicorn/arm_unicorn.cpp
index c455c81fb..a542a098b 100644
--- a/src/core/arm/unicorn/arm_unicorn.cpp
+++ b/src/core/arm/unicorn/arm_unicorn.cpp
@@ -72,7 +72,7 @@ static bool UnmappedMemoryHook(uc_engine* uc, uc_mem_type type, u64 addr, int si
72 return {}; 72 return {};
73} 73}
74 74
75ARM_Unicorn::ARM_Unicorn() { 75ARM_Unicorn::ARM_Unicorn(Timing::CoreTiming& core_timing) : core_timing{core_timing} {
76 CHECKED(uc_open(UC_ARCH_ARM64, UC_MODE_ARM, &uc)); 76 CHECKED(uc_open(UC_ARCH_ARM64, UC_MODE_ARM, &uc));
77 77
78 auto fpv = 3 << 20; 78 auto fpv = 3 << 20;
@@ -177,7 +177,7 @@ void ARM_Unicorn::Run() {
177 if (GDBStub::IsServerEnabled()) { 177 if (GDBStub::IsServerEnabled()) {
178 ExecuteInstructions(std::max(4000000, 0)); 178 ExecuteInstructions(std::max(4000000, 0));
179 } else { 179 } else {
180 ExecuteInstructions(std::max(CoreTiming::GetDowncount(), 0)); 180 ExecuteInstructions(std::max(core_timing.GetDowncount(), 0));
181 } 181 }
182} 182}
183 183
@@ -190,7 +190,7 @@ MICROPROFILE_DEFINE(ARM_Jit_Unicorn, "ARM JIT", "Unicorn", MP_RGB(255, 64, 64));
190void ARM_Unicorn::ExecuteInstructions(int num_instructions) { 190void ARM_Unicorn::ExecuteInstructions(int num_instructions) {
191 MICROPROFILE_SCOPE(ARM_Jit_Unicorn); 191 MICROPROFILE_SCOPE(ARM_Jit_Unicorn);
192 CHECKED(uc_emu_start(uc, GetPC(), 1ULL << 63, 0, num_instructions)); 192 CHECKED(uc_emu_start(uc, GetPC(), 1ULL << 63, 0, num_instructions));
193 CoreTiming::AddTicks(num_instructions); 193 core_timing.AddTicks(num_instructions);
194 if (GDBStub::IsServerEnabled()) { 194 if (GDBStub::IsServerEnabled()) {
195 if (last_bkpt_hit) { 195 if (last_bkpt_hit) {
196 uc_reg_write(uc, UC_ARM64_REG_PC, &last_bkpt.address); 196 uc_reg_write(uc, UC_ARM64_REG_PC, &last_bkpt.address);
diff --git a/src/core/arm/unicorn/arm_unicorn.h b/src/core/arm/unicorn/arm_unicorn.h
index 75761950b..dbd6955ea 100644
--- a/src/core/arm/unicorn/arm_unicorn.h
+++ b/src/core/arm/unicorn/arm_unicorn.h
@@ -9,12 +9,17 @@
9#include "core/arm/arm_interface.h" 9#include "core/arm/arm_interface.h"
10#include "core/gdbstub/gdbstub.h" 10#include "core/gdbstub/gdbstub.h"
11 11
12namespace Core::Timing {
13class CoreTiming;
14}
15
12namespace Core { 16namespace Core {
13 17
14class ARM_Unicorn final : public ARM_Interface { 18class ARM_Unicorn final : public ARM_Interface {
15public: 19public:
16 ARM_Unicorn(); 20 explicit ARM_Unicorn(Timing::CoreTiming& core_timing);
17 ~ARM_Unicorn(); 21 ~ARM_Unicorn();
22
18 void MapBackingMemory(VAddr address, std::size_t size, u8* memory, 23 void MapBackingMemory(VAddr address, std::size_t size, u8* memory,
19 Kernel::VMAPermission perms) override; 24 Kernel::VMAPermission perms) override;
20 void UnmapMemory(VAddr address, std::size_t size) override; 25 void UnmapMemory(VAddr address, std::size_t size) override;
@@ -43,6 +48,7 @@ public:
43 48
44private: 49private:
45 uc_engine* uc{}; 50 uc_engine* uc{};
51 Timing::CoreTiming& core_timing;
46 GDBStub::BreakpointAddress last_bkpt{}; 52 GDBStub::BreakpointAddress last_bkpt{};
47 bool last_bkpt_hit; 53 bool last_bkpt_hit;
48}; 54};
diff --git a/src/core/core.cpp b/src/core/core.cpp
index 572814e4b..89b3fb418 100644
--- a/src/core/core.cpp
+++ b/src/core/core.cpp
@@ -36,7 +36,8 @@
36#include "frontend/applets/software_keyboard.h" 36#include "frontend/applets/software_keyboard.h"
37#include "frontend/applets/web_browser.h" 37#include "frontend/applets/web_browser.h"
38#include "video_core/debug_utils/debug_utils.h" 38#include "video_core/debug_utils/debug_utils.h"
39#include "video_core/gpu.h" 39#include "video_core/gpu_asynch.h"
40#include "video_core/gpu_synch.h"
40#include "video_core/renderer_base.h" 41#include "video_core/renderer_base.h"
41#include "video_core/video_core.h" 42#include "video_core/video_core.h"
42 43
@@ -78,6 +79,7 @@ FileSys::VirtualFile GetGameFileFromPath(const FileSys::VirtualFilesystem& vfs,
78 return vfs->OpenFile(path, FileSys::Mode::Read); 79 return vfs->OpenFile(path, FileSys::Mode::Read);
79} 80}
80struct System::Impl { 81struct System::Impl {
82 explicit Impl(System& system) : kernel{system} {}
81 83
82 Cpu& CurrentCpuCore() { 84 Cpu& CurrentCpuCore() {
83 return cpu_core_manager.GetCurrentCore(); 85 return cpu_core_manager.GetCurrentCore();
@@ -94,7 +96,7 @@ struct System::Impl {
94 ResultStatus Init(System& system, Frontend::EmuWindow& emu_window) { 96 ResultStatus Init(System& system, Frontend::EmuWindow& emu_window) {
95 LOG_DEBUG(HW_Memory, "initialized OK"); 97 LOG_DEBUG(HW_Memory, "initialized OK");
96 98
97 CoreTiming::Init(); 99 core_timing.Initialize();
98 kernel.Initialize(); 100 kernel.Initialize();
99 101
100 const auto current_time = std::chrono::duration_cast<std::chrono::seconds>( 102 const auto current_time = std::chrono::duration_cast<std::chrono::seconds>(
@@ -114,24 +116,30 @@ struct System::Impl {
114 if (web_browser == nullptr) 116 if (web_browser == nullptr)
115 web_browser = std::make_unique<Core::Frontend::DefaultWebBrowserApplet>(); 117 web_browser = std::make_unique<Core::Frontend::DefaultWebBrowserApplet>();
116 118
117 auto main_process = Kernel::Process::Create(kernel, "main"); 119 auto main_process = Kernel::Process::Create(system, "main");
118 kernel.MakeCurrentProcess(main_process.get()); 120 kernel.MakeCurrentProcess(main_process.get());
119 121
120 telemetry_session = std::make_unique<Core::TelemetrySession>(); 122 telemetry_session = std::make_unique<Core::TelemetrySession>();
121 service_manager = std::make_shared<Service::SM::ServiceManager>(); 123 service_manager = std::make_shared<Service::SM::ServiceManager>();
122 124
123 Service::Init(service_manager, *virtual_filesystem); 125 Service::Init(service_manager, system, *virtual_filesystem);
124 GDBStub::Init(); 126 GDBStub::Init();
125 127
126 renderer = VideoCore::CreateRenderer(emu_window); 128 renderer = VideoCore::CreateRenderer(emu_window, system);
127 if (!renderer->Init()) { 129 if (!renderer->Init()) {
128 return ResultStatus::ErrorVideoCore; 130 return ResultStatus::ErrorVideoCore;
129 } 131 }
130 132
131 gpu_core = std::make_unique<Tegra::GPU>(renderer->Rasterizer()); 133 is_powered_on = true;
134
135 if (Settings::values.use_asynchronous_gpu_emulation) {
136 gpu_core = std::make_unique<VideoCommon::GPUAsynch>(system, *renderer);
137 } else {
138 gpu_core = std::make_unique<VideoCommon::GPUSynch>(system, *renderer);
139 }
132 140
133 cpu_core_manager.Initialize(system); 141 cpu_core_manager.Initialize(system);
134 is_powered_on = true; 142
135 LOG_DEBUG(Core, "Initialized OK"); 143 LOG_DEBUG(Core, "Initialized OK");
136 144
137 // Reset counters and set time origin to current frame 145 // Reset counters and set time origin to current frame
@@ -175,19 +183,20 @@ struct System::Impl {
175 return static_cast<ResultStatus>(static_cast<u32>(ResultStatus::ErrorLoader) + 183 return static_cast<ResultStatus>(static_cast<u32>(ResultStatus::ErrorLoader) +
176 static_cast<u32>(load_result)); 184 static_cast<u32>(load_result));
177 } 185 }
186
178 status = ResultStatus::Success; 187 status = ResultStatus::Success;
179 return status; 188 return status;
180 } 189 }
181 190
182 void Shutdown() { 191 void Shutdown() {
183 // Log last frame performance stats 192 // Log last frame performance stats
184 auto perf_results = GetAndResetPerfStats(); 193 const auto perf_results = GetAndResetPerfStats();
185 Telemetry().AddField(Telemetry::FieldType::Performance, "Shutdown_EmulationSpeed", 194 telemetry_session->AddField(Telemetry::FieldType::Performance, "Shutdown_EmulationSpeed",
186 perf_results.emulation_speed * 100.0); 195 perf_results.emulation_speed * 100.0);
187 Telemetry().AddField(Telemetry::FieldType::Performance, "Shutdown_Framerate", 196 telemetry_session->AddField(Telemetry::FieldType::Performance, "Shutdown_Framerate",
188 perf_results.game_fps); 197 perf_results.game_fps);
189 Telemetry().AddField(Telemetry::FieldType::Performance, "Shutdown_Frametime", 198 telemetry_session->AddField(Telemetry::FieldType::Performance, "Shutdown_Frametime",
190 perf_results.frametime * 1000.0); 199 perf_results.frametime * 1000.0);
191 200
192 is_powered_on = false; 201 is_powered_on = false;
193 202
@@ -204,7 +213,7 @@ struct System::Impl {
204 213
205 // Shutdown kernel and core timing 214 // Shutdown kernel and core timing
206 kernel.Shutdown(); 215 kernel.Shutdown();
207 CoreTiming::Shutdown(); 216 core_timing.Shutdown();
208 217
209 // Close app loader 218 // Close app loader
210 app_loader.reset(); 219 app_loader.reset();
@@ -231,9 +240,10 @@ struct System::Impl {
231 } 240 }
232 241
233 PerfStatsResults GetAndResetPerfStats() { 242 PerfStatsResults GetAndResetPerfStats() {
234 return perf_stats.GetAndResetStats(CoreTiming::GetGlobalTimeUs()); 243 return perf_stats.GetAndResetStats(core_timing.GetGlobalTimeUs());
235 } 244 }
236 245
246 Timing::CoreTiming core_timing;
237 Kernel::KernelCore kernel; 247 Kernel::KernelCore kernel;
238 /// RealVfsFilesystem instance 248 /// RealVfsFilesystem instance
239 FileSys::VirtualFilesystem virtual_filesystem; 249 FileSys::VirtualFilesystem virtual_filesystem;
@@ -263,7 +273,7 @@ struct System::Impl {
263 Core::FrameLimiter frame_limiter; 273 Core::FrameLimiter frame_limiter;
264}; 274};
265 275
266System::System() : impl{std::make_unique<Impl>()} {} 276System::System() : impl{std::make_unique<Impl>(*this)} {}
267System::~System() = default; 277System::~System() = default;
268 278
269Cpu& System::CurrentCpuCore() { 279Cpu& System::CurrentCpuCore() {
@@ -395,6 +405,14 @@ const Kernel::KernelCore& System::Kernel() const {
395 return impl->kernel; 405 return impl->kernel;
396} 406}
397 407
408Timing::CoreTiming& System::CoreTiming() {
409 return impl->core_timing;
410}
411
412const Timing::CoreTiming& System::CoreTiming() const {
413 return impl->core_timing;
414}
415
398Core::PerfStats& System::GetPerfStats() { 416Core::PerfStats& System::GetPerfStats() {
399 return impl->perf_stats; 417 return impl->perf_stats;
400} 418}
diff --git a/src/core/core.h b/src/core/core.h
index 511a5ad3a..ba76a41d8 100644
--- a/src/core/core.h
+++ b/src/core/core.h
@@ -47,6 +47,10 @@ namespace VideoCore {
47class RendererBase; 47class RendererBase;
48} // namespace VideoCore 48} // namespace VideoCore
49 49
50namespace Core::Timing {
51class CoreTiming;
52}
53
50namespace Core { 54namespace Core {
51 55
52class ARM_Interface; 56class ARM_Interface;
@@ -205,6 +209,12 @@ public:
205 /// Provides a constant pointer to the current process. 209 /// Provides a constant pointer to the current process.
206 const Kernel::Process* CurrentProcess() const; 210 const Kernel::Process* CurrentProcess() const;
207 211
212 /// Provides a reference to the core timing instance.
213 Timing::CoreTiming& CoreTiming();
214
215 /// Provides a constant reference to the core timing instance.
216 const Timing::CoreTiming& CoreTiming() const;
217
208 /// Provides a reference to the kernel instance. 218 /// Provides a reference to the kernel instance.
209 Kernel::KernelCore& Kernel(); 219 Kernel::KernelCore& Kernel();
210 220
@@ -283,10 +293,6 @@ inline ARM_Interface& CurrentArmInterface() {
283 return System::GetInstance().CurrentArmInterface(); 293 return System::GetInstance().CurrentArmInterface();
284} 294}
285 295
286inline TelemetrySession& Telemetry() {
287 return System::GetInstance().TelemetrySession();
288}
289
290inline Kernel::Process* CurrentProcess() { 296inline Kernel::Process* CurrentProcess() {
291 return System::GetInstance().CurrentProcess(); 297 return System::GetInstance().CurrentProcess();
292} 298}
diff --git a/src/core/core_cpu.cpp b/src/core/core_cpu.cpp
index fffda8a99..1eefed6d0 100644
--- a/src/core/core_cpu.cpp
+++ b/src/core/core_cpu.cpp
@@ -11,6 +11,7 @@
11#endif 11#endif
12#include "core/arm/exclusive_monitor.h" 12#include "core/arm/exclusive_monitor.h"
13#include "core/arm/unicorn/arm_unicorn.h" 13#include "core/arm/unicorn/arm_unicorn.h"
14#include "core/core.h"
14#include "core/core_cpu.h" 15#include "core/core_cpu.h"
15#include "core/core_timing.h" 16#include "core/core_timing.h"
16#include "core/hle/kernel/scheduler.h" 17#include "core/hle/kernel/scheduler.h"
@@ -49,20 +50,21 @@ bool CpuBarrier::Rendezvous() {
49 return false; 50 return false;
50} 51}
51 52
52Cpu::Cpu(ExclusiveMonitor& exclusive_monitor, CpuBarrier& cpu_barrier, std::size_t core_index) 53Cpu::Cpu(System& system, ExclusiveMonitor& exclusive_monitor, CpuBarrier& cpu_barrier,
53 : cpu_barrier{cpu_barrier}, core_index{core_index} { 54 std::size_t core_index)
55 : cpu_barrier{cpu_barrier}, core_timing{system.CoreTiming()}, core_index{core_index} {
54 if (Settings::values.use_cpu_jit) { 56 if (Settings::values.use_cpu_jit) {
55#ifdef ARCHITECTURE_x86_64 57#ifdef ARCHITECTURE_x86_64
56 arm_interface = std::make_unique<ARM_Dynarmic>(exclusive_monitor, core_index); 58 arm_interface = std::make_unique<ARM_Dynarmic>(core_timing, exclusive_monitor, core_index);
57#else 59#else
58 arm_interface = std::make_unique<ARM_Unicorn>(); 60 arm_interface = std::make_unique<ARM_Unicorn>();
59 LOG_WARNING(Core, "CPU JIT requested, but Dynarmic not available"); 61 LOG_WARNING(Core, "CPU JIT requested, but Dynarmic not available");
60#endif 62#endif
61 } else { 63 } else {
62 arm_interface = std::make_unique<ARM_Unicorn>(); 64 arm_interface = std::make_unique<ARM_Unicorn>(core_timing);
63 } 65 }
64 66
65 scheduler = std::make_unique<Kernel::Scheduler>(*arm_interface); 67 scheduler = std::make_unique<Kernel::Scheduler>(system, *arm_interface);
66} 68}
67 69
68Cpu::~Cpu() = default; 70Cpu::~Cpu() = default;
@@ -93,14 +95,14 @@ void Cpu::RunLoop(bool tight_loop) {
93 95
94 if (IsMainCore()) { 96 if (IsMainCore()) {
95 // TODO(Subv): Only let CoreTiming idle if all 4 cores are idling. 97 // TODO(Subv): Only let CoreTiming idle if all 4 cores are idling.
96 CoreTiming::Idle(); 98 core_timing.Idle();
97 CoreTiming::Advance(); 99 core_timing.Advance();
98 } 100 }
99 101
100 PrepareReschedule(); 102 PrepareReschedule();
101 } else { 103 } else {
102 if (IsMainCore()) { 104 if (IsMainCore()) {
103 CoreTiming::Advance(); 105 core_timing.Advance();
104 } 106 }
105 107
106 if (tight_loop) { 108 if (tight_loop) {
diff --git a/src/core/core_cpu.h b/src/core/core_cpu.h
index 1d2bdc6cd..7589beb8c 100644
--- a/src/core/core_cpu.h
+++ b/src/core/core_cpu.h
@@ -16,6 +16,14 @@ class Scheduler;
16} 16}
17 17
18namespace Core { 18namespace Core {
19class System;
20}
21
22namespace Core::Timing {
23class CoreTiming;
24}
25
26namespace Core {
19 27
20class ARM_Interface; 28class ARM_Interface;
21class ExclusiveMonitor; 29class ExclusiveMonitor;
@@ -41,7 +49,8 @@ private:
41 49
42class Cpu { 50class Cpu {
43public: 51public:
44 Cpu(ExclusiveMonitor& exclusive_monitor, CpuBarrier& cpu_barrier, std::size_t core_index); 52 Cpu(System& system, ExclusiveMonitor& exclusive_monitor, CpuBarrier& cpu_barrier,
53 std::size_t core_index);
45 ~Cpu(); 54 ~Cpu();
46 55
47 void RunLoop(bool tight_loop = true); 56 void RunLoop(bool tight_loop = true);
@@ -82,6 +91,7 @@ private:
82 std::unique_ptr<ARM_Interface> arm_interface; 91 std::unique_ptr<ARM_Interface> arm_interface;
83 CpuBarrier& cpu_barrier; 92 CpuBarrier& cpu_barrier;
84 std::unique_ptr<Kernel::Scheduler> scheduler; 93 std::unique_ptr<Kernel::Scheduler> scheduler;
94 Timing::CoreTiming& core_timing;
85 95
86 std::atomic<bool> reschedule_pending = false; 96 std::atomic<bool> reschedule_pending = false;
87 std::size_t core_index; 97 std::size_t core_index;
diff --git a/src/core/core_timing.cpp b/src/core/core_timing.cpp
index 7953c8720..a0dd5db24 100644
--- a/src/core/core_timing.cpp
+++ b/src/core/core_timing.cpp
@@ -8,149 +8,98 @@
8#include <mutex> 8#include <mutex>
9#include <string> 9#include <string>
10#include <tuple> 10#include <tuple>
11#include <unordered_map> 11
12#include <vector>
13#include "common/assert.h" 12#include "common/assert.h"
14#include "common/thread.h" 13#include "common/thread.h"
15#include "common/threadsafe_queue.h"
16#include "core/core_timing_util.h" 14#include "core/core_timing_util.h"
17 15
18namespace CoreTiming { 16namespace Core::Timing {
19
20static s64 global_timer;
21static int slice_length;
22static int downcount;
23 17
24struct EventType { 18constexpr int MAX_SLICE_LENGTH = 20000;
25 TimedCallback callback;
26 const std::string* name;
27};
28 19
29struct Event { 20struct CoreTiming::Event {
30 s64 time; 21 s64 time;
31 u64 fifo_order; 22 u64 fifo_order;
32 u64 userdata; 23 u64 userdata;
33 const EventType* type; 24 const EventType* type;
34};
35
36// Sort by time, unless the times are the same, in which case sort by the order added to the queue
37static bool operator>(const Event& left, const Event& right) {
38 return std::tie(left.time, left.fifo_order) > std::tie(right.time, right.fifo_order);
39}
40
41static bool operator<(const Event& left, const Event& right) {
42 return std::tie(left.time, left.fifo_order) < std::tie(right.time, right.fifo_order);
43}
44
45// unordered_map stores each element separately as a linked list node so pointers to elements
46// remain stable regardless of rehashes/resizing.
47static std::unordered_map<std::string, EventType> event_types;
48 25
49// The queue is a min-heap using std::make_heap/push_heap/pop_heap. 26 // Sort by time, unless the times are the same, in which case sort by
50// We don't use std::priority_queue because we need to be able to serialize, unserialize and 27 // the order added to the queue
51// erase arbitrary events (RemoveEvent()) regardless of the queue order. These aren't accomodated 28 friend bool operator>(const Event& left, const Event& right) {
52// by the standard adaptor class. 29 return std::tie(left.time, left.fifo_order) > std::tie(right.time, right.fifo_order);
53static std::vector<Event> event_queue; 30 }
54static u64 event_fifo_id;
55// the queue for storing the events from other threads threadsafe until they will be added
56// to the event_queue by the emu thread
57static Common::MPSCQueue<Event, false> ts_queue;
58
59// the queue for unscheduling the events from other threads threadsafe
60static Common::MPSCQueue<std::pair<const EventType*, u64>, false> unschedule_queue;
61
62constexpr int MAX_SLICE_LENGTH = 20000;
63
64static s64 idled_cycles;
65
66// Are we in a function that has been called from Advance()
67// If events are sheduled from a function that gets called from Advance(),
68// don't change slice_length and downcount.
69static bool is_global_timer_sane;
70
71static EventType* ev_lost = nullptr;
72
73static void EmptyTimedCallback(u64 userdata, s64 cyclesLate) {}
74
75EventType* RegisterEvent(const std::string& name, TimedCallback callback) {
76 // check for existing type with same name.
77 // we want event type names to remain unique so that we can use them for serialization.
78 ASSERT_MSG(event_types.find(name) == event_types.end(),
79 "CoreTiming Event \"{}\" is already registered. Events should only be registered "
80 "during Init to avoid breaking save states.",
81 name.c_str());
82 31
83 auto info = event_types.emplace(name, EventType{callback, nullptr}); 32 friend bool operator<(const Event& left, const Event& right) {
84 EventType* event_type = &info.first->second; 33 return std::tie(left.time, left.fifo_order) < std::tie(right.time, right.fifo_order);
85 event_type->name = &info.first->first; 34 }
86 return event_type; 35};
87}
88 36
89void UnregisterAllEvents() { 37CoreTiming::CoreTiming() = default;
90 ASSERT_MSG(event_queue.empty(), "Cannot unregister events with events pending"); 38CoreTiming::~CoreTiming() = default;
91 event_types.clear();
92}
93 39
94void Init() { 40void CoreTiming::Initialize() {
95 downcount = MAX_SLICE_LENGTH; 41 downcount = MAX_SLICE_LENGTH;
96 slice_length = MAX_SLICE_LENGTH; 42 slice_length = MAX_SLICE_LENGTH;
97 global_timer = 0; 43 global_timer = 0;
98 idled_cycles = 0; 44 idled_cycles = 0;
99 45
100 // The time between CoreTiming being intialized and the first call to Advance() is considered 46 // The time between CoreTiming being initialized and the first call to Advance() is considered
101 // the slice boundary between slice -1 and slice 0. Dispatcher loops must call Advance() before 47 // the slice boundary between slice -1 and slice 0. Dispatcher loops must call Advance() before
102 // executing the first cycle of each slice to prepare the slice length and downcount for 48 // executing the first cycle of each slice to prepare the slice length and downcount for
103 // that slice. 49 // that slice.
104 is_global_timer_sane = true; 50 is_global_timer_sane = true;
105 51
106 event_fifo_id = 0; 52 event_fifo_id = 0;
107 ev_lost = RegisterEvent("_lost_event", &EmptyTimedCallback); 53
54 const auto empty_timed_callback = [](u64, s64) {};
55 ev_lost = RegisterEvent("_lost_event", empty_timed_callback);
108} 56}
109 57
110void Shutdown() { 58void CoreTiming::Shutdown() {
111 MoveEvents(); 59 MoveEvents();
112 ClearPendingEvents(); 60 ClearPendingEvents();
113 UnregisterAllEvents(); 61 UnregisterAllEvents();
114} 62}
115 63
116// This should only be called from the CPU thread. If you are calling 64EventType* CoreTiming::RegisterEvent(const std::string& name, TimedCallback callback) {
117// it from any other thread, you are doing something evil 65 // check for existing type with same name.
118u64 GetTicks() { 66 // we want event type names to remain unique so that we can use them for serialization.
119 u64 ticks = static_cast<u64>(global_timer); 67 ASSERT_MSG(event_types.find(name) == event_types.end(),
120 if (!is_global_timer_sane) { 68 "CoreTiming Event \"{}\" is already registered. Events should only be registered "
121 ticks += slice_length - downcount; 69 "during Init to avoid breaking save states.",
122 } 70 name.c_str());
123 return ticks;
124}
125
126void AddTicks(u64 ticks) {
127 downcount -= static_cast<int>(ticks);
128}
129 71
130u64 GetIdleTicks() { 72 auto info = event_types.emplace(name, EventType{callback, nullptr});
131 return static_cast<u64>(idled_cycles); 73 EventType* event_type = &info.first->second;
74 event_type->name = &info.first->first;
75 return event_type;
132} 76}
133 77
134void ClearPendingEvents() { 78void CoreTiming::UnregisterAllEvents() {
135 event_queue.clear(); 79 ASSERT_MSG(event_queue.empty(), "Cannot unregister events with events pending");
80 event_types.clear();
136} 81}
137 82
138void ScheduleEvent(s64 cycles_into_future, const EventType* event_type, u64 userdata) { 83void CoreTiming::ScheduleEvent(s64 cycles_into_future, const EventType* event_type, u64 userdata) {
139 ASSERT(event_type != nullptr); 84 ASSERT(event_type != nullptr);
140 s64 timeout = GetTicks() + cycles_into_future; 85 const s64 timeout = GetTicks() + cycles_into_future;
86
141 // If this event needs to be scheduled before the next advance(), force one early 87 // If this event needs to be scheduled before the next advance(), force one early
142 if (!is_global_timer_sane) 88 if (!is_global_timer_sane) {
143 ForceExceptionCheck(cycles_into_future); 89 ForceExceptionCheck(cycles_into_future);
90 }
91
144 event_queue.emplace_back(Event{timeout, event_fifo_id++, userdata, event_type}); 92 event_queue.emplace_back(Event{timeout, event_fifo_id++, userdata, event_type});
145 std::push_heap(event_queue.begin(), event_queue.end(), std::greater<>()); 93 std::push_heap(event_queue.begin(), event_queue.end(), std::greater<>());
146} 94}
147 95
148void ScheduleEventThreadsafe(s64 cycles_into_future, const EventType* event_type, u64 userdata) { 96void CoreTiming::ScheduleEventThreadsafe(s64 cycles_into_future, const EventType* event_type,
97 u64 userdata) {
149 ts_queue.Push(Event{global_timer + cycles_into_future, 0, userdata, event_type}); 98 ts_queue.Push(Event{global_timer + cycles_into_future, 0, userdata, event_type});
150} 99}
151 100
152void UnscheduleEvent(const EventType* event_type, u64 userdata) { 101void CoreTiming::UnscheduleEvent(const EventType* event_type, u64 userdata) {
153 auto itr = std::remove_if(event_queue.begin(), event_queue.end(), [&](const Event& e) { 102 const auto itr = std::remove_if(event_queue.begin(), event_queue.end(), [&](const Event& e) {
154 return e.type == event_type && e.userdata == userdata; 103 return e.type == event_type && e.userdata == userdata;
155 }); 104 });
156 105
@@ -161,13 +110,33 @@ void UnscheduleEvent(const EventType* event_type, u64 userdata) {
161 } 110 }
162} 111}
163 112
164void UnscheduleEventThreadsafe(const EventType* event_type, u64 userdata) { 113void CoreTiming::UnscheduleEventThreadsafe(const EventType* event_type, u64 userdata) {
165 unschedule_queue.Push(std::make_pair(event_type, userdata)); 114 unschedule_queue.Push(std::make_pair(event_type, userdata));
166} 115}
167 116
168void RemoveEvent(const EventType* event_type) { 117u64 CoreTiming::GetTicks() const {
169 auto itr = std::remove_if(event_queue.begin(), event_queue.end(), 118 u64 ticks = static_cast<u64>(global_timer);
170 [&](const Event& e) { return e.type == event_type; }); 119 if (!is_global_timer_sane) {
120 ticks += slice_length - downcount;
121 }
122 return ticks;
123}
124
125u64 CoreTiming::GetIdleTicks() const {
126 return static_cast<u64>(idled_cycles);
127}
128
129void CoreTiming::AddTicks(u64 ticks) {
130 downcount -= static_cast<int>(ticks);
131}
132
133void CoreTiming::ClearPendingEvents() {
134 event_queue.clear();
135}
136
137void CoreTiming::RemoveEvent(const EventType* event_type) {
138 const auto itr = std::remove_if(event_queue.begin(), event_queue.end(),
139 [&](const Event& e) { return e.type == event_type; });
171 140
172 // Removing random items breaks the invariant so we have to re-establish it. 141 // Removing random items breaks the invariant so we have to re-establish it.
173 if (itr != event_queue.end()) { 142 if (itr != event_queue.end()) {
@@ -176,22 +145,24 @@ void RemoveEvent(const EventType* event_type) {
176 } 145 }
177} 146}
178 147
179void RemoveNormalAndThreadsafeEvent(const EventType* event_type) { 148void CoreTiming::RemoveNormalAndThreadsafeEvent(const EventType* event_type) {
180 MoveEvents(); 149 MoveEvents();
181 RemoveEvent(event_type); 150 RemoveEvent(event_type);
182} 151}
183 152
184void ForceExceptionCheck(s64 cycles) { 153void CoreTiming::ForceExceptionCheck(s64 cycles) {
185 cycles = std::max<s64>(0, cycles); 154 cycles = std::max<s64>(0, cycles);
186 if (downcount > cycles) { 155 if (downcount <= cycles) {
187 // downcount is always (much) smaller than MAX_INT so we can safely cast cycles to an int 156 return;
188 // here. Account for cycles already executed by adjusting the g.slice_length
189 slice_length -= downcount - static_cast<int>(cycles);
190 downcount = static_cast<int>(cycles);
191 } 157 }
158
159 // downcount is always (much) smaller than MAX_INT so we can safely cast cycles to an int
160 // here. Account for cycles already executed by adjusting the g.slice_length
161 slice_length -= downcount - static_cast<int>(cycles);
162 downcount = static_cast<int>(cycles);
192} 163}
193 164
194void MoveEvents() { 165void CoreTiming::MoveEvents() {
195 for (Event ev; ts_queue.Pop(ev);) { 166 for (Event ev; ts_queue.Pop(ev);) {
196 ev.fifo_order = event_fifo_id++; 167 ev.fifo_order = event_fifo_id++;
197 event_queue.emplace_back(std::move(ev)); 168 event_queue.emplace_back(std::move(ev));
@@ -199,13 +170,13 @@ void MoveEvents() {
199 } 170 }
200} 171}
201 172
202void Advance() { 173void CoreTiming::Advance() {
203 MoveEvents(); 174 MoveEvents();
204 for (std::pair<const EventType*, u64> ev; unschedule_queue.Pop(ev);) { 175 for (std::pair<const EventType*, u64> ev; unschedule_queue.Pop(ev);) {
205 UnscheduleEvent(ev.first, ev.second); 176 UnscheduleEvent(ev.first, ev.second);
206 } 177 }
207 178
208 int cycles_executed = slice_length - downcount; 179 const int cycles_executed = slice_length - downcount;
209 global_timer += cycles_executed; 180 global_timer += cycles_executed;
210 slice_length = MAX_SLICE_LENGTH; 181 slice_length = MAX_SLICE_LENGTH;
211 182
@@ -229,17 +200,17 @@ void Advance() {
229 downcount = slice_length; 200 downcount = slice_length;
230} 201}
231 202
232void Idle() { 203void CoreTiming::Idle() {
233 idled_cycles += downcount; 204 idled_cycles += downcount;
234 downcount = 0; 205 downcount = 0;
235} 206}
236 207
237std::chrono::microseconds GetGlobalTimeUs() { 208std::chrono::microseconds CoreTiming::GetGlobalTimeUs() const {
238 return std::chrono::microseconds{GetTicks() * 1000000 / BASE_CLOCK_RATE}; 209 return std::chrono::microseconds{GetTicks() * 1000000 / BASE_CLOCK_RATE};
239} 210}
240 211
241int GetDowncount() { 212int CoreTiming::GetDowncount() const {
242 return downcount; 213 return downcount;
243} 214}
244 215
245} // namespace CoreTiming 216} // namespace Core::Timing
diff --git a/src/core/core_timing.h b/src/core/core_timing.h
index 9ed757bd7..59163bae1 100644
--- a/src/core/core_timing.h
+++ b/src/core/core_timing.h
@@ -4,6 +4,27 @@
4 4
5#pragma once 5#pragma once
6 6
7#include <chrono>
8#include <functional>
9#include <string>
10#include <unordered_map>
11#include <vector>
12#include "common/common_types.h"
13#include "common/threadsafe_queue.h"
14
15namespace Core::Timing {
16
17/// A callback that may be scheduled for a particular core timing event.
18using TimedCallback = std::function<void(u64 userdata, int cycles_late)>;
19
20/// Contains the characteristics of a particular event.
21struct EventType {
22 /// The event's callback function.
23 TimedCallback callback;
24 /// A pointer to the name of the event.
25 const std::string* name;
26};
27
7/** 28/**
8 * This is a system to schedule events into the emulated machine's future. Time is measured 29 * This is a system to schedule events into the emulated machine's future. Time is measured
9 * in main CPU clock cycles. 30 * in main CPU clock cycles.
@@ -16,80 +37,120 @@
16 * inside callback: 37 * inside callback:
17 * ScheduleEvent(periodInCycles - cyclesLate, callback, "whatever") 38 * ScheduleEvent(periodInCycles - cyclesLate, callback, "whatever")
18 */ 39 */
19 40class CoreTiming {
20#include <chrono> 41public:
21#include <functional> 42 CoreTiming();
22#include <string> 43 ~CoreTiming();
23#include "common/common_types.h" 44
24 45 CoreTiming(const CoreTiming&) = delete;
25namespace CoreTiming { 46 CoreTiming(CoreTiming&&) = delete;
26 47
27struct EventType; 48 CoreTiming& operator=(const CoreTiming&) = delete;
28 49 CoreTiming& operator=(CoreTiming&&) = delete;
29using TimedCallback = std::function<void(u64 userdata, int cycles_late)>; 50
30 51 /// CoreTiming begins at the boundary of timing slice -1. An initial call to Advance() is
31/** 52 /// required to end slice - 1 and start slice 0 before the first cycle of code is executed.
32 * CoreTiming begins at the boundary of timing slice -1. An initial call to Advance() is 53 /// required to end slice -1 and start slice 0 before the first cycle of code is executed.
33 * required to end slice -1 and start slice 0 before the first cycle of code is executed. 54
34 */ 55 /// Tears down all timing related functionality.
35void Init(); 56 void Shutdown();
36void Shutdown(); 57
37 58 /// Registers a core timing event with the given name and callback.
38/** 59 ///
39 * This should only be called from the emu thread, if you are calling it any other thread, you are 60 /// @param name The name of the core timing event to register.
40 * doing something evil 61 /// @param callback The callback to execute for the event.
41 */ 62 ///
42u64 GetTicks(); 63 /// @returns An EventType instance representing the registered event.
43u64 GetIdleTicks(); 64 ///
44void AddTicks(u64 ticks); 65 /// @pre The name of the event being registered must be unique among all
45 66 /// registered events.
46/** 67 ///
47 * Returns the event_type identifier. if name is not unique, it will assert. 68 EventType* RegisterEvent(const std::string& name, TimedCallback callback);
48 */ 69
49EventType* RegisterEvent(const std::string& name, TimedCallback callback); 70 /// Unregisters all registered events thus far.
50void UnregisterAllEvents(); 71 void UnregisterAllEvents();
51 72
52/** 73 /// After the first Advance, the slice lengths and the downcount will be reduced whenever an
53 * After the first Advance, the slice lengths and the downcount will be reduced whenever an event 74 /// event is scheduled earlier than the current values.
54 * is scheduled earlier than the current values. 75 ///
55 * Scheduling from a callback will not update the downcount until the Advance() completes. 76 /// Scheduling from a callback will not update the downcount until the Advance() completes.
56 */ 77 void ScheduleEvent(s64 cycles_into_future, const EventType* event_type, u64 userdata = 0);
57void ScheduleEvent(s64 cycles_into_future, const EventType* event_type, u64 userdata = 0); 78
58 79 /// This is to be called when outside of hle threads, such as the graphics thread, wants to
59/** 80 /// schedule things to be executed on the main thread.
60 * This is to be called when outside of hle threads, such as the graphics thread, wants to 81 ///
61 * schedule things to be executed on the main thread. 82 /// @note This doesn't change slice_length and thus events scheduled by this might be
62 * Not that this doesn't change slice_length and thus events scheduled by this might be called 83 /// called with a delay of up to MAX_SLICE_LENGTH
63 * with a delay of up to MAX_SLICE_LENGTH 84 void ScheduleEventThreadsafe(s64 cycles_into_future, const EventType* event_type,
64 */ 85 u64 userdata = 0);
65void ScheduleEventThreadsafe(s64 cycles_into_future, const EventType* event_type, u64 userdata); 86
66 87 void UnscheduleEvent(const EventType* event_type, u64 userdata);
67void UnscheduleEvent(const EventType* event_type, u64 userdata); 88 void UnscheduleEventThreadsafe(const EventType* event_type, u64 userdata);
68void UnscheduleEventThreadsafe(const EventType* event_type, u64 userdata); 89
69 90 /// We only permit one event of each type in the queue at a time.
70/// We only permit one event of each type in the queue at a time. 91 void RemoveEvent(const EventType* event_type);
71void RemoveEvent(const EventType* event_type); 92 void RemoveNormalAndThreadsafeEvent(const EventType* event_type);
72void RemoveNormalAndThreadsafeEvent(const EventType* event_type); 93
73 94 void ForceExceptionCheck(s64 cycles);
74/** Advance must be called at the beginning of dispatcher loops, not the end. Advance() ends 95
75 * the previous timing slice and begins the next one, you must Advance from the previous 96 /// This should only be called from the emu thread, if you are calling it any other thread,
76 * slice to the current one before executing any cycles. CoreTiming starts in slice -1 so an 97 /// you are doing something evil
77 * Advance() is required to initialize the slice length before the first cycle of emulated 98 u64 GetTicks() const;
78 * instructions is executed. 99
79 */ 100 u64 GetIdleTicks() const;
80void Advance(); 101
81void MoveEvents(); 102 void AddTicks(u64 ticks);
82 103
83/// Pretend that the main CPU has executed enough cycles to reach the next event. 104 /// Advance must be called at the beginning of dispatcher loops, not the end. Advance() ends
84void Idle(); 105 /// the previous timing slice and begins the next one, you must Advance from the previous
85 106 /// slice to the current one before executing any cycles. CoreTiming starts in slice -1 so an
86/// Clear all pending events. This should ONLY be done on exit. 107 /// Advance() is required to initialize the slice length before the first cycle of emulated
87void ClearPendingEvents(); 108 /// instructions is executed.
88 109 void Advance();
89void ForceExceptionCheck(s64 cycles); 110
90 111 /// Pretend that the main CPU has executed enough cycles to reach the next event.
91std::chrono::microseconds GetGlobalTimeUs(); 112 void Idle();
92 113
93int GetDowncount(); 114 std::chrono::microseconds GetGlobalTimeUs() const;
94 115
95} // namespace CoreTiming 116 int GetDowncount() const;
117
118private:
119 struct Event;
120
121 /// Clear all pending events. This should ONLY be done on exit.
122 void ClearPendingEvents();
123 void MoveEvents();
124
125 s64 global_timer = 0;
126 s64 idled_cycles = 0;
127 int slice_length = 0;
128 int downcount = 0;
129
130 // Are we in a function that has been called from Advance()
131 // If events are scheduled from a function that gets called from Advance(),
132 // don't change slice_length and downcount.
133 bool is_global_timer_sane = false;
134
135 // The queue is a min-heap using std::make_heap/push_heap/pop_heap.
136 // We don't use std::priority_queue because we need to be able to serialize, unserialize and
137 // erase arbitrary events (RemoveEvent()) regardless of the queue order. These aren't
138 // accommodated by the standard adaptor class.
139 std::vector<Event> event_queue;
140 u64 event_fifo_id = 0;
141
142 // Stores each element separately as a linked list node so pointers to elements
143 // remain stable regardless of rehashes/resizing.
144 std::unordered_map<std::string, EventType> event_types;
145
146 // The queue for storing the events from other threads threadsafe until they will be added
147 // to the event_queue by the emu thread
148 Common::MPSCQueue<Event> ts_queue;
149
150 // The queue for unscheduling the events from other threads threadsafe
151 Common::MPSCQueue<std::pair<const EventType*, u64>> unschedule_queue;
152
153 EventType* ev_lost = nullptr;
154};
155
156} // namespace Core::Timing
diff --git a/src/core/core_timing_util.cpp b/src/core/core_timing_util.cpp
index 73dea4edb..7942f30d6 100644
--- a/src/core/core_timing_util.cpp
+++ b/src/core/core_timing_util.cpp
@@ -7,8 +7,9 @@
7#include <cinttypes> 7#include <cinttypes>
8#include <limits> 8#include <limits>
9#include "common/logging/log.h" 9#include "common/logging/log.h"
10#include "common/uint128.h"
10 11
11namespace CoreTiming { 12namespace Core::Timing {
12 13
13constexpr u64 MAX_VALUE_TO_MULTIPLY = std::numeric_limits<s64>::max() / BASE_CLOCK_RATE; 14constexpr u64 MAX_VALUE_TO_MULTIPLY = std::numeric_limits<s64>::max() / BASE_CLOCK_RATE;
14 15
@@ -60,4 +61,9 @@ s64 nsToCycles(u64 ns) {
60 return (BASE_CLOCK_RATE * static_cast<s64>(ns)) / 1000000000; 61 return (BASE_CLOCK_RATE * static_cast<s64>(ns)) / 1000000000;
61} 62}
62 63
63} // namespace CoreTiming 64u64 CpuCyclesToClockCycles(u64 ticks) {
65 const u128 temporal = Common::Multiply64Into128(ticks, CNTFREQ);
66 return Common::Divide128On32(temporal, static_cast<u32>(BASE_CLOCK_RATE)).first;
67}
68
69} // namespace Core::Timing
diff --git a/src/core/core_timing_util.h b/src/core/core_timing_util.h
index 5c3718782..679aa3123 100644
--- a/src/core/core_timing_util.h
+++ b/src/core/core_timing_util.h
@@ -6,11 +6,12 @@
6 6
7#include "common/common_types.h" 7#include "common/common_types.h"
8 8
9namespace CoreTiming { 9namespace Core::Timing {
10 10
11// The below clock rate is based on Switch's clockspeed being widely known as 1.020GHz 11// The below clock rate is based on Switch's clockspeed being widely known as 1.020GHz
12// The exact value used is of course unverified. 12// The exact value used is of course unverified.
13constexpr u64 BASE_CLOCK_RATE = 1019215872; // Switch clock speed is 1020MHz un/docked 13constexpr u64 BASE_CLOCK_RATE = 1019215872; // Switch clock speed is 1020MHz un/docked
14constexpr u64 CNTFREQ = 19200000; // Value from fusee.
14 15
15inline s64 msToCycles(int ms) { 16inline s64 msToCycles(int ms) {
16 // since ms is int there is no way to overflow 17 // since ms is int there is no way to overflow
@@ -61,4 +62,6 @@ inline u64 cyclesToMs(s64 cycles) {
61 return cycles * 1000 / BASE_CLOCK_RATE; 62 return cycles * 1000 / BASE_CLOCK_RATE;
62} 63}
63 64
64} // namespace CoreTiming 65u64 CpuCyclesToClockCycles(u64 ticks);
66
67} // namespace Core::Timing
diff --git a/src/core/cpu_core_manager.cpp b/src/core/cpu_core_manager.cpp
index 769a6fefa..93bc5619c 100644
--- a/src/core/cpu_core_manager.cpp
+++ b/src/core/cpu_core_manager.cpp
@@ -27,7 +27,7 @@ void CpuCoreManager::Initialize(System& system) {
27 exclusive_monitor = Cpu::MakeExclusiveMonitor(cores.size()); 27 exclusive_monitor = Cpu::MakeExclusiveMonitor(cores.size());
28 28
29 for (std::size_t index = 0; index < cores.size(); ++index) { 29 for (std::size_t index = 0; index < cores.size(); ++index) {
30 cores[index] = std::make_unique<Cpu>(*exclusive_monitor, *barrier, index); 30 cores[index] = std::make_unique<Cpu>(system, *exclusive_monitor, *barrier, index);
31 } 31 }
32 32
33 // Create threads for CPU cores 1-3, and build thread_to_cpu map 33 // Create threads for CPU cores 1-3, and build thread_to_cpu map
diff --git a/src/core/crypto/key_manager.cpp b/src/core/crypto/key_manager.cpp
index ca12fb4ab..dfac9a4b3 100644
--- a/src/core/crypto/key_manager.cpp
+++ b/src/core/crypto/key_manager.cpp
@@ -398,7 +398,8 @@ static bool ValidCryptoRevisionString(std::string_view base, size_t begin, size_
398} 398}
399 399
400void KeyManager::LoadFromFile(const std::string& filename, bool is_title_keys) { 400void KeyManager::LoadFromFile(const std::string& filename, bool is_title_keys) {
401 std::ifstream file(filename); 401 std::ifstream file;
402 OpenFStream(file, filename, std::ios_base::in);
402 if (!file.is_open()) 403 if (!file.is_open())
403 return; 404 return;
404 405
diff --git a/src/core/file_sys/content_archive.h b/src/core/file_sys/content_archive.h
index 5d4d05c82..15b9e6624 100644
--- a/src/core/file_sys/content_archive.h
+++ b/src/core/file_sys/content_archive.h
@@ -24,13 +24,26 @@ namespace FileSys {
24 24
25union NCASectionHeader; 25union NCASectionHeader;
26 26
27/// Describes the type of content within an NCA archive.
27enum class NCAContentType : u8 { 28enum class NCAContentType : u8 {
29 /// Executable-related data
28 Program = 0, 30 Program = 0,
31
32 /// Metadata.
29 Meta = 1, 33 Meta = 1,
34
35 /// Access control data.
30 Control = 2, 36 Control = 2,
37
38 /// Information related to the game manual
39 /// e.g. Legal information, etc.
31 Manual = 3, 40 Manual = 3,
41
42 /// System data.
32 Data = 4, 43 Data = 4,
33 Data_Unknown5 = 5, ///< Seems to be used on some system archives 44
45 /// Data that can be accessed by applications.
46 PublicData = 5,
34}; 47};
35 48
36enum class NCASectionCryptoType : u8 { 49enum class NCASectionCryptoType : u8 {
diff --git a/src/core/file_sys/registered_cache.cpp b/src/core/file_sys/registered_cache.cpp
index 128199063..1c6bacace 100644
--- a/src/core/file_sys/registered_cache.cpp
+++ b/src/core/file_sys/registered_cache.cpp
@@ -94,7 +94,7 @@ static ContentRecordType GetCRTypeFromNCAType(NCAContentType type) {
94 case NCAContentType::Control: 94 case NCAContentType::Control:
95 return ContentRecordType::Control; 95 return ContentRecordType::Control;
96 case NCAContentType::Data: 96 case NCAContentType::Data:
97 case NCAContentType::Data_Unknown5: 97 case NCAContentType::PublicData:
98 return ContentRecordType::Data; 98 return ContentRecordType::Data;
99 case NCAContentType::Manual: 99 case NCAContentType::Manual:
100 // TODO(DarkLordZach): Peek at NCA contents to differentiate Manual and Legal. 100 // TODO(DarkLordZach): Peek at NCA contents to differentiate Manual and Legal.
diff --git a/src/core/file_sys/vfs_vector.cpp b/src/core/file_sys/vfs_vector.cpp
index 515626658..75fc04302 100644
--- a/src/core/file_sys/vfs_vector.cpp
+++ b/src/core/file_sys/vfs_vector.cpp
@@ -47,7 +47,7 @@ std::size_t VectorVfsFile::Write(const u8* data_, std::size_t length, std::size_
47 if (offset + length > data.size()) 47 if (offset + length > data.size())
48 data.resize(offset + length); 48 data.resize(offset + length);
49 const auto write = std::min(length, data.size() - offset); 49 const auto write = std::min(length, data.size() - offset);
50 std::memcpy(data.data(), data_, write); 50 std::memcpy(data.data() + offset, data_, write);
51 return write; 51 return write;
52} 52}
53 53
diff --git a/src/core/frontend/emu_window.cpp b/src/core/frontend/emu_window.cpp
index 9dd493efb..e29afd630 100644
--- a/src/core/frontend/emu_window.cpp
+++ b/src/core/frontend/emu_window.cpp
@@ -67,7 +67,7 @@ static bool IsWithinTouchscreen(const Layout::FramebufferLayout& layout, unsigne
67 framebuffer_x >= layout.screen.left && framebuffer_x < layout.screen.right); 67 framebuffer_x >= layout.screen.left && framebuffer_x < layout.screen.right);
68} 68}
69 69
70std::tuple<unsigned, unsigned> EmuWindow::ClipToTouchScreen(unsigned new_x, unsigned new_y) { 70std::tuple<unsigned, unsigned> EmuWindow::ClipToTouchScreen(unsigned new_x, unsigned new_y) const {
71 new_x = std::max(new_x, framebuffer_layout.screen.left); 71 new_x = std::max(new_x, framebuffer_layout.screen.left);
72 new_x = std::min(new_x, framebuffer_layout.screen.right - 1); 72 new_x = std::min(new_x, framebuffer_layout.screen.right - 1);
73 73
diff --git a/src/core/frontend/emu_window.h b/src/core/frontend/emu_window.h
index 7006a37b3..d0bcb4660 100644
--- a/src/core/frontend/emu_window.h
+++ b/src/core/frontend/emu_window.h
@@ -166,7 +166,7 @@ private:
166 /** 166 /**
167 * Clip the provided coordinates to be inside the touchscreen area. 167 * Clip the provided coordinates to be inside the touchscreen area.
168 */ 168 */
169 std::tuple<unsigned, unsigned> ClipToTouchScreen(unsigned new_x, unsigned new_y); 169 std::tuple<unsigned, unsigned> ClipToTouchScreen(unsigned new_x, unsigned new_y) const;
170}; 170};
171 171
172} // namespace Core::Frontend 172} // namespace Core::Frontend
diff --git a/src/core/frontend/framebuffer_layout.cpp b/src/core/frontend/framebuffer_layout.cpp
index f8662d193..a1357179f 100644
--- a/src/core/frontend/framebuffer_layout.cpp
+++ b/src/core/frontend/framebuffer_layout.cpp
@@ -12,12 +12,12 @@ namespace Layout {
12 12
13// Finds the largest size subrectangle contained in window area that is confined to the aspect ratio 13// Finds the largest size subrectangle contained in window area that is confined to the aspect ratio
14template <class T> 14template <class T>
15static MathUtil::Rectangle<T> maxRectangle(MathUtil::Rectangle<T> window_area, 15static Common::Rectangle<T> MaxRectangle(Common::Rectangle<T> window_area,
16 float screen_aspect_ratio) { 16 float screen_aspect_ratio) {
17 float scale = std::min(static_cast<float>(window_area.GetWidth()), 17 float scale = std::min(static_cast<float>(window_area.GetWidth()),
18 window_area.GetHeight() / screen_aspect_ratio); 18 window_area.GetHeight() / screen_aspect_ratio);
19 return MathUtil::Rectangle<T>{0, 0, static_cast<T>(std::round(scale)), 19 return Common::Rectangle<T>{0, 0, static_cast<T>(std::round(scale)),
20 static_cast<T>(std::round(scale * screen_aspect_ratio))}; 20 static_cast<T>(std::round(scale * screen_aspect_ratio))};
21} 21}
22 22
23FramebufferLayout DefaultFrameLayout(unsigned width, unsigned height) { 23FramebufferLayout DefaultFrameLayout(unsigned width, unsigned height) {
@@ -29,8 +29,8 @@ FramebufferLayout DefaultFrameLayout(unsigned width, unsigned height) {
29 29
30 const float emulation_aspect_ratio{static_cast<float>(ScreenUndocked::Height) / 30 const float emulation_aspect_ratio{static_cast<float>(ScreenUndocked::Height) /
31 ScreenUndocked::Width}; 31 ScreenUndocked::Width};
32 MathUtil::Rectangle<unsigned> screen_window_area{0, 0, width, height}; 32 Common::Rectangle<unsigned> screen_window_area{0, 0, width, height};
33 MathUtil::Rectangle<unsigned> screen = maxRectangle(screen_window_area, emulation_aspect_ratio); 33 Common::Rectangle<unsigned> screen = MaxRectangle(screen_window_area, emulation_aspect_ratio);
34 34
35 float window_aspect_ratio = static_cast<float>(height) / width; 35 float window_aspect_ratio = static_cast<float>(height) / width;
36 36
diff --git a/src/core/frontend/framebuffer_layout.h b/src/core/frontend/framebuffer_layout.h
index e06647794..c2c63d08c 100644
--- a/src/core/frontend/framebuffer_layout.h
+++ b/src/core/frontend/framebuffer_layout.h
@@ -16,7 +16,7 @@ struct FramebufferLayout {
16 unsigned width{ScreenUndocked::Width}; 16 unsigned width{ScreenUndocked::Width};
17 unsigned height{ScreenUndocked::Height}; 17 unsigned height{ScreenUndocked::Height};
18 18
19 MathUtil::Rectangle<unsigned> screen; 19 Common::Rectangle<unsigned> screen;
20 20
21 /** 21 /**
22 * Returns the ratio of pixel size of the screen, compared to the native size of the undocked 22 * Returns the ratio of pixel size of the screen, compared to the native size of the undocked
diff --git a/src/core/frontend/input.h b/src/core/frontend/input.h
index 16fdcd376..7c11d7546 100644
--- a/src/core/frontend/input.h
+++ b/src/core/frontend/input.h
@@ -124,7 +124,7 @@ using AnalogDevice = InputDevice<std::tuple<float, float>>;
124 * Orientation is determined by right-hand rule. 124 * Orientation is determined by right-hand rule.
125 * Units: deg/sec 125 * Units: deg/sec
126 */ 126 */
127using MotionDevice = InputDevice<std::tuple<Math::Vec3<float>, Math::Vec3<float>>>; 127using MotionDevice = InputDevice<std::tuple<Common::Vec3<float>, Common::Vec3<float>>>;
128 128
129/** 129/**
130 * A touch device is an input device that returns a tuple of two floats and a bool. The floats are 130 * A touch device is an input device that returns a tuple of two floats and a bool. The floats are
diff --git a/src/core/gdbstub/gdbstub.cpp b/src/core/gdbstub/gdbstub.cpp
index a1cad4fcb..dafb32aae 100644
--- a/src/core/gdbstub/gdbstub.cpp
+++ b/src/core/gdbstub/gdbstub.cpp
@@ -507,8 +507,11 @@ static void RemoveBreakpoint(BreakpointType type, VAddr addr) {
507 507
508 LOG_DEBUG(Debug_GDBStub, "gdb: removed a breakpoint: {:016X} bytes at {:016X} of type {}", 508 LOG_DEBUG(Debug_GDBStub, "gdb: removed a breakpoint: {:016X} bytes at {:016X} of type {}",
509 bp->second.len, bp->second.addr, static_cast<int>(type)); 509 bp->second.len, bp->second.addr, static_cast<int>(type));
510 Memory::WriteBlock(bp->second.addr, bp->second.inst.data(), bp->second.inst.size()); 510
511 Core::System::GetInstance().InvalidateCpuInstructionCaches(); 511 if (type == BreakpointType::Execute) {
512 Memory::WriteBlock(bp->second.addr, bp->second.inst.data(), bp->second.inst.size());
513 Core::System::GetInstance().InvalidateCpuInstructionCaches();
514 }
512 p.erase(addr); 515 p.erase(addr);
513} 516}
514 517
@@ -1057,9 +1060,12 @@ static bool CommitBreakpoint(BreakpointType type, VAddr addr, u64 len) {
1057 breakpoint.addr = addr; 1060 breakpoint.addr = addr;
1058 breakpoint.len = len; 1061 breakpoint.len = len;
1059 Memory::ReadBlock(addr, breakpoint.inst.data(), breakpoint.inst.size()); 1062 Memory::ReadBlock(addr, breakpoint.inst.data(), breakpoint.inst.size());
1063
1060 static constexpr std::array<u8, 4> btrap{0x00, 0x7d, 0x20, 0xd4}; 1064 static constexpr std::array<u8, 4> btrap{0x00, 0x7d, 0x20, 0xd4};
1061 Memory::WriteBlock(addr, btrap.data(), btrap.size()); 1065 if (type == BreakpointType::Execute) {
1062 Core::System::GetInstance().InvalidateCpuInstructionCaches(); 1066 Memory::WriteBlock(addr, btrap.data(), btrap.size());
1067 Core::System::GetInstance().InvalidateCpuInstructionCaches();
1068 }
1063 p.insert({addr, breakpoint}); 1069 p.insert({addr, breakpoint});
1064 1070
1065 LOG_DEBUG(Debug_GDBStub, "gdb: added {} breakpoint: {:016X} bytes at {:016X}", 1071 LOG_DEBUG(Debug_GDBStub, "gdb: added {} breakpoint: {:016X} bytes at {:016X}",
diff --git a/src/core/hle/ipc.h b/src/core/hle/ipc.h
index 96c8677d2..fae54bcc7 100644
--- a/src/core/hle/ipc.h
+++ b/src/core/hle/ipc.h
@@ -4,10 +4,10 @@
4 4
5#pragma once 5#pragma once
6 6
7#include "common/bit_field.h"
8#include "common/common_funcs.h"
7#include "common/common_types.h" 9#include "common/common_types.h"
8#include "common/swap.h" 10#include "common/swap.h"
9#include "core/hle/kernel/errors.h"
10#include "core/memory.h"
11 11
12namespace IPC { 12namespace IPC {
13 13
diff --git a/src/core/hle/ipc_helpers.h b/src/core/hle/ipc_helpers.h
index 90f276ee8..68406eb63 100644
--- a/src/core/hle/ipc_helpers.h
+++ b/src/core/hle/ipc_helpers.h
@@ -19,9 +19,12 @@
19#include "core/hle/kernel/hle_ipc.h" 19#include "core/hle/kernel/hle_ipc.h"
20#include "core/hle/kernel/object.h" 20#include "core/hle/kernel/object.h"
21#include "core/hle/kernel/server_session.h" 21#include "core/hle/kernel/server_session.h"
22#include "core/hle/result.h"
22 23
23namespace IPC { 24namespace IPC {
24 25
26constexpr ResultCode ERR_REMOTE_PROCESS_DEAD{ErrorModule::HIPC, 301};
27
25class RequestHelperBase { 28class RequestHelperBase {
26protected: 29protected:
27 Kernel::HLERequestContext* context = nullptr; 30 Kernel::HLERequestContext* context = nullptr;
@@ -272,6 +275,20 @@ inline void ResponseBuilder::Push(u64 value) {
272} 275}
273 276
274template <> 277template <>
278inline void ResponseBuilder::Push(float value) {
279 u32 integral;
280 std::memcpy(&integral, &value, sizeof(u32));
281 Push(integral);
282}
283
284template <>
285inline void ResponseBuilder::Push(double value) {
286 u64 integral;
287 std::memcpy(&integral, &value, sizeof(u64));
288 Push(integral);
289}
290
291template <>
275inline void ResponseBuilder::Push(bool value) { 292inline void ResponseBuilder::Push(bool value) {
276 Push(static_cast<u8>(value)); 293 Push(static_cast<u8>(value));
277} 294}
@@ -350,7 +367,7 @@ public:
350 template <class T> 367 template <class T>
351 std::shared_ptr<T> PopIpcInterface() { 368 std::shared_ptr<T> PopIpcInterface() {
352 ASSERT(context->Session()->IsDomain()); 369 ASSERT(context->Session()->IsDomain());
353 ASSERT(context->GetDomainMessageHeader()->input_object_count > 0); 370 ASSERT(context->GetDomainMessageHeader().input_object_count > 0);
354 return context->GetDomainRequestHandler<T>(Pop<u32>() - 1); 371 return context->GetDomainRequestHandler<T>(Pop<u32>() - 1);
355 } 372 }
356}; 373};
@@ -362,6 +379,11 @@ inline u32 RequestParser::Pop() {
362 return cmdbuf[index++]; 379 return cmdbuf[index++];
363} 380}
364 381
382template <>
383inline s32 RequestParser::Pop() {
384 return static_cast<s32>(Pop<u32>());
385}
386
365template <typename T> 387template <typename T>
366void RequestParser::PopRaw(T& value) { 388void RequestParser::PopRaw(T& value) {
367 std::memcpy(&value, cmdbuf + index, sizeof(T)); 389 std::memcpy(&value, cmdbuf + index, sizeof(T));
@@ -393,11 +415,37 @@ inline u64 RequestParser::Pop() {
393} 415}
394 416
395template <> 417template <>
418inline s8 RequestParser::Pop() {
419 return static_cast<s8>(Pop<u8>());
420}
421
422template <>
423inline s16 RequestParser::Pop() {
424 return static_cast<s16>(Pop<u16>());
425}
426
427template <>
396inline s64 RequestParser::Pop() { 428inline s64 RequestParser::Pop() {
397 return static_cast<s64>(Pop<u64>()); 429 return static_cast<s64>(Pop<u64>());
398} 430}
399 431
400template <> 432template <>
433inline float RequestParser::Pop() {
434 const u32 value = Pop<u32>();
435 float real;
436 std::memcpy(&real, &value, sizeof(real));
437 return real;
438}
439
440template <>
441inline double RequestParser::Pop() {
442 const u64 value = Pop<u64>();
443 float real;
444 std::memcpy(&real, &value, sizeof(real));
445 return real;
446}
447
448template <>
401inline bool RequestParser::Pop() { 449inline bool RequestParser::Pop() {
402 return Pop<u8>() != 0; 450 return Pop<u8>() != 0;
403} 451}
diff --git a/src/core/hle/kernel/address_arbiter.cpp b/src/core/hle/kernel/address_arbiter.cpp
index 57157beb4..352190da8 100644
--- a/src/core/hle/kernel/address_arbiter.cpp
+++ b/src/core/hle/kernel/address_arbiter.cpp
@@ -9,6 +9,7 @@
9#include "common/common_types.h" 9#include "common/common_types.h"
10#include "core/core.h" 10#include "core/core.h"
11#include "core/core_cpu.h" 11#include "core/core_cpu.h"
12#include "core/hle/kernel/address_arbiter.h"
12#include "core/hle/kernel/errors.h" 13#include "core/hle/kernel/errors.h"
13#include "core/hle/kernel/object.h" 14#include "core/hle/kernel/object.h"
14#include "core/hle/kernel/process.h" 15#include "core/hle/kernel/process.h"
@@ -18,58 +19,15 @@
18#include "core/memory.h" 19#include "core/memory.h"
19 20
20namespace Kernel { 21namespace Kernel {
21namespace AddressArbiter { 22namespace {
22
23// Performs actual address waiting logic.
24static ResultCode WaitForAddress(VAddr address, s64 timeout) {
25 SharedPtr<Thread> current_thread = GetCurrentThread();
26 current_thread->SetArbiterWaitAddress(address);
27 current_thread->SetStatus(ThreadStatus::WaitArb);
28 current_thread->InvalidateWakeupCallback();
29
30 current_thread->WakeAfterDelay(timeout);
31
32 Core::System::GetInstance().CpuCore(current_thread->GetProcessorID()).PrepareReschedule();
33 return RESULT_TIMEOUT;
34}
35
36// Gets the threads waiting on an address.
37static std::vector<SharedPtr<Thread>> GetThreadsWaitingOnAddress(VAddr address) {
38 const auto RetrieveWaitingThreads = [](std::size_t core_index,
39 std::vector<SharedPtr<Thread>>& waiting_threads,
40 VAddr arb_addr) {
41 const auto& scheduler = Core::System::GetInstance().Scheduler(core_index);
42 const auto& thread_list = scheduler.GetThreadList();
43
44 for (const auto& thread : thread_list) {
45 if (thread->GetArbiterWaitAddress() == arb_addr)
46 waiting_threads.push_back(thread);
47 }
48 };
49
50 // Retrieve all threads that are waiting for this address.
51 std::vector<SharedPtr<Thread>> threads;
52 RetrieveWaitingThreads(0, threads, address);
53 RetrieveWaitingThreads(1, threads, address);
54 RetrieveWaitingThreads(2, threads, address);
55 RetrieveWaitingThreads(3, threads, address);
56
57 // Sort them by priority, such that the highest priority ones come first.
58 std::sort(threads.begin(), threads.end(),
59 [](const SharedPtr<Thread>& lhs, const SharedPtr<Thread>& rhs) {
60 return lhs->GetPriority() < rhs->GetPriority();
61 });
62
63 return threads;
64}
65
66// Wake up num_to_wake (or all) threads in a vector. 23// Wake up num_to_wake (or all) threads in a vector.
67static void WakeThreads(std::vector<SharedPtr<Thread>>& waiting_threads, s32 num_to_wake) { 24void WakeThreads(const std::vector<SharedPtr<Thread>>& waiting_threads, s32 num_to_wake) {
68 // Only process up to 'target' threads, unless 'target' is <= 0, in which case process 25 // Only process up to 'target' threads, unless 'target' is <= 0, in which case process
69 // them all. 26 // them all.
70 std::size_t last = waiting_threads.size(); 27 std::size_t last = waiting_threads.size();
71 if (num_to_wake > 0) 28 if (num_to_wake > 0) {
72 last = num_to_wake; 29 last = num_to_wake;
30 }
73 31
74 // Signal the waiting threads. 32 // Signal the waiting threads.
75 for (std::size_t i = 0; i < last; i++) { 33 for (std::size_t i = 0; i < last; i++) {
@@ -79,42 +37,55 @@ static void WakeThreads(std::vector<SharedPtr<Thread>>& waiting_threads, s32 num
79 waiting_threads[i]->ResumeFromWait(); 37 waiting_threads[i]->ResumeFromWait();
80 } 38 }
81} 39}
40} // Anonymous namespace
41
42AddressArbiter::AddressArbiter(Core::System& system) : system{system} {}
43AddressArbiter::~AddressArbiter() = default;
44
45ResultCode AddressArbiter::SignalToAddress(VAddr address, SignalType type, s32 value,
46 s32 num_to_wake) {
47 switch (type) {
48 case SignalType::Signal:
49 return SignalToAddressOnly(address, num_to_wake);
50 case SignalType::IncrementAndSignalIfEqual:
51 return IncrementAndSignalToAddressIfEqual(address, value, num_to_wake);
52 case SignalType::ModifyByWaitingCountAndSignalIfEqual:
53 return ModifyByWaitingCountAndSignalToAddressIfEqual(address, value, num_to_wake);
54 default:
55 return ERR_INVALID_ENUM_VALUE;
56 }
57}
82 58
83// Signals an address being waited on. 59ResultCode AddressArbiter::SignalToAddressOnly(VAddr address, s32 num_to_wake) {
84ResultCode SignalToAddress(VAddr address, s32 num_to_wake) { 60 const std::vector<SharedPtr<Thread>> waiting_threads = GetThreadsWaitingOnAddress(address);
85 std::vector<SharedPtr<Thread>> waiting_threads = GetThreadsWaitingOnAddress(address);
86
87 WakeThreads(waiting_threads, num_to_wake); 61 WakeThreads(waiting_threads, num_to_wake);
88 return RESULT_SUCCESS; 62 return RESULT_SUCCESS;
89} 63}
90 64
91// Signals an address being waited on and increments its value if equal to the value argument. 65ResultCode AddressArbiter::IncrementAndSignalToAddressIfEqual(VAddr address, s32 value,
92ResultCode IncrementAndSignalToAddressIfEqual(VAddr address, s32 value, s32 num_to_wake) { 66 s32 num_to_wake) {
93 // Ensure that we can write to the address. 67 // Ensure that we can write to the address.
94 if (!Memory::IsValidVirtualAddress(address)) { 68 if (!Memory::IsValidVirtualAddress(address)) {
95 return ERR_INVALID_ADDRESS_STATE; 69 return ERR_INVALID_ADDRESS_STATE;
96 } 70 }
97 71
98 if (static_cast<s32>(Memory::Read32(address)) == value) { 72 if (static_cast<s32>(Memory::Read32(address)) != value) {
99 Memory::Write32(address, static_cast<u32>(value + 1));
100 } else {
101 return ERR_INVALID_STATE; 73 return ERR_INVALID_STATE;
102 } 74 }
103 75
104 return SignalToAddress(address, num_to_wake); 76 Memory::Write32(address, static_cast<u32>(value + 1));
77 return SignalToAddressOnly(address, num_to_wake);
105} 78}
106 79
107// Signals an address being waited on and modifies its value based on waiting thread count if equal 80ResultCode AddressArbiter::ModifyByWaitingCountAndSignalToAddressIfEqual(VAddr address, s32 value,
108// to the value argument. 81 s32 num_to_wake) {
109ResultCode ModifyByWaitingCountAndSignalToAddressIfEqual(VAddr address, s32 value,
110 s32 num_to_wake) {
111 // Ensure that we can write to the address. 82 // Ensure that we can write to the address.
112 if (!Memory::IsValidVirtualAddress(address)) { 83 if (!Memory::IsValidVirtualAddress(address)) {
113 return ERR_INVALID_ADDRESS_STATE; 84 return ERR_INVALID_ADDRESS_STATE;
114 } 85 }
115 86
116 // Get threads waiting on the address. 87 // Get threads waiting on the address.
117 std::vector<SharedPtr<Thread>> waiting_threads = GetThreadsWaitingOnAddress(address); 88 const std::vector<SharedPtr<Thread>> waiting_threads = GetThreadsWaitingOnAddress(address);
118 89
119 // Determine the modified value depending on the waiting count. 90 // Determine the modified value depending on the waiting count.
120 s32 updated_value; 91 s32 updated_value;
@@ -126,41 +97,54 @@ ResultCode ModifyByWaitingCountAndSignalToAddressIfEqual(VAddr address, s32 valu
126 updated_value = value; 97 updated_value = value;
127 } 98 }
128 99
129 if (static_cast<s32>(Memory::Read32(address)) == value) { 100 if (static_cast<s32>(Memory::Read32(address)) != value) {
130 Memory::Write32(address, static_cast<u32>(updated_value));
131 } else {
132 return ERR_INVALID_STATE; 101 return ERR_INVALID_STATE;
133 } 102 }
134 103
104 Memory::Write32(address, static_cast<u32>(updated_value));
135 WakeThreads(waiting_threads, num_to_wake); 105 WakeThreads(waiting_threads, num_to_wake);
136 return RESULT_SUCCESS; 106 return RESULT_SUCCESS;
137} 107}
138 108
139// Waits on an address if the value passed is less than the argument value, optionally decrementing. 109ResultCode AddressArbiter::WaitForAddress(VAddr address, ArbitrationType type, s32 value,
140ResultCode WaitForAddressIfLessThan(VAddr address, s32 value, s64 timeout, bool should_decrement) { 110 s64 timeout_ns) {
111 switch (type) {
112 case ArbitrationType::WaitIfLessThan:
113 return WaitForAddressIfLessThan(address, value, timeout_ns, false);
114 case ArbitrationType::DecrementAndWaitIfLessThan:
115 return WaitForAddressIfLessThan(address, value, timeout_ns, true);
116 case ArbitrationType::WaitIfEqual:
117 return WaitForAddressIfEqual(address, value, timeout_ns);
118 default:
119 return ERR_INVALID_ENUM_VALUE;
120 }
121}
122
123ResultCode AddressArbiter::WaitForAddressIfLessThan(VAddr address, s32 value, s64 timeout,
124 bool should_decrement) {
141 // Ensure that we can read the address. 125 // Ensure that we can read the address.
142 if (!Memory::IsValidVirtualAddress(address)) { 126 if (!Memory::IsValidVirtualAddress(address)) {
143 return ERR_INVALID_ADDRESS_STATE; 127 return ERR_INVALID_ADDRESS_STATE;
144 } 128 }
145 129
146 s32 cur_value = static_cast<s32>(Memory::Read32(address)); 130 const s32 cur_value = static_cast<s32>(Memory::Read32(address));
147 if (cur_value < value) { 131 if (cur_value >= value) {
148 if (should_decrement) {
149 Memory::Write32(address, static_cast<u32>(cur_value - 1));
150 }
151 } else {
152 return ERR_INVALID_STATE; 132 return ERR_INVALID_STATE;
153 } 133 }
134
135 if (should_decrement) {
136 Memory::Write32(address, static_cast<u32>(cur_value - 1));
137 }
138
154 // Short-circuit without rescheduling, if timeout is zero. 139 // Short-circuit without rescheduling, if timeout is zero.
155 if (timeout == 0) { 140 if (timeout == 0) {
156 return RESULT_TIMEOUT; 141 return RESULT_TIMEOUT;
157 } 142 }
158 143
159 return WaitForAddress(address, timeout); 144 return WaitForAddressImpl(address, timeout);
160} 145}
161 146
162// Waits on an address if the value passed is equal to the argument value. 147ResultCode AddressArbiter::WaitForAddressIfEqual(VAddr address, s32 value, s64 timeout) {
163ResultCode WaitForAddressIfEqual(VAddr address, s32 value, s64 timeout) {
164 // Ensure that we can read the address. 148 // Ensure that we can read the address.
165 if (!Memory::IsValidVirtualAddress(address)) { 149 if (!Memory::IsValidVirtualAddress(address)) {
166 return ERR_INVALID_ADDRESS_STATE; 150 return ERR_INVALID_ADDRESS_STATE;
@@ -174,7 +158,48 @@ ResultCode WaitForAddressIfEqual(VAddr address, s32 value, s64 timeout) {
174 return RESULT_TIMEOUT; 158 return RESULT_TIMEOUT;
175 } 159 }
176 160
177 return WaitForAddress(address, timeout); 161 return WaitForAddressImpl(address, timeout);
162}
163
164ResultCode AddressArbiter::WaitForAddressImpl(VAddr address, s64 timeout) {
165 SharedPtr<Thread> current_thread = system.CurrentScheduler().GetCurrentThread();
166 current_thread->SetArbiterWaitAddress(address);
167 current_thread->SetStatus(ThreadStatus::WaitArb);
168 current_thread->InvalidateWakeupCallback();
169
170 current_thread->WakeAfterDelay(timeout);
171
172 system.CpuCore(current_thread->GetProcessorID()).PrepareReschedule();
173 return RESULT_TIMEOUT;
174}
175
176std::vector<SharedPtr<Thread>> AddressArbiter::GetThreadsWaitingOnAddress(VAddr address) const {
177 const auto RetrieveWaitingThreads = [this](std::size_t core_index,
178 std::vector<SharedPtr<Thread>>& waiting_threads,
179 VAddr arb_addr) {
180 const auto& scheduler = system.Scheduler(core_index);
181 const auto& thread_list = scheduler.GetThreadList();
182
183 for (const auto& thread : thread_list) {
184 if (thread->GetArbiterWaitAddress() == arb_addr) {
185 waiting_threads.push_back(thread);
186 }
187 }
188 };
189
190 // Retrieve all threads that are waiting for this address.
191 std::vector<SharedPtr<Thread>> threads;
192 RetrieveWaitingThreads(0, threads, address);
193 RetrieveWaitingThreads(1, threads, address);
194 RetrieveWaitingThreads(2, threads, address);
195 RetrieveWaitingThreads(3, threads, address);
196
197 // Sort them by priority, such that the highest priority ones come first.
198 std::sort(threads.begin(), threads.end(),
199 [](const SharedPtr<Thread>& lhs, const SharedPtr<Thread>& rhs) {
200 return lhs->GetPriority() < rhs->GetPriority();
201 });
202
203 return threads;
178} 204}
179} // namespace AddressArbiter
180} // namespace Kernel 205} // namespace Kernel
diff --git a/src/core/hle/kernel/address_arbiter.h b/src/core/hle/kernel/address_arbiter.h
index e3657b8e9..ed0d0e69f 100644
--- a/src/core/hle/kernel/address_arbiter.h
+++ b/src/core/hle/kernel/address_arbiter.h
@@ -4,31 +4,77 @@
4 4
5#pragma once 5#pragma once
6 6
7#include <vector>
8
7#include "common/common_types.h" 9#include "common/common_types.h"
10#include "core/hle/kernel/object.h"
8 11
9union ResultCode; 12union ResultCode;
10 13
14namespace Core {
15class System;
16}
17
11namespace Kernel { 18namespace Kernel {
12 19
13namespace AddressArbiter { 20class Thread;
14enum class ArbitrationType {
15 WaitIfLessThan = 0,
16 DecrementAndWaitIfLessThan = 1,
17 WaitIfEqual = 2,
18};
19 21
20enum class SignalType { 22class AddressArbiter {
21 Signal = 0, 23public:
22 IncrementAndSignalIfEqual = 1, 24 enum class ArbitrationType {
23 ModifyByWaitingCountAndSignalIfEqual = 2, 25 WaitIfLessThan = 0,
24}; 26 DecrementAndWaitIfLessThan = 1,
27 WaitIfEqual = 2,
28 };
29
30 enum class SignalType {
31 Signal = 0,
32 IncrementAndSignalIfEqual = 1,
33 ModifyByWaitingCountAndSignalIfEqual = 2,
34 };
35
36 explicit AddressArbiter(Core::System& system);
37 ~AddressArbiter();
38
39 AddressArbiter(const AddressArbiter&) = delete;
40 AddressArbiter& operator=(const AddressArbiter&) = delete;
41
42 AddressArbiter(AddressArbiter&&) = default;
43 AddressArbiter& operator=(AddressArbiter&&) = delete;
44
45 /// Signals an address being waited on with a particular signaling type.
46 ResultCode SignalToAddress(VAddr address, SignalType type, s32 value, s32 num_to_wake);
25 47
26ResultCode SignalToAddress(VAddr address, s32 num_to_wake); 48 /// Waits on an address with a particular arbitration type.
27ResultCode IncrementAndSignalToAddressIfEqual(VAddr address, s32 value, s32 num_to_wake); 49 ResultCode WaitForAddress(VAddr address, ArbitrationType type, s32 value, s64 timeout_ns);
28ResultCode ModifyByWaitingCountAndSignalToAddressIfEqual(VAddr address, s32 value, s32 num_to_wake);
29 50
30ResultCode WaitForAddressIfLessThan(VAddr address, s32 value, s64 timeout, bool should_decrement); 51private:
31ResultCode WaitForAddressIfEqual(VAddr address, s32 value, s64 timeout); 52 /// Signals an address being waited on.
32} // namespace AddressArbiter 53 ResultCode SignalToAddressOnly(VAddr address, s32 num_to_wake);
54
55 /// Signals an address being waited on and increments its value if equal to the value argument.
56 ResultCode IncrementAndSignalToAddressIfEqual(VAddr address, s32 value, s32 num_to_wake);
57
58 /// Signals an address being waited on and modifies its value based on waiting thread count if
59 /// equal to the value argument.
60 ResultCode ModifyByWaitingCountAndSignalToAddressIfEqual(VAddr address, s32 value,
61 s32 num_to_wake);
62
63 /// Waits on an address if the value passed is less than the argument value,
64 /// optionally decrementing.
65 ResultCode WaitForAddressIfLessThan(VAddr address, s32 value, s64 timeout,
66 bool should_decrement);
67
68 /// Waits on an address if the value passed is equal to the argument value.
69 ResultCode WaitForAddressIfEqual(VAddr address, s32 value, s64 timeout);
70
71 // Waits on the given address with a timeout in nanoseconds
72 ResultCode WaitForAddressImpl(VAddr address, s64 timeout);
73
74 // Gets the threads waiting on an address.
75 std::vector<SharedPtr<Thread>> GetThreadsWaitingOnAddress(VAddr address) const;
76
77 Core::System& system;
78};
33 79
34} // namespace Kernel 80} // namespace Kernel
diff --git a/src/core/hle/kernel/client_port.cpp b/src/core/hle/kernel/client_port.cpp
index d4c91d529..aa432658e 100644
--- a/src/core/hle/kernel/client_port.cpp
+++ b/src/core/hle/kernel/client_port.cpp
@@ -33,10 +33,11 @@ ResultVal<SharedPtr<ClientSession>> ClientPort::Connect() {
33 // Create a new session pair, let the created sessions inherit the parent port's HLE handler. 33 // Create a new session pair, let the created sessions inherit the parent port's HLE handler.
34 auto sessions = ServerSession::CreateSessionPair(kernel, server_port->GetName(), this); 34 auto sessions = ServerSession::CreateSessionPair(kernel, server_port->GetName(), this);
35 35
36 if (server_port->hle_handler) 36 if (server_port->HasHLEHandler()) {
37 server_port->hle_handler->ClientConnected(std::get<SharedPtr<ServerSession>>(sessions)); 37 server_port->GetHLEHandler()->ClientConnected(std::get<SharedPtr<ServerSession>>(sessions));
38 else 38 } else {
39 server_port->pending_sessions.push_back(std::get<SharedPtr<ServerSession>>(sessions)); 39 server_port->AppendPendingSession(std::get<SharedPtr<ServerSession>>(sessions));
40 }
40 41
41 // Wake the threads waiting on the ServerPort 42 // Wake the threads waiting on the ServerPort
42 server_port->WakeupAllWaitingThreads(); 43 server_port->WakeupAllWaitingThreads();
diff --git a/src/core/hle/kernel/client_session.cpp b/src/core/hle/kernel/client_session.cpp
index 704e82824..c17baa50a 100644
--- a/src/core/hle/kernel/client_session.cpp
+++ b/src/core/hle/kernel/client_session.cpp
@@ -17,21 +17,11 @@ ClientSession::~ClientSession() {
17 // This destructor will be called automatically when the last ClientSession handle is closed by 17 // This destructor will be called automatically when the last ClientSession handle is closed by
18 // the emulated application. 18 // the emulated application.
19 19
20 // Local references to ServerSession and SessionRequestHandler are necessary to guarantee they 20 // A local reference to the ServerSession is necessary to guarantee it
21 // will be kept alive until after ClientDisconnected() returns. 21 // will be kept alive until after ClientDisconnected() returns.
22 SharedPtr<ServerSession> server = parent->server; 22 SharedPtr<ServerSession> server = parent->server;
23 if (server) { 23 if (server) {
24 std::shared_ptr<SessionRequestHandler> hle_handler = server->hle_handler; 24 server->ClientDisconnected();
25 if (hle_handler)
26 hle_handler->ClientDisconnected(server);
27
28 // TODO(Subv): Force a wake up of all the ServerSession's waiting threads and set
29 // their WaitSynchronization result to 0xC920181A.
30
31 // Clean up the list of client threads with pending requests, they are unneeded now that the
32 // client endpoint is closed.
33 server->pending_requesting_threads.clear();
34 server->currently_handling = nullptr;
35 } 25 }
36 26
37 parent->client = nullptr; 27 parent->client = nullptr;
diff --git a/src/core/hle/kernel/client_session.h b/src/core/hle/kernel/client_session.h
index 4c18de69c..b1f39aad7 100644
--- a/src/core/hle/kernel/client_session.h
+++ b/src/core/hle/kernel/client_session.h
@@ -36,14 +36,15 @@ public:
36 36
37 ResultCode SendSyncRequest(SharedPtr<Thread> thread); 37 ResultCode SendSyncRequest(SharedPtr<Thread> thread);
38 38
39 std::string name; ///< Name of client port (optional) 39private:
40 explicit ClientSession(KernelCore& kernel);
41 ~ClientSession() override;
40 42
41 /// The parent session, which links to the server endpoint. 43 /// The parent session, which links to the server endpoint.
42 std::shared_ptr<Session> parent; 44 std::shared_ptr<Session> parent;
43 45
44private: 46 /// Name of the client session (optional)
45 explicit ClientSession(KernelCore& kernel); 47 std::string name;
46 ~ClientSession() override;
47}; 48};
48 49
49} // namespace Kernel 50} // namespace Kernel
diff --git a/src/core/hle/kernel/code_set.cpp b/src/core/hle/kernel/code_set.cpp
new file mode 100644
index 000000000..1f434e9af
--- /dev/null
+++ b/src/core/hle/kernel/code_set.cpp
@@ -0,0 +1,12 @@
1// Copyright 2019 yuzu emulator team
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "core/hle/kernel/code_set.h"
6
7namespace Kernel {
8
9CodeSet::CodeSet() = default;
10CodeSet::~CodeSet() = default;
11
12} // namespace Kernel
diff --git a/src/core/hle/kernel/code_set.h b/src/core/hle/kernel/code_set.h
new file mode 100644
index 000000000..834fd23d2
--- /dev/null
+++ b/src/core/hle/kernel/code_set.h
@@ -0,0 +1,90 @@
1// Copyright 2019 yuzu emulator team
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <cstddef>
8#include <memory>
9#include <vector>
10
11#include "common/common_types.h"
12
13namespace Kernel {
14
15/**
16 * Represents executable data that may be loaded into a kernel process.
17 *
18 * A code set consists of three basic segments:
19 * - A code (AKA text) segment,
20 * - A read-only data segment (rodata)
21 * - A data segment
22 *
23 * The code segment is the portion of the object file that contains
24 * executable instructions.
25 *
26 * The read-only data segment in the portion of the object file that
27 * contains (as one would expect) read-only data, such as fixed constant
28 * values and data structures.
29 *
30 * The data segment is similar to the read-only data segment -- it contains
31 * variables and data structures that have predefined values, however,
32 * entities within this segment can be modified.
33 */
34struct CodeSet final {
35 /// A single segment within a code set.
36 struct Segment final {
37 /// The byte offset that this segment is located at.
38 std::size_t offset = 0;
39
40 /// The address to map this segment to.
41 VAddr addr = 0;
42
43 /// The size of this segment in bytes.
44 u32 size = 0;
45 };
46
47 explicit CodeSet();
48 ~CodeSet();
49
50 CodeSet(const CodeSet&) = delete;
51 CodeSet& operator=(const CodeSet&) = delete;
52
53 CodeSet(CodeSet&&) = default;
54 CodeSet& operator=(CodeSet&&) = default;
55
56 Segment& CodeSegment() {
57 return segments[0];
58 }
59
60 const Segment& CodeSegment() const {
61 return segments[0];
62 }
63
64 Segment& RODataSegment() {
65 return segments[1];
66 }
67
68 const Segment& RODataSegment() const {
69 return segments[1];
70 }
71
72 Segment& DataSegment() {
73 return segments[2];
74 }
75
76 const Segment& DataSegment() const {
77 return segments[2];
78 }
79
80 /// The overall data that backs this code set.
81 std::shared_ptr<std::vector<u8>> memory;
82
83 /// The segments that comprise this code set.
84 std::array<Segment, 3> segments;
85
86 /// The entry point address for this code set.
87 VAddr entrypoint = 0;
88};
89
90} // namespace Kernel
diff --git a/src/core/hle/kernel/errors.h b/src/core/hle/kernel/errors.h
index d17eb0cb6..8097b3863 100644
--- a/src/core/hle/kernel/errors.h
+++ b/src/core/hle/kernel/errors.h
@@ -14,6 +14,7 @@ constexpr ResultCode ERR_MAX_CONNECTIONS_REACHED{ErrorModule::Kernel, 7};
14constexpr ResultCode ERR_INVALID_CAPABILITY_DESCRIPTOR{ErrorModule::Kernel, 14}; 14constexpr ResultCode ERR_INVALID_CAPABILITY_DESCRIPTOR{ErrorModule::Kernel, 14};
15constexpr ResultCode ERR_INVALID_SIZE{ErrorModule::Kernel, 101}; 15constexpr ResultCode ERR_INVALID_SIZE{ErrorModule::Kernel, 101};
16constexpr ResultCode ERR_INVALID_ADDRESS{ErrorModule::Kernel, 102}; 16constexpr ResultCode ERR_INVALID_ADDRESS{ErrorModule::Kernel, 102};
17constexpr ResultCode ERR_OUT_OF_MEMORY{ErrorModule::Kernel, 104};
17constexpr ResultCode ERR_HANDLE_TABLE_FULL{ErrorModule::Kernel, 105}; 18constexpr ResultCode ERR_HANDLE_TABLE_FULL{ErrorModule::Kernel, 105};
18constexpr ResultCode ERR_INVALID_ADDRESS_STATE{ErrorModule::Kernel, 106}; 19constexpr ResultCode ERR_INVALID_ADDRESS_STATE{ErrorModule::Kernel, 106};
19constexpr ResultCode ERR_INVALID_MEMORY_PERMISSIONS{ErrorModule::Kernel, 108}; 20constexpr ResultCode ERR_INVALID_MEMORY_PERMISSIONS{ErrorModule::Kernel, 108};
diff --git a/src/core/hle/kernel/handle_table.cpp b/src/core/hle/kernel/handle_table.cpp
index c8acde5b1..bdfaa977f 100644
--- a/src/core/hle/kernel/handle_table.cpp
+++ b/src/core/hle/kernel/handle_table.cpp
@@ -14,32 +14,47 @@
14namespace Kernel { 14namespace Kernel {
15namespace { 15namespace {
16constexpr u16 GetSlot(Handle handle) { 16constexpr u16 GetSlot(Handle handle) {
17 return handle >> 15; 17 return static_cast<u16>(handle >> 15);
18} 18}
19 19
20constexpr u16 GetGeneration(Handle handle) { 20constexpr u16 GetGeneration(Handle handle) {
21 return handle & 0x7FFF; 21 return static_cast<u16>(handle & 0x7FFF);
22} 22}
23} // Anonymous namespace 23} // Anonymous namespace
24 24
25HandleTable::HandleTable() { 25HandleTable::HandleTable() {
26 next_generation = 1;
27 Clear(); 26 Clear();
28} 27}
29 28
30HandleTable::~HandleTable() = default; 29HandleTable::~HandleTable() = default;
31 30
31ResultCode HandleTable::SetSize(s32 handle_table_size) {
32 if (static_cast<u32>(handle_table_size) > MAX_COUNT) {
33 return ERR_OUT_OF_MEMORY;
34 }
35
36 // Values less than or equal to zero indicate to use the maximum allowable
37 // size for the handle table in the actual kernel, so we ignore the given
38 // value in that case, since we assume this by default unless this function
39 // is called.
40 if (handle_table_size > 0) {
41 table_size = static_cast<u16>(handle_table_size);
42 }
43
44 return RESULT_SUCCESS;
45}
46
32ResultVal<Handle> HandleTable::Create(SharedPtr<Object> obj) { 47ResultVal<Handle> HandleTable::Create(SharedPtr<Object> obj) {
33 DEBUG_ASSERT(obj != nullptr); 48 DEBUG_ASSERT(obj != nullptr);
34 49
35 u16 slot = next_free_slot; 50 const u16 slot = next_free_slot;
36 if (slot >= generations.size()) { 51 if (slot >= table_size) {
37 LOG_ERROR(Kernel, "Unable to allocate Handle, too many slots in use."); 52 LOG_ERROR(Kernel, "Unable to allocate Handle, too many slots in use.");
38 return ERR_HANDLE_TABLE_FULL; 53 return ERR_HANDLE_TABLE_FULL;
39 } 54 }
40 next_free_slot = generations[slot]; 55 next_free_slot = generations[slot];
41 56
42 u16 generation = next_generation++; 57 const u16 generation = next_generation++;
43 58
44 // Overflow count so it fits in the 15 bits dedicated to the generation in the handle. 59 // Overflow count so it fits in the 15 bits dedicated to the generation in the handle.
45 // Horizon OS uses zero to represent an invalid handle, so skip to 1. 60 // Horizon OS uses zero to represent an invalid handle, so skip to 1.
@@ -64,10 +79,11 @@ ResultVal<Handle> HandleTable::Duplicate(Handle handle) {
64} 79}
65 80
66ResultCode HandleTable::Close(Handle handle) { 81ResultCode HandleTable::Close(Handle handle) {
67 if (!IsValid(handle)) 82 if (!IsValid(handle)) {
68 return ERR_INVALID_HANDLE; 83 return ERR_INVALID_HANDLE;
84 }
69 85
70 u16 slot = GetSlot(handle); 86 const u16 slot = GetSlot(handle);
71 87
72 objects[slot] = nullptr; 88 objects[slot] = nullptr;
73 89
@@ -77,10 +93,10 @@ ResultCode HandleTable::Close(Handle handle) {
77} 93}
78 94
79bool HandleTable::IsValid(Handle handle) const { 95bool HandleTable::IsValid(Handle handle) const {
80 std::size_t slot = GetSlot(handle); 96 const std::size_t slot = GetSlot(handle);
81 u16 generation = GetGeneration(handle); 97 const u16 generation = GetGeneration(handle);
82 98
83 return slot < MAX_COUNT && objects[slot] != nullptr && generations[slot] == generation; 99 return slot < table_size && objects[slot] != nullptr && generations[slot] == generation;
84} 100}
85 101
86SharedPtr<Object> HandleTable::GetGeneric(Handle handle) const { 102SharedPtr<Object> HandleTable::GetGeneric(Handle handle) const {
@@ -97,7 +113,7 @@ SharedPtr<Object> HandleTable::GetGeneric(Handle handle) const {
97} 113}
98 114
99void HandleTable::Clear() { 115void HandleTable::Clear() {
100 for (u16 i = 0; i < MAX_COUNT; ++i) { 116 for (u16 i = 0; i < table_size; ++i) {
101 generations[i] = i + 1; 117 generations[i] = i + 1;
102 objects[i] = nullptr; 118 objects[i] = nullptr;
103 } 119 }
diff --git a/src/core/hle/kernel/handle_table.h b/src/core/hle/kernel/handle_table.h
index 89a3bc740..44901391b 100644
--- a/src/core/hle/kernel/handle_table.h
+++ b/src/core/hle/kernel/handle_table.h
@@ -50,6 +50,20 @@ public:
50 ~HandleTable(); 50 ~HandleTable();
51 51
52 /** 52 /**
53 * Sets the number of handles that may be in use at one time
54 * for this handle table.
55 *
56 * @param handle_table_size The desired size to limit the handle table to.
57 *
58 * @returns an error code indicating if initialization was successful.
59 * If initialization was not successful, then ERR_OUT_OF_MEMORY
60 * will be returned.
61 *
62 * @pre handle_table_size must be within the range [0, 1024]
63 */
64 ResultCode SetSize(s32 handle_table_size);
65
66 /**
53 * Allocates a handle for the given object. 67 * Allocates a handle for the given object.
54 * @return The created Handle or one of the following errors: 68 * @return The created Handle or one of the following errors:
55 * - `ERR_HANDLE_TABLE_FULL`: the maximum number of handles has been exceeded. 69 * - `ERR_HANDLE_TABLE_FULL`: the maximum number of handles has been exceeded.
@@ -104,13 +118,20 @@ private:
104 std::array<u16, MAX_COUNT> generations; 118 std::array<u16, MAX_COUNT> generations;
105 119
106 /** 120 /**
121 * The limited size of the handle table. This can be specified by process
122 * capabilities in order to restrict the overall number of handles that
123 * can be created in a process instance
124 */
125 u16 table_size = static_cast<u16>(MAX_COUNT);
126
127 /**
107 * Global counter of the number of created handles. Stored in `generations` when a handle is 128 * Global counter of the number of created handles. Stored in `generations` when a handle is
108 * created, and wraps around to 1 when it hits 0x8000. 129 * created, and wraps around to 1 when it hits 0x8000.
109 */ 130 */
110 u16 next_generation; 131 u16 next_generation = 1;
111 132
112 /// Head of the free slots linked list. 133 /// Head of the free slots linked list.
113 u16 next_free_slot; 134 u16 next_free_slot = 0;
114}; 135};
115 136
116} // namespace Kernel 137} // namespace Kernel
diff --git a/src/core/hle/kernel/hle_ipc.cpp b/src/core/hle/kernel/hle_ipc.cpp
index 5dd855db8..fe710eb6e 100644
--- a/src/core/hle/kernel/hle_ipc.cpp
+++ b/src/core/hle/kernel/hle_ipc.cpp
@@ -86,7 +86,7 @@ HLERequestContext::~HLERequestContext() = default;
86void HLERequestContext::ParseCommandBuffer(const HandleTable& handle_table, u32_le* src_cmdbuf, 86void HLERequestContext::ParseCommandBuffer(const HandleTable& handle_table, u32_le* src_cmdbuf,
87 bool incoming) { 87 bool incoming) {
88 IPC::RequestParser rp(src_cmdbuf); 88 IPC::RequestParser rp(src_cmdbuf);
89 command_header = std::make_shared<IPC::CommandHeader>(rp.PopRaw<IPC::CommandHeader>()); 89 command_header = rp.PopRaw<IPC::CommandHeader>();
90 90
91 if (command_header->type == IPC::CommandType::Close) { 91 if (command_header->type == IPC::CommandType::Close) {
92 // Close does not populate the rest of the IPC header 92 // Close does not populate the rest of the IPC header
@@ -95,8 +95,7 @@ void HLERequestContext::ParseCommandBuffer(const HandleTable& handle_table, u32_
95 95
96 // If handle descriptor is present, add size of it 96 // If handle descriptor is present, add size of it
97 if (command_header->enable_handle_descriptor) { 97 if (command_header->enable_handle_descriptor) {
98 handle_descriptor_header = 98 handle_descriptor_header = rp.PopRaw<IPC::HandleDescriptorHeader>();
99 std::make_shared<IPC::HandleDescriptorHeader>(rp.PopRaw<IPC::HandleDescriptorHeader>());
100 if (handle_descriptor_header->send_current_pid) { 99 if (handle_descriptor_header->send_current_pid) {
101 rp.Skip(2, false); 100 rp.Skip(2, false);
102 } 101 }
@@ -140,16 +139,15 @@ void HLERequestContext::ParseCommandBuffer(const HandleTable& handle_table, u32_
140 // If this is an incoming message, only CommandType "Request" has a domain header 139 // If this is an incoming message, only CommandType "Request" has a domain header
141 // All outgoing domain messages have the domain header, if only incoming has it 140 // All outgoing domain messages have the domain header, if only incoming has it
142 if (incoming || domain_message_header) { 141 if (incoming || domain_message_header) {
143 domain_message_header = 142 domain_message_header = rp.PopRaw<IPC::DomainMessageHeader>();
144 std::make_shared<IPC::DomainMessageHeader>(rp.PopRaw<IPC::DomainMessageHeader>());
145 } else { 143 } else {
146 if (Session()->IsDomain()) 144 if (Session()->IsDomain()) {
147 LOG_WARNING(IPC, "Domain request has no DomainMessageHeader!"); 145 LOG_WARNING(IPC, "Domain request has no DomainMessageHeader!");
146 }
148 } 147 }
149 } 148 }
150 149
151 data_payload_header = 150 data_payload_header = rp.PopRaw<IPC::DataPayloadHeader>();
152 std::make_shared<IPC::DataPayloadHeader>(rp.PopRaw<IPC::DataPayloadHeader>());
153 151
154 data_payload_offset = rp.GetCurrentOffset(); 152 data_payload_offset = rp.GetCurrentOffset();
155 153
@@ -264,11 +262,11 @@ ResultCode HLERequestContext::WriteToOutgoingCommandBuffer(Thread& thread) {
264 // Write the domain objects to the command buffer, these go after the raw untranslated data. 262 // Write the domain objects to the command buffer, these go after the raw untranslated data.
265 // TODO(Subv): This completely ignores C buffers. 263 // TODO(Subv): This completely ignores C buffers.
266 std::size_t domain_offset = size - domain_message_header->num_objects; 264 std::size_t domain_offset = size - domain_message_header->num_objects;
267 auto& request_handlers = server_session->domain_request_handlers;
268 265
269 for (auto& object : domain_objects) { 266 for (const auto& object : domain_objects) {
270 request_handlers.emplace_back(object); 267 server_session->AppendDomainRequestHandler(object);
271 dst_cmdbuf[domain_offset++] = static_cast<u32_le>(request_handlers.size()); 268 dst_cmdbuf[domain_offset++] =
269 static_cast<u32_le>(server_session->NumDomainRequestHandlers());
272 } 270 }
273 } 271 }
274 272
diff --git a/src/core/hle/kernel/hle_ipc.h b/src/core/hle/kernel/hle_ipc.h
index cb1c5aff3..2bdd9f02c 100644
--- a/src/core/hle/kernel/hle_ipc.h
+++ b/src/core/hle/kernel/hle_ipc.h
@@ -6,6 +6,7 @@
6 6
7#include <array> 7#include <array>
8#include <memory> 8#include <memory>
9#include <optional>
9#include <string> 10#include <string>
10#include <type_traits> 11#include <type_traits>
11#include <vector> 12#include <vector>
@@ -15,6 +16,8 @@
15#include "core/hle/ipc.h" 16#include "core/hle/ipc.h"
16#include "core/hle/kernel/object.h" 17#include "core/hle/kernel/object.h"
17 18
19union ResultCode;
20
18namespace Service { 21namespace Service {
19class ServiceFrameworkBase; 22class ServiceFrameworkBase;
20} 23}
@@ -166,12 +169,12 @@ public:
166 return buffer_c_desciptors; 169 return buffer_c_desciptors;
167 } 170 }
168 171
169 const IPC::DomainMessageHeader* GetDomainMessageHeader() const { 172 const IPC::DomainMessageHeader& GetDomainMessageHeader() const {
170 return domain_message_header.get(); 173 return domain_message_header.value();
171 } 174 }
172 175
173 bool HasDomainMessageHeader() const { 176 bool HasDomainMessageHeader() const {
174 return domain_message_header != nullptr; 177 return domain_message_header.has_value();
175 } 178 }
176 179
177 /// Helper function to read a buffer using the appropriate buffer descriptor 180 /// Helper function to read a buffer using the appropriate buffer descriptor
@@ -208,14 +211,12 @@ public:
208 211
209 template <typename T> 212 template <typename T>
210 SharedPtr<T> GetCopyObject(std::size_t index) { 213 SharedPtr<T> GetCopyObject(std::size_t index) {
211 ASSERT(index < copy_objects.size()); 214 return DynamicObjectCast<T>(copy_objects.at(index));
212 return DynamicObjectCast<T>(copy_objects[index]);
213 } 215 }
214 216
215 template <typename T> 217 template <typename T>
216 SharedPtr<T> GetMoveObject(std::size_t index) { 218 SharedPtr<T> GetMoveObject(std::size_t index) {
217 ASSERT(index < move_objects.size()); 219 return DynamicObjectCast<T>(move_objects.at(index));
218 return DynamicObjectCast<T>(move_objects[index]);
219 } 220 }
220 221
221 void AddMoveObject(SharedPtr<Object> object) { 222 void AddMoveObject(SharedPtr<Object> object) {
@@ -232,7 +233,7 @@ public:
232 233
233 template <typename T> 234 template <typename T>
234 std::shared_ptr<T> GetDomainRequestHandler(std::size_t index) const { 235 std::shared_ptr<T> GetDomainRequestHandler(std::size_t index) const {
235 return std::static_pointer_cast<T>(domain_request_handlers[index]); 236 return std::static_pointer_cast<T>(domain_request_handlers.at(index));
236 } 237 }
237 238
238 void SetDomainRequestHandlers( 239 void SetDomainRequestHandlers(
@@ -272,10 +273,10 @@ private:
272 boost::container::small_vector<SharedPtr<Object>, 8> copy_objects; 273 boost::container::small_vector<SharedPtr<Object>, 8> copy_objects;
273 boost::container::small_vector<std::shared_ptr<SessionRequestHandler>, 8> domain_objects; 274 boost::container::small_vector<std::shared_ptr<SessionRequestHandler>, 8> domain_objects;
274 275
275 std::shared_ptr<IPC::CommandHeader> command_header; 276 std::optional<IPC::CommandHeader> command_header;
276 std::shared_ptr<IPC::HandleDescriptorHeader> handle_descriptor_header; 277 std::optional<IPC::HandleDescriptorHeader> handle_descriptor_header;
277 std::shared_ptr<IPC::DataPayloadHeader> data_payload_header; 278 std::optional<IPC::DataPayloadHeader> data_payload_header;
278 std::shared_ptr<IPC::DomainMessageHeader> domain_message_header; 279 std::optional<IPC::DomainMessageHeader> domain_message_header;
279 std::vector<IPC::BufferDescriptorX> buffer_x_desciptors; 280 std::vector<IPC::BufferDescriptorX> buffer_x_desciptors;
280 std::vector<IPC::BufferDescriptorABW> buffer_a_desciptors; 281 std::vector<IPC::BufferDescriptorABW> buffer_a_desciptors;
281 std::vector<IPC::BufferDescriptorABW> buffer_b_desciptors; 282 std::vector<IPC::BufferDescriptorABW> buffer_b_desciptors;
diff --git a/src/core/hle/kernel/kernel.cpp b/src/core/hle/kernel/kernel.cpp
index 7a524ce5a..4d224d01d 100644
--- a/src/core/hle/kernel/kernel.cpp
+++ b/src/core/hle/kernel/kernel.cpp
@@ -12,6 +12,7 @@
12 12
13#include "core/core.h" 13#include "core/core.h"
14#include "core/core_timing.h" 14#include "core/core_timing.h"
15#include "core/hle/kernel/address_arbiter.h"
15#include "core/hle/kernel/client_port.h" 16#include "core/hle/kernel/client_port.h"
16#include "core/hle/kernel/handle_table.h" 17#include "core/hle/kernel/handle_table.h"
17#include "core/hle/kernel/kernel.h" 18#include "core/hle/kernel/kernel.h"
@@ -86,6 +87,8 @@ static void ThreadWakeupCallback(u64 thread_handle, [[maybe_unused]] int cycles_
86} 87}
87 88
88struct KernelCore::Impl { 89struct KernelCore::Impl {
90 explicit Impl(Core::System& system) : system{system} {}
91
89 void Initialize(KernelCore& kernel) { 92 void Initialize(KernelCore& kernel) {
90 Shutdown(); 93 Shutdown();
91 94
@@ -124,7 +127,7 @@ struct KernelCore::Impl {
124 127
125 void InitializeThreads() { 128 void InitializeThreads() {
126 thread_wakeup_event_type = 129 thread_wakeup_event_type =
127 CoreTiming::RegisterEvent("ThreadWakeupCallback", ThreadWakeupCallback); 130 system.CoreTiming().RegisterEvent("ThreadWakeupCallback", ThreadWakeupCallback);
128 } 131 }
129 132
130 std::atomic<u32> next_object_id{0}; 133 std::atomic<u32> next_object_id{0};
@@ -137,7 +140,7 @@ struct KernelCore::Impl {
137 140
138 SharedPtr<ResourceLimit> system_resource_limit; 141 SharedPtr<ResourceLimit> system_resource_limit;
139 142
140 CoreTiming::EventType* thread_wakeup_event_type = nullptr; 143 Core::Timing::EventType* thread_wakeup_event_type = nullptr;
141 // TODO(yuriks): This can be removed if Thread objects are explicitly pooled in the future, 144 // TODO(yuriks): This can be removed if Thread objects are explicitly pooled in the future,
142 // allowing us to simply use a pool index or similar. 145 // allowing us to simply use a pool index or similar.
143 Kernel::HandleTable thread_wakeup_callback_handle_table; 146 Kernel::HandleTable thread_wakeup_callback_handle_table;
@@ -145,9 +148,12 @@ struct KernelCore::Impl {
145 /// Map of named ports managed by the kernel, which can be retrieved using 148 /// Map of named ports managed by the kernel, which can be retrieved using
146 /// the ConnectToPort SVC. 149 /// the ConnectToPort SVC.
147 NamedPortTable named_ports; 150 NamedPortTable named_ports;
151
152 // System context
153 Core::System& system;
148}; 154};
149 155
150KernelCore::KernelCore() : impl{std::make_unique<Impl>()} {} 156KernelCore::KernelCore(Core::System& system) : impl{std::make_unique<Impl>(system)} {}
151KernelCore::~KernelCore() { 157KernelCore::~KernelCore() {
152 Shutdown(); 158 Shutdown();
153} 159}
@@ -213,7 +219,7 @@ u64 KernelCore::CreateNewProcessID() {
213 return impl->next_process_id++; 219 return impl->next_process_id++;
214} 220}
215 221
216CoreTiming::EventType* KernelCore::ThreadWakeupCallbackEventType() const { 222Core::Timing::EventType* KernelCore::ThreadWakeupCallbackEventType() const {
217 return impl->thread_wakeup_event_type; 223 return impl->thread_wakeup_event_type;
218} 224}
219 225
diff --git a/src/core/hle/kernel/kernel.h b/src/core/hle/kernel/kernel.h
index c643a6401..ff17ff865 100644
--- a/src/core/hle/kernel/kernel.h
+++ b/src/core/hle/kernel/kernel.h
@@ -11,12 +11,18 @@
11template <typename T> 11template <typename T>
12class ResultVal; 12class ResultVal;
13 13
14namespace CoreTiming { 14namespace Core {
15struct EventType; 15class System;
16} 16}
17 17
18namespace Core::Timing {
19class CoreTiming;
20struct EventType;
21} // namespace Core::Timing
22
18namespace Kernel { 23namespace Kernel {
19 24
25class AddressArbiter;
20class ClientPort; 26class ClientPort;
21class HandleTable; 27class HandleTable;
22class Process; 28class Process;
@@ -29,7 +35,14 @@ private:
29 using NamedPortTable = std::unordered_map<std::string, SharedPtr<ClientPort>>; 35 using NamedPortTable = std::unordered_map<std::string, SharedPtr<ClientPort>>;
30 36
31public: 37public:
32 KernelCore(); 38 /// Constructs an instance of the kernel using the given System
39 /// instance as a context for any necessary system-related state,
40 /// such as threads, CPU core state, etc.
41 ///
42 /// @post After execution of the constructor, the provided System
43 /// object *must* outlive the kernel instance itself.
44 ///
45 explicit KernelCore(Core::System& system);
33 ~KernelCore(); 46 ~KernelCore();
34 47
35 KernelCore(const KernelCore&) = delete; 48 KernelCore(const KernelCore&) = delete;
@@ -89,7 +102,7 @@ private:
89 u64 CreateNewThreadID(); 102 u64 CreateNewThreadID();
90 103
91 /// Retrieves the event type used for thread wakeup callbacks. 104 /// Retrieves the event type used for thread wakeup callbacks.
92 CoreTiming::EventType* ThreadWakeupCallbackEventType() const; 105 Core::Timing::EventType* ThreadWakeupCallbackEventType() const;
93 106
94 /// Provides a reference to the thread wakeup callback handle table. 107 /// Provides a reference to the thread wakeup callback handle table.
95 Kernel::HandleTable& ThreadWakeupCallbackHandleTable(); 108 Kernel::HandleTable& ThreadWakeupCallbackHandleTable();
diff --git a/src/core/hle/kernel/process.cpp b/src/core/hle/kernel/process.cpp
index c5aa19afa..15a16ae14 100644
--- a/src/core/hle/kernel/process.cpp
+++ b/src/core/hle/kernel/process.cpp
@@ -9,6 +9,7 @@
9#include "common/logging/log.h" 9#include "common/logging/log.h"
10#include "core/core.h" 10#include "core/core.h"
11#include "core/file_sys/program_metadata.h" 11#include "core/file_sys/program_metadata.h"
12#include "core/hle/kernel/code_set.h"
12#include "core/hle/kernel/errors.h" 13#include "core/hle/kernel/errors.h"
13#include "core/hle/kernel/kernel.h" 14#include "core/hle/kernel/kernel.h"
14#include "core/hle/kernel/process.h" 15#include "core/hle/kernel/process.h"
@@ -31,7 +32,7 @@ namespace {
31 */ 32 */
32void SetupMainThread(Process& owner_process, KernelCore& kernel, VAddr entry_point, u32 priority) { 33void SetupMainThread(Process& owner_process, KernelCore& kernel, VAddr entry_point, u32 priority) {
33 // Setup page table so we can write to memory 34 // Setup page table so we can write to memory
34 SetCurrentPageTable(&owner_process.VMManager().page_table); 35 Memory::SetCurrentPageTable(&owner_process.VMManager().page_table);
35 36
36 // Initialize new "main" thread 37 // Initialize new "main" thread
37 const VAddr stack_top = owner_process.VMManager().GetTLSIORegionEndAddress(); 38 const VAddr stack_top = owner_process.VMManager().GetTLSIORegionEndAddress();
@@ -50,12 +51,10 @@ void SetupMainThread(Process& owner_process, KernelCore& kernel, VAddr entry_poi
50} 51}
51} // Anonymous namespace 52} // Anonymous namespace
52 53
53CodeSet::CodeSet() = default; 54SharedPtr<Process> Process::Create(Core::System& system, std::string&& name) {
54CodeSet::~CodeSet() = default; 55 auto& kernel = system.Kernel();
55
56SharedPtr<Process> Process::Create(KernelCore& kernel, std::string&& name) {
57 SharedPtr<Process> process(new Process(kernel));
58 56
57 SharedPtr<Process> process(new Process(system));
59 process->name = std::move(name); 58 process->name = std::move(name);
60 process->resource_limit = kernel.GetSystemResourceLimit(); 59 process->resource_limit = kernel.GetSystemResourceLimit();
61 process->status = ProcessStatus::Created; 60 process->status = ProcessStatus::Created;
@@ -99,7 +98,13 @@ ResultCode Process::LoadFromMetadata(const FileSys::ProgramMetadata& metadata) {
99 vm_manager.Reset(metadata.GetAddressSpaceType()); 98 vm_manager.Reset(metadata.GetAddressSpaceType());
100 99
101 const auto& caps = metadata.GetKernelCapabilities(); 100 const auto& caps = metadata.GetKernelCapabilities();
102 return capabilities.InitializeForUserProcess(caps.data(), caps.size(), vm_manager); 101 const auto capability_init_result =
102 capabilities.InitializeForUserProcess(caps.data(), caps.size(), vm_manager);
103 if (capability_init_result.IsError()) {
104 return capability_init_result;
105 }
106
107 return handle_table.SetSize(capabilities.GetHandleTableSize());
103} 108}
104 109
105void Process::Run(VAddr entry_point, s32 main_thread_priority, u32 stack_size) { 110void Process::Run(VAddr entry_point, s32 main_thread_priority, u32 stack_size) {
@@ -126,7 +131,7 @@ void Process::PrepareForTermination() {
126 if (thread->GetOwnerProcess() != this) 131 if (thread->GetOwnerProcess() != this)
127 continue; 132 continue;
128 133
129 if (thread == GetCurrentThread()) 134 if (thread == system.CurrentScheduler().GetCurrentThread())
130 continue; 135 continue;
131 136
132 // TODO(Subv): When are the other running/ready threads terminated? 137 // TODO(Subv): When are the other running/ready threads terminated?
@@ -138,7 +143,6 @@ void Process::PrepareForTermination() {
138 } 143 }
139 }; 144 };
140 145
141 const auto& system = Core::System::GetInstance();
142 stop_threads(system.Scheduler(0).GetThreadList()); 146 stop_threads(system.Scheduler(0).GetThreadList());
143 stop_threads(system.Scheduler(1).GetThreadList()); 147 stop_threads(system.Scheduler(1).GetThreadList());
144 stop_threads(system.Scheduler(2).GetThreadList()); 148 stop_threads(system.Scheduler(2).GetThreadList());
@@ -206,7 +210,7 @@ void Process::FreeTLSSlot(VAddr tls_address) {
206} 210}
207 211
208void Process::LoadModule(CodeSet module_, VAddr base_addr) { 212void Process::LoadModule(CodeSet module_, VAddr base_addr) {
209 const auto MapSegment = [&](CodeSet::Segment& segment, VMAPermission permissions, 213 const auto MapSegment = [&](const CodeSet::Segment& segment, VMAPermission permissions,
210 MemoryState memory_state) { 214 MemoryState memory_state) {
211 const auto vma = vm_manager 215 const auto vma = vm_manager
212 .MapMemoryBlock(segment.addr + base_addr, module_.memory, 216 .MapMemoryBlock(segment.addr + base_addr, module_.memory,
@@ -221,14 +225,12 @@ void Process::LoadModule(CodeSet module_, VAddr base_addr) {
221 MapSegment(module_.DataSegment(), VMAPermission::ReadWrite, MemoryState::CodeMutable); 225 MapSegment(module_.DataSegment(), VMAPermission::ReadWrite, MemoryState::CodeMutable);
222 226
223 // Clear instruction cache in CPU JIT 227 // Clear instruction cache in CPU JIT
224 Core::System::GetInstance().ArmInterface(0).ClearInstructionCache(); 228 system.InvalidateCpuInstructionCaches();
225 Core::System::GetInstance().ArmInterface(1).ClearInstructionCache();
226 Core::System::GetInstance().ArmInterface(2).ClearInstructionCache();
227 Core::System::GetInstance().ArmInterface(3).ClearInstructionCache();
228} 229}
229 230
230Kernel::Process::Process(KernelCore& kernel) : WaitObject{kernel} {} 231Process::Process(Core::System& system)
231Kernel::Process::~Process() {} 232 : WaitObject{system.Kernel()}, address_arbiter{system}, system{system} {}
233Process::~Process() = default;
232 234
233void Process::Acquire(Thread* thread) { 235void Process::Acquire(Thread* thread) {
234 ASSERT_MSG(!ShouldWait(thread), "Object unavailable!"); 236 ASSERT_MSG(!ShouldWait(thread), "Object unavailable!");
diff --git a/src/core/hle/kernel/process.h b/src/core/hle/kernel/process.h
index dcc57ae9f..3ae7c922c 100644
--- a/src/core/hle/kernel/process.h
+++ b/src/core/hle/kernel/process.h
@@ -7,17 +7,21 @@
7#include <array> 7#include <array>
8#include <bitset> 8#include <bitset>
9#include <cstddef> 9#include <cstddef>
10#include <memory>
11#include <string> 10#include <string>
12#include <vector> 11#include <vector>
13#include <boost/container/static_vector.hpp> 12#include <boost/container/static_vector.hpp>
14#include "common/common_types.h" 13#include "common/common_types.h"
14#include "core/hle/kernel/address_arbiter.h"
15#include "core/hle/kernel/handle_table.h" 15#include "core/hle/kernel/handle_table.h"
16#include "core/hle/kernel/process_capability.h" 16#include "core/hle/kernel/process_capability.h"
17#include "core/hle/kernel/vm_manager.h" 17#include "core/hle/kernel/vm_manager.h"
18#include "core/hle/kernel/wait_object.h" 18#include "core/hle/kernel/wait_object.h"
19#include "core/hle/result.h" 19#include "core/hle/result.h"
20 20
21namespace Core {
22class System;
23}
24
21namespace FileSys { 25namespace FileSys {
22class ProgramMetadata; 26class ProgramMetadata;
23} 27}
@@ -28,6 +32,8 @@ class KernelCore;
28class ResourceLimit; 32class ResourceLimit;
29class Thread; 33class Thread;
30 34
35struct CodeSet;
36
31struct AddressMapping { 37struct AddressMapping {
32 // Address and size must be page-aligned 38 // Address and size must be page-aligned
33 VAddr address; 39 VAddr address;
@@ -60,46 +66,6 @@ enum class ProcessStatus {
60 DebugBreak, 66 DebugBreak,
61}; 67};
62 68
63struct CodeSet final {
64 struct Segment {
65 std::size_t offset = 0;
66 VAddr addr = 0;
67 u32 size = 0;
68 };
69
70 explicit CodeSet();
71 ~CodeSet();
72
73 Segment& CodeSegment() {
74 return segments[0];
75 }
76
77 const Segment& CodeSegment() const {
78 return segments[0];
79 }
80
81 Segment& RODataSegment() {
82 return segments[1];
83 }
84
85 const Segment& RODataSegment() const {
86 return segments[1];
87 }
88
89 Segment& DataSegment() {
90 return segments[2];
91 }
92
93 const Segment& DataSegment() const {
94 return segments[2];
95 }
96
97 std::shared_ptr<std::vector<u8>> memory;
98
99 std::array<Segment, 3> segments;
100 VAddr entrypoint = 0;
101};
102
103class Process final : public WaitObject { 69class Process final : public WaitObject {
104public: 70public:
105 enum : u64 { 71 enum : u64 {
@@ -116,7 +82,7 @@ public:
116 82
117 static constexpr std::size_t RANDOM_ENTROPY_SIZE = 4; 83 static constexpr std::size_t RANDOM_ENTROPY_SIZE = 4;
118 84
119 static SharedPtr<Process> Create(KernelCore& kernel, std::string&& name); 85 static SharedPtr<Process> Create(Core::System& system, std::string&& name);
120 86
121 std::string GetTypeName() const override { 87 std::string GetTypeName() const override {
122 return "Process"; 88 return "Process";
@@ -150,6 +116,16 @@ public:
150 return handle_table; 116 return handle_table;
151 } 117 }
152 118
119 /// Gets a reference to the process' address arbiter.
120 AddressArbiter& GetAddressArbiter() {
121 return address_arbiter;
122 }
123
124 /// Gets a const reference to the process' address arbiter.
125 const AddressArbiter& GetAddressArbiter() const {
126 return address_arbiter;
127 }
128
153 /// Gets the current status of the process 129 /// Gets the current status of the process
154 ProcessStatus GetStatus() const { 130 ProcessStatus GetStatus() const {
155 return status; 131 return status;
@@ -251,7 +227,7 @@ public:
251 void FreeTLSSlot(VAddr tls_address); 227 void FreeTLSSlot(VAddr tls_address);
252 228
253private: 229private:
254 explicit Process(KernelCore& kernel); 230 explicit Process(Core::System& system);
255 ~Process() override; 231 ~Process() override;
256 232
257 /// Checks if the specified thread should wait until this process is available. 233 /// Checks if the specified thread should wait until this process is available.
@@ -309,9 +285,16 @@ private:
309 /// Per-process handle table for storing created object handles in. 285 /// Per-process handle table for storing created object handles in.
310 HandleTable handle_table; 286 HandleTable handle_table;
311 287
288 /// Per-process address arbiter.
289 AddressArbiter address_arbiter;
290
312 /// Random values for svcGetInfo RandomEntropy 291 /// Random values for svcGetInfo RandomEntropy
313 std::array<u64, RANDOM_ENTROPY_SIZE> random_entropy; 292 std::array<u64, RANDOM_ENTROPY_SIZE> random_entropy;
314 293
294 /// System context
295 Core::System& system;
296
297 /// Name of this process
315 std::string name; 298 std::string name;
316}; 299};
317 300
diff --git a/src/core/hle/kernel/process_capability.cpp b/src/core/hle/kernel/process_capability.cpp
index 3a2164b25..583e35b79 100644
--- a/src/core/hle/kernel/process_capability.cpp
+++ b/src/core/hle/kernel/process_capability.cpp
@@ -96,7 +96,7 @@ void ProcessCapabilities::InitializeForMetadatalessProcess() {
96 interrupt_capabilities.set(); 96 interrupt_capabilities.set();
97 97
98 // Allow using the maximum possible amount of handles 98 // Allow using the maximum possible amount of handles
99 handle_table_size = static_cast<u32>(HandleTable::MAX_COUNT); 99 handle_table_size = static_cast<s32>(HandleTable::MAX_COUNT);
100 100
101 // Allow all debugging capabilities. 101 // Allow all debugging capabilities.
102 is_debuggable = true; 102 is_debuggable = true;
@@ -337,7 +337,7 @@ ResultCode ProcessCapabilities::HandleHandleTableFlags(u32 flags) {
337 return ERR_RESERVED_VALUE; 337 return ERR_RESERVED_VALUE;
338 } 338 }
339 339
340 handle_table_size = (flags >> 16) & 0x3FF; 340 handle_table_size = static_cast<s32>((flags >> 16) & 0x3FF);
341 return RESULT_SUCCESS; 341 return RESULT_SUCCESS;
342} 342}
343 343
diff --git a/src/core/hle/kernel/process_capability.h b/src/core/hle/kernel/process_capability.h
index fbc8812a3..5cdd80747 100644
--- a/src/core/hle/kernel/process_capability.h
+++ b/src/core/hle/kernel/process_capability.h
@@ -156,7 +156,7 @@ public:
156 } 156 }
157 157
158 /// Gets the number of total allowable handles for the process' handle table. 158 /// Gets the number of total allowable handles for the process' handle table.
159 u32 GetHandleTableSize() const { 159 s32 GetHandleTableSize() const {
160 return handle_table_size; 160 return handle_table_size;
161 } 161 }
162 162
@@ -252,7 +252,7 @@ private:
252 u64 core_mask = 0; 252 u64 core_mask = 0;
253 u64 priority_mask = 0; 253 u64 priority_mask = 0;
254 254
255 u32 handle_table_size = 0; 255 s32 handle_table_size = 0;
256 u32 kernel_version = 0; 256 u32 kernel_version = 0;
257 257
258 ProgramType program_type = ProgramType::SysModule; 258 ProgramType program_type = ProgramType::SysModule;
diff --git a/src/core/hle/kernel/scheduler.cpp b/src/core/hle/kernel/scheduler.cpp
index df4d6cf0a..cc189cc64 100644
--- a/src/core/hle/kernel/scheduler.cpp
+++ b/src/core/hle/kernel/scheduler.cpp
@@ -19,7 +19,8 @@ namespace Kernel {
19 19
20std::mutex Scheduler::scheduler_mutex; 20std::mutex Scheduler::scheduler_mutex;
21 21
22Scheduler::Scheduler(Core::ARM_Interface& cpu_core) : cpu_core(cpu_core) {} 22Scheduler::Scheduler(Core::System& system, Core::ARM_Interface& cpu_core)
23 : cpu_core{cpu_core}, system{system} {}
23 24
24Scheduler::~Scheduler() { 25Scheduler::~Scheduler() {
25 for (auto& thread : thread_list) { 26 for (auto& thread : thread_list) {
@@ -61,7 +62,7 @@ Thread* Scheduler::PopNextReadyThread() {
61 62
62void Scheduler::SwitchContext(Thread* new_thread) { 63void Scheduler::SwitchContext(Thread* new_thread) {
63 Thread* const previous_thread = GetCurrentThread(); 64 Thread* const previous_thread = GetCurrentThread();
64 Process* const previous_process = Core::CurrentProcess(); 65 Process* const previous_process = system.Kernel().CurrentProcess();
65 66
66 UpdateLastContextSwitchTime(previous_thread, previous_process); 67 UpdateLastContextSwitchTime(previous_thread, previous_process);
67 68
@@ -94,8 +95,8 @@ void Scheduler::SwitchContext(Thread* new_thread) {
94 95
95 auto* const thread_owner_process = current_thread->GetOwnerProcess(); 96 auto* const thread_owner_process = current_thread->GetOwnerProcess();
96 if (previous_process != thread_owner_process) { 97 if (previous_process != thread_owner_process) {
97 Core::System::GetInstance().Kernel().MakeCurrentProcess(thread_owner_process); 98 system.Kernel().MakeCurrentProcess(thread_owner_process);
98 SetCurrentPageTable(&Core::CurrentProcess()->VMManager().page_table); 99 Memory::SetCurrentPageTable(&thread_owner_process->VMManager().page_table);
99 } 100 }
100 101
101 cpu_core.LoadContext(new_thread->GetContext()); 102 cpu_core.LoadContext(new_thread->GetContext());
@@ -111,7 +112,7 @@ void Scheduler::SwitchContext(Thread* new_thread) {
111 112
112void Scheduler::UpdateLastContextSwitchTime(Thread* thread, Process* process) { 113void Scheduler::UpdateLastContextSwitchTime(Thread* thread, Process* process) {
113 const u64 prev_switch_ticks = last_context_switch_time; 114 const u64 prev_switch_ticks = last_context_switch_time;
114 const u64 most_recent_switch_ticks = CoreTiming::GetTicks(); 115 const u64 most_recent_switch_ticks = system.CoreTiming().GetTicks();
115 const u64 update_ticks = most_recent_switch_ticks - prev_switch_ticks; 116 const u64 update_ticks = most_recent_switch_ticks - prev_switch_ticks;
116 117
117 if (thread != nullptr) { 118 if (thread != nullptr) {
@@ -198,8 +199,7 @@ void Scheduler::YieldWithoutLoadBalancing(Thread* thread) {
198 ASSERT(thread->GetPriority() < THREADPRIO_COUNT); 199 ASSERT(thread->GetPriority() < THREADPRIO_COUNT);
199 200
200 // Yield this thread -- sleep for zero time and force reschedule to different thread 201 // Yield this thread -- sleep for zero time and force reschedule to different thread
201 WaitCurrentThread_Sleep(); 202 GetCurrentThread()->Sleep(0);
202 GetCurrentThread()->WakeAfterDelay(0);
203} 203}
204 204
205void Scheduler::YieldWithLoadBalancing(Thread* thread) { 205void Scheduler::YieldWithLoadBalancing(Thread* thread) {
@@ -214,8 +214,7 @@ void Scheduler::YieldWithLoadBalancing(Thread* thread) {
214 ASSERT(priority < THREADPRIO_COUNT); 214 ASSERT(priority < THREADPRIO_COUNT);
215 215
216 // Sleep for zero time to be able to force reschedule to different thread 216 // Sleep for zero time to be able to force reschedule to different thread
217 WaitCurrentThread_Sleep(); 217 GetCurrentThread()->Sleep(0);
218 GetCurrentThread()->WakeAfterDelay(0);
219 218
220 Thread* suggested_thread = nullptr; 219 Thread* suggested_thread = nullptr;
221 220
@@ -223,8 +222,7 @@ void Scheduler::YieldWithLoadBalancing(Thread* thread) {
223 // Take the first non-nullptr one 222 // Take the first non-nullptr one
224 for (unsigned cur_core = 0; cur_core < Core::NUM_CPU_CORES; ++cur_core) { 223 for (unsigned cur_core = 0; cur_core < Core::NUM_CPU_CORES; ++cur_core) {
225 const auto res = 224 const auto res =
226 Core::System::GetInstance().CpuCore(cur_core).Scheduler().GetNextSuggestedThread( 225 system.CpuCore(cur_core).Scheduler().GetNextSuggestedThread(core, priority);
227 core, priority);
228 226
229 // If scheduler provides a suggested thread 227 // If scheduler provides a suggested thread
230 if (res != nullptr) { 228 if (res != nullptr) {
diff --git a/src/core/hle/kernel/scheduler.h b/src/core/hle/kernel/scheduler.h
index 97ced4dfc..1c5bf57d9 100644
--- a/src/core/hle/kernel/scheduler.h
+++ b/src/core/hle/kernel/scheduler.h
@@ -13,7 +13,8 @@
13 13
14namespace Core { 14namespace Core {
15class ARM_Interface; 15class ARM_Interface;
16} 16class System;
17} // namespace Core
17 18
18namespace Kernel { 19namespace Kernel {
19 20
@@ -21,7 +22,7 @@ class Process;
21 22
22class Scheduler final { 23class Scheduler final {
23public: 24public:
24 explicit Scheduler(Core::ARM_Interface& cpu_core); 25 explicit Scheduler(Core::System& system, Core::ARM_Interface& cpu_core);
25 ~Scheduler(); 26 ~Scheduler();
26 27
27 /// Returns whether there are any threads that are ready to run. 28 /// Returns whether there are any threads that are ready to run.
@@ -162,6 +163,7 @@ private:
162 Core::ARM_Interface& cpu_core; 163 Core::ARM_Interface& cpu_core;
163 u64 last_context_switch_time = 0; 164 u64 last_context_switch_time = 0;
164 165
166 Core::System& system;
165 static std::mutex scheduler_mutex; 167 static std::mutex scheduler_mutex;
166}; 168};
167 169
diff --git a/src/core/hle/kernel/server_port.cpp b/src/core/hle/kernel/server_port.cpp
index d6ceeb2da..0e1515c89 100644
--- a/src/core/hle/kernel/server_port.cpp
+++ b/src/core/hle/kernel/server_port.cpp
@@ -26,6 +26,10 @@ ResultVal<SharedPtr<ServerSession>> ServerPort::Accept() {
26 return MakeResult(std::move(session)); 26 return MakeResult(std::move(session));
27} 27}
28 28
29void ServerPort::AppendPendingSession(SharedPtr<ServerSession> pending_session) {
30 pending_sessions.push_back(std::move(pending_session));
31}
32
29bool ServerPort::ShouldWait(Thread* thread) const { 33bool ServerPort::ShouldWait(Thread* thread) const {
30 // If there are no pending sessions, we wait until a new one is added. 34 // If there are no pending sessions, we wait until a new one is added.
31 return pending_sessions.empty(); 35 return pending_sessions.empty();
diff --git a/src/core/hle/kernel/server_port.h b/src/core/hle/kernel/server_port.h
index e52f8245f..9bc667cf2 100644
--- a/src/core/hle/kernel/server_port.h
+++ b/src/core/hle/kernel/server_port.h
@@ -22,6 +22,8 @@ class SessionRequestHandler;
22 22
23class ServerPort final : public WaitObject { 23class ServerPort final : public WaitObject {
24public: 24public:
25 using HLEHandler = std::shared_ptr<SessionRequestHandler>;
26
25 /** 27 /**
26 * Creates a pair of ServerPort and an associated ClientPort. 28 * Creates a pair of ServerPort and an associated ClientPort.
27 * 29 *
@@ -51,22 +53,27 @@ public:
51 */ 53 */
52 ResultVal<SharedPtr<ServerSession>> Accept(); 54 ResultVal<SharedPtr<ServerSession>> Accept();
53 55
56 /// Whether or not this server port has an HLE handler available.
57 bool HasHLEHandler() const {
58 return hle_handler != nullptr;
59 }
60
61 /// Gets the HLE handler for this port.
62 HLEHandler GetHLEHandler() const {
63 return hle_handler;
64 }
65
54 /** 66 /**
55 * Sets the HLE handler template for the port. ServerSessions crated by connecting to this port 67 * Sets the HLE handler template for the port. ServerSessions crated by connecting to this port
56 * will inherit a reference to this handler. 68 * will inherit a reference to this handler.
57 */ 69 */
58 void SetHleHandler(std::shared_ptr<SessionRequestHandler> hle_handler_) { 70 void SetHleHandler(HLEHandler hle_handler_) {
59 hle_handler = std::move(hle_handler_); 71 hle_handler = std::move(hle_handler_);
60 } 72 }
61 73
62 std::string name; ///< Name of port (optional) 74 /// Appends a ServerSession to the collection of ServerSessions
63 75 /// waiting to be accepted by this port.
64 /// ServerSessions waiting to be accepted by the port 76 void AppendPendingSession(SharedPtr<ServerSession> pending_session);
65 std::vector<SharedPtr<ServerSession>> pending_sessions;
66
67 /// This session's HLE request handler template (optional)
68 /// ServerSessions created from this port inherit a reference to this handler.
69 std::shared_ptr<SessionRequestHandler> hle_handler;
70 77
71 bool ShouldWait(Thread* thread) const override; 78 bool ShouldWait(Thread* thread) const override;
72 void Acquire(Thread* thread) override; 79 void Acquire(Thread* thread) override;
@@ -74,6 +81,16 @@ public:
74private: 81private:
75 explicit ServerPort(KernelCore& kernel); 82 explicit ServerPort(KernelCore& kernel);
76 ~ServerPort() override; 83 ~ServerPort() override;
84
85 /// ServerSessions waiting to be accepted by the port
86 std::vector<SharedPtr<ServerSession>> pending_sessions;
87
88 /// This session's HLE request handler template (optional)
89 /// ServerSessions created from this port inherit a reference to this handler.
90 HLEHandler hle_handler;
91
92 /// Name of the port (optional)
93 std::string name;
77}; 94};
78 95
79} // namespace Kernel 96} // namespace Kernel
diff --git a/src/core/hle/kernel/server_session.cpp b/src/core/hle/kernel/server_session.cpp
index 027434f92..4d8a337a7 100644
--- a/src/core/hle/kernel/server_session.cpp
+++ b/src/core/hle/kernel/server_session.cpp
@@ -63,42 +63,71 @@ void ServerSession::Acquire(Thread* thread) {
63 pending_requesting_threads.pop_back(); 63 pending_requesting_threads.pop_back();
64} 64}
65 65
66void ServerSession::ClientDisconnected() {
67 // We keep a shared pointer to the hle handler to keep it alive throughout
68 // the call to ClientDisconnected, as ClientDisconnected invalidates the
69 // hle_handler member itself during the course of the function executing.
70 std::shared_ptr<SessionRequestHandler> handler = hle_handler;
71 if (handler) {
72 // Note that after this returns, this server session's hle_handler is
73 // invalidated (set to null).
74 handler->ClientDisconnected(this);
75 }
76
77 // TODO(Subv): Force a wake up of all the ServerSession's waiting threads and set
78 // their WaitSynchronization result to 0xC920181A.
79
80 // Clean up the list of client threads with pending requests, they are unneeded now that the
81 // client endpoint is closed.
82 pending_requesting_threads.clear();
83 currently_handling = nullptr;
84}
85
86void ServerSession::AppendDomainRequestHandler(std::shared_ptr<SessionRequestHandler> handler) {
87 domain_request_handlers.push_back(std::move(handler));
88}
89
90std::size_t ServerSession::NumDomainRequestHandlers() const {
91 return domain_request_handlers.size();
92}
93
66ResultCode ServerSession::HandleDomainSyncRequest(Kernel::HLERequestContext& context) { 94ResultCode ServerSession::HandleDomainSyncRequest(Kernel::HLERequestContext& context) {
67 auto* const domain_message_header = context.GetDomainMessageHeader(); 95 if (!context.HasDomainMessageHeader()) {
68 if (domain_message_header) { 96 return RESULT_SUCCESS;
69 // Set domain handlers in HLE context, used for domain objects (IPC interfaces) as inputs 97 }
70 context.SetDomainRequestHandlers(domain_request_handlers); 98
71 99 // Set domain handlers in HLE context, used for domain objects (IPC interfaces) as inputs
72 // If there is a DomainMessageHeader, then this is CommandType "Request" 100 context.SetDomainRequestHandlers(domain_request_handlers);
73 const u32 object_id{context.GetDomainMessageHeader()->object_id}; 101
74 switch (domain_message_header->command) { 102 // If there is a DomainMessageHeader, then this is CommandType "Request"
75 case IPC::DomainMessageHeader::CommandType::SendMessage: 103 const auto& domain_message_header = context.GetDomainMessageHeader();
76 if (object_id > domain_request_handlers.size()) { 104 const u32 object_id{domain_message_header.object_id};
77 LOG_CRITICAL(IPC, 105 switch (domain_message_header.command) {
78 "object_id {} is too big! This probably means a recent service call " 106 case IPC::DomainMessageHeader::CommandType::SendMessage:
79 "to {} needed to return a new interface!", 107 if (object_id > domain_request_handlers.size()) {
80 object_id, name); 108 LOG_CRITICAL(IPC,
81 UNREACHABLE(); 109 "object_id {} is too big! This probably means a recent service call "
82 return RESULT_SUCCESS; // Ignore error if asserts are off 110 "to {} needed to return a new interface!",
83 } 111 object_id, name);
84 return domain_request_handlers[object_id - 1]->HandleSyncRequest(context); 112 UNREACHABLE();
85 113 return RESULT_SUCCESS; // Ignore error if asserts are off
86 case IPC::DomainMessageHeader::CommandType::CloseVirtualHandle: {
87 LOG_DEBUG(IPC, "CloseVirtualHandle, object_id=0x{:08X}", object_id);
88
89 domain_request_handlers[object_id - 1] = nullptr;
90
91 IPC::ResponseBuilder rb{context, 2};
92 rb.Push(RESULT_SUCCESS);
93 return RESULT_SUCCESS;
94 }
95 } 114 }
115 return domain_request_handlers[object_id - 1]->HandleSyncRequest(context);
96 116
97 LOG_CRITICAL(IPC, "Unknown domain command={}", 117 case IPC::DomainMessageHeader::CommandType::CloseVirtualHandle: {
98 static_cast<int>(domain_message_header->command.Value())); 118 LOG_DEBUG(IPC, "CloseVirtualHandle, object_id=0x{:08X}", object_id);
99 ASSERT(false); 119
120 domain_request_handlers[object_id - 1] = nullptr;
121
122 IPC::ResponseBuilder rb{context, 2};
123 rb.Push(RESULT_SUCCESS);
124 return RESULT_SUCCESS;
125 }
100 } 126 }
101 127
128 LOG_CRITICAL(IPC, "Unknown domain command={}",
129 static_cast<int>(domain_message_header.command.Value()));
130 ASSERT(false);
102 return RESULT_SUCCESS; 131 return RESULT_SUCCESS;
103} 132}
104 133
diff --git a/src/core/hle/kernel/server_session.h b/src/core/hle/kernel/server_session.h
index e0e9d64c8..aea4ccfeb 100644
--- a/src/core/hle/kernel/server_session.h
+++ b/src/core/hle/kernel/server_session.h
@@ -46,6 +46,14 @@ public:
46 return HANDLE_TYPE; 46 return HANDLE_TYPE;
47 } 47 }
48 48
49 Session* GetParent() {
50 return parent.get();
51 }
52
53 const Session* GetParent() const {
54 return parent.get();
55 }
56
49 using SessionPair = std::tuple<SharedPtr<ServerSession>, SharedPtr<ClientSession>>; 57 using SessionPair = std::tuple<SharedPtr<ServerSession>, SharedPtr<ClientSession>>;
50 58
51 /** 59 /**
@@ -78,23 +86,16 @@ public:
78 86
79 void Acquire(Thread* thread) override; 87 void Acquire(Thread* thread) override;
80 88
81 std::string name; ///< The name of this session (optional) 89 /// Called when a client disconnection occurs.
82 std::shared_ptr<Session> parent; ///< The parent session, which links to the client endpoint. 90 void ClientDisconnected();
83 std::shared_ptr<SessionRequestHandler>
84 hle_handler; ///< This session's HLE request handler (applicable when not a domain)
85 91
86 /// This is the list of domain request handlers (after conversion to a domain) 92 /// Adds a new domain request handler to the collection of request handlers within
87 std::vector<std::shared_ptr<SessionRequestHandler>> domain_request_handlers; 93 /// this ServerSession instance.
88 94 void AppendDomainRequestHandler(std::shared_ptr<SessionRequestHandler> handler);
89 /// List of threads that are pending a response after a sync request. This list is processed in
90 /// a LIFO manner, thus, the last request will be dispatched first.
91 /// TODO(Subv): Verify if this is indeed processed in LIFO using a hardware test.
92 std::vector<SharedPtr<Thread>> pending_requesting_threads;
93 95
94 /// Thread whose request is currently being handled. A request is considered "handled" when a 96 /// Retrieves the total number of domain request handlers that have been
95 /// response is sent via svcReplyAndReceive. 97 /// appended to this ServerSession instance.
96 /// TODO(Subv): Find a better name for this. 98 std::size_t NumDomainRequestHandlers() const;
97 SharedPtr<Thread> currently_handling;
98 99
99 /// Returns true if the session has been converted to a domain, otherwise False 100 /// Returns true if the session has been converted to a domain, otherwise False
100 bool IsDomain() const { 101 bool IsDomain() const {
@@ -129,8 +130,30 @@ private:
129 /// object handle. 130 /// object handle.
130 ResultCode HandleDomainSyncRequest(Kernel::HLERequestContext& context); 131 ResultCode HandleDomainSyncRequest(Kernel::HLERequestContext& context);
131 132
133 /// The parent session, which links to the client endpoint.
134 std::shared_ptr<Session> parent;
135
136 /// This session's HLE request handler (applicable when not a domain)
137 std::shared_ptr<SessionRequestHandler> hle_handler;
138
139 /// This is the list of domain request handlers (after conversion to a domain)
140 std::vector<std::shared_ptr<SessionRequestHandler>> domain_request_handlers;
141
142 /// List of threads that are pending a response after a sync request. This list is processed in
143 /// a LIFO manner, thus, the last request will be dispatched first.
144 /// TODO(Subv): Verify if this is indeed processed in LIFO using a hardware test.
145 std::vector<SharedPtr<Thread>> pending_requesting_threads;
146
147 /// Thread whose request is currently being handled. A request is considered "handled" when a
148 /// response is sent via svcReplyAndReceive.
149 /// TODO(Subv): Find a better name for this.
150 SharedPtr<Thread> currently_handling;
151
132 /// When set to True, converts the session to a domain at the end of the command 152 /// When set to True, converts the session to a domain at the end of the command
133 bool convert_to_domain{}; 153 bool convert_to_domain{};
154
155 /// The name of this session (optional)
156 std::string name;
134}; 157};
135 158
136} // namespace Kernel 159} // namespace Kernel
diff --git a/src/core/hle/kernel/shared_memory.cpp b/src/core/hle/kernel/shared_memory.cpp
index 22d0c1dd5..62861da36 100644
--- a/src/core/hle/kernel/shared_memory.cpp
+++ b/src/core/hle/kernel/shared_memory.cpp
@@ -6,7 +6,6 @@
6 6
7#include "common/assert.h" 7#include "common/assert.h"
8#include "common/logging/log.h" 8#include "common/logging/log.h"
9#include "core/core.h"
10#include "core/hle/kernel/errors.h" 9#include "core/hle/kernel/errors.h"
11#include "core/hle/kernel/kernel.h" 10#include "core/hle/kernel/kernel.h"
12#include "core/hle/kernel/shared_memory.h" 11#include "core/hle/kernel/shared_memory.h"
@@ -34,8 +33,8 @@ SharedPtr<SharedMemory> SharedMemory::Create(KernelCore& kernel, Process* owner_
34 shared_memory->backing_block_offset = 0; 33 shared_memory->backing_block_offset = 0;
35 34
36 // Refresh the address mappings for the current process. 35 // Refresh the address mappings for the current process.
37 if (Core::CurrentProcess() != nullptr) { 36 if (kernel.CurrentProcess() != nullptr) {
38 Core::CurrentProcess()->VMManager().RefreshMemoryBlockMappings( 37 kernel.CurrentProcess()->VMManager().RefreshMemoryBlockMappings(
39 shared_memory->backing_block.get()); 38 shared_memory->backing_block.get());
40 } 39 }
41 } else { 40 } else {
diff --git a/src/core/hle/kernel/svc.cpp b/src/core/hle/kernel/svc.cpp
index 7cfecb68c..047fa0c19 100644
--- a/src/core/hle/kernel/svc.cpp
+++ b/src/core/hle/kernel/svc.cpp
@@ -20,6 +20,7 @@
20#include "core/hle/kernel/address_arbiter.h" 20#include "core/hle/kernel/address_arbiter.h"
21#include "core/hle/kernel/client_port.h" 21#include "core/hle/kernel/client_port.h"
22#include "core/hle/kernel/client_session.h" 22#include "core/hle/kernel/client_session.h"
23#include "core/hle/kernel/errors.h"
23#include "core/hle/kernel/handle_table.h" 24#include "core/hle/kernel/handle_table.h"
24#include "core/hle/kernel/kernel.h" 25#include "core/hle/kernel/kernel.h"
25#include "core/hle/kernel/mutex.h" 26#include "core/hle/kernel/mutex.h"
@@ -47,23 +48,6 @@ constexpr bool IsValidAddressRange(VAddr address, u64 size) {
47 return address + size > address; 48 return address + size > address;
48} 49}
49 50
50// Checks if a given address range lies within a larger address range.
51constexpr bool IsInsideAddressRange(VAddr address, u64 size, VAddr address_range_begin,
52 VAddr address_range_end) {
53 const VAddr end_address = address + size - 1;
54 return address_range_begin <= address && end_address <= address_range_end - 1;
55}
56
57bool IsInsideAddressSpace(const VMManager& vm, VAddr address, u64 size) {
58 return IsInsideAddressRange(address, size, vm.GetAddressSpaceBaseAddress(),
59 vm.GetAddressSpaceEndAddress());
60}
61
62bool IsInsideNewMapRegion(const VMManager& vm, VAddr address, u64 size) {
63 return IsInsideAddressRange(address, size, vm.GetNewMapRegionBaseAddress(),
64 vm.GetNewMapRegionEndAddress());
65}
66
67// 8 GiB 51// 8 GiB
68constexpr u64 MAIN_MEMORY_SIZE = 0x200000000; 52constexpr u64 MAIN_MEMORY_SIZE = 0x200000000;
69 53
@@ -105,14 +89,14 @@ ResultCode MapUnmapMemorySanityChecks(const VMManager& vm_manager, VAddr dst_add
105 return ERR_INVALID_ADDRESS_STATE; 89 return ERR_INVALID_ADDRESS_STATE;
106 } 90 }
107 91
108 if (!IsInsideAddressSpace(vm_manager, src_addr, size)) { 92 if (!vm_manager.IsWithinAddressSpace(src_addr, size)) {
109 LOG_ERROR(Kernel_SVC, 93 LOG_ERROR(Kernel_SVC,
110 "Source is not within the address space, addr=0x{:016X}, size=0x{:016X}", 94 "Source is not within the address space, addr=0x{:016X}, size=0x{:016X}",
111 src_addr, size); 95 src_addr, size);
112 return ERR_INVALID_ADDRESS_STATE; 96 return ERR_INVALID_ADDRESS_STATE;
113 } 97 }
114 98
115 if (!IsInsideNewMapRegion(vm_manager, dst_addr, size)) { 99 if (!vm_manager.IsWithinNewMapRegion(dst_addr, size)) {
116 LOG_ERROR(Kernel_SVC, 100 LOG_ERROR(Kernel_SVC,
117 "Destination is not within the new map region, addr=0x{:016X}, size=0x{:016X}", 101 "Destination is not within the new map region, addr=0x{:016X}, size=0x{:016X}",
118 dst_addr, size); 102 dst_addr, size);
@@ -238,7 +222,7 @@ static ResultCode SetMemoryPermission(VAddr addr, u64 size, u32 prot) {
238 auto* const current_process = Core::CurrentProcess(); 222 auto* const current_process = Core::CurrentProcess();
239 auto& vm_manager = current_process->VMManager(); 223 auto& vm_manager = current_process->VMManager();
240 224
241 if (!IsInsideAddressSpace(vm_manager, addr, size)) { 225 if (!vm_manager.IsWithinAddressSpace(addr, size)) {
242 LOG_ERROR(Kernel_SVC, 226 LOG_ERROR(Kernel_SVC,
243 "Source is not within the address space, addr=0x{:016X}, size=0x{:016X}", addr, 227 "Source is not within the address space, addr=0x{:016X}, size=0x{:016X}", addr,
244 size); 228 size);
@@ -299,7 +283,7 @@ static ResultCode SetMemoryAttribute(VAddr address, u64 size, u32 mask, u32 attr
299 } 283 }
300 284
301 auto& vm_manager = Core::CurrentProcess()->VMManager(); 285 auto& vm_manager = Core::CurrentProcess()->VMManager();
302 if (!IsInsideAddressSpace(vm_manager, address, size)) { 286 if (!vm_manager.IsWithinAddressSpace(address, size)) {
303 LOG_ERROR(Kernel_SVC, 287 LOG_ERROR(Kernel_SVC,
304 "Given address (0x{:016X}) is outside the bounds of the address space.", address); 288 "Given address (0x{:016X}) is outside the bounds of the address space.", address);
305 return ERR_INVALID_ADDRESS_STATE; 289 return ERR_INVALID_ADDRESS_STATE;
@@ -918,6 +902,7 @@ static ResultCode GetInfo(u64* result, u64 info_id, u64 handle, u64 info_sub_id)
918 } 902 }
919 903
920 const auto& system = Core::System::GetInstance(); 904 const auto& system = Core::System::GetInstance();
905 const auto& core_timing = system.CoreTiming();
921 const auto& scheduler = system.CurrentScheduler(); 906 const auto& scheduler = system.CurrentScheduler();
922 const auto* const current_thread = scheduler.GetCurrentThread(); 907 const auto* const current_thread = scheduler.GetCurrentThread();
923 const bool same_thread = current_thread == thread; 908 const bool same_thread = current_thread == thread;
@@ -927,9 +912,9 @@ static ResultCode GetInfo(u64* result, u64 info_id, u64 handle, u64 info_sub_id)
927 if (same_thread && info_sub_id == 0xFFFFFFFFFFFFFFFF) { 912 if (same_thread && info_sub_id == 0xFFFFFFFFFFFFFFFF) {
928 const u64 thread_ticks = current_thread->GetTotalCPUTimeTicks(); 913 const u64 thread_ticks = current_thread->GetTotalCPUTimeTicks();
929 914
930 out_ticks = thread_ticks + (CoreTiming::GetTicks() - prev_ctx_ticks); 915 out_ticks = thread_ticks + (core_timing.GetTicks() - prev_ctx_ticks);
931 } else if (same_thread && info_sub_id == system.CurrentCoreIndex()) { 916 } else if (same_thread && info_sub_id == system.CurrentCoreIndex()) {
932 out_ticks = CoreTiming::GetTicks() - prev_ctx_ticks; 917 out_ticks = core_timing.GetTicks() - prev_ctx_ticks;
933 } 918 }
934 919
935 *result = out_ticks; 920 *result = out_ticks;
@@ -1299,10 +1284,14 @@ static ResultCode StartThread(Handle thread_handle) {
1299 1284
1300/// Called when a thread exits 1285/// Called when a thread exits
1301static void ExitThread() { 1286static void ExitThread() {
1302 LOG_TRACE(Kernel_SVC, "called, pc=0x{:08X}", Core::CurrentArmInterface().GetPC()); 1287 auto& system = Core::System::GetInstance();
1303 1288
1304 ExitCurrentThread(); 1289 LOG_TRACE(Kernel_SVC, "called, pc=0x{:08X}", system.CurrentArmInterface().GetPC());
1305 Core::System::GetInstance().PrepareReschedule(); 1290
1291 auto* const current_thread = system.CurrentScheduler().GetCurrentThread();
1292 current_thread->Stop();
1293 system.CurrentScheduler().RemoveThread(current_thread);
1294 system.PrepareReschedule();
1306} 1295}
1307 1296
1308/// Sleep the current thread 1297/// Sleep the current thread
@@ -1315,32 +1304,32 @@ static void SleepThread(s64 nanoseconds) {
1315 YieldAndWaitForLoadBalancing = -2, 1304 YieldAndWaitForLoadBalancing = -2,
1316 }; 1305 };
1317 1306
1307 auto& system = Core::System::GetInstance();
1308 auto& scheduler = system.CurrentScheduler();
1309 auto* const current_thread = scheduler.GetCurrentThread();
1310
1318 if (nanoseconds <= 0) { 1311 if (nanoseconds <= 0) {
1319 auto& scheduler{Core::System::GetInstance().CurrentScheduler()};
1320 switch (static_cast<SleepType>(nanoseconds)) { 1312 switch (static_cast<SleepType>(nanoseconds)) {
1321 case SleepType::YieldWithoutLoadBalancing: 1313 case SleepType::YieldWithoutLoadBalancing:
1322 scheduler.YieldWithoutLoadBalancing(GetCurrentThread()); 1314 scheduler.YieldWithoutLoadBalancing(current_thread);
1323 break; 1315 break;
1324 case SleepType::YieldWithLoadBalancing: 1316 case SleepType::YieldWithLoadBalancing:
1325 scheduler.YieldWithLoadBalancing(GetCurrentThread()); 1317 scheduler.YieldWithLoadBalancing(current_thread);
1326 break; 1318 break;
1327 case SleepType::YieldAndWaitForLoadBalancing: 1319 case SleepType::YieldAndWaitForLoadBalancing:
1328 scheduler.YieldAndWaitForLoadBalancing(GetCurrentThread()); 1320 scheduler.YieldAndWaitForLoadBalancing(current_thread);
1329 break; 1321 break;
1330 default: 1322 default:
1331 UNREACHABLE_MSG("Unimplemented sleep yield type '{:016X}'!", nanoseconds); 1323 UNREACHABLE_MSG("Unimplemented sleep yield type '{:016X}'!", nanoseconds);
1332 } 1324 }
1333 } else { 1325 } else {
1334 // Sleep current thread and check for next thread to schedule 1326 current_thread->Sleep(nanoseconds);
1335 WaitCurrentThread_Sleep();
1336
1337 // Create an event to wake the thread up after the specified nanosecond delay has passed
1338 GetCurrentThread()->WakeAfterDelay(nanoseconds);
1339 } 1327 }
1340 1328
1341 // Reschedule all CPU cores 1329 // Reschedule all CPU cores
1342 for (std::size_t i = 0; i < Core::NUM_CPU_CORES; ++i) 1330 for (std::size_t i = 0; i < Core::NUM_CPU_CORES; ++i) {
1343 Core::System::GetInstance().CpuCore(i).PrepareReschedule(); 1331 system.CpuCore(i).PrepareReschedule();
1332 }
1344} 1333}
1345 1334
1346/// Wait process wide key atomic 1335/// Wait process wide key atomic
@@ -1494,20 +1483,10 @@ static ResultCode WaitForAddress(VAddr address, u32 type, s32 value, s64 timeout
1494 return ERR_INVALID_ADDRESS; 1483 return ERR_INVALID_ADDRESS;
1495 } 1484 }
1496 1485
1497 switch (static_cast<AddressArbiter::ArbitrationType>(type)) { 1486 const auto arbitration_type = static_cast<AddressArbiter::ArbitrationType>(type);
1498 case AddressArbiter::ArbitrationType::WaitIfLessThan: 1487 auto& address_arbiter =
1499 return AddressArbiter::WaitForAddressIfLessThan(address, value, timeout, false); 1488 Core::System::GetInstance().Kernel().CurrentProcess()->GetAddressArbiter();
1500 case AddressArbiter::ArbitrationType::DecrementAndWaitIfLessThan: 1489 return address_arbiter.WaitForAddress(address, arbitration_type, value, timeout);
1501 return AddressArbiter::WaitForAddressIfLessThan(address, value, timeout, true);
1502 case AddressArbiter::ArbitrationType::WaitIfEqual:
1503 return AddressArbiter::WaitForAddressIfEqual(address, value, timeout);
1504 default:
1505 LOG_ERROR(Kernel_SVC,
1506 "Invalid arbitration type, expected WaitIfLessThan, DecrementAndWaitIfLessThan "
1507 "or WaitIfEqual but got {}",
1508 type);
1509 return ERR_INVALID_ENUM_VALUE;
1510 }
1511} 1490}
1512 1491
1513// Signals to an address (via Address Arbiter) 1492// Signals to an address (via Address Arbiter)
@@ -1525,31 +1504,21 @@ static ResultCode SignalToAddress(VAddr address, u32 type, s32 value, s32 num_to
1525 return ERR_INVALID_ADDRESS; 1504 return ERR_INVALID_ADDRESS;
1526 } 1505 }
1527 1506
1528 switch (static_cast<AddressArbiter::SignalType>(type)) { 1507 const auto signal_type = static_cast<AddressArbiter::SignalType>(type);
1529 case AddressArbiter::SignalType::Signal: 1508 auto& address_arbiter =
1530 return AddressArbiter::SignalToAddress(address, num_to_wake); 1509 Core::System::GetInstance().Kernel().CurrentProcess()->GetAddressArbiter();
1531 case AddressArbiter::SignalType::IncrementAndSignalIfEqual: 1510 return address_arbiter.SignalToAddress(address, signal_type, value, num_to_wake);
1532 return AddressArbiter::IncrementAndSignalToAddressIfEqual(address, value, num_to_wake);
1533 case AddressArbiter::SignalType::ModifyByWaitingCountAndSignalIfEqual:
1534 return AddressArbiter::ModifyByWaitingCountAndSignalToAddressIfEqual(address, value,
1535 num_to_wake);
1536 default:
1537 LOG_ERROR(Kernel_SVC,
1538 "Invalid signal type, expected Signal, IncrementAndSignalIfEqual "
1539 "or ModifyByWaitingCountAndSignalIfEqual but got {}",
1540 type);
1541 return ERR_INVALID_ENUM_VALUE;
1542 }
1543} 1511}
1544 1512
1545/// This returns the total CPU ticks elapsed since the CPU was powered-on 1513/// This returns the total CPU ticks elapsed since the CPU was powered-on
1546static u64 GetSystemTick() { 1514static u64 GetSystemTick() {
1547 LOG_TRACE(Kernel_SVC, "called"); 1515 LOG_TRACE(Kernel_SVC, "called");
1548 1516
1549 const u64 result{CoreTiming::GetTicks()}; 1517 auto& core_timing = Core::System::GetInstance().CoreTiming();
1518 const u64 result{core_timing.GetTicks()};
1550 1519
1551 // Advance time to defeat dumb games that busy-wait for the frame to end. 1520 // Advance time to defeat dumb games that busy-wait for the frame to end.
1552 CoreTiming::AddTicks(400); 1521 core_timing.AddTicks(400);
1553 1522
1554 return result; 1523 return result;
1555} 1524}
diff --git a/src/core/hle/kernel/thread.cpp b/src/core/hle/kernel/thread.cpp
index d3984dfc4..3b22e8e0d 100644
--- a/src/core/hle/kernel/thread.cpp
+++ b/src/core/hle/kernel/thread.cpp
@@ -7,8 +7,6 @@
7#include <optional> 7#include <optional>
8#include <vector> 8#include <vector>
9 9
10#include <boost/range/algorithm_ext/erase.hpp>
11
12#include "common/assert.h" 10#include "common/assert.h"
13#include "common/common_types.h" 11#include "common/common_types.h"
14#include "common/logging/log.h" 12#include "common/logging/log.h"
@@ -43,7 +41,8 @@ Thread::~Thread() = default;
43 41
44void Thread::Stop() { 42void Thread::Stop() {
45 // Cancel any outstanding wakeup events for this thread 43 // Cancel any outstanding wakeup events for this thread
46 CoreTiming::UnscheduleEvent(kernel.ThreadWakeupCallbackEventType(), callback_handle); 44 Core::System::GetInstance().CoreTiming().UnscheduleEvent(kernel.ThreadWakeupCallbackEventType(),
45 callback_handle);
47 kernel.ThreadWakeupCallbackHandleTable().Close(callback_handle); 46 kernel.ThreadWakeupCallbackHandleTable().Close(callback_handle);
48 callback_handle = 0; 47 callback_handle = 0;
49 48
@@ -67,17 +66,6 @@ void Thread::Stop() {
67 owner_process->FreeTLSSlot(tls_address); 66 owner_process->FreeTLSSlot(tls_address);
68} 67}
69 68
70void WaitCurrentThread_Sleep() {
71 Thread* thread = GetCurrentThread();
72 thread->SetStatus(ThreadStatus::WaitSleep);
73}
74
75void ExitCurrentThread() {
76 Thread* thread = GetCurrentThread();
77 thread->Stop();
78 Core::System::GetInstance().CurrentScheduler().RemoveThread(thread);
79}
80
81void Thread::WakeAfterDelay(s64 nanoseconds) { 69void Thread::WakeAfterDelay(s64 nanoseconds) {
82 // Don't schedule a wakeup if the thread wants to wait forever 70 // Don't schedule a wakeup if the thread wants to wait forever
83 if (nanoseconds == -1) 71 if (nanoseconds == -1)
@@ -85,12 +73,14 @@ void Thread::WakeAfterDelay(s64 nanoseconds) {
85 73
86 // This function might be called from any thread so we have to be cautious and use the 74 // This function might be called from any thread so we have to be cautious and use the
87 // thread-safe version of ScheduleEvent. 75 // thread-safe version of ScheduleEvent.
88 CoreTiming::ScheduleEventThreadsafe(CoreTiming::nsToCycles(nanoseconds), 76 Core::System::GetInstance().CoreTiming().ScheduleEventThreadsafe(
89 kernel.ThreadWakeupCallbackEventType(), callback_handle); 77 Core::Timing::nsToCycles(nanoseconds), kernel.ThreadWakeupCallbackEventType(),
78 callback_handle);
90} 79}
91 80
92void Thread::CancelWakeupTimer() { 81void Thread::CancelWakeupTimer() {
93 CoreTiming::UnscheduleEventThreadsafe(kernel.ThreadWakeupCallbackEventType(), callback_handle); 82 Core::System::GetInstance().CoreTiming().UnscheduleEventThreadsafe(
83 kernel.ThreadWakeupCallbackEventType(), callback_handle);
94} 84}
95 85
96static std::optional<s32> GetNextProcessorId(u64 mask) { 86static std::optional<s32> GetNextProcessorId(u64 mask) {
@@ -181,14 +171,13 @@ ResultVal<SharedPtr<Thread>> Thread::Create(KernelCore& kernel, std::string name
181 return ERR_INVALID_PROCESSOR_ID; 171 return ERR_INVALID_PROCESSOR_ID;
182 } 172 }
183 173
184 // TODO(yuriks): Other checks, returning 0xD9001BEA
185
186 if (!Memory::IsValidVirtualAddress(owner_process, entry_point)) { 174 if (!Memory::IsValidVirtualAddress(owner_process, entry_point)) {
187 LOG_ERROR(Kernel_SVC, "(name={}): invalid entry {:016X}", name, entry_point); 175 LOG_ERROR(Kernel_SVC, "(name={}): invalid entry {:016X}", name, entry_point);
188 // TODO (bunnei): Find the correct error code to use here 176 // TODO (bunnei): Find the correct error code to use here
189 return ResultCode(-1); 177 return ResultCode(-1);
190 } 178 }
191 179
180 auto& system = Core::System::GetInstance();
192 SharedPtr<Thread> thread(new Thread(kernel)); 181 SharedPtr<Thread> thread(new Thread(kernel));
193 182
194 thread->thread_id = kernel.CreateNewThreadID(); 183 thread->thread_id = kernel.CreateNewThreadID();
@@ -197,7 +186,7 @@ ResultVal<SharedPtr<Thread>> Thread::Create(KernelCore& kernel, std::string name
197 thread->stack_top = stack_top; 186 thread->stack_top = stack_top;
198 thread->tpidr_el0 = 0; 187 thread->tpidr_el0 = 0;
199 thread->nominal_priority = thread->current_priority = priority; 188 thread->nominal_priority = thread->current_priority = priority;
200 thread->last_running_ticks = CoreTiming::GetTicks(); 189 thread->last_running_ticks = system.CoreTiming().GetTicks();
201 thread->processor_id = processor_id; 190 thread->processor_id = processor_id;
202 thread->ideal_core = processor_id; 191 thread->ideal_core = processor_id;
203 thread->affinity_mask = 1ULL << processor_id; 192 thread->affinity_mask = 1ULL << processor_id;
@@ -208,7 +197,7 @@ ResultVal<SharedPtr<Thread>> Thread::Create(KernelCore& kernel, std::string name
208 thread->name = std::move(name); 197 thread->name = std::move(name);
209 thread->callback_handle = kernel.ThreadWakeupCallbackHandleTable().Create(thread).Unwrap(); 198 thread->callback_handle = kernel.ThreadWakeupCallbackHandleTable().Create(thread).Unwrap();
210 thread->owner_process = &owner_process; 199 thread->owner_process = &owner_process;
211 thread->scheduler = &Core::System::GetInstance().Scheduler(processor_id); 200 thread->scheduler = &system.Scheduler(processor_id);
212 thread->scheduler->AddThread(thread, priority); 201 thread->scheduler->AddThread(thread, priority);
213 thread->tls_address = thread->owner_process->MarkNextAvailableTLSSlotAsUsed(*thread); 202 thread->tls_address = thread->owner_process->MarkNextAvailableTLSSlotAsUsed(*thread);
214 203
@@ -257,7 +246,7 @@ void Thread::SetStatus(ThreadStatus new_status) {
257 } 246 }
258 247
259 if (status == ThreadStatus::Running) { 248 if (status == ThreadStatus::Running) {
260 last_running_ticks = CoreTiming::GetTicks(); 249 last_running_ticks = Core::System::GetInstance().CoreTiming().GetTicks();
261 } 250 }
262 251
263 status = new_status; 252 status = new_status;
@@ -267,8 +256,8 @@ void Thread::AddMutexWaiter(SharedPtr<Thread> thread) {
267 if (thread->lock_owner == this) { 256 if (thread->lock_owner == this) {
268 // If the thread is already waiting for this thread to release the mutex, ensure that the 257 // If the thread is already waiting for this thread to release the mutex, ensure that the
269 // waiters list is consistent and return without doing anything. 258 // waiters list is consistent and return without doing anything.
270 auto itr = std::find(wait_mutex_threads.begin(), wait_mutex_threads.end(), thread); 259 const auto iter = std::find(wait_mutex_threads.begin(), wait_mutex_threads.end(), thread);
271 ASSERT(itr != wait_mutex_threads.end()); 260 ASSERT(iter != wait_mutex_threads.end());
272 return; 261 return;
273 } 262 }
274 263
@@ -276,11 +265,16 @@ void Thread::AddMutexWaiter(SharedPtr<Thread> thread) {
276 ASSERT(thread->lock_owner == nullptr); 265 ASSERT(thread->lock_owner == nullptr);
277 266
278 // Ensure that the thread is not already in the list of mutex waiters 267 // Ensure that the thread is not already in the list of mutex waiters
279 auto itr = std::find(wait_mutex_threads.begin(), wait_mutex_threads.end(), thread); 268 const auto iter = std::find(wait_mutex_threads.begin(), wait_mutex_threads.end(), thread);
280 ASSERT(itr == wait_mutex_threads.end()); 269 ASSERT(iter == wait_mutex_threads.end());
281 270
271 // Keep the list in an ordered fashion
272 const auto insertion_point = std::find_if(
273 wait_mutex_threads.begin(), wait_mutex_threads.end(),
274 [&thread](const auto& entry) { return entry->GetPriority() > thread->GetPriority(); });
275 wait_mutex_threads.insert(insertion_point, thread);
282 thread->lock_owner = this; 276 thread->lock_owner = this;
283 wait_mutex_threads.emplace_back(std::move(thread)); 277
284 UpdatePriority(); 278 UpdatePriority();
285} 279}
286 280
@@ -288,32 +282,44 @@ void Thread::RemoveMutexWaiter(SharedPtr<Thread> thread) {
288 ASSERT(thread->lock_owner == this); 282 ASSERT(thread->lock_owner == this);
289 283
290 // Ensure that the thread is in the list of mutex waiters 284 // Ensure that the thread is in the list of mutex waiters
291 auto itr = std::find(wait_mutex_threads.begin(), wait_mutex_threads.end(), thread); 285 const auto iter = std::find(wait_mutex_threads.begin(), wait_mutex_threads.end(), thread);
292 ASSERT(itr != wait_mutex_threads.end()); 286 ASSERT(iter != wait_mutex_threads.end());
287
288 wait_mutex_threads.erase(iter);
293 289
294 boost::remove_erase(wait_mutex_threads, thread);
295 thread->lock_owner = nullptr; 290 thread->lock_owner = nullptr;
296 UpdatePriority(); 291 UpdatePriority();
297} 292}
298 293
299void Thread::UpdatePriority() { 294void Thread::UpdatePriority() {
300 // Find the highest priority among all the threads that are waiting for this thread's lock 295 // If any of the threads waiting on the mutex have a higher priority
296 // (taking into account priority inheritance), then this thread inherits
297 // that thread's priority.
301 u32 new_priority = nominal_priority; 298 u32 new_priority = nominal_priority;
302 for (const auto& thread : wait_mutex_threads) { 299 if (!wait_mutex_threads.empty()) {
303 if (thread->nominal_priority < new_priority) 300 if (wait_mutex_threads.front()->current_priority < new_priority) {
304 new_priority = thread->nominal_priority; 301 new_priority = wait_mutex_threads.front()->current_priority;
302 }
305 } 303 }
306 304
307 if (new_priority == current_priority) 305 if (new_priority == current_priority) {
308 return; 306 return;
307 }
309 308
310 scheduler->SetThreadPriority(this, new_priority); 309 scheduler->SetThreadPriority(this, new_priority);
311
312 current_priority = new_priority; 310 current_priority = new_priority;
313 311
312 if (!lock_owner) {
313 return;
314 }
315
316 // Ensure that the thread is within the correct location in the waiting list.
317 auto old_owner = lock_owner;
318 lock_owner->RemoveMutexWaiter(this);
319 old_owner->AddMutexWaiter(this);
320
314 // Recursively update the priority of the thread that depends on the priority of this one. 321 // Recursively update the priority of the thread that depends on the priority of this one.
315 if (lock_owner) 322 lock_owner->UpdatePriority();
316 lock_owner->UpdatePriority();
317} 323}
318 324
319void Thread::ChangeCore(u32 core, u64 mask) { 325void Thread::ChangeCore(u32 core, u64 mask) {
@@ -389,6 +395,14 @@ void Thread::SetActivity(ThreadActivity value) {
389 } 395 }
390} 396}
391 397
398void Thread::Sleep(s64 nanoseconds) {
399 // Sleep current thread and check for next thread to schedule
400 SetStatus(ThreadStatus::WaitSleep);
401
402 // Create an event to wake the thread up after the specified nanosecond delay has passed
403 WakeAfterDelay(nanoseconds);
404}
405
392//////////////////////////////////////////////////////////////////////////////////////////////////// 406////////////////////////////////////////////////////////////////////////////////////////////////////
393 407
394/** 408/**
diff --git a/src/core/hle/kernel/thread.h b/src/core/hle/kernel/thread.h
index c48b21aba..faad5f391 100644
--- a/src/core/hle/kernel/thread.h
+++ b/src/core/hle/kernel/thread.h
@@ -383,6 +383,9 @@ public:
383 383
384 void SetActivity(ThreadActivity value); 384 void SetActivity(ThreadActivity value);
385 385
386 /// Sleeps this thread for the given amount of nanoseconds.
387 void Sleep(s64 nanoseconds);
388
386private: 389private:
387 explicit Thread(KernelCore& kernel); 390 explicit Thread(KernelCore& kernel);
388 ~Thread() override; 391 ~Thread() override;
@@ -398,8 +401,14 @@ private:
398 VAddr entry_point = 0; 401 VAddr entry_point = 0;
399 VAddr stack_top = 0; 402 VAddr stack_top = 0;
400 403
401 u32 nominal_priority = 0; ///< Nominal thread priority, as set by the emulated application 404 /// Nominal thread priority, as set by the emulated application.
402 u32 current_priority = 0; ///< Current thread priority, can be temporarily changed 405 /// The nominal priority is the thread priority without priority
406 /// inheritance taken into account.
407 u32 nominal_priority = 0;
408
409 /// Current thread priority. This may change over the course of the
410 /// thread's lifetime in order to facilitate priority inheritance.
411 u32 current_priority = 0;
403 412
404 u64 total_cpu_time_ticks = 0; ///< Total CPU running ticks. 413 u64 total_cpu_time_ticks = 0; ///< Total CPU running ticks.
405 u64 last_running_ticks = 0; ///< CPU tick when thread was last running 414 u64 last_running_ticks = 0; ///< CPU tick when thread was last running
@@ -460,14 +469,4 @@ private:
460 */ 469 */
461Thread* GetCurrentThread(); 470Thread* GetCurrentThread();
462 471
463/**
464 * Waits the current thread on a sleep
465 */
466void WaitCurrentThread_Sleep();
467
468/**
469 * Stops the current thread and removes it from the thread_list
470 */
471void ExitCurrentThread();
472
473} // namespace Kernel 472} // namespace Kernel
diff --git a/src/core/hle/kernel/vm_manager.cpp b/src/core/hle/kernel/vm_manager.cpp
index 10ad94aa6..3def3e52c 100644
--- a/src/core/hle/kernel/vm_manager.cpp
+++ b/src/core/hle/kernel/vm_manager.cpp
@@ -7,18 +7,18 @@
7#include <utility> 7#include <utility>
8#include "common/assert.h" 8#include "common/assert.h"
9#include "common/logging/log.h" 9#include "common/logging/log.h"
10#include "common/memory_hook.h"
10#include "core/arm/arm_interface.h" 11#include "core/arm/arm_interface.h"
11#include "core/core.h" 12#include "core/core.h"
12#include "core/file_sys/program_metadata.h" 13#include "core/file_sys/program_metadata.h"
13#include "core/hle/kernel/errors.h" 14#include "core/hle/kernel/errors.h"
14#include "core/hle/kernel/vm_manager.h" 15#include "core/hle/kernel/vm_manager.h"
15#include "core/memory.h" 16#include "core/memory.h"
16#include "core/memory_hook.h"
17#include "core/memory_setup.h" 17#include "core/memory_setup.h"
18 18
19namespace Kernel { 19namespace Kernel {
20 20namespace {
21static const char* GetMemoryStateName(MemoryState state) { 21const char* GetMemoryStateName(MemoryState state) {
22 static constexpr const char* names[] = { 22 static constexpr const char* names[] = {
23 "Unmapped", "Io", 23 "Unmapped", "Io",
24 "Normal", "CodeStatic", 24 "Normal", "CodeStatic",
@@ -35,6 +35,14 @@ static const char* GetMemoryStateName(MemoryState state) {
35 return names[ToSvcMemoryState(state)]; 35 return names[ToSvcMemoryState(state)];
36} 36}
37 37
38// Checks if a given address range lies within a larger address range.
39constexpr bool IsInsideAddressRange(VAddr address, u64 size, VAddr address_range_begin,
40 VAddr address_range_end) {
41 const VAddr end_address = address + size - 1;
42 return address_range_begin <= address && end_address <= address_range_end - 1;
43}
44} // Anonymous namespace
45
38bool VirtualMemoryArea::CanBeMergedWith(const VirtualMemoryArea& next) const { 46bool VirtualMemoryArea::CanBeMergedWith(const VirtualMemoryArea& next) const {
39 ASSERT(base + size == next.base); 47 ASSERT(base + size == next.base);
40 if (permissions != next.permissions || state != next.state || attribute != next.attribute || 48 if (permissions != next.permissions || state != next.state || attribute != next.attribute ||
@@ -169,7 +177,7 @@ ResultVal<VAddr> VMManager::FindFreeRegion(u64 size) const {
169 177
170ResultVal<VMManager::VMAHandle> VMManager::MapMMIO(VAddr target, PAddr paddr, u64 size, 178ResultVal<VMManager::VMAHandle> VMManager::MapMMIO(VAddr target, PAddr paddr, u64 size,
171 MemoryState state, 179 MemoryState state,
172 Memory::MemoryHookPointer mmio_handler) { 180 Common::MemoryHookPointer mmio_handler) {
173 // This is the appropriately sized VMA that will turn into our allocation. 181 // This is the appropriately sized VMA that will turn into our allocation.
174 CASCADE_RESULT(VMAIter vma_handle, CarveVMA(target, size)); 182 CASCADE_RESULT(VMAIter vma_handle, CarveVMA(target, size));
175 VirtualMemoryArea& final_vma = vma_handle->second; 183 VirtualMemoryArea& final_vma = vma_handle->second;
@@ -249,8 +257,7 @@ ResultCode VMManager::ReprotectRange(VAddr target, u64 size, VMAPermission new_p
249} 257}
250 258
251ResultVal<VAddr> VMManager::HeapAllocate(VAddr target, u64 size, VMAPermission perms) { 259ResultVal<VAddr> VMManager::HeapAllocate(VAddr target, u64 size, VMAPermission perms) {
252 if (target < GetHeapRegionBaseAddress() || target + size > GetHeapRegionEndAddress() || 260 if (!IsWithinHeapRegion(target, size)) {
253 target + size < target) {
254 return ERR_INVALID_ADDRESS; 261 return ERR_INVALID_ADDRESS;
255 } 262 }
256 263
@@ -285,8 +292,7 @@ ResultVal<VAddr> VMManager::HeapAllocate(VAddr target, u64 size, VMAPermission p
285} 292}
286 293
287ResultCode VMManager::HeapFree(VAddr target, u64 size) { 294ResultCode VMManager::HeapFree(VAddr target, u64 size) {
288 if (target < GetHeapRegionBaseAddress() || target + size > GetHeapRegionEndAddress() || 295 if (!IsWithinHeapRegion(target, size)) {
289 target + size < target) {
290 return ERR_INVALID_ADDRESS; 296 return ERR_INVALID_ADDRESS;
291 } 297 }
292 298
@@ -618,7 +624,7 @@ void VMManager::ClearPageTable() {
618 std::fill(page_table.pointers.begin(), page_table.pointers.end(), nullptr); 624 std::fill(page_table.pointers.begin(), page_table.pointers.end(), nullptr);
619 page_table.special_regions.clear(); 625 page_table.special_regions.clear();
620 std::fill(page_table.attributes.begin(), page_table.attributes.end(), 626 std::fill(page_table.attributes.begin(), page_table.attributes.end(),
621 Memory::PageType::Unmapped); 627 Common::PageType::Unmapped);
622} 628}
623 629
624VMManager::CheckResults VMManager::CheckRangeState(VAddr address, u64 size, MemoryState state_mask, 630VMManager::CheckResults VMManager::CheckRangeState(VAddr address, u64 size, MemoryState state_mask,
@@ -706,6 +712,11 @@ u64 VMManager::GetAddressSpaceWidth() const {
706 return address_space_width; 712 return address_space_width;
707} 713}
708 714
715bool VMManager::IsWithinAddressSpace(VAddr address, u64 size) const {
716 return IsInsideAddressRange(address, size, GetAddressSpaceBaseAddress(),
717 GetAddressSpaceEndAddress());
718}
719
709VAddr VMManager::GetASLRRegionBaseAddress() const { 720VAddr VMManager::GetASLRRegionBaseAddress() const {
710 return aslr_region_base; 721 return aslr_region_base;
711} 722}
@@ -750,6 +761,11 @@ u64 VMManager::GetCodeRegionSize() const {
750 return code_region_end - code_region_base; 761 return code_region_end - code_region_base;
751} 762}
752 763
764bool VMManager::IsWithinCodeRegion(VAddr address, u64 size) const {
765 return IsInsideAddressRange(address, size, GetCodeRegionBaseAddress(),
766 GetCodeRegionEndAddress());
767}
768
753VAddr VMManager::GetHeapRegionBaseAddress() const { 769VAddr VMManager::GetHeapRegionBaseAddress() const {
754 return heap_region_base; 770 return heap_region_base;
755} 771}
@@ -762,6 +778,11 @@ u64 VMManager::GetHeapRegionSize() const {
762 return heap_region_end - heap_region_base; 778 return heap_region_end - heap_region_base;
763} 779}
764 780
781bool VMManager::IsWithinHeapRegion(VAddr address, u64 size) const {
782 return IsInsideAddressRange(address, size, GetHeapRegionBaseAddress(),
783 GetHeapRegionEndAddress());
784}
785
765VAddr VMManager::GetMapRegionBaseAddress() const { 786VAddr VMManager::GetMapRegionBaseAddress() const {
766 return map_region_base; 787 return map_region_base;
767} 788}
@@ -774,6 +795,10 @@ u64 VMManager::GetMapRegionSize() const {
774 return map_region_end - map_region_base; 795 return map_region_end - map_region_base;
775} 796}
776 797
798bool VMManager::IsWithinMapRegion(VAddr address, u64 size) const {
799 return IsInsideAddressRange(address, size, GetMapRegionBaseAddress(), GetMapRegionEndAddress());
800}
801
777VAddr VMManager::GetNewMapRegionBaseAddress() const { 802VAddr VMManager::GetNewMapRegionBaseAddress() const {
778 return new_map_region_base; 803 return new_map_region_base;
779} 804}
@@ -786,6 +811,11 @@ u64 VMManager::GetNewMapRegionSize() const {
786 return new_map_region_end - new_map_region_base; 811 return new_map_region_end - new_map_region_base;
787} 812}
788 813
814bool VMManager::IsWithinNewMapRegion(VAddr address, u64 size) const {
815 return IsInsideAddressRange(address, size, GetNewMapRegionBaseAddress(),
816 GetNewMapRegionEndAddress());
817}
818
789VAddr VMManager::GetTLSIORegionBaseAddress() const { 819VAddr VMManager::GetTLSIORegionBaseAddress() const {
790 return tls_io_region_base; 820 return tls_io_region_base;
791} 821}
@@ -798,4 +828,9 @@ u64 VMManager::GetTLSIORegionSize() const {
798 return tls_io_region_end - tls_io_region_base; 828 return tls_io_region_end - tls_io_region_base;
799} 829}
800 830
831bool VMManager::IsWithinTLSIORegion(VAddr address, u64 size) const {
832 return IsInsideAddressRange(address, size, GetTLSIORegionBaseAddress(),
833 GetTLSIORegionEndAddress());
834}
835
801} // namespace Kernel 836} // namespace Kernel
diff --git a/src/core/hle/kernel/vm_manager.h b/src/core/hle/kernel/vm_manager.h
index 6091533bc..b96980f8f 100644
--- a/src/core/hle/kernel/vm_manager.h
+++ b/src/core/hle/kernel/vm_manager.h
@@ -9,9 +9,10 @@
9#include <tuple> 9#include <tuple>
10#include <vector> 10#include <vector>
11#include "common/common_types.h" 11#include "common/common_types.h"
12#include "common/memory_hook.h"
13#include "common/page_table.h"
12#include "core/hle/result.h" 14#include "core/hle/result.h"
13#include "core/memory.h" 15#include "core/memory.h"
14#include "core/memory_hook.h"
15 16
16namespace FileSys { 17namespace FileSys {
17enum class ProgramAddressSpaceType : u8; 18enum class ProgramAddressSpaceType : u8;
@@ -290,7 +291,7 @@ struct VirtualMemoryArea {
290 // Settings for type = MMIO 291 // Settings for type = MMIO
291 /// Physical address of the register area this VMA maps to. 292 /// Physical address of the register area this VMA maps to.
292 PAddr paddr = 0; 293 PAddr paddr = 0;
293 Memory::MemoryHookPointer mmio_handler = nullptr; 294 Common::MemoryHookPointer mmio_handler = nullptr;
294 295
295 /// Tests if this area can be merged to the right with `next`. 296 /// Tests if this area can be merged to the right with `next`.
296 bool CanBeMergedWith(const VirtualMemoryArea& next) const; 297 bool CanBeMergedWith(const VirtualMemoryArea& next) const;
@@ -368,7 +369,7 @@ public:
368 * @param mmio_handler The handler that will implement read and write for this MMIO region. 369 * @param mmio_handler The handler that will implement read and write for this MMIO region.
369 */ 370 */
370 ResultVal<VMAHandle> MapMMIO(VAddr target, PAddr paddr, u64 size, MemoryState state, 371 ResultVal<VMAHandle> MapMMIO(VAddr target, PAddr paddr, u64 size, MemoryState state,
371 Memory::MemoryHookPointer mmio_handler); 372 Common::MemoryHookPointer mmio_handler);
372 373
373 /// Unmaps a range of addresses, splitting VMAs as necessary. 374 /// Unmaps a range of addresses, splitting VMAs as necessary.
374 ResultCode UnmapRange(VAddr target, u64 size); 375 ResultCode UnmapRange(VAddr target, u64 size);
@@ -432,18 +433,21 @@ public:
432 /// Gets the address space width in bits. 433 /// Gets the address space width in bits.
433 u64 GetAddressSpaceWidth() const; 434 u64 GetAddressSpaceWidth() const;
434 435
436 /// Determines whether or not the given address range lies within the address space.
437 bool IsWithinAddressSpace(VAddr address, u64 size) const;
438
435 /// Gets the base address of the ASLR region. 439 /// Gets the base address of the ASLR region.
436 VAddr GetASLRRegionBaseAddress() const; 440 VAddr GetASLRRegionBaseAddress() const;
437 441
438 /// Gets the end address of the ASLR region. 442 /// Gets the end address of the ASLR region.
439 VAddr GetASLRRegionEndAddress() const; 443 VAddr GetASLRRegionEndAddress() const;
440 444
441 /// Determines whether or not the specified address range is within the ASLR region.
442 bool IsWithinASLRRegion(VAddr address, u64 size) const;
443
444 /// Gets the size of the ASLR region 445 /// Gets the size of the ASLR region
445 u64 GetASLRRegionSize() const; 446 u64 GetASLRRegionSize() const;
446 447
448 /// Determines whether or not the specified address range is within the ASLR region.
449 bool IsWithinASLRRegion(VAddr address, u64 size) const;
450
447 /// Gets the base address of the code region. 451 /// Gets the base address of the code region.
448 VAddr GetCodeRegionBaseAddress() const; 452 VAddr GetCodeRegionBaseAddress() const;
449 453
@@ -453,6 +457,9 @@ public:
453 /// Gets the total size of the code region in bytes. 457 /// Gets the total size of the code region in bytes.
454 u64 GetCodeRegionSize() const; 458 u64 GetCodeRegionSize() const;
455 459
460 /// Determines whether or not the specified range is within the code region.
461 bool IsWithinCodeRegion(VAddr address, u64 size) const;
462
456 /// Gets the base address of the heap region. 463 /// Gets the base address of the heap region.
457 VAddr GetHeapRegionBaseAddress() const; 464 VAddr GetHeapRegionBaseAddress() const;
458 465
@@ -462,6 +469,9 @@ public:
462 /// Gets the total size of the heap region in bytes. 469 /// Gets the total size of the heap region in bytes.
463 u64 GetHeapRegionSize() const; 470 u64 GetHeapRegionSize() const;
464 471
472 /// Determines whether or not the specified range is within the heap region.
473 bool IsWithinHeapRegion(VAddr address, u64 size) const;
474
465 /// Gets the base address of the map region. 475 /// Gets the base address of the map region.
466 VAddr GetMapRegionBaseAddress() const; 476 VAddr GetMapRegionBaseAddress() const;
467 477
@@ -471,6 +481,9 @@ public:
471 /// Gets the total size of the map region in bytes. 481 /// Gets the total size of the map region in bytes.
472 u64 GetMapRegionSize() const; 482 u64 GetMapRegionSize() const;
473 483
484 /// Determines whether or not the specified range is within the map region.
485 bool IsWithinMapRegion(VAddr address, u64 size) const;
486
474 /// Gets the base address of the new map region. 487 /// Gets the base address of the new map region.
475 VAddr GetNewMapRegionBaseAddress() const; 488 VAddr GetNewMapRegionBaseAddress() const;
476 489
@@ -480,6 +493,9 @@ public:
480 /// Gets the total size of the new map region in bytes. 493 /// Gets the total size of the new map region in bytes.
481 u64 GetNewMapRegionSize() const; 494 u64 GetNewMapRegionSize() const;
482 495
496 /// Determines whether or not the given address range is within the new map region
497 bool IsWithinNewMapRegion(VAddr address, u64 size) const;
498
483 /// Gets the base address of the TLS IO region. 499 /// Gets the base address of the TLS IO region.
484 VAddr GetTLSIORegionBaseAddress() const; 500 VAddr GetTLSIORegionBaseAddress() const;
485 501
@@ -489,9 +505,12 @@ public:
489 /// Gets the total size of the TLS IO region in bytes. 505 /// Gets the total size of the TLS IO region in bytes.
490 u64 GetTLSIORegionSize() const; 506 u64 GetTLSIORegionSize() const;
491 507
508 /// Determines if the given address range is within the TLS IO region.
509 bool IsWithinTLSIORegion(VAddr address, u64 size) const;
510
492 /// Each VMManager has its own page table, which is set as the main one when the owning process 511 /// Each VMManager has its own page table, which is set as the main one when the owning process
493 /// is scheduled. 512 /// is scheduled.
494 Memory::PageTable page_table; 513 Common::PageTable page_table{Memory::PAGE_BITS};
495 514
496private: 515private:
497 using VMAIter = VMAMap::iterator; 516 using VMAIter = VMAMap::iterator;
diff --git a/src/core/hle/result.h b/src/core/hle/result.h
index bfb77cc31..ab84f5ddc 100644
--- a/src/core/hle/result.h
+++ b/src/core/hle/result.h
@@ -8,20 +8,11 @@
8#include <utility> 8#include <utility>
9#include "common/assert.h" 9#include "common/assert.h"
10#include "common/bit_field.h" 10#include "common/bit_field.h"
11#include "common/common_funcs.h"
12#include "common/common_types.h" 11#include "common/common_types.h"
13 12
14// All the constants in this file come from http://switchbrew.org/index.php?title=Error_codes 13// All the constants in this file come from http://switchbrew.org/index.php?title=Error_codes
15 14
16/** 15/**
17 * Detailed description of the error. Code 0 always means success.
18 */
19enum class ErrorDescription : u32 {
20 Success = 0,
21 RemoteProcessDead = 301,
22};
23
24/**
25 * Identifies the module which caused the error. Error codes can be propagated through a call 16 * Identifies the module which caused the error. Error codes can be propagated through a call
26 * chain, meaning that this doesn't always correspond to the module where the API call made is 17 * chain, meaning that this doesn't always correspond to the module where the API call made is
27 * contained. 18 * contained.
@@ -121,7 +112,7 @@ enum class ErrorModule : u32 {
121 ShopN = 811, 112 ShopN = 811,
122}; 113};
123 114
124/// Encapsulates a CTR-OS error code, allowing it to be separated into its constituent fields. 115/// Encapsulates a Horizon OS error code, allowing it to be separated into its constituent fields.
125union ResultCode { 116union ResultCode {
126 u32 raw; 117 u32 raw;
127 118
@@ -134,17 +125,9 @@ union ResultCode {
134 125
135 constexpr explicit ResultCode(u32 raw) : raw(raw) {} 126 constexpr explicit ResultCode(u32 raw) : raw(raw) {}
136 127
137 constexpr ResultCode(ErrorModule module, ErrorDescription description)
138 : ResultCode(module, static_cast<u32>(description)) {}
139
140 constexpr ResultCode(ErrorModule module_, u32 description_) 128 constexpr ResultCode(ErrorModule module_, u32 description_)
141 : raw(module.FormatValue(module_) | description.FormatValue(description_)) {} 129 : raw(module.FormatValue(module_) | description.FormatValue(description_)) {}
142 130
143 constexpr ResultCode& operator=(const ResultCode& o) {
144 raw = o.raw;
145 return *this;
146 }
147
148 constexpr bool IsSuccess() const { 131 constexpr bool IsSuccess() const {
149 return raw == 0; 132 return raw == 0;
150 } 133 }
diff --git a/src/core/hle/service/am/am.cpp b/src/core/hle/service/am/am.cpp
index d1cbe0e44..c750d70ac 100644
--- a/src/core/hle/service/am/am.cpp
+++ b/src/core/hle/service/am/am.cpp
@@ -2,10 +2,10 @@
2// Licensed under GPLv2 or any later version 2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included. 3// Refer to the license.txt file included.
4 4
5#include <algorithm>
5#include <array> 6#include <array>
6#include <cinttypes> 7#include <cinttypes>
7#include <cstring> 8#include <cstring>
8#include <stack>
9#include "audio_core/audio_renderer.h" 9#include "audio_core/audio_renderer.h"
10#include "core/core.h" 10#include "core/core.h"
11#include "core/file_sys/savedata_factory.h" 11#include "core/file_sys/savedata_factory.h"
@@ -93,38 +93,84 @@ void IWindowController::AcquireForegroundRights(Kernel::HLERequestContext& ctx)
93} 93}
94 94
95IAudioController::IAudioController() : ServiceFramework("IAudioController") { 95IAudioController::IAudioController() : ServiceFramework("IAudioController") {
96 // clang-format off
96 static const FunctionInfo functions[] = { 97 static const FunctionInfo functions[] = {
97 {0, &IAudioController::SetExpectedMasterVolume, "SetExpectedMasterVolume"}, 98 {0, &IAudioController::SetExpectedMasterVolume, "SetExpectedMasterVolume"},
98 {1, &IAudioController::GetMainAppletExpectedMasterVolume, 99 {1, &IAudioController::GetMainAppletExpectedMasterVolume, "GetMainAppletExpectedMasterVolume"},
99 "GetMainAppletExpectedMasterVolume"}, 100 {2, &IAudioController::GetLibraryAppletExpectedMasterVolume, "GetLibraryAppletExpectedMasterVolume"},
100 {2, &IAudioController::GetLibraryAppletExpectedMasterVolume, 101 {3, &IAudioController::ChangeMainAppletMasterVolume, "ChangeMainAppletMasterVolume"},
101 "GetLibraryAppletExpectedMasterVolume"}, 102 {4, &IAudioController::SetTransparentAudioRate, "SetTransparentVolumeRate"},
102 {3, nullptr, "ChangeMainAppletMasterVolume"},
103 {4, nullptr, "SetTransparentVolumeRate"},
104 }; 103 };
104 // clang-format on
105
105 RegisterHandlers(functions); 106 RegisterHandlers(functions);
106} 107}
107 108
108IAudioController::~IAudioController() = default; 109IAudioController::~IAudioController() = default;
109 110
110void IAudioController::SetExpectedMasterVolume(Kernel::HLERequestContext& ctx) { 111void IAudioController::SetExpectedMasterVolume(Kernel::HLERequestContext& ctx) {
111 LOG_WARNING(Service_AM, "(STUBBED) called"); 112 IPC::RequestParser rp{ctx};
113 const float main_applet_volume_tmp = rp.Pop<float>();
114 const float library_applet_volume_tmp = rp.Pop<float>();
115
116 LOG_DEBUG(Service_AM, "called. main_applet_volume={}, library_applet_volume={}",
117 main_applet_volume_tmp, library_applet_volume_tmp);
118
119 // Ensure the volume values remain within the 0-100% range
120 main_applet_volume = std::clamp(main_applet_volume_tmp, min_allowed_volume, max_allowed_volume);
121 library_applet_volume =
122 std::clamp(library_applet_volume_tmp, min_allowed_volume, max_allowed_volume);
123
112 IPC::ResponseBuilder rb{ctx, 2}; 124 IPC::ResponseBuilder rb{ctx, 2};
113 rb.Push(RESULT_SUCCESS); 125 rb.Push(RESULT_SUCCESS);
114} 126}
115 127
116void IAudioController::GetMainAppletExpectedMasterVolume(Kernel::HLERequestContext& ctx) { 128void IAudioController::GetMainAppletExpectedMasterVolume(Kernel::HLERequestContext& ctx) {
117 LOG_WARNING(Service_AM, "(STUBBED) called"); 129 LOG_DEBUG(Service_AM, "called. main_applet_volume={}", main_applet_volume);
118 IPC::ResponseBuilder rb{ctx, 3}; 130 IPC::ResponseBuilder rb{ctx, 3};
119 rb.Push(RESULT_SUCCESS); 131 rb.Push(RESULT_SUCCESS);
120 rb.Push(volume); 132 rb.Push(main_applet_volume);
121} 133}
122 134
123void IAudioController::GetLibraryAppletExpectedMasterVolume(Kernel::HLERequestContext& ctx) { 135void IAudioController::GetLibraryAppletExpectedMasterVolume(Kernel::HLERequestContext& ctx) {
124 LOG_WARNING(Service_AM, "(STUBBED) called"); 136 LOG_DEBUG(Service_AM, "called. library_applet_volume={}", library_applet_volume);
125 IPC::ResponseBuilder rb{ctx, 3}; 137 IPC::ResponseBuilder rb{ctx, 3};
126 rb.Push(RESULT_SUCCESS); 138 rb.Push(RESULT_SUCCESS);
127 rb.Push(volume); 139 rb.Push(library_applet_volume);
140}
141
142void IAudioController::ChangeMainAppletMasterVolume(Kernel::HLERequestContext& ctx) {
143 struct Parameters {
144 float volume;
145 s64 fade_time_ns;
146 };
147 static_assert(sizeof(Parameters) == 16);
148
149 IPC::RequestParser rp{ctx};
150 const auto parameters = rp.PopRaw<Parameters>();
151
152 LOG_DEBUG(Service_AM, "called. volume={}, fade_time_ns={}", parameters.volume,
153 parameters.fade_time_ns);
154
155 main_applet_volume = std::clamp(parameters.volume, min_allowed_volume, max_allowed_volume);
156 fade_time_ns = std::chrono::nanoseconds{parameters.fade_time_ns};
157
158 IPC::ResponseBuilder rb{ctx, 2};
159 rb.Push(RESULT_SUCCESS);
160}
161
162void IAudioController::SetTransparentAudioRate(Kernel::HLERequestContext& ctx) {
163 IPC::RequestParser rp{ctx};
164 const float transparent_volume_rate_tmp = rp.Pop<float>();
165
166 LOG_DEBUG(Service_AM, "called. transparent_volume_rate={}", transparent_volume_rate_tmp);
167
168 // Clamp volume range to 0-100%.
169 transparent_volume_rate =
170 std::clamp(transparent_volume_rate_tmp, min_allowed_volume, max_allowed_volume);
171
172 IPC::ResponseBuilder rb{ctx, 2};
173 rb.Push(RESULT_SUCCESS);
128} 174}
129 175
130IDisplayController::IDisplayController() : ServiceFramework("IDisplayController") { 176IDisplayController::IDisplayController() : ServiceFramework("IDisplayController") {
@@ -322,14 +368,15 @@ void ISelfController::SetScreenShotImageOrientation(Kernel::HLERequestContext& c
322 368
323void ISelfController::CreateManagedDisplayLayer(Kernel::HLERequestContext& ctx) { 369void ISelfController::CreateManagedDisplayLayer(Kernel::HLERequestContext& ctx) {
324 LOG_WARNING(Service_AM, "(STUBBED) called"); 370 LOG_WARNING(Service_AM, "(STUBBED) called");
371
325 // TODO(Subv): Find out how AM determines the display to use, for now just 372 // TODO(Subv): Find out how AM determines the display to use, for now just
326 // create the layer in the Default display. 373 // create the layer in the Default display.
327 u64 display_id = nvflinger->OpenDisplay("Default"); 374 const auto display_id = nvflinger->OpenDisplay("Default");
328 u64 layer_id = nvflinger->CreateLayer(display_id); 375 const auto layer_id = nvflinger->CreateLayer(*display_id);
329 376
330 IPC::ResponseBuilder rb{ctx, 4}; 377 IPC::ResponseBuilder rb{ctx, 4};
331 rb.Push(RESULT_SUCCESS); 378 rb.Push(RESULT_SUCCESS);
332 rb.Push(layer_id); 379 rb.Push(*layer_id);
333} 380}
334 381
335void ISelfController::SetHandlesRequestToDisplay(Kernel::HLERequestContext& ctx) { 382void ISelfController::SetHandlesRequestToDisplay(Kernel::HLERequestContext& ctx) {
diff --git a/src/core/hle/service/am/am.h b/src/core/hle/service/am/am.h
index b6113cfdd..565dd8e9e 100644
--- a/src/core/hle/service/am/am.h
+++ b/src/core/hle/service/am/am.h
@@ -4,6 +4,7 @@
4 4
5#pragma once 5#pragma once
6 6
7#include <chrono>
7#include <memory> 8#include <memory>
8#include <queue> 9#include <queue>
9#include "core/hle/kernel/writable_event.h" 10#include "core/hle/kernel/writable_event.h"
@@ -81,8 +82,21 @@ private:
81 void SetExpectedMasterVolume(Kernel::HLERequestContext& ctx); 82 void SetExpectedMasterVolume(Kernel::HLERequestContext& ctx);
82 void GetMainAppletExpectedMasterVolume(Kernel::HLERequestContext& ctx); 83 void GetMainAppletExpectedMasterVolume(Kernel::HLERequestContext& ctx);
83 void GetLibraryAppletExpectedMasterVolume(Kernel::HLERequestContext& ctx); 84 void GetLibraryAppletExpectedMasterVolume(Kernel::HLERequestContext& ctx);
85 void ChangeMainAppletMasterVolume(Kernel::HLERequestContext& ctx);
86 void SetTransparentAudioRate(Kernel::HLERequestContext& ctx);
84 87
85 u32 volume{100}; 88 static constexpr float min_allowed_volume = 0.0f;
89 static constexpr float max_allowed_volume = 1.0f;
90
91 float main_applet_volume{0.25f};
92 float library_applet_volume{max_allowed_volume};
93 float transparent_volume_rate{min_allowed_volume};
94
95 // Volume transition fade time in nanoseconds.
96 // e.g. If the main applet volume was 0% and was changed to 50%
97 // with a fade of 50ns, then over the course of 50ns,
98 // the volume will gradually fade up to 50%
99 std::chrono::nanoseconds fade_time_ns{0};
86}; 100};
87 101
88class IDisplayController final : public ServiceFramework<IDisplayController> { 102class IDisplayController final : public ServiceFramework<IDisplayController> {
diff --git a/src/core/hle/service/am/applets/software_keyboard.cpp b/src/core/hle/service/am/applets/software_keyboard.cpp
index f255f74b5..8c5bd6059 100644
--- a/src/core/hle/service/am/applets/software_keyboard.cpp
+++ b/src/core/hle/service/am/applets/software_keyboard.cpp
@@ -7,6 +7,7 @@
7#include "common/string_util.h" 7#include "common/string_util.h"
8#include "core/core.h" 8#include "core/core.h"
9#include "core/frontend/applets/software_keyboard.h" 9#include "core/frontend/applets/software_keyboard.h"
10#include "core/hle/result.h"
10#include "core/hle/service/am/am.h" 11#include "core/hle/service/am/am.h"
11#include "core/hle/service/am/applets/software_keyboard.h" 12#include "core/hle/service/am/applets/software_keyboard.h"
12 13
diff --git a/src/core/hle/service/am/applets/software_keyboard.h b/src/core/hle/service/am/applets/software_keyboard.h
index efd5753a1..b93a30d28 100644
--- a/src/core/hle/service/am/applets/software_keyboard.h
+++ b/src/core/hle/service/am/applets/software_keyboard.h
@@ -9,10 +9,13 @@
9#include <vector> 9#include <vector>
10 10
11#include "common/common_funcs.h" 11#include "common/common_funcs.h"
12#include "common/common_types.h"
12#include "common/swap.h" 13#include "common/swap.h"
13#include "core/hle/service/am/am.h" 14#include "core/hle/service/am/am.h"
14#include "core/hle/service/am/applets/applets.h" 15#include "core/hle/service/am/applets/applets.h"
15 16
17union ResultCode;
18
16namespace Service::AM::Applets { 19namespace Service::AM::Applets {
17 20
18enum class KeysetDisable : u32 { 21enum class KeysetDisable : u32 {
diff --git a/src/core/hle/service/audio/audout_u.cpp b/src/core/hle/service/audio/audout_u.cpp
index dc6a6b188..21f5e64c7 100644
--- a/src/core/hle/service/audio/audout_u.cpp
+++ b/src/core/hle/service/audio/audout_u.cpp
@@ -18,17 +18,11 @@
18#include "core/hle/kernel/readable_event.h" 18#include "core/hle/kernel/readable_event.h"
19#include "core/hle/kernel/writable_event.h" 19#include "core/hle/kernel/writable_event.h"
20#include "core/hle/service/audio/audout_u.h" 20#include "core/hle/service/audio/audout_u.h"
21#include "core/hle/service/audio/errors.h"
21#include "core/memory.h" 22#include "core/memory.h"
22 23
23namespace Service::Audio { 24namespace Service::Audio {
24 25
25namespace ErrCodes {
26enum {
27 ErrorUnknown = 2,
28 BufferCountExceeded = 8,
29};
30}
31
32constexpr std::array<char, 10> DefaultDevice{{"DeviceOut"}}; 26constexpr std::array<char, 10> DefaultDevice{{"DeviceOut"}};
33constexpr int DefaultSampleRate{48000}; 27constexpr int DefaultSampleRate{48000};
34 28
@@ -68,12 +62,12 @@ public:
68 RegisterHandlers(functions); 62 RegisterHandlers(functions);
69 63
70 // This is the event handle used to check if the audio buffer was released 64 // This is the event handle used to check if the audio buffer was released
71 auto& kernel = Core::System::GetInstance().Kernel(); 65 auto& system = Core::System::GetInstance();
72 buffer_event = Kernel::WritableEvent::CreateEventPair(kernel, Kernel::ResetType::Sticky, 66 buffer_event = Kernel::WritableEvent::CreateEventPair(
73 "IAudioOutBufferReleased"); 67 system.Kernel(), Kernel::ResetType::Sticky, "IAudioOutBufferReleased");
74 68
75 stream = audio_core.OpenStream(audio_params.sample_rate, audio_params.channel_count, 69 stream = audio_core.OpenStream(system.CoreTiming(), audio_params.sample_rate,
76 std::move(unique_name), 70 audio_params.channel_count, std::move(unique_name),
77 [=]() { buffer_event.writable->Signal(); }); 71 [=]() { buffer_event.writable->Signal(); });
78 } 72 }
79 73
@@ -100,7 +94,7 @@ private:
100 94
101 if (stream->IsPlaying()) { 95 if (stream->IsPlaying()) {
102 IPC::ResponseBuilder rb{ctx, 2}; 96 IPC::ResponseBuilder rb{ctx, 2};
103 rb.Push(ResultCode(ErrorModule::Audio, ErrCodes::ErrorUnknown)); 97 rb.Push(ERR_OPERATION_FAILED);
104 return; 98 return;
105 } 99 }
106 100
@@ -113,7 +107,9 @@ private:
113 void StopAudioOut(Kernel::HLERequestContext& ctx) { 107 void StopAudioOut(Kernel::HLERequestContext& ctx) {
114 LOG_DEBUG(Service_Audio, "called"); 108 LOG_DEBUG(Service_Audio, "called");
115 109
116 audio_core.StopStream(stream); 110 if (stream->IsPlaying()) {
111 audio_core.StopStream(stream);
112 }
117 113
118 IPC::ResponseBuilder rb{ctx, 2}; 114 IPC::ResponseBuilder rb{ctx, 2};
119 rb.Push(RESULT_SUCCESS); 115 rb.Push(RESULT_SUCCESS);
@@ -143,7 +139,8 @@ private:
143 139
144 if (!audio_core.QueueBuffer(stream, tag, std::move(samples))) { 140 if (!audio_core.QueueBuffer(stream, tag, std::move(samples))) {
145 IPC::ResponseBuilder rb{ctx, 2}; 141 IPC::ResponseBuilder rb{ctx, 2};
146 rb.Push(ResultCode(ErrorModule::Audio, ErrCodes::BufferCountExceeded)); 142 rb.Push(ERR_BUFFER_COUNT_EXCEEDED);
143 return;
147 } 144 }
148 145
149 IPC::ResponseBuilder rb{ctx, 2}; 146 IPC::ResponseBuilder rb{ctx, 2};
diff --git a/src/core/hle/service/audio/audren_u.cpp b/src/core/hle/service/audio/audren_u.cpp
index 76cc48254..c9de10a24 100644
--- a/src/core/hle/service/audio/audren_u.cpp
+++ b/src/core/hle/service/audio/audren_u.cpp
@@ -17,6 +17,7 @@
17#include "core/hle/kernel/readable_event.h" 17#include "core/hle/kernel/readable_event.h"
18#include "core/hle/kernel/writable_event.h" 18#include "core/hle/kernel/writable_event.h"
19#include "core/hle/service/audio/audren_u.h" 19#include "core/hle/service/audio/audren_u.h"
20#include "core/hle/service/audio/errors.h"
20 21
21namespace Service::Audio { 22namespace Service::Audio {
22 23
@@ -37,15 +38,16 @@ public:
37 {8, &IAudioRenderer::SetRenderingTimeLimit, "SetRenderingTimeLimit"}, 38 {8, &IAudioRenderer::SetRenderingTimeLimit, "SetRenderingTimeLimit"},
38 {9, &IAudioRenderer::GetRenderingTimeLimit, "GetRenderingTimeLimit"}, 39 {9, &IAudioRenderer::GetRenderingTimeLimit, "GetRenderingTimeLimit"},
39 {10, &IAudioRenderer::RequestUpdateImpl, "RequestUpdateAuto"}, 40 {10, &IAudioRenderer::RequestUpdateImpl, "RequestUpdateAuto"},
40 {11, nullptr, "ExecuteAudioRendererRendering"}, 41 {11, &IAudioRenderer::ExecuteAudioRendererRendering, "ExecuteAudioRendererRendering"},
41 }; 42 };
42 // clang-format on 43 // clang-format on
43 RegisterHandlers(functions); 44 RegisterHandlers(functions);
44 45
45 auto& kernel = Core::System::GetInstance().Kernel(); 46 auto& system = Core::System::GetInstance();
46 system_event = Kernel::WritableEvent::CreateEventPair(kernel, Kernel::ResetType::Sticky, 47 system_event = Kernel::WritableEvent::CreateEventPair(
47 "IAudioRenderer:SystemEvent"); 48 system.Kernel(), Kernel::ResetType::Sticky, "IAudioRenderer:SystemEvent");
48 renderer = std::make_unique<AudioCore::AudioRenderer>(audren_params, system_event.writable); 49 renderer = std::make_unique<AudioCore::AudioRenderer>(system.CoreTiming(), audren_params,
50 system_event.writable);
49 } 51 }
50 52
51private: 53private:
@@ -137,6 +139,17 @@ private:
137 rb.Push(rendering_time_limit_percent); 139 rb.Push(rendering_time_limit_percent);
138 } 140 }
139 141
142 void ExecuteAudioRendererRendering(Kernel::HLERequestContext& ctx) {
143 LOG_DEBUG(Service_Audio, "called");
144
145 // This service command currently only reports an unsupported operation
146 // error code, or aborts. Given that, we just always return an error
147 // code in this case.
148
149 IPC::ResponseBuilder rb{ctx, 2};
150 rb.Push(ERR_NOT_SUPPORTED);
151 }
152
140 Kernel::EventPair system_event; 153 Kernel::EventPair system_event;
141 std::unique_ptr<AudioCore::AudioRenderer> renderer; 154 std::unique_ptr<AudioCore::AudioRenderer> renderer;
142 u32 rendering_time_limit_percent = 100; 155 u32 rendering_time_limit_percent = 100;
@@ -234,7 +247,7 @@ AudRenU::AudRenU() : ServiceFramework("audren:u") {
234 {0, &AudRenU::OpenAudioRenderer, "OpenAudioRenderer"}, 247 {0, &AudRenU::OpenAudioRenderer, "OpenAudioRenderer"},
235 {1, &AudRenU::GetAudioRendererWorkBufferSize, "GetAudioRendererWorkBufferSize"}, 248 {1, &AudRenU::GetAudioRendererWorkBufferSize, "GetAudioRendererWorkBufferSize"},
236 {2, &AudRenU::GetAudioDeviceService, "GetAudioDeviceService"}, 249 {2, &AudRenU::GetAudioDeviceService, "GetAudioDeviceService"},
237 {3, nullptr, "OpenAudioRendererAuto"}, 250 {3, &AudRenU::OpenAudioRendererAuto, "OpenAudioRendererAuto"},
238 {4, &AudRenU::GetAudioDeviceServiceWithRevisionInfo, "GetAudioDeviceServiceWithRevisionInfo"}, 251 {4, &AudRenU::GetAudioDeviceServiceWithRevisionInfo, "GetAudioDeviceServiceWithRevisionInfo"},
239 }; 252 };
240 // clang-format on 253 // clang-format on
@@ -247,12 +260,7 @@ AudRenU::~AudRenU() = default;
247void AudRenU::OpenAudioRenderer(Kernel::HLERequestContext& ctx) { 260void AudRenU::OpenAudioRenderer(Kernel::HLERequestContext& ctx) {
248 LOG_DEBUG(Service_Audio, "called"); 261 LOG_DEBUG(Service_Audio, "called");
249 262
250 IPC::RequestParser rp{ctx}; 263 OpenAudioRendererImpl(ctx);
251 auto params = rp.PopRaw<AudioCore::AudioRendererParameter>();
252 IPC::ResponseBuilder rb{ctx, 2, 0, 1};
253
254 rb.Push(RESULT_SUCCESS);
255 rb.PushIpcInterface<Audio::IAudioRenderer>(std::move(params));
256} 264}
257 265
258void AudRenU::GetAudioRendererWorkBufferSize(Kernel::HLERequestContext& ctx) { 266void AudRenU::GetAudioRendererWorkBufferSize(Kernel::HLERequestContext& ctx) {
@@ -261,20 +269,20 @@ void AudRenU::GetAudioRendererWorkBufferSize(Kernel::HLERequestContext& ctx) {
261 LOG_DEBUG(Service_Audio, "called"); 269 LOG_DEBUG(Service_Audio, "called");
262 270
263 u64 buffer_sz = Common::AlignUp(4 * params.mix_buffer_count, 0x40); 271 u64 buffer_sz = Common::AlignUp(4 * params.mix_buffer_count, 0x40);
264 buffer_sz += params.unknown_c * 1024; 272 buffer_sz += params.submix_count * 1024;
265 buffer_sz += 0x940 * (params.unknown_c + 1); 273 buffer_sz += 0x940 * (params.submix_count + 1);
266 buffer_sz += 0x3F0 * params.voice_count; 274 buffer_sz += 0x3F0 * params.voice_count;
267 buffer_sz += Common::AlignUp(8 * (params.unknown_c + 1), 0x10); 275 buffer_sz += Common::AlignUp(8 * (params.submix_count + 1), 0x10);
268 buffer_sz += Common::AlignUp(8 * params.voice_count, 0x10); 276 buffer_sz += Common::AlignUp(8 * params.voice_count, 0x10);
269 buffer_sz += 277 buffer_sz += Common::AlignUp(
270 Common::AlignUp((0x3C0 * (params.sink_count + params.unknown_c) + 4 * params.sample_count) * 278 (0x3C0 * (params.sink_count + params.submix_count) + 4 * params.sample_count) *
271 (params.mix_buffer_count + 6), 279 (params.mix_buffer_count + 6),
272 0x40); 280 0x40);
273 281
274 if (IsFeatureSupported(AudioFeatures::Splitter, params.revision)) { 282 if (IsFeatureSupported(AudioFeatures::Splitter, params.revision)) {
275 u32 count = params.unknown_c + 1; 283 const u32 count = params.submix_count + 1;
276 u64 node_count = Common::AlignUp(count, 0x40); 284 u64 node_count = Common::AlignUp(count, 0x40);
277 u64 node_state_buffer_sz = 285 const u64 node_state_buffer_sz =
278 4 * (node_count * node_count) + 0xC * node_count + 2 * (node_count / 8); 286 4 * (node_count * node_count) + 0xC * node_count + 2 * (node_count / 8);
279 u64 edge_matrix_buffer_sz = 0; 287 u64 edge_matrix_buffer_sz = 0;
280 node_count = Common::AlignUp(count * count, 0x40); 288 node_count = Common::AlignUp(count * count, 0x40);
@@ -288,19 +296,19 @@ void AudRenU::GetAudioRendererWorkBufferSize(Kernel::HLERequestContext& ctx) {
288 296
289 buffer_sz += 0x20 * (params.effect_count + 4 * params.voice_count) + 0x50; 297 buffer_sz += 0x20 * (params.effect_count + 4 * params.voice_count) + 0x50;
290 if (IsFeatureSupported(AudioFeatures::Splitter, params.revision)) { 298 if (IsFeatureSupported(AudioFeatures::Splitter, params.revision)) {
291 buffer_sz += 0xE0 * params.unknown_2c; 299 buffer_sz += 0xE0 * params.num_splitter_send_channels;
292 buffer_sz += 0x20 * params.splitter_count; 300 buffer_sz += 0x20 * params.splitter_count;
293 buffer_sz += Common::AlignUp(4 * params.unknown_2c, 0x10); 301 buffer_sz += Common::AlignUp(4 * params.num_splitter_send_channels, 0x10);
294 } 302 }
295 buffer_sz = Common::AlignUp(buffer_sz, 0x40) + 0x170 * params.sink_count; 303 buffer_sz = Common::AlignUp(buffer_sz, 0x40) + 0x170 * params.sink_count;
296 u64 output_sz = buffer_sz + 0x280 * params.sink_count + 0x4B0 * params.effect_count + 304 u64 output_sz = buffer_sz + 0x280 * params.sink_count + 0x4B0 * params.effect_count +
297 ((params.voice_count * 256) | 0x40); 305 ((params.voice_count * 256) | 0x40);
298 306
299 if (params.unknown_1c >= 1) { 307 if (params.performance_frame_count >= 1) {
300 output_sz = Common::AlignUp(((16 * params.sink_count + 16 * params.effect_count + 308 output_sz = Common::AlignUp(((16 * params.sink_count + 16 * params.effect_count +
301 16 * params.voice_count + 16) + 309 16 * params.voice_count + 16) +
302 0x658) * 310 0x658) *
303 (params.unknown_1c + 1) + 311 (params.performance_frame_count + 1) +
304 0xc0, 312 0xc0,
305 0x40) + 313 0x40) +
306 output_sz; 314 output_sz;
@@ -324,6 +332,12 @@ void AudRenU::GetAudioDeviceService(Kernel::HLERequestContext& ctx) {
324 rb.PushIpcInterface<Audio::IAudioDevice>(); 332 rb.PushIpcInterface<Audio::IAudioDevice>();
325} 333}
326 334
335void AudRenU::OpenAudioRendererAuto(Kernel::HLERequestContext& ctx) {
336 LOG_DEBUG(Service_Audio, "called");
337
338 OpenAudioRendererImpl(ctx);
339}
340
327void AudRenU::GetAudioDeviceServiceWithRevisionInfo(Kernel::HLERequestContext& ctx) { 341void AudRenU::GetAudioDeviceServiceWithRevisionInfo(Kernel::HLERequestContext& ctx) {
328 LOG_WARNING(Service_Audio, "(STUBBED) called"); 342 LOG_WARNING(Service_Audio, "(STUBBED) called");
329 343
@@ -334,6 +348,15 @@ void AudRenU::GetAudioDeviceServiceWithRevisionInfo(Kernel::HLERequestContext& c
334 // based on the current revision 348 // based on the current revision
335} 349}
336 350
351void AudRenU::OpenAudioRendererImpl(Kernel::HLERequestContext& ctx) {
352 IPC::RequestParser rp{ctx};
353 const auto params = rp.PopRaw<AudioCore::AudioRendererParameter>();
354 IPC::ResponseBuilder rb{ctx, 2, 0, 1};
355
356 rb.Push(RESULT_SUCCESS);
357 rb.PushIpcInterface<IAudioRenderer>(params);
358}
359
337bool AudRenU::IsFeatureSupported(AudioFeatures feature, u32_le revision) const { 360bool AudRenU::IsFeatureSupported(AudioFeatures feature, u32_le revision) const {
338 u32_be version_num = (revision - Common::MakeMagic('R', 'E', 'V', '0')); // Byte swap 361 u32_be version_num = (revision - Common::MakeMagic('R', 'E', 'V', '0')); // Byte swap
339 switch (feature) { 362 switch (feature) {
diff --git a/src/core/hle/service/audio/audren_u.h b/src/core/hle/service/audio/audren_u.h
index 3d63388fb..e55d25973 100644
--- a/src/core/hle/service/audio/audren_u.h
+++ b/src/core/hle/service/audio/audren_u.h
@@ -21,8 +21,11 @@ private:
21 void OpenAudioRenderer(Kernel::HLERequestContext& ctx); 21 void OpenAudioRenderer(Kernel::HLERequestContext& ctx);
22 void GetAudioRendererWorkBufferSize(Kernel::HLERequestContext& ctx); 22 void GetAudioRendererWorkBufferSize(Kernel::HLERequestContext& ctx);
23 void GetAudioDeviceService(Kernel::HLERequestContext& ctx); 23 void GetAudioDeviceService(Kernel::HLERequestContext& ctx);
24 void OpenAudioRendererAuto(Kernel::HLERequestContext& ctx);
24 void GetAudioDeviceServiceWithRevisionInfo(Kernel::HLERequestContext& ctx); 25 void GetAudioDeviceServiceWithRevisionInfo(Kernel::HLERequestContext& ctx);
25 26
27 void OpenAudioRendererImpl(Kernel::HLERequestContext& ctx);
28
26 enum class AudioFeatures : u32 { 29 enum class AudioFeatures : u32 {
27 Splitter, 30 Splitter,
28 }; 31 };
diff --git a/src/core/hle/service/audio/errors.h b/src/core/hle/service/audio/errors.h
new file mode 100644
index 000000000..6f8c09bcf
--- /dev/null
+++ b/src/core/hle/service/audio/errors.h
@@ -0,0 +1,15 @@
1// Copyright 2019 yuzu emulator team
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include "core/hle/result.h"
8
9namespace Service::Audio {
10
11constexpr ResultCode ERR_OPERATION_FAILED{ErrorModule::Audio, 2};
12constexpr ResultCode ERR_BUFFER_COUNT_EXCEEDED{ErrorModule::Audio, 8};
13constexpr ResultCode ERR_NOT_SUPPORTED{ErrorModule::Audio, 513};
14
15} // namespace Service::Audio
diff --git a/src/core/hle/service/audio/hwopus.cpp b/src/core/hle/service/audio/hwopus.cpp
index 11eba4a12..cb4a1160d 100644
--- a/src/core/hle/service/audio/hwopus.cpp
+++ b/src/core/hle/service/audio/hwopus.cpp
@@ -8,44 +8,34 @@
8#include <vector> 8#include <vector>
9 9
10#include <opus.h> 10#include <opus.h>
11#include <opus_multistream.h>
11 12
12#include "common/common_funcs.h" 13#include "common/assert.h"
13#include "common/logging/log.h" 14#include "common/logging/log.h"
14#include "core/hle/ipc_helpers.h" 15#include "core/hle/ipc_helpers.h"
15#include "core/hle/kernel/hle_ipc.h" 16#include "core/hle/kernel/hle_ipc.h"
16#include "core/hle/service/audio/hwopus.h" 17#include "core/hle/service/audio/hwopus.h"
17 18
18namespace Service::Audio { 19namespace Service::Audio {
19 20namespace {
20struct OpusDeleter { 21struct OpusDeleter {
21 void operator()(void* ptr) const { 22 void operator()(OpusMSDecoder* ptr) const {
22 operator delete(ptr); 23 opus_multistream_decoder_destroy(ptr);
23 } 24 }
24}; 25};
25 26
26class IHardwareOpusDecoderManager final : public ServiceFramework<IHardwareOpusDecoderManager> { 27using OpusDecoderPtr = std::unique_ptr<OpusMSDecoder, OpusDeleter>;
27public:
28 IHardwareOpusDecoderManager(std::unique_ptr<OpusDecoder, OpusDeleter> decoder, u32 sample_rate,
29 u32 channel_count)
30 : ServiceFramework("IHardwareOpusDecoderManager"), decoder(std::move(decoder)),
31 sample_rate(sample_rate), channel_count(channel_count) {
32 // clang-format off
33 static const FunctionInfo functions[] = {
34 {0, &IHardwareOpusDecoderManager::DecodeInterleavedOld, "DecodeInterleavedOld"},
35 {1, nullptr, "SetContext"},
36 {2, nullptr, "DecodeInterleavedForMultiStreamOld"},
37 {3, nullptr, "SetContextForMultiStream"},
38 {4, &IHardwareOpusDecoderManager::DecodeInterleavedWithPerfOld, "DecodeInterleavedWithPerfOld"},
39 {5, nullptr, "DecodeInterleavedForMultiStreamWithPerfOld"},
40 {6, &IHardwareOpusDecoderManager::DecodeInterleaved, "DecodeInterleaved"},
41 {7, nullptr, "DecodeInterleavedForMultiStream"},
42 };
43 // clang-format on
44 28
45 RegisterHandlers(functions); 29struct OpusPacketHeader {
46 } 30 // Packet size in bytes.
31 u32_be size;
32 // Indicates the final range of the codec's entropy coder.
33 u32_be final_range;
34};
35static_assert(sizeof(OpusPacketHeader) == 0x8, "OpusHeader is an invalid size");
47 36
48private: 37class OpusDecoderState {
38public:
49 /// Describes extra behavior that may be asked of the decoding context. 39 /// Describes extra behavior that may be asked of the decoding context.
50 enum class ExtraBehavior { 40 enum class ExtraBehavior {
51 /// No extra behavior. 41 /// No extra behavior.
@@ -55,30 +45,27 @@ private:
55 ResetContext, 45 ResetContext,
56 }; 46 };
57 47
58 void DecodeInterleavedOld(Kernel::HLERequestContext& ctx) { 48 enum class PerfTime {
59 LOG_DEBUG(Audio, "called"); 49 Disabled,
60 50 Enabled,
61 DecodeInterleavedHelper(ctx, nullptr, ExtraBehavior::None); 51 };
62 }
63
64 void DecodeInterleavedWithPerfOld(Kernel::HLERequestContext& ctx) {
65 LOG_DEBUG(Audio, "called");
66
67 u64 performance = 0;
68 DecodeInterleavedHelper(ctx, &performance, ExtraBehavior::None);
69 }
70
71 void DecodeInterleaved(Kernel::HLERequestContext& ctx) {
72 LOG_DEBUG(Audio, "called");
73
74 IPC::RequestParser rp{ctx};
75 const auto extra_behavior =
76 rp.Pop<bool>() ? ExtraBehavior::ResetContext : ExtraBehavior::None;
77 52
78 u64 performance = 0; 53 explicit OpusDecoderState(OpusDecoderPtr decoder, u32 sample_rate, u32 channel_count)
79 DecodeInterleavedHelper(ctx, &performance, extra_behavior); 54 : decoder{std::move(decoder)}, sample_rate{sample_rate}, channel_count{channel_count} {}
55
56 // Decodes interleaved Opus packets. Optionally allows reporting time taken to
57 // perform the decoding, as well as any relevant extra behavior.
58 void DecodeInterleaved(Kernel::HLERequestContext& ctx, PerfTime perf_time,
59 ExtraBehavior extra_behavior) {
60 if (perf_time == PerfTime::Disabled) {
61 DecodeInterleavedHelper(ctx, nullptr, extra_behavior);
62 } else {
63 u64 performance = 0;
64 DecodeInterleavedHelper(ctx, &performance, extra_behavior);
65 }
80 } 66 }
81 67
68private:
82 void DecodeInterleavedHelper(Kernel::HLERequestContext& ctx, u64* performance, 69 void DecodeInterleavedHelper(Kernel::HLERequestContext& ctx, u64* performance,
83 ExtraBehavior extra_behavior) { 70 ExtraBehavior extra_behavior) {
84 u32 consumed = 0; 71 u32 consumed = 0;
@@ -89,8 +76,7 @@ private:
89 ResetDecoderContext(); 76 ResetDecoderContext();
90 } 77 }
91 78
92 if (!Decoder_DecodeInterleaved(consumed, sample_count, ctx.ReadBuffer(), samples, 79 if (!DecodeOpusData(consumed, sample_count, ctx.ReadBuffer(), samples, performance)) {
93 performance)) {
94 LOG_ERROR(Audio, "Failed to decode opus data"); 80 LOG_ERROR(Audio, "Failed to decode opus data");
95 IPC::ResponseBuilder rb{ctx, 2}; 81 IPC::ResponseBuilder rb{ctx, 2};
96 // TODO(ogniK): Use correct error code 82 // TODO(ogniK): Use correct error code
@@ -109,27 +95,27 @@ private:
109 ctx.WriteBuffer(samples.data(), samples.size() * sizeof(s16)); 95 ctx.WriteBuffer(samples.data(), samples.size() * sizeof(s16));
110 } 96 }
111 97
112 bool Decoder_DecodeInterleaved(u32& consumed, u32& sample_count, const std::vector<u8>& input, 98 bool DecodeOpusData(u32& consumed, u32& sample_count, const std::vector<u8>& input,
113 std::vector<opus_int16>& output, u64* out_performance_time) { 99 std::vector<opus_int16>& output, u64* out_performance_time) const {
114 const auto start_time = std::chrono::high_resolution_clock::now(); 100 const auto start_time = std::chrono::high_resolution_clock::now();
115 const std::size_t raw_output_sz = output.size() * sizeof(opus_int16); 101 const std::size_t raw_output_sz = output.size() * sizeof(opus_int16);
116 if (sizeof(OpusHeader) > input.size()) { 102 if (sizeof(OpusPacketHeader) > input.size()) {
117 LOG_ERROR(Audio, "Input is smaller than the header size, header_sz={}, input_sz={}", 103 LOG_ERROR(Audio, "Input is smaller than the header size, header_sz={}, input_sz={}",
118 sizeof(OpusHeader), input.size()); 104 sizeof(OpusPacketHeader), input.size());
119 return false; 105 return false;
120 } 106 }
121 107
122 OpusHeader hdr{}; 108 OpusPacketHeader hdr{};
123 std::memcpy(&hdr, input.data(), sizeof(OpusHeader)); 109 std::memcpy(&hdr, input.data(), sizeof(OpusPacketHeader));
124 if (sizeof(OpusHeader) + static_cast<u32>(hdr.sz) > input.size()) { 110 if (sizeof(OpusPacketHeader) + static_cast<u32>(hdr.size) > input.size()) {
125 LOG_ERROR(Audio, "Input does not fit in the opus header size. data_sz={}, input_sz={}", 111 LOG_ERROR(Audio, "Input does not fit in the opus header size. data_sz={}, input_sz={}",
126 sizeof(OpusHeader) + static_cast<u32>(hdr.sz), input.size()); 112 sizeof(OpusPacketHeader) + static_cast<u32>(hdr.size), input.size());
127 return false; 113 return false;
128 } 114 }
129 115
130 const auto frame = input.data() + sizeof(OpusHeader); 116 const auto frame = input.data() + sizeof(OpusPacketHeader);
131 const auto decoded_sample_count = opus_packet_get_nb_samples( 117 const auto decoded_sample_count = opus_packet_get_nb_samples(
132 frame, static_cast<opus_int32>(input.size() - sizeof(OpusHeader)), 118 frame, static_cast<opus_int32>(input.size() - sizeof(OpusPacketHeader)),
133 static_cast<opus_int32>(sample_rate)); 119 static_cast<opus_int32>(sample_rate));
134 if (decoded_sample_count * channel_count * sizeof(u16) > raw_output_sz) { 120 if (decoded_sample_count * channel_count * sizeof(u16) > raw_output_sz) {
135 LOG_ERROR( 121 LOG_ERROR(
@@ -141,18 +127,18 @@ private:
141 127
142 const int frame_size = (static_cast<int>(raw_output_sz / sizeof(s16) / channel_count)); 128 const int frame_size = (static_cast<int>(raw_output_sz / sizeof(s16) / channel_count));
143 const auto out_sample_count = 129 const auto out_sample_count =
144 opus_decode(decoder.get(), frame, hdr.sz, output.data(), frame_size, 0); 130 opus_multistream_decode(decoder.get(), frame, hdr.size, output.data(), frame_size, 0);
145 if (out_sample_count < 0) { 131 if (out_sample_count < 0) {
146 LOG_ERROR(Audio, 132 LOG_ERROR(Audio,
147 "Incorrect sample count received from opus_decode, " 133 "Incorrect sample count received from opus_decode, "
148 "output_sample_count={}, frame_size={}, data_sz_from_hdr={}", 134 "output_sample_count={}, frame_size={}, data_sz_from_hdr={}",
149 out_sample_count, frame_size, static_cast<u32>(hdr.sz)); 135 out_sample_count, frame_size, static_cast<u32>(hdr.size));
150 return false; 136 return false;
151 } 137 }
152 138
153 const auto end_time = std::chrono::high_resolution_clock::now() - start_time; 139 const auto end_time = std::chrono::high_resolution_clock::now() - start_time;
154 sample_count = out_sample_count; 140 sample_count = out_sample_count;
155 consumed = static_cast<u32>(sizeof(OpusHeader) + hdr.sz); 141 consumed = static_cast<u32>(sizeof(OpusPacketHeader) + hdr.size);
156 if (out_performance_time != nullptr) { 142 if (out_performance_time != nullptr) {
157 *out_performance_time = 143 *out_performance_time =
158 std::chrono::duration_cast<std::chrono::milliseconds>(end_time).count(); 144 std::chrono::duration_cast<std::chrono::milliseconds>(end_time).count();
@@ -164,25 +150,86 @@ private:
164 void ResetDecoderContext() { 150 void ResetDecoderContext() {
165 ASSERT(decoder != nullptr); 151 ASSERT(decoder != nullptr);
166 152
167 opus_decoder_ctl(decoder.get(), OPUS_RESET_STATE); 153 opus_multistream_decoder_ctl(decoder.get(), OPUS_RESET_STATE);
168 } 154 }
169 155
170 struct OpusHeader { 156 OpusDecoderPtr decoder;
171 u32_be sz; // Needs to be BE for some odd reason
172 INSERT_PADDING_WORDS(1);
173 };
174 static_assert(sizeof(OpusHeader) == 0x8, "OpusHeader is an invalid size");
175
176 std::unique_ptr<OpusDecoder, OpusDeleter> decoder;
177 u32 sample_rate; 157 u32 sample_rate;
178 u32 channel_count; 158 u32 channel_count;
179}; 159};
180 160
181static std::size_t WorkerBufferSize(u32 channel_count) { 161class IHardwareOpusDecoderManager final : public ServiceFramework<IHardwareOpusDecoderManager> {
162public:
163 explicit IHardwareOpusDecoderManager(OpusDecoderState decoder_state)
164 : ServiceFramework("IHardwareOpusDecoderManager"), decoder_state{std::move(decoder_state)} {
165 // clang-format off
166 static const FunctionInfo functions[] = {
167 {0, &IHardwareOpusDecoderManager::DecodeInterleavedOld, "DecodeInterleavedOld"},
168 {1, nullptr, "SetContext"},
169 {2, nullptr, "DecodeInterleavedForMultiStreamOld"},
170 {3, nullptr, "SetContextForMultiStream"},
171 {4, &IHardwareOpusDecoderManager::DecodeInterleavedWithPerfOld, "DecodeInterleavedWithPerfOld"},
172 {5, nullptr, "DecodeInterleavedForMultiStreamWithPerfOld"},
173 {6, &IHardwareOpusDecoderManager::DecodeInterleaved, "DecodeInterleaved"},
174 {7, nullptr, "DecodeInterleavedForMultiStream"},
175 };
176 // clang-format on
177
178 RegisterHandlers(functions);
179 }
180
181private:
182 void DecodeInterleavedOld(Kernel::HLERequestContext& ctx) {
183 LOG_DEBUG(Audio, "called");
184
185 decoder_state.DecodeInterleaved(ctx, OpusDecoderState::PerfTime::Disabled,
186 OpusDecoderState::ExtraBehavior::None);
187 }
188
189 void DecodeInterleavedWithPerfOld(Kernel::HLERequestContext& ctx) {
190 LOG_DEBUG(Audio, "called");
191
192 decoder_state.DecodeInterleaved(ctx, OpusDecoderState::PerfTime::Enabled,
193 OpusDecoderState::ExtraBehavior::None);
194 }
195
196 void DecodeInterleaved(Kernel::HLERequestContext& ctx) {
197 LOG_DEBUG(Audio, "called");
198
199 IPC::RequestParser rp{ctx};
200 const auto extra_behavior = rp.Pop<bool>() ? OpusDecoderState::ExtraBehavior::ResetContext
201 : OpusDecoderState::ExtraBehavior::None;
202
203 decoder_state.DecodeInterleaved(ctx, OpusDecoderState::PerfTime::Enabled, extra_behavior);
204 }
205
206 OpusDecoderState decoder_state;
207};
208
209std::size_t WorkerBufferSize(u32 channel_count) {
182 ASSERT_MSG(channel_count == 1 || channel_count == 2, "Invalid channel count"); 210 ASSERT_MSG(channel_count == 1 || channel_count == 2, "Invalid channel count");
183 return opus_decoder_get_size(static_cast<int>(channel_count)); 211 constexpr int num_streams = 1;
212 const int num_stereo_streams = channel_count == 2 ? 1 : 0;
213 return opus_multistream_decoder_get_size(num_streams, num_stereo_streams);
184} 214}
185 215
216// Creates the mapping table that maps the input channels to the particular
217// output channels. In the stereo case, we map the left and right input channels
218// to the left and right output channels respectively.
219//
220// However, in the monophonic case, we only map the one available channel
221// to the sole output channel. We specify 255 for the would-be right channel
222// as this is a special value defined by Opus to indicate to the decoder to
223// ignore that channel.
224std::array<u8, 2> CreateMappingTable(u32 channel_count) {
225 if (channel_count == 2) {
226 return {{0, 1}};
227 }
228
229 return {{0, 255}};
230}
231} // Anonymous namespace
232
186void HwOpus::GetWorkBufferSize(Kernel::HLERequestContext& ctx) { 233void HwOpus::GetWorkBufferSize(Kernel::HLERequestContext& ctx) {
187 IPC::RequestParser rp{ctx}; 234 IPC::RequestParser rp{ctx};
188 const auto sample_rate = rp.Pop<u32>(); 235 const auto sample_rate = rp.Pop<u32>();
@@ -220,10 +267,15 @@ void HwOpus::OpenOpusDecoder(Kernel::HLERequestContext& ctx) {
220 const std::size_t worker_sz = WorkerBufferSize(channel_count); 267 const std::size_t worker_sz = WorkerBufferSize(channel_count);
221 ASSERT_MSG(buffer_sz >= worker_sz, "Worker buffer too large"); 268 ASSERT_MSG(buffer_sz >= worker_sz, "Worker buffer too large");
222 269
223 std::unique_ptr<OpusDecoder, OpusDeleter> decoder{ 270 const int num_stereo_streams = channel_count == 2 ? 1 : 0;
224 static_cast<OpusDecoder*>(operator new(worker_sz))}; 271 const auto mapping_table = CreateMappingTable(channel_count);
225 if (const int err = opus_decoder_init(decoder.get(), sample_rate, channel_count)) { 272
226 LOG_ERROR(Audio, "Failed to init opus decoder with error={}", err); 273 int error = 0;
274 OpusDecoderPtr decoder{
275 opus_multistream_decoder_create(sample_rate, static_cast<int>(channel_count), 1,
276 num_stereo_streams, mapping_table.data(), &error)};
277 if (error != OPUS_OK || decoder == nullptr) {
278 LOG_ERROR(Audio, "Failed to create Opus decoder (error={}).", error);
227 IPC::ResponseBuilder rb{ctx, 2}; 279 IPC::ResponseBuilder rb{ctx, 2};
228 // TODO(ogniK): Use correct error code 280 // TODO(ogniK): Use correct error code
229 rb.Push(ResultCode(-1)); 281 rb.Push(ResultCode(-1));
@@ -232,8 +284,8 @@ void HwOpus::OpenOpusDecoder(Kernel::HLERequestContext& ctx) {
232 284
233 IPC::ResponseBuilder rb{ctx, 2, 0, 1}; 285 IPC::ResponseBuilder rb{ctx, 2, 0, 1};
234 rb.Push(RESULT_SUCCESS); 286 rb.Push(RESULT_SUCCESS);
235 rb.PushIpcInterface<IHardwareOpusDecoderManager>(std::move(decoder), sample_rate, 287 rb.PushIpcInterface<IHardwareOpusDecoderManager>(
236 channel_count); 288 OpusDecoderState{std::move(decoder), sample_rate, channel_count});
237} 289}
238 290
239HwOpus::HwOpus() : ServiceFramework("hwopus") { 291HwOpus::HwOpus() : ServiceFramework("hwopus") {
diff --git a/src/core/hle/service/filesystem/fsp_srv.cpp b/src/core/hle/service/filesystem/fsp_srv.cpp
index 54959edd8..f03fb629c 100644
--- a/src/core/hle/service/filesystem/fsp_srv.cpp
+++ b/src/core/hle/service/filesystem/fsp_srv.cpp
@@ -733,7 +733,10 @@ FSP_SRV::FSP_SRV() : ServiceFramework("fsp-srv") {
733FSP_SRV::~FSP_SRV() = default; 733FSP_SRV::~FSP_SRV() = default;
734 734
735void FSP_SRV::SetCurrentProcess(Kernel::HLERequestContext& ctx) { 735void FSP_SRV::SetCurrentProcess(Kernel::HLERequestContext& ctx) {
736 LOG_WARNING(Service_FS, "(STUBBED) called"); 736 IPC::RequestParser rp{ctx};
737 current_process_id = rp.Pop<u64>();
738
739 LOG_DEBUG(Service_FS, "called. current_process_id=0x{:016X}", current_process_id);
737 740
738 IPC::ResponseBuilder rb{ctx, 2}; 741 IPC::ResponseBuilder rb{ctx, 2};
739 rb.Push(RESULT_SUCCESS); 742 rb.Push(RESULT_SUCCESS);
diff --git a/src/core/hle/service/filesystem/fsp_srv.h b/src/core/hle/service/filesystem/fsp_srv.h
index 3a5f4e200..d7572ba7a 100644
--- a/src/core/hle/service/filesystem/fsp_srv.h
+++ b/src/core/hle/service/filesystem/fsp_srv.h
@@ -32,6 +32,7 @@ private:
32 void OpenPatchDataStorageByCurrentProcess(Kernel::HLERequestContext& ctx); 32 void OpenPatchDataStorageByCurrentProcess(Kernel::HLERequestContext& ctx);
33 33
34 FileSys::VirtualFile romfs; 34 FileSys::VirtualFile romfs;
35 u64 current_process_id = 0;
35}; 36};
36 37
37} // namespace Service::FileSystem 38} // namespace Service::FileSystem
diff --git a/src/core/hle/service/hid/controllers/controller_base.h b/src/core/hle/service/hid/controllers/controller_base.h
index f0e092b1b..5e5097a03 100644
--- a/src/core/hle/service/hid/controllers/controller_base.h
+++ b/src/core/hle/service/hid/controllers/controller_base.h
@@ -7,6 +7,10 @@
7#include "common/common_types.h" 7#include "common/common_types.h"
8#include "common/swap.h" 8#include "common/swap.h"
9 9
10namespace Core::Timing {
11class CoreTiming;
12}
13
10namespace Service::HID { 14namespace Service::HID {
11class ControllerBase { 15class ControllerBase {
12public: 16public:
@@ -20,7 +24,8 @@ public:
20 virtual void OnRelease() = 0; 24 virtual void OnRelease() = 0;
21 25
22 // When the controller is requesting an update for the shared memory 26 // When the controller is requesting an update for the shared memory
23 virtual void OnUpdate(u8* data, std::size_t size) = 0; 27 virtual void OnUpdate(const Core::Timing::CoreTiming& core_timing, u8* data,
28 std::size_t size) = 0;
24 29
25 // Called when input devices should be loaded 30 // Called when input devices should be loaded
26 virtual void OnLoadInputDevices() = 0; 31 virtual void OnLoadInputDevices() = 0;
diff --git a/src/core/hle/service/hid/controllers/debug_pad.cpp b/src/core/hle/service/hid/controllers/debug_pad.cpp
index c22357d8c..c5c2e032a 100644
--- a/src/core/hle/service/hid/controllers/debug_pad.cpp
+++ b/src/core/hle/service/hid/controllers/debug_pad.cpp
@@ -21,8 +21,9 @@ void Controller_DebugPad::OnInit() {}
21 21
22void Controller_DebugPad::OnRelease() {} 22void Controller_DebugPad::OnRelease() {}
23 23
24void Controller_DebugPad::OnUpdate(u8* data, std::size_t size) { 24void Controller_DebugPad::OnUpdate(const Core::Timing::CoreTiming& core_timing, u8* data,
25 shared_memory.header.timestamp = CoreTiming::GetTicks(); 25 std::size_t size) {
26 shared_memory.header.timestamp = core_timing.GetTicks();
26 shared_memory.header.total_entry_count = 17; 27 shared_memory.header.total_entry_count = 17;
27 28
28 if (!IsControllerActivated()) { 29 if (!IsControllerActivated()) {
diff --git a/src/core/hle/service/hid/controllers/debug_pad.h b/src/core/hle/service/hid/controllers/debug_pad.h
index 2b60ead12..e584b92ec 100644
--- a/src/core/hle/service/hid/controllers/debug_pad.h
+++ b/src/core/hle/service/hid/controllers/debug_pad.h
@@ -26,7 +26,7 @@ public:
26 void OnRelease() override; 26 void OnRelease() override;
27 27
28 // When the controller is requesting an update for the shared memory 28 // When the controller is requesting an update for the shared memory
29 void OnUpdate(u8* data, std::size_t size) override; 29 void OnUpdate(const Core::Timing::CoreTiming& core_timing, u8* data, std::size_t size) override;
30 30
31 // Called when input devices should be loaded 31 // Called when input devices should be loaded
32 void OnLoadInputDevices() override; 32 void OnLoadInputDevices() override;
diff --git a/src/core/hle/service/hid/controllers/gesture.cpp b/src/core/hle/service/hid/controllers/gesture.cpp
index 898572277..a179252e3 100644
--- a/src/core/hle/service/hid/controllers/gesture.cpp
+++ b/src/core/hle/service/hid/controllers/gesture.cpp
@@ -17,8 +17,9 @@ void Controller_Gesture::OnInit() {}
17 17
18void Controller_Gesture::OnRelease() {} 18void Controller_Gesture::OnRelease() {}
19 19
20void Controller_Gesture::OnUpdate(u8* data, std::size_t size) { 20void Controller_Gesture::OnUpdate(const Core::Timing::CoreTiming& core_timing, u8* data,
21 shared_memory.header.timestamp = CoreTiming::GetTicks(); 21 std::size_t size) {
22 shared_memory.header.timestamp = core_timing.GetTicks();
22 shared_memory.header.total_entry_count = 17; 23 shared_memory.header.total_entry_count = 17;
23 24
24 if (!IsControllerActivated()) { 25 if (!IsControllerActivated()) {
diff --git a/src/core/hle/service/hid/controllers/gesture.h b/src/core/hle/service/hid/controllers/gesture.h
index 1056ffbcd..f305fe90f 100644
--- a/src/core/hle/service/hid/controllers/gesture.h
+++ b/src/core/hle/service/hid/controllers/gesture.h
@@ -22,7 +22,7 @@ public:
22 void OnRelease() override; 22 void OnRelease() override;
23 23
24 // When the controller is requesting an update for the shared memory 24 // When the controller is requesting an update for the shared memory
25 void OnUpdate(u8* data, size_t size) override; 25 void OnUpdate(const Core::Timing::CoreTiming& core_timing, u8* data, size_t size) override;
26 26
27 // Called when input devices should be loaded 27 // Called when input devices should be loaded
28 void OnLoadInputDevices() override; 28 void OnLoadInputDevices() override;
diff --git a/src/core/hle/service/hid/controllers/keyboard.cpp b/src/core/hle/service/hid/controllers/keyboard.cpp
index ca75adc2b..92d7bfb52 100644
--- a/src/core/hle/service/hid/controllers/keyboard.cpp
+++ b/src/core/hle/service/hid/controllers/keyboard.cpp
@@ -19,8 +19,9 @@ void Controller_Keyboard::OnInit() {}
19 19
20void Controller_Keyboard::OnRelease() {} 20void Controller_Keyboard::OnRelease() {}
21 21
22void Controller_Keyboard::OnUpdate(u8* data, std::size_t size) { 22void Controller_Keyboard::OnUpdate(const Core::Timing::CoreTiming& core_timing, u8* data,
23 shared_memory.header.timestamp = CoreTiming::GetTicks(); 23 std::size_t size) {
24 shared_memory.header.timestamp = core_timing.GetTicks();
24 shared_memory.header.total_entry_count = 17; 25 shared_memory.header.total_entry_count = 17;
25 26
26 if (!IsControllerActivated()) { 27 if (!IsControllerActivated()) {
diff --git a/src/core/hle/service/hid/controllers/keyboard.h b/src/core/hle/service/hid/controllers/keyboard.h
index f52775456..73cd2c7bb 100644
--- a/src/core/hle/service/hid/controllers/keyboard.h
+++ b/src/core/hle/service/hid/controllers/keyboard.h
@@ -25,7 +25,7 @@ public:
25 void OnRelease() override; 25 void OnRelease() override;
26 26
27 // When the controller is requesting an update for the shared memory 27 // When the controller is requesting an update for the shared memory
28 void OnUpdate(u8* data, std::size_t size) override; 28 void OnUpdate(const Core::Timing::CoreTiming& core_timing, u8* data, std::size_t size) override;
29 29
30 // Called when input devices should be loaded 30 // Called when input devices should be loaded
31 void OnLoadInputDevices() override; 31 void OnLoadInputDevices() override;
diff --git a/src/core/hle/service/hid/controllers/mouse.cpp b/src/core/hle/service/hid/controllers/mouse.cpp
index 63391dbe9..11ab096d9 100644
--- a/src/core/hle/service/hid/controllers/mouse.cpp
+++ b/src/core/hle/service/hid/controllers/mouse.cpp
@@ -17,8 +17,9 @@ Controller_Mouse::~Controller_Mouse() = default;
17void Controller_Mouse::OnInit() {} 17void Controller_Mouse::OnInit() {}
18void Controller_Mouse::OnRelease() {} 18void Controller_Mouse::OnRelease() {}
19 19
20void Controller_Mouse::OnUpdate(u8* data, std::size_t size) { 20void Controller_Mouse::OnUpdate(const Core::Timing::CoreTiming& core_timing, u8* data,
21 shared_memory.header.timestamp = CoreTiming::GetTicks(); 21 std::size_t size) {
22 shared_memory.header.timestamp = core_timing.GetTicks();
22 shared_memory.header.total_entry_count = 17; 23 shared_memory.header.total_entry_count = 17;
23 24
24 if (!IsControllerActivated()) { 25 if (!IsControllerActivated()) {
diff --git a/src/core/hle/service/hid/controllers/mouse.h b/src/core/hle/service/hid/controllers/mouse.h
index 70b654d07..9d46eecbe 100644
--- a/src/core/hle/service/hid/controllers/mouse.h
+++ b/src/core/hle/service/hid/controllers/mouse.h
@@ -24,7 +24,7 @@ public:
24 void OnRelease() override; 24 void OnRelease() override;
25 25
26 // When the controller is requesting an update for the shared memory 26 // When the controller is requesting an update for the shared memory
27 void OnUpdate(u8* data, std::size_t size) override; 27 void OnUpdate(const Core::Timing::CoreTiming& core_timing, u8* data, std::size_t size) override;
28 28
29 // Called when input devices should be loaded 29 // Called when input devices should be loaded
30 void OnLoadInputDevices() override; 30 void OnLoadInputDevices() override;
diff --git a/src/core/hle/service/hid/controllers/npad.cpp b/src/core/hle/service/hid/controllers/npad.cpp
index 04c8c35a8..e7fc7a619 100644
--- a/src/core/hle/service/hid/controllers/npad.cpp
+++ b/src/core/hle/service/hid/controllers/npad.cpp
@@ -288,7 +288,8 @@ void Controller_NPad::RequestPadStateUpdate(u32 npad_id) {
288 rstick_entry.y = static_cast<s32>(stick_r_y_f * HID_JOYSTICK_MAX); 288 rstick_entry.y = static_cast<s32>(stick_r_y_f * HID_JOYSTICK_MAX);
289} 289}
290 290
291void Controller_NPad::OnUpdate(u8* data, std::size_t data_len) { 291void Controller_NPad::OnUpdate(const Core::Timing::CoreTiming& core_timing, u8* data,
292 std::size_t data_len) {
292 if (!IsControllerActivated()) 293 if (!IsControllerActivated())
293 return; 294 return;
294 for (std::size_t i = 0; i < shared_memory_entries.size(); i++) { 295 for (std::size_t i = 0; i < shared_memory_entries.size(); i++) {
@@ -308,7 +309,7 @@ void Controller_NPad::OnUpdate(u8* data, std::size_t data_len) {
308 const auto& last_entry = 309 const auto& last_entry =
309 main_controller->npad[main_controller->common.last_entry_index]; 310 main_controller->npad[main_controller->common.last_entry_index];
310 311
311 main_controller->common.timestamp = CoreTiming::GetTicks(); 312 main_controller->common.timestamp = core_timing.GetTicks();
312 main_controller->common.last_entry_index = 313 main_controller->common.last_entry_index =
313 (main_controller->common.last_entry_index + 1) % 17; 314 (main_controller->common.last_entry_index + 1) % 17;
314 315
diff --git a/src/core/hle/service/hid/controllers/npad.h b/src/core/hle/service/hid/controllers/npad.h
index ce057da82..4ff50b3cd 100644
--- a/src/core/hle/service/hid/controllers/npad.h
+++ b/src/core/hle/service/hid/controllers/npad.h
@@ -30,7 +30,7 @@ public:
30 void OnRelease() override; 30 void OnRelease() override;
31 31
32 // When the controller is requesting an update for the shared memory 32 // When the controller is requesting an update for the shared memory
33 void OnUpdate(u8* data, std::size_t size) override; 33 void OnUpdate(const Core::Timing::CoreTiming& core_timing, u8* data, std::size_t size) override;
34 34
35 // Called when input devices should be loaded 35 // Called when input devices should be loaded
36 void OnLoadInputDevices() override; 36 void OnLoadInputDevices() override;
diff --git a/src/core/hle/service/hid/controllers/stubbed.cpp b/src/core/hle/service/hid/controllers/stubbed.cpp
index 02fcfadd9..946948f5e 100644
--- a/src/core/hle/service/hid/controllers/stubbed.cpp
+++ b/src/core/hle/service/hid/controllers/stubbed.cpp
@@ -16,13 +16,14 @@ void Controller_Stubbed::OnInit() {}
16 16
17void Controller_Stubbed::OnRelease() {} 17void Controller_Stubbed::OnRelease() {}
18 18
19void Controller_Stubbed::OnUpdate(u8* data, std::size_t size) { 19void Controller_Stubbed::OnUpdate(const Core::Timing::CoreTiming& core_timing, u8* data,
20 std::size_t size) {
20 if (!smart_update) { 21 if (!smart_update) {
21 return; 22 return;
22 } 23 }
23 24
24 CommonHeader header{}; 25 CommonHeader header{};
25 header.timestamp = CoreTiming::GetTicks(); 26 header.timestamp = core_timing.GetTicks();
26 header.total_entry_count = 17; 27 header.total_entry_count = 17;
27 header.entry_count = 0; 28 header.entry_count = 0;
28 header.last_entry_index = 0; 29 header.last_entry_index = 0;
diff --git a/src/core/hle/service/hid/controllers/stubbed.h b/src/core/hle/service/hid/controllers/stubbed.h
index 4a21c643e..24469f03e 100644
--- a/src/core/hle/service/hid/controllers/stubbed.h
+++ b/src/core/hle/service/hid/controllers/stubbed.h
@@ -20,7 +20,7 @@ public:
20 void OnRelease() override; 20 void OnRelease() override;
21 21
22 // When the controller is requesting an update for the shared memory 22 // When the controller is requesting an update for the shared memory
23 void OnUpdate(u8* data, std::size_t size) override; 23 void OnUpdate(const Core::Timing::CoreTiming& core_timing, u8* data, std::size_t size) override;
24 24
25 // Called when input devices should be loaded 25 // Called when input devices should be loaded
26 void OnLoadInputDevices() override; 26 void OnLoadInputDevices() override;
diff --git a/src/core/hle/service/hid/controllers/touchscreen.cpp b/src/core/hle/service/hid/controllers/touchscreen.cpp
index f666b1bd8..1a8445a43 100644
--- a/src/core/hle/service/hid/controllers/touchscreen.cpp
+++ b/src/core/hle/service/hid/controllers/touchscreen.cpp
@@ -20,8 +20,9 @@ void Controller_Touchscreen::OnInit() {}
20 20
21void Controller_Touchscreen::OnRelease() {} 21void Controller_Touchscreen::OnRelease() {}
22 22
23void Controller_Touchscreen::OnUpdate(u8* data, std::size_t size) { 23void Controller_Touchscreen::OnUpdate(const Core::Timing::CoreTiming& core_timing, u8* data,
24 shared_memory.header.timestamp = CoreTiming::GetTicks(); 24 std::size_t size) {
25 shared_memory.header.timestamp = core_timing.GetTicks();
25 shared_memory.header.total_entry_count = 17; 26 shared_memory.header.total_entry_count = 17;
26 27
27 if (!IsControllerActivated()) { 28 if (!IsControllerActivated()) {
@@ -48,7 +49,7 @@ void Controller_Touchscreen::OnUpdate(u8* data, std::size_t size) {
48 touch_entry.diameter_x = Settings::values.touchscreen.diameter_x; 49 touch_entry.diameter_x = Settings::values.touchscreen.diameter_x;
49 touch_entry.diameter_y = Settings::values.touchscreen.diameter_y; 50 touch_entry.diameter_y = Settings::values.touchscreen.diameter_y;
50 touch_entry.rotation_angle = Settings::values.touchscreen.rotation_angle; 51 touch_entry.rotation_angle = Settings::values.touchscreen.rotation_angle;
51 const u64 tick = CoreTiming::GetTicks(); 52 const u64 tick = core_timing.GetTicks();
52 touch_entry.delta_time = tick - last_touch; 53 touch_entry.delta_time = tick - last_touch;
53 last_touch = tick; 54 last_touch = tick;
54 touch_entry.finger = Settings::values.touchscreen.finger; 55 touch_entry.finger = Settings::values.touchscreen.finger;
diff --git a/src/core/hle/service/hid/controllers/touchscreen.h b/src/core/hle/service/hid/controllers/touchscreen.h
index be2583864..76fc340e9 100644
--- a/src/core/hle/service/hid/controllers/touchscreen.h
+++ b/src/core/hle/service/hid/controllers/touchscreen.h
@@ -24,7 +24,7 @@ public:
24 void OnRelease() override; 24 void OnRelease() override;
25 25
26 // When the controller is requesting an update for the shared memory 26 // When the controller is requesting an update for the shared memory
27 void OnUpdate(u8* data, std::size_t size) override; 27 void OnUpdate(const Core::Timing::CoreTiming& core_timing, u8* data, std::size_t size) override;
28 28
29 // Called when input devices should be loaded 29 // Called when input devices should be loaded
30 void OnLoadInputDevices() override; 30 void OnLoadInputDevices() override;
diff --git a/src/core/hle/service/hid/controllers/xpad.cpp b/src/core/hle/service/hid/controllers/xpad.cpp
index cd397c70b..1a9da9576 100644
--- a/src/core/hle/service/hid/controllers/xpad.cpp
+++ b/src/core/hle/service/hid/controllers/xpad.cpp
@@ -17,9 +17,10 @@ void Controller_XPad::OnInit() {}
17 17
18void Controller_XPad::OnRelease() {} 18void Controller_XPad::OnRelease() {}
19 19
20void Controller_XPad::OnUpdate(u8* data, std::size_t size) { 20void Controller_XPad::OnUpdate(const Core::Timing::CoreTiming& core_timing, u8* data,
21 std::size_t size) {
21 for (auto& xpad_entry : shared_memory.shared_memory_entries) { 22 for (auto& xpad_entry : shared_memory.shared_memory_entries) {
22 xpad_entry.header.timestamp = CoreTiming::GetTicks(); 23 xpad_entry.header.timestamp = core_timing.GetTicks();
23 xpad_entry.header.total_entry_count = 17; 24 xpad_entry.header.total_entry_count = 17;
24 25
25 if (!IsControllerActivated()) { 26 if (!IsControllerActivated()) {
diff --git a/src/core/hle/service/hid/controllers/xpad.h b/src/core/hle/service/hid/controllers/xpad.h
index ff836989f..2864e6617 100644
--- a/src/core/hle/service/hid/controllers/xpad.h
+++ b/src/core/hle/service/hid/controllers/xpad.h
@@ -22,7 +22,7 @@ public:
22 void OnRelease() override; 22 void OnRelease() override;
23 23
24 // When the controller is requesting an update for the shared memory 24 // When the controller is requesting an update for the shared memory
25 void OnUpdate(u8* data, std::size_t size) override; 25 void OnUpdate(const Core::Timing::CoreTiming& core_timing, u8* data, std::size_t size) override;
26 26
27 // Called when input devices should be loaded 27 // Called when input devices should be loaded
28 void OnLoadInputDevices() override; 28 void OnLoadInputDevices() override;
diff --git a/src/core/hle/service/hid/hid.cpp b/src/core/hle/service/hid/hid.cpp
index 008bf3f02..8a6de83a2 100644
--- a/src/core/hle/service/hid/hid.cpp
+++ b/src/core/hle/service/hid/hid.cpp
@@ -36,9 +36,9 @@ namespace Service::HID {
36 36
37// Updating period for each HID device. 37// Updating period for each HID device.
38// TODO(ogniK): Find actual polling rate of hid 38// TODO(ogniK): Find actual polling rate of hid
39constexpr u64 pad_update_ticks = CoreTiming::BASE_CLOCK_RATE / 66; 39constexpr u64 pad_update_ticks = Core::Timing::BASE_CLOCK_RATE / 66;
40constexpr u64 accelerometer_update_ticks = CoreTiming::BASE_CLOCK_RATE / 100; 40constexpr u64 accelerometer_update_ticks = Core::Timing::BASE_CLOCK_RATE / 100;
41constexpr u64 gyroscope_update_ticks = CoreTiming::BASE_CLOCK_RATE / 100; 41constexpr u64 gyroscope_update_ticks = Core::Timing::BASE_CLOCK_RATE / 100;
42constexpr std::size_t SHARED_MEMORY_SIZE = 0x40000; 42constexpr std::size_t SHARED_MEMORY_SIZE = 0x40000;
43 43
44IAppletResource::IAppletResource() : ServiceFramework("IAppletResource") { 44IAppletResource::IAppletResource() : ServiceFramework("IAppletResource") {
@@ -73,14 +73,15 @@ IAppletResource::IAppletResource() : ServiceFramework("IAppletResource") {
73 GetController<Controller_Stubbed>(HidController::Unknown3).SetCommonHeaderOffset(0x5000); 73 GetController<Controller_Stubbed>(HidController::Unknown3).SetCommonHeaderOffset(0x5000);
74 74
75 // Register update callbacks 75 // Register update callbacks
76 auto& core_timing = Core::System::GetInstance().CoreTiming();
76 pad_update_event = 77 pad_update_event =
77 CoreTiming::RegisterEvent("HID::UpdatePadCallback", [this](u64 userdata, int cycles_late) { 78 core_timing.RegisterEvent("HID::UpdatePadCallback", [this](u64 userdata, int cycles_late) {
78 UpdateControllers(userdata, cycles_late); 79 UpdateControllers(userdata, cycles_late);
79 }); 80 });
80 81
81 // TODO(shinyquagsire23): Other update callbacks? (accel, gyro?) 82 // TODO(shinyquagsire23): Other update callbacks? (accel, gyro?)
82 83
83 CoreTiming::ScheduleEvent(pad_update_ticks, pad_update_event); 84 core_timing.ScheduleEvent(pad_update_ticks, pad_update_event);
84 85
85 ReloadInputDevices(); 86 ReloadInputDevices();
86} 87}
@@ -94,7 +95,7 @@ void IAppletResource::DeactivateController(HidController controller) {
94} 95}
95 96
96IAppletResource ::~IAppletResource() { 97IAppletResource ::~IAppletResource() {
97 CoreTiming::UnscheduleEvent(pad_update_event, 0); 98 Core::System::GetInstance().CoreTiming().UnscheduleEvent(pad_update_event, 0);
98} 99}
99 100
100void IAppletResource::GetSharedMemoryHandle(Kernel::HLERequestContext& ctx) { 101void IAppletResource::GetSharedMemoryHandle(Kernel::HLERequestContext& ctx) {
@@ -106,15 +107,17 @@ void IAppletResource::GetSharedMemoryHandle(Kernel::HLERequestContext& ctx) {
106} 107}
107 108
108void IAppletResource::UpdateControllers(u64 userdata, int cycles_late) { 109void IAppletResource::UpdateControllers(u64 userdata, int cycles_late) {
110 auto& core_timing = Core::System::GetInstance().CoreTiming();
111
109 const bool should_reload = Settings::values.is_device_reload_pending.exchange(false); 112 const bool should_reload = Settings::values.is_device_reload_pending.exchange(false);
110 for (const auto& controller : controllers) { 113 for (const auto& controller : controllers) {
111 if (should_reload) { 114 if (should_reload) {
112 controller->OnLoadInputDevices(); 115 controller->OnLoadInputDevices();
113 } 116 }
114 controller->OnUpdate(shared_mem->GetPointer(), SHARED_MEMORY_SIZE); 117 controller->OnUpdate(core_timing, shared_mem->GetPointer(), SHARED_MEMORY_SIZE);
115 } 118 }
116 119
117 CoreTiming::ScheduleEvent(pad_update_ticks - cycles_late, pad_update_event); 120 core_timing.ScheduleEvent(pad_update_ticks - cycles_late, pad_update_event);
118} 121}
119 122
120class IActiveVibrationDeviceList final : public ServiceFramework<IActiveVibrationDeviceList> { 123class IActiveVibrationDeviceList final : public ServiceFramework<IActiveVibrationDeviceList> {
diff --git a/src/core/hle/service/hid/hid.h b/src/core/hle/service/hid/hid.h
index eca27c056..7cc58db4c 100644
--- a/src/core/hle/service/hid/hid.h
+++ b/src/core/hle/service/hid/hid.h
@@ -7,7 +7,7 @@
7#include "controllers/controller_base.h" 7#include "controllers/controller_base.h"
8#include "core/hle/service/service.h" 8#include "core/hle/service/service.h"
9 9
10namespace CoreTiming { 10namespace Core::Timing {
11struct EventType; 11struct EventType;
12} 12}
13 13
@@ -15,7 +15,7 @@ namespace Kernel {
15class SharedMemory; 15class SharedMemory;
16} 16}
17 17
18namespace SM { 18namespace Service::SM {
19class ServiceManager; 19class ServiceManager;
20} 20}
21 21
@@ -66,7 +66,7 @@ private:
66 66
67 Kernel::SharedPtr<Kernel::SharedMemory> shared_mem; 67 Kernel::SharedPtr<Kernel::SharedMemory> shared_mem;
68 68
69 CoreTiming::EventType* pad_update_event; 69 Core::Timing::EventType* pad_update_event;
70 70
71 std::array<std::unique_ptr<ControllerBase>, static_cast<size_t>(HidController::MaxControllers)> 71 std::array<std::unique_ptr<ControllerBase>, static_cast<size_t>(HidController::MaxControllers)>
72 controllers{}; 72 controllers{};
diff --git a/src/core/hle/service/hid/irs.cpp b/src/core/hle/service/hid/irs.cpp
index 3c7f8b1ee..2c4625c99 100644
--- a/src/core/hle/service/hid/irs.cpp
+++ b/src/core/hle/service/hid/irs.cpp
@@ -98,7 +98,7 @@ void IRS::GetImageTransferProcessorState(Kernel::HLERequestContext& ctx) {
98 98
99 IPC::ResponseBuilder rb{ctx, 5}; 99 IPC::ResponseBuilder rb{ctx, 5};
100 rb.Push(RESULT_SUCCESS); 100 rb.Push(RESULT_SUCCESS);
101 rb.PushRaw<u64>(CoreTiming::GetTicks()); 101 rb.PushRaw<u64>(Core::System::GetInstance().CoreTiming().GetTicks());
102 rb.PushRaw<u32>(0); 102 rb.PushRaw<u32>(0);
103} 103}
104 104
diff --git a/src/core/hle/service/nvdrv/devices/nvdisp_disp0.cpp b/src/core/hle/service/nvdrv/devices/nvdisp_disp0.cpp
index 92acc57b1..20c7c39aa 100644
--- a/src/core/hle/service/nvdrv/devices/nvdisp_disp0.cpp
+++ b/src/core/hle/service/nvdrv/devices/nvdisp_disp0.cpp
@@ -23,11 +23,11 @@ u32 nvdisp_disp0::ioctl(Ioctl command, const std::vector<u8>& input, std::vector
23 23
24void nvdisp_disp0::flip(u32 buffer_handle, u32 offset, u32 format, u32 width, u32 height, 24void nvdisp_disp0::flip(u32 buffer_handle, u32 offset, u32 format, u32 width, u32 height,
25 u32 stride, NVFlinger::BufferQueue::BufferTransformFlags transform, 25 u32 stride, NVFlinger::BufferQueue::BufferTransformFlags transform,
26 const MathUtil::Rectangle<int>& crop_rect) { 26 const Common::Rectangle<int>& crop_rect) {
27 VAddr addr = nvmap_dev->GetObjectAddress(buffer_handle); 27 VAddr addr = nvmap_dev->GetObjectAddress(buffer_handle);
28 LOG_WARNING(Service, 28 LOG_TRACE(Service,
29 "Drawing from address {:X} offset {:08X} Width {} Height {} Stride {} Format {}", 29 "Drawing from address {:X} offset {:08X} Width {} Height {} Stride {} Format {}",
30 addr, offset, width, height, stride, format); 30 addr, offset, width, height, stride, format);
31 31
32 using PixelFormat = Tegra::FramebufferConfig::PixelFormat; 32 using PixelFormat = Tegra::FramebufferConfig::PixelFormat;
33 const Tegra::FramebufferConfig framebuffer{ 33 const Tegra::FramebufferConfig framebuffer{
@@ -36,7 +36,7 @@ void nvdisp_disp0::flip(u32 buffer_handle, u32 offset, u32 format, u32 width, u3
36 36
37 auto& instance = Core::System::GetInstance(); 37 auto& instance = Core::System::GetInstance();
38 instance.GetPerfStats().EndGameFrame(); 38 instance.GetPerfStats().EndGameFrame();
39 instance.Renderer().SwapBuffers(framebuffer); 39 instance.GPU().SwapBuffers(framebuffer);
40} 40}
41 41
42} // namespace Service::Nvidia::Devices 42} // namespace Service::Nvidia::Devices
diff --git a/src/core/hle/service/nvdrv/devices/nvdisp_disp0.h b/src/core/hle/service/nvdrv/devices/nvdisp_disp0.h
index a45086e45..ace71169f 100644
--- a/src/core/hle/service/nvdrv/devices/nvdisp_disp0.h
+++ b/src/core/hle/service/nvdrv/devices/nvdisp_disp0.h
@@ -25,7 +25,7 @@ public:
25 /// Performs a screen flip, drawing the buffer pointed to by the handle. 25 /// Performs a screen flip, drawing the buffer pointed to by the handle.
26 void flip(u32 buffer_handle, u32 offset, u32 format, u32 width, u32 height, u32 stride, 26 void flip(u32 buffer_handle, u32 offset, u32 format, u32 width, u32 height, u32 stride,
27 NVFlinger::BufferQueue::BufferTransformFlags transform, 27 NVFlinger::BufferQueue::BufferTransformFlags transform,
28 const MathUtil::Rectangle<int>& crop_rect); 28 const Common::Rectangle<int>& crop_rect);
29 29
30private: 30private:
31 std::shared_ptr<nvmap> nvmap_dev; 31 std::shared_ptr<nvmap> nvmap_dev;
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.cpp b/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.cpp
index 466db7ccd..b031ebc66 100644
--- a/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.cpp
+++ b/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.cpp
@@ -10,6 +10,7 @@
10#include "core/core.h" 10#include "core/core.h"
11#include "core/hle/service/nvdrv/devices/nvhost_as_gpu.h" 11#include "core/hle/service/nvdrv/devices/nvhost_as_gpu.h"
12#include "core/hle/service/nvdrv/devices/nvmap.h" 12#include "core/hle/service/nvdrv/devices/nvmap.h"
13#include "core/memory.h"
13#include "video_core/memory_manager.h" 14#include "video_core/memory_manager.h"
14#include "video_core/rasterizer_interface.h" 15#include "video_core/rasterizer_interface.h"
15#include "video_core/renderer_base.h" 16#include "video_core/renderer_base.h"
@@ -178,7 +179,7 @@ u32 nvhost_as_gpu::UnmapBuffer(const std::vector<u8>& input, std::vector<u8>& ou
178 auto& gpu = system_instance.GPU(); 179 auto& gpu = system_instance.GPU();
179 auto cpu_addr = gpu.MemoryManager().GpuToCpuAddress(params.offset); 180 auto cpu_addr = gpu.MemoryManager().GpuToCpuAddress(params.offset);
180 ASSERT(cpu_addr); 181 ASSERT(cpu_addr);
181 system_instance.Renderer().Rasterizer().FlushAndInvalidateRegion(*cpu_addr, itr->second.size); 182 gpu.FlushAndInvalidateRegion(ToCacheAddr(Memory::GetPointer(*cpu_addr)), itr->second.size);
182 183
183 params.offset = gpu.MemoryManager().UnmapBuffer(params.offset, itr->second.size); 184 params.offset = gpu.MemoryManager().UnmapBuffer(params.offset, itr->second.size);
184 185
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_ctrl_gpu.cpp b/src/core/hle/service/nvdrv/devices/nvhost_ctrl_gpu.cpp
index d57a54ee8..45812d238 100644
--- a/src/core/hle/service/nvdrv/devices/nvhost_ctrl_gpu.cpp
+++ b/src/core/hle/service/nvdrv/devices/nvhost_ctrl_gpu.cpp
@@ -5,6 +5,7 @@
5#include <cstring> 5#include <cstring>
6#include "common/assert.h" 6#include "common/assert.h"
7#include "common/logging/log.h" 7#include "common/logging/log.h"
8#include "core/core.h"
8#include "core/core_timing.h" 9#include "core/core_timing.h"
9#include "core/core_timing_util.h" 10#include "core/core_timing_util.h"
10#include "core/hle/service/nvdrv/devices/nvhost_ctrl_gpu.h" 11#include "core/hle/service/nvdrv/devices/nvhost_ctrl_gpu.h"
@@ -184,7 +185,7 @@ u32 nvhost_ctrl_gpu::GetGpuTime(const std::vector<u8>& input, std::vector<u8>& o
184 185
185 IoctlGetGpuTime params{}; 186 IoctlGetGpuTime params{};
186 std::memcpy(&params, input.data(), input.size()); 187 std::memcpy(&params, input.data(), input.size());
187 params.gpu_time = CoreTiming::cyclesToNs(CoreTiming::GetTicks()); 188 params.gpu_time = Core::Timing::cyclesToNs(Core::System::GetInstance().CoreTiming().GetTicks());
188 std::memcpy(output.data(), &params, output.size()); 189 std::memcpy(output.data(), &params, output.size());
189 return 0; 190 return 0;
190} 191}
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_gpu.cpp b/src/core/hle/service/nvdrv/devices/nvhost_gpu.cpp
index 0a650f36c..8ce7bc7a5 100644
--- a/src/core/hle/service/nvdrv/devices/nvhost_gpu.cpp
+++ b/src/core/hle/service/nvdrv/devices/nvhost_gpu.cpp
@@ -136,16 +136,6 @@ u32 nvhost_gpu::AllocateObjectContext(const std::vector<u8>& input, std::vector<
136 return 0; 136 return 0;
137} 137}
138 138
139static void PushGPUEntries(Tegra::CommandList&& entries) {
140 if (entries.empty()) {
141 return;
142 }
143
144 auto& dma_pusher{Core::System::GetInstance().GPU().DmaPusher()};
145 dma_pusher.Push(std::move(entries));
146 dma_pusher.DispatchCalls();
147}
148
149u32 nvhost_gpu::SubmitGPFIFO(const std::vector<u8>& input, std::vector<u8>& output) { 139u32 nvhost_gpu::SubmitGPFIFO(const std::vector<u8>& input, std::vector<u8>& output) {
150 if (input.size() < sizeof(IoctlSubmitGpfifo)) { 140 if (input.size() < sizeof(IoctlSubmitGpfifo)) {
151 UNIMPLEMENTED(); 141 UNIMPLEMENTED();
@@ -163,7 +153,7 @@ u32 nvhost_gpu::SubmitGPFIFO(const std::vector<u8>& input, std::vector<u8>& outp
163 std::memcpy(entries.data(), &input[sizeof(IoctlSubmitGpfifo)], 153 std::memcpy(entries.data(), &input[sizeof(IoctlSubmitGpfifo)],
164 params.num_entries * sizeof(Tegra::CommandListHeader)); 154 params.num_entries * sizeof(Tegra::CommandListHeader));
165 155
166 PushGPUEntries(std::move(entries)); 156 Core::System::GetInstance().GPU().PushGPUEntries(std::move(entries));
167 157
168 params.fence_out.id = 0; 158 params.fence_out.id = 0;
169 params.fence_out.value = 0; 159 params.fence_out.value = 0;
@@ -184,7 +174,7 @@ u32 nvhost_gpu::KickoffPB(const std::vector<u8>& input, std::vector<u8>& output)
184 Memory::ReadBlock(params.address, entries.data(), 174 Memory::ReadBlock(params.address, entries.data(),
185 params.num_entries * sizeof(Tegra::CommandListHeader)); 175 params.num_entries * sizeof(Tegra::CommandListHeader));
186 176
187 PushGPUEntries(std::move(entries)); 177 Core::System::GetInstance().GPU().PushGPUEntries(std::move(entries));
188 178
189 params.fence_out.id = 0; 179 params.fence_out.id = 0;
190 params.fence_out.value = 0; 180 params.fence_out.value = 0;
diff --git a/src/core/hle/service/nvflinger/buffer_queue.cpp b/src/core/hle/service/nvflinger/buffer_queue.cpp
index fc07d9bb8..4d150fc71 100644
--- a/src/core/hle/service/nvflinger/buffer_queue.cpp
+++ b/src/core/hle/service/nvflinger/buffer_queue.cpp
@@ -63,7 +63,7 @@ const IGBPBuffer& BufferQueue::RequestBuffer(u32 slot) const {
63} 63}
64 64
65void BufferQueue::QueueBuffer(u32 slot, BufferTransformFlags transform, 65void BufferQueue::QueueBuffer(u32 slot, BufferTransformFlags transform,
66 const MathUtil::Rectangle<int>& crop_rect) { 66 const Common::Rectangle<int>& crop_rect) {
67 auto itr = std::find_if(queue.begin(), queue.end(), 67 auto itr = std::find_if(queue.begin(), queue.end(),
68 [&](const Buffer& buffer) { return buffer.slot == slot; }); 68 [&](const Buffer& buffer) { return buffer.slot == slot; });
69 ASSERT(itr != queue.end()); 69 ASSERT(itr != queue.end());
diff --git a/src/core/hle/service/nvflinger/buffer_queue.h b/src/core/hle/service/nvflinger/buffer_queue.h
index b171f256c..e1ccb6171 100644
--- a/src/core/hle/service/nvflinger/buffer_queue.h
+++ b/src/core/hle/service/nvflinger/buffer_queue.h
@@ -13,10 +13,6 @@
13#include "core/hle/kernel/object.h" 13#include "core/hle/kernel/object.h"
14#include "core/hle/kernel/writable_event.h" 14#include "core/hle/kernel/writable_event.h"
15 15
16namespace CoreTiming {
17struct EventType;
18}
19
20namespace Service::NVFlinger { 16namespace Service::NVFlinger {
21 17
22struct IGBPBuffer { 18struct IGBPBuffer {
@@ -71,14 +67,14 @@ public:
71 Status status = Status::Free; 67 Status status = Status::Free;
72 IGBPBuffer igbp_buffer; 68 IGBPBuffer igbp_buffer;
73 BufferTransformFlags transform; 69 BufferTransformFlags transform;
74 MathUtil::Rectangle<int> crop_rect; 70 Common::Rectangle<int> crop_rect;
75 }; 71 };
76 72
77 void SetPreallocatedBuffer(u32 slot, const IGBPBuffer& igbp_buffer); 73 void SetPreallocatedBuffer(u32 slot, const IGBPBuffer& igbp_buffer);
78 std::optional<u32> DequeueBuffer(u32 width, u32 height); 74 std::optional<u32> DequeueBuffer(u32 width, u32 height);
79 const IGBPBuffer& RequestBuffer(u32 slot) const; 75 const IGBPBuffer& RequestBuffer(u32 slot) const;
80 void QueueBuffer(u32 slot, BufferTransformFlags transform, 76 void QueueBuffer(u32 slot, BufferTransformFlags transform,
81 const MathUtil::Rectangle<int>& crop_rect); 77 const Common::Rectangle<int>& crop_rect);
82 std::optional<std::reference_wrapper<const Buffer>> AcquireBuffer(); 78 std::optional<std::reference_wrapper<const Buffer>> AcquireBuffer();
83 void ReleaseBuffer(u32 slot); 79 void ReleaseBuffer(u32 slot);
84 u32 Query(QueryType type); 80 u32 Query(QueryType type);
diff --git a/src/core/hle/service/nvflinger/nvflinger.cpp b/src/core/hle/service/nvflinger/nvflinger.cpp
index 8dfc0df03..fc496b654 100644
--- a/src/core/hle/service/nvflinger/nvflinger.cpp
+++ b/src/core/hle/service/nvflinger/nvflinger.cpp
@@ -14,135 +14,170 @@
14#include "core/core_timing_util.h" 14#include "core/core_timing_util.h"
15#include "core/hle/kernel/kernel.h" 15#include "core/hle/kernel/kernel.h"
16#include "core/hle/kernel/readable_event.h" 16#include "core/hle/kernel/readable_event.h"
17#include "core/hle/kernel/writable_event.h"
18#include "core/hle/service/nvdrv/devices/nvdisp_disp0.h" 17#include "core/hle/service/nvdrv/devices/nvdisp_disp0.h"
19#include "core/hle/service/nvdrv/nvdrv.h" 18#include "core/hle/service/nvdrv/nvdrv.h"
20#include "core/hle/service/nvflinger/buffer_queue.h" 19#include "core/hle/service/nvflinger/buffer_queue.h"
21#include "core/hle/service/nvflinger/nvflinger.h" 20#include "core/hle/service/nvflinger/nvflinger.h"
21#include "core/hle/service/vi/display/vi_display.h"
22#include "core/hle/service/vi/layer/vi_layer.h"
22#include "core/perf_stats.h" 23#include "core/perf_stats.h"
23#include "video_core/renderer_base.h" 24#include "video_core/renderer_base.h"
24 25
25namespace Service::NVFlinger { 26namespace Service::NVFlinger {
26 27
27constexpr std::size_t SCREEN_REFRESH_RATE = 60; 28constexpr std::size_t SCREEN_REFRESH_RATE = 60;
28constexpr u64 frame_ticks = static_cast<u64>(CoreTiming::BASE_CLOCK_RATE / SCREEN_REFRESH_RATE); 29constexpr u64 frame_ticks = static_cast<u64>(Core::Timing::BASE_CLOCK_RATE / SCREEN_REFRESH_RATE);
30
31NVFlinger::NVFlinger(Core::Timing::CoreTiming& core_timing) : core_timing{core_timing} {
32 displays.emplace_back(0, "Default");
33 displays.emplace_back(1, "External");
34 displays.emplace_back(2, "Edid");
35 displays.emplace_back(3, "Internal");
36 displays.emplace_back(4, "Null");
29 37
30NVFlinger::NVFlinger() {
31 // Schedule the screen composition events 38 // Schedule the screen composition events
32 composition_event = 39 composition_event =
33 CoreTiming::RegisterEvent("ScreenComposition", [this](u64 userdata, int cycles_late) { 40 core_timing.RegisterEvent("ScreenComposition", [this](u64 userdata, int cycles_late) {
34 Compose(); 41 Compose();
35 CoreTiming::ScheduleEvent(frame_ticks - cycles_late, composition_event); 42 this->core_timing.ScheduleEvent(frame_ticks - cycles_late, composition_event);
36 }); 43 });
37 44
38 CoreTiming::ScheduleEvent(frame_ticks, composition_event); 45 core_timing.ScheduleEvent(frame_ticks, composition_event);
39} 46}
40 47
41NVFlinger::~NVFlinger() { 48NVFlinger::~NVFlinger() {
42 CoreTiming::UnscheduleEvent(composition_event, 0); 49 core_timing.UnscheduleEvent(composition_event, 0);
43} 50}
44 51
45void NVFlinger::SetNVDrvInstance(std::shared_ptr<Nvidia::Module> instance) { 52void NVFlinger::SetNVDrvInstance(std::shared_ptr<Nvidia::Module> instance) {
46 nvdrv = std::move(instance); 53 nvdrv = std::move(instance);
47} 54}
48 55
49u64 NVFlinger::OpenDisplay(std::string_view name) { 56std::optional<u64> NVFlinger::OpenDisplay(std::string_view name) {
50 LOG_DEBUG(Service, "Opening \"{}\" display", name); 57 LOG_DEBUG(Service, "Opening \"{}\" display", name);
51 58
52 // TODO(Subv): Currently we only support the Default display. 59 // TODO(Subv): Currently we only support the Default display.
53 ASSERT(name == "Default"); 60 ASSERT(name == "Default");
54 61
55 const auto itr = std::find_if(displays.begin(), displays.end(), 62 const auto itr =
56 [&](const Display& display) { return display.name == name; }); 63 std::find_if(displays.begin(), displays.end(),
57 64 [&](const VI::Display& display) { return display.GetName() == name; });
58 ASSERT(itr != displays.end()); 65 if (itr == displays.end()) {
66 return {};
67 }
59 68
60 return itr->id; 69 return itr->GetID();
61} 70}
62 71
63u64 NVFlinger::CreateLayer(u64 display_id) { 72std::optional<u64> NVFlinger::CreateLayer(u64 display_id) {
64 auto& display = FindDisplay(display_id); 73 auto* const display = FindDisplay(display_id);
65 74
66 ASSERT_MSG(display.layers.empty(), "Only one layer is supported per display at the moment"); 75 if (display == nullptr) {
76 return {};
77 }
67 78
68 const u64 layer_id = next_layer_id++; 79 const u64 layer_id = next_layer_id++;
69 const u32 buffer_queue_id = next_buffer_queue_id++; 80 const u32 buffer_queue_id = next_buffer_queue_id++;
70 auto buffer_queue = std::make_shared<BufferQueue>(buffer_queue_id, layer_id); 81 buffer_queues.emplace_back(buffer_queue_id, layer_id);
71 display.layers.emplace_back(layer_id, buffer_queue); 82 display->CreateLayer(layer_id, buffer_queues.back());
72 buffer_queues.emplace_back(std::move(buffer_queue));
73 return layer_id; 83 return layer_id;
74} 84}
75 85
76u32 NVFlinger::FindBufferQueueId(u64 display_id, u64 layer_id) const { 86std::optional<u32> NVFlinger::FindBufferQueueId(u64 display_id, u64 layer_id) const {
77 const auto& layer = FindLayer(display_id, layer_id); 87 const auto* const layer = FindLayer(display_id, layer_id);
78 return layer.buffer_queue->GetId(); 88
89 if (layer == nullptr) {
90 return {};
91 }
92
93 return layer->GetBufferQueue().GetId();
79} 94}
80 95
81Kernel::SharedPtr<Kernel::ReadableEvent> NVFlinger::GetVsyncEvent(u64 display_id) { 96Kernel::SharedPtr<Kernel::ReadableEvent> NVFlinger::FindVsyncEvent(u64 display_id) const {
82 return FindDisplay(display_id).vsync_event.readable; 97 auto* const display = FindDisplay(display_id);
98
99 if (display == nullptr) {
100 return nullptr;
101 }
102
103 return display->GetVSyncEvent();
83} 104}
84 105
85std::shared_ptr<BufferQueue> NVFlinger::FindBufferQueue(u32 id) const { 106BufferQueue& NVFlinger::FindBufferQueue(u32 id) {
86 const auto itr = std::find_if(buffer_queues.begin(), buffer_queues.end(), 107 const auto itr = std::find_if(buffer_queues.begin(), buffer_queues.end(),
87 [&](const auto& queue) { return queue->GetId() == id; }); 108 [id](const auto& queue) { return queue.GetId() == id; });
88 109
89 ASSERT(itr != buffer_queues.end()); 110 ASSERT(itr != buffer_queues.end());
90 return *itr; 111 return *itr;
91} 112}
92 113
93Display& NVFlinger::FindDisplay(u64 display_id) { 114const BufferQueue& NVFlinger::FindBufferQueue(u32 id) const {
94 const auto itr = std::find_if(displays.begin(), displays.end(), 115 const auto itr = std::find_if(buffer_queues.begin(), buffer_queues.end(),
95 [&](const Display& display) { return display.id == display_id; }); 116 [id](const auto& queue) { return queue.GetId() == id; });
96 117
97 ASSERT(itr != displays.end()); 118 ASSERT(itr != buffer_queues.end());
98 return *itr; 119 return *itr;
99} 120}
100 121
101const Display& NVFlinger::FindDisplay(u64 display_id) const { 122VI::Display* NVFlinger::FindDisplay(u64 display_id) {
102 const auto itr = std::find_if(displays.begin(), displays.end(), 123 const auto itr =
103 [&](const Display& display) { return display.id == display_id; }); 124 std::find_if(displays.begin(), displays.end(),
125 [&](const VI::Display& display) { return display.GetID() == display_id; });
104 126
105 ASSERT(itr != displays.end()); 127 if (itr == displays.end()) {
106 return *itr; 128 return nullptr;
129 }
130
131 return &*itr;
107} 132}
108 133
109Layer& NVFlinger::FindLayer(u64 display_id, u64 layer_id) { 134const VI::Display* NVFlinger::FindDisplay(u64 display_id) const {
110 auto& display = FindDisplay(display_id); 135 const auto itr =
136 std::find_if(displays.begin(), displays.end(),
137 [&](const VI::Display& display) { return display.GetID() == display_id; });
111 138
112 const auto itr = std::find_if(display.layers.begin(), display.layers.end(), 139 if (itr == displays.end()) {
113 [&](const Layer& layer) { return layer.id == layer_id; }); 140 return nullptr;
141 }
114 142
115 ASSERT(itr != display.layers.end()); 143 return &*itr;
116 return *itr;
117} 144}
118 145
119const Layer& NVFlinger::FindLayer(u64 display_id, u64 layer_id) const { 146VI::Layer* NVFlinger::FindLayer(u64 display_id, u64 layer_id) {
120 const auto& display = FindDisplay(display_id); 147 auto* const display = FindDisplay(display_id);
121 148
122 const auto itr = std::find_if(display.layers.begin(), display.layers.end(), 149 if (display == nullptr) {
123 [&](const Layer& layer) { return layer.id == layer_id; }); 150 return nullptr;
151 }
124 152
125 ASSERT(itr != display.layers.end()); 153 return display->FindLayer(layer_id);
126 return *itr; 154}
155
156const VI::Layer* NVFlinger::FindLayer(u64 display_id, u64 layer_id) const {
157 const auto* const display = FindDisplay(display_id);
158
159 if (display == nullptr) {
160 return nullptr;
161 }
162
163 return display->FindLayer(layer_id);
127} 164}
128 165
129void NVFlinger::Compose() { 166void NVFlinger::Compose() {
130 for (auto& display : displays) { 167 for (auto& display : displays) {
131 // Trigger vsync for this display at the end of drawing 168 // Trigger vsync for this display at the end of drawing
132 SCOPE_EXIT({ display.vsync_event.writable->Signal(); }); 169 SCOPE_EXIT({ display.SignalVSyncEvent(); });
133 170
134 // Don't do anything for displays without layers. 171 // Don't do anything for displays without layers.
135 if (display.layers.empty()) 172 if (!display.HasLayers())
136 continue; 173 continue;
137 174
138 // TODO(Subv): Support more than 1 layer. 175 // TODO(Subv): Support more than 1 layer.
139 ASSERT_MSG(display.layers.size() == 1, "Max 1 layer per display is supported"); 176 VI::Layer& layer = display.GetLayer(0);
140 177 auto& buffer_queue = layer.GetBufferQueue();
141 Layer& layer = display.layers[0];
142 auto& buffer_queue = layer.buffer_queue;
143 178
144 // Search for a queued buffer and acquire it 179 // Search for a queued buffer and acquire it
145 auto buffer = buffer_queue->AcquireBuffer(); 180 auto buffer = buffer_queue.AcquireBuffer();
146 181
147 MicroProfileFlip(); 182 MicroProfileFlip();
148 183
@@ -151,7 +186,7 @@ void NVFlinger::Compose() {
151 186
152 // There was no queued buffer to draw, render previous frame 187 // There was no queued buffer to draw, render previous frame
153 system_instance.GetPerfStats().EndGameFrame(); 188 system_instance.GetPerfStats().EndGameFrame();
154 system_instance.Renderer().SwapBuffers({}); 189 system_instance.GPU().SwapBuffers({});
155 continue; 190 continue;
156 } 191 }
157 192
@@ -167,19 +202,8 @@ void NVFlinger::Compose() {
167 igbp_buffer.width, igbp_buffer.height, igbp_buffer.stride, 202 igbp_buffer.width, igbp_buffer.height, igbp_buffer.stride,
168 buffer->get().transform, buffer->get().crop_rect); 203 buffer->get().transform, buffer->get().crop_rect);
169 204
170 buffer_queue->ReleaseBuffer(buffer->get().slot); 205 buffer_queue.ReleaseBuffer(buffer->get().slot);
171 } 206 }
172} 207}
173 208
174Layer::Layer(u64 id, std::shared_ptr<BufferQueue> queue) : id(id), buffer_queue(std::move(queue)) {}
175Layer::~Layer() = default;
176
177Display::Display(u64 id, std::string name) : id(id), name(std::move(name)) {
178 auto& kernel = Core::System::GetInstance().Kernel();
179 vsync_event = Kernel::WritableEvent::CreateEventPair(kernel, Kernel::ResetType::Sticky,
180 fmt::format("Display VSync Event {}", id));
181}
182
183Display::~Display() = default;
184
185} // namespace Service::NVFlinger 209} // namespace Service::NVFlinger
diff --git a/src/core/hle/service/nvflinger/nvflinger.h b/src/core/hle/service/nvflinger/nvflinger.h
index 83e974ed3..c0a83fffb 100644
--- a/src/core/hle/service/nvflinger/nvflinger.h
+++ b/src/core/hle/service/nvflinger/nvflinger.h
@@ -4,8 +4,8 @@
4 4
5#pragma once 5#pragma once
6 6
7#include <array>
8#include <memory> 7#include <memory>
8#include <optional>
9#include <string> 9#include <string>
10#include <string_view> 10#include <string_view>
11#include <vector> 11#include <vector>
@@ -13,9 +13,10 @@
13#include "common/common_types.h" 13#include "common/common_types.h"
14#include "core/hle/kernel/object.h" 14#include "core/hle/kernel/object.h"
15 15
16namespace CoreTiming { 16namespace Core::Timing {
17class CoreTiming;
17struct EventType; 18struct EventType;
18} 19} // namespace Core::Timing
19 20
20namespace Kernel { 21namespace Kernel {
21class ReadableEvent; 22class ReadableEvent;
@@ -24,53 +25,50 @@ class WritableEvent;
24 25
25namespace Service::Nvidia { 26namespace Service::Nvidia {
26class Module; 27class Module;
27} 28} // namespace Service::Nvidia
29
30namespace Service::VI {
31class Display;
32class Layer;
33} // namespace Service::VI
28 34
29namespace Service::NVFlinger { 35namespace Service::NVFlinger {
30 36
31class BufferQueue; 37class BufferQueue;
32 38
33struct Layer {
34 Layer(u64 id, std::shared_ptr<BufferQueue> queue);
35 ~Layer();
36
37 u64 id;
38 std::shared_ptr<BufferQueue> buffer_queue;
39};
40
41struct Display {
42 Display(u64 id, std::string name);
43 ~Display();
44
45 u64 id;
46 std::string name;
47
48 std::vector<Layer> layers;
49 Kernel::EventPair vsync_event;
50};
51
52class NVFlinger final { 39class NVFlinger final {
53public: 40public:
54 NVFlinger(); 41 explicit NVFlinger(Core::Timing::CoreTiming& core_timing);
55 ~NVFlinger(); 42 ~NVFlinger();
56 43
57 /// Sets the NVDrv module instance to use to send buffers to the GPU. 44 /// Sets the NVDrv module instance to use to send buffers to the GPU.
58 void SetNVDrvInstance(std::shared_ptr<Nvidia::Module> instance); 45 void SetNVDrvInstance(std::shared_ptr<Nvidia::Module> instance);
59 46
60 /// Opens the specified display and returns the ID. 47 /// Opens the specified display and returns the ID.
61 u64 OpenDisplay(std::string_view name); 48 ///
49 /// If an invalid display name is provided, then an empty optional is returned.
50 std::optional<u64> OpenDisplay(std::string_view name);
62 51
63 /// Creates a layer on the specified display and returns the layer ID. 52 /// Creates a layer on the specified display and returns the layer ID.
64 u64 CreateLayer(u64 display_id); 53 ///
54 /// If an invalid display ID is specified, then an empty optional is returned.
55 std::optional<u64> CreateLayer(u64 display_id);
65 56
66 /// Finds the buffer queue ID of the specified layer in the specified display. 57 /// Finds the buffer queue ID of the specified layer in the specified display.
67 u32 FindBufferQueueId(u64 display_id, u64 layer_id) const; 58 ///
59 /// If an invalid display ID or layer ID is provided, then an empty optional is returned.
60 std::optional<u32> FindBufferQueueId(u64 display_id, u64 layer_id) const;
68 61
69 /// Gets the vsync event for the specified display. 62 /// Gets the vsync event for the specified display.
70 Kernel::SharedPtr<Kernel::ReadableEvent> GetVsyncEvent(u64 display_id); 63 ///
64 /// If an invalid display ID is provided, then nullptr is returned.
65 Kernel::SharedPtr<Kernel::ReadableEvent> FindVsyncEvent(u64 display_id) const;
71 66
72 /// Obtains a buffer queue identified by the ID. 67 /// Obtains a buffer queue identified by the ID.
73 std::shared_ptr<BufferQueue> FindBufferQueue(u32 id) const; 68 BufferQueue& FindBufferQueue(u32 id);
69
70 /// Obtains a buffer queue identified by the ID.
71 const BufferQueue& FindBufferQueue(u32 id) const;
74 72
75 /// Performs a composition request to the emulated nvidia GPU and triggers the vsync events when 73 /// Performs a composition request to the emulated nvidia GPU and triggers the vsync events when
76 /// finished. 74 /// finished.
@@ -78,27 +76,21 @@ public:
78 76
79private: 77private:
80 /// Finds the display identified by the specified ID. 78 /// Finds the display identified by the specified ID.
81 Display& FindDisplay(u64 display_id); 79 VI::Display* FindDisplay(u64 display_id);
82 80
83 /// Finds the display identified by the specified ID. 81 /// Finds the display identified by the specified ID.
84 const Display& FindDisplay(u64 display_id) const; 82 const VI::Display* FindDisplay(u64 display_id) const;
85 83
86 /// Finds the layer identified by the specified ID in the desired display. 84 /// Finds the layer identified by the specified ID in the desired display.
87 Layer& FindLayer(u64 display_id, u64 layer_id); 85 VI::Layer* FindLayer(u64 display_id, u64 layer_id);
88 86
89 /// Finds the layer identified by the specified ID in the desired display. 87 /// Finds the layer identified by the specified ID in the desired display.
90 const Layer& FindLayer(u64 display_id, u64 layer_id) const; 88 const VI::Layer* FindLayer(u64 display_id, u64 layer_id) const;
91 89
92 std::shared_ptr<Nvidia::Module> nvdrv; 90 std::shared_ptr<Nvidia::Module> nvdrv;
93 91
94 std::array<Display, 5> displays{{ 92 std::vector<VI::Display> displays;
95 {0, "Default"}, 93 std::vector<BufferQueue> buffer_queues;
96 {1, "External"},
97 {2, "Edid"},
98 {3, "Internal"},
99 {4, "Null"},
100 }};
101 std::vector<std::shared_ptr<BufferQueue>> buffer_queues;
102 94
103 /// Id to use for the next layer that is created, this counter is shared among all displays. 95 /// Id to use for the next layer that is created, this counter is shared among all displays.
104 u64 next_layer_id = 1; 96 u64 next_layer_id = 1;
@@ -106,8 +98,11 @@ private:
106 /// layers. 98 /// layers.
107 u32 next_buffer_queue_id = 1; 99 u32 next_buffer_queue_id = 1;
108 100
109 /// CoreTiming event that handles screen composition. 101 /// Event that handles screen composition.
110 CoreTiming::EventType* composition_event; 102 Core::Timing::EventType* composition_event;
103
104 /// Core timing instance for registering/unregistering the composition event.
105 Core::Timing::CoreTiming& core_timing;
111}; 106};
112 107
113} // namespace Service::NVFlinger 108} // namespace Service::NVFlinger
diff --git a/src/core/hle/service/service.cpp b/src/core/hle/service/service.cpp
index d25b80ab0..00806b0ed 100644
--- a/src/core/hle/service/service.cpp
+++ b/src/core/hle/service/service.cpp
@@ -11,7 +11,6 @@
11#include "core/hle/ipc.h" 11#include "core/hle/ipc.h"
12#include "core/hle/ipc_helpers.h" 12#include "core/hle/ipc_helpers.h"
13#include "core/hle/kernel/client_port.h" 13#include "core/hle/kernel/client_port.h"
14#include "core/hle/kernel/handle_table.h"
15#include "core/hle/kernel/kernel.h" 14#include "core/hle/kernel/kernel.h"
16#include "core/hle/kernel/process.h" 15#include "core/hle/kernel/process.h"
17#include "core/hle/kernel/server_port.h" 16#include "core/hle/kernel/server_port.h"
@@ -76,7 +75,8 @@ namespace Service {
76 * Creates a function string for logging, complete with the name (or header code, depending 75 * Creates a function string for logging, complete with the name (or header code, depending
77 * on what's passed in) the port name, and all the cmd_buff arguments. 76 * on what's passed in) the port name, and all the cmd_buff arguments.
78 */ 77 */
79[[maybe_unused]] static std::string MakeFunctionString(const char* name, const char* port_name, 78[[maybe_unused]] static std::string MakeFunctionString(std::string_view name,
79 std::string_view port_name,
80 const u32* cmd_buff) { 80 const u32* cmd_buff) {
81 // Number of params == bits 0-5 + bits 6-11 81 // Number of params == bits 0-5 + bits 6-11
82 int num_params = (cmd_buff[0] & 0x3F) + ((cmd_buff[0] >> 6) & 0x3F); 82 int num_params = (cmd_buff[0] & 0x3F) + ((cmd_buff[0] >> 6) & 0x3F);
@@ -158,9 +158,7 @@ void ServiceFrameworkBase::InvokeRequest(Kernel::HLERequestContext& ctx) {
158 return ReportUnimplementedFunction(ctx, info); 158 return ReportUnimplementedFunction(ctx, info);
159 } 159 }
160 160
161 LOG_TRACE( 161 LOG_TRACE(Service, "{}", MakeFunctionString(info->name, GetServiceName(), ctx.CommandBuffer()));
162 Service, "{}",
163 MakeFunctionString(info->name, GetServiceName().c_str(), ctx.CommandBuffer()).c_str());
164 handler_invoker(this, info->handler_callback, ctx); 162 handler_invoker(this, info->handler_callback, ctx);
165} 163}
166 164
@@ -169,7 +167,7 @@ ResultCode ServiceFrameworkBase::HandleSyncRequest(Kernel::HLERequestContext& co
169 case IPC::CommandType::Close: { 167 case IPC::CommandType::Close: {
170 IPC::ResponseBuilder rb{context, 2}; 168 IPC::ResponseBuilder rb{context, 2};
171 rb.Push(RESULT_SUCCESS); 169 rb.Push(RESULT_SUCCESS);
172 return ResultCode(ErrorModule::HIPC, ErrorDescription::RemoteProcessDead); 170 return IPC::ERR_REMOTE_PROCESS_DEAD;
173 } 171 }
174 case IPC::CommandType::ControlWithContext: 172 case IPC::CommandType::ControlWithContext:
175 case IPC::CommandType::Control: { 173 case IPC::CommandType::Control: {
@@ -194,10 +192,11 @@ ResultCode ServiceFrameworkBase::HandleSyncRequest(Kernel::HLERequestContext& co
194// Module interface 192// Module interface
195 193
196/// Initialize ServiceManager 194/// Initialize ServiceManager
197void Init(std::shared_ptr<SM::ServiceManager>& sm, FileSys::VfsFilesystem& vfs) { 195void Init(std::shared_ptr<SM::ServiceManager>& sm, Core::System& system,
196 FileSys::VfsFilesystem& vfs) {
198 // NVFlinger needs to be accessed by several services like Vi and AppletOE so we instantiate it 197 // NVFlinger needs to be accessed by several services like Vi and AppletOE so we instantiate it
199 // here and pass it into the respective InstallInterfaces functions. 198 // here and pass it into the respective InstallInterfaces functions.
200 auto nv_flinger = std::make_shared<NVFlinger::NVFlinger>(); 199 auto nv_flinger = std::make_shared<NVFlinger::NVFlinger>(system.CoreTiming());
201 200
202 SM::ServiceManager::InstallInterfaces(sm); 201 SM::ServiceManager::InstallInterfaces(sm);
203 202
diff --git a/src/core/hle/service/service.h b/src/core/hle/service/service.h
index 029533628..830790269 100644
--- a/src/core/hle/service/service.h
+++ b/src/core/hle/service/service.h
@@ -14,6 +14,14 @@
14//////////////////////////////////////////////////////////////////////////////////////////////////// 14////////////////////////////////////////////////////////////////////////////////////////////////////
15// Namespace Service 15// Namespace Service
16 16
17namespace Core {
18class System;
19}
20
21namespace FileSys {
22class VfsFilesystem;
23}
24
17namespace Kernel { 25namespace Kernel {
18class ClientPort; 26class ClientPort;
19class ServerPort; 27class ServerPort;
@@ -21,10 +29,6 @@ class ServerSession;
21class HLERequestContext; 29class HLERequestContext;
22} // namespace Kernel 30} // namespace Kernel
23 31
24namespace FileSys {
25class VfsFilesystem;
26}
27
28namespace Service { 32namespace Service {
29 33
30namespace SM { 34namespace SM {
@@ -178,7 +182,8 @@ private:
178}; 182};
179 183
180/// Initialize ServiceManager 184/// Initialize ServiceManager
181void Init(std::shared_ptr<SM::ServiceManager>& sm, FileSys::VfsFilesystem& vfs); 185void Init(std::shared_ptr<SM::ServiceManager>& sm, Core::System& system,
186 FileSys::VfsFilesystem& vfs);
182 187
183/// Shutdown ServiceManager 188/// Shutdown ServiceManager
184void Shutdown(); 189void Shutdown();
diff --git a/src/core/hle/service/sm/controller.cpp b/src/core/hle/service/sm/controller.cpp
index 74da4d5e6..e9ee73710 100644
--- a/src/core/hle/service/sm/controller.cpp
+++ b/src/core/hle/service/sm/controller.cpp
@@ -30,7 +30,7 @@ void Controller::DuplicateSession(Kernel::HLERequestContext& ctx) {
30 30
31 IPC::ResponseBuilder rb{ctx, 2, 0, 1, IPC::ResponseBuilder::Flags::AlwaysMoveHandles}; 31 IPC::ResponseBuilder rb{ctx, 2, 0, 1, IPC::ResponseBuilder::Flags::AlwaysMoveHandles};
32 rb.Push(RESULT_SUCCESS); 32 rb.Push(RESULT_SUCCESS);
33 Kernel::SharedPtr<Kernel::ClientSession> session{ctx.Session()->parent->client}; 33 Kernel::SharedPtr<Kernel::ClientSession> session{ctx.Session()->GetParent()->client};
34 rb.PushMoveObjects(session); 34 rb.PushMoveObjects(session);
35 35
36 LOG_DEBUG(Service, "session={}", session->GetObjectId()); 36 LOG_DEBUG(Service, "session={}", session->GetObjectId());
diff --git a/src/core/hle/service/sm/sm.h b/src/core/hle/service/sm/sm.h
index bef25433e..b9d6381b4 100644
--- a/src/core/hle/service/sm/sm.h
+++ b/src/core/hle/service/sm/sm.h
@@ -67,7 +67,7 @@ public:
67 if (port == nullptr) { 67 if (port == nullptr) {
68 return nullptr; 68 return nullptr;
69 } 69 }
70 return std::static_pointer_cast<T>(port->hle_handler); 70 return std::static_pointer_cast<T>(port->GetHLEHandler());
71 } 71 }
72 72
73 void InvokeControlRequest(Kernel::HLERequestContext& context); 73 void InvokeControlRequest(Kernel::HLERequestContext& context);
diff --git a/src/core/hle/service/time/time.cpp b/src/core/hle/service/time/time.cpp
index c13640ad8..aa115935d 100644
--- a/src/core/hle/service/time/time.cpp
+++ b/src/core/hle/service/time/time.cpp
@@ -5,6 +5,7 @@
5#include <chrono> 5#include <chrono>
6#include <ctime> 6#include <ctime>
7#include "common/logging/log.h" 7#include "common/logging/log.h"
8#include "core/core.h"
8#include "core/core_timing.h" 9#include "core/core_timing.h"
9#include "core/core_timing_util.h" 10#include "core/core_timing_util.h"
10#include "core/hle/ipc_helpers.h" 11#include "core/hle/ipc_helpers.h"
@@ -106,8 +107,9 @@ private:
106 void GetCurrentTimePoint(Kernel::HLERequestContext& ctx) { 107 void GetCurrentTimePoint(Kernel::HLERequestContext& ctx) {
107 LOG_DEBUG(Service_Time, "called"); 108 LOG_DEBUG(Service_Time, "called");
108 109
109 SteadyClockTimePoint steady_clock_time_point{ 110 const auto& core_timing = Core::System::GetInstance().CoreTiming();
110 CoreTiming::cyclesToMs(CoreTiming::GetTicks()) / 1000}; 111 const SteadyClockTimePoint steady_clock_time_point{
112 Core::Timing::cyclesToMs(core_timing.GetTicks()) / 1000};
111 IPC::ResponseBuilder rb{ctx, (sizeof(SteadyClockTimePoint) / 4) + 2}; 113 IPC::ResponseBuilder rb{ctx, (sizeof(SteadyClockTimePoint) / 4) + 2};
112 rb.Push(RESULT_SUCCESS); 114 rb.Push(RESULT_SUCCESS);
113 rb.PushRaw(steady_clock_time_point); 115 rb.PushRaw(steady_clock_time_point);
@@ -281,8 +283,9 @@ void Module::Interface::GetClockSnapshot(Kernel::HLERequestContext& ctx) {
281 return; 283 return;
282 } 284 }
283 285
286 const auto& core_timing = Core::System::GetInstance().CoreTiming();
284 const SteadyClockTimePoint steady_clock_time_point{ 287 const SteadyClockTimePoint steady_clock_time_point{
285 CoreTiming::cyclesToMs(CoreTiming::GetTicks()) / 1000, {}}; 288 Core::Timing::cyclesToMs(core_timing.GetTicks()) / 1000, {}};
286 289
287 CalendarTime calendar_time{}; 290 CalendarTime calendar_time{};
288 calendar_time.year = tm->tm_year + 1900; 291 calendar_time.year = tm->tm_year + 1900;
diff --git a/src/core/hle/service/vi/display/vi_display.cpp b/src/core/hle/service/vi/display/vi_display.cpp
new file mode 100644
index 000000000..01d80311b
--- /dev/null
+++ b/src/core/hle/service/vi/display/vi_display.cpp
@@ -0,0 +1,71 @@
1// Copyright 2019 yuzu emulator team
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <algorithm>
6#include <utility>
7
8#include <fmt/format.h>
9
10#include "common/assert.h"
11#include "core/core.h"
12#include "core/hle/kernel/readable_event.h"
13#include "core/hle/service/vi/display/vi_display.h"
14#include "core/hle/service/vi/layer/vi_layer.h"
15
16namespace Service::VI {
17
18Display::Display(u64 id, std::string name) : id{id}, name{std::move(name)} {
19 auto& kernel = Core::System::GetInstance().Kernel();
20 vsync_event = Kernel::WritableEvent::CreateEventPair(kernel, Kernel::ResetType::Sticky,
21 fmt::format("Display VSync Event {}", id));
22}
23
24Display::~Display() = default;
25
26Layer& Display::GetLayer(std::size_t index) {
27 return layers.at(index);
28}
29
30const Layer& Display::GetLayer(std::size_t index) const {
31 return layers.at(index);
32}
33
34Kernel::SharedPtr<Kernel::ReadableEvent> Display::GetVSyncEvent() const {
35 return vsync_event.readable;
36}
37
38void Display::SignalVSyncEvent() {
39 vsync_event.writable->Signal();
40}
41
42void Display::CreateLayer(u64 id, NVFlinger::BufferQueue& buffer_queue) {
43 // TODO(Subv): Support more than 1 layer.
44 ASSERT_MSG(layers.empty(), "Only one layer is supported per display at the moment");
45
46 layers.emplace_back(id, buffer_queue);
47}
48
49Layer* Display::FindLayer(u64 id) {
50 const auto itr = std::find_if(layers.begin(), layers.end(),
51 [id](const VI::Layer& layer) { return layer.GetID() == id; });
52
53 if (itr == layers.end()) {
54 return nullptr;
55 }
56
57 return &*itr;
58}
59
60const Layer* Display::FindLayer(u64 id) const {
61 const auto itr = std::find_if(layers.begin(), layers.end(),
62 [id](const VI::Layer& layer) { return layer.GetID() == id; });
63
64 if (itr == layers.end()) {
65 return nullptr;
66 }
67
68 return &*itr;
69}
70
71} // namespace Service::VI
diff --git a/src/core/hle/service/vi/display/vi_display.h b/src/core/hle/service/vi/display/vi_display.h
new file mode 100644
index 000000000..2acd46ff8
--- /dev/null
+++ b/src/core/hle/service/vi/display/vi_display.h
@@ -0,0 +1,98 @@
1// Copyright 2019 yuzu emulator team
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <string>
8#include <vector>
9
10#include "common/common_types.h"
11#include "core/hle/kernel/writable_event.h"
12
13namespace Service::NVFlinger {
14class BufferQueue;
15}
16
17namespace Service::VI {
18
19class Layer;
20
21/// Represents a single display type
22class Display {
23public:
24 /// Constructs a display with a given unique ID and name.
25 ///
26 /// @param id The unique ID for this display.
27 /// @param name The name for this display.
28 ///
29 Display(u64 id, std::string name);
30 ~Display();
31
32 Display(const Display&) = delete;
33 Display& operator=(const Display&) = delete;
34
35 Display(Display&&) = default;
36 Display& operator=(Display&&) = default;
37
38 /// Gets the unique ID assigned to this display.
39 u64 GetID() const {
40 return id;
41 }
42
43 /// Gets the name of this display
44 const std::string& GetName() const {
45 return name;
46 }
47
48 /// Whether or not this display has any layers added to it.
49 bool HasLayers() const {
50 return !layers.empty();
51 }
52
53 /// Gets a layer for this display based off an index.
54 Layer& GetLayer(std::size_t index);
55
56 /// Gets a layer for this display based off an index.
57 const Layer& GetLayer(std::size_t index) const;
58
59 /// Gets the readable vsync event.
60 Kernel::SharedPtr<Kernel::ReadableEvent> GetVSyncEvent() const;
61
62 /// Signals the internal vsync event.
63 void SignalVSyncEvent();
64
65 /// Creates and adds a layer to this display with the given ID.
66 ///
67 /// @param id The ID to assign to the created layer.
68 /// @param buffer_queue The buffer queue for the layer instance to use.
69 ///
70 void CreateLayer(u64 id, NVFlinger::BufferQueue& buffer_queue);
71
72 /// Attempts to find a layer with the given ID.
73 ///
74 /// @param id The layer ID.
75 ///
76 /// @returns If found, the Layer instance with the given ID.
77 /// If not found, then nullptr is returned.
78 ///
79 Layer* FindLayer(u64 id);
80
81 /// Attempts to find a layer with the given ID.
82 ///
83 /// @param id The layer ID.
84 ///
85 /// @returns If found, the Layer instance with the given ID.
86 /// If not found, then nullptr is returned.
87 ///
88 const Layer* FindLayer(u64 id) const;
89
90private:
91 u64 id;
92 std::string name;
93
94 std::vector<Layer> layers;
95 Kernel::EventPair vsync_event;
96};
97
98} // namespace Service::VI
diff --git a/src/core/hle/service/vi/layer/vi_layer.cpp b/src/core/hle/service/vi/layer/vi_layer.cpp
new file mode 100644
index 000000000..954225c26
--- /dev/null
+++ b/src/core/hle/service/vi/layer/vi_layer.cpp
@@ -0,0 +1,13 @@
1// Copyright 2019 yuzu emulator team
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "core/hle/service/vi/layer/vi_layer.h"
6
7namespace Service::VI {
8
9Layer::Layer(u64 id, NVFlinger::BufferQueue& queue) : id{id}, buffer_queue{queue} {}
10
11Layer::~Layer() = default;
12
13} // namespace Service::VI
diff --git a/src/core/hle/service/vi/layer/vi_layer.h b/src/core/hle/service/vi/layer/vi_layer.h
new file mode 100644
index 000000000..c6bfd01f6
--- /dev/null
+++ b/src/core/hle/service/vi/layer/vi_layer.h
@@ -0,0 +1,52 @@
1// Copyright 2019 yuzu emulator team
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include "common/common_types.h"
8
9namespace Service::NVFlinger {
10class BufferQueue;
11}
12
13namespace Service::VI {
14
15/// Represents a single display layer.
16class Layer {
17public:
18 /// Constructs a layer with a given ID and buffer queue.
19 ///
20 /// @param id The ID to assign to this layer.
21 /// @param queue The buffer queue for this layer to use.
22 ///
23 Layer(u64 id, NVFlinger::BufferQueue& queue);
24 ~Layer();
25
26 Layer(const Layer&) = delete;
27 Layer& operator=(const Layer&) = delete;
28
29 Layer(Layer&&) = default;
30 Layer& operator=(Layer&&) = delete;
31
32 /// Gets the ID for this layer.
33 u64 GetID() const {
34 return id;
35 }
36
37 /// Gets a reference to the buffer queue this layer is using.
38 NVFlinger::BufferQueue& GetBufferQueue() {
39 return buffer_queue;
40 }
41
42 /// Gets a const reference to the buffer queue this layer is using.
43 const NVFlinger::BufferQueue& GetBufferQueue() const {
44 return buffer_queue;
45 }
46
47private:
48 u64 id;
49 NVFlinger::BufferQueue& buffer_queue;
50};
51
52} // namespace Service::VI
diff --git a/src/core/hle/service/vi/vi.cpp b/src/core/hle/service/vi/vi.cpp
index fe08c38f2..566cd6006 100644
--- a/src/core/hle/service/vi/vi.cpp
+++ b/src/core/hle/service/vi/vi.cpp
@@ -24,6 +24,7 @@
24#include "core/hle/service/nvdrv/nvdrv.h" 24#include "core/hle/service/nvdrv/nvdrv.h"
25#include "core/hle/service/nvflinger/buffer_queue.h" 25#include "core/hle/service/nvflinger/buffer_queue.h"
26#include "core/hle/service/nvflinger/nvflinger.h" 26#include "core/hle/service/nvflinger/nvflinger.h"
27#include "core/hle/service/service.h"
27#include "core/hle/service/vi/vi.h" 28#include "core/hle/service/vi/vi.h"
28#include "core/hle/service/vi/vi_m.h" 29#include "core/hle/service/vi/vi_m.h"
29#include "core/hle/service/vi/vi_s.h" 30#include "core/hle/service/vi/vi_s.h"
@@ -33,7 +34,9 @@
33namespace Service::VI { 34namespace Service::VI {
34 35
35constexpr ResultCode ERR_OPERATION_FAILED{ErrorModule::VI, 1}; 36constexpr ResultCode ERR_OPERATION_FAILED{ErrorModule::VI, 1};
37constexpr ResultCode ERR_PERMISSION_DENIED{ErrorModule::VI, 5};
36constexpr ResultCode ERR_UNSUPPORTED{ErrorModule::VI, 6}; 38constexpr ResultCode ERR_UNSUPPORTED{ErrorModule::VI, 6};
39constexpr ResultCode ERR_NOT_FOUND{ErrorModule::VI, 7};
37 40
38struct DisplayInfo { 41struct DisplayInfo {
39 /// The name of this particular display. 42 /// The name of this particular display.
@@ -419,7 +422,7 @@ public:
419 u32_le fence_is_valid; 422 u32_le fence_is_valid;
420 std::array<Fence, 2> fences; 423 std::array<Fence, 2> fences;
421 424
422 MathUtil::Rectangle<int> GetCropRect() const { 425 Common::Rectangle<int> GetCropRect() const {
423 return {crop_left, crop_top, crop_right, crop_bottom}; 426 return {crop_left, crop_top, crop_right, crop_bottom};
424 } 427 }
425 }; 428 };
@@ -524,7 +527,7 @@ private:
524 LOG_DEBUG(Service_VI, "called. id=0x{:08X} transaction={:X}, flags=0x{:08X}", id, 527 LOG_DEBUG(Service_VI, "called. id=0x{:08X} transaction={:X}, flags=0x{:08X}", id,
525 static_cast<u32>(transaction), flags); 528 static_cast<u32>(transaction), flags);
526 529
527 auto buffer_queue = nv_flinger->FindBufferQueue(id); 530 auto& buffer_queue = nv_flinger->FindBufferQueue(id);
528 531
529 if (transaction == TransactionId::Connect) { 532 if (transaction == TransactionId::Connect) {
530 IGBPConnectRequestParcel request{ctx.ReadBuffer()}; 533 IGBPConnectRequestParcel request{ctx.ReadBuffer()};
@@ -537,7 +540,7 @@ private:
537 } else if (transaction == TransactionId::SetPreallocatedBuffer) { 540 } else if (transaction == TransactionId::SetPreallocatedBuffer) {
538 IGBPSetPreallocatedBufferRequestParcel request{ctx.ReadBuffer()}; 541 IGBPSetPreallocatedBufferRequestParcel request{ctx.ReadBuffer()};
539 542
540 buffer_queue->SetPreallocatedBuffer(request.data.slot, request.buffer); 543 buffer_queue.SetPreallocatedBuffer(request.data.slot, request.buffer);
541 544
542 IGBPSetPreallocatedBufferResponseParcel response{}; 545 IGBPSetPreallocatedBufferResponseParcel response{};
543 ctx.WriteBuffer(response.Serialize()); 546 ctx.WriteBuffer(response.Serialize());
@@ -545,7 +548,7 @@ private:
545 IGBPDequeueBufferRequestParcel request{ctx.ReadBuffer()}; 548 IGBPDequeueBufferRequestParcel request{ctx.ReadBuffer()};
546 const u32 width{request.data.width}; 549 const u32 width{request.data.width};
547 const u32 height{request.data.height}; 550 const u32 height{request.data.height};
548 std::optional<u32> slot = buffer_queue->DequeueBuffer(width, height); 551 std::optional<u32> slot = buffer_queue.DequeueBuffer(width, height);
549 552
550 if (slot) { 553 if (slot) {
551 // Buffer is available 554 // Buffer is available
@@ -558,8 +561,8 @@ private:
558 [=](Kernel::SharedPtr<Kernel::Thread> thread, Kernel::HLERequestContext& ctx, 561 [=](Kernel::SharedPtr<Kernel::Thread> thread, Kernel::HLERequestContext& ctx,
559 Kernel::ThreadWakeupReason reason) { 562 Kernel::ThreadWakeupReason reason) {
560 // Repeat TransactParcel DequeueBuffer when a buffer is available 563 // Repeat TransactParcel DequeueBuffer when a buffer is available
561 auto buffer_queue = nv_flinger->FindBufferQueue(id); 564 auto& buffer_queue = nv_flinger->FindBufferQueue(id);
562 std::optional<u32> slot = buffer_queue->DequeueBuffer(width, height); 565 std::optional<u32> slot = buffer_queue.DequeueBuffer(width, height);
563 ASSERT_MSG(slot != std::nullopt, "Could not dequeue buffer."); 566 ASSERT_MSG(slot != std::nullopt, "Could not dequeue buffer.");
564 567
565 IGBPDequeueBufferResponseParcel response{*slot}; 568 IGBPDequeueBufferResponseParcel response{*slot};
@@ -567,28 +570,28 @@ private:
567 IPC::ResponseBuilder rb{ctx, 2}; 570 IPC::ResponseBuilder rb{ctx, 2};
568 rb.Push(RESULT_SUCCESS); 571 rb.Push(RESULT_SUCCESS);
569 }, 572 },
570 buffer_queue->GetWritableBufferWaitEvent()); 573 buffer_queue.GetWritableBufferWaitEvent());
571 } 574 }
572 } else if (transaction == TransactionId::RequestBuffer) { 575 } else if (transaction == TransactionId::RequestBuffer) {
573 IGBPRequestBufferRequestParcel request{ctx.ReadBuffer()}; 576 IGBPRequestBufferRequestParcel request{ctx.ReadBuffer()};
574 577
575 auto& buffer = buffer_queue->RequestBuffer(request.slot); 578 auto& buffer = buffer_queue.RequestBuffer(request.slot);
576 579
577 IGBPRequestBufferResponseParcel response{buffer}; 580 IGBPRequestBufferResponseParcel response{buffer};
578 ctx.WriteBuffer(response.Serialize()); 581 ctx.WriteBuffer(response.Serialize());
579 } else if (transaction == TransactionId::QueueBuffer) { 582 } else if (transaction == TransactionId::QueueBuffer) {
580 IGBPQueueBufferRequestParcel request{ctx.ReadBuffer()}; 583 IGBPQueueBufferRequestParcel request{ctx.ReadBuffer()};
581 584
582 buffer_queue->QueueBuffer(request.data.slot, request.data.transform, 585 buffer_queue.QueueBuffer(request.data.slot, request.data.transform,
583 request.data.GetCropRect()); 586 request.data.GetCropRect());
584 587
585 IGBPQueueBufferResponseParcel response{1280, 720}; 588 IGBPQueueBufferResponseParcel response{1280, 720};
586 ctx.WriteBuffer(response.Serialize()); 589 ctx.WriteBuffer(response.Serialize());
587 } else if (transaction == TransactionId::Query) { 590 } else if (transaction == TransactionId::Query) {
588 IGBPQueryRequestParcel request{ctx.ReadBuffer()}; 591 IGBPQueryRequestParcel request{ctx.ReadBuffer()};
589 592
590 u32 value = 593 const u32 value =
591 buffer_queue->Query(static_cast<NVFlinger::BufferQueue::QueryType>(request.type)); 594 buffer_queue.Query(static_cast<NVFlinger::BufferQueue::QueryType>(request.type));
592 595
593 IGBPQueryResponseParcel response{value}; 596 IGBPQueryResponseParcel response{value};
594 ctx.WriteBuffer(response.Serialize()); 597 ctx.WriteBuffer(response.Serialize());
@@ -628,12 +631,12 @@ private:
628 631
629 LOG_WARNING(Service_VI, "(STUBBED) called id={}, unknown={:08X}", id, unknown); 632 LOG_WARNING(Service_VI, "(STUBBED) called id={}, unknown={:08X}", id, unknown);
630 633
631 const auto buffer_queue = nv_flinger->FindBufferQueue(id); 634 const auto& buffer_queue = nv_flinger->FindBufferQueue(id);
632 635
633 // TODO(Subv): Find out what this actually is. 636 // TODO(Subv): Find out what this actually is.
634 IPC::ResponseBuilder rb{ctx, 2, 1}; 637 IPC::ResponseBuilder rb{ctx, 2, 1};
635 rb.Push(RESULT_SUCCESS); 638 rb.Push(RESULT_SUCCESS);
636 rb.PushCopyObjects(buffer_queue->GetBufferWaitEvent()); 639 rb.PushCopyObjects(buffer_queue.GetBufferWaitEvent());
637 } 640 }
638 641
639 std::shared_ptr<NVFlinger::NVFlinger> nv_flinger; 642 std::shared_ptr<NVFlinger::NVFlinger> nv_flinger;
@@ -751,6 +754,7 @@ public:
751 {1102, nullptr, "GetDisplayResolution"}, 754 {1102, nullptr, "GetDisplayResolution"},
752 {2010, &IManagerDisplayService::CreateManagedLayer, "CreateManagedLayer"}, 755 {2010, &IManagerDisplayService::CreateManagedLayer, "CreateManagedLayer"},
753 {2011, nullptr, "DestroyManagedLayer"}, 756 {2011, nullptr, "DestroyManagedLayer"},
757 {2012, nullptr, "CreateStrayLayer"},
754 {2050, nullptr, "CreateIndirectLayer"}, 758 {2050, nullptr, "CreateIndirectLayer"},
755 {2051, nullptr, "DestroyIndirectLayer"}, 759 {2051, nullptr, "DestroyIndirectLayer"},
756 {2052, nullptr, "CreateIndirectProducerEndPoint"}, 760 {2052, nullptr, "CreateIndirectProducerEndPoint"},
@@ -838,11 +842,16 @@ private:
838 "(STUBBED) called. unknown=0x{:08X}, display=0x{:016X}, aruid=0x{:016X}", 842 "(STUBBED) called. unknown=0x{:08X}, display=0x{:016X}, aruid=0x{:016X}",
839 unknown, display, aruid); 843 unknown, display, aruid);
840 844
841 const u64 layer_id = nv_flinger->CreateLayer(display); 845 const auto layer_id = nv_flinger->CreateLayer(display);
846 if (!layer_id) {
847 IPC::ResponseBuilder rb{ctx, 2};
848 rb.Push(ERR_NOT_FOUND);
849 return;
850 }
842 851
843 IPC::ResponseBuilder rb{ctx, 4}; 852 IPC::ResponseBuilder rb{ctx, 4};
844 rb.Push(RESULT_SUCCESS); 853 rb.Push(RESULT_SUCCESS);
845 rb.Push(layer_id); 854 rb.Push(*layer_id);
846 } 855 }
847 856
848 void AddToLayerStack(Kernel::HLERequestContext& ctx) { 857 void AddToLayerStack(Kernel::HLERequestContext& ctx) {
@@ -950,9 +959,16 @@ private:
950 959
951 ASSERT_MSG(name == "Default", "Non-default displays aren't supported yet"); 960 ASSERT_MSG(name == "Default", "Non-default displays aren't supported yet");
952 961
962 const auto display_id = nv_flinger->OpenDisplay(name);
963 if (!display_id) {
964 IPC::ResponseBuilder rb{ctx, 2};
965 rb.Push(ERR_NOT_FOUND);
966 return;
967 }
968
953 IPC::ResponseBuilder rb{ctx, 4}; 969 IPC::ResponseBuilder rb{ctx, 4};
954 rb.Push(RESULT_SUCCESS); 970 rb.Push(RESULT_SUCCESS);
955 rb.Push<u64>(nv_flinger->OpenDisplay(name)); 971 rb.Push<u64>(*display_id);
956 } 972 }
957 973
958 void CloseDisplay(Kernel::HLERequestContext& ctx) { 974 void CloseDisplay(Kernel::HLERequestContext& ctx) {
@@ -1043,10 +1059,21 @@ private:
1043 1059
1044 LOG_DEBUG(Service_VI, "called. layer_id=0x{:016X}, aruid=0x{:016X}", layer_id, aruid); 1060 LOG_DEBUG(Service_VI, "called. layer_id=0x{:016X}, aruid=0x{:016X}", layer_id, aruid);
1045 1061
1046 const u64 display_id = nv_flinger->OpenDisplay(display_name); 1062 const auto display_id = nv_flinger->OpenDisplay(display_name);
1047 const u32 buffer_queue_id = nv_flinger->FindBufferQueueId(display_id, layer_id); 1063 if (!display_id) {
1064 IPC::ResponseBuilder rb{ctx, 2};
1065 rb.Push(ERR_NOT_FOUND);
1066 return;
1067 }
1068
1069 const auto buffer_queue_id = nv_flinger->FindBufferQueueId(*display_id, layer_id);
1070 if (!buffer_queue_id) {
1071 IPC::ResponseBuilder rb{ctx, 2};
1072 rb.Push(ERR_NOT_FOUND);
1073 return;
1074 }
1048 1075
1049 NativeWindow native_window{buffer_queue_id}; 1076 NativeWindow native_window{*buffer_queue_id};
1050 IPC::ResponseBuilder rb{ctx, 4}; 1077 IPC::ResponseBuilder rb{ctx, 4};
1051 rb.Push(RESULT_SUCCESS); 1078 rb.Push(RESULT_SUCCESS);
1052 rb.Push<u64>(ctx.WriteBuffer(native_window.Serialize())); 1079 rb.Push<u64>(ctx.WriteBuffer(native_window.Serialize()));
@@ -1062,13 +1089,24 @@ private:
1062 1089
1063 // TODO(Subv): What's the difference between a Stray and a Managed layer? 1090 // TODO(Subv): What's the difference between a Stray and a Managed layer?
1064 1091
1065 const u64 layer_id = nv_flinger->CreateLayer(display_id); 1092 const auto layer_id = nv_flinger->CreateLayer(display_id);
1066 const u32 buffer_queue_id = nv_flinger->FindBufferQueueId(display_id, layer_id); 1093 if (!layer_id) {
1094 IPC::ResponseBuilder rb{ctx, 2};
1095 rb.Push(ERR_NOT_FOUND);
1096 return;
1097 }
1098
1099 const auto buffer_queue_id = nv_flinger->FindBufferQueueId(display_id, *layer_id);
1100 if (!buffer_queue_id) {
1101 IPC::ResponseBuilder rb{ctx, 2};
1102 rb.Push(ERR_NOT_FOUND);
1103 return;
1104 }
1067 1105
1068 NativeWindow native_window{buffer_queue_id}; 1106 NativeWindow native_window{*buffer_queue_id};
1069 IPC::ResponseBuilder rb{ctx, 6}; 1107 IPC::ResponseBuilder rb{ctx, 6};
1070 rb.Push(RESULT_SUCCESS); 1108 rb.Push(RESULT_SUCCESS);
1071 rb.Push(layer_id); 1109 rb.Push(*layer_id);
1072 rb.Push<u64>(ctx.WriteBuffer(native_window.Serialize())); 1110 rb.Push<u64>(ctx.WriteBuffer(native_window.Serialize()));
1073 } 1111 }
1074 1112
@@ -1088,7 +1126,12 @@ private:
1088 1126
1089 LOG_WARNING(Service_VI, "(STUBBED) called. display_id=0x{:016X}", display_id); 1127 LOG_WARNING(Service_VI, "(STUBBED) called. display_id=0x{:016X}", display_id);
1090 1128
1091 const auto vsync_event = nv_flinger->GetVsyncEvent(display_id); 1129 const auto vsync_event = nv_flinger->FindVsyncEvent(display_id);
1130 if (!vsync_event) {
1131 IPC::ResponseBuilder rb{ctx, 2};
1132 rb.Push(ERR_NOT_FOUND);
1133 return;
1134 }
1092 1135
1093 IPC::ResponseBuilder rb{ctx, 2, 1}; 1136 IPC::ResponseBuilder rb{ctx, 2, 1};
1094 rb.Push(RESULT_SUCCESS); 1137 rb.Push(RESULT_SUCCESS);
@@ -1162,26 +1205,40 @@ IApplicationDisplayService::IApplicationDisplayService(
1162 RegisterHandlers(functions); 1205 RegisterHandlers(functions);
1163} 1206}
1164 1207
1165Module::Interface::Interface(std::shared_ptr<Module> module, const char* name, 1208static bool IsValidServiceAccess(Permission permission, Policy policy) {
1166 std::shared_ptr<NVFlinger::NVFlinger> nv_flinger) 1209 if (permission == Permission::User) {
1167 : ServiceFramework(name), module(std::move(module)), nv_flinger(std::move(nv_flinger)) {} 1210 return policy == Policy::User;
1211 }
1212
1213 if (permission == Permission::System || permission == Permission::Manager) {
1214 return policy == Policy::User || policy == Policy::Compositor;
1215 }
1168 1216
1169Module::Interface::~Interface() = default; 1217 return false;
1218}
1170 1219
1171void Module::Interface::GetDisplayService(Kernel::HLERequestContext& ctx) { 1220void detail::GetDisplayServiceImpl(Kernel::HLERequestContext& ctx,
1172 LOG_WARNING(Service_VI, "(STUBBED) called"); 1221 std::shared_ptr<NVFlinger::NVFlinger> nv_flinger,
1222 Permission permission) {
1223 IPC::RequestParser rp{ctx};
1224 const auto policy = rp.PopEnum<Policy>();
1225
1226 if (!IsValidServiceAccess(permission, policy)) {
1227 IPC::ResponseBuilder rb{ctx, 2};
1228 rb.Push(ERR_PERMISSION_DENIED);
1229 return;
1230 }
1173 1231
1174 IPC::ResponseBuilder rb{ctx, 2, 0, 1}; 1232 IPC::ResponseBuilder rb{ctx, 2, 0, 1};
1175 rb.Push(RESULT_SUCCESS); 1233 rb.Push(RESULT_SUCCESS);
1176 rb.PushIpcInterface<IApplicationDisplayService>(nv_flinger); 1234 rb.PushIpcInterface<IApplicationDisplayService>(std::move(nv_flinger));
1177} 1235}
1178 1236
1179void InstallInterfaces(SM::ServiceManager& service_manager, 1237void InstallInterfaces(SM::ServiceManager& service_manager,
1180 std::shared_ptr<NVFlinger::NVFlinger> nv_flinger) { 1238 std::shared_ptr<NVFlinger::NVFlinger> nv_flinger) {
1181 auto module = std::make_shared<Module>(); 1239 std::make_shared<VI_M>(nv_flinger)->InstallAsService(service_manager);
1182 std::make_shared<VI_M>(module, nv_flinger)->InstallAsService(service_manager); 1240 std::make_shared<VI_S>(nv_flinger)->InstallAsService(service_manager);
1183 std::make_shared<VI_S>(module, nv_flinger)->InstallAsService(service_manager); 1241 std::make_shared<VI_U>(nv_flinger)->InstallAsService(service_manager);
1184 std::make_shared<VI_U>(module, nv_flinger)->InstallAsService(service_manager);
1185} 1242}
1186 1243
1187} // namespace Service::VI 1244} // namespace Service::VI
diff --git a/src/core/hle/service/vi/vi.h b/src/core/hle/service/vi/vi.h
index e3963502a..6b66f8b81 100644
--- a/src/core/hle/service/vi/vi.h
+++ b/src/core/hle/service/vi/vi.h
@@ -4,12 +4,21 @@
4 4
5#pragma once 5#pragma once
6 6
7#include "core/hle/service/service.h" 7#include <memory>
8#include "common/common_types.h"
9
10namespace Kernel {
11class HLERequestContext;
12}
8 13
9namespace Service::NVFlinger { 14namespace Service::NVFlinger {
10class NVFlinger; 15class NVFlinger;
11} 16}
12 17
18namespace Service::SM {
19class ServiceManager;
20}
21
13namespace Service::VI { 22namespace Service::VI {
14 23
15enum class DisplayResolution : u32 { 24enum class DisplayResolution : u32 {
@@ -19,22 +28,25 @@ enum class DisplayResolution : u32 {
19 UndockedHeight = 720, 28 UndockedHeight = 720,
20}; 29};
21 30
22class Module final { 31/// Permission level for a particular VI service instance
23public: 32enum class Permission {
24 class Interface : public ServiceFramework<Interface> { 33 User,
25 public: 34 System,
26 explicit Interface(std::shared_ptr<Module> module, const char* name, 35 Manager,
27 std::shared_ptr<NVFlinger::NVFlinger> nv_flinger); 36};
28 ~Interface() override;
29
30 void GetDisplayService(Kernel::HLERequestContext& ctx);
31 37
32 protected: 38/// A policy type that may be requested via GetDisplayService and
33 std::shared_ptr<Module> module; 39/// GetDisplayServiceWithProxyNameExchange
34 std::shared_ptr<NVFlinger::NVFlinger> nv_flinger; 40enum class Policy {
35 }; 41 User,
42 Compositor,
36}; 43};
37 44
45namespace detail {
46void GetDisplayServiceImpl(Kernel::HLERequestContext& ctx,
47 std::shared_ptr<NVFlinger::NVFlinger> nv_flinger, Permission permission);
48} // namespace detail
49
38/// Registers all VI services with the specified service manager. 50/// Registers all VI services with the specified service manager.
39void InstallInterfaces(SM::ServiceManager& service_manager, 51void InstallInterfaces(SM::ServiceManager& service_manager,
40 std::shared_ptr<NVFlinger::NVFlinger> nv_flinger); 52 std::shared_ptr<NVFlinger::NVFlinger> nv_flinger);
diff --git a/src/core/hle/service/vi/vi_m.cpp b/src/core/hle/service/vi/vi_m.cpp
index 207c06b16..06070087f 100644
--- a/src/core/hle/service/vi/vi_m.cpp
+++ b/src/core/hle/service/vi/vi_m.cpp
@@ -2,12 +2,14 @@
2// Licensed under GPLv2 or any later version 2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included. 3// Refer to the license.txt file included.
4 4
5#include "common/logging/log.h"
6#include "core/hle/service/vi/vi.h"
5#include "core/hle/service/vi/vi_m.h" 7#include "core/hle/service/vi/vi_m.h"
6 8
7namespace Service::VI { 9namespace Service::VI {
8 10
9VI_M::VI_M(std::shared_ptr<Module> module, std::shared_ptr<NVFlinger::NVFlinger> nv_flinger) 11VI_M::VI_M(std::shared_ptr<NVFlinger::NVFlinger> nv_flinger)
10 : Module::Interface(std::move(module), "vi:m", std::move(nv_flinger)) { 12 : ServiceFramework{"vi:m"}, nv_flinger{std::move(nv_flinger)} {
11 static const FunctionInfo functions[] = { 13 static const FunctionInfo functions[] = {
12 {2, &VI_M::GetDisplayService, "GetDisplayService"}, 14 {2, &VI_M::GetDisplayService, "GetDisplayService"},
13 {3, nullptr, "GetDisplayServiceWithProxyNameExchange"}, 15 {3, nullptr, "GetDisplayServiceWithProxyNameExchange"},
@@ -17,4 +19,10 @@ VI_M::VI_M(std::shared_ptr<Module> module, std::shared_ptr<NVFlinger::NVFlinger>
17 19
18VI_M::~VI_M() = default; 20VI_M::~VI_M() = default;
19 21
22void VI_M::GetDisplayService(Kernel::HLERequestContext& ctx) {
23 LOG_DEBUG(Service_VI, "called");
24
25 detail::GetDisplayServiceImpl(ctx, nv_flinger, Permission::Manager);
26}
27
20} // namespace Service::VI 28} // namespace Service::VI
diff --git a/src/core/hle/service/vi/vi_m.h b/src/core/hle/service/vi/vi_m.h
index 487d58d50..290e06689 100644
--- a/src/core/hle/service/vi/vi_m.h
+++ b/src/core/hle/service/vi/vi_m.h
@@ -4,14 +4,27 @@
4 4
5#pragma once 5#pragma once
6 6
7#include "core/hle/service/vi/vi.h" 7#include "core/hle/service/service.h"
8
9namespace Kernel {
10class HLERequestContext;
11}
12
13namespace Service::NVFlinger {
14class NVFlinger;
15}
8 16
9namespace Service::VI { 17namespace Service::VI {
10 18
11class VI_M final : public Module::Interface { 19class VI_M final : public ServiceFramework<VI_M> {
12public: 20public:
13 explicit VI_M(std::shared_ptr<Module> module, std::shared_ptr<NVFlinger::NVFlinger> nv_flinger); 21 explicit VI_M(std::shared_ptr<NVFlinger::NVFlinger> nv_flinger);
14 ~VI_M() override; 22 ~VI_M() override;
23
24private:
25 void GetDisplayService(Kernel::HLERequestContext& ctx);
26
27 std::shared_ptr<NVFlinger::NVFlinger> nv_flinger;
15}; 28};
16 29
17} // namespace Service::VI 30} // namespace Service::VI
diff --git a/src/core/hle/service/vi/vi_s.cpp b/src/core/hle/service/vi/vi_s.cpp
index 920e6a1f6..57c596cc4 100644
--- a/src/core/hle/service/vi/vi_s.cpp
+++ b/src/core/hle/service/vi/vi_s.cpp
@@ -2,12 +2,14 @@
2// Licensed under GPLv2 or any later version 2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included. 3// Refer to the license.txt file included.
4 4
5#include "common/logging/log.h"
6#include "core/hle/service/vi/vi.h"
5#include "core/hle/service/vi/vi_s.h" 7#include "core/hle/service/vi/vi_s.h"
6 8
7namespace Service::VI { 9namespace Service::VI {
8 10
9VI_S::VI_S(std::shared_ptr<Module> module, std::shared_ptr<NVFlinger::NVFlinger> nv_flinger) 11VI_S::VI_S(std::shared_ptr<NVFlinger::NVFlinger> nv_flinger)
10 : Module::Interface(std::move(module), "vi:s", std::move(nv_flinger)) { 12 : ServiceFramework{"vi:s"}, nv_flinger{std::move(nv_flinger)} {
11 static const FunctionInfo functions[] = { 13 static const FunctionInfo functions[] = {
12 {1, &VI_S::GetDisplayService, "GetDisplayService"}, 14 {1, &VI_S::GetDisplayService, "GetDisplayService"},
13 {3, nullptr, "GetDisplayServiceWithProxyNameExchange"}, 15 {3, nullptr, "GetDisplayServiceWithProxyNameExchange"},
@@ -17,4 +19,10 @@ VI_S::VI_S(std::shared_ptr<Module> module, std::shared_ptr<NVFlinger::NVFlinger>
17 19
18VI_S::~VI_S() = default; 20VI_S::~VI_S() = default;
19 21
22void VI_S::GetDisplayService(Kernel::HLERequestContext& ctx) {
23 LOG_DEBUG(Service_VI, "called");
24
25 detail::GetDisplayServiceImpl(ctx, nv_flinger, Permission::System);
26}
27
20} // namespace Service::VI 28} // namespace Service::VI
diff --git a/src/core/hle/service/vi/vi_s.h b/src/core/hle/service/vi/vi_s.h
index bbc31148f..47804dc0b 100644
--- a/src/core/hle/service/vi/vi_s.h
+++ b/src/core/hle/service/vi/vi_s.h
@@ -4,14 +4,27 @@
4 4
5#pragma once 5#pragma once
6 6
7#include "core/hle/service/vi/vi.h" 7#include "core/hle/service/service.h"
8
9namespace Kernel {
10class HLERequestContext;
11}
12
13namespace Service::NVFlinger {
14class NVFlinger;
15}
8 16
9namespace Service::VI { 17namespace Service::VI {
10 18
11class VI_S final : public Module::Interface { 19class VI_S final : public ServiceFramework<VI_S> {
12public: 20public:
13 explicit VI_S(std::shared_ptr<Module> module, std::shared_ptr<NVFlinger::NVFlinger> nv_flinger); 21 explicit VI_S(std::shared_ptr<NVFlinger::NVFlinger> nv_flinger);
14 ~VI_S() override; 22 ~VI_S() override;
23
24private:
25 void GetDisplayService(Kernel::HLERequestContext& ctx);
26
27 std::shared_ptr<NVFlinger::NVFlinger> nv_flinger;
15}; 28};
16 29
17} // namespace Service::VI 30} // namespace Service::VI
diff --git a/src/core/hle/service/vi/vi_u.cpp b/src/core/hle/service/vi/vi_u.cpp
index d81e410d6..9d5ceb608 100644
--- a/src/core/hle/service/vi/vi_u.cpp
+++ b/src/core/hle/service/vi/vi_u.cpp
@@ -2,12 +2,14 @@
2// Licensed under GPLv2 or any later version 2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included. 3// Refer to the license.txt file included.
4 4
5#include "common/logging/log.h"
6#include "core/hle/service/vi/vi.h"
5#include "core/hle/service/vi/vi_u.h" 7#include "core/hle/service/vi/vi_u.h"
6 8
7namespace Service::VI { 9namespace Service::VI {
8 10
9VI_U::VI_U(std::shared_ptr<Module> module, std::shared_ptr<NVFlinger::NVFlinger> nv_flinger) 11VI_U::VI_U(std::shared_ptr<NVFlinger::NVFlinger> nv_flinger)
10 : Module::Interface(std::move(module), "vi:u", std::move(nv_flinger)) { 12 : ServiceFramework{"vi:u"}, nv_flinger{std::move(nv_flinger)} {
11 static const FunctionInfo functions[] = { 13 static const FunctionInfo functions[] = {
12 {0, &VI_U::GetDisplayService, "GetDisplayService"}, 14 {0, &VI_U::GetDisplayService, "GetDisplayService"},
13 }; 15 };
@@ -16,4 +18,10 @@ VI_U::VI_U(std::shared_ptr<Module> module, std::shared_ptr<NVFlinger::NVFlinger>
16 18
17VI_U::~VI_U() = default; 19VI_U::~VI_U() = default;
18 20
21void VI_U::GetDisplayService(Kernel::HLERequestContext& ctx) {
22 LOG_DEBUG(Service_VI, "called");
23
24 detail::GetDisplayServiceImpl(ctx, nv_flinger, Permission::User);
25}
26
19} // namespace Service::VI 27} // namespace Service::VI
diff --git a/src/core/hle/service/vi/vi_u.h b/src/core/hle/service/vi/vi_u.h
index b92f28c92..19bdb73b0 100644
--- a/src/core/hle/service/vi/vi_u.h
+++ b/src/core/hle/service/vi/vi_u.h
@@ -4,14 +4,27 @@
4 4
5#pragma once 5#pragma once
6 6
7#include "core/hle/service/vi/vi.h" 7#include "core/hle/service/service.h"
8
9namespace Kernel {
10class HLERequestContext;
11}
12
13namespace Service::NVFlinger {
14class NVFlinger;
15}
8 16
9namespace Service::VI { 17namespace Service::VI {
10 18
11class VI_U final : public Module::Interface { 19class VI_U final : public ServiceFramework<VI_U> {
12public: 20public:
13 explicit VI_U(std::shared_ptr<Module> module, std::shared_ptr<NVFlinger::NVFlinger> nv_flinger); 21 explicit VI_U(std::shared_ptr<NVFlinger::NVFlinger> nv_flinger);
14 ~VI_U() override; 22 ~VI_U() override;
23
24private:
25 void GetDisplayService(Kernel::HLERequestContext& ctx);
26
27 std::shared_ptr<NVFlinger::NVFlinger> nv_flinger;
15}; 28};
16 29
17} // namespace Service::VI 30} // namespace Service::VI
diff --git a/src/core/loader/elf.cpp b/src/core/loader/elf.cpp
index 6057c7f26..8b1920f22 100644
--- a/src/core/loader/elf.cpp
+++ b/src/core/loader/elf.cpp
@@ -9,6 +9,7 @@
9#include "common/common_types.h" 9#include "common/common_types.h"
10#include "common/file_util.h" 10#include "common/file_util.h"
11#include "common/logging/log.h" 11#include "common/logging/log.h"
12#include "core/hle/kernel/code_set.h"
12#include "core/hle/kernel/process.h" 13#include "core/hle/kernel/process.h"
13#include "core/hle/kernel/vm_manager.h" 14#include "core/hle/kernel/vm_manager.h"
14#include "core/loader/elf.h" 15#include "core/loader/elf.h"
diff --git a/src/core/loader/linker.cpp b/src/core/loader/linker.cpp
deleted file mode 100644
index 57ca8c3ee..000000000
--- a/src/core/loader/linker.cpp
+++ /dev/null
@@ -1,147 +0,0 @@
1// Copyright 2018 yuzu emulator team
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <vector>
6
7#include "common/common_funcs.h"
8#include "common/logging/log.h"
9#include "common/swap.h"
10#include "core/loader/linker.h"
11#include "core/memory.h"
12
13namespace Loader {
14
15enum class RelocationType : u32 { ABS64 = 257, GLOB_DAT = 1025, JUMP_SLOT = 1026, RELATIVE = 1027 };
16
17enum DynamicType : u32 {
18 DT_NULL = 0,
19 DT_PLTRELSZ = 2,
20 DT_STRTAB = 5,
21 DT_SYMTAB = 6,
22 DT_RELA = 7,
23 DT_RELASZ = 8,
24 DT_STRSZ = 10,
25 DT_JMPREL = 23,
26};
27
28struct Elf64_Rela {
29 u64_le offset;
30 RelocationType type;
31 u32_le symbol;
32 s64_le addend;
33};
34static_assert(sizeof(Elf64_Rela) == 0x18, "Elf64_Rela has incorrect size.");
35
36struct Elf64_Dyn {
37 u64_le tag;
38 u64_le value;
39};
40static_assert(sizeof(Elf64_Dyn) == 0x10, "Elf64_Dyn has incorrect size.");
41
42struct Elf64_Sym {
43 u32_le name;
44 INSERT_PADDING_BYTES(0x2);
45 u16_le shndx;
46 u64_le value;
47 u64_le size;
48};
49static_assert(sizeof(Elf64_Sym) == 0x18, "Elf64_Sym has incorrect size.");
50
51void Linker::WriteRelocations(std::vector<u8>& program_image, const std::vector<Symbol>& symbols,
52 u64 relocation_offset, u64 size, VAddr load_base) {
53 for (u64 i = 0; i < size; i += sizeof(Elf64_Rela)) {
54 Elf64_Rela rela;
55 std::memcpy(&rela, &program_image[relocation_offset + i], sizeof(Elf64_Rela));
56
57 const Symbol& symbol = symbols[rela.symbol];
58 switch (rela.type) {
59 case RelocationType::RELATIVE: {
60 const u64 value = load_base + rela.addend;
61 if (!symbol.name.empty()) {
62 exports[symbol.name] = value;
63 }
64 std::memcpy(&program_image[rela.offset], &value, sizeof(u64));
65 break;
66 }
67 case RelocationType::JUMP_SLOT:
68 case RelocationType::GLOB_DAT:
69 if (!symbol.value) {
70 imports[symbol.name] = {rela.offset + load_base, 0};
71 } else {
72 exports[symbol.name] = symbol.value;
73 std::memcpy(&program_image[rela.offset], &symbol.value, sizeof(u64));
74 }
75 break;
76 case RelocationType::ABS64:
77 if (!symbol.value) {
78 imports[symbol.name] = {rela.offset + load_base, rela.addend};
79 } else {
80 const u64 value = symbol.value + rela.addend;
81 exports[symbol.name] = value;
82 std::memcpy(&program_image[rela.offset], &value, sizeof(u64));
83 }
84 break;
85 default:
86 LOG_CRITICAL(Loader, "Unknown relocation type: {}", static_cast<int>(rela.type));
87 break;
88 }
89 }
90}
91
92void Linker::Relocate(std::vector<u8>& program_image, u32 dynamic_section_offset, VAddr load_base) {
93 std::map<u64, u64> dynamic;
94 while (dynamic_section_offset < program_image.size()) {
95 Elf64_Dyn dyn;
96 std::memcpy(&dyn, &program_image[dynamic_section_offset], sizeof(Elf64_Dyn));
97 dynamic_section_offset += sizeof(Elf64_Dyn);
98
99 if (dyn.tag == DT_NULL) {
100 break;
101 }
102 dynamic[dyn.tag] = dyn.value;
103 }
104
105 u64 offset = dynamic[DT_SYMTAB];
106 std::vector<Symbol> symbols;
107 while (offset < program_image.size()) {
108 Elf64_Sym sym;
109 std::memcpy(&sym, &program_image[offset], sizeof(Elf64_Sym));
110 offset += sizeof(Elf64_Sym);
111
112 if (sym.name >= dynamic[DT_STRSZ]) {
113 break;
114 }
115
116 std::string name = reinterpret_cast<char*>(&program_image[dynamic[DT_STRTAB] + sym.name]);
117 if (sym.value) {
118 exports[name] = load_base + sym.value;
119 symbols.emplace_back(std::move(name), load_base + sym.value);
120 } else {
121 symbols.emplace_back(std::move(name), 0);
122 }
123 }
124
125 if (dynamic.find(DT_RELA) != dynamic.end()) {
126 WriteRelocations(program_image, symbols, dynamic[DT_RELA], dynamic[DT_RELASZ], load_base);
127 }
128
129 if (dynamic.find(DT_JMPREL) != dynamic.end()) {
130 WriteRelocations(program_image, symbols, dynamic[DT_JMPREL], dynamic[DT_PLTRELSZ],
131 load_base);
132 }
133}
134
135void Linker::ResolveImports() {
136 // Resolve imports
137 for (const auto& import : imports) {
138 const auto& search = exports.find(import.first);
139 if (search != exports.end()) {
140 Memory::Write64(import.second.ea, search->second + import.second.addend);
141 } else {
142 LOG_ERROR(Loader, "Unresolved import: {}", import.first);
143 }
144 }
145}
146
147} // namespace Loader
diff --git a/src/core/loader/linker.h b/src/core/loader/linker.h
deleted file mode 100644
index 107625837..000000000
--- a/src/core/loader/linker.h
+++ /dev/null
@@ -1,36 +0,0 @@
1// Copyright 2018 yuzu emulator team
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <map>
8#include <string>
9#include "common/common_types.h"
10
11namespace Loader {
12
13class Linker {
14protected:
15 struct Symbol {
16 Symbol(std::string&& name, u64 value) : name(std::move(name)), value(value) {}
17 std::string name;
18 u64 value;
19 };
20
21 struct Import {
22 VAddr ea;
23 s64 addend;
24 };
25
26 void WriteRelocations(std::vector<u8>& program_image, const std::vector<Symbol>& symbols,
27 u64 relocation_offset, u64 size, VAddr load_base);
28 void Relocate(std::vector<u8>& program_image, u32 dynamic_section_offset, VAddr load_base);
29
30 void ResolveImports();
31
32 std::map<std::string, Import> imports;
33 std::map<std::string, VAddr> exports;
34};
35
36} // namespace Loader
diff --git a/src/core/loader/nro.cpp b/src/core/loader/nro.cpp
index 4fad0c0dd..5de02a94b 100644
--- a/src/core/loader/nro.cpp
+++ b/src/core/loader/nro.cpp
@@ -14,6 +14,7 @@
14#include "core/file_sys/romfs_factory.h" 14#include "core/file_sys/romfs_factory.h"
15#include "core/file_sys/vfs_offset.h" 15#include "core/file_sys/vfs_offset.h"
16#include "core/gdbstub/gdbstub.h" 16#include "core/gdbstub/gdbstub.h"
17#include "core/hle/kernel/code_set.h"
17#include "core/hle/kernel/process.h" 18#include "core/hle/kernel/process.h"
18#include "core/hle/kernel/vm_manager.h" 19#include "core/hle/kernel/vm_manager.h"
19#include "core/hle/service/filesystem/filesystem.h" 20#include "core/hle/service/filesystem/filesystem.h"
diff --git a/src/core/loader/nro.h b/src/core/loader/nro.h
index 013d629c0..85b0ed644 100644
--- a/src/core/loader/nro.h
+++ b/src/core/loader/nro.h
@@ -4,10 +4,10 @@
4 4
5#pragma once 5#pragma once
6 6
7#include <memory>
7#include <string> 8#include <string>
8#include <vector> 9#include <vector>
9#include "common/common_types.h" 10#include "common/common_types.h"
10#include "core/loader/linker.h"
11#include "core/loader/loader.h" 11#include "core/loader/loader.h"
12 12
13namespace FileSys { 13namespace FileSys {
@@ -21,7 +21,7 @@ class Process;
21namespace Loader { 21namespace Loader {
22 22
23/// Loads an NRO file 23/// Loads an NRO file
24class AppLoader_NRO final : public AppLoader, Linker { 24class AppLoader_NRO final : public AppLoader {
25public: 25public:
26 explicit AppLoader_NRO(FileSys::VirtualFile file); 26 explicit AppLoader_NRO(FileSys::VirtualFile file);
27 ~AppLoader_NRO() override; 27 ~AppLoader_NRO() override;
diff --git a/src/core/loader/nso.cpp b/src/core/loader/nso.cpp
index 6ded0b707..e1c8908a1 100644
--- a/src/core/loader/nso.cpp
+++ b/src/core/loader/nso.cpp
@@ -11,6 +11,7 @@
11#include "common/swap.h" 11#include "common/swap.h"
12#include "core/file_sys/patch_manager.h" 12#include "core/file_sys/patch_manager.h"
13#include "core/gdbstub/gdbstub.h" 13#include "core/gdbstub/gdbstub.h"
14#include "core/hle/kernel/code_set.h"
14#include "core/hle/kernel/process.h" 15#include "core/hle/kernel/process.h"
15#include "core/hle/kernel/vm_manager.h" 16#include "core/hle/kernel/vm_manager.h"
16#include "core/loader/nso.h" 17#include "core/loader/nso.h"
diff --git a/src/core/loader/nso.h b/src/core/loader/nso.h
index 135b6ea5a..167c8a694 100644
--- a/src/core/loader/nso.h
+++ b/src/core/loader/nso.h
@@ -6,8 +6,8 @@
6 6
7#include <optional> 7#include <optional>
8#include "common/common_types.h" 8#include "common/common_types.h"
9#include "common/swap.h"
9#include "core/file_sys/patch_manager.h" 10#include "core/file_sys/patch_manager.h"
10#include "core/loader/linker.h"
11#include "core/loader/loader.h" 11#include "core/loader/loader.h"
12 12
13namespace Kernel { 13namespace Kernel {
@@ -26,7 +26,7 @@ struct NSOArgumentHeader {
26static_assert(sizeof(NSOArgumentHeader) == 0x20, "NSOArgumentHeader has incorrect size."); 26static_assert(sizeof(NSOArgumentHeader) == 0x20, "NSOArgumentHeader has incorrect size.");
27 27
28/// Loads an NSO file 28/// Loads an NSO file
29class AppLoader_NSO final : public AppLoader, Linker { 29class AppLoader_NSO final : public AppLoader {
30public: 30public:
31 explicit AppLoader_NSO(FileSys::VirtualFile file); 31 explicit AppLoader_NSO(FileSys::VirtualFile file);
32 32
diff --git a/src/core/memory.cpp b/src/core/memory.cpp
index e9166dbd9..365ac82b4 100644
--- a/src/core/memory.cpp
+++ b/src/core/memory.cpp
@@ -10,6 +10,7 @@
10#include "common/assert.h" 10#include "common/assert.h"
11#include "common/common_types.h" 11#include "common/common_types.h"
12#include "common/logging/log.h" 12#include "common/logging/log.h"
13#include "common/page_table.h"
13#include "common/swap.h" 14#include "common/swap.h"
14#include "core/arm/arm_interface.h" 15#include "core/arm/arm_interface.h"
15#include "core/core.h" 16#include "core/core.h"
@@ -18,13 +19,14 @@
18#include "core/hle/lock.h" 19#include "core/hle/lock.h"
19#include "core/memory.h" 20#include "core/memory.h"
20#include "core/memory_setup.h" 21#include "core/memory_setup.h"
22#include "video_core/gpu.h"
21#include "video_core/renderer_base.h" 23#include "video_core/renderer_base.h"
22 24
23namespace Memory { 25namespace Memory {
24 26
25static PageTable* current_page_table = nullptr; 27static Common::PageTable* current_page_table = nullptr;
26 28
27void SetCurrentPageTable(PageTable* page_table) { 29void SetCurrentPageTable(Common::PageTable* page_table) {
28 current_page_table = page_table; 30 current_page_table = page_table;
29 31
30 auto& system = Core::System::GetInstance(); 32 auto& system = Core::System::GetInstance();
@@ -36,88 +38,80 @@ void SetCurrentPageTable(PageTable* page_table) {
36 } 38 }
37} 39}
38 40
39PageTable* GetCurrentPageTable() { 41Common::PageTable* GetCurrentPageTable() {
40 return current_page_table; 42 return current_page_table;
41} 43}
42 44
43PageTable::PageTable() = default; 45static void MapPages(Common::PageTable& page_table, VAddr base, u64 size, u8* memory,
44 46 Common::PageType type) {
45PageTable::PageTable(std::size_t address_space_width_in_bits) {
46 Resize(address_space_width_in_bits);
47}
48
49PageTable::~PageTable() = default;
50
51void PageTable::Resize(std::size_t address_space_width_in_bits) {
52 const std::size_t num_page_table_entries = 1ULL << (address_space_width_in_bits - PAGE_BITS);
53
54 pointers.resize(num_page_table_entries);
55 attributes.resize(num_page_table_entries);
56
57 // The default is a 39-bit address space, which causes an initial 1GB allocation size. If the
58 // vector size is subsequently decreased (via resize), the vector might not automatically
59 // actually reallocate/resize its underlying allocation, which wastes up to ~800 MB for
60 // 36-bit titles. Call shrink_to_fit to reduce capacity to what's actually in use.
61
62 pointers.shrink_to_fit();
63 attributes.shrink_to_fit();
64}
65
66static void MapPages(PageTable& page_table, VAddr base, u64 size, u8* memory, PageType type) {
67 LOG_DEBUG(HW_Memory, "Mapping {} onto {:016X}-{:016X}", fmt::ptr(memory), base * PAGE_SIZE, 47 LOG_DEBUG(HW_Memory, "Mapping {} onto {:016X}-{:016X}", fmt::ptr(memory), base * PAGE_SIZE,
68 (base + size) * PAGE_SIZE); 48 (base + size) * PAGE_SIZE);
69 49
70 RasterizerFlushVirtualRegion(base << PAGE_BITS, size * PAGE_SIZE, 50 // During boot, current_page_table might not be set yet, in which case we need not flush
71 FlushMode::FlushAndInvalidate); 51 if (current_page_table) {
52 Core::System::GetInstance().GPU().FlushAndInvalidateRegion(base << PAGE_BITS,
53 size * PAGE_SIZE);
54 }
72 55
73 VAddr end = base + size; 56 VAddr end = base + size;
74 while (base != end) { 57 ASSERT_MSG(end <= page_table.pointers.size(), "out of range mapping at {:016X}",
75 ASSERT_MSG(base < page_table.pointers.size(), "out of range mapping at {:016X}", base); 58 base + page_table.pointers.size());
76 59
77 page_table.attributes[base] = type; 60 std::fill(page_table.attributes.begin() + base, page_table.attributes.begin() + end, type);
78 page_table.pointers[base] = memory;
79 61
80 base += 1; 62 if (memory == nullptr) {
81 if (memory != nullptr) 63 std::fill(page_table.pointers.begin() + base, page_table.pointers.begin() + end, memory);
64 } else {
65 while (base != end) {
66 page_table.pointers[base] = memory;
67
68 base += 1;
82 memory += PAGE_SIZE; 69 memory += PAGE_SIZE;
70 }
83 } 71 }
84} 72}
85 73
86void MapMemoryRegion(PageTable& page_table, VAddr base, u64 size, u8* target) { 74void MapMemoryRegion(Common::PageTable& page_table, VAddr base, u64 size, u8* target) {
87 ASSERT_MSG((size & PAGE_MASK) == 0, "non-page aligned size: {:016X}", size); 75 ASSERT_MSG((size & PAGE_MASK) == 0, "non-page aligned size: {:016X}", size);
88 ASSERT_MSG((base & PAGE_MASK) == 0, "non-page aligned base: {:016X}", base); 76 ASSERT_MSG((base & PAGE_MASK) == 0, "non-page aligned base: {:016X}", base);
89 MapPages(page_table, base / PAGE_SIZE, size / PAGE_SIZE, target, PageType::Memory); 77 MapPages(page_table, base / PAGE_SIZE, size / PAGE_SIZE, target, Common::PageType::Memory);
90} 78}
91 79
92void MapIoRegion(PageTable& page_table, VAddr base, u64 size, MemoryHookPointer mmio_handler) { 80void MapIoRegion(Common::PageTable& page_table, VAddr base, u64 size,
81 Common::MemoryHookPointer mmio_handler) {
93 ASSERT_MSG((size & PAGE_MASK) == 0, "non-page aligned size: {:016X}", size); 82 ASSERT_MSG((size & PAGE_MASK) == 0, "non-page aligned size: {:016X}", size);
94 ASSERT_MSG((base & PAGE_MASK) == 0, "non-page aligned base: {:016X}", base); 83 ASSERT_MSG((base & PAGE_MASK) == 0, "non-page aligned base: {:016X}", base);
95 MapPages(page_table, base / PAGE_SIZE, size / PAGE_SIZE, nullptr, PageType::Special); 84 MapPages(page_table, base / PAGE_SIZE, size / PAGE_SIZE, nullptr, Common::PageType::Special);
96 85
97 auto interval = boost::icl::discrete_interval<VAddr>::closed(base, base + size - 1); 86 auto interval = boost::icl::discrete_interval<VAddr>::closed(base, base + size - 1);
98 SpecialRegion region{SpecialRegion::Type::IODevice, std::move(mmio_handler)}; 87 Common::SpecialRegion region{Common::SpecialRegion::Type::IODevice, std::move(mmio_handler)};
99 page_table.special_regions.add(std::make_pair(interval, std::set<SpecialRegion>{region})); 88 page_table.special_regions.add(
89 std::make_pair(interval, std::set<Common::SpecialRegion>{region}));
100} 90}
101 91
102void UnmapRegion(PageTable& page_table, VAddr base, u64 size) { 92void UnmapRegion(Common::PageTable& page_table, VAddr base, u64 size) {
103 ASSERT_MSG((size & PAGE_MASK) == 0, "non-page aligned size: {:016X}", size); 93 ASSERT_MSG((size & PAGE_MASK) == 0, "non-page aligned size: {:016X}", size);
104 ASSERT_MSG((base & PAGE_MASK) == 0, "non-page aligned base: {:016X}", base); 94 ASSERT_MSG((base & PAGE_MASK) == 0, "non-page aligned base: {:016X}", base);
105 MapPages(page_table, base / PAGE_SIZE, size / PAGE_SIZE, nullptr, PageType::Unmapped); 95 MapPages(page_table, base / PAGE_SIZE, size / PAGE_SIZE, nullptr, Common::PageType::Unmapped);
106 96
107 auto interval = boost::icl::discrete_interval<VAddr>::closed(base, base + size - 1); 97 auto interval = boost::icl::discrete_interval<VAddr>::closed(base, base + size - 1);
108 page_table.special_regions.erase(interval); 98 page_table.special_regions.erase(interval);
109} 99}
110 100
111void AddDebugHook(PageTable& page_table, VAddr base, u64 size, MemoryHookPointer hook) { 101void AddDebugHook(Common::PageTable& page_table, VAddr base, u64 size,
102 Common::MemoryHookPointer hook) {
112 auto interval = boost::icl::discrete_interval<VAddr>::closed(base, base + size - 1); 103 auto interval = boost::icl::discrete_interval<VAddr>::closed(base, base + size - 1);
113 SpecialRegion region{SpecialRegion::Type::DebugHook, std::move(hook)}; 104 Common::SpecialRegion region{Common::SpecialRegion::Type::DebugHook, std::move(hook)};
114 page_table.special_regions.add(std::make_pair(interval, std::set<SpecialRegion>{region})); 105 page_table.special_regions.add(
106 std::make_pair(interval, std::set<Common::SpecialRegion>{region}));
115} 107}
116 108
117void RemoveDebugHook(PageTable& page_table, VAddr base, u64 size, MemoryHookPointer hook) { 109void RemoveDebugHook(Common::PageTable& page_table, VAddr base, u64 size,
110 Common::MemoryHookPointer hook) {
118 auto interval = boost::icl::discrete_interval<VAddr>::closed(base, base + size - 1); 111 auto interval = boost::icl::discrete_interval<VAddr>::closed(base, base + size - 1);
119 SpecialRegion region{SpecialRegion::Type::DebugHook, std::move(hook)}; 112 Common::SpecialRegion region{Common::SpecialRegion::Type::DebugHook, std::move(hook)};
120 page_table.special_regions.subtract(std::make_pair(interval, std::set<SpecialRegion>{region})); 113 page_table.special_regions.subtract(
114 std::make_pair(interval, std::set<Common::SpecialRegion>{region}));
121} 115}
122 116
123/** 117/**
@@ -166,22 +160,19 @@ T Read(const VAddr vaddr) {
166 return value; 160 return value;
167 } 161 }
168 162
169 // The memory access might do an MMIO or cached access, so we have to lock the HLE kernel state 163 Common::PageType type = current_page_table->attributes[vaddr >> PAGE_BITS];
170 std::lock_guard<std::recursive_mutex> lock(HLE::g_hle_lock);
171
172 PageType type = current_page_table->attributes[vaddr >> PAGE_BITS];
173 switch (type) { 164 switch (type) {
174 case PageType::Unmapped: 165 case Common::PageType::Unmapped:
175 LOG_ERROR(HW_Memory, "Unmapped Read{} @ 0x{:08X}", sizeof(T) * 8, vaddr); 166 LOG_ERROR(HW_Memory, "Unmapped Read{} @ 0x{:08X}", sizeof(T) * 8, vaddr);
176 return 0; 167 return 0;
177 case PageType::Memory: 168 case Common::PageType::Memory:
178 ASSERT_MSG(false, "Mapped memory page without a pointer @ {:016X}", vaddr); 169 ASSERT_MSG(false, "Mapped memory page without a pointer @ {:016X}", vaddr);
179 break; 170 break;
180 case PageType::RasterizerCachedMemory: { 171 case Common::PageType::RasterizerCachedMemory: {
181 RasterizerFlushVirtualRegion(vaddr, sizeof(T), FlushMode::Flush); 172 auto host_ptr{GetPointerFromVMA(vaddr)};
182 173 Core::System::GetInstance().GPU().FlushRegion(ToCacheAddr(host_ptr), sizeof(T));
183 T value; 174 T value;
184 std::memcpy(&value, GetPointerFromVMA(vaddr), sizeof(T)); 175 std::memcpy(&value, host_ptr, sizeof(T));
185 return value; 176 return value;
186 } 177 }
187 default: 178 default:
@@ -199,21 +190,19 @@ void Write(const VAddr vaddr, const T data) {
199 return; 190 return;
200 } 191 }
201 192
202 // The memory access might do an MMIO or cached access, so we have to lock the HLE kernel state 193 Common::PageType type = current_page_table->attributes[vaddr >> PAGE_BITS];
203 std::lock_guard<std::recursive_mutex> lock(HLE::g_hle_lock);
204
205 PageType type = current_page_table->attributes[vaddr >> PAGE_BITS];
206 switch (type) { 194 switch (type) {
207 case PageType::Unmapped: 195 case Common::PageType::Unmapped:
208 LOG_ERROR(HW_Memory, "Unmapped Write{} 0x{:08X} @ 0x{:016X}", sizeof(data) * 8, 196 LOG_ERROR(HW_Memory, "Unmapped Write{} 0x{:08X} @ 0x{:016X}", sizeof(data) * 8,
209 static_cast<u32>(data), vaddr); 197 static_cast<u32>(data), vaddr);
210 return; 198 return;
211 case PageType::Memory: 199 case Common::PageType::Memory:
212 ASSERT_MSG(false, "Mapped memory page without a pointer @ {:016X}", vaddr); 200 ASSERT_MSG(false, "Mapped memory page without a pointer @ {:016X}", vaddr);
213 break; 201 break;
214 case PageType::RasterizerCachedMemory: { 202 case Common::PageType::RasterizerCachedMemory: {
215 RasterizerFlushVirtualRegion(vaddr, sizeof(T), FlushMode::Invalidate); 203 auto host_ptr{GetPointerFromVMA(vaddr)};
216 std::memcpy(GetPointerFromVMA(vaddr), &data, sizeof(T)); 204 Core::System::GetInstance().GPU().InvalidateRegion(ToCacheAddr(host_ptr), sizeof(T));
205 std::memcpy(host_ptr, &data, sizeof(T));
217 break; 206 break;
218 } 207 }
219 default: 208 default:
@@ -228,10 +217,10 @@ bool IsValidVirtualAddress(const Kernel::Process& process, const VAddr vaddr) {
228 if (page_pointer) 217 if (page_pointer)
229 return true; 218 return true;
230 219
231 if (page_table.attributes[vaddr >> PAGE_BITS] == PageType::RasterizerCachedMemory) 220 if (page_table.attributes[vaddr >> PAGE_BITS] == Common::PageType::RasterizerCachedMemory)
232 return true; 221 return true;
233 222
234 if (page_table.attributes[vaddr >> PAGE_BITS] != PageType::Special) 223 if (page_table.attributes[vaddr >> PAGE_BITS] != Common::PageType::Special)
235 return false; 224 return false;
236 225
237 return false; 226 return false;
@@ -251,7 +240,8 @@ u8* GetPointer(const VAddr vaddr) {
251 return page_pointer + (vaddr & PAGE_MASK); 240 return page_pointer + (vaddr & PAGE_MASK);
252 } 241 }
253 242
254 if (current_page_table->attributes[vaddr >> PAGE_BITS] == PageType::RasterizerCachedMemory) { 243 if (current_page_table->attributes[vaddr >> PAGE_BITS] ==
244 Common::PageType::RasterizerCachedMemory) {
255 return GetPointerFromVMA(vaddr); 245 return GetPointerFromVMA(vaddr);
256 } 246 }
257 247
@@ -285,20 +275,20 @@ void RasterizerMarkRegionCached(VAddr vaddr, u64 size, bool cached) {
285 275
286 u64 num_pages = ((vaddr + size - 1) >> PAGE_BITS) - (vaddr >> PAGE_BITS) + 1; 276 u64 num_pages = ((vaddr + size - 1) >> PAGE_BITS) - (vaddr >> PAGE_BITS) + 1;
287 for (unsigned i = 0; i < num_pages; ++i, vaddr += PAGE_SIZE) { 277 for (unsigned i = 0; i < num_pages; ++i, vaddr += PAGE_SIZE) {
288 PageType& page_type = current_page_table->attributes[vaddr >> PAGE_BITS]; 278 Common::PageType& page_type = current_page_table->attributes[vaddr >> PAGE_BITS];
289 279
290 if (cached) { 280 if (cached) {
291 // Switch page type to cached if now cached 281 // Switch page type to cached if now cached
292 switch (page_type) { 282 switch (page_type) {
293 case PageType::Unmapped: 283 case Common::PageType::Unmapped:
294 // It is not necessary for a process to have this region mapped into its address 284 // It is not necessary for a process to have this region mapped into its address
295 // space, for example, a system module need not have a VRAM mapping. 285 // space, for example, a system module need not have a VRAM mapping.
296 break; 286 break;
297 case PageType::Memory: 287 case Common::PageType::Memory:
298 page_type = PageType::RasterizerCachedMemory; 288 page_type = Common::PageType::RasterizerCachedMemory;
299 current_page_table->pointers[vaddr >> PAGE_BITS] = nullptr; 289 current_page_table->pointers[vaddr >> PAGE_BITS] = nullptr;
300 break; 290 break;
301 case PageType::RasterizerCachedMemory: 291 case Common::PageType::RasterizerCachedMemory:
302 // There can be more than one GPU region mapped per CPU region, so it's common that 292 // There can be more than one GPU region mapped per CPU region, so it's common that
303 // this area is already marked as cached. 293 // this area is already marked as cached.
304 break; 294 break;
@@ -308,23 +298,23 @@ void RasterizerMarkRegionCached(VAddr vaddr, u64 size, bool cached) {
308 } else { 298 } else {
309 // Switch page type to uncached if now uncached 299 // Switch page type to uncached if now uncached
310 switch (page_type) { 300 switch (page_type) {
311 case PageType::Unmapped: 301 case Common::PageType::Unmapped:
312 // It is not necessary for a process to have this region mapped into its address 302 // It is not necessary for a process to have this region mapped into its address
313 // space, for example, a system module need not have a VRAM mapping. 303 // space, for example, a system module need not have a VRAM mapping.
314 break; 304 break;
315 case PageType::Memory: 305 case Common::PageType::Memory:
316 // There can be more than one GPU region mapped per CPU region, so it's common that 306 // There can be more than one GPU region mapped per CPU region, so it's common that
317 // this area is already unmarked as cached. 307 // this area is already unmarked as cached.
318 break; 308 break;
319 case PageType::RasterizerCachedMemory: { 309 case Common::PageType::RasterizerCachedMemory: {
320 u8* pointer = GetPointerFromVMA(vaddr & ~PAGE_MASK); 310 u8* pointer = GetPointerFromVMA(vaddr & ~PAGE_MASK);
321 if (pointer == nullptr) { 311 if (pointer == nullptr) {
322 // It's possible that this function has been called while updating the pagetable 312 // It's possible that this function has been called while updating the pagetable
323 // after unmapping a VMA. In that case the underlying VMA will no longer exist, 313 // after unmapping a VMA. In that case the underlying VMA will no longer exist,
324 // and we should just leave the pagetable entry blank. 314 // and we should just leave the pagetable entry blank.
325 page_type = PageType::Unmapped; 315 page_type = Common::PageType::Unmapped;
326 } else { 316 } else {
327 page_type = PageType::Memory; 317 page_type = Common::PageType::Memory;
328 current_page_table->pointers[vaddr >> PAGE_BITS] = pointer; 318 current_page_table->pointers[vaddr >> PAGE_BITS] = pointer;
329 } 319 }
330 break; 320 break;
@@ -336,47 +326,6 @@ void RasterizerMarkRegionCached(VAddr vaddr, u64 size, bool cached) {
336 } 326 }
337} 327}
338 328
339void RasterizerFlushVirtualRegion(VAddr start, u64 size, FlushMode mode) {
340 auto& system_instance = Core::System::GetInstance();
341
342 // Since pages are unmapped on shutdown after video core is shutdown, the renderer may be
343 // null here
344 if (!system_instance.IsPoweredOn()) {
345 return;
346 }
347
348 const VAddr end = start + size;
349
350 const auto CheckRegion = [&](VAddr region_start, VAddr region_end) {
351 if (start >= region_end || end <= region_start) {
352 // No overlap with region
353 return;
354 }
355
356 const VAddr overlap_start = std::max(start, region_start);
357 const VAddr overlap_end = std::min(end, region_end);
358 const VAddr overlap_size = overlap_end - overlap_start;
359
360 auto& rasterizer = system_instance.Renderer().Rasterizer();
361 switch (mode) {
362 case FlushMode::Flush:
363 rasterizer.FlushRegion(overlap_start, overlap_size);
364 break;
365 case FlushMode::Invalidate:
366 rasterizer.InvalidateRegion(overlap_start, overlap_size);
367 break;
368 case FlushMode::FlushAndInvalidate:
369 rasterizer.FlushAndInvalidateRegion(overlap_start, overlap_size);
370 break;
371 }
372 };
373
374 const auto& vm_manager = Core::CurrentProcess()->VMManager();
375
376 CheckRegion(vm_manager.GetCodeRegionBaseAddress(), vm_manager.GetCodeRegionEndAddress());
377 CheckRegion(vm_manager.GetHeapRegionBaseAddress(), vm_manager.GetHeapRegionEndAddress());
378}
379
380u8 Read8(const VAddr addr) { 329u8 Read8(const VAddr addr) {
381 return Read<u8>(addr); 330 return Read<u8>(addr);
382} 331}
@@ -407,24 +356,24 @@ void ReadBlock(const Kernel::Process& process, const VAddr src_addr, void* dest_
407 const VAddr current_vaddr = static_cast<VAddr>((page_index << PAGE_BITS) + page_offset); 356 const VAddr current_vaddr = static_cast<VAddr>((page_index << PAGE_BITS) + page_offset);
408 357
409 switch (page_table.attributes[page_index]) { 358 switch (page_table.attributes[page_index]) {
410 case PageType::Unmapped: { 359 case Common::PageType::Unmapped: {
411 LOG_ERROR(HW_Memory, 360 LOG_ERROR(HW_Memory,
412 "Unmapped ReadBlock @ 0x{:016X} (start address = 0x{:016X}, size = {})", 361 "Unmapped ReadBlock @ 0x{:016X} (start address = 0x{:016X}, size = {})",
413 current_vaddr, src_addr, size); 362 current_vaddr, src_addr, size);
414 std::memset(dest_buffer, 0, copy_amount); 363 std::memset(dest_buffer, 0, copy_amount);
415 break; 364 break;
416 } 365 }
417 case PageType::Memory: { 366 case Common::PageType::Memory: {
418 DEBUG_ASSERT(page_table.pointers[page_index]); 367 DEBUG_ASSERT(page_table.pointers[page_index]);
419 368
420 const u8* src_ptr = page_table.pointers[page_index] + page_offset; 369 const u8* src_ptr = page_table.pointers[page_index] + page_offset;
421 std::memcpy(dest_buffer, src_ptr, copy_amount); 370 std::memcpy(dest_buffer, src_ptr, copy_amount);
422 break; 371 break;
423 } 372 }
424 case PageType::RasterizerCachedMemory: { 373 case Common::PageType::RasterizerCachedMemory: {
425 RasterizerFlushVirtualRegion(current_vaddr, static_cast<u32>(copy_amount), 374 const auto& host_ptr{GetPointerFromVMA(process, current_vaddr)};
426 FlushMode::Flush); 375 Core::System::GetInstance().GPU().FlushRegion(ToCacheAddr(host_ptr), copy_amount);
427 std::memcpy(dest_buffer, GetPointerFromVMA(process, current_vaddr), copy_amount); 376 std::memcpy(dest_buffer, host_ptr, copy_amount);
428 break; 377 break;
429 } 378 }
430 default: 379 default:
@@ -471,23 +420,23 @@ void WriteBlock(const Kernel::Process& process, const VAddr dest_addr, const voi
471 const VAddr current_vaddr = static_cast<VAddr>((page_index << PAGE_BITS) + page_offset); 420 const VAddr current_vaddr = static_cast<VAddr>((page_index << PAGE_BITS) + page_offset);
472 421
473 switch (page_table.attributes[page_index]) { 422 switch (page_table.attributes[page_index]) {
474 case PageType::Unmapped: { 423 case Common::PageType::Unmapped: {
475 LOG_ERROR(HW_Memory, 424 LOG_ERROR(HW_Memory,
476 "Unmapped WriteBlock @ 0x{:016X} (start address = 0x{:016X}, size = {})", 425 "Unmapped WriteBlock @ 0x{:016X} (start address = 0x{:016X}, size = {})",
477 current_vaddr, dest_addr, size); 426 current_vaddr, dest_addr, size);
478 break; 427 break;
479 } 428 }
480 case PageType::Memory: { 429 case Common::PageType::Memory: {
481 DEBUG_ASSERT(page_table.pointers[page_index]); 430 DEBUG_ASSERT(page_table.pointers[page_index]);
482 431
483 u8* dest_ptr = page_table.pointers[page_index] + page_offset; 432 u8* dest_ptr = page_table.pointers[page_index] + page_offset;
484 std::memcpy(dest_ptr, src_buffer, copy_amount); 433 std::memcpy(dest_ptr, src_buffer, copy_amount);
485 break; 434 break;
486 } 435 }
487 case PageType::RasterizerCachedMemory: { 436 case Common::PageType::RasterizerCachedMemory: {
488 RasterizerFlushVirtualRegion(current_vaddr, static_cast<u32>(copy_amount), 437 const auto& host_ptr{GetPointerFromVMA(process, current_vaddr)};
489 FlushMode::Invalidate); 438 Core::System::GetInstance().GPU().InvalidateRegion(ToCacheAddr(host_ptr), copy_amount);
490 std::memcpy(GetPointerFromVMA(process, current_vaddr), src_buffer, copy_amount); 439 std::memcpy(host_ptr, src_buffer, copy_amount);
491 break; 440 break;
492 } 441 }
493 default: 442 default:
@@ -517,23 +466,23 @@ void ZeroBlock(const Kernel::Process& process, const VAddr dest_addr, const std:
517 const VAddr current_vaddr = static_cast<VAddr>((page_index << PAGE_BITS) + page_offset); 466 const VAddr current_vaddr = static_cast<VAddr>((page_index << PAGE_BITS) + page_offset);
518 467
519 switch (page_table.attributes[page_index]) { 468 switch (page_table.attributes[page_index]) {
520 case PageType::Unmapped: { 469 case Common::PageType::Unmapped: {
521 LOG_ERROR(HW_Memory, 470 LOG_ERROR(HW_Memory,
522 "Unmapped ZeroBlock @ 0x{:016X} (start address = 0x{:016X}, size = {})", 471 "Unmapped ZeroBlock @ 0x{:016X} (start address = 0x{:016X}, size = {})",
523 current_vaddr, dest_addr, size); 472 current_vaddr, dest_addr, size);
524 break; 473 break;
525 } 474 }
526 case PageType::Memory: { 475 case Common::PageType::Memory: {
527 DEBUG_ASSERT(page_table.pointers[page_index]); 476 DEBUG_ASSERT(page_table.pointers[page_index]);
528 477
529 u8* dest_ptr = page_table.pointers[page_index] + page_offset; 478 u8* dest_ptr = page_table.pointers[page_index] + page_offset;
530 std::memset(dest_ptr, 0, copy_amount); 479 std::memset(dest_ptr, 0, copy_amount);
531 break; 480 break;
532 } 481 }
533 case PageType::RasterizerCachedMemory: { 482 case Common::PageType::RasterizerCachedMemory: {
534 RasterizerFlushVirtualRegion(current_vaddr, static_cast<u32>(copy_amount), 483 const auto& host_ptr{GetPointerFromVMA(process, current_vaddr)};
535 FlushMode::Invalidate); 484 Core::System::GetInstance().GPU().InvalidateRegion(ToCacheAddr(host_ptr), copy_amount);
536 std::memset(GetPointerFromVMA(process, current_vaddr), 0, copy_amount); 485 std::memset(host_ptr, 0, copy_amount);
537 break; 486 break;
538 } 487 }
539 default: 488 default:
@@ -559,23 +508,23 @@ void CopyBlock(const Kernel::Process& process, VAddr dest_addr, VAddr src_addr,
559 const VAddr current_vaddr = static_cast<VAddr>((page_index << PAGE_BITS) + page_offset); 508 const VAddr current_vaddr = static_cast<VAddr>((page_index << PAGE_BITS) + page_offset);
560 509
561 switch (page_table.attributes[page_index]) { 510 switch (page_table.attributes[page_index]) {
562 case PageType::Unmapped: { 511 case Common::PageType::Unmapped: {
563 LOG_ERROR(HW_Memory, 512 LOG_ERROR(HW_Memory,
564 "Unmapped CopyBlock @ 0x{:016X} (start address = 0x{:016X}, size = {})", 513 "Unmapped CopyBlock @ 0x{:016X} (start address = 0x{:016X}, size = {})",
565 current_vaddr, src_addr, size); 514 current_vaddr, src_addr, size);
566 ZeroBlock(process, dest_addr, copy_amount); 515 ZeroBlock(process, dest_addr, copy_amount);
567 break; 516 break;
568 } 517 }
569 case PageType::Memory: { 518 case Common::PageType::Memory: {
570 DEBUG_ASSERT(page_table.pointers[page_index]); 519 DEBUG_ASSERT(page_table.pointers[page_index]);
571 const u8* src_ptr = page_table.pointers[page_index] + page_offset; 520 const u8* src_ptr = page_table.pointers[page_index] + page_offset;
572 WriteBlock(process, dest_addr, src_ptr, copy_amount); 521 WriteBlock(process, dest_addr, src_ptr, copy_amount);
573 break; 522 break;
574 } 523 }
575 case PageType::RasterizerCachedMemory: { 524 case Common::PageType::RasterizerCachedMemory: {
576 RasterizerFlushVirtualRegion(current_vaddr, static_cast<u32>(copy_amount), 525 const auto& host_ptr{GetPointerFromVMA(process, current_vaddr)};
577 FlushMode::Flush); 526 Core::System::GetInstance().GPU().FlushRegion(ToCacheAddr(host_ptr), copy_amount);
578 WriteBlock(process, dest_addr, GetPointerFromVMA(process, current_vaddr), copy_amount); 527 WriteBlock(process, dest_addr, host_ptr, copy_amount);
579 break; 528 break;
580 } 529 }
581 default: 530 default:
diff --git a/src/core/memory.h b/src/core/memory.h
index 1acf5ce8c..3f60d868c 100644
--- a/src/core/memory.h
+++ b/src/core/memory.h
@@ -10,7 +10,10 @@
10#include <vector> 10#include <vector>
11#include <boost/icl/interval_map.hpp> 11#include <boost/icl/interval_map.hpp>
12#include "common/common_types.h" 12#include "common/common_types.h"
13#include "core/memory_hook.h" 13
14namespace Common {
15struct PageTable;
16}
14 17
15namespace Kernel { 18namespace Kernel {
16class Process; 19class Process;
@@ -26,71 +29,6 @@ constexpr std::size_t PAGE_BITS = 12;
26constexpr u64 PAGE_SIZE = 1ULL << PAGE_BITS; 29constexpr u64 PAGE_SIZE = 1ULL << PAGE_BITS;
27constexpr u64 PAGE_MASK = PAGE_SIZE - 1; 30constexpr u64 PAGE_MASK = PAGE_SIZE - 1;
28 31
29enum class PageType : u8 {
30 /// Page is unmapped and should cause an access error.
31 Unmapped,
32 /// Page is mapped to regular memory. This is the only type you can get pointers to.
33 Memory,
34 /// Page is mapped to regular memory, but also needs to check for rasterizer cache flushing and
35 /// invalidation
36 RasterizerCachedMemory,
37 /// Page is mapped to a I/O region. Writing and reading to this page is handled by functions.
38 Special,
39};
40
41struct SpecialRegion {
42 enum class Type {
43 DebugHook,
44 IODevice,
45 } type;
46
47 MemoryHookPointer handler;
48
49 bool operator<(const SpecialRegion& other) const {
50 return std::tie(type, handler) < std::tie(other.type, other.handler);
51 }
52
53 bool operator==(const SpecialRegion& other) const {
54 return std::tie(type, handler) == std::tie(other.type, other.handler);
55 }
56};
57
58/**
59 * A (reasonably) fast way of allowing switchable and remappable process address spaces. It loosely
60 * mimics the way a real CPU page table works.
61 */
62struct PageTable {
63 explicit PageTable();
64 explicit PageTable(std::size_t address_space_width_in_bits);
65 ~PageTable();
66
67 /**
68 * Resizes the page table to be able to accomodate enough pages within
69 * a given address space.
70 *
71 * @param address_space_width_in_bits The address size width in bits.
72 */
73 void Resize(std::size_t address_space_width_in_bits);
74
75 /**
76 * Vector of memory pointers backing each page. An entry can only be non-null if the
77 * corresponding entry in the `attributes` vector is of type `Memory`.
78 */
79 std::vector<u8*> pointers;
80
81 /**
82 * Contains MMIO handlers that back memory regions whose entries in the `attribute` vector is
83 * of type `Special`.
84 */
85 boost::icl::interval_map<VAddr, std::set<SpecialRegion>> special_regions;
86
87 /**
88 * Vector of fine grained page attributes. If it is set to any value other than `Memory`, then
89 * the corresponding entry in `pointers` MUST be set to null.
90 */
91 std::vector<PageType> attributes;
92};
93
94/// Virtual user-space memory regions 32/// Virtual user-space memory regions
95enum : VAddr { 33enum : VAddr {
96 /// Read-only page containing kernel and system configuration values. 34 /// Read-only page containing kernel and system configuration values.
@@ -116,8 +54,8 @@ enum : VAddr {
116}; 54};
117 55
118/// Currently active page table 56/// Currently active page table
119void SetCurrentPageTable(PageTable* page_table); 57void SetCurrentPageTable(Common::PageTable* page_table);
120PageTable* GetCurrentPageTable(); 58Common::PageTable* GetCurrentPageTable();
121 59
122/// Determines if the given VAddr is valid for the specified process. 60/// Determines if the given VAddr is valid for the specified process.
123bool IsValidVirtualAddress(const Kernel::Process& process, VAddr vaddr); 61bool IsValidVirtualAddress(const Kernel::Process& process, VAddr vaddr);
@@ -161,10 +99,4 @@ enum class FlushMode {
161 */ 99 */
162void RasterizerMarkRegionCached(VAddr vaddr, u64 size, bool cached); 100void RasterizerMarkRegionCached(VAddr vaddr, u64 size, bool cached);
163 101
164/**
165 * Flushes and invalidates any externally cached rasterizer resources touching the given virtual
166 * address region.
167 */
168void RasterizerFlushVirtualRegion(VAddr start, u64 size, FlushMode mode);
169
170} // namespace Memory 102} // namespace Memory
diff --git a/src/core/memory_setup.h b/src/core/memory_setup.h
index 9a1a4f4be..5225ee8e2 100644
--- a/src/core/memory_setup.h
+++ b/src/core/memory_setup.h
@@ -5,7 +5,11 @@
5#pragma once 5#pragma once
6 6
7#include "common/common_types.h" 7#include "common/common_types.h"
8#include "core/memory_hook.h" 8#include "common/memory_hook.h"
9
10namespace Common {
11struct PageTable;
12}
9 13
10namespace Memory { 14namespace Memory {
11 15
@@ -17,7 +21,7 @@ namespace Memory {
17 * @param size The amount of bytes to map. Must be page-aligned. 21 * @param size The amount of bytes to map. Must be page-aligned.
18 * @param target Buffer with the memory backing the mapping. Must be of length at least `size`. 22 * @param target Buffer with the memory backing the mapping. Must be of length at least `size`.
19 */ 23 */
20void MapMemoryRegion(PageTable& page_table, VAddr base, u64 size, u8* target); 24void MapMemoryRegion(Common::PageTable& page_table, VAddr base, u64 size, u8* target);
21 25
22/** 26/**
23 * Maps a region of the emulated process address space as a IO region. 27 * Maps a region of the emulated process address space as a IO region.
@@ -26,11 +30,14 @@ void MapMemoryRegion(PageTable& page_table, VAddr base, u64 size, u8* target);
26 * @param size The amount of bytes to map. Must be page-aligned. 30 * @param size The amount of bytes to map. Must be page-aligned.
27 * @param mmio_handler The handler that backs the mapping. 31 * @param mmio_handler The handler that backs the mapping.
28 */ 32 */
29void MapIoRegion(PageTable& page_table, VAddr base, u64 size, MemoryHookPointer mmio_handler); 33void MapIoRegion(Common::PageTable& page_table, VAddr base, u64 size,
34 Common::MemoryHookPointer mmio_handler);
30 35
31void UnmapRegion(PageTable& page_table, VAddr base, u64 size); 36void UnmapRegion(Common::PageTable& page_table, VAddr base, u64 size);
32 37
33void AddDebugHook(PageTable& page_table, VAddr base, u64 size, MemoryHookPointer hook); 38void AddDebugHook(Common::PageTable& page_table, VAddr base, u64 size,
34void RemoveDebugHook(PageTable& page_table, VAddr base, u64 size, MemoryHookPointer hook); 39 Common::MemoryHookPointer hook);
40void RemoveDebugHook(Common::PageTable& page_table, VAddr base, u64 size,
41 Common::MemoryHookPointer hook);
35 42
36} // namespace Memory 43} // namespace Memory
diff --git a/src/core/settings.cpp b/src/core/settings.cpp
index 2e232e1e7..6dd3139cc 100644
--- a/src/core/settings.cpp
+++ b/src/core/settings.cpp
@@ -91,7 +91,10 @@ void LogSettings() {
91 LogSetting("Renderer_UseResolutionFactor", Settings::values.resolution_factor); 91 LogSetting("Renderer_UseResolutionFactor", Settings::values.resolution_factor);
92 LogSetting("Renderer_UseFrameLimit", Settings::values.use_frame_limit); 92 LogSetting("Renderer_UseFrameLimit", Settings::values.use_frame_limit);
93 LogSetting("Renderer_FrameLimit", Settings::values.frame_limit); 93 LogSetting("Renderer_FrameLimit", Settings::values.frame_limit);
94 LogSetting("Renderer_UseDiskShaderCache", Settings::values.use_disk_shader_cache);
94 LogSetting("Renderer_UseAccurateGpuEmulation", Settings::values.use_accurate_gpu_emulation); 95 LogSetting("Renderer_UseAccurateGpuEmulation", Settings::values.use_accurate_gpu_emulation);
96 LogSetting("Renderer_UseAsynchronousGpuEmulation",
97 Settings::values.use_asynchronous_gpu_emulation);
95 LogSetting("Audio_OutputEngine", Settings::values.sink_id); 98 LogSetting("Audio_OutputEngine", Settings::values.sink_id);
96 LogSetting("Audio_EnableAudioStretching", Settings::values.enable_audio_stretching); 99 LogSetting("Audio_EnableAudioStretching", Settings::values.enable_audio_stretching);
97 LogSetting("Audio_OutputDevice", Settings::values.audio_device_id); 100 LogSetting("Audio_OutputDevice", Settings::values.audio_device_id);
diff --git a/src/core/settings.h b/src/core/settings.h
index c97387fc7..cdfb2f742 100644
--- a/src/core/settings.h
+++ b/src/core/settings.h
@@ -391,7 +391,9 @@ struct Values {
391 float resolution_factor; 391 float resolution_factor;
392 bool use_frame_limit; 392 bool use_frame_limit;
393 u16 frame_limit; 393 u16 frame_limit;
394 bool use_disk_shader_cache;
394 bool use_accurate_gpu_emulation; 395 bool use_accurate_gpu_emulation;
396 bool use_asynchronous_gpu_emulation;
395 397
396 float bg_red; 398 float bg_red;
397 float bg_green; 399 float bg_green;
diff --git a/src/core/telemetry_session.cpp b/src/core/telemetry_session.cpp
index 09ed74d78..e1db06811 100644
--- a/src/core/telemetry_session.cpp
+++ b/src/core/telemetry_session.cpp
@@ -158,8 +158,12 @@ TelemetrySession::TelemetrySession() {
158 AddField(Telemetry::FieldType::UserConfig, "Renderer_UseFrameLimit", 158 AddField(Telemetry::FieldType::UserConfig, "Renderer_UseFrameLimit",
159 Settings::values.use_frame_limit); 159 Settings::values.use_frame_limit);
160 AddField(Telemetry::FieldType::UserConfig, "Renderer_FrameLimit", Settings::values.frame_limit); 160 AddField(Telemetry::FieldType::UserConfig, "Renderer_FrameLimit", Settings::values.frame_limit);
161 AddField(Telemetry::FieldType::UserConfig, "Renderer_UseDiskShaderCache",
162 Settings::values.use_disk_shader_cache);
161 AddField(Telemetry::FieldType::UserConfig, "Renderer_UseAccurateGpuEmulation", 163 AddField(Telemetry::FieldType::UserConfig, "Renderer_UseAccurateGpuEmulation",
162 Settings::values.use_accurate_gpu_emulation); 164 Settings::values.use_accurate_gpu_emulation);
165 AddField(Telemetry::FieldType::UserConfig, "Renderer_UseAsynchronousGpuEmulation",
166 Settings::values.use_asynchronous_gpu_emulation);
163 AddField(Telemetry::FieldType::UserConfig, "System_UseDockedMode", 167 AddField(Telemetry::FieldType::UserConfig, "System_UseDockedMode",
164 Settings::values.use_docked_mode); 168 Settings::values.use_docked_mode);
165} 169}
diff --git a/src/input_common/CMakeLists.txt b/src/input_common/CMakeLists.txt
index 1c7db28c0..5b4e032bd 100644
--- a/src/input_common/CMakeLists.txt
+++ b/src/input_common/CMakeLists.txt
@@ -7,15 +7,18 @@ add_library(input_common STATIC
7 main.h 7 main.h
8 motion_emu.cpp 8 motion_emu.cpp
9 motion_emu.h 9 motion_emu.h
10 10 sdl/sdl.cpp
11 $<$<BOOL:${SDL2_FOUND}>:sdl/sdl.cpp sdl/sdl.h> 11 sdl/sdl.h
12) 12)
13 13
14create_target_directory_groups(input_common)
15
16target_link_libraries(input_common PUBLIC core PRIVATE common)
17
18if(SDL2_FOUND) 14if(SDL2_FOUND)
15 target_sources(input_common PRIVATE
16 sdl/sdl_impl.cpp
17 sdl/sdl_impl.h
18 )
19 target_link_libraries(input_common PRIVATE SDL2) 19 target_link_libraries(input_common PRIVATE SDL2)
20 target_compile_definitions(input_common PRIVATE HAVE_SDL2) 20 target_compile_definitions(input_common PRIVATE HAVE_SDL2)
21endif() 21endif()
22
23create_target_directory_groups(input_common)
24target_link_libraries(input_common PUBLIC core PRIVATE common)
diff --git a/src/input_common/main.cpp b/src/input_common/main.cpp
index 37f572853..8e66c1b15 100644
--- a/src/input_common/main.cpp
+++ b/src/input_common/main.cpp
@@ -17,10 +17,7 @@ namespace InputCommon {
17 17
18static std::shared_ptr<Keyboard> keyboard; 18static std::shared_ptr<Keyboard> keyboard;
19static std::shared_ptr<MotionEmu> motion_emu; 19static std::shared_ptr<MotionEmu> motion_emu;
20 20static std::unique_ptr<SDL::State> sdl;
21#ifdef HAVE_SDL2
22static std::thread poll_thread;
23#endif
24 21
25void Init() { 22void Init() {
26 keyboard = std::make_shared<Keyboard>(); 23 keyboard = std::make_shared<Keyboard>();
@@ -30,15 +27,7 @@ void Init() {
30 motion_emu = std::make_shared<MotionEmu>(); 27 motion_emu = std::make_shared<MotionEmu>();
31 Input::RegisterFactory<Input::MotionDevice>("motion_emu", motion_emu); 28 Input::RegisterFactory<Input::MotionDevice>("motion_emu", motion_emu);
32 29
33#ifdef HAVE_SDL2 30 sdl = SDL::Init();
34 SDL::Init();
35#endif
36}
37
38void StartJoystickEventHandler() {
39#ifdef HAVE_SDL2
40 poll_thread = std::thread(SDL::PollLoop);
41#endif
42} 31}
43 32
44void Shutdown() { 33void Shutdown() {
@@ -47,11 +36,7 @@ void Shutdown() {
47 Input::UnregisterFactory<Input::AnalogDevice>("analog_from_button"); 36 Input::UnregisterFactory<Input::AnalogDevice>("analog_from_button");
48 Input::UnregisterFactory<Input::MotionDevice>("motion_emu"); 37 Input::UnregisterFactory<Input::MotionDevice>("motion_emu");
49 motion_emu.reset(); 38 motion_emu.reset();
50 39 sdl.reset();
51#ifdef HAVE_SDL2
52 SDL::Shutdown();
53 poll_thread.join();
54#endif
55} 40}
56 41
57Keyboard* GetKeyboard() { 42Keyboard* GetKeyboard() {
@@ -88,7 +73,7 @@ namespace Polling {
88 73
89std::vector<std::unique_ptr<DevicePoller>> GetPollers(DeviceType type) { 74std::vector<std::unique_ptr<DevicePoller>> GetPollers(DeviceType type) {
90#ifdef HAVE_SDL2 75#ifdef HAVE_SDL2
91 return SDL::Polling::GetPollers(type); 76 return sdl->GetPollers(type);
92#else 77#else
93 return {}; 78 return {};
94#endif 79#endif
diff --git a/src/input_common/main.h b/src/input_common/main.h
index 9eb13106e..77a0ce90b 100644
--- a/src/input_common/main.h
+++ b/src/input_common/main.h
@@ -20,8 +20,6 @@ void Init();
20/// Deregisters all built-in input device factories and shuts them down. 20/// Deregisters all built-in input device factories and shuts them down.
21void Shutdown(); 21void Shutdown();
22 22
23void StartJoystickEventHandler();
24
25class Keyboard; 23class Keyboard;
26 24
27/// Gets the keyboard button device factory. 25/// Gets the keyboard button device factory.
diff --git a/src/input_common/motion_emu.cpp b/src/input_common/motion_emu.cpp
index 9570c060e..6d96d4019 100644
--- a/src/input_common/motion_emu.cpp
+++ b/src/input_common/motion_emu.cpp
@@ -32,12 +32,12 @@ public:
32 } 32 }
33 33
34 void BeginTilt(int x, int y) { 34 void BeginTilt(int x, int y) {
35 mouse_origin = Math::MakeVec(x, y); 35 mouse_origin = Common::MakeVec(x, y);
36 is_tilting = true; 36 is_tilting = true;
37 } 37 }
38 38
39 void Tilt(int x, int y) { 39 void Tilt(int x, int y) {
40 auto mouse_move = Math::MakeVec(x, y) - mouse_origin; 40 auto mouse_move = Common::MakeVec(x, y) - mouse_origin;
41 if (is_tilting) { 41 if (is_tilting) {
42 std::lock_guard<std::mutex> guard(tilt_mutex); 42 std::lock_guard<std::mutex> guard(tilt_mutex);
43 if (mouse_move.x == 0 && mouse_move.y == 0) { 43 if (mouse_move.x == 0 && mouse_move.y == 0) {
@@ -45,7 +45,7 @@ public:
45 } else { 45 } else {
46 tilt_direction = mouse_move.Cast<float>(); 46 tilt_direction = mouse_move.Cast<float>();
47 tilt_angle = 47 tilt_angle =
48 std::clamp(tilt_direction.Normalize() * sensitivity, 0.0f, MathUtil::PI * 0.5f); 48 std::clamp(tilt_direction.Normalize() * sensitivity, 0.0f, Common::PI * 0.5f);
49 } 49 }
50 } 50 }
51 } 51 }
@@ -56,7 +56,7 @@ public:
56 is_tilting = false; 56 is_tilting = false;
57 } 57 }
58 58
59 std::tuple<Math::Vec3<float>, Math::Vec3<float>> GetStatus() { 59 std::tuple<Common::Vec3<float>, Common::Vec3<float>> GetStatus() {
60 std::lock_guard<std::mutex> guard(status_mutex); 60 std::lock_guard<std::mutex> guard(status_mutex);
61 return status; 61 return status;
62 } 62 }
@@ -66,17 +66,17 @@ private:
66 const std::chrono::steady_clock::duration update_duration; 66 const std::chrono::steady_clock::duration update_duration;
67 const float sensitivity; 67 const float sensitivity;
68 68
69 Math::Vec2<int> mouse_origin; 69 Common::Vec2<int> mouse_origin;
70 70
71 std::mutex tilt_mutex; 71 std::mutex tilt_mutex;
72 Math::Vec2<float> tilt_direction; 72 Common::Vec2<float> tilt_direction;
73 float tilt_angle = 0; 73 float tilt_angle = 0;
74 74
75 bool is_tilting = false; 75 bool is_tilting = false;
76 76
77 Common::Event shutdown_event; 77 Common::Event shutdown_event;
78 78
79 std::tuple<Math::Vec3<float>, Math::Vec3<float>> status; 79 std::tuple<Common::Vec3<float>, Common::Vec3<float>> status;
80 std::mutex status_mutex; 80 std::mutex status_mutex;
81 81
82 // Note: always keep the thread declaration at the end so that other objects are initialized 82 // Note: always keep the thread declaration at the end so that other objects are initialized
@@ -85,8 +85,8 @@ private:
85 85
86 void MotionEmuThread() { 86 void MotionEmuThread() {
87 auto update_time = std::chrono::steady_clock::now(); 87 auto update_time = std::chrono::steady_clock::now();
88 Math::Quaternion<float> q = MakeQuaternion(Math::Vec3<float>(), 0); 88 Common::Quaternion<float> q = Common::MakeQuaternion(Common::Vec3<float>(), 0);
89 Math::Quaternion<float> old_q; 89 Common::Quaternion<float> old_q;
90 90
91 while (!shutdown_event.WaitUntil(update_time)) { 91 while (!shutdown_event.WaitUntil(update_time)) {
92 update_time += update_duration; 92 update_time += update_duration;
@@ -96,18 +96,18 @@ private:
96 std::lock_guard<std::mutex> guard(tilt_mutex); 96 std::lock_guard<std::mutex> guard(tilt_mutex);
97 97
98 // Find the quaternion describing current 3DS tilting 98 // Find the quaternion describing current 3DS tilting
99 q = MakeQuaternion(Math::MakeVec(-tilt_direction.y, 0.0f, tilt_direction.x), 99 q = Common::MakeQuaternion(
100 tilt_angle); 100 Common::MakeVec(-tilt_direction.y, 0.0f, tilt_direction.x), tilt_angle);
101 } 101 }
102 102
103 auto inv_q = q.Inverse(); 103 auto inv_q = q.Inverse();
104 104
105 // Set the gravity vector in world space 105 // Set the gravity vector in world space
106 auto gravity = Math::MakeVec(0.0f, -1.0f, 0.0f); 106 auto gravity = Common::MakeVec(0.0f, -1.0f, 0.0f);
107 107
108 // Find the angular rate vector in world space 108 // Find the angular rate vector in world space
109 auto angular_rate = ((q - old_q) * inv_q).xyz * 2; 109 auto angular_rate = ((q - old_q) * inv_q).xyz * 2;
110 angular_rate *= 1000 / update_millisecond / MathUtil::PI * 180; 110 angular_rate *= 1000 / update_millisecond / Common::PI * 180;
111 111
112 // Transform the two vectors from world space to 3DS space 112 // Transform the two vectors from world space to 3DS space
113 gravity = QuaternionRotate(inv_q, gravity); 113 gravity = QuaternionRotate(inv_q, gravity);
@@ -131,7 +131,7 @@ public:
131 device = std::make_shared<MotionEmuDevice>(update_millisecond, sensitivity); 131 device = std::make_shared<MotionEmuDevice>(update_millisecond, sensitivity);
132 } 132 }
133 133
134 std::tuple<Math::Vec3<float>, Math::Vec3<float>> GetStatus() const override { 134 std::tuple<Common::Vec3<float>, Common::Vec3<float>> GetStatus() const override {
135 return device->GetStatus(); 135 return device->GetStatus();
136 } 136 }
137 137
diff --git a/src/input_common/sdl/sdl.cpp b/src/input_common/sdl/sdl.cpp
index faf3c1fa3..644db3448 100644
--- a/src/input_common/sdl/sdl.cpp
+++ b/src/input_common/sdl/sdl.cpp
@@ -1,631 +1,19 @@
1// Copyright 2017 Citra Emulator Project 1// Copyright 2018 Citra Emulator Project
2// Licensed under GPLv2 or any later version 2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included. 3// Refer to the license.txt file included.
4 4
5#include <algorithm>
6#include <atomic>
7#include <cmath>
8#include <functional>
9#include <iterator>
10#include <mutex>
11#include <string>
12#include <thread>
13#include <tuple>
14#include <unordered_map>
15#include <utility>
16#include <vector>
17#include <SDL.h>
18#include "common/assert.h"
19#include "common/logging/log.h"
20#include "common/math_util.h"
21#include "common/param_package.h"
22#include "common/threadsafe_queue.h"
23#include "input_common/main.h"
24#include "input_common/sdl/sdl.h" 5#include "input_common/sdl/sdl.h"
6#ifdef HAVE_SDL2
7#include "input_common/sdl/sdl_impl.h"
8#endif
25 9
26namespace InputCommon { 10namespace InputCommon::SDL {
27 11
28namespace SDL { 12std::unique_ptr<State> Init() {
29 13#ifdef HAVE_SDL2
30class SDLJoystick; 14 return std::make_unique<SDLState>();
31class SDLButtonFactory; 15#else
32class SDLAnalogFactory; 16 return std::make_unique<NullState>();
33 17#endif
34/// Map of GUID of a list of corresponding virtual Joysticks
35static std::unordered_map<std::string, std::vector<std::shared_ptr<SDLJoystick>>> joystick_map;
36static std::mutex joystick_map_mutex;
37
38static std::shared_ptr<SDLButtonFactory> button_factory;
39static std::shared_ptr<SDLAnalogFactory> analog_factory;
40
41/// Used by the Pollers during config
42static std::atomic<bool> polling;
43static Common::SPSCQueue<SDL_Event> event_queue;
44
45static std::atomic<bool> initialized = false;
46
47static std::string GetGUID(SDL_Joystick* joystick) {
48 SDL_JoystickGUID guid = SDL_JoystickGetGUID(joystick);
49 char guid_str[33];
50 SDL_JoystickGetGUIDString(guid, guid_str, sizeof(guid_str));
51 return guid_str;
52}
53
54class SDLJoystick {
55public:
56 SDLJoystick(std::string guid_, int port_, SDL_Joystick* joystick,
57 decltype(&SDL_JoystickClose) deleter = &SDL_JoystickClose)
58 : guid{std::move(guid_)}, port{port_}, sdl_joystick{joystick, deleter} {}
59
60 void SetButton(int button, bool value) {
61 std::lock_guard<std::mutex> lock(mutex);
62 state.buttons[button] = value;
63 }
64
65 bool GetButton(int button) const {
66 std::lock_guard<std::mutex> lock(mutex);
67 return state.buttons.at(button);
68 }
69
70 void SetAxis(int axis, Sint16 value) {
71 std::lock_guard<std::mutex> lock(mutex);
72 state.axes[axis] = value;
73 }
74
75 float GetAxis(int axis) const {
76 std::lock_guard<std::mutex> lock(mutex);
77 return state.axes.at(axis) / 32767.0f;
78 }
79
80 std::tuple<float, float> GetAnalog(int axis_x, int axis_y) const {
81 float x = GetAxis(axis_x);
82 float y = GetAxis(axis_y);
83 y = -y; // 3DS uses an y-axis inverse from SDL
84
85 // Make sure the coordinates are in the unit circle,
86 // otherwise normalize it.
87 float r = x * x + y * y;
88 if (r > 1.0f) {
89 r = std::sqrt(r);
90 x /= r;
91 y /= r;
92 }
93
94 return std::make_tuple(x, y);
95 }
96
97 void SetHat(int hat, Uint8 direction) {
98 std::lock_guard<std::mutex> lock(mutex);
99 state.hats[hat] = direction;
100 }
101
102 bool GetHatDirection(int hat, Uint8 direction) const {
103 std::lock_guard<std::mutex> lock(mutex);
104 return (state.hats.at(hat) & direction) != 0;
105 }
106 /**
107 * The guid of the joystick
108 */
109 const std::string& GetGUID() const {
110 return guid;
111 }
112
113 /**
114 * The number of joystick from the same type that were connected before this joystick
115 */
116 int GetPort() const {
117 return port;
118 }
119
120 SDL_Joystick* GetSDLJoystick() const {
121 return sdl_joystick.get();
122 }
123
124 void SetSDLJoystick(SDL_Joystick* joystick,
125 decltype(&SDL_JoystickClose) deleter = &SDL_JoystickClose) {
126 sdl_joystick =
127 std::unique_ptr<SDL_Joystick, decltype(&SDL_JoystickClose)>(joystick, deleter);
128 }
129
130private:
131 struct State {
132 std::unordered_map<int, bool> buttons;
133 std::unordered_map<int, Sint16> axes;
134 std::unordered_map<int, Uint8> hats;
135 } state;
136 std::string guid;
137 int port;
138 std::unique_ptr<SDL_Joystick, decltype(&SDL_JoystickClose)> sdl_joystick;
139 mutable std::mutex mutex;
140};
141
142/**
143 * Get the nth joystick with the corresponding GUID
144 */
145static std::shared_ptr<SDLJoystick> GetSDLJoystickByGUID(const std::string& guid, int port) {
146 std::lock_guard<std::mutex> lock(joystick_map_mutex);
147 const auto it = joystick_map.find(guid);
148 if (it != joystick_map.end()) {
149 while (it->second.size() <= port) {
150 auto joystick = std::make_shared<SDLJoystick>(guid, it->second.size(), nullptr,
151 [](SDL_Joystick*) {});
152 it->second.emplace_back(std::move(joystick));
153 }
154 return it->second[port];
155 }
156 auto joystick = std::make_shared<SDLJoystick>(guid, 0, nullptr, [](SDL_Joystick*) {});
157 return joystick_map[guid].emplace_back(std::move(joystick));
158}
159
160/**
161 * Check how many identical joysticks (by guid) were connected before the one with sdl_id and so tie
162 * it to a SDLJoystick with the same guid and that port
163 */
164static std::shared_ptr<SDLJoystick> GetSDLJoystickBySDLID(SDL_JoystickID sdl_id) {
165 std::lock_guard<std::mutex> lock(joystick_map_mutex);
166 auto sdl_joystick = SDL_JoystickFromInstanceID(sdl_id);
167 const std::string guid = GetGUID(sdl_joystick);
168 auto map_it = joystick_map.find(guid);
169 if (map_it != joystick_map.end()) {
170 auto vec_it = std::find_if(map_it->second.begin(), map_it->second.end(),
171 [&sdl_joystick](const std::shared_ptr<SDLJoystick>& joystick) {
172 return sdl_joystick == joystick->GetSDLJoystick();
173 });
174 if (vec_it != map_it->second.end()) {
175 // This is the common case: There is already an existing SDL_Joystick maped to a
176 // SDLJoystick. return the SDLJoystick
177 return *vec_it;
178 }
179 // Search for a SDLJoystick without a mapped SDL_Joystick...
180 auto nullptr_it = std::find_if(map_it->second.begin(), map_it->second.end(),
181 [](const std::shared_ptr<SDLJoystick>& joystick) {
182 return !joystick->GetSDLJoystick();
183 });
184 if (nullptr_it != map_it->second.end()) {
185 // ... and map it
186 (*nullptr_it)->SetSDLJoystick(sdl_joystick);
187 return *nullptr_it;
188 }
189 // There is no SDLJoystick without a mapped SDL_Joystick
190 // Create a new SDLJoystick
191 auto joystick = std::make_shared<SDLJoystick>(guid, map_it->second.size(), sdl_joystick);
192 return map_it->second.emplace_back(std::move(joystick));
193 }
194 auto joystick = std::make_shared<SDLJoystick>(guid, 0, sdl_joystick);
195 return joystick_map[guid].emplace_back(std::move(joystick));
196}
197
198void InitJoystick(int joystick_index) {
199 std::lock_guard<std::mutex> lock(joystick_map_mutex);
200 SDL_Joystick* sdl_joystick = SDL_JoystickOpen(joystick_index);
201 if (!sdl_joystick) {
202 LOG_ERROR(Input, "failed to open joystick {}", joystick_index);
203 return;
204 }
205 std::string guid = GetGUID(sdl_joystick);
206 if (joystick_map.find(guid) == joystick_map.end()) {
207 auto joystick = std::make_shared<SDLJoystick>(guid, 0, sdl_joystick);
208 joystick_map[guid].emplace_back(std::move(joystick));
209 return;
210 }
211 auto& joystick_guid_list = joystick_map[guid];
212 const auto it = std::find_if(
213 joystick_guid_list.begin(), joystick_guid_list.end(),
214 [](const std::shared_ptr<SDLJoystick>& joystick) { return !joystick->GetSDLJoystick(); });
215 if (it != joystick_guid_list.end()) {
216 (*it)->SetSDLJoystick(sdl_joystick);
217 return;
218 }
219 auto joystick = std::make_shared<SDLJoystick>(guid, joystick_guid_list.size(), sdl_joystick);
220 joystick_guid_list.emplace_back(std::move(joystick));
221}
222
223void CloseJoystick(SDL_Joystick* sdl_joystick) {
224 std::lock_guard<std::mutex> lock(joystick_map_mutex);
225 std::string guid = GetGUID(sdl_joystick);
226 // This call to guid is save since the joystick is guranteed to be in that map
227 auto& joystick_guid_list = joystick_map[guid];
228 const auto joystick_it =
229 std::find_if(joystick_guid_list.begin(), joystick_guid_list.end(),
230 [&sdl_joystick](const std::shared_ptr<SDLJoystick>& joystick) {
231 return joystick->GetSDLJoystick() == sdl_joystick;
232 });
233 (*joystick_it)->SetSDLJoystick(nullptr, [](SDL_Joystick*) {});
234}
235
236void HandleGameControllerEvent(const SDL_Event& event) {
237 switch (event.type) {
238 case SDL_JOYBUTTONUP: {
239 auto joystick = GetSDLJoystickBySDLID(event.jbutton.which);
240 if (joystick) {
241 joystick->SetButton(event.jbutton.button, false);
242 }
243 break;
244 }
245 case SDL_JOYBUTTONDOWN: {
246 auto joystick = GetSDLJoystickBySDLID(event.jbutton.which);
247 if (joystick) {
248 joystick->SetButton(event.jbutton.button, true);
249 }
250 break;
251 }
252 case SDL_JOYHATMOTION: {
253 auto joystick = GetSDLJoystickBySDLID(event.jhat.which);
254 if (joystick) {
255 joystick->SetHat(event.jhat.hat, event.jhat.value);
256 }
257 break;
258 }
259 case SDL_JOYAXISMOTION: {
260 auto joystick = GetSDLJoystickBySDLID(event.jaxis.which);
261 if (joystick) {
262 joystick->SetAxis(event.jaxis.axis, event.jaxis.value);
263 }
264 break;
265 }
266 case SDL_JOYDEVICEREMOVED:
267 LOG_DEBUG(Input, "Controller removed with Instance_ID {}", event.jdevice.which);
268 CloseJoystick(SDL_JoystickFromInstanceID(event.jdevice.which));
269 break;
270 case SDL_JOYDEVICEADDED:
271 LOG_DEBUG(Input, "Controller connected with device index {}", event.jdevice.which);
272 InitJoystick(event.jdevice.which);
273 break;
274 }
275}
276
277void CloseSDLJoysticks() {
278 std::lock_guard<std::mutex> lock(joystick_map_mutex);
279 joystick_map.clear();
280}
281
282void PollLoop() {
283 if (SDL_Init(SDL_INIT_JOYSTICK) < 0) {
284 LOG_CRITICAL(Input, "SDL_Init(SDL_INIT_JOYSTICK) failed with: {}", SDL_GetError());
285 return;
286 }
287
288 SDL_Event event;
289 while (initialized) {
290 // Wait for 10 ms or until an event happens
291 if (SDL_WaitEventTimeout(&event, 10)) {
292 // Don't handle the event if we are configuring
293 if (polling) {
294 event_queue.Push(event);
295 } else {
296 HandleGameControllerEvent(event);
297 }
298 }
299 }
300 CloseSDLJoysticks();
301 SDL_QuitSubSystem(SDL_INIT_JOYSTICK);
302}
303
304class SDLButton final : public Input::ButtonDevice {
305public:
306 explicit SDLButton(std::shared_ptr<SDLJoystick> joystick_, int button_)
307 : joystick(std::move(joystick_)), button(button_) {}
308
309 bool GetStatus() const override {
310 return joystick->GetButton(button);
311 }
312
313private:
314 std::shared_ptr<SDLJoystick> joystick;
315 int button;
316};
317
318class SDLDirectionButton final : public Input::ButtonDevice {
319public:
320 explicit SDLDirectionButton(std::shared_ptr<SDLJoystick> joystick_, int hat_, Uint8 direction_)
321 : joystick(std::move(joystick_)), hat(hat_), direction(direction_) {}
322
323 bool GetStatus() const override {
324 return joystick->GetHatDirection(hat, direction);
325 }
326
327private:
328 std::shared_ptr<SDLJoystick> joystick;
329 int hat;
330 Uint8 direction;
331};
332
333class SDLAxisButton final : public Input::ButtonDevice {
334public:
335 explicit SDLAxisButton(std::shared_ptr<SDLJoystick> joystick_, int axis_, float threshold_,
336 bool trigger_if_greater_)
337 : joystick(std::move(joystick_)), axis(axis_), threshold(threshold_),
338 trigger_if_greater(trigger_if_greater_) {}
339
340 bool GetStatus() const override {
341 float axis_value = joystick->GetAxis(axis);
342 if (trigger_if_greater)
343 return axis_value > threshold;
344 return axis_value < threshold;
345 }
346
347private:
348 std::shared_ptr<SDLJoystick> joystick;
349 int axis;
350 float threshold;
351 bool trigger_if_greater;
352};
353
354class SDLAnalog final : public Input::AnalogDevice {
355public:
356 SDLAnalog(std::shared_ptr<SDLJoystick> joystick_, int axis_x_, int axis_y_)
357 : joystick(std::move(joystick_)), axis_x(axis_x_), axis_y(axis_y_) {}
358
359 std::tuple<float, float> GetStatus() const override {
360 return joystick->GetAnalog(axis_x, axis_y);
361 }
362
363private:
364 std::shared_ptr<SDLJoystick> joystick;
365 int axis_x;
366 int axis_y;
367};
368
369/// A button device factory that creates button devices from SDL joystick
370class SDLButtonFactory final : public Input::Factory<Input::ButtonDevice> {
371public:
372 /**
373 * Creates a button device from a joystick button
374 * @param params contains parameters for creating the device:
375 * - "guid": the guid of the joystick to bind
376 * - "port": the nth joystick of the same type to bind
377 * - "button"(optional): the index of the button to bind
378 * - "hat"(optional): the index of the hat to bind as direction buttons
379 * - "axis"(optional): the index of the axis to bind
380 * - "direction"(only used for hat): the direction name of the hat to bind. Can be "up",
381 * "down", "left" or "right"
382 * - "threshold"(only used for axis): a float value in (-1.0, 1.0) which the button is
383 * triggered if the axis value crosses
384 * - "direction"(only used for axis): "+" means the button is triggered when the axis
385 * value is greater than the threshold; "-" means the button is triggered when the axis
386 * value is smaller than the threshold
387 */
388 std::unique_ptr<Input::ButtonDevice> Create(const Common::ParamPackage& params) override {
389 const std::string guid = params.Get("guid", "0");
390 const int port = params.Get("port", 0);
391
392 auto joystick = GetSDLJoystickByGUID(guid, port);
393
394 if (params.Has("hat")) {
395 const int hat = params.Get("hat", 0);
396 const std::string direction_name = params.Get("direction", "");
397 Uint8 direction;
398 if (direction_name == "up") {
399 direction = SDL_HAT_UP;
400 } else if (direction_name == "down") {
401 direction = SDL_HAT_DOWN;
402 } else if (direction_name == "left") {
403 direction = SDL_HAT_LEFT;
404 } else if (direction_name == "right") {
405 direction = SDL_HAT_RIGHT;
406 } else {
407 direction = 0;
408 }
409 // This is necessary so accessing GetHat with hat won't crash
410 joystick->SetHat(hat, SDL_HAT_CENTERED);
411 return std::make_unique<SDLDirectionButton>(joystick, hat, direction);
412 }
413
414 if (params.Has("axis")) {
415 const int axis = params.Get("axis", 0);
416 const float threshold = params.Get("threshold", 0.5f);
417 const std::string direction_name = params.Get("direction", "");
418 bool trigger_if_greater;
419 if (direction_name == "+") {
420 trigger_if_greater = true;
421 } else if (direction_name == "-") {
422 trigger_if_greater = false;
423 } else {
424 trigger_if_greater = true;
425 LOG_ERROR(Input, "Unknown direction '{}'", direction_name);
426 }
427 // This is necessary so accessing GetAxis with axis won't crash
428 joystick->SetAxis(axis, 0);
429 return std::make_unique<SDLAxisButton>(joystick, axis, threshold, trigger_if_greater);
430 }
431
432 const int button = params.Get("button", 0);
433 // This is necessary so accessing GetButton with button won't crash
434 joystick->SetButton(button, false);
435 return std::make_unique<SDLButton>(joystick, button);
436 }
437};
438
439/// An analog device factory that creates analog devices from SDL joystick
440class SDLAnalogFactory final : public Input::Factory<Input::AnalogDevice> {
441public:
442 /**
443 * Creates analog device from joystick axes
444 * @param params contains parameters for creating the device:
445 * - "guid": the guid of the joystick to bind
446 * - "port": the nth joystick of the same type
447 * - "axis_x": the index of the axis to be bind as x-axis
448 * - "axis_y": the index of the axis to be bind as y-axis
449 */
450 std::unique_ptr<Input::AnalogDevice> Create(const Common::ParamPackage& params) override {
451 const std::string guid = params.Get("guid", "0");
452 const int port = params.Get("port", 0);
453 const int axis_x = params.Get("axis_x", 0);
454 const int axis_y = params.Get("axis_y", 1);
455
456 auto joystick = GetSDLJoystickByGUID(guid, port);
457
458 // This is necessary so accessing GetAxis with axis_x and axis_y won't crash
459 joystick->SetAxis(axis_x, 0);
460 joystick->SetAxis(axis_y, 0);
461 return std::make_unique<SDLAnalog>(joystick, axis_x, axis_y);
462 }
463};
464
465void Init() {
466 using namespace Input;
467 RegisterFactory<ButtonDevice>("sdl", std::make_shared<SDLButtonFactory>());
468 RegisterFactory<AnalogDevice>("sdl", std::make_shared<SDLAnalogFactory>());
469 polling = false;
470 initialized = true;
471}
472
473void Shutdown() {
474 if (initialized) {
475 using namespace Input;
476 UnregisterFactory<ButtonDevice>("sdl");
477 UnregisterFactory<AnalogDevice>("sdl");
478 initialized = false;
479 }
480}
481
482Common::ParamPackage SDLEventToButtonParamPackage(const SDL_Event& event) {
483 Common::ParamPackage params({{"engine", "sdl"}});
484 switch (event.type) {
485 case SDL_JOYAXISMOTION: {
486 auto joystick = GetSDLJoystickBySDLID(event.jaxis.which);
487 params.Set("port", joystick->GetPort());
488 params.Set("guid", joystick->GetGUID());
489 params.Set("axis", event.jaxis.axis);
490 if (event.jaxis.value > 0) {
491 params.Set("direction", "+");
492 params.Set("threshold", "0.5");
493 } else {
494 params.Set("direction", "-");
495 params.Set("threshold", "-0.5");
496 }
497 break;
498 }
499 case SDL_JOYBUTTONUP: {
500 auto joystick = GetSDLJoystickBySDLID(event.jbutton.which);
501 params.Set("port", joystick->GetPort());
502 params.Set("guid", joystick->GetGUID());
503 params.Set("button", event.jbutton.button);
504 break;
505 }
506 case SDL_JOYHATMOTION: {
507 auto joystick = GetSDLJoystickBySDLID(event.jhat.which);
508 params.Set("port", joystick->GetPort());
509 params.Set("guid", joystick->GetGUID());
510 params.Set("hat", event.jhat.hat);
511 switch (event.jhat.value) {
512 case SDL_HAT_UP:
513 params.Set("direction", "up");
514 break;
515 case SDL_HAT_DOWN:
516 params.Set("direction", "down");
517 break;
518 case SDL_HAT_LEFT:
519 params.Set("direction", "left");
520 break;
521 case SDL_HAT_RIGHT:
522 params.Set("direction", "right");
523 break;
524 default:
525 return {};
526 }
527 break;
528 }
529 }
530 return params;
531}
532
533namespace Polling {
534
535class SDLPoller : public InputCommon::Polling::DevicePoller {
536public:
537 void Start() override {
538 event_queue.Clear();
539 polling = true;
540 }
541
542 void Stop() override {
543 polling = false;
544 }
545};
546
547class SDLButtonPoller final : public SDLPoller {
548public:
549 Common::ParamPackage GetNextInput() override {
550 SDL_Event event;
551 while (event_queue.Pop(event)) {
552 switch (event.type) {
553 case SDL_JOYAXISMOTION:
554 if (std::abs(event.jaxis.value / 32767.0) < 0.5) {
555 break;
556 }
557 case SDL_JOYBUTTONUP:
558 case SDL_JOYHATMOTION:
559 return SDLEventToButtonParamPackage(event);
560 }
561 }
562 return {};
563 }
564};
565
566class SDLAnalogPoller final : public SDLPoller {
567public:
568 void Start() override {
569 SDLPoller::Start();
570
571 // Reset stored axes
572 analog_xaxis = -1;
573 analog_yaxis = -1;
574 analog_axes_joystick = -1;
575 }
576
577 Common::ParamPackage GetNextInput() override {
578 SDL_Event event;
579 while (event_queue.Pop(event)) {
580 if (event.type != SDL_JOYAXISMOTION || std::abs(event.jaxis.value / 32767.0) < 0.5) {
581 continue;
582 }
583 // An analog device needs two axes, so we need to store the axis for later and wait for
584 // a second SDL event. The axes also must be from the same joystick.
585 int axis = event.jaxis.axis;
586 if (analog_xaxis == -1) {
587 analog_xaxis = axis;
588 analog_axes_joystick = event.jaxis.which;
589 } else if (analog_yaxis == -1 && analog_xaxis != axis &&
590 analog_axes_joystick == event.jaxis.which) {
591 analog_yaxis = axis;
592 }
593 }
594 Common::ParamPackage params;
595 if (analog_xaxis != -1 && analog_yaxis != -1) {
596 auto joystick = GetSDLJoystickBySDLID(event.jaxis.which);
597 params.Set("engine", "sdl");
598 params.Set("port", joystick->GetPort());
599 params.Set("guid", joystick->GetGUID());
600 params.Set("axis_x", analog_xaxis);
601 params.Set("axis_y", analog_yaxis);
602 analog_xaxis = -1;
603 analog_yaxis = -1;
604 analog_axes_joystick = -1;
605 return params;
606 }
607 return params;
608 }
609
610private:
611 int analog_xaxis = -1;
612 int analog_yaxis = -1;
613 SDL_JoystickID analog_axes_joystick = -1;
614};
615
616std::vector<std::unique_ptr<InputCommon::Polling::DevicePoller>> GetPollers(
617 InputCommon::Polling::DeviceType type) {
618 std::vector<std::unique_ptr<InputCommon::Polling::DevicePoller>> pollers;
619 switch (type) {
620 case InputCommon::Polling::DeviceType::Analog:
621 pollers.push_back(std::make_unique<SDLAnalogPoller>());
622 break;
623 case InputCommon::Polling::DeviceType::Button:
624 pollers.push_back(std::make_unique<SDLButtonPoller>());
625 break;
626 }
627 return pollers;
628} 18}
629} // namespace Polling 19} // namespace InputCommon::SDL
630} // namespace SDL
631} // namespace InputCommon
diff --git a/src/input_common/sdl/sdl.h b/src/input_common/sdl/sdl.h
index 0206860d3..02a8d2e2c 100644
--- a/src/input_common/sdl/sdl.h
+++ b/src/input_common/sdl/sdl.h
@@ -1,4 +1,4 @@
1// Copyright 2017 Citra Emulator Project 1// Copyright 2018 Citra Emulator Project
2// Licensed under GPLv2 or any later version 2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included. 3// Refer to the license.txt file included.
4 4
@@ -7,45 +7,36 @@
7#include <memory> 7#include <memory>
8#include <vector> 8#include <vector>
9#include "core/frontend/input.h" 9#include "core/frontend/input.h"
10#include "input_common/main.h"
10 11
11union SDL_Event; 12union SDL_Event;
13
12namespace Common { 14namespace Common {
13class ParamPackage; 15class ParamPackage;
14} 16} // namespace Common
15namespace InputCommon { 17
16namespace Polling { 18namespace InputCommon::Polling {
17class DevicePoller; 19class DevicePoller;
18enum class DeviceType; 20enum class DeviceType;
19} // namespace Polling 21} // namespace InputCommon::Polling
20} // namespace InputCommon
21
22namespace InputCommon {
23namespace SDL {
24
25/// Initializes and registers SDL device factories
26void Init();
27
28/// Unresisters SDL device factories and shut them down.
29void Shutdown();
30
31/// Needs to be called before SDL_QuitSubSystem.
32void CloseSDLJoysticks();
33 22
34/// Handle SDL_Events for joysticks from SDL_PollEvent 23namespace InputCommon::SDL {
35void HandleGameControllerEvent(const SDL_Event& event);
36 24
37/// A Loop that calls HandleGameControllerEvent until Shutdown is called 25class State {
38void PollLoop(); 26public:
27 /// Unresisters SDL device factories and shut them down.
28 virtual ~State() = default;
39 29
40/// Creates a ParamPackage from an SDL_Event that can directly be used to create a ButtonDevice 30 virtual std::vector<std::unique_ptr<InputCommon::Polling::DevicePoller>> GetPollers(
41Common::ParamPackage SDLEventToButtonParamPackage(const SDL_Event& event); 31 InputCommon::Polling::DeviceType type) = 0;
32};
42 33
43namespace Polling { 34class NullState : public State {
35public:
36 std::vector<std::unique_ptr<InputCommon::Polling::DevicePoller>> GetPollers(
37 InputCommon::Polling::DeviceType type) override {}
38};
44 39
45/// Get all DevicePoller that use the SDL backend for a specific device type 40std::unique_ptr<State> Init();
46std::vector<std::unique_ptr<InputCommon::Polling::DevicePoller>> GetPollers(
47 InputCommon::Polling::DeviceType type);
48 41
49} // namespace Polling 42} // namespace InputCommon::SDL
50} // namespace SDL
51} // namespace InputCommon
diff --git a/src/input_common/sdl/sdl_impl.cpp b/src/input_common/sdl/sdl_impl.cpp
new file mode 100644
index 000000000..6e8376549
--- /dev/null
+++ b/src/input_common/sdl/sdl_impl.cpp
@@ -0,0 +1,668 @@
1// Copyright 2018 Citra Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <algorithm>
6#include <atomic>
7#include <cmath>
8#include <functional>
9#include <iterator>
10#include <mutex>
11#include <string>
12#include <thread>
13#include <tuple>
14#include <unordered_map>
15#include <utility>
16#include <vector>
17#include <SDL.h>
18#include "common/assert.h"
19#include "common/logging/log.h"
20#include "common/math_util.h"
21#include "common/param_package.h"
22#include "common/threadsafe_queue.h"
23#include "core/frontend/input.h"
24#include "input_common/sdl/sdl_impl.h"
25
26namespace InputCommon {
27
28namespace SDL {
29
30static std::string GetGUID(SDL_Joystick* joystick) {
31 SDL_JoystickGUID guid = SDL_JoystickGetGUID(joystick);
32 char guid_str[33];
33 SDL_JoystickGetGUIDString(guid, guid_str, sizeof(guid_str));
34 return guid_str;
35}
36
37/// Creates a ParamPackage from an SDL_Event that can directly be used to create a ButtonDevice
38static Common::ParamPackage SDLEventToButtonParamPackage(SDLState& state, const SDL_Event& event);
39
40static int SDLEventWatcher(void* userdata, SDL_Event* event) {
41 SDLState* sdl_state = reinterpret_cast<SDLState*>(userdata);
42 // Don't handle the event if we are configuring
43 if (sdl_state->polling) {
44 sdl_state->event_queue.Push(*event);
45 } else {
46 sdl_state->HandleGameControllerEvent(*event);
47 }
48 return 0;
49}
50
51class SDLJoystick {
52public:
53 SDLJoystick(std::string guid_, int port_, SDL_Joystick* joystick,
54 decltype(&SDL_JoystickClose) deleter = &SDL_JoystickClose)
55 : guid{std::move(guid_)}, port{port_}, sdl_joystick{joystick, deleter} {}
56
57 void SetButton(int button, bool value) {
58 std::lock_guard<std::mutex> lock(mutex);
59 state.buttons[button] = value;
60 }
61
62 bool GetButton(int button) const {
63 std::lock_guard<std::mutex> lock(mutex);
64 return state.buttons.at(button);
65 }
66
67 void SetAxis(int axis, Sint16 value) {
68 std::lock_guard<std::mutex> lock(mutex);
69 state.axes[axis] = value;
70 }
71
72 float GetAxis(int axis) const {
73 std::lock_guard<std::mutex> lock(mutex);
74 return state.axes.at(axis) / 32767.0f;
75 }
76
77 std::tuple<float, float> GetAnalog(int axis_x, int axis_y) const {
78 float x = GetAxis(axis_x);
79 float y = GetAxis(axis_y);
80 y = -y; // 3DS uses an y-axis inverse from SDL
81
82 // Make sure the coordinates are in the unit circle,
83 // otherwise normalize it.
84 float r = x * x + y * y;
85 if (r > 1.0f) {
86 r = std::sqrt(r);
87 x /= r;
88 y /= r;
89 }
90
91 return std::make_tuple(x, y);
92 }
93
94 void SetHat(int hat, Uint8 direction) {
95 std::lock_guard<std::mutex> lock(mutex);
96 state.hats[hat] = direction;
97 }
98
99 bool GetHatDirection(int hat, Uint8 direction) const {
100 std::lock_guard<std::mutex> lock(mutex);
101 return (state.hats.at(hat) & direction) != 0;
102 }
103 /**
104 * The guid of the joystick
105 */
106 const std::string& GetGUID() const {
107 return guid;
108 }
109
110 /**
111 * The number of joystick from the same type that were connected before this joystick
112 */
113 int GetPort() const {
114 return port;
115 }
116
117 SDL_Joystick* GetSDLJoystick() const {
118 return sdl_joystick.get();
119 }
120
121 void SetSDLJoystick(SDL_Joystick* joystick,
122 decltype(&SDL_JoystickClose) deleter = &SDL_JoystickClose) {
123 sdl_joystick =
124 std::unique_ptr<SDL_Joystick, decltype(&SDL_JoystickClose)>(joystick, deleter);
125 }
126
127private:
128 struct State {
129 std::unordered_map<int, bool> buttons;
130 std::unordered_map<int, Sint16> axes;
131 std::unordered_map<int, Uint8> hats;
132 } state;
133 std::string guid;
134 int port;
135 std::unique_ptr<SDL_Joystick, decltype(&SDL_JoystickClose)> sdl_joystick;
136 mutable std::mutex mutex;
137};
138
139/**
140 * Get the nth joystick with the corresponding GUID
141 */
142std::shared_ptr<SDLJoystick> SDLState::GetSDLJoystickByGUID(const std::string& guid, int port) {
143 std::lock_guard<std::mutex> lock(joystick_map_mutex);
144 const auto it = joystick_map.find(guid);
145 if (it != joystick_map.end()) {
146 while (it->second.size() <= port) {
147 auto joystick = std::make_shared<SDLJoystick>(guid, it->second.size(), nullptr,
148 [](SDL_Joystick*) {});
149 it->second.emplace_back(std::move(joystick));
150 }
151 return it->second[port];
152 }
153 auto joystick = std::make_shared<SDLJoystick>(guid, 0, nullptr, [](SDL_Joystick*) {});
154 return joystick_map[guid].emplace_back(std::move(joystick));
155}
156
157/**
158 * Check how many identical joysticks (by guid) were connected before the one with sdl_id and so tie
159 * it to a SDLJoystick with the same guid and that port
160 */
161std::shared_ptr<SDLJoystick> SDLState::GetSDLJoystickBySDLID(SDL_JoystickID sdl_id) {
162 auto sdl_joystick = SDL_JoystickFromInstanceID(sdl_id);
163 const std::string guid = GetGUID(sdl_joystick);
164 std::lock_guard<std::mutex> lock(joystick_map_mutex);
165 auto map_it = joystick_map.find(guid);
166 if (map_it != joystick_map.end()) {
167 auto vec_it = std::find_if(map_it->second.begin(), map_it->second.end(),
168 [&sdl_joystick](const std::shared_ptr<SDLJoystick>& joystick) {
169 return sdl_joystick == joystick->GetSDLJoystick();
170 });
171 if (vec_it != map_it->second.end()) {
172 // This is the common case: There is already an existing SDL_Joystick maped to a
173 // SDLJoystick. return the SDLJoystick
174 return *vec_it;
175 }
176 // Search for a SDLJoystick without a mapped SDL_Joystick...
177 auto nullptr_it = std::find_if(map_it->second.begin(), map_it->second.end(),
178 [](const std::shared_ptr<SDLJoystick>& joystick) {
179 return !joystick->GetSDLJoystick();
180 });
181 if (nullptr_it != map_it->second.end()) {
182 // ... and map it
183 (*nullptr_it)->SetSDLJoystick(sdl_joystick);
184 return *nullptr_it;
185 }
186 // There is no SDLJoystick without a mapped SDL_Joystick
187 // Create a new SDLJoystick
188 auto joystick = std::make_shared<SDLJoystick>(guid, map_it->second.size(), sdl_joystick);
189 return map_it->second.emplace_back(std::move(joystick));
190 }
191 auto joystick = std::make_shared<SDLJoystick>(guid, 0, sdl_joystick);
192 return joystick_map[guid].emplace_back(std::move(joystick));
193}
194
195void SDLState::InitJoystick(int joystick_index) {
196 SDL_Joystick* sdl_joystick = SDL_JoystickOpen(joystick_index);
197 if (!sdl_joystick) {
198 LOG_ERROR(Input, "failed to open joystick {}", joystick_index);
199 return;
200 }
201 std::string guid = GetGUID(sdl_joystick);
202 std::lock_guard<std::mutex> lock(joystick_map_mutex);
203 if (joystick_map.find(guid) == joystick_map.end()) {
204 auto joystick = std::make_shared<SDLJoystick>(guid, 0, sdl_joystick);
205 joystick_map[guid].emplace_back(std::move(joystick));
206 return;
207 }
208 auto& joystick_guid_list = joystick_map[guid];
209 const auto it = std::find_if(
210 joystick_guid_list.begin(), joystick_guid_list.end(),
211 [](const std::shared_ptr<SDLJoystick>& joystick) { return !joystick->GetSDLJoystick(); });
212 if (it != joystick_guid_list.end()) {
213 (*it)->SetSDLJoystick(sdl_joystick);
214 return;
215 }
216 auto joystick = std::make_shared<SDLJoystick>(guid, joystick_guid_list.size(), sdl_joystick);
217 joystick_guid_list.emplace_back(std::move(joystick));
218}
219
220void SDLState::CloseJoystick(SDL_Joystick* sdl_joystick) {
221 std::string guid = GetGUID(sdl_joystick);
222 std::shared_ptr<SDLJoystick> joystick;
223 {
224 std::lock_guard<std::mutex> lock(joystick_map_mutex);
225 // This call to guid is safe since the joystick is guaranteed to be in the map
226 auto& joystick_guid_list = joystick_map[guid];
227 const auto joystick_it =
228 std::find_if(joystick_guid_list.begin(), joystick_guid_list.end(),
229 [&sdl_joystick](const std::shared_ptr<SDLJoystick>& joystick) {
230 return joystick->GetSDLJoystick() == sdl_joystick;
231 });
232 joystick = *joystick_it;
233 }
234 // Destruct SDL_Joystick outside the lock guard because SDL can internally call event calback
235 // which locks the mutex again
236 joystick->SetSDLJoystick(nullptr, [](SDL_Joystick*) {});
237}
238
239void SDLState::HandleGameControllerEvent(const SDL_Event& event) {
240 switch (event.type) {
241 case SDL_JOYBUTTONUP: {
242 if (auto joystick = GetSDLJoystickBySDLID(event.jbutton.which)) {
243 joystick->SetButton(event.jbutton.button, false);
244 }
245 break;
246 }
247 case SDL_JOYBUTTONDOWN: {
248 if (auto joystick = GetSDLJoystickBySDLID(event.jbutton.which)) {
249 joystick->SetButton(event.jbutton.button, true);
250 }
251 break;
252 }
253 case SDL_JOYHATMOTION: {
254 if (auto joystick = GetSDLJoystickBySDLID(event.jhat.which)) {
255 joystick->SetHat(event.jhat.hat, event.jhat.value);
256 }
257 break;
258 }
259 case SDL_JOYAXISMOTION: {
260 if (auto joystick = GetSDLJoystickBySDLID(event.jaxis.which)) {
261 joystick->SetAxis(event.jaxis.axis, event.jaxis.value);
262 }
263 break;
264 }
265 case SDL_JOYDEVICEREMOVED:
266 LOG_DEBUG(Input, "Controller removed with Instance_ID {}", event.jdevice.which);
267 CloseJoystick(SDL_JoystickFromInstanceID(event.jdevice.which));
268 break;
269 case SDL_JOYDEVICEADDED:
270 LOG_DEBUG(Input, "Controller connected with device index {}", event.jdevice.which);
271 InitJoystick(event.jdevice.which);
272 break;
273 }
274}
275
276void SDLState::CloseJoysticks() {
277 std::lock_guard<std::mutex> lock(joystick_map_mutex);
278 joystick_map.clear();
279}
280
281class SDLButton final : public Input::ButtonDevice {
282public:
283 explicit SDLButton(std::shared_ptr<SDLJoystick> joystick_, int button_)
284 : joystick(std::move(joystick_)), button(button_) {}
285
286 bool GetStatus() const override {
287 return joystick->GetButton(button);
288 }
289
290private:
291 std::shared_ptr<SDLJoystick> joystick;
292 int button;
293};
294
295class SDLDirectionButton final : public Input::ButtonDevice {
296public:
297 explicit SDLDirectionButton(std::shared_ptr<SDLJoystick> joystick_, int hat_, Uint8 direction_)
298 : joystick(std::move(joystick_)), hat(hat_), direction(direction_) {}
299
300 bool GetStatus() const override {
301 return joystick->GetHatDirection(hat, direction);
302 }
303
304private:
305 std::shared_ptr<SDLJoystick> joystick;
306 int hat;
307 Uint8 direction;
308};
309
310class SDLAxisButton final : public Input::ButtonDevice {
311public:
312 explicit SDLAxisButton(std::shared_ptr<SDLJoystick> joystick_, int axis_, float threshold_,
313 bool trigger_if_greater_)
314 : joystick(std::move(joystick_)), axis(axis_), threshold(threshold_),
315 trigger_if_greater(trigger_if_greater_) {}
316
317 bool GetStatus() const override {
318 float axis_value = joystick->GetAxis(axis);
319 if (trigger_if_greater)
320 return axis_value > threshold;
321 return axis_value < threshold;
322 }
323
324private:
325 std::shared_ptr<SDLJoystick> joystick;
326 int axis;
327 float threshold;
328 bool trigger_if_greater;
329};
330
331class SDLAnalog final : public Input::AnalogDevice {
332public:
333 SDLAnalog(std::shared_ptr<SDLJoystick> joystick_, int axis_x_, int axis_y_, float deadzone_)
334 : joystick(std::move(joystick_)), axis_x(axis_x_), axis_y(axis_y_), deadzone(deadzone_) {}
335
336 std::tuple<float, float> GetStatus() const override {
337 const auto [x, y] = joystick->GetAnalog(axis_x, axis_y);
338 const float r = std::sqrt((x * x) + (y * y));
339 if (r > deadzone) {
340 return std::make_tuple(x / r * (r - deadzone) / (1 - deadzone),
341 y / r * (r - deadzone) / (1 - deadzone));
342 }
343 return std::make_tuple<float, float>(0.0f, 0.0f);
344 }
345
346private:
347 std::shared_ptr<SDLJoystick> joystick;
348 const int axis_x;
349 const int axis_y;
350 const float deadzone;
351};
352
353/// A button device factory that creates button devices from SDL joystick
354class SDLButtonFactory final : public Input::Factory<Input::ButtonDevice> {
355public:
356 explicit SDLButtonFactory(SDLState& state_) : state(state_) {}
357
358 /**
359 * Creates a button device from a joystick button
360 * @param params contains parameters for creating the device:
361 * - "guid": the guid of the joystick to bind
362 * - "port": the nth joystick of the same type to bind
363 * - "button"(optional): the index of the button to bind
364 * - "hat"(optional): the index of the hat to bind as direction buttons
365 * - "axis"(optional): the index of the axis to bind
366 * - "direction"(only used for hat): the direction name of the hat to bind. Can be "up",
367 * "down", "left" or "right"
368 * - "threshold"(only used for axis): a float value in (-1.0, 1.0) which the button is
369 * triggered if the axis value crosses
370 * - "direction"(only used for axis): "+" means the button is triggered when the axis
371 * value is greater than the threshold; "-" means the button is triggered when the axis
372 * value is smaller than the threshold
373 */
374 std::unique_ptr<Input::ButtonDevice> Create(const Common::ParamPackage& params) override {
375 const std::string guid = params.Get("guid", "0");
376 const int port = params.Get("port", 0);
377
378 auto joystick = state.GetSDLJoystickByGUID(guid, port);
379
380 if (params.Has("hat")) {
381 const int hat = params.Get("hat", 0);
382 const std::string direction_name = params.Get("direction", "");
383 Uint8 direction;
384 if (direction_name == "up") {
385 direction = SDL_HAT_UP;
386 } else if (direction_name == "down") {
387 direction = SDL_HAT_DOWN;
388 } else if (direction_name == "left") {
389 direction = SDL_HAT_LEFT;
390 } else if (direction_name == "right") {
391 direction = SDL_HAT_RIGHT;
392 } else {
393 direction = 0;
394 }
395 // This is necessary so accessing GetHat with hat won't crash
396 joystick->SetHat(hat, SDL_HAT_CENTERED);
397 return std::make_unique<SDLDirectionButton>(joystick, hat, direction);
398 }
399
400 if (params.Has("axis")) {
401 const int axis = params.Get("axis", 0);
402 const float threshold = params.Get("threshold", 0.5f);
403 const std::string direction_name = params.Get("direction", "");
404 bool trigger_if_greater;
405 if (direction_name == "+") {
406 trigger_if_greater = true;
407 } else if (direction_name == "-") {
408 trigger_if_greater = false;
409 } else {
410 trigger_if_greater = true;
411 LOG_ERROR(Input, "Unknown direction {}", direction_name);
412 }
413 // This is necessary so accessing GetAxis with axis won't crash
414 joystick->SetAxis(axis, 0);
415 return std::make_unique<SDLAxisButton>(joystick, axis, threshold, trigger_if_greater);
416 }
417
418 const int button = params.Get("button", 0);
419 // This is necessary so accessing GetButton with button won't crash
420 joystick->SetButton(button, false);
421 return std::make_unique<SDLButton>(joystick, button);
422 }
423
424private:
425 SDLState& state;
426};
427
428/// An analog device factory that creates analog devices from SDL joystick
429class SDLAnalogFactory final : public Input::Factory<Input::AnalogDevice> {
430public:
431 explicit SDLAnalogFactory(SDLState& state_) : state(state_) {}
432 /**
433 * Creates analog device from joystick axes
434 * @param params contains parameters for creating the device:
435 * - "guid": the guid of the joystick to bind
436 * - "port": the nth joystick of the same type
437 * - "axis_x": the index of the axis to be bind as x-axis
438 * - "axis_y": the index of the axis to be bind as y-axis
439 */
440 std::unique_ptr<Input::AnalogDevice> Create(const Common::ParamPackage& params) override {
441 const std::string guid = params.Get("guid", "0");
442 const int port = params.Get("port", 0);
443 const int axis_x = params.Get("axis_x", 0);
444 const int axis_y = params.Get("axis_y", 1);
445 float deadzone = std::clamp(params.Get("deadzone", 0.0f), 0.0f, .99f);
446
447 auto joystick = state.GetSDLJoystickByGUID(guid, port);
448
449 // This is necessary so accessing GetAxis with axis_x and axis_y won't crash
450 joystick->SetAxis(axis_x, 0);
451 joystick->SetAxis(axis_y, 0);
452 return std::make_unique<SDLAnalog>(joystick, axis_x, axis_y, deadzone);
453 }
454
455private:
456 SDLState& state;
457};
458
459SDLState::SDLState() {
460 using namespace Input;
461 RegisterFactory<ButtonDevice>("sdl", std::make_shared<SDLButtonFactory>(*this));
462 RegisterFactory<AnalogDevice>("sdl", std::make_shared<SDLAnalogFactory>(*this));
463
464 // If the frontend is going to manage the event loop, then we dont start one here
465 start_thread = !SDL_WasInit(SDL_INIT_JOYSTICK);
466 if (start_thread && SDL_Init(SDL_INIT_JOYSTICK) < 0) {
467 LOG_CRITICAL(Input, "SDL_Init(SDL_INIT_JOYSTICK) failed with: {}", SDL_GetError());
468 return;
469 }
470 if (SDL_SetHint(SDL_HINT_JOYSTICK_ALLOW_BACKGROUND_EVENTS, "1") == SDL_FALSE) {
471 LOG_ERROR(Input, "Failed to set Hint for background events", SDL_GetError());
472 }
473
474 SDL_AddEventWatch(&SDLEventWatcher, this);
475
476 initialized = true;
477 if (start_thread) {
478 poll_thread = std::thread([this] {
479 using namespace std::chrono_literals;
480 while (initialized) {
481 SDL_PumpEvents();
482 std::this_thread::sleep_for(10ms);
483 }
484 });
485 }
486 // Because the events for joystick connection happens before we have our event watcher added, we
487 // can just open all the joysticks right here
488 for (int i = 0; i < SDL_NumJoysticks(); ++i) {
489 InitJoystick(i);
490 }
491}
492
SDLState::~SDLState() {
    using namespace Input;
    // Unregister the factories first so no new devices can be created mid-teardown.
    UnregisterFactory<ButtonDevice>("sdl");
    UnregisterFactory<AnalogDevice>("sdl");

    // Joysticks must be closed before the event watch is removed and before
    // SDL_QuitSubSystem below (see CloseJoysticks' declaration comment).
    CloseJoysticks();
    SDL_DelEventWatch(&SDLEventWatcher, this);

    // Clearing 'initialized' stops the poll loop; join before shutting SDL down
    // so the thread never pumps events against a quit subsystem.
    initialized = false;
    if (start_thread) {
        poll_thread.join();
        SDL_QuitSubSystem(SDL_INIT_JOYSTICK);
    }
}
507
508Common::ParamPackage SDLEventToButtonParamPackage(SDLState& state, const SDL_Event& event) {
509 Common::ParamPackage params({{"engine", "sdl"}});
510
511 switch (event.type) {
512 case SDL_JOYAXISMOTION: {
513 auto joystick = state.GetSDLJoystickBySDLID(event.jaxis.which);
514 params.Set("port", joystick->GetPort());
515 params.Set("guid", joystick->GetGUID());
516 params.Set("axis", event.jaxis.axis);
517 if (event.jaxis.value > 0) {
518 params.Set("direction", "+");
519 params.Set("threshold", "0.5");
520 } else {
521 params.Set("direction", "-");
522 params.Set("threshold", "-0.5");
523 }
524 break;
525 }
526 case SDL_JOYBUTTONUP: {
527 auto joystick = state.GetSDLJoystickBySDLID(event.jbutton.which);
528 params.Set("port", joystick->GetPort());
529 params.Set("guid", joystick->GetGUID());
530 params.Set("button", event.jbutton.button);
531 break;
532 }
533 case SDL_JOYHATMOTION: {
534 auto joystick = state.GetSDLJoystickBySDLID(event.jhat.which);
535 params.Set("port", joystick->GetPort());
536 params.Set("guid", joystick->GetGUID());
537 params.Set("hat", event.jhat.hat);
538 switch (event.jhat.value) {
539 case SDL_HAT_UP:
540 params.Set("direction", "up");
541 break;
542 case SDL_HAT_DOWN:
543 params.Set("direction", "down");
544 break;
545 case SDL_HAT_LEFT:
546 params.Set("direction", "left");
547 break;
548 case SDL_HAT_RIGHT:
549 params.Set("direction", "right");
550 break;
551 default:
552 return {};
553 }
554 break;
555 }
556 }
557 return params;
558}
559
560namespace Polling {
561
562class SDLPoller : public InputCommon::Polling::DevicePoller {
563public:
564 explicit SDLPoller(SDLState& state_) : state(state_) {}
565
566 void Start() override {
567 state.event_queue.Clear();
568 state.polling = true;
569 }
570
571 void Stop() override {
572 state.polling = false;
573 }
574
575protected:
576 SDLState& state;
577};
578
579class SDLButtonPoller final : public SDLPoller {
580public:
581 explicit SDLButtonPoller(SDLState& state_) : SDLPoller(state_) {}
582
583 Common::ParamPackage GetNextInput() override {
584 SDL_Event event;
585 while (state.event_queue.Pop(event)) {
586 switch (event.type) {
587 case SDL_JOYAXISMOTION:
588 if (std::abs(event.jaxis.value / 32767.0) < 0.5) {
589 break;
590 }
591 case SDL_JOYBUTTONUP:
592 case SDL_JOYHATMOTION:
593 return SDLEventToButtonParamPackage(state, event);
594 }
595 }
596 return {};
597 }
598};
599
600class SDLAnalogPoller final : public SDLPoller {
601public:
602 explicit SDLAnalogPoller(SDLState& state_) : SDLPoller(state_) {}
603
604 void Start() override {
605 SDLPoller::Start();
606
607 // Reset stored axes
608 analog_xaxis = -1;
609 analog_yaxis = -1;
610 analog_axes_joystick = -1;
611 }
612
613 Common::ParamPackage GetNextInput() override {
614 SDL_Event event;
615 while (state.event_queue.Pop(event)) {
616 if (event.type != SDL_JOYAXISMOTION || std::abs(event.jaxis.value / 32767.0) < 0.5) {
617 continue;
618 }
619 // An analog device needs two axes, so we need to store the axis for later and wait for
620 // a second SDL event. The axes also must be from the same joystick.
621 int axis = event.jaxis.axis;
622 if (analog_xaxis == -1) {
623 analog_xaxis = axis;
624 analog_axes_joystick = event.jaxis.which;
625 } else if (analog_yaxis == -1 && analog_xaxis != axis &&
626 analog_axes_joystick == event.jaxis.which) {
627 analog_yaxis = axis;
628 }
629 }
630 Common::ParamPackage params;
631 if (analog_xaxis != -1 && analog_yaxis != -1) {
632 auto joystick = state.GetSDLJoystickBySDLID(event.jaxis.which);
633 params.Set("engine", "sdl");
634 params.Set("port", joystick->GetPort());
635 params.Set("guid", joystick->GetGUID());
636 params.Set("axis_x", analog_xaxis);
637 params.Set("axis_y", analog_yaxis);
638 analog_xaxis = -1;
639 analog_yaxis = -1;
640 analog_axes_joystick = -1;
641 return params;
642 }
643 return params;
644 }
645
646private:
647 int analog_xaxis = -1;
648 int analog_yaxis = -1;
649 SDL_JoystickID analog_axes_joystick = -1;
650};
651} // namespace Polling
652
653std::vector<std::unique_ptr<InputCommon::Polling::DevicePoller>> SDLState::GetPollers(
654 InputCommon::Polling::DeviceType type) {
655 std::vector<std::unique_ptr<InputCommon::Polling::DevicePoller>> pollers;
656 switch (type) {
657 case InputCommon::Polling::DeviceType::Analog:
658 pollers.emplace_back(std::make_unique<Polling::SDLAnalogPoller>(*this));
659 break;
660 case InputCommon::Polling::DeviceType::Button:
661 pollers.emplace_back(std::make_unique<Polling::SDLButtonPoller>(*this));
662 break;
663 return pollers;
664 }
665}
666
667} // namespace SDL
668} // namespace InputCommon
diff --git a/src/input_common/sdl/sdl_impl.h b/src/input_common/sdl/sdl_impl.h
new file mode 100644
index 000000000..fec82fbe6
--- /dev/null
+++ b/src/input_common/sdl/sdl_impl.h
@@ -0,0 +1,64 @@
1// Copyright 2018 Citra Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <atomic>
8#include <memory>
9#include <thread>
10#include "common/threadsafe_queue.h"
11#include "input_common/sdl/sdl.h"
12
13union SDL_Event;
14using SDL_Joystick = struct _SDL_Joystick;
15using SDL_JoystickID = s32;
16
17namespace InputCommon::SDL {
18
19class SDLJoystick;
20class SDLButtonFactory;
21class SDLAnalogFactory;
22
class SDLState : public State {
public:
    /// Initializes and registers SDL device factories
    SDLState();

    /// Unregisters SDL device factories and shuts them down.
    ~SDLState() override;

    /// Handle SDL_Events for joysticks from SDL_PollEvent
    void HandleGameControllerEvent(const SDL_Event& event);

    /// Looks up a virtual joystick by its SDL instance id.
    std::shared_ptr<SDLJoystick> GetSDLJoystickBySDLID(SDL_JoystickID sdl_id);
    /// Looks up the port-th virtual joystick sharing the given GUID.
    std::shared_ptr<SDLJoystick> GetSDLJoystickByGUID(const std::string& guid, int port);

    /// Get all DevicePoller that use the SDL backend for a specific device type
    std::vector<std::unique_ptr<InputCommon::Polling::DevicePoller>> GetPollers(
        InputCommon::Polling::DeviceType type) override;

    /// Used by the Pollers during config
    std::atomic<bool> polling = false;
    Common::SPSCQueue<SDL_Event> event_queue;

private:
    // Opens (or re-attaches) the SDL joystick at the given device index.
    void InitJoystick(int joystick_index);
    // Detaches the given SDL handle from its virtual joystick on removal.
    void CloseJoystick(SDL_Joystick* sdl_joystick);

    /// Needs to be called before SDL_QuitSubSystem.
    void CloseJoysticks();

    /// Map of GUID of a list of corresponding virtual Joysticks
    std::unordered_map<std::string, std::vector<std::shared_ptr<SDLJoystick>>> joystick_map;
    std::mutex joystick_map_mutex;

    std::shared_ptr<SDLButtonFactory> button_factory;
    std::shared_ptr<SDLAnalogFactory> analog_factory;

    // True when this instance owns the SDL joystick subsystem and the poll thread.
    bool start_thread = false;
    // Cleared in the destructor to stop the poll thread's loop.
    std::atomic<bool> initialized = false;

    std::thread poll_thread;
};
64} // namespace InputCommon::SDL
diff --git a/src/tests/core/arm/arm_test_common.cpp b/src/tests/core/arm/arm_test_common.cpp
index 9b8a44fa1..3e1a735c3 100644
--- a/src/tests/core/arm/arm_test_common.cpp
+++ b/src/tests/core/arm/arm_test_common.cpp
@@ -4,6 +4,7 @@
4 4
5#include <algorithm> 5#include <algorithm>
6 6
7#include "common/page_table.h"
7#include "core/core.h" 8#include "core/core.h"
8#include "core/hle/kernel/process.h" 9#include "core/hle/kernel/process.h"
9#include "core/memory.h" 10#include "core/memory.h"
@@ -13,16 +14,16 @@
13namespace ArmTests { 14namespace ArmTests {
14 15
15TestEnvironment::TestEnvironment(bool mutable_memory_) 16TestEnvironment::TestEnvironment(bool mutable_memory_)
16 : mutable_memory(mutable_memory_), test_memory(std::make_shared<TestMemory>(this)) { 17 : mutable_memory(mutable_memory_),
17 18 test_memory(std::make_shared<TestMemory>(this)), kernel{Core::System::GetInstance()} {
18 auto process = Kernel::Process::Create(kernel, ""); 19 auto process = Kernel::Process::Create(Core::System::GetInstance(), "");
19 kernel.MakeCurrentProcess(process.get()); 20 kernel.MakeCurrentProcess(process.get());
20 page_table = &Core::CurrentProcess()->VMManager().page_table; 21 page_table = &process->VMManager().page_table;
21 22
22 std::fill(page_table->pointers.begin(), page_table->pointers.end(), nullptr); 23 std::fill(page_table->pointers.begin(), page_table->pointers.end(), nullptr);
23 page_table->special_regions.clear(); 24 page_table->special_regions.clear();
24 std::fill(page_table->attributes.begin(), page_table->attributes.end(), 25 std::fill(page_table->attributes.begin(), page_table->attributes.end(),
25 Memory::PageType::Unmapped); 26 Common::PageType::Unmapped);
26 27
27 Memory::MapIoRegion(*page_table, 0x00000000, 0x80000000, test_memory); 28 Memory::MapIoRegion(*page_table, 0x00000000, 0x80000000, test_memory);
28 Memory::MapIoRegion(*page_table, 0x80000000, 0x80000000, test_memory); 29 Memory::MapIoRegion(*page_table, 0x80000000, 0x80000000, test_memory);
diff --git a/src/tests/core/arm/arm_test_common.h b/src/tests/core/arm/arm_test_common.h
index 0b7539601..d145dbfcc 100644
--- a/src/tests/core/arm/arm_test_common.h
+++ b/src/tests/core/arm/arm_test_common.h
@@ -9,10 +9,10 @@
9#include <vector> 9#include <vector>
10 10
11#include "common/common_types.h" 11#include "common/common_types.h"
12#include "common/memory_hook.h"
12#include "core/hle/kernel/kernel.h" 13#include "core/hle/kernel/kernel.h"
13#include "core/memory_hook.h"
14 14
15namespace Memory { 15namespace Common {
16struct PageTable; 16struct PageTable;
17} 17}
18 18
@@ -58,7 +58,7 @@ public:
58 58
59private: 59private:
60 friend struct TestMemory; 60 friend struct TestMemory;
61 struct TestMemory final : Memory::MemoryHook { 61 struct TestMemory final : Common::MemoryHook {
62 explicit TestMemory(TestEnvironment* env_) : env(env_) {} 62 explicit TestMemory(TestEnvironment* env_) : env(env_) {}
63 TestEnvironment* env; 63 TestEnvironment* env;
64 64
@@ -86,7 +86,7 @@ private:
86 bool mutable_memory; 86 bool mutable_memory;
87 std::shared_ptr<TestMemory> test_memory; 87 std::shared_ptr<TestMemory> test_memory;
88 std::vector<WriteRecord> write_records; 88 std::vector<WriteRecord> write_records;
89 Memory::PageTable* page_table = nullptr; 89 Common::PageTable* page_table = nullptr;
90 Kernel::KernelCore kernel; 90 Kernel::KernelCore kernel;
91}; 91};
92 92
diff --git a/src/tests/core/core_timing.cpp b/src/tests/core/core_timing.cpp
index 2242c14cf..340d6a272 100644
--- a/src/tests/core/core_timing.cpp
+++ b/src/tests/core/core_timing.cpp
@@ -28,100 +28,103 @@ void CallbackTemplate(u64 userdata, s64 cycles_late) {
28 REQUIRE(lateness == cycles_late); 28 REQUIRE(lateness == cycles_late);
29} 29}
30 30
31class ScopeInit final { 31struct ScopeInit final {
32public:
33 ScopeInit() { 32 ScopeInit() {
34 CoreTiming::Init(); 33 core_timing.Initialize();
35 } 34 }
36 ~ScopeInit() { 35 ~ScopeInit() {
37 CoreTiming::Shutdown(); 36 core_timing.Shutdown();
38 } 37 }
38
39 Core::Timing::CoreTiming core_timing;
39}; 40};
40 41
41static void AdvanceAndCheck(u32 idx, int downcount, int expected_lateness = 0, 42static void AdvanceAndCheck(Core::Timing::CoreTiming& core_timing, u32 idx, int downcount,
42 int cpu_downcount = 0) { 43 int expected_lateness = 0, int cpu_downcount = 0) {
43 callbacks_ran_flags = 0; 44 callbacks_ran_flags = 0;
44 expected_callback = CB_IDS[idx]; 45 expected_callback = CB_IDS[idx];
45 lateness = expected_lateness; 46 lateness = expected_lateness;
46 47
47 CoreTiming::AddTicks(CoreTiming::GetDowncount() - 48 // Pretend we executed X cycles of instructions.
48 cpu_downcount); // Pretend we executed X cycles of instructions. 49 core_timing.AddTicks(core_timing.GetDowncount() - cpu_downcount);
49 CoreTiming::Advance(); 50 core_timing.Advance();
50 51
51 REQUIRE(decltype(callbacks_ran_flags)().set(idx) == callbacks_ran_flags); 52 REQUIRE(decltype(callbacks_ran_flags)().set(idx) == callbacks_ran_flags);
52 REQUIRE(downcount == CoreTiming::GetDowncount()); 53 REQUIRE(downcount == core_timing.GetDowncount());
53} 54}
54 55
55TEST_CASE("CoreTiming[BasicOrder]", "[core]") { 56TEST_CASE("CoreTiming[BasicOrder]", "[core]") {
56 ScopeInit guard; 57 ScopeInit guard;
58 auto& core_timing = guard.core_timing;
57 59
58 CoreTiming::EventType* cb_a = CoreTiming::RegisterEvent("callbackA", CallbackTemplate<0>); 60 Core::Timing::EventType* cb_a = core_timing.RegisterEvent("callbackA", CallbackTemplate<0>);
59 CoreTiming::EventType* cb_b = CoreTiming::RegisterEvent("callbackB", CallbackTemplate<1>); 61 Core::Timing::EventType* cb_b = core_timing.RegisterEvent("callbackB", CallbackTemplate<1>);
60 CoreTiming::EventType* cb_c = CoreTiming::RegisterEvent("callbackC", CallbackTemplate<2>); 62 Core::Timing::EventType* cb_c = core_timing.RegisterEvent("callbackC", CallbackTemplate<2>);
61 CoreTiming::EventType* cb_d = CoreTiming::RegisterEvent("callbackD", CallbackTemplate<3>); 63 Core::Timing::EventType* cb_d = core_timing.RegisterEvent("callbackD", CallbackTemplate<3>);
62 CoreTiming::EventType* cb_e = CoreTiming::RegisterEvent("callbackE", CallbackTemplate<4>); 64 Core::Timing::EventType* cb_e = core_timing.RegisterEvent("callbackE", CallbackTemplate<4>);
63 65
64 // Enter slice 0 66 // Enter slice 0
65 CoreTiming::Advance(); 67 core_timing.Advance();
66 68
67 // D -> B -> C -> A -> E 69 // D -> B -> C -> A -> E
68 CoreTiming::ScheduleEvent(1000, cb_a, CB_IDS[0]); 70 core_timing.ScheduleEvent(1000, cb_a, CB_IDS[0]);
69 REQUIRE(1000 == CoreTiming::GetDowncount()); 71 REQUIRE(1000 == core_timing.GetDowncount());
70 CoreTiming::ScheduleEvent(500, cb_b, CB_IDS[1]); 72 core_timing.ScheduleEvent(500, cb_b, CB_IDS[1]);
71 REQUIRE(500 == CoreTiming::GetDowncount()); 73 REQUIRE(500 == core_timing.GetDowncount());
72 CoreTiming::ScheduleEvent(800, cb_c, CB_IDS[2]); 74 core_timing.ScheduleEvent(800, cb_c, CB_IDS[2]);
73 REQUIRE(500 == CoreTiming::GetDowncount()); 75 REQUIRE(500 == core_timing.GetDowncount());
74 CoreTiming::ScheduleEvent(100, cb_d, CB_IDS[3]); 76 core_timing.ScheduleEvent(100, cb_d, CB_IDS[3]);
75 REQUIRE(100 == CoreTiming::GetDowncount()); 77 REQUIRE(100 == core_timing.GetDowncount());
76 CoreTiming::ScheduleEvent(1200, cb_e, CB_IDS[4]); 78 core_timing.ScheduleEvent(1200, cb_e, CB_IDS[4]);
77 REQUIRE(100 == CoreTiming::GetDowncount()); 79 REQUIRE(100 == core_timing.GetDowncount());
78 80
79 AdvanceAndCheck(3, 400); 81 AdvanceAndCheck(core_timing, 3, 400);
80 AdvanceAndCheck(1, 300); 82 AdvanceAndCheck(core_timing, 1, 300);
81 AdvanceAndCheck(2, 200); 83 AdvanceAndCheck(core_timing, 2, 200);
82 AdvanceAndCheck(0, 200); 84 AdvanceAndCheck(core_timing, 0, 200);
83 AdvanceAndCheck(4, MAX_SLICE_LENGTH); 85 AdvanceAndCheck(core_timing, 4, MAX_SLICE_LENGTH);
84} 86}
85 87
86TEST_CASE("CoreTiming[Threadsave]", "[core]") { 88TEST_CASE("CoreTiming[Threadsave]", "[core]") {
87 ScopeInit guard; 89 ScopeInit guard;
90 auto& core_timing = guard.core_timing;
88 91
89 CoreTiming::EventType* cb_a = CoreTiming::RegisterEvent("callbackA", CallbackTemplate<0>); 92 Core::Timing::EventType* cb_a = core_timing.RegisterEvent("callbackA", CallbackTemplate<0>);
90 CoreTiming::EventType* cb_b = CoreTiming::RegisterEvent("callbackB", CallbackTemplate<1>); 93 Core::Timing::EventType* cb_b = core_timing.RegisterEvent("callbackB", CallbackTemplate<1>);
91 CoreTiming::EventType* cb_c = CoreTiming::RegisterEvent("callbackC", CallbackTemplate<2>); 94 Core::Timing::EventType* cb_c = core_timing.RegisterEvent("callbackC", CallbackTemplate<2>);
92 CoreTiming::EventType* cb_d = CoreTiming::RegisterEvent("callbackD", CallbackTemplate<3>); 95 Core::Timing::EventType* cb_d = core_timing.RegisterEvent("callbackD", CallbackTemplate<3>);
93 CoreTiming::EventType* cb_e = CoreTiming::RegisterEvent("callbackE", CallbackTemplate<4>); 96 Core::Timing::EventType* cb_e = core_timing.RegisterEvent("callbackE", CallbackTemplate<4>);
94 97
95 // Enter slice 0 98 // Enter slice 0
96 CoreTiming::Advance(); 99 core_timing.Advance();
97 100
98 // D -> B -> C -> A -> E 101 // D -> B -> C -> A -> E
99 CoreTiming::ScheduleEventThreadsafe(1000, cb_a, CB_IDS[0]); 102 core_timing.ScheduleEventThreadsafe(1000, cb_a, CB_IDS[0]);
100 // Manually force since ScheduleEventThreadsafe doesn't call it 103 // Manually force since ScheduleEventThreadsafe doesn't call it
101 CoreTiming::ForceExceptionCheck(1000); 104 core_timing.ForceExceptionCheck(1000);
102 REQUIRE(1000 == CoreTiming::GetDowncount()); 105 REQUIRE(1000 == core_timing.GetDowncount());
103 CoreTiming::ScheduleEventThreadsafe(500, cb_b, CB_IDS[1]); 106 core_timing.ScheduleEventThreadsafe(500, cb_b, CB_IDS[1]);
104 // Manually force since ScheduleEventThreadsafe doesn't call it 107 // Manually force since ScheduleEventThreadsafe doesn't call it
105 CoreTiming::ForceExceptionCheck(500); 108 core_timing.ForceExceptionCheck(500);
106 REQUIRE(500 == CoreTiming::GetDowncount()); 109 REQUIRE(500 == core_timing.GetDowncount());
107 CoreTiming::ScheduleEventThreadsafe(800, cb_c, CB_IDS[2]); 110 core_timing.ScheduleEventThreadsafe(800, cb_c, CB_IDS[2]);
108 // Manually force since ScheduleEventThreadsafe doesn't call it 111 // Manually force since ScheduleEventThreadsafe doesn't call it
109 CoreTiming::ForceExceptionCheck(800); 112 core_timing.ForceExceptionCheck(800);
110 REQUIRE(500 == CoreTiming::GetDowncount()); 113 REQUIRE(500 == core_timing.GetDowncount());
111 CoreTiming::ScheduleEventThreadsafe(100, cb_d, CB_IDS[3]); 114 core_timing.ScheduleEventThreadsafe(100, cb_d, CB_IDS[3]);
112 // Manually force since ScheduleEventThreadsafe doesn't call it 115 // Manually force since ScheduleEventThreadsafe doesn't call it
113 CoreTiming::ForceExceptionCheck(100); 116 core_timing.ForceExceptionCheck(100);
114 REQUIRE(100 == CoreTiming::GetDowncount()); 117 REQUIRE(100 == core_timing.GetDowncount());
115 CoreTiming::ScheduleEventThreadsafe(1200, cb_e, CB_IDS[4]); 118 core_timing.ScheduleEventThreadsafe(1200, cb_e, CB_IDS[4]);
116 // Manually force since ScheduleEventThreadsafe doesn't call it 119 // Manually force since ScheduleEventThreadsafe doesn't call it
117 CoreTiming::ForceExceptionCheck(1200); 120 core_timing.ForceExceptionCheck(1200);
118 REQUIRE(100 == CoreTiming::GetDowncount()); 121 REQUIRE(100 == core_timing.GetDowncount());
119 122
120 AdvanceAndCheck(3, 400); 123 AdvanceAndCheck(core_timing, 3, 400);
121 AdvanceAndCheck(1, 300); 124 AdvanceAndCheck(core_timing, 1, 300);
122 AdvanceAndCheck(2, 200); 125 AdvanceAndCheck(core_timing, 2, 200);
123 AdvanceAndCheck(0, 200); 126 AdvanceAndCheck(core_timing, 0, 200);
124 AdvanceAndCheck(4, MAX_SLICE_LENGTH); 127 AdvanceAndCheck(core_timing, 4, MAX_SLICE_LENGTH);
125} 128}
126 129
127namespace SharedSlotTest { 130namespace SharedSlotTest {
@@ -142,59 +145,63 @@ TEST_CASE("CoreTiming[SharedSlot]", "[core]") {
142 using namespace SharedSlotTest; 145 using namespace SharedSlotTest;
143 146
144 ScopeInit guard; 147 ScopeInit guard;
148 auto& core_timing = guard.core_timing;
145 149
146 CoreTiming::EventType* cb_a = CoreTiming::RegisterEvent("callbackA", FifoCallback<0>); 150 Core::Timing::EventType* cb_a = core_timing.RegisterEvent("callbackA", FifoCallback<0>);
147 CoreTiming::EventType* cb_b = CoreTiming::RegisterEvent("callbackB", FifoCallback<1>); 151 Core::Timing::EventType* cb_b = core_timing.RegisterEvent("callbackB", FifoCallback<1>);
148 CoreTiming::EventType* cb_c = CoreTiming::RegisterEvent("callbackC", FifoCallback<2>); 152 Core::Timing::EventType* cb_c = core_timing.RegisterEvent("callbackC", FifoCallback<2>);
149 CoreTiming::EventType* cb_d = CoreTiming::RegisterEvent("callbackD", FifoCallback<3>); 153 Core::Timing::EventType* cb_d = core_timing.RegisterEvent("callbackD", FifoCallback<3>);
150 CoreTiming::EventType* cb_e = CoreTiming::RegisterEvent("callbackE", FifoCallback<4>); 154 Core::Timing::EventType* cb_e = core_timing.RegisterEvent("callbackE", FifoCallback<4>);
151 155
152 CoreTiming::ScheduleEvent(1000, cb_a, CB_IDS[0]); 156 core_timing.ScheduleEvent(1000, cb_a, CB_IDS[0]);
153 CoreTiming::ScheduleEvent(1000, cb_b, CB_IDS[1]); 157 core_timing.ScheduleEvent(1000, cb_b, CB_IDS[1]);
154 CoreTiming::ScheduleEvent(1000, cb_c, CB_IDS[2]); 158 core_timing.ScheduleEvent(1000, cb_c, CB_IDS[2]);
155 CoreTiming::ScheduleEvent(1000, cb_d, CB_IDS[3]); 159 core_timing.ScheduleEvent(1000, cb_d, CB_IDS[3]);
156 CoreTiming::ScheduleEvent(1000, cb_e, CB_IDS[4]); 160 core_timing.ScheduleEvent(1000, cb_e, CB_IDS[4]);
157 161
158 // Enter slice 0 162 // Enter slice 0
159 CoreTiming::Advance(); 163 core_timing.Advance();
160 REQUIRE(1000 == CoreTiming::GetDowncount()); 164 REQUIRE(1000 == core_timing.GetDowncount());
161 165
162 callbacks_ran_flags = 0; 166 callbacks_ran_flags = 0;
163 counter = 0; 167 counter = 0;
164 lateness = 0; 168 lateness = 0;
165 CoreTiming::AddTicks(CoreTiming::GetDowncount()); 169 core_timing.AddTicks(core_timing.GetDowncount());
166 CoreTiming::Advance(); 170 core_timing.Advance();
167 REQUIRE(MAX_SLICE_LENGTH == CoreTiming::GetDowncount()); 171 REQUIRE(MAX_SLICE_LENGTH == core_timing.GetDowncount());
168 REQUIRE(0x1FULL == callbacks_ran_flags.to_ullong()); 172 REQUIRE(0x1FULL == callbacks_ran_flags.to_ullong());
169} 173}
170 174
171TEST_CASE("CoreTiming[PredictableLateness]", "[core]") { 175TEST_CASE("Core::Timing[PredictableLateness]", "[core]") {
172 ScopeInit guard; 176 ScopeInit guard;
177 auto& core_timing = guard.core_timing;
173 178
174 CoreTiming::EventType* cb_a = CoreTiming::RegisterEvent("callbackA", CallbackTemplate<0>); 179 Core::Timing::EventType* cb_a = core_timing.RegisterEvent("callbackA", CallbackTemplate<0>);
175 CoreTiming::EventType* cb_b = CoreTiming::RegisterEvent("callbackB", CallbackTemplate<1>); 180 Core::Timing::EventType* cb_b = core_timing.RegisterEvent("callbackB", CallbackTemplate<1>);
176 181
177 // Enter slice 0 182 // Enter slice 0
178 CoreTiming::Advance(); 183 core_timing.Advance();
179 184
180 CoreTiming::ScheduleEvent(100, cb_a, CB_IDS[0]); 185 core_timing.ScheduleEvent(100, cb_a, CB_IDS[0]);
181 CoreTiming::ScheduleEvent(200, cb_b, CB_IDS[1]); 186 core_timing.ScheduleEvent(200, cb_b, CB_IDS[1]);
182 187
183 AdvanceAndCheck(0, 90, 10, -10); // (100 - 10) 188 AdvanceAndCheck(core_timing, 0, 90, 10, -10); // (100 - 10)
184 AdvanceAndCheck(1, MAX_SLICE_LENGTH, 50, -50); 189 AdvanceAndCheck(core_timing, 1, MAX_SLICE_LENGTH, 50, -50);
185} 190}
186 191
187namespace ChainSchedulingTest { 192namespace ChainSchedulingTest {
188static int reschedules = 0; 193static int reschedules = 0;
189 194
190static void RescheduleCallback(u64 userdata, s64 cycles_late) { 195static void RescheduleCallback(Core::Timing::CoreTiming& core_timing, u64 userdata,
196 s64 cycles_late) {
191 --reschedules; 197 --reschedules;
192 REQUIRE(reschedules >= 0); 198 REQUIRE(reschedules >= 0);
193 REQUIRE(lateness == cycles_late); 199 REQUIRE(lateness == cycles_late);
194 200
195 if (reschedules > 0) 201 if (reschedules > 0) {
196 CoreTiming::ScheduleEvent(1000, reinterpret_cast<CoreTiming::EventType*>(userdata), 202 core_timing.ScheduleEvent(1000, reinterpret_cast<Core::Timing::EventType*>(userdata),
197 userdata); 203 userdata);
204 }
198} 205}
199} // namespace ChainSchedulingTest 206} // namespace ChainSchedulingTest
200 207
@@ -202,36 +209,39 @@ TEST_CASE("CoreTiming[ChainScheduling]", "[core]") {
202 using namespace ChainSchedulingTest; 209 using namespace ChainSchedulingTest;
203 210
204 ScopeInit guard; 211 ScopeInit guard;
212 auto& core_timing = guard.core_timing;
205 213
206 CoreTiming::EventType* cb_a = CoreTiming::RegisterEvent("callbackA", CallbackTemplate<0>); 214 Core::Timing::EventType* cb_a = core_timing.RegisterEvent("callbackA", CallbackTemplate<0>);
207 CoreTiming::EventType* cb_b = CoreTiming::RegisterEvent("callbackB", CallbackTemplate<1>); 215 Core::Timing::EventType* cb_b = core_timing.RegisterEvent("callbackB", CallbackTemplate<1>);
208 CoreTiming::EventType* cb_c = CoreTiming::RegisterEvent("callbackC", CallbackTemplate<2>); 216 Core::Timing::EventType* cb_c = core_timing.RegisterEvent("callbackC", CallbackTemplate<2>);
209 CoreTiming::EventType* cb_rs = 217 Core::Timing::EventType* cb_rs = core_timing.RegisterEvent(
210 CoreTiming::RegisterEvent("callbackReschedule", RescheduleCallback); 218 "callbackReschedule", [&core_timing](u64 userdata, s64 cycles_late) {
219 RescheduleCallback(core_timing, userdata, cycles_late);
220 });
211 221
212 // Enter slice 0 222 // Enter slice 0
213 CoreTiming::Advance(); 223 core_timing.Advance();
214 224
215 CoreTiming::ScheduleEvent(800, cb_a, CB_IDS[0]); 225 core_timing.ScheduleEvent(800, cb_a, CB_IDS[0]);
216 CoreTiming::ScheduleEvent(1000, cb_b, CB_IDS[1]); 226 core_timing.ScheduleEvent(1000, cb_b, CB_IDS[1]);
217 CoreTiming::ScheduleEvent(2200, cb_c, CB_IDS[2]); 227 core_timing.ScheduleEvent(2200, cb_c, CB_IDS[2]);
218 CoreTiming::ScheduleEvent(1000, cb_rs, reinterpret_cast<u64>(cb_rs)); 228 core_timing.ScheduleEvent(1000, cb_rs, reinterpret_cast<u64>(cb_rs));
219 REQUIRE(800 == CoreTiming::GetDowncount()); 229 REQUIRE(800 == core_timing.GetDowncount());
220 230
221 reschedules = 3; 231 reschedules = 3;
222 AdvanceAndCheck(0, 200); // cb_a 232 AdvanceAndCheck(core_timing, 0, 200); // cb_a
223 AdvanceAndCheck(1, 1000); // cb_b, cb_rs 233 AdvanceAndCheck(core_timing, 1, 1000); // cb_b, cb_rs
224 REQUIRE(2 == reschedules); 234 REQUIRE(2 == reschedules);
225 235
226 CoreTiming::AddTicks(CoreTiming::GetDowncount()); 236 core_timing.AddTicks(core_timing.GetDowncount());
227 CoreTiming::Advance(); // cb_rs 237 core_timing.Advance(); // cb_rs
228 REQUIRE(1 == reschedules); 238 REQUIRE(1 == reschedules);
229 REQUIRE(200 == CoreTiming::GetDowncount()); 239 REQUIRE(200 == core_timing.GetDowncount());
230 240
231 AdvanceAndCheck(2, 800); // cb_c 241 AdvanceAndCheck(core_timing, 2, 800); // cb_c
232 242
233 CoreTiming::AddTicks(CoreTiming::GetDowncount()); 243 core_timing.AddTicks(core_timing.GetDowncount());
234 CoreTiming::Advance(); // cb_rs 244 core_timing.Advance(); // cb_rs
235 REQUIRE(0 == reschedules); 245 REQUIRE(0 == reschedules);
236 REQUIRE(MAX_SLICE_LENGTH == CoreTiming::GetDowncount()); 246 REQUIRE(MAX_SLICE_LENGTH == core_timing.GetDowncount());
237} 247}
diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt
index 6113e17ff..14b76680f 100644
--- a/src/video_core/CMakeLists.txt
+++ b/src/video_core/CMakeLists.txt
@@ -5,18 +5,24 @@ add_library(video_core STATIC
5 debug_utils/debug_utils.h 5 debug_utils/debug_utils.h
6 engines/fermi_2d.cpp 6 engines/fermi_2d.cpp
7 engines/fermi_2d.h 7 engines/fermi_2d.h
8 engines/kepler_compute.cpp
9 engines/kepler_compute.h
8 engines/kepler_memory.cpp 10 engines/kepler_memory.cpp
9 engines/kepler_memory.h 11 engines/kepler_memory.h
10 engines/maxwell_3d.cpp 12 engines/maxwell_3d.cpp
11 engines/maxwell_3d.h 13 engines/maxwell_3d.h
12 engines/maxwell_compute.cpp
13 engines/maxwell_compute.h
14 engines/maxwell_dma.cpp 14 engines/maxwell_dma.cpp
15 engines/maxwell_dma.h 15 engines/maxwell_dma.h
16 engines/shader_bytecode.h 16 engines/shader_bytecode.h
17 engines/shader_header.h 17 engines/shader_header.h
18 gpu.cpp 18 gpu.cpp
19 gpu.h 19 gpu.h
20 gpu_asynch.cpp
21 gpu_asynch.h
22 gpu_synch.cpp
23 gpu_synch.h
24 gpu_thread.cpp
25 gpu_thread.h
20 macro_interpreter.cpp 26 macro_interpreter.cpp
21 macro_interpreter.h 27 macro_interpreter.h
22 memory_manager.cpp 28 memory_manager.cpp
@@ -44,6 +50,8 @@ add_library(video_core STATIC
44 renderer_opengl/gl_shader_cache.h 50 renderer_opengl/gl_shader_cache.h
45 renderer_opengl/gl_shader_decompiler.cpp 51 renderer_opengl/gl_shader_decompiler.cpp
46 renderer_opengl/gl_shader_decompiler.h 52 renderer_opengl/gl_shader_decompiler.h
53 renderer_opengl/gl_shader_disk_cache.cpp
54 renderer_opengl/gl_shader_disk_cache.h
47 renderer_opengl/gl_shader_gen.cpp 55 renderer_opengl/gl_shader_gen.cpp
48 renderer_opengl/gl_shader_gen.h 56 renderer_opengl/gl_shader_gen.h
49 renderer_opengl/gl_shader_manager.cpp 57 renderer_opengl/gl_shader_manager.cpp
@@ -72,6 +80,7 @@ add_library(video_core STATIC
72 shader/decode/hfma2.cpp 80 shader/decode/hfma2.cpp
73 shader/decode/conversion.cpp 81 shader/decode/conversion.cpp
74 shader/decode/memory.cpp 82 shader/decode/memory.cpp
83 shader/decode/texture.cpp
75 shader/decode/float_set_predicate.cpp 84 shader/decode/float_set_predicate.cpp
76 shader/decode/integer_set_predicate.cpp 85 shader/decode/integer_set_predicate.cpp
77 shader/decode/half_set_predicate.cpp 86 shader/decode/half_set_predicate.cpp
@@ -92,6 +101,8 @@ add_library(video_core STATIC
92 surface.h 101 surface.h
93 textures/astc.cpp 102 textures/astc.cpp
94 textures/astc.h 103 textures/astc.h
104 textures/convert.cpp
105 textures/convert.h
95 textures/decoders.cpp 106 textures/decoders.cpp
96 textures/decoders.h 107 textures/decoders.h
97 textures/texture.h 108 textures/texture.h
@@ -99,7 +110,31 @@ add_library(video_core STATIC
99 video_core.h 110 video_core.h
100) 111)
101 112
113if (ENABLE_VULKAN)
114 target_sources(video_core PRIVATE
115 renderer_vulkan/declarations.h
116 renderer_vulkan/maxwell_to_vk.cpp
117 renderer_vulkan/maxwell_to_vk.h
118 renderer_vulkan/vk_buffer_cache.cpp
119 renderer_vulkan/vk_buffer_cache.h
120 renderer_vulkan/vk_device.cpp
121 renderer_vulkan/vk_device.h
122 renderer_vulkan/vk_memory_manager.cpp
123 renderer_vulkan/vk_memory_manager.h
124 renderer_vulkan/vk_resource_manager.cpp
125 renderer_vulkan/vk_resource_manager.h
126 renderer_vulkan/vk_sampler_cache.cpp
127 renderer_vulkan/vk_sampler_cache.h
128 renderer_vulkan/vk_scheduler.cpp
129 renderer_vulkan/vk_scheduler.h
130 renderer_vulkan/vk_stream_buffer.cpp
131 renderer_vulkan/vk_stream_buffer.h)
132
133 target_include_directories(video_core PRIVATE ../../externals/Vulkan-Headers/include)
134 target_compile_definitions(video_core PRIVATE HAS_VULKAN)
135endif()
136
102create_target_directory_groups(video_core) 137create_target_directory_groups(video_core)
103 138
104target_link_libraries(video_core PUBLIC common core) 139target_link_libraries(video_core PUBLIC common core)
105target_link_libraries(video_core PRIVATE glad) 140target_link_libraries(video_core PRIVATE glad lz4_static)
diff --git a/src/video_core/dma_pusher.cpp b/src/video_core/dma_pusher.cpp
index 63a958f11..8b1bea1ae 100644
--- a/src/video_core/dma_pusher.cpp
+++ b/src/video_core/dma_pusher.cpp
@@ -33,16 +33,33 @@ void DmaPusher::DispatchCalls() {
33} 33}
34 34
35bool DmaPusher::Step() { 35bool DmaPusher::Step() {
36 if (dma_get != dma_put) { 36 if (!ib_enable || dma_pushbuffer.empty()) {
37 // Push buffer non-empty, read a word 37 // pushbuffer empty and IB empty or nonexistent - nothing to do
38 const CommandHeader command_header{ 38 return false;
39 Memory::Read32(*gpu.MemoryManager().GpuToCpuAddress(dma_get))}; 39 }
40 40
41 dma_get += sizeof(u32); 41 const CommandList& command_list{dma_pushbuffer.front()};
42 const CommandListHeader command_list_header{command_list[dma_pushbuffer_subindex++]};
43 GPUVAddr dma_get = command_list_header.addr;
44 GPUVAddr dma_put = dma_get + command_list_header.size * sizeof(u32);
45 bool non_main = command_list_header.is_non_main;
42 46
43 if (!non_main) { 47 if (dma_pushbuffer_subindex >= command_list.size()) {
44 dma_mget = dma_get; 48 // We've gone through the current list, remove it from the queue
45 } 49 dma_pushbuffer.pop();
50 dma_pushbuffer_subindex = 0;
51 }
52
53 if (command_list_header.size == 0) {
54 return true;
55 }
56
57 // Push buffer non-empty, read a word
58 command_headers.resize(command_list_header.size);
59 gpu.MemoryManager().ReadBlock(dma_get, command_headers.data(),
60 command_list_header.size * sizeof(u32));
61
62 for (const CommandHeader& command_header : command_headers) {
46 63
47 // now, see if we're in the middle of a command 64 // now, see if we're in the middle of a command
48 if (dma_state.length_pending) { 65 if (dma_state.length_pending) {
@@ -89,22 +106,11 @@ bool DmaPusher::Step() {
89 break; 106 break;
90 } 107 }
91 } 108 }
92 } else if (ib_enable && !dma_pushbuffer.empty()) { 109 }
93 // Current pushbuffer empty, but we have more IB entries to read 110
94 const CommandList& command_list{dma_pushbuffer.front()}; 111 if (!non_main) {
95 const CommandListHeader& command_list_header{command_list[dma_pushbuffer_subindex++]}; 112 // TODO (degasus): This is dead code, as dma_mget is never read.
96 dma_get = command_list_header.addr; 113 dma_mget = dma_put;
97 dma_put = dma_get + command_list_header.size * sizeof(u32);
98 non_main = command_list_header.is_non_main;
99
100 if (dma_pushbuffer_subindex >= command_list.size()) {
101 // We've gone through the current list, remove it from the queue
102 dma_pushbuffer.pop();
103 dma_pushbuffer_subindex = 0;
104 }
105 } else {
106 // Otherwise, pushbuffer empty and IB empty or nonexistent - nothing to do
107 return {};
108 } 114 }
109 115
110 return true; 116 return true;
diff --git a/src/video_core/dma_pusher.h b/src/video_core/dma_pusher.h
index 1097e5c49..27a36348c 100644
--- a/src/video_core/dma_pusher.h
+++ b/src/video_core/dma_pusher.h
@@ -75,6 +75,8 @@ private:
75 75
76 GPU& gpu; 76 GPU& gpu;
77 77
78 std::vector<CommandHeader> command_headers; ///< Buffer for list of commands fetched at once
79
78 std::queue<CommandList> dma_pushbuffer; ///< Queue of command lists to be processed 80 std::queue<CommandList> dma_pushbuffer; ///< Queue of command lists to be processed
79 std::size_t dma_pushbuffer_subindex{}; ///< Index within a command list within the pushbuffer 81 std::size_t dma_pushbuffer_subindex{}; ///< Index within a command list within the pushbuffer
80 82
@@ -89,11 +91,8 @@ private:
89 DmaState dma_state{}; 91 DmaState dma_state{};
90 bool dma_increment_once{}; 92 bool dma_increment_once{};
91 93
92 GPUVAddr dma_put{}; ///< pushbuffer current end address
93 GPUVAddr dma_get{}; ///< pushbuffer current read address
94 GPUVAddr dma_mget{}; ///< main pushbuffer last read address 94 GPUVAddr dma_mget{}; ///< main pushbuffer last read address
95 bool ib_enable{true}; ///< IB mode enabled 95 bool ib_enable{true}; ///< IB mode enabled
96 bool non_main{}; ///< non-main pushbuffer active
97}; 96};
98 97
99} // namespace Tegra 98} // namespace Tegra
diff --git a/src/video_core/engines/fermi_2d.cpp b/src/video_core/engines/fermi_2d.cpp
index 80f70e332..03b7ee5d8 100644
--- a/src/video_core/engines/fermi_2d.cpp
+++ b/src/video_core/engines/fermi_2d.cpp
@@ -2,12 +2,11 @@
2// Licensed under GPLv2 or any later version 2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included. 3// Refer to the license.txt file included.
4 4
5#include "core/core.h" 5#include "common/assert.h"
6#include "core/memory.h" 6#include "common/logging/log.h"
7#include "common/math_util.h"
7#include "video_core/engines/fermi_2d.h" 8#include "video_core/engines/fermi_2d.h"
8#include "video_core/engines/maxwell_3d.h"
9#include "video_core/rasterizer_interface.h" 9#include "video_core/rasterizer_interface.h"
10#include "video_core/textures/decoders.h"
11 10
12namespace Tegra::Engines { 11namespace Tegra::Engines {
13 12
@@ -21,7 +20,9 @@ void Fermi2D::CallMethod(const GPU::MethodCall& method_call) {
21 regs.reg_array[method_call.method] = method_call.argument; 20 regs.reg_array[method_call.method] = method_call.argument;
22 21
23 switch (method_call.method) { 22 switch (method_call.method) {
24 case FERMI2D_REG_INDEX(trigger): { 23 // Trigger the surface copy on the last register write. This is blit_src_y, but this is 64-bit,
24 // so trigger on the second 32-bit write.
25 case FERMI2D_REG_INDEX(blit_src_y) + 1: {
25 HandleSurfaceCopy(); 26 HandleSurfaceCopy();
26 break; 27 break;
27 } 28 }
@@ -32,55 +33,23 @@ void Fermi2D::HandleSurfaceCopy() {
32 LOG_WARNING(HW_GPU, "Requested a surface copy with operation {}", 33 LOG_WARNING(HW_GPU, "Requested a surface copy with operation {}",
33 static_cast<u32>(regs.operation)); 34 static_cast<u32>(regs.operation));
34 35
35 const GPUVAddr source = regs.src.Address();
36 const GPUVAddr dest = regs.dst.Address();
37
38 // TODO(Subv): Only same-format and same-size copies are allowed for now.
39 ASSERT(regs.src.format == regs.dst.format);
40 ASSERT(regs.src.width * regs.src.height == regs.dst.width * regs.dst.height);
41
42 // TODO(Subv): Only raw copies are implemented. 36 // TODO(Subv): Only raw copies are implemented.
43 ASSERT(regs.operation == Regs::Operation::SrcCopy); 37 ASSERT(regs.operation == Regs::Operation::SrcCopy);
44 38
45 const VAddr source_cpu = *memory_manager.GpuToCpuAddress(source); 39 const u32 src_blit_x1{static_cast<u32>(regs.blit_src_x >> 32)};
46 const VAddr dest_cpu = *memory_manager.GpuToCpuAddress(dest); 40 const u32 src_blit_y1{static_cast<u32>(regs.blit_src_y >> 32)};
47 41 const u32 src_blit_x2{
48 u32 src_bytes_per_pixel = RenderTargetBytesPerPixel(regs.src.format); 42 static_cast<u32>((regs.blit_src_x + (regs.blit_dst_width * regs.blit_du_dx)) >> 32)};
49 u32 dst_bytes_per_pixel = RenderTargetBytesPerPixel(regs.dst.format); 43 const u32 src_blit_y2{
50 44 static_cast<u32>((regs.blit_src_y + (regs.blit_dst_height * regs.blit_dv_dy)) >> 32)};
51 if (!rasterizer.AccelerateSurfaceCopy(regs.src, regs.dst)) {
52 // All copies here update the main memory, so mark all rasterizer states as invalid.
53 Core::System::GetInstance().GPU().Maxwell3D().dirty_flags.OnMemoryWrite();
54 45
55 rasterizer.FlushRegion(source_cpu, src_bytes_per_pixel * regs.src.width * regs.src.height); 46 const Common::Rectangle<u32> src_rect{src_blit_x1, src_blit_y1, src_blit_x2, src_blit_y2};
56 // We have to invalidate the destination region to evict any outdated surfaces from the 47 const Common::Rectangle<u32> dst_rect{regs.blit_dst_x, regs.blit_dst_y,
57 // cache. We do this before actually writing the new data because the destination address 48 regs.blit_dst_x + regs.blit_dst_width,
58 // might contain a dirty surface that will have to be written back to memory. 49 regs.blit_dst_y + regs.blit_dst_height};
59 rasterizer.InvalidateRegion(dest_cpu,
60 dst_bytes_per_pixel * regs.dst.width * regs.dst.height);
61 50
62 if (regs.src.linear == regs.dst.linear) { 51 if (!rasterizer.AccelerateSurfaceCopy(regs.src, regs.dst, src_rect, dst_rect)) {
63 // If the input layout and the output layout are the same, just perform a raw copy. 52 UNIMPLEMENTED();
64 ASSERT(regs.src.BlockHeight() == regs.dst.BlockHeight());
65 Memory::CopyBlock(dest_cpu, source_cpu,
66 src_bytes_per_pixel * regs.dst.width * regs.dst.height);
67 return;
68 }
69 u8* src_buffer = Memory::GetPointer(source_cpu);
70 u8* dst_buffer = Memory::GetPointer(dest_cpu);
71 if (!regs.src.linear && regs.dst.linear) {
72 // If the input is tiled and the output is linear, deswizzle the input and copy it over.
73 Texture::CopySwizzledData(regs.src.width, regs.src.height, regs.src.depth,
74 src_bytes_per_pixel, dst_bytes_per_pixel, src_buffer,
75 dst_buffer, true, regs.src.BlockHeight(),
76 regs.src.BlockDepth(), 0);
77 } else {
78 // If the input is linear and the output is tiled, swizzle the input and copy it over.
79 Texture::CopySwizzledData(regs.src.width, regs.src.height, regs.src.depth,
80 src_bytes_per_pixel, dst_bytes_per_pixel, dst_buffer,
81 src_buffer, false, regs.dst.BlockHeight(),
82 regs.dst.BlockDepth(), 0);
83 }
84 } 53 }
85} 54}
86 55
diff --git a/src/video_core/engines/fermi_2d.h b/src/video_core/engines/fermi_2d.h
index 50009bf75..80523e320 100644
--- a/src/video_core/engines/fermi_2d.h
+++ b/src/video_core/engines/fermi_2d.h
@@ -5,7 +5,7 @@
5#pragma once 5#pragma once
6 6
7#include <array> 7#include <array>
8#include "common/assert.h" 8#include <cstddef>
9#include "common/bit_field.h" 9#include "common/bit_field.h"
10#include "common/common_funcs.h" 10#include "common/common_funcs.h"
11#include "common/common_types.h" 11#include "common/common_types.h"
@@ -94,12 +94,22 @@ public:
94 94
95 Operation operation; 95 Operation operation;
96 96
97 INSERT_PADDING_WORDS(0x9); 97 INSERT_PADDING_WORDS(0x177);
98 98
99 // TODO(Subv): This is only a guess. 99 u32 blit_control;
100 u32 trigger;
101 100
102 INSERT_PADDING_WORDS(0x1A3); 101 INSERT_PADDING_WORDS(0x8);
102
103 u32 blit_dst_x;
104 u32 blit_dst_y;
105 u32 blit_dst_width;
106 u32 blit_dst_height;
107 u64 blit_du_dx;
108 u64 blit_dv_dy;
109 u64 blit_src_x;
110 u64 blit_src_y;
111
112 INSERT_PADDING_WORDS(0x21);
103 }; 113 };
104 std::array<u32, NUM_REGS> reg_array; 114 std::array<u32, NUM_REGS> reg_array;
105 }; 115 };
@@ -122,7 +132,16 @@ private:
122ASSERT_REG_POSITION(dst, 0x80); 132ASSERT_REG_POSITION(dst, 0x80);
123ASSERT_REG_POSITION(src, 0x8C); 133ASSERT_REG_POSITION(src, 0x8C);
124ASSERT_REG_POSITION(operation, 0xAB); 134ASSERT_REG_POSITION(operation, 0xAB);
125ASSERT_REG_POSITION(trigger, 0xB5); 135ASSERT_REG_POSITION(blit_control, 0x223);
136ASSERT_REG_POSITION(blit_dst_x, 0x22c);
137ASSERT_REG_POSITION(blit_dst_y, 0x22d);
138ASSERT_REG_POSITION(blit_dst_width, 0x22e);
139ASSERT_REG_POSITION(blit_dst_height, 0x22f);
140ASSERT_REG_POSITION(blit_du_dx, 0x230);
141ASSERT_REG_POSITION(blit_dv_dy, 0x232);
142ASSERT_REG_POSITION(blit_src_x, 0x234);
143ASSERT_REG_POSITION(blit_src_y, 0x236);
144
126#undef ASSERT_REG_POSITION 145#undef ASSERT_REG_POSITION
127 146
128} // namespace Tegra::Engines 147} // namespace Tegra::Engines
diff --git a/src/video_core/engines/kepler_compute.cpp b/src/video_core/engines/kepler_compute.cpp
new file mode 100644
index 000000000..b1d950460
--- /dev/null
+++ b/src/video_core/engines/kepler_compute.cpp
@@ -0,0 +1,33 @@
1// Copyright 2018 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/assert.h"
6#include "common/logging/log.h"
7#include "video_core/engines/kepler_compute.h"
8#include "video_core/memory_manager.h"
9
10namespace Tegra::Engines {
11
12KeplerCompute::KeplerCompute(MemoryManager& memory_manager) : memory_manager{memory_manager} {}
13
14KeplerCompute::~KeplerCompute() = default;
15
16void KeplerCompute::CallMethod(const GPU::MethodCall& method_call) {
17 ASSERT_MSG(method_call.method < Regs::NUM_REGS,
18 "Invalid KeplerCompute register, increase the size of the Regs structure");
19
20 regs.reg_array[method_call.method] = method_call.argument;
21
22 switch (method_call.method) {
23 case KEPLER_COMPUTE_REG_INDEX(launch):
24 // Abort execution since compute shaders can be used to alter game memory (e.g. CUDA
25 // kernels)
26 UNREACHABLE_MSG("Compute shaders are not implemented");
27 break;
28 default:
29 break;
30 }
31}
32
33} // namespace Tegra::Engines
diff --git a/src/video_core/engines/maxwell_compute.h b/src/video_core/engines/kepler_compute.h
index 1d71f11bd..6575afd0f 100644
--- a/src/video_core/engines/maxwell_compute.h
+++ b/src/video_core/engines/kepler_compute.h
@@ -5,52 +5,52 @@
5#pragma once 5#pragma once
6 6
7#include <array> 7#include <array>
8#include "common/assert.h" 8#include <cstddef>
9#include "common/bit_field.h"
10#include "common/common_funcs.h" 9#include "common/common_funcs.h"
11#include "common/common_types.h" 10#include "common/common_types.h"
12#include "video_core/gpu.h" 11#include "video_core/gpu.h"
12#include "video_core/memory_manager.h"
13 13
14namespace Tegra::Engines { 14namespace Tegra::Engines {
15 15
16#define MAXWELL_COMPUTE_REG_INDEX(field_name) \ 16#define KEPLER_COMPUTE_REG_INDEX(field_name) \
17 (offsetof(Tegra::Engines::MaxwellCompute::Regs, field_name) / sizeof(u32)) 17 (offsetof(Tegra::Engines::KeplerCompute::Regs, field_name) / sizeof(u32))
18 18
19class MaxwellCompute final { 19class KeplerCompute final {
20public: 20public:
21 MaxwellCompute() = default; 21 explicit KeplerCompute(MemoryManager& memory_manager);
22 ~MaxwellCompute() = default; 22 ~KeplerCompute();
23
24 static constexpr std::size_t NumConstBuffers = 8;
23 25
24 struct Regs { 26 struct Regs {
25 static constexpr std::size_t NUM_REGS = 0xCF8; 27 static constexpr std::size_t NUM_REGS = 0xCF8;
26 28
27 union { 29 union {
28 struct { 30 struct {
29 INSERT_PADDING_WORDS(0x281); 31 INSERT_PADDING_WORDS(0xAF);
30 32
31 union { 33 u32 launch;
32 u32 compute_end;
33 BitField<0, 1, u32> unknown;
34 } compute;
35 34
36 INSERT_PADDING_WORDS(0xA76); 35 INSERT_PADDING_WORDS(0xC48);
37 }; 36 };
38 std::array<u32, NUM_REGS> reg_array; 37 std::array<u32, NUM_REGS> reg_array;
39 }; 38 };
40 } regs{}; 39 } regs{};
41
42 static_assert(sizeof(Regs) == Regs::NUM_REGS * sizeof(u32), 40 static_assert(sizeof(Regs) == Regs::NUM_REGS * sizeof(u32),
43 "MaxwellCompute Regs has wrong size"); 41 "KeplerCompute Regs has wrong size");
42
43 MemoryManager& memory_manager;
44 44
45 /// Write the value to the register identified by method. 45 /// Write the value to the register identified by method.
46 void CallMethod(const GPU::MethodCall& method_call); 46 void CallMethod(const GPU::MethodCall& method_call);
47}; 47};
48 48
49#define ASSERT_REG_POSITION(field_name, position) \ 49#define ASSERT_REG_POSITION(field_name, position) \
50 static_assert(offsetof(MaxwellCompute::Regs, field_name) == position * 4, \ 50 static_assert(offsetof(KeplerCompute::Regs, field_name) == position * 4, \
51 "Field " #field_name " has invalid position") 51 "Field " #field_name " has invalid position")
52 52
53ASSERT_REG_POSITION(compute, 0x281); 53ASSERT_REG_POSITION(launch, 0xAF);
54 54
55#undef ASSERT_REG_POSITION 55#undef ASSERT_REG_POSITION
56 56
diff --git a/src/video_core/engines/kepler_memory.cpp b/src/video_core/engines/kepler_memory.cpp
index 4880191fc..0931b9626 100644
--- a/src/video_core/engines/kepler_memory.cpp
+++ b/src/video_core/engines/kepler_memory.cpp
@@ -2,18 +2,20 @@
2// Licensed under GPLv2 or any later version 2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included. 3// Refer to the license.txt file included.
4 4
5#include "common/assert.h"
5#include "common/logging/log.h" 6#include "common/logging/log.h"
6#include "core/core.h" 7#include "core/core.h"
7#include "core/memory.h" 8#include "core/memory.h"
8#include "video_core/engines/kepler_memory.h" 9#include "video_core/engines/kepler_memory.h"
9#include "video_core/engines/maxwell_3d.h" 10#include "video_core/engines/maxwell_3d.h"
10#include "video_core/rasterizer_interface.h" 11#include "video_core/rasterizer_interface.h"
12#include "video_core/renderer_base.h"
11 13
12namespace Tegra::Engines { 14namespace Tegra::Engines {
13 15
14KeplerMemory::KeplerMemory(VideoCore::RasterizerInterface& rasterizer, 16KeplerMemory::KeplerMemory(Core::System& system, VideoCore::RasterizerInterface& rasterizer,
15 MemoryManager& memory_manager) 17 MemoryManager& memory_manager)
16 : memory_manager(memory_manager), rasterizer{rasterizer} {} 18 : system{system}, memory_manager(memory_manager), rasterizer{rasterizer} {}
17 19
18KeplerMemory::~KeplerMemory() = default; 20KeplerMemory::~KeplerMemory() = default;
19 21
@@ -39,17 +41,14 @@ void KeplerMemory::ProcessData(u32 data) {
39 ASSERT_MSG(regs.exec.linear, "Non-linear uploads are not supported"); 41 ASSERT_MSG(regs.exec.linear, "Non-linear uploads are not supported");
40 ASSERT(regs.dest.x == 0 && regs.dest.y == 0 && regs.dest.z == 0); 42 ASSERT(regs.dest.x == 0 && regs.dest.y == 0 && regs.dest.z == 0);
41 43
42 GPUVAddr address = regs.dest.Address();
43 VAddr dest_address =
44 *memory_manager.GpuToCpuAddress(address + state.write_offset * sizeof(u32));
45
46 // We have to invalidate the destination region to evict any outdated surfaces from the cache. 44 // We have to invalidate the destination region to evict any outdated surfaces from the cache.
47 // We do this before actually writing the new data because the destination address might contain 45 // We do this before actually writing the new data because the destination address might
48 // a dirty surface that will have to be written back to memory. 46 // contain a dirty surface that will have to be written back to memory.
49 rasterizer.InvalidateRegion(dest_address, sizeof(u32)); 47 const GPUVAddr address{regs.dest.Address() + state.write_offset * sizeof(u32)};
48 rasterizer.InvalidateRegion(ToCacheAddr(memory_manager.GetPointer(address)), sizeof(u32));
49 memory_manager.Write32(address, data);
50 50
51 Memory::Write32(dest_address, data); 51 system.GPU().Maxwell3D().dirty_flags.OnMemoryWrite();
52 Core::System::GetInstance().GPU().Maxwell3D().dirty_flags.OnMemoryWrite();
53 52
54 state.write_offset++; 53 state.write_offset++;
55} 54}
diff --git a/src/video_core/engines/kepler_memory.h b/src/video_core/engines/kepler_memory.h
index fe9ebc5b9..9181e9d80 100644
--- a/src/video_core/engines/kepler_memory.h
+++ b/src/video_core/engines/kepler_memory.h
@@ -5,13 +5,17 @@
5#pragma once 5#pragma once
6 6
7#include <array> 7#include <array>
8#include "common/assert.h" 8#include <cstddef>
9#include "common/bit_field.h" 9#include "common/bit_field.h"
10#include "common/common_funcs.h" 10#include "common/common_funcs.h"
11#include "common/common_types.h" 11#include "common/common_types.h"
12#include "video_core/gpu.h" 12#include "video_core/gpu.h"
13#include "video_core/memory_manager.h" 13#include "video_core/memory_manager.h"
14 14
15namespace Core {
16class System;
17}
18
15namespace VideoCore { 19namespace VideoCore {
16class RasterizerInterface; 20class RasterizerInterface;
17} 21}
@@ -23,7 +27,8 @@ namespace Tegra::Engines {
23 27
24class KeplerMemory final { 28class KeplerMemory final {
25public: 29public:
26 KeplerMemory(VideoCore::RasterizerInterface& rasterizer, MemoryManager& memory_manager); 30 KeplerMemory(Core::System& system, VideoCore::RasterizerInterface& rasterizer,
31 MemoryManager& memory_manager);
27 ~KeplerMemory(); 32 ~KeplerMemory();
28 33
29 /// Write the value to the register identified by method. 34 /// Write the value to the register identified by method.
@@ -76,6 +81,7 @@ public:
76 } state{}; 81 } state{};
77 82
78private: 83private:
84 Core::System& system;
79 MemoryManager& memory_manager; 85 MemoryManager& memory_manager;
80 VideoCore::RasterizerInterface& rasterizer; 86 VideoCore::RasterizerInterface& rasterizer;
81 87
diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp
index a388b3944..c5d5be4ef 100644
--- a/src/video_core/engines/maxwell_3d.cpp
+++ b/src/video_core/engines/maxwell_3d.cpp
@@ -19,8 +19,10 @@ namespace Tegra::Engines {
19/// First register id that is actually a Macro call. 19/// First register id that is actually a Macro call.
20constexpr u32 MacroRegistersStart = 0xE00; 20constexpr u32 MacroRegistersStart = 0xE00;
21 21
22Maxwell3D::Maxwell3D(VideoCore::RasterizerInterface& rasterizer, MemoryManager& memory_manager) 22Maxwell3D::Maxwell3D(Core::System& system, VideoCore::RasterizerInterface& rasterizer,
23 : memory_manager(memory_manager), rasterizer{rasterizer}, macro_interpreter(*this) { 23 MemoryManager& memory_manager)
24 : memory_manager(memory_manager), system{system}, rasterizer{rasterizer},
25 macro_interpreter(*this) {
24 InitializeRegisterDefaults(); 26 InitializeRegisterDefaults();
25} 27}
26 28
@@ -103,23 +105,25 @@ void Maxwell3D::CallMacroMethod(u32 method, std::vector<u32> parameters) {
103} 105}
104 106
105void Maxwell3D::CallMethod(const GPU::MethodCall& method_call) { 107void Maxwell3D::CallMethod(const GPU::MethodCall& method_call) {
106 auto debug_context = Core::System::GetInstance().GetGPUDebugContext(); 108 auto debug_context = system.GetGPUDebugContext();
109
110 const u32 method = method_call.method;
107 111
108 // It is an error to write to a register other than the current macro's ARG register before it 112 // It is an error to write to a register other than the current macro's ARG register before it
109 // has finished execution. 113 // has finished execution.
110 if (executing_macro != 0) { 114 if (executing_macro != 0) {
111 ASSERT(method_call.method == executing_macro + 1); 115 ASSERT(method == executing_macro + 1);
112 } 116 }
113 117
114 // Methods after 0xE00 are special, they're actually triggers for some microcode that was 118 // Methods after 0xE00 are special, they're actually triggers for some microcode that was
115 // uploaded to the GPU during initialization. 119 // uploaded to the GPU during initialization.
116 if (method_call.method >= MacroRegistersStart) { 120 if (method >= MacroRegistersStart) {
117 // We're trying to execute a macro 121 // We're trying to execute a macro
118 if (executing_macro == 0) { 122 if (executing_macro == 0) {
119 // A macro call must begin by writing the macro method's register, not its argument. 123 // A macro call must begin by writing the macro method's register, not its argument.
120 ASSERT_MSG((method_call.method % 2) == 0, 124 ASSERT_MSG((method % 2) == 0,
121 "Can't start macro execution by writing to the ARGS register"); 125 "Can't start macro execution by writing to the ARGS register");
122 executing_macro = method_call.method; 126 executing_macro = method;
123 } 127 }
124 128
125 macro_params.push_back(method_call.argument); 129 macro_params.push_back(method_call.argument);
@@ -131,66 +135,62 @@ void Maxwell3D::CallMethod(const GPU::MethodCall& method_call) {
131 return; 135 return;
132 } 136 }
133 137
134 ASSERT_MSG(method_call.method < Regs::NUM_REGS, 138 ASSERT_MSG(method < Regs::NUM_REGS,
135 "Invalid Maxwell3D register, increase the size of the Regs structure"); 139 "Invalid Maxwell3D register, increase the size of the Regs structure");
136 140
137 if (debug_context) { 141 if (debug_context) {
138 debug_context->OnEvent(Tegra::DebugContext::Event::MaxwellCommandLoaded, nullptr); 142 debug_context->OnEvent(Tegra::DebugContext::Event::MaxwellCommandLoaded, nullptr);
139 } 143 }
140 144
141 if (regs.reg_array[method_call.method] != method_call.argument) { 145 if (regs.reg_array[method] != method_call.argument) {
142 regs.reg_array[method_call.method] = method_call.argument; 146 regs.reg_array[method] = method_call.argument;
143 // Color buffers 147 // Color buffers
144 constexpr u32 first_rt_reg = MAXWELL3D_REG_INDEX(rt); 148 constexpr u32 first_rt_reg = MAXWELL3D_REG_INDEX(rt);
145 constexpr u32 registers_per_rt = sizeof(regs.rt[0]) / sizeof(u32); 149 constexpr u32 registers_per_rt = sizeof(regs.rt[0]) / sizeof(u32);
146 if (method_call.method >= first_rt_reg && 150 if (method >= first_rt_reg &&
147 method_call.method < first_rt_reg + registers_per_rt * Regs::NumRenderTargets) { 151 method < first_rt_reg + registers_per_rt * Regs::NumRenderTargets) {
148 const std::size_t rt_index = (method_call.method - first_rt_reg) / registers_per_rt; 152 const std::size_t rt_index = (method - first_rt_reg) / registers_per_rt;
149 dirty_flags.color_buffer |= 1u << static_cast<u32>(rt_index); 153 dirty_flags.color_buffer.set(rt_index);
150 } 154 }
151 155
152 // Zeta buffer 156 // Zeta buffer
153 constexpr u32 registers_in_zeta = sizeof(regs.zeta) / sizeof(u32); 157 constexpr u32 registers_in_zeta = sizeof(regs.zeta) / sizeof(u32);
154 if (method_call.method == MAXWELL3D_REG_INDEX(zeta_enable) || 158 if (method == MAXWELL3D_REG_INDEX(zeta_enable) ||
155 method_call.method == MAXWELL3D_REG_INDEX(zeta_width) || 159 method == MAXWELL3D_REG_INDEX(zeta_width) ||
156 method_call.method == MAXWELL3D_REG_INDEX(zeta_height) || 160 method == MAXWELL3D_REG_INDEX(zeta_height) ||
157 (method_call.method >= MAXWELL3D_REG_INDEX(zeta) && 161 (method >= MAXWELL3D_REG_INDEX(zeta) &&
158 method_call.method < MAXWELL3D_REG_INDEX(zeta) + registers_in_zeta)) { 162 method < MAXWELL3D_REG_INDEX(zeta) + registers_in_zeta)) {
159 dirty_flags.zeta_buffer = true; 163 dirty_flags.zeta_buffer = true;
160 } 164 }
161 165
162 // Shader 166 // Shader
163 constexpr u32 shader_registers_count = 167 constexpr u32 shader_registers_count =
164 sizeof(regs.shader_config[0]) * Regs::MaxShaderProgram / sizeof(u32); 168 sizeof(regs.shader_config[0]) * Regs::MaxShaderProgram / sizeof(u32);
165 if (method_call.method >= MAXWELL3D_REG_INDEX(shader_config[0]) && 169 if (method >= MAXWELL3D_REG_INDEX(shader_config[0]) &&
166 method_call.method < MAXWELL3D_REG_INDEX(shader_config[0]) + shader_registers_count) { 170 method < MAXWELL3D_REG_INDEX(shader_config[0]) + shader_registers_count) {
167 dirty_flags.shaders = true; 171 dirty_flags.shaders = true;
168 } 172 }
169 173
170 // Vertex format 174 // Vertex format
171 if (method_call.method >= MAXWELL3D_REG_INDEX(vertex_attrib_format) && 175 if (method >= MAXWELL3D_REG_INDEX(vertex_attrib_format) &&
172 method_call.method < 176 method < MAXWELL3D_REG_INDEX(vertex_attrib_format) + regs.vertex_attrib_format.size()) {
173 MAXWELL3D_REG_INDEX(vertex_attrib_format) + regs.vertex_attrib_format.size()) {
174 dirty_flags.vertex_attrib_format = true; 177 dirty_flags.vertex_attrib_format = true;
175 } 178 }
176 179
177 // Vertex buffer 180 // Vertex buffer
178 if (method_call.method >= MAXWELL3D_REG_INDEX(vertex_array) && 181 if (method >= MAXWELL3D_REG_INDEX(vertex_array) &&
179 method_call.method < MAXWELL3D_REG_INDEX(vertex_array) + 4 * 32) { 182 method < MAXWELL3D_REG_INDEX(vertex_array) + 4 * 32) {
180 dirty_flags.vertex_array |= 183 dirty_flags.vertex_array.set((method - MAXWELL3D_REG_INDEX(vertex_array)) >> 2);
181 1u << ((method_call.method - MAXWELL3D_REG_INDEX(vertex_array)) >> 2); 184 } else if (method >= MAXWELL3D_REG_INDEX(vertex_array_limit) &&
182 } else if (method_call.method >= MAXWELL3D_REG_INDEX(vertex_array_limit) && 185 method < MAXWELL3D_REG_INDEX(vertex_array_limit) + 2 * 32) {
183 method_call.method < MAXWELL3D_REG_INDEX(vertex_array_limit) + 2 * 32) { 186 dirty_flags.vertex_array.set((method - MAXWELL3D_REG_INDEX(vertex_array_limit)) >> 1);
184 dirty_flags.vertex_array |= 187 } else if (method >= MAXWELL3D_REG_INDEX(instanced_arrays) &&
185 1u << ((method_call.method - MAXWELL3D_REG_INDEX(vertex_array_limit)) >> 1); 188 method < MAXWELL3D_REG_INDEX(instanced_arrays) + 32) {
186 } else if (method_call.method >= MAXWELL3D_REG_INDEX(instanced_arrays) && 189 dirty_flags.vertex_array.set(method - MAXWELL3D_REG_INDEX(instanced_arrays));
187 method_call.method < MAXWELL3D_REG_INDEX(instanced_arrays) + 32) {
188 dirty_flags.vertex_array |=
189 1u << (method_call.method - MAXWELL3D_REG_INDEX(instanced_arrays));
190 } 190 }
191 } 191 }
192 192
193 switch (method_call.method) { 193 switch (method) {
194 case MAXWELL3D_REG_INDEX(macros.data): { 194 case MAXWELL3D_REG_INDEX(macros.data): {
195 ProcessMacroUpload(method_call.argument); 195 ProcessMacroUpload(method_call.argument);
196 break; 196 break;
@@ -270,10 +270,9 @@ void Maxwell3D::ProcessMacroBind(u32 data) {
270} 270}
271 271
272void Maxwell3D::ProcessQueryGet() { 272void Maxwell3D::ProcessQueryGet() {
273 GPUVAddr sequence_address = regs.query.QueryAddress(); 273 const GPUVAddr sequence_address{regs.query.QueryAddress()};
274 // Since the sequence address is given as a GPU VAddr, we have to convert it to an application 274 // Since the sequence address is given as a GPU VAddr, we have to convert it to an application
275 // VAddr before writing. 275 // VAddr before writing.
276 std::optional<VAddr> address = memory_manager.GpuToCpuAddress(sequence_address);
277 276
278 // TODO(Subv): Support the other query units. 277 // TODO(Subv): Support the other query units.
279 ASSERT_MSG(regs.query.query_get.unit == Regs::QueryUnit::Crop, 278 ASSERT_MSG(regs.query.query_get.unit == Regs::QueryUnit::Crop,
@@ -308,7 +307,7 @@ void Maxwell3D::ProcessQueryGet() {
308 // Write the current query sequence to the sequence address. 307 // Write the current query sequence to the sequence address.
309 // TODO(Subv): Find out what happens if you use a long query type but mark it as a short 308 // TODO(Subv): Find out what happens if you use a long query type but mark it as a short
310 // query. 309 // query.
311 Memory::Write32(*address, sequence); 310 memory_manager.Write32(sequence_address, sequence);
312 } else { 311 } else {
313 // Write the 128-bit result structure in long mode. Note: We emulate an infinitely fast 312 // Write the 128-bit result structure in long mode. Note: We emulate an infinitely fast
314 // GPU, this command may actually take a while to complete in real hardware due to GPU 313 // GPU, this command may actually take a while to complete in real hardware due to GPU
@@ -316,8 +315,8 @@ void Maxwell3D::ProcessQueryGet() {
316 LongQueryResult query_result{}; 315 LongQueryResult query_result{};
317 query_result.value = result; 316 query_result.value = result;
318 // TODO(Subv): Generate a real GPU timestamp and write it here instead of CoreTiming 317 // TODO(Subv): Generate a real GPU timestamp and write it here instead of CoreTiming
319 query_result.timestamp = CoreTiming::GetTicks(); 318 query_result.timestamp = system.CoreTiming().GetTicks();
320 Memory::WriteBlock(*address, &query_result, sizeof(query_result)); 319 memory_manager.WriteBlock(sequence_address, &query_result, sizeof(query_result));
321 } 320 }
322 dirty_flags.OnMemoryWrite(); 321 dirty_flags.OnMemoryWrite();
323 break; 322 break;
@@ -333,7 +332,7 @@ void Maxwell3D::DrawArrays() {
333 regs.vertex_buffer.count); 332 regs.vertex_buffer.count);
334 ASSERT_MSG(!(regs.index_array.count && regs.vertex_buffer.count), "Both indexed and direct?"); 333 ASSERT_MSG(!(regs.index_array.count && regs.vertex_buffer.count), "Both indexed and direct?");
335 334
336 auto debug_context = Core::System::GetInstance().GetGPUDebugContext(); 335 auto debug_context = system.GetGPUDebugContext();
337 336
338 if (debug_context) { 337 if (debug_context) {
339 debug_context->OnEvent(Tegra::DebugContext::Event::IncomingPrimitiveBatch, nullptr); 338 debug_context->OnEvent(Tegra::DebugContext::Event::IncomingPrimitiveBatch, nullptr);
@@ -386,16 +385,18 @@ void Maxwell3D::ProcessCBBind(Regs::ShaderStage stage) {
386 385
387void Maxwell3D::ProcessCBData(u32 value) { 386void Maxwell3D::ProcessCBData(u32 value) {
388 // Write the input value to the current const buffer at the current position. 387 // Write the input value to the current const buffer at the current position.
389 GPUVAddr buffer_address = regs.const_buffer.BufferAddress(); 388 const GPUVAddr buffer_address = regs.const_buffer.BufferAddress();
390 ASSERT(buffer_address != 0); 389 ASSERT(buffer_address != 0);
391 390
392 // Don't allow writing past the end of the buffer. 391 // Don't allow writing past the end of the buffer.
393 ASSERT(regs.const_buffer.cb_pos + sizeof(u32) <= regs.const_buffer.cb_size); 392 ASSERT(regs.const_buffer.cb_pos + sizeof(u32) <= regs.const_buffer.cb_size);
394 393
395 std::optional<VAddr> address = 394 const GPUVAddr address{buffer_address + regs.const_buffer.cb_pos};
396 memory_manager.GpuToCpuAddress(buffer_address + regs.const_buffer.cb_pos); 395
396 u8* ptr{memory_manager.GetPointer(address)};
397 rasterizer.InvalidateRegion(ToCacheAddr(ptr), sizeof(u32));
398 memory_manager.Write32(address, value);
397 399
398 Memory::Write32(*address, value);
399 dirty_flags.OnMemoryWrite(); 400 dirty_flags.OnMemoryWrite();
400 401
401 // Increment the current buffer position. 402 // Increment the current buffer position.
@@ -403,22 +404,19 @@ void Maxwell3D::ProcessCBData(u32 value) {
403} 404}
404 405
405Texture::TICEntry Maxwell3D::GetTICEntry(u32 tic_index) const { 406Texture::TICEntry Maxwell3D::GetTICEntry(u32 tic_index) const {
406 GPUVAddr tic_base_address = regs.tic.TICAddress(); 407 const GPUVAddr tic_address_gpu{regs.tic.TICAddress() + tic_index * sizeof(Texture::TICEntry)};
407
408 GPUVAddr tic_address_gpu = tic_base_address + tic_index * sizeof(Texture::TICEntry);
409 std::optional<VAddr> tic_address_cpu = memory_manager.GpuToCpuAddress(tic_address_gpu);
410 408
411 Texture::TICEntry tic_entry; 409 Texture::TICEntry tic_entry;
412 Memory::ReadBlock(*tic_address_cpu, &tic_entry, sizeof(Texture::TICEntry)); 410 memory_manager.ReadBlock(tic_address_gpu, &tic_entry, sizeof(Texture::TICEntry));
413 411
414 ASSERT_MSG(tic_entry.header_version == Texture::TICHeaderVersion::BlockLinear || 412 ASSERT_MSG(tic_entry.header_version == Texture::TICHeaderVersion::BlockLinear ||
415 tic_entry.header_version == Texture::TICHeaderVersion::Pitch, 413 tic_entry.header_version == Texture::TICHeaderVersion::Pitch,
416 "TIC versions other than BlockLinear or Pitch are unimplemented"); 414 "TIC versions other than BlockLinear or Pitch are unimplemented");
417 415
418 auto r_type = tic_entry.r_type.Value(); 416 const auto r_type = tic_entry.r_type.Value();
419 auto g_type = tic_entry.g_type.Value(); 417 const auto g_type = tic_entry.g_type.Value();
420 auto b_type = tic_entry.b_type.Value(); 418 const auto b_type = tic_entry.b_type.Value();
421 auto a_type = tic_entry.a_type.Value(); 419 const auto a_type = tic_entry.a_type.Value();
422 420
423 // TODO(Subv): Different data types for separate components are not supported 421 // TODO(Subv): Different data types for separate components are not supported
424 ASSERT(r_type == g_type && r_type == b_type && r_type == a_type); 422 ASSERT(r_type == g_type && r_type == b_type && r_type == a_type);
@@ -427,13 +425,10 @@ Texture::TICEntry Maxwell3D::GetTICEntry(u32 tic_index) const {
427} 425}
428 426
429Texture::TSCEntry Maxwell3D::GetTSCEntry(u32 tsc_index) const { 427Texture::TSCEntry Maxwell3D::GetTSCEntry(u32 tsc_index) const {
430 GPUVAddr tsc_base_address = regs.tsc.TSCAddress(); 428 const GPUVAddr tsc_address_gpu{regs.tsc.TSCAddress() + tsc_index * sizeof(Texture::TSCEntry)};
431
432 GPUVAddr tsc_address_gpu = tsc_base_address + tsc_index * sizeof(Texture::TSCEntry);
433 std::optional<VAddr> tsc_address_cpu = memory_manager.GpuToCpuAddress(tsc_address_gpu);
434 429
435 Texture::TSCEntry tsc_entry; 430 Texture::TSCEntry tsc_entry;
436 Memory::ReadBlock(*tsc_address_cpu, &tsc_entry, sizeof(Texture::TSCEntry)); 431 memory_manager.ReadBlock(tsc_address_gpu, &tsc_entry, sizeof(Texture::TSCEntry));
437 return tsc_entry; 432 return tsc_entry;
438} 433}
439 434
@@ -452,8 +447,7 @@ std::vector<Texture::FullTextureInfo> Maxwell3D::GetStageTextures(Regs::ShaderSt
452 for (GPUVAddr current_texture = tex_info_buffer.address + TextureInfoOffset; 447 for (GPUVAddr current_texture = tex_info_buffer.address + TextureInfoOffset;
453 current_texture < tex_info_buffer_end; current_texture += sizeof(Texture::TextureHandle)) { 448 current_texture < tex_info_buffer_end; current_texture += sizeof(Texture::TextureHandle)) {
454 449
455 Texture::TextureHandle tex_handle{ 450 const Texture::TextureHandle tex_handle{memory_manager.Read32(current_texture)};
456 Memory::Read32(*memory_manager.GpuToCpuAddress(current_texture))};
457 451
458 Texture::FullTextureInfo tex_info{}; 452 Texture::FullTextureInfo tex_info{};
459 // TODO(Subv): Use the shader to determine which textures are actually accessed. 453 // TODO(Subv): Use the shader to determine which textures are actually accessed.
@@ -462,23 +456,16 @@ std::vector<Texture::FullTextureInfo> Maxwell3D::GetStageTextures(Regs::ShaderSt
462 sizeof(Texture::TextureHandle); 456 sizeof(Texture::TextureHandle);
463 457
464 // Load the TIC data. 458 // Load the TIC data.
465 if (tex_handle.tic_id != 0) { 459 auto tic_entry = GetTICEntry(tex_handle.tic_id);
466 tex_info.enabled = true; 460 // TODO(Subv): Workaround for BitField's move constructor being deleted.
467 461 std::memcpy(&tex_info.tic, &tic_entry, sizeof(tic_entry));
468 auto tic_entry = GetTICEntry(tex_handle.tic_id);
469 // TODO(Subv): Workaround for BitField's move constructor being deleted.
470 std::memcpy(&tex_info.tic, &tic_entry, sizeof(tic_entry));
471 }
472 462
473 // Load the TSC data 463 // Load the TSC data
474 if (tex_handle.tsc_id != 0) { 464 auto tsc_entry = GetTSCEntry(tex_handle.tsc_id);
475 auto tsc_entry = GetTSCEntry(tex_handle.tsc_id); 465 // TODO(Subv): Workaround for BitField's move constructor being deleted.
476 // TODO(Subv): Workaround for BitField's move constructor being deleted. 466 std::memcpy(&tex_info.tsc, &tsc_entry, sizeof(tsc_entry));
477 std::memcpy(&tex_info.tsc, &tsc_entry, sizeof(tsc_entry));
478 }
479 467
480 if (tex_info.enabled) 468 textures.push_back(tex_info);
481 textures.push_back(tex_info);
482 } 469 }
483 470
484 return textures; 471 return textures;
@@ -490,31 +477,25 @@ Texture::FullTextureInfo Maxwell3D::GetStageTexture(Regs::ShaderStage stage,
490 auto& tex_info_buffer = shader.const_buffers[regs.tex_cb_index]; 477 auto& tex_info_buffer = shader.const_buffers[regs.tex_cb_index];
491 ASSERT(tex_info_buffer.enabled && tex_info_buffer.address != 0); 478 ASSERT(tex_info_buffer.enabled && tex_info_buffer.address != 0);
492 479
493 GPUVAddr tex_info_address = tex_info_buffer.address + offset * sizeof(Texture::TextureHandle); 480 const GPUVAddr tex_info_address =
481 tex_info_buffer.address + offset * sizeof(Texture::TextureHandle);
494 482
495 ASSERT(tex_info_address < tex_info_buffer.address + tex_info_buffer.size); 483 ASSERT(tex_info_address < tex_info_buffer.address + tex_info_buffer.size);
496 484
497 std::optional<VAddr> tex_address_cpu = memory_manager.GpuToCpuAddress(tex_info_address); 485 const Texture::TextureHandle tex_handle{memory_manager.Read32(tex_info_address)};
498 Texture::TextureHandle tex_handle{Memory::Read32(*tex_address_cpu)};
499 486
500 Texture::FullTextureInfo tex_info{}; 487 Texture::FullTextureInfo tex_info{};
501 tex_info.index = static_cast<u32>(offset); 488 tex_info.index = static_cast<u32>(offset);
502 489
503 // Load the TIC data. 490 // Load the TIC data.
504 if (tex_handle.tic_id != 0) { 491 auto tic_entry = GetTICEntry(tex_handle.tic_id);
505 tex_info.enabled = true; 492 // TODO(Subv): Workaround for BitField's move constructor being deleted.
506 493 std::memcpy(&tex_info.tic, &tic_entry, sizeof(tic_entry));
507 auto tic_entry = GetTICEntry(tex_handle.tic_id);
508 // TODO(Subv): Workaround for BitField's move constructor being deleted.
509 std::memcpy(&tex_info.tic, &tic_entry, sizeof(tic_entry));
510 }
511 494
512 // Load the TSC data 495 // Load the TSC data
513 if (tex_handle.tsc_id != 0) { 496 auto tsc_entry = GetTSCEntry(tex_handle.tsc_id);
514 auto tsc_entry = GetTSCEntry(tex_handle.tsc_id); 497 // TODO(Subv): Workaround for BitField's move constructor being deleted.
515 // TODO(Subv): Workaround for BitField's move constructor being deleted. 498 std::memcpy(&tex_info.tsc, &tsc_entry, sizeof(tsc_entry));
516 std::memcpy(&tex_info.tsc, &tsc_entry, sizeof(tsc_entry));
517 }
518 499
519 return tex_info; 500 return tex_info;
520} 501}
diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h
index 1f76aa670..7fbf1026e 100644
--- a/src/video_core/engines/maxwell_3d.h
+++ b/src/video_core/engines/maxwell_3d.h
@@ -5,8 +5,10 @@
5#pragma once 5#pragma once
6 6
7#include <array> 7#include <array>
8#include <bitset>
8#include <unordered_map> 9#include <unordered_map>
9#include <vector> 10#include <vector>
11
10#include "common/assert.h" 12#include "common/assert.h"
11#include "common/bit_field.h" 13#include "common/bit_field.h"
12#include "common/common_funcs.h" 14#include "common/common_funcs.h"
@@ -17,6 +19,10 @@
17#include "video_core/memory_manager.h" 19#include "video_core/memory_manager.h"
18#include "video_core/textures/texture.h" 20#include "video_core/textures/texture.h"
19 21
22namespace Core {
23class System;
24}
25
20namespace VideoCore { 26namespace VideoCore {
21class RasterizerInterface; 27class RasterizerInterface;
22} 28}
@@ -28,7 +34,8 @@ namespace Tegra::Engines {
28 34
29class Maxwell3D final { 35class Maxwell3D final {
30public: 36public:
31 explicit Maxwell3D(VideoCore::RasterizerInterface& rasterizer, MemoryManager& memory_manager); 37 explicit Maxwell3D(Core::System& system, VideoCore::RasterizerInterface& rasterizer,
38 MemoryManager& memory_manager);
32 ~Maxwell3D() = default; 39 ~Maxwell3D() = default;
33 40
34 /// Register structure of the Maxwell3D engine. 41 /// Register structure of the Maxwell3D engine.
@@ -498,7 +505,7 @@ public:
498 f32 translate_z; 505 f32 translate_z;
499 INSERT_PADDING_WORDS(2); 506 INSERT_PADDING_WORDS(2);
500 507
501 MathUtil::Rectangle<s32> GetRect() const { 508 Common::Rectangle<s32> GetRect() const {
502 return { 509 return {
503 GetX(), // left 510 GetX(), // left
504 GetY() + GetHeight(), // top 511 GetY() + GetHeight(), // top
@@ -1089,19 +1096,18 @@ public:
1089 MemoryManager& memory_manager; 1096 MemoryManager& memory_manager;
1090 1097
1091 struct DirtyFlags { 1098 struct DirtyFlags {
1092 u8 color_buffer = 0xFF; 1099 std::bitset<8> color_buffer{0xFF};
1093 bool zeta_buffer = true; 1100 std::bitset<32> vertex_array{0xFFFFFFFF};
1094
1095 bool shaders = true;
1096 1101
1097 bool vertex_attrib_format = true; 1102 bool vertex_attrib_format = true;
1098 u32 vertex_array = 0xFFFFFFFF; 1103 bool zeta_buffer = true;
1104 bool shaders = true;
1099 1105
1100 void OnMemoryWrite() { 1106 void OnMemoryWrite() {
1101 color_buffer = 0xFF;
1102 zeta_buffer = true; 1107 zeta_buffer = true;
1103 shaders = true; 1108 shaders = true;
1104 vertex_array = 0xFFFFFFFF; 1109 color_buffer.set();
1110 vertex_array.set();
1105 } 1111 }
1106 }; 1112 };
1107 1113
@@ -1131,6 +1137,8 @@ public:
1131private: 1137private:
1132 void InitializeRegisterDefaults(); 1138 void InitializeRegisterDefaults();
1133 1139
1140 Core::System& system;
1141
1134 VideoCore::RasterizerInterface& rasterizer; 1142 VideoCore::RasterizerInterface& rasterizer;
1135 1143
1136 /// Start offsets of each macro in macro_memory 1144 /// Start offsets of each macro in macro_memory
diff --git a/src/video_core/engines/maxwell_compute.cpp b/src/video_core/engines/maxwell_compute.cpp
deleted file mode 100644
index 656db6a61..000000000
--- a/src/video_core/engines/maxwell_compute.cpp
+++ /dev/null
@@ -1,28 +0,0 @@
1// Copyright 2018 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/logging/log.h"
6#include "core/core.h"
7#include "video_core/engines/maxwell_compute.h"
8
9namespace Tegra::Engines {
10
11void MaxwellCompute::CallMethod(const GPU::MethodCall& method_call) {
12 ASSERT_MSG(method_call.method < Regs::NUM_REGS,
13 "Invalid MaxwellCompute register, increase the size of the Regs structure");
14
15 regs.reg_array[method_call.method] = method_call.argument;
16
17 switch (method_call.method) {
18 case MAXWELL_COMPUTE_REG_INDEX(compute): {
19 LOG_CRITICAL(HW_GPU, "Compute shaders are not implemented");
20 UNREACHABLE();
21 break;
22 }
23 default:
24 break;
25 }
26}
27
28} // namespace Tegra::Engines
diff --git a/src/video_core/engines/maxwell_dma.cpp b/src/video_core/engines/maxwell_dma.cpp
index 06462f570..a0ded4c25 100644
--- a/src/video_core/engines/maxwell_dma.cpp
+++ b/src/video_core/engines/maxwell_dma.cpp
@@ -2,17 +2,21 @@
2// Licensed under GPLv2 or any later version 2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included. 3// Refer to the license.txt file included.
4 4
5#include "common/assert.h"
6#include "common/logging/log.h"
5#include "core/core.h" 7#include "core/core.h"
6#include "core/memory.h" 8#include "core/memory.h"
7#include "video_core/engines/maxwell_3d.h" 9#include "video_core/engines/maxwell_3d.h"
8#include "video_core/engines/maxwell_dma.h" 10#include "video_core/engines/maxwell_dma.h"
9#include "video_core/rasterizer_interface.h" 11#include "video_core/rasterizer_interface.h"
12#include "video_core/renderer_base.h"
10#include "video_core/textures/decoders.h" 13#include "video_core/textures/decoders.h"
11 14
12namespace Tegra::Engines { 15namespace Tegra::Engines {
13 16
14MaxwellDMA::MaxwellDMA(VideoCore::RasterizerInterface& rasterizer, MemoryManager& memory_manager) 17MaxwellDMA::MaxwellDMA(Core::System& system, VideoCore::RasterizerInterface& rasterizer,
15 : memory_manager(memory_manager), rasterizer{rasterizer} {} 18 MemoryManager& memory_manager)
19 : memory_manager(memory_manager), system{system}, rasterizer{rasterizer} {}
16 20
17void MaxwellDMA::CallMethod(const GPU::MethodCall& method_call) { 21void MaxwellDMA::CallMethod(const GPU::MethodCall& method_call) {
18 ASSERT_MSG(method_call.method < Regs::NUM_REGS, 22 ASSERT_MSG(method_call.method < Regs::NUM_REGS,
@@ -39,9 +43,6 @@ void MaxwellDMA::HandleCopy() {
39 const GPUVAddr source = regs.src_address.Address(); 43 const GPUVAddr source = regs.src_address.Address();
40 const GPUVAddr dest = regs.dst_address.Address(); 44 const GPUVAddr dest = regs.dst_address.Address();
41 45
42 const VAddr source_cpu = *memory_manager.GpuToCpuAddress(source);
43 const VAddr dest_cpu = *memory_manager.GpuToCpuAddress(dest);
44
45 // TODO(Subv): Perform more research and implement all features of this engine. 46 // TODO(Subv): Perform more research and implement all features of this engine.
46 ASSERT(regs.exec.enable_swizzle == 0); 47 ASSERT(regs.exec.enable_swizzle == 0);
47 ASSERT(regs.exec.query_mode == Regs::QueryMode::None); 48 ASSERT(regs.exec.query_mode == Regs::QueryMode::None);
@@ -57,14 +58,14 @@ void MaxwellDMA::HandleCopy() {
57 } 58 }
58 59
59 // All copies here update the main memory, so mark all rasterizer states as invalid. 60 // All copies here update the main memory, so mark all rasterizer states as invalid.
60 Core::System::GetInstance().GPU().Maxwell3D().dirty_flags.OnMemoryWrite(); 61 system.GPU().Maxwell3D().dirty_flags.OnMemoryWrite();
61 62
62 if (regs.exec.is_dst_linear && regs.exec.is_src_linear) { 63 if (regs.exec.is_dst_linear && regs.exec.is_src_linear) {
63 // When the enable_2d bit is disabled, the copy is performed as if we were copying a 1D 64 // When the enable_2d bit is disabled, the copy is performed as if we were copying a 1D
64 // buffer of length `x_count`, otherwise we copy a 2D image of dimensions (x_count, 65 // buffer of length `x_count`, otherwise we copy a 2D image of dimensions (x_count,
65 // y_count). 66 // y_count).
66 if (!regs.exec.enable_2d) { 67 if (!regs.exec.enable_2d) {
67 Memory::CopyBlock(dest_cpu, source_cpu, regs.x_count); 68 memory_manager.CopyBlock(dest, source, regs.x_count);
68 return; 69 return;
69 } 70 }
70 71
@@ -73,9 +74,9 @@ void MaxwellDMA::HandleCopy() {
73 // rectangle. There is no need to manually flush/invalidate the regions because 74 // rectangle. There is no need to manually flush/invalidate the regions because
74 // CopyBlock does that for us. 75 // CopyBlock does that for us.
75 for (u32 line = 0; line < regs.y_count; ++line) { 76 for (u32 line = 0; line < regs.y_count; ++line) {
76 const VAddr source_line = source_cpu + line * regs.src_pitch; 77 const GPUVAddr source_line = source + line * regs.src_pitch;
77 const VAddr dest_line = dest_cpu + line * regs.dst_pitch; 78 const GPUVAddr dest_line = dest + line * regs.dst_pitch;
78 Memory::CopyBlock(dest_line, source_line, regs.x_count); 79 memory_manager.CopyBlock(dest_line, source_line, regs.x_count);
79 } 80 }
80 return; 81 return;
81 } 82 }
@@ -84,15 +85,18 @@ void MaxwellDMA::HandleCopy() {
84 85
85 const std::size_t copy_size = regs.x_count * regs.y_count; 86 const std::size_t copy_size = regs.x_count * regs.y_count;
86 87
88 auto source_ptr{memory_manager.GetPointer(source)};
89 auto dst_ptr{memory_manager.GetPointer(dest)};
90
87 const auto FlushAndInvalidate = [&](u32 src_size, u64 dst_size) { 91 const auto FlushAndInvalidate = [&](u32 src_size, u64 dst_size) {
88 // TODO(Subv): For now, manually flush the regions until we implement GPU-accelerated 92 // TODO(Subv): For now, manually flush the regions until we implement GPU-accelerated
89 // copying. 93 // copying.
90 rasterizer.FlushRegion(source_cpu, src_size); 94 rasterizer.FlushRegion(ToCacheAddr(source_ptr), src_size);
91 95
92 // We have to invalidate the destination region to evict any outdated surfaces from the 96 // We have to invalidate the destination region to evict any outdated surfaces from the
93 // cache. We do this before actually writing the new data because the destination address 97 // cache. We do this before actually writing the new data because the destination address
94 // might contain a dirty surface that will have to be written back to memory. 98 // might contain a dirty surface that will have to be written back to memory.
95 rasterizer.InvalidateRegion(dest_cpu, dst_size); 99 rasterizer.InvalidateRegion(ToCacheAddr(dst_ptr), dst_size);
96 }; 100 };
97 101
98 if (regs.exec.is_dst_linear && !regs.exec.is_src_linear) { 102 if (regs.exec.is_dst_linear && !regs.exec.is_src_linear) {
@@ -105,7 +109,7 @@ void MaxwellDMA::HandleCopy() {
105 copy_size * src_bytes_per_pixel); 109 copy_size * src_bytes_per_pixel);
106 110
107 Texture::UnswizzleSubrect(regs.x_count, regs.y_count, regs.dst_pitch, 111 Texture::UnswizzleSubrect(regs.x_count, regs.y_count, regs.dst_pitch,
108 regs.src_params.size_x, src_bytes_per_pixel, source_cpu, dest_cpu, 112 regs.src_params.size_x, src_bytes_per_pixel, source_ptr, dst_ptr,
109 regs.src_params.BlockHeight(), regs.src_params.pos_x, 113 regs.src_params.BlockHeight(), regs.src_params.pos_x,
110 regs.src_params.pos_y); 114 regs.src_params.pos_y);
111 } else { 115 } else {
@@ -119,7 +123,7 @@ void MaxwellDMA::HandleCopy() {
119 123
120 // If the input is linear and the output is tiled, swizzle the input and copy it over. 124 // If the input is linear and the output is tiled, swizzle the input and copy it over.
121 Texture::SwizzleSubrect(regs.x_count, regs.y_count, regs.src_pitch, regs.dst_params.size_x, 125 Texture::SwizzleSubrect(regs.x_count, regs.y_count, regs.src_pitch, regs.dst_params.size_x,
122 src_bpp, dest_cpu, source_cpu, regs.dst_params.BlockHeight()); 126 src_bpp, dst_ptr, source_ptr, regs.dst_params.BlockHeight());
123 } 127 }
124} 128}
125 129
diff --git a/src/video_core/engines/maxwell_dma.h b/src/video_core/engines/maxwell_dma.h
index 1f8cd65d2..34c369320 100644
--- a/src/video_core/engines/maxwell_dma.h
+++ b/src/video_core/engines/maxwell_dma.h
@@ -5,13 +5,17 @@
5#pragma once 5#pragma once
6 6
7#include <array> 7#include <array>
8#include "common/assert.h" 8#include <cstddef>
9#include "common/bit_field.h" 9#include "common/bit_field.h"
10#include "common/common_funcs.h" 10#include "common/common_funcs.h"
11#include "common/common_types.h" 11#include "common/common_types.h"
12#include "video_core/gpu.h" 12#include "video_core/gpu.h"
13#include "video_core/memory_manager.h" 13#include "video_core/memory_manager.h"
14 14
15namespace Core {
16class System;
17}
18
15namespace VideoCore { 19namespace VideoCore {
16class RasterizerInterface; 20class RasterizerInterface;
17} 21}
@@ -20,7 +24,8 @@ namespace Tegra::Engines {
20 24
21class MaxwellDMA final { 25class MaxwellDMA final {
22public: 26public:
23 explicit MaxwellDMA(VideoCore::RasterizerInterface& rasterizer, MemoryManager& memory_manager); 27 explicit MaxwellDMA(Core::System& system, VideoCore::RasterizerInterface& rasterizer,
28 MemoryManager& memory_manager);
24 ~MaxwellDMA() = default; 29 ~MaxwellDMA() = default;
25 30
26 /// Write the value to the register identified by method. 31 /// Write the value to the register identified by method.
@@ -137,6 +142,8 @@ public:
137 MemoryManager& memory_manager; 142 MemoryManager& memory_manager;
138 143
139private: 144private:
145 Core::System& system;
146
140 VideoCore::RasterizerInterface& rasterizer; 147 VideoCore::RasterizerInterface& rasterizer;
141 148
142 /// Performs the copy from the source buffer to the destination buffer as configured in the 149 /// Performs the copy from the source buffer to the destination buffer as configured in the
diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h
index 269df9437..7f613370b 100644
--- a/src/video_core/engines/shader_bytecode.h
+++ b/src/video_core/engines/shader_bytecode.h
@@ -6,7 +6,6 @@
6 6
7#include <bitset> 7#include <bitset>
8#include <optional> 8#include <optional>
9#include <string>
10#include <tuple> 9#include <tuple>
11#include <vector> 10#include <vector>
12 11
@@ -186,7 +185,7 @@ enum class SubOp : u64 {
186}; 185};
187 186
188enum class F2iRoundingOp : u64 { 187enum class F2iRoundingOp : u64 {
189 None = 0, 188 RoundEven = 0,
190 Floor = 1, 189 Floor = 1,
191 Ceil = 2, 190 Ceil = 2,
192 Trunc = 3, 191 Trunc = 3,
@@ -325,11 +324,11 @@ enum class TextureQueryType : u64 {
325 324
326enum class TextureProcessMode : u64 { 325enum class TextureProcessMode : u64 {
327 None = 0, 326 None = 0,
328 LZ = 1, // Unknown, appears to be the same as none. 327 LZ = 1, // Load LOD of zero.
329 LB = 2, // Load Bias. 328 LB = 2, // Load Bias.
330 LL = 3, // Load LOD (LevelOfDetail) 329 LL = 3, // Load LOD.
331 LBA = 6, // Load Bias. The A is unknown, does not appear to differ with LB 330 LBA = 6, // Load Bias. The A is unknown, does not appear to differ with LB.
332 LLA = 7 // Load LOD. The A is unknown, does not appear to differ with LL 331 LLA = 7 // Load LOD. The A is unknown, does not appear to differ with LL.
333}; 332};
334 333
335enum class TextureMiscMode : u64 { 334enum class TextureMiscMode : u64 {
@@ -376,9 +375,9 @@ enum class R2pMode : u64 {
376}; 375};
377 376
378enum class IpaInterpMode : u64 { 377enum class IpaInterpMode : u64 {
379 Linear = 0, 378 Pass = 0,
380 Perspective = 1, 379 Multiply = 1,
381 Flat = 2, 380 Constant = 2,
382 Sc = 3, 381 Sc = 3,
383}; 382};
384 383
@@ -1446,6 +1445,7 @@ public:
1446 Flow, 1445 Flow,
1447 Synch, 1446 Synch,
1448 Memory, 1447 Memory,
1448 Texture,
1449 FloatSet, 1449 FloatSet,
1450 FloatSetPredicate, 1450 FloatSetPredicate,
1451 IntegerSet, 1451 IntegerSet,
@@ -1576,14 +1576,14 @@ private:
1576 INST("1110111101010---", Id::ST_L, Type::Memory, "ST_L"), 1576 INST("1110111101010---", Id::ST_L, Type::Memory, "ST_L"),
1577 INST("1110111011010---", Id::LDG, Type::Memory, "LDG"), 1577 INST("1110111011010---", Id::LDG, Type::Memory, "LDG"),
1578 INST("1110111011011---", Id::STG, Type::Memory, "STG"), 1578 INST("1110111011011---", Id::STG, Type::Memory, "STG"),
1579 INST("110000----111---", Id::TEX, Type::Memory, "TEX"), 1579 INST("110000----111---", Id::TEX, Type::Texture, "TEX"),
1580 INST("1101111101001---", Id::TXQ, Type::Memory, "TXQ"), 1580 INST("1101111101001---", Id::TXQ, Type::Texture, "TXQ"),
1581 INST("1101-00---------", Id::TEXS, Type::Memory, "TEXS"), 1581 INST("1101-00---------", Id::TEXS, Type::Texture, "TEXS"),
1582 INST("1101101---------", Id::TLDS, Type::Memory, "TLDS"), 1582 INST("1101101---------", Id::TLDS, Type::Texture, "TLDS"),
1583 INST("110010----111---", Id::TLD4, Type::Memory, "TLD4"), 1583 INST("110010----111---", Id::TLD4, Type::Texture, "TLD4"),
1584 INST("1101111100------", Id::TLD4S, Type::Memory, "TLD4S"), 1584 INST("1101111100------", Id::TLD4S, Type::Texture, "TLD4S"),
1585 INST("110111110110----", Id::TMML_B, Type::Memory, "TMML_B"), 1585 INST("110111110110----", Id::TMML_B, Type::Texture, "TMML_B"),
1586 INST("1101111101011---", Id::TMML, Type::Memory, "TMML"), 1586 INST("1101111101011---", Id::TMML, Type::Texture, "TMML"),
1587 INST("111000110000----", Id::EXIT, Type::Trivial, "EXIT"), 1587 INST("111000110000----", Id::EXIT, Type::Trivial, "EXIT"),
1588 INST("11100000--------", Id::IPA, Type::Trivial, "IPA"), 1588 INST("11100000--------", Id::IPA, Type::Trivial, "IPA"),
1589 INST("1111101111100---", Id::OUT_R, Type::Trivial, "OUT_R"), 1589 INST("1111101111100---", Id::OUT_R, Type::Trivial, "OUT_R"),
diff --git a/src/video_core/engines/shader_header.h b/src/video_core/engines/shader_header.h
index cf2b76ff6..e86a7f04a 100644
--- a/src/video_core/engines/shader_header.h
+++ b/src/video_core/engines/shader_header.h
@@ -16,6 +16,13 @@ enum class OutputTopology : u32 {
16 TriangleStrip = 7, 16 TriangleStrip = 7,
17}; 17};
18 18
19enum class AttributeUse : u8 {
20 Unused = 0,
21 Constant = 1,
22 Perspective = 2,
23 ScreenLinear = 3,
24};
25
19// Documentation in: 26// Documentation in:
20// http://download.nvidia.com/open-gpu-doc/Shader-Program-Header/1/Shader-Program-Header.html#ImapTexture 27// http://download.nvidia.com/open-gpu-doc/Shader-Program-Header/1/Shader-Program-Header.html#ImapTexture
21struct Header { 28struct Header {
@@ -84,9 +91,15 @@ struct Header {
84 } vtg; 91 } vtg;
85 92
86 struct { 93 struct {
87 INSERT_PADDING_BYTES(3); // ImapSystemValuesA 94 INSERT_PADDING_BYTES(3); // ImapSystemValuesA
88 INSERT_PADDING_BYTES(1); // ImapSystemValuesB 95 INSERT_PADDING_BYTES(1); // ImapSystemValuesB
89 INSERT_PADDING_BYTES(32); // ImapGenericVector[32] 96 union {
97 BitField<0, 2, AttributeUse> x;
98 BitField<2, 2, AttributeUse> y;
99 BitField<4, 2, AttributeUse> w;
100 BitField<6, 2, AttributeUse> z;
101 u8 raw;
102 } imap_generic_vector[32];
90 INSERT_PADDING_BYTES(2); // ImapColor 103 INSERT_PADDING_BYTES(2); // ImapColor
91 INSERT_PADDING_BYTES(2); // ImapSystemValuesC 104 INSERT_PADDING_BYTES(2); // ImapSystemValuesC
92 INSERT_PADDING_BYTES(10); // ImapFixedFncTexture[10] 105 INSERT_PADDING_BYTES(10); // ImapFixedFncTexture[10]
@@ -103,6 +116,28 @@ struct Header {
103 const u32 bit = render_target * 4 + component; 116 const u32 bit = render_target * 4 + component;
104 return omap.target & (1 << bit); 117 return omap.target & (1 << bit);
105 } 118 }
119 AttributeUse GetAttributeIndexUse(u32 attribute, u32 index) const {
120 return static_cast<AttributeUse>(
121 (imap_generic_vector[attribute].raw >> (index * 2)) & 0x03);
122 }
123 AttributeUse GetAttributeUse(u32 attribute) const {
124 AttributeUse result = AttributeUse::Unused;
125 for (u32 i = 0; i < 4; i++) {
126 const auto index = GetAttributeIndexUse(attribute, i);
127 if (index == AttributeUse::Unused) {
128 continue;
129 }
130 if (result == AttributeUse::Unused || result == index) {
131 result = index;
132 continue;
133 }
134 LOG_CRITICAL(HW_GPU, "Generic Attribute Conflict in Interpolation Mode");
135 if (index == AttributeUse::Perspective) {
136 result = index;
137 }
138 }
139 return result;
140 }
106 } ps; 141 } ps;
107 }; 142 };
108 143
diff --git a/src/video_core/gpu.cpp b/src/video_core/gpu.cpp
index d3d32a359..66c690494 100644
--- a/src/video_core/gpu.cpp
+++ b/src/video_core/gpu.cpp
@@ -3,21 +3,23 @@
3// Refer to the license.txt file included. 3// Refer to the license.txt file included.
4 4
5#include "common/assert.h" 5#include "common/assert.h"
6#include "core/core.h"
6#include "core/core_timing.h" 7#include "core/core_timing.h"
7#include "core/memory.h" 8#include "core/memory.h"
8#include "video_core/engines/fermi_2d.h" 9#include "video_core/engines/fermi_2d.h"
10#include "video_core/engines/kepler_compute.h"
9#include "video_core/engines/kepler_memory.h" 11#include "video_core/engines/kepler_memory.h"
10#include "video_core/engines/maxwell_3d.h" 12#include "video_core/engines/maxwell_3d.h"
11#include "video_core/engines/maxwell_compute.h"
12#include "video_core/engines/maxwell_dma.h" 13#include "video_core/engines/maxwell_dma.h"
13#include "video_core/gpu.h" 14#include "video_core/gpu.h"
14#include "video_core/rasterizer_interface.h" 15#include "video_core/renderer_base.h"
15 16
16namespace Tegra { 17namespace Tegra {
17 18
18u32 FramebufferConfig::BytesPerPixel(PixelFormat format) { 19u32 FramebufferConfig::BytesPerPixel(PixelFormat format) {
19 switch (format) { 20 switch (format) {
20 case PixelFormat::ABGR8: 21 case PixelFormat::ABGR8:
22 case PixelFormat::BGRA8:
21 return 4; 23 return 4;
22 default: 24 default:
23 return 4; 25 return 4;
@@ -26,14 +28,15 @@ u32 FramebufferConfig::BytesPerPixel(PixelFormat format) {
26 UNREACHABLE(); 28 UNREACHABLE();
27} 29}
28 30
29GPU::GPU(VideoCore::RasterizerInterface& rasterizer) { 31GPU::GPU(Core::System& system, VideoCore::RendererBase& renderer) : renderer{renderer} {
32 auto& rasterizer{renderer.Rasterizer()};
30 memory_manager = std::make_unique<Tegra::MemoryManager>(); 33 memory_manager = std::make_unique<Tegra::MemoryManager>();
31 dma_pusher = std::make_unique<Tegra::DmaPusher>(*this); 34 dma_pusher = std::make_unique<Tegra::DmaPusher>(*this);
32 maxwell_3d = std::make_unique<Engines::Maxwell3D>(rasterizer, *memory_manager); 35 maxwell_3d = std::make_unique<Engines::Maxwell3D>(system, rasterizer, *memory_manager);
33 fermi_2d = std::make_unique<Engines::Fermi2D>(rasterizer, *memory_manager); 36 fermi_2d = std::make_unique<Engines::Fermi2D>(rasterizer, *memory_manager);
34 maxwell_compute = std::make_unique<Engines::MaxwellCompute>(); 37 kepler_compute = std::make_unique<Engines::KeplerCompute>(*memory_manager);
35 maxwell_dma = std::make_unique<Engines::MaxwellDMA>(rasterizer, *memory_manager); 38 maxwell_dma = std::make_unique<Engines::MaxwellDMA>(system, rasterizer, *memory_manager);
36 kepler_memory = std::make_unique<Engines::KeplerMemory>(rasterizer, *memory_manager); 39 kepler_memory = std::make_unique<Engines::KeplerMemory>(system, rasterizer, *memory_manager);
37} 40}
38 41
39GPU::~GPU() = default; 42GPU::~GPU() = default;
@@ -245,8 +248,8 @@ void GPU::CallEngineMethod(const MethodCall& method_call) {
245 case EngineID::MAXWELL_B: 248 case EngineID::MAXWELL_B:
246 maxwell_3d->CallMethod(method_call); 249 maxwell_3d->CallMethod(method_call);
247 break; 250 break;
248 case EngineID::MAXWELL_COMPUTE_B: 251 case EngineID::KEPLER_COMPUTE_B:
249 maxwell_compute->CallMethod(method_call); 252 kepler_compute->CallMethod(method_call);
250 break; 253 break;
251 case EngineID::MAXWELL_DMA_COPY_A: 254 case EngineID::MAXWELL_DMA_COPY_A:
252 maxwell_dma->CallMethod(method_call); 255 maxwell_dma->CallMethod(method_call);
@@ -271,7 +274,6 @@ void GPU::ProcessSemaphoreTriggerMethod() {
271 const auto op = 274 const auto op =
272 static_cast<GpuSemaphoreOperation>(regs.semaphore_trigger & semaphoreOperationMask); 275 static_cast<GpuSemaphoreOperation>(regs.semaphore_trigger & semaphoreOperationMask);
273 if (op == GpuSemaphoreOperation::WriteLong) { 276 if (op == GpuSemaphoreOperation::WriteLong) {
274 auto address = memory_manager->GpuToCpuAddress(regs.smaphore_address.SmaphoreAddress());
275 struct Block { 277 struct Block {
276 u32 sequence; 278 u32 sequence;
277 u32 zeros = 0; 279 u32 zeros = 0;
@@ -282,12 +284,10 @@ void GPU::ProcessSemaphoreTriggerMethod() {
282 block.sequence = regs.semaphore_sequence; 284 block.sequence = regs.semaphore_sequence;
283 // TODO(Kmather73): Generate a real GPU timestamp and write it here instead of 285 // TODO(Kmather73): Generate a real GPU timestamp and write it here instead of
284 // CoreTiming 286 // CoreTiming
285 block.timestamp = CoreTiming::GetTicks(); 287 block.timestamp = Core::System::GetInstance().CoreTiming().GetTicks();
286 Memory::WriteBlock(*address, &block, sizeof(block)); 288 memory_manager->WriteBlock(regs.smaphore_address.SmaphoreAddress(), &block, sizeof(block));
287 } else { 289 } else {
288 const auto address = 290 const u32 word{memory_manager->Read32(regs.smaphore_address.SmaphoreAddress())};
289 memory_manager->GpuToCpuAddress(regs.smaphore_address.SmaphoreAddress());
290 const u32 word = Memory::Read32(*address);
291 if ((op == GpuSemaphoreOperation::AcquireEqual && word == regs.semaphore_sequence) || 291 if ((op == GpuSemaphoreOperation::AcquireEqual && word == regs.semaphore_sequence) ||
292 (op == GpuSemaphoreOperation::AcquireGequal && 292 (op == GpuSemaphoreOperation::AcquireGequal &&
293 static_cast<s32>(word - regs.semaphore_sequence) > 0) || 293 static_cast<s32>(word - regs.semaphore_sequence) > 0) ||
@@ -314,13 +314,11 @@ void GPU::ProcessSemaphoreTriggerMethod() {
314} 314}
315 315
316void GPU::ProcessSemaphoreRelease() { 316void GPU::ProcessSemaphoreRelease() {
317 const auto address = memory_manager->GpuToCpuAddress(regs.smaphore_address.SmaphoreAddress()); 317 memory_manager->Write32(regs.smaphore_address.SmaphoreAddress(), regs.semaphore_release);
318 Memory::Write32(*address, regs.semaphore_release);
319} 318}
320 319
321void GPU::ProcessSemaphoreAcquire() { 320void GPU::ProcessSemaphoreAcquire() {
322 const auto address = memory_manager->GpuToCpuAddress(regs.smaphore_address.SmaphoreAddress()); 321 const u32 word = memory_manager->Read32(regs.smaphore_address.SmaphoreAddress());
323 const u32 word = Memory::Read32(*address);
324 const auto value = regs.semaphore_acquire; 322 const auto value = regs.semaphore_acquire;
325 if (word != value) { 323 if (word != value) {
326 regs.acquire_active = true; 324 regs.acquire_active = true;
diff --git a/src/video_core/gpu.h b/src/video_core/gpu.h
index fb8975811..a14b95c30 100644
--- a/src/video_core/gpu.h
+++ b/src/video_core/gpu.h
@@ -6,16 +6,24 @@
6 6
7#include <array> 7#include <array>
8#include <memory> 8#include <memory>
9#include <vector>
10#include "common/common_types.h" 9#include "common/common_types.h"
11#include "core/hle/service/nvflinger/buffer_queue.h" 10#include "core/hle/service/nvflinger/buffer_queue.h"
12#include "video_core/dma_pusher.h" 11#include "video_core/dma_pusher.h"
13#include "video_core/memory_manager.h" 12#include "video_core/memory_manager.h"
14 13
15namespace VideoCore { 14using CacheAddr = std::uintptr_t;
16class RasterizerInterface; 15inline CacheAddr ToCacheAddr(const void* host_ptr) {
16 return reinterpret_cast<CacheAddr>(host_ptr);
17}
18
19namespace Core {
20class System;
17} 21}
18 22
23namespace VideoCore {
24class RendererBase;
25} // namespace VideoCore
26
19namespace Tegra { 27namespace Tegra {
20 28
21enum class RenderTargetFormat : u32 { 29enum class RenderTargetFormat : u32 {
@@ -80,6 +88,7 @@ class DebugContext;
80struct FramebufferConfig { 88struct FramebufferConfig {
81 enum class PixelFormat : u32 { 89 enum class PixelFormat : u32 {
82 ABGR8 = 1, 90 ABGR8 = 1,
91 BGRA8 = 5,
83 }; 92 };
84 93
85 /** 94 /**
@@ -96,29 +105,30 @@ struct FramebufferConfig {
96 105
97 using TransformFlags = Service::NVFlinger::BufferQueue::BufferTransformFlags; 106 using TransformFlags = Service::NVFlinger::BufferQueue::BufferTransformFlags;
98 TransformFlags transform_flags; 107 TransformFlags transform_flags;
99 MathUtil::Rectangle<int> crop_rect; 108 Common::Rectangle<int> crop_rect;
100}; 109};
101 110
102namespace Engines { 111namespace Engines {
103class Fermi2D; 112class Fermi2D;
104class Maxwell3D; 113class Maxwell3D;
105class MaxwellCompute;
106class MaxwellDMA; 114class MaxwellDMA;
115class KeplerCompute;
107class KeplerMemory; 116class KeplerMemory;
108} // namespace Engines 117} // namespace Engines
109 118
110enum class EngineID { 119enum class EngineID {
111 FERMI_TWOD_A = 0x902D, // 2D Engine 120 FERMI_TWOD_A = 0x902D, // 2D Engine
112 MAXWELL_B = 0xB197, // 3D Engine 121 MAXWELL_B = 0xB197, // 3D Engine
113 MAXWELL_COMPUTE_B = 0xB1C0, 122 KEPLER_COMPUTE_B = 0xB1C0,
114 KEPLER_INLINE_TO_MEMORY_B = 0xA140, 123 KEPLER_INLINE_TO_MEMORY_B = 0xA140,
115 MAXWELL_DMA_COPY_A = 0xB0B5, 124 MAXWELL_DMA_COPY_A = 0xB0B5,
116}; 125};
117 126
118class GPU final { 127class GPU {
119public: 128public:
120 explicit GPU(VideoCore::RasterizerInterface& rasterizer); 129 explicit GPU(Core::System& system, VideoCore::RendererBase& renderer);
121 ~GPU(); 130
131 virtual ~GPU();
122 132
123 struct MethodCall { 133 struct MethodCall {
124 u32 method{}; 134 u32 method{};
@@ -196,8 +206,42 @@ public:
196 }; 206 };
197 } regs{}; 207 } regs{};
198 208
209 /// Push GPU command entries to be processed
210 virtual void PushGPUEntries(Tegra::CommandList&& entries) = 0;
211
212 /// Swap buffers (render frame)
213 virtual void SwapBuffers(
214 std::optional<std::reference_wrapper<const Tegra::FramebufferConfig>> framebuffer) = 0;
215
216 /// Notify rasterizer that any caches of the specified region should be flushed to Switch memory
217 virtual void FlushRegion(CacheAddr addr, u64 size) = 0;
218
219 /// Notify rasterizer that any caches of the specified region should be invalidated
220 virtual void InvalidateRegion(CacheAddr addr, u64 size) = 0;
221
222 /// Notify rasterizer that any caches of the specified region should be flushed and invalidated
223 virtual void FlushAndInvalidateRegion(CacheAddr addr, u64 size) = 0;
224
199private: 225private:
226 void ProcessBindMethod(const MethodCall& method_call);
227 void ProcessSemaphoreTriggerMethod();
228 void ProcessSemaphoreRelease();
229 void ProcessSemaphoreAcquire();
230
231 /// Calls a GPU puller method.
232 void CallPullerMethod(const MethodCall& method_call);
233
234 /// Calls a GPU engine method.
235 void CallEngineMethod(const MethodCall& method_call);
236
237 /// Determines where the method should be executed.
238 bool ExecuteMethodOnEngine(const MethodCall& method_call);
239
240protected:
200 std::unique_ptr<Tegra::DmaPusher> dma_pusher; 241 std::unique_ptr<Tegra::DmaPusher> dma_pusher;
242 VideoCore::RendererBase& renderer;
243
244private:
201 std::unique_ptr<Tegra::MemoryManager> memory_manager; 245 std::unique_ptr<Tegra::MemoryManager> memory_manager;
202 246
203 /// Mapping of command subchannels to their bound engine ids. 247 /// Mapping of command subchannels to their bound engine ids.
@@ -208,23 +252,11 @@ private:
208 /// 2D engine 252 /// 2D engine
209 std::unique_ptr<Engines::Fermi2D> fermi_2d; 253 std::unique_ptr<Engines::Fermi2D> fermi_2d;
210 /// Compute engine 254 /// Compute engine
211 std::unique_ptr<Engines::MaxwellCompute> maxwell_compute; 255 std::unique_ptr<Engines::KeplerCompute> kepler_compute;
212 /// DMA engine 256 /// DMA engine
213 std::unique_ptr<Engines::MaxwellDMA> maxwell_dma; 257 std::unique_ptr<Engines::MaxwellDMA> maxwell_dma;
214 /// Inline memory engine 258 /// Inline memory engine
215 std::unique_ptr<Engines::KeplerMemory> kepler_memory; 259 std::unique_ptr<Engines::KeplerMemory> kepler_memory;
216
217 void ProcessBindMethod(const MethodCall& method_call);
218 void ProcessSemaphoreTriggerMethod();
219 void ProcessSemaphoreRelease();
220 void ProcessSemaphoreAcquire();
221
222 // Calls a GPU puller method.
223 void CallPullerMethod(const MethodCall& method_call);
224 // Calls a GPU engine method.
225 void CallEngineMethod(const MethodCall& method_call);
226 // Determines where the method should be executed.
227 bool ExecuteMethodOnEngine(const MethodCall& method_call);
228}; 260};
229 261
230#define ASSERT_REG_POSITION(field_name, position) \ 262#define ASSERT_REG_POSITION(field_name, position) \
diff --git a/src/video_core/gpu_asynch.cpp b/src/video_core/gpu_asynch.cpp
new file mode 100644
index 000000000..8b355cf7b
--- /dev/null
+++ b/src/video_core/gpu_asynch.cpp
@@ -0,0 +1,37 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "video_core/gpu_asynch.h"
6#include "video_core/gpu_thread.h"
7#include "video_core/renderer_base.h"
8
9namespace VideoCommon {
10
11GPUAsynch::GPUAsynch(Core::System& system, VideoCore::RendererBase& renderer)
12 : Tegra::GPU(system, renderer), gpu_thread{renderer, *dma_pusher} {}
13
14GPUAsynch::~GPUAsynch() = default;
15
16void GPUAsynch::PushGPUEntries(Tegra::CommandList&& entries) {
17 gpu_thread.SubmitList(std::move(entries));
18}
19
20void GPUAsynch::SwapBuffers(
21 std::optional<std::reference_wrapper<const Tegra::FramebufferConfig>> framebuffer) {
22 gpu_thread.SwapBuffers(std::move(framebuffer));
23}
24
25void GPUAsynch::FlushRegion(CacheAddr addr, u64 size) {
26 gpu_thread.FlushRegion(addr, size);
27}
28
29void GPUAsynch::InvalidateRegion(CacheAddr addr, u64 size) {
30 gpu_thread.InvalidateRegion(addr, size);
31}
32
33void GPUAsynch::FlushAndInvalidateRegion(CacheAddr addr, u64 size) {
34 gpu_thread.FlushAndInvalidateRegion(addr, size);
35}
36
37} // namespace VideoCommon
diff --git a/src/video_core/gpu_asynch.h b/src/video_core/gpu_asynch.h
new file mode 100644
index 000000000..1dcc61a6c
--- /dev/null
+++ b/src/video_core/gpu_asynch.h
@@ -0,0 +1,37 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include "video_core/gpu.h"
8#include "video_core/gpu_thread.h"
9
10namespace VideoCore {
11class RendererBase;
12} // namespace VideoCore
13
14namespace VideoCommon {
15
16namespace GPUThread {
17class ThreadManager;
18} // namespace GPUThread
19
20/// Implementation of GPU interface that runs the GPU asynchronously
21class GPUAsynch : public Tegra::GPU {
22public:
23 explicit GPUAsynch(Core::System& system, VideoCore::RendererBase& renderer);
24 ~GPUAsynch() override;
25
26 void PushGPUEntries(Tegra::CommandList&& entries) override;
27 void SwapBuffers(
28 std::optional<std::reference_wrapper<const Tegra::FramebufferConfig>> framebuffer) override;
29 void FlushRegion(CacheAddr addr, u64 size) override;
30 void InvalidateRegion(CacheAddr addr, u64 size) override;
31 void FlushAndInvalidateRegion(CacheAddr addr, u64 size) override;
32
33private:
34 GPUThread::ThreadManager gpu_thread;
35};
36
37} // namespace VideoCommon
diff --git a/src/video_core/gpu_synch.cpp b/src/video_core/gpu_synch.cpp
new file mode 100644
index 000000000..2cfc900ed
--- /dev/null
+++ b/src/video_core/gpu_synch.cpp
@@ -0,0 +1,37 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "video_core/gpu_synch.h"
6#include "video_core/renderer_base.h"
7
8namespace VideoCommon {
9
10GPUSynch::GPUSynch(Core::System& system, VideoCore::RendererBase& renderer)
11 : Tegra::GPU(system, renderer) {}
12
13GPUSynch::~GPUSynch() = default;
14
15void GPUSynch::PushGPUEntries(Tegra::CommandList&& entries) {
16 dma_pusher->Push(std::move(entries));
17 dma_pusher->DispatchCalls();
18}
19
20void GPUSynch::SwapBuffers(
21 std::optional<std::reference_wrapper<const Tegra::FramebufferConfig>> framebuffer) {
22 renderer.SwapBuffers(std::move(framebuffer));
23}
24
25void GPUSynch::FlushRegion(CacheAddr addr, u64 size) {
26 renderer.Rasterizer().FlushRegion(addr, size);
27}
28
29void GPUSynch::InvalidateRegion(CacheAddr addr, u64 size) {
30 renderer.Rasterizer().InvalidateRegion(addr, size);
31}
32
33void GPUSynch::FlushAndInvalidateRegion(CacheAddr addr, u64 size) {
34 renderer.Rasterizer().FlushAndInvalidateRegion(addr, size);
35}
36
37} // namespace VideoCommon
diff --git a/src/video_core/gpu_synch.h b/src/video_core/gpu_synch.h
new file mode 100644
index 000000000..766b5631c
--- /dev/null
+++ b/src/video_core/gpu_synch.h
@@ -0,0 +1,29 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include "video_core/gpu.h"
8
9namespace VideoCore {
10class RendererBase;
11} // namespace VideoCore
12
13namespace VideoCommon {
14
15/// Implementation of GPU interface that runs the GPU synchronously
16class GPUSynch : public Tegra::GPU {
17public:
18 explicit GPUSynch(Core::System& system, VideoCore::RendererBase& renderer);
19 ~GPUSynch() override;
20
21 void PushGPUEntries(Tegra::CommandList&& entries) override;
22 void SwapBuffers(
23 std::optional<std::reference_wrapper<const Tegra::FramebufferConfig>> framebuffer) override;
24 void FlushRegion(CacheAddr addr, u64 size) override;
25 void InvalidateRegion(CacheAddr addr, u64 size) override;
26 void FlushAndInvalidateRegion(CacheAddr addr, u64 size) override;
27};
28
29} // namespace VideoCommon
diff --git a/src/video_core/gpu_thread.cpp b/src/video_core/gpu_thread.cpp
new file mode 100644
index 000000000..086b2f625
--- /dev/null
+++ b/src/video_core/gpu_thread.cpp
@@ -0,0 +1,98 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/assert.h"
6#include "common/microprofile.h"
7#include "core/frontend/scope_acquire_window_context.h"
8#include "video_core/dma_pusher.h"
9#include "video_core/gpu.h"
10#include "video_core/gpu_thread.h"
11#include "video_core/renderer_base.h"
12
13namespace VideoCommon::GPUThread {
14
15/// Runs the GPU thread
16static void RunThread(VideoCore::RendererBase& renderer, Tegra::DmaPusher& dma_pusher,
17 SynchState& state) {
18 MicroProfileOnThreadCreate("GpuThread");
19
20 // Wait for first GPU command before acquiring the window context
21 state.WaitForCommands();
22
23 // If emulation was stopped during disk shader loading, abort before trying to acquire context
24 if (!state.is_running) {
25 return;
26 }
27
28 Core::Frontend::ScopeAcquireWindowContext acquire_context{renderer.GetRenderWindow()};
29
30 CommandDataContainer next;
31 while (state.is_running) {
32 state.WaitForCommands();
33 while (!state.queue.Empty()) {
34 state.queue.Pop(next);
35 if (const auto submit_list = std::get_if<SubmitListCommand>(&next.data)) {
36 dma_pusher.Push(std::move(submit_list->entries));
37 dma_pusher.DispatchCalls();
38 } else if (const auto data = std::get_if<SwapBuffersCommand>(&next.data)) {
39 state.DecrementFramesCounter();
40 renderer.SwapBuffers(std::move(data->framebuffer));
41 } else if (const auto data = std::get_if<FlushRegionCommand>(&next.data)) {
42 renderer.Rasterizer().FlushRegion(data->addr, data->size);
43 } else if (const auto data = std::get_if<InvalidateRegionCommand>(&next.data)) {
44 renderer.Rasterizer().InvalidateRegion(data->addr, data->size);
45 } else if (const auto data = std::get_if<EndProcessingCommand>(&next.data)) {
46 return;
47 } else {
48 UNREACHABLE();
49 }
50 }
51 }
52}
53
54ThreadManager::ThreadManager(VideoCore::RendererBase& renderer, Tegra::DmaPusher& dma_pusher)
55 : renderer{renderer}, dma_pusher{dma_pusher}, thread{RunThread, std::ref(renderer),
56 std::ref(dma_pusher), std::ref(state)} {}
57
58ThreadManager::~ThreadManager() {
59 // Notify GPU thread that a shutdown is pending
60 PushCommand(EndProcessingCommand());
61 thread.join();
62}
63
64void ThreadManager::SubmitList(Tegra::CommandList&& entries) {
65 PushCommand(SubmitListCommand(std::move(entries)));
66}
67
68void ThreadManager::SwapBuffers(
69 std::optional<std::reference_wrapper<const Tegra::FramebufferConfig>> framebuffer) {
70 state.IncrementFramesCounter();
71 PushCommand(SwapBuffersCommand(std::move(framebuffer)));
72 state.WaitForFrames();
73}
74
75void ThreadManager::FlushRegion(CacheAddr addr, u64 size) {
76 PushCommand(FlushRegionCommand(addr, size));
77}
78
79void ThreadManager::InvalidateRegion(CacheAddr addr, u64 size) {
80 if (state.queue.Empty()) {
81 // It's quicker to invalidate a single region on the CPU if the queue is already empty
82 renderer.Rasterizer().InvalidateRegion(addr, size);
83 } else {
84 PushCommand(InvalidateRegionCommand(addr, size));
85 }
86}
87
88void ThreadManager::FlushAndInvalidateRegion(CacheAddr addr, u64 size) {
89 // Skip flush on asynch mode, as FlushAndInvalidateRegion is not used for anything too important
90 InvalidateRegion(addr, size);
91}
92
93void ThreadManager::PushCommand(CommandData&& command_data) {
94 state.queue.Push(CommandDataContainer(std::move(command_data)));
95 state.SignalCommands();
96}
97
98} // namespace VideoCommon::GPUThread
diff --git a/src/video_core/gpu_thread.h b/src/video_core/gpu_thread.h
new file mode 100644
index 000000000..8cd7db1c6
--- /dev/null
+++ b/src/video_core/gpu_thread.h
@@ -0,0 +1,185 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <array>
8#include <atomic>
9#include <condition_variable>
10#include <memory>
11#include <mutex>
12#include <optional>
13#include <thread>
14#include <variant>
15
16#include "common/threadsafe_queue.h"
17#include "video_core/gpu.h"
18
19namespace Tegra {
20struct FramebufferConfig;
21class DmaPusher;
22} // namespace Tegra
23
24namespace VideoCore {
25class RendererBase;
26} // namespace VideoCore
27
28namespace VideoCommon::GPUThread {
29
30/// Command to signal to the GPU thread that processing has ended
31struct EndProcessingCommand final {};
32
33/// Command to signal to the GPU thread that a command list is ready for processing
34struct SubmitListCommand final {
35 explicit SubmitListCommand(Tegra::CommandList&& entries) : entries{std::move(entries)} {}
36
37 Tegra::CommandList entries;
38};
39
40/// Command to signal to the GPU thread that a swap buffers is pending
41struct SwapBuffersCommand final {
42 explicit SwapBuffersCommand(std::optional<const Tegra::FramebufferConfig> framebuffer)
43 : framebuffer{std::move(framebuffer)} {}
44
45 std::optional<Tegra::FramebufferConfig> framebuffer;
46};
47
48/// Command to signal to the GPU thread to flush a region
49struct FlushRegionCommand final {
50 explicit constexpr FlushRegionCommand(CacheAddr addr, u64 size) : addr{addr}, size{size} {}
51
52 CacheAddr addr;
53 u64 size;
54};
55
56/// Command to signal to the GPU thread to invalidate a region
57struct InvalidateRegionCommand final {
58 explicit constexpr InvalidateRegionCommand(CacheAddr addr, u64 size) : addr{addr}, size{size} {}
59
60 CacheAddr addr;
61 u64 size;
62};
63
64/// Command to signal to the GPU thread to flush and invalidate a region
65struct FlushAndInvalidateRegionCommand final {
66 explicit constexpr FlushAndInvalidateRegionCommand(CacheAddr addr, u64 size)
67 : addr{addr}, size{size} {}
68
69 CacheAddr addr;
70 u64 size;
71};
72
73using CommandData =
74 std::variant<EndProcessingCommand, SubmitListCommand, SwapBuffersCommand, FlushRegionCommand,
75 InvalidateRegionCommand, FlushAndInvalidateRegionCommand>;
76
77struct CommandDataContainer {
78 CommandDataContainer() = default;
79
80 CommandDataContainer(CommandData&& data) : data{std::move(data)} {}
81
82 CommandDataContainer& operator=(const CommandDataContainer& t) {
83 data = std::move(t.data);
84 return *this;
85 }
86
87 CommandData data;
88};
89
90/// Struct used to synchronize the GPU thread
91struct SynchState final {
92 std::atomic_bool is_running{true};
93 std::atomic_int queued_frame_count{};
94 std::mutex frames_mutex;
95 std::mutex commands_mutex;
96 std::condition_variable commands_condition;
97 std::condition_variable frames_condition;
98
99 void IncrementFramesCounter() {
100 std::lock_guard<std::mutex> lock{frames_mutex};
101 ++queued_frame_count;
102 }
103
104 void DecrementFramesCounter() {
105 {
106 std::lock_guard<std::mutex> lock{frames_mutex};
107 --queued_frame_count;
108
109 if (queued_frame_count) {
110 return;
111 }
112 }
113 frames_condition.notify_one();
114 }
115
116 void WaitForFrames() {
117 {
118 std::lock_guard<std::mutex> lock{frames_mutex};
119 if (!queued_frame_count) {
120 return;
121 }
122 }
123
124 // Wait for the GPU to be idle (all commands to be executed)
125 {
126 std::unique_lock<std::mutex> lock{frames_mutex};
127 frames_condition.wait(lock, [this] { return !queued_frame_count; });
128 }
129 }
130
131 void SignalCommands() {
132 {
133 std::unique_lock<std::mutex> lock{commands_mutex};
134 if (queue.Empty()) {
135 return;
136 }
137 }
138
139 commands_condition.notify_one();
140 }
141
142 void WaitForCommands() {
143 std::unique_lock<std::mutex> lock{commands_mutex};
144 commands_condition.wait(lock, [this] { return !queue.Empty(); });
145 }
146
147 using CommandQueue = Common::SPSCQueue<CommandDataContainer>;
148 CommandQueue queue;
149};
150
151/// Class used to manage the GPU thread
152class ThreadManager final {
153public:
154 explicit ThreadManager(VideoCore::RendererBase& renderer, Tegra::DmaPusher& dma_pusher);
155 ~ThreadManager();
156
157 /// Push GPU command entries to be processed
158 void SubmitList(Tegra::CommandList&& entries);
159
160 /// Swap buffers (render frame)
161 void SwapBuffers(
162 std::optional<std::reference_wrapper<const Tegra::FramebufferConfig>> framebuffer);
163
164 /// Notify rasterizer that any caches of the specified region should be flushed to Switch memory
165 void FlushRegion(CacheAddr addr, u64 size);
166
167 /// Notify rasterizer that any caches of the specified region should be invalidated
168 void InvalidateRegion(CacheAddr addr, u64 size);
169
170 /// Notify rasterizer that any caches of the specified region should be flushed and invalidated
171 void FlushAndInvalidateRegion(CacheAddr addr, u64 size);
172
173private:
174 /// Pushes a command to be executed by the GPU thread
175 void PushCommand(CommandData&& command_data);
176
177private:
178 SynchState state;
179 VideoCore::RendererBase& renderer;
180 Tegra::DmaPusher& dma_pusher;
181 std::thread thread;
182 std::thread::id thread_id;
183};
184
185} // namespace VideoCommon::GPUThread
diff --git a/src/video_core/memory_manager.cpp b/src/video_core/memory_manager.cpp
index 47247f097..8e8f36f28 100644
--- a/src/video_core/memory_manager.cpp
+++ b/src/video_core/memory_manager.cpp
@@ -5,6 +5,7 @@
5#include "common/alignment.h" 5#include "common/alignment.h"
6#include "common/assert.h" 6#include "common/assert.h"
7#include "common/logging/log.h" 7#include "common/logging/log.h"
8#include "core/memory.h"
8#include "video_core/memory_manager.h" 9#include "video_core/memory_manager.h"
9 10
10namespace Tegra { 11namespace Tegra {
@@ -154,22 +155,59 @@ std::optional<VAddr> MemoryManager::GpuToCpuAddress(GPUVAddr gpu_addr) {
154 const VAddr base_addr{PageSlot(gpu_addr)}; 155 const VAddr base_addr{PageSlot(gpu_addr)};
155 156
156 if (base_addr == static_cast<u64>(PageStatus::Allocated) || 157 if (base_addr == static_cast<u64>(PageStatus::Allocated) ||
157 base_addr == static_cast<u64>(PageStatus::Unmapped)) { 158 base_addr == static_cast<u64>(PageStatus::Unmapped) ||
159 base_addr == static_cast<u64>(PageStatus::Reserved)) {
158 return {}; 160 return {};
159 } 161 }
160 162
161 return base_addr + (gpu_addr & PAGE_MASK); 163 return base_addr + (gpu_addr & PAGE_MASK);
162} 164}
163 165
164std::vector<GPUVAddr> MemoryManager::CpuToGpuAddress(VAddr cpu_addr) const { 166u8 MemoryManager::Read8(GPUVAddr addr) {
165 std::vector<GPUVAddr> results; 167 return Memory::Read8(*GpuToCpuAddress(addr));
166 for (const auto& region : mapped_regions) { 168}
167 if (cpu_addr >= region.cpu_addr && cpu_addr < (region.cpu_addr + region.size)) { 169
168 const u64 offset{cpu_addr - region.cpu_addr}; 170u16 MemoryManager::Read16(GPUVAddr addr) {
169 results.push_back(region.gpu_addr + offset); 171 return Memory::Read16(*GpuToCpuAddress(addr));
170 } 172}
171 } 173
172 return results; 174u32 MemoryManager::Read32(GPUVAddr addr) {
175 return Memory::Read32(*GpuToCpuAddress(addr));
176}
177
178u64 MemoryManager::Read64(GPUVAddr addr) {
179 return Memory::Read64(*GpuToCpuAddress(addr));
180}
181
182void MemoryManager::Write8(GPUVAddr addr, u8 data) {
183 Memory::Write8(*GpuToCpuAddress(addr), data);
184}
185
186void MemoryManager::Write16(GPUVAddr addr, u16 data) {
187 Memory::Write16(*GpuToCpuAddress(addr), data);
188}
189
190void MemoryManager::Write32(GPUVAddr addr, u32 data) {
191 Memory::Write32(*GpuToCpuAddress(addr), data);
192}
193
194void MemoryManager::Write64(GPUVAddr addr, u64 data) {
195 Memory::Write64(*GpuToCpuAddress(addr), data);
196}
197
198u8* MemoryManager::GetPointer(GPUVAddr addr) {
199 return Memory::GetPointer(*GpuToCpuAddress(addr));
200}
201
202void MemoryManager::ReadBlock(GPUVAddr src_addr, void* dest_buffer, std::size_t size) {
203 std::memcpy(dest_buffer, GetPointer(src_addr), size);
204}
205void MemoryManager::WriteBlock(GPUVAddr dest_addr, const void* src_buffer, std::size_t size) {
206 std::memcpy(GetPointer(dest_addr), src_buffer, size);
207}
208
209void MemoryManager::CopyBlock(GPUVAddr dest_addr, GPUVAddr src_addr, std::size_t size) {
210 std::memcpy(GetPointer(dest_addr), GetPointer(src_addr), size);
173} 211}
174 212
175VAddr& MemoryManager::PageSlot(GPUVAddr gpu_addr) { 213VAddr& MemoryManager::PageSlot(GPUVAddr gpu_addr) {
diff --git a/src/video_core/memory_manager.h b/src/video_core/memory_manager.h
index fb03497ca..425e2f31c 100644
--- a/src/video_core/memory_manager.h
+++ b/src/video_core/memory_manager.h
@@ -27,12 +27,27 @@ public:
27 GPUVAddr UnmapBuffer(GPUVAddr gpu_addr, u64 size); 27 GPUVAddr UnmapBuffer(GPUVAddr gpu_addr, u64 size);
28 GPUVAddr GetRegionEnd(GPUVAddr region_start) const; 28 GPUVAddr GetRegionEnd(GPUVAddr region_start) const;
29 std::optional<VAddr> GpuToCpuAddress(GPUVAddr gpu_addr); 29 std::optional<VAddr> GpuToCpuAddress(GPUVAddr gpu_addr);
30 std::vector<GPUVAddr> CpuToGpuAddress(VAddr cpu_addr) const;
31 30
32 static constexpr u64 PAGE_BITS = 16; 31 static constexpr u64 PAGE_BITS = 16;
33 static constexpr u64 PAGE_SIZE = 1 << PAGE_BITS; 32 static constexpr u64 PAGE_SIZE = 1 << PAGE_BITS;
34 static constexpr u64 PAGE_MASK = PAGE_SIZE - 1; 33 static constexpr u64 PAGE_MASK = PAGE_SIZE - 1;
35 34
35 u8 Read8(GPUVAddr addr);
36 u16 Read16(GPUVAddr addr);
37 u32 Read32(GPUVAddr addr);
38 u64 Read64(GPUVAddr addr);
39
40 void Write8(GPUVAddr addr, u8 data);
41 void Write16(GPUVAddr addr, u16 data);
42 void Write32(GPUVAddr addr, u32 data);
43 void Write64(GPUVAddr addr, u64 data);
44
45 u8* GetPointer(GPUVAddr vaddr);
46
47 void ReadBlock(GPUVAddr src_addr, void* dest_buffer, std::size_t size);
48 void WriteBlock(GPUVAddr dest_addr, const void* src_buffer, std::size_t size);
49 void CopyBlock(VAddr dest_addr, VAddr src_addr, std::size_t size);
50
36private: 51private:
37 enum class PageStatus : u64 { 52 enum class PageStatus : u64 {
38 Unmapped = 0xFFFFFFFFFFFFFFFFULL, 53 Unmapped = 0xFFFFFFFFFFFFFFFFULL,
diff --git a/src/video_core/morton.cpp b/src/video_core/morton.cpp
index b68f4fb13..3e91cbc83 100644
--- a/src/video_core/morton.cpp
+++ b/src/video_core/morton.cpp
@@ -6,7 +6,6 @@
6#include <cstring> 6#include <cstring>
7#include "common/assert.h" 7#include "common/assert.h"
8#include "common/common_types.h" 8#include "common/common_types.h"
9#include "core/memory.h"
10#include "video_core/morton.h" 9#include "video_core/morton.h"
11#include "video_core/surface.h" 10#include "video_core/surface.h"
12#include "video_core/textures/decoders.h" 11#include "video_core/textures/decoders.h"
@@ -16,12 +15,12 @@ namespace VideoCore {
16using Surface::GetBytesPerPixel; 15using Surface::GetBytesPerPixel;
17using Surface::PixelFormat; 16using Surface::PixelFormat;
18 17
19using MortonCopyFn = void (*)(u32, u32, u32, u32, u32, u32, u8*, std::size_t, VAddr); 18using MortonCopyFn = void (*)(u32, u32, u32, u32, u32, u32, u8*, u8*);
20using ConversionArray = std::array<MortonCopyFn, Surface::MaxPixelFormat>; 19using ConversionArray = std::array<MortonCopyFn, Surface::MaxPixelFormat>;
21 20
22template <bool morton_to_linear, PixelFormat format> 21template <bool morton_to_linear, PixelFormat format>
23static void MortonCopy(u32 stride, u32 block_height, u32 height, u32 block_depth, u32 depth, 22static void MortonCopy(u32 stride, u32 block_height, u32 height, u32 block_depth, u32 depth,
24 u32 tile_width_spacing, u8* buffer, std::size_t buffer_size, VAddr addr) { 23 u32 tile_width_spacing, u8* buffer, u8* addr) {
25 constexpr u32 bytes_per_pixel = GetBytesPerPixel(format); 24 constexpr u32 bytes_per_pixel = GetBytesPerPixel(format);
26 25
27 // With the BCn formats (DXT and DXN), each 4x4 tile is swizzled instead of just individual 26 // With the BCn formats (DXT and DXN), each 4x4 tile is swizzled instead of just individual
@@ -34,150 +33,146 @@ static void MortonCopy(u32 stride, u32 block_height, u32 height, u32 block_depth
34 stride, height, depth, block_height, block_depth, 33 stride, height, depth, block_height, block_depth,
35 tile_width_spacing); 34 tile_width_spacing);
36 } else { 35 } else {
37 Tegra::Texture::CopySwizzledData( 36 Tegra::Texture::CopySwizzledData((stride + tile_size_x - 1) / tile_size_x,
38 (stride + tile_size_x - 1) / tile_size_x, (height + tile_size_y - 1) / tile_size_y, 37 (height + tile_size_y - 1) / tile_size_y, depth,
39 depth, bytes_per_pixel, bytes_per_pixel, Memory::GetPointer(addr), buffer, false, 38 bytes_per_pixel, bytes_per_pixel, addr, buffer, false,
40 block_height, block_depth, tile_width_spacing); 39 block_height, block_depth, tile_width_spacing);
41 } 40 }
42} 41}
43 42
44static constexpr ConversionArray morton_to_linear_fns = { 43static constexpr ConversionArray morton_to_linear_fns = {
45 // clang-format off 44 MortonCopy<true, PixelFormat::ABGR8U>,
46 MortonCopy<true, PixelFormat::ABGR8U>, 45 MortonCopy<true, PixelFormat::ABGR8S>,
47 MortonCopy<true, PixelFormat::ABGR8S>, 46 MortonCopy<true, PixelFormat::ABGR8UI>,
48 MortonCopy<true, PixelFormat::ABGR8UI>, 47 MortonCopy<true, PixelFormat::B5G6R5U>,
49 MortonCopy<true, PixelFormat::B5G6R5U>, 48 MortonCopy<true, PixelFormat::A2B10G10R10U>,
50 MortonCopy<true, PixelFormat::A2B10G10R10U>, 49 MortonCopy<true, PixelFormat::A1B5G5R5U>,
51 MortonCopy<true, PixelFormat::A1B5G5R5U>, 50 MortonCopy<true, PixelFormat::R8U>,
52 MortonCopy<true, PixelFormat::R8U>, 51 MortonCopy<true, PixelFormat::R8UI>,
53 MortonCopy<true, PixelFormat::R8UI>, 52 MortonCopy<true, PixelFormat::RGBA16F>,
54 MortonCopy<true, PixelFormat::RGBA16F>, 53 MortonCopy<true, PixelFormat::RGBA16U>,
55 MortonCopy<true, PixelFormat::RGBA16U>, 54 MortonCopy<true, PixelFormat::RGBA16UI>,
56 MortonCopy<true, PixelFormat::RGBA16UI>, 55 MortonCopy<true, PixelFormat::R11FG11FB10F>,
57 MortonCopy<true, PixelFormat::R11FG11FB10F>, 56 MortonCopy<true, PixelFormat::RGBA32UI>,
58 MortonCopy<true, PixelFormat::RGBA32UI>, 57 MortonCopy<true, PixelFormat::DXT1>,
59 MortonCopy<true, PixelFormat::DXT1>, 58 MortonCopy<true, PixelFormat::DXT23>,
60 MortonCopy<true, PixelFormat::DXT23>, 59 MortonCopy<true, PixelFormat::DXT45>,
61 MortonCopy<true, PixelFormat::DXT45>, 60 MortonCopy<true, PixelFormat::DXN1>,
62 MortonCopy<true, PixelFormat::DXN1>, 61 MortonCopy<true, PixelFormat::DXN2UNORM>,
63 MortonCopy<true, PixelFormat::DXN2UNORM>, 62 MortonCopy<true, PixelFormat::DXN2SNORM>,
64 MortonCopy<true, PixelFormat::DXN2SNORM>, 63 MortonCopy<true, PixelFormat::BC7U>,
65 MortonCopy<true, PixelFormat::BC7U>, 64 MortonCopy<true, PixelFormat::BC6H_UF16>,
66 MortonCopy<true, PixelFormat::BC6H_UF16>, 65 MortonCopy<true, PixelFormat::BC6H_SF16>,
67 MortonCopy<true, PixelFormat::BC6H_SF16>, 66 MortonCopy<true, PixelFormat::ASTC_2D_4X4>,
68 MortonCopy<true, PixelFormat::ASTC_2D_4X4>, 67 MortonCopy<true, PixelFormat::BGRA8>,
69 MortonCopy<true, PixelFormat::BGRA8>, 68 MortonCopy<true, PixelFormat::RGBA32F>,
70 MortonCopy<true, PixelFormat::RGBA32F>, 69 MortonCopy<true, PixelFormat::RG32F>,
71 MortonCopy<true, PixelFormat::RG32F>, 70 MortonCopy<true, PixelFormat::R32F>,
72 MortonCopy<true, PixelFormat::R32F>, 71 MortonCopy<true, PixelFormat::R16F>,
73 MortonCopy<true, PixelFormat::R16F>, 72 MortonCopy<true, PixelFormat::R16U>,
74 MortonCopy<true, PixelFormat::R16U>, 73 MortonCopy<true, PixelFormat::R16S>,
75 MortonCopy<true, PixelFormat::R16S>, 74 MortonCopy<true, PixelFormat::R16UI>,
76 MortonCopy<true, PixelFormat::R16UI>, 75 MortonCopy<true, PixelFormat::R16I>,
77 MortonCopy<true, PixelFormat::R16I>, 76 MortonCopy<true, PixelFormat::RG16>,
78 MortonCopy<true, PixelFormat::RG16>, 77 MortonCopy<true, PixelFormat::RG16F>,
79 MortonCopy<true, PixelFormat::RG16F>, 78 MortonCopy<true, PixelFormat::RG16UI>,
80 MortonCopy<true, PixelFormat::RG16UI>, 79 MortonCopy<true, PixelFormat::RG16I>,
81 MortonCopy<true, PixelFormat::RG16I>, 80 MortonCopy<true, PixelFormat::RG16S>,
82 MortonCopy<true, PixelFormat::RG16S>, 81 MortonCopy<true, PixelFormat::RGB32F>,
83 MortonCopy<true, PixelFormat::RGB32F>, 82 MortonCopy<true, PixelFormat::RGBA8_SRGB>,
84 MortonCopy<true, PixelFormat::RGBA8_SRGB>, 83 MortonCopy<true, PixelFormat::RG8U>,
85 MortonCopy<true, PixelFormat::RG8U>, 84 MortonCopy<true, PixelFormat::RG8S>,
86 MortonCopy<true, PixelFormat::RG8S>, 85 MortonCopy<true, PixelFormat::RG32UI>,
87 MortonCopy<true, PixelFormat::RG32UI>, 86 MortonCopy<true, PixelFormat::R32UI>,
88 MortonCopy<true, PixelFormat::R32UI>, 87 MortonCopy<true, PixelFormat::ASTC_2D_8X8>,
89 MortonCopy<true, PixelFormat::ASTC_2D_8X8>, 88 MortonCopy<true, PixelFormat::ASTC_2D_8X5>,
90 MortonCopy<true, PixelFormat::ASTC_2D_8X5>, 89 MortonCopy<true, PixelFormat::ASTC_2D_5X4>,
91 MortonCopy<true, PixelFormat::ASTC_2D_5X4>, 90 MortonCopy<true, PixelFormat::BGRA8_SRGB>,
92 MortonCopy<true, PixelFormat::BGRA8_SRGB>, 91 MortonCopy<true, PixelFormat::DXT1_SRGB>,
93 MortonCopy<true, PixelFormat::DXT1_SRGB>, 92 MortonCopy<true, PixelFormat::DXT23_SRGB>,
94 MortonCopy<true, PixelFormat::DXT23_SRGB>, 93 MortonCopy<true, PixelFormat::DXT45_SRGB>,
95 MortonCopy<true, PixelFormat::DXT45_SRGB>, 94 MortonCopy<true, PixelFormat::BC7U_SRGB>,
96 MortonCopy<true, PixelFormat::BC7U_SRGB>, 95 MortonCopy<true, PixelFormat::ASTC_2D_4X4_SRGB>,
97 MortonCopy<true, PixelFormat::ASTC_2D_4X4_SRGB>, 96 MortonCopy<true, PixelFormat::ASTC_2D_8X8_SRGB>,
98 MortonCopy<true, PixelFormat::ASTC_2D_8X8_SRGB>, 97 MortonCopy<true, PixelFormat::ASTC_2D_8X5_SRGB>,
99 MortonCopy<true, PixelFormat::ASTC_2D_8X5_SRGB>, 98 MortonCopy<true, PixelFormat::ASTC_2D_5X4_SRGB>,
100 MortonCopy<true, PixelFormat::ASTC_2D_5X4_SRGB>, 99 MortonCopy<true, PixelFormat::ASTC_2D_5X5>,
101 MortonCopy<true, PixelFormat::ASTC_2D_5X5>, 100 MortonCopy<true, PixelFormat::ASTC_2D_5X5_SRGB>,
102 MortonCopy<true, PixelFormat::ASTC_2D_5X5_SRGB>, 101 MortonCopy<true, PixelFormat::ASTC_2D_10X8>,
103 MortonCopy<true, PixelFormat::ASTC_2D_10X8>, 102 MortonCopy<true, PixelFormat::ASTC_2D_10X8_SRGB>,
104 MortonCopy<true, PixelFormat::ASTC_2D_10X8_SRGB>, 103 MortonCopy<true, PixelFormat::Z32F>,
105 MortonCopy<true, PixelFormat::Z32F>, 104 MortonCopy<true, PixelFormat::Z16>,
106 MortonCopy<true, PixelFormat::Z16>, 105 MortonCopy<true, PixelFormat::Z24S8>,
107 MortonCopy<true, PixelFormat::Z24S8>, 106 MortonCopy<true, PixelFormat::S8Z24>,
108 MortonCopy<true, PixelFormat::S8Z24>, 107 MortonCopy<true, PixelFormat::Z32FS8>,
109 MortonCopy<true, PixelFormat::Z32FS8>,
110 // clang-format on
111}; 108};
112 109
113static constexpr ConversionArray linear_to_morton_fns = { 110static constexpr ConversionArray linear_to_morton_fns = {
114 // clang-format off 111 MortonCopy<false, PixelFormat::ABGR8U>,
115 MortonCopy<false, PixelFormat::ABGR8U>, 112 MortonCopy<false, PixelFormat::ABGR8S>,
116 MortonCopy<false, PixelFormat::ABGR8S>, 113 MortonCopy<false, PixelFormat::ABGR8UI>,
117 MortonCopy<false, PixelFormat::ABGR8UI>, 114 MortonCopy<false, PixelFormat::B5G6R5U>,
118 MortonCopy<false, PixelFormat::B5G6R5U>, 115 MortonCopy<false, PixelFormat::A2B10G10R10U>,
119 MortonCopy<false, PixelFormat::A2B10G10R10U>, 116 MortonCopy<false, PixelFormat::A1B5G5R5U>,
120 MortonCopy<false, PixelFormat::A1B5G5R5U>, 117 MortonCopy<false, PixelFormat::R8U>,
121 MortonCopy<false, PixelFormat::R8U>, 118 MortonCopy<false, PixelFormat::R8UI>,
122 MortonCopy<false, PixelFormat::R8UI>, 119 MortonCopy<false, PixelFormat::RGBA16F>,
123 MortonCopy<false, PixelFormat::RGBA16F>, 120 MortonCopy<false, PixelFormat::RGBA16U>,
124 MortonCopy<false, PixelFormat::RGBA16U>, 121 MortonCopy<false, PixelFormat::RGBA16UI>,
125 MortonCopy<false, PixelFormat::RGBA16UI>, 122 MortonCopy<false, PixelFormat::R11FG11FB10F>,
126 MortonCopy<false, PixelFormat::R11FG11FB10F>, 123 MortonCopy<false, PixelFormat::RGBA32UI>,
127 MortonCopy<false, PixelFormat::RGBA32UI>, 124 MortonCopy<false, PixelFormat::DXT1>,
128 MortonCopy<false, PixelFormat::DXT1>, 125 MortonCopy<false, PixelFormat::DXT23>,
129 MortonCopy<false, PixelFormat::DXT23>, 126 MortonCopy<false, PixelFormat::DXT45>,
130 MortonCopy<false, PixelFormat::DXT45>, 127 MortonCopy<false, PixelFormat::DXN1>,
131 MortonCopy<false, PixelFormat::DXN1>, 128 MortonCopy<false, PixelFormat::DXN2UNORM>,
132 MortonCopy<false, PixelFormat::DXN2UNORM>, 129 MortonCopy<false, PixelFormat::DXN2SNORM>,
133 MortonCopy<false, PixelFormat::DXN2SNORM>, 130 MortonCopy<false, PixelFormat::BC7U>,
134 MortonCopy<false, PixelFormat::BC7U>, 131 MortonCopy<false, PixelFormat::BC6H_UF16>,
135 MortonCopy<false, PixelFormat::BC6H_UF16>, 132 MortonCopy<false, PixelFormat::BC6H_SF16>,
136 MortonCopy<false, PixelFormat::BC6H_SF16>, 133 // TODO(Subv): Swizzling ASTC formats are not supported
137 // TODO(Subv): Swizzling ASTC formats are not supported 134 nullptr,
138 nullptr, 135 MortonCopy<false, PixelFormat::BGRA8>,
139 MortonCopy<false, PixelFormat::BGRA8>, 136 MortonCopy<false, PixelFormat::RGBA32F>,
140 MortonCopy<false, PixelFormat::RGBA32F>, 137 MortonCopy<false, PixelFormat::RG32F>,
141 MortonCopy<false, PixelFormat::RG32F>, 138 MortonCopy<false, PixelFormat::R32F>,
142 MortonCopy<false, PixelFormat::R32F>, 139 MortonCopy<false, PixelFormat::R16F>,
143 MortonCopy<false, PixelFormat::R16F>, 140 MortonCopy<false, PixelFormat::R16U>,
144 MortonCopy<false, PixelFormat::R16U>, 141 MortonCopy<false, PixelFormat::R16S>,
145 MortonCopy<false, PixelFormat::R16S>, 142 MortonCopy<false, PixelFormat::R16UI>,
146 MortonCopy<false, PixelFormat::R16UI>, 143 MortonCopy<false, PixelFormat::R16I>,
147 MortonCopy<false, PixelFormat::R16I>, 144 MortonCopy<false, PixelFormat::RG16>,
148 MortonCopy<false, PixelFormat::RG16>, 145 MortonCopy<false, PixelFormat::RG16F>,
149 MortonCopy<false, PixelFormat::RG16F>, 146 MortonCopy<false, PixelFormat::RG16UI>,
150 MortonCopy<false, PixelFormat::RG16UI>, 147 MortonCopy<false, PixelFormat::RG16I>,
151 MortonCopy<false, PixelFormat::RG16I>, 148 MortonCopy<false, PixelFormat::RG16S>,
152 MortonCopy<false, PixelFormat::RG16S>, 149 MortonCopy<false, PixelFormat::RGB32F>,
153 MortonCopy<false, PixelFormat::RGB32F>, 150 MortonCopy<false, PixelFormat::RGBA8_SRGB>,
154 MortonCopy<false, PixelFormat::RGBA8_SRGB>, 151 MortonCopy<false, PixelFormat::RG8U>,
155 MortonCopy<false, PixelFormat::RG8U>, 152 MortonCopy<false, PixelFormat::RG8S>,
156 MortonCopy<false, PixelFormat::RG8S>, 153 MortonCopy<false, PixelFormat::RG32UI>,
157 MortonCopy<false, PixelFormat::RG32UI>, 154 MortonCopy<false, PixelFormat::R32UI>,
158 MortonCopy<false, PixelFormat::R32UI>, 155 nullptr,
159 nullptr, 156 nullptr,
160 nullptr, 157 nullptr,
161 nullptr, 158 MortonCopy<false, PixelFormat::BGRA8_SRGB>,
162 MortonCopy<false, PixelFormat::BGRA8_SRGB>, 159 MortonCopy<false, PixelFormat::DXT1_SRGB>,
163 MortonCopy<false, PixelFormat::DXT1_SRGB>, 160 MortonCopy<false, PixelFormat::DXT23_SRGB>,
164 MortonCopy<false, PixelFormat::DXT23_SRGB>, 161 MortonCopy<false, PixelFormat::DXT45_SRGB>,
165 MortonCopy<false, PixelFormat::DXT45_SRGB>, 162 MortonCopy<false, PixelFormat::BC7U_SRGB>,
166 MortonCopy<false, PixelFormat::BC7U_SRGB>, 163 nullptr,
167 nullptr, 164 nullptr,
168 nullptr, 165 nullptr,
169 nullptr, 166 nullptr,
170 nullptr, 167 nullptr,
171 nullptr, 168 nullptr,
172 nullptr, 169 nullptr,
173 nullptr, 170 nullptr,
174 nullptr, 171 MortonCopy<false, PixelFormat::Z32F>,
175 MortonCopy<false, PixelFormat::Z32F>, 172 MortonCopy<false, PixelFormat::Z16>,
176 MortonCopy<false, PixelFormat::Z16>, 173 MortonCopy<false, PixelFormat::Z24S8>,
177 MortonCopy<false, PixelFormat::Z24S8>, 174 MortonCopy<false, PixelFormat::S8Z24>,
178 MortonCopy<false, PixelFormat::S8Z24>, 175 MortonCopy<false, PixelFormat::Z32FS8>,
179 MortonCopy<false, PixelFormat::Z32FS8>,
180 // clang-format on
181}; 176};
182 177
183static MortonCopyFn GetSwizzleFunction(MortonSwizzleMode mode, Surface::PixelFormat format) { 178static MortonCopyFn GetSwizzleFunction(MortonSwizzleMode mode, Surface::PixelFormat format) {
@@ -191,45 +186,6 @@ static MortonCopyFn GetSwizzleFunction(MortonSwizzleMode mode, Surface::PixelFor
191 return morton_to_linear_fns[static_cast<std::size_t>(format)]; 186 return morton_to_linear_fns[static_cast<std::size_t>(format)];
192} 187}
193 188
194/// 8x8 Z-Order coordinate from 2D coordinates
195static u32 MortonInterleave(u32 x, u32 y) {
196 static const u32 xlut[] = {0x00, 0x01, 0x04, 0x05, 0x10, 0x11, 0x14, 0x15};
197 static const u32 ylut[] = {0x00, 0x02, 0x08, 0x0a, 0x20, 0x22, 0x28, 0x2a};
198 return xlut[x % 8] + ylut[y % 8];
199}
200
201/// Calculates the offset of the position of the pixel in Morton order
202static u32 GetMortonOffset(u32 x, u32 y, u32 bytes_per_pixel) {
203 // Images are split into 8x8 tiles. Each tile is composed of four 4x4 subtiles each
204 // of which is composed of four 2x2 subtiles each of which is composed of four texels.
205 // Each structure is embedded into the next-bigger one in a diagonal pattern, e.g.
206 // texels are laid out in a 2x2 subtile like this:
207 // 2 3
208 // 0 1
209 //
210 // The full 8x8 tile has the texels arranged like this:
211 //
212 // 42 43 46 47 58 59 62 63
213 // 40 41 44 45 56 57 60 61
214 // 34 35 38 39 50 51 54 55
215 // 32 33 36 37 48 49 52 53
216 // 10 11 14 15 26 27 30 31
217 // 08 09 12 13 24 25 28 29
218 // 02 03 06 07 18 19 22 23
219 // 00 01 04 05 16 17 20 21
220 //
221 // This pattern is what's called Z-order curve, or Morton order.
222
223 const unsigned int block_height = 8;
224 const unsigned int coarse_x = x & ~7;
225
226 u32 i = MortonInterleave(x, y);
227
228 const unsigned int offset = coarse_x * block_height;
229
230 return (i + offset) * bytes_per_pixel;
231}
232
233static u32 MortonInterleave128(u32 x, u32 y) { 189static u32 MortonInterleave128(u32 x, u32 y) {
234 // 128x128 Z-Order coordinate from 2D coordinates 190 // 128x128 Z-Order coordinate from 2D coordinates
235 static constexpr u32 xlut[] = { 191 static constexpr u32 xlut[] = {
@@ -325,14 +281,14 @@ static u32 GetMortonOffset128(u32 x, u32 y, u32 bytes_per_pixel) {
325 281
326void MortonSwizzle(MortonSwizzleMode mode, Surface::PixelFormat format, u32 stride, 282void MortonSwizzle(MortonSwizzleMode mode, Surface::PixelFormat format, u32 stride,
327 u32 block_height, u32 height, u32 block_depth, u32 depth, u32 tile_width_spacing, 283 u32 block_height, u32 height, u32 block_depth, u32 depth, u32 tile_width_spacing,
328 u8* buffer, std::size_t buffer_size, VAddr addr) { 284 u8* buffer, u8* addr) {
329
330 GetSwizzleFunction(mode, format)(stride, block_height, height, block_depth, depth, 285 GetSwizzleFunction(mode, format)(stride, block_height, height, block_depth, depth,
331 tile_width_spacing, buffer, buffer_size, addr); 286 tile_width_spacing, buffer, addr);
332} 287}
333 288
334void MortonCopyPixels128(u32 width, u32 height, u32 bytes_per_pixel, u32 linear_bytes_per_pixel, 289void MortonCopyPixels128(MortonSwizzleMode mode, u32 width, u32 height, u32 bytes_per_pixel,
335 u8* morton_data, u8* linear_data, bool morton_to_linear) { 290 u32 linear_bytes_per_pixel, u8* morton_data, u8* linear_data) {
291 const bool morton_to_linear = mode == MortonSwizzleMode::MortonToLinear;
336 u8* data_ptrs[2]; 292 u8* data_ptrs[2];
337 for (u32 y = 0; y < height; ++y) { 293 for (u32 y = 0; y < height; ++y) {
338 for (u32 x = 0; x < width; ++x) { 294 for (u32 x = 0; x < width; ++x) {
diff --git a/src/video_core/morton.h b/src/video_core/morton.h
index 065f59ce3..ee5b45555 100644
--- a/src/video_core/morton.h
+++ b/src/video_core/morton.h
@@ -13,9 +13,9 @@ enum class MortonSwizzleMode { MortonToLinear, LinearToMorton };
13 13
14void MortonSwizzle(MortonSwizzleMode mode, VideoCore::Surface::PixelFormat format, u32 stride, 14void MortonSwizzle(MortonSwizzleMode mode, VideoCore::Surface::PixelFormat format, u32 stride,
15 u32 block_height, u32 height, u32 block_depth, u32 depth, u32 tile_width_spacing, 15 u32 block_height, u32 height, u32 block_depth, u32 depth, u32 tile_width_spacing,
16 u8* buffer, std::size_t buffer_size, VAddr addr); 16 u8* buffer, u8* addr);
17 17
18void MortonCopyPixels128(u32 width, u32 height, u32 bytes_per_pixel, u32 linear_bytes_per_pixel, 18void MortonCopyPixels128(MortonSwizzleMode mode, u32 width, u32 height, u32 bytes_per_pixel,
19 u8* morton_data, u8* linear_data, bool morton_to_linear); 19 u32 linear_bytes_per_pixel, u8* morton_data, u8* linear_data);
20 20
21} // namespace VideoCore 21} // namespace VideoCore
diff --git a/src/video_core/rasterizer_cache.h b/src/video_core/rasterizer_cache.h
index bcf0c15a4..ecd9986a0 100644
--- a/src/video_core/rasterizer_cache.h
+++ b/src/video_core/rasterizer_cache.h
@@ -4,6 +4,7 @@
4 4
5#pragma once 5#pragma once
6 6
7#include <mutex>
7#include <set> 8#include <set>
8#include <unordered_map> 9#include <unordered_map>
9 10
@@ -12,14 +13,26 @@
12 13
13#include "common/common_types.h" 14#include "common/common_types.h"
14#include "core/settings.h" 15#include "core/settings.h"
16#include "video_core/gpu.h"
15#include "video_core/rasterizer_interface.h" 17#include "video_core/rasterizer_interface.h"
16 18
17class RasterizerCacheObject { 19class RasterizerCacheObject {
18public: 20public:
21 explicit RasterizerCacheObject(const u8* host_ptr)
22 : host_ptr{host_ptr}, cache_addr{ToCacheAddr(host_ptr)} {}
23
19 virtual ~RasterizerCacheObject(); 24 virtual ~RasterizerCacheObject();
20 25
26 CacheAddr GetCacheAddr() const {
27 return cache_addr;
28 }
29
30 const u8* GetHostPtr() const {
31 return host_ptr;
32 }
33
21 /// Gets the address of the shader in guest memory, required for cache management 34 /// Gets the address of the shader in guest memory, required for cache management
22 virtual VAddr GetAddr() const = 0; 35 virtual VAddr GetCpuAddr() const = 0;
23 36
24 /// Gets the size of the shader in guest memory, required for cache management 37 /// Gets the size of the shader in guest memory, required for cache management
25 virtual std::size_t GetSizeInBytes() const = 0; 38 virtual std::size_t GetSizeInBytes() const = 0;
@@ -58,6 +71,8 @@ private:
58 bool is_registered{}; ///< Whether the object is currently registered with the cache 71 bool is_registered{}; ///< Whether the object is currently registered with the cache
59 bool is_dirty{}; ///< Whether the object is dirty (out of sync with guest memory) 72 bool is_dirty{}; ///< Whether the object is dirty (out of sync with guest memory)
60 u64 last_modified_ticks{}; ///< When the object was last modified, used for in-order flushing 73 u64 last_modified_ticks{}; ///< When the object was last modified, used for in-order flushing
74 CacheAddr cache_addr{}; ///< Cache address memory, unique from emulated virtual address space
75 const u8* host_ptr{}; ///< Pointer to the memory backing this cached region
61}; 76};
62 77
63template <class T> 78template <class T>
@@ -68,7 +83,9 @@ public:
68 explicit RasterizerCache(VideoCore::RasterizerInterface& rasterizer) : rasterizer{rasterizer} {} 83 explicit RasterizerCache(VideoCore::RasterizerInterface& rasterizer) : rasterizer{rasterizer} {}
69 84
70 /// Write any cached resources overlapping the specified region back to memory 85 /// Write any cached resources overlapping the specified region back to memory
71 void FlushRegion(Tegra::GPUVAddr addr, size_t size) { 86 void FlushRegion(CacheAddr addr, std::size_t size) {
87 std::lock_guard<std::recursive_mutex> lock{mutex};
88
72 const auto& objects{GetSortedObjectsFromRegion(addr, size)}; 89 const auto& objects{GetSortedObjectsFromRegion(addr, size)};
73 for (auto& object : objects) { 90 for (auto& object : objects) {
74 FlushObject(object); 91 FlushObject(object);
@@ -76,7 +93,9 @@ public:
76 } 93 }
77 94
78 /// Mark the specified region as being invalidated 95 /// Mark the specified region as being invalidated
79 void InvalidateRegion(VAddr addr, u64 size) { 96 void InvalidateRegion(CacheAddr addr, u64 size) {
97 std::lock_guard<std::recursive_mutex> lock{mutex};
98
80 const auto& objects{GetSortedObjectsFromRegion(addr, size)}; 99 const auto& objects{GetSortedObjectsFromRegion(addr, size)};
81 for (auto& object : objects) { 100 for (auto& object : objects) {
82 if (!object->IsRegistered()) { 101 if (!object->IsRegistered()) {
@@ -89,49 +108,70 @@ public:
89 108
90 /// Invalidates everything in the cache 109 /// Invalidates everything in the cache
91 void InvalidateAll() { 110 void InvalidateAll() {
111 std::lock_guard<std::recursive_mutex> lock{mutex};
112
92 while (interval_cache.begin() != interval_cache.end()) { 113 while (interval_cache.begin() != interval_cache.end()) {
93 Unregister(*interval_cache.begin()->second.begin()); 114 Unregister(*interval_cache.begin()->second.begin());
94 } 115 }
95 } 116 }
96 117
97protected: 118protected:
98 /// Tries to get an object from the cache with the specified address 119 /// Tries to get an object from the cache with the specified cache address
99 T TryGet(VAddr addr) const { 120 T TryGet(CacheAddr addr) const {
100 const auto iter = map_cache.find(addr); 121 const auto iter = map_cache.find(addr);
101 if (iter != map_cache.end()) 122 if (iter != map_cache.end())
102 return iter->second; 123 return iter->second;
103 return nullptr; 124 return nullptr;
104 } 125 }
105 126
127 T TryGet(const void* addr) const {
128 const auto iter = map_cache.find(ToCacheAddr(addr));
129 if (iter != map_cache.end())
130 return iter->second;
131 return nullptr;
132 }
133
106 /// Register an object into the cache 134 /// Register an object into the cache
107 void Register(const T& object) { 135 void Register(const T& object) {
136 std::lock_guard<std::recursive_mutex> lock{mutex};
137
108 object->SetIsRegistered(true); 138 object->SetIsRegistered(true);
109 interval_cache.add({GetInterval(object), ObjectSet{object}}); 139 interval_cache.add({GetInterval(object), ObjectSet{object}});
110 map_cache.insert({object->GetAddr(), object}); 140 map_cache.insert({object->GetCacheAddr(), object});
111 rasterizer.UpdatePagesCachedCount(object->GetAddr(), object->GetSizeInBytes(), 1); 141 rasterizer.UpdatePagesCachedCount(object->GetCpuAddr(), object->GetSizeInBytes(), 1);
112 } 142 }
113 143
114 /// Unregisters an object from the cache 144 /// Unregisters an object from the cache
115 void Unregister(const T& object) { 145 void Unregister(const T& object) {
116 object->SetIsRegistered(false); 146 std::lock_guard<std::recursive_mutex> lock{mutex};
117 rasterizer.UpdatePagesCachedCount(object->GetAddr(), object->GetSizeInBytes(), -1);
118 // Only flush if use_accurate_gpu_emulation is enabled, as it incurs a performance hit
119 if (Settings::values.use_accurate_gpu_emulation) {
120 FlushObject(object);
121 }
122 147
148 object->SetIsRegistered(false);
149 rasterizer.UpdatePagesCachedCount(object->GetCpuAddr(), object->GetSizeInBytes(), -1);
123 interval_cache.subtract({GetInterval(object), ObjectSet{object}}); 150 interval_cache.subtract({GetInterval(object), ObjectSet{object}});
124 map_cache.erase(object->GetAddr()); 151 map_cache.erase(object->GetCacheAddr());
125 } 152 }
126 153
127 /// Returns a ticks counter used for tracking when cached objects were last modified 154 /// Returns a ticks counter used for tracking when cached objects were last modified
128 u64 GetModifiedTicks() { 155 u64 GetModifiedTicks() {
156 std::lock_guard<std::recursive_mutex> lock{mutex};
157
129 return ++modified_ticks; 158 return ++modified_ticks;
130 } 159 }
131 160
161 /// Flushes the specified object, updating appropriate cache state as needed
162 void FlushObject(const T& object) {
163 std::lock_guard<std::recursive_mutex> lock{mutex};
164
165 if (!object->IsDirty()) {
166 return;
167 }
168 object->Flush();
169 object->MarkAsModified(false, *this);
170 }
171
132private: 172private:
133 /// Returns a list of cached objects from the specified memory region, ordered by access time 173 /// Returns a list of cached objects from the specified memory region, ordered by access time
134 std::vector<T> GetSortedObjectsFromRegion(VAddr addr, u64 size) { 174 std::vector<T> GetSortedObjectsFromRegion(CacheAddr addr, u64 size) {
135 if (size == 0) { 175 if (size == 0) {
136 return {}; 176 return {};
137 } 177 }
@@ -154,27 +194,19 @@ private:
154 return objects; 194 return objects;
155 } 195 }
156 196
157 /// Flushes the specified object, updating appropriate cache state as needed
158 void FlushObject(const T& object) {
159 if (!object->IsDirty()) {
160 return;
161 }
162 object->Flush();
163 object->MarkAsModified(false, *this);
164 }
165
166 using ObjectSet = std::set<T>; 197 using ObjectSet = std::set<T>;
167 using ObjectCache = std::unordered_map<VAddr, T>; 198 using ObjectCache = std::unordered_map<CacheAddr, T>;
168 using IntervalCache = boost::icl::interval_map<VAddr, ObjectSet>; 199 using IntervalCache = boost::icl::interval_map<CacheAddr, ObjectSet>;
169 using ObjectInterval = typename IntervalCache::interval_type; 200 using ObjectInterval = typename IntervalCache::interval_type;
170 201
171 static auto GetInterval(const T& object) { 202 static auto GetInterval(const T& object) {
172 return ObjectInterval::right_open(object->GetAddr(), 203 return ObjectInterval::right_open(object->GetCacheAddr(),
173 object->GetAddr() + object->GetSizeInBytes()); 204 object->GetCacheAddr() + object->GetSizeInBytes());
174 } 205 }
175 206
176 ObjectCache map_cache; 207 ObjectCache map_cache;
177 IntervalCache interval_cache; ///< Cache of objects 208 IntervalCache interval_cache; ///< Cache of objects
178 u64 modified_ticks{}; ///< Counter of cache state ticks, used for in-order flushing 209 u64 modified_ticks{}; ///< Counter of cache state ticks, used for in-order flushing
179 VideoCore::RasterizerInterface& rasterizer; 210 VideoCore::RasterizerInterface& rasterizer;
211 std::recursive_mutex mutex;
180}; 212};
diff --git a/src/video_core/rasterizer_interface.h b/src/video_core/rasterizer_interface.h
index 4c08bb148..76e292e87 100644
--- a/src/video_core/rasterizer_interface.h
+++ b/src/video_core/rasterizer_interface.h
@@ -4,6 +4,7 @@
4 4
5#pragma once 5#pragma once
6 6
7#include <atomic>
7#include <functional> 8#include <functional>
8#include "common/common_types.h" 9#include "common/common_types.h"
9#include "video_core/engines/fermi_2d.h" 10#include "video_core/engines/fermi_2d.h"
@@ -34,18 +35,20 @@ public:
34 virtual void FlushAll() = 0; 35 virtual void FlushAll() = 0;
35 36
36 /// Notify rasterizer that any caches of the specified region should be flushed to Switch memory 37 /// Notify rasterizer that any caches of the specified region should be flushed to Switch memory
37 virtual void FlushRegion(VAddr addr, u64 size) = 0; 38 virtual void FlushRegion(CacheAddr addr, u64 size) = 0;
38 39
39 /// Notify rasterizer that any caches of the specified region should be invalidated 40 /// Notify rasterizer that any caches of the specified region should be invalidated
40 virtual void InvalidateRegion(VAddr addr, u64 size) = 0; 41 virtual void InvalidateRegion(CacheAddr addr, u64 size) = 0;
41 42
42 /// Notify rasterizer that any caches of the specified region should be flushed to Switch memory 43 /// Notify rasterizer that any caches of the specified region should be flushed to Switch memory
43 /// and invalidated 44 /// and invalidated
44 virtual void FlushAndInvalidateRegion(VAddr addr, u64 size) = 0; 45 virtual void FlushAndInvalidateRegion(CacheAddr addr, u64 size) = 0;
45 46
46 /// Attempt to use a faster method to perform a surface copy 47 /// Attempt to use a faster method to perform a surface copy
47 virtual bool AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Regs::Surface& src, 48 virtual bool AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Regs::Surface& src,
48 const Tegra::Engines::Fermi2D::Regs::Surface& dst) { 49 const Tegra::Engines::Fermi2D::Regs::Surface& dst,
50 const Common::Rectangle<u32>& src_rect,
51 const Common::Rectangle<u32>& dst_rect) {
49 return false; 52 return false;
50 } 53 }
51 54
@@ -60,6 +63,10 @@ public:
60 } 63 }
61 64
62 /// Increase/decrease the number of object in pages touching the specified region 65 /// Increase/decrease the number of object in pages touching the specified region
63 virtual void UpdatePagesCachedCount(Tegra::GPUVAddr addr, u64 size, int delta) {} 66 virtual void UpdatePagesCachedCount(VAddr addr, u64 size, int delta) {}
67
68 /// Initialize disk cached resources for the game being emulated
69 virtual void LoadDiskResources(const std::atomic_bool& stop_loading = false,
70 const DiskResourceLoadCallback& callback = {}) {}
64}; 71};
65} // namespace VideoCore 72} // namespace VideoCore
diff --git a/src/video_core/renderer_base.cpp b/src/video_core/renderer_base.cpp
index 94223f45f..919d1f2d4 100644
--- a/src/video_core/renderer_base.cpp
+++ b/src/video_core/renderer_base.cpp
@@ -2,6 +2,7 @@
2// Licensed under GPLv2 or any later version 2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included. 3// Refer to the license.txt file included.
4 4
5#include "common/logging/log.h"
5#include "core/frontend/emu_window.h" 6#include "core/frontend/emu_window.h"
6#include "core/settings.h" 7#include "core/settings.h"
7#include "video_core/renderer_base.h" 8#include "video_core/renderer_base.h"
diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.cpp b/src/video_core/renderer_opengl/gl_buffer_cache.cpp
index bd2b30e77..5048ed6ce 100644
--- a/src/video_core/renderer_opengl/gl_buffer_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_buffer_cache.cpp
@@ -13,23 +13,28 @@
13 13
14namespace OpenGL { 14namespace OpenGL {
15 15
16CachedBufferEntry::CachedBufferEntry(VAddr cpu_addr, std::size_t size, GLintptr offset,
17 std::size_t alignment, u8* host_ptr)
18 : cpu_addr{cpu_addr}, size{size}, offset{offset}, alignment{alignment}, RasterizerCacheObject{
19 host_ptr} {}
20
16OGLBufferCache::OGLBufferCache(RasterizerOpenGL& rasterizer, std::size_t size) 21OGLBufferCache::OGLBufferCache(RasterizerOpenGL& rasterizer, std::size_t size)
17 : RasterizerCache{rasterizer}, stream_buffer(size, true) {} 22 : RasterizerCache{rasterizer}, stream_buffer(size, true) {}
18 23
19GLintptr OGLBufferCache::UploadMemory(Tegra::GPUVAddr gpu_addr, std::size_t size, 24GLintptr OGLBufferCache::UploadMemory(Tegra::GPUVAddr gpu_addr, std::size_t size,
20 std::size_t alignment, bool cache) { 25 std::size_t alignment, bool cache) {
21 auto& memory_manager = Core::System::GetInstance().GPU().MemoryManager(); 26 auto& memory_manager = Core::System::GetInstance().GPU().MemoryManager();
22 const std::optional<VAddr> cpu_addr{memory_manager.GpuToCpuAddress(gpu_addr)};
23 27
24 // Cache management is a big overhead, so only cache entries with a given size. 28 // Cache management is a big overhead, so only cache entries with a given size.
25 // TODO: Figure out which size is the best for given games. 29 // TODO: Figure out which size is the best for given games.
26 cache &= size >= 2048; 30 cache &= size >= 2048;
27 31
32 const auto& host_ptr{memory_manager.GetPointer(gpu_addr)};
28 if (cache) { 33 if (cache) {
29 auto entry = TryGet(*cpu_addr); 34 auto entry = TryGet(host_ptr);
30 if (entry) { 35 if (entry) {
31 if (entry->size >= size && entry->alignment == alignment) { 36 if (entry->GetSize() >= size && entry->GetAlignment() == alignment) {
32 return entry->offset; 37 return entry->GetOffset();
33 } 38 }
34 Unregister(entry); 39 Unregister(entry);
35 } 40 }
@@ -38,17 +43,17 @@ GLintptr OGLBufferCache::UploadMemory(Tegra::GPUVAddr gpu_addr, std::size_t size
38 AlignBuffer(alignment); 43 AlignBuffer(alignment);
39 const GLintptr uploaded_offset = buffer_offset; 44 const GLintptr uploaded_offset = buffer_offset;
40 45
41 Memory::ReadBlock(*cpu_addr, buffer_ptr, size); 46 if (!host_ptr) {
47 return uploaded_offset;
48 }
42 49
50 std::memcpy(buffer_ptr, host_ptr, size);
43 buffer_ptr += size; 51 buffer_ptr += size;
44 buffer_offset += size; 52 buffer_offset += size;
45 53
46 if (cache) { 54 if (cache) {
47 auto entry = std::make_shared<CachedBufferEntry>(); 55 auto entry = std::make_shared<CachedBufferEntry>(
48 entry->offset = uploaded_offset; 56 *memory_manager.GpuToCpuAddress(gpu_addr), size, uploaded_offset, alignment, host_ptr);
49 entry->size = size;
50 entry->alignment = alignment;
51 entry->addr = *cpu_addr;
52 Register(entry); 57 Register(entry);
53 } 58 }
54 59
diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.h b/src/video_core/renderer_opengl/gl_buffer_cache.h
index c11acfb79..1de1f84ae 100644
--- a/src/video_core/renderer_opengl/gl_buffer_cache.h
+++ b/src/video_core/renderer_opengl/gl_buffer_cache.h
@@ -17,22 +17,39 @@ namespace OpenGL {
17 17
18class RasterizerOpenGL; 18class RasterizerOpenGL;
19 19
20struct CachedBufferEntry final : public RasterizerCacheObject { 20class CachedBufferEntry final : public RasterizerCacheObject {
21 VAddr GetAddr() const override { 21public:
22 return addr; 22 explicit CachedBufferEntry(VAddr cpu_addr, std::size_t size, GLintptr offset,
23 std::size_t alignment, u8* host_ptr);
24
25 VAddr GetCpuAddr() const override {
26 return cpu_addr;
23 } 27 }
24 28
25 std::size_t GetSizeInBytes() const override { 29 std::size_t GetSizeInBytes() const override {
26 return size; 30 return size;
27 } 31 }
28 32
33 std::size_t GetSize() const {
34 return size;
35 }
36
37 GLintptr GetOffset() const {
38 return offset;
39 }
40
41 std::size_t GetAlignment() const {
42 return alignment;
43 }
44
29 // We do not have to flush this cache as things in it are never modified by us. 45 // We do not have to flush this cache as things in it are never modified by us.
30 void Flush() override {} 46 void Flush() override {}
31 47
32 VAddr addr; 48private:
33 std::size_t size; 49 VAddr cpu_addr{};
34 GLintptr offset; 50 std::size_t size{};
35 std::size_t alignment; 51 GLintptr offset{};
52 std::size_t alignment{};
36}; 53};
37 54
38class OGLBufferCache final : public RasterizerCache<std::shared_ptr<CachedBufferEntry>> { 55class OGLBufferCache final : public RasterizerCache<std::shared_ptr<CachedBufferEntry>> {
diff --git a/src/video_core/renderer_opengl/gl_global_cache.cpp b/src/video_core/renderer_opengl/gl_global_cache.cpp
index c7f32feaa..c8dbcacbd 100644
--- a/src/video_core/renderer_opengl/gl_global_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_global_cache.cpp
@@ -7,7 +7,6 @@
7#include "common/assert.h" 7#include "common/assert.h"
8#include "common/logging/log.h" 8#include "common/logging/log.h"
9#include "core/core.h" 9#include "core/core.h"
10#include "core/memory.h"
11#include "video_core/renderer_opengl/gl_global_cache.h" 10#include "video_core/renderer_opengl/gl_global_cache.h"
12#include "video_core/renderer_opengl/gl_rasterizer.h" 11#include "video_core/renderer_opengl/gl_rasterizer.h"
13#include "video_core/renderer_opengl/gl_shader_decompiler.h" 12#include "video_core/renderer_opengl/gl_shader_decompiler.h"
@@ -15,12 +14,13 @@
15 14
16namespace OpenGL { 15namespace OpenGL {
17 16
18CachedGlobalRegion::CachedGlobalRegion(VAddr addr, u32 size) : addr{addr}, size{size} { 17CachedGlobalRegion::CachedGlobalRegion(VAddr cpu_addr, u32 size, u8* host_ptr)
18 : cpu_addr{cpu_addr}, size{size}, RasterizerCacheObject{host_ptr} {
19 buffer.Create(); 19 buffer.Create();
20 // Bind and unbind the buffer so it gets allocated by the driver 20 // Bind and unbind the buffer so it gets allocated by the driver
21 glBindBuffer(GL_SHADER_STORAGE_BUFFER, buffer.handle); 21 glBindBuffer(GL_SHADER_STORAGE_BUFFER, buffer.handle);
22 glBindBuffer(GL_SHADER_STORAGE_BUFFER, 0); 22 glBindBuffer(GL_SHADER_STORAGE_BUFFER, 0);
23 LabelGLObject(GL_BUFFER, buffer.handle, addr, "GlobalMemory"); 23 LabelGLObject(GL_BUFFER, buffer.handle, cpu_addr, "GlobalMemory");
24} 24}
25 25
26void CachedGlobalRegion::Reload(u32 size_) { 26void CachedGlobalRegion::Reload(u32 size_) {
@@ -35,10 +35,10 @@ void CachedGlobalRegion::Reload(u32 size_) {
35 35
36 // TODO(Rodrigo): Get rid of Memory::GetPointer with a staging buffer 36 // TODO(Rodrigo): Get rid of Memory::GetPointer with a staging buffer
37 glBindBuffer(GL_SHADER_STORAGE_BUFFER, buffer.handle); 37 glBindBuffer(GL_SHADER_STORAGE_BUFFER, buffer.handle);
38 glBufferData(GL_SHADER_STORAGE_BUFFER, size, Memory::GetPointer(addr), GL_DYNAMIC_DRAW); 38 glBufferData(GL_SHADER_STORAGE_BUFFER, size, GetHostPtr(), GL_DYNAMIC_DRAW);
39} 39}
40 40
41GlobalRegion GlobalRegionCacheOpenGL::TryGetReservedGlobalRegion(VAddr addr, u32 size) const { 41GlobalRegion GlobalRegionCacheOpenGL::TryGetReservedGlobalRegion(CacheAddr addr, u32 size) const {
42 const auto search{reserve.find(addr)}; 42 const auto search{reserve.find(addr)};
43 if (search == reserve.end()) { 43 if (search == reserve.end()) {
44 return {}; 44 return {};
@@ -46,19 +46,22 @@ GlobalRegion GlobalRegionCacheOpenGL::TryGetReservedGlobalRegion(VAddr addr, u32
46 return search->second; 46 return search->second;
47} 47}
48 48
49GlobalRegion GlobalRegionCacheOpenGL::GetUncachedGlobalRegion(VAddr addr, u32 size) { 49GlobalRegion GlobalRegionCacheOpenGL::GetUncachedGlobalRegion(Tegra::GPUVAddr addr, u32 size,
50 GlobalRegion region{TryGetReservedGlobalRegion(addr, size)}; 50 u8* host_ptr) {
51 GlobalRegion region{TryGetReservedGlobalRegion(ToCacheAddr(host_ptr), size)};
51 if (!region) { 52 if (!region) {
52 // No reserved surface available, create a new one and reserve it 53 // No reserved surface available, create a new one and reserve it
53 region = std::make_shared<CachedGlobalRegion>(addr, size); 54 auto& memory_manager{Core::System::GetInstance().GPU().MemoryManager()};
55 const auto cpu_addr = *memory_manager.GpuToCpuAddress(addr);
56 region = std::make_shared<CachedGlobalRegion>(cpu_addr, size, host_ptr);
54 ReserveGlobalRegion(region); 57 ReserveGlobalRegion(region);
55 } 58 }
56 region->Reload(size); 59 region->Reload(size);
57 return region; 60 return region;
58} 61}
59 62
60void GlobalRegionCacheOpenGL::ReserveGlobalRegion(const GlobalRegion& region) { 63void GlobalRegionCacheOpenGL::ReserveGlobalRegion(GlobalRegion region) {
61 reserve[region->GetAddr()] = region; 64 reserve.insert_or_assign(region->GetCacheAddr(), std::move(region));
62} 65}
63 66
64GlobalRegionCacheOpenGL::GlobalRegionCacheOpenGL(RasterizerOpenGL& rasterizer) 67GlobalRegionCacheOpenGL::GlobalRegionCacheOpenGL(RasterizerOpenGL& rasterizer)
@@ -69,22 +72,20 @@ GlobalRegion GlobalRegionCacheOpenGL::GetGlobalRegion(
69 Tegra::Engines::Maxwell3D::Regs::ShaderStage stage) { 72 Tegra::Engines::Maxwell3D::Regs::ShaderStage stage) {
70 73
71 auto& gpu{Core::System::GetInstance().GPU()}; 74 auto& gpu{Core::System::GetInstance().GPU()};
72 const auto cbufs = gpu.Maxwell3D().state.shader_stages[static_cast<u64>(stage)]; 75 auto& memory_manager{gpu.MemoryManager()};
73 const auto cbuf_addr = gpu.MemoryManager().GpuToCpuAddress( 76 const auto cbufs{gpu.Maxwell3D().state.shader_stages[static_cast<u64>(stage)]};
74 cbufs.const_buffers[global_region.GetCbufIndex()].address + global_region.GetCbufOffset()); 77 const auto addr{cbufs.const_buffers[global_region.GetCbufIndex()].address +
75 ASSERT(cbuf_addr); 78 global_region.GetCbufOffset()};
76 79 const auto actual_addr{memory_manager.Read64(addr)};
77 const auto actual_addr_gpu = Memory::Read64(*cbuf_addr); 80 const auto size{memory_manager.Read32(addr + 8)};
78 const auto size = Memory::Read32(*cbuf_addr + 8);
79 const auto actual_addr = gpu.MemoryManager().GpuToCpuAddress(actual_addr_gpu);
80 ASSERT(actual_addr);
81 81
82 // Look up global region in the cache based on address 82 // Look up global region in the cache based on address
83 GlobalRegion region = TryGet(*actual_addr); 83 const auto& host_ptr{memory_manager.GetPointer(actual_addr)};
84 GlobalRegion region{TryGet(host_ptr)};
84 85
85 if (!region) { 86 if (!region) {
86 // No global region found - create a new one 87 // No global region found - create a new one
87 region = GetUncachedGlobalRegion(*actual_addr, size); 88 region = GetUncachedGlobalRegion(actual_addr, size, host_ptr);
88 Register(region); 89 Register(region);
89 } 90 }
90 91
diff --git a/src/video_core/renderer_opengl/gl_global_cache.h b/src/video_core/renderer_opengl/gl_global_cache.h
index 37830bb7c..a840491f7 100644
--- a/src/video_core/renderer_opengl/gl_global_cache.h
+++ b/src/video_core/renderer_opengl/gl_global_cache.h
@@ -27,15 +27,13 @@ using GlobalRegion = std::shared_ptr<CachedGlobalRegion>;
27 27
28class CachedGlobalRegion final : public RasterizerCacheObject { 28class CachedGlobalRegion final : public RasterizerCacheObject {
29public: 29public:
30 explicit CachedGlobalRegion(VAddr addr, u32 size); 30 explicit CachedGlobalRegion(VAddr cpu_addr, u32 size, u8* host_ptr);
31 31
32 /// Gets the address of the shader in guest memory, required for cache management 32 VAddr GetCpuAddr() const override {
33 VAddr GetAddr() const { 33 return cpu_addr;
34 return addr;
35 } 34 }
36 35
37 /// Gets the size of the shader in guest memory, required for cache management 36 std::size_t GetSizeInBytes() const override {
38 std::size_t GetSizeInBytes() const {
39 return size; 37 return size;
40 } 38 }
41 39
@@ -53,9 +51,8 @@ public:
53 } 51 }
54 52
55private: 53private:
56 VAddr addr{}; 54 VAddr cpu_addr{};
57 u32 size{}; 55 u32 size{};
58
59 OGLBuffer buffer; 56 OGLBuffer buffer;
60}; 57};
61 58
@@ -68,11 +65,11 @@ public:
68 Tegra::Engines::Maxwell3D::Regs::ShaderStage stage); 65 Tegra::Engines::Maxwell3D::Regs::ShaderStage stage);
69 66
70private: 67private:
71 GlobalRegion TryGetReservedGlobalRegion(VAddr addr, u32 size) const; 68 GlobalRegion TryGetReservedGlobalRegion(CacheAddr addr, u32 size) const;
72 GlobalRegion GetUncachedGlobalRegion(VAddr addr, u32 size); 69 GlobalRegion GetUncachedGlobalRegion(Tegra::GPUVAddr addr, u32 size, u8* host_ptr);
73 void ReserveGlobalRegion(const GlobalRegion& region); 70 void ReserveGlobalRegion(GlobalRegion region);
74 71
75 std::unordered_map<VAddr, GlobalRegion> reserve; 72 std::unordered_map<CacheAddr, GlobalRegion> reserve;
76}; 73};
77 74
78} // namespace OpenGL 75} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_primitive_assembler.cpp b/src/video_core/renderer_opengl/gl_primitive_assembler.cpp
index d9ed08437..75d816795 100644
--- a/src/video_core/renderer_opengl/gl_primitive_assembler.cpp
+++ b/src/video_core/renderer_opengl/gl_primitive_assembler.cpp
@@ -46,8 +46,7 @@ GLintptr PrimitiveAssembler::MakeQuadIndexed(Tegra::GPUVAddr gpu_addr, std::size
46 auto [dst_pointer, index_offset] = buffer_cache.ReserveMemory(map_size); 46 auto [dst_pointer, index_offset] = buffer_cache.ReserveMemory(map_size);
47 47
48 auto& memory_manager = Core::System::GetInstance().GPU().MemoryManager(); 48 auto& memory_manager = Core::System::GetInstance().GPU().MemoryManager();
49 const std::optional<VAddr> cpu_addr{memory_manager.GpuToCpuAddress(gpu_addr)}; 49 const u8* source{memory_manager.GetPointer(gpu_addr)};
50 const u8* source{Memory::GetPointer(*cpu_addr)};
51 50
52 for (u32 primitive = 0; primitive < count / 4; ++primitive) { 51 for (u32 primitive = 0; primitive < count / 4; ++primitive) {
53 for (std::size_t i = 0; i < TRIANGLES_PER_QUAD; ++i) { 52 for (std::size_t i = 0; i < TRIANGLES_PER_QUAD; ++i) {
@@ -62,4 +61,4 @@ GLintptr PrimitiveAssembler::MakeQuadIndexed(Tegra::GPUVAddr gpu_addr, std::size
62 return index_offset; 61 return index_offset;
63} 62}
64 63
65} // namespace OpenGL \ No newline at end of file 64} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp
index 9f7c837d6..198c54872 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -22,6 +22,7 @@
22#include "core/settings.h" 22#include "core/settings.h"
23#include "video_core/engines/maxwell_3d.h" 23#include "video_core/engines/maxwell_3d.h"
24#include "video_core/renderer_opengl/gl_rasterizer.h" 24#include "video_core/renderer_opengl/gl_rasterizer.h"
25#include "video_core/renderer_opengl/gl_shader_cache.h"
25#include "video_core/renderer_opengl/gl_shader_gen.h" 26#include "video_core/renderer_opengl/gl_shader_gen.h"
26#include "video_core/renderer_opengl/maxwell_to_gl.h" 27#include "video_core/renderer_opengl/maxwell_to_gl.h"
27#include "video_core/renderer_opengl/renderer_opengl.h" 28#include "video_core/renderer_opengl/renderer_opengl.h"
@@ -99,9 +100,11 @@ struct FramebufferCacheKey {
99 } 100 }
100}; 101};
101 102
102RasterizerOpenGL::RasterizerOpenGL(Core::Frontend::EmuWindow& window, ScreenInfo& info) 103RasterizerOpenGL::RasterizerOpenGL(Core::Frontend::EmuWindow& window, Core::System& system,
103 : res_cache{*this}, shader_cache{*this}, emu_window{window}, screen_info{info}, 104 ScreenInfo& info)
104 buffer_cache(*this, STREAM_BUFFER_SIZE), global_cache{*this} { 105 : res_cache{*this}, shader_cache{*this, system}, global_cache{*this},
106 emu_window{window}, system{system}, screen_info{info},
107 buffer_cache(*this, STREAM_BUFFER_SIZE) {
105 // Create sampler objects 108 // Create sampler objects
106 for (std::size_t i = 0; i < texture_samplers.size(); ++i) { 109 for (std::size_t i = 0; i < texture_samplers.size(); ++i) {
107 texture_samplers[i].Create(); 110 texture_samplers[i].Create();
@@ -116,7 +119,7 @@ RasterizerOpenGL::RasterizerOpenGL(Core::Frontend::EmuWindow& window, ScreenInfo
116 119
117 glGetIntegerv(GL_UNIFORM_BUFFER_OFFSET_ALIGNMENT, &uniform_buffer_alignment); 120 glGetIntegerv(GL_UNIFORM_BUFFER_OFFSET_ALIGNMENT, &uniform_buffer_alignment);
118 121
119 LOG_CRITICAL(Render_OpenGL, "Sync fixed function OpenGL state here!"); 122 LOG_DEBUG(Render_OpenGL, "Sync fixed function OpenGL state here");
120 CheckExtensions(); 123 CheckExtensions();
121} 124}
122 125
@@ -136,7 +139,7 @@ void RasterizerOpenGL::CheckExtensions() {
136} 139}
137 140
138GLuint RasterizerOpenGL::SetupVertexFormat() { 141GLuint RasterizerOpenGL::SetupVertexFormat() {
139 auto& gpu = Core::System::GetInstance().GPU().Maxwell3D(); 142 auto& gpu = system.GPU().Maxwell3D();
140 const auto& regs = gpu.regs; 143 const auto& regs = gpu.regs;
141 144
142 if (!gpu.dirty_flags.vertex_attrib_format) { 145 if (!gpu.dirty_flags.vertex_attrib_format) {
@@ -175,7 +178,7 @@ GLuint RasterizerOpenGL::SetupVertexFormat() {
175 continue; 178 continue;
176 179
177 const auto& buffer = regs.vertex_array[attrib.buffer]; 180 const auto& buffer = regs.vertex_array[attrib.buffer];
178 LOG_TRACE(HW_GPU, 181 LOG_TRACE(Render_OpenGL,
179 "vertex attrib {}, count={}, size={}, type={}, offset={}, normalize={}", 182 "vertex attrib {}, count={}, size={}, type={}, offset={}, normalize={}",
180 index, attrib.ComponentCount(), attrib.SizeString(), attrib.TypeString(), 183 index, attrib.ComponentCount(), attrib.SizeString(), attrib.TypeString(),
181 attrib.offset.Value(), attrib.IsNormalized()); 184 attrib.offset.Value(), attrib.IsNormalized());
@@ -198,24 +201,24 @@ GLuint RasterizerOpenGL::SetupVertexFormat() {
198 } 201 }
199 202
200 // Rebinding the VAO invalidates the vertex buffer bindings. 203 // Rebinding the VAO invalidates the vertex buffer bindings.
201 gpu.dirty_flags.vertex_array = 0xFFFFFFFF; 204 gpu.dirty_flags.vertex_array.set();
202 205
203 state.draw.vertex_array = vao_entry.handle; 206 state.draw.vertex_array = vao_entry.handle;
204 return vao_entry.handle; 207 return vao_entry.handle;
205} 208}
206 209
207void RasterizerOpenGL::SetupVertexBuffer(GLuint vao) { 210void RasterizerOpenGL::SetupVertexBuffer(GLuint vao) {
208 auto& gpu = Core::System::GetInstance().GPU().Maxwell3D(); 211 auto& gpu = system.GPU().Maxwell3D();
209 const auto& regs = gpu.regs; 212 const auto& regs = gpu.regs;
210 213
211 if (!gpu.dirty_flags.vertex_array) 214 if (gpu.dirty_flags.vertex_array.none())
212 return; 215 return;
213 216
214 MICROPROFILE_SCOPE(OpenGL_VB); 217 MICROPROFILE_SCOPE(OpenGL_VB);
215 218
216 // Upload all guest vertex arrays sequentially to our buffer 219 // Upload all guest vertex arrays sequentially to our buffer
217 for (u32 index = 0; index < Maxwell::NumVertexArrays; ++index) { 220 for (u32 index = 0; index < Maxwell::NumVertexArrays; ++index) {
218 if (~gpu.dirty_flags.vertex_array & (1u << index)) 221 if (!gpu.dirty_flags.vertex_array[index])
219 continue; 222 continue;
220 223
221 const auto& vertex_array = regs.vertex_array[index]; 224 const auto& vertex_array = regs.vertex_array[index];
@@ -242,11 +245,11 @@ void RasterizerOpenGL::SetupVertexBuffer(GLuint vao) {
242 } 245 }
243 } 246 }
244 247
245 gpu.dirty_flags.vertex_array = 0; 248 gpu.dirty_flags.vertex_array.reset();
246} 249}
247 250
248DrawParameters RasterizerOpenGL::SetupDraw() { 251DrawParameters RasterizerOpenGL::SetupDraw() {
249 const auto& gpu = Core::System::GetInstance().GPU().Maxwell3D(); 252 const auto& gpu = system.GPU().Maxwell3D();
250 const auto& regs = gpu.regs; 253 const auto& regs = gpu.regs;
251 const bool is_indexed = accelerate_draw == AccelDraw::Indexed; 254 const bool is_indexed = accelerate_draw == AccelDraw::Indexed;
252 255
@@ -295,7 +298,7 @@ DrawParameters RasterizerOpenGL::SetupDraw() {
295 298
296void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) { 299void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) {
297 MICROPROFILE_SCOPE(OpenGL_Shader); 300 MICROPROFILE_SCOPE(OpenGL_Shader);
298 auto& gpu = Core::System::GetInstance().GPU().Maxwell3D(); 301 auto& gpu = system.GPU().Maxwell3D();
299 302
300 BaseBindings base_bindings; 303 BaseBindings base_bindings;
301 std::array<bool, Maxwell::NumClipDistances> clip_distances{}; 304 std::array<bool, Maxwell::NumClipDistances> clip_distances{};
@@ -341,9 +344,8 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) {
341 shader_program_manager->UseProgrammableFragmentShader(program_handle); 344 shader_program_manager->UseProgrammableFragmentShader(program_handle);
342 break; 345 break;
343 default: 346 default:
344 LOG_CRITICAL(HW_GPU, "Unimplemented shader index={}, enable={}, offset=0x{:08X}", index, 347 UNIMPLEMENTED_MSG("Unimplemented shader index={}, enable={}, offset=0x{:08X}", index,
345 shader_config.enable.Value(), shader_config.offset); 348 shader_config.enable.Value(), shader_config.offset);
346 UNREACHABLE();
347 } 349 }
348 350
349 const auto stage_enum = static_cast<Maxwell::ShaderStage>(stage); 351 const auto stage_enum = static_cast<Maxwell::ShaderStage>(stage);
@@ -412,7 +414,7 @@ void RasterizerOpenGL::SetupCachedFramebuffer(const FramebufferCacheKey& fbkey,
412} 414}
413 415
414std::size_t RasterizerOpenGL::CalculateVertexArraysSize() const { 416std::size_t RasterizerOpenGL::CalculateVertexArraysSize() const {
415 const auto& regs = Core::System::GetInstance().GPU().Maxwell3D().regs; 417 const auto& regs = system.GPU().Maxwell3D().regs;
416 418
417 std::size_t size = 0; 419 std::size_t size = 0;
418 for (u32 index = 0; index < Maxwell::NumVertexArrays; ++index) { 420 for (u32 index = 0; index < Maxwell::NumVertexArrays; ++index) {
@@ -430,7 +432,7 @@ std::size_t RasterizerOpenGL::CalculateVertexArraysSize() const {
430} 432}
431 433
432std::size_t RasterizerOpenGL::CalculateIndexBufferSize() const { 434std::size_t RasterizerOpenGL::CalculateIndexBufferSize() const {
433 const auto& regs = Core::System::GetInstance().GPU().Maxwell3D().regs; 435 const auto& regs = system.GPU().Maxwell3D().regs;
434 436
435 return static_cast<std::size_t>(regs.index_array.count) * 437 return static_cast<std::size_t>(regs.index_array.count) *
436 static_cast<std::size_t>(regs.index_array.FormatSizeInBytes()); 438 static_cast<std::size_t>(regs.index_array.FormatSizeInBytes());
@@ -477,17 +479,22 @@ void RasterizerOpenGL::UpdatePagesCachedCount(VAddr addr, u64 size, int delta) {
477 cached_pages.add({pages_interval, delta}); 479 cached_pages.add({pages_interval, delta});
478} 480}
479 481
482void RasterizerOpenGL::LoadDiskResources(const std::atomic_bool& stop_loading,
483 const VideoCore::DiskResourceLoadCallback& callback) {
484 shader_cache.LoadDiskCache(stop_loading, callback);
485}
486
480std::pair<bool, bool> RasterizerOpenGL::ConfigureFramebuffers( 487std::pair<bool, bool> RasterizerOpenGL::ConfigureFramebuffers(
481 OpenGLState& current_state, bool using_color_fb, bool using_depth_fb, bool preserve_contents, 488 OpenGLState& current_state, bool using_color_fb, bool using_depth_fb, bool preserve_contents,
482 std::optional<std::size_t> single_color_target) { 489 std::optional<std::size_t> single_color_target) {
483 MICROPROFILE_SCOPE(OpenGL_Framebuffer); 490 MICROPROFILE_SCOPE(OpenGL_Framebuffer);
484 const auto& gpu = Core::System::GetInstance().GPU().Maxwell3D(); 491 auto& gpu = system.GPU().Maxwell3D();
485 const auto& regs = gpu.regs; 492 const auto& regs = gpu.regs;
486 493
487 const FramebufferConfigState fb_config_state{using_color_fb, using_depth_fb, preserve_contents, 494 const FramebufferConfigState fb_config_state{using_color_fb, using_depth_fb, preserve_contents,
488 single_color_target}; 495 single_color_target};
489 if (fb_config_state == current_framebuffer_config_state && gpu.dirty_flags.color_buffer == 0 && 496 if (fb_config_state == current_framebuffer_config_state &&
490 !gpu.dirty_flags.zeta_buffer) { 497 gpu.dirty_flags.color_buffer.none() && !gpu.dirty_flags.zeta_buffer) {
491 // Only skip if the previous ConfigureFramebuffers call was from the same kind (multiple or 498 // Only skip if the previous ConfigureFramebuffers call was from the same kind (multiple or
492 // single color targets). This is done because the guest registers may not change but the 499 // single color targets). This is done because the guest registers may not change but the
493 // host framebuffer may contain different attachments 500 // host framebuffer may contain different attachments
@@ -575,7 +582,7 @@ void RasterizerOpenGL::Clear() {
575 const auto prev_state{state}; 582 const auto prev_state{state};
576 SCOPE_EXIT({ prev_state.Apply(); }); 583 SCOPE_EXIT({ prev_state.Apply(); });
577 584
578 const auto& regs = Core::System::GetInstance().GPU().Maxwell3D().regs; 585 const auto& regs = system.GPU().Maxwell3D().regs;
579 bool use_color{}; 586 bool use_color{};
580 bool use_depth{}; 587 bool use_depth{};
581 bool use_stencil{}; 588 bool use_stencil{};
@@ -666,7 +673,7 @@ void RasterizerOpenGL::DrawArrays() {
666 return; 673 return;
667 674
668 MICROPROFILE_SCOPE(OpenGL_Drawing); 675 MICROPROFILE_SCOPE(OpenGL_Drawing);
669 auto& gpu = Core::System::GetInstance().GPU().Maxwell3D(); 676 auto& gpu = system.GPU().Maxwell3D();
670 const auto& regs = gpu.regs; 677 const auto& regs = gpu.regs;
671 678
672 ConfigureFramebuffers(state); 679 ConfigureFramebuffers(state);
@@ -714,10 +721,10 @@ void RasterizerOpenGL::DrawArrays() {
714 // Add space for at least 18 constant buffers 721 // Add space for at least 18 constant buffers
715 buffer_size += Maxwell::MaxConstBuffers * (MaxConstbufferSize + uniform_buffer_alignment); 722 buffer_size += Maxwell::MaxConstBuffers * (MaxConstbufferSize + uniform_buffer_alignment);
716 723
717 bool invalidate = buffer_cache.Map(buffer_size); 724 const bool invalidate = buffer_cache.Map(buffer_size);
718 if (invalidate) { 725 if (invalidate) {
719 // As all cached buffers are invalidated, we need to recheck their state. 726 // As all cached buffers are invalidated, we need to recheck their state.
720 gpu.dirty_flags.vertex_array = 0xFFFFFFFF; 727 gpu.dirty_flags.vertex_array.set();
721 } 728 }
722 729
723 const GLuint vao = SetupVertexFormat(); 730 const GLuint vao = SetupVertexFormat();
@@ -731,55 +738,45 @@ void RasterizerOpenGL::DrawArrays() {
731 shader_program_manager->ApplyTo(state); 738 shader_program_manager->ApplyTo(state);
732 state.Apply(); 739 state.Apply();
733 740
734 // Execute draw call 741 res_cache.SignalPreDrawCall();
735 params.DispatchDraw(); 742 params.DispatchDraw();
736 743 res_cache.SignalPostDrawCall();
737 // Disable scissor test
738 state.viewports[0].scissor.enabled = false;
739 744
740 accelerate_draw = AccelDraw::Disabled; 745 accelerate_draw = AccelDraw::Disabled;
741
742 // Unbind textures for potential future use as framebuffer attachments
743 for (auto& texture_unit : state.texture_units) {
744 texture_unit.Unbind();
745 }
746 state.Apply();
747} 746}
748 747
749void RasterizerOpenGL::FlushAll() {} 748void RasterizerOpenGL::FlushAll() {}
750 749
751void RasterizerOpenGL::FlushRegion(VAddr addr, u64 size) { 750void RasterizerOpenGL::FlushRegion(CacheAddr addr, u64 size) {
752 MICROPROFILE_SCOPE(OpenGL_CacheManagement); 751 MICROPROFILE_SCOPE(OpenGL_CacheManagement);
753 752 if (!addr || !size) {
754 if (Settings::values.use_accurate_gpu_emulation) { 753 return;
755 // Only flush if use_accurate_gpu_emulation is enabled, as it incurs a performance hit
756 res_cache.FlushRegion(addr, size);
757 } 754 }
755 res_cache.FlushRegion(addr, size);
758} 756}
759 757
760void RasterizerOpenGL::InvalidateRegion(VAddr addr, u64 size) { 758void RasterizerOpenGL::InvalidateRegion(CacheAddr addr, u64 size) {
761 MICROPROFILE_SCOPE(OpenGL_CacheManagement); 759 MICROPROFILE_SCOPE(OpenGL_CacheManagement);
760 if (!addr || !size) {
761 return;
762 }
762 res_cache.InvalidateRegion(addr, size); 763 res_cache.InvalidateRegion(addr, size);
763 shader_cache.InvalidateRegion(addr, size); 764 shader_cache.InvalidateRegion(addr, size);
764 global_cache.InvalidateRegion(addr, size); 765 global_cache.InvalidateRegion(addr, size);
765 buffer_cache.InvalidateRegion(addr, size); 766 buffer_cache.InvalidateRegion(addr, size);
766} 767}
767 768
768void RasterizerOpenGL::FlushAndInvalidateRegion(VAddr addr, u64 size) { 769void RasterizerOpenGL::FlushAndInvalidateRegion(CacheAddr addr, u64 size) {
769 FlushRegion(addr, size); 770 FlushRegion(addr, size);
770 InvalidateRegion(addr, size); 771 InvalidateRegion(addr, size);
771} 772}
772 773
773bool RasterizerOpenGL::AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Regs::Surface& src, 774bool RasterizerOpenGL::AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Regs::Surface& src,
774 const Tegra::Engines::Fermi2D::Regs::Surface& dst) { 775 const Tegra::Engines::Fermi2D::Regs::Surface& dst,
776 const Common::Rectangle<u32>& src_rect,
777 const Common::Rectangle<u32>& dst_rect) {
775 MICROPROFILE_SCOPE(OpenGL_Blits); 778 MICROPROFILE_SCOPE(OpenGL_Blits);
776 779 res_cache.FermiCopySurface(src, dst, src_rect, dst_rect);
777 if (Settings::values.use_accurate_gpu_emulation) {
778 // Skip the accelerated copy and perform a slow but more accurate copy
779 return false;
780 }
781
782 res_cache.FermiCopySurface(src, dst);
783 return true; 780 return true;
784} 781}
785 782
@@ -791,7 +788,7 @@ bool RasterizerOpenGL::AccelerateDisplay(const Tegra::FramebufferConfig& config,
791 788
792 MICROPROFILE_SCOPE(OpenGL_CacheManagement); 789 MICROPROFILE_SCOPE(OpenGL_CacheManagement);
793 790
794 const auto& surface{res_cache.TryFindFramebufferSurface(framebuffer_addr)}; 791 const auto& surface{res_cache.TryFindFramebufferSurface(Memory::GetPointer(framebuffer_addr))};
795 if (!surface) { 792 if (!surface) {
796 return {}; 793 return {};
797 } 794 }
@@ -802,7 +799,10 @@ bool RasterizerOpenGL::AccelerateDisplay(const Tegra::FramebufferConfig& config,
802 VideoCore::Surface::PixelFormatFromGPUPixelFormat(config.pixel_format)}; 799 VideoCore::Surface::PixelFormatFromGPUPixelFormat(config.pixel_format)};
803 ASSERT_MSG(params.width == config.width, "Framebuffer width is different"); 800 ASSERT_MSG(params.width == config.width, "Framebuffer width is different");
804 ASSERT_MSG(params.height == config.height, "Framebuffer height is different"); 801 ASSERT_MSG(params.height == config.height, "Framebuffer height is different");
805 ASSERT_MSG(params.pixel_format == pixel_format, "Framebuffer pixel_format is different"); 802
803 if (params.pixel_format != pixel_format) {
804 LOG_WARNING(Render_OpenGL, "Framebuffer pixel_format is different");
805 }
806 806
807 screen_info.display_texture = surface->Texture().handle; 807 screen_info.display_texture = surface->Texture().handle;
808 808
@@ -811,104 +811,87 @@ bool RasterizerOpenGL::AccelerateDisplay(const Tegra::FramebufferConfig& config,
811 811
812void RasterizerOpenGL::SamplerInfo::Create() { 812void RasterizerOpenGL::SamplerInfo::Create() {
813 sampler.Create(); 813 sampler.Create();
814 mag_filter = min_filter = Tegra::Texture::TextureFilter::Linear; 814 mag_filter = Tegra::Texture::TextureFilter::Linear;
815 wrap_u = wrap_v = wrap_p = Tegra::Texture::WrapMode::Wrap; 815 min_filter = Tegra::Texture::TextureFilter::Linear;
816 uses_depth_compare = false; 816 wrap_u = Tegra::Texture::WrapMode::Wrap;
817 wrap_v = Tegra::Texture::WrapMode::Wrap;
818 wrap_p = Tegra::Texture::WrapMode::Wrap;
819 use_depth_compare = false;
817 depth_compare_func = Tegra::Texture::DepthCompareFunc::Never; 820 depth_compare_func = Tegra::Texture::DepthCompareFunc::Never;
818 821
819 // default is GL_LINEAR_MIPMAP_LINEAR 822 // OpenGL's default is GL_LINEAR_MIPMAP_LINEAR
820 glSamplerParameteri(sampler.handle, GL_TEXTURE_MIN_FILTER, GL_LINEAR); 823 glSamplerParameteri(sampler.handle, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
821 // Other attributes have correct defaults
822 glSamplerParameteri(sampler.handle, GL_TEXTURE_COMPARE_FUNC, GL_NEVER); 824 glSamplerParameteri(sampler.handle, GL_TEXTURE_COMPARE_FUNC, GL_NEVER);
825
826 // Other attributes have correct defaults
823} 827}
824 828
825void RasterizerOpenGL::SamplerInfo::SyncWithConfig(const Tegra::Texture::TSCEntry& config) { 829void RasterizerOpenGL::SamplerInfo::SyncWithConfig(const Tegra::Texture::TSCEntry& config) {
826 const GLuint s = sampler.handle; 830 const GLuint sampler_id = sampler.handle;
827 if (mag_filter != config.mag_filter) { 831 if (mag_filter != config.mag_filter) {
828 mag_filter = config.mag_filter; 832 mag_filter = config.mag_filter;
829 glSamplerParameteri( 833 glSamplerParameteri(
830 s, GL_TEXTURE_MAG_FILTER, 834 sampler_id, GL_TEXTURE_MAG_FILTER,
831 MaxwellToGL::TextureFilterMode(mag_filter, Tegra::Texture::TextureMipmapFilter::None)); 835 MaxwellToGL::TextureFilterMode(mag_filter, Tegra::Texture::TextureMipmapFilter::None));
832 } 836 }
833 if (min_filter != config.min_filter || mip_filter != config.mip_filter) { 837 if (min_filter != config.min_filter || mipmap_filter != config.mipmap_filter) {
834 min_filter = config.min_filter; 838 min_filter = config.min_filter;
835 mip_filter = config.mip_filter; 839 mipmap_filter = config.mipmap_filter;
836 glSamplerParameteri(s, GL_TEXTURE_MIN_FILTER, 840 glSamplerParameteri(sampler_id, GL_TEXTURE_MIN_FILTER,
837 MaxwellToGL::TextureFilterMode(min_filter, mip_filter)); 841 MaxwellToGL::TextureFilterMode(min_filter, mipmap_filter));
838 } 842 }
839 843
840 if (wrap_u != config.wrap_u) { 844 if (wrap_u != config.wrap_u) {
841 wrap_u = config.wrap_u; 845 wrap_u = config.wrap_u;
842 glSamplerParameteri(s, GL_TEXTURE_WRAP_S, MaxwellToGL::WrapMode(wrap_u)); 846 glSamplerParameteri(sampler_id, GL_TEXTURE_WRAP_S, MaxwellToGL::WrapMode(wrap_u));
843 } 847 }
844 if (wrap_v != config.wrap_v) { 848 if (wrap_v != config.wrap_v) {
845 wrap_v = config.wrap_v; 849 wrap_v = config.wrap_v;
846 glSamplerParameteri(s, GL_TEXTURE_WRAP_T, MaxwellToGL::WrapMode(wrap_v)); 850 glSamplerParameteri(sampler_id, GL_TEXTURE_WRAP_T, MaxwellToGL::WrapMode(wrap_v));
847 } 851 }
848 if (wrap_p != config.wrap_p) { 852 if (wrap_p != config.wrap_p) {
849 wrap_p = config.wrap_p; 853 wrap_p = config.wrap_p;
850 glSamplerParameteri(s, GL_TEXTURE_WRAP_R, MaxwellToGL::WrapMode(wrap_p)); 854 glSamplerParameteri(sampler_id, GL_TEXTURE_WRAP_R, MaxwellToGL::WrapMode(wrap_p));
851 } 855 }
852 856
853 if (uses_depth_compare != (config.depth_compare_enabled == 1)) { 857 if (const bool enabled = config.depth_compare_enabled == 1; use_depth_compare != enabled) {
854 uses_depth_compare = (config.depth_compare_enabled == 1); 858 use_depth_compare = enabled;
855 if (uses_depth_compare) { 859 glSamplerParameteri(sampler_id, GL_TEXTURE_COMPARE_MODE,
856 glSamplerParameteri(s, GL_TEXTURE_COMPARE_MODE, GL_COMPARE_REF_TO_TEXTURE); 860 use_depth_compare ? GL_COMPARE_REF_TO_TEXTURE : GL_NONE);
857 } else {
858 glSamplerParameteri(s, GL_TEXTURE_COMPARE_MODE, GL_NONE);
859 }
860 } 861 }
861 862
862 if (depth_compare_func != config.depth_compare_func) { 863 if (depth_compare_func != config.depth_compare_func) {
863 depth_compare_func = config.depth_compare_func; 864 depth_compare_func = config.depth_compare_func;
864 glSamplerParameteri(s, GL_TEXTURE_COMPARE_FUNC, 865 glSamplerParameteri(sampler_id, GL_TEXTURE_COMPARE_FUNC,
865 MaxwellToGL::DepthCompareFunc(depth_compare_func)); 866 MaxwellToGL::DepthCompareFunc(depth_compare_func));
866 } 867 }
867 868
868 GLvec4 new_border_color; 869 if (const auto new_border_color = config.GetBorderColor(); border_color != new_border_color) {
869 if (config.srgb_conversion) {
870 new_border_color[0] = config.srgb_border_color_r / 255.0f;
871 new_border_color[1] = config.srgb_border_color_g / 255.0f;
872 new_border_color[2] = config.srgb_border_color_g / 255.0f;
873 } else {
874 new_border_color[0] = config.border_color_r;
875 new_border_color[1] = config.border_color_g;
876 new_border_color[2] = config.border_color_b;
877 }
878 new_border_color[3] = config.border_color_a;
879
880 if (border_color != new_border_color) {
881 border_color = new_border_color; 870 border_color = new_border_color;
882 glSamplerParameterfv(s, GL_TEXTURE_BORDER_COLOR, border_color.data()); 871 glSamplerParameterfv(sampler_id, GL_TEXTURE_BORDER_COLOR, border_color.data());
883 } 872 }
884 873
885 const float anisotropic_max = static_cast<float>(1 << config.max_anisotropy.Value()); 874 if (const float anisotropic = config.GetMaxAnisotropy(); max_anisotropic != anisotropic) {
886 if (anisotropic_max != max_anisotropic) { 875 max_anisotropic = anisotropic;
887 max_anisotropic = anisotropic_max;
888 if (GLAD_GL_ARB_texture_filter_anisotropic) { 876 if (GLAD_GL_ARB_texture_filter_anisotropic) {
889 glSamplerParameterf(s, GL_TEXTURE_MAX_ANISOTROPY, max_anisotropic); 877 glSamplerParameterf(sampler_id, GL_TEXTURE_MAX_ANISOTROPY, max_anisotropic);
890 } else if (GLAD_GL_EXT_texture_filter_anisotropic) { 878 } else if (GLAD_GL_EXT_texture_filter_anisotropic) {
891 glSamplerParameterf(s, GL_TEXTURE_MAX_ANISOTROPY_EXT, max_anisotropic); 879 glSamplerParameterf(sampler_id, GL_TEXTURE_MAX_ANISOTROPY_EXT, max_anisotropic);
892 } 880 }
893 } 881 }
894 const float lod_min = static_cast<float>(config.min_lod_clamp.Value()) / 256.0f;
895 if (lod_min != min_lod) {
896 min_lod = lod_min;
897 glSamplerParameterf(s, GL_TEXTURE_MIN_LOD, min_lod);
898 }
899 882
900 const float lod_max = static_cast<float>(config.max_lod_clamp.Value()) / 256.0f; 883 if (const float min = config.GetMinLod(); min_lod != min) {
901 if (lod_max != max_lod) { 884 min_lod = min;
902 max_lod = lod_max; 885 glSamplerParameterf(sampler_id, GL_TEXTURE_MIN_LOD, min_lod);
903 glSamplerParameterf(s, GL_TEXTURE_MAX_LOD, max_lod); 886 }
887 if (const float max = config.GetMaxLod(); max_lod != max) {
888 max_lod = max;
889 glSamplerParameterf(sampler_id, GL_TEXTURE_MAX_LOD, max_lod);
904 } 890 }
905 const u32 bias = config.mip_lod_bias.Value(); 891
906 // Sign extend the 13-bit value. 892 if (const float bias = config.GetLodBias(); lod_bias != bias) {
907 constexpr u32 mask = 1U << (13 - 1); 893 lod_bias = bias;
908 const float bias_lod = static_cast<s32>((bias ^ mask) - mask) / 256.f; 894 glSamplerParameterf(sampler_id, GL_TEXTURE_LOD_BIAS, lod_bias);
909 if (lod_bias != bias_lod) {
910 lod_bias = bias_lod;
911 glSamplerParameterf(s, GL_TEXTURE_LOD_BIAS, lod_bias);
912 } 895 }
913} 896}
914 897
@@ -916,7 +899,7 @@ void RasterizerOpenGL::SetupConstBuffers(Tegra::Engines::Maxwell3D::Regs::Shader
916 const Shader& shader, GLuint program_handle, 899 const Shader& shader, GLuint program_handle,
917 BaseBindings base_bindings) { 900 BaseBindings base_bindings) {
918 MICROPROFILE_SCOPE(OpenGL_UBO); 901 MICROPROFILE_SCOPE(OpenGL_UBO);
919 const auto& gpu = Core::System::GetInstance().GPU(); 902 const auto& gpu = system.GPU();
920 const auto& maxwell3d = gpu.Maxwell3D(); 903 const auto& maxwell3d = gpu.Maxwell3D();
921 const auto& shader_stage = maxwell3d.state.shader_stages[static_cast<std::size_t>(stage)]; 904 const auto& shader_stage = maxwell3d.state.shader_stages[static_cast<std::size_t>(stage)];
922 const auto& entries = shader->GetShaderEntries().const_buffers; 905 const auto& entries = shader->GetShaderEntries().const_buffers;
@@ -948,8 +931,8 @@ void RasterizerOpenGL::SetupConstBuffers(Tegra::Engines::Maxwell3D::Regs::Shader
948 size = buffer.size; 931 size = buffer.size;
949 932
950 if (size > MaxConstbufferSize) { 933 if (size > MaxConstbufferSize) {
951 LOG_CRITICAL(HW_GPU, "indirect constbuffer size {} exceeds maximum {}", size, 934 LOG_WARNING(Render_OpenGL, "Indirect constbuffer size {} exceeds maximum {}", size,
952 MaxConstbufferSize); 935 MaxConstbufferSize);
953 size = MaxConstbufferSize; 936 size = MaxConstbufferSize;
954 } 937 }
955 } else { 938 } else {
@@ -995,7 +978,7 @@ void RasterizerOpenGL::SetupGlobalRegions(Tegra::Engines::Maxwell3D::Regs::Shade
995void RasterizerOpenGL::SetupTextures(Maxwell::ShaderStage stage, const Shader& shader, 978void RasterizerOpenGL::SetupTextures(Maxwell::ShaderStage stage, const Shader& shader,
996 GLuint program_handle, BaseBindings base_bindings) { 979 GLuint program_handle, BaseBindings base_bindings) {
997 MICROPROFILE_SCOPE(OpenGL_Texture); 980 MICROPROFILE_SCOPE(OpenGL_Texture);
998 const auto& gpu = Core::System::GetInstance().GPU(); 981 const auto& gpu = system.GPU();
999 const auto& maxwell3d = gpu.Maxwell3D(); 982 const auto& maxwell3d = gpu.Maxwell3D();
1000 const auto& entries = shader->GetShaderEntries().samplers; 983 const auto& entries = shader->GetShaderEntries().samplers;
1001 984
@@ -1004,35 +987,25 @@ void RasterizerOpenGL::SetupTextures(Maxwell::ShaderStage stage, const Shader& s
1004 987
1005 for (u32 bindpoint = 0; bindpoint < entries.size(); ++bindpoint) { 988 for (u32 bindpoint = 0; bindpoint < entries.size(); ++bindpoint) {
1006 const auto& entry = entries[bindpoint]; 989 const auto& entry = entries[bindpoint];
990 const auto texture = maxwell3d.GetStageTexture(stage, entry.GetOffset());
1007 const u32 current_bindpoint = base_bindings.sampler + bindpoint; 991 const u32 current_bindpoint = base_bindings.sampler + bindpoint;
1008 auto& unit = state.texture_units[current_bindpoint];
1009
1010 const auto texture = maxwell3d.GetStageTexture(entry.GetStage(), entry.GetOffset());
1011 if (!texture.enabled) {
1012 unit.texture = 0;
1013 continue;
1014 }
1015 992
1016 texture_samplers[current_bindpoint].SyncWithConfig(texture.tsc); 993 texture_samplers[current_bindpoint].SyncWithConfig(texture.tsc);
1017 994
1018 Surface surface = res_cache.GetTextureSurface(texture, entry); 995 if (Surface surface = res_cache.GetTextureSurface(texture, entry); surface) {
1019 if (surface != nullptr) { 996 state.texture_units[current_bindpoint].texture =
1020 unit.texture = 997 surface->Texture(entry.IsArray()).handle;
1021 entry.IsArray() ? surface->TextureLayer().handle : surface->Texture().handle; 998 surface->UpdateSwizzle(texture.tic.x_source, texture.tic.y_source, texture.tic.z_source,
1022 unit.target = entry.IsArray() ? surface->TargetLayer() : surface->Target(); 999 texture.tic.w_source);
1023 unit.swizzle.r = MaxwellToGL::SwizzleSource(texture.tic.x_source);
1024 unit.swizzle.g = MaxwellToGL::SwizzleSource(texture.tic.y_source);
1025 unit.swizzle.b = MaxwellToGL::SwizzleSource(texture.tic.z_source);
1026 unit.swizzle.a = MaxwellToGL::SwizzleSource(texture.tic.w_source);
1027 } else { 1000 } else {
1028 // Can occur when texture addr is null or its memory is unmapped/invalid 1001 // Can occur when texture addr is null or its memory is unmapped/invalid
1029 unit.texture = 0; 1002 state.texture_units[current_bindpoint].texture = 0;
1030 } 1003 }
1031 } 1004 }
1032} 1005}
1033 1006
1034void RasterizerOpenGL::SyncViewport(OpenGLState& current_state) { 1007void RasterizerOpenGL::SyncViewport(OpenGLState& current_state) {
1035 const auto& regs = Core::System::GetInstance().GPU().Maxwell3D().regs; 1008 const auto& regs = system.GPU().Maxwell3D().regs;
1036 const bool geometry_shaders_enabled = 1009 const bool geometry_shaders_enabled =
1037 regs.IsShaderConfigEnabled(static_cast<size_t>(Maxwell::ShaderProgram::Geometry)); 1010 regs.IsShaderConfigEnabled(static_cast<size_t>(Maxwell::ShaderProgram::Geometry));
1038 const std::size_t viewport_count = 1011 const std::size_t viewport_count =
@@ -1040,7 +1013,7 @@ void RasterizerOpenGL::SyncViewport(OpenGLState& current_state) {
1040 for (std::size_t i = 0; i < viewport_count; i++) { 1013 for (std::size_t i = 0; i < viewport_count; i++) {
1041 auto& viewport = current_state.viewports[i]; 1014 auto& viewport = current_state.viewports[i];
1042 const auto& src = regs.viewports[i]; 1015 const auto& src = regs.viewports[i];
1043 const MathUtil::Rectangle<s32> viewport_rect{regs.viewport_transform[i].GetRect()}; 1016 const Common::Rectangle<s32> viewport_rect{regs.viewport_transform[i].GetRect()};
1044 viewport.x = viewport_rect.left; 1017 viewport.x = viewport_rect.left;
1045 viewport.y = viewport_rect.bottom; 1018 viewport.y = viewport_rect.bottom;
1046 viewport.width = viewport_rect.GetWidth(); 1019 viewport.width = viewport_rect.GetWidth();
@@ -1055,7 +1028,7 @@ void RasterizerOpenGL::SyncViewport(OpenGLState& current_state) {
1055void RasterizerOpenGL::SyncClipEnabled( 1028void RasterizerOpenGL::SyncClipEnabled(
1056 const std::array<bool, Maxwell::Regs::NumClipDistances>& clip_mask) { 1029 const std::array<bool, Maxwell::Regs::NumClipDistances>& clip_mask) {
1057 1030
1058 const auto& regs = Core::System::GetInstance().GPU().Maxwell3D().regs; 1031 const auto& regs = system.GPU().Maxwell3D().regs;
1059 const std::array<bool, Maxwell::Regs::NumClipDistances> reg_state{ 1032 const std::array<bool, Maxwell::Regs::NumClipDistances> reg_state{
1060 regs.clip_distance_enabled.c0 != 0, regs.clip_distance_enabled.c1 != 0, 1033 regs.clip_distance_enabled.c0 != 0, regs.clip_distance_enabled.c1 != 0,
1061 regs.clip_distance_enabled.c2 != 0, regs.clip_distance_enabled.c3 != 0, 1034 regs.clip_distance_enabled.c2 != 0, regs.clip_distance_enabled.c3 != 0,
@@ -1072,7 +1045,7 @@ void RasterizerOpenGL::SyncClipCoef() {
1072} 1045}
1073 1046
1074void RasterizerOpenGL::SyncCullMode() { 1047void RasterizerOpenGL::SyncCullMode() {
1075 const auto& regs = Core::System::GetInstance().GPU().Maxwell3D().regs; 1048 const auto& regs = system.GPU().Maxwell3D().regs;
1076 1049
1077 state.cull.enabled = regs.cull.enabled != 0; 1050 state.cull.enabled = regs.cull.enabled != 0;
1078 1051
@@ -1096,14 +1069,14 @@ void RasterizerOpenGL::SyncCullMode() {
1096} 1069}
1097 1070
1098void RasterizerOpenGL::SyncPrimitiveRestart() { 1071void RasterizerOpenGL::SyncPrimitiveRestart() {
1099 const auto& regs = Core::System::GetInstance().GPU().Maxwell3D().regs; 1072 const auto& regs = system.GPU().Maxwell3D().regs;
1100 1073
1101 state.primitive_restart.enabled = regs.primitive_restart.enabled; 1074 state.primitive_restart.enabled = regs.primitive_restart.enabled;
1102 state.primitive_restart.index = regs.primitive_restart.index; 1075 state.primitive_restart.index = regs.primitive_restart.index;
1103} 1076}
1104 1077
1105void RasterizerOpenGL::SyncDepthTestState() { 1078void RasterizerOpenGL::SyncDepthTestState() {
1106 const auto& regs = Core::System::GetInstance().GPU().Maxwell3D().regs; 1079 const auto& regs = system.GPU().Maxwell3D().regs;
1107 1080
1108 state.depth.test_enabled = regs.depth_test_enable != 0; 1081 state.depth.test_enabled = regs.depth_test_enable != 0;
1109 state.depth.write_mask = regs.depth_write_enabled ? GL_TRUE : GL_FALSE; 1082 state.depth.write_mask = regs.depth_write_enabled ? GL_TRUE : GL_FALSE;
@@ -1115,7 +1088,7 @@ void RasterizerOpenGL::SyncDepthTestState() {
1115} 1088}
1116 1089
1117void RasterizerOpenGL::SyncStencilTestState() { 1090void RasterizerOpenGL::SyncStencilTestState() {
1118 const auto& regs = Core::System::GetInstance().GPU().Maxwell3D().regs; 1091 const auto& regs = system.GPU().Maxwell3D().regs;
1119 state.stencil.test_enabled = regs.stencil_enable != 0; 1092 state.stencil.test_enabled = regs.stencil_enable != 0;
1120 1093
1121 if (!regs.stencil_enable) { 1094 if (!regs.stencil_enable) {
@@ -1149,7 +1122,7 @@ void RasterizerOpenGL::SyncStencilTestState() {
1149} 1122}
1150 1123
1151void RasterizerOpenGL::SyncColorMask() { 1124void RasterizerOpenGL::SyncColorMask() {
1152 const auto& regs = Core::System::GetInstance().GPU().Maxwell3D().regs; 1125 const auto& regs = system.GPU().Maxwell3D().regs;
1153 const std::size_t count = 1126 const std::size_t count =
1154 regs.independent_blend_enable ? Tegra::Engines::Maxwell3D::Regs::NumRenderTargets : 1; 1127 regs.independent_blend_enable ? Tegra::Engines::Maxwell3D::Regs::NumRenderTargets : 1;
1155 for (std::size_t i = 0; i < count; i++) { 1128 for (std::size_t i = 0; i < count; i++) {
@@ -1163,18 +1136,18 @@ void RasterizerOpenGL::SyncColorMask() {
1163} 1136}
1164 1137
1165void RasterizerOpenGL::SyncMultiSampleState() { 1138void RasterizerOpenGL::SyncMultiSampleState() {
1166 const auto& regs = Core::System::GetInstance().GPU().Maxwell3D().regs; 1139 const auto& regs = system.GPU().Maxwell3D().regs;
1167 state.multisample_control.alpha_to_coverage = regs.multisample_control.alpha_to_coverage != 0; 1140 state.multisample_control.alpha_to_coverage = regs.multisample_control.alpha_to_coverage != 0;
1168 state.multisample_control.alpha_to_one = regs.multisample_control.alpha_to_one != 0; 1141 state.multisample_control.alpha_to_one = regs.multisample_control.alpha_to_one != 0;
1169} 1142}
1170 1143
1171void RasterizerOpenGL::SyncFragmentColorClampState() { 1144void RasterizerOpenGL::SyncFragmentColorClampState() {
1172 const auto& regs = Core::System::GetInstance().GPU().Maxwell3D().regs; 1145 const auto& regs = system.GPU().Maxwell3D().regs;
1173 state.fragment_color_clamp.enabled = regs.frag_color_clamp != 0; 1146 state.fragment_color_clamp.enabled = regs.frag_color_clamp != 0;
1174} 1147}
1175 1148
1176void RasterizerOpenGL::SyncBlendState() { 1149void RasterizerOpenGL::SyncBlendState() {
1177 const auto& regs = Core::System::GetInstance().GPU().Maxwell3D().regs; 1150 const auto& regs = system.GPU().Maxwell3D().regs;
1178 1151
1179 state.blend_color.red = regs.blend_color.r; 1152 state.blend_color.red = regs.blend_color.r;
1180 state.blend_color.green = regs.blend_color.g; 1153 state.blend_color.green = regs.blend_color.g;
@@ -1216,7 +1189,7 @@ void RasterizerOpenGL::SyncBlendState() {
1216} 1189}
1217 1190
1218void RasterizerOpenGL::SyncLogicOpState() { 1191void RasterizerOpenGL::SyncLogicOpState() {
1219 const auto& regs = Core::System::GetInstance().GPU().Maxwell3D().regs; 1192 const auto& regs = system.GPU().Maxwell3D().regs;
1220 1193
1221 state.logic_op.enabled = regs.logic_op.enable != 0; 1194 state.logic_op.enabled = regs.logic_op.enable != 0;
1222 1195
@@ -1230,7 +1203,7 @@ void RasterizerOpenGL::SyncLogicOpState() {
1230} 1203}
1231 1204
1232void RasterizerOpenGL::SyncScissorTest(OpenGLState& current_state) { 1205void RasterizerOpenGL::SyncScissorTest(OpenGLState& current_state) {
1233 const auto& regs = Core::System::GetInstance().GPU().Maxwell3D().regs; 1206 const auto& regs = system.GPU().Maxwell3D().regs;
1234 const bool geometry_shaders_enabled = 1207 const bool geometry_shaders_enabled =
1235 regs.IsShaderConfigEnabled(static_cast<size_t>(Maxwell::ShaderProgram::Geometry)); 1208 regs.IsShaderConfigEnabled(static_cast<size_t>(Maxwell::ShaderProgram::Geometry));
1236 const std::size_t viewport_count = 1209 const std::size_t viewport_count =
@@ -1252,21 +1225,17 @@ void RasterizerOpenGL::SyncScissorTest(OpenGLState& current_state) {
1252} 1225}
1253 1226
1254void RasterizerOpenGL::SyncTransformFeedback() { 1227void RasterizerOpenGL::SyncTransformFeedback() {
1255 const auto& regs = Core::System::GetInstance().GPU().Maxwell3D().regs; 1228 const auto& regs = system.GPU().Maxwell3D().regs;
1256 1229 UNIMPLEMENTED_IF_MSG(regs.tfb_enabled != 0, "Transform feedbacks are not implemented");
1257 if (regs.tfb_enabled != 0) {
1258 LOG_CRITICAL(Render_OpenGL, "Transform feedbacks are not implemented");
1259 UNREACHABLE();
1260 }
1261} 1230}
1262 1231
1263void RasterizerOpenGL::SyncPointState() { 1232void RasterizerOpenGL::SyncPointState() {
1264 const auto& regs = Core::System::GetInstance().GPU().Maxwell3D().regs; 1233 const auto& regs = system.GPU().Maxwell3D().regs;
1265 state.point.size = regs.point_size; 1234 state.point.size = regs.point_size;
1266} 1235}
1267 1236
1268void RasterizerOpenGL::SyncPolygonOffset() { 1237void RasterizerOpenGL::SyncPolygonOffset() {
1269 const auto& regs = Core::System::GetInstance().GPU().Maxwell3D().regs; 1238 const auto& regs = system.GPU().Maxwell3D().regs;
1270 state.polygon_offset.fill_enable = regs.polygon_offset_fill_enable != 0; 1239 state.polygon_offset.fill_enable = regs.polygon_offset_fill_enable != 0;
1271 state.polygon_offset.line_enable = regs.polygon_offset_line_enable != 0; 1240 state.polygon_offset.line_enable = regs.polygon_offset_line_enable != 0;
1272 state.polygon_offset.point_enable = regs.polygon_offset_point_enable != 0; 1241 state.polygon_offset.point_enable = regs.polygon_offset_point_enable != 0;
@@ -1276,13 +1245,9 @@ void RasterizerOpenGL::SyncPolygonOffset() {
1276} 1245}
1277 1246
1278void RasterizerOpenGL::CheckAlphaTests() { 1247void RasterizerOpenGL::CheckAlphaTests() {
1279 const auto& regs = Core::System::GetInstance().GPU().Maxwell3D().regs; 1248 const auto& regs = system.GPU().Maxwell3D().regs;
1280 1249 UNIMPLEMENTED_IF_MSG(regs.alpha_test_enabled != 0 && regs.rt_control.count > 1,
1281 if (regs.alpha_test_enabled != 0 && regs.rt_control.count > 1) { 1250 "Alpha Testing is enabled with more than one rendertarget");
1282 LOG_CRITICAL(Render_OpenGL, "Alpha Testing is enabled with Multiple Render Targets, "
1283 "this behavior is undefined.");
1284 UNREACHABLE();
1285 }
1286} 1251}
1287 1252
1288} // namespace OpenGL 1253} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h
index 7f2bf0f8b..30f3e8acb 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer.h
@@ -5,6 +5,7 @@
5#pragma once 5#pragma once
6 6
7#include <array> 7#include <array>
8#include <atomic>
8#include <cstddef> 9#include <cstddef>
9#include <map> 10#include <map>
10#include <memory> 11#include <memory>
@@ -33,6 +34,10 @@
33#include "video_core/renderer_opengl/gl_state.h" 34#include "video_core/renderer_opengl/gl_state.h"
34#include "video_core/renderer_opengl/gl_stream_buffer.h" 35#include "video_core/renderer_opengl/gl_stream_buffer.h"
35 36
37namespace Core {
38class System;
39}
40
36namespace Core::Frontend { 41namespace Core::Frontend {
37class EmuWindow; 42class EmuWindow;
38} 43}
@@ -45,21 +50,26 @@ struct FramebufferCacheKey;
45 50
46class RasterizerOpenGL : public VideoCore::RasterizerInterface { 51class RasterizerOpenGL : public VideoCore::RasterizerInterface {
47public: 52public:
48 explicit RasterizerOpenGL(Core::Frontend::EmuWindow& renderer, ScreenInfo& info); 53 explicit RasterizerOpenGL(Core::Frontend::EmuWindow& window, Core::System& system,
54 ScreenInfo& info);
49 ~RasterizerOpenGL() override; 55 ~RasterizerOpenGL() override;
50 56
51 void DrawArrays() override; 57 void DrawArrays() override;
52 void Clear() override; 58 void Clear() override;
53 void FlushAll() override; 59 void FlushAll() override;
54 void FlushRegion(VAddr addr, u64 size) override; 60 void FlushRegion(CacheAddr addr, u64 size) override;
55 void InvalidateRegion(VAddr addr, u64 size) override; 61 void InvalidateRegion(CacheAddr addr, u64 size) override;
56 void FlushAndInvalidateRegion(VAddr addr, u64 size) override; 62 void FlushAndInvalidateRegion(CacheAddr addr, u64 size) override;
57 bool AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Regs::Surface& src, 63 bool AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Regs::Surface& src,
58 const Tegra::Engines::Fermi2D::Regs::Surface& dst) override; 64 const Tegra::Engines::Fermi2D::Regs::Surface& dst,
65 const Common::Rectangle<u32>& src_rect,
66 const Common::Rectangle<u32>& dst_rect) override;
59 bool AccelerateDisplay(const Tegra::FramebufferConfig& config, VAddr framebuffer_addr, 67 bool AccelerateDisplay(const Tegra::FramebufferConfig& config, VAddr framebuffer_addr,
60 u32 pixel_stride) override; 68 u32 pixel_stride) override;
61 bool AccelerateDrawBatch(bool is_indexed) override; 69 bool AccelerateDrawBatch(bool is_indexed) override;
62 void UpdatePagesCachedCount(Tegra::GPUVAddr addr, u64 size, int delta) override; 70 void UpdatePagesCachedCount(VAddr addr, u64 size, int delta) override;
71 void LoadDiskResources(const std::atomic_bool& stop_loading,
72 const VideoCore::DiskResourceLoadCallback& callback) override;
63 73
64 /// Maximum supported size that a constbuffer can have in bytes. 74 /// Maximum supported size that a constbuffer can have in bytes.
65 static constexpr std::size_t MaxConstbufferSize = 0x10000; 75 static constexpr std::size_t MaxConstbufferSize = 0x10000;
@@ -84,11 +94,12 @@ private:
84 private: 94 private:
85 Tegra::Texture::TextureFilter mag_filter = Tegra::Texture::TextureFilter::Nearest; 95 Tegra::Texture::TextureFilter mag_filter = Tegra::Texture::TextureFilter::Nearest;
86 Tegra::Texture::TextureFilter min_filter = Tegra::Texture::TextureFilter::Nearest; 96 Tegra::Texture::TextureFilter min_filter = Tegra::Texture::TextureFilter::Nearest;
87 Tegra::Texture::TextureMipmapFilter mip_filter = Tegra::Texture::TextureMipmapFilter::None; 97 Tegra::Texture::TextureMipmapFilter mipmap_filter =
98 Tegra::Texture::TextureMipmapFilter::None;
88 Tegra::Texture::WrapMode wrap_u = Tegra::Texture::WrapMode::ClampToEdge; 99 Tegra::Texture::WrapMode wrap_u = Tegra::Texture::WrapMode::ClampToEdge;
89 Tegra::Texture::WrapMode wrap_v = Tegra::Texture::WrapMode::ClampToEdge; 100 Tegra::Texture::WrapMode wrap_v = Tegra::Texture::WrapMode::ClampToEdge;
90 Tegra::Texture::WrapMode wrap_p = Tegra::Texture::WrapMode::ClampToEdge; 101 Tegra::Texture::WrapMode wrap_p = Tegra::Texture::WrapMode::ClampToEdge;
91 bool uses_depth_compare = false; 102 bool use_depth_compare = false;
92 Tegra::Texture::DepthCompareFunc depth_compare_func = 103 Tegra::Texture::DepthCompareFunc depth_compare_func =
93 Tegra::Texture::DepthCompareFunc::Always; 104 Tegra::Texture::DepthCompareFunc::Always;
94 GLvec4 border_color = {}; 105 GLvec4 border_color = {};
@@ -204,6 +215,7 @@ private:
204 GlobalRegionCacheOpenGL global_cache; 215 GlobalRegionCacheOpenGL global_cache;
205 216
206 Core::Frontend::EmuWindow& emu_window; 217 Core::Frontend::EmuWindow& emu_window;
218 Core::System& system;
207 219
208 ScreenInfo& screen_info; 220 ScreenInfo& screen_info;
209 221
diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
index 50286432d..57329cd61 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
@@ -3,6 +3,7 @@
3// Refer to the license.txt file included. 3// Refer to the license.txt file included.
4 4
5#include <algorithm> 5#include <algorithm>
6#include <optional>
6#include <glad/glad.h> 7#include <glad/glad.h>
7 8
8#include "common/alignment.h" 9#include "common/alignment.h"
@@ -18,10 +19,9 @@
18#include "video_core/morton.h" 19#include "video_core/morton.h"
19#include "video_core/renderer_opengl/gl_rasterizer.h" 20#include "video_core/renderer_opengl/gl_rasterizer.h"
20#include "video_core/renderer_opengl/gl_rasterizer_cache.h" 21#include "video_core/renderer_opengl/gl_rasterizer_cache.h"
21#include "video_core/renderer_opengl/gl_state.h"
22#include "video_core/renderer_opengl/utils.h" 22#include "video_core/renderer_opengl/utils.h"
23#include "video_core/surface.h" 23#include "video_core/surface.h"
24#include "video_core/textures/astc.h" 24#include "video_core/textures/convert.h"
25#include "video_core/textures/decoders.h" 25#include "video_core/textures/decoders.h"
26 26
27namespace OpenGL { 27namespace OpenGL {
@@ -44,23 +44,22 @@ struct FormatTuple {
44 bool compressed; 44 bool compressed;
45}; 45};
46 46
47static void ApplyTextureDefaults(GLenum target, u32 max_mip_level) { 47static void ApplyTextureDefaults(GLuint texture, u32 max_mip_level) {
48 glTexParameteri(target, GL_TEXTURE_MIN_FILTER, GL_LINEAR); 48 glTextureParameteri(texture, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
49 glTexParameteri(target, GL_TEXTURE_MAG_FILTER, GL_LINEAR); 49 glTextureParameteri(texture, GL_TEXTURE_MAG_FILTER, GL_LINEAR);
50 glTexParameteri(target, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE); 50 glTextureParameteri(texture, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
51 glTexParameteri(target, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE); 51 glTextureParameteri(texture, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
52 glTexParameteri(target, GL_TEXTURE_MAX_LEVEL, max_mip_level - 1); 52 glTextureParameteri(texture, GL_TEXTURE_MAX_LEVEL, max_mip_level - 1);
53 if (max_mip_level == 1) { 53 if (max_mip_level == 1) {
54 glTexParameterf(target, GL_TEXTURE_LOD_BIAS, 1000.0); 54 glTextureParameterf(texture, GL_TEXTURE_LOD_BIAS, 1000.0);
55 } 55 }
56} 56}
57 57
58void SurfaceParams::InitCacheParameters(Tegra::GPUVAddr gpu_addr_) { 58void SurfaceParams::InitCacheParameters(Tegra::GPUVAddr gpu_addr_) {
59 auto& memory_manager{Core::System::GetInstance().GPU().MemoryManager()}; 59 auto& memory_manager{Core::System::GetInstance().GPU().MemoryManager()};
60 const auto cpu_addr{memory_manager.GpuToCpuAddress(gpu_addr_)};
61 60
62 addr = cpu_addr ? *cpu_addr : 0;
63 gpu_addr = gpu_addr_; 61 gpu_addr = gpu_addr_;
62 host_ptr = memory_manager.GetPointer(gpu_addr_);
64 size_in_bytes = SizeInBytesRaw(); 63 size_in_bytes = SizeInBytesRaw();
65 64
66 if (IsPixelFormatASTC(pixel_format)) { 65 if (IsPixelFormatASTC(pixel_format)) {
@@ -126,6 +125,9 @@ std::size_t SurfaceParams::InnerMemorySize(bool force_gl, bool layer_only,
126 125
127 params.width = Common::AlignUp(config.tic.Width(), GetCompressionFactor(params.pixel_format)); 126 params.width = Common::AlignUp(config.tic.Width(), GetCompressionFactor(params.pixel_format));
128 params.height = Common::AlignUp(config.tic.Height(), GetCompressionFactor(params.pixel_format)); 127 params.height = Common::AlignUp(config.tic.Height(), GetCompressionFactor(params.pixel_format));
128 if (!params.is_tiled) {
129 params.pitch = config.tic.Pitch();
130 }
129 params.unaligned_height = config.tic.Height(); 131 params.unaligned_height = config.tic.Height();
130 params.target = SurfaceTargetFromTextureType(config.tic.texture_type); 132 params.target = SurfaceTargetFromTextureType(config.tic.texture_type);
131 params.identity = SurfaceClass::Uploaded; 133 params.identity = SurfaceClass::Uploaded;
@@ -192,7 +194,13 @@ std::size_t SurfaceParams::InnerMemorySize(bool force_gl, bool layer_only,
192 config.format == Tegra::RenderTargetFormat::RGBA8_SRGB; 194 config.format == Tegra::RenderTargetFormat::RGBA8_SRGB;
193 params.component_type = ComponentTypeFromRenderTarget(config.format); 195 params.component_type = ComponentTypeFromRenderTarget(config.format);
194 params.type = GetFormatType(params.pixel_format); 196 params.type = GetFormatType(params.pixel_format);
195 params.width = config.width; 197 if (params.is_tiled) {
198 params.width = config.width;
199 } else {
200 params.pitch = config.width;
201 const u32 bpp = params.GetFormatBpp() / 8;
202 params.width = params.pitch / bpp;
203 }
196 params.height = config.height; 204 params.height = config.height;
197 params.unaligned_height = config.height; 205 params.unaligned_height = config.height;
198 params.target = SurfaceTarget::Texture2D; 206 params.target = SurfaceTarget::Texture2D;
@@ -391,7 +399,28 @@ static const FormatTuple& GetFormatTuple(PixelFormat pixel_format, ComponentType
391 return format; 399 return format;
392} 400}
393 401
394MathUtil::Rectangle<u32> SurfaceParams::GetRect(u32 mip_level) const { 402/// Returns the discrepant array target
403constexpr GLenum GetArrayDiscrepantTarget(SurfaceTarget target) {
404 switch (target) {
405 case SurfaceTarget::Texture1D:
406 return GL_TEXTURE_1D_ARRAY;
407 case SurfaceTarget::Texture2D:
408 return GL_TEXTURE_2D_ARRAY;
409 case SurfaceTarget::Texture3D:
410 return GL_NONE;
411 case SurfaceTarget::Texture1DArray:
412 return GL_TEXTURE_1D;
413 case SurfaceTarget::Texture2DArray:
414 return GL_TEXTURE_2D;
415 case SurfaceTarget::TextureCubemap:
416 return GL_TEXTURE_CUBE_MAP_ARRAY;
417 case SurfaceTarget::TextureCubeArray:
418 return GL_TEXTURE_CUBE_MAP;
419 }
420 return GL_NONE;
421}
422
423Common::Rectangle<u32> SurfaceParams::GetRect(u32 mip_level) const {
395 u32 actual_height{std::max(1U, unaligned_height >> mip_level)}; 424 u32 actual_height{std::max(1U, unaligned_height >> mip_level)};
396 if (IsPixelFormatASTC(pixel_format)) { 425 if (IsPixelFormatASTC(pixel_format)) {
397 // ASTC formats must stop at the ATSC block size boundary 426 // ASTC formats must stop at the ATSC block size boundary
@@ -415,8 +444,8 @@ void SwizzleFunc(const MortonSwizzleMode& mode, const SurfaceParams& params,
415 for (u32 i = 0; i < params.depth; i++) { 444 for (u32 i = 0; i < params.depth; i++) {
416 MortonSwizzle(mode, params.pixel_format, params.MipWidth(mip_level), 445 MortonSwizzle(mode, params.pixel_format, params.MipWidth(mip_level),
417 params.MipBlockHeight(mip_level), params.MipHeight(mip_level), 446 params.MipBlockHeight(mip_level), params.MipHeight(mip_level),
418 params.MipBlockDepth(mip_level), params.tile_width_spacing, 1, 447 params.MipBlockDepth(mip_level), 1, params.tile_width_spacing,
419 gl_buffer.data() + offset_gl, gl_size, params.addr + offset); 448 gl_buffer.data() + offset_gl, params.host_ptr + offset);
420 offset += layer_size; 449 offset += layer_size;
421 offset_gl += gl_size; 450 offset_gl += gl_size;
422 } 451 }
@@ -425,11 +454,12 @@ void SwizzleFunc(const MortonSwizzleMode& mode, const SurfaceParams& params,
425 MortonSwizzle(mode, params.pixel_format, params.MipWidth(mip_level), 454 MortonSwizzle(mode, params.pixel_format, params.MipWidth(mip_level),
426 params.MipBlockHeight(mip_level), params.MipHeight(mip_level), 455 params.MipBlockHeight(mip_level), params.MipHeight(mip_level),
427 params.MipBlockDepth(mip_level), depth, params.tile_width_spacing, 456 params.MipBlockDepth(mip_level), depth, params.tile_width_spacing,
428 gl_buffer.data(), gl_buffer.size(), params.addr + offset); 457 gl_buffer.data(), params.host_ptr + offset);
429 } 458 }
430} 459}
431 460
432static void FastCopySurface(const Surface& src_surface, const Surface& dst_surface) { 461void RasterizerCacheOpenGL::FastCopySurface(const Surface& src_surface,
462 const Surface& dst_surface) {
433 const auto& src_params{src_surface->GetSurfaceParams()}; 463 const auto& src_params{src_surface->GetSurfaceParams()};
434 const auto& dst_params{dst_surface->GetSurfaceParams()}; 464 const auto& dst_params{dst_surface->GetSurfaceParams()};
435 465
@@ -439,12 +469,15 @@ static void FastCopySurface(const Surface& src_surface, const Surface& dst_surfa
439 glCopyImageSubData(src_surface->Texture().handle, SurfaceTargetToGL(src_params.target), 0, 0, 0, 469 glCopyImageSubData(src_surface->Texture().handle, SurfaceTargetToGL(src_params.target), 0, 0, 0,
440 0, dst_surface->Texture().handle, SurfaceTargetToGL(dst_params.target), 0, 0, 470 0, dst_surface->Texture().handle, SurfaceTargetToGL(dst_params.target), 0, 0,
441 0, 0, width, height, 1); 471 0, 0, width, height, 1);
472
473 dst_surface->MarkAsModified(true, *this);
442} 474}
443 475
444MICROPROFILE_DEFINE(OpenGL_CopySurface, "OpenGL", "CopySurface", MP_RGB(128, 192, 64)); 476MICROPROFILE_DEFINE(OpenGL_CopySurface, "OpenGL", "CopySurface", MP_RGB(128, 192, 64));
445static void CopySurface(const Surface& src_surface, const Surface& dst_surface, 477void RasterizerCacheOpenGL::CopySurface(const Surface& src_surface, const Surface& dst_surface,
446 const GLuint copy_pbo_handle, const GLenum src_attachment = 0, 478 const GLuint copy_pbo_handle, const GLenum src_attachment,
447 const GLenum dst_attachment = 0, const std::size_t cubemap_face = 0) { 479 const GLenum dst_attachment,
480 const std::size_t cubemap_face) {
448 MICROPROFILE_SCOPE(OpenGL_CopySurface); 481 MICROPROFILE_SCOPE(OpenGL_CopySurface);
449 ASSERT_MSG(dst_attachment == 0, "Unimplemented"); 482 ASSERT_MSG(dst_attachment == 0, "Unimplemented");
450 483
@@ -479,9 +512,9 @@ static void CopySurface(const Surface& src_surface, const Surface& dst_surface,
479 "reinterpretation but the texture is tiled."); 512 "reinterpretation but the texture is tiled.");
480 } 513 }
481 const std::size_t remaining_size = dst_params.size_in_bytes - src_params.size_in_bytes; 514 const std::size_t remaining_size = dst_params.size_in_bytes - src_params.size_in_bytes;
482 515 auto& memory_manager{Core::System::GetInstance().GPU().MemoryManager()};
483 glBufferSubData(GL_PIXEL_PACK_BUFFER, src_params.size_in_bytes, remaining_size, 516 glBufferSubData(GL_PIXEL_PACK_BUFFER, src_params.size_in_bytes, remaining_size,
484 Memory::GetPointer(dst_params.addr + src_params.size_in_bytes)); 517 memory_manager.GetPointer(dst_params.gpu_addr + src_params.size_in_bytes));
485 } 518 }
486 519
487 glBindBuffer(GL_PIXEL_PACK_BUFFER, 0); 520 glBindBuffer(GL_PIXEL_PACK_BUFFER, 0);
@@ -524,62 +557,52 @@ static void CopySurface(const Surface& src_surface, const Surface& dst_surface,
524 } 557 }
525 glBindBuffer(GL_PIXEL_UNPACK_BUFFER, 0); 558 glBindBuffer(GL_PIXEL_UNPACK_BUFFER, 0);
526 } 559 }
560
561 dst_surface->MarkAsModified(true, *this);
527} 562}
528 563
529CachedSurface::CachedSurface(const SurfaceParams& params) 564CachedSurface::CachedSurface(const SurfaceParams& params)
530 : params(params), gl_target(SurfaceTargetToGL(params.target)), 565 : params{params}, gl_target{SurfaceTargetToGL(params.target)},
531 cached_size_in_bytes(params.size_in_bytes) { 566 cached_size_in_bytes{params.size_in_bytes}, RasterizerCacheObject{params.host_ptr} {
532 texture.Create(); 567 texture.Create(gl_target);
533 const auto& rect{params.GetRect()}; 568
534 569 // TODO(Rodrigo): Using params.GetRect() returns a different size than using its Mip*(0)
535 // Keep track of previous texture bindings 570 // alternatives. This signals a bug on those functions.
536 OpenGLState cur_state = OpenGLState::GetCurState(); 571 const auto width = static_cast<GLsizei>(params.MipWidth(0));
537 const auto& old_tex = cur_state.texture_units[0]; 572 const auto height = static_cast<GLsizei>(params.MipHeight(0));
538 SCOPE_EXIT({ 573 memory_size = params.MemorySize();
539 cur_state.texture_units[0] = old_tex; 574 reinterpreted = false;
540 cur_state.Apply();
541 });
542
543 cur_state.texture_units[0].texture = texture.handle;
544 cur_state.texture_units[0].target = SurfaceTargetToGL(params.target);
545 cur_state.Apply();
546 glActiveTexture(GL_TEXTURE0);
547 575
548 const auto& format_tuple = GetFormatTuple(params.pixel_format, params.component_type); 576 const auto& format_tuple = GetFormatTuple(params.pixel_format, params.component_type);
549 gl_internal_format = format_tuple.internal_format; 577 gl_internal_format = format_tuple.internal_format;
550 gl_is_compressed = format_tuple.compressed;
551 578
552 if (!format_tuple.compressed) { 579 switch (params.target) {
553 // Only pre-create the texture for non-compressed textures. 580 case SurfaceTarget::Texture1D:
554 switch (params.target) { 581 glTextureStorage1D(texture.handle, params.max_mip_level, format_tuple.internal_format,
555 case SurfaceTarget::Texture1D: 582 width);
556 glTexStorage1D(SurfaceTargetToGL(params.target), params.max_mip_level, 583 break;
557 format_tuple.internal_format, rect.GetWidth()); 584 case SurfaceTarget::Texture2D:
558 break; 585 case SurfaceTarget::TextureCubemap:
559 case SurfaceTarget::Texture2D: 586 glTextureStorage2D(texture.handle, params.max_mip_level, format_tuple.internal_format,
560 case SurfaceTarget::TextureCubemap: 587 width, height);
561 glTexStorage2D(SurfaceTargetToGL(params.target), params.max_mip_level, 588 break;
562 format_tuple.internal_format, rect.GetWidth(), rect.GetHeight()); 589 case SurfaceTarget::Texture3D:
563 break; 590 case SurfaceTarget::Texture2DArray:
564 case SurfaceTarget::Texture3D: 591 case SurfaceTarget::TextureCubeArray:
565 case SurfaceTarget::Texture2DArray: 592 glTextureStorage3D(texture.handle, params.max_mip_level, format_tuple.internal_format,
566 case SurfaceTarget::TextureCubeArray: 593 width, height, params.depth);
567 glTexStorage3D(SurfaceTargetToGL(params.target), params.max_mip_level, 594 break;
568 format_tuple.internal_format, rect.GetWidth(), rect.GetHeight(), 595 default:
569 params.depth); 596 LOG_CRITICAL(Render_OpenGL, "Unimplemented surface target={}",
570 break; 597 static_cast<u32>(params.target));
571 default: 598 UNREACHABLE();
572 LOG_CRITICAL(Render_OpenGL, "Unimplemented surface target={}", 599 glTextureStorage2D(texture.handle, params.max_mip_level, format_tuple.internal_format,
573 static_cast<u32>(params.target)); 600 width, height);
574 UNREACHABLE();
575 glTexStorage2D(GL_TEXTURE_2D, params.max_mip_level, format_tuple.internal_format,
576 rect.GetWidth(), rect.GetHeight());
577 }
578 } 601 }
579 602
580 ApplyTextureDefaults(SurfaceTargetToGL(params.target), params.max_mip_level); 603 ApplyTextureDefaults(texture.handle, params.max_mip_level);
581 604
582 OpenGL::LabelGLObject(GL_TEXTURE, texture.handle, params.addr, params.IdentityString()); 605 OpenGL::LabelGLObject(GL_TEXTURE, texture.handle, params.gpu_addr, params.IdentityString());
583 606
584 // Clamp size to mapped GPU memory region 607 // Clamp size to mapped GPU memory region
585 // TODO(bunnei): Super Mario Odyssey maps a 0x40000 byte region and then uses it for a 0x80000 608 // TODO(bunnei): Super Mario Odyssey maps a 0x40000 byte region and then uses it for a 0x80000
@@ -592,103 +615,8 @@ CachedSurface::CachedSurface(const SurfaceParams& params)
592 LOG_ERROR(HW_GPU, "Surface size {} exceeds region size {}", params.size_in_bytes, max_size); 615 LOG_ERROR(HW_GPU, "Surface size {} exceeds region size {}", params.size_in_bytes, max_size);
593 cached_size_in_bytes = max_size; 616 cached_size_in_bytes = max_size;
594 } 617 }
595}
596
597static void ConvertS8Z24ToZ24S8(std::vector<u8>& data, u32 width, u32 height, bool reverse) {
598 union S8Z24 {
599 BitField<0, 24, u32> z24;
600 BitField<24, 8, u32> s8;
601 };
602 static_assert(sizeof(S8Z24) == 4, "S8Z24 is incorrect size");
603
604 union Z24S8 {
605 BitField<0, 8, u32> s8;
606 BitField<8, 24, u32> z24;
607 };
608 static_assert(sizeof(Z24S8) == 4, "Z24S8 is incorrect size");
609
610 S8Z24 s8z24_pixel{};
611 Z24S8 z24s8_pixel{};
612 constexpr auto bpp{GetBytesPerPixel(PixelFormat::S8Z24)};
613 for (std::size_t y = 0; y < height; ++y) {
614 for (std::size_t x = 0; x < width; ++x) {
615 const std::size_t offset{bpp * (y * width + x)};
616 if (reverse) {
617 std::memcpy(&z24s8_pixel, &data[offset], sizeof(Z24S8));
618 s8z24_pixel.s8.Assign(z24s8_pixel.s8);
619 s8z24_pixel.z24.Assign(z24s8_pixel.z24);
620 std::memcpy(&data[offset], &s8z24_pixel, sizeof(S8Z24));
621 } else {
622 std::memcpy(&s8z24_pixel, &data[offset], sizeof(S8Z24));
623 z24s8_pixel.s8.Assign(s8z24_pixel.s8);
624 z24s8_pixel.z24.Assign(s8z24_pixel.z24);
625 std::memcpy(&data[offset], &z24s8_pixel, sizeof(Z24S8));
626 }
627 }
628 }
629}
630
631/**
632 * Helper function to perform software conversion (as needed) when loading a buffer from Switch
633 * memory. This is for Maxwell pixel formats that cannot be represented as-is in OpenGL or with
634 * typical desktop GPUs.
635 */
636static void ConvertFormatAsNeeded_LoadGLBuffer(std::vector<u8>& data, PixelFormat pixel_format,
637 u32 width, u32 height, u32 depth) {
638 switch (pixel_format) {
639 case PixelFormat::ASTC_2D_4X4:
640 case PixelFormat::ASTC_2D_8X8:
641 case PixelFormat::ASTC_2D_8X5:
642 case PixelFormat::ASTC_2D_5X4:
643 case PixelFormat::ASTC_2D_5X5:
644 case PixelFormat::ASTC_2D_4X4_SRGB:
645 case PixelFormat::ASTC_2D_8X8_SRGB:
646 case PixelFormat::ASTC_2D_8X5_SRGB:
647 case PixelFormat::ASTC_2D_5X4_SRGB:
648 case PixelFormat::ASTC_2D_5X5_SRGB:
649 case PixelFormat::ASTC_2D_10X8:
650 case PixelFormat::ASTC_2D_10X8_SRGB: {
651 // Convert ASTC pixel formats to RGBA8, as most desktop GPUs do not support ASTC.
652 u32 block_width{};
653 u32 block_height{};
654 std::tie(block_width, block_height) = GetASTCBlockSize(pixel_format);
655 data =
656 Tegra::Texture::ASTC::Decompress(data, width, height, depth, block_width, block_height);
657 break;
658 }
659 case PixelFormat::S8Z24:
660 // Convert the S8Z24 depth format to Z24S8, as OpenGL does not support S8Z24.
661 ConvertS8Z24ToZ24S8(data, width, height, false);
662 break;
663 }
664}
665 618
666/** 619 cpu_addr = *memory_manager.GpuToCpuAddress(params.gpu_addr);
667 * Helper function to perform software conversion (as needed) when flushing a buffer from OpenGL to
668 * Switch memory. This is for Maxwell pixel formats that cannot be represented as-is in OpenGL or
669 * with typical desktop GPUs.
670 */
671static void ConvertFormatAsNeeded_FlushGLBuffer(std::vector<u8>& data, PixelFormat pixel_format,
672 u32 width, u32 height) {
673 switch (pixel_format) {
674 case PixelFormat::ASTC_2D_4X4:
675 case PixelFormat::ASTC_2D_8X8:
676 case PixelFormat::ASTC_2D_4X4_SRGB:
677 case PixelFormat::ASTC_2D_8X8_SRGB:
678 case PixelFormat::ASTC_2D_5X5:
679 case PixelFormat::ASTC_2D_5X5_SRGB:
680 case PixelFormat::ASTC_2D_10X8:
681 case PixelFormat::ASTC_2D_10X8_SRGB: {
682 LOG_CRITICAL(HW_GPU, "Conversion of format {} after texture flushing is not implemented",
683 static_cast<u32>(pixel_format));
684 UNREACHABLE();
685 break;
686 }
687 case PixelFormat::S8Z24:
688 // Convert the Z24S8 depth format to S8Z24, as OpenGL does not support S8Z24.
689 ConvertS8Z24ToZ24S8(data, width, height, true);
690 break;
691 }
692} 620}
693 621
694MICROPROFILE_DEFINE(OpenGL_SurfaceLoad, "OpenGL", "Surface Load", MP_RGB(128, 192, 64)); 622MICROPROFILE_DEFINE(OpenGL_SurfaceLoad, "OpenGL", "Surface Load", MP_RGB(128, 192, 64));
@@ -703,13 +631,31 @@ void CachedSurface::LoadGLBuffer() {
703 for (u32 i = 0; i < params.max_mip_level; i++) 631 for (u32 i = 0; i < params.max_mip_level; i++)
704 SwizzleFunc(MortonSwizzleMode::MortonToLinear, params, gl_buffer[i], i); 632 SwizzleFunc(MortonSwizzleMode::MortonToLinear, params, gl_buffer[i], i);
705 } else { 633 } else {
706 const auto texture_src_data{Memory::GetPointer(params.addr)}; 634 const u32 bpp = params.GetFormatBpp() / 8;
707 const auto texture_src_data_end{texture_src_data + params.size_in_bytes_gl}; 635 const u32 copy_size = params.width * bpp;
708 gl_buffer[0].assign(texture_src_data, texture_src_data_end); 636 if (params.pitch == copy_size) {
637 std::memcpy(gl_buffer[0].data(), params.host_ptr, params.size_in_bytes_gl);
638 } else {
639 const u8* start{params.host_ptr};
640 u8* write_to = gl_buffer[0].data();
641 for (u32 h = params.height; h > 0; h--) {
642 std::memcpy(write_to, start, copy_size);
643 start += params.pitch;
644 write_to += copy_size;
645 }
646 }
709 } 647 }
710 for (u32 i = 0; i < params.max_mip_level; i++) { 648 for (u32 i = 0; i < params.max_mip_level; i++) {
711 ConvertFormatAsNeeded_LoadGLBuffer(gl_buffer[i], params.pixel_format, params.MipWidth(i), 649 const u32 width = params.MipWidth(i);
712 params.MipHeight(i), params.MipDepth(i)); 650 const u32 height = params.MipHeight(i);
651 const u32 depth = params.MipDepth(i);
652 if (VideoCore::Surface::IsPixelFormatASTC(params.pixel_format)) {
653 // Reserve size for RGBA8 conversion
654 constexpr std::size_t rgba_bpp = 4;
655 gl_buffer[i].resize(std::max(gl_buffer[i].size(), width * height * depth * rgba_bpp));
656 }
657 Tegra::Texture::ConvertFromGuestToHost(gl_buffer[i].data(), params.pixel_format, width,
658 height, depth, true, true);
713 } 659 }
714} 660}
715 661
@@ -732,17 +678,27 @@ void CachedSurface::FlushGLBuffer() {
732 glGetTextureImage(texture.handle, 0, tuple.format, tuple.type, 678 glGetTextureImage(texture.handle, 0, tuple.format, tuple.type,
733 static_cast<GLsizei>(gl_buffer[0].size()), gl_buffer[0].data()); 679 static_cast<GLsizei>(gl_buffer[0].size()), gl_buffer[0].data());
734 glPixelStorei(GL_PACK_ROW_LENGTH, 0); 680 glPixelStorei(GL_PACK_ROW_LENGTH, 0);
735 ConvertFormatAsNeeded_FlushGLBuffer(gl_buffer[0], params.pixel_format, params.width, 681 Tegra::Texture::ConvertFromHostToGuest(gl_buffer[0].data(), params.pixel_format, params.width,
736 params.height); 682 params.height, params.depth, true, true);
737 const u8* const texture_src_data = Memory::GetPointer(params.addr);
738 ASSERT(texture_src_data);
739 if (params.is_tiled) { 683 if (params.is_tiled) {
740 ASSERT_MSG(params.block_width == 1, "Block width is defined as {} on texture type {}", 684 ASSERT_MSG(params.block_width == 1, "Block width is defined as {} on texture type {}",
741 params.block_width, static_cast<u32>(params.target)); 685 params.block_width, static_cast<u32>(params.target));
742 686
743 SwizzleFunc(MortonSwizzleMode::LinearToMorton, params, gl_buffer[0], 0); 687 SwizzleFunc(MortonSwizzleMode::LinearToMorton, params, gl_buffer[0], 0);
744 } else { 688 } else {
745 std::memcpy(Memory::GetPointer(GetAddr()), gl_buffer[0].data(), GetSizeInBytes()); 689 const u32 bpp = params.GetFormatBpp() / 8;
690 const u32 copy_size = params.width * bpp;
691 if (params.pitch == copy_size) {
692 std::memcpy(params.host_ptr, gl_buffer[0].data(), GetSizeInBytes());
693 } else {
694 u8* start{params.host_ptr};
695 const u8* read_to = gl_buffer[0].data();
696 for (u32 h = params.height; h > 0; h--) {
697 std::memcpy(start, read_to, copy_size);
698 start += params.pitch;
699 read_to += copy_size;
700 }
701 }
746 } 702 }
747} 703}
748 704
@@ -751,63 +707,50 @@ void CachedSurface::UploadGLMipmapTexture(u32 mip_map, GLuint read_fb_handle,
751 const auto& rect{params.GetRect(mip_map)}; 707 const auto& rect{params.GetRect(mip_map)};
752 708
753 // Load data from memory to the surface 709 // Load data from memory to the surface
754 const GLint x0 = static_cast<GLint>(rect.left); 710 const auto x0 = static_cast<GLint>(rect.left);
755 const GLint y0 = static_cast<GLint>(rect.bottom); 711 const auto y0 = static_cast<GLint>(rect.bottom);
756 std::size_t buffer_offset = 712 auto buffer_offset =
757 static_cast<std::size_t>(static_cast<std::size_t>(y0) * params.MipWidth(mip_map) + 713 static_cast<std::size_t>(static_cast<std::size_t>(y0) * params.MipWidth(mip_map) +
758 static_cast<std::size_t>(x0)) * 714 static_cast<std::size_t>(x0)) *
759 GetBytesPerPixel(params.pixel_format); 715 GetBytesPerPixel(params.pixel_format);
760 716
761 const FormatTuple& tuple = GetFormatTuple(params.pixel_format, params.component_type); 717 const FormatTuple& tuple = GetFormatTuple(params.pixel_format, params.component_type);
762 const GLuint target_tex = texture.handle;
763 OpenGLState cur_state = OpenGLState::GetCurState();
764
765 const auto& old_tex = cur_state.texture_units[0];
766 SCOPE_EXIT({
767 cur_state.texture_units[0] = old_tex;
768 cur_state.Apply();
769 });
770 cur_state.texture_units[0].texture = target_tex;
771 cur_state.texture_units[0].target = SurfaceTargetToGL(params.target);
772 cur_state.Apply();
773 718
774 // Ensure no bad interactions with GL_UNPACK_ALIGNMENT 719 // Ensure no bad interactions with GL_UNPACK_ALIGNMENT
775 ASSERT(params.MipWidth(mip_map) * GetBytesPerPixel(params.pixel_format) % 4 == 0); 720 ASSERT(params.MipWidth(mip_map) * GetBytesPerPixel(params.pixel_format) % 4 == 0);
776 glPixelStorei(GL_UNPACK_ROW_LENGTH, static_cast<GLint>(params.MipWidth(mip_map))); 721 glPixelStorei(GL_UNPACK_ROW_LENGTH, static_cast<GLint>(params.MipWidth(mip_map)));
777 722
778 GLsizei image_size = static_cast<GLsizei>(params.GetMipmapSizeGL(mip_map, false)); 723 const auto image_size = static_cast<GLsizei>(params.GetMipmapSizeGL(mip_map, false));
779 glActiveTexture(GL_TEXTURE0);
780 if (tuple.compressed) { 724 if (tuple.compressed) {
781 switch (params.target) { 725 switch (params.target) {
782 case SurfaceTarget::Texture2D: 726 case SurfaceTarget::Texture2D:
783 glCompressedTexImage2D(SurfaceTargetToGL(params.target), mip_map, tuple.internal_format, 727 glCompressedTextureSubImage2D(
784 static_cast<GLsizei>(params.MipWidth(mip_map)), 728 texture.handle, mip_map, 0, 0, static_cast<GLsizei>(params.MipWidth(mip_map)),
785 static_cast<GLsizei>(params.MipHeight(mip_map)), 0, image_size, 729 static_cast<GLsizei>(params.MipHeight(mip_map)), tuple.internal_format, image_size,
786 &gl_buffer[mip_map][buffer_offset]); 730 &gl_buffer[mip_map][buffer_offset]);
787 break; 731 break;
788 case SurfaceTarget::Texture3D: 732 case SurfaceTarget::Texture3D:
789 glCompressedTexImage3D(SurfaceTargetToGL(params.target), mip_map, tuple.internal_format, 733 glCompressedTextureSubImage3D(
790 static_cast<GLsizei>(params.MipWidth(mip_map)), 734 texture.handle, mip_map, 0, 0, 0, static_cast<GLsizei>(params.MipWidth(mip_map)),
791 static_cast<GLsizei>(params.MipHeight(mip_map)), 735 static_cast<GLsizei>(params.MipHeight(mip_map)),
792 static_cast<GLsizei>(params.MipDepth(mip_map)), 0, image_size, 736 static_cast<GLsizei>(params.MipDepth(mip_map)), tuple.internal_format, image_size,
793 &gl_buffer[mip_map][buffer_offset]); 737 &gl_buffer[mip_map][buffer_offset]);
794 break; 738 break;
795 case SurfaceTarget::Texture2DArray: 739 case SurfaceTarget::Texture2DArray:
796 case SurfaceTarget::TextureCubeArray: 740 case SurfaceTarget::TextureCubeArray:
797 glCompressedTexImage3D(SurfaceTargetToGL(params.target), mip_map, tuple.internal_format, 741 glCompressedTextureSubImage3D(
798 static_cast<GLsizei>(params.MipWidth(mip_map)), 742 texture.handle, mip_map, 0, 0, 0, static_cast<GLsizei>(params.MipWidth(mip_map)),
799 static_cast<GLsizei>(params.MipHeight(mip_map)), 743 static_cast<GLsizei>(params.MipHeight(mip_map)), static_cast<GLsizei>(params.depth),
800 static_cast<GLsizei>(params.depth), 0, image_size, 744 tuple.internal_format, image_size, &gl_buffer[mip_map][buffer_offset]);
801 &gl_buffer[mip_map][buffer_offset]);
802 break; 745 break;
803 case SurfaceTarget::TextureCubemap: { 746 case SurfaceTarget::TextureCubemap: {
804 GLsizei layer_size = static_cast<GLsizei>(params.LayerSizeGL(mip_map)); 747 const auto layer_size = static_cast<GLsizei>(params.LayerSizeGL(mip_map));
805 for (std::size_t face = 0; face < params.depth; ++face) { 748 for (std::size_t face = 0; face < params.depth; ++face) {
806 glCompressedTexImage2D(static_cast<GLenum>(GL_TEXTURE_CUBE_MAP_POSITIVE_X + face), 749 glCompressedTextureSubImage3D(
807 mip_map, tuple.internal_format, 750 texture.handle, mip_map, 0, 0, static_cast<GLint>(face),
808 static_cast<GLsizei>(params.MipWidth(mip_map)), 751 static_cast<GLsizei>(params.MipWidth(mip_map)),
809 static_cast<GLsizei>(params.MipHeight(mip_map)), 0, 752 static_cast<GLsizei>(params.MipHeight(mip_map)), 1, tuple.internal_format,
810 layer_size, &gl_buffer[mip_map][buffer_offset]); 753 layer_size, &gl_buffer[mip_map][buffer_offset]);
811 buffer_offset += layer_size; 754 buffer_offset += layer_size;
812 } 755 }
813 break; 756 break;
@@ -816,46 +759,43 @@ void CachedSurface::UploadGLMipmapTexture(u32 mip_map, GLuint read_fb_handle,
816 LOG_CRITICAL(Render_OpenGL, "Unimplemented surface target={}", 759 LOG_CRITICAL(Render_OpenGL, "Unimplemented surface target={}",
817 static_cast<u32>(params.target)); 760 static_cast<u32>(params.target));
818 UNREACHABLE(); 761 UNREACHABLE();
819 glCompressedTexImage2D(GL_TEXTURE_2D, mip_map, tuple.internal_format, 762 glCompressedTextureSubImage2D(
820 static_cast<GLsizei>(params.MipWidth(mip_map)), 763 texture.handle, mip_map, 0, 0, static_cast<GLsizei>(params.MipWidth(mip_map)),
821 static_cast<GLsizei>(params.MipHeight(mip_map)), 0, 764 static_cast<GLsizei>(params.MipHeight(mip_map)), tuple.internal_format,
822 static_cast<GLsizei>(params.size_in_bytes_gl), 765 static_cast<GLsizei>(params.size_in_bytes_gl), &gl_buffer[mip_map][buffer_offset]);
823 &gl_buffer[mip_map][buffer_offset]);
824 } 766 }
825 } else { 767 } else {
826
827 switch (params.target) { 768 switch (params.target) {
828 case SurfaceTarget::Texture1D: 769 case SurfaceTarget::Texture1D:
829 glTexSubImage1D(SurfaceTargetToGL(params.target), mip_map, x0, 770 glTextureSubImage1D(texture.handle, mip_map, x0, static_cast<GLsizei>(rect.GetWidth()),
830 static_cast<GLsizei>(rect.GetWidth()), tuple.format, tuple.type, 771 tuple.format, tuple.type, &gl_buffer[mip_map][buffer_offset]);
831 &gl_buffer[mip_map][buffer_offset]);
832 break; 772 break;
833 case SurfaceTarget::Texture2D: 773 case SurfaceTarget::Texture2D:
834 glTexSubImage2D(SurfaceTargetToGL(params.target), mip_map, x0, y0, 774 glTextureSubImage2D(texture.handle, mip_map, x0, y0,
835 static_cast<GLsizei>(rect.GetWidth()), 775 static_cast<GLsizei>(rect.GetWidth()),
836 static_cast<GLsizei>(rect.GetHeight()), tuple.format, tuple.type, 776 static_cast<GLsizei>(rect.GetHeight()), tuple.format, tuple.type,
837 &gl_buffer[mip_map][buffer_offset]); 777 &gl_buffer[mip_map][buffer_offset]);
838 break; 778 break;
839 case SurfaceTarget::Texture3D: 779 case SurfaceTarget::Texture3D:
840 glTexSubImage3D(SurfaceTargetToGL(params.target), mip_map, x0, y0, 0, 780 glTextureSubImage3D(texture.handle, mip_map, x0, y0, 0,
841 static_cast<GLsizei>(rect.GetWidth()), 781 static_cast<GLsizei>(rect.GetWidth()),
842 static_cast<GLsizei>(rect.GetHeight()), params.MipDepth(mip_map), 782 static_cast<GLsizei>(rect.GetHeight()), params.MipDepth(mip_map),
843 tuple.format, tuple.type, &gl_buffer[mip_map][buffer_offset]); 783 tuple.format, tuple.type, &gl_buffer[mip_map][buffer_offset]);
844 break; 784 break;
845 case SurfaceTarget::Texture2DArray: 785 case SurfaceTarget::Texture2DArray:
846 case SurfaceTarget::TextureCubeArray: 786 case SurfaceTarget::TextureCubeArray:
847 glTexSubImage3D(SurfaceTargetToGL(params.target), mip_map, x0, y0, 0, 787 glTextureSubImage3D(texture.handle, mip_map, x0, y0, 0,
848 static_cast<GLsizei>(rect.GetWidth()), 788 static_cast<GLsizei>(rect.GetWidth()),
849 static_cast<GLsizei>(rect.GetHeight()), params.depth, tuple.format, 789 static_cast<GLsizei>(rect.GetHeight()), params.depth, tuple.format,
850 tuple.type, &gl_buffer[mip_map][buffer_offset]); 790 tuple.type, &gl_buffer[mip_map][buffer_offset]);
851 break; 791 break;
852 case SurfaceTarget::TextureCubemap: { 792 case SurfaceTarget::TextureCubemap: {
853 std::size_t start = buffer_offset; 793 std::size_t start = buffer_offset;
854 for (std::size_t face = 0; face < params.depth; ++face) { 794 for (std::size_t face = 0; face < params.depth; ++face) {
855 glTexSubImage2D(static_cast<GLenum>(GL_TEXTURE_CUBE_MAP_POSITIVE_X + face), mip_map, 795 glTextureSubImage3D(texture.handle, mip_map, x0, y0, static_cast<GLint>(face),
856 x0, y0, static_cast<GLsizei>(rect.GetWidth()), 796 static_cast<GLsizei>(rect.GetWidth()),
857 static_cast<GLsizei>(rect.GetHeight()), tuple.format, tuple.type, 797 static_cast<GLsizei>(rect.GetHeight()), 1, tuple.format,
858 &gl_buffer[mip_map][buffer_offset]); 798 tuple.type, &gl_buffer[mip_map][buffer_offset]);
859 buffer_offset += params.LayerSizeGL(mip_map); 799 buffer_offset += params.LayerSizeGL(mip_map);
860 } 800 }
861 break; 801 break;
@@ -864,41 +804,33 @@ void CachedSurface::UploadGLMipmapTexture(u32 mip_map, GLuint read_fb_handle,
864 LOG_CRITICAL(Render_OpenGL, "Unimplemented surface target={}", 804 LOG_CRITICAL(Render_OpenGL, "Unimplemented surface target={}",
865 static_cast<u32>(params.target)); 805 static_cast<u32>(params.target));
866 UNREACHABLE(); 806 UNREACHABLE();
867 glTexSubImage2D(GL_TEXTURE_2D, mip_map, x0, y0, static_cast<GLsizei>(rect.GetWidth()), 807 glTextureSubImage2D(texture.handle, mip_map, x0, y0,
868 static_cast<GLsizei>(rect.GetHeight()), tuple.format, tuple.type, 808 static_cast<GLsizei>(rect.GetWidth()),
869 &gl_buffer[mip_map][buffer_offset]); 809 static_cast<GLsizei>(rect.GetHeight()), tuple.format, tuple.type,
810 &gl_buffer[mip_map][buffer_offset]);
870 } 811 }
871 } 812 }
872 813
873 glPixelStorei(GL_UNPACK_ROW_LENGTH, 0); 814 glPixelStorei(GL_UNPACK_ROW_LENGTH, 0);
874} 815}
875 816
876void CachedSurface::EnsureTextureView() { 817void CachedSurface::EnsureTextureDiscrepantView() {
877 if (texture_view.handle != 0) 818 if (discrepant_view.handle != 0)
878 return; 819 return;
879 // Compressed texture are not being created with immutable storage
880 UNIMPLEMENTED_IF(gl_is_compressed);
881 820
882 const GLenum target{TargetLayer()}; 821 const GLenum target{GetArrayDiscrepantTarget(params.target)};
822 ASSERT(target != GL_NONE);
823
883 const GLuint num_layers{target == GL_TEXTURE_CUBE_MAP_ARRAY ? 6u : 1u}; 824 const GLuint num_layers{target == GL_TEXTURE_CUBE_MAP_ARRAY ? 6u : 1u};
884 constexpr GLuint min_layer = 0; 825 constexpr GLuint min_layer = 0;
885 constexpr GLuint min_level = 0; 826 constexpr GLuint min_level = 0;
886 827
887 texture_view.Create(); 828 glGenTextures(1, &discrepant_view.handle);
888 glTextureView(texture_view.handle, target, texture.handle, gl_internal_format, min_level, 829 glTextureView(discrepant_view.handle, target, texture.handle, gl_internal_format, min_level,
889 params.max_mip_level, min_layer, num_layers); 830 params.max_mip_level, min_layer, num_layers);
890 831 ApplyTextureDefaults(discrepant_view.handle, params.max_mip_level);
891 OpenGLState cur_state = OpenGLState::GetCurState(); 832 glTextureParameteriv(discrepant_view.handle, GL_TEXTURE_SWIZZLE_RGBA,
892 const auto& old_tex = cur_state.texture_units[0]; 833 reinterpret_cast<const GLint*>(swizzle.data()));
893 SCOPE_EXIT({
894 cur_state.texture_units[0] = old_tex;
895 cur_state.Apply();
896 });
897 cur_state.texture_units[0].texture = texture_view.handle;
898 cur_state.texture_units[0].target = target;
899 cur_state.Apply();
900
901 ApplyTextureDefaults(target, params.max_mip_level);
902} 834}
903 835
904MICROPROFILE_DEFINE(OpenGL_TextureUL, "OpenGL", "Texture Upload", MP_RGB(128, 192, 64)); 836MICROPROFILE_DEFINE(OpenGL_TextureUL, "OpenGL", "Texture Upload", MP_RGB(128, 192, 64));
@@ -909,6 +841,25 @@ void CachedSurface::UploadGLTexture(GLuint read_fb_handle, GLuint draw_fb_handle
909 UploadGLMipmapTexture(i, read_fb_handle, draw_fb_handle); 841 UploadGLMipmapTexture(i, read_fb_handle, draw_fb_handle);
910} 842}
911 843
844void CachedSurface::UpdateSwizzle(Tegra::Texture::SwizzleSource swizzle_x,
845 Tegra::Texture::SwizzleSource swizzle_y,
846 Tegra::Texture::SwizzleSource swizzle_z,
847 Tegra::Texture::SwizzleSource swizzle_w) {
848 const GLenum new_x = MaxwellToGL::SwizzleSource(swizzle_x);
849 const GLenum new_y = MaxwellToGL::SwizzleSource(swizzle_y);
850 const GLenum new_z = MaxwellToGL::SwizzleSource(swizzle_z);
851 const GLenum new_w = MaxwellToGL::SwizzleSource(swizzle_w);
852 if (swizzle[0] == new_x && swizzle[1] == new_y && swizzle[2] == new_z && swizzle[3] == new_w) {
853 return;
854 }
855 swizzle = {new_x, new_y, new_z, new_w};
856 const auto swizzle_data = reinterpret_cast<const GLint*>(swizzle.data());
857 glTextureParameteriv(texture.handle, GL_TEXTURE_SWIZZLE_RGBA, swizzle_data);
858 if (discrepant_view.handle != 0) {
859 glTextureParameteriv(discrepant_view.handle, GL_TEXTURE_SWIZZLE_RGBA, swizzle_data);
860 }
861}
862
912RasterizerCacheOpenGL::RasterizerCacheOpenGL(RasterizerOpenGL& rasterizer) 863RasterizerCacheOpenGL::RasterizerCacheOpenGL(RasterizerOpenGL& rasterizer)
913 : RasterizerCache{rasterizer} { 864 : RasterizerCache{rasterizer} {
914 read_framebuffer.Create(); 865 read_framebuffer.Create();
@@ -946,53 +897,59 @@ Surface RasterizerCacheOpenGL::GetColorBufferSurface(std::size_t index, bool pre
946 auto& gpu{Core::System::GetInstance().GPU().Maxwell3D()}; 897 auto& gpu{Core::System::GetInstance().GPU().Maxwell3D()};
947 const auto& regs{gpu.regs}; 898 const auto& regs{gpu.regs};
948 899
949 if ((gpu.dirty_flags.color_buffer & (1u << static_cast<u32>(index))) == 0) { 900 if (!gpu.dirty_flags.color_buffer[index]) {
950 return last_color_buffers[index]; 901 return current_color_buffers[index];
951 } 902 }
952 gpu.dirty_flags.color_buffer &= ~(1u << static_cast<u32>(index)); 903 gpu.dirty_flags.color_buffer.reset(index);
953 904
954 ASSERT(index < Tegra::Engines::Maxwell3D::Regs::NumRenderTargets); 905 ASSERT(index < Tegra::Engines::Maxwell3D::Regs::NumRenderTargets);
955 906
956 if (index >= regs.rt_control.count) { 907 if (index >= regs.rt_control.count) {
957 return last_color_buffers[index] = {}; 908 return current_color_buffers[index] = {};
958 } 909 }
959 910
960 if (regs.rt[index].Address() == 0 || regs.rt[index].format == Tegra::RenderTargetFormat::NONE) { 911 if (regs.rt[index].Address() == 0 || regs.rt[index].format == Tegra::RenderTargetFormat::NONE) {
961 return last_color_buffers[index] = {}; 912 return current_color_buffers[index] = {};
962 } 913 }
963 914
964 const SurfaceParams color_params{SurfaceParams::CreateForFramebuffer(index)}; 915 const SurfaceParams color_params{SurfaceParams::CreateForFramebuffer(index)};
965 916
966 return last_color_buffers[index] = GetSurface(color_params, preserve_contents); 917 return current_color_buffers[index] = GetSurface(color_params, preserve_contents);
967} 918}
968 919
969void RasterizerCacheOpenGL::LoadSurface(const Surface& surface) { 920void RasterizerCacheOpenGL::LoadSurface(const Surface& surface) {
970 surface->LoadGLBuffer(); 921 surface->LoadGLBuffer();
971 surface->UploadGLTexture(read_framebuffer.handle, draw_framebuffer.handle); 922 surface->UploadGLTexture(read_framebuffer.handle, draw_framebuffer.handle);
972 surface->MarkAsModified(false, *this); 923 surface->MarkAsModified(false, *this);
924 surface->MarkForReload(false);
973} 925}
974 926
975Surface RasterizerCacheOpenGL::GetSurface(const SurfaceParams& params, bool preserve_contents) { 927Surface RasterizerCacheOpenGL::GetSurface(const SurfaceParams& params, bool preserve_contents) {
976 if (params.addr == 0 || params.height * params.width == 0) { 928 if (params.gpu_addr == 0 || params.height * params.width == 0) {
977 return {}; 929 return {};
978 } 930 }
979 931
980 // Look up surface in the cache based on address 932 // Look up surface in the cache based on address
981 Surface surface{TryGet(params.addr)}; 933 Surface surface{TryGet(params.host_ptr)};
982 if (surface) { 934 if (surface) {
983 if (surface->GetSurfaceParams().IsCompatibleSurface(params)) { 935 if (surface->GetSurfaceParams().IsCompatibleSurface(params)) {
984 // Use the cached surface as-is 936 // Use the cached surface as-is unless it's not synced with memory
937 if (surface->MustReload())
938 LoadSurface(surface);
985 return surface; 939 return surface;
986 } else if (preserve_contents) { 940 } else if (preserve_contents) {
987 // If surface parameters changed and we care about keeping the previous data, recreate 941 // If surface parameters changed and we care about keeping the previous data, recreate
988 // the surface from the old one 942 // the surface from the old one
989 Surface new_surface{RecreateSurface(surface, params)}; 943 Surface new_surface{RecreateSurface(surface, params)};
990 Unregister(surface); 944 UnregisterSurface(surface);
991 Register(new_surface); 945 Register(new_surface);
946 if (new_surface->IsUploaded()) {
947 RegisterReinterpretSurface(new_surface);
948 }
992 return new_surface; 949 return new_surface;
993 } else { 950 } else {
994 // Delete the old surface before creating a new one to prevent collisions. 951 // Delete the old surface before creating a new one to prevent collisions.
995 Unregister(surface); 952 UnregisterSurface(surface);
996 } 953 }
997 } 954 }
998 955
@@ -1022,14 +979,16 @@ void RasterizerCacheOpenGL::FastLayeredCopySurface(const Surface& src_surface,
1022 const Surface& dst_surface) { 979 const Surface& dst_surface) {
1023 const auto& init_params{src_surface->GetSurfaceParams()}; 980 const auto& init_params{src_surface->GetSurfaceParams()};
1024 const auto& dst_params{dst_surface->GetSurfaceParams()}; 981 const auto& dst_params{dst_surface->GetSurfaceParams()};
1025 VAddr address = init_params.addr; 982 auto& memory_manager{Core::System::GetInstance().GPU().MemoryManager()};
1026 const std::size_t layer_size = dst_params.LayerMemorySize(); 983 Tegra::GPUVAddr address{init_params.gpu_addr};
984 const std::size_t layer_size{dst_params.LayerMemorySize()};
1027 for (u32 layer = 0; layer < dst_params.depth; layer++) { 985 for (u32 layer = 0; layer < dst_params.depth; layer++) {
1028 for (u32 mipmap = 0; mipmap < dst_params.max_mip_level; mipmap++) { 986 for (u32 mipmap = 0; mipmap < dst_params.max_mip_level; mipmap++) {
1029 const VAddr sub_address = address + dst_params.GetMipmapLevelOffset(mipmap); 987 const Tegra::GPUVAddr sub_address{address + dst_params.GetMipmapLevelOffset(mipmap)};
1030 const Surface& copy = TryGet(sub_address); 988 const Surface& copy{TryGet(memory_manager.GetPointer(sub_address))};
1031 if (!copy) 989 if (!copy) {
1032 continue; 990 continue;
991 }
1033 const auto& src_params{copy->GetSurfaceParams()}; 992 const auto& src_params{copy->GetSurfaceParams()};
1034 const u32 width{std::min(src_params.width, dst_params.MipWidth(mipmap))}; 993 const u32 width{std::min(src_params.width, dst_params.MipWidth(mipmap))};
1035 const u32 height{std::min(src_params.height, dst_params.MipHeight(mipmap))}; 994 const u32 height{std::min(src_params.height, dst_params.MipHeight(mipmap))};
@@ -1041,26 +1000,161 @@ void RasterizerCacheOpenGL::FastLayeredCopySurface(const Surface& src_surface,
1041 } 1000 }
1042 address += layer_size; 1001 address += layer_size;
1043 } 1002 }
1003
1004 dst_surface->MarkAsModified(true, *this);
1005}
1006
1007static bool BlitSurface(const Surface& src_surface, const Surface& dst_surface,
1008 const Common::Rectangle<u32>& src_rect,
1009 const Common::Rectangle<u32>& dst_rect, GLuint read_fb_handle,
1010 GLuint draw_fb_handle, GLenum src_attachment = 0, GLenum dst_attachment = 0,
1011 std::size_t cubemap_face = 0) {
1012
1013 const auto& src_params{src_surface->GetSurfaceParams()};
1014 const auto& dst_params{dst_surface->GetSurfaceParams()};
1015
1016 OpenGLState prev_state{OpenGLState::GetCurState()};
1017 SCOPE_EXIT({ prev_state.Apply(); });
1018
1019 OpenGLState state;
1020 state.draw.read_framebuffer = read_fb_handle;
1021 state.draw.draw_framebuffer = draw_fb_handle;
1022 state.Apply();
1023
1024 u32 buffers{};
1025
1026 if (src_params.type == SurfaceType::ColorTexture) {
1027 switch (src_params.target) {
1028 case SurfaceTarget::Texture2D:
1029 glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0 + src_attachment,
1030 GL_TEXTURE_2D, src_surface->Texture().handle, 0);
1031 glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D,
1032 0, 0);
1033 break;
1034 case SurfaceTarget::TextureCubemap:
1035 glFramebufferTexture2D(
1036 GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0 + src_attachment,
1037 static_cast<GLenum>(GL_TEXTURE_CUBE_MAP_POSITIVE_X + cubemap_face),
1038 src_surface->Texture().handle, 0);
1039 glFramebufferTexture2D(
1040 GL_READ_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT,
1041 static_cast<GLenum>(GL_TEXTURE_CUBE_MAP_POSITIVE_X + cubemap_face), 0, 0);
1042 break;
1043 case SurfaceTarget::Texture2DArray:
1044 glFramebufferTextureLayer(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0 + src_attachment,
1045 src_surface->Texture().handle, 0, 0);
1046 glFramebufferTextureLayer(GL_READ_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, 0, 0, 0);
1047 break;
1048 case SurfaceTarget::Texture3D:
1049 glFramebufferTexture3D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0 + src_attachment,
1050 SurfaceTargetToGL(src_params.target),
1051 src_surface->Texture().handle, 0, 0);
1052 glFramebufferTexture3D(GL_READ_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT,
1053 SurfaceTargetToGL(src_params.target), 0, 0, 0);
1054 break;
1055 default:
1056 glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0 + src_attachment,
1057 GL_TEXTURE_2D, src_surface->Texture().handle, 0);
1058 glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D,
1059 0, 0);
1060 break;
1061 }
1062
1063 switch (dst_params.target) {
1064 case SurfaceTarget::Texture2D:
1065 glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0 + dst_attachment,
1066 GL_TEXTURE_2D, dst_surface->Texture().handle, 0);
1067 glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D,
1068 0, 0);
1069 break;
1070 case SurfaceTarget::TextureCubemap:
1071 glFramebufferTexture2D(
1072 GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0 + dst_attachment,
1073 static_cast<GLenum>(GL_TEXTURE_CUBE_MAP_POSITIVE_X + cubemap_face),
1074 dst_surface->Texture().handle, 0);
1075 glFramebufferTexture2D(
1076 GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT,
1077 static_cast<GLenum>(GL_TEXTURE_CUBE_MAP_POSITIVE_X + cubemap_face), 0, 0);
1078 break;
1079 case SurfaceTarget::Texture2DArray:
1080 glFramebufferTextureLayer(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0 + dst_attachment,
1081 dst_surface->Texture().handle, 0, 0);
1082 glFramebufferTextureLayer(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, 0, 0, 0);
1083 break;
1084
1085 case SurfaceTarget::Texture3D:
1086 glFramebufferTexture3D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0 + dst_attachment,
1087 SurfaceTargetToGL(dst_params.target),
1088 dst_surface->Texture().handle, 0, 0);
1089 glFramebufferTexture3D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT,
1090 SurfaceTargetToGL(dst_params.target), 0, 0, 0);
1091 break;
1092 default:
1093 glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0 + dst_attachment,
1094 GL_TEXTURE_2D, dst_surface->Texture().handle, 0);
1095 glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D,
1096 0, 0);
1097 break;
1098 }
1099
1100 buffers = GL_COLOR_BUFFER_BIT;
1101 } else if (src_params.type == SurfaceType::Depth) {
1102 glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0 + src_attachment,
1103 GL_TEXTURE_2D, 0, 0);
1104 glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_TEXTURE_2D,
1105 src_surface->Texture().handle, 0);
1106 glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, 0);
1107
1108 glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0 + dst_attachment,
1109 GL_TEXTURE_2D, 0, 0);
1110 glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_TEXTURE_2D,
1111 dst_surface->Texture().handle, 0);
1112 glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, 0);
1113
1114 buffers = GL_DEPTH_BUFFER_BIT;
1115 } else if (src_params.type == SurfaceType::DepthStencil) {
1116 glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0 + src_attachment,
1117 GL_TEXTURE_2D, 0, 0);
1118 glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D,
1119 src_surface->Texture().handle, 0);
1120
1121 glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0 + dst_attachment,
1122 GL_TEXTURE_2D, 0, 0);
1123 glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D,
1124 dst_surface->Texture().handle, 0);
1125
1126 buffers = GL_DEPTH_BUFFER_BIT | GL_STENCIL_BUFFER_BIT;
1127 }
1128
1129 glBlitFramebuffer(src_rect.left, src_rect.top, src_rect.right, src_rect.bottom, dst_rect.left,
1130 dst_rect.top, dst_rect.right, dst_rect.bottom, buffers,
1131 buffers == GL_COLOR_BUFFER_BIT ? GL_LINEAR : GL_NEAREST);
1132
1133 return true;
1044} 1134}
1045 1135
1046void RasterizerCacheOpenGL::FermiCopySurface( 1136void RasterizerCacheOpenGL::FermiCopySurface(
1047 const Tegra::Engines::Fermi2D::Regs::Surface& src_config, 1137 const Tegra::Engines::Fermi2D::Regs::Surface& src_config,
1048 const Tegra::Engines::Fermi2D::Regs::Surface& dst_config) { 1138 const Tegra::Engines::Fermi2D::Regs::Surface& dst_config,
1139 const Common::Rectangle<u32>& src_rect, const Common::Rectangle<u32>& dst_rect) {
1049 1140
1050 const auto& src_params = SurfaceParams::CreateForFermiCopySurface(src_config); 1141 const auto& src_params = SurfaceParams::CreateForFermiCopySurface(src_config);
1051 const auto& dst_params = SurfaceParams::CreateForFermiCopySurface(dst_config); 1142 const auto& dst_params = SurfaceParams::CreateForFermiCopySurface(dst_config);
1052 1143
1053 ASSERT(src_params.width == dst_params.width);
1054 ASSERT(src_params.height == dst_params.height);
1055 ASSERT(src_params.pixel_format == dst_params.pixel_format); 1144 ASSERT(src_params.pixel_format == dst_params.pixel_format);
1056 ASSERT(src_params.block_height == dst_params.block_height); 1145 ASSERT(src_params.block_height == dst_params.block_height);
1057 ASSERT(src_params.is_tiled == dst_params.is_tiled); 1146 ASSERT(src_params.is_tiled == dst_params.is_tiled);
1058 ASSERT(src_params.depth == dst_params.depth); 1147 ASSERT(src_params.depth == dst_params.depth);
1059 ASSERT(src_params.depth == 1); // Currently, FastCopySurface only works with 2D surfaces
1060 ASSERT(src_params.target == dst_params.target); 1148 ASSERT(src_params.target == dst_params.target);
1061 ASSERT(src_params.rt.index == dst_params.rt.index); 1149 ASSERT(src_params.rt.index == dst_params.rt.index);
1062 1150
1063 FastCopySurface(GetSurface(src_params, true), GetSurface(dst_params, false)); 1151 auto src_surface = GetSurface(src_params, true);
1152 auto dst_surface = GetSurface(dst_params, true);
1153
1154 BlitSurface(src_surface, dst_surface, src_rect, dst_rect, read_framebuffer.handle,
1155 draw_framebuffer.handle);
1156
1157 dst_surface->MarkAsModified(true, *this);
1064} 1158}
1065 1159
1066void RasterizerCacheOpenGL::AccurateCopySurface(const Surface& src_surface, 1160void RasterizerCacheOpenGL::AccurateCopySurface(const Surface& src_surface,
@@ -1069,7 +1163,8 @@ void RasterizerCacheOpenGL::AccurateCopySurface(const Surface& src_surface,
1069 const auto& dst_params{dst_surface->GetSurfaceParams()}; 1163 const auto& dst_params{dst_surface->GetSurfaceParams()};
1070 1164
1071 // Flush enough memory for both the source and destination surface 1165 // Flush enough memory for both the source and destination surface
1072 FlushRegion(src_params.addr, std::max(src_params.MemorySize(), dst_params.MemorySize())); 1166 FlushRegion(ToCacheAddr(src_params.host_ptr),
1167 std::max(src_params.MemorySize(), dst_params.MemorySize()));
1073 1168
1074 LoadSurface(dst_surface); 1169 LoadSurface(dst_surface);
1075} 1170}
@@ -1106,7 +1201,11 @@ Surface RasterizerCacheOpenGL::RecreateSurface(const Surface& old_surface,
1106 case SurfaceTarget::TextureCubemap: 1201 case SurfaceTarget::TextureCubemap:
1107 case SurfaceTarget::Texture2DArray: 1202 case SurfaceTarget::Texture2DArray:
1108 case SurfaceTarget::TextureCubeArray: 1203 case SurfaceTarget::TextureCubeArray:
1109 FastLayeredCopySurface(old_surface, new_surface); 1204 if (old_params.pixel_format == new_params.pixel_format)
1205 FastLayeredCopySurface(old_surface, new_surface);
1206 else {
1207 AccurateCopySurface(old_surface, new_surface);
1208 }
1110 break; 1209 break;
1111 default: 1210 default:
1112 LOG_CRITICAL(Render_OpenGL, "Unimplemented surface target={}", 1211 LOG_CRITICAL(Render_OpenGL, "Unimplemented surface target={}",
@@ -1117,8 +1216,8 @@ Surface RasterizerCacheOpenGL::RecreateSurface(const Surface& old_surface,
1117 return new_surface; 1216 return new_surface;
1118} 1217}
1119 1218
1120Surface RasterizerCacheOpenGL::TryFindFramebufferSurface(VAddr addr) const { 1219Surface RasterizerCacheOpenGL::TryFindFramebufferSurface(const u8* host_ptr) const {
1121 return TryGet(addr); 1220 return TryGet(host_ptr);
1122} 1221}
1123 1222
1124void RasterizerCacheOpenGL::ReserveSurface(const Surface& surface) { 1223void RasterizerCacheOpenGL::ReserveSurface(const Surface& surface) {
@@ -1135,4 +1234,109 @@ Surface RasterizerCacheOpenGL::TryGetReservedSurface(const SurfaceParams& params
1135 return {}; 1234 return {};
1136} 1235}
1137 1236
1237static std::optional<u32> TryFindBestMipMap(std::size_t memory, const SurfaceParams params,
1238 u32 height) {
1239 for (u32 i = 0; i < params.max_mip_level; i++) {
1240 if (memory == params.GetMipmapSingleSize(i) && params.MipHeight(i) == height) {
1241 return {i};
1242 }
1243 }
1244 return {};
1245}
1246
1247static std::optional<u32> TryFindBestLayer(Tegra::GPUVAddr addr, const SurfaceParams params,
1248 u32 mipmap) {
1249 const std::size_t size{params.LayerMemorySize()};
1250 Tegra::GPUVAddr start{params.gpu_addr + params.GetMipmapLevelOffset(mipmap)};
1251 for (u32 i = 0; i < params.depth; i++) {
1252 if (start == addr) {
1253 return {i};
1254 }
1255 start += size;
1256 }
1257 return {};
1258}
1259
1260static bool LayerFitReinterpretSurface(RasterizerCacheOpenGL& cache, const Surface render_surface,
1261 const Surface blitted_surface) {
1262 const auto& dst_params = blitted_surface->GetSurfaceParams();
1263 const auto& src_params = render_surface->GetSurfaceParams();
1264 const std::size_t src_memory_size = src_params.size_in_bytes;
1265 const std::optional<u32> level =
1266 TryFindBestMipMap(src_memory_size, dst_params, src_params.height);
1267 if (level.has_value()) {
1268 if (src_params.width == dst_params.MipWidthGobAligned(*level) &&
1269 src_params.height == dst_params.MipHeight(*level) &&
1270 src_params.block_height >= dst_params.MipBlockHeight(*level)) {
1271 const std::optional<u32> slot =
1272 TryFindBestLayer(render_surface->GetSurfaceParams().gpu_addr, dst_params, *level);
1273 if (slot.has_value()) {
1274 glCopyImageSubData(render_surface->Texture().handle,
1275 SurfaceTargetToGL(src_params.target), 0, 0, 0, 0,
1276 blitted_surface->Texture().handle,
1277 SurfaceTargetToGL(dst_params.target), *level, 0, 0, *slot,
1278 dst_params.MipWidth(*level), dst_params.MipHeight(*level), 1);
1279 blitted_surface->MarkAsModified(true, cache);
1280 return true;
1281 }
1282 }
1283 }
1284 return false;
1285}
1286
1287static bool IsReinterpretInvalid(const Surface render_surface, const Surface blitted_surface) {
1288 const VAddr bound1 = blitted_surface->GetCpuAddr() + blitted_surface->GetMemorySize();
1289 const VAddr bound2 = render_surface->GetCpuAddr() + render_surface->GetMemorySize();
1290 if (bound2 > bound1)
1291 return true;
1292 const auto& dst_params = blitted_surface->GetSurfaceParams();
1293 const auto& src_params = render_surface->GetSurfaceParams();
1294 return (dst_params.component_type != src_params.component_type);
1295}
1296
1297static bool IsReinterpretInvalidSecond(const Surface render_surface,
1298 const Surface blitted_surface) {
1299 const auto& dst_params = blitted_surface->GetSurfaceParams();
1300 const auto& src_params = render_surface->GetSurfaceParams();
1301 return (dst_params.height > src_params.height && dst_params.width > src_params.width);
1302}
1303
1304bool RasterizerCacheOpenGL::PartialReinterpretSurface(Surface triggering_surface,
1305 Surface intersect) {
1306 if (IsReinterpretInvalid(triggering_surface, intersect)) {
1307 UnregisterSurface(intersect);
1308 return false;
1309 }
1310 if (!LayerFitReinterpretSurface(*this, triggering_surface, intersect)) {
1311 if (IsReinterpretInvalidSecond(triggering_surface, intersect)) {
1312 UnregisterSurface(intersect);
1313 return false;
1314 }
1315 FlushObject(intersect);
1316 FlushObject(triggering_surface);
1317 intersect->MarkForReload(true);
1318 }
1319 return true;
1320}
1321
1322void RasterizerCacheOpenGL::SignalPreDrawCall() {
1323 if (texception && GLAD_GL_ARB_texture_barrier) {
1324 glTextureBarrier();
1325 }
1326 texception = false;
1327}
1328
1329void RasterizerCacheOpenGL::SignalPostDrawCall() {
1330 for (u32 i = 0; i < Maxwell::NumRenderTargets; i++) {
1331 if (current_color_buffers[i] != nullptr) {
1332 Surface intersect =
1333 CollideOnReinterpretedSurface(current_color_buffers[i]->GetCacheAddr());
1334 if (intersect != nullptr) {
1335 PartialReinterpretSurface(current_color_buffers[i], intersect);
1336 texception = true;
1337 }
1338 }
1339 }
1340}
1341
1138} // namespace OpenGL 1342} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.h b/src/video_core/renderer_opengl/gl_rasterizer_cache.h
index 8d7d6722c..9366f47f2 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer_cache.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.h
@@ -8,6 +8,7 @@
8#include <map> 8#include <map>
9#include <memory> 9#include <memory>
10#include <string> 10#include <string>
11#include <unordered_set>
11#include <vector> 12#include <vector>
12 13
13#include "common/alignment.h" 14#include "common/alignment.h"
@@ -27,15 +28,15 @@ namespace OpenGL {
27 28
28class CachedSurface; 29class CachedSurface;
29using Surface = std::shared_ptr<CachedSurface>; 30using Surface = std::shared_ptr<CachedSurface>;
30using SurfaceSurfaceRect_Tuple = std::tuple<Surface, Surface, MathUtil::Rectangle<u32>>; 31using SurfaceSurfaceRect_Tuple = std::tuple<Surface, Surface, Common::Rectangle<u32>>;
31 32
32using SurfaceTarget = VideoCore::Surface::SurfaceTarget; 33using SurfaceTarget = VideoCore::Surface::SurfaceTarget;
33using SurfaceType = VideoCore::Surface::SurfaceType; 34using SurfaceType = VideoCore::Surface::SurfaceType;
34using PixelFormat = VideoCore::Surface::PixelFormat; 35using PixelFormat = VideoCore::Surface::PixelFormat;
35using ComponentType = VideoCore::Surface::ComponentType; 36using ComponentType = VideoCore::Surface::ComponentType;
37using Maxwell = Tegra::Engines::Maxwell3D::Regs;
36 38
37struct SurfaceParams { 39struct SurfaceParams {
38
39 enum class SurfaceClass { 40 enum class SurfaceClass {
40 Uploaded, 41 Uploaded,
41 RenderTarget, 42 RenderTarget,
@@ -71,7 +72,7 @@ struct SurfaceParams {
71 } 72 }
72 73
73 /// Returns the rectangle corresponding to this surface 74 /// Returns the rectangle corresponding to this surface
74 MathUtil::Rectangle<u32> GetRect(u32 mip_level = 0) const; 75 Common::Rectangle<u32> GetRect(u32 mip_level = 0) const;
75 76
76 /// Returns the total size of this surface in bytes, adjusted for compression 77 /// Returns the total size of this surface in bytes, adjusted for compression
77 std::size_t SizeInBytesRaw(bool ignore_tiled = false) const { 78 std::size_t SizeInBytesRaw(bool ignore_tiled = false) const {
@@ -140,10 +141,18 @@ struct SurfaceParams {
140 return offset; 141 return offset;
141 } 142 }
142 143
144 std::size_t GetMipmapSingleSize(u32 mip_level) const {
145 return InnerMipmapMemorySize(mip_level, false, is_layered);
146 }
147
143 u32 MipWidth(u32 mip_level) const { 148 u32 MipWidth(u32 mip_level) const {
144 return std::max(1U, width >> mip_level); 149 return std::max(1U, width >> mip_level);
145 } 150 }
146 151
152 u32 MipWidthGobAligned(u32 mip_level) const {
153 return Common::AlignUp(std::max(1U, width >> mip_level), 64U * 8U / GetFormatBpp());
154 }
155
147 u32 MipHeight(u32 mip_level) const { 156 u32 MipHeight(u32 mip_level) const {
148 return std::max(1U, height >> mip_level); 157 return std::max(1U, height >> mip_level);
149 } 158 }
@@ -168,20 +177,27 @@ struct SurfaceParams {
168 } 177 }
169 178
170 u32 MipBlockDepth(u32 mip_level) const { 179 u32 MipBlockDepth(u32 mip_level) const {
171 if (mip_level == 0) 180 if (mip_level == 0) {
172 return block_depth; 181 return block_depth;
173 if (is_layered) 182 }
183
184 if (is_layered) {
174 return 1; 185 return 1;
175 u32 depth = MipDepth(mip_level); 186 }
187
188 const u32 mip_depth = MipDepth(mip_level);
176 u32 bd = 32; 189 u32 bd = 32;
177 while (bd > 1 && depth * 2 <= bd) { 190 while (bd > 1 && mip_depth * 2 <= bd) {
178 bd >>= 1; 191 bd >>= 1;
179 } 192 }
193
180 if (bd == 32) { 194 if (bd == 32) {
181 u32 bh = MipBlockHeight(mip_level); 195 const u32 bh = MipBlockHeight(mip_level);
182 if (bh >= 4) 196 if (bh >= 4) {
183 return 16; 197 return 16;
198 }
184 } 199 }
200
185 return bd; 201 return bd;
186 } 202 }
187 203
@@ -272,6 +288,7 @@ struct SurfaceParams {
272 u32 height; 288 u32 height;
273 u32 depth; 289 u32 depth;
274 u32 unaligned_height; 290 u32 unaligned_height;
291 u32 pitch;
275 SurfaceTarget target; 292 SurfaceTarget target;
276 SurfaceClass identity; 293 SurfaceClass identity;
277 u32 max_mip_level; 294 u32 max_mip_level;
@@ -279,7 +296,7 @@ struct SurfaceParams {
279 bool is_array; 296 bool is_array;
280 bool srgb_conversion; 297 bool srgb_conversion;
281 // Parameters used for caching 298 // Parameters used for caching
282 VAddr addr; 299 u8* host_ptr;
283 Tegra::GPUVAddr gpu_addr; 300 Tegra::GPUVAddr gpu_addr;
284 std::size_t size_in_bytes; 301 std::size_t size_in_bytes;
285 std::size_t size_in_bytes_gl; 302 std::size_t size_in_bytes_gl;
@@ -328,16 +345,20 @@ class RasterizerOpenGL;
328 345
329class CachedSurface final : public RasterizerCacheObject { 346class CachedSurface final : public RasterizerCacheObject {
330public: 347public:
331 CachedSurface(const SurfaceParams& params); 348 explicit CachedSurface(const SurfaceParams& params);
332 349
333 VAddr GetAddr() const override { 350 VAddr GetCpuAddr() const override {
334 return params.addr; 351 return cpu_addr;
335 } 352 }
336 353
337 std::size_t GetSizeInBytes() const override { 354 std::size_t GetSizeInBytes() const override {
338 return cached_size_in_bytes; 355 return cached_size_in_bytes;
339 } 356 }
340 357
358 std::size_t GetMemorySize() const {
359 return memory_size;
360 }
361
341 void Flush() override { 362 void Flush() override {
342 FlushGLBuffer(); 363 FlushGLBuffer();
343 } 364 }
@@ -346,31 +367,19 @@ public:
346 return texture; 367 return texture;
347 } 368 }
348 369
349 const OGLTexture& TextureLayer() { 370 const OGLTexture& Texture(bool as_array) {
350 if (params.is_array) { 371 if (params.is_array == as_array) {
351 return Texture(); 372 return texture;
373 } else {
374 EnsureTextureDiscrepantView();
375 return discrepant_view;
352 } 376 }
353 EnsureTextureView();
354 return texture_view;
355 } 377 }
356 378
357 GLenum Target() const { 379 GLenum Target() const {
358 return gl_target; 380 return gl_target;
359 } 381 }
360 382
361 GLenum TargetLayer() const {
362 using VideoCore::Surface::SurfaceTarget;
363 switch (params.target) {
364 case SurfaceTarget::Texture1D:
365 return GL_TEXTURE_1D_ARRAY;
366 case SurfaceTarget::Texture2D:
367 return GL_TEXTURE_2D_ARRAY;
368 case SurfaceTarget::TextureCubemap:
369 return GL_TEXTURE_CUBE_MAP_ARRAY;
370 }
371 return Target();
372 }
373
374 const SurfaceParams& GetSurfaceParams() const { 383 const SurfaceParams& GetSurfaceParams() const {
375 return params; 384 return params;
376 } 385 }
@@ -382,19 +391,48 @@ public:
382 // Upload data in gl_buffer to this surface's texture 391 // Upload data in gl_buffer to this surface's texture
383 void UploadGLTexture(GLuint read_fb_handle, GLuint draw_fb_handle); 392 void UploadGLTexture(GLuint read_fb_handle, GLuint draw_fb_handle);
384 393
394 void UpdateSwizzle(Tegra::Texture::SwizzleSource swizzle_x,
395 Tegra::Texture::SwizzleSource swizzle_y,
396 Tegra::Texture::SwizzleSource swizzle_z,
397 Tegra::Texture::SwizzleSource swizzle_w);
398
399 void MarkReinterpreted() {
400 reinterpreted = true;
401 }
402
403 bool IsReinterpreted() const {
404 return reinterpreted;
405 }
406
407 void MarkForReload(bool reload) {
408 must_reload = reload;
409 }
410
411 bool MustReload() const {
412 return must_reload;
413 }
414
415 bool IsUploaded() const {
416 return params.identity == SurfaceParams::SurfaceClass::Uploaded;
417 }
418
385private: 419private:
386 void UploadGLMipmapTexture(u32 mip_map, GLuint read_fb_handle, GLuint draw_fb_handle); 420 void UploadGLMipmapTexture(u32 mip_map, GLuint read_fb_handle, GLuint draw_fb_handle);
387 421
388 void EnsureTextureView(); 422 void EnsureTextureDiscrepantView();
389 423
390 OGLTexture texture; 424 OGLTexture texture;
391 OGLTexture texture_view; 425 OGLTexture discrepant_view;
392 std::vector<std::vector<u8>> gl_buffer; 426 std::vector<std::vector<u8>> gl_buffer;
393 SurfaceParams params{}; 427 SurfaceParams params{};
394 GLenum gl_target{}; 428 GLenum gl_target{};
395 GLenum gl_internal_format{}; 429 GLenum gl_internal_format{};
396 bool gl_is_compressed{};
397 std::size_t cached_size_in_bytes{}; 430 std::size_t cached_size_in_bytes{};
431 std::array<GLenum, 4> swizzle{GL_RED, GL_GREEN, GL_BLUE, GL_ALPHA};
432 std::size_t memory_size;
433 bool reinterpreted = false;
434 bool must_reload = false;
435 VAddr cpu_addr{};
398}; 436};
399 437
400class RasterizerCacheOpenGL final : public RasterizerCache<Surface> { 438class RasterizerCacheOpenGL final : public RasterizerCache<Surface> {
@@ -412,11 +450,16 @@ public:
412 Surface GetColorBufferSurface(std::size_t index, bool preserve_contents); 450 Surface GetColorBufferSurface(std::size_t index, bool preserve_contents);
413 451
414 /// Tries to find a framebuffer using on the provided CPU address 452 /// Tries to find a framebuffer using on the provided CPU address
415 Surface TryFindFramebufferSurface(VAddr addr) const; 453 Surface TryFindFramebufferSurface(const u8* host_ptr) const;
416 454
417 /// Copies the contents of one surface to another 455 /// Copies the contents of one surface to another
418 void FermiCopySurface(const Tegra::Engines::Fermi2D::Regs::Surface& src_config, 456 void FermiCopySurface(const Tegra::Engines::Fermi2D::Regs::Surface& src_config,
419 const Tegra::Engines::Fermi2D::Regs::Surface& dst_config); 457 const Tegra::Engines::Fermi2D::Regs::Surface& dst_config,
458 const Common::Rectangle<u32>& src_rect,
459 const Common::Rectangle<u32>& dst_rect);
460
461 void SignalPreDrawCall();
462 void SignalPostDrawCall();
420 463
421private: 464private:
422 void LoadSurface(const Surface& surface); 465 void LoadSurface(const Surface& surface);
@@ -434,9 +477,17 @@ private:
434 /// Tries to get a reserved surface for the specified parameters 477 /// Tries to get a reserved surface for the specified parameters
435 Surface TryGetReservedSurface(const SurfaceParams& params); 478 Surface TryGetReservedSurface(const SurfaceParams& params);
436 479
480 // Partialy reinterpret a surface based on a triggering_surface that collides with it.
481 // returns true if the reinterpret was successful, false in case it was not.
482 bool PartialReinterpretSurface(Surface triggering_surface, Surface intersect);
483
437 /// Performs a slow but accurate surface copy, flushing to RAM and reinterpreting the data 484 /// Performs a slow but accurate surface copy, flushing to RAM and reinterpreting the data
438 void AccurateCopySurface(const Surface& src_surface, const Surface& dst_surface); 485 void AccurateCopySurface(const Surface& src_surface, const Surface& dst_surface);
439 void FastLayeredCopySurface(const Surface& src_surface, const Surface& dst_surface); 486 void FastLayeredCopySurface(const Surface& src_surface, const Surface& dst_surface);
487 void FastCopySurface(const Surface& src_surface, const Surface& dst_surface);
488 void CopySurface(const Surface& src_surface, const Surface& dst_surface,
489 const GLuint copy_pbo_handle, const GLenum src_attachment = 0,
490 const GLenum dst_attachment = 0, const std::size_t cubemap_face = 0);
440 491
441 /// The surface reserve is a "backup" cache, this is where we put unique surfaces that have 492 /// The surface reserve is a "backup" cache, this is where we put unique surfaces that have
442 /// previously been used. This is to prevent surfaces from being constantly created and 493 /// previously been used. This is to prevent surfaces from being constantly created and
@@ -446,12 +497,50 @@ private:
446 OGLFramebuffer read_framebuffer; 497 OGLFramebuffer read_framebuffer;
447 OGLFramebuffer draw_framebuffer; 498 OGLFramebuffer draw_framebuffer;
448 499
500 bool texception = false;
501
449 /// Use a Pixel Buffer Object to download the previous texture and then upload it to the new one 502 /// Use a Pixel Buffer Object to download the previous texture and then upload it to the new one
450 /// using the new format. 503 /// using the new format.
451 OGLBuffer copy_pbo; 504 OGLBuffer copy_pbo;
452 505
453 std::array<Surface, Tegra::Engines::Maxwell3D::Regs::NumRenderTargets> last_color_buffers; 506 std::array<Surface, Maxwell::NumRenderTargets> last_color_buffers;
507 std::array<Surface, Maxwell::NumRenderTargets> current_color_buffers;
454 Surface last_depth_buffer; 508 Surface last_depth_buffer;
509
510 using SurfaceIntervalCache = boost::icl::interval_map<CacheAddr, Surface>;
511 using SurfaceInterval = typename SurfaceIntervalCache::interval_type;
512
513 static auto GetReinterpretInterval(const Surface& object) {
514 return SurfaceInterval::right_open(object->GetCacheAddr() + 1,
515 object->GetCacheAddr() + object->GetMemorySize() - 1);
516 }
517
518 // Reinterpreted surfaces are very fragil as the game may keep rendering into them.
519 SurfaceIntervalCache reinterpreted_surfaces;
520
521 void RegisterReinterpretSurface(Surface reinterpret_surface) {
522 auto interval = GetReinterpretInterval(reinterpret_surface);
523 reinterpreted_surfaces.insert({interval, reinterpret_surface});
524 reinterpret_surface->MarkReinterpreted();
525 }
526
527 Surface CollideOnReinterpretedSurface(CacheAddr addr) const {
528 const SurfaceInterval interval{addr};
529 for (auto& pair :
530 boost::make_iterator_range(reinterpreted_surfaces.equal_range(interval))) {
531 return pair.second;
532 }
533 return nullptr;
534 }
535
536 /// Unregisters an object from the cache
537 void UnregisterSurface(const Surface& object) {
538 if (object->IsReinterpreted()) {
539 auto interval = GetReinterpretInterval(object);
540 reinterpreted_surfaces.erase(interval);
541 }
542 Unregister(object);
543 }
455}; 544};
456 545
457} // namespace OpenGL 546} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_resource_manager.cpp b/src/video_core/renderer_opengl/gl_resource_manager.cpp
index 1da744158..bfe666a73 100644
--- a/src/video_core/renderer_opengl/gl_resource_manager.cpp
+++ b/src/video_core/renderer_opengl/gl_resource_manager.cpp
@@ -15,12 +15,12 @@ MICROPROFILE_DEFINE(OpenGL_ResourceDeletion, "OpenGL", "Resource Deletion", MP_R
15 15
16namespace OpenGL { 16namespace OpenGL {
17 17
18void OGLTexture::Create() { 18void OGLTexture::Create(GLenum target) {
19 if (handle != 0) 19 if (handle != 0)
20 return; 20 return;
21 21
22 MICROPROFILE_SCOPE(OpenGL_ResourceCreation); 22 MICROPROFILE_SCOPE(OpenGL_ResourceCreation);
23 glGenTextures(1, &handle); 23 glCreateTextures(target, 1, &handle);
24} 24}
25 25
26void OGLTexture::Release() { 26void OGLTexture::Release() {
@@ -71,7 +71,8 @@ void OGLShader::Release() {
71} 71}
72 72
73void OGLProgram::CreateFromSource(const char* vert_shader, const char* geo_shader, 73void OGLProgram::CreateFromSource(const char* vert_shader, const char* geo_shader,
74 const char* frag_shader, bool separable_program) { 74 const char* frag_shader, bool separable_program,
75 bool hint_retrievable) {
75 OGLShader vert, geo, frag; 76 OGLShader vert, geo, frag;
76 if (vert_shader) 77 if (vert_shader)
77 vert.Create(vert_shader, GL_VERTEX_SHADER); 78 vert.Create(vert_shader, GL_VERTEX_SHADER);
@@ -81,7 +82,7 @@ void OGLProgram::CreateFromSource(const char* vert_shader, const char* geo_shade
81 frag.Create(frag_shader, GL_FRAGMENT_SHADER); 82 frag.Create(frag_shader, GL_FRAGMENT_SHADER);
82 83
83 MICROPROFILE_SCOPE(OpenGL_ResourceCreation); 84 MICROPROFILE_SCOPE(OpenGL_ResourceCreation);
84 Create(separable_program, vert.handle, geo.handle, frag.handle); 85 Create(separable_program, hint_retrievable, vert.handle, geo.handle, frag.handle);
85} 86}
86 87
87void OGLProgram::Release() { 88void OGLProgram::Release() {
diff --git a/src/video_core/renderer_opengl/gl_resource_manager.h b/src/video_core/renderer_opengl/gl_resource_manager.h
index e33f1e973..fbb93ee49 100644
--- a/src/video_core/renderer_opengl/gl_resource_manager.h
+++ b/src/video_core/renderer_opengl/gl_resource_manager.h
@@ -28,7 +28,7 @@ public:
28 } 28 }
29 29
30 /// Creates a new internal OpenGL resource and stores the handle 30 /// Creates a new internal OpenGL resource and stores the handle
31 void Create(); 31 void Create(GLenum target);
32 32
33 /// Deletes the internal OpenGL resource 33 /// Deletes the internal OpenGL resource
34 void Release(); 34 void Release();
@@ -101,15 +101,15 @@ public:
101 } 101 }
102 102
103 template <typename... T> 103 template <typename... T>
104 void Create(bool separable_program, T... shaders) { 104 void Create(bool separable_program, bool hint_retrievable, T... shaders) {
105 if (handle != 0) 105 if (handle != 0)
106 return; 106 return;
107 handle = GLShader::LoadProgram(separable_program, shaders...); 107 handle = GLShader::LoadProgram(separable_program, hint_retrievable, shaders...);
108 } 108 }
109 109
110 /// Creates a new internal OpenGL resource and stores the handle 110 /// Creates a new internal OpenGL resource and stores the handle
111 void CreateFromSource(const char* vert_shader, const char* geo_shader, const char* frag_shader, 111 void CreateFromSource(const char* vert_shader, const char* geo_shader, const char* frag_shader,
112 bool separable_program = false); 112 bool separable_program = false, bool hint_retrievable = false);
113 113
114 /// Deletes the internal OpenGL resource 114 /// Deletes the internal OpenGL resource
115 void Release(); 115 void Release();
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp
index 90eda7814..1ed740877 100644
--- a/src/video_core/renderer_opengl/gl_shader_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp
@@ -11,6 +11,7 @@
11#include "video_core/renderer_opengl/gl_rasterizer.h" 11#include "video_core/renderer_opengl/gl_rasterizer.h"
12#include "video_core/renderer_opengl/gl_shader_cache.h" 12#include "video_core/renderer_opengl/gl_shader_cache.h"
13#include "video_core/renderer_opengl/gl_shader_decompiler.h" 13#include "video_core/renderer_opengl/gl_shader_decompiler.h"
14#include "video_core/renderer_opengl/gl_shader_disk_cache.h"
14#include "video_core/renderer_opengl/gl_shader_manager.h" 15#include "video_core/renderer_opengl/gl_shader_manager.h"
15#include "video_core/renderer_opengl/utils.h" 16#include "video_core/renderer_opengl/utils.h"
16#include "video_core/shader/shader_ir.h" 17#include "video_core/shader/shader_ir.h"
@@ -19,18 +20,28 @@ namespace OpenGL {
19 20
20using VideoCommon::Shader::ProgramCode; 21using VideoCommon::Shader::ProgramCode;
21 22
23// One UBO is always reserved for emulation values
24constexpr u32 RESERVED_UBOS = 1;
25
26struct UnspecializedShader {
27 std::string code;
28 GLShader::ShaderEntries entries;
29 Maxwell::ShaderProgram program_type;
30};
31
32namespace {
33
22/// Gets the address for the specified shader stage program 34/// Gets the address for the specified shader stage program
23static VAddr GetShaderAddress(Maxwell::ShaderProgram program) { 35Tegra::GPUVAddr GetShaderAddress(Maxwell::ShaderProgram program) {
24 const auto& gpu = Core::System::GetInstance().GPU().Maxwell3D(); 36 const auto& gpu{Core::System::GetInstance().GPU().Maxwell3D()};
25 const auto& shader_config = gpu.regs.shader_config[static_cast<std::size_t>(program)]; 37 const auto& shader_config{gpu.regs.shader_config[static_cast<std::size_t>(program)]};
26 return *gpu.memory_manager.GpuToCpuAddress(gpu.regs.code_address.CodeAddress() + 38 return gpu.regs.code_address.CodeAddress() + shader_config.offset;
27 shader_config.offset);
28} 39}
29 40
30/// Gets the shader program code from memory for the specified address 41/// Gets the shader program code from memory for the specified address
31static ProgramCode GetShaderCode(VAddr addr) { 42ProgramCode GetShaderCode(const u8* host_ptr) {
32 ProgramCode program_code(VideoCommon::Shader::MAX_PROGRAM_LENGTH); 43 ProgramCode program_code(VideoCommon::Shader::MAX_PROGRAM_LENGTH);
33 Memory::ReadBlock(addr, program_code.data(), program_code.size() * sizeof(u64)); 44 std::memcpy(program_code.data(), host_ptr, program_code.size() * sizeof(u64));
34 return program_code; 45 return program_code;
35} 46}
36 47
@@ -49,38 +60,198 @@ constexpr GLenum GetShaderType(Maxwell::ShaderProgram program_type) {
49 } 60 }
50} 61}
51 62
52CachedShader::CachedShader(VAddr addr, Maxwell::ShaderProgram program_type) 63/// Gets if the current instruction offset is a scheduler instruction
53 : addr{addr}, program_type{program_type}, setup{GetShaderCode(addr)} { 64constexpr bool IsSchedInstruction(std::size_t offset, std::size_t main_offset) {
65 // Sched instructions appear once every 4 instructions.
66 constexpr std::size_t SchedPeriod = 4;
67 const std::size_t absolute_offset = offset - main_offset;
68 return (absolute_offset % SchedPeriod) == 0;
69}
54 70
55 GLShader::ProgramResult program_result; 71/// Describes primitive behavior on geometry shaders
72constexpr std::tuple<const char*, const char*, u32> GetPrimitiveDescription(GLenum primitive_mode) {
73 switch (primitive_mode) {
74 case GL_POINTS:
75 return {"points", "Points", 1};
76 case GL_LINES:
77 case GL_LINE_STRIP:
78 return {"lines", "Lines", 2};
79 case GL_LINES_ADJACENCY:
80 case GL_LINE_STRIP_ADJACENCY:
81 return {"lines_adjacency", "LinesAdj", 4};
82 case GL_TRIANGLES:
83 case GL_TRIANGLE_STRIP:
84 case GL_TRIANGLE_FAN:
85 return {"triangles", "Triangles", 3};
86 case GL_TRIANGLES_ADJACENCY:
87 case GL_TRIANGLE_STRIP_ADJACENCY:
88 return {"triangles_adjacency", "TrianglesAdj", 6};
89 default:
90 return {"points", "Invalid", 1};
91 }
92}
56 93
57 switch (program_type) { 94/// Calculates the size of a program stream
58 case Maxwell::ShaderProgram::VertexA: 95std::size_t CalculateProgramSize(const GLShader::ProgramCode& program) {
96 constexpr std::size_t start_offset = 10;
97 std::size_t offset = start_offset;
98 std::size_t size = start_offset * sizeof(u64);
99 while (offset < program.size()) {
100 const u64 instruction = program[offset];
101 if (!IsSchedInstruction(offset, start_offset)) {
102 if (instruction == 0 || (instruction >> 52) == 0x50b) {
103 // End on Maxwell's "nop" instruction
104 break;
105 }
106 }
107 size += sizeof(u64);
108 offset++;
109 }
110 // The last instruction is included in the program size
111 return std::min(size + sizeof(u64), program.size() * sizeof(u64));
112}
113
114/// Hashes one (or two) program streams
115u64 GetUniqueIdentifier(Maxwell::ShaderProgram program_type, const ProgramCode& code,
116 const ProgramCode& code_b) {
117 u64 unique_identifier =
118 Common::CityHash64(reinterpret_cast<const char*>(code.data()), CalculateProgramSize(code));
119 if (program_type != Maxwell::ShaderProgram::VertexA) {
120 return unique_identifier;
121 }
122 // VertexA programs include two programs
123
124 std::size_t seed = 0;
125 boost::hash_combine(seed, unique_identifier);
126
127 const u64 identifier_b = Common::CityHash64(reinterpret_cast<const char*>(code_b.data()),
128 CalculateProgramSize(code_b));
129 boost::hash_combine(seed, identifier_b);
130 return static_cast<u64>(seed);
131}
132
133/// Creates an unspecialized program from code streams
134GLShader::ProgramResult CreateProgram(Maxwell::ShaderProgram program_type, ProgramCode program_code,
135 ProgramCode program_code_b) {
136 GLShader::ShaderSetup setup(program_code);
137 if (program_type == Maxwell::ShaderProgram::VertexA) {
59 // VertexB is always enabled, so when VertexA is enabled, we have two vertex shaders. 138 // VertexB is always enabled, so when VertexA is enabled, we have two vertex shaders.
60 // Conventional HW does not support this, so we combine VertexA and VertexB into one 139 // Conventional HW does not support this, so we combine VertexA and VertexB into one
61 // stage here. 140 // stage here.
62 setup.SetProgramB(GetShaderCode(GetShaderAddress(Maxwell::ShaderProgram::VertexB))); 141 setup.SetProgramB(program_code_b);
142 }
143 setup.program.unique_identifier =
144 GetUniqueIdentifier(program_type, program_code, program_code_b);
145
146 switch (program_type) {
147 case Maxwell::ShaderProgram::VertexA:
63 case Maxwell::ShaderProgram::VertexB: 148 case Maxwell::ShaderProgram::VertexB:
64 CalculateProperties(); 149 return GLShader::GenerateVertexShader(setup);
65 program_result = GLShader::GenerateVertexShader(setup);
66 break;
67 case Maxwell::ShaderProgram::Geometry: 150 case Maxwell::ShaderProgram::Geometry:
68 CalculateProperties(); 151 return GLShader::GenerateGeometryShader(setup);
69 program_result = GLShader::GenerateGeometryShader(setup);
70 break;
71 case Maxwell::ShaderProgram::Fragment: 152 case Maxwell::ShaderProgram::Fragment:
72 CalculateProperties(); 153 return GLShader::GenerateFragmentShader(setup);
73 program_result = GLShader::GenerateFragmentShader(setup);
74 break;
75 default: 154 default:
76 LOG_CRITICAL(HW_GPU, "Unimplemented program_type={}", static_cast<u32>(program_type)); 155 LOG_CRITICAL(HW_GPU, "Unimplemented program_type={}", static_cast<u32>(program_type));
77 UNREACHABLE(); 156 UNREACHABLE();
157 return {};
158 }
159}
160
161CachedProgram SpecializeShader(const std::string& code, const GLShader::ShaderEntries& entries,
162 Maxwell::ShaderProgram program_type, BaseBindings base_bindings,
163 GLenum primitive_mode, bool hint_retrievable = false) {
164 std::string source = "#version 430 core\n";
165 source += fmt::format("#define EMULATION_UBO_BINDING {}\n", base_bindings.cbuf++);
166
167 for (const auto& cbuf : entries.const_buffers) {
168 source +=
169 fmt::format("#define CBUF_BINDING_{} {}\n", cbuf.GetIndex(), base_bindings.cbuf++);
170 }
171 for (const auto& gmem : entries.global_memory_entries) {
172 source += fmt::format("#define GMEM_BINDING_{}_{} {}\n", gmem.GetCbufIndex(),
173 gmem.GetCbufOffset(), base_bindings.gmem++);
174 }
175 for (const auto& sampler : entries.samplers) {
176 source += fmt::format("#define SAMPLER_BINDING_{} {}\n", sampler.GetIndex(),
177 base_bindings.sampler++);
178 }
179
180 if (program_type == Maxwell::ShaderProgram::Geometry) {
181 const auto [glsl_topology, debug_name, max_vertices] =
182 GetPrimitiveDescription(primitive_mode);
183
184 source += "layout (" + std::string(glsl_topology) + ") in;\n";
185 source += "#define MAX_VERTEX_INPUT " + std::to_string(max_vertices) + '\n';
186 }
187
188 source += code;
189
190 OGLShader shader;
191 shader.Create(source.c_str(), GetShaderType(program_type));
192
193 auto program = std::make_shared<OGLProgram>();
194 program->Create(true, hint_retrievable, shader.handle);
195 return program;
196}
197
198std::set<GLenum> GetSupportedFormats() {
199 std::set<GLenum> supported_formats;
200
201 GLint num_formats{};
202 glGetIntegerv(GL_NUM_PROGRAM_BINARY_FORMATS, &num_formats);
203
204 std::vector<GLint> formats(num_formats);
205 glGetIntegerv(GL_PROGRAM_BINARY_FORMATS, formats.data());
206
207 for (const GLint format : formats)
208 supported_formats.insert(static_cast<GLenum>(format));
209 return supported_formats;
210}
211
212} // namespace
213
214CachedShader::CachedShader(VAddr cpu_addr, u64 unique_identifier,
215 Maxwell::ShaderProgram program_type, ShaderDiskCacheOpenGL& disk_cache,
216 const PrecompiledPrograms& precompiled_programs,
217 ProgramCode&& program_code, ProgramCode&& program_code_b, u8* host_ptr)
218 : host_ptr{host_ptr}, cpu_addr{cpu_addr}, unique_identifier{unique_identifier},
219 program_type{program_type}, disk_cache{disk_cache},
220 precompiled_programs{precompiled_programs}, RasterizerCacheObject{host_ptr} {
221
222 const std::size_t code_size = CalculateProgramSize(program_code);
223 const std::size_t code_size_b =
224 program_code_b.empty() ? 0 : CalculateProgramSize(program_code_b);
225
226 GLShader::ProgramResult program_result =
227 CreateProgram(program_type, program_code, program_code_b);
228 if (program_result.first.empty()) {
229 // TODO(Rodrigo): Unimplemented shader stages hit here, avoid using these for now
78 return; 230 return;
79 } 231 }
80 232
81 code = program_result.first; 233 code = program_result.first;
82 entries = program_result.second; 234 entries = program_result.second;
83 shader_length = entries.shader_length; 235 shader_length = entries.shader_length;
236
237 const ShaderDiskCacheRaw raw(unique_identifier, program_type,
238 static_cast<u32>(code_size / sizeof(u64)),
239 static_cast<u32>(code_size_b / sizeof(u64)),
240 std::move(program_code), std::move(program_code_b));
241 disk_cache.SaveRaw(raw);
242}
243
244CachedShader::CachedShader(VAddr cpu_addr, u64 unique_identifier,
245 Maxwell::ShaderProgram program_type, ShaderDiskCacheOpenGL& disk_cache,
246 const PrecompiledPrograms& precompiled_programs,
247 GLShader::ProgramResult result, u8* host_ptr)
248 : cpu_addr{cpu_addr}, unique_identifier{unique_identifier}, program_type{program_type},
249 disk_cache{disk_cache}, precompiled_programs{precompiled_programs}, RasterizerCacheObject{
250 host_ptr} {
251
252 code = std::move(result.first);
253 entries = result.second;
254 shader_length = entries.shader_length;
84} 255}
85 256
86std::tuple<GLuint, BaseBindings> CachedShader::GetProgramHandle(GLenum primitive_mode, 257std::tuple<GLuint, BaseBindings> CachedShader::GetProgramHandle(GLenum primitive_mode,
@@ -92,150 +263,255 @@ std::tuple<GLuint, BaseBindings> CachedShader::GetProgramHandle(GLenum primitive
92 const auto [entry, is_cache_miss] = programs.try_emplace(base_bindings); 263 const auto [entry, is_cache_miss] = programs.try_emplace(base_bindings);
93 auto& program = entry->second; 264 auto& program = entry->second;
94 if (is_cache_miss) { 265 if (is_cache_miss) {
95 std::string source = AllocateBindings(base_bindings); 266 program = TryLoadProgram(primitive_mode, base_bindings);
96 source += code; 267 if (!program) {
268 program =
269 SpecializeShader(code, entries, program_type, base_bindings, primitive_mode);
270 disk_cache.SaveUsage(GetUsage(primitive_mode, base_bindings));
271 }
97 272
98 OGLShader shader; 273 LabelGLObject(GL_PROGRAM, program->handle, cpu_addr);
99 shader.Create(source.c_str(), GetShaderType(program_type));
100 program.Create(true, shader.handle);
101 LabelGLObject(GL_PROGRAM, program.handle, addr);
102 } 274 }
103 275
104 handle = program.handle; 276 handle = program->handle;
105 } 277 }
106 278
107 // Add const buffer and samplers offset reserved by this shader. One UBO binding is reserved for 279 base_bindings.cbuf += static_cast<u32>(entries.const_buffers.size()) + RESERVED_UBOS;
108 // emulation values
109 base_bindings.cbuf += static_cast<u32>(entries.const_buffers.size()) + 1;
110 base_bindings.gmem += static_cast<u32>(entries.global_memory_entries.size()); 280 base_bindings.gmem += static_cast<u32>(entries.global_memory_entries.size());
111 base_bindings.sampler += static_cast<u32>(entries.samplers.size()); 281 base_bindings.sampler += static_cast<u32>(entries.samplers.size());
112 282
113 return {handle, base_bindings}; 283 return {handle, base_bindings};
114} 284}
115 285
116std::string CachedShader::AllocateBindings(BaseBindings base_bindings) {
117 std::string code = "#version 430 core\n";
118 code += fmt::format("#define EMULATION_UBO_BINDING {}\n", base_bindings.cbuf++);
119
120 for (const auto& cbuf : entries.const_buffers) {
121 code += fmt::format("#define CBUF_BINDING_{} {}\n", cbuf.GetIndex(), base_bindings.cbuf++);
122 }
123
124 for (const auto& gmem : entries.global_memory_entries) {
125 code += fmt::format("#define GMEM_BINDING_{}_{} {}\n", gmem.GetCbufIndex(),
126 gmem.GetCbufOffset(), base_bindings.gmem++);
127 }
128
129 for (const auto& sampler : entries.samplers) {
130 code += fmt::format("#define SAMPLER_BINDING_{} {}\n", sampler.GetIndex(),
131 base_bindings.sampler++);
132 }
133
134 return code;
135}
136
137GLuint CachedShader::GetGeometryShader(GLenum primitive_mode, BaseBindings base_bindings) { 286GLuint CachedShader::GetGeometryShader(GLenum primitive_mode, BaseBindings base_bindings) {
138 const auto [entry, is_cache_miss] = geometry_programs.try_emplace(base_bindings); 287 const auto [entry, is_cache_miss] = geometry_programs.try_emplace(base_bindings);
139 auto& programs = entry->second; 288 auto& programs = entry->second;
140 289
141 switch (primitive_mode) { 290 switch (primitive_mode) {
142 case GL_POINTS: 291 case GL_POINTS:
143 return LazyGeometryProgram(programs.points, base_bindings, "points", 1, "ShaderPoints"); 292 return LazyGeometryProgram(programs.points, base_bindings, primitive_mode);
144 case GL_LINES: 293 case GL_LINES:
145 case GL_LINE_STRIP: 294 case GL_LINE_STRIP:
146 return LazyGeometryProgram(programs.lines, base_bindings, "lines", 2, "ShaderLines"); 295 return LazyGeometryProgram(programs.lines, base_bindings, primitive_mode);
147 case GL_LINES_ADJACENCY: 296 case GL_LINES_ADJACENCY:
148 case GL_LINE_STRIP_ADJACENCY: 297 case GL_LINE_STRIP_ADJACENCY:
149 return LazyGeometryProgram(programs.lines_adjacency, base_bindings, "lines_adjacency", 4, 298 return LazyGeometryProgram(programs.lines_adjacency, base_bindings, primitive_mode);
150 "ShaderLinesAdjacency");
151 case GL_TRIANGLES: 299 case GL_TRIANGLES:
152 case GL_TRIANGLE_STRIP: 300 case GL_TRIANGLE_STRIP:
153 case GL_TRIANGLE_FAN: 301 case GL_TRIANGLE_FAN:
154 return LazyGeometryProgram(programs.triangles, base_bindings, "triangles", 3, 302 return LazyGeometryProgram(programs.triangles, base_bindings, primitive_mode);
155 "ShaderTriangles");
156 case GL_TRIANGLES_ADJACENCY: 303 case GL_TRIANGLES_ADJACENCY:
157 case GL_TRIANGLE_STRIP_ADJACENCY: 304 case GL_TRIANGLE_STRIP_ADJACENCY:
158 return LazyGeometryProgram(programs.triangles_adjacency, base_bindings, 305 return LazyGeometryProgram(programs.triangles_adjacency, base_bindings, primitive_mode);
159 "triangles_adjacency", 6, "ShaderTrianglesAdjacency");
160 default: 306 default:
161 UNREACHABLE_MSG("Unknown primitive mode."); 307 UNREACHABLE_MSG("Unknown primitive mode.");
162 return LazyGeometryProgram(programs.points, base_bindings, "points", 1, "ShaderPoints"); 308 return LazyGeometryProgram(programs.points, base_bindings, primitive_mode);
163 } 309 }
164} 310}
165 311
166GLuint CachedShader::LazyGeometryProgram(OGLProgram& target_program, BaseBindings base_bindings, 312GLuint CachedShader::LazyGeometryProgram(CachedProgram& target_program, BaseBindings base_bindings,
167 const std::string& glsl_topology, u32 max_vertices, 313 GLenum primitive_mode) {
168 const std::string& debug_name) { 314 if (target_program) {
169 if (target_program.handle != 0) { 315 return target_program->handle;
170 return target_program.handle; 316 }
317 const auto [glsl_name, debug_name, vertices] = GetPrimitiveDescription(primitive_mode);
318 target_program = TryLoadProgram(primitive_mode, base_bindings);
319 if (!target_program) {
320 target_program =
321 SpecializeShader(code, entries, program_type, base_bindings, primitive_mode);
322 disk_cache.SaveUsage(GetUsage(primitive_mode, base_bindings));
171 } 323 }
172 std::string source = AllocateBindings(base_bindings);
173 source += "layout (" + glsl_topology + ") in;\n";
174 source += "#define MAX_VERTEX_INPUT " + std::to_string(max_vertices) + '\n';
175 source += code;
176 324
177 OGLShader shader; 325 LabelGLObject(GL_PROGRAM, target_program->handle, cpu_addr, debug_name);
178 shader.Create(source.c_str(), GL_GEOMETRY_SHADER); 326
179 target_program.Create(true, shader.handle); 327 return target_program->handle;
180 LabelGLObject(GL_PROGRAM, target_program.handle, addr, debug_name);
181 return target_program.handle;
182}; 328};
183 329
184static bool IsSchedInstruction(std::size_t offset, std::size_t main_offset) { 330CachedProgram CachedShader::TryLoadProgram(GLenum primitive_mode,
185 // sched instructions appear once every 4 instructions. 331 BaseBindings base_bindings) const {
186 static constexpr std::size_t SchedPeriod = 4; 332 const auto found = precompiled_programs.find(GetUsage(primitive_mode, base_bindings));
187 const std::size_t absolute_offset = offset - main_offset; 333 if (found == precompiled_programs.end()) {
188 return (absolute_offset % SchedPeriod) == 0; 334 return {};
335 }
336 return found->second;
189} 337}
190 338
191static std::size_t CalculateProgramSize(const GLShader::ProgramCode& program) { 339ShaderDiskCacheUsage CachedShader::GetUsage(GLenum primitive_mode,
192 constexpr std::size_t start_offset = 10; 340 BaseBindings base_bindings) const {
193 std::size_t offset = start_offset; 341 return {unique_identifier, base_bindings, primitive_mode};
194 std::size_t size = start_offset * sizeof(u64); 342}
195 while (offset < program.size()) { 343
196 const u64 inst = program[offset]; 344ShaderCacheOpenGL::ShaderCacheOpenGL(RasterizerOpenGL& rasterizer, Core::System& system)
197 if (!IsSchedInstruction(offset, start_offset)) { 345 : RasterizerCache{rasterizer}, disk_cache{system} {}
198 if (inst == 0 || (inst >> 52) == 0x50b) { 346
199 break; 347void ShaderCacheOpenGL::LoadDiskCache(const std::atomic_bool& stop_loading,
348 const VideoCore::DiskResourceLoadCallback& callback) {
349 const auto transferable = disk_cache.LoadTransferable();
350 if (!transferable) {
351 return;
352 }
353 const auto [raws, usages] = *transferable;
354
355 auto [decompiled, dumps] = disk_cache.LoadPrecompiled();
356
357 const auto supported_formats{GetSupportedFormats()};
358 const auto unspecialized{
359 GenerateUnspecializedShaders(stop_loading, callback, raws, decompiled)};
360 if (stop_loading)
361 return;
362
363 // Build shaders
364 if (callback)
365 callback(VideoCore::LoadCallbackStage::Build, 0, usages.size());
366 for (std::size_t i = 0; i < usages.size(); ++i) {
367 if (stop_loading)
368 return;
369
370 const auto& usage{usages[i]};
371 LOG_INFO(Render_OpenGL, "Building shader {:016x} ({} of {})", usage.unique_identifier,
372 i + 1, usages.size());
373
374 const auto& unspec{unspecialized.at(usage.unique_identifier)};
375 const auto dump_it = dumps.find(usage);
376
377 CachedProgram shader;
378 if (dump_it != dumps.end()) {
379 // If the shader is dumped, attempt to load it with
380 shader = GeneratePrecompiledProgram(dump_it->second, supported_formats);
381 if (!shader) {
382 // Invalidate the precompiled cache if a shader dumped shader was rejected
383 disk_cache.InvalidatePrecompiled();
384 dumps.clear();
200 } 385 }
201 } 386 }
202 size += sizeof(inst); 387 if (!shader) {
203 offset++; 388 shader = SpecializeShader(unspec.code, unspec.entries, unspec.program_type,
389 usage.bindings, usage.primitive, true);
390 }
391 precompiled_programs.insert({usage, std::move(shader)});
392
393 if (callback)
394 callback(VideoCore::LoadCallbackStage::Build, i + 1, usages.size());
395 }
396
397 // TODO(Rodrigo): Do state tracking for transferable shaders and do a dummy draw before
398 // precompiling them
399
400 for (std::size_t i = 0; i < usages.size(); ++i) {
401 const auto& usage{usages[i]};
402 if (dumps.find(usage) == dumps.end()) {
403 const auto& program = precompiled_programs.at(usage);
404 disk_cache.SaveDump(usage, program->handle);
405 }
204 } 406 }
205 return size;
206} 407}
207 408
208void CachedShader::CalculateProperties() { 409CachedProgram ShaderCacheOpenGL::GeneratePrecompiledProgram(
209 setup.program.real_size = CalculateProgramSize(setup.program.code); 410 const ShaderDiskCacheDump& dump, const std::set<GLenum>& supported_formats) {
210 setup.program.real_size_b = 0; 411
211 setup.program.unique_identifier = Common::CityHash64( 412 if (supported_formats.find(dump.binary_format) == supported_formats.end()) {
212 reinterpret_cast<const char*>(setup.program.code.data()), setup.program.real_size); 413 LOG_INFO(Render_OpenGL, "Precompiled cache entry with unsupported format - removing");
213 if (program_type == Maxwell::ShaderProgram::VertexA) { 414 return {};
214 std::size_t seed = 0;
215 boost::hash_combine(seed, setup.program.unique_identifier);
216 setup.program.real_size_b = CalculateProgramSize(setup.program.code_b);
217 const u64 identifier_b = Common::CityHash64(
218 reinterpret_cast<const char*>(setup.program.code_b.data()), setup.program.real_size_b);
219 boost::hash_combine(seed, identifier_b);
220 setup.program.unique_identifier = static_cast<u64>(seed);
221 } 415 }
416
417 CachedProgram shader = std::make_shared<OGLProgram>();
418 shader->handle = glCreateProgram();
419 glProgramParameteri(shader->handle, GL_PROGRAM_SEPARABLE, GL_TRUE);
420 glProgramBinary(shader->handle, dump.binary_format, dump.binary.data(),
421 static_cast<GLsizei>(dump.binary.size()));
422
423 GLint link_status{};
424 glGetProgramiv(shader->handle, GL_LINK_STATUS, &link_status);
425 if (link_status == GL_FALSE) {
426 LOG_INFO(Render_OpenGL, "Precompiled cache rejected by the driver - removing");
427 return {};
428 }
429
430 return shader;
222} 431}
223 432
224ShaderCacheOpenGL::ShaderCacheOpenGL(RasterizerOpenGL& rasterizer) : RasterizerCache{rasterizer} {} 433std::unordered_map<u64, UnspecializedShader> ShaderCacheOpenGL::GenerateUnspecializedShaders(
434 const std::atomic_bool& stop_loading, const VideoCore::DiskResourceLoadCallback& callback,
435 const std::vector<ShaderDiskCacheRaw>& raws,
436 const std::unordered_map<u64, ShaderDiskCacheDecompiled>& decompiled) {
437 std::unordered_map<u64, UnspecializedShader> unspecialized;
438
439 if (callback)
440 callback(VideoCore::LoadCallbackStage::Decompile, 0, raws.size());
441
442 for (std::size_t i = 0; i < raws.size(); ++i) {
443 if (stop_loading)
444 return {};
445
446 const auto& raw{raws[i]};
447 const u64 unique_identifier = raw.GetUniqueIdentifier();
448 const u64 calculated_hash =
449 GetUniqueIdentifier(raw.GetProgramType(), raw.GetProgramCode(), raw.GetProgramCodeB());
450 if (unique_identifier != calculated_hash) {
451 LOG_ERROR(
452 Render_OpenGL,
453 "Invalid hash in entry={:016x} (obtained hash={:016x}) - removing shader cache",
454 raw.GetUniqueIdentifier(), calculated_hash);
455 disk_cache.InvalidateTransferable();
456 return {};
457 }
458
459 GLShader::ProgramResult result;
460 if (const auto it = decompiled.find(unique_identifier); it != decompiled.end()) {
461 // If it's stored in the precompiled file, avoid decompiling it here
462 const auto& stored_decompiled{it->second};
463 result = {stored_decompiled.code, stored_decompiled.entries};
464 } else {
465 // Otherwise decompile the shader at boot and save the result to the decompiled file
466 result =
467 CreateProgram(raw.GetProgramType(), raw.GetProgramCode(), raw.GetProgramCodeB());
468 disk_cache.SaveDecompiled(unique_identifier, result.first, result.second);
469 }
470
471 precompiled_shaders.insert({unique_identifier, result});
472
473 unspecialized.insert(
474 {raw.GetUniqueIdentifier(),
475 {std::move(result.first), std::move(result.second), raw.GetProgramType()}});
476
477 if (callback)
478 callback(VideoCore::LoadCallbackStage::Decompile, i, raws.size());
479 }
480 return unspecialized;
481}
225 482
226Shader ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program) { 483Shader ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program) {
227 if (!Core::System::GetInstance().GPU().Maxwell3D().dirty_flags.shaders) { 484 if (!Core::System::GetInstance().GPU().Maxwell3D().dirty_flags.shaders) {
228 return last_shaders[static_cast<u32>(program)]; 485 return last_shaders[static_cast<u32>(program)];
229 } 486 }
230 487
231 const VAddr program_addr{GetShaderAddress(program)}; 488 auto& memory_manager{Core::System::GetInstance().GPU().MemoryManager()};
489 const Tegra::GPUVAddr program_addr{GetShaderAddress(program)};
232 490
233 // Look up shader in the cache based on address 491 // Look up shader in the cache based on address
234 Shader shader{TryGet(program_addr)}; 492 const auto& host_ptr{memory_manager.GetPointer(program_addr)};
493 Shader shader{TryGet(host_ptr)};
235 494
236 if (!shader) { 495 if (!shader) {
237 // No shader found - create a new one 496 // No shader found - create a new one
238 shader = std::make_shared<CachedShader>(program_addr, program); 497 ProgramCode program_code{GetShaderCode(host_ptr)};
498 ProgramCode program_code_b;
499 if (program == Maxwell::ShaderProgram::VertexA) {
500 program_code_b = GetShaderCode(
501 memory_manager.GetPointer(GetShaderAddress(Maxwell::ShaderProgram::VertexB)));
502 }
503 const u64 unique_identifier = GetUniqueIdentifier(program, program_code, program_code_b);
504 const VAddr cpu_addr{*memory_manager.GpuToCpuAddress(program_addr)};
505 const auto found = precompiled_shaders.find(unique_identifier);
506 if (found != precompiled_shaders.end()) {
507 shader =
508 std::make_shared<CachedShader>(cpu_addr, unique_identifier, program, disk_cache,
509 precompiled_programs, found->second, host_ptr);
510 } else {
511 shader = std::make_shared<CachedShader>(
512 cpu_addr, unique_identifier, program, disk_cache, precompiled_programs,
513 std::move(program_code), std::move(program_code_b), host_ptr);
514 }
239 Register(shader); 515 Register(shader);
240 } 516 }
241 517
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.h b/src/video_core/renderer_opengl/gl_shader_cache.h
index 904d15dd0..fd1c85115 100644
--- a/src/video_core/renderer_opengl/gl_shader_cache.h
+++ b/src/video_core/renderer_opengl/gl_shader_cache.h
@@ -5,43 +5,52 @@
5#pragma once 5#pragma once
6 6
7#include <array> 7#include <array>
8#include <map>
9#include <memory> 8#include <memory>
9#include <set>
10#include <tuple> 10#include <tuple>
11#include <unordered_map>
11 12
12#include <glad/glad.h> 13#include <glad/glad.h>
13 14
14#include "common/assert.h" 15#include "common/assert.h"
15#include "common/common_types.h" 16#include "common/common_types.h"
16#include "video_core/rasterizer_cache.h" 17#include "video_core/rasterizer_cache.h"
18#include "video_core/renderer_base.h"
17#include "video_core/renderer_opengl/gl_resource_manager.h" 19#include "video_core/renderer_opengl/gl_resource_manager.h"
18#include "video_core/renderer_opengl/gl_shader_decompiler.h" 20#include "video_core/renderer_opengl/gl_shader_decompiler.h"
21#include "video_core/renderer_opengl/gl_shader_disk_cache.h"
19#include "video_core/renderer_opengl/gl_shader_gen.h" 22#include "video_core/renderer_opengl/gl_shader_gen.h"
20 23
24namespace Core {
25class System;
26} // namespace Core
27
21namespace OpenGL { 28namespace OpenGL {
22 29
23class CachedShader; 30class CachedShader;
24class RasterizerOpenGL; 31class RasterizerOpenGL;
32struct UnspecializedShader;
25 33
26using Shader = std::shared_ptr<CachedShader>; 34using Shader = std::shared_ptr<CachedShader>;
35using CachedProgram = std::shared_ptr<OGLProgram>;
27using Maxwell = Tegra::Engines::Maxwell3D::Regs; 36using Maxwell = Tegra::Engines::Maxwell3D::Regs;
28 37using PrecompiledPrograms = std::unordered_map<ShaderDiskCacheUsage, CachedProgram>;
29struct BaseBindings { 38using PrecompiledShaders = std::unordered_map<u64, GLShader::ProgramResult>;
30 u32 cbuf{};
31 u32 gmem{};
32 u32 sampler{};
33
34 bool operator<(const BaseBindings& rhs) const {
35 return std::tie(cbuf, gmem, sampler) < std::tie(rhs.cbuf, rhs.gmem, rhs.sampler);
36 }
37};
38 39
39class CachedShader final : public RasterizerCacheObject { 40class CachedShader final : public RasterizerCacheObject {
40public: 41public:
41 CachedShader(VAddr addr, Maxwell::ShaderProgram program_type); 42 explicit CachedShader(VAddr cpu_addr, u64 unique_identifier,
42 43 Maxwell::ShaderProgram program_type, ShaderDiskCacheOpenGL& disk_cache,
43 VAddr GetAddr() const override { 44 const PrecompiledPrograms& precompiled_programs,
44 return addr; 45 ProgramCode&& program_code, ProgramCode&& program_code_b, u8* host_ptr);
46
47 explicit CachedShader(VAddr cpu_addr, u64 unique_identifier,
48 Maxwell::ShaderProgram program_type, ShaderDiskCacheOpenGL& disk_cache,
49 const PrecompiledPrograms& precompiled_programs,
50 GLShader::ProgramResult result, u8* host_ptr);
51
52 VAddr GetCpuAddr() const override {
53 return cpu_addr;
45 } 54 }
46 55
47 std::size_t GetSizeInBytes() const override { 56 std::size_t GetSizeInBytes() const override {
@@ -65,49 +74,68 @@ private:
65 // declared by the hardware. Workaround this issue by generating a different shader per input 74 // declared by the hardware. Workaround this issue by generating a different shader per input
66 // topology class. 75 // topology class.
67 struct GeometryPrograms { 76 struct GeometryPrograms {
68 OGLProgram points; 77 CachedProgram points;
69 OGLProgram lines; 78 CachedProgram lines;
70 OGLProgram lines_adjacency; 79 CachedProgram lines_adjacency;
71 OGLProgram triangles; 80 CachedProgram triangles;
72 OGLProgram triangles_adjacency; 81 CachedProgram triangles_adjacency;
73 }; 82 };
74 83
75 std::string AllocateBindings(BaseBindings base_bindings);
76
77 GLuint GetGeometryShader(GLenum primitive_mode, BaseBindings base_bindings); 84 GLuint GetGeometryShader(GLenum primitive_mode, BaseBindings base_bindings);
78 85
79 /// Generates a geometry shader or returns one that already exists. 86 /// Generates a geometry shader or returns one that already exists.
80 GLuint LazyGeometryProgram(OGLProgram& target_program, BaseBindings base_bindings, 87 GLuint LazyGeometryProgram(CachedProgram& target_program, BaseBindings base_bindings,
81 const std::string& glsl_topology, u32 max_vertices, 88 GLenum primitive_mode);
82 const std::string& debug_name);
83 89
84 void CalculateProperties(); 90 CachedProgram TryLoadProgram(GLenum primitive_mode, BaseBindings base_bindings) const;
85 91
86 VAddr addr{}; 92 ShaderDiskCacheUsage GetUsage(GLenum primitive_mode, BaseBindings base_bindings) const;
87 std::size_t shader_length{}; 93
94 u8* host_ptr{};
95 VAddr cpu_addr{};
96 u64 unique_identifier{};
88 Maxwell::ShaderProgram program_type{}; 97 Maxwell::ShaderProgram program_type{};
89 GLShader::ShaderSetup setup; 98 ShaderDiskCacheOpenGL& disk_cache;
99 const PrecompiledPrograms& precompiled_programs;
100
101 std::size_t shader_length{};
90 GLShader::ShaderEntries entries; 102 GLShader::ShaderEntries entries;
91 103
92 std::string code; 104 std::string code;
93 105
94 std::map<BaseBindings, OGLProgram> programs; 106 std::unordered_map<BaseBindings, CachedProgram> programs;
95 std::map<BaseBindings, GeometryPrograms> geometry_programs; 107 std::unordered_map<BaseBindings, GeometryPrograms> geometry_programs;
96 108
97 std::map<u32, GLuint> cbuf_resource_cache; 109 std::unordered_map<u32, GLuint> cbuf_resource_cache;
98 std::map<u32, GLuint> gmem_resource_cache; 110 std::unordered_map<u32, GLuint> gmem_resource_cache;
99 std::map<u32, GLint> uniform_cache; 111 std::unordered_map<u32, GLint> uniform_cache;
100}; 112};
101 113
102class ShaderCacheOpenGL final : public RasterizerCache<Shader> { 114class ShaderCacheOpenGL final : public RasterizerCache<Shader> {
103public: 115public:
104 explicit ShaderCacheOpenGL(RasterizerOpenGL& rasterizer); 116 explicit ShaderCacheOpenGL(RasterizerOpenGL& rasterizer, Core::System& system);
117
118 /// Loads disk cache for the current game
119 void LoadDiskCache(const std::atomic_bool& stop_loading,
120 const VideoCore::DiskResourceLoadCallback& callback);
105 121
106 /// Gets the current specified shader stage program 122 /// Gets the current specified shader stage program
107 Shader GetStageProgram(Maxwell::ShaderProgram program); 123 Shader GetStageProgram(Maxwell::ShaderProgram program);
108 124
109private: 125private:
126 std::unordered_map<u64, UnspecializedShader> GenerateUnspecializedShaders(
127 const std::atomic_bool& stop_loading, const VideoCore::DiskResourceLoadCallback& callback,
128 const std::vector<ShaderDiskCacheRaw>& raws,
129 const std::unordered_map<u64, ShaderDiskCacheDecompiled>& decompiled);
130
131 CachedProgram GeneratePrecompiledProgram(const ShaderDiskCacheDump& dump,
132 const std::set<GLenum>& supported_formats);
133
110 std::array<Shader, Maxwell::MaxShaderProgram> last_shaders; 134 std::array<Shader, Maxwell::MaxShaderProgram> last_shaders;
135
136 ShaderDiskCacheOpenGL disk_cache;
137 PrecompiledShaders precompiled_shaders;
138 PrecompiledPrograms precompiled_programs;
111}; 139};
112 140
113} // namespace OpenGL 141} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
index 36035d0d2..11d1169f0 100644
--- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
@@ -5,7 +5,9 @@
5#include <array> 5#include <array>
6#include <string> 6#include <string>
7#include <string_view> 7#include <string_view>
8#include <utility>
8#include <variant> 9#include <variant>
10#include <vector>
9 11
10#include <fmt/format.h> 12#include <fmt/format.h>
11 13
@@ -20,6 +22,7 @@
20namespace OpenGL::GLShader { 22namespace OpenGL::GLShader {
21 23
22using Tegra::Shader::Attribute; 24using Tegra::Shader::Attribute;
25using Tegra::Shader::AttributeUse;
23using Tegra::Shader::Header; 26using Tegra::Shader::Header;
24using Tegra::Shader::IpaInterpMode; 27using Tegra::Shader::IpaInterpMode;
25using Tegra::Shader::IpaMode; 28using Tegra::Shader::IpaMode;
@@ -171,7 +174,7 @@ public:
171 code.AddLine(fmt::format("case 0x{:x}u: {{", address)); 174 code.AddLine(fmt::format("case 0x{:x}u: {{", address));
172 ++code.scope; 175 ++code.scope;
173 176
174 VisitBasicBlock(bb); 177 VisitBlock(bb);
175 178
176 --code.scope; 179 --code.scope;
177 code.AddLine('}'); 180 code.AddLine('}');
@@ -193,15 +196,14 @@ public:
193 ShaderEntries GetShaderEntries() const { 196 ShaderEntries GetShaderEntries() const {
194 ShaderEntries entries; 197 ShaderEntries entries;
195 for (const auto& cbuf : ir.GetConstantBuffers()) { 198 for (const auto& cbuf : ir.GetConstantBuffers()) {
196 entries.const_buffers.emplace_back(cbuf.second, stage, GetConstBufferBlock(cbuf.first), 199 entries.const_buffers.emplace_back(cbuf.second.GetMaxOffset(), cbuf.second.IsIndirect(),
197 cbuf.first); 200 cbuf.first);
198 } 201 }
199 for (const auto& sampler : ir.GetSamplers()) { 202 for (const auto& sampler : ir.GetSamplers()) {
200 entries.samplers.emplace_back(sampler, stage, GetSampler(sampler)); 203 entries.samplers.emplace_back(sampler);
201 } 204 }
202 for (const auto& gmem : ir.GetGlobalMemoryBases()) { 205 for (const auto& gmem : ir.GetGlobalMemoryBases()) {
203 entries.global_memory_entries.emplace_back(gmem.cbuf_index, gmem.cbuf_offset, stage, 206 entries.global_memory_entries.emplace_back(gmem.cbuf_index, gmem.cbuf_offset);
204 GetGlobalMemoryBlock(gmem));
205 } 207 }
206 entries.clip_distances = ir.GetClipDistances(); 208 entries.clip_distances = ir.GetClipDistances();
207 entries.shader_length = ir.GetLength(); 209 entries.shader_length = ir.GetLength();
@@ -289,34 +291,22 @@ private:
289 code.AddNewLine(); 291 code.AddNewLine();
290 } 292 }
291 293
292 std::string GetInputFlags(const IpaMode& input_mode) { 294 std::string GetInputFlags(AttributeUse attribute) {
293 const IpaSampleMode sample_mode = input_mode.sampling_mode;
294 const IpaInterpMode interp_mode = input_mode.interpolation_mode;
295 std::string out; 295 std::string out;
296 296
297 switch (interp_mode) { 297 switch (attribute) {
298 case IpaInterpMode::Flat: 298 case AttributeUse::Constant:
299 out += "flat "; 299 out += "flat ";
300 break; 300 break;
301 case IpaInterpMode::Linear: 301 case AttributeUse::ScreenLinear:
302 out += "noperspective "; 302 out += "noperspective ";
303 break; 303 break;
304 case IpaInterpMode::Perspective: 304 case AttributeUse::Perspective:
305 // Default, Smooth 305 // Default, Smooth
306 break; 306 break;
307 default: 307 default:
308 UNIMPLEMENTED_MSG("Unhandled IPA interp mode: {}", static_cast<u32>(interp_mode)); 308 LOG_CRITICAL(HW_GPU, "Unused attribute being fetched");
309 } 309 UNREACHABLE();
310 switch (sample_mode) {
311 case IpaSampleMode::Centroid:
312 // It can be implemented with the "centroid " keyword in GLSL
313 UNIMPLEMENTED_MSG("Unimplemented IPA sampler mode centroid");
314 break;
315 case IpaSampleMode::Default:
316 // Default, n/a
317 break;
318 default:
319 UNIMPLEMENTED_MSG("Unimplemented IPA sampler mode: {}", static_cast<u32>(sample_mode));
320 } 310 }
321 return out; 311 return out;
322 } 312 }
@@ -325,16 +315,11 @@ private:
325 const auto& attributes = ir.GetInputAttributes(); 315 const auto& attributes = ir.GetInputAttributes();
326 for (const auto element : attributes) { 316 for (const auto element : attributes) {
327 const Attribute::Index index = element.first; 317 const Attribute::Index index = element.first;
328 const IpaMode& input_mode = *element.second.begin();
329 if (index < Attribute::Index::Attribute_0 || index > Attribute::Index::Attribute_31) { 318 if (index < Attribute::Index::Attribute_0 || index > Attribute::Index::Attribute_31) {
330 // Skip when it's not a generic attribute 319 // Skip when it's not a generic attribute
331 continue; 320 continue;
332 } 321 }
333 322
334 ASSERT(element.second.size() > 0);
335 UNIMPLEMENTED_IF_MSG(element.second.size() > 1,
336 "Multiple input flag modes are not supported in GLSL");
337
338 // TODO(bunnei): Use proper number of elements for these 323 // TODO(bunnei): Use proper number of elements for these
339 u32 idx = static_cast<u32>(index) - static_cast<u32>(Attribute::Index::Attribute_0); 324 u32 idx = static_cast<u32>(index) - static_cast<u32>(Attribute::Index::Attribute_0);
340 if (stage != ShaderStage::Vertex) { 325 if (stage != ShaderStage::Vertex) {
@@ -346,8 +331,14 @@ private:
346 if (stage == ShaderStage::Geometry) { 331 if (stage == ShaderStage::Geometry) {
347 attr = "gs_" + attr + "[]"; 332 attr = "gs_" + attr + "[]";
348 } 333 }
349 code.AddLine("layout (location = " + std::to_string(idx) + ") " + 334 std::string suffix;
350 GetInputFlags(input_mode) + "in vec4 " + attr + ';'); 335 if (stage == ShaderStage::Fragment) {
336 const auto input_mode =
337 header.ps.GetAttributeUse(idx - GENERIC_VARYING_START_LOCATION);
338 suffix = GetInputFlags(input_mode);
339 }
340 code.AddLine("layout (location = " + std::to_string(idx) + ") " + suffix + "in vec4 " +
341 attr + ';');
351 } 342 }
352 if (!attributes.empty()) 343 if (!attributes.empty())
353 code.AddNewLine(); 344 code.AddNewLine();
@@ -424,7 +415,7 @@ private:
424 code.AddNewLine(); 415 code.AddNewLine();
425 } 416 }
426 417
427 void VisitBasicBlock(const BasicBlock& bb) { 418 void VisitBlock(const NodeBlock& bb) {
428 for (const Node node : bb) { 419 for (const Node node : bb) {
429 if (const std::string expr = Visit(node); !expr.empty()) { 420 if (const std::string expr = Visit(node); !expr.empty()) {
430 code.AddLine(expr); 421 code.AddLine(expr);
@@ -576,7 +567,7 @@ private:
576 code.AddLine("if (" + Visit(conditional->GetCondition()) + ") {"); 567 code.AddLine("if (" + Visit(conditional->GetCondition()) + ") {");
577 ++code.scope; 568 ++code.scope;
578 569
579 VisitBasicBlock(conditional->GetCode()); 570 VisitBlock(conditional->GetCode());
580 571
581 --code.scope; 572 --code.scope;
582 code.AddLine('}'); 573 code.AddLine('}');
@@ -617,17 +608,8 @@ private:
617 608
618 std::string VisitOperand(Operation operation, std::size_t operand_index, Type type) { 609 std::string VisitOperand(Operation operation, std::size_t operand_index, Type type) {
619 std::string value = VisitOperand(operation, operand_index); 610 std::string value = VisitOperand(operation, operand_index);
620
621 switch (type) { 611 switch (type) {
622 case Type::Bool: 612 case Type::HalfFloat: {
623 case Type::Bool2:
624 case Type::Float:
625 return value;
626 case Type::Int:
627 return "ftoi(" + value + ')';
628 case Type::Uint:
629 return "ftou(" + value + ')';
630 case Type::HalfFloat:
631 const auto half_meta = std::get_if<MetaHalfArithmetic>(&operation.GetMeta()); 613 const auto half_meta = std::get_if<MetaHalfArithmetic>(&operation.GetMeta());
632 if (!half_meta) { 614 if (!half_meta) {
633 value = "toHalf2(" + value + ')'; 615 value = "toHalf2(" + value + ')';
@@ -644,6 +626,26 @@ private:
644 return "vec2(toHalf2(" + value + ")[1])"; 626 return "vec2(toHalf2(" + value + ")[1])";
645 } 627 }
646 } 628 }
629 default:
630 return CastOperand(value, type);
631 }
632 }
633
634 std::string CastOperand(const std::string& value, Type type) const {
635 switch (type) {
636 case Type::Bool:
637 case Type::Bool2:
638 case Type::Float:
639 return value;
640 case Type::Int:
641 return "ftoi(" + value + ')';
642 case Type::Uint:
643 return "ftou(" + value + ')';
644 case Type::HalfFloat:
645 // Can't be handled as a stand-alone value
646 UNREACHABLE();
647 return value;
648 }
647 UNREACHABLE(); 649 UNREACHABLE();
648 return value; 650 return value;
649 } 651 }
@@ -651,6 +653,7 @@ private:
651 std::string BitwiseCastResult(std::string value, Type type, bool needs_parenthesis = false) { 653 std::string BitwiseCastResult(std::string value, Type type, bool needs_parenthesis = false) {
652 switch (type) { 654 switch (type) {
653 case Type::Bool: 655 case Type::Bool:
656 case Type::Bool2:
654 case Type::Float: 657 case Type::Float:
655 if (needs_parenthesis) { 658 if (needs_parenthesis) {
656 return '(' + value + ')'; 659 return '(' + value + ')';
@@ -716,51 +719,68 @@ private:
716 } 719 }
717 720
718 std::string GenerateTexture(Operation operation, const std::string& func, 721 std::string GenerateTexture(Operation operation, const std::string& func,
719 bool is_extra_int = false) { 722 const std::vector<std::pair<Type, Node>>& extras) {
720 constexpr std::array<const char*, 4> coord_constructors = {"float", "vec2", "vec3", "vec4"}; 723 constexpr std::array<const char*, 4> coord_constructors = {"float", "vec2", "vec3", "vec4"};
721 724
722 const auto meta = std::get_if<MetaTexture>(&operation.GetMeta()); 725 const auto meta = std::get_if<MetaTexture>(&operation.GetMeta());
723 const auto count = static_cast<u32>(operation.GetOperandsCount());
724 ASSERT(meta); 726 ASSERT(meta);
725 727
728 const std::size_t count = operation.GetOperandsCount();
729 const bool has_array = meta->sampler.IsArray();
730 const bool has_shadow = meta->sampler.IsShadow();
731
726 std::string expr = func; 732 std::string expr = func;
727 expr += '('; 733 expr += '(';
728 expr += GetSampler(meta->sampler); 734 expr += GetSampler(meta->sampler);
729 expr += ", "; 735 expr += ", ";
730 736
731 expr += coord_constructors[meta->coords_count - 1]; 737 expr += coord_constructors.at(count + (has_array ? 1 : 0) + (has_shadow ? 1 : 0) - 1);
732 expr += '('; 738 expr += '(';
733 for (u32 i = 0; i < count; ++i) { 739 for (std::size_t i = 0; i < count; ++i) {
734 const bool is_extra = i >= meta->coords_count; 740 expr += Visit(operation[i]);
735 const bool is_array = i == meta->array_index;
736
737 std::string operand = [&]() {
738 if (is_extra && is_extra_int) {
739 if (const auto immediate = std::get_if<ImmediateNode>(operation[i])) {
740 return std::to_string(static_cast<s32>(immediate->GetValue()));
741 } else {
742 return "ftoi(" + Visit(operation[i]) + ')';
743 }
744 } else {
745 return Visit(operation[i]);
746 }
747 }();
748 if (is_array) {
749 ASSERT(!is_extra);
750 operand = "float(ftoi(" + operand + "))";
751 }
752 741
753 expr += operand; 742 const std::size_t next = i + 1;
743 if (next < count)
744 expr += ", ";
745 }
746 if (has_array) {
747 expr += ", float(ftoi(" + Visit(meta->array) + "))";
748 }
749 if (has_shadow) {
750 expr += ", " + Visit(meta->depth_compare);
751 }
752 expr += ')';
754 753
755 if (i + 1 == meta->coords_count) { 754 for (const auto& extra_pair : extras) {
756 expr += ')'; 755 const auto [type, operand] = extra_pair;
756 if (operand == nullptr) {
757 continue;
758 }
759 expr += ", ";
760
761 switch (type) {
762 case Type::Int:
763 if (const auto immediate = std::get_if<ImmediateNode>(operand)) {
764 // Inline the string as an immediate integer in GLSL (some extra arguments are
765 // required to be constant)
766 expr += std::to_string(static_cast<s32>(immediate->GetValue()));
767 } else {
768 expr += "ftoi(" + Visit(operand) + ')';
769 }
770 break;
771 case Type::Float:
772 expr += Visit(operand);
773 break;
774 default: {
775 const auto type_int = static_cast<u32>(type);
776 UNIMPLEMENTED_MSG("Unimplemented extra type={}", type_int);
777 expr += '0';
778 break;
757 } 779 }
758 if (i + 1 < count) {
759 expr += ", ";
760 } 780 }
761 } 781 }
762 expr += ')'; 782
763 return expr; 783 return expr + ')';
764 } 784 }
765 785
766 std::string Assign(Operation operation) { 786 std::string Assign(Operation operation) {
@@ -1135,37 +1155,38 @@ private:
1135 Type::HalfFloat); 1155 Type::HalfFloat);
1136 } 1156 }
1137 1157
1138 std::string F4Texture(Operation operation) { 1158 std::string Texture(Operation operation) {
1139 const auto meta = std::get_if<MetaTexture>(&operation.GetMeta()); 1159 const auto meta = std::get_if<MetaTexture>(&operation.GetMeta());
1140 ASSERT(meta); 1160 ASSERT(meta);
1141 1161
1142 std::string expr = GenerateTexture(operation, "texture"); 1162 std::string expr = GenerateTexture(operation, "texture", {{Type::Float, meta->bias}});
1143 if (meta->sampler.IsShadow()) { 1163 if (meta->sampler.IsShadow()) {
1144 expr = "vec4(" + expr + ')'; 1164 expr = "vec4(" + expr + ')';
1145 } 1165 }
1146 return expr + GetSwizzle(meta->element); 1166 return expr + GetSwizzle(meta->element);
1147 } 1167 }
1148 1168
1149 std::string F4TextureLod(Operation operation) { 1169 std::string TextureLod(Operation operation) {
1150 const auto meta = std::get_if<MetaTexture>(&operation.GetMeta()); 1170 const auto meta = std::get_if<MetaTexture>(&operation.GetMeta());
1151 ASSERT(meta); 1171 ASSERT(meta);
1152 1172
1153 std::string expr = GenerateTexture(operation, "textureLod"); 1173 std::string expr = GenerateTexture(operation, "textureLod", {{Type::Float, meta->lod}});
1154 if (meta->sampler.IsShadow()) { 1174 if (meta->sampler.IsShadow()) {
1155 expr = "vec4(" + expr + ')'; 1175 expr = "vec4(" + expr + ')';
1156 } 1176 }
1157 return expr + GetSwizzle(meta->element); 1177 return expr + GetSwizzle(meta->element);
1158 } 1178 }
1159 1179
1160 std::string F4TextureGather(Operation operation) { 1180 std::string TextureGather(Operation operation) {
1161 const auto meta = std::get_if<MetaTexture>(&operation.GetMeta()); 1181 const auto meta = std::get_if<MetaTexture>(&operation.GetMeta());
1162 ASSERT(meta); 1182 ASSERT(meta);
1163 1183
1164 return GenerateTexture(operation, "textureGather", !meta->sampler.IsShadow()) + 1184 const auto type = meta->sampler.IsShadow() ? Type::Float : Type::Int;
1185 return GenerateTexture(operation, "textureGather", {{type, meta->component}}) +
1165 GetSwizzle(meta->element); 1186 GetSwizzle(meta->element);
1166 } 1187 }
1167 1188
1168 std::string F4TextureQueryDimensions(Operation operation) { 1189 std::string TextureQueryDimensions(Operation operation) {
1169 const auto meta = std::get_if<MetaTexture>(&operation.GetMeta()); 1190 const auto meta = std::get_if<MetaTexture>(&operation.GetMeta());
1170 ASSERT(meta); 1191 ASSERT(meta);
1171 1192
@@ -1185,40 +1206,44 @@ private:
1185 return "0"; 1206 return "0";
1186 } 1207 }
1187 1208
1188 std::string F4TextureQueryLod(Operation operation) { 1209 std::string TextureQueryLod(Operation operation) {
1189 const auto meta = std::get_if<MetaTexture>(&operation.GetMeta()); 1210 const auto meta = std::get_if<MetaTexture>(&operation.GetMeta());
1190 ASSERT(meta); 1211 ASSERT(meta);
1191 1212
1192 if (meta->element < 2) { 1213 if (meta->element < 2) {
1193 return "itof(int((" + GenerateTexture(operation, "textureQueryLod") + " * vec2(256))" + 1214 return "itof(int((" + GenerateTexture(operation, "textureQueryLod", {}) +
1194 GetSwizzle(meta->element) + "))"; 1215 " * vec2(256))" + GetSwizzle(meta->element) + "))";
1195 } 1216 }
1196 return "0"; 1217 return "0";
1197 } 1218 }
1198 1219
1199 std::string F4TexelFetch(Operation operation) { 1220 std::string TexelFetch(Operation operation) {
1200 constexpr std::array<const char*, 4> constructors = {"int", "ivec2", "ivec3", "ivec4"}; 1221 constexpr std::array<const char*, 4> constructors = {"int", "ivec2", "ivec3", "ivec4"};
1201 const auto meta = std::get_if<MetaTexture>(&operation.GetMeta()); 1222 const auto meta = std::get_if<MetaTexture>(&operation.GetMeta());
1202 const auto count = static_cast<u32>(operation.GetOperandsCount());
1203 ASSERT(meta); 1223 ASSERT(meta);
1224 UNIMPLEMENTED_IF(meta->sampler.IsArray());
1225 const std::size_t count = operation.GetOperandsCount();
1204 1226
1205 std::string expr = "texelFetch("; 1227 std::string expr = "texelFetch(";
1206 expr += GetSampler(meta->sampler); 1228 expr += GetSampler(meta->sampler);
1207 expr += ", "; 1229 expr += ", ";
1208 1230
1209 expr += constructors[meta->coords_count - 1]; 1231 expr += constructors.at(operation.GetOperandsCount() - 1);
1210 expr += '('; 1232 expr += '(';
1211 for (u32 i = 0; i < count; ++i) { 1233 for (std::size_t i = 0; i < count; ++i) {
1212 expr += VisitOperand(operation, i, Type::Int); 1234 expr += VisitOperand(operation, i, Type::Int);
1213 1235 const std::size_t next = i + 1;
1214 if (i + 1 == meta->coords_count) { 1236 if (next == count)
1215 expr += ')'; 1237 expr += ')';
1216 } 1238 else if (next < count)
1217 if (i + 1 < count) {
1218 expr += ", "; 1239 expr += ", ";
1219 } 1240 }
1241 if (meta->lod) {
1242 expr += ", ";
1243 expr += CastOperand(Visit(meta->lod), Type::Int);
1220 } 1244 }
1221 expr += ')'; 1245 expr += ')';
1246
1222 return expr + GetSwizzle(meta->element); 1247 return expr + GetSwizzle(meta->element);
1223 } 1248 }
1224 1249
@@ -1455,12 +1480,12 @@ private:
1455 &GLSLDecompiler::Logical2HNotEqual, 1480 &GLSLDecompiler::Logical2HNotEqual,
1456 &GLSLDecompiler::Logical2HGreaterEqual, 1481 &GLSLDecompiler::Logical2HGreaterEqual,
1457 1482
1458 &GLSLDecompiler::F4Texture, 1483 &GLSLDecompiler::Texture,
1459 &GLSLDecompiler::F4TextureLod, 1484 &GLSLDecompiler::TextureLod,
1460 &GLSLDecompiler::F4TextureGather, 1485 &GLSLDecompiler::TextureGather,
1461 &GLSLDecompiler::F4TextureQueryDimensions, 1486 &GLSLDecompiler::TextureQueryDimensions,
1462 &GLSLDecompiler::F4TextureQueryLod, 1487 &GLSLDecompiler::TextureQueryLod,
1463 &GLSLDecompiler::F4TexelFetch, 1488 &GLSLDecompiler::TexelFetch,
1464 1489
1465 &GLSLDecompiler::Branch, 1490 &GLSLDecompiler::Branch,
1466 &GLSLDecompiler::PushFlowStack, 1491 &GLSLDecompiler::PushFlowStack,
@@ -1563,4 +1588,4 @@ ProgramResult Decompile(const ShaderIR& ir, Maxwell::ShaderStage stage, const st
1563 return {decompiler.GetResult(), decompiler.GetShaderEntries()}; 1588 return {decompiler.GetResult(), decompiler.GetShaderEntries()};
1564} 1589}
1565 1590
1566} // namespace OpenGL::GLShader \ No newline at end of file 1591} // namespace OpenGL::GLShader
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.h b/src/video_core/renderer_opengl/gl_shader_decompiler.h
index 0856a1361..72aca4938 100644
--- a/src/video_core/renderer_opengl/gl_shader_decompiler.h
+++ b/src/video_core/renderer_opengl/gl_shader_decompiler.h
@@ -5,6 +5,7 @@
5#pragma once 5#pragma once
6 6
7#include <array> 7#include <array>
8#include <set>
8#include <string> 9#include <string>
9#include <utility> 10#include <utility>
10#include <vector> 11#include <vector>
@@ -18,56 +19,29 @@ class ShaderIR;
18 19
19namespace OpenGL::GLShader { 20namespace OpenGL::GLShader {
20 21
22struct ShaderEntries;
23
21using Maxwell = Tegra::Engines::Maxwell3D::Regs; 24using Maxwell = Tegra::Engines::Maxwell3D::Regs;
25using ProgramResult = std::pair<std::string, ShaderEntries>;
26using SamplerEntry = VideoCommon::Shader::Sampler;
22 27
23class ConstBufferEntry : public VideoCommon::Shader::ConstBuffer { 28class ConstBufferEntry : public VideoCommon::Shader::ConstBuffer {
24public: 29public:
25 explicit ConstBufferEntry(const VideoCommon::Shader::ConstBuffer& entry, 30 explicit ConstBufferEntry(u32 max_offset, bool is_indirect, u32 index)
26 Maxwell::ShaderStage stage, const std::string& name, u32 index) 31 : VideoCommon::Shader::ConstBuffer{max_offset, is_indirect}, index{index} {}
27 : VideoCommon::Shader::ConstBuffer{entry}, stage{stage}, name{name}, index{index} {}
28
29 const std::string& GetName() const {
30 return name;
31 }
32
33 Maxwell::ShaderStage GetStage() const {
34 return stage;
35 }
36 32
37 u32 GetIndex() const { 33 u32 GetIndex() const {
38 return index; 34 return index;
39 } 35 }
40 36
41private: 37private:
42 std::string name;
43 Maxwell::ShaderStage stage{};
44 u32 index{}; 38 u32 index{};
45}; 39};
46 40
47class SamplerEntry : public VideoCommon::Shader::Sampler {
48public:
49 explicit SamplerEntry(const VideoCommon::Shader::Sampler& entry, Maxwell::ShaderStage stage,
50 const std::string& name)
51 : VideoCommon::Shader::Sampler{entry}, stage{stage}, name{name} {}
52
53 const std::string& GetName() const {
54 return name;
55 }
56
57 Maxwell::ShaderStage GetStage() const {
58 return stage;
59 }
60
61private:
62 std::string name;
63 Maxwell::ShaderStage stage{};
64};
65
66class GlobalMemoryEntry { 41class GlobalMemoryEntry {
67public: 42public:
68 explicit GlobalMemoryEntry(u32 cbuf_index, u32 cbuf_offset, Maxwell::ShaderStage stage, 43 explicit GlobalMemoryEntry(u32 cbuf_index, u32 cbuf_offset)
69 std::string name) 44 : cbuf_index{cbuf_index}, cbuf_offset{cbuf_offset} {}
70 : cbuf_index{cbuf_index}, cbuf_offset{cbuf_offset}, stage{stage}, name{std::move(name)} {}
71 45
72 u32 GetCbufIndex() const { 46 u32 GetCbufIndex() const {
73 return cbuf_index; 47 return cbuf_index;
@@ -77,19 +51,9 @@ public:
77 return cbuf_offset; 51 return cbuf_offset;
78 } 52 }
79 53
80 const std::string& GetName() const {
81 return name;
82 }
83
84 Maxwell::ShaderStage GetStage() const {
85 return stage;
86 }
87
88private: 54private:
89 u32 cbuf_index{}; 55 u32 cbuf_index{};
90 u32 cbuf_offset{}; 56 u32 cbuf_offset{};
91 Maxwell::ShaderStage stage{};
92 std::string name;
93}; 57};
94 58
95struct ShaderEntries { 59struct ShaderEntries {
@@ -100,8 +64,6 @@ struct ShaderEntries {
100 std::size_t shader_length{}; 64 std::size_t shader_length{};
101}; 65};
102 66
103using ProgramResult = std::pair<std::string, ShaderEntries>;
104
105std::string GetCommonDeclarations(); 67std::string GetCommonDeclarations();
106 68
107ProgramResult Decompile(const VideoCommon::Shader::ShaderIR& ir, Maxwell::ShaderStage stage, 69ProgramResult Decompile(const VideoCommon::Shader::ShaderIR& ir, Maxwell::ShaderStage stage,
diff --git a/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp b/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp
new file mode 100644
index 000000000..82fc4d44b
--- /dev/null
+++ b/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp
@@ -0,0 +1,654 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <cstring>
6#include <fmt/format.h>
7#include <lz4.h>
8
9#include "common/assert.h"
10#include "common/common_paths.h"
11#include "common/common_types.h"
12#include "common/file_util.h"
13#include "common/logging/log.h"
14#include "common/scm_rev.h"
15
16#include "core/core.h"
17#include "core/hle/kernel/process.h"
18#include "core/settings.h"
19
20#include "video_core/renderer_opengl/gl_shader_cache.h"
21#include "video_core/renderer_opengl/gl_shader_disk_cache.h"
22
23namespace OpenGL {
24
25using ShaderCacheVersionHash = std::array<u8, 64>;
26
27enum class TransferableEntryKind : u32 {
28 Raw,
29 Usage,
30};
31
32enum class PrecompiledEntryKind : u32 {
33 Decompiled,
34 Dump,
35};
36
37constexpr u32 NativeVersion = 1;
38
39// Making sure sizes doesn't change by accident
40static_assert(sizeof(BaseBindings) == 12);
41static_assert(sizeof(ShaderDiskCacheUsage) == 24);
42
43namespace {
44
45ShaderCacheVersionHash GetShaderCacheVersionHash() {
46 ShaderCacheVersionHash hash{};
47 const std::size_t length = std::min(std::strlen(Common::g_shader_cache_version), hash.size());
48 std::memcpy(hash.data(), Common::g_shader_cache_version, length);
49 return hash;
50}
51
52template <typename T>
53std::vector<u8> CompressData(const T* source, std::size_t source_size) {
54 if (source_size > LZ4_MAX_INPUT_SIZE) {
55 // Source size exceeds LZ4 maximum input size
56 return {};
57 }
58 const auto source_size_int = static_cast<int>(source_size);
59 const int max_compressed_size = LZ4_compressBound(source_size_int);
60 std::vector<u8> compressed(max_compressed_size);
61 const int compressed_size = LZ4_compress_default(reinterpret_cast<const char*>(source),
62 reinterpret_cast<char*>(compressed.data()),
63 source_size_int, max_compressed_size);
64 if (compressed_size <= 0) {
65 // Compression failed
66 return {};
67 }
68 compressed.resize(compressed_size);
69 return compressed;
70}
71
72std::vector<u8> DecompressData(const std::vector<u8>& compressed, std::size_t uncompressed_size) {
73 std::vector<u8> uncompressed(uncompressed_size);
74 const int size_check = LZ4_decompress_safe(reinterpret_cast<const char*>(compressed.data()),
75 reinterpret_cast<char*>(uncompressed.data()),
76 static_cast<int>(compressed.size()),
77 static_cast<int>(uncompressed.size()));
78 if (static_cast<int>(uncompressed_size) != size_check) {
79 // Decompression failed
80 return {};
81 }
82 return uncompressed;
83}
84
85} // namespace
86
87ShaderDiskCacheRaw::ShaderDiskCacheRaw(u64 unique_identifier, Maxwell::ShaderProgram program_type,
88 u32 program_code_size, u32 program_code_size_b,
89 ProgramCode program_code, ProgramCode program_code_b)
90 : unique_identifier{unique_identifier}, program_type{program_type},
91 program_code_size{program_code_size}, program_code_size_b{program_code_size_b},
92 program_code{std::move(program_code)}, program_code_b{std::move(program_code_b)} {}
93
94ShaderDiskCacheRaw::ShaderDiskCacheRaw() = default;
95
96ShaderDiskCacheRaw::~ShaderDiskCacheRaw() = default;
97
98bool ShaderDiskCacheRaw::Load(FileUtil::IOFile& file) {
99 if (file.ReadBytes(&unique_identifier, sizeof(u64)) != sizeof(u64) ||
100 file.ReadBytes(&program_type, sizeof(u32)) != sizeof(u32)) {
101 return false;
102 }
103 u32 program_code_size{};
104 u32 program_code_size_b{};
105 if (file.ReadBytes(&program_code_size, sizeof(u32)) != sizeof(u32) ||
106 file.ReadBytes(&program_code_size_b, sizeof(u32)) != sizeof(u32)) {
107 return false;
108 }
109
110 program_code.resize(program_code_size);
111 program_code_b.resize(program_code_size_b);
112
113 if (file.ReadArray(program_code.data(), program_code_size) != program_code_size)
114 return false;
115
116 if (HasProgramA() &&
117 file.ReadArray(program_code_b.data(), program_code_size_b) != program_code_size_b) {
118 return false;
119 }
120 return true;
121}
122
123bool ShaderDiskCacheRaw::Save(FileUtil::IOFile& file) const {
124 if (file.WriteObject(unique_identifier) != 1 ||
125 file.WriteObject(static_cast<u32>(program_type)) != 1 ||
126 file.WriteObject(program_code_size) != 1 || file.WriteObject(program_code_size_b) != 1) {
127 return false;
128 }
129
130 if (file.WriteArray(program_code.data(), program_code_size) != program_code_size)
131 return false;
132
133 if (HasProgramA() &&
134 file.WriteArray(program_code_b.data(), program_code_size_b) != program_code_size_b) {
135 return false;
136 }
137 return true;
138}
139
140ShaderDiskCacheOpenGL::ShaderDiskCacheOpenGL(Core::System& system) : system{system} {}
141
142std::optional<std::pair<std::vector<ShaderDiskCacheRaw>, std::vector<ShaderDiskCacheUsage>>>
143ShaderDiskCacheOpenGL::LoadTransferable() {
144 // Skip games without title id
145 const bool has_title_id = system.CurrentProcess()->GetTitleID() != 0;
146 if (!Settings::values.use_disk_shader_cache || !has_title_id)
147 return {};
148 tried_to_load = true;
149
150 FileUtil::IOFile file(GetTransferablePath(), "rb");
151 if (!file.IsOpen()) {
152 LOG_INFO(Render_OpenGL, "No transferable shader cache found for game with title id={}",
153 GetTitleID());
154 return {};
155 }
156
157 u32 version{};
158 if (file.ReadBytes(&version, sizeof(version)) != sizeof(version)) {
159 LOG_ERROR(Render_OpenGL,
160 "Failed to get transferable cache version for title id={} - skipping",
161 GetTitleID());
162 return {};
163 }
164
165 if (version < NativeVersion) {
166 LOG_INFO(Render_OpenGL, "Transferable shader cache is old - removing");
167 file.Close();
168 InvalidateTransferable();
169 return {};
170 }
171 if (version > NativeVersion) {
172 LOG_WARNING(Render_OpenGL, "Transferable shader cache was generated with a newer version "
173 "of the emulator - skipping");
174 return {};
175 }
176
177 // Version is valid, load the shaders
178 std::vector<ShaderDiskCacheRaw> raws;
179 std::vector<ShaderDiskCacheUsage> usages;
180 while (file.Tell() < file.GetSize()) {
181 TransferableEntryKind kind{};
182 if (file.ReadBytes(&kind, sizeof(u32)) != sizeof(u32)) {
183 LOG_ERROR(Render_OpenGL, "Failed to read transferable file - skipping");
184 return {};
185 }
186
187 switch (kind) {
188 case TransferableEntryKind::Raw: {
189 ShaderDiskCacheRaw entry;
190 if (!entry.Load(file)) {
191 LOG_ERROR(Render_OpenGL, "Failed to load transferable raw entry - skipping");
192 return {};
193 }
194 transferable.insert({entry.GetUniqueIdentifier(), {}});
195 raws.push_back(std::move(entry));
196 break;
197 }
198 case TransferableEntryKind::Usage: {
199 ShaderDiskCacheUsage usage{};
200 if (file.ReadBytes(&usage, sizeof(usage)) != sizeof(usage)) {
201 LOG_ERROR(Render_OpenGL, "Failed to load transferable usage entry - skipping");
202 return {};
203 }
204 usages.push_back(std::move(usage));
205 break;
206 }
207 default:
208 LOG_ERROR(Render_OpenGL, "Unknown transferable shader cache entry kind={} - skipping",
209 static_cast<u32>(kind));
210 return {};
211 }
212 }
213 return {{raws, usages}};
214}
215
216std::pair<std::unordered_map<u64, ShaderDiskCacheDecompiled>,
217 std::unordered_map<ShaderDiskCacheUsage, ShaderDiskCacheDump>>
218ShaderDiskCacheOpenGL::LoadPrecompiled() {
219 if (!IsUsable())
220 return {};
221
222 FileUtil::IOFile file(GetPrecompiledPath(), "rb");
223 if (!file.IsOpen()) {
224 LOG_INFO(Render_OpenGL, "No precompiled shader cache found for game with title id={}",
225 GetTitleID());
226 return {};
227 }
228
229 const auto result = LoadPrecompiledFile(file);
230 if (!result) {
231 LOG_INFO(Render_OpenGL,
232 "Failed to load precompiled cache for game with title id={} - removing",
233 GetTitleID());
234 file.Close();
235 InvalidatePrecompiled();
236 return {};
237 }
238 return *result;
239}
240
241std::optional<std::pair<std::unordered_map<u64, ShaderDiskCacheDecompiled>,
242 std::unordered_map<ShaderDiskCacheUsage, ShaderDiskCacheDump>>>
243ShaderDiskCacheOpenGL::LoadPrecompiledFile(FileUtil::IOFile& file) {
244 ShaderCacheVersionHash file_hash{};
245 if (file.ReadArray(file_hash.data(), file_hash.size()) != file_hash.size()) {
246 return {};
247 }
248 if (GetShaderCacheVersionHash() != file_hash) {
249 LOG_INFO(Render_OpenGL, "Precompiled cache is from another version of the emulator");
250 return {};
251 }
252
253 std::unordered_map<u64, ShaderDiskCacheDecompiled> decompiled;
254 std::unordered_map<ShaderDiskCacheUsage, ShaderDiskCacheDump> dumps;
255 while (file.Tell() < file.GetSize()) {
256 PrecompiledEntryKind kind{};
257 if (file.ReadBytes(&kind, sizeof(u32)) != sizeof(u32)) {
258 return {};
259 }
260
261 switch (kind) {
262 case PrecompiledEntryKind::Decompiled: {
263 u64 unique_identifier{};
264 if (file.ReadBytes(&unique_identifier, sizeof(u64)) != sizeof(u64))
265 return {};
266
267 const auto entry = LoadDecompiledEntry(file);
268 if (!entry)
269 return {};
270 decompiled.insert({unique_identifier, std::move(*entry)});
271 break;
272 }
273 case PrecompiledEntryKind::Dump: {
274 ShaderDiskCacheUsage usage;
275 if (file.ReadBytes(&usage, sizeof(usage)) != sizeof(usage))
276 return {};
277
278 ShaderDiskCacheDump dump;
279 if (file.ReadBytes(&dump.binary_format, sizeof(u32)) != sizeof(u32))
280 return {};
281
282 u32 binary_length{};
283 u32 compressed_size{};
284 if (file.ReadBytes(&binary_length, sizeof(u32)) != sizeof(u32) ||
285 file.ReadBytes(&compressed_size, sizeof(u32)) != sizeof(u32)) {
286 return {};
287 }
288
289 std::vector<u8> compressed_binary(compressed_size);
290 if (file.ReadArray(compressed_binary.data(), compressed_binary.size()) !=
291 compressed_binary.size()) {
292 return {};
293 }
294
295 dump.binary = DecompressData(compressed_binary, binary_length);
296 if (dump.binary.empty()) {
297 return {};
298 }
299
300 dumps.insert({usage, dump});
301 break;
302 }
303 default:
304 return {};
305 }
306 }
307 return {{decompiled, dumps}};
308}
309
310std::optional<ShaderDiskCacheDecompiled> ShaderDiskCacheOpenGL::LoadDecompiledEntry(
311 FileUtil::IOFile& file) {
312 u32 code_size{};
313 u32 compressed_code_size{};
314 if (file.ReadBytes(&code_size, sizeof(u32)) != sizeof(u32) ||
315 file.ReadBytes(&compressed_code_size, sizeof(u32)) != sizeof(u32)) {
316 return {};
317 }
318
319 std::vector<u8> compressed_code(compressed_code_size);
320 if (file.ReadArray(compressed_code.data(), compressed_code.size()) != compressed_code.size()) {
321 return {};
322 }
323
324 const std::vector<u8> code = DecompressData(compressed_code, code_size);
325 if (code.empty()) {
326 return {};
327 }
328 ShaderDiskCacheDecompiled entry;
329 entry.code = std::string(reinterpret_cast<const char*>(code.data()), code_size);
330
331 u32 const_buffers_count{};
332 if (file.ReadBytes(&const_buffers_count, sizeof(u32)) != sizeof(u32))
333 return {};
334 for (u32 i = 0; i < const_buffers_count; ++i) {
335 u32 max_offset{};
336 u32 index{};
337 u8 is_indirect{};
338 if (file.ReadBytes(&max_offset, sizeof(u32)) != sizeof(u32) ||
339 file.ReadBytes(&index, sizeof(u32)) != sizeof(u32) ||
340 file.ReadBytes(&is_indirect, sizeof(u8)) != sizeof(u8)) {
341 return {};
342 }
343 entry.entries.const_buffers.emplace_back(max_offset, is_indirect != 0, index);
344 }
345
346 u32 samplers_count{};
347 if (file.ReadBytes(&samplers_count, sizeof(u32)) != sizeof(u32))
348 return {};
349 for (u32 i = 0; i < samplers_count; ++i) {
350 u64 offset{};
351 u64 index{};
352 u32 type{};
353 u8 is_array{};
354 u8 is_shadow{};
355 if (file.ReadBytes(&offset, sizeof(u64)) != sizeof(u64) ||
356 file.ReadBytes(&index, sizeof(u64)) != sizeof(u64) ||
357 file.ReadBytes(&type, sizeof(u32)) != sizeof(u32) ||
358 file.ReadBytes(&is_array, sizeof(u8)) != sizeof(u8) ||
359 file.ReadBytes(&is_shadow, sizeof(u8)) != sizeof(u8)) {
360 return {};
361 }
362 entry.entries.samplers.emplace_back(
363 static_cast<std::size_t>(offset), static_cast<std::size_t>(index),
364 static_cast<Tegra::Shader::TextureType>(type), is_array != 0, is_shadow != 0);
365 }
366
367 u32 global_memory_count{};
368 if (file.ReadBytes(&global_memory_count, sizeof(u32)) != sizeof(u32))
369 return {};
370 for (u32 i = 0; i < global_memory_count; ++i) {
371 u32 cbuf_index{};
372 u32 cbuf_offset{};
373 if (file.ReadBytes(&cbuf_index, sizeof(u32)) != sizeof(u32) ||
374 file.ReadBytes(&cbuf_offset, sizeof(u32)) != sizeof(u32)) {
375 return {};
376 }
377 entry.entries.global_memory_entries.emplace_back(cbuf_index, cbuf_offset);
378 }
379
380 for (auto& clip_distance : entry.entries.clip_distances) {
381 u8 clip_distance_raw{};
382 if (file.ReadBytes(&clip_distance_raw, sizeof(u8)) != sizeof(u8))
383 return {};
384 clip_distance = clip_distance_raw != 0;
385 }
386
387 u64 shader_length{};
388 if (file.ReadBytes(&shader_length, sizeof(u64)) != sizeof(u64))
389 return {};
390 entry.entries.shader_length = static_cast<std::size_t>(shader_length);
391
392 return entry;
393}
394
395bool ShaderDiskCacheOpenGL::SaveDecompiledFile(FileUtil::IOFile& file, u64 unique_identifier,
396 const std::string& code,
397 const std::vector<u8>& compressed_code,
398 const GLShader::ShaderEntries& entries) {
399 if (file.WriteObject(static_cast<u32>(PrecompiledEntryKind::Decompiled)) != 1 ||
400 file.WriteObject(unique_identifier) != 1 ||
401 file.WriteObject(static_cast<u32>(code.size())) != 1 ||
402 file.WriteObject(static_cast<u32>(compressed_code.size())) != 1 ||
403 file.WriteArray(compressed_code.data(), compressed_code.size()) != compressed_code.size()) {
404 return false;
405 }
406
407 if (file.WriteObject(static_cast<u32>(entries.const_buffers.size())) != 1)
408 return false;
409 for (const auto& cbuf : entries.const_buffers) {
410 if (file.WriteObject(static_cast<u32>(cbuf.GetMaxOffset())) != 1 ||
411 file.WriteObject(static_cast<u32>(cbuf.GetIndex())) != 1 ||
412 file.WriteObject(static_cast<u8>(cbuf.IsIndirect() ? 1 : 0)) != 1) {
413 return false;
414 }
415 }
416
417 if (file.WriteObject(static_cast<u32>(entries.samplers.size())) != 1)
418 return false;
419 for (const auto& sampler : entries.samplers) {
420 if (file.WriteObject(static_cast<u64>(sampler.GetOffset())) != 1 ||
421 file.WriteObject(static_cast<u64>(sampler.GetIndex())) != 1 ||
422 file.WriteObject(static_cast<u32>(sampler.GetType())) != 1 ||
423 file.WriteObject(static_cast<u8>(sampler.IsArray() ? 1 : 0)) != 1 ||
424 file.WriteObject(static_cast<u8>(sampler.IsShadow() ? 1 : 0)) != 1) {
425 return false;
426 }
427 }
428
429 if (file.WriteObject(static_cast<u32>(entries.global_memory_entries.size())) != 1)
430 return false;
431 for (const auto& gmem : entries.global_memory_entries) {
432 if (file.WriteObject(static_cast<u32>(gmem.GetCbufIndex())) != 1 ||
433 file.WriteObject(static_cast<u32>(gmem.GetCbufOffset())) != 1) {
434 return false;
435 }
436 }
437
438 for (const bool clip_distance : entries.clip_distances) {
439 if (file.WriteObject(static_cast<u8>(clip_distance ? 1 : 0)) != 1)
440 return false;
441 }
442
443 return file.WriteObject(static_cast<u64>(entries.shader_length)) == 1;
444}
445
446void ShaderDiskCacheOpenGL::InvalidateTransferable() const {
447 if (!FileUtil::Delete(GetTransferablePath())) {
448 LOG_ERROR(Render_OpenGL, "Failed to invalidate transferable file={}",
449 GetTransferablePath());
450 }
451 InvalidatePrecompiled();
452}
453
454void ShaderDiskCacheOpenGL::InvalidatePrecompiled() const {
455 if (!FileUtil::Delete(GetPrecompiledPath())) {
456 LOG_ERROR(Render_OpenGL, "Failed to invalidate precompiled file={}", GetPrecompiledPath());
457 }
458}
459
460void ShaderDiskCacheOpenGL::SaveRaw(const ShaderDiskCacheRaw& entry) {
461 if (!IsUsable())
462 return;
463
464 const u64 id = entry.GetUniqueIdentifier();
465 if (transferable.find(id) != transferable.end()) {
466 // The shader already exists
467 return;
468 }
469
470 FileUtil::IOFile file = AppendTransferableFile();
471 if (!file.IsOpen())
472 return;
473 if (file.WriteObject(TransferableEntryKind::Raw) != 1 || !entry.Save(file)) {
474 LOG_ERROR(Render_OpenGL, "Failed to save raw transferable cache entry - removing");
475 file.Close();
476 InvalidateTransferable();
477 return;
478 }
479 transferable.insert({id, {}});
480}
481
482void ShaderDiskCacheOpenGL::SaveUsage(const ShaderDiskCacheUsage& usage) {
483 if (!IsUsable())
484 return;
485
486 const auto it = transferable.find(usage.unique_identifier);
487 ASSERT_MSG(it != transferable.end(), "Saving shader usage without storing raw previously");
488
489 auto& usages{it->second};
490 ASSERT(usages.find(usage) == usages.end());
491 usages.insert(usage);
492
493 FileUtil::IOFile file = AppendTransferableFile();
494 if (!file.IsOpen())
495 return;
496
497 if (file.WriteObject(TransferableEntryKind::Usage) != 1 || file.WriteObject(usage) != 1) {
498 LOG_ERROR(Render_OpenGL, "Failed to save usage transferable cache entry - removing");
499 file.Close();
500 InvalidateTransferable();
501 return;
502 }
503}
504
505void ShaderDiskCacheOpenGL::SaveDecompiled(u64 unique_identifier, const std::string& code,
506 const GLShader::ShaderEntries& entries) {
507 if (!IsUsable())
508 return;
509
510 const std::vector<u8> compressed_code{CompressData(code.data(), code.size())};
511 if (compressed_code.empty()) {
512 LOG_ERROR(Render_OpenGL, "Failed to compress GLSL code - skipping shader {:016x}",
513 unique_identifier);
514 return;
515 }
516
517 FileUtil::IOFile file = AppendPrecompiledFile();
518 if (!file.IsOpen())
519 return;
520
521 if (!SaveDecompiledFile(file, unique_identifier, code, compressed_code, entries)) {
522 LOG_ERROR(Render_OpenGL,
523 "Failed to save decompiled entry to the precompiled file - removing");
524 file.Close();
525 InvalidatePrecompiled();
526 }
527}
528
529void ShaderDiskCacheOpenGL::SaveDump(const ShaderDiskCacheUsage& usage, GLuint program) {
530 if (!IsUsable())
531 return;
532
533 GLint binary_length{};
534 glGetProgramiv(program, GL_PROGRAM_BINARY_LENGTH, &binary_length);
535
536 GLenum binary_format{};
537 std::vector<u8> binary(binary_length);
538 glGetProgramBinary(program, binary_length, nullptr, &binary_format, binary.data());
539
540 const std::vector<u8> compressed_binary = CompressData(binary.data(), binary.size());
541 if (compressed_binary.empty()) {
542 LOG_ERROR(Render_OpenGL, "Failed to compress binary program in shader={:016x}",
543 usage.unique_identifier);
544 return;
545 }
546
547 FileUtil::IOFile file = AppendPrecompiledFile();
548 if (!file.IsOpen())
549 return;
550
551 if (file.WriteObject(static_cast<u32>(PrecompiledEntryKind::Dump)) != 1 ||
552 file.WriteObject(usage) != 1 || file.WriteObject(static_cast<u32>(binary_format)) != 1 ||
553 file.WriteObject(static_cast<u32>(binary_length)) != 1 ||
554 file.WriteObject(static_cast<u32>(compressed_binary.size())) != 1 ||
555 file.WriteArray(compressed_binary.data(), compressed_binary.size()) !=
556 compressed_binary.size()) {
557 LOG_ERROR(Render_OpenGL, "Failed to save binary program file in shader={:016x} - removing",
558 usage.unique_identifier);
559 file.Close();
560 InvalidatePrecompiled();
561 return;
562 }
563}
564
565bool ShaderDiskCacheOpenGL::IsUsable() const {
566 return tried_to_load && Settings::values.use_disk_shader_cache;
567}
568
569FileUtil::IOFile ShaderDiskCacheOpenGL::AppendTransferableFile() const {
570 if (!EnsureDirectories())
571 return {};
572
573 const auto transferable_path{GetTransferablePath()};
574 const bool existed = FileUtil::Exists(transferable_path);
575
576 FileUtil::IOFile file(transferable_path, "ab");
577 if (!file.IsOpen()) {
578 LOG_ERROR(Render_OpenGL, "Failed to open transferable cache in path={}", transferable_path);
579 return {};
580 }
581 if (!existed || file.GetSize() == 0) {
582 // If the file didn't exist, write its version
583 if (file.WriteObject(NativeVersion) != 1) {
584 LOG_ERROR(Render_OpenGL, "Failed to write transferable cache version in path={}",
585 transferable_path);
586 return {};
587 }
588 }
589 return file;
590}
591
592FileUtil::IOFile ShaderDiskCacheOpenGL::AppendPrecompiledFile() const {
593 if (!EnsureDirectories())
594 return {};
595
596 const auto precompiled_path{GetPrecompiledPath()};
597 const bool existed = FileUtil::Exists(precompiled_path);
598
599 FileUtil::IOFile file(precompiled_path, "ab");
600 if (!file.IsOpen()) {
601 LOG_ERROR(Render_OpenGL, "Failed to open precompiled cache in path={}", precompiled_path);
602 return {};
603 }
604
605 if (!existed || file.GetSize() == 0) {
606 const auto hash{GetShaderCacheVersionHash()};
607 if (file.WriteArray(hash.data(), hash.size()) != hash.size()) {
608 LOG_ERROR(Render_OpenGL, "Failed to write precompiled cache version hash in path={}",
609 precompiled_path);
610 return {};
611 }
612 }
613 return file;
614}
615
616bool ShaderDiskCacheOpenGL::EnsureDirectories() const {
617 const auto CreateDir = [](const std::string& dir) {
618 if (!FileUtil::CreateDir(dir)) {
619 LOG_ERROR(Render_OpenGL, "Failed to create directory={}", dir);
620 return false;
621 }
622 return true;
623 };
624
625 return CreateDir(FileUtil::GetUserPath(FileUtil::UserPath::ShaderDir)) &&
626 CreateDir(GetBaseDir()) && CreateDir(GetTransferableDir()) &&
627 CreateDir(GetPrecompiledDir());
628}
629
630std::string ShaderDiskCacheOpenGL::GetTransferablePath() const {
631 return FileUtil::SanitizePath(GetTransferableDir() + DIR_SEP_CHR + GetTitleID() + ".bin");
632}
633
634std::string ShaderDiskCacheOpenGL::GetPrecompiledPath() const {
635 return FileUtil::SanitizePath(GetPrecompiledDir() + DIR_SEP_CHR + GetTitleID() + ".bin");
636}
637
638std::string ShaderDiskCacheOpenGL::GetTransferableDir() const {
639 return GetBaseDir() + DIR_SEP "transferable";
640}
641
642std::string ShaderDiskCacheOpenGL::GetPrecompiledDir() const {
643 return GetBaseDir() + DIR_SEP "precompiled";
644}
645
646std::string ShaderDiskCacheOpenGL::GetBaseDir() const {
647 return FileUtil::GetUserPath(FileUtil::UserPath::ShaderDir) + DIR_SEP "opengl";
648}
649
650std::string ShaderDiskCacheOpenGL::GetTitleID() const {
651 return fmt::format("{:016X}", system.CurrentProcess()->GetTitleID());
652}
653
654} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_shader_disk_cache.h b/src/video_core/renderer_opengl/gl_shader_disk_cache.h
new file mode 100644
index 000000000..6be0c0547
--- /dev/null
+++ b/src/video_core/renderer_opengl/gl_shader_disk_cache.h
@@ -0,0 +1,245 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <optional>
8#include <string>
9#include <tuple>
10#include <unordered_map>
11#include <unordered_set>
12#include <utility>
13#include <vector>
14
15#include <glad/glad.h>
16
17#include "common/assert.h"
18#include "common/common_types.h"
19#include "video_core/engines/maxwell_3d.h"
20#include "video_core/renderer_opengl/gl_shader_gen.h"
21
22namespace Core {
23class System;
24}
25
26namespace FileUtil {
27class IOFile;
28}
29
30namespace OpenGL {
31
32using ProgramCode = std::vector<u64>;
33using Maxwell = Tegra::Engines::Maxwell3D::Regs;
34
35/// Allocated bindings used by an OpenGL shader program
36struct BaseBindings {
37 u32 cbuf{};
38 u32 gmem{};
39 u32 sampler{};
40
41 bool operator==(const BaseBindings& rhs) const {
42 return std::tie(cbuf, gmem, sampler) == std::tie(rhs.cbuf, rhs.gmem, rhs.sampler);
43 }
44
45 bool operator!=(const BaseBindings& rhs) const {
46 return !operator==(rhs);
47 }
48};
49
50/// Describes how a shader is used
51struct ShaderDiskCacheUsage {
52 u64 unique_identifier{};
53 BaseBindings bindings;
54 GLenum primitive{};
55
56 bool operator==(const ShaderDiskCacheUsage& rhs) const {
57 return std::tie(unique_identifier, bindings, primitive) ==
58 std::tie(rhs.unique_identifier, rhs.bindings, rhs.primitive);
59 }
60
61 bool operator!=(const ShaderDiskCacheUsage& rhs) const {
62 return !operator==(rhs);
63 }
64};
65
66} // namespace OpenGL
67
68namespace std {
69
70template <>
71struct hash<OpenGL::BaseBindings> {
72 std::size_t operator()(const OpenGL::BaseBindings& bindings) const {
73 return bindings.cbuf | bindings.gmem << 8 | bindings.sampler << 16;
74 }
75};
76
77template <>
78struct hash<OpenGL::ShaderDiskCacheUsage> {
79 std::size_t operator()(const OpenGL::ShaderDiskCacheUsage& usage) const {
80 return static_cast<std::size_t>(usage.unique_identifier) ^
81 std::hash<OpenGL::BaseBindings>()(usage.bindings) ^ usage.primitive << 16;
82 }
83};
84
85} // namespace std
86
87namespace OpenGL {
88
89/// Describes a shader how it's used by the guest GPU
90class ShaderDiskCacheRaw {
91public:
92 explicit ShaderDiskCacheRaw(u64 unique_identifier, Maxwell::ShaderProgram program_type,
93 u32 program_code_size, u32 program_code_size_b,
94 ProgramCode program_code, ProgramCode program_code_b);
95 ShaderDiskCacheRaw();
96 ~ShaderDiskCacheRaw();
97
98 bool Load(FileUtil::IOFile& file);
99
100 bool Save(FileUtil::IOFile& file) const;
101
102 u64 GetUniqueIdentifier() const {
103 return unique_identifier;
104 }
105
106 bool HasProgramA() const {
107 return program_type == Maxwell::ShaderProgram::VertexA;
108 }
109
110 Maxwell::ShaderProgram GetProgramType() const {
111 return program_type;
112 }
113
114 Maxwell::ShaderStage GetProgramStage() const {
115 switch (program_type) {
116 case Maxwell::ShaderProgram::VertexA:
117 case Maxwell::ShaderProgram::VertexB:
118 return Maxwell::ShaderStage::Vertex;
119 case Maxwell::ShaderProgram::TesselationControl:
120 return Maxwell::ShaderStage::TesselationControl;
121 case Maxwell::ShaderProgram::TesselationEval:
122 return Maxwell::ShaderStage::TesselationEval;
123 case Maxwell::ShaderProgram::Geometry:
124 return Maxwell::ShaderStage::Geometry;
125 case Maxwell::ShaderProgram::Fragment:
126 return Maxwell::ShaderStage::Fragment;
127 }
128 UNREACHABLE();
129 }
130
131 const ProgramCode& GetProgramCode() const {
132 return program_code;
133 }
134
135 const ProgramCode& GetProgramCodeB() const {
136 return program_code_b;
137 }
138
139private:
140 u64 unique_identifier{};
141 Maxwell::ShaderProgram program_type{};
142 u32 program_code_size{};
143 u32 program_code_size_b{};
144
145 ProgramCode program_code;
146 ProgramCode program_code_b;
147};
148
149/// Contains decompiled data from a shader
150struct ShaderDiskCacheDecompiled {
151 std::string code;
152 GLShader::ShaderEntries entries;
153};
154
155/// Contains an OpenGL dumped binary program
156struct ShaderDiskCacheDump {
157 GLenum binary_format;
158 std::vector<u8> binary;
159};
160
161class ShaderDiskCacheOpenGL {
162public:
163 explicit ShaderDiskCacheOpenGL(Core::System& system);
164
165 /// Loads transferable cache. If file has a old version or on failure, it deletes the file.
166 std::optional<std::pair<std::vector<ShaderDiskCacheRaw>, std::vector<ShaderDiskCacheUsage>>>
167 LoadTransferable();
168
169 /// Loads current game's precompiled cache. Invalidates on failure.
170 std::pair<std::unordered_map<u64, ShaderDiskCacheDecompiled>,
171 std::unordered_map<ShaderDiskCacheUsage, ShaderDiskCacheDump>>
172 LoadPrecompiled();
173
174 /// Removes the transferable (and precompiled) cache file.
175 void InvalidateTransferable() const;
176
177 /// Removes the precompiled cache file.
178 void InvalidatePrecompiled() const;
179
180 /// Saves a raw dump to the transferable file. Checks for collisions.
181 void SaveRaw(const ShaderDiskCacheRaw& entry);
182
183 /// Saves shader usage to the transferable file. Does not check for collisions.
184 void SaveUsage(const ShaderDiskCacheUsage& usage);
185
186 /// Saves a decompiled entry to the precompiled file. Does not check for collisions.
187 void SaveDecompiled(u64 unique_identifier, const std::string& code,
188 const GLShader::ShaderEntries& entries);
189
190 /// Saves a dump entry to the precompiled file. Does not check for collisions.
191 void SaveDump(const ShaderDiskCacheUsage& usage, GLuint program);
192
193private:
194 /// Loads the transferable cache. Returns empty on failure.
195 std::optional<std::pair<std::unordered_map<u64, ShaderDiskCacheDecompiled>,
196 std::unordered_map<ShaderDiskCacheUsage, ShaderDiskCacheDump>>>
197 LoadPrecompiledFile(FileUtil::IOFile& file);
198
199 /// Loads a decompiled cache entry from the passed file. Returns empty on failure.
200 std::optional<ShaderDiskCacheDecompiled> LoadDecompiledEntry(FileUtil::IOFile& file);
201
202 /// Saves a decompiled entry to the passed file. Returns true on success.
203 bool SaveDecompiledFile(FileUtil::IOFile& file, u64 unique_identifier, const std::string& code,
204 const std::vector<u8>& compressed_code,
205 const GLShader::ShaderEntries& entries);
206
207 /// Returns if the cache can be used
208 bool IsUsable() const;
209
210 /// Opens current game's transferable file and write it's header if it doesn't exist
211 FileUtil::IOFile AppendTransferableFile() const;
212
213 /// Opens current game's precompiled file and write it's header if it doesn't exist
214 FileUtil::IOFile AppendPrecompiledFile() const;
215
216 /// Create shader disk cache directories. Returns true on success.
217 bool EnsureDirectories() const;
218
219 /// Gets current game's transferable file path
220 std::string GetTransferablePath() const;
221
222 /// Gets current game's precompiled file path
223 std::string GetPrecompiledPath() const;
224
225 /// Get user's transferable directory path
226 std::string GetTransferableDir() const;
227
228 /// Get user's precompiled directory path
229 std::string GetPrecompiledDir() const;
230
231 /// Get user's shader directory path
232 std::string GetBaseDir() const;
233
234 /// Get current game's title id
235 std::string GetTitleID() const;
236
237 // Copre system
238 Core::System& system;
239 // Stored transferable shaders
240 std::map<u64, std::unordered_set<ShaderDiskCacheUsage>> transferable;
241 // The cache has been loaded at boot
242 bool tried_to_load{};
243};
244
245} // namespace OpenGL \ No newline at end of file
diff --git a/src/video_core/renderer_opengl/gl_shader_gen.cpp b/src/video_core/renderer_opengl/gl_shader_gen.cpp
index 04e1db911..7d96649af 100644
--- a/src/video_core/renderer_opengl/gl_shader_gen.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_gen.cpp
@@ -124,7 +124,7 @@ layout (location = 5) out vec4 FragColor5;
124layout (location = 6) out vec4 FragColor6; 124layout (location = 6) out vec4 FragColor6;
125layout (location = 7) out vec4 FragColor7; 125layout (location = 7) out vec4 FragColor7;
126 126
127layout (location = 0) in vec4 position; 127layout (location = 0) in noperspective vec4 position;
128 128
129layout (std140, binding = EMULATION_UBO_BINDING) uniform fs_config { 129layout (std140, binding = EMULATION_UBO_BINDING) uniform fs_config {
130 vec4 viewport_flip; 130 vec4 viewport_flip;
@@ -172,4 +172,4 @@ void main() {
172 return {out, program.second}; 172 return {out, program.second};
173} 173}
174 174
175} // namespace OpenGL::GLShader \ No newline at end of file 175} // namespace OpenGL::GLShader
diff --git a/src/video_core/renderer_opengl/gl_shader_gen.h b/src/video_core/renderer_opengl/gl_shader_gen.h
index ac5e6917b..fba8e681b 100644
--- a/src/video_core/renderer_opengl/gl_shader_gen.h
+++ b/src/video_core/renderer_opengl/gl_shader_gen.h
@@ -26,12 +26,10 @@ struct ShaderSetup {
26 ProgramCode code; 26 ProgramCode code;
27 ProgramCode code_b; // Used for dual vertex shaders 27 ProgramCode code_b; // Used for dual vertex shaders
28 u64 unique_identifier; 28 u64 unique_identifier;
29 std::size_t real_size;
30 std::size_t real_size_b;
31 } program; 29 } program;
32 30
33 /// Used in scenarios where we have a dual vertex shaders 31 /// Used in scenarios where we have a dual vertex shaders
34 void SetProgramB(ProgramCode&& program_b) { 32 void SetProgramB(ProgramCode program_b) {
35 program.code_b = std::move(program_b); 33 program.code_b = std::move(program_b);
36 has_program_b = true; 34 has_program_b = true;
37 } 35 }
diff --git a/src/video_core/renderer_opengl/gl_shader_util.h b/src/video_core/renderer_opengl/gl_shader_util.h
index 285594f50..03b7548c2 100644
--- a/src/video_core/renderer_opengl/gl_shader_util.h
+++ b/src/video_core/renderer_opengl/gl_shader_util.h
@@ -47,7 +47,7 @@ GLuint LoadShader(const char* source, GLenum type);
47 * @returns Handle of the newly created OpenGL program object 47 * @returns Handle of the newly created OpenGL program object
48 */ 48 */
49template <typename... T> 49template <typename... T>
50GLuint LoadProgram(bool separable_program, T... shaders) { 50GLuint LoadProgram(bool separable_program, bool hint_retrievable, T... shaders) {
51 // Link the program 51 // Link the program
52 LOG_DEBUG(Render_OpenGL, "Linking program..."); 52 LOG_DEBUG(Render_OpenGL, "Linking program...");
53 53
@@ -58,6 +58,9 @@ GLuint LoadProgram(bool separable_program, T... shaders) {
58 if (separable_program) { 58 if (separable_program) {
59 glProgramParameteri(program_id, GL_PROGRAM_SEPARABLE, GL_TRUE); 59 glProgramParameteri(program_id, GL_PROGRAM_SEPARABLE, GL_TRUE);
60 } 60 }
61 if (hint_retrievable) {
62 glProgramParameteri(program_id, GL_PROGRAM_BINARY_RETRIEVABLE_HINT, GL_TRUE);
63 }
61 64
62 glLinkProgram(program_id); 65 glLinkProgram(program_id);
63 66
diff --git a/src/video_core/renderer_opengl/gl_state.cpp b/src/video_core/renderer_opengl/gl_state.cpp
index b7ba59350..9419326a3 100644
--- a/src/video_core/renderer_opengl/gl_state.cpp
+++ b/src/video_core/renderer_opengl/gl_state.cpp
@@ -11,7 +11,9 @@
11namespace OpenGL { 11namespace OpenGL {
12 12
13OpenGLState OpenGLState::cur_state; 13OpenGLState OpenGLState::cur_state;
14
14bool OpenGLState::s_rgb_used; 15bool OpenGLState::s_rgb_used;
16
15OpenGLState::OpenGLState() { 17OpenGLState::OpenGLState() {
16 // These all match default OpenGL values 18 // These all match default OpenGL values
17 geometry_shaders.enabled = false; 19 geometry_shaders.enabled = false;
@@ -112,7 +114,6 @@ void OpenGLState::ApplyDefaultState() {
112} 114}
113 115
114void OpenGLState::ApplySRgb() const { 116void OpenGLState::ApplySRgb() const {
115 // sRGB
116 if (framebuffer_srgb.enabled != cur_state.framebuffer_srgb.enabled) { 117 if (framebuffer_srgb.enabled != cur_state.framebuffer_srgb.enabled) {
117 if (framebuffer_srgb.enabled) { 118 if (framebuffer_srgb.enabled) {
118 // Track if sRGB is used 119 // Track if sRGB is used
@@ -125,23 +126,20 @@ void OpenGLState::ApplySRgb() const {
125} 126}
126 127
127void OpenGLState::ApplyCulling() const { 128void OpenGLState::ApplyCulling() const {
128 // Culling 129 if (cull.enabled != cur_state.cull.enabled) {
129 const bool cull_changed = cull.enabled != cur_state.cull.enabled;
130 if (cull_changed) {
131 if (cull.enabled) { 130 if (cull.enabled) {
132 glEnable(GL_CULL_FACE); 131 glEnable(GL_CULL_FACE);
133 } else { 132 } else {
134 glDisable(GL_CULL_FACE); 133 glDisable(GL_CULL_FACE);
135 } 134 }
136 } 135 }
137 if (cull.enabled) {
138 if (cull_changed || cull.mode != cur_state.cull.mode) {
139 glCullFace(cull.mode);
140 }
141 136
142 if (cull_changed || cull.front_face != cur_state.cull.front_face) { 137 if (cull.mode != cur_state.cull.mode) {
143 glFrontFace(cull.front_face); 138 glCullFace(cull.mode);
144 } 139 }
140
141 if (cull.front_face != cur_state.cull.front_face) {
142 glFrontFace(cull.front_face);
145 } 143 }
146} 144}
147 145
@@ -172,72 +170,63 @@ void OpenGLState::ApplyColorMask() const {
172} 170}
173 171
174void OpenGLState::ApplyDepth() const { 172void OpenGLState::ApplyDepth() const {
175 // Depth test 173 if (depth.test_enabled != cur_state.depth.test_enabled) {
176 const bool depth_test_changed = depth.test_enabled != cur_state.depth.test_enabled;
177 if (depth_test_changed) {
178 if (depth.test_enabled) { 174 if (depth.test_enabled) {
179 glEnable(GL_DEPTH_TEST); 175 glEnable(GL_DEPTH_TEST);
180 } else { 176 } else {
181 glDisable(GL_DEPTH_TEST); 177 glDisable(GL_DEPTH_TEST);
182 } 178 }
183 } 179 }
184 if (depth.test_enabled && 180
185 (depth_test_changed || depth.test_func != cur_state.depth.test_func)) { 181 if (depth.test_func != cur_state.depth.test_func) {
186 glDepthFunc(depth.test_func); 182 glDepthFunc(depth.test_func);
187 } 183 }
188 // Depth mask 184
189 if (depth.write_mask != cur_state.depth.write_mask) { 185 if (depth.write_mask != cur_state.depth.write_mask) {
190 glDepthMask(depth.write_mask); 186 glDepthMask(depth.write_mask);
191 } 187 }
192} 188}
193 189
194void OpenGLState::ApplyPrimitiveRestart() const { 190void OpenGLState::ApplyPrimitiveRestart() const {
195 const bool primitive_restart_changed = 191 if (primitive_restart.enabled != cur_state.primitive_restart.enabled) {
196 primitive_restart.enabled != cur_state.primitive_restart.enabled;
197 if (primitive_restart_changed) {
198 if (primitive_restart.enabled) { 192 if (primitive_restart.enabled) {
199 glEnable(GL_PRIMITIVE_RESTART); 193 glEnable(GL_PRIMITIVE_RESTART);
200 } else { 194 } else {
201 glDisable(GL_PRIMITIVE_RESTART); 195 glDisable(GL_PRIMITIVE_RESTART);
202 } 196 }
203 } 197 }
204 if (primitive_restart_changed || 198
205 (primitive_restart.enabled && 199 if (primitive_restart.index != cur_state.primitive_restart.index) {
206 primitive_restart.index != cur_state.primitive_restart.index)) {
207 glPrimitiveRestartIndex(primitive_restart.index); 200 glPrimitiveRestartIndex(primitive_restart.index);
208 } 201 }
209} 202}
210 203
211void OpenGLState::ApplyStencilTest() const { 204void OpenGLState::ApplyStencilTest() const {
212 const bool stencil_test_changed = stencil.test_enabled != cur_state.stencil.test_enabled; 205 if (stencil.test_enabled != cur_state.stencil.test_enabled) {
213 if (stencil_test_changed) {
214 if (stencil.test_enabled) { 206 if (stencil.test_enabled) {
215 glEnable(GL_STENCIL_TEST); 207 glEnable(GL_STENCIL_TEST);
216 } else { 208 } else {
217 glDisable(GL_STENCIL_TEST); 209 glDisable(GL_STENCIL_TEST);
218 } 210 }
219 } 211 }
220 if (stencil.test_enabled) { 212
221 auto config_stencil = [stencil_test_changed](GLenum face, const auto& config, 213 const auto ConfigStencil = [](GLenum face, const auto& config, const auto& prev_config) {
222 const auto& prev_config) { 214 if (config.test_func != prev_config.test_func || config.test_ref != prev_config.test_ref ||
223 if (stencil_test_changed || config.test_func != prev_config.test_func || 215 config.test_mask != prev_config.test_mask) {
224 config.test_ref != prev_config.test_ref || 216 glStencilFuncSeparate(face, config.test_func, config.test_ref, config.test_mask);
225 config.test_mask != prev_config.test_mask) { 217 }
226 glStencilFuncSeparate(face, config.test_func, config.test_ref, config.test_mask); 218 if (config.action_depth_fail != prev_config.action_depth_fail ||
227 } 219 config.action_depth_pass != prev_config.action_depth_pass ||
228 if (stencil_test_changed || config.action_depth_fail != prev_config.action_depth_fail || 220 config.action_stencil_fail != prev_config.action_stencil_fail) {
229 config.action_depth_pass != prev_config.action_depth_pass || 221 glStencilOpSeparate(face, config.action_stencil_fail, config.action_depth_fail,
230 config.action_stencil_fail != prev_config.action_stencil_fail) { 222 config.action_depth_pass);
231 glStencilOpSeparate(face, config.action_stencil_fail, config.action_depth_fail, 223 }
232 config.action_depth_pass); 224 if (config.write_mask != prev_config.write_mask) {
233 } 225 glStencilMaskSeparate(face, config.write_mask);
234 if (config.write_mask != prev_config.write_mask) { 226 }
235 glStencilMaskSeparate(face, config.write_mask); 227 };
236 } 228 ConfigStencil(GL_FRONT, stencil.front, cur_state.stencil.front);
237 }; 229 ConfigStencil(GL_BACK, stencil.back, cur_state.stencil.back);
238 config_stencil(GL_FRONT, stencil.front, cur_state.stencil.front);
239 config_stencil(GL_BACK, stencil.back, cur_state.stencil.back);
240 }
241} 230}
242// Viewport does not affects glClearBuffer so emulate viewport using scissor test 231// Viewport does not affects glClearBuffer so emulate viewport using scissor test
243void OpenGLState::EmulateViewportWithScissor() { 232void OpenGLState::EmulateViewportWithScissor() {
@@ -278,19 +267,18 @@ void OpenGLState::ApplyViewport() const {
278 updated.depth_range_far != current.depth_range_far) { 267 updated.depth_range_far != current.depth_range_far) {
279 glDepthRangeIndexed(i, updated.depth_range_near, updated.depth_range_far); 268 glDepthRangeIndexed(i, updated.depth_range_near, updated.depth_range_far);
280 } 269 }
281 const bool scissor_changed = updated.scissor.enabled != current.scissor.enabled; 270
282 if (scissor_changed) { 271 if (updated.scissor.enabled != current.scissor.enabled) {
283 if (updated.scissor.enabled) { 272 if (updated.scissor.enabled) {
284 glEnablei(GL_SCISSOR_TEST, i); 273 glEnablei(GL_SCISSOR_TEST, i);
285 } else { 274 } else {
286 glDisablei(GL_SCISSOR_TEST, i); 275 glDisablei(GL_SCISSOR_TEST, i);
287 } 276 }
288 } 277 }
289 if (updated.scissor.enabled && 278
290 (scissor_changed || updated.scissor.x != current.scissor.x || 279 if (updated.scissor.x != current.scissor.x || updated.scissor.y != current.scissor.y ||
291 updated.scissor.y != current.scissor.y || 280 updated.scissor.width != current.scissor.width ||
292 updated.scissor.width != current.scissor.width || 281 updated.scissor.height != current.scissor.height) {
293 updated.scissor.height != current.scissor.height)) {
294 glScissorIndexed(i, updated.scissor.x, updated.scissor.y, updated.scissor.width, 282 glScissorIndexed(i, updated.scissor.x, updated.scissor.y, updated.scissor.width,
295 updated.scissor.height); 283 updated.scissor.height);
296 } 284 }
@@ -302,22 +290,23 @@ void OpenGLState::ApplyViewport() const {
302 updated.height != current.height) { 290 updated.height != current.height) {
303 glViewport(updated.x, updated.y, updated.width, updated.height); 291 glViewport(updated.x, updated.y, updated.width, updated.height);
304 } 292 }
293
305 if (updated.depth_range_near != current.depth_range_near || 294 if (updated.depth_range_near != current.depth_range_near ||
306 updated.depth_range_far != current.depth_range_far) { 295 updated.depth_range_far != current.depth_range_far) {
307 glDepthRange(updated.depth_range_near, updated.depth_range_far); 296 glDepthRange(updated.depth_range_near, updated.depth_range_far);
308 } 297 }
309 const bool scissor_changed = updated.scissor.enabled != current.scissor.enabled; 298
310 if (scissor_changed) { 299 if (updated.scissor.enabled != current.scissor.enabled) {
311 if (updated.scissor.enabled) { 300 if (updated.scissor.enabled) {
312 glEnable(GL_SCISSOR_TEST); 301 glEnable(GL_SCISSOR_TEST);
313 } else { 302 } else {
314 glDisable(GL_SCISSOR_TEST); 303 glDisable(GL_SCISSOR_TEST);
315 } 304 }
316 } 305 }
317 if (updated.scissor.enabled && (scissor_changed || updated.scissor.x != current.scissor.x || 306
318 updated.scissor.y != current.scissor.y || 307 if (updated.scissor.x != current.scissor.x || updated.scissor.y != current.scissor.y ||
319 updated.scissor.width != current.scissor.width || 308 updated.scissor.width != current.scissor.width ||
320 updated.scissor.height != current.scissor.height)) { 309 updated.scissor.height != current.scissor.height) {
321 glScissor(updated.scissor.x, updated.scissor.y, updated.scissor.width, 310 glScissor(updated.scissor.x, updated.scissor.y, updated.scissor.width,
322 updated.scissor.height); 311 updated.scissor.height);
323 } 312 }
@@ -327,8 +316,7 @@ void OpenGLState::ApplyViewport() const {
327void OpenGLState::ApplyGlobalBlending() const { 316void OpenGLState::ApplyGlobalBlending() const {
328 const Blend& current = cur_state.blend[0]; 317 const Blend& current = cur_state.blend[0];
329 const Blend& updated = blend[0]; 318 const Blend& updated = blend[0];
330 const bool blend_changed = updated.enabled != current.enabled; 319 if (updated.enabled != current.enabled) {
331 if (blend_changed) {
332 if (updated.enabled) { 320 if (updated.enabled) {
333 glEnable(GL_BLEND); 321 glEnable(GL_BLEND);
334 } else { 322 } else {
@@ -338,15 +326,14 @@ void OpenGLState::ApplyGlobalBlending() const {
338 if (!updated.enabled) { 326 if (!updated.enabled) {
339 return; 327 return;
340 } 328 }
341 if (blend_changed || updated.src_rgb_func != current.src_rgb_func || 329 if (updated.src_rgb_func != current.src_rgb_func ||
342 updated.dst_rgb_func != current.dst_rgb_func || updated.src_a_func != current.src_a_func || 330 updated.dst_rgb_func != current.dst_rgb_func || updated.src_a_func != current.src_a_func ||
343 updated.dst_a_func != current.dst_a_func) { 331 updated.dst_a_func != current.dst_a_func) {
344 glBlendFuncSeparate(updated.src_rgb_func, updated.dst_rgb_func, updated.src_a_func, 332 glBlendFuncSeparate(updated.src_rgb_func, updated.dst_rgb_func, updated.src_a_func,
345 updated.dst_a_func); 333 updated.dst_a_func);
346 } 334 }
347 335
348 if (blend_changed || updated.rgb_equation != current.rgb_equation || 336 if (updated.rgb_equation != current.rgb_equation || updated.a_equation != current.a_equation) {
349 updated.a_equation != current.a_equation) {
350 glBlendEquationSeparate(updated.rgb_equation, updated.a_equation); 337 glBlendEquationSeparate(updated.rgb_equation, updated.a_equation);
351 } 338 }
352} 339}
@@ -354,26 +341,22 @@ void OpenGLState::ApplyGlobalBlending() const {
354void OpenGLState::ApplyTargetBlending(std::size_t target, bool force) const { 341void OpenGLState::ApplyTargetBlending(std::size_t target, bool force) const {
355 const Blend& updated = blend[target]; 342 const Blend& updated = blend[target];
356 const Blend& current = cur_state.blend[target]; 343 const Blend& current = cur_state.blend[target];
357 const bool blend_changed = updated.enabled != current.enabled || force; 344 if (updated.enabled != current.enabled || force) {
358 if (blend_changed) {
359 if (updated.enabled) { 345 if (updated.enabled) {
360 glEnablei(GL_BLEND, static_cast<GLuint>(target)); 346 glEnablei(GL_BLEND, static_cast<GLuint>(target));
361 } else { 347 } else {
362 glDisablei(GL_BLEND, static_cast<GLuint>(target)); 348 glDisablei(GL_BLEND, static_cast<GLuint>(target));
363 } 349 }
364 } 350 }
365 if (!updated.enabled) { 351
366 return; 352 if (updated.src_rgb_func != current.src_rgb_func ||
367 }
368 if (blend_changed || updated.src_rgb_func != current.src_rgb_func ||
369 updated.dst_rgb_func != current.dst_rgb_func || updated.src_a_func != current.src_a_func || 353 updated.dst_rgb_func != current.dst_rgb_func || updated.src_a_func != current.src_a_func ||
370 updated.dst_a_func != current.dst_a_func) { 354 updated.dst_a_func != current.dst_a_func) {
371 glBlendFuncSeparatei(static_cast<GLuint>(target), updated.src_rgb_func, 355 glBlendFuncSeparatei(static_cast<GLuint>(target), updated.src_rgb_func,
372 updated.dst_rgb_func, updated.src_a_func, updated.dst_a_func); 356 updated.dst_rgb_func, updated.src_a_func, updated.dst_a_func);
373 } 357 }
374 358
375 if (blend_changed || updated.rgb_equation != current.rgb_equation || 359 if (updated.rgb_equation != current.rgb_equation || updated.a_equation != current.a_equation) {
376 updated.a_equation != current.a_equation) {
377 glBlendEquationSeparatei(static_cast<GLuint>(target), updated.rgb_equation, 360 glBlendEquationSeparatei(static_cast<GLuint>(target), updated.rgb_equation,
378 updated.a_equation); 361 updated.a_equation);
379 } 362 }
@@ -397,8 +380,7 @@ void OpenGLState::ApplyBlending() const {
397} 380}
398 381
399void OpenGLState::ApplyLogicOp() const { 382void OpenGLState::ApplyLogicOp() const {
400 const bool logic_op_changed = logic_op.enabled != cur_state.logic_op.enabled; 383 if (logic_op.enabled != cur_state.logic_op.enabled) {
401 if (logic_op_changed) {
402 if (logic_op.enabled) { 384 if (logic_op.enabled) {
403 glEnable(GL_COLOR_LOGIC_OP); 385 glEnable(GL_COLOR_LOGIC_OP);
404 } else { 386 } else {
@@ -406,14 +388,12 @@ void OpenGLState::ApplyLogicOp() const {
406 } 388 }
407 } 389 }
408 390
409 if (logic_op.enabled && 391 if (logic_op.operation != cur_state.logic_op.operation) {
410 (logic_op_changed || logic_op.operation != cur_state.logic_op.operation)) {
411 glLogicOp(logic_op.operation); 392 glLogicOp(logic_op.operation);
412 } 393 }
413} 394}
414 395
415void OpenGLState::ApplyPolygonOffset() const { 396void OpenGLState::ApplyPolygonOffset() const {
416
417 const bool fill_enable_changed = 397 const bool fill_enable_changed =
418 polygon_offset.fill_enable != cur_state.polygon_offset.fill_enable; 398 polygon_offset.fill_enable != cur_state.polygon_offset.fill_enable;
419 const bool line_enable_changed = 399 const bool line_enable_changed =
@@ -448,9 +428,7 @@ void OpenGLState::ApplyPolygonOffset() const {
448 } 428 }
449 } 429 }
450 430
451 if ((polygon_offset.fill_enable || polygon_offset.line_enable || polygon_offset.point_enable) && 431 if (factor_changed || units_changed || clamp_changed) {
452 (factor_changed || units_changed || clamp_changed)) {
453
454 if (GLAD_GL_EXT_polygon_offset_clamp && polygon_offset.clamp != 0) { 432 if (GLAD_GL_EXT_polygon_offset_clamp && polygon_offset.clamp != 0) {
455 glPolygonOffsetClamp(polygon_offset.factor, polygon_offset.units, polygon_offset.clamp); 433 glPolygonOffsetClamp(polygon_offset.factor, polygon_offset.units, polygon_offset.clamp);
456 } else { 434 } else {
@@ -462,29 +440,35 @@ void OpenGLState::ApplyPolygonOffset() const {
462} 440}
463 441
464void OpenGLState::ApplyTextures() const { 442void OpenGLState::ApplyTextures() const {
443 bool has_delta{};
444 std::size_t first{};
445 std::size_t last{};
446 std::array<GLuint, Tegra::Engines::Maxwell3D::Regs::NumTextureSamplers> textures;
447
465 for (std::size_t i = 0; i < std::size(texture_units); ++i) { 448 for (std::size_t i = 0; i < std::size(texture_units); ++i) {
466 const auto& texture_unit = texture_units[i]; 449 const auto& texture_unit = texture_units[i];
467 const auto& cur_state_texture_unit = cur_state.texture_units[i]; 450 const auto& cur_state_texture_unit = cur_state.texture_units[i];
451 textures[i] = texture_unit.texture;
468 452
469 if (texture_unit.texture != cur_state_texture_unit.texture) { 453 if (textures[i] != cur_state_texture_unit.texture) {
470 glActiveTexture(TextureUnits::MaxwellTexture(static_cast<int>(i)).Enum()); 454 if (!has_delta) {
471 glBindTexture(texture_unit.target, texture_unit.texture); 455 first = i;
472 } 456 has_delta = true;
473 // Update the texture swizzle 457 }
474 if (texture_unit.swizzle.r != cur_state_texture_unit.swizzle.r || 458 last = i;
475 texture_unit.swizzle.g != cur_state_texture_unit.swizzle.g ||
476 texture_unit.swizzle.b != cur_state_texture_unit.swizzle.b ||
477 texture_unit.swizzle.a != cur_state_texture_unit.swizzle.a) {
478 std::array<GLint, 4> mask = {texture_unit.swizzle.r, texture_unit.swizzle.g,
479 texture_unit.swizzle.b, texture_unit.swizzle.a};
480 glTexParameteriv(texture_unit.target, GL_TEXTURE_SWIZZLE_RGBA, mask.data());
481 } 459 }
482 } 460 }
461
462 if (has_delta) {
463 glBindTextures(static_cast<GLuint>(first), static_cast<GLsizei>(last - first + 1),
464 textures.data() + first);
465 }
483} 466}
484 467
485void OpenGLState::ApplySamplers() const { 468void OpenGLState::ApplySamplers() const {
486 bool has_delta{}; 469 bool has_delta{};
487 std::size_t first{}, last{}; 470 std::size_t first{};
471 std::size_t last{};
488 std::array<GLuint, Tegra::Engines::Maxwell3D::Regs::NumTextureSamplers> samplers; 472 std::array<GLuint, Tegra::Engines::Maxwell3D::Regs::NumTextureSamplers> samplers;
489 for (std::size_t i = 0; i < std::size(samplers); ++i) { 473 for (std::size_t i = 0; i < std::size(samplers); ++i) {
490 samplers[i] = texture_units[i].sampler; 474 samplers[i] = texture_units[i].sampler;
@@ -498,7 +482,7 @@ void OpenGLState::ApplySamplers() const {
498 } 482 }
499 if (has_delta) { 483 if (has_delta) {
500 glBindSamplers(static_cast<GLuint>(first), static_cast<GLsizei>(last - first + 1), 484 glBindSamplers(static_cast<GLuint>(first), static_cast<GLsizei>(last - first + 1),
501 samplers.data()); 485 samplers.data() + first);
502 } 486 }
503} 487}
504 488
@@ -522,9 +506,9 @@ void OpenGLState::ApplyDepthClamp() const {
522 depth_clamp.near_plane == cur_state.depth_clamp.near_plane) { 506 depth_clamp.near_plane == cur_state.depth_clamp.near_plane) {
523 return; 507 return;
524 } 508 }
525 if (depth_clamp.far_plane != depth_clamp.near_plane) { 509 UNIMPLEMENTED_IF_MSG(depth_clamp.far_plane != depth_clamp.near_plane,
526 UNIMPLEMENTED_MSG("Unimplemented Depth Clamp Separation!"); 510 "Unimplemented Depth Clamp Separation!");
527 } 511
528 if (depth_clamp.far_plane || depth_clamp.near_plane) { 512 if (depth_clamp.far_plane || depth_clamp.near_plane) {
529 glEnable(GL_DEPTH_CLAMP); 513 glEnable(GL_DEPTH_CLAMP);
530 } else { 514 } else {
diff --git a/src/video_core/renderer_opengl/gl_state.h b/src/video_core/renderer_opengl/gl_state.h
index a5a7c0920..9e1eda5b1 100644
--- a/src/video_core/renderer_opengl/gl_state.h
+++ b/src/video_core/renderer_opengl/gl_state.h
@@ -126,26 +126,14 @@ public:
126 struct TextureUnit { 126 struct TextureUnit {
127 GLuint texture; // GL_TEXTURE_BINDING_2D 127 GLuint texture; // GL_TEXTURE_BINDING_2D
128 GLuint sampler; // GL_SAMPLER_BINDING 128 GLuint sampler; // GL_SAMPLER_BINDING
129 GLenum target;
130 struct {
131 GLint r; // GL_TEXTURE_SWIZZLE_R
132 GLint g; // GL_TEXTURE_SWIZZLE_G
133 GLint b; // GL_TEXTURE_SWIZZLE_B
134 GLint a; // GL_TEXTURE_SWIZZLE_A
135 } swizzle;
136 129
137 void Unbind() { 130 void Unbind() {
138 texture = 0; 131 texture = 0;
139 swizzle.r = GL_RED;
140 swizzle.g = GL_GREEN;
141 swizzle.b = GL_BLUE;
142 swizzle.a = GL_ALPHA;
143 } 132 }
144 133
145 void Reset() { 134 void Reset() {
146 Unbind(); 135 Unbind();
147 sampler = 0; 136 sampler = 0;
148 target = GL_TEXTURE_2D;
149 } 137 }
150 }; 138 };
151 std::array<TextureUnit, Tegra::Engines::Maxwell3D::Regs::NumTextureSamplers> texture_units; 139 std::array<TextureUnit, Tegra::Engines::Maxwell3D::Regs::NumTextureSamplers> texture_units;
diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp
index e37b65b38..5e3d862c6 100644
--- a/src/video_core/renderer_opengl/renderer_opengl.cpp
+++ b/src/video_core/renderer_opengl/renderer_opengl.cpp
@@ -98,8 +98,8 @@ static std::array<GLfloat, 3 * 2> MakeOrthographicMatrix(const float width, cons
98 return matrix; 98 return matrix;
99} 99}
100 100
101RendererOpenGL::RendererOpenGL(Core::Frontend::EmuWindow& window) 101RendererOpenGL::RendererOpenGL(Core::Frontend::EmuWindow& window, Core::System& system)
102 : VideoCore::RendererBase{window} {} 102 : VideoCore::RendererBase{window}, system{system} {}
103 103
104RendererOpenGL::~RendererOpenGL() = default; 104RendererOpenGL::~RendererOpenGL() = default;
105 105
@@ -107,7 +107,7 @@ RendererOpenGL::~RendererOpenGL() = default;
107void RendererOpenGL::SwapBuffers( 107void RendererOpenGL::SwapBuffers(
108 std::optional<std::reference_wrapper<const Tegra::FramebufferConfig>> framebuffer) { 108 std::optional<std::reference_wrapper<const Tegra::FramebufferConfig>> framebuffer) {
109 109
110 Core::System::GetInstance().GetPerfStats().EndSystemFrame(); 110 system.GetPerfStats().EndSystemFrame();
111 111
112 // Maintain the rasterizer's state as a priority 112 // Maintain the rasterizer's state as a priority
113 OpenGLState prev_state = OpenGLState::GetCurState(); 113 OpenGLState prev_state = OpenGLState::GetCurState();
@@ -137,8 +137,8 @@ void RendererOpenGL::SwapBuffers(
137 137
138 render_window.PollEvents(); 138 render_window.PollEvents();
139 139
140 Core::System::GetInstance().FrameLimiter().DoFrameLimiting(CoreTiming::GetGlobalTimeUs()); 140 system.FrameLimiter().DoFrameLimiting(system.CoreTiming().GetGlobalTimeUs());
141 Core::System::GetInstance().GetPerfStats().BeginSystemFrame(); 141 system.GetPerfStats().BeginSystemFrame();
142 142
143 // Restore the rasterizer state 143 // Restore the rasterizer state
144 prev_state.Apply(); 144 prev_state.Apply();
@@ -164,17 +164,14 @@ void RendererOpenGL::LoadFBToScreenInfo(const Tegra::FramebufferConfig& framebuf
164 // Reset the screen info's display texture to its own permanent texture 164 // Reset the screen info's display texture to its own permanent texture
165 screen_info.display_texture = screen_info.texture.resource.handle; 165 screen_info.display_texture = screen_info.texture.resource.handle;
166 166
167 Memory::RasterizerFlushVirtualRegion(framebuffer_addr, size_in_bytes, 167 rasterizer->FlushRegion(ToCacheAddr(Memory::GetPointer(framebuffer_addr)), size_in_bytes);
168 Memory::FlushMode::Flush);
169 168
170 VideoCore::MortonCopyPixels128(framebuffer.width, framebuffer.height, bytes_per_pixel, 4, 169 constexpr u32 linear_bpp = 4;
171 Memory::GetPointer(framebuffer_addr), 170 VideoCore::MortonCopyPixels128(VideoCore::MortonSwizzleMode::MortonToLinear,
172 gl_framebuffer_data.data(), true); 171 framebuffer.width, framebuffer.height, bytes_per_pixel,
172 linear_bpp, Memory::GetPointer(framebuffer_addr),
173 gl_framebuffer_data.data());
173 174
174 state.texture_units[0].texture = screen_info.texture.resource.handle;
175 state.Apply();
176
177 glActiveTexture(GL_TEXTURE0);
178 glPixelStorei(GL_UNPACK_ROW_LENGTH, static_cast<GLint>(framebuffer.stride)); 175 glPixelStorei(GL_UNPACK_ROW_LENGTH, static_cast<GLint>(framebuffer.stride));
179 176
180 // Update existing texture 177 // Update existing texture
@@ -182,14 +179,11 @@ void RendererOpenGL::LoadFBToScreenInfo(const Tegra::FramebufferConfig& framebuf
182 // they differ from the LCD resolution. 179 // they differ from the LCD resolution.
183 // TODO: Applications could theoretically crash yuzu here by specifying too large 180 // TODO: Applications could theoretically crash yuzu here by specifying too large
184 // framebuffer sizes. We should make sure that this cannot happen. 181 // framebuffer sizes. We should make sure that this cannot happen.
185 glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, framebuffer.width, framebuffer.height, 182 glTextureSubImage2D(screen_info.texture.resource.handle, 0, 0, 0, framebuffer.width,
186 screen_info.texture.gl_format, screen_info.texture.gl_type, 183 framebuffer.height, screen_info.texture.gl_format,
187 gl_framebuffer_data.data()); 184 screen_info.texture.gl_type, gl_framebuffer_data.data());
188 185
189 glPixelStorei(GL_UNPACK_ROW_LENGTH, 0); 186 glPixelStorei(GL_UNPACK_ROW_LENGTH, 0);
190
191 state.texture_units[0].texture = 0;
192 state.Apply();
193 } 187 }
194} 188}
195 189
@@ -199,17 +193,8 @@ void RendererOpenGL::LoadFBToScreenInfo(const Tegra::FramebufferConfig& framebuf
199 */ 193 */
200void RendererOpenGL::LoadColorToActiveGLTexture(u8 color_r, u8 color_g, u8 color_b, u8 color_a, 194void RendererOpenGL::LoadColorToActiveGLTexture(u8 color_r, u8 color_g, u8 color_b, u8 color_a,
201 const TextureInfo& texture) { 195 const TextureInfo& texture) {
202 state.texture_units[0].texture = texture.resource.handle; 196 const u8 framebuffer_data[4] = {color_a, color_b, color_g, color_r};
203 state.Apply(); 197 glClearTexImage(texture.resource.handle, 0, GL_RGBA, GL_UNSIGNED_BYTE, framebuffer_data);
204
205 glActiveTexture(GL_TEXTURE0);
206 u8 framebuffer_data[4] = {color_a, color_b, color_g, color_r};
207
208 // Update existing texture
209 glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, 1, 1, 0, GL_RGBA, GL_UNSIGNED_BYTE, framebuffer_data);
210
211 state.texture_units[0].texture = 0;
212 state.Apply();
213} 198}
214 199
215/** 200/**
@@ -249,55 +234,57 @@ void RendererOpenGL::InitOpenGLObjects() {
249 sizeof(ScreenRectVertex)); 234 sizeof(ScreenRectVertex));
250 235
251 // Allocate textures for the screen 236 // Allocate textures for the screen
252 screen_info.texture.resource.Create(); 237 screen_info.texture.resource.Create(GL_TEXTURE_2D);
253 238
254 // Allocation of storage is deferred until the first frame, when we 239 const GLuint texture = screen_info.texture.resource.handle;
255 // know the framebuffer size. 240 glTextureStorage2D(texture, 1, GL_RGBA8, 1, 1);
256
257 state.texture_units[0].texture = screen_info.texture.resource.handle;
258 state.Apply();
259
260 glActiveTexture(GL_TEXTURE0);
261 glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAX_LEVEL, 0);
262 glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR);
263 glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
264 glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
265 glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
266 241
267 screen_info.display_texture = screen_info.texture.resource.handle; 242 screen_info.display_texture = screen_info.texture.resource.handle;
268 243
269 state.texture_units[0].texture = 0;
270 state.Apply();
271
272 // Clear screen to black 244 // Clear screen to black
273 LoadColorToActiveGLTexture(0, 0, 0, 0, screen_info.texture); 245 LoadColorToActiveGLTexture(0, 0, 0, 0, screen_info.texture);
274} 246}
275 247
248void RendererOpenGL::AddTelemetryFields() {
249 const char* const gl_version{reinterpret_cast<char const*>(glGetString(GL_VERSION))};
250 const char* const gpu_vendor{reinterpret_cast<char const*>(glGetString(GL_VENDOR))};
251 const char* const gpu_model{reinterpret_cast<char const*>(glGetString(GL_RENDERER))};
252
253 LOG_INFO(Render_OpenGL, "GL_VERSION: {}", gl_version);
254 LOG_INFO(Render_OpenGL, "GL_VENDOR: {}", gpu_vendor);
255 LOG_INFO(Render_OpenGL, "GL_RENDERER: {}", gpu_model);
256
257 auto& telemetry_session = system.TelemetrySession();
258 telemetry_session.AddField(Telemetry::FieldType::UserSystem, "GPU_Vendor", gpu_vendor);
259 telemetry_session.AddField(Telemetry::FieldType::UserSystem, "GPU_Model", gpu_model);
260 telemetry_session.AddField(Telemetry::FieldType::UserSystem, "GPU_OpenGL_Version", gl_version);
261}
262
276void RendererOpenGL::CreateRasterizer() { 263void RendererOpenGL::CreateRasterizer() {
277 if (rasterizer) { 264 if (rasterizer) {
278 return; 265 return;
279 } 266 }
280 // Initialize sRGB Usage 267 // Initialize sRGB Usage
281 OpenGLState::ClearsRGBUsed(); 268 OpenGLState::ClearsRGBUsed();
282 rasterizer = std::make_unique<RasterizerOpenGL>(render_window, screen_info); 269 rasterizer = std::make_unique<RasterizerOpenGL>(render_window, system, screen_info);
283} 270}
284 271
285void RendererOpenGL::ConfigureFramebufferTexture(TextureInfo& texture, 272void RendererOpenGL::ConfigureFramebufferTexture(TextureInfo& texture,
286 const Tegra::FramebufferConfig& framebuffer) { 273 const Tegra::FramebufferConfig& framebuffer) {
287
288 texture.width = framebuffer.width; 274 texture.width = framebuffer.width;
289 texture.height = framebuffer.height; 275 texture.height = framebuffer.height;
276 texture.pixel_format = framebuffer.pixel_format;
290 277
291 GLint internal_format; 278 GLint internal_format;
292 switch (framebuffer.pixel_format) { 279 switch (framebuffer.pixel_format) {
293 case Tegra::FramebufferConfig::PixelFormat::ABGR8: 280 case Tegra::FramebufferConfig::PixelFormat::ABGR8:
294 internal_format = GL_RGBA; 281 internal_format = GL_RGBA8;
295 texture.gl_format = GL_RGBA; 282 texture.gl_format = GL_RGBA;
296 texture.gl_type = GL_UNSIGNED_INT_8_8_8_8_REV; 283 texture.gl_type = GL_UNSIGNED_INT_8_8_8_8_REV;
297 gl_framebuffer_data.resize(texture.width * texture.height * 4); 284 gl_framebuffer_data.resize(texture.width * texture.height * 4);
298 break; 285 break;
299 default: 286 default:
300 internal_format = GL_RGBA; 287 internal_format = GL_RGBA8;
301 texture.gl_format = GL_RGBA; 288 texture.gl_format = GL_RGBA;
302 texture.gl_type = GL_UNSIGNED_INT_8_8_8_8_REV; 289 texture.gl_type = GL_UNSIGNED_INT_8_8_8_8_REV;
303 gl_framebuffer_data.resize(texture.width * texture.height * 4); 290 gl_framebuffer_data.resize(texture.width * texture.height * 4);
@@ -306,15 +293,9 @@ void RendererOpenGL::ConfigureFramebufferTexture(TextureInfo& texture,
306 UNREACHABLE(); 293 UNREACHABLE();
307 } 294 }
308 295
309 state.texture_units[0].texture = texture.resource.handle; 296 texture.resource.Release();
310 state.Apply(); 297 texture.resource.Create(GL_TEXTURE_2D);
311 298 glTextureStorage2D(texture.resource.handle, 1, internal_format, texture.width, texture.height);
312 glActiveTexture(GL_TEXTURE0);
313 glTexImage2D(GL_TEXTURE_2D, 0, internal_format, texture.width, texture.height, 0,
314 texture.gl_format, texture.gl_type, nullptr);
315
316 state.texture_units[0].texture = 0;
317 state.Apply();
318} 299}
319 300
320void RendererOpenGL::DrawScreenTriangles(const ScreenInfo& screen_info, float x, float y, float w, 301void RendererOpenGL::DrawScreenTriangles(const ScreenInfo& screen_info, float x, float y, float w,
@@ -356,7 +337,6 @@ void RendererOpenGL::DrawScreenTriangles(const ScreenInfo& screen_info, float x,
356 }}; 337 }};
357 338
358 state.texture_units[0].texture = screen_info.display_texture; 339 state.texture_units[0].texture = screen_info.display_texture;
359 state.texture_units[0].swizzle = {GL_RED, GL_GREEN, GL_BLUE, GL_ALPHA};
360 // Workaround brigthness problems in SMO by enabling sRGB in the final output 340 // Workaround brigthness problems in SMO by enabling sRGB in the final output
361 // if it has been used in the frame. Needed because of this bug in QT: QTBUG-50987 341 // if it has been used in the frame. Needed because of this bug in QT: QTBUG-50987
362 state.framebuffer_srgb.enabled = OpenGLState::GetsRGBUsed(); 342 state.framebuffer_srgb.enabled = OpenGLState::GetsRGBUsed();
@@ -417,7 +397,8 @@ void RendererOpenGL::CaptureScreenshot() {
417 GLuint renderbuffer; 397 GLuint renderbuffer;
418 glGenRenderbuffers(1, &renderbuffer); 398 glGenRenderbuffers(1, &renderbuffer);
419 glBindRenderbuffer(GL_RENDERBUFFER, renderbuffer); 399 glBindRenderbuffer(GL_RENDERBUFFER, renderbuffer);
420 glRenderbufferStorage(GL_RENDERBUFFER, GL_RGB8, layout.width, layout.height); 400 glRenderbufferStorage(GL_RENDERBUFFER, state.GetsRGBUsed() ? GL_SRGB8 : GL_RGB8, layout.width,
401 layout.height);
421 glFramebufferRenderbuffer(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_RENDERBUFFER, renderbuffer); 402 glFramebufferRenderbuffer(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_RENDERBUFFER, renderbuffer);
422 403
423 DrawScreen(layout); 404 DrawScreen(layout);
@@ -501,17 +482,7 @@ bool RendererOpenGL::Init() {
501 glDebugMessageCallback(DebugHandler, nullptr); 482 glDebugMessageCallback(DebugHandler, nullptr);
502 } 483 }
503 484
504 const char* gl_version{reinterpret_cast<char const*>(glGetString(GL_VERSION))}; 485 AddTelemetryFields();
505 const char* gpu_vendor{reinterpret_cast<char const*>(glGetString(GL_VENDOR))};
506 const char* gpu_model{reinterpret_cast<char const*>(glGetString(GL_RENDERER))};
507
508 LOG_INFO(Render_OpenGL, "GL_VERSION: {}", gl_version);
509 LOG_INFO(Render_OpenGL, "GL_VENDOR: {}", gpu_vendor);
510 LOG_INFO(Render_OpenGL, "GL_RENDERER: {}", gpu_model);
511
512 Core::Telemetry().AddField(Telemetry::FieldType::UserSystem, "GPU_Vendor", gpu_vendor);
513 Core::Telemetry().AddField(Telemetry::FieldType::UserSystem, "GPU_Model", gpu_model);
514 Core::Telemetry().AddField(Telemetry::FieldType::UserSystem, "GPU_OpenGL_Version", gl_version);
515 486
516 if (!GLAD_GL_VERSION_4_3) { 487 if (!GLAD_GL_VERSION_4_3) {
517 return false; 488 return false;
diff --git a/src/video_core/renderer_opengl/renderer_opengl.h b/src/video_core/renderer_opengl/renderer_opengl.h
index 1665018db..6cbf9d2cb 100644
--- a/src/video_core/renderer_opengl/renderer_opengl.h
+++ b/src/video_core/renderer_opengl/renderer_opengl.h
@@ -12,6 +12,10 @@
12#include "video_core/renderer_opengl/gl_resource_manager.h" 12#include "video_core/renderer_opengl/gl_resource_manager.h"
13#include "video_core/renderer_opengl/gl_state.h" 13#include "video_core/renderer_opengl/gl_state.h"
14 14
15namespace Core {
16class System;
17}
18
15namespace Core::Frontend { 19namespace Core::Frontend {
16class EmuWindow; 20class EmuWindow;
17} 21}
@@ -35,13 +39,13 @@ struct TextureInfo {
35/// Structure used for storing information about the display target for the Switch screen 39/// Structure used for storing information about the display target for the Switch screen
36struct ScreenInfo { 40struct ScreenInfo {
37 GLuint display_texture; 41 GLuint display_texture;
38 const MathUtil::Rectangle<float> display_texcoords{0.0f, 0.0f, 1.0f, 1.0f}; 42 const Common::Rectangle<float> display_texcoords{0.0f, 0.0f, 1.0f, 1.0f};
39 TextureInfo texture; 43 TextureInfo texture;
40}; 44};
41 45
42class RendererOpenGL : public VideoCore::RendererBase { 46class RendererOpenGL : public VideoCore::RendererBase {
43public: 47public:
44 explicit RendererOpenGL(Core::Frontend::EmuWindow& window); 48 explicit RendererOpenGL(Core::Frontend::EmuWindow& window, Core::System& system);
45 ~RendererOpenGL() override; 49 ~RendererOpenGL() override;
46 50
47 /// Swap buffers (render frame) 51 /// Swap buffers (render frame)
@@ -56,6 +60,7 @@ public:
56 60
57private: 61private:
58 void InitOpenGLObjects(); 62 void InitOpenGLObjects();
63 void AddTelemetryFields();
59 void CreateRasterizer(); 64 void CreateRasterizer();
60 65
61 void ConfigureFramebufferTexture(TextureInfo& texture, 66 void ConfigureFramebufferTexture(TextureInfo& texture,
@@ -72,6 +77,8 @@ private:
72 void LoadColorToActiveGLTexture(u8 color_r, u8 color_g, u8 color_b, u8 color_a, 77 void LoadColorToActiveGLTexture(u8 color_r, u8 color_g, u8 color_b, u8 color_a,
73 const TextureInfo& texture); 78 const TextureInfo& texture);
74 79
80 Core::System& system;
81
75 OpenGLState state; 82 OpenGLState state;
76 83
77 // OpenGL object IDs 84 // OpenGL object IDs
@@ -96,7 +103,7 @@ private:
96 103
97 /// Used for transforming the framebuffer orientation 104 /// Used for transforming the framebuffer orientation
98 Tegra::FramebufferConfig::TransformFlags framebuffer_transform_flags; 105 Tegra::FramebufferConfig::TransformFlags framebuffer_transform_flags;
99 MathUtil::Rectangle<int> framebuffer_crop_rect; 106 Common::Rectangle<int> framebuffer_crop_rect;
100}; 107};
101 108
102} // namespace OpenGL 109} // namespace OpenGL
diff --git a/src/video_core/renderer_vulkan/declarations.h b/src/video_core/renderer_vulkan/declarations.h
new file mode 100644
index 000000000..ba25b5bc7
--- /dev/null
+++ b/src/video_core/renderer_vulkan/declarations.h
@@ -0,0 +1,45 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <vulkan/vulkan.hpp>
8
9namespace Vulkan {
10
// vulkan.hpp's built-in unique handles are bound to DispatchLoaderStatic; these aliases rebind
// them to DispatchLoaderDynamic so handle destruction is dispatched through the dynamically
// loaded function pointers instead of the statically linked entry points.
template <typename T>
using UniqueHandle = vk::UniqueHandle<T, vk::DispatchLoaderDynamic>;

// Convenience aliases mirroring vk::Unique* for every handle type used by the Vulkan backend.
using UniqueAccelerationStructureNV = UniqueHandle<vk::AccelerationStructureNV>;
using UniqueBuffer = UniqueHandle<vk::Buffer>;
using UniqueBufferView = UniqueHandle<vk::BufferView>;
using UniqueCommandBuffer = UniqueHandle<vk::CommandBuffer>;
using UniqueCommandPool = UniqueHandle<vk::CommandPool>;
using UniqueDescriptorPool = UniqueHandle<vk::DescriptorPool>;
using UniqueDescriptorSet = UniqueHandle<vk::DescriptorSet>;
using UniqueDescriptorSetLayout = UniqueHandle<vk::DescriptorSetLayout>;
using UniqueDescriptorUpdateTemplate = UniqueHandle<vk::DescriptorUpdateTemplate>;
using UniqueDevice = UniqueHandle<vk::Device>;
using UniqueDeviceMemory = UniqueHandle<vk::DeviceMemory>;
using UniqueEvent = UniqueHandle<vk::Event>;
using UniqueFence = UniqueHandle<vk::Fence>;
using UniqueFramebuffer = UniqueHandle<vk::Framebuffer>;
using UniqueImage = UniqueHandle<vk::Image>;
using UniqueImageView = UniqueHandle<vk::ImageView>;
using UniqueIndirectCommandsLayoutNVX = UniqueHandle<vk::IndirectCommandsLayoutNVX>;
using UniqueObjectTableNVX = UniqueHandle<vk::ObjectTableNVX>;
using UniquePipeline = UniqueHandle<vk::Pipeline>;
using UniquePipelineCache = UniqueHandle<vk::PipelineCache>;
using UniquePipelineLayout = UniqueHandle<vk::PipelineLayout>;
using UniqueQueryPool = UniqueHandle<vk::QueryPool>;
using UniqueRenderPass = UniqueHandle<vk::RenderPass>;
using UniqueSampler = UniqueHandle<vk::Sampler>;
using UniqueSamplerYcbcrConversion = UniqueHandle<vk::SamplerYcbcrConversion>;
using UniqueSemaphore = UniqueHandle<vk::Semaphore>;
using UniqueShaderModule = UniqueHandle<vk::ShaderModule>;
using UniqueSwapchainKHR = UniqueHandle<vk::SwapchainKHR>;
using UniqueValidationCacheEXT = UniqueHandle<vk::ValidationCacheEXT>;
44
45} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp
new file mode 100644
index 000000000..34bf26ff2
--- /dev/null
+++ b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp
@@ -0,0 +1,483 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/assert.h"
6#include "common/common_types.h"
7#include "common/logging/log.h"
8#include "video_core/engines/maxwell_3d.h"
9#include "video_core/renderer_vulkan/declarations.h"
10#include "video_core/renderer_vulkan/maxwell_to_vk.h"
11#include "video_core/renderer_vulkan/vk_device.h"
12#include "video_core/surface.h"
13
14namespace Vulkan::MaxwellToVK {
15
16namespace Sampler {
17
18vk::Filter Filter(Tegra::Texture::TextureFilter filter) {
19 switch (filter) {
20 case Tegra::Texture::TextureFilter::Linear:
21 return vk::Filter::eLinear;
22 case Tegra::Texture::TextureFilter::Nearest:
23 return vk::Filter::eNearest;
24 }
25 UNIMPLEMENTED_MSG("Unimplemented sampler filter={}", static_cast<u32>(filter));
26 return {};
27}
28
29vk::SamplerMipmapMode MipmapMode(Tegra::Texture::TextureMipmapFilter mipmap_filter) {
30 switch (mipmap_filter) {
31 case Tegra::Texture::TextureMipmapFilter::None:
32 // TODO(Rodrigo): None seems to be mapped to OpenGL's mag and min filters without mipmapping
33 // (e.g. GL_NEAREST and GL_LINEAR). Vulkan doesn't have such a thing, find out if we have to
34 // use an image view with a single mipmap level to emulate this.
35 return vk::SamplerMipmapMode::eLinear;
36 case Tegra::Texture::TextureMipmapFilter::Linear:
37 return vk::SamplerMipmapMode::eLinear;
38 case Tegra::Texture::TextureMipmapFilter::Nearest:
39 return vk::SamplerMipmapMode::eNearest;
40 }
41 UNIMPLEMENTED_MSG("Unimplemented sampler mipmap mode={}", static_cast<u32>(mipmap_filter));
42 return {};
43}
44
45vk::SamplerAddressMode WrapMode(Tegra::Texture::WrapMode wrap_mode) {
46 switch (wrap_mode) {
47 case Tegra::Texture::WrapMode::Wrap:
48 return vk::SamplerAddressMode::eRepeat;
49 case Tegra::Texture::WrapMode::Mirror:
50 return vk::SamplerAddressMode::eMirroredRepeat;
51 case Tegra::Texture::WrapMode::ClampToEdge:
52 return vk::SamplerAddressMode::eClampToEdge;
53 case Tegra::Texture::WrapMode::Border:
54 return vk::SamplerAddressMode::eClampToBorder;
55 case Tegra::Texture::WrapMode::ClampOGL:
56 // TODO(Rodrigo): GL_CLAMP was removed as of OpenGL 3.1, to implement GL_CLAMP, we can use
57 // eClampToBorder to get the border color of the texture, and then sample the edge to
58 // manually mix them. However the shader part of this is not yet implemented.
59 return vk::SamplerAddressMode::eClampToBorder;
60 case Tegra::Texture::WrapMode::MirrorOnceClampToEdge:
61 return vk::SamplerAddressMode::eMirrorClampToEdge;
62 case Tegra::Texture::WrapMode::MirrorOnceBorder:
63 UNIMPLEMENTED();
64 return vk::SamplerAddressMode::eMirrorClampToEdge;
65 }
66 UNIMPLEMENTED_MSG("Unimplemented wrap mode={}", static_cast<u32>(wrap_mode));
67 return {};
68}
69
70vk::CompareOp DepthCompareFunction(Tegra::Texture::DepthCompareFunc depth_compare_func) {
71 switch (depth_compare_func) {
72 case Tegra::Texture::DepthCompareFunc::Never:
73 return vk::CompareOp::eNever;
74 case Tegra::Texture::DepthCompareFunc::Less:
75 return vk::CompareOp::eLess;
76 case Tegra::Texture::DepthCompareFunc::LessEqual:
77 return vk::CompareOp::eLessOrEqual;
78 case Tegra::Texture::DepthCompareFunc::Equal:
79 return vk::CompareOp::eEqual;
80 case Tegra::Texture::DepthCompareFunc::NotEqual:
81 return vk::CompareOp::eNotEqual;
82 case Tegra::Texture::DepthCompareFunc::Greater:
83 return vk::CompareOp::eGreater;
84 case Tegra::Texture::DepthCompareFunc::GreaterEqual:
85 return vk::CompareOp::eGreaterOrEqual;
86 case Tegra::Texture::DepthCompareFunc::Always:
87 return vk::CompareOp::eAlways;
88 }
89 UNIMPLEMENTED_MSG("Unimplemented sampler depth compare function={}",
90 static_cast<u32>(depth_compare_func));
91 return {};
92}
93
94} // namespace Sampler
95
/// Pairs a Vulkan format with the metadata used to validate and use it as a texture format.
struct FormatTuple {
    vk::Format format;            ///< Vulkan format
    ComponentType component_type; ///< Abstracted component type
    bool attachable;              ///< True when this format can be used as an attachment
};
101
102static constexpr std::array<FormatTuple, VideoCore::Surface::MaxPixelFormat> tex_format_tuples = {{
103 {vk::Format::eA8B8G8R8UnormPack32, ComponentType::UNorm, true}, // ABGR8U
104 {vk::Format::eUndefined, ComponentType::Invalid, false}, // ABGR8S
105 {vk::Format::eUndefined, ComponentType::Invalid, false}, // ABGR8UI
106 {vk::Format::eB5G6R5UnormPack16, ComponentType::UNorm, false}, // B5G6R5U
107 {vk::Format::eA2B10G10R10UnormPack32, ComponentType::UNorm, true}, // A2B10G10R10U
108 {vk::Format::eUndefined, ComponentType::Invalid, false}, // A1B5G5R5U
109 {vk::Format::eR8Unorm, ComponentType::UNorm, true}, // R8U
110 {vk::Format::eUndefined, ComponentType::Invalid, false}, // R8UI
111 {vk::Format::eUndefined, ComponentType::Invalid, false}, // RGBA16F
112 {vk::Format::eUndefined, ComponentType::Invalid, false}, // RGBA16U
113 {vk::Format::eUndefined, ComponentType::Invalid, false}, // RGBA16UI
114 {vk::Format::eUndefined, ComponentType::Invalid, false}, // R11FG11FB10F
115 {vk::Format::eUndefined, ComponentType::Invalid, false}, // RGBA32UI
116 {vk::Format::eBc1RgbaUnormBlock, ComponentType::UNorm, false}, // DXT1
117 {vk::Format::eBc2UnormBlock, ComponentType::UNorm, false}, // DXT23
118 {vk::Format::eBc3UnormBlock, ComponentType::UNorm, false}, // DXT45
119 {vk::Format::eBc4UnormBlock, ComponentType::UNorm, false}, // DXN1
120 {vk::Format::eUndefined, ComponentType::Invalid, false}, // DXN2UNORM
121 {vk::Format::eUndefined, ComponentType::Invalid, false}, // DXN2SNORM
122 {vk::Format::eUndefined, ComponentType::Invalid, false}, // BC7U
123 {vk::Format::eUndefined, ComponentType::Invalid, false}, // BC6H_UF16
124 {vk::Format::eUndefined, ComponentType::Invalid, false}, // BC6H_SF16
125 {vk::Format::eUndefined, ComponentType::Invalid, false}, // ASTC_2D_4X4
126 {vk::Format::eUndefined, ComponentType::Invalid, false}, // BGRA8
127 {vk::Format::eUndefined, ComponentType::Invalid, false}, // RGBA32F
128 {vk::Format::eUndefined, ComponentType::Invalid, false}, // RG32F
129 {vk::Format::eUndefined, ComponentType::Invalid, false}, // R32F
130 {vk::Format::eUndefined, ComponentType::Invalid, false}, // R16F
131 {vk::Format::eUndefined, ComponentType::Invalid, false}, // R16U
132 {vk::Format::eUndefined, ComponentType::Invalid, false}, // R16S
133 {vk::Format::eUndefined, ComponentType::Invalid, false}, // R16UI
134 {vk::Format::eUndefined, ComponentType::Invalid, false}, // R16I
135 {vk::Format::eUndefined, ComponentType::Invalid, false}, // RG16
136 {vk::Format::eUndefined, ComponentType::Invalid, false}, // RG16F
137 {vk::Format::eUndefined, ComponentType::Invalid, false}, // RG16UI
138 {vk::Format::eUndefined, ComponentType::Invalid, false}, // RG16I
139 {vk::Format::eUndefined, ComponentType::Invalid, false}, // RG16S
140 {vk::Format::eUndefined, ComponentType::Invalid, false}, // RGB32F
141 {vk::Format::eA8B8G8R8SrgbPack32, ComponentType::UNorm, true}, // RGBA8_SRGB
142 {vk::Format::eUndefined, ComponentType::Invalid, false}, // RG8U
143 {vk::Format::eUndefined, ComponentType::Invalid, false}, // RG8S
144 {vk::Format::eUndefined, ComponentType::Invalid, false}, // RG32UI
145 {vk::Format::eUndefined, ComponentType::Invalid, false}, // R32UI
146 {vk::Format::eUndefined, ComponentType::Invalid, false}, // ASTC_2D_8X8
147 {vk::Format::eUndefined, ComponentType::Invalid, false}, // ASTC_2D_8X5
148 {vk::Format::eUndefined, ComponentType::Invalid, false}, // ASTC_2D_5X4
149
150 // Compressed sRGB formats
151 {vk::Format::eUndefined, ComponentType::Invalid, false}, // BGRA8_SRGB
152 {vk::Format::eUndefined, ComponentType::Invalid, false}, // DXT1_SRGB
153 {vk::Format::eUndefined, ComponentType::Invalid, false}, // DXT23_SRGB
154 {vk::Format::eUndefined, ComponentType::Invalid, false}, // DXT45_SRGB
155 {vk::Format::eUndefined, ComponentType::Invalid, false}, // BC7U_SRGB
156 {vk::Format::eUndefined, ComponentType::Invalid, false}, // ASTC_2D_4X4_SRGB
157 {vk::Format::eUndefined, ComponentType::Invalid, false}, // ASTC_2D_8X8_SRGB
158 {vk::Format::eUndefined, ComponentType::Invalid, false}, // ASTC_2D_8X5_SRGB
159 {vk::Format::eUndefined, ComponentType::Invalid, false}, // ASTC_2D_5X4_SRGB
160 {vk::Format::eUndefined, ComponentType::Invalid, false}, // ASTC_2D_5X5
161 {vk::Format::eUndefined, ComponentType::Invalid, false}, // ASTC_2D_5X5_SRGB
162 {vk::Format::eUndefined, ComponentType::Invalid, false}, // ASTC_2D_10X8
163 {vk::Format::eUndefined, ComponentType::Invalid, false}, // ASTC_2D_10X8_SRGB
164
165 // Depth formats
166 {vk::Format::eD32Sfloat, ComponentType::Float, true}, // Z32F
167 {vk::Format::eD16Unorm, ComponentType::UNorm, true}, // Z16
168
169 // DepthStencil formats
170 {vk::Format::eD24UnormS8Uint, ComponentType::UNorm, true}, // Z24S8
171 {vk::Format::eD24UnormS8Uint, ComponentType::UNorm, true}, // S8Z24 (emulated)
172 {vk::Format::eUndefined, ComponentType::Invalid, false}, // Z32FS8
173}};
174
175static constexpr bool IsZetaFormat(PixelFormat pixel_format) {
176 return pixel_format >= PixelFormat::MaxColorFormat &&
177 pixel_format < PixelFormat::MaxDepthStencilFormat;
178}
179
180std::pair<vk::Format, bool> SurfaceFormat(const VKDevice& device, FormatType format_type,
181 PixelFormat pixel_format, ComponentType component_type) {
182 ASSERT(static_cast<std::size_t>(pixel_format) < tex_format_tuples.size());
183
184 const auto tuple = tex_format_tuples[static_cast<u32>(pixel_format)];
185 UNIMPLEMENTED_IF_MSG(tuple.format == vk::Format::eUndefined,
186 "Unimplemented texture format with pixel format={} and component type={}",
187 static_cast<u32>(pixel_format), static_cast<u32>(component_type));
188 ASSERT_MSG(component_type == tuple.component_type, "Component type mismatch");
189
190 auto usage = vk::FormatFeatureFlagBits::eSampledImage |
191 vk::FormatFeatureFlagBits::eTransferDst | vk::FormatFeatureFlagBits::eTransferSrc;
192 if (tuple.attachable) {
193 usage |= IsZetaFormat(pixel_format) ? vk::FormatFeatureFlagBits::eDepthStencilAttachment
194 : vk::FormatFeatureFlagBits::eColorAttachment;
195 }
196 return {device.GetSupportedFormat(tuple.format, usage, format_type), tuple.attachable};
197}
198
199vk::ShaderStageFlagBits ShaderStage(Maxwell::ShaderStage stage) {
200 switch (stage) {
201 case Maxwell::ShaderStage::Vertex:
202 return vk::ShaderStageFlagBits::eVertex;
203 case Maxwell::ShaderStage::TesselationControl:
204 return vk::ShaderStageFlagBits::eTessellationControl;
205 case Maxwell::ShaderStage::TesselationEval:
206 return vk::ShaderStageFlagBits::eTessellationEvaluation;
207 case Maxwell::ShaderStage::Geometry:
208 return vk::ShaderStageFlagBits::eGeometry;
209 case Maxwell::ShaderStage::Fragment:
210 return vk::ShaderStageFlagBits::eFragment;
211 }
212 UNIMPLEMENTED_MSG("Unimplemented shader stage={}", static_cast<u32>(stage));
213 return {};
214}
215
216vk::PrimitiveTopology PrimitiveTopology(Maxwell::PrimitiveTopology topology) {
217 switch (topology) {
218 case Maxwell::PrimitiveTopology::Points:
219 return vk::PrimitiveTopology::ePointList;
220 case Maxwell::PrimitiveTopology::Lines:
221 return vk::PrimitiveTopology::eLineList;
222 case Maxwell::PrimitiveTopology::LineStrip:
223 return vk::PrimitiveTopology::eLineStrip;
224 case Maxwell::PrimitiveTopology::Triangles:
225 return vk::PrimitiveTopology::eTriangleList;
226 case Maxwell::PrimitiveTopology::TriangleStrip:
227 return vk::PrimitiveTopology::eTriangleStrip;
228 }
229 UNIMPLEMENTED_MSG("Unimplemented topology={}", static_cast<u32>(topology));
230 return {};
231}
232
233vk::Format VertexFormat(Maxwell::VertexAttribute::Type type, Maxwell::VertexAttribute::Size size) {
234 switch (type) {
235 case Maxwell::VertexAttribute::Type::SignedNorm:
236 break;
237 case Maxwell::VertexAttribute::Type::UnsignedNorm:
238 switch (size) {
239 case Maxwell::VertexAttribute::Size::Size_8_8_8_8:
240 return vk::Format::eR8G8B8A8Unorm;
241 default:
242 break;
243 }
244 break;
245 case Maxwell::VertexAttribute::Type::SignedInt:
246 break;
247 case Maxwell::VertexAttribute::Type::UnsignedInt:
248 switch (size) {
249 case Maxwell::VertexAttribute::Size::Size_32:
250 return vk::Format::eR32Uint;
251 default:
252 break;
253 }
254 case Maxwell::VertexAttribute::Type::UnsignedScaled:
255 case Maxwell::VertexAttribute::Type::SignedScaled:
256 break;
257 case Maxwell::VertexAttribute::Type::Float:
258 switch (size) {
259 case Maxwell::VertexAttribute::Size::Size_32_32_32_32:
260 return vk::Format::eR32G32B32A32Sfloat;
261 case Maxwell::VertexAttribute::Size::Size_32_32_32:
262 return vk::Format::eR32G32B32Sfloat;
263 case Maxwell::VertexAttribute::Size::Size_32_32:
264 return vk::Format::eR32G32Sfloat;
265 case Maxwell::VertexAttribute::Size::Size_32:
266 return vk::Format::eR32Sfloat;
267 default:
268 break;
269 }
270 break;
271 }
272 UNIMPLEMENTED_MSG("Unimplemented vertex format of type={} and size={}", static_cast<u32>(type),
273 static_cast<u32>(size));
274 return {};
275}
276
// Converts a Maxwell comparison operation into a Vulkan compare op.
// Each comparison has two Maxwell encodings (e.g. Less and LessOld); both aliases are mapped to
// the same Vulkan op.
vk::CompareOp ComparisonOp(Maxwell::ComparisonOp comparison) {
    switch (comparison) {
    case Maxwell::ComparisonOp::Never:
    case Maxwell::ComparisonOp::NeverOld:
        return vk::CompareOp::eNever;
    case Maxwell::ComparisonOp::Less:
    case Maxwell::ComparisonOp::LessOld:
        return vk::CompareOp::eLess;
    case Maxwell::ComparisonOp::Equal:
    case Maxwell::ComparisonOp::EqualOld:
        return vk::CompareOp::eEqual;
    case Maxwell::ComparisonOp::LessEqual:
    case Maxwell::ComparisonOp::LessEqualOld:
        return vk::CompareOp::eLessOrEqual;
    case Maxwell::ComparisonOp::Greater:
    case Maxwell::ComparisonOp::GreaterOld:
        return vk::CompareOp::eGreater;
    case Maxwell::ComparisonOp::NotEqual:
    case Maxwell::ComparisonOp::NotEqualOld:
        return vk::CompareOp::eNotEqual;
    case Maxwell::ComparisonOp::GreaterEqual:
    case Maxwell::ComparisonOp::GreaterEqualOld:
        return vk::CompareOp::eGreaterOrEqual;
    case Maxwell::ComparisonOp::Always:
    case Maxwell::ComparisonOp::AlwaysOld:
        return vk::CompareOp::eAlways;
    }
    UNIMPLEMENTED_MSG("Unimplemented comparison op={}", static_cast<u32>(comparison));
    return {};
}
307
308vk::IndexType IndexFormat(Maxwell::IndexFormat index_format) {
309 switch (index_format) {
310 case Maxwell::IndexFormat::UnsignedByte:
311 UNIMPLEMENTED_MSG("Vulkan does not support native u8 index format");
312 return vk::IndexType::eUint16;
313 case Maxwell::IndexFormat::UnsignedShort:
314 return vk::IndexType::eUint16;
315 case Maxwell::IndexFormat::UnsignedInt:
316 return vk::IndexType::eUint32;
317 }
318 UNIMPLEMENTED_MSG("Unimplemented index_format={}", static_cast<u32>(index_format));
319 return {};
320}
321
// Converts a Maxwell stencil operation into a Vulkan stencil op.
// Each operation has two Maxwell encodings (e.g. Keep and KeepOGL); both aliases are mapped to
// the same Vulkan op.
vk::StencilOp StencilOp(Maxwell::StencilOp stencil_op) {
    switch (stencil_op) {
    case Maxwell::StencilOp::Keep:
    case Maxwell::StencilOp::KeepOGL:
        return vk::StencilOp::eKeep;
    case Maxwell::StencilOp::Zero:
    case Maxwell::StencilOp::ZeroOGL:
        return vk::StencilOp::eZero;
    case Maxwell::StencilOp::Replace:
    case Maxwell::StencilOp::ReplaceOGL:
        return vk::StencilOp::eReplace;
    case Maxwell::StencilOp::Incr:
    case Maxwell::StencilOp::IncrOGL:
        return vk::StencilOp::eIncrementAndClamp;
    case Maxwell::StencilOp::Decr:
    case Maxwell::StencilOp::DecrOGL:
        return vk::StencilOp::eDecrementAndClamp;
    case Maxwell::StencilOp::Invert:
    case Maxwell::StencilOp::InvertOGL:
        return vk::StencilOp::eInvert;
    case Maxwell::StencilOp::IncrWrap:
    case Maxwell::StencilOp::IncrWrapOGL:
        return vk::StencilOp::eIncrementAndWrap;
    case Maxwell::StencilOp::DecrWrap:
    case Maxwell::StencilOp::DecrWrapOGL:
        return vk::StencilOp::eDecrementAndWrap;
    }
    UNIMPLEMENTED_MSG("Unimplemented stencil op={}", static_cast<u32>(stencil_op));
    return {};
}
352
353vk::BlendOp BlendEquation(Maxwell::Blend::Equation equation) {
354 switch (equation) {
355 case Maxwell::Blend::Equation::Add:
356 case Maxwell::Blend::Equation::AddGL:
357 return vk::BlendOp::eAdd;
358 case Maxwell::Blend::Equation::Subtract:
359 case Maxwell::Blend::Equation::SubtractGL:
360 return vk::BlendOp::eSubtract;
361 case Maxwell::Blend::Equation::ReverseSubtract:
362 case Maxwell::Blend::Equation::ReverseSubtractGL:
363 return vk::BlendOp::eReverseSubtract;
364 case Maxwell::Blend::Equation::Min:
365 case Maxwell::Blend::Equation::MinGL:
366 return vk::BlendOp::eMin;
367 case Maxwell::Blend::Equation::Max:
368 case Maxwell::Blend::Equation::MaxGL:
369 return vk::BlendOp::eMax;
370 }
371 UNIMPLEMENTED_MSG("Unimplemented blend equation={}", static_cast<u32>(equation));
372 return {};
373}
374
// Converts a Maxwell blend factor into a Vulkan blend factor.
// Each factor has two Maxwell encodings (e.g. Zero and ZeroGL); both aliases are mapped to the
// same Vulkan factor.
vk::BlendFactor BlendFactor(Maxwell::Blend::Factor factor) {
    switch (factor) {
    case Maxwell::Blend::Factor::Zero:
    case Maxwell::Blend::Factor::ZeroGL:
        return vk::BlendFactor::eZero;
    case Maxwell::Blend::Factor::One:
    case Maxwell::Blend::Factor::OneGL:
        return vk::BlendFactor::eOne;
    case Maxwell::Blend::Factor::SourceColor:
    case Maxwell::Blend::Factor::SourceColorGL:
        return vk::BlendFactor::eSrcColor;
    case Maxwell::Blend::Factor::OneMinusSourceColor:
    case Maxwell::Blend::Factor::OneMinusSourceColorGL:
        return vk::BlendFactor::eOneMinusSrcColor;
    case Maxwell::Blend::Factor::SourceAlpha:
    case Maxwell::Blend::Factor::SourceAlphaGL:
        return vk::BlendFactor::eSrcAlpha;
    case Maxwell::Blend::Factor::OneMinusSourceAlpha:
    case Maxwell::Blend::Factor::OneMinusSourceAlphaGL:
        return vk::BlendFactor::eOneMinusSrcAlpha;
    case Maxwell::Blend::Factor::DestAlpha:
    case Maxwell::Blend::Factor::DestAlphaGL:
        return vk::BlendFactor::eDstAlpha;
    case Maxwell::Blend::Factor::OneMinusDestAlpha:
    case Maxwell::Blend::Factor::OneMinusDestAlphaGL:
        return vk::BlendFactor::eOneMinusDstAlpha;
    case Maxwell::Blend::Factor::DestColor:
    case Maxwell::Blend::Factor::DestColorGL:
        return vk::BlendFactor::eDstColor;
    case Maxwell::Blend::Factor::OneMinusDestColor:
    case Maxwell::Blend::Factor::OneMinusDestColorGL:
        return vk::BlendFactor::eOneMinusDstColor;
    case Maxwell::Blend::Factor::SourceAlphaSaturate:
    case Maxwell::Blend::Factor::SourceAlphaSaturateGL:
        return vk::BlendFactor::eSrcAlphaSaturate;
    case Maxwell::Blend::Factor::Source1Color:
    case Maxwell::Blend::Factor::Source1ColorGL:
        return vk::BlendFactor::eSrc1Color;
    case Maxwell::Blend::Factor::OneMinusSource1Color:
    case Maxwell::Blend::Factor::OneMinusSource1ColorGL:
        return vk::BlendFactor::eOneMinusSrc1Color;
    case Maxwell::Blend::Factor::Source1Alpha:
    case Maxwell::Blend::Factor::Source1AlphaGL:
        return vk::BlendFactor::eSrc1Alpha;
    case Maxwell::Blend::Factor::OneMinusSource1Alpha:
    case Maxwell::Blend::Factor::OneMinusSource1AlphaGL:
        return vk::BlendFactor::eOneMinusSrc1Alpha;
    case Maxwell::Blend::Factor::ConstantColor:
    case Maxwell::Blend::Factor::ConstantColorGL:
        return vk::BlendFactor::eConstantColor;
    case Maxwell::Blend::Factor::OneMinusConstantColor:
    case Maxwell::Blend::Factor::OneMinusConstantColorGL:
        return vk::BlendFactor::eOneMinusConstantColor;
    case Maxwell::Blend::Factor::ConstantAlpha:
    case Maxwell::Blend::Factor::ConstantAlphaGL:
        return vk::BlendFactor::eConstantAlpha;
    case Maxwell::Blend::Factor::OneMinusConstantAlpha:
    case Maxwell::Blend::Factor::OneMinusConstantAlphaGL:
        return vk::BlendFactor::eOneMinusConstantAlpha;
    }
    UNIMPLEMENTED_MSG("Unimplemented blend factor={}", static_cast<u32>(factor));
    return {};
}
438
439vk::FrontFace FrontFace(Maxwell::Cull::FrontFace front_face) {
440 switch (front_face) {
441 case Maxwell::Cull::FrontFace::ClockWise:
442 return vk::FrontFace::eClockwise;
443 case Maxwell::Cull::FrontFace::CounterClockWise:
444 return vk::FrontFace::eCounterClockwise;
445 }
446 UNIMPLEMENTED_MSG("Unimplemented front face={}", static_cast<u32>(front_face));
447 return {};
448}
449
450vk::CullModeFlags CullFace(Maxwell::Cull::CullFace cull_face) {
451 switch (cull_face) {
452 case Maxwell::Cull::CullFace::Front:
453 return vk::CullModeFlagBits::eFront;
454 case Maxwell::Cull::CullFace::Back:
455 return vk::CullModeFlagBits::eBack;
456 case Maxwell::Cull::CullFace::FrontAndBack:
457 return vk::CullModeFlagBits::eFrontAndBack;
458 }
459 UNIMPLEMENTED_MSG("Unimplemented cull face={}", static_cast<u32>(cull_face));
460 return {};
461}
462
463vk::ComponentSwizzle SwizzleSource(Tegra::Texture::SwizzleSource swizzle) {
464 switch (swizzle) {
465 case Tegra::Texture::SwizzleSource::Zero:
466 return vk::ComponentSwizzle::eZero;
467 case Tegra::Texture::SwizzleSource::R:
468 return vk::ComponentSwizzle::eR;
469 case Tegra::Texture::SwizzleSource::G:
470 return vk::ComponentSwizzle::eG;
471 case Tegra::Texture::SwizzleSource::B:
472 return vk::ComponentSwizzle::eB;
473 case Tegra::Texture::SwizzleSource::A:
474 return vk::ComponentSwizzle::eA;
475 case Tegra::Texture::SwizzleSource::OneInt:
476 case Tegra::Texture::SwizzleSource::OneFloat:
477 return vk::ComponentSwizzle::eOne;
478 }
479 UNIMPLEMENTED_MSG("Unimplemented swizzle source={}", static_cast<u32>(swizzle));
480 return {};
481}
482
483} // namespace Vulkan::MaxwellToVK
diff --git a/src/video_core/renderer_vulkan/maxwell_to_vk.h b/src/video_core/renderer_vulkan/maxwell_to_vk.h
new file mode 100644
index 000000000..4cadc0721
--- /dev/null
+++ b/src/video_core/renderer_vulkan/maxwell_to_vk.h
@@ -0,0 +1,58 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <utility>
8#include "common/common_types.h"
9#include "video_core/engines/maxwell_3d.h"
10#include "video_core/renderer_vulkan/declarations.h"
11#include "video_core/renderer_vulkan/vk_device.h"
12#include "video_core/surface.h"
13#include "video_core/textures/texture.h"
14
15namespace Vulkan::MaxwellToVK {
16
17using Maxwell = Tegra::Engines::Maxwell3D::Regs;
18using PixelFormat = VideoCore::Surface::PixelFormat;
19using ComponentType = VideoCore::Surface::ComponentType;
20
21namespace Sampler {
22
23vk::Filter Filter(Tegra::Texture::TextureFilter filter);
24
25vk::SamplerMipmapMode MipmapMode(Tegra::Texture::TextureMipmapFilter mipmap_filter);
26
27vk::SamplerAddressMode WrapMode(Tegra::Texture::WrapMode wrap_mode);
28
29vk::CompareOp DepthCompareFunction(Tegra::Texture::DepthCompareFunc depth_compare_func);
30
31} // namespace Sampler
32
33std::pair<vk::Format, bool> SurfaceFormat(const VKDevice& device, FormatType format_type,
34 PixelFormat pixel_format, ComponentType component_type);
35
36vk::ShaderStageFlagBits ShaderStage(Maxwell::ShaderStage stage);
37
38vk::PrimitiveTopology PrimitiveTopology(Maxwell::PrimitiveTopology topology);
39
40vk::Format VertexFormat(Maxwell::VertexAttribute::Type type, Maxwell::VertexAttribute::Size size);
41
42vk::CompareOp ComparisonOp(Maxwell::ComparisonOp comparison);
43
44vk::IndexType IndexFormat(Maxwell::IndexFormat index_format);
45
46vk::StencilOp StencilOp(Maxwell::StencilOp stencil_op);
47
48vk::BlendOp BlendEquation(Maxwell::Blend::Equation equation);
49
50vk::BlendFactor BlendFactor(Maxwell::Blend::Factor factor);
51
52vk::FrontFace FrontFace(Maxwell::Cull::FrontFace front_face);
53
54vk::CullModeFlags CullFace(Maxwell::Cull::CullFace cull_face);
55
56vk::ComponentSwizzle SwizzleSource(Tegra::Texture::SwizzleSource swizzle);
57
58} // namespace Vulkan::MaxwellToVK
diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp
new file mode 100644
index 000000000..95eab3fec
--- /dev/null
+++ b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp
@@ -0,0 +1,123 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <cstring>
6#include <memory>
7#include <optional>
8#include <tuple>
9
10#include "common/alignment.h"
11#include "common/assert.h"
12#include "core/memory.h"
13#include "video_core/renderer_vulkan/declarations.h"
14#include "video_core/renderer_vulkan/vk_buffer_cache.h"
15#include "video_core/renderer_vulkan/vk_scheduler.h"
16#include "video_core/renderer_vulkan/vk_stream_buffer.h"
17
18namespace Vulkan {
19
20CachedBufferEntry::CachedBufferEntry(VAddr cpu_addr, std::size_t size, u64 offset,
21 std::size_t alignment, u8* host_ptr)
22 : cpu_addr{cpu_addr}, size{size}, offset{offset}, alignment{alignment}, RasterizerCacheObject{
23 host_ptr} {}
24
25VKBufferCache::VKBufferCache(Tegra::MemoryManager& tegra_memory_manager,
26 VideoCore::RasterizerInterface& rasterizer, const VKDevice& device,
27 VKMemoryManager& memory_manager, VKScheduler& scheduler, u64 size)
28 : RasterizerCache{rasterizer}, tegra_memory_manager{tegra_memory_manager} {
29 const auto usage = vk::BufferUsageFlagBits::eVertexBuffer |
30 vk::BufferUsageFlagBits::eIndexBuffer |
31 vk::BufferUsageFlagBits::eUniformBuffer;
32 const auto access = vk::AccessFlagBits::eVertexAttributeRead | vk::AccessFlagBits::eIndexRead |
33 vk::AccessFlagBits::eUniformRead;
34 stream_buffer =
35 std::make_unique<VKStreamBuffer>(device, memory_manager, scheduler, size, usage, access,
36 vk::PipelineStageFlagBits::eAllCommands);
37 buffer_handle = stream_buffer->GetBuffer();
38}
39
// Out-of-line so the unique_ptr<VKStreamBuffer> destructor is instantiated in this TU.
VKBufferCache::~VKBufferCache() = default;
41
// Copies guest memory at gpu_addr into the stream buffer and returns the offset the data was
// placed at. Large uploads (>= 2048 bytes) are cached so repeated identical uploads can return
// the previously used offset without copying again.
u64 VKBufferCache::UploadMemory(Tegra::GPUVAddr gpu_addr, std::size_t size, u64 alignment,
                                bool cache) {
    const auto cpu_addr{tegra_memory_manager.GpuToCpuAddress(gpu_addr)};
    ASSERT_MSG(cpu_addr, "Invalid GPU address");

    // Cache management is a big overhead, so only cache entries with a given size.
    // TODO: Figure out which size is the best for given games.
    cache &= size >= 2048;

    const auto& host_ptr{Memory::GetPointer(*cpu_addr)};
    if (cache) {
        // Reuse the previous upload if it is at least as big and has the same alignment;
        // otherwise drop the stale entry and fall through to a fresh upload.
        auto entry = TryGet(host_ptr);
        if (entry) {
            if (entry->GetSize() >= size && entry->GetAlignment() == alignment) {
                return entry->GetOffset();
            }
            Unregister(entry);
        }
    }

    AlignBuffer(alignment);
    const u64 uploaded_offset = buffer_offset;

    // NOTE(review): on a null host pointer the aligned offset is returned without copying any
    // data, so the region contains whatever was previously in the stream buffer — confirm
    // callers tolerate this.
    if (!host_ptr) {
        return uploaded_offset;
    }

    std::memcpy(buffer_ptr, host_ptr, size);
    buffer_ptr += size;
    buffer_offset += size;

    if (cache) {
        auto entry = std::make_shared<CachedBufferEntry>(*cpu_addr, size, uploaded_offset,
                                                         alignment, host_ptr);
        Register(entry);
    }

    return uploaded_offset;
}
81
82u64 VKBufferCache::UploadHostMemory(const u8* raw_pointer, std::size_t size, u64 alignment) {
83 AlignBuffer(alignment);
84 std::memcpy(buffer_ptr, raw_pointer, size);
85 const u64 uploaded_offset = buffer_offset;
86
87 buffer_ptr += size;
88 buffer_offset += size;
89 return uploaded_offset;
90}
91
92std::tuple<u8*, u64> VKBufferCache::ReserveMemory(std::size_t size, u64 alignment) {
93 AlignBuffer(alignment);
94 u8* const uploaded_ptr = buffer_ptr;
95 const u64 uploaded_offset = buffer_offset;
96
97 buffer_ptr += size;
98 buffer_offset += size;
99 return {uploaded_ptr, uploaded_offset};
100}
101
102void VKBufferCache::Reserve(std::size_t max_size) {
103 bool invalidate;
104 std::tie(buffer_ptr, buffer_offset_base, invalidate) = stream_buffer->Reserve(max_size);
105 buffer_offset = buffer_offset_base;
106
107 if (invalidate) {
108 InvalidateAll();
109 }
110}
111
// Flushes everything written since the last Reserve() to the device.
VKExecutionContext VKBufferCache::Send(VKExecutionContext exctx) {
    return stream_buffer->Send(exctx, buffer_offset - buffer_offset_base);
}
115
116void VKBufferCache::AlignBuffer(std::size_t alignment) {
117 // Align the offset, not the mapped pointer
118 const u64 offset_aligned = Common::AlignUp(buffer_offset, alignment);
119 buffer_ptr += offset_aligned - buffer_offset;
120 buffer_offset = offset_aligned;
121}
122
123} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.h b/src/video_core/renderer_vulkan/vk_buffer_cache.h
new file mode 100644
index 000000000..8b415744b
--- /dev/null
+++ b/src/video_core/renderer_vulkan/vk_buffer_cache.h
@@ -0,0 +1,104 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <memory>
8#include <tuple>
9
10#include "common/common_types.h"
11#include "video_core/gpu.h"
12#include "video_core/rasterizer_cache.h"
13#include "video_core/renderer_vulkan/declarations.h"
14#include "video_core/renderer_vulkan/vk_scheduler.h"
15
16namespace Tegra {
17class MemoryManager;
18}
19
20namespace Vulkan {
21
22class VKDevice;
23class VKFence;
24class VKMemoryManager;
25class VKStreamBuffer;
26
/// Cache entry describing where a guest buffer upload was placed inside the stream buffer,
/// so identical uploads can return the stored offset instead of copying again.
class CachedBufferEntry final : public RasterizerCacheObject {
public:
    explicit CachedBufferEntry(VAddr cpu_addr, std::size_t size, u64 offset, std::size_t alignment,
                               u8* host_ptr);

    VAddr GetCpuAddr() const override {
        return cpu_addr;
    }

    std::size_t GetSizeInBytes() const override {
        return size;
    }

    std::size_t GetSize() const {
        return size;
    }

    u64 GetOffset() const {
        return offset;
    }

    std::size_t GetAlignment() const {
        return alignment;
    }

    // We do not have to flush this cache as things in it are never modified by us.
    void Flush() override {}

private:
    VAddr cpu_addr{};        ///< Guest CPU address of the cached data.
    std::size_t size{};      ///< Size in bytes of the upload.
    u64 offset{};            ///< Offset inside the stream buffer.
    std::size_t alignment{}; ///< Alignment the upload was performed with.
};
61
/// Copies guest (and host) memory into a single Vulkan stream buffer and caches large guest
/// uploads. Usage pattern per frame chunk: Reserve(), any number of Upload*/ReserveMemory()
/// calls, then Send() to flush the written range to the device.
class VKBufferCache final : public RasterizerCache<std::shared_ptr<CachedBufferEntry>> {
public:
    explicit VKBufferCache(Tegra::MemoryManager& tegra_memory_manager,
                           VideoCore::RasterizerInterface& rasterizer, const VKDevice& device,
                           VKMemoryManager& memory_manager, VKScheduler& scheduler, u64 size);
    ~VKBufferCache();

    /// Uploads data from a guest GPU address. Returns host's buffer offset where it's been
    /// allocated.
    u64 UploadMemory(Tegra::GPUVAddr gpu_addr, std::size_t size, u64 alignment = 4,
                     bool cache = true);

    /// Uploads from a host memory. Returns host's buffer offset where it's been allocated.
    u64 UploadHostMemory(const u8* raw_pointer, std::size_t size, u64 alignment = 4);

    /// Reserves memory to be used by host's CPU. Returns mapped address and offset.
    std::tuple<u8*, u64> ReserveMemory(std::size_t size, u64 alignment = 4);

    /// Reserves a region of memory to be used in subsequent upload/reserve operations.
    void Reserve(std::size_t max_size);

    /// Ensures that the set data is sent to the device.
    [[nodiscard]] VKExecutionContext Send(VKExecutionContext exctx);

    /// Returns the buffer cache handle.
    vk::Buffer GetBuffer() const {
        return buffer_handle;
    }

private:
    /// Aligns the write cursor (offset and mapped pointer) to the given alignment.
    void AlignBuffer(std::size_t alignment);

    Tegra::MemoryManager& tegra_memory_manager; ///< Used to translate GPU to CPU addresses.

    std::unique_ptr<VKStreamBuffer> stream_buffer; ///< Backing storage for all uploads.
    vk::Buffer buffer_handle;                      ///< Handle of the stream buffer.

    u8* buffer_ptr = nullptr;   ///< Current mapped write pointer.
    u64 buffer_offset = 0;      ///< Current write offset inside the stream buffer.
    u64 buffer_offset_base = 0; ///< Offset where the current reserved region begins.
};
103
104} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_device.cpp b/src/video_core/renderer_vulkan/vk_device.cpp
new file mode 100644
index 000000000..00242ecbe
--- /dev/null
+++ b/src/video_core/renderer_vulkan/vk_device.cpp
@@ -0,0 +1,238 @@
1// Copyright 2018 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <map>
6#include <optional>
7#include <set>
8#include <vector>
9#include "common/assert.h"
10#include "video_core/renderer_vulkan/declarations.h"
11#include "video_core/renderer_vulkan/vk_device.h"
12
13namespace Vulkan {
14
namespace Alternatives {

// Zero-terminated fallback lists: each array ends with a default-constructed vk::Format that
// acts as a sentinel for the search loop in GetSupportedFormat.
constexpr std::array<vk::Format, 3> Depth24UnormS8Uint = {
    vk::Format::eD32SfloatS8Uint, vk::Format::eD16UnormS8Uint, {}};
constexpr std::array<vk::Format, 3> Depth16UnormS8Uint = {
    vk::Format::eD24UnormS8Uint, vk::Format::eD32SfloatS8Uint, {}};

} // namespace Alternatives

/// Returns a sentinel-terminated list of fallback formats for the given format, or nullptr
/// when no alternatives are defined.
constexpr const vk::Format* GetFormatAlternatives(vk::Format format) {
    switch (format) {
    case vk::Format::eD24UnormS8Uint:
        return Alternatives::Depth24UnormS8Uint.data();
    case vk::Format::eD16UnormS8Uint:
        return Alternatives::Depth16UnormS8Uint.data();
    default:
        return nullptr;
    }
}
34
35constexpr vk::FormatFeatureFlags GetFormatFeatures(vk::FormatProperties properties,
36 FormatType format_type) {
37 switch (format_type) {
38 case FormatType::Linear:
39 return properties.linearTilingFeatures;
40 case FormatType::Optimal:
41 return properties.optimalTilingFeatures;
42 case FormatType::Buffer:
43 return properties.bufferFeatures;
44 default:
45 return {};
46 }
47}
48
// Queries static data (queue families, limits, per-format properties) for the physical device.
// The logical device is not created here; call Create() afterwards.
VKDevice::VKDevice(const vk::DispatchLoaderDynamic& dldi, vk::PhysicalDevice physical,
                   vk::SurfaceKHR surface)
    : physical{physical}, format_properties{GetFormatProperties(dldi, physical)} {
    SetupFamilies(dldi, surface);
    SetupProperties(dldi);
}

VKDevice::~VKDevice() = default;
57
// Creates the logical device with one queue per unique family and the swapchain extension,
// then initializes the device-level dispatch loader and fetches the graphics/present queues.
// Returns false when vkCreateDevice fails.
bool VKDevice::Create(const vk::DispatchLoaderDynamic& dldi, vk::Instance instance) {
    const auto queue_cis = GetDeviceQueueCreateInfos();
    vk::PhysicalDeviceFeatures device_features{};

    const std::vector<const char*> extensions = {VK_KHR_SWAPCHAIN_EXTENSION_NAME};
    const vk::DeviceCreateInfo device_ci({}, static_cast<u32>(queue_cis.size()), queue_cis.data(),
                                         0, nullptr, static_cast<u32>(extensions.size()),
                                         extensions.data(), &device_features);
    vk::Device dummy_logical;
    if (physical.createDevice(&device_ci, nullptr, &dummy_logical, dldi) != vk::Result::eSuccess) {
        LOG_CRITICAL(Render_Vulkan, "Logical device failed to be created!");
        return false;
    }

    // Reinitialize the dispatcher with device-level function pointers before wrapping the raw
    // handle in a unique handle that uses this dispatcher for destruction.
    dld.init(instance, dldi.vkGetInstanceProcAddr, dummy_logical, dldi.vkGetDeviceProcAddr);
    logical = UniqueDevice(
        dummy_logical, vk::ObjectDestroy<vk::NoParent, vk::DispatchLoaderDynamic>(nullptr, dld));

    graphics_queue = logical->getQueue(graphics_family, 0, dld);
    present_queue = logical->getQueue(present_family, 0, dld);
    return true;
}
80
81vk::Format VKDevice::GetSupportedFormat(vk::Format wanted_format,
82 vk::FormatFeatureFlags wanted_usage,
83 FormatType format_type) const {
84 if (IsFormatSupported(wanted_format, wanted_usage, format_type)) {
85 return wanted_format;
86 }
87 // The wanted format is not supported by hardware, search for alternatives
88 const vk::Format* alternatives = GetFormatAlternatives(wanted_format);
89 if (alternatives == nullptr) {
90 LOG_CRITICAL(Render_Vulkan,
91 "Format={} with usage={} and type={} has no defined alternatives and host "
92 "hardware does not support it",
93 static_cast<u32>(wanted_format), static_cast<u32>(wanted_usage),
94 static_cast<u32>(format_type));
95 UNREACHABLE();
96 return wanted_format;
97 }
98
99 std::size_t i = 0;
100 for (vk::Format alternative = alternatives[0]; alternative != vk::Format{};
101 alternative = alternatives[++i]) {
102 if (!IsFormatSupported(alternative, wanted_usage, format_type))
103 continue;
104 LOG_WARNING(Render_Vulkan,
105 "Emulating format={} with alternative format={} with usage={} and type={}",
106 static_cast<u32>(wanted_format), static_cast<u32>(alternative),
107 static_cast<u32>(wanted_usage), static_cast<u32>(format_type));
108 return alternative;
109 }
110
111 // No alternatives found, panic
112 LOG_CRITICAL(Render_Vulkan,
113 "Format={} with usage={} and type={} is not supported by the host hardware and "
114 "doesn't support any of the alternatives",
115 static_cast<u32>(wanted_format), static_cast<u32>(wanted_usage),
116 static_cast<u32>(format_type));
117 UNREACHABLE();
118 return wanted_format;
119}
120
121bool VKDevice::IsFormatSupported(vk::Format wanted_format, vk::FormatFeatureFlags wanted_usage,
122 FormatType format_type) const {
123 const auto it = format_properties.find(wanted_format);
124 if (it == format_properties.end()) {
125 LOG_CRITICAL(Render_Vulkan, "Unimplemented format query={}", vk::to_string(wanted_format));
126 UNREACHABLE();
127 return true;
128 }
129 const vk::FormatFeatureFlags supported_usage = GetFormatFeatures(it->second, format_type);
130 return (supported_usage & wanted_usage) == wanted_usage;
131}
132
// Decides whether a physical device can be used by the renderer: it must expose the swapchain
// extension, have at least one graphics queue and one queue able to present to the surface,
// and meet a minimal uniform buffer range.
bool VKDevice::IsSuitable(const vk::DispatchLoaderDynamic& dldi, vk::PhysicalDevice physical,
                          vk::SurfaceKHR surface) {
    const std::string swapchain_extension = VK_KHR_SWAPCHAIN_EXTENSION_NAME;

    bool has_swapchain{};
    for (const auto& prop : physical.enumerateDeviceExtensionProperties(nullptr, dldi)) {
        // extensionName is a char array; compared against std::string for convenience.
        has_swapchain |= prop.extensionName == swapchain_extension;
    }
    if (!has_swapchain) {
        // The device doesn't support creating swapchains.
        return false;
    }

    bool has_graphics{}, has_present{};
    const auto queue_family_properties = physical.getQueueFamilyProperties(dldi);
    for (u32 i = 0; i < static_cast<u32>(queue_family_properties.size()); ++i) {
        const auto& family = queue_family_properties[i];
        if (family.queueCount == 0)
            continue;

        has_graphics |=
            (family.queueFlags & vk::QueueFlagBits::eGraphics) != static_cast<vk::QueueFlagBits>(0);
        has_present |= physical.getSurfaceSupportKHR(i, surface, dldi) != 0;
    }
    if (!has_graphics || !has_present) {
        // The device doesn't have a graphics and present queue.
        return false;
    }

    // TODO(Rodrigo): Check if the device matches all requirements.
    const vk::PhysicalDeviceProperties props = physical.getProperties(dldi);
    if (props.limits.maxUniformBufferRange < 65536) {
        return false;
    }

    // Device is suitable.
    return true;
}
171
// Finds the queue family indices used for graphics and presentation. IsSuitable() has already
// verified both exist, so the ASSERT only guards against calling this on an unsuitable device.
void VKDevice::SetupFamilies(const vk::DispatchLoaderDynamic& dldi, vk::SurfaceKHR surface) {
    std::optional<u32> graphics_family_, present_family_;

    const auto queue_family_properties = physical.getQueueFamilyProperties(dldi);
    for (u32 i = 0; i < static_cast<u32>(queue_family_properties.size()); ++i) {
        // Stop early once both families have been found.
        if (graphics_family_ && present_family_)
            break;

        const auto& queue_family = queue_family_properties[i];
        if (queue_family.queueCount == 0)
            continue;

        if (queue_family.queueFlags & vk::QueueFlagBits::eGraphics)
            graphics_family_ = i;
        if (physical.getSurfaceSupportKHR(i, surface, dldi))
            present_family_ = i;
    }
    ASSERT(graphics_family_ && present_family_);

    graphics_family = *graphics_family_;
    present_family = *present_family_;
}
194
195void VKDevice::SetupProperties(const vk::DispatchLoaderDynamic& dldi) {
196 const vk::PhysicalDeviceProperties props = physical.getProperties(dldi);
197 device_type = props.deviceType;
198 uniform_buffer_alignment = static_cast<u64>(props.limits.minUniformBufferOffsetAlignment);
199}
200
201std::vector<vk::DeviceQueueCreateInfo> VKDevice::GetDeviceQueueCreateInfos() const {
202 static const float QUEUE_PRIORITY = 1.f;
203
204 std::set<u32> unique_queue_families = {graphics_family, present_family};
205 std::vector<vk::DeviceQueueCreateInfo> queue_cis;
206
207 for (u32 queue_family : unique_queue_families)
208 queue_cis.push_back({{}, queue_family, 1, &QUEUE_PRIORITY});
209
210 return queue_cis;
211}
212
213std::map<vk::Format, vk::FormatProperties> VKDevice::GetFormatProperties(
214 const vk::DispatchLoaderDynamic& dldi, vk::PhysicalDevice physical) {
215 std::map<vk::Format, vk::FormatProperties> format_properties;
216
217 const auto AddFormatQuery = [&format_properties, &dldi, physical](vk::Format format) {
218 format_properties.emplace(format, physical.getFormatProperties(format, dldi));
219 };
220 AddFormatQuery(vk::Format::eA8B8G8R8UnormPack32);
221 AddFormatQuery(vk::Format::eB5G6R5UnormPack16);
222 AddFormatQuery(vk::Format::eA2B10G10R10UnormPack32);
223 AddFormatQuery(vk::Format::eR8G8B8A8Srgb);
224 AddFormatQuery(vk::Format::eR8Unorm);
225 AddFormatQuery(vk::Format::eD32Sfloat);
226 AddFormatQuery(vk::Format::eD16Unorm);
227 AddFormatQuery(vk::Format::eD16UnormS8Uint);
228 AddFormatQuery(vk::Format::eD24UnormS8Uint);
229 AddFormatQuery(vk::Format::eD32SfloatS8Uint);
230 AddFormatQuery(vk::Format::eBc1RgbaUnormBlock);
231 AddFormatQuery(vk::Format::eBc2UnormBlock);
232 AddFormatQuery(vk::Format::eBc3UnormBlock);
233 AddFormatQuery(vk::Format::eBc4UnormBlock);
234
235 return format_properties;
236}
237
238} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_device.h b/src/video_core/renderer_vulkan/vk_device.h
new file mode 100644
index 000000000..e87c7a508
--- /dev/null
+++ b/src/video_core/renderer_vulkan/vk_device.h
@@ -0,0 +1,116 @@
1// Copyright 2018 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <map>
8#include <vector>
9#include "common/common_types.h"
10#include "video_core/renderer_vulkan/declarations.h"
11
12namespace Vulkan {
13
/// Format usage descriptor: selects which tiling/usage feature set a format query refers to.
enum class FormatType { Linear, Optimal, Buffer };
16
/// Handles data specific to a physical device.
class VKDevice final {
public:
    explicit VKDevice(const vk::DispatchLoaderDynamic& dldi, vk::PhysicalDevice physical,
                      vk::SurfaceKHR surface);
    ~VKDevice();

    /// Initializes the device. Returns true on success.
    bool Create(const vk::DispatchLoaderDynamic& dldi, vk::Instance instance);

    /**
     * Returns a format supported by the device for the passed requirements.
     * @param wanted_format The ideal format to be returned. It may not be the returned format.
     * @param wanted_usage The usage that must be fulfilled even if the format is not supported.
     * @param format_type Format type usage.
     * @returns A format supported by the device.
     */
    vk::Format GetSupportedFormat(vk::Format wanted_format, vk::FormatFeatureFlags wanted_usage,
                                  FormatType format_type) const;

    /// Returns the dispatch loader with direct function pointers of the device.
    const vk::DispatchLoaderDynamic& GetDispatchLoader() const {
        return dld;
    }

    /// Returns the logical device.
    vk::Device GetLogical() const {
        return logical.get();
    }

    /// Returns the physical device.
    vk::PhysicalDevice GetPhysical() const {
        return physical;
    }

    /// Returns the main graphics queue.
    vk::Queue GetGraphicsQueue() const {
        return graphics_queue;
    }

    /// Returns the main present queue.
    vk::Queue GetPresentQueue() const {
        return present_queue;
    }

    /// Returns main graphics queue family index.
    u32 GetGraphicsFamily() const {
        return graphics_family;
    }

    /// Returns main present queue family index.
    u32 GetPresentFamily() const {
        return present_family;
    }

    /// Returns true if the device is integrated with the host CPU.
    bool IsIntegrated() const {
        return device_type == vk::PhysicalDeviceType::eIntegratedGpu;
    }

    /// Returns the uniform buffer alignment requirement.
    u64 GetUniformBufferAlignment() const {
        return uniform_buffer_alignment;
    }

    /// Checks if the physical device is suitable.
    static bool IsSuitable(const vk::DispatchLoaderDynamic& dldi, vk::PhysicalDevice physical,
                           vk::SurfaceKHR surface);

private:
    /// Sets up queue families.
    void SetupFamilies(const vk::DispatchLoaderDynamic& dldi, vk::SurfaceKHR surface);

    /// Sets up device properties.
    void SetupProperties(const vk::DispatchLoaderDynamic& dldi);

    /// Returns a list of queue initialization descriptors.
    std::vector<vk::DeviceQueueCreateInfo> GetDeviceQueueCreateInfos() const;

    /// Returns true if a format is supported.
    bool IsFormatSupported(vk::Format wanted_format, vk::FormatFeatureFlags wanted_usage,
                           FormatType format_type) const;

    /// Returns the device properties for Vulkan formats.
    static std::map<vk::Format, vk::FormatProperties> GetFormatProperties(
        const vk::DispatchLoaderDynamic& dldi, vk::PhysicalDevice physical);

    const vk::PhysicalDevice physical; ///< Physical device
    vk::DispatchLoaderDynamic dld;     ///< Device function pointers
    UniqueDevice logical;              ///< Logical device
    vk::Queue graphics_queue;          ///< Main graphics queue
    vk::Queue present_queue;           ///< Main present queue
    u32 graphics_family{};             ///< Main graphics queue family index
    u32 present_family{};              ///< Main present queue family index
    vk::PhysicalDeviceType device_type; ///< Physical device type
    u64 uniform_buffer_alignment{};     ///< Uniform buffer alignment requirement
    std::map<vk::Format, vk::FormatProperties> format_properties; ///< Format properties dictionary
};
115
116} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_memory_manager.cpp b/src/video_core/renderer_vulkan/vk_memory_manager.cpp
new file mode 100644
index 000000000..0451babbf
--- /dev/null
+++ b/src/video_core/renderer_vulkan/vk_memory_manager.cpp
@@ -0,0 +1,252 @@
1// Copyright 2018 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <algorithm>
6#include <optional>
7#include <tuple>
8#include <vector>
9#include "common/alignment.h"
10#include "common/assert.h"
11#include "common/common_types.h"
12#include "common/logging/log.h"
13#include "video_core/renderer_vulkan/declarations.h"
14#include "video_core/renderer_vulkan/vk_device.h"
15#include "video_core/renderer_vulkan/vk_memory_manager.h"
16
17namespace Vulkan {
18
// Size of each backing device-memory allocation; commits are sub-allocated from these chunks.
// TODO(Rodrigo): Fine tune this number
constexpr u64 ALLOC_CHUNK_SIZE = 64 * 1024 * 1024;
21
/// A single vkAllocateMemory chunk from which smaller commits are sub-allocated with a simple
/// first-fit scan over the currently live commits. Host-visible chunks are persistently mapped
/// for their whole lifetime.
class VKMemoryAllocation final {
public:
    explicit VKMemoryAllocation(const VKDevice& device, vk::DeviceMemory memory,
                                vk::MemoryPropertyFlags properties, u64 alloc_size, u32 type)
        : device{device}, memory{memory}, properties{properties}, alloc_size{alloc_size},
          shifted_type{ShiftType(type)}, is_mappable{properties &
                                                     vk::MemoryPropertyFlagBits::eHostVisible} {
        if (is_mappable) {
            const auto dev = device.GetLogical();
            const auto& dld = device.GetDispatchLoader();
            base_address = static_cast<u8*>(dev.mapMemory(memory, 0, alloc_size, {}, dld));
        }
    }

    ~VKMemoryAllocation() {
        const auto dev = device.GetLogical();
        const auto& dld = device.GetDispatchLoader();
        if (is_mappable)
            dev.unmapMemory(memory, dld);
        dev.free(memory, nullptr, dld);
    }

    // Sub-allocates a region; first scans from the hint to the end of the chunk, then wraps
    // around from the beginning. Returns nullptr when no free region fits.
    VKMemoryCommit Commit(vk::DeviceSize commit_size, vk::DeviceSize alignment) {
        auto found = TryFindFreeSection(free_iterator, alloc_size, static_cast<u64>(commit_size),
                                        static_cast<u64>(alignment));
        if (!found) {
            found = TryFindFreeSection(0, free_iterator, static_cast<u64>(commit_size),
                                       static_cast<u64>(alignment));
            if (!found) {
                // Signal out of memory, it'll try to do more allocations.
                return nullptr;
            }
        }
        u8* address = is_mappable ? base_address + *found : nullptr;
        auto commit = std::make_unique<VKMemoryCommitImpl>(this, memory, address, *found,
                                                           *found + commit_size);
        commits.push_back(commit.get());

        // Last commit's address is highly probable to be free.
        free_iterator = *found + commit_size;

        return commit;
    }

    // Removes a commit from the live list; called from VKMemoryCommitImpl's destructor.
    void Free(const VKMemoryCommitImpl* commit) {
        ASSERT(commit);
        const auto it =
            std::find_if(commits.begin(), commits.end(),
                         [&](const auto& stored_commit) { return stored_commit == commit; });
        if (it == commits.end()) {
            LOG_CRITICAL(Render_Vulkan, "Freeing unallocated commit!");
            UNREACHABLE();
            return;
        }
        commits.erase(it);
    }

    /// Returns whether this allocation is compatible with the arguments.
    /// NOTE(review): this tests for *any* shared property bit, so an allocation that is only
    /// host visible matches a request for host visible | host coherent — confirm intended.
    bool IsCompatible(vk::MemoryPropertyFlags wanted_properties, u32 type_mask) const {
        return (wanted_properties & properties) != vk::MemoryPropertyFlagBits(0) &&
               (type_mask & shifted_type) != 0;
    }

private:
    static constexpr u32 ShiftType(u32 type) {
        return 1U << type;
    }

    /// A memory allocator, it may return a free region between "start" and "end" with the
    /// solicited requirements.
    /// NOTE(review): the `iterator + size < end` bound rejects a region that exactly fills
    /// [start, end) — confirm whether `<=` was intended.
    std::optional<u64> TryFindFreeSection(u64 start, u64 end, u64 size, u64 alignment) const {
        u64 iterator = start;
        while (iterator + size < end) {
            const u64 try_left = Common::AlignUp(iterator, alignment);
            const u64 try_right = try_left + size;

            bool overlap = false;
            for (const auto& commit : commits) {
                const auto [commit_left, commit_right] = commit->interval;
                if (try_left < commit_right && commit_left < try_right) {
                    // There's an overlap, continue the search where the overlapping commit ends.
                    iterator = commit_right;
                    overlap = true;
                    break;
                }
            }
            if (!overlap) {
                // A free address has been found.
                return try_left;
            }
        }
        // No free regions where found, return an empty optional.
        return std::nullopt;
    }

    const VKDevice& device;                   ///< Vulkan device.
    const vk::DeviceMemory memory;            ///< Vulkan memory allocation handler.
    const vk::MemoryPropertyFlags properties; ///< Vulkan properties.
    const u64 alloc_size;                     ///< Size of this allocation.
    const u32 shifted_type;                   ///< Stored Vulkan type of this allocation, shifted.
    const bool is_mappable;                   ///< Whether the allocation is mappable.

    /// Base address of the mapped pointer.
    u8* base_address{};

    /// Hints where the next free region is likely going to be.
    u64 free_iterator{};

    /// Stores all commits done from this allocation.
    std::vector<const VKMemoryCommitImpl*> commits;
};
133
// Caches the physical device's memory properties and whether its memory model is unified.
VKMemoryManager::VKMemoryManager(const VKDevice& device)
    : device{device}, props{device.GetPhysical().getMemoryProperties(device.GetDispatchLoader())},
      is_memory_unified{GetMemoryUnified(props)} {}

VKMemoryManager::~VKMemoryManager() = default;
139
// Commits a region satisfying the given requirements, sub-allocating from existing chunks and
// allocating a fresh ALLOC_CHUNK_SIZE chunk when none has room. Requests must be smaller than
// a chunk (asserted).
VKMemoryCommit VKMemoryManager::Commit(const vk::MemoryRequirements& reqs, bool host_visible) {
    ASSERT(reqs.size < ALLOC_CHUNK_SIZE);

    // When a host visible commit is asked, search for host visible and coherent, otherwise search
    // for a fast device local type.
    const vk::MemoryPropertyFlags wanted_properties =
        host_visible
            ? vk::MemoryPropertyFlagBits::eHostVisible | vk::MemoryPropertyFlagBits::eHostCoherent
            : vk::MemoryPropertyFlagBits::eDeviceLocal;

    // Tries to sub-allocate from every compatible existing chunk; empty result means all full.
    const auto TryCommit = [&]() -> VKMemoryCommit {
        for (auto& alloc : allocs) {
            if (!alloc->IsCompatible(wanted_properties, reqs.memoryTypeBits))
                continue;

            if (auto commit = alloc->Commit(reqs.size, reqs.alignment); commit) {
                return commit;
            }
        }
        return {};
    };

    if (auto commit = TryCommit(); commit) {
        return commit;
    }

    // Commit has failed, allocate more memory.
    if (!AllocMemory(wanted_properties, reqs.memoryTypeBits, ALLOC_CHUNK_SIZE)) {
        // TODO(Rodrigo): Try to use host memory.
        LOG_CRITICAL(Render_Vulkan, "Ran out of memory!");
        UNREACHABLE();
    }

    // Commit again, this time it won't fail since there's a fresh allocation above. If it does,
    // there's a bug.
    auto commit = TryCommit();
    ASSERT(commit);
    return commit;
}
179
180VKMemoryCommit VKMemoryManager::Commit(vk::Buffer buffer, bool host_visible) {
181 const auto dev = device.GetLogical();
182 const auto& dld = device.GetDispatchLoader();
183 const auto requeriments = dev.getBufferMemoryRequirements(buffer, dld);
184 auto commit = Commit(requeriments, host_visible);
185 dev.bindBufferMemory(buffer, commit->GetMemory(), commit->GetOffset(), dld);
186 return commit;
187}
188
189VKMemoryCommit VKMemoryManager::Commit(vk::Image image, bool host_visible) {
190 const auto dev = device.GetLogical();
191 const auto& dld = device.GetDispatchLoader();
192 const auto requeriments = dev.getImageMemoryRequirements(image, dld);
193 auto commit = Commit(requeriments, host_visible);
194 dev.bindImageMemory(image, commit->GetMemory(), commit->GetOffset(), dld);
195 return commit;
196}
197
198bool VKMemoryManager::AllocMemory(vk::MemoryPropertyFlags wanted_properties, u32 type_mask,
199 u64 size) {
200 const u32 type = [&]() {
201 for (u32 type_index = 0; type_index < props.memoryTypeCount; ++type_index) {
202 const auto flags = props.memoryTypes[type_index].propertyFlags;
203 if ((type_mask & (1U << type_index)) && (flags & wanted_properties)) {
204 // The type matches in type and in the wanted properties.
205 return type_index;
206 }
207 }
208 LOG_CRITICAL(Render_Vulkan, "Couldn't find a compatible memory type!");
209 UNREACHABLE();
210 return 0u;
211 }();
212
213 const auto dev = device.GetLogical();
214 const auto& dld = device.GetDispatchLoader();
215
216 // Try to allocate found type.
217 const vk::MemoryAllocateInfo memory_ai(size, type);
218 vk::DeviceMemory memory;
219 if (const vk::Result res = dev.allocateMemory(&memory_ai, nullptr, &memory, dld);
220 res != vk::Result::eSuccess) {
221 LOG_CRITICAL(Render_Vulkan, "Device allocation failed with code {}!", vk::to_string(res));
222 return false;
223 }
224 allocs.push_back(
225 std::make_unique<VKMemoryAllocation>(device, memory, wanted_properties, size, type));
226 return true;
227}
228
229/*static*/ bool VKMemoryManager::GetMemoryUnified(const vk::PhysicalDeviceMemoryProperties& props) {
230 for (u32 heap_index = 0; heap_index < props.memoryHeapCount; ++heap_index) {
231 if (!(props.memoryHeaps[heap_index].flags & vk::MemoryHeapFlagBits::eDeviceLocal)) {
232 // Memory is considered unified when heaps are device local only.
233 return false;
234 }
235 }
236 return true;
237}
238
// Records the committed interval; `data` is null for commits inside non-mappable allocations.
VKMemoryCommitImpl::VKMemoryCommitImpl(VKMemoryAllocation* allocation, vk::DeviceMemory memory,
                                       u8* data, u64 begin, u64 end)
    : interval(std::make_pair(begin, end)), memory{memory}, allocation{allocation}, data{data} {}

// Releases this commit's interval back to the owning allocation.
VKMemoryCommitImpl::~VKMemoryCommitImpl() {
    allocation->Free(this);
}

// Only commits sub-allocated from a host-visible (mapped) allocation carry a data pointer.
u8* VKMemoryCommitImpl::GetData() const {
    ASSERT_MSG(data != nullptr, "Trying to access an unmapped commit.");
    return data;
}
251
252} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_memory_manager.h b/src/video_core/renderer_vulkan/vk_memory_manager.h
new file mode 100644
index 000000000..073597b35
--- /dev/null
+++ b/src/video_core/renderer_vulkan/vk_memory_manager.h
@@ -0,0 +1,87 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <memory>
8#include <utility>
9#include <vector>
10#include "common/common_types.h"
11#include "video_core/renderer_vulkan/declarations.h"
12
13namespace Vulkan {
14
15class VKDevice;
16class VKMemoryAllocation;
17class VKMemoryCommitImpl;
18
19using VKMemoryCommit = std::unique_ptr<VKMemoryCommitImpl>;
20
/// Allocates Vulkan device memory in large chunks and hands out sub-allocated commits whose
/// lifetime (RAII unique_ptr) returns the region to the owning chunk.
class VKMemoryManager final {
public:
    explicit VKMemoryManager(const VKDevice& device);
    ~VKMemoryManager();

    /**
     * Commits a memory with the specified requirements.
     * @param reqs Requirements returned from a Vulkan call.
     * @param host_visible Signals the allocator that it *must* use host visible and coherent
     * memory. When passing false, it will try to allocate device local memory.
     * @returns A memory commit.
     */
    VKMemoryCommit Commit(const vk::MemoryRequirements& reqs, bool host_visible);

    /// Commits memory required by the buffer and binds it.
    VKMemoryCommit Commit(vk::Buffer buffer, bool host_visible);

    /// Commits memory required by the image and binds it.
    VKMemoryCommit Commit(vk::Image image, bool host_visible);

    /// Returns true if the memory allocations are done always in host visible and coherent memory.
    bool IsMemoryUnified() const {
        return is_memory_unified;
    }

private:
    /// Allocates a chunk of memory.
    bool AllocMemory(vk::MemoryPropertyFlags wanted_properties, u32 type_mask, u64 size);

    /// Returns true if the device uses an unified memory model.
    static bool GetMemoryUnified(const vk::PhysicalDeviceMemoryProperties& props);

    const VKDevice& device;                                  ///< Device handler.
    const vk::PhysicalDeviceMemoryProperties props;          ///< Physical device properties.
    const bool is_memory_unified;                            ///< True if memory model is unified.
    std::vector<std::unique_ptr<VKMemoryAllocation>> allocs; ///< Current allocations.
};
58
/// A sub-allocated region of a VKMemoryAllocation; its destructor frees the region back to
/// the owning allocation. Usually handled through the VKMemoryCommit unique_ptr alias.
class VKMemoryCommitImpl final {
    friend VKMemoryAllocation;

public:
    explicit VKMemoryCommitImpl(VKMemoryAllocation* allocation, vk::DeviceMemory memory, u8* data,
                                u64 begin, u64 end);
    ~VKMemoryCommitImpl();

    /// Returns the writeable memory map. The commit has to be mappable.
    u8* GetData() const;

    /// Returns the Vulkan memory handler.
    vk::DeviceMemory GetMemory() const {
        return memory;
    }

    /// Returns the start position of the commit relative to the allocation.
    vk::DeviceSize GetOffset() const {
        return static_cast<vk::DeviceSize>(interval.first);
    }

private:
    std::pair<u64, u64> interval{}; ///< Interval where the commit exists.
    vk::DeviceMemory memory;        ///< Vulkan device memory handler.
    VKMemoryAllocation* allocation{}; ///< Pointer to the large memory allocation.
    u8* data{}; ///< Pointer to the host mapped memory, it has the commit offset included.
};
86
87} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_resource_manager.cpp b/src/video_core/renderer_vulkan/vk_resource_manager.cpp
new file mode 100644
index 000000000..a1e117443
--- /dev/null
+++ b/src/video_core/renderer_vulkan/vk_resource_manager.cpp
@@ -0,0 +1,285 @@
1// Copyright 2018 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <algorithm>
6#include <optional>
7#include "common/assert.h"
8#include "common/logging/log.h"
9#include "video_core/renderer_vulkan/declarations.h"
10#include "video_core/renderer_vulkan/vk_device.h"
11#include "video_core/renderer_vulkan/vk_resource_manager.h"
12
13namespace Vulkan {
14
15// TODO(Rodrigo): Fine tune these numbers.
16constexpr std::size_t COMMAND_BUFFER_POOL_SIZE = 0x1000;
17constexpr std::size_t FENCES_GROW_STEP = 0x40;
18
19class CommandBufferPool final : public VKFencedPool {
20public:
21 CommandBufferPool(const VKDevice& device)
22 : VKFencedPool(COMMAND_BUFFER_POOL_SIZE), device{device} {}
23
24 void Allocate(std::size_t begin, std::size_t end) {
25 const auto dev = device.GetLogical();
26 const auto& dld = device.GetDispatchLoader();
27 const u32 graphics_family = device.GetGraphicsFamily();
28
29 auto pool = std::make_unique<Pool>();
30
31 // Command buffers are going to be commited, recorded, executed every single usage cycle.
32 // They are also going to be reseted when commited.
33 const auto pool_flags = vk::CommandPoolCreateFlagBits::eTransient |
34 vk::CommandPoolCreateFlagBits::eResetCommandBuffer;
35 const vk::CommandPoolCreateInfo cmdbuf_pool_ci(pool_flags, graphics_family);
36 pool->handle = dev.createCommandPoolUnique(cmdbuf_pool_ci, nullptr, dld);
37
38 const vk::CommandBufferAllocateInfo cmdbuf_ai(*pool->handle,
39 vk::CommandBufferLevel::ePrimary,
40 static_cast<u32>(COMMAND_BUFFER_POOL_SIZE));
41 pool->cmdbufs =
42 dev.allocateCommandBuffersUnique<std::allocator<UniqueCommandBuffer>>(cmdbuf_ai, dld);
43
44 pools.push_back(std::move(pool));
45 }
46
47 vk::CommandBuffer Commit(VKFence& fence) {
48 const std::size_t index = CommitResource(fence);
49 const auto pool_index = index / COMMAND_BUFFER_POOL_SIZE;
50 const auto sub_index = index % COMMAND_BUFFER_POOL_SIZE;
51 return *pools[pool_index]->cmdbufs[sub_index];
52 }
53
54private:
55 struct Pool {
56 UniqueCommandPool handle;
57 std::vector<UniqueCommandBuffer> cmdbufs;
58 };
59
60 const VKDevice& device;
61
62 std::vector<std::unique_ptr<Pool>> pools;
63};
64
VKResource::VKResource() = default;

VKResource::~VKResource() = default;

VKFence::VKFence(const VKDevice& device, UniqueFence handle)
    : device{device}, handle{std::move(handle)} {}

VKFence::~VKFence() = default;

void VKFence::Wait() {
    // Blocks until the driver signals the fence. Requires that the fence was previously
    // submitted to a queue (see the warning on the declaration).
    const auto dev = device.GetLogical();
    const auto& dld = device.GetDispatchLoader();
    dev.waitForFences({*handle}, true, std::numeric_limits<u64>::max(), dld);
}

void VKFence::Release() {
    // Drops ownership only; the fence remains "used" until Tick observes it signaled.
    is_owned = false;
}

void VKFence::Commit() {
    is_owned = true;
    is_used = true;
}
88
// Updates the fence state, optionally waiting for the GPU and/or the owner. Returns true when
// the fence is free for reuse; on success all protected resources are notified and the Vulkan
// fence is reset.
bool VKFence::Tick(bool gpu_wait, bool owner_wait) {
    if (!is_used) {
        // If a fence is not used it's always free.
        return true;
    }
    if (is_owned && !owner_wait) {
        // The fence is still being owned (Release has not been called) and ownership wait has
        // not been asked.
        return false;
    }

    const auto dev = device.GetLogical();
    const auto& dld = device.GetDispatchLoader();
    if (gpu_wait) {
        // Wait for the fence if it has been requested.
        dev.waitForFences({*handle}, true, std::numeric_limits<u64>::max(), dld);
    } else {
        if (dev.getFenceStatus(*handle, dld) != vk::Result::eSuccess) {
            // Vulkan fence is not ready, not much it can do here
            return false;
        }
    }

    // Broadcast resources their free state.
    for (auto* resource : protected_resources) {
        resource->OnFenceRemoval(this);
    }
    protected_resources.clear();

    // Prepare fence for reuse.
    dev.resetFences({*handle}, dld);
    is_used = false;
    return true;
}

void VKFence::Protect(VKResource* resource) {
    protected_resources.push_back(resource);
}

void VKFence::Unprotect(VKResource* resource) {
    const auto it = std::find(protected_resources.begin(), protected_resources.end(), resource);
    ASSERT(it != protected_resources.end());

    // Notify the resource before dropping it so it can clear its back-reference to this fence.
    resource->OnFenceRemoval(this);
    protected_resources.erase(it);
}
135
VKFenceWatch::VKFenceWatch() = default;

VKFenceWatch::~VKFenceWatch() {
    // A watch being destroyed must detach from its fence, otherwise the fence would later
    // call OnFenceRemoval on a dead object.
    if (fence) {
        fence->Unprotect(this);
    }
}

void VKFenceWatch::Wait() {
    // A null fence means the watch is free; there is nothing to wait for.
    if (fence == nullptr) {
        return;
    }
    fence->Wait();
    // Unprotect triggers OnFenceRemoval, which resets `fence` back to nullptr.
    fence->Unprotect(this);
}

void VKFenceWatch::Watch(VKFence& new_fence) {
    // Blocks on any previous fence before adopting the new one.
    Wait();
    fence = &new_fence;
    fence->Protect(this);
}

bool VKFenceWatch::TryWatch(VKFence& new_fence) {
    // Non-blocking variant: fails when the watch is still held by another fence.
    if (fence) {
        return false;
    }
    fence = &new_fence;
    fence->Protect(this);
    return true;
}

void VKFenceWatch::OnFenceRemoval(VKFence* signaling_fence) {
    ASSERT_MSG(signaling_fence == fence, "Removing the wrong fence");
    fence = nullptr;
}
171
VKFencedPool::VKFencedPool(std::size_t grow_step) : grow_step{grow_step} {}

VKFencedPool::~VKFencedPool() = default;

// Finds (or creates) a free resource slot, protects it with the fence and returns its index.
std::size_t VKFencedPool::CommitResource(VKFence& fence) {
    // Scans [begin, end) for a watch that is not currently guarded by a fence.
    const auto Search = [&](std::size_t begin, std::size_t end) -> std::optional<std::size_t> {
        for (std::size_t iterator = begin; iterator < end; ++iterator) {
            if (watches[iterator]->TryWatch(fence)) {
                // The resource is now being watched, a free resource was successfully found.
                return iterator;
            }
        }
        return {};
    };
    // Try to find a free resource from the hinted position to the end.
    auto found = Search(free_iterator, watches.size());
    if (!found) {
        // Search from beginning to the hinted position.
        found = Search(0, free_iterator);
        if (!found) {
            // Both searches failed, the pool is full; handle it.
            const std::size_t free_resource = ManageOverflow();

            // Watch will wait for the resource to be free.
            watches[free_resource]->Watch(fence);
            found = free_resource;
        }
    }
    // Free iterator is hinted to the resource after the one that's been committed.
    free_iterator = (*found + 1) % watches.size();
    return *found;
}

std::size_t VKFencedPool::ManageOverflow() {
    const std::size_t old_capacity = watches.size();
    Grow();

    // The last entry is guaranteed to be free, since it's the first element of the freshly
    // allocated resources.
    return old_capacity;
}

void VKFencedPool::Grow() {
    const std::size_t old_capacity = watches.size();
    watches.resize(old_capacity + grow_step);
    std::generate(watches.begin() + old_capacity, watches.end(),
                  []() { return std::make_unique<VKFenceWatch>(); });
    // Let the derived pool allocate the backing resources for the new slots.
    Allocate(old_capacity, old_capacity + grow_step);
}
221
VKResourceManager::VKResourceManager(const VKDevice& device) : device{device} {
    GrowFences(FENCES_GROW_STEP);
    command_buffer_pool = std::make_unique<CommandBufferPool>(device);
}

VKResourceManager::~VKResourceManager() = default;

VKFence& VKResourceManager::CommitFence() {
    // Walks all fences starting at the hinted index (wrapping around), ticking each one and
    // returning the first that becomes free. Returns nullptr when none is available.
    const auto StepFences = [&](bool gpu_wait, bool owner_wait) -> VKFence* {
        const auto Tick = [=](auto& fence) { return fence->Tick(gpu_wait, owner_wait); };
        const auto hinted = fences.begin() + fences_iterator;

        auto it = std::find_if(hinted, fences.end(), Tick);
        if (it == fences.end()) {
            it = std::find_if(fences.begin(), hinted, Tick);
            if (it == hinted) {
                // Second search exhausted its range too; no fence is free.
                return nullptr;
            }
        }
        // Hint the next commit to start right after the fence just found.
        fences_iterator = std::distance(fences.begin(), it) + 1;
        if (fences_iterator >= fences.size())
            fences_iterator = 0;

        auto& fence = *it;
        fence->Commit();
        return fence.get();
    };

    VKFence* found_fence = StepFences(false, false);
    if (!found_fence) {
        // Try again, this time waiting.
        found_fence = StepFences(true, false);

        if (!found_fence) {
            // Allocate new fences and try again.
            LOG_INFO(Render_Vulkan, "Allocating new fences {} -> {}", fences.size(),
                     fences.size() + FENCES_GROW_STEP);

            GrowFences(FENCES_GROW_STEP);
            found_fence = StepFences(true, false);
            ASSERT(found_fence != nullptr);
        }
    }
    return *found_fence;
}

vk::CommandBuffer VKResourceManager::CommitCommandBuffer(VKFence& fence) {
    return command_buffer_pool->Commit(fence);
}

void VKResourceManager::GrowFences(std::size_t new_fences_count) {
    const auto dev = device.GetLogical();
    const auto& dld = device.GetDispatchLoader();
    const vk::FenceCreateInfo fence_ci;

    const std::size_t previous_size = fences.size();
    fences.resize(previous_size + new_fences_count);

    // Fill the freshly resized slots with newly created fences.
    std::generate(fences.begin() + previous_size, fences.end(), [&]() {
        return std::make_unique<VKFence>(device, dev.createFenceUnique(fence_ci, nullptr, dld));
    });
}
284
285} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_resource_manager.h b/src/video_core/renderer_vulkan/vk_resource_manager.h
new file mode 100644
index 000000000..5bfe4cead
--- /dev/null
+++ b/src/video_core/renderer_vulkan/vk_resource_manager.h
@@ -0,0 +1,180 @@
1// Copyright 2018 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <cstddef>
8#include <memory>
9#include <vector>
10#include "video_core/renderer_vulkan/declarations.h"
11
12namespace Vulkan {
13
14class VKDevice;
15class VKFence;
16class VKResourceManager;
17
18class CommandBufferPool;
19
/// Interface for a Vulkan resource
class VKResource {
public:
    explicit VKResource();
    virtual ~VKResource();

    /**
     * Signals the object that an owning fence has been signaled.
     * @param signaling_fence Fence that signals its usage end.
     */
    virtual void OnFenceRemoval(VKFence* signaling_fence) = 0;
};

/**
 * Fences take ownership of objects, protecting them from GPU-side or driver-side concurrent access.
 * They must be committed from the resource manager. Their usage flow is: commit the fence from the
 * resource manager, protect resources with it and use them, send the fence to an execution queue
 * and Wait for it if needed and then call Release. Used resources will automatically be signaled
 * when they are free to be reused.
 * @brief Protects resources for concurrent usage and signals its release.
 */
class VKFence {
    friend class VKResourceManager;

public:
    explicit VKFence(const VKDevice& device, UniqueFence handle);
    ~VKFence();

    /**
     * Waits for the fence to be signaled.
     * @warning You must have ownership of the fence and it has to be previously sent to a queue to
     * call this function.
     */
    void Wait();

    /**
     * Releases ownership of the fence. Pass after it has been sent to an execution queue.
     * Unmanaged usage of the fence after the call will result in undefined behavior because it may
     * be being used for something else.
     */
    void Release();

    /// Protects a resource with this fence.
    void Protect(VKResource* resource);

    /// Removes protection for a resource.
    void Unprotect(VKResource* resource);

    /// Retrieves the fence.
    operator vk::Fence() const {
        return *handle;
    }

private:
    /// Take ownership of the fence.
    void Commit();

    /**
     * Updates the fence status.
     * @warning Waiting for the owner might soft lock the execution.
     * @param gpu_wait Wait for the fence to be signaled by the driver.
     * @param owner_wait Wait for the owner to signal its freedom.
     * @returns True if the fence is free. Waiting for gpu and owner will always return true.
     */
    bool Tick(bool gpu_wait, bool owner_wait);

    const VKDevice& device;                       ///< Device handler
    UniqueFence handle;                           ///< Vulkan fence
    std::vector<VKResource*> protected_resources; ///< List of resources protected by this fence
    bool is_owned = false; ///< The fence has been committed but not released yet.
    bool is_used = false;  ///< The fence has been committed but not yet checked to be free.
};
92
/**
 * A fence watch is used to keep track of the usage of a fence and protect a resource or set of
 * resources without having to inherit VKResource from their handlers.
 */
class VKFenceWatch final : public VKResource {
public:
    explicit VKFenceWatch();
    ~VKFenceWatch();

    /// Waits for the fence to be released.
    void Wait();

    /**
     * Waits for a previous fence and watches a new one.
     * @param new_fence New fence to wait to.
     */
    void Watch(VKFence& new_fence);

    /**
     * Checks if it's currently being watched and starts watching it if it's available.
     * @returns True if a watch has started, false if it's being watched.
     */
    bool TryWatch(VKFence& new_fence);

    /// Clears the watched fence. Invoked by the fence itself on signal or unprotect.
    void OnFenceRemoval(VKFence* signaling_fence) override;

private:
    VKFence* fence{}; ///< Fence watching this resource. nullptr when the watch is free.
};

/**
 * Handles a pool of resources protected by fences. Manages resource overflow allocating more
 * resources.
 */
class VKFencedPool {
public:
    explicit VKFencedPool(std::size_t grow_step);
    virtual ~VKFencedPool();

protected:
    /**
     * Commits a free resource and protects it with a fence. It may allocate new resources.
     * @param fence Fence that protects the committed resource.
     * @returns Index of the resource committed.
     */
    std::size_t CommitResource(VKFence& fence);

    /// Called when a chunk of resources have to be allocated.
    virtual void Allocate(std::size_t begin, std::size_t end) = 0;

private:
    /// Manages pool overflow allocating new resources.
    std::size_t ManageOverflow();

    /// Allocates a new page of resources.
    void Grow();

    std::size_t grow_step = 0;     ///< Number of new resources created after an overflow
    std::size_t free_iterator = 0; ///< Hint to where the next free resources is likely to be found
    std::vector<std::unique_ptr<VKFenceWatch>> watches; ///< Set of watched resources
};

/**
 * The resource manager handles all resources that can be protected with a fence avoiding
 * driver-side or GPU-side concurrent usage. Usage is documented in VKFence.
 */
class VKResourceManager final {
public:
    explicit VKResourceManager(const VKDevice& device);
    ~VKResourceManager();

    /// Commits a fence. It has to be sent to a queue and released.
    VKFence& CommitFence();

    /// Commits an unused command buffer and protects it with a fence.
    vk::CommandBuffer CommitCommandBuffer(VKFence& fence);

private:
    /// Allocates new fences.
    void GrowFences(std::size_t new_fences_count);

    const VKDevice& device;          ///< Device handler.
    std::size_t fences_iterator = 0; ///< Index where a free fence is likely to be found.
    std::vector<std::unique_ptr<VKFence>> fences;           ///< Pool of fences.
    std::unique_ptr<CommandBufferPool> command_buffer_pool; ///< Pool of command buffers.
};
179
180} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_sampler_cache.cpp b/src/video_core/renderer_vulkan/vk_sampler_cache.cpp
new file mode 100644
index 000000000..ed3178f09
--- /dev/null
+++ b/src/video_core/renderer_vulkan/vk_sampler_cache.cpp
@@ -0,0 +1,81 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <cstring>
6#include <optional>
7#include <unordered_map>
8
9#include "common/assert.h"
10#include "common/cityhash.h"
11#include "video_core/renderer_vulkan/declarations.h"
12#include "video_core/renderer_vulkan/maxwell_to_vk.h"
13#include "video_core/renderer_vulkan/vk_sampler_cache.h"
14#include "video_core/textures/texture.h"
15
16namespace Vulkan {
17
18static std::optional<vk::BorderColor> TryConvertBorderColor(std::array<float, 4> color) {
19 // TODO(Rodrigo): Manage integer border colors
20 if (color == std::array<float, 4>{0, 0, 0, 0}) {
21 return vk::BorderColor::eFloatTransparentBlack;
22 } else if (color == std::array<float, 4>{0, 0, 0, 1}) {
23 return vk::BorderColor::eFloatOpaqueBlack;
24 } else if (color == std::array<float, 4>{1, 1, 1, 1}) {
25 return vk::BorderColor::eFloatOpaqueWhite;
26 } else {
27 return {};
28 }
29}
30
31std::size_t SamplerCacheKey::Hash() const {
32 static_assert(sizeof(raw) % sizeof(u64) == 0);
33 return static_cast<std::size_t>(
34 Common::CityHash64(reinterpret_cast<const char*>(raw.data()), sizeof(raw) / sizeof(u64)));
35}
36
bool SamplerCacheKey::operator==(const SamplerCacheKey& rhs) const {
    // Two keys with identical raw TSC words describe the same sampler configuration.
    return raw == rhs.raw;
}

VKSamplerCache::VKSamplerCache(const VKDevice& device) : device{device} {}

VKSamplerCache::~VKSamplerCache() = default;

vk::Sampler VKSamplerCache::GetSampler(const Tegra::Texture::TSCEntry& tsc) {
    // try_emplace only default-constructs the mapped sampler on a miss, so the expensive
    // CreateSampler call runs at most once per unique TSC entry.
    const auto [entry, is_cache_miss] = cache.try_emplace(SamplerCacheKey{tsc});
    auto& sampler = entry->second;
    if (is_cache_miss) {
        sampler = CreateSampler(tsc);
    }
    return *sampler;
}

// Builds a Vulkan sampler from a guest TSC entry, translating filters, wrap modes and LOD
// parameters through MaxwellToVK.
UniqueSampler VKSamplerCache::CreateSampler(const Tegra::Texture::TSCEntry& tsc) {
    const float max_anisotropy = tsc.GetMaxAnisotropy();
    const bool has_anisotropy = max_anisotropy > 1.0f;

    // Only a handful of float border colors have Vulkan equivalents; anything else is
    // reported and falls back to transparent black below.
    const auto border_color = tsc.GetBorderColor();
    const auto vk_border_color = TryConvertBorderColor(border_color);
    UNIMPLEMENTED_IF_MSG(!vk_border_color, "Unimplemented border color {} {} {} {}",
                         border_color[0], border_color[1], border_color[2], border_color[3]);

    constexpr bool unnormalized_coords = false;

    const vk::SamplerCreateInfo sampler_ci(
        {}, MaxwellToVK::Sampler::Filter(tsc.mag_filter),
        MaxwellToVK::Sampler::Filter(tsc.min_filter),
        MaxwellToVK::Sampler::MipmapMode(tsc.mipmap_filter),
        MaxwellToVK::Sampler::WrapMode(tsc.wrap_u), MaxwellToVK::Sampler::WrapMode(tsc.wrap_v),
        MaxwellToVK::Sampler::WrapMode(tsc.wrap_p), tsc.GetLodBias(), has_anisotropy,
        max_anisotropy, tsc.depth_compare_enabled,
        MaxwellToVK::Sampler::DepthCompareFunction(tsc.depth_compare_func), tsc.GetMinLod(),
        tsc.GetMaxLod(), vk_border_color.value_or(vk::BorderColor::eFloatTransparentBlack),
        unnormalized_coords);

    const auto& dld = device.GetDispatchLoader();
    const auto dev = device.GetLogical();
    return dev.createSamplerUnique(sampler_ci, nullptr, dld);
}
80
81} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_sampler_cache.h b/src/video_core/renderer_vulkan/vk_sampler_cache.h
new file mode 100644
index 000000000..c6394dc87
--- /dev/null
+++ b/src/video_core/renderer_vulkan/vk_sampler_cache.h
@@ -0,0 +1,56 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <unordered_map>
8
9#include "common/common_types.h"
10#include "video_core/renderer_vulkan/declarations.h"
11#include "video_core/textures/texture.h"
12
13namespace Vulkan {
14
15class VKDevice;
16
/// Hashable wrapper over a TSC entry so sampler configurations can key an unordered_map.
struct SamplerCacheKey final : public Tegra::Texture::TSCEntry {
    /// Hashes the raw TSC data.
    std::size_t Hash() const;

    bool operator==(const SamplerCacheKey& rhs) const;

    bool operator!=(const SamplerCacheKey& rhs) const {
        return !operator==(rhs);
    }
};

} // namespace Vulkan

namespace std {

/// std::hash specialization forwarding to SamplerCacheKey::Hash for unordered_map usage.
template <>
struct hash<Vulkan::SamplerCacheKey> {
    std::size_t operator()(const Vulkan::SamplerCacheKey& k) const noexcept {
        return k.Hash();
    }
};

} // namespace std

namespace Vulkan {

/// Caches Vulkan samplers, creating them on first use and reusing them on later lookups.
class VKSamplerCache {
public:
    explicit VKSamplerCache(const VKDevice& device);
    ~VKSamplerCache();

    /// Returns the sampler matching the TSC entry, creating it on a cache miss.
    vk::Sampler GetSampler(const Tegra::Texture::TSCEntry& tsc);

private:
    /// Creates a new Vulkan sampler from a TSC entry.
    UniqueSampler CreateSampler(const Tegra::Texture::TSCEntry& tsc);

    const VKDevice& device;                                   ///< Device handler.
    std::unordered_map<SamplerCacheKey, UniqueSampler> cache; ///< Cached samplers by TSC key.
};
55
56} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_scheduler.cpp b/src/video_core/renderer_vulkan/vk_scheduler.cpp
new file mode 100644
index 000000000..f1fea1871
--- /dev/null
+++ b/src/video_core/renderer_vulkan/vk_scheduler.cpp
@@ -0,0 +1,60 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/assert.h"
6#include "common/logging/log.h"
7#include "video_core/renderer_vulkan/declarations.h"
8#include "video_core/renderer_vulkan/vk_device.h"
9#include "video_core/renderer_vulkan/vk_resource_manager.h"
10#include "video_core/renderer_vulkan/vk_scheduler.h"
11
12namespace Vulkan {
13
VKScheduler::VKScheduler(const VKDevice& device, VKResourceManager& resource_manager)
    : device{device}, resource_manager{resource_manager} {
    // Pre-commit the first fence so AllocateNewContext can promote it immediately.
    next_fence = &resource_manager.CommitFence();
    AllocateNewContext();
}

VKScheduler::~VKScheduler() = default;

VKExecutionContext VKScheduler::GetExecutionContext() const {
    return VKExecutionContext(current_fence, current_cmdbuf);
}

VKExecutionContext VKScheduler::Flush(vk::Semaphore semaphore) {
    // Submit the recorded work and hand back a fresh context without waiting for completion.
    SubmitExecution(semaphore);
    current_fence->Release();
    AllocateNewContext();
    return GetExecutionContext();
}

VKExecutionContext VKScheduler::Finish(vk::Semaphore semaphore) {
    // Like Flush, but blocks until the submitted work has finished on the GPU.
    SubmitExecution(semaphore);
    current_fence->Wait();
    current_fence->Release();
    AllocateNewContext();
    return GetExecutionContext();
}

void VKScheduler::SubmitExecution(vk::Semaphore semaphore) {
    const auto& dld = device.GetDispatchLoader();
    current_cmdbuf.end(dld);

    const auto queue = device.GetGraphicsQueue();
    // Signal the semaphore only when the caller provided one (count is 0 otherwise).
    const vk::SubmitInfo submit_info(0, nullptr, nullptr, 1, &current_cmdbuf, semaphore ? 1u : 0u,
                                     &semaphore);
    queue.submit({submit_info}, *current_fence, dld);
}

void VKScheduler::AllocateNewContext() {
    // Promote the pre-committed fence and reserve the next one ahead of time.
    current_fence = next_fence;
    current_cmdbuf = resource_manager.CommitCommandBuffer(*current_fence);
    next_fence = &resource_manager.CommitFence();

    const auto& dld = device.GetDispatchLoader();
    current_cmdbuf.begin({vk::CommandBufferUsageFlagBits::eOneTimeSubmit}, dld);
}
59
60} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_scheduler.h b/src/video_core/renderer_vulkan/vk_scheduler.h
new file mode 100644
index 000000000..cfaf5376f
--- /dev/null
+++ b/src/video_core/renderer_vulkan/vk_scheduler.h
@@ -0,0 +1,69 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include "common/common_types.h"
8#include "video_core/renderer_vulkan/declarations.h"
9
10namespace Vulkan {
11
12class VKDevice;
13class VKExecutionContext;
14class VKFence;
15class VKResourceManager;
16
/// The scheduler abstracts command buffer and fence management with an interface that's able to do
/// OpenGL-like operations on Vulkan command buffers.
class VKScheduler {
public:
    explicit VKScheduler(const VKDevice& device, VKResourceManager& resource_manager);
    ~VKScheduler();

    /// Gets the current execution context.
    [[nodiscard]] VKExecutionContext GetExecutionContext() const;

    /// Sends the current execution context to the GPU. It invalidates the current execution context
    /// and returns a new one.
    VKExecutionContext Flush(vk::Semaphore semaphore = nullptr);

    /// Sends the current execution context to the GPU and waits for it to complete. It invalidates
    /// the current execution context and returns a new one.
    VKExecutionContext Finish(vk::Semaphore semaphore = nullptr);

private:
    /// Ends and submits the current command buffer, optionally signaling a semaphore.
    void SubmitExecution(vk::Semaphore semaphore);

    /// Commits a fresh fence and command buffer and begins recording.
    void AllocateNewContext();

    const VKDevice& device;            ///< Device handler.
    VKResourceManager& resource_manager; ///< Source of fences and command buffers.
    vk::CommandBuffer current_cmdbuf;  ///< Command buffer being recorded.
    VKFence* current_fence = nullptr;  ///< Fence protecting the current context.
    VKFence* next_fence = nullptr;     ///< Pre-committed fence for the next context.
};

/// Pairs the fence and command buffer of one scheduler context; handed out by VKScheduler.
class VKExecutionContext {
    friend class VKScheduler;

public:
    VKExecutionContext() = default;

    VKFence& GetFence() const {
        return *fence;
    }

    vk::CommandBuffer GetCommandBuffer() const {
        return cmdbuf;
    }

private:
    explicit VKExecutionContext(VKFence* fence, vk::CommandBuffer cmdbuf)
        : fence{fence}, cmdbuf{cmdbuf} {}

    VKFence* fence{};          ///< Fence protecting this context; null for default-constructed.
    vk::CommandBuffer cmdbuf;  ///< Command buffer of this context.
};
68
69} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_stream_buffer.cpp b/src/video_core/renderer_vulkan/vk_stream_buffer.cpp
new file mode 100644
index 000000000..58ffa42f2
--- /dev/null
+++ b/src/video_core/renderer_vulkan/vk_stream_buffer.cpp
@@ -0,0 +1,90 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <algorithm>
6#include <memory>
7#include <optional>
8#include <vector>
9
10#include "common/assert.h"
11#include "video_core/renderer_vulkan/declarations.h"
12#include "video_core/renderer_vulkan/vk_device.h"
13#include "video_core/renderer_vulkan/vk_memory_manager.h"
14#include "video_core/renderer_vulkan/vk_resource_manager.h"
15#include "video_core/renderer_vulkan/vk_scheduler.h"
16#include "video_core/renderer_vulkan/vk_stream_buffer.h"
17
18namespace Vulkan {
19
20constexpr u64 WATCHES_INITIAL_RESERVE = 0x4000;
21constexpr u64 WATCHES_RESERVE_CHUNK = 0x1000;
22
VKStreamBuffer::VKStreamBuffer(const VKDevice& device, VKMemoryManager& memory_manager,
                               VKScheduler& scheduler, u64 size, vk::BufferUsageFlags usage,
                               vk::AccessFlags access, vk::PipelineStageFlags pipeline_stage)
    : device{device}, scheduler{scheduler}, buffer_size{size}, access{access}, pipeline_stage{
                                                                                  pipeline_stage} {
    CreateBuffers(memory_manager, usage);
    ReserveWatches(WATCHES_INITIAL_RESERVE);
}

VKStreamBuffer::~VKStreamBuffer() = default;

std::tuple<u8*, u64, bool> VKStreamBuffer::Reserve(u64 size) {
    ASSERT(size <= buffer_size);
    mapped_size = size;

    if (offset + size > buffer_size) {
        // The buffer would overflow, save the amount of used watches, signal an invalidation and
        // reset the state. The invalidation is consumed by the next Send call.
        invalidation_mark = used_watches;
        used_watches = 0;
        offset = 0;
    }

    return {mapped_pointer + offset, offset, invalidation_mark.has_value()};
}

VKExecutionContext VKStreamBuffer::Send(VKExecutionContext exctx, u64 size) {
    ASSERT_MSG(size <= mapped_size, "Reserved size is too small");

    if (invalidation_mark) {
        // TODO(Rodrigo): Find a better way to invalidate than waiting for all watches to finish.
        exctx = scheduler.Flush();
        std::for_each(watches.begin(), watches.begin() + *invalidation_mark,
                      [&](auto& resource) { resource->Wait(); });
        invalidation_mark = std::nullopt;
    }

    if (used_watches + 1 >= watches.size()) {
        // Ensure that there are enough watches.
        ReserveWatches(WATCHES_RESERVE_CHUNK);
    }
    // Add a watch for this allocation.
    watches[used_watches++]->Watch(exctx.GetFence());

    offset += size;

    return exctx;
}

void VKStreamBuffer::CreateBuffers(VKMemoryManager& memory_manager, vk::BufferUsageFlags usage) {
    const vk::BufferCreateInfo buffer_ci({}, buffer_size, usage, vk::SharingMode::eExclusive, 0,
                                         nullptr);

    const auto dev = device.GetLogical();
    const auto& dld = device.GetDispatchLoader();
    buffer = dev.createBufferUnique(buffer_ci, nullptr, dld);
    // Commit host-visible memory so the stream buffer can be written from the CPU.
    commit = memory_manager.Commit(*buffer, true);
    mapped_pointer = commit->GetData();
}

void VKStreamBuffer::ReserveWatches(std::size_t grow_size) {
    const std::size_t previous_size = watches.size();
    watches.resize(previous_size + grow_size);
    // Fill the new slots with fresh, unwatched fence watches.
    std::generate(watches.begin() + previous_size, watches.end(),
                  []() { return std::make_unique<VKFenceWatch>(); });
}
89
90} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_stream_buffer.h b/src/video_core/renderer_vulkan/vk_stream_buffer.h
new file mode 100644
index 000000000..69d036ccd
--- /dev/null
+++ b/src/video_core/renderer_vulkan/vk_stream_buffer.h
@@ -0,0 +1,72 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <memory>
8#include <optional>
9#include <tuple>
10#include <vector>
11
12#include "common/common_types.h"
13#include "video_core/renderer_vulkan/declarations.h"
14#include "video_core/renderer_vulkan/vk_memory_manager.h"
15
16namespace Vulkan {
17
18class VKDevice;
19class VKFence;
20class VKFenceWatch;
21class VKResourceManager;
22class VKScheduler;
23
/// Ring-style host-visible buffer used to stream data to the GPU, guarded by fence watches.
class VKStreamBuffer {
public:
    explicit VKStreamBuffer(const VKDevice& device, VKMemoryManager& memory_manager,
                            VKScheduler& scheduler, u64 size, vk::BufferUsageFlags usage,
                            vk::AccessFlags access, vk::PipelineStageFlags pipeline_stage);
    ~VKStreamBuffer();

    /**
     * Reserves a region of memory from the stream buffer.
     * @param size Size to reserve.
     * @returns A tuple in the following order: Raw memory pointer (with offset added), buffer
     * offset and a boolean that's true when buffer has been invalidated.
     */
    std::tuple<u8*, u64, bool> Reserve(u64 size);

    /// Ensures that "size" bytes of memory are available to the GPU, potentially recording a copy.
    [[nodiscard]] VKExecutionContext Send(VKExecutionContext exctx, u64 size);

    vk::Buffer GetBuffer() const {
        return *buffer;
    }

private:
    /// Creates Vulkan buffer handles committing the required memory.
    void CreateBuffers(VKMemoryManager& memory_manager, vk::BufferUsageFlags usage);

    /// Increases the amount of watches available.
    void ReserveWatches(std::size_t grow_size);

    const VKDevice& device; ///< Vulkan device manager.
    VKScheduler& scheduler; ///< Command scheduler.
    const u64 buffer_size;  ///< Total size of the stream buffer.
    const vk::AccessFlags access;              ///< Access usage of this stream buffer.
    const vk::PipelineStageFlags pipeline_stage; ///< Pipeline usage of this stream buffer.

    UniqueBuffer buffer;   ///< Mapped buffer.
    VKMemoryCommit commit; ///< Memory commit.
    u8* mapped_pointer{};  ///< Pointer to the host visible commit

    u64 offset{};      ///< Buffer iterator.
    u64 mapped_size{}; ///< Size reserved for the current copy.

    std::vector<std::unique_ptr<VKFenceWatch>> watches; ///< Total watches
    std::size_t used_watches{}; ///< Count of watches, reset on invalidation.
    std::optional<std::size_t>
        invalidation_mark{}; ///< Number of watches used in the current invalidation.
};
71
72} // namespace Vulkan
diff --git a/src/video_core/shader/decode.cpp b/src/video_core/shader/decode.cpp
index 812983a99..e4c438792 100644
--- a/src/video_core/shader/decode.cpp
+++ b/src/video_core/shader/decode.cpp
@@ -121,15 +121,15 @@ ExitMethod ShaderIR::Scan(u32 begin, u32 end, std::set<u32>& labels) {
121 return exit_method = ExitMethod::AlwaysReturn; 121 return exit_method = ExitMethod::AlwaysReturn;
122} 122}
123 123
124BasicBlock ShaderIR::DecodeRange(u32 begin, u32 end) { 124NodeBlock ShaderIR::DecodeRange(u32 begin, u32 end) {
125 BasicBlock basic_block; 125 NodeBlock basic_block;
126 for (u32 pc = begin; pc < (begin > end ? MAX_PROGRAM_LENGTH : end);) { 126 for (u32 pc = begin; pc < (begin > end ? MAX_PROGRAM_LENGTH : end);) {
127 pc = DecodeInstr(basic_block, pc); 127 pc = DecodeInstr(basic_block, pc);
128 } 128 }
129 return basic_block; 129 return basic_block;
130} 130}
131 131
132u32 ShaderIR::DecodeInstr(BasicBlock& bb, u32 pc) { 132u32 ShaderIR::DecodeInstr(NodeBlock& bb, u32 pc) {
133 // Ignore sched instructions when generating code. 133 // Ignore sched instructions when generating code.
134 if (IsSchedInstruction(pc, main_offset)) { 134 if (IsSchedInstruction(pc, main_offset)) {
135 return pc + 1; 135 return pc + 1;
@@ -151,39 +151,39 @@ u32 ShaderIR::DecodeInstr(BasicBlock& bb, u32 pc) {
151 UNIMPLEMENTED_IF_MSG(instr.pred.full_pred == Pred::NeverExecute, 151 UNIMPLEMENTED_IF_MSG(instr.pred.full_pred == Pred::NeverExecute,
152 "NeverExecute predicate not implemented"); 152 "NeverExecute predicate not implemented");
153 153
154 static const std::map<OpCode::Type, u32 (ShaderIR::*)(BasicBlock&, const BasicBlock&, u32)> 154 static const std::map<OpCode::Type, u32 (ShaderIR::*)(NodeBlock&, u32)> decoders = {
155 decoders = { 155 {OpCode::Type::Arithmetic, &ShaderIR::DecodeArithmetic},
156 {OpCode::Type::Arithmetic, &ShaderIR::DecodeArithmetic}, 156 {OpCode::Type::ArithmeticImmediate, &ShaderIR::DecodeArithmeticImmediate},
157 {OpCode::Type::ArithmeticImmediate, &ShaderIR::DecodeArithmeticImmediate}, 157 {OpCode::Type::Bfe, &ShaderIR::DecodeBfe},
158 {OpCode::Type::Bfe, &ShaderIR::DecodeBfe}, 158 {OpCode::Type::Bfi, &ShaderIR::DecodeBfi},
159 {OpCode::Type::Bfi, &ShaderIR::DecodeBfi}, 159 {OpCode::Type::Shift, &ShaderIR::DecodeShift},
160 {OpCode::Type::Shift, &ShaderIR::DecodeShift}, 160 {OpCode::Type::ArithmeticInteger, &ShaderIR::DecodeArithmeticInteger},
161 {OpCode::Type::ArithmeticInteger, &ShaderIR::DecodeArithmeticInteger}, 161 {OpCode::Type::ArithmeticIntegerImmediate, &ShaderIR::DecodeArithmeticIntegerImmediate},
162 {OpCode::Type::ArithmeticIntegerImmediate, &ShaderIR::DecodeArithmeticIntegerImmediate}, 162 {OpCode::Type::ArithmeticHalf, &ShaderIR::DecodeArithmeticHalf},
163 {OpCode::Type::ArithmeticHalf, &ShaderIR::DecodeArithmeticHalf}, 163 {OpCode::Type::ArithmeticHalfImmediate, &ShaderIR::DecodeArithmeticHalfImmediate},
164 {OpCode::Type::ArithmeticHalfImmediate, &ShaderIR::DecodeArithmeticHalfImmediate}, 164 {OpCode::Type::Ffma, &ShaderIR::DecodeFfma},
165 {OpCode::Type::Ffma, &ShaderIR::DecodeFfma}, 165 {OpCode::Type::Hfma2, &ShaderIR::DecodeHfma2},
166 {OpCode::Type::Hfma2, &ShaderIR::DecodeHfma2}, 166 {OpCode::Type::Conversion, &ShaderIR::DecodeConversion},
167 {OpCode::Type::Conversion, &ShaderIR::DecodeConversion}, 167 {OpCode::Type::Memory, &ShaderIR::DecodeMemory},
168 {OpCode::Type::Memory, &ShaderIR::DecodeMemory}, 168 {OpCode::Type::Texture, &ShaderIR::DecodeTexture},
169 {OpCode::Type::FloatSetPredicate, &ShaderIR::DecodeFloatSetPredicate}, 169 {OpCode::Type::FloatSetPredicate, &ShaderIR::DecodeFloatSetPredicate},
170 {OpCode::Type::IntegerSetPredicate, &ShaderIR::DecodeIntegerSetPredicate}, 170 {OpCode::Type::IntegerSetPredicate, &ShaderIR::DecodeIntegerSetPredicate},
171 {OpCode::Type::HalfSetPredicate, &ShaderIR::DecodeHalfSetPredicate}, 171 {OpCode::Type::HalfSetPredicate, &ShaderIR::DecodeHalfSetPredicate},
172 {OpCode::Type::PredicateSetRegister, &ShaderIR::DecodePredicateSetRegister}, 172 {OpCode::Type::PredicateSetRegister, &ShaderIR::DecodePredicateSetRegister},
173 {OpCode::Type::PredicateSetPredicate, &ShaderIR::DecodePredicateSetPredicate}, 173 {OpCode::Type::PredicateSetPredicate, &ShaderIR::DecodePredicateSetPredicate},
174 {OpCode::Type::RegisterSetPredicate, &ShaderIR::DecodeRegisterSetPredicate}, 174 {OpCode::Type::RegisterSetPredicate, &ShaderIR::DecodeRegisterSetPredicate},
175 {OpCode::Type::FloatSet, &ShaderIR::DecodeFloatSet}, 175 {OpCode::Type::FloatSet, &ShaderIR::DecodeFloatSet},
176 {OpCode::Type::IntegerSet, &ShaderIR::DecodeIntegerSet}, 176 {OpCode::Type::IntegerSet, &ShaderIR::DecodeIntegerSet},
177 {OpCode::Type::HalfSet, &ShaderIR::DecodeHalfSet}, 177 {OpCode::Type::HalfSet, &ShaderIR::DecodeHalfSet},
178 {OpCode::Type::Video, &ShaderIR::DecodeVideo}, 178 {OpCode::Type::Video, &ShaderIR::DecodeVideo},
179 {OpCode::Type::Xmad, &ShaderIR::DecodeXmad}, 179 {OpCode::Type::Xmad, &ShaderIR::DecodeXmad},
180 }; 180 };
181 181
182 std::vector<Node> tmp_block; 182 std::vector<Node> tmp_block;
183 if (const auto decoder = decoders.find(opcode->get().GetType()); decoder != decoders.end()) { 183 if (const auto decoder = decoders.find(opcode->get().GetType()); decoder != decoders.end()) {
184 pc = (this->*decoder->second)(tmp_block, bb, pc); 184 pc = (this->*decoder->second)(tmp_block, pc);
185 } else { 185 } else {
186 pc = DecodeOther(tmp_block, bb, pc); 186 pc = DecodeOther(tmp_block, pc);
187 } 187 }
188 188
189 // Some instructions (like SSY) don't have a predicate field, they are always unconditionally 189 // Some instructions (like SSY) don't have a predicate field, they are always unconditionally
@@ -192,11 +192,14 @@ u32 ShaderIR::DecodeInstr(BasicBlock& bb, u32 pc) {
192 const auto pred_index = static_cast<u32>(instr.pred.pred_index); 192 const auto pred_index = static_cast<u32>(instr.pred.pred_index);
193 193
194 if (can_be_predicated && pred_index != static_cast<u32>(Pred::UnusedIndex)) { 194 if (can_be_predicated && pred_index != static_cast<u32>(Pred::UnusedIndex)) {
195 bb.push_back( 195 const Node conditional =
196 Conditional(GetPredicate(pred_index, instr.negate_pred != 0), std::move(tmp_block))); 196 Conditional(GetPredicate(pred_index, instr.negate_pred != 0), std::move(tmp_block));
197 global_code.push_back(conditional);
198 bb.push_back(conditional);
197 } else { 199 } else {
198 for (auto& node : tmp_block) { 200 for (auto& node : tmp_block) {
199 bb.push_back(std::move(node)); 201 global_code.push_back(node);
202 bb.push_back(node);
200 } 203 }
201 } 204 }
202 205
diff --git a/src/video_core/shader/decode/arithmetic.cpp b/src/video_core/shader/decode/arithmetic.cpp
index 51b8d55d4..3190e2d7c 100644
--- a/src/video_core/shader/decode/arithmetic.cpp
+++ b/src/video_core/shader/decode/arithmetic.cpp
@@ -13,7 +13,7 @@ using Tegra::Shader::Instruction;
13using Tegra::Shader::OpCode; 13using Tegra::Shader::OpCode;
14using Tegra::Shader::SubOp; 14using Tegra::Shader::SubOp;
15 15
16u32 ShaderIR::DecodeArithmetic(BasicBlock& bb, const BasicBlock& code, u32 pc) { 16u32 ShaderIR::DecodeArithmetic(NodeBlock& bb, u32 pc) {
17 const Instruction instr = {program_code[pc]}; 17 const Instruction instr = {program_code[pc]};
18 const auto opcode = OpCode::Decode(instr); 18 const auto opcode = OpCode::Decode(instr);
19 19
diff --git a/src/video_core/shader/decode/arithmetic_half.cpp b/src/video_core/shader/decode/arithmetic_half.cpp
index 37eef2bf2..baee89107 100644
--- a/src/video_core/shader/decode/arithmetic_half.cpp
+++ b/src/video_core/shader/decode/arithmetic_half.cpp
@@ -12,7 +12,7 @@ namespace VideoCommon::Shader {
12using Tegra::Shader::Instruction; 12using Tegra::Shader::Instruction;
13using Tegra::Shader::OpCode; 13using Tegra::Shader::OpCode;
14 14
15u32 ShaderIR::DecodeArithmeticHalf(BasicBlock& bb, const BasicBlock& code, u32 pc) { 15u32 ShaderIR::DecodeArithmeticHalf(NodeBlock& bb, u32 pc) {
16 const Instruction instr = {program_code[pc]}; 16 const Instruction instr = {program_code[pc]};
17 const auto opcode = OpCode::Decode(instr); 17 const auto opcode = OpCode::Decode(instr);
18 18
diff --git a/src/video_core/shader/decode/arithmetic_half_immediate.cpp b/src/video_core/shader/decode/arithmetic_half_immediate.cpp
index 7b4f7d284..c2164ba50 100644
--- a/src/video_core/shader/decode/arithmetic_half_immediate.cpp
+++ b/src/video_core/shader/decode/arithmetic_half_immediate.cpp
@@ -12,7 +12,7 @@ namespace VideoCommon::Shader {
12using Tegra::Shader::Instruction; 12using Tegra::Shader::Instruction;
13using Tegra::Shader::OpCode; 13using Tegra::Shader::OpCode;
14 14
15u32 ShaderIR::DecodeArithmeticHalfImmediate(BasicBlock& bb, const BasicBlock& code, u32 pc) { 15u32 ShaderIR::DecodeArithmeticHalfImmediate(NodeBlock& bb, u32 pc) {
16 const Instruction instr = {program_code[pc]}; 16 const Instruction instr = {program_code[pc]};
17 const auto opcode = OpCode::Decode(instr); 17 const auto opcode = OpCode::Decode(instr);
18 18
diff --git a/src/video_core/shader/decode/arithmetic_immediate.cpp b/src/video_core/shader/decode/arithmetic_immediate.cpp
index 4fd3db54e..0d139c0d2 100644
--- a/src/video_core/shader/decode/arithmetic_immediate.cpp
+++ b/src/video_core/shader/decode/arithmetic_immediate.cpp
@@ -12,7 +12,7 @@ namespace VideoCommon::Shader {
12using Tegra::Shader::Instruction; 12using Tegra::Shader::Instruction;
13using Tegra::Shader::OpCode; 13using Tegra::Shader::OpCode;
14 14
15u32 ShaderIR::DecodeArithmeticImmediate(BasicBlock& bb, const BasicBlock& code, u32 pc) { 15u32 ShaderIR::DecodeArithmeticImmediate(NodeBlock& bb, u32 pc) {
16 const Instruction instr = {program_code[pc]}; 16 const Instruction instr = {program_code[pc]};
17 const auto opcode = OpCode::Decode(instr); 17 const auto opcode = OpCode::Decode(instr);
18 18
diff --git a/src/video_core/shader/decode/arithmetic_integer.cpp b/src/video_core/shader/decode/arithmetic_integer.cpp
index cc9a76a19..9fd4b273e 100644
--- a/src/video_core/shader/decode/arithmetic_integer.cpp
+++ b/src/video_core/shader/decode/arithmetic_integer.cpp
@@ -15,7 +15,7 @@ using Tegra::Shader::OpCode;
15using Tegra::Shader::Pred; 15using Tegra::Shader::Pred;
16using Tegra::Shader::Register; 16using Tegra::Shader::Register;
17 17
18u32 ShaderIR::DecodeArithmeticInteger(BasicBlock& bb, const BasicBlock& code, u32 pc) { 18u32 ShaderIR::DecodeArithmeticInteger(NodeBlock& bb, u32 pc) {
19 const Instruction instr = {program_code[pc]}; 19 const Instruction instr = {program_code[pc]};
20 const auto opcode = OpCode::Decode(instr); 20 const auto opcode = OpCode::Decode(instr);
21 21
@@ -41,7 +41,7 @@ u32 ShaderIR::DecodeArithmeticInteger(BasicBlock& bb, const BasicBlock& code, u3
41 41
42 const Node value = Operation(OperationCode::IAdd, PRECISE, op_a, op_b); 42 const Node value = Operation(OperationCode::IAdd, PRECISE, op_a, op_b);
43 43
44 SetInternalFlagsFromInteger(bb, value, instr.op_32.generates_cc); 44 SetInternalFlagsFromInteger(bb, value, instr.generates_cc);
45 SetRegister(bb, instr.gpr0, value); 45 SetRegister(bb, instr.gpr0, value);
46 break; 46 break;
47 } 47 }
@@ -242,7 +242,7 @@ u32 ShaderIR::DecodeArithmeticInteger(BasicBlock& bb, const BasicBlock& code, u3
242 return pc; 242 return pc;
243} 243}
244 244
245void ShaderIR::WriteLop3Instruction(BasicBlock& bb, Register dest, Node op_a, Node op_b, Node op_c, 245void ShaderIR::WriteLop3Instruction(NodeBlock& bb, Register dest, Node op_a, Node op_b, Node op_c,
246 Node imm_lut, bool sets_cc) { 246 Node imm_lut, bool sets_cc) {
247 constexpr u32 lop_iterations = 32; 247 constexpr u32 lop_iterations = 32;
248 const Node one = Immediate(1); 248 const Node one = Immediate(1);
@@ -284,4 +284,4 @@ void ShaderIR::WriteLop3Instruction(BasicBlock& bb, Register dest, Node op_a, No
284 SetRegister(bb, dest, value); 284 SetRegister(bb, dest, value);
285} 285}
286 286
287} // namespace VideoCommon::Shader \ No newline at end of file 287} // namespace VideoCommon::Shader
diff --git a/src/video_core/shader/decode/arithmetic_integer_immediate.cpp b/src/video_core/shader/decode/arithmetic_integer_immediate.cpp
index b26a6e473..3ed5ccc5a 100644
--- a/src/video_core/shader/decode/arithmetic_integer_immediate.cpp
+++ b/src/video_core/shader/decode/arithmetic_integer_immediate.cpp
@@ -16,7 +16,7 @@ using Tegra::Shader::Pred;
16using Tegra::Shader::PredicateResultMode; 16using Tegra::Shader::PredicateResultMode;
17using Tegra::Shader::Register; 17using Tegra::Shader::Register;
18 18
19u32 ShaderIR::DecodeArithmeticIntegerImmediate(BasicBlock& bb, const BasicBlock& code, u32 pc) { 19u32 ShaderIR::DecodeArithmeticIntegerImmediate(NodeBlock& bb, u32 pc) {
20 const Instruction instr = {program_code[pc]}; 20 const Instruction instr = {program_code[pc]};
21 const auto opcode = OpCode::Decode(instr); 21 const auto opcode = OpCode::Decode(instr);
22 22
@@ -54,9 +54,9 @@ u32 ShaderIR::DecodeArithmeticIntegerImmediate(BasicBlock& bb, const BasicBlock&
54 return pc; 54 return pc;
55} 55}
56 56
57void ShaderIR::WriteLogicOperation(BasicBlock& bb, Register dest, LogicOperation logic_op, 57void ShaderIR::WriteLogicOperation(NodeBlock& bb, Register dest, LogicOperation logic_op, Node op_a,
58 Node op_a, Node op_b, PredicateResultMode predicate_mode, 58 Node op_b, PredicateResultMode predicate_mode, Pred predicate,
59 Pred predicate, bool sets_cc) { 59 bool sets_cc) {
60 const Node result = [&]() { 60 const Node result = [&]() {
61 switch (logic_op) { 61 switch (logic_op) {
62 case LogicOperation::And: 62 case LogicOperation::And:
diff --git a/src/video_core/shader/decode/bfe.cpp b/src/video_core/shader/decode/bfe.cpp
index 0734141b0..6a95dc928 100644
--- a/src/video_core/shader/decode/bfe.cpp
+++ b/src/video_core/shader/decode/bfe.cpp
@@ -12,7 +12,7 @@ namespace VideoCommon::Shader {
12using Tegra::Shader::Instruction; 12using Tegra::Shader::Instruction;
13using Tegra::Shader::OpCode; 13using Tegra::Shader::OpCode;
14 14
15u32 ShaderIR::DecodeBfe(BasicBlock& bb, const BasicBlock& code, u32 pc) { 15u32 ShaderIR::DecodeBfe(NodeBlock& bb, u32 pc) {
16 const Instruction instr = {program_code[pc]}; 16 const Instruction instr = {program_code[pc]};
17 const auto opcode = OpCode::Decode(instr); 17 const auto opcode = OpCode::Decode(instr);
18 18
diff --git a/src/video_core/shader/decode/bfi.cpp b/src/video_core/shader/decode/bfi.cpp
index 942d6729d..601d66f1f 100644
--- a/src/video_core/shader/decode/bfi.cpp
+++ b/src/video_core/shader/decode/bfi.cpp
@@ -12,7 +12,7 @@ namespace VideoCommon::Shader {
12using Tegra::Shader::Instruction; 12using Tegra::Shader::Instruction;
13using Tegra::Shader::OpCode; 13using Tegra::Shader::OpCode;
14 14
15u32 ShaderIR::DecodeBfi(BasicBlock& bb, const BasicBlock& code, u32 pc) { 15u32 ShaderIR::DecodeBfi(NodeBlock& bb, u32 pc) {
16 const Instruction instr = {program_code[pc]}; 16 const Instruction instr = {program_code[pc]};
17 const auto opcode = OpCode::Decode(instr); 17 const auto opcode = OpCode::Decode(instr);
18 18
diff --git a/src/video_core/shader/decode/conversion.cpp b/src/video_core/shader/decode/conversion.cpp
index 728a393a1..55a6fbbf2 100644
--- a/src/video_core/shader/decode/conversion.cpp
+++ b/src/video_core/shader/decode/conversion.cpp
@@ -13,7 +13,7 @@ using Tegra::Shader::Instruction;
13using Tegra::Shader::OpCode; 13using Tegra::Shader::OpCode;
14using Tegra::Shader::Register; 14using Tegra::Shader::Register;
15 15
16u32 ShaderIR::DecodeConversion(BasicBlock& bb, const BasicBlock& code, u32 pc) { 16u32 ShaderIR::DecodeConversion(NodeBlock& bb, u32 pc) {
17 const Instruction instr = {program_code[pc]}; 17 const Instruction instr = {program_code[pc]};
18 const auto opcode = OpCode::Decode(instr); 18 const auto opcode = OpCode::Decode(instr);
19 19
@@ -118,8 +118,8 @@ u32 ShaderIR::DecodeConversion(BasicBlock& bb, const BasicBlock& code, u32 pc) {
118 118
119 value = [&]() { 119 value = [&]() {
120 switch (instr.conversion.f2i.rounding) { 120 switch (instr.conversion.f2i.rounding) {
121 case Tegra::Shader::F2iRoundingOp::None: 121 case Tegra::Shader::F2iRoundingOp::RoundEven:
122 return value; 122 return Operation(OperationCode::FRoundEven, PRECISE, value);
123 case Tegra::Shader::F2iRoundingOp::Floor: 123 case Tegra::Shader::F2iRoundingOp::Floor:
124 return Operation(OperationCode::FFloor, PRECISE, value); 124 return Operation(OperationCode::FFloor, PRECISE, value);
125 case Tegra::Shader::F2iRoundingOp::Ceil: 125 case Tegra::Shader::F2iRoundingOp::Ceil:
@@ -146,4 +146,4 @@ u32 ShaderIR::DecodeConversion(BasicBlock& bb, const BasicBlock& code, u32 pc) {
146 return pc; 146 return pc;
147} 147}
148 148
149} // namespace VideoCommon::Shader \ No newline at end of file 149} // namespace VideoCommon::Shader
diff --git a/src/video_core/shader/decode/ffma.cpp b/src/video_core/shader/decode/ffma.cpp
index 52f39d3ff..0559cc8de 100644
--- a/src/video_core/shader/decode/ffma.cpp
+++ b/src/video_core/shader/decode/ffma.cpp
@@ -12,7 +12,7 @@ namespace VideoCommon::Shader {
12using Tegra::Shader::Instruction; 12using Tegra::Shader::Instruction;
13using Tegra::Shader::OpCode; 13using Tegra::Shader::OpCode;
14 14
15u32 ShaderIR::DecodeFfma(BasicBlock& bb, const BasicBlock& code, u32 pc) { 15u32 ShaderIR::DecodeFfma(NodeBlock& bb, u32 pc) {
16 const Instruction instr = {program_code[pc]}; 16 const Instruction instr = {program_code[pc]};
17 const auto opcode = OpCode::Decode(instr); 17 const auto opcode = OpCode::Decode(instr);
18 18
diff --git a/src/video_core/shader/decode/float_set.cpp b/src/video_core/shader/decode/float_set.cpp
index 9f9da2278..1bd6755dd 100644
--- a/src/video_core/shader/decode/float_set.cpp
+++ b/src/video_core/shader/decode/float_set.cpp
@@ -12,7 +12,7 @@ namespace VideoCommon::Shader {
12using Tegra::Shader::Instruction; 12using Tegra::Shader::Instruction;
13using Tegra::Shader::OpCode; 13using Tegra::Shader::OpCode;
14 14
15u32 ShaderIR::DecodeFloatSet(BasicBlock& bb, const BasicBlock& code, u32 pc) { 15u32 ShaderIR::DecodeFloatSet(NodeBlock& bb, u32 pc) {
16 const Instruction instr = {program_code[pc]}; 16 const Instruction instr = {program_code[pc]};
17 const auto opcode = OpCode::Decode(instr); 17 const auto opcode = OpCode::Decode(instr);
18 18
diff --git a/src/video_core/shader/decode/float_set_predicate.cpp b/src/video_core/shader/decode/float_set_predicate.cpp
index dd3aef6f2..9285b8d05 100644
--- a/src/video_core/shader/decode/float_set_predicate.cpp
+++ b/src/video_core/shader/decode/float_set_predicate.cpp
@@ -13,7 +13,7 @@ using Tegra::Shader::Instruction;
13using Tegra::Shader::OpCode; 13using Tegra::Shader::OpCode;
14using Tegra::Shader::Pred; 14using Tegra::Shader::Pred;
15 15
16u32 ShaderIR::DecodeFloatSetPredicate(BasicBlock& bb, const BasicBlock& code, u32 pc) { 16u32 ShaderIR::DecodeFloatSetPredicate(NodeBlock& bb, u32 pc) {
17 const Instruction instr = {program_code[pc]}; 17 const Instruction instr = {program_code[pc]};
18 const auto opcode = OpCode::Decode(instr); 18 const auto opcode = OpCode::Decode(instr);
19 19
diff --git a/src/video_core/shader/decode/half_set.cpp b/src/video_core/shader/decode/half_set.cpp
index dfd7cb98f..748368555 100644
--- a/src/video_core/shader/decode/half_set.cpp
+++ b/src/video_core/shader/decode/half_set.cpp
@@ -14,7 +14,7 @@ namespace VideoCommon::Shader {
14using Tegra::Shader::Instruction; 14using Tegra::Shader::Instruction;
15using Tegra::Shader::OpCode; 15using Tegra::Shader::OpCode;
16 16
17u32 ShaderIR::DecodeHalfSet(BasicBlock& bb, const BasicBlock& code, u32 pc) { 17u32 ShaderIR::DecodeHalfSet(NodeBlock& bb, u32 pc) {
18 const Instruction instr = {program_code[pc]}; 18 const Instruction instr = {program_code[pc]};
19 const auto opcode = OpCode::Decode(instr); 19 const auto opcode = OpCode::Decode(instr);
20 20
diff --git a/src/video_core/shader/decode/half_set_predicate.cpp b/src/video_core/shader/decode/half_set_predicate.cpp
index 53c44ae5a..e68512692 100644
--- a/src/video_core/shader/decode/half_set_predicate.cpp
+++ b/src/video_core/shader/decode/half_set_predicate.cpp
@@ -13,7 +13,7 @@ using Tegra::Shader::Instruction;
13using Tegra::Shader::OpCode; 13using Tegra::Shader::OpCode;
14using Tegra::Shader::Pred; 14using Tegra::Shader::Pred;
15 15
16u32 ShaderIR::DecodeHalfSetPredicate(BasicBlock& bb, const BasicBlock& code, u32 pc) { 16u32 ShaderIR::DecodeHalfSetPredicate(NodeBlock& bb, u32 pc) {
17 const Instruction instr = {program_code[pc]}; 17 const Instruction instr = {program_code[pc]};
18 const auto opcode = OpCode::Decode(instr); 18 const auto opcode = OpCode::Decode(instr);
19 19
diff --git a/src/video_core/shader/decode/hfma2.cpp b/src/video_core/shader/decode/hfma2.cpp
index 43a0a9e10..7a07c5ec6 100644
--- a/src/video_core/shader/decode/hfma2.cpp
+++ b/src/video_core/shader/decode/hfma2.cpp
@@ -16,7 +16,7 @@ using Tegra::Shader::HalfType;
16using Tegra::Shader::Instruction; 16using Tegra::Shader::Instruction;
17using Tegra::Shader::OpCode; 17using Tegra::Shader::OpCode;
18 18
19u32 ShaderIR::DecodeHfma2(BasicBlock& bb, const BasicBlock& code, u32 pc) { 19u32 ShaderIR::DecodeHfma2(NodeBlock& bb, u32 pc) {
20 const Instruction instr = {program_code[pc]}; 20 const Instruction instr = {program_code[pc]};
21 const auto opcode = OpCode::Decode(instr); 21 const auto opcode = OpCode::Decode(instr);
22 22
diff --git a/src/video_core/shader/decode/integer_set.cpp b/src/video_core/shader/decode/integer_set.cpp
index 16eb3985f..a3bf17eba 100644
--- a/src/video_core/shader/decode/integer_set.cpp
+++ b/src/video_core/shader/decode/integer_set.cpp
@@ -12,7 +12,7 @@ namespace VideoCommon::Shader {
12using Tegra::Shader::Instruction; 12using Tegra::Shader::Instruction;
13using Tegra::Shader::OpCode; 13using Tegra::Shader::OpCode;
14 14
15u32 ShaderIR::DecodeIntegerSet(BasicBlock& bb, const BasicBlock& code, u32 pc) { 15u32 ShaderIR::DecodeIntegerSet(NodeBlock& bb, u32 pc) {
16 const Instruction instr = {program_code[pc]}; 16 const Instruction instr = {program_code[pc]};
17 const auto opcode = OpCode::Decode(instr); 17 const auto opcode = OpCode::Decode(instr);
18 18
diff --git a/src/video_core/shader/decode/integer_set_predicate.cpp b/src/video_core/shader/decode/integer_set_predicate.cpp
index daf97174b..aad836d24 100644
--- a/src/video_core/shader/decode/integer_set_predicate.cpp
+++ b/src/video_core/shader/decode/integer_set_predicate.cpp
@@ -13,7 +13,7 @@ using Tegra::Shader::Instruction;
13using Tegra::Shader::OpCode; 13using Tegra::Shader::OpCode;
14using Tegra::Shader::Pred; 14using Tegra::Shader::Pred;
15 15
16u32 ShaderIR::DecodeIntegerSetPredicate(BasicBlock& bb, const BasicBlock& code, u32 pc) { 16u32 ShaderIR::DecodeIntegerSetPredicate(NodeBlock& bb, u32 pc) {
17 const Instruction instr = {program_code[pc]}; 17 const Instruction instr = {program_code[pc]};
18 const auto opcode = OpCode::Decode(instr); 18 const auto opcode = OpCode::Decode(instr);
19 19
diff --git a/src/video_core/shader/decode/memory.cpp b/src/video_core/shader/decode/memory.cpp
index 3dd26da20..ea3c71eed 100644
--- a/src/video_core/shader/decode/memory.cpp
+++ b/src/video_core/shader/decode/memory.cpp
@@ -17,26 +17,8 @@ using Tegra::Shader::Attribute;
17using Tegra::Shader::Instruction; 17using Tegra::Shader::Instruction;
18using Tegra::Shader::OpCode; 18using Tegra::Shader::OpCode;
19using Tegra::Shader::Register; 19using Tegra::Shader::Register;
20using Tegra::Shader::TextureMiscMode;
21using Tegra::Shader::TextureProcessMode;
22using Tegra::Shader::TextureType;
23
24static std::size_t GetCoordCount(TextureType texture_type) {
25 switch (texture_type) {
26 case TextureType::Texture1D:
27 return 1;
28 case TextureType::Texture2D:
29 return 2;
30 case TextureType::Texture3D:
31 case TextureType::TextureCube:
32 return 3;
33 default:
34 UNIMPLEMENTED_MSG("Unhandled texture type: {}", static_cast<u32>(texture_type));
35 return 0;
36 }
37}
38 20
39u32 ShaderIR::DecodeMemory(BasicBlock& bb, const BasicBlock& code, u32 pc) { 21u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) {
40 const Instruction instr = {program_code[pc]}; 22 const Instruction instr = {program_code[pc]};
41 const auto opcode = OpCode::Decode(instr); 23 const auto opcode = OpCode::Decode(instr);
42 24
@@ -48,7 +30,7 @@ u32 ShaderIR::DecodeMemory(BasicBlock& bb, const BasicBlock& code, u32 pc) {
48 UNIMPLEMENTED_IF_MSG((instr.attribute.fmt20.immediate.Value() % sizeof(u32)) != 0, 30 UNIMPLEMENTED_IF_MSG((instr.attribute.fmt20.immediate.Value() % sizeof(u32)) != 0,
49 "Unaligned attribute loads are not supported"); 31 "Unaligned attribute loads are not supported");
50 32
51 Tegra::Shader::IpaMode input_mode{Tegra::Shader::IpaInterpMode::Perspective, 33 Tegra::Shader::IpaMode input_mode{Tegra::Shader::IpaInterpMode::Pass,
52 Tegra::Shader::IpaSampleMode::Default}; 34 Tegra::Shader::IpaSampleMode::Default};
53 35
54 u64 next_element = instr.attribute.fmt20.element; 36 u64 next_element = instr.attribute.fmt20.element;
@@ -160,7 +142,8 @@ u32 ShaderIR::DecodeMemory(BasicBlock& bb, const BasicBlock& code, u32 pc) {
160 }(); 142 }();
161 143
162 const Node addr_register = GetRegister(instr.gpr8); 144 const Node addr_register = GetRegister(instr.gpr8);
163 const Node base_address = TrackCbuf(addr_register, code, static_cast<s64>(code.size())); 145 const Node base_address =
146 TrackCbuf(addr_register, global_code, static_cast<s64>(global_code.size()));
164 const auto cbuf = std::get_if<CbufNode>(base_address); 147 const auto cbuf = std::get_if<CbufNode>(base_address);
165 ASSERT(cbuf != nullptr); 148 ASSERT(cbuf != nullptr);
166 const auto cbuf_offset_imm = std::get_if<ImmediateNode>(cbuf->GetOffset()); 149 const auto cbuf_offset_imm = std::get_if<ImmediateNode>(cbuf->GetOffset());
@@ -246,197 +229,6 @@ u32 ShaderIR::DecodeMemory(BasicBlock& bb, const BasicBlock& code, u32 pc) {
246 } 229 }
247 break; 230 break;
248 } 231 }
249 case OpCode::Id::TEX: {
250 UNIMPLEMENTED_IF_MSG(instr.tex.UsesMiscMode(TextureMiscMode::AOFFI),
251 "AOFFI is not implemented");
252
253 if (instr.tex.UsesMiscMode(TextureMiscMode::NODEP)) {
254 LOG_WARNING(HW_GPU, "TEX.NODEP implementation is incomplete");
255 }
256
257 const TextureType texture_type{instr.tex.texture_type};
258 const bool is_array = instr.tex.array != 0;
259 const bool depth_compare = instr.tex.UsesMiscMode(TextureMiscMode::DC);
260 const auto process_mode = instr.tex.GetTextureProcessMode();
261 WriteTexInstructionFloat(
262 bb, instr, GetTexCode(instr, texture_type, process_mode, depth_compare, is_array));
263 break;
264 }
265 case OpCode::Id::TEXS: {
266 const TextureType texture_type{instr.texs.GetTextureType()};
267 const bool is_array{instr.texs.IsArrayTexture()};
268 const bool depth_compare = instr.texs.UsesMiscMode(TextureMiscMode::DC);
269 const auto process_mode = instr.texs.GetTextureProcessMode();
270
271 if (instr.texs.UsesMiscMode(TextureMiscMode::NODEP)) {
272 LOG_WARNING(HW_GPU, "TEXS.NODEP implementation is incomplete");
273 }
274
275 const Node4 components =
276 GetTexsCode(instr, texture_type, process_mode, depth_compare, is_array);
277
278 if (instr.texs.fp32_flag) {
279 WriteTexsInstructionFloat(bb, instr, components);
280 } else {
281 WriteTexsInstructionHalfFloat(bb, instr, components);
282 }
283 break;
284 }
285 case OpCode::Id::TLD4: {
286 ASSERT(instr.tld4.array == 0);
287 UNIMPLEMENTED_IF_MSG(instr.tld4.UsesMiscMode(TextureMiscMode::AOFFI),
288 "AOFFI is not implemented");
289 UNIMPLEMENTED_IF_MSG(instr.tld4.UsesMiscMode(TextureMiscMode::NDV),
290 "NDV is not implemented");
291 UNIMPLEMENTED_IF_MSG(instr.tld4.UsesMiscMode(TextureMiscMode::PTP),
292 "PTP is not implemented");
293
294 if (instr.tld4.UsesMiscMode(TextureMiscMode::NODEP)) {
295 LOG_WARNING(HW_GPU, "TLD4.NODEP implementation is incomplete");
296 }
297
298 const auto texture_type = instr.tld4.texture_type.Value();
299 const bool depth_compare = instr.tld4.UsesMiscMode(TextureMiscMode::DC);
300 const bool is_array = instr.tld4.array != 0;
301 WriteTexInstructionFloat(bb, instr,
302 GetTld4Code(instr, texture_type, depth_compare, is_array));
303 break;
304 }
305 case OpCode::Id::TLD4S: {
306 UNIMPLEMENTED_IF_MSG(instr.tld4s.UsesMiscMode(TextureMiscMode::AOFFI),
307 "AOFFI is not implemented");
308
309 if (instr.tld4s.UsesMiscMode(TextureMiscMode::NODEP)) {
310 LOG_WARNING(HW_GPU, "TLD4S.NODEP implementation is incomplete");
311 }
312
313 const bool depth_compare = instr.tld4s.UsesMiscMode(TextureMiscMode::DC);
314 const Node op_a = GetRegister(instr.gpr8);
315 const Node op_b = GetRegister(instr.gpr20);
316
317 std::vector<Node> coords;
318
319 // TODO(Subv): Figure out how the sampler type is encoded in the TLD4S instruction.
320 if (depth_compare) {
321 // Note: TLD4S coordinate encoding works just like TEXS's
322 const Node op_y = GetRegister(instr.gpr8.Value() + 1);
323 coords.push_back(op_a);
324 coords.push_back(op_y);
325 coords.push_back(op_b);
326 } else {
327 coords.push_back(op_a);
328 coords.push_back(op_b);
329 }
330 const auto num_coords = static_cast<u32>(coords.size());
331 coords.push_back(Immediate(static_cast<u32>(instr.tld4s.component)));
332
333 const auto& sampler =
334 GetSampler(instr.sampler, TextureType::Texture2D, false, depth_compare);
335
336 Node4 values;
337 for (u32 element = 0; element < values.size(); ++element) {
338 auto params = coords;
339 MetaTexture meta{sampler, element, num_coords};
340 values[element] =
341 Operation(OperationCode::F4TextureGather, std::move(meta), std::move(params));
342 }
343
344 WriteTexsInstructionFloat(bb, instr, values);
345 break;
346 }
347 case OpCode::Id::TXQ: {
348 if (instr.txq.UsesMiscMode(TextureMiscMode::NODEP)) {
349 LOG_WARNING(HW_GPU, "TXQ.NODEP implementation is incomplete");
350 }
351
352 // TODO: The new commits on the texture refactor, change the way samplers work.
353 // Sadly, not all texture instructions specify the type of texture their sampler
354 // uses. This must be fixed at a later instance.
355 const auto& sampler =
356 GetSampler(instr.sampler, Tegra::Shader::TextureType::Texture2D, false, false);
357
358 u32 indexer = 0;
359 switch (instr.txq.query_type) {
360 case Tegra::Shader::TextureQueryType::Dimension: {
361 for (u32 element = 0; element < 4; ++element) {
362 if (instr.txq.IsComponentEnabled(element)) {
363 MetaTexture meta{sampler, element};
364 const Node value = Operation(OperationCode::F4TextureQueryDimensions,
365 std::move(meta), GetRegister(instr.gpr8));
366 SetTemporal(bb, indexer++, value);
367 }
368 }
369 for (u32 i = 0; i < indexer; ++i) {
370 SetRegister(bb, instr.gpr0.Value() + i, GetTemporal(i));
371 }
372 break;
373 }
374 default:
375 UNIMPLEMENTED_MSG("Unhandled texture query type: {}",
376 static_cast<u32>(instr.txq.query_type.Value()));
377 }
378 break;
379 }
380 case OpCode::Id::TMML: {
381 UNIMPLEMENTED_IF_MSG(instr.tmml.UsesMiscMode(Tegra::Shader::TextureMiscMode::NDV),
382 "NDV is not implemented");
383
384 if (instr.tmml.UsesMiscMode(TextureMiscMode::NODEP)) {
385 LOG_WARNING(HW_GPU, "TMML.NODEP implementation is incomplete");
386 }
387
388 auto texture_type = instr.tmml.texture_type.Value();
389 const bool is_array = instr.tmml.array != 0;
390 const auto& sampler = GetSampler(instr.sampler, texture_type, is_array, false);
391
392 std::vector<Node> coords;
393
394 // TODO: Add coordinates for different samplers once other texture types are implemented.
395 switch (texture_type) {
396 case TextureType::Texture1D:
397 coords.push_back(GetRegister(instr.gpr8));
398 break;
399 case TextureType::Texture2D:
400 coords.push_back(GetRegister(instr.gpr8.Value() + 0));
401 coords.push_back(GetRegister(instr.gpr8.Value() + 1));
402 break;
403 default:
404 UNIMPLEMENTED_MSG("Unhandled texture type {}", static_cast<u32>(texture_type));
405
406 // Fallback to interpreting as a 2D texture for now
407 coords.push_back(GetRegister(instr.gpr8.Value() + 0));
408 coords.push_back(GetRegister(instr.gpr8.Value() + 1));
409 texture_type = TextureType::Texture2D;
410 }
411
412 for (u32 element = 0; element < 2; ++element) {
413 auto params = coords;
414 MetaTexture meta_texture{sampler, element, static_cast<u32>(coords.size())};
415 const Node value =
416 Operation(OperationCode::F4TextureQueryLod, meta_texture, std::move(params));
417 SetTemporal(bb, element, value);
418 }
419 for (u32 element = 0; element < 2; ++element) {
420 SetRegister(bb, instr.gpr0.Value() + element, GetTemporal(element));
421 }
422
423 break;
424 }
425 case OpCode::Id::TLDS: {
426 const Tegra::Shader::TextureType texture_type{instr.tlds.GetTextureType()};
427 const bool is_array{instr.tlds.IsArrayTexture()};
428
429 UNIMPLEMENTED_IF_MSG(instr.tlds.UsesMiscMode(TextureMiscMode::AOFFI),
430 "AOFFI is not implemented");
431 UNIMPLEMENTED_IF_MSG(instr.tlds.UsesMiscMode(TextureMiscMode::MZ), "MZ is not implemented");
432
433 if (instr.tlds.UsesMiscMode(TextureMiscMode::NODEP)) {
434 LOG_WARNING(HW_GPU, "TMML.NODEP implementation is incomplete");
435 }
436
437 WriteTexsInstructionFloat(bb, instr, GetTldsCode(instr, texture_type, is_array));
438 break;
439 }
440 default: 232 default:
441 UNIMPLEMENTED_MSG("Unhandled memory instruction: {}", opcode->get().GetName()); 233 UNIMPLEMENTED_MSG("Unhandled memory instruction: {}", opcode->get().GetName());
442 } 234 }
@@ -444,328 +236,4 @@ u32 ShaderIR::DecodeMemory(BasicBlock& bb, const BasicBlock& code, u32 pc) {
444 return pc; 236 return pc;
445} 237}
446 238
447const Sampler& ShaderIR::GetSampler(const Tegra::Shader::Sampler& sampler, TextureType type,
448 bool is_array, bool is_shadow) {
449 const auto offset = static_cast<std::size_t>(sampler.index.Value());
450
451 // If this sampler has already been used, return the existing mapping.
452 const auto itr =
453 std::find_if(used_samplers.begin(), used_samplers.end(),
454 [&](const Sampler& entry) { return entry.GetOffset() == offset; });
455 if (itr != used_samplers.end()) {
456 ASSERT(itr->GetType() == type && itr->IsArray() == is_array &&
457 itr->IsShadow() == is_shadow);
458 return *itr;
459 }
460
461 // Otherwise create a new mapping for this sampler
462 const std::size_t next_index = used_samplers.size();
463 const Sampler entry{offset, next_index, type, is_array, is_shadow};
464 return *used_samplers.emplace(entry).first;
465}
466
467void ShaderIR::WriteTexInstructionFloat(BasicBlock& bb, Instruction instr,
468 const Node4& components) {
469 u32 dest_elem = 0;
470 for (u32 elem = 0; elem < 4; ++elem) {
471 if (!instr.tex.IsComponentEnabled(elem)) {
472 // Skip disabled components
473 continue;
474 }
475 SetTemporal(bb, dest_elem++, components[elem]);
476 }
477 // After writing values in temporals, move them to the real registers
478 for (u32 i = 0; i < dest_elem; ++i) {
479 SetRegister(bb, instr.gpr0.Value() + i, GetTemporal(i));
480 }
481}
482
483void ShaderIR::WriteTexsInstructionFloat(BasicBlock& bb, Instruction instr,
484 const Node4& components) {
485 // TEXS has two destination registers and a swizzle. The first two elements in the swizzle
486 // go into gpr0+0 and gpr0+1, and the rest goes into gpr28+0 and gpr28+1
487
488 u32 dest_elem = 0;
489 for (u32 component = 0; component < 4; ++component) {
490 if (!instr.texs.IsComponentEnabled(component))
491 continue;
492 SetTemporal(bb, dest_elem++, components[component]);
493 }
494
495 for (u32 i = 0; i < dest_elem; ++i) {
496 if (i < 2) {
497 // Write the first two swizzle components to gpr0 and gpr0+1
498 SetRegister(bb, instr.gpr0.Value() + i % 2, GetTemporal(i));
499 } else {
500 ASSERT(instr.texs.HasTwoDestinations());
501 // Write the rest of the swizzle components to gpr28 and gpr28+1
502 SetRegister(bb, instr.gpr28.Value() + i % 2, GetTemporal(i));
503 }
504 }
505}
506
507void ShaderIR::WriteTexsInstructionHalfFloat(BasicBlock& bb, Instruction instr,
508 const Node4& components) {
509 // TEXS.F16 destionation registers are packed in two registers in pairs (just like any half
510 // float instruction).
511
512 Node4 values;
513 u32 dest_elem = 0;
514 for (u32 component = 0; component < 4; ++component) {
515 if (!instr.texs.IsComponentEnabled(component))
516 continue;
517 values[dest_elem++] = components[component];
518 }
519 if (dest_elem == 0)
520 return;
521
522 std::generate(values.begin() + dest_elem, values.end(), [&]() { return Immediate(0); });
523
524 const Node first_value = Operation(OperationCode::HPack2, values[0], values[1]);
525 if (dest_elem <= 2) {
526 SetRegister(bb, instr.gpr0, first_value);
527 return;
528 }
529
530 SetTemporal(bb, 0, first_value);
531 SetTemporal(bb, 1, Operation(OperationCode::HPack2, values[2], values[3]));
532
533 SetRegister(bb, instr.gpr0, GetTemporal(0));
534 SetRegister(bb, instr.gpr28, GetTemporal(1));
535}
536
// Emits the IR for a generic texture read: one operation per output element,
// all sharing the same coordinate list. `array_offset` is the position of the
// array index inside `coords` (meaningful only when is_array); `bias_offset`
// offsets gpr20 to locate the lod/bias source register.
Node4 ShaderIR::GetTextureCode(Instruction instr, TextureType texture_type,
                               TextureProcessMode process_mode, bool depth_compare, bool is_array,
                               std::size_t array_offset, std::size_t bias_offset,
                               std::vector<Node>&& coords) {
    UNIMPLEMENTED_IF_MSG(
        (texture_type == TextureType::Texture3D && (is_array || depth_compare)) ||
            (texture_type == TextureType::TextureCube && is_array && depth_compare),
        "This method is not supported.");

    const auto& sampler = GetSampler(instr.sampler, texture_type, is_array, depth_compare);

    // These process modes carry an explicit (or implicit zero) LOD value.
    const bool lod_needed = process_mode == TextureProcessMode::LZ ||
                            process_mode == TextureProcessMode::LL ||
                            process_mode == TextureProcessMode::LLA;

    // LOD selection (either via bias or explicit textureLod) not supported in GL for
    // sampler2DArrayShadow and samplerCubeArrayShadow.
    const bool gl_lod_supported =
        !((texture_type == Tegra::Shader::TextureType::Texture2D && is_array && depth_compare) ||
          (texture_type == Tegra::Shader::TextureType::TextureCube && is_array && depth_compare));

    const OperationCode read_method =
        lod_needed && gl_lod_supported ? OperationCode::F4TextureLod : OperationCode::F4Texture;

    UNIMPLEMENTED_IF(process_mode != TextureProcessMode::None && !gl_lod_supported);

    std::optional<u32> array_offset_value;
    if (is_array)
        array_offset_value = static_cast<u32>(array_offset);

    // Record the coordinate count before appending the lod/bias node below.
    const auto coords_count = static_cast<u32>(coords.size());

    if (process_mode != TextureProcessMode::None && gl_lod_supported) {
        if (process_mode == TextureProcessMode::LZ) {
            // LZ: sample at an explicit LOD of zero.
            coords.push_back(Immediate(0.0f));
        } else {
            // If present, lod or bias are always stored in the register indexed by the gpr20
            // field with an offset depending on the usage of the other registers
            coords.push_back(GetRegister(instr.gpr20.Value() + bias_offset));
        }
    }

    // Emit one read per output element; each gets its own copy of the operands.
    Node4 values;
    for (u32 element = 0; element < values.size(); ++element) {
        auto params = coords;
        MetaTexture meta{sampler, element, coords_count, array_offset_value};
        values[element] = Operation(read_method, std::move(meta), std::move(params));
    }

    return values;
}
588
// Gathers the coordinate operands for a TEX instruction and forwards them to
// GetTextureCode. Register layout follows the hardware encoding: coordinates
// start at gpr8 (after the optional array index), depth at gpr20.
Node4 ShaderIR::GetTexCode(Instruction instr, TextureType texture_type,
                           TextureProcessMode process_mode, bool depth_compare, bool is_array) {
    const bool lod_bias_enabled =
        (process_mode != TextureProcessMode::None && process_mode != TextureProcessMode::LZ);

    // TEX allows up to 4 coordinates and 5 total register inputs.
    const auto [coord_count, total_coord_count] = ValidateAndGetCoordinateElement(
        texture_type, depth_compare, is_array, lod_bias_enabled, 4, 5);
    // If enabled arrays index is always stored in the gpr8 field
    const u64 array_register = instr.gpr8.Value();
    // First coordinate index is the gpr8 or gpr8 + 1 when arrays are used
    const u64 coord_register = array_register + (is_array ? 1 : 0);

    std::vector<Node> coords;
    for (std::size_t i = 0; i < coord_count; ++i) {
        coords.push_back(GetRegister(coord_register + i));
    }
    // 1D depth compare maps to a vec3 lookup in OpenGL; the 2nd component is ignored, pad with 0.
    if (depth_compare && !is_array && texture_type == TextureType::Texture1D) {
        coords.push_back(Immediate(0.0f));
    }
    // Position of the array index inside `coords`, consumed by GetTextureCode.
    std::size_t array_offset{};
    if (is_array) {
        array_offset = coords.size();
        coords.push_back(GetRegister(array_register));
    }
    if (depth_compare) {
        // Depth is always stored in the register signaled by gpr20
        // or in the next register if lod or bias are used
        const u64 depth_register = instr.gpr20.Value() + (lod_bias_enabled ? 1 : 0);
        coords.push_back(GetRegister(depth_register));
    }
    // Fill ignored coordinates
    while (coords.size() < total_coord_count) {
        coords.push_back(Immediate(0));
    }

    return GetTextureCode(instr, texture_type, process_mode, depth_compare, is_array, array_offset,
                          0, std::move(coords));
}
628
// Gathers the coordinate operands for a TEXS instruction. TEXS splits its
// sources across gpr8 and gpr20; the last coordinate may come from either
// depending on array/lod/depth usage (see last_coord_register below).
Node4 ShaderIR::GetTexsCode(Instruction instr, TextureType texture_type,
                            TextureProcessMode process_mode, bool depth_compare, bool is_array) {
    const bool lod_bias_enabled =
        (process_mode != TextureProcessMode::None && process_mode != TextureProcessMode::LZ);

    // TEXS allows up to 4 coordinates and 4 total register inputs.
    const auto [coord_count, total_coord_count] = ValidateAndGetCoordinateElement(
        texture_type, depth_compare, is_array, lod_bias_enabled, 4, 4);
    // If enabled arrays index is always stored in the gpr8 field
    const u64 array_register = instr.gpr8.Value();
    // First coordinate index is stored in gpr8 field or (gpr8 + 1) when arrays are used
    const u64 coord_register = array_register + (is_array ? 1 : 0);
    // The final coordinate reads from gpr20 unless the gpr8 bank has room for it.
    const u64 last_coord_register =
        (is_array || !(lod_bias_enabled || depth_compare) || (coord_count > 2))
            ? static_cast<u64>(instr.gpr20.Value())
            : coord_register + 1;

    std::vector<Node> coords;
    for (std::size_t i = 0; i < coord_count; ++i) {
        const bool last = (i == (coord_count - 1)) && (coord_count > 1);
        coords.push_back(GetRegister(last ? last_coord_register : coord_register + i));
    }

    // Position of the array index inside `coords`, consumed by GetTextureCode.
    std::size_t array_offset{};
    if (is_array) {
        array_offset = coords.size();
        coords.push_back(GetRegister(array_register));
    }
    if (depth_compare) {
        // Depth is always stored in the register signaled by gpr20
        // or in the next register if lod or bias are used
        const u64 depth_register = instr.gpr20.Value() + (lod_bias_enabled ? 1 : 0);
        coords.push_back(GetRegister(depth_register));
    }
    // Fill ignored coordinates
    while (coords.size() < total_coord_count) {
        coords.push_back(Immediate(0));
    }

    // bias_offset is 1 when a third coordinate already occupies gpr20+0.
    return GetTextureCode(instr, texture_type, process_mode, depth_compare, is_array, array_offset,
                          (coord_count > 2 ? 1 : 0), std::move(coords));
}
670
671Node4 ShaderIR::GetTld4Code(Instruction instr, TextureType texture_type, bool depth_compare,
672 bool is_array) {
673 const std::size_t coord_count = GetCoordCount(texture_type);
674 const std::size_t total_coord_count = coord_count + (is_array ? 1 : 0);
675 const std::size_t total_reg_count = total_coord_count + (depth_compare ? 1 : 0);
676
677 // If enabled arrays index is always stored in the gpr8 field
678 const u64 array_register = instr.gpr8.Value();
679 // First coordinate index is the gpr8 or gpr8 + 1 when arrays are used
680 const u64 coord_register = array_register + (is_array ? 1 : 0);
681
682 std::vector<Node> coords;
683
684 for (size_t i = 0; i < coord_count; ++i) {
685 coords.push_back(GetRegister(coord_register + i));
686 }
687 std::optional<u32> array_offset;
688 if (is_array) {
689 array_offset = static_cast<u32>(coords.size());
690 coords.push_back(GetRegister(array_register));
691 }
692
693 const auto& sampler = GetSampler(instr.sampler, texture_type, is_array, depth_compare);
694
695 Node4 values;
696 for (u32 element = 0; element < values.size(); ++element) {
697 auto params = coords;
698 MetaTexture meta{sampler, element, static_cast<u32>(coords.size()), array_offset};
699 values[element] =
700 Operation(OperationCode::F4TextureGather, std::move(meta), std::move(params));
701 }
702
703 return values;
704}
705
706Node4 ShaderIR::GetTldsCode(Instruction instr, TextureType texture_type, bool is_array) {
707 const std::size_t type_coord_count = GetCoordCount(texture_type);
708 const std::size_t total_coord_count = type_coord_count + (is_array ? 1 : 0);
709 const bool lod_enabled = instr.tlds.GetTextureProcessMode() == TextureProcessMode::LL;
710
711 // If enabled arrays index is always stored in the gpr8 field
712 const u64 array_register = instr.gpr8.Value();
713 // if is array gpr20 is used
714 const u64 coord_register = is_array ? instr.gpr20.Value() : instr.gpr8.Value();
715
716 const u64 last_coord_register =
717 ((type_coord_count > 2) || (type_coord_count == 2 && !lod_enabled)) && !is_array
718 ? static_cast<u64>(instr.gpr20.Value())
719 : coord_register + 1;
720
721 std::vector<Node> coords;
722
723 for (std::size_t i = 0; i < type_coord_count; ++i) {
724 const bool last = (i == (type_coord_count - 1)) && (type_coord_count > 1);
725 coords.push_back(GetRegister(last ? last_coord_register : coord_register + i));
726 }
727 std::optional<u32> array_offset;
728 if (is_array) {
729 array_offset = static_cast<u32>(coords.size());
730 coords.push_back(GetRegister(array_register));
731 }
732 const auto coords_count = static_cast<u32>(coords.size());
733
734 if (lod_enabled) {
735 // When lod is used always is in grp20
736 coords.push_back(GetRegister(instr.gpr20));
737 } else {
738 coords.push_back(Immediate(0));
739 }
740
741 const auto& sampler = GetSampler(instr.sampler, texture_type, is_array, false);
742
743 Node4 values;
744 for (u32 element = 0; element < values.size(); ++element) {
745 auto params = coords;
746 MetaTexture meta{sampler, element, coords_count, array_offset};
747 values[element] =
748 Operation(OperationCode::F4TexelFetch, std::move(meta), std::move(params));
749 }
750 return values;
751}
752
753std::tuple<std::size_t, std::size_t> ShaderIR::ValidateAndGetCoordinateElement(
754 TextureType texture_type, bool depth_compare, bool is_array, bool lod_bias_enabled,
755 std::size_t max_coords, std::size_t max_inputs) {
756 const std::size_t coord_count = GetCoordCount(texture_type);
757
758 std::size_t total_coord_count = coord_count + (is_array ? 1 : 0) + (depth_compare ? 1 : 0);
759 const std::size_t total_reg_count = total_coord_count + (lod_bias_enabled ? 1 : 0);
760 if (total_coord_count > max_coords || total_reg_count > max_inputs) {
761 UNIMPLEMENTED_MSG("Unsupported Texture operation");
762 total_coord_count = std::min(total_coord_count, max_coords);
763 }
764 // 1D.DC OpenGL is using a vec3 but 2nd component is ignored later.
765 total_coord_count +=
766 (depth_compare && !is_array && texture_type == TextureType::Texture1D) ? 1 : 0;
767
768 return {coord_count, total_coord_count};
769}
770
771} // namespace VideoCommon::Shader 239} // namespace VideoCommon::Shader
diff --git a/src/video_core/shader/decode/other.cpp b/src/video_core/shader/decode/other.cpp
index c1e5f4efb..d750a2936 100644
--- a/src/video_core/shader/decode/other.cpp
+++ b/src/video_core/shader/decode/other.cpp
@@ -14,7 +14,7 @@ using Tegra::Shader::Instruction;
14using Tegra::Shader::OpCode; 14using Tegra::Shader::OpCode;
15using Tegra::Shader::Register; 15using Tegra::Shader::Register;
16 16
17u32 ShaderIR::DecodeOther(BasicBlock& bb, const BasicBlock& code, u32 pc) { 17u32 ShaderIR::DecodeOther(NodeBlock& bb, u32 pc) {
18 const Instruction instr = {program_code[pc]}; 18 const Instruction instr = {program_code[pc]};
19 const auto opcode = OpCode::Decode(instr); 19 const auto opcode = OpCode::Decode(instr);
20 20
@@ -135,7 +135,18 @@ u32 ShaderIR::DecodeOther(BasicBlock& bb, const BasicBlock& code, u32 pc) {
135 instr.ipa.sample_mode.Value()}; 135 instr.ipa.sample_mode.Value()};
136 136
137 const Node attr = GetInputAttribute(attribute.index, attribute.element, input_mode); 137 const Node attr = GetInputAttribute(attribute.index, attribute.element, input_mode);
138 const Node value = GetSaturatedFloat(attr, instr.ipa.saturate); 138 Node value = attr;
139 const Tegra::Shader::Attribute::Index index = attribute.index.Value();
140 if (index >= Tegra::Shader::Attribute::Index::Attribute_0 &&
141 index <= Tegra::Shader::Attribute::Index::Attribute_31) {
142 // TODO(Blinkhawk): There are cases where a perspective attribute use PASS.
143 // In theory by setting them as perspective, OpenGL does the perspective correction.
144 // A way must figured to reverse the last step of it.
145 if (input_mode.interpolation_mode == Tegra::Shader::IpaInterpMode::Multiply) {
146 value = Operation(OperationCode::FMul, PRECISE, value, GetRegister(instr.gpr20));
147 }
148 }
149 value = GetSaturatedFloat(value, instr.ipa.saturate);
139 150
140 SetRegister(bb, instr.gpr0, value); 151 SetRegister(bb, instr.gpr0, value);
141 break; 152 break;
@@ -175,4 +186,4 @@ u32 ShaderIR::DecodeOther(BasicBlock& bb, const BasicBlock& code, u32 pc) {
175 return pc; 186 return pc;
176} 187}
177 188
178} // namespace VideoCommon::Shader \ No newline at end of file 189} // namespace VideoCommon::Shader
diff --git a/src/video_core/shader/decode/predicate_set_predicate.cpp b/src/video_core/shader/decode/predicate_set_predicate.cpp
index 1717f0653..83c61680e 100644
--- a/src/video_core/shader/decode/predicate_set_predicate.cpp
+++ b/src/video_core/shader/decode/predicate_set_predicate.cpp
@@ -13,7 +13,7 @@ using Tegra::Shader::Instruction;
13using Tegra::Shader::OpCode; 13using Tegra::Shader::OpCode;
14using Tegra::Shader::Pred; 14using Tegra::Shader::Pred;
15 15
16u32 ShaderIR::DecodePredicateSetPredicate(BasicBlock& bb, const BasicBlock& code, u32 pc) { 16u32 ShaderIR::DecodePredicateSetPredicate(NodeBlock& bb, u32 pc) {
17 const Instruction instr = {program_code[pc]}; 17 const Instruction instr = {program_code[pc]};
18 const auto opcode = OpCode::Decode(instr); 18 const auto opcode = OpCode::Decode(instr);
19 19
diff --git a/src/video_core/shader/decode/predicate_set_register.cpp b/src/video_core/shader/decode/predicate_set_register.cpp
index 8bd15fb00..d0495995d 100644
--- a/src/video_core/shader/decode/predicate_set_register.cpp
+++ b/src/video_core/shader/decode/predicate_set_register.cpp
@@ -12,7 +12,7 @@ namespace VideoCommon::Shader {
12using Tegra::Shader::Instruction; 12using Tegra::Shader::Instruction;
13using Tegra::Shader::OpCode; 13using Tegra::Shader::OpCode;
14 14
15u32 ShaderIR::DecodePredicateSetRegister(BasicBlock& bb, const BasicBlock& code, u32 pc) { 15u32 ShaderIR::DecodePredicateSetRegister(NodeBlock& bb, u32 pc) {
16 const Instruction instr = {program_code[pc]}; 16 const Instruction instr = {program_code[pc]};
17 const auto opcode = OpCode::Decode(instr); 17 const auto opcode = OpCode::Decode(instr);
18 18
diff --git a/src/video_core/shader/decode/register_set_predicate.cpp b/src/video_core/shader/decode/register_set_predicate.cpp
index bdb4424a6..f070e8912 100644
--- a/src/video_core/shader/decode/register_set_predicate.cpp
+++ b/src/video_core/shader/decode/register_set_predicate.cpp
@@ -12,7 +12,7 @@ namespace VideoCommon::Shader {
12using Tegra::Shader::Instruction; 12using Tegra::Shader::Instruction;
13using Tegra::Shader::OpCode; 13using Tegra::Shader::OpCode;
14 14
15u32 ShaderIR::DecodeRegisterSetPredicate(BasicBlock& bb, const BasicBlock& code, u32 pc) { 15u32 ShaderIR::DecodeRegisterSetPredicate(NodeBlock& bb, u32 pc) {
16 const Instruction instr = {program_code[pc]}; 16 const Instruction instr = {program_code[pc]};
17 const auto opcode = OpCode::Decode(instr); 17 const auto opcode = OpCode::Decode(instr);
18 18
diff --git a/src/video_core/shader/decode/shift.cpp b/src/video_core/shader/decode/shift.cpp
index 6623f8ff9..951e85f44 100644
--- a/src/video_core/shader/decode/shift.cpp
+++ b/src/video_core/shader/decode/shift.cpp
@@ -12,7 +12,7 @@ namespace VideoCommon::Shader {
12using Tegra::Shader::Instruction; 12using Tegra::Shader::Instruction;
13using Tegra::Shader::OpCode; 13using Tegra::Shader::OpCode;
14 14
15u32 ShaderIR::DecodeShift(BasicBlock& bb, const BasicBlock& code, u32 pc) { 15u32 ShaderIR::DecodeShift(NodeBlock& bb, u32 pc) {
16 const Instruction instr = {program_code[pc]}; 16 const Instruction instr = {program_code[pc]};
17 const auto opcode = OpCode::Decode(instr); 17 const auto opcode = OpCode::Decode(instr);
18 18
diff --git a/src/video_core/shader/decode/texture.cpp b/src/video_core/shader/decode/texture.cpp
new file mode 100644
index 000000000..a99ae19bf
--- /dev/null
+++ b/src/video_core/shader/decode/texture.cpp
@@ -0,0 +1,534 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <algorithm>
6#include <vector>
7#include <fmt/format.h>
8
9#include "common/assert.h"
10#include "common/common_types.h"
11#include "video_core/engines/shader_bytecode.h"
12#include "video_core/shader/shader_ir.h"
13
14namespace VideoCommon::Shader {
15
16using Tegra::Shader::Instruction;
17using Tegra::Shader::OpCode;
18using Tegra::Shader::Register;
19using Tegra::Shader::TextureMiscMode;
20using Tegra::Shader::TextureProcessMode;
21using Tegra::Shader::TextureType;
22
23static std::size_t GetCoordCount(TextureType texture_type) {
24 switch (texture_type) {
25 case TextureType::Texture1D:
26 return 1;
27 case TextureType::Texture2D:
28 return 2;
29 case TextureType::Texture3D:
30 case TextureType::TextureCube:
31 return 3;
32 default:
33 UNIMPLEMENTED_MSG("Unhandled texture type: {}", static_cast<u32>(texture_type));
34 return 0;
35 }
36}
37
38u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) {
39 const Instruction instr = {program_code[pc]};
40 const auto opcode = OpCode::Decode(instr);
41
42 switch (opcode->get().GetId()) {
43 case OpCode::Id::TEX: {
44 UNIMPLEMENTED_IF_MSG(instr.tex.UsesMiscMode(TextureMiscMode::AOFFI),
45 "AOFFI is not implemented");
46
47 if (instr.tex.UsesMiscMode(TextureMiscMode::NODEP)) {
48 LOG_WARNING(HW_GPU, "TEX.NODEP implementation is incomplete");
49 }
50
51 const TextureType texture_type{instr.tex.texture_type};
52 const bool is_array = instr.tex.array != 0;
53 const bool depth_compare = instr.tex.UsesMiscMode(TextureMiscMode::DC);
54 const auto process_mode = instr.tex.GetTextureProcessMode();
55 WriteTexInstructionFloat(
56 bb, instr, GetTexCode(instr, texture_type, process_mode, depth_compare, is_array));
57 break;
58 }
59 case OpCode::Id::TEXS: {
60 const TextureType texture_type{instr.texs.GetTextureType()};
61 const bool is_array{instr.texs.IsArrayTexture()};
62 const bool depth_compare = instr.texs.UsesMiscMode(TextureMiscMode::DC);
63 const auto process_mode = instr.texs.GetTextureProcessMode();
64
65 if (instr.texs.UsesMiscMode(TextureMiscMode::NODEP)) {
66 LOG_WARNING(HW_GPU, "TEXS.NODEP implementation is incomplete");
67 }
68
69 const Node4 components =
70 GetTexsCode(instr, texture_type, process_mode, depth_compare, is_array);
71
72 if (instr.texs.fp32_flag) {
73 WriteTexsInstructionFloat(bb, instr, components);
74 } else {
75 WriteTexsInstructionHalfFloat(bb, instr, components);
76 }
77 break;
78 }
79 case OpCode::Id::TLD4: {
80 ASSERT(instr.tld4.array == 0);
81 UNIMPLEMENTED_IF_MSG(instr.tld4.UsesMiscMode(TextureMiscMode::AOFFI),
82 "AOFFI is not implemented");
83 UNIMPLEMENTED_IF_MSG(instr.tld4.UsesMiscMode(TextureMiscMode::NDV),
84 "NDV is not implemented");
85 UNIMPLEMENTED_IF_MSG(instr.tld4.UsesMiscMode(TextureMiscMode::PTP),
86 "PTP is not implemented");
87
88 if (instr.tld4.UsesMiscMode(TextureMiscMode::NODEP)) {
89 LOG_WARNING(HW_GPU, "TLD4.NODEP implementation is incomplete");
90 }
91
92 const auto texture_type = instr.tld4.texture_type.Value();
93 const bool depth_compare = instr.tld4.UsesMiscMode(TextureMiscMode::DC);
94 const bool is_array = instr.tld4.array != 0;
95 WriteTexInstructionFloat(bb, instr,
96 GetTld4Code(instr, texture_type, depth_compare, is_array));
97 break;
98 }
99 case OpCode::Id::TLD4S: {
100 UNIMPLEMENTED_IF_MSG(instr.tld4s.UsesMiscMode(TextureMiscMode::AOFFI),
101 "AOFFI is not implemented");
102 if (instr.tld4s.UsesMiscMode(TextureMiscMode::NODEP)) {
103 LOG_WARNING(HW_GPU, "TLD4S.NODEP implementation is incomplete");
104 }
105
106 const bool depth_compare = instr.tld4s.UsesMiscMode(TextureMiscMode::DC);
107 const Node op_a = GetRegister(instr.gpr8);
108 const Node op_b = GetRegister(instr.gpr20);
109
110 // TODO(Subv): Figure out how the sampler type is encoded in the TLD4S instruction.
111 std::vector<Node> coords;
112 if (depth_compare) {
113 // Note: TLD4S coordinate encoding works just like TEXS's
114 const Node op_y = GetRegister(instr.gpr8.Value() + 1);
115 coords.push_back(op_a);
116 coords.push_back(op_y);
117 coords.push_back(op_b);
118 } else {
119 coords.push_back(op_a);
120 coords.push_back(op_b);
121 }
122 const Node component = Immediate(static_cast<u32>(instr.tld4s.component));
123
124 const auto& sampler =
125 GetSampler(instr.sampler, TextureType::Texture2D, false, depth_compare);
126
127 Node4 values;
128 for (u32 element = 0; element < values.size(); ++element) {
129 auto coords_copy = coords;
130 MetaTexture meta{sampler, {}, {}, {}, {}, component, element};
131 values[element] = Operation(OperationCode::TextureGather, meta, std::move(coords_copy));
132 }
133
134 WriteTexsInstructionFloat(bb, instr, values);
135 break;
136 }
137 case OpCode::Id::TXQ: {
138 if (instr.txq.UsesMiscMode(TextureMiscMode::NODEP)) {
139 LOG_WARNING(HW_GPU, "TXQ.NODEP implementation is incomplete");
140 }
141
142 // TODO: The new commits on the texture refactor, change the way samplers work.
143 // Sadly, not all texture instructions specify the type of texture their sampler
144 // uses. This must be fixed at a later instance.
145 const auto& sampler =
146 GetSampler(instr.sampler, Tegra::Shader::TextureType::Texture2D, false, false);
147
148 u32 indexer = 0;
149 switch (instr.txq.query_type) {
150 case Tegra::Shader::TextureQueryType::Dimension: {
151 for (u32 element = 0; element < 4; ++element) {
152 if (!instr.txq.IsComponentEnabled(element)) {
153 continue;
154 }
155 MetaTexture meta{sampler, {}, {}, {}, {}, {}, element};
156 const Node value =
157 Operation(OperationCode::TextureQueryDimensions, meta, GetRegister(instr.gpr8));
158 SetTemporal(bb, indexer++, value);
159 }
160 for (u32 i = 0; i < indexer; ++i) {
161 SetRegister(bb, instr.gpr0.Value() + i, GetTemporal(i));
162 }
163 break;
164 }
165 default:
166 UNIMPLEMENTED_MSG("Unhandled texture query type: {}",
167 static_cast<u32>(instr.txq.query_type.Value()));
168 }
169 break;
170 }
171 case OpCode::Id::TMML: {
172 UNIMPLEMENTED_IF_MSG(instr.tmml.UsesMiscMode(Tegra::Shader::TextureMiscMode::NDV),
173 "NDV is not implemented");
174
175 if (instr.tmml.UsesMiscMode(TextureMiscMode::NODEP)) {
176 LOG_WARNING(HW_GPU, "TMML.NODEP implementation is incomplete");
177 }
178
179 auto texture_type = instr.tmml.texture_type.Value();
180 const bool is_array = instr.tmml.array != 0;
181 const auto& sampler = GetSampler(instr.sampler, texture_type, is_array, false);
182
183 std::vector<Node> coords;
184
185 // TODO: Add coordinates for different samplers once other texture types are implemented.
186 switch (texture_type) {
187 case TextureType::Texture1D:
188 coords.push_back(GetRegister(instr.gpr8));
189 break;
190 case TextureType::Texture2D:
191 coords.push_back(GetRegister(instr.gpr8.Value() + 0));
192 coords.push_back(GetRegister(instr.gpr8.Value() + 1));
193 break;
194 default:
195 UNIMPLEMENTED_MSG("Unhandled texture type {}", static_cast<u32>(texture_type));
196
197 // Fallback to interpreting as a 2D texture for now
198 coords.push_back(GetRegister(instr.gpr8.Value() + 0));
199 coords.push_back(GetRegister(instr.gpr8.Value() + 1));
200 texture_type = TextureType::Texture2D;
201 }
202
203 for (u32 element = 0; element < 2; ++element) {
204 auto params = coords;
205 MetaTexture meta{sampler, {}, {}, {}, {}, {}, element};
206 const Node value = Operation(OperationCode::TextureQueryLod, meta, std::move(params));
207 SetTemporal(bb, element, value);
208 }
209 for (u32 element = 0; element < 2; ++element) {
210 SetRegister(bb, instr.gpr0.Value() + element, GetTemporal(element));
211 }
212
213 break;
214 }
215 case OpCode::Id::TLDS: {
216 const Tegra::Shader::TextureType texture_type{instr.tlds.GetTextureType()};
217 const bool is_array{instr.tlds.IsArrayTexture()};
218
219 UNIMPLEMENTED_IF_MSG(instr.tlds.UsesMiscMode(TextureMiscMode::AOFFI),
220 "AOFFI is not implemented");
221 UNIMPLEMENTED_IF_MSG(instr.tlds.UsesMiscMode(TextureMiscMode::MZ), "MZ is not implemented");
222
223 if (instr.tlds.UsesMiscMode(TextureMiscMode::NODEP)) {
224 LOG_WARNING(HW_GPU, "TLDS.NODEP implementation is incomplete");
225 }
226
227 WriteTexsInstructionFloat(bb, instr, GetTldsCode(instr, texture_type, is_array));
228 break;
229 }
230 default:
231 UNIMPLEMENTED_MSG("Unhandled memory instruction: {}", opcode->get().GetName());
232 }
233
234 return pc;
235}
236
237const Sampler& ShaderIR::GetSampler(const Tegra::Shader::Sampler& sampler, TextureType type,
238 bool is_array, bool is_shadow) {
239 const auto offset = static_cast<std::size_t>(sampler.index.Value());
240
241 // If this sampler has already been used, return the existing mapping.
242 const auto itr =
243 std::find_if(used_samplers.begin(), used_samplers.end(),
244 [&](const Sampler& entry) { return entry.GetOffset() == offset; });
245 if (itr != used_samplers.end()) {
246 ASSERT(itr->GetType() == type && itr->IsArray() == is_array &&
247 itr->IsShadow() == is_shadow);
248 return *itr;
249 }
250
251 // Otherwise create a new mapping for this sampler
252 const std::size_t next_index = used_samplers.size();
253 const Sampler entry{offset, next_index, type, is_array, is_shadow};
254 return *used_samplers.emplace(entry).first;
255}
256
257void ShaderIR::WriteTexInstructionFloat(NodeBlock& bb, Instruction instr, const Node4& components) {
258 u32 dest_elem = 0;
259 for (u32 elem = 0; elem < 4; ++elem) {
260 if (!instr.tex.IsComponentEnabled(elem)) {
261 // Skip disabled components
262 continue;
263 }
264 SetTemporal(bb, dest_elem++, components[elem]);
265 }
266 // After writing values in temporals, move them to the real registers
267 for (u32 i = 0; i < dest_elem; ++i) {
268 SetRegister(bb, instr.gpr0.Value() + i, GetTemporal(i));
269 }
270}
271
272void ShaderIR::WriteTexsInstructionFloat(NodeBlock& bb, Instruction instr,
273 const Node4& components) {
274 // TEXS has two destination registers and a swizzle. The first two elements in the swizzle
275 // go into gpr0+0 and gpr0+1, and the rest goes into gpr28+0 and gpr28+1
276
277 u32 dest_elem = 0;
278 for (u32 component = 0; component < 4; ++component) {
279 if (!instr.texs.IsComponentEnabled(component))
280 continue;
281 SetTemporal(bb, dest_elem++, components[component]);
282 }
283
284 for (u32 i = 0; i < dest_elem; ++i) {
285 if (i < 2) {
286 // Write the first two swizzle components to gpr0 and gpr0+1
287 SetRegister(bb, instr.gpr0.Value() + i % 2, GetTemporal(i));
288 } else {
289 ASSERT(instr.texs.HasTwoDestinations());
290 // Write the rest of the swizzle components to gpr28 and gpr28+1
291 SetRegister(bb, instr.gpr28.Value() + i % 2, GetTemporal(i));
292 }
293 }
294}
295
296void ShaderIR::WriteTexsInstructionHalfFloat(NodeBlock& bb, Instruction instr,
297 const Node4& components) {
298 // TEXS.F16 destionation registers are packed in two registers in pairs (just like any half
299 // float instruction).
300
301 Node4 values;
302 u32 dest_elem = 0;
303 for (u32 component = 0; component < 4; ++component) {
304 if (!instr.texs.IsComponentEnabled(component))
305 continue;
306 values[dest_elem++] = components[component];
307 }
308 if (dest_elem == 0)
309 return;
310
311 std::generate(values.begin() + dest_elem, values.end(), [&]() { return Immediate(0); });
312
313 const Node first_value = Operation(OperationCode::HPack2, values[0], values[1]);
314 if (dest_elem <= 2) {
315 SetRegister(bb, instr.gpr0, first_value);
316 return;
317 }
318
319 SetTemporal(bb, 0, first_value);
320 SetTemporal(bb, 1, Operation(OperationCode::HPack2, values[2], values[3]));
321
322 SetRegister(bb, instr.gpr0, GetTemporal(0));
323 SetRegister(bb, instr.gpr28, GetTemporal(1));
324}
325
// Emits the IR for a generic texture read: one operation per output element,
// all sharing the same coordinate list. `array` and `depth_compare` are
// optional nodes — a non-empty node both supplies the operand and flags the
// lookup as array/shadow. `bias_offset` offsets gpr20 to locate the lod/bias
// source register.
Node4 ShaderIR::GetTextureCode(Instruction instr, TextureType texture_type,
                               TextureProcessMode process_mode, std::vector<Node> coords,
                               Node array, Node depth_compare, u32 bias_offset) {
    // Node converts to bool: non-empty means the corresponding operand is present.
    const bool is_array = array;
    const bool is_shadow = depth_compare;

    UNIMPLEMENTED_IF_MSG((texture_type == TextureType::Texture3D && (is_array || is_shadow)) ||
                             (texture_type == TextureType::TextureCube && is_array && is_shadow),
                         "This method is not supported.");

    const auto& sampler = GetSampler(instr.sampler, texture_type, is_array, is_shadow);

    // These process modes carry an explicit (or implicit zero) LOD value.
    const bool lod_needed = process_mode == TextureProcessMode::LZ ||
                            process_mode == TextureProcessMode::LL ||
                            process_mode == TextureProcessMode::LLA;

    // LOD selection (either via bias or explicit textureLod) not supported in GL for
    // sampler2DArrayShadow and samplerCubeArrayShadow.
    const bool gl_lod_supported =
        !((texture_type == Tegra::Shader::TextureType::Texture2D && is_array && is_shadow) ||
          (texture_type == Tegra::Shader::TextureType::TextureCube && is_array && is_shadow));

    const OperationCode read_method =
        (lod_needed && gl_lod_supported) ? OperationCode::TextureLod : OperationCode::Texture;

    UNIMPLEMENTED_IF(process_mode != TextureProcessMode::None && !gl_lod_supported);

    // Bias and lod are mutually exclusive; whichever applies is filled in below.
    Node bias = {};
    Node lod = {};
    if (process_mode != TextureProcessMode::None && gl_lod_supported) {
        switch (process_mode) {
        case TextureProcessMode::LZ:
            lod = Immediate(0.0f);
            break;
        case TextureProcessMode::LB:
            // If present, lod or bias are always stored in the register indexed by the gpr20
            // field with an offset depending on the usage of the other registers
            bias = GetRegister(instr.gpr20.Value() + bias_offset);
            break;
        case TextureProcessMode::LL:
            lod = GetRegister(instr.gpr20.Value() + bias_offset);
            break;
        default:
            UNIMPLEMENTED_MSG("Unimplemented process mode={}", static_cast<u32>(process_mode));
            break;
        }
    }

    // Emit one read per output element; each receives its own copy of the coords.
    Node4 values;
    for (u32 element = 0; element < values.size(); ++element) {
        auto copy_coords = coords;
        MetaTexture meta{sampler, array, depth_compare, bias, lod, {}, element};
        values[element] = Operation(read_method, meta, std::move(copy_coords));
    }

    return values;
}
383
384Node4 ShaderIR::GetTexCode(Instruction instr, TextureType texture_type,
385 TextureProcessMode process_mode, bool depth_compare, bool is_array) {
386 const bool lod_bias_enabled =
387 (process_mode != TextureProcessMode::None && process_mode != TextureProcessMode::LZ);
388
389 const auto [coord_count, total_coord_count] = ValidateAndGetCoordinateElement(
390 texture_type, depth_compare, is_array, lod_bias_enabled, 4, 5);
391 // If enabled arrays index is always stored in the gpr8 field
392 const u64 array_register = instr.gpr8.Value();
393 // First coordinate index is the gpr8 or gpr8 + 1 when arrays are used
394 const u64 coord_register = array_register + (is_array ? 1 : 0);
395
396 std::vector<Node> coords;
397 for (std::size_t i = 0; i < coord_count; ++i) {
398 coords.push_back(GetRegister(coord_register + i));
399 }
400 // 1D.DC in OpenGL the 2nd component is ignored.
401 if (depth_compare && !is_array && texture_type == TextureType::Texture1D) {
402 coords.push_back(Immediate(0.0f));
403 }
404
405 const Node array = is_array ? GetRegister(array_register) : nullptr;
406
407 Node dc{};
408 if (depth_compare) {
409 // Depth is always stored in the register signaled by gpr20 or in the next register if lod
410 // or bias are used
411 const u64 depth_register = instr.gpr20.Value() + (lod_bias_enabled ? 1 : 0);
412 dc = GetRegister(depth_register);
413 }
414
415 return GetTextureCode(instr, texture_type, process_mode, coords, array, dc, 0);
416}
417
418Node4 ShaderIR::GetTexsCode(Instruction instr, TextureType texture_type,
419 TextureProcessMode process_mode, bool depth_compare, bool is_array) {
420 const bool lod_bias_enabled =
421 (process_mode != TextureProcessMode::None && process_mode != TextureProcessMode::LZ);
422
423 const auto [coord_count, total_coord_count] = ValidateAndGetCoordinateElement(
424 texture_type, depth_compare, is_array, lod_bias_enabled, 4, 4);
425 // If enabled arrays index is always stored in the gpr8 field
426 const u64 array_register = instr.gpr8.Value();
427 // First coordinate index is stored in gpr8 field or (gpr8 + 1) when arrays are used
428 const u64 coord_register = array_register + (is_array ? 1 : 0);
429 const u64 last_coord_register =
430 (is_array || !(lod_bias_enabled || depth_compare) || (coord_count > 2))
431 ? static_cast<u64>(instr.gpr20.Value())
432 : coord_register + 1;
433 const u32 bias_offset = coord_count > 2 ? 1 : 0;
434
435 std::vector<Node> coords;
436 for (std::size_t i = 0; i < coord_count; ++i) {
437 const bool last = (i == (coord_count - 1)) && (coord_count > 1);
438 coords.push_back(GetRegister(last ? last_coord_register : coord_register + i));
439 }
440
441 const Node array = is_array ? GetRegister(array_register) : nullptr;
442
443 Node dc{};
444 if (depth_compare) {
445 // Depth is always stored in the register signaled by gpr20 or in the next register if lod
446 // or bias are used
447 const u64 depth_register = instr.gpr20.Value() + (lod_bias_enabled ? 1 : 0);
448 dc = GetRegister(depth_register);
449 }
450
451 return GetTextureCode(instr, texture_type, process_mode, coords, array, dc, bias_offset);
452}
453
454Node4 ShaderIR::GetTld4Code(Instruction instr, TextureType texture_type, bool depth_compare,
455 bool is_array) {
456 const std::size_t coord_count = GetCoordCount(texture_type);
457 const std::size_t total_coord_count = coord_count + (is_array ? 1 : 0);
458 const std::size_t total_reg_count = total_coord_count + (depth_compare ? 1 : 0);
459
460 // If enabled arrays index is always stored in the gpr8 field
461 const u64 array_register = instr.gpr8.Value();
462 // First coordinate index is the gpr8 or gpr8 + 1 when arrays are used
463 const u64 coord_register = array_register + (is_array ? 1 : 0);
464
465 std::vector<Node> coords;
466 for (size_t i = 0; i < coord_count; ++i)
467 coords.push_back(GetRegister(coord_register + i));
468
469 const auto& sampler = GetSampler(instr.sampler, texture_type, is_array, depth_compare);
470
471 Node4 values;
472 for (u32 element = 0; element < values.size(); ++element) {
473 auto coords_copy = coords;
474 MetaTexture meta{sampler, GetRegister(array_register), {}, {}, {}, {}, element};
475 values[element] = Operation(OperationCode::TextureGather, meta, std::move(coords_copy));
476 }
477
478 return values;
479}
480
481Node4 ShaderIR::GetTldsCode(Instruction instr, TextureType texture_type, bool is_array) {
482 const std::size_t type_coord_count = GetCoordCount(texture_type);
483 const bool lod_enabled = instr.tlds.GetTextureProcessMode() == TextureProcessMode::LL;
484
485 // If enabled arrays index is always stored in the gpr8 field
486 const u64 array_register = instr.gpr8.Value();
487 // if is array gpr20 is used
488 const u64 coord_register = is_array ? instr.gpr20.Value() : instr.gpr8.Value();
489
490 const u64 last_coord_register =
491 ((type_coord_count > 2) || (type_coord_count == 2 && !lod_enabled)) && !is_array
492 ? static_cast<u64>(instr.gpr20.Value())
493 : coord_register + 1;
494
495 std::vector<Node> coords;
496 for (std::size_t i = 0; i < type_coord_count; ++i) {
497 const bool last = (i == (type_coord_count - 1)) && (type_coord_count > 1);
498 coords.push_back(GetRegister(last ? last_coord_register : coord_register + i));
499 }
500
501 const Node array = is_array ? GetRegister(array_register) : nullptr;
502 // When lod is used always is in gpr20
503 const Node lod = lod_enabled ? GetRegister(instr.gpr20) : Immediate(0);
504
505 const auto& sampler = GetSampler(instr.sampler, texture_type, is_array, false);
506
507 Node4 values;
508 for (u32 element = 0; element < values.size(); ++element) {
509 auto coords_copy = coords;
510 MetaTexture meta{sampler, array, {}, {}, lod, {}, element};
511 values[element] = Operation(OperationCode::TexelFetch, meta, std::move(coords_copy));
512 }
513 return values;
514}
515
516std::tuple<std::size_t, std::size_t> ShaderIR::ValidateAndGetCoordinateElement(
517 TextureType texture_type, bool depth_compare, bool is_array, bool lod_bias_enabled,
518 std::size_t max_coords, std::size_t max_inputs) {
519 const std::size_t coord_count = GetCoordCount(texture_type);
520
521 std::size_t total_coord_count = coord_count + (is_array ? 1 : 0) + (depth_compare ? 1 : 0);
522 const std::size_t total_reg_count = total_coord_count + (lod_bias_enabled ? 1 : 0);
523 if (total_coord_count > max_coords || total_reg_count > max_inputs) {
524 UNIMPLEMENTED_MSG("Unsupported Texture operation");
525 total_coord_count = std::min(total_coord_count, max_coords);
526 }
527 // 1D.DC OpenGL is using a vec3 but 2nd component is ignored later.
528 total_coord_count +=
529 (depth_compare && !is_array && texture_type == TextureType::Texture1D) ? 1 : 0;
530
531 return {coord_count, total_coord_count};
532}
533
534} // namespace VideoCommon::Shader \ No newline at end of file
diff --git a/src/video_core/shader/decode/video.cpp b/src/video_core/shader/decode/video.cpp
index c3432356d..956c01d9b 100644
--- a/src/video_core/shader/decode/video.cpp
+++ b/src/video_core/shader/decode/video.cpp
@@ -15,7 +15,7 @@ using Tegra::Shader::Pred;
15using Tegra::Shader::VideoType; 15using Tegra::Shader::VideoType;
16using Tegra::Shader::VmadShr; 16using Tegra::Shader::VmadShr;
17 17
18u32 ShaderIR::DecodeVideo(BasicBlock& bb, const BasicBlock& code, u32 pc) { 18u32 ShaderIR::DecodeVideo(NodeBlock& bb, u32 pc) {
19 const Instruction instr = {program_code[pc]}; 19 const Instruction instr = {program_code[pc]};
20 const auto opcode = OpCode::Decode(instr); 20 const auto opcode = OpCode::Decode(instr);
21 21
diff --git a/src/video_core/shader/decode/xmad.cpp b/src/video_core/shader/decode/xmad.cpp
index 9cb864500..c34843307 100644
--- a/src/video_core/shader/decode/xmad.cpp
+++ b/src/video_core/shader/decode/xmad.cpp
@@ -12,7 +12,7 @@ namespace VideoCommon::Shader {
12using Tegra::Shader::Instruction; 12using Tegra::Shader::Instruction;
13using Tegra::Shader::OpCode; 13using Tegra::Shader::OpCode;
14 14
15u32 ShaderIR::DecodeXmad(BasicBlock& bb, const BasicBlock& code, u32 pc) { 15u32 ShaderIR::DecodeXmad(NodeBlock& bb, u32 pc) {
16 const Instruction instr = {program_code[pc]}; 16 const Instruction instr = {program_code[pc]};
17 const auto opcode = OpCode::Decode(instr); 17 const auto opcode = OpCode::Decode(instr);
18 18
diff --git a/src/video_core/shader/shader_ir.cpp b/src/video_core/shader/shader_ir.cpp
index d7747103e..ac5112d78 100644
--- a/src/video_core/shader/shader_ir.cpp
+++ b/src/video_core/shader/shader_ir.cpp
@@ -337,27 +337,27 @@ Node ShaderIR::GetConditionCode(Tegra::Shader::ConditionCode cc) {
337 } 337 }
338} 338}
339 339
340void ShaderIR::SetRegister(BasicBlock& bb, Register dest, Node src) { 340void ShaderIR::SetRegister(NodeBlock& bb, Register dest, Node src) {
341 bb.push_back(Operation(OperationCode::Assign, GetRegister(dest), src)); 341 bb.push_back(Operation(OperationCode::Assign, GetRegister(dest), src));
342} 342}
343 343
344void ShaderIR::SetPredicate(BasicBlock& bb, u64 dest, Node src) { 344void ShaderIR::SetPredicate(NodeBlock& bb, u64 dest, Node src) {
345 bb.push_back(Operation(OperationCode::LogicalAssign, GetPredicate(dest), src)); 345 bb.push_back(Operation(OperationCode::LogicalAssign, GetPredicate(dest), src));
346} 346}
347 347
348void ShaderIR::SetInternalFlag(BasicBlock& bb, InternalFlag flag, Node value) { 348void ShaderIR::SetInternalFlag(NodeBlock& bb, InternalFlag flag, Node value) {
349 bb.push_back(Operation(OperationCode::LogicalAssign, GetInternalFlag(flag), value)); 349 bb.push_back(Operation(OperationCode::LogicalAssign, GetInternalFlag(flag), value));
350} 350}
351 351
352void ShaderIR::SetLocalMemory(BasicBlock& bb, Node address, Node value) { 352void ShaderIR::SetLocalMemory(NodeBlock& bb, Node address, Node value) {
353 bb.push_back(Operation(OperationCode::Assign, GetLocalMemory(address), value)); 353 bb.push_back(Operation(OperationCode::Assign, GetLocalMemory(address), value));
354} 354}
355 355
356void ShaderIR::SetTemporal(BasicBlock& bb, u32 id, Node value) { 356void ShaderIR::SetTemporal(NodeBlock& bb, u32 id, Node value) {
357 SetRegister(bb, Register::ZeroIndex + 1 + id, value); 357 SetRegister(bb, Register::ZeroIndex + 1 + id, value);
358} 358}
359 359
360void ShaderIR::SetInternalFlagsFromFloat(BasicBlock& bb, Node value, bool sets_cc) { 360void ShaderIR::SetInternalFlagsFromFloat(NodeBlock& bb, Node value, bool sets_cc) {
361 if (!sets_cc) { 361 if (!sets_cc) {
362 return; 362 return;
363 } 363 }
@@ -366,7 +366,7 @@ void ShaderIR::SetInternalFlagsFromFloat(BasicBlock& bb, Node value, bool sets_c
366 LOG_WARNING(HW_GPU, "Condition codes implementation is incomplete"); 366 LOG_WARNING(HW_GPU, "Condition codes implementation is incomplete");
367} 367}
368 368
369void ShaderIR::SetInternalFlagsFromInteger(BasicBlock& bb, Node value, bool sets_cc) { 369void ShaderIR::SetInternalFlagsFromInteger(NodeBlock& bb, Node value, bool sets_cc) {
370 if (!sets_cc) { 370 if (!sets_cc) {
371 return; 371 return;
372 } 372 }
diff --git a/src/video_core/shader/shader_ir.h b/src/video_core/shader/shader_ir.h
index 6e42e3dfb..5bc3a3900 100644
--- a/src/video_core/shader/shader_ir.h
+++ b/src/video_core/shader/shader_ir.h
@@ -39,7 +39,7 @@ using NodeData =
39 PredicateNode, AbufNode, CbufNode, LmemNode, GmemNode, CommentNode>; 39 PredicateNode, AbufNode, CbufNode, LmemNode, GmemNode, CommentNode>;
40using Node = const NodeData*; 40using Node = const NodeData*;
41using Node4 = std::array<Node, 4>; 41using Node4 = std::array<Node, 4>;
42using BasicBlock = std::vector<Node>; 42using NodeBlock = std::vector<Node>;
43 43
44constexpr u32 MAX_PROGRAM_LENGTH = 0x1000; 44constexpr u32 MAX_PROGRAM_LENGTH = 0x1000;
45 45
@@ -156,12 +156,12 @@ enum class OperationCode {
156 Logical2HNotEqual, /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2 156 Logical2HNotEqual, /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2
157 Logical2HGreaterEqual, /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2 157 Logical2HGreaterEqual, /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2
158 158
159 F4Texture, /// (MetaTexture, float[N] coords, float[M] params) -> float4 159 Texture, /// (MetaTexture, float[N] coords) -> float4
160 F4TextureLod, /// (MetaTexture, float[N] coords, float[M] params) -> float4 160 TextureLod, /// (MetaTexture, float[N] coords) -> float4
161 F4TextureGather, /// (MetaTexture, float[N] coords, float[M] params) -> float4 161 TextureGather, /// (MetaTexture, float[N] coords) -> float4
162 F4TextureQueryDimensions, /// (MetaTexture, float a) -> float4 162 TextureQueryDimensions, /// (MetaTexture, float a) -> float4
163 F4TextureQueryLod, /// (MetaTexture, float[N] coords) -> float4 163 TextureQueryLod, /// (MetaTexture, float[N] coords) -> float4
164 F4TexelFetch, /// (MetaTexture, int[N], int) -> float4 164 TexelFetch, /// (MetaTexture, int[N], int) -> float4
165 165
166 Branch, /// (uint branch_target) -> void 166 Branch, /// (uint branch_target) -> void
167 PushFlowStack, /// (uint branch_target) -> void 167 PushFlowStack, /// (uint branch_target) -> void
@@ -236,6 +236,11 @@ private:
236 236
237class ConstBuffer { 237class ConstBuffer {
238public: 238public:
239 explicit ConstBuffer(u32 max_offset, bool is_indirect)
240 : max_offset{max_offset}, is_indirect{is_indirect} {}
241
242 ConstBuffer() = default;
243
239 void MarkAsUsed(u64 offset) { 244 void MarkAsUsed(u64 offset) {
240 max_offset = std::max(max_offset, static_cast<u32>(offset)); 245 max_offset = std::max(max_offset, static_cast<u32>(offset));
241 } 246 }
@@ -252,6 +257,10 @@ public:
252 return max_offset + sizeof(float); 257 return max_offset + sizeof(float);
253 } 258 }
254 259
260 u32 GetMaxOffset() const {
261 return max_offset;
262 }
263
255private: 264private:
256 u32 max_offset{}; 265 u32 max_offset{};
257 bool is_indirect{}; 266 bool is_indirect{};
@@ -279,9 +288,12 @@ struct MetaHalfArithmetic {
279 288
280struct MetaTexture { 289struct MetaTexture {
281 const Sampler& sampler; 290 const Sampler& sampler;
291 Node array{};
292 Node depth_compare{};
293 Node bias{};
294 Node lod{};
295 Node component{};
282 u32 element{}; 296 u32 element{};
283 u32 coords_count{};
284 std::optional<u32> array_index;
285}; 297};
286 298
287constexpr MetaArithmetic PRECISE = {true}; 299constexpr MetaArithmetic PRECISE = {true};
@@ -530,7 +542,7 @@ public:
530 Decode(); 542 Decode();
531 } 543 }
532 544
533 const std::map<u32, BasicBlock>& GetBasicBlocks() const { 545 const std::map<u32, NodeBlock>& GetBasicBlocks() const {
534 return basic_blocks; 546 return basic_blocks;
535 } 547 }
536 548
@@ -581,7 +593,7 @@ private:
581 593
582 ExitMethod Scan(u32 begin, u32 end, std::set<u32>& labels); 594 ExitMethod Scan(u32 begin, u32 end, std::set<u32>& labels);
583 595
584 BasicBlock DecodeRange(u32 begin, u32 end); 596 NodeBlock DecodeRange(u32 begin, u32 end);
585 597
586 /** 598 /**
587 * Decodes a single instruction from Tegra to IR. 599 * Decodes a single instruction from Tegra to IR.
@@ -589,33 +601,34 @@ private:
589 * @param pc Program counter. Offset to decode. 601 * @param pc Program counter. Offset to decode.
590 * @return Next address to decode. 602 * @return Next address to decode.
591 */ 603 */
592 u32 DecodeInstr(BasicBlock& bb, u32 pc); 604 u32 DecodeInstr(NodeBlock& bb, u32 pc);
593 605
594 u32 DecodeArithmetic(BasicBlock& bb, const BasicBlock& code, u32 pc); 606 u32 DecodeArithmetic(NodeBlock& bb, u32 pc);
595 u32 DecodeArithmeticImmediate(BasicBlock& bb, const BasicBlock& code, u32 pc); 607 u32 DecodeArithmeticImmediate(NodeBlock& bb, u32 pc);
596 u32 DecodeBfe(BasicBlock& bb, const BasicBlock& code, u32 pc); 608 u32 DecodeBfe(NodeBlock& bb, u32 pc);
597 u32 DecodeBfi(BasicBlock& bb, const BasicBlock& code, u32 pc); 609 u32 DecodeBfi(NodeBlock& bb, u32 pc);
598 u32 DecodeShift(BasicBlock& bb, const BasicBlock& code, u32 pc); 610 u32 DecodeShift(NodeBlock& bb, u32 pc);
599 u32 DecodeArithmeticInteger(BasicBlock& bb, const BasicBlock& code, u32 pc); 611 u32 DecodeArithmeticInteger(NodeBlock& bb, u32 pc);
600 u32 DecodeArithmeticIntegerImmediate(BasicBlock& bb, const BasicBlock& code, u32 pc); 612 u32 DecodeArithmeticIntegerImmediate(NodeBlock& bb, u32 pc);
601 u32 DecodeArithmeticHalf(BasicBlock& bb, const BasicBlock& code, u32 pc); 613 u32 DecodeArithmeticHalf(NodeBlock& bb, u32 pc);
602 u32 DecodeArithmeticHalfImmediate(BasicBlock& bb, const BasicBlock& code, u32 pc); 614 u32 DecodeArithmeticHalfImmediate(NodeBlock& bb, u32 pc);
603 u32 DecodeFfma(BasicBlock& bb, const BasicBlock& code, u32 pc); 615 u32 DecodeFfma(NodeBlock& bb, u32 pc);
604 u32 DecodeHfma2(BasicBlock& bb, const BasicBlock& code, u32 pc); 616 u32 DecodeHfma2(NodeBlock& bb, u32 pc);
605 u32 DecodeConversion(BasicBlock& bb, const BasicBlock& code, u32 pc); 617 u32 DecodeConversion(NodeBlock& bb, u32 pc);
606 u32 DecodeMemory(BasicBlock& bb, const BasicBlock& code, u32 pc); 618 u32 DecodeMemory(NodeBlock& bb, u32 pc);
607 u32 DecodeFloatSetPredicate(BasicBlock& bb, const BasicBlock& code, u32 pc); 619 u32 DecodeTexture(NodeBlock& bb, u32 pc);
608 u32 DecodeIntegerSetPredicate(BasicBlock& bb, const BasicBlock& code, u32 pc); 620 u32 DecodeFloatSetPredicate(NodeBlock& bb, u32 pc);
609 u32 DecodeHalfSetPredicate(BasicBlock& bb, const BasicBlock& code, u32 pc); 621 u32 DecodeIntegerSetPredicate(NodeBlock& bb, u32 pc);
610 u32 DecodePredicateSetRegister(BasicBlock& bb, const BasicBlock& code, u32 pc); 622 u32 DecodeHalfSetPredicate(NodeBlock& bb, u32 pc);
611 u32 DecodePredicateSetPredicate(BasicBlock& bb, const BasicBlock& code, u32 pc); 623 u32 DecodePredicateSetRegister(NodeBlock& bb, u32 pc);
612 u32 DecodeRegisterSetPredicate(BasicBlock& bb, const BasicBlock& code, u32 pc); 624 u32 DecodePredicateSetPredicate(NodeBlock& bb, u32 pc);
613 u32 DecodeFloatSet(BasicBlock& bb, const BasicBlock& code, u32 pc); 625 u32 DecodeRegisterSetPredicate(NodeBlock& bb, u32 pc);
614 u32 DecodeIntegerSet(BasicBlock& bb, const BasicBlock& code, u32 pc); 626 u32 DecodeFloatSet(NodeBlock& bb, u32 pc);
615 u32 DecodeHalfSet(BasicBlock& bb, const BasicBlock& code, u32 pc); 627 u32 DecodeIntegerSet(NodeBlock& bb, u32 pc);
616 u32 DecodeVideo(BasicBlock& bb, const BasicBlock& code, u32 pc); 628 u32 DecodeHalfSet(NodeBlock& bb, u32 pc);
617 u32 DecodeXmad(BasicBlock& bb, const BasicBlock& code, u32 pc); 629 u32 DecodeVideo(NodeBlock& bb, u32 pc);
618 u32 DecodeOther(BasicBlock& bb, const BasicBlock& code, u32 pc); 630 u32 DecodeXmad(NodeBlock& bb, u32 pc);
631 u32 DecodeOther(NodeBlock& bb, u32 pc);
619 632
620 /// Internalizes node's data and returns a managed pointer to a clone of that node 633 /// Internalizes node's data and returns a managed pointer to a clone of that node
621 Node StoreNode(NodeData&& node_data); 634 Node StoreNode(NodeData&& node_data);
@@ -664,20 +677,20 @@ private:
664 Node GetTemporal(u32 id); 677 Node GetTemporal(u32 id);
665 678
666 /// Sets a register. src value must be a number-evaluated node. 679 /// Sets a register. src value must be a number-evaluated node.
667 void SetRegister(BasicBlock& bb, Tegra::Shader::Register dest, Node src); 680 void SetRegister(NodeBlock& bb, Tegra::Shader::Register dest, Node src);
668 /// Sets a predicate. src value must be a bool-evaluated node 681 /// Sets a predicate. src value must be a bool-evaluated node
669 void SetPredicate(BasicBlock& bb, u64 dest, Node src); 682 void SetPredicate(NodeBlock& bb, u64 dest, Node src);
670 /// Sets an internal flag. src value must be a bool-evaluated node 683 /// Sets an internal flag. src value must be a bool-evaluated node
671 void SetInternalFlag(BasicBlock& bb, InternalFlag flag, Node value); 684 void SetInternalFlag(NodeBlock& bb, InternalFlag flag, Node value);
672 /// Sets a local memory address. address and value must be a number-evaluated node 685 /// Sets a local memory address. address and value must be a number-evaluated node
673 void SetLocalMemory(BasicBlock& bb, Node address, Node value); 686 void SetLocalMemory(NodeBlock& bb, Node address, Node value);
674 /// Sets a temporal. Internally it uses a post-RZ register 687 /// Sets a temporal. Internally it uses a post-RZ register
675 void SetTemporal(BasicBlock& bb, u32 id, Node value); 688 void SetTemporal(NodeBlock& bb, u32 id, Node value);
676 689
677 /// Sets internal flags from a float 690 /// Sets internal flags from a float
678 void SetInternalFlagsFromFloat(BasicBlock& bb, Node value, bool sets_cc = true); 691 void SetInternalFlagsFromFloat(NodeBlock& bb, Node value, bool sets_cc = true);
679 /// Sets internal flags from an integer 692 /// Sets internal flags from an integer
680 void SetInternalFlagsFromInteger(BasicBlock& bb, Node value, bool sets_cc = true); 693 void SetInternalFlagsFromInteger(NodeBlock& bb, Node value, bool sets_cc = true);
681 694
682 /// Conditionally absolute/negated float. Absolute is applied first 695 /// Conditionally absolute/negated float. Absolute is applied first
683 Node GetOperandAbsNegFloat(Node value, bool absolute, bool negate); 696 Node GetOperandAbsNegFloat(Node value, bool absolute, bool negate);
@@ -718,12 +731,12 @@ private:
718 /// Extracts a sequence of bits from a node 731 /// Extracts a sequence of bits from a node
719 Node BitfieldExtract(Node value, u32 offset, u32 bits); 732 Node BitfieldExtract(Node value, u32 offset, u32 bits);
720 733
721 void WriteTexInstructionFloat(BasicBlock& bb, Tegra::Shader::Instruction instr, 734 void WriteTexInstructionFloat(NodeBlock& bb, Tegra::Shader::Instruction instr,
722 const Node4& components); 735 const Node4& components);
723 736
724 void WriteTexsInstructionFloat(BasicBlock& bb, Tegra::Shader::Instruction instr, 737 void WriteTexsInstructionFloat(NodeBlock& bb, Tegra::Shader::Instruction instr,
725 const Node4& components); 738 const Node4& components);
726 void WriteTexsInstructionHalfFloat(BasicBlock& bb, Tegra::Shader::Instruction instr, 739 void WriteTexsInstructionHalfFloat(NodeBlock& bb, Tegra::Shader::Instruction instr,
727 const Node4& components); 740 const Node4& components);
728 741
729 Node4 GetTexCode(Tegra::Shader::Instruction instr, Tegra::Shader::TextureType texture_type, 742 Node4 GetTexCode(Tegra::Shader::Instruction instr, Tegra::Shader::TextureType texture_type,
@@ -745,23 +758,22 @@ private:
745 bool lod_bias_enabled, std::size_t max_coords, std::size_t max_inputs); 758 bool lod_bias_enabled, std::size_t max_coords, std::size_t max_inputs);
746 759
747 Node4 GetTextureCode(Tegra::Shader::Instruction instr, Tegra::Shader::TextureType texture_type, 760 Node4 GetTextureCode(Tegra::Shader::Instruction instr, Tegra::Shader::TextureType texture_type,
748 Tegra::Shader::TextureProcessMode process_mode, bool depth_compare, 761 Tegra::Shader::TextureProcessMode process_mode, std::vector<Node> coords,
749 bool is_array, std::size_t array_offset, std::size_t bias_offset, 762 Node array, Node depth_compare, u32 bias_offset);
750 std::vector<Node>&& coords);
751 763
752 Node GetVideoOperand(Node op, bool is_chunk, bool is_signed, Tegra::Shader::VideoType type, 764 Node GetVideoOperand(Node op, bool is_chunk, bool is_signed, Tegra::Shader::VideoType type,
753 u64 byte_height); 765 u64 byte_height);
754 766
755 void WriteLogicOperation(BasicBlock& bb, Tegra::Shader::Register dest, 767 void WriteLogicOperation(NodeBlock& bb, Tegra::Shader::Register dest,
756 Tegra::Shader::LogicOperation logic_op, Node op_a, Node op_b, 768 Tegra::Shader::LogicOperation logic_op, Node op_a, Node op_b,
757 Tegra::Shader::PredicateResultMode predicate_mode, 769 Tegra::Shader::PredicateResultMode predicate_mode,
758 Tegra::Shader::Pred predicate, bool sets_cc); 770 Tegra::Shader::Pred predicate, bool sets_cc);
759 void WriteLop3Instruction(BasicBlock& bb, Tegra::Shader::Register dest, Node op_a, Node op_b, 771 void WriteLop3Instruction(NodeBlock& bb, Tegra::Shader::Register dest, Node op_a, Node op_b,
760 Node op_c, Node imm_lut, bool sets_cc); 772 Node op_c, Node imm_lut, bool sets_cc);
761 773
762 Node TrackCbuf(Node tracked, const BasicBlock& code, s64 cursor); 774 Node TrackCbuf(Node tracked, const NodeBlock& code, s64 cursor);
763 775
764 std::pair<Node, s64> TrackRegister(const GprNode* tracked, const BasicBlock& code, s64 cursor); 776 std::pair<Node, s64> TrackRegister(const GprNode* tracked, const NodeBlock& code, s64 cursor);
765 777
766 template <typename... T> 778 template <typename... T>
767 Node Operation(OperationCode code, const T*... operands) { 779 Node Operation(OperationCode code, const T*... operands) {
@@ -803,7 +815,8 @@ private:
803 u32 coverage_end{}; 815 u32 coverage_end{};
804 std::map<std::pair<u32, u32>, ExitMethod> exit_method_map; 816 std::map<std::pair<u32, u32>, ExitMethod> exit_method_map;
805 817
806 std::map<u32, BasicBlock> basic_blocks; 818 std::map<u32, NodeBlock> basic_blocks;
819 NodeBlock global_code;
807 820
808 std::vector<std::unique_ptr<NodeData>> stored_nodes; 821 std::vector<std::unique_ptr<NodeData>> stored_nodes;
809 822
diff --git a/src/video_core/shader/track.cpp b/src/video_core/shader/track.cpp
index d6d29ee9f..33b071747 100644
--- a/src/video_core/shader/track.cpp
+++ b/src/video_core/shader/track.cpp
@@ -11,7 +11,7 @@
11namespace VideoCommon::Shader { 11namespace VideoCommon::Shader {
12 12
13namespace { 13namespace {
14std::pair<Node, s64> FindOperation(const BasicBlock& code, s64 cursor, 14std::pair<Node, s64> FindOperation(const NodeBlock& code, s64 cursor,
15 OperationCode operation_code) { 15 OperationCode operation_code) {
16 for (; cursor >= 0; --cursor) { 16 for (; cursor >= 0; --cursor) {
17 const Node node = code[cursor]; 17 const Node node = code[cursor];
@@ -19,12 +19,19 @@ std::pair<Node, s64> FindOperation(const BasicBlock& code, s64 cursor,
19 if (operation->GetCode() == operation_code) 19 if (operation->GetCode() == operation_code)
20 return {node, cursor}; 20 return {node, cursor};
21 } 21 }
22 if (const auto conditional = std::get_if<ConditionalNode>(node)) {
23 const auto& conditional_code = conditional->GetCode();
24 const auto [found, internal_cursor] = FindOperation(
25 conditional_code, static_cast<s64>(conditional_code.size() - 1), operation_code);
26 if (found)
27 return {found, cursor};
28 }
22 } 29 }
23 return {}; 30 return {};
24} 31}
25} // namespace 32} // namespace
26 33
27Node ShaderIR::TrackCbuf(Node tracked, const BasicBlock& code, s64 cursor) { 34Node ShaderIR::TrackCbuf(Node tracked, const NodeBlock& code, s64 cursor) {
28 if (const auto cbuf = std::get_if<CbufNode>(tracked)) { 35 if (const auto cbuf = std::get_if<CbufNode>(tracked)) {
29 // Cbuf found, but it has to be immediate 36 // Cbuf found, but it has to be immediate
30 return std::holds_alternative<ImmediateNode>(*cbuf->GetOffset()) ? tracked : nullptr; 37 return std::holds_alternative<ImmediateNode>(*cbuf->GetOffset()) ? tracked : nullptr;
@@ -50,10 +57,14 @@ Node ShaderIR::TrackCbuf(Node tracked, const BasicBlock& code, s64 cursor) {
50 } 57 }
51 return nullptr; 58 return nullptr;
52 } 59 }
60 if (const auto conditional = std::get_if<ConditionalNode>(tracked)) {
61 const auto& conditional_code = conditional->GetCode();
62 return TrackCbuf(tracked, conditional_code, static_cast<s64>(conditional_code.size()));
63 }
53 return nullptr; 64 return nullptr;
54} 65}
55 66
56std::pair<Node, s64> ShaderIR::TrackRegister(const GprNode* tracked, const BasicBlock& code, 67std::pair<Node, s64> ShaderIR::TrackRegister(const GprNode* tracked, const NodeBlock& code,
57 s64 cursor) { 68 s64 cursor) {
58 for (; cursor >= 0; --cursor) { 69 for (; cursor >= 0; --cursor) {
59 const auto [found_node, new_cursor] = FindOperation(code, cursor, OperationCode::Assign); 70 const auto [found_node, new_cursor] = FindOperation(code, cursor, OperationCode::Assign);
diff --git a/src/video_core/surface.cpp b/src/video_core/surface.cpp
index 2f6612a35..a7ac26d71 100644
--- a/src/video_core/surface.cpp
+++ b/src/video_core/surface.cpp
@@ -89,8 +89,6 @@ PixelFormat PixelFormatFromDepthFormat(Tegra::DepthFormat format) {
89 89
90PixelFormat PixelFormatFromRenderTargetFormat(Tegra::RenderTargetFormat format) { 90PixelFormat PixelFormatFromRenderTargetFormat(Tegra::RenderTargetFormat format) {
91 switch (format) { 91 switch (format) {
92 // TODO (Hexagon12): Converting SRGBA to RGBA is a hack and doesn't completely correct the
93 // gamma.
94 case Tegra::RenderTargetFormat::RGBA8_SRGB: 92 case Tegra::RenderTargetFormat::RGBA8_SRGB:
95 return PixelFormat::RGBA8_SRGB; 93 return PixelFormat::RGBA8_SRGB;
96 case Tegra::RenderTargetFormat::RGBA8_UNORM: 94 case Tegra::RenderTargetFormat::RGBA8_UNORM:
@@ -426,6 +424,8 @@ PixelFormat PixelFormatFromGPUPixelFormat(Tegra::FramebufferConfig::PixelFormat
426 switch (format) { 424 switch (format) {
427 case Tegra::FramebufferConfig::PixelFormat::ABGR8: 425 case Tegra::FramebufferConfig::PixelFormat::ABGR8:
428 return PixelFormat::ABGR8U; 426 return PixelFormat::ABGR8U;
427 case Tegra::FramebufferConfig::PixelFormat::BGRA8:
428 return PixelFormat::BGRA8;
429 default: 429 default:
430 LOG_CRITICAL(HW_GPU, "Unimplemented format={}", static_cast<u32>(format)); 430 LOG_CRITICAL(HW_GPU, "Unimplemented format={}", static_cast<u32>(format));
431 UNREACHABLE(); 431 UNREACHABLE();
diff --git a/src/video_core/textures/astc.cpp b/src/video_core/textures/astc.cpp
index bc50a4876..b508d64e9 100644
--- a/src/video_core/textures/astc.cpp
+++ b/src/video_core/textures/astc.cpp
@@ -23,28 +23,12 @@
23 23
24#include "video_core/textures/astc.h" 24#include "video_core/textures/astc.h"
25 25
26class BitStream { 26class InputBitStream {
27public: 27public:
28 explicit BitStream(unsigned char* ptr, int nBits = 0, int start_offset = 0) 28 explicit InputBitStream(const unsigned char* ptr, int nBits = 0, int start_offset = 0)
29 : m_NumBits(nBits), m_CurByte(ptr), m_NextBit(start_offset % 8) {} 29 : m_NumBits(nBits), m_CurByte(ptr), m_NextBit(start_offset % 8) {}
30 30
31 ~BitStream() = default; 31 ~InputBitStream() = default;
32
33 int GetBitsWritten() const {
34 return m_BitsWritten;
35 }
36
37 void WriteBitsR(unsigned int val, unsigned int nBits) {
38 for (unsigned int i = 0; i < nBits; i++) {
39 WriteBit((val >> (nBits - i - 1)) & 1);
40 }
41 }
42
43 void WriteBits(unsigned int val, unsigned int nBits) {
44 for (unsigned int i = 0; i < nBits; i++) {
45 WriteBit((val >> i) & 1);
46 }
47 }
48 32
49 int GetBitsRead() const { 33 int GetBitsRead() const {
50 return m_BitsRead; 34 return m_BitsRead;
@@ -71,6 +55,38 @@ public:
71 } 55 }
72 56
73private: 57private:
58 const int m_NumBits;
59 const unsigned char* m_CurByte;
60 int m_NextBit = 0;
61 int m_BitsRead = 0;
62
63 bool done = false;
64};
65
66class OutputBitStream {
67public:
68 explicit OutputBitStream(unsigned char* ptr, int nBits = 0, int start_offset = 0)
69 : m_NumBits(nBits), m_CurByte(ptr), m_NextBit(start_offset % 8) {}
70
71 ~OutputBitStream() = default;
72
73 int GetBitsWritten() const {
74 return m_BitsWritten;
75 }
76
77 void WriteBitsR(unsigned int val, unsigned int nBits) {
78 for (unsigned int i = 0; i < nBits; i++) {
79 WriteBit((val >> (nBits - i - 1)) & 1);
80 }
81 }
82
83 void WriteBits(unsigned int val, unsigned int nBits) {
84 for (unsigned int i = 0; i < nBits; i++) {
85 WriteBit((val >> i) & 1);
86 }
87 }
88
89private:
74 void WriteBit(int b) { 90 void WriteBit(int b) {
75 91
76 if (done) 92 if (done)
@@ -238,8 +254,8 @@ public:
238 // Fills result with the values that are encoded in the given 254 // Fills result with the values that are encoded in the given
239 // bitstream. We must know beforehand what the maximum possible 255 // bitstream. We must know beforehand what the maximum possible
240 // value is, and how many values we're decoding. 256 // value is, and how many values we're decoding.
241 static void DecodeIntegerSequence(std::vector<IntegerEncodedValue>& result, BitStream& bits, 257 static void DecodeIntegerSequence(std::vector<IntegerEncodedValue>& result,
242 uint32_t maxRange, uint32_t nValues) { 258 InputBitStream& bits, uint32_t maxRange, uint32_t nValues) {
243 // Determine encoding parameters 259 // Determine encoding parameters
244 IntegerEncodedValue val = IntegerEncodedValue::CreateEncoding(maxRange); 260 IntegerEncodedValue val = IntegerEncodedValue::CreateEncoding(maxRange);
245 261
@@ -267,7 +283,7 @@ public:
267 } 283 }
268 284
269private: 285private:
270 static void DecodeTritBlock(BitStream& bits, std::vector<IntegerEncodedValue>& result, 286 static void DecodeTritBlock(InputBitStream& bits, std::vector<IntegerEncodedValue>& result,
271 uint32_t nBitsPerValue) { 287 uint32_t nBitsPerValue) {
272 // Implement the algorithm in section C.2.12 288 // Implement the algorithm in section C.2.12
273 uint32_t m[5]; 289 uint32_t m[5];
@@ -327,7 +343,7 @@ private:
327 } 343 }
328 } 344 }
329 345
330 static void DecodeQuintBlock(BitStream& bits, std::vector<IntegerEncodedValue>& result, 346 static void DecodeQuintBlock(InputBitStream& bits, std::vector<IntegerEncodedValue>& result,
331 uint32_t nBitsPerValue) { 347 uint32_t nBitsPerValue) {
332 // Implement the algorithm in section C.2.12 348 // Implement the algorithm in section C.2.12
333 uint32_t m[3]; 349 uint32_t m[3];
@@ -406,7 +422,7 @@ struct TexelWeightParams {
406 } 422 }
407}; 423};
408 424
409static TexelWeightParams DecodeBlockInfo(BitStream& strm) { 425static TexelWeightParams DecodeBlockInfo(InputBitStream& strm) {
410 TexelWeightParams params; 426 TexelWeightParams params;
411 427
412 // Read the entire block mode all at once 428 // Read the entire block mode all at once
@@ -605,7 +621,7 @@ static TexelWeightParams DecodeBlockInfo(BitStream& strm) {
605 return params; 621 return params;
606} 622}
607 623
608static void FillVoidExtentLDR(BitStream& strm, uint32_t* const outBuf, uint32_t blockWidth, 624static void FillVoidExtentLDR(InputBitStream& strm, uint32_t* const outBuf, uint32_t blockWidth,
609 uint32_t blockHeight) { 625 uint32_t blockHeight) {
610 // Don't actually care about the void extent, just read the bits... 626 // Don't actually care about the void extent, just read the bits...
611 for (int i = 0; i < 4; ++i) { 627 for (int i = 0; i < 4; ++i) {
@@ -821,7 +837,7 @@ static void DecodeColorValues(uint32_t* out, uint8_t* data, const uint32_t* mode
821 837
822 // We now have enough to decode our integer sequence. 838 // We now have enough to decode our integer sequence.
823 std::vector<IntegerEncodedValue> decodedColorValues; 839 std::vector<IntegerEncodedValue> decodedColorValues;
824 BitStream colorStream(data); 840 InputBitStream colorStream(data);
825 IntegerEncodedValue::DecodeIntegerSequence(decodedColorValues, colorStream, range, nValues); 841 IntegerEncodedValue::DecodeIntegerSequence(decodedColorValues, colorStream, range, nValues);
826 842
827 // Once we have the decoded values, we need to dequantize them to the 0-255 range 843 // Once we have the decoded values, we need to dequantize them to the 0-255 range
@@ -1365,9 +1381,9 @@ static void ComputeEndpoints(Pixel& ep1, Pixel& ep2, const uint32_t*& colorValue
1365#undef READ_INT_VALUES 1381#undef READ_INT_VALUES
1366} 1382}
1367 1383
1368static void DecompressBlock(uint8_t inBuf[16], const uint32_t blockWidth, 1384static void DecompressBlock(const uint8_t inBuf[16], const uint32_t blockWidth,
1369 const uint32_t blockHeight, uint32_t* outBuf) { 1385 const uint32_t blockHeight, uint32_t* outBuf) {
1370 BitStream strm(inBuf); 1386 InputBitStream strm(inBuf);
1371 TexelWeightParams weightParams = DecodeBlockInfo(strm); 1387 TexelWeightParams weightParams = DecodeBlockInfo(strm);
1372 1388
1373 // Was there an error? 1389 // Was there an error?
@@ -1421,7 +1437,7 @@ static void DecompressBlock(uint8_t inBuf[16], const uint32_t blockWidth,
1421 // Define color data. 1437 // Define color data.
1422 uint8_t colorEndpointData[16]; 1438 uint8_t colorEndpointData[16];
1423 memset(colorEndpointData, 0, sizeof(colorEndpointData)); 1439 memset(colorEndpointData, 0, sizeof(colorEndpointData));
1424 BitStream colorEndpointStream(colorEndpointData, 16 * 8, 0); 1440 OutputBitStream colorEndpointStream(colorEndpointData, 16 * 8, 0);
1425 1441
1426 // Read extra config data... 1442 // Read extra config data...
1427 uint32_t baseCEM = 0; 1443 uint32_t baseCEM = 0;
@@ -1549,7 +1565,7 @@ static void DecompressBlock(uint8_t inBuf[16], const uint32_t blockWidth,
1549 memset(texelWeightData + clearByteStart, 0, 16 - clearByteStart); 1565 memset(texelWeightData + clearByteStart, 0, 16 - clearByteStart);
1550 1566
1551 std::vector<IntegerEncodedValue> texelWeightValues; 1567 std::vector<IntegerEncodedValue> texelWeightValues;
1552 BitStream weightStream(texelWeightData); 1568 InputBitStream weightStream(texelWeightData);
1553 1569
1554 IntegerEncodedValue::DecodeIntegerSequence(texelWeightValues, weightStream, 1570 IntegerEncodedValue::DecodeIntegerSequence(texelWeightValues, weightStream,
1555 weightParams.m_MaxWeight, 1571 weightParams.m_MaxWeight,
@@ -1597,7 +1613,7 @@ static void DecompressBlock(uint8_t inBuf[16], const uint32_t blockWidth,
1597 1613
1598namespace Tegra::Texture::ASTC { 1614namespace Tegra::Texture::ASTC {
1599 1615
1600std::vector<uint8_t> Decompress(std::vector<uint8_t>& data, uint32_t width, uint32_t height, 1616std::vector<uint8_t> Decompress(const uint8_t* data, uint32_t width, uint32_t height,
1601 uint32_t depth, uint32_t block_width, uint32_t block_height) { 1617 uint32_t depth, uint32_t block_width, uint32_t block_height) {
1602 uint32_t blockIdx = 0; 1618 uint32_t blockIdx = 0;
1603 std::vector<uint8_t> outData(height * width * depth * 4); 1619 std::vector<uint8_t> outData(height * width * depth * 4);
@@ -1605,7 +1621,7 @@ std::vector<uint8_t> Decompress(std::vector<uint8_t>& data, uint32_t width, uint
1605 for (uint32_t j = 0; j < height; j += block_height) { 1621 for (uint32_t j = 0; j < height; j += block_height) {
1606 for (uint32_t i = 0; i < width; i += block_width) { 1622 for (uint32_t i = 0; i < width; i += block_width) {
1607 1623
1608 uint8_t* blockPtr = data.data() + blockIdx * 16; 1624 const uint8_t* blockPtr = data + blockIdx * 16;
1609 1625
1610 // Blocks can be at most 12x12 1626 // Blocks can be at most 12x12
1611 uint32_t uncompData[144]; 1627 uint32_t uncompData[144];
diff --git a/src/video_core/textures/astc.h b/src/video_core/textures/astc.h
index d419dd025..991cdba72 100644
--- a/src/video_core/textures/astc.h
+++ b/src/video_core/textures/astc.h
@@ -9,7 +9,7 @@
9 9
10namespace Tegra::Texture::ASTC { 10namespace Tegra::Texture::ASTC {
11 11
12std::vector<uint8_t> Decompress(std::vector<uint8_t>& data, uint32_t width, uint32_t height, 12std::vector<uint8_t> Decompress(const uint8_t* data, uint32_t width, uint32_t height,
13 uint32_t depth, uint32_t block_width, uint32_t block_height); 13 uint32_t depth, uint32_t block_width, uint32_t block_height);
14 14
15} // namespace Tegra::Texture::ASTC 15} // namespace Tegra::Texture::ASTC
diff --git a/src/video_core/textures/convert.cpp b/src/video_core/textures/convert.cpp
new file mode 100644
index 000000000..5e439f036
--- /dev/null
+++ b/src/video_core/textures/convert.cpp
@@ -0,0 +1,92 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <algorithm>
6#include <cstring>
7#include <tuple>
8#include <vector>
9
10#include "common/assert.h"
11#include "common/common_types.h"
12#include "common/logging/log.h"
13#include "video_core/textures/astc.h"
14#include "video_core/textures/convert.h"
15
16namespace Tegra::Texture {
17
18using VideoCore::Surface::PixelFormat;
19
20template <bool reverse>
21void SwapS8Z24ToZ24S8(u8* data, u32 width, u32 height) {
22 union S8Z24 {
23 BitField<0, 24, u32> z24;
24 BitField<24, 8, u32> s8;
25 };
26 static_assert(sizeof(S8Z24) == 4, "S8Z24 is incorrect size");
27
28 union Z24S8 {
29 BitField<0, 8, u32> s8;
30 BitField<8, 24, u32> z24;
31 };
32 static_assert(sizeof(Z24S8) == 4, "Z24S8 is incorrect size");
33
34 S8Z24 s8z24_pixel{};
35 Z24S8 z24s8_pixel{};
36 constexpr auto bpp{
37 VideoCore::Surface::GetBytesPerPixel(VideoCore::Surface::PixelFormat::S8Z24)};
38 for (std::size_t y = 0; y < height; ++y) {
39 for (std::size_t x = 0; x < width; ++x) {
40 const std::size_t offset{bpp * (y * width + x)};
41 if constexpr (reverse) {
42 std::memcpy(&z24s8_pixel, &data[offset], sizeof(Z24S8));
43 s8z24_pixel.s8.Assign(z24s8_pixel.s8);
44 s8z24_pixel.z24.Assign(z24s8_pixel.z24);
45 std::memcpy(&data[offset], &s8z24_pixel, sizeof(S8Z24));
46 } else {
47 std::memcpy(&s8z24_pixel, &data[offset], sizeof(S8Z24));
48 z24s8_pixel.s8.Assign(s8z24_pixel.s8);
49 z24s8_pixel.z24.Assign(s8z24_pixel.z24);
50 std::memcpy(&data[offset], &z24s8_pixel, sizeof(Z24S8));
51 }
52 }
53 }
54}
55
56static void ConvertS8Z24ToZ24S8(u8* data, u32 width, u32 height) {
57 SwapS8Z24ToZ24S8<false>(data, width, height);
58}
59
60static void ConvertZ24S8ToS8Z24(u8* data, u32 width, u32 height) {
61 SwapS8Z24ToZ24S8<true>(data, width, height);
62}
63
64void ConvertFromGuestToHost(u8* data, PixelFormat pixel_format, u32 width, u32 height, u32 depth,
65 bool convert_astc, bool convert_s8z24) {
66 if (convert_astc && IsPixelFormatASTC(pixel_format)) {
67 // Convert ASTC pixel formats to RGBA8, as most desktop GPUs do not support ASTC.
68 u32 block_width{};
69 u32 block_height{};
70 std::tie(block_width, block_height) = GetASTCBlockSize(pixel_format);
71 const std::vector<u8> rgba8_data =
72 Tegra::Texture::ASTC::Decompress(data, width, height, depth, block_width, block_height);
73 std::copy(rgba8_data.begin(), rgba8_data.end(), data);
74
75 } else if (convert_s8z24 && pixel_format == PixelFormat::S8Z24) {
76 Tegra::Texture::ConvertS8Z24ToZ24S8(data, width, height);
77 }
78}
79
80void ConvertFromHostToGuest(u8* data, PixelFormat pixel_format, u32 width, u32 height, u32 depth,
81 bool convert_astc, bool convert_s8z24) {
82 if (convert_astc && IsPixelFormatASTC(pixel_format)) {
83 LOG_CRITICAL(HW_GPU, "Conversion of format {} after texture flushing is not implemented",
84 static_cast<u32>(pixel_format));
85 UNREACHABLE();
86
87 } else if (convert_s8z24 && pixel_format == PixelFormat::S8Z24) {
88 Tegra::Texture::ConvertZ24S8ToS8Z24(data, width, height);
89 }
90}
91
92} // namespace Tegra::Texture \ No newline at end of file
diff --git a/src/video_core/textures/convert.h b/src/video_core/textures/convert.h
new file mode 100644
index 000000000..07cd8b5da
--- /dev/null
+++ b/src/video_core/textures/convert.h
@@ -0,0 +1,18 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include "common/common_types.h"
8#include "video_core/surface.h"
9
10namespace Tegra::Texture {
11
12void ConvertFromGuestToHost(u8* data, VideoCore::Surface::PixelFormat pixel_format, u32 width,
13 u32 height, u32 depth, bool convert_astc, bool convert_s8z24);
14
15void ConvertFromHostToGuest(u8* data, VideoCore::Surface::PixelFormat pixel_format, u32 width,
16 u32 height, u32 depth, bool convert_astc, bool convert_s8z24);
17
18} // namespace Tegra::Texture \ No newline at end of file
diff --git a/src/video_core/textures/decoders.cpp b/src/video_core/textures/decoders.cpp
index 5db75de22..995d0e068 100644
--- a/src/video_core/textures/decoders.cpp
+++ b/src/video_core/textures/decoders.cpp
@@ -6,7 +6,6 @@
6#include <cstring> 6#include <cstring>
7#include "common/alignment.h" 7#include "common/alignment.h"
8#include "common/assert.h" 8#include "common/assert.h"
9#include "core/memory.h"
10#include "video_core/gpu.h" 9#include "video_core/gpu.h"
11#include "video_core/textures/decoders.h" 10#include "video_core/textures/decoders.h"
12#include "video_core/textures/texture.h" 11#include "video_core/textures/texture.h"
@@ -103,8 +102,8 @@ void FastProcessBlock(u8* const swizzled_data, u8* const unswizzled_data, const
103 const u32 swizzle_offset{y_address + table[(xb / fast_swizzle_align) % 4]}; 102 const u32 swizzle_offset{y_address + table[(xb / fast_swizzle_align) % 4]};
104 const u32 out_x = xb * out_bytes_per_pixel / bytes_per_pixel; 103 const u32 out_x = xb * out_bytes_per_pixel / bytes_per_pixel;
105 const u32 pixel_index{out_x + pixel_base}; 104 const u32 pixel_index{out_x + pixel_base};
106 data_ptrs[unswizzle] = swizzled_data + swizzle_offset; 105 data_ptrs[unswizzle ? 1 : 0] = swizzled_data + swizzle_offset;
107 data_ptrs[!unswizzle] = unswizzled_data + pixel_index; 106 data_ptrs[unswizzle ? 0 : 1] = unswizzled_data + pixel_index;
108 std::memcpy(data_ptrs[0], data_ptrs[1], fast_swizzle_align); 107 std::memcpy(data_ptrs[0], data_ptrs[1], fast_swizzle_align);
109 } 108 }
110 pixel_base += stride_x; 109 pixel_base += stride_x;
@@ -154,7 +153,7 @@ void SwizzledData(u8* const swizzled_data, u8* const unswizzled_data, const bool
154 for (u32 xb = 0; xb < blocks_on_x; xb++) { 153 for (u32 xb = 0; xb < blocks_on_x; xb++) {
155 const u32 x_start = xb * block_x_elements; 154 const u32 x_start = xb * block_x_elements;
156 const u32 x_end = std::min(width, x_start + block_x_elements); 155 const u32 x_end = std::min(width, x_start + block_x_elements);
157 if (fast) { 156 if constexpr (fast) {
158 FastProcessBlock(swizzled_data, unswizzled_data, unswizzle, x_start, y_start, 157 FastProcessBlock(swizzled_data, unswizzled_data, unswizzle, x_start, y_start,
159 z_start, x_end, y_end, z_end, tile_offset, xy_block_size, 158 z_start, x_end, y_end, z_end, tile_offset, xy_block_size,
160 layer_z, stride_x, bytes_per_pixel, out_bytes_per_pixel); 159 layer_z, stride_x, bytes_per_pixel, out_bytes_per_pixel);
@@ -230,18 +229,18 @@ u32 BytesPerPixel(TextureFormat format) {
230 } 229 }
231} 230}
232 231
233void UnswizzleTexture(u8* const unswizzled_data, VAddr address, u32 tile_size_x, u32 tile_size_y, 232void UnswizzleTexture(u8* const unswizzled_data, u8* address, u32 tile_size_x, u32 tile_size_y,
234 u32 bytes_per_pixel, u32 width, u32 height, u32 depth, u32 block_height, 233 u32 bytes_per_pixel, u32 width, u32 height, u32 depth, u32 block_height,
235 u32 block_depth, u32 width_spacing) { 234 u32 block_depth, u32 width_spacing) {
236 CopySwizzledData((width + tile_size_x - 1) / tile_size_x, 235 CopySwizzledData((width + tile_size_x - 1) / tile_size_x,
237 (height + tile_size_y - 1) / tile_size_y, depth, bytes_per_pixel, 236 (height + tile_size_y - 1) / tile_size_y, depth, bytes_per_pixel,
238 bytes_per_pixel, Memory::GetPointer(address), unswizzled_data, true, 237 bytes_per_pixel, address, unswizzled_data, true, block_height, block_depth,
239 block_height, block_depth, width_spacing); 238 width_spacing);
240} 239}
241 240
242std::vector<u8> UnswizzleTexture(VAddr address, u32 tile_size_x, u32 tile_size_y, 241std::vector<u8> UnswizzleTexture(u8* address, u32 tile_size_x, u32 tile_size_y, u32 bytes_per_pixel,
243 u32 bytes_per_pixel, u32 width, u32 height, u32 depth, 242 u32 width, u32 height, u32 depth, u32 block_height,
244 u32 block_height, u32 block_depth, u32 width_spacing) { 243 u32 block_depth, u32 width_spacing) {
245 std::vector<u8> unswizzled_data(width * height * depth * bytes_per_pixel); 244 std::vector<u8> unswizzled_data(width * height * depth * bytes_per_pixel);
246 UnswizzleTexture(unswizzled_data.data(), address, tile_size_x, tile_size_y, bytes_per_pixel, 245 UnswizzleTexture(unswizzled_data.data(), address, tile_size_x, tile_size_y, bytes_per_pixel,
247 width, height, depth, block_height, block_depth, width_spacing); 246 width, height, depth, block_height, block_depth, width_spacing);
@@ -249,8 +248,7 @@ std::vector<u8> UnswizzleTexture(VAddr address, u32 tile_size_x, u32 tile_size_y
249} 248}
250 249
251void SwizzleSubrect(u32 subrect_width, u32 subrect_height, u32 source_pitch, u32 swizzled_width, 250void SwizzleSubrect(u32 subrect_width, u32 subrect_height, u32 source_pitch, u32 swizzled_width,
252 u32 bytes_per_pixel, VAddr swizzled_data, VAddr unswizzled_data, 251 u32 bytes_per_pixel, u8* swizzled_data, u8* unswizzled_data, u32 block_height) {
253 u32 block_height) {
254 const u32 image_width_in_gobs{(swizzled_width * bytes_per_pixel + (gob_size_x - 1)) / 252 const u32 image_width_in_gobs{(swizzled_width * bytes_per_pixel + (gob_size_x - 1)) /
255 gob_size_x}; 253 gob_size_x};
256 for (u32 line = 0; line < subrect_height; ++line) { 254 for (u32 line = 0; line < subrect_height; ++line) {
@@ -262,17 +260,17 @@ void SwizzleSubrect(u32 subrect_width, u32 subrect_height, u32 source_pitch, u32
262 const u32 gob_address = 260 const u32 gob_address =
263 gob_address_y + (x * bytes_per_pixel / gob_size_x) * gob_size * block_height; 261 gob_address_y + (x * bytes_per_pixel / gob_size_x) * gob_size * block_height;
264 const u32 swizzled_offset = gob_address + table[(x * bytes_per_pixel) % gob_size_x]; 262 const u32 swizzled_offset = gob_address + table[(x * bytes_per_pixel) % gob_size_x];
265 const VAddr source_line = unswizzled_data + line * source_pitch + x * bytes_per_pixel; 263 u8* source_line = unswizzled_data + line * source_pitch + x * bytes_per_pixel;
266 const VAddr dest_addr = swizzled_data + swizzled_offset; 264 u8* dest_addr = swizzled_data + swizzled_offset;
267 265
268 Memory::CopyBlock(dest_addr, source_line, bytes_per_pixel); 266 std::memcpy(dest_addr, source_line, bytes_per_pixel);
269 } 267 }
270 } 268 }
271} 269}
272 270
273void UnswizzleSubrect(u32 subrect_width, u32 subrect_height, u32 dest_pitch, u32 swizzled_width, 271void UnswizzleSubrect(u32 subrect_width, u32 subrect_height, u32 dest_pitch, u32 swizzled_width,
274 u32 bytes_per_pixel, VAddr swizzled_data, VAddr unswizzled_data, 272 u32 bytes_per_pixel, u8* swizzled_data, u8* unswizzled_data, u32 block_height,
275 u32 block_height, u32 offset_x, u32 offset_y) { 273 u32 offset_x, u32 offset_y) {
276 for (u32 line = 0; line < subrect_height; ++line) { 274 for (u32 line = 0; line < subrect_height; ++line) {
277 const u32 y2 = line + offset_y; 275 const u32 y2 = line + offset_y;
278 const u32 gob_address_y = (y2 / (gob_size_y * block_height)) * gob_size * block_height + 276 const u32 gob_address_y = (y2 / (gob_size_y * block_height)) * gob_size * block_height +
@@ -282,10 +280,10 @@ void UnswizzleSubrect(u32 subrect_width, u32 subrect_height, u32 dest_pitch, u32
282 const u32 x2 = (x + offset_x) * bytes_per_pixel; 280 const u32 x2 = (x + offset_x) * bytes_per_pixel;
283 const u32 gob_address = gob_address_y + (x2 / gob_size_x) * gob_size * block_height; 281 const u32 gob_address = gob_address_y + (x2 / gob_size_x) * gob_size * block_height;
284 const u32 swizzled_offset = gob_address + table[x2 % gob_size_x]; 282 const u32 swizzled_offset = gob_address + table[x2 % gob_size_x];
285 const VAddr dest_line = unswizzled_data + line * dest_pitch + x * bytes_per_pixel; 283 u8* dest_line = unswizzled_data + line * dest_pitch + x * bytes_per_pixel;
286 const VAddr source_addr = swizzled_data + swizzled_offset; 284 u8* source_addr = swizzled_data + swizzled_offset;
287 285
288 Memory::CopyBlock(dest_line, source_addr, bytes_per_pixel); 286 std::memcpy(dest_line, source_addr, bytes_per_pixel);
289 } 287 }
290 } 288 }
291} 289}
diff --git a/src/video_core/textures/decoders.h b/src/video_core/textures/decoders.h
index 85b7e9f7b..e078fa274 100644
--- a/src/video_core/textures/decoders.h
+++ b/src/video_core/textures/decoders.h
@@ -16,18 +16,15 @@ inline std::size_t GetGOBSize() {
16 return 512; 16 return 512;
17} 17}
18 18
19/** 19/// Unswizzles a swizzled texture without changing its format.
20 * Unswizzles a swizzled texture without changing its format. 20void UnswizzleTexture(u8* unswizzled_data, u8* address, u32 tile_size_x, u32 tile_size_y,
21 */
22void UnswizzleTexture(u8* unswizzled_data, VAddr address, u32 tile_size_x, u32 tile_size_y,
23 u32 bytes_per_pixel, u32 width, u32 height, u32 depth, 21 u32 bytes_per_pixel, u32 width, u32 height, u32 depth,
24 u32 block_height = TICEntry::DefaultBlockHeight, 22 u32 block_height = TICEntry::DefaultBlockHeight,
25 u32 block_depth = TICEntry::DefaultBlockHeight, u32 width_spacing = 0); 23 u32 block_depth = TICEntry::DefaultBlockHeight, u32 width_spacing = 0);
26/** 24
27 * Unswizzles a swizzled texture without changing its format. 25/// Unswizzles a swizzled texture without changing its format.
28 */ 26std::vector<u8> UnswizzleTexture(u8* address, u32 tile_size_x, u32 tile_size_y, u32 bytes_per_pixel,
29std::vector<u8> UnswizzleTexture(VAddr address, u32 tile_size_x, u32 tile_size_y, 27 u32 width, u32 height, u32 depth,
30 u32 bytes_per_pixel, u32 width, u32 height, u32 depth,
31 u32 block_height = TICEntry::DefaultBlockHeight, 28 u32 block_height = TICEntry::DefaultBlockHeight,
32 u32 block_depth = TICEntry::DefaultBlockHeight, 29 u32 block_depth = TICEntry::DefaultBlockHeight,
33 u32 width_spacing = 0); 30 u32 width_spacing = 0);
@@ -37,25 +34,21 @@ void CopySwizzledData(u32 width, u32 height, u32 depth, u32 bytes_per_pixel,
37 u32 out_bytes_per_pixel, u8* swizzled_data, u8* unswizzled_data, 34 u32 out_bytes_per_pixel, u8* swizzled_data, u8* unswizzled_data,
38 bool unswizzle, u32 block_height, u32 block_depth, u32 width_spacing); 35 bool unswizzle, u32 block_height, u32 block_depth, u32 width_spacing);
39 36
40/** 37/// Decodes an unswizzled texture into a A8R8G8B8 texture.
41 * Decodes an unswizzled texture into a A8R8G8B8 texture.
42 */
43std::vector<u8> DecodeTexture(const std::vector<u8>& texture_data, TextureFormat format, u32 width, 38std::vector<u8> DecodeTexture(const std::vector<u8>& texture_data, TextureFormat format, u32 width,
44 u32 height); 39 u32 height);
45 40
46/** 41/// This function calculates the correct size of a texture depending if it's tiled or not.
47 * This function calculates the correct size of a texture depending if it's tiled or not.
48 */
49std::size_t CalculateSize(bool tiled, u32 bytes_per_pixel, u32 width, u32 height, u32 depth, 42std::size_t CalculateSize(bool tiled, u32 bytes_per_pixel, u32 width, u32 height, u32 depth,
50 u32 block_height, u32 block_depth); 43 u32 block_height, u32 block_depth);
51 44
52/// Copies an untiled subrectangle into a tiled surface. 45/// Copies an untiled subrectangle into a tiled surface.
53void SwizzleSubrect(u32 subrect_width, u32 subrect_height, u32 source_pitch, u32 swizzled_width, 46void SwizzleSubrect(u32 subrect_width, u32 subrect_height, u32 source_pitch, u32 swizzled_width,
54 u32 bytes_per_pixel, VAddr swizzled_data, VAddr unswizzled_data, 47 u32 bytes_per_pixel, u8* swizzled_data, u8* unswizzled_data, u32 block_height);
55 u32 block_height); 48
56/// Copies a tiled subrectangle into a linear surface. 49/// Copies a tiled subrectangle into a linear surface.
57void UnswizzleSubrect(u32 subrect_width, u32 subrect_height, u32 dest_pitch, u32 swizzled_width, 50void UnswizzleSubrect(u32 subrect_width, u32 subrect_height, u32 dest_pitch, u32 swizzled_width,
58 u32 bytes_per_pixel, VAddr swizzled_data, VAddr unswizzled_data, 51 u32 bytes_per_pixel, u8* swizzled_data, u8* unswizzled_data, u32 block_height,
59 u32 block_height, u32 offset_x, u32 offset_y); 52 u32 offset_x, u32 offset_y);
60 53
61} // namespace Tegra::Texture 54} // namespace Tegra::Texture
diff --git a/src/video_core/textures/texture.h b/src/video_core/textures/texture.h
index e7c78bee2..93ecc6e31 100644
--- a/src/video_core/textures/texture.h
+++ b/src/video_core/textures/texture.h
@@ -4,6 +4,7 @@
4 4
5#pragma once 5#pragma once
6 6
7#include <array>
7#include "common/assert.h" 8#include "common/assert.h"
8#include "common/bit_field.h" 9#include "common/bit_field.h"
9#include "common/common_funcs.h" 10#include "common/common_funcs.h"
@@ -182,7 +183,7 @@ struct TICEntry {
182 }; 183 };
183 union { 184 union {
184 BitField<0, 16, u32> height_minus_1; 185 BitField<0, 16, u32> height_minus_1;
185 BitField<16, 15, u32> depth_minus_1; 186 BitField<16, 14, u32> depth_minus_1;
186 }; 187 };
187 union { 188 union {
188 BitField<6, 13, u32> mip_lod_bias; 189 BitField<6, 13, u32> mip_lod_bias;
@@ -282,34 +283,62 @@ enum class TextureMipmapFilter : u32 {
282 283
283struct TSCEntry { 284struct TSCEntry {
284 union { 285 union {
285 BitField<0, 3, WrapMode> wrap_u; 286 struct {
286 BitField<3, 3, WrapMode> wrap_v; 287 union {
287 BitField<6, 3, WrapMode> wrap_p; 288 BitField<0, 3, WrapMode> wrap_u;
288 BitField<9, 1, u32> depth_compare_enabled; 289 BitField<3, 3, WrapMode> wrap_v;
289 BitField<10, 3, DepthCompareFunc> depth_compare_func; 290 BitField<6, 3, WrapMode> wrap_p;
290 BitField<13, 1, u32> srgb_conversion; 291 BitField<9, 1, u32> depth_compare_enabled;
291 BitField<20, 3, u32> max_anisotropy; 292 BitField<10, 3, DepthCompareFunc> depth_compare_func;
293 BitField<13, 1, u32> srgb_conversion;
294 BitField<20, 3, u32> max_anisotropy;
295 };
296 union {
297 BitField<0, 2, TextureFilter> mag_filter;
298 BitField<4, 2, TextureFilter> min_filter;
299 BitField<6, 2, TextureMipmapFilter> mipmap_filter;
300 BitField<9, 1, u32> cubemap_interface_filtering;
301 BitField<12, 13, u32> mip_lod_bias;
302 };
303 union {
304 BitField<0, 12, u32> min_lod_clamp;
305 BitField<12, 12, u32> max_lod_clamp;
306 BitField<24, 8, u32> srgb_border_color_r;
307 };
308 union {
309 BitField<12, 8, u32> srgb_border_color_g;
310 BitField<20, 8, u32> srgb_border_color_b;
311 };
312 std::array<f32, 4> border_color;
313 };
314 std::array<u8, 0x20> raw;
292 }; 315 };
293 union { 316
294 BitField<0, 2, TextureFilter> mag_filter; 317 float GetMaxAnisotropy() const {
295 BitField<4, 2, TextureFilter> min_filter; 318 return static_cast<float>(1U << max_anisotropy);
296 BitField<6, 2, TextureMipmapFilter> mip_filter; 319 }
297 BitField<9, 1, u32> cubemap_interface_filtering; 320
298 BitField<12, 13, u32> mip_lod_bias; 321 float GetMinLod() const {
299 }; 322 return static_cast<float>(min_lod_clamp) / 256.0f;
300 union { 323 }
301 BitField<0, 12, u32> min_lod_clamp; 324
302 BitField<12, 12, u32> max_lod_clamp; 325 float GetMaxLod() const {
303 BitField<24, 8, u32> srgb_border_color_r; 326 return static_cast<float>(max_lod_clamp) / 256.0f;
304 }; 327 }
305 union { 328
306 BitField<12, 8, u32> srgb_border_color_g; 329 float GetLodBias() const {
307 BitField<20, 8, u32> srgb_border_color_b; 330 // Sign extend the 13-bit value.
308 }; 331 constexpr u32 mask = 1U << (13 - 1);
309 float border_color_r; 332 return static_cast<s32>((mip_lod_bias ^ mask) - mask) / 256.0f;
310 float border_color_g; 333 }
311 float border_color_b; 334
312 float border_color_a; 335 std::array<float, 4> GetBorderColor() const {
336 if (srgb_conversion) {
337 return {srgb_border_color_r / 255.0f, srgb_border_color_g / 255.0f,
338 srgb_border_color_b / 255.0f, border_color[3]};
339 }
340 return border_color;
341 }
313}; 342};
314static_assert(sizeof(TSCEntry) == 0x20, "TSCEntry has wrong size"); 343static_assert(sizeof(TSCEntry) == 0x20, "TSCEntry has wrong size");
315 344
@@ -317,7 +346,6 @@ struct FullTextureInfo {
317 u32 index; 346 u32 index;
318 TICEntry tic; 347 TICEntry tic;
319 TSCEntry tsc; 348 TSCEntry tsc;
320 bool enabled;
321}; 349};
322 350
323/// Returns the number of bytes per pixel of the input texture format. 351/// Returns the number of bytes per pixel of the input texture format.
diff --git a/src/video_core/video_core.cpp b/src/video_core/video_core.cpp
index 0b8ccdd44..cb82ecf3f 100644
--- a/src/video_core/video_core.cpp
+++ b/src/video_core/video_core.cpp
@@ -11,8 +11,9 @@
11 11
12namespace VideoCore { 12namespace VideoCore {
13 13
14std::unique_ptr<RendererBase> CreateRenderer(Core::Frontend::EmuWindow& emu_window) { 14std::unique_ptr<RendererBase> CreateRenderer(Core::Frontend::EmuWindow& emu_window,
15 return std::make_unique<OpenGL::RendererOpenGL>(emu_window); 15 Core::System& system) {
16 return std::make_unique<OpenGL::RendererOpenGL>(emu_window, system);
16} 17}
17 18
18u16 GetResolutionScaleFactor(const RendererBase& renderer) { 19u16 GetResolutionScaleFactor(const RendererBase& renderer) {
diff --git a/src/video_core/video_core.h b/src/video_core/video_core.h
index 5b373bcb1..3c583f195 100644
--- a/src/video_core/video_core.h
+++ b/src/video_core/video_core.h
@@ -6,6 +6,10 @@
6 6
7#include <memory> 7#include <memory>
8 8
9namespace Core {
10class System;
11}
12
9namespace Core::Frontend { 13namespace Core::Frontend {
10class EmuWindow; 14class EmuWindow;
11} 15}
@@ -20,7 +24,8 @@ class RendererBase;
20 * @note The returned renderer instance is simply allocated. Its Init() 24 * @note The returned renderer instance is simply allocated. Its Init()
21 * function still needs to be called to fully complete its setup. 25 * function still needs to be called to fully complete its setup.
22 */ 26 */
23std::unique_ptr<RendererBase> CreateRenderer(Core::Frontend::EmuWindow& emu_window); 27std::unique_ptr<RendererBase> CreateRenderer(Core::Frontend::EmuWindow& emu_window,
28 Core::System& system);
24 29
25u16 GetResolutionScaleFactor(const RendererBase& renderer); 30u16 GetResolutionScaleFactor(const RendererBase& renderer);
26 31
diff --git a/src/web_service/verify_login.h b/src/web_service/verify_login.h
index 39db32dbb..821b345d7 100644
--- a/src/web_service/verify_login.h
+++ b/src/web_service/verify_login.h
@@ -4,8 +4,6 @@
4 4
5#pragma once 5#pragma once
6 6
7#include <functional>
8#include <future>
9#include <string> 7#include <string>
10 8
11namespace WebService { 9namespace WebService {
diff --git a/src/web_service/web_backend.cpp b/src/web_service/web_backend.cpp
index b7737b615..40da1a4e2 100644
--- a/src/web_service/web_backend.cpp
+++ b/src/web_service/web_backend.cpp
@@ -10,7 +10,6 @@
10#include "common/common_types.h" 10#include "common/common_types.h"
11#include "common/logging/log.h" 11#include "common/logging/log.h"
12#include "common/web_result.h" 12#include "common/web_result.h"
13#include "core/settings.h"
14#include "web_service/web_backend.h" 13#include "web_service/web_backend.h"
15 14
16namespace WebService { 15namespace WebService {
diff --git a/src/yuzu/applets/web_browser.cpp b/src/yuzu/applets/web_browser.cpp
index 6a9138d53..979b9ec14 100644
--- a/src/yuzu/applets/web_browser.cpp
+++ b/src/yuzu/applets/web_browser.cpp
@@ -56,6 +56,8 @@ constexpr char NX_SHIM_INJECT_SCRIPT[] = R"(
56 window.nx.endApplet = function() { 56 window.nx.endApplet = function() {
57 applet_done = true; 57 applet_done = true;
58 }; 58 };
59
60 window.onkeypress = function(e) { if (e.keyCode === 13) { applet_done = true; } };
59)"; 61)";
60 62
61QString GetNXShimInjectionScript() { 63QString GetNXShimInjectionScript() {
diff --git a/src/yuzu/bootmanager.cpp b/src/yuzu/bootmanager.cpp
index f74cb693a..05ad19e1d 100644
--- a/src/yuzu/bootmanager.cpp
+++ b/src/yuzu/bootmanager.cpp
@@ -20,16 +20,25 @@
20EmuThread::EmuThread(GRenderWindow* render_window) : render_window(render_window) {} 20EmuThread::EmuThread(GRenderWindow* render_window) : render_window(render_window) {}
21 21
22void EmuThread::run() { 22void EmuThread::run() {
23 if (!Settings::values.use_multi_core) { 23 render_window->MakeCurrent();
24 // Single core mode must acquire OpenGL context for entire emulation session
25 render_window->MakeCurrent();
26 }
27 24
28 MicroProfileOnThreadCreate("EmuThread"); 25 MicroProfileOnThreadCreate("EmuThread");
29 26
30 stop_run = false; 27 emit LoadProgress(VideoCore::LoadCallbackStage::Prepare, 0, 0);
28
29 Core::System::GetInstance().Renderer().Rasterizer().LoadDiskResources(
30 stop_run, [this](VideoCore::LoadCallbackStage stage, std::size_t value, std::size_t total) {
31 emit LoadProgress(stage, value, total);
32 });
33
34 emit LoadProgress(VideoCore::LoadCallbackStage::Complete, 0, 0);
35
36 if (Settings::values.use_asynchronous_gpu_emulation) {
37 // Release OpenGL context for the GPU thread
38 render_window->DoneCurrent();
39 }
31 40
32 // holds whether the cpu was running during the last iteration, 41 // Holds whether the cpu was running during the last iteration,
33 // so that the DebugModeLeft signal can be emitted before the 42 // so that the DebugModeLeft signal can be emitted before the
34 // next execution step 43 // next execution step
35 bool was_active = false; 44 bool was_active = false;
@@ -112,7 +121,6 @@ GRenderWindow::GRenderWindow(QWidget* parent, EmuThread* emu_thread)
112 setAttribute(Qt::WA_AcceptTouchEvents); 121 setAttribute(Qt::WA_AcceptTouchEvents);
113 122
114 InputCommon::Init(); 123 InputCommon::Init();
115 InputCommon::StartJoystickEventHandler();
116 connect(this, &GRenderWindow::FirstFrameDisplayed, static_cast<GMainWindow*>(parent), 124 connect(this, &GRenderWindow::FirstFrameDisplayed, static_cast<GMainWindow*>(parent),
117 &GMainWindow::OnLoadComplete); 125 &GMainWindow::OnLoadComplete);
118} 126}
diff --git a/src/yuzu/bootmanager.h b/src/yuzu/bootmanager.h
index d1f37e503..7226e690e 100644
--- a/src/yuzu/bootmanager.h
+++ b/src/yuzu/bootmanager.h
@@ -22,6 +22,10 @@ class GGLWidgetInternal;
22class GMainWindow; 22class GMainWindow;
23class GRenderWindow; 23class GRenderWindow;
24 24
25namespace VideoCore {
26enum class LoadCallbackStage;
27}
28
25class EmuThread : public QThread { 29class EmuThread : public QThread {
26 Q_OBJECT 30 Q_OBJECT
27 31
@@ -75,7 +79,7 @@ public:
75private: 79private:
76 bool exec_step = false; 80 bool exec_step = false;
77 bool running = false; 81 bool running = false;
78 std::atomic<bool> stop_run{false}; 82 std::atomic_bool stop_run{false};
79 std::mutex running_mutex; 83 std::mutex running_mutex;
80 std::condition_variable running_cv; 84 std::condition_variable running_cv;
81 85
@@ -101,6 +105,8 @@ signals:
101 void DebugModeLeft(); 105 void DebugModeLeft();
102 106
103 void ErrorThrown(Core::System::ResultStatus, std::string); 107 void ErrorThrown(Core::System::ResultStatus, std::string);
108
109 void LoadProgress(VideoCore::LoadCallbackStage stage, std::size_t value, std::size_t total);
104}; 110};
105 111
106class GRenderWindow : public QWidget, public Core::Frontend::EmuWindow { 112class GRenderWindow : public QWidget, public Core::Frontend::EmuWindow {
diff --git a/src/yuzu/compatdb.cpp b/src/yuzu/compatdb.cpp
index 5f0896f84..c8b0a5ec0 100644
--- a/src/yuzu/compatdb.cpp
+++ b/src/yuzu/compatdb.cpp
@@ -53,15 +53,15 @@ void CompatDB::Submit() {
53 case CompatDBPage::Final: 53 case CompatDBPage::Final:
54 back(); 54 back();
55 LOG_DEBUG(Frontend, "Compatibility Rating: {}", compatibility->checkedId()); 55 LOG_DEBUG(Frontend, "Compatibility Rating: {}", compatibility->checkedId());
56 Core::Telemetry().AddField(Telemetry::FieldType::UserFeedback, "Compatibility", 56 Core::System::GetInstance().TelemetrySession().AddField(
57 compatibility->checkedId()); 57 Telemetry::FieldType::UserFeedback, "Compatibility", compatibility->checkedId());
58 58
59 button(NextButton)->setEnabled(false); 59 button(NextButton)->setEnabled(false);
60 button(NextButton)->setText(tr("Submitting")); 60 button(NextButton)->setText(tr("Submitting"));
61 button(QWizard::CancelButton)->setVisible(false); 61 button(QWizard::CancelButton)->setVisible(false);
62 62
63 testcase_watcher.setFuture(QtConcurrent::run( 63 testcase_watcher.setFuture(QtConcurrent::run(
64 [this]() { return Core::System::GetInstance().TelemetrySession().SubmitTestcase(); })); 64 [] { return Core::System::GetInstance().TelemetrySession().SubmitTestcase(); }));
65 break; 65 break;
66 default: 66 default:
67 LOG_ERROR(Frontend, "Unexpected page: {}", currentId()); 67 LOG_ERROR(Frontend, "Unexpected page: {}", currentId());
diff --git a/src/yuzu/configuration/config.cpp b/src/yuzu/configuration/config.cpp
index ddf4cf552..4650f96a3 100644
--- a/src/yuzu/configuration/config.cpp
+++ b/src/yuzu/configuration/config.cpp
@@ -209,7 +209,7 @@ void Config::ReadPlayerValues() {
209 for (std::size_t p = 0; p < Settings::values.players.size(); ++p) { 209 for (std::size_t p = 0; p < Settings::values.players.size(); ++p) {
210 auto& player = Settings::values.players[p]; 210 auto& player = Settings::values.players[p];
211 211
212 player.connected = qt_config->value(QString("player_%1_connected").arg(p), false).toBool(); 212 player.connected = ReadSetting(QString("player_%1_connected").arg(p), false).toBool();
213 213
214 player.type = static_cast<Settings::ControllerType>( 214 player.type = static_cast<Settings::ControllerType>(
215 qt_config 215 qt_config
@@ -269,7 +269,7 @@ void Config::ReadPlayerValues() {
269} 269}
270 270
271void Config::ReadDebugValues() { 271void Config::ReadDebugValues() {
272 Settings::values.debug_pad_enabled = qt_config->value("debug_pad_enabled", false).toBool(); 272 Settings::values.debug_pad_enabled = ReadSetting("debug_pad_enabled", false).toBool();
273 for (int i = 0; i < Settings::NativeButton::NumButtons; ++i) { 273 for (int i = 0; i < Settings::NativeButton::NumButtons; ++i) {
274 std::string default_param = InputCommon::GenerateKeyboardParam(default_buttons[i]); 274 std::string default_param = InputCommon::GenerateKeyboardParam(default_buttons[i]);
275 Settings::values.debug_pad_buttons[i] = 275 Settings::values.debug_pad_buttons[i] =
@@ -298,7 +298,7 @@ void Config::ReadDebugValues() {
298} 298}
299 299
300void Config::ReadKeyboardValues() { 300void Config::ReadKeyboardValues() {
301 Settings::values.keyboard_enabled = qt_config->value("keyboard_enabled", false).toBool(); 301 Settings::values.keyboard_enabled = ReadSetting("keyboard_enabled", false).toBool();
302 302
303 std::transform(default_keyboard_keys.begin(), default_keyboard_keys.end(), 303 std::transform(default_keyboard_keys.begin(), default_keyboard_keys.end(),
304 Settings::values.keyboard_keys.begin(), InputCommon::GenerateKeyboardParam); 304 Settings::values.keyboard_keys.begin(), InputCommon::GenerateKeyboardParam);
@@ -311,7 +311,7 @@ void Config::ReadKeyboardValues() {
311} 311}
312 312
313void Config::ReadMouseValues() { 313void Config::ReadMouseValues() {
314 Settings::values.mouse_enabled = qt_config->value("mouse_enabled", false).toBool(); 314 Settings::values.mouse_enabled = ReadSetting("mouse_enabled", false).toBool();
315 315
316 for (int i = 0; i < Settings::NativeMouseButton::NumMouseButtons; ++i) { 316 for (int i = 0; i < Settings::NativeMouseButton::NumMouseButtons; ++i) {
317 std::string default_param = InputCommon::GenerateKeyboardParam(default_mouse_buttons[i]); 317 std::string default_param = InputCommon::GenerateKeyboardParam(default_mouse_buttons[i]);
@@ -327,16 +327,14 @@ void Config::ReadMouseValues() {
327} 327}
328 328
329void Config::ReadTouchscreenValues() { 329void Config::ReadTouchscreenValues() {
330 Settings::values.touchscreen.enabled = qt_config->value("touchscreen_enabled", true).toBool(); 330 Settings::values.touchscreen.enabled = ReadSetting("touchscreen_enabled", true).toBool();
331 Settings::values.touchscreen.device = 331 Settings::values.touchscreen.device =
332 qt_config->value("touchscreen_device", "engine:emu_window").toString().toStdString(); 332 ReadSetting("touchscreen_device", "engine:emu_window").toString().toStdString();
333 333
334 Settings::values.touchscreen.finger = qt_config->value("touchscreen_finger", 0).toUInt(); 334 Settings::values.touchscreen.finger = ReadSetting("touchscreen_finger", 0).toUInt();
335 Settings::values.touchscreen.rotation_angle = qt_config->value("touchscreen_angle", 0).toUInt(); 335 Settings::values.touchscreen.rotation_angle = ReadSetting("touchscreen_angle", 0).toUInt();
336 Settings::values.touchscreen.diameter_x = 336 Settings::values.touchscreen.diameter_x = ReadSetting("touchscreen_diameter_x", 15).toUInt();
337 qt_config->value("touchscreen_diameter_x", 15).toUInt(); 337 Settings::values.touchscreen.diameter_y = ReadSetting("touchscreen_diameter_y", 15).toUInt();
338 Settings::values.touchscreen.diameter_y =
339 qt_config->value("touchscreen_diameter_y", 15).toUInt();
340 qt_config->endGroup(); 338 qt_config->endGroup();
341} 339}
342 340
@@ -357,38 +355,41 @@ void Config::ReadValues() {
357 ReadTouchscreenValues(); 355 ReadTouchscreenValues();
358 356
359 Settings::values.motion_device = 357 Settings::values.motion_device =
360 qt_config->value("motion_device", "engine:motion_emu,update_period:100,sensitivity:0.01") 358 ReadSetting("motion_device", "engine:motion_emu,update_period:100,sensitivity:0.01")
361 .toString() 359 .toString()
362 .toStdString(); 360 .toStdString();
363 361
364 qt_config->beginGroup("Core"); 362 qt_config->beginGroup("Core");
365 Settings::values.use_cpu_jit = qt_config->value("use_cpu_jit", true).toBool(); 363 Settings::values.use_cpu_jit = ReadSetting("use_cpu_jit", true).toBool();
366 Settings::values.use_multi_core = qt_config->value("use_multi_core", false).toBool(); 364 Settings::values.use_multi_core = ReadSetting("use_multi_core", false).toBool();
367 qt_config->endGroup(); 365 qt_config->endGroup();
368 366
369 qt_config->beginGroup("Renderer"); 367 qt_config->beginGroup("Renderer");
370 Settings::values.resolution_factor = qt_config->value("resolution_factor", 1.0).toFloat(); 368 Settings::values.resolution_factor = ReadSetting("resolution_factor", 1.0).toFloat();
371 Settings::values.use_frame_limit = qt_config->value("use_frame_limit", true).toBool(); 369 Settings::values.use_frame_limit = ReadSetting("use_frame_limit", true).toBool();
372 Settings::values.frame_limit = qt_config->value("frame_limit", 100).toInt(); 370 Settings::values.frame_limit = ReadSetting("frame_limit", 100).toInt();
371 Settings::values.use_disk_shader_cache = ReadSetting("use_disk_shader_cache", true).toBool();
373 Settings::values.use_accurate_gpu_emulation = 372 Settings::values.use_accurate_gpu_emulation =
374 qt_config->value("use_accurate_gpu_emulation", false).toBool(); 373 ReadSetting("use_accurate_gpu_emulation", false).toBool();
374 Settings::values.use_asynchronous_gpu_emulation =
375 ReadSetting("use_asynchronous_gpu_emulation", false).toBool();
375 376
376 Settings::values.bg_red = qt_config->value("bg_red", 0.0).toFloat(); 377 Settings::values.bg_red = ReadSetting("bg_red", 0.0).toFloat();
377 Settings::values.bg_green = qt_config->value("bg_green", 0.0).toFloat(); 378 Settings::values.bg_green = ReadSetting("bg_green", 0.0).toFloat();
378 Settings::values.bg_blue = qt_config->value("bg_blue", 0.0).toFloat(); 379 Settings::values.bg_blue = ReadSetting("bg_blue", 0.0).toFloat();
379 qt_config->endGroup(); 380 qt_config->endGroup();
380 381
381 qt_config->beginGroup("Audio"); 382 qt_config->beginGroup("Audio");
382 Settings::values.sink_id = qt_config->value("output_engine", "auto").toString().toStdString(); 383 Settings::values.sink_id = ReadSetting("output_engine", "auto").toString().toStdString();
383 Settings::values.enable_audio_stretching = 384 Settings::values.enable_audio_stretching =
384 qt_config->value("enable_audio_stretching", true).toBool(); 385 ReadSetting("enable_audio_stretching", true).toBool();
385 Settings::values.audio_device_id = 386 Settings::values.audio_device_id =
386 qt_config->value("output_device", "auto").toString().toStdString(); 387 ReadSetting("output_device", "auto").toString().toStdString();
387 Settings::values.volume = qt_config->value("volume", 1).toFloat(); 388 Settings::values.volume = ReadSetting("volume", 1).toFloat();
388 qt_config->endGroup(); 389 qt_config->endGroup();
389 390
390 qt_config->beginGroup("Data Storage"); 391 qt_config->beginGroup("Data Storage");
391 Settings::values.use_virtual_sd = qt_config->value("use_virtual_sd", true).toBool(); 392 Settings::values.use_virtual_sd = ReadSetting("use_virtual_sd", true).toBool();
392 FileUtil::GetUserPath( 393 FileUtil::GetUserPath(
393 FileUtil::UserPath::NANDDir, 394 FileUtil::UserPath::NANDDir,
394 qt_config 395 qt_config
@@ -406,30 +407,30 @@ void Config::ReadValues() {
406 qt_config->endGroup(); 407 qt_config->endGroup();
407 408
408 qt_config->beginGroup("Core"); 409 qt_config->beginGroup("Core");
409 Settings::values.use_cpu_jit = qt_config->value("use_cpu_jit", true).toBool(); 410 Settings::values.use_cpu_jit = ReadSetting("use_cpu_jit", true).toBool();
410 Settings::values.use_multi_core = qt_config->value("use_multi_core", false).toBool(); 411 Settings::values.use_multi_core = ReadSetting("use_multi_core", false).toBool();
411 qt_config->endGroup(); 412 qt_config->endGroup();
412 413
413 qt_config->beginGroup("System"); 414 qt_config->beginGroup("System");
414 Settings::values.use_docked_mode = qt_config->value("use_docked_mode", false).toBool(); 415 Settings::values.use_docked_mode = ReadSetting("use_docked_mode", false).toBool();
415 Settings::values.enable_nfc = qt_config->value("enable_nfc", true).toBool(); 416 Settings::values.enable_nfc = ReadSetting("enable_nfc", true).toBool();
416 417
417 Settings::values.current_user = std::clamp<int>(qt_config->value("current_user", 0).toInt(), 0, 418 Settings::values.current_user =
418 Service::Account::MAX_USERS - 1); 419 std::clamp<int>(ReadSetting("current_user", 0).toInt(), 0, Service::Account::MAX_USERS - 1);
419 420
420 Settings::values.language_index = qt_config->value("language_index", 1).toInt(); 421 Settings::values.language_index = ReadSetting("language_index", 1).toInt();
421 422
422 const auto rng_seed_enabled = qt_config->value("rng_seed_enabled", false).toBool(); 423 const auto rng_seed_enabled = ReadSetting("rng_seed_enabled", false).toBool();
423 if (rng_seed_enabled) { 424 if (rng_seed_enabled) {
424 Settings::values.rng_seed = qt_config->value("rng_seed", 0).toULongLong(); 425 Settings::values.rng_seed = ReadSetting("rng_seed", 0).toULongLong();
425 } else { 426 } else {
426 Settings::values.rng_seed = std::nullopt; 427 Settings::values.rng_seed = std::nullopt;
427 } 428 }
428 429
429 const auto custom_rtc_enabled = qt_config->value("custom_rtc_enabled", false).toBool(); 430 const auto custom_rtc_enabled = ReadSetting("custom_rtc_enabled", false).toBool();
430 if (custom_rtc_enabled) { 431 if (custom_rtc_enabled) {
431 Settings::values.custom_rtc = 432 Settings::values.custom_rtc =
432 std::chrono::seconds(qt_config->value("custom_rtc", 0).toULongLong()); 433 std::chrono::seconds(ReadSetting("custom_rtc", 0).toULongLong());
433 } else { 434 } else {
434 Settings::values.custom_rtc = std::nullopt; 435 Settings::values.custom_rtc = std::nullopt;
435 } 436 }
@@ -437,35 +438,35 @@ void Config::ReadValues() {
437 qt_config->endGroup(); 438 qt_config->endGroup();
438 439
439 qt_config->beginGroup("Miscellaneous"); 440 qt_config->beginGroup("Miscellaneous");
440 Settings::values.log_filter = qt_config->value("log_filter", "*:Info").toString().toStdString(); 441 Settings::values.log_filter = ReadSetting("log_filter", "*:Info").toString().toStdString();
441 Settings::values.use_dev_keys = qt_config->value("use_dev_keys", false).toBool(); 442 Settings::values.use_dev_keys = ReadSetting("use_dev_keys", false).toBool();
442 qt_config->endGroup(); 443 qt_config->endGroup();
443 444
444 qt_config->beginGroup("Debugging"); 445 qt_config->beginGroup("Debugging");
445 Settings::values.use_gdbstub = qt_config->value("use_gdbstub", false).toBool(); 446 Settings::values.use_gdbstub = ReadSetting("use_gdbstub", false).toBool();
446 Settings::values.gdbstub_port = qt_config->value("gdbstub_port", 24689).toInt(); 447 Settings::values.gdbstub_port = ReadSetting("gdbstub_port", 24689).toInt();
447 Settings::values.program_args = qt_config->value("program_args", "").toString().toStdString(); 448 Settings::values.program_args = ReadSetting("program_args", "").toString().toStdString();
448 Settings::values.dump_exefs = qt_config->value("dump_exefs", false).toBool(); 449 Settings::values.dump_exefs = ReadSetting("dump_exefs", false).toBool();
449 Settings::values.dump_nso = qt_config->value("dump_nso", false).toBool(); 450 Settings::values.dump_nso = ReadSetting("dump_nso", false).toBool();
450 qt_config->endGroup(); 451 qt_config->endGroup();
451 452
452 qt_config->beginGroup("WebService"); 453 qt_config->beginGroup("WebService");
453 Settings::values.enable_telemetry = qt_config->value("enable_telemetry", true).toBool(); 454 Settings::values.enable_telemetry = ReadSetting("enable_telemetry", true).toBool();
454 Settings::values.web_api_url = 455 Settings::values.web_api_url =
455 qt_config->value("web_api_url", "https://api.yuzu-emu.org").toString().toStdString(); 456 ReadSetting("web_api_url", "https://api.yuzu-emu.org").toString().toStdString();
456 Settings::values.yuzu_username = qt_config->value("yuzu_username").toString().toStdString(); 457 Settings::values.yuzu_username = ReadSetting("yuzu_username").toString().toStdString();
457 Settings::values.yuzu_token = qt_config->value("yuzu_token").toString().toStdString(); 458 Settings::values.yuzu_token = ReadSetting("yuzu_token").toString().toStdString();
458 qt_config->endGroup(); 459 qt_config->endGroup();
459 460
460 const auto size = qt_config->beginReadArray("DisabledAddOns"); 461 const auto size = qt_config->beginReadArray("DisabledAddOns");
461 for (int i = 0; i < size; ++i) { 462 for (int i = 0; i < size; ++i) {
462 qt_config->setArrayIndex(i); 463 qt_config->setArrayIndex(i);
463 const auto title_id = qt_config->value("title_id", 0).toULongLong(); 464 const auto title_id = ReadSetting("title_id", 0).toULongLong();
464 std::vector<std::string> out; 465 std::vector<std::string> out;
465 const auto d_size = qt_config->beginReadArray("disabled"); 466 const auto d_size = qt_config->beginReadArray("disabled");
466 for (int j = 0; j < d_size; ++j) { 467 for (int j = 0; j < d_size; ++j) {
467 qt_config->setArrayIndex(j); 468 qt_config->setArrayIndex(j);
468 out.push_back(qt_config->value("d", "").toString().toStdString()); 469 out.push_back(ReadSetting("d", "").toString().toStdString());
469 } 470 }
470 qt_config->endArray(); 471 qt_config->endArray();
471 Settings::values.disabled_addons.insert_or_assign(title_id, out); 472 Settings::values.disabled_addons.insert_or_assign(title_id, out);
@@ -473,41 +474,38 @@ void Config::ReadValues() {
473 qt_config->endArray(); 474 qt_config->endArray();
474 475
475 qt_config->beginGroup("UI"); 476 qt_config->beginGroup("UI");
476 UISettings::values.theme = qt_config->value("theme", UISettings::themes[0].second).toString(); 477 UISettings::values.theme = ReadSetting("theme", UISettings::themes[0].second).toString();
477 UISettings::values.enable_discord_presence = 478 UISettings::values.enable_discord_presence =
478 qt_config->value("enable_discord_presence", true).toBool(); 479 ReadSetting("enable_discord_presence", true).toBool();
479 UISettings::values.screenshot_resolution_factor = 480 UISettings::values.screenshot_resolution_factor =
480 static_cast<u16>(qt_config->value("screenshot_resolution_factor", 0).toUInt()); 481 static_cast<u16>(ReadSetting("screenshot_resolution_factor", 0).toUInt());
481 UISettings::values.select_user_on_boot = 482 UISettings::values.select_user_on_boot = ReadSetting("select_user_on_boot", false).toBool();
482 qt_config->value("select_user_on_boot", false).toBool();
483 483
484 qt_config->beginGroup("UIGameList"); 484 qt_config->beginGroup("UIGameList");
485 UISettings::values.show_unknown = qt_config->value("show_unknown", true).toBool(); 485 UISettings::values.show_unknown = ReadSetting("show_unknown", true).toBool();
486 UISettings::values.show_add_ons = qt_config->value("show_add_ons", true).toBool(); 486 UISettings::values.show_add_ons = ReadSetting("show_add_ons", true).toBool();
487 UISettings::values.icon_size = qt_config->value("icon_size", 64).toUInt(); 487 UISettings::values.icon_size = ReadSetting("icon_size", 64).toUInt();
488 UISettings::values.row_1_text_id = qt_config->value("row_1_text_id", 3).toUInt(); 488 UISettings::values.row_1_text_id = ReadSetting("row_1_text_id", 3).toUInt();
489 UISettings::values.row_2_text_id = qt_config->value("row_2_text_id", 2).toUInt(); 489 UISettings::values.row_2_text_id = ReadSetting("row_2_text_id", 2).toUInt();
490 qt_config->endGroup(); 490 qt_config->endGroup();
491 491
492 qt_config->beginGroup("UILayout"); 492 qt_config->beginGroup("UILayout");
493 UISettings::values.geometry = qt_config->value("geometry").toByteArray(); 493 UISettings::values.geometry = ReadSetting("geometry").toByteArray();
494 UISettings::values.state = qt_config->value("state").toByteArray(); 494 UISettings::values.state = ReadSetting("state").toByteArray();
495 UISettings::values.renderwindow_geometry = 495 UISettings::values.renderwindow_geometry = ReadSetting("geometryRenderWindow").toByteArray();
496 qt_config->value("geometryRenderWindow").toByteArray(); 496 UISettings::values.gamelist_header_state = ReadSetting("gameListHeaderState").toByteArray();
497 UISettings::values.gamelist_header_state =
498 qt_config->value("gameListHeaderState").toByteArray();
499 UISettings::values.microprofile_geometry = 497 UISettings::values.microprofile_geometry =
500 qt_config->value("microProfileDialogGeometry").toByteArray(); 498 ReadSetting("microProfileDialogGeometry").toByteArray();
501 UISettings::values.microprofile_visible = 499 UISettings::values.microprofile_visible =
502 qt_config->value("microProfileDialogVisible", false).toBool(); 500 ReadSetting("microProfileDialogVisible", false).toBool();
503 qt_config->endGroup(); 501 qt_config->endGroup();
504 502
505 qt_config->beginGroup("Paths"); 503 qt_config->beginGroup("Paths");
506 UISettings::values.roms_path = qt_config->value("romsPath").toString(); 504 UISettings::values.roms_path = ReadSetting("romsPath").toString();
507 UISettings::values.symbols_path = qt_config->value("symbolsPath").toString(); 505 UISettings::values.symbols_path = ReadSetting("symbolsPath").toString();
508 UISettings::values.gamedir = qt_config->value("gameListRootDir", ".").toString(); 506 UISettings::values.gamedir = ReadSetting("gameListRootDir", ".").toString();
509 UISettings::values.gamedir_deepscan = qt_config->value("gameListDeepScan", false).toBool(); 507 UISettings::values.gamedir_deepscan = ReadSetting("gameListDeepScan", false).toBool();
510 UISettings::values.recent_files = qt_config->value("recentFiles").toStringList(); 508 UISettings::values.recent_files = ReadSetting("recentFiles").toStringList();
511 qt_config->endGroup(); 509 qt_config->endGroup();
512 510
513 qt_config->beginGroup("Shortcuts"); 511 qt_config->beginGroup("Shortcuts");
@@ -520,8 +518,8 @@ void Config::ReadValues() {
520 qt_config->beginGroup(hotkey); 518 qt_config->beginGroup(hotkey);
521 UISettings::values.shortcuts.emplace_back(UISettings::Shortcut( 519 UISettings::values.shortcuts.emplace_back(UISettings::Shortcut(
522 group + "/" + hotkey, 520 group + "/" + hotkey,
523 UISettings::ContextualShortcut(qt_config->value("KeySeq").toString(), 521 UISettings::ContextualShortcut(ReadSetting("KeySeq").toString(),
524 qt_config->value("Context").toInt()))); 522 ReadSetting("Context").toInt())));
525 qt_config->endGroup(); 523 qt_config->endGroup();
526 } 524 }
527 525
@@ -529,16 +527,16 @@ void Config::ReadValues() {
529 } 527 }
530 qt_config->endGroup(); 528 qt_config->endGroup();
531 529
532 UISettings::values.single_window_mode = qt_config->value("singleWindowMode", true).toBool(); 530 UISettings::values.single_window_mode = ReadSetting("singleWindowMode", true).toBool();
533 UISettings::values.fullscreen = qt_config->value("fullscreen", false).toBool(); 531 UISettings::values.fullscreen = ReadSetting("fullscreen", false).toBool();
534 UISettings::values.display_titlebar = qt_config->value("displayTitleBars", true).toBool(); 532 UISettings::values.display_titlebar = ReadSetting("displayTitleBars", true).toBool();
535 UISettings::values.show_filter_bar = qt_config->value("showFilterBar", true).toBool(); 533 UISettings::values.show_filter_bar = ReadSetting("showFilterBar", true).toBool();
536 UISettings::values.show_status_bar = qt_config->value("showStatusBar", true).toBool(); 534 UISettings::values.show_status_bar = ReadSetting("showStatusBar", true).toBool();
537 UISettings::values.confirm_before_closing = qt_config->value("confirmClose", true).toBool(); 535 UISettings::values.confirm_before_closing = ReadSetting("confirmClose", true).toBool();
538 UISettings::values.first_start = qt_config->value("firstStart", true).toBool(); 536 UISettings::values.first_start = ReadSetting("firstStart", true).toBool();
539 UISettings::values.callout_flags = qt_config->value("calloutFlags", 0).toUInt(); 537 UISettings::values.callout_flags = ReadSetting("calloutFlags", 0).toUInt();
540 UISettings::values.show_console = qt_config->value("showConsole", false).toBool(); 538 UISettings::values.show_console = ReadSetting("showConsole", false).toBool();
541 UISettings::values.profile_index = qt_config->value("profileIndex", 0).toUInt(); 539 UISettings::values.profile_index = ReadSetting("profileIndex", 0).toUInt();
542 540
543 ApplyDefaultProfileIfInputInvalid(); 541 ApplyDefaultProfileIfInputInvalid();
544 542
@@ -549,62 +547,79 @@ void Config::SavePlayerValues() {
549 for (std::size_t p = 0; p < Settings::values.players.size(); ++p) { 547 for (std::size_t p = 0; p < Settings::values.players.size(); ++p) {
550 const auto& player = Settings::values.players[p]; 548 const auto& player = Settings::values.players[p];
551 549
552 qt_config->setValue(QString("player_%1_connected").arg(p), player.connected); 550 WriteSetting(QString("player_%1_connected").arg(p), player.connected, false);
553 qt_config->setValue(QString("player_%1_type").arg(p), static_cast<u8>(player.type)); 551 WriteSetting(QString("player_%1_type").arg(p), static_cast<u8>(player.type),
552 static_cast<u8>(Settings::ControllerType::DualJoycon));
554 553
555 qt_config->setValue(QString("player_%1_body_color_left").arg(p), player.body_color_left); 554 WriteSetting(QString("player_%1_body_color_left").arg(p), player.body_color_left,
556 qt_config->setValue(QString("player_%1_body_color_right").arg(p), player.body_color_right); 555 Settings::JOYCON_BODY_NEON_BLUE);
557 qt_config->setValue(QString("player_%1_button_color_left").arg(p), 556 WriteSetting(QString("player_%1_body_color_right").arg(p), player.body_color_right,
558 player.button_color_left); 557 Settings::JOYCON_BODY_NEON_RED);
559 qt_config->setValue(QString("player_%1_button_color_right").arg(p), 558 WriteSetting(QString("player_%1_button_color_left").arg(p), player.button_color_left,
560 player.button_color_right); 559 Settings::JOYCON_BUTTONS_NEON_BLUE);
560 WriteSetting(QString("player_%1_button_color_right").arg(p), player.button_color_right,
561 Settings::JOYCON_BUTTONS_NEON_RED);
561 562
562 for (int i = 0; i < Settings::NativeButton::NumButtons; ++i) { 563 for (int i = 0; i < Settings::NativeButton::NumButtons; ++i) {
563 qt_config->setValue(QString("player_%1_").arg(p) + 564 std::string default_param = InputCommon::GenerateKeyboardParam(default_buttons[i]);
564 QString::fromStdString(Settings::NativeButton::mapping[i]), 565 WriteSetting(QString("player_%1_").arg(p) +
565 QString::fromStdString(player.buttons[i])); 566 QString::fromStdString(Settings::NativeButton::mapping[i]),
567 QString::fromStdString(player.buttons[i]),
568 QString::fromStdString(default_param));
566 } 569 }
567 for (int i = 0; i < Settings::NativeAnalog::NumAnalogs; ++i) { 570 for (int i = 0; i < Settings::NativeAnalog::NumAnalogs; ++i) {
568 qt_config->setValue(QString("player_%1_").arg(p) + 571 std::string default_param = InputCommon::GenerateAnalogParamFromKeys(
569 QString::fromStdString(Settings::NativeAnalog::mapping[i]), 572 default_analogs[i][0], default_analogs[i][1], default_analogs[i][2],
570 QString::fromStdString(player.analogs[i])); 573 default_analogs[i][3], default_analogs[i][4], 0.5f);
574 WriteSetting(QString("player_%1_").arg(p) +
575 QString::fromStdString(Settings::NativeAnalog::mapping[i]),
576 QString::fromStdString(player.analogs[i]),
577 QString::fromStdString(default_param));
571 } 578 }
572 } 579 }
573} 580}
574 581
575void Config::SaveDebugValues() { 582void Config::SaveDebugValues() {
576 qt_config->setValue("debug_pad_enabled", Settings::values.debug_pad_enabled); 583 WriteSetting("debug_pad_enabled", Settings::values.debug_pad_enabled, false);
577 for (int i = 0; i < Settings::NativeButton::NumButtons; ++i) { 584 for (int i = 0; i < Settings::NativeButton::NumButtons; ++i) {
578 qt_config->setValue(QString("debug_pad_") + 585 std::string default_param = InputCommon::GenerateKeyboardParam(default_buttons[i]);
579 QString::fromStdString(Settings::NativeButton::mapping[i]), 586 WriteSetting(QString("debug_pad_") +
580 QString::fromStdString(Settings::values.debug_pad_buttons[i])); 587 QString::fromStdString(Settings::NativeButton::mapping[i]),
588 QString::fromStdString(Settings::values.debug_pad_buttons[i]),
589 QString::fromStdString(default_param));
581 } 590 }
582 for (int i = 0; i < Settings::NativeAnalog::NumAnalogs; ++i) { 591 for (int i = 0; i < Settings::NativeAnalog::NumAnalogs; ++i) {
583 qt_config->setValue(QString("debug_pad_") + 592 std::string default_param = InputCommon::GenerateAnalogParamFromKeys(
584 QString::fromStdString(Settings::NativeAnalog::mapping[i]), 593 default_analogs[i][0], default_analogs[i][1], default_analogs[i][2],
585 QString::fromStdString(Settings::values.debug_pad_analogs[i])); 594 default_analogs[i][3], default_analogs[i][4], 0.5f);
595 WriteSetting(QString("debug_pad_") +
596 QString::fromStdString(Settings::NativeAnalog::mapping[i]),
597 QString::fromStdString(Settings::values.debug_pad_analogs[i]),
598 QString::fromStdString(default_param));
586 } 599 }
587} 600}
588 601
589void Config::SaveMouseValues() { 602void Config::SaveMouseValues() {
590 qt_config->setValue("mouse_enabled", Settings::values.mouse_enabled); 603 WriteSetting("mouse_enabled", Settings::values.mouse_enabled, false);
591 604
592 for (int i = 0; i < Settings::NativeMouseButton::NumMouseButtons; ++i) { 605 for (int i = 0; i < Settings::NativeMouseButton::NumMouseButtons; ++i) {
593 qt_config->setValue(QString("mouse_") + 606 std::string default_param = InputCommon::GenerateKeyboardParam(default_mouse_buttons[i]);
594 QString::fromStdString(Settings::NativeMouseButton::mapping[i]), 607 WriteSetting(QString("mouse_") +
595 QString::fromStdString(Settings::values.mouse_buttons[i])); 608 QString::fromStdString(Settings::NativeMouseButton::mapping[i]),
609 QString::fromStdString(Settings::values.mouse_buttons[i]),
610 QString::fromStdString(default_param));
596 } 611 }
597} 612}
598 613
599void Config::SaveTouchscreenValues() { 614void Config::SaveTouchscreenValues() {
600 qt_config->setValue("touchscreen_enabled", Settings::values.touchscreen.enabled); 615 WriteSetting("touchscreen_enabled", Settings::values.touchscreen.enabled, true);
601 qt_config->setValue("touchscreen_device", 616 WriteSetting("touchscreen_device", QString::fromStdString(Settings::values.touchscreen.device),
602 QString::fromStdString(Settings::values.touchscreen.device)); 617 "engine:emu_window");
603 618
604 qt_config->setValue("touchscreen_finger", Settings::values.touchscreen.finger); 619 WriteSetting("touchscreen_finger", Settings::values.touchscreen.finger, 0);
605 qt_config->setValue("touchscreen_angle", Settings::values.touchscreen.rotation_angle); 620 WriteSetting("touchscreen_angle", Settings::values.touchscreen.rotation_angle, 0);
606 qt_config->setValue("touchscreen_diameter_x", Settings::values.touchscreen.diameter_x); 621 WriteSetting("touchscreen_diameter_x", Settings::values.touchscreen.diameter_x, 15);
607 qt_config->setValue("touchscreen_diameter_y", Settings::values.touchscreen.diameter_y); 622 WriteSetting("touchscreen_diameter_y", Settings::values.touchscreen.diameter_y, 15);
608} 623}
609 624
610void Config::SaveValues() { 625void Config::SaveValues() {
@@ -615,88 +630,96 @@ void Config::SaveValues() {
615 SaveMouseValues(); 630 SaveMouseValues();
616 SaveTouchscreenValues(); 631 SaveTouchscreenValues();
617 632
618 qt_config->setValue("motion_device", QString::fromStdString(Settings::values.motion_device)); 633 WriteSetting("motion_device", QString::fromStdString(Settings::values.motion_device),
619 qt_config->setValue("keyboard_enabled", Settings::values.keyboard_enabled); 634 "engine:motion_emu,update_period:100,sensitivity:0.01");
635 WriteSetting("keyboard_enabled", Settings::values.keyboard_enabled, false);
620 636
621 qt_config->endGroup(); 637 qt_config->endGroup();
622 638
623 qt_config->beginGroup("Core"); 639 qt_config->beginGroup("Core");
624 qt_config->setValue("use_cpu_jit", Settings::values.use_cpu_jit); 640 WriteSetting("use_cpu_jit", Settings::values.use_cpu_jit, true);
625 qt_config->setValue("use_multi_core", Settings::values.use_multi_core); 641 WriteSetting("use_multi_core", Settings::values.use_multi_core, false);
626 qt_config->endGroup(); 642 qt_config->endGroup();
627 643
628 qt_config->beginGroup("Renderer"); 644 qt_config->beginGroup("Renderer");
629 qt_config->setValue("resolution_factor", (double)Settings::values.resolution_factor); 645 WriteSetting("resolution_factor", (double)Settings::values.resolution_factor, 1.0);
630 qt_config->setValue("use_frame_limit", Settings::values.use_frame_limit); 646 WriteSetting("use_frame_limit", Settings::values.use_frame_limit, true);
631 qt_config->setValue("frame_limit", Settings::values.frame_limit); 647 WriteSetting("frame_limit", Settings::values.frame_limit, 100);
632 qt_config->setValue("use_accurate_gpu_emulation", Settings::values.use_accurate_gpu_emulation); 648 WriteSetting("use_disk_shader_cache", Settings::values.use_disk_shader_cache, true);
649 WriteSetting("use_accurate_gpu_emulation", Settings::values.use_accurate_gpu_emulation, false);
650 WriteSetting("use_asynchronous_gpu_emulation", Settings::values.use_asynchronous_gpu_emulation,
651 false);
633 652
634 // Cast to double because Qt's written float values are not human-readable 653 // Cast to double because Qt's written float values are not human-readable
635 qt_config->setValue("bg_red", (double)Settings::values.bg_red); 654 WriteSetting("bg_red", (double)Settings::values.bg_red, 0.0);
636 qt_config->setValue("bg_green", (double)Settings::values.bg_green); 655 WriteSetting("bg_green", (double)Settings::values.bg_green, 0.0);
637 qt_config->setValue("bg_blue", (double)Settings::values.bg_blue); 656 WriteSetting("bg_blue", (double)Settings::values.bg_blue, 0.0);
638 qt_config->endGroup(); 657 qt_config->endGroup();
639 658
640 qt_config->beginGroup("Audio"); 659 qt_config->beginGroup("Audio");
641 qt_config->setValue("output_engine", QString::fromStdString(Settings::values.sink_id)); 660 WriteSetting("output_engine", QString::fromStdString(Settings::values.sink_id), "auto");
642 qt_config->setValue("enable_audio_stretching", Settings::values.enable_audio_stretching); 661 WriteSetting("enable_audio_stretching", Settings::values.enable_audio_stretching, true);
643 qt_config->setValue("output_device", QString::fromStdString(Settings::values.audio_device_id)); 662 WriteSetting("output_device", QString::fromStdString(Settings::values.audio_device_id), "auto");
644 qt_config->setValue("volume", Settings::values.volume); 663 WriteSetting("volume", Settings::values.volume, 1.0f);
645 qt_config->endGroup(); 664 qt_config->endGroup();
646 665
647 qt_config->beginGroup("Data Storage"); 666 qt_config->beginGroup("Data Storage");
648 qt_config->setValue("use_virtual_sd", Settings::values.use_virtual_sd); 667 WriteSetting("use_virtual_sd", Settings::values.use_virtual_sd, true);
649 qt_config->setValue("nand_directory", 668 WriteSetting("nand_directory",
650 QString::fromStdString(FileUtil::GetUserPath(FileUtil::UserPath::NANDDir))); 669 QString::fromStdString(FileUtil::GetUserPath(FileUtil::UserPath::NANDDir)),
651 qt_config->setValue("sdmc_directory", 670 QString::fromStdString(FileUtil::GetUserPath(FileUtil::UserPath::NANDDir)));
652 QString::fromStdString(FileUtil::GetUserPath(FileUtil::UserPath::SDMCDir))); 671 WriteSetting("sdmc_directory",
672 QString::fromStdString(FileUtil::GetUserPath(FileUtil::UserPath::SDMCDir)),
673 QString::fromStdString(FileUtil::GetUserPath(FileUtil::UserPath::SDMCDir)));
653 qt_config->endGroup(); 674 qt_config->endGroup();
654 675
655 qt_config->beginGroup("System"); 676 qt_config->beginGroup("System");
656 qt_config->setValue("use_docked_mode", Settings::values.use_docked_mode); 677 WriteSetting("use_docked_mode", Settings::values.use_docked_mode, false);
657 qt_config->setValue("enable_nfc", Settings::values.enable_nfc); 678 WriteSetting("enable_nfc", Settings::values.enable_nfc, true);
658 qt_config->setValue("current_user", Settings::values.current_user); 679 WriteSetting("current_user", Settings::values.current_user, 0);
659 qt_config->setValue("language_index", Settings::values.language_index); 680 WriteSetting("language_index", Settings::values.language_index, 1);
660 681
661 qt_config->setValue("rng_seed_enabled", Settings::values.rng_seed.has_value()); 682 WriteSetting("rng_seed_enabled", Settings::values.rng_seed.has_value(), false);
662 qt_config->setValue("rng_seed", Settings::values.rng_seed.value_or(0)); 683 WriteSetting("rng_seed", Settings::values.rng_seed.value_or(0), 0);
663 684
664 qt_config->setValue("custom_rtc_enabled", Settings::values.custom_rtc.has_value()); 685 WriteSetting("custom_rtc_enabled", Settings::values.custom_rtc.has_value(), false);
665 qt_config->setValue("custom_rtc", 686 WriteSetting("custom_rtc",
666 QVariant::fromValue<long long>( 687 QVariant::fromValue<long long>(
667 Settings::values.custom_rtc.value_or(std::chrono::seconds{}).count())); 688 Settings::values.custom_rtc.value_or(std::chrono::seconds{}).count()),
689 0);
668 690
669 qt_config->endGroup(); 691 qt_config->endGroup();
670 692
671 qt_config->beginGroup("Miscellaneous"); 693 qt_config->beginGroup("Miscellaneous");
672 qt_config->setValue("log_filter", QString::fromStdString(Settings::values.log_filter)); 694 WriteSetting("log_filter", QString::fromStdString(Settings::values.log_filter), "*:Info");
673 qt_config->setValue("use_dev_keys", Settings::values.use_dev_keys); 695 WriteSetting("use_dev_keys", Settings::values.use_dev_keys, false);
674 qt_config->endGroup(); 696 qt_config->endGroup();
675 697
676 qt_config->beginGroup("Debugging"); 698 qt_config->beginGroup("Debugging");
677 qt_config->setValue("use_gdbstub", Settings::values.use_gdbstub); 699 WriteSetting("use_gdbstub", Settings::values.use_gdbstub, false);
678 qt_config->setValue("gdbstub_port", Settings::values.gdbstub_port); 700 WriteSetting("gdbstub_port", Settings::values.gdbstub_port, 24689);
679 qt_config->setValue("program_args", QString::fromStdString(Settings::values.program_args)); 701 WriteSetting("program_args", QString::fromStdString(Settings::values.program_args), "");
680 qt_config->setValue("dump_exefs", Settings::values.dump_exefs); 702 WriteSetting("dump_exefs", Settings::values.dump_exefs, false);
681 qt_config->setValue("dump_nso", Settings::values.dump_nso); 703 WriteSetting("dump_nso", Settings::values.dump_nso, false);
682 qt_config->endGroup(); 704 qt_config->endGroup();
683 705
684 qt_config->beginGroup("WebService"); 706 qt_config->beginGroup("WebService");
685 qt_config->setValue("enable_telemetry", Settings::values.enable_telemetry); 707 WriteSetting("enable_telemetry", Settings::values.enable_telemetry, true);
686 qt_config->setValue("web_api_url", QString::fromStdString(Settings::values.web_api_url)); 708 WriteSetting("web_api_url", QString::fromStdString(Settings::values.web_api_url),
687 qt_config->setValue("yuzu_username", QString::fromStdString(Settings::values.yuzu_username)); 709 "https://api.yuzu-emu.org");
688 qt_config->setValue("yuzu_token", QString::fromStdString(Settings::values.yuzu_token)); 710 WriteSetting("yuzu_username", QString::fromStdString(Settings::values.yuzu_username));
711 WriteSetting("yuzu_token", QString::fromStdString(Settings::values.yuzu_token));
689 qt_config->endGroup(); 712 qt_config->endGroup();
690 713
691 qt_config->beginWriteArray("DisabledAddOns"); 714 qt_config->beginWriteArray("DisabledAddOns");
692 int i = 0; 715 int i = 0;
693 for (const auto& elem : Settings::values.disabled_addons) { 716 for (const auto& elem : Settings::values.disabled_addons) {
694 qt_config->setArrayIndex(i); 717 qt_config->setArrayIndex(i);
695 qt_config->setValue("title_id", QVariant::fromValue<u64>(elem.first)); 718 WriteSetting("title_id", QVariant::fromValue<u64>(elem.first), 0);
696 qt_config->beginWriteArray("disabled"); 719 qt_config->beginWriteArray("disabled");
697 for (std::size_t j = 0; j < elem.second.size(); ++j) { 720 for (std::size_t j = 0; j < elem.second.size(); ++j) {
698 qt_config->setArrayIndex(static_cast<int>(j)); 721 qt_config->setArrayIndex(static_cast<int>(j));
699 qt_config->setValue("d", QString::fromStdString(elem.second[j])); 722 WriteSetting("d", QString::fromStdString(elem.second[j]), "");
700 } 723 }
701 qt_config->endArray(); 724 qt_config->endArray();
702 ++i; 725 ++i;
@@ -704,60 +727,86 @@ void Config::SaveValues() {
704 qt_config->endArray(); 727 qt_config->endArray();
705 728
706 qt_config->beginGroup("UI"); 729 qt_config->beginGroup("UI");
707 qt_config->setValue("theme", UISettings::values.theme); 730 WriteSetting("theme", UISettings::values.theme, UISettings::themes[0].second);
708 qt_config->setValue("enable_discord_presence", UISettings::values.enable_discord_presence); 731 WriteSetting("enable_discord_presence", UISettings::values.enable_discord_presence, true);
709 qt_config->setValue("screenshot_resolution_factor", 732 WriteSetting("screenshot_resolution_factor", UISettings::values.screenshot_resolution_factor,
710 UISettings::values.screenshot_resolution_factor); 733 0);
711 qt_config->setValue("select_user_on_boot", UISettings::values.select_user_on_boot); 734 WriteSetting("select_user_on_boot", UISettings::values.select_user_on_boot, false);
712 735
713 qt_config->beginGroup("UIGameList"); 736 qt_config->beginGroup("UIGameList");
714 qt_config->setValue("show_unknown", UISettings::values.show_unknown); 737 WriteSetting("show_unknown", UISettings::values.show_unknown, true);
715 qt_config->setValue("show_add_ons", UISettings::values.show_add_ons); 738 WriteSetting("show_add_ons", UISettings::values.show_add_ons, true);
716 qt_config->setValue("icon_size", UISettings::values.icon_size); 739 WriteSetting("icon_size", UISettings::values.icon_size, 64);
717 qt_config->setValue("row_1_text_id", UISettings::values.row_1_text_id); 740 WriteSetting("row_1_text_id", UISettings::values.row_1_text_id, 3);
718 qt_config->setValue("row_2_text_id", UISettings::values.row_2_text_id); 741 WriteSetting("row_2_text_id", UISettings::values.row_2_text_id, 2);
719 qt_config->endGroup(); 742 qt_config->endGroup();
720 743
721 qt_config->beginGroup("UILayout"); 744 qt_config->beginGroup("UILayout");
722 qt_config->setValue("geometry", UISettings::values.geometry); 745 WriteSetting("geometry", UISettings::values.geometry);
723 qt_config->setValue("state", UISettings::values.state); 746 WriteSetting("state", UISettings::values.state);
724 qt_config->setValue("geometryRenderWindow", UISettings::values.renderwindow_geometry); 747 WriteSetting("geometryRenderWindow", UISettings::values.renderwindow_geometry);
725 qt_config->setValue("gameListHeaderState", UISettings::values.gamelist_header_state); 748 WriteSetting("gameListHeaderState", UISettings::values.gamelist_header_state);
726 qt_config->setValue("microProfileDialogGeometry", UISettings::values.microprofile_geometry); 749 WriteSetting("microProfileDialogGeometry", UISettings::values.microprofile_geometry);
727 qt_config->setValue("microProfileDialogVisible", UISettings::values.microprofile_visible); 750 WriteSetting("microProfileDialogVisible", UISettings::values.microprofile_visible, false);
728 qt_config->endGroup(); 751 qt_config->endGroup();
729 752
730 qt_config->beginGroup("Paths"); 753 qt_config->beginGroup("Paths");
731 qt_config->setValue("romsPath", UISettings::values.roms_path); 754 WriteSetting("romsPath", UISettings::values.roms_path);
732 qt_config->setValue("symbolsPath", UISettings::values.symbols_path); 755 WriteSetting("symbolsPath", UISettings::values.symbols_path);
733 qt_config->setValue("screenshotPath", UISettings::values.screenshot_path); 756 WriteSetting("screenshotPath", UISettings::values.screenshot_path);
734 qt_config->setValue("gameListRootDir", UISettings::values.gamedir); 757 WriteSetting("gameListRootDir", UISettings::values.gamedir, ".");
735 qt_config->setValue("gameListDeepScan", UISettings::values.gamedir_deepscan); 758 WriteSetting("gameListDeepScan", UISettings::values.gamedir_deepscan, false);
736 qt_config->setValue("recentFiles", UISettings::values.recent_files); 759 WriteSetting("recentFiles", UISettings::values.recent_files);
737 qt_config->endGroup(); 760 qt_config->endGroup();
738 761
739 qt_config->beginGroup("Shortcuts"); 762 qt_config->beginGroup("Shortcuts");
740 for (auto shortcut : UISettings::values.shortcuts) { 763 for (auto shortcut : UISettings::values.shortcuts) {
741 qt_config->setValue(shortcut.first + "/KeySeq", shortcut.second.first); 764 WriteSetting(shortcut.first + "/KeySeq", shortcut.second.first);
742 qt_config->setValue(shortcut.first + "/Context", shortcut.second.second); 765 WriteSetting(shortcut.first + "/Context", shortcut.second.second);
743 } 766 }
744 qt_config->endGroup(); 767 qt_config->endGroup();
745 768
746 qt_config->setValue("singleWindowMode", UISettings::values.single_window_mode); 769 WriteSetting("singleWindowMode", UISettings::values.single_window_mode, true);
747 qt_config->setValue("fullscreen", UISettings::values.fullscreen); 770 WriteSetting("fullscreen", UISettings::values.fullscreen, false);
748 qt_config->setValue("displayTitleBars", UISettings::values.display_titlebar); 771 WriteSetting("displayTitleBars", UISettings::values.display_titlebar, true);
749 qt_config->setValue("showFilterBar", UISettings::values.show_filter_bar); 772 WriteSetting("showFilterBar", UISettings::values.show_filter_bar, true);
750 qt_config->setValue("showStatusBar", UISettings::values.show_status_bar); 773 WriteSetting("showStatusBar", UISettings::values.show_status_bar, true);
751 qt_config->setValue("confirmClose", UISettings::values.confirm_before_closing); 774 WriteSetting("confirmClose", UISettings::values.confirm_before_closing, true);
752 qt_config->setValue("firstStart", UISettings::values.first_start); 775 WriteSetting("firstStart", UISettings::values.first_start, true);
753 qt_config->setValue("calloutFlags", UISettings::values.callout_flags); 776 WriteSetting("calloutFlags", UISettings::values.callout_flags, 0);
754 qt_config->setValue("showConsole", UISettings::values.show_console); 777 WriteSetting("showConsole", UISettings::values.show_console, false);
755 qt_config->setValue("profileIndex", UISettings::values.profile_index); 778 WriteSetting("profileIndex", UISettings::values.profile_index, 0);
756 qt_config->endGroup(); 779 qt_config->endGroup();
757} 780}
758 781
782QVariant Config::ReadSetting(const QString& name) const {
783 return qt_config->value(name);
784}
785
786QVariant Config::ReadSetting(const QString& name, const QVariant& default_value) const {
787 QVariant result;
788 if (qt_config->value(name + "/default", false).toBool()) {
789 result = default_value;
790 } else {
791 result = qt_config->value(name, default_value);
792 }
793 return result;
794}
795
796void Config::WriteSetting(const QString& name, const QVariant& value) {
797 qt_config->setValue(name, value);
798}
799
800void Config::WriteSetting(const QString& name, const QVariant& value,
801 const QVariant& default_value) {
802 qt_config->setValue(name + "/default", value == default_value);
803 qt_config->setValue(name, value);
804}
805
759void Config::Reload() { 806void Config::Reload() {
760 ReadValues(); 807 ReadValues();
808 // To apply default value changes
809 SaveValues();
761 Settings::Apply(); 810 Settings::Apply();
762} 811}
763 812
diff --git a/src/yuzu/configuration/config.h b/src/yuzu/configuration/config.h
index e73ad19bb..f4185db18 100644
--- a/src/yuzu/configuration/config.h
+++ b/src/yuzu/configuration/config.h
@@ -42,6 +42,11 @@ private:
42 void SaveMouseValues(); 42 void SaveMouseValues();
43 void SaveTouchscreenValues(); 43 void SaveTouchscreenValues();
44 44
45 QVariant ReadSetting(const QString& name) const;
46 QVariant ReadSetting(const QString& name, const QVariant& default_value) const;
47 void WriteSetting(const QString& name, const QVariant& value);
48 void WriteSetting(const QString& name, const QVariant& value, const QVariant& default_value);
49
45 std::unique_ptr<QSettings> qt_config; 50 std::unique_ptr<QSettings> qt_config;
46 std::string qt_config_loc; 51 std::string qt_config_loc;
47}; 52};
diff --git a/src/yuzu/configuration/configure_graphics.cpp b/src/yuzu/configuration/configure_graphics.cpp
index 8290b4384..dd1d67488 100644
--- a/src/yuzu/configuration/configure_graphics.cpp
+++ b/src/yuzu/configuration/configure_graphics.cpp
@@ -62,9 +62,7 @@ ConfigureGraphics::ConfigureGraphics(QWidget* parent)
62 const QColor new_bg_color = QColorDialog::getColor(bg_color); 62 const QColor new_bg_color = QColorDialog::getColor(bg_color);
63 if (!new_bg_color.isValid()) 63 if (!new_bg_color.isValid())
64 return; 64 return;
65 bg_color = new_bg_color; 65 UpdateBackgroundColorButton(new_bg_color);
66 ui->bg_button->setStyleSheet(
67 QString("QPushButton { background-color: %1 }").arg(bg_color.name()));
68 }); 66 });
69} 67}
70 68
@@ -75,11 +73,12 @@ void ConfigureGraphics::setConfiguration() {
75 static_cast<int>(FromResolutionFactor(Settings::values.resolution_factor))); 73 static_cast<int>(FromResolutionFactor(Settings::values.resolution_factor)));
76 ui->toggle_frame_limit->setChecked(Settings::values.use_frame_limit); 74 ui->toggle_frame_limit->setChecked(Settings::values.use_frame_limit);
77 ui->frame_limit->setValue(Settings::values.frame_limit); 75 ui->frame_limit->setValue(Settings::values.frame_limit);
76 ui->use_disk_shader_cache->setChecked(Settings::values.use_disk_shader_cache);
78 ui->use_accurate_gpu_emulation->setChecked(Settings::values.use_accurate_gpu_emulation); 77 ui->use_accurate_gpu_emulation->setChecked(Settings::values.use_accurate_gpu_emulation);
79 bg_color = QColor::fromRgbF(Settings::values.bg_red, Settings::values.bg_green, 78 ui->use_asynchronous_gpu_emulation->setEnabled(!Core::System::GetInstance().IsPoweredOn());
80 Settings::values.bg_blue); 79 ui->use_asynchronous_gpu_emulation->setChecked(Settings::values.use_asynchronous_gpu_emulation);
81 ui->bg_button->setStyleSheet( 80 UpdateBackgroundColorButton(QColor::fromRgbF(Settings::values.bg_red, Settings::values.bg_green,
82 QString("QPushButton { background-color: %1 }").arg(bg_color.name())); 81 Settings::values.bg_blue));
83} 82}
84 83
85void ConfigureGraphics::applyConfiguration() { 84void ConfigureGraphics::applyConfiguration() {
@@ -87,8 +86,21 @@ void ConfigureGraphics::applyConfiguration() {
87 ToResolutionFactor(static_cast<Resolution>(ui->resolution_factor_combobox->currentIndex())); 86 ToResolutionFactor(static_cast<Resolution>(ui->resolution_factor_combobox->currentIndex()));
88 Settings::values.use_frame_limit = ui->toggle_frame_limit->isChecked(); 87 Settings::values.use_frame_limit = ui->toggle_frame_limit->isChecked();
89 Settings::values.frame_limit = ui->frame_limit->value(); 88 Settings::values.frame_limit = ui->frame_limit->value();
89 Settings::values.use_disk_shader_cache = ui->use_disk_shader_cache->isChecked();
90 Settings::values.use_accurate_gpu_emulation = ui->use_accurate_gpu_emulation->isChecked(); 90 Settings::values.use_accurate_gpu_emulation = ui->use_accurate_gpu_emulation->isChecked();
91 Settings::values.use_asynchronous_gpu_emulation =
92 ui->use_asynchronous_gpu_emulation->isChecked();
91 Settings::values.bg_red = static_cast<float>(bg_color.redF()); 93 Settings::values.bg_red = static_cast<float>(bg_color.redF());
92 Settings::values.bg_green = static_cast<float>(bg_color.greenF()); 94 Settings::values.bg_green = static_cast<float>(bg_color.greenF());
93 Settings::values.bg_blue = static_cast<float>(bg_color.blueF()); 95 Settings::values.bg_blue = static_cast<float>(bg_color.blueF());
94} 96}
97
98void ConfigureGraphics::UpdateBackgroundColorButton(QColor color) {
99 bg_color = color;
100
101 QPixmap pixmap(ui->bg_button->size());
102 pixmap.fill(bg_color);
103
104 const QIcon color_icon(pixmap);
105 ui->bg_button->setIcon(color_icon);
106}
diff --git a/src/yuzu/configuration/configure_graphics.h b/src/yuzu/configuration/configure_graphics.h
index d6ffc6fde..f2799822d 100644
--- a/src/yuzu/configuration/configure_graphics.h
+++ b/src/yuzu/configuration/configure_graphics.h
@@ -23,6 +23,8 @@ public:
23private: 23private:
24 void setConfiguration(); 24 void setConfiguration();
25 25
26 void UpdateBackgroundColorButton(QColor color);
27
26 std::unique_ptr<Ui::ConfigureGraphics> ui; 28 std::unique_ptr<Ui::ConfigureGraphics> ui;
27 QColor bg_color; 29 QColor bg_color;
28}; 30};
diff --git a/src/yuzu/configuration/configure_graphics.ui b/src/yuzu/configuration/configure_graphics.ui
index e278cdd05..c6767e0ca 100644
--- a/src/yuzu/configuration/configure_graphics.ui
+++ b/src/yuzu/configuration/configure_graphics.ui
@@ -50,6 +50,13 @@
50 </layout> 50 </layout>
51 </item> 51 </item>
52 <item> 52 <item>
53 <widget class="QCheckBox" name="use_disk_shader_cache">
54 <property name="text">
55 <string>Use disk shader cache</string>
56 </property>
57 </widget>
58 </item>
59 <item>
53 <widget class="QCheckBox" name="use_accurate_gpu_emulation"> 60 <widget class="QCheckBox" name="use_accurate_gpu_emulation">
54 <property name="text"> 61 <property name="text">
55 <string>Use accurate GPU emulation (slow)</string> 62 <string>Use accurate GPU emulation (slow)</string>
@@ -57,6 +64,13 @@
57 </widget> 64 </widget>
58 </item> 65 </item>
59 <item> 66 <item>
67 <widget class="QCheckBox" name="use_asynchronous_gpu_emulation">
68 <property name="text">
69 <string>Use asynchronous GPU emulation</string>
70 </property>
71 </widget>
72 </item>
73 <item>
60 <layout class="QHBoxLayout" name="horizontalLayout"> 74 <layout class="QHBoxLayout" name="horizontalLayout">
61 <item> 75 <item>
62 <widget class="QLabel" name="label"> 76 <widget class="QLabel" name="label">
diff --git a/src/yuzu/debugger/graphics/graphics_surface.cpp b/src/yuzu/debugger/graphics/graphics_surface.cpp
index 209798521..29f01dfb2 100644
--- a/src/yuzu/debugger/graphics/graphics_surface.cpp
+++ b/src/yuzu/debugger/graphics/graphics_surface.cpp
@@ -383,13 +383,12 @@ void GraphicsSurfaceWidget::OnUpdate() {
383 // TODO: Implement a good way to visualize alpha components! 383 // TODO: Implement a good way to visualize alpha components!
384 384
385 QImage decoded_image(surface_width, surface_height, QImage::Format_ARGB32); 385 QImage decoded_image(surface_width, surface_height, QImage::Format_ARGB32);
386 std::optional<VAddr> address = gpu.MemoryManager().GpuToCpuAddress(surface_address);
387 386
388 // TODO(bunnei): Will not work with BCn formats that swizzle 4x4 tiles. 387 // TODO(bunnei): Will not work with BCn formats that swizzle 4x4 tiles.
389 // Needs to be fixed if we plan to use this feature more, otherwise we may remove it. 388 // Needs to be fixed if we plan to use this feature more, otherwise we may remove it.
390 auto unswizzled_data = Tegra::Texture::UnswizzleTexture( 389 auto unswizzled_data = Tegra::Texture::UnswizzleTexture(
391 *address, 1, 1, Tegra::Texture::BytesPerPixel(surface_format), surface_width, 390 gpu.MemoryManager().GetPointer(surface_address), 1, 1,
392 surface_height, 1U); 391 Tegra::Texture::BytesPerPixel(surface_format), surface_width, surface_height, 1U);
393 392
394 auto texture_data = Tegra::Texture::DecodeTexture(unswizzled_data, surface_format, 393 auto texture_data = Tegra::Texture::DecodeTexture(unswizzled_data, surface_format,
395 surface_width, surface_height); 394 surface_width, surface_height);
@@ -398,7 +397,7 @@ void GraphicsSurfaceWidget::OnUpdate() {
398 397
399 for (unsigned int y = 0; y < surface_height; ++y) { 398 for (unsigned int y = 0; y < surface_height; ++y) {
400 for (unsigned int x = 0; x < surface_width; ++x) { 399 for (unsigned int x = 0; x < surface_width; ++x) {
401 Math::Vec4<u8> color; 400 Common::Vec4<u8> color;
402 color[0] = texture_data[x + y * surface_width + 0]; 401 color[0] = texture_data[x + y * surface_width + 0];
403 color[1] = texture_data[x + y * surface_width + 1]; 402 color[1] = texture_data[x + y * surface_width + 1];
404 color[2] = texture_data[x + y * surface_width + 2]; 403 color[2] = texture_data[x + y * surface_width + 2];
diff --git a/src/yuzu/debugger/wait_tree.cpp b/src/yuzu/debugger/wait_tree.cpp
index f50225d5f..06ad74ffe 100644
--- a/src/yuzu/debugger/wait_tree.cpp
+++ b/src/yuzu/debugger/wait_tree.cpp
@@ -81,9 +81,8 @@ QString WaitTreeText::GetText() const {
81 return text; 81 return text;
82} 82}
83 83
84WaitTreeMutexInfo::WaitTreeMutexInfo(VAddr mutex_address) : mutex_address(mutex_address) { 84WaitTreeMutexInfo::WaitTreeMutexInfo(VAddr mutex_address, const Kernel::HandleTable& handle_table)
85 const auto& handle_table = Core::CurrentProcess()->GetHandleTable(); 85 : mutex_address(mutex_address) {
86
87 mutex_value = Memory::Read32(mutex_address); 86 mutex_value = Memory::Read32(mutex_address);
88 owner_handle = static_cast<Kernel::Handle>(mutex_value & Kernel::Mutex::MutexOwnerMask); 87 owner_handle = static_cast<Kernel::Handle>(mutex_value & Kernel::Mutex::MutexOwnerMask);
89 owner = handle_table.Get<Kernel::Thread>(owner_handle); 88 owner = handle_table.Get<Kernel::Thread>(owner_handle);
@@ -316,7 +315,8 @@ std::vector<std::unique_ptr<WaitTreeItem>> WaitTreeThread::GetChildren() const {
316 315
317 const VAddr mutex_wait_address = thread.GetMutexWaitAddress(); 316 const VAddr mutex_wait_address = thread.GetMutexWaitAddress();
318 if (mutex_wait_address != 0) { 317 if (mutex_wait_address != 0) {
319 list.push_back(std::make_unique<WaitTreeMutexInfo>(mutex_wait_address)); 318 const auto& handle_table = thread.GetOwnerProcess()->GetHandleTable();
319 list.push_back(std::make_unique<WaitTreeMutexInfo>(mutex_wait_address, handle_table));
320 } else { 320 } else {
321 list.push_back(std::make_unique<WaitTreeText>(tr("not waiting for mutex"))); 321 list.push_back(std::make_unique<WaitTreeText>(tr("not waiting for mutex")));
322 } 322 }
diff --git a/src/yuzu/debugger/wait_tree.h b/src/yuzu/debugger/wait_tree.h
index 365c3dbfe..62886609d 100644
--- a/src/yuzu/debugger/wait_tree.h
+++ b/src/yuzu/debugger/wait_tree.h
@@ -17,6 +17,7 @@
17class EmuThread; 17class EmuThread;
18 18
19namespace Kernel { 19namespace Kernel {
20class HandleTable;
20class ReadableEvent; 21class ReadableEvent;
21class WaitObject; 22class WaitObject;
22class Thread; 23class Thread;
@@ -72,7 +73,7 @@ public:
72class WaitTreeMutexInfo : public WaitTreeExpandableItem { 73class WaitTreeMutexInfo : public WaitTreeExpandableItem {
73 Q_OBJECT 74 Q_OBJECT
74public: 75public:
75 explicit WaitTreeMutexInfo(VAddr mutex_address); 76 explicit WaitTreeMutexInfo(VAddr mutex_address, const Kernel::HandleTable& handle_table);
76 ~WaitTreeMutexInfo() override; 77 ~WaitTreeMutexInfo() override;
77 78
78 QString GetText() const override; 79 QString GetText() const override;
diff --git a/src/yuzu/loading_screen.cpp b/src/yuzu/loading_screen.cpp
index 907aac4f1..86f6d0165 100644
--- a/src/yuzu/loading_screen.cpp
+++ b/src/yuzu/loading_screen.cpp
@@ -43,6 +43,7 @@ QProgressBar {
43} 43}
44QProgressBar::chunk { 44QProgressBar::chunk {
45 background-color: #0ab9e6; 45 background-color: #0ab9e6;
46 width: 1px;
46})"; 47})";
47 48
48constexpr const char PROGRESSBAR_STYLE_BUILD[] = R"( 49constexpr const char PROGRESSBAR_STYLE_BUILD[] = R"(
@@ -53,7 +54,8 @@ QProgressBar {
53 padding: 2px; 54 padding: 2px;
54} 55}
55QProgressBar::chunk { 56QProgressBar::chunk {
56 background-color: #ff3c28; 57 background-color: #ff3c28;
58 width: 1px;
57})"; 59})";
58 60
59constexpr const char PROGRESSBAR_STYLE_COMPLETE[] = R"( 61constexpr const char PROGRESSBAR_STYLE_COMPLETE[] = R"(
diff --git a/src/yuzu/loading_screen.ui b/src/yuzu/loading_screen.ui
index a67d273fd..820b47536 100644
--- a/src/yuzu/loading_screen.ui
+++ b/src/yuzu/loading_screen.ui
@@ -132,7 +132,7 @@ border-radius: 15px;
132font: 75 15pt &quot;Arial&quot;;</string> 132font: 75 15pt &quot;Arial&quot;;</string>
133 </property> 133 </property>
134 <property name="text"> 134 <property name="text">
135 <string>Stage 1 of 2. Estimate Time 5m 4s</string> 135 <string>Estimated Time 5m 4s</string>
136 </property> 136 </property>
137 </widget> 137 </widget>
138 </item> 138 </item>
@@ -146,6 +146,9 @@ font: 75 15pt &quot;Arial&quot;;</string>
146 <property name="text"> 146 <property name="text">
147 <string/> 147 <string/>
148 </property> 148 </property>
149 <property name="alignment">
150 <set>Qt::AlignCenter</set>
151 </property>
149 <property name="margin"> 152 <property name="margin">
150 <number>30</number> 153 <number>30</number>
151 </property> 154 </property>
diff --git a/src/yuzu/main.cpp b/src/yuzu/main.cpp
index ab403b3ac..41ba3c4c6 100644
--- a/src/yuzu/main.cpp
+++ b/src/yuzu/main.cpp
@@ -11,6 +11,7 @@
11#include "applets/profile_select.h" 11#include "applets/profile_select.h"
12#include "applets/software_keyboard.h" 12#include "applets/software_keyboard.h"
13#include "applets/web_browser.h" 13#include "applets/web_browser.h"
14#include "configuration/configure_input.h"
14#include "configuration/configure_per_general.h" 15#include "configuration/configure_per_general.h"
15#include "core/file_sys/vfs.h" 16#include "core/file_sys/vfs.h"
16#include "core/file_sys/vfs_real.h" 17#include "core/file_sys/vfs_real.h"
@@ -339,6 +340,11 @@ void GMainWindow::WebBrowserOpenPage(std::string_view filename, std::string_view
339 .arg(QString::fromStdString(std::to_string(key_code)))); 340 .arg(QString::fromStdString(std::to_string(key_code))));
340 }; 341 };
341 342
343 QMessageBox::information(
344 this, tr("Exit"),
345 tr("To exit the web application, use the game provided controls to select exit, select the "
346 "'Exit Web Applet' option in the menu bar, or press the 'Enter' key."));
347
342 bool running_exit_check = false; 348 bool running_exit_check = false;
343 while (!finished) { 349 while (!finished) {
344 QApplication::processEvents(); 350 QApplication::processEvents();
@@ -522,6 +528,7 @@ void GMainWindow::InitializeHotkeys() {
522 Qt::ApplicationShortcut); 528 Qt::ApplicationShortcut);
523 hotkey_registry.RegisterHotkey("Main Window", "Capture Screenshot", 529 hotkey_registry.RegisterHotkey("Main Window", "Capture Screenshot",
524 QKeySequence(QKeySequence::Print)); 530 QKeySequence(QKeySequence::Print));
531 hotkey_registry.RegisterHotkey("Main Window", "Change Docked Mode", QKeySequence(Qt::Key_F10));
525 532
526 hotkey_registry.LoadHotkeys(); 533 hotkey_registry.LoadHotkeys();
527 534
@@ -561,7 +568,10 @@ void GMainWindow::InitializeHotkeys() {
561 Settings::values.use_frame_limit = !Settings::values.use_frame_limit; 568 Settings::values.use_frame_limit = !Settings::values.use_frame_limit;
562 UpdateStatusBar(); 569 UpdateStatusBar();
563 }); 570 });
564 constexpr u16 SPEED_LIMIT_STEP = 5; 571 // TODO: Remove this comment/static whenever the next major release of
572 // MSVC occurs and we make it a requirement (see:
573 // https://developercommunity.visualstudio.com/content/problem/93922/constexprs-are-trying-to-be-captured-in-lambda-fun.html)
574 static constexpr u16 SPEED_LIMIT_STEP = 5;
565 connect(hotkey_registry.GetHotkey("Main Window", "Increase Speed Limit", this), 575 connect(hotkey_registry.GetHotkey("Main Window", "Increase Speed Limit", this),
566 &QShortcut::activated, this, [&] { 576 &QShortcut::activated, this, [&] {
567 if (Settings::values.frame_limit < 9999 - SPEED_LIMIT_STEP) { 577 if (Settings::values.frame_limit < 9999 - SPEED_LIMIT_STEP) {
@@ -588,6 +598,12 @@ void GMainWindow::InitializeHotkeys() {
588 OnCaptureScreenshot(); 598 OnCaptureScreenshot();
589 } 599 }
590 }); 600 });
601 connect(hotkey_registry.GetHotkey("Main Window", "Change Docked Mode", this),
602 &QShortcut::activated, this, [&] {
603 Settings::values.use_docked_mode = !Settings::values.use_docked_mode;
604 OnDockedModeChanged(!Settings::values.use_docked_mode,
605 Settings::values.use_docked_mode);
606 });
591} 607}
592 608
593void GMainWindow::SetDefaultUIGeometry() { 609void GMainWindow::SetDefaultUIGeometry() {
@@ -846,7 +862,7 @@ bool GMainWindow::LoadROM(const QString& filename) {
846 } 862 }
847 game_path = filename; 863 game_path = filename;
848 864
849 Core::Telemetry().AddField(Telemetry::FieldType::App, "Frontend", "Qt"); 865 system.TelemetrySession().AddField(Telemetry::FieldType::App, "Frontend", "Qt");
850 return true; 866 return true;
851} 867}
852 868
@@ -887,6 +903,9 @@ void GMainWindow::BootGame(const QString& filename) {
887 connect(emu_thread.get(), &EmuThread::DebugModeLeft, waitTreeWidget, 903 connect(emu_thread.get(), &EmuThread::DebugModeLeft, waitTreeWidget,
888 &WaitTreeWidget::OnDebugModeLeft, Qt::BlockingQueuedConnection); 904 &WaitTreeWidget::OnDebugModeLeft, Qt::BlockingQueuedConnection);
889 905
906 connect(emu_thread.get(), &EmuThread::LoadProgress, loading_screen,
907 &LoadingScreen::OnLoadProgress, Qt::QueuedConnection);
908
890 // Update the GUI 909 // Update the GUI
891 if (ui.action_Single_Window_Mode->isChecked()) { 910 if (ui.action_Single_Window_Mode->isChecked()) {
892 game_list->hide(); 911 game_list->hide();
@@ -1682,12 +1701,16 @@ void GMainWindow::OnToggleFilterBar() {
1682 1701
1683void GMainWindow::OnCaptureScreenshot() { 1702void GMainWindow::OnCaptureScreenshot() {
1684 OnPauseGame(); 1703 OnPauseGame();
1685 const QString path = 1704 QFileDialog png_dialog(this, tr("Capture Screenshot"), UISettings::values.screenshot_path,
1686 QFileDialog::getSaveFileName(this, tr("Capture Screenshot"), 1705 tr("PNG Image (*.png)"));
1687 UISettings::values.screenshot_path, tr("PNG Image (*.png)")); 1706 png_dialog.setAcceptMode(QFileDialog::AcceptSave);
1688 if (!path.isEmpty()) { 1707 png_dialog.setDefaultSuffix("png");
1689 UISettings::values.screenshot_path = QFileInfo(path).path(); 1708 if (png_dialog.exec()) {
1690 render_window->CaptureScreenshot(UISettings::values.screenshot_resolution_factor, path); 1709 const QString path = png_dialog.selectedFiles().first();
1710 if (!path.isEmpty()) {
1711 UISettings::values.screenshot_path = QFileInfo(path).path();
1712 render_window->CaptureScreenshot(UISettings::values.screenshot_resolution_factor, path);
1713 }
1691 } 1714 }
1692 OnStartGame(); 1715 OnStartGame();
1693} 1716}
diff --git a/src/yuzu_cmd/config.cpp b/src/yuzu_cmd/config.cpp
index 7a77f76e8..32e78049c 100644
--- a/src/yuzu_cmd/config.cpp
+++ b/src/yuzu_cmd/config.cpp
@@ -346,23 +346,28 @@ void Config::ReadValues() {
346 346
347 // Renderer 347 // Renderer
348 Settings::values.resolution_factor = 348 Settings::values.resolution_factor =
349 (float)sdl2_config->GetReal("Renderer", "resolution_factor", 1.0); 349 static_cast<float>(sdl2_config->GetReal("Renderer", "resolution_factor", 1.0));
350 Settings::values.use_frame_limit = sdl2_config->GetBoolean("Renderer", "use_frame_limit", true); 350 Settings::values.use_frame_limit = sdl2_config->GetBoolean("Renderer", "use_frame_limit", true);
351 Settings::values.frame_limit = 351 Settings::values.frame_limit =
352 static_cast<u16>(sdl2_config->GetInteger("Renderer", "frame_limit", 100)); 352 static_cast<u16>(sdl2_config->GetInteger("Renderer", "frame_limit", 100));
353 Settings::values.use_disk_shader_cache =
354 sdl2_config->GetBoolean("Renderer", "use_disk_shader_cache", false);
353 Settings::values.use_accurate_gpu_emulation = 355 Settings::values.use_accurate_gpu_emulation =
354 sdl2_config->GetBoolean("Renderer", "use_accurate_gpu_emulation", false); 356 sdl2_config->GetBoolean("Renderer", "use_accurate_gpu_emulation", false);
357 Settings::values.use_asynchronous_gpu_emulation =
358 sdl2_config->GetBoolean("Renderer", "use_asynchronous_gpu_emulation", false);
355 359
356 Settings::values.bg_red = (float)sdl2_config->GetReal("Renderer", "bg_red", 0.0); 360 Settings::values.bg_red = static_cast<float>(sdl2_config->GetReal("Renderer", "bg_red", 0.0));
357 Settings::values.bg_green = (float)sdl2_config->GetReal("Renderer", "bg_green", 0.0); 361 Settings::values.bg_green =
358 Settings::values.bg_blue = (float)sdl2_config->GetReal("Renderer", "bg_blue", 0.0); 362 static_cast<float>(sdl2_config->GetReal("Renderer", "bg_green", 0.0));
363 Settings::values.bg_blue = static_cast<float>(sdl2_config->GetReal("Renderer", "bg_blue", 0.0));
359 364
360 // Audio 365 // Audio
361 Settings::values.sink_id = sdl2_config->Get("Audio", "output_engine", "auto"); 366 Settings::values.sink_id = sdl2_config->Get("Audio", "output_engine", "auto");
362 Settings::values.enable_audio_stretching = 367 Settings::values.enable_audio_stretching =
363 sdl2_config->GetBoolean("Audio", "enable_audio_stretching", true); 368 sdl2_config->GetBoolean("Audio", "enable_audio_stretching", true);
364 Settings::values.audio_device_id = sdl2_config->Get("Audio", "output_device", "auto"); 369 Settings::values.audio_device_id = sdl2_config->Get("Audio", "output_device", "auto");
365 Settings::values.volume = sdl2_config->GetReal("Audio", "volume", 1); 370 Settings::values.volume = static_cast<float>(sdl2_config->GetReal("Audio", "volume", 1));
366 371
367 Settings::values.language_index = sdl2_config->GetInteger("System", "language_index", 1); 372 Settings::values.language_index = sdl2_config->GetInteger("System", "language_index", 1);
368 373
diff --git a/src/yuzu_cmd/default_ini.h b/src/yuzu_cmd/default_ini.h
index ba51a4a51..6538af098 100644
--- a/src/yuzu_cmd/default_ini.h
+++ b/src/yuzu_cmd/default_ini.h
@@ -110,10 +110,18 @@ use_frame_limit =
110# 1 - 9999: Speed limit as a percentage of target game speed. 100 (default) 110# 1 - 9999: Speed limit as a percentage of target game speed. 100 (default)
111frame_limit = 111frame_limit =
112 112
113# Whether to use disk based shader cache
114# 0 (default): Off, 1 : On
115use_disk_shader_cache =
116
113# Whether to use accurate GPU emulation 117# Whether to use accurate GPU emulation
114# 0 (default): Off (fast), 1 : On (slow) 118# 0 (default): Off (fast), 1 : On (slow)
115use_accurate_gpu_emulation = 119use_accurate_gpu_emulation =
116 120
121# Whether to use asynchronous GPU emulation
122# 0 : Off (slow), 1 (default): On (fast)
123use_asynchronous_gpu_emulation =
124
117# The clear color for the renderer. What shows up on the sides of the bottom screen. 125# The clear color for the renderer. What shows up on the sides of the bottom screen.
118# Must be in range of 0.0-1.0. Defaults to 1.0 for all. 126# Must be in range of 0.0-1.0. Defaults to 1.0 for all.
119bg_red = 127bg_red =
diff --git a/src/yuzu_cmd/emu_window/emu_window_sdl2.cpp b/src/yuzu_cmd/emu_window/emu_window_sdl2.cpp
index 7df8eff53..de7a26e14 100644
--- a/src/yuzu_cmd/emu_window/emu_window_sdl2.cpp
+++ b/src/yuzu_cmd/emu_window/emu_window_sdl2.cpp
@@ -135,16 +135,16 @@ bool EmuWindow_SDL2::SupportsRequiredGLExtensions() {
135} 135}
136 136
137EmuWindow_SDL2::EmuWindow_SDL2(bool fullscreen) { 137EmuWindow_SDL2::EmuWindow_SDL2(bool fullscreen) {
138 InputCommon::Init();
139
140 SDL_SetMainReady();
141
142 // Initialize the window 138 // Initialize the window
143 if (SDL_Init(SDL_INIT_VIDEO | SDL_INIT_JOYSTICK) < 0) { 139 if (SDL_Init(SDL_INIT_VIDEO | SDL_INIT_JOYSTICK) < 0) {
144 LOG_CRITICAL(Frontend, "Failed to initialize SDL2! Exiting..."); 140 LOG_CRITICAL(Frontend, "Failed to initialize SDL2! Exiting...");
145 exit(1); 141 exit(1);
146 } 142 }
147 143
144 InputCommon::Init();
145
146 SDL_SetMainReady();
147
148 SDL_GL_SetAttribute(SDL_GL_CONTEXT_MAJOR_VERSION, 4); 148 SDL_GL_SetAttribute(SDL_GL_CONTEXT_MAJOR_VERSION, 4);
149 SDL_GL_SetAttribute(SDL_GL_CONTEXT_MINOR_VERSION, 3); 149 SDL_GL_SetAttribute(SDL_GL_CONTEXT_MINOR_VERSION, 3);
150 SDL_GL_SetAttribute(SDL_GL_CONTEXT_PROFILE_MASK, SDL_GL_CONTEXT_PROFILE_CORE); 150 SDL_GL_SetAttribute(SDL_GL_CONTEXT_PROFILE_MASK, SDL_GL_CONTEXT_PROFILE_CORE);
@@ -201,11 +201,9 @@ EmuWindow_SDL2::EmuWindow_SDL2(bool fullscreen) {
201} 201}
202 202
203EmuWindow_SDL2::~EmuWindow_SDL2() { 203EmuWindow_SDL2::~EmuWindow_SDL2() {
204 InputCommon::SDL::CloseSDLJoysticks(); 204 InputCommon::Shutdown();
205 SDL_GL_DeleteContext(gl_context); 205 SDL_GL_DeleteContext(gl_context);
206 SDL_Quit(); 206 SDL_Quit();
207
208 InputCommon::Shutdown();
209} 207}
210 208
211void EmuWindow_SDL2::SwapBuffers() { 209void EmuWindow_SDL2::SwapBuffers() {
@@ -262,7 +260,6 @@ void EmuWindow_SDL2::PollEvents() {
262 is_open = false; 260 is_open = false;
263 break; 261 break;
264 default: 262 default:
265 InputCommon::SDL::HandleGameControllerEvent(event);
266 break; 263 break;
267 } 264 }
268 } 265 }
diff --git a/src/yuzu_cmd/yuzu.cpp b/src/yuzu_cmd/yuzu.cpp
index 806127b12..245f25847 100644
--- a/src/yuzu_cmd/yuzu.cpp
+++ b/src/yuzu_cmd/yuzu.cpp
@@ -28,6 +28,7 @@
28#include "core/loader/loader.h" 28#include "core/loader/loader.h"
29#include "core/settings.h" 29#include "core/settings.h"
30#include "core/telemetry_session.h" 30#include "core/telemetry_session.h"
31#include "video_core/renderer_base.h"
31#include "yuzu_cmd/config.h" 32#include "yuzu_cmd/config.h"
32#include "yuzu_cmd/emu_window/emu_window_sdl2.h" 33#include "yuzu_cmd/emu_window/emu_window_sdl2.h"
33 34
@@ -113,9 +114,9 @@ int main(int argc, char** argv) {
113 }; 114 };
114 115
115 while (optind < argc) { 116 while (optind < argc) {
116 char arg = getopt_long(argc, argv, "g:fhvp::", long_options, &option_index); 117 int arg = getopt_long(argc, argv, "g:fhvp::", long_options, &option_index);
117 if (arg != -1) { 118 if (arg != -1) {
118 switch (arg) { 119 switch (static_cast<char>(arg)) {
119 case 'g': 120 case 'g':
120 errno = 0; 121 errno = 0;
121 gdb_port = strtoul(optarg, &endarg, 0); 122 gdb_port = strtoul(optarg, &endarg, 0);
@@ -215,7 +216,9 @@ int main(int argc, char** argv) {
215 } 216 }
216 } 217 }
217 218
218 Core::Telemetry().AddField(Telemetry::FieldType::App, "Frontend", "SDL"); 219 system.TelemetrySession().AddField(Telemetry::FieldType::App, "Frontend", "SDL");
220
221 system.Renderer().Rasterizer().LoadDiskResources();
219 222
220 while (emu_window->IsOpen()) { 223 while (emu_window->IsOpen()) {
221 system.RunLoop(); 224 system.RunLoop();