summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--.travis.yml2
-rwxr-xr-x.travis/macos/build.sh2
-rw-r--r--CMakeLists.txt10
-rw-r--r--CMakeModules/GenerateSCMRev.cmake1
-rw-r--r--README.md2
m---------externals/Vulkan-Headers0
m---------externals/cubeb0
-rw-r--r--src/audio_core/audio_renderer.h14
-rw-r--r--src/audio_core/buffer.h2
-rw-r--r--src/audio_core/codec.cpp4
-rw-r--r--src/audio_core/cubeb_sink.cpp23
-rw-r--r--src/audio_core/cubeb_sink.h4
-rw-r--r--src/audio_core/stream.cpp2
-rw-r--r--src/common/CMakeLists.txt1
-rw-r--r--src/common/bit_field.h16
-rw-r--r--src/common/color.h40
-rw-r--r--src/common/logging/backend.cpp69
-rw-r--r--src/common/logging/backend.h5
-rw-r--r--src/common/math_util.h4
-rw-r--r--src/common/quaternion.h10
-rw-r--r--src/common/swap.h6
-rw-r--r--src/common/threadsafe_queue.h18
-rw-r--r--src/common/vector_math.h4
-rw-r--r--src/core/CMakeLists.txt5
-rw-r--r--src/core/core.cpp34
-rw-r--r--src/core/core.h4
-rw-r--r--src/core/core_cpu.cpp9
-rw-r--r--src/core/core_cpu.h8
-rw-r--r--src/core/cpu_core_manager.cpp3
-rw-r--r--src/core/crypto/key_manager.cpp3
-rw-r--r--src/core/file_sys/vfs_vector.cpp2
-rw-r--r--src/core/frontend/emu_window.cpp2
-rw-r--r--src/core/frontend/emu_window.h2
-rw-r--r--src/core/frontend/framebuffer_layout.cpp12
-rw-r--r--src/core/frontend/framebuffer_layout.h2
-rw-r--r--src/core/frontend/input.h2
-rw-r--r--src/core/hle/ipc.h4
-rw-r--r--src/core/hle/ipc_helpers.h20
-rw-r--r--src/core/hle/kernel/address_arbiter.cpp181
-rw-r--r--src/core/hle/kernel/address_arbiter.h80
-rw-r--r--src/core/hle/kernel/client_port.cpp9
-rw-r--r--src/core/hle/kernel/client_session.cpp14
-rw-r--r--src/core/hle/kernel/client_session.h9
-rw-r--r--src/core/hle/kernel/errors.h1
-rw-r--r--src/core/hle/kernel/handle_table.cpp40
-rw-r--r--src/core/hle/kernel/handle_table.h25
-rw-r--r--src/core/hle/kernel/hle_ipc.cpp22
-rw-r--r--src/core/hle/kernel/hle_ipc.h25
-rw-r--r--src/core/hle/kernel/kernel.cpp20
-rw-r--r--src/core/hle/kernel/kernel.h20
-rw-r--r--src/core/hle/kernel/process.cpp26
-rw-r--r--src/core/hle/kernel/process.h26
-rw-r--r--src/core/hle/kernel/process_capability.cpp4
-rw-r--r--src/core/hle/kernel/process_capability.h4
-rw-r--r--src/core/hle/kernel/scheduler.cpp20
-rw-r--r--src/core/hle/kernel/scheduler.h6
-rw-r--r--src/core/hle/kernel/server_port.cpp4
-rw-r--r--src/core/hle/kernel/server_port.h35
-rw-r--r--src/core/hle/kernel/server_session.cpp91
-rw-r--r--src/core/hle/kernel/server_session.h53
-rw-r--r--src/core/hle/kernel/shared_memory.cpp5
-rw-r--r--src/core/hle/kernel/svc.cpp95
-rw-r--r--src/core/hle/kernel/thread.cpp15
-rw-r--r--src/core/hle/kernel/thread.h13
-rw-r--r--src/core/hle/kernel/vm_manager.cpp47
-rw-r--r--src/core/hle/kernel/vm_manager.h24
-rw-r--r--src/core/hle/result.h19
-rw-r--r--src/core/hle/service/am/applets/software_keyboard.cpp1
-rw-r--r--src/core/hle/service/am/applets/software_keyboard.h3
-rw-r--r--src/core/hle/service/audio/audout_u.cpp17
-rw-r--r--src/core/hle/service/audio/audren_u.cpp64
-rw-r--r--src/core/hle/service/audio/audren_u.h3
-rw-r--r--src/core/hle/service/audio/errors.h15
-rw-r--r--src/core/hle/service/audio/hwopus.cpp176
-rw-r--r--src/core/hle/service/hid/hid.h2
-rw-r--r--src/core/hle/service/nvdrv/devices/nvdisp_disp0.cpp4
-rw-r--r--src/core/hle/service/nvdrv/devices/nvdisp_disp0.h2
-rw-r--r--src/core/hle/service/nvdrv/devices/nvhost_as_gpu.cpp3
-rw-r--r--src/core/hle/service/nvdrv/devices/nvhost_gpu.cpp14
-rw-r--r--src/core/hle/service/nvflinger/buffer_queue.cpp2
-rw-r--r--src/core/hle/service/nvflinger/buffer_queue.h4
-rw-r--r--src/core/hle/service/nvflinger/nvflinger.cpp102
-rw-r--r--src/core/hle/service/nvflinger/nvflinger.h50
-rw-r--r--src/core/hle/service/service.cpp10
-rw-r--r--src/core/hle/service/sm/controller.cpp2
-rw-r--r--src/core/hle/service/sm/sm.h2
-rw-r--r--src/core/hle/service/vi/display/vi_display.cpp71
-rw-r--r--src/core/hle/service/vi/display/vi_display.h98
-rw-r--r--src/core/hle/service/vi/layer/vi_layer.cpp13
-rw-r--r--src/core/hle/service/vi/layer/vi_layer.h52
-rw-r--r--src/core/hle/service/vi/vi.cpp67
-rw-r--r--src/core/hle/service/vi/vi.h40
-rw-r--r--src/core/hle/service/vi/vi_m.cpp12
-rw-r--r--src/core/hle/service/vi/vi_m.h19
-rw-r--r--src/core/hle/service/vi/vi_s.cpp12
-rw-r--r--src/core/hle/service/vi/vi_s.h19
-rw-r--r--src/core/hle/service/vi/vi_u.cpp12
-rw-r--r--src/core/hle/service/vi/vi_u.h19
-rw-r--r--src/core/memory.cpp38
-rw-r--r--src/core/settings.cpp3
-rw-r--r--src/core/settings.h1
-rw-r--r--src/core/telemetry_session.cpp2
-rw-r--r--src/input_common/CMakeLists.txt15
-rw-r--r--src/input_common/main.cpp23
-rw-r--r--src/input_common/main.h2
-rw-r--r--src/input_common/motion_emu.cpp28
-rw-r--r--src/input_common/sdl/sdl.cpp636
-rw-r--r--src/input_common/sdl/sdl.h53
-rw-r--r--src/input_common/sdl/sdl_impl.cpp669
-rw-r--r--src/input_common/sdl/sdl_impl.h64
-rw-r--r--src/tests/core/arm/arm_test_common.cpp8
-rw-r--r--src/video_core/CMakeLists.txt25
-rw-r--r--src/video_core/dma_pusher.cpp57
-rw-r--r--src/video_core/dma_pusher.h5
-rw-r--r--src/video_core/engines/fermi_2d.cpp15
-rw-r--r--src/video_core/engines/fermi_2d.h2
-rw-r--r--src/video_core/engines/kepler_compute.cpp3
-rw-r--r--src/video_core/engines/kepler_compute.h3
-rw-r--r--src/video_core/engines/kepler_memory.cpp11
-rw-r--r--src/video_core/engines/kepler_memory.h10
-rw-r--r--src/video_core/engines/maxwell_3d.cpp83
-rw-r--r--src/video_core/engines/maxwell_3d.h26
-rw-r--r--src/video_core/engines/maxwell_dma.cpp16
-rw-r--r--src/video_core/engines/maxwell_dma.h11
-rw-r--r--src/video_core/engines/shader_bytecode.h32
-rw-r--r--src/video_core/engines/shader_header.h41
-rw-r--r--src/video_core/gpu.cpp11
-rw-r--r--src/video_core/gpu.h69
-rw-r--r--src/video_core/gpu_asynch.cpp37
-rw-r--r--src/video_core/gpu_asynch.h37
-rw-r--r--src/video_core/gpu_synch.cpp37
-rw-r--r--src/video_core/gpu_synch.h29
-rw-r--r--src/video_core/gpu_thread.cpp98
-rw-r--r--src/video_core/gpu_thread.h185
-rw-r--r--src/video_core/morton.cpp315
-rw-r--r--src/video_core/morton.h6
-rw-r--r--src/video_core/rasterizer_cache.h88
-rw-r--r--src/video_core/rasterizer_interface.h12
-rw-r--r--src/video_core/renderer_base.cpp1
-rw-r--r--src/video_core/renderer_opengl/gl_buffer_cache.cpp24
-rw-r--r--src/video_core/renderer_opengl/gl_buffer_cache.h31
-rw-r--r--src/video_core/renderer_opengl/gl_global_cache.cpp20
-rw-r--r--src/video_core/renderer_opengl/gl_global_cache.h17
-rw-r--r--src/video_core/renderer_opengl/gl_rasterizer.cpp243
-rw-r--r--src/video_core/renderer_opengl/gl_rasterizer.h18
-rw-r--r--src/video_core/renderer_opengl/gl_rasterizer_cache.cpp334
-rw-r--r--src/video_core/renderer_opengl/gl_rasterizer_cache.h126
-rw-r--r--src/video_core/renderer_opengl/gl_shader_cache.cpp43
-rw-r--r--src/video_core/renderer_opengl/gl_shader_cache.h19
-rw-r--r--src/video_core/renderer_opengl/gl_shader_decompiler.cpp100
-rw-r--r--src/video_core/renderer_opengl/gl_shader_disk_cache.cpp2
-rw-r--r--src/video_core/renderer_opengl/gl_shader_gen.cpp4
-rw-r--r--src/video_core/renderer_opengl/gl_state.cpp148
-rw-r--r--src/video_core/renderer_opengl/renderer_opengl.cpp39
-rw-r--r--src/video_core/renderer_opengl/renderer_opengl.h5
-rw-r--r--src/video_core/renderer_vulkan/maxwell_to_vk.cpp483
-rw-r--r--src/video_core/renderer_vulkan/maxwell_to_vk.h58
-rw-r--r--src/video_core/renderer_vulkan/vk_buffer_cache.cpp123
-rw-r--r--src/video_core/renderer_vulkan/vk_buffer_cache.h104
-rw-r--r--src/video_core/renderer_vulkan/vk_device.cpp13
-rw-r--r--src/video_core/renderer_vulkan/vk_memory_manager.cpp252
-rw-r--r--src/video_core/renderer_vulkan/vk_memory_manager.h87
-rw-r--r--src/video_core/renderer_vulkan/vk_resource_manager.cpp285
-rw-r--r--src/video_core/renderer_vulkan/vk_resource_manager.h180
-rw-r--r--src/video_core/renderer_vulkan/vk_sampler_cache.cpp81
-rw-r--r--src/video_core/renderer_vulkan/vk_sampler_cache.h56
-rw-r--r--src/video_core/renderer_vulkan/vk_scheduler.cpp60
-rw-r--r--src/video_core/renderer_vulkan/vk_scheduler.h69
-rw-r--r--src/video_core/renderer_vulkan/vk_stream_buffer.cpp90
-rw-r--r--src/video_core/renderer_vulkan/vk_stream_buffer.h72
-rw-r--r--src/video_core/shader/decode.cpp1
-rw-r--r--src/video_core/shader/decode/memory.cpp495
-rw-r--r--src/video_core/shader/decode/other.cpp15
-rw-r--r--src/video_core/shader/decode/texture.cpp534
-rw-r--r--src/video_core/shader/shader_ir.h5
-rw-r--r--src/video_core/shader/track.cpp10
-rw-r--r--src/video_core/surface.cpp2
-rw-r--r--src/video_core/textures/astc.cpp80
-rw-r--r--src/video_core/textures/astc.h2
-rw-r--r--src/video_core/textures/convert.cpp92
-rw-r--r--src/video_core/textures/convert.h18
-rw-r--r--src/video_core/textures/decoders.cpp6
-rw-r--r--src/video_core/textures/decoders.h18
-rw-r--r--src/video_core/textures/texture.h83
-rw-r--r--src/web_service/verify_login.h2
-rw-r--r--src/web_service/web_backend.cpp1
-rw-r--r--src/yuzu/applets/web_browser.cpp2
-rw-r--r--src/yuzu/bootmanager.cpp11
-rw-r--r--src/yuzu/compatdb.cpp6
-rw-r--r--src/yuzu/configuration/config.cpp446
-rw-r--r--src/yuzu/configuration/config.h5
-rw-r--r--src/yuzu/configuration/configure_graphics.cpp4
-rw-r--r--src/yuzu/configuration/configure_graphics.ui7
-rw-r--r--src/yuzu/debugger/graphics/graphics_surface.cpp2
-rw-r--r--src/yuzu/debugger/wait_tree.cpp8
-rw-r--r--src/yuzu/debugger/wait_tree.h3
-rw-r--r--src/yuzu/main.cpp20
-rw-r--r--src/yuzu_cmd/config.cpp13
-rw-r--r--src/yuzu_cmd/default_ini.h4
-rw-r--r--src/yuzu_cmd/emu_window/emu_window_sdl2.cpp13
-rw-r--r--src/yuzu_cmd/yuzu.cpp2
201 files changed, 6885 insertions, 3212 deletions
diff --git a/.travis.yml b/.travis.yml
index b0fbe3c5f..9512f7843 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -24,7 +24,7 @@ matrix:
24 - os: osx 24 - os: osx
25 env: NAME="macos build" 25 env: NAME="macos build"
26 sudo: false 26 sudo: false
27 osx_image: xcode10 27 osx_image: xcode10.1
28 install: "./.travis/macos/deps.sh" 28 install: "./.travis/macos/deps.sh"
29 script: "./.travis/macos/build.sh" 29 script: "./.travis/macos/build.sh"
30 after_success: "./.travis/macos/upload.sh" 30 after_success: "./.travis/macos/upload.sh"
diff --git a/.travis/macos/build.sh b/.travis/macos/build.sh
index 4a14837fc..b7b4c6f8c 100755
--- a/.travis/macos/build.sh
+++ b/.travis/macos/build.sh
@@ -2,7 +2,7 @@
2 2
3set -o pipefail 3set -o pipefail
4 4
5export MACOSX_DEPLOYMENT_TARGET=10.13 5export MACOSX_DEPLOYMENT_TARGET=10.14
6export Qt5_DIR=$(brew --prefix)/opt/qt5 6export Qt5_DIR=$(brew --prefix)/opt/qt5
7export UNICORNDIR=$(pwd)/externals/unicorn 7export UNICORNDIR=$(pwd)/externals/unicorn
8export PATH="/usr/local/opt/ccache/libexec:$PATH" 8export PATH="/usr/local/opt/ccache/libexec:$PATH"
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 32cfa8580..9cc24cba6 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -163,12 +163,6 @@ else()
163 set(CMAKE_EXE_LINKER_FLAGS_RELEASE "/DEBUG /MANIFEST:NO /INCREMENTAL:NO /OPT:REF,ICF" CACHE STRING "" FORCE) 163 set(CMAKE_EXE_LINKER_FLAGS_RELEASE "/DEBUG /MANIFEST:NO /INCREMENTAL:NO /OPT:REF,ICF" CACHE STRING "" FORCE)
164endif() 164endif()
165 165
166# Fix GCC C++17 and Boost.ICL incompatibility (needed to build dynarmic)
167# See https://bugzilla.redhat.com/show_bug.cgi?id=1485641#c1
168if (CMAKE_COMPILER_IS_GNUCC)
169 set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fno-new-ttp-matching")
170endif()
171
172# Set file offset size to 64 bits. 166# Set file offset size to 64 bits.
173# 167#
174# On modern Unixes, this is typically already the case. The lone exception is 168# On modern Unixes, this is typically already the case. The lone exception is
@@ -185,9 +179,9 @@ set_property(DIRECTORY APPEND PROPERTY
185# System imported libraries 179# System imported libraries
186# ====================== 180# ======================
187 181
188find_package(Boost 1.63.0 QUIET) 182find_package(Boost 1.64.0 QUIET)
189if (NOT Boost_FOUND) 183if (NOT Boost_FOUND)
190 message(STATUS "Boost 1.63.0 or newer not found, falling back to externals") 184 message(STATUS "Boost 1.64.0 or newer not found, falling back to externals")
191 185
192 set(BOOST_ROOT "${PROJECT_SOURCE_DIR}/externals/boost") 186 set(BOOST_ROOT "${PROJECT_SOURCE_DIR}/externals/boost")
193 set(Boost_NO_SYSTEM_PATHS OFF) 187 set(Boost_NO_SYSTEM_PATHS OFF)
diff --git a/CMakeModules/GenerateSCMRev.cmake b/CMakeModules/GenerateSCMRev.cmake
index 78728e08b..08315a1f1 100644
--- a/CMakeModules/GenerateSCMRev.cmake
+++ b/CMakeModules/GenerateSCMRev.cmake
@@ -73,6 +73,7 @@ set(HASH_FILES
73 "${VIDEO_CORE}/shader/decode/integer_set.cpp" 73 "${VIDEO_CORE}/shader/decode/integer_set.cpp"
74 "${VIDEO_CORE}/shader/decode/integer_set_predicate.cpp" 74 "${VIDEO_CORE}/shader/decode/integer_set_predicate.cpp"
75 "${VIDEO_CORE}/shader/decode/memory.cpp" 75 "${VIDEO_CORE}/shader/decode/memory.cpp"
76 "${VIDEO_CORE}/shader/decode/texture.cpp"
76 "${VIDEO_CORE}/shader/decode/other.cpp" 77 "${VIDEO_CORE}/shader/decode/other.cpp"
77 "${VIDEO_CORE}/shader/decode/predicate_set_predicate.cpp" 78 "${VIDEO_CORE}/shader/decode/predicate_set_predicate.cpp"
78 "${VIDEO_CORE}/shader/decode/predicate_set_register.cpp" 79 "${VIDEO_CORE}/shader/decode/predicate_set_register.cpp"
diff --git a/README.md b/README.md
index 1d5ee58cc..fa4233b2a 100644
--- a/README.md
+++ b/README.md
@@ -7,7 +7,7 @@ yuzu is an experimental open-source emulator for the Nintendo Switch from the cr
7 7
8It is written in C++ with portability in mind, with builds actively maintained for Windows, Linux and macOS. The emulator is currently only useful for homebrew development and research purposes. 8It is written in C++ with portability in mind, with builds actively maintained for Windows, Linux and macOS. The emulator is currently only useful for homebrew development and research purposes.
9 9
10yuzu only emulates a subset of Switch hardware and therefore is generally only useful for running/debugging homebrew applications. At this time, yuzu cannot play any commercial games without major problems. yuzu can boot some games, to varying degrees of success, but does not implement any of the necessary GPU features to render 3D graphics. 10yuzu only emulates a subset of Switch hardware and therefore is generally only useful for running/debugging homebrew applications. At this time, yuzu cannot play any commercial games without major problems. yuzu can boot some games, to varying degrees of success.
11 11
12yuzu is licensed under the GPLv2 (or any later version). Refer to the license.txt file included. 12yuzu is licensed under the GPLv2 (or any later version). Refer to the license.txt file included.
13 13
diff --git a/externals/Vulkan-Headers b/externals/Vulkan-Headers
Subproject 7f02d9bb810f371de0fe833c80004c34f7ff8c5 Subproject 15e5c4db7500b936ae758236f2e72fc1aec2202
diff --git a/externals/cubeb b/externals/cubeb
Subproject 12b78c0edfa40007e41dbdcd9dfe367fbb98d01 Subproject 6f2420de8f155b10330cf973900ac7bdbfee589
diff --git a/src/audio_core/audio_renderer.h b/src/audio_core/audio_renderer.h
index 201ec7a3c..b2e5d336c 100644
--- a/src/audio_core/audio_renderer.h
+++ b/src/audio_core/audio_renderer.h
@@ -46,16 +46,18 @@ struct AudioRendererParameter {
46 u32_le sample_rate; 46 u32_le sample_rate;
47 u32_le sample_count; 47 u32_le sample_count;
48 u32_le mix_buffer_count; 48 u32_le mix_buffer_count;
49 u32_le unknown_c; 49 u32_le submix_count;
50 u32_le voice_count; 50 u32_le voice_count;
51 u32_le sink_count; 51 u32_le sink_count;
52 u32_le effect_count; 52 u32_le effect_count;
53 u32_le unknown_1c; 53 u32_le performance_frame_count;
54 u8 unknown_20; 54 u8 is_voice_drop_enabled;
55 INSERT_PADDING_BYTES(3); 55 u8 unknown_21;
56 u8 unknown_22;
57 u8 execution_mode;
56 u32_le splitter_count; 58 u32_le splitter_count;
57 u32_le unknown_2c; 59 u32_le num_splitter_send_channels;
58 INSERT_PADDING_WORDS(1); 60 u32_le unknown_30;
59 u32_le revision; 61 u32_le revision;
60}; 62};
61static_assert(sizeof(AudioRendererParameter) == 52, "AudioRendererParameter is an invalid size"); 63static_assert(sizeof(AudioRendererParameter) == 52, "AudioRendererParameter is an invalid size");
diff --git a/src/audio_core/buffer.h b/src/audio_core/buffer.h
index a323b23ec..5ee09e9aa 100644
--- a/src/audio_core/buffer.h
+++ b/src/audio_core/buffer.h
@@ -21,7 +21,7 @@ public:
21 Buffer(Tag tag, std::vector<s16>&& samples) : tag{tag}, samples{std::move(samples)} {} 21 Buffer(Tag tag, std::vector<s16>&& samples) : tag{tag}, samples{std::move(samples)} {}
22 22
23 /// Returns the raw audio data for the buffer 23 /// Returns the raw audio data for the buffer
24 std::vector<s16>& Samples() { 24 std::vector<s16>& GetSamples() {
25 return samples; 25 return samples;
26 } 26 }
27 27
diff --git a/src/audio_core/codec.cpp b/src/audio_core/codec.cpp
index 454de798b..c5a0d98ce 100644
--- a/src/audio_core/codec.cpp
+++ b/src/audio_core/codec.cpp
@@ -68,8 +68,8 @@ std::vector<s16> DecodeADPCM(const u8* const data, std::size_t size, const ADPCM
68 } 68 }
69 } 69 }
70 70
71 state.yn1 = yn1; 71 state.yn1 = static_cast<s16>(yn1);
72 state.yn2 = yn2; 72 state.yn2 = static_cast<s16>(yn2);
73 73
74 return ret; 74 return ret;
75} 75}
diff --git a/src/audio_core/cubeb_sink.cpp b/src/audio_core/cubeb_sink.cpp
index 097328901..7047ed9cf 100644
--- a/src/audio_core/cubeb_sink.cpp
+++ b/src/audio_core/cubeb_sink.cpp
@@ -12,6 +12,10 @@
12#include "common/ring_buffer.h" 12#include "common/ring_buffer.h"
13#include "core/settings.h" 13#include "core/settings.h"
14 14
15#ifdef _WIN32
16#include <objbase.h>
17#endif
18
15namespace AudioCore { 19namespace AudioCore {
16 20
17class CubebSinkStream final : public SinkStream { 21class CubebSinkStream final : public SinkStream {
@@ -46,7 +50,7 @@ public:
46 } 50 }
47 } 51 }
48 52
49 ~CubebSinkStream() { 53 ~CubebSinkStream() override {
50 if (!ctx) { 54 if (!ctx) {
51 return; 55 return;
52 } 56 }
@@ -75,11 +79,11 @@ public:
75 queue.Push(samples); 79 queue.Push(samples);
76 } 80 }
77 81
78 std::size_t SamplesInQueue(u32 num_channels) const override { 82 std::size_t SamplesInQueue(u32 channel_count) const override {
79 if (!ctx) 83 if (!ctx)
80 return 0; 84 return 0;
81 85
82 return queue.Size() / num_channels; 86 return queue.Size() / channel_count;
83 } 87 }
84 88
85 void Flush() override { 89 void Flush() override {
@@ -98,7 +102,7 @@ private:
98 u32 num_channels{}; 102 u32 num_channels{};
99 103
100 Common::RingBuffer<s16, 0x10000> queue; 104 Common::RingBuffer<s16, 0x10000> queue;
101 std::array<s16, 2> last_frame; 105 std::array<s16, 2> last_frame{};
102 std::atomic<bool> should_flush{}; 106 std::atomic<bool> should_flush{};
103 TimeStretcher time_stretch; 107 TimeStretcher time_stretch;
104 108
@@ -108,6 +112,11 @@ private:
108}; 112};
109 113
110CubebSink::CubebSink(std::string_view target_device_name) { 114CubebSink::CubebSink(std::string_view target_device_name) {
115 // Cubeb requires COM to be initialized on the thread calling cubeb_init on Windows
116#ifdef _WIN32
117 com_init_result = CoInitializeEx(nullptr, COINIT_MULTITHREADED);
118#endif
119
111 if (cubeb_init(&ctx, "yuzu", nullptr) != CUBEB_OK) { 120 if (cubeb_init(&ctx, "yuzu", nullptr) != CUBEB_OK) {
112 LOG_CRITICAL(Audio_Sink, "cubeb_init failed"); 121 LOG_CRITICAL(Audio_Sink, "cubeb_init failed");
113 return; 122 return;
@@ -142,6 +151,12 @@ CubebSink::~CubebSink() {
142 } 151 }
143 152
144 cubeb_destroy(ctx); 153 cubeb_destroy(ctx);
154
155#ifdef _WIN32
156 if (SUCCEEDED(com_init_result)) {
157 CoUninitialize();
158 }
159#endif
145} 160}
146 161
147SinkStream& CubebSink::AcquireSinkStream(u32 sample_rate, u32 num_channels, 162SinkStream& CubebSink::AcquireSinkStream(u32 sample_rate, u32 num_channels,
diff --git a/src/audio_core/cubeb_sink.h b/src/audio_core/cubeb_sink.h
index efb9d1634..7ce850f47 100644
--- a/src/audio_core/cubeb_sink.h
+++ b/src/audio_core/cubeb_sink.h
@@ -25,6 +25,10 @@ private:
25 cubeb* ctx{}; 25 cubeb* ctx{};
26 cubeb_devid output_device{}; 26 cubeb_devid output_device{};
27 std::vector<SinkStreamPtr> sink_streams; 27 std::vector<SinkStreamPtr> sink_streams;
28
29#ifdef _WIN32
30 u32 com_init_result = 0;
31#endif
28}; 32};
29 33
30std::vector<std::string> ListCubebSinkDevices(); 34std::vector<std::string> ListCubebSinkDevices();
diff --git a/src/audio_core/stream.cpp b/src/audio_core/stream.cpp
index d89ff30b7..4b66a6786 100644
--- a/src/audio_core/stream.cpp
+++ b/src/audio_core/stream.cpp
@@ -95,7 +95,7 @@ void Stream::PlayNextBuffer() {
95 active_buffer = queued_buffers.front(); 95 active_buffer = queued_buffers.front();
96 queued_buffers.pop(); 96 queued_buffers.pop();
97 97
98 VolumeAdjustSamples(active_buffer->Samples()); 98 VolumeAdjustSamples(active_buffer->GetSamples());
99 99
100 sink_stream.EnqueueSamples(GetNumChannels(), active_buffer->GetSamples()); 100 sink_stream.EnqueueSamples(GetNumChannels(), active_buffer->GetSamples());
101 101
diff --git a/src/common/CMakeLists.txt b/src/common/CMakeLists.txt
index b0174b445..c538c6415 100644
--- a/src/common/CMakeLists.txt
+++ b/src/common/CMakeLists.txt
@@ -47,6 +47,7 @@ add_custom_command(OUTPUT scm_rev.cpp
47 "${VIDEO_CORE}/shader/decode/integer_set.cpp" 47 "${VIDEO_CORE}/shader/decode/integer_set.cpp"
48 "${VIDEO_CORE}/shader/decode/integer_set_predicate.cpp" 48 "${VIDEO_CORE}/shader/decode/integer_set_predicate.cpp"
49 "${VIDEO_CORE}/shader/decode/memory.cpp" 49 "${VIDEO_CORE}/shader/decode/memory.cpp"
50 "${VIDEO_CORE}/shader/decode/texture.cpp"
50 "${VIDEO_CORE}/shader/decode/other.cpp" 51 "${VIDEO_CORE}/shader/decode/other.cpp"
51 "${VIDEO_CORE}/shader/decode/predicate_set_predicate.cpp" 52 "${VIDEO_CORE}/shader/decode/predicate_set_predicate.cpp"
52 "${VIDEO_CORE}/shader/decode/predicate_set_register.cpp" 53 "${VIDEO_CORE}/shader/decode/predicate_set_register.cpp"
diff --git a/src/common/bit_field.h b/src/common/bit_field.h
index 21e07925d..7433c39ba 100644
--- a/src/common/bit_field.h
+++ b/src/common/bit_field.h
@@ -111,12 +111,6 @@
111template <std::size_t Position, std::size_t Bits, typename T> 111template <std::size_t Position, std::size_t Bits, typename T>
112struct BitField { 112struct BitField {
113private: 113private:
114 // We hide the copy assigment operator here, because the default copy
115 // assignment would copy the full storage value, rather than just the bits
116 // relevant to this particular bit field.
117 // We don't delete it because we want BitField to be trivially copyable.
118 constexpr BitField& operator=(const BitField&) = default;
119
120 // UnderlyingType is T for non-enum types and the underlying type of T if 114 // UnderlyingType is T for non-enum types and the underlying type of T if
121 // T is an enumeration. Note that T is wrapped within an enable_if in the 115 // T is an enumeration. Note that T is wrapped within an enable_if in the
122 // former case to workaround compile errors which arise when using 116 // former case to workaround compile errors which arise when using
@@ -163,9 +157,13 @@ public:
163 BitField(T val) = delete; 157 BitField(T val) = delete;
164 BitField& operator=(T val) = delete; 158 BitField& operator=(T val) = delete;
165 159
166 // Force default constructor to be created 160 constexpr BitField() noexcept = default;
167 // so that we can use this within unions 161
168 constexpr BitField() = default; 162 constexpr BitField(const BitField&) noexcept = default;
163 constexpr BitField& operator=(const BitField&) noexcept = default;
164
165 constexpr BitField(BitField&&) noexcept = default;
166 constexpr BitField& operator=(BitField&&) noexcept = default;
169 167
170 constexpr FORCE_INLINE operator T() const { 168 constexpr FORCE_INLINE operator T() const {
171 return Value(); 169 return Value();
diff --git a/src/common/color.h b/src/common/color.h
index 0379040be..3a2222077 100644
--- a/src/common/color.h
+++ b/src/common/color.h
@@ -55,36 +55,36 @@ constexpr u8 Convert8To6(u8 value) {
55/** 55/**
56 * Decode a color stored in RGBA8 format 56 * Decode a color stored in RGBA8 format
57 * @param bytes Pointer to encoded source color 57 * @param bytes Pointer to encoded source color
58 * @return Result color decoded as Math::Vec4<u8> 58 * @return Result color decoded as Common::Vec4<u8>
59 */ 59 */
60inline Math::Vec4<u8> DecodeRGBA8(const u8* bytes) { 60inline Common::Vec4<u8> DecodeRGBA8(const u8* bytes) {
61 return {bytes[3], bytes[2], bytes[1], bytes[0]}; 61 return {bytes[3], bytes[2], bytes[1], bytes[0]};
62} 62}
63 63
64/** 64/**
65 * Decode a color stored in RGB8 format 65 * Decode a color stored in RGB8 format
66 * @param bytes Pointer to encoded source color 66 * @param bytes Pointer to encoded source color
67 * @return Result color decoded as Math::Vec4<u8> 67 * @return Result color decoded as Common::Vec4<u8>
68 */ 68 */
69inline Math::Vec4<u8> DecodeRGB8(const u8* bytes) { 69inline Common::Vec4<u8> DecodeRGB8(const u8* bytes) {
70 return {bytes[2], bytes[1], bytes[0], 255}; 70 return {bytes[2], bytes[1], bytes[0], 255};
71} 71}
72 72
73/** 73/**
74 * Decode a color stored in RG8 (aka HILO8) format 74 * Decode a color stored in RG8 (aka HILO8) format
75 * @param bytes Pointer to encoded source color 75 * @param bytes Pointer to encoded source color
76 * @return Result color decoded as Math::Vec4<u8> 76 * @return Result color decoded as Common::Vec4<u8>
77 */ 77 */
78inline Math::Vec4<u8> DecodeRG8(const u8* bytes) { 78inline Common::Vec4<u8> DecodeRG8(const u8* bytes) {
79 return {bytes[1], bytes[0], 0, 255}; 79 return {bytes[1], bytes[0], 0, 255};
80} 80}
81 81
82/** 82/**
83 * Decode a color stored in RGB565 format 83 * Decode a color stored in RGB565 format
84 * @param bytes Pointer to encoded source color 84 * @param bytes Pointer to encoded source color
85 * @return Result color decoded as Math::Vec4<u8> 85 * @return Result color decoded as Common::Vec4<u8>
86 */ 86 */
87inline Math::Vec4<u8> DecodeRGB565(const u8* bytes) { 87inline Common::Vec4<u8> DecodeRGB565(const u8* bytes) {
88 u16_le pixel; 88 u16_le pixel;
89 std::memcpy(&pixel, bytes, sizeof(pixel)); 89 std::memcpy(&pixel, bytes, sizeof(pixel));
90 return {Convert5To8((pixel >> 11) & 0x1F), Convert6To8((pixel >> 5) & 0x3F), 90 return {Convert5To8((pixel >> 11) & 0x1F), Convert6To8((pixel >> 5) & 0x3F),
@@ -94,9 +94,9 @@ inline Math::Vec4<u8> DecodeRGB565(const u8* bytes) {
94/** 94/**
95 * Decode a color stored in RGB5A1 format 95 * Decode a color stored in RGB5A1 format
96 * @param bytes Pointer to encoded source color 96 * @param bytes Pointer to encoded source color
97 * @return Result color decoded as Math::Vec4<u8> 97 * @return Result color decoded as Common::Vec4<u8>
98 */ 98 */
99inline Math::Vec4<u8> DecodeRGB5A1(const u8* bytes) { 99inline Common::Vec4<u8> DecodeRGB5A1(const u8* bytes) {
100 u16_le pixel; 100 u16_le pixel;
101 std::memcpy(&pixel, bytes, sizeof(pixel)); 101 std::memcpy(&pixel, bytes, sizeof(pixel));
102 return {Convert5To8((pixel >> 11) & 0x1F), Convert5To8((pixel >> 6) & 0x1F), 102 return {Convert5To8((pixel >> 11) & 0x1F), Convert5To8((pixel >> 6) & 0x1F),
@@ -106,9 +106,9 @@ inline Math::Vec4<u8> DecodeRGB5A1(const u8* bytes) {
106/** 106/**
107 * Decode a color stored in RGBA4 format 107 * Decode a color stored in RGBA4 format
108 * @param bytes Pointer to encoded source color 108 * @param bytes Pointer to encoded source color
109 * @return Result color decoded as Math::Vec4<u8> 109 * @return Result color decoded as Common::Vec4<u8>
110 */ 110 */
111inline Math::Vec4<u8> DecodeRGBA4(const u8* bytes) { 111inline Common::Vec4<u8> DecodeRGBA4(const u8* bytes) {
112 u16_le pixel; 112 u16_le pixel;
113 std::memcpy(&pixel, bytes, sizeof(pixel)); 113 std::memcpy(&pixel, bytes, sizeof(pixel));
114 return {Convert4To8((pixel >> 12) & 0xF), Convert4To8((pixel >> 8) & 0xF), 114 return {Convert4To8((pixel >> 12) & 0xF), Convert4To8((pixel >> 8) & 0xF),
@@ -138,9 +138,9 @@ inline u32 DecodeD24(const u8* bytes) {
138/** 138/**
139 * Decode a depth value and a stencil value stored in D24S8 format 139 * Decode a depth value and a stencil value stored in D24S8 format
140 * @param bytes Pointer to encoded source values 140 * @param bytes Pointer to encoded source values
141 * @return Resulting values stored as a Math::Vec2 141 * @return Resulting values stored as a Common::Vec2
142 */ 142 */
143inline Math::Vec2<u32> DecodeD24S8(const u8* bytes) { 143inline Common::Vec2<u32> DecodeD24S8(const u8* bytes) {
144 return {static_cast<u32>((bytes[2] << 16) | (bytes[1] << 8) | bytes[0]), bytes[3]}; 144 return {static_cast<u32>((bytes[2] << 16) | (bytes[1] << 8) | bytes[0]), bytes[3]};
145} 145}
146 146
@@ -149,7 +149,7 @@ inline Math::Vec2<u32> DecodeD24S8(const u8* bytes) {
149 * @param color Source color to encode 149 * @param color Source color to encode
150 * @param bytes Destination pointer to store encoded color 150 * @param bytes Destination pointer to store encoded color
151 */ 151 */
152inline void EncodeRGBA8(const Math::Vec4<u8>& color, u8* bytes) { 152inline void EncodeRGBA8(const Common::Vec4<u8>& color, u8* bytes) {
153 bytes[3] = color.r(); 153 bytes[3] = color.r();
154 bytes[2] = color.g(); 154 bytes[2] = color.g();
155 bytes[1] = color.b(); 155 bytes[1] = color.b();
@@ -161,7 +161,7 @@ inline void EncodeRGBA8(const Math::Vec4<u8>& color, u8* bytes) {
161 * @param color Source color to encode 161 * @param color Source color to encode
162 * @param bytes Destination pointer to store encoded color 162 * @param bytes Destination pointer to store encoded color
163 */ 163 */
164inline void EncodeRGB8(const Math::Vec4<u8>& color, u8* bytes) { 164inline void EncodeRGB8(const Common::Vec4<u8>& color, u8* bytes) {
165 bytes[2] = color.r(); 165 bytes[2] = color.r();
166 bytes[1] = color.g(); 166 bytes[1] = color.g();
167 bytes[0] = color.b(); 167 bytes[0] = color.b();
@@ -172,7 +172,7 @@ inline void EncodeRGB8(const Math::Vec4<u8>& color, u8* bytes) {
172 * @param color Source color to encode 172 * @param color Source color to encode
173 * @param bytes Destination pointer to store encoded color 173 * @param bytes Destination pointer to store encoded color
174 */ 174 */
175inline void EncodeRG8(const Math::Vec4<u8>& color, u8* bytes) { 175inline void EncodeRG8(const Common::Vec4<u8>& color, u8* bytes) {
176 bytes[1] = color.r(); 176 bytes[1] = color.r();
177 bytes[0] = color.g(); 177 bytes[0] = color.g();
178} 178}
@@ -181,7 +181,7 @@ inline void EncodeRG8(const Math::Vec4<u8>& color, u8* bytes) {
181 * @param color Source color to encode 181 * @param color Source color to encode
182 * @param bytes Destination pointer to store encoded color 182 * @param bytes Destination pointer to store encoded color
183 */ 183 */
184inline void EncodeRGB565(const Math::Vec4<u8>& color, u8* bytes) { 184inline void EncodeRGB565(const Common::Vec4<u8>& color, u8* bytes) {
185 const u16_le data = 185 const u16_le data =
186 (Convert8To5(color.r()) << 11) | (Convert8To6(color.g()) << 5) | Convert8To5(color.b()); 186 (Convert8To5(color.r()) << 11) | (Convert8To6(color.g()) << 5) | Convert8To5(color.b());
187 187
@@ -193,7 +193,7 @@ inline void EncodeRGB565(const Math::Vec4<u8>& color, u8* bytes) {
193 * @param color Source color to encode 193 * @param color Source color to encode
194 * @param bytes Destination pointer to store encoded color 194 * @param bytes Destination pointer to store encoded color
195 */ 195 */
196inline void EncodeRGB5A1(const Math::Vec4<u8>& color, u8* bytes) { 196inline void EncodeRGB5A1(const Common::Vec4<u8>& color, u8* bytes) {
197 const u16_le data = (Convert8To5(color.r()) << 11) | (Convert8To5(color.g()) << 6) | 197 const u16_le data = (Convert8To5(color.r()) << 11) | (Convert8To5(color.g()) << 6) |
198 (Convert8To5(color.b()) << 1) | Convert8To1(color.a()); 198 (Convert8To5(color.b()) << 1) | Convert8To1(color.a());
199 199
@@ -205,7 +205,7 @@ inline void EncodeRGB5A1(const Math::Vec4<u8>& color, u8* bytes) {
205 * @param color Source color to encode 205 * @param color Source color to encode
206 * @param bytes Destination pointer to store encoded color 206 * @param bytes Destination pointer to store encoded color
207 */ 207 */
208inline void EncodeRGBA4(const Math::Vec4<u8>& color, u8* bytes) { 208inline void EncodeRGBA4(const Common::Vec4<u8>& color, u8* bytes) {
209 const u16 data = (Convert8To4(color.r()) << 12) | (Convert8To4(color.g()) << 8) | 209 const u16 data = (Convert8To4(color.r()) << 12) | (Convert8To4(color.g()) << 8) |
210 (Convert8To4(color.b()) << 4) | Convert8To4(color.a()); 210 (Convert8To4(color.b()) << 4) | Convert8To4(color.a());
211 211
diff --git a/src/common/logging/backend.cpp b/src/common/logging/backend.cpp
index a5e031189..4462ff3fb 100644
--- a/src/common/logging/backend.cpp
+++ b/src/common/logging/backend.cpp
@@ -39,10 +39,10 @@ public:
39 Impl(Impl const&) = delete; 39 Impl(Impl const&) = delete;
40 const Impl& operator=(Impl const&) = delete; 40 const Impl& operator=(Impl const&) = delete;
41 41
42 void PushEntry(Entry e) { 42 void PushEntry(Class log_class, Level log_level, const char* filename, unsigned int line_num,
43 std::lock_guard<std::mutex> lock(message_mutex); 43 const char* function, std::string message) {
44 message_queue.Push(std::move(e)); 44 message_queue.Push(
45 message_cv.notify_one(); 45 CreateEntry(log_class, log_level, filename, line_num, function, std::move(message)));
46 } 46 }
47 47
48 void AddBackend(std::unique_ptr<Backend> backend) { 48 void AddBackend(std::unique_ptr<Backend> backend) {
@@ -86,15 +86,13 @@ private:
86 } 86 }
87 }; 87 };
88 while (true) { 88 while (true) {
89 { 89 entry = message_queue.PopWait();
90 std::unique_lock<std::mutex> lock(message_mutex); 90 if (entry.final_entry) {
91 message_cv.wait(lock, [&] { return !running || message_queue.Pop(entry); });
92 }
93 if (!running) {
94 break; 91 break;
95 } 92 }
96 write_logs(entry); 93 write_logs(entry);
97 } 94 }
95
98 // Drain the logging queue. Only writes out up to MAX_LOGS_TO_WRITE to prevent a case 96 // Drain the logging queue. Only writes out up to MAX_LOGS_TO_WRITE to prevent a case
99 // where a system is repeatedly spamming logs even on close. 97 // where a system is repeatedly spamming logs even on close.
100 const int MAX_LOGS_TO_WRITE = filter.IsDebug() ? INT_MAX : 100; 98 const int MAX_LOGS_TO_WRITE = filter.IsDebug() ? INT_MAX : 100;
@@ -106,18 +104,36 @@ private:
106 } 104 }
107 105
108 ~Impl() { 106 ~Impl() {
109 running = false; 107 Entry entry;
110 message_cv.notify_one(); 108 entry.final_entry = true;
109 message_queue.Push(entry);
111 backend_thread.join(); 110 backend_thread.join();
112 } 111 }
113 112
114 std::atomic_bool running{true}; 113 Entry CreateEntry(Class log_class, Level log_level, const char* filename, unsigned int line_nr,
115 std::mutex message_mutex, writing_mutex; 114 const char* function, std::string message) const {
116 std::condition_variable message_cv; 115 using std::chrono::duration_cast;
116 using std::chrono::steady_clock;
117
118 Entry entry;
119 entry.timestamp =
120 duration_cast<std::chrono::microseconds>(steady_clock::now() - time_origin);
121 entry.log_class = log_class;
122 entry.log_level = log_level;
123 entry.filename = Common::TrimSourcePath(filename);
124 entry.line_num = line_nr;
125 entry.function = function;
126 entry.message = std::move(message);
127
128 return entry;
129 }
130
131 std::mutex writing_mutex;
117 std::thread backend_thread; 132 std::thread backend_thread;
118 std::vector<std::unique_ptr<Backend>> backends; 133 std::vector<std::unique_ptr<Backend>> backends;
119 Common::MPSCQueue<Log::Entry> message_queue; 134 Common::MPSCQueue<Log::Entry> message_queue;
120 Filter filter; 135 Filter filter;
136 std::chrono::steady_clock::time_point time_origin{std::chrono::steady_clock::now()};
121}; 137};
122 138
123void ConsoleBackend::Write(const Entry& entry) { 139void ConsoleBackend::Write(const Entry& entry) {
@@ -276,25 +292,6 @@ const char* GetLevelName(Level log_level) {
276#undef LVL 292#undef LVL
277} 293}
278 294
279Entry CreateEntry(Class log_class, Level log_level, const char* filename, unsigned int line_nr,
280 const char* function, std::string message) {
281 using std::chrono::duration_cast;
282 using std::chrono::steady_clock;
283
284 static steady_clock::time_point time_origin = steady_clock::now();
285
286 Entry entry;
287 entry.timestamp = duration_cast<std::chrono::microseconds>(steady_clock::now() - time_origin);
288 entry.log_class = log_class;
289 entry.log_level = log_level;
290 entry.filename = Common::TrimSourcePath(filename);
291 entry.line_num = line_nr;
292 entry.function = function;
293 entry.message = std::move(message);
294
295 return entry;
296}
297
298void SetGlobalFilter(const Filter& filter) { 295void SetGlobalFilter(const Filter& filter) {
299 Impl::Instance().SetGlobalFilter(filter); 296 Impl::Instance().SetGlobalFilter(filter);
300} 297}
@@ -319,9 +316,7 @@ void FmtLogMessageImpl(Class log_class, Level log_level, const char* filename,
319 if (!filter.CheckMessage(log_class, log_level)) 316 if (!filter.CheckMessage(log_class, log_level))
320 return; 317 return;
321 318
322 Entry entry = 319 instance.PushEntry(log_class, log_level, filename, line_num, function,
323 CreateEntry(log_class, log_level, filename, line_num, function, fmt::vformat(format, args)); 320 fmt::vformat(format, args));
324
325 instance.PushEntry(std::move(entry));
326} 321}
327} // namespace Log 322} // namespace Log
diff --git a/src/common/logging/backend.h b/src/common/logging/backend.h
index 91bb0c309..fca0267a1 100644
--- a/src/common/logging/backend.h
+++ b/src/common/logging/backend.h
@@ -27,6 +27,7 @@ struct Entry {
27 unsigned int line_num; 27 unsigned int line_num;
28 std::string function; 28 std::string function;
29 std::string message; 29 std::string message;
30 bool final_entry = false;
30 31
31 Entry() = default; 32 Entry() = default;
32 Entry(Entry&& o) = default; 33 Entry(Entry&& o) = default;
@@ -134,10 +135,6 @@ const char* GetLogClassName(Class log_class);
134 */ 135 */
135const char* GetLevelName(Level log_level); 136const char* GetLevelName(Level log_level);
136 137
137/// Creates a log entry by formatting the given source location, and message.
138Entry CreateEntry(Class log_class, Level log_level, const char* filename, unsigned int line_nr,
139 const char* function, std::string message);
140
141/** 138/**
142 * The global filter will prevent any messages from even being processed if they are filtered. Each 139 * The global filter will prevent any messages from even being processed if they are filtered. Each
143 * backend can have a filter, but if the level is lower than the global filter, the backend will 140 * backend can have a filter, but if the level is lower than the global filter, the backend will
diff --git a/src/common/math_util.h b/src/common/math_util.h
index 94b4394c5..cff3d48c5 100644
--- a/src/common/math_util.h
+++ b/src/common/math_util.h
@@ -7,7 +7,7 @@
7#include <cstdlib> 7#include <cstdlib>
8#include <type_traits> 8#include <type_traits>
9 9
10namespace MathUtil { 10namespace Common {
11 11
12constexpr float PI = 3.14159265f; 12constexpr float PI = 3.14159265f;
13 13
@@ -41,4 +41,4 @@ struct Rectangle {
41 } 41 }
42}; 42};
43 43
44} // namespace MathUtil 44} // namespace Common
diff --git a/src/common/quaternion.h b/src/common/quaternion.h
index c528c0b68..370198ae0 100644
--- a/src/common/quaternion.h
+++ b/src/common/quaternion.h
@@ -6,12 +6,12 @@
6 6
7#include "common/vector_math.h" 7#include "common/vector_math.h"
8 8
9namespace Math { 9namespace Common {
10 10
11template <typename T> 11template <typename T>
12class Quaternion { 12class Quaternion {
13public: 13public:
14 Math::Vec3<T> xyz; 14 Vec3<T> xyz;
15 T w{}; 15 T w{};
16 16
17 Quaternion<decltype(-T{})> Inverse() const { 17 Quaternion<decltype(-T{})> Inverse() const {
@@ -38,12 +38,12 @@ public:
38}; 38};
39 39
40template <typename T> 40template <typename T>
41auto QuaternionRotate(const Quaternion<T>& q, const Math::Vec3<T>& v) { 41auto QuaternionRotate(const Quaternion<T>& q, const Vec3<T>& v) {
42 return v + 2 * Cross(q.xyz, Cross(q.xyz, v) + v * q.w); 42 return v + 2 * Cross(q.xyz, Cross(q.xyz, v) + v * q.w);
43} 43}
44 44
45inline Quaternion<float> MakeQuaternion(const Math::Vec3<float>& axis, float angle) { 45inline Quaternion<float> MakeQuaternion(const Vec3<float>& axis, float angle) {
46 return {axis * std::sin(angle / 2), std::cos(angle / 2)}; 46 return {axis * std::sin(angle / 2), std::cos(angle / 2)};
47} 47}
48 48
49} // namespace Math 49} // namespace Common
diff --git a/src/common/swap.h b/src/common/swap.h
index 32af0b6ac..0e219747f 100644
--- a/src/common/swap.h
+++ b/src/common/swap.h
@@ -28,8 +28,8 @@
28#include <cstring> 28#include <cstring>
29#include "common/common_types.h" 29#include "common/common_types.h"
30 30
31// GCC 4.6+ 31// GCC
32#if __GNUC__ >= 5 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 6) 32#ifdef __GNUC__
33 33
34#if __BYTE_ORDER__ && (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__) && !defined(COMMON_LITTLE_ENDIAN) 34#if __BYTE_ORDER__ && (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__) && !defined(COMMON_LITTLE_ENDIAN)
35#define COMMON_LITTLE_ENDIAN 1 35#define COMMON_LITTLE_ENDIAN 1
@@ -38,7 +38,7 @@
38#endif 38#endif
39 39
40// LLVM/clang 40// LLVM/clang
41#elif __clang__ 41#elif defined(__clang__)
42 42
43#if __LITTLE_ENDIAN__ && !defined(COMMON_LITTLE_ENDIAN) 43#if __LITTLE_ENDIAN__ && !defined(COMMON_LITTLE_ENDIAN)
44#define COMMON_LITTLE_ENDIAN 1 44#define COMMON_LITTLE_ENDIAN 1
diff --git a/src/common/threadsafe_queue.h b/src/common/threadsafe_queue.h
index f553efdc9..821e8536a 100644
--- a/src/common/threadsafe_queue.h
+++ b/src/common/threadsafe_queue.h
@@ -8,6 +8,7 @@
8// single reader, single writer queue 8// single reader, single writer queue
9 9
10#include <atomic> 10#include <atomic>
11#include <condition_variable>
11#include <cstddef> 12#include <cstddef>
12#include <mutex> 13#include <mutex>
13#include <utility> 14#include <utility>
@@ -45,6 +46,7 @@ public:
45 ElementPtr* new_ptr = new ElementPtr(); 46 ElementPtr* new_ptr = new ElementPtr();
46 write_ptr->next.store(new_ptr, std::memory_order_release); 47 write_ptr->next.store(new_ptr, std::memory_order_release);
47 write_ptr = new_ptr; 48 write_ptr = new_ptr;
49 cv.notify_one();
48 50
49 ++size; 51 ++size;
50 } 52 }
@@ -74,6 +76,16 @@ public:
74 return true; 76 return true;
75 } 77 }
76 78
79 T PopWait() {
80 if (Empty()) {
81 std::unique_lock<std::mutex> lock(cv_mutex);
82 cv.wait(lock, [this]() { return !Empty(); });
83 }
84 T t;
85 Pop(t);
86 return t;
87 }
88
77 // not thread-safe 89 // not thread-safe
78 void Clear() { 90 void Clear() {
79 size.store(0); 91 size.store(0);
@@ -101,6 +113,8 @@ private:
101 ElementPtr* write_ptr; 113 ElementPtr* write_ptr;
102 ElementPtr* read_ptr; 114 ElementPtr* read_ptr;
103 std::atomic_size_t size{0}; 115 std::atomic_size_t size{0};
116 std::mutex cv_mutex;
117 std::condition_variable cv;
104}; 118};
105 119
106// a simple thread-safe, 120// a simple thread-safe,
@@ -135,6 +149,10 @@ public:
135 return spsc_queue.Pop(t); 149 return spsc_queue.Pop(t);
136 } 150 }
137 151
152 T PopWait() {
153 return spsc_queue.PopWait();
154 }
155
138 // not thread-safe 156 // not thread-safe
139 void Clear() { 157 void Clear() {
140 spsc_queue.Clear(); 158 spsc_queue.Clear();
diff --git a/src/common/vector_math.h b/src/common/vector_math.h
index 8feb49941..429485329 100644
--- a/src/common/vector_math.h
+++ b/src/common/vector_math.h
@@ -33,7 +33,7 @@
33#include <cmath> 33#include <cmath>
34#include <type_traits> 34#include <type_traits>
35 35
36namespace Math { 36namespace Common {
37 37
38template <typename T> 38template <typename T>
39class Vec2; 39class Vec2;
@@ -690,4 +690,4 @@ constexpr Vec4<T> MakeVec(const T& x, const Vec3<T>& yzw) {
690 return MakeVec(x, yzw[0], yzw[1], yzw[2]); 690 return MakeVec(x, yzw[0], yzw[1], yzw[2]);
691} 691}
692 692
693} // namespace Math 693} // namespace Common
diff --git a/src/core/CMakeLists.txt b/src/core/CMakeLists.txt
index f61bcd40d..8ccb2d5f0 100644
--- a/src/core/CMakeLists.txt
+++ b/src/core/CMakeLists.txt
@@ -217,6 +217,7 @@ add_library(core STATIC
217 hle/service/audio/audren_u.h 217 hle/service/audio/audren_u.h
218 hle/service/audio/codecctl.cpp 218 hle/service/audio/codecctl.cpp
219 hle/service/audio/codecctl.h 219 hle/service/audio/codecctl.h
220 hle/service/audio/errors.h
220 hle/service/audio/hwopus.cpp 221 hle/service/audio/hwopus.cpp
221 hle/service/audio/hwopus.h 222 hle/service/audio/hwopus.h
222 hle/service/bcat/bcat.cpp 223 hle/service/bcat/bcat.cpp
@@ -400,6 +401,10 @@ add_library(core STATIC
400 hle/service/time/time.h 401 hle/service/time/time.h
401 hle/service/usb/usb.cpp 402 hle/service/usb/usb.cpp
402 hle/service/usb/usb.h 403 hle/service/usb/usb.h
404 hle/service/vi/display/vi_display.cpp
405 hle/service/vi/display/vi_display.h
406 hle/service/vi/layer/vi_layer.cpp
407 hle/service/vi/layer/vi_layer.h
403 hle/service/vi/vi.cpp 408 hle/service/vi/vi.cpp
404 hle/service/vi/vi.h 409 hle/service/vi/vi.h
405 hle/service/vi/vi_m.cpp 410 hle/service/vi/vi_m.cpp
diff --git a/src/core/core.cpp b/src/core/core.cpp
index 8aa0932c5..89b3fb418 100644
--- a/src/core/core.cpp
+++ b/src/core/core.cpp
@@ -36,7 +36,8 @@
36#include "frontend/applets/software_keyboard.h" 36#include "frontend/applets/software_keyboard.h"
37#include "frontend/applets/web_browser.h" 37#include "frontend/applets/web_browser.h"
38#include "video_core/debug_utils/debug_utils.h" 38#include "video_core/debug_utils/debug_utils.h"
39#include "video_core/gpu.h" 39#include "video_core/gpu_asynch.h"
40#include "video_core/gpu_synch.h"
40#include "video_core/renderer_base.h" 41#include "video_core/renderer_base.h"
41#include "video_core/video_core.h" 42#include "video_core/video_core.h"
42 43
@@ -78,6 +79,7 @@ FileSys::VirtualFile GetGameFileFromPath(const FileSys::VirtualFilesystem& vfs,
78 return vfs->OpenFile(path, FileSys::Mode::Read); 79 return vfs->OpenFile(path, FileSys::Mode::Read);
79} 80}
80struct System::Impl { 81struct System::Impl {
82 explicit Impl(System& system) : kernel{system} {}
81 83
82 Cpu& CurrentCpuCore() { 84 Cpu& CurrentCpuCore() {
83 return cpu_core_manager.GetCurrentCore(); 85 return cpu_core_manager.GetCurrentCore();
@@ -95,7 +97,7 @@ struct System::Impl {
95 LOG_DEBUG(HW_Memory, "initialized OK"); 97 LOG_DEBUG(HW_Memory, "initialized OK");
96 98
97 core_timing.Initialize(); 99 core_timing.Initialize();
98 kernel.Initialize(core_timing); 100 kernel.Initialize();
99 101
100 const auto current_time = std::chrono::duration_cast<std::chrono::seconds>( 102 const auto current_time = std::chrono::duration_cast<std::chrono::seconds>(
101 std::chrono::system_clock::now().time_since_epoch()); 103 std::chrono::system_clock::now().time_since_epoch());
@@ -114,7 +116,7 @@ struct System::Impl {
114 if (web_browser == nullptr) 116 if (web_browser == nullptr)
115 web_browser = std::make_unique<Core::Frontend::DefaultWebBrowserApplet>(); 117 web_browser = std::make_unique<Core::Frontend::DefaultWebBrowserApplet>();
116 118
117 auto main_process = Kernel::Process::Create(kernel, "main"); 119 auto main_process = Kernel::Process::Create(system, "main");
118 kernel.MakeCurrentProcess(main_process.get()); 120 kernel.MakeCurrentProcess(main_process.get());
119 121
120 telemetry_session = std::make_unique<Core::TelemetrySession>(); 122 telemetry_session = std::make_unique<Core::TelemetrySession>();
@@ -128,10 +130,16 @@ struct System::Impl {
128 return ResultStatus::ErrorVideoCore; 130 return ResultStatus::ErrorVideoCore;
129 } 131 }
130 132
131 gpu_core = std::make_unique<Tegra::GPU>(renderer->Rasterizer()); 133 is_powered_on = true;
134
135 if (Settings::values.use_asynchronous_gpu_emulation) {
136 gpu_core = std::make_unique<VideoCommon::GPUAsynch>(system, *renderer);
137 } else {
138 gpu_core = std::make_unique<VideoCommon::GPUSynch>(system, *renderer);
139 }
132 140
133 cpu_core_manager.Initialize(system); 141 cpu_core_manager.Initialize(system);
134 is_powered_on = true; 142
135 LOG_DEBUG(Core, "Initialized OK"); 143 LOG_DEBUG(Core, "Initialized OK");
136 144
137 // Reset counters and set time origin to current frame 145 // Reset counters and set time origin to current frame
@@ -182,13 +190,13 @@ struct System::Impl {
182 190
183 void Shutdown() { 191 void Shutdown() {
184 // Log last frame performance stats 192 // Log last frame performance stats
185 auto perf_results = GetAndResetPerfStats(); 193 const auto perf_results = GetAndResetPerfStats();
186 Telemetry().AddField(Telemetry::FieldType::Performance, "Shutdown_EmulationSpeed", 194 telemetry_session->AddField(Telemetry::FieldType::Performance, "Shutdown_EmulationSpeed",
187 perf_results.emulation_speed * 100.0); 195 perf_results.emulation_speed * 100.0);
188 Telemetry().AddField(Telemetry::FieldType::Performance, "Shutdown_Framerate", 196 telemetry_session->AddField(Telemetry::FieldType::Performance, "Shutdown_Framerate",
189 perf_results.game_fps); 197 perf_results.game_fps);
190 Telemetry().AddField(Telemetry::FieldType::Performance, "Shutdown_Frametime", 198 telemetry_session->AddField(Telemetry::FieldType::Performance, "Shutdown_Frametime",
191 perf_results.frametime * 1000.0); 199 perf_results.frametime * 1000.0);
192 200
193 is_powered_on = false; 201 is_powered_on = false;
194 202
@@ -265,7 +273,7 @@ struct System::Impl {
265 Core::FrameLimiter frame_limiter; 273 Core::FrameLimiter frame_limiter;
266}; 274};
267 275
268System::System() : impl{std::make_unique<Impl>()} {} 276System::System() : impl{std::make_unique<Impl>(*this)} {}
269System::~System() = default; 277System::~System() = default;
270 278
271Cpu& System::CurrentCpuCore() { 279Cpu& System::CurrentCpuCore() {
diff --git a/src/core/core.h b/src/core/core.h
index d720013f7..ba76a41d8 100644
--- a/src/core/core.h
+++ b/src/core/core.h
@@ -293,10 +293,6 @@ inline ARM_Interface& CurrentArmInterface() {
293 return System::GetInstance().CurrentArmInterface(); 293 return System::GetInstance().CurrentArmInterface();
294} 294}
295 295
296inline TelemetrySession& Telemetry() {
297 return System::GetInstance().TelemetrySession();
298}
299
300inline Kernel::Process* CurrentProcess() { 296inline Kernel::Process* CurrentProcess() {
301 return System::GetInstance().CurrentProcess(); 297 return System::GetInstance().CurrentProcess();
302} 298}
diff --git a/src/core/core_cpu.cpp b/src/core/core_cpu.cpp
index 54aa21a3a..1eefed6d0 100644
--- a/src/core/core_cpu.cpp
+++ b/src/core/core_cpu.cpp
@@ -11,6 +11,7 @@
11#endif 11#endif
12#include "core/arm/exclusive_monitor.h" 12#include "core/arm/exclusive_monitor.h"
13#include "core/arm/unicorn/arm_unicorn.h" 13#include "core/arm/unicorn/arm_unicorn.h"
14#include "core/core.h"
14#include "core/core_cpu.h" 15#include "core/core_cpu.h"
15#include "core/core_timing.h" 16#include "core/core_timing.h"
16#include "core/hle/kernel/scheduler.h" 17#include "core/hle/kernel/scheduler.h"
@@ -49,9 +50,9 @@ bool CpuBarrier::Rendezvous() {
49 return false; 50 return false;
50} 51}
51 52
52Cpu::Cpu(Timing::CoreTiming& core_timing, ExclusiveMonitor& exclusive_monitor, 53Cpu::Cpu(System& system, ExclusiveMonitor& exclusive_monitor, CpuBarrier& cpu_barrier,
53 CpuBarrier& cpu_barrier, std::size_t core_index) 54 std::size_t core_index)
54 : cpu_barrier{cpu_barrier}, core_timing{core_timing}, core_index{core_index} { 55 : cpu_barrier{cpu_barrier}, core_timing{system.CoreTiming()}, core_index{core_index} {
55 if (Settings::values.use_cpu_jit) { 56 if (Settings::values.use_cpu_jit) {
56#ifdef ARCHITECTURE_x86_64 57#ifdef ARCHITECTURE_x86_64
57 arm_interface = std::make_unique<ARM_Dynarmic>(core_timing, exclusive_monitor, core_index); 58 arm_interface = std::make_unique<ARM_Dynarmic>(core_timing, exclusive_monitor, core_index);
@@ -63,7 +64,7 @@ Cpu::Cpu(Timing::CoreTiming& core_timing, ExclusiveMonitor& exclusive_monitor,
63 arm_interface = std::make_unique<ARM_Unicorn>(core_timing); 64 arm_interface = std::make_unique<ARM_Unicorn>(core_timing);
64 } 65 }
65 66
66 scheduler = std::make_unique<Kernel::Scheduler>(*arm_interface); 67 scheduler = std::make_unique<Kernel::Scheduler>(system, *arm_interface);
67} 68}
68 69
69Cpu::~Cpu() = default; 70Cpu::~Cpu() = default;
diff --git a/src/core/core_cpu.h b/src/core/core_cpu.h
index e2204c6b0..7589beb8c 100644
--- a/src/core/core_cpu.h
+++ b/src/core/core_cpu.h
@@ -15,6 +15,10 @@ namespace Kernel {
15class Scheduler; 15class Scheduler;
16} 16}
17 17
18namespace Core {
19class System;
20}
21
18namespace Core::Timing { 22namespace Core::Timing {
19class CoreTiming; 23class CoreTiming;
20} 24}
@@ -45,8 +49,8 @@ private:
45 49
46class Cpu { 50class Cpu {
47public: 51public:
48 Cpu(Timing::CoreTiming& core_timing, ExclusiveMonitor& exclusive_monitor, 52 Cpu(System& system, ExclusiveMonitor& exclusive_monitor, CpuBarrier& cpu_barrier,
49 CpuBarrier& cpu_barrier, std::size_t core_index); 53 std::size_t core_index);
50 ~Cpu(); 54 ~Cpu();
51 55
52 void RunLoop(bool tight_loop = true); 56 void RunLoop(bool tight_loop = true);
diff --git a/src/core/cpu_core_manager.cpp b/src/core/cpu_core_manager.cpp
index 2ddb3610d..93bc5619c 100644
--- a/src/core/cpu_core_manager.cpp
+++ b/src/core/cpu_core_manager.cpp
@@ -27,8 +27,7 @@ void CpuCoreManager::Initialize(System& system) {
27 exclusive_monitor = Cpu::MakeExclusiveMonitor(cores.size()); 27 exclusive_monitor = Cpu::MakeExclusiveMonitor(cores.size());
28 28
29 for (std::size_t index = 0; index < cores.size(); ++index) { 29 for (std::size_t index = 0; index < cores.size(); ++index) {
30 cores[index] = 30 cores[index] = std::make_unique<Cpu>(system, *exclusive_monitor, *barrier, index);
31 std::make_unique<Cpu>(system.CoreTiming(), *exclusive_monitor, *barrier, index);
32 } 31 }
33 32
34 // Create threads for CPU cores 1-3, and build thread_to_cpu map 33 // Create threads for CPU cores 1-3, and build thread_to_cpu map
diff --git a/src/core/crypto/key_manager.cpp b/src/core/crypto/key_manager.cpp
index ca12fb4ab..dfac9a4b3 100644
--- a/src/core/crypto/key_manager.cpp
+++ b/src/core/crypto/key_manager.cpp
@@ -398,7 +398,8 @@ static bool ValidCryptoRevisionString(std::string_view base, size_t begin, size_
398} 398}
399 399
400void KeyManager::LoadFromFile(const std::string& filename, bool is_title_keys) { 400void KeyManager::LoadFromFile(const std::string& filename, bool is_title_keys) {
401 std::ifstream file(filename); 401 std::ifstream file;
402 OpenFStream(file, filename, std::ios_base::in);
402 if (!file.is_open()) 403 if (!file.is_open())
403 return; 404 return;
404 405
diff --git a/src/core/file_sys/vfs_vector.cpp b/src/core/file_sys/vfs_vector.cpp
index 515626658..75fc04302 100644
--- a/src/core/file_sys/vfs_vector.cpp
+++ b/src/core/file_sys/vfs_vector.cpp
@@ -47,7 +47,7 @@ std::size_t VectorVfsFile::Write(const u8* data_, std::size_t length, std::size_
47 if (offset + length > data.size()) 47 if (offset + length > data.size())
48 data.resize(offset + length); 48 data.resize(offset + length);
49 const auto write = std::min(length, data.size() - offset); 49 const auto write = std::min(length, data.size() - offset);
50 std::memcpy(data.data(), data_, write); 50 std::memcpy(data.data() + offset, data_, write);
51 return write; 51 return write;
52} 52}
53 53
diff --git a/src/core/frontend/emu_window.cpp b/src/core/frontend/emu_window.cpp
index 9dd493efb..e29afd630 100644
--- a/src/core/frontend/emu_window.cpp
+++ b/src/core/frontend/emu_window.cpp
@@ -67,7 +67,7 @@ static bool IsWithinTouchscreen(const Layout::FramebufferLayout& layout, unsigne
67 framebuffer_x >= layout.screen.left && framebuffer_x < layout.screen.right); 67 framebuffer_x >= layout.screen.left && framebuffer_x < layout.screen.right);
68} 68}
69 69
70std::tuple<unsigned, unsigned> EmuWindow::ClipToTouchScreen(unsigned new_x, unsigned new_y) { 70std::tuple<unsigned, unsigned> EmuWindow::ClipToTouchScreen(unsigned new_x, unsigned new_y) const {
71 new_x = std::max(new_x, framebuffer_layout.screen.left); 71 new_x = std::max(new_x, framebuffer_layout.screen.left);
72 new_x = std::min(new_x, framebuffer_layout.screen.right - 1); 72 new_x = std::min(new_x, framebuffer_layout.screen.right - 1);
73 73
diff --git a/src/core/frontend/emu_window.h b/src/core/frontend/emu_window.h
index 7006a37b3..d0bcb4660 100644
--- a/src/core/frontend/emu_window.h
+++ b/src/core/frontend/emu_window.h
@@ -166,7 +166,7 @@ private:
166 /** 166 /**
167 * Clip the provided coordinates to be inside the touchscreen area. 167 * Clip the provided coordinates to be inside the touchscreen area.
168 */ 168 */
169 std::tuple<unsigned, unsigned> ClipToTouchScreen(unsigned new_x, unsigned new_y); 169 std::tuple<unsigned, unsigned> ClipToTouchScreen(unsigned new_x, unsigned new_y) const;
170}; 170};
171 171
172} // namespace Core::Frontend 172} // namespace Core::Frontend
diff --git a/src/core/frontend/framebuffer_layout.cpp b/src/core/frontend/framebuffer_layout.cpp
index f8662d193..a1357179f 100644
--- a/src/core/frontend/framebuffer_layout.cpp
+++ b/src/core/frontend/framebuffer_layout.cpp
@@ -12,12 +12,12 @@ namespace Layout {
12 12
13// Finds the largest size subrectangle contained in window area that is confined to the aspect ratio 13// Finds the largest size subrectangle contained in window area that is confined to the aspect ratio
14template <class T> 14template <class T>
15static MathUtil::Rectangle<T> maxRectangle(MathUtil::Rectangle<T> window_area, 15static Common::Rectangle<T> MaxRectangle(Common::Rectangle<T> window_area,
16 float screen_aspect_ratio) { 16 float screen_aspect_ratio) {
17 float scale = std::min(static_cast<float>(window_area.GetWidth()), 17 float scale = std::min(static_cast<float>(window_area.GetWidth()),
18 window_area.GetHeight() / screen_aspect_ratio); 18 window_area.GetHeight() / screen_aspect_ratio);
19 return MathUtil::Rectangle<T>{0, 0, static_cast<T>(std::round(scale)), 19 return Common::Rectangle<T>{0, 0, static_cast<T>(std::round(scale)),
20 static_cast<T>(std::round(scale * screen_aspect_ratio))}; 20 static_cast<T>(std::round(scale * screen_aspect_ratio))};
21} 21}
22 22
23FramebufferLayout DefaultFrameLayout(unsigned width, unsigned height) { 23FramebufferLayout DefaultFrameLayout(unsigned width, unsigned height) {
@@ -29,8 +29,8 @@ FramebufferLayout DefaultFrameLayout(unsigned width, unsigned height) {
29 29
30 const float emulation_aspect_ratio{static_cast<float>(ScreenUndocked::Height) / 30 const float emulation_aspect_ratio{static_cast<float>(ScreenUndocked::Height) /
31 ScreenUndocked::Width}; 31 ScreenUndocked::Width};
32 MathUtil::Rectangle<unsigned> screen_window_area{0, 0, width, height}; 32 Common::Rectangle<unsigned> screen_window_area{0, 0, width, height};
33 MathUtil::Rectangle<unsigned> screen = maxRectangle(screen_window_area, emulation_aspect_ratio); 33 Common::Rectangle<unsigned> screen = MaxRectangle(screen_window_area, emulation_aspect_ratio);
34 34
35 float window_aspect_ratio = static_cast<float>(height) / width; 35 float window_aspect_ratio = static_cast<float>(height) / width;
36 36
diff --git a/src/core/frontend/framebuffer_layout.h b/src/core/frontend/framebuffer_layout.h
index e06647794..c2c63d08c 100644
--- a/src/core/frontend/framebuffer_layout.h
+++ b/src/core/frontend/framebuffer_layout.h
@@ -16,7 +16,7 @@ struct FramebufferLayout {
16 unsigned width{ScreenUndocked::Width}; 16 unsigned width{ScreenUndocked::Width};
17 unsigned height{ScreenUndocked::Height}; 17 unsigned height{ScreenUndocked::Height};
18 18
19 MathUtil::Rectangle<unsigned> screen; 19 Common::Rectangle<unsigned> screen;
20 20
21 /** 21 /**
22 * Returns the ration of pixel size of the screen, compared to the native size of the undocked 22 * Returns the ration of pixel size of the screen, compared to the native size of the undocked
diff --git a/src/core/frontend/input.h b/src/core/frontend/input.h
index 16fdcd376..7c11d7546 100644
--- a/src/core/frontend/input.h
+++ b/src/core/frontend/input.h
@@ -124,7 +124,7 @@ using AnalogDevice = InputDevice<std::tuple<float, float>>;
124 * Orientation is determined by right-hand rule. 124 * Orientation is determined by right-hand rule.
125 * Units: deg/sec 125 * Units: deg/sec
126 */ 126 */
127using MotionDevice = InputDevice<std::tuple<Math::Vec3<float>, Math::Vec3<float>>>; 127using MotionDevice = InputDevice<std::tuple<Common::Vec3<float>, Common::Vec3<float>>>;
128 128
129/** 129/**
130 * A touch device is an input device that returns a tuple of two floats and a bool. The floats are 130 * A touch device is an input device that returns a tuple of two floats and a bool. The floats are
diff --git a/src/core/hle/ipc.h b/src/core/hle/ipc.h
index ed84197b3..455d1f346 100644
--- a/src/core/hle/ipc.h
+++ b/src/core/hle/ipc.h
@@ -4,10 +4,10 @@
4 4
5#pragma once 5#pragma once
6 6
7#include "common/bit_field.h"
8#include "common/common_funcs.h"
7#include "common/common_types.h" 9#include "common/common_types.h"
8#include "common/swap.h" 10#include "common/swap.h"
9#include "core/hle/kernel/errors.h"
10#include "core/memory.h"
11 11
12namespace IPC { 12namespace IPC {
13 13
diff --git a/src/core/hle/ipc_helpers.h b/src/core/hle/ipc_helpers.h
index 90f276ee8..a1e4be070 100644
--- a/src/core/hle/ipc_helpers.h
+++ b/src/core/hle/ipc_helpers.h
@@ -19,9 +19,12 @@
19#include "core/hle/kernel/hle_ipc.h" 19#include "core/hle/kernel/hle_ipc.h"
20#include "core/hle/kernel/object.h" 20#include "core/hle/kernel/object.h"
21#include "core/hle/kernel/server_session.h" 21#include "core/hle/kernel/server_session.h"
22#include "core/hle/result.h"
22 23
23namespace IPC { 24namespace IPC {
24 25
26constexpr ResultCode ERR_REMOTE_PROCESS_DEAD{ErrorModule::HIPC, 301};
27
25class RequestHelperBase { 28class RequestHelperBase {
26protected: 29protected:
27 Kernel::HLERequestContext* context = nullptr; 30 Kernel::HLERequestContext* context = nullptr;
@@ -350,7 +353,7 @@ public:
350 template <class T> 353 template <class T>
351 std::shared_ptr<T> PopIpcInterface() { 354 std::shared_ptr<T> PopIpcInterface() {
352 ASSERT(context->Session()->IsDomain()); 355 ASSERT(context->Session()->IsDomain());
353 ASSERT(context->GetDomainMessageHeader()->input_object_count > 0); 356 ASSERT(context->GetDomainMessageHeader().input_object_count > 0);
354 return context->GetDomainRequestHandler<T>(Pop<u32>() - 1); 357 return context->GetDomainRequestHandler<T>(Pop<u32>() - 1);
355 } 358 }
356}; 359};
@@ -362,6 +365,11 @@ inline u32 RequestParser::Pop() {
362 return cmdbuf[index++]; 365 return cmdbuf[index++];
363} 366}
364 367
368template <>
369inline s32 RequestParser::Pop() {
370 return static_cast<s32>(Pop<u32>());
371}
372
365template <typename T> 373template <typename T>
366void RequestParser::PopRaw(T& value) { 374void RequestParser::PopRaw(T& value) {
367 std::memcpy(&value, cmdbuf + index, sizeof(T)); 375 std::memcpy(&value, cmdbuf + index, sizeof(T));
@@ -393,6 +401,16 @@ inline u64 RequestParser::Pop() {
393} 401}
394 402
395template <> 403template <>
404inline s8 RequestParser::Pop() {
405 return static_cast<s8>(Pop<u8>());
406}
407
408template <>
409inline s16 RequestParser::Pop() {
410 return static_cast<s16>(Pop<u16>());
411}
412
413template <>
396inline s64 RequestParser::Pop() { 414inline s64 RequestParser::Pop() {
397 return static_cast<s64>(Pop<u64>()); 415 return static_cast<s64>(Pop<u64>());
398} 416}
diff --git a/src/core/hle/kernel/address_arbiter.cpp b/src/core/hle/kernel/address_arbiter.cpp
index 57157beb4..352190da8 100644
--- a/src/core/hle/kernel/address_arbiter.cpp
+++ b/src/core/hle/kernel/address_arbiter.cpp
@@ -9,6 +9,7 @@
9#include "common/common_types.h" 9#include "common/common_types.h"
10#include "core/core.h" 10#include "core/core.h"
11#include "core/core_cpu.h" 11#include "core/core_cpu.h"
12#include "core/hle/kernel/address_arbiter.h"
12#include "core/hle/kernel/errors.h" 13#include "core/hle/kernel/errors.h"
13#include "core/hle/kernel/object.h" 14#include "core/hle/kernel/object.h"
14#include "core/hle/kernel/process.h" 15#include "core/hle/kernel/process.h"
@@ -18,58 +19,15 @@
18#include "core/memory.h" 19#include "core/memory.h"
19 20
20namespace Kernel { 21namespace Kernel {
21namespace AddressArbiter { 22namespace {
22
23// Performs actual address waiting logic.
24static ResultCode WaitForAddress(VAddr address, s64 timeout) {
25 SharedPtr<Thread> current_thread = GetCurrentThread();
26 current_thread->SetArbiterWaitAddress(address);
27 current_thread->SetStatus(ThreadStatus::WaitArb);
28 current_thread->InvalidateWakeupCallback();
29
30 current_thread->WakeAfterDelay(timeout);
31
32 Core::System::GetInstance().CpuCore(current_thread->GetProcessorID()).PrepareReschedule();
33 return RESULT_TIMEOUT;
34}
35
36// Gets the threads waiting on an address.
37static std::vector<SharedPtr<Thread>> GetThreadsWaitingOnAddress(VAddr address) {
38 const auto RetrieveWaitingThreads = [](std::size_t core_index,
39 std::vector<SharedPtr<Thread>>& waiting_threads,
40 VAddr arb_addr) {
41 const auto& scheduler = Core::System::GetInstance().Scheduler(core_index);
42 const auto& thread_list = scheduler.GetThreadList();
43
44 for (const auto& thread : thread_list) {
45 if (thread->GetArbiterWaitAddress() == arb_addr)
46 waiting_threads.push_back(thread);
47 }
48 };
49
50 // Retrieve all threads that are waiting for this address.
51 std::vector<SharedPtr<Thread>> threads;
52 RetrieveWaitingThreads(0, threads, address);
53 RetrieveWaitingThreads(1, threads, address);
54 RetrieveWaitingThreads(2, threads, address);
55 RetrieveWaitingThreads(3, threads, address);
56
57 // Sort them by priority, such that the highest priority ones come first.
58 std::sort(threads.begin(), threads.end(),
59 [](const SharedPtr<Thread>& lhs, const SharedPtr<Thread>& rhs) {
60 return lhs->GetPriority() < rhs->GetPriority();
61 });
62
63 return threads;
64}
65
66// Wake up num_to_wake (or all) threads in a vector. 23// Wake up num_to_wake (or all) threads in a vector.
67static void WakeThreads(std::vector<SharedPtr<Thread>>& waiting_threads, s32 num_to_wake) { 24void WakeThreads(const std::vector<SharedPtr<Thread>>& waiting_threads, s32 num_to_wake) {
68 // Only process up to 'target' threads, unless 'target' is <= 0, in which case process 25 // Only process up to 'target' threads, unless 'target' is <= 0, in which case process
69 // them all. 26 // them all.
70 std::size_t last = waiting_threads.size(); 27 std::size_t last = waiting_threads.size();
71 if (num_to_wake > 0) 28 if (num_to_wake > 0) {
72 last = num_to_wake; 29 last = num_to_wake;
30 }
73 31
74 // Signal the waiting threads. 32 // Signal the waiting threads.
75 for (std::size_t i = 0; i < last; i++) { 33 for (std::size_t i = 0; i < last; i++) {
@@ -79,42 +37,55 @@ static void WakeThreads(std::vector<SharedPtr<Thread>>& waiting_threads, s32 num
79 waiting_threads[i]->ResumeFromWait(); 37 waiting_threads[i]->ResumeFromWait();
80 } 38 }
81} 39}
40} // Anonymous namespace
41
42AddressArbiter::AddressArbiter(Core::System& system) : system{system} {}
43AddressArbiter::~AddressArbiter() = default;
44
45ResultCode AddressArbiter::SignalToAddress(VAddr address, SignalType type, s32 value,
46 s32 num_to_wake) {
47 switch (type) {
48 case SignalType::Signal:
49 return SignalToAddressOnly(address, num_to_wake);
50 case SignalType::IncrementAndSignalIfEqual:
51 return IncrementAndSignalToAddressIfEqual(address, value, num_to_wake);
52 case SignalType::ModifyByWaitingCountAndSignalIfEqual:
53 return ModifyByWaitingCountAndSignalToAddressIfEqual(address, value, num_to_wake);
54 default:
55 return ERR_INVALID_ENUM_VALUE;
56 }
57}
82 58
83// Signals an address being waited on. 59ResultCode AddressArbiter::SignalToAddressOnly(VAddr address, s32 num_to_wake) {
84ResultCode SignalToAddress(VAddr address, s32 num_to_wake) { 60 const std::vector<SharedPtr<Thread>> waiting_threads = GetThreadsWaitingOnAddress(address);
85 std::vector<SharedPtr<Thread>> waiting_threads = GetThreadsWaitingOnAddress(address);
86
87 WakeThreads(waiting_threads, num_to_wake); 61 WakeThreads(waiting_threads, num_to_wake);
88 return RESULT_SUCCESS; 62 return RESULT_SUCCESS;
89} 63}
90 64
91// Signals an address being waited on and increments its value if equal to the value argument. 65ResultCode AddressArbiter::IncrementAndSignalToAddressIfEqual(VAddr address, s32 value,
92ResultCode IncrementAndSignalToAddressIfEqual(VAddr address, s32 value, s32 num_to_wake) { 66 s32 num_to_wake) {
93 // Ensure that we can write to the address. 67 // Ensure that we can write to the address.
94 if (!Memory::IsValidVirtualAddress(address)) { 68 if (!Memory::IsValidVirtualAddress(address)) {
95 return ERR_INVALID_ADDRESS_STATE; 69 return ERR_INVALID_ADDRESS_STATE;
96 } 70 }
97 71
98 if (static_cast<s32>(Memory::Read32(address)) == value) { 72 if (static_cast<s32>(Memory::Read32(address)) != value) {
99 Memory::Write32(address, static_cast<u32>(value + 1));
100 } else {
101 return ERR_INVALID_STATE; 73 return ERR_INVALID_STATE;
102 } 74 }
103 75
104 return SignalToAddress(address, num_to_wake); 76 Memory::Write32(address, static_cast<u32>(value + 1));
77 return SignalToAddressOnly(address, num_to_wake);
105} 78}
106 79
107// Signals an address being waited on and modifies its value based on waiting thread count if equal 80ResultCode AddressArbiter::ModifyByWaitingCountAndSignalToAddressIfEqual(VAddr address, s32 value,
108// to the value argument. 81 s32 num_to_wake) {
109ResultCode ModifyByWaitingCountAndSignalToAddressIfEqual(VAddr address, s32 value,
110 s32 num_to_wake) {
111 // Ensure that we can write to the address. 82 // Ensure that we can write to the address.
112 if (!Memory::IsValidVirtualAddress(address)) { 83 if (!Memory::IsValidVirtualAddress(address)) {
113 return ERR_INVALID_ADDRESS_STATE; 84 return ERR_INVALID_ADDRESS_STATE;
114 } 85 }
115 86
116 // Get threads waiting on the address. 87 // Get threads waiting on the address.
117 std::vector<SharedPtr<Thread>> waiting_threads = GetThreadsWaitingOnAddress(address); 88 const std::vector<SharedPtr<Thread>> waiting_threads = GetThreadsWaitingOnAddress(address);
118 89
119 // Determine the modified value depending on the waiting count. 90 // Determine the modified value depending on the waiting count.
120 s32 updated_value; 91 s32 updated_value;
@@ -126,41 +97,54 @@ ResultCode ModifyByWaitingCountAndSignalToAddressIfEqual(VAddr address, s32 valu
126 updated_value = value; 97 updated_value = value;
127 } 98 }
128 99
129 if (static_cast<s32>(Memory::Read32(address)) == value) { 100 if (static_cast<s32>(Memory::Read32(address)) != value) {
130 Memory::Write32(address, static_cast<u32>(updated_value));
131 } else {
132 return ERR_INVALID_STATE; 101 return ERR_INVALID_STATE;
133 } 102 }
134 103
104 Memory::Write32(address, static_cast<u32>(updated_value));
135 WakeThreads(waiting_threads, num_to_wake); 105 WakeThreads(waiting_threads, num_to_wake);
136 return RESULT_SUCCESS; 106 return RESULT_SUCCESS;
137} 107}
138 108
139// Waits on an address if the value passed is less than the argument value, optionally decrementing. 109ResultCode AddressArbiter::WaitForAddress(VAddr address, ArbitrationType type, s32 value,
140ResultCode WaitForAddressIfLessThan(VAddr address, s32 value, s64 timeout, bool should_decrement) { 110 s64 timeout_ns) {
111 switch (type) {
112 case ArbitrationType::WaitIfLessThan:
113 return WaitForAddressIfLessThan(address, value, timeout_ns, false);
114 case ArbitrationType::DecrementAndWaitIfLessThan:
115 return WaitForAddressIfLessThan(address, value, timeout_ns, true);
116 case ArbitrationType::WaitIfEqual:
117 return WaitForAddressIfEqual(address, value, timeout_ns);
118 default:
119 return ERR_INVALID_ENUM_VALUE;
120 }
121}
122
123ResultCode AddressArbiter::WaitForAddressIfLessThan(VAddr address, s32 value, s64 timeout,
124 bool should_decrement) {
141 // Ensure that we can read the address. 125 // Ensure that we can read the address.
142 if (!Memory::IsValidVirtualAddress(address)) { 126 if (!Memory::IsValidVirtualAddress(address)) {
143 return ERR_INVALID_ADDRESS_STATE; 127 return ERR_INVALID_ADDRESS_STATE;
144 } 128 }
145 129
146 s32 cur_value = static_cast<s32>(Memory::Read32(address)); 130 const s32 cur_value = static_cast<s32>(Memory::Read32(address));
147 if (cur_value < value) { 131 if (cur_value >= value) {
148 if (should_decrement) {
149 Memory::Write32(address, static_cast<u32>(cur_value - 1));
150 }
151 } else {
152 return ERR_INVALID_STATE; 132 return ERR_INVALID_STATE;
153 } 133 }
134
135 if (should_decrement) {
136 Memory::Write32(address, static_cast<u32>(cur_value - 1));
137 }
138
154 // Short-circuit without rescheduling, if timeout is zero. 139 // Short-circuit without rescheduling, if timeout is zero.
155 if (timeout == 0) { 140 if (timeout == 0) {
156 return RESULT_TIMEOUT; 141 return RESULT_TIMEOUT;
157 } 142 }
158 143
159 return WaitForAddress(address, timeout); 144 return WaitForAddressImpl(address, timeout);
160} 145}
161 146
162// Waits on an address if the value passed is equal to the argument value. 147ResultCode AddressArbiter::WaitForAddressIfEqual(VAddr address, s32 value, s64 timeout) {
163ResultCode WaitForAddressIfEqual(VAddr address, s32 value, s64 timeout) {
164 // Ensure that we can read the address. 148 // Ensure that we can read the address.
165 if (!Memory::IsValidVirtualAddress(address)) { 149 if (!Memory::IsValidVirtualAddress(address)) {
166 return ERR_INVALID_ADDRESS_STATE; 150 return ERR_INVALID_ADDRESS_STATE;
@@ -174,7 +158,48 @@ ResultCode WaitForAddressIfEqual(VAddr address, s32 value, s64 timeout) {
174 return RESULT_TIMEOUT; 158 return RESULT_TIMEOUT;
175 } 159 }
176 160
177 return WaitForAddress(address, timeout); 161 return WaitForAddressImpl(address, timeout);
162}
163
164ResultCode AddressArbiter::WaitForAddressImpl(VAddr address, s64 timeout) {
165 SharedPtr<Thread> current_thread = system.CurrentScheduler().GetCurrentThread();
166 current_thread->SetArbiterWaitAddress(address);
167 current_thread->SetStatus(ThreadStatus::WaitArb);
168 current_thread->InvalidateWakeupCallback();
169
170 current_thread->WakeAfterDelay(timeout);
171
172 system.CpuCore(current_thread->GetProcessorID()).PrepareReschedule();
173 return RESULT_TIMEOUT;
174}
175
176std::vector<SharedPtr<Thread>> AddressArbiter::GetThreadsWaitingOnAddress(VAddr address) const {
177 const auto RetrieveWaitingThreads = [this](std::size_t core_index,
178 std::vector<SharedPtr<Thread>>& waiting_threads,
179 VAddr arb_addr) {
180 const auto& scheduler = system.Scheduler(core_index);
181 const auto& thread_list = scheduler.GetThreadList();
182
183 for (const auto& thread : thread_list) {
184 if (thread->GetArbiterWaitAddress() == arb_addr) {
185 waiting_threads.push_back(thread);
186 }
187 }
188 };
189
190 // Retrieve all threads that are waiting for this address.
191 std::vector<SharedPtr<Thread>> threads;
192 RetrieveWaitingThreads(0, threads, address);
193 RetrieveWaitingThreads(1, threads, address);
194 RetrieveWaitingThreads(2, threads, address);
195 RetrieveWaitingThreads(3, threads, address);
196
197 // Sort them by priority, such that the highest priority ones come first.
198 std::sort(threads.begin(), threads.end(),
199 [](const SharedPtr<Thread>& lhs, const SharedPtr<Thread>& rhs) {
200 return lhs->GetPriority() < rhs->GetPriority();
201 });
202
203 return threads;
178} 204}
179} // namespace AddressArbiter
180} // namespace Kernel 205} // namespace Kernel
diff --git a/src/core/hle/kernel/address_arbiter.h b/src/core/hle/kernel/address_arbiter.h
index e3657b8e9..ed0d0e69f 100644
--- a/src/core/hle/kernel/address_arbiter.h
+++ b/src/core/hle/kernel/address_arbiter.h
@@ -4,31 +4,77 @@
4 4
5#pragma once 5#pragma once
6 6
7#include <vector>
8
7#include "common/common_types.h" 9#include "common/common_types.h"
10#include "core/hle/kernel/object.h"
8 11
9union ResultCode; 12union ResultCode;
10 13
14namespace Core {
15class System;
16}
17
11namespace Kernel { 18namespace Kernel {
12 19
13namespace AddressArbiter { 20class Thread;
14enum class ArbitrationType {
15 WaitIfLessThan = 0,
16 DecrementAndWaitIfLessThan = 1,
17 WaitIfEqual = 2,
18};
19 21
20enum class SignalType { 22class AddressArbiter {
21 Signal = 0, 23public:
22 IncrementAndSignalIfEqual = 1, 24 enum class ArbitrationType {
23 ModifyByWaitingCountAndSignalIfEqual = 2, 25 WaitIfLessThan = 0,
24}; 26 DecrementAndWaitIfLessThan = 1,
27 WaitIfEqual = 2,
28 };
29
30 enum class SignalType {
31 Signal = 0,
32 IncrementAndSignalIfEqual = 1,
33 ModifyByWaitingCountAndSignalIfEqual = 2,
34 };
35
36 explicit AddressArbiter(Core::System& system);
37 ~AddressArbiter();
38
39 AddressArbiter(const AddressArbiter&) = delete;
40 AddressArbiter& operator=(const AddressArbiter&) = delete;
41
42 AddressArbiter(AddressArbiter&&) = default;
43 AddressArbiter& operator=(AddressArbiter&&) = delete;
44
45 /// Signals an address being waited on with a particular signaling type.
46 ResultCode SignalToAddress(VAddr address, SignalType type, s32 value, s32 num_to_wake);
25 47
26ResultCode SignalToAddress(VAddr address, s32 num_to_wake); 48 /// Waits on an address with a particular arbitration type.
27ResultCode IncrementAndSignalToAddressIfEqual(VAddr address, s32 value, s32 num_to_wake); 49 ResultCode WaitForAddress(VAddr address, ArbitrationType type, s32 value, s64 timeout_ns);
28ResultCode ModifyByWaitingCountAndSignalToAddressIfEqual(VAddr address, s32 value, s32 num_to_wake);
29 50
30ResultCode WaitForAddressIfLessThan(VAddr address, s32 value, s64 timeout, bool should_decrement); 51private:
31ResultCode WaitForAddressIfEqual(VAddr address, s32 value, s64 timeout); 52 /// Signals an address being waited on.
32} // namespace AddressArbiter 53 ResultCode SignalToAddressOnly(VAddr address, s32 num_to_wake);
54
55 /// Signals an address being waited on and increments its value if equal to the value argument.
56 ResultCode IncrementAndSignalToAddressIfEqual(VAddr address, s32 value, s32 num_to_wake);
57
58 /// Signals an address being waited on and modifies its value based on waiting thread count if
59 /// equal to the value argument.
60 ResultCode ModifyByWaitingCountAndSignalToAddressIfEqual(VAddr address, s32 value,
61 s32 num_to_wake);
62
63 /// Waits on an address if the value passed is less than the argument value,
64 /// optionally decrementing.
65 ResultCode WaitForAddressIfLessThan(VAddr address, s32 value, s64 timeout,
66 bool should_decrement);
67
68 /// Waits on an address if the value passed is equal to the argument value.
69 ResultCode WaitForAddressIfEqual(VAddr address, s32 value, s64 timeout);
70
71 // Waits on the given address with a timeout in nanoseconds
72 ResultCode WaitForAddressImpl(VAddr address, s64 timeout);
73
74 // Gets the threads waiting on an address.
75 std::vector<SharedPtr<Thread>> GetThreadsWaitingOnAddress(VAddr address) const;
76
77 Core::System& system;
78};
33 79
34} // namespace Kernel 80} // namespace Kernel
diff --git a/src/core/hle/kernel/client_port.cpp b/src/core/hle/kernel/client_port.cpp
index d4c91d529..aa432658e 100644
--- a/src/core/hle/kernel/client_port.cpp
+++ b/src/core/hle/kernel/client_port.cpp
@@ -33,10 +33,11 @@ ResultVal<SharedPtr<ClientSession>> ClientPort::Connect() {
33 // Create a new session pair, let the created sessions inherit the parent port's HLE handler. 33 // Create a new session pair, let the created sessions inherit the parent port's HLE handler.
34 auto sessions = ServerSession::CreateSessionPair(kernel, server_port->GetName(), this); 34 auto sessions = ServerSession::CreateSessionPair(kernel, server_port->GetName(), this);
35 35
36 if (server_port->hle_handler) 36 if (server_port->HasHLEHandler()) {
37 server_port->hle_handler->ClientConnected(std::get<SharedPtr<ServerSession>>(sessions)); 37 server_port->GetHLEHandler()->ClientConnected(std::get<SharedPtr<ServerSession>>(sessions));
38 else 38 } else {
39 server_port->pending_sessions.push_back(std::get<SharedPtr<ServerSession>>(sessions)); 39 server_port->AppendPendingSession(std::get<SharedPtr<ServerSession>>(sessions));
40 }
40 41
41 // Wake the threads waiting on the ServerPort 42 // Wake the threads waiting on the ServerPort
42 server_port->WakeupAllWaitingThreads(); 43 server_port->WakeupAllWaitingThreads();
diff --git a/src/core/hle/kernel/client_session.cpp b/src/core/hle/kernel/client_session.cpp
index 704e82824..c17baa50a 100644
--- a/src/core/hle/kernel/client_session.cpp
+++ b/src/core/hle/kernel/client_session.cpp
@@ -17,21 +17,11 @@ ClientSession::~ClientSession() {
17 // This destructor will be called automatically when the last ClientSession handle is closed by 17 // This destructor will be called automatically when the last ClientSession handle is closed by
18 // the emulated application. 18 // the emulated application.
19 19
20 // Local references to ServerSession and SessionRequestHandler are necessary to guarantee they 20 // A local reference to the ServerSession is necessary to guarantee it
21 // will be kept alive until after ClientDisconnected() returns. 21 // will be kept alive until after ClientDisconnected() returns.
22 SharedPtr<ServerSession> server = parent->server; 22 SharedPtr<ServerSession> server = parent->server;
23 if (server) { 23 if (server) {
24 std::shared_ptr<SessionRequestHandler> hle_handler = server->hle_handler; 24 server->ClientDisconnected();
25 if (hle_handler)
26 hle_handler->ClientDisconnected(server);
27
28 // TODO(Subv): Force a wake up of all the ServerSession's waiting threads and set
29 // their WaitSynchronization result to 0xC920181A.
30
31 // Clean up the list of client threads with pending requests, they are unneeded now that the
32 // client endpoint is closed.
33 server->pending_requesting_threads.clear();
34 server->currently_handling = nullptr;
35 } 25 }
36 26
37 parent->client = nullptr; 27 parent->client = nullptr;
diff --git a/src/core/hle/kernel/client_session.h b/src/core/hle/kernel/client_session.h
index 4c18de69c..b1f39aad7 100644
--- a/src/core/hle/kernel/client_session.h
+++ b/src/core/hle/kernel/client_session.h
@@ -36,14 +36,15 @@ public:
36 36
37 ResultCode SendSyncRequest(SharedPtr<Thread> thread); 37 ResultCode SendSyncRequest(SharedPtr<Thread> thread);
38 38
39 std::string name; ///< Name of client port (optional) 39private:
40 explicit ClientSession(KernelCore& kernel);
41 ~ClientSession() override;
40 42
41 /// The parent session, which links to the server endpoint. 43 /// The parent session, which links to the server endpoint.
42 std::shared_ptr<Session> parent; 44 std::shared_ptr<Session> parent;
43 45
44private: 46 /// Name of the client session (optional)
45 explicit ClientSession(KernelCore& kernel); 47 std::string name;
46 ~ClientSession() override;
47}; 48};
48 49
49} // namespace Kernel 50} // namespace Kernel
diff --git a/src/core/hle/kernel/errors.h b/src/core/hle/kernel/errors.h
index d17eb0cb6..8097b3863 100644
--- a/src/core/hle/kernel/errors.h
+++ b/src/core/hle/kernel/errors.h
@@ -14,6 +14,7 @@ constexpr ResultCode ERR_MAX_CONNECTIONS_REACHED{ErrorModule::Kernel, 7};
14constexpr ResultCode ERR_INVALID_CAPABILITY_DESCRIPTOR{ErrorModule::Kernel, 14}; 14constexpr ResultCode ERR_INVALID_CAPABILITY_DESCRIPTOR{ErrorModule::Kernel, 14};
15constexpr ResultCode ERR_INVALID_SIZE{ErrorModule::Kernel, 101}; 15constexpr ResultCode ERR_INVALID_SIZE{ErrorModule::Kernel, 101};
16constexpr ResultCode ERR_INVALID_ADDRESS{ErrorModule::Kernel, 102}; 16constexpr ResultCode ERR_INVALID_ADDRESS{ErrorModule::Kernel, 102};
17constexpr ResultCode ERR_OUT_OF_MEMORY{ErrorModule::Kernel, 104};
17constexpr ResultCode ERR_HANDLE_TABLE_FULL{ErrorModule::Kernel, 105}; 18constexpr ResultCode ERR_HANDLE_TABLE_FULL{ErrorModule::Kernel, 105};
18constexpr ResultCode ERR_INVALID_ADDRESS_STATE{ErrorModule::Kernel, 106}; 19constexpr ResultCode ERR_INVALID_ADDRESS_STATE{ErrorModule::Kernel, 106};
19constexpr ResultCode ERR_INVALID_MEMORY_PERMISSIONS{ErrorModule::Kernel, 108}; 20constexpr ResultCode ERR_INVALID_MEMORY_PERMISSIONS{ErrorModule::Kernel, 108};
diff --git a/src/core/hle/kernel/handle_table.cpp b/src/core/hle/kernel/handle_table.cpp
index c8acde5b1..bdfaa977f 100644
--- a/src/core/hle/kernel/handle_table.cpp
+++ b/src/core/hle/kernel/handle_table.cpp
@@ -14,32 +14,47 @@
14namespace Kernel { 14namespace Kernel {
15namespace { 15namespace {
16constexpr u16 GetSlot(Handle handle) { 16constexpr u16 GetSlot(Handle handle) {
17 return handle >> 15; 17 return static_cast<u16>(handle >> 15);
18} 18}
19 19
20constexpr u16 GetGeneration(Handle handle) { 20constexpr u16 GetGeneration(Handle handle) {
21 return handle & 0x7FFF; 21 return static_cast<u16>(handle & 0x7FFF);
22} 22}
23} // Anonymous namespace 23} // Anonymous namespace
24 24
25HandleTable::HandleTable() { 25HandleTable::HandleTable() {
26 next_generation = 1;
27 Clear(); 26 Clear();
28} 27}
29 28
30HandleTable::~HandleTable() = default; 29HandleTable::~HandleTable() = default;
31 30
31ResultCode HandleTable::SetSize(s32 handle_table_size) {
32 if (static_cast<u32>(handle_table_size) > MAX_COUNT) {
33 return ERR_OUT_OF_MEMORY;
34 }
35
36 // Values less than or equal to zero indicate to use the maximum allowable
37 // size for the handle table in the actual kernel, so we ignore the given
38 // value in that case, since we assume this by default unless this function
39 // is called.
40 if (handle_table_size > 0) {
41 table_size = static_cast<u16>(handle_table_size);
42 }
43
44 return RESULT_SUCCESS;
45}
46
32ResultVal<Handle> HandleTable::Create(SharedPtr<Object> obj) { 47ResultVal<Handle> HandleTable::Create(SharedPtr<Object> obj) {
33 DEBUG_ASSERT(obj != nullptr); 48 DEBUG_ASSERT(obj != nullptr);
34 49
35 u16 slot = next_free_slot; 50 const u16 slot = next_free_slot;
36 if (slot >= generations.size()) { 51 if (slot >= table_size) {
37 LOG_ERROR(Kernel, "Unable to allocate Handle, too many slots in use."); 52 LOG_ERROR(Kernel, "Unable to allocate Handle, too many slots in use.");
38 return ERR_HANDLE_TABLE_FULL; 53 return ERR_HANDLE_TABLE_FULL;
39 } 54 }
40 next_free_slot = generations[slot]; 55 next_free_slot = generations[slot];
41 56
42 u16 generation = next_generation++; 57 const u16 generation = next_generation++;
43 58
44 // Overflow count so it fits in the 15 bits dedicated to the generation in the handle. 59 // Overflow count so it fits in the 15 bits dedicated to the generation in the handle.
45 // Horizon OS uses zero to represent an invalid handle, so skip to 1. 60 // Horizon OS uses zero to represent an invalid handle, so skip to 1.
@@ -64,10 +79,11 @@ ResultVal<Handle> HandleTable::Duplicate(Handle handle) {
64} 79}
65 80
66ResultCode HandleTable::Close(Handle handle) { 81ResultCode HandleTable::Close(Handle handle) {
67 if (!IsValid(handle)) 82 if (!IsValid(handle)) {
68 return ERR_INVALID_HANDLE; 83 return ERR_INVALID_HANDLE;
84 }
69 85
70 u16 slot = GetSlot(handle); 86 const u16 slot = GetSlot(handle);
71 87
72 objects[slot] = nullptr; 88 objects[slot] = nullptr;
73 89
@@ -77,10 +93,10 @@ ResultCode HandleTable::Close(Handle handle) {
77} 93}
78 94
79bool HandleTable::IsValid(Handle handle) const { 95bool HandleTable::IsValid(Handle handle) const {
80 std::size_t slot = GetSlot(handle); 96 const std::size_t slot = GetSlot(handle);
81 u16 generation = GetGeneration(handle); 97 const u16 generation = GetGeneration(handle);
82 98
83 return slot < MAX_COUNT && objects[slot] != nullptr && generations[slot] == generation; 99 return slot < table_size && objects[slot] != nullptr && generations[slot] == generation;
84} 100}
85 101
86SharedPtr<Object> HandleTable::GetGeneric(Handle handle) const { 102SharedPtr<Object> HandleTable::GetGeneric(Handle handle) const {
@@ -97,7 +113,7 @@ SharedPtr<Object> HandleTable::GetGeneric(Handle handle) const {
97} 113}
98 114
99void HandleTable::Clear() { 115void HandleTable::Clear() {
100 for (u16 i = 0; i < MAX_COUNT; ++i) { 116 for (u16 i = 0; i < table_size; ++i) {
101 generations[i] = i + 1; 117 generations[i] = i + 1;
102 objects[i] = nullptr; 118 objects[i] = nullptr;
103 } 119 }
diff --git a/src/core/hle/kernel/handle_table.h b/src/core/hle/kernel/handle_table.h
index 89a3bc740..44901391b 100644
--- a/src/core/hle/kernel/handle_table.h
+++ b/src/core/hle/kernel/handle_table.h
@@ -50,6 +50,20 @@ public:
50 ~HandleTable(); 50 ~HandleTable();
51 51
52 /** 52 /**
53 * Sets the number of handles that may be in use at one time
54 * for this handle table.
55 *
56 * @param handle_table_size The desired size to limit the handle table to.
57 *
58 * @returns an error code indicating if initialization was successful.
59 * If initialization was not successful, then ERR_OUT_OF_MEMORY
60 * will be returned.
61 *
62 * @pre handle_table_size must be within the range [0, 1024]
63 */
64 ResultCode SetSize(s32 handle_table_size);
65
66 /**
53 * Allocates a handle for the given object. 67 * Allocates a handle for the given object.
54 * @return The created Handle or one of the following errors: 68 * @return The created Handle or one of the following errors:
55 * - `ERR_HANDLE_TABLE_FULL`: the maximum number of handles has been exceeded. 69 * - `ERR_HANDLE_TABLE_FULL`: the maximum number of handles has been exceeded.
@@ -104,13 +118,20 @@ private:
104 std::array<u16, MAX_COUNT> generations; 118 std::array<u16, MAX_COUNT> generations;
105 119
106 /** 120 /**
121 * The limited size of the handle table. This can be specified by process
122 * capabilities in order to restrict the overall number of handles that
123 * can be created in a process instance
124 */
125 u16 table_size = static_cast<u16>(MAX_COUNT);
126
127 /**
107 * Global counter of the number of created handles. Stored in `generations` when a handle is 128 * Global counter of the number of created handles. Stored in `generations` when a handle is
108 * created, and wraps around to 1 when it hits 0x8000. 129 * created, and wraps around to 1 when it hits 0x8000.
109 */ 130 */
110 u16 next_generation; 131 u16 next_generation = 1;
111 132
112 /// Head of the free slots linked list. 133 /// Head of the free slots linked list.
113 u16 next_free_slot; 134 u16 next_free_slot = 0;
114}; 135};
115 136
116} // namespace Kernel 137} // namespace Kernel
diff --git a/src/core/hle/kernel/hle_ipc.cpp b/src/core/hle/kernel/hle_ipc.cpp
index 5dd855db8..fe710eb6e 100644
--- a/src/core/hle/kernel/hle_ipc.cpp
+++ b/src/core/hle/kernel/hle_ipc.cpp
@@ -86,7 +86,7 @@ HLERequestContext::~HLERequestContext() = default;
86void HLERequestContext::ParseCommandBuffer(const HandleTable& handle_table, u32_le* src_cmdbuf, 86void HLERequestContext::ParseCommandBuffer(const HandleTable& handle_table, u32_le* src_cmdbuf,
87 bool incoming) { 87 bool incoming) {
88 IPC::RequestParser rp(src_cmdbuf); 88 IPC::RequestParser rp(src_cmdbuf);
89 command_header = std::make_shared<IPC::CommandHeader>(rp.PopRaw<IPC::CommandHeader>()); 89 command_header = rp.PopRaw<IPC::CommandHeader>();
90 90
91 if (command_header->type == IPC::CommandType::Close) { 91 if (command_header->type == IPC::CommandType::Close) {
92 // Close does not populate the rest of the IPC header 92 // Close does not populate the rest of the IPC header
@@ -95,8 +95,7 @@ void HLERequestContext::ParseCommandBuffer(const HandleTable& handle_table, u32_
95 95
96 // If handle descriptor is present, add size of it 96 // If handle descriptor is present, add size of it
97 if (command_header->enable_handle_descriptor) { 97 if (command_header->enable_handle_descriptor) {
98 handle_descriptor_header = 98 handle_descriptor_header = rp.PopRaw<IPC::HandleDescriptorHeader>();
99 std::make_shared<IPC::HandleDescriptorHeader>(rp.PopRaw<IPC::HandleDescriptorHeader>());
100 if (handle_descriptor_header->send_current_pid) { 99 if (handle_descriptor_header->send_current_pid) {
101 rp.Skip(2, false); 100 rp.Skip(2, false);
102 } 101 }
@@ -140,16 +139,15 @@ void HLERequestContext::ParseCommandBuffer(const HandleTable& handle_table, u32_
140 // If this is an incoming message, only CommandType "Request" has a domain header 139 // If this is an incoming message, only CommandType "Request" has a domain header
141 // All outgoing domain messages have the domain header, if only incoming has it 140 // All outgoing domain messages have the domain header, if only incoming has it
142 if (incoming || domain_message_header) { 141 if (incoming || domain_message_header) {
143 domain_message_header = 142 domain_message_header = rp.PopRaw<IPC::DomainMessageHeader>();
144 std::make_shared<IPC::DomainMessageHeader>(rp.PopRaw<IPC::DomainMessageHeader>());
145 } else { 143 } else {
146 if (Session()->IsDomain()) 144 if (Session()->IsDomain()) {
147 LOG_WARNING(IPC, "Domain request has no DomainMessageHeader!"); 145 LOG_WARNING(IPC, "Domain request has no DomainMessageHeader!");
146 }
148 } 147 }
149 } 148 }
150 149
151 data_payload_header = 150 data_payload_header = rp.PopRaw<IPC::DataPayloadHeader>();
152 std::make_shared<IPC::DataPayloadHeader>(rp.PopRaw<IPC::DataPayloadHeader>());
153 151
154 data_payload_offset = rp.GetCurrentOffset(); 152 data_payload_offset = rp.GetCurrentOffset();
155 153
@@ -264,11 +262,11 @@ ResultCode HLERequestContext::WriteToOutgoingCommandBuffer(Thread& thread) {
264 // Write the domain objects to the command buffer, these go after the raw untranslated data. 262 // Write the domain objects to the command buffer, these go after the raw untranslated data.
265 // TODO(Subv): This completely ignores C buffers. 263 // TODO(Subv): This completely ignores C buffers.
266 std::size_t domain_offset = size - domain_message_header->num_objects; 264 std::size_t domain_offset = size - domain_message_header->num_objects;
267 auto& request_handlers = server_session->domain_request_handlers;
268 265
269 for (auto& object : domain_objects) { 266 for (const auto& object : domain_objects) {
270 request_handlers.emplace_back(object); 267 server_session->AppendDomainRequestHandler(object);
271 dst_cmdbuf[domain_offset++] = static_cast<u32_le>(request_handlers.size()); 268 dst_cmdbuf[domain_offset++] =
269 static_cast<u32_le>(server_session->NumDomainRequestHandlers());
272 } 270 }
273 } 271 }
274 272
diff --git a/src/core/hle/kernel/hle_ipc.h b/src/core/hle/kernel/hle_ipc.h
index cb1c5aff3..2bdd9f02c 100644
--- a/src/core/hle/kernel/hle_ipc.h
+++ b/src/core/hle/kernel/hle_ipc.h
@@ -6,6 +6,7 @@
6 6
7#include <array> 7#include <array>
8#include <memory> 8#include <memory>
9#include <optional>
9#include <string> 10#include <string>
10#include <type_traits> 11#include <type_traits>
11#include <vector> 12#include <vector>
@@ -15,6 +16,8 @@
15#include "core/hle/ipc.h" 16#include "core/hle/ipc.h"
16#include "core/hle/kernel/object.h" 17#include "core/hle/kernel/object.h"
17 18
19union ResultCode;
20
18namespace Service { 21namespace Service {
19class ServiceFrameworkBase; 22class ServiceFrameworkBase;
20} 23}
@@ -166,12 +169,12 @@ public:
166 return buffer_c_desciptors; 169 return buffer_c_desciptors;
167 } 170 }
168 171
169 const IPC::DomainMessageHeader* GetDomainMessageHeader() const { 172 const IPC::DomainMessageHeader& GetDomainMessageHeader() const {
170 return domain_message_header.get(); 173 return domain_message_header.value();
171 } 174 }
172 175
173 bool HasDomainMessageHeader() const { 176 bool HasDomainMessageHeader() const {
174 return domain_message_header != nullptr; 177 return domain_message_header.has_value();
175 } 178 }
176 179
177 /// Helper function to read a buffer using the appropriate buffer descriptor 180 /// Helper function to read a buffer using the appropriate buffer descriptor
@@ -208,14 +211,12 @@ public:
208 211
209 template <typename T> 212 template <typename T>
210 SharedPtr<T> GetCopyObject(std::size_t index) { 213 SharedPtr<T> GetCopyObject(std::size_t index) {
211 ASSERT(index < copy_objects.size()); 214 return DynamicObjectCast<T>(copy_objects.at(index));
212 return DynamicObjectCast<T>(copy_objects[index]);
213 } 215 }
214 216
215 template <typename T> 217 template <typename T>
216 SharedPtr<T> GetMoveObject(std::size_t index) { 218 SharedPtr<T> GetMoveObject(std::size_t index) {
217 ASSERT(index < move_objects.size()); 219 return DynamicObjectCast<T>(move_objects.at(index));
218 return DynamicObjectCast<T>(move_objects[index]);
219 } 220 }
220 221
221 void AddMoveObject(SharedPtr<Object> object) { 222 void AddMoveObject(SharedPtr<Object> object) {
@@ -232,7 +233,7 @@ public:
232 233
233 template <typename T> 234 template <typename T>
234 std::shared_ptr<T> GetDomainRequestHandler(std::size_t index) const { 235 std::shared_ptr<T> GetDomainRequestHandler(std::size_t index) const {
235 return std::static_pointer_cast<T>(domain_request_handlers[index]); 236 return std::static_pointer_cast<T>(domain_request_handlers.at(index));
236 } 237 }
237 238
238 void SetDomainRequestHandlers( 239 void SetDomainRequestHandlers(
@@ -272,10 +273,10 @@ private:
272 boost::container::small_vector<SharedPtr<Object>, 8> copy_objects; 273 boost::container::small_vector<SharedPtr<Object>, 8> copy_objects;
273 boost::container::small_vector<std::shared_ptr<SessionRequestHandler>, 8> domain_objects; 274 boost::container::small_vector<std::shared_ptr<SessionRequestHandler>, 8> domain_objects;
274 275
275 std::shared_ptr<IPC::CommandHeader> command_header; 276 std::optional<IPC::CommandHeader> command_header;
276 std::shared_ptr<IPC::HandleDescriptorHeader> handle_descriptor_header; 277 std::optional<IPC::HandleDescriptorHeader> handle_descriptor_header;
277 std::shared_ptr<IPC::DataPayloadHeader> data_payload_header; 278 std::optional<IPC::DataPayloadHeader> data_payload_header;
278 std::shared_ptr<IPC::DomainMessageHeader> domain_message_header; 279 std::optional<IPC::DomainMessageHeader> domain_message_header;
279 std::vector<IPC::BufferDescriptorX> buffer_x_desciptors; 280 std::vector<IPC::BufferDescriptorX> buffer_x_desciptors;
280 std::vector<IPC::BufferDescriptorABW> buffer_a_desciptors; 281 std::vector<IPC::BufferDescriptorABW> buffer_a_desciptors;
281 std::vector<IPC::BufferDescriptorABW> buffer_b_desciptors; 282 std::vector<IPC::BufferDescriptorABW> buffer_b_desciptors;
diff --git a/src/core/hle/kernel/kernel.cpp b/src/core/hle/kernel/kernel.cpp
index dd749eed4..4d224d01d 100644
--- a/src/core/hle/kernel/kernel.cpp
+++ b/src/core/hle/kernel/kernel.cpp
@@ -12,6 +12,7 @@
12 12
13#include "core/core.h" 13#include "core/core.h"
14#include "core/core_timing.h" 14#include "core/core_timing.h"
15#include "core/hle/kernel/address_arbiter.h"
15#include "core/hle/kernel/client_port.h" 16#include "core/hle/kernel/client_port.h"
16#include "core/hle/kernel/handle_table.h" 17#include "core/hle/kernel/handle_table.h"
17#include "core/hle/kernel/kernel.h" 18#include "core/hle/kernel/kernel.h"
@@ -86,11 +87,13 @@ static void ThreadWakeupCallback(u64 thread_handle, [[maybe_unused]] int cycles_
86} 87}
87 88
88struct KernelCore::Impl { 89struct KernelCore::Impl {
89 void Initialize(KernelCore& kernel, Core::Timing::CoreTiming& core_timing) { 90 explicit Impl(Core::System& system) : system{system} {}
91
92 void Initialize(KernelCore& kernel) {
90 Shutdown(); 93 Shutdown();
91 94
92 InitializeSystemResourceLimit(kernel); 95 InitializeSystemResourceLimit(kernel);
93 InitializeThreads(core_timing); 96 InitializeThreads();
94 } 97 }
95 98
96 void Shutdown() { 99 void Shutdown() {
@@ -122,9 +125,9 @@ struct KernelCore::Impl {
122 ASSERT(system_resource_limit->SetLimitValue(ResourceType::Sessions, 900).IsSuccess()); 125 ASSERT(system_resource_limit->SetLimitValue(ResourceType::Sessions, 900).IsSuccess());
123 } 126 }
124 127
125 void InitializeThreads(Core::Timing::CoreTiming& core_timing) { 128 void InitializeThreads() {
126 thread_wakeup_event_type = 129 thread_wakeup_event_type =
127 core_timing.RegisterEvent("ThreadWakeupCallback", ThreadWakeupCallback); 130 system.CoreTiming().RegisterEvent("ThreadWakeupCallback", ThreadWakeupCallback);
128 } 131 }
129 132
130 std::atomic<u32> next_object_id{0}; 133 std::atomic<u32> next_object_id{0};
@@ -145,15 +148,18 @@ struct KernelCore::Impl {
145 /// Map of named ports managed by the kernel, which can be retrieved using 148 /// Map of named ports managed by the kernel, which can be retrieved using
146 /// the ConnectToPort SVC. 149 /// the ConnectToPort SVC.
147 NamedPortTable named_ports; 150 NamedPortTable named_ports;
151
152 // System context
153 Core::System& system;
148}; 154};
149 155
150KernelCore::KernelCore() : impl{std::make_unique<Impl>()} {} 156KernelCore::KernelCore(Core::System& system) : impl{std::make_unique<Impl>(system)} {}
151KernelCore::~KernelCore() { 157KernelCore::~KernelCore() {
152 Shutdown(); 158 Shutdown();
153} 159}
154 160
155void KernelCore::Initialize(Core::Timing::CoreTiming& core_timing) { 161void KernelCore::Initialize() {
156 impl->Initialize(*this, core_timing); 162 impl->Initialize(*this);
157} 163}
158 164
159void KernelCore::Shutdown() { 165void KernelCore::Shutdown() {
diff --git a/src/core/hle/kernel/kernel.h b/src/core/hle/kernel/kernel.h
index 154bced42..ff17ff865 100644
--- a/src/core/hle/kernel/kernel.h
+++ b/src/core/hle/kernel/kernel.h
@@ -11,6 +11,10 @@
11template <typename T> 11template <typename T>
12class ResultVal; 12class ResultVal;
13 13
14namespace Core {
15class System;
16}
17
14namespace Core::Timing { 18namespace Core::Timing {
15class CoreTiming; 19class CoreTiming;
16struct EventType; 20struct EventType;
@@ -18,6 +22,7 @@ struct EventType;
18 22
19namespace Kernel { 23namespace Kernel {
20 24
25class AddressArbiter;
21class ClientPort; 26class ClientPort;
22class HandleTable; 27class HandleTable;
23class Process; 28class Process;
@@ -30,7 +35,14 @@ private:
30 using NamedPortTable = std::unordered_map<std::string, SharedPtr<ClientPort>>; 35 using NamedPortTable = std::unordered_map<std::string, SharedPtr<ClientPort>>;
31 36
32public: 37public:
33 KernelCore(); 38 /// Constructs an instance of the kernel using the given System
39 /// instance as a context for any necessary system-related state,
40 /// such as threads, CPU core state, etc.
41 ///
42 /// @post After execution of the constructor, the provided System
43 /// object *must* outlive the kernel instance itself.
44 ///
45 explicit KernelCore(Core::System& system);
34 ~KernelCore(); 46 ~KernelCore();
35 47
36 KernelCore(const KernelCore&) = delete; 48 KernelCore(const KernelCore&) = delete;
@@ -40,11 +52,7 @@ public:
40 KernelCore& operator=(KernelCore&&) = delete; 52 KernelCore& operator=(KernelCore&&) = delete;
41 53
42 /// Resets the kernel to a clean slate for use. 54 /// Resets the kernel to a clean slate for use.
43 /// 55 void Initialize();
44 /// @param core_timing CoreTiming instance used to create any necessary
45 /// kernel-specific callback events.
46 ///
47 void Initialize(Core::Timing::CoreTiming& core_timing);
48 56
49 /// Clears all resources in use by the kernel instance. 57 /// Clears all resources in use by the kernel instance.
50 void Shutdown(); 58 void Shutdown();
diff --git a/src/core/hle/kernel/process.cpp b/src/core/hle/kernel/process.cpp
index c5aa19afa..49fced7b1 100644
--- a/src/core/hle/kernel/process.cpp
+++ b/src/core/hle/kernel/process.cpp
@@ -53,9 +53,10 @@ void SetupMainThread(Process& owner_process, KernelCore& kernel, VAddr entry_poi
53CodeSet::CodeSet() = default; 53CodeSet::CodeSet() = default;
54CodeSet::~CodeSet() = default; 54CodeSet::~CodeSet() = default;
55 55
56SharedPtr<Process> Process::Create(KernelCore& kernel, std::string&& name) { 56SharedPtr<Process> Process::Create(Core::System& system, std::string&& name) {
57 SharedPtr<Process> process(new Process(kernel)); 57 auto& kernel = system.Kernel();
58 58
59 SharedPtr<Process> process(new Process(system));
59 process->name = std::move(name); 60 process->name = std::move(name);
60 process->resource_limit = kernel.GetSystemResourceLimit(); 61 process->resource_limit = kernel.GetSystemResourceLimit();
61 process->status = ProcessStatus::Created; 62 process->status = ProcessStatus::Created;
@@ -99,7 +100,13 @@ ResultCode Process::LoadFromMetadata(const FileSys::ProgramMetadata& metadata) {
99 vm_manager.Reset(metadata.GetAddressSpaceType()); 100 vm_manager.Reset(metadata.GetAddressSpaceType());
100 101
101 const auto& caps = metadata.GetKernelCapabilities(); 102 const auto& caps = metadata.GetKernelCapabilities();
102 return capabilities.InitializeForUserProcess(caps.data(), caps.size(), vm_manager); 103 const auto capability_init_result =
104 capabilities.InitializeForUserProcess(caps.data(), caps.size(), vm_manager);
105 if (capability_init_result.IsError()) {
106 return capability_init_result;
107 }
108
109 return handle_table.SetSize(capabilities.GetHandleTableSize());
103} 110}
104 111
105void Process::Run(VAddr entry_point, s32 main_thread_priority, u32 stack_size) { 112void Process::Run(VAddr entry_point, s32 main_thread_priority, u32 stack_size) {
@@ -126,7 +133,7 @@ void Process::PrepareForTermination() {
126 if (thread->GetOwnerProcess() != this) 133 if (thread->GetOwnerProcess() != this)
127 continue; 134 continue;
128 135
129 if (thread == GetCurrentThread()) 136 if (thread == system.CurrentScheduler().GetCurrentThread())
130 continue; 137 continue;
131 138
132 // TODO(Subv): When are the other running/ready threads terminated? 139 // TODO(Subv): When are the other running/ready threads terminated?
@@ -138,7 +145,6 @@ void Process::PrepareForTermination() {
138 } 145 }
139 }; 146 };
140 147
141 const auto& system = Core::System::GetInstance();
142 stop_threads(system.Scheduler(0).GetThreadList()); 148 stop_threads(system.Scheduler(0).GetThreadList());
143 stop_threads(system.Scheduler(1).GetThreadList()); 149 stop_threads(system.Scheduler(1).GetThreadList());
144 stop_threads(system.Scheduler(2).GetThreadList()); 150 stop_threads(system.Scheduler(2).GetThreadList());
@@ -221,14 +227,12 @@ void Process::LoadModule(CodeSet module_, VAddr base_addr) {
221 MapSegment(module_.DataSegment(), VMAPermission::ReadWrite, MemoryState::CodeMutable); 227 MapSegment(module_.DataSegment(), VMAPermission::ReadWrite, MemoryState::CodeMutable);
222 228
223 // Clear instruction cache in CPU JIT 229 // Clear instruction cache in CPU JIT
224 Core::System::GetInstance().ArmInterface(0).ClearInstructionCache(); 230 system.InvalidateCpuInstructionCaches();
225 Core::System::GetInstance().ArmInterface(1).ClearInstructionCache();
226 Core::System::GetInstance().ArmInterface(2).ClearInstructionCache();
227 Core::System::GetInstance().ArmInterface(3).ClearInstructionCache();
228} 231}
229 232
230Kernel::Process::Process(KernelCore& kernel) : WaitObject{kernel} {} 233Process::Process(Core::System& system)
231Kernel::Process::~Process() {} 234 : WaitObject{system.Kernel()}, address_arbiter{system}, system{system} {}
235Process::~Process() = default;
232 236
233void Process::Acquire(Thread* thread) { 237void Process::Acquire(Thread* thread) {
234 ASSERT_MSG(!ShouldWait(thread), "Object unavailable!"); 238 ASSERT_MSG(!ShouldWait(thread), "Object unavailable!");
diff --git a/src/core/hle/kernel/process.h b/src/core/hle/kernel/process.h
index dcc57ae9f..47ffd4ad3 100644
--- a/src/core/hle/kernel/process.h
+++ b/src/core/hle/kernel/process.h
@@ -12,12 +12,17 @@
12#include <vector> 12#include <vector>
13#include <boost/container/static_vector.hpp> 13#include <boost/container/static_vector.hpp>
14#include "common/common_types.h" 14#include "common/common_types.h"
15#include "core/hle/kernel/address_arbiter.h"
15#include "core/hle/kernel/handle_table.h" 16#include "core/hle/kernel/handle_table.h"
16#include "core/hle/kernel/process_capability.h" 17#include "core/hle/kernel/process_capability.h"
17#include "core/hle/kernel/vm_manager.h" 18#include "core/hle/kernel/vm_manager.h"
18#include "core/hle/kernel/wait_object.h" 19#include "core/hle/kernel/wait_object.h"
19#include "core/hle/result.h" 20#include "core/hle/result.h"
20 21
22namespace Core {
23class System;
24}
25
21namespace FileSys { 26namespace FileSys {
22class ProgramMetadata; 27class ProgramMetadata;
23} 28}
@@ -116,7 +121,7 @@ public:
116 121
117 static constexpr std::size_t RANDOM_ENTROPY_SIZE = 4; 122 static constexpr std::size_t RANDOM_ENTROPY_SIZE = 4;
118 123
119 static SharedPtr<Process> Create(KernelCore& kernel, std::string&& name); 124 static SharedPtr<Process> Create(Core::System& system, std::string&& name);
120 125
121 std::string GetTypeName() const override { 126 std::string GetTypeName() const override {
122 return "Process"; 127 return "Process";
@@ -150,6 +155,16 @@ public:
150 return handle_table; 155 return handle_table;
151 } 156 }
152 157
158 /// Gets a reference to the process' address arbiter.
159 AddressArbiter& GetAddressArbiter() {
160 return address_arbiter;
161 }
162
163 /// Gets a const reference to the process' address arbiter.
164 const AddressArbiter& GetAddressArbiter() const {
165 return address_arbiter;
166 }
167
153 /// Gets the current status of the process 168 /// Gets the current status of the process
154 ProcessStatus GetStatus() const { 169 ProcessStatus GetStatus() const {
155 return status; 170 return status;
@@ -251,7 +266,7 @@ public:
251 void FreeTLSSlot(VAddr tls_address); 266 void FreeTLSSlot(VAddr tls_address);
252 267
253private: 268private:
254 explicit Process(KernelCore& kernel); 269 explicit Process(Core::System& system);
255 ~Process() override; 270 ~Process() override;
256 271
257 /// Checks if the specified thread should wait until this process is available. 272 /// Checks if the specified thread should wait until this process is available.
@@ -309,9 +324,16 @@ private:
309 /// Per-process handle table for storing created object handles in. 324 /// Per-process handle table for storing created object handles in.
310 HandleTable handle_table; 325 HandleTable handle_table;
311 326
327 /// Per-process address arbiter.
328 AddressArbiter address_arbiter;
329
312 /// Random values for svcGetInfo RandomEntropy 330 /// Random values for svcGetInfo RandomEntropy
313 std::array<u64, RANDOM_ENTROPY_SIZE> random_entropy; 331 std::array<u64, RANDOM_ENTROPY_SIZE> random_entropy;
314 332
333 /// System context
334 Core::System& system;
335
336 /// Name of this process
315 std::string name; 337 std::string name;
316}; 338};
317 339
diff --git a/src/core/hle/kernel/process_capability.cpp b/src/core/hle/kernel/process_capability.cpp
index 3a2164b25..583e35b79 100644
--- a/src/core/hle/kernel/process_capability.cpp
+++ b/src/core/hle/kernel/process_capability.cpp
@@ -96,7 +96,7 @@ void ProcessCapabilities::InitializeForMetadatalessProcess() {
96 interrupt_capabilities.set(); 96 interrupt_capabilities.set();
97 97
98 // Allow using the maximum possible amount of handles 98 // Allow using the maximum possible amount of handles
99 handle_table_size = static_cast<u32>(HandleTable::MAX_COUNT); 99 handle_table_size = static_cast<s32>(HandleTable::MAX_COUNT);
100 100
101 // Allow all debugging capabilities. 101 // Allow all debugging capabilities.
102 is_debuggable = true; 102 is_debuggable = true;
@@ -337,7 +337,7 @@ ResultCode ProcessCapabilities::HandleHandleTableFlags(u32 flags) {
337 return ERR_RESERVED_VALUE; 337 return ERR_RESERVED_VALUE;
338 } 338 }
339 339
340 handle_table_size = (flags >> 16) & 0x3FF; 340 handle_table_size = static_cast<s32>((flags >> 16) & 0x3FF);
341 return RESULT_SUCCESS; 341 return RESULT_SUCCESS;
342} 342}
343 343
diff --git a/src/core/hle/kernel/process_capability.h b/src/core/hle/kernel/process_capability.h
index fbc8812a3..5cdd80747 100644
--- a/src/core/hle/kernel/process_capability.h
+++ b/src/core/hle/kernel/process_capability.h
@@ -156,7 +156,7 @@ public:
156 } 156 }
157 157
158 /// Gets the number of total allowable handles for the process' handle table. 158 /// Gets the number of total allowable handles for the process' handle table.
159 u32 GetHandleTableSize() const { 159 s32 GetHandleTableSize() const {
160 return handle_table_size; 160 return handle_table_size;
161 } 161 }
162 162
@@ -252,7 +252,7 @@ private:
252 u64 core_mask = 0; 252 u64 core_mask = 0;
253 u64 priority_mask = 0; 253 u64 priority_mask = 0;
254 254
255 u32 handle_table_size = 0; 255 s32 handle_table_size = 0;
256 u32 kernel_version = 0; 256 u32 kernel_version = 0;
257 257
258 ProgramType program_type = ProgramType::SysModule; 258 ProgramType program_type = ProgramType::SysModule;
diff --git a/src/core/hle/kernel/scheduler.cpp b/src/core/hle/kernel/scheduler.cpp
index 44f30d070..e524509df 100644
--- a/src/core/hle/kernel/scheduler.cpp
+++ b/src/core/hle/kernel/scheduler.cpp
@@ -19,7 +19,8 @@ namespace Kernel {
19 19
20std::mutex Scheduler::scheduler_mutex; 20std::mutex Scheduler::scheduler_mutex;
21 21
22Scheduler::Scheduler(Core::ARM_Interface& cpu_core) : cpu_core(cpu_core) {} 22Scheduler::Scheduler(Core::System& system, Core::ARM_Interface& cpu_core)
23 : cpu_core{cpu_core}, system{system} {}
23 24
24Scheduler::~Scheduler() { 25Scheduler::~Scheduler() {
25 for (auto& thread : thread_list) { 26 for (auto& thread : thread_list) {
@@ -61,7 +62,7 @@ Thread* Scheduler::PopNextReadyThread() {
61 62
62void Scheduler::SwitchContext(Thread* new_thread) { 63void Scheduler::SwitchContext(Thread* new_thread) {
63 Thread* const previous_thread = GetCurrentThread(); 64 Thread* const previous_thread = GetCurrentThread();
64 Process* const previous_process = Core::CurrentProcess(); 65 Process* const previous_process = system.Kernel().CurrentProcess();
65 66
66 UpdateLastContextSwitchTime(previous_thread, previous_process); 67 UpdateLastContextSwitchTime(previous_thread, previous_process);
67 68
@@ -94,8 +95,8 @@ void Scheduler::SwitchContext(Thread* new_thread) {
94 95
95 auto* const thread_owner_process = current_thread->GetOwnerProcess(); 96 auto* const thread_owner_process = current_thread->GetOwnerProcess();
96 if (previous_process != thread_owner_process) { 97 if (previous_process != thread_owner_process) {
97 Core::System::GetInstance().Kernel().MakeCurrentProcess(thread_owner_process); 98 system.Kernel().MakeCurrentProcess(thread_owner_process);
98 SetCurrentPageTable(&Core::CurrentProcess()->VMManager().page_table); 99 SetCurrentPageTable(&thread_owner_process->VMManager().page_table);
99 } 100 }
100 101
101 cpu_core.LoadContext(new_thread->GetContext()); 102 cpu_core.LoadContext(new_thread->GetContext());
@@ -111,7 +112,7 @@ void Scheduler::SwitchContext(Thread* new_thread) {
111 112
112void Scheduler::UpdateLastContextSwitchTime(Thread* thread, Process* process) { 113void Scheduler::UpdateLastContextSwitchTime(Thread* thread, Process* process) {
113 const u64 prev_switch_ticks = last_context_switch_time; 114 const u64 prev_switch_ticks = last_context_switch_time;
114 const u64 most_recent_switch_ticks = Core::System::GetInstance().CoreTiming().GetTicks(); 115 const u64 most_recent_switch_ticks = system.CoreTiming().GetTicks();
115 const u64 update_ticks = most_recent_switch_ticks - prev_switch_ticks; 116 const u64 update_ticks = most_recent_switch_ticks - prev_switch_ticks;
116 117
117 if (thread != nullptr) { 118 if (thread != nullptr) {
@@ -198,8 +199,7 @@ void Scheduler::YieldWithoutLoadBalancing(Thread* thread) {
198 ASSERT(thread->GetPriority() < THREADPRIO_COUNT); 199 ASSERT(thread->GetPriority() < THREADPRIO_COUNT);
199 200
200 // Yield this thread -- sleep for zero time and force reschedule to different thread 201 // Yield this thread -- sleep for zero time and force reschedule to different thread
201 WaitCurrentThread_Sleep(); 202 GetCurrentThread()->Sleep(0);
202 GetCurrentThread()->WakeAfterDelay(0);
203} 203}
204 204
205void Scheduler::YieldWithLoadBalancing(Thread* thread) { 205void Scheduler::YieldWithLoadBalancing(Thread* thread) {
@@ -214,8 +214,7 @@ void Scheduler::YieldWithLoadBalancing(Thread* thread) {
214 ASSERT(priority < THREADPRIO_COUNT); 214 ASSERT(priority < THREADPRIO_COUNT);
215 215
216 // Sleep for zero time to be able to force reschedule to different thread 216 // Sleep for zero time to be able to force reschedule to different thread
217 WaitCurrentThread_Sleep(); 217 GetCurrentThread()->Sleep(0);
218 GetCurrentThread()->WakeAfterDelay(0);
219 218
220 Thread* suggested_thread = nullptr; 219 Thread* suggested_thread = nullptr;
221 220
@@ -223,8 +222,7 @@ void Scheduler::YieldWithLoadBalancing(Thread* thread) {
223 // Take the first non-nullptr one 222 // Take the first non-nullptr one
224 for (unsigned cur_core = 0; cur_core < Core::NUM_CPU_CORES; ++cur_core) { 223 for (unsigned cur_core = 0; cur_core < Core::NUM_CPU_CORES; ++cur_core) {
225 const auto res = 224 const auto res =
226 Core::System::GetInstance().CpuCore(cur_core).Scheduler().GetNextSuggestedThread( 225 system.CpuCore(cur_core).Scheduler().GetNextSuggestedThread(core, priority);
227 core, priority);
228 226
229 // If scheduler provides a suggested thread 227 // If scheduler provides a suggested thread
230 if (res != nullptr) { 228 if (res != nullptr) {
diff --git a/src/core/hle/kernel/scheduler.h b/src/core/hle/kernel/scheduler.h
index 97ced4dfc..1c5bf57d9 100644
--- a/src/core/hle/kernel/scheduler.h
+++ b/src/core/hle/kernel/scheduler.h
@@ -13,7 +13,8 @@
13 13
14namespace Core { 14namespace Core {
15class ARM_Interface; 15class ARM_Interface;
16} 16class System;
17} // namespace Core
17 18
18namespace Kernel { 19namespace Kernel {
19 20
@@ -21,7 +22,7 @@ class Process;
21 22
22class Scheduler final { 23class Scheduler final {
23public: 24public:
24 explicit Scheduler(Core::ARM_Interface& cpu_core); 25 explicit Scheduler(Core::System& system, Core::ARM_Interface& cpu_core);
25 ~Scheduler(); 26 ~Scheduler();
26 27
27 /// Returns whether there are any threads that are ready to run. 28 /// Returns whether there are any threads that are ready to run.
@@ -162,6 +163,7 @@ private:
162 Core::ARM_Interface& cpu_core; 163 Core::ARM_Interface& cpu_core;
163 u64 last_context_switch_time = 0; 164 u64 last_context_switch_time = 0;
164 165
166 Core::System& system;
165 static std::mutex scheduler_mutex; 167 static std::mutex scheduler_mutex;
166}; 168};
167 169
diff --git a/src/core/hle/kernel/server_port.cpp b/src/core/hle/kernel/server_port.cpp
index d6ceeb2da..0e1515c89 100644
--- a/src/core/hle/kernel/server_port.cpp
+++ b/src/core/hle/kernel/server_port.cpp
@@ -26,6 +26,10 @@ ResultVal<SharedPtr<ServerSession>> ServerPort::Accept() {
26 return MakeResult(std::move(session)); 26 return MakeResult(std::move(session));
27} 27}
28 28
29void ServerPort::AppendPendingSession(SharedPtr<ServerSession> pending_session) {
30 pending_sessions.push_back(std::move(pending_session));
31}
32
29bool ServerPort::ShouldWait(Thread* thread) const { 33bool ServerPort::ShouldWait(Thread* thread) const {
30 // If there are no pending sessions, we wait until a new one is added. 34 // If there are no pending sessions, we wait until a new one is added.
31 return pending_sessions.empty(); 35 return pending_sessions.empty();
diff --git a/src/core/hle/kernel/server_port.h b/src/core/hle/kernel/server_port.h
index e52f8245f..9bc667cf2 100644
--- a/src/core/hle/kernel/server_port.h
+++ b/src/core/hle/kernel/server_port.h
@@ -22,6 +22,8 @@ class SessionRequestHandler;
22 22
23class ServerPort final : public WaitObject { 23class ServerPort final : public WaitObject {
24public: 24public:
25 using HLEHandler = std::shared_ptr<SessionRequestHandler>;
26
25 /** 27 /**
26 * Creates a pair of ServerPort and an associated ClientPort. 28 * Creates a pair of ServerPort and an associated ClientPort.
27 * 29 *
@@ -51,22 +53,27 @@ public:
51 */ 53 */
52 ResultVal<SharedPtr<ServerSession>> Accept(); 54 ResultVal<SharedPtr<ServerSession>> Accept();
53 55
56 /// Whether or not this server port has an HLE handler available.
57 bool HasHLEHandler() const {
58 return hle_handler != nullptr;
59 }
60
61 /// Gets the HLE handler for this port.
62 HLEHandler GetHLEHandler() const {
63 return hle_handler;
64 }
65
54 /** 66 /**
55 * Sets the HLE handler template for the port. ServerSessions crated by connecting to this port 67 * Sets the HLE handler template for the port. ServerSessions crated by connecting to this port
56 * will inherit a reference to this handler. 68 * will inherit a reference to this handler.
57 */ 69 */
58 void SetHleHandler(std::shared_ptr<SessionRequestHandler> hle_handler_) { 70 void SetHleHandler(HLEHandler hle_handler_) {
59 hle_handler = std::move(hle_handler_); 71 hle_handler = std::move(hle_handler_);
60 } 72 }
61 73
62 std::string name; ///< Name of port (optional) 74 /// Appends a ServerSession to the collection of ServerSessions
63 75 /// waiting to be accepted by this port.
64 /// ServerSessions waiting to be accepted by the port 76 void AppendPendingSession(SharedPtr<ServerSession> pending_session);
65 std::vector<SharedPtr<ServerSession>> pending_sessions;
66
67 /// This session's HLE request handler template (optional)
68 /// ServerSessions created from this port inherit a reference to this handler.
69 std::shared_ptr<SessionRequestHandler> hle_handler;
70 77
71 bool ShouldWait(Thread* thread) const override; 78 bool ShouldWait(Thread* thread) const override;
72 void Acquire(Thread* thread) override; 79 void Acquire(Thread* thread) override;
@@ -74,6 +81,16 @@ public:
74private: 81private:
75 explicit ServerPort(KernelCore& kernel); 82 explicit ServerPort(KernelCore& kernel);
76 ~ServerPort() override; 83 ~ServerPort() override;
84
85 /// ServerSessions waiting to be accepted by the port
86 std::vector<SharedPtr<ServerSession>> pending_sessions;
87
88 /// This session's HLE request handler template (optional)
89 /// ServerSessions created from this port inherit a reference to this handler.
90 HLEHandler hle_handler;
91
92 /// Name of the port (optional)
93 std::string name;
77}; 94};
78 95
79} // namespace Kernel 96} // namespace Kernel
diff --git a/src/core/hle/kernel/server_session.cpp b/src/core/hle/kernel/server_session.cpp
index 027434f92..4d8a337a7 100644
--- a/src/core/hle/kernel/server_session.cpp
+++ b/src/core/hle/kernel/server_session.cpp
@@ -63,42 +63,71 @@ void ServerSession::Acquire(Thread* thread) {
63 pending_requesting_threads.pop_back(); 63 pending_requesting_threads.pop_back();
64} 64}
65 65
66void ServerSession::ClientDisconnected() {
67 // We keep a shared pointer to the hle handler to keep it alive throughout
68 // the call to ClientDisconnected, as ClientDisconnected invalidates the
69 // hle_handler member itself during the course of the function executing.
70 std::shared_ptr<SessionRequestHandler> handler = hle_handler;
71 if (handler) {
72 // Note that after this returns, this server session's hle_handler is
73 // invalidated (set to null).
74 handler->ClientDisconnected(this);
75 }
76
77 // TODO(Subv): Force a wake up of all the ServerSession's waiting threads and set
78 // their WaitSynchronization result to 0xC920181A.
79
80 // Clean up the list of client threads with pending requests, they are unneeded now that the
81 // client endpoint is closed.
82 pending_requesting_threads.clear();
83 currently_handling = nullptr;
84}
85
86void ServerSession::AppendDomainRequestHandler(std::shared_ptr<SessionRequestHandler> handler) {
87 domain_request_handlers.push_back(std::move(handler));
88}
89
90std::size_t ServerSession::NumDomainRequestHandlers() const {
91 return domain_request_handlers.size();
92}
93
66ResultCode ServerSession::HandleDomainSyncRequest(Kernel::HLERequestContext& context) { 94ResultCode ServerSession::HandleDomainSyncRequest(Kernel::HLERequestContext& context) {
67 auto* const domain_message_header = context.GetDomainMessageHeader(); 95 if (!context.HasDomainMessageHeader()) {
68 if (domain_message_header) { 96 return RESULT_SUCCESS;
69 // Set domain handlers in HLE context, used for domain objects (IPC interfaces) as inputs 97 }
70 context.SetDomainRequestHandlers(domain_request_handlers); 98
71 99 // Set domain handlers in HLE context, used for domain objects (IPC interfaces) as inputs
72 // If there is a DomainMessageHeader, then this is CommandType "Request" 100 context.SetDomainRequestHandlers(domain_request_handlers);
73 const u32 object_id{context.GetDomainMessageHeader()->object_id}; 101
74 switch (domain_message_header->command) { 102 // If there is a DomainMessageHeader, then this is CommandType "Request"
75 case IPC::DomainMessageHeader::CommandType::SendMessage: 103 const auto& domain_message_header = context.GetDomainMessageHeader();
76 if (object_id > domain_request_handlers.size()) { 104 const u32 object_id{domain_message_header.object_id};
77 LOG_CRITICAL(IPC, 105 switch (domain_message_header.command) {
78 "object_id {} is too big! This probably means a recent service call " 106 case IPC::DomainMessageHeader::CommandType::SendMessage:
79 "to {} needed to return a new interface!", 107 if (object_id > domain_request_handlers.size()) {
80 object_id, name); 108 LOG_CRITICAL(IPC,
81 UNREACHABLE(); 109 "object_id {} is too big! This probably means a recent service call "
82 return RESULT_SUCCESS; // Ignore error if asserts are off 110 "to {} needed to return a new interface!",
83 } 111 object_id, name);
84 return domain_request_handlers[object_id - 1]->HandleSyncRequest(context); 112 UNREACHABLE();
85 113 return RESULT_SUCCESS; // Ignore error if asserts are off
86 case IPC::DomainMessageHeader::CommandType::CloseVirtualHandle: {
87 LOG_DEBUG(IPC, "CloseVirtualHandle, object_id=0x{:08X}", object_id);
88
89 domain_request_handlers[object_id - 1] = nullptr;
90
91 IPC::ResponseBuilder rb{context, 2};
92 rb.Push(RESULT_SUCCESS);
93 return RESULT_SUCCESS;
94 }
95 } 114 }
115 return domain_request_handlers[object_id - 1]->HandleSyncRequest(context);
96 116
97 LOG_CRITICAL(IPC, "Unknown domain command={}", 117 case IPC::DomainMessageHeader::CommandType::CloseVirtualHandle: {
98 static_cast<int>(domain_message_header->command.Value())); 118 LOG_DEBUG(IPC, "CloseVirtualHandle, object_id=0x{:08X}", object_id);
99 ASSERT(false); 119
120 domain_request_handlers[object_id - 1] = nullptr;
121
122 IPC::ResponseBuilder rb{context, 2};
123 rb.Push(RESULT_SUCCESS);
124 return RESULT_SUCCESS;
125 }
100 } 126 }
101 127
128 LOG_CRITICAL(IPC, "Unknown domain command={}",
129 static_cast<int>(domain_message_header.command.Value()));
130 ASSERT(false);
102 return RESULT_SUCCESS; 131 return RESULT_SUCCESS;
103} 132}
104 133
diff --git a/src/core/hle/kernel/server_session.h b/src/core/hle/kernel/server_session.h
index e0e9d64c8..aea4ccfeb 100644
--- a/src/core/hle/kernel/server_session.h
+++ b/src/core/hle/kernel/server_session.h
@@ -46,6 +46,14 @@ public:
46 return HANDLE_TYPE; 46 return HANDLE_TYPE;
47 } 47 }
48 48
49 Session* GetParent() {
50 return parent.get();
51 }
52
53 const Session* GetParent() const {
54 return parent.get();
55 }
56
49 using SessionPair = std::tuple<SharedPtr<ServerSession>, SharedPtr<ClientSession>>; 57 using SessionPair = std::tuple<SharedPtr<ServerSession>, SharedPtr<ClientSession>>;
50 58
51 /** 59 /**
@@ -78,23 +86,16 @@ public:
78 86
79 void Acquire(Thread* thread) override; 87 void Acquire(Thread* thread) override;
80 88
81 std::string name; ///< The name of this session (optional) 89 /// Called when a client disconnection occurs.
82 std::shared_ptr<Session> parent; ///< The parent session, which links to the client endpoint. 90 void ClientDisconnected();
83 std::shared_ptr<SessionRequestHandler>
84 hle_handler; ///< This session's HLE request handler (applicable when not a domain)
85 91
86 /// This is the list of domain request handlers (after conversion to a domain) 92 /// Adds a new domain request handler to the collection of request handlers within
87 std::vector<std::shared_ptr<SessionRequestHandler>> domain_request_handlers; 93 /// this ServerSession instance.
88 94 void AppendDomainRequestHandler(std::shared_ptr<SessionRequestHandler> handler);
89 /// List of threads that are pending a response after a sync request. This list is processed in
90 /// a LIFO manner, thus, the last request will be dispatched first.
91 /// TODO(Subv): Verify if this is indeed processed in LIFO using a hardware test.
92 std::vector<SharedPtr<Thread>> pending_requesting_threads;
93 95
94 /// Thread whose request is currently being handled. A request is considered "handled" when a 96 /// Retrieves the total number of domain request handlers that have been
95 /// response is sent via svcReplyAndReceive. 97 /// appended to this ServerSession instance.
96 /// TODO(Subv): Find a better name for this. 98 std::size_t NumDomainRequestHandlers() const;
97 SharedPtr<Thread> currently_handling;
98 99
99 /// Returns true if the session has been converted to a domain, otherwise False 100 /// Returns true if the session has been converted to a domain, otherwise False
100 bool IsDomain() const { 101 bool IsDomain() const {
@@ -129,8 +130,30 @@ private:
129 /// object handle. 130 /// object handle.
130 ResultCode HandleDomainSyncRequest(Kernel::HLERequestContext& context); 131 ResultCode HandleDomainSyncRequest(Kernel::HLERequestContext& context);
131 132
133 /// The parent session, which links to the client endpoint.
134 std::shared_ptr<Session> parent;
135
136 /// This session's HLE request handler (applicable when not a domain)
137 std::shared_ptr<SessionRequestHandler> hle_handler;
138
139 /// This is the list of domain request handlers (after conversion to a domain)
140 std::vector<std::shared_ptr<SessionRequestHandler>> domain_request_handlers;
141
142 /// List of threads that are pending a response after a sync request. This list is processed in
143 /// a LIFO manner, thus, the last request will be dispatched first.
144 /// TODO(Subv): Verify if this is indeed processed in LIFO using a hardware test.
145 std::vector<SharedPtr<Thread>> pending_requesting_threads;
146
147 /// Thread whose request is currently being handled. A request is considered "handled" when a
148 /// response is sent via svcReplyAndReceive.
149 /// TODO(Subv): Find a better name for this.
150 SharedPtr<Thread> currently_handling;
151
132 /// When set to True, converts the session to a domain at the end of the command 152 /// When set to True, converts the session to a domain at the end of the command
133 bool convert_to_domain{}; 153 bool convert_to_domain{};
154
155 /// The name of this session (optional)
156 std::string name;
134}; 157};
135 158
136} // namespace Kernel 159} // namespace Kernel
diff --git a/src/core/hle/kernel/shared_memory.cpp b/src/core/hle/kernel/shared_memory.cpp
index 22d0c1dd5..62861da36 100644
--- a/src/core/hle/kernel/shared_memory.cpp
+++ b/src/core/hle/kernel/shared_memory.cpp
@@ -6,7 +6,6 @@
6 6
7#include "common/assert.h" 7#include "common/assert.h"
8#include "common/logging/log.h" 8#include "common/logging/log.h"
9#include "core/core.h"
10#include "core/hle/kernel/errors.h" 9#include "core/hle/kernel/errors.h"
11#include "core/hle/kernel/kernel.h" 10#include "core/hle/kernel/kernel.h"
12#include "core/hle/kernel/shared_memory.h" 11#include "core/hle/kernel/shared_memory.h"
@@ -34,8 +33,8 @@ SharedPtr<SharedMemory> SharedMemory::Create(KernelCore& kernel, Process* owner_
34 shared_memory->backing_block_offset = 0; 33 shared_memory->backing_block_offset = 0;
35 34
36 // Refresh the address mappings for the current process. 35 // Refresh the address mappings for the current process.
37 if (Core::CurrentProcess() != nullptr) { 36 if (kernel.CurrentProcess() != nullptr) {
38 Core::CurrentProcess()->VMManager().RefreshMemoryBlockMappings( 37 kernel.CurrentProcess()->VMManager().RefreshMemoryBlockMappings(
39 shared_memory->backing_block.get()); 38 shared_memory->backing_block.get());
40 } 39 }
41 } else { 40 } else {
diff --git a/src/core/hle/kernel/svc.cpp b/src/core/hle/kernel/svc.cpp
index c5d399bab..047fa0c19 100644
--- a/src/core/hle/kernel/svc.cpp
+++ b/src/core/hle/kernel/svc.cpp
@@ -20,6 +20,7 @@
20#include "core/hle/kernel/address_arbiter.h" 20#include "core/hle/kernel/address_arbiter.h"
21#include "core/hle/kernel/client_port.h" 21#include "core/hle/kernel/client_port.h"
22#include "core/hle/kernel/client_session.h" 22#include "core/hle/kernel/client_session.h"
23#include "core/hle/kernel/errors.h"
23#include "core/hle/kernel/handle_table.h" 24#include "core/hle/kernel/handle_table.h"
24#include "core/hle/kernel/kernel.h" 25#include "core/hle/kernel/kernel.h"
25#include "core/hle/kernel/mutex.h" 26#include "core/hle/kernel/mutex.h"
@@ -47,23 +48,6 @@ constexpr bool IsValidAddressRange(VAddr address, u64 size) {
47 return address + size > address; 48 return address + size > address;
48} 49}
49 50
50// Checks if a given address range lies within a larger address range.
51constexpr bool IsInsideAddressRange(VAddr address, u64 size, VAddr address_range_begin,
52 VAddr address_range_end) {
53 const VAddr end_address = address + size - 1;
54 return address_range_begin <= address && end_address <= address_range_end - 1;
55}
56
57bool IsInsideAddressSpace(const VMManager& vm, VAddr address, u64 size) {
58 return IsInsideAddressRange(address, size, vm.GetAddressSpaceBaseAddress(),
59 vm.GetAddressSpaceEndAddress());
60}
61
62bool IsInsideNewMapRegion(const VMManager& vm, VAddr address, u64 size) {
63 return IsInsideAddressRange(address, size, vm.GetNewMapRegionBaseAddress(),
64 vm.GetNewMapRegionEndAddress());
65}
66
67// 8 GiB 51// 8 GiB
68constexpr u64 MAIN_MEMORY_SIZE = 0x200000000; 52constexpr u64 MAIN_MEMORY_SIZE = 0x200000000;
69 53
@@ -105,14 +89,14 @@ ResultCode MapUnmapMemorySanityChecks(const VMManager& vm_manager, VAddr dst_add
105 return ERR_INVALID_ADDRESS_STATE; 89 return ERR_INVALID_ADDRESS_STATE;
106 } 90 }
107 91
108 if (!IsInsideAddressSpace(vm_manager, src_addr, size)) { 92 if (!vm_manager.IsWithinAddressSpace(src_addr, size)) {
109 LOG_ERROR(Kernel_SVC, 93 LOG_ERROR(Kernel_SVC,
110 "Source is not within the address space, addr=0x{:016X}, size=0x{:016X}", 94 "Source is not within the address space, addr=0x{:016X}, size=0x{:016X}",
111 src_addr, size); 95 src_addr, size);
112 return ERR_INVALID_ADDRESS_STATE; 96 return ERR_INVALID_ADDRESS_STATE;
113 } 97 }
114 98
115 if (!IsInsideNewMapRegion(vm_manager, dst_addr, size)) { 99 if (!vm_manager.IsWithinNewMapRegion(dst_addr, size)) {
116 LOG_ERROR(Kernel_SVC, 100 LOG_ERROR(Kernel_SVC,
117 "Destination is not within the new map region, addr=0x{:016X}, size=0x{:016X}", 101 "Destination is not within the new map region, addr=0x{:016X}, size=0x{:016X}",
118 dst_addr, size); 102 dst_addr, size);
@@ -238,7 +222,7 @@ static ResultCode SetMemoryPermission(VAddr addr, u64 size, u32 prot) {
238 auto* const current_process = Core::CurrentProcess(); 222 auto* const current_process = Core::CurrentProcess();
239 auto& vm_manager = current_process->VMManager(); 223 auto& vm_manager = current_process->VMManager();
240 224
241 if (!IsInsideAddressSpace(vm_manager, addr, size)) { 225 if (!vm_manager.IsWithinAddressSpace(addr, size)) {
242 LOG_ERROR(Kernel_SVC, 226 LOG_ERROR(Kernel_SVC,
243 "Source is not within the address space, addr=0x{:016X}, size=0x{:016X}", addr, 227 "Source is not within the address space, addr=0x{:016X}, size=0x{:016X}", addr,
244 size); 228 size);
@@ -299,7 +283,7 @@ static ResultCode SetMemoryAttribute(VAddr address, u64 size, u32 mask, u32 attr
299 } 283 }
300 284
301 auto& vm_manager = Core::CurrentProcess()->VMManager(); 285 auto& vm_manager = Core::CurrentProcess()->VMManager();
302 if (!IsInsideAddressSpace(vm_manager, address, size)) { 286 if (!vm_manager.IsWithinAddressSpace(address, size)) {
303 LOG_ERROR(Kernel_SVC, 287 LOG_ERROR(Kernel_SVC,
304 "Given address (0x{:016X}) is outside the bounds of the address space.", address); 288 "Given address (0x{:016X}) is outside the bounds of the address space.", address);
305 return ERR_INVALID_ADDRESS_STATE; 289 return ERR_INVALID_ADDRESS_STATE;
@@ -1300,10 +1284,14 @@ static ResultCode StartThread(Handle thread_handle) {
1300 1284
1301/// Called when a thread exits 1285/// Called when a thread exits
1302static void ExitThread() { 1286static void ExitThread() {
1303 LOG_TRACE(Kernel_SVC, "called, pc=0x{:08X}", Core::CurrentArmInterface().GetPC()); 1287 auto& system = Core::System::GetInstance();
1304 1288
1305 ExitCurrentThread(); 1289 LOG_TRACE(Kernel_SVC, "called, pc=0x{:08X}", system.CurrentArmInterface().GetPC());
1306 Core::System::GetInstance().PrepareReschedule(); 1290
1291 auto* const current_thread = system.CurrentScheduler().GetCurrentThread();
1292 current_thread->Stop();
1293 system.CurrentScheduler().RemoveThread(current_thread);
1294 system.PrepareReschedule();
1307} 1295}
1308 1296
1309/// Sleep the current thread 1297/// Sleep the current thread
@@ -1316,32 +1304,32 @@ static void SleepThread(s64 nanoseconds) {
1316 YieldAndWaitForLoadBalancing = -2, 1304 YieldAndWaitForLoadBalancing = -2,
1317 }; 1305 };
1318 1306
1307 auto& system = Core::System::GetInstance();
1308 auto& scheduler = system.CurrentScheduler();
1309 auto* const current_thread = scheduler.GetCurrentThread();
1310
1319 if (nanoseconds <= 0) { 1311 if (nanoseconds <= 0) {
1320 auto& scheduler{Core::System::GetInstance().CurrentScheduler()};
1321 switch (static_cast<SleepType>(nanoseconds)) { 1312 switch (static_cast<SleepType>(nanoseconds)) {
1322 case SleepType::YieldWithoutLoadBalancing: 1313 case SleepType::YieldWithoutLoadBalancing:
1323 scheduler.YieldWithoutLoadBalancing(GetCurrentThread()); 1314 scheduler.YieldWithoutLoadBalancing(current_thread);
1324 break; 1315 break;
1325 case SleepType::YieldWithLoadBalancing: 1316 case SleepType::YieldWithLoadBalancing:
1326 scheduler.YieldWithLoadBalancing(GetCurrentThread()); 1317 scheduler.YieldWithLoadBalancing(current_thread);
1327 break; 1318 break;
1328 case SleepType::YieldAndWaitForLoadBalancing: 1319 case SleepType::YieldAndWaitForLoadBalancing:
1329 scheduler.YieldAndWaitForLoadBalancing(GetCurrentThread()); 1320 scheduler.YieldAndWaitForLoadBalancing(current_thread);
1330 break; 1321 break;
1331 default: 1322 default:
1332 UNREACHABLE_MSG("Unimplemented sleep yield type '{:016X}'!", nanoseconds); 1323 UNREACHABLE_MSG("Unimplemented sleep yield type '{:016X}'!", nanoseconds);
1333 } 1324 }
1334 } else { 1325 } else {
1335 // Sleep current thread and check for next thread to schedule 1326 current_thread->Sleep(nanoseconds);
1336 WaitCurrentThread_Sleep();
1337
1338 // Create an event to wake the thread up after the specified nanosecond delay has passed
1339 GetCurrentThread()->WakeAfterDelay(nanoseconds);
1340 } 1327 }
1341 1328
1342 // Reschedule all CPU cores 1329 // Reschedule all CPU cores
1343 for (std::size_t i = 0; i < Core::NUM_CPU_CORES; ++i) 1330 for (std::size_t i = 0; i < Core::NUM_CPU_CORES; ++i) {
1344 Core::System::GetInstance().CpuCore(i).PrepareReschedule(); 1331 system.CpuCore(i).PrepareReschedule();
1332 }
1345} 1333}
1346 1334
1347/// Wait process wide key atomic 1335/// Wait process wide key atomic
@@ -1495,20 +1483,10 @@ static ResultCode WaitForAddress(VAddr address, u32 type, s32 value, s64 timeout
1495 return ERR_INVALID_ADDRESS; 1483 return ERR_INVALID_ADDRESS;
1496 } 1484 }
1497 1485
1498 switch (static_cast<AddressArbiter::ArbitrationType>(type)) { 1486 const auto arbitration_type = static_cast<AddressArbiter::ArbitrationType>(type);
1499 case AddressArbiter::ArbitrationType::WaitIfLessThan: 1487 auto& address_arbiter =
1500 return AddressArbiter::WaitForAddressIfLessThan(address, value, timeout, false); 1488 Core::System::GetInstance().Kernel().CurrentProcess()->GetAddressArbiter();
1501 case AddressArbiter::ArbitrationType::DecrementAndWaitIfLessThan: 1489 return address_arbiter.WaitForAddress(address, arbitration_type, value, timeout);
1502 return AddressArbiter::WaitForAddressIfLessThan(address, value, timeout, true);
1503 case AddressArbiter::ArbitrationType::WaitIfEqual:
1504 return AddressArbiter::WaitForAddressIfEqual(address, value, timeout);
1505 default:
1506 LOG_ERROR(Kernel_SVC,
1507 "Invalid arbitration type, expected WaitIfLessThan, DecrementAndWaitIfLessThan "
1508 "or WaitIfEqual but got {}",
1509 type);
1510 return ERR_INVALID_ENUM_VALUE;
1511 }
1512} 1490}
1513 1491
1514// Signals to an address (via Address Arbiter) 1492// Signals to an address (via Address Arbiter)
@@ -1526,21 +1504,10 @@ static ResultCode SignalToAddress(VAddr address, u32 type, s32 value, s32 num_to
1526 return ERR_INVALID_ADDRESS; 1504 return ERR_INVALID_ADDRESS;
1527 } 1505 }
1528 1506
1529 switch (static_cast<AddressArbiter::SignalType>(type)) { 1507 const auto signal_type = static_cast<AddressArbiter::SignalType>(type);
1530 case AddressArbiter::SignalType::Signal: 1508 auto& address_arbiter =
1531 return AddressArbiter::SignalToAddress(address, num_to_wake); 1509 Core::System::GetInstance().Kernel().CurrentProcess()->GetAddressArbiter();
1532 case AddressArbiter::SignalType::IncrementAndSignalIfEqual: 1510 return address_arbiter.SignalToAddress(address, signal_type, value, num_to_wake);
1533 return AddressArbiter::IncrementAndSignalToAddressIfEqual(address, value, num_to_wake);
1534 case AddressArbiter::SignalType::ModifyByWaitingCountAndSignalIfEqual:
1535 return AddressArbiter::ModifyByWaitingCountAndSignalToAddressIfEqual(address, value,
1536 num_to_wake);
1537 default:
1538 LOG_ERROR(Kernel_SVC,
1539 "Invalid signal type, expected Signal, IncrementAndSignalIfEqual "
1540 "or ModifyByWaitingCountAndSignalIfEqual but got {}",
1541 type);
1542 return ERR_INVALID_ENUM_VALUE;
1543 }
1544} 1511}
1545 1512
1546/// This returns the total CPU ticks elapsed since the CPU was powered-on 1513/// This returns the total CPU ticks elapsed since the CPU was powered-on
diff --git a/src/core/hle/kernel/thread.cpp b/src/core/hle/kernel/thread.cpp
index 6661e2130..2e712c9cb 100644
--- a/src/core/hle/kernel/thread.cpp
+++ b/src/core/hle/kernel/thread.cpp
@@ -68,11 +68,6 @@ void Thread::Stop() {
68 owner_process->FreeTLSSlot(tls_address); 68 owner_process->FreeTLSSlot(tls_address);
69} 69}
70 70
71void WaitCurrentThread_Sleep() {
72 Thread* thread = GetCurrentThread();
73 thread->SetStatus(ThreadStatus::WaitSleep);
74}
75
76void ExitCurrentThread() { 71void ExitCurrentThread() {
77 Thread* thread = GetCurrentThread(); 72 Thread* thread = GetCurrentThread();
78 thread->Stop(); 73 thread->Stop();
@@ -184,8 +179,6 @@ ResultVal<SharedPtr<Thread>> Thread::Create(KernelCore& kernel, std::string name
184 return ERR_INVALID_PROCESSOR_ID; 179 return ERR_INVALID_PROCESSOR_ID;
185 } 180 }
186 181
187 // TODO(yuriks): Other checks, returning 0xD9001BEA
188
189 if (!Memory::IsValidVirtualAddress(owner_process, entry_point)) { 182 if (!Memory::IsValidVirtualAddress(owner_process, entry_point)) {
190 LOG_ERROR(Kernel_SVC, "(name={}): invalid entry {:016X}", name, entry_point); 183 LOG_ERROR(Kernel_SVC, "(name={}): invalid entry {:016X}", name, entry_point);
191 // TODO (bunnei): Find the correct error code to use here 184 // TODO (bunnei): Find the correct error code to use here
@@ -393,6 +386,14 @@ void Thread::SetActivity(ThreadActivity value) {
393 } 386 }
394} 387}
395 388
389void Thread::Sleep(s64 nanoseconds) {
390 // Sleep current thread and check for next thread to schedule
391 SetStatus(ThreadStatus::WaitSleep);
392
393 // Create an event to wake the thread up after the specified nanosecond delay has passed
394 WakeAfterDelay(nanoseconds);
395}
396
396//////////////////////////////////////////////////////////////////////////////////////////////////// 397////////////////////////////////////////////////////////////////////////////////////////////////////
397 398
398/** 399/**
diff --git a/src/core/hle/kernel/thread.h b/src/core/hle/kernel/thread.h
index c48b21aba..ccdefeecc 100644
--- a/src/core/hle/kernel/thread.h
+++ b/src/core/hle/kernel/thread.h
@@ -383,6 +383,9 @@ public:
383 383
384 void SetActivity(ThreadActivity value); 384 void SetActivity(ThreadActivity value);
385 385
386 /// Sleeps this thread for the given amount of nanoseconds.
387 void Sleep(s64 nanoseconds);
388
386private: 389private:
387 explicit Thread(KernelCore& kernel); 390 explicit Thread(KernelCore& kernel);
388 ~Thread() override; 391 ~Thread() override;
@@ -460,14 +463,4 @@ private:
460 */ 463 */
461Thread* GetCurrentThread(); 464Thread* GetCurrentThread();
462 465
463/**
464 * Waits the current thread on a sleep
465 */
466void WaitCurrentThread_Sleep();
467
468/**
469 * Stops the current thread and removes it from the thread_list
470 */
471void ExitCurrentThread();
472
473} // namespace Kernel 466} // namespace Kernel
diff --git a/src/core/hle/kernel/vm_manager.cpp b/src/core/hle/kernel/vm_manager.cpp
index 10ad94aa6..05c59af34 100644
--- a/src/core/hle/kernel/vm_manager.cpp
+++ b/src/core/hle/kernel/vm_manager.cpp
@@ -17,8 +17,8 @@
17#include "core/memory_setup.h" 17#include "core/memory_setup.h"
18 18
19namespace Kernel { 19namespace Kernel {
20 20namespace {
21static const char* GetMemoryStateName(MemoryState state) { 21const char* GetMemoryStateName(MemoryState state) {
22 static constexpr const char* names[] = { 22 static constexpr const char* names[] = {
23 "Unmapped", "Io", 23 "Unmapped", "Io",
24 "Normal", "CodeStatic", 24 "Normal", "CodeStatic",
@@ -35,6 +35,14 @@ static const char* GetMemoryStateName(MemoryState state) {
35 return names[ToSvcMemoryState(state)]; 35 return names[ToSvcMemoryState(state)];
36} 36}
37 37
38// Checks if a given address range lies within a larger address range.
39constexpr bool IsInsideAddressRange(VAddr address, u64 size, VAddr address_range_begin,
40 VAddr address_range_end) {
41 const VAddr end_address = address + size - 1;
42 return address_range_begin <= address && end_address <= address_range_end - 1;
43}
44} // Anonymous namespace
45
38bool VirtualMemoryArea::CanBeMergedWith(const VirtualMemoryArea& next) const { 46bool VirtualMemoryArea::CanBeMergedWith(const VirtualMemoryArea& next) const {
39 ASSERT(base + size == next.base); 47 ASSERT(base + size == next.base);
40 if (permissions != next.permissions || state != next.state || attribute != next.attribute || 48 if (permissions != next.permissions || state != next.state || attribute != next.attribute ||
@@ -249,8 +257,7 @@ ResultCode VMManager::ReprotectRange(VAddr target, u64 size, VMAPermission new_p
249} 257}
250 258
251ResultVal<VAddr> VMManager::HeapAllocate(VAddr target, u64 size, VMAPermission perms) { 259ResultVal<VAddr> VMManager::HeapAllocate(VAddr target, u64 size, VMAPermission perms) {
252 if (target < GetHeapRegionBaseAddress() || target + size > GetHeapRegionEndAddress() || 260 if (!IsWithinHeapRegion(target, size)) {
253 target + size < target) {
254 return ERR_INVALID_ADDRESS; 261 return ERR_INVALID_ADDRESS;
255 } 262 }
256 263
@@ -285,8 +292,7 @@ ResultVal<VAddr> VMManager::HeapAllocate(VAddr target, u64 size, VMAPermission p
285} 292}
286 293
287ResultCode VMManager::HeapFree(VAddr target, u64 size) { 294ResultCode VMManager::HeapFree(VAddr target, u64 size) {
288 if (target < GetHeapRegionBaseAddress() || target + size > GetHeapRegionEndAddress() || 295 if (!IsWithinHeapRegion(target, size)) {
289 target + size < target) {
290 return ERR_INVALID_ADDRESS; 296 return ERR_INVALID_ADDRESS;
291 } 297 }
292 298
@@ -706,6 +712,11 @@ u64 VMManager::GetAddressSpaceWidth() const {
706 return address_space_width; 712 return address_space_width;
707} 713}
708 714
715bool VMManager::IsWithinAddressSpace(VAddr address, u64 size) const {
716 return IsInsideAddressRange(address, size, GetAddressSpaceBaseAddress(),
717 GetAddressSpaceEndAddress());
718}
719
709VAddr VMManager::GetASLRRegionBaseAddress() const { 720VAddr VMManager::GetASLRRegionBaseAddress() const {
710 return aslr_region_base; 721 return aslr_region_base;
711} 722}
@@ -750,6 +761,11 @@ u64 VMManager::GetCodeRegionSize() const {
750 return code_region_end - code_region_base; 761 return code_region_end - code_region_base;
751} 762}
752 763
764bool VMManager::IsWithinCodeRegion(VAddr address, u64 size) const {
765 return IsInsideAddressRange(address, size, GetCodeRegionBaseAddress(),
766 GetCodeRegionEndAddress());
767}
768
753VAddr VMManager::GetHeapRegionBaseAddress() const { 769VAddr VMManager::GetHeapRegionBaseAddress() const {
754 return heap_region_base; 770 return heap_region_base;
755} 771}
@@ -762,6 +778,11 @@ u64 VMManager::GetHeapRegionSize() const {
762 return heap_region_end - heap_region_base; 778 return heap_region_end - heap_region_base;
763} 779}
764 780
781bool VMManager::IsWithinHeapRegion(VAddr address, u64 size) const {
782 return IsInsideAddressRange(address, size, GetHeapRegionBaseAddress(),
783 GetHeapRegionEndAddress());
784}
785
765VAddr VMManager::GetMapRegionBaseAddress() const { 786VAddr VMManager::GetMapRegionBaseAddress() const {
766 return map_region_base; 787 return map_region_base;
767} 788}
@@ -774,6 +795,10 @@ u64 VMManager::GetMapRegionSize() const {
774 return map_region_end - map_region_base; 795 return map_region_end - map_region_base;
775} 796}
776 797
798bool VMManager::IsWithinMapRegion(VAddr address, u64 size) const {
799 return IsInsideAddressRange(address, size, GetMapRegionBaseAddress(), GetMapRegionEndAddress());
800}
801
777VAddr VMManager::GetNewMapRegionBaseAddress() const { 802VAddr VMManager::GetNewMapRegionBaseAddress() const {
778 return new_map_region_base; 803 return new_map_region_base;
779} 804}
@@ -786,6 +811,11 @@ u64 VMManager::GetNewMapRegionSize() const {
786 return new_map_region_end - new_map_region_base; 811 return new_map_region_end - new_map_region_base;
787} 812}
788 813
814bool VMManager::IsWithinNewMapRegion(VAddr address, u64 size) const {
815 return IsInsideAddressRange(address, size, GetNewMapRegionBaseAddress(),
816 GetNewMapRegionEndAddress());
817}
818
789VAddr VMManager::GetTLSIORegionBaseAddress() const { 819VAddr VMManager::GetTLSIORegionBaseAddress() const {
790 return tls_io_region_base; 820 return tls_io_region_base;
791} 821}
@@ -798,4 +828,9 @@ u64 VMManager::GetTLSIORegionSize() const {
798 return tls_io_region_end - tls_io_region_base; 828 return tls_io_region_end - tls_io_region_base;
799} 829}
800 830
831bool VMManager::IsWithinTLSIORegion(VAddr address, u64 size) const {
832 return IsInsideAddressRange(address, size, GetTLSIORegionBaseAddress(),
833 GetTLSIORegionEndAddress());
834}
835
801} // namespace Kernel 836} // namespace Kernel
diff --git a/src/core/hle/kernel/vm_manager.h b/src/core/hle/kernel/vm_manager.h
index 6091533bc..88e0b3c02 100644
--- a/src/core/hle/kernel/vm_manager.h
+++ b/src/core/hle/kernel/vm_manager.h
@@ -432,18 +432,21 @@ public:
432 /// Gets the address space width in bits. 432 /// Gets the address space width in bits.
433 u64 GetAddressSpaceWidth() const; 433 u64 GetAddressSpaceWidth() const;
434 434
435 /// Determines whether or not the given address range lies within the address space.
436 bool IsWithinAddressSpace(VAddr address, u64 size) const;
437
435 /// Gets the base address of the ASLR region. 438 /// Gets the base address of the ASLR region.
436 VAddr GetASLRRegionBaseAddress() const; 439 VAddr GetASLRRegionBaseAddress() const;
437 440
438 /// Gets the end address of the ASLR region. 441 /// Gets the end address of the ASLR region.
439 VAddr GetASLRRegionEndAddress() const; 442 VAddr GetASLRRegionEndAddress() const;
440 443
441 /// Determines whether or not the specified address range is within the ASLR region.
442 bool IsWithinASLRRegion(VAddr address, u64 size) const;
443
444 /// Gets the size of the ASLR region 444 /// Gets the size of the ASLR region
445 u64 GetASLRRegionSize() const; 445 u64 GetASLRRegionSize() const;
446 446
447 /// Determines whether or not the specified address range is within the ASLR region.
448 bool IsWithinASLRRegion(VAddr address, u64 size) const;
449
447 /// Gets the base address of the code region. 450 /// Gets the base address of the code region.
448 VAddr GetCodeRegionBaseAddress() const; 451 VAddr GetCodeRegionBaseAddress() const;
449 452
@@ -453,6 +456,9 @@ public:
453 /// Gets the total size of the code region in bytes. 456 /// Gets the total size of the code region in bytes.
454 u64 GetCodeRegionSize() const; 457 u64 GetCodeRegionSize() const;
455 458
459 /// Determines whether or not the specified range is within the code region.
460 bool IsWithinCodeRegion(VAddr address, u64 size) const;
461
456 /// Gets the base address of the heap region. 462 /// Gets the base address of the heap region.
457 VAddr GetHeapRegionBaseAddress() const; 463 VAddr GetHeapRegionBaseAddress() const;
458 464
@@ -462,6 +468,9 @@ public:
462 /// Gets the total size of the heap region in bytes. 468 /// Gets the total size of the heap region in bytes.
463 u64 GetHeapRegionSize() const; 469 u64 GetHeapRegionSize() const;
464 470
471 /// Determines whether or not the specified range is within the heap region.
472 bool IsWithinHeapRegion(VAddr address, u64 size) const;
473
465 /// Gets the base address of the map region. 474 /// Gets the base address of the map region.
466 VAddr GetMapRegionBaseAddress() const; 475 VAddr GetMapRegionBaseAddress() const;
467 476
@@ -471,6 +480,9 @@ public:
471 /// Gets the total size of the map region in bytes. 480 /// Gets the total size of the map region in bytes.
472 u64 GetMapRegionSize() const; 481 u64 GetMapRegionSize() const;
473 482
483 /// Determines whether or not the specified range is within the map region.
484 bool IsWithinMapRegion(VAddr address, u64 size) const;
485
474 /// Gets the base address of the new map region. 486 /// Gets the base address of the new map region.
475 VAddr GetNewMapRegionBaseAddress() const; 487 VAddr GetNewMapRegionBaseAddress() const;
476 488
@@ -480,6 +492,9 @@ public:
480 /// Gets the total size of the new map region in bytes. 492 /// Gets the total size of the new map region in bytes.
481 u64 GetNewMapRegionSize() const; 493 u64 GetNewMapRegionSize() const;
482 494
495 /// Determines whether or not the given address range is within the new map region
496 bool IsWithinNewMapRegion(VAddr address, u64 size) const;
497
483 /// Gets the base address of the TLS IO region. 498 /// Gets the base address of the TLS IO region.
484 VAddr GetTLSIORegionBaseAddress() const; 499 VAddr GetTLSIORegionBaseAddress() const;
485 500
@@ -489,6 +504,9 @@ public:
489 /// Gets the total size of the TLS IO region in bytes. 504 /// Gets the total size of the TLS IO region in bytes.
490 u64 GetTLSIORegionSize() const; 505 u64 GetTLSIORegionSize() const;
491 506
507 /// Determines if the given address range is within the TLS IO region.
508 bool IsWithinTLSIORegion(VAddr address, u64 size) const;
509
492 /// Each VMManager has its own page table, which is set as the main one when the owning process 510 /// Each VMManager has its own page table, which is set as the main one when the owning process
493 /// is scheduled. 511 /// is scheduled.
494 Memory::PageTable page_table; 512 Memory::PageTable page_table;
diff --git a/src/core/hle/result.h b/src/core/hle/result.h
index bfb77cc31..ab84f5ddc 100644
--- a/src/core/hle/result.h
+++ b/src/core/hle/result.h
@@ -8,20 +8,11 @@
8#include <utility> 8#include <utility>
9#include "common/assert.h" 9#include "common/assert.h"
10#include "common/bit_field.h" 10#include "common/bit_field.h"
11#include "common/common_funcs.h"
12#include "common/common_types.h" 11#include "common/common_types.h"
13 12
14// All the constants in this file come from http://switchbrew.org/index.php?title=Error_codes 13// All the constants in this file come from http://switchbrew.org/index.php?title=Error_codes
15 14
16/** 15/**
17 * Detailed description of the error. Code 0 always means success.
18 */
19enum class ErrorDescription : u32 {
20 Success = 0,
21 RemoteProcessDead = 301,
22};
23
24/**
25 * Identifies the module which caused the error. Error codes can be propagated through a call 16 * Identifies the module which caused the error. Error codes can be propagated through a call
26 * chain, meaning that this doesn't always correspond to the module where the API call made is 17 * chain, meaning that this doesn't always correspond to the module where the API call made is
27 * contained. 18 * contained.
@@ -121,7 +112,7 @@ enum class ErrorModule : u32 {
121 ShopN = 811, 112 ShopN = 811,
122}; 113};
123 114
124/// Encapsulates a CTR-OS error code, allowing it to be separated into its constituent fields. 115/// Encapsulates a Horizon OS error code, allowing it to be separated into its constituent fields.
125union ResultCode { 116union ResultCode {
126 u32 raw; 117 u32 raw;
127 118
@@ -134,17 +125,9 @@ union ResultCode {
134 125
135 constexpr explicit ResultCode(u32 raw) : raw(raw) {} 126 constexpr explicit ResultCode(u32 raw) : raw(raw) {}
136 127
137 constexpr ResultCode(ErrorModule module, ErrorDescription description)
138 : ResultCode(module, static_cast<u32>(description)) {}
139
140 constexpr ResultCode(ErrorModule module_, u32 description_) 128 constexpr ResultCode(ErrorModule module_, u32 description_)
141 : raw(module.FormatValue(module_) | description.FormatValue(description_)) {} 129 : raw(module.FormatValue(module_) | description.FormatValue(description_)) {}
142 130
143 constexpr ResultCode& operator=(const ResultCode& o) {
144 raw = o.raw;
145 return *this;
146 }
147
148 constexpr bool IsSuccess() const { 131 constexpr bool IsSuccess() const {
149 return raw == 0; 132 return raw == 0;
150 } 133 }
diff --git a/src/core/hle/service/am/applets/software_keyboard.cpp b/src/core/hle/service/am/applets/software_keyboard.cpp
index f255f74b5..8c5bd6059 100644
--- a/src/core/hle/service/am/applets/software_keyboard.cpp
+++ b/src/core/hle/service/am/applets/software_keyboard.cpp
@@ -7,6 +7,7 @@
7#include "common/string_util.h" 7#include "common/string_util.h"
8#include "core/core.h" 8#include "core/core.h"
9#include "core/frontend/applets/software_keyboard.h" 9#include "core/frontend/applets/software_keyboard.h"
10#include "core/hle/result.h"
10#include "core/hle/service/am/am.h" 11#include "core/hle/service/am/am.h"
11#include "core/hle/service/am/applets/software_keyboard.h" 12#include "core/hle/service/am/applets/software_keyboard.h"
12 13
diff --git a/src/core/hle/service/am/applets/software_keyboard.h b/src/core/hle/service/am/applets/software_keyboard.h
index efd5753a1..b93a30d28 100644
--- a/src/core/hle/service/am/applets/software_keyboard.h
+++ b/src/core/hle/service/am/applets/software_keyboard.h
@@ -9,10 +9,13 @@
9#include <vector> 9#include <vector>
10 10
11#include "common/common_funcs.h" 11#include "common/common_funcs.h"
12#include "common/common_types.h"
12#include "common/swap.h" 13#include "common/swap.h"
13#include "core/hle/service/am/am.h" 14#include "core/hle/service/am/am.h"
14#include "core/hle/service/am/applets/applets.h" 15#include "core/hle/service/am/applets/applets.h"
15 16
17union ResultCode;
18
16namespace Service::AM::Applets { 19namespace Service::AM::Applets {
17 20
18enum class KeysetDisable : u32 { 21enum class KeysetDisable : u32 {
diff --git a/src/core/hle/service/audio/audout_u.cpp b/src/core/hle/service/audio/audout_u.cpp
index 6831c0735..21f5e64c7 100644
--- a/src/core/hle/service/audio/audout_u.cpp
+++ b/src/core/hle/service/audio/audout_u.cpp
@@ -18,17 +18,11 @@
18#include "core/hle/kernel/readable_event.h" 18#include "core/hle/kernel/readable_event.h"
19#include "core/hle/kernel/writable_event.h" 19#include "core/hle/kernel/writable_event.h"
20#include "core/hle/service/audio/audout_u.h" 20#include "core/hle/service/audio/audout_u.h"
21#include "core/hle/service/audio/errors.h"
21#include "core/memory.h" 22#include "core/memory.h"
22 23
23namespace Service::Audio { 24namespace Service::Audio {
24 25
25namespace ErrCodes {
26enum {
27 ErrorUnknown = 2,
28 BufferCountExceeded = 8,
29};
30}
31
32constexpr std::array<char, 10> DefaultDevice{{"DeviceOut"}}; 26constexpr std::array<char, 10> DefaultDevice{{"DeviceOut"}};
33constexpr int DefaultSampleRate{48000}; 27constexpr int DefaultSampleRate{48000};
34 28
@@ -100,7 +94,7 @@ private:
100 94
101 if (stream->IsPlaying()) { 95 if (stream->IsPlaying()) {
102 IPC::ResponseBuilder rb{ctx, 2}; 96 IPC::ResponseBuilder rb{ctx, 2};
103 rb.Push(ResultCode(ErrorModule::Audio, ErrCodes::ErrorUnknown)); 97 rb.Push(ERR_OPERATION_FAILED);
104 return; 98 return;
105 } 99 }
106 100
@@ -113,7 +107,9 @@ private:
113 void StopAudioOut(Kernel::HLERequestContext& ctx) { 107 void StopAudioOut(Kernel::HLERequestContext& ctx) {
114 LOG_DEBUG(Service_Audio, "called"); 108 LOG_DEBUG(Service_Audio, "called");
115 109
116 audio_core.StopStream(stream); 110 if (stream->IsPlaying()) {
111 audio_core.StopStream(stream);
112 }
117 113
118 IPC::ResponseBuilder rb{ctx, 2}; 114 IPC::ResponseBuilder rb{ctx, 2};
119 rb.Push(RESULT_SUCCESS); 115 rb.Push(RESULT_SUCCESS);
@@ -143,7 +139,8 @@ private:
143 139
144 if (!audio_core.QueueBuffer(stream, tag, std::move(samples))) { 140 if (!audio_core.QueueBuffer(stream, tag, std::move(samples))) {
145 IPC::ResponseBuilder rb{ctx, 2}; 141 IPC::ResponseBuilder rb{ctx, 2};
146 rb.Push(ResultCode(ErrorModule::Audio, ErrCodes::BufferCountExceeded)); 142 rb.Push(ERR_BUFFER_COUNT_EXCEEDED);
143 return;
147 } 144 }
148 145
149 IPC::ResponseBuilder rb{ctx, 2}; 146 IPC::ResponseBuilder rb{ctx, 2};
diff --git a/src/core/hle/service/audio/audren_u.cpp b/src/core/hle/service/audio/audren_u.cpp
index 7e0cc64a8..c9de10a24 100644
--- a/src/core/hle/service/audio/audren_u.cpp
+++ b/src/core/hle/service/audio/audren_u.cpp
@@ -17,6 +17,7 @@
17#include "core/hle/kernel/readable_event.h" 17#include "core/hle/kernel/readable_event.h"
18#include "core/hle/kernel/writable_event.h" 18#include "core/hle/kernel/writable_event.h"
19#include "core/hle/service/audio/audren_u.h" 19#include "core/hle/service/audio/audren_u.h"
20#include "core/hle/service/audio/errors.h"
20 21
21namespace Service::Audio { 22namespace Service::Audio {
22 23
@@ -37,7 +38,7 @@ public:
37 {8, &IAudioRenderer::SetRenderingTimeLimit, "SetRenderingTimeLimit"}, 38 {8, &IAudioRenderer::SetRenderingTimeLimit, "SetRenderingTimeLimit"},
38 {9, &IAudioRenderer::GetRenderingTimeLimit, "GetRenderingTimeLimit"}, 39 {9, &IAudioRenderer::GetRenderingTimeLimit, "GetRenderingTimeLimit"},
39 {10, &IAudioRenderer::RequestUpdateImpl, "RequestUpdateAuto"}, 40 {10, &IAudioRenderer::RequestUpdateImpl, "RequestUpdateAuto"},
40 {11, nullptr, "ExecuteAudioRendererRendering"}, 41 {11, &IAudioRenderer::ExecuteAudioRendererRendering, "ExecuteAudioRendererRendering"},
41 }; 42 };
42 // clang-format on 43 // clang-format on
43 RegisterHandlers(functions); 44 RegisterHandlers(functions);
@@ -138,6 +139,17 @@ private:
138 rb.Push(rendering_time_limit_percent); 139 rb.Push(rendering_time_limit_percent);
139 } 140 }
140 141
142 void ExecuteAudioRendererRendering(Kernel::HLERequestContext& ctx) {
143 LOG_DEBUG(Service_Audio, "called");
144
145 // This service command currently only reports an unsupported operation
146 // error code, or aborts. Given that, we just always return an error
147 // code in this case.
148
149 IPC::ResponseBuilder rb{ctx, 2};
150 rb.Push(ERR_NOT_SUPPORTED);
151 }
152
141 Kernel::EventPair system_event; 153 Kernel::EventPair system_event;
142 std::unique_ptr<AudioCore::AudioRenderer> renderer; 154 std::unique_ptr<AudioCore::AudioRenderer> renderer;
143 u32 rendering_time_limit_percent = 100; 155 u32 rendering_time_limit_percent = 100;
@@ -235,7 +247,7 @@ AudRenU::AudRenU() : ServiceFramework("audren:u") {
235 {0, &AudRenU::OpenAudioRenderer, "OpenAudioRenderer"}, 247 {0, &AudRenU::OpenAudioRenderer, "OpenAudioRenderer"},
236 {1, &AudRenU::GetAudioRendererWorkBufferSize, "GetAudioRendererWorkBufferSize"}, 248 {1, &AudRenU::GetAudioRendererWorkBufferSize, "GetAudioRendererWorkBufferSize"},
237 {2, &AudRenU::GetAudioDeviceService, "GetAudioDeviceService"}, 249 {2, &AudRenU::GetAudioDeviceService, "GetAudioDeviceService"},
238 {3, nullptr, "OpenAudioRendererAuto"}, 250 {3, &AudRenU::OpenAudioRendererAuto, "OpenAudioRendererAuto"},
239 {4, &AudRenU::GetAudioDeviceServiceWithRevisionInfo, "GetAudioDeviceServiceWithRevisionInfo"}, 251 {4, &AudRenU::GetAudioDeviceServiceWithRevisionInfo, "GetAudioDeviceServiceWithRevisionInfo"},
240 }; 252 };
241 // clang-format on 253 // clang-format on
@@ -248,12 +260,7 @@ AudRenU::~AudRenU() = default;
248void AudRenU::OpenAudioRenderer(Kernel::HLERequestContext& ctx) { 260void AudRenU::OpenAudioRenderer(Kernel::HLERequestContext& ctx) {
249 LOG_DEBUG(Service_Audio, "called"); 261 LOG_DEBUG(Service_Audio, "called");
250 262
251 IPC::RequestParser rp{ctx}; 263 OpenAudioRendererImpl(ctx);
252 auto params = rp.PopRaw<AudioCore::AudioRendererParameter>();
253 IPC::ResponseBuilder rb{ctx, 2, 0, 1};
254
255 rb.Push(RESULT_SUCCESS);
256 rb.PushIpcInterface<Audio::IAudioRenderer>(std::move(params));
257} 264}
258 265
259void AudRenU::GetAudioRendererWorkBufferSize(Kernel::HLERequestContext& ctx) { 266void AudRenU::GetAudioRendererWorkBufferSize(Kernel::HLERequestContext& ctx) {
@@ -262,20 +269,20 @@ void AudRenU::GetAudioRendererWorkBufferSize(Kernel::HLERequestContext& ctx) {
262 LOG_DEBUG(Service_Audio, "called"); 269 LOG_DEBUG(Service_Audio, "called");
263 270
264 u64 buffer_sz = Common::AlignUp(4 * params.mix_buffer_count, 0x40); 271 u64 buffer_sz = Common::AlignUp(4 * params.mix_buffer_count, 0x40);
265 buffer_sz += params.unknown_c * 1024; 272 buffer_sz += params.submix_count * 1024;
266 buffer_sz += 0x940 * (params.unknown_c + 1); 273 buffer_sz += 0x940 * (params.submix_count + 1);
267 buffer_sz += 0x3F0 * params.voice_count; 274 buffer_sz += 0x3F0 * params.voice_count;
268 buffer_sz += Common::AlignUp(8 * (params.unknown_c + 1), 0x10); 275 buffer_sz += Common::AlignUp(8 * (params.submix_count + 1), 0x10);
269 buffer_sz += Common::AlignUp(8 * params.voice_count, 0x10); 276 buffer_sz += Common::AlignUp(8 * params.voice_count, 0x10);
270 buffer_sz += 277 buffer_sz += Common::AlignUp(
271 Common::AlignUp((0x3C0 * (params.sink_count + params.unknown_c) + 4 * params.sample_count) * 278 (0x3C0 * (params.sink_count + params.submix_count) + 4 * params.sample_count) *
272 (params.mix_buffer_count + 6), 279 (params.mix_buffer_count + 6),
273 0x40); 280 0x40);
274 281
275 if (IsFeatureSupported(AudioFeatures::Splitter, params.revision)) { 282 if (IsFeatureSupported(AudioFeatures::Splitter, params.revision)) {
276 u32 count = params.unknown_c + 1; 283 const u32 count = params.submix_count + 1;
277 u64 node_count = Common::AlignUp(count, 0x40); 284 u64 node_count = Common::AlignUp(count, 0x40);
278 u64 node_state_buffer_sz = 285 const u64 node_state_buffer_sz =
279 4 * (node_count * node_count) + 0xC * node_count + 2 * (node_count / 8); 286 4 * (node_count * node_count) + 0xC * node_count + 2 * (node_count / 8);
280 u64 edge_matrix_buffer_sz = 0; 287 u64 edge_matrix_buffer_sz = 0;
281 node_count = Common::AlignUp(count * count, 0x40); 288 node_count = Common::AlignUp(count * count, 0x40);
@@ -289,19 +296,19 @@ void AudRenU::GetAudioRendererWorkBufferSize(Kernel::HLERequestContext& ctx) {
289 296
290 buffer_sz += 0x20 * (params.effect_count + 4 * params.voice_count) + 0x50; 297 buffer_sz += 0x20 * (params.effect_count + 4 * params.voice_count) + 0x50;
291 if (IsFeatureSupported(AudioFeatures::Splitter, params.revision)) { 298 if (IsFeatureSupported(AudioFeatures::Splitter, params.revision)) {
292 buffer_sz += 0xE0 * params.unknown_2c; 299 buffer_sz += 0xE0 * params.num_splitter_send_channels;
293 buffer_sz += 0x20 * params.splitter_count; 300 buffer_sz += 0x20 * params.splitter_count;
294 buffer_sz += Common::AlignUp(4 * params.unknown_2c, 0x10); 301 buffer_sz += Common::AlignUp(4 * params.num_splitter_send_channels, 0x10);
295 } 302 }
296 buffer_sz = Common::AlignUp(buffer_sz, 0x40) + 0x170 * params.sink_count; 303 buffer_sz = Common::AlignUp(buffer_sz, 0x40) + 0x170 * params.sink_count;
297 u64 output_sz = buffer_sz + 0x280 * params.sink_count + 0x4B0 * params.effect_count + 304 u64 output_sz = buffer_sz + 0x280 * params.sink_count + 0x4B0 * params.effect_count +
298 ((params.voice_count * 256) | 0x40); 305 ((params.voice_count * 256) | 0x40);
299 306
300 if (params.unknown_1c >= 1) { 307 if (params.performance_frame_count >= 1) {
301 output_sz = Common::AlignUp(((16 * params.sink_count + 16 * params.effect_count + 308 output_sz = Common::AlignUp(((16 * params.sink_count + 16 * params.effect_count +
302 16 * params.voice_count + 16) + 309 16 * params.voice_count + 16) +
303 0x658) * 310 0x658) *
304 (params.unknown_1c + 1) + 311 (params.performance_frame_count + 1) +
305 0xc0, 312 0xc0,
306 0x40) + 313 0x40) +
307 output_sz; 314 output_sz;
@@ -325,6 +332,12 @@ void AudRenU::GetAudioDeviceService(Kernel::HLERequestContext& ctx) {
325 rb.PushIpcInterface<Audio::IAudioDevice>(); 332 rb.PushIpcInterface<Audio::IAudioDevice>();
326} 333}
327 334
335void AudRenU::OpenAudioRendererAuto(Kernel::HLERequestContext& ctx) {
336 LOG_DEBUG(Service_Audio, "called");
337
338 OpenAudioRendererImpl(ctx);
339}
340
328void AudRenU::GetAudioDeviceServiceWithRevisionInfo(Kernel::HLERequestContext& ctx) { 341void AudRenU::GetAudioDeviceServiceWithRevisionInfo(Kernel::HLERequestContext& ctx) {
329 LOG_WARNING(Service_Audio, "(STUBBED) called"); 342 LOG_WARNING(Service_Audio, "(STUBBED) called");
330 343
@@ -335,6 +348,15 @@ void AudRenU::GetAudioDeviceServiceWithRevisionInfo(Kernel::HLERequestContext& c
335 // based on the current revision 348 // based on the current revision
336} 349}
337 350
351void AudRenU::OpenAudioRendererImpl(Kernel::HLERequestContext& ctx) {
352 IPC::RequestParser rp{ctx};
353 const auto params = rp.PopRaw<AudioCore::AudioRendererParameter>();
354 IPC::ResponseBuilder rb{ctx, 2, 0, 1};
355
356 rb.Push(RESULT_SUCCESS);
357 rb.PushIpcInterface<IAudioRenderer>(params);
358}
359
338bool AudRenU::IsFeatureSupported(AudioFeatures feature, u32_le revision) const { 360bool AudRenU::IsFeatureSupported(AudioFeatures feature, u32_le revision) const {
339 u32_be version_num = (revision - Common::MakeMagic('R', 'E', 'V', '0')); // Byte swap 361 u32_be version_num = (revision - Common::MakeMagic('R', 'E', 'V', '0')); // Byte swap
340 switch (feature) { 362 switch (feature) {
diff --git a/src/core/hle/service/audio/audren_u.h b/src/core/hle/service/audio/audren_u.h
index 3d63388fb..e55d25973 100644
--- a/src/core/hle/service/audio/audren_u.h
+++ b/src/core/hle/service/audio/audren_u.h
@@ -21,8 +21,11 @@ private:
21 void OpenAudioRenderer(Kernel::HLERequestContext& ctx); 21 void OpenAudioRenderer(Kernel::HLERequestContext& ctx);
22 void GetAudioRendererWorkBufferSize(Kernel::HLERequestContext& ctx); 22 void GetAudioRendererWorkBufferSize(Kernel::HLERequestContext& ctx);
23 void GetAudioDeviceService(Kernel::HLERequestContext& ctx); 23 void GetAudioDeviceService(Kernel::HLERequestContext& ctx);
24 void OpenAudioRendererAuto(Kernel::HLERequestContext& ctx);
24 void GetAudioDeviceServiceWithRevisionInfo(Kernel::HLERequestContext& ctx); 25 void GetAudioDeviceServiceWithRevisionInfo(Kernel::HLERequestContext& ctx);
25 26
27 void OpenAudioRendererImpl(Kernel::HLERequestContext& ctx);
28
26 enum class AudioFeatures : u32 { 29 enum class AudioFeatures : u32 {
27 Splitter, 30 Splitter,
28 }; 31 };
diff --git a/src/core/hle/service/audio/errors.h b/src/core/hle/service/audio/errors.h
new file mode 100644
index 000000000..6f8c09bcf
--- /dev/null
+++ b/src/core/hle/service/audio/errors.h
@@ -0,0 +1,15 @@
1// Copyright 2019 yuzu emulator team
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include "core/hle/result.h"
8
9namespace Service::Audio {
10
11constexpr ResultCode ERR_OPERATION_FAILED{ErrorModule::Audio, 2};
12constexpr ResultCode ERR_BUFFER_COUNT_EXCEEDED{ErrorModule::Audio, 8};
13constexpr ResultCode ERR_NOT_SUPPORTED{ErrorModule::Audio, 513};
14
15} // namespace Service::Audio
diff --git a/src/core/hle/service/audio/hwopus.cpp b/src/core/hle/service/audio/hwopus.cpp
index 11eba4a12..377e12cfa 100644
--- a/src/core/hle/service/audio/hwopus.cpp
+++ b/src/core/hle/service/audio/hwopus.cpp
@@ -9,43 +9,32 @@
9 9
10#include <opus.h> 10#include <opus.h>
11 11
12#include "common/common_funcs.h" 12#include "common/assert.h"
13#include "common/logging/log.h" 13#include "common/logging/log.h"
14#include "core/hle/ipc_helpers.h" 14#include "core/hle/ipc_helpers.h"
15#include "core/hle/kernel/hle_ipc.h" 15#include "core/hle/kernel/hle_ipc.h"
16#include "core/hle/service/audio/hwopus.h" 16#include "core/hle/service/audio/hwopus.h"
17 17
18namespace Service::Audio { 18namespace Service::Audio {
19 19namespace {
20struct OpusDeleter { 20struct OpusDeleter {
21 void operator()(void* ptr) const { 21 void operator()(void* ptr) const {
22 operator delete(ptr); 22 operator delete(ptr);
23 } 23 }
24}; 24};
25 25
26class IHardwareOpusDecoderManager final : public ServiceFramework<IHardwareOpusDecoderManager> { 26using OpusDecoderPtr = std::unique_ptr<OpusDecoder, OpusDeleter>;
27public:
28 IHardwareOpusDecoderManager(std::unique_ptr<OpusDecoder, OpusDeleter> decoder, u32 sample_rate,
29 u32 channel_count)
30 : ServiceFramework("IHardwareOpusDecoderManager"), decoder(std::move(decoder)),
31 sample_rate(sample_rate), channel_count(channel_count) {
32 // clang-format off
33 static const FunctionInfo functions[] = {
34 {0, &IHardwareOpusDecoderManager::DecodeInterleavedOld, "DecodeInterleavedOld"},
35 {1, nullptr, "SetContext"},
36 {2, nullptr, "DecodeInterleavedForMultiStreamOld"},
37 {3, nullptr, "SetContextForMultiStream"},
38 {4, &IHardwareOpusDecoderManager::DecodeInterleavedWithPerfOld, "DecodeInterleavedWithPerfOld"},
39 {5, nullptr, "DecodeInterleavedForMultiStreamWithPerfOld"},
40 {6, &IHardwareOpusDecoderManager::DecodeInterleaved, "DecodeInterleaved"},
41 {7, nullptr, "DecodeInterleavedForMultiStream"},
42 };
43 // clang-format on
44 27
45 RegisterHandlers(functions); 28struct OpusPacketHeader {
46 } 29 // Packet size in bytes.
30 u32_be size;
31 // Indicates the final range of the codec's entropy coder.
32 u32_be final_range;
33};
34static_assert(sizeof(OpusPacketHeader) == 0x8, "OpusHeader is an invalid size");
47 35
48private: 36class OpusDecoderStateBase {
37public:
49 /// Describes extra behavior that may be asked of the decoding context. 38 /// Describes extra behavior that may be asked of the decoding context.
50 enum class ExtraBehavior { 39 enum class ExtraBehavior {
51 /// No extra behavior. 40 /// No extra behavior.
@@ -55,30 +44,36 @@ private:
55 ResetContext, 44 ResetContext,
56 }; 45 };
57 46
58 void DecodeInterleavedOld(Kernel::HLERequestContext& ctx) { 47 enum class PerfTime {
59 LOG_DEBUG(Audio, "called"); 48 Disabled,
60 49 Enabled,
61 DecodeInterleavedHelper(ctx, nullptr, ExtraBehavior::None); 50 };
62 }
63
64 void DecodeInterleavedWithPerfOld(Kernel::HLERequestContext& ctx) {
65 LOG_DEBUG(Audio, "called");
66 51
67 u64 performance = 0; 52 virtual ~OpusDecoderStateBase() = default;
68 DecodeInterleavedHelper(ctx, &performance, ExtraBehavior::None);
69 }
70 53
71 void DecodeInterleaved(Kernel::HLERequestContext& ctx) { 54 // Decodes interleaved Opus packets. Optionally allows reporting time taken to
72 LOG_DEBUG(Audio, "called"); 55 // perform the decoding, as well as any relevant extra behavior.
73 56 virtual void DecodeInterleaved(Kernel::HLERequestContext& ctx, PerfTime perf_time,
74 IPC::RequestParser rp{ctx}; 57 ExtraBehavior extra_behavior) = 0;
75 const auto extra_behavior = 58};
76 rp.Pop<bool>() ? ExtraBehavior::ResetContext : ExtraBehavior::None;
77 59
78 u64 performance = 0; 60// Represents the decoder state for a non-multistream decoder.
79 DecodeInterleavedHelper(ctx, &performance, extra_behavior); 61class OpusDecoderState final : public OpusDecoderStateBase {
62public:
63 explicit OpusDecoderState(OpusDecoderPtr decoder, u32 sample_rate, u32 channel_count)
64 : decoder{std::move(decoder)}, sample_rate{sample_rate}, channel_count{channel_count} {}
65
66 void DecodeInterleaved(Kernel::HLERequestContext& ctx, PerfTime perf_time,
67 ExtraBehavior extra_behavior) override {
68 if (perf_time == PerfTime::Disabled) {
69 DecodeInterleavedHelper(ctx, nullptr, extra_behavior);
70 } else {
71 u64 performance = 0;
72 DecodeInterleavedHelper(ctx, &performance, extra_behavior);
73 }
80 } 74 }
81 75
76private:
82 void DecodeInterleavedHelper(Kernel::HLERequestContext& ctx, u64* performance, 77 void DecodeInterleavedHelper(Kernel::HLERequestContext& ctx, u64* performance,
83 ExtraBehavior extra_behavior) { 78 ExtraBehavior extra_behavior) {
84 u32 consumed = 0; 79 u32 consumed = 0;
@@ -89,8 +84,7 @@ private:
89 ResetDecoderContext(); 84 ResetDecoderContext();
90 } 85 }
91 86
92 if (!Decoder_DecodeInterleaved(consumed, sample_count, ctx.ReadBuffer(), samples, 87 if (!DecodeOpusData(consumed, sample_count, ctx.ReadBuffer(), samples, performance)) {
93 performance)) {
94 LOG_ERROR(Audio, "Failed to decode opus data"); 88 LOG_ERROR(Audio, "Failed to decode opus data");
95 IPC::ResponseBuilder rb{ctx, 2}; 89 IPC::ResponseBuilder rb{ctx, 2};
96 // TODO(ogniK): Use correct error code 90 // TODO(ogniK): Use correct error code
@@ -109,27 +103,27 @@ private:
109 ctx.WriteBuffer(samples.data(), samples.size() * sizeof(s16)); 103 ctx.WriteBuffer(samples.data(), samples.size() * sizeof(s16));
110 } 104 }
111 105
112 bool Decoder_DecodeInterleaved(u32& consumed, u32& sample_count, const std::vector<u8>& input, 106 bool DecodeOpusData(u32& consumed, u32& sample_count, const std::vector<u8>& input,
113 std::vector<opus_int16>& output, u64* out_performance_time) { 107 std::vector<opus_int16>& output, u64* out_performance_time) const {
114 const auto start_time = std::chrono::high_resolution_clock::now(); 108 const auto start_time = std::chrono::high_resolution_clock::now();
115 const std::size_t raw_output_sz = output.size() * sizeof(opus_int16); 109 const std::size_t raw_output_sz = output.size() * sizeof(opus_int16);
116 if (sizeof(OpusHeader) > input.size()) { 110 if (sizeof(OpusPacketHeader) > input.size()) {
117 LOG_ERROR(Audio, "Input is smaller than the header size, header_sz={}, input_sz={}", 111 LOG_ERROR(Audio, "Input is smaller than the header size, header_sz={}, input_sz={}",
118 sizeof(OpusHeader), input.size()); 112 sizeof(OpusPacketHeader), input.size());
119 return false; 113 return false;
120 } 114 }
121 115
122 OpusHeader hdr{}; 116 OpusPacketHeader hdr{};
123 std::memcpy(&hdr, input.data(), sizeof(OpusHeader)); 117 std::memcpy(&hdr, input.data(), sizeof(OpusPacketHeader));
124 if (sizeof(OpusHeader) + static_cast<u32>(hdr.sz) > input.size()) { 118 if (sizeof(OpusPacketHeader) + static_cast<u32>(hdr.size) > input.size()) {
125 LOG_ERROR(Audio, "Input does not fit in the opus header size. data_sz={}, input_sz={}", 119 LOG_ERROR(Audio, "Input does not fit in the opus header size. data_sz={}, input_sz={}",
126 sizeof(OpusHeader) + static_cast<u32>(hdr.sz), input.size()); 120 sizeof(OpusPacketHeader) + static_cast<u32>(hdr.size), input.size());
127 return false; 121 return false;
128 } 122 }
129 123
130 const auto frame = input.data() + sizeof(OpusHeader); 124 const auto frame = input.data() + sizeof(OpusPacketHeader);
131 const auto decoded_sample_count = opus_packet_get_nb_samples( 125 const auto decoded_sample_count = opus_packet_get_nb_samples(
132 frame, static_cast<opus_int32>(input.size() - sizeof(OpusHeader)), 126 frame, static_cast<opus_int32>(input.size() - sizeof(OpusPacketHeader)),
133 static_cast<opus_int32>(sample_rate)); 127 static_cast<opus_int32>(sample_rate));
134 if (decoded_sample_count * channel_count * sizeof(u16) > raw_output_sz) { 128 if (decoded_sample_count * channel_count * sizeof(u16) > raw_output_sz) {
135 LOG_ERROR( 129 LOG_ERROR(
@@ -141,18 +135,18 @@ private:
141 135
142 const int frame_size = (static_cast<int>(raw_output_sz / sizeof(s16) / channel_count)); 136 const int frame_size = (static_cast<int>(raw_output_sz / sizeof(s16) / channel_count));
143 const auto out_sample_count = 137 const auto out_sample_count =
144 opus_decode(decoder.get(), frame, hdr.sz, output.data(), frame_size, 0); 138 opus_decode(decoder.get(), frame, hdr.size, output.data(), frame_size, 0);
145 if (out_sample_count < 0) { 139 if (out_sample_count < 0) {
146 LOG_ERROR(Audio, 140 LOG_ERROR(Audio,
147 "Incorrect sample count received from opus_decode, " 141 "Incorrect sample count received from opus_decode, "
148 "output_sample_count={}, frame_size={}, data_sz_from_hdr={}", 142 "output_sample_count={}, frame_size={}, data_sz_from_hdr={}",
149 out_sample_count, frame_size, static_cast<u32>(hdr.sz)); 143 out_sample_count, frame_size, static_cast<u32>(hdr.size));
150 return false; 144 return false;
151 } 145 }
152 146
153 const auto end_time = std::chrono::high_resolution_clock::now() - start_time; 147 const auto end_time = std::chrono::high_resolution_clock::now() - start_time;
154 sample_count = out_sample_count; 148 sample_count = out_sample_count;
155 consumed = static_cast<u32>(sizeof(OpusHeader) + hdr.sz); 149 consumed = static_cast<u32>(sizeof(OpusPacketHeader) + hdr.size);
156 if (out_performance_time != nullptr) { 150 if (out_performance_time != nullptr) {
157 *out_performance_time = 151 *out_performance_time =
158 std::chrono::duration_cast<std::chrono::milliseconds>(end_time).count(); 152 std::chrono::duration_cast<std::chrono::milliseconds>(end_time).count();
@@ -167,21 +161,66 @@ private:
167 opus_decoder_ctl(decoder.get(), OPUS_RESET_STATE); 161 opus_decoder_ctl(decoder.get(), OPUS_RESET_STATE);
168 } 162 }
169 163
170 struct OpusHeader { 164 OpusDecoderPtr decoder;
171 u32_be sz; // Needs to be BE for some odd reason
172 INSERT_PADDING_WORDS(1);
173 };
174 static_assert(sizeof(OpusHeader) == 0x8, "OpusHeader is an invalid size");
175
176 std::unique_ptr<OpusDecoder, OpusDeleter> decoder;
177 u32 sample_rate; 165 u32 sample_rate;
178 u32 channel_count; 166 u32 channel_count;
179}; 167};
180 168
181static std::size_t WorkerBufferSize(u32 channel_count) { 169class IHardwareOpusDecoderManager final : public ServiceFramework<IHardwareOpusDecoderManager> {
170public:
171 explicit IHardwareOpusDecoderManager(std::unique_ptr<OpusDecoderStateBase> decoder_state)
172 : ServiceFramework("IHardwareOpusDecoderManager"), decoder_state{std::move(decoder_state)} {
173 // clang-format off
174 static const FunctionInfo functions[] = {
175 {0, &IHardwareOpusDecoderManager::DecodeInterleavedOld, "DecodeInterleavedOld"},
176 {1, nullptr, "SetContext"},
177 {2, nullptr, "DecodeInterleavedForMultiStreamOld"},
178 {3, nullptr, "SetContextForMultiStream"},
179 {4, &IHardwareOpusDecoderManager::DecodeInterleavedWithPerfOld, "DecodeInterleavedWithPerfOld"},
180 {5, nullptr, "DecodeInterleavedForMultiStreamWithPerfOld"},
181 {6, &IHardwareOpusDecoderManager::DecodeInterleaved, "DecodeInterleaved"},
182 {7, nullptr, "DecodeInterleavedForMultiStream"},
183 };
184 // clang-format on
185
186 RegisterHandlers(functions);
187 }
188
189private:
190 void DecodeInterleavedOld(Kernel::HLERequestContext& ctx) {
191 LOG_DEBUG(Audio, "called");
192
193 decoder_state->DecodeInterleaved(ctx, OpusDecoderStateBase::PerfTime::Disabled,
194 OpusDecoderStateBase::ExtraBehavior::None);
195 }
196
197 void DecodeInterleavedWithPerfOld(Kernel::HLERequestContext& ctx) {
198 LOG_DEBUG(Audio, "called");
199
200 decoder_state->DecodeInterleaved(ctx, OpusDecoderStateBase::PerfTime::Enabled,
201 OpusDecoderStateBase::ExtraBehavior::None);
202 }
203
204 void DecodeInterleaved(Kernel::HLERequestContext& ctx) {
205 LOG_DEBUG(Audio, "called");
206
207 IPC::RequestParser rp{ctx};
208 const auto extra_behavior = rp.Pop<bool>()
209 ? OpusDecoderStateBase::ExtraBehavior::ResetContext
210 : OpusDecoderStateBase::ExtraBehavior::None;
211
212 decoder_state->DecodeInterleaved(ctx, OpusDecoderStateBase::PerfTime::Enabled,
213 extra_behavior);
214 }
215
216 std::unique_ptr<OpusDecoderStateBase> decoder_state;
217};
218
219std::size_t WorkerBufferSize(u32 channel_count) {
182 ASSERT_MSG(channel_count == 1 || channel_count == 2, "Invalid channel count"); 220 ASSERT_MSG(channel_count == 1 || channel_count == 2, "Invalid channel count");
183 return opus_decoder_get_size(static_cast<int>(channel_count)); 221 return opus_decoder_get_size(static_cast<int>(channel_count));
184} 222}
223} // Anonymous namespace
185 224
186void HwOpus::GetWorkBufferSize(Kernel::HLERequestContext& ctx) { 225void HwOpus::GetWorkBufferSize(Kernel::HLERequestContext& ctx) {
187 IPC::RequestParser rp{ctx}; 226 IPC::RequestParser rp{ctx};
@@ -220,8 +259,7 @@ void HwOpus::OpenOpusDecoder(Kernel::HLERequestContext& ctx) {
220 const std::size_t worker_sz = WorkerBufferSize(channel_count); 259 const std::size_t worker_sz = WorkerBufferSize(channel_count);
221 ASSERT_MSG(buffer_sz >= worker_sz, "Worker buffer too large"); 260 ASSERT_MSG(buffer_sz >= worker_sz, "Worker buffer too large");
222 261
223 std::unique_ptr<OpusDecoder, OpusDeleter> decoder{ 262 OpusDecoderPtr decoder{static_cast<OpusDecoder*>(operator new(worker_sz))};
224 static_cast<OpusDecoder*>(operator new(worker_sz))};
225 if (const int err = opus_decoder_init(decoder.get(), sample_rate, channel_count)) { 263 if (const int err = opus_decoder_init(decoder.get(), sample_rate, channel_count)) {
226 LOG_ERROR(Audio, "Failed to init opus decoder with error={}", err); 264 LOG_ERROR(Audio, "Failed to init opus decoder with error={}", err);
227 IPC::ResponseBuilder rb{ctx, 2}; 265 IPC::ResponseBuilder rb{ctx, 2};
@@ -232,8 +270,8 @@ void HwOpus::OpenOpusDecoder(Kernel::HLERequestContext& ctx) {
232 270
233 IPC::ResponseBuilder rb{ctx, 2, 0, 1}; 271 IPC::ResponseBuilder rb{ctx, 2, 0, 1};
234 rb.Push(RESULT_SUCCESS); 272 rb.Push(RESULT_SUCCESS);
235 rb.PushIpcInterface<IHardwareOpusDecoderManager>(std::move(decoder), sample_rate, 273 rb.PushIpcInterface<IHardwareOpusDecoderManager>(
236 channel_count); 274 std::make_unique<OpusDecoderState>(std::move(decoder), sample_rate, channel_count));
237} 275}
238 276
239HwOpus::HwOpus() : ServiceFramework("hwopus") { 277HwOpus::HwOpus() : ServiceFramework("hwopus") {
diff --git a/src/core/hle/service/hid/hid.h b/src/core/hle/service/hid/hid.h
index 6d897c842..7cc58db4c 100644
--- a/src/core/hle/service/hid/hid.h
+++ b/src/core/hle/service/hid/hid.h
@@ -15,7 +15,7 @@ namespace Kernel {
15class SharedMemory; 15class SharedMemory;
16} 16}
17 17
18namespace SM { 18namespace Service::SM {
19class ServiceManager; 19class ServiceManager;
20} 20}
21 21
diff --git a/src/core/hle/service/nvdrv/devices/nvdisp_disp0.cpp b/src/core/hle/service/nvdrv/devices/nvdisp_disp0.cpp
index 21ccfe1f8..20c7c39aa 100644
--- a/src/core/hle/service/nvdrv/devices/nvdisp_disp0.cpp
+++ b/src/core/hle/service/nvdrv/devices/nvdisp_disp0.cpp
@@ -23,7 +23,7 @@ u32 nvdisp_disp0::ioctl(Ioctl command, const std::vector<u8>& input, std::vector
23 23
24void nvdisp_disp0::flip(u32 buffer_handle, u32 offset, u32 format, u32 width, u32 height, 24void nvdisp_disp0::flip(u32 buffer_handle, u32 offset, u32 format, u32 width, u32 height,
25 u32 stride, NVFlinger::BufferQueue::BufferTransformFlags transform, 25 u32 stride, NVFlinger::BufferQueue::BufferTransformFlags transform,
26 const MathUtil::Rectangle<int>& crop_rect) { 26 const Common::Rectangle<int>& crop_rect) {
27 VAddr addr = nvmap_dev->GetObjectAddress(buffer_handle); 27 VAddr addr = nvmap_dev->GetObjectAddress(buffer_handle);
28 LOG_TRACE(Service, 28 LOG_TRACE(Service,
29 "Drawing from address {:X} offset {:08X} Width {} Height {} Stride {} Format {}", 29 "Drawing from address {:X} offset {:08X} Width {} Height {} Stride {} Format {}",
@@ -36,7 +36,7 @@ void nvdisp_disp0::flip(u32 buffer_handle, u32 offset, u32 format, u32 width, u3
36 36
37 auto& instance = Core::System::GetInstance(); 37 auto& instance = Core::System::GetInstance();
38 instance.GetPerfStats().EndGameFrame(); 38 instance.GetPerfStats().EndGameFrame();
39 instance.Renderer().SwapBuffers(framebuffer); 39 instance.GPU().SwapBuffers(framebuffer);
40} 40}
41 41
42} // namespace Service::Nvidia::Devices 42} // namespace Service::Nvidia::Devices
diff --git a/src/core/hle/service/nvdrv/devices/nvdisp_disp0.h b/src/core/hle/service/nvdrv/devices/nvdisp_disp0.h
index a45086e45..ace71169f 100644
--- a/src/core/hle/service/nvdrv/devices/nvdisp_disp0.h
+++ b/src/core/hle/service/nvdrv/devices/nvdisp_disp0.h
@@ -25,7 +25,7 @@ public:
25 /// Performs a screen flip, drawing the buffer pointed to by the handle. 25 /// Performs a screen flip, drawing the buffer pointed to by the handle.
26 void flip(u32 buffer_handle, u32 offset, u32 format, u32 width, u32 height, u32 stride, 26 void flip(u32 buffer_handle, u32 offset, u32 format, u32 width, u32 height, u32 stride,
27 NVFlinger::BufferQueue::BufferTransformFlags transform, 27 NVFlinger::BufferQueue::BufferTransformFlags transform,
28 const MathUtil::Rectangle<int>& crop_rect); 28 const Common::Rectangle<int>& crop_rect);
29 29
30private: 30private:
31 std::shared_ptr<nvmap> nvmap_dev; 31 std::shared_ptr<nvmap> nvmap_dev;
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.cpp b/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.cpp
index 466db7ccd..b031ebc66 100644
--- a/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.cpp
+++ b/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.cpp
@@ -10,6 +10,7 @@
10#include "core/core.h" 10#include "core/core.h"
11#include "core/hle/service/nvdrv/devices/nvhost_as_gpu.h" 11#include "core/hle/service/nvdrv/devices/nvhost_as_gpu.h"
12#include "core/hle/service/nvdrv/devices/nvmap.h" 12#include "core/hle/service/nvdrv/devices/nvmap.h"
13#include "core/memory.h"
13#include "video_core/memory_manager.h" 14#include "video_core/memory_manager.h"
14#include "video_core/rasterizer_interface.h" 15#include "video_core/rasterizer_interface.h"
15#include "video_core/renderer_base.h" 16#include "video_core/renderer_base.h"
@@ -178,7 +179,7 @@ u32 nvhost_as_gpu::UnmapBuffer(const std::vector<u8>& input, std::vector<u8>& ou
178 auto& gpu = system_instance.GPU(); 179 auto& gpu = system_instance.GPU();
179 auto cpu_addr = gpu.MemoryManager().GpuToCpuAddress(params.offset); 180 auto cpu_addr = gpu.MemoryManager().GpuToCpuAddress(params.offset);
180 ASSERT(cpu_addr); 181 ASSERT(cpu_addr);
181 system_instance.Renderer().Rasterizer().FlushAndInvalidateRegion(*cpu_addr, itr->second.size); 182 gpu.FlushAndInvalidateRegion(ToCacheAddr(Memory::GetPointer(*cpu_addr)), itr->second.size);
182 183
183 params.offset = gpu.MemoryManager().UnmapBuffer(params.offset, itr->second.size); 184 params.offset = gpu.MemoryManager().UnmapBuffer(params.offset, itr->second.size);
184 185
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_gpu.cpp b/src/core/hle/service/nvdrv/devices/nvhost_gpu.cpp
index 0a650f36c..8ce7bc7a5 100644
--- a/src/core/hle/service/nvdrv/devices/nvhost_gpu.cpp
+++ b/src/core/hle/service/nvdrv/devices/nvhost_gpu.cpp
@@ -136,16 +136,6 @@ u32 nvhost_gpu::AllocateObjectContext(const std::vector<u8>& input, std::vector<
136 return 0; 136 return 0;
137} 137}
138 138
139static void PushGPUEntries(Tegra::CommandList&& entries) {
140 if (entries.empty()) {
141 return;
142 }
143
144 auto& dma_pusher{Core::System::GetInstance().GPU().DmaPusher()};
145 dma_pusher.Push(std::move(entries));
146 dma_pusher.DispatchCalls();
147}
148
149u32 nvhost_gpu::SubmitGPFIFO(const std::vector<u8>& input, std::vector<u8>& output) { 139u32 nvhost_gpu::SubmitGPFIFO(const std::vector<u8>& input, std::vector<u8>& output) {
150 if (input.size() < sizeof(IoctlSubmitGpfifo)) { 140 if (input.size() < sizeof(IoctlSubmitGpfifo)) {
151 UNIMPLEMENTED(); 141 UNIMPLEMENTED();
@@ -163,7 +153,7 @@ u32 nvhost_gpu::SubmitGPFIFO(const std::vector<u8>& input, std::vector<u8>& outp
163 std::memcpy(entries.data(), &input[sizeof(IoctlSubmitGpfifo)], 153 std::memcpy(entries.data(), &input[sizeof(IoctlSubmitGpfifo)],
164 params.num_entries * sizeof(Tegra::CommandListHeader)); 154 params.num_entries * sizeof(Tegra::CommandListHeader));
165 155
166 PushGPUEntries(std::move(entries)); 156 Core::System::GetInstance().GPU().PushGPUEntries(std::move(entries));
167 157
168 params.fence_out.id = 0; 158 params.fence_out.id = 0;
169 params.fence_out.value = 0; 159 params.fence_out.value = 0;
@@ -184,7 +174,7 @@ u32 nvhost_gpu::KickoffPB(const std::vector<u8>& input, std::vector<u8>& output)
184 Memory::ReadBlock(params.address, entries.data(), 174 Memory::ReadBlock(params.address, entries.data(),
185 params.num_entries * sizeof(Tegra::CommandListHeader)); 175 params.num_entries * sizeof(Tegra::CommandListHeader));
186 176
187 PushGPUEntries(std::move(entries)); 177 Core::System::GetInstance().GPU().PushGPUEntries(std::move(entries));
188 178
189 params.fence_out.id = 0; 179 params.fence_out.id = 0;
190 params.fence_out.value = 0; 180 params.fence_out.value = 0;
diff --git a/src/core/hle/service/nvflinger/buffer_queue.cpp b/src/core/hle/service/nvflinger/buffer_queue.cpp
index fc07d9bb8..4d150fc71 100644
--- a/src/core/hle/service/nvflinger/buffer_queue.cpp
+++ b/src/core/hle/service/nvflinger/buffer_queue.cpp
@@ -63,7 +63,7 @@ const IGBPBuffer& BufferQueue::RequestBuffer(u32 slot) const {
63} 63}
64 64
65void BufferQueue::QueueBuffer(u32 slot, BufferTransformFlags transform, 65void BufferQueue::QueueBuffer(u32 slot, BufferTransformFlags transform,
66 const MathUtil::Rectangle<int>& crop_rect) { 66 const Common::Rectangle<int>& crop_rect) {
67 auto itr = std::find_if(queue.begin(), queue.end(), 67 auto itr = std::find_if(queue.begin(), queue.end(),
68 [&](const Buffer& buffer) { return buffer.slot == slot; }); 68 [&](const Buffer& buffer) { return buffer.slot == slot; });
69 ASSERT(itr != queue.end()); 69 ASSERT(itr != queue.end());
diff --git a/src/core/hle/service/nvflinger/buffer_queue.h b/src/core/hle/service/nvflinger/buffer_queue.h
index ab90d591e..e1ccb6171 100644
--- a/src/core/hle/service/nvflinger/buffer_queue.h
+++ b/src/core/hle/service/nvflinger/buffer_queue.h
@@ -67,14 +67,14 @@ public:
67 Status status = Status::Free; 67 Status status = Status::Free;
68 IGBPBuffer igbp_buffer; 68 IGBPBuffer igbp_buffer;
69 BufferTransformFlags transform; 69 BufferTransformFlags transform;
70 MathUtil::Rectangle<int> crop_rect; 70 Common::Rectangle<int> crop_rect;
71 }; 71 };
72 72
73 void SetPreallocatedBuffer(u32 slot, const IGBPBuffer& igbp_buffer); 73 void SetPreallocatedBuffer(u32 slot, const IGBPBuffer& igbp_buffer);
74 std::optional<u32> DequeueBuffer(u32 width, u32 height); 74 std::optional<u32> DequeueBuffer(u32 width, u32 height);
75 const IGBPBuffer& RequestBuffer(u32 slot) const; 75 const IGBPBuffer& RequestBuffer(u32 slot) const;
76 void QueueBuffer(u32 slot, BufferTransformFlags transform, 76 void QueueBuffer(u32 slot, BufferTransformFlags transform,
77 const MathUtil::Rectangle<int>& crop_rect); 77 const Common::Rectangle<int>& crop_rect);
78 std::optional<std::reference_wrapper<const Buffer>> AcquireBuffer(); 78 std::optional<std::reference_wrapper<const Buffer>> AcquireBuffer();
79 void ReleaseBuffer(u32 slot); 79 void ReleaseBuffer(u32 slot);
80 u32 Query(QueryType type); 80 u32 Query(QueryType type);
diff --git a/src/core/hle/service/nvflinger/nvflinger.cpp b/src/core/hle/service/nvflinger/nvflinger.cpp
index 3babc3f7c..fc496b654 100644
--- a/src/core/hle/service/nvflinger/nvflinger.cpp
+++ b/src/core/hle/service/nvflinger/nvflinger.cpp
@@ -14,11 +14,12 @@
14#include "core/core_timing_util.h" 14#include "core/core_timing_util.h"
15#include "core/hle/kernel/kernel.h" 15#include "core/hle/kernel/kernel.h"
16#include "core/hle/kernel/readable_event.h" 16#include "core/hle/kernel/readable_event.h"
17#include "core/hle/kernel/writable_event.h"
18#include "core/hle/service/nvdrv/devices/nvdisp_disp0.h" 17#include "core/hle/service/nvdrv/devices/nvdisp_disp0.h"
19#include "core/hle/service/nvdrv/nvdrv.h" 18#include "core/hle/service/nvdrv/nvdrv.h"
20#include "core/hle/service/nvflinger/buffer_queue.h" 19#include "core/hle/service/nvflinger/buffer_queue.h"
21#include "core/hle/service/nvflinger/nvflinger.h" 20#include "core/hle/service/nvflinger/nvflinger.h"
21#include "core/hle/service/vi/display/vi_display.h"
22#include "core/hle/service/vi/layer/vi_layer.h"
22#include "core/perf_stats.h" 23#include "core/perf_stats.h"
23#include "video_core/renderer_base.h" 24#include "video_core/renderer_base.h"
24 25
@@ -28,6 +29,12 @@ constexpr std::size_t SCREEN_REFRESH_RATE = 60;
28constexpr u64 frame_ticks = static_cast<u64>(Core::Timing::BASE_CLOCK_RATE / SCREEN_REFRESH_RATE); 29constexpr u64 frame_ticks = static_cast<u64>(Core::Timing::BASE_CLOCK_RATE / SCREEN_REFRESH_RATE);
29 30
30NVFlinger::NVFlinger(Core::Timing::CoreTiming& core_timing) : core_timing{core_timing} { 31NVFlinger::NVFlinger(Core::Timing::CoreTiming& core_timing) : core_timing{core_timing} {
32 displays.emplace_back(0, "Default");
33 displays.emplace_back(1, "External");
34 displays.emplace_back(2, "Edid");
35 displays.emplace_back(3, "Internal");
36 displays.emplace_back(4, "Null");
37
31 // Schedule the screen composition events 38 // Schedule the screen composition events
32 composition_event = 39 composition_event =
33 core_timing.RegisterEvent("ScreenComposition", [this](u64 userdata, int cycles_late) { 40 core_timing.RegisterEvent("ScreenComposition", [this](u64 userdata, int cycles_late) {
@@ -52,13 +59,14 @@ std::optional<u64> NVFlinger::OpenDisplay(std::string_view name) {
52 // TODO(Subv): Currently we only support the Default display. 59 // TODO(Subv): Currently we only support the Default display.
53 ASSERT(name == "Default"); 60 ASSERT(name == "Default");
54 61
55 const auto itr = std::find_if(displays.begin(), displays.end(), 62 const auto itr =
56 [&](const Display& display) { return display.name == name; }); 63 std::find_if(displays.begin(), displays.end(),
64 [&](const VI::Display& display) { return display.GetName() == name; });
57 if (itr == displays.end()) { 65 if (itr == displays.end()) {
58 return {}; 66 return {};
59 } 67 }
60 68
61 return itr->id; 69 return itr->GetID();
62} 70}
63 71
64std::optional<u64> NVFlinger::CreateLayer(u64 display_id) { 72std::optional<u64> NVFlinger::CreateLayer(u64 display_id) {
@@ -68,13 +76,10 @@ std::optional<u64> NVFlinger::CreateLayer(u64 display_id) {
68 return {}; 76 return {};
69 } 77 }
70 78
71 ASSERT_MSG(display->layers.empty(), "Only one layer is supported per display at the moment");
72
73 const u64 layer_id = next_layer_id++; 79 const u64 layer_id = next_layer_id++;
74 const u32 buffer_queue_id = next_buffer_queue_id++; 80 const u32 buffer_queue_id = next_buffer_queue_id++;
75 auto buffer_queue = std::make_shared<BufferQueue>(buffer_queue_id, layer_id); 81 buffer_queues.emplace_back(buffer_queue_id, layer_id);
76 display->layers.emplace_back(layer_id, buffer_queue); 82 display->CreateLayer(layer_id, buffer_queues.back());
77 buffer_queues.emplace_back(std::move(buffer_queue));
78 return layer_id; 83 return layer_id;
79} 84}
80 85
@@ -85,7 +90,7 @@ std::optional<u32> NVFlinger::FindBufferQueueId(u64 display_id, u64 layer_id) co
85 return {}; 90 return {};
86 } 91 }
87 92
88 return layer->buffer_queue->GetId(); 93 return layer->GetBufferQueue().GetId();
89} 94}
90 95
91Kernel::SharedPtr<Kernel::ReadableEvent> NVFlinger::FindVsyncEvent(u64 display_id) const { 96Kernel::SharedPtr<Kernel::ReadableEvent> NVFlinger::FindVsyncEvent(u64 display_id) const {
@@ -95,20 +100,29 @@ Kernel::SharedPtr<Kernel::ReadableEvent> NVFlinger::FindVsyncEvent(u64 display_i
95 return nullptr; 100 return nullptr;
96 } 101 }
97 102
98 return display->vsync_event.readable; 103 return display->GetVSyncEvent();
104}
105
106BufferQueue& NVFlinger::FindBufferQueue(u32 id) {
107 const auto itr = std::find_if(buffer_queues.begin(), buffer_queues.end(),
108 [id](const auto& queue) { return queue.GetId() == id; });
109
110 ASSERT(itr != buffer_queues.end());
111 return *itr;
99} 112}
100 113
101std::shared_ptr<BufferQueue> NVFlinger::FindBufferQueue(u32 id) const { 114const BufferQueue& NVFlinger::FindBufferQueue(u32 id) const {
102 const auto itr = std::find_if(buffer_queues.begin(), buffer_queues.end(), 115 const auto itr = std::find_if(buffer_queues.begin(), buffer_queues.end(),
103 [&](const auto& queue) { return queue->GetId() == id; }); 116 [id](const auto& queue) { return queue.GetId() == id; });
104 117
105 ASSERT(itr != buffer_queues.end()); 118 ASSERT(itr != buffer_queues.end());
106 return *itr; 119 return *itr;
107} 120}
108 121
109Display* NVFlinger::FindDisplay(u64 display_id) { 122VI::Display* NVFlinger::FindDisplay(u64 display_id) {
110 const auto itr = std::find_if(displays.begin(), displays.end(), 123 const auto itr =
111 [&](const Display& display) { return display.id == display_id; }); 124 std::find_if(displays.begin(), displays.end(),
125 [&](const VI::Display& display) { return display.GetID() == display_id; });
112 126
113 if (itr == displays.end()) { 127 if (itr == displays.end()) {
114 return nullptr; 128 return nullptr;
@@ -117,9 +131,10 @@ Display* NVFlinger::FindDisplay(u64 display_id) {
117 return &*itr; 131 return &*itr;
118} 132}
119 133
120const Display* NVFlinger::FindDisplay(u64 display_id) const { 134const VI::Display* NVFlinger::FindDisplay(u64 display_id) const {
121 const auto itr = std::find_if(displays.begin(), displays.end(), 135 const auto itr =
122 [&](const Display& display) { return display.id == display_id; }); 136 std::find_if(displays.begin(), displays.end(),
137 [&](const VI::Display& display) { return display.GetID() == display_id; });
123 138
124 if (itr == displays.end()) { 139 if (itr == displays.end()) {
125 return nullptr; 140 return nullptr;
@@ -128,57 +143,41 @@ const Display* NVFlinger::FindDisplay(u64 display_id) const {
128 return &*itr; 143 return &*itr;
129} 144}
130 145
131Layer* NVFlinger::FindLayer(u64 display_id, u64 layer_id) { 146VI::Layer* NVFlinger::FindLayer(u64 display_id, u64 layer_id) {
132 auto* const display = FindDisplay(display_id); 147 auto* const display = FindDisplay(display_id);
133 148
134 if (display == nullptr) { 149 if (display == nullptr) {
135 return nullptr; 150 return nullptr;
136 } 151 }
137 152
138 const auto itr = std::find_if(display->layers.begin(), display->layers.end(), 153 return display->FindLayer(layer_id);
139 [&](const Layer& layer) { return layer.id == layer_id; });
140
141 if (itr == display->layers.end()) {
142 return nullptr;
143 }
144
145 return &*itr;
146} 154}
147 155
148const Layer* NVFlinger::FindLayer(u64 display_id, u64 layer_id) const { 156const VI::Layer* NVFlinger::FindLayer(u64 display_id, u64 layer_id) const {
149 const auto* const display = FindDisplay(display_id); 157 const auto* const display = FindDisplay(display_id);
150 158
151 if (display == nullptr) { 159 if (display == nullptr) {
152 return nullptr; 160 return nullptr;
153 } 161 }
154 162
155 const auto itr = std::find_if(display->layers.begin(), display->layers.end(), 163 return display->FindLayer(layer_id);
156 [&](const Layer& layer) { return layer.id == layer_id; });
157
158 if (itr == display->layers.end()) {
159 return nullptr;
160 }
161
162 return &*itr;
163} 164}
164 165
165void NVFlinger::Compose() { 166void NVFlinger::Compose() {
166 for (auto& display : displays) { 167 for (auto& display : displays) {
167 // Trigger vsync for this display at the end of drawing 168 // Trigger vsync for this display at the end of drawing
168 SCOPE_EXIT({ display.vsync_event.writable->Signal(); }); 169 SCOPE_EXIT({ display.SignalVSyncEvent(); });
169 170
170 // Don't do anything for displays without layers. 171 // Don't do anything for displays without layers.
171 if (display.layers.empty()) 172 if (!display.HasLayers())
172 continue; 173 continue;
173 174
174 // TODO(Subv): Support more than 1 layer. 175 // TODO(Subv): Support more than 1 layer.
175 ASSERT_MSG(display.layers.size() == 1, "Max 1 layer per display is supported"); 176 VI::Layer& layer = display.GetLayer(0);
176 177 auto& buffer_queue = layer.GetBufferQueue();
177 Layer& layer = display.layers[0];
178 auto& buffer_queue = layer.buffer_queue;
179 178
180 // Search for a queued buffer and acquire it 179 // Search for a queued buffer and acquire it
181 auto buffer = buffer_queue->AcquireBuffer(); 180 auto buffer = buffer_queue.AcquireBuffer();
182 181
183 MicroProfileFlip(); 182 MicroProfileFlip();
184 183
@@ -187,7 +186,7 @@ void NVFlinger::Compose() {
187 186
188 // There was no queued buffer to draw, render previous frame 187 // There was no queued buffer to draw, render previous frame
189 system_instance.GetPerfStats().EndGameFrame(); 188 system_instance.GetPerfStats().EndGameFrame();
190 system_instance.Renderer().SwapBuffers({}); 189 system_instance.GPU().SwapBuffers({});
191 continue; 190 continue;
192 } 191 }
193 192
@@ -203,19 +202,8 @@ void NVFlinger::Compose() {
203 igbp_buffer.width, igbp_buffer.height, igbp_buffer.stride, 202 igbp_buffer.width, igbp_buffer.height, igbp_buffer.stride,
204 buffer->get().transform, buffer->get().crop_rect); 203 buffer->get().transform, buffer->get().crop_rect);
205 204
206 buffer_queue->ReleaseBuffer(buffer->get().slot); 205 buffer_queue.ReleaseBuffer(buffer->get().slot);
207 } 206 }
208} 207}
209 208
210Layer::Layer(u64 id, std::shared_ptr<BufferQueue> queue) : id(id), buffer_queue(std::move(queue)) {}
211Layer::~Layer() = default;
212
213Display::Display(u64 id, std::string name) : id(id), name(std::move(name)) {
214 auto& kernel = Core::System::GetInstance().Kernel();
215 vsync_event = Kernel::WritableEvent::CreateEventPair(kernel, Kernel::ResetType::Sticky,
216 fmt::format("Display VSync Event {}", id));
217}
218
219Display::~Display() = default;
220
221} // namespace Service::NVFlinger 209} // namespace Service::NVFlinger
diff --git a/src/core/hle/service/nvflinger/nvflinger.h b/src/core/hle/service/nvflinger/nvflinger.h
index 437aa592d..c0a83fffb 100644
--- a/src/core/hle/service/nvflinger/nvflinger.h
+++ b/src/core/hle/service/nvflinger/nvflinger.h
@@ -4,7 +4,6 @@
4 4
5#pragma once 5#pragma once
6 6
7#include <array>
8#include <memory> 7#include <memory>
9#include <optional> 8#include <optional>
10#include <string> 9#include <string>
@@ -26,31 +25,17 @@ class WritableEvent;
26 25
27namespace Service::Nvidia { 26namespace Service::Nvidia {
28class Module; 27class Module;
29} 28} // namespace Service::Nvidia
29
30namespace Service::VI {
31class Display;
32class Layer;
33} // namespace Service::VI
30 34
31namespace Service::NVFlinger { 35namespace Service::NVFlinger {
32 36
33class BufferQueue; 37class BufferQueue;
34 38
35struct Layer {
36 Layer(u64 id, std::shared_ptr<BufferQueue> queue);
37 ~Layer();
38
39 u64 id;
40 std::shared_ptr<BufferQueue> buffer_queue;
41};
42
43struct Display {
44 Display(u64 id, std::string name);
45 ~Display();
46
47 u64 id;
48 std::string name;
49
50 std::vector<Layer> layers;
51 Kernel::EventPair vsync_event;
52};
53
54class NVFlinger final { 39class NVFlinger final {
55public: 40public:
56 explicit NVFlinger(Core::Timing::CoreTiming& core_timing); 41 explicit NVFlinger(Core::Timing::CoreTiming& core_timing);
@@ -80,7 +65,10 @@ public:
80 Kernel::SharedPtr<Kernel::ReadableEvent> FindVsyncEvent(u64 display_id) const; 65 Kernel::SharedPtr<Kernel::ReadableEvent> FindVsyncEvent(u64 display_id) const;
81 66
82 /// Obtains a buffer queue identified by the ID. 67 /// Obtains a buffer queue identified by the ID.
83 std::shared_ptr<BufferQueue> FindBufferQueue(u32 id) const; 68 BufferQueue& FindBufferQueue(u32 id);
69
70 /// Obtains a buffer queue identified by the ID.
71 const BufferQueue& FindBufferQueue(u32 id) const;
84 72
85 /// Performs a composition request to the emulated nvidia GPU and triggers the vsync events when 73 /// Performs a composition request to the emulated nvidia GPU and triggers the vsync events when
86 /// finished. 74 /// finished.
@@ -88,27 +76,21 @@ public:
88 76
89private: 77private:
90 /// Finds the display identified by the specified ID. 78 /// Finds the display identified by the specified ID.
91 Display* FindDisplay(u64 display_id); 79 VI::Display* FindDisplay(u64 display_id);
92 80
93 /// Finds the display identified by the specified ID. 81 /// Finds the display identified by the specified ID.
94 const Display* FindDisplay(u64 display_id) const; 82 const VI::Display* FindDisplay(u64 display_id) const;
95 83
96 /// Finds the layer identified by the specified ID in the desired display. 84 /// Finds the layer identified by the specified ID in the desired display.
97 Layer* FindLayer(u64 display_id, u64 layer_id); 85 VI::Layer* FindLayer(u64 display_id, u64 layer_id);
98 86
99 /// Finds the layer identified by the specified ID in the desired display. 87 /// Finds the layer identified by the specified ID in the desired display.
100 const Layer* FindLayer(u64 display_id, u64 layer_id) const; 88 const VI::Layer* FindLayer(u64 display_id, u64 layer_id) const;
101 89
102 std::shared_ptr<Nvidia::Module> nvdrv; 90 std::shared_ptr<Nvidia::Module> nvdrv;
103 91
104 std::array<Display, 5> displays{{ 92 std::vector<VI::Display> displays;
105 {0, "Default"}, 93 std::vector<BufferQueue> buffer_queues;
106 {1, "External"},
107 {2, "Edid"},
108 {3, "Internal"},
109 {4, "Null"},
110 }};
111 std::vector<std::shared_ptr<BufferQueue>> buffer_queues;
112 94
113 /// Id to use for the next layer that is created, this counter is shared among all displays. 95 /// Id to use for the next layer that is created, this counter is shared among all displays.
114 u64 next_layer_id = 1; 96 u64 next_layer_id = 1;
diff --git a/src/core/hle/service/service.cpp b/src/core/hle/service/service.cpp
index 117f87a45..00806b0ed 100644
--- a/src/core/hle/service/service.cpp
+++ b/src/core/hle/service/service.cpp
@@ -11,7 +11,6 @@
11#include "core/hle/ipc.h" 11#include "core/hle/ipc.h"
12#include "core/hle/ipc_helpers.h" 12#include "core/hle/ipc_helpers.h"
13#include "core/hle/kernel/client_port.h" 13#include "core/hle/kernel/client_port.h"
14#include "core/hle/kernel/handle_table.h"
15#include "core/hle/kernel/kernel.h" 14#include "core/hle/kernel/kernel.h"
16#include "core/hle/kernel/process.h" 15#include "core/hle/kernel/process.h"
17#include "core/hle/kernel/server_port.h" 16#include "core/hle/kernel/server_port.h"
@@ -76,7 +75,8 @@ namespace Service {
76 * Creates a function string for logging, complete with the name (or header code, depending 75 * Creates a function string for logging, complete with the name (or header code, depending
77 * on what's passed in) the port name, and all the cmd_buff arguments. 76 * on what's passed in) the port name, and all the cmd_buff arguments.
78 */ 77 */
79[[maybe_unused]] static std::string MakeFunctionString(const char* name, const char* port_name, 78[[maybe_unused]] static std::string MakeFunctionString(std::string_view name,
79 std::string_view port_name,
80 const u32* cmd_buff) { 80 const u32* cmd_buff) {
81 // Number of params == bits 0-5 + bits 6-11 81 // Number of params == bits 0-5 + bits 6-11
82 int num_params = (cmd_buff[0] & 0x3F) + ((cmd_buff[0] >> 6) & 0x3F); 82 int num_params = (cmd_buff[0] & 0x3F) + ((cmd_buff[0] >> 6) & 0x3F);
@@ -158,9 +158,7 @@ void ServiceFrameworkBase::InvokeRequest(Kernel::HLERequestContext& ctx) {
158 return ReportUnimplementedFunction(ctx, info); 158 return ReportUnimplementedFunction(ctx, info);
159 } 159 }
160 160
161 LOG_TRACE( 161 LOG_TRACE(Service, "{}", MakeFunctionString(info->name, GetServiceName(), ctx.CommandBuffer()));
162 Service, "{}",
163 MakeFunctionString(info->name, GetServiceName().c_str(), ctx.CommandBuffer()).c_str());
164 handler_invoker(this, info->handler_callback, ctx); 162 handler_invoker(this, info->handler_callback, ctx);
165} 163}
166 164
@@ -169,7 +167,7 @@ ResultCode ServiceFrameworkBase::HandleSyncRequest(Kernel::HLERequestContext& co
169 case IPC::CommandType::Close: { 167 case IPC::CommandType::Close: {
170 IPC::ResponseBuilder rb{context, 2}; 168 IPC::ResponseBuilder rb{context, 2};
171 rb.Push(RESULT_SUCCESS); 169 rb.Push(RESULT_SUCCESS);
172 return ResultCode(ErrorModule::HIPC, ErrorDescription::RemoteProcessDead); 170 return IPC::ERR_REMOTE_PROCESS_DEAD;
173 } 171 }
174 case IPC::CommandType::ControlWithContext: 172 case IPC::CommandType::ControlWithContext:
175 case IPC::CommandType::Control: { 173 case IPC::CommandType::Control: {
diff --git a/src/core/hle/service/sm/controller.cpp b/src/core/hle/service/sm/controller.cpp
index 74da4d5e6..e9ee73710 100644
--- a/src/core/hle/service/sm/controller.cpp
+++ b/src/core/hle/service/sm/controller.cpp
@@ -30,7 +30,7 @@ void Controller::DuplicateSession(Kernel::HLERequestContext& ctx) {
30 30
31 IPC::ResponseBuilder rb{ctx, 2, 0, 1, IPC::ResponseBuilder::Flags::AlwaysMoveHandles}; 31 IPC::ResponseBuilder rb{ctx, 2, 0, 1, IPC::ResponseBuilder::Flags::AlwaysMoveHandles};
32 rb.Push(RESULT_SUCCESS); 32 rb.Push(RESULT_SUCCESS);
33 Kernel::SharedPtr<Kernel::ClientSession> session{ctx.Session()->parent->client}; 33 Kernel::SharedPtr<Kernel::ClientSession> session{ctx.Session()->GetParent()->client};
34 rb.PushMoveObjects(session); 34 rb.PushMoveObjects(session);
35 35
36 LOG_DEBUG(Service, "session={}", session->GetObjectId()); 36 LOG_DEBUG(Service, "session={}", session->GetObjectId());
diff --git a/src/core/hle/service/sm/sm.h b/src/core/hle/service/sm/sm.h
index bef25433e..b9d6381b4 100644
--- a/src/core/hle/service/sm/sm.h
+++ b/src/core/hle/service/sm/sm.h
@@ -67,7 +67,7 @@ public:
67 if (port == nullptr) { 67 if (port == nullptr) {
68 return nullptr; 68 return nullptr;
69 } 69 }
70 return std::static_pointer_cast<T>(port->hle_handler); 70 return std::static_pointer_cast<T>(port->GetHLEHandler());
71 } 71 }
72 72
73 void InvokeControlRequest(Kernel::HLERequestContext& context); 73 void InvokeControlRequest(Kernel::HLERequestContext& context);
diff --git a/src/core/hle/service/vi/display/vi_display.cpp b/src/core/hle/service/vi/display/vi_display.cpp
new file mode 100644
index 000000000..01d80311b
--- /dev/null
+++ b/src/core/hle/service/vi/display/vi_display.cpp
@@ -0,0 +1,71 @@
1// Copyright 2019 yuzu emulator team
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <algorithm>
6#include <utility>
7
8#include <fmt/format.h>
9
10#include "common/assert.h"
11#include "core/core.h"
12#include "core/hle/kernel/readable_event.h"
13#include "core/hle/service/vi/display/vi_display.h"
14#include "core/hle/service/vi/layer/vi_layer.h"
15
16namespace Service::VI {
17
18Display::Display(u64 id, std::string name) : id{id}, name{std::move(name)} {
19 auto& kernel = Core::System::GetInstance().Kernel();
20 vsync_event = Kernel::WritableEvent::CreateEventPair(kernel, Kernel::ResetType::Sticky,
21 fmt::format("Display VSync Event {}", id));
22}
23
24Display::~Display() = default;
25
26Layer& Display::GetLayer(std::size_t index) {
27 return layers.at(index);
28}
29
30const Layer& Display::GetLayer(std::size_t index) const {
31 return layers.at(index);
32}
33
34Kernel::SharedPtr<Kernel::ReadableEvent> Display::GetVSyncEvent() const {
35 return vsync_event.readable;
36}
37
38void Display::SignalVSyncEvent() {
39 vsync_event.writable->Signal();
40}
41
42void Display::CreateLayer(u64 id, NVFlinger::BufferQueue& buffer_queue) {
43 // TODO(Subv): Support more than 1 layer.
44 ASSERT_MSG(layers.empty(), "Only one layer is supported per display at the moment");
45
46 layers.emplace_back(id, buffer_queue);
47}
48
49Layer* Display::FindLayer(u64 id) {
50 const auto itr = std::find_if(layers.begin(), layers.end(),
51 [id](const VI::Layer& layer) { return layer.GetID() == id; });
52
53 if (itr == layers.end()) {
54 return nullptr;
55 }
56
57 return &*itr;
58}
59
60const Layer* Display::FindLayer(u64 id) const {
61 const auto itr = std::find_if(layers.begin(), layers.end(),
62 [id](const VI::Layer& layer) { return layer.GetID() == id; });
63
64 if (itr == layers.end()) {
65 return nullptr;
66 }
67
68 return &*itr;
69}
70
71} // namespace Service::VI
diff --git a/src/core/hle/service/vi/display/vi_display.h b/src/core/hle/service/vi/display/vi_display.h
new file mode 100644
index 000000000..2acd46ff8
--- /dev/null
+++ b/src/core/hle/service/vi/display/vi_display.h
@@ -0,0 +1,98 @@
1// Copyright 2019 yuzu emulator team
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <string>
8#include <vector>
9
10#include "common/common_types.h"
11#include "core/hle/kernel/writable_event.h"
12
13namespace Service::NVFlinger {
14class BufferQueue;
15}
16
17namespace Service::VI {
18
19class Layer;
20
21/// Represents a single display type
22class Display {
23public:
24 /// Constructs a display with a given unique ID and name.
25 ///
26 /// @param id The unique ID for this display.
27 /// @param name The name for this display.
28 ///
29 Display(u64 id, std::string name);
30 ~Display();
31
32 Display(const Display&) = delete;
33 Display& operator=(const Display&) = delete;
34
35 Display(Display&&) = default;
36 Display& operator=(Display&&) = default;
37
38 /// Gets the unique ID assigned to this display.
39 u64 GetID() const {
40 return id;
41 }
42
43 /// Gets the name of this display
44 const std::string& GetName() const {
45 return name;
46 }
47
48 /// Whether or not this display has any layers added to it.
49 bool HasLayers() const {
50 return !layers.empty();
51 }
52
53 /// Gets a layer for this display based off an index.
54 Layer& GetLayer(std::size_t index);
55
56 /// Gets a layer for this display based off an index.
57 const Layer& GetLayer(std::size_t index) const;
58
59 /// Gets the readable vsync event.
60 Kernel::SharedPtr<Kernel::ReadableEvent> GetVSyncEvent() const;
61
62 /// Signals the internal vsync event.
63 void SignalVSyncEvent();
64
65 /// Creates and adds a layer to this display with the given ID.
66 ///
67 /// @param id The ID to assign to the created layer.
68 /// @param buffer_queue The buffer queue for the layer instance to use.
69 ///
70 void CreateLayer(u64 id, NVFlinger::BufferQueue& buffer_queue);
71
72 /// Attempts to find a layer with the given ID.
73 ///
74 /// @param id The layer ID.
75 ///
76 /// @returns If found, the Layer instance with the given ID.
77 /// If not found, then nullptr is returned.
78 ///
79 Layer* FindLayer(u64 id);
80
81 /// Attempts to find a layer with the given ID.
82 ///
83 /// @param id The layer ID.
84 ///
85 /// @returns If found, the Layer instance with the given ID.
86 /// If not found, then nullptr is returned.
87 ///
88 const Layer* FindLayer(u64 id) const;
89
90private:
91 u64 id;
92 std::string name;
93
94 std::vector<Layer> layers;
95 Kernel::EventPair vsync_event;
96};
97
98} // namespace Service::VI
diff --git a/src/core/hle/service/vi/layer/vi_layer.cpp b/src/core/hle/service/vi/layer/vi_layer.cpp
new file mode 100644
index 000000000..954225c26
--- /dev/null
+++ b/src/core/hle/service/vi/layer/vi_layer.cpp
@@ -0,0 +1,13 @@
1// Copyright 2019 yuzu emulator team
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "core/hle/service/vi/layer/vi_layer.h"
6
7namespace Service::VI {
8
9Layer::Layer(u64 id, NVFlinger::BufferQueue& queue) : id{id}, buffer_queue{queue} {}
10
11Layer::~Layer() = default;
12
13} // namespace Service::VI
diff --git a/src/core/hle/service/vi/layer/vi_layer.h b/src/core/hle/service/vi/layer/vi_layer.h
new file mode 100644
index 000000000..c6bfd01f6
--- /dev/null
+++ b/src/core/hle/service/vi/layer/vi_layer.h
@@ -0,0 +1,52 @@
1// Copyright 2019 yuzu emulator team
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include "common/common_types.h"
8
9namespace Service::NVFlinger {
10class BufferQueue;
11}
12
13namespace Service::VI {
14
15/// Represents a single display layer.
16class Layer {
17public:
18 /// Constructs a layer with a given ID and buffer queue.
19 ///
20 /// @param id The ID to assign to this layer.
21 /// @param queue The buffer queue for this layer to use.
22 ///
23 Layer(u64 id, NVFlinger::BufferQueue& queue);
24 ~Layer();
25
26 Layer(const Layer&) = delete;
27 Layer& operator=(const Layer&) = delete;
28
29 Layer(Layer&&) = default;
30 Layer& operator=(Layer&&) = delete;
31
32 /// Gets the ID for this layer.
33 u64 GetID() const {
34 return id;
35 }
36
37 /// Gets a reference to the buffer queue this layer is using.
38 NVFlinger::BufferQueue& GetBufferQueue() {
39 return buffer_queue;
40 }
41
42 /// Gets a const reference to the buffer queue this layer is using.
43 const NVFlinger::BufferQueue& GetBufferQueue() const {
44 return buffer_queue;
45 }
46
47private:
48 u64 id;
49 NVFlinger::BufferQueue& buffer_queue;
50};
51
52} // namespace Service::VI
diff --git a/src/core/hle/service/vi/vi.cpp b/src/core/hle/service/vi/vi.cpp
index a317a2885..566cd6006 100644
--- a/src/core/hle/service/vi/vi.cpp
+++ b/src/core/hle/service/vi/vi.cpp
@@ -24,6 +24,7 @@
24#include "core/hle/service/nvdrv/nvdrv.h" 24#include "core/hle/service/nvdrv/nvdrv.h"
25#include "core/hle/service/nvflinger/buffer_queue.h" 25#include "core/hle/service/nvflinger/buffer_queue.h"
26#include "core/hle/service/nvflinger/nvflinger.h" 26#include "core/hle/service/nvflinger/nvflinger.h"
27#include "core/hle/service/service.h"
27#include "core/hle/service/vi/vi.h" 28#include "core/hle/service/vi/vi.h"
28#include "core/hle/service/vi/vi_m.h" 29#include "core/hle/service/vi/vi_m.h"
29#include "core/hle/service/vi/vi_s.h" 30#include "core/hle/service/vi/vi_s.h"
@@ -33,6 +34,7 @@
33namespace Service::VI { 34namespace Service::VI {
34 35
35constexpr ResultCode ERR_OPERATION_FAILED{ErrorModule::VI, 1}; 36constexpr ResultCode ERR_OPERATION_FAILED{ErrorModule::VI, 1};
37constexpr ResultCode ERR_PERMISSION_DENIED{ErrorModule::VI, 5};
36constexpr ResultCode ERR_UNSUPPORTED{ErrorModule::VI, 6}; 38constexpr ResultCode ERR_UNSUPPORTED{ErrorModule::VI, 6};
37constexpr ResultCode ERR_NOT_FOUND{ErrorModule::VI, 7}; 39constexpr ResultCode ERR_NOT_FOUND{ErrorModule::VI, 7};
38 40
@@ -420,7 +422,7 @@ public:
420 u32_le fence_is_valid; 422 u32_le fence_is_valid;
421 std::array<Fence, 2> fences; 423 std::array<Fence, 2> fences;
422 424
423 MathUtil::Rectangle<int> GetCropRect() const { 425 Common::Rectangle<int> GetCropRect() const {
424 return {crop_left, crop_top, crop_right, crop_bottom}; 426 return {crop_left, crop_top, crop_right, crop_bottom};
425 } 427 }
426 }; 428 };
@@ -525,7 +527,7 @@ private:
525 LOG_DEBUG(Service_VI, "called. id=0x{:08X} transaction={:X}, flags=0x{:08X}", id, 527 LOG_DEBUG(Service_VI, "called. id=0x{:08X} transaction={:X}, flags=0x{:08X}", id,
526 static_cast<u32>(transaction), flags); 528 static_cast<u32>(transaction), flags);
527 529
528 auto buffer_queue = nv_flinger->FindBufferQueue(id); 530 auto& buffer_queue = nv_flinger->FindBufferQueue(id);
529 531
530 if (transaction == TransactionId::Connect) { 532 if (transaction == TransactionId::Connect) {
531 IGBPConnectRequestParcel request{ctx.ReadBuffer()}; 533 IGBPConnectRequestParcel request{ctx.ReadBuffer()};
@@ -538,7 +540,7 @@ private:
538 } else if (transaction == TransactionId::SetPreallocatedBuffer) { 540 } else if (transaction == TransactionId::SetPreallocatedBuffer) {
539 IGBPSetPreallocatedBufferRequestParcel request{ctx.ReadBuffer()}; 541 IGBPSetPreallocatedBufferRequestParcel request{ctx.ReadBuffer()};
540 542
541 buffer_queue->SetPreallocatedBuffer(request.data.slot, request.buffer); 543 buffer_queue.SetPreallocatedBuffer(request.data.slot, request.buffer);
542 544
543 IGBPSetPreallocatedBufferResponseParcel response{}; 545 IGBPSetPreallocatedBufferResponseParcel response{};
544 ctx.WriteBuffer(response.Serialize()); 546 ctx.WriteBuffer(response.Serialize());
@@ -546,7 +548,7 @@ private:
546 IGBPDequeueBufferRequestParcel request{ctx.ReadBuffer()}; 548 IGBPDequeueBufferRequestParcel request{ctx.ReadBuffer()};
547 const u32 width{request.data.width}; 549 const u32 width{request.data.width};
548 const u32 height{request.data.height}; 550 const u32 height{request.data.height};
549 std::optional<u32> slot = buffer_queue->DequeueBuffer(width, height); 551 std::optional<u32> slot = buffer_queue.DequeueBuffer(width, height);
550 552
551 if (slot) { 553 if (slot) {
552 // Buffer is available 554 // Buffer is available
@@ -559,8 +561,8 @@ private:
559 [=](Kernel::SharedPtr<Kernel::Thread> thread, Kernel::HLERequestContext& ctx, 561 [=](Kernel::SharedPtr<Kernel::Thread> thread, Kernel::HLERequestContext& ctx,
560 Kernel::ThreadWakeupReason reason) { 562 Kernel::ThreadWakeupReason reason) {
561 // Repeat TransactParcel DequeueBuffer when a buffer is available 563 // Repeat TransactParcel DequeueBuffer when a buffer is available
562 auto buffer_queue = nv_flinger->FindBufferQueue(id); 564 auto& buffer_queue = nv_flinger->FindBufferQueue(id);
563 std::optional<u32> slot = buffer_queue->DequeueBuffer(width, height); 565 std::optional<u32> slot = buffer_queue.DequeueBuffer(width, height);
564 ASSERT_MSG(slot != std::nullopt, "Could not dequeue buffer."); 566 ASSERT_MSG(slot != std::nullopt, "Could not dequeue buffer.");
565 567
566 IGBPDequeueBufferResponseParcel response{*slot}; 568 IGBPDequeueBufferResponseParcel response{*slot};
@@ -568,28 +570,28 @@ private:
568 IPC::ResponseBuilder rb{ctx, 2}; 570 IPC::ResponseBuilder rb{ctx, 2};
569 rb.Push(RESULT_SUCCESS); 571 rb.Push(RESULT_SUCCESS);
570 }, 572 },
571 buffer_queue->GetWritableBufferWaitEvent()); 573 buffer_queue.GetWritableBufferWaitEvent());
572 } 574 }
573 } else if (transaction == TransactionId::RequestBuffer) { 575 } else if (transaction == TransactionId::RequestBuffer) {
574 IGBPRequestBufferRequestParcel request{ctx.ReadBuffer()}; 576 IGBPRequestBufferRequestParcel request{ctx.ReadBuffer()};
575 577
576 auto& buffer = buffer_queue->RequestBuffer(request.slot); 578 auto& buffer = buffer_queue.RequestBuffer(request.slot);
577 579
578 IGBPRequestBufferResponseParcel response{buffer}; 580 IGBPRequestBufferResponseParcel response{buffer};
579 ctx.WriteBuffer(response.Serialize()); 581 ctx.WriteBuffer(response.Serialize());
580 } else if (transaction == TransactionId::QueueBuffer) { 582 } else if (transaction == TransactionId::QueueBuffer) {
581 IGBPQueueBufferRequestParcel request{ctx.ReadBuffer()}; 583 IGBPQueueBufferRequestParcel request{ctx.ReadBuffer()};
582 584
583 buffer_queue->QueueBuffer(request.data.slot, request.data.transform, 585 buffer_queue.QueueBuffer(request.data.slot, request.data.transform,
584 request.data.GetCropRect()); 586 request.data.GetCropRect());
585 587
586 IGBPQueueBufferResponseParcel response{1280, 720}; 588 IGBPQueueBufferResponseParcel response{1280, 720};
587 ctx.WriteBuffer(response.Serialize()); 589 ctx.WriteBuffer(response.Serialize());
588 } else if (transaction == TransactionId::Query) { 590 } else if (transaction == TransactionId::Query) {
589 IGBPQueryRequestParcel request{ctx.ReadBuffer()}; 591 IGBPQueryRequestParcel request{ctx.ReadBuffer()};
590 592
591 u32 value = 593 const u32 value =
592 buffer_queue->Query(static_cast<NVFlinger::BufferQueue::QueryType>(request.type)); 594 buffer_queue.Query(static_cast<NVFlinger::BufferQueue::QueryType>(request.type));
593 595
594 IGBPQueryResponseParcel response{value}; 596 IGBPQueryResponseParcel response{value};
595 ctx.WriteBuffer(response.Serialize()); 597 ctx.WriteBuffer(response.Serialize());
@@ -629,12 +631,12 @@ private:
629 631
630 LOG_WARNING(Service_VI, "(STUBBED) called id={}, unknown={:08X}", id, unknown); 632 LOG_WARNING(Service_VI, "(STUBBED) called id={}, unknown={:08X}", id, unknown);
631 633
632 const auto buffer_queue = nv_flinger->FindBufferQueue(id); 634 const auto& buffer_queue = nv_flinger->FindBufferQueue(id);
633 635
634 // TODO(Subv): Find out what this actually is. 636 // TODO(Subv): Find out what this actually is.
635 IPC::ResponseBuilder rb{ctx, 2, 1}; 637 IPC::ResponseBuilder rb{ctx, 2, 1};
636 rb.Push(RESULT_SUCCESS); 638 rb.Push(RESULT_SUCCESS);
637 rb.PushCopyObjects(buffer_queue->GetBufferWaitEvent()); 639 rb.PushCopyObjects(buffer_queue.GetBufferWaitEvent());
638 } 640 }
639 641
640 std::shared_ptr<NVFlinger::NVFlinger> nv_flinger; 642 std::shared_ptr<NVFlinger::NVFlinger> nv_flinger;
@@ -752,6 +754,7 @@ public:
752 {1102, nullptr, "GetDisplayResolution"}, 754 {1102, nullptr, "GetDisplayResolution"},
753 {2010, &IManagerDisplayService::CreateManagedLayer, "CreateManagedLayer"}, 755 {2010, &IManagerDisplayService::CreateManagedLayer, "CreateManagedLayer"},
754 {2011, nullptr, "DestroyManagedLayer"}, 756 {2011, nullptr, "DestroyManagedLayer"},
757 {2012, nullptr, "CreateStrayLayer"},
755 {2050, nullptr, "CreateIndirectLayer"}, 758 {2050, nullptr, "CreateIndirectLayer"},
756 {2051, nullptr, "DestroyIndirectLayer"}, 759 {2051, nullptr, "DestroyIndirectLayer"},
757 {2052, nullptr, "CreateIndirectProducerEndPoint"}, 760 {2052, nullptr, "CreateIndirectProducerEndPoint"},
@@ -1202,26 +1205,40 @@ IApplicationDisplayService::IApplicationDisplayService(
1202 RegisterHandlers(functions); 1205 RegisterHandlers(functions);
1203} 1206}
1204 1207
1205Module::Interface::Interface(std::shared_ptr<Module> module, const char* name, 1208static bool IsValidServiceAccess(Permission permission, Policy policy) {
1206 std::shared_ptr<NVFlinger::NVFlinger> nv_flinger) 1209 if (permission == Permission::User) {
1207 : ServiceFramework(name), module(std::move(module)), nv_flinger(std::move(nv_flinger)) {} 1210 return policy == Policy::User;
1211 }
1212
1213 if (permission == Permission::System || permission == Permission::Manager) {
1214 return policy == Policy::User || policy == Policy::Compositor;
1215 }
1208 1216
1209Module::Interface::~Interface() = default; 1217 return false;
1218}
1210 1219
1211void Module::Interface::GetDisplayService(Kernel::HLERequestContext& ctx) { 1220void detail::GetDisplayServiceImpl(Kernel::HLERequestContext& ctx,
1212 LOG_WARNING(Service_VI, "(STUBBED) called"); 1221 std::shared_ptr<NVFlinger::NVFlinger> nv_flinger,
1222 Permission permission) {
1223 IPC::RequestParser rp{ctx};
1224 const auto policy = rp.PopEnum<Policy>();
1225
1226 if (!IsValidServiceAccess(permission, policy)) {
1227 IPC::ResponseBuilder rb{ctx, 2};
1228 rb.Push(ERR_PERMISSION_DENIED);
1229 return;
1230 }
1213 1231
1214 IPC::ResponseBuilder rb{ctx, 2, 0, 1}; 1232 IPC::ResponseBuilder rb{ctx, 2, 0, 1};
1215 rb.Push(RESULT_SUCCESS); 1233 rb.Push(RESULT_SUCCESS);
1216 rb.PushIpcInterface<IApplicationDisplayService>(nv_flinger); 1234 rb.PushIpcInterface<IApplicationDisplayService>(std::move(nv_flinger));
1217} 1235}
1218 1236
1219void InstallInterfaces(SM::ServiceManager& service_manager, 1237void InstallInterfaces(SM::ServiceManager& service_manager,
1220 std::shared_ptr<NVFlinger::NVFlinger> nv_flinger) { 1238 std::shared_ptr<NVFlinger::NVFlinger> nv_flinger) {
1221 auto module = std::make_shared<Module>(); 1239 std::make_shared<VI_M>(nv_flinger)->InstallAsService(service_manager);
1222 std::make_shared<VI_M>(module, nv_flinger)->InstallAsService(service_manager); 1240 std::make_shared<VI_S>(nv_flinger)->InstallAsService(service_manager);
1223 std::make_shared<VI_S>(module, nv_flinger)->InstallAsService(service_manager); 1241 std::make_shared<VI_U>(nv_flinger)->InstallAsService(service_manager);
1224 std::make_shared<VI_U>(module, nv_flinger)->InstallAsService(service_manager);
1225} 1242}
1226 1243
1227} // namespace Service::VI 1244} // namespace Service::VI
diff --git a/src/core/hle/service/vi/vi.h b/src/core/hle/service/vi/vi.h
index e3963502a..6b66f8b81 100644
--- a/src/core/hle/service/vi/vi.h
+++ b/src/core/hle/service/vi/vi.h
@@ -4,12 +4,21 @@
4 4
5#pragma once 5#pragma once
6 6
7#include "core/hle/service/service.h" 7#include <memory>
8#include "common/common_types.h"
9
10namespace Kernel {
11class HLERequestContext;
12}
8 13
9namespace Service::NVFlinger { 14namespace Service::NVFlinger {
10class NVFlinger; 15class NVFlinger;
11} 16}
12 17
18namespace Service::SM {
19class ServiceManager;
20}
21
13namespace Service::VI { 22namespace Service::VI {
14 23
15enum class DisplayResolution : u32 { 24enum class DisplayResolution : u32 {
@@ -19,22 +28,25 @@ enum class DisplayResolution : u32 {
19 UndockedHeight = 720, 28 UndockedHeight = 720,
20}; 29};
21 30
22class Module final { 31/// Permission level for a particular VI service instance
23public: 32enum class Permission {
24 class Interface : public ServiceFramework<Interface> { 33 User,
25 public: 34 System,
26 explicit Interface(std::shared_ptr<Module> module, const char* name, 35 Manager,
27 std::shared_ptr<NVFlinger::NVFlinger> nv_flinger); 36};
28 ~Interface() override;
29
30 void GetDisplayService(Kernel::HLERequestContext& ctx);
31 37
32 protected: 38/// A policy type that may be requested via GetDisplayService and
33 std::shared_ptr<Module> module; 39/// GetDisplayServiceWithProxyNameExchange
34 std::shared_ptr<NVFlinger::NVFlinger> nv_flinger; 40enum class Policy {
35 }; 41 User,
42 Compositor,
36}; 43};
37 44
45namespace detail {
46void GetDisplayServiceImpl(Kernel::HLERequestContext& ctx,
47 std::shared_ptr<NVFlinger::NVFlinger> nv_flinger, Permission permission);
48} // namespace detail
49
38/// Registers all VI services with the specified service manager. 50/// Registers all VI services with the specified service manager.
39void InstallInterfaces(SM::ServiceManager& service_manager, 51void InstallInterfaces(SM::ServiceManager& service_manager,
40 std::shared_ptr<NVFlinger::NVFlinger> nv_flinger); 52 std::shared_ptr<NVFlinger::NVFlinger> nv_flinger);
diff --git a/src/core/hle/service/vi/vi_m.cpp b/src/core/hle/service/vi/vi_m.cpp
index 207c06b16..06070087f 100644
--- a/src/core/hle/service/vi/vi_m.cpp
+++ b/src/core/hle/service/vi/vi_m.cpp
@@ -2,12 +2,14 @@
2// Licensed under GPLv2 or any later version 2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included. 3// Refer to the license.txt file included.
4 4
5#include "common/logging/log.h"
6#include "core/hle/service/vi/vi.h"
5#include "core/hle/service/vi/vi_m.h" 7#include "core/hle/service/vi/vi_m.h"
6 8
7namespace Service::VI { 9namespace Service::VI {
8 10
9VI_M::VI_M(std::shared_ptr<Module> module, std::shared_ptr<NVFlinger::NVFlinger> nv_flinger) 11VI_M::VI_M(std::shared_ptr<NVFlinger::NVFlinger> nv_flinger)
10 : Module::Interface(std::move(module), "vi:m", std::move(nv_flinger)) { 12 : ServiceFramework{"vi:m"}, nv_flinger{std::move(nv_flinger)} {
11 static const FunctionInfo functions[] = { 13 static const FunctionInfo functions[] = {
12 {2, &VI_M::GetDisplayService, "GetDisplayService"}, 14 {2, &VI_M::GetDisplayService, "GetDisplayService"},
13 {3, nullptr, "GetDisplayServiceWithProxyNameExchange"}, 15 {3, nullptr, "GetDisplayServiceWithProxyNameExchange"},
@@ -17,4 +19,10 @@ VI_M::VI_M(std::shared_ptr<Module> module, std::shared_ptr<NVFlinger::NVFlinger>
17 19
18VI_M::~VI_M() = default; 20VI_M::~VI_M() = default;
19 21
22void VI_M::GetDisplayService(Kernel::HLERequestContext& ctx) {
23 LOG_DEBUG(Service_VI, "called");
24
25 detail::GetDisplayServiceImpl(ctx, nv_flinger, Permission::Manager);
26}
27
20} // namespace Service::VI 28} // namespace Service::VI
diff --git a/src/core/hle/service/vi/vi_m.h b/src/core/hle/service/vi/vi_m.h
index 487d58d50..290e06689 100644
--- a/src/core/hle/service/vi/vi_m.h
+++ b/src/core/hle/service/vi/vi_m.h
@@ -4,14 +4,27 @@
4 4
5#pragma once 5#pragma once
6 6
7#include "core/hle/service/vi/vi.h" 7#include "core/hle/service/service.h"
8
9namespace Kernel {
10class HLERequestContext;
11}
12
13namespace Service::NVFlinger {
14class NVFlinger;
15}
8 16
9namespace Service::VI { 17namespace Service::VI {
10 18
11class VI_M final : public Module::Interface { 19class VI_M final : public ServiceFramework<VI_M> {
12public: 20public:
13 explicit VI_M(std::shared_ptr<Module> module, std::shared_ptr<NVFlinger::NVFlinger> nv_flinger); 21 explicit VI_M(std::shared_ptr<NVFlinger::NVFlinger> nv_flinger);
14 ~VI_M() override; 22 ~VI_M() override;
23
24private:
25 void GetDisplayService(Kernel::HLERequestContext& ctx);
26
27 std::shared_ptr<NVFlinger::NVFlinger> nv_flinger;
15}; 28};
16 29
17} // namespace Service::VI 30} // namespace Service::VI
diff --git a/src/core/hle/service/vi/vi_s.cpp b/src/core/hle/service/vi/vi_s.cpp
index 920e6a1f6..57c596cc4 100644
--- a/src/core/hle/service/vi/vi_s.cpp
+++ b/src/core/hle/service/vi/vi_s.cpp
@@ -2,12 +2,14 @@
2// Licensed under GPLv2 or any later version 2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included. 3// Refer to the license.txt file included.
4 4
5#include "common/logging/log.h"
6#include "core/hle/service/vi/vi.h"
5#include "core/hle/service/vi/vi_s.h" 7#include "core/hle/service/vi/vi_s.h"
6 8
7namespace Service::VI { 9namespace Service::VI {
8 10
9VI_S::VI_S(std::shared_ptr<Module> module, std::shared_ptr<NVFlinger::NVFlinger> nv_flinger) 11VI_S::VI_S(std::shared_ptr<NVFlinger::NVFlinger> nv_flinger)
10 : Module::Interface(std::move(module), "vi:s", std::move(nv_flinger)) { 12 : ServiceFramework{"vi:s"}, nv_flinger{std::move(nv_flinger)} {
11 static const FunctionInfo functions[] = { 13 static const FunctionInfo functions[] = {
12 {1, &VI_S::GetDisplayService, "GetDisplayService"}, 14 {1, &VI_S::GetDisplayService, "GetDisplayService"},
13 {3, nullptr, "GetDisplayServiceWithProxyNameExchange"}, 15 {3, nullptr, "GetDisplayServiceWithProxyNameExchange"},
@@ -17,4 +19,10 @@ VI_S::VI_S(std::shared_ptr<Module> module, std::shared_ptr<NVFlinger::NVFlinger>
17 19
18VI_S::~VI_S() = default; 20VI_S::~VI_S() = default;
19 21
22void VI_S::GetDisplayService(Kernel::HLERequestContext& ctx) {
23 LOG_DEBUG(Service_VI, "called");
24
25 detail::GetDisplayServiceImpl(ctx, nv_flinger, Permission::System);
26}
27
20} // namespace Service::VI 28} // namespace Service::VI
diff --git a/src/core/hle/service/vi/vi_s.h b/src/core/hle/service/vi/vi_s.h
index bbc31148f..47804dc0b 100644
--- a/src/core/hle/service/vi/vi_s.h
+++ b/src/core/hle/service/vi/vi_s.h
@@ -4,14 +4,27 @@
4 4
5#pragma once 5#pragma once
6 6
7#include "core/hle/service/vi/vi.h" 7#include "core/hle/service/service.h"
8
9namespace Kernel {
10class HLERequestContext;
11}
12
13namespace Service::NVFlinger {
14class NVFlinger;
15}
8 16
9namespace Service::VI { 17namespace Service::VI {
10 18
11class VI_S final : public Module::Interface { 19class VI_S final : public ServiceFramework<VI_S> {
12public: 20public:
13 explicit VI_S(std::shared_ptr<Module> module, std::shared_ptr<NVFlinger::NVFlinger> nv_flinger); 21 explicit VI_S(std::shared_ptr<NVFlinger::NVFlinger> nv_flinger);
14 ~VI_S() override; 22 ~VI_S() override;
23
24private:
25 void GetDisplayService(Kernel::HLERequestContext& ctx);
26
27 std::shared_ptr<NVFlinger::NVFlinger> nv_flinger;
15}; 28};
16 29
17} // namespace Service::VI 30} // namespace Service::VI
diff --git a/src/core/hle/service/vi/vi_u.cpp b/src/core/hle/service/vi/vi_u.cpp
index d81e410d6..9d5ceb608 100644
--- a/src/core/hle/service/vi/vi_u.cpp
+++ b/src/core/hle/service/vi/vi_u.cpp
@@ -2,12 +2,14 @@
2// Licensed under GPLv2 or any later version 2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included. 3// Refer to the license.txt file included.
4 4
5#include "common/logging/log.h"
6#include "core/hle/service/vi/vi.h"
5#include "core/hle/service/vi/vi_u.h" 7#include "core/hle/service/vi/vi_u.h"
6 8
7namespace Service::VI { 9namespace Service::VI {
8 10
9VI_U::VI_U(std::shared_ptr<Module> module, std::shared_ptr<NVFlinger::NVFlinger> nv_flinger) 11VI_U::VI_U(std::shared_ptr<NVFlinger::NVFlinger> nv_flinger)
10 : Module::Interface(std::move(module), "vi:u", std::move(nv_flinger)) { 12 : ServiceFramework{"vi:u"}, nv_flinger{std::move(nv_flinger)} {
11 static const FunctionInfo functions[] = { 13 static const FunctionInfo functions[] = {
12 {0, &VI_U::GetDisplayService, "GetDisplayService"}, 14 {0, &VI_U::GetDisplayService, "GetDisplayService"},
13 }; 15 };
@@ -16,4 +18,10 @@ VI_U::VI_U(std::shared_ptr<Module> module, std::shared_ptr<NVFlinger::NVFlinger>
16 18
17VI_U::~VI_U() = default; 19VI_U::~VI_U() = default;
18 20
21void VI_U::GetDisplayService(Kernel::HLERequestContext& ctx) {
22 LOG_DEBUG(Service_VI, "called");
23
24 detail::GetDisplayServiceImpl(ctx, nv_flinger, Permission::User);
25}
26
19} // namespace Service::VI 27} // namespace Service::VI
diff --git a/src/core/hle/service/vi/vi_u.h b/src/core/hle/service/vi/vi_u.h
index b92f28c92..19bdb73b0 100644
--- a/src/core/hle/service/vi/vi_u.h
+++ b/src/core/hle/service/vi/vi_u.h
@@ -4,14 +4,27 @@
4 4
5#pragma once 5#pragma once
6 6
7#include "core/hle/service/vi/vi.h" 7#include "core/hle/service/service.h"
8
9namespace Kernel {
10class HLERequestContext;
11}
12
13namespace Service::NVFlinger {
14class NVFlinger;
15}
8 16
9namespace Service::VI { 17namespace Service::VI {
10 18
11class VI_U final : public Module::Interface { 19class VI_U final : public ServiceFramework<VI_U> {
12public: 20public:
13 explicit VI_U(std::shared_ptr<Module> module, std::shared_ptr<NVFlinger::NVFlinger> nv_flinger); 21 explicit VI_U(std::shared_ptr<NVFlinger::NVFlinger> nv_flinger);
14 ~VI_U() override; 22 ~VI_U() override;
23
24private:
25 void GetDisplayService(Kernel::HLERequestContext& ctx);
26
27 std::shared_ptr<NVFlinger::NVFlinger> nv_flinger;
15}; 28};
16 29
17} // namespace Service::VI 30} // namespace Service::VI
diff --git a/src/core/memory.cpp b/src/core/memory.cpp
index e9166dbd9..4fde53033 100644
--- a/src/core/memory.cpp
+++ b/src/core/memory.cpp
@@ -67,19 +67,27 @@ static void MapPages(PageTable& page_table, VAddr base, u64 size, u8* memory, Pa
67 LOG_DEBUG(HW_Memory, "Mapping {} onto {:016X}-{:016X}", fmt::ptr(memory), base * PAGE_SIZE, 67 LOG_DEBUG(HW_Memory, "Mapping {} onto {:016X}-{:016X}", fmt::ptr(memory), base * PAGE_SIZE,
68 (base + size) * PAGE_SIZE); 68 (base + size) * PAGE_SIZE);
69 69
70 RasterizerFlushVirtualRegion(base << PAGE_BITS, size * PAGE_SIZE, 70 // During boot, current_page_table might not be set yet, in which case we need not flush
71 FlushMode::FlushAndInvalidate); 71 if (current_page_table) {
72 RasterizerFlushVirtualRegion(base << PAGE_BITS, size * PAGE_SIZE,
73 FlushMode::FlushAndInvalidate);
74 }
72 75
73 VAddr end = base + size; 76 VAddr end = base + size;
74 while (base != end) { 77 ASSERT_MSG(end <= page_table.pointers.size(), "out of range mapping at {:016X}",
75 ASSERT_MSG(base < page_table.pointers.size(), "out of range mapping at {:016X}", base); 78 base + page_table.pointers.size());
79
80 std::fill(page_table.attributes.begin() + base, page_table.attributes.begin() + end, type);
76 81
77 page_table.attributes[base] = type; 82 if (memory == nullptr) {
78 page_table.pointers[base] = memory; 83 std::fill(page_table.pointers.begin() + base, page_table.pointers.begin() + end, memory);
84 } else {
85 while (base != end) {
86 page_table.pointers[base] = memory;
79 87
80 base += 1; 88 base += 1;
81 if (memory != nullptr)
82 memory += PAGE_SIZE; 89 memory += PAGE_SIZE;
90 }
83 } 91 }
84} 92}
85 93
@@ -166,9 +174,6 @@ T Read(const VAddr vaddr) {
166 return value; 174 return value;
167 } 175 }
168 176
169 // The memory access might do an MMIO or cached access, so we have to lock the HLE kernel state
170 std::lock_guard<std::recursive_mutex> lock(HLE::g_hle_lock);
171
172 PageType type = current_page_table->attributes[vaddr >> PAGE_BITS]; 177 PageType type = current_page_table->attributes[vaddr >> PAGE_BITS];
173 switch (type) { 178 switch (type) {
174 case PageType::Unmapped: 179 case PageType::Unmapped:
@@ -199,9 +204,6 @@ void Write(const VAddr vaddr, const T data) {
199 return; 204 return;
200 } 205 }
201 206
202 // The memory access might do an MMIO or cached access, so we have to lock the HLE kernel state
203 std::lock_guard<std::recursive_mutex> lock(HLE::g_hle_lock);
204
205 PageType type = current_page_table->attributes[vaddr >> PAGE_BITS]; 207 PageType type = current_page_table->attributes[vaddr >> PAGE_BITS];
206 switch (type) { 208 switch (type) {
207 case PageType::Unmapped: 209 case PageType::Unmapped:
@@ -357,16 +359,16 @@ void RasterizerFlushVirtualRegion(VAddr start, u64 size, FlushMode mode) {
357 const VAddr overlap_end = std::min(end, region_end); 359 const VAddr overlap_end = std::min(end, region_end);
358 const VAddr overlap_size = overlap_end - overlap_start; 360 const VAddr overlap_size = overlap_end - overlap_start;
359 361
360 auto& rasterizer = system_instance.Renderer().Rasterizer(); 362 auto& gpu = system_instance.GPU();
361 switch (mode) { 363 switch (mode) {
362 case FlushMode::Flush: 364 case FlushMode::Flush:
363 rasterizer.FlushRegion(overlap_start, overlap_size); 365 gpu.FlushRegion(ToCacheAddr(GetPointer(overlap_start)), overlap_size);
364 break; 366 break;
365 case FlushMode::Invalidate: 367 case FlushMode::Invalidate:
366 rasterizer.InvalidateRegion(overlap_start, overlap_size); 368 gpu.InvalidateRegion(ToCacheAddr(GetPointer(overlap_start)), overlap_size);
367 break; 369 break;
368 case FlushMode::FlushAndInvalidate: 370 case FlushMode::FlushAndInvalidate:
369 rasterizer.FlushAndInvalidateRegion(overlap_start, overlap_size); 371 gpu.FlushAndInvalidateRegion(ToCacheAddr(GetPointer(overlap_start)), overlap_size);
370 break; 372 break;
371 } 373 }
372 }; 374 };
diff --git a/src/core/settings.cpp b/src/core/settings.cpp
index 2e232e1e7..6dd3139cc 100644
--- a/src/core/settings.cpp
+++ b/src/core/settings.cpp
@@ -91,7 +91,10 @@ void LogSettings() {
91 LogSetting("Renderer_UseResolutionFactor", Settings::values.resolution_factor); 91 LogSetting("Renderer_UseResolutionFactor", Settings::values.resolution_factor);
92 LogSetting("Renderer_UseFrameLimit", Settings::values.use_frame_limit); 92 LogSetting("Renderer_UseFrameLimit", Settings::values.use_frame_limit);
93 LogSetting("Renderer_FrameLimit", Settings::values.frame_limit); 93 LogSetting("Renderer_FrameLimit", Settings::values.frame_limit);
94 LogSetting("Renderer_UseDiskShaderCache", Settings::values.use_disk_shader_cache);
94 LogSetting("Renderer_UseAccurateGpuEmulation", Settings::values.use_accurate_gpu_emulation); 95 LogSetting("Renderer_UseAccurateGpuEmulation", Settings::values.use_accurate_gpu_emulation);
96 LogSetting("Renderer_UseAsynchronousGpuEmulation",
97 Settings::values.use_asynchronous_gpu_emulation);
95 LogSetting("Audio_OutputEngine", Settings::values.sink_id); 98 LogSetting("Audio_OutputEngine", Settings::values.sink_id);
96 LogSetting("Audio_EnableAudioStretching", Settings::values.enable_audio_stretching); 99 LogSetting("Audio_EnableAudioStretching", Settings::values.enable_audio_stretching);
97 LogSetting("Audio_OutputDevice", Settings::values.audio_device_id); 100 LogSetting("Audio_OutputDevice", Settings::values.audio_device_id);
diff --git a/src/core/settings.h b/src/core/settings.h
index 7e76e0466..cdfb2f742 100644
--- a/src/core/settings.h
+++ b/src/core/settings.h
@@ -393,6 +393,7 @@ struct Values {
393 u16 frame_limit; 393 u16 frame_limit;
394 bool use_disk_shader_cache; 394 bool use_disk_shader_cache;
395 bool use_accurate_gpu_emulation; 395 bool use_accurate_gpu_emulation;
396 bool use_asynchronous_gpu_emulation;
396 397
397 float bg_red; 398 float bg_red;
398 float bg_green; 399 float bg_green;
diff --git a/src/core/telemetry_session.cpp b/src/core/telemetry_session.cpp
index 58dfcc4df..e1db06811 100644
--- a/src/core/telemetry_session.cpp
+++ b/src/core/telemetry_session.cpp
@@ -162,6 +162,8 @@ TelemetrySession::TelemetrySession() {
162 Settings::values.use_disk_shader_cache); 162 Settings::values.use_disk_shader_cache);
163 AddField(Telemetry::FieldType::UserConfig, "Renderer_UseAccurateGpuEmulation", 163 AddField(Telemetry::FieldType::UserConfig, "Renderer_UseAccurateGpuEmulation",
164 Settings::values.use_accurate_gpu_emulation); 164 Settings::values.use_accurate_gpu_emulation);
165 AddField(Telemetry::FieldType::UserConfig, "Renderer_UseAsynchronousGpuEmulation",
166 Settings::values.use_asynchronous_gpu_emulation);
165 AddField(Telemetry::FieldType::UserConfig, "System_UseDockedMode", 167 AddField(Telemetry::FieldType::UserConfig, "System_UseDockedMode",
166 Settings::values.use_docked_mode); 168 Settings::values.use_docked_mode);
167} 169}
diff --git a/src/input_common/CMakeLists.txt b/src/input_common/CMakeLists.txt
index 1c7db28c0..5b4e032bd 100644
--- a/src/input_common/CMakeLists.txt
+++ b/src/input_common/CMakeLists.txt
@@ -7,15 +7,18 @@ add_library(input_common STATIC
7 main.h 7 main.h
8 motion_emu.cpp 8 motion_emu.cpp
9 motion_emu.h 9 motion_emu.h
10 10 sdl/sdl.cpp
11 $<$<BOOL:${SDL2_FOUND}>:sdl/sdl.cpp sdl/sdl.h> 11 sdl/sdl.h
12) 12)
13 13
14create_target_directory_groups(input_common)
15
16target_link_libraries(input_common PUBLIC core PRIVATE common)
17
18if(SDL2_FOUND) 14if(SDL2_FOUND)
15 target_sources(input_common PRIVATE
16 sdl/sdl_impl.cpp
17 sdl/sdl_impl.h
18 )
19 target_link_libraries(input_common PRIVATE SDL2) 19 target_link_libraries(input_common PRIVATE SDL2)
20 target_compile_definitions(input_common PRIVATE HAVE_SDL2) 20 target_compile_definitions(input_common PRIVATE HAVE_SDL2)
21endif() 21endif()
22
23create_target_directory_groups(input_common)
24target_link_libraries(input_common PUBLIC core PRIVATE common)
diff --git a/src/input_common/main.cpp b/src/input_common/main.cpp
index 37f572853..8e66c1b15 100644
--- a/src/input_common/main.cpp
+++ b/src/input_common/main.cpp
@@ -17,10 +17,7 @@ namespace InputCommon {
17 17
18static std::shared_ptr<Keyboard> keyboard; 18static std::shared_ptr<Keyboard> keyboard;
19static std::shared_ptr<MotionEmu> motion_emu; 19static std::shared_ptr<MotionEmu> motion_emu;
20 20static std::unique_ptr<SDL::State> sdl;
21#ifdef HAVE_SDL2
22static std::thread poll_thread;
23#endif
24 21
25void Init() { 22void Init() {
26 keyboard = std::make_shared<Keyboard>(); 23 keyboard = std::make_shared<Keyboard>();
@@ -30,15 +27,7 @@ void Init() {
30 motion_emu = std::make_shared<MotionEmu>(); 27 motion_emu = std::make_shared<MotionEmu>();
31 Input::RegisterFactory<Input::MotionDevice>("motion_emu", motion_emu); 28 Input::RegisterFactory<Input::MotionDevice>("motion_emu", motion_emu);
32 29
33#ifdef HAVE_SDL2 30 sdl = SDL::Init();
34 SDL::Init();
35#endif
36}
37
38void StartJoystickEventHandler() {
39#ifdef HAVE_SDL2
40 poll_thread = std::thread(SDL::PollLoop);
41#endif
42} 31}
43 32
44void Shutdown() { 33void Shutdown() {
@@ -47,11 +36,7 @@ void Shutdown() {
47 Input::UnregisterFactory<Input::AnalogDevice>("analog_from_button"); 36 Input::UnregisterFactory<Input::AnalogDevice>("analog_from_button");
48 Input::UnregisterFactory<Input::MotionDevice>("motion_emu"); 37 Input::UnregisterFactory<Input::MotionDevice>("motion_emu");
49 motion_emu.reset(); 38 motion_emu.reset();
50 39 sdl.reset();
51#ifdef HAVE_SDL2
52 SDL::Shutdown();
53 poll_thread.join();
54#endif
55} 40}
56 41
57Keyboard* GetKeyboard() { 42Keyboard* GetKeyboard() {
@@ -88,7 +73,7 @@ namespace Polling {
88 73
89std::vector<std::unique_ptr<DevicePoller>> GetPollers(DeviceType type) { 74std::vector<std::unique_ptr<DevicePoller>> GetPollers(DeviceType type) {
90#ifdef HAVE_SDL2 75#ifdef HAVE_SDL2
91 return SDL::Polling::GetPollers(type); 76 return sdl->GetPollers(type);
92#else 77#else
93 return {}; 78 return {};
94#endif 79#endif
diff --git a/src/input_common/main.h b/src/input_common/main.h
index 9eb13106e..77a0ce90b 100644
--- a/src/input_common/main.h
+++ b/src/input_common/main.h
@@ -20,8 +20,6 @@ void Init();
20/// Deregisters all built-in input device factories and shuts them down. 20/// Deregisters all built-in input device factories and shuts them down.
21void Shutdown(); 21void Shutdown();
22 22
23void StartJoystickEventHandler();
24
25class Keyboard; 23class Keyboard;
26 24
27/// Gets the keyboard button device factory. 25/// Gets the keyboard button device factory.
diff --git a/src/input_common/motion_emu.cpp b/src/input_common/motion_emu.cpp
index 9570c060e..6d96d4019 100644
--- a/src/input_common/motion_emu.cpp
+++ b/src/input_common/motion_emu.cpp
@@ -32,12 +32,12 @@ public:
32 } 32 }
33 33
34 void BeginTilt(int x, int y) { 34 void BeginTilt(int x, int y) {
35 mouse_origin = Math::MakeVec(x, y); 35 mouse_origin = Common::MakeVec(x, y);
36 is_tilting = true; 36 is_tilting = true;
37 } 37 }
38 38
39 void Tilt(int x, int y) { 39 void Tilt(int x, int y) {
40 auto mouse_move = Math::MakeVec(x, y) - mouse_origin; 40 auto mouse_move = Common::MakeVec(x, y) - mouse_origin;
41 if (is_tilting) { 41 if (is_tilting) {
42 std::lock_guard<std::mutex> guard(tilt_mutex); 42 std::lock_guard<std::mutex> guard(tilt_mutex);
43 if (mouse_move.x == 0 && mouse_move.y == 0) { 43 if (mouse_move.x == 0 && mouse_move.y == 0) {
@@ -45,7 +45,7 @@ public:
45 } else { 45 } else {
46 tilt_direction = mouse_move.Cast<float>(); 46 tilt_direction = mouse_move.Cast<float>();
47 tilt_angle = 47 tilt_angle =
48 std::clamp(tilt_direction.Normalize() * sensitivity, 0.0f, MathUtil::PI * 0.5f); 48 std::clamp(tilt_direction.Normalize() * sensitivity, 0.0f, Common::PI * 0.5f);
49 } 49 }
50 } 50 }
51 } 51 }
@@ -56,7 +56,7 @@ public:
56 is_tilting = false; 56 is_tilting = false;
57 } 57 }
58 58
59 std::tuple<Math::Vec3<float>, Math::Vec3<float>> GetStatus() { 59 std::tuple<Common::Vec3<float>, Common::Vec3<float>> GetStatus() {
60 std::lock_guard<std::mutex> guard(status_mutex); 60 std::lock_guard<std::mutex> guard(status_mutex);
61 return status; 61 return status;
62 } 62 }
@@ -66,17 +66,17 @@ private:
66 const std::chrono::steady_clock::duration update_duration; 66 const std::chrono::steady_clock::duration update_duration;
67 const float sensitivity; 67 const float sensitivity;
68 68
69 Math::Vec2<int> mouse_origin; 69 Common::Vec2<int> mouse_origin;
70 70
71 std::mutex tilt_mutex; 71 std::mutex tilt_mutex;
72 Math::Vec2<float> tilt_direction; 72 Common::Vec2<float> tilt_direction;
73 float tilt_angle = 0; 73 float tilt_angle = 0;
74 74
75 bool is_tilting = false; 75 bool is_tilting = false;
76 76
77 Common::Event shutdown_event; 77 Common::Event shutdown_event;
78 78
79 std::tuple<Math::Vec3<float>, Math::Vec3<float>> status; 79 std::tuple<Common::Vec3<float>, Common::Vec3<float>> status;
80 std::mutex status_mutex; 80 std::mutex status_mutex;
81 81
82 // Note: always keep the thread declaration at the end so that other objects are initialized 82 // Note: always keep the thread declaration at the end so that other objects are initialized
@@ -85,8 +85,8 @@ private:
85 85
86 void MotionEmuThread() { 86 void MotionEmuThread() {
87 auto update_time = std::chrono::steady_clock::now(); 87 auto update_time = std::chrono::steady_clock::now();
88 Math::Quaternion<float> q = MakeQuaternion(Math::Vec3<float>(), 0); 88 Common::Quaternion<float> q = Common::MakeQuaternion(Common::Vec3<float>(), 0);
89 Math::Quaternion<float> old_q; 89 Common::Quaternion<float> old_q;
90 90
91 while (!shutdown_event.WaitUntil(update_time)) { 91 while (!shutdown_event.WaitUntil(update_time)) {
92 update_time += update_duration; 92 update_time += update_duration;
@@ -96,18 +96,18 @@ private:
96 std::lock_guard<std::mutex> guard(tilt_mutex); 96 std::lock_guard<std::mutex> guard(tilt_mutex);
97 97
98 // Find the quaternion describing current 3DS tilting 98 // Find the quaternion describing current 3DS tilting
99 q = MakeQuaternion(Math::MakeVec(-tilt_direction.y, 0.0f, tilt_direction.x), 99 q = Common::MakeQuaternion(
100 tilt_angle); 100 Common::MakeVec(-tilt_direction.y, 0.0f, tilt_direction.x), tilt_angle);
101 } 101 }
102 102
103 auto inv_q = q.Inverse(); 103 auto inv_q = q.Inverse();
104 104
105 // Set the gravity vector in world space 105 // Set the gravity vector in world space
106 auto gravity = Math::MakeVec(0.0f, -1.0f, 0.0f); 106 auto gravity = Common::MakeVec(0.0f, -1.0f, 0.0f);
107 107
108 // Find the angular rate vector in world space 108 // Find the angular rate vector in world space
109 auto angular_rate = ((q - old_q) * inv_q).xyz * 2; 109 auto angular_rate = ((q - old_q) * inv_q).xyz * 2;
110 angular_rate *= 1000 / update_millisecond / MathUtil::PI * 180; 110 angular_rate *= 1000 / update_millisecond / Common::PI * 180;
111 111
112 // Transform the two vectors from world space to 3DS space 112 // Transform the two vectors from world space to 3DS space
113 gravity = QuaternionRotate(inv_q, gravity); 113 gravity = QuaternionRotate(inv_q, gravity);
@@ -131,7 +131,7 @@ public:
131 device = std::make_shared<MotionEmuDevice>(update_millisecond, sensitivity); 131 device = std::make_shared<MotionEmuDevice>(update_millisecond, sensitivity);
132 } 132 }
133 133
134 std::tuple<Math::Vec3<float>, Math::Vec3<float>> GetStatus() const override { 134 std::tuple<Common::Vec3<float>, Common::Vec3<float>> GetStatus() const override {
135 return device->GetStatus(); 135 return device->GetStatus();
136 } 136 }
137 137
diff --git a/src/input_common/sdl/sdl.cpp b/src/input_common/sdl/sdl.cpp
index faf3c1fa3..644db3448 100644
--- a/src/input_common/sdl/sdl.cpp
+++ b/src/input_common/sdl/sdl.cpp
@@ -1,631 +1,19 @@
1// Copyright 2017 Citra Emulator Project 1// Copyright 2018 Citra Emulator Project
2// Licensed under GPLv2 or any later version 2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included. 3// Refer to the license.txt file included.
4 4
5#include <algorithm>
6#include <atomic>
7#include <cmath>
8#include <functional>
9#include <iterator>
10#include <mutex>
11#include <string>
12#include <thread>
13#include <tuple>
14#include <unordered_map>
15#include <utility>
16#include <vector>
17#include <SDL.h>
18#include "common/assert.h"
19#include "common/logging/log.h"
20#include "common/math_util.h"
21#include "common/param_package.h"
22#include "common/threadsafe_queue.h"
23#include "input_common/main.h"
24#include "input_common/sdl/sdl.h" 5#include "input_common/sdl/sdl.h"
6#ifdef HAVE_SDL2
7#include "input_common/sdl/sdl_impl.h"
8#endif
25 9
26namespace InputCommon { 10namespace InputCommon::SDL {
27 11
28namespace SDL { 12std::unique_ptr<State> Init() {
29 13#ifdef HAVE_SDL2
30class SDLJoystick; 14 return std::make_unique<SDLState>();
31class SDLButtonFactory; 15#else
32class SDLAnalogFactory; 16 return std::make_unique<NullState>();
33 17#endif
34/// Map of GUID of a list of corresponding virtual Joysticks
35static std::unordered_map<std::string, std::vector<std::shared_ptr<SDLJoystick>>> joystick_map;
36static std::mutex joystick_map_mutex;
37
38static std::shared_ptr<SDLButtonFactory> button_factory;
39static std::shared_ptr<SDLAnalogFactory> analog_factory;
40
41/// Used by the Pollers during config
42static std::atomic<bool> polling;
43static Common::SPSCQueue<SDL_Event> event_queue;
44
45static std::atomic<bool> initialized = false;
46
47static std::string GetGUID(SDL_Joystick* joystick) {
48 SDL_JoystickGUID guid = SDL_JoystickGetGUID(joystick);
49 char guid_str[33];
50 SDL_JoystickGetGUIDString(guid, guid_str, sizeof(guid_str));
51 return guid_str;
52}
53
54class SDLJoystick {
55public:
56 SDLJoystick(std::string guid_, int port_, SDL_Joystick* joystick,
57 decltype(&SDL_JoystickClose) deleter = &SDL_JoystickClose)
58 : guid{std::move(guid_)}, port{port_}, sdl_joystick{joystick, deleter} {}
59
60 void SetButton(int button, bool value) {
61 std::lock_guard<std::mutex> lock(mutex);
62 state.buttons[button] = value;
63 }
64
65 bool GetButton(int button) const {
66 std::lock_guard<std::mutex> lock(mutex);
67 return state.buttons.at(button);
68 }
69
70 void SetAxis(int axis, Sint16 value) {
71 std::lock_guard<std::mutex> lock(mutex);
72 state.axes[axis] = value;
73 }
74
75 float GetAxis(int axis) const {
76 std::lock_guard<std::mutex> lock(mutex);
77 return state.axes.at(axis) / 32767.0f;
78 }
79
80 std::tuple<float, float> GetAnalog(int axis_x, int axis_y) const {
81 float x = GetAxis(axis_x);
82 float y = GetAxis(axis_y);
83 y = -y; // 3DS uses an y-axis inverse from SDL
84
85 // Make sure the coordinates are in the unit circle,
86 // otherwise normalize it.
87 float r = x * x + y * y;
88 if (r > 1.0f) {
89 r = std::sqrt(r);
90 x /= r;
91 y /= r;
92 }
93
94 return std::make_tuple(x, y);
95 }
96
97 void SetHat(int hat, Uint8 direction) {
98 std::lock_guard<std::mutex> lock(mutex);
99 state.hats[hat] = direction;
100 }
101
102 bool GetHatDirection(int hat, Uint8 direction) const {
103 std::lock_guard<std::mutex> lock(mutex);
104 return (state.hats.at(hat) & direction) != 0;
105 }
106 /**
107 * The guid of the joystick
108 */
109 const std::string& GetGUID() const {
110 return guid;
111 }
112
113 /**
114 * The number of joystick from the same type that were connected before this joystick
115 */
116 int GetPort() const {
117 return port;
118 }
119
120 SDL_Joystick* GetSDLJoystick() const {
121 return sdl_joystick.get();
122 }
123
124 void SetSDLJoystick(SDL_Joystick* joystick,
125 decltype(&SDL_JoystickClose) deleter = &SDL_JoystickClose) {
126 sdl_joystick =
127 std::unique_ptr<SDL_Joystick, decltype(&SDL_JoystickClose)>(joystick, deleter);
128 }
129
130private:
131 struct State {
132 std::unordered_map<int, bool> buttons;
133 std::unordered_map<int, Sint16> axes;
134 std::unordered_map<int, Uint8> hats;
135 } state;
136 std::string guid;
137 int port;
138 std::unique_ptr<SDL_Joystick, decltype(&SDL_JoystickClose)> sdl_joystick;
139 mutable std::mutex mutex;
140};
141
142/**
143 * Get the nth joystick with the corresponding GUID
144 */
145static std::shared_ptr<SDLJoystick> GetSDLJoystickByGUID(const std::string& guid, int port) {
146 std::lock_guard<std::mutex> lock(joystick_map_mutex);
147 const auto it = joystick_map.find(guid);
148 if (it != joystick_map.end()) {
149 while (it->second.size() <= port) {
150 auto joystick = std::make_shared<SDLJoystick>(guid, it->second.size(), nullptr,
151 [](SDL_Joystick*) {});
152 it->second.emplace_back(std::move(joystick));
153 }
154 return it->second[port];
155 }
156 auto joystick = std::make_shared<SDLJoystick>(guid, 0, nullptr, [](SDL_Joystick*) {});
157 return joystick_map[guid].emplace_back(std::move(joystick));
158}
159
160/**
161 * Check how many identical joysticks (by guid) were connected before the one with sdl_id and so tie
162 * it to a SDLJoystick with the same guid and that port
163 */
164static std::shared_ptr<SDLJoystick> GetSDLJoystickBySDLID(SDL_JoystickID sdl_id) {
165 std::lock_guard<std::mutex> lock(joystick_map_mutex);
166 auto sdl_joystick = SDL_JoystickFromInstanceID(sdl_id);
167 const std::string guid = GetGUID(sdl_joystick);
168 auto map_it = joystick_map.find(guid);
169 if (map_it != joystick_map.end()) {
170 auto vec_it = std::find_if(map_it->second.begin(), map_it->second.end(),
171 [&sdl_joystick](const std::shared_ptr<SDLJoystick>& joystick) {
172 return sdl_joystick == joystick->GetSDLJoystick();
173 });
174 if (vec_it != map_it->second.end()) {
175 // This is the common case: There is already an existing SDL_Joystick maped to a
176 // SDLJoystick. return the SDLJoystick
177 return *vec_it;
178 }
179 // Search for a SDLJoystick without a mapped SDL_Joystick...
180 auto nullptr_it = std::find_if(map_it->second.begin(), map_it->second.end(),
181 [](const std::shared_ptr<SDLJoystick>& joystick) {
182 return !joystick->GetSDLJoystick();
183 });
184 if (nullptr_it != map_it->second.end()) {
185 // ... and map it
186 (*nullptr_it)->SetSDLJoystick(sdl_joystick);
187 return *nullptr_it;
188 }
189 // There is no SDLJoystick without a mapped SDL_Joystick
190 // Create a new SDLJoystick
191 auto joystick = std::make_shared<SDLJoystick>(guid, map_it->second.size(), sdl_joystick);
192 return map_it->second.emplace_back(std::move(joystick));
193 }
194 auto joystick = std::make_shared<SDLJoystick>(guid, 0, sdl_joystick);
195 return joystick_map[guid].emplace_back(std::move(joystick));
196}
197
198void InitJoystick(int joystick_index) {
199 std::lock_guard<std::mutex> lock(joystick_map_mutex);
200 SDL_Joystick* sdl_joystick = SDL_JoystickOpen(joystick_index);
201 if (!sdl_joystick) {
202 LOG_ERROR(Input, "failed to open joystick {}", joystick_index);
203 return;
204 }
205 std::string guid = GetGUID(sdl_joystick);
206 if (joystick_map.find(guid) == joystick_map.end()) {
207 auto joystick = std::make_shared<SDLJoystick>(guid, 0, sdl_joystick);
208 joystick_map[guid].emplace_back(std::move(joystick));
209 return;
210 }
211 auto& joystick_guid_list = joystick_map[guid];
212 const auto it = std::find_if(
213 joystick_guid_list.begin(), joystick_guid_list.end(),
214 [](const std::shared_ptr<SDLJoystick>& joystick) { return !joystick->GetSDLJoystick(); });
215 if (it != joystick_guid_list.end()) {
216 (*it)->SetSDLJoystick(sdl_joystick);
217 return;
218 }
219 auto joystick = std::make_shared<SDLJoystick>(guid, joystick_guid_list.size(), sdl_joystick);
220 joystick_guid_list.emplace_back(std::move(joystick));
221}
222
223void CloseJoystick(SDL_Joystick* sdl_joystick) {
224 std::lock_guard<std::mutex> lock(joystick_map_mutex);
225 std::string guid = GetGUID(sdl_joystick);
226 // This call to guid is save since the joystick is guranteed to be in that map
227 auto& joystick_guid_list = joystick_map[guid];
228 const auto joystick_it =
229 std::find_if(joystick_guid_list.begin(), joystick_guid_list.end(),
230 [&sdl_joystick](const std::shared_ptr<SDLJoystick>& joystick) {
231 return joystick->GetSDLJoystick() == sdl_joystick;
232 });
233 (*joystick_it)->SetSDLJoystick(nullptr, [](SDL_Joystick*) {});
234}
235
236void HandleGameControllerEvent(const SDL_Event& event) {
237 switch (event.type) {
238 case SDL_JOYBUTTONUP: {
239 auto joystick = GetSDLJoystickBySDLID(event.jbutton.which);
240 if (joystick) {
241 joystick->SetButton(event.jbutton.button, false);
242 }
243 break;
244 }
245 case SDL_JOYBUTTONDOWN: {
246 auto joystick = GetSDLJoystickBySDLID(event.jbutton.which);
247 if (joystick) {
248 joystick->SetButton(event.jbutton.button, true);
249 }
250 break;
251 }
252 case SDL_JOYHATMOTION: {
253 auto joystick = GetSDLJoystickBySDLID(event.jhat.which);
254 if (joystick) {
255 joystick->SetHat(event.jhat.hat, event.jhat.value);
256 }
257 break;
258 }
259 case SDL_JOYAXISMOTION: {
260 auto joystick = GetSDLJoystickBySDLID(event.jaxis.which);
261 if (joystick) {
262 joystick->SetAxis(event.jaxis.axis, event.jaxis.value);
263 }
264 break;
265 }
266 case SDL_JOYDEVICEREMOVED:
267 LOG_DEBUG(Input, "Controller removed with Instance_ID {}", event.jdevice.which);
268 CloseJoystick(SDL_JoystickFromInstanceID(event.jdevice.which));
269 break;
270 case SDL_JOYDEVICEADDED:
271 LOG_DEBUG(Input, "Controller connected with device index {}", event.jdevice.which);
272 InitJoystick(event.jdevice.which);
273 break;
274 }
275}
276
277void CloseSDLJoysticks() {
278 std::lock_guard<std::mutex> lock(joystick_map_mutex);
279 joystick_map.clear();
280}
281
282void PollLoop() {
283 if (SDL_Init(SDL_INIT_JOYSTICK) < 0) {
284 LOG_CRITICAL(Input, "SDL_Init(SDL_INIT_JOYSTICK) failed with: {}", SDL_GetError());
285 return;
286 }
287
288 SDL_Event event;
289 while (initialized) {
290 // Wait for 10 ms or until an event happens
291 if (SDL_WaitEventTimeout(&event, 10)) {
292 // Don't handle the event if we are configuring
293 if (polling) {
294 event_queue.Push(event);
295 } else {
296 HandleGameControllerEvent(event);
297 }
298 }
299 }
300 CloseSDLJoysticks();
301 SDL_QuitSubSystem(SDL_INIT_JOYSTICK);
302}
303
304class SDLButton final : public Input::ButtonDevice {
305public:
306 explicit SDLButton(std::shared_ptr<SDLJoystick> joystick_, int button_)
307 : joystick(std::move(joystick_)), button(button_) {}
308
309 bool GetStatus() const override {
310 return joystick->GetButton(button);
311 }
312
313private:
314 std::shared_ptr<SDLJoystick> joystick;
315 int button;
316};
317
318class SDLDirectionButton final : public Input::ButtonDevice {
319public:
320 explicit SDLDirectionButton(std::shared_ptr<SDLJoystick> joystick_, int hat_, Uint8 direction_)
321 : joystick(std::move(joystick_)), hat(hat_), direction(direction_) {}
322
323 bool GetStatus() const override {
324 return joystick->GetHatDirection(hat, direction);
325 }
326
327private:
328 std::shared_ptr<SDLJoystick> joystick;
329 int hat;
330 Uint8 direction;
331};
332
333class SDLAxisButton final : public Input::ButtonDevice {
334public:
335 explicit SDLAxisButton(std::shared_ptr<SDLJoystick> joystick_, int axis_, float threshold_,
336 bool trigger_if_greater_)
337 : joystick(std::move(joystick_)), axis(axis_), threshold(threshold_),
338 trigger_if_greater(trigger_if_greater_) {}
339
340 bool GetStatus() const override {
341 float axis_value = joystick->GetAxis(axis);
342 if (trigger_if_greater)
343 return axis_value > threshold;
344 return axis_value < threshold;
345 }
346
347private:
348 std::shared_ptr<SDLJoystick> joystick;
349 int axis;
350 float threshold;
351 bool trigger_if_greater;
352};
353
354class SDLAnalog final : public Input::AnalogDevice {
355public:
356 SDLAnalog(std::shared_ptr<SDLJoystick> joystick_, int axis_x_, int axis_y_)
357 : joystick(std::move(joystick_)), axis_x(axis_x_), axis_y(axis_y_) {}
358
359 std::tuple<float, float> GetStatus() const override {
360 return joystick->GetAnalog(axis_x, axis_y);
361 }
362
363private:
364 std::shared_ptr<SDLJoystick> joystick;
365 int axis_x;
366 int axis_y;
367};
368
369/// A button device factory that creates button devices from SDL joystick
370class SDLButtonFactory final : public Input::Factory<Input::ButtonDevice> {
371public:
372 /**
373 * Creates a button device from a joystick button
374 * @param params contains parameters for creating the device:
375 * - "guid": the guid of the joystick to bind
376 * - "port": the nth joystick of the same type to bind
377 * - "button"(optional): the index of the button to bind
378 * - "hat"(optional): the index of the hat to bind as direction buttons
379 * - "axis"(optional): the index of the axis to bind
380 * - "direction"(only used for hat): the direction name of the hat to bind. Can be "up",
381 * "down", "left" or "right"
382 * - "threshold"(only used for axis): a float value in (-1.0, 1.0) which the button is
383 * triggered if the axis value crosses
384 * - "direction"(only used for axis): "+" means the button is triggered when the axis
385 * value is greater than the threshold; "-" means the button is triggered when the axis
386 * value is smaller than the threshold
387 */
388 std::unique_ptr<Input::ButtonDevice> Create(const Common::ParamPackage& params) override {
389 const std::string guid = params.Get("guid", "0");
390 const int port = params.Get("port", 0);
391
392 auto joystick = GetSDLJoystickByGUID(guid, port);
393
394 if (params.Has("hat")) {
395 const int hat = params.Get("hat", 0);
396 const std::string direction_name = params.Get("direction", "");
397 Uint8 direction;
398 if (direction_name == "up") {
399 direction = SDL_HAT_UP;
400 } else if (direction_name == "down") {
401 direction = SDL_HAT_DOWN;
402 } else if (direction_name == "left") {
403 direction = SDL_HAT_LEFT;
404 } else if (direction_name == "right") {
405 direction = SDL_HAT_RIGHT;
406 } else {
407 direction = 0;
408 }
409 // This is necessary so accessing GetHat with hat won't crash
410 joystick->SetHat(hat, SDL_HAT_CENTERED);
411 return std::make_unique<SDLDirectionButton>(joystick, hat, direction);
412 }
413
414 if (params.Has("axis")) {
415 const int axis = params.Get("axis", 0);
416 const float threshold = params.Get("threshold", 0.5f);
417 const std::string direction_name = params.Get("direction", "");
418 bool trigger_if_greater;
419 if (direction_name == "+") {
420 trigger_if_greater = true;
421 } else if (direction_name == "-") {
422 trigger_if_greater = false;
423 } else {
424 trigger_if_greater = true;
425 LOG_ERROR(Input, "Unknown direction '{}'", direction_name);
426 }
427 // This is necessary so accessing GetAxis with axis won't crash
428 joystick->SetAxis(axis, 0);
429 return std::make_unique<SDLAxisButton>(joystick, axis, threshold, trigger_if_greater);
430 }
431
432 const int button = params.Get("button", 0);
433 // This is necessary so accessing GetButton with button won't crash
434 joystick->SetButton(button, false);
435 return std::make_unique<SDLButton>(joystick, button);
436 }
437};
438
439/// An analog device factory that creates analog devices from SDL joystick
440class SDLAnalogFactory final : public Input::Factory<Input::AnalogDevice> {
441public:
442 /**
443 * Creates analog device from joystick axes
444 * @param params contains parameters for creating the device:
445 * - "guid": the guid of the joystick to bind
446 * - "port": the nth joystick of the same type
447 * - "axis_x": the index of the axis to be bind as x-axis
448 * - "axis_y": the index of the axis to be bind as y-axis
449 */
450 std::unique_ptr<Input::AnalogDevice> Create(const Common::ParamPackage& params) override {
451 const std::string guid = params.Get("guid", "0");
452 const int port = params.Get("port", 0);
453 const int axis_x = params.Get("axis_x", 0);
454 const int axis_y = params.Get("axis_y", 1);
455
456 auto joystick = GetSDLJoystickByGUID(guid, port);
457
458 // This is necessary so accessing GetAxis with axis_x and axis_y won't crash
459 joystick->SetAxis(axis_x, 0);
460 joystick->SetAxis(axis_y, 0);
461 return std::make_unique<SDLAnalog>(joystick, axis_x, axis_y);
462 }
463};
464
465void Init() {
466 using namespace Input;
467 RegisterFactory<ButtonDevice>("sdl", std::make_shared<SDLButtonFactory>());
468 RegisterFactory<AnalogDevice>("sdl", std::make_shared<SDLAnalogFactory>());
469 polling = false;
470 initialized = true;
471}
472
473void Shutdown() {
474 if (initialized) {
475 using namespace Input;
476 UnregisterFactory<ButtonDevice>("sdl");
477 UnregisterFactory<AnalogDevice>("sdl");
478 initialized = false;
479 }
480}
481
482Common::ParamPackage SDLEventToButtonParamPackage(const SDL_Event& event) {
483 Common::ParamPackage params({{"engine", "sdl"}});
484 switch (event.type) {
485 case SDL_JOYAXISMOTION: {
486 auto joystick = GetSDLJoystickBySDLID(event.jaxis.which);
487 params.Set("port", joystick->GetPort());
488 params.Set("guid", joystick->GetGUID());
489 params.Set("axis", event.jaxis.axis);
490 if (event.jaxis.value > 0) {
491 params.Set("direction", "+");
492 params.Set("threshold", "0.5");
493 } else {
494 params.Set("direction", "-");
495 params.Set("threshold", "-0.5");
496 }
497 break;
498 }
499 case SDL_JOYBUTTONUP: {
500 auto joystick = GetSDLJoystickBySDLID(event.jbutton.which);
501 params.Set("port", joystick->GetPort());
502 params.Set("guid", joystick->GetGUID());
503 params.Set("button", event.jbutton.button);
504 break;
505 }
506 case SDL_JOYHATMOTION: {
507 auto joystick = GetSDLJoystickBySDLID(event.jhat.which);
508 params.Set("port", joystick->GetPort());
509 params.Set("guid", joystick->GetGUID());
510 params.Set("hat", event.jhat.hat);
511 switch (event.jhat.value) {
512 case SDL_HAT_UP:
513 params.Set("direction", "up");
514 break;
515 case SDL_HAT_DOWN:
516 params.Set("direction", "down");
517 break;
518 case SDL_HAT_LEFT:
519 params.Set("direction", "left");
520 break;
521 case SDL_HAT_RIGHT:
522 params.Set("direction", "right");
523 break;
524 default:
525 return {};
526 }
527 break;
528 }
529 }
530 return params;
531}
532
533namespace Polling {
534
535class SDLPoller : public InputCommon::Polling::DevicePoller {
536public:
537 void Start() override {
538 event_queue.Clear();
539 polling = true;
540 }
541
542 void Stop() override {
543 polling = false;
544 }
545};
546
547class SDLButtonPoller final : public SDLPoller {
548public:
549 Common::ParamPackage GetNextInput() override {
550 SDL_Event event;
551 while (event_queue.Pop(event)) {
552 switch (event.type) {
553 case SDL_JOYAXISMOTION:
554 if (std::abs(event.jaxis.value / 32767.0) < 0.5) {
555 break;
556 }
557 case SDL_JOYBUTTONUP:
558 case SDL_JOYHATMOTION:
559 return SDLEventToButtonParamPackage(event);
560 }
561 }
562 return {};
563 }
564};
565
566class SDLAnalogPoller final : public SDLPoller {
567public:
568 void Start() override {
569 SDLPoller::Start();
570
571 // Reset stored axes
572 analog_xaxis = -1;
573 analog_yaxis = -1;
574 analog_axes_joystick = -1;
575 }
576
577 Common::ParamPackage GetNextInput() override {
578 SDL_Event event;
579 while (event_queue.Pop(event)) {
580 if (event.type != SDL_JOYAXISMOTION || std::abs(event.jaxis.value / 32767.0) < 0.5) {
581 continue;
582 }
583 // An analog device needs two axes, so we need to store the axis for later and wait for
584 // a second SDL event. The axes also must be from the same joystick.
585 int axis = event.jaxis.axis;
586 if (analog_xaxis == -1) {
587 analog_xaxis = axis;
588 analog_axes_joystick = event.jaxis.which;
589 } else if (analog_yaxis == -1 && analog_xaxis != axis &&
590 analog_axes_joystick == event.jaxis.which) {
591 analog_yaxis = axis;
592 }
593 }
594 Common::ParamPackage params;
595 if (analog_xaxis != -1 && analog_yaxis != -1) {
596 auto joystick = GetSDLJoystickBySDLID(event.jaxis.which);
597 params.Set("engine", "sdl");
598 params.Set("port", joystick->GetPort());
599 params.Set("guid", joystick->GetGUID());
600 params.Set("axis_x", analog_xaxis);
601 params.Set("axis_y", analog_yaxis);
602 analog_xaxis = -1;
603 analog_yaxis = -1;
604 analog_axes_joystick = -1;
605 return params;
606 }
607 return params;
608 }
609
610private:
611 int analog_xaxis = -1;
612 int analog_yaxis = -1;
613 SDL_JoystickID analog_axes_joystick = -1;
614};
615
616std::vector<std::unique_ptr<InputCommon::Polling::DevicePoller>> GetPollers(
617 InputCommon::Polling::DeviceType type) {
618 std::vector<std::unique_ptr<InputCommon::Polling::DevicePoller>> pollers;
619 switch (type) {
620 case InputCommon::Polling::DeviceType::Analog:
621 pollers.push_back(std::make_unique<SDLAnalogPoller>());
622 break;
623 case InputCommon::Polling::DeviceType::Button:
624 pollers.push_back(std::make_unique<SDLButtonPoller>());
625 break;
626 }
627 return pollers;
628} 18}
629} // namespace Polling 19} // namespace InputCommon::SDL
630} // namespace SDL
631} // namespace InputCommon
diff --git a/src/input_common/sdl/sdl.h b/src/input_common/sdl/sdl.h
index 0206860d3..02a8d2e2c 100644
--- a/src/input_common/sdl/sdl.h
+++ b/src/input_common/sdl/sdl.h
@@ -1,4 +1,4 @@
1// Copyright 2017 Citra Emulator Project 1// Copyright 2018 Citra Emulator Project
2// Licensed under GPLv2 or any later version 2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included. 3// Refer to the license.txt file included.
4 4
@@ -7,45 +7,36 @@
7#include <memory> 7#include <memory>
8#include <vector> 8#include <vector>
9#include "core/frontend/input.h" 9#include "core/frontend/input.h"
10#include "input_common/main.h"
10 11
11union SDL_Event; 12union SDL_Event;
13
12namespace Common { 14namespace Common {
13class ParamPackage; 15class ParamPackage;
14} 16} // namespace Common
15namespace InputCommon { 17
16namespace Polling { 18namespace InputCommon::Polling {
17class DevicePoller; 19class DevicePoller;
18enum class DeviceType; 20enum class DeviceType;
19} // namespace Polling 21} // namespace InputCommon::Polling
20} // namespace InputCommon
21
22namespace InputCommon {
23namespace SDL {
24
25/// Initializes and registers SDL device factories
26void Init();
27
28/// Unregisters SDL device factories and shuts them down.
29void Shutdown();
30
31/// Needs to be called before SDL_QuitSubSystem.
32void CloseSDLJoysticks();
33 22
34/// Handle SDL_Events for joysticks from SDL_PollEvent 23namespace InputCommon::SDL {
35void HandleGameControllerEvent(const SDL_Event& event);
36 24
37/// A Loop that calls HandleGameControllerEvent until Shutdown is called 25class State {
38void PollLoop(); 26public:
27 /// Unregisters SDL device factories and shuts them down.
28 virtual ~State() = default;
39 29
40/// Creates a ParamPackage from an SDL_Event that can directly be used to create a ButtonDevice 30 virtual std::vector<std::unique_ptr<InputCommon::Polling::DevicePoller>> GetPollers(
41Common::ParamPackage SDLEventToButtonParamPackage(const SDL_Event& event); 31 InputCommon::Polling::DeviceType type) = 0;
32};
42 33
43namespace Polling { 34class NullState : public State {
35public:
36 std::vector<std::unique_ptr<InputCommon::Polling::DevicePoller>> GetPollers(
37 InputCommon::Polling::DeviceType type) override {}
38};
44 39
45/// Get all DevicePoller that use the SDL backend for a specific device type 40std::unique_ptr<State> Init();
46std::vector<std::unique_ptr<InputCommon::Polling::DevicePoller>> GetPollers(
47 InputCommon::Polling::DeviceType type);
48 41
49} // namespace Polling 42} // namespace InputCommon::SDL
50} // namespace SDL
51} // namespace InputCommon
diff --git a/src/input_common/sdl/sdl_impl.cpp b/src/input_common/sdl/sdl_impl.cpp
new file mode 100644
index 000000000..934339d3b
--- /dev/null
+++ b/src/input_common/sdl/sdl_impl.cpp
@@ -0,0 +1,669 @@
1// Copyright 2018 Citra Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <algorithm>
6#include <atomic>
7#include <cmath>
8#include <functional>
9#include <iterator>
10#include <mutex>
11#include <string>
12#include <thread>
13#include <tuple>
14#include <unordered_map>
15#include <utility>
16#include <vector>
17#include <SDL.h>
18#include "common/assert.h"
19#include "common/logging/log.h"
20#include "common/math_util.h"
21#include "common/param_package.h"
22#include "common/threadsafe_queue.h"
23#include "core/frontend/input.h"
24#include "input_common/sdl/sdl_impl.h"
25
26namespace InputCommon {
27
28namespace SDL {
29
30static std::string GetGUID(SDL_Joystick* joystick) {
31 SDL_JoystickGUID guid = SDL_JoystickGetGUID(joystick);
32 char guid_str[33];
33 SDL_JoystickGetGUIDString(guid, guid_str, sizeof(guid_str));
34 return guid_str;
35}
36
37/// Creates a ParamPackage from an SDL_Event that can directly be used to create a ButtonDevice
38static Common::ParamPackage SDLEventToButtonParamPackage(SDLState& state, const SDL_Event& event);
39
40static int SDLEventWatcher(void* userdata, SDL_Event* event) {
41 SDLState* sdl_state = reinterpret_cast<SDLState*>(userdata);
42 // Don't handle the event if we are configuring
43 if (sdl_state->polling) {
44 sdl_state->event_queue.Push(*event);
45 } else {
46 sdl_state->HandleGameControllerEvent(*event);
47 }
48 return 0;
49}
50
51class SDLJoystick {
52public:
53 SDLJoystick(std::string guid_, int port_, SDL_Joystick* joystick,
54 decltype(&SDL_JoystickClose) deleter = &SDL_JoystickClose)
55 : guid{std::move(guid_)}, port{port_}, sdl_joystick{joystick, deleter} {}
56
57 void SetButton(int button, bool value) {
58 std::lock_guard<std::mutex> lock(mutex);
59 state.buttons[button] = value;
60 }
61
62 bool GetButton(int button) const {
63 std::lock_guard<std::mutex> lock(mutex);
64 return state.buttons.at(button);
65 }
66
67 void SetAxis(int axis, Sint16 value) {
68 std::lock_guard<std::mutex> lock(mutex);
69 state.axes[axis] = value;
70 }
71
72 float GetAxis(int axis) const {
73 std::lock_guard<std::mutex> lock(mutex);
74 return state.axes.at(axis) / 32767.0f;
75 }
76
77 std::tuple<float, float> GetAnalog(int axis_x, int axis_y) const {
78 float x = GetAxis(axis_x);
79 float y = GetAxis(axis_y);
80 y = -y; // 3DS uses an y-axis inverse from SDL
81
82 // Make sure the coordinates are in the unit circle,
83 // otherwise normalize it.
84 float r = x * x + y * y;
85 if (r > 1.0f) {
86 r = std::sqrt(r);
87 x /= r;
88 y /= r;
89 }
90
91 return std::make_tuple(x, y);
92 }
93
94 void SetHat(int hat, Uint8 direction) {
95 std::lock_guard<std::mutex> lock(mutex);
96 state.hats[hat] = direction;
97 }
98
99 bool GetHatDirection(int hat, Uint8 direction) const {
100 std::lock_guard<std::mutex> lock(mutex);
101 return (state.hats.at(hat) & direction) != 0;
102 }
103 /**
104 * The guid of the joystick
105 */
106 const std::string& GetGUID() const {
107 return guid;
108 }
109
110 /**
111 * The number of joystick from the same type that were connected before this joystick
112 */
113 int GetPort() const {
114 return port;
115 }
116
117 SDL_Joystick* GetSDLJoystick() const {
118 return sdl_joystick.get();
119 }
120
121 void SetSDLJoystick(SDL_Joystick* joystick,
122 decltype(&SDL_JoystickClose) deleter = &SDL_JoystickClose) {
123 sdl_joystick =
124 std::unique_ptr<SDL_Joystick, decltype(&SDL_JoystickClose)>(joystick, deleter);
125 }
126
127private:
128 struct State {
129 std::unordered_map<int, bool> buttons;
130 std::unordered_map<int, Sint16> axes;
131 std::unordered_map<int, Uint8> hats;
132 } state;
133 std::string guid;
134 int port;
135 std::unique_ptr<SDL_Joystick, decltype(&SDL_JoystickClose)> sdl_joystick;
136 mutable std::mutex mutex;
137};
138
139/**
140 * Get the nth joystick with the corresponding GUID
141 */
142std::shared_ptr<SDLJoystick> SDLState::GetSDLJoystickByGUID(const std::string& guid, int port) {
143 std::lock_guard<std::mutex> lock(joystick_map_mutex);
144 const auto it = joystick_map.find(guid);
145 if (it != joystick_map.end()) {
146 while (it->second.size() <= port) {
147 auto joystick = std::make_shared<SDLJoystick>(guid, it->second.size(), nullptr,
148 [](SDL_Joystick*) {});
149 it->second.emplace_back(std::move(joystick));
150 }
151 return it->second[port];
152 }
153 auto joystick = std::make_shared<SDLJoystick>(guid, 0, nullptr, [](SDL_Joystick*) {});
154 return joystick_map[guid].emplace_back(std::move(joystick));
155}
156
157/**
158 * Check how many identical joysticks (by guid) were connected before the one with sdl_id and so tie
159 * it to a SDLJoystick with the same guid and that port
160 */
161std::shared_ptr<SDLJoystick> SDLState::GetSDLJoystickBySDLID(SDL_JoystickID sdl_id) {
162 auto sdl_joystick = SDL_JoystickFromInstanceID(sdl_id);
163 const std::string guid = GetGUID(sdl_joystick);
164 std::lock_guard<std::mutex> lock(joystick_map_mutex);
165 auto map_it = joystick_map.find(guid);
166 if (map_it != joystick_map.end()) {
167 auto vec_it = std::find_if(map_it->second.begin(), map_it->second.end(),
168 [&sdl_joystick](const std::shared_ptr<SDLJoystick>& joystick) {
169 return sdl_joystick == joystick->GetSDLJoystick();
170 });
171 if (vec_it != map_it->second.end()) {
172 // This is the common case: There is already an existing SDL_Joystick maped to a
173 // SDLJoystick. return the SDLJoystick
174 return *vec_it;
175 }
176 // Search for a SDLJoystick without a mapped SDL_Joystick...
177 auto nullptr_it = std::find_if(map_it->second.begin(), map_it->second.end(),
178 [](const std::shared_ptr<SDLJoystick>& joystick) {
179 return !joystick->GetSDLJoystick();
180 });
181 if (nullptr_it != map_it->second.end()) {
182 // ... and map it
183 (*nullptr_it)->SetSDLJoystick(sdl_joystick);
184 return *nullptr_it;
185 }
186 // There is no SDLJoystick without a mapped SDL_Joystick
187 // Create a new SDLJoystick
188 auto joystick = std::make_shared<SDLJoystick>(guid, map_it->second.size(), sdl_joystick);
189 return map_it->second.emplace_back(std::move(joystick));
190 }
191 auto joystick = std::make_shared<SDLJoystick>(guid, 0, sdl_joystick);
192 return joystick_map[guid].emplace_back(std::move(joystick));
193}
194
195void SDLState::InitJoystick(int joystick_index) {
196 SDL_Joystick* sdl_joystick = SDL_JoystickOpen(joystick_index);
197 if (!sdl_joystick) {
198 LOG_ERROR(Input, "failed to open joystick {}", joystick_index);
199 return;
200 }
201 std::string guid = GetGUID(sdl_joystick);
202 std::lock_guard<std::mutex> lock(joystick_map_mutex);
203 if (joystick_map.find(guid) == joystick_map.end()) {
204 auto joystick = std::make_shared<SDLJoystick>(guid, 0, sdl_joystick);
205 joystick_map[guid].emplace_back(std::move(joystick));
206 return;
207 }
208 auto& joystick_guid_list = joystick_map[guid];
209 const auto it = std::find_if(
210 joystick_guid_list.begin(), joystick_guid_list.end(),
211 [](const std::shared_ptr<SDLJoystick>& joystick) { return !joystick->GetSDLJoystick(); });
212 if (it != joystick_guid_list.end()) {
213 (*it)->SetSDLJoystick(sdl_joystick);
214 return;
215 }
216 auto joystick = std::make_shared<SDLJoystick>(guid, joystick_guid_list.size(), sdl_joystick);
217 joystick_guid_list.emplace_back(std::move(joystick));
218}
219
220void SDLState::CloseJoystick(SDL_Joystick* sdl_joystick) {
221 std::string guid = GetGUID(sdl_joystick);
222 std::shared_ptr<SDLJoystick> joystick;
223 {
224 std::lock_guard<std::mutex> lock(joystick_map_mutex);
225 // This call to guid is safe since the joystick is guaranteed to be in the map
226 auto& joystick_guid_list = joystick_map[guid];
227 const auto joystick_it =
228 std::find_if(joystick_guid_list.begin(), joystick_guid_list.end(),
229 [&sdl_joystick](const std::shared_ptr<SDLJoystick>& joystick) {
230 return joystick->GetSDLJoystick() == sdl_joystick;
231 });
232 joystick = *joystick_it;
233 }
234 // Destruct SDL_Joystick outside the lock guard because SDL can internally call event calback
235 // which locks the mutex again
236 joystick->SetSDLJoystick(nullptr, [](SDL_Joystick*) {});
237}
238
239void SDLState::HandleGameControllerEvent(const SDL_Event& event) {
240 switch (event.type) {
241 case SDL_JOYBUTTONUP: {
242 if (auto joystick = GetSDLJoystickBySDLID(event.jbutton.which)) {
243 joystick->SetButton(event.jbutton.button, false);
244 }
245 break;
246 }
247 case SDL_JOYBUTTONDOWN: {
248 if (auto joystick = GetSDLJoystickBySDLID(event.jbutton.which)) {
249 joystick->SetButton(event.jbutton.button, true);
250 }
251 break;
252 }
253 case SDL_JOYHATMOTION: {
254 if (auto joystick = GetSDLJoystickBySDLID(event.jhat.which)) {
255 joystick->SetHat(event.jhat.hat, event.jhat.value);
256 }
257 break;
258 }
259 case SDL_JOYAXISMOTION: {
260 if (auto joystick = GetSDLJoystickBySDLID(event.jaxis.which)) {
261 joystick->SetAxis(event.jaxis.axis, event.jaxis.value);
262 }
263 break;
264 }
265 case SDL_JOYDEVICEREMOVED:
266 LOG_DEBUG(Input, "Controller removed with Instance_ID {}", event.jdevice.which);
267 CloseJoystick(SDL_JoystickFromInstanceID(event.jdevice.which));
268 break;
269 case SDL_JOYDEVICEADDED:
270 LOG_DEBUG(Input, "Controller connected with device index {}", event.jdevice.which);
271 InitJoystick(event.jdevice.which);
272 break;
273 }
274}
275
276void SDLState::CloseJoysticks() {
277 std::lock_guard<std::mutex> lock(joystick_map_mutex);
278 joystick_map.clear();
279}
280
281class SDLButton final : public Input::ButtonDevice {
282public:
283 explicit SDLButton(std::shared_ptr<SDLJoystick> joystick_, int button_)
284 : joystick(std::move(joystick_)), button(button_) {}
285
286 bool GetStatus() const override {
287 return joystick->GetButton(button);
288 }
289
290private:
291 std::shared_ptr<SDLJoystick> joystick;
292 int button;
293};
294
295class SDLDirectionButton final : public Input::ButtonDevice {
296public:
297 explicit SDLDirectionButton(std::shared_ptr<SDLJoystick> joystick_, int hat_, Uint8 direction_)
298 : joystick(std::move(joystick_)), hat(hat_), direction(direction_) {}
299
300 bool GetStatus() const override {
301 return joystick->GetHatDirection(hat, direction);
302 }
303
304private:
305 std::shared_ptr<SDLJoystick> joystick;
306 int hat;
307 Uint8 direction;
308};
309
310class SDLAxisButton final : public Input::ButtonDevice {
311public:
312 explicit SDLAxisButton(std::shared_ptr<SDLJoystick> joystick_, int axis_, float threshold_,
313 bool trigger_if_greater_)
314 : joystick(std::move(joystick_)), axis(axis_), threshold(threshold_),
315 trigger_if_greater(trigger_if_greater_) {}
316
317 bool GetStatus() const override {
318 float axis_value = joystick->GetAxis(axis);
319 if (trigger_if_greater)
320 return axis_value > threshold;
321 return axis_value < threshold;
322 }
323
324private:
325 std::shared_ptr<SDLJoystick> joystick;
326 int axis;
327 float threshold;
328 bool trigger_if_greater;
329};
330
331class SDLAnalog final : public Input::AnalogDevice {
332public:
333 SDLAnalog(std::shared_ptr<SDLJoystick> joystick_, int axis_x_, int axis_y_, float deadzone_)
334 : joystick(std::move(joystick_)), axis_x(axis_x_), axis_y(axis_y_), deadzone(deadzone_) {}
335
336 std::tuple<float, float> GetStatus() const override {
337 const auto [x, y] = joystick->GetAnalog(axis_x, axis_y);
338 const float r = std::sqrt((x * x) + (y * y));
339 if (r > deadzone) {
340 return std::make_tuple(x / r * (r - deadzone) / (1 - deadzone),
341 y / r * (r - deadzone) / (1 - deadzone));
342 }
343 return std::make_tuple<float, float>(0.0f, 0.0f);
344 }
345
346private:
347 std::shared_ptr<SDLJoystick> joystick;
348 const int axis_x;
349 const int axis_y;
350 const float deadzone;
351};
352
353/// A button device factory that creates button devices from SDL joystick
354class SDLButtonFactory final : public Input::Factory<Input::ButtonDevice> {
355public:
356 explicit SDLButtonFactory(SDLState& state_) : state(state_) {}
357
358 /**
359 * Creates a button device from a joystick button
360 * @param params contains parameters for creating the device:
361 * - "guid": the guid of the joystick to bind
362 * - "port": the nth joystick of the same type to bind
363 * - "button"(optional): the index of the button to bind
364 * - "hat"(optional): the index of the hat to bind as direction buttons
365 * - "axis"(optional): the index of the axis to bind
366 * - "direction"(only used for hat): the direction name of the hat to bind. Can be "up",
367 * "down", "left" or "right"
368 * - "threshold"(only used for axis): a float value in (-1.0, 1.0) which the button is
369 * triggered if the axis value crosses
370 * - "direction"(only used for axis): "+" means the button is triggered when the axis
371 * value is greater than the threshold; "-" means the button is triggered when the axis
372 * value is smaller than the threshold
373 */
374 std::unique_ptr<Input::ButtonDevice> Create(const Common::ParamPackage& params) override {
375 const std::string guid = params.Get("guid", "0");
376 const int port = params.Get("port", 0);
377
378 auto joystick = state.GetSDLJoystickByGUID(guid, port);
379
380 if (params.Has("hat")) {
381 const int hat = params.Get("hat", 0);
382 const std::string direction_name = params.Get("direction", "");
383 Uint8 direction;
384 if (direction_name == "up") {
385 direction = SDL_HAT_UP;
386 } else if (direction_name == "down") {
387 direction = SDL_HAT_DOWN;
388 } else if (direction_name == "left") {
389 direction = SDL_HAT_LEFT;
390 } else if (direction_name == "right") {
391 direction = SDL_HAT_RIGHT;
392 } else {
393 direction = 0;
394 }
395 // This is necessary so accessing GetHat with hat won't crash
396 joystick->SetHat(hat, SDL_HAT_CENTERED);
397 return std::make_unique<SDLDirectionButton>(joystick, hat, direction);
398 }
399
400 if (params.Has("axis")) {
401 const int axis = params.Get("axis", 0);
402 const float threshold = params.Get("threshold", 0.5f);
403 const std::string direction_name = params.Get("direction", "");
404 bool trigger_if_greater;
405 if (direction_name == "+") {
406 trigger_if_greater = true;
407 } else if (direction_name == "-") {
408 trigger_if_greater = false;
409 } else {
410 trigger_if_greater = true;
411 LOG_ERROR(Input, "Unknown direction {}", direction_name);
412 }
413 // This is necessary so accessing GetAxis with axis won't crash
414 joystick->SetAxis(axis, 0);
415 return std::make_unique<SDLAxisButton>(joystick, axis, threshold, trigger_if_greater);
416 }
417
418 const int button = params.Get("button", 0);
419 // This is necessary so accessing GetButton with button won't crash
420 joystick->SetButton(button, false);
421 return std::make_unique<SDLButton>(joystick, button);
422 }
423
424private:
425 SDLState& state;
426};
427
428/// An analog device factory that creates analog devices from SDL joystick
429class SDLAnalogFactory final : public Input::Factory<Input::AnalogDevice> {
430public:
431 explicit SDLAnalogFactory(SDLState& state_) : state(state_) {}
432 /**
433 * Creates analog device from joystick axes
434 * @param params contains parameters for creating the device:
435 * - "guid": the guid of the joystick to bind
436 * - "port": the nth joystick of the same type
437 * - "axis_x": the index of the axis to be bind as x-axis
438 * - "axis_y": the index of the axis to be bind as y-axis
439 */
440 std::unique_ptr<Input::AnalogDevice> Create(const Common::ParamPackage& params) override {
441 const std::string guid = params.Get("guid", "0");
442 const int port = params.Get("port", 0);
443 const int axis_x = params.Get("axis_x", 0);
444 const int axis_y = params.Get("axis_y", 1);
445 float deadzone = std::clamp(params.Get("deadzone", 0.0f), 0.0f, .99f);
446
447 auto joystick = state.GetSDLJoystickByGUID(guid, port);
448
449 // This is necessary so accessing GetAxis with axis_x and axis_y won't crash
450 joystick->SetAxis(axis_x, 0);
451 joystick->SetAxis(axis_y, 0);
452 return std::make_unique<SDLAnalog>(joystick, axis_x, axis_y, deadzone);
453 }
454
455private:
456 SDLState& state;
457};
458
459SDLState::SDLState() {
460 using namespace Input;
461 RegisterFactory<ButtonDevice>("sdl", std::make_shared<SDLButtonFactory>(*this));
462 RegisterFactory<AnalogDevice>("sdl", std::make_shared<SDLAnalogFactory>(*this));
463
464 // If the frontend is going to manage the event loop, then we dont start one here
465 start_thread = !SDL_WasInit(SDL_INIT_JOYSTICK);
466 if (start_thread && SDL_Init(SDL_INIT_JOYSTICK) < 0) {
467 LOG_CRITICAL(Input, "SDL_Init(SDL_INIT_JOYSTICK) failed with: {}", SDL_GetError());
468 return;
469 }
470 if (SDL_SetHint(SDL_HINT_JOYSTICK_ALLOW_BACKGROUND_EVENTS, "1") == SDL_FALSE) {
471 LOG_ERROR(Input, "Failed to set Hint for background events", SDL_GetError());
472 }
473
474 SDL_AddEventWatch(&SDLEventWatcher, this);
475
476 initialized = true;
477 if (start_thread) {
478 poll_thread = std::thread([&] {
479 using namespace std::chrono_literals;
480 SDL_Event event;
481 while (initialized) {
482 SDL_PumpEvents();
483 std::this_thread::sleep_for(std::chrono::duration(10ms));
484 }
485 });
486 }
487 // Because the events for joystick connection happens before we have our event watcher added, we
488 // can just open all the joysticks right here
489 for (int i = 0; i < SDL_NumJoysticks(); ++i) {
490 InitJoystick(i);
491 }
492}
493
494SDLState::~SDLState() {
495 using namespace Input;
496 UnregisterFactory<ButtonDevice>("sdl");
497 UnregisterFactory<AnalogDevice>("sdl");
498
499 CloseJoysticks();
500 SDL_DelEventWatch(&SDLEventWatcher, this);
501
502 initialized = false;
503 if (start_thread) {
504 poll_thread.join();
505 SDL_QuitSubSystem(SDL_INIT_JOYSTICK);
506 }
507}
508
509Common::ParamPackage SDLEventToButtonParamPackage(SDLState& state, const SDL_Event& event) {
510 Common::ParamPackage params({{"engine", "sdl"}});
511
512 switch (event.type) {
513 case SDL_JOYAXISMOTION: {
514 auto joystick = state.GetSDLJoystickBySDLID(event.jaxis.which);
515 params.Set("port", joystick->GetPort());
516 params.Set("guid", joystick->GetGUID());
517 params.Set("axis", event.jaxis.axis);
518 if (event.jaxis.value > 0) {
519 params.Set("direction", "+");
520 params.Set("threshold", "0.5");
521 } else {
522 params.Set("direction", "-");
523 params.Set("threshold", "-0.5");
524 }
525 break;
526 }
527 case SDL_JOYBUTTONUP: {
528 auto joystick = state.GetSDLJoystickBySDLID(event.jbutton.which);
529 params.Set("port", joystick->GetPort());
530 params.Set("guid", joystick->GetGUID());
531 params.Set("button", event.jbutton.button);
532 break;
533 }
534 case SDL_JOYHATMOTION: {
535 auto joystick = state.GetSDLJoystickBySDLID(event.jhat.which);
536 params.Set("port", joystick->GetPort());
537 params.Set("guid", joystick->GetGUID());
538 params.Set("hat", event.jhat.hat);
539 switch (event.jhat.value) {
540 case SDL_HAT_UP:
541 params.Set("direction", "up");
542 break;
543 case SDL_HAT_DOWN:
544 params.Set("direction", "down");
545 break;
546 case SDL_HAT_LEFT:
547 params.Set("direction", "left");
548 break;
549 case SDL_HAT_RIGHT:
550 params.Set("direction", "right");
551 break;
552 default:
553 return {};
554 }
555 break;
556 }
557 }
558 return params;
559}
560
561namespace Polling {
562
563class SDLPoller : public InputCommon::Polling::DevicePoller {
564public:
565 explicit SDLPoller(SDLState& state_) : state(state_) {}
566
567 void Start() override {
568 state.event_queue.Clear();
569 state.polling = true;
570 }
571
572 void Stop() override {
573 state.polling = false;
574 }
575
576protected:
577 SDLState& state;
578};
579
580class SDLButtonPoller final : public SDLPoller {
581public:
582 explicit SDLButtonPoller(SDLState& state_) : SDLPoller(state_) {}
583
584 Common::ParamPackage GetNextInput() override {
585 SDL_Event event;
586 while (state.event_queue.Pop(event)) {
587 switch (event.type) {
588 case SDL_JOYAXISMOTION:
589 if (std::abs(event.jaxis.value / 32767.0) < 0.5) {
590 break;
591 }
592 case SDL_JOYBUTTONUP:
593 case SDL_JOYHATMOTION:
594 return SDLEventToButtonParamPackage(state, event);
595 }
596 }
597 return {};
598 }
599};
600
601class SDLAnalogPoller final : public SDLPoller {
602public:
603 explicit SDLAnalogPoller(SDLState& state_) : SDLPoller(state_) {}
604
605 void Start() override {
606 SDLPoller::Start();
607
608 // Reset stored axes
609 analog_xaxis = -1;
610 analog_yaxis = -1;
611 analog_axes_joystick = -1;
612 }
613
614 Common::ParamPackage GetNextInput() override {
615 SDL_Event event;
616 while (state.event_queue.Pop(event)) {
617 if (event.type != SDL_JOYAXISMOTION || std::abs(event.jaxis.value / 32767.0) < 0.5) {
618 continue;
619 }
620 // An analog device needs two axes, so we need to store the axis for later and wait for
621 // a second SDL event. The axes also must be from the same joystick.
622 int axis = event.jaxis.axis;
623 if (analog_xaxis == -1) {
624 analog_xaxis = axis;
625 analog_axes_joystick = event.jaxis.which;
626 } else if (analog_yaxis == -1 && analog_xaxis != axis &&
627 analog_axes_joystick == event.jaxis.which) {
628 analog_yaxis = axis;
629 }
630 }
631 Common::ParamPackage params;
632 if (analog_xaxis != -1 && analog_yaxis != -1) {
633 auto joystick = state.GetSDLJoystickBySDLID(event.jaxis.which);
634 params.Set("engine", "sdl");
635 params.Set("port", joystick->GetPort());
636 params.Set("guid", joystick->GetGUID());
637 params.Set("axis_x", analog_xaxis);
638 params.Set("axis_y", analog_yaxis);
639 analog_xaxis = -1;
640 analog_yaxis = -1;
641 analog_axes_joystick = -1;
642 return params;
643 }
644 return params;
645 }
646
647private:
648 int analog_xaxis = -1;
649 int analog_yaxis = -1;
650 SDL_JoystickID analog_axes_joystick = -1;
651};
652} // namespace Polling
653
654std::vector<std::unique_ptr<InputCommon::Polling::DevicePoller>> SDLState::GetPollers(
655 InputCommon::Polling::DeviceType type) {
656 std::vector<std::unique_ptr<InputCommon::Polling::DevicePoller>> pollers;
657 switch (type) {
658 case InputCommon::Polling::DeviceType::Analog:
659 pollers.emplace_back(std::make_unique<Polling::SDLAnalogPoller>(*this));
660 break;
661 case InputCommon::Polling::DeviceType::Button:
662 pollers.emplace_back(std::make_unique<Polling::SDLButtonPoller>(*this));
663 break;
664 return pollers;
665 }
666}
667
668} // namespace SDL
669} // namespace InputCommon
diff --git a/src/input_common/sdl/sdl_impl.h b/src/input_common/sdl/sdl_impl.h
new file mode 100644
index 000000000..fec82fbe6
--- /dev/null
+++ b/src/input_common/sdl/sdl_impl.h
@@ -0,0 +1,64 @@
1// Copyright 2018 Citra Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <atomic>
8#include <memory>
9#include <thread>
10#include "common/threadsafe_queue.h"
11#include "input_common/sdl/sdl.h"
12
13union SDL_Event;
14using SDL_Joystick = struct _SDL_Joystick;
15using SDL_JoystickID = s32;
16
17namespace InputCommon::SDL {
18
19class SDLJoystick;
20class SDLButtonFactory;
21class SDLAnalogFactory;
22
23class SDLState : public State {
24public:
25 /// Initializes and registers SDL device factories
26 SDLState();
27
28 /// Unresisters SDL device factories and shut them down.
29 ~SDLState() override;
30
31 /// Handle SDL_Events for joysticks from SDL_PollEvent
32 void HandleGameControllerEvent(const SDL_Event& event);
33
34 std::shared_ptr<SDLJoystick> GetSDLJoystickBySDLID(SDL_JoystickID sdl_id);
35 std::shared_ptr<SDLJoystick> GetSDLJoystickByGUID(const std::string& guid, int port);
36
37 /// Get all DevicePoller that use the SDL backend for a specific device type
38 std::vector<std::unique_ptr<InputCommon::Polling::DevicePoller>> GetPollers(
39 InputCommon::Polling::DeviceType type) override;
40
41 /// Used by the Pollers during config
42 std::atomic<bool> polling = false;
43 Common::SPSCQueue<SDL_Event> event_queue;
44
45private:
46 void InitJoystick(int joystick_index);
47 void CloseJoystick(SDL_Joystick* sdl_joystick);
48
49 /// Needs to be called before SDL_QuitSubSystem.
50 void CloseJoysticks();
51
52 /// Map of GUID of a list of corresponding virtual Joysticks
53 std::unordered_map<std::string, std::vector<std::shared_ptr<SDLJoystick>>> joystick_map;
54 std::mutex joystick_map_mutex;
55
56 std::shared_ptr<SDLButtonFactory> button_factory;
57 std::shared_ptr<SDLAnalogFactory> analog_factory;
58
59 bool start_thread = false;
60 std::atomic<bool> initialized = false;
61
62 std::thread poll_thread;
63};
64} // namespace InputCommon::SDL
diff --git a/src/tests/core/arm/arm_test_common.cpp b/src/tests/core/arm/arm_test_common.cpp
index 9b8a44fa1..6fe56833d 100644
--- a/src/tests/core/arm/arm_test_common.cpp
+++ b/src/tests/core/arm/arm_test_common.cpp
@@ -13,11 +13,11 @@
13namespace ArmTests { 13namespace ArmTests {
14 14
15TestEnvironment::TestEnvironment(bool mutable_memory_) 15TestEnvironment::TestEnvironment(bool mutable_memory_)
16 : mutable_memory(mutable_memory_), test_memory(std::make_shared<TestMemory>(this)) { 16 : mutable_memory(mutable_memory_),
17 17 test_memory(std::make_shared<TestMemory>(this)), kernel{Core::System::GetInstance()} {
18 auto process = Kernel::Process::Create(kernel, ""); 18 auto process = Kernel::Process::Create(Core::System::GetInstance(), "");
19 kernel.MakeCurrentProcess(process.get()); 19 kernel.MakeCurrentProcess(process.get());
20 page_table = &Core::CurrentProcess()->VMManager().page_table; 20 page_table = &process->VMManager().page_table;
21 21
22 std::fill(page_table->pointers.begin(), page_table->pointers.end(), nullptr); 22 std::fill(page_table->pointers.begin(), page_table->pointers.end(), nullptr);
23 page_table->special_regions.clear(); 23 page_table->special_regions.clear();
diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt
index d35a738d5..14b76680f 100644
--- a/src/video_core/CMakeLists.txt
+++ b/src/video_core/CMakeLists.txt
@@ -17,6 +17,12 @@ add_library(video_core STATIC
17 engines/shader_header.h 17 engines/shader_header.h
18 gpu.cpp 18 gpu.cpp
19 gpu.h 19 gpu.h
20 gpu_asynch.cpp
21 gpu_asynch.h
22 gpu_synch.cpp
23 gpu_synch.h
24 gpu_thread.cpp
25 gpu_thread.h
20 macro_interpreter.cpp 26 macro_interpreter.cpp
21 macro_interpreter.h 27 macro_interpreter.h
22 memory_manager.cpp 28 memory_manager.cpp
@@ -74,6 +80,7 @@ add_library(video_core STATIC
74 shader/decode/hfma2.cpp 80 shader/decode/hfma2.cpp
75 shader/decode/conversion.cpp 81 shader/decode/conversion.cpp
76 shader/decode/memory.cpp 82 shader/decode/memory.cpp
83 shader/decode/texture.cpp
77 shader/decode/float_set_predicate.cpp 84 shader/decode/float_set_predicate.cpp
78 shader/decode/integer_set_predicate.cpp 85 shader/decode/integer_set_predicate.cpp
79 shader/decode/half_set_predicate.cpp 86 shader/decode/half_set_predicate.cpp
@@ -94,6 +101,8 @@ add_library(video_core STATIC
94 surface.h 101 surface.h
95 textures/astc.cpp 102 textures/astc.cpp
96 textures/astc.h 103 textures/astc.h
104 textures/convert.cpp
105 textures/convert.h
97 textures/decoders.cpp 106 textures/decoders.cpp
98 textures/decoders.h 107 textures/decoders.h
99 textures/texture.h 108 textures/texture.h
@@ -104,8 +113,22 @@ add_library(video_core STATIC
104if (ENABLE_VULKAN) 113if (ENABLE_VULKAN)
105 target_sources(video_core PRIVATE 114 target_sources(video_core PRIVATE
106 renderer_vulkan/declarations.h 115 renderer_vulkan/declarations.h
116 renderer_vulkan/maxwell_to_vk.cpp
117 renderer_vulkan/maxwell_to_vk.h
118 renderer_vulkan/vk_buffer_cache.cpp
119 renderer_vulkan/vk_buffer_cache.h
107 renderer_vulkan/vk_device.cpp 120 renderer_vulkan/vk_device.cpp
108 renderer_vulkan/vk_device.h) 121 renderer_vulkan/vk_device.h
122 renderer_vulkan/vk_memory_manager.cpp
123 renderer_vulkan/vk_memory_manager.h
124 renderer_vulkan/vk_resource_manager.cpp
125 renderer_vulkan/vk_resource_manager.h
126 renderer_vulkan/vk_sampler_cache.cpp
127 renderer_vulkan/vk_sampler_cache.h
128 renderer_vulkan/vk_scheduler.cpp
129 renderer_vulkan/vk_scheduler.h
130 renderer_vulkan/vk_stream_buffer.cpp
131 renderer_vulkan/vk_stream_buffer.h)
109 132
110 target_include_directories(video_core PRIVATE ../../externals/Vulkan-Headers/include) 133 target_include_directories(video_core PRIVATE ../../externals/Vulkan-Headers/include)
111 target_compile_definitions(video_core PRIVATE HAS_VULKAN) 134 target_compile_definitions(video_core PRIVATE HAS_VULKAN)
diff --git a/src/video_core/dma_pusher.cpp b/src/video_core/dma_pusher.cpp
index eb9bf1878..bff1a37ff 100644
--- a/src/video_core/dma_pusher.cpp
+++ b/src/video_core/dma_pusher.cpp
@@ -33,18 +33,36 @@ void DmaPusher::DispatchCalls() {
33} 33}
34 34
35bool DmaPusher::Step() { 35bool DmaPusher::Step() {
36 if (dma_get != dma_put) { 36 if (!ib_enable || dma_pushbuffer.empty()) {
37 // Push buffer non-empty, read a word 37 // pushbuffer empty and IB empty or nonexistent - nothing to do
38 const auto address = gpu.MemoryManager().GpuToCpuAddress(dma_get); 38 return false;
39 ASSERT_MSG(address, "Invalid GPU address"); 39 }
40 40
41 const CommandHeader command_header{Memory::Read32(*address)}; 41 const CommandList& command_list{dma_pushbuffer.front()};
42 const CommandListHeader command_list_header{command_list[dma_pushbuffer_subindex++]};
43 GPUVAddr dma_get = command_list_header.addr;
44 GPUVAddr dma_put = dma_get + command_list_header.size * sizeof(u32);
45 bool non_main = command_list_header.is_non_main;
42 46
43 dma_get += sizeof(u32); 47 if (dma_pushbuffer_subindex >= command_list.size()) {
48 // We've gone through the current list, remove it from the queue
49 dma_pushbuffer.pop();
50 dma_pushbuffer_subindex = 0;
51 }
44 52
45 if (!non_main) { 53 if (command_list_header.size == 0) {
46 dma_mget = dma_get; 54 return true;
47 } 55 }
56
57 // Push buffer non-empty, read a word
58 const auto address = gpu.MemoryManager().GpuToCpuAddress(dma_get);
59 ASSERT_MSG(address, "Invalid GPU address");
60
61 command_headers.resize(command_list_header.size);
62
63 Memory::ReadBlock(*address, command_headers.data(), command_list_header.size * sizeof(u32));
64
65 for (const CommandHeader& command_header : command_headers) {
48 66
49 // now, see if we're in the middle of a command 67 // now, see if we're in the middle of a command
50 if (dma_state.length_pending) { 68 if (dma_state.length_pending) {
@@ -91,22 +109,11 @@ bool DmaPusher::Step() {
91 break; 109 break;
92 } 110 }
93 } 111 }
94 } else if (ib_enable && !dma_pushbuffer.empty()) { 112 }
95 // Current pushbuffer empty, but we have more IB entries to read 113
96 const CommandList& command_list{dma_pushbuffer.front()}; 114 if (!non_main) {
97 const CommandListHeader& command_list_header{command_list[dma_pushbuffer_subindex++]}; 115 // TODO (degasus): This is dead code, as dma_mget is never read.
98 dma_get = command_list_header.addr; 116 dma_mget = dma_put;
99 dma_put = dma_get + command_list_header.size * sizeof(u32);
100 non_main = command_list_header.is_non_main;
101
102 if (dma_pushbuffer_subindex >= command_list.size()) {
103 // We've gone through the current list, remove it from the queue
104 dma_pushbuffer.pop();
105 dma_pushbuffer_subindex = 0;
106 }
107 } else {
108 // Otherwise, pushbuffer empty and IB empty or nonexistent - nothing to do
109 return {};
110 } 117 }
111 118
112 return true; 119 return true;
diff --git a/src/video_core/dma_pusher.h b/src/video_core/dma_pusher.h
index 1097e5c49..27a36348c 100644
--- a/src/video_core/dma_pusher.h
+++ b/src/video_core/dma_pusher.h
@@ -75,6 +75,8 @@ private:
75 75
76 GPU& gpu; 76 GPU& gpu;
77 77
78 std::vector<CommandHeader> command_headers; ///< Buffer for list of commands fetched at once
79
78 std::queue<CommandList> dma_pushbuffer; ///< Queue of command lists to be processed 80 std::queue<CommandList> dma_pushbuffer; ///< Queue of command lists to be processed
79 std::size_t dma_pushbuffer_subindex{}; ///< Index within a command list within the pushbuffer 81 std::size_t dma_pushbuffer_subindex{}; ///< Index within a command list within the pushbuffer
80 82
@@ -89,11 +91,8 @@ private:
89 DmaState dma_state{}; 91 DmaState dma_state{};
90 bool dma_increment_once{}; 92 bool dma_increment_once{};
91 93
92 GPUVAddr dma_put{}; ///< pushbuffer current end address
93 GPUVAddr dma_get{}; ///< pushbuffer current read address
94 GPUVAddr dma_mget{}; ///< main pushbuffer last read address 94 GPUVAddr dma_mget{}; ///< main pushbuffer last read address
95 bool ib_enable{true}; ///< IB mode enabled 95 bool ib_enable{true}; ///< IB mode enabled
96 bool non_main{}; ///< non-main pushbuffer active
97}; 96};
98 97
99} // namespace Tegra 98} // namespace Tegra
diff --git a/src/video_core/engines/fermi_2d.cpp b/src/video_core/engines/fermi_2d.cpp
index ec1a57226..03b7ee5d8 100644
--- a/src/video_core/engines/fermi_2d.cpp
+++ b/src/video_core/engines/fermi_2d.cpp
@@ -2,12 +2,11 @@
2// Licensed under GPLv2 or any later version 2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included. 3// Refer to the license.txt file included.
4 4
5#include "core/core.h" 5#include "common/assert.h"
6#include "core/memory.h" 6#include "common/logging/log.h"
7#include "common/math_util.h"
7#include "video_core/engines/fermi_2d.h" 8#include "video_core/engines/fermi_2d.h"
8#include "video_core/engines/maxwell_3d.h"
9#include "video_core/rasterizer_interface.h" 9#include "video_core/rasterizer_interface.h"
10#include "video_core/textures/decoders.h"
11 10
12namespace Tegra::Engines { 11namespace Tegra::Engines {
13 12
@@ -44,10 +43,10 @@ void Fermi2D::HandleSurfaceCopy() {
44 const u32 src_blit_y2{ 43 const u32 src_blit_y2{
45 static_cast<u32>((regs.blit_src_y + (regs.blit_dst_height * regs.blit_dv_dy)) >> 32)}; 44 static_cast<u32>((regs.blit_src_y + (regs.blit_dst_height * regs.blit_dv_dy)) >> 32)};
46 45
47 const MathUtil::Rectangle<u32> src_rect{src_blit_x1, src_blit_y1, src_blit_x2, src_blit_y2}; 46 const Common::Rectangle<u32> src_rect{src_blit_x1, src_blit_y1, src_blit_x2, src_blit_y2};
48 const MathUtil::Rectangle<u32> dst_rect{regs.blit_dst_x, regs.blit_dst_y, 47 const Common::Rectangle<u32> dst_rect{regs.blit_dst_x, regs.blit_dst_y,
49 regs.blit_dst_x + regs.blit_dst_width, 48 regs.blit_dst_x + regs.blit_dst_width,
50 regs.blit_dst_y + regs.blit_dst_height}; 49 regs.blit_dst_y + regs.blit_dst_height};
51 50
52 if (!rasterizer.AccelerateSurfaceCopy(regs.src, regs.dst, src_rect, dst_rect)) { 51 if (!rasterizer.AccelerateSurfaceCopy(regs.src, regs.dst, src_rect, dst_rect)) {
53 UNIMPLEMENTED(); 52 UNIMPLEMENTED();
diff --git a/src/video_core/engines/fermi_2d.h b/src/video_core/engines/fermi_2d.h
index c69f74cc5..80523e320 100644
--- a/src/video_core/engines/fermi_2d.h
+++ b/src/video_core/engines/fermi_2d.h
@@ -5,7 +5,7 @@
5#pragma once 5#pragma once
6 6
7#include <array> 7#include <array>
8#include "common/assert.h" 8#include <cstddef>
9#include "common/bit_field.h" 9#include "common/bit_field.h"
10#include "common/common_funcs.h" 10#include "common/common_funcs.h"
11#include "common/common_types.h" 11#include "common/common_types.h"
diff --git a/src/video_core/engines/kepler_compute.cpp b/src/video_core/engines/kepler_compute.cpp
index 4ca856b6b..b1d950460 100644
--- a/src/video_core/engines/kepler_compute.cpp
+++ b/src/video_core/engines/kepler_compute.cpp
@@ -2,9 +2,8 @@
2// Licensed under GPLv2 or any later version 2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included. 3// Refer to the license.txt file included.
4 4
5#include "common/assert.h"
5#include "common/logging/log.h" 6#include "common/logging/log.h"
6#include "core/core.h"
7#include "core/memory.h"
8#include "video_core/engines/kepler_compute.h" 7#include "video_core/engines/kepler_compute.h"
9#include "video_core/memory_manager.h" 8#include "video_core/memory_manager.h"
10 9
diff --git a/src/video_core/engines/kepler_compute.h b/src/video_core/engines/kepler_compute.h
index df0a32e0f..6575afd0f 100644
--- a/src/video_core/engines/kepler_compute.h
+++ b/src/video_core/engines/kepler_compute.h
@@ -5,8 +5,7 @@
5#pragma once 5#pragma once
6 6
7#include <array> 7#include <array>
8#include "common/assert.h" 8#include <cstddef>
9#include "common/bit_field.h"
10#include "common/common_funcs.h" 9#include "common/common_funcs.h"
11#include "common/common_types.h" 10#include "common/common_types.h"
12#include "video_core/gpu.h" 11#include "video_core/gpu.h"
diff --git a/src/video_core/engines/kepler_memory.cpp b/src/video_core/engines/kepler_memory.cpp
index 5c1029ddf..daefa43a6 100644
--- a/src/video_core/engines/kepler_memory.cpp
+++ b/src/video_core/engines/kepler_memory.cpp
@@ -2,18 +2,20 @@
2// Licensed under GPLv2 or any later version 2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included. 3// Refer to the license.txt file included.
4 4
5#include "common/assert.h"
5#include "common/logging/log.h" 6#include "common/logging/log.h"
6#include "core/core.h" 7#include "core/core.h"
7#include "core/memory.h" 8#include "core/memory.h"
8#include "video_core/engines/kepler_memory.h" 9#include "video_core/engines/kepler_memory.h"
9#include "video_core/engines/maxwell_3d.h" 10#include "video_core/engines/maxwell_3d.h"
10#include "video_core/rasterizer_interface.h" 11#include "video_core/rasterizer_interface.h"
12#include "video_core/renderer_base.h"
11 13
12namespace Tegra::Engines { 14namespace Tegra::Engines {
13 15
14KeplerMemory::KeplerMemory(VideoCore::RasterizerInterface& rasterizer, 16KeplerMemory::KeplerMemory(Core::System& system, VideoCore::RasterizerInterface& rasterizer,
15 MemoryManager& memory_manager) 17 MemoryManager& memory_manager)
16 : memory_manager(memory_manager), rasterizer{rasterizer} {} 18 : system{system}, memory_manager(memory_manager), rasterizer{rasterizer} {}
17 19
18KeplerMemory::~KeplerMemory() = default; 20KeplerMemory::~KeplerMemory() = default;
19 21
@@ -47,10 +49,11 @@ void KeplerMemory::ProcessData(u32 data) {
47 // We have to invalidate the destination region to evict any outdated surfaces from the cache. 49 // We have to invalidate the destination region to evict any outdated surfaces from the cache.
48 // We do this before actually writing the new data because the destination address might contain 50 // We do this before actually writing the new data because the destination address might contain
49 // a dirty surface that will have to be written back to memory. 51 // a dirty surface that will have to be written back to memory.
50 rasterizer.InvalidateRegion(*dest_address, sizeof(u32)); 52 system.Renderer().Rasterizer().InvalidateRegion(ToCacheAddr(Memory::GetPointer(*dest_address)),
53 sizeof(u32));
51 54
52 Memory::Write32(*dest_address, data); 55 Memory::Write32(*dest_address, data);
53 Core::System::GetInstance().GPU().Maxwell3D().dirty_flags.OnMemoryWrite(); 56 system.GPU().Maxwell3D().dirty_flags.OnMemoryWrite();
54 57
55 state.write_offset++; 58 state.write_offset++;
56} 59}
diff --git a/src/video_core/engines/kepler_memory.h b/src/video_core/engines/kepler_memory.h
index fe9ebc5b9..9181e9d80 100644
--- a/src/video_core/engines/kepler_memory.h
+++ b/src/video_core/engines/kepler_memory.h
@@ -5,13 +5,17 @@
5#pragma once 5#pragma once
6 6
7#include <array> 7#include <array>
8#include "common/assert.h" 8#include <cstddef>
9#include "common/bit_field.h" 9#include "common/bit_field.h"
10#include "common/common_funcs.h" 10#include "common/common_funcs.h"
11#include "common/common_types.h" 11#include "common/common_types.h"
12#include "video_core/gpu.h" 12#include "video_core/gpu.h"
13#include "video_core/memory_manager.h" 13#include "video_core/memory_manager.h"
14 14
15namespace Core {
16class System;
17}
18
15namespace VideoCore { 19namespace VideoCore {
16class RasterizerInterface; 20class RasterizerInterface;
17} 21}
@@ -23,7 +27,8 @@ namespace Tegra::Engines {
23 27
24class KeplerMemory final { 28class KeplerMemory final {
25public: 29public:
26 KeplerMemory(VideoCore::RasterizerInterface& rasterizer, MemoryManager& memory_manager); 30 KeplerMemory(Core::System& system, VideoCore::RasterizerInterface& rasterizer,
31 MemoryManager& memory_manager);
27 ~KeplerMemory(); 32 ~KeplerMemory();
28 33
29 /// Write the value to the register identified by method. 34 /// Write the value to the register identified by method.
@@ -76,6 +81,7 @@ public:
76 } state{}; 81 } state{};
77 82
78private: 83private:
84 Core::System& system;
79 MemoryManager& memory_manager; 85 MemoryManager& memory_manager;
80 VideoCore::RasterizerInterface& rasterizer; 86 VideoCore::RasterizerInterface& rasterizer;
81 87
diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp
index 86ede5faa..49979694e 100644
--- a/src/video_core/engines/maxwell_3d.cpp
+++ b/src/video_core/engines/maxwell_3d.cpp
@@ -19,8 +19,10 @@ namespace Tegra::Engines {
19/// First register id that is actually a Macro call. 19/// First register id that is actually a Macro call.
20constexpr u32 MacroRegistersStart = 0xE00; 20constexpr u32 MacroRegistersStart = 0xE00;
21 21
22Maxwell3D::Maxwell3D(VideoCore::RasterizerInterface& rasterizer, MemoryManager& memory_manager) 22Maxwell3D::Maxwell3D(Core::System& system, VideoCore::RasterizerInterface& rasterizer,
23 : memory_manager(memory_manager), rasterizer{rasterizer}, macro_interpreter(*this) { 23 MemoryManager& memory_manager)
24 : memory_manager(memory_manager), system{system}, rasterizer{rasterizer},
25 macro_interpreter(*this) {
24 InitializeRegisterDefaults(); 26 InitializeRegisterDefaults();
25} 27}
26 28
@@ -103,23 +105,25 @@ void Maxwell3D::CallMacroMethod(u32 method, std::vector<u32> parameters) {
103} 105}
104 106
105void Maxwell3D::CallMethod(const GPU::MethodCall& method_call) { 107void Maxwell3D::CallMethod(const GPU::MethodCall& method_call) {
106 auto debug_context = Core::System::GetInstance().GetGPUDebugContext(); 108 auto debug_context = system.GetGPUDebugContext();
109
110 const u32 method = method_call.method;
107 111
108 // It is an error to write to a register other than the current macro's ARG register before it 112 // It is an error to write to a register other than the current macro's ARG register before it
109 // has finished execution. 113 // has finished execution.
110 if (executing_macro != 0) { 114 if (executing_macro != 0) {
111 ASSERT(method_call.method == executing_macro + 1); 115 ASSERT(method == executing_macro + 1);
112 } 116 }
113 117
114 // Methods after 0xE00 are special, they're actually triggers for some microcode that was 118 // Methods after 0xE00 are special, they're actually triggers for some microcode that was
115 // uploaded to the GPU during initialization. 119 // uploaded to the GPU during initialization.
116 if (method_call.method >= MacroRegistersStart) { 120 if (method >= MacroRegistersStart) {
117 // We're trying to execute a macro 121 // We're trying to execute a macro
118 if (executing_macro == 0) { 122 if (executing_macro == 0) {
119 // A macro call must begin by writing the macro method's register, not its argument. 123 // A macro call must begin by writing the macro method's register, not its argument.
120 ASSERT_MSG((method_call.method % 2) == 0, 124 ASSERT_MSG((method % 2) == 0,
121 "Can't start macro execution by writing to the ARGS register"); 125 "Can't start macro execution by writing to the ARGS register");
122 executing_macro = method_call.method; 126 executing_macro = method;
123 } 127 }
124 128
125 macro_params.push_back(method_call.argument); 129 macro_params.push_back(method_call.argument);
@@ -131,66 +135,62 @@ void Maxwell3D::CallMethod(const GPU::MethodCall& method_call) {
131 return; 135 return;
132 } 136 }
133 137
134 ASSERT_MSG(method_call.method < Regs::NUM_REGS, 138 ASSERT_MSG(method < Regs::NUM_REGS,
135 "Invalid Maxwell3D register, increase the size of the Regs structure"); 139 "Invalid Maxwell3D register, increase the size of the Regs structure");
136 140
137 if (debug_context) { 141 if (debug_context) {
138 debug_context->OnEvent(Tegra::DebugContext::Event::MaxwellCommandLoaded, nullptr); 142 debug_context->OnEvent(Tegra::DebugContext::Event::MaxwellCommandLoaded, nullptr);
139 } 143 }
140 144
141 if (regs.reg_array[method_call.method] != method_call.argument) { 145 if (regs.reg_array[method] != method_call.argument) {
142 regs.reg_array[method_call.method] = method_call.argument; 146 regs.reg_array[method] = method_call.argument;
143 // Color buffers 147 // Color buffers
144 constexpr u32 first_rt_reg = MAXWELL3D_REG_INDEX(rt); 148 constexpr u32 first_rt_reg = MAXWELL3D_REG_INDEX(rt);
145 constexpr u32 registers_per_rt = sizeof(regs.rt[0]) / sizeof(u32); 149 constexpr u32 registers_per_rt = sizeof(regs.rt[0]) / sizeof(u32);
146 if (method_call.method >= first_rt_reg && 150 if (method >= first_rt_reg &&
147 method_call.method < first_rt_reg + registers_per_rt * Regs::NumRenderTargets) { 151 method < first_rt_reg + registers_per_rt * Regs::NumRenderTargets) {
148 const std::size_t rt_index = (method_call.method - first_rt_reg) / registers_per_rt; 152 const std::size_t rt_index = (method - first_rt_reg) / registers_per_rt;
149 dirty_flags.color_buffer |= 1u << static_cast<u32>(rt_index); 153 dirty_flags.color_buffer.set(rt_index);
150 } 154 }
151 155
152 // Zeta buffer 156 // Zeta buffer
153 constexpr u32 registers_in_zeta = sizeof(regs.zeta) / sizeof(u32); 157 constexpr u32 registers_in_zeta = sizeof(regs.zeta) / sizeof(u32);
154 if (method_call.method == MAXWELL3D_REG_INDEX(zeta_enable) || 158 if (method == MAXWELL3D_REG_INDEX(zeta_enable) ||
155 method_call.method == MAXWELL3D_REG_INDEX(zeta_width) || 159 method == MAXWELL3D_REG_INDEX(zeta_width) ||
156 method_call.method == MAXWELL3D_REG_INDEX(zeta_height) || 160 method == MAXWELL3D_REG_INDEX(zeta_height) ||
157 (method_call.method >= MAXWELL3D_REG_INDEX(zeta) && 161 (method >= MAXWELL3D_REG_INDEX(zeta) &&
158 method_call.method < MAXWELL3D_REG_INDEX(zeta) + registers_in_zeta)) { 162 method < MAXWELL3D_REG_INDEX(zeta) + registers_in_zeta)) {
159 dirty_flags.zeta_buffer = true; 163 dirty_flags.zeta_buffer = true;
160 } 164 }
161 165
162 // Shader 166 // Shader
163 constexpr u32 shader_registers_count = 167 constexpr u32 shader_registers_count =
164 sizeof(regs.shader_config[0]) * Regs::MaxShaderProgram / sizeof(u32); 168 sizeof(regs.shader_config[0]) * Regs::MaxShaderProgram / sizeof(u32);
165 if (method_call.method >= MAXWELL3D_REG_INDEX(shader_config[0]) && 169 if (method >= MAXWELL3D_REG_INDEX(shader_config[0]) &&
166 method_call.method < MAXWELL3D_REG_INDEX(shader_config[0]) + shader_registers_count) { 170 method < MAXWELL3D_REG_INDEX(shader_config[0]) + shader_registers_count) {
167 dirty_flags.shaders = true; 171 dirty_flags.shaders = true;
168 } 172 }
169 173
170 // Vertex format 174 // Vertex format
171 if (method_call.method >= MAXWELL3D_REG_INDEX(vertex_attrib_format) && 175 if (method >= MAXWELL3D_REG_INDEX(vertex_attrib_format) &&
172 method_call.method < 176 method < MAXWELL3D_REG_INDEX(vertex_attrib_format) + regs.vertex_attrib_format.size()) {
173 MAXWELL3D_REG_INDEX(vertex_attrib_format) + regs.vertex_attrib_format.size()) {
174 dirty_flags.vertex_attrib_format = true; 177 dirty_flags.vertex_attrib_format = true;
175 } 178 }
176 179
177 // Vertex buffer 180 // Vertex buffer
178 if (method_call.method >= MAXWELL3D_REG_INDEX(vertex_array) && 181 if (method >= MAXWELL3D_REG_INDEX(vertex_array) &&
179 method_call.method < MAXWELL3D_REG_INDEX(vertex_array) + 4 * 32) { 182 method < MAXWELL3D_REG_INDEX(vertex_array) + 4 * 32) {
180 dirty_flags.vertex_array |= 183 dirty_flags.vertex_array.set((method - MAXWELL3D_REG_INDEX(vertex_array)) >> 2);
181 1u << ((method_call.method - MAXWELL3D_REG_INDEX(vertex_array)) >> 2); 184 } else if (method >= MAXWELL3D_REG_INDEX(vertex_array_limit) &&
182 } else if (method_call.method >= MAXWELL3D_REG_INDEX(vertex_array_limit) && 185 method < MAXWELL3D_REG_INDEX(vertex_array_limit) + 2 * 32) {
183 method_call.method < MAXWELL3D_REG_INDEX(vertex_array_limit) + 2 * 32) { 186 dirty_flags.vertex_array.set((method - MAXWELL3D_REG_INDEX(vertex_array_limit)) >> 1);
184 dirty_flags.vertex_array |= 187 } else if (method >= MAXWELL3D_REG_INDEX(instanced_arrays) &&
185 1u << ((method_call.method - MAXWELL3D_REG_INDEX(vertex_array_limit)) >> 1); 188 method < MAXWELL3D_REG_INDEX(instanced_arrays) + 32) {
186 } else if (method_call.method >= MAXWELL3D_REG_INDEX(instanced_arrays) && 189 dirty_flags.vertex_array.set(method - MAXWELL3D_REG_INDEX(instanced_arrays));
187 method_call.method < MAXWELL3D_REG_INDEX(instanced_arrays) + 32) {
188 dirty_flags.vertex_array |=
189 1u << (method_call.method - MAXWELL3D_REG_INDEX(instanced_arrays));
190 } 190 }
191 } 191 }
192 192
193 switch (method_call.method) { 193 switch (method) {
194 case MAXWELL3D_REG_INDEX(macros.data): { 194 case MAXWELL3D_REG_INDEX(macros.data): {
195 ProcessMacroUpload(method_call.argument); 195 ProcessMacroUpload(method_call.argument);
196 break; 196 break;
@@ -317,7 +317,7 @@ void Maxwell3D::ProcessQueryGet() {
317 LongQueryResult query_result{}; 317 LongQueryResult query_result{};
318 query_result.value = result; 318 query_result.value = result;
319 // TODO(Subv): Generate a real GPU timestamp and write it here instead of CoreTiming 319 // TODO(Subv): Generate a real GPU timestamp and write it here instead of CoreTiming
320 query_result.timestamp = Core::System::GetInstance().CoreTiming().GetTicks(); 320 query_result.timestamp = system.CoreTiming().GetTicks();
321 Memory::WriteBlock(*address, &query_result, sizeof(query_result)); 321 Memory::WriteBlock(*address, &query_result, sizeof(query_result));
322 } 322 }
323 dirty_flags.OnMemoryWrite(); 323 dirty_flags.OnMemoryWrite();
@@ -334,7 +334,7 @@ void Maxwell3D::DrawArrays() {
334 regs.vertex_buffer.count); 334 regs.vertex_buffer.count);
335 ASSERT_MSG(!(regs.index_array.count && regs.vertex_buffer.count), "Both indexed and direct?"); 335 ASSERT_MSG(!(regs.index_array.count && regs.vertex_buffer.count), "Both indexed and direct?");
336 336
337 auto debug_context = Core::System::GetInstance().GetGPUDebugContext(); 337 auto debug_context = system.GetGPUDebugContext();
338 338
339 if (debug_context) { 339 if (debug_context) {
340 debug_context->OnEvent(Tegra::DebugContext::Event::IncomingPrimitiveBatch, nullptr); 340 debug_context->OnEvent(Tegra::DebugContext::Event::IncomingPrimitiveBatch, nullptr);
@@ -396,7 +396,10 @@ void Maxwell3D::ProcessCBData(u32 value) {
396 const auto address = memory_manager.GpuToCpuAddress(buffer_address + regs.const_buffer.cb_pos); 396 const auto address = memory_manager.GpuToCpuAddress(buffer_address + regs.const_buffer.cb_pos);
397 ASSERT_MSG(address, "Invalid GPU address"); 397 ASSERT_MSG(address, "Invalid GPU address");
398 398
399 Memory::Write32(*address, value); 399 u8* ptr{Memory::GetPointer(*address)};
400 rasterizer.InvalidateRegion(ToCacheAddr(ptr), sizeof(u32));
401 std::memcpy(ptr, &value, sizeof(u32));
402
400 dirty_flags.OnMemoryWrite(); 403 dirty_flags.OnMemoryWrite();
401 404
402 // Increment the current buffer position. 405 // Increment the current buffer position.
diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h
index 1f76aa670..7fbf1026e 100644
--- a/src/video_core/engines/maxwell_3d.h
+++ b/src/video_core/engines/maxwell_3d.h
@@ -5,8 +5,10 @@
5#pragma once 5#pragma once
6 6
7#include <array> 7#include <array>
8#include <bitset>
8#include <unordered_map> 9#include <unordered_map>
9#include <vector> 10#include <vector>
11
10#include "common/assert.h" 12#include "common/assert.h"
11#include "common/bit_field.h" 13#include "common/bit_field.h"
12#include "common/common_funcs.h" 14#include "common/common_funcs.h"
@@ -17,6 +19,10 @@
17#include "video_core/memory_manager.h" 19#include "video_core/memory_manager.h"
18#include "video_core/textures/texture.h" 20#include "video_core/textures/texture.h"
19 21
22namespace Core {
23class System;
24}
25
20namespace VideoCore { 26namespace VideoCore {
21class RasterizerInterface; 27class RasterizerInterface;
22} 28}
@@ -28,7 +34,8 @@ namespace Tegra::Engines {
28 34
29class Maxwell3D final { 35class Maxwell3D final {
30public: 36public:
31 explicit Maxwell3D(VideoCore::RasterizerInterface& rasterizer, MemoryManager& memory_manager); 37 explicit Maxwell3D(Core::System& system, VideoCore::RasterizerInterface& rasterizer,
38 MemoryManager& memory_manager);
32 ~Maxwell3D() = default; 39 ~Maxwell3D() = default;
33 40
34 /// Register structure of the Maxwell3D engine. 41 /// Register structure of the Maxwell3D engine.
@@ -498,7 +505,7 @@ public:
498 f32 translate_z; 505 f32 translate_z;
499 INSERT_PADDING_WORDS(2); 506 INSERT_PADDING_WORDS(2);
500 507
501 MathUtil::Rectangle<s32> GetRect() const { 508 Common::Rectangle<s32> GetRect() const {
502 return { 509 return {
503 GetX(), // left 510 GetX(), // left
504 GetY() + GetHeight(), // top 511 GetY() + GetHeight(), // top
@@ -1089,19 +1096,18 @@ public:
1089 MemoryManager& memory_manager; 1096 MemoryManager& memory_manager;
1090 1097
1091 struct DirtyFlags { 1098 struct DirtyFlags {
1092 u8 color_buffer = 0xFF; 1099 std::bitset<8> color_buffer{0xFF};
1093 bool zeta_buffer = true; 1100 std::bitset<32> vertex_array{0xFFFFFFFF};
1094
1095 bool shaders = true;
1096 1101
1097 bool vertex_attrib_format = true; 1102 bool vertex_attrib_format = true;
1098 u32 vertex_array = 0xFFFFFFFF; 1103 bool zeta_buffer = true;
1104 bool shaders = true;
1099 1105
1100 void OnMemoryWrite() { 1106 void OnMemoryWrite() {
1101 color_buffer = 0xFF;
1102 zeta_buffer = true; 1107 zeta_buffer = true;
1103 shaders = true; 1108 shaders = true;
1104 vertex_array = 0xFFFFFFFF; 1109 color_buffer.set();
1110 vertex_array.set();
1105 } 1111 }
1106 }; 1112 };
1107 1113
@@ -1131,6 +1137,8 @@ public:
1131private: 1137private:
1132 void InitializeRegisterDefaults(); 1138 void InitializeRegisterDefaults();
1133 1139
1140 Core::System& system;
1141
1134 VideoCore::RasterizerInterface& rasterizer; 1142 VideoCore::RasterizerInterface& rasterizer;
1135 1143
1136 /// Start offsets of each macro in macro_memory 1144 /// Start offsets of each macro in macro_memory
diff --git a/src/video_core/engines/maxwell_dma.cpp b/src/video_core/engines/maxwell_dma.cpp
index d6c41a5ae..415a6319a 100644
--- a/src/video_core/engines/maxwell_dma.cpp
+++ b/src/video_core/engines/maxwell_dma.cpp
@@ -2,17 +2,21 @@
2// Licensed under GPLv2 or any later version 2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included. 3// Refer to the license.txt file included.
4 4
5#include "common/assert.h"
6#include "common/logging/log.h"
5#include "core/core.h" 7#include "core/core.h"
6#include "core/memory.h" 8#include "core/memory.h"
7#include "video_core/engines/maxwell_3d.h" 9#include "video_core/engines/maxwell_3d.h"
8#include "video_core/engines/maxwell_dma.h" 10#include "video_core/engines/maxwell_dma.h"
9#include "video_core/rasterizer_interface.h" 11#include "video_core/rasterizer_interface.h"
12#include "video_core/renderer_base.h"
10#include "video_core/textures/decoders.h" 13#include "video_core/textures/decoders.h"
11 14
12namespace Tegra::Engines { 15namespace Tegra::Engines {
13 16
14MaxwellDMA::MaxwellDMA(VideoCore::RasterizerInterface& rasterizer, MemoryManager& memory_manager) 17MaxwellDMA::MaxwellDMA(Core::System& system, VideoCore::RasterizerInterface& rasterizer,
15 : memory_manager(memory_manager), rasterizer{rasterizer} {} 18 MemoryManager& memory_manager)
19 : memory_manager(memory_manager), system{system}, rasterizer{rasterizer} {}
16 20
17void MaxwellDMA::CallMethod(const GPU::MethodCall& method_call) { 21void MaxwellDMA::CallMethod(const GPU::MethodCall& method_call) {
18 ASSERT_MSG(method_call.method < Regs::NUM_REGS, 22 ASSERT_MSG(method_call.method < Regs::NUM_REGS,
@@ -59,7 +63,7 @@ void MaxwellDMA::HandleCopy() {
59 } 63 }
60 64
61 // All copies here update the main memory, so mark all rasterizer states as invalid. 65 // All copies here update the main memory, so mark all rasterizer states as invalid.
62 Core::System::GetInstance().GPU().Maxwell3D().dirty_flags.OnMemoryWrite(); 66 system.GPU().Maxwell3D().dirty_flags.OnMemoryWrite();
63 67
64 if (regs.exec.is_dst_linear && regs.exec.is_src_linear) { 68 if (regs.exec.is_dst_linear && regs.exec.is_src_linear) {
65 // When the enable_2d bit is disabled, the copy is performed as if we were copying a 1D 69 // When the enable_2d bit is disabled, the copy is performed as if we were copying a 1D
@@ -89,12 +93,14 @@ void MaxwellDMA::HandleCopy() {
89 const auto FlushAndInvalidate = [&](u32 src_size, u64 dst_size) { 93 const auto FlushAndInvalidate = [&](u32 src_size, u64 dst_size) {
90 // TODO(Subv): For now, manually flush the regions until we implement GPU-accelerated 94 // TODO(Subv): For now, manually flush the regions until we implement GPU-accelerated
91 // copying. 95 // copying.
92 rasterizer.FlushRegion(*source_cpu, src_size); 96 Core::System::GetInstance().Renderer().Rasterizer().FlushRegion(
97 ToCacheAddr(Memory::GetPointer(*source_cpu)), src_size);
93 98
94 // We have to invalidate the destination region to evict any outdated surfaces from the 99 // We have to invalidate the destination region to evict any outdated surfaces from the
95 // cache. We do this before actually writing the new data because the destination address 100 // cache. We do this before actually writing the new data because the destination address
96 // might contain a dirty surface that will have to be written back to memory. 101 // might contain a dirty surface that will have to be written back to memory.
97 rasterizer.InvalidateRegion(*dest_cpu, dst_size); 102 Core::System::GetInstance().Renderer().Rasterizer().InvalidateRegion(
103 ToCacheAddr(Memory::GetPointer(*dest_cpu)), dst_size);
98 }; 104 };
99 105
100 if (regs.exec.is_dst_linear && !regs.exec.is_src_linear) { 106 if (regs.exec.is_dst_linear && !regs.exec.is_src_linear) {
diff --git a/src/video_core/engines/maxwell_dma.h b/src/video_core/engines/maxwell_dma.h
index 1f8cd65d2..34c369320 100644
--- a/src/video_core/engines/maxwell_dma.h
+++ b/src/video_core/engines/maxwell_dma.h
@@ -5,13 +5,17 @@
5#pragma once 5#pragma once
6 6
7#include <array> 7#include <array>
8#include "common/assert.h" 8#include <cstddef>
9#include "common/bit_field.h" 9#include "common/bit_field.h"
10#include "common/common_funcs.h" 10#include "common/common_funcs.h"
11#include "common/common_types.h" 11#include "common/common_types.h"
12#include "video_core/gpu.h" 12#include "video_core/gpu.h"
13#include "video_core/memory_manager.h" 13#include "video_core/memory_manager.h"
14 14
15namespace Core {
16class System;
17}
18
15namespace VideoCore { 19namespace VideoCore {
16class RasterizerInterface; 20class RasterizerInterface;
17} 21}
@@ -20,7 +24,8 @@ namespace Tegra::Engines {
20 24
21class MaxwellDMA final { 25class MaxwellDMA final {
22public: 26public:
23 explicit MaxwellDMA(VideoCore::RasterizerInterface& rasterizer, MemoryManager& memory_manager); 27 explicit MaxwellDMA(Core::System& system, VideoCore::RasterizerInterface& rasterizer,
28 MemoryManager& memory_manager);
24 ~MaxwellDMA() = default; 29 ~MaxwellDMA() = default;
25 30
26 /// Write the value to the register identified by method. 31 /// Write the value to the register identified by method.
@@ -137,6 +142,8 @@ public:
137 MemoryManager& memory_manager; 142 MemoryManager& memory_manager;
138 143
139private: 144private:
145 Core::System& system;
146
140 VideoCore::RasterizerInterface& rasterizer; 147 VideoCore::RasterizerInterface& rasterizer;
141 148
142 /// Performs the copy from the source buffer to the destination buffer as configured in the 149 /// Performs the copy from the source buffer to the destination buffer as configured in the
diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h
index 1f425f90b..7f613370b 100644
--- a/src/video_core/engines/shader_bytecode.h
+++ b/src/video_core/engines/shader_bytecode.h
@@ -6,7 +6,6 @@
6 6
7#include <bitset> 7#include <bitset>
8#include <optional> 8#include <optional>
9#include <string>
10#include <tuple> 9#include <tuple>
11#include <vector> 10#include <vector>
12 11
@@ -325,11 +324,11 @@ enum class TextureQueryType : u64 {
325 324
326enum class TextureProcessMode : u64 { 325enum class TextureProcessMode : u64 {
327 None = 0, 326 None = 0,
328 LZ = 1, // Unknown, appears to be the same as none. 327 LZ = 1, // Load LOD of zero.
329 LB = 2, // Load Bias. 328 LB = 2, // Load Bias.
330 LL = 3, // Load LOD (LevelOfDetail) 329 LL = 3, // Load LOD.
331 LBA = 6, // Load Bias. The A is unknown, does not appear to differ with LB 330 LBA = 6, // Load Bias. The A is unknown, does not appear to differ with LB.
332 LLA = 7 // Load LOD. The A is unknown, does not appear to differ with LL 331 LLA = 7 // Load LOD. The A is unknown, does not appear to differ with LL.
333}; 332};
334 333
335enum class TextureMiscMode : u64 { 334enum class TextureMiscMode : u64 {
@@ -376,9 +375,9 @@ enum class R2pMode : u64 {
376}; 375};
377 376
378enum class IpaInterpMode : u64 { 377enum class IpaInterpMode : u64 {
379 Linear = 0, 378 Pass = 0,
380 Perspective = 1, 379 Multiply = 1,
381 Flat = 2, 380 Constant = 2,
382 Sc = 3, 381 Sc = 3,
383}; 382};
384 383
@@ -1446,6 +1445,7 @@ public:
1446 Flow, 1445 Flow,
1447 Synch, 1446 Synch,
1448 Memory, 1447 Memory,
1448 Texture,
1449 FloatSet, 1449 FloatSet,
1450 FloatSetPredicate, 1450 FloatSetPredicate,
1451 IntegerSet, 1451 IntegerSet,
@@ -1576,14 +1576,14 @@ private:
1576 INST("1110111101010---", Id::ST_L, Type::Memory, "ST_L"), 1576 INST("1110111101010---", Id::ST_L, Type::Memory, "ST_L"),
1577 INST("1110111011010---", Id::LDG, Type::Memory, "LDG"), 1577 INST("1110111011010---", Id::LDG, Type::Memory, "LDG"),
1578 INST("1110111011011---", Id::STG, Type::Memory, "STG"), 1578 INST("1110111011011---", Id::STG, Type::Memory, "STG"),
1579 INST("110000----111---", Id::TEX, Type::Memory, "TEX"), 1579 INST("110000----111---", Id::TEX, Type::Texture, "TEX"),
1580 INST("1101111101001---", Id::TXQ, Type::Memory, "TXQ"), 1580 INST("1101111101001---", Id::TXQ, Type::Texture, "TXQ"),
1581 INST("1101-00---------", Id::TEXS, Type::Memory, "TEXS"), 1581 INST("1101-00---------", Id::TEXS, Type::Texture, "TEXS"),
1582 INST("1101101---------", Id::TLDS, Type::Memory, "TLDS"), 1582 INST("1101101---------", Id::TLDS, Type::Texture, "TLDS"),
1583 INST("110010----111---", Id::TLD4, Type::Memory, "TLD4"), 1583 INST("110010----111---", Id::TLD4, Type::Texture, "TLD4"),
1584 INST("1101111100------", Id::TLD4S, Type::Memory, "TLD4S"), 1584 INST("1101111100------", Id::TLD4S, Type::Texture, "TLD4S"),
1585 INST("110111110110----", Id::TMML_B, Type::Memory, "TMML_B"), 1585 INST("110111110110----", Id::TMML_B, Type::Texture, "TMML_B"),
1586 INST("1101111101011---", Id::TMML, Type::Memory, "TMML"), 1586 INST("1101111101011---", Id::TMML, Type::Texture, "TMML"),
1587 INST("111000110000----", Id::EXIT, Type::Trivial, "EXIT"), 1587 INST("111000110000----", Id::EXIT, Type::Trivial, "EXIT"),
1588 INST("11100000--------", Id::IPA, Type::Trivial, "IPA"), 1588 INST("11100000--------", Id::IPA, Type::Trivial, "IPA"),
1589 INST("1111101111100---", Id::OUT_R, Type::Trivial, "OUT_R"), 1589 INST("1111101111100---", Id::OUT_R, Type::Trivial, "OUT_R"),
diff --git a/src/video_core/engines/shader_header.h b/src/video_core/engines/shader_header.h
index cf2b76ff6..e86a7f04a 100644
--- a/src/video_core/engines/shader_header.h
+++ b/src/video_core/engines/shader_header.h
@@ -16,6 +16,13 @@ enum class OutputTopology : u32 {
16 TriangleStrip = 7, 16 TriangleStrip = 7,
17}; 17};
18 18
19enum class AttributeUse : u8 {
20 Unused = 0,
21 Constant = 1,
22 Perspective = 2,
23 ScreenLinear = 3,
24};
25
19// Documentation in: 26// Documentation in:
20// http://download.nvidia.com/open-gpu-doc/Shader-Program-Header/1/Shader-Program-Header.html#ImapTexture 27// http://download.nvidia.com/open-gpu-doc/Shader-Program-Header/1/Shader-Program-Header.html#ImapTexture
21struct Header { 28struct Header {
@@ -84,9 +91,15 @@ struct Header {
84 } vtg; 91 } vtg;
85 92
86 struct { 93 struct {
87 INSERT_PADDING_BYTES(3); // ImapSystemValuesA 94 INSERT_PADDING_BYTES(3); // ImapSystemValuesA
88 INSERT_PADDING_BYTES(1); // ImapSystemValuesB 95 INSERT_PADDING_BYTES(1); // ImapSystemValuesB
89 INSERT_PADDING_BYTES(32); // ImapGenericVector[32] 96 union {
97 BitField<0, 2, AttributeUse> x;
98 BitField<2, 2, AttributeUse> y;
99 BitField<4, 2, AttributeUse> w;
100 BitField<6, 2, AttributeUse> z;
101 u8 raw;
102 } imap_generic_vector[32];
90 INSERT_PADDING_BYTES(2); // ImapColor 103 INSERT_PADDING_BYTES(2); // ImapColor
91 INSERT_PADDING_BYTES(2); // ImapSystemValuesC 104 INSERT_PADDING_BYTES(2); // ImapSystemValuesC
92 INSERT_PADDING_BYTES(10); // ImapFixedFncTexture[10] 105 INSERT_PADDING_BYTES(10); // ImapFixedFncTexture[10]
@@ -103,6 +116,28 @@ struct Header {
103 const u32 bit = render_target * 4 + component; 116 const u32 bit = render_target * 4 + component;
104 return omap.target & (1 << bit); 117 return omap.target & (1 << bit);
105 } 118 }
119 AttributeUse GetAttributeIndexUse(u32 attribute, u32 index) const {
120 return static_cast<AttributeUse>(
121 (imap_generic_vector[attribute].raw >> (index * 2)) & 0x03);
122 }
123 AttributeUse GetAttributeUse(u32 attribute) const {
124 AttributeUse result = AttributeUse::Unused;
125 for (u32 i = 0; i < 4; i++) {
126 const auto index = GetAttributeIndexUse(attribute, i);
127 if (index == AttributeUse::Unused) {
128 continue;
129 }
130 if (result == AttributeUse::Unused || result == index) {
131 result = index;
132 continue;
133 }
134 LOG_CRITICAL(HW_GPU, "Generic Attribute Conflict in Interpolation Mode");
135 if (index == AttributeUse::Perspective) {
136 result = index;
137 }
138 }
139 return result;
140 }
106 } ps; 141 } ps;
107 }; 142 };
108 143
diff --git a/src/video_core/gpu.cpp b/src/video_core/gpu.cpp
index b86265dfe..08abf8ac9 100644
--- a/src/video_core/gpu.cpp
+++ b/src/video_core/gpu.cpp
@@ -12,7 +12,7 @@
12#include "video_core/engines/maxwell_3d.h" 12#include "video_core/engines/maxwell_3d.h"
13#include "video_core/engines/maxwell_dma.h" 13#include "video_core/engines/maxwell_dma.h"
14#include "video_core/gpu.h" 14#include "video_core/gpu.h"
15#include "video_core/rasterizer_interface.h" 15#include "video_core/renderer_base.h"
16 16
17namespace Tegra { 17namespace Tegra {
18 18
@@ -28,14 +28,15 @@ u32 FramebufferConfig::BytesPerPixel(PixelFormat format) {
28 UNREACHABLE(); 28 UNREACHABLE();
29} 29}
30 30
31GPU::GPU(VideoCore::RasterizerInterface& rasterizer) { 31GPU::GPU(Core::System& system, VideoCore::RendererBase& renderer) : renderer{renderer} {
32 auto& rasterizer{renderer.Rasterizer()};
32 memory_manager = std::make_unique<Tegra::MemoryManager>(); 33 memory_manager = std::make_unique<Tegra::MemoryManager>();
33 dma_pusher = std::make_unique<Tegra::DmaPusher>(*this); 34 dma_pusher = std::make_unique<Tegra::DmaPusher>(*this);
34 maxwell_3d = std::make_unique<Engines::Maxwell3D>(rasterizer, *memory_manager); 35 maxwell_3d = std::make_unique<Engines::Maxwell3D>(system, rasterizer, *memory_manager);
35 fermi_2d = std::make_unique<Engines::Fermi2D>(rasterizer, *memory_manager); 36 fermi_2d = std::make_unique<Engines::Fermi2D>(rasterizer, *memory_manager);
36 kepler_compute = std::make_unique<Engines::KeplerCompute>(*memory_manager); 37 kepler_compute = std::make_unique<Engines::KeplerCompute>(*memory_manager);
37 maxwell_dma = std::make_unique<Engines::MaxwellDMA>(rasterizer, *memory_manager); 38 maxwell_dma = std::make_unique<Engines::MaxwellDMA>(system, rasterizer, *memory_manager);
38 kepler_memory = std::make_unique<Engines::KeplerMemory>(rasterizer, *memory_manager); 39 kepler_memory = std::make_unique<Engines::KeplerMemory>(system, rasterizer, *memory_manager);
39} 40}
40 41
41GPU::~GPU() = default; 42GPU::~GPU() = default;
diff --git a/src/video_core/gpu.h b/src/video_core/gpu.h
index a482196ea..a14b95c30 100644
--- a/src/video_core/gpu.h
+++ b/src/video_core/gpu.h
@@ -6,16 +6,24 @@
6 6
7#include <array> 7#include <array>
8#include <memory> 8#include <memory>
9#include <vector>
10#include "common/common_types.h" 9#include "common/common_types.h"
11#include "core/hle/service/nvflinger/buffer_queue.h" 10#include "core/hle/service/nvflinger/buffer_queue.h"
12#include "video_core/dma_pusher.h" 11#include "video_core/dma_pusher.h"
13#include "video_core/memory_manager.h" 12#include "video_core/memory_manager.h"
14 13
15namespace VideoCore { 14using CacheAddr = std::uintptr_t;
16class RasterizerInterface; 15inline CacheAddr ToCacheAddr(const void* host_ptr) {
16 return reinterpret_cast<CacheAddr>(host_ptr);
17}
18
19namespace Core {
20class System;
17} 21}
18 22
23namespace VideoCore {
24class RendererBase;
25} // namespace VideoCore
26
19namespace Tegra { 27namespace Tegra {
20 28
21enum class RenderTargetFormat : u32 { 29enum class RenderTargetFormat : u32 {
@@ -97,7 +105,7 @@ struct FramebufferConfig {
97 105
98 using TransformFlags = Service::NVFlinger::BufferQueue::BufferTransformFlags; 106 using TransformFlags = Service::NVFlinger::BufferQueue::BufferTransformFlags;
99 TransformFlags transform_flags; 107 TransformFlags transform_flags;
100 MathUtil::Rectangle<int> crop_rect; 108 Common::Rectangle<int> crop_rect;
101}; 109};
102 110
103namespace Engines { 111namespace Engines {
@@ -116,10 +124,11 @@ enum class EngineID {
116 MAXWELL_DMA_COPY_A = 0xB0B5, 124 MAXWELL_DMA_COPY_A = 0xB0B5,
117}; 125};
118 126
119class GPU final { 127class GPU {
120public: 128public:
121 explicit GPU(VideoCore::RasterizerInterface& rasterizer); 129 explicit GPU(Core::System& system, VideoCore::RendererBase& renderer);
122 ~GPU(); 130
131 virtual ~GPU();
123 132
124 struct MethodCall { 133 struct MethodCall {
125 u32 method{}; 134 u32 method{};
@@ -197,8 +206,42 @@ public:
197 }; 206 };
198 } regs{}; 207 } regs{};
199 208
209 /// Push GPU command entries to be processed
210 virtual void PushGPUEntries(Tegra::CommandList&& entries) = 0;
211
212 /// Swap buffers (render frame)
213 virtual void SwapBuffers(
214 std::optional<std::reference_wrapper<const Tegra::FramebufferConfig>> framebuffer) = 0;
215
216 /// Notify rasterizer that any caches of the specified region should be flushed to Switch memory
217 virtual void FlushRegion(CacheAddr addr, u64 size) = 0;
218
219 /// Notify rasterizer that any caches of the specified region should be invalidated
220 virtual void InvalidateRegion(CacheAddr addr, u64 size) = 0;
221
222 /// Notify rasterizer that any caches of the specified region should be flushed and invalidated
223 virtual void FlushAndInvalidateRegion(CacheAddr addr, u64 size) = 0;
224
200private: 225private:
226 void ProcessBindMethod(const MethodCall& method_call);
227 void ProcessSemaphoreTriggerMethod();
228 void ProcessSemaphoreRelease();
229 void ProcessSemaphoreAcquire();
230
231 /// Calls a GPU puller method.
232 void CallPullerMethod(const MethodCall& method_call);
233
234 /// Calls a GPU engine method.
235 void CallEngineMethod(const MethodCall& method_call);
236
237 /// Determines where the method should be executed.
238 bool ExecuteMethodOnEngine(const MethodCall& method_call);
239
240protected:
201 std::unique_ptr<Tegra::DmaPusher> dma_pusher; 241 std::unique_ptr<Tegra::DmaPusher> dma_pusher;
242 VideoCore::RendererBase& renderer;
243
244private:
202 std::unique_ptr<Tegra::MemoryManager> memory_manager; 245 std::unique_ptr<Tegra::MemoryManager> memory_manager;
203 246
204 /// Mapping of command subchannels to their bound engine ids. 247 /// Mapping of command subchannels to their bound engine ids.
@@ -214,18 +257,6 @@ private:
214 std::unique_ptr<Engines::MaxwellDMA> maxwell_dma; 257 std::unique_ptr<Engines::MaxwellDMA> maxwell_dma;
215 /// Inline memory engine 258 /// Inline memory engine
216 std::unique_ptr<Engines::KeplerMemory> kepler_memory; 259 std::unique_ptr<Engines::KeplerMemory> kepler_memory;
217
218 void ProcessBindMethod(const MethodCall& method_call);
219 void ProcessSemaphoreTriggerMethod();
220 void ProcessSemaphoreRelease();
221 void ProcessSemaphoreAcquire();
222
223 // Calls a GPU puller method.
224 void CallPullerMethod(const MethodCall& method_call);
225 // Calls a GPU engine method.
226 void CallEngineMethod(const MethodCall& method_call);
227 // Determines where the method should be executed.
228 bool ExecuteMethodOnEngine(const MethodCall& method_call);
229}; 260};
230 261
231#define ASSERT_REG_POSITION(field_name, position) \ 262#define ASSERT_REG_POSITION(field_name, position) \
diff --git a/src/video_core/gpu_asynch.cpp b/src/video_core/gpu_asynch.cpp
new file mode 100644
index 000000000..8b355cf7b
--- /dev/null
+++ b/src/video_core/gpu_asynch.cpp
@@ -0,0 +1,37 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "video_core/gpu_asynch.h"
6#include "video_core/gpu_thread.h"
7#include "video_core/renderer_base.h"
8
9namespace VideoCommon {
10
11GPUAsynch::GPUAsynch(Core::System& system, VideoCore::RendererBase& renderer)
12 : Tegra::GPU(system, renderer), gpu_thread{renderer, *dma_pusher} {}
13
14GPUAsynch::~GPUAsynch() = default;
15
16void GPUAsynch::PushGPUEntries(Tegra::CommandList&& entries) {
17 gpu_thread.SubmitList(std::move(entries));
18}
19
20void GPUAsynch::SwapBuffers(
21 std::optional<std::reference_wrapper<const Tegra::FramebufferConfig>> framebuffer) {
22 gpu_thread.SwapBuffers(std::move(framebuffer));
23}
24
25void GPUAsynch::FlushRegion(CacheAddr addr, u64 size) {
26 gpu_thread.FlushRegion(addr, size);
27}
28
29void GPUAsynch::InvalidateRegion(CacheAddr addr, u64 size) {
30 gpu_thread.InvalidateRegion(addr, size);
31}
32
33void GPUAsynch::FlushAndInvalidateRegion(CacheAddr addr, u64 size) {
34 gpu_thread.FlushAndInvalidateRegion(addr, size);
35}
36
37} // namespace VideoCommon
diff --git a/src/video_core/gpu_asynch.h b/src/video_core/gpu_asynch.h
new file mode 100644
index 000000000..1dcc61a6c
--- /dev/null
+++ b/src/video_core/gpu_asynch.h
@@ -0,0 +1,37 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include "video_core/gpu.h"
8#include "video_core/gpu_thread.h"
9
10namespace VideoCore {
11class RendererBase;
12} // namespace VideoCore
13
14namespace VideoCommon {
15
16namespace GPUThread {
17class ThreadManager;
18} // namespace GPUThread
19
20/// Implementation of GPU interface that runs the GPU asynchronously
21class GPUAsynch : public Tegra::GPU {
22public:
23 explicit GPUAsynch(Core::System& system, VideoCore::RendererBase& renderer);
24 ~GPUAsynch() override;
25
26 void PushGPUEntries(Tegra::CommandList&& entries) override;
27 void SwapBuffers(
28 std::optional<std::reference_wrapper<const Tegra::FramebufferConfig>> framebuffer) override;
29 void FlushRegion(CacheAddr addr, u64 size) override;
30 void InvalidateRegion(CacheAddr addr, u64 size) override;
31 void FlushAndInvalidateRegion(CacheAddr addr, u64 size) override;
32
33private:
34 GPUThread::ThreadManager gpu_thread;
35};
36
37} // namespace VideoCommon
diff --git a/src/video_core/gpu_synch.cpp b/src/video_core/gpu_synch.cpp
new file mode 100644
index 000000000..2cfc900ed
--- /dev/null
+++ b/src/video_core/gpu_synch.cpp
@@ -0,0 +1,37 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "video_core/gpu_synch.h"
6#include "video_core/renderer_base.h"
7
8namespace VideoCommon {
9
10GPUSynch::GPUSynch(Core::System& system, VideoCore::RendererBase& renderer)
11 : Tegra::GPU(system, renderer) {}
12
13GPUSynch::~GPUSynch() = default;
14
15void GPUSynch::PushGPUEntries(Tegra::CommandList&& entries) {
16 dma_pusher->Push(std::move(entries));
17 dma_pusher->DispatchCalls();
18}
19
20void GPUSynch::SwapBuffers(
21 std::optional<std::reference_wrapper<const Tegra::FramebufferConfig>> framebuffer) {
22 renderer.SwapBuffers(std::move(framebuffer));
23}
24
25void GPUSynch::FlushRegion(CacheAddr addr, u64 size) {
26 renderer.Rasterizer().FlushRegion(addr, size);
27}
28
29void GPUSynch::InvalidateRegion(CacheAddr addr, u64 size) {
30 renderer.Rasterizer().InvalidateRegion(addr, size);
31}
32
33void GPUSynch::FlushAndInvalidateRegion(CacheAddr addr, u64 size) {
34 renderer.Rasterizer().FlushAndInvalidateRegion(addr, size);
35}
36
37} // namespace VideoCommon
diff --git a/src/video_core/gpu_synch.h b/src/video_core/gpu_synch.h
new file mode 100644
index 000000000..766b5631c
--- /dev/null
+++ b/src/video_core/gpu_synch.h
@@ -0,0 +1,29 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include "video_core/gpu.h"
8
9namespace VideoCore {
10class RendererBase;
11} // namespace VideoCore
12
13namespace VideoCommon {
14
15/// Implementation of GPU interface that runs the GPU synchronously
16class GPUSynch : public Tegra::GPU {
17public:
18 explicit GPUSynch(Core::System& system, VideoCore::RendererBase& renderer);
19 ~GPUSynch() override;
20
21 void PushGPUEntries(Tegra::CommandList&& entries) override;
22 void SwapBuffers(
23 std::optional<std::reference_wrapper<const Tegra::FramebufferConfig>> framebuffer) override;
24 void FlushRegion(CacheAddr addr, u64 size) override;
25 void InvalidateRegion(CacheAddr addr, u64 size) override;
26 void FlushAndInvalidateRegion(CacheAddr addr, u64 size) override;
27};
28
29} // namespace VideoCommon
diff --git a/src/video_core/gpu_thread.cpp b/src/video_core/gpu_thread.cpp
new file mode 100644
index 000000000..086b2f625
--- /dev/null
+++ b/src/video_core/gpu_thread.cpp
@@ -0,0 +1,98 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/assert.h"
6#include "common/microprofile.h"
7#include "core/frontend/scope_acquire_window_context.h"
8#include "video_core/dma_pusher.h"
9#include "video_core/gpu.h"
10#include "video_core/gpu_thread.h"
11#include "video_core/renderer_base.h"
12
13namespace VideoCommon::GPUThread {
14
15/// Runs the GPU thread
16static void RunThread(VideoCore::RendererBase& renderer, Tegra::DmaPusher& dma_pusher,
17 SynchState& state) {
18 MicroProfileOnThreadCreate("GpuThread");
19
20 // Wait for first GPU command before acquiring the window context
21 state.WaitForCommands();
22
23 // If emulation was stopped during disk shader loading, abort before trying to acquire context
24 if (!state.is_running) {
25 return;
26 }
27
28 Core::Frontend::ScopeAcquireWindowContext acquire_context{renderer.GetRenderWindow()};
29
30 CommandDataContainer next;
31 while (state.is_running) {
32 state.WaitForCommands();
33 while (!state.queue.Empty()) {
34 state.queue.Pop(next);
35 if (const auto submit_list = std::get_if<SubmitListCommand>(&next.data)) {
36 dma_pusher.Push(std::move(submit_list->entries));
37 dma_pusher.DispatchCalls();
38 } else if (const auto data = std::get_if<SwapBuffersCommand>(&next.data)) {
39 state.DecrementFramesCounter();
40 renderer.SwapBuffers(std::move(data->framebuffer));
41 } else if (const auto data = std::get_if<FlushRegionCommand>(&next.data)) {
42 renderer.Rasterizer().FlushRegion(data->addr, data->size);
43 } else if (const auto data = std::get_if<InvalidateRegionCommand>(&next.data)) {
44 renderer.Rasterizer().InvalidateRegion(data->addr, data->size);
45 } else if (const auto data = std::get_if<EndProcessingCommand>(&next.data)) {
46 return;
47 } else {
48 UNREACHABLE();
49 }
50 }
51 }
52}
53
54ThreadManager::ThreadManager(VideoCore::RendererBase& renderer, Tegra::DmaPusher& dma_pusher)
55 : renderer{renderer}, dma_pusher{dma_pusher}, thread{RunThread, std::ref(renderer),
56 std::ref(dma_pusher), std::ref(state)} {}
57
58ThreadManager::~ThreadManager() {
59 // Notify GPU thread that a shutdown is pending
60 PushCommand(EndProcessingCommand());
61 thread.join();
62}
63
64void ThreadManager::SubmitList(Tegra::CommandList&& entries) {
65 PushCommand(SubmitListCommand(std::move(entries)));
66}
67
68void ThreadManager::SwapBuffers(
69 std::optional<std::reference_wrapper<const Tegra::FramebufferConfig>> framebuffer) {
70 state.IncrementFramesCounter();
71 PushCommand(SwapBuffersCommand(std::move(framebuffer)));
72 state.WaitForFrames();
73}
74
75void ThreadManager::FlushRegion(CacheAddr addr, u64 size) {
76 PushCommand(FlushRegionCommand(addr, size));
77}
78
79void ThreadManager::InvalidateRegion(CacheAddr addr, u64 size) {
80 if (state.queue.Empty()) {
81 // It's quicker to invalidate a single region on the CPU if the queue is already empty
82 renderer.Rasterizer().InvalidateRegion(addr, size);
83 } else {
84 PushCommand(InvalidateRegionCommand(addr, size));
85 }
86}
87
88void ThreadManager::FlushAndInvalidateRegion(CacheAddr addr, u64 size) {
89 // Skip flush on asynch mode, as FlushAndInvalidateRegion is not used for anything too important
90 InvalidateRegion(addr, size);
91}
92
93void ThreadManager::PushCommand(CommandData&& command_data) {
94 state.queue.Push(CommandDataContainer(std::move(command_data)));
95 state.SignalCommands();
96}
97
98} // namespace VideoCommon::GPUThread
diff --git a/src/video_core/gpu_thread.h b/src/video_core/gpu_thread.h
new file mode 100644
index 000000000..8cd7db1c6
--- /dev/null
+++ b/src/video_core/gpu_thread.h
@@ -0,0 +1,185 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <array>
8#include <atomic>
9#include <condition_variable>
10#include <memory>
11#include <mutex>
12#include <optional>
13#include <thread>
14#include <variant>
15
16#include "common/threadsafe_queue.h"
17#include "video_core/gpu.h"
18
19namespace Tegra {
20struct FramebufferConfig;
21class DmaPusher;
22} // namespace Tegra
23
24namespace VideoCore {
25class RendererBase;
26} // namespace VideoCore
27
28namespace VideoCommon::GPUThread {
29
/// Empty marker command telling the GPU thread that processing has ended
struct EndProcessingCommand final {};
32
33/// Command to signal to the GPU thread that a command list is ready for processing
34struct SubmitListCommand final {
35 explicit SubmitListCommand(Tegra::CommandList&& entries) : entries{std::move(entries)} {}
36
37 Tegra::CommandList entries;
38};
39
40/// Command to signal to the GPU thread that a swap buffers is pending
41struct SwapBuffersCommand final {
42 explicit SwapBuffersCommand(std::optional<const Tegra::FramebufferConfig> framebuffer)
43 : framebuffer{std::move(framebuffer)} {}
44
45 std::optional<Tegra::FramebufferConfig> framebuffer;
46};
47
48/// Command to signal to the GPU thread to flush a region
49struct FlushRegionCommand final {
50 explicit constexpr FlushRegionCommand(CacheAddr addr, u64 size) : addr{addr}, size{size} {}
51
52 CacheAddr addr;
53 u64 size;
54};
55
56/// Command to signal to the GPU thread to invalidate a region
57struct InvalidateRegionCommand final {
58 explicit constexpr InvalidateRegionCommand(CacheAddr addr, u64 size) : addr{addr}, size{size} {}
59
60 CacheAddr addr;
61 u64 size;
62};
63
64/// Command to signal to the GPU thread to flush and invalidate a region
65struct FlushAndInvalidateRegionCommand final {
66 explicit constexpr FlushAndInvalidateRegionCommand(CacheAddr addr, u64 size)
67 : addr{addr}, size{size} {}
68
69 CacheAddr addr;
70 u64 size;
71};
72
73using CommandData =
74 std::variant<EndProcessingCommand, SubmitListCommand, SwapBuffersCommand, FlushRegionCommand,
75 InvalidateRegionCommand, FlushAndInvalidateRegionCommand>;
76
77struct CommandDataContainer {
78 CommandDataContainer() = default;
79
80 CommandDataContainer(CommandData&& data) : data{std::move(data)} {}
81
82 CommandDataContainer& operator=(const CommandDataContainer& t) {
83 data = std::move(t.data);
84 return *this;
85 }
86
87 CommandData data;
88};
89
90/// Struct used to synchronize the GPU thread
91struct SynchState final {
92 std::atomic_bool is_running{true};
93 std::atomic_int queued_frame_count{};
94 std::mutex frames_mutex;
95 std::mutex commands_mutex;
96 std::condition_variable commands_condition;
97 std::condition_variable frames_condition;
98
99 void IncrementFramesCounter() {
100 std::lock_guard<std::mutex> lock{frames_mutex};
101 ++queued_frame_count;
102 }
103
104 void DecrementFramesCounter() {
105 {
106 std::lock_guard<std::mutex> lock{frames_mutex};
107 --queued_frame_count;
108
109 if (queued_frame_count) {
110 return;
111 }
112 }
113 frames_condition.notify_one();
114 }
115
116 void WaitForFrames() {
117 {
118 std::lock_guard<std::mutex> lock{frames_mutex};
119 if (!queued_frame_count) {
120 return;
121 }
122 }
123
124 // Wait for the GPU to be idle (all commands to be executed)
125 {
126 std::unique_lock<std::mutex> lock{frames_mutex};
127 frames_condition.wait(lock, [this] { return !queued_frame_count; });
128 }
129 }
130
131 void SignalCommands() {
132 {
133 std::unique_lock<std::mutex> lock{commands_mutex};
134 if (queue.Empty()) {
135 return;
136 }
137 }
138
139 commands_condition.notify_one();
140 }
141
142 void WaitForCommands() {
143 std::unique_lock<std::mutex> lock{commands_mutex};
144 commands_condition.wait(lock, [this] { return !queue.Empty(); });
145 }
146
147 using CommandQueue = Common::SPSCQueue<CommandDataContainer>;
148 CommandQueue queue;
149};
150
151/// Class used to manage the GPU thread
152class ThreadManager final {
153public:
154 explicit ThreadManager(VideoCore::RendererBase& renderer, Tegra::DmaPusher& dma_pusher);
155 ~ThreadManager();
156
157 /// Push GPU command entries to be processed
158 void SubmitList(Tegra::CommandList&& entries);
159
160 /// Swap buffers (render frame)
161 void SwapBuffers(
162 std::optional<std::reference_wrapper<const Tegra::FramebufferConfig>> framebuffer);
163
164 /// Notify rasterizer that any caches of the specified region should be flushed to Switch memory
165 void FlushRegion(CacheAddr addr, u64 size);
166
167 /// Notify rasterizer that any caches of the specified region should be invalidated
168 void InvalidateRegion(CacheAddr addr, u64 size);
169
170 /// Notify rasterizer that any caches of the specified region should be flushed and invalidated
171 void FlushAndInvalidateRegion(CacheAddr addr, u64 size);
172
173private:
174 /// Pushes a command to be executed by the GPU thread
175 void PushCommand(CommandData&& command_data);
176
177private:
178 SynchState state;
179 VideoCore::RendererBase& renderer;
180 Tegra::DmaPusher& dma_pusher;
181 std::thread thread;
182 std::thread::id thread_id;
183};
184
185} // namespace VideoCommon::GPUThread
diff --git a/src/video_core/morton.cpp b/src/video_core/morton.cpp
index b68f4fb13..9692ce143 100644
--- a/src/video_core/morton.cpp
+++ b/src/video_core/morton.cpp
@@ -16,12 +16,12 @@ namespace VideoCore {
16using Surface::GetBytesPerPixel; 16using Surface::GetBytesPerPixel;
17using Surface::PixelFormat; 17using Surface::PixelFormat;
18 18
19using MortonCopyFn = void (*)(u32, u32, u32, u32, u32, u32, u8*, std::size_t, VAddr); 19using MortonCopyFn = void (*)(u32, u32, u32, u32, u32, u32, u8*, VAddr);
20using ConversionArray = std::array<MortonCopyFn, Surface::MaxPixelFormat>; 20using ConversionArray = std::array<MortonCopyFn, Surface::MaxPixelFormat>;
21 21
22template <bool morton_to_linear, PixelFormat format> 22template <bool morton_to_linear, PixelFormat format>
23static void MortonCopy(u32 stride, u32 block_height, u32 height, u32 block_depth, u32 depth, 23static void MortonCopy(u32 stride, u32 block_height, u32 height, u32 block_depth, u32 depth,
24 u32 tile_width_spacing, u8* buffer, std::size_t buffer_size, VAddr addr) { 24 u32 tile_width_spacing, u8* buffer, VAddr addr) {
25 constexpr u32 bytes_per_pixel = GetBytesPerPixel(format); 25 constexpr u32 bytes_per_pixel = GetBytesPerPixel(format);
26 26
27 // With the BCn formats (DXT and DXN), each 4x4 tile is swizzled instead of just individual 27 // With the BCn formats (DXT and DXN), each 4x4 tile is swizzled instead of just individual
@@ -42,142 +42,138 @@ static void MortonCopy(u32 stride, u32 block_height, u32 height, u32 block_depth
42} 42}
43 43
44static constexpr ConversionArray morton_to_linear_fns = { 44static constexpr ConversionArray morton_to_linear_fns = {
45 // clang-format off 45 MortonCopy<true, PixelFormat::ABGR8U>,
46 MortonCopy<true, PixelFormat::ABGR8U>, 46 MortonCopy<true, PixelFormat::ABGR8S>,
47 MortonCopy<true, PixelFormat::ABGR8S>, 47 MortonCopy<true, PixelFormat::ABGR8UI>,
48 MortonCopy<true, PixelFormat::ABGR8UI>, 48 MortonCopy<true, PixelFormat::B5G6R5U>,
49 MortonCopy<true, PixelFormat::B5G6R5U>, 49 MortonCopy<true, PixelFormat::A2B10G10R10U>,
50 MortonCopy<true, PixelFormat::A2B10G10R10U>, 50 MortonCopy<true, PixelFormat::A1B5G5R5U>,
51 MortonCopy<true, PixelFormat::A1B5G5R5U>, 51 MortonCopy<true, PixelFormat::R8U>,
52 MortonCopy<true, PixelFormat::R8U>, 52 MortonCopy<true, PixelFormat::R8UI>,
53 MortonCopy<true, PixelFormat::R8UI>, 53 MortonCopy<true, PixelFormat::RGBA16F>,
54 MortonCopy<true, PixelFormat::RGBA16F>, 54 MortonCopy<true, PixelFormat::RGBA16U>,
55 MortonCopy<true, PixelFormat::RGBA16U>, 55 MortonCopy<true, PixelFormat::RGBA16UI>,
56 MortonCopy<true, PixelFormat::RGBA16UI>, 56 MortonCopy<true, PixelFormat::R11FG11FB10F>,
57 MortonCopy<true, PixelFormat::R11FG11FB10F>, 57 MortonCopy<true, PixelFormat::RGBA32UI>,
58 MortonCopy<true, PixelFormat::RGBA32UI>, 58 MortonCopy<true, PixelFormat::DXT1>,
59 MortonCopy<true, PixelFormat::DXT1>, 59 MortonCopy<true, PixelFormat::DXT23>,
60 MortonCopy<true, PixelFormat::DXT23>, 60 MortonCopy<true, PixelFormat::DXT45>,
61 MortonCopy<true, PixelFormat::DXT45>, 61 MortonCopy<true, PixelFormat::DXN1>,
62 MortonCopy<true, PixelFormat::DXN1>, 62 MortonCopy<true, PixelFormat::DXN2UNORM>,
63 MortonCopy<true, PixelFormat::DXN2UNORM>, 63 MortonCopy<true, PixelFormat::DXN2SNORM>,
64 MortonCopy<true, PixelFormat::DXN2SNORM>, 64 MortonCopy<true, PixelFormat::BC7U>,
65 MortonCopy<true, PixelFormat::BC7U>, 65 MortonCopy<true, PixelFormat::BC6H_UF16>,
66 MortonCopy<true, PixelFormat::BC6H_UF16>, 66 MortonCopy<true, PixelFormat::BC6H_SF16>,
67 MortonCopy<true, PixelFormat::BC6H_SF16>, 67 MortonCopy<true, PixelFormat::ASTC_2D_4X4>,
68 MortonCopy<true, PixelFormat::ASTC_2D_4X4>, 68 MortonCopy<true, PixelFormat::BGRA8>,
69 MortonCopy<true, PixelFormat::BGRA8>, 69 MortonCopy<true, PixelFormat::RGBA32F>,
70 MortonCopy<true, PixelFormat::RGBA32F>, 70 MortonCopy<true, PixelFormat::RG32F>,
71 MortonCopy<true, PixelFormat::RG32F>, 71 MortonCopy<true, PixelFormat::R32F>,
72 MortonCopy<true, PixelFormat::R32F>, 72 MortonCopy<true, PixelFormat::R16F>,
73 MortonCopy<true, PixelFormat::R16F>, 73 MortonCopy<true, PixelFormat::R16U>,
74 MortonCopy<true, PixelFormat::R16U>, 74 MortonCopy<true, PixelFormat::R16S>,
75 MortonCopy<true, PixelFormat::R16S>, 75 MortonCopy<true, PixelFormat::R16UI>,
76 MortonCopy<true, PixelFormat::R16UI>, 76 MortonCopy<true, PixelFormat::R16I>,
77 MortonCopy<true, PixelFormat::R16I>, 77 MortonCopy<true, PixelFormat::RG16>,
78 MortonCopy<true, PixelFormat::RG16>, 78 MortonCopy<true, PixelFormat::RG16F>,
79 MortonCopy<true, PixelFormat::RG16F>, 79 MortonCopy<true, PixelFormat::RG16UI>,
80 MortonCopy<true, PixelFormat::RG16UI>, 80 MortonCopy<true, PixelFormat::RG16I>,
81 MortonCopy<true, PixelFormat::RG16I>, 81 MortonCopy<true, PixelFormat::RG16S>,
82 MortonCopy<true, PixelFormat::RG16S>, 82 MortonCopy<true, PixelFormat::RGB32F>,
83 MortonCopy<true, PixelFormat::RGB32F>, 83 MortonCopy<true, PixelFormat::RGBA8_SRGB>,
84 MortonCopy<true, PixelFormat::RGBA8_SRGB>, 84 MortonCopy<true, PixelFormat::RG8U>,
85 MortonCopy<true, PixelFormat::RG8U>, 85 MortonCopy<true, PixelFormat::RG8S>,
86 MortonCopy<true, PixelFormat::RG8S>, 86 MortonCopy<true, PixelFormat::RG32UI>,
87 MortonCopy<true, PixelFormat::RG32UI>, 87 MortonCopy<true, PixelFormat::R32UI>,
88 MortonCopy<true, PixelFormat::R32UI>, 88 MortonCopy<true, PixelFormat::ASTC_2D_8X8>,
89 MortonCopy<true, PixelFormat::ASTC_2D_8X8>, 89 MortonCopy<true, PixelFormat::ASTC_2D_8X5>,
90 MortonCopy<true, PixelFormat::ASTC_2D_8X5>, 90 MortonCopy<true, PixelFormat::ASTC_2D_5X4>,
91 MortonCopy<true, PixelFormat::ASTC_2D_5X4>, 91 MortonCopy<true, PixelFormat::BGRA8_SRGB>,
92 MortonCopy<true, PixelFormat::BGRA8_SRGB>, 92 MortonCopy<true, PixelFormat::DXT1_SRGB>,
93 MortonCopy<true, PixelFormat::DXT1_SRGB>, 93 MortonCopy<true, PixelFormat::DXT23_SRGB>,
94 MortonCopy<true, PixelFormat::DXT23_SRGB>, 94 MortonCopy<true, PixelFormat::DXT45_SRGB>,
95 MortonCopy<true, PixelFormat::DXT45_SRGB>, 95 MortonCopy<true, PixelFormat::BC7U_SRGB>,
96 MortonCopy<true, PixelFormat::BC7U_SRGB>, 96 MortonCopy<true, PixelFormat::ASTC_2D_4X4_SRGB>,
97 MortonCopy<true, PixelFormat::ASTC_2D_4X4_SRGB>, 97 MortonCopy<true, PixelFormat::ASTC_2D_8X8_SRGB>,
98 MortonCopy<true, PixelFormat::ASTC_2D_8X8_SRGB>, 98 MortonCopy<true, PixelFormat::ASTC_2D_8X5_SRGB>,
99 MortonCopy<true, PixelFormat::ASTC_2D_8X5_SRGB>, 99 MortonCopy<true, PixelFormat::ASTC_2D_5X4_SRGB>,
100 MortonCopy<true, PixelFormat::ASTC_2D_5X4_SRGB>, 100 MortonCopy<true, PixelFormat::ASTC_2D_5X5>,
101 MortonCopy<true, PixelFormat::ASTC_2D_5X5>, 101 MortonCopy<true, PixelFormat::ASTC_2D_5X5_SRGB>,
102 MortonCopy<true, PixelFormat::ASTC_2D_5X5_SRGB>, 102 MortonCopy<true, PixelFormat::ASTC_2D_10X8>,
103 MortonCopy<true, PixelFormat::ASTC_2D_10X8>, 103 MortonCopy<true, PixelFormat::ASTC_2D_10X8_SRGB>,
104 MortonCopy<true, PixelFormat::ASTC_2D_10X8_SRGB>, 104 MortonCopy<true, PixelFormat::Z32F>,
105 MortonCopy<true, PixelFormat::Z32F>, 105 MortonCopy<true, PixelFormat::Z16>,
106 MortonCopy<true, PixelFormat::Z16>, 106 MortonCopy<true, PixelFormat::Z24S8>,
107 MortonCopy<true, PixelFormat::Z24S8>, 107 MortonCopy<true, PixelFormat::S8Z24>,
108 MortonCopy<true, PixelFormat::S8Z24>, 108 MortonCopy<true, PixelFormat::Z32FS8>,
109 MortonCopy<true, PixelFormat::Z32FS8>,
110 // clang-format on
111}; 109};
112 110
113static constexpr ConversionArray linear_to_morton_fns = { 111static constexpr ConversionArray linear_to_morton_fns = {
114 // clang-format off 112 MortonCopy<false, PixelFormat::ABGR8U>,
115 MortonCopy<false, PixelFormat::ABGR8U>, 113 MortonCopy<false, PixelFormat::ABGR8S>,
116 MortonCopy<false, PixelFormat::ABGR8S>, 114 MortonCopy<false, PixelFormat::ABGR8UI>,
117 MortonCopy<false, PixelFormat::ABGR8UI>, 115 MortonCopy<false, PixelFormat::B5G6R5U>,
118 MortonCopy<false, PixelFormat::B5G6R5U>, 116 MortonCopy<false, PixelFormat::A2B10G10R10U>,
119 MortonCopy<false, PixelFormat::A2B10G10R10U>, 117 MortonCopy<false, PixelFormat::A1B5G5R5U>,
120 MortonCopy<false, PixelFormat::A1B5G5R5U>, 118 MortonCopy<false, PixelFormat::R8U>,
121 MortonCopy<false, PixelFormat::R8U>, 119 MortonCopy<false, PixelFormat::R8UI>,
122 MortonCopy<false, PixelFormat::R8UI>, 120 MortonCopy<false, PixelFormat::RGBA16F>,
123 MortonCopy<false, PixelFormat::RGBA16F>, 121 MortonCopy<false, PixelFormat::RGBA16U>,
124 MortonCopy<false, PixelFormat::RGBA16U>, 122 MortonCopy<false, PixelFormat::RGBA16UI>,
125 MortonCopy<false, PixelFormat::RGBA16UI>, 123 MortonCopy<false, PixelFormat::R11FG11FB10F>,
126 MortonCopy<false, PixelFormat::R11FG11FB10F>, 124 MortonCopy<false, PixelFormat::RGBA32UI>,
127 MortonCopy<false, PixelFormat::RGBA32UI>, 125 MortonCopy<false, PixelFormat::DXT1>,
128 MortonCopy<false, PixelFormat::DXT1>, 126 MortonCopy<false, PixelFormat::DXT23>,
129 MortonCopy<false, PixelFormat::DXT23>, 127 MortonCopy<false, PixelFormat::DXT45>,
130 MortonCopy<false, PixelFormat::DXT45>, 128 MortonCopy<false, PixelFormat::DXN1>,
131 MortonCopy<false, PixelFormat::DXN1>, 129 MortonCopy<false, PixelFormat::DXN2UNORM>,
132 MortonCopy<false, PixelFormat::DXN2UNORM>, 130 MortonCopy<false, PixelFormat::DXN2SNORM>,
133 MortonCopy<false, PixelFormat::DXN2SNORM>, 131 MortonCopy<false, PixelFormat::BC7U>,
134 MortonCopy<false, PixelFormat::BC7U>, 132 MortonCopy<false, PixelFormat::BC6H_UF16>,
135 MortonCopy<false, PixelFormat::BC6H_UF16>, 133 MortonCopy<false, PixelFormat::BC6H_SF16>,
136 MortonCopy<false, PixelFormat::BC6H_SF16>, 134 // TODO(Subv): Swizzling ASTC formats are not supported
137 // TODO(Subv): Swizzling ASTC formats are not supported 135 nullptr,
138 nullptr, 136 MortonCopy<false, PixelFormat::BGRA8>,
139 MortonCopy<false, PixelFormat::BGRA8>, 137 MortonCopy<false, PixelFormat::RGBA32F>,
140 MortonCopy<false, PixelFormat::RGBA32F>, 138 MortonCopy<false, PixelFormat::RG32F>,
141 MortonCopy<false, PixelFormat::RG32F>, 139 MortonCopy<false, PixelFormat::R32F>,
142 MortonCopy<false, PixelFormat::R32F>, 140 MortonCopy<false, PixelFormat::R16F>,
143 MortonCopy<false, PixelFormat::R16F>, 141 MortonCopy<false, PixelFormat::R16U>,
144 MortonCopy<false, PixelFormat::R16U>, 142 MortonCopy<false, PixelFormat::R16S>,
145 MortonCopy<false, PixelFormat::R16S>, 143 MortonCopy<false, PixelFormat::R16UI>,
146 MortonCopy<false, PixelFormat::R16UI>, 144 MortonCopy<false, PixelFormat::R16I>,
147 MortonCopy<false, PixelFormat::R16I>, 145 MortonCopy<false, PixelFormat::RG16>,
148 MortonCopy<false, PixelFormat::RG16>, 146 MortonCopy<false, PixelFormat::RG16F>,
149 MortonCopy<false, PixelFormat::RG16F>, 147 MortonCopy<false, PixelFormat::RG16UI>,
150 MortonCopy<false, PixelFormat::RG16UI>, 148 MortonCopy<false, PixelFormat::RG16I>,
151 MortonCopy<false, PixelFormat::RG16I>, 149 MortonCopy<false, PixelFormat::RG16S>,
152 MortonCopy<false, PixelFormat::RG16S>, 150 MortonCopy<false, PixelFormat::RGB32F>,
153 MortonCopy<false, PixelFormat::RGB32F>, 151 MortonCopy<false, PixelFormat::RGBA8_SRGB>,
154 MortonCopy<false, PixelFormat::RGBA8_SRGB>, 152 MortonCopy<false, PixelFormat::RG8U>,
155 MortonCopy<false, PixelFormat::RG8U>, 153 MortonCopy<false, PixelFormat::RG8S>,
156 MortonCopy<false, PixelFormat::RG8S>, 154 MortonCopy<false, PixelFormat::RG32UI>,
157 MortonCopy<false, PixelFormat::RG32UI>, 155 MortonCopy<false, PixelFormat::R32UI>,
158 MortonCopy<false, PixelFormat::R32UI>, 156 nullptr,
159 nullptr, 157 nullptr,
160 nullptr, 158 nullptr,
161 nullptr, 159 MortonCopy<false, PixelFormat::BGRA8_SRGB>,
162 MortonCopy<false, PixelFormat::BGRA8_SRGB>, 160 MortonCopy<false, PixelFormat::DXT1_SRGB>,
163 MortonCopy<false, PixelFormat::DXT1_SRGB>, 161 MortonCopy<false, PixelFormat::DXT23_SRGB>,
164 MortonCopy<false, PixelFormat::DXT23_SRGB>, 162 MortonCopy<false, PixelFormat::DXT45_SRGB>,
165 MortonCopy<false, PixelFormat::DXT45_SRGB>, 163 MortonCopy<false, PixelFormat::BC7U_SRGB>,
166 MortonCopy<false, PixelFormat::BC7U_SRGB>, 164 nullptr,
167 nullptr, 165 nullptr,
168 nullptr, 166 nullptr,
169 nullptr, 167 nullptr,
170 nullptr, 168 nullptr,
171 nullptr, 169 nullptr,
172 nullptr, 170 nullptr,
173 nullptr, 171 nullptr,
174 nullptr, 172 MortonCopy<false, PixelFormat::Z32F>,
175 MortonCopy<false, PixelFormat::Z32F>, 173 MortonCopy<false, PixelFormat::Z16>,
176 MortonCopy<false, PixelFormat::Z16>, 174 MortonCopy<false, PixelFormat::Z24S8>,
177 MortonCopy<false, PixelFormat::Z24S8>, 175 MortonCopy<false, PixelFormat::S8Z24>,
178 MortonCopy<false, PixelFormat::S8Z24>, 176 MortonCopy<false, PixelFormat::Z32FS8>,
179 MortonCopy<false, PixelFormat::Z32FS8>,
180 // clang-format on
181}; 177};
182 178
183static MortonCopyFn GetSwizzleFunction(MortonSwizzleMode mode, Surface::PixelFormat format) { 179static MortonCopyFn GetSwizzleFunction(MortonSwizzleMode mode, Surface::PixelFormat format) {
@@ -191,45 +187,6 @@ static MortonCopyFn GetSwizzleFunction(MortonSwizzleMode mode, Surface::PixelFor
191 return morton_to_linear_fns[static_cast<std::size_t>(format)]; 187 return morton_to_linear_fns[static_cast<std::size_t>(format)];
192} 188}
193 189
194/// 8x8 Z-Order coordinate from 2D coordinates
195static u32 MortonInterleave(u32 x, u32 y) {
196 static const u32 xlut[] = {0x00, 0x01, 0x04, 0x05, 0x10, 0x11, 0x14, 0x15};
197 static const u32 ylut[] = {0x00, 0x02, 0x08, 0x0a, 0x20, 0x22, 0x28, 0x2a};
198 return xlut[x % 8] + ylut[y % 8];
199}
200
201/// Calculates the offset of the position of the pixel in Morton order
202static u32 GetMortonOffset(u32 x, u32 y, u32 bytes_per_pixel) {
203 // Images are split into 8x8 tiles. Each tile is composed of four 4x4 subtiles each
204 // of which is composed of four 2x2 subtiles each of which is composed of four texels.
205 // Each structure is embedded into the next-bigger one in a diagonal pattern, e.g.
206 // texels are laid out in a 2x2 subtile like this:
207 // 2 3
208 // 0 1
209 //
210 // The full 8x8 tile has the texels arranged like this:
211 //
212 // 42 43 46 47 58 59 62 63
213 // 40 41 44 45 56 57 60 61
214 // 34 35 38 39 50 51 54 55
215 // 32 33 36 37 48 49 52 53
216 // 10 11 14 15 26 27 30 31
217 // 08 09 12 13 24 25 28 29
218 // 02 03 06 07 18 19 22 23
219 // 00 01 04 05 16 17 20 21
220 //
221 // This pattern is what's called Z-order curve, or Morton order.
222
223 const unsigned int block_height = 8;
224 const unsigned int coarse_x = x & ~7;
225
226 u32 i = MortonInterleave(x, y);
227
228 const unsigned int offset = coarse_x * block_height;
229
230 return (i + offset) * bytes_per_pixel;
231}
232
233static u32 MortonInterleave128(u32 x, u32 y) { 190static u32 MortonInterleave128(u32 x, u32 y) {
234 // 128x128 Z-Order coordinate from 2D coordinates 191 // 128x128 Z-Order coordinate from 2D coordinates
235 static constexpr u32 xlut[] = { 192 static constexpr u32 xlut[] = {
@@ -325,14 +282,14 @@ static u32 GetMortonOffset128(u32 x, u32 y, u32 bytes_per_pixel) {
325 282
326void MortonSwizzle(MortonSwizzleMode mode, Surface::PixelFormat format, u32 stride, 283void MortonSwizzle(MortonSwizzleMode mode, Surface::PixelFormat format, u32 stride,
327 u32 block_height, u32 height, u32 block_depth, u32 depth, u32 tile_width_spacing, 284 u32 block_height, u32 height, u32 block_depth, u32 depth, u32 tile_width_spacing,
328 u8* buffer, std::size_t buffer_size, VAddr addr) { 285 u8* buffer, VAddr addr) {
329
330 GetSwizzleFunction(mode, format)(stride, block_height, height, block_depth, depth, 286 GetSwizzleFunction(mode, format)(stride, block_height, height, block_depth, depth,
331 tile_width_spacing, buffer, buffer_size, addr); 287 tile_width_spacing, buffer, addr);
332} 288}
333 289
334void MortonCopyPixels128(u32 width, u32 height, u32 bytes_per_pixel, u32 linear_bytes_per_pixel, 290void MortonCopyPixels128(MortonSwizzleMode mode, u32 width, u32 height, u32 bytes_per_pixel,
335 u8* morton_data, u8* linear_data, bool morton_to_linear) { 291 u32 linear_bytes_per_pixel, u8* morton_data, u8* linear_data) {
292 const bool morton_to_linear = mode == MortonSwizzleMode::MortonToLinear;
336 u8* data_ptrs[2]; 293 u8* data_ptrs[2];
337 for (u32 y = 0; y < height; ++y) { 294 for (u32 y = 0; y < height; ++y) {
338 for (u32 x = 0; x < width; ++x) { 295 for (u32 x = 0; x < width; ++x) {
diff --git a/src/video_core/morton.h b/src/video_core/morton.h
index 065f59ce3..b565204b5 100644
--- a/src/video_core/morton.h
+++ b/src/video_core/morton.h
@@ -13,9 +13,9 @@ enum class MortonSwizzleMode { MortonToLinear, LinearToMorton };
13 13
14void MortonSwizzle(MortonSwizzleMode mode, VideoCore::Surface::PixelFormat format, u32 stride, 14void MortonSwizzle(MortonSwizzleMode mode, VideoCore::Surface::PixelFormat format, u32 stride,
15 u32 block_height, u32 height, u32 block_depth, u32 depth, u32 tile_width_spacing, 15 u32 block_height, u32 height, u32 block_depth, u32 depth, u32 tile_width_spacing,
16 u8* buffer, std::size_t buffer_size, VAddr addr); 16 u8* buffer, VAddr addr);
17 17
18void MortonCopyPixels128(u32 width, u32 height, u32 bytes_per_pixel, u32 linear_bytes_per_pixel, 18void MortonCopyPixels128(MortonSwizzleMode mode, u32 width, u32 height, u32 bytes_per_pixel,
19 u8* morton_data, u8* linear_data, bool morton_to_linear); 19 u32 linear_bytes_per_pixel, u8* morton_data, u8* linear_data);
20 20
21} // namespace VideoCore 21} // namespace VideoCore
diff --git a/src/video_core/rasterizer_cache.h b/src/video_core/rasterizer_cache.h
index bcf0c15a4..ecd9986a0 100644
--- a/src/video_core/rasterizer_cache.h
+++ b/src/video_core/rasterizer_cache.h
@@ -4,6 +4,7 @@
4 4
5#pragma once 5#pragma once
6 6
7#include <mutex>
7#include <set> 8#include <set>
8#include <unordered_map> 9#include <unordered_map>
9 10
@@ -12,14 +13,26 @@
12 13
13#include "common/common_types.h" 14#include "common/common_types.h"
14#include "core/settings.h" 15#include "core/settings.h"
16#include "video_core/gpu.h"
15#include "video_core/rasterizer_interface.h" 17#include "video_core/rasterizer_interface.h"
16 18
17class RasterizerCacheObject { 19class RasterizerCacheObject {
18public: 20public:
21 explicit RasterizerCacheObject(const u8* host_ptr)
22 : host_ptr{host_ptr}, cache_addr{ToCacheAddr(host_ptr)} {}
23
19 virtual ~RasterizerCacheObject(); 24 virtual ~RasterizerCacheObject();
20 25
26 CacheAddr GetCacheAddr() const {
27 return cache_addr;
28 }
29
30 const u8* GetHostPtr() const {
31 return host_ptr;
32 }
33
21 /// Gets the address of the shader in guest memory, required for cache management 34 /// Gets the address of the shader in guest memory, required for cache management
22 virtual VAddr GetAddr() const = 0; 35 virtual VAddr GetCpuAddr() const = 0;
23 36
24 /// Gets the size of the shader in guest memory, required for cache management 37 /// Gets the size of the shader in guest memory, required for cache management
25 virtual std::size_t GetSizeInBytes() const = 0; 38 virtual std::size_t GetSizeInBytes() const = 0;
@@ -58,6 +71,8 @@ private:
58 bool is_registered{}; ///< Whether the object is currently registered with the cache 71 bool is_registered{}; ///< Whether the object is currently registered with the cache
59 bool is_dirty{}; ///< Whether the object is dirty (out of sync with guest memory) 72 bool is_dirty{}; ///< Whether the object is dirty (out of sync with guest memory)
60 u64 last_modified_ticks{}; ///< When the object was last modified, used for in-order flushing 73 u64 last_modified_ticks{}; ///< When the object was last modified, used for in-order flushing
74 CacheAddr cache_addr{}; ///< Cache address memory, unique from emulated virtual address space
75 const u8* host_ptr{}; ///< Pointer to the memory backing this cached region
61}; 76};
62 77
63template <class T> 78template <class T>
@@ -68,7 +83,9 @@ public:
68 explicit RasterizerCache(VideoCore::RasterizerInterface& rasterizer) : rasterizer{rasterizer} {} 83 explicit RasterizerCache(VideoCore::RasterizerInterface& rasterizer) : rasterizer{rasterizer} {}
69 84
70 /// Write any cached resources overlapping the specified region back to memory 85 /// Write any cached resources overlapping the specified region back to memory
71 void FlushRegion(Tegra::GPUVAddr addr, size_t size) { 86 void FlushRegion(CacheAddr addr, std::size_t size) {
87 std::lock_guard<std::recursive_mutex> lock{mutex};
88
72 const auto& objects{GetSortedObjectsFromRegion(addr, size)}; 89 const auto& objects{GetSortedObjectsFromRegion(addr, size)};
73 for (auto& object : objects) { 90 for (auto& object : objects) {
74 FlushObject(object); 91 FlushObject(object);
@@ -76,7 +93,9 @@ public:
76 } 93 }
77 94
78 /// Mark the specified region as being invalidated 95 /// Mark the specified region as being invalidated
79 void InvalidateRegion(VAddr addr, u64 size) { 96 void InvalidateRegion(CacheAddr addr, u64 size) {
97 std::lock_guard<std::recursive_mutex> lock{mutex};
98
80 const auto& objects{GetSortedObjectsFromRegion(addr, size)}; 99 const auto& objects{GetSortedObjectsFromRegion(addr, size)};
81 for (auto& object : objects) { 100 for (auto& object : objects) {
82 if (!object->IsRegistered()) { 101 if (!object->IsRegistered()) {
@@ -89,49 +108,70 @@ public:
89 108
90 /// Invalidates everything in the cache 109 /// Invalidates everything in the cache
91 void InvalidateAll() { 110 void InvalidateAll() {
111 std::lock_guard<std::recursive_mutex> lock{mutex};
112
92 while (interval_cache.begin() != interval_cache.end()) { 113 while (interval_cache.begin() != interval_cache.end()) {
93 Unregister(*interval_cache.begin()->second.begin()); 114 Unregister(*interval_cache.begin()->second.begin());
94 } 115 }
95 } 116 }
96 117
97protected: 118protected:
98 /// Tries to get an object from the cache with the specified address 119 /// Tries to get an object from the cache with the specified cache address
99 T TryGet(VAddr addr) const { 120 T TryGet(CacheAddr addr) const {
100 const auto iter = map_cache.find(addr); 121 const auto iter = map_cache.find(addr);
101 if (iter != map_cache.end()) 122 if (iter != map_cache.end())
102 return iter->second; 123 return iter->second;
103 return nullptr; 124 return nullptr;
104 } 125 }
105 126
127 T TryGet(const void* addr) const {
128 const auto iter = map_cache.find(ToCacheAddr(addr));
129 if (iter != map_cache.end())
130 return iter->second;
131 return nullptr;
132 }
133
106 /// Register an object into the cache 134 /// Register an object into the cache
107 void Register(const T& object) { 135 void Register(const T& object) {
136 std::lock_guard<std::recursive_mutex> lock{mutex};
137
108 object->SetIsRegistered(true); 138 object->SetIsRegistered(true);
109 interval_cache.add({GetInterval(object), ObjectSet{object}}); 139 interval_cache.add({GetInterval(object), ObjectSet{object}});
110 map_cache.insert({object->GetAddr(), object}); 140 map_cache.insert({object->GetCacheAddr(), object});
111 rasterizer.UpdatePagesCachedCount(object->GetAddr(), object->GetSizeInBytes(), 1); 141 rasterizer.UpdatePagesCachedCount(object->GetCpuAddr(), object->GetSizeInBytes(), 1);
112 } 142 }
113 143
114 /// Unregisters an object from the cache 144 /// Unregisters an object from the cache
115 void Unregister(const T& object) { 145 void Unregister(const T& object) {
116 object->SetIsRegistered(false); 146 std::lock_guard<std::recursive_mutex> lock{mutex};
117 rasterizer.UpdatePagesCachedCount(object->GetAddr(), object->GetSizeInBytes(), -1);
118 // Only flush if use_accurate_gpu_emulation is enabled, as it incurs a performance hit
119 if (Settings::values.use_accurate_gpu_emulation) {
120 FlushObject(object);
121 }
122 147
148 object->SetIsRegistered(false);
149 rasterizer.UpdatePagesCachedCount(object->GetCpuAddr(), object->GetSizeInBytes(), -1);
123 interval_cache.subtract({GetInterval(object), ObjectSet{object}}); 150 interval_cache.subtract({GetInterval(object), ObjectSet{object}});
124 map_cache.erase(object->GetAddr()); 151 map_cache.erase(object->GetCacheAddr());
125 } 152 }
126 153
127 /// Returns a ticks counter used for tracking when cached objects were last modified 154 /// Returns a ticks counter used for tracking when cached objects were last modified
128 u64 GetModifiedTicks() { 155 u64 GetModifiedTicks() {
156 std::lock_guard<std::recursive_mutex> lock{mutex};
157
129 return ++modified_ticks; 158 return ++modified_ticks;
130 } 159 }
131 160
161 /// Flushes the specified object, updating appropriate cache state as needed
162 void FlushObject(const T& object) {
163 std::lock_guard<std::recursive_mutex> lock{mutex};
164
165 if (!object->IsDirty()) {
166 return;
167 }
168 object->Flush();
169 object->MarkAsModified(false, *this);
170 }
171
132private: 172private:
133 /// Returns a list of cached objects from the specified memory region, ordered by access time 173 /// Returns a list of cached objects from the specified memory region, ordered by access time
134 std::vector<T> GetSortedObjectsFromRegion(VAddr addr, u64 size) { 174 std::vector<T> GetSortedObjectsFromRegion(CacheAddr addr, u64 size) {
135 if (size == 0) { 175 if (size == 0) {
136 return {}; 176 return {};
137 } 177 }
@@ -154,27 +194,19 @@ private:
154 return objects; 194 return objects;
155 } 195 }
156 196
157 /// Flushes the specified object, updating appropriate cache state as needed
158 void FlushObject(const T& object) {
159 if (!object->IsDirty()) {
160 return;
161 }
162 object->Flush();
163 object->MarkAsModified(false, *this);
164 }
165
166 using ObjectSet = std::set<T>; 197 using ObjectSet = std::set<T>;
167 using ObjectCache = std::unordered_map<VAddr, T>; 198 using ObjectCache = std::unordered_map<CacheAddr, T>;
168 using IntervalCache = boost::icl::interval_map<VAddr, ObjectSet>; 199 using IntervalCache = boost::icl::interval_map<CacheAddr, ObjectSet>;
169 using ObjectInterval = typename IntervalCache::interval_type; 200 using ObjectInterval = typename IntervalCache::interval_type;
170 201
171 static auto GetInterval(const T& object) { 202 static auto GetInterval(const T& object) {
172 return ObjectInterval::right_open(object->GetAddr(), 203 return ObjectInterval::right_open(object->GetCacheAddr(),
173 object->GetAddr() + object->GetSizeInBytes()); 204 object->GetCacheAddr() + object->GetSizeInBytes());
174 } 205 }
175 206
176 ObjectCache map_cache; 207 ObjectCache map_cache;
177 IntervalCache interval_cache; ///< Cache of objects 208 IntervalCache interval_cache; ///< Cache of objects
178 u64 modified_ticks{}; ///< Counter of cache state ticks, used for in-order flushing 209 u64 modified_ticks{}; ///< Counter of cache state ticks, used for in-order flushing
179 VideoCore::RasterizerInterface& rasterizer; 210 VideoCore::RasterizerInterface& rasterizer;
211 std::recursive_mutex mutex;
180}; 212};
diff --git a/src/video_core/rasterizer_interface.h b/src/video_core/rasterizer_interface.h
index b2a223705..76e292e87 100644
--- a/src/video_core/rasterizer_interface.h
+++ b/src/video_core/rasterizer_interface.h
@@ -35,20 +35,20 @@ public:
35 virtual void FlushAll() = 0; 35 virtual void FlushAll() = 0;
36 36
37 /// Notify rasterizer that any caches of the specified region should be flushed to Switch memory 37 /// Notify rasterizer that any caches of the specified region should be flushed to Switch memory
38 virtual void FlushRegion(VAddr addr, u64 size) = 0; 38 virtual void FlushRegion(CacheAddr addr, u64 size) = 0;
39 39
40 /// Notify rasterizer that any caches of the specified region should be invalidated 40 /// Notify rasterizer that any caches of the specified region should be invalidated
41 virtual void InvalidateRegion(VAddr addr, u64 size) = 0; 41 virtual void InvalidateRegion(CacheAddr addr, u64 size) = 0;
42 42
43 /// Notify rasterizer that any caches of the specified region should be flushed to Switch memory 43 /// Notify rasterizer that any caches of the specified region should be flushed to Switch memory
44 /// and invalidated 44 /// and invalidated
45 virtual void FlushAndInvalidateRegion(VAddr addr, u64 size) = 0; 45 virtual void FlushAndInvalidateRegion(CacheAddr addr, u64 size) = 0;
46 46
47 /// Attempt to use a faster method to perform a surface copy 47 /// Attempt to use a faster method to perform a surface copy
48 virtual bool AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Regs::Surface& src, 48 virtual bool AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Regs::Surface& src,
49 const Tegra::Engines::Fermi2D::Regs::Surface& dst, 49 const Tegra::Engines::Fermi2D::Regs::Surface& dst,
50 const MathUtil::Rectangle<u32>& src_rect, 50 const Common::Rectangle<u32>& src_rect,
51 const MathUtil::Rectangle<u32>& dst_rect) { 51 const Common::Rectangle<u32>& dst_rect) {
52 return false; 52 return false;
53 } 53 }
54 54
@@ -63,7 +63,7 @@ public:
63 } 63 }
64 64
65 /// Increase/decrease the number of object in pages touching the specified region 65 /// Increase/decrease the number of object in pages touching the specified region
66 virtual void UpdatePagesCachedCount(Tegra::GPUVAddr addr, u64 size, int delta) {} 66 virtual void UpdatePagesCachedCount(VAddr addr, u64 size, int delta) {}
67 67
68 /// Initialize disk cached resources for the game being emulated 68 /// Initialize disk cached resources for the game being emulated
69 virtual void LoadDiskResources(const std::atomic_bool& stop_loading = false, 69 virtual void LoadDiskResources(const std::atomic_bool& stop_loading = false,
diff --git a/src/video_core/renderer_base.cpp b/src/video_core/renderer_base.cpp
index 94223f45f..919d1f2d4 100644
--- a/src/video_core/renderer_base.cpp
+++ b/src/video_core/renderer_base.cpp
@@ -2,6 +2,7 @@
2// Licensed under GPLv2 or any later version 2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included. 3// Refer to the license.txt file included.
4 4
5#include "common/logging/log.h"
5#include "core/frontend/emu_window.h" 6#include "core/frontend/emu_window.h"
6#include "core/settings.h" 7#include "core/settings.h"
7#include "video_core/renderer_base.h" 8#include "video_core/renderer_base.h"
diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.cpp b/src/video_core/renderer_opengl/gl_buffer_cache.cpp
index b3062e5ba..a4eea61a6 100644
--- a/src/video_core/renderer_opengl/gl_buffer_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_buffer_cache.cpp
@@ -13,6 +13,11 @@
13 13
14namespace OpenGL { 14namespace OpenGL {
15 15
16CachedBufferEntry::CachedBufferEntry(VAddr cpu_addr, std::size_t size, GLintptr offset,
17 std::size_t alignment, u8* host_ptr)
18 : cpu_addr{cpu_addr}, size{size}, offset{offset}, alignment{alignment}, RasterizerCacheObject{
19 host_ptr} {}
20
16OGLBufferCache::OGLBufferCache(RasterizerOpenGL& rasterizer, std::size_t size) 21OGLBufferCache::OGLBufferCache(RasterizerOpenGL& rasterizer, std::size_t size)
17 : RasterizerCache{rasterizer}, stream_buffer(size, true) {} 22 : RasterizerCache{rasterizer}, stream_buffer(size, true) {}
18 23
@@ -26,11 +31,12 @@ GLintptr OGLBufferCache::UploadMemory(Tegra::GPUVAddr gpu_addr, std::size_t size
26 // TODO: Figure out which size is the best for given games. 31 // TODO: Figure out which size is the best for given games.
27 cache &= size >= 2048; 32 cache &= size >= 2048;
28 33
34 const auto& host_ptr{Memory::GetPointer(*cpu_addr)};
29 if (cache) { 35 if (cache) {
30 auto entry = TryGet(*cpu_addr); 36 auto entry = TryGet(host_ptr);
31 if (entry) { 37 if (entry) {
32 if (entry->size >= size && entry->alignment == alignment) { 38 if (entry->GetSize() >= size && entry->GetAlignment() == alignment) {
33 return entry->offset; 39 return entry->GetOffset();
34 } 40 }
35 Unregister(entry); 41 Unregister(entry);
36 } 42 }
@@ -39,17 +45,17 @@ GLintptr OGLBufferCache::UploadMemory(Tegra::GPUVAddr gpu_addr, std::size_t size
39 AlignBuffer(alignment); 45 AlignBuffer(alignment);
40 const GLintptr uploaded_offset = buffer_offset; 46 const GLintptr uploaded_offset = buffer_offset;
41 47
42 Memory::ReadBlock(*cpu_addr, buffer_ptr, size); 48 if (!host_ptr) {
49 return uploaded_offset;
50 }
43 51
52 std::memcpy(buffer_ptr, host_ptr, size);
44 buffer_ptr += size; 53 buffer_ptr += size;
45 buffer_offset += size; 54 buffer_offset += size;
46 55
47 if (cache) { 56 if (cache) {
48 auto entry = std::make_shared<CachedBufferEntry>(); 57 auto entry = std::make_shared<CachedBufferEntry>(*cpu_addr, size, uploaded_offset,
49 entry->offset = uploaded_offset; 58 alignment, host_ptr);
50 entry->size = size;
51 entry->alignment = alignment;
52 entry->addr = *cpu_addr;
53 Register(entry); 59 Register(entry);
54 } 60 }
55 61
diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.h b/src/video_core/renderer_opengl/gl_buffer_cache.h
index c11acfb79..1de1f84ae 100644
--- a/src/video_core/renderer_opengl/gl_buffer_cache.h
+++ b/src/video_core/renderer_opengl/gl_buffer_cache.h
@@ -17,22 +17,39 @@ namespace OpenGL {
17 17
18class RasterizerOpenGL; 18class RasterizerOpenGL;
19 19
20struct CachedBufferEntry final : public RasterizerCacheObject { 20class CachedBufferEntry final : public RasterizerCacheObject {
21 VAddr GetAddr() const override { 21public:
22 return addr; 22 explicit CachedBufferEntry(VAddr cpu_addr, std::size_t size, GLintptr offset,
23 std::size_t alignment, u8* host_ptr);
24
25 VAddr GetCpuAddr() const override {
26 return cpu_addr;
23 } 27 }
24 28
25 std::size_t GetSizeInBytes() const override { 29 std::size_t GetSizeInBytes() const override {
26 return size; 30 return size;
27 } 31 }
28 32
33 std::size_t GetSize() const {
34 return size;
35 }
36
37 GLintptr GetOffset() const {
38 return offset;
39 }
40
41 std::size_t GetAlignment() const {
42 return alignment;
43 }
44
29 // We do not have to flush this cache as things in it are never modified by us. 45 // We do not have to flush this cache as things in it are never modified by us.
30 void Flush() override {} 46 void Flush() override {}
31 47
32 VAddr addr; 48private:
33 std::size_t size; 49 VAddr cpu_addr{};
34 GLintptr offset; 50 std::size_t size{};
35 std::size_t alignment; 51 GLintptr offset{};
52 std::size_t alignment{};
36}; 53};
37 54
38class OGLBufferCache final : public RasterizerCache<std::shared_ptr<CachedBufferEntry>> { 55class OGLBufferCache final : public RasterizerCache<std::shared_ptr<CachedBufferEntry>> {
diff --git a/src/video_core/renderer_opengl/gl_global_cache.cpp b/src/video_core/renderer_opengl/gl_global_cache.cpp
index c7f32feaa..a2c509c24 100644
--- a/src/video_core/renderer_opengl/gl_global_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_global_cache.cpp
@@ -15,12 +15,13 @@
15 15
16namespace OpenGL { 16namespace OpenGL {
17 17
18CachedGlobalRegion::CachedGlobalRegion(VAddr addr, u32 size) : addr{addr}, size{size} { 18CachedGlobalRegion::CachedGlobalRegion(VAddr cpu_addr, u32 size, u8* host_ptr)
19 : cpu_addr{cpu_addr}, size{size}, RasterizerCacheObject{host_ptr} {
19 buffer.Create(); 20 buffer.Create();
20 // Bind and unbind the buffer so it gets allocated by the driver 21 // Bind and unbind the buffer so it gets allocated by the driver
21 glBindBuffer(GL_SHADER_STORAGE_BUFFER, buffer.handle); 22 glBindBuffer(GL_SHADER_STORAGE_BUFFER, buffer.handle);
22 glBindBuffer(GL_SHADER_STORAGE_BUFFER, 0); 23 glBindBuffer(GL_SHADER_STORAGE_BUFFER, 0);
23 LabelGLObject(GL_BUFFER, buffer.handle, addr, "GlobalMemory"); 24 LabelGLObject(GL_BUFFER, buffer.handle, cpu_addr, "GlobalMemory");
24} 25}
25 26
26void CachedGlobalRegion::Reload(u32 size_) { 27void CachedGlobalRegion::Reload(u32 size_) {
@@ -35,7 +36,7 @@ void CachedGlobalRegion::Reload(u32 size_) {
35 36
36 // TODO(Rodrigo): Get rid of Memory::GetPointer with a staging buffer 37 // TODO(Rodrigo): Get rid of Memory::GetPointer with a staging buffer
37 glBindBuffer(GL_SHADER_STORAGE_BUFFER, buffer.handle); 38 glBindBuffer(GL_SHADER_STORAGE_BUFFER, buffer.handle);
38 glBufferData(GL_SHADER_STORAGE_BUFFER, size, Memory::GetPointer(addr), GL_DYNAMIC_DRAW); 39 glBufferData(GL_SHADER_STORAGE_BUFFER, size, GetHostPtr(), GL_DYNAMIC_DRAW);
39} 40}
40 41
41GlobalRegion GlobalRegionCacheOpenGL::TryGetReservedGlobalRegion(VAddr addr, u32 size) const { 42GlobalRegion GlobalRegionCacheOpenGL::TryGetReservedGlobalRegion(VAddr addr, u32 size) const {
@@ -46,19 +47,19 @@ GlobalRegion GlobalRegionCacheOpenGL::TryGetReservedGlobalRegion(VAddr addr, u32
46 return search->second; 47 return search->second;
47} 48}
48 49
49GlobalRegion GlobalRegionCacheOpenGL::GetUncachedGlobalRegion(VAddr addr, u32 size) { 50GlobalRegion GlobalRegionCacheOpenGL::GetUncachedGlobalRegion(VAddr addr, u32 size, u8* host_ptr) {
50 GlobalRegion region{TryGetReservedGlobalRegion(addr, size)}; 51 GlobalRegion region{TryGetReservedGlobalRegion(addr, size)};
51 if (!region) { 52 if (!region) {
52 // No reserved surface available, create a new one and reserve it 53 // No reserved surface available, create a new one and reserve it
53 region = std::make_shared<CachedGlobalRegion>(addr, size); 54 region = std::make_shared<CachedGlobalRegion>(addr, size, host_ptr);
54 ReserveGlobalRegion(region); 55 ReserveGlobalRegion(region);
55 } 56 }
56 region->Reload(size); 57 region->Reload(size);
57 return region; 58 return region;
58} 59}
59 60
60void GlobalRegionCacheOpenGL::ReserveGlobalRegion(const GlobalRegion& region) { 61void GlobalRegionCacheOpenGL::ReserveGlobalRegion(GlobalRegion region) {
61 reserve[region->GetAddr()] = region; 62 reserve.insert_or_assign(region->GetCpuAddr(), std::move(region));
62} 63}
63 64
64GlobalRegionCacheOpenGL::GlobalRegionCacheOpenGL(RasterizerOpenGL& rasterizer) 65GlobalRegionCacheOpenGL::GlobalRegionCacheOpenGL(RasterizerOpenGL& rasterizer)
@@ -80,11 +81,12 @@ GlobalRegion GlobalRegionCacheOpenGL::GetGlobalRegion(
80 ASSERT(actual_addr); 81 ASSERT(actual_addr);
81 82
82 // Look up global region in the cache based on address 83 // Look up global region in the cache based on address
83 GlobalRegion region = TryGet(*actual_addr); 84 const auto& host_ptr{Memory::GetPointer(*actual_addr)};
85 GlobalRegion region{TryGet(host_ptr)};
84 86
85 if (!region) { 87 if (!region) {
86 // No global region found - create a new one 88 // No global region found - create a new one
87 region = GetUncachedGlobalRegion(*actual_addr, size); 89 region = GetUncachedGlobalRegion(*actual_addr, size, host_ptr);
88 Register(region); 90 Register(region);
89 } 91 }
90 92
diff --git a/src/video_core/renderer_opengl/gl_global_cache.h b/src/video_core/renderer_opengl/gl_global_cache.h
index 37830bb7c..e497a0619 100644
--- a/src/video_core/renderer_opengl/gl_global_cache.h
+++ b/src/video_core/renderer_opengl/gl_global_cache.h
@@ -27,15 +27,13 @@ using GlobalRegion = std::shared_ptr<CachedGlobalRegion>;
27 27
28class CachedGlobalRegion final : public RasterizerCacheObject { 28class CachedGlobalRegion final : public RasterizerCacheObject {
29public: 29public:
30 explicit CachedGlobalRegion(VAddr addr, u32 size); 30 explicit CachedGlobalRegion(VAddr cpu_addr, u32 size, u8* host_ptr);
31 31
32 /// Gets the address of the shader in guest memory, required for cache management 32 VAddr GetCpuAddr() const override {
33 VAddr GetAddr() const { 33 return cpu_addr;
34 return addr;
35 } 34 }
36 35
37 /// Gets the size of the shader in guest memory, required for cache management 36 std::size_t GetSizeInBytes() const override {
38 std::size_t GetSizeInBytes() const {
39 return size; 37 return size;
40 } 38 }
41 39
@@ -53,9 +51,8 @@ public:
53 } 51 }
54 52
55private: 53private:
56 VAddr addr{}; 54 VAddr cpu_addr{};
57 u32 size{}; 55 u32 size{};
58
59 OGLBuffer buffer; 56 OGLBuffer buffer;
60}; 57};
61 58
@@ -69,8 +66,8 @@ public:
69 66
70private: 67private:
71 GlobalRegion TryGetReservedGlobalRegion(VAddr addr, u32 size) const; 68 GlobalRegion TryGetReservedGlobalRegion(VAddr addr, u32 size) const;
72 GlobalRegion GetUncachedGlobalRegion(VAddr addr, u32 size); 69 GlobalRegion GetUncachedGlobalRegion(VAddr addr, u32 size, u8* host_ptr);
73 void ReserveGlobalRegion(const GlobalRegion& region); 70 void ReserveGlobalRegion(GlobalRegion region);
74 71
75 std::unordered_map<VAddr, GlobalRegion> reserve; 72 std::unordered_map<VAddr, GlobalRegion> reserve;
76}; 73};
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp
index 12d876120..bb6de5477 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -102,8 +102,9 @@ struct FramebufferCacheKey {
102 102
103RasterizerOpenGL::RasterizerOpenGL(Core::Frontend::EmuWindow& window, Core::System& system, 103RasterizerOpenGL::RasterizerOpenGL(Core::Frontend::EmuWindow& window, Core::System& system,
104 ScreenInfo& info) 104 ScreenInfo& info)
105 : res_cache{*this}, shader_cache{*this, system}, emu_window{window}, screen_info{info}, 105 : res_cache{*this}, shader_cache{*this, system}, global_cache{*this},
106 buffer_cache(*this, STREAM_BUFFER_SIZE), global_cache{*this} { 106 emu_window{window}, system{system}, screen_info{info},
107 buffer_cache(*this, STREAM_BUFFER_SIZE) {
107 // Create sampler objects 108 // Create sampler objects
108 for (std::size_t i = 0; i < texture_samplers.size(); ++i) { 109 for (std::size_t i = 0; i < texture_samplers.size(); ++i) {
109 texture_samplers[i].Create(); 110 texture_samplers[i].Create();
@@ -118,7 +119,7 @@ RasterizerOpenGL::RasterizerOpenGL(Core::Frontend::EmuWindow& window, Core::Syst
118 119
119 glGetIntegerv(GL_UNIFORM_BUFFER_OFFSET_ALIGNMENT, &uniform_buffer_alignment); 120 glGetIntegerv(GL_UNIFORM_BUFFER_OFFSET_ALIGNMENT, &uniform_buffer_alignment);
120 121
121 LOG_CRITICAL(Render_OpenGL, "Sync fixed function OpenGL state here!"); 122 LOG_DEBUG(Render_OpenGL, "Sync fixed function OpenGL state here");
122 CheckExtensions(); 123 CheckExtensions();
123} 124}
124 125
@@ -138,7 +139,7 @@ void RasterizerOpenGL::CheckExtensions() {
138} 139}
139 140
140GLuint RasterizerOpenGL::SetupVertexFormat() { 141GLuint RasterizerOpenGL::SetupVertexFormat() {
141 auto& gpu = Core::System::GetInstance().GPU().Maxwell3D(); 142 auto& gpu = system.GPU().Maxwell3D();
142 const auto& regs = gpu.regs; 143 const auto& regs = gpu.regs;
143 144
144 if (!gpu.dirty_flags.vertex_attrib_format) { 145 if (!gpu.dirty_flags.vertex_attrib_format) {
@@ -177,7 +178,7 @@ GLuint RasterizerOpenGL::SetupVertexFormat() {
177 continue; 178 continue;
178 179
179 const auto& buffer = regs.vertex_array[attrib.buffer]; 180 const auto& buffer = regs.vertex_array[attrib.buffer];
180 LOG_TRACE(HW_GPU, 181 LOG_TRACE(Render_OpenGL,
181 "vertex attrib {}, count={}, size={}, type={}, offset={}, normalize={}", 182 "vertex attrib {}, count={}, size={}, type={}, offset={}, normalize={}",
182 index, attrib.ComponentCount(), attrib.SizeString(), attrib.TypeString(), 183 index, attrib.ComponentCount(), attrib.SizeString(), attrib.TypeString(),
183 attrib.offset.Value(), attrib.IsNormalized()); 184 attrib.offset.Value(), attrib.IsNormalized());
@@ -200,24 +201,24 @@ GLuint RasterizerOpenGL::SetupVertexFormat() {
200 } 201 }
201 202
202 // Rebinding the VAO invalidates the vertex buffer bindings. 203 // Rebinding the VAO invalidates the vertex buffer bindings.
203 gpu.dirty_flags.vertex_array = 0xFFFFFFFF; 204 gpu.dirty_flags.vertex_array.set();
204 205
205 state.draw.vertex_array = vao_entry.handle; 206 state.draw.vertex_array = vao_entry.handle;
206 return vao_entry.handle; 207 return vao_entry.handle;
207} 208}
208 209
209void RasterizerOpenGL::SetupVertexBuffer(GLuint vao) { 210void RasterizerOpenGL::SetupVertexBuffer(GLuint vao) {
210 auto& gpu = Core::System::GetInstance().GPU().Maxwell3D(); 211 auto& gpu = system.GPU().Maxwell3D();
211 const auto& regs = gpu.regs; 212 const auto& regs = gpu.regs;
212 213
213 if (!gpu.dirty_flags.vertex_array) 214 if (gpu.dirty_flags.vertex_array.none())
214 return; 215 return;
215 216
216 MICROPROFILE_SCOPE(OpenGL_VB); 217 MICROPROFILE_SCOPE(OpenGL_VB);
217 218
218 // Upload all guest vertex arrays sequentially to our buffer 219 // Upload all guest vertex arrays sequentially to our buffer
219 for (u32 index = 0; index < Maxwell::NumVertexArrays; ++index) { 220 for (u32 index = 0; index < Maxwell::NumVertexArrays; ++index) {
220 if (~gpu.dirty_flags.vertex_array & (1u << index)) 221 if (!gpu.dirty_flags.vertex_array[index])
221 continue; 222 continue;
222 223
223 const auto& vertex_array = regs.vertex_array[index]; 224 const auto& vertex_array = regs.vertex_array[index];
@@ -244,11 +245,11 @@ void RasterizerOpenGL::SetupVertexBuffer(GLuint vao) {
244 } 245 }
245 } 246 }
246 247
247 gpu.dirty_flags.vertex_array = 0; 248 gpu.dirty_flags.vertex_array.reset();
248} 249}
249 250
250DrawParameters RasterizerOpenGL::SetupDraw() { 251DrawParameters RasterizerOpenGL::SetupDraw() {
251 const auto& gpu = Core::System::GetInstance().GPU().Maxwell3D(); 252 const auto& gpu = system.GPU().Maxwell3D();
252 const auto& regs = gpu.regs; 253 const auto& regs = gpu.regs;
253 const bool is_indexed = accelerate_draw == AccelDraw::Indexed; 254 const bool is_indexed = accelerate_draw == AccelDraw::Indexed;
254 255
@@ -297,7 +298,7 @@ DrawParameters RasterizerOpenGL::SetupDraw() {
297 298
298void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) { 299void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) {
299 MICROPROFILE_SCOPE(OpenGL_Shader); 300 MICROPROFILE_SCOPE(OpenGL_Shader);
300 auto& gpu = Core::System::GetInstance().GPU().Maxwell3D(); 301 auto& gpu = system.GPU().Maxwell3D();
301 302
302 BaseBindings base_bindings; 303 BaseBindings base_bindings;
303 std::array<bool, Maxwell::NumClipDistances> clip_distances{}; 304 std::array<bool, Maxwell::NumClipDistances> clip_distances{};
@@ -343,9 +344,8 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) {
343 shader_program_manager->UseProgrammableFragmentShader(program_handle); 344 shader_program_manager->UseProgrammableFragmentShader(program_handle);
344 break; 345 break;
345 default: 346 default:
346 LOG_CRITICAL(HW_GPU, "Unimplemented shader index={}, enable={}, offset=0x{:08X}", index, 347 UNIMPLEMENTED_MSG("Unimplemented shader index={}, enable={}, offset=0x{:08X}", index,
347 shader_config.enable.Value(), shader_config.offset); 348 shader_config.enable.Value(), shader_config.offset);
348 UNREACHABLE();
349 } 349 }
350 350
351 const auto stage_enum = static_cast<Maxwell::ShaderStage>(stage); 351 const auto stage_enum = static_cast<Maxwell::ShaderStage>(stage);
@@ -414,7 +414,7 @@ void RasterizerOpenGL::SetupCachedFramebuffer(const FramebufferCacheKey& fbkey,
414} 414}
415 415
416std::size_t RasterizerOpenGL::CalculateVertexArraysSize() const { 416std::size_t RasterizerOpenGL::CalculateVertexArraysSize() const {
417 const auto& regs = Core::System::GetInstance().GPU().Maxwell3D().regs; 417 const auto& regs = system.GPU().Maxwell3D().regs;
418 418
419 std::size_t size = 0; 419 std::size_t size = 0;
420 for (u32 index = 0; index < Maxwell::NumVertexArrays; ++index) { 420 for (u32 index = 0; index < Maxwell::NumVertexArrays; ++index) {
@@ -432,7 +432,7 @@ std::size_t RasterizerOpenGL::CalculateVertexArraysSize() const {
432} 432}
433 433
434std::size_t RasterizerOpenGL::CalculateIndexBufferSize() const { 434std::size_t RasterizerOpenGL::CalculateIndexBufferSize() const {
435 const auto& regs = Core::System::GetInstance().GPU().Maxwell3D().regs; 435 const auto& regs = system.GPU().Maxwell3D().regs;
436 436
437 return static_cast<std::size_t>(regs.index_array.count) * 437 return static_cast<std::size_t>(regs.index_array.count) *
438 static_cast<std::size_t>(regs.index_array.FormatSizeInBytes()); 438 static_cast<std::size_t>(regs.index_array.FormatSizeInBytes());
@@ -449,7 +449,7 @@ static constexpr auto RangeFromInterval(Map& map, const Interval& interval) {
449 return boost::make_iterator_range(map.equal_range(interval)); 449 return boost::make_iterator_range(map.equal_range(interval));
450} 450}
451 451
452void RasterizerOpenGL::UpdatePagesCachedCount(Tegra::GPUVAddr addr, u64 size, int delta) { 452void RasterizerOpenGL::UpdatePagesCachedCount(VAddr addr, u64 size, int delta) {
453 const u64 page_start{addr >> Memory::PAGE_BITS}; 453 const u64 page_start{addr >> Memory::PAGE_BITS};
454 const u64 page_end{(addr + size + Memory::PAGE_SIZE - 1) >> Memory::PAGE_BITS}; 454 const u64 page_end{(addr + size + Memory::PAGE_SIZE - 1) >> Memory::PAGE_BITS};
455 455
@@ -488,13 +488,13 @@ std::pair<bool, bool> RasterizerOpenGL::ConfigureFramebuffers(
488 OpenGLState& current_state, bool using_color_fb, bool using_depth_fb, bool preserve_contents, 488 OpenGLState& current_state, bool using_color_fb, bool using_depth_fb, bool preserve_contents,
489 std::optional<std::size_t> single_color_target) { 489 std::optional<std::size_t> single_color_target) {
490 MICROPROFILE_SCOPE(OpenGL_Framebuffer); 490 MICROPROFILE_SCOPE(OpenGL_Framebuffer);
491 const auto& gpu = Core::System::GetInstance().GPU().Maxwell3D(); 491 auto& gpu = system.GPU().Maxwell3D();
492 const auto& regs = gpu.regs; 492 const auto& regs = gpu.regs;
493 493
494 const FramebufferConfigState fb_config_state{using_color_fb, using_depth_fb, preserve_contents, 494 const FramebufferConfigState fb_config_state{using_color_fb, using_depth_fb, preserve_contents,
495 single_color_target}; 495 single_color_target};
496 if (fb_config_state == current_framebuffer_config_state && gpu.dirty_flags.color_buffer == 0 && 496 if (fb_config_state == current_framebuffer_config_state &&
497 !gpu.dirty_flags.zeta_buffer) { 497 gpu.dirty_flags.color_buffer.none() && !gpu.dirty_flags.zeta_buffer) {
498 // Only skip if the previous ConfigureFramebuffers call was from the same kind (multiple or 498 // Only skip if the previous ConfigureFramebuffers call was from the same kind (multiple or
499 // single color targets). This is done because the guest registers may not change but the 499 // single color targets). This is done because the guest registers may not change but the
500 // host framebuffer may contain different attachments 500 // host framebuffer may contain different attachments
@@ -582,7 +582,7 @@ void RasterizerOpenGL::Clear() {
582 const auto prev_state{state}; 582 const auto prev_state{state};
583 SCOPE_EXIT({ prev_state.Apply(); }); 583 SCOPE_EXIT({ prev_state.Apply(); });
584 584
585 const auto& regs = Core::System::GetInstance().GPU().Maxwell3D().regs; 585 const auto& regs = system.GPU().Maxwell3D().regs;
586 bool use_color{}; 586 bool use_color{};
587 bool use_depth{}; 587 bool use_depth{};
588 bool use_stencil{}; 588 bool use_stencil{};
@@ -673,7 +673,7 @@ void RasterizerOpenGL::DrawArrays() {
673 return; 673 return;
674 674
675 MICROPROFILE_SCOPE(OpenGL_Drawing); 675 MICROPROFILE_SCOPE(OpenGL_Drawing);
676 auto& gpu = Core::System::GetInstance().GPU().Maxwell3D(); 676 auto& gpu = system.GPU().Maxwell3D();
677 const auto& regs = gpu.regs; 677 const auto& regs = gpu.regs;
678 678
679 ConfigureFramebuffers(state); 679 ConfigureFramebuffers(state);
@@ -721,10 +721,10 @@ void RasterizerOpenGL::DrawArrays() {
721 // Add space for at least 18 constant buffers 721 // Add space for at least 18 constant buffers
722 buffer_size += Maxwell::MaxConstBuffers * (MaxConstbufferSize + uniform_buffer_alignment); 722 buffer_size += Maxwell::MaxConstBuffers * (MaxConstbufferSize + uniform_buffer_alignment);
723 723
724 bool invalidate = buffer_cache.Map(buffer_size); 724 const bool invalidate = buffer_cache.Map(buffer_size);
725 if (invalidate) { 725 if (invalidate) {
726 // As all cached buffers are invalidated, we need to recheck their state. 726 // As all cached buffers are invalidated, we need to recheck their state.
727 gpu.dirty_flags.vertex_array = 0xFFFFFFFF; 727 gpu.dirty_flags.vertex_array.set();
728 } 728 }
729 729
730 const GLuint vao = SetupVertexFormat(); 730 const GLuint vao = SetupVertexFormat();
@@ -738,33 +738,21 @@ void RasterizerOpenGL::DrawArrays() {
738 shader_program_manager->ApplyTo(state); 738 shader_program_manager->ApplyTo(state);
739 state.Apply(); 739 state.Apply();
740 740
741 // Execute draw call 741 res_cache.SignalPreDrawCall();
742 params.DispatchDraw(); 742 params.DispatchDraw();
743 743 res_cache.SignalPostDrawCall();
744 // Disable scissor test
745 state.viewports[0].scissor.enabled = false;
746 744
747 accelerate_draw = AccelDraw::Disabled; 745 accelerate_draw = AccelDraw::Disabled;
748
749 // Unbind textures for potential future use as framebuffer attachments
750 for (auto& texture_unit : state.texture_units) {
751 texture_unit.Unbind();
752 }
753 state.Apply();
754} 746}
755 747
756void RasterizerOpenGL::FlushAll() {} 748void RasterizerOpenGL::FlushAll() {}
757 749
758void RasterizerOpenGL::FlushRegion(VAddr addr, u64 size) { 750void RasterizerOpenGL::FlushRegion(CacheAddr addr, u64 size) {
759 MICROPROFILE_SCOPE(OpenGL_CacheManagement); 751 MICROPROFILE_SCOPE(OpenGL_CacheManagement);
760 752 res_cache.FlushRegion(addr, size);
761 if (Settings::values.use_accurate_gpu_emulation) {
762 // Only flush if use_accurate_gpu_emulation is enabled, as it incurs a performance hit
763 res_cache.FlushRegion(addr, size);
764 }
765} 753}
766 754
767void RasterizerOpenGL::InvalidateRegion(VAddr addr, u64 size) { 755void RasterizerOpenGL::InvalidateRegion(CacheAddr addr, u64 size) {
768 MICROPROFILE_SCOPE(OpenGL_CacheManagement); 756 MICROPROFILE_SCOPE(OpenGL_CacheManagement);
769 res_cache.InvalidateRegion(addr, size); 757 res_cache.InvalidateRegion(addr, size);
770 shader_cache.InvalidateRegion(addr, size); 758 shader_cache.InvalidateRegion(addr, size);
@@ -772,15 +760,15 @@ void RasterizerOpenGL::InvalidateRegion(VAddr addr, u64 size) {
772 buffer_cache.InvalidateRegion(addr, size); 760 buffer_cache.InvalidateRegion(addr, size);
773} 761}
774 762
775void RasterizerOpenGL::FlushAndInvalidateRegion(VAddr addr, u64 size) { 763void RasterizerOpenGL::FlushAndInvalidateRegion(CacheAddr addr, u64 size) {
776 FlushRegion(addr, size); 764 FlushRegion(addr, size);
777 InvalidateRegion(addr, size); 765 InvalidateRegion(addr, size);
778} 766}
779 767
780bool RasterizerOpenGL::AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Regs::Surface& src, 768bool RasterizerOpenGL::AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Regs::Surface& src,
781 const Tegra::Engines::Fermi2D::Regs::Surface& dst, 769 const Tegra::Engines::Fermi2D::Regs::Surface& dst,
782 const MathUtil::Rectangle<u32>& src_rect, 770 const Common::Rectangle<u32>& src_rect,
783 const MathUtil::Rectangle<u32>& dst_rect) { 771 const Common::Rectangle<u32>& dst_rect) {
784 MICROPROFILE_SCOPE(OpenGL_Blits); 772 MICROPROFILE_SCOPE(OpenGL_Blits);
785 res_cache.FermiCopySurface(src, dst, src_rect, dst_rect); 773 res_cache.FermiCopySurface(src, dst, src_rect, dst_rect);
786 return true; 774 return true;
@@ -794,7 +782,7 @@ bool RasterizerOpenGL::AccelerateDisplay(const Tegra::FramebufferConfig& config,
794 782
795 MICROPROFILE_SCOPE(OpenGL_CacheManagement); 783 MICROPROFILE_SCOPE(OpenGL_CacheManagement);
796 784
797 const auto& surface{res_cache.TryFindFramebufferSurface(framebuffer_addr)}; 785 const auto& surface{res_cache.TryFindFramebufferSurface(Memory::GetPointer(framebuffer_addr))};
798 if (!surface) { 786 if (!surface) {
799 return {}; 787 return {};
800 } 788 }
@@ -805,7 +793,10 @@ bool RasterizerOpenGL::AccelerateDisplay(const Tegra::FramebufferConfig& config,
805 VideoCore::Surface::PixelFormatFromGPUPixelFormat(config.pixel_format)}; 793 VideoCore::Surface::PixelFormatFromGPUPixelFormat(config.pixel_format)};
806 ASSERT_MSG(params.width == config.width, "Framebuffer width is different"); 794 ASSERT_MSG(params.width == config.width, "Framebuffer width is different");
807 ASSERT_MSG(params.height == config.height, "Framebuffer height is different"); 795 ASSERT_MSG(params.height == config.height, "Framebuffer height is different");
808 ASSERT_MSG(params.pixel_format == pixel_format, "Framebuffer pixel_format is different"); 796
797 if (params.pixel_format != pixel_format) {
798 LOG_WARNING(Render_OpenGL, "Framebuffer pixel_format is different");
799 }
809 800
810 screen_info.display_texture = surface->Texture().handle; 801 screen_info.display_texture = surface->Texture().handle;
811 802
@@ -814,104 +805,87 @@ bool RasterizerOpenGL::AccelerateDisplay(const Tegra::FramebufferConfig& config,
814 805
815void RasterizerOpenGL::SamplerInfo::Create() { 806void RasterizerOpenGL::SamplerInfo::Create() {
816 sampler.Create(); 807 sampler.Create();
817 mag_filter = min_filter = Tegra::Texture::TextureFilter::Linear; 808 mag_filter = Tegra::Texture::TextureFilter::Linear;
818 wrap_u = wrap_v = wrap_p = Tegra::Texture::WrapMode::Wrap; 809 min_filter = Tegra::Texture::TextureFilter::Linear;
819 uses_depth_compare = false; 810 wrap_u = Tegra::Texture::WrapMode::Wrap;
811 wrap_v = Tegra::Texture::WrapMode::Wrap;
812 wrap_p = Tegra::Texture::WrapMode::Wrap;
813 use_depth_compare = false;
820 depth_compare_func = Tegra::Texture::DepthCompareFunc::Never; 814 depth_compare_func = Tegra::Texture::DepthCompareFunc::Never;
821 815
822 // default is GL_LINEAR_MIPMAP_LINEAR 816 // OpenGL's default is GL_LINEAR_MIPMAP_LINEAR
823 glSamplerParameteri(sampler.handle, GL_TEXTURE_MIN_FILTER, GL_LINEAR); 817 glSamplerParameteri(sampler.handle, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
824 // Other attributes have correct defaults
825 glSamplerParameteri(sampler.handle, GL_TEXTURE_COMPARE_FUNC, GL_NEVER); 818 glSamplerParameteri(sampler.handle, GL_TEXTURE_COMPARE_FUNC, GL_NEVER);
819
820 // Other attributes have correct defaults
826} 821}
827 822
828void RasterizerOpenGL::SamplerInfo::SyncWithConfig(const Tegra::Texture::TSCEntry& config) { 823void RasterizerOpenGL::SamplerInfo::SyncWithConfig(const Tegra::Texture::TSCEntry& config) {
829 const GLuint s = sampler.handle; 824 const GLuint sampler_id = sampler.handle;
830 if (mag_filter != config.mag_filter) { 825 if (mag_filter != config.mag_filter) {
831 mag_filter = config.mag_filter; 826 mag_filter = config.mag_filter;
832 glSamplerParameteri( 827 glSamplerParameteri(
833 s, GL_TEXTURE_MAG_FILTER, 828 sampler_id, GL_TEXTURE_MAG_FILTER,
834 MaxwellToGL::TextureFilterMode(mag_filter, Tegra::Texture::TextureMipmapFilter::None)); 829 MaxwellToGL::TextureFilterMode(mag_filter, Tegra::Texture::TextureMipmapFilter::None));
835 } 830 }
836 if (min_filter != config.min_filter || mip_filter != config.mip_filter) { 831 if (min_filter != config.min_filter || mipmap_filter != config.mipmap_filter) {
837 min_filter = config.min_filter; 832 min_filter = config.min_filter;
838 mip_filter = config.mip_filter; 833 mipmap_filter = config.mipmap_filter;
839 glSamplerParameteri(s, GL_TEXTURE_MIN_FILTER, 834 glSamplerParameteri(sampler_id, GL_TEXTURE_MIN_FILTER,
840 MaxwellToGL::TextureFilterMode(min_filter, mip_filter)); 835 MaxwellToGL::TextureFilterMode(min_filter, mipmap_filter));
841 } 836 }
842 837
843 if (wrap_u != config.wrap_u) { 838 if (wrap_u != config.wrap_u) {
844 wrap_u = config.wrap_u; 839 wrap_u = config.wrap_u;
845 glSamplerParameteri(s, GL_TEXTURE_WRAP_S, MaxwellToGL::WrapMode(wrap_u)); 840 glSamplerParameteri(sampler_id, GL_TEXTURE_WRAP_S, MaxwellToGL::WrapMode(wrap_u));
846 } 841 }
847 if (wrap_v != config.wrap_v) { 842 if (wrap_v != config.wrap_v) {
848 wrap_v = config.wrap_v; 843 wrap_v = config.wrap_v;
849 glSamplerParameteri(s, GL_TEXTURE_WRAP_T, MaxwellToGL::WrapMode(wrap_v)); 844 glSamplerParameteri(sampler_id, GL_TEXTURE_WRAP_T, MaxwellToGL::WrapMode(wrap_v));
850 } 845 }
851 if (wrap_p != config.wrap_p) { 846 if (wrap_p != config.wrap_p) {
852 wrap_p = config.wrap_p; 847 wrap_p = config.wrap_p;
853 glSamplerParameteri(s, GL_TEXTURE_WRAP_R, MaxwellToGL::WrapMode(wrap_p)); 848 glSamplerParameteri(sampler_id, GL_TEXTURE_WRAP_R, MaxwellToGL::WrapMode(wrap_p));
854 } 849 }
855 850
856 if (uses_depth_compare != (config.depth_compare_enabled == 1)) { 851 if (const bool enabled = config.depth_compare_enabled == 1; use_depth_compare != enabled) {
857 uses_depth_compare = (config.depth_compare_enabled == 1); 852 use_depth_compare = enabled;
858 if (uses_depth_compare) { 853 glSamplerParameteri(sampler_id, GL_TEXTURE_COMPARE_MODE,
859 glSamplerParameteri(s, GL_TEXTURE_COMPARE_MODE, GL_COMPARE_REF_TO_TEXTURE); 854 use_depth_compare ? GL_COMPARE_REF_TO_TEXTURE : GL_NONE);
860 } else {
861 glSamplerParameteri(s, GL_TEXTURE_COMPARE_MODE, GL_NONE);
862 }
863 } 855 }
864 856
865 if (depth_compare_func != config.depth_compare_func) { 857 if (depth_compare_func != config.depth_compare_func) {
866 depth_compare_func = config.depth_compare_func; 858 depth_compare_func = config.depth_compare_func;
867 glSamplerParameteri(s, GL_TEXTURE_COMPARE_FUNC, 859 glSamplerParameteri(sampler_id, GL_TEXTURE_COMPARE_FUNC,
868 MaxwellToGL::DepthCompareFunc(depth_compare_func)); 860 MaxwellToGL::DepthCompareFunc(depth_compare_func));
869 } 861 }
870 862
871 GLvec4 new_border_color; 863 if (const auto new_border_color = config.GetBorderColor(); border_color != new_border_color) {
872 if (config.srgb_conversion) {
873 new_border_color[0] = config.srgb_border_color_r / 255.0f;
874 new_border_color[1] = config.srgb_border_color_g / 255.0f;
875 new_border_color[2] = config.srgb_border_color_g / 255.0f;
876 } else {
877 new_border_color[0] = config.border_color_r;
878 new_border_color[1] = config.border_color_g;
879 new_border_color[2] = config.border_color_b;
880 }
881 new_border_color[3] = config.border_color_a;
882
883 if (border_color != new_border_color) {
884 border_color = new_border_color; 864 border_color = new_border_color;
885 glSamplerParameterfv(s, GL_TEXTURE_BORDER_COLOR, border_color.data()); 865 glSamplerParameterfv(sampler_id, GL_TEXTURE_BORDER_COLOR, border_color.data());
886 } 866 }
887 867
888 const float anisotropic_max = static_cast<float>(1 << config.max_anisotropy.Value()); 868 if (const float anisotropic = config.GetMaxAnisotropy(); max_anisotropic != anisotropic) {
889 if (anisotropic_max != max_anisotropic) { 869 max_anisotropic = anisotropic;
890 max_anisotropic = anisotropic_max;
891 if (GLAD_GL_ARB_texture_filter_anisotropic) { 870 if (GLAD_GL_ARB_texture_filter_anisotropic) {
892 glSamplerParameterf(s, GL_TEXTURE_MAX_ANISOTROPY, max_anisotropic); 871 glSamplerParameterf(sampler_id, GL_TEXTURE_MAX_ANISOTROPY, max_anisotropic);
893 } else if (GLAD_GL_EXT_texture_filter_anisotropic) { 872 } else if (GLAD_GL_EXT_texture_filter_anisotropic) {
894 glSamplerParameterf(s, GL_TEXTURE_MAX_ANISOTROPY_EXT, max_anisotropic); 873 glSamplerParameterf(sampler_id, GL_TEXTURE_MAX_ANISOTROPY_EXT, max_anisotropic);
895 } 874 }
896 } 875 }
897 const float lod_min = static_cast<float>(config.min_lod_clamp.Value()) / 256.0f;
898 if (lod_min != min_lod) {
899 min_lod = lod_min;
900 glSamplerParameterf(s, GL_TEXTURE_MIN_LOD, min_lod);
901 }
902 876
903 const float lod_max = static_cast<float>(config.max_lod_clamp.Value()) / 256.0f; 877 if (const float min = config.GetMinLod(); min_lod != min) {
904 if (lod_max != max_lod) { 878 min_lod = min;
905 max_lod = lod_max; 879 glSamplerParameterf(sampler_id, GL_TEXTURE_MIN_LOD, min_lod);
906 glSamplerParameterf(s, GL_TEXTURE_MAX_LOD, max_lod);
907 } 880 }
908 const u32 bias = config.mip_lod_bias.Value(); 881 if (const float max = config.GetMaxLod(); max_lod != max) {
909 // Sign extend the 13-bit value. 882 max_lod = max;
910 constexpr u32 mask = 1U << (13 - 1); 883 glSamplerParameterf(sampler_id, GL_TEXTURE_MAX_LOD, max_lod);
911 const float bias_lod = static_cast<s32>((bias ^ mask) - mask) / 256.f; 884 }
912 if (lod_bias != bias_lod) { 885
913 lod_bias = bias_lod; 886 if (const float bias = config.GetLodBias(); lod_bias != bias) {
914 glSamplerParameterf(s, GL_TEXTURE_LOD_BIAS, lod_bias); 887 lod_bias = bias;
888 glSamplerParameterf(sampler_id, GL_TEXTURE_LOD_BIAS, lod_bias);
915 } 889 }
916} 890}
917 891
@@ -919,7 +893,7 @@ void RasterizerOpenGL::SetupConstBuffers(Tegra::Engines::Maxwell3D::Regs::Shader
919 const Shader& shader, GLuint program_handle, 893 const Shader& shader, GLuint program_handle,
920 BaseBindings base_bindings) { 894 BaseBindings base_bindings) {
921 MICROPROFILE_SCOPE(OpenGL_UBO); 895 MICROPROFILE_SCOPE(OpenGL_UBO);
922 const auto& gpu = Core::System::GetInstance().GPU(); 896 const auto& gpu = system.GPU();
923 const auto& maxwell3d = gpu.Maxwell3D(); 897 const auto& maxwell3d = gpu.Maxwell3D();
924 const auto& shader_stage = maxwell3d.state.shader_stages[static_cast<std::size_t>(stage)]; 898 const auto& shader_stage = maxwell3d.state.shader_stages[static_cast<std::size_t>(stage)];
925 const auto& entries = shader->GetShaderEntries().const_buffers; 899 const auto& entries = shader->GetShaderEntries().const_buffers;
@@ -951,8 +925,8 @@ void RasterizerOpenGL::SetupConstBuffers(Tegra::Engines::Maxwell3D::Regs::Shader
951 size = buffer.size; 925 size = buffer.size;
952 926
953 if (size > MaxConstbufferSize) { 927 if (size > MaxConstbufferSize) {
954 LOG_CRITICAL(HW_GPU, "indirect constbuffer size {} exceeds maximum {}", size, 928 LOG_WARNING(Render_OpenGL, "Indirect constbuffer size {} exceeds maximum {}", size,
955 MaxConstbufferSize); 929 MaxConstbufferSize);
956 size = MaxConstbufferSize; 930 size = MaxConstbufferSize;
957 } 931 }
958 } else { 932 } else {
@@ -998,7 +972,7 @@ void RasterizerOpenGL::SetupGlobalRegions(Tegra::Engines::Maxwell3D::Regs::Shade
998void RasterizerOpenGL::SetupTextures(Maxwell::ShaderStage stage, const Shader& shader, 972void RasterizerOpenGL::SetupTextures(Maxwell::ShaderStage stage, const Shader& shader,
999 GLuint program_handle, BaseBindings base_bindings) { 973 GLuint program_handle, BaseBindings base_bindings) {
1000 MICROPROFILE_SCOPE(OpenGL_Texture); 974 MICROPROFILE_SCOPE(OpenGL_Texture);
1001 const auto& gpu = Core::System::GetInstance().GPU(); 975 const auto& gpu = system.GPU();
1002 const auto& maxwell3d = gpu.Maxwell3D(); 976 const auto& maxwell3d = gpu.Maxwell3D();
1003 const auto& entries = shader->GetShaderEntries().samplers; 977 const auto& entries = shader->GetShaderEntries().samplers;
1004 978
@@ -1012,10 +986,9 @@ void RasterizerOpenGL::SetupTextures(Maxwell::ShaderStage stage, const Shader& s
1012 986
1013 texture_samplers[current_bindpoint].SyncWithConfig(texture.tsc); 987 texture_samplers[current_bindpoint].SyncWithConfig(texture.tsc);
1014 988
1015 Surface surface = res_cache.GetTextureSurface(texture, entry); 989 if (Surface surface = res_cache.GetTextureSurface(texture, entry); surface) {
1016 if (surface != nullptr) {
1017 state.texture_units[current_bindpoint].texture = 990 state.texture_units[current_bindpoint].texture =
1018 entry.IsArray() ? surface->TextureLayer().handle : surface->Texture().handle; 991 surface->Texture(entry.IsArray()).handle;
1019 surface->UpdateSwizzle(texture.tic.x_source, texture.tic.y_source, texture.tic.z_source, 992 surface->UpdateSwizzle(texture.tic.x_source, texture.tic.y_source, texture.tic.z_source,
1020 texture.tic.w_source); 993 texture.tic.w_source);
1021 } else { 994 } else {
@@ -1026,7 +999,7 @@ void RasterizerOpenGL::SetupTextures(Maxwell::ShaderStage stage, const Shader& s
1026} 999}
1027 1000
1028void RasterizerOpenGL::SyncViewport(OpenGLState& current_state) { 1001void RasterizerOpenGL::SyncViewport(OpenGLState& current_state) {
1029 const auto& regs = Core::System::GetInstance().GPU().Maxwell3D().regs; 1002 const auto& regs = system.GPU().Maxwell3D().regs;
1030 const bool geometry_shaders_enabled = 1003 const bool geometry_shaders_enabled =
1031 regs.IsShaderConfigEnabled(static_cast<size_t>(Maxwell::ShaderProgram::Geometry)); 1004 regs.IsShaderConfigEnabled(static_cast<size_t>(Maxwell::ShaderProgram::Geometry));
1032 const std::size_t viewport_count = 1005 const std::size_t viewport_count =
@@ -1034,7 +1007,7 @@ void RasterizerOpenGL::SyncViewport(OpenGLState& current_state) {
1034 for (std::size_t i = 0; i < viewport_count; i++) { 1007 for (std::size_t i = 0; i < viewport_count; i++) {
1035 auto& viewport = current_state.viewports[i]; 1008 auto& viewport = current_state.viewports[i];
1036 const auto& src = regs.viewports[i]; 1009 const auto& src = regs.viewports[i];
1037 const MathUtil::Rectangle<s32> viewport_rect{regs.viewport_transform[i].GetRect()}; 1010 const Common::Rectangle<s32> viewport_rect{regs.viewport_transform[i].GetRect()};
1038 viewport.x = viewport_rect.left; 1011 viewport.x = viewport_rect.left;
1039 viewport.y = viewport_rect.bottom; 1012 viewport.y = viewport_rect.bottom;
1040 viewport.width = viewport_rect.GetWidth(); 1013 viewport.width = viewport_rect.GetWidth();
@@ -1049,7 +1022,7 @@ void RasterizerOpenGL::SyncViewport(OpenGLState& current_state) {
1049void RasterizerOpenGL::SyncClipEnabled( 1022void RasterizerOpenGL::SyncClipEnabled(
1050 const std::array<bool, Maxwell::Regs::NumClipDistances>& clip_mask) { 1023 const std::array<bool, Maxwell::Regs::NumClipDistances>& clip_mask) {
1051 1024
1052 const auto& regs = Core::System::GetInstance().GPU().Maxwell3D().regs; 1025 const auto& regs = system.GPU().Maxwell3D().regs;
1053 const std::array<bool, Maxwell::Regs::NumClipDistances> reg_state{ 1026 const std::array<bool, Maxwell::Regs::NumClipDistances> reg_state{
1054 regs.clip_distance_enabled.c0 != 0, regs.clip_distance_enabled.c1 != 0, 1027 regs.clip_distance_enabled.c0 != 0, regs.clip_distance_enabled.c1 != 0,
1055 regs.clip_distance_enabled.c2 != 0, regs.clip_distance_enabled.c3 != 0, 1028 regs.clip_distance_enabled.c2 != 0, regs.clip_distance_enabled.c3 != 0,
@@ -1066,7 +1039,7 @@ void RasterizerOpenGL::SyncClipCoef() {
1066} 1039}
1067 1040
1068void RasterizerOpenGL::SyncCullMode() { 1041void RasterizerOpenGL::SyncCullMode() {
1069 const auto& regs = Core::System::GetInstance().GPU().Maxwell3D().regs; 1042 const auto& regs = system.GPU().Maxwell3D().regs;
1070 1043
1071 state.cull.enabled = regs.cull.enabled != 0; 1044 state.cull.enabled = regs.cull.enabled != 0;
1072 1045
@@ -1090,14 +1063,14 @@ void RasterizerOpenGL::SyncCullMode() {
1090} 1063}
1091 1064
1092void RasterizerOpenGL::SyncPrimitiveRestart() { 1065void RasterizerOpenGL::SyncPrimitiveRestart() {
1093 const auto& regs = Core::System::GetInstance().GPU().Maxwell3D().regs; 1066 const auto& regs = system.GPU().Maxwell3D().regs;
1094 1067
1095 state.primitive_restart.enabled = regs.primitive_restart.enabled; 1068 state.primitive_restart.enabled = regs.primitive_restart.enabled;
1096 state.primitive_restart.index = regs.primitive_restart.index; 1069 state.primitive_restart.index = regs.primitive_restart.index;
1097} 1070}
1098 1071
1099void RasterizerOpenGL::SyncDepthTestState() { 1072void RasterizerOpenGL::SyncDepthTestState() {
1100 const auto& regs = Core::System::GetInstance().GPU().Maxwell3D().regs; 1073 const auto& regs = system.GPU().Maxwell3D().regs;
1101 1074
1102 state.depth.test_enabled = regs.depth_test_enable != 0; 1075 state.depth.test_enabled = regs.depth_test_enable != 0;
1103 state.depth.write_mask = regs.depth_write_enabled ? GL_TRUE : GL_FALSE; 1076 state.depth.write_mask = regs.depth_write_enabled ? GL_TRUE : GL_FALSE;
@@ -1109,7 +1082,7 @@ void RasterizerOpenGL::SyncDepthTestState() {
1109} 1082}
1110 1083
1111void RasterizerOpenGL::SyncStencilTestState() { 1084void RasterizerOpenGL::SyncStencilTestState() {
1112 const auto& regs = Core::System::GetInstance().GPU().Maxwell3D().regs; 1085 const auto& regs = system.GPU().Maxwell3D().regs;
1113 state.stencil.test_enabled = regs.stencil_enable != 0; 1086 state.stencil.test_enabled = regs.stencil_enable != 0;
1114 1087
1115 if (!regs.stencil_enable) { 1088 if (!regs.stencil_enable) {
@@ -1143,7 +1116,7 @@ void RasterizerOpenGL::SyncStencilTestState() {
1143} 1116}
1144 1117
1145void RasterizerOpenGL::SyncColorMask() { 1118void RasterizerOpenGL::SyncColorMask() {
1146 const auto& regs = Core::System::GetInstance().GPU().Maxwell3D().regs; 1119 const auto& regs = system.GPU().Maxwell3D().regs;
1147 const std::size_t count = 1120 const std::size_t count =
1148 regs.independent_blend_enable ? Tegra::Engines::Maxwell3D::Regs::NumRenderTargets : 1; 1121 regs.independent_blend_enable ? Tegra::Engines::Maxwell3D::Regs::NumRenderTargets : 1;
1149 for (std::size_t i = 0; i < count; i++) { 1122 for (std::size_t i = 0; i < count; i++) {
@@ -1157,18 +1130,18 @@ void RasterizerOpenGL::SyncColorMask() {
1157} 1130}
1158 1131
1159void RasterizerOpenGL::SyncMultiSampleState() { 1132void RasterizerOpenGL::SyncMultiSampleState() {
1160 const auto& regs = Core::System::GetInstance().GPU().Maxwell3D().regs; 1133 const auto& regs = system.GPU().Maxwell3D().regs;
1161 state.multisample_control.alpha_to_coverage = regs.multisample_control.alpha_to_coverage != 0; 1134 state.multisample_control.alpha_to_coverage = regs.multisample_control.alpha_to_coverage != 0;
1162 state.multisample_control.alpha_to_one = regs.multisample_control.alpha_to_one != 0; 1135 state.multisample_control.alpha_to_one = regs.multisample_control.alpha_to_one != 0;
1163} 1136}
1164 1137
1165void RasterizerOpenGL::SyncFragmentColorClampState() { 1138void RasterizerOpenGL::SyncFragmentColorClampState() {
1166 const auto& regs = Core::System::GetInstance().GPU().Maxwell3D().regs; 1139 const auto& regs = system.GPU().Maxwell3D().regs;
1167 state.fragment_color_clamp.enabled = regs.frag_color_clamp != 0; 1140 state.fragment_color_clamp.enabled = regs.frag_color_clamp != 0;
1168} 1141}
1169 1142
1170void RasterizerOpenGL::SyncBlendState() { 1143void RasterizerOpenGL::SyncBlendState() {
1171 const auto& regs = Core::System::GetInstance().GPU().Maxwell3D().regs; 1144 const auto& regs = system.GPU().Maxwell3D().regs;
1172 1145
1173 state.blend_color.red = regs.blend_color.r; 1146 state.blend_color.red = regs.blend_color.r;
1174 state.blend_color.green = regs.blend_color.g; 1147 state.blend_color.green = regs.blend_color.g;
@@ -1210,7 +1183,7 @@ void RasterizerOpenGL::SyncBlendState() {
1210} 1183}
1211 1184
1212void RasterizerOpenGL::SyncLogicOpState() { 1185void RasterizerOpenGL::SyncLogicOpState() {
1213 const auto& regs = Core::System::GetInstance().GPU().Maxwell3D().regs; 1186 const auto& regs = system.GPU().Maxwell3D().regs;
1214 1187
1215 state.logic_op.enabled = regs.logic_op.enable != 0; 1188 state.logic_op.enabled = regs.logic_op.enable != 0;
1216 1189
@@ -1224,7 +1197,7 @@ void RasterizerOpenGL::SyncLogicOpState() {
1224} 1197}
1225 1198
1226void RasterizerOpenGL::SyncScissorTest(OpenGLState& current_state) { 1199void RasterizerOpenGL::SyncScissorTest(OpenGLState& current_state) {
1227 const auto& regs = Core::System::GetInstance().GPU().Maxwell3D().regs; 1200 const auto& regs = system.GPU().Maxwell3D().regs;
1228 const bool geometry_shaders_enabled = 1201 const bool geometry_shaders_enabled =
1229 regs.IsShaderConfigEnabled(static_cast<size_t>(Maxwell::ShaderProgram::Geometry)); 1202 regs.IsShaderConfigEnabled(static_cast<size_t>(Maxwell::ShaderProgram::Geometry));
1230 const std::size_t viewport_count = 1203 const std::size_t viewport_count =
@@ -1246,21 +1219,17 @@ void RasterizerOpenGL::SyncScissorTest(OpenGLState& current_state) {
1246} 1219}
1247 1220
1248void RasterizerOpenGL::SyncTransformFeedback() { 1221void RasterizerOpenGL::SyncTransformFeedback() {
1249 const auto& regs = Core::System::GetInstance().GPU().Maxwell3D().regs; 1222 const auto& regs = system.GPU().Maxwell3D().regs;
1250 1223 UNIMPLEMENTED_IF_MSG(regs.tfb_enabled != 0, "Transform feedbacks are not implemented");
1251 if (regs.tfb_enabled != 0) {
1252 LOG_CRITICAL(Render_OpenGL, "Transform feedbacks are not implemented");
1253 UNREACHABLE();
1254 }
1255} 1224}
1256 1225
1257void RasterizerOpenGL::SyncPointState() { 1226void RasterizerOpenGL::SyncPointState() {
1258 const auto& regs = Core::System::GetInstance().GPU().Maxwell3D().regs; 1227 const auto& regs = system.GPU().Maxwell3D().regs;
1259 state.point.size = regs.point_size; 1228 state.point.size = regs.point_size;
1260} 1229}
1261 1230
1262void RasterizerOpenGL::SyncPolygonOffset() { 1231void RasterizerOpenGL::SyncPolygonOffset() {
1263 const auto& regs = Core::System::GetInstance().GPU().Maxwell3D().regs; 1232 const auto& regs = system.GPU().Maxwell3D().regs;
1264 state.polygon_offset.fill_enable = regs.polygon_offset_fill_enable != 0; 1233 state.polygon_offset.fill_enable = regs.polygon_offset_fill_enable != 0;
1265 state.polygon_offset.line_enable = regs.polygon_offset_line_enable != 0; 1234 state.polygon_offset.line_enable = regs.polygon_offset_line_enable != 0;
1266 state.polygon_offset.point_enable = regs.polygon_offset_point_enable != 0; 1235 state.polygon_offset.point_enable = regs.polygon_offset_point_enable != 0;
@@ -1270,13 +1239,9 @@ void RasterizerOpenGL::SyncPolygonOffset() {
1270} 1239}
1271 1240
1272void RasterizerOpenGL::CheckAlphaTests() { 1241void RasterizerOpenGL::CheckAlphaTests() {
1273 const auto& regs = Core::System::GetInstance().GPU().Maxwell3D().regs; 1242 const auto& regs = system.GPU().Maxwell3D().regs;
1274 1243 UNIMPLEMENTED_IF_MSG(regs.alpha_test_enabled != 0 && regs.rt_control.count > 1,
1275 if (regs.alpha_test_enabled != 0 && regs.rt_control.count > 1) { 1244 "Alpha Testing is enabled with more than one rendertarget");
1276 LOG_CRITICAL(Render_OpenGL, "Alpha Testing is enabled with Multiple Render Targets, "
1277 "this behavior is undefined.");
1278 UNREACHABLE();
1279 }
1280} 1245}
1281 1246
1282} // namespace OpenGL 1247} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h
index 258d62259..30f3e8acb 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer.h
@@ -57,17 +57,17 @@ public:
57 void DrawArrays() override; 57 void DrawArrays() override;
58 void Clear() override; 58 void Clear() override;
59 void FlushAll() override; 59 void FlushAll() override;
60 void FlushRegion(VAddr addr, u64 size) override; 60 void FlushRegion(CacheAddr addr, u64 size) override;
61 void InvalidateRegion(VAddr addr, u64 size) override; 61 void InvalidateRegion(CacheAddr addr, u64 size) override;
62 void FlushAndInvalidateRegion(VAddr addr, u64 size) override; 62 void FlushAndInvalidateRegion(CacheAddr addr, u64 size) override;
63 bool AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Regs::Surface& src, 63 bool AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Regs::Surface& src,
64 const Tegra::Engines::Fermi2D::Regs::Surface& dst, 64 const Tegra::Engines::Fermi2D::Regs::Surface& dst,
65 const MathUtil::Rectangle<u32>& src_rect, 65 const Common::Rectangle<u32>& src_rect,
66 const MathUtil::Rectangle<u32>& dst_rect) override; 66 const Common::Rectangle<u32>& dst_rect) override;
67 bool AccelerateDisplay(const Tegra::FramebufferConfig& config, VAddr framebuffer_addr, 67 bool AccelerateDisplay(const Tegra::FramebufferConfig& config, VAddr framebuffer_addr,
68 u32 pixel_stride) override; 68 u32 pixel_stride) override;
69 bool AccelerateDrawBatch(bool is_indexed) override; 69 bool AccelerateDrawBatch(bool is_indexed) override;
70 void UpdatePagesCachedCount(Tegra::GPUVAddr addr, u64 size, int delta) override; 70 void UpdatePagesCachedCount(VAddr addr, u64 size, int delta) override;
71 void LoadDiskResources(const std::atomic_bool& stop_loading, 71 void LoadDiskResources(const std::atomic_bool& stop_loading,
72 const VideoCore::DiskResourceLoadCallback& callback) override; 72 const VideoCore::DiskResourceLoadCallback& callback) override;
73 73
@@ -94,11 +94,12 @@ private:
94 private: 94 private:
95 Tegra::Texture::TextureFilter mag_filter = Tegra::Texture::TextureFilter::Nearest; 95 Tegra::Texture::TextureFilter mag_filter = Tegra::Texture::TextureFilter::Nearest;
96 Tegra::Texture::TextureFilter min_filter = Tegra::Texture::TextureFilter::Nearest; 96 Tegra::Texture::TextureFilter min_filter = Tegra::Texture::TextureFilter::Nearest;
97 Tegra::Texture::TextureMipmapFilter mip_filter = Tegra::Texture::TextureMipmapFilter::None; 97 Tegra::Texture::TextureMipmapFilter mipmap_filter =
98 Tegra::Texture::TextureMipmapFilter::None;
98 Tegra::Texture::WrapMode wrap_u = Tegra::Texture::WrapMode::ClampToEdge; 99 Tegra::Texture::WrapMode wrap_u = Tegra::Texture::WrapMode::ClampToEdge;
99 Tegra::Texture::WrapMode wrap_v = Tegra::Texture::WrapMode::ClampToEdge; 100 Tegra::Texture::WrapMode wrap_v = Tegra::Texture::WrapMode::ClampToEdge;
100 Tegra::Texture::WrapMode wrap_p = Tegra::Texture::WrapMode::ClampToEdge; 101 Tegra::Texture::WrapMode wrap_p = Tegra::Texture::WrapMode::ClampToEdge;
101 bool uses_depth_compare = false; 102 bool use_depth_compare = false;
102 Tegra::Texture::DepthCompareFunc depth_compare_func = 103 Tegra::Texture::DepthCompareFunc depth_compare_func =
103 Tegra::Texture::DepthCompareFunc::Always; 104 Tegra::Texture::DepthCompareFunc::Always;
104 GLvec4 border_color = {}; 105 GLvec4 border_color = {};
@@ -214,6 +215,7 @@ private:
214 GlobalRegionCacheOpenGL global_cache; 215 GlobalRegionCacheOpenGL global_cache;
215 216
216 Core::Frontend::EmuWindow& emu_window; 217 Core::Frontend::EmuWindow& emu_window;
218 Core::System& system;
217 219
218 ScreenInfo& screen_info; 220 ScreenInfo& screen_info;
219 221
diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
index 59f671048..451de00e8 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
@@ -3,6 +3,7 @@
3// Refer to the license.txt file included. 3// Refer to the license.txt file included.
4 4
5#include <algorithm> 5#include <algorithm>
6#include <optional>
6#include <glad/glad.h> 7#include <glad/glad.h>
7 8
8#include "common/alignment.h" 9#include "common/alignment.h"
@@ -20,7 +21,7 @@
20#include "video_core/renderer_opengl/gl_rasterizer_cache.h" 21#include "video_core/renderer_opengl/gl_rasterizer_cache.h"
21#include "video_core/renderer_opengl/utils.h" 22#include "video_core/renderer_opengl/utils.h"
22#include "video_core/surface.h" 23#include "video_core/surface.h"
23#include "video_core/textures/astc.h" 24#include "video_core/textures/convert.h"
24#include "video_core/textures/decoders.h" 25#include "video_core/textures/decoders.h"
25 26
26namespace OpenGL { 27namespace OpenGL {
@@ -60,6 +61,7 @@ void SurfaceParams::InitCacheParameters(Tegra::GPUVAddr gpu_addr_) {
60 61
61 addr = cpu_addr ? *cpu_addr : 0; 62 addr = cpu_addr ? *cpu_addr : 0;
62 gpu_addr = gpu_addr_; 63 gpu_addr = gpu_addr_;
64 host_ptr = Memory::GetPointer(addr);
63 size_in_bytes = SizeInBytesRaw(); 65 size_in_bytes = SizeInBytesRaw();
64 66
65 if (IsPixelFormatASTC(pixel_format)) { 67 if (IsPixelFormatASTC(pixel_format)) {
@@ -399,7 +401,28 @@ static const FormatTuple& GetFormatTuple(PixelFormat pixel_format, ComponentType
399 return format; 401 return format;
400} 402}
401 403
402MathUtil::Rectangle<u32> SurfaceParams::GetRect(u32 mip_level) const { 404/// Returns the discrepant array target
405constexpr GLenum GetArrayDiscrepantTarget(SurfaceTarget target) {
406 switch (target) {
407 case SurfaceTarget::Texture1D:
408 return GL_TEXTURE_1D_ARRAY;
409 case SurfaceTarget::Texture2D:
410 return GL_TEXTURE_2D_ARRAY;
411 case SurfaceTarget::Texture3D:
412 return GL_NONE;
413 case SurfaceTarget::Texture1DArray:
414 return GL_TEXTURE_1D;
415 case SurfaceTarget::Texture2DArray:
416 return GL_TEXTURE_2D;
417 case SurfaceTarget::TextureCubemap:
418 return GL_TEXTURE_CUBE_MAP_ARRAY;
419 case SurfaceTarget::TextureCubeArray:
420 return GL_TEXTURE_CUBE_MAP;
421 }
422 return GL_NONE;
423}
424
425Common::Rectangle<u32> SurfaceParams::GetRect(u32 mip_level) const {
403 u32 actual_height{std::max(1U, unaligned_height >> mip_level)}; 426 u32 actual_height{std::max(1U, unaligned_height >> mip_level)};
404 if (IsPixelFormatASTC(pixel_format)) { 427 if (IsPixelFormatASTC(pixel_format)) {
405 // ASTC formats must stop at the ATSC block size boundary 428 // ASTC formats must stop at the ATSC block size boundary
@@ -423,8 +446,8 @@ void SwizzleFunc(const MortonSwizzleMode& mode, const SurfaceParams& params,
423 for (u32 i = 0; i < params.depth; i++) { 446 for (u32 i = 0; i < params.depth; i++) {
424 MortonSwizzle(mode, params.pixel_format, params.MipWidth(mip_level), 447 MortonSwizzle(mode, params.pixel_format, params.MipWidth(mip_level),
425 params.MipBlockHeight(mip_level), params.MipHeight(mip_level), 448 params.MipBlockHeight(mip_level), params.MipHeight(mip_level),
426 params.MipBlockDepth(mip_level), params.tile_width_spacing, 1, 449 params.MipBlockDepth(mip_level), 1, params.tile_width_spacing,
427 gl_buffer.data() + offset_gl, gl_size, params.addr + offset); 450 gl_buffer.data() + offset_gl, params.addr + offset);
428 offset += layer_size; 451 offset += layer_size;
429 offset_gl += gl_size; 452 offset_gl += gl_size;
430 } 453 }
@@ -433,7 +456,7 @@ void SwizzleFunc(const MortonSwizzleMode& mode, const SurfaceParams& params,
433 MortonSwizzle(mode, params.pixel_format, params.MipWidth(mip_level), 456 MortonSwizzle(mode, params.pixel_format, params.MipWidth(mip_level),
434 params.MipBlockHeight(mip_level), params.MipHeight(mip_level), 457 params.MipBlockHeight(mip_level), params.MipHeight(mip_level),
435 params.MipBlockDepth(mip_level), depth, params.tile_width_spacing, 458 params.MipBlockDepth(mip_level), depth, params.tile_width_spacing,
436 gl_buffer.data(), gl_buffer.size(), params.addr + offset); 459 gl_buffer.data(), params.addr + offset);
437 } 460 }
438} 461}
439 462
@@ -541,14 +564,16 @@ void RasterizerCacheOpenGL::CopySurface(const Surface& src_surface, const Surfac
541} 564}
542 565
543CachedSurface::CachedSurface(const SurfaceParams& params) 566CachedSurface::CachedSurface(const SurfaceParams& params)
544 : params(params), gl_target(SurfaceTargetToGL(params.target)), 567 : params{params}, gl_target{SurfaceTargetToGL(params.target)},
545 cached_size_in_bytes(params.size_in_bytes) { 568 cached_size_in_bytes{params.size_in_bytes}, RasterizerCacheObject{params.host_ptr} {
546 texture.Create(gl_target); 569 texture.Create(gl_target);
547 570
548 // TODO(Rodrigo): Using params.GetRect() returns a different size than using its Mip*(0) 571 // TODO(Rodrigo): Using params.GetRect() returns a different size than using its Mip*(0)
549 // alternatives. This signals a bug on those functions. 572 // alternatives. This signals a bug on those functions.
550 const auto width = static_cast<GLsizei>(params.MipWidth(0)); 573 const auto width = static_cast<GLsizei>(params.MipWidth(0));
551 const auto height = static_cast<GLsizei>(params.MipHeight(0)); 574 const auto height = static_cast<GLsizei>(params.MipHeight(0));
575 memory_size = params.MemorySize();
576 reinterpreted = false;
552 577
553 const auto& format_tuple = GetFormatTuple(params.pixel_format, params.component_type); 578 const auto& format_tuple = GetFormatTuple(params.pixel_format, params.component_type);
554 gl_internal_format = format_tuple.internal_format; 579 gl_internal_format = format_tuple.internal_format;
@@ -594,103 +619,6 @@ CachedSurface::CachedSurface(const SurfaceParams& params)
594 } 619 }
595} 620}
596 621
597static void ConvertS8Z24ToZ24S8(std::vector<u8>& data, u32 width, u32 height, bool reverse) {
598 union S8Z24 {
599 BitField<0, 24, u32> z24;
600 BitField<24, 8, u32> s8;
601 };
602 static_assert(sizeof(S8Z24) == 4, "S8Z24 is incorrect size");
603
604 union Z24S8 {
605 BitField<0, 8, u32> s8;
606 BitField<8, 24, u32> z24;
607 };
608 static_assert(sizeof(Z24S8) == 4, "Z24S8 is incorrect size");
609
610 S8Z24 s8z24_pixel{};
611 Z24S8 z24s8_pixel{};
612 constexpr auto bpp{GetBytesPerPixel(PixelFormat::S8Z24)};
613 for (std::size_t y = 0; y < height; ++y) {
614 for (std::size_t x = 0; x < width; ++x) {
615 const std::size_t offset{bpp * (y * width + x)};
616 if (reverse) {
617 std::memcpy(&z24s8_pixel, &data[offset], sizeof(Z24S8));
618 s8z24_pixel.s8.Assign(z24s8_pixel.s8);
619 s8z24_pixel.z24.Assign(z24s8_pixel.z24);
620 std::memcpy(&data[offset], &s8z24_pixel, sizeof(S8Z24));
621 } else {
622 std::memcpy(&s8z24_pixel, &data[offset], sizeof(S8Z24));
623 z24s8_pixel.s8.Assign(s8z24_pixel.s8);
624 z24s8_pixel.z24.Assign(s8z24_pixel.z24);
625 std::memcpy(&data[offset], &z24s8_pixel, sizeof(Z24S8));
626 }
627 }
628 }
629}
630
631/**
632 * Helper function to perform software conversion (as needed) when loading a buffer from Switch
633 * memory. This is for Maxwell pixel formats that cannot be represented as-is in OpenGL or with
634 * typical desktop GPUs.
635 */
636static void ConvertFormatAsNeeded_LoadGLBuffer(std::vector<u8>& data, PixelFormat pixel_format,
637 u32 width, u32 height, u32 depth) {
638 switch (pixel_format) {
639 case PixelFormat::ASTC_2D_4X4:
640 case PixelFormat::ASTC_2D_8X8:
641 case PixelFormat::ASTC_2D_8X5:
642 case PixelFormat::ASTC_2D_5X4:
643 case PixelFormat::ASTC_2D_5X5:
644 case PixelFormat::ASTC_2D_4X4_SRGB:
645 case PixelFormat::ASTC_2D_8X8_SRGB:
646 case PixelFormat::ASTC_2D_8X5_SRGB:
647 case PixelFormat::ASTC_2D_5X4_SRGB:
648 case PixelFormat::ASTC_2D_5X5_SRGB:
649 case PixelFormat::ASTC_2D_10X8:
650 case PixelFormat::ASTC_2D_10X8_SRGB: {
651 // Convert ASTC pixel formats to RGBA8, as most desktop GPUs do not support ASTC.
652 u32 block_width{};
653 u32 block_height{};
654 std::tie(block_width, block_height) = GetASTCBlockSize(pixel_format);
655 data =
656 Tegra::Texture::ASTC::Decompress(data, width, height, depth, block_width, block_height);
657 break;
658 }
659 case PixelFormat::S8Z24:
660 // Convert the S8Z24 depth format to Z24S8, as OpenGL does not support S8Z24.
661 ConvertS8Z24ToZ24S8(data, width, height, false);
662 break;
663 }
664}
665
666/**
667 * Helper function to perform software conversion (as needed) when flushing a buffer from OpenGL to
668 * Switch memory. This is for Maxwell pixel formats that cannot be represented as-is in OpenGL or
669 * with typical desktop GPUs.
670 */
671static void ConvertFormatAsNeeded_FlushGLBuffer(std::vector<u8>& data, PixelFormat pixel_format,
672 u32 width, u32 height) {
673 switch (pixel_format) {
674 case PixelFormat::ASTC_2D_4X4:
675 case PixelFormat::ASTC_2D_8X8:
676 case PixelFormat::ASTC_2D_4X4_SRGB:
677 case PixelFormat::ASTC_2D_8X8_SRGB:
678 case PixelFormat::ASTC_2D_5X5:
679 case PixelFormat::ASTC_2D_5X5_SRGB:
680 case PixelFormat::ASTC_2D_10X8:
681 case PixelFormat::ASTC_2D_10X8_SRGB: {
682 LOG_CRITICAL(HW_GPU, "Conversion of format {} after texture flushing is not implemented",
683 static_cast<u32>(pixel_format));
684 UNREACHABLE();
685 break;
686 }
687 case PixelFormat::S8Z24:
688 // Convert the Z24S8 depth format to S8Z24, as OpenGL does not support S8Z24.
689 ConvertS8Z24ToZ24S8(data, width, height, true);
690 break;
691 }
692}
693
694MICROPROFILE_DEFINE(OpenGL_SurfaceLoad, "OpenGL", "Surface Load", MP_RGB(128, 192, 64)); 622MICROPROFILE_DEFINE(OpenGL_SurfaceLoad, "OpenGL", "Surface Load", MP_RGB(128, 192, 64));
695void CachedSurface::LoadGLBuffer() { 623void CachedSurface::LoadGLBuffer() {
696 MICROPROFILE_SCOPE(OpenGL_SurfaceLoad); 624 MICROPROFILE_SCOPE(OpenGL_SurfaceLoad);
@@ -706,10 +634,9 @@ void CachedSurface::LoadGLBuffer() {
706 const u32 bpp = params.GetFormatBpp() / 8; 634 const u32 bpp = params.GetFormatBpp() / 8;
707 const u32 copy_size = params.width * bpp; 635 const u32 copy_size = params.width * bpp;
708 if (params.pitch == copy_size) { 636 if (params.pitch == copy_size) {
709 std::memcpy(gl_buffer[0].data(), Memory::GetPointer(params.addr), 637 std::memcpy(gl_buffer[0].data(), params.host_ptr, params.size_in_bytes_gl);
710 params.size_in_bytes_gl);
711 } else { 638 } else {
712 const u8* start = Memory::GetPointer(params.addr); 639 const u8* start{params.host_ptr};
713 u8* write_to = gl_buffer[0].data(); 640 u8* write_to = gl_buffer[0].data();
714 for (u32 h = params.height; h > 0; h--) { 641 for (u32 h = params.height; h > 0; h--) {
715 std::memcpy(write_to, start, copy_size); 642 std::memcpy(write_to, start, copy_size);
@@ -719,8 +646,16 @@ void CachedSurface::LoadGLBuffer() {
719 } 646 }
720 } 647 }
721 for (u32 i = 0; i < params.max_mip_level; i++) { 648 for (u32 i = 0; i < params.max_mip_level; i++) {
722 ConvertFormatAsNeeded_LoadGLBuffer(gl_buffer[i], params.pixel_format, params.MipWidth(i), 649 const u32 width = params.MipWidth(i);
723 params.MipHeight(i), params.MipDepth(i)); 650 const u32 height = params.MipHeight(i);
651 const u32 depth = params.MipDepth(i);
652 if (VideoCore::Surface::IsPixelFormatASTC(params.pixel_format)) {
653 // Reserve size for RGBA8 conversion
654 constexpr std::size_t rgba_bpp = 4;
655 gl_buffer[i].resize(std::max(gl_buffer[i].size(), width * height * depth * rgba_bpp));
656 }
657 Tegra::Texture::ConvertFromGuestToHost(gl_buffer[i].data(), params.pixel_format, width,
658 height, depth, true, true);
724 } 659 }
725} 660}
726 661
@@ -743,10 +678,8 @@ void CachedSurface::FlushGLBuffer() {
743 glGetTextureImage(texture.handle, 0, tuple.format, tuple.type, 678 glGetTextureImage(texture.handle, 0, tuple.format, tuple.type,
744 static_cast<GLsizei>(gl_buffer[0].size()), gl_buffer[0].data()); 679 static_cast<GLsizei>(gl_buffer[0].size()), gl_buffer[0].data());
745 glPixelStorei(GL_PACK_ROW_LENGTH, 0); 680 glPixelStorei(GL_PACK_ROW_LENGTH, 0);
746 ConvertFormatAsNeeded_FlushGLBuffer(gl_buffer[0], params.pixel_format, params.width, 681 Tegra::Texture::ConvertFromHostToGuest(gl_buffer[0].data(), params.pixel_format, params.width,
747 params.height); 682 params.height, params.depth, true, true);
748 const u8* const texture_src_data = Memory::GetPointer(params.addr);
749 ASSERT(texture_src_data);
750 if (params.is_tiled) { 683 if (params.is_tiled) {
751 ASSERT_MSG(params.block_width == 1, "Block width is defined as {} on texture type {}", 684 ASSERT_MSG(params.block_width == 1, "Block width is defined as {} on texture type {}",
752 params.block_width, static_cast<u32>(params.target)); 685 params.block_width, static_cast<u32>(params.target));
@@ -756,9 +689,9 @@ void CachedSurface::FlushGLBuffer() {
756 const u32 bpp = params.GetFormatBpp() / 8; 689 const u32 bpp = params.GetFormatBpp() / 8;
757 const u32 copy_size = params.width * bpp; 690 const u32 copy_size = params.width * bpp;
758 if (params.pitch == copy_size) { 691 if (params.pitch == copy_size) {
759 std::memcpy(Memory::GetPointer(params.addr), gl_buffer[0].data(), GetSizeInBytes()); 692 std::memcpy(params.host_ptr, gl_buffer[0].data(), GetSizeInBytes());
760 } else { 693 } else {
761 u8* start = Memory::GetPointer(params.addr); 694 u8* start{params.host_ptr};
762 const u8* read_to = gl_buffer[0].data(); 695 const u8* read_to = gl_buffer[0].data();
763 for (u32 h = params.height; h > 0; h--) { 696 for (u32 h = params.height; h > 0; h--) {
764 std::memcpy(start, read_to, copy_size); 697 std::memcpy(start, read_to, copy_size);
@@ -881,20 +814,22 @@ void CachedSurface::UploadGLMipmapTexture(u32 mip_map, GLuint read_fb_handle,
881 glPixelStorei(GL_UNPACK_ROW_LENGTH, 0); 814 glPixelStorei(GL_UNPACK_ROW_LENGTH, 0);
882} 815}
883 816
884void CachedSurface::EnsureTextureView() { 817void CachedSurface::EnsureTextureDiscrepantView() {
885 if (texture_view.handle != 0) 818 if (discrepant_view.handle != 0)
886 return; 819 return;
887 820
888 const GLenum target{TargetLayer()}; 821 const GLenum target{GetArrayDiscrepantTarget(params.target)};
822 ASSERT(target != GL_NONE);
823
889 const GLuint num_layers{target == GL_TEXTURE_CUBE_MAP_ARRAY ? 6u : 1u}; 824 const GLuint num_layers{target == GL_TEXTURE_CUBE_MAP_ARRAY ? 6u : 1u};
890 constexpr GLuint min_layer = 0; 825 constexpr GLuint min_layer = 0;
891 constexpr GLuint min_level = 0; 826 constexpr GLuint min_level = 0;
892 827
893 glGenTextures(1, &texture_view.handle); 828 glGenTextures(1, &discrepant_view.handle);
894 glTextureView(texture_view.handle, target, texture.handle, gl_internal_format, min_level, 829 glTextureView(discrepant_view.handle, target, texture.handle, gl_internal_format, min_level,
895 params.max_mip_level, min_layer, num_layers); 830 params.max_mip_level, min_layer, num_layers);
896 ApplyTextureDefaults(texture_view.handle, params.max_mip_level); 831 ApplyTextureDefaults(discrepant_view.handle, params.max_mip_level);
897 glTextureParameteriv(texture_view.handle, GL_TEXTURE_SWIZZLE_RGBA, 832 glTextureParameteriv(discrepant_view.handle, GL_TEXTURE_SWIZZLE_RGBA,
898 reinterpret_cast<const GLint*>(swizzle.data())); 833 reinterpret_cast<const GLint*>(swizzle.data()));
899} 834}
900 835
@@ -920,8 +855,8 @@ void CachedSurface::UpdateSwizzle(Tegra::Texture::SwizzleSource swizzle_x,
920 swizzle = {new_x, new_y, new_z, new_w}; 855 swizzle = {new_x, new_y, new_z, new_w};
921 const auto swizzle_data = reinterpret_cast<const GLint*>(swizzle.data()); 856 const auto swizzle_data = reinterpret_cast<const GLint*>(swizzle.data());
922 glTextureParameteriv(texture.handle, GL_TEXTURE_SWIZZLE_RGBA, swizzle_data); 857 glTextureParameteriv(texture.handle, GL_TEXTURE_SWIZZLE_RGBA, swizzle_data);
923 if (texture_view.handle != 0) { 858 if (discrepant_view.handle != 0) {
924 glTextureParameteriv(texture_view.handle, GL_TEXTURE_SWIZZLE_RGBA, swizzle_data); 859 glTextureParameteriv(discrepant_view.handle, GL_TEXTURE_SWIZZLE_RGBA, swizzle_data);
925 } 860 }
926} 861}
927 862
@@ -962,30 +897,31 @@ Surface RasterizerCacheOpenGL::GetColorBufferSurface(std::size_t index, bool pre
962 auto& gpu{Core::System::GetInstance().GPU().Maxwell3D()}; 897 auto& gpu{Core::System::GetInstance().GPU().Maxwell3D()};
963 const auto& regs{gpu.regs}; 898 const auto& regs{gpu.regs};
964 899
965 if ((gpu.dirty_flags.color_buffer & (1u << static_cast<u32>(index))) == 0) { 900 if (!gpu.dirty_flags.color_buffer[index]) {
966 return last_color_buffers[index]; 901 return current_color_buffers[index];
967 } 902 }
968 gpu.dirty_flags.color_buffer &= ~(1u << static_cast<u32>(index)); 903 gpu.dirty_flags.color_buffer.reset(index);
969 904
970 ASSERT(index < Tegra::Engines::Maxwell3D::Regs::NumRenderTargets); 905 ASSERT(index < Tegra::Engines::Maxwell3D::Regs::NumRenderTargets);
971 906
972 if (index >= regs.rt_control.count) { 907 if (index >= regs.rt_control.count) {
973 return last_color_buffers[index] = {}; 908 return current_color_buffers[index] = {};
974 } 909 }
975 910
976 if (regs.rt[index].Address() == 0 || regs.rt[index].format == Tegra::RenderTargetFormat::NONE) { 911 if (regs.rt[index].Address() == 0 || regs.rt[index].format == Tegra::RenderTargetFormat::NONE) {
977 return last_color_buffers[index] = {}; 912 return current_color_buffers[index] = {};
978 } 913 }
979 914
980 const SurfaceParams color_params{SurfaceParams::CreateForFramebuffer(index)}; 915 const SurfaceParams color_params{SurfaceParams::CreateForFramebuffer(index)};
981 916
982 return last_color_buffers[index] = GetSurface(color_params, preserve_contents); 917 return current_color_buffers[index] = GetSurface(color_params, preserve_contents);
983} 918}
984 919
985void RasterizerCacheOpenGL::LoadSurface(const Surface& surface) { 920void RasterizerCacheOpenGL::LoadSurface(const Surface& surface) {
986 surface->LoadGLBuffer(); 921 surface->LoadGLBuffer();
987 surface->UploadGLTexture(read_framebuffer.handle, draw_framebuffer.handle); 922 surface->UploadGLTexture(read_framebuffer.handle, draw_framebuffer.handle);
988 surface->MarkAsModified(false, *this); 923 surface->MarkAsModified(false, *this);
924 surface->MarkForReload(false);
989} 925}
990 926
991Surface RasterizerCacheOpenGL::GetSurface(const SurfaceParams& params, bool preserve_contents) { 927Surface RasterizerCacheOpenGL::GetSurface(const SurfaceParams& params, bool preserve_contents) {
@@ -994,21 +930,26 @@ Surface RasterizerCacheOpenGL::GetSurface(const SurfaceParams& params, bool pres
994 } 930 }
995 931
996 // Look up surface in the cache based on address 932 // Look up surface in the cache based on address
997 Surface surface{TryGet(params.addr)}; 933 Surface surface{TryGet(params.host_ptr)};
998 if (surface) { 934 if (surface) {
999 if (surface->GetSurfaceParams().IsCompatibleSurface(params)) { 935 if (surface->GetSurfaceParams().IsCompatibleSurface(params)) {
1000 // Use the cached surface as-is 936 // Use the cached surface as-is unless it's not synced with memory
937 if (surface->MustReload())
938 LoadSurface(surface);
1001 return surface; 939 return surface;
1002 } else if (preserve_contents) { 940 } else if (preserve_contents) {
1003 // If surface parameters changed and we care about keeping the previous data, recreate 941 // If surface parameters changed and we care about keeping the previous data, recreate
1004 // the surface from the old one 942 // the surface from the old one
1005 Surface new_surface{RecreateSurface(surface, params)}; 943 Surface new_surface{RecreateSurface(surface, params)};
1006 Unregister(surface); 944 UnregisterSurface(surface);
1007 Register(new_surface); 945 Register(new_surface);
946 if (new_surface->IsUploaded()) {
947 RegisterReinterpretSurface(new_surface);
948 }
1008 return new_surface; 949 return new_surface;
1009 } else { 950 } else {
1010 // Delete the old surface before creating a new one to prevent collisions. 951 // Delete the old surface before creating a new one to prevent collisions.
1011 Unregister(surface); 952 UnregisterSurface(surface);
1012 } 953 }
1013 } 954 }
1014 955
@@ -1043,7 +984,7 @@ void RasterizerCacheOpenGL::FastLayeredCopySurface(const Surface& src_surface,
1043 for (u32 layer = 0; layer < dst_params.depth; layer++) { 984 for (u32 layer = 0; layer < dst_params.depth; layer++) {
1044 for (u32 mipmap = 0; mipmap < dst_params.max_mip_level; mipmap++) { 985 for (u32 mipmap = 0; mipmap < dst_params.max_mip_level; mipmap++) {
1045 const VAddr sub_address = address + dst_params.GetMipmapLevelOffset(mipmap); 986 const VAddr sub_address = address + dst_params.GetMipmapLevelOffset(mipmap);
1046 const Surface& copy = TryGet(sub_address); 987 const Surface& copy = TryGet(Memory::GetPointer(sub_address));
1047 if (!copy) 988 if (!copy)
1048 continue; 989 continue;
1049 const auto& src_params{copy->GetSurfaceParams()}; 990 const auto& src_params{copy->GetSurfaceParams()};
@@ -1062,8 +1003,8 @@ void RasterizerCacheOpenGL::FastLayeredCopySurface(const Surface& src_surface,
1062} 1003}
1063 1004
1064static bool BlitSurface(const Surface& src_surface, const Surface& dst_surface, 1005static bool BlitSurface(const Surface& src_surface, const Surface& dst_surface,
1065 const MathUtil::Rectangle<u32>& src_rect, 1006 const Common::Rectangle<u32>& src_rect,
1066 const MathUtil::Rectangle<u32>& dst_rect, GLuint read_fb_handle, 1007 const Common::Rectangle<u32>& dst_rect, GLuint read_fb_handle,
1067 GLuint draw_fb_handle, GLenum src_attachment = 0, GLenum dst_attachment = 0, 1008 GLuint draw_fb_handle, GLenum src_attachment = 0, GLenum dst_attachment = 0,
1068 std::size_t cubemap_face = 0) { 1009 std::size_t cubemap_face = 0) {
1069 1010
@@ -1193,7 +1134,7 @@ static bool BlitSurface(const Surface& src_surface, const Surface& dst_surface,
1193void RasterizerCacheOpenGL::FermiCopySurface( 1134void RasterizerCacheOpenGL::FermiCopySurface(
1194 const Tegra::Engines::Fermi2D::Regs::Surface& src_config, 1135 const Tegra::Engines::Fermi2D::Regs::Surface& src_config,
1195 const Tegra::Engines::Fermi2D::Regs::Surface& dst_config, 1136 const Tegra::Engines::Fermi2D::Regs::Surface& dst_config,
1196 const MathUtil::Rectangle<u32>& src_rect, const MathUtil::Rectangle<u32>& dst_rect) { 1137 const Common::Rectangle<u32>& src_rect, const Common::Rectangle<u32>& dst_rect) {
1197 1138
1198 const auto& src_params = SurfaceParams::CreateForFermiCopySurface(src_config); 1139 const auto& src_params = SurfaceParams::CreateForFermiCopySurface(src_config);
1199 const auto& dst_params = SurfaceParams::CreateForFermiCopySurface(dst_config); 1140 const auto& dst_params = SurfaceParams::CreateForFermiCopySurface(dst_config);
@@ -1220,7 +1161,8 @@ void RasterizerCacheOpenGL::AccurateCopySurface(const Surface& src_surface,
1220 const auto& dst_params{dst_surface->GetSurfaceParams()}; 1161 const auto& dst_params{dst_surface->GetSurfaceParams()};
1221 1162
1222 // Flush enough memory for both the source and destination surface 1163 // Flush enough memory for both the source and destination surface
1223 FlushRegion(src_params.addr, std::max(src_params.MemorySize(), dst_params.MemorySize())); 1164 FlushRegion(ToCacheAddr(src_params.host_ptr),
1165 std::max(src_params.MemorySize(), dst_params.MemorySize()));
1224 1166
1225 LoadSurface(dst_surface); 1167 LoadSurface(dst_surface);
1226} 1168}
@@ -1257,7 +1199,11 @@ Surface RasterizerCacheOpenGL::RecreateSurface(const Surface& old_surface,
1257 case SurfaceTarget::TextureCubemap: 1199 case SurfaceTarget::TextureCubemap:
1258 case SurfaceTarget::Texture2DArray: 1200 case SurfaceTarget::Texture2DArray:
1259 case SurfaceTarget::TextureCubeArray: 1201 case SurfaceTarget::TextureCubeArray:
1260 FastLayeredCopySurface(old_surface, new_surface); 1202 if (old_params.pixel_format == new_params.pixel_format)
1203 FastLayeredCopySurface(old_surface, new_surface);
1204 else {
1205 AccurateCopySurface(old_surface, new_surface);
1206 }
1261 break; 1207 break;
1262 default: 1208 default:
1263 LOG_CRITICAL(Render_OpenGL, "Unimplemented surface target={}", 1209 LOG_CRITICAL(Render_OpenGL, "Unimplemented surface target={}",
@@ -1268,8 +1214,8 @@ Surface RasterizerCacheOpenGL::RecreateSurface(const Surface& old_surface,
1268 return new_surface; 1214 return new_surface;
1269} 1215}
1270 1216
1271Surface RasterizerCacheOpenGL::TryFindFramebufferSurface(VAddr addr) const { 1217Surface RasterizerCacheOpenGL::TryFindFramebufferSurface(const u8* host_ptr) const {
1272 return TryGet(addr); 1218 return TryGet(host_ptr);
1273} 1219}
1274 1220
1275void RasterizerCacheOpenGL::ReserveSurface(const Surface& surface) { 1221void RasterizerCacheOpenGL::ReserveSurface(const Surface& surface) {
@@ -1286,4 +1232,108 @@ Surface RasterizerCacheOpenGL::TryGetReservedSurface(const SurfaceParams& params
1286 return {}; 1232 return {};
1287} 1233}
1288 1234
1235static std::optional<u32> TryFindBestMipMap(std::size_t memory, const SurfaceParams params,
1236 u32 height) {
1237 for (u32 i = 0; i < params.max_mip_level; i++) {
1238 if (memory == params.GetMipmapSingleSize(i) && params.MipHeight(i) == height) {
1239 return {i};
1240 }
1241 }
1242 return {};
1243}
1244
1245static std::optional<u32> TryFindBestLayer(VAddr addr, const SurfaceParams params, u32 mipmap) {
1246 const std::size_t size = params.LayerMemorySize();
1247 VAddr start = params.addr + params.GetMipmapLevelOffset(mipmap);
1248 for (u32 i = 0; i < params.depth; i++) {
1249 if (start == addr) {
1250 return {i};
1251 }
1252 start += size;
1253 }
1254 return {};
1255}
1256
1257static bool LayerFitReinterpretSurface(RasterizerCacheOpenGL& cache, const Surface render_surface,
1258 const Surface blitted_surface) {
1259 const auto& dst_params = blitted_surface->GetSurfaceParams();
1260 const auto& src_params = render_surface->GetSurfaceParams();
1261 const std::size_t src_memory_size = src_params.size_in_bytes;
1262 const std::optional<u32> level =
1263 TryFindBestMipMap(src_memory_size, dst_params, src_params.height);
1264 if (level.has_value()) {
1265 if (src_params.width == dst_params.MipWidthGobAligned(*level) &&
1266 src_params.height == dst_params.MipHeight(*level) &&
1267 src_params.block_height >= dst_params.MipBlockHeight(*level)) {
1268 const std::optional<u32> slot =
1269 TryFindBestLayer(render_surface->GetCpuAddr(), dst_params, *level);
1270 if (slot.has_value()) {
1271 glCopyImageSubData(render_surface->Texture().handle,
1272 SurfaceTargetToGL(src_params.target), 0, 0, 0, 0,
1273 blitted_surface->Texture().handle,
1274 SurfaceTargetToGL(dst_params.target), *level, 0, 0, *slot,
1275 dst_params.MipWidth(*level), dst_params.MipHeight(*level), 1);
1276 blitted_surface->MarkAsModified(true, cache);
1277 return true;
1278 }
1279 }
1280 }
1281 return false;
1282}
1283
1284static bool IsReinterpretInvalid(const Surface render_surface, const Surface blitted_surface) {
1285 const VAddr bound1 = blitted_surface->GetCpuAddr() + blitted_surface->GetMemorySize();
1286 const VAddr bound2 = render_surface->GetCpuAddr() + render_surface->GetMemorySize();
1287 if (bound2 > bound1)
1288 return true;
1289 const auto& dst_params = blitted_surface->GetSurfaceParams();
1290 const auto& src_params = render_surface->GetSurfaceParams();
1291 return (dst_params.component_type != src_params.component_type);
1292}
1293
1294static bool IsReinterpretInvalidSecond(const Surface render_surface,
1295 const Surface blitted_surface) {
1296 const auto& dst_params = blitted_surface->GetSurfaceParams();
1297 const auto& src_params = render_surface->GetSurfaceParams();
1298 return (dst_params.height > src_params.height && dst_params.width > src_params.width);
1299}
1300
1301bool RasterizerCacheOpenGL::PartialReinterpretSurface(Surface triggering_surface,
1302 Surface intersect) {
1303 if (IsReinterpretInvalid(triggering_surface, intersect)) {
1304 UnregisterSurface(intersect);
1305 return false;
1306 }
1307 if (!LayerFitReinterpretSurface(*this, triggering_surface, intersect)) {
1308 if (IsReinterpretInvalidSecond(triggering_surface, intersect)) {
1309 UnregisterSurface(intersect);
1310 return false;
1311 }
1312 FlushObject(intersect);
1313 FlushObject(triggering_surface);
1314 intersect->MarkForReload(true);
1315 }
1316 return true;
1317}
1318
1319void RasterizerCacheOpenGL::SignalPreDrawCall() {
1320 if (texception && GLAD_GL_ARB_texture_barrier) {
1321 glTextureBarrier();
1322 }
1323 texception = false;
1324}
1325
1326void RasterizerCacheOpenGL::SignalPostDrawCall() {
1327 for (u32 i = 0; i < Maxwell::NumRenderTargets; i++) {
1328 if (current_color_buffers[i] != nullptr) {
1329 Surface intersect =
1330 CollideOnReinterpretedSurface(current_color_buffers[i]->GetCacheAddr());
1331 if (intersect != nullptr) {
1332 PartialReinterpretSurface(current_color_buffers[i], intersect);
1333 texception = true;
1334 }
1335 }
1336 }
1337}
1338
1289} // namespace OpenGL 1339} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.h b/src/video_core/renderer_opengl/gl_rasterizer_cache.h
index 89d733c50..b3afad139 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer_cache.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.h
@@ -28,12 +28,13 @@ namespace OpenGL {
28 28
29class CachedSurface; 29class CachedSurface;
30using Surface = std::shared_ptr<CachedSurface>; 30using Surface = std::shared_ptr<CachedSurface>;
31using SurfaceSurfaceRect_Tuple = std::tuple<Surface, Surface, MathUtil::Rectangle<u32>>; 31using SurfaceSurfaceRect_Tuple = std::tuple<Surface, Surface, Common::Rectangle<u32>>;
32 32
33using SurfaceTarget = VideoCore::Surface::SurfaceTarget; 33using SurfaceTarget = VideoCore::Surface::SurfaceTarget;
34using SurfaceType = VideoCore::Surface::SurfaceType; 34using SurfaceType = VideoCore::Surface::SurfaceType;
35using PixelFormat = VideoCore::Surface::PixelFormat; 35using PixelFormat = VideoCore::Surface::PixelFormat;
36using ComponentType = VideoCore::Surface::ComponentType; 36using ComponentType = VideoCore::Surface::ComponentType;
37using Maxwell = Tegra::Engines::Maxwell3D::Regs;
37 38
38struct SurfaceParams { 39struct SurfaceParams {
39 enum class SurfaceClass { 40 enum class SurfaceClass {
@@ -71,7 +72,7 @@ struct SurfaceParams {
71 } 72 }
72 73
73 /// Returns the rectangle corresponding to this surface 74 /// Returns the rectangle corresponding to this surface
74 MathUtil::Rectangle<u32> GetRect(u32 mip_level = 0) const; 75 Common::Rectangle<u32> GetRect(u32 mip_level = 0) const;
75 76
76 /// Returns the total size of this surface in bytes, adjusted for compression 77 /// Returns the total size of this surface in bytes, adjusted for compression
77 std::size_t SizeInBytesRaw(bool ignore_tiled = false) const { 78 std::size_t SizeInBytesRaw(bool ignore_tiled = false) const {
@@ -140,10 +141,18 @@ struct SurfaceParams {
140 return offset; 141 return offset;
141 } 142 }
142 143
144 std::size_t GetMipmapSingleSize(u32 mip_level) const {
145 return InnerMipmapMemorySize(mip_level, false, is_layered);
146 }
147
143 u32 MipWidth(u32 mip_level) const { 148 u32 MipWidth(u32 mip_level) const {
144 return std::max(1U, width >> mip_level); 149 return std::max(1U, width >> mip_level);
145 } 150 }
146 151
152 u32 MipWidthGobAligned(u32 mip_level) const {
153 return Common::AlignUp(std::max(1U, width >> mip_level), 64U * 8U / GetFormatBpp());
154 }
155
147 u32 MipHeight(u32 mip_level) const { 156 u32 MipHeight(u32 mip_level) const {
148 return std::max(1U, height >> mip_level); 157 return std::max(1U, height >> mip_level);
149 } 158 }
@@ -288,6 +297,7 @@ struct SurfaceParams {
288 bool srgb_conversion; 297 bool srgb_conversion;
289 // Parameters used for caching 298 // Parameters used for caching
290 VAddr addr; 299 VAddr addr;
300 u8* host_ptr;
291 Tegra::GPUVAddr gpu_addr; 301 Tegra::GPUVAddr gpu_addr;
292 std::size_t size_in_bytes; 302 std::size_t size_in_bytes;
293 std::size_t size_in_bytes_gl; 303 std::size_t size_in_bytes_gl;
@@ -336,9 +346,9 @@ class RasterizerOpenGL;
336 346
337class CachedSurface final : public RasterizerCacheObject { 347class CachedSurface final : public RasterizerCacheObject {
338public: 348public:
339 CachedSurface(const SurfaceParams& params); 349 explicit CachedSurface(const SurfaceParams& params);
340 350
341 VAddr GetAddr() const override { 351 VAddr GetCpuAddr() const override {
342 return params.addr; 352 return params.addr;
343 } 353 }
344 354
@@ -346,6 +356,10 @@ public:
346 return cached_size_in_bytes; 356 return cached_size_in_bytes;
347 } 357 }
348 358
359 std::size_t GetMemorySize() const {
360 return memory_size;
361 }
362
349 void Flush() override { 363 void Flush() override {
350 FlushGLBuffer(); 364 FlushGLBuffer();
351 } 365 }
@@ -354,31 +368,19 @@ public:
354 return texture; 368 return texture;
355 } 369 }
356 370
357 const OGLTexture& TextureLayer() { 371 const OGLTexture& Texture(bool as_array) {
358 if (params.is_array) { 372 if (params.is_array == as_array) {
359 return Texture(); 373 return texture;
374 } else {
375 EnsureTextureDiscrepantView();
376 return discrepant_view;
360 } 377 }
361 EnsureTextureView();
362 return texture_view;
363 } 378 }
364 379
365 GLenum Target() const { 380 GLenum Target() const {
366 return gl_target; 381 return gl_target;
367 } 382 }
368 383
369 GLenum TargetLayer() const {
370 using VideoCore::Surface::SurfaceTarget;
371 switch (params.target) {
372 case SurfaceTarget::Texture1D:
373 return GL_TEXTURE_1D_ARRAY;
374 case SurfaceTarget::Texture2D:
375 return GL_TEXTURE_2D_ARRAY;
376 case SurfaceTarget::TextureCubemap:
377 return GL_TEXTURE_CUBE_MAP_ARRAY;
378 }
379 return Target();
380 }
381
382 const SurfaceParams& GetSurfaceParams() const { 384 const SurfaceParams& GetSurfaceParams() const {
383 return params; 385 return params;
384 } 386 }
@@ -395,19 +397,42 @@ public:
395 Tegra::Texture::SwizzleSource swizzle_z, 397 Tegra::Texture::SwizzleSource swizzle_z,
396 Tegra::Texture::SwizzleSource swizzle_w); 398 Tegra::Texture::SwizzleSource swizzle_w);
397 399
400 void MarkReinterpreted() {
401 reinterpreted = true;
402 }
403
404 bool IsReinterpreted() const {
405 return reinterpreted;
406 }
407
408 void MarkForReload(bool reload) {
409 must_reload = reload;
410 }
411
412 bool MustReload() const {
413 return must_reload;
414 }
415
416 bool IsUploaded() const {
417 return params.identity == SurfaceParams::SurfaceClass::Uploaded;
418 }
419
398private: 420private:
399 void UploadGLMipmapTexture(u32 mip_map, GLuint read_fb_handle, GLuint draw_fb_handle); 421 void UploadGLMipmapTexture(u32 mip_map, GLuint read_fb_handle, GLuint draw_fb_handle);
400 422
401 void EnsureTextureView(); 423 void EnsureTextureDiscrepantView();
402 424
403 OGLTexture texture; 425 OGLTexture texture;
404 OGLTexture texture_view; 426 OGLTexture discrepant_view;
405 std::vector<std::vector<u8>> gl_buffer; 427 std::vector<std::vector<u8>> gl_buffer;
406 SurfaceParams params{}; 428 SurfaceParams params{};
407 GLenum gl_target{}; 429 GLenum gl_target{};
408 GLenum gl_internal_format{}; 430 GLenum gl_internal_format{};
409 std::size_t cached_size_in_bytes{}; 431 std::size_t cached_size_in_bytes{};
410 std::array<GLenum, 4> swizzle{GL_RED, GL_GREEN, GL_BLUE, GL_ALPHA}; 432 std::array<GLenum, 4> swizzle{GL_RED, GL_GREEN, GL_BLUE, GL_ALPHA};
433 std::size_t memory_size;
434 bool reinterpreted = false;
435 bool must_reload = false;
411}; 436};
412 437
413class RasterizerCacheOpenGL final : public RasterizerCache<Surface> { 438class RasterizerCacheOpenGL final : public RasterizerCache<Surface> {
@@ -425,13 +450,16 @@ public:
425 Surface GetColorBufferSurface(std::size_t index, bool preserve_contents); 450 Surface GetColorBufferSurface(std::size_t index, bool preserve_contents);
426 451
427 /// Tries to find a framebuffer using on the provided CPU address 452 /// Tries to find a framebuffer using on the provided CPU address
428 Surface TryFindFramebufferSurface(VAddr addr) const; 453 Surface TryFindFramebufferSurface(const u8* host_ptr) const;
429 454
430 /// Copies the contents of one surface to another 455 /// Copies the contents of one surface to another
431 void FermiCopySurface(const Tegra::Engines::Fermi2D::Regs::Surface& src_config, 456 void FermiCopySurface(const Tegra::Engines::Fermi2D::Regs::Surface& src_config,
432 const Tegra::Engines::Fermi2D::Regs::Surface& dst_config, 457 const Tegra::Engines::Fermi2D::Regs::Surface& dst_config,
433 const MathUtil::Rectangle<u32>& src_rect, 458 const Common::Rectangle<u32>& src_rect,
434 const MathUtil::Rectangle<u32>& dst_rect); 459 const Common::Rectangle<u32>& dst_rect);
460
461 void SignalPreDrawCall();
462 void SignalPostDrawCall();
435 463
436private: 464private:
437 void LoadSurface(const Surface& surface); 465 void LoadSurface(const Surface& surface);
@@ -449,6 +477,10 @@ private:
449 /// Tries to get a reserved surface for the specified parameters 477 /// Tries to get a reserved surface for the specified parameters
450 Surface TryGetReservedSurface(const SurfaceParams& params); 478 Surface TryGetReservedSurface(const SurfaceParams& params);
451 479
480 // Partialy reinterpret a surface based on a triggering_surface that collides with it.
481 // returns true if the reinterpret was successful, false in case it was not.
482 bool PartialReinterpretSurface(Surface triggering_surface, Surface intersect);
483
452 /// Performs a slow but accurate surface copy, flushing to RAM and reinterpreting the data 484 /// Performs a slow but accurate surface copy, flushing to RAM and reinterpreting the data
453 void AccurateCopySurface(const Surface& src_surface, const Surface& dst_surface); 485 void AccurateCopySurface(const Surface& src_surface, const Surface& dst_surface);
454 void FastLayeredCopySurface(const Surface& src_surface, const Surface& dst_surface); 486 void FastLayeredCopySurface(const Surface& src_surface, const Surface& dst_surface);
@@ -465,12 +497,50 @@ private:
465 OGLFramebuffer read_framebuffer; 497 OGLFramebuffer read_framebuffer;
466 OGLFramebuffer draw_framebuffer; 498 OGLFramebuffer draw_framebuffer;
467 499
500 bool texception = false;
501
468 /// Use a Pixel Buffer Object to download the previous texture and then upload it to the new one 502 /// Use a Pixel Buffer Object to download the previous texture and then upload it to the new one
469 /// using the new format. 503 /// using the new format.
470 OGLBuffer copy_pbo; 504 OGLBuffer copy_pbo;
471 505
472 std::array<Surface, Tegra::Engines::Maxwell3D::Regs::NumRenderTargets> last_color_buffers; 506 std::array<Surface, Maxwell::NumRenderTargets> last_color_buffers;
507 std::array<Surface, Maxwell::NumRenderTargets> current_color_buffers;
473 Surface last_depth_buffer; 508 Surface last_depth_buffer;
509
510 using SurfaceIntervalCache = boost::icl::interval_map<CacheAddr, Surface>;
511 using SurfaceInterval = typename SurfaceIntervalCache::interval_type;
512
513 static auto GetReinterpretInterval(const Surface& object) {
514 return SurfaceInterval::right_open(object->GetCacheAddr() + 1,
515 object->GetCacheAddr() + object->GetMemorySize() - 1);
516 }
517
518 // Reinterpreted surfaces are very fragil as the game may keep rendering into them.
519 SurfaceIntervalCache reinterpreted_surfaces;
520
521 void RegisterReinterpretSurface(Surface reinterpret_surface) {
522 auto interval = GetReinterpretInterval(reinterpret_surface);
523 reinterpreted_surfaces.insert({interval, reinterpret_surface});
524 reinterpret_surface->MarkReinterpreted();
525 }
526
527 Surface CollideOnReinterpretedSurface(CacheAddr addr) const {
528 const SurfaceInterval interval{addr};
529 for (auto& pair :
530 boost::make_iterator_range(reinterpreted_surfaces.equal_range(interval))) {
531 return pair.second;
532 }
533 return nullptr;
534 }
535
536 /// Unregisters an object from the cache
537 void UnregisterSurface(const Surface& object) {
538 if (object->IsReinterpreted()) {
539 auto interval = GetReinterpretInterval(object);
540 reinterpreted_surfaces.erase(interval);
541 }
542 Unregister(object);
543 }
474}; 544};
475 545
476} // namespace OpenGL 546} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp
index 4883e4f62..60a04e146 100644
--- a/src/video_core/renderer_opengl/gl_shader_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp
@@ -42,9 +42,9 @@ VAddr GetShaderAddress(Maxwell::ShaderProgram program) {
42} 42}
43 43
44/// Gets the shader program code from memory for the specified address 44/// Gets the shader program code from memory for the specified address
45ProgramCode GetShaderCode(VAddr addr) { 45ProgramCode GetShaderCode(const u8* host_ptr) {
46 ProgramCode program_code(VideoCommon::Shader::MAX_PROGRAM_LENGTH); 46 ProgramCode program_code(VideoCommon::Shader::MAX_PROGRAM_LENGTH);
47 Memory::ReadBlock(addr, program_code.data(), program_code.size() * sizeof(u64)); 47 std::memcpy(program_code.data(), host_ptr, program_code.size() * sizeof(u64));
48 return program_code; 48 return program_code;
49} 49}
50 50
@@ -214,12 +214,13 @@ std::set<GLenum> GetSupportedFormats() {
214 214
215} // namespace 215} // namespace
216 216
217CachedShader::CachedShader(VAddr addr, u64 unique_identifier, Maxwell::ShaderProgram program_type, 217CachedShader::CachedShader(VAddr guest_addr, u64 unique_identifier,
218 ShaderDiskCacheOpenGL& disk_cache, 218 Maxwell::ShaderProgram program_type, ShaderDiskCacheOpenGL& disk_cache,
219 const PrecompiledPrograms& precompiled_programs, 219 const PrecompiledPrograms& precompiled_programs,
220 ProgramCode&& program_code, ProgramCode&& program_code_b) 220 ProgramCode&& program_code, ProgramCode&& program_code_b, u8* host_ptr)
221 : addr{addr}, unique_identifier{unique_identifier}, program_type{program_type}, 221 : host_ptr{host_ptr}, guest_addr{guest_addr}, unique_identifier{unique_identifier},
222 disk_cache{disk_cache}, precompiled_programs{precompiled_programs} { 222 program_type{program_type}, disk_cache{disk_cache},
223 precompiled_programs{precompiled_programs}, RasterizerCacheObject{host_ptr} {
223 224
224 const std::size_t code_size = CalculateProgramSize(program_code); 225 const std::size_t code_size = CalculateProgramSize(program_code);
225 const std::size_t code_size_b = 226 const std::size_t code_size_b =
@@ -243,12 +244,13 @@ CachedShader::CachedShader(VAddr addr, u64 unique_identifier, Maxwell::ShaderPro
243 disk_cache.SaveRaw(raw); 244 disk_cache.SaveRaw(raw);
244} 245}
245 246
246CachedShader::CachedShader(VAddr addr, u64 unique_identifier, Maxwell::ShaderProgram program_type, 247CachedShader::CachedShader(VAddr guest_addr, u64 unique_identifier,
247 ShaderDiskCacheOpenGL& disk_cache, 248 Maxwell::ShaderProgram program_type, ShaderDiskCacheOpenGL& disk_cache,
248 const PrecompiledPrograms& precompiled_programs, 249 const PrecompiledPrograms& precompiled_programs,
249 GLShader::ProgramResult result) 250 GLShader::ProgramResult result, u8* host_ptr)
250 : addr{addr}, unique_identifier{unique_identifier}, program_type{program_type}, 251 : guest_addr{guest_addr}, unique_identifier{unique_identifier}, program_type{program_type},
251 disk_cache{disk_cache}, precompiled_programs{precompiled_programs} { 252 disk_cache{disk_cache}, precompiled_programs{precompiled_programs}, RasterizerCacheObject{
253 host_ptr} {
252 254
253 code = std::move(result.first); 255 code = std::move(result.first);
254 entries = result.second; 256 entries = result.second;
@@ -271,7 +273,7 @@ std::tuple<GLuint, BaseBindings> CachedShader::GetProgramHandle(GLenum primitive
271 disk_cache.SaveUsage(GetUsage(primitive_mode, base_bindings)); 273 disk_cache.SaveUsage(GetUsage(primitive_mode, base_bindings));
272 } 274 }
273 275
274 LabelGLObject(GL_PROGRAM, program->handle, addr); 276 LabelGLObject(GL_PROGRAM, program->handle, guest_addr);
275 } 277 }
276 278
277 handle = program->handle; 279 handle = program->handle;
@@ -323,7 +325,7 @@ GLuint CachedShader::LazyGeometryProgram(CachedProgram& target_program, BaseBind
323 disk_cache.SaveUsage(GetUsage(primitive_mode, base_bindings)); 325 disk_cache.SaveUsage(GetUsage(primitive_mode, base_bindings));
324 } 326 }
325 327
326 LabelGLObject(GL_PROGRAM, target_program->handle, addr, debug_name); 328 LabelGLObject(GL_PROGRAM, target_program->handle, guest_addr, debug_name);
327 329
328 return target_program->handle; 330 return target_program->handle;
329}; 331};
@@ -489,14 +491,17 @@ Shader ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program) {
489 const VAddr program_addr{GetShaderAddress(program)}; 491 const VAddr program_addr{GetShaderAddress(program)};
490 492
491 // Look up shader in the cache based on address 493 // Look up shader in the cache based on address
492 Shader shader{TryGet(program_addr)}; 494 const auto& host_ptr{Memory::GetPointer(program_addr)};
495 Shader shader{TryGet(host_ptr)};
493 496
494 if (!shader) { 497 if (!shader) {
495 // No shader found - create a new one 498 // No shader found - create a new one
496 ProgramCode program_code = GetShaderCode(program_addr); 499 const auto& host_ptr{Memory::GetPointer(program_addr)};
500 ProgramCode program_code{GetShaderCode(host_ptr)};
497 ProgramCode program_code_b; 501 ProgramCode program_code_b;
498 if (program == Maxwell::ShaderProgram::VertexA) { 502 if (program == Maxwell::ShaderProgram::VertexA) {
499 program_code_b = GetShaderCode(GetShaderAddress(Maxwell::ShaderProgram::VertexB)); 503 program_code_b = GetShaderCode(
504 Memory::GetPointer(GetShaderAddress(Maxwell::ShaderProgram::VertexB)));
500 } 505 }
501 const u64 unique_identifier = GetUniqueIdentifier(program, program_code, program_code_b); 506 const u64 unique_identifier = GetUniqueIdentifier(program, program_code, program_code_b);
502 507
@@ -504,11 +509,11 @@ Shader ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program) {
504 if (found != precompiled_shaders.end()) { 509 if (found != precompiled_shaders.end()) {
505 shader = 510 shader =
506 std::make_shared<CachedShader>(program_addr, unique_identifier, program, disk_cache, 511 std::make_shared<CachedShader>(program_addr, unique_identifier, program, disk_cache,
507 precompiled_programs, found->second); 512 precompiled_programs, found->second, host_ptr);
508 } else { 513 } else {
509 shader = std::make_shared<CachedShader>( 514 shader = std::make_shared<CachedShader>(
510 program_addr, unique_identifier, program, disk_cache, precompiled_programs, 515 program_addr, unique_identifier, program, disk_cache, precompiled_programs,
511 std::move(program_code), std::move(program_code_b)); 516 std::move(program_code), std::move(program_code_b), host_ptr);
512 } 517 }
513 Register(shader); 518 Register(shader);
514 } 519 }
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.h b/src/video_core/renderer_opengl/gl_shader_cache.h
index 97eed192f..81fe716b4 100644
--- a/src/video_core/renderer_opengl/gl_shader_cache.h
+++ b/src/video_core/renderer_opengl/gl_shader_cache.h
@@ -39,18 +39,18 @@ using PrecompiledShaders = std::unordered_map<u64, GLShader::ProgramResult>;
39 39
40class CachedShader final : public RasterizerCacheObject { 40class CachedShader final : public RasterizerCacheObject {
41public: 41public:
42 explicit CachedShader(VAddr addr, u64 unique_identifier, Maxwell::ShaderProgram program_type, 42 explicit CachedShader(VAddr guest_addr, u64 unique_identifier,
43 ShaderDiskCacheOpenGL& disk_cache, 43 Maxwell::ShaderProgram program_type, ShaderDiskCacheOpenGL& disk_cache,
44 const PrecompiledPrograms& precompiled_programs, 44 const PrecompiledPrograms& precompiled_programs,
45 ProgramCode&& program_code, ProgramCode&& program_code_b); 45 ProgramCode&& program_code, ProgramCode&& program_code_b, u8* host_ptr);
46 46
47 explicit CachedShader(VAddr addr, u64 unique_identifier, Maxwell::ShaderProgram program_type, 47 explicit CachedShader(VAddr guest_addr, u64 unique_identifier,
48 ShaderDiskCacheOpenGL& disk_cache, 48 Maxwell::ShaderProgram program_type, ShaderDiskCacheOpenGL& disk_cache,
49 const PrecompiledPrograms& precompiled_programs, 49 const PrecompiledPrograms& precompiled_programs,
50 GLShader::ProgramResult result); 50 GLShader::ProgramResult result, u8* host_ptr);
51 51
52 VAddr GetAddr() const override { 52 VAddr GetCpuAddr() const override {
53 return addr; 53 return guest_addr;
54 } 54 }
55 55
56 std::size_t GetSizeInBytes() const override { 56 std::size_t GetSizeInBytes() const override {
@@ -91,7 +91,8 @@ private:
91 91
92 ShaderDiskCacheUsage GetUsage(GLenum primitive_mode, BaseBindings base_bindings) const; 92 ShaderDiskCacheUsage GetUsage(GLenum primitive_mode, BaseBindings base_bindings) const;
93 93
94 VAddr addr{}; 94 u8* host_ptr{};
95 VAddr guest_addr{};
95 u64 unique_identifier{}; 96 u64 unique_identifier{};
96 Maxwell::ShaderProgram program_type{}; 97 Maxwell::ShaderProgram program_type{};
97 ShaderDiskCacheOpenGL& disk_cache; 98 ShaderDiskCacheOpenGL& disk_cache;
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
index db18f4dbe..11d1169f0 100644
--- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
@@ -5,7 +5,9 @@
5#include <array> 5#include <array>
6#include <string> 6#include <string>
7#include <string_view> 7#include <string_view>
8#include <utility>
8#include <variant> 9#include <variant>
10#include <vector>
9 11
10#include <fmt/format.h> 12#include <fmt/format.h>
11 13
@@ -20,6 +22,7 @@
20namespace OpenGL::GLShader { 22namespace OpenGL::GLShader {
21 23
22using Tegra::Shader::Attribute; 24using Tegra::Shader::Attribute;
25using Tegra::Shader::AttributeUse;
23using Tegra::Shader::Header; 26using Tegra::Shader::Header;
24using Tegra::Shader::IpaInterpMode; 27using Tegra::Shader::IpaInterpMode;
25using Tegra::Shader::IpaMode; 28using Tegra::Shader::IpaMode;
@@ -288,34 +291,22 @@ private:
288 code.AddNewLine(); 291 code.AddNewLine();
289 } 292 }
290 293
291 std::string GetInputFlags(const IpaMode& input_mode) { 294 std::string GetInputFlags(AttributeUse attribute) {
292 const IpaSampleMode sample_mode = input_mode.sampling_mode;
293 const IpaInterpMode interp_mode = input_mode.interpolation_mode;
294 std::string out; 295 std::string out;
295 296
296 switch (interp_mode) { 297 switch (attribute) {
297 case IpaInterpMode::Flat: 298 case AttributeUse::Constant:
298 out += "flat "; 299 out += "flat ";
299 break; 300 break;
300 case IpaInterpMode::Linear: 301 case AttributeUse::ScreenLinear:
301 out += "noperspective "; 302 out += "noperspective ";
302 break; 303 break;
303 case IpaInterpMode::Perspective: 304 case AttributeUse::Perspective:
304 // Default, Smooth 305 // Default, Smooth
305 break; 306 break;
306 default: 307 default:
307 UNIMPLEMENTED_MSG("Unhandled IPA interp mode: {}", static_cast<u32>(interp_mode)); 308 LOG_CRITICAL(HW_GPU, "Unused attribute being fetched");
308 } 309 UNREACHABLE();
309 switch (sample_mode) {
310 case IpaSampleMode::Centroid:
311 // It can be implemented with the "centroid " keyword in GLSL
312 UNIMPLEMENTED_MSG("Unimplemented IPA sampler mode centroid");
313 break;
314 case IpaSampleMode::Default:
315 // Default, n/a
316 break;
317 default:
318 UNIMPLEMENTED_MSG("Unimplemented IPA sampler mode: {}", static_cast<u32>(sample_mode));
319 } 310 }
320 return out; 311 return out;
321 } 312 }
@@ -324,16 +315,11 @@ private:
324 const auto& attributes = ir.GetInputAttributes(); 315 const auto& attributes = ir.GetInputAttributes();
325 for (const auto element : attributes) { 316 for (const auto element : attributes) {
326 const Attribute::Index index = element.first; 317 const Attribute::Index index = element.first;
327 const IpaMode& input_mode = *element.second.begin();
328 if (index < Attribute::Index::Attribute_0 || index > Attribute::Index::Attribute_31) { 318 if (index < Attribute::Index::Attribute_0 || index > Attribute::Index::Attribute_31) {
329 // Skip when it's not a generic attribute 319 // Skip when it's not a generic attribute
330 continue; 320 continue;
331 } 321 }
332 322
333 ASSERT(element.second.size() > 0);
334 UNIMPLEMENTED_IF_MSG(element.second.size() > 1,
335 "Multiple input flag modes are not supported in GLSL");
336
337 // TODO(bunnei): Use proper number of elements for these 323 // TODO(bunnei): Use proper number of elements for these
338 u32 idx = static_cast<u32>(index) - static_cast<u32>(Attribute::Index::Attribute_0); 324 u32 idx = static_cast<u32>(index) - static_cast<u32>(Attribute::Index::Attribute_0);
339 if (stage != ShaderStage::Vertex) { 325 if (stage != ShaderStage::Vertex) {
@@ -345,8 +331,14 @@ private:
345 if (stage == ShaderStage::Geometry) { 331 if (stage == ShaderStage::Geometry) {
346 attr = "gs_" + attr + "[]"; 332 attr = "gs_" + attr + "[]";
347 } 333 }
348 code.AddLine("layout (location = " + std::to_string(idx) + ") " + 334 std::string suffix;
349 GetInputFlags(input_mode) + "in vec4 " + attr + ';'); 335 if (stage == ShaderStage::Fragment) {
336 const auto input_mode =
337 header.ps.GetAttributeUse(idx - GENERIC_VARYING_START_LOCATION);
338 suffix = GetInputFlags(input_mode);
339 }
340 code.AddLine("layout (location = " + std::to_string(idx) + ") " + suffix + "in vec4 " +
341 attr + ';');
350 } 342 }
351 if (!attributes.empty()) 343 if (!attributes.empty())
352 code.AddNewLine(); 344 code.AddNewLine();
@@ -727,7 +719,7 @@ private:
727 } 719 }
728 720
729 std::string GenerateTexture(Operation operation, const std::string& func, 721 std::string GenerateTexture(Operation operation, const std::string& func,
730 bool is_extra_int = false) { 722 const std::vector<std::pair<Type, Node>>& extras) {
731 constexpr std::array<const char*, 4> coord_constructors = {"float", "vec2", "vec3", "vec4"}; 723 constexpr std::array<const char*, 4> coord_constructors = {"float", "vec2", "vec3", "vec4"};
732 724
733 const auto meta = std::get_if<MetaTexture>(&operation.GetMeta()); 725 const auto meta = std::get_if<MetaTexture>(&operation.GetMeta());
@@ -748,36 +740,47 @@ private:
748 expr += Visit(operation[i]); 740 expr += Visit(operation[i]);
749 741
750 const std::size_t next = i + 1; 742 const std::size_t next = i + 1;
751 if (next < count || has_array || has_shadow) 743 if (next < count)
752 expr += ", "; 744 expr += ", ";
753 } 745 }
754 if (has_array) { 746 if (has_array) {
755 expr += "float(ftoi(" + Visit(meta->array) + "))"; 747 expr += ", float(ftoi(" + Visit(meta->array) + "))";
756 } 748 }
757 if (has_shadow) { 749 if (has_shadow) {
758 if (has_array) 750 expr += ", " + Visit(meta->depth_compare);
759 expr += ", ";
760 expr += Visit(meta->depth_compare);
761 } 751 }
762 expr += ')'; 752 expr += ')';
763 753
764 for (const Node extra : meta->extras) { 754 for (const auto& extra_pair : extras) {
755 const auto [type, operand] = extra_pair;
756 if (operand == nullptr) {
757 continue;
758 }
765 expr += ", "; 759 expr += ", ";
766 if (is_extra_int) { 760
767 if (const auto immediate = std::get_if<ImmediateNode>(extra)) { 761 switch (type) {
762 case Type::Int:
763 if (const auto immediate = std::get_if<ImmediateNode>(operand)) {
768 // Inline the string as an immediate integer in GLSL (some extra arguments are 764 // Inline the string as an immediate integer in GLSL (some extra arguments are
769 // required to be constant) 765 // required to be constant)
770 expr += std::to_string(static_cast<s32>(immediate->GetValue())); 766 expr += std::to_string(static_cast<s32>(immediate->GetValue()));
771 } else { 767 } else {
772 expr += "ftoi(" + Visit(extra) + ')'; 768 expr += "ftoi(" + Visit(operand) + ')';
773 } 769 }
774 } else { 770 break;
775 expr += Visit(extra); 771 case Type::Float:
772 expr += Visit(operand);
773 break;
774 default: {
775 const auto type_int = static_cast<u32>(type);
776 UNIMPLEMENTED_MSG("Unimplemented extra type={}", type_int);
777 expr += '0';
778 break;
779 }
776 } 780 }
777 } 781 }
778 782
779 expr += ')'; 783 return expr + ')';
780 return expr;
781 } 784 }
782 785
783 std::string Assign(Operation operation) { 786 std::string Assign(Operation operation) {
@@ -1156,7 +1159,7 @@ private:
1156 const auto meta = std::get_if<MetaTexture>(&operation.GetMeta()); 1159 const auto meta = std::get_if<MetaTexture>(&operation.GetMeta());
1157 ASSERT(meta); 1160 ASSERT(meta);
1158 1161
1159 std::string expr = GenerateTexture(operation, "texture"); 1162 std::string expr = GenerateTexture(operation, "texture", {{Type::Float, meta->bias}});
1160 if (meta->sampler.IsShadow()) { 1163 if (meta->sampler.IsShadow()) {
1161 expr = "vec4(" + expr + ')'; 1164 expr = "vec4(" + expr + ')';
1162 } 1165 }
@@ -1167,7 +1170,7 @@ private:
1167 const auto meta = std::get_if<MetaTexture>(&operation.GetMeta()); 1170 const auto meta = std::get_if<MetaTexture>(&operation.GetMeta());
1168 ASSERT(meta); 1171 ASSERT(meta);
1169 1172
1170 std::string expr = GenerateTexture(operation, "textureLod"); 1173 std::string expr = GenerateTexture(operation, "textureLod", {{Type::Float, meta->lod}});
1171 if (meta->sampler.IsShadow()) { 1174 if (meta->sampler.IsShadow()) {
1172 expr = "vec4(" + expr + ')'; 1175 expr = "vec4(" + expr + ')';
1173 } 1176 }
@@ -1178,7 +1181,8 @@ private:
1178 const auto meta = std::get_if<MetaTexture>(&operation.GetMeta()); 1181 const auto meta = std::get_if<MetaTexture>(&operation.GetMeta());
1179 ASSERT(meta); 1182 ASSERT(meta);
1180 1183
1181 return GenerateTexture(operation, "textureGather", !meta->sampler.IsShadow()) + 1184 const auto type = meta->sampler.IsShadow() ? Type::Float : Type::Int;
1185 return GenerateTexture(operation, "textureGather", {{type, meta->component}}) +
1182 GetSwizzle(meta->element); 1186 GetSwizzle(meta->element);
1183 } 1187 }
1184 1188
@@ -1207,8 +1211,8 @@ private:
1207 ASSERT(meta); 1211 ASSERT(meta);
1208 1212
1209 if (meta->element < 2) { 1213 if (meta->element < 2) {
1210 return "itof(int((" + GenerateTexture(operation, "textureQueryLod") + " * vec2(256))" + 1214 return "itof(int((" + GenerateTexture(operation, "textureQueryLod", {}) +
1211 GetSwizzle(meta->element) + "))"; 1215 " * vec2(256))" + GetSwizzle(meta->element) + "))";
1212 } 1216 }
1213 return "0"; 1217 return "0";
1214 } 1218 }
@@ -1234,9 +1238,9 @@ private:
1234 else if (next < count) 1238 else if (next < count)
1235 expr += ", "; 1239 expr += ", ";
1236 } 1240 }
1237 for (std::size_t i = 0; i < meta->extras.size(); ++i) { 1241 if (meta->lod) {
1238 expr += ", "; 1242 expr += ", ";
1239 expr += CastOperand(Visit(meta->extras.at(i)), Type::Int); 1243 expr += CastOperand(Visit(meta->lod), Type::Int);
1240 } 1244 }
1241 expr += ')'; 1245 expr += ')';
1242 1246
@@ -1584,4 +1588,4 @@ ProgramResult Decompile(const ShaderIR& ir, Maxwell::ShaderStage stage, const st
1584 return {decompiler.GetResult(), decompiler.GetShaderEntries()}; 1588 return {decompiler.GetResult(), decompiler.GetShaderEntries()};
1585} 1589}
1586 1590
1587} // namespace OpenGL::GLShader \ No newline at end of file 1591} // namespace OpenGL::GLShader
diff --git a/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp b/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp
index 81882822b..82fc4d44b 100644
--- a/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp
@@ -2,8 +2,6 @@
2// Licensed under GPLv2 or any later version 2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included. 3// Refer to the license.txt file included.
4 4
5#pragma once
6
7#include <cstring> 5#include <cstring>
8#include <fmt/format.h> 6#include <fmt/format.h>
9#include <lz4.h> 7#include <lz4.h>
diff --git a/src/video_core/renderer_opengl/gl_shader_gen.cpp b/src/video_core/renderer_opengl/gl_shader_gen.cpp
index 04e1db911..7d96649af 100644
--- a/src/video_core/renderer_opengl/gl_shader_gen.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_gen.cpp
@@ -124,7 +124,7 @@ layout (location = 5) out vec4 FragColor5;
124layout (location = 6) out vec4 FragColor6; 124layout (location = 6) out vec4 FragColor6;
125layout (location = 7) out vec4 FragColor7; 125layout (location = 7) out vec4 FragColor7;
126 126
127layout (location = 0) in vec4 position; 127layout (location = 0) in noperspective vec4 position;
128 128
129layout (std140, binding = EMULATION_UBO_BINDING) uniform fs_config { 129layout (std140, binding = EMULATION_UBO_BINDING) uniform fs_config {
130 vec4 viewport_flip; 130 vec4 viewport_flip;
@@ -172,4 +172,4 @@ void main() {
172 return {out, program.second}; 172 return {out, program.second};
173} 173}
174 174
175} // namespace OpenGL::GLShader \ No newline at end of file 175} // namespace OpenGL::GLShader
diff --git a/src/video_core/renderer_opengl/gl_state.cpp b/src/video_core/renderer_opengl/gl_state.cpp
index 81af803bc..9419326a3 100644
--- a/src/video_core/renderer_opengl/gl_state.cpp
+++ b/src/video_core/renderer_opengl/gl_state.cpp
@@ -11,7 +11,9 @@
11namespace OpenGL { 11namespace OpenGL {
12 12
13OpenGLState OpenGLState::cur_state; 13OpenGLState OpenGLState::cur_state;
14
14bool OpenGLState::s_rgb_used; 15bool OpenGLState::s_rgb_used;
16
15OpenGLState::OpenGLState() { 17OpenGLState::OpenGLState() {
16 // These all match default OpenGL values 18 // These all match default OpenGL values
17 geometry_shaders.enabled = false; 19 geometry_shaders.enabled = false;
@@ -112,7 +114,6 @@ void OpenGLState::ApplyDefaultState() {
112} 114}
113 115
114void OpenGLState::ApplySRgb() const { 116void OpenGLState::ApplySRgb() const {
115 // sRGB
116 if (framebuffer_srgb.enabled != cur_state.framebuffer_srgb.enabled) { 117 if (framebuffer_srgb.enabled != cur_state.framebuffer_srgb.enabled) {
117 if (framebuffer_srgb.enabled) { 118 if (framebuffer_srgb.enabled) {
118 // Track if sRGB is used 119 // Track if sRGB is used
@@ -125,23 +126,20 @@ void OpenGLState::ApplySRgb() const {
125} 126}
126 127
127void OpenGLState::ApplyCulling() const { 128void OpenGLState::ApplyCulling() const {
128 // Culling 129 if (cull.enabled != cur_state.cull.enabled) {
129 const bool cull_changed = cull.enabled != cur_state.cull.enabled;
130 if (cull_changed) {
131 if (cull.enabled) { 130 if (cull.enabled) {
132 glEnable(GL_CULL_FACE); 131 glEnable(GL_CULL_FACE);
133 } else { 132 } else {
134 glDisable(GL_CULL_FACE); 133 glDisable(GL_CULL_FACE);
135 } 134 }
136 } 135 }
137 if (cull.enabled) {
138 if (cull_changed || cull.mode != cur_state.cull.mode) {
139 glCullFace(cull.mode);
140 }
141 136
142 if (cull_changed || cull.front_face != cur_state.cull.front_face) { 137 if (cull.mode != cur_state.cull.mode) {
143 glFrontFace(cull.front_face); 138 glCullFace(cull.mode);
144 } 139 }
140
141 if (cull.front_face != cur_state.cull.front_face) {
142 glFrontFace(cull.front_face);
145 } 143 }
146} 144}
147 145
@@ -172,72 +170,63 @@ void OpenGLState::ApplyColorMask() const {
172} 170}
173 171
174void OpenGLState::ApplyDepth() const { 172void OpenGLState::ApplyDepth() const {
175 // Depth test 173 if (depth.test_enabled != cur_state.depth.test_enabled) {
176 const bool depth_test_changed = depth.test_enabled != cur_state.depth.test_enabled;
177 if (depth_test_changed) {
178 if (depth.test_enabled) { 174 if (depth.test_enabled) {
179 glEnable(GL_DEPTH_TEST); 175 glEnable(GL_DEPTH_TEST);
180 } else { 176 } else {
181 glDisable(GL_DEPTH_TEST); 177 glDisable(GL_DEPTH_TEST);
182 } 178 }
183 } 179 }
184 if (depth.test_enabled && 180
185 (depth_test_changed || depth.test_func != cur_state.depth.test_func)) { 181 if (depth.test_func != cur_state.depth.test_func) {
186 glDepthFunc(depth.test_func); 182 glDepthFunc(depth.test_func);
187 } 183 }
188 // Depth mask 184
189 if (depth.write_mask != cur_state.depth.write_mask) { 185 if (depth.write_mask != cur_state.depth.write_mask) {
190 glDepthMask(depth.write_mask); 186 glDepthMask(depth.write_mask);
191 } 187 }
192} 188}
193 189
194void OpenGLState::ApplyPrimitiveRestart() const { 190void OpenGLState::ApplyPrimitiveRestart() const {
195 const bool primitive_restart_changed = 191 if (primitive_restart.enabled != cur_state.primitive_restart.enabled) {
196 primitive_restart.enabled != cur_state.primitive_restart.enabled;
197 if (primitive_restart_changed) {
198 if (primitive_restart.enabled) { 192 if (primitive_restart.enabled) {
199 glEnable(GL_PRIMITIVE_RESTART); 193 glEnable(GL_PRIMITIVE_RESTART);
200 } else { 194 } else {
201 glDisable(GL_PRIMITIVE_RESTART); 195 glDisable(GL_PRIMITIVE_RESTART);
202 } 196 }
203 } 197 }
204 if (primitive_restart_changed || 198
205 (primitive_restart.enabled && 199 if (primitive_restart.index != cur_state.primitive_restart.index) {
206 primitive_restart.index != cur_state.primitive_restart.index)) {
207 glPrimitiveRestartIndex(primitive_restart.index); 200 glPrimitiveRestartIndex(primitive_restart.index);
208 } 201 }
209} 202}
210 203
211void OpenGLState::ApplyStencilTest() const { 204void OpenGLState::ApplyStencilTest() const {
212 const bool stencil_test_changed = stencil.test_enabled != cur_state.stencil.test_enabled; 205 if (stencil.test_enabled != cur_state.stencil.test_enabled) {
213 if (stencil_test_changed) {
214 if (stencil.test_enabled) { 206 if (stencil.test_enabled) {
215 glEnable(GL_STENCIL_TEST); 207 glEnable(GL_STENCIL_TEST);
216 } else { 208 } else {
217 glDisable(GL_STENCIL_TEST); 209 glDisable(GL_STENCIL_TEST);
218 } 210 }
219 } 211 }
220 if (stencil.test_enabled) { 212
221 auto config_stencil = [stencil_test_changed](GLenum face, const auto& config, 213 const auto ConfigStencil = [](GLenum face, const auto& config, const auto& prev_config) {
222 const auto& prev_config) { 214 if (config.test_func != prev_config.test_func || config.test_ref != prev_config.test_ref ||
223 if (stencil_test_changed || config.test_func != prev_config.test_func || 215 config.test_mask != prev_config.test_mask) {
224 config.test_ref != prev_config.test_ref || 216 glStencilFuncSeparate(face, config.test_func, config.test_ref, config.test_mask);
225 config.test_mask != prev_config.test_mask) { 217 }
226 glStencilFuncSeparate(face, config.test_func, config.test_ref, config.test_mask); 218 if (config.action_depth_fail != prev_config.action_depth_fail ||
227 } 219 config.action_depth_pass != prev_config.action_depth_pass ||
228 if (stencil_test_changed || config.action_depth_fail != prev_config.action_depth_fail || 220 config.action_stencil_fail != prev_config.action_stencil_fail) {
229 config.action_depth_pass != prev_config.action_depth_pass || 221 glStencilOpSeparate(face, config.action_stencil_fail, config.action_depth_fail,
230 config.action_stencil_fail != prev_config.action_stencil_fail) { 222 config.action_depth_pass);
231 glStencilOpSeparate(face, config.action_stencil_fail, config.action_depth_fail, 223 }
232 config.action_depth_pass); 224 if (config.write_mask != prev_config.write_mask) {
233 } 225 glStencilMaskSeparate(face, config.write_mask);
234 if (config.write_mask != prev_config.write_mask) { 226 }
235 glStencilMaskSeparate(face, config.write_mask); 227 };
236 } 228 ConfigStencil(GL_FRONT, stencil.front, cur_state.stencil.front);
237 }; 229 ConfigStencil(GL_BACK, stencil.back, cur_state.stencil.back);
238 config_stencil(GL_FRONT, stencil.front, cur_state.stencil.front);
239 config_stencil(GL_BACK, stencil.back, cur_state.stencil.back);
240 }
241} 230}
242// Viewport does not affects glClearBuffer so emulate viewport using scissor test 231// Viewport does not affects glClearBuffer so emulate viewport using scissor test
243void OpenGLState::EmulateViewportWithScissor() { 232void OpenGLState::EmulateViewportWithScissor() {
@@ -278,19 +267,18 @@ void OpenGLState::ApplyViewport() const {
278 updated.depth_range_far != current.depth_range_far) { 267 updated.depth_range_far != current.depth_range_far) {
279 glDepthRangeIndexed(i, updated.depth_range_near, updated.depth_range_far); 268 glDepthRangeIndexed(i, updated.depth_range_near, updated.depth_range_far);
280 } 269 }
281 const bool scissor_changed = updated.scissor.enabled != current.scissor.enabled; 270
282 if (scissor_changed) { 271 if (updated.scissor.enabled != current.scissor.enabled) {
283 if (updated.scissor.enabled) { 272 if (updated.scissor.enabled) {
284 glEnablei(GL_SCISSOR_TEST, i); 273 glEnablei(GL_SCISSOR_TEST, i);
285 } else { 274 } else {
286 glDisablei(GL_SCISSOR_TEST, i); 275 glDisablei(GL_SCISSOR_TEST, i);
287 } 276 }
288 } 277 }
289 if (updated.scissor.enabled && 278
290 (scissor_changed || updated.scissor.x != current.scissor.x || 279 if (updated.scissor.x != current.scissor.x || updated.scissor.y != current.scissor.y ||
291 updated.scissor.y != current.scissor.y || 280 updated.scissor.width != current.scissor.width ||
292 updated.scissor.width != current.scissor.width || 281 updated.scissor.height != current.scissor.height) {
293 updated.scissor.height != current.scissor.height)) {
294 glScissorIndexed(i, updated.scissor.x, updated.scissor.y, updated.scissor.width, 282 glScissorIndexed(i, updated.scissor.x, updated.scissor.y, updated.scissor.width,
295 updated.scissor.height); 283 updated.scissor.height);
296 } 284 }
@@ -302,22 +290,23 @@ void OpenGLState::ApplyViewport() const {
302 updated.height != current.height) { 290 updated.height != current.height) {
303 glViewport(updated.x, updated.y, updated.width, updated.height); 291 glViewport(updated.x, updated.y, updated.width, updated.height);
304 } 292 }
293
305 if (updated.depth_range_near != current.depth_range_near || 294 if (updated.depth_range_near != current.depth_range_near ||
306 updated.depth_range_far != current.depth_range_far) { 295 updated.depth_range_far != current.depth_range_far) {
307 glDepthRange(updated.depth_range_near, updated.depth_range_far); 296 glDepthRange(updated.depth_range_near, updated.depth_range_far);
308 } 297 }
309 const bool scissor_changed = updated.scissor.enabled != current.scissor.enabled; 298
310 if (scissor_changed) { 299 if (updated.scissor.enabled != current.scissor.enabled) {
311 if (updated.scissor.enabled) { 300 if (updated.scissor.enabled) {
312 glEnable(GL_SCISSOR_TEST); 301 glEnable(GL_SCISSOR_TEST);
313 } else { 302 } else {
314 glDisable(GL_SCISSOR_TEST); 303 glDisable(GL_SCISSOR_TEST);
315 } 304 }
316 } 305 }
317 if (updated.scissor.enabled && (scissor_changed || updated.scissor.x != current.scissor.x || 306
318 updated.scissor.y != current.scissor.y || 307 if (updated.scissor.x != current.scissor.x || updated.scissor.y != current.scissor.y ||
319 updated.scissor.width != current.scissor.width || 308 updated.scissor.width != current.scissor.width ||
320 updated.scissor.height != current.scissor.height)) { 309 updated.scissor.height != current.scissor.height) {
321 glScissor(updated.scissor.x, updated.scissor.y, updated.scissor.width, 310 glScissor(updated.scissor.x, updated.scissor.y, updated.scissor.width,
322 updated.scissor.height); 311 updated.scissor.height);
323 } 312 }
@@ -327,8 +316,7 @@ void OpenGLState::ApplyViewport() const {
327void OpenGLState::ApplyGlobalBlending() const { 316void OpenGLState::ApplyGlobalBlending() const {
328 const Blend& current = cur_state.blend[0]; 317 const Blend& current = cur_state.blend[0];
329 const Blend& updated = blend[0]; 318 const Blend& updated = blend[0];
330 const bool blend_changed = updated.enabled != current.enabled; 319 if (updated.enabled != current.enabled) {
331 if (blend_changed) {
332 if (updated.enabled) { 320 if (updated.enabled) {
333 glEnable(GL_BLEND); 321 glEnable(GL_BLEND);
334 } else { 322 } else {
@@ -338,15 +326,14 @@ void OpenGLState::ApplyGlobalBlending() const {
338 if (!updated.enabled) { 326 if (!updated.enabled) {
339 return; 327 return;
340 } 328 }
341 if (blend_changed || updated.src_rgb_func != current.src_rgb_func || 329 if (updated.src_rgb_func != current.src_rgb_func ||
342 updated.dst_rgb_func != current.dst_rgb_func || updated.src_a_func != current.src_a_func || 330 updated.dst_rgb_func != current.dst_rgb_func || updated.src_a_func != current.src_a_func ||
343 updated.dst_a_func != current.dst_a_func) { 331 updated.dst_a_func != current.dst_a_func) {
344 glBlendFuncSeparate(updated.src_rgb_func, updated.dst_rgb_func, updated.src_a_func, 332 glBlendFuncSeparate(updated.src_rgb_func, updated.dst_rgb_func, updated.src_a_func,
345 updated.dst_a_func); 333 updated.dst_a_func);
346 } 334 }
347 335
348 if (blend_changed || updated.rgb_equation != current.rgb_equation || 336 if (updated.rgb_equation != current.rgb_equation || updated.a_equation != current.a_equation) {
349 updated.a_equation != current.a_equation) {
350 glBlendEquationSeparate(updated.rgb_equation, updated.a_equation); 337 glBlendEquationSeparate(updated.rgb_equation, updated.a_equation);
351 } 338 }
352} 339}
@@ -354,26 +341,22 @@ void OpenGLState::ApplyGlobalBlending() const {
354void OpenGLState::ApplyTargetBlending(std::size_t target, bool force) const { 341void OpenGLState::ApplyTargetBlending(std::size_t target, bool force) const {
355 const Blend& updated = blend[target]; 342 const Blend& updated = blend[target];
356 const Blend& current = cur_state.blend[target]; 343 const Blend& current = cur_state.blend[target];
357 const bool blend_changed = updated.enabled != current.enabled || force; 344 if (updated.enabled != current.enabled || force) {
358 if (blend_changed) {
359 if (updated.enabled) { 345 if (updated.enabled) {
360 glEnablei(GL_BLEND, static_cast<GLuint>(target)); 346 glEnablei(GL_BLEND, static_cast<GLuint>(target));
361 } else { 347 } else {
362 glDisablei(GL_BLEND, static_cast<GLuint>(target)); 348 glDisablei(GL_BLEND, static_cast<GLuint>(target));
363 } 349 }
364 } 350 }
365 if (!updated.enabled) { 351
366 return; 352 if (updated.src_rgb_func != current.src_rgb_func ||
367 }
368 if (blend_changed || updated.src_rgb_func != current.src_rgb_func ||
369 updated.dst_rgb_func != current.dst_rgb_func || updated.src_a_func != current.src_a_func || 353 updated.dst_rgb_func != current.dst_rgb_func || updated.src_a_func != current.src_a_func ||
370 updated.dst_a_func != current.dst_a_func) { 354 updated.dst_a_func != current.dst_a_func) {
371 glBlendFuncSeparatei(static_cast<GLuint>(target), updated.src_rgb_func, 355 glBlendFuncSeparatei(static_cast<GLuint>(target), updated.src_rgb_func,
372 updated.dst_rgb_func, updated.src_a_func, updated.dst_a_func); 356 updated.dst_rgb_func, updated.src_a_func, updated.dst_a_func);
373 } 357 }
374 358
375 if (blend_changed || updated.rgb_equation != current.rgb_equation || 359 if (updated.rgb_equation != current.rgb_equation || updated.a_equation != current.a_equation) {
376 updated.a_equation != current.a_equation) {
377 glBlendEquationSeparatei(static_cast<GLuint>(target), updated.rgb_equation, 360 glBlendEquationSeparatei(static_cast<GLuint>(target), updated.rgb_equation,
378 updated.a_equation); 361 updated.a_equation);
379 } 362 }
@@ -397,8 +380,7 @@ void OpenGLState::ApplyBlending() const {
397} 380}
398 381
399void OpenGLState::ApplyLogicOp() const { 382void OpenGLState::ApplyLogicOp() const {
400 const bool logic_op_changed = logic_op.enabled != cur_state.logic_op.enabled; 383 if (logic_op.enabled != cur_state.logic_op.enabled) {
401 if (logic_op_changed) {
402 if (logic_op.enabled) { 384 if (logic_op.enabled) {
403 glEnable(GL_COLOR_LOGIC_OP); 385 glEnable(GL_COLOR_LOGIC_OP);
404 } else { 386 } else {
@@ -406,14 +388,12 @@ void OpenGLState::ApplyLogicOp() const {
406 } 388 }
407 } 389 }
408 390
409 if (logic_op.enabled && 391 if (logic_op.operation != cur_state.logic_op.operation) {
410 (logic_op_changed || logic_op.operation != cur_state.logic_op.operation)) {
411 glLogicOp(logic_op.operation); 392 glLogicOp(logic_op.operation);
412 } 393 }
413} 394}
414 395
415void OpenGLState::ApplyPolygonOffset() const { 396void OpenGLState::ApplyPolygonOffset() const {
416
417 const bool fill_enable_changed = 397 const bool fill_enable_changed =
418 polygon_offset.fill_enable != cur_state.polygon_offset.fill_enable; 398 polygon_offset.fill_enable != cur_state.polygon_offset.fill_enable;
419 const bool line_enable_changed = 399 const bool line_enable_changed =
@@ -448,9 +428,7 @@ void OpenGLState::ApplyPolygonOffset() const {
448 } 428 }
449 } 429 }
450 430
451 if ((polygon_offset.fill_enable || polygon_offset.line_enable || polygon_offset.point_enable) && 431 if (factor_changed || units_changed || clamp_changed) {
452 (factor_changed || units_changed || clamp_changed)) {
453
454 if (GLAD_GL_EXT_polygon_offset_clamp && polygon_offset.clamp != 0) { 432 if (GLAD_GL_EXT_polygon_offset_clamp && polygon_offset.clamp != 0) {
455 glPolygonOffsetClamp(polygon_offset.factor, polygon_offset.units, polygon_offset.clamp); 433 glPolygonOffsetClamp(polygon_offset.factor, polygon_offset.units, polygon_offset.clamp);
456 } else { 434 } else {
@@ -483,7 +461,7 @@ void OpenGLState::ApplyTextures() const {
483 461
484 if (has_delta) { 462 if (has_delta) {
485 glBindTextures(static_cast<GLuint>(first), static_cast<GLsizei>(last - first + 1), 463 glBindTextures(static_cast<GLuint>(first), static_cast<GLsizei>(last - first + 1),
486 textures.data()); 464 textures.data() + first);
487 } 465 }
488} 466}
489 467
@@ -504,7 +482,7 @@ void OpenGLState::ApplySamplers() const {
504 } 482 }
505 if (has_delta) { 483 if (has_delta) {
506 glBindSamplers(static_cast<GLuint>(first), static_cast<GLsizei>(last - first + 1), 484 glBindSamplers(static_cast<GLuint>(first), static_cast<GLsizei>(last - first + 1),
507 samplers.data()); 485 samplers.data() + first);
508 } 486 }
509} 487}
510 488
@@ -528,9 +506,9 @@ void OpenGLState::ApplyDepthClamp() const {
528 depth_clamp.near_plane == cur_state.depth_clamp.near_plane) { 506 depth_clamp.near_plane == cur_state.depth_clamp.near_plane) {
529 return; 507 return;
530 } 508 }
531 if (depth_clamp.far_plane != depth_clamp.near_plane) { 509 UNIMPLEMENTED_IF_MSG(depth_clamp.far_plane != depth_clamp.near_plane,
532 UNIMPLEMENTED_MSG("Unimplemented Depth Clamp Separation!"); 510 "Unimplemented Depth Clamp Separation!");
533 } 511
534 if (depth_clamp.far_plane || depth_clamp.near_plane) { 512 if (depth_clamp.far_plane || depth_clamp.near_plane) {
535 glEnable(GL_DEPTH_CLAMP); 513 glEnable(GL_DEPTH_CLAMP);
536 } else { 514 } else {
diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp
index d40666ac6..b97576309 100644
--- a/src/video_core/renderer_opengl/renderer_opengl.cpp
+++ b/src/video_core/renderer_opengl/renderer_opengl.cpp
@@ -167,9 +167,11 @@ void RendererOpenGL::LoadFBToScreenInfo(const Tegra::FramebufferConfig& framebuf
167 Memory::RasterizerFlushVirtualRegion(framebuffer_addr, size_in_bytes, 167 Memory::RasterizerFlushVirtualRegion(framebuffer_addr, size_in_bytes,
168 Memory::FlushMode::Flush); 168 Memory::FlushMode::Flush);
169 169
170 VideoCore::MortonCopyPixels128(framebuffer.width, framebuffer.height, bytes_per_pixel, 4, 170 constexpr u32 linear_bpp = 4;
171 Memory::GetPointer(framebuffer_addr), 171 VideoCore::MortonCopyPixels128(VideoCore::MortonSwizzleMode::MortonToLinear,
172 gl_framebuffer_data.data(), true); 172 framebuffer.width, framebuffer.height, bytes_per_pixel,
173 linear_bpp, Memory::GetPointer(framebuffer_addr),
174 gl_framebuffer_data.data());
173 175
174 glPixelStorei(GL_UNPACK_ROW_LENGTH, static_cast<GLint>(framebuffer.stride)); 176 glPixelStorei(GL_UNPACK_ROW_LENGTH, static_cast<GLint>(framebuffer.stride));
175 177
@@ -244,6 +246,21 @@ void RendererOpenGL::InitOpenGLObjects() {
244 LoadColorToActiveGLTexture(0, 0, 0, 0, screen_info.texture); 246 LoadColorToActiveGLTexture(0, 0, 0, 0, screen_info.texture);
245} 247}
246 248
249void RendererOpenGL::AddTelemetryFields() {
250 const char* const gl_version{reinterpret_cast<char const*>(glGetString(GL_VERSION))};
251 const char* const gpu_vendor{reinterpret_cast<char const*>(glGetString(GL_VENDOR))};
252 const char* const gpu_model{reinterpret_cast<char const*>(glGetString(GL_RENDERER))};
253
254 LOG_INFO(Render_OpenGL, "GL_VERSION: {}", gl_version);
255 LOG_INFO(Render_OpenGL, "GL_VENDOR: {}", gpu_vendor);
256 LOG_INFO(Render_OpenGL, "GL_RENDERER: {}", gpu_model);
257
258 auto& telemetry_session = system.TelemetrySession();
259 telemetry_session.AddField(Telemetry::FieldType::UserSystem, "GPU_Vendor", gpu_vendor);
260 telemetry_session.AddField(Telemetry::FieldType::UserSystem, "GPU_Model", gpu_model);
261 telemetry_session.AddField(Telemetry::FieldType::UserSystem, "GPU_OpenGL_Version", gl_version);
262}
263
247void RendererOpenGL::CreateRasterizer() { 264void RendererOpenGL::CreateRasterizer() {
248 if (rasterizer) { 265 if (rasterizer) {
249 return; 266 return;
@@ -257,6 +274,7 @@ void RendererOpenGL::ConfigureFramebufferTexture(TextureInfo& texture,
257 const Tegra::FramebufferConfig& framebuffer) { 274 const Tegra::FramebufferConfig& framebuffer) {
258 texture.width = framebuffer.width; 275 texture.width = framebuffer.width;
259 texture.height = framebuffer.height; 276 texture.height = framebuffer.height;
277 texture.pixel_format = framebuffer.pixel_format;
260 278
261 GLint internal_format; 279 GLint internal_format;
262 switch (framebuffer.pixel_format) { 280 switch (framebuffer.pixel_format) {
@@ -380,7 +398,8 @@ void RendererOpenGL::CaptureScreenshot() {
380 GLuint renderbuffer; 398 GLuint renderbuffer;
381 glGenRenderbuffers(1, &renderbuffer); 399 glGenRenderbuffers(1, &renderbuffer);
382 glBindRenderbuffer(GL_RENDERBUFFER, renderbuffer); 400 glBindRenderbuffer(GL_RENDERBUFFER, renderbuffer);
383 glRenderbufferStorage(GL_RENDERBUFFER, GL_RGB8, layout.width, layout.height); 401 glRenderbufferStorage(GL_RENDERBUFFER, state.GetsRGBUsed() ? GL_SRGB8 : GL_RGB8, layout.width,
402 layout.height);
384 glFramebufferRenderbuffer(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_RENDERBUFFER, renderbuffer); 403 glFramebufferRenderbuffer(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_RENDERBUFFER, renderbuffer);
385 404
386 DrawScreen(layout); 405 DrawScreen(layout);
@@ -464,17 +483,7 @@ bool RendererOpenGL::Init() {
464 glDebugMessageCallback(DebugHandler, nullptr); 483 glDebugMessageCallback(DebugHandler, nullptr);
465 } 484 }
466 485
467 const char* gl_version{reinterpret_cast<char const*>(glGetString(GL_VERSION))}; 486 AddTelemetryFields();
468 const char* gpu_vendor{reinterpret_cast<char const*>(glGetString(GL_VENDOR))};
469 const char* gpu_model{reinterpret_cast<char const*>(glGetString(GL_RENDERER))};
470
471 LOG_INFO(Render_OpenGL, "GL_VERSION: {}", gl_version);
472 LOG_INFO(Render_OpenGL, "GL_VENDOR: {}", gpu_vendor);
473 LOG_INFO(Render_OpenGL, "GL_RENDERER: {}", gpu_model);
474
475 Core::Telemetry().AddField(Telemetry::FieldType::UserSystem, "GPU_Vendor", gpu_vendor);
476 Core::Telemetry().AddField(Telemetry::FieldType::UserSystem, "GPU_Model", gpu_model);
477 Core::Telemetry().AddField(Telemetry::FieldType::UserSystem, "GPU_OpenGL_Version", gl_version);
478 487
479 if (!GLAD_GL_VERSION_4_3) { 488 if (!GLAD_GL_VERSION_4_3) {
480 return false; 489 return false;
diff --git a/src/video_core/renderer_opengl/renderer_opengl.h b/src/video_core/renderer_opengl/renderer_opengl.h
index 7e13e566b..6cbf9d2cb 100644
--- a/src/video_core/renderer_opengl/renderer_opengl.h
+++ b/src/video_core/renderer_opengl/renderer_opengl.h
@@ -39,7 +39,7 @@ struct TextureInfo {
39/// Structure used for storing information about the display target for the Switch screen 39/// Structure used for storing information about the display target for the Switch screen
40struct ScreenInfo { 40struct ScreenInfo {
41 GLuint display_texture; 41 GLuint display_texture;
42 const MathUtil::Rectangle<float> display_texcoords{0.0f, 0.0f, 1.0f, 1.0f}; 42 const Common::Rectangle<float> display_texcoords{0.0f, 0.0f, 1.0f, 1.0f};
43 TextureInfo texture; 43 TextureInfo texture;
44}; 44};
45 45
@@ -60,6 +60,7 @@ public:
60 60
61private: 61private:
62 void InitOpenGLObjects(); 62 void InitOpenGLObjects();
63 void AddTelemetryFields();
63 void CreateRasterizer(); 64 void CreateRasterizer();
64 65
65 void ConfigureFramebufferTexture(TextureInfo& texture, 66 void ConfigureFramebufferTexture(TextureInfo& texture,
@@ -102,7 +103,7 @@ private:
102 103
103 /// Used for transforming the framebuffer orientation 104 /// Used for transforming the framebuffer orientation
104 Tegra::FramebufferConfig::TransformFlags framebuffer_transform_flags; 105 Tegra::FramebufferConfig::TransformFlags framebuffer_transform_flags;
105 MathUtil::Rectangle<int> framebuffer_crop_rect; 106 Common::Rectangle<int> framebuffer_crop_rect;
106}; 107};
107 108
108} // namespace OpenGL 109} // namespace OpenGL
diff --git a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp
new file mode 100644
index 000000000..34bf26ff2
--- /dev/null
+++ b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp
@@ -0,0 +1,483 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/assert.h"
6#include "common/common_types.h"
7#include "common/logging/log.h"
8#include "video_core/engines/maxwell_3d.h"
9#include "video_core/renderer_vulkan/declarations.h"
10#include "video_core/renderer_vulkan/maxwell_to_vk.h"
11#include "video_core/renderer_vulkan/vk_device.h"
12#include "video_core/surface.h"
13
14namespace Vulkan::MaxwellToVK {
15
16namespace Sampler {
17
18vk::Filter Filter(Tegra::Texture::TextureFilter filter) {
19 switch (filter) {
20 case Tegra::Texture::TextureFilter::Linear:
21 return vk::Filter::eLinear;
22 case Tegra::Texture::TextureFilter::Nearest:
23 return vk::Filter::eNearest;
24 }
25 UNIMPLEMENTED_MSG("Unimplemented sampler filter={}", static_cast<u32>(filter));
26 return {};
27}
28
29vk::SamplerMipmapMode MipmapMode(Tegra::Texture::TextureMipmapFilter mipmap_filter) {
30 switch (mipmap_filter) {
31 case Tegra::Texture::TextureMipmapFilter::None:
32 // TODO(Rodrigo): None seems to be mapped to OpenGL's mag and min filters without mipmapping
33 // (e.g. GL_NEAREST and GL_LINEAR). Vulkan doesn't have such a thing, find out if we have to
34 // use an image view with a single mipmap level to emulate this.
35 return vk::SamplerMipmapMode::eLinear;
36 case Tegra::Texture::TextureMipmapFilter::Linear:
37 return vk::SamplerMipmapMode::eLinear;
38 case Tegra::Texture::TextureMipmapFilter::Nearest:
39 return vk::SamplerMipmapMode::eNearest;
40 }
41 UNIMPLEMENTED_MSG("Unimplemented sampler mipmap mode={}", static_cast<u32>(mipmap_filter));
42 return {};
43}
44
45vk::SamplerAddressMode WrapMode(Tegra::Texture::WrapMode wrap_mode) {
46 switch (wrap_mode) {
47 case Tegra::Texture::WrapMode::Wrap:
48 return vk::SamplerAddressMode::eRepeat;
49 case Tegra::Texture::WrapMode::Mirror:
50 return vk::SamplerAddressMode::eMirroredRepeat;
51 case Tegra::Texture::WrapMode::ClampToEdge:
52 return vk::SamplerAddressMode::eClampToEdge;
53 case Tegra::Texture::WrapMode::Border:
54 return vk::SamplerAddressMode::eClampToBorder;
55 case Tegra::Texture::WrapMode::ClampOGL:
56 // TODO(Rodrigo): GL_CLAMP was removed as of OpenGL 3.1, to implement GL_CLAMP, we can use
57 // eClampToBorder to get the border color of the texture, and then sample the edge to
58 // manually mix them. However the shader part of this is not yet implemented.
59 return vk::SamplerAddressMode::eClampToBorder;
60 case Tegra::Texture::WrapMode::MirrorOnceClampToEdge:
61 return vk::SamplerAddressMode::eMirrorClampToEdge;
62 case Tegra::Texture::WrapMode::MirrorOnceBorder:
63 UNIMPLEMENTED();
64 return vk::SamplerAddressMode::eMirrorClampToEdge;
65 }
66 UNIMPLEMENTED_MSG("Unimplemented wrap mode={}", static_cast<u32>(wrap_mode));
67 return {};
68}
69
70vk::CompareOp DepthCompareFunction(Tegra::Texture::DepthCompareFunc depth_compare_func) {
71 switch (depth_compare_func) {
72 case Tegra::Texture::DepthCompareFunc::Never:
73 return vk::CompareOp::eNever;
74 case Tegra::Texture::DepthCompareFunc::Less:
75 return vk::CompareOp::eLess;
76 case Tegra::Texture::DepthCompareFunc::LessEqual:
77 return vk::CompareOp::eLessOrEqual;
78 case Tegra::Texture::DepthCompareFunc::Equal:
79 return vk::CompareOp::eEqual;
80 case Tegra::Texture::DepthCompareFunc::NotEqual:
81 return vk::CompareOp::eNotEqual;
82 case Tegra::Texture::DepthCompareFunc::Greater:
83 return vk::CompareOp::eGreater;
84 case Tegra::Texture::DepthCompareFunc::GreaterEqual:
85 return vk::CompareOp::eGreaterOrEqual;
86 case Tegra::Texture::DepthCompareFunc::Always:
87 return vk::CompareOp::eAlways;
88 }
89 UNIMPLEMENTED_MSG("Unimplemented sampler depth compare function={}",
90 static_cast<u32>(depth_compare_func));
91 return {};
92}
93
94} // namespace Sampler
95
96struct FormatTuple {
97 vk::Format format; ///< Vulkan format
98 ComponentType component_type; ///< Abstracted component type
99 bool attachable; ///< True when this format can be used as an attachment
100};
101
102static constexpr std::array<FormatTuple, VideoCore::Surface::MaxPixelFormat> tex_format_tuples = {{
103 {vk::Format::eA8B8G8R8UnormPack32, ComponentType::UNorm, true}, // ABGR8U
104 {vk::Format::eUndefined, ComponentType::Invalid, false}, // ABGR8S
105 {vk::Format::eUndefined, ComponentType::Invalid, false}, // ABGR8UI
106 {vk::Format::eB5G6R5UnormPack16, ComponentType::UNorm, false}, // B5G6R5U
107 {vk::Format::eA2B10G10R10UnormPack32, ComponentType::UNorm, true}, // A2B10G10R10U
108 {vk::Format::eUndefined, ComponentType::Invalid, false}, // A1B5G5R5U
109 {vk::Format::eR8Unorm, ComponentType::UNorm, true}, // R8U
110 {vk::Format::eUndefined, ComponentType::Invalid, false}, // R8UI
111 {vk::Format::eUndefined, ComponentType::Invalid, false}, // RGBA16F
112 {vk::Format::eUndefined, ComponentType::Invalid, false}, // RGBA16U
113 {vk::Format::eUndefined, ComponentType::Invalid, false}, // RGBA16UI
114 {vk::Format::eUndefined, ComponentType::Invalid, false}, // R11FG11FB10F
115 {vk::Format::eUndefined, ComponentType::Invalid, false}, // RGBA32UI
116 {vk::Format::eBc1RgbaUnormBlock, ComponentType::UNorm, false}, // DXT1
117 {vk::Format::eBc2UnormBlock, ComponentType::UNorm, false}, // DXT23
118 {vk::Format::eBc3UnormBlock, ComponentType::UNorm, false}, // DXT45
119 {vk::Format::eBc4UnormBlock, ComponentType::UNorm, false}, // DXN1
120 {vk::Format::eUndefined, ComponentType::Invalid, false}, // DXN2UNORM
121 {vk::Format::eUndefined, ComponentType::Invalid, false}, // DXN2SNORM
122 {vk::Format::eUndefined, ComponentType::Invalid, false}, // BC7U
123 {vk::Format::eUndefined, ComponentType::Invalid, false}, // BC6H_UF16
124 {vk::Format::eUndefined, ComponentType::Invalid, false}, // BC6H_SF16
125 {vk::Format::eUndefined, ComponentType::Invalid, false}, // ASTC_2D_4X4
126 {vk::Format::eUndefined, ComponentType::Invalid, false}, // BGRA8
127 {vk::Format::eUndefined, ComponentType::Invalid, false}, // RGBA32F
128 {vk::Format::eUndefined, ComponentType::Invalid, false}, // RG32F
129 {vk::Format::eUndefined, ComponentType::Invalid, false}, // R32F
130 {vk::Format::eUndefined, ComponentType::Invalid, false}, // R16F
131 {vk::Format::eUndefined, ComponentType::Invalid, false}, // R16U
132 {vk::Format::eUndefined, ComponentType::Invalid, false}, // R16S
133 {vk::Format::eUndefined, ComponentType::Invalid, false}, // R16UI
134 {vk::Format::eUndefined, ComponentType::Invalid, false}, // R16I
135 {vk::Format::eUndefined, ComponentType::Invalid, false}, // RG16
136 {vk::Format::eUndefined, ComponentType::Invalid, false}, // RG16F
137 {vk::Format::eUndefined, ComponentType::Invalid, false}, // RG16UI
138 {vk::Format::eUndefined, ComponentType::Invalid, false}, // RG16I
139 {vk::Format::eUndefined, ComponentType::Invalid, false}, // RG16S
140 {vk::Format::eUndefined, ComponentType::Invalid, false}, // RGB32F
141 {vk::Format::eA8B8G8R8SrgbPack32, ComponentType::UNorm, true}, // RGBA8_SRGB
142 {vk::Format::eUndefined, ComponentType::Invalid, false}, // RG8U
143 {vk::Format::eUndefined, ComponentType::Invalid, false}, // RG8S
144 {vk::Format::eUndefined, ComponentType::Invalid, false}, // RG32UI
145 {vk::Format::eUndefined, ComponentType::Invalid, false}, // R32UI
146 {vk::Format::eUndefined, ComponentType::Invalid, false}, // ASTC_2D_8X8
147 {vk::Format::eUndefined, ComponentType::Invalid, false}, // ASTC_2D_8X5
148 {vk::Format::eUndefined, ComponentType::Invalid, false}, // ASTC_2D_5X4
149
150 // Compressed sRGB formats
151 {vk::Format::eUndefined, ComponentType::Invalid, false}, // BGRA8_SRGB
152 {vk::Format::eUndefined, ComponentType::Invalid, false}, // DXT1_SRGB
153 {vk::Format::eUndefined, ComponentType::Invalid, false}, // DXT23_SRGB
154 {vk::Format::eUndefined, ComponentType::Invalid, false}, // DXT45_SRGB
155 {vk::Format::eUndefined, ComponentType::Invalid, false}, // BC7U_SRGB
156 {vk::Format::eUndefined, ComponentType::Invalid, false}, // ASTC_2D_4X4_SRGB
157 {vk::Format::eUndefined, ComponentType::Invalid, false}, // ASTC_2D_8X8_SRGB
158 {vk::Format::eUndefined, ComponentType::Invalid, false}, // ASTC_2D_8X5_SRGB
159 {vk::Format::eUndefined, ComponentType::Invalid, false}, // ASTC_2D_5X4_SRGB
160 {vk::Format::eUndefined, ComponentType::Invalid, false}, // ASTC_2D_5X5
161 {vk::Format::eUndefined, ComponentType::Invalid, false}, // ASTC_2D_5X5_SRGB
162 {vk::Format::eUndefined, ComponentType::Invalid, false}, // ASTC_2D_10X8
163 {vk::Format::eUndefined, ComponentType::Invalid, false}, // ASTC_2D_10X8_SRGB
164
165 // Depth formats
166 {vk::Format::eD32Sfloat, ComponentType::Float, true}, // Z32F
167 {vk::Format::eD16Unorm, ComponentType::UNorm, true}, // Z16
168
169 // DepthStencil formats
170 {vk::Format::eD24UnormS8Uint, ComponentType::UNorm, true}, // Z24S8
171 {vk::Format::eD24UnormS8Uint, ComponentType::UNorm, true}, // S8Z24 (emulated)
172 {vk::Format::eUndefined, ComponentType::Invalid, false}, // Z32FS8
173}};
174
175static constexpr bool IsZetaFormat(PixelFormat pixel_format) {
176 return pixel_format >= PixelFormat::MaxColorFormat &&
177 pixel_format < PixelFormat::MaxDepthStencilFormat;
178}
179
180std::pair<vk::Format, bool> SurfaceFormat(const VKDevice& device, FormatType format_type,
181 PixelFormat pixel_format, ComponentType component_type) {
182 ASSERT(static_cast<std::size_t>(pixel_format) < tex_format_tuples.size());
183
184 const auto tuple = tex_format_tuples[static_cast<u32>(pixel_format)];
185 UNIMPLEMENTED_IF_MSG(tuple.format == vk::Format::eUndefined,
186 "Unimplemented texture format with pixel format={} and component type={}",
187 static_cast<u32>(pixel_format), static_cast<u32>(component_type));
188 ASSERT_MSG(component_type == tuple.component_type, "Component type mismatch");
189
190 auto usage = vk::FormatFeatureFlagBits::eSampledImage |
191 vk::FormatFeatureFlagBits::eTransferDst | vk::FormatFeatureFlagBits::eTransferSrc;
192 if (tuple.attachable) {
193 usage |= IsZetaFormat(pixel_format) ? vk::FormatFeatureFlagBits::eDepthStencilAttachment
194 : vk::FormatFeatureFlagBits::eColorAttachment;
195 }
196 return {device.GetSupportedFormat(tuple.format, usage, format_type), tuple.attachable};
197}
198
199vk::ShaderStageFlagBits ShaderStage(Maxwell::ShaderStage stage) {
200 switch (stage) {
201 case Maxwell::ShaderStage::Vertex:
202 return vk::ShaderStageFlagBits::eVertex;
203 case Maxwell::ShaderStage::TesselationControl:
204 return vk::ShaderStageFlagBits::eTessellationControl;
205 case Maxwell::ShaderStage::TesselationEval:
206 return vk::ShaderStageFlagBits::eTessellationEvaluation;
207 case Maxwell::ShaderStage::Geometry:
208 return vk::ShaderStageFlagBits::eGeometry;
209 case Maxwell::ShaderStage::Fragment:
210 return vk::ShaderStageFlagBits::eFragment;
211 }
212 UNIMPLEMENTED_MSG("Unimplemented shader stage={}", static_cast<u32>(stage));
213 return {};
214}
215
216vk::PrimitiveTopology PrimitiveTopology(Maxwell::PrimitiveTopology topology) {
217 switch (topology) {
218 case Maxwell::PrimitiveTopology::Points:
219 return vk::PrimitiveTopology::ePointList;
220 case Maxwell::PrimitiveTopology::Lines:
221 return vk::PrimitiveTopology::eLineList;
222 case Maxwell::PrimitiveTopology::LineStrip:
223 return vk::PrimitiveTopology::eLineStrip;
224 case Maxwell::PrimitiveTopology::Triangles:
225 return vk::PrimitiveTopology::eTriangleList;
226 case Maxwell::PrimitiveTopology::TriangleStrip:
227 return vk::PrimitiveTopology::eTriangleStrip;
228 }
229 UNIMPLEMENTED_MSG("Unimplemented topology={}", static_cast<u32>(topology));
230 return {};
231}
232
233vk::Format VertexFormat(Maxwell::VertexAttribute::Type type, Maxwell::VertexAttribute::Size size) {
234 switch (type) {
235 case Maxwell::VertexAttribute::Type::SignedNorm:
236 break;
237 case Maxwell::VertexAttribute::Type::UnsignedNorm:
238 switch (size) {
239 case Maxwell::VertexAttribute::Size::Size_8_8_8_8:
240 return vk::Format::eR8G8B8A8Unorm;
241 default:
242 break;
243 }
244 break;
245 case Maxwell::VertexAttribute::Type::SignedInt:
246 break;
247 case Maxwell::VertexAttribute::Type::UnsignedInt:
248 switch (size) {
249 case Maxwell::VertexAttribute::Size::Size_32:
250 return vk::Format::eR32Uint;
251 default:
252 break;
253 }
254 case Maxwell::VertexAttribute::Type::UnsignedScaled:
255 case Maxwell::VertexAttribute::Type::SignedScaled:
256 break;
257 case Maxwell::VertexAttribute::Type::Float:
258 switch (size) {
259 case Maxwell::VertexAttribute::Size::Size_32_32_32_32:
260 return vk::Format::eR32G32B32A32Sfloat;
261 case Maxwell::VertexAttribute::Size::Size_32_32_32:
262 return vk::Format::eR32G32B32Sfloat;
263 case Maxwell::VertexAttribute::Size::Size_32_32:
264 return vk::Format::eR32G32Sfloat;
265 case Maxwell::VertexAttribute::Size::Size_32:
266 return vk::Format::eR32Sfloat;
267 default:
268 break;
269 }
270 break;
271 }
272 UNIMPLEMENTED_MSG("Unimplemented vertex format of type={} and size={}", static_cast<u32>(type),
273 static_cast<u32>(size));
274 return {};
275}
276
277vk::CompareOp ComparisonOp(Maxwell::ComparisonOp comparison) {
278 switch (comparison) {
279 case Maxwell::ComparisonOp::Never:
280 case Maxwell::ComparisonOp::NeverOld:
281 return vk::CompareOp::eNever;
282 case Maxwell::ComparisonOp::Less:
283 case Maxwell::ComparisonOp::LessOld:
284 return vk::CompareOp::eLess;
285 case Maxwell::ComparisonOp::Equal:
286 case Maxwell::ComparisonOp::EqualOld:
287 return vk::CompareOp::eEqual;
288 case Maxwell::ComparisonOp::LessEqual:
289 case Maxwell::ComparisonOp::LessEqualOld:
290 return vk::CompareOp::eLessOrEqual;
291 case Maxwell::ComparisonOp::Greater:
292 case Maxwell::ComparisonOp::GreaterOld:
293 return vk::CompareOp::eGreater;
294 case Maxwell::ComparisonOp::NotEqual:
295 case Maxwell::ComparisonOp::NotEqualOld:
296 return vk::CompareOp::eNotEqual;
297 case Maxwell::ComparisonOp::GreaterEqual:
298 case Maxwell::ComparisonOp::GreaterEqualOld:
299 return vk::CompareOp::eGreaterOrEqual;
300 case Maxwell::ComparisonOp::Always:
301 case Maxwell::ComparisonOp::AlwaysOld:
302 return vk::CompareOp::eAlways;
303 }
304 UNIMPLEMENTED_MSG("Unimplemented comparison op={}", static_cast<u32>(comparison));
305 return {};
306}
307
308vk::IndexType IndexFormat(Maxwell::IndexFormat index_format) {
309 switch (index_format) {
310 case Maxwell::IndexFormat::UnsignedByte:
311 UNIMPLEMENTED_MSG("Vulkan does not support native u8 index format");
312 return vk::IndexType::eUint16;
313 case Maxwell::IndexFormat::UnsignedShort:
314 return vk::IndexType::eUint16;
315 case Maxwell::IndexFormat::UnsignedInt:
316 return vk::IndexType::eUint32;
317 }
318 UNIMPLEMENTED_MSG("Unimplemented index_format={}", static_cast<u32>(index_format));
319 return {};
320}
321
322vk::StencilOp StencilOp(Maxwell::StencilOp stencil_op) {
323 switch (stencil_op) {
324 case Maxwell::StencilOp::Keep:
325 case Maxwell::StencilOp::KeepOGL:
326 return vk::StencilOp::eKeep;
327 case Maxwell::StencilOp::Zero:
328 case Maxwell::StencilOp::ZeroOGL:
329 return vk::StencilOp::eZero;
330 case Maxwell::StencilOp::Replace:
331 case Maxwell::StencilOp::ReplaceOGL:
332 return vk::StencilOp::eReplace;
333 case Maxwell::StencilOp::Incr:
334 case Maxwell::StencilOp::IncrOGL:
335 return vk::StencilOp::eIncrementAndClamp;
336 case Maxwell::StencilOp::Decr:
337 case Maxwell::StencilOp::DecrOGL:
338 return vk::StencilOp::eDecrementAndClamp;
339 case Maxwell::StencilOp::Invert:
340 case Maxwell::StencilOp::InvertOGL:
341 return vk::StencilOp::eInvert;
342 case Maxwell::StencilOp::IncrWrap:
343 case Maxwell::StencilOp::IncrWrapOGL:
344 return vk::StencilOp::eIncrementAndWrap;
345 case Maxwell::StencilOp::DecrWrap:
346 case Maxwell::StencilOp::DecrWrapOGL:
347 return vk::StencilOp::eDecrementAndWrap;
348 }
349 UNIMPLEMENTED_MSG("Unimplemented stencil op={}", static_cast<u32>(stencil_op));
350 return {};
351}
352
353vk::BlendOp BlendEquation(Maxwell::Blend::Equation equation) {
354 switch (equation) {
355 case Maxwell::Blend::Equation::Add:
356 case Maxwell::Blend::Equation::AddGL:
357 return vk::BlendOp::eAdd;
358 case Maxwell::Blend::Equation::Subtract:
359 case Maxwell::Blend::Equation::SubtractGL:
360 return vk::BlendOp::eSubtract;
361 case Maxwell::Blend::Equation::ReverseSubtract:
362 case Maxwell::Blend::Equation::ReverseSubtractGL:
363 return vk::BlendOp::eReverseSubtract;
364 case Maxwell::Blend::Equation::Min:
365 case Maxwell::Blend::Equation::MinGL:
366 return vk::BlendOp::eMin;
367 case Maxwell::Blend::Equation::Max:
368 case Maxwell::Blend::Equation::MaxGL:
369 return vk::BlendOp::eMax;
370 }
371 UNIMPLEMENTED_MSG("Unimplemented blend equation={}", static_cast<u32>(equation));
372 return {};
373}
374
375vk::BlendFactor BlendFactor(Maxwell::Blend::Factor factor) {
376 switch (factor) {
377 case Maxwell::Blend::Factor::Zero:
378 case Maxwell::Blend::Factor::ZeroGL:
379 return vk::BlendFactor::eZero;
380 case Maxwell::Blend::Factor::One:
381 case Maxwell::Blend::Factor::OneGL:
382 return vk::BlendFactor::eOne;
383 case Maxwell::Blend::Factor::SourceColor:
384 case Maxwell::Blend::Factor::SourceColorGL:
385 return vk::BlendFactor::eSrcColor;
386 case Maxwell::Blend::Factor::OneMinusSourceColor:
387 case Maxwell::Blend::Factor::OneMinusSourceColorGL:
388 return vk::BlendFactor::eOneMinusSrcColor;
389 case Maxwell::Blend::Factor::SourceAlpha:
390 case Maxwell::Blend::Factor::SourceAlphaGL:
391 return vk::BlendFactor::eSrcAlpha;
392 case Maxwell::Blend::Factor::OneMinusSourceAlpha:
393 case Maxwell::Blend::Factor::OneMinusSourceAlphaGL:
394 return vk::BlendFactor::eOneMinusSrcAlpha;
395 case Maxwell::Blend::Factor::DestAlpha:
396 case Maxwell::Blend::Factor::DestAlphaGL:
397 return vk::BlendFactor::eDstAlpha;
398 case Maxwell::Blend::Factor::OneMinusDestAlpha:
399 case Maxwell::Blend::Factor::OneMinusDestAlphaGL:
400 return vk::BlendFactor::eOneMinusDstAlpha;
401 case Maxwell::Blend::Factor::DestColor:
402 case Maxwell::Blend::Factor::DestColorGL:
403 return vk::BlendFactor::eDstColor;
404 case Maxwell::Blend::Factor::OneMinusDestColor:
405 case Maxwell::Blend::Factor::OneMinusDestColorGL:
406 return vk::BlendFactor::eOneMinusDstColor;
407 case Maxwell::Blend::Factor::SourceAlphaSaturate:
408 case Maxwell::Blend::Factor::SourceAlphaSaturateGL:
409 return vk::BlendFactor::eSrcAlphaSaturate;
410 case Maxwell::Blend::Factor::Source1Color:
411 case Maxwell::Blend::Factor::Source1ColorGL:
412 return vk::BlendFactor::eSrc1Color;
413 case Maxwell::Blend::Factor::OneMinusSource1Color:
414 case Maxwell::Blend::Factor::OneMinusSource1ColorGL:
415 return vk::BlendFactor::eOneMinusSrc1Color;
416 case Maxwell::Blend::Factor::Source1Alpha:
417 case Maxwell::Blend::Factor::Source1AlphaGL:
418 return vk::BlendFactor::eSrc1Alpha;
419 case Maxwell::Blend::Factor::OneMinusSource1Alpha:
420 case Maxwell::Blend::Factor::OneMinusSource1AlphaGL:
421 return vk::BlendFactor::eOneMinusSrc1Alpha;
422 case Maxwell::Blend::Factor::ConstantColor:
423 case Maxwell::Blend::Factor::ConstantColorGL:
424 return vk::BlendFactor::eConstantColor;
425 case Maxwell::Blend::Factor::OneMinusConstantColor:
426 case Maxwell::Blend::Factor::OneMinusConstantColorGL:
427 return vk::BlendFactor::eOneMinusConstantColor;
428 case Maxwell::Blend::Factor::ConstantAlpha:
429 case Maxwell::Blend::Factor::ConstantAlphaGL:
430 return vk::BlendFactor::eConstantAlpha;
431 case Maxwell::Blend::Factor::OneMinusConstantAlpha:
432 case Maxwell::Blend::Factor::OneMinusConstantAlphaGL:
433 return vk::BlendFactor::eOneMinusConstantAlpha;
434 }
435 UNIMPLEMENTED_MSG("Unimplemented blend factor={}", static_cast<u32>(factor));
436 return {};
437}
438
439vk::FrontFace FrontFace(Maxwell::Cull::FrontFace front_face) {
440 switch (front_face) {
441 case Maxwell::Cull::FrontFace::ClockWise:
442 return vk::FrontFace::eClockwise;
443 case Maxwell::Cull::FrontFace::CounterClockWise:
444 return vk::FrontFace::eCounterClockwise;
445 }
446 UNIMPLEMENTED_MSG("Unimplemented front face={}", static_cast<u32>(front_face));
447 return {};
448}
449
450vk::CullModeFlags CullFace(Maxwell::Cull::CullFace cull_face) {
451 switch (cull_face) {
452 case Maxwell::Cull::CullFace::Front:
453 return vk::CullModeFlagBits::eFront;
454 case Maxwell::Cull::CullFace::Back:
455 return vk::CullModeFlagBits::eBack;
456 case Maxwell::Cull::CullFace::FrontAndBack:
457 return vk::CullModeFlagBits::eFrontAndBack;
458 }
459 UNIMPLEMENTED_MSG("Unimplemented cull face={}", static_cast<u32>(cull_face));
460 return {};
461}
462
463vk::ComponentSwizzle SwizzleSource(Tegra::Texture::SwizzleSource swizzle) {
464 switch (swizzle) {
465 case Tegra::Texture::SwizzleSource::Zero:
466 return vk::ComponentSwizzle::eZero;
467 case Tegra::Texture::SwizzleSource::R:
468 return vk::ComponentSwizzle::eR;
469 case Tegra::Texture::SwizzleSource::G:
470 return vk::ComponentSwizzle::eG;
471 case Tegra::Texture::SwizzleSource::B:
472 return vk::ComponentSwizzle::eB;
473 case Tegra::Texture::SwizzleSource::A:
474 return vk::ComponentSwizzle::eA;
475 case Tegra::Texture::SwizzleSource::OneInt:
476 case Tegra::Texture::SwizzleSource::OneFloat:
477 return vk::ComponentSwizzle::eOne;
478 }
479 UNIMPLEMENTED_MSG("Unimplemented swizzle source={}", static_cast<u32>(swizzle));
480 return {};
481}
482
483} // namespace Vulkan::MaxwellToVK
diff --git a/src/video_core/renderer_vulkan/maxwell_to_vk.h b/src/video_core/renderer_vulkan/maxwell_to_vk.h
new file mode 100644
index 000000000..4cadc0721
--- /dev/null
+++ b/src/video_core/renderer_vulkan/maxwell_to_vk.h
@@ -0,0 +1,58 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <utility>
8#include "common/common_types.h"
9#include "video_core/engines/maxwell_3d.h"
10#include "video_core/renderer_vulkan/declarations.h"
11#include "video_core/renderer_vulkan/vk_device.h"
12#include "video_core/surface.h"
13#include "video_core/textures/texture.h"
14
15namespace Vulkan::MaxwellToVK {
16
17using Maxwell = Tegra::Engines::Maxwell3D::Regs;
18using PixelFormat = VideoCore::Surface::PixelFormat;
19using ComponentType = VideoCore::Surface::ComponentType;
20
21namespace Sampler {
22
23vk::Filter Filter(Tegra::Texture::TextureFilter filter);
24
25vk::SamplerMipmapMode MipmapMode(Tegra::Texture::TextureMipmapFilter mipmap_filter);
26
27vk::SamplerAddressMode WrapMode(Tegra::Texture::WrapMode wrap_mode);
28
29vk::CompareOp DepthCompareFunction(Tegra::Texture::DepthCompareFunc depth_compare_func);
30
31} // namespace Sampler
32
33std::pair<vk::Format, bool> SurfaceFormat(const VKDevice& device, FormatType format_type,
34 PixelFormat pixel_format, ComponentType component_type);
35
36vk::ShaderStageFlagBits ShaderStage(Maxwell::ShaderStage stage);
37
38vk::PrimitiveTopology PrimitiveTopology(Maxwell::PrimitiveTopology topology);
39
40vk::Format VertexFormat(Maxwell::VertexAttribute::Type type, Maxwell::VertexAttribute::Size size);
41
42vk::CompareOp ComparisonOp(Maxwell::ComparisonOp comparison);
43
44vk::IndexType IndexFormat(Maxwell::IndexFormat index_format);
45
46vk::StencilOp StencilOp(Maxwell::StencilOp stencil_op);
47
48vk::BlendOp BlendEquation(Maxwell::Blend::Equation equation);
49
50vk::BlendFactor BlendFactor(Maxwell::Blend::Factor factor);
51
52vk::FrontFace FrontFace(Maxwell::Cull::FrontFace front_face);
53
54vk::CullModeFlags CullFace(Maxwell::Cull::CullFace cull_face);
55
56vk::ComponentSwizzle SwizzleSource(Tegra::Texture::SwizzleSource swizzle);
57
58} // namespace Vulkan::MaxwellToVK
diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp
new file mode 100644
index 000000000..95eab3fec
--- /dev/null
+++ b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp
@@ -0,0 +1,123 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <cstring>
6#include <memory>
7#include <optional>
8#include <tuple>
9
10#include "common/alignment.h"
11#include "common/assert.h"
12#include "core/memory.h"
13#include "video_core/renderer_vulkan/declarations.h"
14#include "video_core/renderer_vulkan/vk_buffer_cache.h"
15#include "video_core/renderer_vulkan/vk_scheduler.h"
16#include "video_core/renderer_vulkan/vk_stream_buffer.h"
17
18namespace Vulkan {
19
20CachedBufferEntry::CachedBufferEntry(VAddr cpu_addr, std::size_t size, u64 offset,
21 std::size_t alignment, u8* host_ptr)
22 : cpu_addr{cpu_addr}, size{size}, offset{offset}, alignment{alignment}, RasterizerCacheObject{
23 host_ptr} {}
24
25VKBufferCache::VKBufferCache(Tegra::MemoryManager& tegra_memory_manager,
26 VideoCore::RasterizerInterface& rasterizer, const VKDevice& device,
27 VKMemoryManager& memory_manager, VKScheduler& scheduler, u64 size)
28 : RasterizerCache{rasterizer}, tegra_memory_manager{tegra_memory_manager} {
29 const auto usage = vk::BufferUsageFlagBits::eVertexBuffer |
30 vk::BufferUsageFlagBits::eIndexBuffer |
31 vk::BufferUsageFlagBits::eUniformBuffer;
32 const auto access = vk::AccessFlagBits::eVertexAttributeRead | vk::AccessFlagBits::eIndexRead |
33 vk::AccessFlagBits::eUniformRead;
34 stream_buffer =
35 std::make_unique<VKStreamBuffer>(device, memory_manager, scheduler, size, usage, access,
36 vk::PipelineStageFlagBits::eAllCommands);
37 buffer_handle = stream_buffer->GetBuffer();
38}
39
40VKBufferCache::~VKBufferCache() = default;
41
42u64 VKBufferCache::UploadMemory(Tegra::GPUVAddr gpu_addr, std::size_t size, u64 alignment,
43 bool cache) {
44 const auto cpu_addr{tegra_memory_manager.GpuToCpuAddress(gpu_addr)};
45 ASSERT_MSG(cpu_addr, "Invalid GPU address");
46
47 // Cache management is a big overhead, so only cache entries with a given size.
48 // TODO: Figure out which size is the best for given games.
49 cache &= size >= 2048;
50
51 const auto& host_ptr{Memory::GetPointer(*cpu_addr)};
52 if (cache) {
53 auto entry = TryGet(host_ptr);
54 if (entry) {
55 if (entry->GetSize() >= size && entry->GetAlignment() == alignment) {
56 return entry->GetOffset();
57 }
58 Unregister(entry);
59 }
60 }
61
62 AlignBuffer(alignment);
63 const u64 uploaded_offset = buffer_offset;
64
65 if (!host_ptr) {
66 return uploaded_offset;
67 }
68
69 std::memcpy(buffer_ptr, host_ptr, size);
70 buffer_ptr += size;
71 buffer_offset += size;
72
73 if (cache) {
74 auto entry = std::make_shared<CachedBufferEntry>(*cpu_addr, size, uploaded_offset,
75 alignment, host_ptr);
76 Register(entry);
77 }
78
79 return uploaded_offset;
80}
81
82u64 VKBufferCache::UploadHostMemory(const u8* raw_pointer, std::size_t size, u64 alignment) {
83 AlignBuffer(alignment);
84 std::memcpy(buffer_ptr, raw_pointer, size);
85 const u64 uploaded_offset = buffer_offset;
86
87 buffer_ptr += size;
88 buffer_offset += size;
89 return uploaded_offset;
90}
91
92std::tuple<u8*, u64> VKBufferCache::ReserveMemory(std::size_t size, u64 alignment) {
93 AlignBuffer(alignment);
94 u8* const uploaded_ptr = buffer_ptr;
95 const u64 uploaded_offset = buffer_offset;
96
97 buffer_ptr += size;
98 buffer_offset += size;
99 return {uploaded_ptr, uploaded_offset};
100}
101
102void VKBufferCache::Reserve(std::size_t max_size) {
103 bool invalidate;
104 std::tie(buffer_ptr, buffer_offset_base, invalidate) = stream_buffer->Reserve(max_size);
105 buffer_offset = buffer_offset_base;
106
107 if (invalidate) {
108 InvalidateAll();
109 }
110}
111
112VKExecutionContext VKBufferCache::Send(VKExecutionContext exctx) {
113 return stream_buffer->Send(exctx, buffer_offset - buffer_offset_base);
114}
115
116void VKBufferCache::AlignBuffer(std::size_t alignment) {
117 // Align the offset, not the mapped pointer
118 const u64 offset_aligned = Common::AlignUp(buffer_offset, alignment);
119 buffer_ptr += offset_aligned - buffer_offset;
120 buffer_offset = offset_aligned;
121}
122
123} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.h b/src/video_core/renderer_vulkan/vk_buffer_cache.h
new file mode 100644
index 000000000..8b415744b
--- /dev/null
+++ b/src/video_core/renderer_vulkan/vk_buffer_cache.h
@@ -0,0 +1,104 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <memory>
8#include <tuple>
9
10#include "common/common_types.h"
11#include "video_core/gpu.h"
12#include "video_core/rasterizer_cache.h"
13#include "video_core/renderer_vulkan/declarations.h"
14#include "video_core/renderer_vulkan/vk_scheduler.h"
15
16namespace Tegra {
17class MemoryManager;
18}
19
20namespace Vulkan {
21
22class VKDevice;
23class VKFence;
24class VKMemoryManager;
25class VKStreamBuffer;
26
27class CachedBufferEntry final : public RasterizerCacheObject {
28public:
29 explicit CachedBufferEntry(VAddr cpu_addr, std::size_t size, u64 offset, std::size_t alignment,
30 u8* host_ptr);
31
32 VAddr GetCpuAddr() const override {
33 return cpu_addr;
34 }
35
36 std::size_t GetSizeInBytes() const override {
37 return size;
38 }
39
40 std::size_t GetSize() const {
41 return size;
42 }
43
44 u64 GetOffset() const {
45 return offset;
46 }
47
48 std::size_t GetAlignment() const {
49 return alignment;
50 }
51
52 // We do not have to flush this cache as things in it are never modified by us.
53 void Flush() override {}
54
55private:
56 VAddr cpu_addr{};
57 std::size_t size{};
58 u64 offset{};
59 std::size_t alignment{};
60};
61
62class VKBufferCache final : public RasterizerCache<std::shared_ptr<CachedBufferEntry>> {
63public:
64 explicit VKBufferCache(Tegra::MemoryManager& tegra_memory_manager,
65 VideoCore::RasterizerInterface& rasterizer, const VKDevice& device,
66 VKMemoryManager& memory_manager, VKScheduler& scheduler, u64 size);
67 ~VKBufferCache();
68
69 /// Uploads data from a guest GPU address. Returns host's buffer offset where it's been
70 /// allocated.
71 u64 UploadMemory(Tegra::GPUVAddr gpu_addr, std::size_t size, u64 alignment = 4,
72 bool cache = true);
73
74 /// Uploads from a host memory. Returns host's buffer offset where it's been allocated.
75 u64 UploadHostMemory(const u8* raw_pointer, std::size_t size, u64 alignment = 4);
76
77 /// Reserves memory to be used by host's CPU. Returns mapped address and offset.
78 std::tuple<u8*, u64> ReserveMemory(std::size_t size, u64 alignment = 4);
79
80 /// Reserves a region of memory to be used in subsequent upload/reserve operations.
81 void Reserve(std::size_t max_size);
82
83 /// Ensures that the set data is sent to the device.
84 [[nodiscard]] VKExecutionContext Send(VKExecutionContext exctx);
85
86 /// Returns the buffer cache handle.
87 vk::Buffer GetBuffer() const {
88 return buffer_handle;
89 }
90
91private:
92 void AlignBuffer(std::size_t alignment);
93
94 Tegra::MemoryManager& tegra_memory_manager;
95
96 std::unique_ptr<VKStreamBuffer> stream_buffer;
97 vk::Buffer buffer_handle;
98
99 u8* buffer_ptr = nullptr;
100 u64 buffer_offset = 0;
101 u64 buffer_offset_base = 0;
102};
103
104} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_device.cpp b/src/video_core/renderer_vulkan/vk_device.cpp
index 78a4e5f0e..00242ecbe 100644
--- a/src/video_core/renderer_vulkan/vk_device.cpp
+++ b/src/video_core/renderer_vulkan/vk_device.cpp
@@ -122,8 +122,7 @@ bool VKDevice::IsFormatSupported(vk::Format wanted_format, vk::FormatFeatureFlag
122 FormatType format_type) const { 122 FormatType format_type) const {
123 const auto it = format_properties.find(wanted_format); 123 const auto it = format_properties.find(wanted_format);
124 if (it == format_properties.end()) { 124 if (it == format_properties.end()) {
125 LOG_CRITICAL(Render_Vulkan, "Unimplemented format query={}", 125 LOG_CRITICAL(Render_Vulkan, "Unimplemented format query={}", vk::to_string(wanted_format));
126 static_cast<u32>(wanted_format));
127 UNREACHABLE(); 126 UNREACHABLE();
128 return true; 127 return true;
129 } 128 }
@@ -219,11 +218,19 @@ std::map<vk::Format, vk::FormatProperties> VKDevice::GetFormatProperties(
219 format_properties.emplace(format, physical.getFormatProperties(format, dldi)); 218 format_properties.emplace(format, physical.getFormatProperties(format, dldi));
220 }; 219 };
221 AddFormatQuery(vk::Format::eA8B8G8R8UnormPack32); 220 AddFormatQuery(vk::Format::eA8B8G8R8UnormPack32);
222 AddFormatQuery(vk::Format::eR5G6B5UnormPack16); 221 AddFormatQuery(vk::Format::eB5G6R5UnormPack16);
222 AddFormatQuery(vk::Format::eA2B10G10R10UnormPack32);
223 AddFormatQuery(vk::Format::eR8G8B8A8Srgb);
224 AddFormatQuery(vk::Format::eR8Unorm);
223 AddFormatQuery(vk::Format::eD32Sfloat); 225 AddFormatQuery(vk::Format::eD32Sfloat);
226 AddFormatQuery(vk::Format::eD16Unorm);
224 AddFormatQuery(vk::Format::eD16UnormS8Uint); 227 AddFormatQuery(vk::Format::eD16UnormS8Uint);
225 AddFormatQuery(vk::Format::eD24UnormS8Uint); 228 AddFormatQuery(vk::Format::eD24UnormS8Uint);
226 AddFormatQuery(vk::Format::eD32SfloatS8Uint); 229 AddFormatQuery(vk::Format::eD32SfloatS8Uint);
230 AddFormatQuery(vk::Format::eBc1RgbaUnormBlock);
231 AddFormatQuery(vk::Format::eBc2UnormBlock);
232 AddFormatQuery(vk::Format::eBc3UnormBlock);
233 AddFormatQuery(vk::Format::eBc4UnormBlock);
227 234
228 return format_properties; 235 return format_properties;
229} 236}
diff --git a/src/video_core/renderer_vulkan/vk_memory_manager.cpp b/src/video_core/renderer_vulkan/vk_memory_manager.cpp
new file mode 100644
index 000000000..0451babbf
--- /dev/null
+++ b/src/video_core/renderer_vulkan/vk_memory_manager.cpp
@@ -0,0 +1,252 @@
1// Copyright 2018 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <algorithm>
6#include <optional>
7#include <tuple>
8#include <vector>
9#include "common/alignment.h"
10#include "common/assert.h"
11#include "common/common_types.h"
12#include "common/logging/log.h"
13#include "video_core/renderer_vulkan/declarations.h"
14#include "video_core/renderer_vulkan/vk_device.h"
15#include "video_core/renderer_vulkan/vk_memory_manager.h"
16
17namespace Vulkan {
18
19// TODO(Rodrigo): Fine tune this number
20constexpr u64 ALLOC_CHUNK_SIZE = 64 * 1024 * 1024;
21
22class VKMemoryAllocation final {
23public:
24 explicit VKMemoryAllocation(const VKDevice& device, vk::DeviceMemory memory,
25 vk::MemoryPropertyFlags properties, u64 alloc_size, u32 type)
26 : device{device}, memory{memory}, properties{properties}, alloc_size{alloc_size},
27 shifted_type{ShiftType(type)}, is_mappable{properties &
28 vk::MemoryPropertyFlagBits::eHostVisible} {
29 if (is_mappable) {
30 const auto dev = device.GetLogical();
31 const auto& dld = device.GetDispatchLoader();
32 base_address = static_cast<u8*>(dev.mapMemory(memory, 0, alloc_size, {}, dld));
33 }
34 }
35
36 ~VKMemoryAllocation() {
37 const auto dev = device.GetLogical();
38 const auto& dld = device.GetDispatchLoader();
39 if (is_mappable)
40 dev.unmapMemory(memory, dld);
41 dev.free(memory, nullptr, dld);
42 }
43
44 VKMemoryCommit Commit(vk::DeviceSize commit_size, vk::DeviceSize alignment) {
45 auto found = TryFindFreeSection(free_iterator, alloc_size, static_cast<u64>(commit_size),
46 static_cast<u64>(alignment));
47 if (!found) {
48 found = TryFindFreeSection(0, free_iterator, static_cast<u64>(commit_size),
49 static_cast<u64>(alignment));
50 if (!found) {
51 // Signal out of memory, it'll try to do more allocations.
52 return nullptr;
53 }
54 }
55 u8* address = is_mappable ? base_address + *found : nullptr;
56 auto commit = std::make_unique<VKMemoryCommitImpl>(this, memory, address, *found,
57 *found + commit_size);
58 commits.push_back(commit.get());
59
60 // Last commit's address is highly probable to be free.
61 free_iterator = *found + commit_size;
62
63 return commit;
64 }
65
66 void Free(const VKMemoryCommitImpl* commit) {
67 ASSERT(commit);
68 const auto it =
69 std::find_if(commits.begin(), commits.end(),
70 [&](const auto& stored_commit) { return stored_commit == commit; });
71 if (it == commits.end()) {
72 LOG_CRITICAL(Render_Vulkan, "Freeing unallocated commit!");
73 UNREACHABLE();
74 return;
75 }
76 commits.erase(it);
77 }
78
79 /// Returns whether this allocation is compatible with the arguments.
80 bool IsCompatible(vk::MemoryPropertyFlags wanted_properties, u32 type_mask) const {
81 return (wanted_properties & properties) != vk::MemoryPropertyFlagBits(0) &&
82 (type_mask & shifted_type) != 0;
83 }
84
85private:
86 static constexpr u32 ShiftType(u32 type) {
87 return 1U << type;
88 }
89
90 /// A memory allocator, it may return a free region between "start" and "end" with the solicited
91 /// requeriments.
92 std::optional<u64> TryFindFreeSection(u64 start, u64 end, u64 size, u64 alignment) const {
93 u64 iterator = start;
94 while (iterator + size < end) {
95 const u64 try_left = Common::AlignUp(iterator, alignment);
96 const u64 try_right = try_left + size;
97
98 bool overlap = false;
99 for (const auto& commit : commits) {
100 const auto [commit_left, commit_right] = commit->interval;
101 if (try_left < commit_right && commit_left < try_right) {
102 // There's an overlap, continue the search where the overlapping commit ends.
103 iterator = commit_right;
104 overlap = true;
105 break;
106 }
107 }
108 if (!overlap) {
109 // A free address has been found.
110 return try_left;
111 }
112 }
113 // No free regions where found, return an empty optional.
114 return std::nullopt;
115 }
116
117 const VKDevice& device; ///< Vulkan device.
118 const vk::DeviceMemory memory; ///< Vulkan memory allocation handler.
119 const vk::MemoryPropertyFlags properties; ///< Vulkan properties.
120 const u64 alloc_size; ///< Size of this allocation.
121 const u32 shifted_type; ///< Stored Vulkan type of this allocation, shifted.
122 const bool is_mappable; ///< Whether the allocation is mappable.
123
124 /// Base address of the mapped pointer.
125 u8* base_address{};
126
127 /// Hints where the next free region is likely going to be.
128 u64 free_iterator{};
129
130 /// Stores all commits done from this allocation.
131 std::vector<const VKMemoryCommitImpl*> commits;
132};
133
134VKMemoryManager::VKMemoryManager(const VKDevice& device)
135 : device{device}, props{device.GetPhysical().getMemoryProperties(device.GetDispatchLoader())},
136 is_memory_unified{GetMemoryUnified(props)} {}
137
138VKMemoryManager::~VKMemoryManager() = default;
139
140VKMemoryCommit VKMemoryManager::Commit(const vk::MemoryRequirements& reqs, bool host_visible) {
141 ASSERT(reqs.size < ALLOC_CHUNK_SIZE);
142
143 // When a host visible commit is asked, search for host visible and coherent, otherwise search
144 // for a fast device local type.
145 const vk::MemoryPropertyFlags wanted_properties =
146 host_visible
147 ? vk::MemoryPropertyFlagBits::eHostVisible | vk::MemoryPropertyFlagBits::eHostCoherent
148 : vk::MemoryPropertyFlagBits::eDeviceLocal;
149
150 const auto TryCommit = [&]() -> VKMemoryCommit {
151 for (auto& alloc : allocs) {
152 if (!alloc->IsCompatible(wanted_properties, reqs.memoryTypeBits))
153 continue;
154
155 if (auto commit = alloc->Commit(reqs.size, reqs.alignment); commit) {
156 return commit;
157 }
158 }
159 return {};
160 };
161
162 if (auto commit = TryCommit(); commit) {
163 return commit;
164 }
165
166 // Commit has failed, allocate more memory.
167 if (!AllocMemory(wanted_properties, reqs.memoryTypeBits, ALLOC_CHUNK_SIZE)) {
168 // TODO(Rodrigo): Try to use host memory.
169 LOG_CRITICAL(Render_Vulkan, "Ran out of memory!");
170 UNREACHABLE();
171 }
172
173 // Commit again, this time it won't fail since there's a fresh allocation above. If it does,
174 // there's a bug.
175 auto commit = TryCommit();
176 ASSERT(commit);
177 return commit;
178}
179
180VKMemoryCommit VKMemoryManager::Commit(vk::Buffer buffer, bool host_visible) {
181 const auto dev = device.GetLogical();
182 const auto& dld = device.GetDispatchLoader();
183 const auto requeriments = dev.getBufferMemoryRequirements(buffer, dld);
184 auto commit = Commit(requeriments, host_visible);
185 dev.bindBufferMemory(buffer, commit->GetMemory(), commit->GetOffset(), dld);
186 return commit;
187}
188
189VKMemoryCommit VKMemoryManager::Commit(vk::Image image, bool host_visible) {
190 const auto dev = device.GetLogical();
191 const auto& dld = device.GetDispatchLoader();
192 const auto requeriments = dev.getImageMemoryRequirements(image, dld);
193 auto commit = Commit(requeriments, host_visible);
194 dev.bindImageMemory(image, commit->GetMemory(), commit->GetOffset(), dld);
195 return commit;
196}
197
198bool VKMemoryManager::AllocMemory(vk::MemoryPropertyFlags wanted_properties, u32 type_mask,
199 u64 size) {
200 const u32 type = [&]() {
201 for (u32 type_index = 0; type_index < props.memoryTypeCount; ++type_index) {
202 const auto flags = props.memoryTypes[type_index].propertyFlags;
203 if ((type_mask & (1U << type_index)) && (flags & wanted_properties)) {
204 // The type matches in type and in the wanted properties.
205 return type_index;
206 }
207 }
208 LOG_CRITICAL(Render_Vulkan, "Couldn't find a compatible memory type!");
209 UNREACHABLE();
210 return 0u;
211 }();
212
213 const auto dev = device.GetLogical();
214 const auto& dld = device.GetDispatchLoader();
215
216 // Try to allocate found type.
217 const vk::MemoryAllocateInfo memory_ai(size, type);
218 vk::DeviceMemory memory;
219 if (const vk::Result res = dev.allocateMemory(&memory_ai, nullptr, &memory, dld);
220 res != vk::Result::eSuccess) {
221 LOG_CRITICAL(Render_Vulkan, "Device allocation failed with code {}!", vk::to_string(res));
222 return false;
223 }
224 allocs.push_back(
225 std::make_unique<VKMemoryAllocation>(device, memory, wanted_properties, size, type));
226 return true;
227}
228
229/*static*/ bool VKMemoryManager::GetMemoryUnified(const vk::PhysicalDeviceMemoryProperties& props) {
230 for (u32 heap_index = 0; heap_index < props.memoryHeapCount; ++heap_index) {
231 if (!(props.memoryHeaps[heap_index].flags & vk::MemoryHeapFlagBits::eDeviceLocal)) {
232 // Memory is considered unified when heaps are device local only.
233 return false;
234 }
235 }
236 return true;
237}
238
239VKMemoryCommitImpl::VKMemoryCommitImpl(VKMemoryAllocation* allocation, vk::DeviceMemory memory,
240 u8* data, u64 begin, u64 end)
241 : interval(std::make_pair(begin, end)), memory{memory}, allocation{allocation}, data{data} {}
242
243VKMemoryCommitImpl::~VKMemoryCommitImpl() {
244 allocation->Free(this);
245}
246
247u8* VKMemoryCommitImpl::GetData() const {
248 ASSERT_MSG(data != nullptr, "Trying to access an unmapped commit.");
249 return data;
250}
251
252} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_memory_manager.h b/src/video_core/renderer_vulkan/vk_memory_manager.h
new file mode 100644
index 000000000..073597b35
--- /dev/null
+++ b/src/video_core/renderer_vulkan/vk_memory_manager.h
@@ -0,0 +1,87 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <memory>
8#include <utility>
9#include <vector>
10#include "common/common_types.h"
11#include "video_core/renderer_vulkan/declarations.h"
12
13namespace Vulkan {
14
15class VKDevice;
16class VKMemoryAllocation;
17class VKMemoryCommitImpl;
18
19using VKMemoryCommit = std::unique_ptr<VKMemoryCommitImpl>;
20
21class VKMemoryManager final {
22public:
23 explicit VKMemoryManager(const VKDevice& device);
24 ~VKMemoryManager();
25
26 /**
27 * Commits a memory with the specified requeriments.
28 * @param reqs Requeriments returned from a Vulkan call.
29 * @param host_visible Signals the allocator that it *must* use host visible and coherent
30 * memory. When passing false, it will try to allocate device local memory.
31 * @returns A memory commit.
32 */
33 VKMemoryCommit Commit(const vk::MemoryRequirements& reqs, bool host_visible);
34
35 /// Commits memory required by the buffer and binds it.
36 VKMemoryCommit Commit(vk::Buffer buffer, bool host_visible);
37
38 /// Commits memory required by the image and binds it.
39 VKMemoryCommit Commit(vk::Image image, bool host_visible);
40
41 /// Returns true if the memory allocations are done always in host visible and coherent memory.
42 bool IsMemoryUnified() const {
43 return is_memory_unified;
44 }
45
46private:
47 /// Allocates a chunk of memory.
48 bool AllocMemory(vk::MemoryPropertyFlags wanted_properties, u32 type_mask, u64 size);
49
50 /// Returns true if the device uses an unified memory model.
51 static bool GetMemoryUnified(const vk::PhysicalDeviceMemoryProperties& props);
52
53 const VKDevice& device; ///< Device handler.
54 const vk::PhysicalDeviceMemoryProperties props; ///< Physical device properties.
55 const bool is_memory_unified; ///< True if memory model is unified.
56 std::vector<std::unique_ptr<VKMemoryAllocation>> allocs; ///< Current allocations.
57};
58
59class VKMemoryCommitImpl final {
60 friend VKMemoryAllocation;
61
62public:
63 explicit VKMemoryCommitImpl(VKMemoryAllocation* allocation, vk::DeviceMemory memory, u8* data,
64 u64 begin, u64 end);
65 ~VKMemoryCommitImpl();
66
67 /// Returns the writeable memory map. The commit has to be mappable.
68 u8* GetData() const;
69
70 /// Returns the Vulkan memory handler.
71 vk::DeviceMemory GetMemory() const {
72 return memory;
73 }
74
75 /// Returns the start position of the commit relative to the allocation.
76 vk::DeviceSize GetOffset() const {
77 return static_cast<vk::DeviceSize>(interval.first);
78 }
79
80private:
81 std::pair<u64, u64> interval{}; ///< Interval where the commit exists.
82 vk::DeviceMemory memory; ///< Vulkan device memory handler.
83 VKMemoryAllocation* allocation{}; ///< Pointer to the large memory allocation.
84 u8* data{}; ///< Pointer to the host mapped memory, it has the commit offset included.
85};
86
87} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_resource_manager.cpp b/src/video_core/renderer_vulkan/vk_resource_manager.cpp
new file mode 100644
index 000000000..a1e117443
--- /dev/null
+++ b/src/video_core/renderer_vulkan/vk_resource_manager.cpp
@@ -0,0 +1,285 @@
1// Copyright 2018 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <algorithm>
6#include <optional>
7#include "common/assert.h"
8#include "common/logging/log.h"
9#include "video_core/renderer_vulkan/declarations.h"
10#include "video_core/renderer_vulkan/vk_device.h"
11#include "video_core/renderer_vulkan/vk_resource_manager.h"
12
13namespace Vulkan {
14
// TODO(Rodrigo): Fine tune these numbers.
/// Number of command buffers allocated per command pool chunk.
constexpr std::size_t COMMAND_BUFFER_POOL_SIZE{0x1000};
/// Number of fences created each time the fence pool grows.
constexpr std::size_t FENCES_GROW_STEP{0x40};
18
19class CommandBufferPool final : public VKFencedPool {
20public:
21 CommandBufferPool(const VKDevice& device)
22 : VKFencedPool(COMMAND_BUFFER_POOL_SIZE), device{device} {}
23
24 void Allocate(std::size_t begin, std::size_t end) {
25 const auto dev = device.GetLogical();
26 const auto& dld = device.GetDispatchLoader();
27 const u32 graphics_family = device.GetGraphicsFamily();
28
29 auto pool = std::make_unique<Pool>();
30
31 // Command buffers are going to be commited, recorded, executed every single usage cycle.
32 // They are also going to be reseted when commited.
33 const auto pool_flags = vk::CommandPoolCreateFlagBits::eTransient |
34 vk::CommandPoolCreateFlagBits::eResetCommandBuffer;
35 const vk::CommandPoolCreateInfo cmdbuf_pool_ci(pool_flags, graphics_family);
36 pool->handle = dev.createCommandPoolUnique(cmdbuf_pool_ci, nullptr, dld);
37
38 const vk::CommandBufferAllocateInfo cmdbuf_ai(*pool->handle,
39 vk::CommandBufferLevel::ePrimary,
40 static_cast<u32>(COMMAND_BUFFER_POOL_SIZE));
41 pool->cmdbufs =
42 dev.allocateCommandBuffersUnique<std::allocator<UniqueCommandBuffer>>(cmdbuf_ai, dld);
43
44 pools.push_back(std::move(pool));
45 }
46
47 vk::CommandBuffer Commit(VKFence& fence) {
48 const std::size_t index = CommitResource(fence);
49 const auto pool_index = index / COMMAND_BUFFER_POOL_SIZE;
50 const auto sub_index = index % COMMAND_BUFFER_POOL_SIZE;
51 return *pools[pool_index]->cmdbufs[sub_index];
52 }
53
54private:
55 struct Pool {
56 UniqueCommandPool handle;
57 std::vector<UniqueCommandBuffer> cmdbufs;
58 };
59
60 const VKDevice& device;
61
62 std::vector<std::unique_ptr<Pool>> pools;
63};
64
// Constructor and destructor are defaulted here, out of line from the class declaration.
VKResource::VKResource() = default;

VKResource::~VKResource() = default;
68
69VKFence::VKFence(const VKDevice& device, UniqueFence handle)
70 : device{device}, handle{std::move(handle)} {}
71
72VKFence::~VKFence() = default;
73
74void VKFence::Wait() {
75 const auto dev = device.GetLogical();
76 const auto& dld = device.GetDispatchLoader();
77 dev.waitForFences({*handle}, true, std::numeric_limits<u64>::max(), dld);
78}
79
/// Drops ownership of the fence by clearing is_owned; is_used is left untouched.
void VKFence::Release() {
    is_owned = false;
}
83
84void VKFence::Commit() {
85 is_owned = true;
86 is_used = true;
87}
88
89bool VKFence::Tick(bool gpu_wait, bool owner_wait) {
90 if (!is_used) {
91 // If a fence is not used it's always free.
92 return true;
93 }
94 if (is_owned && !owner_wait) {
95 // The fence is still being owned (Release has not been called) and ownership wait has
96 // not been asked.
97 return false;
98 }
99
100 const auto dev = device.GetLogical();
101 const auto& dld = device.GetDispatchLoader();
102 if (gpu_wait) {
103 // Wait for the fence if it has been requested.
104 dev.waitForFences({*handle}, true, std::numeric_limits<u64>::max(), dld);
105 } else {
106 if (dev.getFenceStatus(*handle, dld) != vk::Result::eSuccess) {
107 // Vulkan fence is not ready, not much it can do here
108 return false;
109 }
110 }
111
112 // Broadcast resources their free state.
113 for (auto* resource : protected_resources) {
114 resource->OnFenceRemoval(this);
115 }
116 protected_resources.clear();
117
118 // Prepare fence for reusage.
119 dev.resetFences({*handle}, dld);
120 is_used = false;
121 return true;
122}
123
124void VKFence::Protect(VKResource* resource) {
125 protected_resources.push_back(resource);
126}
127
128void VKFence::Unprotect(VKResource* resource) {
129 const auto it = std::find(protected_resources.begin(), protected_resources.end(), resource);
130 ASSERT(it != protected_resources.end());
131
132 resource->OnFenceRemoval(this);
133 protected_resources.erase(it);
134}
135
136VKFenceWatch::VKFenceWatch() = default;
137
138VKFenceWatch::~VKFenceWatch() {
139 if (fence) {
140 fence->Unprotect(this);
141 }
142}
143
144void VKFenceWatch::Wait() {
145 if (fence == nullptr) {
146 return;
147 }
148 fence->Wait();
149 fence->Unprotect(this);
150}
151
152void VKFenceWatch::Watch(VKFence& new_fence) {
153 Wait();
154 fence = &new_fence;
155 fence->Protect(this);
156}
157
158bool VKFenceWatch::TryWatch(VKFence& new_fence) {
159 if (fence) {
160 return false;
161 }
162 fence = &new_fence;
163 fence->Protect(this);
164 return true;
165}
166
167void VKFenceWatch::OnFenceRemoval(VKFence* signaling_fence) {
168 ASSERT_MSG(signaling_fence == fence, "Removing the wrong fence");
169 fence = nullptr;
170}
171
172VKFencedPool::VKFencedPool(std::size_t grow_step) : grow_step{grow_step} {}
173
174VKFencedPool::~VKFencedPool() = default;
175
176std::size_t VKFencedPool::CommitResource(VKFence& fence) {
177 const auto Search = [&](std::size_t begin, std::size_t end) -> std::optional<std::size_t> {
178 for (std::size_t iterator = begin; iterator < end; ++iterator) {
179 if (watches[iterator]->TryWatch(fence)) {
180 // The resource is now being watched, a free resource was successfully found.
181 return iterator;
182 }
183 }
184 return {};
185 };
186 // Try to find a free resource from the hinted position to the end.
187 auto found = Search(free_iterator, watches.size());
188 if (!found) {
189 // Search from beginning to the hinted position.
190 found = Search(0, free_iterator);
191 if (!found) {
192 // Both searches failed, the pool is full; handle it.
193 const std::size_t free_resource = ManageOverflow();
194
195 // Watch will wait for the resource to be free.
196 watches[free_resource]->Watch(fence);
197 found = free_resource;
198 }
199 }
200 // Free iterator is hinted to the resource after the one that's been commited.
201 free_iterator = (*found + 1) % watches.size();
202 return *found;
203}
204
205std::size_t VKFencedPool::ManageOverflow() {
206 const std::size_t old_capacity = watches.size();
207 Grow();
208
209 // The last entry is guaranted to be free, since it's the first element of the freshly
210 // allocated resources.
211 return old_capacity;
212}
213
214void VKFencedPool::Grow() {
215 const std::size_t old_capacity = watches.size();
216 watches.resize(old_capacity + grow_step);
217 std::generate(watches.begin() + old_capacity, watches.end(),
218 []() { return std::make_unique<VKFenceWatch>(); });
219 Allocate(old_capacity, old_capacity + grow_step);
220}
221
222VKResourceManager::VKResourceManager(const VKDevice& device) : device{device} {
223 GrowFences(FENCES_GROW_STEP);
224 command_buffer_pool = std::make_unique<CommandBufferPool>(device);
225}
226
227VKResourceManager::~VKResourceManager() = default;
228
229VKFence& VKResourceManager::CommitFence() {
230 const auto StepFences = [&](bool gpu_wait, bool owner_wait) -> VKFence* {
231 const auto Tick = [=](auto& fence) { return fence->Tick(gpu_wait, owner_wait); };
232 const auto hinted = fences.begin() + fences_iterator;
233
234 auto it = std::find_if(hinted, fences.end(), Tick);
235 if (it == fences.end()) {
236 it = std::find_if(fences.begin(), hinted, Tick);
237 if (it == hinted) {
238 return nullptr;
239 }
240 }
241 fences_iterator = std::distance(fences.begin(), it) + 1;
242 if (fences_iterator >= fences.size())
243 fences_iterator = 0;
244
245 auto& fence = *it;
246 fence->Commit();
247 return fence.get();
248 };
249
250 VKFence* found_fence = StepFences(false, false);
251 if (!found_fence) {
252 // Try again, this time waiting.
253 found_fence = StepFences(true, false);
254
255 if (!found_fence) {
256 // Allocate new fences and try again.
257 LOG_INFO(Render_Vulkan, "Allocating new fences {} -> {}", fences.size(),
258 fences.size() + FENCES_GROW_STEP);
259
260 GrowFences(FENCES_GROW_STEP);
261 found_fence = StepFences(true, false);
262 ASSERT(found_fence != nullptr);
263 }
264 }
265 return *found_fence;
266}
267
/// Forwards to the command buffer pool, which hands out a command buffer tied to the fence.
vk::CommandBuffer VKResourceManager::CommitCommandBuffer(VKFence& fence) {
    return command_buffer_pool->Commit(fence);
}
271
272void VKResourceManager::GrowFences(std::size_t new_fences_count) {
273 const auto dev = device.GetLogical();
274 const auto& dld = device.GetDispatchLoader();
275 const vk::FenceCreateInfo fence_ci;
276
277 const std::size_t previous_size = fences.size();
278 fences.resize(previous_size + new_fences_count);
279
280 std::generate(fences.begin() + previous_size, fences.end(), [&]() {
281 return std::make_unique<VKFence>(device, dev.createFenceUnique(fence_ci, nullptr, dld));
282 });
283}
284
285} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_resource_manager.h b/src/video_core/renderer_vulkan/vk_resource_manager.h
new file mode 100644
index 000000000..5bfe4cead
--- /dev/null
+++ b/src/video_core/renderer_vulkan/vk_resource_manager.h
@@ -0,0 +1,180 @@
1// Copyright 2018 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <cstddef>
8#include <memory>
9#include <vector>
10#include "video_core/renderer_vulkan/declarations.h"
11
12namespace Vulkan {
13
14class VKDevice;
15class VKFence;
16class VKResourceManager;
17
18class CommandBufferPool;
19
20/// Interface for a Vulkan resource
21class VKResource {
22public:
23 explicit VKResource();
24 virtual ~VKResource();
25
26 /**
27 * Signals the object that an owning fence has been signaled.
28 * @param signaling_fence Fence that signals its usage end.
29 */
30 virtual void OnFenceRemoval(VKFence* signaling_fence) = 0;
31};
32
33/**
34 * Fences take ownership of objects, protecting them from GPU-side or driver-side concurrent access.
35 * They must be commited from the resource manager. Their usage flow is: commit the fence from the
36 * resource manager, protect resources with it and use them, send the fence to an execution queue
37 * and Wait for it if needed and then call Release. Used resources will automatically be signaled
38 * when they are free to be reused.
39 * @brief Protects resources for concurrent usage and signals its release.
40 */
41class VKFence {
42 friend class VKResourceManager;
43
44public:
45 explicit VKFence(const VKDevice& device, UniqueFence handle);
46 ~VKFence();
47
48 /**
49 * Waits for the fence to be signaled.
50 * @warning You must have ownership of the fence and it has to be previously sent to a queue to
51 * call this function.
52 */
53 void Wait();
54
55 /**
56 * Releases ownership of the fence. Pass after it has been sent to an execution queue.
57 * Unmanaged usage of the fence after the call will result in undefined behavior because it may
58 * be being used for something else.
59 */
60 void Release();
61
62 /// Protects a resource with this fence.
63 void Protect(VKResource* resource);
64
65 /// Removes protection for a resource.
66 void Unprotect(VKResource* resource);
67
68 /// Retreives the fence.
69 operator vk::Fence() const {
70 return *handle;
71 }
72
73private:
74 /// Take ownership of the fence.
75 void Commit();
76
77 /**
78 * Updates the fence status.
79 * @warning Waiting for the owner might soft lock the execution.
80 * @param gpu_wait Wait for the fence to be signaled by the driver.
81 * @param owner_wait Wait for the owner to signal its freedom.
82 * @returns True if the fence is free. Waiting for gpu and owner will always return true.
83 */
84 bool Tick(bool gpu_wait, bool owner_wait);
85
86 const VKDevice& device; ///< Device handler
87 UniqueFence handle; ///< Vulkan fence
88 std::vector<VKResource*> protected_resources; ///< List of resources protected by this fence
89 bool is_owned = false; ///< The fence has been commited but not released yet.
90 bool is_used = false; ///< The fence has been commited but it has not been checked to be free.
91};
92
93/**
94 * A fence watch is used to keep track of the usage of a fence and protect a resource or set of
95 * resources without having to inherit VKResource from their handlers.
96 */
97class VKFenceWatch final : public VKResource {
98public:
99 explicit VKFenceWatch();
100 ~VKFenceWatch();
101
102 /// Waits for the fence to be released.
103 void Wait();
104
105 /**
106 * Waits for a previous fence and watches a new one.
107 * @param new_fence New fence to wait to.
108 */
109 void Watch(VKFence& new_fence);
110
111 /**
112 * Checks if it's currently being watched and starts watching it if it's available.
113 * @returns True if a watch has started, false if it's being watched.
114 */
115 bool TryWatch(VKFence& new_fence);
116
117 void OnFenceRemoval(VKFence* signaling_fence) override;
118
119private:
120 VKFence* fence{}; ///< Fence watching this resource. nullptr when the watch is free.
121};
122
123/**
124 * Handles a pool of resources protected by fences. Manages resource overflow allocating more
125 * resources.
126 */
127class VKFencedPool {
128public:
129 explicit VKFencedPool(std::size_t grow_step);
130 virtual ~VKFencedPool();
131
132protected:
133 /**
134 * Commits a free resource and protects it with a fence. It may allocate new resources.
135 * @param fence Fence that protects the commited resource.
136 * @returns Index of the resource commited.
137 */
138 std::size_t CommitResource(VKFence& fence);
139
140 /// Called when a chunk of resources have to be allocated.
141 virtual void Allocate(std::size_t begin, std::size_t end) = 0;
142
143private:
144 /// Manages pool overflow allocating new resources.
145 std::size_t ManageOverflow();
146
147 /// Allocates a new page of resources.
148 void Grow();
149
150 std::size_t grow_step = 0; ///< Number of new resources created after an overflow
151 std::size_t free_iterator = 0; ///< Hint to where the next free resources is likely to be found
152 std::vector<std::unique_ptr<VKFenceWatch>> watches; ///< Set of watched resources
153};
154
155/**
156 * The resource manager handles all resources that can be protected with a fence avoiding
157 * driver-side or GPU-side concurrent usage. Usage is documented in VKFence.
158 */
159class VKResourceManager final {
160public:
161 explicit VKResourceManager(const VKDevice& device);
162 ~VKResourceManager();
163
164 /// Commits a fence. It has to be sent to a queue and released.
165 VKFence& CommitFence();
166
167 /// Commits an unused command buffer and protects it with a fence.
168 vk::CommandBuffer CommitCommandBuffer(VKFence& fence);
169
170private:
171 /// Allocates new fences.
172 void GrowFences(std::size_t new_fences_count);
173
174 const VKDevice& device; ///< Device handler.
175 std::size_t fences_iterator = 0; ///< Index where a free fence is likely to be found.
176 std::vector<std::unique_ptr<VKFence>> fences; ///< Pool of fences.
177 std::unique_ptr<CommandBufferPool> command_buffer_pool; ///< Pool of command buffers.
178};
179
180} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_sampler_cache.cpp b/src/video_core/renderer_vulkan/vk_sampler_cache.cpp
new file mode 100644
index 000000000..ed3178f09
--- /dev/null
+++ b/src/video_core/renderer_vulkan/vk_sampler_cache.cpp
@@ -0,0 +1,81 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <cstring>
6#include <optional>
7#include <unordered_map>
8
9#include "common/assert.h"
10#include "common/cityhash.h"
11#include "video_core/renderer_vulkan/declarations.h"
12#include "video_core/renderer_vulkan/maxwell_to_vk.h"
13#include "video_core/renderer_vulkan/vk_sampler_cache.h"
14#include "video_core/textures/texture.h"
15
16namespace Vulkan {
17
18static std::optional<vk::BorderColor> TryConvertBorderColor(std::array<float, 4> color) {
19 // TODO(Rodrigo): Manage integer border colors
20 if (color == std::array<float, 4>{0, 0, 0, 0}) {
21 return vk::BorderColor::eFloatTransparentBlack;
22 } else if (color == std::array<float, 4>{0, 0, 0, 1}) {
23 return vk::BorderColor::eFloatOpaqueBlack;
24 } else if (color == std::array<float, 4>{1, 1, 1, 1}) {
25 return vk::BorderColor::eFloatOpaqueWhite;
26 } else {
27 return {};
28 }
29}
30
31std::size_t SamplerCacheKey::Hash() const {
32 static_assert(sizeof(raw) % sizeof(u64) == 0);
33 return static_cast<std::size_t>(
34 Common::CityHash64(reinterpret_cast<const char*>(raw.data()), sizeof(raw) / sizeof(u64)));
35}
36
/// Two keys are equal when their raw descriptor contents compare equal.
bool SamplerCacheKey::operator==(const SamplerCacheKey& rhs) const {
    return raw == rhs.raw;
}
40
41VKSamplerCache::VKSamplerCache(const VKDevice& device) : device{device} {}
42
43VKSamplerCache::~VKSamplerCache() = default;
44
45vk::Sampler VKSamplerCache::GetSampler(const Tegra::Texture::TSCEntry& tsc) {
46 const auto [entry, is_cache_miss] = cache.try_emplace(SamplerCacheKey{tsc});
47 auto& sampler = entry->second;
48 if (is_cache_miss) {
49 sampler = CreateSampler(tsc);
50 }
51 return *sampler;
52}
53
54UniqueSampler VKSamplerCache::CreateSampler(const Tegra::Texture::TSCEntry& tsc) {
55 const float max_anisotropy = tsc.GetMaxAnisotropy();
56 const bool has_anisotropy = max_anisotropy > 1.0f;
57
58 const auto border_color = tsc.GetBorderColor();
59 const auto vk_border_color = TryConvertBorderColor(border_color);
60 UNIMPLEMENTED_IF_MSG(!vk_border_color, "Unimplemented border color {} {} {} {}",
61 border_color[0], border_color[1], border_color[2], border_color[3]);
62
63 constexpr bool unnormalized_coords = false;
64
65 const vk::SamplerCreateInfo sampler_ci(
66 {}, MaxwellToVK::Sampler::Filter(tsc.mag_filter),
67 MaxwellToVK::Sampler::Filter(tsc.min_filter),
68 MaxwellToVK::Sampler::MipmapMode(tsc.mipmap_filter),
69 MaxwellToVK::Sampler::WrapMode(tsc.wrap_u), MaxwellToVK::Sampler::WrapMode(tsc.wrap_v),
70 MaxwellToVK::Sampler::WrapMode(tsc.wrap_p), tsc.GetLodBias(), has_anisotropy,
71 max_anisotropy, tsc.depth_compare_enabled,
72 MaxwellToVK::Sampler::DepthCompareFunction(tsc.depth_compare_func), tsc.GetMinLod(),
73 tsc.GetMaxLod(), vk_border_color.value_or(vk::BorderColor::eFloatTransparentBlack),
74 unnormalized_coords);
75
76 const auto& dld = device.GetDispatchLoader();
77 const auto dev = device.GetLogical();
78 return dev.createSamplerUnique(sampler_ci, nullptr, dld);
79}
80
81} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_sampler_cache.h b/src/video_core/renderer_vulkan/vk_sampler_cache.h
new file mode 100644
index 000000000..c6394dc87
--- /dev/null
+++ b/src/video_core/renderer_vulkan/vk_sampler_cache.h
@@ -0,0 +1,56 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <unordered_map>
8
9#include "common/common_types.h"
10#include "video_core/renderer_vulkan/declarations.h"
11#include "video_core/textures/texture.h"
12
13namespace Vulkan {
14
15class VKDevice;
16
17struct SamplerCacheKey final : public Tegra::Texture::TSCEntry {
18 std::size_t Hash() const;
19
20 bool operator==(const SamplerCacheKey& rhs) const;
21
22 bool operator!=(const SamplerCacheKey& rhs) const {
23 return !operator==(rhs);
24 }
25};
26
27} // namespace Vulkan
28
29namespace std {
30
31template <>
32struct hash<Vulkan::SamplerCacheKey> {
33 std::size_t operator()(const Vulkan::SamplerCacheKey& k) const noexcept {
34 return k.Hash();
35 }
36};
37
38} // namespace std
39
40namespace Vulkan {
41
42class VKSamplerCache {
43public:
44 explicit VKSamplerCache(const VKDevice& device);
45 ~VKSamplerCache();
46
47 vk::Sampler GetSampler(const Tegra::Texture::TSCEntry& tsc);
48
49private:
50 UniqueSampler CreateSampler(const Tegra::Texture::TSCEntry& tsc);
51
52 const VKDevice& device;
53 std::unordered_map<SamplerCacheKey, UniqueSampler> cache;
54};
55
56} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_scheduler.cpp b/src/video_core/renderer_vulkan/vk_scheduler.cpp
new file mode 100644
index 000000000..f1fea1871
--- /dev/null
+++ b/src/video_core/renderer_vulkan/vk_scheduler.cpp
@@ -0,0 +1,60 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/assert.h"
6#include "common/logging/log.h"
7#include "video_core/renderer_vulkan/declarations.h"
8#include "video_core/renderer_vulkan/vk_device.h"
9#include "video_core/renderer_vulkan/vk_resource_manager.h"
10#include "video_core/renderer_vulkan/vk_scheduler.h"
11
12namespace Vulkan {
13
14VKScheduler::VKScheduler(const VKDevice& device, VKResourceManager& resource_manager)
15 : device{device}, resource_manager{resource_manager} {
16 next_fence = &resource_manager.CommitFence();
17 AllocateNewContext();
18}
19
20VKScheduler::~VKScheduler() = default;
21
22VKExecutionContext VKScheduler::GetExecutionContext() const {
23 return VKExecutionContext(current_fence, current_cmdbuf);
24}
25
/// Submits the current command buffer (forwarding the semaphore to SubmitExecution), releases
/// the current fence without waiting on it, and returns a freshly allocated context.
VKExecutionContext VKScheduler::Flush(vk::Semaphore semaphore) {
    SubmitExecution(semaphore);
    current_fence->Release();
    AllocateNewContext();
    return GetExecutionContext();
}
32
/// Same as Flush, except that the current fence is waited on before it is released.
VKExecutionContext VKScheduler::Finish(vk::Semaphore semaphore) {
    SubmitExecution(semaphore);
    // Block until the submitted work has signaled the fence.
    current_fence->Wait();
    current_fence->Release();
    AllocateNewContext();
    return GetExecutionContext();
}
40
41void VKScheduler::SubmitExecution(vk::Semaphore semaphore) {
42 const auto& dld = device.GetDispatchLoader();
43 current_cmdbuf.end(dld);
44
45 const auto queue = device.GetGraphicsQueue();
46 const vk::SubmitInfo submit_info(0, nullptr, nullptr, 1, &current_cmdbuf, semaphore ? 1u : 0u,
47 &semaphore);
48 queue.submit({submit_info}, *current_fence, dld);
49}
50
51void VKScheduler::AllocateNewContext() {
52 current_fence = next_fence;
53 current_cmdbuf = resource_manager.CommitCommandBuffer(*current_fence);
54 next_fence = &resource_manager.CommitFence();
55
56 const auto& dld = device.GetDispatchLoader();
57 current_cmdbuf.begin({vk::CommandBufferUsageFlagBits::eOneTimeSubmit}, dld);
58}
59
60} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_scheduler.h b/src/video_core/renderer_vulkan/vk_scheduler.h
new file mode 100644
index 000000000..cfaf5376f
--- /dev/null
+++ b/src/video_core/renderer_vulkan/vk_scheduler.h
@@ -0,0 +1,69 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include "common/common_types.h"
8#include "video_core/renderer_vulkan/declarations.h"
9
10namespace Vulkan {
11
12class VKDevice;
13class VKExecutionContext;
14class VKFence;
15class VKResourceManager;
16
17/// The scheduler abstracts command buffer and fence management with an interface that's able to do
18/// OpenGL-like operations on Vulkan command buffers.
19class VKScheduler {
20public:
21 explicit VKScheduler(const VKDevice& device, VKResourceManager& resource_manager);
22 ~VKScheduler();
23
24 /// Gets the current execution context.
25 [[nodiscard]] VKExecutionContext GetExecutionContext() const;
26
27 /// Sends the current execution context to the GPU. It invalidates the current execution context
28 /// and returns a new one.
29 VKExecutionContext Flush(vk::Semaphore semaphore = nullptr);
30
31 /// Sends the current execution context to the GPU and waits for it to complete. It invalidates
32 /// the current execution context and returns a new one.
33 VKExecutionContext Finish(vk::Semaphore semaphore = nullptr);
34
35private:
36 void SubmitExecution(vk::Semaphore semaphore);
37
38 void AllocateNewContext();
39
40 const VKDevice& device;
41 VKResourceManager& resource_manager;
42 vk::CommandBuffer current_cmdbuf;
43 VKFence* current_fence = nullptr;
44 VKFence* next_fence = nullptr;
45};
46
47class VKExecutionContext {
48 friend class VKScheduler;
49
50public:
51 VKExecutionContext() = default;
52
53 VKFence& GetFence() const {
54 return *fence;
55 }
56
57 vk::CommandBuffer GetCommandBuffer() const {
58 return cmdbuf;
59 }
60
61private:
62 explicit VKExecutionContext(VKFence* fence, vk::CommandBuffer cmdbuf)
63 : fence{fence}, cmdbuf{cmdbuf} {}
64
65 VKFence* fence{};
66 vk::CommandBuffer cmdbuf;
67};
68
69} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_stream_buffer.cpp b/src/video_core/renderer_vulkan/vk_stream_buffer.cpp
new file mode 100644
index 000000000..58ffa42f2
--- /dev/null
+++ b/src/video_core/renderer_vulkan/vk_stream_buffer.cpp
@@ -0,0 +1,90 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <algorithm>
6#include <memory>
7#include <optional>
8#include <vector>
9
10#include "common/assert.h"
11#include "video_core/renderer_vulkan/declarations.h"
12#include "video_core/renderer_vulkan/vk_device.h"
13#include "video_core/renderer_vulkan/vk_memory_manager.h"
14#include "video_core/renderer_vulkan/vk_resource_manager.h"
15#include "video_core/renderer_vulkan/vk_scheduler.h"
16#include "video_core/renderer_vulkan/vk_stream_buffer.h"
17
18namespace Vulkan {
19
20constexpr u64 WATCHES_INITIAL_RESERVE = 0x4000;
21constexpr u64 WATCHES_RESERVE_CHUNK = 0x1000;
22
23VKStreamBuffer::VKStreamBuffer(const VKDevice& device, VKMemoryManager& memory_manager,
24 VKScheduler& scheduler, u64 size, vk::BufferUsageFlags usage,
25 vk::AccessFlags access, vk::PipelineStageFlags pipeline_stage)
26 : device{device}, scheduler{scheduler}, buffer_size{size}, access{access}, pipeline_stage{
27 pipeline_stage} {
28 CreateBuffers(memory_manager, usage);
29 ReserveWatches(WATCHES_INITIAL_RESERVE);
30}
31
32VKStreamBuffer::~VKStreamBuffer() = default;
33
34std::tuple<u8*, u64, bool> VKStreamBuffer::Reserve(u64 size) {
35 ASSERT(size <= buffer_size);
36 mapped_size = size;
37
38 if (offset + size > buffer_size) {
39 // The buffer would overflow, save the amount of used buffers, signal an invalidation and
40 // reset the state.
41 invalidation_mark = used_watches;
42 used_watches = 0;
43 offset = 0;
44 }
45
46 return {mapped_pointer + offset, offset, invalidation_mark.has_value()};
47}
48
49VKExecutionContext VKStreamBuffer::Send(VKExecutionContext exctx, u64 size) {
50 ASSERT_MSG(size <= mapped_size, "Reserved size is too small");
51
52 if (invalidation_mark) {
53 // TODO(Rodrigo): Find a better way to invalidate than waiting for all watches to finish.
54 exctx = scheduler.Flush();
55 std::for_each(watches.begin(), watches.begin() + *invalidation_mark,
56 [&](auto& resource) { resource->Wait(); });
57 invalidation_mark = std::nullopt;
58 }
59
60 if (used_watches + 1 >= watches.size()) {
61 // Ensure that there are enough watches.
62 ReserveWatches(WATCHES_RESERVE_CHUNK);
63 }
64 // Add a watch for this allocation.
65 watches[used_watches++]->Watch(exctx.GetFence());
66
67 offset += size;
68
69 return exctx;
70}
71
72void VKStreamBuffer::CreateBuffers(VKMemoryManager& memory_manager, vk::BufferUsageFlags usage) {
73 const vk::BufferCreateInfo buffer_ci({}, buffer_size, usage, vk::SharingMode::eExclusive, 0,
74 nullptr);
75
76 const auto dev = device.GetLogical();
77 const auto& dld = device.GetDispatchLoader();
78 buffer = dev.createBufferUnique(buffer_ci, nullptr, dld);
79 commit = memory_manager.Commit(*buffer, true);
80 mapped_pointer = commit->GetData();
81}
82
83void VKStreamBuffer::ReserveWatches(std::size_t grow_size) {
84 const std::size_t previous_size = watches.size();
85 watches.resize(previous_size + grow_size);
86 std::generate(watches.begin() + previous_size, watches.end(),
87 []() { return std::make_unique<VKFenceWatch>(); });
88}
89
90} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_stream_buffer.h b/src/video_core/renderer_vulkan/vk_stream_buffer.h
new file mode 100644
index 000000000..69d036ccd
--- /dev/null
+++ b/src/video_core/renderer_vulkan/vk_stream_buffer.h
@@ -0,0 +1,72 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <memory>
8#include <optional>
9#include <tuple>
10#include <vector>
11
12#include "common/common_types.h"
13#include "video_core/renderer_vulkan/declarations.h"
14#include "video_core/renderer_vulkan/vk_memory_manager.h"
15
16namespace Vulkan {
17
18class VKDevice;
19class VKFence;
20class VKFenceWatch;
21class VKResourceManager;
22class VKScheduler;
23
24class VKStreamBuffer {
25public:
26 explicit VKStreamBuffer(const VKDevice& device, VKMemoryManager& memory_manager,
27 VKScheduler& scheduler, u64 size, vk::BufferUsageFlags usage,
28 vk::AccessFlags access, vk::PipelineStageFlags pipeline_stage);
29 ~VKStreamBuffer();
30
31 /**
32 * Reserves a region of memory from the stream buffer.
33 * @param size Size to reserve.
34 * @returns A tuple in the following order: Raw memory pointer (with offset added), buffer
35 * offset and a boolean that's true when buffer has been invalidated.
36 */
37 std::tuple<u8*, u64, bool> Reserve(u64 size);
38
39 /// Ensures that "size" bytes of memory are available to the GPU, potentially recording a copy.
40 [[nodiscard]] VKExecutionContext Send(VKExecutionContext exctx, u64 size);
41
42 vk::Buffer GetBuffer() const {
43 return *buffer;
44 }
45
46private:
47 /// Creates Vulkan buffer handles committing the required the required memory.
48 void CreateBuffers(VKMemoryManager& memory_manager, vk::BufferUsageFlags usage);
49
50 /// Increases the amount of watches available.
51 void ReserveWatches(std::size_t grow_size);
52
53 const VKDevice& device; ///< Vulkan device manager.
54 VKScheduler& scheduler; ///< Command scheduler.
55 const u64 buffer_size; ///< Total size of the stream buffer.
56 const vk::AccessFlags access; ///< Access usage of this stream buffer.
57 const vk::PipelineStageFlags pipeline_stage; ///< Pipeline usage of this stream buffer.
58
59 UniqueBuffer buffer; ///< Mapped buffer.
60 VKMemoryCommit commit; ///< Memory commit.
61 u8* mapped_pointer{}; ///< Pointer to the host visible commit
62
63 u64 offset{}; ///< Buffer iterator.
64 u64 mapped_size{}; ///< Size reserved for the current copy.
65
66 std::vector<std::unique_ptr<VKFenceWatch>> watches; ///< Total watches
67 std::size_t used_watches{}; ///< Count of watches, reset on invalidation.
68 std::optional<std::size_t>
69 invalidation_mark{}; ///< Number of watches used in the current invalidation.
70};
71
72} // namespace Vulkan
diff --git a/src/video_core/shader/decode.cpp b/src/video_core/shader/decode.cpp
index 740ac3118..e4c438792 100644
--- a/src/video_core/shader/decode.cpp
+++ b/src/video_core/shader/decode.cpp
@@ -165,6 +165,7 @@ u32 ShaderIR::DecodeInstr(NodeBlock& bb, u32 pc) {
165 {OpCode::Type::Hfma2, &ShaderIR::DecodeHfma2}, 165 {OpCode::Type::Hfma2, &ShaderIR::DecodeHfma2},
166 {OpCode::Type::Conversion, &ShaderIR::DecodeConversion}, 166 {OpCode::Type::Conversion, &ShaderIR::DecodeConversion},
167 {OpCode::Type::Memory, &ShaderIR::DecodeMemory}, 167 {OpCode::Type::Memory, &ShaderIR::DecodeMemory},
168 {OpCode::Type::Texture, &ShaderIR::DecodeTexture},
168 {OpCode::Type::FloatSetPredicate, &ShaderIR::DecodeFloatSetPredicate}, 169 {OpCode::Type::FloatSetPredicate, &ShaderIR::DecodeFloatSetPredicate},
169 {OpCode::Type::IntegerSetPredicate, &ShaderIR::DecodeIntegerSetPredicate}, 170 {OpCode::Type::IntegerSetPredicate, &ShaderIR::DecodeIntegerSetPredicate},
170 {OpCode::Type::HalfSetPredicate, &ShaderIR::DecodeHalfSetPredicate}, 171 {OpCode::Type::HalfSetPredicate, &ShaderIR::DecodeHalfSetPredicate},
diff --git a/src/video_core/shader/decode/memory.cpp b/src/video_core/shader/decode/memory.cpp
index 55ec601ff..ea3c71eed 100644
--- a/src/video_core/shader/decode/memory.cpp
+++ b/src/video_core/shader/decode/memory.cpp
@@ -17,24 +17,6 @@ using Tegra::Shader::Attribute;
17using Tegra::Shader::Instruction; 17using Tegra::Shader::Instruction;
18using Tegra::Shader::OpCode; 18using Tegra::Shader::OpCode;
19using Tegra::Shader::Register; 19using Tegra::Shader::Register;
20using Tegra::Shader::TextureMiscMode;
21using Tegra::Shader::TextureProcessMode;
22using Tegra::Shader::TextureType;
23
24static std::size_t GetCoordCount(TextureType texture_type) {
25 switch (texture_type) {
26 case TextureType::Texture1D:
27 return 1;
28 case TextureType::Texture2D:
29 return 2;
30 case TextureType::Texture3D:
31 case TextureType::TextureCube:
32 return 3;
33 default:
34 UNIMPLEMENTED_MSG("Unhandled texture type: {}", static_cast<u32>(texture_type));
35 return 0;
36 }
37}
38 20
39u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) { 21u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) {
40 const Instruction instr = {program_code[pc]}; 22 const Instruction instr = {program_code[pc]};
@@ -48,7 +30,7 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) {
48 UNIMPLEMENTED_IF_MSG((instr.attribute.fmt20.immediate.Value() % sizeof(u32)) != 0, 30 UNIMPLEMENTED_IF_MSG((instr.attribute.fmt20.immediate.Value() % sizeof(u32)) != 0,
49 "Unaligned attribute loads are not supported"); 31 "Unaligned attribute loads are not supported");
50 32
51 Tegra::Shader::IpaMode input_mode{Tegra::Shader::IpaInterpMode::Perspective, 33 Tegra::Shader::IpaMode input_mode{Tegra::Shader::IpaInterpMode::Pass,
52 Tegra::Shader::IpaSampleMode::Default}; 34 Tegra::Shader::IpaSampleMode::Default};
53 35
54 u64 next_element = instr.attribute.fmt20.element; 36 u64 next_element = instr.attribute.fmt20.element;
@@ -247,194 +229,6 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) {
247 } 229 }
248 break; 230 break;
249 } 231 }
250 case OpCode::Id::TEX: {
251 UNIMPLEMENTED_IF_MSG(instr.tex.UsesMiscMode(TextureMiscMode::AOFFI),
252 "AOFFI is not implemented");
253
254 if (instr.tex.UsesMiscMode(TextureMiscMode::NODEP)) {
255 LOG_WARNING(HW_GPU, "TEX.NODEP implementation is incomplete");
256 }
257
258 const TextureType texture_type{instr.tex.texture_type};
259 const bool is_array = instr.tex.array != 0;
260 const bool depth_compare = instr.tex.UsesMiscMode(TextureMiscMode::DC);
261 const auto process_mode = instr.tex.GetTextureProcessMode();
262 WriteTexInstructionFloat(
263 bb, instr, GetTexCode(instr, texture_type, process_mode, depth_compare, is_array));
264 break;
265 }
266 case OpCode::Id::TEXS: {
267 const TextureType texture_type{instr.texs.GetTextureType()};
268 const bool is_array{instr.texs.IsArrayTexture()};
269 const bool depth_compare = instr.texs.UsesMiscMode(TextureMiscMode::DC);
270 const auto process_mode = instr.texs.GetTextureProcessMode();
271
272 if (instr.texs.UsesMiscMode(TextureMiscMode::NODEP)) {
273 LOG_WARNING(HW_GPU, "TEXS.NODEP implementation is incomplete");
274 }
275
276 const Node4 components =
277 GetTexsCode(instr, texture_type, process_mode, depth_compare, is_array);
278
279 if (instr.texs.fp32_flag) {
280 WriteTexsInstructionFloat(bb, instr, components);
281 } else {
282 WriteTexsInstructionHalfFloat(bb, instr, components);
283 }
284 break;
285 }
286 case OpCode::Id::TLD4: {
287 ASSERT(instr.tld4.array == 0);
288 UNIMPLEMENTED_IF_MSG(instr.tld4.UsesMiscMode(TextureMiscMode::AOFFI),
289 "AOFFI is not implemented");
290 UNIMPLEMENTED_IF_MSG(instr.tld4.UsesMiscMode(TextureMiscMode::NDV),
291 "NDV is not implemented");
292 UNIMPLEMENTED_IF_MSG(instr.tld4.UsesMiscMode(TextureMiscMode::PTP),
293 "PTP is not implemented");
294
295 if (instr.tld4.UsesMiscMode(TextureMiscMode::NODEP)) {
296 LOG_WARNING(HW_GPU, "TLD4.NODEP implementation is incomplete");
297 }
298
299 const auto texture_type = instr.tld4.texture_type.Value();
300 const bool depth_compare = instr.tld4.UsesMiscMode(TextureMiscMode::DC);
301 const bool is_array = instr.tld4.array != 0;
302 WriteTexInstructionFloat(bb, instr,
303 GetTld4Code(instr, texture_type, depth_compare, is_array));
304 break;
305 }
306 case OpCode::Id::TLD4S: {
307 UNIMPLEMENTED_IF_MSG(instr.tld4s.UsesMiscMode(TextureMiscMode::AOFFI),
308 "AOFFI is not implemented");
309 if (instr.tld4s.UsesMiscMode(TextureMiscMode::NODEP)) {
310 LOG_WARNING(HW_GPU, "TLD4S.NODEP implementation is incomplete");
311 }
312
313 const bool depth_compare = instr.tld4s.UsesMiscMode(TextureMiscMode::DC);
314 const Node op_a = GetRegister(instr.gpr8);
315 const Node op_b = GetRegister(instr.gpr20);
316
317 // TODO(Subv): Figure out how the sampler type is encoded in the TLD4S instruction.
318 std::vector<Node> coords;
319 if (depth_compare) {
320 // Note: TLD4S coordinate encoding works just like TEXS's
321 const Node op_y = GetRegister(instr.gpr8.Value() + 1);
322 coords.push_back(op_a);
323 coords.push_back(op_y);
324 coords.push_back(op_b);
325 } else {
326 coords.push_back(op_a);
327 coords.push_back(op_b);
328 }
329 std::vector<Node> extras;
330 extras.push_back(Immediate(static_cast<u32>(instr.tld4s.component)));
331
332 const auto& sampler =
333 GetSampler(instr.sampler, TextureType::Texture2D, false, depth_compare);
334
335 Node4 values;
336 for (u32 element = 0; element < values.size(); ++element) {
337 auto coords_copy = coords;
338 MetaTexture meta{sampler, {}, {}, extras, element};
339 values[element] = Operation(OperationCode::TextureGather, meta, std::move(coords_copy));
340 }
341
342 WriteTexsInstructionFloat(bb, instr, values);
343 break;
344 }
345 case OpCode::Id::TXQ: {
346 if (instr.txq.UsesMiscMode(TextureMiscMode::NODEP)) {
347 LOG_WARNING(HW_GPU, "TXQ.NODEP implementation is incomplete");
348 }
349
350 // TODO: The new commits on the texture refactor, change the way samplers work.
351 // Sadly, not all texture instructions specify the type of texture their sampler
352 // uses. This must be fixed at a later instance.
353 const auto& sampler =
354 GetSampler(instr.sampler, Tegra::Shader::TextureType::Texture2D, false, false);
355
356 u32 indexer = 0;
357 switch (instr.txq.query_type) {
358 case Tegra::Shader::TextureQueryType::Dimension: {
359 for (u32 element = 0; element < 4; ++element) {
360 if (!instr.txq.IsComponentEnabled(element)) {
361 continue;
362 }
363 MetaTexture meta{sampler, {}, {}, {}, element};
364 const Node value =
365 Operation(OperationCode::TextureQueryDimensions, meta, GetRegister(instr.gpr8));
366 SetTemporal(bb, indexer++, value);
367 }
368 for (u32 i = 0; i < indexer; ++i) {
369 SetRegister(bb, instr.gpr0.Value() + i, GetTemporal(i));
370 }
371 break;
372 }
373 default:
374 UNIMPLEMENTED_MSG("Unhandled texture query type: {}",
375 static_cast<u32>(instr.txq.query_type.Value()));
376 }
377 break;
378 }
379 case OpCode::Id::TMML: {
380 UNIMPLEMENTED_IF_MSG(instr.tmml.UsesMiscMode(Tegra::Shader::TextureMiscMode::NDV),
381 "NDV is not implemented");
382
383 if (instr.tmml.UsesMiscMode(TextureMiscMode::NODEP)) {
384 LOG_WARNING(HW_GPU, "TMML.NODEP implementation is incomplete");
385 }
386
387 auto texture_type = instr.tmml.texture_type.Value();
388 const bool is_array = instr.tmml.array != 0;
389 const auto& sampler = GetSampler(instr.sampler, texture_type, is_array, false);
390
391 std::vector<Node> coords;
392
393 // TODO: Add coordinates for different samplers once other texture types are implemented.
394 switch (texture_type) {
395 case TextureType::Texture1D:
396 coords.push_back(GetRegister(instr.gpr8));
397 break;
398 case TextureType::Texture2D:
399 coords.push_back(GetRegister(instr.gpr8.Value() + 0));
400 coords.push_back(GetRegister(instr.gpr8.Value() + 1));
401 break;
402 default:
403 UNIMPLEMENTED_MSG("Unhandled texture type {}", static_cast<u32>(texture_type));
404
405 // Fallback to interpreting as a 2D texture for now
406 coords.push_back(GetRegister(instr.gpr8.Value() + 0));
407 coords.push_back(GetRegister(instr.gpr8.Value() + 1));
408 texture_type = TextureType::Texture2D;
409 }
410
411 for (u32 element = 0; element < 2; ++element) {
412 auto params = coords;
413 MetaTexture meta{sampler, {}, {}, {}, element};
414 const Node value = Operation(OperationCode::TextureQueryLod, meta, std::move(params));
415 SetTemporal(bb, element, value);
416 }
417 for (u32 element = 0; element < 2; ++element) {
418 SetRegister(bb, instr.gpr0.Value() + element, GetTemporal(element));
419 }
420
421 break;
422 }
423 case OpCode::Id::TLDS: {
424 const Tegra::Shader::TextureType texture_type{instr.tlds.GetTextureType()};
425 const bool is_array{instr.tlds.IsArrayTexture()};
426
427 UNIMPLEMENTED_IF_MSG(instr.tlds.UsesMiscMode(TextureMiscMode::AOFFI),
428 "AOFFI is not implemented");
429 UNIMPLEMENTED_IF_MSG(instr.tlds.UsesMiscMode(TextureMiscMode::MZ), "MZ is not implemented");
430
431 if (instr.tlds.UsesMiscMode(TextureMiscMode::NODEP)) {
432 LOG_WARNING(HW_GPU, "TLDS.NODEP implementation is incomplete");
433 }
434
435 WriteTexsInstructionFloat(bb, instr, GetTldsCode(instr, texture_type, is_array));
436 break;
437 }
438 default: 232 default:
439 UNIMPLEMENTED_MSG("Unhandled memory instruction: {}", opcode->get().GetName()); 233 UNIMPLEMENTED_MSG("Unhandled memory instruction: {}", opcode->get().GetName());
440 } 234 }
@@ -442,291 +236,4 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) {
442 return pc; 236 return pc;
443} 237}
444 238
445const Sampler& ShaderIR::GetSampler(const Tegra::Shader::Sampler& sampler, TextureType type,
446 bool is_array, bool is_shadow) {
447 const auto offset = static_cast<std::size_t>(sampler.index.Value());
448
449 // If this sampler has already been used, return the existing mapping.
450 const auto itr =
451 std::find_if(used_samplers.begin(), used_samplers.end(),
452 [&](const Sampler& entry) { return entry.GetOffset() == offset; });
453 if (itr != used_samplers.end()) {
454 ASSERT(itr->GetType() == type && itr->IsArray() == is_array &&
455 itr->IsShadow() == is_shadow);
456 return *itr;
457 }
458
459 // Otherwise create a new mapping for this sampler
460 const std::size_t next_index = used_samplers.size();
461 const Sampler entry{offset, next_index, type, is_array, is_shadow};
462 return *used_samplers.emplace(entry).first;
463}
464
465void ShaderIR::WriteTexInstructionFloat(NodeBlock& bb, Instruction instr, const Node4& components) {
466 u32 dest_elem = 0;
467 for (u32 elem = 0; elem < 4; ++elem) {
468 if (!instr.tex.IsComponentEnabled(elem)) {
469 // Skip disabled components
470 continue;
471 }
472 SetTemporal(bb, dest_elem++, components[elem]);
473 }
474 // After writing values in temporals, move them to the real registers
475 for (u32 i = 0; i < dest_elem; ++i) {
476 SetRegister(bb, instr.gpr0.Value() + i, GetTemporal(i));
477 }
478}
479
480void ShaderIR::WriteTexsInstructionFloat(NodeBlock& bb, Instruction instr,
481 const Node4& components) {
482 // TEXS has two destination registers and a swizzle. The first two elements in the swizzle
483 // go into gpr0+0 and gpr0+1, and the rest goes into gpr28+0 and gpr28+1
484
485 u32 dest_elem = 0;
486 for (u32 component = 0; component < 4; ++component) {
487 if (!instr.texs.IsComponentEnabled(component))
488 continue;
489 SetTemporal(bb, dest_elem++, components[component]);
490 }
491
492 for (u32 i = 0; i < dest_elem; ++i) {
493 if (i < 2) {
494 // Write the first two swizzle components to gpr0 and gpr0+1
495 SetRegister(bb, instr.gpr0.Value() + i % 2, GetTemporal(i));
496 } else {
497 ASSERT(instr.texs.HasTwoDestinations());
498 // Write the rest of the swizzle components to gpr28 and gpr28+1
499 SetRegister(bb, instr.gpr28.Value() + i % 2, GetTemporal(i));
500 }
501 }
502}
503
504void ShaderIR::WriteTexsInstructionHalfFloat(NodeBlock& bb, Instruction instr,
505 const Node4& components) {
506 // TEXS.F16 destionation registers are packed in two registers in pairs (just like any half
507 // float instruction).
508
509 Node4 values;
510 u32 dest_elem = 0;
511 for (u32 component = 0; component < 4; ++component) {
512 if (!instr.texs.IsComponentEnabled(component))
513 continue;
514 values[dest_elem++] = components[component];
515 }
516 if (dest_elem == 0)
517 return;
518
519 std::generate(values.begin() + dest_elem, values.end(), [&]() { return Immediate(0); });
520
521 const Node first_value = Operation(OperationCode::HPack2, values[0], values[1]);
522 if (dest_elem <= 2) {
523 SetRegister(bb, instr.gpr0, first_value);
524 return;
525 }
526
527 SetTemporal(bb, 0, first_value);
528 SetTemporal(bb, 1, Operation(OperationCode::HPack2, values[2], values[3]));
529
530 SetRegister(bb, instr.gpr0, GetTemporal(0));
531 SetRegister(bb, instr.gpr28, GetTemporal(1));
532}
533
534Node4 ShaderIR::GetTextureCode(Instruction instr, TextureType texture_type,
535 TextureProcessMode process_mode, std::vector<Node> coords,
536 Node array, Node depth_compare, u32 bias_offset) {
537 const bool is_array = array;
538 const bool is_shadow = depth_compare;
539
540 UNIMPLEMENTED_IF_MSG((texture_type == TextureType::Texture3D && (is_array || is_shadow)) ||
541 (texture_type == TextureType::TextureCube && is_array && is_shadow),
542 "This method is not supported.");
543
544 const auto& sampler = GetSampler(instr.sampler, texture_type, is_array, is_shadow);
545
546 const bool lod_needed = process_mode == TextureProcessMode::LZ ||
547 process_mode == TextureProcessMode::LL ||
548 process_mode == TextureProcessMode::LLA;
549
550 // LOD selection (either via bias or explicit textureLod) not supported in GL for
551 // sampler2DArrayShadow and samplerCubeArrayShadow.
552 const bool gl_lod_supported =
553 !((texture_type == Tegra::Shader::TextureType::Texture2D && is_array && is_shadow) ||
554 (texture_type == Tegra::Shader::TextureType::TextureCube && is_array && is_shadow));
555
556 const OperationCode read_method =
557 lod_needed && gl_lod_supported ? OperationCode::TextureLod : OperationCode::Texture;
558
559 UNIMPLEMENTED_IF(process_mode != TextureProcessMode::None && !gl_lod_supported);
560
561 std::vector<Node> extras;
562 if (process_mode != TextureProcessMode::None && gl_lod_supported) {
563 if (process_mode == TextureProcessMode::LZ) {
564 extras.push_back(Immediate(0.0f));
565 } else {
566 // If present, lod or bias are always stored in the register indexed by the gpr20
567 // field with an offset depending on the usage of the other registers
568 extras.push_back(GetRegister(instr.gpr20.Value() + bias_offset));
569 }
570 }
571
572 Node4 values;
573 for (u32 element = 0; element < values.size(); ++element) {
574 auto copy_coords = coords;
575 MetaTexture meta{sampler, array, depth_compare, extras, element};
576 values[element] = Operation(read_method, meta, std::move(copy_coords));
577 }
578
579 return values;
580}
581
582Node4 ShaderIR::GetTexCode(Instruction instr, TextureType texture_type,
583 TextureProcessMode process_mode, bool depth_compare, bool is_array) {
584 const bool lod_bias_enabled =
585 (process_mode != TextureProcessMode::None && process_mode != TextureProcessMode::LZ);
586
587 const auto [coord_count, total_coord_count] = ValidateAndGetCoordinateElement(
588 texture_type, depth_compare, is_array, lod_bias_enabled, 4, 5);
589 // If enabled arrays index is always stored in the gpr8 field
590 const u64 array_register = instr.gpr8.Value();
591 // First coordinate index is the gpr8 or gpr8 + 1 when arrays are used
592 const u64 coord_register = array_register + (is_array ? 1 : 0);
593
594 std::vector<Node> coords;
595 for (std::size_t i = 0; i < coord_count; ++i) {
596 coords.push_back(GetRegister(coord_register + i));
597 }
598 // 1D.DC in OpenGL the 2nd component is ignored.
599 if (depth_compare && !is_array && texture_type == TextureType::Texture1D) {
600 coords.push_back(Immediate(0.0f));
601 }
602
603 const Node array = is_array ? GetRegister(array_register) : nullptr;
604
605 Node dc{};
606 if (depth_compare) {
607 // Depth is always stored in the register signaled by gpr20 or in the next register if lod
608 // or bias are used
609 const u64 depth_register = instr.gpr20.Value() + (lod_bias_enabled ? 1 : 0);
610 dc = GetRegister(depth_register);
611 }
612
613 return GetTextureCode(instr, texture_type, process_mode, coords, array, dc, 0);
614}
615
616Node4 ShaderIR::GetTexsCode(Instruction instr, TextureType texture_type,
617 TextureProcessMode process_mode, bool depth_compare, bool is_array) {
618 const bool lod_bias_enabled =
619 (process_mode != TextureProcessMode::None && process_mode != TextureProcessMode::LZ);
620
621 const auto [coord_count, total_coord_count] = ValidateAndGetCoordinateElement(
622 texture_type, depth_compare, is_array, lod_bias_enabled, 4, 4);
623 // If enabled arrays index is always stored in the gpr8 field
624 const u64 array_register = instr.gpr8.Value();
625 // First coordinate index is stored in gpr8 field or (gpr8 + 1) when arrays are used
626 const u64 coord_register = array_register + (is_array ? 1 : 0);
627 const u64 last_coord_register =
628 (is_array || !(lod_bias_enabled || depth_compare) || (coord_count > 2))
629 ? static_cast<u64>(instr.gpr20.Value())
630 : coord_register + 1;
631 const u32 bias_offset = coord_count > 2 ? 1 : 0;
632
633 std::vector<Node> coords;
634 for (std::size_t i = 0; i < coord_count; ++i) {
635 const bool last = (i == (coord_count - 1)) && (coord_count > 1);
636 coords.push_back(GetRegister(last ? last_coord_register : coord_register + i));
637 }
638
639 const Node array = is_array ? GetRegister(array_register) : nullptr;
640
641 Node dc{};
642 if (depth_compare) {
643 // Depth is always stored in the register signaled by gpr20 or in the next register if lod
644 // or bias are used
645 const u64 depth_register = instr.gpr20.Value() + (lod_bias_enabled ? 1 : 0);
646 dc = GetRegister(depth_register);
647 }
648
649 return GetTextureCode(instr, texture_type, process_mode, coords, array, dc, bias_offset);
650}
651
652Node4 ShaderIR::GetTld4Code(Instruction instr, TextureType texture_type, bool depth_compare,
653 bool is_array) {
654 const std::size_t coord_count = GetCoordCount(texture_type);
655 const std::size_t total_coord_count = coord_count + (is_array ? 1 : 0);
656 const std::size_t total_reg_count = total_coord_count + (depth_compare ? 1 : 0);
657
658 // If enabled arrays index is always stored in the gpr8 field
659 const u64 array_register = instr.gpr8.Value();
660 // First coordinate index is the gpr8 or gpr8 + 1 when arrays are used
661 const u64 coord_register = array_register + (is_array ? 1 : 0);
662
663 std::vector<Node> coords;
664 for (size_t i = 0; i < coord_count; ++i)
665 coords.push_back(GetRegister(coord_register + i));
666
667 const auto& sampler = GetSampler(instr.sampler, texture_type, is_array, depth_compare);
668
669 Node4 values;
670 for (u32 element = 0; element < values.size(); ++element) {
671 auto coords_copy = coords;
672 MetaTexture meta{sampler, GetRegister(array_register), {}, {}, element};
673 values[element] = Operation(OperationCode::TextureGather, meta, std::move(coords_copy));
674 }
675
676 return values;
677}
678
679Node4 ShaderIR::GetTldsCode(Instruction instr, TextureType texture_type, bool is_array) {
680 const std::size_t type_coord_count = GetCoordCount(texture_type);
681 const bool lod_enabled = instr.tlds.GetTextureProcessMode() == TextureProcessMode::LL;
682
683 // If enabled arrays index is always stored in the gpr8 field
684 const u64 array_register = instr.gpr8.Value();
685 // if is array gpr20 is used
686 const u64 coord_register = is_array ? instr.gpr20.Value() : instr.gpr8.Value();
687
688 const u64 last_coord_register =
689 ((type_coord_count > 2) || (type_coord_count == 2 && !lod_enabled)) && !is_array
690 ? static_cast<u64>(instr.gpr20.Value())
691 : coord_register + 1;
692
693 std::vector<Node> coords;
694 for (std::size_t i = 0; i < type_coord_count; ++i) {
695 const bool last = (i == (type_coord_count - 1)) && (type_coord_count > 1);
696 coords.push_back(GetRegister(last ? last_coord_register : coord_register + i));
697 }
698
699 const Node array = is_array ? GetRegister(array_register) : nullptr;
700 // When lod is used always is in gpr20
701 const Node lod = lod_enabled ? GetRegister(instr.gpr20) : Immediate(0);
702
703 const auto& sampler = GetSampler(instr.sampler, texture_type, is_array, false);
704
705 Node4 values;
706 for (u32 element = 0; element < values.size(); ++element) {
707 auto coords_copy = coords;
708 MetaTexture meta{sampler, array, {}, {lod}, element};
709 values[element] = Operation(OperationCode::TexelFetch, meta, std::move(coords_copy));
710 }
711 return values;
712}
713
714std::tuple<std::size_t, std::size_t> ShaderIR::ValidateAndGetCoordinateElement(
715 TextureType texture_type, bool depth_compare, bool is_array, bool lod_bias_enabled,
716 std::size_t max_coords, std::size_t max_inputs) {
717 const std::size_t coord_count = GetCoordCount(texture_type);
718
719 std::size_t total_coord_count = coord_count + (is_array ? 1 : 0) + (depth_compare ? 1 : 0);
720 const std::size_t total_reg_count = total_coord_count + (lod_bias_enabled ? 1 : 0);
721 if (total_coord_count > max_coords || total_reg_count > max_inputs) {
722 UNIMPLEMENTED_MSG("Unsupported Texture operation");
723 total_coord_count = std::min(total_coord_count, max_coords);
724 }
725 // 1D.DC OpenGL is using a vec3 but 2nd component is ignored later.
726 total_coord_count +=
727 (depth_compare && !is_array && texture_type == TextureType::Texture1D) ? 1 : 0;
728
729 return {coord_count, total_coord_count};
730}
731
732} // namespace VideoCommon::Shader 239} // namespace VideoCommon::Shader
diff --git a/src/video_core/shader/decode/other.cpp b/src/video_core/shader/decode/other.cpp
index f9502e3d0..d750a2936 100644
--- a/src/video_core/shader/decode/other.cpp
+++ b/src/video_core/shader/decode/other.cpp
@@ -135,7 +135,18 @@ u32 ShaderIR::DecodeOther(NodeBlock& bb, u32 pc) {
135 instr.ipa.sample_mode.Value()}; 135 instr.ipa.sample_mode.Value()};
136 136
137 const Node attr = GetInputAttribute(attribute.index, attribute.element, input_mode); 137 const Node attr = GetInputAttribute(attribute.index, attribute.element, input_mode);
138 const Node value = GetSaturatedFloat(attr, instr.ipa.saturate); 138 Node value = attr;
139 const Tegra::Shader::Attribute::Index index = attribute.index.Value();
140 if (index >= Tegra::Shader::Attribute::Index::Attribute_0 &&
141 index <= Tegra::Shader::Attribute::Index::Attribute_31) {
142 // TODO(Blinkhawk): There are cases where a perspective attribute use PASS.
143 // In theory by setting them as perspective, OpenGL does the perspective correction.
144 // A way must figured to reverse the last step of it.
145 if (input_mode.interpolation_mode == Tegra::Shader::IpaInterpMode::Multiply) {
146 value = Operation(OperationCode::FMul, PRECISE, value, GetRegister(instr.gpr20));
147 }
148 }
149 value = GetSaturatedFloat(value, instr.ipa.saturate);
139 150
140 SetRegister(bb, instr.gpr0, value); 151 SetRegister(bb, instr.gpr0, value);
141 break; 152 break;
@@ -175,4 +186,4 @@ u32 ShaderIR::DecodeOther(NodeBlock& bb, u32 pc) {
175 return pc; 186 return pc;
176} 187}
177 188
178} // namespace VideoCommon::Shader \ No newline at end of file 189} // namespace VideoCommon::Shader
diff --git a/src/video_core/shader/decode/texture.cpp b/src/video_core/shader/decode/texture.cpp
new file mode 100644
index 000000000..a99ae19bf
--- /dev/null
+++ b/src/video_core/shader/decode/texture.cpp
@@ -0,0 +1,534 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <algorithm>
6#include <vector>
7#include <fmt/format.h>
8
9#include "common/assert.h"
10#include "common/common_types.h"
11#include "video_core/engines/shader_bytecode.h"
12#include "video_core/shader/shader_ir.h"
13
14namespace VideoCommon::Shader {
15
16using Tegra::Shader::Instruction;
17using Tegra::Shader::OpCode;
18using Tegra::Shader::Register;
19using Tegra::Shader::TextureMiscMode;
20using Tegra::Shader::TextureProcessMode;
21using Tegra::Shader::TextureType;
22
23static std::size_t GetCoordCount(TextureType texture_type) {
24 switch (texture_type) {
25 case TextureType::Texture1D:
26 return 1;
27 case TextureType::Texture2D:
28 return 2;
29 case TextureType::Texture3D:
30 case TextureType::TextureCube:
31 return 3;
32 default:
33 UNIMPLEMENTED_MSG("Unhandled texture type: {}", static_cast<u32>(texture_type));
34 return 0;
35 }
36}
37
38u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) {
39 const Instruction instr = {program_code[pc]};
40 const auto opcode = OpCode::Decode(instr);
41
42 switch (opcode->get().GetId()) {
43 case OpCode::Id::TEX: {
44 UNIMPLEMENTED_IF_MSG(instr.tex.UsesMiscMode(TextureMiscMode::AOFFI),
45 "AOFFI is not implemented");
46
47 if (instr.tex.UsesMiscMode(TextureMiscMode::NODEP)) {
48 LOG_WARNING(HW_GPU, "TEX.NODEP implementation is incomplete");
49 }
50
51 const TextureType texture_type{instr.tex.texture_type};
52 const bool is_array = instr.tex.array != 0;
53 const bool depth_compare = instr.tex.UsesMiscMode(TextureMiscMode::DC);
54 const auto process_mode = instr.tex.GetTextureProcessMode();
55 WriteTexInstructionFloat(
56 bb, instr, GetTexCode(instr, texture_type, process_mode, depth_compare, is_array));
57 break;
58 }
59 case OpCode::Id::TEXS: {
60 const TextureType texture_type{instr.texs.GetTextureType()};
61 const bool is_array{instr.texs.IsArrayTexture()};
62 const bool depth_compare = instr.texs.UsesMiscMode(TextureMiscMode::DC);
63 const auto process_mode = instr.texs.GetTextureProcessMode();
64
65 if (instr.texs.UsesMiscMode(TextureMiscMode::NODEP)) {
66 LOG_WARNING(HW_GPU, "TEXS.NODEP implementation is incomplete");
67 }
68
69 const Node4 components =
70 GetTexsCode(instr, texture_type, process_mode, depth_compare, is_array);
71
72 if (instr.texs.fp32_flag) {
73 WriteTexsInstructionFloat(bb, instr, components);
74 } else {
75 WriteTexsInstructionHalfFloat(bb, instr, components);
76 }
77 break;
78 }
79 case OpCode::Id::TLD4: {
80 ASSERT(instr.tld4.array == 0);
81 UNIMPLEMENTED_IF_MSG(instr.tld4.UsesMiscMode(TextureMiscMode::AOFFI),
82 "AOFFI is not implemented");
83 UNIMPLEMENTED_IF_MSG(instr.tld4.UsesMiscMode(TextureMiscMode::NDV),
84 "NDV is not implemented");
85 UNIMPLEMENTED_IF_MSG(instr.tld4.UsesMiscMode(TextureMiscMode::PTP),
86 "PTP is not implemented");
87
88 if (instr.tld4.UsesMiscMode(TextureMiscMode::NODEP)) {
89 LOG_WARNING(HW_GPU, "TLD4.NODEP implementation is incomplete");
90 }
91
92 const auto texture_type = instr.tld4.texture_type.Value();
93 const bool depth_compare = instr.tld4.UsesMiscMode(TextureMiscMode::DC);
94 const bool is_array = instr.tld4.array != 0;
95 WriteTexInstructionFloat(bb, instr,
96 GetTld4Code(instr, texture_type, depth_compare, is_array));
97 break;
98 }
99 case OpCode::Id::TLD4S: {
100 UNIMPLEMENTED_IF_MSG(instr.tld4s.UsesMiscMode(TextureMiscMode::AOFFI),
101 "AOFFI is not implemented");
102 if (instr.tld4s.UsesMiscMode(TextureMiscMode::NODEP)) {
103 LOG_WARNING(HW_GPU, "TLD4S.NODEP implementation is incomplete");
104 }
105
106 const bool depth_compare = instr.tld4s.UsesMiscMode(TextureMiscMode::DC);
107 const Node op_a = GetRegister(instr.gpr8);
108 const Node op_b = GetRegister(instr.gpr20);
109
110 // TODO(Subv): Figure out how the sampler type is encoded in the TLD4S instruction.
111 std::vector<Node> coords;
112 if (depth_compare) {
113 // Note: TLD4S coordinate encoding works just like TEXS's
114 const Node op_y = GetRegister(instr.gpr8.Value() + 1);
115 coords.push_back(op_a);
116 coords.push_back(op_y);
117 coords.push_back(op_b);
118 } else {
119 coords.push_back(op_a);
120 coords.push_back(op_b);
121 }
122 const Node component = Immediate(static_cast<u32>(instr.tld4s.component));
123
124 const auto& sampler =
125 GetSampler(instr.sampler, TextureType::Texture2D, false, depth_compare);
126
127 Node4 values;
128 for (u32 element = 0; element < values.size(); ++element) {
129 auto coords_copy = coords;
130 MetaTexture meta{sampler, {}, {}, {}, {}, component, element};
131 values[element] = Operation(OperationCode::TextureGather, meta, std::move(coords_copy));
132 }
133
134 WriteTexsInstructionFloat(bb, instr, values);
135 break;
136 }
137 case OpCode::Id::TXQ: {
138 if (instr.txq.UsesMiscMode(TextureMiscMode::NODEP)) {
139 LOG_WARNING(HW_GPU, "TXQ.NODEP implementation is incomplete");
140 }
141
142 // TODO: The new commits on the texture refactor, change the way samplers work.
143 // Sadly, not all texture instructions specify the type of texture their sampler
144 // uses. This must be fixed at a later instance.
145 const auto& sampler =
146 GetSampler(instr.sampler, Tegra::Shader::TextureType::Texture2D, false, false);
147
148 u32 indexer = 0;
149 switch (instr.txq.query_type) {
150 case Tegra::Shader::TextureQueryType::Dimension: {
151 for (u32 element = 0; element < 4; ++element) {
152 if (!instr.txq.IsComponentEnabled(element)) {
153 continue;
154 }
155 MetaTexture meta{sampler, {}, {}, {}, {}, {}, element};
156 const Node value =
157 Operation(OperationCode::TextureQueryDimensions, meta, GetRegister(instr.gpr8));
158 SetTemporal(bb, indexer++, value);
159 }
160 for (u32 i = 0; i < indexer; ++i) {
161 SetRegister(bb, instr.gpr0.Value() + i, GetTemporal(i));
162 }
163 break;
164 }
165 default:
166 UNIMPLEMENTED_MSG("Unhandled texture query type: {}",
167 static_cast<u32>(instr.txq.query_type.Value()));
168 }
169 break;
170 }
171 case OpCode::Id::TMML: {
172 UNIMPLEMENTED_IF_MSG(instr.tmml.UsesMiscMode(Tegra::Shader::TextureMiscMode::NDV),
173 "NDV is not implemented");
174
175 if (instr.tmml.UsesMiscMode(TextureMiscMode::NODEP)) {
176 LOG_WARNING(HW_GPU, "TMML.NODEP implementation is incomplete");
177 }
178
179 auto texture_type = instr.tmml.texture_type.Value();
180 const bool is_array = instr.tmml.array != 0;
181 const auto& sampler = GetSampler(instr.sampler, texture_type, is_array, false);
182
183 std::vector<Node> coords;
184
185 // TODO: Add coordinates for different samplers once other texture types are implemented.
186 switch (texture_type) {
187 case TextureType::Texture1D:
188 coords.push_back(GetRegister(instr.gpr8));
189 break;
190 case TextureType::Texture2D:
191 coords.push_back(GetRegister(instr.gpr8.Value() + 0));
192 coords.push_back(GetRegister(instr.gpr8.Value() + 1));
193 break;
194 default:
195 UNIMPLEMENTED_MSG("Unhandled texture type {}", static_cast<u32>(texture_type));
196
197 // Fallback to interpreting as a 2D texture for now
198 coords.push_back(GetRegister(instr.gpr8.Value() + 0));
199 coords.push_back(GetRegister(instr.gpr8.Value() + 1));
200 texture_type = TextureType::Texture2D;
201 }
202
203 for (u32 element = 0; element < 2; ++element) {
204 auto params = coords;
205 MetaTexture meta{sampler, {}, {}, {}, {}, {}, element};
206 const Node value = Operation(OperationCode::TextureQueryLod, meta, std::move(params));
207 SetTemporal(bb, element, value);
208 }
209 for (u32 element = 0; element < 2; ++element) {
210 SetRegister(bb, instr.gpr0.Value() + element, GetTemporal(element));
211 }
212
213 break;
214 }
215 case OpCode::Id::TLDS: {
216 const Tegra::Shader::TextureType texture_type{instr.tlds.GetTextureType()};
217 const bool is_array{instr.tlds.IsArrayTexture()};
218
219 UNIMPLEMENTED_IF_MSG(instr.tlds.UsesMiscMode(TextureMiscMode::AOFFI),
220 "AOFFI is not implemented");
221 UNIMPLEMENTED_IF_MSG(instr.tlds.UsesMiscMode(TextureMiscMode::MZ), "MZ is not implemented");
222
223 if (instr.tlds.UsesMiscMode(TextureMiscMode::NODEP)) {
224 LOG_WARNING(HW_GPU, "TLDS.NODEP implementation is incomplete");
225 }
226
227 WriteTexsInstructionFloat(bb, instr, GetTldsCode(instr, texture_type, is_array));
228 break;
229 }
230 default:
231 UNIMPLEMENTED_MSG("Unhandled memory instruction: {}", opcode->get().GetName());
232 }
233
234 return pc;
235}
236
237const Sampler& ShaderIR::GetSampler(const Tegra::Shader::Sampler& sampler, TextureType type,
238 bool is_array, bool is_shadow) {
239 const auto offset = static_cast<std::size_t>(sampler.index.Value());
240
241 // If this sampler has already been used, return the existing mapping.
242 const auto itr =
243 std::find_if(used_samplers.begin(), used_samplers.end(),
244 [&](const Sampler& entry) { return entry.GetOffset() == offset; });
245 if (itr != used_samplers.end()) {
246 ASSERT(itr->GetType() == type && itr->IsArray() == is_array &&
247 itr->IsShadow() == is_shadow);
248 return *itr;
249 }
250
251 // Otherwise create a new mapping for this sampler
252 const std::size_t next_index = used_samplers.size();
253 const Sampler entry{offset, next_index, type, is_array, is_shadow};
254 return *used_samplers.emplace(entry).first;
255}
256
257void ShaderIR::WriteTexInstructionFloat(NodeBlock& bb, Instruction instr, const Node4& components) {
258 u32 dest_elem = 0;
259 for (u32 elem = 0; elem < 4; ++elem) {
260 if (!instr.tex.IsComponentEnabled(elem)) {
261 // Skip disabled components
262 continue;
263 }
264 SetTemporal(bb, dest_elem++, components[elem]);
265 }
266 // After writing values in temporals, move them to the real registers
267 for (u32 i = 0; i < dest_elem; ++i) {
268 SetRegister(bb, instr.gpr0.Value() + i, GetTemporal(i));
269 }
270}
271
272void ShaderIR::WriteTexsInstructionFloat(NodeBlock& bb, Instruction instr,
273 const Node4& components) {
274 // TEXS has two destination registers and a swizzle. The first two elements in the swizzle
275 // go into gpr0+0 and gpr0+1, and the rest goes into gpr28+0 and gpr28+1
276
277 u32 dest_elem = 0;
278 for (u32 component = 0; component < 4; ++component) {
279 if (!instr.texs.IsComponentEnabled(component))
280 continue;
281 SetTemporal(bb, dest_elem++, components[component]);
282 }
283
284 for (u32 i = 0; i < dest_elem; ++i) {
285 if (i < 2) {
286 // Write the first two swizzle components to gpr0 and gpr0+1
287 SetRegister(bb, instr.gpr0.Value() + i % 2, GetTemporal(i));
288 } else {
289 ASSERT(instr.texs.HasTwoDestinations());
290 // Write the rest of the swizzle components to gpr28 and gpr28+1
291 SetRegister(bb, instr.gpr28.Value() + i % 2, GetTemporal(i));
292 }
293 }
294}
295
296void ShaderIR::WriteTexsInstructionHalfFloat(NodeBlock& bb, Instruction instr,
297 const Node4& components) {
298 // TEXS.F16 destionation registers are packed in two registers in pairs (just like any half
299 // float instruction).
300
301 Node4 values;
302 u32 dest_elem = 0;
303 for (u32 component = 0; component < 4; ++component) {
304 if (!instr.texs.IsComponentEnabled(component))
305 continue;
306 values[dest_elem++] = components[component];
307 }
308 if (dest_elem == 0)
309 return;
310
311 std::generate(values.begin() + dest_elem, values.end(), [&]() { return Immediate(0); });
312
313 const Node first_value = Operation(OperationCode::HPack2, values[0], values[1]);
314 if (dest_elem <= 2) {
315 SetRegister(bb, instr.gpr0, first_value);
316 return;
317 }
318
319 SetTemporal(bb, 0, first_value);
320 SetTemporal(bb, 1, Operation(OperationCode::HPack2, values[2], values[3]));
321
322 SetRegister(bb, instr.gpr0, GetTemporal(0));
323 SetRegister(bb, instr.gpr28, GetTemporal(1));
324}
325
326Node4 ShaderIR::GetTextureCode(Instruction instr, TextureType texture_type,
327 TextureProcessMode process_mode, std::vector<Node> coords,
328 Node array, Node depth_compare, u32 bias_offset) {
329 const bool is_array = array;
330 const bool is_shadow = depth_compare;
331
332 UNIMPLEMENTED_IF_MSG((texture_type == TextureType::Texture3D && (is_array || is_shadow)) ||
333 (texture_type == TextureType::TextureCube && is_array && is_shadow),
334 "This method is not supported.");
335
336 const auto& sampler = GetSampler(instr.sampler, texture_type, is_array, is_shadow);
337
338 const bool lod_needed = process_mode == TextureProcessMode::LZ ||
339 process_mode == TextureProcessMode::LL ||
340 process_mode == TextureProcessMode::LLA;
341
342 // LOD selection (either via bias or explicit textureLod) not supported in GL for
343 // sampler2DArrayShadow and samplerCubeArrayShadow.
344 const bool gl_lod_supported =
345 !((texture_type == Tegra::Shader::TextureType::Texture2D && is_array && is_shadow) ||
346 (texture_type == Tegra::Shader::TextureType::TextureCube && is_array && is_shadow));
347
348 const OperationCode read_method =
349 (lod_needed && gl_lod_supported) ? OperationCode::TextureLod : OperationCode::Texture;
350
351 UNIMPLEMENTED_IF(process_mode != TextureProcessMode::None && !gl_lod_supported);
352
353 Node bias = {};
354 Node lod = {};
355 if (process_mode != TextureProcessMode::None && gl_lod_supported) {
356 switch (process_mode) {
357 case TextureProcessMode::LZ:
358 lod = Immediate(0.0f);
359 break;
360 case TextureProcessMode::LB:
361 // If present, lod or bias are always stored in the register indexed by the gpr20
362 // field with an offset depending on the usage of the other registers
363 bias = GetRegister(instr.gpr20.Value() + bias_offset);
364 break;
365 case TextureProcessMode::LL:
366 lod = GetRegister(instr.gpr20.Value() + bias_offset);
367 break;
368 default:
369 UNIMPLEMENTED_MSG("Unimplemented process mode={}", static_cast<u32>(process_mode));
370 break;
371 }
372 }
373
374 Node4 values;
375 for (u32 element = 0; element < values.size(); ++element) {
376 auto copy_coords = coords;
377 MetaTexture meta{sampler, array, depth_compare, bias, lod, {}, element};
378 values[element] = Operation(read_method, meta, std::move(copy_coords));
379 }
380
381 return values;
382}
383
384Node4 ShaderIR::GetTexCode(Instruction instr, TextureType texture_type,
385 TextureProcessMode process_mode, bool depth_compare, bool is_array) {
386 const bool lod_bias_enabled =
387 (process_mode != TextureProcessMode::None && process_mode != TextureProcessMode::LZ);
388
389 const auto [coord_count, total_coord_count] = ValidateAndGetCoordinateElement(
390 texture_type, depth_compare, is_array, lod_bias_enabled, 4, 5);
391 // If enabled arrays index is always stored in the gpr8 field
392 const u64 array_register = instr.gpr8.Value();
393 // First coordinate index is the gpr8 or gpr8 + 1 when arrays are used
394 const u64 coord_register = array_register + (is_array ? 1 : 0);
395
396 std::vector<Node> coords;
397 for (std::size_t i = 0; i < coord_count; ++i) {
398 coords.push_back(GetRegister(coord_register + i));
399 }
400 // 1D.DC in OpenGL the 2nd component is ignored.
401 if (depth_compare && !is_array && texture_type == TextureType::Texture1D) {
402 coords.push_back(Immediate(0.0f));
403 }
404
405 const Node array = is_array ? GetRegister(array_register) : nullptr;
406
407 Node dc{};
408 if (depth_compare) {
409 // Depth is always stored in the register signaled by gpr20 or in the next register if lod
410 // or bias are used
411 const u64 depth_register = instr.gpr20.Value() + (lod_bias_enabled ? 1 : 0);
412 dc = GetRegister(depth_register);
413 }
414
415 return GetTextureCode(instr, texture_type, process_mode, coords, array, dc, 0);
416}
417
418Node4 ShaderIR::GetTexsCode(Instruction instr, TextureType texture_type,
419 TextureProcessMode process_mode, bool depth_compare, bool is_array) {
420 const bool lod_bias_enabled =
421 (process_mode != TextureProcessMode::None && process_mode != TextureProcessMode::LZ);
422
423 const auto [coord_count, total_coord_count] = ValidateAndGetCoordinateElement(
424 texture_type, depth_compare, is_array, lod_bias_enabled, 4, 4);
425 // If enabled arrays index is always stored in the gpr8 field
426 const u64 array_register = instr.gpr8.Value();
427 // First coordinate index is stored in gpr8 field or (gpr8 + 1) when arrays are used
428 const u64 coord_register = array_register + (is_array ? 1 : 0);
429 const u64 last_coord_register =
430 (is_array || !(lod_bias_enabled || depth_compare) || (coord_count > 2))
431 ? static_cast<u64>(instr.gpr20.Value())
432 : coord_register + 1;
433 const u32 bias_offset = coord_count > 2 ? 1 : 0;
434
435 std::vector<Node> coords;
436 for (std::size_t i = 0; i < coord_count; ++i) {
437 const bool last = (i == (coord_count - 1)) && (coord_count > 1);
438 coords.push_back(GetRegister(last ? last_coord_register : coord_register + i));
439 }
440
441 const Node array = is_array ? GetRegister(array_register) : nullptr;
442
443 Node dc{};
444 if (depth_compare) {
445 // Depth is always stored in the register signaled by gpr20 or in the next register if lod
446 // or bias are used
447 const u64 depth_register = instr.gpr20.Value() + (lod_bias_enabled ? 1 : 0);
448 dc = GetRegister(depth_register);
449 }
450
451 return GetTextureCode(instr, texture_type, process_mode, coords, array, dc, bias_offset);
452}
453
454Node4 ShaderIR::GetTld4Code(Instruction instr, TextureType texture_type, bool depth_compare,
455 bool is_array) {
456 const std::size_t coord_count = GetCoordCount(texture_type);
457 const std::size_t total_coord_count = coord_count + (is_array ? 1 : 0);
458 const std::size_t total_reg_count = total_coord_count + (depth_compare ? 1 : 0);
459
460 // If enabled arrays index is always stored in the gpr8 field
461 const u64 array_register = instr.gpr8.Value();
462 // First coordinate index is the gpr8 or gpr8 + 1 when arrays are used
463 const u64 coord_register = array_register + (is_array ? 1 : 0);
464
465 std::vector<Node> coords;
466 for (size_t i = 0; i < coord_count; ++i)
467 coords.push_back(GetRegister(coord_register + i));
468
469 const auto& sampler = GetSampler(instr.sampler, texture_type, is_array, depth_compare);
470
471 Node4 values;
472 for (u32 element = 0; element < values.size(); ++element) {
473 auto coords_copy = coords;
474 MetaTexture meta{sampler, GetRegister(array_register), {}, {}, {}, {}, element};
475 values[element] = Operation(OperationCode::TextureGather, meta, std::move(coords_copy));
476 }
477
478 return values;
479}
480
481Node4 ShaderIR::GetTldsCode(Instruction instr, TextureType texture_type, bool is_array) {
482 const std::size_t type_coord_count = GetCoordCount(texture_type);
483 const bool lod_enabled = instr.tlds.GetTextureProcessMode() == TextureProcessMode::LL;
484
485 // If enabled arrays index is always stored in the gpr8 field
486 const u64 array_register = instr.gpr8.Value();
487 // if is array gpr20 is used
488 const u64 coord_register = is_array ? instr.gpr20.Value() : instr.gpr8.Value();
489
490 const u64 last_coord_register =
491 ((type_coord_count > 2) || (type_coord_count == 2 && !lod_enabled)) && !is_array
492 ? static_cast<u64>(instr.gpr20.Value())
493 : coord_register + 1;
494
495 std::vector<Node> coords;
496 for (std::size_t i = 0; i < type_coord_count; ++i) {
497 const bool last = (i == (type_coord_count - 1)) && (type_coord_count > 1);
498 coords.push_back(GetRegister(last ? last_coord_register : coord_register + i));
499 }
500
501 const Node array = is_array ? GetRegister(array_register) : nullptr;
502 // When lod is used always is in gpr20
503 const Node lod = lod_enabled ? GetRegister(instr.gpr20) : Immediate(0);
504
505 const auto& sampler = GetSampler(instr.sampler, texture_type, is_array, false);
506
507 Node4 values;
508 for (u32 element = 0; element < values.size(); ++element) {
509 auto coords_copy = coords;
510 MetaTexture meta{sampler, array, {}, {}, lod, {}, element};
511 values[element] = Operation(OperationCode::TexelFetch, meta, std::move(coords_copy));
512 }
513 return values;
514}
515
516std::tuple<std::size_t, std::size_t> ShaderIR::ValidateAndGetCoordinateElement(
517 TextureType texture_type, bool depth_compare, bool is_array, bool lod_bias_enabled,
518 std::size_t max_coords, std::size_t max_inputs) {
519 const std::size_t coord_count = GetCoordCount(texture_type);
520
521 std::size_t total_coord_count = coord_count + (is_array ? 1 : 0) + (depth_compare ? 1 : 0);
522 const std::size_t total_reg_count = total_coord_count + (lod_bias_enabled ? 1 : 0);
523 if (total_coord_count > max_coords || total_reg_count > max_inputs) {
524 UNIMPLEMENTED_MSG("Unsupported Texture operation");
525 total_coord_count = std::min(total_coord_count, max_coords);
526 }
527 // 1D.DC OpenGL is using a vec3 but 2nd component is ignored later.
528 total_coord_count +=
529 (depth_compare && !is_array && texture_type == TextureType::Texture1D) ? 1 : 0;
530
531 return {coord_count, total_coord_count};
532}
533
534} // namespace VideoCommon::Shader \ No newline at end of file
diff --git a/src/video_core/shader/shader_ir.h b/src/video_core/shader/shader_ir.h
index 52c7f2c4e..5bc3a3900 100644
--- a/src/video_core/shader/shader_ir.h
+++ b/src/video_core/shader/shader_ir.h
@@ -290,7 +290,9 @@ struct MetaTexture {
290 const Sampler& sampler; 290 const Sampler& sampler;
291 Node array{}; 291 Node array{};
292 Node depth_compare{}; 292 Node depth_compare{};
293 std::vector<Node> extras; 293 Node bias{};
294 Node lod{};
295 Node component{};
294 u32 element{}; 296 u32 element{};
295}; 297};
296 298
@@ -614,6 +616,7 @@ private:
614 u32 DecodeHfma2(NodeBlock& bb, u32 pc); 616 u32 DecodeHfma2(NodeBlock& bb, u32 pc);
615 u32 DecodeConversion(NodeBlock& bb, u32 pc); 617 u32 DecodeConversion(NodeBlock& bb, u32 pc);
616 u32 DecodeMemory(NodeBlock& bb, u32 pc); 618 u32 DecodeMemory(NodeBlock& bb, u32 pc);
619 u32 DecodeTexture(NodeBlock& bb, u32 pc);
617 u32 DecodeFloatSetPredicate(NodeBlock& bb, u32 pc); 620 u32 DecodeFloatSetPredicate(NodeBlock& bb, u32 pc);
618 u32 DecodeIntegerSetPredicate(NodeBlock& bb, u32 pc); 621 u32 DecodeIntegerSetPredicate(NodeBlock& bb, u32 pc);
619 u32 DecodeHalfSetPredicate(NodeBlock& bb, u32 pc); 622 u32 DecodeHalfSetPredicate(NodeBlock& bb, u32 pc);
diff --git a/src/video_core/shader/track.cpp b/src/video_core/shader/track.cpp
index be4635342..33b071747 100644
--- a/src/video_core/shader/track.cpp
+++ b/src/video_core/shader/track.cpp
@@ -20,9 +20,9 @@ std::pair<Node, s64> FindOperation(const NodeBlock& code, s64 cursor,
20 return {node, cursor}; 20 return {node, cursor};
21 } 21 }
22 if (const auto conditional = std::get_if<ConditionalNode>(node)) { 22 if (const auto conditional = std::get_if<ConditionalNode>(node)) {
23 const auto& code = conditional->GetCode(); 23 const auto& conditional_code = conditional->GetCode();
24 const auto [found, internal_cursor] = 24 const auto [found, internal_cursor] = FindOperation(
25 FindOperation(code, static_cast<s64>(code.size() - 1), operation_code); 25 conditional_code, static_cast<s64>(conditional_code.size() - 1), operation_code);
26 if (found) 26 if (found)
27 return {found, cursor}; 27 return {found, cursor};
28 } 28 }
@@ -58,8 +58,8 @@ Node ShaderIR::TrackCbuf(Node tracked, const NodeBlock& code, s64 cursor) {
58 return nullptr; 58 return nullptr;
59 } 59 }
60 if (const auto conditional = std::get_if<ConditionalNode>(tracked)) { 60 if (const auto conditional = std::get_if<ConditionalNode>(tracked)) {
61 const auto& code = conditional->GetCode(); 61 const auto& conditional_code = conditional->GetCode();
62 return TrackCbuf(tracked, code, static_cast<s64>(code.size())); 62 return TrackCbuf(tracked, conditional_code, static_cast<s64>(conditional_code.size()));
63 } 63 }
64 return nullptr; 64 return nullptr;
65} 65}
diff --git a/src/video_core/surface.cpp b/src/video_core/surface.cpp
index 044ba116a..a7ac26d71 100644
--- a/src/video_core/surface.cpp
+++ b/src/video_core/surface.cpp
@@ -89,8 +89,6 @@ PixelFormat PixelFormatFromDepthFormat(Tegra::DepthFormat format) {
89 89
90PixelFormat PixelFormatFromRenderTargetFormat(Tegra::RenderTargetFormat format) { 90PixelFormat PixelFormatFromRenderTargetFormat(Tegra::RenderTargetFormat format) {
91 switch (format) { 91 switch (format) {
92 // TODO (Hexagon12): Converting SRGBA to RGBA is a hack and doesn't completely correct the
93 // gamma.
94 case Tegra::RenderTargetFormat::RGBA8_SRGB: 92 case Tegra::RenderTargetFormat::RGBA8_SRGB:
95 return PixelFormat::RGBA8_SRGB; 93 return PixelFormat::RGBA8_SRGB;
96 case Tegra::RenderTargetFormat::RGBA8_UNORM: 94 case Tegra::RenderTargetFormat::RGBA8_UNORM:
diff --git a/src/video_core/textures/astc.cpp b/src/video_core/textures/astc.cpp
index bc50a4876..b508d64e9 100644
--- a/src/video_core/textures/astc.cpp
+++ b/src/video_core/textures/astc.cpp
@@ -23,28 +23,12 @@
23 23
24#include "video_core/textures/astc.h" 24#include "video_core/textures/astc.h"
25 25
26class BitStream { 26class InputBitStream {
27public: 27public:
28 explicit BitStream(unsigned char* ptr, int nBits = 0, int start_offset = 0) 28 explicit InputBitStream(const unsigned char* ptr, int nBits = 0, int start_offset = 0)
29 : m_NumBits(nBits), m_CurByte(ptr), m_NextBit(start_offset % 8) {} 29 : m_NumBits(nBits), m_CurByte(ptr), m_NextBit(start_offset % 8) {}
30 30
31 ~BitStream() = default; 31 ~InputBitStream() = default;
32
33 int GetBitsWritten() const {
34 return m_BitsWritten;
35 }
36
37 void WriteBitsR(unsigned int val, unsigned int nBits) {
38 for (unsigned int i = 0; i < nBits; i++) {
39 WriteBit((val >> (nBits - i - 1)) & 1);
40 }
41 }
42
43 void WriteBits(unsigned int val, unsigned int nBits) {
44 for (unsigned int i = 0; i < nBits; i++) {
45 WriteBit((val >> i) & 1);
46 }
47 }
48 32
49 int GetBitsRead() const { 33 int GetBitsRead() const {
50 return m_BitsRead; 34 return m_BitsRead;
@@ -71,6 +55,38 @@ public:
71 } 55 }
72 56
73private: 57private:
58 const int m_NumBits;
59 const unsigned char* m_CurByte;
60 int m_NextBit = 0;
61 int m_BitsRead = 0;
62
63 bool done = false;
64};
65
66class OutputBitStream {
67public:
68 explicit OutputBitStream(unsigned char* ptr, int nBits = 0, int start_offset = 0)
69 : m_NumBits(nBits), m_CurByte(ptr), m_NextBit(start_offset % 8) {}
70
71 ~OutputBitStream() = default;
72
73 int GetBitsWritten() const {
74 return m_BitsWritten;
75 }
76
77 void WriteBitsR(unsigned int val, unsigned int nBits) {
78 for (unsigned int i = 0; i < nBits; i++) {
79 WriteBit((val >> (nBits - i - 1)) & 1);
80 }
81 }
82
83 void WriteBits(unsigned int val, unsigned int nBits) {
84 for (unsigned int i = 0; i < nBits; i++) {
85 WriteBit((val >> i) & 1);
86 }
87 }
88
89private:
74 void WriteBit(int b) { 90 void WriteBit(int b) {
75 91
76 if (done) 92 if (done)
@@ -238,8 +254,8 @@ public:
238 // Fills result with the values that are encoded in the given 254 // Fills result with the values that are encoded in the given
239 // bitstream. We must know beforehand what the maximum possible 255 // bitstream. We must know beforehand what the maximum possible
240 // value is, and how many values we're decoding. 256 // value is, and how many values we're decoding.
241 static void DecodeIntegerSequence(std::vector<IntegerEncodedValue>& result, BitStream& bits, 257 static void DecodeIntegerSequence(std::vector<IntegerEncodedValue>& result,
242 uint32_t maxRange, uint32_t nValues) { 258 InputBitStream& bits, uint32_t maxRange, uint32_t nValues) {
243 // Determine encoding parameters 259 // Determine encoding parameters
244 IntegerEncodedValue val = IntegerEncodedValue::CreateEncoding(maxRange); 260 IntegerEncodedValue val = IntegerEncodedValue::CreateEncoding(maxRange);
245 261
@@ -267,7 +283,7 @@ public:
267 } 283 }
268 284
269private: 285private:
270 static void DecodeTritBlock(BitStream& bits, std::vector<IntegerEncodedValue>& result, 286 static void DecodeTritBlock(InputBitStream& bits, std::vector<IntegerEncodedValue>& result,
271 uint32_t nBitsPerValue) { 287 uint32_t nBitsPerValue) {
272 // Implement the algorithm in section C.2.12 288 // Implement the algorithm in section C.2.12
273 uint32_t m[5]; 289 uint32_t m[5];
@@ -327,7 +343,7 @@ private:
327 } 343 }
328 } 344 }
329 345
330 static void DecodeQuintBlock(BitStream& bits, std::vector<IntegerEncodedValue>& result, 346 static void DecodeQuintBlock(InputBitStream& bits, std::vector<IntegerEncodedValue>& result,
331 uint32_t nBitsPerValue) { 347 uint32_t nBitsPerValue) {
332 // Implement the algorithm in section C.2.12 348 // Implement the algorithm in section C.2.12
333 uint32_t m[3]; 349 uint32_t m[3];
@@ -406,7 +422,7 @@ struct TexelWeightParams {
406 } 422 }
407}; 423};
408 424
409static TexelWeightParams DecodeBlockInfo(BitStream& strm) { 425static TexelWeightParams DecodeBlockInfo(InputBitStream& strm) {
410 TexelWeightParams params; 426 TexelWeightParams params;
411 427
412 // Read the entire block mode all at once 428 // Read the entire block mode all at once
@@ -605,7 +621,7 @@ static TexelWeightParams DecodeBlockInfo(BitStream& strm) {
605 return params; 621 return params;
606} 622}
607 623
608static void FillVoidExtentLDR(BitStream& strm, uint32_t* const outBuf, uint32_t blockWidth, 624static void FillVoidExtentLDR(InputBitStream& strm, uint32_t* const outBuf, uint32_t blockWidth,
609 uint32_t blockHeight) { 625 uint32_t blockHeight) {
610 // Don't actually care about the void extent, just read the bits... 626 // Don't actually care about the void extent, just read the bits...
611 for (int i = 0; i < 4; ++i) { 627 for (int i = 0; i < 4; ++i) {
@@ -821,7 +837,7 @@ static void DecodeColorValues(uint32_t* out, uint8_t* data, const uint32_t* mode
821 837
822 // We now have enough to decode our integer sequence. 838 // We now have enough to decode our integer sequence.
823 std::vector<IntegerEncodedValue> decodedColorValues; 839 std::vector<IntegerEncodedValue> decodedColorValues;
824 BitStream colorStream(data); 840 InputBitStream colorStream(data);
825 IntegerEncodedValue::DecodeIntegerSequence(decodedColorValues, colorStream, range, nValues); 841 IntegerEncodedValue::DecodeIntegerSequence(decodedColorValues, colorStream, range, nValues);
826 842
827 // Once we have the decoded values, we need to dequantize them to the 0-255 range 843 // Once we have the decoded values, we need to dequantize them to the 0-255 range
@@ -1365,9 +1381,9 @@ static void ComputeEndpoints(Pixel& ep1, Pixel& ep2, const uint32_t*& colorValue
1365#undef READ_INT_VALUES 1381#undef READ_INT_VALUES
1366} 1382}
1367 1383
1368static void DecompressBlock(uint8_t inBuf[16], const uint32_t blockWidth, 1384static void DecompressBlock(const uint8_t inBuf[16], const uint32_t blockWidth,
1369 const uint32_t blockHeight, uint32_t* outBuf) { 1385 const uint32_t blockHeight, uint32_t* outBuf) {
1370 BitStream strm(inBuf); 1386 InputBitStream strm(inBuf);
1371 TexelWeightParams weightParams = DecodeBlockInfo(strm); 1387 TexelWeightParams weightParams = DecodeBlockInfo(strm);
1372 1388
1373 // Was there an error? 1389 // Was there an error?
@@ -1421,7 +1437,7 @@ static void DecompressBlock(uint8_t inBuf[16], const uint32_t blockWidth,
1421 // Define color data. 1437 // Define color data.
1422 uint8_t colorEndpointData[16]; 1438 uint8_t colorEndpointData[16];
1423 memset(colorEndpointData, 0, sizeof(colorEndpointData)); 1439 memset(colorEndpointData, 0, sizeof(colorEndpointData));
1424 BitStream colorEndpointStream(colorEndpointData, 16 * 8, 0); 1440 OutputBitStream colorEndpointStream(colorEndpointData, 16 * 8, 0);
1425 1441
1426 // Read extra config data... 1442 // Read extra config data...
1427 uint32_t baseCEM = 0; 1443 uint32_t baseCEM = 0;
@@ -1549,7 +1565,7 @@ static void DecompressBlock(uint8_t inBuf[16], const uint32_t blockWidth,
1549 memset(texelWeightData + clearByteStart, 0, 16 - clearByteStart); 1565 memset(texelWeightData + clearByteStart, 0, 16 - clearByteStart);
1550 1566
1551 std::vector<IntegerEncodedValue> texelWeightValues; 1567 std::vector<IntegerEncodedValue> texelWeightValues;
1552 BitStream weightStream(texelWeightData); 1568 InputBitStream weightStream(texelWeightData);
1553 1569
1554 IntegerEncodedValue::DecodeIntegerSequence(texelWeightValues, weightStream, 1570 IntegerEncodedValue::DecodeIntegerSequence(texelWeightValues, weightStream,
1555 weightParams.m_MaxWeight, 1571 weightParams.m_MaxWeight,
@@ -1597,7 +1613,7 @@ static void DecompressBlock(uint8_t inBuf[16], const uint32_t blockWidth,
1597 1613
1598namespace Tegra::Texture::ASTC { 1614namespace Tegra::Texture::ASTC {
1599 1615
1600std::vector<uint8_t> Decompress(std::vector<uint8_t>& data, uint32_t width, uint32_t height, 1616std::vector<uint8_t> Decompress(const uint8_t* data, uint32_t width, uint32_t height,
1601 uint32_t depth, uint32_t block_width, uint32_t block_height) { 1617 uint32_t depth, uint32_t block_width, uint32_t block_height) {
1602 uint32_t blockIdx = 0; 1618 uint32_t blockIdx = 0;
1603 std::vector<uint8_t> outData(height * width * depth * 4); 1619 std::vector<uint8_t> outData(height * width * depth * 4);
@@ -1605,7 +1621,7 @@ std::vector<uint8_t> Decompress(std::vector<uint8_t>& data, uint32_t width, uint
1605 for (uint32_t j = 0; j < height; j += block_height) { 1621 for (uint32_t j = 0; j < height; j += block_height) {
1606 for (uint32_t i = 0; i < width; i += block_width) { 1622 for (uint32_t i = 0; i < width; i += block_width) {
1607 1623
1608 uint8_t* blockPtr = data.data() + blockIdx * 16; 1624 const uint8_t* blockPtr = data + blockIdx * 16;
1609 1625
1610 // Blocks can be at most 12x12 1626 // Blocks can be at most 12x12
1611 uint32_t uncompData[144]; 1627 uint32_t uncompData[144];
diff --git a/src/video_core/textures/astc.h b/src/video_core/textures/astc.h
index d419dd025..991cdba72 100644
--- a/src/video_core/textures/astc.h
+++ b/src/video_core/textures/astc.h
@@ -9,7 +9,7 @@
9 9
10namespace Tegra::Texture::ASTC { 10namespace Tegra::Texture::ASTC {
11 11
12std::vector<uint8_t> Decompress(std::vector<uint8_t>& data, uint32_t width, uint32_t height, 12std::vector<uint8_t> Decompress(const uint8_t* data, uint32_t width, uint32_t height,
13 uint32_t depth, uint32_t block_width, uint32_t block_height); 13 uint32_t depth, uint32_t block_width, uint32_t block_height);
14 14
15} // namespace Tegra::Texture::ASTC 15} // namespace Tegra::Texture::ASTC
diff --git a/src/video_core/textures/convert.cpp b/src/video_core/textures/convert.cpp
new file mode 100644
index 000000000..5e439f036
--- /dev/null
+++ b/src/video_core/textures/convert.cpp
@@ -0,0 +1,92 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <algorithm>
6#include <cstring>
7#include <tuple>
8#include <vector>
9
10#include "common/assert.h"
11#include "common/common_types.h"
12#include "common/logging/log.h"
13#include "video_core/textures/astc.h"
14#include "video_core/textures/convert.h"
15
16namespace Tegra::Texture {
17
18using VideoCore::Surface::PixelFormat;
19
20template <bool reverse>
21void SwapS8Z24ToZ24S8(u8* data, u32 width, u32 height) {
22 union S8Z24 {
23 BitField<0, 24, u32> z24;
24 BitField<24, 8, u32> s8;
25 };
26 static_assert(sizeof(S8Z24) == 4, "S8Z24 is incorrect size");
27
28 union Z24S8 {
29 BitField<0, 8, u32> s8;
30 BitField<8, 24, u32> z24;
31 };
32 static_assert(sizeof(Z24S8) == 4, "Z24S8 is incorrect size");
33
34 S8Z24 s8z24_pixel{};
35 Z24S8 z24s8_pixel{};
36 constexpr auto bpp{
37 VideoCore::Surface::GetBytesPerPixel(VideoCore::Surface::PixelFormat::S8Z24)};
38 for (std::size_t y = 0; y < height; ++y) {
39 for (std::size_t x = 0; x < width; ++x) {
40 const std::size_t offset{bpp * (y * width + x)};
41 if constexpr (reverse) {
42 std::memcpy(&z24s8_pixel, &data[offset], sizeof(Z24S8));
43 s8z24_pixel.s8.Assign(z24s8_pixel.s8);
44 s8z24_pixel.z24.Assign(z24s8_pixel.z24);
45 std::memcpy(&data[offset], &s8z24_pixel, sizeof(S8Z24));
46 } else {
47 std::memcpy(&s8z24_pixel, &data[offset], sizeof(S8Z24));
48 z24s8_pixel.s8.Assign(s8z24_pixel.s8);
49 z24s8_pixel.z24.Assign(s8z24_pixel.z24);
50 std::memcpy(&data[offset], &z24s8_pixel, sizeof(Z24S8));
51 }
52 }
53 }
54}
55
56static void ConvertS8Z24ToZ24S8(u8* data, u32 width, u32 height) {
57 SwapS8Z24ToZ24S8<false>(data, width, height);
58}
59
60static void ConvertZ24S8ToS8Z24(u8* data, u32 width, u32 height) {
61 SwapS8Z24ToZ24S8<true>(data, width, height);
62}
63
64void ConvertFromGuestToHost(u8* data, PixelFormat pixel_format, u32 width, u32 height, u32 depth,
65 bool convert_astc, bool convert_s8z24) {
66 if (convert_astc && IsPixelFormatASTC(pixel_format)) {
67 // Convert ASTC pixel formats to RGBA8, as most desktop GPUs do not support ASTC.
68 u32 block_width{};
69 u32 block_height{};
70 std::tie(block_width, block_height) = GetASTCBlockSize(pixel_format);
71 const std::vector<u8> rgba8_data =
72 Tegra::Texture::ASTC::Decompress(data, width, height, depth, block_width, block_height);
73 std::copy(rgba8_data.begin(), rgba8_data.end(), data);
74
75 } else if (convert_s8z24 && pixel_format == PixelFormat::S8Z24) {
76 Tegra::Texture::ConvertS8Z24ToZ24S8(data, width, height);
77 }
78}
79
80void ConvertFromHostToGuest(u8* data, PixelFormat pixel_format, u32 width, u32 height, u32 depth,
81 bool convert_astc, bool convert_s8z24) {
82 if (convert_astc && IsPixelFormatASTC(pixel_format)) {
83 LOG_CRITICAL(HW_GPU, "Conversion of format {} after texture flushing is not implemented",
84 static_cast<u32>(pixel_format));
85 UNREACHABLE();
86
87 } else if (convert_s8z24 && pixel_format == PixelFormat::S8Z24) {
88 Tegra::Texture::ConvertZ24S8ToS8Z24(data, width, height);
89 }
90}
91
92} // namespace Tegra::Texture \ No newline at end of file
diff --git a/src/video_core/textures/convert.h b/src/video_core/textures/convert.h
new file mode 100644
index 000000000..07cd8b5da
--- /dev/null
+++ b/src/video_core/textures/convert.h
@@ -0,0 +1,18 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include "common/common_types.h"
8#include "video_core/surface.h"
9
10namespace Tegra::Texture {
11
12void ConvertFromGuestToHost(u8* data, VideoCore::Surface::PixelFormat pixel_format, u32 width,
13 u32 height, u32 depth, bool convert_astc, bool convert_s8z24);
14
15void ConvertFromHostToGuest(u8* data, VideoCore::Surface::PixelFormat pixel_format, u32 width,
16 u32 height, u32 depth, bool convert_astc, bool convert_s8z24);
17
18} // namespace Tegra::Texture \ No newline at end of file
diff --git a/src/video_core/textures/decoders.cpp b/src/video_core/textures/decoders.cpp
index 5db75de22..cad7340f5 100644
--- a/src/video_core/textures/decoders.cpp
+++ b/src/video_core/textures/decoders.cpp
@@ -103,8 +103,8 @@ void FastProcessBlock(u8* const swizzled_data, u8* const unswizzled_data, const
103 const u32 swizzle_offset{y_address + table[(xb / fast_swizzle_align) % 4]}; 103 const u32 swizzle_offset{y_address + table[(xb / fast_swizzle_align) % 4]};
104 const u32 out_x = xb * out_bytes_per_pixel / bytes_per_pixel; 104 const u32 out_x = xb * out_bytes_per_pixel / bytes_per_pixel;
105 const u32 pixel_index{out_x + pixel_base}; 105 const u32 pixel_index{out_x + pixel_base};
106 data_ptrs[unswizzle] = swizzled_data + swizzle_offset; 106 data_ptrs[unswizzle ? 1 : 0] = swizzled_data + swizzle_offset;
107 data_ptrs[!unswizzle] = unswizzled_data + pixel_index; 107 data_ptrs[unswizzle ? 0 : 1] = unswizzled_data + pixel_index;
108 std::memcpy(data_ptrs[0], data_ptrs[1], fast_swizzle_align); 108 std::memcpy(data_ptrs[0], data_ptrs[1], fast_swizzle_align);
109 } 109 }
110 pixel_base += stride_x; 110 pixel_base += stride_x;
@@ -154,7 +154,7 @@ void SwizzledData(u8* const swizzled_data, u8* const unswizzled_data, const bool
154 for (u32 xb = 0; xb < blocks_on_x; xb++) { 154 for (u32 xb = 0; xb < blocks_on_x; xb++) {
155 const u32 x_start = xb * block_x_elements; 155 const u32 x_start = xb * block_x_elements;
156 const u32 x_end = std::min(width, x_start + block_x_elements); 156 const u32 x_end = std::min(width, x_start + block_x_elements);
157 if (fast) { 157 if constexpr (fast) {
158 FastProcessBlock(swizzled_data, unswizzled_data, unswizzle, x_start, y_start, 158 FastProcessBlock(swizzled_data, unswizzled_data, unswizzle, x_start, y_start,
159 z_start, x_end, y_end, z_end, tile_offset, xy_block_size, 159 z_start, x_end, y_end, z_end, tile_offset, xy_block_size,
160 layer_z, stride_x, bytes_per_pixel, out_bytes_per_pixel); 160 layer_z, stride_x, bytes_per_pixel, out_bytes_per_pixel);
diff --git a/src/video_core/textures/decoders.h b/src/video_core/textures/decoders.h
index 85b7e9f7b..65df86890 100644
--- a/src/video_core/textures/decoders.h
+++ b/src/video_core/textures/decoders.h
@@ -16,16 +16,13 @@ inline std::size_t GetGOBSize() {
16 return 512; 16 return 512;
17} 17}
18 18
19/** 19/// Unswizzles a swizzled texture without changing its format.
20 * Unswizzles a swizzled texture without changing its format.
21 */
22void UnswizzleTexture(u8* unswizzled_data, VAddr address, u32 tile_size_x, u32 tile_size_y, 20void UnswizzleTexture(u8* unswizzled_data, VAddr address, u32 tile_size_x, u32 tile_size_y,
23 u32 bytes_per_pixel, u32 width, u32 height, u32 depth, 21 u32 bytes_per_pixel, u32 width, u32 height, u32 depth,
24 u32 block_height = TICEntry::DefaultBlockHeight, 22 u32 block_height = TICEntry::DefaultBlockHeight,
25 u32 block_depth = TICEntry::DefaultBlockHeight, u32 width_spacing = 0); 23 u32 block_depth = TICEntry::DefaultBlockHeight, u32 width_spacing = 0);
26/** 24
27 * Unswizzles a swizzled texture without changing its format. 25/// Unswizzles a swizzled texture without changing its format.
28 */
29std::vector<u8> UnswizzleTexture(VAddr address, u32 tile_size_x, u32 tile_size_y, 26std::vector<u8> UnswizzleTexture(VAddr address, u32 tile_size_x, u32 tile_size_y,
30 u32 bytes_per_pixel, u32 width, u32 height, u32 depth, 27 u32 bytes_per_pixel, u32 width, u32 height, u32 depth,
31 u32 block_height = TICEntry::DefaultBlockHeight, 28 u32 block_height = TICEntry::DefaultBlockHeight,
@@ -37,15 +34,11 @@ void CopySwizzledData(u32 width, u32 height, u32 depth, u32 bytes_per_pixel,
37 u32 out_bytes_per_pixel, u8* swizzled_data, u8* unswizzled_data, 34 u32 out_bytes_per_pixel, u8* swizzled_data, u8* unswizzled_data,
38 bool unswizzle, u32 block_height, u32 block_depth, u32 width_spacing); 35 bool unswizzle, u32 block_height, u32 block_depth, u32 width_spacing);
39 36
40/** 37/// Decodes an unswizzled texture into a A8R8G8B8 texture.
41 * Decodes an unswizzled texture into a A8R8G8B8 texture.
42 */
43std::vector<u8> DecodeTexture(const std::vector<u8>& texture_data, TextureFormat format, u32 width, 38std::vector<u8> DecodeTexture(const std::vector<u8>& texture_data, TextureFormat format, u32 width,
44 u32 height); 39 u32 height);
45 40
46/** 41/// This function calculates the correct size of a texture depending if it's tiled or not.
47 * This function calculates the correct size of a texture depending if it's tiled or not.
48 */
49std::size_t CalculateSize(bool tiled, u32 bytes_per_pixel, u32 width, u32 height, u32 depth, 42std::size_t CalculateSize(bool tiled, u32 bytes_per_pixel, u32 width, u32 height, u32 depth,
50 u32 block_height, u32 block_depth); 43 u32 block_height, u32 block_depth);
51 44
@@ -53,6 +46,7 @@ std::size_t CalculateSize(bool tiled, u32 bytes_per_pixel, u32 width, u32 height
53void SwizzleSubrect(u32 subrect_width, u32 subrect_height, u32 source_pitch, u32 swizzled_width, 46void SwizzleSubrect(u32 subrect_width, u32 subrect_height, u32 source_pitch, u32 swizzled_width,
54 u32 bytes_per_pixel, VAddr swizzled_data, VAddr unswizzled_data, 47 u32 bytes_per_pixel, VAddr swizzled_data, VAddr unswizzled_data,
55 u32 block_height); 48 u32 block_height);
49
56/// Copies a tiled subrectangle into a linear surface. 50/// Copies a tiled subrectangle into a linear surface.
57void UnswizzleSubrect(u32 subrect_width, u32 subrect_height, u32 dest_pitch, u32 swizzled_width, 51void UnswizzleSubrect(u32 subrect_width, u32 subrect_height, u32 dest_pitch, u32 swizzled_width,
58 u32 bytes_per_pixel, VAddr swizzled_data, VAddr unswizzled_data, 52 u32 bytes_per_pixel, VAddr swizzled_data, VAddr unswizzled_data,
diff --git a/src/video_core/textures/texture.h b/src/video_core/textures/texture.h
index 0fc5530f2..93ecc6e31 100644
--- a/src/video_core/textures/texture.h
+++ b/src/video_core/textures/texture.h
@@ -4,6 +4,7 @@
4 4
5#pragma once 5#pragma once
6 6
7#include <array>
7#include "common/assert.h" 8#include "common/assert.h"
8#include "common/bit_field.h" 9#include "common/bit_field.h"
9#include "common/common_funcs.h" 10#include "common/common_funcs.h"
@@ -282,34 +283,62 @@ enum class TextureMipmapFilter : u32 {
282 283
283struct TSCEntry { 284struct TSCEntry {
284 union { 285 union {
285 BitField<0, 3, WrapMode> wrap_u; 286 struct {
286 BitField<3, 3, WrapMode> wrap_v; 287 union {
287 BitField<6, 3, WrapMode> wrap_p; 288 BitField<0, 3, WrapMode> wrap_u;
288 BitField<9, 1, u32> depth_compare_enabled; 289 BitField<3, 3, WrapMode> wrap_v;
289 BitField<10, 3, DepthCompareFunc> depth_compare_func; 290 BitField<6, 3, WrapMode> wrap_p;
290 BitField<13, 1, u32> srgb_conversion; 291 BitField<9, 1, u32> depth_compare_enabled;
291 BitField<20, 3, u32> max_anisotropy; 292 BitField<10, 3, DepthCompareFunc> depth_compare_func;
293 BitField<13, 1, u32> srgb_conversion;
294 BitField<20, 3, u32> max_anisotropy;
295 };
296 union {
297 BitField<0, 2, TextureFilter> mag_filter;
298 BitField<4, 2, TextureFilter> min_filter;
299 BitField<6, 2, TextureMipmapFilter> mipmap_filter;
300 BitField<9, 1, u32> cubemap_interface_filtering;
301 BitField<12, 13, u32> mip_lod_bias;
302 };
303 union {
304 BitField<0, 12, u32> min_lod_clamp;
305 BitField<12, 12, u32> max_lod_clamp;
306 BitField<24, 8, u32> srgb_border_color_r;
307 };
308 union {
309 BitField<12, 8, u32> srgb_border_color_g;
310 BitField<20, 8, u32> srgb_border_color_b;
311 };
312 std::array<f32, 4> border_color;
313 };
314 std::array<u8, 0x20> raw;
292 }; 315 };
293 union { 316
294 BitField<0, 2, TextureFilter> mag_filter; 317 float GetMaxAnisotropy() const {
295 BitField<4, 2, TextureFilter> min_filter; 318 return static_cast<float>(1U << max_anisotropy);
296 BitField<6, 2, TextureMipmapFilter> mip_filter; 319 }
297 BitField<9, 1, u32> cubemap_interface_filtering; 320
298 BitField<12, 13, u32> mip_lod_bias; 321 float GetMinLod() const {
299 }; 322 return static_cast<float>(min_lod_clamp) / 256.0f;
300 union { 323 }
301 BitField<0, 12, u32> min_lod_clamp; 324
302 BitField<12, 12, u32> max_lod_clamp; 325 float GetMaxLod() const {
303 BitField<24, 8, u32> srgb_border_color_r; 326 return static_cast<float>(max_lod_clamp) / 256.0f;
304 }; 327 }
305 union { 328
306 BitField<12, 8, u32> srgb_border_color_g; 329 float GetLodBias() const {
307 BitField<20, 8, u32> srgb_border_color_b; 330 // Sign extend the 13-bit value.
308 }; 331 constexpr u32 mask = 1U << (13 - 1);
309 float border_color_r; 332 return static_cast<s32>((mip_lod_bias ^ mask) - mask) / 256.0f;
310 float border_color_g; 333 }
311 float border_color_b; 334
312 float border_color_a; 335 std::array<float, 4> GetBorderColor() const {
336 if (srgb_conversion) {
337 return {srgb_border_color_r / 255.0f, srgb_border_color_g / 255.0f,
338 srgb_border_color_b / 255.0f, border_color[3]};
339 }
340 return border_color;
341 }
313}; 342};
314static_assert(sizeof(TSCEntry) == 0x20, "TSCEntry has wrong size"); 343static_assert(sizeof(TSCEntry) == 0x20, "TSCEntry has wrong size");
315 344
diff --git a/src/web_service/verify_login.h b/src/web_service/verify_login.h
index 39db32dbb..821b345d7 100644
--- a/src/web_service/verify_login.h
+++ b/src/web_service/verify_login.h
@@ -4,8 +4,6 @@
4 4
5#pragma once 5#pragma once
6 6
7#include <functional>
8#include <future>
9#include <string> 7#include <string>
10 8
11namespace WebService { 9namespace WebService {
diff --git a/src/web_service/web_backend.cpp b/src/web_service/web_backend.cpp
index b7737b615..40da1a4e2 100644
--- a/src/web_service/web_backend.cpp
+++ b/src/web_service/web_backend.cpp
@@ -10,7 +10,6 @@
10#include "common/common_types.h" 10#include "common/common_types.h"
11#include "common/logging/log.h" 11#include "common/logging/log.h"
12#include "common/web_result.h" 12#include "common/web_result.h"
13#include "core/settings.h"
14#include "web_service/web_backend.h" 13#include "web_service/web_backend.h"
15 14
16namespace WebService { 15namespace WebService {
diff --git a/src/yuzu/applets/web_browser.cpp b/src/yuzu/applets/web_browser.cpp
index 6a9138d53..979b9ec14 100644
--- a/src/yuzu/applets/web_browser.cpp
+++ b/src/yuzu/applets/web_browser.cpp
@@ -56,6 +56,8 @@ constexpr char NX_SHIM_INJECT_SCRIPT[] = R"(
56 window.nx.endApplet = function() { 56 window.nx.endApplet = function() {
57 applet_done = true; 57 applet_done = true;
58 }; 58 };
59
60 window.onkeypress = function(e) { if (e.keyCode === 13) { applet_done = true; } };
59)"; 61)";
60 62
61QString GetNXShimInjectionScript() { 63QString GetNXShimInjectionScript() {
diff --git a/src/yuzu/bootmanager.cpp b/src/yuzu/bootmanager.cpp
index 73b04b749..d2c97b1f8 100644
--- a/src/yuzu/bootmanager.cpp
+++ b/src/yuzu/bootmanager.cpp
@@ -20,10 +20,7 @@
20EmuThread::EmuThread(GRenderWindow* render_window) : render_window(render_window) {} 20EmuThread::EmuThread(GRenderWindow* render_window) : render_window(render_window) {}
21 21
22void EmuThread::run() { 22void EmuThread::run() {
23 if (!Settings::values.use_multi_core) { 23 render_window->MakeCurrent();
24 // Single core mode must acquire OpenGL context for entire emulation session
25 render_window->MakeCurrent();
26 }
27 24
28 MicroProfileOnThreadCreate("EmuThread"); 25 MicroProfileOnThreadCreate("EmuThread");
29 26
@@ -38,6 +35,11 @@ void EmuThread::run() {
38 35
39 emit LoadProgress(VideoCore::LoadCallbackStage::Complete, 0, 0); 36 emit LoadProgress(VideoCore::LoadCallbackStage::Complete, 0, 0);
40 37
38 if (Settings::values.use_asynchronous_gpu_emulation) {
39 // Release OpenGL context for the GPU thread
40 render_window->DoneCurrent();
41 }
42
41 // holds whether the cpu was running during the last iteration, 43 // holds whether the cpu was running during the last iteration,
42 // so that the DebugModeLeft signal can be emitted before the 44 // so that the DebugModeLeft signal can be emitted before the
43 // next execution step 45 // next execution step
@@ -121,7 +123,6 @@ GRenderWindow::GRenderWindow(QWidget* parent, EmuThread* emu_thread)
121 setAttribute(Qt::WA_AcceptTouchEvents); 123 setAttribute(Qt::WA_AcceptTouchEvents);
122 124
123 InputCommon::Init(); 125 InputCommon::Init();
124 InputCommon::StartJoystickEventHandler();
125 connect(this, &GRenderWindow::FirstFrameDisplayed, static_cast<GMainWindow*>(parent), 126 connect(this, &GRenderWindow::FirstFrameDisplayed, static_cast<GMainWindow*>(parent),
126 &GMainWindow::OnLoadComplete); 127 &GMainWindow::OnLoadComplete);
127} 128}
diff --git a/src/yuzu/compatdb.cpp b/src/yuzu/compatdb.cpp
index 5f0896f84..c8b0a5ec0 100644
--- a/src/yuzu/compatdb.cpp
+++ b/src/yuzu/compatdb.cpp
@@ -53,15 +53,15 @@ void CompatDB::Submit() {
53 case CompatDBPage::Final: 53 case CompatDBPage::Final:
54 back(); 54 back();
55 LOG_DEBUG(Frontend, "Compatibility Rating: {}", compatibility->checkedId()); 55 LOG_DEBUG(Frontend, "Compatibility Rating: {}", compatibility->checkedId());
56 Core::Telemetry().AddField(Telemetry::FieldType::UserFeedback, "Compatibility", 56 Core::System::GetInstance().TelemetrySession().AddField(
57 compatibility->checkedId()); 57 Telemetry::FieldType::UserFeedback, "Compatibility", compatibility->checkedId());
58 58
59 button(NextButton)->setEnabled(false); 59 button(NextButton)->setEnabled(false);
60 button(NextButton)->setText(tr("Submitting")); 60 button(NextButton)->setText(tr("Submitting"));
61 button(QWizard::CancelButton)->setVisible(false); 61 button(QWizard::CancelButton)->setVisible(false);
62 62
63 testcase_watcher.setFuture(QtConcurrent::run( 63 testcase_watcher.setFuture(QtConcurrent::run(
64 [this]() { return Core::System::GetInstance().TelemetrySession().SubmitTestcase(); })); 64 [] { return Core::System::GetInstance().TelemetrySession().SubmitTestcase(); }));
65 break; 65 break;
66 default: 66 default:
67 LOG_ERROR(Frontend, "Unexpected page: {}", currentId()); 67 LOG_ERROR(Frontend, "Unexpected page: {}", currentId());
diff --git a/src/yuzu/configuration/config.cpp b/src/yuzu/configuration/config.cpp
index e9546dadf..4650f96a3 100644
--- a/src/yuzu/configuration/config.cpp
+++ b/src/yuzu/configuration/config.cpp
@@ -209,7 +209,7 @@ void Config::ReadPlayerValues() {
209 for (std::size_t p = 0; p < Settings::values.players.size(); ++p) { 209 for (std::size_t p = 0; p < Settings::values.players.size(); ++p) {
210 auto& player = Settings::values.players[p]; 210 auto& player = Settings::values.players[p];
211 211
212 player.connected = qt_config->value(QString("player_%1_connected").arg(p), false).toBool(); 212 player.connected = ReadSetting(QString("player_%1_connected").arg(p), false).toBool();
213 213
214 player.type = static_cast<Settings::ControllerType>( 214 player.type = static_cast<Settings::ControllerType>(
215 qt_config 215 qt_config
@@ -269,7 +269,7 @@ void Config::ReadPlayerValues() {
269} 269}
270 270
271void Config::ReadDebugValues() { 271void Config::ReadDebugValues() {
272 Settings::values.debug_pad_enabled = qt_config->value("debug_pad_enabled", false).toBool(); 272 Settings::values.debug_pad_enabled = ReadSetting("debug_pad_enabled", false).toBool();
273 for (int i = 0; i < Settings::NativeButton::NumButtons; ++i) { 273 for (int i = 0; i < Settings::NativeButton::NumButtons; ++i) {
274 std::string default_param = InputCommon::GenerateKeyboardParam(default_buttons[i]); 274 std::string default_param = InputCommon::GenerateKeyboardParam(default_buttons[i]);
275 Settings::values.debug_pad_buttons[i] = 275 Settings::values.debug_pad_buttons[i] =
@@ -298,7 +298,7 @@ void Config::ReadDebugValues() {
298} 298}
299 299
300void Config::ReadKeyboardValues() { 300void Config::ReadKeyboardValues() {
301 Settings::values.keyboard_enabled = qt_config->value("keyboard_enabled", false).toBool(); 301 Settings::values.keyboard_enabled = ReadSetting("keyboard_enabled", false).toBool();
302 302
303 std::transform(default_keyboard_keys.begin(), default_keyboard_keys.end(), 303 std::transform(default_keyboard_keys.begin(), default_keyboard_keys.end(),
304 Settings::values.keyboard_keys.begin(), InputCommon::GenerateKeyboardParam); 304 Settings::values.keyboard_keys.begin(), InputCommon::GenerateKeyboardParam);
@@ -311,7 +311,7 @@ void Config::ReadKeyboardValues() {
311} 311}
312 312
313void Config::ReadMouseValues() { 313void Config::ReadMouseValues() {
314 Settings::values.mouse_enabled = qt_config->value("mouse_enabled", false).toBool(); 314 Settings::values.mouse_enabled = ReadSetting("mouse_enabled", false).toBool();
315 315
316 for (int i = 0; i < Settings::NativeMouseButton::NumMouseButtons; ++i) { 316 for (int i = 0; i < Settings::NativeMouseButton::NumMouseButtons; ++i) {
317 std::string default_param = InputCommon::GenerateKeyboardParam(default_mouse_buttons[i]); 317 std::string default_param = InputCommon::GenerateKeyboardParam(default_mouse_buttons[i]);
@@ -327,16 +327,14 @@ void Config::ReadMouseValues() {
327} 327}
328 328
329void Config::ReadTouchscreenValues() { 329void Config::ReadTouchscreenValues() {
330 Settings::values.touchscreen.enabled = qt_config->value("touchscreen_enabled", true).toBool(); 330 Settings::values.touchscreen.enabled = ReadSetting("touchscreen_enabled", true).toBool();
331 Settings::values.touchscreen.device = 331 Settings::values.touchscreen.device =
332 qt_config->value("touchscreen_device", "engine:emu_window").toString().toStdString(); 332 ReadSetting("touchscreen_device", "engine:emu_window").toString().toStdString();
333 333
334 Settings::values.touchscreen.finger = qt_config->value("touchscreen_finger", 0).toUInt(); 334 Settings::values.touchscreen.finger = ReadSetting("touchscreen_finger", 0).toUInt();
335 Settings::values.touchscreen.rotation_angle = qt_config->value("touchscreen_angle", 0).toUInt(); 335 Settings::values.touchscreen.rotation_angle = ReadSetting("touchscreen_angle", 0).toUInt();
336 Settings::values.touchscreen.diameter_x = 336 Settings::values.touchscreen.diameter_x = ReadSetting("touchscreen_diameter_x", 15).toUInt();
337 qt_config->value("touchscreen_diameter_x", 15).toUInt(); 337 Settings::values.touchscreen.diameter_y = ReadSetting("touchscreen_diameter_y", 15).toUInt();
338 Settings::values.touchscreen.diameter_y =
339 qt_config->value("touchscreen_diameter_y", 15).toUInt();
340 qt_config->endGroup(); 338 qt_config->endGroup();
341} 339}
342 340
@@ -357,40 +355,41 @@ void Config::ReadValues() {
357 ReadTouchscreenValues(); 355 ReadTouchscreenValues();
358 356
359 Settings::values.motion_device = 357 Settings::values.motion_device =
360 qt_config->value("motion_device", "engine:motion_emu,update_period:100,sensitivity:0.01") 358 ReadSetting("motion_device", "engine:motion_emu,update_period:100,sensitivity:0.01")
361 .toString() 359 .toString()
362 .toStdString(); 360 .toStdString();
363 361
364 qt_config->beginGroup("Core"); 362 qt_config->beginGroup("Core");
365 Settings::values.use_cpu_jit = qt_config->value("use_cpu_jit", true).toBool(); 363 Settings::values.use_cpu_jit = ReadSetting("use_cpu_jit", true).toBool();
366 Settings::values.use_multi_core = qt_config->value("use_multi_core", false).toBool(); 364 Settings::values.use_multi_core = ReadSetting("use_multi_core", false).toBool();
367 qt_config->endGroup(); 365 qt_config->endGroup();
368 366
369 qt_config->beginGroup("Renderer"); 367 qt_config->beginGroup("Renderer");
370 Settings::values.resolution_factor = qt_config->value("resolution_factor", 1.0).toFloat(); 368 Settings::values.resolution_factor = ReadSetting("resolution_factor", 1.0).toFloat();
371 Settings::values.use_frame_limit = qt_config->value("use_frame_limit", true).toBool(); 369 Settings::values.use_frame_limit = ReadSetting("use_frame_limit", true).toBool();
372 Settings::values.frame_limit = qt_config->value("frame_limit", 100).toInt(); 370 Settings::values.frame_limit = ReadSetting("frame_limit", 100).toInt();
373 Settings::values.use_disk_shader_cache = 371 Settings::values.use_disk_shader_cache = ReadSetting("use_disk_shader_cache", true).toBool();
374 qt_config->value("use_disk_shader_cache", false).toBool();
375 Settings::values.use_accurate_gpu_emulation = 372 Settings::values.use_accurate_gpu_emulation =
376 qt_config->value("use_accurate_gpu_emulation", false).toBool(); 373 ReadSetting("use_accurate_gpu_emulation", false).toBool();
374 Settings::values.use_asynchronous_gpu_emulation =
375 ReadSetting("use_asynchronous_gpu_emulation", false).toBool();
377 376
378 Settings::values.bg_red = qt_config->value("bg_red", 0.0).toFloat(); 377 Settings::values.bg_red = ReadSetting("bg_red", 0.0).toFloat();
379 Settings::values.bg_green = qt_config->value("bg_green", 0.0).toFloat(); 378 Settings::values.bg_green = ReadSetting("bg_green", 0.0).toFloat();
380 Settings::values.bg_blue = qt_config->value("bg_blue", 0.0).toFloat(); 379 Settings::values.bg_blue = ReadSetting("bg_blue", 0.0).toFloat();
381 qt_config->endGroup(); 380 qt_config->endGroup();
382 381
383 qt_config->beginGroup("Audio"); 382 qt_config->beginGroup("Audio");
384 Settings::values.sink_id = qt_config->value("output_engine", "auto").toString().toStdString(); 383 Settings::values.sink_id = ReadSetting("output_engine", "auto").toString().toStdString();
385 Settings::values.enable_audio_stretching = 384 Settings::values.enable_audio_stretching =
386 qt_config->value("enable_audio_stretching", true).toBool(); 385 ReadSetting("enable_audio_stretching", true).toBool();
387 Settings::values.audio_device_id = 386 Settings::values.audio_device_id =
388 qt_config->value("output_device", "auto").toString().toStdString(); 387 ReadSetting("output_device", "auto").toString().toStdString();
389 Settings::values.volume = qt_config->value("volume", 1).toFloat(); 388 Settings::values.volume = ReadSetting("volume", 1).toFloat();
390 qt_config->endGroup(); 389 qt_config->endGroup();
391 390
392 qt_config->beginGroup("Data Storage"); 391 qt_config->beginGroup("Data Storage");
393 Settings::values.use_virtual_sd = qt_config->value("use_virtual_sd", true).toBool(); 392 Settings::values.use_virtual_sd = ReadSetting("use_virtual_sd", true).toBool();
394 FileUtil::GetUserPath( 393 FileUtil::GetUserPath(
395 FileUtil::UserPath::NANDDir, 394 FileUtil::UserPath::NANDDir,
396 qt_config 395 qt_config
@@ -408,30 +407,30 @@ void Config::ReadValues() {
408 qt_config->endGroup(); 407 qt_config->endGroup();
409 408
410 qt_config->beginGroup("Core"); 409 qt_config->beginGroup("Core");
411 Settings::values.use_cpu_jit = qt_config->value("use_cpu_jit", true).toBool(); 410 Settings::values.use_cpu_jit = ReadSetting("use_cpu_jit", true).toBool();
412 Settings::values.use_multi_core = qt_config->value("use_multi_core", false).toBool(); 411 Settings::values.use_multi_core = ReadSetting("use_multi_core", false).toBool();
413 qt_config->endGroup(); 412 qt_config->endGroup();
414 413
415 qt_config->beginGroup("System"); 414 qt_config->beginGroup("System");
416 Settings::values.use_docked_mode = qt_config->value("use_docked_mode", false).toBool(); 415 Settings::values.use_docked_mode = ReadSetting("use_docked_mode", false).toBool();
417 Settings::values.enable_nfc = qt_config->value("enable_nfc", true).toBool(); 416 Settings::values.enable_nfc = ReadSetting("enable_nfc", true).toBool();
418 417
419 Settings::values.current_user = std::clamp<int>(qt_config->value("current_user", 0).toInt(), 0, 418 Settings::values.current_user =
420 Service::Account::MAX_USERS - 1); 419 std::clamp<int>(ReadSetting("current_user", 0).toInt(), 0, Service::Account::MAX_USERS - 1);
421 420
422 Settings::values.language_index = qt_config->value("language_index", 1).toInt(); 421 Settings::values.language_index = ReadSetting("language_index", 1).toInt();
423 422
424 const auto rng_seed_enabled = qt_config->value("rng_seed_enabled", false).toBool(); 423 const auto rng_seed_enabled = ReadSetting("rng_seed_enabled", false).toBool();
425 if (rng_seed_enabled) { 424 if (rng_seed_enabled) {
426 Settings::values.rng_seed = qt_config->value("rng_seed", 0).toULongLong(); 425 Settings::values.rng_seed = ReadSetting("rng_seed", 0).toULongLong();
427 } else { 426 } else {
428 Settings::values.rng_seed = std::nullopt; 427 Settings::values.rng_seed = std::nullopt;
429 } 428 }
430 429
431 const auto custom_rtc_enabled = qt_config->value("custom_rtc_enabled", false).toBool(); 430 const auto custom_rtc_enabled = ReadSetting("custom_rtc_enabled", false).toBool();
432 if (custom_rtc_enabled) { 431 if (custom_rtc_enabled) {
433 Settings::values.custom_rtc = 432 Settings::values.custom_rtc =
434 std::chrono::seconds(qt_config->value("custom_rtc", 0).toULongLong()); 433 std::chrono::seconds(ReadSetting("custom_rtc", 0).toULongLong());
435 } else { 434 } else {
436 Settings::values.custom_rtc = std::nullopt; 435 Settings::values.custom_rtc = std::nullopt;
437 } 436 }
@@ -439,35 +438,35 @@ void Config::ReadValues() {
439 qt_config->endGroup(); 438 qt_config->endGroup();
440 439
441 qt_config->beginGroup("Miscellaneous"); 440 qt_config->beginGroup("Miscellaneous");
442 Settings::values.log_filter = qt_config->value("log_filter", "*:Info").toString().toStdString(); 441 Settings::values.log_filter = ReadSetting("log_filter", "*:Info").toString().toStdString();
443 Settings::values.use_dev_keys = qt_config->value("use_dev_keys", false).toBool(); 442 Settings::values.use_dev_keys = ReadSetting("use_dev_keys", false).toBool();
444 qt_config->endGroup(); 443 qt_config->endGroup();
445 444
446 qt_config->beginGroup("Debugging"); 445 qt_config->beginGroup("Debugging");
447 Settings::values.use_gdbstub = qt_config->value("use_gdbstub", false).toBool(); 446 Settings::values.use_gdbstub = ReadSetting("use_gdbstub", false).toBool();
448 Settings::values.gdbstub_port = qt_config->value("gdbstub_port", 24689).toInt(); 447 Settings::values.gdbstub_port = ReadSetting("gdbstub_port", 24689).toInt();
449 Settings::values.program_args = qt_config->value("program_args", "").toString().toStdString(); 448 Settings::values.program_args = ReadSetting("program_args", "").toString().toStdString();
450 Settings::values.dump_exefs = qt_config->value("dump_exefs", false).toBool(); 449 Settings::values.dump_exefs = ReadSetting("dump_exefs", false).toBool();
451 Settings::values.dump_nso = qt_config->value("dump_nso", false).toBool(); 450 Settings::values.dump_nso = ReadSetting("dump_nso", false).toBool();
452 qt_config->endGroup(); 451 qt_config->endGroup();
453 452
454 qt_config->beginGroup("WebService"); 453 qt_config->beginGroup("WebService");
455 Settings::values.enable_telemetry = qt_config->value("enable_telemetry", true).toBool(); 454 Settings::values.enable_telemetry = ReadSetting("enable_telemetry", true).toBool();
456 Settings::values.web_api_url = 455 Settings::values.web_api_url =
457 qt_config->value("web_api_url", "https://api.yuzu-emu.org").toString().toStdString(); 456 ReadSetting("web_api_url", "https://api.yuzu-emu.org").toString().toStdString();
458 Settings::values.yuzu_username = qt_config->value("yuzu_username").toString().toStdString(); 457 Settings::values.yuzu_username = ReadSetting("yuzu_username").toString().toStdString();
459 Settings::values.yuzu_token = qt_config->value("yuzu_token").toString().toStdString(); 458 Settings::values.yuzu_token = ReadSetting("yuzu_token").toString().toStdString();
460 qt_config->endGroup(); 459 qt_config->endGroup();
461 460
462 const auto size = qt_config->beginReadArray("DisabledAddOns"); 461 const auto size = qt_config->beginReadArray("DisabledAddOns");
463 for (int i = 0; i < size; ++i) { 462 for (int i = 0; i < size; ++i) {
464 qt_config->setArrayIndex(i); 463 qt_config->setArrayIndex(i);
465 const auto title_id = qt_config->value("title_id", 0).toULongLong(); 464 const auto title_id = ReadSetting("title_id", 0).toULongLong();
466 std::vector<std::string> out; 465 std::vector<std::string> out;
467 const auto d_size = qt_config->beginReadArray("disabled"); 466 const auto d_size = qt_config->beginReadArray("disabled");
468 for (int j = 0; j < d_size; ++j) { 467 for (int j = 0; j < d_size; ++j) {
469 qt_config->setArrayIndex(j); 468 qt_config->setArrayIndex(j);
470 out.push_back(qt_config->value("d", "").toString().toStdString()); 469 out.push_back(ReadSetting("d", "").toString().toStdString());
471 } 470 }
472 qt_config->endArray(); 471 qt_config->endArray();
473 Settings::values.disabled_addons.insert_or_assign(title_id, out); 472 Settings::values.disabled_addons.insert_or_assign(title_id, out);
@@ -475,41 +474,38 @@ void Config::ReadValues() {
475 qt_config->endArray(); 474 qt_config->endArray();
476 475
477 qt_config->beginGroup("UI"); 476 qt_config->beginGroup("UI");
478 UISettings::values.theme = qt_config->value("theme", UISettings::themes[0].second).toString(); 477 UISettings::values.theme = ReadSetting("theme", UISettings::themes[0].second).toString();
479 UISettings::values.enable_discord_presence = 478 UISettings::values.enable_discord_presence =
480 qt_config->value("enable_discord_presence", true).toBool(); 479 ReadSetting("enable_discord_presence", true).toBool();
481 UISettings::values.screenshot_resolution_factor = 480 UISettings::values.screenshot_resolution_factor =
482 static_cast<u16>(qt_config->value("screenshot_resolution_factor", 0).toUInt()); 481 static_cast<u16>(ReadSetting("screenshot_resolution_factor", 0).toUInt());
483 UISettings::values.select_user_on_boot = 482 UISettings::values.select_user_on_boot = ReadSetting("select_user_on_boot", false).toBool();
484 qt_config->value("select_user_on_boot", false).toBool();
485 483
486 qt_config->beginGroup("UIGameList"); 484 qt_config->beginGroup("UIGameList");
487 UISettings::values.show_unknown = qt_config->value("show_unknown", true).toBool(); 485 UISettings::values.show_unknown = ReadSetting("show_unknown", true).toBool();
488 UISettings::values.show_add_ons = qt_config->value("show_add_ons", true).toBool(); 486 UISettings::values.show_add_ons = ReadSetting("show_add_ons", true).toBool();
489 UISettings::values.icon_size = qt_config->value("icon_size", 64).toUInt(); 487 UISettings::values.icon_size = ReadSetting("icon_size", 64).toUInt();
490 UISettings::values.row_1_text_id = qt_config->value("row_1_text_id", 3).toUInt(); 488 UISettings::values.row_1_text_id = ReadSetting("row_1_text_id", 3).toUInt();
491 UISettings::values.row_2_text_id = qt_config->value("row_2_text_id", 2).toUInt(); 489 UISettings::values.row_2_text_id = ReadSetting("row_2_text_id", 2).toUInt();
492 qt_config->endGroup(); 490 qt_config->endGroup();
493 491
494 qt_config->beginGroup("UILayout"); 492 qt_config->beginGroup("UILayout");
495 UISettings::values.geometry = qt_config->value("geometry").toByteArray(); 493 UISettings::values.geometry = ReadSetting("geometry").toByteArray();
496 UISettings::values.state = qt_config->value("state").toByteArray(); 494 UISettings::values.state = ReadSetting("state").toByteArray();
497 UISettings::values.renderwindow_geometry = 495 UISettings::values.renderwindow_geometry = ReadSetting("geometryRenderWindow").toByteArray();
498 qt_config->value("geometryRenderWindow").toByteArray(); 496 UISettings::values.gamelist_header_state = ReadSetting("gameListHeaderState").toByteArray();
499 UISettings::values.gamelist_header_state =
500 qt_config->value("gameListHeaderState").toByteArray();
501 UISettings::values.microprofile_geometry = 497 UISettings::values.microprofile_geometry =
502 qt_config->value("microProfileDialogGeometry").toByteArray(); 498 ReadSetting("microProfileDialogGeometry").toByteArray();
503 UISettings::values.microprofile_visible = 499 UISettings::values.microprofile_visible =
504 qt_config->value("microProfileDialogVisible", false).toBool(); 500 ReadSetting("microProfileDialogVisible", false).toBool();
505 qt_config->endGroup(); 501 qt_config->endGroup();
506 502
507 qt_config->beginGroup("Paths"); 503 qt_config->beginGroup("Paths");
508 UISettings::values.roms_path = qt_config->value("romsPath").toString(); 504 UISettings::values.roms_path = ReadSetting("romsPath").toString();
509 UISettings::values.symbols_path = qt_config->value("symbolsPath").toString(); 505 UISettings::values.symbols_path = ReadSetting("symbolsPath").toString();
510 UISettings::values.gamedir = qt_config->value("gameListRootDir", ".").toString(); 506 UISettings::values.gamedir = ReadSetting("gameListRootDir", ".").toString();
511 UISettings::values.gamedir_deepscan = qt_config->value("gameListDeepScan", false).toBool(); 507 UISettings::values.gamedir_deepscan = ReadSetting("gameListDeepScan", false).toBool();
512 UISettings::values.recent_files = qt_config->value("recentFiles").toStringList(); 508 UISettings::values.recent_files = ReadSetting("recentFiles").toStringList();
513 qt_config->endGroup(); 509 qt_config->endGroup();
514 510
515 qt_config->beginGroup("Shortcuts"); 511 qt_config->beginGroup("Shortcuts");
@@ -522,8 +518,8 @@ void Config::ReadValues() {
522 qt_config->beginGroup(hotkey); 518 qt_config->beginGroup(hotkey);
523 UISettings::values.shortcuts.emplace_back(UISettings::Shortcut( 519 UISettings::values.shortcuts.emplace_back(UISettings::Shortcut(
524 group + "/" + hotkey, 520 group + "/" + hotkey,
525 UISettings::ContextualShortcut(qt_config->value("KeySeq").toString(), 521 UISettings::ContextualShortcut(ReadSetting("KeySeq").toString(),
526 qt_config->value("Context").toInt()))); 522 ReadSetting("Context").toInt())));
527 qt_config->endGroup(); 523 qt_config->endGroup();
528 } 524 }
529 525
@@ -531,16 +527,16 @@ void Config::ReadValues() {
531 } 527 }
532 qt_config->endGroup(); 528 qt_config->endGroup();
533 529
534 UISettings::values.single_window_mode = qt_config->value("singleWindowMode", true).toBool(); 530 UISettings::values.single_window_mode = ReadSetting("singleWindowMode", true).toBool();
535 UISettings::values.fullscreen = qt_config->value("fullscreen", false).toBool(); 531 UISettings::values.fullscreen = ReadSetting("fullscreen", false).toBool();
536 UISettings::values.display_titlebar = qt_config->value("displayTitleBars", true).toBool(); 532 UISettings::values.display_titlebar = ReadSetting("displayTitleBars", true).toBool();
537 UISettings::values.show_filter_bar = qt_config->value("showFilterBar", true).toBool(); 533 UISettings::values.show_filter_bar = ReadSetting("showFilterBar", true).toBool();
538 UISettings::values.show_status_bar = qt_config->value("showStatusBar", true).toBool(); 534 UISettings::values.show_status_bar = ReadSetting("showStatusBar", true).toBool();
539 UISettings::values.confirm_before_closing = qt_config->value("confirmClose", true).toBool(); 535 UISettings::values.confirm_before_closing = ReadSetting("confirmClose", true).toBool();
540 UISettings::values.first_start = qt_config->value("firstStart", true).toBool(); 536 UISettings::values.first_start = ReadSetting("firstStart", true).toBool();
541 UISettings::values.callout_flags = qt_config->value("calloutFlags", 0).toUInt(); 537 UISettings::values.callout_flags = ReadSetting("calloutFlags", 0).toUInt();
542 UISettings::values.show_console = qt_config->value("showConsole", false).toBool(); 538 UISettings::values.show_console = ReadSetting("showConsole", false).toBool();
543 UISettings::values.profile_index = qt_config->value("profileIndex", 0).toUInt(); 539 UISettings::values.profile_index = ReadSetting("profileIndex", 0).toUInt();
544 540
545 ApplyDefaultProfileIfInputInvalid(); 541 ApplyDefaultProfileIfInputInvalid();
546 542
@@ -551,62 +547,79 @@ void Config::SavePlayerValues() {
551 for (std::size_t p = 0; p < Settings::values.players.size(); ++p) { 547 for (std::size_t p = 0; p < Settings::values.players.size(); ++p) {
552 const auto& player = Settings::values.players[p]; 548 const auto& player = Settings::values.players[p];
553 549
554 qt_config->setValue(QString("player_%1_connected").arg(p), player.connected); 550 WriteSetting(QString("player_%1_connected").arg(p), player.connected, false);
555 qt_config->setValue(QString("player_%1_type").arg(p), static_cast<u8>(player.type)); 551 WriteSetting(QString("player_%1_type").arg(p), static_cast<u8>(player.type),
552 static_cast<u8>(Settings::ControllerType::DualJoycon));
556 553
557 qt_config->setValue(QString("player_%1_body_color_left").arg(p), player.body_color_left); 554 WriteSetting(QString("player_%1_body_color_left").arg(p), player.body_color_left,
558 qt_config->setValue(QString("player_%1_body_color_right").arg(p), player.body_color_right); 555 Settings::JOYCON_BODY_NEON_BLUE);
559 qt_config->setValue(QString("player_%1_button_color_left").arg(p), 556 WriteSetting(QString("player_%1_body_color_right").arg(p), player.body_color_right,
560 player.button_color_left); 557 Settings::JOYCON_BODY_NEON_RED);
561 qt_config->setValue(QString("player_%1_button_color_right").arg(p), 558 WriteSetting(QString("player_%1_button_color_left").arg(p), player.button_color_left,
562 player.button_color_right); 559 Settings::JOYCON_BUTTONS_NEON_BLUE);
560 WriteSetting(QString("player_%1_button_color_right").arg(p), player.button_color_right,
561 Settings::JOYCON_BUTTONS_NEON_RED);
563 562
564 for (int i = 0; i < Settings::NativeButton::NumButtons; ++i) { 563 for (int i = 0; i < Settings::NativeButton::NumButtons; ++i) {
565 qt_config->setValue(QString("player_%1_").arg(p) + 564 std::string default_param = InputCommon::GenerateKeyboardParam(default_buttons[i]);
566 QString::fromStdString(Settings::NativeButton::mapping[i]), 565 WriteSetting(QString("player_%1_").arg(p) +
567 QString::fromStdString(player.buttons[i])); 566 QString::fromStdString(Settings::NativeButton::mapping[i]),
567 QString::fromStdString(player.buttons[i]),
568 QString::fromStdString(default_param));
568 } 569 }
569 for (int i = 0; i < Settings::NativeAnalog::NumAnalogs; ++i) { 570 for (int i = 0; i < Settings::NativeAnalog::NumAnalogs; ++i) {
570 qt_config->setValue(QString("player_%1_").arg(p) + 571 std::string default_param = InputCommon::GenerateAnalogParamFromKeys(
571 QString::fromStdString(Settings::NativeAnalog::mapping[i]), 572 default_analogs[i][0], default_analogs[i][1], default_analogs[i][2],
572 QString::fromStdString(player.analogs[i])); 573 default_analogs[i][3], default_analogs[i][4], 0.5f);
574 WriteSetting(QString("player_%1_").arg(p) +
575 QString::fromStdString(Settings::NativeAnalog::mapping[i]),
576 QString::fromStdString(player.analogs[i]),
577 QString::fromStdString(default_param));
573 } 578 }
574 } 579 }
575} 580}
576 581
577void Config::SaveDebugValues() { 582void Config::SaveDebugValues() {
578 qt_config->setValue("debug_pad_enabled", Settings::values.debug_pad_enabled); 583 WriteSetting("debug_pad_enabled", Settings::values.debug_pad_enabled, false);
579 for (int i = 0; i < Settings::NativeButton::NumButtons; ++i) { 584 for (int i = 0; i < Settings::NativeButton::NumButtons; ++i) {
580 qt_config->setValue(QString("debug_pad_") + 585 std::string default_param = InputCommon::GenerateKeyboardParam(default_buttons[i]);
581 QString::fromStdString(Settings::NativeButton::mapping[i]), 586 WriteSetting(QString("debug_pad_") +
582 QString::fromStdString(Settings::values.debug_pad_buttons[i])); 587 QString::fromStdString(Settings::NativeButton::mapping[i]),
588 QString::fromStdString(Settings::values.debug_pad_buttons[i]),
589 QString::fromStdString(default_param));
583 } 590 }
584 for (int i = 0; i < Settings::NativeAnalog::NumAnalogs; ++i) { 591 for (int i = 0; i < Settings::NativeAnalog::NumAnalogs; ++i) {
585 qt_config->setValue(QString("debug_pad_") + 592 std::string default_param = InputCommon::GenerateAnalogParamFromKeys(
586 QString::fromStdString(Settings::NativeAnalog::mapping[i]), 593 default_analogs[i][0], default_analogs[i][1], default_analogs[i][2],
587 QString::fromStdString(Settings::values.debug_pad_analogs[i])); 594 default_analogs[i][3], default_analogs[i][4], 0.5f);
595 WriteSetting(QString("debug_pad_") +
596 QString::fromStdString(Settings::NativeAnalog::mapping[i]),
597 QString::fromStdString(Settings::values.debug_pad_analogs[i]),
598 QString::fromStdString(default_param));
588 } 599 }
589} 600}
590 601
591void Config::SaveMouseValues() { 602void Config::SaveMouseValues() {
592 qt_config->setValue("mouse_enabled", Settings::values.mouse_enabled); 603 WriteSetting("mouse_enabled", Settings::values.mouse_enabled, false);
593 604
594 for (int i = 0; i < Settings::NativeMouseButton::NumMouseButtons; ++i) { 605 for (int i = 0; i < Settings::NativeMouseButton::NumMouseButtons; ++i) {
595 qt_config->setValue(QString("mouse_") + 606 std::string default_param = InputCommon::GenerateKeyboardParam(default_mouse_buttons[i]);
596 QString::fromStdString(Settings::NativeMouseButton::mapping[i]), 607 WriteSetting(QString("mouse_") +
597 QString::fromStdString(Settings::values.mouse_buttons[i])); 608 QString::fromStdString(Settings::NativeMouseButton::mapping[i]),
609 QString::fromStdString(Settings::values.mouse_buttons[i]),
610 QString::fromStdString(default_param));
598 } 611 }
599} 612}
600 613
601void Config::SaveTouchscreenValues() { 614void Config::SaveTouchscreenValues() {
602 qt_config->setValue("touchscreen_enabled", Settings::values.touchscreen.enabled); 615 WriteSetting("touchscreen_enabled", Settings::values.touchscreen.enabled, true);
603 qt_config->setValue("touchscreen_device", 616 WriteSetting("touchscreen_device", QString::fromStdString(Settings::values.touchscreen.device),
604 QString::fromStdString(Settings::values.touchscreen.device)); 617 "engine:emu_window");
605 618
606 qt_config->setValue("touchscreen_finger", Settings::values.touchscreen.finger); 619 WriteSetting("touchscreen_finger", Settings::values.touchscreen.finger, 0);
607 qt_config->setValue("touchscreen_angle", Settings::values.touchscreen.rotation_angle); 620 WriteSetting("touchscreen_angle", Settings::values.touchscreen.rotation_angle, 0);
608 qt_config->setValue("touchscreen_diameter_x", Settings::values.touchscreen.diameter_x); 621 WriteSetting("touchscreen_diameter_x", Settings::values.touchscreen.diameter_x, 15);
609 qt_config->setValue("touchscreen_diameter_y", Settings::values.touchscreen.diameter_y); 622 WriteSetting("touchscreen_diameter_y", Settings::values.touchscreen.diameter_y, 15);
610} 623}
611 624
612void Config::SaveValues() { 625void Config::SaveValues() {
@@ -617,89 +630,96 @@ void Config::SaveValues() {
617 SaveMouseValues(); 630 SaveMouseValues();
618 SaveTouchscreenValues(); 631 SaveTouchscreenValues();
619 632
620 qt_config->setValue("motion_device", QString::fromStdString(Settings::values.motion_device)); 633 WriteSetting("motion_device", QString::fromStdString(Settings::values.motion_device),
621 qt_config->setValue("keyboard_enabled", Settings::values.keyboard_enabled); 634 "engine:motion_emu,update_period:100,sensitivity:0.01");
635 WriteSetting("keyboard_enabled", Settings::values.keyboard_enabled, false);
622 636
623 qt_config->endGroup(); 637 qt_config->endGroup();
624 638
625 qt_config->beginGroup("Core"); 639 qt_config->beginGroup("Core");
626 qt_config->setValue("use_cpu_jit", Settings::values.use_cpu_jit); 640 WriteSetting("use_cpu_jit", Settings::values.use_cpu_jit, true);
627 qt_config->setValue("use_multi_core", Settings::values.use_multi_core); 641 WriteSetting("use_multi_core", Settings::values.use_multi_core, false);
628 qt_config->endGroup(); 642 qt_config->endGroup();
629 643
630 qt_config->beginGroup("Renderer"); 644 qt_config->beginGroup("Renderer");
631 qt_config->setValue("resolution_factor", (double)Settings::values.resolution_factor); 645 WriteSetting("resolution_factor", (double)Settings::values.resolution_factor, 1.0);
632 qt_config->setValue("use_frame_limit", Settings::values.use_frame_limit); 646 WriteSetting("use_frame_limit", Settings::values.use_frame_limit, true);
633 qt_config->setValue("frame_limit", Settings::values.frame_limit); 647 WriteSetting("frame_limit", Settings::values.frame_limit, 100);
634 qt_config->setValue("use_disk_shader_cache", Settings::values.use_disk_shader_cache); 648 WriteSetting("use_disk_shader_cache", Settings::values.use_disk_shader_cache, true);
635 qt_config->setValue("use_accurate_gpu_emulation", Settings::values.use_accurate_gpu_emulation); 649 WriteSetting("use_accurate_gpu_emulation", Settings::values.use_accurate_gpu_emulation, false);
650 WriteSetting("use_asynchronous_gpu_emulation", Settings::values.use_asynchronous_gpu_emulation,
651 false);
636 652
637 // Cast to double because Qt's written float values are not human-readable 653 // Cast to double because Qt's written float values are not human-readable
638 qt_config->setValue("bg_red", (double)Settings::values.bg_red); 654 WriteSetting("bg_red", (double)Settings::values.bg_red, 0.0);
639 qt_config->setValue("bg_green", (double)Settings::values.bg_green); 655 WriteSetting("bg_green", (double)Settings::values.bg_green, 0.0);
640 qt_config->setValue("bg_blue", (double)Settings::values.bg_blue); 656 WriteSetting("bg_blue", (double)Settings::values.bg_blue, 0.0);
641 qt_config->endGroup(); 657 qt_config->endGroup();
642 658
643 qt_config->beginGroup("Audio"); 659 qt_config->beginGroup("Audio");
644 qt_config->setValue("output_engine", QString::fromStdString(Settings::values.sink_id)); 660 WriteSetting("output_engine", QString::fromStdString(Settings::values.sink_id), "auto");
645 qt_config->setValue("enable_audio_stretching", Settings::values.enable_audio_stretching); 661 WriteSetting("enable_audio_stretching", Settings::values.enable_audio_stretching, true);
646 qt_config->setValue("output_device", QString::fromStdString(Settings::values.audio_device_id)); 662 WriteSetting("output_device", QString::fromStdString(Settings::values.audio_device_id), "auto");
647 qt_config->setValue("volume", Settings::values.volume); 663 WriteSetting("volume", Settings::values.volume, 1.0f);
648 qt_config->endGroup(); 664 qt_config->endGroup();
649 665
650 qt_config->beginGroup("Data Storage"); 666 qt_config->beginGroup("Data Storage");
651 qt_config->setValue("use_virtual_sd", Settings::values.use_virtual_sd); 667 WriteSetting("use_virtual_sd", Settings::values.use_virtual_sd, true);
652 qt_config->setValue("nand_directory", 668 WriteSetting("nand_directory",
653 QString::fromStdString(FileUtil::GetUserPath(FileUtil::UserPath::NANDDir))); 669 QString::fromStdString(FileUtil::GetUserPath(FileUtil::UserPath::NANDDir)),
654 qt_config->setValue("sdmc_directory", 670 QString::fromStdString(FileUtil::GetUserPath(FileUtil::UserPath::NANDDir)));
655 QString::fromStdString(FileUtil::GetUserPath(FileUtil::UserPath::SDMCDir))); 671 WriteSetting("sdmc_directory",
672 QString::fromStdString(FileUtil::GetUserPath(FileUtil::UserPath::SDMCDir)),
673 QString::fromStdString(FileUtil::GetUserPath(FileUtil::UserPath::SDMCDir)));
656 qt_config->endGroup(); 674 qt_config->endGroup();
657 675
658 qt_config->beginGroup("System"); 676 qt_config->beginGroup("System");
659 qt_config->setValue("use_docked_mode", Settings::values.use_docked_mode); 677 WriteSetting("use_docked_mode", Settings::values.use_docked_mode, false);
660 qt_config->setValue("enable_nfc", Settings::values.enable_nfc); 678 WriteSetting("enable_nfc", Settings::values.enable_nfc, true);
661 qt_config->setValue("current_user", Settings::values.current_user); 679 WriteSetting("current_user", Settings::values.current_user, 0);
662 qt_config->setValue("language_index", Settings::values.language_index); 680 WriteSetting("language_index", Settings::values.language_index, 1);
663 681
664 qt_config->setValue("rng_seed_enabled", Settings::values.rng_seed.has_value()); 682 WriteSetting("rng_seed_enabled", Settings::values.rng_seed.has_value(), false);
665 qt_config->setValue("rng_seed", Settings::values.rng_seed.value_or(0)); 683 WriteSetting("rng_seed", Settings::values.rng_seed.value_or(0), 0);
666 684
667 qt_config->setValue("custom_rtc_enabled", Settings::values.custom_rtc.has_value()); 685 WriteSetting("custom_rtc_enabled", Settings::values.custom_rtc.has_value(), false);
668 qt_config->setValue("custom_rtc", 686 WriteSetting("custom_rtc",
669 QVariant::fromValue<long long>( 687 QVariant::fromValue<long long>(
670 Settings::values.custom_rtc.value_or(std::chrono::seconds{}).count())); 688 Settings::values.custom_rtc.value_or(std::chrono::seconds{}).count()),
689 0);
671 690
672 qt_config->endGroup(); 691 qt_config->endGroup();
673 692
674 qt_config->beginGroup("Miscellaneous"); 693 qt_config->beginGroup("Miscellaneous");
675 qt_config->setValue("log_filter", QString::fromStdString(Settings::values.log_filter)); 694 WriteSetting("log_filter", QString::fromStdString(Settings::values.log_filter), "*:Info");
676 qt_config->setValue("use_dev_keys", Settings::values.use_dev_keys); 695 WriteSetting("use_dev_keys", Settings::values.use_dev_keys, false);
677 qt_config->endGroup(); 696 qt_config->endGroup();
678 697
679 qt_config->beginGroup("Debugging"); 698 qt_config->beginGroup("Debugging");
680 qt_config->setValue("use_gdbstub", Settings::values.use_gdbstub); 699 WriteSetting("use_gdbstub", Settings::values.use_gdbstub, false);
681 qt_config->setValue("gdbstub_port", Settings::values.gdbstub_port); 700 WriteSetting("gdbstub_port", Settings::values.gdbstub_port, 24689);
682 qt_config->setValue("program_args", QString::fromStdString(Settings::values.program_args)); 701 WriteSetting("program_args", QString::fromStdString(Settings::values.program_args), "");
683 qt_config->setValue("dump_exefs", Settings::values.dump_exefs); 702 WriteSetting("dump_exefs", Settings::values.dump_exefs, false);
684 qt_config->setValue("dump_nso", Settings::values.dump_nso); 703 WriteSetting("dump_nso", Settings::values.dump_nso, false);
685 qt_config->endGroup(); 704 qt_config->endGroup();
686 705
687 qt_config->beginGroup("WebService"); 706 qt_config->beginGroup("WebService");
688 qt_config->setValue("enable_telemetry", Settings::values.enable_telemetry); 707 WriteSetting("enable_telemetry", Settings::values.enable_telemetry, true);
689 qt_config->setValue("web_api_url", QString::fromStdString(Settings::values.web_api_url)); 708 WriteSetting("web_api_url", QString::fromStdString(Settings::values.web_api_url),
690 qt_config->setValue("yuzu_username", QString::fromStdString(Settings::values.yuzu_username)); 709 "https://api.yuzu-emu.org");
691 qt_config->setValue("yuzu_token", QString::fromStdString(Settings::values.yuzu_token)); 710 WriteSetting("yuzu_username", QString::fromStdString(Settings::values.yuzu_username));
711 WriteSetting("yuzu_token", QString::fromStdString(Settings::values.yuzu_token));
692 qt_config->endGroup(); 712 qt_config->endGroup();
693 713
694 qt_config->beginWriteArray("DisabledAddOns"); 714 qt_config->beginWriteArray("DisabledAddOns");
695 int i = 0; 715 int i = 0;
696 for (const auto& elem : Settings::values.disabled_addons) { 716 for (const auto& elem : Settings::values.disabled_addons) {
697 qt_config->setArrayIndex(i); 717 qt_config->setArrayIndex(i);
698 qt_config->setValue("title_id", QVariant::fromValue<u64>(elem.first)); 718 WriteSetting("title_id", QVariant::fromValue<u64>(elem.first), 0);
699 qt_config->beginWriteArray("disabled"); 719 qt_config->beginWriteArray("disabled");
700 for (std::size_t j = 0; j < elem.second.size(); ++j) { 720 for (std::size_t j = 0; j < elem.second.size(); ++j) {
701 qt_config->setArrayIndex(static_cast<int>(j)); 721 qt_config->setArrayIndex(static_cast<int>(j));
702 qt_config->setValue("d", QString::fromStdString(elem.second[j])); 722 WriteSetting("d", QString::fromStdString(elem.second[j]), "");
703 } 723 }
704 qt_config->endArray(); 724 qt_config->endArray();
705 ++i; 725 ++i;
@@ -707,60 +727,86 @@ void Config::SaveValues() {
707 qt_config->endArray(); 727 qt_config->endArray();
708 728
709 qt_config->beginGroup("UI"); 729 qt_config->beginGroup("UI");
710 qt_config->setValue("theme", UISettings::values.theme); 730 WriteSetting("theme", UISettings::values.theme, UISettings::themes[0].second);
711 qt_config->setValue("enable_discord_presence", UISettings::values.enable_discord_presence); 731 WriteSetting("enable_discord_presence", UISettings::values.enable_discord_presence, true);
712 qt_config->setValue("screenshot_resolution_factor", 732 WriteSetting("screenshot_resolution_factor", UISettings::values.screenshot_resolution_factor,
713 UISettings::values.screenshot_resolution_factor); 733 0);
714 qt_config->setValue("select_user_on_boot", UISettings::values.select_user_on_boot); 734 WriteSetting("select_user_on_boot", UISettings::values.select_user_on_boot, false);
715 735
716 qt_config->beginGroup("UIGameList"); 736 qt_config->beginGroup("UIGameList");
717 qt_config->setValue("show_unknown", UISettings::values.show_unknown); 737 WriteSetting("show_unknown", UISettings::values.show_unknown, true);
718 qt_config->setValue("show_add_ons", UISettings::values.show_add_ons); 738 WriteSetting("show_add_ons", UISettings::values.show_add_ons, true);
719 qt_config->setValue("icon_size", UISettings::values.icon_size); 739 WriteSetting("icon_size", UISettings::values.icon_size, 64);
720 qt_config->setValue("row_1_text_id", UISettings::values.row_1_text_id); 740 WriteSetting("row_1_text_id", UISettings::values.row_1_text_id, 3);
721 qt_config->setValue("row_2_text_id", UISettings::values.row_2_text_id); 741 WriteSetting("row_2_text_id", UISettings::values.row_2_text_id, 2);
722 qt_config->endGroup(); 742 qt_config->endGroup();
723 743
724 qt_config->beginGroup("UILayout"); 744 qt_config->beginGroup("UILayout");
725 qt_config->setValue("geometry", UISettings::values.geometry); 745 WriteSetting("geometry", UISettings::values.geometry);
726 qt_config->setValue("state", UISettings::values.state); 746 WriteSetting("state", UISettings::values.state);
727 qt_config->setValue("geometryRenderWindow", UISettings::values.renderwindow_geometry); 747 WriteSetting("geometryRenderWindow", UISettings::values.renderwindow_geometry);
728 qt_config->setValue("gameListHeaderState", UISettings::values.gamelist_header_state); 748 WriteSetting("gameListHeaderState", UISettings::values.gamelist_header_state);
729 qt_config->setValue("microProfileDialogGeometry", UISettings::values.microprofile_geometry); 749 WriteSetting("microProfileDialogGeometry", UISettings::values.microprofile_geometry);
730 qt_config->setValue("microProfileDialogVisible", UISettings::values.microprofile_visible); 750 WriteSetting("microProfileDialogVisible", UISettings::values.microprofile_visible, false);
731 qt_config->endGroup(); 751 qt_config->endGroup();
732 752
733 qt_config->beginGroup("Paths"); 753 qt_config->beginGroup("Paths");
734 qt_config->setValue("romsPath", UISettings::values.roms_path); 754 WriteSetting("romsPath", UISettings::values.roms_path);
735 qt_config->setValue("symbolsPath", UISettings::values.symbols_path); 755 WriteSetting("symbolsPath", UISettings::values.symbols_path);
736 qt_config->setValue("screenshotPath", UISettings::values.screenshot_path); 756 WriteSetting("screenshotPath", UISettings::values.screenshot_path);
737 qt_config->setValue("gameListRootDir", UISettings::values.gamedir); 757 WriteSetting("gameListRootDir", UISettings::values.gamedir, ".");
738 qt_config->setValue("gameListDeepScan", UISettings::values.gamedir_deepscan); 758 WriteSetting("gameListDeepScan", UISettings::values.gamedir_deepscan, false);
739 qt_config->setValue("recentFiles", UISettings::values.recent_files); 759 WriteSetting("recentFiles", UISettings::values.recent_files);
740 qt_config->endGroup(); 760 qt_config->endGroup();
741 761
742 qt_config->beginGroup("Shortcuts"); 762 qt_config->beginGroup("Shortcuts");
743 for (auto shortcut : UISettings::values.shortcuts) { 763 for (auto shortcut : UISettings::values.shortcuts) {
744 qt_config->setValue(shortcut.first + "/KeySeq", shortcut.second.first); 764 WriteSetting(shortcut.first + "/KeySeq", shortcut.second.first);
745 qt_config->setValue(shortcut.first + "/Context", shortcut.second.second); 765 WriteSetting(shortcut.first + "/Context", shortcut.second.second);
746 } 766 }
747 qt_config->endGroup(); 767 qt_config->endGroup();
748 768
749 qt_config->setValue("singleWindowMode", UISettings::values.single_window_mode); 769 WriteSetting("singleWindowMode", UISettings::values.single_window_mode, true);
750 qt_config->setValue("fullscreen", UISettings::values.fullscreen); 770 WriteSetting("fullscreen", UISettings::values.fullscreen, false);
751 qt_config->setValue("displayTitleBars", UISettings::values.display_titlebar); 771 WriteSetting("displayTitleBars", UISettings::values.display_titlebar, true);
752 qt_config->setValue("showFilterBar", UISettings::values.show_filter_bar); 772 WriteSetting("showFilterBar", UISettings::values.show_filter_bar, true);
753 qt_config->setValue("showStatusBar", UISettings::values.show_status_bar); 773 WriteSetting("showStatusBar", UISettings::values.show_status_bar, true);
754 qt_config->setValue("confirmClose", UISettings::values.confirm_before_closing); 774 WriteSetting("confirmClose", UISettings::values.confirm_before_closing, true);
755 qt_config->setValue("firstStart", UISettings::values.first_start); 775 WriteSetting("firstStart", UISettings::values.first_start, true);
756 qt_config->setValue("calloutFlags", UISettings::values.callout_flags); 776 WriteSetting("calloutFlags", UISettings::values.callout_flags, 0);
757 qt_config->setValue("showConsole", UISettings::values.show_console); 777 WriteSetting("showConsole", UISettings::values.show_console, false);
758 qt_config->setValue("profileIndex", UISettings::values.profile_index); 778 WriteSetting("profileIndex", UISettings::values.profile_index, 0);
759 qt_config->endGroup(); 779 qt_config->endGroup();
760} 780}
761 781
782QVariant Config::ReadSetting(const QString& name) const {
783 return qt_config->value(name);
784}
785
786QVariant Config::ReadSetting(const QString& name, const QVariant& default_value) const {
787 QVariant result;
788 if (qt_config->value(name + "/default", false).toBool()) {
789 result = default_value;
790 } else {
791 result = qt_config->value(name, default_value);
792 }
793 return result;
794}
795
796void Config::WriteSetting(const QString& name, const QVariant& value) {
797 qt_config->setValue(name, value);
798}
799
800void Config::WriteSetting(const QString& name, const QVariant& value,
801 const QVariant& default_value) {
802 qt_config->setValue(name + "/default", value == default_value);
803 qt_config->setValue(name, value);
804}
805
762void Config::Reload() { 806void Config::Reload() {
763 ReadValues(); 807 ReadValues();
808 // To apply default value changes
809 SaveValues();
764 Settings::Apply(); 810 Settings::Apply();
765} 811}
766 812
diff --git a/src/yuzu/configuration/config.h b/src/yuzu/configuration/config.h
index e73ad19bb..f4185db18 100644
--- a/src/yuzu/configuration/config.h
+++ b/src/yuzu/configuration/config.h
@@ -42,6 +42,11 @@ private:
42 void SaveMouseValues(); 42 void SaveMouseValues();
43 void SaveTouchscreenValues(); 43 void SaveTouchscreenValues();
44 44
45 QVariant ReadSetting(const QString& name) const;
46 QVariant ReadSetting(const QString& name, const QVariant& default_value) const;
47 void WriteSetting(const QString& name, const QVariant& value);
48 void WriteSetting(const QString& name, const QVariant& value, const QVariant& default_value);
49
45 std::unique_ptr<QSettings> qt_config; 50 std::unique_ptr<QSettings> qt_config;
46 std::string qt_config_loc; 51 std::string qt_config_loc;
47}; 52};
diff --git a/src/yuzu/configuration/configure_graphics.cpp b/src/yuzu/configuration/configure_graphics.cpp
index 0f5dd534b..dd1d67488 100644
--- a/src/yuzu/configuration/configure_graphics.cpp
+++ b/src/yuzu/configuration/configure_graphics.cpp
@@ -75,6 +75,8 @@ void ConfigureGraphics::setConfiguration() {
75 ui->frame_limit->setValue(Settings::values.frame_limit); 75 ui->frame_limit->setValue(Settings::values.frame_limit);
76 ui->use_disk_shader_cache->setChecked(Settings::values.use_disk_shader_cache); 76 ui->use_disk_shader_cache->setChecked(Settings::values.use_disk_shader_cache);
77 ui->use_accurate_gpu_emulation->setChecked(Settings::values.use_accurate_gpu_emulation); 77 ui->use_accurate_gpu_emulation->setChecked(Settings::values.use_accurate_gpu_emulation);
78 ui->use_asynchronous_gpu_emulation->setEnabled(!Core::System::GetInstance().IsPoweredOn());
79 ui->use_asynchronous_gpu_emulation->setChecked(Settings::values.use_asynchronous_gpu_emulation);
78 UpdateBackgroundColorButton(QColor::fromRgbF(Settings::values.bg_red, Settings::values.bg_green, 80 UpdateBackgroundColorButton(QColor::fromRgbF(Settings::values.bg_red, Settings::values.bg_green,
79 Settings::values.bg_blue)); 81 Settings::values.bg_blue));
80} 82}
@@ -86,6 +88,8 @@ void ConfigureGraphics::applyConfiguration() {
86 Settings::values.frame_limit = ui->frame_limit->value(); 88 Settings::values.frame_limit = ui->frame_limit->value();
87 Settings::values.use_disk_shader_cache = ui->use_disk_shader_cache->isChecked(); 89 Settings::values.use_disk_shader_cache = ui->use_disk_shader_cache->isChecked();
88 Settings::values.use_accurate_gpu_emulation = ui->use_accurate_gpu_emulation->isChecked(); 90 Settings::values.use_accurate_gpu_emulation = ui->use_accurate_gpu_emulation->isChecked();
91 Settings::values.use_asynchronous_gpu_emulation =
92 ui->use_asynchronous_gpu_emulation->isChecked();
89 Settings::values.bg_red = static_cast<float>(bg_color.redF()); 93 Settings::values.bg_red = static_cast<float>(bg_color.redF());
90 Settings::values.bg_green = static_cast<float>(bg_color.greenF()); 94 Settings::values.bg_green = static_cast<float>(bg_color.greenF());
91 Settings::values.bg_blue = static_cast<float>(bg_color.blueF()); 95 Settings::values.bg_blue = static_cast<float>(bg_color.blueF());
diff --git a/src/yuzu/configuration/configure_graphics.ui b/src/yuzu/configuration/configure_graphics.ui
index 824f5810a..c6767e0ca 100644
--- a/src/yuzu/configuration/configure_graphics.ui
+++ b/src/yuzu/configuration/configure_graphics.ui
@@ -64,6 +64,13 @@
64 </widget> 64 </widget>
65 </item> 65 </item>
66 <item> 66 <item>
67 <widget class="QCheckBox" name="use_asynchronous_gpu_emulation">
68 <property name="text">
69 <string>Use asynchronous GPU emulation</string>
70 </property>
71 </widget>
72 </item>
73 <item>
67 <layout class="QHBoxLayout" name="horizontalLayout"> 74 <layout class="QHBoxLayout" name="horizontalLayout">
68 <item> 75 <item>
69 <widget class="QLabel" name="label"> 76 <widget class="QLabel" name="label">
diff --git a/src/yuzu/debugger/graphics/graphics_surface.cpp b/src/yuzu/debugger/graphics/graphics_surface.cpp
index 209798521..71683da8e 100644
--- a/src/yuzu/debugger/graphics/graphics_surface.cpp
+++ b/src/yuzu/debugger/graphics/graphics_surface.cpp
@@ -398,7 +398,7 @@ void GraphicsSurfaceWidget::OnUpdate() {
398 398
399 for (unsigned int y = 0; y < surface_height; ++y) { 399 for (unsigned int y = 0; y < surface_height; ++y) {
400 for (unsigned int x = 0; x < surface_width; ++x) { 400 for (unsigned int x = 0; x < surface_width; ++x) {
401 Math::Vec4<u8> color; 401 Common::Vec4<u8> color;
402 color[0] = texture_data[x + y * surface_width + 0]; 402 color[0] = texture_data[x + y * surface_width + 0];
403 color[1] = texture_data[x + y * surface_width + 1]; 403 color[1] = texture_data[x + y * surface_width + 1];
404 color[2] = texture_data[x + y * surface_width + 2]; 404 color[2] = texture_data[x + y * surface_width + 2];
diff --git a/src/yuzu/debugger/wait_tree.cpp b/src/yuzu/debugger/wait_tree.cpp
index f50225d5f..06ad74ffe 100644
--- a/src/yuzu/debugger/wait_tree.cpp
+++ b/src/yuzu/debugger/wait_tree.cpp
@@ -81,9 +81,8 @@ QString WaitTreeText::GetText() const {
81 return text; 81 return text;
82} 82}
83 83
84WaitTreeMutexInfo::WaitTreeMutexInfo(VAddr mutex_address) : mutex_address(mutex_address) { 84WaitTreeMutexInfo::WaitTreeMutexInfo(VAddr mutex_address, const Kernel::HandleTable& handle_table)
85 const auto& handle_table = Core::CurrentProcess()->GetHandleTable(); 85 : mutex_address(mutex_address) {
86
87 mutex_value = Memory::Read32(mutex_address); 86 mutex_value = Memory::Read32(mutex_address);
88 owner_handle = static_cast<Kernel::Handle>(mutex_value & Kernel::Mutex::MutexOwnerMask); 87 owner_handle = static_cast<Kernel::Handle>(mutex_value & Kernel::Mutex::MutexOwnerMask);
89 owner = handle_table.Get<Kernel::Thread>(owner_handle); 88 owner = handle_table.Get<Kernel::Thread>(owner_handle);
@@ -316,7 +315,8 @@ std::vector<std::unique_ptr<WaitTreeItem>> WaitTreeThread::GetChildren() const {
316 315
317 const VAddr mutex_wait_address = thread.GetMutexWaitAddress(); 316 const VAddr mutex_wait_address = thread.GetMutexWaitAddress();
318 if (mutex_wait_address != 0) { 317 if (mutex_wait_address != 0) {
319 list.push_back(std::make_unique<WaitTreeMutexInfo>(mutex_wait_address)); 318 const auto& handle_table = thread.GetOwnerProcess()->GetHandleTable();
319 list.push_back(std::make_unique<WaitTreeMutexInfo>(mutex_wait_address, handle_table));
320 } else { 320 } else {
321 list.push_back(std::make_unique<WaitTreeText>(tr("not waiting for mutex"))); 321 list.push_back(std::make_unique<WaitTreeText>(tr("not waiting for mutex")));
322 } 322 }
diff --git a/src/yuzu/debugger/wait_tree.h b/src/yuzu/debugger/wait_tree.h
index 365c3dbfe..62886609d 100644
--- a/src/yuzu/debugger/wait_tree.h
+++ b/src/yuzu/debugger/wait_tree.h
@@ -17,6 +17,7 @@
17class EmuThread; 17class EmuThread;
18 18
19namespace Kernel { 19namespace Kernel {
20class HandleTable;
20class ReadableEvent; 21class ReadableEvent;
21class WaitObject; 22class WaitObject;
22class Thread; 23class Thread;
@@ -72,7 +73,7 @@ public:
72class WaitTreeMutexInfo : public WaitTreeExpandableItem { 73class WaitTreeMutexInfo : public WaitTreeExpandableItem {
73 Q_OBJECT 74 Q_OBJECT
74public: 75public:
75 explicit WaitTreeMutexInfo(VAddr mutex_address); 76 explicit WaitTreeMutexInfo(VAddr mutex_address, const Kernel::HandleTable& handle_table);
76 ~WaitTreeMutexInfo() override; 77 ~WaitTreeMutexInfo() override;
77 78
78 QString GetText() const override; 79 QString GetText() const override;
diff --git a/src/yuzu/main.cpp b/src/yuzu/main.cpp
index 1d460c189..41ba3c4c6 100644
--- a/src/yuzu/main.cpp
+++ b/src/yuzu/main.cpp
@@ -11,6 +11,7 @@
11#include "applets/profile_select.h" 11#include "applets/profile_select.h"
12#include "applets/software_keyboard.h" 12#include "applets/software_keyboard.h"
13#include "applets/web_browser.h" 13#include "applets/web_browser.h"
14#include "configuration/configure_input.h"
14#include "configuration/configure_per_general.h" 15#include "configuration/configure_per_general.h"
15#include "core/file_sys/vfs.h" 16#include "core/file_sys/vfs.h"
16#include "core/file_sys/vfs_real.h" 17#include "core/file_sys/vfs_real.h"
@@ -339,6 +340,11 @@ void GMainWindow::WebBrowserOpenPage(std::string_view filename, std::string_view
339 .arg(QString::fromStdString(std::to_string(key_code)))); 340 .arg(QString::fromStdString(std::to_string(key_code))));
340 }; 341 };
341 342
343 QMessageBox::information(
344 this, tr("Exit"),
345 tr("To exit the web application, use the game provided controls to select exit, select the "
346 "'Exit Web Applet' option in the menu bar, or press the 'Enter' key."));
347
342 bool running_exit_check = false; 348 bool running_exit_check = false;
343 while (!finished) { 349 while (!finished) {
344 QApplication::processEvents(); 350 QApplication::processEvents();
@@ -522,6 +528,7 @@ void GMainWindow::InitializeHotkeys() {
522 Qt::ApplicationShortcut); 528 Qt::ApplicationShortcut);
523 hotkey_registry.RegisterHotkey("Main Window", "Capture Screenshot", 529 hotkey_registry.RegisterHotkey("Main Window", "Capture Screenshot",
524 QKeySequence(QKeySequence::Print)); 530 QKeySequence(QKeySequence::Print));
531 hotkey_registry.RegisterHotkey("Main Window", "Change Docked Mode", QKeySequence(Qt::Key_F10));
525 532
526 hotkey_registry.LoadHotkeys(); 533 hotkey_registry.LoadHotkeys();
527 534
@@ -561,7 +568,10 @@ void GMainWindow::InitializeHotkeys() {
561 Settings::values.use_frame_limit = !Settings::values.use_frame_limit; 568 Settings::values.use_frame_limit = !Settings::values.use_frame_limit;
562 UpdateStatusBar(); 569 UpdateStatusBar();
563 }); 570 });
564 constexpr u16 SPEED_LIMIT_STEP = 5; 571 // TODO: Remove this comment/static whenever the next major release of
572 // MSVC occurs and we make it a requirement (see:
573 // https://developercommunity.visualstudio.com/content/problem/93922/constexprs-are-trying-to-be-captured-in-lambda-fun.html)
574 static constexpr u16 SPEED_LIMIT_STEP = 5;
565 connect(hotkey_registry.GetHotkey("Main Window", "Increase Speed Limit", this), 575 connect(hotkey_registry.GetHotkey("Main Window", "Increase Speed Limit", this),
566 &QShortcut::activated, this, [&] { 576 &QShortcut::activated, this, [&] {
567 if (Settings::values.frame_limit < 9999 - SPEED_LIMIT_STEP) { 577 if (Settings::values.frame_limit < 9999 - SPEED_LIMIT_STEP) {
@@ -588,6 +598,12 @@ void GMainWindow::InitializeHotkeys() {
588 OnCaptureScreenshot(); 598 OnCaptureScreenshot();
589 } 599 }
590 }); 600 });
601 connect(hotkey_registry.GetHotkey("Main Window", "Change Docked Mode", this),
602 &QShortcut::activated, this, [&] {
603 Settings::values.use_docked_mode = !Settings::values.use_docked_mode;
604 OnDockedModeChanged(!Settings::values.use_docked_mode,
605 Settings::values.use_docked_mode);
606 });
591} 607}
592 608
593void GMainWindow::SetDefaultUIGeometry() { 609void GMainWindow::SetDefaultUIGeometry() {
@@ -846,7 +862,7 @@ bool GMainWindow::LoadROM(const QString& filename) {
846 } 862 }
847 game_path = filename; 863 game_path = filename;
848 864
849 Core::Telemetry().AddField(Telemetry::FieldType::App, "Frontend", "Qt"); 865 system.TelemetrySession().AddField(Telemetry::FieldType::App, "Frontend", "Qt");
850 return true; 866 return true;
851} 867}
852 868
diff --git a/src/yuzu_cmd/config.cpp b/src/yuzu_cmd/config.cpp
index ff05b3179..32e78049c 100644
--- a/src/yuzu_cmd/config.cpp
+++ b/src/yuzu_cmd/config.cpp
@@ -346,7 +346,7 @@ void Config::ReadValues() {
346 346
347 // Renderer 347 // Renderer
348 Settings::values.resolution_factor = 348 Settings::values.resolution_factor =
349 (float)sdl2_config->GetReal("Renderer", "resolution_factor", 1.0); 349 static_cast<float>(sdl2_config->GetReal("Renderer", "resolution_factor", 1.0));
350 Settings::values.use_frame_limit = sdl2_config->GetBoolean("Renderer", "use_frame_limit", true); 350 Settings::values.use_frame_limit = sdl2_config->GetBoolean("Renderer", "use_frame_limit", true);
351 Settings::values.frame_limit = 351 Settings::values.frame_limit =
352 static_cast<u16>(sdl2_config->GetInteger("Renderer", "frame_limit", 100)); 352 static_cast<u16>(sdl2_config->GetInteger("Renderer", "frame_limit", 100));
@@ -354,17 +354,20 @@ void Config::ReadValues() {
354 sdl2_config->GetBoolean("Renderer", "use_disk_shader_cache", false); 354 sdl2_config->GetBoolean("Renderer", "use_disk_shader_cache", false);
355 Settings::values.use_accurate_gpu_emulation = 355 Settings::values.use_accurate_gpu_emulation =
356 sdl2_config->GetBoolean("Renderer", "use_accurate_gpu_emulation", false); 356 sdl2_config->GetBoolean("Renderer", "use_accurate_gpu_emulation", false);
357 Settings::values.use_asynchronous_gpu_emulation =
358 sdl2_config->GetBoolean("Renderer", "use_asynchronous_gpu_emulation", false);
357 359
358 Settings::values.bg_red = (float)sdl2_config->GetReal("Renderer", "bg_red", 0.0); 360 Settings::values.bg_red = static_cast<float>(sdl2_config->GetReal("Renderer", "bg_red", 0.0));
359 Settings::values.bg_green = (float)sdl2_config->GetReal("Renderer", "bg_green", 0.0); 361 Settings::values.bg_green =
360 Settings::values.bg_blue = (float)sdl2_config->GetReal("Renderer", "bg_blue", 0.0); 362 static_cast<float>(sdl2_config->GetReal("Renderer", "bg_green", 0.0));
363 Settings::values.bg_blue = static_cast<float>(sdl2_config->GetReal("Renderer", "bg_blue", 0.0));
361 364
362 // Audio 365 // Audio
363 Settings::values.sink_id = sdl2_config->Get("Audio", "output_engine", "auto"); 366 Settings::values.sink_id = sdl2_config->Get("Audio", "output_engine", "auto");
364 Settings::values.enable_audio_stretching = 367 Settings::values.enable_audio_stretching =
365 sdl2_config->GetBoolean("Audio", "enable_audio_stretching", true); 368 sdl2_config->GetBoolean("Audio", "enable_audio_stretching", true);
366 Settings::values.audio_device_id = sdl2_config->Get("Audio", "output_device", "auto"); 369 Settings::values.audio_device_id = sdl2_config->Get("Audio", "output_device", "auto");
367 Settings::values.volume = sdl2_config->GetReal("Audio", "volume", 1); 370 Settings::values.volume = static_cast<float>(sdl2_config->GetReal("Audio", "volume", 1));
368 371
369 Settings::values.language_index = sdl2_config->GetInteger("System", "language_index", 1); 372 Settings::values.language_index = sdl2_config->GetInteger("System", "language_index", 1);
370 373
diff --git a/src/yuzu_cmd/default_ini.h b/src/yuzu_cmd/default_ini.h
index a81986f8e..6538af098 100644
--- a/src/yuzu_cmd/default_ini.h
+++ b/src/yuzu_cmd/default_ini.h
@@ -118,6 +118,10 @@ use_disk_shader_cache =
118# 0 (default): Off (fast), 1 : On (slow) 118# 0 (default): Off (fast), 1 : On (slow)
119use_accurate_gpu_emulation = 119use_accurate_gpu_emulation =
120 120
121# Whether to use asynchronous GPU emulation
122# 0 : Off (slow), 1 (default): On (fast)
123use_asynchronous_gpu_emulation =
124
121# The clear color for the renderer. What shows up on the sides of the bottom screen. 125# The clear color for the renderer. What shows up on the sides of the bottom screen.
122# Must be in range of 0.0-1.0. Defaults to 1.0 for all. 126# Must be in range of 0.0-1.0. Defaults to 1.0 for all.
123bg_red = 127bg_red =
diff --git a/src/yuzu_cmd/emu_window/emu_window_sdl2.cpp b/src/yuzu_cmd/emu_window/emu_window_sdl2.cpp
index 7df8eff53..de7a26e14 100644
--- a/src/yuzu_cmd/emu_window/emu_window_sdl2.cpp
+++ b/src/yuzu_cmd/emu_window/emu_window_sdl2.cpp
@@ -135,16 +135,16 @@ bool EmuWindow_SDL2::SupportsRequiredGLExtensions() {
135} 135}
136 136
137EmuWindow_SDL2::EmuWindow_SDL2(bool fullscreen) { 137EmuWindow_SDL2::EmuWindow_SDL2(bool fullscreen) {
138 InputCommon::Init();
139
140 SDL_SetMainReady();
141
142 // Initialize the window 138 // Initialize the window
143 if (SDL_Init(SDL_INIT_VIDEO | SDL_INIT_JOYSTICK) < 0) { 139 if (SDL_Init(SDL_INIT_VIDEO | SDL_INIT_JOYSTICK) < 0) {
144 LOG_CRITICAL(Frontend, "Failed to initialize SDL2! Exiting..."); 140 LOG_CRITICAL(Frontend, "Failed to initialize SDL2! Exiting...");
145 exit(1); 141 exit(1);
146 } 142 }
147 143
144 InputCommon::Init();
145
146 SDL_SetMainReady();
147
148 SDL_GL_SetAttribute(SDL_GL_CONTEXT_MAJOR_VERSION, 4); 148 SDL_GL_SetAttribute(SDL_GL_CONTEXT_MAJOR_VERSION, 4);
149 SDL_GL_SetAttribute(SDL_GL_CONTEXT_MINOR_VERSION, 3); 149 SDL_GL_SetAttribute(SDL_GL_CONTEXT_MINOR_VERSION, 3);
150 SDL_GL_SetAttribute(SDL_GL_CONTEXT_PROFILE_MASK, SDL_GL_CONTEXT_PROFILE_CORE); 150 SDL_GL_SetAttribute(SDL_GL_CONTEXT_PROFILE_MASK, SDL_GL_CONTEXT_PROFILE_CORE);
@@ -201,11 +201,9 @@ EmuWindow_SDL2::EmuWindow_SDL2(bool fullscreen) {
201} 201}
202 202
203EmuWindow_SDL2::~EmuWindow_SDL2() { 203EmuWindow_SDL2::~EmuWindow_SDL2() {
204 InputCommon::SDL::CloseSDLJoysticks(); 204 InputCommon::Shutdown();
205 SDL_GL_DeleteContext(gl_context); 205 SDL_GL_DeleteContext(gl_context);
206 SDL_Quit(); 206 SDL_Quit();
207
208 InputCommon::Shutdown();
209} 207}
210 208
211void EmuWindow_SDL2::SwapBuffers() { 209void EmuWindow_SDL2::SwapBuffers() {
@@ -262,7 +260,6 @@ void EmuWindow_SDL2::PollEvents() {
262 is_open = false; 260 is_open = false;
263 break; 261 break;
264 default: 262 default:
265 InputCommon::SDL::HandleGameControllerEvent(event);
266 break; 263 break;
267 } 264 }
268 } 265 }
diff --git a/src/yuzu_cmd/yuzu.cpp b/src/yuzu_cmd/yuzu.cpp
index c34b5467f..c6c66a787 100644
--- a/src/yuzu_cmd/yuzu.cpp
+++ b/src/yuzu_cmd/yuzu.cpp
@@ -216,7 +216,7 @@ int main(int argc, char** argv) {
216 } 216 }
217 } 217 }
218 218
219 Core::Telemetry().AddField(Telemetry::FieldType::App, "Frontend", "SDL"); 219 system.TelemetrySession().AddField(Telemetry::FieldType::App, "Frontend", "SDL");
220 220
221 system.Renderer().Rasterizer().LoadDiskResources(); 221 system.Renderer().Rasterizer().LoadDiskResources();
222 222