summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r--src/audio_core/audio_renderer.h14
-rw-r--r--src/audio_core/cubeb_sink.cpp15
-rw-r--r--src/audio_core/cubeb_sink.h4
-rw-r--r--src/common/bit_field.h16
-rw-r--r--src/common/color.h40
-rw-r--r--src/common/logging/backend.cpp50
-rw-r--r--src/common/logging/backend.h4
-rw-r--r--src/common/math_util.h4
-rw-r--r--src/common/quaternion.h10
-rw-r--r--src/common/vector_math.h4
-rw-r--r--src/core/CMakeLists.txt1
-rw-r--r--src/core/core.cpp32
-rw-r--r--src/core/core.h4
-rw-r--r--src/core/frontend/emu_window.cpp2
-rw-r--r--src/core/frontend/emu_window.h2
-rw-r--r--src/core/frontend/framebuffer_layout.cpp12
-rw-r--r--src/core/frontend/framebuffer_layout.h2
-rw-r--r--src/core/frontend/input.h2
-rw-r--r--src/core/hle/ipc.h4
-rw-r--r--src/core/hle/ipc_helpers.h2
-rw-r--r--src/core/hle/kernel/address_arbiter.cpp147
-rw-r--r--src/core/hle/kernel/address_arbiter.h74
-rw-r--r--src/core/hle/kernel/client_session.cpp14
-rw-r--r--src/core/hle/kernel/client_session.h9
-rw-r--r--src/core/hle/kernel/errors.h1
-rw-r--r--src/core/hle/kernel/handle_table.cpp40
-rw-r--r--src/core/hle/kernel/handle_table.h25
-rw-r--r--src/core/hle/kernel/hle_ipc.cpp22
-rw-r--r--src/core/hle/kernel/hle_ipc.h25
-rw-r--r--src/core/hle/kernel/kernel.cpp30
-rw-r--r--src/core/hle/kernel/kernel.h26
-rw-r--r--src/core/hle/kernel/process.cpp8
-rw-r--r--src/core/hle/kernel/process_capability.cpp4
-rw-r--r--src/core/hle/kernel/process_capability.h4
-rw-r--r--src/core/hle/kernel/server_session.cpp91
-rw-r--r--src/core/hle/kernel/server_session.h53
-rw-r--r--src/core/hle/kernel/shared_memory.cpp5
-rw-r--r--src/core/hle/kernel/svc.cpp40
-rw-r--r--src/core/hle/kernel/thread.cpp2
-rw-r--r--src/core/hle/kernel/vm_manager.cpp47
-rw-r--r--src/core/hle/kernel/vm_manager.h24
-rw-r--r--src/core/hle/result.h1
-rw-r--r--src/core/hle/service/am/applets/software_keyboard.cpp1
-rw-r--r--src/core/hle/service/am/applets/software_keyboard.h3
-rw-r--r--src/core/hle/service/audio/audout_u.cpp17
-rw-r--r--src/core/hle/service/audio/audren_u.cpp64
-rw-r--r--src/core/hle/service/audio/audren_u.h3
-rw-r--r--src/core/hle/service/audio/errors.h15
-rw-r--r--src/core/hle/service/hid/hid.h2
-rw-r--r--src/core/hle/service/nvdrv/devices/nvdisp_disp0.cpp4
-rw-r--r--src/core/hle/service/nvdrv/devices/nvdisp_disp0.h2
-rw-r--r--src/core/hle/service/nvdrv/devices/nvhost_as_gpu.cpp2
-rw-r--r--src/core/hle/service/nvdrv/devices/nvhost_gpu.cpp14
-rw-r--r--src/core/hle/service/nvflinger/buffer_queue.cpp2
-rw-r--r--src/core/hle/service/nvflinger/buffer_queue.h4
-rw-r--r--src/core/hle/service/nvflinger/nvflinger.cpp2
-rw-r--r--src/core/hle/service/sm/controller.cpp2
-rw-r--r--src/core/hle/service/vi/vi.cpp2
-rw-r--r--src/core/memory.cpp31
-rw-r--r--src/core/settings.cpp3
-rw-r--r--src/core/settings.h1
-rw-r--r--src/core/telemetry_session.cpp2
-rw-r--r--src/input_common/motion_emu.cpp28
-rw-r--r--src/tests/core/arm/arm_test_common.cpp6
-rw-r--r--src/video_core/CMakeLists.txt16
-rw-r--r--src/video_core/dma_pusher.cpp2
-rw-r--r--src/video_core/engines/fermi_2d.cpp15
-rw-r--r--src/video_core/engines/fermi_2d.h2
-rw-r--r--src/video_core/engines/kepler_compute.cpp3
-rw-r--r--src/video_core/engines/kepler_compute.h3
-rw-r--r--src/video_core/engines/kepler_memory.cpp2
-rw-r--r--src/video_core/engines/kepler_memory.h1
-rw-r--r--src/video_core/engines/maxwell_3d.cpp66
-rw-r--r--src/video_core/engines/maxwell_3d.h17
-rw-r--r--src/video_core/engines/maxwell_dma.cpp5
-rw-r--r--src/video_core/engines/maxwell_dma.h1
-rw-r--r--src/video_core/engines/shader_bytecode.h1
-rw-r--r--src/video_core/gpu.cpp5
-rw-r--r--src/video_core/gpu.h59
-rw-r--r--src/video_core/gpu_asynch.cpp37
-rw-r--r--src/video_core/gpu_asynch.h37
-rw-r--r--src/video_core/gpu_synch.cpp37
-rw-r--r--src/video_core/gpu_synch.h29
-rw-r--r--src/video_core/gpu_thread.cpp152
-rw-r--r--src/video_core/gpu_thread.h133
-rw-r--r--src/video_core/rasterizer_cache.h18
-rw-r--r--src/video_core/rasterizer_interface.h4
-rw-r--r--src/video_core/renderer_base.cpp1
-rw-r--r--src/video_core/renderer_opengl/gl_rasterizer.cpp83
-rw-r--r--src/video_core/renderer_opengl/gl_rasterizer.h4
-rw-r--r--src/video_core/renderer_opengl/gl_rasterizer_cache.cpp294
-rw-r--r--src/video_core/renderer_opengl/gl_rasterizer_cache.h119
-rw-r--r--src/video_core/renderer_opengl/gl_shader_disk_cache.cpp2
-rw-r--r--src/video_core/renderer_opengl/gl_state.cpp4
-rw-r--r--src/video_core/renderer_opengl/renderer_opengl.cpp28
-rw-r--r--src/video_core/renderer_opengl/renderer_opengl.h5
-rw-r--r--src/video_core/renderer_vulkan/maxwell_to_vk.cpp483
-rw-r--r--src/video_core/renderer_vulkan/maxwell_to_vk.h58
-rw-r--r--src/video_core/renderer_vulkan/vk_buffer_cache.cpp116
-rw-r--r--src/video_core/renderer_vulkan/vk_buffer_cache.h87
-rw-r--r--src/video_core/renderer_vulkan/vk_device.cpp13
-rw-r--r--src/video_core/renderer_vulkan/vk_memory_manager.cpp2
-rw-r--r--src/video_core/renderer_vulkan/vk_resource_manager.cpp12
-rw-r--r--src/video_core/renderer_vulkan/vk_resource_manager.h2
-rw-r--r--src/video_core/renderer_vulkan/vk_stream_buffer.cpp90
-rw-r--r--src/video_core/renderer_vulkan/vk_stream_buffer.h72
-rw-r--r--src/video_core/surface.cpp2
-rw-r--r--src/video_core/textures/astc.cpp80
-rw-r--r--src/video_core/textures/astc.h2
-rw-r--r--src/video_core/textures/convert.cpp92
-rw-r--r--src/video_core/textures/convert.h18
-rw-r--r--src/video_core/textures/decoders.cpp6
-rw-r--r--src/video_core/textures/decoders.h18
-rw-r--r--src/web_service/verify_login.h2
-rw-r--r--src/web_service/web_backend.cpp1
-rw-r--r--src/yuzu/applets/web_browser.cpp2
-rw-r--r--src/yuzu/bootmanager.cpp10
-rw-r--r--src/yuzu/compatdb.cpp6
-rw-r--r--src/yuzu/configuration/config.cpp4
-rw-r--r--src/yuzu/configuration/configure_graphics.cpp4
-rw-r--r--src/yuzu/configuration/configure_graphics.ui7
-rw-r--r--src/yuzu/debugger/graphics/graphics_surface.cpp2
-rw-r--r--src/yuzu/debugger/wait_tree.cpp8
-rw-r--r--src/yuzu/debugger/wait_tree.h3
-rw-r--r--src/yuzu/main.cpp20
-rw-r--r--src/yuzu_cmd/config.cpp13
-rw-r--r--src/yuzu_cmd/default_ini.h4
-rw-r--r--src/yuzu_cmd/yuzu.cpp2
128 files changed, 2716 insertions, 836 deletions
diff --git a/src/audio_core/audio_renderer.h b/src/audio_core/audio_renderer.h
index 201ec7a3c..b2e5d336c 100644
--- a/src/audio_core/audio_renderer.h
+++ b/src/audio_core/audio_renderer.h
@@ -46,16 +46,18 @@ struct AudioRendererParameter {
46 u32_le sample_rate; 46 u32_le sample_rate;
47 u32_le sample_count; 47 u32_le sample_count;
48 u32_le mix_buffer_count; 48 u32_le mix_buffer_count;
49 u32_le unknown_c; 49 u32_le submix_count;
50 u32_le voice_count; 50 u32_le voice_count;
51 u32_le sink_count; 51 u32_le sink_count;
52 u32_le effect_count; 52 u32_le effect_count;
53 u32_le unknown_1c; 53 u32_le performance_frame_count;
54 u8 unknown_20; 54 u8 is_voice_drop_enabled;
55 INSERT_PADDING_BYTES(3); 55 u8 unknown_21;
56 u8 unknown_22;
57 u8 execution_mode;
56 u32_le splitter_count; 58 u32_le splitter_count;
57 u32_le unknown_2c; 59 u32_le num_splitter_send_channels;
58 INSERT_PADDING_WORDS(1); 60 u32_le unknown_30;
59 u32_le revision; 61 u32_le revision;
60}; 62};
61static_assert(sizeof(AudioRendererParameter) == 52, "AudioRendererParameter is an invalid size"); 63static_assert(sizeof(AudioRendererParameter) == 52, "AudioRendererParameter is an invalid size");
diff --git a/src/audio_core/cubeb_sink.cpp b/src/audio_core/cubeb_sink.cpp
index dc45dedd3..7047ed9cf 100644
--- a/src/audio_core/cubeb_sink.cpp
+++ b/src/audio_core/cubeb_sink.cpp
@@ -12,6 +12,10 @@
12#include "common/ring_buffer.h" 12#include "common/ring_buffer.h"
13#include "core/settings.h" 13#include "core/settings.h"
14 14
15#ifdef _WIN32
16#include <objbase.h>
17#endif
18
15namespace AudioCore { 19namespace AudioCore {
16 20
17class CubebSinkStream final : public SinkStream { 21class CubebSinkStream final : public SinkStream {
@@ -108,6 +112,11 @@ private:
108}; 112};
109 113
110CubebSink::CubebSink(std::string_view target_device_name) { 114CubebSink::CubebSink(std::string_view target_device_name) {
115 // Cubeb requires COM to be initialized on the thread calling cubeb_init on Windows
116#ifdef _WIN32
117 com_init_result = CoInitializeEx(nullptr, COINIT_MULTITHREADED);
118#endif
119
111 if (cubeb_init(&ctx, "yuzu", nullptr) != CUBEB_OK) { 120 if (cubeb_init(&ctx, "yuzu", nullptr) != CUBEB_OK) {
112 LOG_CRITICAL(Audio_Sink, "cubeb_init failed"); 121 LOG_CRITICAL(Audio_Sink, "cubeb_init failed");
113 return; 122 return;
@@ -142,6 +151,12 @@ CubebSink::~CubebSink() {
142 } 151 }
143 152
144 cubeb_destroy(ctx); 153 cubeb_destroy(ctx);
154
155#ifdef _WIN32
156 if (SUCCEEDED(com_init_result)) {
157 CoUninitialize();
158 }
159#endif
145} 160}
146 161
147SinkStream& CubebSink::AcquireSinkStream(u32 sample_rate, u32 num_channels, 162SinkStream& CubebSink::AcquireSinkStream(u32 sample_rate, u32 num_channels,
diff --git a/src/audio_core/cubeb_sink.h b/src/audio_core/cubeb_sink.h
index efb9d1634..7ce850f47 100644
--- a/src/audio_core/cubeb_sink.h
+++ b/src/audio_core/cubeb_sink.h
@@ -25,6 +25,10 @@ private:
25 cubeb* ctx{}; 25 cubeb* ctx{};
26 cubeb_devid output_device{}; 26 cubeb_devid output_device{};
27 std::vector<SinkStreamPtr> sink_streams; 27 std::vector<SinkStreamPtr> sink_streams;
28
29#ifdef _WIN32
30 u32 com_init_result = 0;
31#endif
28}; 32};
29 33
30std::vector<std::string> ListCubebSinkDevices(); 34std::vector<std::string> ListCubebSinkDevices();
diff --git a/src/common/bit_field.h b/src/common/bit_field.h
index 21e07925d..7433c39ba 100644
--- a/src/common/bit_field.h
+++ b/src/common/bit_field.h
@@ -111,12 +111,6 @@
111template <std::size_t Position, std::size_t Bits, typename T> 111template <std::size_t Position, std::size_t Bits, typename T>
112struct BitField { 112struct BitField {
113private: 113private:
114 // We hide the copy assigment operator here, because the default copy
115 // assignment would copy the full storage value, rather than just the bits
116 // relevant to this particular bit field.
117 // We don't delete it because we want BitField to be trivially copyable.
118 constexpr BitField& operator=(const BitField&) = default;
119
120 // UnderlyingType is T for non-enum types and the underlying type of T if 114 // UnderlyingType is T for non-enum types and the underlying type of T if
121 // T is an enumeration. Note that T is wrapped within an enable_if in the 115 // T is an enumeration. Note that T is wrapped within an enable_if in the
122 // former case to workaround compile errors which arise when using 116 // former case to workaround compile errors which arise when using
@@ -163,9 +157,13 @@ public:
163 BitField(T val) = delete; 157 BitField(T val) = delete;
164 BitField& operator=(T val) = delete; 158 BitField& operator=(T val) = delete;
165 159
166 // Force default constructor to be created 160 constexpr BitField() noexcept = default;
167 // so that we can use this within unions 161
168 constexpr BitField() = default; 162 constexpr BitField(const BitField&) noexcept = default;
163 constexpr BitField& operator=(const BitField&) noexcept = default;
164
165 constexpr BitField(BitField&&) noexcept = default;
166 constexpr BitField& operator=(BitField&&) noexcept = default;
169 167
170 constexpr FORCE_INLINE operator T() const { 168 constexpr FORCE_INLINE operator T() const {
171 return Value(); 169 return Value();
diff --git a/src/common/color.h b/src/common/color.h
index 0379040be..3a2222077 100644
--- a/src/common/color.h
+++ b/src/common/color.h
@@ -55,36 +55,36 @@ constexpr u8 Convert8To6(u8 value) {
55/** 55/**
56 * Decode a color stored in RGBA8 format 56 * Decode a color stored in RGBA8 format
57 * @param bytes Pointer to encoded source color 57 * @param bytes Pointer to encoded source color
58 * @return Result color decoded as Math::Vec4<u8> 58 * @return Result color decoded as Common::Vec4<u8>
59 */ 59 */
60inline Math::Vec4<u8> DecodeRGBA8(const u8* bytes) { 60inline Common::Vec4<u8> DecodeRGBA8(const u8* bytes) {
61 return {bytes[3], bytes[2], bytes[1], bytes[0]}; 61 return {bytes[3], bytes[2], bytes[1], bytes[0]};
62} 62}
63 63
64/** 64/**
65 * Decode a color stored in RGB8 format 65 * Decode a color stored in RGB8 format
66 * @param bytes Pointer to encoded source color 66 * @param bytes Pointer to encoded source color
67 * @return Result color decoded as Math::Vec4<u8> 67 * @return Result color decoded as Common::Vec4<u8>
68 */ 68 */
69inline Math::Vec4<u8> DecodeRGB8(const u8* bytes) { 69inline Common::Vec4<u8> DecodeRGB8(const u8* bytes) {
70 return {bytes[2], bytes[1], bytes[0], 255}; 70 return {bytes[2], bytes[1], bytes[0], 255};
71} 71}
72 72
73/** 73/**
74 * Decode a color stored in RG8 (aka HILO8) format 74 * Decode a color stored in RG8 (aka HILO8) format
75 * @param bytes Pointer to encoded source color 75 * @param bytes Pointer to encoded source color
76 * @return Result color decoded as Math::Vec4<u8> 76 * @return Result color decoded as Common::Vec4<u8>
77 */ 77 */
78inline Math::Vec4<u8> DecodeRG8(const u8* bytes) { 78inline Common::Vec4<u8> DecodeRG8(const u8* bytes) {
79 return {bytes[1], bytes[0], 0, 255}; 79 return {bytes[1], bytes[0], 0, 255};
80} 80}
81 81
82/** 82/**
83 * Decode a color stored in RGB565 format 83 * Decode a color stored in RGB565 format
84 * @param bytes Pointer to encoded source color 84 * @param bytes Pointer to encoded source color
85 * @return Result color decoded as Math::Vec4<u8> 85 * @return Result color decoded as Common::Vec4<u8>
86 */ 86 */
87inline Math::Vec4<u8> DecodeRGB565(const u8* bytes) { 87inline Common::Vec4<u8> DecodeRGB565(const u8* bytes) {
88 u16_le pixel; 88 u16_le pixel;
89 std::memcpy(&pixel, bytes, sizeof(pixel)); 89 std::memcpy(&pixel, bytes, sizeof(pixel));
90 return {Convert5To8((pixel >> 11) & 0x1F), Convert6To8((pixel >> 5) & 0x3F), 90 return {Convert5To8((pixel >> 11) & 0x1F), Convert6To8((pixel >> 5) & 0x3F),
@@ -94,9 +94,9 @@ inline Math::Vec4<u8> DecodeRGB565(const u8* bytes) {
94/** 94/**
95 * Decode a color stored in RGB5A1 format 95 * Decode a color stored in RGB5A1 format
96 * @param bytes Pointer to encoded source color 96 * @param bytes Pointer to encoded source color
97 * @return Result color decoded as Math::Vec4<u8> 97 * @return Result color decoded as Common::Vec4<u8>
98 */ 98 */
99inline Math::Vec4<u8> DecodeRGB5A1(const u8* bytes) { 99inline Common::Vec4<u8> DecodeRGB5A1(const u8* bytes) {
100 u16_le pixel; 100 u16_le pixel;
101 std::memcpy(&pixel, bytes, sizeof(pixel)); 101 std::memcpy(&pixel, bytes, sizeof(pixel));
102 return {Convert5To8((pixel >> 11) & 0x1F), Convert5To8((pixel >> 6) & 0x1F), 102 return {Convert5To8((pixel >> 11) & 0x1F), Convert5To8((pixel >> 6) & 0x1F),
@@ -106,9 +106,9 @@ inline Math::Vec4<u8> DecodeRGB5A1(const u8* bytes) {
106/** 106/**
107 * Decode a color stored in RGBA4 format 107 * Decode a color stored in RGBA4 format
108 * @param bytes Pointer to encoded source color 108 * @param bytes Pointer to encoded source color
109 * @return Result color decoded as Math::Vec4<u8> 109 * @return Result color decoded as Common::Vec4<u8>
110 */ 110 */
111inline Math::Vec4<u8> DecodeRGBA4(const u8* bytes) { 111inline Common::Vec4<u8> DecodeRGBA4(const u8* bytes) {
112 u16_le pixel; 112 u16_le pixel;
113 std::memcpy(&pixel, bytes, sizeof(pixel)); 113 std::memcpy(&pixel, bytes, sizeof(pixel));
114 return {Convert4To8((pixel >> 12) & 0xF), Convert4To8((pixel >> 8) & 0xF), 114 return {Convert4To8((pixel >> 12) & 0xF), Convert4To8((pixel >> 8) & 0xF),
@@ -138,9 +138,9 @@ inline u32 DecodeD24(const u8* bytes) {
138/** 138/**
139 * Decode a depth value and a stencil value stored in D24S8 format 139 * Decode a depth value and a stencil value stored in D24S8 format
140 * @param bytes Pointer to encoded source values 140 * @param bytes Pointer to encoded source values
141 * @return Resulting values stored as a Math::Vec2 141 * @return Resulting values stored as a Common::Vec2
142 */ 142 */
143inline Math::Vec2<u32> DecodeD24S8(const u8* bytes) { 143inline Common::Vec2<u32> DecodeD24S8(const u8* bytes) {
144 return {static_cast<u32>((bytes[2] << 16) | (bytes[1] << 8) | bytes[0]), bytes[3]}; 144 return {static_cast<u32>((bytes[2] << 16) | (bytes[1] << 8) | bytes[0]), bytes[3]};
145} 145}
146 146
@@ -149,7 +149,7 @@ inline Math::Vec2<u32> DecodeD24S8(const u8* bytes) {
149 * @param color Source color to encode 149 * @param color Source color to encode
150 * @param bytes Destination pointer to store encoded color 150 * @param bytes Destination pointer to store encoded color
151 */ 151 */
152inline void EncodeRGBA8(const Math::Vec4<u8>& color, u8* bytes) { 152inline void EncodeRGBA8(const Common::Vec4<u8>& color, u8* bytes) {
153 bytes[3] = color.r(); 153 bytes[3] = color.r();
154 bytes[2] = color.g(); 154 bytes[2] = color.g();
155 bytes[1] = color.b(); 155 bytes[1] = color.b();
@@ -161,7 +161,7 @@ inline void EncodeRGBA8(const Math::Vec4<u8>& color, u8* bytes) {
161 * @param color Source color to encode 161 * @param color Source color to encode
162 * @param bytes Destination pointer to store encoded color 162 * @param bytes Destination pointer to store encoded color
163 */ 163 */
164inline void EncodeRGB8(const Math::Vec4<u8>& color, u8* bytes) { 164inline void EncodeRGB8(const Common::Vec4<u8>& color, u8* bytes) {
165 bytes[2] = color.r(); 165 bytes[2] = color.r();
166 bytes[1] = color.g(); 166 bytes[1] = color.g();
167 bytes[0] = color.b(); 167 bytes[0] = color.b();
@@ -172,7 +172,7 @@ inline void EncodeRGB8(const Math::Vec4<u8>& color, u8* bytes) {
172 * @param color Source color to encode 172 * @param color Source color to encode
173 * @param bytes Destination pointer to store encoded color 173 * @param bytes Destination pointer to store encoded color
174 */ 174 */
175inline void EncodeRG8(const Math::Vec4<u8>& color, u8* bytes) { 175inline void EncodeRG8(const Common::Vec4<u8>& color, u8* bytes) {
176 bytes[1] = color.r(); 176 bytes[1] = color.r();
177 bytes[0] = color.g(); 177 bytes[0] = color.g();
178} 178}
@@ -181,7 +181,7 @@ inline void EncodeRG8(const Math::Vec4<u8>& color, u8* bytes) {
181 * @param color Source color to encode 181 * @param color Source color to encode
182 * @param bytes Destination pointer to store encoded color 182 * @param bytes Destination pointer to store encoded color
183 */ 183 */
184inline void EncodeRGB565(const Math::Vec4<u8>& color, u8* bytes) { 184inline void EncodeRGB565(const Common::Vec4<u8>& color, u8* bytes) {
185 const u16_le data = 185 const u16_le data =
186 (Convert8To5(color.r()) << 11) | (Convert8To6(color.g()) << 5) | Convert8To5(color.b()); 186 (Convert8To5(color.r()) << 11) | (Convert8To6(color.g()) << 5) | Convert8To5(color.b());
187 187
@@ -193,7 +193,7 @@ inline void EncodeRGB565(const Math::Vec4<u8>& color, u8* bytes) {
193 * @param color Source color to encode 193 * @param color Source color to encode
194 * @param bytes Destination pointer to store encoded color 194 * @param bytes Destination pointer to store encoded color
195 */ 195 */
196inline void EncodeRGB5A1(const Math::Vec4<u8>& color, u8* bytes) { 196inline void EncodeRGB5A1(const Common::Vec4<u8>& color, u8* bytes) {
197 const u16_le data = (Convert8To5(color.r()) << 11) | (Convert8To5(color.g()) << 6) | 197 const u16_le data = (Convert8To5(color.r()) << 11) | (Convert8To5(color.g()) << 6) |
198 (Convert8To5(color.b()) << 1) | Convert8To1(color.a()); 198 (Convert8To5(color.b()) << 1) | Convert8To1(color.a());
199 199
@@ -205,7 +205,7 @@ inline void EncodeRGB5A1(const Math::Vec4<u8>& color, u8* bytes) {
205 * @param color Source color to encode 205 * @param color Source color to encode
206 * @param bytes Destination pointer to store encoded color 206 * @param bytes Destination pointer to store encoded color
207 */ 207 */
208inline void EncodeRGBA4(const Math::Vec4<u8>& color, u8* bytes) { 208inline void EncodeRGBA4(const Common::Vec4<u8>& color, u8* bytes) {
209 const u16 data = (Convert8To4(color.r()) << 12) | (Convert8To4(color.g()) << 8) | 209 const u16 data = (Convert8To4(color.r()) << 12) | (Convert8To4(color.g()) << 8) |
210 (Convert8To4(color.b()) << 4) | Convert8To4(color.a()); 210 (Convert8To4(color.b()) << 4) | Convert8To4(color.a());
211 211
diff --git a/src/common/logging/backend.cpp b/src/common/logging/backend.cpp
index b369f199f..4462ff3fb 100644
--- a/src/common/logging/backend.cpp
+++ b/src/common/logging/backend.cpp
@@ -39,8 +39,10 @@ public:
39 Impl(Impl const&) = delete; 39 Impl(Impl const&) = delete;
40 const Impl& operator=(Impl const&) = delete; 40 const Impl& operator=(Impl const&) = delete;
41 41
42 void PushEntry(Entry e) { 42 void PushEntry(Class log_class, Level log_level, const char* filename, unsigned int line_num,
43 message_queue.Push(std::move(e)); 43 const char* function, std::string message) {
44 message_queue.Push(
45 CreateEntry(log_class, log_level, filename, line_num, function, std::move(message)));
44 } 46 }
45 47
46 void AddBackend(std::unique_ptr<Backend> backend) { 48 void AddBackend(std::unique_ptr<Backend> backend) {
@@ -108,11 +110,30 @@ private:
108 backend_thread.join(); 110 backend_thread.join();
109 } 111 }
110 112
113 Entry CreateEntry(Class log_class, Level log_level, const char* filename, unsigned int line_nr,
114 const char* function, std::string message) const {
115 using std::chrono::duration_cast;
116 using std::chrono::steady_clock;
117
118 Entry entry;
119 entry.timestamp =
120 duration_cast<std::chrono::microseconds>(steady_clock::now() - time_origin);
121 entry.log_class = log_class;
122 entry.log_level = log_level;
123 entry.filename = Common::TrimSourcePath(filename);
124 entry.line_num = line_nr;
125 entry.function = function;
126 entry.message = std::move(message);
127
128 return entry;
129 }
130
111 std::mutex writing_mutex; 131 std::mutex writing_mutex;
112 std::thread backend_thread; 132 std::thread backend_thread;
113 std::vector<std::unique_ptr<Backend>> backends; 133 std::vector<std::unique_ptr<Backend>> backends;
114 Common::MPSCQueue<Log::Entry> message_queue; 134 Common::MPSCQueue<Log::Entry> message_queue;
115 Filter filter; 135 Filter filter;
136 std::chrono::steady_clock::time_point time_origin{std::chrono::steady_clock::now()};
116}; 137};
117 138
118void ConsoleBackend::Write(const Entry& entry) { 139void ConsoleBackend::Write(const Entry& entry) {
@@ -271,25 +292,6 @@ const char* GetLevelName(Level log_level) {
271#undef LVL 292#undef LVL
272} 293}
273 294
274Entry CreateEntry(Class log_class, Level log_level, const char* filename, unsigned int line_nr,
275 const char* function, std::string message) {
276 using std::chrono::duration_cast;
277 using std::chrono::steady_clock;
278
279 static steady_clock::time_point time_origin = steady_clock::now();
280
281 Entry entry;
282 entry.timestamp = duration_cast<std::chrono::microseconds>(steady_clock::now() - time_origin);
283 entry.log_class = log_class;
284 entry.log_level = log_level;
285 entry.filename = Common::TrimSourcePath(filename);
286 entry.line_num = line_nr;
287 entry.function = function;
288 entry.message = std::move(message);
289
290 return entry;
291}
292
293void SetGlobalFilter(const Filter& filter) { 295void SetGlobalFilter(const Filter& filter) {
294 Impl::Instance().SetGlobalFilter(filter); 296 Impl::Instance().SetGlobalFilter(filter);
295} 297}
@@ -314,9 +316,7 @@ void FmtLogMessageImpl(Class log_class, Level log_level, const char* filename,
314 if (!filter.CheckMessage(log_class, log_level)) 316 if (!filter.CheckMessage(log_class, log_level))
315 return; 317 return;
316 318
317 Entry entry = 319 instance.PushEntry(log_class, log_level, filename, line_num, function,
318 CreateEntry(log_class, log_level, filename, line_num, function, fmt::vformat(format, args)); 320 fmt::vformat(format, args));
319
320 instance.PushEntry(std::move(entry));
321} 321}
322} // namespace Log 322} // namespace Log
diff --git a/src/common/logging/backend.h b/src/common/logging/backend.h
index a31ee6968..fca0267a1 100644
--- a/src/common/logging/backend.h
+++ b/src/common/logging/backend.h
@@ -135,10 +135,6 @@ const char* GetLogClassName(Class log_class);
135 */ 135 */
136const char* GetLevelName(Level log_level); 136const char* GetLevelName(Level log_level);
137 137
138/// Creates a log entry by formatting the given source location, and message.
139Entry CreateEntry(Class log_class, Level log_level, const char* filename, unsigned int line_nr,
140 const char* function, std::string message);
141
142/** 138/**
143 * The global filter will prevent any messages from even being processed if they are filtered. Each 139 * The global filter will prevent any messages from even being processed if they are filtered. Each
144 * backend can have a filter, but if the level is lower than the global filter, the backend will 140 * backend can have a filter, but if the level is lower than the global filter, the backend will
diff --git a/src/common/math_util.h b/src/common/math_util.h
index 94b4394c5..cff3d48c5 100644
--- a/src/common/math_util.h
+++ b/src/common/math_util.h
@@ -7,7 +7,7 @@
7#include <cstdlib> 7#include <cstdlib>
8#include <type_traits> 8#include <type_traits>
9 9
10namespace MathUtil { 10namespace Common {
11 11
12constexpr float PI = 3.14159265f; 12constexpr float PI = 3.14159265f;
13 13
@@ -41,4 +41,4 @@ struct Rectangle {
41 } 41 }
42}; 42};
43 43
44} // namespace MathUtil 44} // namespace Common
diff --git a/src/common/quaternion.h b/src/common/quaternion.h
index c528c0b68..370198ae0 100644
--- a/src/common/quaternion.h
+++ b/src/common/quaternion.h
@@ -6,12 +6,12 @@
6 6
7#include "common/vector_math.h" 7#include "common/vector_math.h"
8 8
9namespace Math { 9namespace Common {
10 10
11template <typename T> 11template <typename T>
12class Quaternion { 12class Quaternion {
13public: 13public:
14 Math::Vec3<T> xyz; 14 Vec3<T> xyz;
15 T w{}; 15 T w{};
16 16
17 Quaternion<decltype(-T{})> Inverse() const { 17 Quaternion<decltype(-T{})> Inverse() const {
@@ -38,12 +38,12 @@ public:
38}; 38};
39 39
40template <typename T> 40template <typename T>
41auto QuaternionRotate(const Quaternion<T>& q, const Math::Vec3<T>& v) { 41auto QuaternionRotate(const Quaternion<T>& q, const Vec3<T>& v) {
42 return v + 2 * Cross(q.xyz, Cross(q.xyz, v) + v * q.w); 42 return v + 2 * Cross(q.xyz, Cross(q.xyz, v) + v * q.w);
43} 43}
44 44
45inline Quaternion<float> MakeQuaternion(const Math::Vec3<float>& axis, float angle) { 45inline Quaternion<float> MakeQuaternion(const Vec3<float>& axis, float angle) {
46 return {axis * std::sin(angle / 2), std::cos(angle / 2)}; 46 return {axis * std::sin(angle / 2), std::cos(angle / 2)};
47} 47}
48 48
49} // namespace Math 49} // namespace Common
diff --git a/src/common/vector_math.h b/src/common/vector_math.h
index 8feb49941..429485329 100644
--- a/src/common/vector_math.h
+++ b/src/common/vector_math.h
@@ -33,7 +33,7 @@
33#include <cmath> 33#include <cmath>
34#include <type_traits> 34#include <type_traits>
35 35
36namespace Math { 36namespace Common {
37 37
38template <typename T> 38template <typename T>
39class Vec2; 39class Vec2;
@@ -690,4 +690,4 @@ constexpr Vec4<T> MakeVec(const T& x, const Vec3<T>& yzw) {
690 return MakeVec(x, yzw[0], yzw[1], yzw[2]); 690 return MakeVec(x, yzw[0], yzw[1], yzw[2]);
691} 691}
692 692
693} // namespace Math 693} // namespace Common
diff --git a/src/core/CMakeLists.txt b/src/core/CMakeLists.txt
index 988356c65..8ccb2d5f0 100644
--- a/src/core/CMakeLists.txt
+++ b/src/core/CMakeLists.txt
@@ -217,6 +217,7 @@ add_library(core STATIC
217 hle/service/audio/audren_u.h 217 hle/service/audio/audren_u.h
218 hle/service/audio/codecctl.cpp 218 hle/service/audio/codecctl.cpp
219 hle/service/audio/codecctl.h 219 hle/service/audio/codecctl.h
220 hle/service/audio/errors.h
220 hle/service/audio/hwopus.cpp 221 hle/service/audio/hwopus.cpp
221 hle/service/audio/hwopus.h 222 hle/service/audio/hwopus.h
222 hle/service/bcat/bcat.cpp 223 hle/service/bcat/bcat.cpp
diff --git a/src/core/core.cpp b/src/core/core.cpp
index ab7181a05..eba2177d1 100644
--- a/src/core/core.cpp
+++ b/src/core/core.cpp
@@ -36,7 +36,8 @@
36#include "frontend/applets/software_keyboard.h" 36#include "frontend/applets/software_keyboard.h"
37#include "frontend/applets/web_browser.h" 37#include "frontend/applets/web_browser.h"
38#include "video_core/debug_utils/debug_utils.h" 38#include "video_core/debug_utils/debug_utils.h"
39#include "video_core/gpu.h" 39#include "video_core/gpu_asynch.h"
40#include "video_core/gpu_synch.h"
40#include "video_core/renderer_base.h" 41#include "video_core/renderer_base.h"
41#include "video_core/video_core.h" 42#include "video_core/video_core.h"
42 43
@@ -78,6 +79,7 @@ FileSys::VirtualFile GetGameFileFromPath(const FileSys::VirtualFilesystem& vfs,
78 return vfs->OpenFile(path, FileSys::Mode::Read); 79 return vfs->OpenFile(path, FileSys::Mode::Read);
79} 80}
80struct System::Impl { 81struct System::Impl {
82 explicit Impl(System& system) : kernel{system} {}
81 83
82 Cpu& CurrentCpuCore() { 84 Cpu& CurrentCpuCore() {
83 return cpu_core_manager.GetCurrentCore(); 85 return cpu_core_manager.GetCurrentCore();
@@ -95,7 +97,7 @@ struct System::Impl {
95 LOG_DEBUG(HW_Memory, "initialized OK"); 97 LOG_DEBUG(HW_Memory, "initialized OK");
96 98
97 core_timing.Initialize(); 99 core_timing.Initialize();
98 kernel.Initialize(core_timing); 100 kernel.Initialize();
99 101
100 const auto current_time = std::chrono::duration_cast<std::chrono::seconds>( 102 const auto current_time = std::chrono::duration_cast<std::chrono::seconds>(
101 std::chrono::system_clock::now().time_since_epoch()); 103 std::chrono::system_clock::now().time_since_epoch());
@@ -128,10 +130,16 @@ struct System::Impl {
128 return ResultStatus::ErrorVideoCore; 130 return ResultStatus::ErrorVideoCore;
129 } 131 }
130 132
131 gpu_core = std::make_unique<Tegra::GPU>(system, renderer->Rasterizer()); 133 is_powered_on = true;
134
135 if (Settings::values.use_asynchronous_gpu_emulation) {
136 gpu_core = std::make_unique<VideoCommon::GPUAsynch>(system, *renderer);
137 } else {
138 gpu_core = std::make_unique<VideoCommon::GPUSynch>(system, *renderer);
139 }
132 140
133 cpu_core_manager.Initialize(system); 141 cpu_core_manager.Initialize(system);
134 is_powered_on = true; 142
135 LOG_DEBUG(Core, "Initialized OK"); 143 LOG_DEBUG(Core, "Initialized OK");
136 144
137 // Reset counters and set time origin to current frame 145 // Reset counters and set time origin to current frame
@@ -182,13 +190,13 @@ struct System::Impl {
182 190
183 void Shutdown() { 191 void Shutdown() {
184 // Log last frame performance stats 192 // Log last frame performance stats
185 auto perf_results = GetAndResetPerfStats(); 193 const auto perf_results = GetAndResetPerfStats();
186 Telemetry().AddField(Telemetry::FieldType::Performance, "Shutdown_EmulationSpeed", 194 telemetry_session->AddField(Telemetry::FieldType::Performance, "Shutdown_EmulationSpeed",
187 perf_results.emulation_speed * 100.0); 195 perf_results.emulation_speed * 100.0);
188 Telemetry().AddField(Telemetry::FieldType::Performance, "Shutdown_Framerate", 196 telemetry_session->AddField(Telemetry::FieldType::Performance, "Shutdown_Framerate",
189 perf_results.game_fps); 197 perf_results.game_fps);
190 Telemetry().AddField(Telemetry::FieldType::Performance, "Shutdown_Frametime", 198 telemetry_session->AddField(Telemetry::FieldType::Performance, "Shutdown_Frametime",
191 perf_results.frametime * 1000.0); 199 perf_results.frametime * 1000.0);
192 200
193 is_powered_on = false; 201 is_powered_on = false;
194 202
@@ -265,7 +273,7 @@ struct System::Impl {
265 Core::FrameLimiter frame_limiter; 273 Core::FrameLimiter frame_limiter;
266}; 274};
267 275
268System::System() : impl{std::make_unique<Impl>()} {} 276System::System() : impl{std::make_unique<Impl>(*this)} {}
269System::~System() = default; 277System::~System() = default;
270 278
271Cpu& System::CurrentCpuCore() { 279Cpu& System::CurrentCpuCore() {
diff --git a/src/core/core.h b/src/core/core.h
index d720013f7..ba76a41d8 100644
--- a/src/core/core.h
+++ b/src/core/core.h
@@ -293,10 +293,6 @@ inline ARM_Interface& CurrentArmInterface() {
293 return System::GetInstance().CurrentArmInterface(); 293 return System::GetInstance().CurrentArmInterface();
294} 294}
295 295
296inline TelemetrySession& Telemetry() {
297 return System::GetInstance().TelemetrySession();
298}
299
300inline Kernel::Process* CurrentProcess() { 296inline Kernel::Process* CurrentProcess() {
301 return System::GetInstance().CurrentProcess(); 297 return System::GetInstance().CurrentProcess();
302} 298}
diff --git a/src/core/frontend/emu_window.cpp b/src/core/frontend/emu_window.cpp
index 9dd493efb..e29afd630 100644
--- a/src/core/frontend/emu_window.cpp
+++ b/src/core/frontend/emu_window.cpp
@@ -67,7 +67,7 @@ static bool IsWithinTouchscreen(const Layout::FramebufferLayout& layout, unsigne
67 framebuffer_x >= layout.screen.left && framebuffer_x < layout.screen.right); 67 framebuffer_x >= layout.screen.left && framebuffer_x < layout.screen.right);
68} 68}
69 69
70std::tuple<unsigned, unsigned> EmuWindow::ClipToTouchScreen(unsigned new_x, unsigned new_y) { 70std::tuple<unsigned, unsigned> EmuWindow::ClipToTouchScreen(unsigned new_x, unsigned new_y) const {
71 new_x = std::max(new_x, framebuffer_layout.screen.left); 71 new_x = std::max(new_x, framebuffer_layout.screen.left);
72 new_x = std::min(new_x, framebuffer_layout.screen.right - 1); 72 new_x = std::min(new_x, framebuffer_layout.screen.right - 1);
73 73
diff --git a/src/core/frontend/emu_window.h b/src/core/frontend/emu_window.h
index 7006a37b3..d0bcb4660 100644
--- a/src/core/frontend/emu_window.h
+++ b/src/core/frontend/emu_window.h
@@ -166,7 +166,7 @@ private:
166 /** 166 /**
167 * Clip the provided coordinates to be inside the touchscreen area. 167 * Clip the provided coordinates to be inside the touchscreen area.
168 */ 168 */
169 std::tuple<unsigned, unsigned> ClipToTouchScreen(unsigned new_x, unsigned new_y); 169 std::tuple<unsigned, unsigned> ClipToTouchScreen(unsigned new_x, unsigned new_y) const;
170}; 170};
171 171
172} // namespace Core::Frontend 172} // namespace Core::Frontend
diff --git a/src/core/frontend/framebuffer_layout.cpp b/src/core/frontend/framebuffer_layout.cpp
index f8662d193..a1357179f 100644
--- a/src/core/frontend/framebuffer_layout.cpp
+++ b/src/core/frontend/framebuffer_layout.cpp
@@ -12,12 +12,12 @@ namespace Layout {
12 12
13// Finds the largest size subrectangle contained in window area that is confined to the aspect ratio 13// Finds the largest size subrectangle contained in window area that is confined to the aspect ratio
14template <class T> 14template <class T>
15static MathUtil::Rectangle<T> maxRectangle(MathUtil::Rectangle<T> window_area, 15static Common::Rectangle<T> MaxRectangle(Common::Rectangle<T> window_area,
16 float screen_aspect_ratio) { 16 float screen_aspect_ratio) {
17 float scale = std::min(static_cast<float>(window_area.GetWidth()), 17 float scale = std::min(static_cast<float>(window_area.GetWidth()),
18 window_area.GetHeight() / screen_aspect_ratio); 18 window_area.GetHeight() / screen_aspect_ratio);
19 return MathUtil::Rectangle<T>{0, 0, static_cast<T>(std::round(scale)), 19 return Common::Rectangle<T>{0, 0, static_cast<T>(std::round(scale)),
20 static_cast<T>(std::round(scale * screen_aspect_ratio))}; 20 static_cast<T>(std::round(scale * screen_aspect_ratio))};
21} 21}
22 22
23FramebufferLayout DefaultFrameLayout(unsigned width, unsigned height) { 23FramebufferLayout DefaultFrameLayout(unsigned width, unsigned height) {
@@ -29,8 +29,8 @@ FramebufferLayout DefaultFrameLayout(unsigned width, unsigned height) {
29 29
30 const float emulation_aspect_ratio{static_cast<float>(ScreenUndocked::Height) / 30 const float emulation_aspect_ratio{static_cast<float>(ScreenUndocked::Height) /
31 ScreenUndocked::Width}; 31 ScreenUndocked::Width};
32 MathUtil::Rectangle<unsigned> screen_window_area{0, 0, width, height}; 32 Common::Rectangle<unsigned> screen_window_area{0, 0, width, height};
33 MathUtil::Rectangle<unsigned> screen = maxRectangle(screen_window_area, emulation_aspect_ratio); 33 Common::Rectangle<unsigned> screen = MaxRectangle(screen_window_area, emulation_aspect_ratio);
34 34
35 float window_aspect_ratio = static_cast<float>(height) / width; 35 float window_aspect_ratio = static_cast<float>(height) / width;
36 36
diff --git a/src/core/frontend/framebuffer_layout.h b/src/core/frontend/framebuffer_layout.h
index e06647794..c2c63d08c 100644
--- a/src/core/frontend/framebuffer_layout.h
+++ b/src/core/frontend/framebuffer_layout.h
@@ -16,7 +16,7 @@ struct FramebufferLayout {
16 unsigned width{ScreenUndocked::Width}; 16 unsigned width{ScreenUndocked::Width};
17 unsigned height{ScreenUndocked::Height}; 17 unsigned height{ScreenUndocked::Height};
18 18
19 MathUtil::Rectangle<unsigned> screen; 19 Common::Rectangle<unsigned> screen;
20 20
21 /** 21 /**
22 * Returns the ration of pixel size of the screen, compared to the native size of the undocked 22 * Returns the ration of pixel size of the screen, compared to the native size of the undocked
diff --git a/src/core/frontend/input.h b/src/core/frontend/input.h
index 16fdcd376..7c11d7546 100644
--- a/src/core/frontend/input.h
+++ b/src/core/frontend/input.h
@@ -124,7 +124,7 @@ using AnalogDevice = InputDevice<std::tuple<float, float>>;
124 * Orientation is determined by right-hand rule. 124 * Orientation is determined by right-hand rule.
125 * Units: deg/sec 125 * Units: deg/sec
126 */ 126 */
127using MotionDevice = InputDevice<std::tuple<Math::Vec3<float>, Math::Vec3<float>>>; 127using MotionDevice = InputDevice<std::tuple<Common::Vec3<float>, Common::Vec3<float>>>;
128 128
129/** 129/**
130 * A touch device is an input device that returns a tuple of two floats and a bool. The floats are 130 * A touch device is an input device that returns a tuple of two floats and a bool. The floats are
diff --git a/src/core/hle/ipc.h b/src/core/hle/ipc.h
index ed84197b3..455d1f346 100644
--- a/src/core/hle/ipc.h
+++ b/src/core/hle/ipc.h
@@ -4,10 +4,10 @@
4 4
5#pragma once 5#pragma once
6 6
7#include "common/bit_field.h"
8#include "common/common_funcs.h"
7#include "common/common_types.h" 9#include "common/common_types.h"
8#include "common/swap.h" 10#include "common/swap.h"
9#include "core/hle/kernel/errors.h"
10#include "core/memory.h"
11 11
12namespace IPC { 12namespace IPC {
13 13
diff --git a/src/core/hle/ipc_helpers.h b/src/core/hle/ipc_helpers.h
index 90f276ee8..079283830 100644
--- a/src/core/hle/ipc_helpers.h
+++ b/src/core/hle/ipc_helpers.h
@@ -350,7 +350,7 @@ public:
350 template <class T> 350 template <class T>
351 std::shared_ptr<T> PopIpcInterface() { 351 std::shared_ptr<T> PopIpcInterface() {
352 ASSERT(context->Session()->IsDomain()); 352 ASSERT(context->Session()->IsDomain());
353 ASSERT(context->GetDomainMessageHeader()->input_object_count > 0); 353 ASSERT(context->GetDomainMessageHeader().input_object_count > 0);
354 return context->GetDomainRequestHandler<T>(Pop<u32>() - 1); 354 return context->GetDomainRequestHandler<T>(Pop<u32>() - 1);
355 } 355 }
356}; 356};
diff --git a/src/core/hle/kernel/address_arbiter.cpp b/src/core/hle/kernel/address_arbiter.cpp
index a250d088d..9780a7849 100644
--- a/src/core/hle/kernel/address_arbiter.cpp
+++ b/src/core/hle/kernel/address_arbiter.cpp
@@ -9,6 +9,7 @@
9#include "common/common_types.h" 9#include "common/common_types.h"
10#include "core/core.h" 10#include "core/core.h"
11#include "core/core_cpu.h" 11#include "core/core_cpu.h"
12#include "core/hle/kernel/address_arbiter.h"
12#include "core/hle/kernel/errors.h" 13#include "core/hle/kernel/errors.h"
13#include "core/hle/kernel/object.h" 14#include "core/hle/kernel/object.h"
14#include "core/hle/kernel/process.h" 15#include "core/hle/kernel/process.h"
@@ -17,58 +18,16 @@
17#include "core/hle/result.h" 18#include "core/hle/result.h"
18#include "core/memory.h" 19#include "core/memory.h"
19 20
20namespace Kernel::AddressArbiter { 21namespace Kernel {
21 22namespace {
22// Performs actual address waiting logic.
23static ResultCode WaitForAddress(VAddr address, s64 timeout) {
24 SharedPtr<Thread> current_thread = GetCurrentThread();
25 current_thread->SetArbiterWaitAddress(address);
26 current_thread->SetStatus(ThreadStatus::WaitArb);
27 current_thread->InvalidateWakeupCallback();
28
29 current_thread->WakeAfterDelay(timeout);
30
31 Core::System::GetInstance().CpuCore(current_thread->GetProcessorID()).PrepareReschedule();
32 return RESULT_TIMEOUT;
33}
34
35// Gets the threads waiting on an address.
36static std::vector<SharedPtr<Thread>> GetThreadsWaitingOnAddress(VAddr address) {
37 const auto RetrieveWaitingThreads = [](std::size_t core_index,
38 std::vector<SharedPtr<Thread>>& waiting_threads,
39 VAddr arb_addr) {
40 const auto& scheduler = Core::System::GetInstance().Scheduler(core_index);
41 const auto& thread_list = scheduler.GetThreadList();
42
43 for (const auto& thread : thread_list) {
44 if (thread->GetArbiterWaitAddress() == arb_addr)
45 waiting_threads.push_back(thread);
46 }
47 };
48
49 // Retrieve all threads that are waiting for this address.
50 std::vector<SharedPtr<Thread>> threads;
51 RetrieveWaitingThreads(0, threads, address);
52 RetrieveWaitingThreads(1, threads, address);
53 RetrieveWaitingThreads(2, threads, address);
54 RetrieveWaitingThreads(3, threads, address);
55
56 // Sort them by priority, such that the highest priority ones come first.
57 std::sort(threads.begin(), threads.end(),
58 [](const SharedPtr<Thread>& lhs, const SharedPtr<Thread>& rhs) {
59 return lhs->GetPriority() < rhs->GetPriority();
60 });
61
62 return threads;
63}
64
65// Wake up num_to_wake (or all) threads in a vector. 23// Wake up num_to_wake (or all) threads in a vector.
66static void WakeThreads(std::vector<SharedPtr<Thread>>& waiting_threads, s32 num_to_wake) { 24void WakeThreads(const std::vector<SharedPtr<Thread>>& waiting_threads, s32 num_to_wake) {
67 // Only process up to 'target' threads, unless 'target' is <= 0, in which case process 25 // Only process up to 'target' threads, unless 'target' is <= 0, in which case process
68 // them all. 26 // them all.
69 std::size_t last = waiting_threads.size(); 27 std::size_t last = waiting_threads.size();
70 if (num_to_wake > 0) 28 if (num_to_wake > 0) {
71 last = num_to_wake; 29 last = num_to_wake;
30 }
72 31
73 // Signal the waiting threads. 32 // Signal the waiting threads.
74 for (std::size_t i = 0; i < last; i++) { 33 for (std::size_t i = 0; i < last; i++) {
@@ -78,42 +37,41 @@ static void WakeThreads(std::vector<SharedPtr<Thread>>& waiting_threads, s32 num
78 waiting_threads[i]->ResumeFromWait(); 37 waiting_threads[i]->ResumeFromWait();
79 } 38 }
80} 39}
40} // Anonymous namespace
81 41
82// Signals an address being waited on. 42AddressArbiter::AddressArbiter(Core::System& system) : system{system} {}
83ResultCode SignalToAddress(VAddr address, s32 num_to_wake) { 43AddressArbiter::~AddressArbiter() = default;
84 std::vector<SharedPtr<Thread>> waiting_threads = GetThreadsWaitingOnAddress(address);
85 44
45ResultCode AddressArbiter::SignalToAddress(VAddr address, s32 num_to_wake) {
46 const std::vector<SharedPtr<Thread>> waiting_threads = GetThreadsWaitingOnAddress(address);
86 WakeThreads(waiting_threads, num_to_wake); 47 WakeThreads(waiting_threads, num_to_wake);
87 return RESULT_SUCCESS; 48 return RESULT_SUCCESS;
88} 49}
89 50
90// Signals an address being waited on and increments its value if equal to the value argument. 51ResultCode AddressArbiter::IncrementAndSignalToAddressIfEqual(VAddr address, s32 value,
91ResultCode IncrementAndSignalToAddressIfEqual(VAddr address, s32 value, s32 num_to_wake) { 52 s32 num_to_wake) {
92 // Ensure that we can write to the address. 53 // Ensure that we can write to the address.
93 if (!Memory::IsValidVirtualAddress(address)) { 54 if (!Memory::IsValidVirtualAddress(address)) {
94 return ERR_INVALID_ADDRESS_STATE; 55 return ERR_INVALID_ADDRESS_STATE;
95 } 56 }
96 57
97 if (static_cast<s32>(Memory::Read32(address)) == value) { 58 if (static_cast<s32>(Memory::Read32(address)) != value) {
98 Memory::Write32(address, static_cast<u32>(value + 1));
99 } else {
100 return ERR_INVALID_STATE; 59 return ERR_INVALID_STATE;
101 } 60 }
102 61
62 Memory::Write32(address, static_cast<u32>(value + 1));
103 return SignalToAddress(address, num_to_wake); 63 return SignalToAddress(address, num_to_wake);
104} 64}
105 65
106// Signals an address being waited on and modifies its value based on waiting thread count if equal 66ResultCode AddressArbiter::ModifyByWaitingCountAndSignalToAddressIfEqual(VAddr address, s32 value,
107// to the value argument. 67 s32 num_to_wake) {
108ResultCode ModifyByWaitingCountAndSignalToAddressIfEqual(VAddr address, s32 value,
109 s32 num_to_wake) {
110 // Ensure that we can write to the address. 68 // Ensure that we can write to the address.
111 if (!Memory::IsValidVirtualAddress(address)) { 69 if (!Memory::IsValidVirtualAddress(address)) {
112 return ERR_INVALID_ADDRESS_STATE; 70 return ERR_INVALID_ADDRESS_STATE;
113 } 71 }
114 72
115 // Get threads waiting on the address. 73 // Get threads waiting on the address.
116 std::vector<SharedPtr<Thread>> waiting_threads = GetThreadsWaitingOnAddress(address); 74 const std::vector<SharedPtr<Thread>> waiting_threads = GetThreadsWaitingOnAddress(address);
117 75
118 // Determine the modified value depending on the waiting count. 76 // Determine the modified value depending on the waiting count.
119 s32 updated_value; 77 s32 updated_value;
@@ -125,31 +83,31 @@ ResultCode ModifyByWaitingCountAndSignalToAddressIfEqual(VAddr address, s32 valu
125 updated_value = value; 83 updated_value = value;
126 } 84 }
127 85
128 if (static_cast<s32>(Memory::Read32(address)) == value) { 86 if (static_cast<s32>(Memory::Read32(address)) != value) {
129 Memory::Write32(address, static_cast<u32>(updated_value));
130 } else {
131 return ERR_INVALID_STATE; 87 return ERR_INVALID_STATE;
132 } 88 }
133 89
90 Memory::Write32(address, static_cast<u32>(updated_value));
134 WakeThreads(waiting_threads, num_to_wake); 91 WakeThreads(waiting_threads, num_to_wake);
135 return RESULT_SUCCESS; 92 return RESULT_SUCCESS;
136} 93}
137 94
138// Waits on an address if the value passed is less than the argument value, optionally decrementing. 95ResultCode AddressArbiter::WaitForAddressIfLessThan(VAddr address, s32 value, s64 timeout,
139ResultCode WaitForAddressIfLessThan(VAddr address, s32 value, s64 timeout, bool should_decrement) { 96 bool should_decrement) {
140 // Ensure that we can read the address. 97 // Ensure that we can read the address.
141 if (!Memory::IsValidVirtualAddress(address)) { 98 if (!Memory::IsValidVirtualAddress(address)) {
142 return ERR_INVALID_ADDRESS_STATE; 99 return ERR_INVALID_ADDRESS_STATE;
143 } 100 }
144 101
145 s32 cur_value = static_cast<s32>(Memory::Read32(address)); 102 const s32 cur_value = static_cast<s32>(Memory::Read32(address));
146 if (cur_value < value) { 103 if (cur_value >= value) {
147 if (should_decrement) {
148 Memory::Write32(address, static_cast<u32>(cur_value - 1));
149 }
150 } else {
151 return ERR_INVALID_STATE; 104 return ERR_INVALID_STATE;
152 } 105 }
106
107 if (should_decrement) {
108 Memory::Write32(address, static_cast<u32>(cur_value - 1));
109 }
110
153 // Short-circuit without rescheduling, if timeout is zero. 111 // Short-circuit without rescheduling, if timeout is zero.
154 if (timeout == 0) { 112 if (timeout == 0) {
155 return RESULT_TIMEOUT; 113 return RESULT_TIMEOUT;
@@ -158,8 +116,7 @@ ResultCode WaitForAddressIfLessThan(VAddr address, s32 value, s64 timeout, bool
158 return WaitForAddress(address, timeout); 116 return WaitForAddress(address, timeout);
159} 117}
160 118
161// Waits on an address if the value passed is equal to the argument value. 119ResultCode AddressArbiter::WaitForAddressIfEqual(VAddr address, s32 value, s64 timeout) {
162ResultCode WaitForAddressIfEqual(VAddr address, s32 value, s64 timeout) {
163 // Ensure that we can read the address. 120 // Ensure that we can read the address.
164 if (!Memory::IsValidVirtualAddress(address)) { 121 if (!Memory::IsValidVirtualAddress(address)) {
165 return ERR_INVALID_ADDRESS_STATE; 122 return ERR_INVALID_ADDRESS_STATE;
@@ -175,4 +132,46 @@ ResultCode WaitForAddressIfEqual(VAddr address, s32 value, s64 timeout) {
175 132
176 return WaitForAddress(address, timeout); 133 return WaitForAddress(address, timeout);
177} 134}
178} // namespace Kernel::AddressArbiter 135
136ResultCode AddressArbiter::WaitForAddress(VAddr address, s64 timeout) {
137 SharedPtr<Thread> current_thread = system.CurrentScheduler().GetCurrentThread();
138 current_thread->SetArbiterWaitAddress(address);
139 current_thread->SetStatus(ThreadStatus::WaitArb);
140 current_thread->InvalidateWakeupCallback();
141
142 current_thread->WakeAfterDelay(timeout);
143
144 system.CpuCore(current_thread->GetProcessorID()).PrepareReschedule();
145 return RESULT_TIMEOUT;
146}
147
148std::vector<SharedPtr<Thread>> AddressArbiter::GetThreadsWaitingOnAddress(VAddr address) const {
149 const auto RetrieveWaitingThreads = [this](std::size_t core_index,
150 std::vector<SharedPtr<Thread>>& waiting_threads,
151 VAddr arb_addr) {
152 const auto& scheduler = system.Scheduler(core_index);
153 const auto& thread_list = scheduler.GetThreadList();
154
155 for (const auto& thread : thread_list) {
156 if (thread->GetArbiterWaitAddress() == arb_addr) {
157 waiting_threads.push_back(thread);
158 }
159 }
160 };
161
162 // Retrieve all threads that are waiting for this address.
163 std::vector<SharedPtr<Thread>> threads;
164 RetrieveWaitingThreads(0, threads, address);
165 RetrieveWaitingThreads(1, threads, address);
166 RetrieveWaitingThreads(2, threads, address);
167 RetrieveWaitingThreads(3, threads, address);
168
169 // Sort them by priority, such that the highest priority ones come first.
170 std::sort(threads.begin(), threads.end(),
171 [](const SharedPtr<Thread>& lhs, const SharedPtr<Thread>& rhs) {
172 return lhs->GetPriority() < rhs->GetPriority();
173 });
174
175 return threads;
176}
177} // namespace Kernel
diff --git a/src/core/hle/kernel/address_arbiter.h b/src/core/hle/kernel/address_arbiter.h
index b58f21bec..e0c36f2e3 100644
--- a/src/core/hle/kernel/address_arbiter.h
+++ b/src/core/hle/kernel/address_arbiter.h
@@ -5,28 +5,68 @@
5#pragma once 5#pragma once
6 6
7#include "common/common_types.h" 7#include "common/common_types.h"
8#include "core/hle/kernel/address_arbiter.h"
8 9
9union ResultCode; 10union ResultCode;
10 11
11namespace Kernel::AddressArbiter { 12namespace Core {
13class System;
14}
12 15
13enum class ArbitrationType { 16namespace Kernel {
14 WaitIfLessThan = 0,
15 DecrementAndWaitIfLessThan = 1,
16 WaitIfEqual = 2,
17};
18 17
19enum class SignalType { 18class Thread;
20 Signal = 0, 19
21 IncrementAndSignalIfEqual = 1, 20class AddressArbiter {
22 ModifyByWaitingCountAndSignalIfEqual = 2, 21public:
23}; 22 enum class ArbitrationType {
23 WaitIfLessThan = 0,
24 DecrementAndWaitIfLessThan = 1,
25 WaitIfEqual = 2,
26 };
27
28 enum class SignalType {
29 Signal = 0,
30 IncrementAndSignalIfEqual = 1,
31 ModifyByWaitingCountAndSignalIfEqual = 2,
32 };
33
34 explicit AddressArbiter(Core::System& system);
35 ~AddressArbiter();
36
37 AddressArbiter(const AddressArbiter&) = delete;
38 AddressArbiter& operator=(const AddressArbiter&) = delete;
39
40 AddressArbiter(AddressArbiter&&) = default;
41 AddressArbiter& operator=(AddressArbiter&&) = delete;
24 42
25ResultCode SignalToAddress(VAddr address, s32 num_to_wake); 43 /// Signals an address being waited on.
26ResultCode IncrementAndSignalToAddressIfEqual(VAddr address, s32 value, s32 num_to_wake); 44 ResultCode SignalToAddress(VAddr address, s32 num_to_wake);
27ResultCode ModifyByWaitingCountAndSignalToAddressIfEqual(VAddr address, s32 value, s32 num_to_wake);
28 45
29ResultCode WaitForAddressIfLessThan(VAddr address, s32 value, s64 timeout, bool should_decrement); 46 /// Signals an address being waited on and increments its value if equal to the value argument.
30ResultCode WaitForAddressIfEqual(VAddr address, s32 value, s64 timeout); 47 ResultCode IncrementAndSignalToAddressIfEqual(VAddr address, s32 value, s32 num_to_wake);
48
49 /// Signals an address being waited on and modifies its value based on waiting thread count if
50 /// equal to the value argument.
51 ResultCode ModifyByWaitingCountAndSignalToAddressIfEqual(VAddr address, s32 value,
52 s32 num_to_wake);
53
54 /// Waits on an address if the value passed is less than the argument value,
55 /// optionally decrementing.
56 ResultCode WaitForAddressIfLessThan(VAddr address, s32 value, s64 timeout,
57 bool should_decrement);
58
59 /// Waits on an address if the value passed is equal to the argument value.
60 ResultCode WaitForAddressIfEqual(VAddr address, s32 value, s64 timeout);
61
62private:
63 // Waits on the given address with a timeout in nanoseconds
64 ResultCode WaitForAddress(VAddr address, s64 timeout);
65
66 // Gets the threads waiting on an address.
67 std::vector<SharedPtr<Thread>> GetThreadsWaitingOnAddress(VAddr address) const;
68
69 Core::System& system;
70};
31 71
32} // namespace Kernel::AddressArbiter 72} // namespace Kernel
diff --git a/src/core/hle/kernel/client_session.cpp b/src/core/hle/kernel/client_session.cpp
index 704e82824..c17baa50a 100644
--- a/src/core/hle/kernel/client_session.cpp
+++ b/src/core/hle/kernel/client_session.cpp
@@ -17,21 +17,11 @@ ClientSession::~ClientSession() {
17 // This destructor will be called automatically when the last ClientSession handle is closed by 17 // This destructor will be called automatically when the last ClientSession handle is closed by
18 // the emulated application. 18 // the emulated application.
19 19
20 // Local references to ServerSession and SessionRequestHandler are necessary to guarantee they 20 // A local reference to the ServerSession is necessary to guarantee it
21 // will be kept alive until after ClientDisconnected() returns. 21 // will be kept alive until after ClientDisconnected() returns.
22 SharedPtr<ServerSession> server = parent->server; 22 SharedPtr<ServerSession> server = parent->server;
23 if (server) { 23 if (server) {
24 std::shared_ptr<SessionRequestHandler> hle_handler = server->hle_handler; 24 server->ClientDisconnected();
25 if (hle_handler)
26 hle_handler->ClientDisconnected(server);
27
28 // TODO(Subv): Force a wake up of all the ServerSession's waiting threads and set
29 // their WaitSynchronization result to 0xC920181A.
30
31 // Clean up the list of client threads with pending requests, they are unneeded now that the
32 // client endpoint is closed.
33 server->pending_requesting_threads.clear();
34 server->currently_handling = nullptr;
35 } 25 }
36 26
37 parent->client = nullptr; 27 parent->client = nullptr;
diff --git a/src/core/hle/kernel/client_session.h b/src/core/hle/kernel/client_session.h
index 4c18de69c..b1f39aad7 100644
--- a/src/core/hle/kernel/client_session.h
+++ b/src/core/hle/kernel/client_session.h
@@ -36,14 +36,15 @@ public:
36 36
37 ResultCode SendSyncRequest(SharedPtr<Thread> thread); 37 ResultCode SendSyncRequest(SharedPtr<Thread> thread);
38 38
39 std::string name; ///< Name of client port (optional) 39private:
40 explicit ClientSession(KernelCore& kernel);
41 ~ClientSession() override;
40 42
41 /// The parent session, which links to the server endpoint. 43 /// The parent session, which links to the server endpoint.
42 std::shared_ptr<Session> parent; 44 std::shared_ptr<Session> parent;
43 45
44private: 46 /// Name of the client session (optional)
45 explicit ClientSession(KernelCore& kernel); 47 std::string name;
46 ~ClientSession() override;
47}; 48};
48 49
49} // namespace Kernel 50} // namespace Kernel
diff --git a/src/core/hle/kernel/errors.h b/src/core/hle/kernel/errors.h
index d17eb0cb6..8097b3863 100644
--- a/src/core/hle/kernel/errors.h
+++ b/src/core/hle/kernel/errors.h
@@ -14,6 +14,7 @@ constexpr ResultCode ERR_MAX_CONNECTIONS_REACHED{ErrorModule::Kernel, 7};
14constexpr ResultCode ERR_INVALID_CAPABILITY_DESCRIPTOR{ErrorModule::Kernel, 14}; 14constexpr ResultCode ERR_INVALID_CAPABILITY_DESCRIPTOR{ErrorModule::Kernel, 14};
15constexpr ResultCode ERR_INVALID_SIZE{ErrorModule::Kernel, 101}; 15constexpr ResultCode ERR_INVALID_SIZE{ErrorModule::Kernel, 101};
16constexpr ResultCode ERR_INVALID_ADDRESS{ErrorModule::Kernel, 102}; 16constexpr ResultCode ERR_INVALID_ADDRESS{ErrorModule::Kernel, 102};
17constexpr ResultCode ERR_OUT_OF_MEMORY{ErrorModule::Kernel, 104};
17constexpr ResultCode ERR_HANDLE_TABLE_FULL{ErrorModule::Kernel, 105}; 18constexpr ResultCode ERR_HANDLE_TABLE_FULL{ErrorModule::Kernel, 105};
18constexpr ResultCode ERR_INVALID_ADDRESS_STATE{ErrorModule::Kernel, 106}; 19constexpr ResultCode ERR_INVALID_ADDRESS_STATE{ErrorModule::Kernel, 106};
19constexpr ResultCode ERR_INVALID_MEMORY_PERMISSIONS{ErrorModule::Kernel, 108}; 20constexpr ResultCode ERR_INVALID_MEMORY_PERMISSIONS{ErrorModule::Kernel, 108};
diff --git a/src/core/hle/kernel/handle_table.cpp b/src/core/hle/kernel/handle_table.cpp
index c8acde5b1..bdfaa977f 100644
--- a/src/core/hle/kernel/handle_table.cpp
+++ b/src/core/hle/kernel/handle_table.cpp
@@ -14,32 +14,47 @@
14namespace Kernel { 14namespace Kernel {
15namespace { 15namespace {
16constexpr u16 GetSlot(Handle handle) { 16constexpr u16 GetSlot(Handle handle) {
17 return handle >> 15; 17 return static_cast<u16>(handle >> 15);
18} 18}
19 19
20constexpr u16 GetGeneration(Handle handle) { 20constexpr u16 GetGeneration(Handle handle) {
21 return handle & 0x7FFF; 21 return static_cast<u16>(handle & 0x7FFF);
22} 22}
23} // Anonymous namespace 23} // Anonymous namespace
24 24
25HandleTable::HandleTable() { 25HandleTable::HandleTable() {
26 next_generation = 1;
27 Clear(); 26 Clear();
28} 27}
29 28
30HandleTable::~HandleTable() = default; 29HandleTable::~HandleTable() = default;
31 30
31ResultCode HandleTable::SetSize(s32 handle_table_size) {
32 if (static_cast<u32>(handle_table_size) > MAX_COUNT) {
33 return ERR_OUT_OF_MEMORY;
34 }
35
36 // Values less than or equal to zero indicate to use the maximum allowable
37 // size for the handle table in the actual kernel, so we ignore the given
38 // value in that case, since we assume this by default unless this function
39 // is called.
40 if (handle_table_size > 0) {
41 table_size = static_cast<u16>(handle_table_size);
42 }
43
44 return RESULT_SUCCESS;
45}
46
32ResultVal<Handle> HandleTable::Create(SharedPtr<Object> obj) { 47ResultVal<Handle> HandleTable::Create(SharedPtr<Object> obj) {
33 DEBUG_ASSERT(obj != nullptr); 48 DEBUG_ASSERT(obj != nullptr);
34 49
35 u16 slot = next_free_slot; 50 const u16 slot = next_free_slot;
36 if (slot >= generations.size()) { 51 if (slot >= table_size) {
37 LOG_ERROR(Kernel, "Unable to allocate Handle, too many slots in use."); 52 LOG_ERROR(Kernel, "Unable to allocate Handle, too many slots in use.");
38 return ERR_HANDLE_TABLE_FULL; 53 return ERR_HANDLE_TABLE_FULL;
39 } 54 }
40 next_free_slot = generations[slot]; 55 next_free_slot = generations[slot];
41 56
42 u16 generation = next_generation++; 57 const u16 generation = next_generation++;
43 58
44 // Overflow count so it fits in the 15 bits dedicated to the generation in the handle. 59 // Overflow count so it fits in the 15 bits dedicated to the generation in the handle.
45 // Horizon OS uses zero to represent an invalid handle, so skip to 1. 60 // Horizon OS uses zero to represent an invalid handle, so skip to 1.
@@ -64,10 +79,11 @@ ResultVal<Handle> HandleTable::Duplicate(Handle handle) {
64} 79}
65 80
66ResultCode HandleTable::Close(Handle handle) { 81ResultCode HandleTable::Close(Handle handle) {
67 if (!IsValid(handle)) 82 if (!IsValid(handle)) {
68 return ERR_INVALID_HANDLE; 83 return ERR_INVALID_HANDLE;
84 }
69 85
70 u16 slot = GetSlot(handle); 86 const u16 slot = GetSlot(handle);
71 87
72 objects[slot] = nullptr; 88 objects[slot] = nullptr;
73 89
@@ -77,10 +93,10 @@ ResultCode HandleTable::Close(Handle handle) {
77} 93}
78 94
79bool HandleTable::IsValid(Handle handle) const { 95bool HandleTable::IsValid(Handle handle) const {
80 std::size_t slot = GetSlot(handle); 96 const std::size_t slot = GetSlot(handle);
81 u16 generation = GetGeneration(handle); 97 const u16 generation = GetGeneration(handle);
82 98
83 return slot < MAX_COUNT && objects[slot] != nullptr && generations[slot] == generation; 99 return slot < table_size && objects[slot] != nullptr && generations[slot] == generation;
84} 100}
85 101
86SharedPtr<Object> HandleTable::GetGeneric(Handle handle) const { 102SharedPtr<Object> HandleTable::GetGeneric(Handle handle) const {
@@ -97,7 +113,7 @@ SharedPtr<Object> HandleTable::GetGeneric(Handle handle) const {
97} 113}
98 114
99void HandleTable::Clear() { 115void HandleTable::Clear() {
100 for (u16 i = 0; i < MAX_COUNT; ++i) { 116 for (u16 i = 0; i < table_size; ++i) {
101 generations[i] = i + 1; 117 generations[i] = i + 1;
102 objects[i] = nullptr; 118 objects[i] = nullptr;
103 } 119 }
diff --git a/src/core/hle/kernel/handle_table.h b/src/core/hle/kernel/handle_table.h
index 89a3bc740..44901391b 100644
--- a/src/core/hle/kernel/handle_table.h
+++ b/src/core/hle/kernel/handle_table.h
@@ -50,6 +50,20 @@ public:
50 ~HandleTable(); 50 ~HandleTable();
51 51
52 /** 52 /**
53 * Sets the number of handles that may be in use at one time
54 * for this handle table.
55 *
56 * @param handle_table_size The desired size to limit the handle table to.
57 *
58 * @returns an error code indicating if initialization was successful.
59 * If initialization was not successful, then ERR_OUT_OF_MEMORY
60 * will be returned.
61 *
62 * @pre handle_table_size must be within the range [0, 1024]
63 */
64 ResultCode SetSize(s32 handle_table_size);
65
66 /**
53 * Allocates a handle for the given object. 67 * Allocates a handle for the given object.
54 * @return The created Handle or one of the following errors: 68 * @return The created Handle or one of the following errors:
55 * - `ERR_HANDLE_TABLE_FULL`: the maximum number of handles has been exceeded. 69 * - `ERR_HANDLE_TABLE_FULL`: the maximum number of handles has been exceeded.
@@ -104,13 +118,20 @@ private:
104 std::array<u16, MAX_COUNT> generations; 118 std::array<u16, MAX_COUNT> generations;
105 119
106 /** 120 /**
121 * The limited size of the handle table. This can be specified by process
122 * capabilities in order to restrict the overall number of handles that
123 * can be created in a process instance
124 */
125 u16 table_size = static_cast<u16>(MAX_COUNT);
126
127 /**
107 * Global counter of the number of created handles. Stored in `generations` when a handle is 128 * Global counter of the number of created handles. Stored in `generations` when a handle is
108 * created, and wraps around to 1 when it hits 0x8000. 129 * created, and wraps around to 1 when it hits 0x8000.
109 */ 130 */
110 u16 next_generation; 131 u16 next_generation = 1;
111 132
112 /// Head of the free slots linked list. 133 /// Head of the free slots linked list.
113 u16 next_free_slot; 134 u16 next_free_slot = 0;
114}; 135};
115 136
116} // namespace Kernel 137} // namespace Kernel
diff --git a/src/core/hle/kernel/hle_ipc.cpp b/src/core/hle/kernel/hle_ipc.cpp
index 5dd855db8..fe710eb6e 100644
--- a/src/core/hle/kernel/hle_ipc.cpp
+++ b/src/core/hle/kernel/hle_ipc.cpp
@@ -86,7 +86,7 @@ HLERequestContext::~HLERequestContext() = default;
86void HLERequestContext::ParseCommandBuffer(const HandleTable& handle_table, u32_le* src_cmdbuf, 86void HLERequestContext::ParseCommandBuffer(const HandleTable& handle_table, u32_le* src_cmdbuf,
87 bool incoming) { 87 bool incoming) {
88 IPC::RequestParser rp(src_cmdbuf); 88 IPC::RequestParser rp(src_cmdbuf);
89 command_header = std::make_shared<IPC::CommandHeader>(rp.PopRaw<IPC::CommandHeader>()); 89 command_header = rp.PopRaw<IPC::CommandHeader>();
90 90
91 if (command_header->type == IPC::CommandType::Close) { 91 if (command_header->type == IPC::CommandType::Close) {
92 // Close does not populate the rest of the IPC header 92 // Close does not populate the rest of the IPC header
@@ -95,8 +95,7 @@ void HLERequestContext::ParseCommandBuffer(const HandleTable& handle_table, u32_
95 95
96 // If handle descriptor is present, add size of it 96 // If handle descriptor is present, add size of it
97 if (command_header->enable_handle_descriptor) { 97 if (command_header->enable_handle_descriptor) {
98 handle_descriptor_header = 98 handle_descriptor_header = rp.PopRaw<IPC::HandleDescriptorHeader>();
99 std::make_shared<IPC::HandleDescriptorHeader>(rp.PopRaw<IPC::HandleDescriptorHeader>());
100 if (handle_descriptor_header->send_current_pid) { 99 if (handle_descriptor_header->send_current_pid) {
101 rp.Skip(2, false); 100 rp.Skip(2, false);
102 } 101 }
@@ -140,16 +139,15 @@ void HLERequestContext::ParseCommandBuffer(const HandleTable& handle_table, u32_
140 // If this is an incoming message, only CommandType "Request" has a domain header 139 // If this is an incoming message, only CommandType "Request" has a domain header
141 // All outgoing domain messages have the domain header, if only incoming has it 140 // All outgoing domain messages have the domain header, if only incoming has it
142 if (incoming || domain_message_header) { 141 if (incoming || domain_message_header) {
143 domain_message_header = 142 domain_message_header = rp.PopRaw<IPC::DomainMessageHeader>();
144 std::make_shared<IPC::DomainMessageHeader>(rp.PopRaw<IPC::DomainMessageHeader>());
145 } else { 143 } else {
146 if (Session()->IsDomain()) 144 if (Session()->IsDomain()) {
147 LOG_WARNING(IPC, "Domain request has no DomainMessageHeader!"); 145 LOG_WARNING(IPC, "Domain request has no DomainMessageHeader!");
146 }
148 } 147 }
149 } 148 }
150 149
151 data_payload_header = 150 data_payload_header = rp.PopRaw<IPC::DataPayloadHeader>();
152 std::make_shared<IPC::DataPayloadHeader>(rp.PopRaw<IPC::DataPayloadHeader>());
153 151
154 data_payload_offset = rp.GetCurrentOffset(); 152 data_payload_offset = rp.GetCurrentOffset();
155 153
@@ -264,11 +262,11 @@ ResultCode HLERequestContext::WriteToOutgoingCommandBuffer(Thread& thread) {
264 // Write the domain objects to the command buffer, these go after the raw untranslated data. 262 // Write the domain objects to the command buffer, these go after the raw untranslated data.
265 // TODO(Subv): This completely ignores C buffers. 263 // TODO(Subv): This completely ignores C buffers.
266 std::size_t domain_offset = size - domain_message_header->num_objects; 264 std::size_t domain_offset = size - domain_message_header->num_objects;
267 auto& request_handlers = server_session->domain_request_handlers;
268 265
269 for (auto& object : domain_objects) { 266 for (const auto& object : domain_objects) {
270 request_handlers.emplace_back(object); 267 server_session->AppendDomainRequestHandler(object);
271 dst_cmdbuf[domain_offset++] = static_cast<u32_le>(request_handlers.size()); 268 dst_cmdbuf[domain_offset++] =
269 static_cast<u32_le>(server_session->NumDomainRequestHandlers());
272 } 270 }
273 } 271 }
274 272
diff --git a/src/core/hle/kernel/hle_ipc.h b/src/core/hle/kernel/hle_ipc.h
index cb1c5aff3..2bdd9f02c 100644
--- a/src/core/hle/kernel/hle_ipc.h
+++ b/src/core/hle/kernel/hle_ipc.h
@@ -6,6 +6,7 @@
6 6
7#include <array> 7#include <array>
8#include <memory> 8#include <memory>
9#include <optional>
9#include <string> 10#include <string>
10#include <type_traits> 11#include <type_traits>
11#include <vector> 12#include <vector>
@@ -15,6 +16,8 @@
15#include "core/hle/ipc.h" 16#include "core/hle/ipc.h"
16#include "core/hle/kernel/object.h" 17#include "core/hle/kernel/object.h"
17 18
19union ResultCode;
20
18namespace Service { 21namespace Service {
19class ServiceFrameworkBase; 22class ServiceFrameworkBase;
20} 23}
@@ -166,12 +169,12 @@ public:
166 return buffer_c_desciptors; 169 return buffer_c_desciptors;
167 } 170 }
168 171
169 const IPC::DomainMessageHeader* GetDomainMessageHeader() const { 172 const IPC::DomainMessageHeader& GetDomainMessageHeader() const {
170 return domain_message_header.get(); 173 return domain_message_header.value();
171 } 174 }
172 175
173 bool HasDomainMessageHeader() const { 176 bool HasDomainMessageHeader() const {
174 return domain_message_header != nullptr; 177 return domain_message_header.has_value();
175 } 178 }
176 179
177 /// Helper function to read a buffer using the appropriate buffer descriptor 180 /// Helper function to read a buffer using the appropriate buffer descriptor
@@ -208,14 +211,12 @@ public:
208 211
209 template <typename T> 212 template <typename T>
210 SharedPtr<T> GetCopyObject(std::size_t index) { 213 SharedPtr<T> GetCopyObject(std::size_t index) {
211 ASSERT(index < copy_objects.size()); 214 return DynamicObjectCast<T>(copy_objects.at(index));
212 return DynamicObjectCast<T>(copy_objects[index]);
213 } 215 }
214 216
215 template <typename T> 217 template <typename T>
216 SharedPtr<T> GetMoveObject(std::size_t index) { 218 SharedPtr<T> GetMoveObject(std::size_t index) {
217 ASSERT(index < move_objects.size()); 219 return DynamicObjectCast<T>(move_objects.at(index));
218 return DynamicObjectCast<T>(move_objects[index]);
219 } 220 }
220 221
221 void AddMoveObject(SharedPtr<Object> object) { 222 void AddMoveObject(SharedPtr<Object> object) {
@@ -232,7 +233,7 @@ public:
232 233
233 template <typename T> 234 template <typename T>
234 std::shared_ptr<T> GetDomainRequestHandler(std::size_t index) const { 235 std::shared_ptr<T> GetDomainRequestHandler(std::size_t index) const {
235 return std::static_pointer_cast<T>(domain_request_handlers[index]); 236 return std::static_pointer_cast<T>(domain_request_handlers.at(index));
236 } 237 }
237 238
238 void SetDomainRequestHandlers( 239 void SetDomainRequestHandlers(
@@ -272,10 +273,10 @@ private:
272 boost::container::small_vector<SharedPtr<Object>, 8> copy_objects; 273 boost::container::small_vector<SharedPtr<Object>, 8> copy_objects;
273 boost::container::small_vector<std::shared_ptr<SessionRequestHandler>, 8> domain_objects; 274 boost::container::small_vector<std::shared_ptr<SessionRequestHandler>, 8> domain_objects;
274 275
275 std::shared_ptr<IPC::CommandHeader> command_header; 276 std::optional<IPC::CommandHeader> command_header;
276 std::shared_ptr<IPC::HandleDescriptorHeader> handle_descriptor_header; 277 std::optional<IPC::HandleDescriptorHeader> handle_descriptor_header;
277 std::shared_ptr<IPC::DataPayloadHeader> data_payload_header; 278 std::optional<IPC::DataPayloadHeader> data_payload_header;
278 std::shared_ptr<IPC::DomainMessageHeader> domain_message_header; 279 std::optional<IPC::DomainMessageHeader> domain_message_header;
279 std::vector<IPC::BufferDescriptorX> buffer_x_desciptors; 280 std::vector<IPC::BufferDescriptorX> buffer_x_desciptors;
280 std::vector<IPC::BufferDescriptorABW> buffer_a_desciptors; 281 std::vector<IPC::BufferDescriptorABW> buffer_a_desciptors;
281 std::vector<IPC::BufferDescriptorABW> buffer_b_desciptors; 282 std::vector<IPC::BufferDescriptorABW> buffer_b_desciptors;
diff --git a/src/core/hle/kernel/kernel.cpp b/src/core/hle/kernel/kernel.cpp
index dd749eed4..04ea9349e 100644
--- a/src/core/hle/kernel/kernel.cpp
+++ b/src/core/hle/kernel/kernel.cpp
@@ -12,6 +12,7 @@
12 12
13#include "core/core.h" 13#include "core/core.h"
14#include "core/core_timing.h" 14#include "core/core_timing.h"
15#include "core/hle/kernel/address_arbiter.h"
15#include "core/hle/kernel/client_port.h" 16#include "core/hle/kernel/client_port.h"
16#include "core/hle/kernel/handle_table.h" 17#include "core/hle/kernel/handle_table.h"
17#include "core/hle/kernel/kernel.h" 18#include "core/hle/kernel/kernel.h"
@@ -86,11 +87,13 @@ static void ThreadWakeupCallback(u64 thread_handle, [[maybe_unused]] int cycles_
86} 87}
87 88
88struct KernelCore::Impl { 89struct KernelCore::Impl {
89 void Initialize(KernelCore& kernel, Core::Timing::CoreTiming& core_timing) { 90 explicit Impl(Core::System& system) : address_arbiter{system}, system{system} {}
91
92 void Initialize(KernelCore& kernel) {
90 Shutdown(); 93 Shutdown();
91 94
92 InitializeSystemResourceLimit(kernel); 95 InitializeSystemResourceLimit(kernel);
93 InitializeThreads(core_timing); 96 InitializeThreads();
94 } 97 }
95 98
96 void Shutdown() { 99 void Shutdown() {
@@ -122,9 +125,9 @@ struct KernelCore::Impl {
122 ASSERT(system_resource_limit->SetLimitValue(ResourceType::Sessions, 900).IsSuccess()); 125 ASSERT(system_resource_limit->SetLimitValue(ResourceType::Sessions, 900).IsSuccess());
123 } 126 }
124 127
125 void InitializeThreads(Core::Timing::CoreTiming& core_timing) { 128 void InitializeThreads() {
126 thread_wakeup_event_type = 129 thread_wakeup_event_type =
127 core_timing.RegisterEvent("ThreadWakeupCallback", ThreadWakeupCallback); 130 system.CoreTiming().RegisterEvent("ThreadWakeupCallback", ThreadWakeupCallback);
128 } 131 }
129 132
130 std::atomic<u32> next_object_id{0}; 133 std::atomic<u32> next_object_id{0};
@@ -135,6 +138,8 @@ struct KernelCore::Impl {
135 std::vector<SharedPtr<Process>> process_list; 138 std::vector<SharedPtr<Process>> process_list;
136 Process* current_process = nullptr; 139 Process* current_process = nullptr;
137 140
141 Kernel::AddressArbiter address_arbiter;
142
138 SharedPtr<ResourceLimit> system_resource_limit; 143 SharedPtr<ResourceLimit> system_resource_limit;
139 144
140 Core::Timing::EventType* thread_wakeup_event_type = nullptr; 145 Core::Timing::EventType* thread_wakeup_event_type = nullptr;
@@ -145,15 +150,18 @@ struct KernelCore::Impl {
145 /// Map of named ports managed by the kernel, which can be retrieved using 150 /// Map of named ports managed by the kernel, which can be retrieved using
146 /// the ConnectToPort SVC. 151 /// the ConnectToPort SVC.
147 NamedPortTable named_ports; 152 NamedPortTable named_ports;
153
154 // System context
155 Core::System& system;
148}; 156};
149 157
150KernelCore::KernelCore() : impl{std::make_unique<Impl>()} {} 158KernelCore::KernelCore(Core::System& system) : impl{std::make_unique<Impl>(system)} {}
151KernelCore::~KernelCore() { 159KernelCore::~KernelCore() {
152 Shutdown(); 160 Shutdown();
153} 161}
154 162
155void KernelCore::Initialize(Core::Timing::CoreTiming& core_timing) { 163void KernelCore::Initialize() {
156 impl->Initialize(*this, core_timing); 164 impl->Initialize(*this);
157} 165}
158 166
159void KernelCore::Shutdown() { 167void KernelCore::Shutdown() {
@@ -184,6 +192,14 @@ const Process* KernelCore::CurrentProcess() const {
184 return impl->current_process; 192 return impl->current_process;
185} 193}
186 194
195AddressArbiter& KernelCore::AddressArbiter() {
196 return impl->address_arbiter;
197}
198
199const AddressArbiter& KernelCore::AddressArbiter() const {
200 return impl->address_arbiter;
201}
202
187void KernelCore::AddNamedPort(std::string name, SharedPtr<ClientPort> port) { 203void KernelCore::AddNamedPort(std::string name, SharedPtr<ClientPort> port) {
188 impl->named_ports.emplace(std::move(name), std::move(port)); 204 impl->named_ports.emplace(std::move(name), std::move(port));
189} 205}
diff --git a/src/core/hle/kernel/kernel.h b/src/core/hle/kernel/kernel.h
index 154bced42..4d292aca9 100644
--- a/src/core/hle/kernel/kernel.h
+++ b/src/core/hle/kernel/kernel.h
@@ -11,6 +11,10 @@
11template <typename T> 11template <typename T>
12class ResultVal; 12class ResultVal;
13 13
14namespace Core {
15class System;
16}
17
14namespace Core::Timing { 18namespace Core::Timing {
15class CoreTiming; 19class CoreTiming;
16struct EventType; 20struct EventType;
@@ -18,6 +22,7 @@ struct EventType;
18 22
19namespace Kernel { 23namespace Kernel {
20 24
25class AddressArbiter;
21class ClientPort; 26class ClientPort;
22class HandleTable; 27class HandleTable;
23class Process; 28class Process;
@@ -30,7 +35,14 @@ private:
30 using NamedPortTable = std::unordered_map<std::string, SharedPtr<ClientPort>>; 35 using NamedPortTable = std::unordered_map<std::string, SharedPtr<ClientPort>>;
31 36
32public: 37public:
33 KernelCore(); 38 /// Constructs an instance of the kernel using the given System
39 /// instance as a context for any necessary system-related state,
40 /// such as threads, CPU core state, etc.
41 ///
42 /// @post After execution of the constructor, the provided System
43 /// object *must* outlive the kernel instance itself.
44 ///
45 explicit KernelCore(Core::System& system);
34 ~KernelCore(); 46 ~KernelCore();
35 47
36 KernelCore(const KernelCore&) = delete; 48 KernelCore(const KernelCore&) = delete;
@@ -40,11 +52,7 @@ public:
40 KernelCore& operator=(KernelCore&&) = delete; 52 KernelCore& operator=(KernelCore&&) = delete;
41 53
42 /// Resets the kernel to a clean slate for use. 54 /// Resets the kernel to a clean slate for use.
43 /// 55 void Initialize();
44 /// @param core_timing CoreTiming instance used to create any necessary
45 /// kernel-specific callback events.
46 ///
47 void Initialize(Core::Timing::CoreTiming& core_timing);
48 56
49 /// Clears all resources in use by the kernel instance. 57 /// Clears all resources in use by the kernel instance.
50 void Shutdown(); 58 void Shutdown();
@@ -67,6 +75,12 @@ public:
67 /// Retrieves a const pointer to the current process. 75 /// Retrieves a const pointer to the current process.
68 const Process* CurrentProcess() const; 76 const Process* CurrentProcess() const;
69 77
78 /// Provides a reference to the kernel's address arbiter.
79 Kernel::AddressArbiter& AddressArbiter();
80
81 /// Provides a const reference to the kernel's address arbiter.
82 const Kernel::AddressArbiter& AddressArbiter() const;
83
70 /// Adds a port to the named port table 84 /// Adds a port to the named port table
71 void AddNamedPort(std::string name, SharedPtr<ClientPort> port); 85 void AddNamedPort(std::string name, SharedPtr<ClientPort> port);
72 86
diff --git a/src/core/hle/kernel/process.cpp b/src/core/hle/kernel/process.cpp
index c5aa19afa..8009150e0 100644
--- a/src/core/hle/kernel/process.cpp
+++ b/src/core/hle/kernel/process.cpp
@@ -99,7 +99,13 @@ ResultCode Process::LoadFromMetadata(const FileSys::ProgramMetadata& metadata) {
99 vm_manager.Reset(metadata.GetAddressSpaceType()); 99 vm_manager.Reset(metadata.GetAddressSpaceType());
100 100
101 const auto& caps = metadata.GetKernelCapabilities(); 101 const auto& caps = metadata.GetKernelCapabilities();
102 return capabilities.InitializeForUserProcess(caps.data(), caps.size(), vm_manager); 102 const auto capability_init_result =
103 capabilities.InitializeForUserProcess(caps.data(), caps.size(), vm_manager);
104 if (capability_init_result.IsError()) {
105 return capability_init_result;
106 }
107
108 return handle_table.SetSize(capabilities.GetHandleTableSize());
103} 109}
104 110
105void Process::Run(VAddr entry_point, s32 main_thread_priority, u32 stack_size) { 111void Process::Run(VAddr entry_point, s32 main_thread_priority, u32 stack_size) {
diff --git a/src/core/hle/kernel/process_capability.cpp b/src/core/hle/kernel/process_capability.cpp
index 3a2164b25..583e35b79 100644
--- a/src/core/hle/kernel/process_capability.cpp
+++ b/src/core/hle/kernel/process_capability.cpp
@@ -96,7 +96,7 @@ void ProcessCapabilities::InitializeForMetadatalessProcess() {
96 interrupt_capabilities.set(); 96 interrupt_capabilities.set();
97 97
98 // Allow using the maximum possible amount of handles 98 // Allow using the maximum possible amount of handles
99 handle_table_size = static_cast<u32>(HandleTable::MAX_COUNT); 99 handle_table_size = static_cast<s32>(HandleTable::MAX_COUNT);
100 100
101 // Allow all debugging capabilities. 101 // Allow all debugging capabilities.
102 is_debuggable = true; 102 is_debuggable = true;
@@ -337,7 +337,7 @@ ResultCode ProcessCapabilities::HandleHandleTableFlags(u32 flags) {
337 return ERR_RESERVED_VALUE; 337 return ERR_RESERVED_VALUE;
338 } 338 }
339 339
340 handle_table_size = (flags >> 16) & 0x3FF; 340 handle_table_size = static_cast<s32>((flags >> 16) & 0x3FF);
341 return RESULT_SUCCESS; 341 return RESULT_SUCCESS;
342} 342}
343 343
diff --git a/src/core/hle/kernel/process_capability.h b/src/core/hle/kernel/process_capability.h
index fbc8812a3..5cdd80747 100644
--- a/src/core/hle/kernel/process_capability.h
+++ b/src/core/hle/kernel/process_capability.h
@@ -156,7 +156,7 @@ public:
156 } 156 }
157 157
158 /// Gets the number of total allowable handles for the process' handle table. 158 /// Gets the number of total allowable handles for the process' handle table.
159 u32 GetHandleTableSize() const { 159 s32 GetHandleTableSize() const {
160 return handle_table_size; 160 return handle_table_size;
161 } 161 }
162 162
@@ -252,7 +252,7 @@ private:
252 u64 core_mask = 0; 252 u64 core_mask = 0;
253 u64 priority_mask = 0; 253 u64 priority_mask = 0;
254 254
255 u32 handle_table_size = 0; 255 s32 handle_table_size = 0;
256 u32 kernel_version = 0; 256 u32 kernel_version = 0;
257 257
258 ProgramType program_type = ProgramType::SysModule; 258 ProgramType program_type = ProgramType::SysModule;
diff --git a/src/core/hle/kernel/server_session.cpp b/src/core/hle/kernel/server_session.cpp
index 027434f92..4d8a337a7 100644
--- a/src/core/hle/kernel/server_session.cpp
+++ b/src/core/hle/kernel/server_session.cpp
@@ -63,42 +63,71 @@ void ServerSession::Acquire(Thread* thread) {
63 pending_requesting_threads.pop_back(); 63 pending_requesting_threads.pop_back();
64} 64}
65 65
66void ServerSession::ClientDisconnected() {
67 // We keep a shared pointer to the hle handler to keep it alive throughout
68 // the call to ClientDisconnected, as ClientDisconnected invalidates the
69 // hle_handler member itself during the course of the function executing.
70 std::shared_ptr<SessionRequestHandler> handler = hle_handler;
71 if (handler) {
72 // Note that after this returns, this server session's hle_handler is
73 // invalidated (set to null).
74 handler->ClientDisconnected(this);
75 }
76
77 // TODO(Subv): Force a wake up of all the ServerSession's waiting threads and set
78 // their WaitSynchronization result to 0xC920181A.
79
80 // Clean up the list of client threads with pending requests, they are unneeded now that the
81 // client endpoint is closed.
82 pending_requesting_threads.clear();
83 currently_handling = nullptr;
84}
85
86void ServerSession::AppendDomainRequestHandler(std::shared_ptr<SessionRequestHandler> handler) {
87 domain_request_handlers.push_back(std::move(handler));
88}
89
90std::size_t ServerSession::NumDomainRequestHandlers() const {
91 return domain_request_handlers.size();
92}
93
66ResultCode ServerSession::HandleDomainSyncRequest(Kernel::HLERequestContext& context) { 94ResultCode ServerSession::HandleDomainSyncRequest(Kernel::HLERequestContext& context) {
67 auto* const domain_message_header = context.GetDomainMessageHeader(); 95 if (!context.HasDomainMessageHeader()) {
68 if (domain_message_header) { 96 return RESULT_SUCCESS;
69 // Set domain handlers in HLE context, used for domain objects (IPC interfaces) as inputs 97 }
70 context.SetDomainRequestHandlers(domain_request_handlers); 98
71 99 // Set domain handlers in HLE context, used for domain objects (IPC interfaces) as inputs
72 // If there is a DomainMessageHeader, then this is CommandType "Request" 100 context.SetDomainRequestHandlers(domain_request_handlers);
73 const u32 object_id{context.GetDomainMessageHeader()->object_id}; 101
74 switch (domain_message_header->command) { 102 // If there is a DomainMessageHeader, then this is CommandType "Request"
75 case IPC::DomainMessageHeader::CommandType::SendMessage: 103 const auto& domain_message_header = context.GetDomainMessageHeader();
76 if (object_id > domain_request_handlers.size()) { 104 const u32 object_id{domain_message_header.object_id};
77 LOG_CRITICAL(IPC, 105 switch (domain_message_header.command) {
78 "object_id {} is too big! This probably means a recent service call " 106 case IPC::DomainMessageHeader::CommandType::SendMessage:
79 "to {} needed to return a new interface!", 107 if (object_id > domain_request_handlers.size()) {
80 object_id, name); 108 LOG_CRITICAL(IPC,
81 UNREACHABLE(); 109 "object_id {} is too big! This probably means a recent service call "
82 return RESULT_SUCCESS; // Ignore error if asserts are off 110 "to {} needed to return a new interface!",
83 } 111 object_id, name);
84 return domain_request_handlers[object_id - 1]->HandleSyncRequest(context); 112 UNREACHABLE();
85 113 return RESULT_SUCCESS; // Ignore error if asserts are off
86 case IPC::DomainMessageHeader::CommandType::CloseVirtualHandle: {
87 LOG_DEBUG(IPC, "CloseVirtualHandle, object_id=0x{:08X}", object_id);
88
89 domain_request_handlers[object_id - 1] = nullptr;
90
91 IPC::ResponseBuilder rb{context, 2};
92 rb.Push(RESULT_SUCCESS);
93 return RESULT_SUCCESS;
94 }
95 } 114 }
115 return domain_request_handlers[object_id - 1]->HandleSyncRequest(context);
96 116
97 LOG_CRITICAL(IPC, "Unknown domain command={}", 117 case IPC::DomainMessageHeader::CommandType::CloseVirtualHandle: {
98 static_cast<int>(domain_message_header->command.Value())); 118 LOG_DEBUG(IPC, "CloseVirtualHandle, object_id=0x{:08X}", object_id);
99 ASSERT(false); 119
120 domain_request_handlers[object_id - 1] = nullptr;
121
122 IPC::ResponseBuilder rb{context, 2};
123 rb.Push(RESULT_SUCCESS);
124 return RESULT_SUCCESS;
125 }
100 } 126 }
101 127
128 LOG_CRITICAL(IPC, "Unknown domain command={}",
129 static_cast<int>(domain_message_header.command.Value()));
130 ASSERT(false);
102 return RESULT_SUCCESS; 131 return RESULT_SUCCESS;
103} 132}
104 133
diff --git a/src/core/hle/kernel/server_session.h b/src/core/hle/kernel/server_session.h
index e0e9d64c8..aea4ccfeb 100644
--- a/src/core/hle/kernel/server_session.h
+++ b/src/core/hle/kernel/server_session.h
@@ -46,6 +46,14 @@ public:
46 return HANDLE_TYPE; 46 return HANDLE_TYPE;
47 } 47 }
48 48
49 Session* GetParent() {
50 return parent.get();
51 }
52
53 const Session* GetParent() const {
54 return parent.get();
55 }
56
49 using SessionPair = std::tuple<SharedPtr<ServerSession>, SharedPtr<ClientSession>>; 57 using SessionPair = std::tuple<SharedPtr<ServerSession>, SharedPtr<ClientSession>>;
50 58
51 /** 59 /**
@@ -78,23 +86,16 @@ public:
78 86
79 void Acquire(Thread* thread) override; 87 void Acquire(Thread* thread) override;
80 88
81 std::string name; ///< The name of this session (optional) 89 /// Called when a client disconnection occurs.
82 std::shared_ptr<Session> parent; ///< The parent session, which links to the client endpoint. 90 void ClientDisconnected();
83 std::shared_ptr<SessionRequestHandler>
84 hle_handler; ///< This session's HLE request handler (applicable when not a domain)
85 91
86 /// This is the list of domain request handlers (after conversion to a domain) 92 /// Adds a new domain request handler to the collection of request handlers within
87 std::vector<std::shared_ptr<SessionRequestHandler>> domain_request_handlers; 93 /// this ServerSession instance.
88 94 void AppendDomainRequestHandler(std::shared_ptr<SessionRequestHandler> handler);
89 /// List of threads that are pending a response after a sync request. This list is processed in
90 /// a LIFO manner, thus, the last request will be dispatched first.
91 /// TODO(Subv): Verify if this is indeed processed in LIFO using a hardware test.
92 std::vector<SharedPtr<Thread>> pending_requesting_threads;
93 95
94 /// Thread whose request is currently being handled. A request is considered "handled" when a 96 /// Retrieves the total number of domain request handlers that have been
95 /// response is sent via svcReplyAndReceive. 97 /// appended to this ServerSession instance.
96 /// TODO(Subv): Find a better name for this. 98 std::size_t NumDomainRequestHandlers() const;
97 SharedPtr<Thread> currently_handling;
98 99
99 /// Returns true if the session has been converted to a domain, otherwise False 100 /// Returns true if the session has been converted to a domain, otherwise False
100 bool IsDomain() const { 101 bool IsDomain() const {
@@ -129,8 +130,30 @@ private:
129 /// object handle. 130 /// object handle.
130 ResultCode HandleDomainSyncRequest(Kernel::HLERequestContext& context); 131 ResultCode HandleDomainSyncRequest(Kernel::HLERequestContext& context);
131 132
133 /// The parent session, which links to the client endpoint.
134 std::shared_ptr<Session> parent;
135
136 /// This session's HLE request handler (applicable when not a domain)
137 std::shared_ptr<SessionRequestHandler> hle_handler;
138
139 /// This is the list of domain request handlers (after conversion to a domain)
140 std::vector<std::shared_ptr<SessionRequestHandler>> domain_request_handlers;
141
142 /// List of threads that are pending a response after a sync request. This list is processed in
143 /// a LIFO manner, thus, the last request will be dispatched first.
144 /// TODO(Subv): Verify if this is indeed processed in LIFO using a hardware test.
145 std::vector<SharedPtr<Thread>> pending_requesting_threads;
146
147 /// Thread whose request is currently being handled. A request is considered "handled" when a
148 /// response is sent via svcReplyAndReceive.
149 /// TODO(Subv): Find a better name for this.
150 SharedPtr<Thread> currently_handling;
151
132 /// When set to True, converts the session to a domain at the end of the command 152 /// When set to True, converts the session to a domain at the end of the command
133 bool convert_to_domain{}; 153 bool convert_to_domain{};
154
155 /// The name of this session (optional)
156 std::string name;
134}; 157};
135 158
136} // namespace Kernel 159} // namespace Kernel
diff --git a/src/core/hle/kernel/shared_memory.cpp b/src/core/hle/kernel/shared_memory.cpp
index 22d0c1dd5..62861da36 100644
--- a/src/core/hle/kernel/shared_memory.cpp
+++ b/src/core/hle/kernel/shared_memory.cpp
@@ -6,7 +6,6 @@
6 6
7#include "common/assert.h" 7#include "common/assert.h"
8#include "common/logging/log.h" 8#include "common/logging/log.h"
9#include "core/core.h"
10#include "core/hle/kernel/errors.h" 9#include "core/hle/kernel/errors.h"
11#include "core/hle/kernel/kernel.h" 10#include "core/hle/kernel/kernel.h"
12#include "core/hle/kernel/shared_memory.h" 11#include "core/hle/kernel/shared_memory.h"
@@ -34,8 +33,8 @@ SharedPtr<SharedMemory> SharedMemory::Create(KernelCore& kernel, Process* owner_
34 shared_memory->backing_block_offset = 0; 33 shared_memory->backing_block_offset = 0;
35 34
36 // Refresh the address mappings for the current process. 35 // Refresh the address mappings for the current process.
37 if (Core::CurrentProcess() != nullptr) { 36 if (kernel.CurrentProcess() != nullptr) {
38 Core::CurrentProcess()->VMManager().RefreshMemoryBlockMappings( 37 kernel.CurrentProcess()->VMManager().RefreshMemoryBlockMappings(
39 shared_memory->backing_block.get()); 38 shared_memory->backing_block.get());
40 } 39 }
41 } else { 40 } else {
diff --git a/src/core/hle/kernel/svc.cpp b/src/core/hle/kernel/svc.cpp
index c5d399bab..7f5c0cc86 100644
--- a/src/core/hle/kernel/svc.cpp
+++ b/src/core/hle/kernel/svc.cpp
@@ -20,6 +20,7 @@
20#include "core/hle/kernel/address_arbiter.h" 20#include "core/hle/kernel/address_arbiter.h"
21#include "core/hle/kernel/client_port.h" 21#include "core/hle/kernel/client_port.h"
22#include "core/hle/kernel/client_session.h" 22#include "core/hle/kernel/client_session.h"
23#include "core/hle/kernel/errors.h"
23#include "core/hle/kernel/handle_table.h" 24#include "core/hle/kernel/handle_table.h"
24#include "core/hle/kernel/kernel.h" 25#include "core/hle/kernel/kernel.h"
25#include "core/hle/kernel/mutex.h" 26#include "core/hle/kernel/mutex.h"
@@ -47,23 +48,6 @@ constexpr bool IsValidAddressRange(VAddr address, u64 size) {
47 return address + size > address; 48 return address + size > address;
48} 49}
49 50
50// Checks if a given address range lies within a larger address range.
51constexpr bool IsInsideAddressRange(VAddr address, u64 size, VAddr address_range_begin,
52 VAddr address_range_end) {
53 const VAddr end_address = address + size - 1;
54 return address_range_begin <= address && end_address <= address_range_end - 1;
55}
56
57bool IsInsideAddressSpace(const VMManager& vm, VAddr address, u64 size) {
58 return IsInsideAddressRange(address, size, vm.GetAddressSpaceBaseAddress(),
59 vm.GetAddressSpaceEndAddress());
60}
61
62bool IsInsideNewMapRegion(const VMManager& vm, VAddr address, u64 size) {
63 return IsInsideAddressRange(address, size, vm.GetNewMapRegionBaseAddress(),
64 vm.GetNewMapRegionEndAddress());
65}
66
67// 8 GiB 51// 8 GiB
68constexpr u64 MAIN_MEMORY_SIZE = 0x200000000; 52constexpr u64 MAIN_MEMORY_SIZE = 0x200000000;
69 53
@@ -105,14 +89,14 @@ ResultCode MapUnmapMemorySanityChecks(const VMManager& vm_manager, VAddr dst_add
105 return ERR_INVALID_ADDRESS_STATE; 89 return ERR_INVALID_ADDRESS_STATE;
106 } 90 }
107 91
108 if (!IsInsideAddressSpace(vm_manager, src_addr, size)) { 92 if (!vm_manager.IsWithinAddressSpace(src_addr, size)) {
109 LOG_ERROR(Kernel_SVC, 93 LOG_ERROR(Kernel_SVC,
110 "Source is not within the address space, addr=0x{:016X}, size=0x{:016X}", 94 "Source is not within the address space, addr=0x{:016X}, size=0x{:016X}",
111 src_addr, size); 95 src_addr, size);
112 return ERR_INVALID_ADDRESS_STATE; 96 return ERR_INVALID_ADDRESS_STATE;
113 } 97 }
114 98
115 if (!IsInsideNewMapRegion(vm_manager, dst_addr, size)) { 99 if (!vm_manager.IsWithinNewMapRegion(dst_addr, size)) {
116 LOG_ERROR(Kernel_SVC, 100 LOG_ERROR(Kernel_SVC,
117 "Destination is not within the new map region, addr=0x{:016X}, size=0x{:016X}", 101 "Destination is not within the new map region, addr=0x{:016X}, size=0x{:016X}",
118 dst_addr, size); 102 dst_addr, size);
@@ -238,7 +222,7 @@ static ResultCode SetMemoryPermission(VAddr addr, u64 size, u32 prot) {
238 auto* const current_process = Core::CurrentProcess(); 222 auto* const current_process = Core::CurrentProcess();
239 auto& vm_manager = current_process->VMManager(); 223 auto& vm_manager = current_process->VMManager();
240 224
241 if (!IsInsideAddressSpace(vm_manager, addr, size)) { 225 if (!vm_manager.IsWithinAddressSpace(addr, size)) {
242 LOG_ERROR(Kernel_SVC, 226 LOG_ERROR(Kernel_SVC,
243 "Source is not within the address space, addr=0x{:016X}, size=0x{:016X}", addr, 227 "Source is not within the address space, addr=0x{:016X}, size=0x{:016X}", addr,
244 size); 228 size);
@@ -299,7 +283,7 @@ static ResultCode SetMemoryAttribute(VAddr address, u64 size, u32 mask, u32 attr
299 } 283 }
300 284
301 auto& vm_manager = Core::CurrentProcess()->VMManager(); 285 auto& vm_manager = Core::CurrentProcess()->VMManager();
302 if (!IsInsideAddressSpace(vm_manager, address, size)) { 286 if (!vm_manager.IsWithinAddressSpace(address, size)) {
303 LOG_ERROR(Kernel_SVC, 287 LOG_ERROR(Kernel_SVC,
304 "Given address (0x{:016X}) is outside the bounds of the address space.", address); 288 "Given address (0x{:016X}) is outside the bounds of the address space.", address);
305 return ERR_INVALID_ADDRESS_STATE; 289 return ERR_INVALID_ADDRESS_STATE;
@@ -1495,13 +1479,14 @@ static ResultCode WaitForAddress(VAddr address, u32 type, s32 value, s64 timeout
1495 return ERR_INVALID_ADDRESS; 1479 return ERR_INVALID_ADDRESS;
1496 } 1480 }
1497 1481
1482 auto& address_arbiter = Core::System::GetInstance().Kernel().AddressArbiter();
1498 switch (static_cast<AddressArbiter::ArbitrationType>(type)) { 1483 switch (static_cast<AddressArbiter::ArbitrationType>(type)) {
1499 case AddressArbiter::ArbitrationType::WaitIfLessThan: 1484 case AddressArbiter::ArbitrationType::WaitIfLessThan:
1500 return AddressArbiter::WaitForAddressIfLessThan(address, value, timeout, false); 1485 return address_arbiter.WaitForAddressIfLessThan(address, value, timeout, false);
1501 case AddressArbiter::ArbitrationType::DecrementAndWaitIfLessThan: 1486 case AddressArbiter::ArbitrationType::DecrementAndWaitIfLessThan:
1502 return AddressArbiter::WaitForAddressIfLessThan(address, value, timeout, true); 1487 return address_arbiter.WaitForAddressIfLessThan(address, value, timeout, true);
1503 case AddressArbiter::ArbitrationType::WaitIfEqual: 1488 case AddressArbiter::ArbitrationType::WaitIfEqual:
1504 return AddressArbiter::WaitForAddressIfEqual(address, value, timeout); 1489 return address_arbiter.WaitForAddressIfEqual(address, value, timeout);
1505 default: 1490 default:
1506 LOG_ERROR(Kernel_SVC, 1491 LOG_ERROR(Kernel_SVC,
1507 "Invalid arbitration type, expected WaitIfLessThan, DecrementAndWaitIfLessThan " 1492 "Invalid arbitration type, expected WaitIfLessThan, DecrementAndWaitIfLessThan "
@@ -1526,13 +1511,14 @@ static ResultCode SignalToAddress(VAddr address, u32 type, s32 value, s32 num_to
1526 return ERR_INVALID_ADDRESS; 1511 return ERR_INVALID_ADDRESS;
1527 } 1512 }
1528 1513
1514 auto& address_arbiter = Core::System::GetInstance().Kernel().AddressArbiter();
1529 switch (static_cast<AddressArbiter::SignalType>(type)) { 1515 switch (static_cast<AddressArbiter::SignalType>(type)) {
1530 case AddressArbiter::SignalType::Signal: 1516 case AddressArbiter::SignalType::Signal:
1531 return AddressArbiter::SignalToAddress(address, num_to_wake); 1517 return address_arbiter.SignalToAddress(address, num_to_wake);
1532 case AddressArbiter::SignalType::IncrementAndSignalIfEqual: 1518 case AddressArbiter::SignalType::IncrementAndSignalIfEqual:
1533 return AddressArbiter::IncrementAndSignalToAddressIfEqual(address, value, num_to_wake); 1519 return address_arbiter.IncrementAndSignalToAddressIfEqual(address, value, num_to_wake);
1534 case AddressArbiter::SignalType::ModifyByWaitingCountAndSignalIfEqual: 1520 case AddressArbiter::SignalType::ModifyByWaitingCountAndSignalIfEqual:
1535 return AddressArbiter::ModifyByWaitingCountAndSignalToAddressIfEqual(address, value, 1521 return address_arbiter.ModifyByWaitingCountAndSignalToAddressIfEqual(address, value,
1536 num_to_wake); 1522 num_to_wake);
1537 default: 1523 default:
1538 LOG_ERROR(Kernel_SVC, 1524 LOG_ERROR(Kernel_SVC,
diff --git a/src/core/hle/kernel/thread.cpp b/src/core/hle/kernel/thread.cpp
index 6661e2130..eb54d6651 100644
--- a/src/core/hle/kernel/thread.cpp
+++ b/src/core/hle/kernel/thread.cpp
@@ -184,8 +184,6 @@ ResultVal<SharedPtr<Thread>> Thread::Create(KernelCore& kernel, std::string name
184 return ERR_INVALID_PROCESSOR_ID; 184 return ERR_INVALID_PROCESSOR_ID;
185 } 185 }
186 186
187 // TODO(yuriks): Other checks, returning 0xD9001BEA
188
189 if (!Memory::IsValidVirtualAddress(owner_process, entry_point)) { 187 if (!Memory::IsValidVirtualAddress(owner_process, entry_point)) {
190 LOG_ERROR(Kernel_SVC, "(name={}): invalid entry {:016X}", name, entry_point); 188 LOG_ERROR(Kernel_SVC, "(name={}): invalid entry {:016X}", name, entry_point);
191 // TODO (bunnei): Find the correct error code to use here 189 // TODO (bunnei): Find the correct error code to use here
diff --git a/src/core/hle/kernel/vm_manager.cpp b/src/core/hle/kernel/vm_manager.cpp
index 10ad94aa6..05c59af34 100644
--- a/src/core/hle/kernel/vm_manager.cpp
+++ b/src/core/hle/kernel/vm_manager.cpp
@@ -17,8 +17,8 @@
17#include "core/memory_setup.h" 17#include "core/memory_setup.h"
18 18
19namespace Kernel { 19namespace Kernel {
20 20namespace {
21static const char* GetMemoryStateName(MemoryState state) { 21const char* GetMemoryStateName(MemoryState state) {
22 static constexpr const char* names[] = { 22 static constexpr const char* names[] = {
23 "Unmapped", "Io", 23 "Unmapped", "Io",
24 "Normal", "CodeStatic", 24 "Normal", "CodeStatic",
@@ -35,6 +35,14 @@ static const char* GetMemoryStateName(MemoryState state) {
35 return names[ToSvcMemoryState(state)]; 35 return names[ToSvcMemoryState(state)];
36} 36}
37 37
38// Checks if a given address range lies within a larger address range.
39constexpr bool IsInsideAddressRange(VAddr address, u64 size, VAddr address_range_begin,
40 VAddr address_range_end) {
41 const VAddr end_address = address + size - 1;
42 return address_range_begin <= address && end_address <= address_range_end - 1;
43}
44} // Anonymous namespace
45
38bool VirtualMemoryArea::CanBeMergedWith(const VirtualMemoryArea& next) const { 46bool VirtualMemoryArea::CanBeMergedWith(const VirtualMemoryArea& next) const {
39 ASSERT(base + size == next.base); 47 ASSERT(base + size == next.base);
40 if (permissions != next.permissions || state != next.state || attribute != next.attribute || 48 if (permissions != next.permissions || state != next.state || attribute != next.attribute ||
@@ -249,8 +257,7 @@ ResultCode VMManager::ReprotectRange(VAddr target, u64 size, VMAPermission new_p
249} 257}
250 258
251ResultVal<VAddr> VMManager::HeapAllocate(VAddr target, u64 size, VMAPermission perms) { 259ResultVal<VAddr> VMManager::HeapAllocate(VAddr target, u64 size, VMAPermission perms) {
252 if (target < GetHeapRegionBaseAddress() || target + size > GetHeapRegionEndAddress() || 260 if (!IsWithinHeapRegion(target, size)) {
253 target + size < target) {
254 return ERR_INVALID_ADDRESS; 261 return ERR_INVALID_ADDRESS;
255 } 262 }
256 263
@@ -285,8 +292,7 @@ ResultVal<VAddr> VMManager::HeapAllocate(VAddr target, u64 size, VMAPermission p
285} 292}
286 293
287ResultCode VMManager::HeapFree(VAddr target, u64 size) { 294ResultCode VMManager::HeapFree(VAddr target, u64 size) {
288 if (target < GetHeapRegionBaseAddress() || target + size > GetHeapRegionEndAddress() || 295 if (!IsWithinHeapRegion(target, size)) {
289 target + size < target) {
290 return ERR_INVALID_ADDRESS; 296 return ERR_INVALID_ADDRESS;
291 } 297 }
292 298
@@ -706,6 +712,11 @@ u64 VMManager::GetAddressSpaceWidth() const {
706 return address_space_width; 712 return address_space_width;
707} 713}
708 714
715bool VMManager::IsWithinAddressSpace(VAddr address, u64 size) const {
716 return IsInsideAddressRange(address, size, GetAddressSpaceBaseAddress(),
717 GetAddressSpaceEndAddress());
718}
719
709VAddr VMManager::GetASLRRegionBaseAddress() const { 720VAddr VMManager::GetASLRRegionBaseAddress() const {
710 return aslr_region_base; 721 return aslr_region_base;
711} 722}
@@ -750,6 +761,11 @@ u64 VMManager::GetCodeRegionSize() const {
750 return code_region_end - code_region_base; 761 return code_region_end - code_region_base;
751} 762}
752 763
764bool VMManager::IsWithinCodeRegion(VAddr address, u64 size) const {
765 return IsInsideAddressRange(address, size, GetCodeRegionBaseAddress(),
766 GetCodeRegionEndAddress());
767}
768
753VAddr VMManager::GetHeapRegionBaseAddress() const { 769VAddr VMManager::GetHeapRegionBaseAddress() const {
754 return heap_region_base; 770 return heap_region_base;
755} 771}
@@ -762,6 +778,11 @@ u64 VMManager::GetHeapRegionSize() const {
762 return heap_region_end - heap_region_base; 778 return heap_region_end - heap_region_base;
763} 779}
764 780
781bool VMManager::IsWithinHeapRegion(VAddr address, u64 size) const {
782 return IsInsideAddressRange(address, size, GetHeapRegionBaseAddress(),
783 GetHeapRegionEndAddress());
784}
785
765VAddr VMManager::GetMapRegionBaseAddress() const { 786VAddr VMManager::GetMapRegionBaseAddress() const {
766 return map_region_base; 787 return map_region_base;
767} 788}
@@ -774,6 +795,10 @@ u64 VMManager::GetMapRegionSize() const {
774 return map_region_end - map_region_base; 795 return map_region_end - map_region_base;
775} 796}
776 797
798bool VMManager::IsWithinMapRegion(VAddr address, u64 size) const {
799 return IsInsideAddressRange(address, size, GetMapRegionBaseAddress(), GetMapRegionEndAddress());
800}
801
777VAddr VMManager::GetNewMapRegionBaseAddress() const { 802VAddr VMManager::GetNewMapRegionBaseAddress() const {
778 return new_map_region_base; 803 return new_map_region_base;
779} 804}
@@ -786,6 +811,11 @@ u64 VMManager::GetNewMapRegionSize() const {
786 return new_map_region_end - new_map_region_base; 811 return new_map_region_end - new_map_region_base;
787} 812}
788 813
814bool VMManager::IsWithinNewMapRegion(VAddr address, u64 size) const {
815 return IsInsideAddressRange(address, size, GetNewMapRegionBaseAddress(),
816 GetNewMapRegionEndAddress());
817}
818
789VAddr VMManager::GetTLSIORegionBaseAddress() const { 819VAddr VMManager::GetTLSIORegionBaseAddress() const {
790 return tls_io_region_base; 820 return tls_io_region_base;
791} 821}
@@ -798,4 +828,9 @@ u64 VMManager::GetTLSIORegionSize() const {
798 return tls_io_region_end - tls_io_region_base; 828 return tls_io_region_end - tls_io_region_base;
799} 829}
800 830
831bool VMManager::IsWithinTLSIORegion(VAddr address, u64 size) const {
832 return IsInsideAddressRange(address, size, GetTLSIORegionBaseAddress(),
833 GetTLSIORegionEndAddress());
834}
835
801} // namespace Kernel 836} // namespace Kernel
diff --git a/src/core/hle/kernel/vm_manager.h b/src/core/hle/kernel/vm_manager.h
index 6091533bc..88e0b3c02 100644
--- a/src/core/hle/kernel/vm_manager.h
+++ b/src/core/hle/kernel/vm_manager.h
@@ -432,18 +432,21 @@ public:
432 /// Gets the address space width in bits. 432 /// Gets the address space width in bits.
433 u64 GetAddressSpaceWidth() const; 433 u64 GetAddressSpaceWidth() const;
434 434
435 /// Determines whether or not the given address range lies within the address space.
436 bool IsWithinAddressSpace(VAddr address, u64 size) const;
437
435 /// Gets the base address of the ASLR region. 438 /// Gets the base address of the ASLR region.
436 VAddr GetASLRRegionBaseAddress() const; 439 VAddr GetASLRRegionBaseAddress() const;
437 440
438 /// Gets the end address of the ASLR region. 441 /// Gets the end address of the ASLR region.
439 VAddr GetASLRRegionEndAddress() const; 442 VAddr GetASLRRegionEndAddress() const;
440 443
441 /// Determines whether or not the specified address range is within the ASLR region.
442 bool IsWithinASLRRegion(VAddr address, u64 size) const;
443
444 /// Gets the size of the ASLR region 444 /// Gets the size of the ASLR region
445 u64 GetASLRRegionSize() const; 445 u64 GetASLRRegionSize() const;
446 446
447 /// Determines whether or not the specified address range is within the ASLR region.
448 bool IsWithinASLRRegion(VAddr address, u64 size) const;
449
447 /// Gets the base address of the code region. 450 /// Gets the base address of the code region.
448 VAddr GetCodeRegionBaseAddress() const; 451 VAddr GetCodeRegionBaseAddress() const;
449 452
@@ -453,6 +456,9 @@ public:
453 /// Gets the total size of the code region in bytes. 456 /// Gets the total size of the code region in bytes.
454 u64 GetCodeRegionSize() const; 457 u64 GetCodeRegionSize() const;
455 458
459 /// Determines whether or not the specified range is within the code region.
460 bool IsWithinCodeRegion(VAddr address, u64 size) const;
461
456 /// Gets the base address of the heap region. 462 /// Gets the base address of the heap region.
457 VAddr GetHeapRegionBaseAddress() const; 463 VAddr GetHeapRegionBaseAddress() const;
458 464
@@ -462,6 +468,9 @@ public:
462 /// Gets the total size of the heap region in bytes. 468 /// Gets the total size of the heap region in bytes.
463 u64 GetHeapRegionSize() const; 469 u64 GetHeapRegionSize() const;
464 470
471 /// Determines whether or not the specified range is within the heap region.
472 bool IsWithinHeapRegion(VAddr address, u64 size) const;
473
465 /// Gets the base address of the map region. 474 /// Gets the base address of the map region.
466 VAddr GetMapRegionBaseAddress() const; 475 VAddr GetMapRegionBaseAddress() const;
467 476
@@ -471,6 +480,9 @@ public:
471 /// Gets the total size of the map region in bytes. 480 /// Gets the total size of the map region in bytes.
472 u64 GetMapRegionSize() const; 481 u64 GetMapRegionSize() const;
473 482
483 /// Determines whether or not the specified range is within the map region.
484 bool IsWithinMapRegion(VAddr address, u64 size) const;
485
474 /// Gets the base address of the new map region. 486 /// Gets the base address of the new map region.
475 VAddr GetNewMapRegionBaseAddress() const; 487 VAddr GetNewMapRegionBaseAddress() const;
476 488
@@ -480,6 +492,9 @@ public:
480 /// Gets the total size of the new map region in bytes. 492 /// Gets the total size of the new map region in bytes.
481 u64 GetNewMapRegionSize() const; 493 u64 GetNewMapRegionSize() const;
482 494
495 /// Determines whether or not the given address range is within the new map region
496 bool IsWithinNewMapRegion(VAddr address, u64 size) const;
497
483 /// Gets the base address of the TLS IO region. 498 /// Gets the base address of the TLS IO region.
484 VAddr GetTLSIORegionBaseAddress() const; 499 VAddr GetTLSIORegionBaseAddress() const;
485 500
@@ -489,6 +504,9 @@ public:
489 /// Gets the total size of the TLS IO region in bytes. 504 /// Gets the total size of the TLS IO region in bytes.
490 u64 GetTLSIORegionSize() const; 505 u64 GetTLSIORegionSize() const;
491 506
507 /// Determines if the given address range is within the TLS IO region.
508 bool IsWithinTLSIORegion(VAddr address, u64 size) const;
509
492 /// Each VMManager has its own page table, which is set as the main one when the owning process 510 /// Each VMManager has its own page table, which is set as the main one when the owning process
493 /// is scheduled. 511 /// is scheduled.
494 Memory::PageTable page_table; 512 Memory::PageTable page_table;
diff --git a/src/core/hle/result.h b/src/core/hle/result.h
index bfb77cc31..1ed144481 100644
--- a/src/core/hle/result.h
+++ b/src/core/hle/result.h
@@ -8,7 +8,6 @@
8#include <utility> 8#include <utility>
9#include "common/assert.h" 9#include "common/assert.h"
10#include "common/bit_field.h" 10#include "common/bit_field.h"
11#include "common/common_funcs.h"
12#include "common/common_types.h" 11#include "common/common_types.h"
13 12
14// All the constants in this file come from http://switchbrew.org/index.php?title=Error_codes 13// All the constants in this file come from http://switchbrew.org/index.php?title=Error_codes
diff --git a/src/core/hle/service/am/applets/software_keyboard.cpp b/src/core/hle/service/am/applets/software_keyboard.cpp
index f255f74b5..8c5bd6059 100644
--- a/src/core/hle/service/am/applets/software_keyboard.cpp
+++ b/src/core/hle/service/am/applets/software_keyboard.cpp
@@ -7,6 +7,7 @@
7#include "common/string_util.h" 7#include "common/string_util.h"
8#include "core/core.h" 8#include "core/core.h"
9#include "core/frontend/applets/software_keyboard.h" 9#include "core/frontend/applets/software_keyboard.h"
10#include "core/hle/result.h"
10#include "core/hle/service/am/am.h" 11#include "core/hle/service/am/am.h"
11#include "core/hle/service/am/applets/software_keyboard.h" 12#include "core/hle/service/am/applets/software_keyboard.h"
12 13
diff --git a/src/core/hle/service/am/applets/software_keyboard.h b/src/core/hle/service/am/applets/software_keyboard.h
index efd5753a1..b93a30d28 100644
--- a/src/core/hle/service/am/applets/software_keyboard.h
+++ b/src/core/hle/service/am/applets/software_keyboard.h
@@ -9,10 +9,13 @@
9#include <vector> 9#include <vector>
10 10
11#include "common/common_funcs.h" 11#include "common/common_funcs.h"
12#include "common/common_types.h"
12#include "common/swap.h" 13#include "common/swap.h"
13#include "core/hle/service/am/am.h" 14#include "core/hle/service/am/am.h"
14#include "core/hle/service/am/applets/applets.h" 15#include "core/hle/service/am/applets/applets.h"
15 16
17union ResultCode;
18
16namespace Service::AM::Applets { 19namespace Service::AM::Applets {
17 20
18enum class KeysetDisable : u32 { 21enum class KeysetDisable : u32 {
diff --git a/src/core/hle/service/audio/audout_u.cpp b/src/core/hle/service/audio/audout_u.cpp
index 6831c0735..21f5e64c7 100644
--- a/src/core/hle/service/audio/audout_u.cpp
+++ b/src/core/hle/service/audio/audout_u.cpp
@@ -18,17 +18,11 @@
18#include "core/hle/kernel/readable_event.h" 18#include "core/hle/kernel/readable_event.h"
19#include "core/hle/kernel/writable_event.h" 19#include "core/hle/kernel/writable_event.h"
20#include "core/hle/service/audio/audout_u.h" 20#include "core/hle/service/audio/audout_u.h"
21#include "core/hle/service/audio/errors.h"
21#include "core/memory.h" 22#include "core/memory.h"
22 23
23namespace Service::Audio { 24namespace Service::Audio {
24 25
25namespace ErrCodes {
26enum {
27 ErrorUnknown = 2,
28 BufferCountExceeded = 8,
29};
30}
31
32constexpr std::array<char, 10> DefaultDevice{{"DeviceOut"}}; 26constexpr std::array<char, 10> DefaultDevice{{"DeviceOut"}};
33constexpr int DefaultSampleRate{48000}; 27constexpr int DefaultSampleRate{48000};
34 28
@@ -100,7 +94,7 @@ private:
100 94
101 if (stream->IsPlaying()) { 95 if (stream->IsPlaying()) {
102 IPC::ResponseBuilder rb{ctx, 2}; 96 IPC::ResponseBuilder rb{ctx, 2};
103 rb.Push(ResultCode(ErrorModule::Audio, ErrCodes::ErrorUnknown)); 97 rb.Push(ERR_OPERATION_FAILED);
104 return; 98 return;
105 } 99 }
106 100
@@ -113,7 +107,9 @@ private:
113 void StopAudioOut(Kernel::HLERequestContext& ctx) { 107 void StopAudioOut(Kernel::HLERequestContext& ctx) {
114 LOG_DEBUG(Service_Audio, "called"); 108 LOG_DEBUG(Service_Audio, "called");
115 109
116 audio_core.StopStream(stream); 110 if (stream->IsPlaying()) {
111 audio_core.StopStream(stream);
112 }
117 113
118 IPC::ResponseBuilder rb{ctx, 2}; 114 IPC::ResponseBuilder rb{ctx, 2};
119 rb.Push(RESULT_SUCCESS); 115 rb.Push(RESULT_SUCCESS);
@@ -143,7 +139,8 @@ private:
143 139
144 if (!audio_core.QueueBuffer(stream, tag, std::move(samples))) { 140 if (!audio_core.QueueBuffer(stream, tag, std::move(samples))) {
145 IPC::ResponseBuilder rb{ctx, 2}; 141 IPC::ResponseBuilder rb{ctx, 2};
146 rb.Push(ResultCode(ErrorModule::Audio, ErrCodes::BufferCountExceeded)); 142 rb.Push(ERR_BUFFER_COUNT_EXCEEDED);
143 return;
147 } 144 }
148 145
149 IPC::ResponseBuilder rb{ctx, 2}; 146 IPC::ResponseBuilder rb{ctx, 2};
diff --git a/src/core/hle/service/audio/audren_u.cpp b/src/core/hle/service/audio/audren_u.cpp
index 7e0cc64a8..c9de10a24 100644
--- a/src/core/hle/service/audio/audren_u.cpp
+++ b/src/core/hle/service/audio/audren_u.cpp
@@ -17,6 +17,7 @@
17#include "core/hle/kernel/readable_event.h" 17#include "core/hle/kernel/readable_event.h"
18#include "core/hle/kernel/writable_event.h" 18#include "core/hle/kernel/writable_event.h"
19#include "core/hle/service/audio/audren_u.h" 19#include "core/hle/service/audio/audren_u.h"
20#include "core/hle/service/audio/errors.h"
20 21
21namespace Service::Audio { 22namespace Service::Audio {
22 23
@@ -37,7 +38,7 @@ public:
37 {8, &IAudioRenderer::SetRenderingTimeLimit, "SetRenderingTimeLimit"}, 38 {8, &IAudioRenderer::SetRenderingTimeLimit, "SetRenderingTimeLimit"},
38 {9, &IAudioRenderer::GetRenderingTimeLimit, "GetRenderingTimeLimit"}, 39 {9, &IAudioRenderer::GetRenderingTimeLimit, "GetRenderingTimeLimit"},
39 {10, &IAudioRenderer::RequestUpdateImpl, "RequestUpdateAuto"}, 40 {10, &IAudioRenderer::RequestUpdateImpl, "RequestUpdateAuto"},
40 {11, nullptr, "ExecuteAudioRendererRendering"}, 41 {11, &IAudioRenderer::ExecuteAudioRendererRendering, "ExecuteAudioRendererRendering"},
41 }; 42 };
42 // clang-format on 43 // clang-format on
43 RegisterHandlers(functions); 44 RegisterHandlers(functions);
@@ -138,6 +139,17 @@ private:
138 rb.Push(rendering_time_limit_percent); 139 rb.Push(rendering_time_limit_percent);
139 } 140 }
140 141
142 void ExecuteAudioRendererRendering(Kernel::HLERequestContext& ctx) {
143 LOG_DEBUG(Service_Audio, "called");
144
145 // This service command currently only reports an unsupported operation
146 // error code, or aborts. Given that, we just always return an error
147 // code in this case.
148
149 IPC::ResponseBuilder rb{ctx, 2};
150 rb.Push(ERR_NOT_SUPPORTED);
151 }
152
141 Kernel::EventPair system_event; 153 Kernel::EventPair system_event;
142 std::unique_ptr<AudioCore::AudioRenderer> renderer; 154 std::unique_ptr<AudioCore::AudioRenderer> renderer;
143 u32 rendering_time_limit_percent = 100; 155 u32 rendering_time_limit_percent = 100;
@@ -235,7 +247,7 @@ AudRenU::AudRenU() : ServiceFramework("audren:u") {
235 {0, &AudRenU::OpenAudioRenderer, "OpenAudioRenderer"}, 247 {0, &AudRenU::OpenAudioRenderer, "OpenAudioRenderer"},
236 {1, &AudRenU::GetAudioRendererWorkBufferSize, "GetAudioRendererWorkBufferSize"}, 248 {1, &AudRenU::GetAudioRendererWorkBufferSize, "GetAudioRendererWorkBufferSize"},
237 {2, &AudRenU::GetAudioDeviceService, "GetAudioDeviceService"}, 249 {2, &AudRenU::GetAudioDeviceService, "GetAudioDeviceService"},
238 {3, nullptr, "OpenAudioRendererAuto"}, 250 {3, &AudRenU::OpenAudioRendererAuto, "OpenAudioRendererAuto"},
239 {4, &AudRenU::GetAudioDeviceServiceWithRevisionInfo, "GetAudioDeviceServiceWithRevisionInfo"}, 251 {4, &AudRenU::GetAudioDeviceServiceWithRevisionInfo, "GetAudioDeviceServiceWithRevisionInfo"},
240 }; 252 };
241 // clang-format on 253 // clang-format on
@@ -248,12 +260,7 @@ AudRenU::~AudRenU() = default;
248void AudRenU::OpenAudioRenderer(Kernel::HLERequestContext& ctx) { 260void AudRenU::OpenAudioRenderer(Kernel::HLERequestContext& ctx) {
249 LOG_DEBUG(Service_Audio, "called"); 261 LOG_DEBUG(Service_Audio, "called");
250 262
251 IPC::RequestParser rp{ctx}; 263 OpenAudioRendererImpl(ctx);
252 auto params = rp.PopRaw<AudioCore::AudioRendererParameter>();
253 IPC::ResponseBuilder rb{ctx, 2, 0, 1};
254
255 rb.Push(RESULT_SUCCESS);
256 rb.PushIpcInterface<Audio::IAudioRenderer>(std::move(params));
257} 264}
258 265
259void AudRenU::GetAudioRendererWorkBufferSize(Kernel::HLERequestContext& ctx) { 266void AudRenU::GetAudioRendererWorkBufferSize(Kernel::HLERequestContext& ctx) {
@@ -262,20 +269,20 @@ void AudRenU::GetAudioRendererWorkBufferSize(Kernel::HLERequestContext& ctx) {
262 LOG_DEBUG(Service_Audio, "called"); 269 LOG_DEBUG(Service_Audio, "called");
263 270
264 u64 buffer_sz = Common::AlignUp(4 * params.mix_buffer_count, 0x40); 271 u64 buffer_sz = Common::AlignUp(4 * params.mix_buffer_count, 0x40);
265 buffer_sz += params.unknown_c * 1024; 272 buffer_sz += params.submix_count * 1024;
266 buffer_sz += 0x940 * (params.unknown_c + 1); 273 buffer_sz += 0x940 * (params.submix_count + 1);
267 buffer_sz += 0x3F0 * params.voice_count; 274 buffer_sz += 0x3F0 * params.voice_count;
268 buffer_sz += Common::AlignUp(8 * (params.unknown_c + 1), 0x10); 275 buffer_sz += Common::AlignUp(8 * (params.submix_count + 1), 0x10);
269 buffer_sz += Common::AlignUp(8 * params.voice_count, 0x10); 276 buffer_sz += Common::AlignUp(8 * params.voice_count, 0x10);
270 buffer_sz += 277 buffer_sz += Common::AlignUp(
271 Common::AlignUp((0x3C0 * (params.sink_count + params.unknown_c) + 4 * params.sample_count) * 278 (0x3C0 * (params.sink_count + params.submix_count) + 4 * params.sample_count) *
272 (params.mix_buffer_count + 6), 279 (params.mix_buffer_count + 6),
273 0x40); 280 0x40);
274 281
275 if (IsFeatureSupported(AudioFeatures::Splitter, params.revision)) { 282 if (IsFeatureSupported(AudioFeatures::Splitter, params.revision)) {
276 u32 count = params.unknown_c + 1; 283 const u32 count = params.submix_count + 1;
277 u64 node_count = Common::AlignUp(count, 0x40); 284 u64 node_count = Common::AlignUp(count, 0x40);
278 u64 node_state_buffer_sz = 285 const u64 node_state_buffer_sz =
279 4 * (node_count * node_count) + 0xC * node_count + 2 * (node_count / 8); 286 4 * (node_count * node_count) + 0xC * node_count + 2 * (node_count / 8);
280 u64 edge_matrix_buffer_sz = 0; 287 u64 edge_matrix_buffer_sz = 0;
281 node_count = Common::AlignUp(count * count, 0x40); 288 node_count = Common::AlignUp(count * count, 0x40);
@@ -289,19 +296,19 @@ void AudRenU::GetAudioRendererWorkBufferSize(Kernel::HLERequestContext& ctx) {
289 296
290 buffer_sz += 0x20 * (params.effect_count + 4 * params.voice_count) + 0x50; 297 buffer_sz += 0x20 * (params.effect_count + 4 * params.voice_count) + 0x50;
291 if (IsFeatureSupported(AudioFeatures::Splitter, params.revision)) { 298 if (IsFeatureSupported(AudioFeatures::Splitter, params.revision)) {
292 buffer_sz += 0xE0 * params.unknown_2c; 299 buffer_sz += 0xE0 * params.num_splitter_send_channels;
293 buffer_sz += 0x20 * params.splitter_count; 300 buffer_sz += 0x20 * params.splitter_count;
294 buffer_sz += Common::AlignUp(4 * params.unknown_2c, 0x10); 301 buffer_sz += Common::AlignUp(4 * params.num_splitter_send_channels, 0x10);
295 } 302 }
296 buffer_sz = Common::AlignUp(buffer_sz, 0x40) + 0x170 * params.sink_count; 303 buffer_sz = Common::AlignUp(buffer_sz, 0x40) + 0x170 * params.sink_count;
297 u64 output_sz = buffer_sz + 0x280 * params.sink_count + 0x4B0 * params.effect_count + 304 u64 output_sz = buffer_sz + 0x280 * params.sink_count + 0x4B0 * params.effect_count +
298 ((params.voice_count * 256) | 0x40); 305 ((params.voice_count * 256) | 0x40);
299 306
300 if (params.unknown_1c >= 1) { 307 if (params.performance_frame_count >= 1) {
301 output_sz = Common::AlignUp(((16 * params.sink_count + 16 * params.effect_count + 308 output_sz = Common::AlignUp(((16 * params.sink_count + 16 * params.effect_count +
302 16 * params.voice_count + 16) + 309 16 * params.voice_count + 16) +
303 0x658) * 310 0x658) *
304 (params.unknown_1c + 1) + 311 (params.performance_frame_count + 1) +
305 0xc0, 312 0xc0,
306 0x40) + 313 0x40) +
307 output_sz; 314 output_sz;
@@ -325,6 +332,12 @@ void AudRenU::GetAudioDeviceService(Kernel::HLERequestContext& ctx) {
325 rb.PushIpcInterface<Audio::IAudioDevice>(); 332 rb.PushIpcInterface<Audio::IAudioDevice>();
326} 333}
327 334
335void AudRenU::OpenAudioRendererAuto(Kernel::HLERequestContext& ctx) {
336 LOG_DEBUG(Service_Audio, "called");
337
338 OpenAudioRendererImpl(ctx);
339}
340
328void AudRenU::GetAudioDeviceServiceWithRevisionInfo(Kernel::HLERequestContext& ctx) { 341void AudRenU::GetAudioDeviceServiceWithRevisionInfo(Kernel::HLERequestContext& ctx) {
329 LOG_WARNING(Service_Audio, "(STUBBED) called"); 342 LOG_WARNING(Service_Audio, "(STUBBED) called");
330 343
@@ -335,6 +348,15 @@ void AudRenU::GetAudioDeviceServiceWithRevisionInfo(Kernel::HLERequestContext& c
335 // based on the current revision 348 // based on the current revision
336} 349}
337 350
351void AudRenU::OpenAudioRendererImpl(Kernel::HLERequestContext& ctx) {
352 IPC::RequestParser rp{ctx};
353 const auto params = rp.PopRaw<AudioCore::AudioRendererParameter>();
354 IPC::ResponseBuilder rb{ctx, 2, 0, 1};
355
356 rb.Push(RESULT_SUCCESS);
357 rb.PushIpcInterface<IAudioRenderer>(params);
358}
359
338bool AudRenU::IsFeatureSupported(AudioFeatures feature, u32_le revision) const { 360bool AudRenU::IsFeatureSupported(AudioFeatures feature, u32_le revision) const {
339 u32_be version_num = (revision - Common::MakeMagic('R', 'E', 'V', '0')); // Byte swap 361 u32_be version_num = (revision - Common::MakeMagic('R', 'E', 'V', '0')); // Byte swap
340 switch (feature) { 362 switch (feature) {
diff --git a/src/core/hle/service/audio/audren_u.h b/src/core/hle/service/audio/audren_u.h
index 3d63388fb..e55d25973 100644
--- a/src/core/hle/service/audio/audren_u.h
+++ b/src/core/hle/service/audio/audren_u.h
@@ -21,8 +21,11 @@ private:
21 void OpenAudioRenderer(Kernel::HLERequestContext& ctx); 21 void OpenAudioRenderer(Kernel::HLERequestContext& ctx);
22 void GetAudioRendererWorkBufferSize(Kernel::HLERequestContext& ctx); 22 void GetAudioRendererWorkBufferSize(Kernel::HLERequestContext& ctx);
23 void GetAudioDeviceService(Kernel::HLERequestContext& ctx); 23 void GetAudioDeviceService(Kernel::HLERequestContext& ctx);
24 void OpenAudioRendererAuto(Kernel::HLERequestContext& ctx);
24 void GetAudioDeviceServiceWithRevisionInfo(Kernel::HLERequestContext& ctx); 25 void GetAudioDeviceServiceWithRevisionInfo(Kernel::HLERequestContext& ctx);
25 26
27 void OpenAudioRendererImpl(Kernel::HLERequestContext& ctx);
28
26 enum class AudioFeatures : u32 { 29 enum class AudioFeatures : u32 {
27 Splitter, 30 Splitter,
28 }; 31 };
diff --git a/src/core/hle/service/audio/errors.h b/src/core/hle/service/audio/errors.h
new file mode 100644
index 000000000..6f8c09bcf
--- /dev/null
+++ b/src/core/hle/service/audio/errors.h
@@ -0,0 +1,15 @@
1// Copyright 2019 yuzu emulator team
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include "core/hle/result.h"
8
9namespace Service::Audio {
10
11constexpr ResultCode ERR_OPERATION_FAILED{ErrorModule::Audio, 2};
12constexpr ResultCode ERR_BUFFER_COUNT_EXCEEDED{ErrorModule::Audio, 8};
13constexpr ResultCode ERR_NOT_SUPPORTED{ErrorModule::Audio, 513};
14
15} // namespace Service::Audio
diff --git a/src/core/hle/service/hid/hid.h b/src/core/hle/service/hid/hid.h
index 6d897c842..7cc58db4c 100644
--- a/src/core/hle/service/hid/hid.h
+++ b/src/core/hle/service/hid/hid.h
@@ -15,7 +15,7 @@ namespace Kernel {
15class SharedMemory; 15class SharedMemory;
16} 16}
17 17
18namespace SM { 18namespace Service::SM {
19class ServiceManager; 19class ServiceManager;
20} 20}
21 21
diff --git a/src/core/hle/service/nvdrv/devices/nvdisp_disp0.cpp b/src/core/hle/service/nvdrv/devices/nvdisp_disp0.cpp
index 21ccfe1f8..20c7c39aa 100644
--- a/src/core/hle/service/nvdrv/devices/nvdisp_disp0.cpp
+++ b/src/core/hle/service/nvdrv/devices/nvdisp_disp0.cpp
@@ -23,7 +23,7 @@ u32 nvdisp_disp0::ioctl(Ioctl command, const std::vector<u8>& input, std::vector
23 23
24void nvdisp_disp0::flip(u32 buffer_handle, u32 offset, u32 format, u32 width, u32 height, 24void nvdisp_disp0::flip(u32 buffer_handle, u32 offset, u32 format, u32 width, u32 height,
25 u32 stride, NVFlinger::BufferQueue::BufferTransformFlags transform, 25 u32 stride, NVFlinger::BufferQueue::BufferTransformFlags transform,
26 const MathUtil::Rectangle<int>& crop_rect) { 26 const Common::Rectangle<int>& crop_rect) {
27 VAddr addr = nvmap_dev->GetObjectAddress(buffer_handle); 27 VAddr addr = nvmap_dev->GetObjectAddress(buffer_handle);
28 LOG_TRACE(Service, 28 LOG_TRACE(Service,
29 "Drawing from address {:X} offset {:08X} Width {} Height {} Stride {} Format {}", 29 "Drawing from address {:X} offset {:08X} Width {} Height {} Stride {} Format {}",
@@ -36,7 +36,7 @@ void nvdisp_disp0::flip(u32 buffer_handle, u32 offset, u32 format, u32 width, u3
36 36
37 auto& instance = Core::System::GetInstance(); 37 auto& instance = Core::System::GetInstance();
38 instance.GetPerfStats().EndGameFrame(); 38 instance.GetPerfStats().EndGameFrame();
39 instance.Renderer().SwapBuffers(framebuffer); 39 instance.GPU().SwapBuffers(framebuffer);
40} 40}
41 41
42} // namespace Service::Nvidia::Devices 42} // namespace Service::Nvidia::Devices
diff --git a/src/core/hle/service/nvdrv/devices/nvdisp_disp0.h b/src/core/hle/service/nvdrv/devices/nvdisp_disp0.h
index a45086e45..ace71169f 100644
--- a/src/core/hle/service/nvdrv/devices/nvdisp_disp0.h
+++ b/src/core/hle/service/nvdrv/devices/nvdisp_disp0.h
@@ -25,7 +25,7 @@ public:
25 /// Performs a screen flip, drawing the buffer pointed to by the handle. 25 /// Performs a screen flip, drawing the buffer pointed to by the handle.
26 void flip(u32 buffer_handle, u32 offset, u32 format, u32 width, u32 height, u32 stride, 26 void flip(u32 buffer_handle, u32 offset, u32 format, u32 width, u32 height, u32 stride,
27 NVFlinger::BufferQueue::BufferTransformFlags transform, 27 NVFlinger::BufferQueue::BufferTransformFlags transform,
28 const MathUtil::Rectangle<int>& crop_rect); 28 const Common::Rectangle<int>& crop_rect);
29 29
30private: 30private:
31 std::shared_ptr<nvmap> nvmap_dev; 31 std::shared_ptr<nvmap> nvmap_dev;
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.cpp b/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.cpp
index 466db7ccd..a34b9e753 100644
--- a/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.cpp
+++ b/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.cpp
@@ -178,7 +178,7 @@ u32 nvhost_as_gpu::UnmapBuffer(const std::vector<u8>& input, std::vector<u8>& ou
178 auto& gpu = system_instance.GPU(); 178 auto& gpu = system_instance.GPU();
179 auto cpu_addr = gpu.MemoryManager().GpuToCpuAddress(params.offset); 179 auto cpu_addr = gpu.MemoryManager().GpuToCpuAddress(params.offset);
180 ASSERT(cpu_addr); 180 ASSERT(cpu_addr);
181 system_instance.Renderer().Rasterizer().FlushAndInvalidateRegion(*cpu_addr, itr->second.size); 181 gpu.FlushAndInvalidateRegion(*cpu_addr, itr->second.size);
182 182
183 params.offset = gpu.MemoryManager().UnmapBuffer(params.offset, itr->second.size); 183 params.offset = gpu.MemoryManager().UnmapBuffer(params.offset, itr->second.size);
184 184
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_gpu.cpp b/src/core/hle/service/nvdrv/devices/nvhost_gpu.cpp
index 0a650f36c..8ce7bc7a5 100644
--- a/src/core/hle/service/nvdrv/devices/nvhost_gpu.cpp
+++ b/src/core/hle/service/nvdrv/devices/nvhost_gpu.cpp
@@ -136,16 +136,6 @@ u32 nvhost_gpu::AllocateObjectContext(const std::vector<u8>& input, std::vector<
136 return 0; 136 return 0;
137} 137}
138 138
139static void PushGPUEntries(Tegra::CommandList&& entries) {
140 if (entries.empty()) {
141 return;
142 }
143
144 auto& dma_pusher{Core::System::GetInstance().GPU().DmaPusher()};
145 dma_pusher.Push(std::move(entries));
146 dma_pusher.DispatchCalls();
147}
148
149u32 nvhost_gpu::SubmitGPFIFO(const std::vector<u8>& input, std::vector<u8>& output) { 139u32 nvhost_gpu::SubmitGPFIFO(const std::vector<u8>& input, std::vector<u8>& output) {
150 if (input.size() < sizeof(IoctlSubmitGpfifo)) { 140 if (input.size() < sizeof(IoctlSubmitGpfifo)) {
151 UNIMPLEMENTED(); 141 UNIMPLEMENTED();
@@ -163,7 +153,7 @@ u32 nvhost_gpu::SubmitGPFIFO(const std::vector<u8>& input, std::vector<u8>& outp
163 std::memcpy(entries.data(), &input[sizeof(IoctlSubmitGpfifo)], 153 std::memcpy(entries.data(), &input[sizeof(IoctlSubmitGpfifo)],
164 params.num_entries * sizeof(Tegra::CommandListHeader)); 154 params.num_entries * sizeof(Tegra::CommandListHeader));
165 155
166 PushGPUEntries(std::move(entries)); 156 Core::System::GetInstance().GPU().PushGPUEntries(std::move(entries));
167 157
168 params.fence_out.id = 0; 158 params.fence_out.id = 0;
169 params.fence_out.value = 0; 159 params.fence_out.value = 0;
@@ -184,7 +174,7 @@ u32 nvhost_gpu::KickoffPB(const std::vector<u8>& input, std::vector<u8>& output)
184 Memory::ReadBlock(params.address, entries.data(), 174 Memory::ReadBlock(params.address, entries.data(),
185 params.num_entries * sizeof(Tegra::CommandListHeader)); 175 params.num_entries * sizeof(Tegra::CommandListHeader));
186 176
187 PushGPUEntries(std::move(entries)); 177 Core::System::GetInstance().GPU().PushGPUEntries(std::move(entries));
188 178
189 params.fence_out.id = 0; 179 params.fence_out.id = 0;
190 params.fence_out.value = 0; 180 params.fence_out.value = 0;
diff --git a/src/core/hle/service/nvflinger/buffer_queue.cpp b/src/core/hle/service/nvflinger/buffer_queue.cpp
index fc07d9bb8..4d150fc71 100644
--- a/src/core/hle/service/nvflinger/buffer_queue.cpp
+++ b/src/core/hle/service/nvflinger/buffer_queue.cpp
@@ -63,7 +63,7 @@ const IGBPBuffer& BufferQueue::RequestBuffer(u32 slot) const {
63} 63}
64 64
65void BufferQueue::QueueBuffer(u32 slot, BufferTransformFlags transform, 65void BufferQueue::QueueBuffer(u32 slot, BufferTransformFlags transform,
66 const MathUtil::Rectangle<int>& crop_rect) { 66 const Common::Rectangle<int>& crop_rect) {
67 auto itr = std::find_if(queue.begin(), queue.end(), 67 auto itr = std::find_if(queue.begin(), queue.end(),
68 [&](const Buffer& buffer) { return buffer.slot == slot; }); 68 [&](const Buffer& buffer) { return buffer.slot == slot; });
69 ASSERT(itr != queue.end()); 69 ASSERT(itr != queue.end());
diff --git a/src/core/hle/service/nvflinger/buffer_queue.h b/src/core/hle/service/nvflinger/buffer_queue.h
index ab90d591e..e1ccb6171 100644
--- a/src/core/hle/service/nvflinger/buffer_queue.h
+++ b/src/core/hle/service/nvflinger/buffer_queue.h
@@ -67,14 +67,14 @@ public:
67 Status status = Status::Free; 67 Status status = Status::Free;
68 IGBPBuffer igbp_buffer; 68 IGBPBuffer igbp_buffer;
69 BufferTransformFlags transform; 69 BufferTransformFlags transform;
70 MathUtil::Rectangle<int> crop_rect; 70 Common::Rectangle<int> crop_rect;
71 }; 71 };
72 72
73 void SetPreallocatedBuffer(u32 slot, const IGBPBuffer& igbp_buffer); 73 void SetPreallocatedBuffer(u32 slot, const IGBPBuffer& igbp_buffer);
74 std::optional<u32> DequeueBuffer(u32 width, u32 height); 74 std::optional<u32> DequeueBuffer(u32 width, u32 height);
75 const IGBPBuffer& RequestBuffer(u32 slot) const; 75 const IGBPBuffer& RequestBuffer(u32 slot) const;
76 void QueueBuffer(u32 slot, BufferTransformFlags transform, 76 void QueueBuffer(u32 slot, BufferTransformFlags transform,
77 const MathUtil::Rectangle<int>& crop_rect); 77 const Common::Rectangle<int>& crop_rect);
78 std::optional<std::reference_wrapper<const Buffer>> AcquireBuffer(); 78 std::optional<std::reference_wrapper<const Buffer>> AcquireBuffer();
79 void ReleaseBuffer(u32 slot); 79 void ReleaseBuffer(u32 slot);
80 u32 Query(QueryType type); 80 u32 Query(QueryType type);
diff --git a/src/core/hle/service/nvflinger/nvflinger.cpp b/src/core/hle/service/nvflinger/nvflinger.cpp
index 56f31e2ac..fc496b654 100644
--- a/src/core/hle/service/nvflinger/nvflinger.cpp
+++ b/src/core/hle/service/nvflinger/nvflinger.cpp
@@ -186,7 +186,7 @@ void NVFlinger::Compose() {
186 186
187 // There was no queued buffer to draw, render previous frame 187 // There was no queued buffer to draw, render previous frame
188 system_instance.GetPerfStats().EndGameFrame(); 188 system_instance.GetPerfStats().EndGameFrame();
189 system_instance.Renderer().SwapBuffers({}); 189 system_instance.GPU().SwapBuffers({});
190 continue; 190 continue;
191 } 191 }
192 192
diff --git a/src/core/hle/service/sm/controller.cpp b/src/core/hle/service/sm/controller.cpp
index 74da4d5e6..e9ee73710 100644
--- a/src/core/hle/service/sm/controller.cpp
+++ b/src/core/hle/service/sm/controller.cpp
@@ -30,7 +30,7 @@ void Controller::DuplicateSession(Kernel::HLERequestContext& ctx) {
30 30
31 IPC::ResponseBuilder rb{ctx, 2, 0, 1, IPC::ResponseBuilder::Flags::AlwaysMoveHandles}; 31 IPC::ResponseBuilder rb{ctx, 2, 0, 1, IPC::ResponseBuilder::Flags::AlwaysMoveHandles};
32 rb.Push(RESULT_SUCCESS); 32 rb.Push(RESULT_SUCCESS);
33 Kernel::SharedPtr<Kernel::ClientSession> session{ctx.Session()->parent->client}; 33 Kernel::SharedPtr<Kernel::ClientSession> session{ctx.Session()->GetParent()->client};
34 rb.PushMoveObjects(session); 34 rb.PushMoveObjects(session);
35 35
36 LOG_DEBUG(Service, "session={}", session->GetObjectId()); 36 LOG_DEBUG(Service, "session={}", session->GetObjectId());
diff --git a/src/core/hle/service/vi/vi.cpp b/src/core/hle/service/vi/vi.cpp
index 74384a24d..a975767bb 100644
--- a/src/core/hle/service/vi/vi.cpp
+++ b/src/core/hle/service/vi/vi.cpp
@@ -420,7 +420,7 @@ public:
420 u32_le fence_is_valid; 420 u32_le fence_is_valid;
421 std::array<Fence, 2> fences; 421 std::array<Fence, 2> fences;
422 422
423 MathUtil::Rectangle<int> GetCropRect() const { 423 Common::Rectangle<int> GetCropRect() const {
424 return {crop_left, crop_top, crop_right, crop_bottom}; 424 return {crop_left, crop_top, crop_right, crop_bottom};
425 } 425 }
426 }; 426 };
diff --git a/src/core/memory.cpp b/src/core/memory.cpp
index e9166dbd9..6591c45d2 100644
--- a/src/core/memory.cpp
+++ b/src/core/memory.cpp
@@ -71,15 +71,20 @@ static void MapPages(PageTable& page_table, VAddr base, u64 size, u8* memory, Pa
71 FlushMode::FlushAndInvalidate); 71 FlushMode::FlushAndInvalidate);
72 72
73 VAddr end = base + size; 73 VAddr end = base + size;
74 while (base != end) { 74 ASSERT_MSG(end <= page_table.pointers.size(), "out of range mapping at {:016X}",
75 ASSERT_MSG(base < page_table.pointers.size(), "out of range mapping at {:016X}", base); 75 base + page_table.pointers.size());
76 76
77 page_table.attributes[base] = type; 77 std::fill(page_table.attributes.begin() + base, page_table.attributes.begin() + end, type);
78 page_table.pointers[base] = memory;
79 78
80 base += 1; 79 if (memory == nullptr) {
81 if (memory != nullptr) 80 std::fill(page_table.pointers.begin() + base, page_table.pointers.begin() + end, memory);
81 } else {
82 while (base != end) {
83 page_table.pointers[base] = memory;
84
85 base += 1;
82 memory += PAGE_SIZE; 86 memory += PAGE_SIZE;
87 }
83 } 88 }
84} 89}
85 90
@@ -166,9 +171,6 @@ T Read(const VAddr vaddr) {
166 return value; 171 return value;
167 } 172 }
168 173
169 // The memory access might do an MMIO or cached access, so we have to lock the HLE kernel state
170 std::lock_guard<std::recursive_mutex> lock(HLE::g_hle_lock);
171
172 PageType type = current_page_table->attributes[vaddr >> PAGE_BITS]; 174 PageType type = current_page_table->attributes[vaddr >> PAGE_BITS];
173 switch (type) { 175 switch (type) {
174 case PageType::Unmapped: 176 case PageType::Unmapped:
@@ -199,9 +201,6 @@ void Write(const VAddr vaddr, const T data) {
199 return; 201 return;
200 } 202 }
201 203
202 // The memory access might do an MMIO or cached access, so we have to lock the HLE kernel state
203 std::lock_guard<std::recursive_mutex> lock(HLE::g_hle_lock);
204
205 PageType type = current_page_table->attributes[vaddr >> PAGE_BITS]; 204 PageType type = current_page_table->attributes[vaddr >> PAGE_BITS];
206 switch (type) { 205 switch (type) {
207 case PageType::Unmapped: 206 case PageType::Unmapped:
@@ -357,16 +356,16 @@ void RasterizerFlushVirtualRegion(VAddr start, u64 size, FlushMode mode) {
357 const VAddr overlap_end = std::min(end, region_end); 356 const VAddr overlap_end = std::min(end, region_end);
358 const VAddr overlap_size = overlap_end - overlap_start; 357 const VAddr overlap_size = overlap_end - overlap_start;
359 358
360 auto& rasterizer = system_instance.Renderer().Rasterizer(); 359 auto& gpu = system_instance.GPU();
361 switch (mode) { 360 switch (mode) {
362 case FlushMode::Flush: 361 case FlushMode::Flush:
363 rasterizer.FlushRegion(overlap_start, overlap_size); 362 gpu.FlushRegion(overlap_start, overlap_size);
364 break; 363 break;
365 case FlushMode::Invalidate: 364 case FlushMode::Invalidate:
366 rasterizer.InvalidateRegion(overlap_start, overlap_size); 365 gpu.InvalidateRegion(overlap_start, overlap_size);
367 break; 366 break;
368 case FlushMode::FlushAndInvalidate: 367 case FlushMode::FlushAndInvalidate:
369 rasterizer.FlushAndInvalidateRegion(overlap_start, overlap_size); 368 gpu.FlushAndInvalidateRegion(overlap_start, overlap_size);
370 break; 369 break;
371 } 370 }
372 }; 371 };
diff --git a/src/core/settings.cpp b/src/core/settings.cpp
index 2e232e1e7..6dd3139cc 100644
--- a/src/core/settings.cpp
+++ b/src/core/settings.cpp
@@ -91,7 +91,10 @@ void LogSettings() {
91 LogSetting("Renderer_UseResolutionFactor", Settings::values.resolution_factor); 91 LogSetting("Renderer_UseResolutionFactor", Settings::values.resolution_factor);
92 LogSetting("Renderer_UseFrameLimit", Settings::values.use_frame_limit); 92 LogSetting("Renderer_UseFrameLimit", Settings::values.use_frame_limit);
93 LogSetting("Renderer_FrameLimit", Settings::values.frame_limit); 93 LogSetting("Renderer_FrameLimit", Settings::values.frame_limit);
94 LogSetting("Renderer_UseDiskShaderCache", Settings::values.use_disk_shader_cache);
94 LogSetting("Renderer_UseAccurateGpuEmulation", Settings::values.use_accurate_gpu_emulation); 95 LogSetting("Renderer_UseAccurateGpuEmulation", Settings::values.use_accurate_gpu_emulation);
96 LogSetting("Renderer_UseAsynchronousGpuEmulation",
97 Settings::values.use_asynchronous_gpu_emulation);
95 LogSetting("Audio_OutputEngine", Settings::values.sink_id); 98 LogSetting("Audio_OutputEngine", Settings::values.sink_id);
96 LogSetting("Audio_EnableAudioStretching", Settings::values.enable_audio_stretching); 99 LogSetting("Audio_EnableAudioStretching", Settings::values.enable_audio_stretching);
97 LogSetting("Audio_OutputDevice", Settings::values.audio_device_id); 100 LogSetting("Audio_OutputDevice", Settings::values.audio_device_id);
diff --git a/src/core/settings.h b/src/core/settings.h
index 7e76e0466..cdfb2f742 100644
--- a/src/core/settings.h
+++ b/src/core/settings.h
@@ -393,6 +393,7 @@ struct Values {
393 u16 frame_limit; 393 u16 frame_limit;
394 bool use_disk_shader_cache; 394 bool use_disk_shader_cache;
395 bool use_accurate_gpu_emulation; 395 bool use_accurate_gpu_emulation;
396 bool use_asynchronous_gpu_emulation;
396 397
397 float bg_red; 398 float bg_red;
398 float bg_green; 399 float bg_green;
diff --git a/src/core/telemetry_session.cpp b/src/core/telemetry_session.cpp
index 58dfcc4df..e1db06811 100644
--- a/src/core/telemetry_session.cpp
+++ b/src/core/telemetry_session.cpp
@@ -162,6 +162,8 @@ TelemetrySession::TelemetrySession() {
162 Settings::values.use_disk_shader_cache); 162 Settings::values.use_disk_shader_cache);
163 AddField(Telemetry::FieldType::UserConfig, "Renderer_UseAccurateGpuEmulation", 163 AddField(Telemetry::FieldType::UserConfig, "Renderer_UseAccurateGpuEmulation",
164 Settings::values.use_accurate_gpu_emulation); 164 Settings::values.use_accurate_gpu_emulation);
165 AddField(Telemetry::FieldType::UserConfig, "Renderer_UseAsynchronousGpuEmulation",
166 Settings::values.use_asynchronous_gpu_emulation);
165 AddField(Telemetry::FieldType::UserConfig, "System_UseDockedMode", 167 AddField(Telemetry::FieldType::UserConfig, "System_UseDockedMode",
166 Settings::values.use_docked_mode); 168 Settings::values.use_docked_mode);
167} 169}
diff --git a/src/input_common/motion_emu.cpp b/src/input_common/motion_emu.cpp
index 9570c060e..6d96d4019 100644
--- a/src/input_common/motion_emu.cpp
+++ b/src/input_common/motion_emu.cpp
@@ -32,12 +32,12 @@ public:
32 } 32 }
33 33
34 void BeginTilt(int x, int y) { 34 void BeginTilt(int x, int y) {
35 mouse_origin = Math::MakeVec(x, y); 35 mouse_origin = Common::MakeVec(x, y);
36 is_tilting = true; 36 is_tilting = true;
37 } 37 }
38 38
39 void Tilt(int x, int y) { 39 void Tilt(int x, int y) {
40 auto mouse_move = Math::MakeVec(x, y) - mouse_origin; 40 auto mouse_move = Common::MakeVec(x, y) - mouse_origin;
41 if (is_tilting) { 41 if (is_tilting) {
42 std::lock_guard<std::mutex> guard(tilt_mutex); 42 std::lock_guard<std::mutex> guard(tilt_mutex);
43 if (mouse_move.x == 0 && mouse_move.y == 0) { 43 if (mouse_move.x == 0 && mouse_move.y == 0) {
@@ -45,7 +45,7 @@ public:
45 } else { 45 } else {
46 tilt_direction = mouse_move.Cast<float>(); 46 tilt_direction = mouse_move.Cast<float>();
47 tilt_angle = 47 tilt_angle =
48 std::clamp(tilt_direction.Normalize() * sensitivity, 0.0f, MathUtil::PI * 0.5f); 48 std::clamp(tilt_direction.Normalize() * sensitivity, 0.0f, Common::PI * 0.5f);
49 } 49 }
50 } 50 }
51 } 51 }
@@ -56,7 +56,7 @@ public:
56 is_tilting = false; 56 is_tilting = false;
57 } 57 }
58 58
59 std::tuple<Math::Vec3<float>, Math::Vec3<float>> GetStatus() { 59 std::tuple<Common::Vec3<float>, Common::Vec3<float>> GetStatus() {
60 std::lock_guard<std::mutex> guard(status_mutex); 60 std::lock_guard<std::mutex> guard(status_mutex);
61 return status; 61 return status;
62 } 62 }
@@ -66,17 +66,17 @@ private:
66 const std::chrono::steady_clock::duration update_duration; 66 const std::chrono::steady_clock::duration update_duration;
67 const float sensitivity; 67 const float sensitivity;
68 68
69 Math::Vec2<int> mouse_origin; 69 Common::Vec2<int> mouse_origin;
70 70
71 std::mutex tilt_mutex; 71 std::mutex tilt_mutex;
72 Math::Vec2<float> tilt_direction; 72 Common::Vec2<float> tilt_direction;
73 float tilt_angle = 0; 73 float tilt_angle = 0;
74 74
75 bool is_tilting = false; 75 bool is_tilting = false;
76 76
77 Common::Event shutdown_event; 77 Common::Event shutdown_event;
78 78
79 std::tuple<Math::Vec3<float>, Math::Vec3<float>> status; 79 std::tuple<Common::Vec3<float>, Common::Vec3<float>> status;
80 std::mutex status_mutex; 80 std::mutex status_mutex;
81 81
82 // Note: always keep the thread declaration at the end so that other objects are initialized 82 // Note: always keep the thread declaration at the end so that other objects are initialized
@@ -85,8 +85,8 @@ private:
85 85
86 void MotionEmuThread() { 86 void MotionEmuThread() {
87 auto update_time = std::chrono::steady_clock::now(); 87 auto update_time = std::chrono::steady_clock::now();
88 Math::Quaternion<float> q = MakeQuaternion(Math::Vec3<float>(), 0); 88 Common::Quaternion<float> q = Common::MakeQuaternion(Common::Vec3<float>(), 0);
89 Math::Quaternion<float> old_q; 89 Common::Quaternion<float> old_q;
90 90
91 while (!shutdown_event.WaitUntil(update_time)) { 91 while (!shutdown_event.WaitUntil(update_time)) {
92 update_time += update_duration; 92 update_time += update_duration;
@@ -96,18 +96,18 @@ private:
96 std::lock_guard<std::mutex> guard(tilt_mutex); 96 std::lock_guard<std::mutex> guard(tilt_mutex);
97 97
98 // Find the quaternion describing current 3DS tilting 98 // Find the quaternion describing current 3DS tilting
99 q = MakeQuaternion(Math::MakeVec(-tilt_direction.y, 0.0f, tilt_direction.x), 99 q = Common::MakeQuaternion(
100 tilt_angle); 100 Common::MakeVec(-tilt_direction.y, 0.0f, tilt_direction.x), tilt_angle);
101 } 101 }
102 102
103 auto inv_q = q.Inverse(); 103 auto inv_q = q.Inverse();
104 104
105 // Set the gravity vector in world space 105 // Set the gravity vector in world space
106 auto gravity = Math::MakeVec(0.0f, -1.0f, 0.0f); 106 auto gravity = Common::MakeVec(0.0f, -1.0f, 0.0f);
107 107
108 // Find the angular rate vector in world space 108 // Find the angular rate vector in world space
109 auto angular_rate = ((q - old_q) * inv_q).xyz * 2; 109 auto angular_rate = ((q - old_q) * inv_q).xyz * 2;
110 angular_rate *= 1000 / update_millisecond / MathUtil::PI * 180; 110 angular_rate *= 1000 / update_millisecond / Common::PI * 180;
111 111
112 // Transform the two vectors from world space to 3DS space 112 // Transform the two vectors from world space to 3DS space
113 gravity = QuaternionRotate(inv_q, gravity); 113 gravity = QuaternionRotate(inv_q, gravity);
@@ -131,7 +131,7 @@ public:
131 device = std::make_shared<MotionEmuDevice>(update_millisecond, sensitivity); 131 device = std::make_shared<MotionEmuDevice>(update_millisecond, sensitivity);
132 } 132 }
133 133
134 std::tuple<Math::Vec3<float>, Math::Vec3<float>> GetStatus() const override { 134 std::tuple<Common::Vec3<float>, Common::Vec3<float>> GetStatus() const override {
135 return device->GetStatus(); 135 return device->GetStatus();
136 } 136 }
137 137
diff --git a/src/tests/core/arm/arm_test_common.cpp b/src/tests/core/arm/arm_test_common.cpp
index 9b8a44fa1..ea27ef90d 100644
--- a/src/tests/core/arm/arm_test_common.cpp
+++ b/src/tests/core/arm/arm_test_common.cpp
@@ -13,11 +13,11 @@
13namespace ArmTests { 13namespace ArmTests {
14 14
15TestEnvironment::TestEnvironment(bool mutable_memory_) 15TestEnvironment::TestEnvironment(bool mutable_memory_)
16 : mutable_memory(mutable_memory_), test_memory(std::make_shared<TestMemory>(this)) { 16 : mutable_memory(mutable_memory_),
17 17 test_memory(std::make_shared<TestMemory>(this)), kernel{Core::System::GetInstance()} {
18 auto process = Kernel::Process::Create(kernel, ""); 18 auto process = Kernel::Process::Create(kernel, "");
19 kernel.MakeCurrentProcess(process.get()); 19 kernel.MakeCurrentProcess(process.get());
20 page_table = &Core::CurrentProcess()->VMManager().page_table; 20 page_table = &process->VMManager().page_table;
21 21
22 std::fill(page_table->pointers.begin(), page_table->pointers.end(), nullptr); 22 std::fill(page_table->pointers.begin(), page_table->pointers.end(), nullptr);
23 page_table->special_regions.clear(); 23 page_table->special_regions.clear();
diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt
index 661a113fb..0c3038c52 100644
--- a/src/video_core/CMakeLists.txt
+++ b/src/video_core/CMakeLists.txt
@@ -17,6 +17,12 @@ add_library(video_core STATIC
17 engines/shader_header.h 17 engines/shader_header.h
18 gpu.cpp 18 gpu.cpp
19 gpu.h 19 gpu.h
20 gpu_asynch.cpp
21 gpu_asynch.h
22 gpu_synch.cpp
23 gpu_synch.h
24 gpu_thread.cpp
25 gpu_thread.h
20 macro_interpreter.cpp 26 macro_interpreter.cpp
21 macro_interpreter.h 27 macro_interpreter.h
22 memory_manager.cpp 28 memory_manager.cpp
@@ -95,6 +101,8 @@ add_library(video_core STATIC
95 surface.h 101 surface.h
96 textures/astc.cpp 102 textures/astc.cpp
97 textures/astc.h 103 textures/astc.h
104 textures/convert.cpp
105 textures/convert.h
98 textures/decoders.cpp 106 textures/decoders.cpp
99 textures/decoders.h 107 textures/decoders.h
100 textures/texture.h 108 textures/texture.h
@@ -105,6 +113,10 @@ add_library(video_core STATIC
105if (ENABLE_VULKAN) 113if (ENABLE_VULKAN)
106 target_sources(video_core PRIVATE 114 target_sources(video_core PRIVATE
107 renderer_vulkan/declarations.h 115 renderer_vulkan/declarations.h
116 renderer_vulkan/maxwell_to_vk.cpp
117 renderer_vulkan/maxwell_to_vk.h
118 renderer_vulkan/vk_buffer_cache.cpp
119 renderer_vulkan/vk_buffer_cache.h
108 renderer_vulkan/vk_device.cpp 120 renderer_vulkan/vk_device.cpp
109 renderer_vulkan/vk_device.h 121 renderer_vulkan/vk_device.h
110 renderer_vulkan/vk_memory_manager.cpp 122 renderer_vulkan/vk_memory_manager.cpp
@@ -112,7 +124,9 @@ if (ENABLE_VULKAN)
112 renderer_vulkan/vk_resource_manager.cpp 124 renderer_vulkan/vk_resource_manager.cpp
113 renderer_vulkan/vk_resource_manager.h 125 renderer_vulkan/vk_resource_manager.h
114 renderer_vulkan/vk_scheduler.cpp 126 renderer_vulkan/vk_scheduler.cpp
115 renderer_vulkan/vk_scheduler.h) 127 renderer_vulkan/vk_scheduler.h
128 renderer_vulkan/vk_stream_buffer.cpp
129 renderer_vulkan/vk_stream_buffer.h)
116 130
117 target_include_directories(video_core PRIVATE ../../externals/Vulkan-Headers/include) 131 target_include_directories(video_core PRIVATE ../../externals/Vulkan-Headers/include)
118 target_compile_definitions(video_core PRIVATE HAS_VULKAN) 132 target_compile_definitions(video_core PRIVATE HAS_VULKAN)
diff --git a/src/video_core/dma_pusher.cpp b/src/video_core/dma_pusher.cpp
index 669541b4b..bff1a37ff 100644
--- a/src/video_core/dma_pusher.cpp
+++ b/src/video_core/dma_pusher.cpp
@@ -39,7 +39,7 @@ bool DmaPusher::Step() {
39 } 39 }
40 40
41 const CommandList& command_list{dma_pushbuffer.front()}; 41 const CommandList& command_list{dma_pushbuffer.front()};
42 const CommandListHeader& command_list_header{command_list[dma_pushbuffer_subindex++]}; 42 const CommandListHeader command_list_header{command_list[dma_pushbuffer_subindex++]};
43 GPUVAddr dma_get = command_list_header.addr; 43 GPUVAddr dma_get = command_list_header.addr;
44 GPUVAddr dma_put = dma_get + command_list_header.size * sizeof(u32); 44 GPUVAddr dma_put = dma_get + command_list_header.size * sizeof(u32);
45 bool non_main = command_list_header.is_non_main; 45 bool non_main = command_list_header.is_non_main;
diff --git a/src/video_core/engines/fermi_2d.cpp b/src/video_core/engines/fermi_2d.cpp
index ec1a57226..03b7ee5d8 100644
--- a/src/video_core/engines/fermi_2d.cpp
+++ b/src/video_core/engines/fermi_2d.cpp
@@ -2,12 +2,11 @@
2// Licensed under GPLv2 or any later version 2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included. 3// Refer to the license.txt file included.
4 4
5#include "core/core.h" 5#include "common/assert.h"
6#include "core/memory.h" 6#include "common/logging/log.h"
7#include "common/math_util.h"
7#include "video_core/engines/fermi_2d.h" 8#include "video_core/engines/fermi_2d.h"
8#include "video_core/engines/maxwell_3d.h"
9#include "video_core/rasterizer_interface.h" 9#include "video_core/rasterizer_interface.h"
10#include "video_core/textures/decoders.h"
11 10
12namespace Tegra::Engines { 11namespace Tegra::Engines {
13 12
@@ -44,10 +43,10 @@ void Fermi2D::HandleSurfaceCopy() {
44 const u32 src_blit_y2{ 43 const u32 src_blit_y2{
45 static_cast<u32>((regs.blit_src_y + (regs.blit_dst_height * regs.blit_dv_dy)) >> 32)}; 44 static_cast<u32>((regs.blit_src_y + (regs.blit_dst_height * regs.blit_dv_dy)) >> 32)};
46 45
47 const MathUtil::Rectangle<u32> src_rect{src_blit_x1, src_blit_y1, src_blit_x2, src_blit_y2}; 46 const Common::Rectangle<u32> src_rect{src_blit_x1, src_blit_y1, src_blit_x2, src_blit_y2};
48 const MathUtil::Rectangle<u32> dst_rect{regs.blit_dst_x, regs.blit_dst_y, 47 const Common::Rectangle<u32> dst_rect{regs.blit_dst_x, regs.blit_dst_y,
49 regs.blit_dst_x + regs.blit_dst_width, 48 regs.blit_dst_x + regs.blit_dst_width,
50 regs.blit_dst_y + regs.blit_dst_height}; 49 regs.blit_dst_y + regs.blit_dst_height};
51 50
52 if (!rasterizer.AccelerateSurfaceCopy(regs.src, regs.dst, src_rect, dst_rect)) { 51 if (!rasterizer.AccelerateSurfaceCopy(regs.src, regs.dst, src_rect, dst_rect)) {
53 UNIMPLEMENTED(); 52 UNIMPLEMENTED();
diff --git a/src/video_core/engines/fermi_2d.h b/src/video_core/engines/fermi_2d.h
index c69f74cc5..80523e320 100644
--- a/src/video_core/engines/fermi_2d.h
+++ b/src/video_core/engines/fermi_2d.h
@@ -5,7 +5,7 @@
5#pragma once 5#pragma once
6 6
7#include <array> 7#include <array>
8#include "common/assert.h" 8#include <cstddef>
9#include "common/bit_field.h" 9#include "common/bit_field.h"
10#include "common/common_funcs.h" 10#include "common/common_funcs.h"
11#include "common/common_types.h" 11#include "common/common_types.h"
diff --git a/src/video_core/engines/kepler_compute.cpp b/src/video_core/engines/kepler_compute.cpp
index 4ca856b6b..b1d950460 100644
--- a/src/video_core/engines/kepler_compute.cpp
+++ b/src/video_core/engines/kepler_compute.cpp
@@ -2,9 +2,8 @@
2// Licensed under GPLv2 or any later version 2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included. 3// Refer to the license.txt file included.
4 4
5#include "common/assert.h"
5#include "common/logging/log.h" 6#include "common/logging/log.h"
6#include "core/core.h"
7#include "core/memory.h"
8#include "video_core/engines/kepler_compute.h" 7#include "video_core/engines/kepler_compute.h"
9#include "video_core/memory_manager.h" 8#include "video_core/memory_manager.h"
10 9
diff --git a/src/video_core/engines/kepler_compute.h b/src/video_core/engines/kepler_compute.h
index df0a32e0f..6575afd0f 100644
--- a/src/video_core/engines/kepler_compute.h
+++ b/src/video_core/engines/kepler_compute.h
@@ -5,8 +5,7 @@
5#pragma once 5#pragma once
6 6
7#include <array> 7#include <array>
8#include "common/assert.h" 8#include <cstddef>
9#include "common/bit_field.h"
10#include "common/common_funcs.h" 9#include "common/common_funcs.h"
11#include "common/common_types.h" 10#include "common/common_types.h"
12#include "video_core/gpu.h" 11#include "video_core/gpu.h"
diff --git a/src/video_core/engines/kepler_memory.cpp b/src/video_core/engines/kepler_memory.cpp
index 4f6126116..aae2a4019 100644
--- a/src/video_core/engines/kepler_memory.cpp
+++ b/src/video_core/engines/kepler_memory.cpp
@@ -48,7 +48,7 @@ void KeplerMemory::ProcessData(u32 data) {
48 // We have to invalidate the destination region to evict any outdated surfaces from the cache. 48 // We have to invalidate the destination region to evict any outdated surfaces from the cache.
49 // We do this before actually writing the new data because the destination address might contain 49 // We do this before actually writing the new data because the destination address might contain
50 // a dirty surface that will have to be written back to memory. 50 // a dirty surface that will have to be written back to memory.
51 rasterizer.InvalidateRegion(*dest_address, sizeof(u32)); 51 Core::System::GetInstance().GPU().InvalidateRegion(*dest_address, sizeof(u32));
52 52
53 Memory::Write32(*dest_address, data); 53 Memory::Write32(*dest_address, data);
54 system.GPU().Maxwell3D().dirty_flags.OnMemoryWrite(); 54 system.GPU().Maxwell3D().dirty_flags.OnMemoryWrite();
diff --git a/src/video_core/engines/kepler_memory.h b/src/video_core/engines/kepler_memory.h
index f680c2ad9..9181e9d80 100644
--- a/src/video_core/engines/kepler_memory.h
+++ b/src/video_core/engines/kepler_memory.h
@@ -5,6 +5,7 @@
5#pragma once 5#pragma once
6 6
7#include <array> 7#include <array>
8#include <cstddef>
8#include "common/bit_field.h" 9#include "common/bit_field.h"
9#include "common/common_funcs.h" 10#include "common/common_funcs.h"
10#include "common/common_types.h" 11#include "common/common_types.h"
diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp
index 2d2136067..144e7fa82 100644
--- a/src/video_core/engines/maxwell_3d.cpp
+++ b/src/video_core/engines/maxwell_3d.cpp
@@ -107,21 +107,23 @@ void Maxwell3D::CallMacroMethod(u32 method, std::vector<u32> parameters) {
107void Maxwell3D::CallMethod(const GPU::MethodCall& method_call) { 107void Maxwell3D::CallMethod(const GPU::MethodCall& method_call) {
108 auto debug_context = system.GetGPUDebugContext(); 108 auto debug_context = system.GetGPUDebugContext();
109 109
110 const u32 method = method_call.method;
111
110 // It is an error to write to a register other than the current macro's ARG register before it 112 // It is an error to write to a register other than the current macro's ARG register before it
111 // has finished execution. 113 // has finished execution.
112 if (executing_macro != 0) { 114 if (executing_macro != 0) {
113 ASSERT(method_call.method == executing_macro + 1); 115 ASSERT(method == executing_macro + 1);
114 } 116 }
115 117
116 // Methods after 0xE00 are special, they're actually triggers for some microcode that was 118 // Methods after 0xE00 are special, they're actually triggers for some microcode that was
117 // uploaded to the GPU during initialization. 119 // uploaded to the GPU during initialization.
118 if (method_call.method >= MacroRegistersStart) { 120 if (method >= MacroRegistersStart) {
119 // We're trying to execute a macro 121 // We're trying to execute a macro
120 if (executing_macro == 0) { 122 if (executing_macro == 0) {
121 // A macro call must begin by writing the macro method's register, not its argument. 123 // A macro call must begin by writing the macro method's register, not its argument.
122 ASSERT_MSG((method_call.method % 2) == 0, 124 ASSERT_MSG((method % 2) == 0,
123 "Can't start macro execution by writing to the ARGS register"); 125 "Can't start macro execution by writing to the ARGS register");
124 executing_macro = method_call.method; 126 executing_macro = method;
125 } 127 }
126 128
127 macro_params.push_back(method_call.argument); 129 macro_params.push_back(method_call.argument);
@@ -133,66 +135,62 @@ void Maxwell3D::CallMethod(const GPU::MethodCall& method_call) {
133 return; 135 return;
134 } 136 }
135 137
136 ASSERT_MSG(method_call.method < Regs::NUM_REGS, 138 ASSERT_MSG(method < Regs::NUM_REGS,
137 "Invalid Maxwell3D register, increase the size of the Regs structure"); 139 "Invalid Maxwell3D register, increase the size of the Regs structure");
138 140
139 if (debug_context) { 141 if (debug_context) {
140 debug_context->OnEvent(Tegra::DebugContext::Event::MaxwellCommandLoaded, nullptr); 142 debug_context->OnEvent(Tegra::DebugContext::Event::MaxwellCommandLoaded, nullptr);
141 } 143 }
142 144
143 if (regs.reg_array[method_call.method] != method_call.argument) { 145 if (regs.reg_array[method] != method_call.argument) {
144 regs.reg_array[method_call.method] = method_call.argument; 146 regs.reg_array[method] = method_call.argument;
145 // Color buffers 147 // Color buffers
146 constexpr u32 first_rt_reg = MAXWELL3D_REG_INDEX(rt); 148 constexpr u32 first_rt_reg = MAXWELL3D_REG_INDEX(rt);
147 constexpr u32 registers_per_rt = sizeof(regs.rt[0]) / sizeof(u32); 149 constexpr u32 registers_per_rt = sizeof(regs.rt[0]) / sizeof(u32);
148 if (method_call.method >= first_rt_reg && 150 if (method >= first_rt_reg &&
149 method_call.method < first_rt_reg + registers_per_rt * Regs::NumRenderTargets) { 151 method < first_rt_reg + registers_per_rt * Regs::NumRenderTargets) {
150 const std::size_t rt_index = (method_call.method - first_rt_reg) / registers_per_rt; 152 const std::size_t rt_index = (method - first_rt_reg) / registers_per_rt;
151 dirty_flags.color_buffer |= 1u << static_cast<u32>(rt_index); 153 dirty_flags.color_buffer.set(rt_index);
152 } 154 }
153 155
154 // Zeta buffer 156 // Zeta buffer
155 constexpr u32 registers_in_zeta = sizeof(regs.zeta) / sizeof(u32); 157 constexpr u32 registers_in_zeta = sizeof(regs.zeta) / sizeof(u32);
156 if (method_call.method == MAXWELL3D_REG_INDEX(zeta_enable) || 158 if (method == MAXWELL3D_REG_INDEX(zeta_enable) ||
157 method_call.method == MAXWELL3D_REG_INDEX(zeta_width) || 159 method == MAXWELL3D_REG_INDEX(zeta_width) ||
158 method_call.method == MAXWELL3D_REG_INDEX(zeta_height) || 160 method == MAXWELL3D_REG_INDEX(zeta_height) ||
159 (method_call.method >= MAXWELL3D_REG_INDEX(zeta) && 161 (method >= MAXWELL3D_REG_INDEX(zeta) &&
160 method_call.method < MAXWELL3D_REG_INDEX(zeta) + registers_in_zeta)) { 162 method < MAXWELL3D_REG_INDEX(zeta) + registers_in_zeta)) {
161 dirty_flags.zeta_buffer = true; 163 dirty_flags.zeta_buffer = true;
162 } 164 }
163 165
164 // Shader 166 // Shader
165 constexpr u32 shader_registers_count = 167 constexpr u32 shader_registers_count =
166 sizeof(regs.shader_config[0]) * Regs::MaxShaderProgram / sizeof(u32); 168 sizeof(regs.shader_config[0]) * Regs::MaxShaderProgram / sizeof(u32);
167 if (method_call.method >= MAXWELL3D_REG_INDEX(shader_config[0]) && 169 if (method >= MAXWELL3D_REG_INDEX(shader_config[0]) &&
168 method_call.method < MAXWELL3D_REG_INDEX(shader_config[0]) + shader_registers_count) { 170 method < MAXWELL3D_REG_INDEX(shader_config[0]) + shader_registers_count) {
169 dirty_flags.shaders = true; 171 dirty_flags.shaders = true;
170 } 172 }
171 173
172 // Vertex format 174 // Vertex format
173 if (method_call.method >= MAXWELL3D_REG_INDEX(vertex_attrib_format) && 175 if (method >= MAXWELL3D_REG_INDEX(vertex_attrib_format) &&
174 method_call.method < 176 method < MAXWELL3D_REG_INDEX(vertex_attrib_format) + regs.vertex_attrib_format.size()) {
175 MAXWELL3D_REG_INDEX(vertex_attrib_format) + regs.vertex_attrib_format.size()) {
176 dirty_flags.vertex_attrib_format = true; 177 dirty_flags.vertex_attrib_format = true;
177 } 178 }
178 179
179 // Vertex buffer 180 // Vertex buffer
180 if (method_call.method >= MAXWELL3D_REG_INDEX(vertex_array) && 181 if (method >= MAXWELL3D_REG_INDEX(vertex_array) &&
181 method_call.method < MAXWELL3D_REG_INDEX(vertex_array) + 4 * 32) { 182 method < MAXWELL3D_REG_INDEX(vertex_array) + 4 * 32) {
182 dirty_flags.vertex_array |= 183 dirty_flags.vertex_array.set((method - MAXWELL3D_REG_INDEX(vertex_array)) >> 2);
183 1u << ((method_call.method - MAXWELL3D_REG_INDEX(vertex_array)) >> 2); 184 } else if (method >= MAXWELL3D_REG_INDEX(vertex_array_limit) &&
184 } else if (method_call.method >= MAXWELL3D_REG_INDEX(vertex_array_limit) && 185 method < MAXWELL3D_REG_INDEX(vertex_array_limit) + 2 * 32) {
185 method_call.method < MAXWELL3D_REG_INDEX(vertex_array_limit) + 2 * 32) { 186 dirty_flags.vertex_array.set((method - MAXWELL3D_REG_INDEX(vertex_array_limit)) >> 1);
186 dirty_flags.vertex_array |= 187 } else if (method >= MAXWELL3D_REG_INDEX(instanced_arrays) &&
187 1u << ((method_call.method - MAXWELL3D_REG_INDEX(vertex_array_limit)) >> 1); 188 method < MAXWELL3D_REG_INDEX(instanced_arrays) + 32) {
188 } else if (method_call.method >= MAXWELL3D_REG_INDEX(instanced_arrays) && 189 dirty_flags.vertex_array.set(method - MAXWELL3D_REG_INDEX(instanced_arrays));
189 method_call.method < MAXWELL3D_REG_INDEX(instanced_arrays) + 32) {
190 dirty_flags.vertex_array |=
191 1u << (method_call.method - MAXWELL3D_REG_INDEX(instanced_arrays));
192 } 190 }
193 } 191 }
194 192
195 switch (method_call.method) { 193 switch (method) {
196 case MAXWELL3D_REG_INDEX(macros.data): { 194 case MAXWELL3D_REG_INDEX(macros.data): {
197 ProcessMacroUpload(method_call.argument); 195 ProcessMacroUpload(method_call.argument);
198 break; 196 break;
diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h
index 0e3873ffd..7fbf1026e 100644
--- a/src/video_core/engines/maxwell_3d.h
+++ b/src/video_core/engines/maxwell_3d.h
@@ -5,8 +5,10 @@
5#pragma once 5#pragma once
6 6
7#include <array> 7#include <array>
8#include <bitset>
8#include <unordered_map> 9#include <unordered_map>
9#include <vector> 10#include <vector>
11
10#include "common/assert.h" 12#include "common/assert.h"
11#include "common/bit_field.h" 13#include "common/bit_field.h"
12#include "common/common_funcs.h" 14#include "common/common_funcs.h"
@@ -503,7 +505,7 @@ public:
503 f32 translate_z; 505 f32 translate_z;
504 INSERT_PADDING_WORDS(2); 506 INSERT_PADDING_WORDS(2);
505 507
506 MathUtil::Rectangle<s32> GetRect() const { 508 Common::Rectangle<s32> GetRect() const {
507 return { 509 return {
508 GetX(), // left 510 GetX(), // left
509 GetY() + GetHeight(), // top 511 GetY() + GetHeight(), // top
@@ -1094,19 +1096,18 @@ public:
1094 MemoryManager& memory_manager; 1096 MemoryManager& memory_manager;
1095 1097
1096 struct DirtyFlags { 1098 struct DirtyFlags {
1097 u8 color_buffer = 0xFF; 1099 std::bitset<8> color_buffer{0xFF};
1098 bool zeta_buffer = true; 1100 std::bitset<32> vertex_array{0xFFFFFFFF};
1099
1100 bool shaders = true;
1101 1101
1102 bool vertex_attrib_format = true; 1102 bool vertex_attrib_format = true;
1103 u32 vertex_array = 0xFFFFFFFF; 1103 bool zeta_buffer = true;
1104 bool shaders = true;
1104 1105
1105 void OnMemoryWrite() { 1106 void OnMemoryWrite() {
1106 color_buffer = 0xFF;
1107 zeta_buffer = true; 1107 zeta_buffer = true;
1108 shaders = true; 1108 shaders = true;
1109 vertex_array = 0xFFFFFFFF; 1109 color_buffer.set();
1110 vertex_array.set();
1110 } 1111 }
1111 }; 1112 };
1112 1113
diff --git a/src/video_core/engines/maxwell_dma.cpp b/src/video_core/engines/maxwell_dma.cpp
index 529a14ec7..9dfea5999 100644
--- a/src/video_core/engines/maxwell_dma.cpp
+++ b/src/video_core/engines/maxwell_dma.cpp
@@ -3,6 +3,7 @@
3// Refer to the license.txt file included. 3// Refer to the license.txt file included.
4 4
5#include "common/assert.h" 5#include "common/assert.h"
6#include "common/logging/log.h"
6#include "core/core.h" 7#include "core/core.h"
7#include "core/memory.h" 8#include "core/memory.h"
8#include "video_core/engines/maxwell_3d.h" 9#include "video_core/engines/maxwell_3d.h"
@@ -91,12 +92,12 @@ void MaxwellDMA::HandleCopy() {
91 const auto FlushAndInvalidate = [&](u32 src_size, u64 dst_size) { 92 const auto FlushAndInvalidate = [&](u32 src_size, u64 dst_size) {
92 // TODO(Subv): For now, manually flush the regions until we implement GPU-accelerated 93 // TODO(Subv): For now, manually flush the regions until we implement GPU-accelerated
93 // copying. 94 // copying.
94 rasterizer.FlushRegion(*source_cpu, src_size); 95 Core::System::GetInstance().GPU().FlushRegion(*source_cpu, src_size);
95 96
96 // We have to invalidate the destination region to evict any outdated surfaces from the 97 // We have to invalidate the destination region to evict any outdated surfaces from the
97 // cache. We do this before actually writing the new data because the destination address 98 // cache. We do this before actually writing the new data because the destination address
98 // might contain a dirty surface that will have to be written back to memory. 99 // might contain a dirty surface that will have to be written back to memory.
99 rasterizer.InvalidateRegion(*dest_cpu, dst_size); 100 Core::System::GetInstance().GPU().InvalidateRegion(*dest_cpu, dst_size);
100 }; 101 };
101 102
102 if (regs.exec.is_dst_linear && !regs.exec.is_src_linear) { 103 if (regs.exec.is_dst_linear && !regs.exec.is_src_linear) {
diff --git a/src/video_core/engines/maxwell_dma.h b/src/video_core/engines/maxwell_dma.h
index cf75aeb12..34c369320 100644
--- a/src/video_core/engines/maxwell_dma.h
+++ b/src/video_core/engines/maxwell_dma.h
@@ -5,6 +5,7 @@
5#pragma once 5#pragma once
6 6
7#include <array> 7#include <array>
8#include <cstddef>
8#include "common/bit_field.h" 9#include "common/bit_field.h"
9#include "common/common_funcs.h" 10#include "common/common_funcs.h"
10#include "common/common_types.h" 11#include "common/common_types.h"
diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h
index f62ae8801..7f613370b 100644
--- a/src/video_core/engines/shader_bytecode.h
+++ b/src/video_core/engines/shader_bytecode.h
@@ -6,7 +6,6 @@
6 6
7#include <bitset> 7#include <bitset>
8#include <optional> 8#include <optional>
9#include <string>
10#include <tuple> 9#include <tuple>
11#include <vector> 10#include <vector>
12 11
diff --git a/src/video_core/gpu.cpp b/src/video_core/gpu.cpp
index ac30d1a89..08abf8ac9 100644
--- a/src/video_core/gpu.cpp
+++ b/src/video_core/gpu.cpp
@@ -12,7 +12,7 @@
12#include "video_core/engines/maxwell_3d.h" 12#include "video_core/engines/maxwell_3d.h"
13#include "video_core/engines/maxwell_dma.h" 13#include "video_core/engines/maxwell_dma.h"
14#include "video_core/gpu.h" 14#include "video_core/gpu.h"
15#include "video_core/rasterizer_interface.h" 15#include "video_core/renderer_base.h"
16 16
17namespace Tegra { 17namespace Tegra {
18 18
@@ -28,7 +28,8 @@ u32 FramebufferConfig::BytesPerPixel(PixelFormat format) {
28 UNREACHABLE(); 28 UNREACHABLE();
29} 29}
30 30
31GPU::GPU(Core::System& system, VideoCore::RasterizerInterface& rasterizer) { 31GPU::GPU(Core::System& system, VideoCore::RendererBase& renderer) : renderer{renderer} {
32 auto& rasterizer{renderer.Rasterizer()};
32 memory_manager = std::make_unique<Tegra::MemoryManager>(); 33 memory_manager = std::make_unique<Tegra::MemoryManager>();
33 dma_pusher = std::make_unique<Tegra::DmaPusher>(*this); 34 dma_pusher = std::make_unique<Tegra::DmaPusher>(*this);
34 maxwell_3d = std::make_unique<Engines::Maxwell3D>(system, rasterizer, *memory_manager); 35 maxwell_3d = std::make_unique<Engines::Maxwell3D>(system, rasterizer, *memory_manager);
diff --git a/src/video_core/gpu.h b/src/video_core/gpu.h
index 0f5bfdcbf..56a203275 100644
--- a/src/video_core/gpu.h
+++ b/src/video_core/gpu.h
@@ -16,8 +16,8 @@ class System;
16} 16}
17 17
18namespace VideoCore { 18namespace VideoCore {
19class RasterizerInterface; 19class RendererBase;
20} 20} // namespace VideoCore
21 21
22namespace Tegra { 22namespace Tegra {
23 23
@@ -100,7 +100,7 @@ struct FramebufferConfig {
100 100
101 using TransformFlags = Service::NVFlinger::BufferQueue::BufferTransformFlags; 101 using TransformFlags = Service::NVFlinger::BufferQueue::BufferTransformFlags;
102 TransformFlags transform_flags; 102 TransformFlags transform_flags;
103 MathUtil::Rectangle<int> crop_rect; 103 Common::Rectangle<int> crop_rect;
104}; 104};
105 105
106namespace Engines { 106namespace Engines {
@@ -119,10 +119,11 @@ enum class EngineID {
119 MAXWELL_DMA_COPY_A = 0xB0B5, 119 MAXWELL_DMA_COPY_A = 0xB0B5,
120}; 120};
121 121
122class GPU final { 122class GPU {
123public: 123public:
124 explicit GPU(Core::System& system, VideoCore::RasterizerInterface& rasterizer); 124 explicit GPU(Core::System& system, VideoCore::RendererBase& renderer);
125 ~GPU(); 125
126 virtual ~GPU();
126 127
127 struct MethodCall { 128 struct MethodCall {
128 u32 method{}; 129 u32 method{};
@@ -200,8 +201,42 @@ public:
200 }; 201 };
201 } regs{}; 202 } regs{};
202 203
204 /// Push GPU command entries to be processed
205 virtual void PushGPUEntries(Tegra::CommandList&& entries) = 0;
206
207 /// Swap buffers (render frame)
208 virtual void SwapBuffers(
209 std::optional<std::reference_wrapper<const Tegra::FramebufferConfig>> framebuffer) = 0;
210
211 /// Notify rasterizer that any caches of the specified region should be flushed to Switch memory
212 virtual void FlushRegion(VAddr addr, u64 size) = 0;
213
214 /// Notify rasterizer that any caches of the specified region should be invalidated
215 virtual void InvalidateRegion(VAddr addr, u64 size) = 0;
216
217 /// Notify rasterizer that any caches of the specified region should be flushed and invalidated
218 virtual void FlushAndInvalidateRegion(VAddr addr, u64 size) = 0;
219
203private: 220private:
221 void ProcessBindMethod(const MethodCall& method_call);
222 void ProcessSemaphoreTriggerMethod();
223 void ProcessSemaphoreRelease();
224 void ProcessSemaphoreAcquire();
225
226 /// Calls a GPU puller method.
227 void CallPullerMethod(const MethodCall& method_call);
228
229 /// Calls a GPU engine method.
230 void CallEngineMethod(const MethodCall& method_call);
231
232 /// Determines where the method should be executed.
233 bool ExecuteMethodOnEngine(const MethodCall& method_call);
234
235protected:
204 std::unique_ptr<Tegra::DmaPusher> dma_pusher; 236 std::unique_ptr<Tegra::DmaPusher> dma_pusher;
237 VideoCore::RendererBase& renderer;
238
239private:
205 std::unique_ptr<Tegra::MemoryManager> memory_manager; 240 std::unique_ptr<Tegra::MemoryManager> memory_manager;
206 241
207 /// Mapping of command subchannels to their bound engine ids. 242 /// Mapping of command subchannels to their bound engine ids.
@@ -217,18 +252,6 @@ private:
217 std::unique_ptr<Engines::MaxwellDMA> maxwell_dma; 252 std::unique_ptr<Engines::MaxwellDMA> maxwell_dma;
218 /// Inline memory engine 253 /// Inline memory engine
219 std::unique_ptr<Engines::KeplerMemory> kepler_memory; 254 std::unique_ptr<Engines::KeplerMemory> kepler_memory;
220
221 void ProcessBindMethod(const MethodCall& method_call);
222 void ProcessSemaphoreTriggerMethod();
223 void ProcessSemaphoreRelease();
224 void ProcessSemaphoreAcquire();
225
226 // Calls a GPU puller method.
227 void CallPullerMethod(const MethodCall& method_call);
228 // Calls a GPU engine method.
229 void CallEngineMethod(const MethodCall& method_call);
230 // Determines where the method should be executed.
231 bool ExecuteMethodOnEngine(const MethodCall& method_call);
232}; 255};
233 256
234#define ASSERT_REG_POSITION(field_name, position) \ 257#define ASSERT_REG_POSITION(field_name, position) \
diff --git a/src/video_core/gpu_asynch.cpp b/src/video_core/gpu_asynch.cpp
new file mode 100644
index 000000000..ad0a747e3
--- /dev/null
+++ b/src/video_core/gpu_asynch.cpp
@@ -0,0 +1,37 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "video_core/gpu_asynch.h"
6#include "video_core/gpu_thread.h"
7#include "video_core/renderer_base.h"
8
9namespace VideoCommon {
10
11GPUAsynch::GPUAsynch(Core::System& system, VideoCore::RendererBase& renderer)
12 : Tegra::GPU(system, renderer), gpu_thread{renderer, *dma_pusher} {}
13
14GPUAsynch::~GPUAsynch() = default;
15
16void GPUAsynch::PushGPUEntries(Tegra::CommandList&& entries) {
17 gpu_thread.SubmitList(std::move(entries));
18}
19
20void GPUAsynch::SwapBuffers(
21 std::optional<std::reference_wrapper<const Tegra::FramebufferConfig>> framebuffer) {
22 gpu_thread.SwapBuffers(std::move(framebuffer));
23}
24
25void GPUAsynch::FlushRegion(VAddr addr, u64 size) {
26 gpu_thread.FlushRegion(addr, size);
27}
28
29void GPUAsynch::InvalidateRegion(VAddr addr, u64 size) {
30 gpu_thread.InvalidateRegion(addr, size);
31}
32
33void GPUAsynch::FlushAndInvalidateRegion(VAddr addr, u64 size) {
34 gpu_thread.FlushAndInvalidateRegion(addr, size);
35}
36
37} // namespace VideoCommon
diff --git a/src/video_core/gpu_asynch.h b/src/video_core/gpu_asynch.h
new file mode 100644
index 000000000..e6a807aba
--- /dev/null
+++ b/src/video_core/gpu_asynch.h
@@ -0,0 +1,37 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include "video_core/gpu.h"
8#include "video_core/gpu_thread.h"
9
10namespace VideoCore {
11class RendererBase;
12} // namespace VideoCore
13
14namespace VideoCommon {
15
16namespace GPUThread {
17class ThreadManager;
18} // namespace GPUThread
19
20/// Implementation of GPU interface that runs the GPU asynchronously
21class GPUAsynch : public Tegra::GPU {
22public:
23 explicit GPUAsynch(Core::System& system, VideoCore::RendererBase& renderer);
24 ~GPUAsynch() override;
25
26 void PushGPUEntries(Tegra::CommandList&& entries) override;
27 void SwapBuffers(
28 std::optional<std::reference_wrapper<const Tegra::FramebufferConfig>> framebuffer) override;
29 void FlushRegion(VAddr addr, u64 size) override;
30 void InvalidateRegion(VAddr addr, u64 size) override;
31 void FlushAndInvalidateRegion(VAddr addr, u64 size) override;
32
33private:
34 GPUThread::ThreadManager gpu_thread;
35};
36
37} // namespace VideoCommon
diff --git a/src/video_core/gpu_synch.cpp b/src/video_core/gpu_synch.cpp
new file mode 100644
index 000000000..4c00b96c7
--- /dev/null
+++ b/src/video_core/gpu_synch.cpp
@@ -0,0 +1,37 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "video_core/gpu_synch.h"
6#include "video_core/renderer_base.h"
7
8namespace VideoCommon {
9
10GPUSynch::GPUSynch(Core::System& system, VideoCore::RendererBase& renderer)
11 : Tegra::GPU(system, renderer) {}
12
13GPUSynch::~GPUSynch() = default;
14
15void GPUSynch::PushGPUEntries(Tegra::CommandList&& entries) {
16 dma_pusher->Push(std::move(entries));
17 dma_pusher->DispatchCalls();
18}
19
20void GPUSynch::SwapBuffers(
21 std::optional<std::reference_wrapper<const Tegra::FramebufferConfig>> framebuffer) {
22 renderer.SwapBuffers(std::move(framebuffer));
23}
24
25void GPUSynch::FlushRegion(VAddr addr, u64 size) {
26 renderer.Rasterizer().FlushRegion(addr, size);
27}
28
29void GPUSynch::InvalidateRegion(VAddr addr, u64 size) {
30 renderer.Rasterizer().InvalidateRegion(addr, size);
31}
32
33void GPUSynch::FlushAndInvalidateRegion(VAddr addr, u64 size) {
34 renderer.Rasterizer().FlushAndInvalidateRegion(addr, size);
35}
36
37} // namespace VideoCommon
diff --git a/src/video_core/gpu_synch.h b/src/video_core/gpu_synch.h
new file mode 100644
index 000000000..7d5a241ff
--- /dev/null
+++ b/src/video_core/gpu_synch.h
@@ -0,0 +1,29 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include "video_core/gpu.h"
8
9namespace VideoCore {
10class RendererBase;
11} // namespace VideoCore
12
13namespace VideoCommon {
14
15/// Implementation of GPU interface that runs the GPU synchronously
16class GPUSynch : public Tegra::GPU {
17public:
18 explicit GPUSynch(Core::System& system, VideoCore::RendererBase& renderer);
19 ~GPUSynch() override;
20
21 void PushGPUEntries(Tegra::CommandList&& entries) override;
22 void SwapBuffers(
23 std::optional<std::reference_wrapper<const Tegra::FramebufferConfig>> framebuffer) override;
24 void FlushRegion(VAddr addr, u64 size) override;
25 void InvalidateRegion(VAddr addr, u64 size) override;
26 void FlushAndInvalidateRegion(VAddr addr, u64 size) override;
27};
28
29} // namespace VideoCommon
diff --git a/src/video_core/gpu_thread.cpp b/src/video_core/gpu_thread.cpp
new file mode 100644
index 000000000..c5bdd2a17
--- /dev/null
+++ b/src/video_core/gpu_thread.cpp
@@ -0,0 +1,152 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/assert.h"
6#include "common/microprofile.h"
7#include "core/frontend/scope_acquire_window_context.h"
8#include "core/settings.h"
9#include "video_core/dma_pusher.h"
10#include "video_core/gpu.h"
11#include "video_core/gpu_thread.h"
12#include "video_core/renderer_base.h"
13
14namespace VideoCommon::GPUThread {
15
16/// Executes a single GPU thread command
17static void ExecuteCommand(CommandData* command, VideoCore::RendererBase& renderer,
18 Tegra::DmaPusher& dma_pusher) {
19 if (const auto submit_list = std::get_if<SubmitListCommand>(command)) {
20 dma_pusher.Push(std::move(submit_list->entries));
21 dma_pusher.DispatchCalls();
22 } else if (const auto data = std::get_if<SwapBuffersCommand>(command)) {
23 renderer.SwapBuffers(data->framebuffer);
24 } else if (const auto data = std::get_if<FlushRegionCommand>(command)) {
25 renderer.Rasterizer().FlushRegion(data->addr, data->size);
26 } else if (const auto data = std::get_if<InvalidateRegionCommand>(command)) {
27 renderer.Rasterizer().InvalidateRegion(data->addr, data->size);
28 } else if (const auto data = std::get_if<FlushAndInvalidateRegionCommand>(command)) {
29 renderer.Rasterizer().FlushAndInvalidateRegion(data->addr, data->size);
30 } else {
31 UNREACHABLE();
32 }
33}
34
35/// Runs the GPU thread
36static void RunThread(VideoCore::RendererBase& renderer, Tegra::DmaPusher& dma_pusher,
37 SynchState& state) {
38
39 MicroProfileOnThreadCreate("GpuThread");
40
41 auto WaitForWakeup = [&]() {
42 std::unique_lock<std::mutex> lock{state.signal_mutex};
43 state.signal_condition.wait(lock, [&] { return !state.is_idle || !state.is_running; });
44 };
45
46 // Wait for first GPU command before acquiring the window context
47 WaitForWakeup();
48
49 // If emulation was stopped during disk shader loading, abort before trying to acquire context
50 if (!state.is_running) {
51 return;
52 }
53
54 Core::Frontend::ScopeAcquireWindowContext acquire_context{renderer.GetRenderWindow()};
55
56 while (state.is_running) {
57 if (!state.is_running) {
58 return;
59 }
60
61 {
62 // Thread has been woken up, so make the previous write queue the next read queue
63 std::lock_guard<std::mutex> lock{state.signal_mutex};
64 std::swap(state.push_queue, state.pop_queue);
65 }
66
67 // Execute all of the GPU commands
68 while (!state.pop_queue->empty()) {
69 ExecuteCommand(&state.pop_queue->front(), renderer, dma_pusher);
70 state.pop_queue->pop();
71 }
72
73 state.UpdateIdleState();
74
75 // Signal that the GPU thread has finished processing commands
76 if (state.is_idle) {
77 state.idle_condition.notify_one();
78 }
79
80 // Wait for CPU thread to send more GPU commands
81 WaitForWakeup();
82 }
83}
84
85ThreadManager::ThreadManager(VideoCore::RendererBase& renderer, Tegra::DmaPusher& dma_pusher)
86 : renderer{renderer}, dma_pusher{dma_pusher}, thread{RunThread, std::ref(renderer),
87 std::ref(dma_pusher), std::ref(state)},
88 thread_id{thread.get_id()} {}
89
90ThreadManager::~ThreadManager() {
91 {
92 // Notify GPU thread that a shutdown is pending
93 std::lock_guard<std::mutex> lock{state.signal_mutex};
94 state.is_running = false;
95 }
96
97 state.signal_condition.notify_one();
98 thread.join();
99}
100
101void ThreadManager::SubmitList(Tegra::CommandList&& entries) {
102 if (entries.empty()) {
103 return;
104 }
105
106 PushCommand(SubmitListCommand(std::move(entries)), false, false);
107}
108
109void ThreadManager::SwapBuffers(
110 std::optional<std::reference_wrapper<const Tegra::FramebufferConfig>> framebuffer) {
111 PushCommand(SwapBuffersCommand(std::move(framebuffer)), true, false);
112}
113
114void ThreadManager::FlushRegion(VAddr addr, u64 size) {
115 // Block the CPU when using accurate emulation
116 PushCommand(FlushRegionCommand(addr, size), Settings::values.use_accurate_gpu_emulation, false);
117}
118
119void ThreadManager::InvalidateRegion(VAddr addr, u64 size) {
120 PushCommand(InvalidateRegionCommand(addr, size), true, true);
121}
122
123void ThreadManager::FlushAndInvalidateRegion(VAddr addr, u64 size) {
124 InvalidateRegion(addr, size);
125}
126
127void ThreadManager::PushCommand(CommandData&& command_data, bool wait_for_idle, bool allow_on_cpu) {
128 {
129 std::lock_guard<std::mutex> lock{state.signal_mutex};
130
131 if ((allow_on_cpu && state.is_idle) || IsGpuThread()) {
132 // Execute the command synchronously on the current thread
133 ExecuteCommand(&command_data, renderer, dma_pusher);
134 return;
135 }
136
137 // Push the command to the GPU thread
138 state.UpdateIdleState();
139 state.push_queue->emplace(command_data);
140 }
141
142 // Signal the GPU thread that commands are pending
143 state.signal_condition.notify_one();
144
145 if (wait_for_idle) {
146 // Wait for the GPU to be idle (all commands to be executed)
147 std::unique_lock<std::mutex> lock{state.idle_mutex};
148 state.idle_condition.wait(lock, [this] { return static_cast<bool>(state.is_idle); });
149 }
150}
151
152} // namespace VideoCommon::GPUThread
diff --git a/src/video_core/gpu_thread.h b/src/video_core/gpu_thread.h
new file mode 100644
index 000000000..edb148b14
--- /dev/null
+++ b/src/video_core/gpu_thread.h
@@ -0,0 +1,133 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <array>
8#include <atomic>
9#include <condition_variable>
10#include <memory>
11#include <mutex>
12#include <optional>
13#include <thread>
14#include <variant>
15
16namespace Tegra {
17struct FramebufferConfig;
18class DmaPusher;
19} // namespace Tegra
20
21namespace VideoCore {
22class RendererBase;
23} // namespace VideoCore
24
25namespace VideoCommon::GPUThread {
26
27/// Command to signal to the GPU thread that a command list is ready for processing
28struct SubmitListCommand final {
29 explicit SubmitListCommand(Tegra::CommandList&& entries) : entries{std::move(entries)} {}
30
31 Tegra::CommandList entries;
32};
33
34/// Command to signal to the GPU thread that a swap buffers is pending
35struct SwapBuffersCommand final {
36 explicit SwapBuffersCommand(std::optional<const Tegra::FramebufferConfig> framebuffer)
37 : framebuffer{std::move(framebuffer)} {}
38
39 std::optional<const Tegra::FramebufferConfig> framebuffer;
40};
41
42/// Command to signal to the GPU thread to flush a region
43struct FlushRegionCommand final {
44 explicit constexpr FlushRegionCommand(VAddr addr, u64 size) : addr{addr}, size{size} {}
45
46 const VAddr addr;
47 const u64 size;
48};
49
50/// Command to signal to the GPU thread to invalidate a region
51struct InvalidateRegionCommand final {
52 explicit constexpr InvalidateRegionCommand(VAddr addr, u64 size) : addr{addr}, size{size} {}
53
54 const VAddr addr;
55 const u64 size;
56};
57
58/// Command to signal to the GPU thread to flush and invalidate a region
59struct FlushAndInvalidateRegionCommand final {
60 explicit constexpr FlushAndInvalidateRegionCommand(VAddr addr, u64 size)
61 : addr{addr}, size{size} {}
62
63 const VAddr addr;
64 const u64 size;
65};
66
67using CommandData = std::variant<SubmitListCommand, SwapBuffersCommand, FlushRegionCommand,
68 InvalidateRegionCommand, FlushAndInvalidateRegionCommand>;
69
70/// Struct used to synchronize the GPU thread
71struct SynchState final {
72 std::atomic<bool> is_running{true};
73 std::atomic<bool> is_idle{true};
74 std::condition_variable signal_condition;
75 std::mutex signal_mutex;
76 std::condition_variable idle_condition;
77 std::mutex idle_mutex;
78
79 // We use two queues for sending commands to the GPU thread, one for writing (push_queue) to and
80 // one for reading from (pop_queue). These are swapped whenever the current pop_queue becomes
81 // empty. This allows for efficient thread-safe access, as it does not require any copies.
82
83 using CommandQueue = std::queue<CommandData>;
84 std::array<CommandQueue, 2> command_queues;
85 CommandQueue* push_queue{&command_queues[0]};
86 CommandQueue* pop_queue{&command_queues[1]};
87
88 void UpdateIdleState() {
89 std::lock_guard<std::mutex> lock{idle_mutex};
90 is_idle = command_queues[0].empty() && command_queues[1].empty();
91 }
92};
93
94/// Class used to manage the GPU thread
95class ThreadManager final {
96public:
97 explicit ThreadManager(VideoCore::RendererBase& renderer, Tegra::DmaPusher& dma_pusher);
98 ~ThreadManager();
99
100 /// Push GPU command entries to be processed
101 void SubmitList(Tegra::CommandList&& entries);
102
103 /// Swap buffers (render frame)
104 void SwapBuffers(
105 std::optional<std::reference_wrapper<const Tegra::FramebufferConfig>> framebuffer);
106
107 /// Notify rasterizer that any caches of the specified region should be flushed to Switch memory
108 void FlushRegion(VAddr addr, u64 size);
109
110 /// Notify rasterizer that any caches of the specified region should be invalidated
111 void InvalidateRegion(VAddr addr, u64 size);
112
113 /// Notify rasterizer that any caches of the specified region should be flushed and invalidated
114 void FlushAndInvalidateRegion(VAddr addr, u64 size);
115
116private:
117 /// Pushes a command to be executed by the GPU thread
118 void PushCommand(CommandData&& command_data, bool wait_for_idle, bool allow_on_cpu);
119
120 /// Returns true if this is called by the GPU thread
121 bool IsGpuThread() const {
122 return std::this_thread::get_id() == thread_id;
123 }
124
125private:
126 SynchState state;
127 VideoCore::RendererBase& renderer;
128 Tegra::DmaPusher& dma_pusher;
129 std::thread thread;
130 std::thread::id thread_id;
131};
132
133} // namespace VideoCommon::GPUThread
diff --git a/src/video_core/rasterizer_cache.h b/src/video_core/rasterizer_cache.h
index bcf0c15a4..a7bcf26fb 100644
--- a/src/video_core/rasterizer_cache.h
+++ b/src/video_core/rasterizer_cache.h
@@ -129,6 +129,15 @@ protected:
129 return ++modified_ticks; 129 return ++modified_ticks;
130 } 130 }
131 131
132 /// Flushes the specified object, updating appropriate cache state as needed
133 void FlushObject(const T& object) {
134 if (!object->IsDirty()) {
135 return;
136 }
137 object->Flush();
138 object->MarkAsModified(false, *this);
139 }
140
132private: 141private:
133 /// Returns a list of cached objects from the specified memory region, ordered by access time 142 /// Returns a list of cached objects from the specified memory region, ordered by access time
134 std::vector<T> GetSortedObjectsFromRegion(VAddr addr, u64 size) { 143 std::vector<T> GetSortedObjectsFromRegion(VAddr addr, u64 size) {
@@ -154,15 +163,6 @@ private:
154 return objects; 163 return objects;
155 } 164 }
156 165
157 /// Flushes the specified object, updating appropriate cache state as needed
158 void FlushObject(const T& object) {
159 if (!object->IsDirty()) {
160 return;
161 }
162 object->Flush();
163 object->MarkAsModified(false, *this);
164 }
165
166 using ObjectSet = std::set<T>; 166 using ObjectSet = std::set<T>;
167 using ObjectCache = std::unordered_map<VAddr, T>; 167 using ObjectCache = std::unordered_map<VAddr, T>;
168 using IntervalCache = boost::icl::interval_map<VAddr, ObjectSet>; 168 using IntervalCache = boost::icl::interval_map<VAddr, ObjectSet>;
diff --git a/src/video_core/rasterizer_interface.h b/src/video_core/rasterizer_interface.h
index b2a223705..6a1dc9cf6 100644
--- a/src/video_core/rasterizer_interface.h
+++ b/src/video_core/rasterizer_interface.h
@@ -47,8 +47,8 @@ public:
47 /// Attempt to use a faster method to perform a surface copy 47 /// Attempt to use a faster method to perform a surface copy
48 virtual bool AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Regs::Surface& src, 48 virtual bool AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Regs::Surface& src,
49 const Tegra::Engines::Fermi2D::Regs::Surface& dst, 49 const Tegra::Engines::Fermi2D::Regs::Surface& dst,
50 const MathUtil::Rectangle<u32>& src_rect, 50 const Common::Rectangle<u32>& src_rect,
51 const MathUtil::Rectangle<u32>& dst_rect) { 51 const Common::Rectangle<u32>& dst_rect) {
52 return false; 52 return false;
53 } 53 }
54 54
diff --git a/src/video_core/renderer_base.cpp b/src/video_core/renderer_base.cpp
index 94223f45f..919d1f2d4 100644
--- a/src/video_core/renderer_base.cpp
+++ b/src/video_core/renderer_base.cpp
@@ -2,6 +2,7 @@
2// Licensed under GPLv2 or any later version 2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included. 3// Refer to the license.txt file included.
4 4
5#include "common/logging/log.h"
5#include "core/frontend/emu_window.h" 6#include "core/frontend/emu_window.h"
6#include "core/settings.h" 7#include "core/settings.h"
7#include "video_core/renderer_base.h" 8#include "video_core/renderer_base.h"
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp
index 12d876120..301562ff6 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -102,8 +102,8 @@ struct FramebufferCacheKey {
102 102
103RasterizerOpenGL::RasterizerOpenGL(Core::Frontend::EmuWindow& window, Core::System& system, 103RasterizerOpenGL::RasterizerOpenGL(Core::Frontend::EmuWindow& window, Core::System& system,
104 ScreenInfo& info) 104 ScreenInfo& info)
105 : res_cache{*this}, shader_cache{*this, system}, emu_window{window}, screen_info{info}, 105 : res_cache{*this}, shader_cache{*this, system}, global_cache{*this}, emu_window{window},
106 buffer_cache(*this, STREAM_BUFFER_SIZE), global_cache{*this} { 106 screen_info{info}, buffer_cache(*this, STREAM_BUFFER_SIZE) {
107 // Create sampler objects 107 // Create sampler objects
108 for (std::size_t i = 0; i < texture_samplers.size(); ++i) { 108 for (std::size_t i = 0; i < texture_samplers.size(); ++i) {
109 texture_samplers[i].Create(); 109 texture_samplers[i].Create();
@@ -118,7 +118,7 @@ RasterizerOpenGL::RasterizerOpenGL(Core::Frontend::EmuWindow& window, Core::Syst
118 118
119 glGetIntegerv(GL_UNIFORM_BUFFER_OFFSET_ALIGNMENT, &uniform_buffer_alignment); 119 glGetIntegerv(GL_UNIFORM_BUFFER_OFFSET_ALIGNMENT, &uniform_buffer_alignment);
120 120
121 LOG_CRITICAL(Render_OpenGL, "Sync fixed function OpenGL state here!"); 121 LOG_DEBUG(Render_OpenGL, "Sync fixed function OpenGL state here");
122 CheckExtensions(); 122 CheckExtensions();
123} 123}
124 124
@@ -177,7 +177,7 @@ GLuint RasterizerOpenGL::SetupVertexFormat() {
177 continue; 177 continue;
178 178
179 const auto& buffer = regs.vertex_array[attrib.buffer]; 179 const auto& buffer = regs.vertex_array[attrib.buffer];
180 LOG_TRACE(HW_GPU, 180 LOG_TRACE(Render_OpenGL,
181 "vertex attrib {}, count={}, size={}, type={}, offset={}, normalize={}", 181 "vertex attrib {}, count={}, size={}, type={}, offset={}, normalize={}",
182 index, attrib.ComponentCount(), attrib.SizeString(), attrib.TypeString(), 182 index, attrib.ComponentCount(), attrib.SizeString(), attrib.TypeString(),
183 attrib.offset.Value(), attrib.IsNormalized()); 183 attrib.offset.Value(), attrib.IsNormalized());
@@ -200,7 +200,7 @@ GLuint RasterizerOpenGL::SetupVertexFormat() {
200 } 200 }
201 201
202 // Rebinding the VAO invalidates the vertex buffer bindings. 202 // Rebinding the VAO invalidates the vertex buffer bindings.
203 gpu.dirty_flags.vertex_array = 0xFFFFFFFF; 203 gpu.dirty_flags.vertex_array.set();
204 204
205 state.draw.vertex_array = vao_entry.handle; 205 state.draw.vertex_array = vao_entry.handle;
206 return vao_entry.handle; 206 return vao_entry.handle;
@@ -210,14 +210,14 @@ void RasterizerOpenGL::SetupVertexBuffer(GLuint vao) {
210 auto& gpu = Core::System::GetInstance().GPU().Maxwell3D(); 210 auto& gpu = Core::System::GetInstance().GPU().Maxwell3D();
211 const auto& regs = gpu.regs; 211 const auto& regs = gpu.regs;
212 212
213 if (!gpu.dirty_flags.vertex_array) 213 if (gpu.dirty_flags.vertex_array.none())
214 return; 214 return;
215 215
216 MICROPROFILE_SCOPE(OpenGL_VB); 216 MICROPROFILE_SCOPE(OpenGL_VB);
217 217
218 // Upload all guest vertex arrays sequentially to our buffer 218 // Upload all guest vertex arrays sequentially to our buffer
219 for (u32 index = 0; index < Maxwell::NumVertexArrays; ++index) { 219 for (u32 index = 0; index < Maxwell::NumVertexArrays; ++index) {
220 if (~gpu.dirty_flags.vertex_array & (1u << index)) 220 if (!gpu.dirty_flags.vertex_array[index])
221 continue; 221 continue;
222 222
223 const auto& vertex_array = regs.vertex_array[index]; 223 const auto& vertex_array = regs.vertex_array[index];
@@ -244,7 +244,7 @@ void RasterizerOpenGL::SetupVertexBuffer(GLuint vao) {
244 } 244 }
245 } 245 }
246 246
247 gpu.dirty_flags.vertex_array = 0; 247 gpu.dirty_flags.vertex_array.reset();
248} 248}
249 249
250DrawParameters RasterizerOpenGL::SetupDraw() { 250DrawParameters RasterizerOpenGL::SetupDraw() {
@@ -343,9 +343,8 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) {
343 shader_program_manager->UseProgrammableFragmentShader(program_handle); 343 shader_program_manager->UseProgrammableFragmentShader(program_handle);
344 break; 344 break;
345 default: 345 default:
346 LOG_CRITICAL(HW_GPU, "Unimplemented shader index={}, enable={}, offset=0x{:08X}", index, 346 UNIMPLEMENTED_MSG("Unimplemented shader index={}, enable={}, offset=0x{:08X}", index,
347 shader_config.enable.Value(), shader_config.offset); 347 shader_config.enable.Value(), shader_config.offset);
348 UNREACHABLE();
349 } 348 }
350 349
351 const auto stage_enum = static_cast<Maxwell::ShaderStage>(stage); 350 const auto stage_enum = static_cast<Maxwell::ShaderStage>(stage);
@@ -488,13 +487,13 @@ std::pair<bool, bool> RasterizerOpenGL::ConfigureFramebuffers(
488 OpenGLState& current_state, bool using_color_fb, bool using_depth_fb, bool preserve_contents, 487 OpenGLState& current_state, bool using_color_fb, bool using_depth_fb, bool preserve_contents,
489 std::optional<std::size_t> single_color_target) { 488 std::optional<std::size_t> single_color_target) {
490 MICROPROFILE_SCOPE(OpenGL_Framebuffer); 489 MICROPROFILE_SCOPE(OpenGL_Framebuffer);
491 const auto& gpu = Core::System::GetInstance().GPU().Maxwell3D(); 490 auto& gpu = Core::System::GetInstance().GPU().Maxwell3D();
492 const auto& regs = gpu.regs; 491 const auto& regs = gpu.regs;
493 492
494 const FramebufferConfigState fb_config_state{using_color_fb, using_depth_fb, preserve_contents, 493 const FramebufferConfigState fb_config_state{using_color_fb, using_depth_fb, preserve_contents,
495 single_color_target}; 494 single_color_target};
496 if (fb_config_state == current_framebuffer_config_state && gpu.dirty_flags.color_buffer == 0 && 495 if (fb_config_state == current_framebuffer_config_state &&
497 !gpu.dirty_flags.zeta_buffer) { 496 gpu.dirty_flags.color_buffer.none() && !gpu.dirty_flags.zeta_buffer) {
498 // Only skip if the previous ConfigureFramebuffers call was from the same kind (multiple or 497 // Only skip if the previous ConfigureFramebuffers call was from the same kind (multiple or
499 // single color targets). This is done because the guest registers may not change but the 498 // single color targets). This is done because the guest registers may not change but the
500 // host framebuffer may contain different attachments 499 // host framebuffer may contain different attachments
@@ -721,10 +720,10 @@ void RasterizerOpenGL::DrawArrays() {
721 // Add space for at least 18 constant buffers 720 // Add space for at least 18 constant buffers
722 buffer_size += Maxwell::MaxConstBuffers * (MaxConstbufferSize + uniform_buffer_alignment); 721 buffer_size += Maxwell::MaxConstBuffers * (MaxConstbufferSize + uniform_buffer_alignment);
723 722
724 bool invalidate = buffer_cache.Map(buffer_size); 723 const bool invalidate = buffer_cache.Map(buffer_size);
725 if (invalidate) { 724 if (invalidate) {
726 // As all cached buffers are invalidated, we need to recheck their state. 725 // As all cached buffers are invalidated, we need to recheck their state.
727 gpu.dirty_flags.vertex_array = 0xFFFFFFFF; 726 gpu.dirty_flags.vertex_array.set();
728 } 727 }
729 728
730 const GLuint vao = SetupVertexFormat(); 729 const GLuint vao = SetupVertexFormat();
@@ -738,30 +737,18 @@ void RasterizerOpenGL::DrawArrays() {
738 shader_program_manager->ApplyTo(state); 737 shader_program_manager->ApplyTo(state);
739 state.Apply(); 738 state.Apply();
740 739
741 // Execute draw call 740 res_cache.SignalPreDrawCall();
742 params.DispatchDraw(); 741 params.DispatchDraw();
743 742 res_cache.SignalPostDrawCall();
744 // Disable scissor test
745 state.viewports[0].scissor.enabled = false;
746 743
747 accelerate_draw = AccelDraw::Disabled; 744 accelerate_draw = AccelDraw::Disabled;
748
749 // Unbind textures for potential future use as framebuffer attachments
750 for (auto& texture_unit : state.texture_units) {
751 texture_unit.Unbind();
752 }
753 state.Apply();
754} 745}
755 746
756void RasterizerOpenGL::FlushAll() {} 747void RasterizerOpenGL::FlushAll() {}
757 748
758void RasterizerOpenGL::FlushRegion(VAddr addr, u64 size) { 749void RasterizerOpenGL::FlushRegion(VAddr addr, u64 size) {
759 MICROPROFILE_SCOPE(OpenGL_CacheManagement); 750 MICROPROFILE_SCOPE(OpenGL_CacheManagement);
760 751 res_cache.FlushRegion(addr, size);
761 if (Settings::values.use_accurate_gpu_emulation) {
762 // Only flush if use_accurate_gpu_emulation is enabled, as it incurs a performance hit
763 res_cache.FlushRegion(addr, size);
764 }
765} 752}
766 753
767void RasterizerOpenGL::InvalidateRegion(VAddr addr, u64 size) { 754void RasterizerOpenGL::InvalidateRegion(VAddr addr, u64 size) {
@@ -779,8 +766,8 @@ void RasterizerOpenGL::FlushAndInvalidateRegion(VAddr addr, u64 size) {
779 766
780bool RasterizerOpenGL::AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Regs::Surface& src, 767bool RasterizerOpenGL::AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Regs::Surface& src,
781 const Tegra::Engines::Fermi2D::Regs::Surface& dst, 768 const Tegra::Engines::Fermi2D::Regs::Surface& dst,
782 const MathUtil::Rectangle<u32>& src_rect, 769 const Common::Rectangle<u32>& src_rect,
783 const MathUtil::Rectangle<u32>& dst_rect) { 770 const Common::Rectangle<u32>& dst_rect) {
784 MICROPROFILE_SCOPE(OpenGL_Blits); 771 MICROPROFILE_SCOPE(OpenGL_Blits);
785 res_cache.FermiCopySurface(src, dst, src_rect, dst_rect); 772 res_cache.FermiCopySurface(src, dst, src_rect, dst_rect);
786 return true; 773 return true;
@@ -805,7 +792,10 @@ bool RasterizerOpenGL::AccelerateDisplay(const Tegra::FramebufferConfig& config,
805 VideoCore::Surface::PixelFormatFromGPUPixelFormat(config.pixel_format)}; 792 VideoCore::Surface::PixelFormatFromGPUPixelFormat(config.pixel_format)};
806 ASSERT_MSG(params.width == config.width, "Framebuffer width is different"); 793 ASSERT_MSG(params.width == config.width, "Framebuffer width is different");
807 ASSERT_MSG(params.height == config.height, "Framebuffer height is different"); 794 ASSERT_MSG(params.height == config.height, "Framebuffer height is different");
808 ASSERT_MSG(params.pixel_format == pixel_format, "Framebuffer pixel_format is different"); 795
796 if (params.pixel_format != pixel_format) {
797 LOG_WARNING(Render_OpenGL, "Framebuffer pixel_format is different");
798 }
809 799
810 screen_info.display_texture = surface->Texture().handle; 800 screen_info.display_texture = surface->Texture().handle;
811 801
@@ -951,8 +941,8 @@ void RasterizerOpenGL::SetupConstBuffers(Tegra::Engines::Maxwell3D::Regs::Shader
951 size = buffer.size; 941 size = buffer.size;
952 942
953 if (size > MaxConstbufferSize) { 943 if (size > MaxConstbufferSize) {
954 LOG_CRITICAL(HW_GPU, "indirect constbuffer size {} exceeds maximum {}", size, 944 LOG_WARNING(Render_OpenGL, "Indirect constbuffer size {} exceeds maximum {}", size,
955 MaxConstbufferSize); 945 MaxConstbufferSize);
956 size = MaxConstbufferSize; 946 size = MaxConstbufferSize;
957 } 947 }
958 } else { 948 } else {
@@ -1012,10 +1002,9 @@ void RasterizerOpenGL::SetupTextures(Maxwell::ShaderStage stage, const Shader& s
1012 1002
1013 texture_samplers[current_bindpoint].SyncWithConfig(texture.tsc); 1003 texture_samplers[current_bindpoint].SyncWithConfig(texture.tsc);
1014 1004
1015 Surface surface = res_cache.GetTextureSurface(texture, entry); 1005 if (Surface surface = res_cache.GetTextureSurface(texture, entry); surface) {
1016 if (surface != nullptr) {
1017 state.texture_units[current_bindpoint].texture = 1006 state.texture_units[current_bindpoint].texture =
1018 entry.IsArray() ? surface->TextureLayer().handle : surface->Texture().handle; 1007 surface->Texture(entry.IsArray()).handle;
1019 surface->UpdateSwizzle(texture.tic.x_source, texture.tic.y_source, texture.tic.z_source, 1008 surface->UpdateSwizzle(texture.tic.x_source, texture.tic.y_source, texture.tic.z_source,
1020 texture.tic.w_source); 1009 texture.tic.w_source);
1021 } else { 1010 } else {
@@ -1034,7 +1023,7 @@ void RasterizerOpenGL::SyncViewport(OpenGLState& current_state) {
1034 for (std::size_t i = 0; i < viewport_count; i++) { 1023 for (std::size_t i = 0; i < viewport_count; i++) {
1035 auto& viewport = current_state.viewports[i]; 1024 auto& viewport = current_state.viewports[i];
1036 const auto& src = regs.viewports[i]; 1025 const auto& src = regs.viewports[i];
1037 const MathUtil::Rectangle<s32> viewport_rect{regs.viewport_transform[i].GetRect()}; 1026 const Common::Rectangle<s32> viewport_rect{regs.viewport_transform[i].GetRect()};
1038 viewport.x = viewport_rect.left; 1027 viewport.x = viewport_rect.left;
1039 viewport.y = viewport_rect.bottom; 1028 viewport.y = viewport_rect.bottom;
1040 viewport.width = viewport_rect.GetWidth(); 1029 viewport.width = viewport_rect.GetWidth();
@@ -1247,11 +1236,7 @@ void RasterizerOpenGL::SyncScissorTest(OpenGLState& current_state) {
1247 1236
1248void RasterizerOpenGL::SyncTransformFeedback() { 1237void RasterizerOpenGL::SyncTransformFeedback() {
1249 const auto& regs = Core::System::GetInstance().GPU().Maxwell3D().regs; 1238 const auto& regs = Core::System::GetInstance().GPU().Maxwell3D().regs;
1250 1239 UNIMPLEMENTED_IF_MSG(regs.tfb_enabled != 0, "Transform feedbacks are not implemented");
1251 if (regs.tfb_enabled != 0) {
1252 LOG_CRITICAL(Render_OpenGL, "Transform feedbacks are not implemented");
1253 UNREACHABLE();
1254 }
1255} 1240}
1256 1241
1257void RasterizerOpenGL::SyncPointState() { 1242void RasterizerOpenGL::SyncPointState() {
@@ -1271,12 +1256,8 @@ void RasterizerOpenGL::SyncPolygonOffset() {
1271 1256
1272void RasterizerOpenGL::CheckAlphaTests() { 1257void RasterizerOpenGL::CheckAlphaTests() {
1273 const auto& regs = Core::System::GetInstance().GPU().Maxwell3D().regs; 1258 const auto& regs = Core::System::GetInstance().GPU().Maxwell3D().regs;
1274 1259 UNIMPLEMENTED_IF_MSG(regs.alpha_test_enabled != 0 && regs.rt_control.count > 1,
1275 if (regs.alpha_test_enabled != 0 && regs.rt_control.count > 1) { 1260 "Alpha Testing is enabled with more than one rendertarget");
1276 LOG_CRITICAL(Render_OpenGL, "Alpha Testing is enabled with Multiple Render Targets, "
1277 "this behavior is undefined.");
1278 UNREACHABLE();
1279 }
1280} 1261}
1281 1262
1282} // namespace OpenGL 1263} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h
index 258d62259..2f0524f85 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer.h
@@ -62,8 +62,8 @@ public:
62 void FlushAndInvalidateRegion(VAddr addr, u64 size) override; 62 void FlushAndInvalidateRegion(VAddr addr, u64 size) override;
63 bool AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Regs::Surface& src, 63 bool AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Regs::Surface& src,
64 const Tegra::Engines::Fermi2D::Regs::Surface& dst, 64 const Tegra::Engines::Fermi2D::Regs::Surface& dst,
65 const MathUtil::Rectangle<u32>& src_rect, 65 const Common::Rectangle<u32>& src_rect,
66 const MathUtil::Rectangle<u32>& dst_rect) override; 66 const Common::Rectangle<u32>& dst_rect) override;
67 bool AccelerateDisplay(const Tegra::FramebufferConfig& config, VAddr framebuffer_addr, 67 bool AccelerateDisplay(const Tegra::FramebufferConfig& config, VAddr framebuffer_addr,
68 u32 pixel_stride) override; 68 u32 pixel_stride) override;
69 bool AccelerateDrawBatch(bool is_indexed) override; 69 bool AccelerateDrawBatch(bool is_indexed) override;
diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
index e6d47ce41..e9eb6e921 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
@@ -3,6 +3,7 @@
3// Refer to the license.txt file included. 3// Refer to the license.txt file included.
4 4
5#include <algorithm> 5#include <algorithm>
6#include <optional>
6#include <glad/glad.h> 7#include <glad/glad.h>
7 8
8#include "common/alignment.h" 9#include "common/alignment.h"
@@ -20,7 +21,7 @@
20#include "video_core/renderer_opengl/gl_rasterizer_cache.h" 21#include "video_core/renderer_opengl/gl_rasterizer_cache.h"
21#include "video_core/renderer_opengl/utils.h" 22#include "video_core/renderer_opengl/utils.h"
22#include "video_core/surface.h" 23#include "video_core/surface.h"
23#include "video_core/textures/astc.h" 24#include "video_core/textures/convert.h"
24#include "video_core/textures/decoders.h" 25#include "video_core/textures/decoders.h"
25 26
26namespace OpenGL { 27namespace OpenGL {
@@ -399,7 +400,28 @@ static const FormatTuple& GetFormatTuple(PixelFormat pixel_format, ComponentType
399 return format; 400 return format;
400} 401}
401 402
402MathUtil::Rectangle<u32> SurfaceParams::GetRect(u32 mip_level) const { 403/// Returns the discrepant array target
404constexpr GLenum GetArrayDiscrepantTarget(SurfaceTarget target) {
405 switch (target) {
406 case SurfaceTarget::Texture1D:
407 return GL_TEXTURE_1D_ARRAY;
408 case SurfaceTarget::Texture2D:
409 return GL_TEXTURE_2D_ARRAY;
410 case SurfaceTarget::Texture3D:
411 return GL_NONE;
412 case SurfaceTarget::Texture1DArray:
413 return GL_TEXTURE_1D;
414 case SurfaceTarget::Texture2DArray:
415 return GL_TEXTURE_2D;
416 case SurfaceTarget::TextureCubemap:
417 return GL_TEXTURE_CUBE_MAP_ARRAY;
418 case SurfaceTarget::TextureCubeArray:
419 return GL_TEXTURE_CUBE_MAP;
420 }
421 return GL_NONE;
422}
423
424Common::Rectangle<u32> SurfaceParams::GetRect(u32 mip_level) const {
403 u32 actual_height{std::max(1U, unaligned_height >> mip_level)}; 425 u32 actual_height{std::max(1U, unaligned_height >> mip_level)};
404 if (IsPixelFormatASTC(pixel_format)) { 426 if (IsPixelFormatASTC(pixel_format)) {
405 // ASTC formats must stop at the ATSC block size boundary 427 // ASTC formats must stop at the ATSC block size boundary
@@ -549,6 +571,8 @@ CachedSurface::CachedSurface(const SurfaceParams& params)
549 // alternatives. This signals a bug on those functions. 571 // alternatives. This signals a bug on those functions.
550 const auto width = static_cast<GLsizei>(params.MipWidth(0)); 572 const auto width = static_cast<GLsizei>(params.MipWidth(0));
551 const auto height = static_cast<GLsizei>(params.MipHeight(0)); 573 const auto height = static_cast<GLsizei>(params.MipHeight(0));
574 memory_size = params.MemorySize();
575 reinterpreted = false;
552 576
553 const auto& format_tuple = GetFormatTuple(params.pixel_format, params.component_type); 577 const auto& format_tuple = GetFormatTuple(params.pixel_format, params.component_type);
554 gl_internal_format = format_tuple.internal_format; 578 gl_internal_format = format_tuple.internal_format;
@@ -594,103 +618,6 @@ CachedSurface::CachedSurface(const SurfaceParams& params)
594 } 618 }
595} 619}
596 620
597static void ConvertS8Z24ToZ24S8(std::vector<u8>& data, u32 width, u32 height, bool reverse) {
598 union S8Z24 {
599 BitField<0, 24, u32> z24;
600 BitField<24, 8, u32> s8;
601 };
602 static_assert(sizeof(S8Z24) == 4, "S8Z24 is incorrect size");
603
604 union Z24S8 {
605 BitField<0, 8, u32> s8;
606 BitField<8, 24, u32> z24;
607 };
608 static_assert(sizeof(Z24S8) == 4, "Z24S8 is incorrect size");
609
610 S8Z24 s8z24_pixel{};
611 Z24S8 z24s8_pixel{};
612 constexpr auto bpp{GetBytesPerPixel(PixelFormat::S8Z24)};
613 for (std::size_t y = 0; y < height; ++y) {
614 for (std::size_t x = 0; x < width; ++x) {
615 const std::size_t offset{bpp * (y * width + x)};
616 if (reverse) {
617 std::memcpy(&z24s8_pixel, &data[offset], sizeof(Z24S8));
618 s8z24_pixel.s8.Assign(z24s8_pixel.s8);
619 s8z24_pixel.z24.Assign(z24s8_pixel.z24);
620 std::memcpy(&data[offset], &s8z24_pixel, sizeof(S8Z24));
621 } else {
622 std::memcpy(&s8z24_pixel, &data[offset], sizeof(S8Z24));
623 z24s8_pixel.s8.Assign(s8z24_pixel.s8);
624 z24s8_pixel.z24.Assign(s8z24_pixel.z24);
625 std::memcpy(&data[offset], &z24s8_pixel, sizeof(Z24S8));
626 }
627 }
628 }
629}
630
631/**
632 * Helper function to perform software conversion (as needed) when loading a buffer from Switch
633 * memory. This is for Maxwell pixel formats that cannot be represented as-is in OpenGL or with
634 * typical desktop GPUs.
635 */
636static void ConvertFormatAsNeeded_LoadGLBuffer(std::vector<u8>& data, PixelFormat pixel_format,
637 u32 width, u32 height, u32 depth) {
638 switch (pixel_format) {
639 case PixelFormat::ASTC_2D_4X4:
640 case PixelFormat::ASTC_2D_8X8:
641 case PixelFormat::ASTC_2D_8X5:
642 case PixelFormat::ASTC_2D_5X4:
643 case PixelFormat::ASTC_2D_5X5:
644 case PixelFormat::ASTC_2D_4X4_SRGB:
645 case PixelFormat::ASTC_2D_8X8_SRGB:
646 case PixelFormat::ASTC_2D_8X5_SRGB:
647 case PixelFormat::ASTC_2D_5X4_SRGB:
648 case PixelFormat::ASTC_2D_5X5_SRGB:
649 case PixelFormat::ASTC_2D_10X8:
650 case PixelFormat::ASTC_2D_10X8_SRGB: {
651 // Convert ASTC pixel formats to RGBA8, as most desktop GPUs do not support ASTC.
652 u32 block_width{};
653 u32 block_height{};
654 std::tie(block_width, block_height) = GetASTCBlockSize(pixel_format);
655 data =
656 Tegra::Texture::ASTC::Decompress(data, width, height, depth, block_width, block_height);
657 break;
658 }
659 case PixelFormat::S8Z24:
660 // Convert the S8Z24 depth format to Z24S8, as OpenGL does not support S8Z24.
661 ConvertS8Z24ToZ24S8(data, width, height, false);
662 break;
663 }
664}
665
666/**
667 * Helper function to perform software conversion (as needed) when flushing a buffer from OpenGL to
668 * Switch memory. This is for Maxwell pixel formats that cannot be represented as-is in OpenGL or
669 * with typical desktop GPUs.
670 */
671static void ConvertFormatAsNeeded_FlushGLBuffer(std::vector<u8>& data, PixelFormat pixel_format,
672 u32 width, u32 height) {
673 switch (pixel_format) {
674 case PixelFormat::ASTC_2D_4X4:
675 case PixelFormat::ASTC_2D_8X8:
676 case PixelFormat::ASTC_2D_4X4_SRGB:
677 case PixelFormat::ASTC_2D_8X8_SRGB:
678 case PixelFormat::ASTC_2D_5X5:
679 case PixelFormat::ASTC_2D_5X5_SRGB:
680 case PixelFormat::ASTC_2D_10X8:
681 case PixelFormat::ASTC_2D_10X8_SRGB: {
682 LOG_CRITICAL(HW_GPU, "Conversion of format {} after texture flushing is not implemented",
683 static_cast<u32>(pixel_format));
684 UNREACHABLE();
685 break;
686 }
687 case PixelFormat::S8Z24:
688 // Convert the Z24S8 depth format to S8Z24, as OpenGL does not support S8Z24.
689 ConvertS8Z24ToZ24S8(data, width, height, true);
690 break;
691 }
692}
693
694MICROPROFILE_DEFINE(OpenGL_SurfaceLoad, "OpenGL", "Surface Load", MP_RGB(128, 192, 64)); 621MICROPROFILE_DEFINE(OpenGL_SurfaceLoad, "OpenGL", "Surface Load", MP_RGB(128, 192, 64));
695void CachedSurface::LoadGLBuffer() { 622void CachedSurface::LoadGLBuffer() {
696 MICROPROFILE_SCOPE(OpenGL_SurfaceLoad); 623 MICROPROFILE_SCOPE(OpenGL_SurfaceLoad);
@@ -719,8 +646,16 @@ void CachedSurface::LoadGLBuffer() {
719 } 646 }
720 } 647 }
721 for (u32 i = 0; i < params.max_mip_level; i++) { 648 for (u32 i = 0; i < params.max_mip_level; i++) {
722 ConvertFormatAsNeeded_LoadGLBuffer(gl_buffer[i], params.pixel_format, params.MipWidth(i), 649 const u32 width = params.MipWidth(i);
723 params.MipHeight(i), params.MipDepth(i)); 650 const u32 height = params.MipHeight(i);
651 const u32 depth = params.MipDepth(i);
652 if (VideoCore::Surface::IsPixelFormatASTC(params.pixel_format)) {
653 // Reserve size for RGBA8 conversion
654 constexpr std::size_t rgba_bpp = 4;
655 gl_buffer[i].resize(std::max(gl_buffer[i].size(), width * height * depth * rgba_bpp));
656 }
657 Tegra::Texture::ConvertFromGuestToHost(gl_buffer[i].data(), params.pixel_format, width,
658 height, depth, true, true);
724 } 659 }
725} 660}
726 661
@@ -743,8 +678,8 @@ void CachedSurface::FlushGLBuffer() {
743 glGetTextureImage(texture.handle, 0, tuple.format, tuple.type, 678 glGetTextureImage(texture.handle, 0, tuple.format, tuple.type,
744 static_cast<GLsizei>(gl_buffer[0].size()), gl_buffer[0].data()); 679 static_cast<GLsizei>(gl_buffer[0].size()), gl_buffer[0].data());
745 glPixelStorei(GL_PACK_ROW_LENGTH, 0); 680 glPixelStorei(GL_PACK_ROW_LENGTH, 0);
746 ConvertFormatAsNeeded_FlushGLBuffer(gl_buffer[0], params.pixel_format, params.width, 681 Tegra::Texture::ConvertFromHostToGuest(gl_buffer[0].data(), params.pixel_format, params.width,
747 params.height); 682 params.height, params.depth, true, true);
748 const u8* const texture_src_data = Memory::GetPointer(params.addr); 683 const u8* const texture_src_data = Memory::GetPointer(params.addr);
749 ASSERT(texture_src_data); 684 ASSERT(texture_src_data);
750 if (params.is_tiled) { 685 if (params.is_tiled) {
@@ -881,20 +816,22 @@ void CachedSurface::UploadGLMipmapTexture(u32 mip_map, GLuint read_fb_handle,
881 glPixelStorei(GL_UNPACK_ROW_LENGTH, 0); 816 glPixelStorei(GL_UNPACK_ROW_LENGTH, 0);
882} 817}
883 818
884void CachedSurface::EnsureTextureView() { 819void CachedSurface::EnsureTextureDiscrepantView() {
885 if (texture_view.handle != 0) 820 if (discrepant_view.handle != 0)
886 return; 821 return;
887 822
888 const GLenum target{TargetLayer()}; 823 const GLenum target{GetArrayDiscrepantTarget(params.target)};
824 ASSERT(target != GL_NONE);
825
889 const GLuint num_layers{target == GL_TEXTURE_CUBE_MAP_ARRAY ? 6u : 1u}; 826 const GLuint num_layers{target == GL_TEXTURE_CUBE_MAP_ARRAY ? 6u : 1u};
890 constexpr GLuint min_layer = 0; 827 constexpr GLuint min_layer = 0;
891 constexpr GLuint min_level = 0; 828 constexpr GLuint min_level = 0;
892 829
893 glGenTextures(1, &texture_view.handle); 830 glGenTextures(1, &discrepant_view.handle);
894 glTextureView(texture_view.handle, target, texture.handle, gl_internal_format, min_level, 831 glTextureView(discrepant_view.handle, target, texture.handle, gl_internal_format, min_level,
895 params.max_mip_level, min_layer, num_layers); 832 params.max_mip_level, min_layer, num_layers);
896 ApplyTextureDefaults(texture_view.handle, params.max_mip_level); 833 ApplyTextureDefaults(discrepant_view.handle, params.max_mip_level);
897 glTextureParameteriv(texture_view.handle, GL_TEXTURE_SWIZZLE_RGBA, 834 glTextureParameteriv(discrepant_view.handle, GL_TEXTURE_SWIZZLE_RGBA,
898 reinterpret_cast<const GLint*>(swizzle.data())); 835 reinterpret_cast<const GLint*>(swizzle.data()));
899} 836}
900 837
@@ -920,8 +857,8 @@ void CachedSurface::UpdateSwizzle(Tegra::Texture::SwizzleSource swizzle_x,
920 swizzle = {new_x, new_y, new_z, new_w}; 857 swizzle = {new_x, new_y, new_z, new_w};
921 const auto swizzle_data = reinterpret_cast<const GLint*>(swizzle.data()); 858 const auto swizzle_data = reinterpret_cast<const GLint*>(swizzle.data());
922 glTextureParameteriv(texture.handle, GL_TEXTURE_SWIZZLE_RGBA, swizzle_data); 859 glTextureParameteriv(texture.handle, GL_TEXTURE_SWIZZLE_RGBA, swizzle_data);
923 if (texture_view.handle != 0) { 860 if (discrepant_view.handle != 0) {
924 glTextureParameteriv(texture_view.handle, GL_TEXTURE_SWIZZLE_RGBA, swizzle_data); 861 glTextureParameteriv(discrepant_view.handle, GL_TEXTURE_SWIZZLE_RGBA, swizzle_data);
925 } 862 }
926} 863}
927 864
@@ -962,30 +899,31 @@ Surface RasterizerCacheOpenGL::GetColorBufferSurface(std::size_t index, bool pre
962 auto& gpu{Core::System::GetInstance().GPU().Maxwell3D()}; 899 auto& gpu{Core::System::GetInstance().GPU().Maxwell3D()};
963 const auto& regs{gpu.regs}; 900 const auto& regs{gpu.regs};
964 901
965 if ((gpu.dirty_flags.color_buffer & (1u << static_cast<u32>(index))) == 0) { 902 if (!gpu.dirty_flags.color_buffer[index]) {
966 return last_color_buffers[index]; 903 return current_color_buffers[index];
967 } 904 }
968 gpu.dirty_flags.color_buffer &= ~(1u << static_cast<u32>(index)); 905 gpu.dirty_flags.color_buffer.reset(index);
969 906
970 ASSERT(index < Tegra::Engines::Maxwell3D::Regs::NumRenderTargets); 907 ASSERT(index < Tegra::Engines::Maxwell3D::Regs::NumRenderTargets);
971 908
972 if (index >= regs.rt_control.count) { 909 if (index >= regs.rt_control.count) {
973 return last_color_buffers[index] = {}; 910 return current_color_buffers[index] = {};
974 } 911 }
975 912
976 if (regs.rt[index].Address() == 0 || regs.rt[index].format == Tegra::RenderTargetFormat::NONE) { 913 if (regs.rt[index].Address() == 0 || regs.rt[index].format == Tegra::RenderTargetFormat::NONE) {
977 return last_color_buffers[index] = {}; 914 return current_color_buffers[index] = {};
978 } 915 }
979 916
980 const SurfaceParams color_params{SurfaceParams::CreateForFramebuffer(index)}; 917 const SurfaceParams color_params{SurfaceParams::CreateForFramebuffer(index)};
981 918
982 return last_color_buffers[index] = GetSurface(color_params, preserve_contents); 919 return current_color_buffers[index] = GetSurface(color_params, preserve_contents);
983} 920}
984 921
985void RasterizerCacheOpenGL::LoadSurface(const Surface& surface) { 922void RasterizerCacheOpenGL::LoadSurface(const Surface& surface) {
986 surface->LoadGLBuffer(); 923 surface->LoadGLBuffer();
987 surface->UploadGLTexture(read_framebuffer.handle, draw_framebuffer.handle); 924 surface->UploadGLTexture(read_framebuffer.handle, draw_framebuffer.handle);
988 surface->MarkAsModified(false, *this); 925 surface->MarkAsModified(false, *this);
926 surface->MarkForReload(false);
989} 927}
990 928
991Surface RasterizerCacheOpenGL::GetSurface(const SurfaceParams& params, bool preserve_contents) { 929Surface RasterizerCacheOpenGL::GetSurface(const SurfaceParams& params, bool preserve_contents) {
@@ -997,18 +935,23 @@ Surface RasterizerCacheOpenGL::GetSurface(const SurfaceParams& params, bool pres
997 Surface surface{TryGet(params.addr)}; 935 Surface surface{TryGet(params.addr)};
998 if (surface) { 936 if (surface) {
999 if (surface->GetSurfaceParams().IsCompatibleSurface(params)) { 937 if (surface->GetSurfaceParams().IsCompatibleSurface(params)) {
1000 // Use the cached surface as-is 938 // Use the cached surface as-is unless it's not synced with memory
939 if (surface->MustReload())
940 LoadSurface(surface);
1001 return surface; 941 return surface;
1002 } else if (preserve_contents) { 942 } else if (preserve_contents) {
1003 // If surface parameters changed and we care about keeping the previous data, recreate 943 // If surface parameters changed and we care about keeping the previous data, recreate
1004 // the surface from the old one 944 // the surface from the old one
1005 Surface new_surface{RecreateSurface(surface, params)}; 945 Surface new_surface{RecreateSurface(surface, params)};
1006 Unregister(surface); 946 UnregisterSurface(surface);
1007 Register(new_surface); 947 Register(new_surface);
948 if (new_surface->IsUploaded()) {
949 RegisterReinterpretSurface(new_surface);
950 }
1008 return new_surface; 951 return new_surface;
1009 } else { 952 } else {
1010 // Delete the old surface before creating a new one to prevent collisions. 953 // Delete the old surface before creating a new one to prevent collisions.
1011 Unregister(surface); 954 UnregisterSurface(surface);
1012 } 955 }
1013 } 956 }
1014 957
@@ -1062,8 +1005,8 @@ void RasterizerCacheOpenGL::FastLayeredCopySurface(const Surface& src_surface,
1062} 1005}
1063 1006
1064static bool BlitSurface(const Surface& src_surface, const Surface& dst_surface, 1007static bool BlitSurface(const Surface& src_surface, const Surface& dst_surface,
1065 const MathUtil::Rectangle<u32>& src_rect, 1008 const Common::Rectangle<u32>& src_rect,
1066 const MathUtil::Rectangle<u32>& dst_rect, GLuint read_fb_handle, 1009 const Common::Rectangle<u32>& dst_rect, GLuint read_fb_handle,
1067 GLuint draw_fb_handle, GLenum src_attachment = 0, GLenum dst_attachment = 0, 1010 GLuint draw_fb_handle, GLenum src_attachment = 0, GLenum dst_attachment = 0,
1068 std::size_t cubemap_face = 0) { 1011 std::size_t cubemap_face = 0) {
1069 1012
@@ -1193,7 +1136,7 @@ static bool BlitSurface(const Surface& src_surface, const Surface& dst_surface,
1193void RasterizerCacheOpenGL::FermiCopySurface( 1136void RasterizerCacheOpenGL::FermiCopySurface(
1194 const Tegra::Engines::Fermi2D::Regs::Surface& src_config, 1137 const Tegra::Engines::Fermi2D::Regs::Surface& src_config,
1195 const Tegra::Engines::Fermi2D::Regs::Surface& dst_config, 1138 const Tegra::Engines::Fermi2D::Regs::Surface& dst_config,
1196 const MathUtil::Rectangle<u32>& src_rect, const MathUtil::Rectangle<u32>& dst_rect) { 1139 const Common::Rectangle<u32>& src_rect, const Common::Rectangle<u32>& dst_rect) {
1197 1140
1198 const auto& src_params = SurfaceParams::CreateForFermiCopySurface(src_config); 1141 const auto& src_params = SurfaceParams::CreateForFermiCopySurface(src_config);
1199 const auto& dst_params = SurfaceParams::CreateForFermiCopySurface(dst_config); 1142 const auto& dst_params = SurfaceParams::CreateForFermiCopySurface(dst_config);
@@ -1290,4 +1233,107 @@ Surface RasterizerCacheOpenGL::TryGetReservedSurface(const SurfaceParams& params
1290 return {}; 1233 return {};
1291} 1234}
1292 1235
1236static std::optional<u32> TryFindBestMipMap(std::size_t memory, const SurfaceParams params,
1237 u32 height) {
1238 for (u32 i = 0; i < params.max_mip_level; i++) {
1239 if (memory == params.GetMipmapSingleSize(i) && params.MipHeight(i) == height) {
1240 return {i};
1241 }
1242 }
1243 return {};
1244}
1245
1246static std::optional<u32> TryFindBestLayer(VAddr addr, const SurfaceParams params, u32 mipmap) {
1247 const std::size_t size = params.LayerMemorySize();
1248 VAddr start = params.addr + params.GetMipmapLevelOffset(mipmap);
1249 for (u32 i = 0; i < params.depth; i++) {
1250 if (start == addr) {
1251 return {i};
1252 }
1253 start += size;
1254 }
1255 return {};
1256}
1257
1258static bool LayerFitReinterpretSurface(RasterizerCacheOpenGL& cache, const Surface render_surface,
1259 const Surface blitted_surface) {
1260 const auto& dst_params = blitted_surface->GetSurfaceParams();
1261 const auto& src_params = render_surface->GetSurfaceParams();
1262 const std::size_t src_memory_size = src_params.size_in_bytes;
1263 const std::optional<u32> level =
1264 TryFindBestMipMap(src_memory_size, dst_params, src_params.height);
1265 if (level.has_value()) {
1266 if (src_params.width == dst_params.MipWidthGobAligned(*level) &&
1267 src_params.height == dst_params.MipHeight(*level) &&
1268 src_params.block_height >= dst_params.MipBlockHeight(*level)) {
1269 const std::optional<u32> slot =
1270 TryFindBestLayer(render_surface->GetAddr(), dst_params, *level);
1271 if (slot.has_value()) {
1272 glCopyImageSubData(render_surface->Texture().handle,
1273 SurfaceTargetToGL(src_params.target), 0, 0, 0, 0,
1274 blitted_surface->Texture().handle,
1275 SurfaceTargetToGL(dst_params.target), *level, 0, 0, *slot,
1276 dst_params.MipWidth(*level), dst_params.MipHeight(*level), 1);
1277 blitted_surface->MarkAsModified(true, cache);
1278 return true;
1279 }
1280 }
1281 }
1282 return false;
1283}
1284
1285static bool IsReinterpretInvalid(const Surface render_surface, const Surface blitted_surface) {
1286 const VAddr bound1 = blitted_surface->GetAddr() + blitted_surface->GetMemorySize();
1287 const VAddr bound2 = render_surface->GetAddr() + render_surface->GetMemorySize();
1288 if (bound2 > bound1)
1289 return true;
1290 const auto& dst_params = blitted_surface->GetSurfaceParams();
1291 const auto& src_params = render_surface->GetSurfaceParams();
1292 return (dst_params.component_type != src_params.component_type);
1293}
1294
1295static bool IsReinterpretInvalidSecond(const Surface render_surface,
1296 const Surface blitted_surface) {
1297 const auto& dst_params = blitted_surface->GetSurfaceParams();
1298 const auto& src_params = render_surface->GetSurfaceParams();
1299 return (dst_params.height > src_params.height && dst_params.width > src_params.width);
1300}
1301
1302bool RasterizerCacheOpenGL::PartialReinterpretSurface(Surface triggering_surface,
1303 Surface intersect) {
1304 if (IsReinterpretInvalid(triggering_surface, intersect)) {
1305 UnregisterSurface(intersect);
1306 return false;
1307 }
1308 if (!LayerFitReinterpretSurface(*this, triggering_surface, intersect)) {
1309 if (IsReinterpretInvalidSecond(triggering_surface, intersect)) {
1310 UnregisterSurface(intersect);
1311 return false;
1312 }
1313 FlushObject(intersect);
1314 FlushObject(triggering_surface);
1315 intersect->MarkForReload(true);
1316 }
1317 return true;
1318}
1319
1320void RasterizerCacheOpenGL::SignalPreDrawCall() {
1321 if (texception && GLAD_GL_ARB_texture_barrier) {
1322 glTextureBarrier();
1323 }
1324 texception = false;
1325}
1326
1327void RasterizerCacheOpenGL::SignalPostDrawCall() {
1328 for (u32 i = 0; i < Maxwell::NumRenderTargets; i++) {
1329 if (current_color_buffers[i] != nullptr) {
1330 Surface intersect = CollideOnReinterpretedSurface(current_color_buffers[i]->GetAddr());
1331 if (intersect != nullptr) {
1332 PartialReinterpretSurface(current_color_buffers[i], intersect);
1333 texception = true;
1334 }
1335 }
1336 }
1337}
1338
1293} // namespace OpenGL 1339} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.h b/src/video_core/renderer_opengl/gl_rasterizer_cache.h
index 89d733c50..9cf6f50be 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer_cache.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.h
@@ -28,12 +28,13 @@ namespace OpenGL {
28 28
29class CachedSurface; 29class CachedSurface;
30using Surface = std::shared_ptr<CachedSurface>; 30using Surface = std::shared_ptr<CachedSurface>;
31using SurfaceSurfaceRect_Tuple = std::tuple<Surface, Surface, MathUtil::Rectangle<u32>>; 31using SurfaceSurfaceRect_Tuple = std::tuple<Surface, Surface, Common::Rectangle<u32>>;
32 32
33using SurfaceTarget = VideoCore::Surface::SurfaceTarget; 33using SurfaceTarget = VideoCore::Surface::SurfaceTarget;
34using SurfaceType = VideoCore::Surface::SurfaceType; 34using SurfaceType = VideoCore::Surface::SurfaceType;
35using PixelFormat = VideoCore::Surface::PixelFormat; 35using PixelFormat = VideoCore::Surface::PixelFormat;
36using ComponentType = VideoCore::Surface::ComponentType; 36using ComponentType = VideoCore::Surface::ComponentType;
37using Maxwell = Tegra::Engines::Maxwell3D::Regs;
37 38
38struct SurfaceParams { 39struct SurfaceParams {
39 enum class SurfaceClass { 40 enum class SurfaceClass {
@@ -71,7 +72,7 @@ struct SurfaceParams {
71 } 72 }
72 73
73 /// Returns the rectangle corresponding to this surface 74 /// Returns the rectangle corresponding to this surface
74 MathUtil::Rectangle<u32> GetRect(u32 mip_level = 0) const; 75 Common::Rectangle<u32> GetRect(u32 mip_level = 0) const;
75 76
76 /// Returns the total size of this surface in bytes, adjusted for compression 77 /// Returns the total size of this surface in bytes, adjusted for compression
77 std::size_t SizeInBytesRaw(bool ignore_tiled = false) const { 78 std::size_t SizeInBytesRaw(bool ignore_tiled = false) const {
@@ -140,10 +141,18 @@ struct SurfaceParams {
140 return offset; 141 return offset;
141 } 142 }
142 143
144 std::size_t GetMipmapSingleSize(u32 mip_level) const {
145 return InnerMipmapMemorySize(mip_level, false, is_layered);
146 }
147
143 u32 MipWidth(u32 mip_level) const { 148 u32 MipWidth(u32 mip_level) const {
144 return std::max(1U, width >> mip_level); 149 return std::max(1U, width >> mip_level);
145 } 150 }
146 151
152 u32 MipWidthGobAligned(u32 mip_level) const {
153 return Common::AlignUp(std::max(1U, width >> mip_level), 64U * 8U / GetFormatBpp());
154 }
155
147 u32 MipHeight(u32 mip_level) const { 156 u32 MipHeight(u32 mip_level) const {
148 return std::max(1U, height >> mip_level); 157 return std::max(1U, height >> mip_level);
149 } 158 }
@@ -346,6 +355,10 @@ public:
346 return cached_size_in_bytes; 355 return cached_size_in_bytes;
347 } 356 }
348 357
358 std::size_t GetMemorySize() const {
359 return memory_size;
360 }
361
349 void Flush() override { 362 void Flush() override {
350 FlushGLBuffer(); 363 FlushGLBuffer();
351 } 364 }
@@ -354,31 +367,19 @@ public:
354 return texture; 367 return texture;
355 } 368 }
356 369
357 const OGLTexture& TextureLayer() { 370 const OGLTexture& Texture(bool as_array) {
358 if (params.is_array) { 371 if (params.is_array == as_array) {
359 return Texture(); 372 return texture;
373 } else {
374 EnsureTextureDiscrepantView();
375 return discrepant_view;
360 } 376 }
361 EnsureTextureView();
362 return texture_view;
363 } 377 }
364 378
365 GLenum Target() const { 379 GLenum Target() const {
366 return gl_target; 380 return gl_target;
367 } 381 }
368 382
369 GLenum TargetLayer() const {
370 using VideoCore::Surface::SurfaceTarget;
371 switch (params.target) {
372 case SurfaceTarget::Texture1D:
373 return GL_TEXTURE_1D_ARRAY;
374 case SurfaceTarget::Texture2D:
375 return GL_TEXTURE_2D_ARRAY;
376 case SurfaceTarget::TextureCubemap:
377 return GL_TEXTURE_CUBE_MAP_ARRAY;
378 }
379 return Target();
380 }
381
382 const SurfaceParams& GetSurfaceParams() const { 383 const SurfaceParams& GetSurfaceParams() const {
383 return params; 384 return params;
384 } 385 }
@@ -395,19 +396,42 @@ public:
395 Tegra::Texture::SwizzleSource swizzle_z, 396 Tegra::Texture::SwizzleSource swizzle_z,
396 Tegra::Texture::SwizzleSource swizzle_w); 397 Tegra::Texture::SwizzleSource swizzle_w);
397 398
399 void MarkReinterpreted() {
400 reinterpreted = true;
401 }
402
403 bool IsReinterpreted() const {
404 return reinterpreted;
405 }
406
407 void MarkForReload(bool reload) {
408 must_reload = reload;
409 }
410
411 bool MustReload() const {
412 return must_reload;
413 }
414
415 bool IsUploaded() const {
416 return params.identity == SurfaceParams::SurfaceClass::Uploaded;
417 }
418
398private: 419private:
399 void UploadGLMipmapTexture(u32 mip_map, GLuint read_fb_handle, GLuint draw_fb_handle); 420 void UploadGLMipmapTexture(u32 mip_map, GLuint read_fb_handle, GLuint draw_fb_handle);
400 421
401 void EnsureTextureView(); 422 void EnsureTextureDiscrepantView();
402 423
403 OGLTexture texture; 424 OGLTexture texture;
404 OGLTexture texture_view; 425 OGLTexture discrepant_view;
405 std::vector<std::vector<u8>> gl_buffer; 426 std::vector<std::vector<u8>> gl_buffer;
406 SurfaceParams params{}; 427 SurfaceParams params{};
407 GLenum gl_target{}; 428 GLenum gl_target{};
408 GLenum gl_internal_format{}; 429 GLenum gl_internal_format{};
409 std::size_t cached_size_in_bytes{}; 430 std::size_t cached_size_in_bytes{};
410 std::array<GLenum, 4> swizzle{GL_RED, GL_GREEN, GL_BLUE, GL_ALPHA}; 431 std::array<GLenum, 4> swizzle{GL_RED, GL_GREEN, GL_BLUE, GL_ALPHA};
432 std::size_t memory_size;
433 bool reinterpreted = false;
434 bool must_reload = false;
411}; 435};
412 436
413class RasterizerCacheOpenGL final : public RasterizerCache<Surface> { 437class RasterizerCacheOpenGL final : public RasterizerCache<Surface> {
@@ -430,8 +454,11 @@ public:
430 /// Copies the contents of one surface to another 454 /// Copies the contents of one surface to another
431 void FermiCopySurface(const Tegra::Engines::Fermi2D::Regs::Surface& src_config, 455 void FermiCopySurface(const Tegra::Engines::Fermi2D::Regs::Surface& src_config,
432 const Tegra::Engines::Fermi2D::Regs::Surface& dst_config, 456 const Tegra::Engines::Fermi2D::Regs::Surface& dst_config,
433 const MathUtil::Rectangle<u32>& src_rect, 457 const Common::Rectangle<u32>& src_rect,
434 const MathUtil::Rectangle<u32>& dst_rect); 458 const Common::Rectangle<u32>& dst_rect);
459
460 void SignalPreDrawCall();
461 void SignalPostDrawCall();
435 462
436private: 463private:
437 void LoadSurface(const Surface& surface); 464 void LoadSurface(const Surface& surface);
@@ -449,6 +476,10 @@ private:
449 /// Tries to get a reserved surface for the specified parameters 476 /// Tries to get a reserved surface for the specified parameters
450 Surface TryGetReservedSurface(const SurfaceParams& params); 477 Surface TryGetReservedSurface(const SurfaceParams& params);
451 478
479 // Partialy reinterpret a surface based on a triggering_surface that collides with it.
480 // returns true if the reinterpret was successful, false in case it was not.
481 bool PartialReinterpretSurface(Surface triggering_surface, Surface intersect);
482
452 /// Performs a slow but accurate surface copy, flushing to RAM and reinterpreting the data 483 /// Performs a slow but accurate surface copy, flushing to RAM and reinterpreting the data
453 void AccurateCopySurface(const Surface& src_surface, const Surface& dst_surface); 484 void AccurateCopySurface(const Surface& src_surface, const Surface& dst_surface);
454 void FastLayeredCopySurface(const Surface& src_surface, const Surface& dst_surface); 485 void FastLayeredCopySurface(const Surface& src_surface, const Surface& dst_surface);
@@ -465,12 +496,50 @@ private:
465 OGLFramebuffer read_framebuffer; 496 OGLFramebuffer read_framebuffer;
466 OGLFramebuffer draw_framebuffer; 497 OGLFramebuffer draw_framebuffer;
467 498
499 bool texception = false;
500
468 /// Use a Pixel Buffer Object to download the previous texture and then upload it to the new one 501 /// Use a Pixel Buffer Object to download the previous texture and then upload it to the new one
469 /// using the new format. 502 /// using the new format.
470 OGLBuffer copy_pbo; 503 OGLBuffer copy_pbo;
471 504
472 std::array<Surface, Tegra::Engines::Maxwell3D::Regs::NumRenderTargets> last_color_buffers; 505 std::array<Surface, Maxwell::NumRenderTargets> last_color_buffers;
506 std::array<Surface, Maxwell::NumRenderTargets> current_color_buffers;
473 Surface last_depth_buffer; 507 Surface last_depth_buffer;
508
509 using SurfaceIntervalCache = boost::icl::interval_map<VAddr, Surface>;
510 using SurfaceInterval = typename SurfaceIntervalCache::interval_type;
511
512 static auto GetReinterpretInterval(const Surface& object) {
513 return SurfaceInterval::right_open(object->GetAddr() + 1,
514 object->GetAddr() + object->GetMemorySize() - 1);
515 }
516
517 // Reinterpreted surfaces are very fragil as the game may keep rendering into them.
518 SurfaceIntervalCache reinterpreted_surfaces;
519
520 void RegisterReinterpretSurface(Surface reinterpret_surface) {
521 auto interval = GetReinterpretInterval(reinterpret_surface);
522 reinterpreted_surfaces.insert({interval, reinterpret_surface});
523 reinterpret_surface->MarkReinterpreted();
524 }
525
526 Surface CollideOnReinterpretedSurface(VAddr addr) const {
527 const SurfaceInterval interval{addr};
528 for (auto& pair :
529 boost::make_iterator_range(reinterpreted_surfaces.equal_range(interval))) {
530 return pair.second;
531 }
532 return nullptr;
533 }
534
535 /// Unregisters an object from the cache
536 void UnregisterSurface(const Surface& object) {
537 if (object->IsReinterpreted()) {
538 auto interval = GetReinterpretInterval(object);
539 reinterpreted_surfaces.erase(interval);
540 }
541 Unregister(object);
542 }
474}; 543};
475 544
476} // namespace OpenGL 545} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp b/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp
index 81882822b..82fc4d44b 100644
--- a/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp
@@ -2,8 +2,6 @@
2// Licensed under GPLv2 or any later version 2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included. 3// Refer to the license.txt file included.
4 4
5#pragma once
6
7#include <cstring> 5#include <cstring>
8#include <fmt/format.h> 6#include <fmt/format.h>
9#include <lz4.h> 7#include <lz4.h>
diff --git a/src/video_core/renderer_opengl/gl_state.cpp b/src/video_core/renderer_opengl/gl_state.cpp
index 219f08053..9419326a3 100644
--- a/src/video_core/renderer_opengl/gl_state.cpp
+++ b/src/video_core/renderer_opengl/gl_state.cpp
@@ -461,7 +461,7 @@ void OpenGLState::ApplyTextures() const {
461 461
462 if (has_delta) { 462 if (has_delta) {
463 glBindTextures(static_cast<GLuint>(first), static_cast<GLsizei>(last - first + 1), 463 glBindTextures(static_cast<GLuint>(first), static_cast<GLsizei>(last - first + 1),
464 textures.data()); 464 textures.data() + first);
465 } 465 }
466} 466}
467 467
@@ -482,7 +482,7 @@ void OpenGLState::ApplySamplers() const {
482 } 482 }
483 if (has_delta) { 483 if (has_delta) {
484 glBindSamplers(static_cast<GLuint>(first), static_cast<GLsizei>(last - first + 1), 484 glBindSamplers(static_cast<GLuint>(first), static_cast<GLsizei>(last - first + 1),
485 samplers.data()); 485 samplers.data() + first);
486 } 486 }
487} 487}
488 488
diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp
index 272fc2e8e..8b510b6ae 100644
--- a/src/video_core/renderer_opengl/renderer_opengl.cpp
+++ b/src/video_core/renderer_opengl/renderer_opengl.cpp
@@ -244,6 +244,21 @@ void RendererOpenGL::InitOpenGLObjects() {
244 LoadColorToActiveGLTexture(0, 0, 0, 0, screen_info.texture); 244 LoadColorToActiveGLTexture(0, 0, 0, 0, screen_info.texture);
245} 245}
246 246
247void RendererOpenGL::AddTelemetryFields() {
248 const char* const gl_version{reinterpret_cast<char const*>(glGetString(GL_VERSION))};
249 const char* const gpu_vendor{reinterpret_cast<char const*>(glGetString(GL_VENDOR))};
250 const char* const gpu_model{reinterpret_cast<char const*>(glGetString(GL_RENDERER))};
251
252 LOG_INFO(Render_OpenGL, "GL_VERSION: {}", gl_version);
253 LOG_INFO(Render_OpenGL, "GL_VENDOR: {}", gpu_vendor);
254 LOG_INFO(Render_OpenGL, "GL_RENDERER: {}", gpu_model);
255
256 auto& telemetry_session = system.TelemetrySession();
257 telemetry_session.AddField(Telemetry::FieldType::UserSystem, "GPU_Vendor", gpu_vendor);
258 telemetry_session.AddField(Telemetry::FieldType::UserSystem, "GPU_Model", gpu_model);
259 telemetry_session.AddField(Telemetry::FieldType::UserSystem, "GPU_OpenGL_Version", gl_version);
260}
261
247void RendererOpenGL::CreateRasterizer() { 262void RendererOpenGL::CreateRasterizer() {
248 if (rasterizer) { 263 if (rasterizer) {
249 return; 264 return;
@@ -257,6 +272,7 @@ void RendererOpenGL::ConfigureFramebufferTexture(TextureInfo& texture,
257 const Tegra::FramebufferConfig& framebuffer) { 272 const Tegra::FramebufferConfig& framebuffer) {
258 texture.width = framebuffer.width; 273 texture.width = framebuffer.width;
259 texture.height = framebuffer.height; 274 texture.height = framebuffer.height;
275 texture.pixel_format = framebuffer.pixel_format;
260 276
261 GLint internal_format; 277 GLint internal_format;
262 switch (framebuffer.pixel_format) { 278 switch (framebuffer.pixel_format) {
@@ -465,17 +481,7 @@ bool RendererOpenGL::Init() {
465 glDebugMessageCallback(DebugHandler, nullptr); 481 glDebugMessageCallback(DebugHandler, nullptr);
466 } 482 }
467 483
468 const char* gl_version{reinterpret_cast<char const*>(glGetString(GL_VERSION))}; 484 AddTelemetryFields();
469 const char* gpu_vendor{reinterpret_cast<char const*>(glGetString(GL_VENDOR))};
470 const char* gpu_model{reinterpret_cast<char const*>(glGetString(GL_RENDERER))};
471
472 LOG_INFO(Render_OpenGL, "GL_VERSION: {}", gl_version);
473 LOG_INFO(Render_OpenGL, "GL_VENDOR: {}", gpu_vendor);
474 LOG_INFO(Render_OpenGL, "GL_RENDERER: {}", gpu_model);
475
476 Core::Telemetry().AddField(Telemetry::FieldType::UserSystem, "GPU_Vendor", gpu_vendor);
477 Core::Telemetry().AddField(Telemetry::FieldType::UserSystem, "GPU_Model", gpu_model);
478 Core::Telemetry().AddField(Telemetry::FieldType::UserSystem, "GPU_OpenGL_Version", gl_version);
479 485
480 if (!GLAD_GL_VERSION_4_3) { 486 if (!GLAD_GL_VERSION_4_3) {
481 return false; 487 return false;
diff --git a/src/video_core/renderer_opengl/renderer_opengl.h b/src/video_core/renderer_opengl/renderer_opengl.h
index 7e13e566b..6cbf9d2cb 100644
--- a/src/video_core/renderer_opengl/renderer_opengl.h
+++ b/src/video_core/renderer_opengl/renderer_opengl.h
@@ -39,7 +39,7 @@ struct TextureInfo {
39/// Structure used for storing information about the display target for the Switch screen 39/// Structure used for storing information about the display target for the Switch screen
40struct ScreenInfo { 40struct ScreenInfo {
41 GLuint display_texture; 41 GLuint display_texture;
42 const MathUtil::Rectangle<float> display_texcoords{0.0f, 0.0f, 1.0f, 1.0f}; 42 const Common::Rectangle<float> display_texcoords{0.0f, 0.0f, 1.0f, 1.0f};
43 TextureInfo texture; 43 TextureInfo texture;
44}; 44};
45 45
@@ -60,6 +60,7 @@ public:
60 60
61private: 61private:
62 void InitOpenGLObjects(); 62 void InitOpenGLObjects();
63 void AddTelemetryFields();
63 void CreateRasterizer(); 64 void CreateRasterizer();
64 65
65 void ConfigureFramebufferTexture(TextureInfo& texture, 66 void ConfigureFramebufferTexture(TextureInfo& texture,
@@ -102,7 +103,7 @@ private:
102 103
103 /// Used for transforming the framebuffer orientation 104 /// Used for transforming the framebuffer orientation
104 Tegra::FramebufferConfig::TransformFlags framebuffer_transform_flags; 105 Tegra::FramebufferConfig::TransformFlags framebuffer_transform_flags;
105 MathUtil::Rectangle<int> framebuffer_crop_rect; 106 Common::Rectangle<int> framebuffer_crop_rect;
106}; 107};
107 108
108} // namespace OpenGL 109} // namespace OpenGL
diff --git a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp
new file mode 100644
index 000000000..34bf26ff2
--- /dev/null
+++ b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp
@@ -0,0 +1,483 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/assert.h"
6#include "common/common_types.h"
7#include "common/logging/log.h"
8#include "video_core/engines/maxwell_3d.h"
9#include "video_core/renderer_vulkan/declarations.h"
10#include "video_core/renderer_vulkan/maxwell_to_vk.h"
11#include "video_core/renderer_vulkan/vk_device.h"
12#include "video_core/surface.h"
13
14namespace Vulkan::MaxwellToVK {
15
16namespace Sampler {
17
18vk::Filter Filter(Tegra::Texture::TextureFilter filter) {
19 switch (filter) {
20 case Tegra::Texture::TextureFilter::Linear:
21 return vk::Filter::eLinear;
22 case Tegra::Texture::TextureFilter::Nearest:
23 return vk::Filter::eNearest;
24 }
25 UNIMPLEMENTED_MSG("Unimplemented sampler filter={}", static_cast<u32>(filter));
26 return {};
27}
28
29vk::SamplerMipmapMode MipmapMode(Tegra::Texture::TextureMipmapFilter mipmap_filter) {
30 switch (mipmap_filter) {
31 case Tegra::Texture::TextureMipmapFilter::None:
32 // TODO(Rodrigo): None seems to be mapped to OpenGL's mag and min filters without mipmapping
33 // (e.g. GL_NEAREST and GL_LINEAR). Vulkan doesn't have such a thing, find out if we have to
34 // use an image view with a single mipmap level to emulate this.
35 return vk::SamplerMipmapMode::eLinear;
36 case Tegra::Texture::TextureMipmapFilter::Linear:
37 return vk::SamplerMipmapMode::eLinear;
38 case Tegra::Texture::TextureMipmapFilter::Nearest:
39 return vk::SamplerMipmapMode::eNearest;
40 }
41 UNIMPLEMENTED_MSG("Unimplemented sampler mipmap mode={}", static_cast<u32>(mipmap_filter));
42 return {};
43}
44
45vk::SamplerAddressMode WrapMode(Tegra::Texture::WrapMode wrap_mode) {
46 switch (wrap_mode) {
47 case Tegra::Texture::WrapMode::Wrap:
48 return vk::SamplerAddressMode::eRepeat;
49 case Tegra::Texture::WrapMode::Mirror:
50 return vk::SamplerAddressMode::eMirroredRepeat;
51 case Tegra::Texture::WrapMode::ClampToEdge:
52 return vk::SamplerAddressMode::eClampToEdge;
53 case Tegra::Texture::WrapMode::Border:
54 return vk::SamplerAddressMode::eClampToBorder;
55 case Tegra::Texture::WrapMode::ClampOGL:
56 // TODO(Rodrigo): GL_CLAMP was removed as of OpenGL 3.1, to implement GL_CLAMP, we can use
57 // eClampToBorder to get the border color of the texture, and then sample the edge to
58 // manually mix them. However the shader part of this is not yet implemented.
59 return vk::SamplerAddressMode::eClampToBorder;
60 case Tegra::Texture::WrapMode::MirrorOnceClampToEdge:
61 return vk::SamplerAddressMode::eMirrorClampToEdge;
62 case Tegra::Texture::WrapMode::MirrorOnceBorder:
63 UNIMPLEMENTED();
64 return vk::SamplerAddressMode::eMirrorClampToEdge;
65 }
66 UNIMPLEMENTED_MSG("Unimplemented wrap mode={}", static_cast<u32>(wrap_mode));
67 return {};
68}
69
70vk::CompareOp DepthCompareFunction(Tegra::Texture::DepthCompareFunc depth_compare_func) {
71 switch (depth_compare_func) {
72 case Tegra::Texture::DepthCompareFunc::Never:
73 return vk::CompareOp::eNever;
74 case Tegra::Texture::DepthCompareFunc::Less:
75 return vk::CompareOp::eLess;
76 case Tegra::Texture::DepthCompareFunc::LessEqual:
77 return vk::CompareOp::eLessOrEqual;
78 case Tegra::Texture::DepthCompareFunc::Equal:
79 return vk::CompareOp::eEqual;
80 case Tegra::Texture::DepthCompareFunc::NotEqual:
81 return vk::CompareOp::eNotEqual;
82 case Tegra::Texture::DepthCompareFunc::Greater:
83 return vk::CompareOp::eGreater;
84 case Tegra::Texture::DepthCompareFunc::GreaterEqual:
85 return vk::CompareOp::eGreaterOrEqual;
86 case Tegra::Texture::DepthCompareFunc::Always:
87 return vk::CompareOp::eAlways;
88 }
89 UNIMPLEMENTED_MSG("Unimplemented sampler depth compare function={}",
90 static_cast<u32>(depth_compare_func));
91 return {};
92}
93
94} // namespace Sampler
95
96struct FormatTuple {
97 vk::Format format; ///< Vulkan format
98 ComponentType component_type; ///< Abstracted component type
99 bool attachable; ///< True when this format can be used as an attachment
100};
101
102static constexpr std::array<FormatTuple, VideoCore::Surface::MaxPixelFormat> tex_format_tuples = {{
103 {vk::Format::eA8B8G8R8UnormPack32, ComponentType::UNorm, true}, // ABGR8U
104 {vk::Format::eUndefined, ComponentType::Invalid, false}, // ABGR8S
105 {vk::Format::eUndefined, ComponentType::Invalid, false}, // ABGR8UI
106 {vk::Format::eB5G6R5UnormPack16, ComponentType::UNorm, false}, // B5G6R5U
107 {vk::Format::eA2B10G10R10UnormPack32, ComponentType::UNorm, true}, // A2B10G10R10U
108 {vk::Format::eUndefined, ComponentType::Invalid, false}, // A1B5G5R5U
109 {vk::Format::eR8Unorm, ComponentType::UNorm, true}, // R8U
110 {vk::Format::eUndefined, ComponentType::Invalid, false}, // R8UI
111 {vk::Format::eUndefined, ComponentType::Invalid, false}, // RGBA16F
112 {vk::Format::eUndefined, ComponentType::Invalid, false}, // RGBA16U
113 {vk::Format::eUndefined, ComponentType::Invalid, false}, // RGBA16UI
114 {vk::Format::eUndefined, ComponentType::Invalid, false}, // R11FG11FB10F
115 {vk::Format::eUndefined, ComponentType::Invalid, false}, // RGBA32UI
116 {vk::Format::eBc1RgbaUnormBlock, ComponentType::UNorm, false}, // DXT1
117 {vk::Format::eBc2UnormBlock, ComponentType::UNorm, false}, // DXT23
118 {vk::Format::eBc3UnormBlock, ComponentType::UNorm, false}, // DXT45
119 {vk::Format::eBc4UnormBlock, ComponentType::UNorm, false}, // DXN1
120 {vk::Format::eUndefined, ComponentType::Invalid, false}, // DXN2UNORM
121 {vk::Format::eUndefined, ComponentType::Invalid, false}, // DXN2SNORM
122 {vk::Format::eUndefined, ComponentType::Invalid, false}, // BC7U
123 {vk::Format::eUndefined, ComponentType::Invalid, false}, // BC6H_UF16
124 {vk::Format::eUndefined, ComponentType::Invalid, false}, // BC6H_SF16
125 {vk::Format::eUndefined, ComponentType::Invalid, false}, // ASTC_2D_4X4
126 {vk::Format::eUndefined, ComponentType::Invalid, false}, // BGRA8
127 {vk::Format::eUndefined, ComponentType::Invalid, false}, // RGBA32F
128 {vk::Format::eUndefined, ComponentType::Invalid, false}, // RG32F
129 {vk::Format::eUndefined, ComponentType::Invalid, false}, // R32F
130 {vk::Format::eUndefined, ComponentType::Invalid, false}, // R16F
131 {vk::Format::eUndefined, ComponentType::Invalid, false}, // R16U
132 {vk::Format::eUndefined, ComponentType::Invalid, false}, // R16S
133 {vk::Format::eUndefined, ComponentType::Invalid, false}, // R16UI
134 {vk::Format::eUndefined, ComponentType::Invalid, false}, // R16I
135 {vk::Format::eUndefined, ComponentType::Invalid, false}, // RG16
136 {vk::Format::eUndefined, ComponentType::Invalid, false}, // RG16F
137 {vk::Format::eUndefined, ComponentType::Invalid, false}, // RG16UI
138 {vk::Format::eUndefined, ComponentType::Invalid, false}, // RG16I
139 {vk::Format::eUndefined, ComponentType::Invalid, false}, // RG16S
140 {vk::Format::eUndefined, ComponentType::Invalid, false}, // RGB32F
141 {vk::Format::eA8B8G8R8SrgbPack32, ComponentType::UNorm, true}, // RGBA8_SRGB
142 {vk::Format::eUndefined, ComponentType::Invalid, false}, // RG8U
143 {vk::Format::eUndefined, ComponentType::Invalid, false}, // RG8S
144 {vk::Format::eUndefined, ComponentType::Invalid, false}, // RG32UI
145 {vk::Format::eUndefined, ComponentType::Invalid, false}, // R32UI
146 {vk::Format::eUndefined, ComponentType::Invalid, false}, // ASTC_2D_8X8
147 {vk::Format::eUndefined, ComponentType::Invalid, false}, // ASTC_2D_8X5
148 {vk::Format::eUndefined, ComponentType::Invalid, false}, // ASTC_2D_5X4
149
150 // Compressed sRGB formats
151 {vk::Format::eUndefined, ComponentType::Invalid, false}, // BGRA8_SRGB
152 {vk::Format::eUndefined, ComponentType::Invalid, false}, // DXT1_SRGB
153 {vk::Format::eUndefined, ComponentType::Invalid, false}, // DXT23_SRGB
154 {vk::Format::eUndefined, ComponentType::Invalid, false}, // DXT45_SRGB
155 {vk::Format::eUndefined, ComponentType::Invalid, false}, // BC7U_SRGB
156 {vk::Format::eUndefined, ComponentType::Invalid, false}, // ASTC_2D_4X4_SRGB
157 {vk::Format::eUndefined, ComponentType::Invalid, false}, // ASTC_2D_8X8_SRGB
158 {vk::Format::eUndefined, ComponentType::Invalid, false}, // ASTC_2D_8X5_SRGB
159 {vk::Format::eUndefined, ComponentType::Invalid, false}, // ASTC_2D_5X4_SRGB
160 {vk::Format::eUndefined, ComponentType::Invalid, false}, // ASTC_2D_5X5
161 {vk::Format::eUndefined, ComponentType::Invalid, false}, // ASTC_2D_5X5_SRGB
162 {vk::Format::eUndefined, ComponentType::Invalid, false}, // ASTC_2D_10X8
163 {vk::Format::eUndefined, ComponentType::Invalid, false}, // ASTC_2D_10X8_SRGB
164
165 // Depth formats
166 {vk::Format::eD32Sfloat, ComponentType::Float, true}, // Z32F
167 {vk::Format::eD16Unorm, ComponentType::UNorm, true}, // Z16
168
169 // DepthStencil formats
170 {vk::Format::eD24UnormS8Uint, ComponentType::UNorm, true}, // Z24S8
171 {vk::Format::eD24UnormS8Uint, ComponentType::UNorm, true}, // S8Z24 (emulated)
172 {vk::Format::eUndefined, ComponentType::Invalid, false}, // Z32FS8
173}};
174
175static constexpr bool IsZetaFormat(PixelFormat pixel_format) {
176 return pixel_format >= PixelFormat::MaxColorFormat &&
177 pixel_format < PixelFormat::MaxDepthStencilFormat;
178}
179
180std::pair<vk::Format, bool> SurfaceFormat(const VKDevice& device, FormatType format_type,
181 PixelFormat pixel_format, ComponentType component_type) {
182 ASSERT(static_cast<std::size_t>(pixel_format) < tex_format_tuples.size());
183
184 const auto tuple = tex_format_tuples[static_cast<u32>(pixel_format)];
185 UNIMPLEMENTED_IF_MSG(tuple.format == vk::Format::eUndefined,
186 "Unimplemented texture format with pixel format={} and component type={}",
187 static_cast<u32>(pixel_format), static_cast<u32>(component_type));
188 ASSERT_MSG(component_type == tuple.component_type, "Component type mismatch");
189
190 auto usage = vk::FormatFeatureFlagBits::eSampledImage |
191 vk::FormatFeatureFlagBits::eTransferDst | vk::FormatFeatureFlagBits::eTransferSrc;
192 if (tuple.attachable) {
193 usage |= IsZetaFormat(pixel_format) ? vk::FormatFeatureFlagBits::eDepthStencilAttachment
194 : vk::FormatFeatureFlagBits::eColorAttachment;
195 }
196 return {device.GetSupportedFormat(tuple.format, usage, format_type), tuple.attachable};
197}
198
199vk::ShaderStageFlagBits ShaderStage(Maxwell::ShaderStage stage) {
200 switch (stage) {
201 case Maxwell::ShaderStage::Vertex:
202 return vk::ShaderStageFlagBits::eVertex;
203 case Maxwell::ShaderStage::TesselationControl:
204 return vk::ShaderStageFlagBits::eTessellationControl;
205 case Maxwell::ShaderStage::TesselationEval:
206 return vk::ShaderStageFlagBits::eTessellationEvaluation;
207 case Maxwell::ShaderStage::Geometry:
208 return vk::ShaderStageFlagBits::eGeometry;
209 case Maxwell::ShaderStage::Fragment:
210 return vk::ShaderStageFlagBits::eFragment;
211 }
212 UNIMPLEMENTED_MSG("Unimplemented shader stage={}", static_cast<u32>(stage));
213 return {};
214}
215
216vk::PrimitiveTopology PrimitiveTopology(Maxwell::PrimitiveTopology topology) {
217 switch (topology) {
218 case Maxwell::PrimitiveTopology::Points:
219 return vk::PrimitiveTopology::ePointList;
220 case Maxwell::PrimitiveTopology::Lines:
221 return vk::PrimitiveTopology::eLineList;
222 case Maxwell::PrimitiveTopology::LineStrip:
223 return vk::PrimitiveTopology::eLineStrip;
224 case Maxwell::PrimitiveTopology::Triangles:
225 return vk::PrimitiveTopology::eTriangleList;
226 case Maxwell::PrimitiveTopology::TriangleStrip:
227 return vk::PrimitiveTopology::eTriangleStrip;
228 }
229 UNIMPLEMENTED_MSG("Unimplemented topology={}", static_cast<u32>(topology));
230 return {};
231}
232
233vk::Format VertexFormat(Maxwell::VertexAttribute::Type type, Maxwell::VertexAttribute::Size size) {
234 switch (type) {
235 case Maxwell::VertexAttribute::Type::SignedNorm:
236 break;
237 case Maxwell::VertexAttribute::Type::UnsignedNorm:
238 switch (size) {
239 case Maxwell::VertexAttribute::Size::Size_8_8_8_8:
240 return vk::Format::eR8G8B8A8Unorm;
241 default:
242 break;
243 }
244 break;
245 case Maxwell::VertexAttribute::Type::SignedInt:
246 break;
247 case Maxwell::VertexAttribute::Type::UnsignedInt:
248 switch (size) {
249 case Maxwell::VertexAttribute::Size::Size_32:
250 return vk::Format::eR32Uint;
251 default:
252 break;
253 }
254 case Maxwell::VertexAttribute::Type::UnsignedScaled:
255 case Maxwell::VertexAttribute::Type::SignedScaled:
256 break;
257 case Maxwell::VertexAttribute::Type::Float:
258 switch (size) {
259 case Maxwell::VertexAttribute::Size::Size_32_32_32_32:
260 return vk::Format::eR32G32B32A32Sfloat;
261 case Maxwell::VertexAttribute::Size::Size_32_32_32:
262 return vk::Format::eR32G32B32Sfloat;
263 case Maxwell::VertexAttribute::Size::Size_32_32:
264 return vk::Format::eR32G32Sfloat;
265 case Maxwell::VertexAttribute::Size::Size_32:
266 return vk::Format::eR32Sfloat;
267 default:
268 break;
269 }
270 break;
271 }
272 UNIMPLEMENTED_MSG("Unimplemented vertex format of type={} and size={}", static_cast<u32>(type),
273 static_cast<u32>(size));
274 return {};
275}
276
277vk::CompareOp ComparisonOp(Maxwell::ComparisonOp comparison) {
278 switch (comparison) {
279 case Maxwell::ComparisonOp::Never:
280 case Maxwell::ComparisonOp::NeverOld:
281 return vk::CompareOp::eNever;
282 case Maxwell::ComparisonOp::Less:
283 case Maxwell::ComparisonOp::LessOld:
284 return vk::CompareOp::eLess;
285 case Maxwell::ComparisonOp::Equal:
286 case Maxwell::ComparisonOp::EqualOld:
287 return vk::CompareOp::eEqual;
288 case Maxwell::ComparisonOp::LessEqual:
289 case Maxwell::ComparisonOp::LessEqualOld:
290 return vk::CompareOp::eLessOrEqual;
291 case Maxwell::ComparisonOp::Greater:
292 case Maxwell::ComparisonOp::GreaterOld:
293 return vk::CompareOp::eGreater;
294 case Maxwell::ComparisonOp::NotEqual:
295 case Maxwell::ComparisonOp::NotEqualOld:
296 return vk::CompareOp::eNotEqual;
297 case Maxwell::ComparisonOp::GreaterEqual:
298 case Maxwell::ComparisonOp::GreaterEqualOld:
299 return vk::CompareOp::eGreaterOrEqual;
300 case Maxwell::ComparisonOp::Always:
301 case Maxwell::ComparisonOp::AlwaysOld:
302 return vk::CompareOp::eAlways;
303 }
304 UNIMPLEMENTED_MSG("Unimplemented comparison op={}", static_cast<u32>(comparison));
305 return {};
306}
307
308vk::IndexType IndexFormat(Maxwell::IndexFormat index_format) {
309 switch (index_format) {
310 case Maxwell::IndexFormat::UnsignedByte:
311 UNIMPLEMENTED_MSG("Vulkan does not support native u8 index format");
312 return vk::IndexType::eUint16;
313 case Maxwell::IndexFormat::UnsignedShort:
314 return vk::IndexType::eUint16;
315 case Maxwell::IndexFormat::UnsignedInt:
316 return vk::IndexType::eUint32;
317 }
318 UNIMPLEMENTED_MSG("Unimplemented index_format={}", static_cast<u32>(index_format));
319 return {};
320}
321
322vk::StencilOp StencilOp(Maxwell::StencilOp stencil_op) {
323 switch (stencil_op) {
324 case Maxwell::StencilOp::Keep:
325 case Maxwell::StencilOp::KeepOGL:
326 return vk::StencilOp::eKeep;
327 case Maxwell::StencilOp::Zero:
328 case Maxwell::StencilOp::ZeroOGL:
329 return vk::StencilOp::eZero;
330 case Maxwell::StencilOp::Replace:
331 case Maxwell::StencilOp::ReplaceOGL:
332 return vk::StencilOp::eReplace;
333 case Maxwell::StencilOp::Incr:
334 case Maxwell::StencilOp::IncrOGL:
335 return vk::StencilOp::eIncrementAndClamp;
336 case Maxwell::StencilOp::Decr:
337 case Maxwell::StencilOp::DecrOGL:
338 return vk::StencilOp::eDecrementAndClamp;
339 case Maxwell::StencilOp::Invert:
340 case Maxwell::StencilOp::InvertOGL:
341 return vk::StencilOp::eInvert;
342 case Maxwell::StencilOp::IncrWrap:
343 case Maxwell::StencilOp::IncrWrapOGL:
344 return vk::StencilOp::eIncrementAndWrap;
345 case Maxwell::StencilOp::DecrWrap:
346 case Maxwell::StencilOp::DecrWrapOGL:
347 return vk::StencilOp::eDecrementAndWrap;
348 }
349 UNIMPLEMENTED_MSG("Unimplemented stencil op={}", static_cast<u32>(stencil_op));
350 return {};
351}
352
353vk::BlendOp BlendEquation(Maxwell::Blend::Equation equation) {
354 switch (equation) {
355 case Maxwell::Blend::Equation::Add:
356 case Maxwell::Blend::Equation::AddGL:
357 return vk::BlendOp::eAdd;
358 case Maxwell::Blend::Equation::Subtract:
359 case Maxwell::Blend::Equation::SubtractGL:
360 return vk::BlendOp::eSubtract;
361 case Maxwell::Blend::Equation::ReverseSubtract:
362 case Maxwell::Blend::Equation::ReverseSubtractGL:
363 return vk::BlendOp::eReverseSubtract;
364 case Maxwell::Blend::Equation::Min:
365 case Maxwell::Blend::Equation::MinGL:
366 return vk::BlendOp::eMin;
367 case Maxwell::Blend::Equation::Max:
368 case Maxwell::Blend::Equation::MaxGL:
369 return vk::BlendOp::eMax;
370 }
371 UNIMPLEMENTED_MSG("Unimplemented blend equation={}", static_cast<u32>(equation));
372 return {};
373}
374
375vk::BlendFactor BlendFactor(Maxwell::Blend::Factor factor) {
376 switch (factor) {
377 case Maxwell::Blend::Factor::Zero:
378 case Maxwell::Blend::Factor::ZeroGL:
379 return vk::BlendFactor::eZero;
380 case Maxwell::Blend::Factor::One:
381 case Maxwell::Blend::Factor::OneGL:
382 return vk::BlendFactor::eOne;
383 case Maxwell::Blend::Factor::SourceColor:
384 case Maxwell::Blend::Factor::SourceColorGL:
385 return vk::BlendFactor::eSrcColor;
386 case Maxwell::Blend::Factor::OneMinusSourceColor:
387 case Maxwell::Blend::Factor::OneMinusSourceColorGL:
388 return vk::BlendFactor::eOneMinusSrcColor;
389 case Maxwell::Blend::Factor::SourceAlpha:
390 case Maxwell::Blend::Factor::SourceAlphaGL:
391 return vk::BlendFactor::eSrcAlpha;
392 case Maxwell::Blend::Factor::OneMinusSourceAlpha:
393 case Maxwell::Blend::Factor::OneMinusSourceAlphaGL:
394 return vk::BlendFactor::eOneMinusSrcAlpha;
395 case Maxwell::Blend::Factor::DestAlpha:
396 case Maxwell::Blend::Factor::DestAlphaGL:
397 return vk::BlendFactor::eDstAlpha;
398 case Maxwell::Blend::Factor::OneMinusDestAlpha:
399 case Maxwell::Blend::Factor::OneMinusDestAlphaGL:
400 return vk::BlendFactor::eOneMinusDstAlpha;
401 case Maxwell::Blend::Factor::DestColor:
402 case Maxwell::Blend::Factor::DestColorGL:
403 return vk::BlendFactor::eDstColor;
404 case Maxwell::Blend::Factor::OneMinusDestColor:
405 case Maxwell::Blend::Factor::OneMinusDestColorGL:
406 return vk::BlendFactor::eOneMinusDstColor;
407 case Maxwell::Blend::Factor::SourceAlphaSaturate:
408 case Maxwell::Blend::Factor::SourceAlphaSaturateGL:
409 return vk::BlendFactor::eSrcAlphaSaturate;
410 case Maxwell::Blend::Factor::Source1Color:
411 case Maxwell::Blend::Factor::Source1ColorGL:
412 return vk::BlendFactor::eSrc1Color;
413 case Maxwell::Blend::Factor::OneMinusSource1Color:
414 case Maxwell::Blend::Factor::OneMinusSource1ColorGL:
415 return vk::BlendFactor::eOneMinusSrc1Color;
416 case Maxwell::Blend::Factor::Source1Alpha:
417 case Maxwell::Blend::Factor::Source1AlphaGL:
418 return vk::BlendFactor::eSrc1Alpha;
419 case Maxwell::Blend::Factor::OneMinusSource1Alpha:
420 case Maxwell::Blend::Factor::OneMinusSource1AlphaGL:
421 return vk::BlendFactor::eOneMinusSrc1Alpha;
422 case Maxwell::Blend::Factor::ConstantColor:
423 case Maxwell::Blend::Factor::ConstantColorGL:
424 return vk::BlendFactor::eConstantColor;
425 case Maxwell::Blend::Factor::OneMinusConstantColor:
426 case Maxwell::Blend::Factor::OneMinusConstantColorGL:
427 return vk::BlendFactor::eOneMinusConstantColor;
428 case Maxwell::Blend::Factor::ConstantAlpha:
429 case Maxwell::Blend::Factor::ConstantAlphaGL:
430 return vk::BlendFactor::eConstantAlpha;
431 case Maxwell::Blend::Factor::OneMinusConstantAlpha:
432 case Maxwell::Blend::Factor::OneMinusConstantAlphaGL:
433 return vk::BlendFactor::eOneMinusConstantAlpha;
434 }
435 UNIMPLEMENTED_MSG("Unimplemented blend factor={}", static_cast<u32>(factor));
436 return {};
437}
438
439vk::FrontFace FrontFace(Maxwell::Cull::FrontFace front_face) {
440 switch (front_face) {
441 case Maxwell::Cull::FrontFace::ClockWise:
442 return vk::FrontFace::eClockwise;
443 case Maxwell::Cull::FrontFace::CounterClockWise:
444 return vk::FrontFace::eCounterClockwise;
445 }
446 UNIMPLEMENTED_MSG("Unimplemented front face={}", static_cast<u32>(front_face));
447 return {};
448}
449
450vk::CullModeFlags CullFace(Maxwell::Cull::CullFace cull_face) {
451 switch (cull_face) {
452 case Maxwell::Cull::CullFace::Front:
453 return vk::CullModeFlagBits::eFront;
454 case Maxwell::Cull::CullFace::Back:
455 return vk::CullModeFlagBits::eBack;
456 case Maxwell::Cull::CullFace::FrontAndBack:
457 return vk::CullModeFlagBits::eFrontAndBack;
458 }
459 UNIMPLEMENTED_MSG("Unimplemented cull face={}", static_cast<u32>(cull_face));
460 return {};
461}
462
463vk::ComponentSwizzle SwizzleSource(Tegra::Texture::SwizzleSource swizzle) {
464 switch (swizzle) {
465 case Tegra::Texture::SwizzleSource::Zero:
466 return vk::ComponentSwizzle::eZero;
467 case Tegra::Texture::SwizzleSource::R:
468 return vk::ComponentSwizzle::eR;
469 case Tegra::Texture::SwizzleSource::G:
470 return vk::ComponentSwizzle::eG;
471 case Tegra::Texture::SwizzleSource::B:
472 return vk::ComponentSwizzle::eB;
473 case Tegra::Texture::SwizzleSource::A:
474 return vk::ComponentSwizzle::eA;
475 case Tegra::Texture::SwizzleSource::OneInt:
476 case Tegra::Texture::SwizzleSource::OneFloat:
477 return vk::ComponentSwizzle::eOne;
478 }
479 UNIMPLEMENTED_MSG("Unimplemented swizzle source={}", static_cast<u32>(swizzle));
480 return {};
481}
482
483} // namespace Vulkan::MaxwellToVK
diff --git a/src/video_core/renderer_vulkan/maxwell_to_vk.h b/src/video_core/renderer_vulkan/maxwell_to_vk.h
new file mode 100644
index 000000000..4cadc0721
--- /dev/null
+++ b/src/video_core/renderer_vulkan/maxwell_to_vk.h
@@ -0,0 +1,58 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <utility>
8#include "common/common_types.h"
9#include "video_core/engines/maxwell_3d.h"
10#include "video_core/renderer_vulkan/declarations.h"
11#include "video_core/renderer_vulkan/vk_device.h"
12#include "video_core/surface.h"
13#include "video_core/textures/texture.h"
14
15namespace Vulkan::MaxwellToVK {
16
17using Maxwell = Tegra::Engines::Maxwell3D::Regs;
18using PixelFormat = VideoCore::Surface::PixelFormat;
19using ComponentType = VideoCore::Surface::ComponentType;
20
21namespace Sampler {
22
23vk::Filter Filter(Tegra::Texture::TextureFilter filter);
24
25vk::SamplerMipmapMode MipmapMode(Tegra::Texture::TextureMipmapFilter mipmap_filter);
26
27vk::SamplerAddressMode WrapMode(Tegra::Texture::WrapMode wrap_mode);
28
29vk::CompareOp DepthCompareFunction(Tegra::Texture::DepthCompareFunc depth_compare_func);
30
31} // namespace Sampler
32
33std::pair<vk::Format, bool> SurfaceFormat(const VKDevice& device, FormatType format_type,
34 PixelFormat pixel_format, ComponentType component_type);
35
36vk::ShaderStageFlagBits ShaderStage(Maxwell::ShaderStage stage);
37
38vk::PrimitiveTopology PrimitiveTopology(Maxwell::PrimitiveTopology topology);
39
40vk::Format VertexFormat(Maxwell::VertexAttribute::Type type, Maxwell::VertexAttribute::Size size);
41
42vk::CompareOp ComparisonOp(Maxwell::ComparisonOp comparison);
43
44vk::IndexType IndexFormat(Maxwell::IndexFormat index_format);
45
46vk::StencilOp StencilOp(Maxwell::StencilOp stencil_op);
47
48vk::BlendOp BlendEquation(Maxwell::Blend::Equation equation);
49
50vk::BlendFactor BlendFactor(Maxwell::Blend::Factor factor);
51
52vk::FrontFace FrontFace(Maxwell::Cull::FrontFace front_face);
53
54vk::CullModeFlags CullFace(Maxwell::Cull::CullFace cull_face);
55
56vk::ComponentSwizzle SwizzleSource(Tegra::Texture::SwizzleSource swizzle);
57
58} // namespace Vulkan::MaxwellToVK
diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp
new file mode 100644
index 000000000..4a33a6c84
--- /dev/null
+++ b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp
@@ -0,0 +1,116 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <cstring>
6#include <memory>
7#include <optional>
8#include <tuple>
9
10#include "common/alignment.h"
11#include "common/assert.h"
12#include "core/memory.h"
13#include "video_core/renderer_vulkan/declarations.h"
14#include "video_core/renderer_vulkan/vk_buffer_cache.h"
15#include "video_core/renderer_vulkan/vk_scheduler.h"
16#include "video_core/renderer_vulkan/vk_stream_buffer.h"
17
18namespace Vulkan {
19
20VKBufferCache::VKBufferCache(Tegra::MemoryManager& tegra_memory_manager,
21 VideoCore::RasterizerInterface& rasterizer, const VKDevice& device,
22 VKMemoryManager& memory_manager, VKScheduler& scheduler, u64 size)
23 : RasterizerCache{rasterizer}, tegra_memory_manager{tegra_memory_manager} {
24 const auto usage = vk::BufferUsageFlagBits::eVertexBuffer |
25 vk::BufferUsageFlagBits::eIndexBuffer |
26 vk::BufferUsageFlagBits::eUniformBuffer;
27 const auto access = vk::AccessFlagBits::eVertexAttributeRead | vk::AccessFlagBits::eIndexRead |
28 vk::AccessFlagBits::eUniformRead;
29 stream_buffer =
30 std::make_unique<VKStreamBuffer>(device, memory_manager, scheduler, size, usage, access,
31 vk::PipelineStageFlagBits::eAllCommands);
32 buffer_handle = stream_buffer->GetBuffer();
33}
34
35VKBufferCache::~VKBufferCache() = default;
36
37u64 VKBufferCache::UploadMemory(Tegra::GPUVAddr gpu_addr, std::size_t size, u64 alignment,
38 bool cache) {
39 const auto cpu_addr{tegra_memory_manager.GpuToCpuAddress(gpu_addr)};
40 ASSERT(cpu_addr);
41
42 // Cache management is a big overhead, so only cache entries with a given size.
43 // TODO: Figure out which size is the best for given games.
44 cache &= size >= 2048;
45
46 if (cache) {
47 if (auto entry = TryGet(*cpu_addr); entry) {
48 if (entry->size >= size && entry->alignment == alignment) {
49 return entry->offset;
50 }
51 Unregister(entry);
52 }
53 }
54
55 AlignBuffer(alignment);
56 const u64 uploaded_offset = buffer_offset;
57
58 Memory::ReadBlock(*cpu_addr, buffer_ptr, size);
59
60 buffer_ptr += size;
61 buffer_offset += size;
62
63 if (cache) {
64 auto entry = std::make_shared<CachedBufferEntry>();
65 entry->offset = uploaded_offset;
66 entry->size = size;
67 entry->alignment = alignment;
68 entry->addr = *cpu_addr;
69 Register(entry);
70 }
71
72 return uploaded_offset;
73}
74
75u64 VKBufferCache::UploadHostMemory(const u8* raw_pointer, std::size_t size, u64 alignment) {
76 AlignBuffer(alignment);
77 std::memcpy(buffer_ptr, raw_pointer, size);
78 const u64 uploaded_offset = buffer_offset;
79
80 buffer_ptr += size;
81 buffer_offset += size;
82 return uploaded_offset;
83}
84
85std::tuple<u8*, u64> VKBufferCache::ReserveMemory(std::size_t size, u64 alignment) {
86 AlignBuffer(alignment);
87 u8* const uploaded_ptr = buffer_ptr;
88 const u64 uploaded_offset = buffer_offset;
89
90 buffer_ptr += size;
91 buffer_offset += size;
92 return {uploaded_ptr, uploaded_offset};
93}
94
95void VKBufferCache::Reserve(std::size_t max_size) {
96 bool invalidate;
97 std::tie(buffer_ptr, buffer_offset_base, invalidate) = stream_buffer->Reserve(max_size);
98 buffer_offset = buffer_offset_base;
99
100 if (invalidate) {
101 InvalidateAll();
102 }
103}
104
105VKExecutionContext VKBufferCache::Send(VKExecutionContext exctx) {
106 return stream_buffer->Send(exctx, buffer_offset - buffer_offset_base);
107}
108
109void VKBufferCache::AlignBuffer(std::size_t alignment) {
110 // Align the offset, not the mapped pointer
111 const u64 offset_aligned = Common::AlignUp(buffer_offset, alignment);
112 buffer_ptr += offset_aligned - buffer_offset;
113 buffer_offset = offset_aligned;
114}
115
116} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.h b/src/video_core/renderer_vulkan/vk_buffer_cache.h
new file mode 100644
index 000000000..d8e916f31
--- /dev/null
+++ b/src/video_core/renderer_vulkan/vk_buffer_cache.h
@@ -0,0 +1,87 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <memory>
8#include <tuple>
9
10#include "common/common_types.h"
11#include "video_core/gpu.h"
12#include "video_core/rasterizer_cache.h"
13#include "video_core/renderer_vulkan/declarations.h"
14#include "video_core/renderer_vulkan/vk_scheduler.h"
15
16namespace Tegra {
17class MemoryManager;
18}
19
20namespace Vulkan {
21
22class VKDevice;
23class VKFence;
24class VKMemoryManager;
25class VKStreamBuffer;
26
27struct CachedBufferEntry final : public RasterizerCacheObject {
28 VAddr GetAddr() const override {
29 return addr;
30 }
31
32 std::size_t GetSizeInBytes() const override {
33 return size;
34 }
35
36 // We do not have to flush this cache as things in it are never modified by us.
37 void Flush() override {}
38
39 VAddr addr;
40 std::size_t size;
41 u64 offset;
42 std::size_t alignment;
43};
44
45class VKBufferCache final : public RasterizerCache<std::shared_ptr<CachedBufferEntry>> {
46public:
47 explicit VKBufferCache(Tegra::MemoryManager& tegra_memory_manager,
48 VideoCore::RasterizerInterface& rasterizer, const VKDevice& device,
49 VKMemoryManager& memory_manager, VKScheduler& scheduler, u64 size);
50 ~VKBufferCache();
51
52 /// Uploads data from a guest GPU address. Returns host's buffer offset where it's been
53 /// allocated.
54 u64 UploadMemory(Tegra::GPUVAddr gpu_addr, std::size_t size, u64 alignment = 4,
55 bool cache = true);
56
57 /// Uploads from a host memory. Returns host's buffer offset where it's been allocated.
58 u64 UploadHostMemory(const u8* raw_pointer, std::size_t size, u64 alignment = 4);
59
60 /// Reserves memory to be used by host's CPU. Returns mapped address and offset.
61 std::tuple<u8*, u64> ReserveMemory(std::size_t size, u64 alignment = 4);
62
63 /// Reserves a region of memory to be used in subsequent upload/reserve operations.
64 void Reserve(std::size_t max_size);
65
66 /// Ensures that the set data is sent to the device.
67 [[nodiscard]] VKExecutionContext Send(VKExecutionContext exctx);
68
69 /// Returns the buffer cache handle.
70 vk::Buffer GetBuffer() const {
71 return buffer_handle;
72 }
73
74private:
75 void AlignBuffer(std::size_t alignment);
76
77 Tegra::MemoryManager& tegra_memory_manager;
78
79 std::unique_ptr<VKStreamBuffer> stream_buffer;
80 vk::Buffer buffer_handle;
81
82 u8* buffer_ptr = nullptr;
83 u64 buffer_offset = 0;
84 u64 buffer_offset_base = 0;
85};
86
87} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_device.cpp b/src/video_core/renderer_vulkan/vk_device.cpp
index 78a4e5f0e..00242ecbe 100644
--- a/src/video_core/renderer_vulkan/vk_device.cpp
+++ b/src/video_core/renderer_vulkan/vk_device.cpp
@@ -122,8 +122,7 @@ bool VKDevice::IsFormatSupported(vk::Format wanted_format, vk::FormatFeatureFlag
122 FormatType format_type) const { 122 FormatType format_type) const {
123 const auto it = format_properties.find(wanted_format); 123 const auto it = format_properties.find(wanted_format);
124 if (it == format_properties.end()) { 124 if (it == format_properties.end()) {
125 LOG_CRITICAL(Render_Vulkan, "Unimplemented format query={}", 125 LOG_CRITICAL(Render_Vulkan, "Unimplemented format query={}", vk::to_string(wanted_format));
126 static_cast<u32>(wanted_format));
127 UNREACHABLE(); 126 UNREACHABLE();
128 return true; 127 return true;
129 } 128 }
@@ -219,11 +218,19 @@ std::map<vk::Format, vk::FormatProperties> VKDevice::GetFormatProperties(
219 format_properties.emplace(format, physical.getFormatProperties(format, dldi)); 218 format_properties.emplace(format, physical.getFormatProperties(format, dldi));
220 }; 219 };
221 AddFormatQuery(vk::Format::eA8B8G8R8UnormPack32); 220 AddFormatQuery(vk::Format::eA8B8G8R8UnormPack32);
222 AddFormatQuery(vk::Format::eR5G6B5UnormPack16); 221 AddFormatQuery(vk::Format::eB5G6R5UnormPack16);
222 AddFormatQuery(vk::Format::eA2B10G10R10UnormPack32);
223 AddFormatQuery(vk::Format::eR8G8B8A8Srgb);
224 AddFormatQuery(vk::Format::eR8Unorm);
223 AddFormatQuery(vk::Format::eD32Sfloat); 225 AddFormatQuery(vk::Format::eD32Sfloat);
226 AddFormatQuery(vk::Format::eD16Unorm);
224 AddFormatQuery(vk::Format::eD16UnormS8Uint); 227 AddFormatQuery(vk::Format::eD16UnormS8Uint);
225 AddFormatQuery(vk::Format::eD24UnormS8Uint); 228 AddFormatQuery(vk::Format::eD24UnormS8Uint);
226 AddFormatQuery(vk::Format::eD32SfloatS8Uint); 229 AddFormatQuery(vk::Format::eD32SfloatS8Uint);
230 AddFormatQuery(vk::Format::eBc1RgbaUnormBlock);
231 AddFormatQuery(vk::Format::eBc2UnormBlock);
232 AddFormatQuery(vk::Format::eBc3UnormBlock);
233 AddFormatQuery(vk::Format::eBc4UnormBlock);
227 234
228 return format_properties; 235 return format_properties;
229} 236}
diff --git a/src/video_core/renderer_vulkan/vk_memory_manager.cpp b/src/video_core/renderer_vulkan/vk_memory_manager.cpp
index 17ee93b91..0451babbf 100644
--- a/src/video_core/renderer_vulkan/vk_memory_manager.cpp
+++ b/src/video_core/renderer_vulkan/vk_memory_manager.cpp
@@ -238,7 +238,7 @@ bool VKMemoryManager::AllocMemory(vk::MemoryPropertyFlags wanted_properties, u32
238 238
239VKMemoryCommitImpl::VKMemoryCommitImpl(VKMemoryAllocation* allocation, vk::DeviceMemory memory, 239VKMemoryCommitImpl::VKMemoryCommitImpl(VKMemoryAllocation* allocation, vk::DeviceMemory memory,
240 u8* data, u64 begin, u64 end) 240 u8* data, u64 begin, u64 end)
241 : allocation{allocation}, memory{memory}, data{data}, interval(std::make_pair(begin, end)) {} 241 : interval(std::make_pair(begin, end)), memory{memory}, allocation{allocation}, data{data} {}
242 242
243VKMemoryCommitImpl::~VKMemoryCommitImpl() { 243VKMemoryCommitImpl::~VKMemoryCommitImpl() {
244 allocation->Free(this); 244 allocation->Free(this);
diff --git a/src/video_core/renderer_vulkan/vk_resource_manager.cpp b/src/video_core/renderer_vulkan/vk_resource_manager.cpp
index 1678463c7..a1e117443 100644
--- a/src/video_core/renderer_vulkan/vk_resource_manager.cpp
+++ b/src/video_core/renderer_vulkan/vk_resource_manager.cpp
@@ -125,11 +125,12 @@ void VKFence::Protect(VKResource* resource) {
125 protected_resources.push_back(resource); 125 protected_resources.push_back(resource);
126} 126}
127 127
128void VKFence::Unprotect(const VKResource* resource) { 128void VKFence::Unprotect(VKResource* resource) {
129 const auto it = std::find(protected_resources.begin(), protected_resources.end(), resource); 129 const auto it = std::find(protected_resources.begin(), protected_resources.end(), resource);
130 if (it != protected_resources.end()) { 130 ASSERT(it != protected_resources.end());
131 protected_resources.erase(it); 131
132 } 132 resource->OnFenceRemoval(this);
133 protected_resources.erase(it);
133} 134}
134 135
135VKFenceWatch::VKFenceWatch() = default; 136VKFenceWatch::VKFenceWatch() = default;
@@ -141,12 +142,11 @@ VKFenceWatch::~VKFenceWatch() {
141} 142}
142 143
143void VKFenceWatch::Wait() { 144void VKFenceWatch::Wait() {
144 if (!fence) { 145 if (fence == nullptr) {
145 return; 146 return;
146 } 147 }
147 fence->Wait(); 148 fence->Wait();
148 fence->Unprotect(this); 149 fence->Unprotect(this);
149 fence = nullptr;
150} 150}
151 151
152void VKFenceWatch::Watch(VKFence& new_fence) { 152void VKFenceWatch::Watch(VKFence& new_fence) {
diff --git a/src/video_core/renderer_vulkan/vk_resource_manager.h b/src/video_core/renderer_vulkan/vk_resource_manager.h
index 5018dfa44..5bfe4cead 100644
--- a/src/video_core/renderer_vulkan/vk_resource_manager.h
+++ b/src/video_core/renderer_vulkan/vk_resource_manager.h
@@ -63,7 +63,7 @@ public:
63 void Protect(VKResource* resource); 63 void Protect(VKResource* resource);
64 64
65 /// Removes protection for a resource. 65 /// Removes protection for a resource.
66 void Unprotect(const VKResource* resource); 66 void Unprotect(VKResource* resource);
67 67
68 /// Retreives the fence. 68 /// Retreives the fence.
69 operator vk::Fence() const { 69 operator vk::Fence() const {
diff --git a/src/video_core/renderer_vulkan/vk_stream_buffer.cpp b/src/video_core/renderer_vulkan/vk_stream_buffer.cpp
new file mode 100644
index 000000000..58ffa42f2
--- /dev/null
+++ b/src/video_core/renderer_vulkan/vk_stream_buffer.cpp
@@ -0,0 +1,90 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <algorithm>
6#include <memory>
7#include <optional>
8#include <vector>
9
10#include "common/assert.h"
11#include "video_core/renderer_vulkan/declarations.h"
12#include "video_core/renderer_vulkan/vk_device.h"
13#include "video_core/renderer_vulkan/vk_memory_manager.h"
14#include "video_core/renderer_vulkan/vk_resource_manager.h"
15#include "video_core/renderer_vulkan/vk_scheduler.h"
16#include "video_core/renderer_vulkan/vk_stream_buffer.h"
17
18namespace Vulkan {
19
20constexpr u64 WATCHES_INITIAL_RESERVE = 0x4000;
21constexpr u64 WATCHES_RESERVE_CHUNK = 0x1000;
22
23VKStreamBuffer::VKStreamBuffer(const VKDevice& device, VKMemoryManager& memory_manager,
24 VKScheduler& scheduler, u64 size, vk::BufferUsageFlags usage,
25 vk::AccessFlags access, vk::PipelineStageFlags pipeline_stage)
26 : device{device}, scheduler{scheduler}, buffer_size{size}, access{access}, pipeline_stage{
27 pipeline_stage} {
28 CreateBuffers(memory_manager, usage);
29 ReserveWatches(WATCHES_INITIAL_RESERVE);
30}
31
32VKStreamBuffer::~VKStreamBuffer() = default;
33
34std::tuple<u8*, u64, bool> VKStreamBuffer::Reserve(u64 size) {
35 ASSERT(size <= buffer_size);
36 mapped_size = size;
37
38 if (offset + size > buffer_size) {
39 // The buffer would overflow, save the amount of used buffers, signal an invalidation and
40 // reset the state.
41 invalidation_mark = used_watches;
42 used_watches = 0;
43 offset = 0;
44 }
45
46 return {mapped_pointer + offset, offset, invalidation_mark.has_value()};
47}
48
49VKExecutionContext VKStreamBuffer::Send(VKExecutionContext exctx, u64 size) {
50 ASSERT_MSG(size <= mapped_size, "Reserved size is too small");
51
52 if (invalidation_mark) {
53 // TODO(Rodrigo): Find a better way to invalidate than waiting for all watches to finish.
54 exctx = scheduler.Flush();
55 std::for_each(watches.begin(), watches.begin() + *invalidation_mark,
56 [&](auto& resource) { resource->Wait(); });
57 invalidation_mark = std::nullopt;
58 }
59
60 if (used_watches + 1 >= watches.size()) {
61 // Ensure that there are enough watches.
62 ReserveWatches(WATCHES_RESERVE_CHUNK);
63 }
64 // Add a watch for this allocation.
65 watches[used_watches++]->Watch(exctx.GetFence());
66
67 offset += size;
68
69 return exctx;
70}
71
72void VKStreamBuffer::CreateBuffers(VKMemoryManager& memory_manager, vk::BufferUsageFlags usage) {
73 const vk::BufferCreateInfo buffer_ci({}, buffer_size, usage, vk::SharingMode::eExclusive, 0,
74 nullptr);
75
76 const auto dev = device.GetLogical();
77 const auto& dld = device.GetDispatchLoader();
78 buffer = dev.createBufferUnique(buffer_ci, nullptr, dld);
79 commit = memory_manager.Commit(*buffer, true);
80 mapped_pointer = commit->GetData();
81}
82
83void VKStreamBuffer::ReserveWatches(std::size_t grow_size) {
84 const std::size_t previous_size = watches.size();
85 watches.resize(previous_size + grow_size);
86 std::generate(watches.begin() + previous_size, watches.end(),
87 []() { return std::make_unique<VKFenceWatch>(); });
88}
89
90} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_stream_buffer.h b/src/video_core/renderer_vulkan/vk_stream_buffer.h
new file mode 100644
index 000000000..69d036ccd
--- /dev/null
+++ b/src/video_core/renderer_vulkan/vk_stream_buffer.h
@@ -0,0 +1,72 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <memory>
8#include <optional>
9#include <tuple>
10#include <vector>
11
12#include "common/common_types.h"
13#include "video_core/renderer_vulkan/declarations.h"
14#include "video_core/renderer_vulkan/vk_memory_manager.h"
15
16namespace Vulkan {
17
18class VKDevice;
19class VKFence;
20class VKFenceWatch;
21class VKResourceManager;
22class VKScheduler;
23
24class VKStreamBuffer {
25public:
26 explicit VKStreamBuffer(const VKDevice& device, VKMemoryManager& memory_manager,
27 VKScheduler& scheduler, u64 size, vk::BufferUsageFlags usage,
28 vk::AccessFlags access, vk::PipelineStageFlags pipeline_stage);
29 ~VKStreamBuffer();
30
31 /**
32 * Reserves a region of memory from the stream buffer.
33 * @param size Size to reserve.
34 * @returns A tuple in the following order: Raw memory pointer (with offset added), buffer
35 * offset and a boolean that's true when buffer has been invalidated.
36 */
37 std::tuple<u8*, u64, bool> Reserve(u64 size);
38
39 /// Ensures that "size" bytes of memory are available to the GPU, potentially recording a copy.
40 [[nodiscard]] VKExecutionContext Send(VKExecutionContext exctx, u64 size);
41
42 vk::Buffer GetBuffer() const {
43 return *buffer;
44 }
45
46private:
47 /// Creates Vulkan buffer handles committing the required the required memory.
48 void CreateBuffers(VKMemoryManager& memory_manager, vk::BufferUsageFlags usage);
49
50 /// Increases the amount of watches available.
51 void ReserveWatches(std::size_t grow_size);
52
53 const VKDevice& device; ///< Vulkan device manager.
54 VKScheduler& scheduler; ///< Command scheduler.
55 const u64 buffer_size; ///< Total size of the stream buffer.
56 const vk::AccessFlags access; ///< Access usage of this stream buffer.
57 const vk::PipelineStageFlags pipeline_stage; ///< Pipeline usage of this stream buffer.
58
59 UniqueBuffer buffer; ///< Mapped buffer.
60 VKMemoryCommit commit; ///< Memory commit.
61 u8* mapped_pointer{}; ///< Pointer to the host visible commit
62
63 u64 offset{}; ///< Buffer iterator.
64 u64 mapped_size{}; ///< Size reserved for the current copy.
65
66 std::vector<std::unique_ptr<VKFenceWatch>> watches; ///< Total watches
67 std::size_t used_watches{}; ///< Count of watches, reset on invalidation.
68 std::optional<std::size_t>
69 invalidation_mark{}; ///< Number of watches used in the current invalidation.
70};
71
72} // namespace Vulkan
diff --git a/src/video_core/surface.cpp b/src/video_core/surface.cpp
index 044ba116a..a7ac26d71 100644
--- a/src/video_core/surface.cpp
+++ b/src/video_core/surface.cpp
@@ -89,8 +89,6 @@ PixelFormat PixelFormatFromDepthFormat(Tegra::DepthFormat format) {
89 89
90PixelFormat PixelFormatFromRenderTargetFormat(Tegra::RenderTargetFormat format) { 90PixelFormat PixelFormatFromRenderTargetFormat(Tegra::RenderTargetFormat format) {
91 switch (format) { 91 switch (format) {
92 // TODO (Hexagon12): Converting SRGBA to RGBA is a hack and doesn't completely correct the
93 // gamma.
94 case Tegra::RenderTargetFormat::RGBA8_SRGB: 92 case Tegra::RenderTargetFormat::RGBA8_SRGB:
95 return PixelFormat::RGBA8_SRGB; 93 return PixelFormat::RGBA8_SRGB;
96 case Tegra::RenderTargetFormat::RGBA8_UNORM: 94 case Tegra::RenderTargetFormat::RGBA8_UNORM:
diff --git a/src/video_core/textures/astc.cpp b/src/video_core/textures/astc.cpp
index bc50a4876..b508d64e9 100644
--- a/src/video_core/textures/astc.cpp
+++ b/src/video_core/textures/astc.cpp
@@ -23,28 +23,12 @@
23 23
24#include "video_core/textures/astc.h" 24#include "video_core/textures/astc.h"
25 25
26class BitStream { 26class InputBitStream {
27public: 27public:
28 explicit BitStream(unsigned char* ptr, int nBits = 0, int start_offset = 0) 28 explicit InputBitStream(const unsigned char* ptr, int nBits = 0, int start_offset = 0)
29 : m_NumBits(nBits), m_CurByte(ptr), m_NextBit(start_offset % 8) {} 29 : m_NumBits(nBits), m_CurByte(ptr), m_NextBit(start_offset % 8) {}
30 30
31 ~BitStream() = default; 31 ~InputBitStream() = default;
32
33 int GetBitsWritten() const {
34 return m_BitsWritten;
35 }
36
37 void WriteBitsR(unsigned int val, unsigned int nBits) {
38 for (unsigned int i = 0; i < nBits; i++) {
39 WriteBit((val >> (nBits - i - 1)) & 1);
40 }
41 }
42
43 void WriteBits(unsigned int val, unsigned int nBits) {
44 for (unsigned int i = 0; i < nBits; i++) {
45 WriteBit((val >> i) & 1);
46 }
47 }
48 32
49 int GetBitsRead() const { 33 int GetBitsRead() const {
50 return m_BitsRead; 34 return m_BitsRead;
@@ -71,6 +55,38 @@ public:
71 } 55 }
72 56
73private: 57private:
58 const int m_NumBits;
59 const unsigned char* m_CurByte;
60 int m_NextBit = 0;
61 int m_BitsRead = 0;
62
63 bool done = false;
64};
65
66class OutputBitStream {
67public:
68 explicit OutputBitStream(unsigned char* ptr, int nBits = 0, int start_offset = 0)
69 : m_NumBits(nBits), m_CurByte(ptr), m_NextBit(start_offset % 8) {}
70
71 ~OutputBitStream() = default;
72
73 int GetBitsWritten() const {
74 return m_BitsWritten;
75 }
76
77 void WriteBitsR(unsigned int val, unsigned int nBits) {
78 for (unsigned int i = 0; i < nBits; i++) {
79 WriteBit((val >> (nBits - i - 1)) & 1);
80 }
81 }
82
83 void WriteBits(unsigned int val, unsigned int nBits) {
84 for (unsigned int i = 0; i < nBits; i++) {
85 WriteBit((val >> i) & 1);
86 }
87 }
88
89private:
74 void WriteBit(int b) { 90 void WriteBit(int b) {
75 91
76 if (done) 92 if (done)
@@ -238,8 +254,8 @@ public:
238 // Fills result with the values that are encoded in the given 254 // Fills result with the values that are encoded in the given
239 // bitstream. We must know beforehand what the maximum possible 255 // bitstream. We must know beforehand what the maximum possible
240 // value is, and how many values we're decoding. 256 // value is, and how many values we're decoding.
241 static void DecodeIntegerSequence(std::vector<IntegerEncodedValue>& result, BitStream& bits, 257 static void DecodeIntegerSequence(std::vector<IntegerEncodedValue>& result,
242 uint32_t maxRange, uint32_t nValues) { 258 InputBitStream& bits, uint32_t maxRange, uint32_t nValues) {
243 // Determine encoding parameters 259 // Determine encoding parameters
244 IntegerEncodedValue val = IntegerEncodedValue::CreateEncoding(maxRange); 260 IntegerEncodedValue val = IntegerEncodedValue::CreateEncoding(maxRange);
245 261
@@ -267,7 +283,7 @@ public:
267 } 283 }
268 284
269private: 285private:
270 static void DecodeTritBlock(BitStream& bits, std::vector<IntegerEncodedValue>& result, 286 static void DecodeTritBlock(InputBitStream& bits, std::vector<IntegerEncodedValue>& result,
271 uint32_t nBitsPerValue) { 287 uint32_t nBitsPerValue) {
272 // Implement the algorithm in section C.2.12 288 // Implement the algorithm in section C.2.12
273 uint32_t m[5]; 289 uint32_t m[5];
@@ -327,7 +343,7 @@ private:
327 } 343 }
328 } 344 }
329 345
330 static void DecodeQuintBlock(BitStream& bits, std::vector<IntegerEncodedValue>& result, 346 static void DecodeQuintBlock(InputBitStream& bits, std::vector<IntegerEncodedValue>& result,
331 uint32_t nBitsPerValue) { 347 uint32_t nBitsPerValue) {
332 // Implement the algorithm in section C.2.12 348 // Implement the algorithm in section C.2.12
333 uint32_t m[3]; 349 uint32_t m[3];
@@ -406,7 +422,7 @@ struct TexelWeightParams {
406 } 422 }
407}; 423};
408 424
409static TexelWeightParams DecodeBlockInfo(BitStream& strm) { 425static TexelWeightParams DecodeBlockInfo(InputBitStream& strm) {
410 TexelWeightParams params; 426 TexelWeightParams params;
411 427
412 // Read the entire block mode all at once 428 // Read the entire block mode all at once
@@ -605,7 +621,7 @@ static TexelWeightParams DecodeBlockInfo(BitStream& strm) {
605 return params; 621 return params;
606} 622}
607 623
608static void FillVoidExtentLDR(BitStream& strm, uint32_t* const outBuf, uint32_t blockWidth, 624static void FillVoidExtentLDR(InputBitStream& strm, uint32_t* const outBuf, uint32_t blockWidth,
609 uint32_t blockHeight) { 625 uint32_t blockHeight) {
610 // Don't actually care about the void extent, just read the bits... 626 // Don't actually care about the void extent, just read the bits...
611 for (int i = 0; i < 4; ++i) { 627 for (int i = 0; i < 4; ++i) {
@@ -821,7 +837,7 @@ static void DecodeColorValues(uint32_t* out, uint8_t* data, const uint32_t* mode
821 837
822 // We now have enough to decode our integer sequence. 838 // We now have enough to decode our integer sequence.
823 std::vector<IntegerEncodedValue> decodedColorValues; 839 std::vector<IntegerEncodedValue> decodedColorValues;
824 BitStream colorStream(data); 840 InputBitStream colorStream(data);
825 IntegerEncodedValue::DecodeIntegerSequence(decodedColorValues, colorStream, range, nValues); 841 IntegerEncodedValue::DecodeIntegerSequence(decodedColorValues, colorStream, range, nValues);
826 842
827 // Once we have the decoded values, we need to dequantize them to the 0-255 range 843 // Once we have the decoded values, we need to dequantize them to the 0-255 range
@@ -1365,9 +1381,9 @@ static void ComputeEndpoints(Pixel& ep1, Pixel& ep2, const uint32_t*& colorValue
1365#undef READ_INT_VALUES 1381#undef READ_INT_VALUES
1366} 1382}
1367 1383
1368static void DecompressBlock(uint8_t inBuf[16], const uint32_t blockWidth, 1384static void DecompressBlock(const uint8_t inBuf[16], const uint32_t blockWidth,
1369 const uint32_t blockHeight, uint32_t* outBuf) { 1385 const uint32_t blockHeight, uint32_t* outBuf) {
1370 BitStream strm(inBuf); 1386 InputBitStream strm(inBuf);
1371 TexelWeightParams weightParams = DecodeBlockInfo(strm); 1387 TexelWeightParams weightParams = DecodeBlockInfo(strm);
1372 1388
1373 // Was there an error? 1389 // Was there an error?
@@ -1421,7 +1437,7 @@ static void DecompressBlock(uint8_t inBuf[16], const uint32_t blockWidth,
1421 // Define color data. 1437 // Define color data.
1422 uint8_t colorEndpointData[16]; 1438 uint8_t colorEndpointData[16];
1423 memset(colorEndpointData, 0, sizeof(colorEndpointData)); 1439 memset(colorEndpointData, 0, sizeof(colorEndpointData));
1424 BitStream colorEndpointStream(colorEndpointData, 16 * 8, 0); 1440 OutputBitStream colorEndpointStream(colorEndpointData, 16 * 8, 0);
1425 1441
1426 // Read extra config data... 1442 // Read extra config data...
1427 uint32_t baseCEM = 0; 1443 uint32_t baseCEM = 0;
@@ -1549,7 +1565,7 @@ static void DecompressBlock(uint8_t inBuf[16], const uint32_t blockWidth,
1549 memset(texelWeightData + clearByteStart, 0, 16 - clearByteStart); 1565 memset(texelWeightData + clearByteStart, 0, 16 - clearByteStart);
1550 1566
1551 std::vector<IntegerEncodedValue> texelWeightValues; 1567 std::vector<IntegerEncodedValue> texelWeightValues;
1552 BitStream weightStream(texelWeightData); 1568 InputBitStream weightStream(texelWeightData);
1553 1569
1554 IntegerEncodedValue::DecodeIntegerSequence(texelWeightValues, weightStream, 1570 IntegerEncodedValue::DecodeIntegerSequence(texelWeightValues, weightStream,
1555 weightParams.m_MaxWeight, 1571 weightParams.m_MaxWeight,
@@ -1597,7 +1613,7 @@ static void DecompressBlock(uint8_t inBuf[16], const uint32_t blockWidth,
1597 1613
1598namespace Tegra::Texture::ASTC { 1614namespace Tegra::Texture::ASTC {
1599 1615
1600std::vector<uint8_t> Decompress(std::vector<uint8_t>& data, uint32_t width, uint32_t height, 1616std::vector<uint8_t> Decompress(const uint8_t* data, uint32_t width, uint32_t height,
1601 uint32_t depth, uint32_t block_width, uint32_t block_height) { 1617 uint32_t depth, uint32_t block_width, uint32_t block_height) {
1602 uint32_t blockIdx = 0; 1618 uint32_t blockIdx = 0;
1603 std::vector<uint8_t> outData(height * width * depth * 4); 1619 std::vector<uint8_t> outData(height * width * depth * 4);
@@ -1605,7 +1621,7 @@ std::vector<uint8_t> Decompress(std::vector<uint8_t>& data, uint32_t width, uint
1605 for (uint32_t j = 0; j < height; j += block_height) { 1621 for (uint32_t j = 0; j < height; j += block_height) {
1606 for (uint32_t i = 0; i < width; i += block_width) { 1622 for (uint32_t i = 0; i < width; i += block_width) {
1607 1623
1608 uint8_t* blockPtr = data.data() + blockIdx * 16; 1624 const uint8_t* blockPtr = data + blockIdx * 16;
1609 1625
1610 // Blocks can be at most 12x12 1626 // Blocks can be at most 12x12
1611 uint32_t uncompData[144]; 1627 uint32_t uncompData[144];
diff --git a/src/video_core/textures/astc.h b/src/video_core/textures/astc.h
index d419dd025..991cdba72 100644
--- a/src/video_core/textures/astc.h
+++ b/src/video_core/textures/astc.h
@@ -9,7 +9,7 @@
9 9
10namespace Tegra::Texture::ASTC { 10namespace Tegra::Texture::ASTC {
11 11
12std::vector<uint8_t> Decompress(std::vector<uint8_t>& data, uint32_t width, uint32_t height, 12std::vector<uint8_t> Decompress(const uint8_t* data, uint32_t width, uint32_t height,
13 uint32_t depth, uint32_t block_width, uint32_t block_height); 13 uint32_t depth, uint32_t block_width, uint32_t block_height);
14 14
15} // namespace Tegra::Texture::ASTC 15} // namespace Tegra::Texture::ASTC
diff --git a/src/video_core/textures/convert.cpp b/src/video_core/textures/convert.cpp
new file mode 100644
index 000000000..5e439f036
--- /dev/null
+++ b/src/video_core/textures/convert.cpp
@@ -0,0 +1,92 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <algorithm>
6#include <cstring>
7#include <tuple>
8#include <vector>
9
10#include "common/assert.h"
11#include "common/common_types.h"
12#include "common/logging/log.h"
13#include "video_core/textures/astc.h"
14#include "video_core/textures/convert.h"
15
16namespace Tegra::Texture {
17
18using VideoCore::Surface::PixelFormat;
19
20template <bool reverse>
21void SwapS8Z24ToZ24S8(u8* data, u32 width, u32 height) {
22 union S8Z24 {
23 BitField<0, 24, u32> z24;
24 BitField<24, 8, u32> s8;
25 };
26 static_assert(sizeof(S8Z24) == 4, "S8Z24 is incorrect size");
27
28 union Z24S8 {
29 BitField<0, 8, u32> s8;
30 BitField<8, 24, u32> z24;
31 };
32 static_assert(sizeof(Z24S8) == 4, "Z24S8 is incorrect size");
33
34 S8Z24 s8z24_pixel{};
35 Z24S8 z24s8_pixel{};
36 constexpr auto bpp{
37 VideoCore::Surface::GetBytesPerPixel(VideoCore::Surface::PixelFormat::S8Z24)};
38 for (std::size_t y = 0; y < height; ++y) {
39 for (std::size_t x = 0; x < width; ++x) {
40 const std::size_t offset{bpp * (y * width + x)};
41 if constexpr (reverse) {
42 std::memcpy(&z24s8_pixel, &data[offset], sizeof(Z24S8));
43 s8z24_pixel.s8.Assign(z24s8_pixel.s8);
44 s8z24_pixel.z24.Assign(z24s8_pixel.z24);
45 std::memcpy(&data[offset], &s8z24_pixel, sizeof(S8Z24));
46 } else {
47 std::memcpy(&s8z24_pixel, &data[offset], sizeof(S8Z24));
48 z24s8_pixel.s8.Assign(s8z24_pixel.s8);
49 z24s8_pixel.z24.Assign(s8z24_pixel.z24);
50 std::memcpy(&data[offset], &z24s8_pixel, sizeof(Z24S8));
51 }
52 }
53 }
54}
55
56static void ConvertS8Z24ToZ24S8(u8* data, u32 width, u32 height) {
57 SwapS8Z24ToZ24S8<false>(data, width, height);
58}
59
60static void ConvertZ24S8ToS8Z24(u8* data, u32 width, u32 height) {
61 SwapS8Z24ToZ24S8<true>(data, width, height);
62}
63
64void ConvertFromGuestToHost(u8* data, PixelFormat pixel_format, u32 width, u32 height, u32 depth,
65 bool convert_astc, bool convert_s8z24) {
66 if (convert_astc && IsPixelFormatASTC(pixel_format)) {
67 // Convert ASTC pixel formats to RGBA8, as most desktop GPUs do not support ASTC.
68 u32 block_width{};
69 u32 block_height{};
70 std::tie(block_width, block_height) = GetASTCBlockSize(pixel_format);
71 const std::vector<u8> rgba8_data =
72 Tegra::Texture::ASTC::Decompress(data, width, height, depth, block_width, block_height);
73 std::copy(rgba8_data.begin(), rgba8_data.end(), data);
74
75 } else if (convert_s8z24 && pixel_format == PixelFormat::S8Z24) {
76 Tegra::Texture::ConvertS8Z24ToZ24S8(data, width, height);
77 }
78}
79
80void ConvertFromHostToGuest(u8* data, PixelFormat pixel_format, u32 width, u32 height, u32 depth,
81 bool convert_astc, bool convert_s8z24) {
82 if (convert_astc && IsPixelFormatASTC(pixel_format)) {
83 LOG_CRITICAL(HW_GPU, "Conversion of format {} after texture flushing is not implemented",
84 static_cast<u32>(pixel_format));
85 UNREACHABLE();
86
87 } else if (convert_s8z24 && pixel_format == PixelFormat::S8Z24) {
88 Tegra::Texture::ConvertZ24S8ToS8Z24(data, width, height);
89 }
90}
91
92} // namespace Tegra::Texture \ No newline at end of file
diff --git a/src/video_core/textures/convert.h b/src/video_core/textures/convert.h
new file mode 100644
index 000000000..07cd8b5da
--- /dev/null
+++ b/src/video_core/textures/convert.h
@@ -0,0 +1,18 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include "common/common_types.h"
8#include "video_core/surface.h"
9
10namespace Tegra::Texture {
11
12void ConvertFromGuestToHost(u8* data, VideoCore::Surface::PixelFormat pixel_format, u32 width,
13 u32 height, u32 depth, bool convert_astc, bool convert_s8z24);
14
15void ConvertFromHostToGuest(u8* data, VideoCore::Surface::PixelFormat pixel_format, u32 width,
16 u32 height, u32 depth, bool convert_astc, bool convert_s8z24);
17
18} // namespace Tegra::Texture \ No newline at end of file
diff --git a/src/video_core/textures/decoders.cpp b/src/video_core/textures/decoders.cpp
index 5db75de22..cad7340f5 100644
--- a/src/video_core/textures/decoders.cpp
+++ b/src/video_core/textures/decoders.cpp
@@ -103,8 +103,8 @@ void FastProcessBlock(u8* const swizzled_data, u8* const unswizzled_data, const
103 const u32 swizzle_offset{y_address + table[(xb / fast_swizzle_align) % 4]}; 103 const u32 swizzle_offset{y_address + table[(xb / fast_swizzle_align) % 4]};
104 const u32 out_x = xb * out_bytes_per_pixel / bytes_per_pixel; 104 const u32 out_x = xb * out_bytes_per_pixel / bytes_per_pixel;
105 const u32 pixel_index{out_x + pixel_base}; 105 const u32 pixel_index{out_x + pixel_base};
106 data_ptrs[unswizzle] = swizzled_data + swizzle_offset; 106 data_ptrs[unswizzle ? 1 : 0] = swizzled_data + swizzle_offset;
107 data_ptrs[!unswizzle] = unswizzled_data + pixel_index; 107 data_ptrs[unswizzle ? 0 : 1] = unswizzled_data + pixel_index;
108 std::memcpy(data_ptrs[0], data_ptrs[1], fast_swizzle_align); 108 std::memcpy(data_ptrs[0], data_ptrs[1], fast_swizzle_align);
109 } 109 }
110 pixel_base += stride_x; 110 pixel_base += stride_x;
@@ -154,7 +154,7 @@ void SwizzledData(u8* const swizzled_data, u8* const unswizzled_data, const bool
154 for (u32 xb = 0; xb < blocks_on_x; xb++) { 154 for (u32 xb = 0; xb < blocks_on_x; xb++) {
155 const u32 x_start = xb * block_x_elements; 155 const u32 x_start = xb * block_x_elements;
156 const u32 x_end = std::min(width, x_start + block_x_elements); 156 const u32 x_end = std::min(width, x_start + block_x_elements);
157 if (fast) { 157 if constexpr (fast) {
158 FastProcessBlock(swizzled_data, unswizzled_data, unswizzle, x_start, y_start, 158 FastProcessBlock(swizzled_data, unswizzled_data, unswizzle, x_start, y_start,
159 z_start, x_end, y_end, z_end, tile_offset, xy_block_size, 159 z_start, x_end, y_end, z_end, tile_offset, xy_block_size,
160 layer_z, stride_x, bytes_per_pixel, out_bytes_per_pixel); 160 layer_z, stride_x, bytes_per_pixel, out_bytes_per_pixel);
diff --git a/src/video_core/textures/decoders.h b/src/video_core/textures/decoders.h
index 85b7e9f7b..65df86890 100644
--- a/src/video_core/textures/decoders.h
+++ b/src/video_core/textures/decoders.h
@@ -16,16 +16,13 @@ inline std::size_t GetGOBSize() {
16 return 512; 16 return 512;
17} 17}
18 18
19/** 19/// Unswizzles a swizzled texture without changing its format.
20 * Unswizzles a swizzled texture without changing its format.
21 */
22void UnswizzleTexture(u8* unswizzled_data, VAddr address, u32 tile_size_x, u32 tile_size_y, 20void UnswizzleTexture(u8* unswizzled_data, VAddr address, u32 tile_size_x, u32 tile_size_y,
23 u32 bytes_per_pixel, u32 width, u32 height, u32 depth, 21 u32 bytes_per_pixel, u32 width, u32 height, u32 depth,
24 u32 block_height = TICEntry::DefaultBlockHeight, 22 u32 block_height = TICEntry::DefaultBlockHeight,
25 u32 block_depth = TICEntry::DefaultBlockHeight, u32 width_spacing = 0); 23 u32 block_depth = TICEntry::DefaultBlockHeight, u32 width_spacing = 0);
26/** 24
27 * Unswizzles a swizzled texture without changing its format. 25/// Unswizzles a swizzled texture without changing its format.
28 */
29std::vector<u8> UnswizzleTexture(VAddr address, u32 tile_size_x, u32 tile_size_y, 26std::vector<u8> UnswizzleTexture(VAddr address, u32 tile_size_x, u32 tile_size_y,
30 u32 bytes_per_pixel, u32 width, u32 height, u32 depth, 27 u32 bytes_per_pixel, u32 width, u32 height, u32 depth,
31 u32 block_height = TICEntry::DefaultBlockHeight, 28 u32 block_height = TICEntry::DefaultBlockHeight,
@@ -37,15 +34,11 @@ void CopySwizzledData(u32 width, u32 height, u32 depth, u32 bytes_per_pixel,
37 u32 out_bytes_per_pixel, u8* swizzled_data, u8* unswizzled_data, 34 u32 out_bytes_per_pixel, u8* swizzled_data, u8* unswizzled_data,
38 bool unswizzle, u32 block_height, u32 block_depth, u32 width_spacing); 35 bool unswizzle, u32 block_height, u32 block_depth, u32 width_spacing);
39 36
40/** 37/// Decodes an unswizzled texture into a A8R8G8B8 texture.
41 * Decodes an unswizzled texture into a A8R8G8B8 texture.
42 */
43std::vector<u8> DecodeTexture(const std::vector<u8>& texture_data, TextureFormat format, u32 width, 38std::vector<u8> DecodeTexture(const std::vector<u8>& texture_data, TextureFormat format, u32 width,
44 u32 height); 39 u32 height);
45 40
46/** 41/// This function calculates the correct size of a texture depending if it's tiled or not.
47 * This function calculates the correct size of a texture depending if it's tiled or not.
48 */
49std::size_t CalculateSize(bool tiled, u32 bytes_per_pixel, u32 width, u32 height, u32 depth, 42std::size_t CalculateSize(bool tiled, u32 bytes_per_pixel, u32 width, u32 height, u32 depth,
50 u32 block_height, u32 block_depth); 43 u32 block_height, u32 block_depth);
51 44
@@ -53,6 +46,7 @@ std::size_t CalculateSize(bool tiled, u32 bytes_per_pixel, u32 width, u32 height
53void SwizzleSubrect(u32 subrect_width, u32 subrect_height, u32 source_pitch, u32 swizzled_width, 46void SwizzleSubrect(u32 subrect_width, u32 subrect_height, u32 source_pitch, u32 swizzled_width,
54 u32 bytes_per_pixel, VAddr swizzled_data, VAddr unswizzled_data, 47 u32 bytes_per_pixel, VAddr swizzled_data, VAddr unswizzled_data,
55 u32 block_height); 48 u32 block_height);
49
56/// Copies a tiled subrectangle into a linear surface. 50/// Copies a tiled subrectangle into a linear surface.
57void UnswizzleSubrect(u32 subrect_width, u32 subrect_height, u32 dest_pitch, u32 swizzled_width, 51void UnswizzleSubrect(u32 subrect_width, u32 subrect_height, u32 dest_pitch, u32 swizzled_width,
58 u32 bytes_per_pixel, VAddr swizzled_data, VAddr unswizzled_data, 52 u32 bytes_per_pixel, VAddr swizzled_data, VAddr unswizzled_data,
diff --git a/src/web_service/verify_login.h b/src/web_service/verify_login.h
index 39db32dbb..821b345d7 100644
--- a/src/web_service/verify_login.h
+++ b/src/web_service/verify_login.h
@@ -4,8 +4,6 @@
4 4
5#pragma once 5#pragma once
6 6
7#include <functional>
8#include <future>
9#include <string> 7#include <string>
10 8
11namespace WebService { 9namespace WebService {
diff --git a/src/web_service/web_backend.cpp b/src/web_service/web_backend.cpp
index b7737b615..40da1a4e2 100644
--- a/src/web_service/web_backend.cpp
+++ b/src/web_service/web_backend.cpp
@@ -10,7 +10,6 @@
10#include "common/common_types.h" 10#include "common/common_types.h"
11#include "common/logging/log.h" 11#include "common/logging/log.h"
12#include "common/web_result.h" 12#include "common/web_result.h"
13#include "core/settings.h"
14#include "web_service/web_backend.h" 13#include "web_service/web_backend.h"
15 14
16namespace WebService { 15namespace WebService {
diff --git a/src/yuzu/applets/web_browser.cpp b/src/yuzu/applets/web_browser.cpp
index 6a9138d53..979b9ec14 100644
--- a/src/yuzu/applets/web_browser.cpp
+++ b/src/yuzu/applets/web_browser.cpp
@@ -56,6 +56,8 @@ constexpr char NX_SHIM_INJECT_SCRIPT[] = R"(
56 window.nx.endApplet = function() { 56 window.nx.endApplet = function() {
57 applet_done = true; 57 applet_done = true;
58 }; 58 };
59
60 window.onkeypress = function(e) { if (e.keyCode === 13) { applet_done = true; } };
59)"; 61)";
60 62
61QString GetNXShimInjectionScript() { 63QString GetNXShimInjectionScript() {
diff --git a/src/yuzu/bootmanager.cpp b/src/yuzu/bootmanager.cpp
index 73b04b749..3b070bfbb 100644
--- a/src/yuzu/bootmanager.cpp
+++ b/src/yuzu/bootmanager.cpp
@@ -20,10 +20,7 @@
20EmuThread::EmuThread(GRenderWindow* render_window) : render_window(render_window) {} 20EmuThread::EmuThread(GRenderWindow* render_window) : render_window(render_window) {}
21 21
22void EmuThread::run() { 22void EmuThread::run() {
23 if (!Settings::values.use_multi_core) { 23 render_window->MakeCurrent();
24 // Single core mode must acquire OpenGL context for entire emulation session
25 render_window->MakeCurrent();
26 }
27 24
28 MicroProfileOnThreadCreate("EmuThread"); 25 MicroProfileOnThreadCreate("EmuThread");
29 26
@@ -38,6 +35,11 @@ void EmuThread::run() {
38 35
39 emit LoadProgress(VideoCore::LoadCallbackStage::Complete, 0, 0); 36 emit LoadProgress(VideoCore::LoadCallbackStage::Complete, 0, 0);
40 37
38 if (Settings::values.use_asynchronous_gpu_emulation) {
39 // Release OpenGL context for the GPU thread
40 render_window->DoneCurrent();
41 }
42
41 // holds whether the cpu was running during the last iteration, 43 // holds whether the cpu was running during the last iteration,
42 // so that the DebugModeLeft signal can be emitted before the 44 // so that the DebugModeLeft signal can be emitted before the
43 // next execution step 45 // next execution step
diff --git a/src/yuzu/compatdb.cpp b/src/yuzu/compatdb.cpp
index 5f0896f84..c8b0a5ec0 100644
--- a/src/yuzu/compatdb.cpp
+++ b/src/yuzu/compatdb.cpp
@@ -53,15 +53,15 @@ void CompatDB::Submit() {
53 case CompatDBPage::Final: 53 case CompatDBPage::Final:
54 back(); 54 back();
55 LOG_DEBUG(Frontend, "Compatibility Rating: {}", compatibility->checkedId()); 55 LOG_DEBUG(Frontend, "Compatibility Rating: {}", compatibility->checkedId());
56 Core::Telemetry().AddField(Telemetry::FieldType::UserFeedback, "Compatibility", 56 Core::System::GetInstance().TelemetrySession().AddField(
57 compatibility->checkedId()); 57 Telemetry::FieldType::UserFeedback, "Compatibility", compatibility->checkedId());
58 58
59 button(NextButton)->setEnabled(false); 59 button(NextButton)->setEnabled(false);
60 button(NextButton)->setText(tr("Submitting")); 60 button(NextButton)->setText(tr("Submitting"));
61 button(QWizard::CancelButton)->setVisible(false); 61 button(QWizard::CancelButton)->setVisible(false);
62 62
63 testcase_watcher.setFuture(QtConcurrent::run( 63 testcase_watcher.setFuture(QtConcurrent::run(
64 [this]() { return Core::System::GetInstance().TelemetrySession().SubmitTestcase(); })); 64 [] { return Core::System::GetInstance().TelemetrySession().SubmitTestcase(); }));
65 break; 65 break;
66 default: 66 default:
67 LOG_ERROR(Frontend, "Unexpected page: {}", currentId()); 67 LOG_ERROR(Frontend, "Unexpected page: {}", currentId());
diff --git a/src/yuzu/configuration/config.cpp b/src/yuzu/configuration/config.cpp
index e9546dadf..74dc6bb28 100644
--- a/src/yuzu/configuration/config.cpp
+++ b/src/yuzu/configuration/config.cpp
@@ -374,6 +374,8 @@ void Config::ReadValues() {
374 qt_config->value("use_disk_shader_cache", false).toBool(); 374 qt_config->value("use_disk_shader_cache", false).toBool();
375 Settings::values.use_accurate_gpu_emulation = 375 Settings::values.use_accurate_gpu_emulation =
376 qt_config->value("use_accurate_gpu_emulation", false).toBool(); 376 qt_config->value("use_accurate_gpu_emulation", false).toBool();
377 Settings::values.use_asynchronous_gpu_emulation =
378 qt_config->value("use_asynchronous_gpu_emulation", false).toBool();
377 379
378 Settings::values.bg_red = qt_config->value("bg_red", 0.0).toFloat(); 380 Settings::values.bg_red = qt_config->value("bg_red", 0.0).toFloat();
379 Settings::values.bg_green = qt_config->value("bg_green", 0.0).toFloat(); 381 Settings::values.bg_green = qt_config->value("bg_green", 0.0).toFloat();
@@ -633,6 +635,8 @@ void Config::SaveValues() {
633 qt_config->setValue("frame_limit", Settings::values.frame_limit); 635 qt_config->setValue("frame_limit", Settings::values.frame_limit);
634 qt_config->setValue("use_disk_shader_cache", Settings::values.use_disk_shader_cache); 636 qt_config->setValue("use_disk_shader_cache", Settings::values.use_disk_shader_cache);
635 qt_config->setValue("use_accurate_gpu_emulation", Settings::values.use_accurate_gpu_emulation); 637 qt_config->setValue("use_accurate_gpu_emulation", Settings::values.use_accurate_gpu_emulation);
638 qt_config->setValue("use_asynchronous_gpu_emulation",
639 Settings::values.use_asynchronous_gpu_emulation);
636 640
637 // Cast to double because Qt's written float values are not human-readable 641 // Cast to double because Qt's written float values are not human-readable
638 qt_config->setValue("bg_red", (double)Settings::values.bg_red); 642 qt_config->setValue("bg_red", (double)Settings::values.bg_red);
diff --git a/src/yuzu/configuration/configure_graphics.cpp b/src/yuzu/configuration/configure_graphics.cpp
index 0f5dd534b..dd1d67488 100644
--- a/src/yuzu/configuration/configure_graphics.cpp
+++ b/src/yuzu/configuration/configure_graphics.cpp
@@ -75,6 +75,8 @@ void ConfigureGraphics::setConfiguration() {
75 ui->frame_limit->setValue(Settings::values.frame_limit); 75 ui->frame_limit->setValue(Settings::values.frame_limit);
76 ui->use_disk_shader_cache->setChecked(Settings::values.use_disk_shader_cache); 76 ui->use_disk_shader_cache->setChecked(Settings::values.use_disk_shader_cache);
77 ui->use_accurate_gpu_emulation->setChecked(Settings::values.use_accurate_gpu_emulation); 77 ui->use_accurate_gpu_emulation->setChecked(Settings::values.use_accurate_gpu_emulation);
78 ui->use_asynchronous_gpu_emulation->setEnabled(!Core::System::GetInstance().IsPoweredOn());
79 ui->use_asynchronous_gpu_emulation->setChecked(Settings::values.use_asynchronous_gpu_emulation);
78 UpdateBackgroundColorButton(QColor::fromRgbF(Settings::values.bg_red, Settings::values.bg_green, 80 UpdateBackgroundColorButton(QColor::fromRgbF(Settings::values.bg_red, Settings::values.bg_green,
79 Settings::values.bg_blue)); 81 Settings::values.bg_blue));
80} 82}
@@ -86,6 +88,8 @@ void ConfigureGraphics::applyConfiguration() {
86 Settings::values.frame_limit = ui->frame_limit->value(); 88 Settings::values.frame_limit = ui->frame_limit->value();
87 Settings::values.use_disk_shader_cache = ui->use_disk_shader_cache->isChecked(); 89 Settings::values.use_disk_shader_cache = ui->use_disk_shader_cache->isChecked();
88 Settings::values.use_accurate_gpu_emulation = ui->use_accurate_gpu_emulation->isChecked(); 90 Settings::values.use_accurate_gpu_emulation = ui->use_accurate_gpu_emulation->isChecked();
91 Settings::values.use_asynchronous_gpu_emulation =
92 ui->use_asynchronous_gpu_emulation->isChecked();
89 Settings::values.bg_red = static_cast<float>(bg_color.redF()); 93 Settings::values.bg_red = static_cast<float>(bg_color.redF());
90 Settings::values.bg_green = static_cast<float>(bg_color.greenF()); 94 Settings::values.bg_green = static_cast<float>(bg_color.greenF());
91 Settings::values.bg_blue = static_cast<float>(bg_color.blueF()); 95 Settings::values.bg_blue = static_cast<float>(bg_color.blueF());
diff --git a/src/yuzu/configuration/configure_graphics.ui b/src/yuzu/configuration/configure_graphics.ui
index 824f5810a..c6767e0ca 100644
--- a/src/yuzu/configuration/configure_graphics.ui
+++ b/src/yuzu/configuration/configure_graphics.ui
@@ -64,6 +64,13 @@
64 </widget> 64 </widget>
65 </item> 65 </item>
66 <item> 66 <item>
67 <widget class="QCheckBox" name="use_asynchronous_gpu_emulation">
68 <property name="text">
69 <string>Use asynchronous GPU emulation</string>
70 </property>
71 </widget>
72 </item>
73 <item>
67 <layout class="QHBoxLayout" name="horizontalLayout"> 74 <layout class="QHBoxLayout" name="horizontalLayout">
68 <item> 75 <item>
69 <widget class="QLabel" name="label"> 76 <widget class="QLabel" name="label">
diff --git a/src/yuzu/debugger/graphics/graphics_surface.cpp b/src/yuzu/debugger/graphics/graphics_surface.cpp
index 209798521..71683da8e 100644
--- a/src/yuzu/debugger/graphics/graphics_surface.cpp
+++ b/src/yuzu/debugger/graphics/graphics_surface.cpp
@@ -398,7 +398,7 @@ void GraphicsSurfaceWidget::OnUpdate() {
398 398
399 for (unsigned int y = 0; y < surface_height; ++y) { 399 for (unsigned int y = 0; y < surface_height; ++y) {
400 for (unsigned int x = 0; x < surface_width; ++x) { 400 for (unsigned int x = 0; x < surface_width; ++x) {
401 Math::Vec4<u8> color; 401 Common::Vec4<u8> color;
402 color[0] = texture_data[x + y * surface_width + 0]; 402 color[0] = texture_data[x + y * surface_width + 0];
403 color[1] = texture_data[x + y * surface_width + 1]; 403 color[1] = texture_data[x + y * surface_width + 1];
404 color[2] = texture_data[x + y * surface_width + 2]; 404 color[2] = texture_data[x + y * surface_width + 2];
diff --git a/src/yuzu/debugger/wait_tree.cpp b/src/yuzu/debugger/wait_tree.cpp
index f50225d5f..06ad74ffe 100644
--- a/src/yuzu/debugger/wait_tree.cpp
+++ b/src/yuzu/debugger/wait_tree.cpp
@@ -81,9 +81,8 @@ QString WaitTreeText::GetText() const {
81 return text; 81 return text;
82} 82}
83 83
84WaitTreeMutexInfo::WaitTreeMutexInfo(VAddr mutex_address) : mutex_address(mutex_address) { 84WaitTreeMutexInfo::WaitTreeMutexInfo(VAddr mutex_address, const Kernel::HandleTable& handle_table)
85 const auto& handle_table = Core::CurrentProcess()->GetHandleTable(); 85 : mutex_address(mutex_address) {
86
87 mutex_value = Memory::Read32(mutex_address); 86 mutex_value = Memory::Read32(mutex_address);
88 owner_handle = static_cast<Kernel::Handle>(mutex_value & Kernel::Mutex::MutexOwnerMask); 87 owner_handle = static_cast<Kernel::Handle>(mutex_value & Kernel::Mutex::MutexOwnerMask);
89 owner = handle_table.Get<Kernel::Thread>(owner_handle); 88 owner = handle_table.Get<Kernel::Thread>(owner_handle);
@@ -316,7 +315,8 @@ std::vector<std::unique_ptr<WaitTreeItem>> WaitTreeThread::GetChildren() const {
316 315
317 const VAddr mutex_wait_address = thread.GetMutexWaitAddress(); 316 const VAddr mutex_wait_address = thread.GetMutexWaitAddress();
318 if (mutex_wait_address != 0) { 317 if (mutex_wait_address != 0) {
319 list.push_back(std::make_unique<WaitTreeMutexInfo>(mutex_wait_address)); 318 const auto& handle_table = thread.GetOwnerProcess()->GetHandleTable();
319 list.push_back(std::make_unique<WaitTreeMutexInfo>(mutex_wait_address, handle_table));
320 } else { 320 } else {
321 list.push_back(std::make_unique<WaitTreeText>(tr("not waiting for mutex"))); 321 list.push_back(std::make_unique<WaitTreeText>(tr("not waiting for mutex")));
322 } 322 }
diff --git a/src/yuzu/debugger/wait_tree.h b/src/yuzu/debugger/wait_tree.h
index 365c3dbfe..62886609d 100644
--- a/src/yuzu/debugger/wait_tree.h
+++ b/src/yuzu/debugger/wait_tree.h
@@ -17,6 +17,7 @@
17class EmuThread; 17class EmuThread;
18 18
19namespace Kernel { 19namespace Kernel {
20class HandleTable;
20class ReadableEvent; 21class ReadableEvent;
21class WaitObject; 22class WaitObject;
22class Thread; 23class Thread;
@@ -72,7 +73,7 @@ public:
72class WaitTreeMutexInfo : public WaitTreeExpandableItem { 73class WaitTreeMutexInfo : public WaitTreeExpandableItem {
73 Q_OBJECT 74 Q_OBJECT
74public: 75public:
75 explicit WaitTreeMutexInfo(VAddr mutex_address); 76 explicit WaitTreeMutexInfo(VAddr mutex_address, const Kernel::HandleTable& handle_table);
76 ~WaitTreeMutexInfo() override; 77 ~WaitTreeMutexInfo() override;
77 78
78 QString GetText() const override; 79 QString GetText() const override;
diff --git a/src/yuzu/main.cpp b/src/yuzu/main.cpp
index 1d460c189..41ba3c4c6 100644
--- a/src/yuzu/main.cpp
+++ b/src/yuzu/main.cpp
@@ -11,6 +11,7 @@
11#include "applets/profile_select.h" 11#include "applets/profile_select.h"
12#include "applets/software_keyboard.h" 12#include "applets/software_keyboard.h"
13#include "applets/web_browser.h" 13#include "applets/web_browser.h"
14#include "configuration/configure_input.h"
14#include "configuration/configure_per_general.h" 15#include "configuration/configure_per_general.h"
15#include "core/file_sys/vfs.h" 16#include "core/file_sys/vfs.h"
16#include "core/file_sys/vfs_real.h" 17#include "core/file_sys/vfs_real.h"
@@ -339,6 +340,11 @@ void GMainWindow::WebBrowserOpenPage(std::string_view filename, std::string_view
339 .arg(QString::fromStdString(std::to_string(key_code)))); 340 .arg(QString::fromStdString(std::to_string(key_code))));
340 }; 341 };
341 342
343 QMessageBox::information(
344 this, tr("Exit"),
345 tr("To exit the web application, use the game provided controls to select exit, select the "
346 "'Exit Web Applet' option in the menu bar, or press the 'Enter' key."));
347
342 bool running_exit_check = false; 348 bool running_exit_check = false;
343 while (!finished) { 349 while (!finished) {
344 QApplication::processEvents(); 350 QApplication::processEvents();
@@ -522,6 +528,7 @@ void GMainWindow::InitializeHotkeys() {
522 Qt::ApplicationShortcut); 528 Qt::ApplicationShortcut);
523 hotkey_registry.RegisterHotkey("Main Window", "Capture Screenshot", 529 hotkey_registry.RegisterHotkey("Main Window", "Capture Screenshot",
524 QKeySequence(QKeySequence::Print)); 530 QKeySequence(QKeySequence::Print));
531 hotkey_registry.RegisterHotkey("Main Window", "Change Docked Mode", QKeySequence(Qt::Key_F10));
525 532
526 hotkey_registry.LoadHotkeys(); 533 hotkey_registry.LoadHotkeys();
527 534
@@ -561,7 +568,10 @@ void GMainWindow::InitializeHotkeys() {
561 Settings::values.use_frame_limit = !Settings::values.use_frame_limit; 568 Settings::values.use_frame_limit = !Settings::values.use_frame_limit;
562 UpdateStatusBar(); 569 UpdateStatusBar();
563 }); 570 });
564 constexpr u16 SPEED_LIMIT_STEP = 5; 571 // TODO: Remove this comment/static whenever the next major release of
572 // MSVC occurs and we make it a requirement (see:
573 // https://developercommunity.visualstudio.com/content/problem/93922/constexprs-are-trying-to-be-captured-in-lambda-fun.html)
574 static constexpr u16 SPEED_LIMIT_STEP = 5;
565 connect(hotkey_registry.GetHotkey("Main Window", "Increase Speed Limit", this), 575 connect(hotkey_registry.GetHotkey("Main Window", "Increase Speed Limit", this),
566 &QShortcut::activated, this, [&] { 576 &QShortcut::activated, this, [&] {
567 if (Settings::values.frame_limit < 9999 - SPEED_LIMIT_STEP) { 577 if (Settings::values.frame_limit < 9999 - SPEED_LIMIT_STEP) {
@@ -588,6 +598,12 @@ void GMainWindow::InitializeHotkeys() {
588 OnCaptureScreenshot(); 598 OnCaptureScreenshot();
589 } 599 }
590 }); 600 });
601 connect(hotkey_registry.GetHotkey("Main Window", "Change Docked Mode", this),
602 &QShortcut::activated, this, [&] {
603 Settings::values.use_docked_mode = !Settings::values.use_docked_mode;
604 OnDockedModeChanged(!Settings::values.use_docked_mode,
605 Settings::values.use_docked_mode);
606 });
591} 607}
592 608
593void GMainWindow::SetDefaultUIGeometry() { 609void GMainWindow::SetDefaultUIGeometry() {
@@ -846,7 +862,7 @@ bool GMainWindow::LoadROM(const QString& filename) {
846 } 862 }
847 game_path = filename; 863 game_path = filename;
848 864
849 Core::Telemetry().AddField(Telemetry::FieldType::App, "Frontend", "Qt"); 865 system.TelemetrySession().AddField(Telemetry::FieldType::App, "Frontend", "Qt");
850 return true; 866 return true;
851} 867}
852 868
diff --git a/src/yuzu_cmd/config.cpp b/src/yuzu_cmd/config.cpp
index ff05b3179..32e78049c 100644
--- a/src/yuzu_cmd/config.cpp
+++ b/src/yuzu_cmd/config.cpp
@@ -346,7 +346,7 @@ void Config::ReadValues() {
346 346
347 // Renderer 347 // Renderer
348 Settings::values.resolution_factor = 348 Settings::values.resolution_factor =
349 (float)sdl2_config->GetReal("Renderer", "resolution_factor", 1.0); 349 static_cast<float>(sdl2_config->GetReal("Renderer", "resolution_factor", 1.0));
350 Settings::values.use_frame_limit = sdl2_config->GetBoolean("Renderer", "use_frame_limit", true); 350 Settings::values.use_frame_limit = sdl2_config->GetBoolean("Renderer", "use_frame_limit", true);
351 Settings::values.frame_limit = 351 Settings::values.frame_limit =
352 static_cast<u16>(sdl2_config->GetInteger("Renderer", "frame_limit", 100)); 352 static_cast<u16>(sdl2_config->GetInteger("Renderer", "frame_limit", 100));
@@ -354,17 +354,20 @@ void Config::ReadValues() {
354 sdl2_config->GetBoolean("Renderer", "use_disk_shader_cache", false); 354 sdl2_config->GetBoolean("Renderer", "use_disk_shader_cache", false);
355 Settings::values.use_accurate_gpu_emulation = 355 Settings::values.use_accurate_gpu_emulation =
356 sdl2_config->GetBoolean("Renderer", "use_accurate_gpu_emulation", false); 356 sdl2_config->GetBoolean("Renderer", "use_accurate_gpu_emulation", false);
357 Settings::values.use_asynchronous_gpu_emulation =
358 sdl2_config->GetBoolean("Renderer", "use_asynchronous_gpu_emulation", false);
357 359
358 Settings::values.bg_red = (float)sdl2_config->GetReal("Renderer", "bg_red", 0.0); 360 Settings::values.bg_red = static_cast<float>(sdl2_config->GetReal("Renderer", "bg_red", 0.0));
359 Settings::values.bg_green = (float)sdl2_config->GetReal("Renderer", "bg_green", 0.0); 361 Settings::values.bg_green =
360 Settings::values.bg_blue = (float)sdl2_config->GetReal("Renderer", "bg_blue", 0.0); 362 static_cast<float>(sdl2_config->GetReal("Renderer", "bg_green", 0.0));
363 Settings::values.bg_blue = static_cast<float>(sdl2_config->GetReal("Renderer", "bg_blue", 0.0));
361 364
362 // Audio 365 // Audio
363 Settings::values.sink_id = sdl2_config->Get("Audio", "output_engine", "auto"); 366 Settings::values.sink_id = sdl2_config->Get("Audio", "output_engine", "auto");
364 Settings::values.enable_audio_stretching = 367 Settings::values.enable_audio_stretching =
365 sdl2_config->GetBoolean("Audio", "enable_audio_stretching", true); 368 sdl2_config->GetBoolean("Audio", "enable_audio_stretching", true);
366 Settings::values.audio_device_id = sdl2_config->Get("Audio", "output_device", "auto"); 369 Settings::values.audio_device_id = sdl2_config->Get("Audio", "output_device", "auto");
367 Settings::values.volume = sdl2_config->GetReal("Audio", "volume", 1); 370 Settings::values.volume = static_cast<float>(sdl2_config->GetReal("Audio", "volume", 1));
368 371
369 Settings::values.language_index = sdl2_config->GetInteger("System", "language_index", 1); 372 Settings::values.language_index = sdl2_config->GetInteger("System", "language_index", 1);
370 373
diff --git a/src/yuzu_cmd/default_ini.h b/src/yuzu_cmd/default_ini.h
index a81986f8e..6538af098 100644
--- a/src/yuzu_cmd/default_ini.h
+++ b/src/yuzu_cmd/default_ini.h
@@ -118,6 +118,10 @@ use_disk_shader_cache =
118# 0 (default): Off (fast), 1 : On (slow) 118# 0 (default): Off (fast), 1 : On (slow)
119use_accurate_gpu_emulation = 119use_accurate_gpu_emulation =
120 120
121# Whether to use asynchronous GPU emulation
122# 0 : Off (slow), 1 (default): On (fast)
123use_asynchronous_gpu_emulation =
124
121# The clear color for the renderer. What shows up on the sides of the bottom screen. 125# The clear color for the renderer. What shows up on the sides of the bottom screen.
122# Must be in range of 0.0-1.0. Defaults to 1.0 for all. 126# Must be in range of 0.0-1.0. Defaults to 1.0 for all.
123bg_red = 127bg_red =
diff --git a/src/yuzu_cmd/yuzu.cpp b/src/yuzu_cmd/yuzu.cpp
index c34b5467f..c6c66a787 100644
--- a/src/yuzu_cmd/yuzu.cpp
+++ b/src/yuzu_cmd/yuzu.cpp
@@ -216,7 +216,7 @@ int main(int argc, char** argv) {
216 } 216 }
217 } 217 }
218 218
219 Core::Telemetry().AddField(Telemetry::FieldType::App, "Frontend", "SDL"); 219 system.TelemetrySession().AddField(Telemetry::FieldType::App, "Frontend", "SDL");
220 220
221 system.Renderer().Rasterizer().LoadDiskResources(); 221 system.Renderer().Rasterizer().LoadDiskResources();
222 222