summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r--src/audio_core/cubeb_sink.cpp6
-rw-r--r--src/audio_core/cubeb_sink.h2
-rw-r--r--src/common/CMakeLists.txt1
-rw-r--r--src/common/bit_field.h16
-rw-r--r--src/common/logging/backend.cpp50
-rw-r--r--src/common/logging/backend.h4
-rw-r--r--src/core/CMakeLists.txt1
-rw-r--r--src/core/core.cpp34
-rw-r--r--src/core/core.h4
-rw-r--r--src/core/core_cpu.cpp9
-rw-r--r--src/core/core_cpu.h8
-rw-r--r--src/core/cpu_core_manager.cpp3
-rw-r--r--src/core/hle/ipc.h4
-rw-r--r--src/core/hle/ipc_helpers.h17
-rw-r--r--src/core/hle/kernel/address_arbiter.cpp183
-rw-r--r--src/core/hle/kernel/address_arbiter.h82
-rw-r--r--src/core/hle/kernel/client_session.cpp14
-rw-r--r--src/core/hle/kernel/client_session.h9
-rw-r--r--src/core/hle/kernel/hle_ipc.cpp22
-rw-r--r--src/core/hle/kernel/hle_ipc.h25
-rw-r--r--src/core/hle/kernel/kernel.cpp20
-rw-r--r--src/core/hle/kernel/kernel.h20
-rw-r--r--src/core/hle/kernel/process.cpp9
-rw-r--r--src/core/hle/kernel/process.h22
-rw-r--r--src/core/hle/kernel/scheduler.cpp14
-rw-r--r--src/core/hle/kernel/scheduler.h6
-rw-r--r--src/core/hle/kernel/server_session.cpp91
-rw-r--r--src/core/hle/kernel/server_session.h53
-rw-r--r--src/core/hle/kernel/shared_memory.cpp5
-rw-r--r--src/core/hle/kernel/svc.cpp63
-rw-r--r--src/core/hle/kernel/thread.cpp2
-rw-r--r--src/core/hle/kernel/vm_manager.cpp47
-rw-r--r--src/core/hle/kernel/vm_manager.h24
-rw-r--r--src/core/hle/result.h1
-rw-r--r--src/core/hle/service/am/applets/software_keyboard.cpp1
-rw-r--r--src/core/hle/service/am/applets/software_keyboard.h3
-rw-r--r--src/core/hle/service/audio/audout_u.cpp17
-rw-r--r--src/core/hle/service/audio/audren_u.cpp3
-rw-r--r--src/core/hle/service/audio/errors.h15
-rw-r--r--src/core/hle/service/audio/hwopus.cpp176
-rw-r--r--src/core/hle/service/nvdrv/devices/nvdisp_disp0.cpp2
-rw-r--r--src/core/hle/service/nvdrv/devices/nvhost_as_gpu.cpp2
-rw-r--r--src/core/hle/service/nvdrv/devices/nvhost_gpu.cpp14
-rw-r--r--src/core/hle/service/nvflinger/nvflinger.cpp2
-rw-r--r--src/core/hle/service/service.cpp7
-rw-r--r--src/core/hle/service/sm/controller.cpp2
-rw-r--r--src/core/hle/service/vi/vi.cpp38
-rw-r--r--src/core/hle/service/vi/vi.h40
-rw-r--r--src/core/hle/service/vi/vi_m.cpp12
-rw-r--r--src/core/hle/service/vi/vi_m.h19
-rw-r--r--src/core/hle/service/vi/vi_s.cpp12
-rw-r--r--src/core/hle/service/vi/vi_s.h19
-rw-r--r--src/core/hle/service/vi/vi_u.cpp12
-rw-r--r--src/core/hle/service/vi/vi_u.h19
-rw-r--r--src/core/memory.cpp14
-rw-r--r--src/core/settings.cpp3
-rw-r--r--src/core/settings.h1
-rw-r--r--src/core/telemetry_session.cpp2
-rw-r--r--src/tests/core/arm/arm_test_common.cpp8
-rw-r--r--src/video_core/CMakeLists.txt11
-rw-r--r--src/video_core/dma_pusher.cpp2
-rw-r--r--src/video_core/engines/fermi_2d.cpp7
-rw-r--r--src/video_core/engines/fermi_2d.h2
-rw-r--r--src/video_core/engines/kepler_compute.cpp3
-rw-r--r--src/video_core/engines/kepler_compute.h3
-rw-r--r--src/video_core/engines/kepler_memory.cpp2
-rw-r--r--src/video_core/engines/kepler_memory.h1
-rw-r--r--src/video_core/engines/maxwell_dma.cpp5
-rw-r--r--src/video_core/engines/maxwell_dma.h1
-rw-r--r--src/video_core/engines/shader_bytecode.h26
-rw-r--r--src/video_core/gpu.cpp5
-rw-r--r--src/video_core/gpu.h57
-rw-r--r--src/video_core/gpu_asynch.cpp37
-rw-r--r--src/video_core/gpu_asynch.h37
-rw-r--r--src/video_core/gpu_synch.cpp37
-rw-r--r--src/video_core/gpu_synch.h29
-rw-r--r--src/video_core/gpu_thread.cpp152
-rw-r--r--src/video_core/gpu_thread.h133
-rw-r--r--src/video_core/renderer_base.cpp1
-rw-r--r--src/video_core/renderer_opengl/gl_rasterizer.cpp152
-rw-r--r--src/video_core/renderer_opengl/gl_rasterizer.h5
-rw-r--r--src/video_core/renderer_opengl/gl_rasterizer_cache.cpp156
-rw-r--r--src/video_core/renderer_opengl/gl_rasterizer_cache.h28
-rw-r--r--src/video_core/renderer_opengl/gl_shader_decompiler.cpp56
-rw-r--r--src/video_core/renderer_opengl/gl_state.cpp4
-rw-r--r--src/video_core/renderer_opengl/renderer_opengl.cpp27
-rw-r--r--src/video_core/renderer_opengl/renderer_opengl.h1
-rw-r--r--src/video_core/renderer_vulkan/maxwell_to_vk.cpp483
-rw-r--r--src/video_core/renderer_vulkan/maxwell_to_vk.h58
-rw-r--r--src/video_core/renderer_vulkan/vk_buffer_cache.cpp2
-rw-r--r--src/video_core/renderer_vulkan/vk_buffer_cache.h6
-rw-r--r--src/video_core/renderer_vulkan/vk_device.cpp13
-rw-r--r--src/video_core/shader/decode.cpp1
-rw-r--r--src/video_core/shader/decode/memory.cpp493
-rw-r--r--src/video_core/shader/decode/texture.cpp534
-rw-r--r--src/video_core/shader/shader_ir.h5
-rw-r--r--src/video_core/surface.cpp2
-rw-r--r--src/video_core/textures/astc.cpp80
-rw-r--r--src/video_core/textures/astc.h2
-rw-r--r--src/video_core/textures/convert.cpp92
-rw-r--r--src/video_core/textures/convert.h18
-rw-r--r--src/video_core/textures/decoders.cpp6
-rw-r--r--src/video_core/textures/decoders.h18
-rw-r--r--src/video_core/textures/texture.h34
-rw-r--r--src/web_service/verify_login.h2
-rw-r--r--src/web_service/web_backend.cpp1
-rw-r--r--src/yuzu/applets/web_browser.cpp2
-rw-r--r--src/yuzu/bootmanager.cpp10
-rw-r--r--src/yuzu/compatdb.cpp4
-rw-r--r--src/yuzu/configuration/config.cpp4
-rw-r--r--src/yuzu/configuration/configure_graphics.cpp4
-rw-r--r--src/yuzu/configuration/configure_graphics.ui7
-rw-r--r--src/yuzu/debugger/wait_tree.cpp8
-rw-r--r--src/yuzu/debugger/wait_tree.h3
-rw-r--r--src/yuzu/main.cpp20
-rw-r--r--src/yuzu_cmd/config.cpp13
-rw-r--r--src/yuzu_cmd/default_ini.h4
-rw-r--r--src/yuzu_cmd/yuzu.cpp2
118 files changed, 2841 insertions, 1384 deletions
diff --git a/src/audio_core/cubeb_sink.cpp b/src/audio_core/cubeb_sink.cpp
index 1da0b9f2a..7047ed9cf 100644
--- a/src/audio_core/cubeb_sink.cpp
+++ b/src/audio_core/cubeb_sink.cpp
@@ -12,7 +12,7 @@
12#include "common/ring_buffer.h" 12#include "common/ring_buffer.h"
13#include "core/settings.h" 13#include "core/settings.h"
14 14
15#ifdef _MSC_VER 15#ifdef _WIN32
16#include <objbase.h> 16#include <objbase.h>
17#endif 17#endif
18 18
@@ -113,7 +113,7 @@ private:
113 113
114CubebSink::CubebSink(std::string_view target_device_name) { 114CubebSink::CubebSink(std::string_view target_device_name) {
115 // Cubeb requires COM to be initialized on the thread calling cubeb_init on Windows 115 // Cubeb requires COM to be initialized on the thread calling cubeb_init on Windows
116#ifdef _MSC_VER 116#ifdef _WIN32
117 com_init_result = CoInitializeEx(nullptr, COINIT_MULTITHREADED); 117 com_init_result = CoInitializeEx(nullptr, COINIT_MULTITHREADED);
118#endif 118#endif
119 119
@@ -152,7 +152,7 @@ CubebSink::~CubebSink() {
152 152
153 cubeb_destroy(ctx); 153 cubeb_destroy(ctx);
154 154
155#ifdef _MSC_VER 155#ifdef _WIN32
156 if (SUCCEEDED(com_init_result)) { 156 if (SUCCEEDED(com_init_result)) {
157 CoUninitialize(); 157 CoUninitialize();
158 } 158 }
diff --git a/src/audio_core/cubeb_sink.h b/src/audio_core/cubeb_sink.h
index 511df7bb1..7ce850f47 100644
--- a/src/audio_core/cubeb_sink.h
+++ b/src/audio_core/cubeb_sink.h
@@ -26,7 +26,7 @@ private:
26 cubeb_devid output_device{}; 26 cubeb_devid output_device{};
27 std::vector<SinkStreamPtr> sink_streams; 27 std::vector<SinkStreamPtr> sink_streams;
28 28
29#ifdef _MSC_VER 29#ifdef _WIN32
30 u32 com_init_result = 0; 30 u32 com_init_result = 0;
31#endif 31#endif
32}; 32};
diff --git a/src/common/CMakeLists.txt b/src/common/CMakeLists.txt
index bdd885273..3d30f0e3e 100644
--- a/src/common/CMakeLists.txt
+++ b/src/common/CMakeLists.txt
@@ -47,6 +47,7 @@ add_custom_command(OUTPUT scm_rev.cpp
47 "${VIDEO_CORE}/shader/decode/integer_set.cpp" 47 "${VIDEO_CORE}/shader/decode/integer_set.cpp"
48 "${VIDEO_CORE}/shader/decode/integer_set_predicate.cpp" 48 "${VIDEO_CORE}/shader/decode/integer_set_predicate.cpp"
49 "${VIDEO_CORE}/shader/decode/memory.cpp" 49 "${VIDEO_CORE}/shader/decode/memory.cpp"
50 "${VIDEO_CORE}/shader/decode/texture.cpp"
50 "${VIDEO_CORE}/shader/decode/other.cpp" 51 "${VIDEO_CORE}/shader/decode/other.cpp"
51 "${VIDEO_CORE}/shader/decode/predicate_set_predicate.cpp" 52 "${VIDEO_CORE}/shader/decode/predicate_set_predicate.cpp"
52 "${VIDEO_CORE}/shader/decode/predicate_set_register.cpp" 53 "${VIDEO_CORE}/shader/decode/predicate_set_register.cpp"
diff --git a/src/common/bit_field.h b/src/common/bit_field.h
index 21e07925d..7433c39ba 100644
--- a/src/common/bit_field.h
+++ b/src/common/bit_field.h
@@ -111,12 +111,6 @@
111template <std::size_t Position, std::size_t Bits, typename T> 111template <std::size_t Position, std::size_t Bits, typename T>
112struct BitField { 112struct BitField {
113private: 113private:
114 // We hide the copy assigment operator here, because the default copy
115 // assignment would copy the full storage value, rather than just the bits
116 // relevant to this particular bit field.
117 // We don't delete it because we want BitField to be trivially copyable.
118 constexpr BitField& operator=(const BitField&) = default;
119
120 // UnderlyingType is T for non-enum types and the underlying type of T if 114 // UnderlyingType is T for non-enum types and the underlying type of T if
121 // T is an enumeration. Note that T is wrapped within an enable_if in the 115 // T is an enumeration. Note that T is wrapped within an enable_if in the
122 // former case to workaround compile errors which arise when using 116 // former case to workaround compile errors which arise when using
@@ -163,9 +157,13 @@ public:
163 BitField(T val) = delete; 157 BitField(T val) = delete;
164 BitField& operator=(T val) = delete; 158 BitField& operator=(T val) = delete;
165 159
166 // Force default constructor to be created 160 constexpr BitField() noexcept = default;
167 // so that we can use this within unions 161
168 constexpr BitField() = default; 162 constexpr BitField(const BitField&) noexcept = default;
163 constexpr BitField& operator=(const BitField&) noexcept = default;
164
165 constexpr BitField(BitField&&) noexcept = default;
166 constexpr BitField& operator=(BitField&&) noexcept = default;
169 167
170 constexpr FORCE_INLINE operator T() const { 168 constexpr FORCE_INLINE operator T() const {
171 return Value(); 169 return Value();
diff --git a/src/common/logging/backend.cpp b/src/common/logging/backend.cpp
index b369f199f..4462ff3fb 100644
--- a/src/common/logging/backend.cpp
+++ b/src/common/logging/backend.cpp
@@ -39,8 +39,10 @@ public:
39 Impl(Impl const&) = delete; 39 Impl(Impl const&) = delete;
40 const Impl& operator=(Impl const&) = delete; 40 const Impl& operator=(Impl const&) = delete;
41 41
42 void PushEntry(Entry e) { 42 void PushEntry(Class log_class, Level log_level, const char* filename, unsigned int line_num,
43 message_queue.Push(std::move(e)); 43 const char* function, std::string message) {
44 message_queue.Push(
45 CreateEntry(log_class, log_level, filename, line_num, function, std::move(message)));
44 } 46 }
45 47
46 void AddBackend(std::unique_ptr<Backend> backend) { 48 void AddBackend(std::unique_ptr<Backend> backend) {
@@ -108,11 +110,30 @@ private:
108 backend_thread.join(); 110 backend_thread.join();
109 } 111 }
110 112
113 Entry CreateEntry(Class log_class, Level log_level, const char* filename, unsigned int line_nr,
114 const char* function, std::string message) const {
115 using std::chrono::duration_cast;
116 using std::chrono::steady_clock;
117
118 Entry entry;
119 entry.timestamp =
120 duration_cast<std::chrono::microseconds>(steady_clock::now() - time_origin);
121 entry.log_class = log_class;
122 entry.log_level = log_level;
123 entry.filename = Common::TrimSourcePath(filename);
124 entry.line_num = line_nr;
125 entry.function = function;
126 entry.message = std::move(message);
127
128 return entry;
129 }
130
111 std::mutex writing_mutex; 131 std::mutex writing_mutex;
112 std::thread backend_thread; 132 std::thread backend_thread;
113 std::vector<std::unique_ptr<Backend>> backends; 133 std::vector<std::unique_ptr<Backend>> backends;
114 Common::MPSCQueue<Log::Entry> message_queue; 134 Common::MPSCQueue<Log::Entry> message_queue;
115 Filter filter; 135 Filter filter;
136 std::chrono::steady_clock::time_point time_origin{std::chrono::steady_clock::now()};
116}; 137};
117 138
118void ConsoleBackend::Write(const Entry& entry) { 139void ConsoleBackend::Write(const Entry& entry) {
@@ -271,25 +292,6 @@ const char* GetLevelName(Level log_level) {
271#undef LVL 292#undef LVL
272} 293}
273 294
274Entry CreateEntry(Class log_class, Level log_level, const char* filename, unsigned int line_nr,
275 const char* function, std::string message) {
276 using std::chrono::duration_cast;
277 using std::chrono::steady_clock;
278
279 static steady_clock::time_point time_origin = steady_clock::now();
280
281 Entry entry;
282 entry.timestamp = duration_cast<std::chrono::microseconds>(steady_clock::now() - time_origin);
283 entry.log_class = log_class;
284 entry.log_level = log_level;
285 entry.filename = Common::TrimSourcePath(filename);
286 entry.line_num = line_nr;
287 entry.function = function;
288 entry.message = std::move(message);
289
290 return entry;
291}
292
293void SetGlobalFilter(const Filter& filter) { 295void SetGlobalFilter(const Filter& filter) {
294 Impl::Instance().SetGlobalFilter(filter); 296 Impl::Instance().SetGlobalFilter(filter);
295} 297}
@@ -314,9 +316,7 @@ void FmtLogMessageImpl(Class log_class, Level log_level, const char* filename,
314 if (!filter.CheckMessage(log_class, log_level)) 316 if (!filter.CheckMessage(log_class, log_level))
315 return; 317 return;
316 318
317 Entry entry = 319 instance.PushEntry(log_class, log_level, filename, line_num, function,
318 CreateEntry(log_class, log_level, filename, line_num, function, fmt::vformat(format, args)); 320 fmt::vformat(format, args));
319
320 instance.PushEntry(std::move(entry));
321} 321}
322} // namespace Log 322} // namespace Log
diff --git a/src/common/logging/backend.h b/src/common/logging/backend.h
index a31ee6968..fca0267a1 100644
--- a/src/common/logging/backend.h
+++ b/src/common/logging/backend.h
@@ -135,10 +135,6 @@ const char* GetLogClassName(Class log_class);
135 */ 135 */
136const char* GetLevelName(Level log_level); 136const char* GetLevelName(Level log_level);
137 137
138/// Creates a log entry by formatting the given source location, and message.
139Entry CreateEntry(Class log_class, Level log_level, const char* filename, unsigned int line_nr,
140 const char* function, std::string message);
141
142/** 138/**
143 * The global filter will prevent any messages from even being processed if they are filtered. Each 139 * The global filter will prevent any messages from even being processed if they are filtered. Each
144 * backend can have a filter, but if the level is lower than the global filter, the backend will 140 * backend can have a filter, but if the level is lower than the global filter, the backend will
diff --git a/src/core/CMakeLists.txt b/src/core/CMakeLists.txt
index 988356c65..8ccb2d5f0 100644
--- a/src/core/CMakeLists.txt
+++ b/src/core/CMakeLists.txt
@@ -217,6 +217,7 @@ add_library(core STATIC
217 hle/service/audio/audren_u.h 217 hle/service/audio/audren_u.h
218 hle/service/audio/codecctl.cpp 218 hle/service/audio/codecctl.cpp
219 hle/service/audio/codecctl.h 219 hle/service/audio/codecctl.h
220 hle/service/audio/errors.h
220 hle/service/audio/hwopus.cpp 221 hle/service/audio/hwopus.cpp
221 hle/service/audio/hwopus.h 222 hle/service/audio/hwopus.h
222 hle/service/bcat/bcat.cpp 223 hle/service/bcat/bcat.cpp
diff --git a/src/core/core.cpp b/src/core/core.cpp
index ab7181a05..89b3fb418 100644
--- a/src/core/core.cpp
+++ b/src/core/core.cpp
@@ -36,7 +36,8 @@
36#include "frontend/applets/software_keyboard.h" 36#include "frontend/applets/software_keyboard.h"
37#include "frontend/applets/web_browser.h" 37#include "frontend/applets/web_browser.h"
38#include "video_core/debug_utils/debug_utils.h" 38#include "video_core/debug_utils/debug_utils.h"
39#include "video_core/gpu.h" 39#include "video_core/gpu_asynch.h"
40#include "video_core/gpu_synch.h"
40#include "video_core/renderer_base.h" 41#include "video_core/renderer_base.h"
41#include "video_core/video_core.h" 42#include "video_core/video_core.h"
42 43
@@ -78,6 +79,7 @@ FileSys::VirtualFile GetGameFileFromPath(const FileSys::VirtualFilesystem& vfs,
78 return vfs->OpenFile(path, FileSys::Mode::Read); 79 return vfs->OpenFile(path, FileSys::Mode::Read);
79} 80}
80struct System::Impl { 81struct System::Impl {
82 explicit Impl(System& system) : kernel{system} {}
81 83
82 Cpu& CurrentCpuCore() { 84 Cpu& CurrentCpuCore() {
83 return cpu_core_manager.GetCurrentCore(); 85 return cpu_core_manager.GetCurrentCore();
@@ -95,7 +97,7 @@ struct System::Impl {
95 LOG_DEBUG(HW_Memory, "initialized OK"); 97 LOG_DEBUG(HW_Memory, "initialized OK");
96 98
97 core_timing.Initialize(); 99 core_timing.Initialize();
98 kernel.Initialize(core_timing); 100 kernel.Initialize();
99 101
100 const auto current_time = std::chrono::duration_cast<std::chrono::seconds>( 102 const auto current_time = std::chrono::duration_cast<std::chrono::seconds>(
101 std::chrono::system_clock::now().time_since_epoch()); 103 std::chrono::system_clock::now().time_since_epoch());
@@ -114,7 +116,7 @@ struct System::Impl {
114 if (web_browser == nullptr) 116 if (web_browser == nullptr)
115 web_browser = std::make_unique<Core::Frontend::DefaultWebBrowserApplet>(); 117 web_browser = std::make_unique<Core::Frontend::DefaultWebBrowserApplet>();
116 118
117 auto main_process = Kernel::Process::Create(kernel, "main"); 119 auto main_process = Kernel::Process::Create(system, "main");
118 kernel.MakeCurrentProcess(main_process.get()); 120 kernel.MakeCurrentProcess(main_process.get());
119 121
120 telemetry_session = std::make_unique<Core::TelemetrySession>(); 122 telemetry_session = std::make_unique<Core::TelemetrySession>();
@@ -128,10 +130,16 @@ struct System::Impl {
128 return ResultStatus::ErrorVideoCore; 130 return ResultStatus::ErrorVideoCore;
129 } 131 }
130 132
131 gpu_core = std::make_unique<Tegra::GPU>(system, renderer->Rasterizer()); 133 is_powered_on = true;
134
135 if (Settings::values.use_asynchronous_gpu_emulation) {
136 gpu_core = std::make_unique<VideoCommon::GPUAsynch>(system, *renderer);
137 } else {
138 gpu_core = std::make_unique<VideoCommon::GPUSynch>(system, *renderer);
139 }
132 140
133 cpu_core_manager.Initialize(system); 141 cpu_core_manager.Initialize(system);
134 is_powered_on = true; 142
135 LOG_DEBUG(Core, "Initialized OK"); 143 LOG_DEBUG(Core, "Initialized OK");
136 144
137 // Reset counters and set time origin to current frame 145 // Reset counters and set time origin to current frame
@@ -182,13 +190,13 @@ struct System::Impl {
182 190
183 void Shutdown() { 191 void Shutdown() {
184 // Log last frame performance stats 192 // Log last frame performance stats
185 auto perf_results = GetAndResetPerfStats(); 193 const auto perf_results = GetAndResetPerfStats();
186 Telemetry().AddField(Telemetry::FieldType::Performance, "Shutdown_EmulationSpeed", 194 telemetry_session->AddField(Telemetry::FieldType::Performance, "Shutdown_EmulationSpeed",
187 perf_results.emulation_speed * 100.0); 195 perf_results.emulation_speed * 100.0);
188 Telemetry().AddField(Telemetry::FieldType::Performance, "Shutdown_Framerate", 196 telemetry_session->AddField(Telemetry::FieldType::Performance, "Shutdown_Framerate",
189 perf_results.game_fps); 197 perf_results.game_fps);
190 Telemetry().AddField(Telemetry::FieldType::Performance, "Shutdown_Frametime", 198 telemetry_session->AddField(Telemetry::FieldType::Performance, "Shutdown_Frametime",
191 perf_results.frametime * 1000.0); 199 perf_results.frametime * 1000.0);
192 200
193 is_powered_on = false; 201 is_powered_on = false;
194 202
@@ -265,7 +273,7 @@ struct System::Impl {
265 Core::FrameLimiter frame_limiter; 273 Core::FrameLimiter frame_limiter;
266}; 274};
267 275
268System::System() : impl{std::make_unique<Impl>()} {} 276System::System() : impl{std::make_unique<Impl>(*this)} {}
269System::~System() = default; 277System::~System() = default;
270 278
271Cpu& System::CurrentCpuCore() { 279Cpu& System::CurrentCpuCore() {
diff --git a/src/core/core.h b/src/core/core.h
index d720013f7..ba76a41d8 100644
--- a/src/core/core.h
+++ b/src/core/core.h
@@ -293,10 +293,6 @@ inline ARM_Interface& CurrentArmInterface() {
293 return System::GetInstance().CurrentArmInterface(); 293 return System::GetInstance().CurrentArmInterface();
294} 294}
295 295
296inline TelemetrySession& Telemetry() {
297 return System::GetInstance().TelemetrySession();
298}
299
300inline Kernel::Process* CurrentProcess() { 296inline Kernel::Process* CurrentProcess() {
301 return System::GetInstance().CurrentProcess(); 297 return System::GetInstance().CurrentProcess();
302} 298}
diff --git a/src/core/core_cpu.cpp b/src/core/core_cpu.cpp
index 54aa21a3a..1eefed6d0 100644
--- a/src/core/core_cpu.cpp
+++ b/src/core/core_cpu.cpp
@@ -11,6 +11,7 @@
11#endif 11#endif
12#include "core/arm/exclusive_monitor.h" 12#include "core/arm/exclusive_monitor.h"
13#include "core/arm/unicorn/arm_unicorn.h" 13#include "core/arm/unicorn/arm_unicorn.h"
14#include "core/core.h"
14#include "core/core_cpu.h" 15#include "core/core_cpu.h"
15#include "core/core_timing.h" 16#include "core/core_timing.h"
16#include "core/hle/kernel/scheduler.h" 17#include "core/hle/kernel/scheduler.h"
@@ -49,9 +50,9 @@ bool CpuBarrier::Rendezvous() {
49 return false; 50 return false;
50} 51}
51 52
52Cpu::Cpu(Timing::CoreTiming& core_timing, ExclusiveMonitor& exclusive_monitor, 53Cpu::Cpu(System& system, ExclusiveMonitor& exclusive_monitor, CpuBarrier& cpu_barrier,
53 CpuBarrier& cpu_barrier, std::size_t core_index) 54 std::size_t core_index)
54 : cpu_barrier{cpu_barrier}, core_timing{core_timing}, core_index{core_index} { 55 : cpu_barrier{cpu_barrier}, core_timing{system.CoreTiming()}, core_index{core_index} {
55 if (Settings::values.use_cpu_jit) { 56 if (Settings::values.use_cpu_jit) {
56#ifdef ARCHITECTURE_x86_64 57#ifdef ARCHITECTURE_x86_64
57 arm_interface = std::make_unique<ARM_Dynarmic>(core_timing, exclusive_monitor, core_index); 58 arm_interface = std::make_unique<ARM_Dynarmic>(core_timing, exclusive_monitor, core_index);
@@ -63,7 +64,7 @@ Cpu::Cpu(Timing::CoreTiming& core_timing, ExclusiveMonitor& exclusive_monitor,
63 arm_interface = std::make_unique<ARM_Unicorn>(core_timing); 64 arm_interface = std::make_unique<ARM_Unicorn>(core_timing);
64 } 65 }
65 66
66 scheduler = std::make_unique<Kernel::Scheduler>(*arm_interface); 67 scheduler = std::make_unique<Kernel::Scheduler>(system, *arm_interface);
67} 68}
68 69
69Cpu::~Cpu() = default; 70Cpu::~Cpu() = default;
diff --git a/src/core/core_cpu.h b/src/core/core_cpu.h
index e2204c6b0..7589beb8c 100644
--- a/src/core/core_cpu.h
+++ b/src/core/core_cpu.h
@@ -15,6 +15,10 @@ namespace Kernel {
15class Scheduler; 15class Scheduler;
16} 16}
17 17
18namespace Core {
19class System;
20}
21
18namespace Core::Timing { 22namespace Core::Timing {
19class CoreTiming; 23class CoreTiming;
20} 24}
@@ -45,8 +49,8 @@ private:
45 49
46class Cpu { 50class Cpu {
47public: 51public:
48 Cpu(Timing::CoreTiming& core_timing, ExclusiveMonitor& exclusive_monitor, 52 Cpu(System& system, ExclusiveMonitor& exclusive_monitor, CpuBarrier& cpu_barrier,
49 CpuBarrier& cpu_barrier, std::size_t core_index); 53 std::size_t core_index);
50 ~Cpu(); 54 ~Cpu();
51 55
52 void RunLoop(bool tight_loop = true); 56 void RunLoop(bool tight_loop = true);
diff --git a/src/core/cpu_core_manager.cpp b/src/core/cpu_core_manager.cpp
index 2ddb3610d..93bc5619c 100644
--- a/src/core/cpu_core_manager.cpp
+++ b/src/core/cpu_core_manager.cpp
@@ -27,8 +27,7 @@ void CpuCoreManager::Initialize(System& system) {
27 exclusive_monitor = Cpu::MakeExclusiveMonitor(cores.size()); 27 exclusive_monitor = Cpu::MakeExclusiveMonitor(cores.size());
28 28
29 for (std::size_t index = 0; index < cores.size(); ++index) { 29 for (std::size_t index = 0; index < cores.size(); ++index) {
30 cores[index] = 30 cores[index] = std::make_unique<Cpu>(system, *exclusive_monitor, *barrier, index);
31 std::make_unique<Cpu>(system.CoreTiming(), *exclusive_monitor, *barrier, index);
32 } 31 }
33 32
34 // Create threads for CPU cores 1-3, and build thread_to_cpu map 33 // Create threads for CPU cores 1-3, and build thread_to_cpu map
diff --git a/src/core/hle/ipc.h b/src/core/hle/ipc.h
index ed84197b3..455d1f346 100644
--- a/src/core/hle/ipc.h
+++ b/src/core/hle/ipc.h
@@ -4,10 +4,10 @@
4 4
5#pragma once 5#pragma once
6 6
7#include "common/bit_field.h"
8#include "common/common_funcs.h"
7#include "common/common_types.h" 9#include "common/common_types.h"
8#include "common/swap.h" 10#include "common/swap.h"
9#include "core/hle/kernel/errors.h"
10#include "core/memory.h"
11 11
12namespace IPC { 12namespace IPC {
13 13
diff --git a/src/core/hle/ipc_helpers.h b/src/core/hle/ipc_helpers.h
index 90f276ee8..0d8368546 100644
--- a/src/core/hle/ipc_helpers.h
+++ b/src/core/hle/ipc_helpers.h
@@ -350,7 +350,7 @@ public:
350 template <class T> 350 template <class T>
351 std::shared_ptr<T> PopIpcInterface() { 351 std::shared_ptr<T> PopIpcInterface() {
352 ASSERT(context->Session()->IsDomain()); 352 ASSERT(context->Session()->IsDomain());
353 ASSERT(context->GetDomainMessageHeader()->input_object_count > 0); 353 ASSERT(context->GetDomainMessageHeader().input_object_count > 0);
354 return context->GetDomainRequestHandler<T>(Pop<u32>() - 1); 354 return context->GetDomainRequestHandler<T>(Pop<u32>() - 1);
355 } 355 }
356}; 356};
@@ -362,6 +362,11 @@ inline u32 RequestParser::Pop() {
362 return cmdbuf[index++]; 362 return cmdbuf[index++];
363} 363}
364 364
365template <>
366inline s32 RequestParser::Pop() {
367 return static_cast<s32>(Pop<u32>());
368}
369
365template <typename T> 370template <typename T>
366void RequestParser::PopRaw(T& value) { 371void RequestParser::PopRaw(T& value) {
367 std::memcpy(&value, cmdbuf + index, sizeof(T)); 372 std::memcpy(&value, cmdbuf + index, sizeof(T));
@@ -393,6 +398,16 @@ inline u64 RequestParser::Pop() {
393} 398}
394 399
395template <> 400template <>
401inline s8 RequestParser::Pop() {
402 return static_cast<s8>(Pop<u8>());
403}
404
405template <>
406inline s16 RequestParser::Pop() {
407 return static_cast<s16>(Pop<u16>());
408}
409
410template <>
396inline s64 RequestParser::Pop() { 411inline s64 RequestParser::Pop() {
397 return static_cast<s64>(Pop<u64>()); 412 return static_cast<s64>(Pop<u64>());
398} 413}
diff --git a/src/core/hle/kernel/address_arbiter.cpp b/src/core/hle/kernel/address_arbiter.cpp
index a250d088d..352190da8 100644
--- a/src/core/hle/kernel/address_arbiter.cpp
+++ b/src/core/hle/kernel/address_arbiter.cpp
@@ -9,6 +9,7 @@
9#include "common/common_types.h" 9#include "common/common_types.h"
10#include "core/core.h" 10#include "core/core.h"
11#include "core/core_cpu.h" 11#include "core/core_cpu.h"
12#include "core/hle/kernel/address_arbiter.h"
12#include "core/hle/kernel/errors.h" 13#include "core/hle/kernel/errors.h"
13#include "core/hle/kernel/object.h" 14#include "core/hle/kernel/object.h"
14#include "core/hle/kernel/process.h" 15#include "core/hle/kernel/process.h"
@@ -17,58 +18,16 @@
17#include "core/hle/result.h" 18#include "core/hle/result.h"
18#include "core/memory.h" 19#include "core/memory.h"
19 20
20namespace Kernel::AddressArbiter { 21namespace Kernel {
21 22namespace {
22// Performs actual address waiting logic.
23static ResultCode WaitForAddress(VAddr address, s64 timeout) {
24 SharedPtr<Thread> current_thread = GetCurrentThread();
25 current_thread->SetArbiterWaitAddress(address);
26 current_thread->SetStatus(ThreadStatus::WaitArb);
27 current_thread->InvalidateWakeupCallback();
28
29 current_thread->WakeAfterDelay(timeout);
30
31 Core::System::GetInstance().CpuCore(current_thread->GetProcessorID()).PrepareReschedule();
32 return RESULT_TIMEOUT;
33}
34
35// Gets the threads waiting on an address.
36static std::vector<SharedPtr<Thread>> GetThreadsWaitingOnAddress(VAddr address) {
37 const auto RetrieveWaitingThreads = [](std::size_t core_index,
38 std::vector<SharedPtr<Thread>>& waiting_threads,
39 VAddr arb_addr) {
40 const auto& scheduler = Core::System::GetInstance().Scheduler(core_index);
41 const auto& thread_list = scheduler.GetThreadList();
42
43 for (const auto& thread : thread_list) {
44 if (thread->GetArbiterWaitAddress() == arb_addr)
45 waiting_threads.push_back(thread);
46 }
47 };
48
49 // Retrieve all threads that are waiting for this address.
50 std::vector<SharedPtr<Thread>> threads;
51 RetrieveWaitingThreads(0, threads, address);
52 RetrieveWaitingThreads(1, threads, address);
53 RetrieveWaitingThreads(2, threads, address);
54 RetrieveWaitingThreads(3, threads, address);
55
56 // Sort them by priority, such that the highest priority ones come first.
57 std::sort(threads.begin(), threads.end(),
58 [](const SharedPtr<Thread>& lhs, const SharedPtr<Thread>& rhs) {
59 return lhs->GetPriority() < rhs->GetPriority();
60 });
61
62 return threads;
63}
64
65// Wake up num_to_wake (or all) threads in a vector. 23// Wake up num_to_wake (or all) threads in a vector.
66static void WakeThreads(std::vector<SharedPtr<Thread>>& waiting_threads, s32 num_to_wake) { 24void WakeThreads(const std::vector<SharedPtr<Thread>>& waiting_threads, s32 num_to_wake) {
67 // Only process up to 'target' threads, unless 'target' is <= 0, in which case process 25 // Only process up to 'target' threads, unless 'target' is <= 0, in which case process
68 // them all. 26 // them all.
69 std::size_t last = waiting_threads.size(); 27 std::size_t last = waiting_threads.size();
70 if (num_to_wake > 0) 28 if (num_to_wake > 0) {
71 last = num_to_wake; 29 last = num_to_wake;
30 }
72 31
73 // Signal the waiting threads. 32 // Signal the waiting threads.
74 for (std::size_t i = 0; i < last; i++) { 33 for (std::size_t i = 0; i < last; i++) {
@@ -78,42 +37,55 @@ static void WakeThreads(std::vector<SharedPtr<Thread>>& waiting_threads, s32 num
78 waiting_threads[i]->ResumeFromWait(); 37 waiting_threads[i]->ResumeFromWait();
79 } 38 }
80} 39}
40} // Anonymous namespace
41
42AddressArbiter::AddressArbiter(Core::System& system) : system{system} {}
43AddressArbiter::~AddressArbiter() = default;
44
45ResultCode AddressArbiter::SignalToAddress(VAddr address, SignalType type, s32 value,
46 s32 num_to_wake) {
47 switch (type) {
48 case SignalType::Signal:
49 return SignalToAddressOnly(address, num_to_wake);
50 case SignalType::IncrementAndSignalIfEqual:
51 return IncrementAndSignalToAddressIfEqual(address, value, num_to_wake);
52 case SignalType::ModifyByWaitingCountAndSignalIfEqual:
53 return ModifyByWaitingCountAndSignalToAddressIfEqual(address, value, num_to_wake);
54 default:
55 return ERR_INVALID_ENUM_VALUE;
56 }
57}
81 58
82// Signals an address being waited on. 59ResultCode AddressArbiter::SignalToAddressOnly(VAddr address, s32 num_to_wake) {
83ResultCode SignalToAddress(VAddr address, s32 num_to_wake) { 60 const std::vector<SharedPtr<Thread>> waiting_threads = GetThreadsWaitingOnAddress(address);
84 std::vector<SharedPtr<Thread>> waiting_threads = GetThreadsWaitingOnAddress(address);
85
86 WakeThreads(waiting_threads, num_to_wake); 61 WakeThreads(waiting_threads, num_to_wake);
87 return RESULT_SUCCESS; 62 return RESULT_SUCCESS;
88} 63}
89 64
90// Signals an address being waited on and increments its value if equal to the value argument. 65ResultCode AddressArbiter::IncrementAndSignalToAddressIfEqual(VAddr address, s32 value,
91ResultCode IncrementAndSignalToAddressIfEqual(VAddr address, s32 value, s32 num_to_wake) { 66 s32 num_to_wake) {
92 // Ensure that we can write to the address. 67 // Ensure that we can write to the address.
93 if (!Memory::IsValidVirtualAddress(address)) { 68 if (!Memory::IsValidVirtualAddress(address)) {
94 return ERR_INVALID_ADDRESS_STATE; 69 return ERR_INVALID_ADDRESS_STATE;
95 } 70 }
96 71
97 if (static_cast<s32>(Memory::Read32(address)) == value) { 72 if (static_cast<s32>(Memory::Read32(address)) != value) {
98 Memory::Write32(address, static_cast<u32>(value + 1));
99 } else {
100 return ERR_INVALID_STATE; 73 return ERR_INVALID_STATE;
101 } 74 }
102 75
103 return SignalToAddress(address, num_to_wake); 76 Memory::Write32(address, static_cast<u32>(value + 1));
77 return SignalToAddressOnly(address, num_to_wake);
104} 78}
105 79
106// Signals an address being waited on and modifies its value based on waiting thread count if equal 80ResultCode AddressArbiter::ModifyByWaitingCountAndSignalToAddressIfEqual(VAddr address, s32 value,
107// to the value argument. 81 s32 num_to_wake) {
108ResultCode ModifyByWaitingCountAndSignalToAddressIfEqual(VAddr address, s32 value,
109 s32 num_to_wake) {
110 // Ensure that we can write to the address. 82 // Ensure that we can write to the address.
111 if (!Memory::IsValidVirtualAddress(address)) { 83 if (!Memory::IsValidVirtualAddress(address)) {
112 return ERR_INVALID_ADDRESS_STATE; 84 return ERR_INVALID_ADDRESS_STATE;
113 } 85 }
114 86
115 // Get threads waiting on the address. 87 // Get threads waiting on the address.
116 std::vector<SharedPtr<Thread>> waiting_threads = GetThreadsWaitingOnAddress(address); 88 const std::vector<SharedPtr<Thread>> waiting_threads = GetThreadsWaitingOnAddress(address);
117 89
118 // Determine the modified value depending on the waiting count. 90 // Determine the modified value depending on the waiting count.
119 s32 updated_value; 91 s32 updated_value;
@@ -125,41 +97,54 @@ ResultCode ModifyByWaitingCountAndSignalToAddressIfEqual(VAddr address, s32 valu
125 updated_value = value; 97 updated_value = value;
126 } 98 }
127 99
128 if (static_cast<s32>(Memory::Read32(address)) == value) { 100 if (static_cast<s32>(Memory::Read32(address)) != value) {
129 Memory::Write32(address, static_cast<u32>(updated_value));
130 } else {
131 return ERR_INVALID_STATE; 101 return ERR_INVALID_STATE;
132 } 102 }
133 103
104 Memory::Write32(address, static_cast<u32>(updated_value));
134 WakeThreads(waiting_threads, num_to_wake); 105 WakeThreads(waiting_threads, num_to_wake);
135 return RESULT_SUCCESS; 106 return RESULT_SUCCESS;
136} 107}
137 108
138// Waits on an address if the value passed is less than the argument value, optionally decrementing. 109ResultCode AddressArbiter::WaitForAddress(VAddr address, ArbitrationType type, s32 value,
139ResultCode WaitForAddressIfLessThan(VAddr address, s32 value, s64 timeout, bool should_decrement) { 110 s64 timeout_ns) {
111 switch (type) {
112 case ArbitrationType::WaitIfLessThan:
113 return WaitForAddressIfLessThan(address, value, timeout_ns, false);
114 case ArbitrationType::DecrementAndWaitIfLessThan:
115 return WaitForAddressIfLessThan(address, value, timeout_ns, true);
116 case ArbitrationType::WaitIfEqual:
117 return WaitForAddressIfEqual(address, value, timeout_ns);
118 default:
119 return ERR_INVALID_ENUM_VALUE;
120 }
121}
122
123ResultCode AddressArbiter::WaitForAddressIfLessThan(VAddr address, s32 value, s64 timeout,
124 bool should_decrement) {
140 // Ensure that we can read the address. 125 // Ensure that we can read the address.
141 if (!Memory::IsValidVirtualAddress(address)) { 126 if (!Memory::IsValidVirtualAddress(address)) {
142 return ERR_INVALID_ADDRESS_STATE; 127 return ERR_INVALID_ADDRESS_STATE;
143 } 128 }
144 129
145 s32 cur_value = static_cast<s32>(Memory::Read32(address)); 130 const s32 cur_value = static_cast<s32>(Memory::Read32(address));
146 if (cur_value < value) { 131 if (cur_value >= value) {
147 if (should_decrement) {
148 Memory::Write32(address, static_cast<u32>(cur_value - 1));
149 }
150 } else {
151 return ERR_INVALID_STATE; 132 return ERR_INVALID_STATE;
152 } 133 }
134
135 if (should_decrement) {
136 Memory::Write32(address, static_cast<u32>(cur_value - 1));
137 }
138
153 // Short-circuit without rescheduling, if timeout is zero. 139 // Short-circuit without rescheduling, if timeout is zero.
154 if (timeout == 0) { 140 if (timeout == 0) {
155 return RESULT_TIMEOUT; 141 return RESULT_TIMEOUT;
156 } 142 }
157 143
158 return WaitForAddress(address, timeout); 144 return WaitForAddressImpl(address, timeout);
159} 145}
160 146
161// Waits on an address if the value passed is equal to the argument value. 147ResultCode AddressArbiter::WaitForAddressIfEqual(VAddr address, s32 value, s64 timeout) {
162ResultCode WaitForAddressIfEqual(VAddr address, s32 value, s64 timeout) {
163 // Ensure that we can read the address. 148 // Ensure that we can read the address.
164 if (!Memory::IsValidVirtualAddress(address)) { 149 if (!Memory::IsValidVirtualAddress(address)) {
165 return ERR_INVALID_ADDRESS_STATE; 150 return ERR_INVALID_ADDRESS_STATE;
@@ -173,6 +158,48 @@ ResultCode WaitForAddressIfEqual(VAddr address, s32 value, s64 timeout) {
173 return RESULT_TIMEOUT; 158 return RESULT_TIMEOUT;
174 } 159 }
175 160
176 return WaitForAddress(address, timeout); 161 return WaitForAddressImpl(address, timeout);
162}
163
164ResultCode AddressArbiter::WaitForAddressImpl(VAddr address, s64 timeout) {
165 SharedPtr<Thread> current_thread = system.CurrentScheduler().GetCurrentThread();
166 current_thread->SetArbiterWaitAddress(address);
167 current_thread->SetStatus(ThreadStatus::WaitArb);
168 current_thread->InvalidateWakeupCallback();
169
170 current_thread->WakeAfterDelay(timeout);
171
172 system.CpuCore(current_thread->GetProcessorID()).PrepareReschedule();
173 return RESULT_TIMEOUT;
174}
175
176std::vector<SharedPtr<Thread>> AddressArbiter::GetThreadsWaitingOnAddress(VAddr address) const {
177 const auto RetrieveWaitingThreads = [this](std::size_t core_index,
178 std::vector<SharedPtr<Thread>>& waiting_threads,
179 VAddr arb_addr) {
180 const auto& scheduler = system.Scheduler(core_index);
181 const auto& thread_list = scheduler.GetThreadList();
182
183 for (const auto& thread : thread_list) {
184 if (thread->GetArbiterWaitAddress() == arb_addr) {
185 waiting_threads.push_back(thread);
186 }
187 }
188 };
189
190 // Retrieve all threads that are waiting for this address.
191 std::vector<SharedPtr<Thread>> threads;
192 RetrieveWaitingThreads(0, threads, address);
193 RetrieveWaitingThreads(1, threads, address);
194 RetrieveWaitingThreads(2, threads, address);
195 RetrieveWaitingThreads(3, threads, address);
196
197 // Sort them by priority, such that the highest priority ones come first.
198 std::sort(threads.begin(), threads.end(),
199 [](const SharedPtr<Thread>& lhs, const SharedPtr<Thread>& rhs) {
200 return lhs->GetPriority() < rhs->GetPriority();
201 });
202
203 return threads;
177} 204}
178} // namespace Kernel::AddressArbiter 205} // namespace Kernel
diff --git a/src/core/hle/kernel/address_arbiter.h b/src/core/hle/kernel/address_arbiter.h
index b58f21bec..ed0d0e69f 100644
--- a/src/core/hle/kernel/address_arbiter.h
+++ b/src/core/hle/kernel/address_arbiter.h
@@ -4,29 +4,77 @@
4 4
5#pragma once 5#pragma once
6 6
7#include <vector>
8
7#include "common/common_types.h" 9#include "common/common_types.h"
10#include "core/hle/kernel/object.h"
8 11
9union ResultCode; 12union ResultCode;
10 13
11namespace Kernel::AddressArbiter { 14namespace Core {
15class System;
16}
12 17
13enum class ArbitrationType { 18namespace Kernel {
14 WaitIfLessThan = 0,
15 DecrementAndWaitIfLessThan = 1,
16 WaitIfEqual = 2,
17};
18 19
19enum class SignalType { 20class Thread;
20 Signal = 0, 21
21 IncrementAndSignalIfEqual = 1, 22class AddressArbiter {
22 ModifyByWaitingCountAndSignalIfEqual = 2, 23public:
23}; 24 enum class ArbitrationType {
25 WaitIfLessThan = 0,
26 DecrementAndWaitIfLessThan = 1,
27 WaitIfEqual = 2,
28 };
29
30 enum class SignalType {
31 Signal = 0,
32 IncrementAndSignalIfEqual = 1,
33 ModifyByWaitingCountAndSignalIfEqual = 2,
34 };
35
36 explicit AddressArbiter(Core::System& system);
37 ~AddressArbiter();
38
39 AddressArbiter(const AddressArbiter&) = delete;
40 AddressArbiter& operator=(const AddressArbiter&) = delete;
41
42 AddressArbiter(AddressArbiter&&) = default;
43 AddressArbiter& operator=(AddressArbiter&&) = delete;
44
45 /// Signals an address being waited on with a particular signaling type.
46 ResultCode SignalToAddress(VAddr address, SignalType type, s32 value, s32 num_to_wake);
24 47
25ResultCode SignalToAddress(VAddr address, s32 num_to_wake); 48 /// Waits on an address with a particular arbitration type.
26ResultCode IncrementAndSignalToAddressIfEqual(VAddr address, s32 value, s32 num_to_wake); 49 ResultCode WaitForAddress(VAddr address, ArbitrationType type, s32 value, s64 timeout_ns);
27ResultCode ModifyByWaitingCountAndSignalToAddressIfEqual(VAddr address, s32 value, s32 num_to_wake);
28 50
29ResultCode WaitForAddressIfLessThan(VAddr address, s32 value, s64 timeout, bool should_decrement); 51private:
30ResultCode WaitForAddressIfEqual(VAddr address, s32 value, s64 timeout); 52 /// Signals an address being waited on.
53 ResultCode SignalToAddressOnly(VAddr address, s32 num_to_wake);
54
55 /// Signals an address being waited on and increments its value if equal to the value argument.
56 ResultCode IncrementAndSignalToAddressIfEqual(VAddr address, s32 value, s32 num_to_wake);
57
58 /// Signals an address being waited on and modifies its value based on waiting thread count if
59 /// equal to the value argument.
60 ResultCode ModifyByWaitingCountAndSignalToAddressIfEqual(VAddr address, s32 value,
61 s32 num_to_wake);
62
63 /// Waits on an address if the value passed is less than the argument value,
64 /// optionally decrementing.
65 ResultCode WaitForAddressIfLessThan(VAddr address, s32 value, s64 timeout,
66 bool should_decrement);
67
68 /// Waits on an address if the value passed is equal to the argument value.
69 ResultCode WaitForAddressIfEqual(VAddr address, s32 value, s64 timeout);
70
71 // Waits on the given address with a timeout in nanoseconds
72 ResultCode WaitForAddressImpl(VAddr address, s64 timeout);
73
74 // Gets the threads waiting on an address.
75 std::vector<SharedPtr<Thread>> GetThreadsWaitingOnAddress(VAddr address) const;
76
77 Core::System& system;
78};
31 79
32} // namespace Kernel::AddressArbiter 80} // namespace Kernel
diff --git a/src/core/hle/kernel/client_session.cpp b/src/core/hle/kernel/client_session.cpp
index 704e82824..c17baa50a 100644
--- a/src/core/hle/kernel/client_session.cpp
+++ b/src/core/hle/kernel/client_session.cpp
@@ -17,21 +17,11 @@ ClientSession::~ClientSession() {
17 // This destructor will be called automatically when the last ClientSession handle is closed by 17 // This destructor will be called automatically when the last ClientSession handle is closed by
18 // the emulated application. 18 // the emulated application.
19 19
20 // Local references to ServerSession and SessionRequestHandler are necessary to guarantee they 20 // A local reference to the ServerSession is necessary to guarantee it
21 // will be kept alive until after ClientDisconnected() returns. 21 // will be kept alive until after ClientDisconnected() returns.
22 SharedPtr<ServerSession> server = parent->server; 22 SharedPtr<ServerSession> server = parent->server;
23 if (server) { 23 if (server) {
24 std::shared_ptr<SessionRequestHandler> hle_handler = server->hle_handler; 24 server->ClientDisconnected();
25 if (hle_handler)
26 hle_handler->ClientDisconnected(server);
27
28 // TODO(Subv): Force a wake up of all the ServerSession's waiting threads and set
29 // their WaitSynchronization result to 0xC920181A.
30
31 // Clean up the list of client threads with pending requests, they are unneeded now that the
32 // client endpoint is closed.
33 server->pending_requesting_threads.clear();
34 server->currently_handling = nullptr;
35 } 25 }
36 26
37 parent->client = nullptr; 27 parent->client = nullptr;
diff --git a/src/core/hle/kernel/client_session.h b/src/core/hle/kernel/client_session.h
index 4c18de69c..b1f39aad7 100644
--- a/src/core/hle/kernel/client_session.h
+++ b/src/core/hle/kernel/client_session.h
@@ -36,14 +36,15 @@ public:
36 36
37 ResultCode SendSyncRequest(SharedPtr<Thread> thread); 37 ResultCode SendSyncRequest(SharedPtr<Thread> thread);
38 38
39 std::string name; ///< Name of client port (optional) 39private:
40 explicit ClientSession(KernelCore& kernel);
41 ~ClientSession() override;
40 42
41 /// The parent session, which links to the server endpoint. 43 /// The parent session, which links to the server endpoint.
42 std::shared_ptr<Session> parent; 44 std::shared_ptr<Session> parent;
43 45
44private: 46 /// Name of the client session (optional)
45 explicit ClientSession(KernelCore& kernel); 47 std::string name;
46 ~ClientSession() override;
47}; 48};
48 49
49} // namespace Kernel 50} // namespace Kernel
diff --git a/src/core/hle/kernel/hle_ipc.cpp b/src/core/hle/kernel/hle_ipc.cpp
index 5dd855db8..fe710eb6e 100644
--- a/src/core/hle/kernel/hle_ipc.cpp
+++ b/src/core/hle/kernel/hle_ipc.cpp
@@ -86,7 +86,7 @@ HLERequestContext::~HLERequestContext() = default;
86void HLERequestContext::ParseCommandBuffer(const HandleTable& handle_table, u32_le* src_cmdbuf, 86void HLERequestContext::ParseCommandBuffer(const HandleTable& handle_table, u32_le* src_cmdbuf,
87 bool incoming) { 87 bool incoming) {
88 IPC::RequestParser rp(src_cmdbuf); 88 IPC::RequestParser rp(src_cmdbuf);
89 command_header = std::make_shared<IPC::CommandHeader>(rp.PopRaw<IPC::CommandHeader>()); 89 command_header = rp.PopRaw<IPC::CommandHeader>();
90 90
91 if (command_header->type == IPC::CommandType::Close) { 91 if (command_header->type == IPC::CommandType::Close) {
92 // Close does not populate the rest of the IPC header 92 // Close does not populate the rest of the IPC header
@@ -95,8 +95,7 @@ void HLERequestContext::ParseCommandBuffer(const HandleTable& handle_table, u32_
95 95
96 // If handle descriptor is present, add size of it 96 // If handle descriptor is present, add size of it
97 if (command_header->enable_handle_descriptor) { 97 if (command_header->enable_handle_descriptor) {
98 handle_descriptor_header = 98 handle_descriptor_header = rp.PopRaw<IPC::HandleDescriptorHeader>();
99 std::make_shared<IPC::HandleDescriptorHeader>(rp.PopRaw<IPC::HandleDescriptorHeader>());
100 if (handle_descriptor_header->send_current_pid) { 99 if (handle_descriptor_header->send_current_pid) {
101 rp.Skip(2, false); 100 rp.Skip(2, false);
102 } 101 }
@@ -140,16 +139,15 @@ void HLERequestContext::ParseCommandBuffer(const HandleTable& handle_table, u32_
140 // If this is an incoming message, only CommandType "Request" has a domain header 139 // If this is an incoming message, only CommandType "Request" has a domain header
141 // All outgoing domain messages have the domain header, if only incoming has it 140 // All outgoing domain messages have the domain header, if only incoming has it
142 if (incoming || domain_message_header) { 141 if (incoming || domain_message_header) {
143 domain_message_header = 142 domain_message_header = rp.PopRaw<IPC::DomainMessageHeader>();
144 std::make_shared<IPC::DomainMessageHeader>(rp.PopRaw<IPC::DomainMessageHeader>());
145 } else { 143 } else {
146 if (Session()->IsDomain()) 144 if (Session()->IsDomain()) {
147 LOG_WARNING(IPC, "Domain request has no DomainMessageHeader!"); 145 LOG_WARNING(IPC, "Domain request has no DomainMessageHeader!");
146 }
148 } 147 }
149 } 148 }
150 149
151 data_payload_header = 150 data_payload_header = rp.PopRaw<IPC::DataPayloadHeader>();
152 std::make_shared<IPC::DataPayloadHeader>(rp.PopRaw<IPC::DataPayloadHeader>());
153 151
154 data_payload_offset = rp.GetCurrentOffset(); 152 data_payload_offset = rp.GetCurrentOffset();
155 153
@@ -264,11 +262,11 @@ ResultCode HLERequestContext::WriteToOutgoingCommandBuffer(Thread& thread) {
264 // Write the domain objects to the command buffer, these go after the raw untranslated data. 262 // Write the domain objects to the command buffer, these go after the raw untranslated data.
265 // TODO(Subv): This completely ignores C buffers. 263 // TODO(Subv): This completely ignores C buffers.
266 std::size_t domain_offset = size - domain_message_header->num_objects; 264 std::size_t domain_offset = size - domain_message_header->num_objects;
267 auto& request_handlers = server_session->domain_request_handlers;
268 265
269 for (auto& object : domain_objects) { 266 for (const auto& object : domain_objects) {
270 request_handlers.emplace_back(object); 267 server_session->AppendDomainRequestHandler(object);
271 dst_cmdbuf[domain_offset++] = static_cast<u32_le>(request_handlers.size()); 268 dst_cmdbuf[domain_offset++] =
269 static_cast<u32_le>(server_session->NumDomainRequestHandlers());
272 } 270 }
273 } 271 }
274 272
diff --git a/src/core/hle/kernel/hle_ipc.h b/src/core/hle/kernel/hle_ipc.h
index cb1c5aff3..2bdd9f02c 100644
--- a/src/core/hle/kernel/hle_ipc.h
+++ b/src/core/hle/kernel/hle_ipc.h
@@ -6,6 +6,7 @@
6 6
7#include <array> 7#include <array>
8#include <memory> 8#include <memory>
9#include <optional>
9#include <string> 10#include <string>
10#include <type_traits> 11#include <type_traits>
11#include <vector> 12#include <vector>
@@ -15,6 +16,8 @@
15#include "core/hle/ipc.h" 16#include "core/hle/ipc.h"
16#include "core/hle/kernel/object.h" 17#include "core/hle/kernel/object.h"
17 18
19union ResultCode;
20
18namespace Service { 21namespace Service {
19class ServiceFrameworkBase; 22class ServiceFrameworkBase;
20} 23}
@@ -166,12 +169,12 @@ public:
166 return buffer_c_desciptors; 169 return buffer_c_desciptors;
167 } 170 }
168 171
169 const IPC::DomainMessageHeader* GetDomainMessageHeader() const { 172 const IPC::DomainMessageHeader& GetDomainMessageHeader() const {
170 return domain_message_header.get(); 173 return domain_message_header.value();
171 } 174 }
172 175
173 bool HasDomainMessageHeader() const { 176 bool HasDomainMessageHeader() const {
174 return domain_message_header != nullptr; 177 return domain_message_header.has_value();
175 } 178 }
176 179
177 /// Helper function to read a buffer using the appropriate buffer descriptor 180 /// Helper function to read a buffer using the appropriate buffer descriptor
@@ -208,14 +211,12 @@ public:
208 211
209 template <typename T> 212 template <typename T>
210 SharedPtr<T> GetCopyObject(std::size_t index) { 213 SharedPtr<T> GetCopyObject(std::size_t index) {
211 ASSERT(index < copy_objects.size()); 214 return DynamicObjectCast<T>(copy_objects.at(index));
212 return DynamicObjectCast<T>(copy_objects[index]);
213 } 215 }
214 216
215 template <typename T> 217 template <typename T>
216 SharedPtr<T> GetMoveObject(std::size_t index) { 218 SharedPtr<T> GetMoveObject(std::size_t index) {
217 ASSERT(index < move_objects.size()); 219 return DynamicObjectCast<T>(move_objects.at(index));
218 return DynamicObjectCast<T>(move_objects[index]);
219 } 220 }
220 221
221 void AddMoveObject(SharedPtr<Object> object) { 222 void AddMoveObject(SharedPtr<Object> object) {
@@ -232,7 +233,7 @@ public:
232 233
233 template <typename T> 234 template <typename T>
234 std::shared_ptr<T> GetDomainRequestHandler(std::size_t index) const { 235 std::shared_ptr<T> GetDomainRequestHandler(std::size_t index) const {
235 return std::static_pointer_cast<T>(domain_request_handlers[index]); 236 return std::static_pointer_cast<T>(domain_request_handlers.at(index));
236 } 237 }
237 238
238 void SetDomainRequestHandlers( 239 void SetDomainRequestHandlers(
@@ -272,10 +273,10 @@ private:
272 boost::container::small_vector<SharedPtr<Object>, 8> copy_objects; 273 boost::container::small_vector<SharedPtr<Object>, 8> copy_objects;
273 boost::container::small_vector<std::shared_ptr<SessionRequestHandler>, 8> domain_objects; 274 boost::container::small_vector<std::shared_ptr<SessionRequestHandler>, 8> domain_objects;
274 275
275 std::shared_ptr<IPC::CommandHeader> command_header; 276 std::optional<IPC::CommandHeader> command_header;
276 std::shared_ptr<IPC::HandleDescriptorHeader> handle_descriptor_header; 277 std::optional<IPC::HandleDescriptorHeader> handle_descriptor_header;
277 std::shared_ptr<IPC::DataPayloadHeader> data_payload_header; 278 std::optional<IPC::DataPayloadHeader> data_payload_header;
278 std::shared_ptr<IPC::DomainMessageHeader> domain_message_header; 279 std::optional<IPC::DomainMessageHeader> domain_message_header;
279 std::vector<IPC::BufferDescriptorX> buffer_x_desciptors; 280 std::vector<IPC::BufferDescriptorX> buffer_x_desciptors;
280 std::vector<IPC::BufferDescriptorABW> buffer_a_desciptors; 281 std::vector<IPC::BufferDescriptorABW> buffer_a_desciptors;
281 std::vector<IPC::BufferDescriptorABW> buffer_b_desciptors; 282 std::vector<IPC::BufferDescriptorABW> buffer_b_desciptors;
diff --git a/src/core/hle/kernel/kernel.cpp b/src/core/hle/kernel/kernel.cpp
index dd749eed4..4d224d01d 100644
--- a/src/core/hle/kernel/kernel.cpp
+++ b/src/core/hle/kernel/kernel.cpp
@@ -12,6 +12,7 @@
12 12
13#include "core/core.h" 13#include "core/core.h"
14#include "core/core_timing.h" 14#include "core/core_timing.h"
15#include "core/hle/kernel/address_arbiter.h"
15#include "core/hle/kernel/client_port.h" 16#include "core/hle/kernel/client_port.h"
16#include "core/hle/kernel/handle_table.h" 17#include "core/hle/kernel/handle_table.h"
17#include "core/hle/kernel/kernel.h" 18#include "core/hle/kernel/kernel.h"
@@ -86,11 +87,13 @@ static void ThreadWakeupCallback(u64 thread_handle, [[maybe_unused]] int cycles_
86} 87}
87 88
88struct KernelCore::Impl { 89struct KernelCore::Impl {
89 void Initialize(KernelCore& kernel, Core::Timing::CoreTiming& core_timing) { 90 explicit Impl(Core::System& system) : system{system} {}
91
92 void Initialize(KernelCore& kernel) {
90 Shutdown(); 93 Shutdown();
91 94
92 InitializeSystemResourceLimit(kernel); 95 InitializeSystemResourceLimit(kernel);
93 InitializeThreads(core_timing); 96 InitializeThreads();
94 } 97 }
95 98
96 void Shutdown() { 99 void Shutdown() {
@@ -122,9 +125,9 @@ struct KernelCore::Impl {
122 ASSERT(system_resource_limit->SetLimitValue(ResourceType::Sessions, 900).IsSuccess()); 125 ASSERT(system_resource_limit->SetLimitValue(ResourceType::Sessions, 900).IsSuccess());
123 } 126 }
124 127
125 void InitializeThreads(Core::Timing::CoreTiming& core_timing) { 128 void InitializeThreads() {
126 thread_wakeup_event_type = 129 thread_wakeup_event_type =
127 core_timing.RegisterEvent("ThreadWakeupCallback", ThreadWakeupCallback); 130 system.CoreTiming().RegisterEvent("ThreadWakeupCallback", ThreadWakeupCallback);
128 } 131 }
129 132
130 std::atomic<u32> next_object_id{0}; 133 std::atomic<u32> next_object_id{0};
@@ -145,15 +148,18 @@ struct KernelCore::Impl {
145 /// Map of named ports managed by the kernel, which can be retrieved using 148 /// Map of named ports managed by the kernel, which can be retrieved using
146 /// the ConnectToPort SVC. 149 /// the ConnectToPort SVC.
147 NamedPortTable named_ports; 150 NamedPortTable named_ports;
151
152 // System context
153 Core::System& system;
148}; 154};
149 155
150KernelCore::KernelCore() : impl{std::make_unique<Impl>()} {} 156KernelCore::KernelCore(Core::System& system) : impl{std::make_unique<Impl>(system)} {}
151KernelCore::~KernelCore() { 157KernelCore::~KernelCore() {
152 Shutdown(); 158 Shutdown();
153} 159}
154 160
155void KernelCore::Initialize(Core::Timing::CoreTiming& core_timing) { 161void KernelCore::Initialize() {
156 impl->Initialize(*this, core_timing); 162 impl->Initialize(*this);
157} 163}
158 164
159void KernelCore::Shutdown() { 165void KernelCore::Shutdown() {
diff --git a/src/core/hle/kernel/kernel.h b/src/core/hle/kernel/kernel.h
index 154bced42..ff17ff865 100644
--- a/src/core/hle/kernel/kernel.h
+++ b/src/core/hle/kernel/kernel.h
@@ -11,6 +11,10 @@
11template <typename T> 11template <typename T>
12class ResultVal; 12class ResultVal;
13 13
14namespace Core {
15class System;
16}
17
14namespace Core::Timing { 18namespace Core::Timing {
15class CoreTiming; 19class CoreTiming;
16struct EventType; 20struct EventType;
@@ -18,6 +22,7 @@ struct EventType;
18 22
19namespace Kernel { 23namespace Kernel {
20 24
25class AddressArbiter;
21class ClientPort; 26class ClientPort;
22class HandleTable; 27class HandleTable;
23class Process; 28class Process;
@@ -30,7 +35,14 @@ private:
30 using NamedPortTable = std::unordered_map<std::string, SharedPtr<ClientPort>>; 35 using NamedPortTable = std::unordered_map<std::string, SharedPtr<ClientPort>>;
31 36
32public: 37public:
33 KernelCore(); 38 /// Constructs an instance of the kernel using the given System
39 /// instance as a context for any necessary system-related state,
40 /// such as threads, CPU core state, etc.
41 ///
42 /// @post After execution of the constructor, the provided System
43 /// object *must* outlive the kernel instance itself.
44 ///
45 explicit KernelCore(Core::System& system);
34 ~KernelCore(); 46 ~KernelCore();
35 47
36 KernelCore(const KernelCore&) = delete; 48 KernelCore(const KernelCore&) = delete;
@@ -40,11 +52,7 @@ public:
40 KernelCore& operator=(KernelCore&&) = delete; 52 KernelCore& operator=(KernelCore&&) = delete;
41 53
42 /// Resets the kernel to a clean slate for use. 54 /// Resets the kernel to a clean slate for use.
43 /// 55 void Initialize();
44 /// @param core_timing CoreTiming instance used to create any necessary
45 /// kernel-specific callback events.
46 ///
47 void Initialize(Core::Timing::CoreTiming& core_timing);
48 56
49 /// Clears all resources in use by the kernel instance. 57 /// Clears all resources in use by the kernel instance.
50 void Shutdown(); 58 void Shutdown();
diff --git a/src/core/hle/kernel/process.cpp b/src/core/hle/kernel/process.cpp
index 8009150e0..7e8ba978c 100644
--- a/src/core/hle/kernel/process.cpp
+++ b/src/core/hle/kernel/process.cpp
@@ -53,9 +53,10 @@ void SetupMainThread(Process& owner_process, KernelCore& kernel, VAddr entry_poi
53CodeSet::CodeSet() = default; 53CodeSet::CodeSet() = default;
54CodeSet::~CodeSet() = default; 54CodeSet::~CodeSet() = default;
55 55
56SharedPtr<Process> Process::Create(KernelCore& kernel, std::string&& name) { 56SharedPtr<Process> Process::Create(Core::System& system, std::string&& name) {
57 SharedPtr<Process> process(new Process(kernel)); 57 auto& kernel = system.Kernel();
58 58
59 SharedPtr<Process> process(new Process(system));
59 process->name = std::move(name); 60 process->name = std::move(name);
60 process->resource_limit = kernel.GetSystemResourceLimit(); 61 process->resource_limit = kernel.GetSystemResourceLimit();
61 process->status = ProcessStatus::Created; 62 process->status = ProcessStatus::Created;
@@ -233,8 +234,8 @@ void Process::LoadModule(CodeSet module_, VAddr base_addr) {
233 Core::System::GetInstance().ArmInterface(3).ClearInstructionCache(); 234 Core::System::GetInstance().ArmInterface(3).ClearInstructionCache();
234} 235}
235 236
236Kernel::Process::Process(KernelCore& kernel) : WaitObject{kernel} {} 237Process::Process(Core::System& system) : WaitObject{system.Kernel()}, address_arbiter{system} {}
237Kernel::Process::~Process() {} 238Process::~Process() = default;
238 239
239void Process::Acquire(Thread* thread) { 240void Process::Acquire(Thread* thread) {
240 ASSERT_MSG(!ShouldWait(thread), "Object unavailable!"); 241 ASSERT_MSG(!ShouldWait(thread), "Object unavailable!");
diff --git a/src/core/hle/kernel/process.h b/src/core/hle/kernel/process.h
index dcc57ae9f..2a132c894 100644
--- a/src/core/hle/kernel/process.h
+++ b/src/core/hle/kernel/process.h
@@ -12,12 +12,17 @@
12#include <vector> 12#include <vector>
13#include <boost/container/static_vector.hpp> 13#include <boost/container/static_vector.hpp>
14#include "common/common_types.h" 14#include "common/common_types.h"
15#include "core/hle/kernel/address_arbiter.h"
15#include "core/hle/kernel/handle_table.h" 16#include "core/hle/kernel/handle_table.h"
16#include "core/hle/kernel/process_capability.h" 17#include "core/hle/kernel/process_capability.h"
17#include "core/hle/kernel/vm_manager.h" 18#include "core/hle/kernel/vm_manager.h"
18#include "core/hle/kernel/wait_object.h" 19#include "core/hle/kernel/wait_object.h"
19#include "core/hle/result.h" 20#include "core/hle/result.h"
20 21
22namespace Core {
23class System;
24}
25
21namespace FileSys { 26namespace FileSys {
22class ProgramMetadata; 27class ProgramMetadata;
23} 28}
@@ -116,7 +121,7 @@ public:
116 121
117 static constexpr std::size_t RANDOM_ENTROPY_SIZE = 4; 122 static constexpr std::size_t RANDOM_ENTROPY_SIZE = 4;
118 123
119 static SharedPtr<Process> Create(KernelCore& kernel, std::string&& name); 124 static SharedPtr<Process> Create(Core::System& system, std::string&& name);
120 125
121 std::string GetTypeName() const override { 126 std::string GetTypeName() const override {
122 return "Process"; 127 return "Process";
@@ -150,6 +155,16 @@ public:
150 return handle_table; 155 return handle_table;
151 } 156 }
152 157
158 /// Gets a reference to the process' address arbiter.
159 AddressArbiter& GetAddressArbiter() {
160 return address_arbiter;
161 }
162
163 /// Gets a const reference to the process' address arbiter.
164 const AddressArbiter& GetAddressArbiter() const {
165 return address_arbiter;
166 }
167
153 /// Gets the current status of the process 168 /// Gets the current status of the process
154 ProcessStatus GetStatus() const { 169 ProcessStatus GetStatus() const {
155 return status; 170 return status;
@@ -251,7 +266,7 @@ public:
251 void FreeTLSSlot(VAddr tls_address); 266 void FreeTLSSlot(VAddr tls_address);
252 267
253private: 268private:
254 explicit Process(KernelCore& kernel); 269 explicit Process(Core::System& kernel);
255 ~Process() override; 270 ~Process() override;
256 271
257 /// Checks if the specified thread should wait until this process is available. 272 /// Checks if the specified thread should wait until this process is available.
@@ -309,6 +324,9 @@ private:
309 /// Per-process handle table for storing created object handles in. 324 /// Per-process handle table for storing created object handles in.
310 HandleTable handle_table; 325 HandleTable handle_table;
311 326
327 /// Per-process address arbiter.
328 AddressArbiter address_arbiter;
329
312 /// Random values for svcGetInfo RandomEntropy 330 /// Random values for svcGetInfo RandomEntropy
313 std::array<u64, RANDOM_ENTROPY_SIZE> random_entropy; 331 std::array<u64, RANDOM_ENTROPY_SIZE> random_entropy;
314 332
diff --git a/src/core/hle/kernel/scheduler.cpp b/src/core/hle/kernel/scheduler.cpp
index 44f30d070..5fccfd9f4 100644
--- a/src/core/hle/kernel/scheduler.cpp
+++ b/src/core/hle/kernel/scheduler.cpp
@@ -19,7 +19,8 @@ namespace Kernel {
19 19
20std::mutex Scheduler::scheduler_mutex; 20std::mutex Scheduler::scheduler_mutex;
21 21
22Scheduler::Scheduler(Core::ARM_Interface& cpu_core) : cpu_core(cpu_core) {} 22Scheduler::Scheduler(Core::System& system, Core::ARM_Interface& cpu_core)
23 : cpu_core{cpu_core}, system{system} {}
23 24
24Scheduler::~Scheduler() { 25Scheduler::~Scheduler() {
25 for (auto& thread : thread_list) { 26 for (auto& thread : thread_list) {
@@ -61,7 +62,7 @@ Thread* Scheduler::PopNextReadyThread() {
61 62
62void Scheduler::SwitchContext(Thread* new_thread) { 63void Scheduler::SwitchContext(Thread* new_thread) {
63 Thread* const previous_thread = GetCurrentThread(); 64 Thread* const previous_thread = GetCurrentThread();
64 Process* const previous_process = Core::CurrentProcess(); 65 Process* const previous_process = system.Kernel().CurrentProcess();
65 66
66 UpdateLastContextSwitchTime(previous_thread, previous_process); 67 UpdateLastContextSwitchTime(previous_thread, previous_process);
67 68
@@ -94,8 +95,8 @@ void Scheduler::SwitchContext(Thread* new_thread) {
94 95
95 auto* const thread_owner_process = current_thread->GetOwnerProcess(); 96 auto* const thread_owner_process = current_thread->GetOwnerProcess();
96 if (previous_process != thread_owner_process) { 97 if (previous_process != thread_owner_process) {
97 Core::System::GetInstance().Kernel().MakeCurrentProcess(thread_owner_process); 98 system.Kernel().MakeCurrentProcess(thread_owner_process);
98 SetCurrentPageTable(&Core::CurrentProcess()->VMManager().page_table); 99 SetCurrentPageTable(&thread_owner_process->VMManager().page_table);
99 } 100 }
100 101
101 cpu_core.LoadContext(new_thread->GetContext()); 102 cpu_core.LoadContext(new_thread->GetContext());
@@ -111,7 +112,7 @@ void Scheduler::SwitchContext(Thread* new_thread) {
111 112
112void Scheduler::UpdateLastContextSwitchTime(Thread* thread, Process* process) { 113void Scheduler::UpdateLastContextSwitchTime(Thread* thread, Process* process) {
113 const u64 prev_switch_ticks = last_context_switch_time; 114 const u64 prev_switch_ticks = last_context_switch_time;
114 const u64 most_recent_switch_ticks = Core::System::GetInstance().CoreTiming().GetTicks(); 115 const u64 most_recent_switch_ticks = system.CoreTiming().GetTicks();
115 const u64 update_ticks = most_recent_switch_ticks - prev_switch_ticks; 116 const u64 update_ticks = most_recent_switch_ticks - prev_switch_ticks;
116 117
117 if (thread != nullptr) { 118 if (thread != nullptr) {
@@ -223,8 +224,7 @@ void Scheduler::YieldWithLoadBalancing(Thread* thread) {
223 // Take the first non-nullptr one 224 // Take the first non-nullptr one
224 for (unsigned cur_core = 0; cur_core < Core::NUM_CPU_CORES; ++cur_core) { 225 for (unsigned cur_core = 0; cur_core < Core::NUM_CPU_CORES; ++cur_core) {
225 const auto res = 226 const auto res =
226 Core::System::GetInstance().CpuCore(cur_core).Scheduler().GetNextSuggestedThread( 227 system.CpuCore(cur_core).Scheduler().GetNextSuggestedThread(core, priority);
227 core, priority);
228 228
229 // If scheduler provides a suggested thread 229 // If scheduler provides a suggested thread
230 if (res != nullptr) { 230 if (res != nullptr) {
diff --git a/src/core/hle/kernel/scheduler.h b/src/core/hle/kernel/scheduler.h
index 97ced4dfc..1c5bf57d9 100644
--- a/src/core/hle/kernel/scheduler.h
+++ b/src/core/hle/kernel/scheduler.h
@@ -13,7 +13,8 @@
13 13
14namespace Core { 14namespace Core {
15class ARM_Interface; 15class ARM_Interface;
16} 16class System;
17} // namespace Core
17 18
18namespace Kernel { 19namespace Kernel {
19 20
@@ -21,7 +22,7 @@ class Process;
21 22
22class Scheduler final { 23class Scheduler final {
23public: 24public:
24 explicit Scheduler(Core::ARM_Interface& cpu_core); 25 explicit Scheduler(Core::System& system, Core::ARM_Interface& cpu_core);
25 ~Scheduler(); 26 ~Scheduler();
26 27
27 /// Returns whether there are any threads that are ready to run. 28 /// Returns whether there are any threads that are ready to run.
@@ -162,6 +163,7 @@ private:
162 Core::ARM_Interface& cpu_core; 163 Core::ARM_Interface& cpu_core;
163 u64 last_context_switch_time = 0; 164 u64 last_context_switch_time = 0;
164 165
166 Core::System& system;
165 static std::mutex scheduler_mutex; 167 static std::mutex scheduler_mutex;
166}; 168};
167 169
diff --git a/src/core/hle/kernel/server_session.cpp b/src/core/hle/kernel/server_session.cpp
index 027434f92..4d8a337a7 100644
--- a/src/core/hle/kernel/server_session.cpp
+++ b/src/core/hle/kernel/server_session.cpp
@@ -63,42 +63,71 @@ void ServerSession::Acquire(Thread* thread) {
63 pending_requesting_threads.pop_back(); 63 pending_requesting_threads.pop_back();
64} 64}
65 65
66void ServerSession::ClientDisconnected() {
67 // We keep a shared pointer to the hle handler to keep it alive throughout
68 // the call to ClientDisconnected, as ClientDisconnected invalidates the
69 // hle_handler member itself during the course of the function executing.
70 std::shared_ptr<SessionRequestHandler> handler = hle_handler;
71 if (handler) {
72 // Note that after this returns, this server session's hle_handler is
73 // invalidated (set to null).
74 handler->ClientDisconnected(this);
75 }
76
77 // TODO(Subv): Force a wake up of all the ServerSession's waiting threads and set
78 // their WaitSynchronization result to 0xC920181A.
79
80 // Clean up the list of client threads with pending requests, they are unneeded now that the
81 // client endpoint is closed.
82 pending_requesting_threads.clear();
83 currently_handling = nullptr;
84}
85
86void ServerSession::AppendDomainRequestHandler(std::shared_ptr<SessionRequestHandler> handler) {
87 domain_request_handlers.push_back(std::move(handler));
88}
89
90std::size_t ServerSession::NumDomainRequestHandlers() const {
91 return domain_request_handlers.size();
92}
93
66ResultCode ServerSession::HandleDomainSyncRequest(Kernel::HLERequestContext& context) { 94ResultCode ServerSession::HandleDomainSyncRequest(Kernel::HLERequestContext& context) {
67 auto* const domain_message_header = context.GetDomainMessageHeader(); 95 if (!context.HasDomainMessageHeader()) {
68 if (domain_message_header) { 96 return RESULT_SUCCESS;
69 // Set domain handlers in HLE context, used for domain objects (IPC interfaces) as inputs 97 }
70 context.SetDomainRequestHandlers(domain_request_handlers); 98
71 99 // Set domain handlers in HLE context, used for domain objects (IPC interfaces) as inputs
72 // If there is a DomainMessageHeader, then this is CommandType "Request" 100 context.SetDomainRequestHandlers(domain_request_handlers);
73 const u32 object_id{context.GetDomainMessageHeader()->object_id}; 101
74 switch (domain_message_header->command) { 102 // If there is a DomainMessageHeader, then this is CommandType "Request"
75 case IPC::DomainMessageHeader::CommandType::SendMessage: 103 const auto& domain_message_header = context.GetDomainMessageHeader();
76 if (object_id > domain_request_handlers.size()) { 104 const u32 object_id{domain_message_header.object_id};
77 LOG_CRITICAL(IPC, 105 switch (domain_message_header.command) {
78 "object_id {} is too big! This probably means a recent service call " 106 case IPC::DomainMessageHeader::CommandType::SendMessage:
79 "to {} needed to return a new interface!", 107 if (object_id > domain_request_handlers.size()) {
80 object_id, name); 108 LOG_CRITICAL(IPC,
81 UNREACHABLE(); 109 "object_id {} is too big! This probably means a recent service call "
82 return RESULT_SUCCESS; // Ignore error if asserts are off 110 "to {} needed to return a new interface!",
83 } 111 object_id, name);
84 return domain_request_handlers[object_id - 1]->HandleSyncRequest(context); 112 UNREACHABLE();
85 113 return RESULT_SUCCESS; // Ignore error if asserts are off
86 case IPC::DomainMessageHeader::CommandType::CloseVirtualHandle: {
87 LOG_DEBUG(IPC, "CloseVirtualHandle, object_id=0x{:08X}", object_id);
88
89 domain_request_handlers[object_id - 1] = nullptr;
90
91 IPC::ResponseBuilder rb{context, 2};
92 rb.Push(RESULT_SUCCESS);
93 return RESULT_SUCCESS;
94 }
95 } 114 }
115 return domain_request_handlers[object_id - 1]->HandleSyncRequest(context);
96 116
97 LOG_CRITICAL(IPC, "Unknown domain command={}", 117 case IPC::DomainMessageHeader::CommandType::CloseVirtualHandle: {
98 static_cast<int>(domain_message_header->command.Value())); 118 LOG_DEBUG(IPC, "CloseVirtualHandle, object_id=0x{:08X}", object_id);
99 ASSERT(false); 119
120 domain_request_handlers[object_id - 1] = nullptr;
121
122 IPC::ResponseBuilder rb{context, 2};
123 rb.Push(RESULT_SUCCESS);
124 return RESULT_SUCCESS;
125 }
100 } 126 }
101 127
128 LOG_CRITICAL(IPC, "Unknown domain command={}",
129 static_cast<int>(domain_message_header.command.Value()));
130 ASSERT(false);
102 return RESULT_SUCCESS; 131 return RESULT_SUCCESS;
103} 132}
104 133
diff --git a/src/core/hle/kernel/server_session.h b/src/core/hle/kernel/server_session.h
index e0e9d64c8..aea4ccfeb 100644
--- a/src/core/hle/kernel/server_session.h
+++ b/src/core/hle/kernel/server_session.h
@@ -46,6 +46,14 @@ public:
46 return HANDLE_TYPE; 46 return HANDLE_TYPE;
47 } 47 }
48 48
49 Session* GetParent() {
50 return parent.get();
51 }
52
53 const Session* GetParent() const {
54 return parent.get();
55 }
56
49 using SessionPair = std::tuple<SharedPtr<ServerSession>, SharedPtr<ClientSession>>; 57 using SessionPair = std::tuple<SharedPtr<ServerSession>, SharedPtr<ClientSession>>;
50 58
51 /** 59 /**
@@ -78,23 +86,16 @@ public:
78 86
79 void Acquire(Thread* thread) override; 87 void Acquire(Thread* thread) override;
80 88
81 std::string name; ///< The name of this session (optional) 89 /// Called when a client disconnection occurs.
82 std::shared_ptr<Session> parent; ///< The parent session, which links to the client endpoint. 90 void ClientDisconnected();
83 std::shared_ptr<SessionRequestHandler>
84 hle_handler; ///< This session's HLE request handler (applicable when not a domain)
85 91
86 /// This is the list of domain request handlers (after conversion to a domain) 92 /// Adds a new domain request handler to the collection of request handlers within
87 std::vector<std::shared_ptr<SessionRequestHandler>> domain_request_handlers; 93 /// this ServerSession instance.
88 94 void AppendDomainRequestHandler(std::shared_ptr<SessionRequestHandler> handler);
89 /// List of threads that are pending a response after a sync request. This list is processed in
90 /// a LIFO manner, thus, the last request will be dispatched first.
91 /// TODO(Subv): Verify if this is indeed processed in LIFO using a hardware test.
92 std::vector<SharedPtr<Thread>> pending_requesting_threads;
93 95
94 /// Thread whose request is currently being handled. A request is considered "handled" when a 96 /// Retrieves the total number of domain request handlers that have been
95 /// response is sent via svcReplyAndReceive. 97 /// appended to this ServerSession instance.
96 /// TODO(Subv): Find a better name for this. 98 std::size_t NumDomainRequestHandlers() const;
97 SharedPtr<Thread> currently_handling;
98 99
99 /// Returns true if the session has been converted to a domain, otherwise False 100 /// Returns true if the session has been converted to a domain, otherwise False
100 bool IsDomain() const { 101 bool IsDomain() const {
@@ -129,8 +130,30 @@ private:
129 /// object handle. 130 /// object handle.
130 ResultCode HandleDomainSyncRequest(Kernel::HLERequestContext& context); 131 ResultCode HandleDomainSyncRequest(Kernel::HLERequestContext& context);
131 132
133 /// The parent session, which links to the client endpoint.
134 std::shared_ptr<Session> parent;
135
136 /// This session's HLE request handler (applicable when not a domain)
137 std::shared_ptr<SessionRequestHandler> hle_handler;
138
139 /// This is the list of domain request handlers (after conversion to a domain)
140 std::vector<std::shared_ptr<SessionRequestHandler>> domain_request_handlers;
141
142 /// List of threads that are pending a response after a sync request. This list is processed in
143 /// a LIFO manner, thus, the last request will be dispatched first.
144 /// TODO(Subv): Verify if this is indeed processed in LIFO using a hardware test.
145 std::vector<SharedPtr<Thread>> pending_requesting_threads;
146
147 /// Thread whose request is currently being handled. A request is considered "handled" when a
148 /// response is sent via svcReplyAndReceive.
149 /// TODO(Subv): Find a better name for this.
150 SharedPtr<Thread> currently_handling;
151
132 /// When set to True, converts the session to a domain at the end of the command 152 /// When set to True, converts the session to a domain at the end of the command
133 bool convert_to_domain{}; 153 bool convert_to_domain{};
154
155 /// The name of this session (optional)
156 std::string name;
134}; 157};
135 158
136} // namespace Kernel 159} // namespace Kernel
diff --git a/src/core/hle/kernel/shared_memory.cpp b/src/core/hle/kernel/shared_memory.cpp
index 22d0c1dd5..62861da36 100644
--- a/src/core/hle/kernel/shared_memory.cpp
+++ b/src/core/hle/kernel/shared_memory.cpp
@@ -6,7 +6,6 @@
6 6
7#include "common/assert.h" 7#include "common/assert.h"
8#include "common/logging/log.h" 8#include "common/logging/log.h"
9#include "core/core.h"
10#include "core/hle/kernel/errors.h" 9#include "core/hle/kernel/errors.h"
11#include "core/hle/kernel/kernel.h" 10#include "core/hle/kernel/kernel.h"
12#include "core/hle/kernel/shared_memory.h" 11#include "core/hle/kernel/shared_memory.h"
@@ -34,8 +33,8 @@ SharedPtr<SharedMemory> SharedMemory::Create(KernelCore& kernel, Process* owner_
34 shared_memory->backing_block_offset = 0; 33 shared_memory->backing_block_offset = 0;
35 34
36 // Refresh the address mappings for the current process. 35 // Refresh the address mappings for the current process.
37 if (Core::CurrentProcess() != nullptr) { 36 if (kernel.CurrentProcess() != nullptr) {
38 Core::CurrentProcess()->VMManager().RefreshMemoryBlockMappings( 37 kernel.CurrentProcess()->VMManager().RefreshMemoryBlockMappings(
39 shared_memory->backing_block.get()); 38 shared_memory->backing_block.get());
40 } 39 }
41 } else { 40 } else {
diff --git a/src/core/hle/kernel/svc.cpp b/src/core/hle/kernel/svc.cpp
index c5d399bab..77d0e3d96 100644
--- a/src/core/hle/kernel/svc.cpp
+++ b/src/core/hle/kernel/svc.cpp
@@ -20,6 +20,7 @@
20#include "core/hle/kernel/address_arbiter.h" 20#include "core/hle/kernel/address_arbiter.h"
21#include "core/hle/kernel/client_port.h" 21#include "core/hle/kernel/client_port.h"
22#include "core/hle/kernel/client_session.h" 22#include "core/hle/kernel/client_session.h"
23#include "core/hle/kernel/errors.h"
23#include "core/hle/kernel/handle_table.h" 24#include "core/hle/kernel/handle_table.h"
24#include "core/hle/kernel/kernel.h" 25#include "core/hle/kernel/kernel.h"
25#include "core/hle/kernel/mutex.h" 26#include "core/hle/kernel/mutex.h"
@@ -47,23 +48,6 @@ constexpr bool IsValidAddressRange(VAddr address, u64 size) {
47 return address + size > address; 48 return address + size > address;
48} 49}
49 50
50// Checks if a given address range lies within a larger address range.
51constexpr bool IsInsideAddressRange(VAddr address, u64 size, VAddr address_range_begin,
52 VAddr address_range_end) {
53 const VAddr end_address = address + size - 1;
54 return address_range_begin <= address && end_address <= address_range_end - 1;
55}
56
57bool IsInsideAddressSpace(const VMManager& vm, VAddr address, u64 size) {
58 return IsInsideAddressRange(address, size, vm.GetAddressSpaceBaseAddress(),
59 vm.GetAddressSpaceEndAddress());
60}
61
62bool IsInsideNewMapRegion(const VMManager& vm, VAddr address, u64 size) {
63 return IsInsideAddressRange(address, size, vm.GetNewMapRegionBaseAddress(),
64 vm.GetNewMapRegionEndAddress());
65}
66
67// 8 GiB 51// 8 GiB
68constexpr u64 MAIN_MEMORY_SIZE = 0x200000000; 52constexpr u64 MAIN_MEMORY_SIZE = 0x200000000;
69 53
@@ -105,14 +89,14 @@ ResultCode MapUnmapMemorySanityChecks(const VMManager& vm_manager, VAddr dst_add
105 return ERR_INVALID_ADDRESS_STATE; 89 return ERR_INVALID_ADDRESS_STATE;
106 } 90 }
107 91
108 if (!IsInsideAddressSpace(vm_manager, src_addr, size)) { 92 if (!vm_manager.IsWithinAddressSpace(src_addr, size)) {
109 LOG_ERROR(Kernel_SVC, 93 LOG_ERROR(Kernel_SVC,
110 "Source is not within the address space, addr=0x{:016X}, size=0x{:016X}", 94 "Source is not within the address space, addr=0x{:016X}, size=0x{:016X}",
111 src_addr, size); 95 src_addr, size);
112 return ERR_INVALID_ADDRESS_STATE; 96 return ERR_INVALID_ADDRESS_STATE;
113 } 97 }
114 98
115 if (!IsInsideNewMapRegion(vm_manager, dst_addr, size)) { 99 if (!vm_manager.IsWithinNewMapRegion(dst_addr, size)) {
116 LOG_ERROR(Kernel_SVC, 100 LOG_ERROR(Kernel_SVC,
117 "Destination is not within the new map region, addr=0x{:016X}, size=0x{:016X}", 101 "Destination is not within the new map region, addr=0x{:016X}, size=0x{:016X}",
118 dst_addr, size); 102 dst_addr, size);
@@ -238,7 +222,7 @@ static ResultCode SetMemoryPermission(VAddr addr, u64 size, u32 prot) {
238 auto* const current_process = Core::CurrentProcess(); 222 auto* const current_process = Core::CurrentProcess();
239 auto& vm_manager = current_process->VMManager(); 223 auto& vm_manager = current_process->VMManager();
240 224
241 if (!IsInsideAddressSpace(vm_manager, addr, size)) { 225 if (!vm_manager.IsWithinAddressSpace(addr, size)) {
242 LOG_ERROR(Kernel_SVC, 226 LOG_ERROR(Kernel_SVC,
243 "Source is not within the address space, addr=0x{:016X}, size=0x{:016X}", addr, 227 "Source is not within the address space, addr=0x{:016X}, size=0x{:016X}", addr,
244 size); 228 size);
@@ -299,7 +283,7 @@ static ResultCode SetMemoryAttribute(VAddr address, u64 size, u32 mask, u32 attr
299 } 283 }
300 284
301 auto& vm_manager = Core::CurrentProcess()->VMManager(); 285 auto& vm_manager = Core::CurrentProcess()->VMManager();
302 if (!IsInsideAddressSpace(vm_manager, address, size)) { 286 if (!vm_manager.IsWithinAddressSpace(address, size)) {
303 LOG_ERROR(Kernel_SVC, 287 LOG_ERROR(Kernel_SVC,
304 "Given address (0x{:016X}) is outside the bounds of the address space.", address); 288 "Given address (0x{:016X}) is outside the bounds of the address space.", address);
305 return ERR_INVALID_ADDRESS_STATE; 289 return ERR_INVALID_ADDRESS_STATE;
@@ -1495,20 +1479,10 @@ static ResultCode WaitForAddress(VAddr address, u32 type, s32 value, s64 timeout
1495 return ERR_INVALID_ADDRESS; 1479 return ERR_INVALID_ADDRESS;
1496 } 1480 }
1497 1481
1498 switch (static_cast<AddressArbiter::ArbitrationType>(type)) { 1482 const auto arbitration_type = static_cast<AddressArbiter::ArbitrationType>(type);
1499 case AddressArbiter::ArbitrationType::WaitIfLessThan: 1483 auto& address_arbiter =
1500 return AddressArbiter::WaitForAddressIfLessThan(address, value, timeout, false); 1484 Core::System::GetInstance().Kernel().CurrentProcess()->GetAddressArbiter();
1501 case AddressArbiter::ArbitrationType::DecrementAndWaitIfLessThan: 1485 return address_arbiter.WaitForAddress(address, arbitration_type, value, timeout);
1502 return AddressArbiter::WaitForAddressIfLessThan(address, value, timeout, true);
1503 case AddressArbiter::ArbitrationType::WaitIfEqual:
1504 return AddressArbiter::WaitForAddressIfEqual(address, value, timeout);
1505 default:
1506 LOG_ERROR(Kernel_SVC,
1507 "Invalid arbitration type, expected WaitIfLessThan, DecrementAndWaitIfLessThan "
1508 "or WaitIfEqual but got {}",
1509 type);
1510 return ERR_INVALID_ENUM_VALUE;
1511 }
1512} 1486}
1513 1487
1514// Signals to an address (via Address Arbiter) 1488// Signals to an address (via Address Arbiter)
@@ -1526,21 +1500,10 @@ static ResultCode SignalToAddress(VAddr address, u32 type, s32 value, s32 num_to
1526 return ERR_INVALID_ADDRESS; 1500 return ERR_INVALID_ADDRESS;
1527 } 1501 }
1528 1502
1529 switch (static_cast<AddressArbiter::SignalType>(type)) { 1503 const auto signal_type = static_cast<AddressArbiter::SignalType>(type);
1530 case AddressArbiter::SignalType::Signal: 1504 auto& address_arbiter =
1531 return AddressArbiter::SignalToAddress(address, num_to_wake); 1505 Core::System::GetInstance().Kernel().CurrentProcess()->GetAddressArbiter();
1532 case AddressArbiter::SignalType::IncrementAndSignalIfEqual: 1506 return address_arbiter.SignalToAddress(address, signal_type, value, num_to_wake);
1533 return AddressArbiter::IncrementAndSignalToAddressIfEqual(address, value, num_to_wake);
1534 case AddressArbiter::SignalType::ModifyByWaitingCountAndSignalIfEqual:
1535 return AddressArbiter::ModifyByWaitingCountAndSignalToAddressIfEqual(address, value,
1536 num_to_wake);
1537 default:
1538 LOG_ERROR(Kernel_SVC,
1539 "Invalid signal type, expected Signal, IncrementAndSignalIfEqual "
1540 "or ModifyByWaitingCountAndSignalIfEqual but got {}",
1541 type);
1542 return ERR_INVALID_ENUM_VALUE;
1543 }
1544} 1507}
1545 1508
1546/// This returns the total CPU ticks elapsed since the CPU was powered-on 1509/// This returns the total CPU ticks elapsed since the CPU was powered-on
diff --git a/src/core/hle/kernel/thread.cpp b/src/core/hle/kernel/thread.cpp
index 6661e2130..eb54d6651 100644
--- a/src/core/hle/kernel/thread.cpp
+++ b/src/core/hle/kernel/thread.cpp
@@ -184,8 +184,6 @@ ResultVal<SharedPtr<Thread>> Thread::Create(KernelCore& kernel, std::string name
184 return ERR_INVALID_PROCESSOR_ID; 184 return ERR_INVALID_PROCESSOR_ID;
185 } 185 }
186 186
187 // TODO(yuriks): Other checks, returning 0xD9001BEA
188
189 if (!Memory::IsValidVirtualAddress(owner_process, entry_point)) { 187 if (!Memory::IsValidVirtualAddress(owner_process, entry_point)) {
190 LOG_ERROR(Kernel_SVC, "(name={}): invalid entry {:016X}", name, entry_point); 188 LOG_ERROR(Kernel_SVC, "(name={}): invalid entry {:016X}", name, entry_point);
191 // TODO (bunnei): Find the correct error code to use here 189 // TODO (bunnei): Find the correct error code to use here
diff --git a/src/core/hle/kernel/vm_manager.cpp b/src/core/hle/kernel/vm_manager.cpp
index 10ad94aa6..05c59af34 100644
--- a/src/core/hle/kernel/vm_manager.cpp
+++ b/src/core/hle/kernel/vm_manager.cpp
@@ -17,8 +17,8 @@
17#include "core/memory_setup.h" 17#include "core/memory_setup.h"
18 18
19namespace Kernel { 19namespace Kernel {
20 20namespace {
21static const char* GetMemoryStateName(MemoryState state) { 21const char* GetMemoryStateName(MemoryState state) {
22 static constexpr const char* names[] = { 22 static constexpr const char* names[] = {
23 "Unmapped", "Io", 23 "Unmapped", "Io",
24 "Normal", "CodeStatic", 24 "Normal", "CodeStatic",
@@ -35,6 +35,14 @@ static const char* GetMemoryStateName(MemoryState state) {
35 return names[ToSvcMemoryState(state)]; 35 return names[ToSvcMemoryState(state)];
36} 36}
37 37
38// Checks if a given address range lies within a larger address range.
39constexpr bool IsInsideAddressRange(VAddr address, u64 size, VAddr address_range_begin,
40 VAddr address_range_end) {
41 const VAddr end_address = address + size - 1;
42 return address_range_begin <= address && end_address <= address_range_end - 1;
43}
44} // Anonymous namespace
45
38bool VirtualMemoryArea::CanBeMergedWith(const VirtualMemoryArea& next) const { 46bool VirtualMemoryArea::CanBeMergedWith(const VirtualMemoryArea& next) const {
39 ASSERT(base + size == next.base); 47 ASSERT(base + size == next.base);
40 if (permissions != next.permissions || state != next.state || attribute != next.attribute || 48 if (permissions != next.permissions || state != next.state || attribute != next.attribute ||
@@ -249,8 +257,7 @@ ResultCode VMManager::ReprotectRange(VAddr target, u64 size, VMAPermission new_p
249} 257}
250 258
251ResultVal<VAddr> VMManager::HeapAllocate(VAddr target, u64 size, VMAPermission perms) { 259ResultVal<VAddr> VMManager::HeapAllocate(VAddr target, u64 size, VMAPermission perms) {
252 if (target < GetHeapRegionBaseAddress() || target + size > GetHeapRegionEndAddress() || 260 if (!IsWithinHeapRegion(target, size)) {
253 target + size < target) {
254 return ERR_INVALID_ADDRESS; 261 return ERR_INVALID_ADDRESS;
255 } 262 }
256 263
@@ -285,8 +292,7 @@ ResultVal<VAddr> VMManager::HeapAllocate(VAddr target, u64 size, VMAPermission p
285} 292}
286 293
287ResultCode VMManager::HeapFree(VAddr target, u64 size) { 294ResultCode VMManager::HeapFree(VAddr target, u64 size) {
288 if (target < GetHeapRegionBaseAddress() || target + size > GetHeapRegionEndAddress() || 295 if (!IsWithinHeapRegion(target, size)) {
289 target + size < target) {
290 return ERR_INVALID_ADDRESS; 296 return ERR_INVALID_ADDRESS;
291 } 297 }
292 298
@@ -706,6 +712,11 @@ u64 VMManager::GetAddressSpaceWidth() const {
706 return address_space_width; 712 return address_space_width;
707} 713}
708 714
715bool VMManager::IsWithinAddressSpace(VAddr address, u64 size) const {
716 return IsInsideAddressRange(address, size, GetAddressSpaceBaseAddress(),
717 GetAddressSpaceEndAddress());
718}
719
709VAddr VMManager::GetASLRRegionBaseAddress() const { 720VAddr VMManager::GetASLRRegionBaseAddress() const {
710 return aslr_region_base; 721 return aslr_region_base;
711} 722}
@@ -750,6 +761,11 @@ u64 VMManager::GetCodeRegionSize() const {
750 return code_region_end - code_region_base; 761 return code_region_end - code_region_base;
751} 762}
752 763
764bool VMManager::IsWithinCodeRegion(VAddr address, u64 size) const {
765 return IsInsideAddressRange(address, size, GetCodeRegionBaseAddress(),
766 GetCodeRegionEndAddress());
767}
768
753VAddr VMManager::GetHeapRegionBaseAddress() const { 769VAddr VMManager::GetHeapRegionBaseAddress() const {
754 return heap_region_base; 770 return heap_region_base;
755} 771}
@@ -762,6 +778,11 @@ u64 VMManager::GetHeapRegionSize() const {
762 return heap_region_end - heap_region_base; 778 return heap_region_end - heap_region_base;
763} 779}
764 780
781bool VMManager::IsWithinHeapRegion(VAddr address, u64 size) const {
782 return IsInsideAddressRange(address, size, GetHeapRegionBaseAddress(),
783 GetHeapRegionEndAddress());
784}
785
765VAddr VMManager::GetMapRegionBaseAddress() const { 786VAddr VMManager::GetMapRegionBaseAddress() const {
766 return map_region_base; 787 return map_region_base;
767} 788}
@@ -774,6 +795,10 @@ u64 VMManager::GetMapRegionSize() const {
774 return map_region_end - map_region_base; 795 return map_region_end - map_region_base;
775} 796}
776 797
798bool VMManager::IsWithinMapRegion(VAddr address, u64 size) const {
799 return IsInsideAddressRange(address, size, GetMapRegionBaseAddress(), GetMapRegionEndAddress());
800}
801
777VAddr VMManager::GetNewMapRegionBaseAddress() const { 802VAddr VMManager::GetNewMapRegionBaseAddress() const {
778 return new_map_region_base; 803 return new_map_region_base;
779} 804}
@@ -786,6 +811,11 @@ u64 VMManager::GetNewMapRegionSize() const {
786 return new_map_region_end - new_map_region_base; 811 return new_map_region_end - new_map_region_base;
787} 812}
788 813
814bool VMManager::IsWithinNewMapRegion(VAddr address, u64 size) const {
815 return IsInsideAddressRange(address, size, GetNewMapRegionBaseAddress(),
816 GetNewMapRegionEndAddress());
817}
818
789VAddr VMManager::GetTLSIORegionBaseAddress() const { 819VAddr VMManager::GetTLSIORegionBaseAddress() const {
790 return tls_io_region_base; 820 return tls_io_region_base;
791} 821}
@@ -798,4 +828,9 @@ u64 VMManager::GetTLSIORegionSize() const {
798 return tls_io_region_end - tls_io_region_base; 828 return tls_io_region_end - tls_io_region_base;
799} 829}
800 830
831bool VMManager::IsWithinTLSIORegion(VAddr address, u64 size) const {
832 return IsInsideAddressRange(address, size, GetTLSIORegionBaseAddress(),
833 GetTLSIORegionEndAddress());
834}
835
801} // namespace Kernel 836} // namespace Kernel
diff --git a/src/core/hle/kernel/vm_manager.h b/src/core/hle/kernel/vm_manager.h
index 6091533bc..88e0b3c02 100644
--- a/src/core/hle/kernel/vm_manager.h
+++ b/src/core/hle/kernel/vm_manager.h
@@ -432,18 +432,21 @@ public:
432 /// Gets the address space width in bits. 432 /// Gets the address space width in bits.
433 u64 GetAddressSpaceWidth() const; 433 u64 GetAddressSpaceWidth() const;
434 434
435 /// Determines whether or not the given address range lies within the address space.
436 bool IsWithinAddressSpace(VAddr address, u64 size) const;
437
435 /// Gets the base address of the ASLR region. 438 /// Gets the base address of the ASLR region.
436 VAddr GetASLRRegionBaseAddress() const; 439 VAddr GetASLRRegionBaseAddress() const;
437 440
438 /// Gets the end address of the ASLR region. 441 /// Gets the end address of the ASLR region.
439 VAddr GetASLRRegionEndAddress() const; 442 VAddr GetASLRRegionEndAddress() const;
440 443
441 /// Determines whether or not the specified address range is within the ASLR region.
442 bool IsWithinASLRRegion(VAddr address, u64 size) const;
443
444 /// Gets the size of the ASLR region 444 /// Gets the size of the ASLR region
445 u64 GetASLRRegionSize() const; 445 u64 GetASLRRegionSize() const;
446 446
447 /// Determines whether or not the specified address range is within the ASLR region.
448 bool IsWithinASLRRegion(VAddr address, u64 size) const;
449
447 /// Gets the base address of the code region. 450 /// Gets the base address of the code region.
448 VAddr GetCodeRegionBaseAddress() const; 451 VAddr GetCodeRegionBaseAddress() const;
449 452
@@ -453,6 +456,9 @@ public:
453 /// Gets the total size of the code region in bytes. 456 /// Gets the total size of the code region in bytes.
454 u64 GetCodeRegionSize() const; 457 u64 GetCodeRegionSize() const;
455 458
459 /// Determines whether or not the specified range is within the code region.
460 bool IsWithinCodeRegion(VAddr address, u64 size) const;
461
456 /// Gets the base address of the heap region. 462 /// Gets the base address of the heap region.
457 VAddr GetHeapRegionBaseAddress() const; 463 VAddr GetHeapRegionBaseAddress() const;
458 464
@@ -462,6 +468,9 @@ public:
462 /// Gets the total size of the heap region in bytes. 468 /// Gets the total size of the heap region in bytes.
463 u64 GetHeapRegionSize() const; 469 u64 GetHeapRegionSize() const;
464 470
471 /// Determines whether or not the specified range is within the heap region.
472 bool IsWithinHeapRegion(VAddr address, u64 size) const;
473
465 /// Gets the base address of the map region. 474 /// Gets the base address of the map region.
466 VAddr GetMapRegionBaseAddress() const; 475 VAddr GetMapRegionBaseAddress() const;
467 476
@@ -471,6 +480,9 @@ public:
471 /// Gets the total size of the map region in bytes. 480 /// Gets the total size of the map region in bytes.
472 u64 GetMapRegionSize() const; 481 u64 GetMapRegionSize() const;
473 482
483 /// Determines whether or not the specified range is within the map region.
484 bool IsWithinMapRegion(VAddr address, u64 size) const;
485
474 /// Gets the base address of the new map region. 486 /// Gets the base address of the new map region.
475 VAddr GetNewMapRegionBaseAddress() const; 487 VAddr GetNewMapRegionBaseAddress() const;
476 488
@@ -480,6 +492,9 @@ public:
480 /// Gets the total size of the new map region in bytes. 492 /// Gets the total size of the new map region in bytes.
481 u64 GetNewMapRegionSize() const; 493 u64 GetNewMapRegionSize() const;
482 494
495 /// Determines whether or not the given address range is within the new map region
496 bool IsWithinNewMapRegion(VAddr address, u64 size) const;
497
483 /// Gets the base address of the TLS IO region. 498 /// Gets the base address of the TLS IO region.
484 VAddr GetTLSIORegionBaseAddress() const; 499 VAddr GetTLSIORegionBaseAddress() const;
485 500
@@ -489,6 +504,9 @@ public:
489 /// Gets the total size of the TLS IO region in bytes. 504 /// Gets the total size of the TLS IO region in bytes.
490 u64 GetTLSIORegionSize() const; 505 u64 GetTLSIORegionSize() const;
491 506
507 /// Determines if the given address range is within the TLS IO region.
508 bool IsWithinTLSIORegion(VAddr address, u64 size) const;
509
492 /// Each VMManager has its own page table, which is set as the main one when the owning process 510 /// Each VMManager has its own page table, which is set as the main one when the owning process
493 /// is scheduled. 511 /// is scheduled.
494 Memory::PageTable page_table; 512 Memory::PageTable page_table;
diff --git a/src/core/hle/result.h b/src/core/hle/result.h
index bfb77cc31..1ed144481 100644
--- a/src/core/hle/result.h
+++ b/src/core/hle/result.h
@@ -8,7 +8,6 @@
8#include <utility> 8#include <utility>
9#include "common/assert.h" 9#include "common/assert.h"
10#include "common/bit_field.h" 10#include "common/bit_field.h"
11#include "common/common_funcs.h"
12#include "common/common_types.h" 11#include "common/common_types.h"
13 12
14// All the constants in this file come from http://switchbrew.org/index.php?title=Error_codes 13// All the constants in this file come from http://switchbrew.org/index.php?title=Error_codes
diff --git a/src/core/hle/service/am/applets/software_keyboard.cpp b/src/core/hle/service/am/applets/software_keyboard.cpp
index f255f74b5..8c5bd6059 100644
--- a/src/core/hle/service/am/applets/software_keyboard.cpp
+++ b/src/core/hle/service/am/applets/software_keyboard.cpp
@@ -7,6 +7,7 @@
7#include "common/string_util.h" 7#include "common/string_util.h"
8#include "core/core.h" 8#include "core/core.h"
9#include "core/frontend/applets/software_keyboard.h" 9#include "core/frontend/applets/software_keyboard.h"
10#include "core/hle/result.h"
10#include "core/hle/service/am/am.h" 11#include "core/hle/service/am/am.h"
11#include "core/hle/service/am/applets/software_keyboard.h" 12#include "core/hle/service/am/applets/software_keyboard.h"
12 13
diff --git a/src/core/hle/service/am/applets/software_keyboard.h b/src/core/hle/service/am/applets/software_keyboard.h
index efd5753a1..b93a30d28 100644
--- a/src/core/hle/service/am/applets/software_keyboard.h
+++ b/src/core/hle/service/am/applets/software_keyboard.h
@@ -9,10 +9,13 @@
9#include <vector> 9#include <vector>
10 10
11#include "common/common_funcs.h" 11#include "common/common_funcs.h"
12#include "common/common_types.h"
12#include "common/swap.h" 13#include "common/swap.h"
13#include "core/hle/service/am/am.h" 14#include "core/hle/service/am/am.h"
14#include "core/hle/service/am/applets/applets.h" 15#include "core/hle/service/am/applets/applets.h"
15 16
17union ResultCode;
18
16namespace Service::AM::Applets { 19namespace Service::AM::Applets {
17 20
18enum class KeysetDisable : u32 { 21enum class KeysetDisable : u32 {
diff --git a/src/core/hle/service/audio/audout_u.cpp b/src/core/hle/service/audio/audout_u.cpp
index 6831c0735..21f5e64c7 100644
--- a/src/core/hle/service/audio/audout_u.cpp
+++ b/src/core/hle/service/audio/audout_u.cpp
@@ -18,17 +18,11 @@
18#include "core/hle/kernel/readable_event.h" 18#include "core/hle/kernel/readable_event.h"
19#include "core/hle/kernel/writable_event.h" 19#include "core/hle/kernel/writable_event.h"
20#include "core/hle/service/audio/audout_u.h" 20#include "core/hle/service/audio/audout_u.h"
21#include "core/hle/service/audio/errors.h"
21#include "core/memory.h" 22#include "core/memory.h"
22 23
23namespace Service::Audio { 24namespace Service::Audio {
24 25
25namespace ErrCodes {
26enum {
27 ErrorUnknown = 2,
28 BufferCountExceeded = 8,
29};
30}
31
32constexpr std::array<char, 10> DefaultDevice{{"DeviceOut"}}; 26constexpr std::array<char, 10> DefaultDevice{{"DeviceOut"}};
33constexpr int DefaultSampleRate{48000}; 27constexpr int DefaultSampleRate{48000};
34 28
@@ -100,7 +94,7 @@ private:
100 94
101 if (stream->IsPlaying()) { 95 if (stream->IsPlaying()) {
102 IPC::ResponseBuilder rb{ctx, 2}; 96 IPC::ResponseBuilder rb{ctx, 2};
103 rb.Push(ResultCode(ErrorModule::Audio, ErrCodes::ErrorUnknown)); 97 rb.Push(ERR_OPERATION_FAILED);
104 return; 98 return;
105 } 99 }
106 100
@@ -113,7 +107,9 @@ private:
113 void StopAudioOut(Kernel::HLERequestContext& ctx) { 107 void StopAudioOut(Kernel::HLERequestContext& ctx) {
114 LOG_DEBUG(Service_Audio, "called"); 108 LOG_DEBUG(Service_Audio, "called");
115 109
116 audio_core.StopStream(stream); 110 if (stream->IsPlaying()) {
111 audio_core.StopStream(stream);
112 }
117 113
118 IPC::ResponseBuilder rb{ctx, 2}; 114 IPC::ResponseBuilder rb{ctx, 2};
119 rb.Push(RESULT_SUCCESS); 115 rb.Push(RESULT_SUCCESS);
@@ -143,7 +139,8 @@ private:
143 139
144 if (!audio_core.QueueBuffer(stream, tag, std::move(samples))) { 140 if (!audio_core.QueueBuffer(stream, tag, std::move(samples))) {
145 IPC::ResponseBuilder rb{ctx, 2}; 141 IPC::ResponseBuilder rb{ctx, 2};
146 rb.Push(ResultCode(ErrorModule::Audio, ErrCodes::BufferCountExceeded)); 142 rb.Push(ERR_BUFFER_COUNT_EXCEEDED);
143 return;
147 } 144 }
148 145
149 IPC::ResponseBuilder rb{ctx, 2}; 146 IPC::ResponseBuilder rb{ctx, 2};
diff --git a/src/core/hle/service/audio/audren_u.cpp b/src/core/hle/service/audio/audren_u.cpp
index ea8f9d0bb..c9de10a24 100644
--- a/src/core/hle/service/audio/audren_u.cpp
+++ b/src/core/hle/service/audio/audren_u.cpp
@@ -17,6 +17,7 @@
17#include "core/hle/kernel/readable_event.h" 17#include "core/hle/kernel/readable_event.h"
18#include "core/hle/kernel/writable_event.h" 18#include "core/hle/kernel/writable_event.h"
19#include "core/hle/service/audio/audren_u.h" 19#include "core/hle/service/audio/audren_u.h"
20#include "core/hle/service/audio/errors.h"
20 21
21namespace Service::Audio { 22namespace Service::Audio {
22 23
@@ -146,7 +147,7 @@ private:
146 // code in this case. 147 // code in this case.
147 148
148 IPC::ResponseBuilder rb{ctx, 2}; 149 IPC::ResponseBuilder rb{ctx, 2};
149 rb.Push(ResultCode{ErrorModule::Audio, 201}); 150 rb.Push(ERR_NOT_SUPPORTED);
150 } 151 }
151 152
152 Kernel::EventPair system_event; 153 Kernel::EventPair system_event;
diff --git a/src/core/hle/service/audio/errors.h b/src/core/hle/service/audio/errors.h
new file mode 100644
index 000000000..6f8c09bcf
--- /dev/null
+++ b/src/core/hle/service/audio/errors.h
@@ -0,0 +1,15 @@
1// Copyright 2019 yuzu emulator team
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include "core/hle/result.h"
8
9namespace Service::Audio {
10
11constexpr ResultCode ERR_OPERATION_FAILED{ErrorModule::Audio, 2};
12constexpr ResultCode ERR_BUFFER_COUNT_EXCEEDED{ErrorModule::Audio, 8};
13constexpr ResultCode ERR_NOT_SUPPORTED{ErrorModule::Audio, 513};
14
15} // namespace Service::Audio
diff --git a/src/core/hle/service/audio/hwopus.cpp b/src/core/hle/service/audio/hwopus.cpp
index 11eba4a12..377e12cfa 100644
--- a/src/core/hle/service/audio/hwopus.cpp
+++ b/src/core/hle/service/audio/hwopus.cpp
@@ -9,43 +9,32 @@
9 9
10#include <opus.h> 10#include <opus.h>
11 11
12#include "common/common_funcs.h" 12#include "common/assert.h"
13#include "common/logging/log.h" 13#include "common/logging/log.h"
14#include "core/hle/ipc_helpers.h" 14#include "core/hle/ipc_helpers.h"
15#include "core/hle/kernel/hle_ipc.h" 15#include "core/hle/kernel/hle_ipc.h"
16#include "core/hle/service/audio/hwopus.h" 16#include "core/hle/service/audio/hwopus.h"
17 17
18namespace Service::Audio { 18namespace Service::Audio {
19 19namespace {
20struct OpusDeleter { 20struct OpusDeleter {
21 void operator()(void* ptr) const { 21 void operator()(void* ptr) const {
22 operator delete(ptr); 22 operator delete(ptr);
23 } 23 }
24}; 24};
25 25
26class IHardwareOpusDecoderManager final : public ServiceFramework<IHardwareOpusDecoderManager> { 26using OpusDecoderPtr = std::unique_ptr<OpusDecoder, OpusDeleter>;
27public:
28 IHardwareOpusDecoderManager(std::unique_ptr<OpusDecoder, OpusDeleter> decoder, u32 sample_rate,
29 u32 channel_count)
30 : ServiceFramework("IHardwareOpusDecoderManager"), decoder(std::move(decoder)),
31 sample_rate(sample_rate), channel_count(channel_count) {
32 // clang-format off
33 static const FunctionInfo functions[] = {
34 {0, &IHardwareOpusDecoderManager::DecodeInterleavedOld, "DecodeInterleavedOld"},
35 {1, nullptr, "SetContext"},
36 {2, nullptr, "DecodeInterleavedForMultiStreamOld"},
37 {3, nullptr, "SetContextForMultiStream"},
38 {4, &IHardwareOpusDecoderManager::DecodeInterleavedWithPerfOld, "DecodeInterleavedWithPerfOld"},
39 {5, nullptr, "DecodeInterleavedForMultiStreamWithPerfOld"},
40 {6, &IHardwareOpusDecoderManager::DecodeInterleaved, "DecodeInterleaved"},
41 {7, nullptr, "DecodeInterleavedForMultiStream"},
42 };
43 // clang-format on
44 27
45 RegisterHandlers(functions); 28struct OpusPacketHeader {
46 } 29 // Packet size in bytes.
30 u32_be size;
31 // Indicates the final range of the codec's entropy coder.
32 u32_be final_range;
33};
34static_assert(sizeof(OpusPacketHeader) == 0x8, "OpusHeader is an invalid size");
47 35
48private: 36class OpusDecoderStateBase {
37public:
49 /// Describes extra behavior that may be asked of the decoding context. 38 /// Describes extra behavior that may be asked of the decoding context.
50 enum class ExtraBehavior { 39 enum class ExtraBehavior {
51 /// No extra behavior. 40 /// No extra behavior.
@@ -55,30 +44,36 @@ private:
55 ResetContext, 44 ResetContext,
56 }; 45 };
57 46
58 void DecodeInterleavedOld(Kernel::HLERequestContext& ctx) { 47 enum class PerfTime {
59 LOG_DEBUG(Audio, "called"); 48 Disabled,
60 49 Enabled,
61 DecodeInterleavedHelper(ctx, nullptr, ExtraBehavior::None); 50 };
62 }
63
64 void DecodeInterleavedWithPerfOld(Kernel::HLERequestContext& ctx) {
65 LOG_DEBUG(Audio, "called");
66 51
67 u64 performance = 0; 52 virtual ~OpusDecoderStateBase() = default;
68 DecodeInterleavedHelper(ctx, &performance, ExtraBehavior::None);
69 }
70 53
71 void DecodeInterleaved(Kernel::HLERequestContext& ctx) { 54 // Decodes interleaved Opus packets. Optionally allows reporting time taken to
72 LOG_DEBUG(Audio, "called"); 55 // perform the decoding, as well as any relevant extra behavior.
73 56 virtual void DecodeInterleaved(Kernel::HLERequestContext& ctx, PerfTime perf_time,
74 IPC::RequestParser rp{ctx}; 57 ExtraBehavior extra_behavior) = 0;
75 const auto extra_behavior = 58};
76 rp.Pop<bool>() ? ExtraBehavior::ResetContext : ExtraBehavior::None;
77 59
78 u64 performance = 0; 60// Represents the decoder state for a non-multistream decoder.
79 DecodeInterleavedHelper(ctx, &performance, extra_behavior); 61class OpusDecoderState final : public OpusDecoderStateBase {
62public:
63 explicit OpusDecoderState(OpusDecoderPtr decoder, u32 sample_rate, u32 channel_count)
64 : decoder{std::move(decoder)}, sample_rate{sample_rate}, channel_count{channel_count} {}
65
66 void DecodeInterleaved(Kernel::HLERequestContext& ctx, PerfTime perf_time,
67 ExtraBehavior extra_behavior) override {
68 if (perf_time == PerfTime::Disabled) {
69 DecodeInterleavedHelper(ctx, nullptr, extra_behavior);
70 } else {
71 u64 performance = 0;
72 DecodeInterleavedHelper(ctx, &performance, extra_behavior);
73 }
80 } 74 }
81 75
76private:
82 void DecodeInterleavedHelper(Kernel::HLERequestContext& ctx, u64* performance, 77 void DecodeInterleavedHelper(Kernel::HLERequestContext& ctx, u64* performance,
83 ExtraBehavior extra_behavior) { 78 ExtraBehavior extra_behavior) {
84 u32 consumed = 0; 79 u32 consumed = 0;
@@ -89,8 +84,7 @@ private:
89 ResetDecoderContext(); 84 ResetDecoderContext();
90 } 85 }
91 86
92 if (!Decoder_DecodeInterleaved(consumed, sample_count, ctx.ReadBuffer(), samples, 87 if (!DecodeOpusData(consumed, sample_count, ctx.ReadBuffer(), samples, performance)) {
93 performance)) {
94 LOG_ERROR(Audio, "Failed to decode opus data"); 88 LOG_ERROR(Audio, "Failed to decode opus data");
95 IPC::ResponseBuilder rb{ctx, 2}; 89 IPC::ResponseBuilder rb{ctx, 2};
96 // TODO(ogniK): Use correct error code 90 // TODO(ogniK): Use correct error code
@@ -109,27 +103,27 @@ private:
109 ctx.WriteBuffer(samples.data(), samples.size() * sizeof(s16)); 103 ctx.WriteBuffer(samples.data(), samples.size() * sizeof(s16));
110 } 104 }
111 105
112 bool Decoder_DecodeInterleaved(u32& consumed, u32& sample_count, const std::vector<u8>& input, 106 bool DecodeOpusData(u32& consumed, u32& sample_count, const std::vector<u8>& input,
113 std::vector<opus_int16>& output, u64* out_performance_time) { 107 std::vector<opus_int16>& output, u64* out_performance_time) const {
114 const auto start_time = std::chrono::high_resolution_clock::now(); 108 const auto start_time = std::chrono::high_resolution_clock::now();
115 const std::size_t raw_output_sz = output.size() * sizeof(opus_int16); 109 const std::size_t raw_output_sz = output.size() * sizeof(opus_int16);
116 if (sizeof(OpusHeader) > input.size()) { 110 if (sizeof(OpusPacketHeader) > input.size()) {
117 LOG_ERROR(Audio, "Input is smaller than the header size, header_sz={}, input_sz={}", 111 LOG_ERROR(Audio, "Input is smaller than the header size, header_sz={}, input_sz={}",
118 sizeof(OpusHeader), input.size()); 112 sizeof(OpusPacketHeader), input.size());
119 return false; 113 return false;
120 } 114 }
121 115
122 OpusHeader hdr{}; 116 OpusPacketHeader hdr{};
123 std::memcpy(&hdr, input.data(), sizeof(OpusHeader)); 117 std::memcpy(&hdr, input.data(), sizeof(OpusPacketHeader));
124 if (sizeof(OpusHeader) + static_cast<u32>(hdr.sz) > input.size()) { 118 if (sizeof(OpusPacketHeader) + static_cast<u32>(hdr.size) > input.size()) {
125 LOG_ERROR(Audio, "Input does not fit in the opus header size. data_sz={}, input_sz={}", 119 LOG_ERROR(Audio, "Input does not fit in the opus header size. data_sz={}, input_sz={}",
126 sizeof(OpusHeader) + static_cast<u32>(hdr.sz), input.size()); 120 sizeof(OpusPacketHeader) + static_cast<u32>(hdr.size), input.size());
127 return false; 121 return false;
128 } 122 }
129 123
130 const auto frame = input.data() + sizeof(OpusHeader); 124 const auto frame = input.data() + sizeof(OpusPacketHeader);
131 const auto decoded_sample_count = opus_packet_get_nb_samples( 125 const auto decoded_sample_count = opus_packet_get_nb_samples(
132 frame, static_cast<opus_int32>(input.size() - sizeof(OpusHeader)), 126 frame, static_cast<opus_int32>(input.size() - sizeof(OpusPacketHeader)),
133 static_cast<opus_int32>(sample_rate)); 127 static_cast<opus_int32>(sample_rate));
134 if (decoded_sample_count * channel_count * sizeof(u16) > raw_output_sz) { 128 if (decoded_sample_count * channel_count * sizeof(u16) > raw_output_sz) {
135 LOG_ERROR( 129 LOG_ERROR(
@@ -141,18 +135,18 @@ private:
141 135
142 const int frame_size = (static_cast<int>(raw_output_sz / sizeof(s16) / channel_count)); 136 const int frame_size = (static_cast<int>(raw_output_sz / sizeof(s16) / channel_count));
143 const auto out_sample_count = 137 const auto out_sample_count =
144 opus_decode(decoder.get(), frame, hdr.sz, output.data(), frame_size, 0); 138 opus_decode(decoder.get(), frame, hdr.size, output.data(), frame_size, 0);
145 if (out_sample_count < 0) { 139 if (out_sample_count < 0) {
146 LOG_ERROR(Audio, 140 LOG_ERROR(Audio,
147 "Incorrect sample count received from opus_decode, " 141 "Incorrect sample count received from opus_decode, "
148 "output_sample_count={}, frame_size={}, data_sz_from_hdr={}", 142 "output_sample_count={}, frame_size={}, data_sz_from_hdr={}",
149 out_sample_count, frame_size, static_cast<u32>(hdr.sz)); 143 out_sample_count, frame_size, static_cast<u32>(hdr.size));
150 return false; 144 return false;
151 } 145 }
152 146
153 const auto end_time = std::chrono::high_resolution_clock::now() - start_time; 147 const auto end_time = std::chrono::high_resolution_clock::now() - start_time;
154 sample_count = out_sample_count; 148 sample_count = out_sample_count;
155 consumed = static_cast<u32>(sizeof(OpusHeader) + hdr.sz); 149 consumed = static_cast<u32>(sizeof(OpusPacketHeader) + hdr.size);
156 if (out_performance_time != nullptr) { 150 if (out_performance_time != nullptr) {
157 *out_performance_time = 151 *out_performance_time =
158 std::chrono::duration_cast<std::chrono::milliseconds>(end_time).count(); 152 std::chrono::duration_cast<std::chrono::milliseconds>(end_time).count();
@@ -167,21 +161,66 @@ private:
167 opus_decoder_ctl(decoder.get(), OPUS_RESET_STATE); 161 opus_decoder_ctl(decoder.get(), OPUS_RESET_STATE);
168 } 162 }
169 163
170 struct OpusHeader { 164 OpusDecoderPtr decoder;
171 u32_be sz; // Needs to be BE for some odd reason
172 INSERT_PADDING_WORDS(1);
173 };
174 static_assert(sizeof(OpusHeader) == 0x8, "OpusHeader is an invalid size");
175
176 std::unique_ptr<OpusDecoder, OpusDeleter> decoder;
177 u32 sample_rate; 165 u32 sample_rate;
178 u32 channel_count; 166 u32 channel_count;
179}; 167};
180 168
181static std::size_t WorkerBufferSize(u32 channel_count) { 169class IHardwareOpusDecoderManager final : public ServiceFramework<IHardwareOpusDecoderManager> {
170public:
171 explicit IHardwareOpusDecoderManager(std::unique_ptr<OpusDecoderStateBase> decoder_state)
172 : ServiceFramework("IHardwareOpusDecoderManager"), decoder_state{std::move(decoder_state)} {
173 // clang-format off
174 static const FunctionInfo functions[] = {
175 {0, &IHardwareOpusDecoderManager::DecodeInterleavedOld, "DecodeInterleavedOld"},
176 {1, nullptr, "SetContext"},
177 {2, nullptr, "DecodeInterleavedForMultiStreamOld"},
178 {3, nullptr, "SetContextForMultiStream"},
179 {4, &IHardwareOpusDecoderManager::DecodeInterleavedWithPerfOld, "DecodeInterleavedWithPerfOld"},
180 {5, nullptr, "DecodeInterleavedForMultiStreamWithPerfOld"},
181 {6, &IHardwareOpusDecoderManager::DecodeInterleaved, "DecodeInterleaved"},
182 {7, nullptr, "DecodeInterleavedForMultiStream"},
183 };
184 // clang-format on
185
186 RegisterHandlers(functions);
187 }
188
189private:
190 void DecodeInterleavedOld(Kernel::HLERequestContext& ctx) {
191 LOG_DEBUG(Audio, "called");
192
193 decoder_state->DecodeInterleaved(ctx, OpusDecoderStateBase::PerfTime::Disabled,
194 OpusDecoderStateBase::ExtraBehavior::None);
195 }
196
197 void DecodeInterleavedWithPerfOld(Kernel::HLERequestContext& ctx) {
198 LOG_DEBUG(Audio, "called");
199
200 decoder_state->DecodeInterleaved(ctx, OpusDecoderStateBase::PerfTime::Enabled,
201 OpusDecoderStateBase::ExtraBehavior::None);
202 }
203
204 void DecodeInterleaved(Kernel::HLERequestContext& ctx) {
205 LOG_DEBUG(Audio, "called");
206
207 IPC::RequestParser rp{ctx};
208 const auto extra_behavior = rp.Pop<bool>()
209 ? OpusDecoderStateBase::ExtraBehavior::ResetContext
210 : OpusDecoderStateBase::ExtraBehavior::None;
211
212 decoder_state->DecodeInterleaved(ctx, OpusDecoderStateBase::PerfTime::Enabled,
213 extra_behavior);
214 }
215
216 std::unique_ptr<OpusDecoderStateBase> decoder_state;
217};
218
219std::size_t WorkerBufferSize(u32 channel_count) {
182 ASSERT_MSG(channel_count == 1 || channel_count == 2, "Invalid channel count"); 220 ASSERT_MSG(channel_count == 1 || channel_count == 2, "Invalid channel count");
183 return opus_decoder_get_size(static_cast<int>(channel_count)); 221 return opus_decoder_get_size(static_cast<int>(channel_count));
184} 222}
223} // Anonymous namespace
185 224
186void HwOpus::GetWorkBufferSize(Kernel::HLERequestContext& ctx) { 225void HwOpus::GetWorkBufferSize(Kernel::HLERequestContext& ctx) {
187 IPC::RequestParser rp{ctx}; 226 IPC::RequestParser rp{ctx};
@@ -220,8 +259,7 @@ void HwOpus::OpenOpusDecoder(Kernel::HLERequestContext& ctx) {
220 const std::size_t worker_sz = WorkerBufferSize(channel_count); 259 const std::size_t worker_sz = WorkerBufferSize(channel_count);
221 ASSERT_MSG(buffer_sz >= worker_sz, "Worker buffer too large"); 260 ASSERT_MSG(buffer_sz >= worker_sz, "Worker buffer too large");
222 261
223 std::unique_ptr<OpusDecoder, OpusDeleter> decoder{ 262 OpusDecoderPtr decoder{static_cast<OpusDecoder*>(operator new(worker_sz))};
224 static_cast<OpusDecoder*>(operator new(worker_sz))};
225 if (const int err = opus_decoder_init(decoder.get(), sample_rate, channel_count)) { 263 if (const int err = opus_decoder_init(decoder.get(), sample_rate, channel_count)) {
226 LOG_ERROR(Audio, "Failed to init opus decoder with error={}", err); 264 LOG_ERROR(Audio, "Failed to init opus decoder with error={}", err);
227 IPC::ResponseBuilder rb{ctx, 2}; 265 IPC::ResponseBuilder rb{ctx, 2};
@@ -232,8 +270,8 @@ void HwOpus::OpenOpusDecoder(Kernel::HLERequestContext& ctx) {
232 270
233 IPC::ResponseBuilder rb{ctx, 2, 0, 1}; 271 IPC::ResponseBuilder rb{ctx, 2, 0, 1};
234 rb.Push(RESULT_SUCCESS); 272 rb.Push(RESULT_SUCCESS);
235 rb.PushIpcInterface<IHardwareOpusDecoderManager>(std::move(decoder), sample_rate, 273 rb.PushIpcInterface<IHardwareOpusDecoderManager>(
236 channel_count); 274 std::make_unique<OpusDecoderState>(std::move(decoder), sample_rate, channel_count));
237} 275}
238 276
239HwOpus::HwOpus() : ServiceFramework("hwopus") { 277HwOpus::HwOpus() : ServiceFramework("hwopus") {
diff --git a/src/core/hle/service/nvdrv/devices/nvdisp_disp0.cpp b/src/core/hle/service/nvdrv/devices/nvdisp_disp0.cpp
index dbe7ee6e8..20c7c39aa 100644
--- a/src/core/hle/service/nvdrv/devices/nvdisp_disp0.cpp
+++ b/src/core/hle/service/nvdrv/devices/nvdisp_disp0.cpp
@@ -36,7 +36,7 @@ void nvdisp_disp0::flip(u32 buffer_handle, u32 offset, u32 format, u32 width, u3
36 36
37 auto& instance = Core::System::GetInstance(); 37 auto& instance = Core::System::GetInstance();
38 instance.GetPerfStats().EndGameFrame(); 38 instance.GetPerfStats().EndGameFrame();
39 instance.Renderer().SwapBuffers(framebuffer); 39 instance.GPU().SwapBuffers(framebuffer);
40} 40}
41 41
42} // namespace Service::Nvidia::Devices 42} // namespace Service::Nvidia::Devices
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.cpp b/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.cpp
index 466db7ccd..a34b9e753 100644
--- a/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.cpp
+++ b/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.cpp
@@ -178,7 +178,7 @@ u32 nvhost_as_gpu::UnmapBuffer(const std::vector<u8>& input, std::vector<u8>& ou
178 auto& gpu = system_instance.GPU(); 178 auto& gpu = system_instance.GPU();
179 auto cpu_addr = gpu.MemoryManager().GpuToCpuAddress(params.offset); 179 auto cpu_addr = gpu.MemoryManager().GpuToCpuAddress(params.offset);
180 ASSERT(cpu_addr); 180 ASSERT(cpu_addr);
181 system_instance.Renderer().Rasterizer().FlushAndInvalidateRegion(*cpu_addr, itr->second.size); 181 gpu.FlushAndInvalidateRegion(*cpu_addr, itr->second.size);
182 182
183 params.offset = gpu.MemoryManager().UnmapBuffer(params.offset, itr->second.size); 183 params.offset = gpu.MemoryManager().UnmapBuffer(params.offset, itr->second.size);
184 184
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_gpu.cpp b/src/core/hle/service/nvdrv/devices/nvhost_gpu.cpp
index 0a650f36c..8ce7bc7a5 100644
--- a/src/core/hle/service/nvdrv/devices/nvhost_gpu.cpp
+++ b/src/core/hle/service/nvdrv/devices/nvhost_gpu.cpp
@@ -136,16 +136,6 @@ u32 nvhost_gpu::AllocateObjectContext(const std::vector<u8>& input, std::vector<
136 return 0; 136 return 0;
137} 137}
138 138
139static void PushGPUEntries(Tegra::CommandList&& entries) {
140 if (entries.empty()) {
141 return;
142 }
143
144 auto& dma_pusher{Core::System::GetInstance().GPU().DmaPusher()};
145 dma_pusher.Push(std::move(entries));
146 dma_pusher.DispatchCalls();
147}
148
149u32 nvhost_gpu::SubmitGPFIFO(const std::vector<u8>& input, std::vector<u8>& output) { 139u32 nvhost_gpu::SubmitGPFIFO(const std::vector<u8>& input, std::vector<u8>& output) {
150 if (input.size() < sizeof(IoctlSubmitGpfifo)) { 140 if (input.size() < sizeof(IoctlSubmitGpfifo)) {
151 UNIMPLEMENTED(); 141 UNIMPLEMENTED();
@@ -163,7 +153,7 @@ u32 nvhost_gpu::SubmitGPFIFO(const std::vector<u8>& input, std::vector<u8>& outp
163 std::memcpy(entries.data(), &input[sizeof(IoctlSubmitGpfifo)], 153 std::memcpy(entries.data(), &input[sizeof(IoctlSubmitGpfifo)],
164 params.num_entries * sizeof(Tegra::CommandListHeader)); 154 params.num_entries * sizeof(Tegra::CommandListHeader));
165 155
166 PushGPUEntries(std::move(entries)); 156 Core::System::GetInstance().GPU().PushGPUEntries(std::move(entries));
167 157
168 params.fence_out.id = 0; 158 params.fence_out.id = 0;
169 params.fence_out.value = 0; 159 params.fence_out.value = 0;
@@ -184,7 +174,7 @@ u32 nvhost_gpu::KickoffPB(const std::vector<u8>& input, std::vector<u8>& output)
184 Memory::ReadBlock(params.address, entries.data(), 174 Memory::ReadBlock(params.address, entries.data(),
185 params.num_entries * sizeof(Tegra::CommandListHeader)); 175 params.num_entries * sizeof(Tegra::CommandListHeader));
186 176
187 PushGPUEntries(std::move(entries)); 177 Core::System::GetInstance().GPU().PushGPUEntries(std::move(entries));
188 178
189 params.fence_out.id = 0; 179 params.fence_out.id = 0;
190 params.fence_out.value = 0; 180 params.fence_out.value = 0;
diff --git a/src/core/hle/service/nvflinger/nvflinger.cpp b/src/core/hle/service/nvflinger/nvflinger.cpp
index 56f31e2ac..fc496b654 100644
--- a/src/core/hle/service/nvflinger/nvflinger.cpp
+++ b/src/core/hle/service/nvflinger/nvflinger.cpp
@@ -186,7 +186,7 @@ void NVFlinger::Compose() {
186 186
187 // There was no queued buffer to draw, render previous frame 187 // There was no queued buffer to draw, render previous frame
188 system_instance.GetPerfStats().EndGameFrame(); 188 system_instance.GetPerfStats().EndGameFrame();
189 system_instance.Renderer().SwapBuffers({}); 189 system_instance.GPU().SwapBuffers({});
190 continue; 190 continue;
191 } 191 }
192 192
diff --git a/src/core/hle/service/service.cpp b/src/core/hle/service/service.cpp
index 117f87a45..576fd6407 100644
--- a/src/core/hle/service/service.cpp
+++ b/src/core/hle/service/service.cpp
@@ -76,7 +76,8 @@ namespace Service {
76 * Creates a function string for logging, complete with the name (or header code, depending 76 * Creates a function string for logging, complete with the name (or header code, depending
77 * on what's passed in) the port name, and all the cmd_buff arguments. 77 * on what's passed in) the port name, and all the cmd_buff arguments.
78 */ 78 */
79[[maybe_unused]] static std::string MakeFunctionString(const char* name, const char* port_name, 79[[maybe_unused]] static std::string MakeFunctionString(std::string_view name,
80 std::string_view port_name,
80 const u32* cmd_buff) { 81 const u32* cmd_buff) {
81 // Number of params == bits 0-5 + bits 6-11 82 // Number of params == bits 0-5 + bits 6-11
82 int num_params = (cmd_buff[0] & 0x3F) + ((cmd_buff[0] >> 6) & 0x3F); 83 int num_params = (cmd_buff[0] & 0x3F) + ((cmd_buff[0] >> 6) & 0x3F);
@@ -158,9 +159,7 @@ void ServiceFrameworkBase::InvokeRequest(Kernel::HLERequestContext& ctx) {
158 return ReportUnimplementedFunction(ctx, info); 159 return ReportUnimplementedFunction(ctx, info);
159 } 160 }
160 161
161 LOG_TRACE( 162 LOG_TRACE(Service, "{}", MakeFunctionString(info->name, GetServiceName(), ctx.CommandBuffer()));
162 Service, "{}",
163 MakeFunctionString(info->name, GetServiceName().c_str(), ctx.CommandBuffer()).c_str());
164 handler_invoker(this, info->handler_callback, ctx); 163 handler_invoker(this, info->handler_callback, ctx);
165} 164}
166 165
diff --git a/src/core/hle/service/sm/controller.cpp b/src/core/hle/service/sm/controller.cpp
index 74da4d5e6..e9ee73710 100644
--- a/src/core/hle/service/sm/controller.cpp
+++ b/src/core/hle/service/sm/controller.cpp
@@ -30,7 +30,7 @@ void Controller::DuplicateSession(Kernel::HLERequestContext& ctx) {
30 30
31 IPC::ResponseBuilder rb{ctx, 2, 0, 1, IPC::ResponseBuilder::Flags::AlwaysMoveHandles}; 31 IPC::ResponseBuilder rb{ctx, 2, 0, 1, IPC::ResponseBuilder::Flags::AlwaysMoveHandles};
32 rb.Push(RESULT_SUCCESS); 32 rb.Push(RESULT_SUCCESS);
33 Kernel::SharedPtr<Kernel::ClientSession> session{ctx.Session()->parent->client}; 33 Kernel::SharedPtr<Kernel::ClientSession> session{ctx.Session()->GetParent()->client};
34 rb.PushMoveObjects(session); 34 rb.PushMoveObjects(session);
35 35
36 LOG_DEBUG(Service, "session={}", session->GetObjectId()); 36 LOG_DEBUG(Service, "session={}", session->GetObjectId());
diff --git a/src/core/hle/service/vi/vi.cpp b/src/core/hle/service/vi/vi.cpp
index a975767bb..566cd6006 100644
--- a/src/core/hle/service/vi/vi.cpp
+++ b/src/core/hle/service/vi/vi.cpp
@@ -24,6 +24,7 @@
24#include "core/hle/service/nvdrv/nvdrv.h" 24#include "core/hle/service/nvdrv/nvdrv.h"
25#include "core/hle/service/nvflinger/buffer_queue.h" 25#include "core/hle/service/nvflinger/buffer_queue.h"
26#include "core/hle/service/nvflinger/nvflinger.h" 26#include "core/hle/service/nvflinger/nvflinger.h"
27#include "core/hle/service/service.h"
27#include "core/hle/service/vi/vi.h" 28#include "core/hle/service/vi/vi.h"
28#include "core/hle/service/vi/vi_m.h" 29#include "core/hle/service/vi/vi_m.h"
29#include "core/hle/service/vi/vi_s.h" 30#include "core/hle/service/vi/vi_s.h"
@@ -33,6 +34,7 @@
33namespace Service::VI { 34namespace Service::VI {
34 35
35constexpr ResultCode ERR_OPERATION_FAILED{ErrorModule::VI, 1}; 36constexpr ResultCode ERR_OPERATION_FAILED{ErrorModule::VI, 1};
37constexpr ResultCode ERR_PERMISSION_DENIED{ErrorModule::VI, 5};
36constexpr ResultCode ERR_UNSUPPORTED{ErrorModule::VI, 6}; 38constexpr ResultCode ERR_UNSUPPORTED{ErrorModule::VI, 6};
37constexpr ResultCode ERR_NOT_FOUND{ErrorModule::VI, 7}; 39constexpr ResultCode ERR_NOT_FOUND{ErrorModule::VI, 7};
38 40
@@ -1203,26 +1205,40 @@ IApplicationDisplayService::IApplicationDisplayService(
1203 RegisterHandlers(functions); 1205 RegisterHandlers(functions);
1204} 1206}
1205 1207
1206Module::Interface::Interface(std::shared_ptr<Module> module, const char* name, 1208static bool IsValidServiceAccess(Permission permission, Policy policy) {
1207 std::shared_ptr<NVFlinger::NVFlinger> nv_flinger) 1209 if (permission == Permission::User) {
1208 : ServiceFramework(name), module(std::move(module)), nv_flinger(std::move(nv_flinger)) {} 1210 return policy == Policy::User;
1211 }
1212
1213 if (permission == Permission::System || permission == Permission::Manager) {
1214 return policy == Policy::User || policy == Policy::Compositor;
1215 }
1209 1216
1210Module::Interface::~Interface() = default; 1217 return false;
1218}
1211 1219
1212void Module::Interface::GetDisplayService(Kernel::HLERequestContext& ctx) { 1220void detail::GetDisplayServiceImpl(Kernel::HLERequestContext& ctx,
1213 LOG_WARNING(Service_VI, "(STUBBED) called"); 1221 std::shared_ptr<NVFlinger::NVFlinger> nv_flinger,
1222 Permission permission) {
1223 IPC::RequestParser rp{ctx};
1224 const auto policy = rp.PopEnum<Policy>();
1225
1226 if (!IsValidServiceAccess(permission, policy)) {
1227 IPC::ResponseBuilder rb{ctx, 2};
1228 rb.Push(ERR_PERMISSION_DENIED);
1229 return;
1230 }
1214 1231
1215 IPC::ResponseBuilder rb{ctx, 2, 0, 1}; 1232 IPC::ResponseBuilder rb{ctx, 2, 0, 1};
1216 rb.Push(RESULT_SUCCESS); 1233 rb.Push(RESULT_SUCCESS);
1217 rb.PushIpcInterface<IApplicationDisplayService>(nv_flinger); 1234 rb.PushIpcInterface<IApplicationDisplayService>(std::move(nv_flinger));
1218} 1235}
1219 1236
1220void InstallInterfaces(SM::ServiceManager& service_manager, 1237void InstallInterfaces(SM::ServiceManager& service_manager,
1221 std::shared_ptr<NVFlinger::NVFlinger> nv_flinger) { 1238 std::shared_ptr<NVFlinger::NVFlinger> nv_flinger) {
1222 auto module = std::make_shared<Module>(); 1239 std::make_shared<VI_M>(nv_flinger)->InstallAsService(service_manager);
1223 std::make_shared<VI_M>(module, nv_flinger)->InstallAsService(service_manager); 1240 std::make_shared<VI_S>(nv_flinger)->InstallAsService(service_manager);
1224 std::make_shared<VI_S>(module, nv_flinger)->InstallAsService(service_manager); 1241 std::make_shared<VI_U>(nv_flinger)->InstallAsService(service_manager);
1225 std::make_shared<VI_U>(module, nv_flinger)->InstallAsService(service_manager);
1226} 1242}
1227 1243
1228} // namespace Service::VI 1244} // namespace Service::VI
diff --git a/src/core/hle/service/vi/vi.h b/src/core/hle/service/vi/vi.h
index e3963502a..6b66f8b81 100644
--- a/src/core/hle/service/vi/vi.h
+++ b/src/core/hle/service/vi/vi.h
@@ -4,12 +4,21 @@
4 4
5#pragma once 5#pragma once
6 6
7#include "core/hle/service/service.h" 7#include <memory>
8#include "common/common_types.h"
9
10namespace Kernel {
11class HLERequestContext;
12}
8 13
9namespace Service::NVFlinger { 14namespace Service::NVFlinger {
10class NVFlinger; 15class NVFlinger;
11} 16}
12 17
18namespace Service::SM {
19class ServiceManager;
20}
21
13namespace Service::VI { 22namespace Service::VI {
14 23
15enum class DisplayResolution : u32 { 24enum class DisplayResolution : u32 {
@@ -19,22 +28,25 @@ enum class DisplayResolution : u32 {
19 UndockedHeight = 720, 28 UndockedHeight = 720,
20}; 29};
21 30
22class Module final { 31/// Permission level for a particular VI service instance
23public: 32enum class Permission {
24 class Interface : public ServiceFramework<Interface> { 33 User,
25 public: 34 System,
26 explicit Interface(std::shared_ptr<Module> module, const char* name, 35 Manager,
27 std::shared_ptr<NVFlinger::NVFlinger> nv_flinger); 36};
28 ~Interface() override;
29
30 void GetDisplayService(Kernel::HLERequestContext& ctx);
31 37
32 protected: 38/// A policy type that may be requested via GetDisplayService and
33 std::shared_ptr<Module> module; 39/// GetDisplayServiceWithProxyNameExchange
34 std::shared_ptr<NVFlinger::NVFlinger> nv_flinger; 40enum class Policy {
35 }; 41 User,
42 Compositor,
36}; 43};
37 44
45namespace detail {
46void GetDisplayServiceImpl(Kernel::HLERequestContext& ctx,
47 std::shared_ptr<NVFlinger::NVFlinger> nv_flinger, Permission permission);
48} // namespace detail
49
38/// Registers all VI services with the specified service manager. 50/// Registers all VI services with the specified service manager.
39void InstallInterfaces(SM::ServiceManager& service_manager, 51void InstallInterfaces(SM::ServiceManager& service_manager,
40 std::shared_ptr<NVFlinger::NVFlinger> nv_flinger); 52 std::shared_ptr<NVFlinger::NVFlinger> nv_flinger);
diff --git a/src/core/hle/service/vi/vi_m.cpp b/src/core/hle/service/vi/vi_m.cpp
index 207c06b16..06070087f 100644
--- a/src/core/hle/service/vi/vi_m.cpp
+++ b/src/core/hle/service/vi/vi_m.cpp
@@ -2,12 +2,14 @@
2// Licensed under GPLv2 or any later version 2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included. 3// Refer to the license.txt file included.
4 4
5#include "common/logging/log.h"
6#include "core/hle/service/vi/vi.h"
5#include "core/hle/service/vi/vi_m.h" 7#include "core/hle/service/vi/vi_m.h"
6 8
7namespace Service::VI { 9namespace Service::VI {
8 10
9VI_M::VI_M(std::shared_ptr<Module> module, std::shared_ptr<NVFlinger::NVFlinger> nv_flinger) 11VI_M::VI_M(std::shared_ptr<NVFlinger::NVFlinger> nv_flinger)
10 : Module::Interface(std::move(module), "vi:m", std::move(nv_flinger)) { 12 : ServiceFramework{"vi:m"}, nv_flinger{std::move(nv_flinger)} {
11 static const FunctionInfo functions[] = { 13 static const FunctionInfo functions[] = {
12 {2, &VI_M::GetDisplayService, "GetDisplayService"}, 14 {2, &VI_M::GetDisplayService, "GetDisplayService"},
13 {3, nullptr, "GetDisplayServiceWithProxyNameExchange"}, 15 {3, nullptr, "GetDisplayServiceWithProxyNameExchange"},
@@ -17,4 +19,10 @@ VI_M::VI_M(std::shared_ptr<Module> module, std::shared_ptr<NVFlinger::NVFlinger>
17 19
18VI_M::~VI_M() = default; 20VI_M::~VI_M() = default;
19 21
22void VI_M::GetDisplayService(Kernel::HLERequestContext& ctx) {
23 LOG_DEBUG(Service_VI, "called");
24
25 detail::GetDisplayServiceImpl(ctx, nv_flinger, Permission::Manager);
26}
27
20} // namespace Service::VI 28} // namespace Service::VI
diff --git a/src/core/hle/service/vi/vi_m.h b/src/core/hle/service/vi/vi_m.h
index 487d58d50..290e06689 100644
--- a/src/core/hle/service/vi/vi_m.h
+++ b/src/core/hle/service/vi/vi_m.h
@@ -4,14 +4,27 @@
4 4
5#pragma once 5#pragma once
6 6
7#include "core/hle/service/vi/vi.h" 7#include "core/hle/service/service.h"
8
9namespace Kernel {
10class HLERequestContext;
11}
12
13namespace Service::NVFlinger {
14class NVFlinger;
15}
8 16
9namespace Service::VI { 17namespace Service::VI {
10 18
11class VI_M final : public Module::Interface { 19class VI_M final : public ServiceFramework<VI_M> {
12public: 20public:
13 explicit VI_M(std::shared_ptr<Module> module, std::shared_ptr<NVFlinger::NVFlinger> nv_flinger); 21 explicit VI_M(std::shared_ptr<NVFlinger::NVFlinger> nv_flinger);
14 ~VI_M() override; 22 ~VI_M() override;
23
24private:
25 void GetDisplayService(Kernel::HLERequestContext& ctx);
26
27 std::shared_ptr<NVFlinger::NVFlinger> nv_flinger;
15}; 28};
16 29
17} // namespace Service::VI 30} // namespace Service::VI
diff --git a/src/core/hle/service/vi/vi_s.cpp b/src/core/hle/service/vi/vi_s.cpp
index 920e6a1f6..57c596cc4 100644
--- a/src/core/hle/service/vi/vi_s.cpp
+++ b/src/core/hle/service/vi/vi_s.cpp
@@ -2,12 +2,14 @@
2// Licensed under GPLv2 or any later version 2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included. 3// Refer to the license.txt file included.
4 4
5#include "common/logging/log.h"
6#include "core/hle/service/vi/vi.h"
5#include "core/hle/service/vi/vi_s.h" 7#include "core/hle/service/vi/vi_s.h"
6 8
7namespace Service::VI { 9namespace Service::VI {
8 10
9VI_S::VI_S(std::shared_ptr<Module> module, std::shared_ptr<NVFlinger::NVFlinger> nv_flinger) 11VI_S::VI_S(std::shared_ptr<NVFlinger::NVFlinger> nv_flinger)
10 : Module::Interface(std::move(module), "vi:s", std::move(nv_flinger)) { 12 : ServiceFramework{"vi:s"}, nv_flinger{std::move(nv_flinger)} {
11 static const FunctionInfo functions[] = { 13 static const FunctionInfo functions[] = {
12 {1, &VI_S::GetDisplayService, "GetDisplayService"}, 14 {1, &VI_S::GetDisplayService, "GetDisplayService"},
13 {3, nullptr, "GetDisplayServiceWithProxyNameExchange"}, 15 {3, nullptr, "GetDisplayServiceWithProxyNameExchange"},
@@ -17,4 +19,10 @@ VI_S::VI_S(std::shared_ptr<Module> module, std::shared_ptr<NVFlinger::NVFlinger>
17 19
18VI_S::~VI_S() = default; 20VI_S::~VI_S() = default;
19 21
22void VI_S::GetDisplayService(Kernel::HLERequestContext& ctx) {
23 LOG_DEBUG(Service_VI, "called");
24
25 detail::GetDisplayServiceImpl(ctx, nv_flinger, Permission::System);
26}
27
20} // namespace Service::VI 28} // namespace Service::VI
diff --git a/src/core/hle/service/vi/vi_s.h b/src/core/hle/service/vi/vi_s.h
index bbc31148f..47804dc0b 100644
--- a/src/core/hle/service/vi/vi_s.h
+++ b/src/core/hle/service/vi/vi_s.h
@@ -4,14 +4,27 @@
4 4
5#pragma once 5#pragma once
6 6
7#include "core/hle/service/vi/vi.h" 7#include "core/hle/service/service.h"
8
9namespace Kernel {
10class HLERequestContext;
11}
12
13namespace Service::NVFlinger {
14class NVFlinger;
15}
8 16
9namespace Service::VI { 17namespace Service::VI {
10 18
11class VI_S final : public Module::Interface { 19class VI_S final : public ServiceFramework<VI_S> {
12public: 20public:
13 explicit VI_S(std::shared_ptr<Module> module, std::shared_ptr<NVFlinger::NVFlinger> nv_flinger); 21 explicit VI_S(std::shared_ptr<NVFlinger::NVFlinger> nv_flinger);
14 ~VI_S() override; 22 ~VI_S() override;
23
24private:
25 void GetDisplayService(Kernel::HLERequestContext& ctx);
26
27 std::shared_ptr<NVFlinger::NVFlinger> nv_flinger;
15}; 28};
16 29
17} // namespace Service::VI 30} // namespace Service::VI
diff --git a/src/core/hle/service/vi/vi_u.cpp b/src/core/hle/service/vi/vi_u.cpp
index d81e410d6..9d5ceb608 100644
--- a/src/core/hle/service/vi/vi_u.cpp
+++ b/src/core/hle/service/vi/vi_u.cpp
@@ -2,12 +2,14 @@
2// Licensed under GPLv2 or any later version 2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included. 3// Refer to the license.txt file included.
4 4
5#include "common/logging/log.h"
6#include "core/hle/service/vi/vi.h"
5#include "core/hle/service/vi/vi_u.h" 7#include "core/hle/service/vi/vi_u.h"
6 8
7namespace Service::VI { 9namespace Service::VI {
8 10
9VI_U::VI_U(std::shared_ptr<Module> module, std::shared_ptr<NVFlinger::NVFlinger> nv_flinger) 11VI_U::VI_U(std::shared_ptr<NVFlinger::NVFlinger> nv_flinger)
10 : Module::Interface(std::move(module), "vi:u", std::move(nv_flinger)) { 12 : ServiceFramework{"vi:u"}, nv_flinger{std::move(nv_flinger)} {
11 static const FunctionInfo functions[] = { 13 static const FunctionInfo functions[] = {
12 {0, &VI_U::GetDisplayService, "GetDisplayService"}, 14 {0, &VI_U::GetDisplayService, "GetDisplayService"},
13 }; 15 };
@@ -16,4 +18,10 @@ VI_U::VI_U(std::shared_ptr<Module> module, std::shared_ptr<NVFlinger::NVFlinger>
16 18
17VI_U::~VI_U() = default; 19VI_U::~VI_U() = default;
18 20
21void VI_U::GetDisplayService(Kernel::HLERequestContext& ctx) {
22 LOG_DEBUG(Service_VI, "called");
23
24 detail::GetDisplayServiceImpl(ctx, nv_flinger, Permission::User);
25}
26
19} // namespace Service::VI 27} // namespace Service::VI
diff --git a/src/core/hle/service/vi/vi_u.h b/src/core/hle/service/vi/vi_u.h
index b92f28c92..19bdb73b0 100644
--- a/src/core/hle/service/vi/vi_u.h
+++ b/src/core/hle/service/vi/vi_u.h
@@ -4,14 +4,27 @@
4 4
5#pragma once 5#pragma once
6 6
7#include "core/hle/service/vi/vi.h" 7#include "core/hle/service/service.h"
8
9namespace Kernel {
10class HLERequestContext;
11}
12
13namespace Service::NVFlinger {
14class NVFlinger;
15}
8 16
9namespace Service::VI { 17namespace Service::VI {
10 18
11class VI_U final : public Module::Interface { 19class VI_U final : public ServiceFramework<VI_U> {
12public: 20public:
13 explicit VI_U(std::shared_ptr<Module> module, std::shared_ptr<NVFlinger::NVFlinger> nv_flinger); 21 explicit VI_U(std::shared_ptr<NVFlinger::NVFlinger> nv_flinger);
14 ~VI_U() override; 22 ~VI_U() override;
23
24private:
25 void GetDisplayService(Kernel::HLERequestContext& ctx);
26
27 std::shared_ptr<NVFlinger::NVFlinger> nv_flinger;
15}; 28};
16 29
17} // namespace Service::VI 30} // namespace Service::VI
diff --git a/src/core/memory.cpp b/src/core/memory.cpp
index f809567b6..6591c45d2 100644
--- a/src/core/memory.cpp
+++ b/src/core/memory.cpp
@@ -171,9 +171,6 @@ T Read(const VAddr vaddr) {
171 return value; 171 return value;
172 } 172 }
173 173
174 // The memory access might do an MMIO or cached access, so we have to lock the HLE kernel state
175 std::lock_guard<std::recursive_mutex> lock(HLE::g_hle_lock);
176
177 PageType type = current_page_table->attributes[vaddr >> PAGE_BITS]; 174 PageType type = current_page_table->attributes[vaddr >> PAGE_BITS];
178 switch (type) { 175 switch (type) {
179 case PageType::Unmapped: 176 case PageType::Unmapped:
@@ -204,9 +201,6 @@ void Write(const VAddr vaddr, const T data) {
204 return; 201 return;
205 } 202 }
206 203
207 // The memory access might do an MMIO or cached access, so we have to lock the HLE kernel state
208 std::lock_guard<std::recursive_mutex> lock(HLE::g_hle_lock);
209
210 PageType type = current_page_table->attributes[vaddr >> PAGE_BITS]; 204 PageType type = current_page_table->attributes[vaddr >> PAGE_BITS];
211 switch (type) { 205 switch (type) {
212 case PageType::Unmapped: 206 case PageType::Unmapped:
@@ -362,16 +356,16 @@ void RasterizerFlushVirtualRegion(VAddr start, u64 size, FlushMode mode) {
362 const VAddr overlap_end = std::min(end, region_end); 356 const VAddr overlap_end = std::min(end, region_end);
363 const VAddr overlap_size = overlap_end - overlap_start; 357 const VAddr overlap_size = overlap_end - overlap_start;
364 358
365 auto& rasterizer = system_instance.Renderer().Rasterizer(); 359 auto& gpu = system_instance.GPU();
366 switch (mode) { 360 switch (mode) {
367 case FlushMode::Flush: 361 case FlushMode::Flush:
368 rasterizer.FlushRegion(overlap_start, overlap_size); 362 gpu.FlushRegion(overlap_start, overlap_size);
369 break; 363 break;
370 case FlushMode::Invalidate: 364 case FlushMode::Invalidate:
371 rasterizer.InvalidateRegion(overlap_start, overlap_size); 365 gpu.InvalidateRegion(overlap_start, overlap_size);
372 break; 366 break;
373 case FlushMode::FlushAndInvalidate: 367 case FlushMode::FlushAndInvalidate:
374 rasterizer.FlushAndInvalidateRegion(overlap_start, overlap_size); 368 gpu.FlushAndInvalidateRegion(overlap_start, overlap_size);
375 break; 369 break;
376 } 370 }
377 }; 371 };
diff --git a/src/core/settings.cpp b/src/core/settings.cpp
index 2e232e1e7..6dd3139cc 100644
--- a/src/core/settings.cpp
+++ b/src/core/settings.cpp
@@ -91,7 +91,10 @@ void LogSettings() {
91 LogSetting("Renderer_UseResolutionFactor", Settings::values.resolution_factor); 91 LogSetting("Renderer_UseResolutionFactor", Settings::values.resolution_factor);
92 LogSetting("Renderer_UseFrameLimit", Settings::values.use_frame_limit); 92 LogSetting("Renderer_UseFrameLimit", Settings::values.use_frame_limit);
93 LogSetting("Renderer_FrameLimit", Settings::values.frame_limit); 93 LogSetting("Renderer_FrameLimit", Settings::values.frame_limit);
94 LogSetting("Renderer_UseDiskShaderCache", Settings::values.use_disk_shader_cache);
94 LogSetting("Renderer_UseAccurateGpuEmulation", Settings::values.use_accurate_gpu_emulation); 95 LogSetting("Renderer_UseAccurateGpuEmulation", Settings::values.use_accurate_gpu_emulation);
96 LogSetting("Renderer_UseAsynchronousGpuEmulation",
97 Settings::values.use_asynchronous_gpu_emulation);
95 LogSetting("Audio_OutputEngine", Settings::values.sink_id); 98 LogSetting("Audio_OutputEngine", Settings::values.sink_id);
96 LogSetting("Audio_EnableAudioStretching", Settings::values.enable_audio_stretching); 99 LogSetting("Audio_EnableAudioStretching", Settings::values.enable_audio_stretching);
97 LogSetting("Audio_OutputDevice", Settings::values.audio_device_id); 100 LogSetting("Audio_OutputDevice", Settings::values.audio_device_id);
diff --git a/src/core/settings.h b/src/core/settings.h
index 7e76e0466..cdfb2f742 100644
--- a/src/core/settings.h
+++ b/src/core/settings.h
@@ -393,6 +393,7 @@ struct Values {
393 u16 frame_limit; 393 u16 frame_limit;
394 bool use_disk_shader_cache; 394 bool use_disk_shader_cache;
395 bool use_accurate_gpu_emulation; 395 bool use_accurate_gpu_emulation;
396 bool use_asynchronous_gpu_emulation;
396 397
397 float bg_red; 398 float bg_red;
398 float bg_green; 399 float bg_green;
diff --git a/src/core/telemetry_session.cpp b/src/core/telemetry_session.cpp
index 58dfcc4df..e1db06811 100644
--- a/src/core/telemetry_session.cpp
+++ b/src/core/telemetry_session.cpp
@@ -162,6 +162,8 @@ TelemetrySession::TelemetrySession() {
162 Settings::values.use_disk_shader_cache); 162 Settings::values.use_disk_shader_cache);
163 AddField(Telemetry::FieldType::UserConfig, "Renderer_UseAccurateGpuEmulation", 163 AddField(Telemetry::FieldType::UserConfig, "Renderer_UseAccurateGpuEmulation",
164 Settings::values.use_accurate_gpu_emulation); 164 Settings::values.use_accurate_gpu_emulation);
165 AddField(Telemetry::FieldType::UserConfig, "Renderer_UseAsynchronousGpuEmulation",
166 Settings::values.use_asynchronous_gpu_emulation);
165 AddField(Telemetry::FieldType::UserConfig, "System_UseDockedMode", 167 AddField(Telemetry::FieldType::UserConfig, "System_UseDockedMode",
166 Settings::values.use_docked_mode); 168 Settings::values.use_docked_mode);
167} 169}
diff --git a/src/tests/core/arm/arm_test_common.cpp b/src/tests/core/arm/arm_test_common.cpp
index 9b8a44fa1..6fe56833d 100644
--- a/src/tests/core/arm/arm_test_common.cpp
+++ b/src/tests/core/arm/arm_test_common.cpp
@@ -13,11 +13,11 @@
13namespace ArmTests { 13namespace ArmTests {
14 14
15TestEnvironment::TestEnvironment(bool mutable_memory_) 15TestEnvironment::TestEnvironment(bool mutable_memory_)
16 : mutable_memory(mutable_memory_), test_memory(std::make_shared<TestMemory>(this)) { 16 : mutable_memory(mutable_memory_),
17 17 test_memory(std::make_shared<TestMemory>(this)), kernel{Core::System::GetInstance()} {
18 auto process = Kernel::Process::Create(kernel, ""); 18 auto process = Kernel::Process::Create(Core::System::GetInstance(), "");
19 kernel.MakeCurrentProcess(process.get()); 19 kernel.MakeCurrentProcess(process.get());
20 page_table = &Core::CurrentProcess()->VMManager().page_table; 20 page_table = &process->VMManager().page_table;
21 21
22 std::fill(page_table->pointers.begin(), page_table->pointers.end(), nullptr); 22 std::fill(page_table->pointers.begin(), page_table->pointers.end(), nullptr);
23 page_table->special_regions.clear(); 23 page_table->special_regions.clear();
diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt
index 3e9d2b3be..0c3038c52 100644
--- a/src/video_core/CMakeLists.txt
+++ b/src/video_core/CMakeLists.txt
@@ -17,6 +17,12 @@ add_library(video_core STATIC
17 engines/shader_header.h 17 engines/shader_header.h
18 gpu.cpp 18 gpu.cpp
19 gpu.h 19 gpu.h
20 gpu_asynch.cpp
21 gpu_asynch.h
22 gpu_synch.cpp
23 gpu_synch.h
24 gpu_thread.cpp
25 gpu_thread.h
20 macro_interpreter.cpp 26 macro_interpreter.cpp
21 macro_interpreter.h 27 macro_interpreter.h
22 memory_manager.cpp 28 memory_manager.cpp
@@ -74,6 +80,7 @@ add_library(video_core STATIC
74 shader/decode/hfma2.cpp 80 shader/decode/hfma2.cpp
75 shader/decode/conversion.cpp 81 shader/decode/conversion.cpp
76 shader/decode/memory.cpp 82 shader/decode/memory.cpp
83 shader/decode/texture.cpp
77 shader/decode/float_set_predicate.cpp 84 shader/decode/float_set_predicate.cpp
78 shader/decode/integer_set_predicate.cpp 85 shader/decode/integer_set_predicate.cpp
79 shader/decode/half_set_predicate.cpp 86 shader/decode/half_set_predicate.cpp
@@ -94,6 +101,8 @@ add_library(video_core STATIC
94 surface.h 101 surface.h
95 textures/astc.cpp 102 textures/astc.cpp
96 textures/astc.h 103 textures/astc.h
104 textures/convert.cpp
105 textures/convert.h
97 textures/decoders.cpp 106 textures/decoders.cpp
98 textures/decoders.h 107 textures/decoders.h
99 textures/texture.h 108 textures/texture.h
@@ -104,6 +113,8 @@ add_library(video_core STATIC
104if (ENABLE_VULKAN) 113if (ENABLE_VULKAN)
105 target_sources(video_core PRIVATE 114 target_sources(video_core PRIVATE
106 renderer_vulkan/declarations.h 115 renderer_vulkan/declarations.h
116 renderer_vulkan/maxwell_to_vk.cpp
117 renderer_vulkan/maxwell_to_vk.h
107 renderer_vulkan/vk_buffer_cache.cpp 118 renderer_vulkan/vk_buffer_cache.cpp
108 renderer_vulkan/vk_buffer_cache.h 119 renderer_vulkan/vk_buffer_cache.h
109 renderer_vulkan/vk_device.cpp 120 renderer_vulkan/vk_device.cpp
diff --git a/src/video_core/dma_pusher.cpp b/src/video_core/dma_pusher.cpp
index 669541b4b..bff1a37ff 100644
--- a/src/video_core/dma_pusher.cpp
+++ b/src/video_core/dma_pusher.cpp
@@ -39,7 +39,7 @@ bool DmaPusher::Step() {
39 } 39 }
40 40
41 const CommandList& command_list{dma_pushbuffer.front()}; 41 const CommandList& command_list{dma_pushbuffer.front()};
42 const CommandListHeader& command_list_header{command_list[dma_pushbuffer_subindex++]}; 42 const CommandListHeader command_list_header{command_list[dma_pushbuffer_subindex++]};
43 GPUVAddr dma_get = command_list_header.addr; 43 GPUVAddr dma_get = command_list_header.addr;
44 GPUVAddr dma_put = dma_get + command_list_header.size * sizeof(u32); 44 GPUVAddr dma_put = dma_get + command_list_header.size * sizeof(u32);
45 bool non_main = command_list_header.is_non_main; 45 bool non_main = command_list_header.is_non_main;
diff --git a/src/video_core/engines/fermi_2d.cpp b/src/video_core/engines/fermi_2d.cpp
index 540dcc52c..03b7ee5d8 100644
--- a/src/video_core/engines/fermi_2d.cpp
+++ b/src/video_core/engines/fermi_2d.cpp
@@ -2,12 +2,11 @@
2// Licensed under GPLv2 or any later version 2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included. 3// Refer to the license.txt file included.
4 4
5#include "core/core.h" 5#include "common/assert.h"
6#include "core/memory.h" 6#include "common/logging/log.h"
7#include "common/math_util.h"
7#include "video_core/engines/fermi_2d.h" 8#include "video_core/engines/fermi_2d.h"
8#include "video_core/engines/maxwell_3d.h"
9#include "video_core/rasterizer_interface.h" 9#include "video_core/rasterizer_interface.h"
10#include "video_core/textures/decoders.h"
11 10
12namespace Tegra::Engines { 11namespace Tegra::Engines {
13 12
diff --git a/src/video_core/engines/fermi_2d.h b/src/video_core/engines/fermi_2d.h
index c69f74cc5..80523e320 100644
--- a/src/video_core/engines/fermi_2d.h
+++ b/src/video_core/engines/fermi_2d.h
@@ -5,7 +5,7 @@
5#pragma once 5#pragma once
6 6
7#include <array> 7#include <array>
8#include "common/assert.h" 8#include <cstddef>
9#include "common/bit_field.h" 9#include "common/bit_field.h"
10#include "common/common_funcs.h" 10#include "common/common_funcs.h"
11#include "common/common_types.h" 11#include "common/common_types.h"
diff --git a/src/video_core/engines/kepler_compute.cpp b/src/video_core/engines/kepler_compute.cpp
index 4ca856b6b..b1d950460 100644
--- a/src/video_core/engines/kepler_compute.cpp
+++ b/src/video_core/engines/kepler_compute.cpp
@@ -2,9 +2,8 @@
2// Licensed under GPLv2 or any later version 2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included. 3// Refer to the license.txt file included.
4 4
5#include "common/assert.h"
5#include "common/logging/log.h" 6#include "common/logging/log.h"
6#include "core/core.h"
7#include "core/memory.h"
8#include "video_core/engines/kepler_compute.h" 7#include "video_core/engines/kepler_compute.h"
9#include "video_core/memory_manager.h" 8#include "video_core/memory_manager.h"
10 9
diff --git a/src/video_core/engines/kepler_compute.h b/src/video_core/engines/kepler_compute.h
index df0a32e0f..6575afd0f 100644
--- a/src/video_core/engines/kepler_compute.h
+++ b/src/video_core/engines/kepler_compute.h
@@ -5,8 +5,7 @@
5#pragma once 5#pragma once
6 6
7#include <array> 7#include <array>
8#include "common/assert.h" 8#include <cstddef>
9#include "common/bit_field.h"
10#include "common/common_funcs.h" 9#include "common/common_funcs.h"
11#include "common/common_types.h" 10#include "common/common_types.h"
12#include "video_core/gpu.h" 11#include "video_core/gpu.h"
diff --git a/src/video_core/engines/kepler_memory.cpp b/src/video_core/engines/kepler_memory.cpp
index 4f6126116..aae2a4019 100644
--- a/src/video_core/engines/kepler_memory.cpp
+++ b/src/video_core/engines/kepler_memory.cpp
@@ -48,7 +48,7 @@ void KeplerMemory::ProcessData(u32 data) {
48 // We have to invalidate the destination region to evict any outdated surfaces from the cache. 48 // We have to invalidate the destination region to evict any outdated surfaces from the cache.
49 // We do this before actually writing the new data because the destination address might contain 49 // We do this before actually writing the new data because the destination address might contain
50 // a dirty surface that will have to be written back to memory. 50 // a dirty surface that will have to be written back to memory.
51 rasterizer.InvalidateRegion(*dest_address, sizeof(u32)); 51 Core::System::GetInstance().GPU().InvalidateRegion(*dest_address, sizeof(u32));
52 52
53 Memory::Write32(*dest_address, data); 53 Memory::Write32(*dest_address, data);
54 system.GPU().Maxwell3D().dirty_flags.OnMemoryWrite(); 54 system.GPU().Maxwell3D().dirty_flags.OnMemoryWrite();
diff --git a/src/video_core/engines/kepler_memory.h b/src/video_core/engines/kepler_memory.h
index f680c2ad9..9181e9d80 100644
--- a/src/video_core/engines/kepler_memory.h
+++ b/src/video_core/engines/kepler_memory.h
@@ -5,6 +5,7 @@
5#pragma once 5#pragma once
6 6
7#include <array> 7#include <array>
8#include <cstddef>
8#include "common/bit_field.h" 9#include "common/bit_field.h"
9#include "common/common_funcs.h" 10#include "common/common_funcs.h"
10#include "common/common_types.h" 11#include "common/common_types.h"
diff --git a/src/video_core/engines/maxwell_dma.cpp b/src/video_core/engines/maxwell_dma.cpp
index 529a14ec7..9dfea5999 100644
--- a/src/video_core/engines/maxwell_dma.cpp
+++ b/src/video_core/engines/maxwell_dma.cpp
@@ -3,6 +3,7 @@
3// Refer to the license.txt file included. 3// Refer to the license.txt file included.
4 4
5#include "common/assert.h" 5#include "common/assert.h"
6#include "common/logging/log.h"
6#include "core/core.h" 7#include "core/core.h"
7#include "core/memory.h" 8#include "core/memory.h"
8#include "video_core/engines/maxwell_3d.h" 9#include "video_core/engines/maxwell_3d.h"
@@ -91,12 +92,12 @@ void MaxwellDMA::HandleCopy() {
91 const auto FlushAndInvalidate = [&](u32 src_size, u64 dst_size) { 92 const auto FlushAndInvalidate = [&](u32 src_size, u64 dst_size) {
92 // TODO(Subv): For now, manually flush the regions until we implement GPU-accelerated 93 // TODO(Subv): For now, manually flush the regions until we implement GPU-accelerated
93 // copying. 94 // copying.
94 rasterizer.FlushRegion(*source_cpu, src_size); 95 Core::System::GetInstance().GPU().FlushRegion(*source_cpu, src_size);
95 96
96 // We have to invalidate the destination region to evict any outdated surfaces from the 97 // We have to invalidate the destination region to evict any outdated surfaces from the
97 // cache. We do this before actually writing the new data because the destination address 98 // cache. We do this before actually writing the new data because the destination address
98 // might contain a dirty surface that will have to be written back to memory. 99 // might contain a dirty surface that will have to be written back to memory.
99 rasterizer.InvalidateRegion(*dest_cpu, dst_size); 100 Core::System::GetInstance().GPU().InvalidateRegion(*dest_cpu, dst_size);
100 }; 101 };
101 102
102 if (regs.exec.is_dst_linear && !regs.exec.is_src_linear) { 103 if (regs.exec.is_dst_linear && !regs.exec.is_src_linear) {
diff --git a/src/video_core/engines/maxwell_dma.h b/src/video_core/engines/maxwell_dma.h
index cf75aeb12..34c369320 100644
--- a/src/video_core/engines/maxwell_dma.h
+++ b/src/video_core/engines/maxwell_dma.h
@@ -5,6 +5,7 @@
5#pragma once 5#pragma once
6 6
7#include <array> 7#include <array>
8#include <cstddef>
8#include "common/bit_field.h" 9#include "common/bit_field.h"
9#include "common/common_funcs.h" 10#include "common/common_funcs.h"
10#include "common/common_types.h" 11#include "common/common_types.h"
diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h
index 252592edd..7f613370b 100644
--- a/src/video_core/engines/shader_bytecode.h
+++ b/src/video_core/engines/shader_bytecode.h
@@ -6,7 +6,6 @@
6 6
7#include <bitset> 7#include <bitset>
8#include <optional> 8#include <optional>
9#include <string>
10#include <tuple> 9#include <tuple>
11#include <vector> 10#include <vector>
12 11
@@ -325,11 +324,11 @@ enum class TextureQueryType : u64 {
325 324
326enum class TextureProcessMode : u64 { 325enum class TextureProcessMode : u64 {
327 None = 0, 326 None = 0,
328 LZ = 1, // Unknown, appears to be the same as none. 327 LZ = 1, // Load LOD of zero.
329 LB = 2, // Load Bias. 328 LB = 2, // Load Bias.
330 LL = 3, // Load LOD (LevelOfDetail) 329 LL = 3, // Load LOD.
331 LBA = 6, // Load Bias. The A is unknown, does not appear to differ with LB 330 LBA = 6, // Load Bias. The A is unknown, does not appear to differ with LB.
332 LLA = 7 // Load LOD. The A is unknown, does not appear to differ with LL 331 LLA = 7 // Load LOD. The A is unknown, does not appear to differ with LL.
333}; 332};
334 333
335enum class TextureMiscMode : u64 { 334enum class TextureMiscMode : u64 {
@@ -1446,6 +1445,7 @@ public:
1446 Flow, 1445 Flow,
1447 Synch, 1446 Synch,
1448 Memory, 1447 Memory,
1448 Texture,
1449 FloatSet, 1449 FloatSet,
1450 FloatSetPredicate, 1450 FloatSetPredicate,
1451 IntegerSet, 1451 IntegerSet,
@@ -1576,14 +1576,14 @@ private:
1576 INST("1110111101010---", Id::ST_L, Type::Memory, "ST_L"), 1576 INST("1110111101010---", Id::ST_L, Type::Memory, "ST_L"),
1577 INST("1110111011010---", Id::LDG, Type::Memory, "LDG"), 1577 INST("1110111011010---", Id::LDG, Type::Memory, "LDG"),
1578 INST("1110111011011---", Id::STG, Type::Memory, "STG"), 1578 INST("1110111011011---", Id::STG, Type::Memory, "STG"),
1579 INST("110000----111---", Id::TEX, Type::Memory, "TEX"), 1579 INST("110000----111---", Id::TEX, Type::Texture, "TEX"),
1580 INST("1101111101001---", Id::TXQ, Type::Memory, "TXQ"), 1580 INST("1101111101001---", Id::TXQ, Type::Texture, "TXQ"),
1581 INST("1101-00---------", Id::TEXS, Type::Memory, "TEXS"), 1581 INST("1101-00---------", Id::TEXS, Type::Texture, "TEXS"),
1582 INST("1101101---------", Id::TLDS, Type::Memory, "TLDS"), 1582 INST("1101101---------", Id::TLDS, Type::Texture, "TLDS"),
1583 INST("110010----111---", Id::TLD4, Type::Memory, "TLD4"), 1583 INST("110010----111---", Id::TLD4, Type::Texture, "TLD4"),
1584 INST("1101111100------", Id::TLD4S, Type::Memory, "TLD4S"), 1584 INST("1101111100------", Id::TLD4S, Type::Texture, "TLD4S"),
1585 INST("110111110110----", Id::TMML_B, Type::Memory, "TMML_B"), 1585 INST("110111110110----", Id::TMML_B, Type::Texture, "TMML_B"),
1586 INST("1101111101011---", Id::TMML, Type::Memory, "TMML"), 1586 INST("1101111101011---", Id::TMML, Type::Texture, "TMML"),
1587 INST("111000110000----", Id::EXIT, Type::Trivial, "EXIT"), 1587 INST("111000110000----", Id::EXIT, Type::Trivial, "EXIT"),
1588 INST("11100000--------", Id::IPA, Type::Trivial, "IPA"), 1588 INST("11100000--------", Id::IPA, Type::Trivial, "IPA"),
1589 INST("1111101111100---", Id::OUT_R, Type::Trivial, "OUT_R"), 1589 INST("1111101111100---", Id::OUT_R, Type::Trivial, "OUT_R"),
diff --git a/src/video_core/gpu.cpp b/src/video_core/gpu.cpp
index ac30d1a89..08abf8ac9 100644
--- a/src/video_core/gpu.cpp
+++ b/src/video_core/gpu.cpp
@@ -12,7 +12,7 @@
12#include "video_core/engines/maxwell_3d.h" 12#include "video_core/engines/maxwell_3d.h"
13#include "video_core/engines/maxwell_dma.h" 13#include "video_core/engines/maxwell_dma.h"
14#include "video_core/gpu.h" 14#include "video_core/gpu.h"
15#include "video_core/rasterizer_interface.h" 15#include "video_core/renderer_base.h"
16 16
17namespace Tegra { 17namespace Tegra {
18 18
@@ -28,7 +28,8 @@ u32 FramebufferConfig::BytesPerPixel(PixelFormat format) {
28 UNREACHABLE(); 28 UNREACHABLE();
29} 29}
30 30
31GPU::GPU(Core::System& system, VideoCore::RasterizerInterface& rasterizer) { 31GPU::GPU(Core::System& system, VideoCore::RendererBase& renderer) : renderer{renderer} {
32 auto& rasterizer{renderer.Rasterizer()};
32 memory_manager = std::make_unique<Tegra::MemoryManager>(); 33 memory_manager = std::make_unique<Tegra::MemoryManager>();
33 dma_pusher = std::make_unique<Tegra::DmaPusher>(*this); 34 dma_pusher = std::make_unique<Tegra::DmaPusher>(*this);
34 maxwell_3d = std::make_unique<Engines::Maxwell3D>(system, rasterizer, *memory_manager); 35 maxwell_3d = std::make_unique<Engines::Maxwell3D>(system, rasterizer, *memory_manager);
diff --git a/src/video_core/gpu.h b/src/video_core/gpu.h
index 6313702f2..56a203275 100644
--- a/src/video_core/gpu.h
+++ b/src/video_core/gpu.h
@@ -16,8 +16,8 @@ class System;
16} 16}
17 17
18namespace VideoCore { 18namespace VideoCore {
19class RasterizerInterface; 19class RendererBase;
20} 20} // namespace VideoCore
21 21
22namespace Tegra { 22namespace Tegra {
23 23
@@ -119,10 +119,11 @@ enum class EngineID {
119 MAXWELL_DMA_COPY_A = 0xB0B5, 119 MAXWELL_DMA_COPY_A = 0xB0B5,
120}; 120};
121 121
122class GPU final { 122class GPU {
123public: 123public:
124 explicit GPU(Core::System& system, VideoCore::RasterizerInterface& rasterizer); 124 explicit GPU(Core::System& system, VideoCore::RendererBase& renderer);
125 ~GPU(); 125
126 virtual ~GPU();
126 127
127 struct MethodCall { 128 struct MethodCall {
128 u32 method{}; 129 u32 method{};
@@ -200,8 +201,42 @@ public:
200 }; 201 };
201 } regs{}; 202 } regs{};
202 203
204 /// Push GPU command entries to be processed
205 virtual void PushGPUEntries(Tegra::CommandList&& entries) = 0;
206
207 /// Swap buffers (render frame)
208 virtual void SwapBuffers(
209 std::optional<std::reference_wrapper<const Tegra::FramebufferConfig>> framebuffer) = 0;
210
211 /// Notify rasterizer that any caches of the specified region should be flushed to Switch memory
212 virtual void FlushRegion(VAddr addr, u64 size) = 0;
213
214 /// Notify rasterizer that any caches of the specified region should be invalidated
215 virtual void InvalidateRegion(VAddr addr, u64 size) = 0;
216
217 /// Notify rasterizer that any caches of the specified region should be flushed and invalidated
218 virtual void FlushAndInvalidateRegion(VAddr addr, u64 size) = 0;
219
203private: 220private:
221 void ProcessBindMethod(const MethodCall& method_call);
222 void ProcessSemaphoreTriggerMethod();
223 void ProcessSemaphoreRelease();
224 void ProcessSemaphoreAcquire();
225
226 /// Calls a GPU puller method.
227 void CallPullerMethod(const MethodCall& method_call);
228
229 /// Calls a GPU engine method.
230 void CallEngineMethod(const MethodCall& method_call);
231
232 /// Determines where the method should be executed.
233 bool ExecuteMethodOnEngine(const MethodCall& method_call);
234
235protected:
204 std::unique_ptr<Tegra::DmaPusher> dma_pusher; 236 std::unique_ptr<Tegra::DmaPusher> dma_pusher;
237 VideoCore::RendererBase& renderer;
238
239private:
205 std::unique_ptr<Tegra::MemoryManager> memory_manager; 240 std::unique_ptr<Tegra::MemoryManager> memory_manager;
206 241
207 /// Mapping of command subchannels to their bound engine ids. 242 /// Mapping of command subchannels to their bound engine ids.
@@ -217,18 +252,6 @@ private:
217 std::unique_ptr<Engines::MaxwellDMA> maxwell_dma; 252 std::unique_ptr<Engines::MaxwellDMA> maxwell_dma;
218 /// Inline memory engine 253 /// Inline memory engine
219 std::unique_ptr<Engines::KeplerMemory> kepler_memory; 254 std::unique_ptr<Engines::KeplerMemory> kepler_memory;
220
221 void ProcessBindMethod(const MethodCall& method_call);
222 void ProcessSemaphoreTriggerMethod();
223 void ProcessSemaphoreRelease();
224 void ProcessSemaphoreAcquire();
225
226 // Calls a GPU puller method.
227 void CallPullerMethod(const MethodCall& method_call);
228 // Calls a GPU engine method.
229 void CallEngineMethod(const MethodCall& method_call);
230 // Determines where the method should be executed.
231 bool ExecuteMethodOnEngine(const MethodCall& method_call);
232}; 255};
233 256
234#define ASSERT_REG_POSITION(field_name, position) \ 257#define ASSERT_REG_POSITION(field_name, position) \
diff --git a/src/video_core/gpu_asynch.cpp b/src/video_core/gpu_asynch.cpp
new file mode 100644
index 000000000..ad0a747e3
--- /dev/null
+++ b/src/video_core/gpu_asynch.cpp
@@ -0,0 +1,37 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "video_core/gpu_asynch.h"
6#include "video_core/gpu_thread.h"
7#include "video_core/renderer_base.h"
8
9namespace VideoCommon {
10
11GPUAsynch::GPUAsynch(Core::System& system, VideoCore::RendererBase& renderer)
12 : Tegra::GPU(system, renderer), gpu_thread{renderer, *dma_pusher} {}
13
14GPUAsynch::~GPUAsynch() = default;
15
16void GPUAsynch::PushGPUEntries(Tegra::CommandList&& entries) {
17 gpu_thread.SubmitList(std::move(entries));
18}
19
20void GPUAsynch::SwapBuffers(
21 std::optional<std::reference_wrapper<const Tegra::FramebufferConfig>> framebuffer) {
22 gpu_thread.SwapBuffers(std::move(framebuffer));
23}
24
25void GPUAsynch::FlushRegion(VAddr addr, u64 size) {
26 gpu_thread.FlushRegion(addr, size);
27}
28
29void GPUAsynch::InvalidateRegion(VAddr addr, u64 size) {
30 gpu_thread.InvalidateRegion(addr, size);
31}
32
33void GPUAsynch::FlushAndInvalidateRegion(VAddr addr, u64 size) {
34 gpu_thread.FlushAndInvalidateRegion(addr, size);
35}
36
37} // namespace VideoCommon
diff --git a/src/video_core/gpu_asynch.h b/src/video_core/gpu_asynch.h
new file mode 100644
index 000000000..e6a807aba
--- /dev/null
+++ b/src/video_core/gpu_asynch.h
@@ -0,0 +1,37 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include "video_core/gpu.h"
8#include "video_core/gpu_thread.h"
9
10namespace VideoCore {
11class RendererBase;
12} // namespace VideoCore
13
14namespace VideoCommon {
15
16namespace GPUThread {
17class ThreadManager;
18} // namespace GPUThread
19
20/// Implementation of GPU interface that runs the GPU asynchronously
21class GPUAsynch : public Tegra::GPU {
22public:
23 explicit GPUAsynch(Core::System& system, VideoCore::RendererBase& renderer);
24 ~GPUAsynch() override;
25
26 void PushGPUEntries(Tegra::CommandList&& entries) override;
27 void SwapBuffers(
28 std::optional<std::reference_wrapper<const Tegra::FramebufferConfig>> framebuffer) override;
29 void FlushRegion(VAddr addr, u64 size) override;
30 void InvalidateRegion(VAddr addr, u64 size) override;
31 void FlushAndInvalidateRegion(VAddr addr, u64 size) override;
32
33private:
34 GPUThread::ThreadManager gpu_thread;
35};
36
37} // namespace VideoCommon
diff --git a/src/video_core/gpu_synch.cpp b/src/video_core/gpu_synch.cpp
new file mode 100644
index 000000000..4c00b96c7
--- /dev/null
+++ b/src/video_core/gpu_synch.cpp
@@ -0,0 +1,37 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "video_core/gpu_synch.h"
6#include "video_core/renderer_base.h"
7
8namespace VideoCommon {
9
10GPUSynch::GPUSynch(Core::System& system, VideoCore::RendererBase& renderer)
11 : Tegra::GPU(system, renderer) {}
12
13GPUSynch::~GPUSynch() = default;
14
15void GPUSynch::PushGPUEntries(Tegra::CommandList&& entries) {
16 dma_pusher->Push(std::move(entries));
17 dma_pusher->DispatchCalls();
18}
19
20void GPUSynch::SwapBuffers(
21 std::optional<std::reference_wrapper<const Tegra::FramebufferConfig>> framebuffer) {
22 renderer.SwapBuffers(std::move(framebuffer));
23}
24
25void GPUSynch::FlushRegion(VAddr addr, u64 size) {
26 renderer.Rasterizer().FlushRegion(addr, size);
27}
28
29void GPUSynch::InvalidateRegion(VAddr addr, u64 size) {
30 renderer.Rasterizer().InvalidateRegion(addr, size);
31}
32
33void GPUSynch::FlushAndInvalidateRegion(VAddr addr, u64 size) {
34 renderer.Rasterizer().FlushAndInvalidateRegion(addr, size);
35}
36
37} // namespace VideoCommon
diff --git a/src/video_core/gpu_synch.h b/src/video_core/gpu_synch.h
new file mode 100644
index 000000000..7d5a241ff
--- /dev/null
+++ b/src/video_core/gpu_synch.h
@@ -0,0 +1,29 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include "video_core/gpu.h"
8
9namespace VideoCore {
10class RendererBase;
11} // namespace VideoCore
12
13namespace VideoCommon {
14
15/// Implementation of GPU interface that runs the GPU synchronously
16class GPUSynch : public Tegra::GPU {
17public:
18 explicit GPUSynch(Core::System& system, VideoCore::RendererBase& renderer);
19 ~GPUSynch() override;
20
21 void PushGPUEntries(Tegra::CommandList&& entries) override;
22 void SwapBuffers(
23 std::optional<std::reference_wrapper<const Tegra::FramebufferConfig>> framebuffer) override;
24 void FlushRegion(VAddr addr, u64 size) override;
25 void InvalidateRegion(VAddr addr, u64 size) override;
26 void FlushAndInvalidateRegion(VAddr addr, u64 size) override;
27};
28
29} // namespace VideoCommon
diff --git a/src/video_core/gpu_thread.cpp b/src/video_core/gpu_thread.cpp
new file mode 100644
index 000000000..c5bdd2a17
--- /dev/null
+++ b/src/video_core/gpu_thread.cpp
@@ -0,0 +1,152 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/assert.h"
6#include "common/microprofile.h"
7#include "core/frontend/scope_acquire_window_context.h"
8#include "core/settings.h"
9#include "video_core/dma_pusher.h"
10#include "video_core/gpu.h"
11#include "video_core/gpu_thread.h"
12#include "video_core/renderer_base.h"
13
14namespace VideoCommon::GPUThread {
15
16/// Executes a single GPU thread command
17static void ExecuteCommand(CommandData* command, VideoCore::RendererBase& renderer,
18 Tegra::DmaPusher& dma_pusher) {
19 if (const auto submit_list = std::get_if<SubmitListCommand>(command)) {
20 dma_pusher.Push(std::move(submit_list->entries));
21 dma_pusher.DispatchCalls();
22 } else if (const auto data = std::get_if<SwapBuffersCommand>(command)) {
23 renderer.SwapBuffers(data->framebuffer);
24 } else if (const auto data = std::get_if<FlushRegionCommand>(command)) {
25 renderer.Rasterizer().FlushRegion(data->addr, data->size);
26 } else if (const auto data = std::get_if<InvalidateRegionCommand>(command)) {
27 renderer.Rasterizer().InvalidateRegion(data->addr, data->size);
28 } else if (const auto data = std::get_if<FlushAndInvalidateRegionCommand>(command)) {
29 renderer.Rasterizer().FlushAndInvalidateRegion(data->addr, data->size);
30 } else {
31 UNREACHABLE();
32 }
33}
34
35/// Runs the GPU thread
36static void RunThread(VideoCore::RendererBase& renderer, Tegra::DmaPusher& dma_pusher,
37 SynchState& state) {
38
39 MicroProfileOnThreadCreate("GpuThread");
40
41 auto WaitForWakeup = [&]() {
42 std::unique_lock<std::mutex> lock{state.signal_mutex};
43 state.signal_condition.wait(lock, [&] { return !state.is_idle || !state.is_running; });
44 };
45
46 // Wait for first GPU command before acquiring the window context
47 WaitForWakeup();
48
49 // If emulation was stopped during disk shader loading, abort before trying to acquire context
50 if (!state.is_running) {
51 return;
52 }
53
54 Core::Frontend::ScopeAcquireWindowContext acquire_context{renderer.GetRenderWindow()};
55
56 while (state.is_running) {
57 if (!state.is_running) {
58 return;
59 }
60
61 {
62 // Thread has been woken up, so make the previous write queue the next read queue
63 std::lock_guard<std::mutex> lock{state.signal_mutex};
64 std::swap(state.push_queue, state.pop_queue);
65 }
66
67 // Execute all of the GPU commands
68 while (!state.pop_queue->empty()) {
69 ExecuteCommand(&state.pop_queue->front(), renderer, dma_pusher);
70 state.pop_queue->pop();
71 }
72
73 state.UpdateIdleState();
74
75 // Signal that the GPU thread has finished processing commands
76 if (state.is_idle) {
77 state.idle_condition.notify_one();
78 }
79
80 // Wait for CPU thread to send more GPU commands
81 WaitForWakeup();
82 }
83}
84
85ThreadManager::ThreadManager(VideoCore::RendererBase& renderer, Tegra::DmaPusher& dma_pusher)
86 : renderer{renderer}, dma_pusher{dma_pusher}, thread{RunThread, std::ref(renderer),
87 std::ref(dma_pusher), std::ref(state)},
88 thread_id{thread.get_id()} {}
89
90ThreadManager::~ThreadManager() {
91 {
92 // Notify GPU thread that a shutdown is pending
93 std::lock_guard<std::mutex> lock{state.signal_mutex};
94 state.is_running = false;
95 }
96
97 state.signal_condition.notify_one();
98 thread.join();
99}
100
101void ThreadManager::SubmitList(Tegra::CommandList&& entries) {
102 if (entries.empty()) {
103 return;
104 }
105
106 PushCommand(SubmitListCommand(std::move(entries)), false, false);
107}
108
109void ThreadManager::SwapBuffers(
110 std::optional<std::reference_wrapper<const Tegra::FramebufferConfig>> framebuffer) {
111 PushCommand(SwapBuffersCommand(std::move(framebuffer)), true, false);
112}
113
114void ThreadManager::FlushRegion(VAddr addr, u64 size) {
115 // Block the CPU when using accurate emulation
116 PushCommand(FlushRegionCommand(addr, size), Settings::values.use_accurate_gpu_emulation, false);
117}
118
119void ThreadManager::InvalidateRegion(VAddr addr, u64 size) {
120 PushCommand(InvalidateRegionCommand(addr, size), true, true);
121}
122
123void ThreadManager::FlushAndInvalidateRegion(VAddr addr, u64 size) {
124 InvalidateRegion(addr, size);
125}
126
127void ThreadManager::PushCommand(CommandData&& command_data, bool wait_for_idle, bool allow_on_cpu) {
128 {
129 std::lock_guard<std::mutex> lock{state.signal_mutex};
130
131 if ((allow_on_cpu && state.is_idle) || IsGpuThread()) {
132 // Execute the command synchronously on the current thread
133 ExecuteCommand(&command_data, renderer, dma_pusher);
134 return;
135 }
136
137 // Push the command to the GPU thread
138 state.UpdateIdleState();
139 state.push_queue->emplace(command_data);
140 }
141
142 // Signal the GPU thread that commands are pending
143 state.signal_condition.notify_one();
144
145 if (wait_for_idle) {
146 // Wait for the GPU to be idle (all commands to be executed)
147 std::unique_lock<std::mutex> lock{state.idle_mutex};
148 state.idle_condition.wait(lock, [this] { return static_cast<bool>(state.is_idle); });
149 }
150}
151
152} // namespace VideoCommon::GPUThread
diff --git a/src/video_core/gpu_thread.h b/src/video_core/gpu_thread.h
new file mode 100644
index 000000000..edb148b14
--- /dev/null
+++ b/src/video_core/gpu_thread.h
@@ -0,0 +1,133 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <array>
8#include <atomic>
9#include <condition_variable>
10#include <memory>
11#include <mutex>
12#include <optional>
13#include <thread>
14#include <variant>
15
16namespace Tegra {
17struct FramebufferConfig;
18class DmaPusher;
19} // namespace Tegra
20
21namespace VideoCore {
22class RendererBase;
23} // namespace VideoCore
24
25namespace VideoCommon::GPUThread {
26
27/// Command to signal to the GPU thread that a command list is ready for processing
28struct SubmitListCommand final {
29 explicit SubmitListCommand(Tegra::CommandList&& entries) : entries{std::move(entries)} {}
30
31 Tegra::CommandList entries;
32};
33
34/// Command to signal to the GPU thread that a swap buffers is pending
35struct SwapBuffersCommand final {
36 explicit SwapBuffersCommand(std::optional<const Tegra::FramebufferConfig> framebuffer)
37 : framebuffer{std::move(framebuffer)} {}
38
39 std::optional<const Tegra::FramebufferConfig> framebuffer;
40};
41
42/// Command to signal to the GPU thread to flush a region
43struct FlushRegionCommand final {
44 explicit constexpr FlushRegionCommand(VAddr addr, u64 size) : addr{addr}, size{size} {}
45
46 const VAddr addr;
47 const u64 size;
48};
49
50/// Command to signal to the GPU thread to invalidate a region
51struct InvalidateRegionCommand final {
52 explicit constexpr InvalidateRegionCommand(VAddr addr, u64 size) : addr{addr}, size{size} {}
53
54 const VAddr addr;
55 const u64 size;
56};
57
58/// Command to signal to the GPU thread to flush and invalidate a region
59struct FlushAndInvalidateRegionCommand final {
60 explicit constexpr FlushAndInvalidateRegionCommand(VAddr addr, u64 size)
61 : addr{addr}, size{size} {}
62
63 const VAddr addr;
64 const u64 size;
65};
66
67using CommandData = std::variant<SubmitListCommand, SwapBuffersCommand, FlushRegionCommand,
68 InvalidateRegionCommand, FlushAndInvalidateRegionCommand>;
69
70/// Struct used to synchronize the GPU thread
71struct SynchState final {
72 std::atomic<bool> is_running{true};
73 std::atomic<bool> is_idle{true};
74 std::condition_variable signal_condition;
75 std::mutex signal_mutex;
76 std::condition_variable idle_condition;
77 std::mutex idle_mutex;
78
79 // We use two queues for sending commands to the GPU thread, one for writing (push_queue) to and
80 // one for reading from (pop_queue). These are swapped whenever the current pop_queue becomes
81 // empty. This allows for efficient thread-safe access, as it does not require any copies.
82
83 using CommandQueue = std::queue<CommandData>;
84 std::array<CommandQueue, 2> command_queues;
85 CommandQueue* push_queue{&command_queues[0]};
86 CommandQueue* pop_queue{&command_queues[1]};
87
88 void UpdateIdleState() {
89 std::lock_guard<std::mutex> lock{idle_mutex};
90 is_idle = command_queues[0].empty() && command_queues[1].empty();
91 }
92};
93
94/// Class used to manage the GPU thread
95class ThreadManager final {
96public:
97 explicit ThreadManager(VideoCore::RendererBase& renderer, Tegra::DmaPusher& dma_pusher);
98 ~ThreadManager();
99
100 /// Push GPU command entries to be processed
101 void SubmitList(Tegra::CommandList&& entries);
102
103 /// Swap buffers (render frame)
104 void SwapBuffers(
105 std::optional<std::reference_wrapper<const Tegra::FramebufferConfig>> framebuffer);
106
107 /// Notify rasterizer that any caches of the specified region should be flushed to Switch memory
108 void FlushRegion(VAddr addr, u64 size);
109
110 /// Notify rasterizer that any caches of the specified region should be invalidated
111 void InvalidateRegion(VAddr addr, u64 size);
112
113 /// Notify rasterizer that any caches of the specified region should be flushed and invalidated
114 void FlushAndInvalidateRegion(VAddr addr, u64 size);
115
116private:
117 /// Pushes a command to be executed by the GPU thread
118 void PushCommand(CommandData&& command_data, bool wait_for_idle, bool allow_on_cpu);
119
120 /// Returns true if this is called by the GPU thread
121 bool IsGpuThread() const {
122 return std::this_thread::get_id() == thread_id;
123 }
124
125private:
126 SynchState state;
127 VideoCore::RendererBase& renderer;
128 Tegra::DmaPusher& dma_pusher;
129 std::thread thread;
130 std::thread::id thread_id;
131};
132
133} // namespace VideoCommon::GPUThread
diff --git a/src/video_core/renderer_base.cpp b/src/video_core/renderer_base.cpp
index 94223f45f..919d1f2d4 100644
--- a/src/video_core/renderer_base.cpp
+++ b/src/video_core/renderer_base.cpp
@@ -2,6 +2,7 @@
2// Licensed under GPLv2 or any later version 2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included. 3// Refer to the license.txt file included.
4 4
5#include "common/logging/log.h"
5#include "core/frontend/emu_window.h" 6#include "core/frontend/emu_window.h"
6#include "core/settings.h" 7#include "core/settings.h"
7#include "video_core/renderer_base.h" 8#include "video_core/renderer_base.h"
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp
index c8c1d6911..824863561 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -118,7 +118,7 @@ RasterizerOpenGL::RasterizerOpenGL(Core::Frontend::EmuWindow& window, Core::Syst
118 118
119 glGetIntegerv(GL_UNIFORM_BUFFER_OFFSET_ALIGNMENT, &uniform_buffer_alignment); 119 glGetIntegerv(GL_UNIFORM_BUFFER_OFFSET_ALIGNMENT, &uniform_buffer_alignment);
120 120
121 LOG_CRITICAL(Render_OpenGL, "Sync fixed function OpenGL state here!"); 121 LOG_DEBUG(Render_OpenGL, "Sync fixed function OpenGL state here");
122 CheckExtensions(); 122 CheckExtensions();
123} 123}
124 124
@@ -177,7 +177,7 @@ GLuint RasterizerOpenGL::SetupVertexFormat() {
177 continue; 177 continue;
178 178
179 const auto& buffer = regs.vertex_array[attrib.buffer]; 179 const auto& buffer = regs.vertex_array[attrib.buffer];
180 LOG_TRACE(HW_GPU, 180 LOG_TRACE(Render_OpenGL,
181 "vertex attrib {}, count={}, size={}, type={}, offset={}, normalize={}", 181 "vertex attrib {}, count={}, size={}, type={}, offset={}, normalize={}",
182 index, attrib.ComponentCount(), attrib.SizeString(), attrib.TypeString(), 182 index, attrib.ComponentCount(), attrib.SizeString(), attrib.TypeString(),
183 attrib.offset.Value(), attrib.IsNormalized()); 183 attrib.offset.Value(), attrib.IsNormalized());
@@ -343,9 +343,8 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) {
343 shader_program_manager->UseProgrammableFragmentShader(program_handle); 343 shader_program_manager->UseProgrammableFragmentShader(program_handle);
344 break; 344 break;
345 default: 345 default:
346 LOG_CRITICAL(HW_GPU, "Unimplemented shader index={}, enable={}, offset=0x{:08X}", index, 346 UNIMPLEMENTED_MSG("Unimplemented shader index={}, enable={}, offset=0x{:08X}", index,
347 shader_config.enable.Value(), shader_config.offset); 347 shader_config.enable.Value(), shader_config.offset);
348 UNREACHABLE();
349 } 348 }
350 349
351 const auto stage_enum = static_cast<Maxwell::ShaderStage>(stage); 350 const auto stage_enum = static_cast<Maxwell::ShaderStage>(stage);
@@ -739,33 +738,17 @@ void RasterizerOpenGL::DrawArrays() {
739 state.Apply(); 738 state.Apply();
740 739
741 res_cache.SignalPreDrawCall(); 740 res_cache.SignalPreDrawCall();
742
743 // Execute draw call
744 params.DispatchDraw(); 741 params.DispatchDraw();
745
746 res_cache.SignalPostDrawCall(); 742 res_cache.SignalPostDrawCall();
747 743
748 // Disable scissor test
749 state.viewports[0].scissor.enabled = false;
750
751 accelerate_draw = AccelDraw::Disabled; 744 accelerate_draw = AccelDraw::Disabled;
752
753 // Unbind textures for potential future use as framebuffer attachments
754 for (auto& texture_unit : state.texture_units) {
755 texture_unit.Unbind();
756 }
757 state.Apply();
758} 745}
759 746
760void RasterizerOpenGL::FlushAll() {} 747void RasterizerOpenGL::FlushAll() {}
761 748
762void RasterizerOpenGL::FlushRegion(VAddr addr, u64 size) { 749void RasterizerOpenGL::FlushRegion(VAddr addr, u64 size) {
763 MICROPROFILE_SCOPE(OpenGL_CacheManagement); 750 MICROPROFILE_SCOPE(OpenGL_CacheManagement);
764 751 res_cache.FlushRegion(addr, size);
765 if (Settings::values.use_accurate_gpu_emulation) {
766 // Only flush if use_accurate_gpu_emulation is enabled, as it incurs a performance hit
767 res_cache.FlushRegion(addr, size);
768 }
769} 752}
770 753
771void RasterizerOpenGL::InvalidateRegion(VAddr addr, u64 size) { 754void RasterizerOpenGL::InvalidateRegion(VAddr addr, u64 size) {
@@ -809,7 +792,10 @@ bool RasterizerOpenGL::AccelerateDisplay(const Tegra::FramebufferConfig& config,
809 VideoCore::Surface::PixelFormatFromGPUPixelFormat(config.pixel_format)}; 792 VideoCore::Surface::PixelFormatFromGPUPixelFormat(config.pixel_format)};
810 ASSERT_MSG(params.width == config.width, "Framebuffer width is different"); 793 ASSERT_MSG(params.width == config.width, "Framebuffer width is different");
811 ASSERT_MSG(params.height == config.height, "Framebuffer height is different"); 794 ASSERT_MSG(params.height == config.height, "Framebuffer height is different");
812 ASSERT_MSG(params.pixel_format == pixel_format, "Framebuffer pixel_format is different"); 795
796 if (params.pixel_format != pixel_format) {
797 LOG_WARNING(Render_OpenGL, "Framebuffer pixel_format is different");
798 }
813 799
814 screen_info.display_texture = surface->Texture().handle; 800 screen_info.display_texture = surface->Texture().handle;
815 801
@@ -818,104 +804,87 @@ bool RasterizerOpenGL::AccelerateDisplay(const Tegra::FramebufferConfig& config,
818 804
819void RasterizerOpenGL::SamplerInfo::Create() { 805void RasterizerOpenGL::SamplerInfo::Create() {
820 sampler.Create(); 806 sampler.Create();
821 mag_filter = min_filter = Tegra::Texture::TextureFilter::Linear; 807 mag_filter = Tegra::Texture::TextureFilter::Linear;
822 wrap_u = wrap_v = wrap_p = Tegra::Texture::WrapMode::Wrap; 808 min_filter = Tegra::Texture::TextureFilter::Linear;
823 uses_depth_compare = false; 809 wrap_u = Tegra::Texture::WrapMode::Wrap;
810 wrap_v = Tegra::Texture::WrapMode::Wrap;
811 wrap_p = Tegra::Texture::WrapMode::Wrap;
812 use_depth_compare = false;
824 depth_compare_func = Tegra::Texture::DepthCompareFunc::Never; 813 depth_compare_func = Tegra::Texture::DepthCompareFunc::Never;
825 814
826 // default is GL_LINEAR_MIPMAP_LINEAR 815 // OpenGL's default is GL_LINEAR_MIPMAP_LINEAR
827 glSamplerParameteri(sampler.handle, GL_TEXTURE_MIN_FILTER, GL_LINEAR); 816 glSamplerParameteri(sampler.handle, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
828 // Other attributes have correct defaults
829 glSamplerParameteri(sampler.handle, GL_TEXTURE_COMPARE_FUNC, GL_NEVER); 817 glSamplerParameteri(sampler.handle, GL_TEXTURE_COMPARE_FUNC, GL_NEVER);
818
819 // Other attributes have correct defaults
830} 820}
831 821
832void RasterizerOpenGL::SamplerInfo::SyncWithConfig(const Tegra::Texture::TSCEntry& config) { 822void RasterizerOpenGL::SamplerInfo::SyncWithConfig(const Tegra::Texture::TSCEntry& config) {
833 const GLuint s = sampler.handle; 823 const GLuint sampler_id = sampler.handle;
834 if (mag_filter != config.mag_filter) { 824 if (mag_filter != config.mag_filter) {
835 mag_filter = config.mag_filter; 825 mag_filter = config.mag_filter;
836 glSamplerParameteri( 826 glSamplerParameteri(
837 s, GL_TEXTURE_MAG_FILTER, 827 sampler_id, GL_TEXTURE_MAG_FILTER,
838 MaxwellToGL::TextureFilterMode(mag_filter, Tegra::Texture::TextureMipmapFilter::None)); 828 MaxwellToGL::TextureFilterMode(mag_filter, Tegra::Texture::TextureMipmapFilter::None));
839 } 829 }
840 if (min_filter != config.min_filter || mip_filter != config.mip_filter) { 830 if (min_filter != config.min_filter || mipmap_filter != config.mipmap_filter) {
841 min_filter = config.min_filter; 831 min_filter = config.min_filter;
842 mip_filter = config.mip_filter; 832 mipmap_filter = config.mipmap_filter;
843 glSamplerParameteri(s, GL_TEXTURE_MIN_FILTER, 833 glSamplerParameteri(sampler_id, GL_TEXTURE_MIN_FILTER,
844 MaxwellToGL::TextureFilterMode(min_filter, mip_filter)); 834 MaxwellToGL::TextureFilterMode(min_filter, mipmap_filter));
845 } 835 }
846 836
847 if (wrap_u != config.wrap_u) { 837 if (wrap_u != config.wrap_u) {
848 wrap_u = config.wrap_u; 838 wrap_u = config.wrap_u;
849 glSamplerParameteri(s, GL_TEXTURE_WRAP_S, MaxwellToGL::WrapMode(wrap_u)); 839 glSamplerParameteri(sampler_id, GL_TEXTURE_WRAP_S, MaxwellToGL::WrapMode(wrap_u));
850 } 840 }
851 if (wrap_v != config.wrap_v) { 841 if (wrap_v != config.wrap_v) {
852 wrap_v = config.wrap_v; 842 wrap_v = config.wrap_v;
853 glSamplerParameteri(s, GL_TEXTURE_WRAP_T, MaxwellToGL::WrapMode(wrap_v)); 843 glSamplerParameteri(sampler_id, GL_TEXTURE_WRAP_T, MaxwellToGL::WrapMode(wrap_v));
854 } 844 }
855 if (wrap_p != config.wrap_p) { 845 if (wrap_p != config.wrap_p) {
856 wrap_p = config.wrap_p; 846 wrap_p = config.wrap_p;
857 glSamplerParameteri(s, GL_TEXTURE_WRAP_R, MaxwellToGL::WrapMode(wrap_p)); 847 glSamplerParameteri(sampler_id, GL_TEXTURE_WRAP_R, MaxwellToGL::WrapMode(wrap_p));
858 } 848 }
859 849
860 if (uses_depth_compare != (config.depth_compare_enabled == 1)) { 850 if (const bool enabled = config.depth_compare_enabled == 1; use_depth_compare != enabled) {
861 uses_depth_compare = (config.depth_compare_enabled == 1); 851 use_depth_compare = enabled;
862 if (uses_depth_compare) { 852 glSamplerParameteri(sampler_id, GL_TEXTURE_COMPARE_MODE,
863 glSamplerParameteri(s, GL_TEXTURE_COMPARE_MODE, GL_COMPARE_REF_TO_TEXTURE); 853 use_depth_compare ? GL_COMPARE_REF_TO_TEXTURE : GL_NONE);
864 } else {
865 glSamplerParameteri(s, GL_TEXTURE_COMPARE_MODE, GL_NONE);
866 }
867 } 854 }
868 855
869 if (depth_compare_func != config.depth_compare_func) { 856 if (depth_compare_func != config.depth_compare_func) {
870 depth_compare_func = config.depth_compare_func; 857 depth_compare_func = config.depth_compare_func;
871 glSamplerParameteri(s, GL_TEXTURE_COMPARE_FUNC, 858 glSamplerParameteri(sampler_id, GL_TEXTURE_COMPARE_FUNC,
872 MaxwellToGL::DepthCompareFunc(depth_compare_func)); 859 MaxwellToGL::DepthCompareFunc(depth_compare_func));
873 } 860 }
874 861
875 GLvec4 new_border_color; 862 if (const auto new_border_color = config.GetBorderColor(); border_color != new_border_color) {
876 if (config.srgb_conversion) {
877 new_border_color[0] = config.srgb_border_color_r / 255.0f;
878 new_border_color[1] = config.srgb_border_color_g / 255.0f;
879 new_border_color[2] = config.srgb_border_color_g / 255.0f;
880 } else {
881 new_border_color[0] = config.border_color_r;
882 new_border_color[1] = config.border_color_g;
883 new_border_color[2] = config.border_color_b;
884 }
885 new_border_color[3] = config.border_color_a;
886
887 if (border_color != new_border_color) {
888 border_color = new_border_color; 863 border_color = new_border_color;
889 glSamplerParameterfv(s, GL_TEXTURE_BORDER_COLOR, border_color.data()); 864 glSamplerParameterfv(sampler_id, GL_TEXTURE_BORDER_COLOR, border_color.data());
890 } 865 }
891 866
892 const float anisotropic_max = static_cast<float>(1 << config.max_anisotropy.Value()); 867 if (const float anisotropic = config.GetMaxAnisotropy(); max_anisotropic != anisotropic) {
893 if (anisotropic_max != max_anisotropic) { 868 max_anisotropic = anisotropic;
894 max_anisotropic = anisotropic_max;
895 if (GLAD_GL_ARB_texture_filter_anisotropic) { 869 if (GLAD_GL_ARB_texture_filter_anisotropic) {
896 glSamplerParameterf(s, GL_TEXTURE_MAX_ANISOTROPY, max_anisotropic); 870 glSamplerParameterf(sampler_id, GL_TEXTURE_MAX_ANISOTROPY, max_anisotropic);
897 } else if (GLAD_GL_EXT_texture_filter_anisotropic) { 871 } else if (GLAD_GL_EXT_texture_filter_anisotropic) {
898 glSamplerParameterf(s, GL_TEXTURE_MAX_ANISOTROPY_EXT, max_anisotropic); 872 glSamplerParameterf(sampler_id, GL_TEXTURE_MAX_ANISOTROPY_EXT, max_anisotropic);
899 } 873 }
900 } 874 }
901 const float lod_min = static_cast<float>(config.min_lod_clamp.Value()) / 256.0f;
902 if (lod_min != min_lod) {
903 min_lod = lod_min;
904 glSamplerParameterf(s, GL_TEXTURE_MIN_LOD, min_lod);
905 }
906 875
907 const float lod_max = static_cast<float>(config.max_lod_clamp.Value()) / 256.0f; 876 if (const float min = config.GetMinLod(); min_lod != min) {
908 if (lod_max != max_lod) { 877 min_lod = min;
909 max_lod = lod_max; 878 glSamplerParameterf(sampler_id, GL_TEXTURE_MIN_LOD, min_lod);
910 glSamplerParameterf(s, GL_TEXTURE_MAX_LOD, max_lod);
911 } 879 }
912 const u32 bias = config.mip_lod_bias.Value(); 880 if (const float max = config.GetMaxLod(); max_lod != max) {
913 // Sign extend the 13-bit value. 881 max_lod = max;
914 constexpr u32 mask = 1U << (13 - 1); 882 glSamplerParameterf(sampler_id, GL_TEXTURE_MAX_LOD, max_lod);
915 const float bias_lod = static_cast<s32>((bias ^ mask) - mask) / 256.f; 883 }
916 if (lod_bias != bias_lod) { 884
917 lod_bias = bias_lod; 885 if (const float bias = config.GetLodBias(); lod_bias != bias) {
918 glSamplerParameterf(s, GL_TEXTURE_LOD_BIAS, lod_bias); 886 lod_bias = bias;
887 glSamplerParameterf(sampler_id, GL_TEXTURE_LOD_BIAS, lod_bias);
919 } 888 }
920} 889}
921 890
@@ -955,8 +924,8 @@ void RasterizerOpenGL::SetupConstBuffers(Tegra::Engines::Maxwell3D::Regs::Shader
955 size = buffer.size; 924 size = buffer.size;
956 925
957 if (size > MaxConstbufferSize) { 926 if (size > MaxConstbufferSize) {
958 LOG_CRITICAL(HW_GPU, "indirect constbuffer size {} exceeds maximum {}", size, 927 LOG_WARNING(Render_OpenGL, "Indirect constbuffer size {} exceeds maximum {}", size,
959 MaxConstbufferSize); 928 MaxConstbufferSize);
960 size = MaxConstbufferSize; 929 size = MaxConstbufferSize;
961 } 930 }
962 } else { 931 } else {
@@ -1016,10 +985,9 @@ void RasterizerOpenGL::SetupTextures(Maxwell::ShaderStage stage, const Shader& s
1016 985
1017 texture_samplers[current_bindpoint].SyncWithConfig(texture.tsc); 986 texture_samplers[current_bindpoint].SyncWithConfig(texture.tsc);
1018 987
1019 Surface surface = res_cache.GetTextureSurface(texture, entry); 988 if (Surface surface = res_cache.GetTextureSurface(texture, entry); surface) {
1020 if (surface != nullptr) {
1021 state.texture_units[current_bindpoint].texture = 989 state.texture_units[current_bindpoint].texture =
1022 entry.IsArray() ? surface->TextureLayer().handle : surface->Texture().handle; 990 surface->Texture(entry.IsArray()).handle;
1023 surface->UpdateSwizzle(texture.tic.x_source, texture.tic.y_source, texture.tic.z_source, 991 surface->UpdateSwizzle(texture.tic.x_source, texture.tic.y_source, texture.tic.z_source,
1024 texture.tic.w_source); 992 texture.tic.w_source);
1025 } else { 993 } else {
@@ -1251,11 +1219,7 @@ void RasterizerOpenGL::SyncScissorTest(OpenGLState& current_state) {
1251 1219
1252void RasterizerOpenGL::SyncTransformFeedback() { 1220void RasterizerOpenGL::SyncTransformFeedback() {
1253 const auto& regs = Core::System::GetInstance().GPU().Maxwell3D().regs; 1221 const auto& regs = Core::System::GetInstance().GPU().Maxwell3D().regs;
1254 1222 UNIMPLEMENTED_IF_MSG(regs.tfb_enabled != 0, "Transform feedbacks are not implemented");
1255 if (regs.tfb_enabled != 0) {
1256 LOG_CRITICAL(Render_OpenGL, "Transform feedbacks are not implemented");
1257 UNREACHABLE();
1258 }
1259} 1223}
1260 1224
1261void RasterizerOpenGL::SyncPointState() { 1225void RasterizerOpenGL::SyncPointState() {
@@ -1275,12 +1239,8 @@ void RasterizerOpenGL::SyncPolygonOffset() {
1275 1239
1276void RasterizerOpenGL::CheckAlphaTests() { 1240void RasterizerOpenGL::CheckAlphaTests() {
1277 const auto& regs = Core::System::GetInstance().GPU().Maxwell3D().regs; 1241 const auto& regs = Core::System::GetInstance().GPU().Maxwell3D().regs;
1278 1242 UNIMPLEMENTED_IF_MSG(regs.alpha_test_enabled != 0 && regs.rt_control.count > 1,
1279 if (regs.alpha_test_enabled != 0 && regs.rt_control.count > 1) { 1243 "Alpha Testing is enabled with more than one rendertarget");
1280 LOG_CRITICAL(Render_OpenGL, "Alpha Testing is enabled with Multiple Render Targets, "
1281 "this behavior is undefined.");
1282 UNREACHABLE();
1283 }
1284} 1244}
1285 1245
1286} // namespace OpenGL 1246} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h
index 2f0524f85..7e63f8008 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer.h
@@ -94,11 +94,12 @@ private:
94 private: 94 private:
95 Tegra::Texture::TextureFilter mag_filter = Tegra::Texture::TextureFilter::Nearest; 95 Tegra::Texture::TextureFilter mag_filter = Tegra::Texture::TextureFilter::Nearest;
96 Tegra::Texture::TextureFilter min_filter = Tegra::Texture::TextureFilter::Nearest; 96 Tegra::Texture::TextureFilter min_filter = Tegra::Texture::TextureFilter::Nearest;
97 Tegra::Texture::TextureMipmapFilter mip_filter = Tegra::Texture::TextureMipmapFilter::None; 97 Tegra::Texture::TextureMipmapFilter mipmap_filter =
98 Tegra::Texture::TextureMipmapFilter::None;
98 Tegra::Texture::WrapMode wrap_u = Tegra::Texture::WrapMode::ClampToEdge; 99 Tegra::Texture::WrapMode wrap_u = Tegra::Texture::WrapMode::ClampToEdge;
99 Tegra::Texture::WrapMode wrap_v = Tegra::Texture::WrapMode::ClampToEdge; 100 Tegra::Texture::WrapMode wrap_v = Tegra::Texture::WrapMode::ClampToEdge;
100 Tegra::Texture::WrapMode wrap_p = Tegra::Texture::WrapMode::ClampToEdge; 101 Tegra::Texture::WrapMode wrap_p = Tegra::Texture::WrapMode::ClampToEdge;
101 bool uses_depth_compare = false; 102 bool use_depth_compare = false;
102 Tegra::Texture::DepthCompareFunc depth_compare_func = 103 Tegra::Texture::DepthCompareFunc depth_compare_func =
103 Tegra::Texture::DepthCompareFunc::Always; 104 Tegra::Texture::DepthCompareFunc::Always;
104 GLvec4 border_color = {}; 105 GLvec4 border_color = {};
diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
index b5a9722f9..e9eb6e921 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
@@ -21,7 +21,7 @@
21#include "video_core/renderer_opengl/gl_rasterizer_cache.h" 21#include "video_core/renderer_opengl/gl_rasterizer_cache.h"
22#include "video_core/renderer_opengl/utils.h" 22#include "video_core/renderer_opengl/utils.h"
23#include "video_core/surface.h" 23#include "video_core/surface.h"
24#include "video_core/textures/astc.h" 24#include "video_core/textures/convert.h"
25#include "video_core/textures/decoders.h" 25#include "video_core/textures/decoders.h"
26 26
27namespace OpenGL { 27namespace OpenGL {
@@ -400,6 +400,27 @@ static const FormatTuple& GetFormatTuple(PixelFormat pixel_format, ComponentType
400 return format; 400 return format;
401} 401}
402 402
403/// Returns the discrepant array target
404constexpr GLenum GetArrayDiscrepantTarget(SurfaceTarget target) {
405 switch (target) {
406 case SurfaceTarget::Texture1D:
407 return GL_TEXTURE_1D_ARRAY;
408 case SurfaceTarget::Texture2D:
409 return GL_TEXTURE_2D_ARRAY;
410 case SurfaceTarget::Texture3D:
411 return GL_NONE;
412 case SurfaceTarget::Texture1DArray:
413 return GL_TEXTURE_1D;
414 case SurfaceTarget::Texture2DArray:
415 return GL_TEXTURE_2D;
416 case SurfaceTarget::TextureCubemap:
417 return GL_TEXTURE_CUBE_MAP_ARRAY;
418 case SurfaceTarget::TextureCubeArray:
419 return GL_TEXTURE_CUBE_MAP;
420 }
421 return GL_NONE;
422}
423
403Common::Rectangle<u32> SurfaceParams::GetRect(u32 mip_level) const { 424Common::Rectangle<u32> SurfaceParams::GetRect(u32 mip_level) const {
404 u32 actual_height{std::max(1U, unaligned_height >> mip_level)}; 425 u32 actual_height{std::max(1U, unaligned_height >> mip_level)};
405 if (IsPixelFormatASTC(pixel_format)) { 426 if (IsPixelFormatASTC(pixel_format)) {
@@ -597,103 +618,6 @@ CachedSurface::CachedSurface(const SurfaceParams& params)
597 } 618 }
598} 619}
599 620
600static void ConvertS8Z24ToZ24S8(std::vector<u8>& data, u32 width, u32 height, bool reverse) {
601 union S8Z24 {
602 BitField<0, 24, u32> z24;
603 BitField<24, 8, u32> s8;
604 };
605 static_assert(sizeof(S8Z24) == 4, "S8Z24 is incorrect size");
606
607 union Z24S8 {
608 BitField<0, 8, u32> s8;
609 BitField<8, 24, u32> z24;
610 };
611 static_assert(sizeof(Z24S8) == 4, "Z24S8 is incorrect size");
612
613 S8Z24 s8z24_pixel{};
614 Z24S8 z24s8_pixel{};
615 constexpr auto bpp{GetBytesPerPixel(PixelFormat::S8Z24)};
616 for (std::size_t y = 0; y < height; ++y) {
617 for (std::size_t x = 0; x < width; ++x) {
618 const std::size_t offset{bpp * (y * width + x)};
619 if (reverse) {
620 std::memcpy(&z24s8_pixel, &data[offset], sizeof(Z24S8));
621 s8z24_pixel.s8.Assign(z24s8_pixel.s8);
622 s8z24_pixel.z24.Assign(z24s8_pixel.z24);
623 std::memcpy(&data[offset], &s8z24_pixel, sizeof(S8Z24));
624 } else {
625 std::memcpy(&s8z24_pixel, &data[offset], sizeof(S8Z24));
626 z24s8_pixel.s8.Assign(s8z24_pixel.s8);
627 z24s8_pixel.z24.Assign(s8z24_pixel.z24);
628 std::memcpy(&data[offset], &z24s8_pixel, sizeof(Z24S8));
629 }
630 }
631 }
632}
633
634/**
635 * Helper function to perform software conversion (as needed) when loading a buffer from Switch
636 * memory. This is for Maxwell pixel formats that cannot be represented as-is in OpenGL or with
637 * typical desktop GPUs.
638 */
639static void ConvertFormatAsNeeded_LoadGLBuffer(std::vector<u8>& data, PixelFormat pixel_format,
640 u32 width, u32 height, u32 depth) {
641 switch (pixel_format) {
642 case PixelFormat::ASTC_2D_4X4:
643 case PixelFormat::ASTC_2D_8X8:
644 case PixelFormat::ASTC_2D_8X5:
645 case PixelFormat::ASTC_2D_5X4:
646 case PixelFormat::ASTC_2D_5X5:
647 case PixelFormat::ASTC_2D_4X4_SRGB:
648 case PixelFormat::ASTC_2D_8X8_SRGB:
649 case PixelFormat::ASTC_2D_8X5_SRGB:
650 case PixelFormat::ASTC_2D_5X4_SRGB:
651 case PixelFormat::ASTC_2D_5X5_SRGB:
652 case PixelFormat::ASTC_2D_10X8:
653 case PixelFormat::ASTC_2D_10X8_SRGB: {
654 // Convert ASTC pixel formats to RGBA8, as most desktop GPUs do not support ASTC.
655 u32 block_width{};
656 u32 block_height{};
657 std::tie(block_width, block_height) = GetASTCBlockSize(pixel_format);
658 data =
659 Tegra::Texture::ASTC::Decompress(data, width, height, depth, block_width, block_height);
660 break;
661 }
662 case PixelFormat::S8Z24:
663 // Convert the S8Z24 depth format to Z24S8, as OpenGL does not support S8Z24.
664 ConvertS8Z24ToZ24S8(data, width, height, false);
665 break;
666 }
667}
668
669/**
670 * Helper function to perform software conversion (as needed) when flushing a buffer from OpenGL to
671 * Switch memory. This is for Maxwell pixel formats that cannot be represented as-is in OpenGL or
672 * with typical desktop GPUs.
673 */
674static void ConvertFormatAsNeeded_FlushGLBuffer(std::vector<u8>& data, PixelFormat pixel_format,
675 u32 width, u32 height) {
676 switch (pixel_format) {
677 case PixelFormat::ASTC_2D_4X4:
678 case PixelFormat::ASTC_2D_8X8:
679 case PixelFormat::ASTC_2D_4X4_SRGB:
680 case PixelFormat::ASTC_2D_8X8_SRGB:
681 case PixelFormat::ASTC_2D_5X5:
682 case PixelFormat::ASTC_2D_5X5_SRGB:
683 case PixelFormat::ASTC_2D_10X8:
684 case PixelFormat::ASTC_2D_10X8_SRGB: {
685 LOG_CRITICAL(HW_GPU, "Conversion of format {} after texture flushing is not implemented",
686 static_cast<u32>(pixel_format));
687 UNREACHABLE();
688 break;
689 }
690 case PixelFormat::S8Z24:
691 // Convert the Z24S8 depth format to S8Z24, as OpenGL does not support S8Z24.
692 ConvertS8Z24ToZ24S8(data, width, height, true);
693 break;
694 }
695}
696
697MICROPROFILE_DEFINE(OpenGL_SurfaceLoad, "OpenGL", "Surface Load", MP_RGB(128, 192, 64)); 621MICROPROFILE_DEFINE(OpenGL_SurfaceLoad, "OpenGL", "Surface Load", MP_RGB(128, 192, 64));
698void CachedSurface::LoadGLBuffer() { 622void CachedSurface::LoadGLBuffer() {
699 MICROPROFILE_SCOPE(OpenGL_SurfaceLoad); 623 MICROPROFILE_SCOPE(OpenGL_SurfaceLoad);
@@ -722,8 +646,16 @@ void CachedSurface::LoadGLBuffer() {
722 } 646 }
723 } 647 }
724 for (u32 i = 0; i < params.max_mip_level; i++) { 648 for (u32 i = 0; i < params.max_mip_level; i++) {
725 ConvertFormatAsNeeded_LoadGLBuffer(gl_buffer[i], params.pixel_format, params.MipWidth(i), 649 const u32 width = params.MipWidth(i);
726 params.MipHeight(i), params.MipDepth(i)); 650 const u32 height = params.MipHeight(i);
651 const u32 depth = params.MipDepth(i);
652 if (VideoCore::Surface::IsPixelFormatASTC(params.pixel_format)) {
653 // Reserve size for RGBA8 conversion
654 constexpr std::size_t rgba_bpp = 4;
655 gl_buffer[i].resize(std::max(gl_buffer[i].size(), width * height * depth * rgba_bpp));
656 }
657 Tegra::Texture::ConvertFromGuestToHost(gl_buffer[i].data(), params.pixel_format, width,
658 height, depth, true, true);
727 } 659 }
728} 660}
729 661
@@ -746,8 +678,8 @@ void CachedSurface::FlushGLBuffer() {
746 glGetTextureImage(texture.handle, 0, tuple.format, tuple.type, 678 glGetTextureImage(texture.handle, 0, tuple.format, tuple.type,
747 static_cast<GLsizei>(gl_buffer[0].size()), gl_buffer[0].data()); 679 static_cast<GLsizei>(gl_buffer[0].size()), gl_buffer[0].data());
748 glPixelStorei(GL_PACK_ROW_LENGTH, 0); 680 glPixelStorei(GL_PACK_ROW_LENGTH, 0);
749 ConvertFormatAsNeeded_FlushGLBuffer(gl_buffer[0], params.pixel_format, params.width, 681 Tegra::Texture::ConvertFromHostToGuest(gl_buffer[0].data(), params.pixel_format, params.width,
750 params.height); 682 params.height, params.depth, true, true);
751 const u8* const texture_src_data = Memory::GetPointer(params.addr); 683 const u8* const texture_src_data = Memory::GetPointer(params.addr);
752 ASSERT(texture_src_data); 684 ASSERT(texture_src_data);
753 if (params.is_tiled) { 685 if (params.is_tiled) {
@@ -884,20 +816,22 @@ void CachedSurface::UploadGLMipmapTexture(u32 mip_map, GLuint read_fb_handle,
884 glPixelStorei(GL_UNPACK_ROW_LENGTH, 0); 816 glPixelStorei(GL_UNPACK_ROW_LENGTH, 0);
885} 817}
886 818
887void CachedSurface::EnsureTextureView() { 819void CachedSurface::EnsureTextureDiscrepantView() {
888 if (texture_view.handle != 0) 820 if (discrepant_view.handle != 0)
889 return; 821 return;
890 822
891 const GLenum target{TargetLayer()}; 823 const GLenum target{GetArrayDiscrepantTarget(params.target)};
824 ASSERT(target != GL_NONE);
825
892 const GLuint num_layers{target == GL_TEXTURE_CUBE_MAP_ARRAY ? 6u : 1u}; 826 const GLuint num_layers{target == GL_TEXTURE_CUBE_MAP_ARRAY ? 6u : 1u};
893 constexpr GLuint min_layer = 0; 827 constexpr GLuint min_layer = 0;
894 constexpr GLuint min_level = 0; 828 constexpr GLuint min_level = 0;
895 829
896 glGenTextures(1, &texture_view.handle); 830 glGenTextures(1, &discrepant_view.handle);
897 glTextureView(texture_view.handle, target, texture.handle, gl_internal_format, min_level, 831 glTextureView(discrepant_view.handle, target, texture.handle, gl_internal_format, min_level,
898 params.max_mip_level, min_layer, num_layers); 832 params.max_mip_level, min_layer, num_layers);
899 ApplyTextureDefaults(texture_view.handle, params.max_mip_level); 833 ApplyTextureDefaults(discrepant_view.handle, params.max_mip_level);
900 glTextureParameteriv(texture_view.handle, GL_TEXTURE_SWIZZLE_RGBA, 834 glTextureParameteriv(discrepant_view.handle, GL_TEXTURE_SWIZZLE_RGBA,
901 reinterpret_cast<const GLint*>(swizzle.data())); 835 reinterpret_cast<const GLint*>(swizzle.data()));
902} 836}
903 837
@@ -923,8 +857,8 @@ void CachedSurface::UpdateSwizzle(Tegra::Texture::SwizzleSource swizzle_x,
923 swizzle = {new_x, new_y, new_z, new_w}; 857 swizzle = {new_x, new_y, new_z, new_w};
924 const auto swizzle_data = reinterpret_cast<const GLint*>(swizzle.data()); 858 const auto swizzle_data = reinterpret_cast<const GLint*>(swizzle.data());
925 glTextureParameteriv(texture.handle, GL_TEXTURE_SWIZZLE_RGBA, swizzle_data); 859 glTextureParameteriv(texture.handle, GL_TEXTURE_SWIZZLE_RGBA, swizzle_data);
926 if (texture_view.handle != 0) { 860 if (discrepant_view.handle != 0) {
927 glTextureParameteriv(texture_view.handle, GL_TEXTURE_SWIZZLE_RGBA, swizzle_data); 861 glTextureParameteriv(discrepant_view.handle, GL_TEXTURE_SWIZZLE_RGBA, swizzle_data);
928 } 862 }
929} 863}
930 864
diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.h b/src/video_core/renderer_opengl/gl_rasterizer_cache.h
index 797bbdc9c..9cf6f50be 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer_cache.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.h
@@ -367,31 +367,19 @@ public:
367 return texture; 367 return texture;
368 } 368 }
369 369
370 const OGLTexture& TextureLayer() { 370 const OGLTexture& Texture(bool as_array) {
371 if (params.is_array) { 371 if (params.is_array == as_array) {
372 return Texture(); 372 return texture;
373 } else {
374 EnsureTextureDiscrepantView();
375 return discrepant_view;
373 } 376 }
374 EnsureTextureView();
375 return texture_view;
376 } 377 }
377 378
378 GLenum Target() const { 379 GLenum Target() const {
379 return gl_target; 380 return gl_target;
380 } 381 }
381 382
382 GLenum TargetLayer() const {
383 using VideoCore::Surface::SurfaceTarget;
384 switch (params.target) {
385 case SurfaceTarget::Texture1D:
386 return GL_TEXTURE_1D_ARRAY;
387 case SurfaceTarget::Texture2D:
388 return GL_TEXTURE_2D_ARRAY;
389 case SurfaceTarget::TextureCubemap:
390 return GL_TEXTURE_CUBE_MAP_ARRAY;
391 }
392 return Target();
393 }
394
395 const SurfaceParams& GetSurfaceParams() const { 383 const SurfaceParams& GetSurfaceParams() const {
396 return params; 384 return params;
397 } 385 }
@@ -431,10 +419,10 @@ public:
431private: 419private:
432 void UploadGLMipmapTexture(u32 mip_map, GLuint read_fb_handle, GLuint draw_fb_handle); 420 void UploadGLMipmapTexture(u32 mip_map, GLuint read_fb_handle, GLuint draw_fb_handle);
433 421
434 void EnsureTextureView(); 422 void EnsureTextureDiscrepantView();
435 423
436 OGLTexture texture; 424 OGLTexture texture;
437 OGLTexture texture_view; 425 OGLTexture discrepant_view;
438 std::vector<std::vector<u8>> gl_buffer; 426 std::vector<std::vector<u8>> gl_buffer;
439 SurfaceParams params{}; 427 SurfaceParams params{};
440 GLenum gl_target{}; 428 GLenum gl_target{};
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
index 72ff6ac6a..11d1169f0 100644
--- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
@@ -5,7 +5,9 @@
5#include <array> 5#include <array>
6#include <string> 6#include <string>
7#include <string_view> 7#include <string_view>
8#include <utility>
8#include <variant> 9#include <variant>
10#include <vector>
9 11
10#include <fmt/format.h> 12#include <fmt/format.h>
11 13
@@ -717,7 +719,7 @@ private:
717 } 719 }
718 720
719 std::string GenerateTexture(Operation operation, const std::string& func, 721 std::string GenerateTexture(Operation operation, const std::string& func,
720 bool is_extra_int = false) { 722 const std::vector<std::pair<Type, Node>>& extras) {
721 constexpr std::array<const char*, 4> coord_constructors = {"float", "vec2", "vec3", "vec4"}; 723 constexpr std::array<const char*, 4> coord_constructors = {"float", "vec2", "vec3", "vec4"};
722 724
723 const auto meta = std::get_if<MetaTexture>(&operation.GetMeta()); 725 const auto meta = std::get_if<MetaTexture>(&operation.GetMeta());
@@ -738,36 +740,47 @@ private:
738 expr += Visit(operation[i]); 740 expr += Visit(operation[i]);
739 741
740 const std::size_t next = i + 1; 742 const std::size_t next = i + 1;
741 if (next < count || has_array || has_shadow) 743 if (next < count)
742 expr += ", "; 744 expr += ", ";
743 } 745 }
744 if (has_array) { 746 if (has_array) {
745 expr += "float(ftoi(" + Visit(meta->array) + "))"; 747 expr += ", float(ftoi(" + Visit(meta->array) + "))";
746 } 748 }
747 if (has_shadow) { 749 if (has_shadow) {
748 if (has_array) 750 expr += ", " + Visit(meta->depth_compare);
749 expr += ", ";
750 expr += Visit(meta->depth_compare);
751 } 751 }
752 expr += ')'; 752 expr += ')';
753 753
754 for (const Node extra : meta->extras) { 754 for (const auto& extra_pair : extras) {
755 const auto [type, operand] = extra_pair;
756 if (operand == nullptr) {
757 continue;
758 }
755 expr += ", "; 759 expr += ", ";
756 if (is_extra_int) { 760
757 if (const auto immediate = std::get_if<ImmediateNode>(extra)) { 761 switch (type) {
762 case Type::Int:
763 if (const auto immediate = std::get_if<ImmediateNode>(operand)) {
758 // Inline the string as an immediate integer in GLSL (some extra arguments are 764 // Inline the string as an immediate integer in GLSL (some extra arguments are
759 // required to be constant) 765 // required to be constant)
760 expr += std::to_string(static_cast<s32>(immediate->GetValue())); 766 expr += std::to_string(static_cast<s32>(immediate->GetValue()));
761 } else { 767 } else {
762 expr += "ftoi(" + Visit(extra) + ')'; 768 expr += "ftoi(" + Visit(operand) + ')';
763 } 769 }
764 } else { 770 break;
765 expr += Visit(extra); 771 case Type::Float:
772 expr += Visit(operand);
773 break;
774 default: {
775 const auto type_int = static_cast<u32>(type);
776 UNIMPLEMENTED_MSG("Unimplemented extra type={}", type_int);
777 expr += '0';
778 break;
779 }
766 } 780 }
767 } 781 }
768 782
769 expr += ')'; 783 return expr + ')';
770 return expr;
771 } 784 }
772 785
773 std::string Assign(Operation operation) { 786 std::string Assign(Operation operation) {
@@ -1146,7 +1159,7 @@ private:
1146 const auto meta = std::get_if<MetaTexture>(&operation.GetMeta()); 1159 const auto meta = std::get_if<MetaTexture>(&operation.GetMeta());
1147 ASSERT(meta); 1160 ASSERT(meta);
1148 1161
1149 std::string expr = GenerateTexture(operation, "texture"); 1162 std::string expr = GenerateTexture(operation, "texture", {{Type::Float, meta->bias}});
1150 if (meta->sampler.IsShadow()) { 1163 if (meta->sampler.IsShadow()) {
1151 expr = "vec4(" + expr + ')'; 1164 expr = "vec4(" + expr + ')';
1152 } 1165 }
@@ -1157,7 +1170,7 @@ private:
1157 const auto meta = std::get_if<MetaTexture>(&operation.GetMeta()); 1170 const auto meta = std::get_if<MetaTexture>(&operation.GetMeta());
1158 ASSERT(meta); 1171 ASSERT(meta);
1159 1172
1160 std::string expr = GenerateTexture(operation, "textureLod"); 1173 std::string expr = GenerateTexture(operation, "textureLod", {{Type::Float, meta->lod}});
1161 if (meta->sampler.IsShadow()) { 1174 if (meta->sampler.IsShadow()) {
1162 expr = "vec4(" + expr + ')'; 1175 expr = "vec4(" + expr + ')';
1163 } 1176 }
@@ -1168,7 +1181,8 @@ private:
1168 const auto meta = std::get_if<MetaTexture>(&operation.GetMeta()); 1181 const auto meta = std::get_if<MetaTexture>(&operation.GetMeta());
1169 ASSERT(meta); 1182 ASSERT(meta);
1170 1183
1171 return GenerateTexture(operation, "textureGather", !meta->sampler.IsShadow()) + 1184 const auto type = meta->sampler.IsShadow() ? Type::Float : Type::Int;
1185 return GenerateTexture(operation, "textureGather", {{type, meta->component}}) +
1172 GetSwizzle(meta->element); 1186 GetSwizzle(meta->element);
1173 } 1187 }
1174 1188
@@ -1197,8 +1211,8 @@ private:
1197 ASSERT(meta); 1211 ASSERT(meta);
1198 1212
1199 if (meta->element < 2) { 1213 if (meta->element < 2) {
1200 return "itof(int((" + GenerateTexture(operation, "textureQueryLod") + " * vec2(256))" + 1214 return "itof(int((" + GenerateTexture(operation, "textureQueryLod", {}) +
1201 GetSwizzle(meta->element) + "))"; 1215 " * vec2(256))" + GetSwizzle(meta->element) + "))";
1202 } 1216 }
1203 return "0"; 1217 return "0";
1204 } 1218 }
@@ -1224,9 +1238,9 @@ private:
1224 else if (next < count) 1238 else if (next < count)
1225 expr += ", "; 1239 expr += ", ";
1226 } 1240 }
1227 for (std::size_t i = 0; i < meta->extras.size(); ++i) { 1241 if (meta->lod) {
1228 expr += ", "; 1242 expr += ", ";
1229 expr += CastOperand(Visit(meta->extras.at(i)), Type::Int); 1243 expr += CastOperand(Visit(meta->lod), Type::Int);
1230 } 1244 }
1231 expr += ')'; 1245 expr += ')';
1232 1246
diff --git a/src/video_core/renderer_opengl/gl_state.cpp b/src/video_core/renderer_opengl/gl_state.cpp
index 219f08053..9419326a3 100644
--- a/src/video_core/renderer_opengl/gl_state.cpp
+++ b/src/video_core/renderer_opengl/gl_state.cpp
@@ -461,7 +461,7 @@ void OpenGLState::ApplyTextures() const {
461 461
462 if (has_delta) { 462 if (has_delta) {
463 glBindTextures(static_cast<GLuint>(first), static_cast<GLsizei>(last - first + 1), 463 glBindTextures(static_cast<GLuint>(first), static_cast<GLsizei>(last - first + 1),
464 textures.data()); 464 textures.data() + first);
465 } 465 }
466} 466}
467 467
@@ -482,7 +482,7 @@ void OpenGLState::ApplySamplers() const {
482 } 482 }
483 if (has_delta) { 483 if (has_delta) {
484 glBindSamplers(static_cast<GLuint>(first), static_cast<GLsizei>(last - first + 1), 484 glBindSamplers(static_cast<GLuint>(first), static_cast<GLsizei>(last - first + 1),
485 samplers.data()); 485 samplers.data() + first);
486 } 486 }
487} 487}
488 488
diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp
index e60b2eb44..8b510b6ae 100644
--- a/src/video_core/renderer_opengl/renderer_opengl.cpp
+++ b/src/video_core/renderer_opengl/renderer_opengl.cpp
@@ -244,6 +244,21 @@ void RendererOpenGL::InitOpenGLObjects() {
244 LoadColorToActiveGLTexture(0, 0, 0, 0, screen_info.texture); 244 LoadColorToActiveGLTexture(0, 0, 0, 0, screen_info.texture);
245} 245}
246 246
247void RendererOpenGL::AddTelemetryFields() {
248 const char* const gl_version{reinterpret_cast<char const*>(glGetString(GL_VERSION))};
249 const char* const gpu_vendor{reinterpret_cast<char const*>(glGetString(GL_VENDOR))};
250 const char* const gpu_model{reinterpret_cast<char const*>(glGetString(GL_RENDERER))};
251
252 LOG_INFO(Render_OpenGL, "GL_VERSION: {}", gl_version);
253 LOG_INFO(Render_OpenGL, "GL_VENDOR: {}", gpu_vendor);
254 LOG_INFO(Render_OpenGL, "GL_RENDERER: {}", gpu_model);
255
256 auto& telemetry_session = system.TelemetrySession();
257 telemetry_session.AddField(Telemetry::FieldType::UserSystem, "GPU_Vendor", gpu_vendor);
258 telemetry_session.AddField(Telemetry::FieldType::UserSystem, "GPU_Model", gpu_model);
259 telemetry_session.AddField(Telemetry::FieldType::UserSystem, "GPU_OpenGL_Version", gl_version);
260}
261
247void RendererOpenGL::CreateRasterizer() { 262void RendererOpenGL::CreateRasterizer() {
248 if (rasterizer) { 263 if (rasterizer) {
249 return; 264 return;
@@ -466,17 +481,7 @@ bool RendererOpenGL::Init() {
466 glDebugMessageCallback(DebugHandler, nullptr); 481 glDebugMessageCallback(DebugHandler, nullptr);
467 } 482 }
468 483
469 const char* gl_version{reinterpret_cast<char const*>(glGetString(GL_VERSION))}; 484 AddTelemetryFields();
470 const char* gpu_vendor{reinterpret_cast<char const*>(glGetString(GL_VENDOR))};
471 const char* gpu_model{reinterpret_cast<char const*>(glGetString(GL_RENDERER))};
472
473 LOG_INFO(Render_OpenGL, "GL_VERSION: {}", gl_version);
474 LOG_INFO(Render_OpenGL, "GL_VENDOR: {}", gpu_vendor);
475 LOG_INFO(Render_OpenGL, "GL_RENDERER: {}", gpu_model);
476
477 Core::Telemetry().AddField(Telemetry::FieldType::UserSystem, "GPU_Vendor", gpu_vendor);
478 Core::Telemetry().AddField(Telemetry::FieldType::UserSystem, "GPU_Model", gpu_model);
479 Core::Telemetry().AddField(Telemetry::FieldType::UserSystem, "GPU_OpenGL_Version", gl_version);
480 485
481 if (!GLAD_GL_VERSION_4_3) { 486 if (!GLAD_GL_VERSION_4_3) {
482 return false; 487 return false;
diff --git a/src/video_core/renderer_opengl/renderer_opengl.h b/src/video_core/renderer_opengl/renderer_opengl.h
index c168fa89e..6cbf9d2cb 100644
--- a/src/video_core/renderer_opengl/renderer_opengl.h
+++ b/src/video_core/renderer_opengl/renderer_opengl.h
@@ -60,6 +60,7 @@ public:
60 60
61private: 61private:
62 void InitOpenGLObjects(); 62 void InitOpenGLObjects();
63 void AddTelemetryFields();
63 void CreateRasterizer(); 64 void CreateRasterizer();
64 65
65 void ConfigureFramebufferTexture(TextureInfo& texture, 66 void ConfigureFramebufferTexture(TextureInfo& texture,
diff --git a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp
new file mode 100644
index 000000000..34bf26ff2
--- /dev/null
+++ b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp
@@ -0,0 +1,483 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/assert.h"
6#include "common/common_types.h"
7#include "common/logging/log.h"
8#include "video_core/engines/maxwell_3d.h"
9#include "video_core/renderer_vulkan/declarations.h"
10#include "video_core/renderer_vulkan/maxwell_to_vk.h"
11#include "video_core/renderer_vulkan/vk_device.h"
12#include "video_core/surface.h"
13
14namespace Vulkan::MaxwellToVK {
15
16namespace Sampler {
17
18vk::Filter Filter(Tegra::Texture::TextureFilter filter) {
19 switch (filter) {
20 case Tegra::Texture::TextureFilter::Linear:
21 return vk::Filter::eLinear;
22 case Tegra::Texture::TextureFilter::Nearest:
23 return vk::Filter::eNearest;
24 }
25 UNIMPLEMENTED_MSG("Unimplemented sampler filter={}", static_cast<u32>(filter));
26 return {};
27}
28
29vk::SamplerMipmapMode MipmapMode(Tegra::Texture::TextureMipmapFilter mipmap_filter) {
30 switch (mipmap_filter) {
31 case Tegra::Texture::TextureMipmapFilter::None:
32 // TODO(Rodrigo): None seems to be mapped to OpenGL's mag and min filters without mipmapping
33 // (e.g. GL_NEAREST and GL_LINEAR). Vulkan doesn't have such a thing, find out if we have to
34 // use an image view with a single mipmap level to emulate this.
35 return vk::SamplerMipmapMode::eLinear;
36 case Tegra::Texture::TextureMipmapFilter::Linear:
37 return vk::SamplerMipmapMode::eLinear;
38 case Tegra::Texture::TextureMipmapFilter::Nearest:
39 return vk::SamplerMipmapMode::eNearest;
40 }
41 UNIMPLEMENTED_MSG("Unimplemented sampler mipmap mode={}", static_cast<u32>(mipmap_filter));
42 return {};
43}
44
45vk::SamplerAddressMode WrapMode(Tegra::Texture::WrapMode wrap_mode) {
46 switch (wrap_mode) {
47 case Tegra::Texture::WrapMode::Wrap:
48 return vk::SamplerAddressMode::eRepeat;
49 case Tegra::Texture::WrapMode::Mirror:
50 return vk::SamplerAddressMode::eMirroredRepeat;
51 case Tegra::Texture::WrapMode::ClampToEdge:
52 return vk::SamplerAddressMode::eClampToEdge;
53 case Tegra::Texture::WrapMode::Border:
54 return vk::SamplerAddressMode::eClampToBorder;
55 case Tegra::Texture::WrapMode::ClampOGL:
56 // TODO(Rodrigo): GL_CLAMP was removed as of OpenGL 3.1, to implement GL_CLAMP, we can use
57 // eClampToBorder to get the border color of the texture, and then sample the edge to
58 // manually mix them. However the shader part of this is not yet implemented.
59 return vk::SamplerAddressMode::eClampToBorder;
60 case Tegra::Texture::WrapMode::MirrorOnceClampToEdge:
61 return vk::SamplerAddressMode::eMirrorClampToEdge;
62 case Tegra::Texture::WrapMode::MirrorOnceBorder:
63 UNIMPLEMENTED();
64 return vk::SamplerAddressMode::eMirrorClampToEdge;
65 }
66 UNIMPLEMENTED_MSG("Unimplemented wrap mode={}", static_cast<u32>(wrap_mode));
67 return {};
68}
69
70vk::CompareOp DepthCompareFunction(Tegra::Texture::DepthCompareFunc depth_compare_func) {
71 switch (depth_compare_func) {
72 case Tegra::Texture::DepthCompareFunc::Never:
73 return vk::CompareOp::eNever;
74 case Tegra::Texture::DepthCompareFunc::Less:
75 return vk::CompareOp::eLess;
76 case Tegra::Texture::DepthCompareFunc::LessEqual:
77 return vk::CompareOp::eLessOrEqual;
78 case Tegra::Texture::DepthCompareFunc::Equal:
79 return vk::CompareOp::eEqual;
80 case Tegra::Texture::DepthCompareFunc::NotEqual:
81 return vk::CompareOp::eNotEqual;
82 case Tegra::Texture::DepthCompareFunc::Greater:
83 return vk::CompareOp::eGreater;
84 case Tegra::Texture::DepthCompareFunc::GreaterEqual:
85 return vk::CompareOp::eGreaterOrEqual;
86 case Tegra::Texture::DepthCompareFunc::Always:
87 return vk::CompareOp::eAlways;
88 }
89 UNIMPLEMENTED_MSG("Unimplemented sampler depth compare function={}",
90 static_cast<u32>(depth_compare_func));
91 return {};
92}
93
94} // namespace Sampler
95
96struct FormatTuple {
97 vk::Format format; ///< Vulkan format
98 ComponentType component_type; ///< Abstracted component type
99 bool attachable; ///< True when this format can be used as an attachment
100};
101
102static constexpr std::array<FormatTuple, VideoCore::Surface::MaxPixelFormat> tex_format_tuples = {{
103 {vk::Format::eA8B8G8R8UnormPack32, ComponentType::UNorm, true}, // ABGR8U
104 {vk::Format::eUndefined, ComponentType::Invalid, false}, // ABGR8S
105 {vk::Format::eUndefined, ComponentType::Invalid, false}, // ABGR8UI
106 {vk::Format::eB5G6R5UnormPack16, ComponentType::UNorm, false}, // B5G6R5U
107 {vk::Format::eA2B10G10R10UnormPack32, ComponentType::UNorm, true}, // A2B10G10R10U
108 {vk::Format::eUndefined, ComponentType::Invalid, false}, // A1B5G5R5U
109 {vk::Format::eR8Unorm, ComponentType::UNorm, true}, // R8U
110 {vk::Format::eUndefined, ComponentType::Invalid, false}, // R8UI
111 {vk::Format::eUndefined, ComponentType::Invalid, false}, // RGBA16F
112 {vk::Format::eUndefined, ComponentType::Invalid, false}, // RGBA16U
113 {vk::Format::eUndefined, ComponentType::Invalid, false}, // RGBA16UI
114 {vk::Format::eUndefined, ComponentType::Invalid, false}, // R11FG11FB10F
115 {vk::Format::eUndefined, ComponentType::Invalid, false}, // RGBA32UI
116 {vk::Format::eBc1RgbaUnormBlock, ComponentType::UNorm, false}, // DXT1
117 {vk::Format::eBc2UnormBlock, ComponentType::UNorm, false}, // DXT23
118 {vk::Format::eBc3UnormBlock, ComponentType::UNorm, false}, // DXT45
119 {vk::Format::eBc4UnormBlock, ComponentType::UNorm, false}, // DXN1
120 {vk::Format::eUndefined, ComponentType::Invalid, false}, // DXN2UNORM
121 {vk::Format::eUndefined, ComponentType::Invalid, false}, // DXN2SNORM
122 {vk::Format::eUndefined, ComponentType::Invalid, false}, // BC7U
123 {vk::Format::eUndefined, ComponentType::Invalid, false}, // BC6H_UF16
124 {vk::Format::eUndefined, ComponentType::Invalid, false}, // BC6H_SF16
125 {vk::Format::eUndefined, ComponentType::Invalid, false}, // ASTC_2D_4X4
126 {vk::Format::eUndefined, ComponentType::Invalid, false}, // BGRA8
127 {vk::Format::eUndefined, ComponentType::Invalid, false}, // RGBA32F
128 {vk::Format::eUndefined, ComponentType::Invalid, false}, // RG32F
129 {vk::Format::eUndefined, ComponentType::Invalid, false}, // R32F
130 {vk::Format::eUndefined, ComponentType::Invalid, false}, // R16F
131 {vk::Format::eUndefined, ComponentType::Invalid, false}, // R16U
132 {vk::Format::eUndefined, ComponentType::Invalid, false}, // R16S
133 {vk::Format::eUndefined, ComponentType::Invalid, false}, // R16UI
134 {vk::Format::eUndefined, ComponentType::Invalid, false}, // R16I
135 {vk::Format::eUndefined, ComponentType::Invalid, false}, // RG16
136 {vk::Format::eUndefined, ComponentType::Invalid, false}, // RG16F
137 {vk::Format::eUndefined, ComponentType::Invalid, false}, // RG16UI
138 {vk::Format::eUndefined, ComponentType::Invalid, false}, // RG16I
139 {vk::Format::eUndefined, ComponentType::Invalid, false}, // RG16S
140 {vk::Format::eUndefined, ComponentType::Invalid, false}, // RGB32F
141 {vk::Format::eA8B8G8R8SrgbPack32, ComponentType::UNorm, true}, // RGBA8_SRGB
142 {vk::Format::eUndefined, ComponentType::Invalid, false}, // RG8U
143 {vk::Format::eUndefined, ComponentType::Invalid, false}, // RG8S
144 {vk::Format::eUndefined, ComponentType::Invalid, false}, // RG32UI
145 {vk::Format::eUndefined, ComponentType::Invalid, false}, // R32UI
146 {vk::Format::eUndefined, ComponentType::Invalid, false}, // ASTC_2D_8X8
147 {vk::Format::eUndefined, ComponentType::Invalid, false}, // ASTC_2D_8X5
148 {vk::Format::eUndefined, ComponentType::Invalid, false}, // ASTC_2D_5X4
149
150 // Compressed sRGB formats
151 {vk::Format::eUndefined, ComponentType::Invalid, false}, // BGRA8_SRGB
152 {vk::Format::eUndefined, ComponentType::Invalid, false}, // DXT1_SRGB
153 {vk::Format::eUndefined, ComponentType::Invalid, false}, // DXT23_SRGB
154 {vk::Format::eUndefined, ComponentType::Invalid, false}, // DXT45_SRGB
155 {vk::Format::eUndefined, ComponentType::Invalid, false}, // BC7U_SRGB
156 {vk::Format::eUndefined, ComponentType::Invalid, false}, // ASTC_2D_4X4_SRGB
157 {vk::Format::eUndefined, ComponentType::Invalid, false}, // ASTC_2D_8X8_SRGB
158 {vk::Format::eUndefined, ComponentType::Invalid, false}, // ASTC_2D_8X5_SRGB
159 {vk::Format::eUndefined, ComponentType::Invalid, false}, // ASTC_2D_5X4_SRGB
160 {vk::Format::eUndefined, ComponentType::Invalid, false}, // ASTC_2D_5X5
161 {vk::Format::eUndefined, ComponentType::Invalid, false}, // ASTC_2D_5X5_SRGB
162 {vk::Format::eUndefined, ComponentType::Invalid, false}, // ASTC_2D_10X8
163 {vk::Format::eUndefined, ComponentType::Invalid, false}, // ASTC_2D_10X8_SRGB
164
165 // Depth formats
166 {vk::Format::eD32Sfloat, ComponentType::Float, true}, // Z32F
167 {vk::Format::eD16Unorm, ComponentType::UNorm, true}, // Z16
168
169 // DepthStencil formats
170 {vk::Format::eD24UnormS8Uint, ComponentType::UNorm, true}, // Z24S8
171 {vk::Format::eD24UnormS8Uint, ComponentType::UNorm, true}, // S8Z24 (emulated)
172 {vk::Format::eUndefined, ComponentType::Invalid, false}, // Z32FS8
173}};
174
175static constexpr bool IsZetaFormat(PixelFormat pixel_format) {
176 return pixel_format >= PixelFormat::MaxColorFormat &&
177 pixel_format < PixelFormat::MaxDepthStencilFormat;
178}
179
180std::pair<vk::Format, bool> SurfaceFormat(const VKDevice& device, FormatType format_type,
181 PixelFormat pixel_format, ComponentType component_type) {
182 ASSERT(static_cast<std::size_t>(pixel_format) < tex_format_tuples.size());
183
184 const auto tuple = tex_format_tuples[static_cast<u32>(pixel_format)];
185 UNIMPLEMENTED_IF_MSG(tuple.format == vk::Format::eUndefined,
186 "Unimplemented texture format with pixel format={} and component type={}",
187 static_cast<u32>(pixel_format), static_cast<u32>(component_type));
188 ASSERT_MSG(component_type == tuple.component_type, "Component type mismatch");
189
190 auto usage = vk::FormatFeatureFlagBits::eSampledImage |
191 vk::FormatFeatureFlagBits::eTransferDst | vk::FormatFeatureFlagBits::eTransferSrc;
192 if (tuple.attachable) {
193 usage |= IsZetaFormat(pixel_format) ? vk::FormatFeatureFlagBits::eDepthStencilAttachment
194 : vk::FormatFeatureFlagBits::eColorAttachment;
195 }
196 return {device.GetSupportedFormat(tuple.format, usage, format_type), tuple.attachable};
197}
198
199vk::ShaderStageFlagBits ShaderStage(Maxwell::ShaderStage stage) {
200 switch (stage) {
201 case Maxwell::ShaderStage::Vertex:
202 return vk::ShaderStageFlagBits::eVertex;
203 case Maxwell::ShaderStage::TesselationControl:
204 return vk::ShaderStageFlagBits::eTessellationControl;
205 case Maxwell::ShaderStage::TesselationEval:
206 return vk::ShaderStageFlagBits::eTessellationEvaluation;
207 case Maxwell::ShaderStage::Geometry:
208 return vk::ShaderStageFlagBits::eGeometry;
209 case Maxwell::ShaderStage::Fragment:
210 return vk::ShaderStageFlagBits::eFragment;
211 }
212 UNIMPLEMENTED_MSG("Unimplemented shader stage={}", static_cast<u32>(stage));
213 return {};
214}
215
216vk::PrimitiveTopology PrimitiveTopology(Maxwell::PrimitiveTopology topology) {
217 switch (topology) {
218 case Maxwell::PrimitiveTopology::Points:
219 return vk::PrimitiveTopology::ePointList;
220 case Maxwell::PrimitiveTopology::Lines:
221 return vk::PrimitiveTopology::eLineList;
222 case Maxwell::PrimitiveTopology::LineStrip:
223 return vk::PrimitiveTopology::eLineStrip;
224 case Maxwell::PrimitiveTopology::Triangles:
225 return vk::PrimitiveTopology::eTriangleList;
226 case Maxwell::PrimitiveTopology::TriangleStrip:
227 return vk::PrimitiveTopology::eTriangleStrip;
228 }
229 UNIMPLEMENTED_MSG("Unimplemented topology={}", static_cast<u32>(topology));
230 return {};
231}
232
233vk::Format VertexFormat(Maxwell::VertexAttribute::Type type, Maxwell::VertexAttribute::Size size) {
234 switch (type) {
235 case Maxwell::VertexAttribute::Type::SignedNorm:
236 break;
237 case Maxwell::VertexAttribute::Type::UnsignedNorm:
238 switch (size) {
239 case Maxwell::VertexAttribute::Size::Size_8_8_8_8:
240 return vk::Format::eR8G8B8A8Unorm;
241 default:
242 break;
243 }
244 break;
245 case Maxwell::VertexAttribute::Type::SignedInt:
246 break;
247 case Maxwell::VertexAttribute::Type::UnsignedInt:
248 switch (size) {
249 case Maxwell::VertexAttribute::Size::Size_32:
250 return vk::Format::eR32Uint;
251 default:
252 break;
253 }
254 case Maxwell::VertexAttribute::Type::UnsignedScaled:
255 case Maxwell::VertexAttribute::Type::SignedScaled:
256 break;
257 case Maxwell::VertexAttribute::Type::Float:
258 switch (size) {
259 case Maxwell::VertexAttribute::Size::Size_32_32_32_32:
260 return vk::Format::eR32G32B32A32Sfloat;
261 case Maxwell::VertexAttribute::Size::Size_32_32_32:
262 return vk::Format::eR32G32B32Sfloat;
263 case Maxwell::VertexAttribute::Size::Size_32_32:
264 return vk::Format::eR32G32Sfloat;
265 case Maxwell::VertexAttribute::Size::Size_32:
266 return vk::Format::eR32Sfloat;
267 default:
268 break;
269 }
270 break;
271 }
272 UNIMPLEMENTED_MSG("Unimplemented vertex format of type={} and size={}", static_cast<u32>(type),
273 static_cast<u32>(size));
274 return {};
275}
276
277vk::CompareOp ComparisonOp(Maxwell::ComparisonOp comparison) {
278 switch (comparison) {
279 case Maxwell::ComparisonOp::Never:
280 case Maxwell::ComparisonOp::NeverOld:
281 return vk::CompareOp::eNever;
282 case Maxwell::ComparisonOp::Less:
283 case Maxwell::ComparisonOp::LessOld:
284 return vk::CompareOp::eLess;
285 case Maxwell::ComparisonOp::Equal:
286 case Maxwell::ComparisonOp::EqualOld:
287 return vk::CompareOp::eEqual;
288 case Maxwell::ComparisonOp::LessEqual:
289 case Maxwell::ComparisonOp::LessEqualOld:
290 return vk::CompareOp::eLessOrEqual;
291 case Maxwell::ComparisonOp::Greater:
292 case Maxwell::ComparisonOp::GreaterOld:
293 return vk::CompareOp::eGreater;
294 case Maxwell::ComparisonOp::NotEqual:
295 case Maxwell::ComparisonOp::NotEqualOld:
296 return vk::CompareOp::eNotEqual;
297 case Maxwell::ComparisonOp::GreaterEqual:
298 case Maxwell::ComparisonOp::GreaterEqualOld:
299 return vk::CompareOp::eGreaterOrEqual;
300 case Maxwell::ComparisonOp::Always:
301 case Maxwell::ComparisonOp::AlwaysOld:
302 return vk::CompareOp::eAlways;
303 }
304 UNIMPLEMENTED_MSG("Unimplemented comparison op={}", static_cast<u32>(comparison));
305 return {};
306}
307
308vk::IndexType IndexFormat(Maxwell::IndexFormat index_format) {
309 switch (index_format) {
310 case Maxwell::IndexFormat::UnsignedByte:
311 UNIMPLEMENTED_MSG("Vulkan does not support native u8 index format");
312 return vk::IndexType::eUint16;
313 case Maxwell::IndexFormat::UnsignedShort:
314 return vk::IndexType::eUint16;
315 case Maxwell::IndexFormat::UnsignedInt:
316 return vk::IndexType::eUint32;
317 }
318 UNIMPLEMENTED_MSG("Unimplemented index_format={}", static_cast<u32>(index_format));
319 return {};
320}
321
322vk::StencilOp StencilOp(Maxwell::StencilOp stencil_op) {
323 switch (stencil_op) {
324 case Maxwell::StencilOp::Keep:
325 case Maxwell::StencilOp::KeepOGL:
326 return vk::StencilOp::eKeep;
327 case Maxwell::StencilOp::Zero:
328 case Maxwell::StencilOp::ZeroOGL:
329 return vk::StencilOp::eZero;
330 case Maxwell::StencilOp::Replace:
331 case Maxwell::StencilOp::ReplaceOGL:
332 return vk::StencilOp::eReplace;
333 case Maxwell::StencilOp::Incr:
334 case Maxwell::StencilOp::IncrOGL:
335 return vk::StencilOp::eIncrementAndClamp;
336 case Maxwell::StencilOp::Decr:
337 case Maxwell::StencilOp::DecrOGL:
338 return vk::StencilOp::eDecrementAndClamp;
339 case Maxwell::StencilOp::Invert:
340 case Maxwell::StencilOp::InvertOGL:
341 return vk::StencilOp::eInvert;
342 case Maxwell::StencilOp::IncrWrap:
343 case Maxwell::StencilOp::IncrWrapOGL:
344 return vk::StencilOp::eIncrementAndWrap;
345 case Maxwell::StencilOp::DecrWrap:
346 case Maxwell::StencilOp::DecrWrapOGL:
347 return vk::StencilOp::eDecrementAndWrap;
348 }
349 UNIMPLEMENTED_MSG("Unimplemented stencil op={}", static_cast<u32>(stencil_op));
350 return {};
351}
352
353vk::BlendOp BlendEquation(Maxwell::Blend::Equation equation) {
354 switch (equation) {
355 case Maxwell::Blend::Equation::Add:
356 case Maxwell::Blend::Equation::AddGL:
357 return vk::BlendOp::eAdd;
358 case Maxwell::Blend::Equation::Subtract:
359 case Maxwell::Blend::Equation::SubtractGL:
360 return vk::BlendOp::eSubtract;
361 case Maxwell::Blend::Equation::ReverseSubtract:
362 case Maxwell::Blend::Equation::ReverseSubtractGL:
363 return vk::BlendOp::eReverseSubtract;
364 case Maxwell::Blend::Equation::Min:
365 case Maxwell::Blend::Equation::MinGL:
366 return vk::BlendOp::eMin;
367 case Maxwell::Blend::Equation::Max:
368 case Maxwell::Blend::Equation::MaxGL:
369 return vk::BlendOp::eMax;
370 }
371 UNIMPLEMENTED_MSG("Unimplemented blend equation={}", static_cast<u32>(equation));
372 return {};
373}
374
375vk::BlendFactor BlendFactor(Maxwell::Blend::Factor factor) {
376 switch (factor) {
377 case Maxwell::Blend::Factor::Zero:
378 case Maxwell::Blend::Factor::ZeroGL:
379 return vk::BlendFactor::eZero;
380 case Maxwell::Blend::Factor::One:
381 case Maxwell::Blend::Factor::OneGL:
382 return vk::BlendFactor::eOne;
383 case Maxwell::Blend::Factor::SourceColor:
384 case Maxwell::Blend::Factor::SourceColorGL:
385 return vk::BlendFactor::eSrcColor;
386 case Maxwell::Blend::Factor::OneMinusSourceColor:
387 case Maxwell::Blend::Factor::OneMinusSourceColorGL:
388 return vk::BlendFactor::eOneMinusSrcColor;
389 case Maxwell::Blend::Factor::SourceAlpha:
390 case Maxwell::Blend::Factor::SourceAlphaGL:
391 return vk::BlendFactor::eSrcAlpha;
392 case Maxwell::Blend::Factor::OneMinusSourceAlpha:
393 case Maxwell::Blend::Factor::OneMinusSourceAlphaGL:
394 return vk::BlendFactor::eOneMinusSrcAlpha;
395 case Maxwell::Blend::Factor::DestAlpha:
396 case Maxwell::Blend::Factor::DestAlphaGL:
397 return vk::BlendFactor::eDstAlpha;
398 case Maxwell::Blend::Factor::OneMinusDestAlpha:
399 case Maxwell::Blend::Factor::OneMinusDestAlphaGL:
400 return vk::BlendFactor::eOneMinusDstAlpha;
401 case Maxwell::Blend::Factor::DestColor:
402 case Maxwell::Blend::Factor::DestColorGL:
403 return vk::BlendFactor::eDstColor;
404 case Maxwell::Blend::Factor::OneMinusDestColor:
405 case Maxwell::Blend::Factor::OneMinusDestColorGL:
406 return vk::BlendFactor::eOneMinusDstColor;
407 case Maxwell::Blend::Factor::SourceAlphaSaturate:
408 case Maxwell::Blend::Factor::SourceAlphaSaturateGL:
409 return vk::BlendFactor::eSrcAlphaSaturate;
410 case Maxwell::Blend::Factor::Source1Color:
411 case Maxwell::Blend::Factor::Source1ColorGL:
412 return vk::BlendFactor::eSrc1Color;
413 case Maxwell::Blend::Factor::OneMinusSource1Color:
414 case Maxwell::Blend::Factor::OneMinusSource1ColorGL:
415 return vk::BlendFactor::eOneMinusSrc1Color;
416 case Maxwell::Blend::Factor::Source1Alpha:
417 case Maxwell::Blend::Factor::Source1AlphaGL:
418 return vk::BlendFactor::eSrc1Alpha;
419 case Maxwell::Blend::Factor::OneMinusSource1Alpha:
420 case Maxwell::Blend::Factor::OneMinusSource1AlphaGL:
421 return vk::BlendFactor::eOneMinusSrc1Alpha;
422 case Maxwell::Blend::Factor::ConstantColor:
423 case Maxwell::Blend::Factor::ConstantColorGL:
424 return vk::BlendFactor::eConstantColor;
425 case Maxwell::Blend::Factor::OneMinusConstantColor:
426 case Maxwell::Blend::Factor::OneMinusConstantColorGL:
427 return vk::BlendFactor::eOneMinusConstantColor;
428 case Maxwell::Blend::Factor::ConstantAlpha:
429 case Maxwell::Blend::Factor::ConstantAlphaGL:
430 return vk::BlendFactor::eConstantAlpha;
431 case Maxwell::Blend::Factor::OneMinusConstantAlpha:
432 case Maxwell::Blend::Factor::OneMinusConstantAlphaGL:
433 return vk::BlendFactor::eOneMinusConstantAlpha;
434 }
435 UNIMPLEMENTED_MSG("Unimplemented blend factor={}", static_cast<u32>(factor));
436 return {};
437}
438
439vk::FrontFace FrontFace(Maxwell::Cull::FrontFace front_face) {
440 switch (front_face) {
441 case Maxwell::Cull::FrontFace::ClockWise:
442 return vk::FrontFace::eClockwise;
443 case Maxwell::Cull::FrontFace::CounterClockWise:
444 return vk::FrontFace::eCounterClockwise;
445 }
446 UNIMPLEMENTED_MSG("Unimplemented front face={}", static_cast<u32>(front_face));
447 return {};
448}
449
450vk::CullModeFlags CullFace(Maxwell::Cull::CullFace cull_face) {
451 switch (cull_face) {
452 case Maxwell::Cull::CullFace::Front:
453 return vk::CullModeFlagBits::eFront;
454 case Maxwell::Cull::CullFace::Back:
455 return vk::CullModeFlagBits::eBack;
456 case Maxwell::Cull::CullFace::FrontAndBack:
457 return vk::CullModeFlagBits::eFrontAndBack;
458 }
459 UNIMPLEMENTED_MSG("Unimplemented cull face={}", static_cast<u32>(cull_face));
460 return {};
461}
462
463vk::ComponentSwizzle SwizzleSource(Tegra::Texture::SwizzleSource swizzle) {
464 switch (swizzle) {
465 case Tegra::Texture::SwizzleSource::Zero:
466 return vk::ComponentSwizzle::eZero;
467 case Tegra::Texture::SwizzleSource::R:
468 return vk::ComponentSwizzle::eR;
469 case Tegra::Texture::SwizzleSource::G:
470 return vk::ComponentSwizzle::eG;
471 case Tegra::Texture::SwizzleSource::B:
472 return vk::ComponentSwizzle::eB;
473 case Tegra::Texture::SwizzleSource::A:
474 return vk::ComponentSwizzle::eA;
475 case Tegra::Texture::SwizzleSource::OneInt:
476 case Tegra::Texture::SwizzleSource::OneFloat:
477 return vk::ComponentSwizzle::eOne;
478 }
479 UNIMPLEMENTED_MSG("Unimplemented swizzle source={}", static_cast<u32>(swizzle));
480 return {};
481}
482
483} // namespace Vulkan::MaxwellToVK
diff --git a/src/video_core/renderer_vulkan/maxwell_to_vk.h b/src/video_core/renderer_vulkan/maxwell_to_vk.h
new file mode 100644
index 000000000..4cadc0721
--- /dev/null
+++ b/src/video_core/renderer_vulkan/maxwell_to_vk.h
@@ -0,0 +1,58 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <utility>
8#include "common/common_types.h"
9#include "video_core/engines/maxwell_3d.h"
10#include "video_core/renderer_vulkan/declarations.h"
11#include "video_core/renderer_vulkan/vk_device.h"
12#include "video_core/surface.h"
13#include "video_core/textures/texture.h"
14
15namespace Vulkan::MaxwellToVK {
16
17using Maxwell = Tegra::Engines::Maxwell3D::Regs;
18using PixelFormat = VideoCore::Surface::PixelFormat;
19using ComponentType = VideoCore::Surface::ComponentType;
20
21namespace Sampler {
22
23vk::Filter Filter(Tegra::Texture::TextureFilter filter);
24
25vk::SamplerMipmapMode MipmapMode(Tegra::Texture::TextureMipmapFilter mipmap_filter);
26
27vk::SamplerAddressMode WrapMode(Tegra::Texture::WrapMode wrap_mode);
28
29vk::CompareOp DepthCompareFunction(Tegra::Texture::DepthCompareFunc depth_compare_func);
30
31} // namespace Sampler
32
33std::pair<vk::Format, bool> SurfaceFormat(const VKDevice& device, FormatType format_type,
34 PixelFormat pixel_format, ComponentType component_type);
35
36vk::ShaderStageFlagBits ShaderStage(Maxwell::ShaderStage stage);
37
38vk::PrimitiveTopology PrimitiveTopology(Maxwell::PrimitiveTopology topology);
39
40vk::Format VertexFormat(Maxwell::VertexAttribute::Type type, Maxwell::VertexAttribute::Size size);
41
42vk::CompareOp ComparisonOp(Maxwell::ComparisonOp comparison);
43
44vk::IndexType IndexFormat(Maxwell::IndexFormat index_format);
45
46vk::StencilOp StencilOp(Maxwell::StencilOp stencil_op);
47
48vk::BlendOp BlendEquation(Maxwell::Blend::Equation equation);
49
50vk::BlendFactor BlendFactor(Maxwell::Blend::Factor factor);
51
52vk::FrontFace FrontFace(Maxwell::Cull::FrontFace front_face);
53
54vk::CullModeFlags CullFace(Maxwell::Cull::CullFace cull_face);
55
56vk::ComponentSwizzle SwizzleSource(Tegra::Texture::SwizzleSource swizzle);
57
58} // namespace Vulkan::MaxwellToVK
diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp
index 18b7b94a1..4a33a6c84 100644
--- a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp
@@ -8,7 +8,7 @@
8#include <tuple> 8#include <tuple>
9 9
10#include "common/alignment.h" 10#include "common/alignment.h"
11#include "core/core.h" 11#include "common/assert.h"
12#include "core/memory.h" 12#include "core/memory.h"
13#include "video_core/renderer_vulkan/declarations.h" 13#include "video_core/renderer_vulkan/declarations.h"
14#include "video_core/renderer_vulkan/vk_buffer_cache.h" 14#include "video_core/renderer_vulkan/vk_buffer_cache.h"
diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.h b/src/video_core/renderer_vulkan/vk_buffer_cache.h
index 6cbe21202..d8e916f31 100644
--- a/src/video_core/renderer_vulkan/vk_buffer_cache.h
+++ b/src/video_core/renderer_vulkan/vk_buffer_cache.h
@@ -44,9 +44,9 @@ struct CachedBufferEntry final : public RasterizerCacheObject {
44 44
45class VKBufferCache final : public RasterizerCache<std::shared_ptr<CachedBufferEntry>> { 45class VKBufferCache final : public RasterizerCache<std::shared_ptr<CachedBufferEntry>> {
46public: 46public:
47 explicit VKBufferCache(Tegra::MemoryManager& tegra_memory_manager, VideoCore::RasterizerInterface& rasterizer, 47 explicit VKBufferCache(Tegra::MemoryManager& tegra_memory_manager,
48 const VKDevice& device, VKMemoryManager& memory_manager, 48 VideoCore::RasterizerInterface& rasterizer, const VKDevice& device,
49 VKScheduler& scheduler, u64 size); 49 VKMemoryManager& memory_manager, VKScheduler& scheduler, u64 size);
50 ~VKBufferCache(); 50 ~VKBufferCache();
51 51
52 /// Uploads data from a guest GPU address. Returns host's buffer offset where it's been 52 /// Uploads data from a guest GPU address. Returns host's buffer offset where it's been
diff --git a/src/video_core/renderer_vulkan/vk_device.cpp b/src/video_core/renderer_vulkan/vk_device.cpp
index 78a4e5f0e..00242ecbe 100644
--- a/src/video_core/renderer_vulkan/vk_device.cpp
+++ b/src/video_core/renderer_vulkan/vk_device.cpp
@@ -122,8 +122,7 @@ bool VKDevice::IsFormatSupported(vk::Format wanted_format, vk::FormatFeatureFlag
122 FormatType format_type) const { 122 FormatType format_type) const {
123 const auto it = format_properties.find(wanted_format); 123 const auto it = format_properties.find(wanted_format);
124 if (it == format_properties.end()) { 124 if (it == format_properties.end()) {
125 LOG_CRITICAL(Render_Vulkan, "Unimplemented format query={}", 125 LOG_CRITICAL(Render_Vulkan, "Unimplemented format query={}", vk::to_string(wanted_format));
126 static_cast<u32>(wanted_format));
127 UNREACHABLE(); 126 UNREACHABLE();
128 return true; 127 return true;
129 } 128 }
@@ -219,11 +218,19 @@ std::map<vk::Format, vk::FormatProperties> VKDevice::GetFormatProperties(
219 format_properties.emplace(format, physical.getFormatProperties(format, dldi)); 218 format_properties.emplace(format, physical.getFormatProperties(format, dldi));
220 }; 219 };
221 AddFormatQuery(vk::Format::eA8B8G8R8UnormPack32); 220 AddFormatQuery(vk::Format::eA8B8G8R8UnormPack32);
222 AddFormatQuery(vk::Format::eR5G6B5UnormPack16); 221 AddFormatQuery(vk::Format::eB5G6R5UnormPack16);
222 AddFormatQuery(vk::Format::eA2B10G10R10UnormPack32);
223 AddFormatQuery(vk::Format::eR8G8B8A8Srgb);
224 AddFormatQuery(vk::Format::eR8Unorm);
223 AddFormatQuery(vk::Format::eD32Sfloat); 225 AddFormatQuery(vk::Format::eD32Sfloat);
226 AddFormatQuery(vk::Format::eD16Unorm);
224 AddFormatQuery(vk::Format::eD16UnormS8Uint); 227 AddFormatQuery(vk::Format::eD16UnormS8Uint);
225 AddFormatQuery(vk::Format::eD24UnormS8Uint); 228 AddFormatQuery(vk::Format::eD24UnormS8Uint);
226 AddFormatQuery(vk::Format::eD32SfloatS8Uint); 229 AddFormatQuery(vk::Format::eD32SfloatS8Uint);
230 AddFormatQuery(vk::Format::eBc1RgbaUnormBlock);
231 AddFormatQuery(vk::Format::eBc2UnormBlock);
232 AddFormatQuery(vk::Format::eBc3UnormBlock);
233 AddFormatQuery(vk::Format::eBc4UnormBlock);
227 234
228 return format_properties; 235 return format_properties;
229} 236}
diff --git a/src/video_core/shader/decode.cpp b/src/video_core/shader/decode.cpp
index 740ac3118..e4c438792 100644
--- a/src/video_core/shader/decode.cpp
+++ b/src/video_core/shader/decode.cpp
@@ -165,6 +165,7 @@ u32 ShaderIR::DecodeInstr(NodeBlock& bb, u32 pc) {
165 {OpCode::Type::Hfma2, &ShaderIR::DecodeHfma2}, 165 {OpCode::Type::Hfma2, &ShaderIR::DecodeHfma2},
166 {OpCode::Type::Conversion, &ShaderIR::DecodeConversion}, 166 {OpCode::Type::Conversion, &ShaderIR::DecodeConversion},
167 {OpCode::Type::Memory, &ShaderIR::DecodeMemory}, 167 {OpCode::Type::Memory, &ShaderIR::DecodeMemory},
168 {OpCode::Type::Texture, &ShaderIR::DecodeTexture},
168 {OpCode::Type::FloatSetPredicate, &ShaderIR::DecodeFloatSetPredicate}, 169 {OpCode::Type::FloatSetPredicate, &ShaderIR::DecodeFloatSetPredicate},
169 {OpCode::Type::IntegerSetPredicate, &ShaderIR::DecodeIntegerSetPredicate}, 170 {OpCode::Type::IntegerSetPredicate, &ShaderIR::DecodeIntegerSetPredicate},
170 {OpCode::Type::HalfSetPredicate, &ShaderIR::DecodeHalfSetPredicate}, 171 {OpCode::Type::HalfSetPredicate, &ShaderIR::DecodeHalfSetPredicate},
diff --git a/src/video_core/shader/decode/memory.cpp b/src/video_core/shader/decode/memory.cpp
index 38f01ca50..ea3c71eed 100644
--- a/src/video_core/shader/decode/memory.cpp
+++ b/src/video_core/shader/decode/memory.cpp
@@ -17,24 +17,6 @@ using Tegra::Shader::Attribute;
17using Tegra::Shader::Instruction; 17using Tegra::Shader::Instruction;
18using Tegra::Shader::OpCode; 18using Tegra::Shader::OpCode;
19using Tegra::Shader::Register; 19using Tegra::Shader::Register;
20using Tegra::Shader::TextureMiscMode;
21using Tegra::Shader::TextureProcessMode;
22using Tegra::Shader::TextureType;
23
24static std::size_t GetCoordCount(TextureType texture_type) {
25 switch (texture_type) {
26 case TextureType::Texture1D:
27 return 1;
28 case TextureType::Texture2D:
29 return 2;
30 case TextureType::Texture3D:
31 case TextureType::TextureCube:
32 return 3;
33 default:
34 UNIMPLEMENTED_MSG("Unhandled texture type: {}", static_cast<u32>(texture_type));
35 return 0;
36 }
37}
38 20
39u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) { 21u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) {
40 const Instruction instr = {program_code[pc]}; 22 const Instruction instr = {program_code[pc]};
@@ -247,194 +229,6 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) {
247 } 229 }
248 break; 230 break;
249 } 231 }
250 case OpCode::Id::TEX: {
251 UNIMPLEMENTED_IF_MSG(instr.tex.UsesMiscMode(TextureMiscMode::AOFFI),
252 "AOFFI is not implemented");
253
254 if (instr.tex.UsesMiscMode(TextureMiscMode::NODEP)) {
255 LOG_WARNING(HW_GPU, "TEX.NODEP implementation is incomplete");
256 }
257
258 const TextureType texture_type{instr.tex.texture_type};
259 const bool is_array = instr.tex.array != 0;
260 const bool depth_compare = instr.tex.UsesMiscMode(TextureMiscMode::DC);
261 const auto process_mode = instr.tex.GetTextureProcessMode();
262 WriteTexInstructionFloat(
263 bb, instr, GetTexCode(instr, texture_type, process_mode, depth_compare, is_array));
264 break;
265 }
266 case OpCode::Id::TEXS: {
267 const TextureType texture_type{instr.texs.GetTextureType()};
268 const bool is_array{instr.texs.IsArrayTexture()};
269 const bool depth_compare = instr.texs.UsesMiscMode(TextureMiscMode::DC);
270 const auto process_mode = instr.texs.GetTextureProcessMode();
271
272 if (instr.texs.UsesMiscMode(TextureMiscMode::NODEP)) {
273 LOG_WARNING(HW_GPU, "TEXS.NODEP implementation is incomplete");
274 }
275
276 const Node4 components =
277 GetTexsCode(instr, texture_type, process_mode, depth_compare, is_array);
278
279 if (instr.texs.fp32_flag) {
280 WriteTexsInstructionFloat(bb, instr, components);
281 } else {
282 WriteTexsInstructionHalfFloat(bb, instr, components);
283 }
284 break;
285 }
286 case OpCode::Id::TLD4: {
287 ASSERT(instr.tld4.array == 0);
288 UNIMPLEMENTED_IF_MSG(instr.tld4.UsesMiscMode(TextureMiscMode::AOFFI),
289 "AOFFI is not implemented");
290 UNIMPLEMENTED_IF_MSG(instr.tld4.UsesMiscMode(TextureMiscMode::NDV),
291 "NDV is not implemented");
292 UNIMPLEMENTED_IF_MSG(instr.tld4.UsesMiscMode(TextureMiscMode::PTP),
293 "PTP is not implemented");
294
295 if (instr.tld4.UsesMiscMode(TextureMiscMode::NODEP)) {
296 LOG_WARNING(HW_GPU, "TLD4.NODEP implementation is incomplete");
297 }
298
299 const auto texture_type = instr.tld4.texture_type.Value();
300 const bool depth_compare = instr.tld4.UsesMiscMode(TextureMiscMode::DC);
301 const bool is_array = instr.tld4.array != 0;
302 WriteTexInstructionFloat(bb, instr,
303 GetTld4Code(instr, texture_type, depth_compare, is_array));
304 break;
305 }
306 case OpCode::Id::TLD4S: {
307 UNIMPLEMENTED_IF_MSG(instr.tld4s.UsesMiscMode(TextureMiscMode::AOFFI),
308 "AOFFI is not implemented");
309 if (instr.tld4s.UsesMiscMode(TextureMiscMode::NODEP)) {
310 LOG_WARNING(HW_GPU, "TLD4S.NODEP implementation is incomplete");
311 }
312
313 const bool depth_compare = instr.tld4s.UsesMiscMode(TextureMiscMode::DC);
314 const Node op_a = GetRegister(instr.gpr8);
315 const Node op_b = GetRegister(instr.gpr20);
316
317 // TODO(Subv): Figure out how the sampler type is encoded in the TLD4S instruction.
318 std::vector<Node> coords;
319 if (depth_compare) {
320 // Note: TLD4S coordinate encoding works just like TEXS's
321 const Node op_y = GetRegister(instr.gpr8.Value() + 1);
322 coords.push_back(op_a);
323 coords.push_back(op_y);
324 coords.push_back(op_b);
325 } else {
326 coords.push_back(op_a);
327 coords.push_back(op_b);
328 }
329 std::vector<Node> extras;
330 extras.push_back(Immediate(static_cast<u32>(instr.tld4s.component)));
331
332 const auto& sampler =
333 GetSampler(instr.sampler, TextureType::Texture2D, false, depth_compare);
334
335 Node4 values;
336 for (u32 element = 0; element < values.size(); ++element) {
337 auto coords_copy = coords;
338 MetaTexture meta{sampler, {}, {}, extras, element};
339 values[element] = Operation(OperationCode::TextureGather, meta, std::move(coords_copy));
340 }
341
342 WriteTexsInstructionFloat(bb, instr, values);
343 break;
344 }
345 case OpCode::Id::TXQ: {
346 if (instr.txq.UsesMiscMode(TextureMiscMode::NODEP)) {
347 LOG_WARNING(HW_GPU, "TXQ.NODEP implementation is incomplete");
348 }
349
350 // TODO: The new commits on the texture refactor, change the way samplers work.
351 // Sadly, not all texture instructions specify the type of texture their sampler
352 // uses. This must be fixed at a later instance.
353 const auto& sampler =
354 GetSampler(instr.sampler, Tegra::Shader::TextureType::Texture2D, false, false);
355
356 u32 indexer = 0;
357 switch (instr.txq.query_type) {
358 case Tegra::Shader::TextureQueryType::Dimension: {
359 for (u32 element = 0; element < 4; ++element) {
360 if (!instr.txq.IsComponentEnabled(element)) {
361 continue;
362 }
363 MetaTexture meta{sampler, {}, {}, {}, element};
364 const Node value =
365 Operation(OperationCode::TextureQueryDimensions, meta, GetRegister(instr.gpr8));
366 SetTemporal(bb, indexer++, value);
367 }
368 for (u32 i = 0; i < indexer; ++i) {
369 SetRegister(bb, instr.gpr0.Value() + i, GetTemporal(i));
370 }
371 break;
372 }
373 default:
374 UNIMPLEMENTED_MSG("Unhandled texture query type: {}",
375 static_cast<u32>(instr.txq.query_type.Value()));
376 }
377 break;
378 }
379 case OpCode::Id::TMML: {
380 UNIMPLEMENTED_IF_MSG(instr.tmml.UsesMiscMode(Tegra::Shader::TextureMiscMode::NDV),
381 "NDV is not implemented");
382
383 if (instr.tmml.UsesMiscMode(TextureMiscMode::NODEP)) {
384 LOG_WARNING(HW_GPU, "TMML.NODEP implementation is incomplete");
385 }
386
387 auto texture_type = instr.tmml.texture_type.Value();
388 const bool is_array = instr.tmml.array != 0;
389 const auto& sampler = GetSampler(instr.sampler, texture_type, is_array, false);
390
391 std::vector<Node> coords;
392
393 // TODO: Add coordinates for different samplers once other texture types are implemented.
394 switch (texture_type) {
395 case TextureType::Texture1D:
396 coords.push_back(GetRegister(instr.gpr8));
397 break;
398 case TextureType::Texture2D:
399 coords.push_back(GetRegister(instr.gpr8.Value() + 0));
400 coords.push_back(GetRegister(instr.gpr8.Value() + 1));
401 break;
402 default:
403 UNIMPLEMENTED_MSG("Unhandled texture type {}", static_cast<u32>(texture_type));
404
405 // Fallback to interpreting as a 2D texture for now
406 coords.push_back(GetRegister(instr.gpr8.Value() + 0));
407 coords.push_back(GetRegister(instr.gpr8.Value() + 1));
408 texture_type = TextureType::Texture2D;
409 }
410
411 for (u32 element = 0; element < 2; ++element) {
412 auto params = coords;
413 MetaTexture meta{sampler, {}, {}, {}, element};
414 const Node value = Operation(OperationCode::TextureQueryLod, meta, std::move(params));
415 SetTemporal(bb, element, value);
416 }
417 for (u32 element = 0; element < 2; ++element) {
418 SetRegister(bb, instr.gpr0.Value() + element, GetTemporal(element));
419 }
420
421 break;
422 }
423 case OpCode::Id::TLDS: {
424 const Tegra::Shader::TextureType texture_type{instr.tlds.GetTextureType()};
425 const bool is_array{instr.tlds.IsArrayTexture()};
426
427 UNIMPLEMENTED_IF_MSG(instr.tlds.UsesMiscMode(TextureMiscMode::AOFFI),
428 "AOFFI is not implemented");
429 UNIMPLEMENTED_IF_MSG(instr.tlds.UsesMiscMode(TextureMiscMode::MZ), "MZ is not implemented");
430
431 if (instr.tlds.UsesMiscMode(TextureMiscMode::NODEP)) {
432 LOG_WARNING(HW_GPU, "TLDS.NODEP implementation is incomplete");
433 }
434
435 WriteTexsInstructionFloat(bb, instr, GetTldsCode(instr, texture_type, is_array));
436 break;
437 }
438 default: 232 default:
439 UNIMPLEMENTED_MSG("Unhandled memory instruction: {}", opcode->get().GetName()); 233 UNIMPLEMENTED_MSG("Unhandled memory instruction: {}", opcode->get().GetName());
440 } 234 }
@@ -442,291 +236,4 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) {
442 return pc; 236 return pc;
443} 237}
444 238
445const Sampler& ShaderIR::GetSampler(const Tegra::Shader::Sampler& sampler, TextureType type,
446 bool is_array, bool is_shadow) {
447 const auto offset = static_cast<std::size_t>(sampler.index.Value());
448
449 // If this sampler has already been used, return the existing mapping.
450 const auto itr =
451 std::find_if(used_samplers.begin(), used_samplers.end(),
452 [&](const Sampler& entry) { return entry.GetOffset() == offset; });
453 if (itr != used_samplers.end()) {
454 ASSERT(itr->GetType() == type && itr->IsArray() == is_array &&
455 itr->IsShadow() == is_shadow);
456 return *itr;
457 }
458
459 // Otherwise create a new mapping for this sampler
460 const std::size_t next_index = used_samplers.size();
461 const Sampler entry{offset, next_index, type, is_array, is_shadow};
462 return *used_samplers.emplace(entry).first;
463}
464
465void ShaderIR::WriteTexInstructionFloat(NodeBlock& bb, Instruction instr, const Node4& components) {
466 u32 dest_elem = 0;
467 for (u32 elem = 0; elem < 4; ++elem) {
468 if (!instr.tex.IsComponentEnabled(elem)) {
469 // Skip disabled components
470 continue;
471 }
472 SetTemporal(bb, dest_elem++, components[elem]);
473 }
474 // After writing values in temporals, move them to the real registers
475 for (u32 i = 0; i < dest_elem; ++i) {
476 SetRegister(bb, instr.gpr0.Value() + i, GetTemporal(i));
477 }
478}
479
480void ShaderIR::WriteTexsInstructionFloat(NodeBlock& bb, Instruction instr,
481 const Node4& components) {
482 // TEXS has two destination registers and a swizzle. The first two elements in the swizzle
483 // go into gpr0+0 and gpr0+1, and the rest goes into gpr28+0 and gpr28+1
484
485 u32 dest_elem = 0;
486 for (u32 component = 0; component < 4; ++component) {
487 if (!instr.texs.IsComponentEnabled(component))
488 continue;
489 SetTemporal(bb, dest_elem++, components[component]);
490 }
491
492 for (u32 i = 0; i < dest_elem; ++i) {
493 if (i < 2) {
494 // Write the first two swizzle components to gpr0 and gpr0+1
495 SetRegister(bb, instr.gpr0.Value() + i % 2, GetTemporal(i));
496 } else {
497 ASSERT(instr.texs.HasTwoDestinations());
498 // Write the rest of the swizzle components to gpr28 and gpr28+1
499 SetRegister(bb, instr.gpr28.Value() + i % 2, GetTemporal(i));
500 }
501 }
502}
503
504void ShaderIR::WriteTexsInstructionHalfFloat(NodeBlock& bb, Instruction instr,
505 const Node4& components) {
506 // TEXS.F16 destionation registers are packed in two registers in pairs (just like any half
507 // float instruction).
508
509 Node4 values;
510 u32 dest_elem = 0;
511 for (u32 component = 0; component < 4; ++component) {
512 if (!instr.texs.IsComponentEnabled(component))
513 continue;
514 values[dest_elem++] = components[component];
515 }
516 if (dest_elem == 0)
517 return;
518
519 std::generate(values.begin() + dest_elem, values.end(), [&]() { return Immediate(0); });
520
521 const Node first_value = Operation(OperationCode::HPack2, values[0], values[1]);
522 if (dest_elem <= 2) {
523 SetRegister(bb, instr.gpr0, first_value);
524 return;
525 }
526
527 SetTemporal(bb, 0, first_value);
528 SetTemporal(bb, 1, Operation(OperationCode::HPack2, values[2], values[3]));
529
530 SetRegister(bb, instr.gpr0, GetTemporal(0));
531 SetRegister(bb, instr.gpr28, GetTemporal(1));
532}
533
// Builds the IR for one texture sample shared by the TEX/TEXS decode paths.
// coords: coordinate nodes; array/depth_compare: optional operands (null when absent);
// bias_offset: extra displacement applied to gpr20 when fetching the lod/bias operand.
Node4 ShaderIR::GetTextureCode(Instruction instr, TextureType texture_type,
                               TextureProcessMode process_mode, std::vector<Node> coords,
                               Node array, Node depth_compare, u32 bias_offset) {
    // A non-null node doubles as the "operand present" flag.
    const bool is_array = array;
    const bool is_shadow = depth_compare;

    UNIMPLEMENTED_IF_MSG((texture_type == TextureType::Texture3D && (is_array || is_shadow)) ||
                             (texture_type == TextureType::TextureCube && is_array && is_shadow),
                         "This method is not supported.");

    const auto& sampler = GetSampler(instr.sampler, texture_type, is_array, is_shadow);

    const bool lod_needed = process_mode == TextureProcessMode::LZ ||
                            process_mode == TextureProcessMode::LL ||
                            process_mode == TextureProcessMode::LLA;

    // LOD selection (either via bias or explicit textureLod) not supported in GL for
    // sampler2DArrayShadow and samplerCubeArrayShadow.
    const bool gl_lod_supported =
        !((texture_type == Tegra::Shader::TextureType::Texture2D && is_array && is_shadow) ||
          (texture_type == Tegra::Shader::TextureType::TextureCube && is_array && is_shadow));

    const OperationCode read_method =
        lod_needed && gl_lod_supported ? OperationCode::TextureLod : OperationCode::Texture;

    UNIMPLEMENTED_IF(process_mode != TextureProcessMode::None && !gl_lod_supported);

    // Optional trailing operands (explicit lod or bias) forwarded to the backend.
    std::vector<Node> extras;
    if (process_mode != TextureProcessMode::None && gl_lod_supported) {
        if (process_mode == TextureProcessMode::LZ) {
            // LZ samples at lod 0 and needs no register operand.
            extras.push_back(Immediate(0.0f));
        } else {
            // If present, lod or bias are always stored in the register indexed by the gpr20
            // field with an offset depending on the usage of the other registers
            extras.push_back(GetRegister(instr.gpr20.Value() + bias_offset));
        }
    }

    // Emit one operation per output component; each needs its own coordinate copy
    // because Operation takes ownership of the argument list.
    Node4 values;
    for (u32 element = 0; element < values.size(); ++element) {
        auto copy_coords = coords;
        MetaTexture meta{sampler, array, depth_compare, extras, element};
        values[element] = Operation(read_method, meta, std::move(copy_coords));
    }

    return values;
}
581
// Decodes the operands of a TEX instruction and forwards them to GetTextureCode.
Node4 ShaderIR::GetTexCode(Instruction instr, TextureType texture_type,
                           TextureProcessMode process_mode, bool depth_compare, bool is_array) {
    // LZ needs no register operand, so only LB/LL(A) count as a lod/bias input.
    const bool lod_bias_enabled =
        (process_mode != TextureProcessMode::None && process_mode != TextureProcessMode::LZ);

    const auto [coord_count, total_coord_count] = ValidateAndGetCoordinateElement(
        texture_type, depth_compare, is_array, lod_bias_enabled, 4, 5);
    // If enabled arrays index is always stored in the gpr8 field
    const u64 array_register = instr.gpr8.Value();
    // First coordinate index is the gpr8 or gpr8 + 1 when arrays are used
    const u64 coord_register = array_register + (is_array ? 1 : 0);

    std::vector<Node> coords;
    for (std::size_t i = 0; i < coord_count; ++i) {
        coords.push_back(GetRegister(coord_register + i));
    }
    // 1D.DC in OpenGL the 2nd component is ignored.
    if (depth_compare && !is_array && texture_type == TextureType::Texture1D) {
        coords.push_back(Immediate(0.0f));
    }

    const Node array = is_array ? GetRegister(array_register) : nullptr;

    Node dc{};
    if (depth_compare) {
        // Depth is always stored in the register signaled by gpr20 or in the next register if lod
        // or bias are used
        const u64 depth_register = instr.gpr20.Value() + (lod_bias_enabled ? 1 : 0);
        dc = GetRegister(depth_register);
    }

    return GetTextureCode(instr, texture_type, process_mode, coords, array, dc, 0);
}
615
// Decodes the operands of a TEXS instruction (the compact form of TEX) and
// forwards them to GetTextureCode.
Node4 ShaderIR::GetTexsCode(Instruction instr, TextureType texture_type,
                            TextureProcessMode process_mode, bool depth_compare, bool is_array) {
    // LZ needs no register operand, so only LB/LL(A) count as a lod/bias input.
    const bool lod_bias_enabled =
        (process_mode != TextureProcessMode::None && process_mode != TextureProcessMode::LZ);

    const auto [coord_count, total_coord_count] = ValidateAndGetCoordinateElement(
        texture_type, depth_compare, is_array, lod_bias_enabled, 4, 4);
    // If enabled arrays index is always stored in the gpr8 field
    const u64 array_register = instr.gpr8.Value();
    // First coordinate index is stored in gpr8 field or (gpr8 + 1) when arrays are used
    const u64 coord_register = array_register + (is_array ? 1 : 0);
    // The trailing coordinate spills into gpr20 unless the gpr8 register pair can
    // still hold it (no array, at most two coordinates, and no lod/bias/depth operand).
    const u64 last_coord_register =
        (is_array || !(lod_bias_enabled || depth_compare) || (coord_count > 2))
            ? static_cast<u64>(instr.gpr20.Value())
            : coord_register + 1;
    // When a coordinate occupies gpr20, the lod/bias operand moves up one register.
    const u32 bias_offset = coord_count > 2 ? 1 : 0;

    std::vector<Node> coords;
    for (std::size_t i = 0; i < coord_count; ++i) {
        const bool last = (i == (coord_count - 1)) && (coord_count > 1);
        coords.push_back(GetRegister(last ? last_coord_register : coord_register + i));
    }

    const Node array = is_array ? GetRegister(array_register) : nullptr;

    Node dc{};
    if (depth_compare) {
        // Depth is always stored in the register signaled by gpr20 or in the next register if lod
        // or bias are used
        const u64 depth_register = instr.gpr20.Value() + (lod_bias_enabled ? 1 : 0);
        dc = GetRegister(depth_register);
    }

    return GetTextureCode(instr, texture_type, process_mode, coords, array, dc, bias_offset);
}
651
// Decodes the operands of a TLD4 (textureGather) instruction.
Node4 ShaderIR::GetTld4Code(Instruction instr, TextureType texture_type, bool depth_compare,
                            bool is_array) {
    const std::size_t coord_count = GetCoordCount(texture_type);
    // NOTE(review): the two totals below are computed but never read in this function.
    const std::size_t total_coord_count = coord_count + (is_array ? 1 : 0);
    const std::size_t total_reg_count = total_coord_count + (depth_compare ? 1 : 0);

    // If enabled arrays index is always stored in the gpr8 field
    const u64 array_register = instr.gpr8.Value();
    // First coordinate index is the gpr8 or gpr8 + 1 when arrays are used
    const u64 coord_register = array_register + (is_array ? 1 : 0);

    std::vector<Node> coords;
    for (size_t i = 0; i < coord_count; ++i)
        coords.push_back(GetRegister(coord_register + i));

    const auto& sampler = GetSampler(instr.sampler, texture_type, is_array, depth_compare);

    // Emit one gather per output component; each needs its own coordinate copy.
    Node4 values;
    for (u32 element = 0; element < values.size(); ++element) {
        auto coords_copy = coords;
        // NOTE(review): gpr8 is forwarded as the array layer even when is_array is false
        // (it then holds the first coordinate) — confirm the backend ignores it in that case.
        MetaTexture meta{sampler, GetRegister(array_register), {}, {}, element};
        values[element] = Operation(OperationCode::TextureGather, meta, std::move(coords_copy));
    }

    return values;
}
678
// Decodes the operands of a TLDS (compact texelFetch) instruction.
Node4 ShaderIR::GetTldsCode(Instruction instr, TextureType texture_type, bool is_array) {
    const std::size_t type_coord_count = GetCoordCount(texture_type);
    const bool lod_enabled = instr.tlds.GetTextureProcessMode() == TextureProcessMode::LL;

    // If enabled arrays index is always stored in the gpr8 field
    const u64 array_register = instr.gpr8.Value();
    // if is array gpr20 is used
    const u64 coord_register = is_array ? instr.gpr20.Value() : instr.gpr8.Value();

    // Non-array fetches spill the last coordinate into gpr20 when the gpr8 register
    // pair cannot hold every operand.
    const u64 last_coord_register =
        ((type_coord_count > 2) || (type_coord_count == 2 && !lod_enabled)) && !is_array
            ? static_cast<u64>(instr.gpr20.Value())
            : coord_register + 1;

    std::vector<Node> coords;
    for (std::size_t i = 0; i < type_coord_count; ++i) {
        const bool last = (i == (type_coord_count - 1)) && (type_coord_count > 1);
        coords.push_back(GetRegister(last ? last_coord_register : coord_register + i));
    }

    const Node array = is_array ? GetRegister(array_register) : nullptr;
    // When lod is used always is in gpr20
    const Node lod = lod_enabled ? GetRegister(instr.gpr20) : Immediate(0);

    const auto& sampler = GetSampler(instr.sampler, texture_type, is_array, false);

    // Emit one fetch per output component; each needs its own coordinate copy.
    Node4 values;
    for (u32 element = 0; element < values.size(); ++element) {
        auto coords_copy = coords;
        MetaTexture meta{sampler, array, {}, {lod}, element};
        values[element] = Operation(OperationCode::TexelFetch, meta, std::move(coords_copy));
    }
    return values;
}
713
// Computes how many coordinate components a lookup consumes and validates that the
// operand count fits the given limits. Returns {coord_count, total_coord_count}.
std::tuple<std::size_t, std::size_t> ShaderIR::ValidateAndGetCoordinateElement(
    TextureType texture_type, bool depth_compare, bool is_array, bool lod_bias_enabled,
    std::size_t max_coords, std::size_t max_inputs) {
    const std::size_t coord_count = GetCoordCount(texture_type);

    std::size_t total_coord_count = coord_count + (is_array ? 1 : 0) + (depth_compare ? 1 : 0);
    const std::size_t total_reg_count = total_coord_count + (lod_bias_enabled ? 1 : 0);
    if (total_coord_count > max_coords || total_reg_count > max_inputs) {
        UNIMPLEMENTED_MSG("Unsupported Texture operation");
        // Clamp so callers never read past the supported coordinate count.
        total_coord_count = std::min(total_coord_count, max_coords);
    }
    // 1D.DC OpenGL is using a vec3 but 2nd component is ignored later.
    total_coord_count +=
        (depth_compare && !is_array && texture_type == TextureType::Texture1D) ? 1 : 0;

    return {coord_count, total_coord_count};
}
731
} // namespace VideoCommon::Shader
diff --git a/src/video_core/shader/decode/texture.cpp b/src/video_core/shader/decode/texture.cpp
new file mode 100644
index 000000000..a99ae19bf
--- /dev/null
+++ b/src/video_core/shader/decode/texture.cpp
@@ -0,0 +1,534 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <algorithm>
6#include <vector>
7#include <fmt/format.h>
8
9#include "common/assert.h"
10#include "common/common_types.h"
11#include "video_core/engines/shader_bytecode.h"
12#include "video_core/shader/shader_ir.h"
13
14namespace VideoCommon::Shader {
15
16using Tegra::Shader::Instruction;
17using Tegra::Shader::OpCode;
18using Tegra::Shader::Register;
19using Tegra::Shader::TextureMiscMode;
20using Tegra::Shader::TextureProcessMode;
21using Tegra::Shader::TextureType;
22
23static std::size_t GetCoordCount(TextureType texture_type) {
24 switch (texture_type) {
25 case TextureType::Texture1D:
26 return 1;
27 case TextureType::Texture2D:
28 return 2;
29 case TextureType::Texture3D:
30 case TextureType::TextureCube:
31 return 3;
32 default:
33 UNIMPLEMENTED_MSG("Unhandled texture type: {}", static_cast<u32>(texture_type));
34 return 0;
35 }
36}
37
38u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) {
39 const Instruction instr = {program_code[pc]};
40 const auto opcode = OpCode::Decode(instr);
41
42 switch (opcode->get().GetId()) {
43 case OpCode::Id::TEX: {
44 UNIMPLEMENTED_IF_MSG(instr.tex.UsesMiscMode(TextureMiscMode::AOFFI),
45 "AOFFI is not implemented");
46
47 if (instr.tex.UsesMiscMode(TextureMiscMode::NODEP)) {
48 LOG_WARNING(HW_GPU, "TEX.NODEP implementation is incomplete");
49 }
50
51 const TextureType texture_type{instr.tex.texture_type};
52 const bool is_array = instr.tex.array != 0;
53 const bool depth_compare = instr.tex.UsesMiscMode(TextureMiscMode::DC);
54 const auto process_mode = instr.tex.GetTextureProcessMode();
55 WriteTexInstructionFloat(
56 bb, instr, GetTexCode(instr, texture_type, process_mode, depth_compare, is_array));
57 break;
58 }
59 case OpCode::Id::TEXS: {
60 const TextureType texture_type{instr.texs.GetTextureType()};
61 const bool is_array{instr.texs.IsArrayTexture()};
62 const bool depth_compare = instr.texs.UsesMiscMode(TextureMiscMode::DC);
63 const auto process_mode = instr.texs.GetTextureProcessMode();
64
65 if (instr.texs.UsesMiscMode(TextureMiscMode::NODEP)) {
66 LOG_WARNING(HW_GPU, "TEXS.NODEP implementation is incomplete");
67 }
68
69 const Node4 components =
70 GetTexsCode(instr, texture_type, process_mode, depth_compare, is_array);
71
72 if (instr.texs.fp32_flag) {
73 WriteTexsInstructionFloat(bb, instr, components);
74 } else {
75 WriteTexsInstructionHalfFloat(bb, instr, components);
76 }
77 break;
78 }
79 case OpCode::Id::TLD4: {
80 ASSERT(instr.tld4.array == 0);
81 UNIMPLEMENTED_IF_MSG(instr.tld4.UsesMiscMode(TextureMiscMode::AOFFI),
82 "AOFFI is not implemented");
83 UNIMPLEMENTED_IF_MSG(instr.tld4.UsesMiscMode(TextureMiscMode::NDV),
84 "NDV is not implemented");
85 UNIMPLEMENTED_IF_MSG(instr.tld4.UsesMiscMode(TextureMiscMode::PTP),
86 "PTP is not implemented");
87
88 if (instr.tld4.UsesMiscMode(TextureMiscMode::NODEP)) {
89 LOG_WARNING(HW_GPU, "TLD4.NODEP implementation is incomplete");
90 }
91
92 const auto texture_type = instr.tld4.texture_type.Value();
93 const bool depth_compare = instr.tld4.UsesMiscMode(TextureMiscMode::DC);
94 const bool is_array = instr.tld4.array != 0;
95 WriteTexInstructionFloat(bb, instr,
96 GetTld4Code(instr, texture_type, depth_compare, is_array));
97 break;
98 }
99 case OpCode::Id::TLD4S: {
100 UNIMPLEMENTED_IF_MSG(instr.tld4s.UsesMiscMode(TextureMiscMode::AOFFI),
101 "AOFFI is not implemented");
102 if (instr.tld4s.UsesMiscMode(TextureMiscMode::NODEP)) {
103 LOG_WARNING(HW_GPU, "TLD4S.NODEP implementation is incomplete");
104 }
105
106 const bool depth_compare = instr.tld4s.UsesMiscMode(TextureMiscMode::DC);
107 const Node op_a = GetRegister(instr.gpr8);
108 const Node op_b = GetRegister(instr.gpr20);
109
110 // TODO(Subv): Figure out how the sampler type is encoded in the TLD4S instruction.
111 std::vector<Node> coords;
112 if (depth_compare) {
113 // Note: TLD4S coordinate encoding works just like TEXS's
114 const Node op_y = GetRegister(instr.gpr8.Value() + 1);
115 coords.push_back(op_a);
116 coords.push_back(op_y);
117 coords.push_back(op_b);
118 } else {
119 coords.push_back(op_a);
120 coords.push_back(op_b);
121 }
122 const Node component = Immediate(static_cast<u32>(instr.tld4s.component));
123
124 const auto& sampler =
125 GetSampler(instr.sampler, TextureType::Texture2D, false, depth_compare);
126
127 Node4 values;
128 for (u32 element = 0; element < values.size(); ++element) {
129 auto coords_copy = coords;
130 MetaTexture meta{sampler, {}, {}, {}, {}, component, element};
131 values[element] = Operation(OperationCode::TextureGather, meta, std::move(coords_copy));
132 }
133
134 WriteTexsInstructionFloat(bb, instr, values);
135 break;
136 }
137 case OpCode::Id::TXQ: {
138 if (instr.txq.UsesMiscMode(TextureMiscMode::NODEP)) {
139 LOG_WARNING(HW_GPU, "TXQ.NODEP implementation is incomplete");
140 }
141
142 // TODO: The new commits on the texture refactor, change the way samplers work.
143 // Sadly, not all texture instructions specify the type of texture their sampler
144 // uses. This must be fixed at a later instance.
145 const auto& sampler =
146 GetSampler(instr.sampler, Tegra::Shader::TextureType::Texture2D, false, false);
147
148 u32 indexer = 0;
149 switch (instr.txq.query_type) {
150 case Tegra::Shader::TextureQueryType::Dimension: {
151 for (u32 element = 0; element < 4; ++element) {
152 if (!instr.txq.IsComponentEnabled(element)) {
153 continue;
154 }
155 MetaTexture meta{sampler, {}, {}, {}, {}, {}, element};
156 const Node value =
157 Operation(OperationCode::TextureQueryDimensions, meta, GetRegister(instr.gpr8));
158 SetTemporal(bb, indexer++, value);
159 }
160 for (u32 i = 0; i < indexer; ++i) {
161 SetRegister(bb, instr.gpr0.Value() + i, GetTemporal(i));
162 }
163 break;
164 }
165 default:
166 UNIMPLEMENTED_MSG("Unhandled texture query type: {}",
167 static_cast<u32>(instr.txq.query_type.Value()));
168 }
169 break;
170 }
171 case OpCode::Id::TMML: {
172 UNIMPLEMENTED_IF_MSG(instr.tmml.UsesMiscMode(Tegra::Shader::TextureMiscMode::NDV),
173 "NDV is not implemented");
174
175 if (instr.tmml.UsesMiscMode(TextureMiscMode::NODEP)) {
176 LOG_WARNING(HW_GPU, "TMML.NODEP implementation is incomplete");
177 }
178
179 auto texture_type = instr.tmml.texture_type.Value();
180 const bool is_array = instr.tmml.array != 0;
181 const auto& sampler = GetSampler(instr.sampler, texture_type, is_array, false);
182
183 std::vector<Node> coords;
184
185 // TODO: Add coordinates for different samplers once other texture types are implemented.
186 switch (texture_type) {
187 case TextureType::Texture1D:
188 coords.push_back(GetRegister(instr.gpr8));
189 break;
190 case TextureType::Texture2D:
191 coords.push_back(GetRegister(instr.gpr8.Value() + 0));
192 coords.push_back(GetRegister(instr.gpr8.Value() + 1));
193 break;
194 default:
195 UNIMPLEMENTED_MSG("Unhandled texture type {}", static_cast<u32>(texture_type));
196
197 // Fallback to interpreting as a 2D texture for now
198 coords.push_back(GetRegister(instr.gpr8.Value() + 0));
199 coords.push_back(GetRegister(instr.gpr8.Value() + 1));
200 texture_type = TextureType::Texture2D;
201 }
202
203 for (u32 element = 0; element < 2; ++element) {
204 auto params = coords;
205 MetaTexture meta{sampler, {}, {}, {}, {}, {}, element};
206 const Node value = Operation(OperationCode::TextureQueryLod, meta, std::move(params));
207 SetTemporal(bb, element, value);
208 }
209 for (u32 element = 0; element < 2; ++element) {
210 SetRegister(bb, instr.gpr0.Value() + element, GetTemporal(element));
211 }
212
213 break;
214 }
215 case OpCode::Id::TLDS: {
216 const Tegra::Shader::TextureType texture_type{instr.tlds.GetTextureType()};
217 const bool is_array{instr.tlds.IsArrayTexture()};
218
219 UNIMPLEMENTED_IF_MSG(instr.tlds.UsesMiscMode(TextureMiscMode::AOFFI),
220 "AOFFI is not implemented");
221 UNIMPLEMENTED_IF_MSG(instr.tlds.UsesMiscMode(TextureMiscMode::MZ), "MZ is not implemented");
222
223 if (instr.tlds.UsesMiscMode(TextureMiscMode::NODEP)) {
224 LOG_WARNING(HW_GPU, "TLDS.NODEP implementation is incomplete");
225 }
226
227 WriteTexsInstructionFloat(bb, instr, GetTldsCode(instr, texture_type, is_array));
228 break;
229 }
230 default:
231 UNIMPLEMENTED_MSG("Unhandled memory instruction: {}", opcode->get().GetName());
232 }
233
234 return pc;
235}
236
237const Sampler& ShaderIR::GetSampler(const Tegra::Shader::Sampler& sampler, TextureType type,
238 bool is_array, bool is_shadow) {
239 const auto offset = static_cast<std::size_t>(sampler.index.Value());
240
241 // If this sampler has already been used, return the existing mapping.
242 const auto itr =
243 std::find_if(used_samplers.begin(), used_samplers.end(),
244 [&](const Sampler& entry) { return entry.GetOffset() == offset; });
245 if (itr != used_samplers.end()) {
246 ASSERT(itr->GetType() == type && itr->IsArray() == is_array &&
247 itr->IsShadow() == is_shadow);
248 return *itr;
249 }
250
251 // Otherwise create a new mapping for this sampler
252 const std::size_t next_index = used_samplers.size();
253 const Sampler entry{offset, next_index, type, is_array, is_shadow};
254 return *used_samplers.emplace(entry).first;
255}
256
257void ShaderIR::WriteTexInstructionFloat(NodeBlock& bb, Instruction instr, const Node4& components) {
258 u32 dest_elem = 0;
259 for (u32 elem = 0; elem < 4; ++elem) {
260 if (!instr.tex.IsComponentEnabled(elem)) {
261 // Skip disabled components
262 continue;
263 }
264 SetTemporal(bb, dest_elem++, components[elem]);
265 }
266 // After writing values in temporals, move them to the real registers
267 for (u32 i = 0; i < dest_elem; ++i) {
268 SetRegister(bb, instr.gpr0.Value() + i, GetTemporal(i));
269 }
270}
271
272void ShaderIR::WriteTexsInstructionFloat(NodeBlock& bb, Instruction instr,
273 const Node4& components) {
274 // TEXS has two destination registers and a swizzle. The first two elements in the swizzle
275 // go into gpr0+0 and gpr0+1, and the rest goes into gpr28+0 and gpr28+1
276
277 u32 dest_elem = 0;
278 for (u32 component = 0; component < 4; ++component) {
279 if (!instr.texs.IsComponentEnabled(component))
280 continue;
281 SetTemporal(bb, dest_elem++, components[component]);
282 }
283
284 for (u32 i = 0; i < dest_elem; ++i) {
285 if (i < 2) {
286 // Write the first two swizzle components to gpr0 and gpr0+1
287 SetRegister(bb, instr.gpr0.Value() + i % 2, GetTemporal(i));
288 } else {
289 ASSERT(instr.texs.HasTwoDestinations());
290 // Write the rest of the swizzle components to gpr28 and gpr28+1
291 SetRegister(bb, instr.gpr28.Value() + i % 2, GetTemporal(i));
292 }
293 }
294}
295
296void ShaderIR::WriteTexsInstructionHalfFloat(NodeBlock& bb, Instruction instr,
297 const Node4& components) {
298 // TEXS.F16 destionation registers are packed in two registers in pairs (just like any half
299 // float instruction).
300
301 Node4 values;
302 u32 dest_elem = 0;
303 for (u32 component = 0; component < 4; ++component) {
304 if (!instr.texs.IsComponentEnabled(component))
305 continue;
306 values[dest_elem++] = components[component];
307 }
308 if (dest_elem == 0)
309 return;
310
311 std::generate(values.begin() + dest_elem, values.end(), [&]() { return Immediate(0); });
312
313 const Node first_value = Operation(OperationCode::HPack2, values[0], values[1]);
314 if (dest_elem <= 2) {
315 SetRegister(bb, instr.gpr0, first_value);
316 return;
317 }
318
319 SetTemporal(bb, 0, first_value);
320 SetTemporal(bb, 1, Operation(OperationCode::HPack2, values[2], values[3]));
321
322 SetRegister(bb, instr.gpr0, GetTemporal(0));
323 SetRegister(bb, instr.gpr28, GetTemporal(1));
324}
325
326Node4 ShaderIR::GetTextureCode(Instruction instr, TextureType texture_type,
327 TextureProcessMode process_mode, std::vector<Node> coords,
328 Node array, Node depth_compare, u32 bias_offset) {
329 const bool is_array = array;
330 const bool is_shadow = depth_compare;
331
332 UNIMPLEMENTED_IF_MSG((texture_type == TextureType::Texture3D && (is_array || is_shadow)) ||
333 (texture_type == TextureType::TextureCube && is_array && is_shadow),
334 "This method is not supported.");
335
336 const auto& sampler = GetSampler(instr.sampler, texture_type, is_array, is_shadow);
337
338 const bool lod_needed = process_mode == TextureProcessMode::LZ ||
339 process_mode == TextureProcessMode::LL ||
340 process_mode == TextureProcessMode::LLA;
341
342 // LOD selection (either via bias or explicit textureLod) not supported in GL for
343 // sampler2DArrayShadow and samplerCubeArrayShadow.
344 const bool gl_lod_supported =
345 !((texture_type == Tegra::Shader::TextureType::Texture2D && is_array && is_shadow) ||
346 (texture_type == Tegra::Shader::TextureType::TextureCube && is_array && is_shadow));
347
348 const OperationCode read_method =
349 (lod_needed && gl_lod_supported) ? OperationCode::TextureLod : OperationCode::Texture;
350
351 UNIMPLEMENTED_IF(process_mode != TextureProcessMode::None && !gl_lod_supported);
352
353 Node bias = {};
354 Node lod = {};
355 if (process_mode != TextureProcessMode::None && gl_lod_supported) {
356 switch (process_mode) {
357 case TextureProcessMode::LZ:
358 lod = Immediate(0.0f);
359 break;
360 case TextureProcessMode::LB:
361 // If present, lod or bias are always stored in the register indexed by the gpr20
362 // field with an offset depending on the usage of the other registers
363 bias = GetRegister(instr.gpr20.Value() + bias_offset);
364 break;
365 case TextureProcessMode::LL:
366 lod = GetRegister(instr.gpr20.Value() + bias_offset);
367 break;
368 default:
369 UNIMPLEMENTED_MSG("Unimplemented process mode={}", static_cast<u32>(process_mode));
370 break;
371 }
372 }
373
374 Node4 values;
375 for (u32 element = 0; element < values.size(); ++element) {
376 auto copy_coords = coords;
377 MetaTexture meta{sampler, array, depth_compare, bias, lod, {}, element};
378 values[element] = Operation(read_method, meta, std::move(copy_coords));
379 }
380
381 return values;
382}
383
384Node4 ShaderIR::GetTexCode(Instruction instr, TextureType texture_type,
385 TextureProcessMode process_mode, bool depth_compare, bool is_array) {
386 const bool lod_bias_enabled =
387 (process_mode != TextureProcessMode::None && process_mode != TextureProcessMode::LZ);
388
389 const auto [coord_count, total_coord_count] = ValidateAndGetCoordinateElement(
390 texture_type, depth_compare, is_array, lod_bias_enabled, 4, 5);
391 // If enabled arrays index is always stored in the gpr8 field
392 const u64 array_register = instr.gpr8.Value();
393 // First coordinate index is the gpr8 or gpr8 + 1 when arrays are used
394 const u64 coord_register = array_register + (is_array ? 1 : 0);
395
396 std::vector<Node> coords;
397 for (std::size_t i = 0; i < coord_count; ++i) {
398 coords.push_back(GetRegister(coord_register + i));
399 }
400 // 1D.DC in OpenGL the 2nd component is ignored.
401 if (depth_compare && !is_array && texture_type == TextureType::Texture1D) {
402 coords.push_back(Immediate(0.0f));
403 }
404
405 const Node array = is_array ? GetRegister(array_register) : nullptr;
406
407 Node dc{};
408 if (depth_compare) {
409 // Depth is always stored in the register signaled by gpr20 or in the next register if lod
410 // or bias are used
411 const u64 depth_register = instr.gpr20.Value() + (lod_bias_enabled ? 1 : 0);
412 dc = GetRegister(depth_register);
413 }
414
415 return GetTextureCode(instr, texture_type, process_mode, coords, array, dc, 0);
416}
417
418Node4 ShaderIR::GetTexsCode(Instruction instr, TextureType texture_type,
419 TextureProcessMode process_mode, bool depth_compare, bool is_array) {
420 const bool lod_bias_enabled =
421 (process_mode != TextureProcessMode::None && process_mode != TextureProcessMode::LZ);
422
423 const auto [coord_count, total_coord_count] = ValidateAndGetCoordinateElement(
424 texture_type, depth_compare, is_array, lod_bias_enabled, 4, 4);
425 // If enabled arrays index is always stored in the gpr8 field
426 const u64 array_register = instr.gpr8.Value();
427 // First coordinate index is stored in gpr8 field or (gpr8 + 1) when arrays are used
428 const u64 coord_register = array_register + (is_array ? 1 : 0);
429 const u64 last_coord_register =
430 (is_array || !(lod_bias_enabled || depth_compare) || (coord_count > 2))
431 ? static_cast<u64>(instr.gpr20.Value())
432 : coord_register + 1;
433 const u32 bias_offset = coord_count > 2 ? 1 : 0;
434
435 std::vector<Node> coords;
436 for (std::size_t i = 0; i < coord_count; ++i) {
437 const bool last = (i == (coord_count - 1)) && (coord_count > 1);
438 coords.push_back(GetRegister(last ? last_coord_register : coord_register + i));
439 }
440
441 const Node array = is_array ? GetRegister(array_register) : nullptr;
442
443 Node dc{};
444 if (depth_compare) {
445 // Depth is always stored in the register signaled by gpr20 or in the next register if lod
446 // or bias are used
447 const u64 depth_register = instr.gpr20.Value() + (lod_bias_enabled ? 1 : 0);
448 dc = GetRegister(depth_register);
449 }
450
451 return GetTextureCode(instr, texture_type, process_mode, coords, array, dc, bias_offset);
452}
453
454Node4 ShaderIR::GetTld4Code(Instruction instr, TextureType texture_type, bool depth_compare,
455 bool is_array) {
456 const std::size_t coord_count = GetCoordCount(texture_type);
457 const std::size_t total_coord_count = coord_count + (is_array ? 1 : 0);
458 const std::size_t total_reg_count = total_coord_count + (depth_compare ? 1 : 0);
459
460 // If enabled arrays index is always stored in the gpr8 field
461 const u64 array_register = instr.gpr8.Value();
462 // First coordinate index is the gpr8 or gpr8 + 1 when arrays are used
463 const u64 coord_register = array_register + (is_array ? 1 : 0);
464
465 std::vector<Node> coords;
466 for (size_t i = 0; i < coord_count; ++i)
467 coords.push_back(GetRegister(coord_register + i));
468
469 const auto& sampler = GetSampler(instr.sampler, texture_type, is_array, depth_compare);
470
471 Node4 values;
472 for (u32 element = 0; element < values.size(); ++element) {
473 auto coords_copy = coords;
474 MetaTexture meta{sampler, GetRegister(array_register), {}, {}, {}, {}, element};
475 values[element] = Operation(OperationCode::TextureGather, meta, std::move(coords_copy));
476 }
477
478 return values;
479}
480
481Node4 ShaderIR::GetTldsCode(Instruction instr, TextureType texture_type, bool is_array) {
482 const std::size_t type_coord_count = GetCoordCount(texture_type);
483 const bool lod_enabled = instr.tlds.GetTextureProcessMode() == TextureProcessMode::LL;
484
485 // If enabled arrays index is always stored in the gpr8 field
486 const u64 array_register = instr.gpr8.Value();
487 // if is array gpr20 is used
488 const u64 coord_register = is_array ? instr.gpr20.Value() : instr.gpr8.Value();
489
490 const u64 last_coord_register =
491 ((type_coord_count > 2) || (type_coord_count == 2 && !lod_enabled)) && !is_array
492 ? static_cast<u64>(instr.gpr20.Value())
493 : coord_register + 1;
494
495 std::vector<Node> coords;
496 for (std::size_t i = 0; i < type_coord_count; ++i) {
497 const bool last = (i == (type_coord_count - 1)) && (type_coord_count > 1);
498 coords.push_back(GetRegister(last ? last_coord_register : coord_register + i));
499 }
500
501 const Node array = is_array ? GetRegister(array_register) : nullptr;
502 // When lod is used always is in gpr20
503 const Node lod = lod_enabled ? GetRegister(instr.gpr20) : Immediate(0);
504
505 const auto& sampler = GetSampler(instr.sampler, texture_type, is_array, false);
506
507 Node4 values;
508 for (u32 element = 0; element < values.size(); ++element) {
509 auto coords_copy = coords;
510 MetaTexture meta{sampler, array, {}, {}, lod, {}, element};
511 values[element] = Operation(OperationCode::TexelFetch, meta, std::move(coords_copy));
512 }
513 return values;
514}
515
516std::tuple<std::size_t, std::size_t> ShaderIR::ValidateAndGetCoordinateElement(
517 TextureType texture_type, bool depth_compare, bool is_array, bool lod_bias_enabled,
518 std::size_t max_coords, std::size_t max_inputs) {
519 const std::size_t coord_count = GetCoordCount(texture_type);
520
521 std::size_t total_coord_count = coord_count + (is_array ? 1 : 0) + (depth_compare ? 1 : 0);
522 const std::size_t total_reg_count = total_coord_count + (lod_bias_enabled ? 1 : 0);
523 if (total_coord_count > max_coords || total_reg_count > max_inputs) {
524 UNIMPLEMENTED_MSG("Unsupported Texture operation");
525 total_coord_count = std::min(total_coord_count, max_coords);
526 }
527 // 1D.DC OpenGL is using a vec3 but 2nd component is ignored later.
528 total_coord_count +=
529 (depth_compare && !is_array && texture_type == TextureType::Texture1D) ? 1 : 0;
530
531 return {coord_count, total_coord_count};
532}
533
534} // namespace VideoCommon::Shader \ No newline at end of file
diff --git a/src/video_core/shader/shader_ir.h b/src/video_core/shader/shader_ir.h
index 52c7f2c4e..5bc3a3900 100644
--- a/src/video_core/shader/shader_ir.h
+++ b/src/video_core/shader/shader_ir.h
@@ -290,7 +290,9 @@ struct MetaTexture {
290 const Sampler& sampler; 290 const Sampler& sampler;
291 Node array{}; 291 Node array{};
292 Node depth_compare{}; 292 Node depth_compare{};
293 std::vector<Node> extras; 293 Node bias{};
294 Node lod{};
295 Node component{};
294 u32 element{}; 296 u32 element{};
295}; 297};
296 298
@@ -614,6 +616,7 @@ private:
614 u32 DecodeHfma2(NodeBlock& bb, u32 pc); 616 u32 DecodeHfma2(NodeBlock& bb, u32 pc);
615 u32 DecodeConversion(NodeBlock& bb, u32 pc); 617 u32 DecodeConversion(NodeBlock& bb, u32 pc);
616 u32 DecodeMemory(NodeBlock& bb, u32 pc); 618 u32 DecodeMemory(NodeBlock& bb, u32 pc);
619 u32 DecodeTexture(NodeBlock& bb, u32 pc);
617 u32 DecodeFloatSetPredicate(NodeBlock& bb, u32 pc); 620 u32 DecodeFloatSetPredicate(NodeBlock& bb, u32 pc);
618 u32 DecodeIntegerSetPredicate(NodeBlock& bb, u32 pc); 621 u32 DecodeIntegerSetPredicate(NodeBlock& bb, u32 pc);
619 u32 DecodeHalfSetPredicate(NodeBlock& bb, u32 pc); 622 u32 DecodeHalfSetPredicate(NodeBlock& bb, u32 pc);
diff --git a/src/video_core/surface.cpp b/src/video_core/surface.cpp
index 044ba116a..a7ac26d71 100644
--- a/src/video_core/surface.cpp
+++ b/src/video_core/surface.cpp
@@ -89,8 +89,6 @@ PixelFormat PixelFormatFromDepthFormat(Tegra::DepthFormat format) {
89 89
90PixelFormat PixelFormatFromRenderTargetFormat(Tegra::RenderTargetFormat format) { 90PixelFormat PixelFormatFromRenderTargetFormat(Tegra::RenderTargetFormat format) {
91 switch (format) { 91 switch (format) {
92 // TODO (Hexagon12): Converting SRGBA to RGBA is a hack and doesn't completely correct the
93 // gamma.
94 case Tegra::RenderTargetFormat::RGBA8_SRGB: 92 case Tegra::RenderTargetFormat::RGBA8_SRGB:
95 return PixelFormat::RGBA8_SRGB; 93 return PixelFormat::RGBA8_SRGB;
96 case Tegra::RenderTargetFormat::RGBA8_UNORM: 94 case Tegra::RenderTargetFormat::RGBA8_UNORM:
diff --git a/src/video_core/textures/astc.cpp b/src/video_core/textures/astc.cpp
index bc50a4876..b508d64e9 100644
--- a/src/video_core/textures/astc.cpp
+++ b/src/video_core/textures/astc.cpp
@@ -23,28 +23,12 @@
23 23
24#include "video_core/textures/astc.h" 24#include "video_core/textures/astc.h"
25 25
26class BitStream { 26class InputBitStream {
27public: 27public:
28 explicit BitStream(unsigned char* ptr, int nBits = 0, int start_offset = 0) 28 explicit InputBitStream(const unsigned char* ptr, int nBits = 0, int start_offset = 0)
29 : m_NumBits(nBits), m_CurByte(ptr), m_NextBit(start_offset % 8) {} 29 : m_NumBits(nBits), m_CurByte(ptr), m_NextBit(start_offset % 8) {}
30 30
31 ~BitStream() = default; 31 ~InputBitStream() = default;
32
33 int GetBitsWritten() const {
34 return m_BitsWritten;
35 }
36
37 void WriteBitsR(unsigned int val, unsigned int nBits) {
38 for (unsigned int i = 0; i < nBits; i++) {
39 WriteBit((val >> (nBits - i - 1)) & 1);
40 }
41 }
42
43 void WriteBits(unsigned int val, unsigned int nBits) {
44 for (unsigned int i = 0; i < nBits; i++) {
45 WriteBit((val >> i) & 1);
46 }
47 }
48 32
49 int GetBitsRead() const { 33 int GetBitsRead() const {
50 return m_BitsRead; 34 return m_BitsRead;
@@ -71,6 +55,38 @@ public:
71 } 55 }
72 56
73private: 57private:
58 const int m_NumBits;
59 const unsigned char* m_CurByte;
60 int m_NextBit = 0;
61 int m_BitsRead = 0;
62
63 bool done = false;
64};
65
66class OutputBitStream {
67public:
68 explicit OutputBitStream(unsigned char* ptr, int nBits = 0, int start_offset = 0)
69 : m_NumBits(nBits), m_CurByte(ptr), m_NextBit(start_offset % 8) {}
70
71 ~OutputBitStream() = default;
72
73 int GetBitsWritten() const {
74 return m_BitsWritten;
75 }
76
77 void WriteBitsR(unsigned int val, unsigned int nBits) {
78 for (unsigned int i = 0; i < nBits; i++) {
79 WriteBit((val >> (nBits - i - 1)) & 1);
80 }
81 }
82
83 void WriteBits(unsigned int val, unsigned int nBits) {
84 for (unsigned int i = 0; i < nBits; i++) {
85 WriteBit((val >> i) & 1);
86 }
87 }
88
89private:
74 void WriteBit(int b) { 90 void WriteBit(int b) {
75 91
76 if (done) 92 if (done)
@@ -238,8 +254,8 @@ public:
238 // Fills result with the values that are encoded in the given 254 // Fills result with the values that are encoded in the given
239 // bitstream. We must know beforehand what the maximum possible 255 // bitstream. We must know beforehand what the maximum possible
240 // value is, and how many values we're decoding. 256 // value is, and how many values we're decoding.
241 static void DecodeIntegerSequence(std::vector<IntegerEncodedValue>& result, BitStream& bits, 257 static void DecodeIntegerSequence(std::vector<IntegerEncodedValue>& result,
242 uint32_t maxRange, uint32_t nValues) { 258 InputBitStream& bits, uint32_t maxRange, uint32_t nValues) {
243 // Determine encoding parameters 259 // Determine encoding parameters
244 IntegerEncodedValue val = IntegerEncodedValue::CreateEncoding(maxRange); 260 IntegerEncodedValue val = IntegerEncodedValue::CreateEncoding(maxRange);
245 261
@@ -267,7 +283,7 @@ public:
267 } 283 }
268 284
269private: 285private:
270 static void DecodeTritBlock(BitStream& bits, std::vector<IntegerEncodedValue>& result, 286 static void DecodeTritBlock(InputBitStream& bits, std::vector<IntegerEncodedValue>& result,
271 uint32_t nBitsPerValue) { 287 uint32_t nBitsPerValue) {
272 // Implement the algorithm in section C.2.12 288 // Implement the algorithm in section C.2.12
273 uint32_t m[5]; 289 uint32_t m[5];
@@ -327,7 +343,7 @@ private:
327 } 343 }
328 } 344 }
329 345
330 static void DecodeQuintBlock(BitStream& bits, std::vector<IntegerEncodedValue>& result, 346 static void DecodeQuintBlock(InputBitStream& bits, std::vector<IntegerEncodedValue>& result,
331 uint32_t nBitsPerValue) { 347 uint32_t nBitsPerValue) {
332 // Implement the algorithm in section C.2.12 348 // Implement the algorithm in section C.2.12
333 uint32_t m[3]; 349 uint32_t m[3];
@@ -406,7 +422,7 @@ struct TexelWeightParams {
406 } 422 }
407}; 423};
408 424
409static TexelWeightParams DecodeBlockInfo(BitStream& strm) { 425static TexelWeightParams DecodeBlockInfo(InputBitStream& strm) {
410 TexelWeightParams params; 426 TexelWeightParams params;
411 427
412 // Read the entire block mode all at once 428 // Read the entire block mode all at once
@@ -605,7 +621,7 @@ static TexelWeightParams DecodeBlockInfo(BitStream& strm) {
605 return params; 621 return params;
606} 622}
607 623
608static void FillVoidExtentLDR(BitStream& strm, uint32_t* const outBuf, uint32_t blockWidth, 624static void FillVoidExtentLDR(InputBitStream& strm, uint32_t* const outBuf, uint32_t blockWidth,
609 uint32_t blockHeight) { 625 uint32_t blockHeight) {
610 // Don't actually care about the void extent, just read the bits... 626 // Don't actually care about the void extent, just read the bits...
611 for (int i = 0; i < 4; ++i) { 627 for (int i = 0; i < 4; ++i) {
@@ -821,7 +837,7 @@ static void DecodeColorValues(uint32_t* out, uint8_t* data, const uint32_t* mode
821 837
822 // We now have enough to decode our integer sequence. 838 // We now have enough to decode our integer sequence.
823 std::vector<IntegerEncodedValue> decodedColorValues; 839 std::vector<IntegerEncodedValue> decodedColorValues;
824 BitStream colorStream(data); 840 InputBitStream colorStream(data);
825 IntegerEncodedValue::DecodeIntegerSequence(decodedColorValues, colorStream, range, nValues); 841 IntegerEncodedValue::DecodeIntegerSequence(decodedColorValues, colorStream, range, nValues);
826 842
827 // Once we have the decoded values, we need to dequantize them to the 0-255 range 843 // Once we have the decoded values, we need to dequantize them to the 0-255 range
@@ -1365,9 +1381,9 @@ static void ComputeEndpoints(Pixel& ep1, Pixel& ep2, const uint32_t*& colorValue
1365#undef READ_INT_VALUES 1381#undef READ_INT_VALUES
1366} 1382}
1367 1383
1368static void DecompressBlock(uint8_t inBuf[16], const uint32_t blockWidth, 1384static void DecompressBlock(const uint8_t inBuf[16], const uint32_t blockWidth,
1369 const uint32_t blockHeight, uint32_t* outBuf) { 1385 const uint32_t blockHeight, uint32_t* outBuf) {
1370 BitStream strm(inBuf); 1386 InputBitStream strm(inBuf);
1371 TexelWeightParams weightParams = DecodeBlockInfo(strm); 1387 TexelWeightParams weightParams = DecodeBlockInfo(strm);
1372 1388
1373 // Was there an error? 1389 // Was there an error?
@@ -1421,7 +1437,7 @@ static void DecompressBlock(uint8_t inBuf[16], const uint32_t blockWidth,
1421 // Define color data. 1437 // Define color data.
1422 uint8_t colorEndpointData[16]; 1438 uint8_t colorEndpointData[16];
1423 memset(colorEndpointData, 0, sizeof(colorEndpointData)); 1439 memset(colorEndpointData, 0, sizeof(colorEndpointData));
1424 BitStream colorEndpointStream(colorEndpointData, 16 * 8, 0); 1440 OutputBitStream colorEndpointStream(colorEndpointData, 16 * 8, 0);
1425 1441
1426 // Read extra config data... 1442 // Read extra config data...
1427 uint32_t baseCEM = 0; 1443 uint32_t baseCEM = 0;
@@ -1549,7 +1565,7 @@ static void DecompressBlock(uint8_t inBuf[16], const uint32_t blockWidth,
1549 memset(texelWeightData + clearByteStart, 0, 16 - clearByteStart); 1565 memset(texelWeightData + clearByteStart, 0, 16 - clearByteStart);
1550 1566
1551 std::vector<IntegerEncodedValue> texelWeightValues; 1567 std::vector<IntegerEncodedValue> texelWeightValues;
1552 BitStream weightStream(texelWeightData); 1568 InputBitStream weightStream(texelWeightData);
1553 1569
1554 IntegerEncodedValue::DecodeIntegerSequence(texelWeightValues, weightStream, 1570 IntegerEncodedValue::DecodeIntegerSequence(texelWeightValues, weightStream,
1555 weightParams.m_MaxWeight, 1571 weightParams.m_MaxWeight,
@@ -1597,7 +1613,7 @@ static void DecompressBlock(uint8_t inBuf[16], const uint32_t blockWidth,
1597 1613
1598namespace Tegra::Texture::ASTC { 1614namespace Tegra::Texture::ASTC {
1599 1615
1600std::vector<uint8_t> Decompress(std::vector<uint8_t>& data, uint32_t width, uint32_t height, 1616std::vector<uint8_t> Decompress(const uint8_t* data, uint32_t width, uint32_t height,
1601 uint32_t depth, uint32_t block_width, uint32_t block_height) { 1617 uint32_t depth, uint32_t block_width, uint32_t block_height) {
1602 uint32_t blockIdx = 0; 1618 uint32_t blockIdx = 0;
1603 std::vector<uint8_t> outData(height * width * depth * 4); 1619 std::vector<uint8_t> outData(height * width * depth * 4);
@@ -1605,7 +1621,7 @@ std::vector<uint8_t> Decompress(std::vector<uint8_t>& data, uint32_t width, uint
1605 for (uint32_t j = 0; j < height; j += block_height) { 1621 for (uint32_t j = 0; j < height; j += block_height) {
1606 for (uint32_t i = 0; i < width; i += block_width) { 1622 for (uint32_t i = 0; i < width; i += block_width) {
1607 1623
1608 uint8_t* blockPtr = data.data() + blockIdx * 16; 1624 const uint8_t* blockPtr = data + blockIdx * 16;
1609 1625
1610 // Blocks can be at most 12x12 1626 // Blocks can be at most 12x12
1611 uint32_t uncompData[144]; 1627 uint32_t uncompData[144];
diff --git a/src/video_core/textures/astc.h b/src/video_core/textures/astc.h
index d419dd025..991cdba72 100644
--- a/src/video_core/textures/astc.h
+++ b/src/video_core/textures/astc.h
@@ -9,7 +9,7 @@
9 9
10namespace Tegra::Texture::ASTC { 10namespace Tegra::Texture::ASTC {
11 11
12std::vector<uint8_t> Decompress(std::vector<uint8_t>& data, uint32_t width, uint32_t height, 12std::vector<uint8_t> Decompress(const uint8_t* data, uint32_t width, uint32_t height,
13 uint32_t depth, uint32_t block_width, uint32_t block_height); 13 uint32_t depth, uint32_t block_width, uint32_t block_height);
14 14
15} // namespace Tegra::Texture::ASTC 15} // namespace Tegra::Texture::ASTC
diff --git a/src/video_core/textures/convert.cpp b/src/video_core/textures/convert.cpp
new file mode 100644
index 000000000..5e439f036
--- /dev/null
+++ b/src/video_core/textures/convert.cpp
@@ -0,0 +1,92 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <algorithm>
6#include <cstring>
7#include <tuple>
8#include <vector>
9
10#include "common/assert.h"
11#include "common/common_types.h"
12#include "common/logging/log.h"
13#include "video_core/textures/astc.h"
14#include "video_core/textures/convert.h"
15
16namespace Tegra::Texture {
17
18using VideoCore::Surface::PixelFormat;
19
20template <bool reverse>
21void SwapS8Z24ToZ24S8(u8* data, u32 width, u32 height) {
22 union S8Z24 {
23 BitField<0, 24, u32> z24;
24 BitField<24, 8, u32> s8;
25 };
26 static_assert(sizeof(S8Z24) == 4, "S8Z24 is incorrect size");
27
28 union Z24S8 {
29 BitField<0, 8, u32> s8;
30 BitField<8, 24, u32> z24;
31 };
32 static_assert(sizeof(Z24S8) == 4, "Z24S8 is incorrect size");
33
34 S8Z24 s8z24_pixel{};
35 Z24S8 z24s8_pixel{};
36 constexpr auto bpp{
37 VideoCore::Surface::GetBytesPerPixel(VideoCore::Surface::PixelFormat::S8Z24)};
38 for (std::size_t y = 0; y < height; ++y) {
39 for (std::size_t x = 0; x < width; ++x) {
40 const std::size_t offset{bpp * (y * width + x)};
41 if constexpr (reverse) {
42 std::memcpy(&z24s8_pixel, &data[offset], sizeof(Z24S8));
43 s8z24_pixel.s8.Assign(z24s8_pixel.s8);
44 s8z24_pixel.z24.Assign(z24s8_pixel.z24);
45 std::memcpy(&data[offset], &s8z24_pixel, sizeof(S8Z24));
46 } else {
47 std::memcpy(&s8z24_pixel, &data[offset], sizeof(S8Z24));
48 z24s8_pixel.s8.Assign(s8z24_pixel.s8);
49 z24s8_pixel.z24.Assign(s8z24_pixel.z24);
50 std::memcpy(&data[offset], &z24s8_pixel, sizeof(Z24S8));
51 }
52 }
53 }
54}
55
56static void ConvertS8Z24ToZ24S8(u8* data, u32 width, u32 height) {
57 SwapS8Z24ToZ24S8<false>(data, width, height);
58}
59
60static void ConvertZ24S8ToS8Z24(u8* data, u32 width, u32 height) {
61 SwapS8Z24ToZ24S8<true>(data, width, height);
62}
63
64void ConvertFromGuestToHost(u8* data, PixelFormat pixel_format, u32 width, u32 height, u32 depth,
65 bool convert_astc, bool convert_s8z24) {
66 if (convert_astc && IsPixelFormatASTC(pixel_format)) {
67 // Convert ASTC pixel formats to RGBA8, as most desktop GPUs do not support ASTC.
68 u32 block_width{};
69 u32 block_height{};
70 std::tie(block_width, block_height) = GetASTCBlockSize(pixel_format);
71 const std::vector<u8> rgba8_data =
72 Tegra::Texture::ASTC::Decompress(data, width, height, depth, block_width, block_height);
73 std::copy(rgba8_data.begin(), rgba8_data.end(), data);
74
75 } else if (convert_s8z24 && pixel_format == PixelFormat::S8Z24) {
76 Tegra::Texture::ConvertS8Z24ToZ24S8(data, width, height);
77 }
78}
79
80void ConvertFromHostToGuest(u8* data, PixelFormat pixel_format, u32 width, u32 height, u32 depth,
81 bool convert_astc, bool convert_s8z24) {
82 if (convert_astc && IsPixelFormatASTC(pixel_format)) {
83 LOG_CRITICAL(HW_GPU, "Conversion of format {} after texture flushing is not implemented",
84 static_cast<u32>(pixel_format));
85 UNREACHABLE();
86
87 } else if (convert_s8z24 && pixel_format == PixelFormat::S8Z24) {
88 Tegra::Texture::ConvertZ24S8ToS8Z24(data, width, height);
89 }
90}
91
92} // namespace Tegra::Texture \ No newline at end of file
diff --git a/src/video_core/textures/convert.h b/src/video_core/textures/convert.h
new file mode 100644
index 000000000..07cd8b5da
--- /dev/null
+++ b/src/video_core/textures/convert.h
@@ -0,0 +1,18 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include "common/common_types.h"
8#include "video_core/surface.h"
9
10namespace Tegra::Texture {
11
12void ConvertFromGuestToHost(u8* data, VideoCore::Surface::PixelFormat pixel_format, u32 width,
13 u32 height, u32 depth, bool convert_astc, bool convert_s8z24);
14
15void ConvertFromHostToGuest(u8* data, VideoCore::Surface::PixelFormat pixel_format, u32 width,
16 u32 height, u32 depth, bool convert_astc, bool convert_s8z24);
17
18} // namespace Tegra::Texture \ No newline at end of file
diff --git a/src/video_core/textures/decoders.cpp b/src/video_core/textures/decoders.cpp
index 5db75de22..cad7340f5 100644
--- a/src/video_core/textures/decoders.cpp
+++ b/src/video_core/textures/decoders.cpp
@@ -103,8 +103,8 @@ void FastProcessBlock(u8* const swizzled_data, u8* const unswizzled_data, const
103 const u32 swizzle_offset{y_address + table[(xb / fast_swizzle_align) % 4]}; 103 const u32 swizzle_offset{y_address + table[(xb / fast_swizzle_align) % 4]};
104 const u32 out_x = xb * out_bytes_per_pixel / bytes_per_pixel; 104 const u32 out_x = xb * out_bytes_per_pixel / bytes_per_pixel;
105 const u32 pixel_index{out_x + pixel_base}; 105 const u32 pixel_index{out_x + pixel_base};
106 data_ptrs[unswizzle] = swizzled_data + swizzle_offset; 106 data_ptrs[unswizzle ? 1 : 0] = swizzled_data + swizzle_offset;
107 data_ptrs[!unswizzle] = unswizzled_data + pixel_index; 107 data_ptrs[unswizzle ? 0 : 1] = unswizzled_data + pixel_index;
108 std::memcpy(data_ptrs[0], data_ptrs[1], fast_swizzle_align); 108 std::memcpy(data_ptrs[0], data_ptrs[1], fast_swizzle_align);
109 } 109 }
110 pixel_base += stride_x; 110 pixel_base += stride_x;
@@ -154,7 +154,7 @@ void SwizzledData(u8* const swizzled_data, u8* const unswizzled_data, const bool
154 for (u32 xb = 0; xb < blocks_on_x; xb++) { 154 for (u32 xb = 0; xb < blocks_on_x; xb++) {
155 const u32 x_start = xb * block_x_elements; 155 const u32 x_start = xb * block_x_elements;
156 const u32 x_end = std::min(width, x_start + block_x_elements); 156 const u32 x_end = std::min(width, x_start + block_x_elements);
157 if (fast) { 157 if constexpr (fast) {
158 FastProcessBlock(swizzled_data, unswizzled_data, unswizzle, x_start, y_start, 158 FastProcessBlock(swizzled_data, unswizzled_data, unswizzle, x_start, y_start,
159 z_start, x_end, y_end, z_end, tile_offset, xy_block_size, 159 z_start, x_end, y_end, z_end, tile_offset, xy_block_size,
160 layer_z, stride_x, bytes_per_pixel, out_bytes_per_pixel); 160 layer_z, stride_x, bytes_per_pixel, out_bytes_per_pixel);
diff --git a/src/video_core/textures/decoders.h b/src/video_core/textures/decoders.h
index 85b7e9f7b..65df86890 100644
--- a/src/video_core/textures/decoders.h
+++ b/src/video_core/textures/decoders.h
@@ -16,16 +16,13 @@ inline std::size_t GetGOBSize() {
16 return 512; 16 return 512;
17} 17}
18 18
19/** 19/// Unswizzles a swizzled texture without changing its format.
20 * Unswizzles a swizzled texture without changing its format.
21 */
22void UnswizzleTexture(u8* unswizzled_data, VAddr address, u32 tile_size_x, u32 tile_size_y, 20void UnswizzleTexture(u8* unswizzled_data, VAddr address, u32 tile_size_x, u32 tile_size_y,
23 u32 bytes_per_pixel, u32 width, u32 height, u32 depth, 21 u32 bytes_per_pixel, u32 width, u32 height, u32 depth,
24 u32 block_height = TICEntry::DefaultBlockHeight, 22 u32 block_height = TICEntry::DefaultBlockHeight,
25 u32 block_depth = TICEntry::DefaultBlockHeight, u32 width_spacing = 0); 23 u32 block_depth = TICEntry::DefaultBlockHeight, u32 width_spacing = 0);
26/** 24
27 * Unswizzles a swizzled texture without changing its format. 25/// Unswizzles a swizzled texture without changing its format.
28 */
29std::vector<u8> UnswizzleTexture(VAddr address, u32 tile_size_x, u32 tile_size_y, 26std::vector<u8> UnswizzleTexture(VAddr address, u32 tile_size_x, u32 tile_size_y,
30 u32 bytes_per_pixel, u32 width, u32 height, u32 depth, 27 u32 bytes_per_pixel, u32 width, u32 height, u32 depth,
31 u32 block_height = TICEntry::DefaultBlockHeight, 28 u32 block_height = TICEntry::DefaultBlockHeight,
@@ -37,15 +34,11 @@ void CopySwizzledData(u32 width, u32 height, u32 depth, u32 bytes_per_pixel,
37 u32 out_bytes_per_pixel, u8* swizzled_data, u8* unswizzled_data, 34 u32 out_bytes_per_pixel, u8* swizzled_data, u8* unswizzled_data,
38 bool unswizzle, u32 block_height, u32 block_depth, u32 width_spacing); 35 bool unswizzle, u32 block_height, u32 block_depth, u32 width_spacing);
39 36
40/** 37/// Decodes an unswizzled texture into a A8R8G8B8 texture.
41 * Decodes an unswizzled texture into a A8R8G8B8 texture.
42 */
43std::vector<u8> DecodeTexture(const std::vector<u8>& texture_data, TextureFormat format, u32 width, 38std::vector<u8> DecodeTexture(const std::vector<u8>& texture_data, TextureFormat format, u32 width,
44 u32 height); 39 u32 height);
45 40
46/** 41/// This function calculates the correct size of a texture depending if it's tiled or not.
47 * This function calculates the correct size of a texture depending if it's tiled or not.
48 */
49std::size_t CalculateSize(bool tiled, u32 bytes_per_pixel, u32 width, u32 height, u32 depth, 42std::size_t CalculateSize(bool tiled, u32 bytes_per_pixel, u32 width, u32 height, u32 depth,
50 u32 block_height, u32 block_depth); 43 u32 block_height, u32 block_depth);
51 44
@@ -53,6 +46,7 @@ std::size_t CalculateSize(bool tiled, u32 bytes_per_pixel, u32 width, u32 height
53void SwizzleSubrect(u32 subrect_width, u32 subrect_height, u32 source_pitch, u32 swizzled_width, 46void SwizzleSubrect(u32 subrect_width, u32 subrect_height, u32 source_pitch, u32 swizzled_width,
54 u32 bytes_per_pixel, VAddr swizzled_data, VAddr unswizzled_data, 47 u32 bytes_per_pixel, VAddr swizzled_data, VAddr unswizzled_data,
55 u32 block_height); 48 u32 block_height);
49
56/// Copies a tiled subrectangle into a linear surface. 50/// Copies a tiled subrectangle into a linear surface.
57void UnswizzleSubrect(u32 subrect_width, u32 subrect_height, u32 dest_pitch, u32 swizzled_width, 51void UnswizzleSubrect(u32 subrect_width, u32 subrect_height, u32 dest_pitch, u32 swizzled_width,
58 u32 bytes_per_pixel, VAddr swizzled_data, VAddr unswizzled_data, 52 u32 bytes_per_pixel, VAddr swizzled_data, VAddr unswizzled_data,
diff --git a/src/video_core/textures/texture.h b/src/video_core/textures/texture.h
index 0fc5530f2..b8675f702 100644
--- a/src/video_core/textures/texture.h
+++ b/src/video_core/textures/texture.h
@@ -4,6 +4,7 @@
4 4
5#pragma once 5#pragma once
6 6
7#include <array>
7#include "common/assert.h" 8#include "common/assert.h"
8#include "common/bit_field.h" 9#include "common/bit_field.h"
9#include "common/common_funcs.h" 10#include "common/common_funcs.h"
@@ -293,7 +294,7 @@ struct TSCEntry {
293 union { 294 union {
294 BitField<0, 2, TextureFilter> mag_filter; 295 BitField<0, 2, TextureFilter> mag_filter;
295 BitField<4, 2, TextureFilter> min_filter; 296 BitField<4, 2, TextureFilter> min_filter;
296 BitField<6, 2, TextureMipmapFilter> mip_filter; 297 BitField<6, 2, TextureMipmapFilter> mipmap_filter;
297 BitField<9, 1, u32> cubemap_interface_filtering; 298 BitField<9, 1, u32> cubemap_interface_filtering;
298 BitField<12, 13, u32> mip_lod_bias; 299 BitField<12, 13, u32> mip_lod_bias;
299 }; 300 };
@@ -306,10 +307,33 @@ struct TSCEntry {
306 BitField<12, 8, u32> srgb_border_color_g; 307 BitField<12, 8, u32> srgb_border_color_g;
307 BitField<20, 8, u32> srgb_border_color_b; 308 BitField<20, 8, u32> srgb_border_color_b;
308 }; 309 };
309 float border_color_r; 310 std::array<f32, 4> border_color;
310 float border_color_g; 311
311 float border_color_b; 312 float GetMaxAnisotropy() const {
312 float border_color_a; 313 return static_cast<float>(1U << max_anisotropy);
314 }
315
316 float GetMinLod() const {
317 return static_cast<float>(min_lod_clamp) / 256.0f;
318 }
319
320 float GetMaxLod() const {
321 return static_cast<float>(max_lod_clamp) / 256.0f;
322 }
323
324 float GetLodBias() const {
325 // Sign extend the 13-bit value.
326 constexpr u32 mask = 1U << (13 - 1);
327 return static_cast<s32>((mip_lod_bias ^ mask) - mask) / 256.0f;
328 }
329
330 std::array<float, 4> GetBorderColor() const {
331 if (srgb_conversion) {
332 return {srgb_border_color_r / 255.0f, srgb_border_color_g / 255.0f,
333 srgb_border_color_b / 255.0f, border_color[3]};
334 }
335 return border_color;
336 }
313}; 337};
314static_assert(sizeof(TSCEntry) == 0x20, "TSCEntry has wrong size"); 338static_assert(sizeof(TSCEntry) == 0x20, "TSCEntry has wrong size");
315 339
diff --git a/src/web_service/verify_login.h b/src/web_service/verify_login.h
index 39db32dbb..821b345d7 100644
--- a/src/web_service/verify_login.h
+++ b/src/web_service/verify_login.h
@@ -4,8 +4,6 @@
4 4
5#pragma once 5#pragma once
6 6
7#include <functional>
8#include <future>
9#include <string> 7#include <string>
10 8
11namespace WebService { 9namespace WebService {
diff --git a/src/web_service/web_backend.cpp b/src/web_service/web_backend.cpp
index b7737b615..40da1a4e2 100644
--- a/src/web_service/web_backend.cpp
+++ b/src/web_service/web_backend.cpp
@@ -10,7 +10,6 @@
10#include "common/common_types.h" 10#include "common/common_types.h"
11#include "common/logging/log.h" 11#include "common/logging/log.h"
12#include "common/web_result.h" 12#include "common/web_result.h"
13#include "core/settings.h"
14#include "web_service/web_backend.h" 13#include "web_service/web_backend.h"
15 14
16namespace WebService { 15namespace WebService {
diff --git a/src/yuzu/applets/web_browser.cpp b/src/yuzu/applets/web_browser.cpp
index 6a9138d53..979b9ec14 100644
--- a/src/yuzu/applets/web_browser.cpp
+++ b/src/yuzu/applets/web_browser.cpp
@@ -56,6 +56,8 @@ constexpr char NX_SHIM_INJECT_SCRIPT[] = R"(
56 window.nx.endApplet = function() { 56 window.nx.endApplet = function() {
57 applet_done = true; 57 applet_done = true;
58 }; 58 };
59
60 window.onkeypress = function(e) { if (e.keyCode === 13) { applet_done = true; } };
59)"; 61)";
60 62
61QString GetNXShimInjectionScript() { 63QString GetNXShimInjectionScript() {
diff --git a/src/yuzu/bootmanager.cpp b/src/yuzu/bootmanager.cpp
index b2a087aa5..d2c97b1f8 100644
--- a/src/yuzu/bootmanager.cpp
+++ b/src/yuzu/bootmanager.cpp
@@ -20,10 +20,7 @@
20EmuThread::EmuThread(GRenderWindow* render_window) : render_window(render_window) {} 20EmuThread::EmuThread(GRenderWindow* render_window) : render_window(render_window) {}
21 21
22void EmuThread::run() { 22void EmuThread::run() {
23 if (!Settings::values.use_multi_core) { 23 render_window->MakeCurrent();
24 // Single core mode must acquire OpenGL context for entire emulation session
25 render_window->MakeCurrent();
26 }
27 24
28 MicroProfileOnThreadCreate("EmuThread"); 25 MicroProfileOnThreadCreate("EmuThread");
29 26
@@ -38,6 +35,11 @@ void EmuThread::run() {
38 35
39 emit LoadProgress(VideoCore::LoadCallbackStage::Complete, 0, 0); 36 emit LoadProgress(VideoCore::LoadCallbackStage::Complete, 0, 0);
40 37
38 if (Settings::values.use_asynchronous_gpu_emulation) {
39 // Release OpenGL context for the GPU thread
40 render_window->DoneCurrent();
41 }
42
41 // holds whether the cpu was running during the last iteration, 43 // holds whether the cpu was running during the last iteration,
42 // so that the DebugModeLeft signal can be emitted before the 44 // so that the DebugModeLeft signal can be emitted before the
43 // next execution step 45 // next execution step
diff --git a/src/yuzu/compatdb.cpp b/src/yuzu/compatdb.cpp
index c09a06520..c8b0a5ec0 100644
--- a/src/yuzu/compatdb.cpp
+++ b/src/yuzu/compatdb.cpp
@@ -53,8 +53,8 @@ void CompatDB::Submit() {
53 case CompatDBPage::Final: 53 case CompatDBPage::Final:
54 back(); 54 back();
55 LOG_DEBUG(Frontend, "Compatibility Rating: {}", compatibility->checkedId()); 55 LOG_DEBUG(Frontend, "Compatibility Rating: {}", compatibility->checkedId());
56 Core::Telemetry().AddField(Telemetry::FieldType::UserFeedback, "Compatibility", 56 Core::System::GetInstance().TelemetrySession().AddField(
57 compatibility->checkedId()); 57 Telemetry::FieldType::UserFeedback, "Compatibility", compatibility->checkedId());
58 58
59 button(NextButton)->setEnabled(false); 59 button(NextButton)->setEnabled(false);
60 button(NextButton)->setText(tr("Submitting")); 60 button(NextButton)->setText(tr("Submitting"));
diff --git a/src/yuzu/configuration/config.cpp b/src/yuzu/configuration/config.cpp
index e9546dadf..74dc6bb28 100644
--- a/src/yuzu/configuration/config.cpp
+++ b/src/yuzu/configuration/config.cpp
@@ -374,6 +374,8 @@ void Config::ReadValues() {
374 qt_config->value("use_disk_shader_cache", false).toBool(); 374 qt_config->value("use_disk_shader_cache", false).toBool();
375 Settings::values.use_accurate_gpu_emulation = 375 Settings::values.use_accurate_gpu_emulation =
376 qt_config->value("use_accurate_gpu_emulation", false).toBool(); 376 qt_config->value("use_accurate_gpu_emulation", false).toBool();
377 Settings::values.use_asynchronous_gpu_emulation =
378 qt_config->value("use_asynchronous_gpu_emulation", false).toBool();
377 379
378 Settings::values.bg_red = qt_config->value("bg_red", 0.0).toFloat(); 380 Settings::values.bg_red = qt_config->value("bg_red", 0.0).toFloat();
379 Settings::values.bg_green = qt_config->value("bg_green", 0.0).toFloat(); 381 Settings::values.bg_green = qt_config->value("bg_green", 0.0).toFloat();
@@ -633,6 +635,8 @@ void Config::SaveValues() {
633 qt_config->setValue("frame_limit", Settings::values.frame_limit); 635 qt_config->setValue("frame_limit", Settings::values.frame_limit);
634 qt_config->setValue("use_disk_shader_cache", Settings::values.use_disk_shader_cache); 636 qt_config->setValue("use_disk_shader_cache", Settings::values.use_disk_shader_cache);
635 qt_config->setValue("use_accurate_gpu_emulation", Settings::values.use_accurate_gpu_emulation); 637 qt_config->setValue("use_accurate_gpu_emulation", Settings::values.use_accurate_gpu_emulation);
638 qt_config->setValue("use_asynchronous_gpu_emulation",
639 Settings::values.use_asynchronous_gpu_emulation);
636 640
637 // Cast to double because Qt's written float values are not human-readable 641 // Cast to double because Qt's written float values are not human-readable
638 qt_config->setValue("bg_red", (double)Settings::values.bg_red); 642 qt_config->setValue("bg_red", (double)Settings::values.bg_red);
diff --git a/src/yuzu/configuration/configure_graphics.cpp b/src/yuzu/configuration/configure_graphics.cpp
index 0f5dd534b..dd1d67488 100644
--- a/src/yuzu/configuration/configure_graphics.cpp
+++ b/src/yuzu/configuration/configure_graphics.cpp
@@ -75,6 +75,8 @@ void ConfigureGraphics::setConfiguration() {
75 ui->frame_limit->setValue(Settings::values.frame_limit); 75 ui->frame_limit->setValue(Settings::values.frame_limit);
76 ui->use_disk_shader_cache->setChecked(Settings::values.use_disk_shader_cache); 76 ui->use_disk_shader_cache->setChecked(Settings::values.use_disk_shader_cache);
77 ui->use_accurate_gpu_emulation->setChecked(Settings::values.use_accurate_gpu_emulation); 77 ui->use_accurate_gpu_emulation->setChecked(Settings::values.use_accurate_gpu_emulation);
78 ui->use_asynchronous_gpu_emulation->setEnabled(!Core::System::GetInstance().IsPoweredOn());
79 ui->use_asynchronous_gpu_emulation->setChecked(Settings::values.use_asynchronous_gpu_emulation);
78 UpdateBackgroundColorButton(QColor::fromRgbF(Settings::values.bg_red, Settings::values.bg_green, 80 UpdateBackgroundColorButton(QColor::fromRgbF(Settings::values.bg_red, Settings::values.bg_green,
79 Settings::values.bg_blue)); 81 Settings::values.bg_blue));
80} 82}
@@ -86,6 +88,8 @@ void ConfigureGraphics::applyConfiguration() {
86 Settings::values.frame_limit = ui->frame_limit->value(); 88 Settings::values.frame_limit = ui->frame_limit->value();
87 Settings::values.use_disk_shader_cache = ui->use_disk_shader_cache->isChecked(); 89 Settings::values.use_disk_shader_cache = ui->use_disk_shader_cache->isChecked();
88 Settings::values.use_accurate_gpu_emulation = ui->use_accurate_gpu_emulation->isChecked(); 90 Settings::values.use_accurate_gpu_emulation = ui->use_accurate_gpu_emulation->isChecked();
91 Settings::values.use_asynchronous_gpu_emulation =
92 ui->use_asynchronous_gpu_emulation->isChecked();
89 Settings::values.bg_red = static_cast<float>(bg_color.redF()); 93 Settings::values.bg_red = static_cast<float>(bg_color.redF());
90 Settings::values.bg_green = static_cast<float>(bg_color.greenF()); 94 Settings::values.bg_green = static_cast<float>(bg_color.greenF());
91 Settings::values.bg_blue = static_cast<float>(bg_color.blueF()); 95 Settings::values.bg_blue = static_cast<float>(bg_color.blueF());
diff --git a/src/yuzu/configuration/configure_graphics.ui b/src/yuzu/configuration/configure_graphics.ui
index 824f5810a..c6767e0ca 100644
--- a/src/yuzu/configuration/configure_graphics.ui
+++ b/src/yuzu/configuration/configure_graphics.ui
@@ -64,6 +64,13 @@
64 </widget> 64 </widget>
65 </item> 65 </item>
66 <item> 66 <item>
67 <widget class="QCheckBox" name="use_asynchronous_gpu_emulation">
68 <property name="text">
69 <string>Use asynchronous GPU emulation</string>
70 </property>
71 </widget>
72 </item>
73 <item>
67 <layout class="QHBoxLayout" name="horizontalLayout"> 74 <layout class="QHBoxLayout" name="horizontalLayout">
68 <item> 75 <item>
69 <widget class="QLabel" name="label"> 76 <widget class="QLabel" name="label">
diff --git a/src/yuzu/debugger/wait_tree.cpp b/src/yuzu/debugger/wait_tree.cpp
index f50225d5f..06ad74ffe 100644
--- a/src/yuzu/debugger/wait_tree.cpp
+++ b/src/yuzu/debugger/wait_tree.cpp
@@ -81,9 +81,8 @@ QString WaitTreeText::GetText() const {
81 return text; 81 return text;
82} 82}
83 83
84WaitTreeMutexInfo::WaitTreeMutexInfo(VAddr mutex_address) : mutex_address(mutex_address) { 84WaitTreeMutexInfo::WaitTreeMutexInfo(VAddr mutex_address, const Kernel::HandleTable& handle_table)
85 const auto& handle_table = Core::CurrentProcess()->GetHandleTable(); 85 : mutex_address(mutex_address) {
86
87 mutex_value = Memory::Read32(mutex_address); 86 mutex_value = Memory::Read32(mutex_address);
88 owner_handle = static_cast<Kernel::Handle>(mutex_value & Kernel::Mutex::MutexOwnerMask); 87 owner_handle = static_cast<Kernel::Handle>(mutex_value & Kernel::Mutex::MutexOwnerMask);
89 owner = handle_table.Get<Kernel::Thread>(owner_handle); 88 owner = handle_table.Get<Kernel::Thread>(owner_handle);
@@ -316,7 +315,8 @@ std::vector<std::unique_ptr<WaitTreeItem>> WaitTreeThread::GetChildren() const {
316 315
317 const VAddr mutex_wait_address = thread.GetMutexWaitAddress(); 316 const VAddr mutex_wait_address = thread.GetMutexWaitAddress();
318 if (mutex_wait_address != 0) { 317 if (mutex_wait_address != 0) {
319 list.push_back(std::make_unique<WaitTreeMutexInfo>(mutex_wait_address)); 318 const auto& handle_table = thread.GetOwnerProcess()->GetHandleTable();
319 list.push_back(std::make_unique<WaitTreeMutexInfo>(mutex_wait_address, handle_table));
320 } else { 320 } else {
321 list.push_back(std::make_unique<WaitTreeText>(tr("not waiting for mutex"))); 321 list.push_back(std::make_unique<WaitTreeText>(tr("not waiting for mutex")));
322 } 322 }
diff --git a/src/yuzu/debugger/wait_tree.h b/src/yuzu/debugger/wait_tree.h
index 365c3dbfe..62886609d 100644
--- a/src/yuzu/debugger/wait_tree.h
+++ b/src/yuzu/debugger/wait_tree.h
@@ -17,6 +17,7 @@
17class EmuThread; 17class EmuThread;
18 18
19namespace Kernel { 19namespace Kernel {
20class HandleTable;
20class ReadableEvent; 21class ReadableEvent;
21class WaitObject; 22class WaitObject;
22class Thread; 23class Thread;
@@ -72,7 +73,7 @@ public:
72class WaitTreeMutexInfo : public WaitTreeExpandableItem { 73class WaitTreeMutexInfo : public WaitTreeExpandableItem {
73 Q_OBJECT 74 Q_OBJECT
74public: 75public:
75 explicit WaitTreeMutexInfo(VAddr mutex_address); 76 explicit WaitTreeMutexInfo(VAddr mutex_address, const Kernel::HandleTable& handle_table);
76 ~WaitTreeMutexInfo() override; 77 ~WaitTreeMutexInfo() override;
77 78
78 QString GetText() const override; 79 QString GetText() const override;
diff --git a/src/yuzu/main.cpp b/src/yuzu/main.cpp
index 1d460c189..41ba3c4c6 100644
--- a/src/yuzu/main.cpp
+++ b/src/yuzu/main.cpp
@@ -11,6 +11,7 @@
11#include "applets/profile_select.h" 11#include "applets/profile_select.h"
12#include "applets/software_keyboard.h" 12#include "applets/software_keyboard.h"
13#include "applets/web_browser.h" 13#include "applets/web_browser.h"
14#include "configuration/configure_input.h"
14#include "configuration/configure_per_general.h" 15#include "configuration/configure_per_general.h"
15#include "core/file_sys/vfs.h" 16#include "core/file_sys/vfs.h"
16#include "core/file_sys/vfs_real.h" 17#include "core/file_sys/vfs_real.h"
@@ -339,6 +340,11 @@ void GMainWindow::WebBrowserOpenPage(std::string_view filename, std::string_view
339 .arg(QString::fromStdString(std::to_string(key_code)))); 340 .arg(QString::fromStdString(std::to_string(key_code))));
340 }; 341 };
341 342
343 QMessageBox::information(
344 this, tr("Exit"),
345 tr("To exit the web application, use the game provided controls to select exit, select the "
346 "'Exit Web Applet' option in the menu bar, or press the 'Enter' key."));
347
342 bool running_exit_check = false; 348 bool running_exit_check = false;
343 while (!finished) { 349 while (!finished) {
344 QApplication::processEvents(); 350 QApplication::processEvents();
@@ -522,6 +528,7 @@ void GMainWindow::InitializeHotkeys() {
522 Qt::ApplicationShortcut); 528 Qt::ApplicationShortcut);
523 hotkey_registry.RegisterHotkey("Main Window", "Capture Screenshot", 529 hotkey_registry.RegisterHotkey("Main Window", "Capture Screenshot",
524 QKeySequence(QKeySequence::Print)); 530 QKeySequence(QKeySequence::Print));
531 hotkey_registry.RegisterHotkey("Main Window", "Change Docked Mode", QKeySequence(Qt::Key_F10));
525 532
526 hotkey_registry.LoadHotkeys(); 533 hotkey_registry.LoadHotkeys();
527 534
@@ -561,7 +568,10 @@ void GMainWindow::InitializeHotkeys() {
561 Settings::values.use_frame_limit = !Settings::values.use_frame_limit; 568 Settings::values.use_frame_limit = !Settings::values.use_frame_limit;
562 UpdateStatusBar(); 569 UpdateStatusBar();
563 }); 570 });
564 constexpr u16 SPEED_LIMIT_STEP = 5; 571 // TODO: Remove this comment/static whenever the next major release of
572 // MSVC occurs and we make it a requirement (see:
573 // https://developercommunity.visualstudio.com/content/problem/93922/constexprs-are-trying-to-be-captured-in-lambda-fun.html)
574 static constexpr u16 SPEED_LIMIT_STEP = 5;
565 connect(hotkey_registry.GetHotkey("Main Window", "Increase Speed Limit", this), 575 connect(hotkey_registry.GetHotkey("Main Window", "Increase Speed Limit", this),
566 &QShortcut::activated, this, [&] { 576 &QShortcut::activated, this, [&] {
567 if (Settings::values.frame_limit < 9999 - SPEED_LIMIT_STEP) { 577 if (Settings::values.frame_limit < 9999 - SPEED_LIMIT_STEP) {
@@ -588,6 +598,12 @@ void GMainWindow::InitializeHotkeys() {
588 OnCaptureScreenshot(); 598 OnCaptureScreenshot();
589 } 599 }
590 }); 600 });
601 connect(hotkey_registry.GetHotkey("Main Window", "Change Docked Mode", this),
602 &QShortcut::activated, this, [&] {
603 Settings::values.use_docked_mode = !Settings::values.use_docked_mode;
604 OnDockedModeChanged(!Settings::values.use_docked_mode,
605 Settings::values.use_docked_mode);
606 });
591} 607}
592 608
593void GMainWindow::SetDefaultUIGeometry() { 609void GMainWindow::SetDefaultUIGeometry() {
@@ -846,7 +862,7 @@ bool GMainWindow::LoadROM(const QString& filename) {
846 } 862 }
847 game_path = filename; 863 game_path = filename;
848 864
849 Core::Telemetry().AddField(Telemetry::FieldType::App, "Frontend", "Qt"); 865 system.TelemetrySession().AddField(Telemetry::FieldType::App, "Frontend", "Qt");
850 return true; 866 return true;
851} 867}
852 868
diff --git a/src/yuzu_cmd/config.cpp b/src/yuzu_cmd/config.cpp
index ff05b3179..32e78049c 100644
--- a/src/yuzu_cmd/config.cpp
+++ b/src/yuzu_cmd/config.cpp
@@ -346,7 +346,7 @@ void Config::ReadValues() {
346 346
347 // Renderer 347 // Renderer
348 Settings::values.resolution_factor = 348 Settings::values.resolution_factor =
349 (float)sdl2_config->GetReal("Renderer", "resolution_factor", 1.0); 349 static_cast<float>(sdl2_config->GetReal("Renderer", "resolution_factor", 1.0));
350 Settings::values.use_frame_limit = sdl2_config->GetBoolean("Renderer", "use_frame_limit", true); 350 Settings::values.use_frame_limit = sdl2_config->GetBoolean("Renderer", "use_frame_limit", true);
351 Settings::values.frame_limit = 351 Settings::values.frame_limit =
352 static_cast<u16>(sdl2_config->GetInteger("Renderer", "frame_limit", 100)); 352 static_cast<u16>(sdl2_config->GetInteger("Renderer", "frame_limit", 100));
@@ -354,17 +354,20 @@ void Config::ReadValues() {
354 sdl2_config->GetBoolean("Renderer", "use_disk_shader_cache", false); 354 sdl2_config->GetBoolean("Renderer", "use_disk_shader_cache", false);
355 Settings::values.use_accurate_gpu_emulation = 355 Settings::values.use_accurate_gpu_emulation =
356 sdl2_config->GetBoolean("Renderer", "use_accurate_gpu_emulation", false); 356 sdl2_config->GetBoolean("Renderer", "use_accurate_gpu_emulation", false);
357 Settings::values.use_asynchronous_gpu_emulation =
358 sdl2_config->GetBoolean("Renderer", "use_asynchronous_gpu_emulation", false);
357 359
358 Settings::values.bg_red = (float)sdl2_config->GetReal("Renderer", "bg_red", 0.0); 360 Settings::values.bg_red = static_cast<float>(sdl2_config->GetReal("Renderer", "bg_red", 0.0));
359 Settings::values.bg_green = (float)sdl2_config->GetReal("Renderer", "bg_green", 0.0); 361 Settings::values.bg_green =
360 Settings::values.bg_blue = (float)sdl2_config->GetReal("Renderer", "bg_blue", 0.0); 362 static_cast<float>(sdl2_config->GetReal("Renderer", "bg_green", 0.0));
363 Settings::values.bg_blue = static_cast<float>(sdl2_config->GetReal("Renderer", "bg_blue", 0.0));
361 364
362 // Audio 365 // Audio
363 Settings::values.sink_id = sdl2_config->Get("Audio", "output_engine", "auto"); 366 Settings::values.sink_id = sdl2_config->Get("Audio", "output_engine", "auto");
364 Settings::values.enable_audio_stretching = 367 Settings::values.enable_audio_stretching =
365 sdl2_config->GetBoolean("Audio", "enable_audio_stretching", true); 368 sdl2_config->GetBoolean("Audio", "enable_audio_stretching", true);
366 Settings::values.audio_device_id = sdl2_config->Get("Audio", "output_device", "auto"); 369 Settings::values.audio_device_id = sdl2_config->Get("Audio", "output_device", "auto");
367 Settings::values.volume = sdl2_config->GetReal("Audio", "volume", 1); 370 Settings::values.volume = static_cast<float>(sdl2_config->GetReal("Audio", "volume", 1));
368 371
369 Settings::values.language_index = sdl2_config->GetInteger("System", "language_index", 1); 372 Settings::values.language_index = sdl2_config->GetInteger("System", "language_index", 1);
370 373
diff --git a/src/yuzu_cmd/default_ini.h b/src/yuzu_cmd/default_ini.h
index a81986f8e..6538af098 100644
--- a/src/yuzu_cmd/default_ini.h
+++ b/src/yuzu_cmd/default_ini.h
@@ -118,6 +118,10 @@ use_disk_shader_cache =
118# 0 (default): Off (fast), 1 : On (slow) 118# 0 (default): Off (fast), 1 : On (slow)
119use_accurate_gpu_emulation = 119use_accurate_gpu_emulation =
120 120
121# Whether to use asynchronous GPU emulation
122# 0 : Off (slow), 1 (default): On (fast)
123use_asynchronous_gpu_emulation =
124
121# The clear color for the renderer. What shows up on the sides of the bottom screen. 125# The clear color for the renderer. What shows up on the sides of the bottom screen.
122# Must be in range of 0.0-1.0. Defaults to 1.0 for all. 126# Must be in range of 0.0-1.0. Defaults to 1.0 for all.
123bg_red = 127bg_red =
diff --git a/src/yuzu_cmd/yuzu.cpp b/src/yuzu_cmd/yuzu.cpp
index c34b5467f..c6c66a787 100644
--- a/src/yuzu_cmd/yuzu.cpp
+++ b/src/yuzu_cmd/yuzu.cpp
@@ -216,7 +216,7 @@ int main(int argc, char** argv) {
216 } 216 }
217 } 217 }
218 218
219 Core::Telemetry().AddField(Telemetry::FieldType::App, "Frontend", "SDL"); 219 system.TelemetrySession().AddField(Telemetry::FieldType::App, "Frontend", "SDL");
220 220
221 system.Renderer().Rasterizer().LoadDiskResources(); 221 system.Renderer().Rasterizer().LoadDiskResources();
222 222