-rw-r--r--   .travis.yml | 2
-rwxr-xr-x   .travis/macos/build.sh | 2
-rw-r--r--   CMakeModules/GenerateSCMRev.cmake | 1
-rw-r--r--   src/audio_core/cubeb_sink.cpp | 6
-rw-r--r--   src/audio_core/cubeb_sink.h | 2
-rw-r--r--   src/common/CMakeLists.txt | 1
-rw-r--r--   src/common/bit_field.h | 16
-rw-r--r--   src/core/core.cpp | 27
-rw-r--r--   src/core/core.h | 4
-rw-r--r--   src/core/core_cpu.cpp | 9
-rw-r--r--   src/core/core_cpu.h | 8
-rw-r--r--   src/core/cpu_core_manager.cpp | 3
-rw-r--r--   src/core/hle/ipc.h | 4
-rw-r--r--   src/core/hle/ipc_helpers.h | 2
-rw-r--r--   src/core/hle/kernel/client_session.cpp | 14
-rw-r--r--   src/core/hle/kernel/client_session.h | 9
-rw-r--r--   src/core/hle/kernel/hle_ipc.cpp | 22
-rw-r--r--   src/core/hle/kernel/hle_ipc.h | 25
-rw-r--r--   src/core/hle/kernel/scheduler.cpp | 14
-rw-r--r--   src/core/hle/kernel/scheduler.h | 6
-rw-r--r--   src/core/hle/kernel/server_session.cpp | 91
-rw-r--r--   src/core/hle/kernel/server_session.h | 53
-rw-r--r--   src/core/hle/kernel/shared_memory.cpp | 5
-rw-r--r--   src/core/hle/kernel/svc.cpp | 1
-rw-r--r--   src/core/hle/result.h | 1
-rw-r--r--   src/core/hle/service/am/applets/software_keyboard.cpp | 1
-rw-r--r--   src/core/hle/service/am/applets/software_keyboard.h | 3
-rw-r--r--   src/core/hle/service/audio/audout_u.cpp | 4
-rw-r--r--   src/core/hle/service/nvdrv/devices/nvdisp_disp0.cpp | 2
-rw-r--r--   src/core/hle/service/nvdrv/devices/nvhost_as_gpu.cpp | 2
-rw-r--r--   src/core/hle/service/nvdrv/devices/nvhost_gpu.cpp | 14
-rw-r--r--   src/core/hle/service/nvflinger/nvflinger.cpp | 2
-rw-r--r--   src/core/hle/service/sm/controller.cpp | 2
-rw-r--r--   src/core/memory.cpp | 8
-rw-r--r--   src/core/settings.cpp | 3
-rw-r--r--   src/core/settings.h | 1
-rw-r--r--   src/core/telemetry_session.cpp | 2
-rw-r--r--   src/video_core/CMakeLists.txt | 11
-rw-r--r--   src/video_core/dma_pusher.cpp | 2
-rw-r--r--   src/video_core/engines/kepler_memory.cpp | 2
-rw-r--r--   src/video_core/engines/maxwell_dma.cpp | 4
-rw-r--r--   src/video_core/engines/shader_bytecode.h | 25
-rw-r--r--   src/video_core/gpu.cpp | 5
-rw-r--r--   src/video_core/gpu.h | 57
-rw-r--r--   src/video_core/gpu_asynch.cpp | 37
-rw-r--r--   src/video_core/gpu_asynch.h | 37
-rw-r--r--   src/video_core/gpu_synch.cpp | 37
-rw-r--r--   src/video_core/gpu_synch.h | 29
-rw-r--r--   src/video_core/gpu_thread.cpp | 152
-rw-r--r--   src/video_core/gpu_thread.h | 133
-rw-r--r--   src/video_core/renderer_opengl/gl_rasterizer.cpp | 43
-rw-r--r--   src/video_core/renderer_opengl/gl_rasterizer_cache.cpp | 156
-rw-r--r--   src/video_core/renderer_opengl/gl_rasterizer_cache.h | 28
-rw-r--r--   src/video_core/renderer_opengl/gl_shader_decompiler.cpp | 56
-rw-r--r--   src/video_core/renderer_opengl/renderer_opengl.cpp | 27
-rw-r--r--   src/video_core/renderer_opengl/renderer_opengl.h | 1
-rw-r--r--   src/video_core/renderer_vulkan/maxwell_to_vk.cpp | 483
-rw-r--r--   src/video_core/renderer_vulkan/maxwell_to_vk.h | 58
-rw-r--r--   src/video_core/renderer_vulkan/vk_device.cpp | 13
-rw-r--r--   src/video_core/shader/decode.cpp | 1
-rw-r--r--   src/video_core/shader/decode/memory.cpp | 493
-rw-r--r--   src/video_core/shader/decode/texture.cpp | 534
-rw-r--r--   src/video_core/shader/shader_ir.h | 5
-rw-r--r--   src/video_core/textures/astc.cpp | 80
-rw-r--r--   src/video_core/textures/astc.h | 2
-rw-r--r--   src/video_core/textures/convert.cpp | 92
-rw-r--r--   src/video_core/textures/convert.h | 18
-rw-r--r--   src/video_core/textures/decoders.cpp | 6
-rw-r--r--   src/video_core/textures/decoders.h | 18
-rw-r--r--   src/yuzu/applets/web_browser.cpp | 2
-rw-r--r--   src/yuzu/bootmanager.cpp | 10
-rw-r--r--   src/yuzu/compatdb.cpp | 4
-rw-r--r--   src/yuzu/configuration/config.cpp | 4
-rw-r--r--   src/yuzu/configuration/configure_graphics.cpp | 4
-rw-r--r--   src/yuzu/configuration/configure_graphics.ui | 7
-rw-r--r--   src/yuzu/main.cpp | 15
-rw-r--r--   src/yuzu_cmd/config.cpp | 13
-rw-r--r--   src/yuzu_cmd/default_ini.h | 4
-rw-r--r--   src/yuzu_cmd/yuzu.cpp | 2
79 files changed, 2139 insertions(+), 943 deletions(-)
diff --git a/.travis.yml b/.travis.yml
index b0fbe3c5f..9512f7843 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -24,7 +24,7 @@ matrix:
     - os: osx
       env: NAME="macos build"
       sudo: false
-      osx_image: xcode10
+      osx_image: xcode10.1
      install: "./.travis/macos/deps.sh"
      script: "./.travis/macos/build.sh"
      after_success: "./.travis/macos/upload.sh"
diff --git a/.travis/macos/build.sh b/.travis/macos/build.sh
index 4a14837fc..b7b4c6f8c 100755
--- a/.travis/macos/build.sh
+++ b/.travis/macos/build.sh
@@ -2,7 +2,7 @@
 
 set -o pipefail
 
-export MACOSX_DEPLOYMENT_TARGET=10.13
+export MACOSX_DEPLOYMENT_TARGET=10.14
 export Qt5_DIR=$(brew --prefix)/opt/qt5
 export UNICORNDIR=$(pwd)/externals/unicorn
 export PATH="/usr/local/opt/ccache/libexec:$PATH"
diff --git a/CMakeModules/GenerateSCMRev.cmake b/CMakeModules/GenerateSCMRev.cmake
index 78728e08b..08315a1f1 100644
--- a/CMakeModules/GenerateSCMRev.cmake
+++ b/CMakeModules/GenerateSCMRev.cmake
@@ -73,6 +73,7 @@ set(HASH_FILES
     "${VIDEO_CORE}/shader/decode/integer_set.cpp"
     "${VIDEO_CORE}/shader/decode/integer_set_predicate.cpp"
     "${VIDEO_CORE}/shader/decode/memory.cpp"
+    "${VIDEO_CORE}/shader/decode/texture.cpp"
     "${VIDEO_CORE}/shader/decode/other.cpp"
     "${VIDEO_CORE}/shader/decode/predicate_set_predicate.cpp"
     "${VIDEO_CORE}/shader/decode/predicate_set_register.cpp"
diff --git a/src/audio_core/cubeb_sink.cpp b/src/audio_core/cubeb_sink.cpp
index 1da0b9f2a..7047ed9cf 100644
--- a/src/audio_core/cubeb_sink.cpp
+++ b/src/audio_core/cubeb_sink.cpp
@@ -12,7 +12,7 @@
 #include "common/ring_buffer.h"
 #include "core/settings.h"
 
-#ifdef _MSC_VER
+#ifdef _WIN32
 #include <objbase.h>
 #endif
 
@@ -113,7 +113,7 @@ private:
 
 CubebSink::CubebSink(std::string_view target_device_name) {
     // Cubeb requires COM to be initialized on the thread calling cubeb_init on Windows
-#ifdef _MSC_VER
+#ifdef _WIN32
     com_init_result = CoInitializeEx(nullptr, COINIT_MULTITHREADED);
 #endif
 
@@ -152,7 +152,7 @@ CubebSink::~CubebSink() {
 
     cubeb_destroy(ctx);
 
-#ifdef _MSC_VER
+#ifdef _WIN32
     if (SUCCEEDED(com_init_result)) {
         CoUninitialize();
     }
diff --git a/src/audio_core/cubeb_sink.h b/src/audio_core/cubeb_sink.h
index 511df7bb1..7ce850f47 100644
--- a/src/audio_core/cubeb_sink.h
+++ b/src/audio_core/cubeb_sink.h
@@ -26,7 +26,7 @@ private:
     cubeb_devid output_device{};
     std::vector<SinkStreamPtr> sink_streams;
 
-#ifdef _MSC_VER
+#ifdef _WIN32
     u32 com_init_result = 0;
 #endif
 };
diff --git a/src/common/CMakeLists.txt b/src/common/CMakeLists.txt
index bdd885273..3d30f0e3e 100644
--- a/src/common/CMakeLists.txt
+++ b/src/common/CMakeLists.txt
@@ -47,6 +47,7 @@ add_custom_command(OUTPUT scm_rev.cpp
     "${VIDEO_CORE}/shader/decode/integer_set.cpp"
     "${VIDEO_CORE}/shader/decode/integer_set_predicate.cpp"
     "${VIDEO_CORE}/shader/decode/memory.cpp"
+    "${VIDEO_CORE}/shader/decode/texture.cpp"
     "${VIDEO_CORE}/shader/decode/other.cpp"
     "${VIDEO_CORE}/shader/decode/predicate_set_predicate.cpp"
     "${VIDEO_CORE}/shader/decode/predicate_set_register.cpp"
diff --git a/src/common/bit_field.h b/src/common/bit_field.h
index 21e07925d..7433c39ba 100644
--- a/src/common/bit_field.h
+++ b/src/common/bit_field.h
@@ -111,12 +111,6 @@
 template <std::size_t Position, std::size_t Bits, typename T>
 struct BitField {
 private:
-    // We hide the copy assigment operator here, because the default copy
-    // assignment would copy the full storage value, rather than just the bits
-    // relevant to this particular bit field.
-    // We don't delete it because we want BitField to be trivially copyable.
-    constexpr BitField& operator=(const BitField&) = default;
-
     // UnderlyingType is T for non-enum types and the underlying type of T if
     // T is an enumeration. Note that T is wrapped within an enable_if in the
     // former case to workaround compile errors which arise when using
@@ -163,9 +157,13 @@ public:
     BitField(T val) = delete;
     BitField& operator=(T val) = delete;
 
-    // Force default constructor to be created
-    // so that we can use this within unions
-    constexpr BitField() = default;
+    constexpr BitField() noexcept = default;
+
+    constexpr BitField(const BitField&) noexcept = default;
+    constexpr BitField& operator=(const BitField&) noexcept = default;
+
+    constexpr BitField(BitField&&) noexcept = default;
+    constexpr BitField& operator=(BitField&&) noexcept = default;
 
     constexpr FORCE_INLINE operator T() const {
         return Value();
diff --git a/src/core/core.cpp b/src/core/core.cpp
index 6dda20faa..eba2177d1 100644
--- a/src/core/core.cpp
+++ b/src/core/core.cpp
@@ -36,7 +36,8 @@
 #include "frontend/applets/software_keyboard.h"
 #include "frontend/applets/web_browser.h"
 #include "video_core/debug_utils/debug_utils.h"
-#include "video_core/gpu.h"
+#include "video_core/gpu_asynch.h"
+#include "video_core/gpu_synch.h"
 #include "video_core/renderer_base.h"
 #include "video_core/video_core.h"
 
@@ -129,10 +130,16 @@ struct System::Impl {
             return ResultStatus::ErrorVideoCore;
         }
 
-        gpu_core = std::make_unique<Tegra::GPU>(system, renderer->Rasterizer());
+        is_powered_on = true;
+
+        if (Settings::values.use_asynchronous_gpu_emulation) {
+            gpu_core = std::make_unique<VideoCommon::GPUAsynch>(system, *renderer);
+        } else {
+            gpu_core = std::make_unique<VideoCommon::GPUSynch>(system, *renderer);
+        }
 
         cpu_core_manager.Initialize(system);
-        is_powered_on = true;
+
         LOG_DEBUG(Core, "Initialized OK");
 
         // Reset counters and set time origin to current frame
@@ -183,13 +190,13 @@
 
     void Shutdown() {
         // Log last frame performance stats
-        auto perf_results = GetAndResetPerfStats();
-        Telemetry().AddField(Telemetry::FieldType::Performance, "Shutdown_EmulationSpeed",
-                             perf_results.emulation_speed * 100.0);
-        Telemetry().AddField(Telemetry::FieldType::Performance, "Shutdown_Framerate",
-                             perf_results.game_fps);
-        Telemetry().AddField(Telemetry::FieldType::Performance, "Shutdown_Frametime",
-                             perf_results.frametime * 1000.0);
+        const auto perf_results = GetAndResetPerfStats();
+        telemetry_session->AddField(Telemetry::FieldType::Performance, "Shutdown_EmulationSpeed",
+                                    perf_results.emulation_speed * 100.0);
+        telemetry_session->AddField(Telemetry::FieldType::Performance, "Shutdown_Framerate",
+                                    perf_results.game_fps);
+        telemetry_session->AddField(Telemetry::FieldType::Performance, "Shutdown_Frametime",
+                                    perf_results.frametime * 1000.0);
 
         is_powered_on = false;
 
diff --git a/src/core/core.h b/src/core/core.h
index d720013f7..ba76a41d8 100644
--- a/src/core/core.h
+++ b/src/core/core.h
@@ -293,10 +293,6 @@ inline ARM_Interface& CurrentArmInterface() {
     return System::GetInstance().CurrentArmInterface();
 }
 
-inline TelemetrySession& Telemetry() {
-    return System::GetInstance().TelemetrySession();
-}
-
 inline Kernel::Process* CurrentProcess() {
     return System::GetInstance().CurrentProcess();
 }
diff --git a/src/core/core_cpu.cpp b/src/core/core_cpu.cpp
index 54aa21a3a..1eefed6d0 100644
--- a/src/core/core_cpu.cpp
+++ b/src/core/core_cpu.cpp
@@ -11,6 +11,7 @@
 #endif
 #include "core/arm/exclusive_monitor.h"
 #include "core/arm/unicorn/arm_unicorn.h"
+#include "core/core.h"
 #include "core/core_cpu.h"
 #include "core/core_timing.h"
 #include "core/hle/kernel/scheduler.h"
@@ -49,9 +50,9 @@ bool CpuBarrier::Rendezvous() {
     return false;
 }
 
-Cpu::Cpu(Timing::CoreTiming& core_timing, ExclusiveMonitor& exclusive_monitor,
-         CpuBarrier& cpu_barrier, std::size_t core_index)
-    : cpu_barrier{cpu_barrier}, core_timing{core_timing}, core_index{core_index} {
+Cpu::Cpu(System& system, ExclusiveMonitor& exclusive_monitor, CpuBarrier& cpu_barrier,
+         std::size_t core_index)
+    : cpu_barrier{cpu_barrier}, core_timing{system.CoreTiming()}, core_index{core_index} {
     if (Settings::values.use_cpu_jit) {
 #ifdef ARCHITECTURE_x86_64
         arm_interface = std::make_unique<ARM_Dynarmic>(core_timing, exclusive_monitor, core_index);
@@ -63,7 +64,7 @@ Cpu::Cpu(Timing::CoreTiming& core_timing, ExclusiveMonitor& exclusive_monitor,
         arm_interface = std::make_unique<ARM_Unicorn>(core_timing);
     }
 
-    scheduler = std::make_unique<Kernel::Scheduler>(*arm_interface);
+    scheduler = std::make_unique<Kernel::Scheduler>(system, *arm_interface);
 }
 
 Cpu::~Cpu() = default;
diff --git a/src/core/core_cpu.h b/src/core/core_cpu.h
index e2204c6b0..7589beb8c 100644
--- a/src/core/core_cpu.h
+++ b/src/core/core_cpu.h
@@ -15,6 +15,10 @@ namespace Kernel {
 class Scheduler;
 }
 
+namespace Core {
+class System;
+}
+
 namespace Core::Timing {
 class CoreTiming;
 }
@@ -45,8 +49,8 @@ private:
 
 class Cpu {
 public:
-    Cpu(Timing::CoreTiming& core_timing, ExclusiveMonitor& exclusive_monitor,
-        CpuBarrier& cpu_barrier, std::size_t core_index);
+    Cpu(System& system, ExclusiveMonitor& exclusive_monitor, CpuBarrier& cpu_barrier,
+        std::size_t core_index);
     ~Cpu();
 
     void RunLoop(bool tight_loop = true);
diff --git a/src/core/cpu_core_manager.cpp b/src/core/cpu_core_manager.cpp
index 2ddb3610d..93bc5619c 100644
--- a/src/core/cpu_core_manager.cpp
+++ b/src/core/cpu_core_manager.cpp
@@ -27,8 +27,7 @@ void CpuCoreManager::Initialize(System& system) {
     exclusive_monitor = Cpu::MakeExclusiveMonitor(cores.size());
 
     for (std::size_t index = 0; index < cores.size(); ++index) {
-        cores[index] =
-            std::make_unique<Cpu>(system.CoreTiming(), *exclusive_monitor, *barrier, index);
+        cores[index] = std::make_unique<Cpu>(system, *exclusive_monitor, *barrier, index);
     }
 
     // Create threads for CPU cores 1-3, and build thread_to_cpu map
diff --git a/src/core/hle/ipc.h b/src/core/hle/ipc.h
index ed84197b3..455d1f346 100644
--- a/src/core/hle/ipc.h
+++ b/src/core/hle/ipc.h
@@ -4,10 +4,10 @@
 
 #pragma once
 
+#include "common/bit_field.h"
+#include "common/common_funcs.h"
 #include "common/common_types.h"
 #include "common/swap.h"
-#include "core/hle/kernel/errors.h"
-#include "core/memory.h"
 
 namespace IPC {
 
diff --git a/src/core/hle/ipc_helpers.h b/src/core/hle/ipc_helpers.h
index 90f276ee8..079283830 100644
--- a/src/core/hle/ipc_helpers.h
+++ b/src/core/hle/ipc_helpers.h
@@ -350,7 +350,7 @@ public:
     template <class T>
     std::shared_ptr<T> PopIpcInterface() {
         ASSERT(context->Session()->IsDomain());
-        ASSERT(context->GetDomainMessageHeader()->input_object_count > 0);
+        ASSERT(context->GetDomainMessageHeader().input_object_count > 0);
         return context->GetDomainRequestHandler<T>(Pop<u32>() - 1);
     }
 };
diff --git a/src/core/hle/kernel/client_session.cpp b/src/core/hle/kernel/client_session.cpp
index 704e82824..c17baa50a 100644
--- a/src/core/hle/kernel/client_session.cpp
+++ b/src/core/hle/kernel/client_session.cpp
@@ -17,21 +17,11 @@ ClientSession::~ClientSession() {
     // This destructor will be called automatically when the last ClientSession handle is closed by
     // the emulated application.
 
-    // Local references to ServerSession and SessionRequestHandler are necessary to guarantee they
+    // A local reference to the ServerSession is necessary to guarantee it
     // will be kept alive until after ClientDisconnected() returns.
     SharedPtr<ServerSession> server = parent->server;
     if (server) {
-        std::shared_ptr<SessionRequestHandler> hle_handler = server->hle_handler;
-        if (hle_handler)
-            hle_handler->ClientDisconnected(server);
-
-        // TODO(Subv): Force a wake up of all the ServerSession's waiting threads and set
-        // their WaitSynchronization result to 0xC920181A.
-
-        // Clean up the list of client threads with pending requests, they are unneeded now that the
-        // client endpoint is closed.
-        server->pending_requesting_threads.clear();
-        server->currently_handling = nullptr;
+        server->ClientDisconnected();
     }
 
     parent->client = nullptr;
diff --git a/src/core/hle/kernel/client_session.h b/src/core/hle/kernel/client_session.h
index 4c18de69c..b1f39aad7 100644
--- a/src/core/hle/kernel/client_session.h
+++ b/src/core/hle/kernel/client_session.h
@@ -36,14 +36,15 @@ public:
 
     ResultCode SendSyncRequest(SharedPtr<Thread> thread);
 
-    std::string name; ///< Name of client port (optional)
+private:
+    explicit ClientSession(KernelCore& kernel);
+    ~ClientSession() override;
 
     /// The parent session, which links to the server endpoint.
     std::shared_ptr<Session> parent;
 
-private:
-    explicit ClientSession(KernelCore& kernel);
-    ~ClientSession() override;
+    /// Name of the client session (optional)
+    std::string name;
 };
 
 } // namespace Kernel
diff --git a/src/core/hle/kernel/hle_ipc.cpp b/src/core/hle/kernel/hle_ipc.cpp
index 5dd855db8..fe710eb6e 100644
--- a/src/core/hle/kernel/hle_ipc.cpp
+++ b/src/core/hle/kernel/hle_ipc.cpp
@@ -86,7 +86,7 @@ HLERequestContext::~HLERequestContext() = default;
 void HLERequestContext::ParseCommandBuffer(const HandleTable& handle_table, u32_le* src_cmdbuf,
                                            bool incoming) {
     IPC::RequestParser rp(src_cmdbuf);
-    command_header = std::make_shared<IPC::CommandHeader>(rp.PopRaw<IPC::CommandHeader>());
+    command_header = rp.PopRaw<IPC::CommandHeader>();
 
     if (command_header->type == IPC::CommandType::Close) {
         // Close does not populate the rest of the IPC header
@@ -95,8 +95,7 @@ void HLERequestContext::ParseCommandBuffer(const HandleTable& handle_table, u32_
 
     // If handle descriptor is present, add size of it
     if (command_header->enable_handle_descriptor) {
-        handle_descriptor_header =
-            std::make_shared<IPC::HandleDescriptorHeader>(rp.PopRaw<IPC::HandleDescriptorHeader>());
+        handle_descriptor_header = rp.PopRaw<IPC::HandleDescriptorHeader>();
         if (handle_descriptor_header->send_current_pid) {
             rp.Skip(2, false);
         }
@@ -140,16 +139,15 @@ void HLERequestContext::ParseCommandBuffer(const HandleTable& handle_table, u32_
         // If this is an incoming message, only CommandType "Request" has a domain header
         // All outgoing domain messages have the domain header, if only incoming has it
         if (incoming || domain_message_header) {
-            domain_message_header =
-                std::make_shared<IPC::DomainMessageHeader>(rp.PopRaw<IPC::DomainMessageHeader>());
+            domain_message_header = rp.PopRaw<IPC::DomainMessageHeader>();
         } else {
-            if (Session()->IsDomain())
+            if (Session()->IsDomain()) {
                 LOG_WARNING(IPC, "Domain request has no DomainMessageHeader!");
+            }
         }
     }
 
-    data_payload_header =
-        std::make_shared<IPC::DataPayloadHeader>(rp.PopRaw<IPC::DataPayloadHeader>());
+    data_payload_header = rp.PopRaw<IPC::DataPayloadHeader>();
 
     data_payload_offset = rp.GetCurrentOffset();
 
@@ -264,11 +262,11 @@ ResultCode HLERequestContext::WriteToOutgoingCommandBuffer(Thread& thread) {
         // Write the domain objects to the command buffer, these go after the raw untranslated data.
         // TODO(Subv): This completely ignores C buffers.
         std::size_t domain_offset = size - domain_message_header->num_objects;
-        auto& request_handlers = server_session->domain_request_handlers;
 
-        for (auto& object : domain_objects) {
-            request_handlers.emplace_back(object);
-            dst_cmdbuf[domain_offset++] = static_cast<u32_le>(request_handlers.size());
+        for (const auto& object : domain_objects) {
+            server_session->AppendDomainRequestHandler(object);
+            dst_cmdbuf[domain_offset++] =
+                static_cast<u32_le>(server_session->NumDomainRequestHandlers());
         }
     }
 
diff --git a/src/core/hle/kernel/hle_ipc.h b/src/core/hle/kernel/hle_ipc.h
index cb1c5aff3..2bdd9f02c 100644
--- a/src/core/hle/kernel/hle_ipc.h
+++ b/src/core/hle/kernel/hle_ipc.h
@@ -6,6 +6,7 @@
 
 #include <array>
 #include <memory>
+#include <optional>
 #include <string>
 #include <type_traits>
 #include <vector>
@@ -15,6 +16,8 @@
 #include "core/hle/ipc.h"
 #include "core/hle/kernel/object.h"
 
+union ResultCode;
+
 namespace Service {
 class ServiceFrameworkBase;
 }
@@ -166,12 +169,12 @@
         return buffer_c_desciptors;
     }
 
-    const IPC::DomainMessageHeader* GetDomainMessageHeader() const {
-        return domain_message_header.get();
+    const IPC::DomainMessageHeader& GetDomainMessageHeader() const {
+        return domain_message_header.value();
     }
 
     bool HasDomainMessageHeader() const {
-        return domain_message_header != nullptr;
+        return domain_message_header.has_value();
     }
 
     /// Helper function to read a buffer using the appropriate buffer descriptor
@@ -208,14 +211,12 @@
 
     template <typename T>
     SharedPtr<T> GetCopyObject(std::size_t index) {
-        ASSERT(index < copy_objects.size());
-        return DynamicObjectCast<T>(copy_objects[index]);
+        return DynamicObjectCast<T>(copy_objects.at(index));
     }
 
     template <typename T>
     SharedPtr<T> GetMoveObject(std::size_t index) {
-        ASSERT(index < move_objects.size());
-        return DynamicObjectCast<T>(move_objects[index]);
+        return DynamicObjectCast<T>(move_objects.at(index));
     }
 
     void AddMoveObject(SharedPtr<Object> object) {
@@ -232,7 +233,7 @@
 
     template <typename T>
     std::shared_ptr<T> GetDomainRequestHandler(std::size_t index) const {
-        return std::static_pointer_cast<T>(domain_request_handlers[index]);
+        return std::static_pointer_cast<T>(domain_request_handlers.at(index));
    }
 
     void SetDomainRequestHandlers(
@@ -272,10 +273,10 @@ private:
     boost::container::small_vector<SharedPtr<Object>, 8> copy_objects;
     boost::container::small_vector<std::shared_ptr<SessionRequestHandler>, 8> domain_objects;
 
-    std::shared_ptr<IPC::CommandHeader> command_header;
-    std::shared_ptr<IPC::HandleDescriptorHeader> handle_descriptor_header;
-    std::shared_ptr<IPC::DataPayloadHeader> data_payload_header;
-    std::shared_ptr<IPC::DomainMessageHeader> domain_message_header;
+    std::optional<IPC::CommandHeader> command_header;
+    std::optional<IPC::HandleDescriptorHeader> handle_descriptor_header;
+    std::optional<IPC::DataPayloadHeader> data_payload_header;
+    std::optional<IPC::DomainMessageHeader> domain_message_header;
     std::vector<IPC::BufferDescriptorX> buffer_x_desciptors;
     std::vector<IPC::BufferDescriptorABW> buffer_a_desciptors;
     std::vector<IPC::BufferDescriptorABW> buffer_b_desciptors;
diff --git a/src/core/hle/kernel/scheduler.cpp b/src/core/hle/kernel/scheduler.cpp
index 44f30d070..5fccfd9f4 100644
--- a/src/core/hle/kernel/scheduler.cpp
+++ b/src/core/hle/kernel/scheduler.cpp
@@ -19,7 +19,8 @@ namespace Kernel {
 
 std::mutex Scheduler::scheduler_mutex;
 
-Scheduler::Scheduler(Core::ARM_Interface& cpu_core) : cpu_core(cpu_core) {}
+Scheduler::Scheduler(Core::System& system, Core::ARM_Interface& cpu_core)
+    : cpu_core{cpu_core}, system{system} {}
 
 Scheduler::~Scheduler() {
     for (auto& thread : thread_list) {
@@ -61,7 +62,7 @@ Thread* Scheduler::PopNextReadyThread() {
 
 void Scheduler::SwitchContext(Thread* new_thread) {
     Thread* const previous_thread = GetCurrentThread();
-    Process* const previous_process = Core::CurrentProcess();
+    Process* const previous_process = system.Kernel().CurrentProcess();
 
     UpdateLastContextSwitchTime(previous_thread, previous_process);
 
@@ -94,8 +95,8 @@ void Scheduler::SwitchContext(Thread* new_thread) {
 
         auto* const thread_owner_process = current_thread->GetOwnerProcess();
         if (previous_process != thread_owner_process) {
-            Core::System::GetInstance().Kernel().MakeCurrentProcess(thread_owner_process);
-            SetCurrentPageTable(&Core::CurrentProcess()->VMManager().page_table);
+            system.Kernel().MakeCurrentProcess(thread_owner_process);
+            SetCurrentPageTable(&thread_owner_process->VMManager().page_table);
         }
 
         cpu_core.LoadContext(new_thread->GetContext());
@@ -111,7 +112,7 @@ void Scheduler::SwitchContext(Thread* new_thread) {
 
 void Scheduler::UpdateLastContextSwitchTime(Thread* thread, Process* process) {
     const u64 prev_switch_ticks = last_context_switch_time;
-    const u64 most_recent_switch_ticks = Core::System::GetInstance().CoreTiming().GetTicks();
+    const u64 most_recent_switch_ticks = system.CoreTiming().GetTicks();
     const u64 update_ticks = most_recent_switch_ticks - prev_switch_ticks;
 
     if (thread != nullptr) {
@@ -223,8 +224,7 @@ void Scheduler::YieldWithLoadBalancing(Thread* thread) {
     // Take the first non-nullptr one
     for (unsigned cur_core = 0; cur_core < Core::NUM_CPU_CORES; ++cur_core) {
         const auto res =
-            Core::System::GetInstance().CpuCore(cur_core).Scheduler().GetNextSuggestedThread(
-                core, priority);
+            system.CpuCore(cur_core).Scheduler().GetNextSuggestedThread(core, priority);
 
         // If scheduler provides a suggested thread
         if (res != nullptr) {
diff --git a/src/core/hle/kernel/scheduler.h b/src/core/hle/kernel/scheduler.h
index 97ced4dfc..1c5bf57d9 100644
--- a/src/core/hle/kernel/scheduler.h
+++ b/src/core/hle/kernel/scheduler.h
@@ -13,7 +13,8 @@
 
 namespace Core {
 class ARM_Interface;
-}
+class System;
+} // namespace Core
 
 namespace Kernel {
 
@@ -21,7 +22,7 @@ class Process;
 
 class Scheduler final {
 public:
-    explicit Scheduler(Core::ARM_Interface& cpu_core);
+    explicit Scheduler(Core::System& system, Core::ARM_Interface& cpu_core);
     ~Scheduler();
 
     /// Returns whether there are any threads that are ready to run.
@@ -162,6 +163,7 @@ private:
     Core::ARM_Interface& cpu_core;
     u64 last_context_switch_time = 0;
 
+    Core::System& system;
     static std::mutex scheduler_mutex;
 };
 
diff --git a/src/core/hle/kernel/server_session.cpp b/src/core/hle/kernel/server_session.cpp
index 027434f92..4d8a337a7 100644
--- a/src/core/hle/kernel/server_session.cpp
+++ b/src/core/hle/kernel/server_session.cpp
@@ -63,42 +63,71 @@ void ServerSession::Acquire(Thread* thread) {
     pending_requesting_threads.pop_back();
 }
 
+void ServerSession::ClientDisconnected() {
+    // We keep a shared pointer to the hle handler to keep it alive throughout
+    // the call to ClientDisconnected, as ClientDisconnected invalidates the
+    // hle_handler member itself during the course of the function executing.
+    std::shared_ptr<SessionRequestHandler> handler = hle_handler;
+    if (handler) {
+        // Note that after this returns, this server session's hle_handler is
+        // invalidated (set to null).
+        handler->ClientDisconnected(this);
+    }
+
+    // TODO(Subv): Force a wake up of all the ServerSession's waiting threads and set
+    // their WaitSynchronization result to 0xC920181A.
+
+    // Clean up the list of client threads with pending requests, they are unneeded now that the
+    // client endpoint is closed.
+    pending_requesting_threads.clear();
+    currently_handling = nullptr;
+}
+
+void ServerSession::AppendDomainRequestHandler(std::shared_ptr<SessionRequestHandler> handler) {
+    domain_request_handlers.push_back(std::move(handler));
+}
+
+std::size_t ServerSession::NumDomainRequestHandlers() const {
+    return domain_request_handlers.size();
+}
+
 ResultCode ServerSession::HandleDomainSyncRequest(Kernel::HLERequestContext& context) {
-    auto* const domain_message_header = context.GetDomainMessageHeader();
-    if (domain_message_header) {
-        // Set domain handlers in HLE context, used for domain objects (IPC interfaces) as inputs
-        context.SetDomainRequestHandlers(domain_request_handlers);
-
-        // If there is a DomainMessageHeader, then this is CommandType "Request"
-        const u32 object_id{context.GetDomainMessageHeader()->object_id};
-        switch (domain_message_header->command) {
-        case IPC::DomainMessageHeader::CommandType::SendMessage:
-            if (object_id > domain_request_handlers.size()) {
-                LOG_CRITICAL(IPC,
-                             "object_id {} is too big! This probably means a recent service call "
-                             "to {} needed to return a new interface!",
-                             object_id, name);
-                UNREACHABLE();
-                return RESULT_SUCCESS; // Ignore error if asserts are off
-            }
-            return domain_request_handlers[object_id - 1]->HandleSyncRequest(context);
-
-        case IPC::DomainMessageHeader::CommandType::CloseVirtualHandle: {
-            LOG_DEBUG(IPC, "CloseVirtualHandle, object_id=0x{:08X}", object_id);
-
-            domain_request_handlers[object_id - 1] = nullptr;
-
-            IPC::ResponseBuilder rb{context, 2};
-            rb.Push(RESULT_SUCCESS);
-            return RESULT_SUCCESS;
-        }
+    if (!context.HasDomainMessageHeader()) {
+        return RESULT_SUCCESS;
+    }
+
+    // Set domain handlers in HLE context, used for domain objects (IPC interfaces) as inputs
+    context.SetDomainRequestHandlers(domain_request_handlers);
+
+    // If there is a DomainMessageHeader, then this is CommandType "Request"
+    const auto& domain_message_header = context.GetDomainMessageHeader();
+    const u32 object_id{domain_message_header.object_id};
+    switch (domain_message_header.command) {
+    case IPC::DomainMessageHeader::CommandType::SendMessage:
+        if (object_id > domain_request_handlers.size()) {
+            LOG_CRITICAL(IPC,
+                         "object_id {} is too big! This probably means a recent service call "
+                         "to {} needed to return a new interface!",
+                         object_id, name);
+            UNREACHABLE();
+            return RESULT_SUCCESS; // Ignore error if asserts are off
         }
+        return domain_request_handlers[object_id - 1]->HandleSyncRequest(context);
 
-        LOG_CRITICAL(IPC, "Unknown domain command={}",
-                     static_cast<int>(domain_message_header->command.Value()));
-        ASSERT(false);
+    case IPC::DomainMessageHeader::CommandType::CloseVirtualHandle: {
+        LOG_DEBUG(IPC, "CloseVirtualHandle, object_id=0x{:08X}", object_id);
+
+        domain_request_handlers[object_id - 1] = nullptr;
+
+        IPC::ResponseBuilder rb{context, 2};
+        rb.Push(RESULT_SUCCESS);
+        return RESULT_SUCCESS;
+    }
     }
 
+    LOG_CRITICAL(IPC, "Unknown domain command={}",
+                 static_cast<int>(domain_message_header.command.Value()));
+    ASSERT(false);
     return RESULT_SUCCESS;
 }
 
diff --git a/src/core/hle/kernel/server_session.h b/src/core/hle/kernel/server_session.h
index e0e9d64c8..aea4ccfeb 100644
--- a/src/core/hle/kernel/server_session.h
+++ b/src/core/hle/kernel/server_session.h
@@ -46,6 +46,14 @@ public:
         return HANDLE_TYPE;
     }
 
+    Session* GetParent() {
+        return parent.get();
+    }
+
+    const Session* GetParent() const {
+        return parent.get();
+    }
+
     using SessionPair = std::tuple<SharedPtr<ServerSession>, SharedPtr<ClientSession>>;
 
     /**
@@ -78,23 +86,16 @@
 
     void Acquire(Thread* thread) override;
 
-    std::string name; ///< The name of this session (optional)
-    std::shared_ptr<Session> parent; ///< The parent session, which links to the client endpoint.
-    std::shared_ptr<SessionRequestHandler>
-        hle_handler; ///< This session's HLE request handler (applicable when not a domain)
+    /// Called when a client disconnection occurs.
+    void ClientDisconnected();
 
-    /// This is the list of domain request handlers (after conversion to a domain)
-    std::vector<std::shared_ptr<SessionRequestHandler>> domain_request_handlers;
-
-    /// List of threads that are pending a response after a sync request. This list is processed in
-    /// a LIFO manner, thus, the last request will be dispatched first.
-    /// TODO(Subv): Verify if this is indeed processed in LIFO using a hardware test.
-    std::vector<SharedPtr<Thread>> pending_requesting_threads;
+    /// Adds a new domain request handler to the collection of request handlers within
+    /// this ServerSession instance.
+    void AppendDomainRequestHandler(std::shared_ptr<SessionRequestHandler> handler);
 
-    /// Thread whose request is currently being handled. A request is considered "handled" when a
-    /// response is sent via svcReplyAndReceive.
-    /// TODO(Subv): Find a better name for this.
-    SharedPtr<Thread> currently_handling;
+    /// Retrieves the total number of domain request handlers that have been
+    /// appended to this ServerSession instance.
+    std::size_t NumDomainRequestHandlers() const;
 
     /// Returns true if the session has been converted to a domain, otherwise False
     bool IsDomain() const {
@@ -129,8 +130,30 @@ private:
     /// object handle.
     ResultCode HandleDomainSyncRequest(Kernel::HLERequestContext& context);
 
+    /// The parent session, which links to the client endpoint.
+    std::shared_ptr<Session> parent;
+
+    /// This session's HLE request handler (applicable when not a domain)
+    std::shared_ptr<SessionRequestHandler> hle_handler;
+
+    /// This is the list of domain request handlers (after conversion to a domain)
+    std::vector<std::shared_ptr<SessionRequestHandler>> domain_request_handlers;
+
+    /// List of threads that are pending a response after a sync request. This list is processed in
+    /// a LIFO manner, thus, the last request will be dispatched first.
+    /// TODO(Subv): Verify if this is indeed processed in LIFO using a hardware test.
+    std::vector<SharedPtr<Thread>> pending_requesting_threads;
+
+    /// Thread whose request is currently being handled. A request is considered "handled" when a
+    /// response is sent via svcReplyAndReceive.
+    /// TODO(Subv): Find a better name for this.
+    SharedPtr<Thread> currently_handling;
+
     /// When set to True, converts the session to a domain at the end of the command
     bool convert_to_domain{};
+
+    /// The name of this session (optional)
+    std::string name;
 };
 
 } // namespace Kernel
diff --git a/src/core/hle/kernel/shared_memory.cpp b/src/core/hle/kernel/shared_memory.cpp
index 22d0c1dd5..62861da36 100644
--- a/src/core/hle/kernel/shared_memory.cpp
+++ b/src/core/hle/kernel/shared_memory.cpp
@@ -6,7 +6,6 @@
 
 #include "common/assert.h"
 #include "common/logging/log.h"
-#include "core/core.h"
 #include "core/hle/kernel/errors.h"
 #include "core/hle/kernel/kernel.h"
 #include "core/hle/kernel/shared_memory.h"
@@ -34,8 +33,8 @@ SharedPtr<SharedMemory> SharedMemory::Create(KernelCore& kernel, Process* owner_
         shared_memory->backing_block_offset = 0;
 
         // Refresh the address mappings for the current process.
-        if (Core::CurrentProcess() != nullptr) {
-            Core::CurrentProcess()->VMManager().RefreshMemoryBlockMappings(
+        if (kernel.CurrentProcess() != nullptr) {
+            kernel.CurrentProcess()->VMManager().RefreshMemoryBlockMappings(
                 shared_memory->backing_block.get());
         }
     } else {
diff --git a/src/core/hle/kernel/svc.cpp b/src/core/hle/kernel/svc.cpp
index 75b88a333..7f5c0cc86 100644
--- a/src/core/hle/kernel/svc.cpp
+++ b/src/core/hle/kernel/svc.cpp
@@ -20,6 +20,7 @@
 #include "core/hle/kernel/address_arbiter.h"
 #include "core/hle/kernel/client_port.h"
 #include "core/hle/kernel/client_session.h"
+#include "core/hle/kernel/errors.h"
 #include "core/hle/kernel/handle_table.h"
 #include "core/hle/kernel/kernel.h"
 #include "core/hle/kernel/mutex.h"
diff --git a/src/core/hle/result.h b/src/core/hle/result.h
index bfb77cc31..1ed144481 100644
--- a/src/core/hle/result.h
+++ b/src/core/hle/result.h
@@ -8,7 +8,6 @@
 #include <utility>
 #include "common/assert.h"
 #include "common/bit_field.h"
-#include "common/common_funcs.h"
 #include "common/common_types.h"
 
 // All the constants in this file come from http://switchbrew.org/index.php?title=Error_codes
diff --git a/src/core/hle/service/am/applets/software_keyboard.cpp b/src/core/hle/service/am/applets/software_keyboard.cpp
index f255f74b5..8c5bd6059 100644
--- a/src/core/hle/service/am/applets/software_keyboard.cpp
+++ b/src/core/hle/service/am/applets/software_keyboard.cpp
@@ -7,6 +7,7 @@
 #include "common/string_util.h"
 #include "core/core.h"
 #include "core/frontend/applets/software_keyboard.h"
+#include "core/hle/result.h"
 #include "core/hle/service/am/am.h"
 #include "core/hle/service/am/applets/software_keyboard.h"
 
diff --git a/src/core/hle/service/am/applets/software_keyboard.h b/src/core/hle/service/am/applets/software_keyboard.h
index efd5753a1..b93a30d28 100644
--- a/src/core/hle/service/am/applets/software_keyboard.h
+++ b/src/core/hle/service/am/applets/software_keyboard.h
@@ -9,10 +9,13 @@
 #include <vector>
 
 #include "common/common_funcs.h"
+#include "common/common_types.h"
 #include "common/swap.h"
 #include "core/hle/service/am/am.h"
 #include "core/hle/service/am/applets/applets.h"
 
+union ResultCode;
+
 namespace Service::AM::Applets {
 
 enum class KeysetDisable : u32 {
diff --git a/src/core/hle/service/audio/audout_u.cpp b/src/core/hle/service/audio/audout_u.cpp
index bbe813490..21f5e64c7 100644
--- a/src/core/hle/service/audio/audout_u.cpp
+++ b/src/core/hle/service/audio/audout_u.cpp
@@ -107,7 +107,9 @@ private:
     void StopAudioOut(Kernel::HLERequestContext& ctx) {
         LOG_DEBUG(Service_Audio, "called");
 
-        audio_core.StopStream(stream);
+        if (stream->IsPlaying()) {
+            audio_core.StopStream(stream);
+        }
 
         IPC::ResponseBuilder rb{ctx, 2};
         rb.Push(RESULT_SUCCESS);
diff --git a/src/core/hle/service/nvdrv/devices/nvdisp_disp0.cpp b/src/core/hle/service/nvdrv/devices/nvdisp_disp0.cpp
index dbe7ee6e8..20c7c39aa 100644
--- a/src/core/hle/service/nvdrv/devices/nvdisp_disp0.cpp
+++ b/src/core/hle/service/nvdrv/devices/nvdisp_disp0.cpp
@@ -36,7 +36,7 @@ void nvdisp_disp0::flip(u32 buffer_handle, u32 offset, u32 format, u32 width, u3
 
     auto& instance = Core::System::GetInstance();
     instance.GetPerfStats().EndGameFrame();
-    instance.Renderer().SwapBuffers(framebuffer);
+    instance.GPU().SwapBuffers(framebuffer);
 }
 
 } // namespace Service::Nvidia::Devices
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.cpp b/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.cpp
index 466db7ccd..a34b9e753 100644
--- a/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.cpp
+++ b/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.cpp
@@ -178,7 +178,7 @@ u32 nvhost_as_gpu::UnmapBuffer(const std::vector<u8>& input, std::vector<u8>& ou
     auto& gpu = system_instance.GPU();
     auto cpu_addr = gpu.MemoryManager().GpuToCpuAddress(params.offset);
     ASSERT(cpu_addr);
-    system_instance.Renderer().Rasterizer().FlushAndInvalidateRegion(*cpu_addr, itr->second.size);
+    gpu.FlushAndInvalidateRegion(*cpu_addr, itr->second.size);
 
     params.offset = gpu.MemoryManager().UnmapBuffer(params.offset, itr->second.size);
 
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_gpu.cpp b/src/core/hle/service/nvdrv/devices/nvhost_gpu.cpp
index 0a650f36c..8ce7bc7a5 100644
--- a/src/core/hle/service/nvdrv/devices/nvhost_gpu.cpp
+++ b/src/core/hle/service/nvdrv/devices/nvhost_gpu.cpp
@@ -136,16 +136,6 @@ u32 nvhost_gpu::AllocateObjectContext(const std::vector<u8>& input, std::vector<
     return 0;
 }
 
-static void PushGPUEntries(Tegra::CommandList&& entries) {
-    if (entries.empty()) {
-        return;
-    }
-
-    auto& dma_pusher{Core::System::GetInstance().GPU().DmaPusher()};
-    dma_pusher.Push(std::move(entries));
-    dma_pusher.DispatchCalls();
-}
-
 u32 nvhost_gpu::SubmitGPFIFO(const std::vector<u8>& input, std::vector<u8>& output) {
     if (input.size() < sizeof(IoctlSubmitGpfifo)) {
         UNIMPLEMENTED();
@@ -163,7 +153,7 @@ u32 nvhost_gpu::SubmitGPFIFO(const std::vector<u8>& input, std::vector<u8>& outp
     std::memcpy(entries.data(), &input[sizeof(IoctlSubmitGpfifo)],
                 params.num_entries * sizeof(Tegra::CommandListHeader));
 
-    PushGPUEntries(std::move(entries));
+    Core::System::GetInstance().GPU().PushGPUEntries(std::move(entries));
 
     params.fence_out.id = 0;
     params.fence_out.value = 0;
@@ -184,7 +174,7 @@ u32 nvhost_gpu::KickoffPB(const std::vector<u8>& input, std::vector<u8>& output)
     Memory::ReadBlock(params.address, entries.data(),
                       params.num_entries * sizeof(Tegra::CommandListHeader));
 
-    PushGPUEntries(std::move(entries));
+    Core::System::GetInstance().GPU().PushGPUEntries(std::move(entries));
 
     params.fence_out.id = 0;
     params.fence_out.value = 0;
diff --git a/src/core/hle/service/nvflinger/nvflinger.cpp b/src/core/hle/service/nvflinger/nvflinger.cpp
index 56f31e2ac..fc496b654 100644
--- a/src/core/hle/service/nvflinger/nvflinger.cpp
+++ b/src/core/hle/service/nvflinger/nvflinger.cpp
@@ -186,7 +186,7 @@ void NVFlinger::Compose() {
 
             // There was no queued buffer to draw, render previous frame
             system_instance.GetPerfStats().EndGameFrame();
-            system_instance.Renderer().SwapBuffers({});
+            system_instance.GPU().SwapBuffers({});
             continue;
         }
 
diff --git a/src/core/hle/service/sm/controller.cpp b/src/core/hle/service/sm/controller.cpp
index 74da4d5e6..e9ee73710 100644
--- a/src/core/hle/service/sm/controller.cpp
+++ b/src/core/hle/service/sm/controller.cpp
@@ -30,7 +30,7 @@ void Controller::DuplicateSession(Kernel::HLERequestContext& ctx) {
 
     IPC::ResponseBuilder rb{ctx, 2, 0, 1, IPC::ResponseBuilder::Flags::AlwaysMoveHandles};
     rb.Push(RESULT_SUCCESS);
-    Kernel::SharedPtr<Kernel::ClientSession> session{ctx.Session()->parent->client};
+    Kernel::SharedPtr<Kernel::ClientSession> session{ctx.Session()->GetParent()->client};
     rb.PushMoveObjects(session);
 
     LOG_DEBUG(Service, "session={}", session->GetObjectId());
diff --git a/src/core/memory.cpp b/src/core/memory.cpp
index ec279cef8..6591c45d2 100644
--- a/src/core/memory.cpp
+++ b/src/core/memory.cpp
@@ -356,16 +356,16 @@ void RasterizerFlushVirtualRegion(VAddr start, u64 size, FlushMode mode) {
         const VAddr overlap_end = std::min(end, region_end);
         const VAddr overlap_size = overlap_end - overlap_start;
 
-        auto& rasterizer = system_instance.Renderer().Rasterizer();
+        auto& gpu = system_instance.GPU();
         switch (mode) {
         case FlushMode::Flush:
-            rasterizer.FlushRegion(overlap_start, overlap_size);
+            gpu.FlushRegion(overlap_start, overlap_size);
             break;
         case FlushMode::Invalidate:
-            rasterizer.InvalidateRegion(overlap_start, overlap_size);
+            gpu.InvalidateRegion(overlap_start, overlap_size);
             break;
         case FlushMode::FlushAndInvalidate:
-            rasterizer.FlushAndInvalidateRegion(overlap_start, overlap_size);
+            gpu.FlushAndInvalidateRegion(overlap_start, overlap_size);
             break;
         }
     };
diff --git a/src/core/settings.cpp b/src/core/settings.cpp
index 2e232e1e7..6dd3139cc 100644
--- a/src/core/settings.cpp
+++ b/src/core/settings.cpp
@@ -91,7 +91,10 @@ void LogSettings() {
     LogSetting("Renderer_UseResolutionFactor", Settings::values.resolution_factor);
     LogSetting("Renderer_UseFrameLimit", Settings::values.use_frame_limit);
     LogSetting("Renderer_FrameLimit", Settings::values.frame_limit);
+    LogSetting("Renderer_UseDiskShaderCache", Settings::values.use_disk_shader_cache);
     LogSetting("Renderer_UseAccurateGpuEmulation", Settings::values.use_accurate_gpu_emulation);
+    LogSetting("Renderer_UseAsynchronousGpuEmulation",
+               Settings::values.use_asynchronous_gpu_emulation);
     LogSetting("Audio_OutputEngine", Settings::values.sink_id);
     LogSetting("Audio_EnableAudioStretching", Settings::values.enable_audio_stretching);
     LogSetting("Audio_OutputDevice", Settings::values.audio_device_id);
diff --git a/src/core/settings.h b/src/core/settings.h
index 7e76e0466..cdfb2f742 100644
--- a/src/core/settings.h
+++ b/src/core/settings.h
@@ -393,6 +393,7 @@ struct Values {
     u16 frame_limit;
     bool use_disk_shader_cache;
     bool use_accurate_gpu_emulation;
+    bool use_asynchronous_gpu_emulation;
 
     float bg_red;
     float bg_green;
diff --git a/src/core/telemetry_session.cpp b/src/core/telemetry_session.cpp
index 58dfcc4df..e1db06811 100644
--- a/src/core/telemetry_session.cpp
+++ b/src/core/telemetry_session.cpp
@@ -162,6 +162,8 @@ TelemetrySession::TelemetrySession() {
              Settings::values.use_disk_shader_cache);
     AddField(Telemetry::FieldType::UserConfig, "Renderer_UseAccurateGpuEmulation",
              Settings::values.use_accurate_gpu_emulation);
+    AddField(Telemetry::FieldType::UserConfig, "Renderer_UseAsynchronousGpuEmulation",
+             Settings::values.use_asynchronous_gpu_emulation);
     AddField(Telemetry::FieldType::UserConfig, "System_UseDockedMode",
              Settings::values.use_docked_mode);
 }
diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt
index 3e9d2b3be..0c3038c52 100644
--- a/src/video_core/CMakeLists.txt
+++ b/src/video_core/CMakeLists.txt
@@ -17,6 +17,12 @@ add_library(video_core STATIC
     engines/shader_header.h
     gpu.cpp
     gpu.h
+    gpu_asynch.cpp
+    gpu_asynch.h
+    gpu_synch.cpp
+    gpu_synch.h
+    gpu_thread.cpp
+    gpu_thread.h
     macro_interpreter.cpp
     macro_interpreter.h
     memory_manager.cpp
@@ -74,6 +80,7 @@ add_library(video_core STATIC
     shader/decode/hfma2.cpp
     shader/decode/conversion.cpp
     shader/decode/memory.cpp
+    shader/decode/texture.cpp
     shader/decode/float_set_predicate.cpp
     shader/decode/integer_set_predicate.cpp
     shader/decode/half_set_predicate.cpp
@@ -94,6 +101,8 @@ add_library(video_core STATIC
     surface.h
     textures/astc.cpp
     textures/astc.h
+    textures/convert.cpp
+    textures/convert.h
     textures/decoders.cpp
     textures/decoders.h
     textures/texture.h
@@ -104,6 +113,8 @@ add_library(video_core STATIC
 if (ENABLE_VULKAN)
     target_sources(video_core PRIVATE
         renderer_vulkan/declarations.h
+        renderer_vulkan/maxwell_to_vk.cpp
+        renderer_vulkan/maxwell_to_vk.h
         renderer_vulkan/vk_buffer_cache.cpp
         renderer_vulkan/vk_buffer_cache.h
         renderer_vulkan/vk_device.cpp
diff --git a/src/video_core/dma_pusher.cpp b/src/video_core/dma_pusher.cpp
index 669541b4b..bff1a37ff 100644
--- a/src/video_core/dma_pusher.cpp
+++ b/src/video_core/dma_pusher.cpp
@@ -39,7 +39,7 @@ bool DmaPusher::Step() {
39 } 39 }
40 40
41 const CommandList& command_list{dma_pushbuffer.front()}; 41 const CommandList& command_list{dma_pushbuffer.front()};
42 const CommandListHeader& command_list_header{command_list[dma_pushbuffer_subindex++]}; 42 const CommandListHeader command_list_header{command_list[dma_pushbuffer_subindex++]};
43 GPUVAddr dma_get = command_list_header.addr; 43 GPUVAddr dma_get = command_list_header.addr;
44 GPUVAddr dma_put = dma_get + command_list_header.size * sizeof(u32); 44 GPUVAddr dma_put = dma_get + command_list_header.size * sizeof(u32);
45 bool non_main = command_list_header.is_non_main; 45 bool non_main = command_list_header.is_non_main;
diff --git a/src/video_core/engines/kepler_memory.cpp b/src/video_core/engines/kepler_memory.cpp
index 4f6126116..aae2a4019 100644
--- a/src/video_core/engines/kepler_memory.cpp
+++ b/src/video_core/engines/kepler_memory.cpp
@@ -48,7 +48,7 @@ void KeplerMemory::ProcessData(u32 data) {
48 // We have to invalidate the destination region to evict any outdated surfaces from the cache. 48 // We have to invalidate the destination region to evict any outdated surfaces from the cache.
49 // We do this before actually writing the new data because the destination address might contain 49 // We do this before actually writing the new data because the destination address might contain
50 // a dirty surface that will have to be written back to memory. 50 // a dirty surface that will have to be written back to memory.
51 rasterizer.InvalidateRegion(*dest_address, sizeof(u32)); 51 Core::System::GetInstance().GPU().InvalidateRegion(*dest_address, sizeof(u32));
52 52
53 Memory::Write32(*dest_address, data); 53 Memory::Write32(*dest_address, data);
54 system.GPU().Maxwell3D().dirty_flags.OnMemoryWrite(); 54 system.GPU().Maxwell3D().dirty_flags.OnMemoryWrite();
diff --git a/src/video_core/engines/maxwell_dma.cpp b/src/video_core/engines/maxwell_dma.cpp
index 0474c7ba3..9dfea5999 100644
--- a/src/video_core/engines/maxwell_dma.cpp
+++ b/src/video_core/engines/maxwell_dma.cpp
@@ -92,12 +92,12 @@ void MaxwellDMA::HandleCopy() {
92 const auto FlushAndInvalidate = [&](u32 src_size, u64 dst_size) { 92 const auto FlushAndInvalidate = [&](u32 src_size, u64 dst_size) {
93 // TODO(Subv): For now, manually flush the regions until we implement GPU-accelerated 93 // TODO(Subv): For now, manually flush the regions until we implement GPU-accelerated
94 // copying. 94 // copying.
95 rasterizer.FlushRegion(*source_cpu, src_size); 95 Core::System::GetInstance().GPU().FlushRegion(*source_cpu, src_size);
96 96
97 // We have to invalidate the destination region to evict any outdated surfaces from the 97 // We have to invalidate the destination region to evict any outdated surfaces from the
98 // cache. We do this before actually writing the new data because the destination address 98 // cache. We do this before actually writing the new data because the destination address
99 // might contain a dirty surface that will have to be written back to memory. 99 // might contain a dirty surface that will have to be written back to memory.
100 rasterizer.InvalidateRegion(*dest_cpu, dst_size); 100 Core::System::GetInstance().GPU().InvalidateRegion(*dest_cpu, dst_size);
101 }; 101 };
102 102
103 if (regs.exec.is_dst_linear && !regs.exec.is_src_linear) { 103 if (regs.exec.is_dst_linear && !regs.exec.is_src_linear) {
diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h
index c7eb15b6a..7f613370b 100644
--- a/src/video_core/engines/shader_bytecode.h
+++ b/src/video_core/engines/shader_bytecode.h
@@ -324,11 +324,11 @@ enum class TextureQueryType : u64 {
324 324
325enum class TextureProcessMode : u64 { 325enum class TextureProcessMode : u64 {
326 None = 0, 326 None = 0,
327 LZ = 1, // Unknown, appears to be the same as none. 327 LZ = 1, // Load LOD of zero.
328 LB = 2, // Load Bias. 328 LB = 2, // Load Bias.
329 LL = 3, // Load LOD (LevelOfDetail) 329 LL = 3, // Load LOD.
330 LBA = 6, // Load Bias. The A is unknown, does not appear to differ with LB 330 LBA = 6, // Load Bias. The A is unknown, does not appear to differ with LB.
331 LLA = 7 // Load LOD. The A is unknown, does not appear to differ with LL 331 LLA = 7 // Load LOD. The A is unknown, does not appear to differ with LL.
332}; 332};
333 333
334enum class TextureMiscMode : u64 { 334enum class TextureMiscMode : u64 {
@@ -1445,6 +1445,7 @@ public:
1445 Flow, 1445 Flow,
1446 Synch, 1446 Synch,
1447 Memory, 1447 Memory,
1448 Texture,
1448 FloatSet, 1449 FloatSet,
1449 FloatSetPredicate, 1450 FloatSetPredicate,
1450 IntegerSet, 1451 IntegerSet,
@@ -1575,14 +1576,14 @@ private:
1575 INST("1110111101010---", Id::ST_L, Type::Memory, "ST_L"), 1576 INST("1110111101010---", Id::ST_L, Type::Memory, "ST_L"),
1576 INST("1110111011010---", Id::LDG, Type::Memory, "LDG"), 1577 INST("1110111011010---", Id::LDG, Type::Memory, "LDG"),
1577 INST("1110111011011---", Id::STG, Type::Memory, "STG"), 1578 INST("1110111011011---", Id::STG, Type::Memory, "STG"),
1578 INST("110000----111---", Id::TEX, Type::Memory, "TEX"), 1579 INST("110000----111---", Id::TEX, Type::Texture, "TEX"),
1579 INST("1101111101001---", Id::TXQ, Type::Memory, "TXQ"), 1580 INST("1101111101001---", Id::TXQ, Type::Texture, "TXQ"),
1580 INST("1101-00---------", Id::TEXS, Type::Memory, "TEXS"), 1581 INST("1101-00---------", Id::TEXS, Type::Texture, "TEXS"),
1581 INST("1101101---------", Id::TLDS, Type::Memory, "TLDS"), 1582 INST("1101101---------", Id::TLDS, Type::Texture, "TLDS"),
1582 INST("110010----111---", Id::TLD4, Type::Memory, "TLD4"), 1583 INST("110010----111---", Id::TLD4, Type::Texture, "TLD4"),
1583 INST("1101111100------", Id::TLD4S, Type::Memory, "TLD4S"), 1584 INST("1101111100------", Id::TLD4S, Type::Texture, "TLD4S"),
1584 INST("110111110110----", Id::TMML_B, Type::Memory, "TMML_B"), 1585 INST("110111110110----", Id::TMML_B, Type::Texture, "TMML_B"),
1585 INST("1101111101011---", Id::TMML, Type::Memory, "TMML"), 1586 INST("1101111101011---", Id::TMML, Type::Texture, "TMML"),
1586 INST("111000110000----", Id::EXIT, Type::Trivial, "EXIT"), 1587 INST("111000110000----", Id::EXIT, Type::Trivial, "EXIT"),
1587 INST("11100000--------", Id::IPA, Type::Trivial, "IPA"), 1588 INST("11100000--------", Id::IPA, Type::Trivial, "IPA"),
1588 INST("1111101111100---", Id::OUT_R, Type::Trivial, "OUT_R"), 1589 INST("1111101111100---", Id::OUT_R, Type::Trivial, "OUT_R"),
diff --git a/src/video_core/gpu.cpp b/src/video_core/gpu.cpp
index ac30d1a89..08abf8ac9 100644
--- a/src/video_core/gpu.cpp
+++ b/src/video_core/gpu.cpp
@@ -12,7 +12,7 @@
12#include "video_core/engines/maxwell_3d.h" 12#include "video_core/engines/maxwell_3d.h"
13#include "video_core/engines/maxwell_dma.h" 13#include "video_core/engines/maxwell_dma.h"
14#include "video_core/gpu.h" 14#include "video_core/gpu.h"
15#include "video_core/rasterizer_interface.h" 15#include "video_core/renderer_base.h"
16 16
17namespace Tegra { 17namespace Tegra {
18 18
@@ -28,7 +28,8 @@ u32 FramebufferConfig::BytesPerPixel(PixelFormat format) {
28 UNREACHABLE(); 28 UNREACHABLE();
29} 29}
30 30
31GPU::GPU(Core::System& system, VideoCore::RasterizerInterface& rasterizer) { 31GPU::GPU(Core::System& system, VideoCore::RendererBase& renderer) : renderer{renderer} {
32 auto& rasterizer{renderer.Rasterizer()};
32 memory_manager = std::make_unique<Tegra::MemoryManager>(); 33 memory_manager = std::make_unique<Tegra::MemoryManager>();
33 dma_pusher = std::make_unique<Tegra::DmaPusher>(*this); 34 dma_pusher = std::make_unique<Tegra::DmaPusher>(*this);
34 maxwell_3d = std::make_unique<Engines::Maxwell3D>(system, rasterizer, *memory_manager); 35 maxwell_3d = std::make_unique<Engines::Maxwell3D>(system, rasterizer, *memory_manager);
diff --git a/src/video_core/gpu.h b/src/video_core/gpu.h
index 6313702f2..56a203275 100644
--- a/src/video_core/gpu.h
+++ b/src/video_core/gpu.h
@@ -16,8 +16,8 @@ class System;
16} 16}
17 17
18namespace VideoCore { 18namespace VideoCore {
19class RasterizerInterface; 19class RendererBase;
20} 20} // namespace VideoCore
21 21
22namespace Tegra { 22namespace Tegra {
23 23
@@ -119,10 +119,11 @@ enum class EngineID {
119 MAXWELL_DMA_COPY_A = 0xB0B5, 119 MAXWELL_DMA_COPY_A = 0xB0B5,
120}; 120};
121 121
122class GPU final { 122class GPU {
123public: 123public:
124 explicit GPU(Core::System& system, VideoCore::RasterizerInterface& rasterizer); 124 explicit GPU(Core::System& system, VideoCore::RendererBase& renderer);
125 ~GPU(); 125
126 virtual ~GPU();
126 127
127 struct MethodCall { 128 struct MethodCall {
128 u32 method{}; 129 u32 method{};
@@ -200,8 +201,42 @@ public:
200 }; 201 };
201 } regs{}; 202 } regs{};
202 203
204 /// Push GPU command entries to be processed
205 virtual void PushGPUEntries(Tegra::CommandList&& entries) = 0;
206
207 /// Swap buffers (render frame)
208 virtual void SwapBuffers(
209 std::optional<std::reference_wrapper<const Tegra::FramebufferConfig>> framebuffer) = 0;
210
211 /// Notify rasterizer that any caches of the specified region should be flushed to Switch memory
212 virtual void FlushRegion(VAddr addr, u64 size) = 0;
213
214 /// Notify rasterizer that any caches of the specified region should be invalidated
215 virtual void InvalidateRegion(VAddr addr, u64 size) = 0;
216
217 /// Notify rasterizer that any caches of the specified region should be flushed and invalidated
218 virtual void FlushAndInvalidateRegion(VAddr addr, u64 size) = 0;
219
203private: 220private:
221 void ProcessBindMethod(const MethodCall& method_call);
222 void ProcessSemaphoreTriggerMethod();
223 void ProcessSemaphoreRelease();
224 void ProcessSemaphoreAcquire();
225
226 /// Calls a GPU puller method.
227 void CallPullerMethod(const MethodCall& method_call);
228
229 /// Calls a GPU engine method.
230 void CallEngineMethod(const MethodCall& method_call);
231
232 /// Determines where the method should be executed.
233 bool ExecuteMethodOnEngine(const MethodCall& method_call);
234
235protected:
204 std::unique_ptr<Tegra::DmaPusher> dma_pusher; 236 std::unique_ptr<Tegra::DmaPusher> dma_pusher;
237 VideoCore::RendererBase& renderer;
238
239private:
205 std::unique_ptr<Tegra::MemoryManager> memory_manager; 240 std::unique_ptr<Tegra::MemoryManager> memory_manager;
206 241
207 /// Mapping of command subchannels to their bound engine ids. 242 /// Mapping of command subchannels to their bound engine ids.
@@ -217,18 +252,6 @@ private:
217 std::unique_ptr<Engines::MaxwellDMA> maxwell_dma; 252 std::unique_ptr<Engines::MaxwellDMA> maxwell_dma;
218 /// Inline memory engine 253 /// Inline memory engine
219 std::unique_ptr<Engines::KeplerMemory> kepler_memory; 254 std::unique_ptr<Engines::KeplerMemory> kepler_memory;
220
221 void ProcessBindMethod(const MethodCall& method_call);
222 void ProcessSemaphoreTriggerMethod();
223 void ProcessSemaphoreRelease();
224 void ProcessSemaphoreAcquire();
225
226 // Calls a GPU puller method.
227 void CallPullerMethod(const MethodCall& method_call);
228 // Calls a GPU engine method.
229 void CallEngineMethod(const MethodCall& method_call);
230 // Determines where the method should be executed.
231 bool ExecuteMethodOnEngine(const MethodCall& method_call);
232}; 255};
233 256
234#define ASSERT_REG_POSITION(field_name, position) \ 257#define ASSERT_REG_POSITION(field_name, position) \
diff --git a/src/video_core/gpu_asynch.cpp b/src/video_core/gpu_asynch.cpp
new file mode 100644
index 000000000..ad0a747e3
--- /dev/null
+++ b/src/video_core/gpu_asynch.cpp
@@ -0,0 +1,37 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "video_core/gpu_asynch.h"
6#include "video_core/gpu_thread.h"
7#include "video_core/renderer_base.h"
8
9namespace VideoCommon {
10
11GPUAsynch::GPUAsynch(Core::System& system, VideoCore::RendererBase& renderer)
12 : Tegra::GPU(system, renderer), gpu_thread{renderer, *dma_pusher} {}
13
14GPUAsynch::~GPUAsynch() = default;
15
16void GPUAsynch::PushGPUEntries(Tegra::CommandList&& entries) {
17 gpu_thread.SubmitList(std::move(entries));
18}
19
20void GPUAsynch::SwapBuffers(
21 std::optional<std::reference_wrapper<const Tegra::FramebufferConfig>> framebuffer) {
22 gpu_thread.SwapBuffers(std::move(framebuffer));
23}
24
25void GPUAsynch::FlushRegion(VAddr addr, u64 size) {
26 gpu_thread.FlushRegion(addr, size);
27}
28
29void GPUAsynch::InvalidateRegion(VAddr addr, u64 size) {
30 gpu_thread.InvalidateRegion(addr, size);
31}
32
33void GPUAsynch::FlushAndInvalidateRegion(VAddr addr, u64 size) {
34 gpu_thread.FlushAndInvalidateRegion(addr, size);
35}
36
37} // namespace VideoCommon
diff --git a/src/video_core/gpu_asynch.h b/src/video_core/gpu_asynch.h
new file mode 100644
index 000000000..e6a807aba
--- /dev/null
+++ b/src/video_core/gpu_asynch.h
@@ -0,0 +1,37 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include "video_core/gpu.h"
8#include "video_core/gpu_thread.h"
9
10namespace VideoCore {
11class RendererBase;
12} // namespace VideoCore
13
14namespace VideoCommon {
15
16namespace GPUThread {
17class ThreadManager;
18} // namespace GPUThread
19
20/// Implementation of GPU interface that runs the GPU asynchronously
21class GPUAsynch : public Tegra::GPU {
22public:
23 explicit GPUAsynch(Core::System& system, VideoCore::RendererBase& renderer);
24 ~GPUAsynch() override;
25
26 void PushGPUEntries(Tegra::CommandList&& entries) override;
27 void SwapBuffers(
28 std::optional<std::reference_wrapper<const Tegra::FramebufferConfig>> framebuffer) override;
29 void FlushRegion(VAddr addr, u64 size) override;
30 void InvalidateRegion(VAddr addr, u64 size) override;
31 void FlushAndInvalidateRegion(VAddr addr, u64 size) override;
32
33private:
34 GPUThread::ThreadManager gpu_thread;
35};
36
37} // namespace VideoCommon
diff --git a/src/video_core/gpu_synch.cpp b/src/video_core/gpu_synch.cpp
new file mode 100644
index 000000000..4c00b96c7
--- /dev/null
+++ b/src/video_core/gpu_synch.cpp
@@ -0,0 +1,37 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "video_core/gpu_synch.h"
6#include "video_core/renderer_base.h"
7
8namespace VideoCommon {
9
10GPUSynch::GPUSynch(Core::System& system, VideoCore::RendererBase& renderer)
11 : Tegra::GPU(system, renderer) {}
12
13GPUSynch::~GPUSynch() = default;
14
15void GPUSynch::PushGPUEntries(Tegra::CommandList&& entries) {
16 dma_pusher->Push(std::move(entries));
17 dma_pusher->DispatchCalls();
18}
19
20void GPUSynch::SwapBuffers(
21 std::optional<std::reference_wrapper<const Tegra::FramebufferConfig>> framebuffer) {
22 renderer.SwapBuffers(std::move(framebuffer));
23}
24
25void GPUSynch::FlushRegion(VAddr addr, u64 size) {
26 renderer.Rasterizer().FlushRegion(addr, size);
27}
28
29void GPUSynch::InvalidateRegion(VAddr addr, u64 size) {
30 renderer.Rasterizer().InvalidateRegion(addr, size);
31}
32
33void GPUSynch::FlushAndInvalidateRegion(VAddr addr, u64 size) {
34 renderer.Rasterizer().FlushAndInvalidateRegion(addr, size);
35}
36
37} // namespace VideoCommon
diff --git a/src/video_core/gpu_synch.h b/src/video_core/gpu_synch.h
new file mode 100644
index 000000000..7d5a241ff
--- /dev/null
+++ b/src/video_core/gpu_synch.h
@@ -0,0 +1,29 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include "video_core/gpu.h"
8
9namespace VideoCore {
10class RendererBase;
11} // namespace VideoCore
12
13namespace VideoCommon {
14
15/// Implementation of GPU interface that runs the GPU synchronously
16class GPUSynch : public Tegra::GPU {
17public:
18 explicit GPUSynch(Core::System& system, VideoCore::RendererBase& renderer);
19 ~GPUSynch() override;
20
21 void PushGPUEntries(Tegra::CommandList&& entries) override;
22 void SwapBuffers(
23 std::optional<std::reference_wrapper<const Tegra::FramebufferConfig>> framebuffer) override;
24 void FlushRegion(VAddr addr, u64 size) override;
25 void InvalidateRegion(VAddr addr, u64 size) override;
26 void FlushAndInvalidateRegion(VAddr addr, u64 size) override;
27};
28
29} // namespace VideoCommon
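Together with GPUAsynch above, this gives two interchangeable backends behind the now-virtual Tegra::GPU interface. A minimal sketch of how a frontend could pick one based on the new use_asynchronous_gpu_emulation setting; the CreateGPU factory here is illustrative and not part of this diff, only the two classes and the setting come from the change:

    #include <memory>

    #include "core/settings.h"
    #include "video_core/gpu_asynch.h"
    #include "video_core/gpu_synch.h"

    std::unique_ptr<Tegra::GPU> CreateGPU(Core::System& system, VideoCore::RendererBase& renderer) {
        // GPUAsynch processes commands on a dedicated thread; GPUSynch executes them inline.
        if (Settings::values.use_asynchronous_gpu_emulation) {
            return std::make_unique<VideoCommon::GPUAsynch>(system, renderer);
        }
        return std::make_unique<VideoCommon::GPUSynch>(system, renderer);
    }
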
diff --git a/src/video_core/gpu_thread.cpp b/src/video_core/gpu_thread.cpp
new file mode 100644
index 000000000..c5bdd2a17
--- /dev/null
+++ b/src/video_core/gpu_thread.cpp
@@ -0,0 +1,152 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/assert.h"
6#include "common/microprofile.h"
7#include "core/frontend/scope_acquire_window_context.h"
8#include "core/settings.h"
9#include "video_core/dma_pusher.h"
10#include "video_core/gpu.h"
11#include "video_core/gpu_thread.h"
12#include "video_core/renderer_base.h"
13
14namespace VideoCommon::GPUThread {
15
16/// Executes a single GPU thread command
17static void ExecuteCommand(CommandData* command, VideoCore::RendererBase& renderer,
18 Tegra::DmaPusher& dma_pusher) {
19 if (const auto submit_list = std::get_if<SubmitListCommand>(command)) {
20 dma_pusher.Push(std::move(submit_list->entries));
21 dma_pusher.DispatchCalls();
22 } else if (const auto data = std::get_if<SwapBuffersCommand>(command)) {
23 renderer.SwapBuffers(data->framebuffer);
24 } else if (const auto data = std::get_if<FlushRegionCommand>(command)) {
25 renderer.Rasterizer().FlushRegion(data->addr, data->size);
26 } else if (const auto data = std::get_if<InvalidateRegionCommand>(command)) {
27 renderer.Rasterizer().InvalidateRegion(data->addr, data->size);
28 } else if (const auto data = std::get_if<FlushAndInvalidateRegionCommand>(command)) {
29 renderer.Rasterizer().FlushAndInvalidateRegion(data->addr, data->size);
30 } else {
31 UNREACHABLE();
32 }
33}
34
35/// Runs the GPU thread
36static void RunThread(VideoCore::RendererBase& renderer, Tegra::DmaPusher& dma_pusher,
37 SynchState& state) {
38
39 MicroProfileOnThreadCreate("GpuThread");
40
41 auto WaitForWakeup = [&]() {
42 std::unique_lock<std::mutex> lock{state.signal_mutex};
43 state.signal_condition.wait(lock, [&] { return !state.is_idle || !state.is_running; });
44 };
45
46 // Wait for first GPU command before acquiring the window context
47 WaitForWakeup();
48
49 // If emulation was stopped during disk shader loading, abort before trying to acquire context
50 if (!state.is_running) {
51 return;
52 }
53
54 Core::Frontend::ScopeAcquireWindowContext acquire_context{renderer.GetRenderWindow()};
55
56 while (state.is_running) {
57 if (!state.is_running) {
58 return;
59 }
60
61 {
62 // Thread has been woken up, so make the previous write queue the next read queue
63 std::lock_guard<std::mutex> lock{state.signal_mutex};
64 std::swap(state.push_queue, state.pop_queue);
65 }
66
67 // Execute all of the GPU commands
68 while (!state.pop_queue->empty()) {
69 ExecuteCommand(&state.pop_queue->front(), renderer, dma_pusher);
70 state.pop_queue->pop();
71 }
72
73 state.UpdateIdleState();
74
75 // Signal that the GPU thread has finished processing commands
76 if (state.is_idle) {
77 state.idle_condition.notify_one();
78 }
79
80 // Wait for CPU thread to send more GPU commands
81 WaitForWakeup();
82 }
83}
84
85ThreadManager::ThreadManager(VideoCore::RendererBase& renderer, Tegra::DmaPusher& dma_pusher)
86 : renderer{renderer}, dma_pusher{dma_pusher}, thread{RunThread, std::ref(renderer),
87 std::ref(dma_pusher), std::ref(state)},
88 thread_id{thread.get_id()} {}
89
90ThreadManager::~ThreadManager() {
91 {
92 // Notify GPU thread that a shutdown is pending
93 std::lock_guard<std::mutex> lock{state.signal_mutex};
94 state.is_running = false;
95 }
96
97 state.signal_condition.notify_one();
98 thread.join();
99}
100
101void ThreadManager::SubmitList(Tegra::CommandList&& entries) {
102 if (entries.empty()) {
103 return;
104 }
105
106 PushCommand(SubmitListCommand(std::move(entries)), false, false);
107}
108
109void ThreadManager::SwapBuffers(
110 std::optional<std::reference_wrapper<const Tegra::FramebufferConfig>> framebuffer) {
111 PushCommand(SwapBuffersCommand(std::move(framebuffer)), true, false);
112}
113
114void ThreadManager::FlushRegion(VAddr addr, u64 size) {
115 // Block the CPU when using accurate emulation
116 PushCommand(FlushRegionCommand(addr, size), Settings::values.use_accurate_gpu_emulation, false);
117}
118
119void ThreadManager::InvalidateRegion(VAddr addr, u64 size) {
120 PushCommand(InvalidateRegionCommand(addr, size), true, true);
121}
122
123void ThreadManager::FlushAndInvalidateRegion(VAddr addr, u64 size) {
124 InvalidateRegion(addr, size);
125}
126
127void ThreadManager::PushCommand(CommandData&& command_data, bool wait_for_idle, bool allow_on_cpu) {
128 {
129 std::lock_guard<std::mutex> lock{state.signal_mutex};
130
131 if ((allow_on_cpu && state.is_idle) || IsGpuThread()) {
132 // Execute the command synchronously on the current thread
133 ExecuteCommand(&command_data, renderer, dma_pusher);
134 return;
135 }
136
137 // Push the command to the GPU thread
138 state.UpdateIdleState();
139 state.push_queue->emplace(command_data);
140 }
141
142 // Signal the GPU thread that commands are pending
143 state.signal_condition.notify_one();
144
145 if (wait_for_idle) {
146 // Wait for the GPU to be idle (all commands to be executed)
147 std::unique_lock<std::mutex> lock{state.idle_mutex};
148 state.idle_condition.wait(lock, [this] { return static_cast<bool>(state.is_idle); });
149 }
150}
151
152} // namespace VideoCommon::GPUThread
diff --git a/src/video_core/gpu_thread.h b/src/video_core/gpu_thread.h
new file mode 100644
index 000000000..edb148b14
--- /dev/null
+++ b/src/video_core/gpu_thread.h
@@ -0,0 +1,133 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <array>
8#include <atomic>
9#include <condition_variable>
10#include <memory>
11#include <mutex>
12#include <optional>
13#include <thread>
14#include <variant>
15
16namespace Tegra {
17struct FramebufferConfig;
18class DmaPusher;
19} // namespace Tegra
20
21namespace VideoCore {
22class RendererBase;
23} // namespace VideoCore
24
25namespace VideoCommon::GPUThread {
26
27/// Command to signal to the GPU thread that a command list is ready for processing
28struct SubmitListCommand final {
29 explicit SubmitListCommand(Tegra::CommandList&& entries) : entries{std::move(entries)} {}
30
31 Tegra::CommandList entries;
32};
33
34/// Command to signal to the GPU thread that a swap buffers is pending
35struct SwapBuffersCommand final {
36 explicit SwapBuffersCommand(std::optional<const Tegra::FramebufferConfig> framebuffer)
37 : framebuffer{std::move(framebuffer)} {}
38
39 std::optional<const Tegra::FramebufferConfig> framebuffer;
40};
41
42/// Command to signal to the GPU thread to flush a region
43struct FlushRegionCommand final {
44 explicit constexpr FlushRegionCommand(VAddr addr, u64 size) : addr{addr}, size{size} {}
45
46 const VAddr addr;
47 const u64 size;
48};
49
50/// Command to signal to the GPU thread to invalidate a region
51struct InvalidateRegionCommand final {
52 explicit constexpr InvalidateRegionCommand(VAddr addr, u64 size) : addr{addr}, size{size} {}
53
54 const VAddr addr;
55 const u64 size;
56};
57
58/// Command to signal to the GPU thread to flush and invalidate a region
59struct FlushAndInvalidateRegionCommand final {
60 explicit constexpr FlushAndInvalidateRegionCommand(VAddr addr, u64 size)
61 : addr{addr}, size{size} {}
62
63 const VAddr addr;
64 const u64 size;
65};
66
67using CommandData = std::variant<SubmitListCommand, SwapBuffersCommand, FlushRegionCommand,
68 InvalidateRegionCommand, FlushAndInvalidateRegionCommand>;
69
70/// Struct used to synchronize the GPU thread
71struct SynchState final {
72 std::atomic<bool> is_running{true};
73 std::atomic<bool> is_idle{true};
74 std::condition_variable signal_condition;
75 std::mutex signal_mutex;
76 std::condition_variable idle_condition;
77 std::mutex idle_mutex;
78
79 // We use two queues for sending commands to the GPU thread, one for writing (push_queue) to and
80 // one for reading from (pop_queue). These are swapped whenever the current pop_queue becomes
81 // empty. This allows for efficient thread-safe access, as it does not require any copies.
82
83 using CommandQueue = std::queue<CommandData>;
84 std::array<CommandQueue, 2> command_queues;
85 CommandQueue* push_queue{&command_queues[0]};
86 CommandQueue* pop_queue{&command_queues[1]};
87
88 void UpdateIdleState() {
89 std::lock_guard<std::mutex> lock{idle_mutex};
90 is_idle = command_queues[0].empty() && command_queues[1].empty();
91 }
92};
93
94/// Class used to manage the GPU thread
95class ThreadManager final {
96public:
97 explicit ThreadManager(VideoCore::RendererBase& renderer, Tegra::DmaPusher& dma_pusher);
98 ~ThreadManager();
99
100 /// Push GPU command entries to be processed
101 void SubmitList(Tegra::CommandList&& entries);
102
103 /// Swap buffers (render frame)
104 void SwapBuffers(
105 std::optional<std::reference_wrapper<const Tegra::FramebufferConfig>> framebuffer);
106
107 /// Notify rasterizer that any caches of the specified region should be flushed to Switch memory
108 void FlushRegion(VAddr addr, u64 size);
109
110 /// Notify rasterizer that any caches of the specified region should be invalidated
111 void InvalidateRegion(VAddr addr, u64 size);
112
113 /// Notify rasterizer that any caches of the specified region should be flushed and invalidated
114 void FlushAndInvalidateRegion(VAddr addr, u64 size);
115
116private:
117 /// Pushes a command to be executed by the GPU thread
118 void PushCommand(CommandData&& command_data, bool wait_for_idle, bool allow_on_cpu);
119
120 /// Returns true if this is called by the GPU thread
121 bool IsGpuThread() const {
122 return std::this_thread::get_id() == thread_id;
123 }
124
125private:
126 SynchState state;
127 VideoCore::RendererBase& renderer;
128 Tegra::DmaPusher& dma_pusher;
129 std::thread thread;
130 std::thread::id thread_id;
131};
132
133} // namespace VideoCommon::GPUThread
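The push/pop queue swap described in SynchState's comment can be shown in isolation: the producer appends to one queue while the consumer drains the other, and the two pointers are swapped under the mutex whenever the consumer wakes up, so commands never need to be copied between threads. This is a simplified stand-alone sketch (ints instead of CommandData, no condition variables), not code from the emulator:

    #include <array>
    #include <mutex>
    #include <queue>
    #include <utility>

    struct TwoQueues {
        std::mutex mutex;
        std::array<std::queue<int>, 2> queues;
        std::queue<int>* push_queue{&queues[0]};
        std::queue<int>* pop_queue{&queues[1]};

        // Producer side (CPU thread): only ever touches the current push queue.
        void Push(int command) {
            std::lock_guard<std::mutex> lock{mutex};
            push_queue->push(command);
        }

        // Consumer side (GPU thread): swap under the lock, then drain without holding it.
        void Drain() {
            {
                std::lock_guard<std::mutex> lock{mutex};
                std::swap(push_queue, pop_queue);
            }
            while (!pop_queue->empty()) {
                pop_queue->pop(); // a real consumer would execute the command here
            }
        }
    };
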
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp
index 321d9dd3d..301562ff6 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -118,7 +118,7 @@ RasterizerOpenGL::RasterizerOpenGL(Core::Frontend::EmuWindow& window, Core::Syst
118 118
119 glGetIntegerv(GL_UNIFORM_BUFFER_OFFSET_ALIGNMENT, &uniform_buffer_alignment); 119 glGetIntegerv(GL_UNIFORM_BUFFER_OFFSET_ALIGNMENT, &uniform_buffer_alignment);
120 120
121 LOG_CRITICAL(Render_OpenGL, "Sync fixed function OpenGL state here!"); 121 LOG_DEBUG(Render_OpenGL, "Sync fixed function OpenGL state here");
122 CheckExtensions(); 122 CheckExtensions();
123} 123}
124 124
@@ -177,7 +177,7 @@ GLuint RasterizerOpenGL::SetupVertexFormat() {
177 continue; 177 continue;
178 178
179 const auto& buffer = regs.vertex_array[attrib.buffer]; 179 const auto& buffer = regs.vertex_array[attrib.buffer];
180 LOG_TRACE(HW_GPU, 180 LOG_TRACE(Render_OpenGL,
181 "vertex attrib {}, count={}, size={}, type={}, offset={}, normalize={}", 181 "vertex attrib {}, count={}, size={}, type={}, offset={}, normalize={}",
182 index, attrib.ComponentCount(), attrib.SizeString(), attrib.TypeString(), 182 index, attrib.ComponentCount(), attrib.SizeString(), attrib.TypeString(),
183 attrib.offset.Value(), attrib.IsNormalized()); 183 attrib.offset.Value(), attrib.IsNormalized());
@@ -343,9 +343,8 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) {
343 shader_program_manager->UseProgrammableFragmentShader(program_handle); 343 shader_program_manager->UseProgrammableFragmentShader(program_handle);
344 break; 344 break;
345 default: 345 default:
346 LOG_CRITICAL(HW_GPU, "Unimplemented shader index={}, enable={}, offset=0x{:08X}", index, 346 UNIMPLEMENTED_MSG("Unimplemented shader index={}, enable={}, offset=0x{:08X}", index,
347 shader_config.enable.Value(), shader_config.offset); 347 shader_config.enable.Value(), shader_config.offset);
348 UNREACHABLE();
349 } 348 }
350 349
351 const auto stage_enum = static_cast<Maxwell::ShaderStage>(stage); 350 const auto stage_enum = static_cast<Maxwell::ShaderStage>(stage);
@@ -749,11 +748,7 @@ void RasterizerOpenGL::FlushAll() {}
749 748
750void RasterizerOpenGL::FlushRegion(VAddr addr, u64 size) { 749void RasterizerOpenGL::FlushRegion(VAddr addr, u64 size) {
751 MICROPROFILE_SCOPE(OpenGL_CacheManagement); 750 MICROPROFILE_SCOPE(OpenGL_CacheManagement);
752 751 res_cache.FlushRegion(addr, size);
753 if (Settings::values.use_accurate_gpu_emulation) {
754 // Only flush if use_accurate_gpu_emulation is enabled, as it incurs a performance hit
755 res_cache.FlushRegion(addr, size);
756 }
757} 752}
758 753
759void RasterizerOpenGL::InvalidateRegion(VAddr addr, u64 size) { 754void RasterizerOpenGL::InvalidateRegion(VAddr addr, u64 size) {
@@ -797,7 +792,10 @@ bool RasterizerOpenGL::AccelerateDisplay(const Tegra::FramebufferConfig& config,
797 VideoCore::Surface::PixelFormatFromGPUPixelFormat(config.pixel_format)}; 792 VideoCore::Surface::PixelFormatFromGPUPixelFormat(config.pixel_format)};
798 ASSERT_MSG(params.width == config.width, "Framebuffer width is different"); 793 ASSERT_MSG(params.width == config.width, "Framebuffer width is different");
799 ASSERT_MSG(params.height == config.height, "Framebuffer height is different"); 794 ASSERT_MSG(params.height == config.height, "Framebuffer height is different");
800 ASSERT_MSG(params.pixel_format == pixel_format, "Framebuffer pixel_format is different"); 795
796 if (params.pixel_format != pixel_format) {
797 LOG_WARNING(Render_OpenGL, "Framebuffer pixel_format is different");
798 }
801 799
802 screen_info.display_texture = surface->Texture().handle; 800 screen_info.display_texture = surface->Texture().handle;
803 801
@@ -943,8 +941,8 @@ void RasterizerOpenGL::SetupConstBuffers(Tegra::Engines::Maxwell3D::Regs::Shader
943 size = buffer.size; 941 size = buffer.size;
944 942
945 if (size > MaxConstbufferSize) { 943 if (size > MaxConstbufferSize) {
946 LOG_CRITICAL(HW_GPU, "indirect constbuffer size {} exceeds maximum {}", size, 944 LOG_WARNING(Render_OpenGL, "Indirect constbuffer size {} exceeds maximum {}", size,
947 MaxConstbufferSize); 945 MaxConstbufferSize);
948 size = MaxConstbufferSize; 946 size = MaxConstbufferSize;
949 } 947 }
950 } else { 948 } else {
@@ -1004,10 +1002,9 @@ void RasterizerOpenGL::SetupTextures(Maxwell::ShaderStage stage, const Shader& s
1004 1002
1005 texture_samplers[current_bindpoint].SyncWithConfig(texture.tsc); 1003 texture_samplers[current_bindpoint].SyncWithConfig(texture.tsc);
1006 1004
1007 Surface surface = res_cache.GetTextureSurface(texture, entry); 1005 if (Surface surface = res_cache.GetTextureSurface(texture, entry); surface) {
1008 if (surface != nullptr) {
1009 state.texture_units[current_bindpoint].texture = 1006 state.texture_units[current_bindpoint].texture =
1010 entry.IsArray() ? surface->TextureLayer().handle : surface->Texture().handle; 1007 surface->Texture(entry.IsArray()).handle;
1011 surface->UpdateSwizzle(texture.tic.x_source, texture.tic.y_source, texture.tic.z_source, 1008 surface->UpdateSwizzle(texture.tic.x_source, texture.tic.y_source, texture.tic.z_source,
1012 texture.tic.w_source); 1009 texture.tic.w_source);
1013 } else { 1010 } else {
@@ -1239,11 +1236,7 @@ void RasterizerOpenGL::SyncScissorTest(OpenGLState& current_state) {
1239 1236
1240void RasterizerOpenGL::SyncTransformFeedback() { 1237void RasterizerOpenGL::SyncTransformFeedback() {
1241 const auto& regs = Core::System::GetInstance().GPU().Maxwell3D().regs; 1238 const auto& regs = Core::System::GetInstance().GPU().Maxwell3D().regs;
1242 1239 UNIMPLEMENTED_IF_MSG(regs.tfb_enabled != 0, "Transform feedbacks are not implemented");
1243 if (regs.tfb_enabled != 0) {
1244 LOG_CRITICAL(Render_OpenGL, "Transform feedbacks are not implemented");
1245 UNREACHABLE();
1246 }
1247} 1240}
1248 1241
1249void RasterizerOpenGL::SyncPointState() { 1242void RasterizerOpenGL::SyncPointState() {
@@ -1263,12 +1256,8 @@ void RasterizerOpenGL::SyncPolygonOffset() {
1263 1256
1264void RasterizerOpenGL::CheckAlphaTests() { 1257void RasterizerOpenGL::CheckAlphaTests() {
1265 const auto& regs = Core::System::GetInstance().GPU().Maxwell3D().regs; 1258 const auto& regs = Core::System::GetInstance().GPU().Maxwell3D().regs;
1266 1259 UNIMPLEMENTED_IF_MSG(regs.alpha_test_enabled != 0 && regs.rt_control.count > 1,
1267 if (regs.alpha_test_enabled != 0 && regs.rt_control.count > 1) { 1260 "Alpha Testing is enabled with more than one rendertarget");
1268 LOG_CRITICAL(Render_OpenGL, "Alpha Testing is enabled with Multiple Render Targets, "
1269 "this behavior is undefined.");
1270 UNREACHABLE();
1271 }
1272} 1261}
1273 1262
1274} // namespace OpenGL 1263} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
index b5a9722f9..e9eb6e921 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
@@ -21,7 +21,7 @@
21#include "video_core/renderer_opengl/gl_rasterizer_cache.h" 21#include "video_core/renderer_opengl/gl_rasterizer_cache.h"
22#include "video_core/renderer_opengl/utils.h" 22#include "video_core/renderer_opengl/utils.h"
23#include "video_core/surface.h" 23#include "video_core/surface.h"
24#include "video_core/textures/astc.h" 24#include "video_core/textures/convert.h"
25#include "video_core/textures/decoders.h" 25#include "video_core/textures/decoders.h"
26 26
27namespace OpenGL { 27namespace OpenGL {
@@ -400,6 +400,27 @@ static const FormatTuple& GetFormatTuple(PixelFormat pixel_format, ComponentType
400 return format; 400 return format;
401} 401}
402 402
403/// Returns the discrepant array target
403 /// Returns the GL target whose array-ness is the opposite of the given surface target's
404constexpr GLenum GetArrayDiscrepantTarget(SurfaceTarget target) {
405 switch (target) {
406 case SurfaceTarget::Texture1D:
407 return GL_TEXTURE_1D_ARRAY;
408 case SurfaceTarget::Texture2D:
409 return GL_TEXTURE_2D_ARRAY;
410 case SurfaceTarget::Texture3D:
411 return GL_NONE;
412 case SurfaceTarget::Texture1DArray:
413 return GL_TEXTURE_1D;
414 case SurfaceTarget::Texture2DArray:
415 return GL_TEXTURE_2D;
416 case SurfaceTarget::TextureCubemap:
417 return GL_TEXTURE_CUBE_MAP_ARRAY;
418 case SurfaceTarget::TextureCubeArray:
419 return GL_TEXTURE_CUBE_MAP;
420 }
421 return GL_NONE;
422}
423
403Common::Rectangle<u32> SurfaceParams::GetRect(u32 mip_level) const { 424Common::Rectangle<u32> SurfaceParams::GetRect(u32 mip_level) const {
404 u32 actual_height{std::max(1U, unaligned_height >> mip_level)}; 425 u32 actual_height{std::max(1U, unaligned_height >> mip_level)};
405 if (IsPixelFormatASTC(pixel_format)) { 426 if (IsPixelFormatASTC(pixel_format)) {
@@ -597,103 +618,6 @@ CachedSurface::CachedSurface(const SurfaceParams& params)
597 } 618 }
598} 619}
599 620
600static void ConvertS8Z24ToZ24S8(std::vector<u8>& data, u32 width, u32 height, bool reverse) {
601 union S8Z24 {
602 BitField<0, 24, u32> z24;
603 BitField<24, 8, u32> s8;
604 };
605 static_assert(sizeof(S8Z24) == 4, "S8Z24 is incorrect size");
606
607 union Z24S8 {
608 BitField<0, 8, u32> s8;
609 BitField<8, 24, u32> z24;
610 };
611 static_assert(sizeof(Z24S8) == 4, "Z24S8 is incorrect size");
612
613 S8Z24 s8z24_pixel{};
614 Z24S8 z24s8_pixel{};
615 constexpr auto bpp{GetBytesPerPixel(PixelFormat::S8Z24)};
616 for (std::size_t y = 0; y < height; ++y) {
617 for (std::size_t x = 0; x < width; ++x) {
618 const std::size_t offset{bpp * (y * width + x)};
619 if (reverse) {
620 std::memcpy(&z24s8_pixel, &data[offset], sizeof(Z24S8));
621 s8z24_pixel.s8.Assign(z24s8_pixel.s8);
622 s8z24_pixel.z24.Assign(z24s8_pixel.z24);
623 std::memcpy(&data[offset], &s8z24_pixel, sizeof(S8Z24));
624 } else {
625 std::memcpy(&s8z24_pixel, &data[offset], sizeof(S8Z24));
626 z24s8_pixel.s8.Assign(s8z24_pixel.s8);
627 z24s8_pixel.z24.Assign(s8z24_pixel.z24);
628 std::memcpy(&data[offset], &z24s8_pixel, sizeof(Z24S8));
629 }
630 }
631 }
632}
633
634/**
635 * Helper function to perform software conversion (as needed) when loading a buffer from Switch
636 * memory. This is for Maxwell pixel formats that cannot be represented as-is in OpenGL or with
637 * typical desktop GPUs.
638 */
639static void ConvertFormatAsNeeded_LoadGLBuffer(std::vector<u8>& data, PixelFormat pixel_format,
640 u32 width, u32 height, u32 depth) {
641 switch (pixel_format) {
642 case PixelFormat::ASTC_2D_4X4:
643 case PixelFormat::ASTC_2D_8X8:
644 case PixelFormat::ASTC_2D_8X5:
645 case PixelFormat::ASTC_2D_5X4:
646 case PixelFormat::ASTC_2D_5X5:
647 case PixelFormat::ASTC_2D_4X4_SRGB:
648 case PixelFormat::ASTC_2D_8X8_SRGB:
649 case PixelFormat::ASTC_2D_8X5_SRGB:
650 case PixelFormat::ASTC_2D_5X4_SRGB:
651 case PixelFormat::ASTC_2D_5X5_SRGB:
652 case PixelFormat::ASTC_2D_10X8:
653 case PixelFormat::ASTC_2D_10X8_SRGB: {
654 // Convert ASTC pixel formats to RGBA8, as most desktop GPUs do not support ASTC.
655 u32 block_width{};
656 u32 block_height{};
657 std::tie(block_width, block_height) = GetASTCBlockSize(pixel_format);
658 data =
659 Tegra::Texture::ASTC::Decompress(data, width, height, depth, block_width, block_height);
660 break;
661 }
662 case PixelFormat::S8Z24:
663 // Convert the S8Z24 depth format to Z24S8, as OpenGL does not support S8Z24.
664 ConvertS8Z24ToZ24S8(data, width, height, false);
665 break;
666 }
667}
668
669/**
670 * Helper function to perform software conversion (as needed) when flushing a buffer from OpenGL to
671 * Switch memory. This is for Maxwell pixel formats that cannot be represented as-is in OpenGL or
672 * with typical desktop GPUs.
673 */
674static void ConvertFormatAsNeeded_FlushGLBuffer(std::vector<u8>& data, PixelFormat pixel_format,
675 u32 width, u32 height) {
676 switch (pixel_format) {
677 case PixelFormat::ASTC_2D_4X4:
678 case PixelFormat::ASTC_2D_8X8:
679 case PixelFormat::ASTC_2D_4X4_SRGB:
680 case PixelFormat::ASTC_2D_8X8_SRGB:
681 case PixelFormat::ASTC_2D_5X5:
682 case PixelFormat::ASTC_2D_5X5_SRGB:
683 case PixelFormat::ASTC_2D_10X8:
684 case PixelFormat::ASTC_2D_10X8_SRGB: {
685 LOG_CRITICAL(HW_GPU, "Conversion of format {} after texture flushing is not implemented",
686 static_cast<u32>(pixel_format));
687 UNREACHABLE();
688 break;
689 }
690 case PixelFormat::S8Z24:
691 // Convert the Z24S8 depth format to S8Z24, as OpenGL does not support S8Z24.
692 ConvertS8Z24ToZ24S8(data, width, height, true);
693 break;
694 }
695}
696
697MICROPROFILE_DEFINE(OpenGL_SurfaceLoad, "OpenGL", "Surface Load", MP_RGB(128, 192, 64)); 621MICROPROFILE_DEFINE(OpenGL_SurfaceLoad, "OpenGL", "Surface Load", MP_RGB(128, 192, 64));
698void CachedSurface::LoadGLBuffer() { 622void CachedSurface::LoadGLBuffer() {
699 MICROPROFILE_SCOPE(OpenGL_SurfaceLoad); 623 MICROPROFILE_SCOPE(OpenGL_SurfaceLoad);
@@ -722,8 +646,16 @@ void CachedSurface::LoadGLBuffer() {
722 } 646 }
723 } 647 }
724 for (u32 i = 0; i < params.max_mip_level; i++) { 648 for (u32 i = 0; i < params.max_mip_level; i++) {
725 ConvertFormatAsNeeded_LoadGLBuffer(gl_buffer[i], params.pixel_format, params.MipWidth(i), 649 const u32 width = params.MipWidth(i);
726 params.MipHeight(i), params.MipDepth(i)); 650 const u32 height = params.MipHeight(i);
651 const u32 depth = params.MipDepth(i);
652 if (VideoCore::Surface::IsPixelFormatASTC(params.pixel_format)) {
653 // Reserve size for RGBA8 conversion
654 constexpr std::size_t rgba_bpp = 4;
655 gl_buffer[i].resize(std::max(gl_buffer[i].size(), width * height * depth * rgba_bpp));
656 }
657 Tegra::Texture::ConvertFromGuestToHost(gl_buffer[i].data(), params.pixel_format, width,
658 height, depth, true, true);
727 } 659 }
728} 660}
729 661
@@ -746,8 +678,8 @@ void CachedSurface::FlushGLBuffer() {
746 glGetTextureImage(texture.handle, 0, tuple.format, tuple.type, 678 glGetTextureImage(texture.handle, 0, tuple.format, tuple.type,
747 static_cast<GLsizei>(gl_buffer[0].size()), gl_buffer[0].data()); 679 static_cast<GLsizei>(gl_buffer[0].size()), gl_buffer[0].data());
748 glPixelStorei(GL_PACK_ROW_LENGTH, 0); 680 glPixelStorei(GL_PACK_ROW_LENGTH, 0);
749 ConvertFormatAsNeeded_FlushGLBuffer(gl_buffer[0], params.pixel_format, params.width, 681 Tegra::Texture::ConvertFromHostToGuest(gl_buffer[0].data(), params.pixel_format, params.width,
750 params.height); 682 params.height, params.depth, true, true);
751 const u8* const texture_src_data = Memory::GetPointer(params.addr); 683 const u8* const texture_src_data = Memory::GetPointer(params.addr);
752 ASSERT(texture_src_data); 684 ASSERT(texture_src_data);
753 if (params.is_tiled) { 685 if (params.is_tiled) {
@@ -884,20 +816,22 @@ void CachedSurface::UploadGLMipmapTexture(u32 mip_map, GLuint read_fb_handle,
884 glPixelStorei(GL_UNPACK_ROW_LENGTH, 0); 816 glPixelStorei(GL_UNPACK_ROW_LENGTH, 0);
885} 817}
886 818
887void CachedSurface::EnsureTextureView() { 819void CachedSurface::EnsureTextureDiscrepantView() {
888 if (texture_view.handle != 0) 820 if (discrepant_view.handle != 0)
889 return; 821 return;
890 822
891 const GLenum target{TargetLayer()}; 823 const GLenum target{GetArrayDiscrepantTarget(params.target)};
824 ASSERT(target != GL_NONE);
825
892 const GLuint num_layers{target == GL_TEXTURE_CUBE_MAP_ARRAY ? 6u : 1u}; 826 const GLuint num_layers{target == GL_TEXTURE_CUBE_MAP_ARRAY ? 6u : 1u};
893 constexpr GLuint min_layer = 0; 827 constexpr GLuint min_layer = 0;
894 constexpr GLuint min_level = 0; 828 constexpr GLuint min_level = 0;
895 829
896 glGenTextures(1, &texture_view.handle); 830 glGenTextures(1, &discrepant_view.handle);
897 glTextureView(texture_view.handle, target, texture.handle, gl_internal_format, min_level, 831 glTextureView(discrepant_view.handle, target, texture.handle, gl_internal_format, min_level,
898 params.max_mip_level, min_layer, num_layers); 832 params.max_mip_level, min_layer, num_layers);
899 ApplyTextureDefaults(texture_view.handle, params.max_mip_level); 833 ApplyTextureDefaults(discrepant_view.handle, params.max_mip_level);
900 glTextureParameteriv(texture_view.handle, GL_TEXTURE_SWIZZLE_RGBA, 834 glTextureParameteriv(discrepant_view.handle, GL_TEXTURE_SWIZZLE_RGBA,
901 reinterpret_cast<const GLint*>(swizzle.data())); 835 reinterpret_cast<const GLint*>(swizzle.data()));
902} 836}
903 837
@@ -923,8 +857,8 @@ void CachedSurface::UpdateSwizzle(Tegra::Texture::SwizzleSource swizzle_x,
923 swizzle = {new_x, new_y, new_z, new_w}; 857 swizzle = {new_x, new_y, new_z, new_w};
924 const auto swizzle_data = reinterpret_cast<const GLint*>(swizzle.data()); 858 const auto swizzle_data = reinterpret_cast<const GLint*>(swizzle.data());
925 glTextureParameteriv(texture.handle, GL_TEXTURE_SWIZZLE_RGBA, swizzle_data); 859 glTextureParameteriv(texture.handle, GL_TEXTURE_SWIZZLE_RGBA, swizzle_data);
926 if (texture_view.handle != 0) { 860 if (discrepant_view.handle != 0) {
927 glTextureParameteriv(texture_view.handle, GL_TEXTURE_SWIZZLE_RGBA, swizzle_data); 861 glTextureParameteriv(discrepant_view.handle, GL_TEXTURE_SWIZZLE_RGBA, swizzle_data);
928 } 862 }
929} 863}
930 864
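The S8Z24 and ASTC conversion helpers deleted above now sit behind Tegra::Texture::ConvertFromGuestToHost / ConvertFromHostToGuest in the new textures/convert.cpp added to the build. A sketch of the call shape as used here; the meaning of the two trailing boolean flags is not spelled out in this diff, so the names chosen for them below are an assumption about convert.h rather than its actual parameter names:

    #include <vector>

    #include "common/common_types.h"
    #include "video_core/surface.h"
    #include "video_core/textures/convert.h"

    void ConvertMipForHost(std::vector<u8>& buffer, VideoCore::Surface::PixelFormat format,
                           u32 width, u32 height, u32 depth) {
        // Assumption: the flags select which conversions may be applied (ASTC decompression
        // and S8Z24 <-> Z24S8 reordering), matching the conversions this code previously did inline.
        constexpr bool convert_astc = true;
        constexpr bool convert_s8z24 = true;
        Tegra::Texture::ConvertFromGuestToHost(buffer.data(), format, width, height, depth,
                                               convert_astc, convert_s8z24);
    }
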
diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.h b/src/video_core/renderer_opengl/gl_rasterizer_cache.h
index 797bbdc9c..9cf6f50be 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer_cache.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.h
@@ -367,31 +367,19 @@ public:
367 return texture; 367 return texture;
368 } 368 }
369 369
370 const OGLTexture& TextureLayer() { 370 const OGLTexture& Texture(bool as_array) {
371 if (params.is_array) { 371 if (params.is_array == as_array) {
372 return Texture(); 372 return texture;
373 } else {
374 EnsureTextureDiscrepantView();
375 return discrepant_view;
373 } 376 }
374 EnsureTextureView();
375 return texture_view;
376 } 377 }
377 378
378 GLenum Target() const { 379 GLenum Target() const {
379 return gl_target; 380 return gl_target;
380 } 381 }
381 382
382 GLenum TargetLayer() const {
383 using VideoCore::Surface::SurfaceTarget;
384 switch (params.target) {
385 case SurfaceTarget::Texture1D:
386 return GL_TEXTURE_1D_ARRAY;
387 case SurfaceTarget::Texture2D:
388 return GL_TEXTURE_2D_ARRAY;
389 case SurfaceTarget::TextureCubemap:
390 return GL_TEXTURE_CUBE_MAP_ARRAY;
391 }
392 return Target();
393 }
394
395 const SurfaceParams& GetSurfaceParams() const { 383 const SurfaceParams& GetSurfaceParams() const {
396 return params; 384 return params;
397 } 385 }
@@ -431,10 +419,10 @@ public:
431private: 419private:
432 void UploadGLMipmapTexture(u32 mip_map, GLuint read_fb_handle, GLuint draw_fb_handle); 420 void UploadGLMipmapTexture(u32 mip_map, GLuint read_fb_handle, GLuint draw_fb_handle);
433 421
434 void EnsureTextureView(); 422 void EnsureTextureDiscrepantView();
435 423
436 OGLTexture texture; 424 OGLTexture texture;
437 OGLTexture texture_view; 425 OGLTexture discrepant_view;
438 std::vector<std::vector<u8>> gl_buffer; 426 std::vector<std::vector<u8>> gl_buffer;
439 SurfaceParams params{}; 427 SurfaceParams params{};
440 GLenum gl_target{}; 428 GLenum gl_target{};
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
index 72ff6ac6a..11d1169f0 100644
--- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
@@ -5,7 +5,9 @@
5#include <array> 5#include <array>
6#include <string> 6#include <string>
7#include <string_view> 7#include <string_view>
8#include <utility>
8#include <variant> 9#include <variant>
10#include <vector>
9 11
10#include <fmt/format.h> 12#include <fmt/format.h>
11 13
@@ -717,7 +719,7 @@ private:
717 } 719 }
718 720
719 std::string GenerateTexture(Operation operation, const std::string& func, 721 std::string GenerateTexture(Operation operation, const std::string& func,
720 bool is_extra_int = false) { 722 const std::vector<std::pair<Type, Node>>& extras) {
721 constexpr std::array<const char*, 4> coord_constructors = {"float", "vec2", "vec3", "vec4"}; 723 constexpr std::array<const char*, 4> coord_constructors = {"float", "vec2", "vec3", "vec4"};
722 724
723 const auto meta = std::get_if<MetaTexture>(&operation.GetMeta()); 725 const auto meta = std::get_if<MetaTexture>(&operation.GetMeta());
@@ -738,36 +740,47 @@ private:
738 expr += Visit(operation[i]); 740 expr += Visit(operation[i]);
739 741
740 const std::size_t next = i + 1; 742 const std::size_t next = i + 1;
741 if (next < count || has_array || has_shadow) 743 if (next < count)
742 expr += ", "; 744 expr += ", ";
743 } 745 }
744 if (has_array) { 746 if (has_array) {
745 expr += "float(ftoi(" + Visit(meta->array) + "))"; 747 expr += ", float(ftoi(" + Visit(meta->array) + "))";
746 } 748 }
747 if (has_shadow) { 749 if (has_shadow) {
748 if (has_array) 750 expr += ", " + Visit(meta->depth_compare);
749 expr += ", ";
750 expr += Visit(meta->depth_compare);
751 } 751 }
752 expr += ')'; 752 expr += ')';
753 753
754 for (const Node extra : meta->extras) { 754 for (const auto& extra_pair : extras) {
755 const auto [type, operand] = extra_pair;
756 if (operand == nullptr) {
757 continue;
758 }
755 expr += ", "; 759 expr += ", ";
756 if (is_extra_int) { 760
757 if (const auto immediate = std::get_if<ImmediateNode>(extra)) { 761 switch (type) {
762 case Type::Int:
763 if (const auto immediate = std::get_if<ImmediateNode>(operand)) {
758 // Inline the string as an immediate integer in GLSL (some extra arguments are 764 // Inline the string as an immediate integer in GLSL (some extra arguments are
759 // required to be constant) 765 // required to be constant)
760 expr += std::to_string(static_cast<s32>(immediate->GetValue())); 766 expr += std::to_string(static_cast<s32>(immediate->GetValue()));
761 } else { 767 } else {
762 expr += "ftoi(" + Visit(extra) + ')'; 768 expr += "ftoi(" + Visit(operand) + ')';
763 } 769 }
764 } else { 770 break;
765 expr += Visit(extra); 771 case Type::Float:
772 expr += Visit(operand);
773 break;
774 default: {
775 const auto type_int = static_cast<u32>(type);
776 UNIMPLEMENTED_MSG("Unimplemented extra type={}", type_int);
777 expr += '0';
778 break;
779 }
766 } 780 }
767 } 781 }
768 782
769 expr += ')'; 783 return expr + ')';
770 return expr;
771 } 784 }
772 785
773 std::string Assign(Operation operation) { 786 std::string Assign(Operation operation) {
@@ -1146,7 +1159,7 @@ private:
1146 const auto meta = std::get_if<MetaTexture>(&operation.GetMeta()); 1159 const auto meta = std::get_if<MetaTexture>(&operation.GetMeta());
1147 ASSERT(meta); 1160 ASSERT(meta);
1148 1161
1149 std::string expr = GenerateTexture(operation, "texture"); 1162 std::string expr = GenerateTexture(operation, "texture", {{Type::Float, meta->bias}});
1150 if (meta->sampler.IsShadow()) { 1163 if (meta->sampler.IsShadow()) {
1151 expr = "vec4(" + expr + ')'; 1164 expr = "vec4(" + expr + ')';
1152 } 1165 }
@@ -1157,7 +1170,7 @@ private:
1157 const auto meta = std::get_if<MetaTexture>(&operation.GetMeta()); 1170 const auto meta = std::get_if<MetaTexture>(&operation.GetMeta());
1158 ASSERT(meta); 1171 ASSERT(meta);
1159 1172
1160 std::string expr = GenerateTexture(operation, "textureLod"); 1173 std::string expr = GenerateTexture(operation, "textureLod", {{Type::Float, meta->lod}});
1161 if (meta->sampler.IsShadow()) { 1174 if (meta->sampler.IsShadow()) {
1162 expr = "vec4(" + expr + ')'; 1175 expr = "vec4(" + expr + ')';
1163 } 1176 }
@@ -1168,7 +1181,8 @@ private:
1168 const auto meta = std::get_if<MetaTexture>(&operation.GetMeta()); 1181 const auto meta = std::get_if<MetaTexture>(&operation.GetMeta());
1169 ASSERT(meta); 1182 ASSERT(meta);
1170 1183
1171 return GenerateTexture(operation, "textureGather", !meta->sampler.IsShadow()) + 1184 const auto type = meta->sampler.IsShadow() ? Type::Float : Type::Int;
1185 return GenerateTexture(operation, "textureGather", {{type, meta->component}}) +
1172 GetSwizzle(meta->element); 1186 GetSwizzle(meta->element);
1173 } 1187 }
1174 1188
@@ -1197,8 +1211,8 @@ private:
1197 ASSERT(meta); 1211 ASSERT(meta);
1198 1212
1199 if (meta->element < 2) { 1213 if (meta->element < 2) {
1200 return "itof(int((" + GenerateTexture(operation, "textureQueryLod") + " * vec2(256))" + 1214 return "itof(int((" + GenerateTexture(operation, "textureQueryLod", {}) +
1201 GetSwizzle(meta->element) + "))"; 1215 " * vec2(256))" + GetSwizzle(meta->element) + "))";
1202 } 1216 }
1203 return "0"; 1217 return "0";
1204 } 1218 }
@@ -1224,9 +1238,9 @@ private:
1224 else if (next < count) 1238 else if (next < count)
1225 expr += ", "; 1239 expr += ", ";
1226 } 1240 }
1227 for (std::size_t i = 0; i < meta->extras.size(); ++i) { 1241 if (meta->lod) {
1228 expr += ", "; 1242 expr += ", ";
1229 expr += CastOperand(Visit(meta->extras.at(i)), Type::Int); 1243 expr += CastOperand(Visit(meta->lod), Type::Int);
1230 } 1244 }
1231 expr += ')'; 1245 expr += ')';
1232 1246
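With the extras now passed as typed (Type, Node) pairs and the array/shadow operands prefixed with their own ", " separators, a textureLod lookup into a 2D array assembles roughly as below. The identifiers are placeholders for whatever the Visit calls return; only the shape of the generated call follows from the code above:

    #include <string>

    // Hypothetical GLSL produced by GenerateTexture for a 2D array textureLod: two visited
    // coordinates, the array index wrapped in float(ftoi(...)), then the Type::Float LOD extra.
    const std::string expr =
        "textureLod(sampler0, vec3(coord_x, coord_y, float(ftoi(array_index))), lod_value)";
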
diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp
index e60b2eb44..8b510b6ae 100644
--- a/src/video_core/renderer_opengl/renderer_opengl.cpp
+++ b/src/video_core/renderer_opengl/renderer_opengl.cpp
@@ -244,6 +244,21 @@ void RendererOpenGL::InitOpenGLObjects() {
244 LoadColorToActiveGLTexture(0, 0, 0, 0, screen_info.texture); 244 LoadColorToActiveGLTexture(0, 0, 0, 0, screen_info.texture);
245} 245}
246 246
247void RendererOpenGL::AddTelemetryFields() {
248 const char* const gl_version{reinterpret_cast<char const*>(glGetString(GL_VERSION))};
249 const char* const gpu_vendor{reinterpret_cast<char const*>(glGetString(GL_VENDOR))};
250 const char* const gpu_model{reinterpret_cast<char const*>(glGetString(GL_RENDERER))};
251
252 LOG_INFO(Render_OpenGL, "GL_VERSION: {}", gl_version);
253 LOG_INFO(Render_OpenGL, "GL_VENDOR: {}", gpu_vendor);
254 LOG_INFO(Render_OpenGL, "GL_RENDERER: {}", gpu_model);
255
256 auto& telemetry_session = system.TelemetrySession();
257 telemetry_session.AddField(Telemetry::FieldType::UserSystem, "GPU_Vendor", gpu_vendor);
258 telemetry_session.AddField(Telemetry::FieldType::UserSystem, "GPU_Model", gpu_model);
259 telemetry_session.AddField(Telemetry::FieldType::UserSystem, "GPU_OpenGL_Version", gl_version);
260}
261
247void RendererOpenGL::CreateRasterizer() { 262void RendererOpenGL::CreateRasterizer() {
248 if (rasterizer) { 263 if (rasterizer) {
249 return; 264 return;
@@ -466,17 +481,7 @@ bool RendererOpenGL::Init() {
466 glDebugMessageCallback(DebugHandler, nullptr); 481 glDebugMessageCallback(DebugHandler, nullptr);
467 } 482 }
468 483
469 const char* gl_version{reinterpret_cast<char const*>(glGetString(GL_VERSION))}; 484 AddTelemetryFields();
470 const char* gpu_vendor{reinterpret_cast<char const*>(glGetString(GL_VENDOR))};
471 const char* gpu_model{reinterpret_cast<char const*>(glGetString(GL_RENDERER))};
472
473 LOG_INFO(Render_OpenGL, "GL_VERSION: {}", gl_version);
474 LOG_INFO(Render_OpenGL, "GL_VENDOR: {}", gpu_vendor);
475 LOG_INFO(Render_OpenGL, "GL_RENDERER: {}", gpu_model);
476
477 Core::Telemetry().AddField(Telemetry::FieldType::UserSystem, "GPU_Vendor", gpu_vendor);
478 Core::Telemetry().AddField(Telemetry::FieldType::UserSystem, "GPU_Model", gpu_model);
479 Core::Telemetry().AddField(Telemetry::FieldType::UserSystem, "GPU_OpenGL_Version", gl_version);
480 485
481 if (!GLAD_GL_VERSION_4_3) { 486 if (!GLAD_GL_VERSION_4_3) {
482 return false; 487 return false;
diff --git a/src/video_core/renderer_opengl/renderer_opengl.h b/src/video_core/renderer_opengl/renderer_opengl.h
index c168fa89e..6cbf9d2cb 100644
--- a/src/video_core/renderer_opengl/renderer_opengl.h
+++ b/src/video_core/renderer_opengl/renderer_opengl.h
@@ -60,6 +60,7 @@ public:
60 60
61private: 61private:
62 void InitOpenGLObjects(); 62 void InitOpenGLObjects();
63 void AddTelemetryFields();
63 void CreateRasterizer(); 64 void CreateRasterizer();
64 65
65 void ConfigureFramebufferTexture(TextureInfo& texture, 66 void ConfigureFramebufferTexture(TextureInfo& texture,
diff --git a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp
new file mode 100644
index 000000000..34bf26ff2
--- /dev/null
+++ b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp
@@ -0,0 +1,483 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/assert.h"
6#include "common/common_types.h"
7#include "common/logging/log.h"
8#include "video_core/engines/maxwell_3d.h"
9#include "video_core/renderer_vulkan/declarations.h"
10#include "video_core/renderer_vulkan/maxwell_to_vk.h"
11#include "video_core/renderer_vulkan/vk_device.h"
12#include "video_core/surface.h"
13
14namespace Vulkan::MaxwellToVK {
15
16namespace Sampler {
17
18vk::Filter Filter(Tegra::Texture::TextureFilter filter) {
19 switch (filter) {
20 case Tegra::Texture::TextureFilter::Linear:
21 return vk::Filter::eLinear;
22 case Tegra::Texture::TextureFilter::Nearest:
23 return vk::Filter::eNearest;
24 }
25 UNIMPLEMENTED_MSG("Unimplemented sampler filter={}", static_cast<u32>(filter));
26 return {};
27}
28
29vk::SamplerMipmapMode MipmapMode(Tegra::Texture::TextureMipmapFilter mipmap_filter) {
30 switch (mipmap_filter) {
31 case Tegra::Texture::TextureMipmapFilter::None:
32        // TODO(Rodrigo): None seems to be mapped to OpenGL's mag and min filters without mipmapping
33        // (e.g. GL_NEAREST and GL_LINEAR). Vulkan doesn't have such a thing; find out whether we
34        // have to use an image view with a single mipmap level to emulate this.
35 return vk::SamplerMipmapMode::eLinear;
36 case Tegra::Texture::TextureMipmapFilter::Linear:
37 return vk::SamplerMipmapMode::eLinear;
38 case Tegra::Texture::TextureMipmapFilter::Nearest:
39 return vk::SamplerMipmapMode::eNearest;
40 }
41 UNIMPLEMENTED_MSG("Unimplemented sampler mipmap mode={}", static_cast<u32>(mipmap_filter));
42 return {};
43}
44
45vk::SamplerAddressMode WrapMode(Tegra::Texture::WrapMode wrap_mode) {
46 switch (wrap_mode) {
47 case Tegra::Texture::WrapMode::Wrap:
48 return vk::SamplerAddressMode::eRepeat;
49 case Tegra::Texture::WrapMode::Mirror:
50 return vk::SamplerAddressMode::eMirroredRepeat;
51 case Tegra::Texture::WrapMode::ClampToEdge:
52 return vk::SamplerAddressMode::eClampToEdge;
53 case Tegra::Texture::WrapMode::Border:
54 return vk::SamplerAddressMode::eClampToBorder;
55 case Tegra::Texture::WrapMode::ClampOGL:
56        // TODO(Rodrigo): GL_CLAMP was removed as of OpenGL 3.1; to emulate it, we can use
57        // eClampToBorder to get the border color of the texture and then sample the edge to
58        // manually mix them. However, the shader part of this is not yet implemented.
59 return vk::SamplerAddressMode::eClampToBorder;
60 case Tegra::Texture::WrapMode::MirrorOnceClampToEdge:
61 return vk::SamplerAddressMode::eMirrorClampToEdge;
62 case Tegra::Texture::WrapMode::MirrorOnceBorder:
63 UNIMPLEMENTED();
64 return vk::SamplerAddressMode::eMirrorClampToEdge;
65 }
66 UNIMPLEMENTED_MSG("Unimplemented wrap mode={}", static_cast<u32>(wrap_mode));
67 return {};
68}
69
70vk::CompareOp DepthCompareFunction(Tegra::Texture::DepthCompareFunc depth_compare_func) {
71 switch (depth_compare_func) {
72 case Tegra::Texture::DepthCompareFunc::Never:
73 return vk::CompareOp::eNever;
74 case Tegra::Texture::DepthCompareFunc::Less:
75 return vk::CompareOp::eLess;
76 case Tegra::Texture::DepthCompareFunc::LessEqual:
77 return vk::CompareOp::eLessOrEqual;
78 case Tegra::Texture::DepthCompareFunc::Equal:
79 return vk::CompareOp::eEqual;
80 case Tegra::Texture::DepthCompareFunc::NotEqual:
81 return vk::CompareOp::eNotEqual;
82 case Tegra::Texture::DepthCompareFunc::Greater:
83 return vk::CompareOp::eGreater;
84 case Tegra::Texture::DepthCompareFunc::GreaterEqual:
85 return vk::CompareOp::eGreaterOrEqual;
86 case Tegra::Texture::DepthCompareFunc::Always:
87 return vk::CompareOp::eAlways;
88 }
89 UNIMPLEMENTED_MSG("Unimplemented sampler depth compare function={}",
90 static_cast<u32>(depth_compare_func));
91 return {};
92}
93
94} // namespace Sampler
95
96struct FormatTuple {
97 vk::Format format; ///< Vulkan format
98 ComponentType component_type; ///< Abstracted component type
99 bool attachable; ///< True when this format can be used as an attachment
100};
101
102static constexpr std::array<FormatTuple, VideoCore::Surface::MaxPixelFormat> tex_format_tuples = {{
103 {vk::Format::eA8B8G8R8UnormPack32, ComponentType::UNorm, true}, // ABGR8U
104 {vk::Format::eUndefined, ComponentType::Invalid, false}, // ABGR8S
105 {vk::Format::eUndefined, ComponentType::Invalid, false}, // ABGR8UI
106 {vk::Format::eB5G6R5UnormPack16, ComponentType::UNorm, false}, // B5G6R5U
107 {vk::Format::eA2B10G10R10UnormPack32, ComponentType::UNorm, true}, // A2B10G10R10U
108 {vk::Format::eUndefined, ComponentType::Invalid, false}, // A1B5G5R5U
109 {vk::Format::eR8Unorm, ComponentType::UNorm, true}, // R8U
110 {vk::Format::eUndefined, ComponentType::Invalid, false}, // R8UI
111 {vk::Format::eUndefined, ComponentType::Invalid, false}, // RGBA16F
112 {vk::Format::eUndefined, ComponentType::Invalid, false}, // RGBA16U
113 {vk::Format::eUndefined, ComponentType::Invalid, false}, // RGBA16UI
114 {vk::Format::eUndefined, ComponentType::Invalid, false}, // R11FG11FB10F
115 {vk::Format::eUndefined, ComponentType::Invalid, false}, // RGBA32UI
116 {vk::Format::eBc1RgbaUnormBlock, ComponentType::UNorm, false}, // DXT1
117 {vk::Format::eBc2UnormBlock, ComponentType::UNorm, false}, // DXT23
118 {vk::Format::eBc3UnormBlock, ComponentType::UNorm, false}, // DXT45
119 {vk::Format::eBc4UnormBlock, ComponentType::UNorm, false}, // DXN1
120 {vk::Format::eUndefined, ComponentType::Invalid, false}, // DXN2UNORM
121 {vk::Format::eUndefined, ComponentType::Invalid, false}, // DXN2SNORM
122 {vk::Format::eUndefined, ComponentType::Invalid, false}, // BC7U
123 {vk::Format::eUndefined, ComponentType::Invalid, false}, // BC6H_UF16
124 {vk::Format::eUndefined, ComponentType::Invalid, false}, // BC6H_SF16
125 {vk::Format::eUndefined, ComponentType::Invalid, false}, // ASTC_2D_4X4
126 {vk::Format::eUndefined, ComponentType::Invalid, false}, // BGRA8
127 {vk::Format::eUndefined, ComponentType::Invalid, false}, // RGBA32F
128 {vk::Format::eUndefined, ComponentType::Invalid, false}, // RG32F
129 {vk::Format::eUndefined, ComponentType::Invalid, false}, // R32F
130 {vk::Format::eUndefined, ComponentType::Invalid, false}, // R16F
131 {vk::Format::eUndefined, ComponentType::Invalid, false}, // R16U
132 {vk::Format::eUndefined, ComponentType::Invalid, false}, // R16S
133 {vk::Format::eUndefined, ComponentType::Invalid, false}, // R16UI
134 {vk::Format::eUndefined, ComponentType::Invalid, false}, // R16I
135 {vk::Format::eUndefined, ComponentType::Invalid, false}, // RG16
136 {vk::Format::eUndefined, ComponentType::Invalid, false}, // RG16F
137 {vk::Format::eUndefined, ComponentType::Invalid, false}, // RG16UI
138 {vk::Format::eUndefined, ComponentType::Invalid, false}, // RG16I
139 {vk::Format::eUndefined, ComponentType::Invalid, false}, // RG16S
140 {vk::Format::eUndefined, ComponentType::Invalid, false}, // RGB32F
141 {vk::Format::eA8B8G8R8SrgbPack32, ComponentType::UNorm, true}, // RGBA8_SRGB
142 {vk::Format::eUndefined, ComponentType::Invalid, false}, // RG8U
143 {vk::Format::eUndefined, ComponentType::Invalid, false}, // RG8S
144 {vk::Format::eUndefined, ComponentType::Invalid, false}, // RG32UI
145 {vk::Format::eUndefined, ComponentType::Invalid, false}, // R32UI
146 {vk::Format::eUndefined, ComponentType::Invalid, false}, // ASTC_2D_8X8
147 {vk::Format::eUndefined, ComponentType::Invalid, false}, // ASTC_2D_8X5
148 {vk::Format::eUndefined, ComponentType::Invalid, false}, // ASTC_2D_5X4
149
150 // Compressed sRGB formats
151 {vk::Format::eUndefined, ComponentType::Invalid, false}, // BGRA8_SRGB
152 {vk::Format::eUndefined, ComponentType::Invalid, false}, // DXT1_SRGB
153 {vk::Format::eUndefined, ComponentType::Invalid, false}, // DXT23_SRGB
154 {vk::Format::eUndefined, ComponentType::Invalid, false}, // DXT45_SRGB
155 {vk::Format::eUndefined, ComponentType::Invalid, false}, // BC7U_SRGB
156 {vk::Format::eUndefined, ComponentType::Invalid, false}, // ASTC_2D_4X4_SRGB
157 {vk::Format::eUndefined, ComponentType::Invalid, false}, // ASTC_2D_8X8_SRGB
158 {vk::Format::eUndefined, ComponentType::Invalid, false}, // ASTC_2D_8X5_SRGB
159 {vk::Format::eUndefined, ComponentType::Invalid, false}, // ASTC_2D_5X4_SRGB
160 {vk::Format::eUndefined, ComponentType::Invalid, false}, // ASTC_2D_5X5
161 {vk::Format::eUndefined, ComponentType::Invalid, false}, // ASTC_2D_5X5_SRGB
162 {vk::Format::eUndefined, ComponentType::Invalid, false}, // ASTC_2D_10X8
163 {vk::Format::eUndefined, ComponentType::Invalid, false}, // ASTC_2D_10X8_SRGB
164
165 // Depth formats
166 {vk::Format::eD32Sfloat, ComponentType::Float, true}, // Z32F
167 {vk::Format::eD16Unorm, ComponentType::UNorm, true}, // Z16
168
169 // DepthStencil formats
170 {vk::Format::eD24UnormS8Uint, ComponentType::UNorm, true}, // Z24S8
171 {vk::Format::eD24UnormS8Uint, ComponentType::UNorm, true}, // S8Z24 (emulated)
172 {vk::Format::eUndefined, ComponentType::Invalid, false}, // Z32FS8
173}};
174
175static constexpr bool IsZetaFormat(PixelFormat pixel_format) {
176 return pixel_format >= PixelFormat::MaxColorFormat &&
177 pixel_format < PixelFormat::MaxDepthStencilFormat;
178}
179
180std::pair<vk::Format, bool> SurfaceFormat(const VKDevice& device, FormatType format_type,
181 PixelFormat pixel_format, ComponentType component_type) {
182 ASSERT(static_cast<std::size_t>(pixel_format) < tex_format_tuples.size());
183
184 const auto tuple = tex_format_tuples[static_cast<u32>(pixel_format)];
185 UNIMPLEMENTED_IF_MSG(tuple.format == vk::Format::eUndefined,
186 "Unimplemented texture format with pixel format={} and component type={}",
187 static_cast<u32>(pixel_format), static_cast<u32>(component_type));
188 ASSERT_MSG(component_type == tuple.component_type, "Component type mismatch");
189
190 auto usage = vk::FormatFeatureFlagBits::eSampledImage |
191 vk::FormatFeatureFlagBits::eTransferDst | vk::FormatFeatureFlagBits::eTransferSrc;
192 if (tuple.attachable) {
193 usage |= IsZetaFormat(pixel_format) ? vk::FormatFeatureFlagBits::eDepthStencilAttachment
194 : vk::FormatFeatureFlagBits::eColorAttachment;
195 }
196 return {device.GetSupportedFormat(tuple.format, usage, format_type), tuple.attachable};
197}
198
199vk::ShaderStageFlagBits ShaderStage(Maxwell::ShaderStage stage) {
200 switch (stage) {
201 case Maxwell::ShaderStage::Vertex:
202 return vk::ShaderStageFlagBits::eVertex;
203 case Maxwell::ShaderStage::TesselationControl:
204 return vk::ShaderStageFlagBits::eTessellationControl;
205 case Maxwell::ShaderStage::TesselationEval:
206 return vk::ShaderStageFlagBits::eTessellationEvaluation;
207 case Maxwell::ShaderStage::Geometry:
208 return vk::ShaderStageFlagBits::eGeometry;
209 case Maxwell::ShaderStage::Fragment:
210 return vk::ShaderStageFlagBits::eFragment;
211 }
212 UNIMPLEMENTED_MSG("Unimplemented shader stage={}", static_cast<u32>(stage));
213 return {};
214}
215
216vk::PrimitiveTopology PrimitiveTopology(Maxwell::PrimitiveTopology topology) {
217 switch (topology) {
218 case Maxwell::PrimitiveTopology::Points:
219 return vk::PrimitiveTopology::ePointList;
220 case Maxwell::PrimitiveTopology::Lines:
221 return vk::PrimitiveTopology::eLineList;
222 case Maxwell::PrimitiveTopology::LineStrip:
223 return vk::PrimitiveTopology::eLineStrip;
224 case Maxwell::PrimitiveTopology::Triangles:
225 return vk::PrimitiveTopology::eTriangleList;
226 case Maxwell::PrimitiveTopology::TriangleStrip:
227 return vk::PrimitiveTopology::eTriangleStrip;
228 }
229 UNIMPLEMENTED_MSG("Unimplemented topology={}", static_cast<u32>(topology));
230 return {};
231}
232
233vk::Format VertexFormat(Maxwell::VertexAttribute::Type type, Maxwell::VertexAttribute::Size size) {
234 switch (type) {
235 case Maxwell::VertexAttribute::Type::SignedNorm:
236 break;
237 case Maxwell::VertexAttribute::Type::UnsignedNorm:
238 switch (size) {
239 case Maxwell::VertexAttribute::Size::Size_8_8_8_8:
240 return vk::Format::eR8G8B8A8Unorm;
241 default:
242 break;
243 }
244 break;
245 case Maxwell::VertexAttribute::Type::SignedInt:
246 break;
247 case Maxwell::VertexAttribute::Type::UnsignedInt:
248 switch (size) {
249 case Maxwell::VertexAttribute::Size::Size_32:
250 return vk::Format::eR32Uint;
251 default:
252 break;
253 }
254 case Maxwell::VertexAttribute::Type::UnsignedScaled:
255 case Maxwell::VertexAttribute::Type::SignedScaled:
256 break;
257 case Maxwell::VertexAttribute::Type::Float:
258 switch (size) {
259 case Maxwell::VertexAttribute::Size::Size_32_32_32_32:
260 return vk::Format::eR32G32B32A32Sfloat;
261 case Maxwell::VertexAttribute::Size::Size_32_32_32:
262 return vk::Format::eR32G32B32Sfloat;
263 case Maxwell::VertexAttribute::Size::Size_32_32:
264 return vk::Format::eR32G32Sfloat;
265 case Maxwell::VertexAttribute::Size::Size_32:
266 return vk::Format::eR32Sfloat;
267 default:
268 break;
269 }
270 break;
271 }
272 UNIMPLEMENTED_MSG("Unimplemented vertex format of type={} and size={}", static_cast<u32>(type),
273 static_cast<u32>(size));
274 return {};
275}
276
277vk::CompareOp ComparisonOp(Maxwell::ComparisonOp comparison) {
278 switch (comparison) {
279 case Maxwell::ComparisonOp::Never:
280 case Maxwell::ComparisonOp::NeverOld:
281 return vk::CompareOp::eNever;
282 case Maxwell::ComparisonOp::Less:
283 case Maxwell::ComparisonOp::LessOld:
284 return vk::CompareOp::eLess;
285 case Maxwell::ComparisonOp::Equal:
286 case Maxwell::ComparisonOp::EqualOld:
287 return vk::CompareOp::eEqual;
288 case Maxwell::ComparisonOp::LessEqual:
289 case Maxwell::ComparisonOp::LessEqualOld:
290 return vk::CompareOp::eLessOrEqual;
291 case Maxwell::ComparisonOp::Greater:
292 case Maxwell::ComparisonOp::GreaterOld:
293 return vk::CompareOp::eGreater;
294 case Maxwell::ComparisonOp::NotEqual:
295 case Maxwell::ComparisonOp::NotEqualOld:
296 return vk::CompareOp::eNotEqual;
297 case Maxwell::ComparisonOp::GreaterEqual:
298 case Maxwell::ComparisonOp::GreaterEqualOld:
299 return vk::CompareOp::eGreaterOrEqual;
300 case Maxwell::ComparisonOp::Always:
301 case Maxwell::ComparisonOp::AlwaysOld:
302 return vk::CompareOp::eAlways;
303 }
304 UNIMPLEMENTED_MSG("Unimplemented comparison op={}", static_cast<u32>(comparison));
305 return {};
306}
307
308vk::IndexType IndexFormat(Maxwell::IndexFormat index_format) {
309 switch (index_format) {
310 case Maxwell::IndexFormat::UnsignedByte:
311 UNIMPLEMENTED_MSG("Vulkan does not support native u8 index format");
312 return vk::IndexType::eUint16;
313 case Maxwell::IndexFormat::UnsignedShort:
314 return vk::IndexType::eUint16;
315 case Maxwell::IndexFormat::UnsignedInt:
316 return vk::IndexType::eUint32;
317 }
318 UNIMPLEMENTED_MSG("Unimplemented index_format={}", static_cast<u32>(index_format));
319 return {};
320}
321
322vk::StencilOp StencilOp(Maxwell::StencilOp stencil_op) {
323 switch (stencil_op) {
324 case Maxwell::StencilOp::Keep:
325 case Maxwell::StencilOp::KeepOGL:
326 return vk::StencilOp::eKeep;
327 case Maxwell::StencilOp::Zero:
328 case Maxwell::StencilOp::ZeroOGL:
329 return vk::StencilOp::eZero;
330 case Maxwell::StencilOp::Replace:
331 case Maxwell::StencilOp::ReplaceOGL:
332 return vk::StencilOp::eReplace;
333 case Maxwell::StencilOp::Incr:
334 case Maxwell::StencilOp::IncrOGL:
335 return vk::StencilOp::eIncrementAndClamp;
336 case Maxwell::StencilOp::Decr:
337 case Maxwell::StencilOp::DecrOGL:
338 return vk::StencilOp::eDecrementAndClamp;
339 case Maxwell::StencilOp::Invert:
340 case Maxwell::StencilOp::InvertOGL:
341 return vk::StencilOp::eInvert;
342 case Maxwell::StencilOp::IncrWrap:
343 case Maxwell::StencilOp::IncrWrapOGL:
344 return vk::StencilOp::eIncrementAndWrap;
345 case Maxwell::StencilOp::DecrWrap:
346 case Maxwell::StencilOp::DecrWrapOGL:
347 return vk::StencilOp::eDecrementAndWrap;
348 }
349 UNIMPLEMENTED_MSG("Unimplemented stencil op={}", static_cast<u32>(stencil_op));
350 return {};
351}
352
353vk::BlendOp BlendEquation(Maxwell::Blend::Equation equation) {
354 switch (equation) {
355 case Maxwell::Blend::Equation::Add:
356 case Maxwell::Blend::Equation::AddGL:
357 return vk::BlendOp::eAdd;
358 case Maxwell::Blend::Equation::Subtract:
359 case Maxwell::Blend::Equation::SubtractGL:
360 return vk::BlendOp::eSubtract;
361 case Maxwell::Blend::Equation::ReverseSubtract:
362 case Maxwell::Blend::Equation::ReverseSubtractGL:
363 return vk::BlendOp::eReverseSubtract;
364 case Maxwell::Blend::Equation::Min:
365 case Maxwell::Blend::Equation::MinGL:
366 return vk::BlendOp::eMin;
367 case Maxwell::Blend::Equation::Max:
368 case Maxwell::Blend::Equation::MaxGL:
369 return vk::BlendOp::eMax;
370 }
371 UNIMPLEMENTED_MSG("Unimplemented blend equation={}", static_cast<u32>(equation));
372 return {};
373}
374
375vk::BlendFactor BlendFactor(Maxwell::Blend::Factor factor) {
376 switch (factor) {
377 case Maxwell::Blend::Factor::Zero:
378 case Maxwell::Blend::Factor::ZeroGL:
379 return vk::BlendFactor::eZero;
380 case Maxwell::Blend::Factor::One:
381 case Maxwell::Blend::Factor::OneGL:
382 return vk::BlendFactor::eOne;
383 case Maxwell::Blend::Factor::SourceColor:
384 case Maxwell::Blend::Factor::SourceColorGL:
385 return vk::BlendFactor::eSrcColor;
386 case Maxwell::Blend::Factor::OneMinusSourceColor:
387 case Maxwell::Blend::Factor::OneMinusSourceColorGL:
388 return vk::BlendFactor::eOneMinusSrcColor;
389 case Maxwell::Blend::Factor::SourceAlpha:
390 case Maxwell::Blend::Factor::SourceAlphaGL:
391 return vk::BlendFactor::eSrcAlpha;
392 case Maxwell::Blend::Factor::OneMinusSourceAlpha:
393 case Maxwell::Blend::Factor::OneMinusSourceAlphaGL:
394 return vk::BlendFactor::eOneMinusSrcAlpha;
395 case Maxwell::Blend::Factor::DestAlpha:
396 case Maxwell::Blend::Factor::DestAlphaGL:
397 return vk::BlendFactor::eDstAlpha;
398 case Maxwell::Blend::Factor::OneMinusDestAlpha:
399 case Maxwell::Blend::Factor::OneMinusDestAlphaGL:
400 return vk::BlendFactor::eOneMinusDstAlpha;
401 case Maxwell::Blend::Factor::DestColor:
402 case Maxwell::Blend::Factor::DestColorGL:
403 return vk::BlendFactor::eDstColor;
404 case Maxwell::Blend::Factor::OneMinusDestColor:
405 case Maxwell::Blend::Factor::OneMinusDestColorGL:
406 return vk::BlendFactor::eOneMinusDstColor;
407 case Maxwell::Blend::Factor::SourceAlphaSaturate:
408 case Maxwell::Blend::Factor::SourceAlphaSaturateGL:
409 return vk::BlendFactor::eSrcAlphaSaturate;
410 case Maxwell::Blend::Factor::Source1Color:
411 case Maxwell::Blend::Factor::Source1ColorGL:
412 return vk::BlendFactor::eSrc1Color;
413 case Maxwell::Blend::Factor::OneMinusSource1Color:
414 case Maxwell::Blend::Factor::OneMinusSource1ColorGL:
415 return vk::BlendFactor::eOneMinusSrc1Color;
416 case Maxwell::Blend::Factor::Source1Alpha:
417 case Maxwell::Blend::Factor::Source1AlphaGL:
418 return vk::BlendFactor::eSrc1Alpha;
419 case Maxwell::Blend::Factor::OneMinusSource1Alpha:
420 case Maxwell::Blend::Factor::OneMinusSource1AlphaGL:
421 return vk::BlendFactor::eOneMinusSrc1Alpha;
422 case Maxwell::Blend::Factor::ConstantColor:
423 case Maxwell::Blend::Factor::ConstantColorGL:
424 return vk::BlendFactor::eConstantColor;
425 case Maxwell::Blend::Factor::OneMinusConstantColor:
426 case Maxwell::Blend::Factor::OneMinusConstantColorGL:
427 return vk::BlendFactor::eOneMinusConstantColor;
428 case Maxwell::Blend::Factor::ConstantAlpha:
429 case Maxwell::Blend::Factor::ConstantAlphaGL:
430 return vk::BlendFactor::eConstantAlpha;
431 case Maxwell::Blend::Factor::OneMinusConstantAlpha:
432 case Maxwell::Blend::Factor::OneMinusConstantAlphaGL:
433 return vk::BlendFactor::eOneMinusConstantAlpha;
434 }
435 UNIMPLEMENTED_MSG("Unimplemented blend factor={}", static_cast<u32>(factor));
436 return {};
437}
438
439vk::FrontFace FrontFace(Maxwell::Cull::FrontFace front_face) {
440 switch (front_face) {
441 case Maxwell::Cull::FrontFace::ClockWise:
442 return vk::FrontFace::eClockwise;
443 case Maxwell::Cull::FrontFace::CounterClockWise:
444 return vk::FrontFace::eCounterClockwise;
445 }
446 UNIMPLEMENTED_MSG("Unimplemented front face={}", static_cast<u32>(front_face));
447 return {};
448}
449
450vk::CullModeFlags CullFace(Maxwell::Cull::CullFace cull_face) {
451 switch (cull_face) {
452 case Maxwell::Cull::CullFace::Front:
453 return vk::CullModeFlagBits::eFront;
454 case Maxwell::Cull::CullFace::Back:
455 return vk::CullModeFlagBits::eBack;
456 case Maxwell::Cull::CullFace::FrontAndBack:
457 return vk::CullModeFlagBits::eFrontAndBack;
458 }
459 UNIMPLEMENTED_MSG("Unimplemented cull face={}", static_cast<u32>(cull_face));
460 return {};
461}
462
463vk::ComponentSwizzle SwizzleSource(Tegra::Texture::SwizzleSource swizzle) {
464 switch (swizzle) {
465 case Tegra::Texture::SwizzleSource::Zero:
466 return vk::ComponentSwizzle::eZero;
467 case Tegra::Texture::SwizzleSource::R:
468 return vk::ComponentSwizzle::eR;
469 case Tegra::Texture::SwizzleSource::G:
470 return vk::ComponentSwizzle::eG;
471 case Tegra::Texture::SwizzleSource::B:
472 return vk::ComponentSwizzle::eB;
473 case Tegra::Texture::SwizzleSource::A:
474 return vk::ComponentSwizzle::eA;
475 case Tegra::Texture::SwizzleSource::OneInt:
476 case Tegra::Texture::SwizzleSource::OneFloat:
477 return vk::ComponentSwizzle::eOne;
478 }
479 UNIMPLEMENTED_MSG("Unimplemented swizzle source={}", static_cast<u32>(swizzle));
480 return {};
481}
482
483} // namespace Vulkan::MaxwellToVK
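
The Sampler helpers above translate one Tegra sampler descriptor field at a time into Vulkan enums. A minimal sketch of how they could feed a vk::SamplerCreateInfo follows; the TscEntry type and its field names are illustrative assumptions, not part of this file:

// Sketch only: TscEntry stands in for a decoded Tegra sampler descriptor; its field
// names (mag_filter, min_filter, mipmap_filter, wrap_u/v/p, depth_compare_*) are
// assumptions made for this example.
vk::SamplerCreateInfo MakeSamplerCreateInfo(const TscEntry& tsc) {
    using namespace Vulkan::MaxwellToVK;
    vk::SamplerCreateInfo ci;
    ci.magFilter = Sampler::Filter(tsc.mag_filter);
    ci.minFilter = Sampler::Filter(tsc.min_filter);
    ci.mipmapMode = Sampler::MipmapMode(tsc.mipmap_filter);
    ci.addressModeU = Sampler::WrapMode(tsc.wrap_u);
    ci.addressModeV = Sampler::WrapMode(tsc.wrap_v);
    ci.addressModeW = Sampler::WrapMode(tsc.wrap_p);
    ci.compareEnable = tsc.depth_compare_enabled;
    ci.compareOp = Sampler::DepthCompareFunction(tsc.depth_compare_func);
    return ci;
}
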
diff --git a/src/video_core/renderer_vulkan/maxwell_to_vk.h b/src/video_core/renderer_vulkan/maxwell_to_vk.h
new file mode 100644
index 000000000..4cadc0721
--- /dev/null
+++ b/src/video_core/renderer_vulkan/maxwell_to_vk.h
@@ -0,0 +1,58 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <utility>
8#include "common/common_types.h"
9#include "video_core/engines/maxwell_3d.h"
10#include "video_core/renderer_vulkan/declarations.h"
11#include "video_core/renderer_vulkan/vk_device.h"
12#include "video_core/surface.h"
13#include "video_core/textures/texture.h"
14
15namespace Vulkan::MaxwellToVK {
16
17using Maxwell = Tegra::Engines::Maxwell3D::Regs;
18using PixelFormat = VideoCore::Surface::PixelFormat;
19using ComponentType = VideoCore::Surface::ComponentType;
20
21namespace Sampler {
22
23vk::Filter Filter(Tegra::Texture::TextureFilter filter);
24
25vk::SamplerMipmapMode MipmapMode(Tegra::Texture::TextureMipmapFilter mipmap_filter);
26
27vk::SamplerAddressMode WrapMode(Tegra::Texture::WrapMode wrap_mode);
28
29vk::CompareOp DepthCompareFunction(Tegra::Texture::DepthCompareFunc depth_compare_func);
30
31} // namespace Sampler
32
33std::pair<vk::Format, bool> SurfaceFormat(const VKDevice& device, FormatType format_type,
34 PixelFormat pixel_format, ComponentType component_type);
35
36vk::ShaderStageFlagBits ShaderStage(Maxwell::ShaderStage stage);
37
38vk::PrimitiveTopology PrimitiveTopology(Maxwell::PrimitiveTopology topology);
39
40vk::Format VertexFormat(Maxwell::VertexAttribute::Type type, Maxwell::VertexAttribute::Size size);
41
42vk::CompareOp ComparisonOp(Maxwell::ComparisonOp comparison);
43
44vk::IndexType IndexFormat(Maxwell::IndexFormat index_format);
45
46vk::StencilOp StencilOp(Maxwell::StencilOp stencil_op);
47
48vk::BlendOp BlendEquation(Maxwell::Blend::Equation equation);
49
50vk::BlendFactor BlendFactor(Maxwell::Blend::Factor factor);
51
52vk::FrontFace FrontFace(Maxwell::Cull::FrontFace front_face);
53
54vk::CullModeFlags CullFace(Maxwell::Cull::CullFace cull_face);
55
56vk::ComponentSwizzle SwizzleSource(Tegra::Texture::SwizzleSource swizzle);
57
58} // namespace Vulkan::MaxwellToVK
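
As a usage sketch, the fixed-function helpers declared here map Maxwell blend registers straight onto Vulkan pipeline state; the MaxwellBlend struct and its field names below are assumptions for illustration only (the real register layout lives in Tegra::Engines::Maxwell3D::Regs):

// Sketch only: MaxwellBlend stands in for one Maxwell blend register block.
vk::PipelineColorBlendAttachmentState MakeBlendAttachment(const MaxwellBlend& blend) {
    using namespace Vulkan::MaxwellToVK;
    vk::PipelineColorBlendAttachmentState attachment;
    attachment.blendEnable = VK_TRUE;
    attachment.colorBlendOp = BlendEquation(blend.equation_rgb);
    attachment.srcColorBlendFactor = BlendFactor(blend.factor_source_rgb);
    attachment.dstColorBlendFactor = BlendFactor(blend.factor_dest_rgb);
    attachment.alphaBlendOp = BlendEquation(blend.equation_a);
    attachment.srcAlphaBlendFactor = BlendFactor(blend.factor_source_a);
    attachment.dstAlphaBlendFactor = BlendFactor(blend.factor_dest_a);
    attachment.colorWriteMask = vk::ColorComponentFlagBits::eR | vk::ColorComponentFlagBits::eG |
                                vk::ColorComponentFlagBits::eB | vk::ColorComponentFlagBits::eA;
    return attachment;
}
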
diff --git a/src/video_core/renderer_vulkan/vk_device.cpp b/src/video_core/renderer_vulkan/vk_device.cpp
index 78a4e5f0e..00242ecbe 100644
--- a/src/video_core/renderer_vulkan/vk_device.cpp
+++ b/src/video_core/renderer_vulkan/vk_device.cpp
@@ -122,8 +122,7 @@ bool VKDevice::IsFormatSupported(vk::Format wanted_format, vk::FormatFeatureFlag
122 FormatType format_type) const { 122 FormatType format_type) const {
123 const auto it = format_properties.find(wanted_format); 123 const auto it = format_properties.find(wanted_format);
124 if (it == format_properties.end()) { 124 if (it == format_properties.end()) {
125 LOG_CRITICAL(Render_Vulkan, "Unimplemented format query={}", 125 LOG_CRITICAL(Render_Vulkan, "Unimplemented format query={}", vk::to_string(wanted_format));
126 static_cast<u32>(wanted_format));
127 UNREACHABLE(); 126 UNREACHABLE();
128 return true; 127 return true;
129 } 128 }
@@ -219,11 +218,19 @@ std::map<vk::Format, vk::FormatProperties> VKDevice::GetFormatProperties(
219 format_properties.emplace(format, physical.getFormatProperties(format, dldi)); 218 format_properties.emplace(format, physical.getFormatProperties(format, dldi));
220 }; 219 };
221 AddFormatQuery(vk::Format::eA8B8G8R8UnormPack32); 220 AddFormatQuery(vk::Format::eA8B8G8R8UnormPack32);
222 AddFormatQuery(vk::Format::eR5G6B5UnormPack16); 221 AddFormatQuery(vk::Format::eB5G6R5UnormPack16);
222 AddFormatQuery(vk::Format::eA2B10G10R10UnormPack32);
223 AddFormatQuery(vk::Format::eR8G8B8A8Srgb);
224 AddFormatQuery(vk::Format::eR8Unorm);
223 AddFormatQuery(vk::Format::eD32Sfloat); 225 AddFormatQuery(vk::Format::eD32Sfloat);
226 AddFormatQuery(vk::Format::eD16Unorm);
224 AddFormatQuery(vk::Format::eD16UnormS8Uint); 227 AddFormatQuery(vk::Format::eD16UnormS8Uint);
225 AddFormatQuery(vk::Format::eD24UnormS8Uint); 228 AddFormatQuery(vk::Format::eD24UnormS8Uint);
226 AddFormatQuery(vk::Format::eD32SfloatS8Uint); 229 AddFormatQuery(vk::Format::eD32SfloatS8Uint);
230 AddFormatQuery(vk::Format::eBc1RgbaUnormBlock);
231 AddFormatQuery(vk::Format::eBc2UnormBlock);
232 AddFormatQuery(vk::Format::eBc3UnormBlock);
233 AddFormatQuery(vk::Format::eBc4UnormBlock);
227 234
228 return format_properties; 235 return format_properties;
229} 236}
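
GetFormatProperties caches one vk::FormatProperties entry per queried format, and IsFormatSupported/GetSupportedFormat test the wanted feature flags against that cache. A minimal sketch of the core check and a fallback, assuming optimal tiling only and that <map> plus the Vulkan-Hpp declarations are available (the real device code also distinguishes linear and buffer format types):

// Sketch only: true when every wanted feature bit is exposed for optimal tiling.
bool SupportsOptimalTiling(const vk::FormatProperties& properties,
                           vk::FormatFeatureFlags wanted_usage) {
    return (properties.optimalTilingFeatures & wanted_usage) == wanted_usage;
}

// Illustrative fallback: prefer D24S8 for depth-stencil and fall back to D32S8.
vk::Format ChooseDepthStencilFormat(const std::map<vk::Format, vk::FormatProperties>& cache) {
    const auto wanted = vk::FormatFeatureFlagBits::eDepthStencilAttachment;
    if (SupportsOptimalTiling(cache.at(vk::Format::eD24UnormS8Uint), wanted)) {
        return vk::Format::eD24UnormS8Uint;
    }
    return vk::Format::eD32SfloatS8Uint;
}
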
diff --git a/src/video_core/shader/decode.cpp b/src/video_core/shader/decode.cpp
index 740ac3118..e4c438792 100644
--- a/src/video_core/shader/decode.cpp
+++ b/src/video_core/shader/decode.cpp
@@ -165,6 +165,7 @@ u32 ShaderIR::DecodeInstr(NodeBlock& bb, u32 pc) {
165 {OpCode::Type::Hfma2, &ShaderIR::DecodeHfma2}, 165 {OpCode::Type::Hfma2, &ShaderIR::DecodeHfma2},
166 {OpCode::Type::Conversion, &ShaderIR::DecodeConversion}, 166 {OpCode::Type::Conversion, &ShaderIR::DecodeConversion},
167 {OpCode::Type::Memory, &ShaderIR::DecodeMemory}, 167 {OpCode::Type::Memory, &ShaderIR::DecodeMemory},
168 {OpCode::Type::Texture, &ShaderIR::DecodeTexture},
168 {OpCode::Type::FloatSetPredicate, &ShaderIR::DecodeFloatSetPredicate}, 169 {OpCode::Type::FloatSetPredicate, &ShaderIR::DecodeFloatSetPredicate},
169 {OpCode::Type::IntegerSetPredicate, &ShaderIR::DecodeIntegerSetPredicate}, 170 {OpCode::Type::IntegerSetPredicate, &ShaderIR::DecodeIntegerSetPredicate},
170 {OpCode::Type::HalfSetPredicate, &ShaderIR::DecodeHalfSetPredicate}, 171 {OpCode::Type::HalfSetPredicate, &ShaderIR::DecodeHalfSetPredicate},
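
DecodeInstr selects a handler through a table keyed by OpCode::Type, so wiring up the new texture decoder is a single extra entry. A standalone sketch of the same member-function-pointer dispatch pattern, with simplified stand-in types (Decoder and InstrType are assumptions, not the real ShaderIR names):

#include <unordered_map>

enum class InstrType { Memory, Texture };

class Decoder {
public:
    // Dispatches one instruction type to its handler and returns the next pc,
    // mirroring how ShaderIR::DecodeInstr walks the program.
    unsigned Decode(InstrType type, unsigned pc) {
        using Handler = unsigned (Decoder::*)(unsigned);
        static const std::unordered_map<InstrType, Handler> table{
            {InstrType::Memory, &Decoder::DecodeMemory},
            {InstrType::Texture, &Decoder::DecodeTexture},
        };
        const auto it = table.find(type);
        return it != table.end() ? (this->*(it->second))(pc) : pc + 1;
    }

private:
    unsigned DecodeMemory(unsigned pc) { return pc + 1; }
    unsigned DecodeTexture(unsigned pc) { return pc + 1; }
};
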
diff --git a/src/video_core/shader/decode/memory.cpp b/src/video_core/shader/decode/memory.cpp
index 38f01ca50..ea3c71eed 100644
--- a/src/video_core/shader/decode/memory.cpp
+++ b/src/video_core/shader/decode/memory.cpp
@@ -17,24 +17,6 @@ using Tegra::Shader::Attribute;
17using Tegra::Shader::Instruction; 17using Tegra::Shader::Instruction;
18using Tegra::Shader::OpCode; 18using Tegra::Shader::OpCode;
19using Tegra::Shader::Register; 19using Tegra::Shader::Register;
20using Tegra::Shader::TextureMiscMode;
21using Tegra::Shader::TextureProcessMode;
22using Tegra::Shader::TextureType;
23
24static std::size_t GetCoordCount(TextureType texture_type) {
25 switch (texture_type) {
26 case TextureType::Texture1D:
27 return 1;
28 case TextureType::Texture2D:
29 return 2;
30 case TextureType::Texture3D:
31 case TextureType::TextureCube:
32 return 3;
33 default:
34 UNIMPLEMENTED_MSG("Unhandled texture type: {}", static_cast<u32>(texture_type));
35 return 0;
36 }
37}
38 20
39u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) { 21u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) {
40 const Instruction instr = {program_code[pc]}; 22 const Instruction instr = {program_code[pc]};
@@ -247,194 +229,6 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) {
247 } 229 }
248 break; 230 break;
249 } 231 }
250 case OpCode::Id::TEX: {
251 UNIMPLEMENTED_IF_MSG(instr.tex.UsesMiscMode(TextureMiscMode::AOFFI),
252 "AOFFI is not implemented");
253
254 if (instr.tex.UsesMiscMode(TextureMiscMode::NODEP)) {
255 LOG_WARNING(HW_GPU, "TEX.NODEP implementation is incomplete");
256 }
257
258 const TextureType texture_type{instr.tex.texture_type};
259 const bool is_array = instr.tex.array != 0;
260 const bool depth_compare = instr.tex.UsesMiscMode(TextureMiscMode::DC);
261 const auto process_mode = instr.tex.GetTextureProcessMode();
262 WriteTexInstructionFloat(
263 bb, instr, GetTexCode(instr, texture_type, process_mode, depth_compare, is_array));
264 break;
265 }
266 case OpCode::Id::TEXS: {
267 const TextureType texture_type{instr.texs.GetTextureType()};
268 const bool is_array{instr.texs.IsArrayTexture()};
269 const bool depth_compare = instr.texs.UsesMiscMode(TextureMiscMode::DC);
270 const auto process_mode = instr.texs.GetTextureProcessMode();
271
272 if (instr.texs.UsesMiscMode(TextureMiscMode::NODEP)) {
273 LOG_WARNING(HW_GPU, "TEXS.NODEP implementation is incomplete");
274 }
275
276 const Node4 components =
277 GetTexsCode(instr, texture_type, process_mode, depth_compare, is_array);
278
279 if (instr.texs.fp32_flag) {
280 WriteTexsInstructionFloat(bb, instr, components);
281 } else {
282 WriteTexsInstructionHalfFloat(bb, instr, components);
283 }
284 break;
285 }
286 case OpCode::Id::TLD4: {
287 ASSERT(instr.tld4.array == 0);
288 UNIMPLEMENTED_IF_MSG(instr.tld4.UsesMiscMode(TextureMiscMode::AOFFI),
289 "AOFFI is not implemented");
290 UNIMPLEMENTED_IF_MSG(instr.tld4.UsesMiscMode(TextureMiscMode::NDV),
291 "NDV is not implemented");
292 UNIMPLEMENTED_IF_MSG(instr.tld4.UsesMiscMode(TextureMiscMode::PTP),
293 "PTP is not implemented");
294
295 if (instr.tld4.UsesMiscMode(TextureMiscMode::NODEP)) {
296 LOG_WARNING(HW_GPU, "TLD4.NODEP implementation is incomplete");
297 }
298
299 const auto texture_type = instr.tld4.texture_type.Value();
300 const bool depth_compare = instr.tld4.UsesMiscMode(TextureMiscMode::DC);
301 const bool is_array = instr.tld4.array != 0;
302 WriteTexInstructionFloat(bb, instr,
303 GetTld4Code(instr, texture_type, depth_compare, is_array));
304 break;
305 }
306 case OpCode::Id::TLD4S: {
307 UNIMPLEMENTED_IF_MSG(instr.tld4s.UsesMiscMode(TextureMiscMode::AOFFI),
308 "AOFFI is not implemented");
309 if (instr.tld4s.UsesMiscMode(TextureMiscMode::NODEP)) {
310 LOG_WARNING(HW_GPU, "TLD4S.NODEP implementation is incomplete");
311 }
312
313 const bool depth_compare = instr.tld4s.UsesMiscMode(TextureMiscMode::DC);
314 const Node op_a = GetRegister(instr.gpr8);
315 const Node op_b = GetRegister(instr.gpr20);
316
317 // TODO(Subv): Figure out how the sampler type is encoded in the TLD4S instruction.
318 std::vector<Node> coords;
319 if (depth_compare) {
320 // Note: TLD4S coordinate encoding works just like TEXS's
321 const Node op_y = GetRegister(instr.gpr8.Value() + 1);
322 coords.push_back(op_a);
323 coords.push_back(op_y);
324 coords.push_back(op_b);
325 } else {
326 coords.push_back(op_a);
327 coords.push_back(op_b);
328 }
329 std::vector<Node> extras;
330 extras.push_back(Immediate(static_cast<u32>(instr.tld4s.component)));
331
332 const auto& sampler =
333 GetSampler(instr.sampler, TextureType::Texture2D, false, depth_compare);
334
335 Node4 values;
336 for (u32 element = 0; element < values.size(); ++element) {
337 auto coords_copy = coords;
338 MetaTexture meta{sampler, {}, {}, extras, element};
339 values[element] = Operation(OperationCode::TextureGather, meta, std::move(coords_copy));
340 }
341
342 WriteTexsInstructionFloat(bb, instr, values);
343 break;
344 }
345 case OpCode::Id::TXQ: {
346 if (instr.txq.UsesMiscMode(TextureMiscMode::NODEP)) {
347 LOG_WARNING(HW_GPU, "TXQ.NODEP implementation is incomplete");
348 }
349
350        // TODO: The new commits on the texture refactor change the way samplers work.
351        // Sadly, not all texture instructions specify the type of texture their sampler
352        // uses. This must be fixed at a later stage.
353 const auto& sampler =
354 GetSampler(instr.sampler, Tegra::Shader::TextureType::Texture2D, false, false);
355
356 u32 indexer = 0;
357 switch (instr.txq.query_type) {
358 case Tegra::Shader::TextureQueryType::Dimension: {
359 for (u32 element = 0; element < 4; ++element) {
360 if (!instr.txq.IsComponentEnabled(element)) {
361 continue;
362 }
363 MetaTexture meta{sampler, {}, {}, {}, element};
364 const Node value =
365 Operation(OperationCode::TextureQueryDimensions, meta, GetRegister(instr.gpr8));
366 SetTemporal(bb, indexer++, value);
367 }
368 for (u32 i = 0; i < indexer; ++i) {
369 SetRegister(bb, instr.gpr0.Value() + i, GetTemporal(i));
370 }
371 break;
372 }
373 default:
374 UNIMPLEMENTED_MSG("Unhandled texture query type: {}",
375 static_cast<u32>(instr.txq.query_type.Value()));
376 }
377 break;
378 }
379 case OpCode::Id::TMML: {
380 UNIMPLEMENTED_IF_MSG(instr.tmml.UsesMiscMode(Tegra::Shader::TextureMiscMode::NDV),
381 "NDV is not implemented");
382
383 if (instr.tmml.UsesMiscMode(TextureMiscMode::NODEP)) {
384 LOG_WARNING(HW_GPU, "TMML.NODEP implementation is incomplete");
385 }
386
387 auto texture_type = instr.tmml.texture_type.Value();
388 const bool is_array = instr.tmml.array != 0;
389 const auto& sampler = GetSampler(instr.sampler, texture_type, is_array, false);
390
391 std::vector<Node> coords;
392
393 // TODO: Add coordinates for different samplers once other texture types are implemented.
394 switch (texture_type) {
395 case TextureType::Texture1D:
396 coords.push_back(GetRegister(instr.gpr8));
397 break;
398 case TextureType::Texture2D:
399 coords.push_back(GetRegister(instr.gpr8.Value() + 0));
400 coords.push_back(GetRegister(instr.gpr8.Value() + 1));
401 break;
402 default:
403 UNIMPLEMENTED_MSG("Unhandled texture type {}", static_cast<u32>(texture_type));
404
405 // Fallback to interpreting as a 2D texture for now
406 coords.push_back(GetRegister(instr.gpr8.Value() + 0));
407 coords.push_back(GetRegister(instr.gpr8.Value() + 1));
408 texture_type = TextureType::Texture2D;
409 }
410
411 for (u32 element = 0; element < 2; ++element) {
412 auto params = coords;
413 MetaTexture meta{sampler, {}, {}, {}, element};
414 const Node value = Operation(OperationCode::TextureQueryLod, meta, std::move(params));
415 SetTemporal(bb, element, value);
416 }
417 for (u32 element = 0; element < 2; ++element) {
418 SetRegister(bb, instr.gpr0.Value() + element, GetTemporal(element));
419 }
420
421 break;
422 }
423 case OpCode::Id::TLDS: {
424 const Tegra::Shader::TextureType texture_type{instr.tlds.GetTextureType()};
425 const bool is_array{instr.tlds.IsArrayTexture()};
426
427 UNIMPLEMENTED_IF_MSG(instr.tlds.UsesMiscMode(TextureMiscMode::AOFFI),
428 "AOFFI is not implemented");
429 UNIMPLEMENTED_IF_MSG(instr.tlds.UsesMiscMode(TextureMiscMode::MZ), "MZ is not implemented");
430
431 if (instr.tlds.UsesMiscMode(TextureMiscMode::NODEP)) {
432 LOG_WARNING(HW_GPU, "TLDS.NODEP implementation is incomplete");
433 }
434
435 WriteTexsInstructionFloat(bb, instr, GetTldsCode(instr, texture_type, is_array));
436 break;
437 }
438 default: 232 default:
439 UNIMPLEMENTED_MSG("Unhandled memory instruction: {}", opcode->get().GetName()); 233 UNIMPLEMENTED_MSG("Unhandled memory instruction: {}", opcode->get().GetName());
440 } 234 }
@@ -442,291 +236,4 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) {
442 return pc; 236 return pc;
443} 237}
444 238
445const Sampler& ShaderIR::GetSampler(const Tegra::Shader::Sampler& sampler, TextureType type,
446 bool is_array, bool is_shadow) {
447 const auto offset = static_cast<std::size_t>(sampler.index.Value());
448
449 // If this sampler has already been used, return the existing mapping.
450 const auto itr =
451 std::find_if(used_samplers.begin(), used_samplers.end(),
452 [&](const Sampler& entry) { return entry.GetOffset() == offset; });
453 if (itr != used_samplers.end()) {
454 ASSERT(itr->GetType() == type && itr->IsArray() == is_array &&
455 itr->IsShadow() == is_shadow);
456 return *itr;
457 }
458
459 // Otherwise create a new mapping for this sampler
460 const std::size_t next_index = used_samplers.size();
461 const Sampler entry{offset, next_index, type, is_array, is_shadow};
462 return *used_samplers.emplace(entry).first;
463}
464
465void ShaderIR::WriteTexInstructionFloat(NodeBlock& bb, Instruction instr, const Node4& components) {
466 u32 dest_elem = 0;
467 for (u32 elem = 0; elem < 4; ++elem) {
468 if (!instr.tex.IsComponentEnabled(elem)) {
469 // Skip disabled components
470 continue;
471 }
472 SetTemporal(bb, dest_elem++, components[elem]);
473 }
474 // After writing values in temporals, move them to the real registers
475 for (u32 i = 0; i < dest_elem; ++i) {
476 SetRegister(bb, instr.gpr0.Value() + i, GetTemporal(i));
477 }
478}
479
480void ShaderIR::WriteTexsInstructionFloat(NodeBlock& bb, Instruction instr,
481 const Node4& components) {
482 // TEXS has two destination registers and a swizzle. The first two elements in the swizzle
483 // go into gpr0+0 and gpr0+1, and the rest goes into gpr28+0 and gpr28+1
484
485 u32 dest_elem = 0;
486 for (u32 component = 0; component < 4; ++component) {
487 if (!instr.texs.IsComponentEnabled(component))
488 continue;
489 SetTemporal(bb, dest_elem++, components[component]);
490 }
491
492 for (u32 i = 0; i < dest_elem; ++i) {
493 if (i < 2) {
494 // Write the first two swizzle components to gpr0 and gpr0+1
495 SetRegister(bb, instr.gpr0.Value() + i % 2, GetTemporal(i));
496 } else {
497 ASSERT(instr.texs.HasTwoDestinations());
498 // Write the rest of the swizzle components to gpr28 and gpr28+1
499 SetRegister(bb, instr.gpr28.Value() + i % 2, GetTemporal(i));
500 }
501 }
502}
503
504void ShaderIR::WriteTexsInstructionHalfFloat(NodeBlock& bb, Instruction instr,
505 const Node4& components) {
506    // TEXS.F16 destination registers are packed in two registers in pairs (just like any half
507 // float instruction).
508
509 Node4 values;
510 u32 dest_elem = 0;
511 for (u32 component = 0; component < 4; ++component) {
512 if (!instr.texs.IsComponentEnabled(component))
513 continue;
514 values[dest_elem++] = components[component];
515 }
516 if (dest_elem == 0)
517 return;
518
519 std::generate(values.begin() + dest_elem, values.end(), [&]() { return Immediate(0); });
520
521 const Node first_value = Operation(OperationCode::HPack2, values[0], values[1]);
522 if (dest_elem <= 2) {
523 SetRegister(bb, instr.gpr0, first_value);
524 return;
525 }
526
527 SetTemporal(bb, 0, first_value);
528 SetTemporal(bb, 1, Operation(OperationCode::HPack2, values[2], values[3]));
529
530 SetRegister(bb, instr.gpr0, GetTemporal(0));
531 SetRegister(bb, instr.gpr28, GetTemporal(1));
532}
533
534Node4 ShaderIR::GetTextureCode(Instruction instr, TextureType texture_type,
535 TextureProcessMode process_mode, std::vector<Node> coords,
536 Node array, Node depth_compare, u32 bias_offset) {
537 const bool is_array = array;
538 const bool is_shadow = depth_compare;
539
540 UNIMPLEMENTED_IF_MSG((texture_type == TextureType::Texture3D && (is_array || is_shadow)) ||
541 (texture_type == TextureType::TextureCube && is_array && is_shadow),
542 "This method is not supported.");
543
544 const auto& sampler = GetSampler(instr.sampler, texture_type, is_array, is_shadow);
545
546 const bool lod_needed = process_mode == TextureProcessMode::LZ ||
547 process_mode == TextureProcessMode::LL ||
548 process_mode == TextureProcessMode::LLA;
549
550 // LOD selection (either via bias or explicit textureLod) not supported in GL for
551 // sampler2DArrayShadow and samplerCubeArrayShadow.
552 const bool gl_lod_supported =
553 !((texture_type == Tegra::Shader::TextureType::Texture2D && is_array && is_shadow) ||
554 (texture_type == Tegra::Shader::TextureType::TextureCube && is_array && is_shadow));
555
556 const OperationCode read_method =
557 lod_needed && gl_lod_supported ? OperationCode::TextureLod : OperationCode::Texture;
558
559 UNIMPLEMENTED_IF(process_mode != TextureProcessMode::None && !gl_lod_supported);
560
561 std::vector<Node> extras;
562 if (process_mode != TextureProcessMode::None && gl_lod_supported) {
563 if (process_mode == TextureProcessMode::LZ) {
564 extras.push_back(Immediate(0.0f));
565 } else {
566 // If present, lod or bias are always stored in the register indexed by the gpr20
567 // field with an offset depending on the usage of the other registers
568 extras.push_back(GetRegister(instr.gpr20.Value() + bias_offset));
569 }
570 }
571
572 Node4 values;
573 for (u32 element = 0; element < values.size(); ++element) {
574 auto copy_coords = coords;
575 MetaTexture meta{sampler, array, depth_compare, extras, element};
576 values[element] = Operation(read_method, meta, std::move(copy_coords));
577 }
578
579 return values;
580}
581
582Node4 ShaderIR::GetTexCode(Instruction instr, TextureType texture_type,
583 TextureProcessMode process_mode, bool depth_compare, bool is_array) {
584 const bool lod_bias_enabled =
585 (process_mode != TextureProcessMode::None && process_mode != TextureProcessMode::LZ);
586
587 const auto [coord_count, total_coord_count] = ValidateAndGetCoordinateElement(
588 texture_type, depth_compare, is_array, lod_bias_enabled, 4, 5);
589 // If enabled arrays index is always stored in the gpr8 field
590 const u64 array_register = instr.gpr8.Value();
591 // First coordinate index is the gpr8 or gpr8 + 1 when arrays are used
592 const u64 coord_register = array_register + (is_array ? 1 : 0);
593
594 std::vector<Node> coords;
595 for (std::size_t i = 0; i < coord_count; ++i) {
596 coords.push_back(GetRegister(coord_register + i));
597 }
598 // 1D.DC in OpenGL the 2nd component is ignored.
599 if (depth_compare && !is_array && texture_type == TextureType::Texture1D) {
600 coords.push_back(Immediate(0.0f));
601 }
602
603 const Node array = is_array ? GetRegister(array_register) : nullptr;
604
605 Node dc{};
606 if (depth_compare) {
607 // Depth is always stored in the register signaled by gpr20 or in the next register if lod
608 // or bias are used
609 const u64 depth_register = instr.gpr20.Value() + (lod_bias_enabled ? 1 : 0);
610 dc = GetRegister(depth_register);
611 }
612
613 return GetTextureCode(instr, texture_type, process_mode, coords, array, dc, 0);
614}
615
616Node4 ShaderIR::GetTexsCode(Instruction instr, TextureType texture_type,
617 TextureProcessMode process_mode, bool depth_compare, bool is_array) {
618 const bool lod_bias_enabled =
619 (process_mode != TextureProcessMode::None && process_mode != TextureProcessMode::LZ);
620
621 const auto [coord_count, total_coord_count] = ValidateAndGetCoordinateElement(
622 texture_type, depth_compare, is_array, lod_bias_enabled, 4, 4);
623 // If enabled arrays index is always stored in the gpr8 field
624 const u64 array_register = instr.gpr8.Value();
625 // First coordinate index is stored in gpr8 field or (gpr8 + 1) when arrays are used
626 const u64 coord_register = array_register + (is_array ? 1 : 0);
627 const u64 last_coord_register =
628 (is_array || !(lod_bias_enabled || depth_compare) || (coord_count > 2))
629 ? static_cast<u64>(instr.gpr20.Value())
630 : coord_register + 1;
631 const u32 bias_offset = coord_count > 2 ? 1 : 0;
632
633 std::vector<Node> coords;
634 for (std::size_t i = 0; i < coord_count; ++i) {
635 const bool last = (i == (coord_count - 1)) && (coord_count > 1);
636 coords.push_back(GetRegister(last ? last_coord_register : coord_register + i));
637 }
638
639 const Node array = is_array ? GetRegister(array_register) : nullptr;
640
641 Node dc{};
642 if (depth_compare) {
643 // Depth is always stored in the register signaled by gpr20 or in the next register if lod
644 // or bias are used
645 const u64 depth_register = instr.gpr20.Value() + (lod_bias_enabled ? 1 : 0);
646 dc = GetRegister(depth_register);
647 }
648
649 return GetTextureCode(instr, texture_type, process_mode, coords, array, dc, bias_offset);
650}
651
652Node4 ShaderIR::GetTld4Code(Instruction instr, TextureType texture_type, bool depth_compare,
653 bool is_array) {
654 const std::size_t coord_count = GetCoordCount(texture_type);
655 const std::size_t total_coord_count = coord_count + (is_array ? 1 : 0);
656 const std::size_t total_reg_count = total_coord_count + (depth_compare ? 1 : 0);
657
658 // If enabled arrays index is always stored in the gpr8 field
659 const u64 array_register = instr.gpr8.Value();
660 // First coordinate index is the gpr8 or gpr8 + 1 when arrays are used
661 const u64 coord_register = array_register + (is_array ? 1 : 0);
662
663 std::vector<Node> coords;
664 for (size_t i = 0; i < coord_count; ++i)
665 coords.push_back(GetRegister(coord_register + i));
666
667 const auto& sampler = GetSampler(instr.sampler, texture_type, is_array, depth_compare);
668
669 Node4 values;
670 for (u32 element = 0; element < values.size(); ++element) {
671 auto coords_copy = coords;
672 MetaTexture meta{sampler, GetRegister(array_register), {}, {}, element};
673 values[element] = Operation(OperationCode::TextureGather, meta, std::move(coords_copy));
674 }
675
676 return values;
677}
678
679Node4 ShaderIR::GetTldsCode(Instruction instr, TextureType texture_type, bool is_array) {
680 const std::size_t type_coord_count = GetCoordCount(texture_type);
681 const bool lod_enabled = instr.tlds.GetTextureProcessMode() == TextureProcessMode::LL;
682
683 // If enabled arrays index is always stored in the gpr8 field
684 const u64 array_register = instr.gpr8.Value();
685 // if is array gpr20 is used
686 const u64 coord_register = is_array ? instr.gpr20.Value() : instr.gpr8.Value();
687
688 const u64 last_coord_register =
689 ((type_coord_count > 2) || (type_coord_count == 2 && !lod_enabled)) && !is_array
690 ? static_cast<u64>(instr.gpr20.Value())
691 : coord_register + 1;
692
693 std::vector<Node> coords;
694 for (std::size_t i = 0; i < type_coord_count; ++i) {
695 const bool last = (i == (type_coord_count - 1)) && (type_coord_count > 1);
696 coords.push_back(GetRegister(last ? last_coord_register : coord_register + i));
697 }
698
699 const Node array = is_array ? GetRegister(array_register) : nullptr;
700 // When lod is used always is in gpr20
701 const Node lod = lod_enabled ? GetRegister(instr.gpr20) : Immediate(0);
702
703 const auto& sampler = GetSampler(instr.sampler, texture_type, is_array, false);
704
705 Node4 values;
706 for (u32 element = 0; element < values.size(); ++element) {
707 auto coords_copy = coords;
708 MetaTexture meta{sampler, array, {}, {lod}, element};
709 values[element] = Operation(OperationCode::TexelFetch, meta, std::move(coords_copy));
710 }
711 return values;
712}
713
714std::tuple<std::size_t, std::size_t> ShaderIR::ValidateAndGetCoordinateElement(
715 TextureType texture_type, bool depth_compare, bool is_array, bool lod_bias_enabled,
716 std::size_t max_coords, std::size_t max_inputs) {
717 const std::size_t coord_count = GetCoordCount(texture_type);
718
719 std::size_t total_coord_count = coord_count + (is_array ? 1 : 0) + (depth_compare ? 1 : 0);
720 const std::size_t total_reg_count = total_coord_count + (lod_bias_enabled ? 1 : 0);
721 if (total_coord_count > max_coords || total_reg_count > max_inputs) {
722 UNIMPLEMENTED_MSG("Unsupported Texture operation");
723 total_coord_count = std::min(total_coord_count, max_coords);
724 }
725 // 1D.DC OpenGL is using a vec3 but 2nd component is ignored later.
726 total_coord_count +=
727 (depth_compare && !is_array && texture_type == TextureType::Texture1D) ? 1 : 0;
728
729 return {coord_count, total_coord_count};
730}
731
732} // namespace VideoCommon::Shader 239} // namespace VideoCommon::Shader
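
TEXS.F16 writes its results as pairs of half floats packed into 32-bit destination registers, which is what the HPack2 operation in the IR models; with more than two enabled components, the first packed pair goes to gpr0 and the second to gpr28, as WriteTexsInstructionHalfFloat does in the texture decoder below. A small sketch of that packing, assuming the first component lands in the low 16 bits:

#include <cstdint>

// Sketch only: packs two 16-bit lanes into one 32-bit register value, with the
// first lane in bits [15:0] and the second in bits [31:16].
uint32_t HPack2(uint16_t first, uint16_t second) {
    return static_cast<uint32_t>(first) | (static_cast<uint32_t>(second) << 16);
}
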
diff --git a/src/video_core/shader/decode/texture.cpp b/src/video_core/shader/decode/texture.cpp
new file mode 100644
index 000000000..a99ae19bf
--- /dev/null
+++ b/src/video_core/shader/decode/texture.cpp
@@ -0,0 +1,534 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <algorithm>
6#include <vector>
7#include <fmt/format.h>
8
9#include "common/assert.h"
10#include "common/common_types.h"
11#include "video_core/engines/shader_bytecode.h"
12#include "video_core/shader/shader_ir.h"
13
14namespace VideoCommon::Shader {
15
16using Tegra::Shader::Instruction;
17using Tegra::Shader::OpCode;
18using Tegra::Shader::Register;
19using Tegra::Shader::TextureMiscMode;
20using Tegra::Shader::TextureProcessMode;
21using Tegra::Shader::TextureType;
22
23static std::size_t GetCoordCount(TextureType texture_type) {
24 switch (texture_type) {
25 case TextureType::Texture1D:
26 return 1;
27 case TextureType::Texture2D:
28 return 2;
29 case TextureType::Texture3D:
30 case TextureType::TextureCube:
31 return 3;
32 default:
33 UNIMPLEMENTED_MSG("Unhandled texture type: {}", static_cast<u32>(texture_type));
34 return 0;
35 }
36}
37
38u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) {
39 const Instruction instr = {program_code[pc]};
40 const auto opcode = OpCode::Decode(instr);
41
42 switch (opcode->get().GetId()) {
43 case OpCode::Id::TEX: {
44 UNIMPLEMENTED_IF_MSG(instr.tex.UsesMiscMode(TextureMiscMode::AOFFI),
45 "AOFFI is not implemented");
46
47 if (instr.tex.UsesMiscMode(TextureMiscMode::NODEP)) {
48 LOG_WARNING(HW_GPU, "TEX.NODEP implementation is incomplete");
49 }
50
51 const TextureType texture_type{instr.tex.texture_type};
52 const bool is_array = instr.tex.array != 0;
53 const bool depth_compare = instr.tex.UsesMiscMode(TextureMiscMode::DC);
54 const auto process_mode = instr.tex.GetTextureProcessMode();
55 WriteTexInstructionFloat(
56 bb, instr, GetTexCode(instr, texture_type, process_mode, depth_compare, is_array));
57 break;
58 }
59 case OpCode::Id::TEXS: {
60 const TextureType texture_type{instr.texs.GetTextureType()};
61 const bool is_array{instr.texs.IsArrayTexture()};
62 const bool depth_compare = instr.texs.UsesMiscMode(TextureMiscMode::DC);
63 const auto process_mode = instr.texs.GetTextureProcessMode();
64
65 if (instr.texs.UsesMiscMode(TextureMiscMode::NODEP)) {
66 LOG_WARNING(HW_GPU, "TEXS.NODEP implementation is incomplete");
67 }
68
69 const Node4 components =
70 GetTexsCode(instr, texture_type, process_mode, depth_compare, is_array);
71
72 if (instr.texs.fp32_flag) {
73 WriteTexsInstructionFloat(bb, instr, components);
74 } else {
75 WriteTexsInstructionHalfFloat(bb, instr, components);
76 }
77 break;
78 }
79 case OpCode::Id::TLD4: {
80 ASSERT(instr.tld4.array == 0);
81 UNIMPLEMENTED_IF_MSG(instr.tld4.UsesMiscMode(TextureMiscMode::AOFFI),
82 "AOFFI is not implemented");
83 UNIMPLEMENTED_IF_MSG(instr.tld4.UsesMiscMode(TextureMiscMode::NDV),
84 "NDV is not implemented");
85 UNIMPLEMENTED_IF_MSG(instr.tld4.UsesMiscMode(TextureMiscMode::PTP),
86 "PTP is not implemented");
87
88 if (instr.tld4.UsesMiscMode(TextureMiscMode::NODEP)) {
89 LOG_WARNING(HW_GPU, "TLD4.NODEP implementation is incomplete");
90 }
91
92 const auto texture_type = instr.tld4.texture_type.Value();
93 const bool depth_compare = instr.tld4.UsesMiscMode(TextureMiscMode::DC);
94 const bool is_array = instr.tld4.array != 0;
95 WriteTexInstructionFloat(bb, instr,
96 GetTld4Code(instr, texture_type, depth_compare, is_array));
97 break;
98 }
99 case OpCode::Id::TLD4S: {
100 UNIMPLEMENTED_IF_MSG(instr.tld4s.UsesMiscMode(TextureMiscMode::AOFFI),
101 "AOFFI is not implemented");
102 if (instr.tld4s.UsesMiscMode(TextureMiscMode::NODEP)) {
103 LOG_WARNING(HW_GPU, "TLD4S.NODEP implementation is incomplete");
104 }
105
106 const bool depth_compare = instr.tld4s.UsesMiscMode(TextureMiscMode::DC);
107 const Node op_a = GetRegister(instr.gpr8);
108 const Node op_b = GetRegister(instr.gpr20);
109
110 // TODO(Subv): Figure out how the sampler type is encoded in the TLD4S instruction.
111 std::vector<Node> coords;
112 if (depth_compare) {
113 // Note: TLD4S coordinate encoding works just like TEXS's
114 const Node op_y = GetRegister(instr.gpr8.Value() + 1);
115 coords.push_back(op_a);
116 coords.push_back(op_y);
117 coords.push_back(op_b);
118 } else {
119 coords.push_back(op_a);
120 coords.push_back(op_b);
121 }
122 const Node component = Immediate(static_cast<u32>(instr.tld4s.component));
123
124 const auto& sampler =
125 GetSampler(instr.sampler, TextureType::Texture2D, false, depth_compare);
126
127 Node4 values;
128 for (u32 element = 0; element < values.size(); ++element) {
129 auto coords_copy = coords;
130 MetaTexture meta{sampler, {}, {}, {}, {}, component, element};
131 values[element] = Operation(OperationCode::TextureGather, meta, std::move(coords_copy));
132 }
133
134 WriteTexsInstructionFloat(bb, instr, values);
135 break;
136 }
137 case OpCode::Id::TXQ: {
138 if (instr.txq.UsesMiscMode(TextureMiscMode::NODEP)) {
139 LOG_WARNING(HW_GPU, "TXQ.NODEP implementation is incomplete");
140 }
141
142        // TODO: The new commits on the texture refactor change the way samplers work.
143        // Sadly, not all texture instructions specify the type of texture their sampler
144        // uses. This must be fixed at a later stage.
145 const auto& sampler =
146 GetSampler(instr.sampler, Tegra::Shader::TextureType::Texture2D, false, false);
147
148 u32 indexer = 0;
149 switch (instr.txq.query_type) {
150 case Tegra::Shader::TextureQueryType::Dimension: {
151 for (u32 element = 0; element < 4; ++element) {
152 if (!instr.txq.IsComponentEnabled(element)) {
153 continue;
154 }
155 MetaTexture meta{sampler, {}, {}, {}, {}, {}, element};
156 const Node value =
157 Operation(OperationCode::TextureQueryDimensions, meta, GetRegister(instr.gpr8));
158 SetTemporal(bb, indexer++, value);
159 }
160 for (u32 i = 0; i < indexer; ++i) {
161 SetRegister(bb, instr.gpr0.Value() + i, GetTemporal(i));
162 }
163 break;
164 }
165 default:
166 UNIMPLEMENTED_MSG("Unhandled texture query type: {}",
167 static_cast<u32>(instr.txq.query_type.Value()));
168 }
169 break;
170 }
171 case OpCode::Id::TMML: {
172 UNIMPLEMENTED_IF_MSG(instr.tmml.UsesMiscMode(Tegra::Shader::TextureMiscMode::NDV),
173 "NDV is not implemented");
174
175 if (instr.tmml.UsesMiscMode(TextureMiscMode::NODEP)) {
176 LOG_WARNING(HW_GPU, "TMML.NODEP implementation is incomplete");
177 }
178
179 auto texture_type = instr.tmml.texture_type.Value();
180 const bool is_array = instr.tmml.array != 0;
181 const auto& sampler = GetSampler(instr.sampler, texture_type, is_array, false);
182
183 std::vector<Node> coords;
184
185 // TODO: Add coordinates for different samplers once other texture types are implemented.
186 switch (texture_type) {
187 case TextureType::Texture1D:
188 coords.push_back(GetRegister(instr.gpr8));
189 break;
190 case TextureType::Texture2D:
191 coords.push_back(GetRegister(instr.gpr8.Value() + 0));
192 coords.push_back(GetRegister(instr.gpr8.Value() + 1));
193 break;
194 default:
195 UNIMPLEMENTED_MSG("Unhandled texture type {}", static_cast<u32>(texture_type));
196
197 // Fallback to interpreting as a 2D texture for now
198 coords.push_back(GetRegister(instr.gpr8.Value() + 0));
199 coords.push_back(GetRegister(instr.gpr8.Value() + 1));
200 texture_type = TextureType::Texture2D;
201 }
202
203 for (u32 element = 0; element < 2; ++element) {
204 auto params = coords;
205 MetaTexture meta{sampler, {}, {}, {}, {}, {}, element};
206 const Node value = Operation(OperationCode::TextureQueryLod, meta, std::move(params));
207 SetTemporal(bb, element, value);
208 }
209 for (u32 element = 0; element < 2; ++element) {
210 SetRegister(bb, instr.gpr0.Value() + element, GetTemporal(element));
211 }
212
213 break;
214 }
215 case OpCode::Id::TLDS: {
216 const Tegra::Shader::TextureType texture_type{instr.tlds.GetTextureType()};
217 const bool is_array{instr.tlds.IsArrayTexture()};
218
219 UNIMPLEMENTED_IF_MSG(instr.tlds.UsesMiscMode(TextureMiscMode::AOFFI),
220 "AOFFI is not implemented");
221 UNIMPLEMENTED_IF_MSG(instr.tlds.UsesMiscMode(TextureMiscMode::MZ), "MZ is not implemented");
222
223 if (instr.tlds.UsesMiscMode(TextureMiscMode::NODEP)) {
224 LOG_WARNING(HW_GPU, "TLDS.NODEP implementation is incomplete");
225 }
226
227 WriteTexsInstructionFloat(bb, instr, GetTldsCode(instr, texture_type, is_array));
228 break;
229 }
230 default:
231        UNIMPLEMENTED_MSG("Unhandled texture instruction: {}", opcode->get().GetName());
232 }
233
234 return pc;
235}
236
237const Sampler& ShaderIR::GetSampler(const Tegra::Shader::Sampler& sampler, TextureType type,
238 bool is_array, bool is_shadow) {
239 const auto offset = static_cast<std::size_t>(sampler.index.Value());
240
241 // If this sampler has already been used, return the existing mapping.
242 const auto itr =
243 std::find_if(used_samplers.begin(), used_samplers.end(),
244 [&](const Sampler& entry) { return entry.GetOffset() == offset; });
245 if (itr != used_samplers.end()) {
246 ASSERT(itr->GetType() == type && itr->IsArray() == is_array &&
247 itr->IsShadow() == is_shadow);
248 return *itr;
249 }
250
251 // Otherwise create a new mapping for this sampler
252 const std::size_t next_index = used_samplers.size();
253 const Sampler entry{offset, next_index, type, is_array, is_shadow};
254 return *used_samplers.emplace(entry).first;
255}
256
257void ShaderIR::WriteTexInstructionFloat(NodeBlock& bb, Instruction instr, const Node4& components) {
258 u32 dest_elem = 0;
259 for (u32 elem = 0; elem < 4; ++elem) {
260 if (!instr.tex.IsComponentEnabled(elem)) {
261 // Skip disabled components
262 continue;
263 }
264 SetTemporal(bb, dest_elem++, components[elem]);
265 }
266 // After writing values in temporals, move them to the real registers
267 for (u32 i = 0; i < dest_elem; ++i) {
268 SetRegister(bb, instr.gpr0.Value() + i, GetTemporal(i));
269 }
270}
271
272void ShaderIR::WriteTexsInstructionFloat(NodeBlock& bb, Instruction instr,
273 const Node4& components) {
274 // TEXS has two destination registers and a swizzle. The first two elements in the swizzle
275 // go into gpr0+0 and gpr0+1, and the rest goes into gpr28+0 and gpr28+1
276
277 u32 dest_elem = 0;
278 for (u32 component = 0; component < 4; ++component) {
279 if (!instr.texs.IsComponentEnabled(component))
280 continue;
281 SetTemporal(bb, dest_elem++, components[component]);
282 }
283
284 for (u32 i = 0; i < dest_elem; ++i) {
285 if (i < 2) {
286 // Write the first two swizzle components to gpr0 and gpr0+1
287 SetRegister(bb, instr.gpr0.Value() + i % 2, GetTemporal(i));
288 } else {
289 ASSERT(instr.texs.HasTwoDestinations());
290 // Write the rest of the swizzle components to gpr28 and gpr28+1
291 SetRegister(bb, instr.gpr28.Value() + i % 2, GetTemporal(i));
292 }
293 }
294}
295
296void ShaderIR::WriteTexsInstructionHalfFloat(NodeBlock& bb, Instruction instr,
297 const Node4& components) {
298 // TEXS.F16 destination registers are packed in pairs into two registers (just like any other
299 // half-float instruction).
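    // For example, enabled components (r, g, b, a) end up as gpr0 = HPack2(r, g) and
    // gpr28 = HPack2(b, a); when two or fewer components are enabled, only gpr0 is written.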
300
301 Node4 values;
302 u32 dest_elem = 0;
303 for (u32 component = 0; component < 4; ++component) {
304 if (!instr.texs.IsComponentEnabled(component))
305 continue;
306 values[dest_elem++] = components[component];
307 }
308 if (dest_elem == 0)
309 return;
310
311 std::generate(values.begin() + dest_elem, values.end(), [&]() { return Immediate(0); });
312
313 const Node first_value = Operation(OperationCode::HPack2, values[0], values[1]);
314 if (dest_elem <= 2) {
315 SetRegister(bb, instr.gpr0, first_value);
316 return;
317 }
318
319 SetTemporal(bb, 0, first_value);
320 SetTemporal(bb, 1, Operation(OperationCode::HPack2, values[2], values[3]));
321
322 SetRegister(bb, instr.gpr0, GetTemporal(0));
323 SetRegister(bb, instr.gpr28, GetTemporal(1));
324}
325
326Node4 ShaderIR::GetTextureCode(Instruction instr, TextureType texture_type,
327 TextureProcessMode process_mode, std::vector<Node> coords,
328 Node array, Node depth_compare, u32 bias_offset) {
329 const bool is_array = array;
330 const bool is_shadow = depth_compare;
331
332 UNIMPLEMENTED_IF_MSG((texture_type == TextureType::Texture3D && (is_array || is_shadow)) ||
333 (texture_type == TextureType::TextureCube && is_array && is_shadow),
334 "This method is not supported.");
335
336 const auto& sampler = GetSampler(instr.sampler, texture_type, is_array, is_shadow);
337
338 const bool lod_needed = process_mode == TextureProcessMode::LZ ||
339 process_mode == TextureProcessMode::LL ||
340 process_mode == TextureProcessMode::LLA;
341
342 // LOD selection (either via bias or explicit textureLod) not supported in GL for
343 // sampler2DArrayShadow and samplerCubeArrayShadow.
344 const bool gl_lod_supported =
345 !((texture_type == Tegra::Shader::TextureType::Texture2D && is_array && is_shadow) ||
346 (texture_type == Tegra::Shader::TextureType::TextureCube && is_array && is_shadow));
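    // When LOD is requested for one of these samplers, read_method below stays
    // OperationCode::Texture, which is presumably lowered to a plain texture() call, as GLSL
    // defines no textureLod() overload for sampler2DArrayShadow or samplerCubeArrayShadow.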
347
348 const OperationCode read_method =
349 (lod_needed && gl_lod_supported) ? OperationCode::TextureLod : OperationCode::Texture;
350
351 UNIMPLEMENTED_IF(process_mode != TextureProcessMode::None && !gl_lod_supported);
352
353 Node bias = {};
354 Node lod = {};
355 if (process_mode != TextureProcessMode::None && gl_lod_supported) {
356 switch (process_mode) {
357 case TextureProcessMode::LZ:
358 lod = Immediate(0.0f);
359 break;
360 case TextureProcessMode::LB:
361 // If present, lod or bias are always stored in the register indexed by the gpr20
362 // field with an offset depending on the usage of the other registers
363 bias = GetRegister(instr.gpr20.Value() + bias_offset);
364 break;
365 case TextureProcessMode::LL:
366 lod = GetRegister(instr.gpr20.Value() + bias_offset);
367 break;
368 default:
369 UNIMPLEMENTED_MSG("Unimplemented process mode={}", static_cast<u32>(process_mode));
370 break;
371 }
372 }
373
374 Node4 values;
375 for (u32 element = 0; element < values.size(); ++element) {
376 auto copy_coords = coords;
377 MetaTexture meta{sampler, array, depth_compare, bias, lod, {}, element};
378 values[element] = Operation(read_method, meta, std::move(copy_coords));
379 }
380
381 return values;
382}
383
384Node4 ShaderIR::GetTexCode(Instruction instr, TextureType texture_type,
385 TextureProcessMode process_mode, bool depth_compare, bool is_array) {
386 const bool lod_bias_enabled =
387 (process_mode != TextureProcessMode::None && process_mode != TextureProcessMode::LZ);
388
389 const auto [coord_count, total_coord_count] = ValidateAndGetCoordinateElement(
390 texture_type, depth_compare, is_array, lod_bias_enabled, 4, 5);
391 // If enabled, the array index is always stored in the gpr8 field
392 const u64 array_register = instr.gpr8.Value();
393 // The first coordinate is stored in gpr8, or in gpr8 + 1 when arrays are used
394 const u64 coord_register = array_register + (is_array ? 1 : 0);
395
396 std::vector<Node> coords;
397 for (std::size_t i = 0; i < coord_count; ++i) {
398 coords.push_back(GetRegister(coord_register + i));
399 }
400 // For 1D.DC in OpenGL, the 2nd component is ignored.
401 if (depth_compare && !is_array && texture_type == TextureType::Texture1D) {
402 coords.push_back(Immediate(0.0f));
403 }
404
405 const Node array = is_array ? GetRegister(array_register) : nullptr;
406
407 Node dc{};
408 if (depth_compare) {
409 // Depth is always stored in the register indicated by gpr20, or in the next register
410 // if lod or bias is used
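        // For example, TEX.DC with LB reads the compare reference from gpr20 + 1, while TEX.DC
        // with LZ reads it from gpr20, since lod_bias_enabled is false for LZ.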
411 const u64 depth_register = instr.gpr20.Value() + (lod_bias_enabled ? 1 : 0);
412 dc = GetRegister(depth_register);
413 }
414
415 return GetTextureCode(instr, texture_type, process_mode, coords, array, dc, 0);
416}
417
418Node4 ShaderIR::GetTexsCode(Instruction instr, TextureType texture_type,
419 TextureProcessMode process_mode, bool depth_compare, bool is_array) {
420 const bool lod_bias_enabled =
421 (process_mode != TextureProcessMode::None && process_mode != TextureProcessMode::LZ);
422
423 const auto [coord_count, total_coord_count] = ValidateAndGetCoordinateElement(
424 texture_type, depth_compare, is_array, lod_bias_enabled, 4, 4);
425 // If enabled, the array index is always stored in the gpr8 field
426 const u64 array_register = instr.gpr8.Value();
427 // The first coordinate is stored in gpr8, or in gpr8 + 1 when arrays are used
428 const u64 coord_register = array_register + (is_array ? 1 : 0);
429 const u64 last_coord_register =
430 (is_array || !(lod_bias_enabled || depth_compare) || (coord_count > 2))
431 ? static_cast<u64>(instr.gpr20.Value())
432 : coord_register + 1;
433 const u32 bias_offset = coord_count > 2 ? 1 : 0;
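    // As implied by the register selection above: a 2D TEXS with LB reads x from gpr8,
    // y from gpr8 + 1 and the bias from gpr20, while a 3D TEXS with LB reads x and y from
    // gpr8 and gpr8 + 1, z from gpr20 and the bias from gpr20 + 1.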
434
435 std::vector<Node> coords;
436 for (std::size_t i = 0; i < coord_count; ++i) {
437 const bool last = (i == (coord_count - 1)) && (coord_count > 1);
438 coords.push_back(GetRegister(last ? last_coord_register : coord_register + i));
439 }
440
441 const Node array = is_array ? GetRegister(array_register) : nullptr;
442
443 Node dc{};
444 if (depth_compare) {
445 // Depth is always stored in the register indicated by gpr20, or in the next register
446 // if lod or bias is used
447 const u64 depth_register = instr.gpr20.Value() + (lod_bias_enabled ? 1 : 0);
448 dc = GetRegister(depth_register);
449 }
450
451 return GetTextureCode(instr, texture_type, process_mode, coords, array, dc, bias_offset);
452}
453
454Node4 ShaderIR::GetTld4Code(Instruction instr, TextureType texture_type, bool depth_compare,
455 bool is_array) {
456 const std::size_t coord_count = GetCoordCount(texture_type);
457 const std::size_t total_coord_count = coord_count + (is_array ? 1 : 0);
458 const std::size_t total_reg_count = total_coord_count + (depth_compare ? 1 : 0);
459
460 // If enabled, the array index is always stored in the gpr8 field
461 const u64 array_register = instr.gpr8.Value();
462 // The first coordinate is stored in gpr8, or in gpr8 + 1 when arrays are used
463 const u64 coord_register = array_register + (is_array ? 1 : 0);
464
465 std::vector<Node> coords;
466 for (size_t i = 0; i < coord_count; ++i)
467 coords.push_back(GetRegister(coord_register + i));
468
469 const auto& sampler = GetSampler(instr.sampler, texture_type, is_array, depth_compare);
470
471 Node4 values;
472 for (u32 element = 0; element < values.size(); ++element) {
473 auto coords_copy = coords;
474 MetaTexture meta{sampler, GetRegister(array_register), {}, {}, {}, {}, element};
475 values[element] = Operation(OperationCode::TextureGather, meta, std::move(coords_copy));
476 }
477
478 return values;
479}
480
481Node4 ShaderIR::GetTldsCode(Instruction instr, TextureType texture_type, bool is_array) {
482 const std::size_t type_coord_count = GetCoordCount(texture_type);
483 const bool lod_enabled = instr.tlds.GetTextureProcessMode() == TextureProcessMode::LL;
484
485 // If enabled, the array index is always stored in the gpr8 field
486 const u64 array_register = instr.gpr8.Value();
487 // When arrays are used, the coordinates start at gpr20 instead of gpr8
488 const u64 coord_register = is_array ? instr.gpr20.Value() : instr.gpr8.Value();
489
490 const u64 last_coord_register =
491 ((type_coord_count > 2) || (type_coord_count == 2 && !lod_enabled)) && !is_array
492 ? static_cast<u64>(instr.gpr20.Value())
493 : coord_register + 1;
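    // As implied by the selection above: a non-array 2D TLDS with LL reads x from gpr8 and
    // y from gpr8 + 1, while a non-array 2D TLDS without LOD reads x from gpr8 and y from gpr20.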
494
495 std::vector<Node> coords;
496 for (std::size_t i = 0; i < type_coord_count; ++i) {
497 const bool last = (i == (type_coord_count - 1)) && (type_coord_count > 1);
498 coords.push_back(GetRegister(last ? last_coord_register : coord_register + i));
499 }
500
501 const Node array = is_array ? GetRegister(array_register) : nullptr;
502 // When LOD is used, it is always stored in gpr20
503 const Node lod = lod_enabled ? GetRegister(instr.gpr20) : Immediate(0);
504
505 const auto& sampler = GetSampler(instr.sampler, texture_type, is_array, false);
506
507 Node4 values;
508 for (u32 element = 0; element < values.size(); ++element) {
509 auto coords_copy = coords;
510 MetaTexture meta{sampler, array, {}, {}, lod, {}, element};
511 values[element] = Operation(OperationCode::TexelFetch, meta, std::move(coords_copy));
512 }
513 return values;
514}
515
516std::tuple<std::size_t, std::size_t> ShaderIR::ValidateAndGetCoordinateElement(
517 TextureType texture_type, bool depth_compare, bool is_array, bool lod_bias_enabled,
518 std::size_t max_coords, std::size_t max_inputs) {
519 const std::size_t coord_count = GetCoordCount(texture_type);
520
521 std::size_t total_coord_count = coord_count + (is_array ? 1 : 0) + (depth_compare ? 1 : 0);
522 const std::size_t total_reg_count = total_coord_count + (lod_bias_enabled ? 1 : 0);
523 if (total_coord_count > max_coords || total_reg_count > max_inputs) {
524 UNIMPLEMENTED_MSG("Unsupported Texture operation");
525 total_coord_count = std::min(total_coord_count, max_coords);
526 }
527 // For 1D.DC, OpenGL uses a vec3, but the 2nd component is ignored later.
528 total_coord_count +=
529 (depth_compare && !is_array && texture_type == TextureType::Texture1D) ? 1 : 0;
530
531 return {coord_count, total_coord_count};
532}
533
534} // namespace VideoCommon::Shader \ No newline at end of file
diff --git a/src/video_core/shader/shader_ir.h b/src/video_core/shader/shader_ir.h
index 52c7f2c4e..5bc3a3900 100644
--- a/src/video_core/shader/shader_ir.h
+++ b/src/video_core/shader/shader_ir.h
@@ -290,7 +290,9 @@ struct MetaTexture {
290 const Sampler& sampler; 290 const Sampler& sampler;
291 Node array{}; 291 Node array{};
292 Node depth_compare{}; 292 Node depth_compare{};
293 std::vector<Node> extras; 293 Node bias{};
294 Node lod{};
295 Node component{};
294 u32 element{}; 296 u32 element{};
295}; 297};
296 298
@@ -614,6 +616,7 @@ private:
614 u32 DecodeHfma2(NodeBlock& bb, u32 pc); 616 u32 DecodeHfma2(NodeBlock& bb, u32 pc);
615 u32 DecodeConversion(NodeBlock& bb, u32 pc); 617 u32 DecodeConversion(NodeBlock& bb, u32 pc);
616 u32 DecodeMemory(NodeBlock& bb, u32 pc); 618 u32 DecodeMemory(NodeBlock& bb, u32 pc);
619 u32 DecodeTexture(NodeBlock& bb, u32 pc);
617 u32 DecodeFloatSetPredicate(NodeBlock& bb, u32 pc); 620 u32 DecodeFloatSetPredicate(NodeBlock& bb, u32 pc);
618 u32 DecodeIntegerSetPredicate(NodeBlock& bb, u32 pc); 621 u32 DecodeIntegerSetPredicate(NodeBlock& bb, u32 pc);
619 u32 DecodeHalfSetPredicate(NodeBlock& bb, u32 pc); 622 u32 DecodeHalfSetPredicate(NodeBlock& bb, u32 pc);
diff --git a/src/video_core/textures/astc.cpp b/src/video_core/textures/astc.cpp
index bc50a4876..b508d64e9 100644
--- a/src/video_core/textures/astc.cpp
+++ b/src/video_core/textures/astc.cpp
@@ -23,28 +23,12 @@
23 23
24#include "video_core/textures/astc.h" 24#include "video_core/textures/astc.h"
25 25
26class BitStream { 26class InputBitStream {
27public: 27public:
28 explicit BitStream(unsigned char* ptr, int nBits = 0, int start_offset = 0) 28 explicit InputBitStream(const unsigned char* ptr, int nBits = 0, int start_offset = 0)
29 : m_NumBits(nBits), m_CurByte(ptr), m_NextBit(start_offset % 8) {} 29 : m_NumBits(nBits), m_CurByte(ptr), m_NextBit(start_offset % 8) {}
30 30
31 ~BitStream() = default; 31 ~InputBitStream() = default;
32
33 int GetBitsWritten() const {
34 return m_BitsWritten;
35 }
36
37 void WriteBitsR(unsigned int val, unsigned int nBits) {
38 for (unsigned int i = 0; i < nBits; i++) {
39 WriteBit((val >> (nBits - i - 1)) & 1);
40 }
41 }
42
43 void WriteBits(unsigned int val, unsigned int nBits) {
44 for (unsigned int i = 0; i < nBits; i++) {
45 WriteBit((val >> i) & 1);
46 }
47 }
48 32
49 int GetBitsRead() const { 33 int GetBitsRead() const {
50 return m_BitsRead; 34 return m_BitsRead;
@@ -71,6 +55,38 @@ public:
71 } 55 }
72 56
73private: 57private:
58 const int m_NumBits;
59 const unsigned char* m_CurByte;
60 int m_NextBit = 0;
61 int m_BitsRead = 0;
62
63 bool done = false;
64};
65
66class OutputBitStream {
67public:
68 explicit OutputBitStream(unsigned char* ptr, int nBits = 0, int start_offset = 0)
69 : m_NumBits(nBits), m_CurByte(ptr), m_NextBit(start_offset % 8) {}
70
71 ~OutputBitStream() = default;
72
73 int GetBitsWritten() const {
74 return m_BitsWritten;
75 }
76
77 void WriteBitsR(unsigned int val, unsigned int nBits) {
78 for (unsigned int i = 0; i < nBits; i++) {
79 WriteBit((val >> (nBits - i - 1)) & 1);
80 }
81 }
82
83 void WriteBits(unsigned int val, unsigned int nBits) {
84 for (unsigned int i = 0; i < nBits; i++) {
85 WriteBit((val >> i) & 1);
86 }
87 }
88
89private:
74 void WriteBit(int b) { 90 void WriteBit(int b) {
75 91
76 if (done) 92 if (done)
@@ -238,8 +254,8 @@ public:
238 // Fills result with the values that are encoded in the given 254 // Fills result with the values that are encoded in the given
239 // bitstream. We must know beforehand what the maximum possible 255 // bitstream. We must know beforehand what the maximum possible
240 // value is, and how many values we're decoding. 256 // value is, and how many values we're decoding.
241 static void DecodeIntegerSequence(std::vector<IntegerEncodedValue>& result, BitStream& bits, 257 static void DecodeIntegerSequence(std::vector<IntegerEncodedValue>& result,
242 uint32_t maxRange, uint32_t nValues) { 258 InputBitStream& bits, uint32_t maxRange, uint32_t nValues) {
243 // Determine encoding parameters 259 // Determine encoding parameters
244 IntegerEncodedValue val = IntegerEncodedValue::CreateEncoding(maxRange); 260 IntegerEncodedValue val = IntegerEncodedValue::CreateEncoding(maxRange);
245 261
@@ -267,7 +283,7 @@ public:
267 } 283 }
268 284
269private: 285private:
270 static void DecodeTritBlock(BitStream& bits, std::vector<IntegerEncodedValue>& result, 286 static void DecodeTritBlock(InputBitStream& bits, std::vector<IntegerEncodedValue>& result,
271 uint32_t nBitsPerValue) { 287 uint32_t nBitsPerValue) {
272 // Implement the algorithm in section C.2.12 288 // Implement the algorithm in section C.2.12
273 uint32_t m[5]; 289 uint32_t m[5];
@@ -327,7 +343,7 @@ private:
327 } 343 }
328 } 344 }
329 345
330 static void DecodeQuintBlock(BitStream& bits, std::vector<IntegerEncodedValue>& result, 346 static void DecodeQuintBlock(InputBitStream& bits, std::vector<IntegerEncodedValue>& result,
331 uint32_t nBitsPerValue) { 347 uint32_t nBitsPerValue) {
332 // Implement the algorithm in section C.2.12 348 // Implement the algorithm in section C.2.12
333 uint32_t m[3]; 349 uint32_t m[3];
@@ -406,7 +422,7 @@ struct TexelWeightParams {
406 } 422 }
407}; 423};
408 424
409static TexelWeightParams DecodeBlockInfo(BitStream& strm) { 425static TexelWeightParams DecodeBlockInfo(InputBitStream& strm) {
410 TexelWeightParams params; 426 TexelWeightParams params;
411 427
412 // Read the entire block mode all at once 428 // Read the entire block mode all at once
@@ -605,7 +621,7 @@ static TexelWeightParams DecodeBlockInfo(BitStream& strm) {
605 return params; 621 return params;
606} 622}
607 623
608static void FillVoidExtentLDR(BitStream& strm, uint32_t* const outBuf, uint32_t blockWidth, 624static void FillVoidExtentLDR(InputBitStream& strm, uint32_t* const outBuf, uint32_t blockWidth,
609 uint32_t blockHeight) { 625 uint32_t blockHeight) {
610 // Don't actually care about the void extent, just read the bits... 626 // Don't actually care about the void extent, just read the bits...
611 for (int i = 0; i < 4; ++i) { 627 for (int i = 0; i < 4; ++i) {
@@ -821,7 +837,7 @@ static void DecodeColorValues(uint32_t* out, uint8_t* data, const uint32_t* mode
821 837
822 // We now have enough to decode our integer sequence. 838 // We now have enough to decode our integer sequence.
823 std::vector<IntegerEncodedValue> decodedColorValues; 839 std::vector<IntegerEncodedValue> decodedColorValues;
824 BitStream colorStream(data); 840 InputBitStream colorStream(data);
825 IntegerEncodedValue::DecodeIntegerSequence(decodedColorValues, colorStream, range, nValues); 841 IntegerEncodedValue::DecodeIntegerSequence(decodedColorValues, colorStream, range, nValues);
826 842
827 // Once we have the decoded values, we need to dequantize them to the 0-255 range 843 // Once we have the decoded values, we need to dequantize them to the 0-255 range
@@ -1365,9 +1381,9 @@ static void ComputeEndpoints(Pixel& ep1, Pixel& ep2, const uint32_t*& colorValue
1365#undef READ_INT_VALUES 1381#undef READ_INT_VALUES
1366} 1382}
1367 1383
1368static void DecompressBlock(uint8_t inBuf[16], const uint32_t blockWidth, 1384static void DecompressBlock(const uint8_t inBuf[16], const uint32_t blockWidth,
1369 const uint32_t blockHeight, uint32_t* outBuf) { 1385 const uint32_t blockHeight, uint32_t* outBuf) {
1370 BitStream strm(inBuf); 1386 InputBitStream strm(inBuf);
1371 TexelWeightParams weightParams = DecodeBlockInfo(strm); 1387 TexelWeightParams weightParams = DecodeBlockInfo(strm);
1372 1388
1373 // Was there an error? 1389 // Was there an error?
@@ -1421,7 +1437,7 @@ static void DecompressBlock(uint8_t inBuf[16], const uint32_t blockWidth,
1421 // Define color data. 1437 // Define color data.
1422 uint8_t colorEndpointData[16]; 1438 uint8_t colorEndpointData[16];
1423 memset(colorEndpointData, 0, sizeof(colorEndpointData)); 1439 memset(colorEndpointData, 0, sizeof(colorEndpointData));
1424 BitStream colorEndpointStream(colorEndpointData, 16 * 8, 0); 1440 OutputBitStream colorEndpointStream(colorEndpointData, 16 * 8, 0);
1425 1441
1426 // Read extra config data... 1442 // Read extra config data...
1427 uint32_t baseCEM = 0; 1443 uint32_t baseCEM = 0;
@@ -1549,7 +1565,7 @@ static void DecompressBlock(uint8_t inBuf[16], const uint32_t blockWidth,
1549 memset(texelWeightData + clearByteStart, 0, 16 - clearByteStart); 1565 memset(texelWeightData + clearByteStart, 0, 16 - clearByteStart);
1550 1566
1551 std::vector<IntegerEncodedValue> texelWeightValues; 1567 std::vector<IntegerEncodedValue> texelWeightValues;
1552 BitStream weightStream(texelWeightData); 1568 InputBitStream weightStream(texelWeightData);
1553 1569
1554 IntegerEncodedValue::DecodeIntegerSequence(texelWeightValues, weightStream, 1570 IntegerEncodedValue::DecodeIntegerSequence(texelWeightValues, weightStream,
1555 weightParams.m_MaxWeight, 1571 weightParams.m_MaxWeight,
@@ -1597,7 +1613,7 @@ static void DecompressBlock(uint8_t inBuf[16], const uint32_t blockWidth,
1597 1613
1598namespace Tegra::Texture::ASTC { 1614namespace Tegra::Texture::ASTC {
1599 1615
1600std::vector<uint8_t> Decompress(std::vector<uint8_t>& data, uint32_t width, uint32_t height, 1616std::vector<uint8_t> Decompress(const uint8_t* data, uint32_t width, uint32_t height,
1601 uint32_t depth, uint32_t block_width, uint32_t block_height) { 1617 uint32_t depth, uint32_t block_width, uint32_t block_height) {
1602 uint32_t blockIdx = 0; 1618 uint32_t blockIdx = 0;
1603 std::vector<uint8_t> outData(height * width * depth * 4); 1619 std::vector<uint8_t> outData(height * width * depth * 4);
@@ -1605,7 +1621,7 @@ std::vector<uint8_t> Decompress(std::vector<uint8_t>& data, uint32_t width, uint
1605 for (uint32_t j = 0; j < height; j += block_height) { 1621 for (uint32_t j = 0; j < height; j += block_height) {
1606 for (uint32_t i = 0; i < width; i += block_width) { 1622 for (uint32_t i = 0; i < width; i += block_width) {
1607 1623
1608 uint8_t* blockPtr = data.data() + blockIdx * 16; 1624 const uint8_t* blockPtr = data + blockIdx * 16;
1609 1625
1610 // Blocks can be at most 12x12 1626 // Blocks can be at most 12x12
1611 uint32_t uncompData[144]; 1627 uint32_t uncompData[144];
diff --git a/src/video_core/textures/astc.h b/src/video_core/textures/astc.h
index d419dd025..991cdba72 100644
--- a/src/video_core/textures/astc.h
+++ b/src/video_core/textures/astc.h
@@ -9,7 +9,7 @@
9 9
10namespace Tegra::Texture::ASTC { 10namespace Tegra::Texture::ASTC {
11 11
12std::vector<uint8_t> Decompress(std::vector<uint8_t>& data, uint32_t width, uint32_t height, 12std::vector<uint8_t> Decompress(const uint8_t* data, uint32_t width, uint32_t height,
13 uint32_t depth, uint32_t block_width, uint32_t block_height); 13 uint32_t depth, uint32_t block_width, uint32_t block_height);
14 14
15} // namespace Tegra::Texture::ASTC 15} // namespace Tegra::Texture::ASTC
diff --git a/src/video_core/textures/convert.cpp b/src/video_core/textures/convert.cpp
new file mode 100644
index 000000000..5e439f036
--- /dev/null
+++ b/src/video_core/textures/convert.cpp
@@ -0,0 +1,92 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <algorithm>
6#include <cstring>
7#include <tuple>
8#include <vector>
9
10#include "common/assert.h"
#include "common/bit_field.h"
11#include "common/common_types.h"
12#include "common/logging/log.h"
13#include "video_core/textures/astc.h"
14#include "video_core/textures/convert.h"
15
16namespace Tegra::Texture {
17
18using VideoCore::Surface::PixelFormat;
19
20template <bool reverse>
21void SwapS8Z24ToZ24S8(u8* data, u32 width, u32 height) {
22 union S8Z24 {
23 BitField<0, 24, u32> z24;
24 BitField<24, 8, u32> s8;
25 };
26 static_assert(sizeof(S8Z24) == 4, "S8Z24 is incorrect size");
27
28 union Z24S8 {
29 BitField<0, 8, u32> s8;
30 BitField<8, 24, u32> z24;
31 };
32 static_assert(sizeof(Z24S8) == 4, "Z24S8 is incorrect size");
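    // Layout note: S8Z24 keeps depth in bits 0-23 and stencil in bits 24-31, while Z24S8 keeps
    // stencil in bits 0-7 and depth in bits 8-31; the loop below swaps between the two layouts
    // in place, one pixel at a time.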
33
34 S8Z24 s8z24_pixel{};
35 Z24S8 z24s8_pixel{};
36 constexpr auto bpp{
37 VideoCore::Surface::GetBytesPerPixel(VideoCore::Surface::PixelFormat::S8Z24)};
38 for (std::size_t y = 0; y < height; ++y) {
39 for (std::size_t x = 0; x < width; ++x) {
40 const std::size_t offset{bpp * (y * width + x)};
41 if constexpr (reverse) {
42 std::memcpy(&z24s8_pixel, &data[offset], sizeof(Z24S8));
43 s8z24_pixel.s8.Assign(z24s8_pixel.s8);
44 s8z24_pixel.z24.Assign(z24s8_pixel.z24);
45 std::memcpy(&data[offset], &s8z24_pixel, sizeof(S8Z24));
46 } else {
47 std::memcpy(&s8z24_pixel, &data[offset], sizeof(S8Z24));
48 z24s8_pixel.s8.Assign(s8z24_pixel.s8);
49 z24s8_pixel.z24.Assign(s8z24_pixel.z24);
50 std::memcpy(&data[offset], &z24s8_pixel, sizeof(Z24S8));
51 }
52 }
53 }
54}
55
56static void ConvertS8Z24ToZ24S8(u8* data, u32 width, u32 height) {
57 SwapS8Z24ToZ24S8<false>(data, width, height);
58}
59
60static void ConvertZ24S8ToS8Z24(u8* data, u32 width, u32 height) {
61 SwapS8Z24ToZ24S8<true>(data, width, height);
62}
63
64void ConvertFromGuestToHost(u8* data, PixelFormat pixel_format, u32 width, u32 height, u32 depth,
65 bool convert_astc, bool convert_s8z24) {
66 if (convert_astc && IsPixelFormatASTC(pixel_format)) {
67 // Convert ASTC pixel formats to RGBA8, as most desktop GPUs do not support ASTC.
68 u32 block_width{};
69 u32 block_height{};
70 std::tie(block_width, block_height) = GetASTCBlockSize(pixel_format);
71 const std::vector<u8> rgba8_data =
72 Tegra::Texture::ASTC::Decompress(data, width, height, depth, block_width, block_height);
73 std::copy(rgba8_data.begin(), rgba8_data.end(), data);
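        // Note: the decoded RGBA8 data is copied back over the input buffer in place, which
        // assumes the caller sized the buffer for the converted RGBA8 data rather than for the
        // compressed ASTC data.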
74
75 } else if (convert_s8z24 && pixel_format == PixelFormat::S8Z24) {
76 Tegra::Texture::ConvertS8Z24ToZ24S8(data, width, height);
77 }
78}
79
80void ConvertFromHostToGuest(u8* data, PixelFormat pixel_format, u32 width, u32 height, u32 depth,
81 bool convert_astc, bool convert_s8z24) {
82 if (convert_astc && IsPixelFormatASTC(pixel_format)) {
83 LOG_CRITICAL(HW_GPU, "Conversion of format {} after texture flushing is not implemented",
84 static_cast<u32>(pixel_format));
85 UNREACHABLE();
86
87 } else if (convert_s8z24 && pixel_format == PixelFormat::S8Z24) {
88 Tegra::Texture::ConvertZ24S8ToS8Z24(data, width, height);
89 }
90}
91
92} // namespace Tegra::Texture \ No newline at end of file
diff --git a/src/video_core/textures/convert.h b/src/video_core/textures/convert.h
new file mode 100644
index 000000000..07cd8b5da
--- /dev/null
+++ b/src/video_core/textures/convert.h
@@ -0,0 +1,18 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include "common/common_types.h"
8#include "video_core/surface.h"
9
10namespace Tegra::Texture {
11
12void ConvertFromGuestToHost(u8* data, VideoCore::Surface::PixelFormat pixel_format, u32 width,
13 u32 height, u32 depth, bool convert_astc, bool convert_s8z24);
14
15void ConvertFromHostToGuest(u8* data, VideoCore::Surface::PixelFormat pixel_format, u32 width,
16 u32 height, u32 depth, bool convert_astc, bool convert_s8z24);
17
18} // namespace Tegra::Texture \ No newline at end of file
diff --git a/src/video_core/textures/decoders.cpp b/src/video_core/textures/decoders.cpp
index 5db75de22..cad7340f5 100644
--- a/src/video_core/textures/decoders.cpp
+++ b/src/video_core/textures/decoders.cpp
@@ -103,8 +103,8 @@ void FastProcessBlock(u8* const swizzled_data, u8* const unswizzled_data, const
103 const u32 swizzle_offset{y_address + table[(xb / fast_swizzle_align) % 4]}; 103 const u32 swizzle_offset{y_address + table[(xb / fast_swizzle_align) % 4]};
104 const u32 out_x = xb * out_bytes_per_pixel / bytes_per_pixel; 104 const u32 out_x = xb * out_bytes_per_pixel / bytes_per_pixel;
105 const u32 pixel_index{out_x + pixel_base}; 105 const u32 pixel_index{out_x + pixel_base};
106 data_ptrs[unswizzle] = swizzled_data + swizzle_offset; 106 data_ptrs[unswizzle ? 1 : 0] = swizzled_data + swizzle_offset;
107 data_ptrs[!unswizzle] = unswizzled_data + pixel_index; 107 data_ptrs[unswizzle ? 0 : 1] = unswizzled_data + pixel_index;
108 std::memcpy(data_ptrs[0], data_ptrs[1], fast_swizzle_align); 108 std::memcpy(data_ptrs[0], data_ptrs[1], fast_swizzle_align);
109 } 109 }
110 pixel_base += stride_x; 110 pixel_base += stride_x;
@@ -154,7 +154,7 @@ void SwizzledData(u8* const swizzled_data, u8* const unswizzled_data, const bool
154 for (u32 xb = 0; xb < blocks_on_x; xb++) { 154 for (u32 xb = 0; xb < blocks_on_x; xb++) {
155 const u32 x_start = xb * block_x_elements; 155 const u32 x_start = xb * block_x_elements;
156 const u32 x_end = std::min(width, x_start + block_x_elements); 156 const u32 x_end = std::min(width, x_start + block_x_elements);
157 if (fast) { 157 if constexpr (fast) {
158 FastProcessBlock(swizzled_data, unswizzled_data, unswizzle, x_start, y_start, 158 FastProcessBlock(swizzled_data, unswizzled_data, unswizzle, x_start, y_start,
159 z_start, x_end, y_end, z_end, tile_offset, xy_block_size, 159 z_start, x_end, y_end, z_end, tile_offset, xy_block_size,
160 layer_z, stride_x, bytes_per_pixel, out_bytes_per_pixel); 160 layer_z, stride_x, bytes_per_pixel, out_bytes_per_pixel);
diff --git a/src/video_core/textures/decoders.h b/src/video_core/textures/decoders.h
index 85b7e9f7b..65df86890 100644
--- a/src/video_core/textures/decoders.h
+++ b/src/video_core/textures/decoders.h
@@ -16,16 +16,13 @@ inline std::size_t GetGOBSize() {
16 return 512; 16 return 512;
17} 17}
18 18
19/** 19/// Unswizzles a swizzled texture without changing its format.
20 * Unswizzles a swizzled texture without changing its format.
21 */
22void UnswizzleTexture(u8* unswizzled_data, VAddr address, u32 tile_size_x, u32 tile_size_y, 20void UnswizzleTexture(u8* unswizzled_data, VAddr address, u32 tile_size_x, u32 tile_size_y,
23 u32 bytes_per_pixel, u32 width, u32 height, u32 depth, 21 u32 bytes_per_pixel, u32 width, u32 height, u32 depth,
24 u32 block_height = TICEntry::DefaultBlockHeight, 22 u32 block_height = TICEntry::DefaultBlockHeight,
25 u32 block_depth = TICEntry::DefaultBlockHeight, u32 width_spacing = 0); 23 u32 block_depth = TICEntry::DefaultBlockHeight, u32 width_spacing = 0);
26/** 24
27 * Unswizzles a swizzled texture without changing its format. 25/// Unswizzles a swizzled texture without changing its format.
28 */
29std::vector<u8> UnswizzleTexture(VAddr address, u32 tile_size_x, u32 tile_size_y, 26std::vector<u8> UnswizzleTexture(VAddr address, u32 tile_size_x, u32 tile_size_y,
30 u32 bytes_per_pixel, u32 width, u32 height, u32 depth, 27 u32 bytes_per_pixel, u32 width, u32 height, u32 depth,
31 u32 block_height = TICEntry::DefaultBlockHeight, 28 u32 block_height = TICEntry::DefaultBlockHeight,
@@ -37,15 +34,11 @@ void CopySwizzledData(u32 width, u32 height, u32 depth, u32 bytes_per_pixel,
37 u32 out_bytes_per_pixel, u8* swizzled_data, u8* unswizzled_data, 34 u32 out_bytes_per_pixel, u8* swizzled_data, u8* unswizzled_data,
38 bool unswizzle, u32 block_height, u32 block_depth, u32 width_spacing); 35 bool unswizzle, u32 block_height, u32 block_depth, u32 width_spacing);
39 36
40/** 37/// Decodes an unswizzled texture into an A8R8G8B8 texture.
41 * Decodes an unswizzled texture into a A8R8G8B8 texture.
42 */
43std::vector<u8> DecodeTexture(const std::vector<u8>& texture_data, TextureFormat format, u32 width, 38std::vector<u8> DecodeTexture(const std::vector<u8>& texture_data, TextureFormat format, u32 width,
44 u32 height); 39 u32 height);
45 40
46/** 41/// This function calculates the correct size of a texture depending on whether it is tiled or not.
47 * This function calculates the correct size of a texture depending if it's tiled or not.
48 */
49std::size_t CalculateSize(bool tiled, u32 bytes_per_pixel, u32 width, u32 height, u32 depth, 42std::size_t CalculateSize(bool tiled, u32 bytes_per_pixel, u32 width, u32 height, u32 depth,
50 u32 block_height, u32 block_depth); 43 u32 block_height, u32 block_depth);
51 44
@@ -53,6 +46,7 @@ std::size_t CalculateSize(bool tiled, u32 bytes_per_pixel, u32 width, u32 height
53void SwizzleSubrect(u32 subrect_width, u32 subrect_height, u32 source_pitch, u32 swizzled_width, 46void SwizzleSubrect(u32 subrect_width, u32 subrect_height, u32 source_pitch, u32 swizzled_width,
54 u32 bytes_per_pixel, VAddr swizzled_data, VAddr unswizzled_data, 47 u32 bytes_per_pixel, VAddr swizzled_data, VAddr unswizzled_data,
55 u32 block_height); 48 u32 block_height);
49
56/// Copies a tiled subrectangle into a linear surface. 50/// Copies a tiled subrectangle into a linear surface.
57void UnswizzleSubrect(u32 subrect_width, u32 subrect_height, u32 dest_pitch, u32 swizzled_width, 51void UnswizzleSubrect(u32 subrect_width, u32 subrect_height, u32 dest_pitch, u32 swizzled_width,
58 u32 bytes_per_pixel, VAddr swizzled_data, VAddr unswizzled_data, 52 u32 bytes_per_pixel, VAddr swizzled_data, VAddr unswizzled_data,
diff --git a/src/yuzu/applets/web_browser.cpp b/src/yuzu/applets/web_browser.cpp
index 6a9138d53..979b9ec14 100644
--- a/src/yuzu/applets/web_browser.cpp
+++ b/src/yuzu/applets/web_browser.cpp
@@ -56,6 +56,8 @@ constexpr char NX_SHIM_INJECT_SCRIPT[] = R"(
56 window.nx.endApplet = function() { 56 window.nx.endApplet = function() {
57 applet_done = true; 57 applet_done = true;
58 }; 58 };
59
60 window.onkeypress = function(e) { if (e.keyCode === 13) { applet_done = true; } };
59)"; 61)";
60 62
61QString GetNXShimInjectionScript() { 63QString GetNXShimInjectionScript() {
diff --git a/src/yuzu/bootmanager.cpp b/src/yuzu/bootmanager.cpp
index 73b04b749..3b070bfbb 100644
--- a/src/yuzu/bootmanager.cpp
+++ b/src/yuzu/bootmanager.cpp
@@ -20,10 +20,7 @@
20EmuThread::EmuThread(GRenderWindow* render_window) : render_window(render_window) {} 20EmuThread::EmuThread(GRenderWindow* render_window) : render_window(render_window) {}
21 21
22void EmuThread::run() { 22void EmuThread::run() {
23 if (!Settings::values.use_multi_core) { 23 render_window->MakeCurrent();
24 // Single core mode must acquire OpenGL context for entire emulation session
25 render_window->MakeCurrent();
26 }
27 24
28 MicroProfileOnThreadCreate("EmuThread"); 25 MicroProfileOnThreadCreate("EmuThread");
29 26
@@ -38,6 +35,11 @@ void EmuThread::run() {
38 35
39 emit LoadProgress(VideoCore::LoadCallbackStage::Complete, 0, 0); 36 emit LoadProgress(VideoCore::LoadCallbackStage::Complete, 0, 0);
40 37
38 if (Settings::values.use_asynchronous_gpu_emulation) {
39 // Release OpenGL context for the GPU thread
40 render_window->DoneCurrent();
41 }
42
41 // holds whether the cpu was running during the last iteration, 43 // holds whether the cpu was running during the last iteration,
42 // so that the DebugModeLeft signal can be emitted before the 44 // so that the DebugModeLeft signal can be emitted before the
43 // next execution step 45 // next execution step
diff --git a/src/yuzu/compatdb.cpp b/src/yuzu/compatdb.cpp
index c09a06520..c8b0a5ec0 100644
--- a/src/yuzu/compatdb.cpp
+++ b/src/yuzu/compatdb.cpp
@@ -53,8 +53,8 @@ void CompatDB::Submit() {
53 case CompatDBPage::Final: 53 case CompatDBPage::Final:
54 back(); 54 back();
55 LOG_DEBUG(Frontend, "Compatibility Rating: {}", compatibility->checkedId()); 55 LOG_DEBUG(Frontend, "Compatibility Rating: {}", compatibility->checkedId());
56 Core::Telemetry().AddField(Telemetry::FieldType::UserFeedback, "Compatibility", 56 Core::System::GetInstance().TelemetrySession().AddField(
57 compatibility->checkedId()); 57 Telemetry::FieldType::UserFeedback, "Compatibility", compatibility->checkedId());
58 58
59 button(NextButton)->setEnabled(false); 59 button(NextButton)->setEnabled(false);
60 button(NextButton)->setText(tr("Submitting")); 60 button(NextButton)->setText(tr("Submitting"));
diff --git a/src/yuzu/configuration/config.cpp b/src/yuzu/configuration/config.cpp
index e9546dadf..74dc6bb28 100644
--- a/src/yuzu/configuration/config.cpp
+++ b/src/yuzu/configuration/config.cpp
@@ -374,6 +374,8 @@ void Config::ReadValues() {
374 qt_config->value("use_disk_shader_cache", false).toBool(); 374 qt_config->value("use_disk_shader_cache", false).toBool();
375 Settings::values.use_accurate_gpu_emulation = 375 Settings::values.use_accurate_gpu_emulation =
376 qt_config->value("use_accurate_gpu_emulation", false).toBool(); 376 qt_config->value("use_accurate_gpu_emulation", false).toBool();
377 Settings::values.use_asynchronous_gpu_emulation =
378 qt_config->value("use_asynchronous_gpu_emulation", false).toBool();
377 379
378 Settings::values.bg_red = qt_config->value("bg_red", 0.0).toFloat(); 380 Settings::values.bg_red = qt_config->value("bg_red", 0.0).toFloat();
379 Settings::values.bg_green = qt_config->value("bg_green", 0.0).toFloat(); 381 Settings::values.bg_green = qt_config->value("bg_green", 0.0).toFloat();
@@ -633,6 +635,8 @@ void Config::SaveValues() {
633 qt_config->setValue("frame_limit", Settings::values.frame_limit); 635 qt_config->setValue("frame_limit", Settings::values.frame_limit);
634 qt_config->setValue("use_disk_shader_cache", Settings::values.use_disk_shader_cache); 636 qt_config->setValue("use_disk_shader_cache", Settings::values.use_disk_shader_cache);
635 qt_config->setValue("use_accurate_gpu_emulation", Settings::values.use_accurate_gpu_emulation); 637 qt_config->setValue("use_accurate_gpu_emulation", Settings::values.use_accurate_gpu_emulation);
638 qt_config->setValue("use_asynchronous_gpu_emulation",
639 Settings::values.use_asynchronous_gpu_emulation);
636 640
637 // Cast to double because Qt's written float values are not human-readable 641 // Cast to double because Qt's written float values are not human-readable
638 qt_config->setValue("bg_red", (double)Settings::values.bg_red); 642 qt_config->setValue("bg_red", (double)Settings::values.bg_red);
diff --git a/src/yuzu/configuration/configure_graphics.cpp b/src/yuzu/configuration/configure_graphics.cpp
index 0f5dd534b..dd1d67488 100644
--- a/src/yuzu/configuration/configure_graphics.cpp
+++ b/src/yuzu/configuration/configure_graphics.cpp
@@ -75,6 +75,8 @@ void ConfigureGraphics::setConfiguration() {
75 ui->frame_limit->setValue(Settings::values.frame_limit); 75 ui->frame_limit->setValue(Settings::values.frame_limit);
76 ui->use_disk_shader_cache->setChecked(Settings::values.use_disk_shader_cache); 76 ui->use_disk_shader_cache->setChecked(Settings::values.use_disk_shader_cache);
77 ui->use_accurate_gpu_emulation->setChecked(Settings::values.use_accurate_gpu_emulation); 77 ui->use_accurate_gpu_emulation->setChecked(Settings::values.use_accurate_gpu_emulation);
78 ui->use_asynchronous_gpu_emulation->setEnabled(!Core::System::GetInstance().IsPoweredOn());
79 ui->use_asynchronous_gpu_emulation->setChecked(Settings::values.use_asynchronous_gpu_emulation);
78 UpdateBackgroundColorButton(QColor::fromRgbF(Settings::values.bg_red, Settings::values.bg_green, 80 UpdateBackgroundColorButton(QColor::fromRgbF(Settings::values.bg_red, Settings::values.bg_green,
79 Settings::values.bg_blue)); 81 Settings::values.bg_blue));
80} 82}
@@ -86,6 +88,8 @@ void ConfigureGraphics::applyConfiguration() {
86 Settings::values.frame_limit = ui->frame_limit->value(); 88 Settings::values.frame_limit = ui->frame_limit->value();
87 Settings::values.use_disk_shader_cache = ui->use_disk_shader_cache->isChecked(); 89 Settings::values.use_disk_shader_cache = ui->use_disk_shader_cache->isChecked();
88 Settings::values.use_accurate_gpu_emulation = ui->use_accurate_gpu_emulation->isChecked(); 90 Settings::values.use_accurate_gpu_emulation = ui->use_accurate_gpu_emulation->isChecked();
91 Settings::values.use_asynchronous_gpu_emulation =
92 ui->use_asynchronous_gpu_emulation->isChecked();
89 Settings::values.bg_red = static_cast<float>(bg_color.redF()); 93 Settings::values.bg_red = static_cast<float>(bg_color.redF());
90 Settings::values.bg_green = static_cast<float>(bg_color.greenF()); 94 Settings::values.bg_green = static_cast<float>(bg_color.greenF());
91 Settings::values.bg_blue = static_cast<float>(bg_color.blueF()); 95 Settings::values.bg_blue = static_cast<float>(bg_color.blueF());
diff --git a/src/yuzu/configuration/configure_graphics.ui b/src/yuzu/configuration/configure_graphics.ui
index 824f5810a..c6767e0ca 100644
--- a/src/yuzu/configuration/configure_graphics.ui
+++ b/src/yuzu/configuration/configure_graphics.ui
@@ -64,6 +64,13 @@
64 </widget> 64 </widget>
65 </item> 65 </item>
66 <item> 66 <item>
67 <widget class="QCheckBox" name="use_asynchronous_gpu_emulation">
68 <property name="text">
69 <string>Use asynchronous GPU emulation</string>
70 </property>
71 </widget>
72 </item>
73 <item>
67 <layout class="QHBoxLayout" name="horizontalLayout"> 74 <layout class="QHBoxLayout" name="horizontalLayout">
68 <item> 75 <item>
69 <widget class="QLabel" name="label"> 76 <widget class="QLabel" name="label">
diff --git a/src/yuzu/main.cpp b/src/yuzu/main.cpp
index 5ab7896d4..41ba3c4c6 100644
--- a/src/yuzu/main.cpp
+++ b/src/yuzu/main.cpp
@@ -11,6 +11,7 @@
11#include "applets/profile_select.h" 11#include "applets/profile_select.h"
12#include "applets/software_keyboard.h" 12#include "applets/software_keyboard.h"
13#include "applets/web_browser.h" 13#include "applets/web_browser.h"
14#include "configuration/configure_input.h"
14#include "configuration/configure_per_general.h" 15#include "configuration/configure_per_general.h"
15#include "core/file_sys/vfs.h" 16#include "core/file_sys/vfs.h"
16#include "core/file_sys/vfs_real.h" 17#include "core/file_sys/vfs_real.h"
@@ -339,6 +340,11 @@ void GMainWindow::WebBrowserOpenPage(std::string_view filename, std::string_view
339 .arg(QString::fromStdString(std::to_string(key_code)))); 340 .arg(QString::fromStdString(std::to_string(key_code))));
340 }; 341 };
341 342
343 QMessageBox::information(
344 this, tr("Exit"),
345 tr("To exit the web application, use the game-provided controls to select exit, select the "
346 "'Exit Web Applet' option in the menu bar, or press the 'Enter' key."));
347
342 bool running_exit_check = false; 348 bool running_exit_check = false;
343 while (!finished) { 349 while (!finished) {
344 QApplication::processEvents(); 350 QApplication::processEvents();
@@ -522,6 +528,7 @@ void GMainWindow::InitializeHotkeys() {
522 Qt::ApplicationShortcut); 528 Qt::ApplicationShortcut);
523 hotkey_registry.RegisterHotkey("Main Window", "Capture Screenshot", 529 hotkey_registry.RegisterHotkey("Main Window", "Capture Screenshot",
524 QKeySequence(QKeySequence::Print)); 530 QKeySequence(QKeySequence::Print));
531 hotkey_registry.RegisterHotkey("Main Window", "Change Docked Mode", QKeySequence(Qt::Key_F10));
525 532
526 hotkey_registry.LoadHotkeys(); 533 hotkey_registry.LoadHotkeys();
527 534
@@ -591,6 +598,12 @@ void GMainWindow::InitializeHotkeys() {
591 OnCaptureScreenshot(); 598 OnCaptureScreenshot();
592 } 599 }
593 }); 600 });
601 connect(hotkey_registry.GetHotkey("Main Window", "Change Docked Mode", this),
602 &QShortcut::activated, this, [&] {
603 Settings::values.use_docked_mode = !Settings::values.use_docked_mode;
604 OnDockedModeChanged(!Settings::values.use_docked_mode,
605 Settings::values.use_docked_mode);
606 });
594} 607}
595 608
596void GMainWindow::SetDefaultUIGeometry() { 609void GMainWindow::SetDefaultUIGeometry() {
@@ -849,7 +862,7 @@ bool GMainWindow::LoadROM(const QString& filename) {
849 } 862 }
850 game_path = filename; 863 game_path = filename;
851 864
852 Core::Telemetry().AddField(Telemetry::FieldType::App, "Frontend", "Qt"); 865 system.TelemetrySession().AddField(Telemetry::FieldType::App, "Frontend", "Qt");
853 return true; 866 return true;
854} 867}
855 868
diff --git a/src/yuzu_cmd/config.cpp b/src/yuzu_cmd/config.cpp
index ff05b3179..32e78049c 100644
--- a/src/yuzu_cmd/config.cpp
+++ b/src/yuzu_cmd/config.cpp
@@ -346,7 +346,7 @@ void Config::ReadValues() {
346 346
347 // Renderer 347 // Renderer
348 Settings::values.resolution_factor = 348 Settings::values.resolution_factor =
349 (float)sdl2_config->GetReal("Renderer", "resolution_factor", 1.0); 349 static_cast<float>(sdl2_config->GetReal("Renderer", "resolution_factor", 1.0));
350 Settings::values.use_frame_limit = sdl2_config->GetBoolean("Renderer", "use_frame_limit", true); 350 Settings::values.use_frame_limit = sdl2_config->GetBoolean("Renderer", "use_frame_limit", true);
351 Settings::values.frame_limit = 351 Settings::values.frame_limit =
352 static_cast<u16>(sdl2_config->GetInteger("Renderer", "frame_limit", 100)); 352 static_cast<u16>(sdl2_config->GetInteger("Renderer", "frame_limit", 100));
@@ -354,17 +354,20 @@ void Config::ReadValues() {
354 sdl2_config->GetBoolean("Renderer", "use_disk_shader_cache", false); 354 sdl2_config->GetBoolean("Renderer", "use_disk_shader_cache", false);
355 Settings::values.use_accurate_gpu_emulation = 355 Settings::values.use_accurate_gpu_emulation =
356 sdl2_config->GetBoolean("Renderer", "use_accurate_gpu_emulation", false); 356 sdl2_config->GetBoolean("Renderer", "use_accurate_gpu_emulation", false);
357 Settings::values.use_asynchronous_gpu_emulation =
358 sdl2_config->GetBoolean("Renderer", "use_asynchronous_gpu_emulation", false);
357 359
358 Settings::values.bg_red = (float)sdl2_config->GetReal("Renderer", "bg_red", 0.0); 360 Settings::values.bg_red = static_cast<float>(sdl2_config->GetReal("Renderer", "bg_red", 0.0));
359 Settings::values.bg_green = (float)sdl2_config->GetReal("Renderer", "bg_green", 0.0); 361 Settings::values.bg_green =
360 Settings::values.bg_blue = (float)sdl2_config->GetReal("Renderer", "bg_blue", 0.0); 362 static_cast<float>(sdl2_config->GetReal("Renderer", "bg_green", 0.0));
363 Settings::values.bg_blue = static_cast<float>(sdl2_config->GetReal("Renderer", "bg_blue", 0.0));
361 364
362 // Audio 365 // Audio
363 Settings::values.sink_id = sdl2_config->Get("Audio", "output_engine", "auto"); 366 Settings::values.sink_id = sdl2_config->Get("Audio", "output_engine", "auto");
364 Settings::values.enable_audio_stretching = 367 Settings::values.enable_audio_stretching =
365 sdl2_config->GetBoolean("Audio", "enable_audio_stretching", true); 368 sdl2_config->GetBoolean("Audio", "enable_audio_stretching", true);
366 Settings::values.audio_device_id = sdl2_config->Get("Audio", "output_device", "auto"); 369 Settings::values.audio_device_id = sdl2_config->Get("Audio", "output_device", "auto");
367 Settings::values.volume = sdl2_config->GetReal("Audio", "volume", 1); 370 Settings::values.volume = static_cast<float>(sdl2_config->GetReal("Audio", "volume", 1));
368 371
369 Settings::values.language_index = sdl2_config->GetInteger("System", "language_index", 1); 372 Settings::values.language_index = sdl2_config->GetInteger("System", "language_index", 1);
370 373
diff --git a/src/yuzu_cmd/default_ini.h b/src/yuzu_cmd/default_ini.h
index a81986f8e..6538af098 100644
--- a/src/yuzu_cmd/default_ini.h
+++ b/src/yuzu_cmd/default_ini.h
@@ -118,6 +118,10 @@ use_disk_shader_cache =
118# 0 (default): Off (fast), 1 : On (slow) 118# 0 (default): Off (fast), 1 : On (slow)
119use_accurate_gpu_emulation = 119use_accurate_gpu_emulation =
120 120
121# Whether to use asynchronous GPU emulation
122# 0 (default): Off (slow), 1 : On (fast)
123use_asynchronous_gpu_emulation =
124
121# The clear color for the renderer. What shows up on the sides of the bottom screen. 125# The clear color for the renderer. What shows up on the sides of the bottom screen.
122# Must be in range of 0.0-1.0. Defaults to 1.0 for all. 126# Must be in range of 0.0-1.0. Defaults to 1.0 for all.
123bg_red = 127bg_red =
diff --git a/src/yuzu_cmd/yuzu.cpp b/src/yuzu_cmd/yuzu.cpp
index c34b5467f..c6c66a787 100644
--- a/src/yuzu_cmd/yuzu.cpp
+++ b/src/yuzu_cmd/yuzu.cpp
@@ -216,7 +216,7 @@ int main(int argc, char** argv) {
216 } 216 }
217 } 217 }
218 218
219 Core::Telemetry().AddField(Telemetry::FieldType::App, "Frontend", "SDL"); 219 system.TelemetrySession().AddField(Telemetry::FieldType::App, "Frontend", "SDL");
220 220
221 system.Renderer().Rasterizer().LoadDiskResources(); 221 system.Renderer().Rasterizer().LoadDiskResources();
222 222