Diffstat (limited to 'src')
53 files changed, 818 insertions, 425 deletions
diff --git a/src/audio_core/stream.cpp b/src/audio_core/stream.cpp
index 4ce2d374e..8ab5649df 100644
--- a/src/audio_core/stream.cpp
+++ b/src/audio_core/stream.cpp
@@ -37,7 +37,7 @@ Stream::Stream(u32 sample_rate, Format format, ReleaseCallback&& release_callbac
     : sample_rate{sample_rate}, format{format}, release_callback{std::move(release_callback)},
       sink_stream{sink_stream}, name{std::move(name_)} {
 
-    release_event = CoreTiming::RegisterEvent(
+    release_event = Core::Timing::RegisterEvent(
         name, [this](u64 userdata, int cycles_late) { ReleaseActiveBuffer(); });
 }
 
@@ -57,7 +57,7 @@ Stream::State Stream::GetState() const {
 
 s64 Stream::GetBufferReleaseCycles(const Buffer& buffer) const {
     const std::size_t num_samples{buffer.GetSamples().size() / GetNumChannels()};
-    return CoreTiming::usToCycles((static_cast<u64>(num_samples) * 1000000) / sample_rate);
+    return Core::Timing::usToCycles((static_cast<u64>(num_samples) * 1000000) / sample_rate);
 }
 
 static void VolumeAdjustSamples(std::vector<s16>& samples) {
@@ -99,7 +99,8 @@ void Stream::PlayNextBuffer() {
 
     sink_stream.EnqueueSamples(GetNumChannels(), active_buffer->GetSamples());
 
-    CoreTiming::ScheduleEventThreadsafe(GetBufferReleaseCycles(*active_buffer), release_event, {});
+    Core::Timing::ScheduleEventThreadsafe(GetBufferReleaseCycles(*active_buffer), release_event,
+                                          {});
 }
 
 void Stream::ReleaseActiveBuffer() {
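For context, GetBufferReleaseCycles converts a buffer's playback duration into core cycles before scheduling the release event. A minimal sketch of that arithmetic, assuming the ~1.02 GHz BASE_CLOCK_RATE constant from core_timing_util.h (illustrative, not part of this commit):

    #include <cstdint>

    // Mirrors Core::Timing::usToCycles under the assumed clock-rate constant.
    constexpr std::int64_t kBaseClockRate = 1019215872; // ~1.02 GHz

    constexpr std::int64_t UsToCycles(std::int64_t us) {
        return (kBaseClockRate * us) / 1000000;
    }

    // 480 sample frames at 48 kHz play for (480 * 1000000) / 48000 = 10000 us,
    // so the release event fires ~10.19 million cycles after PlayNextBuffer().
    static_assert(UsToCycles((480 * 1000000LL) / 48000) == 10192158, "10 ms of audio");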
diff --git a/src/audio_core/stream.h b/src/audio_core/stream.h
index aebfeb51d..caa775544 100644
--- a/src/audio_core/stream.h
+++ b/src/audio_core/stream.h
@@ -13,7 +13,7 @@
 #include "audio_core/buffer.h"
 #include "common/common_types.h"
 
-namespace CoreTiming {
+namespace Core::Timing {
 struct EventType;
 }
 
@@ -91,16 +91,16 @@ private:
     /// Gets the number of core cycles when the specified buffer will be released
     s64 GetBufferReleaseCycles(const Buffer& buffer) const;
 
-    u32 sample_rate;                        ///< Sample rate of the stream
-    Format format;                          ///< Format of the stream
-    ReleaseCallback release_callback;       ///< Buffer release callback for the stream
-    State state{State::Stopped};            ///< Playback state of the stream
-    CoreTiming::EventType* release_event{}; ///< Core timing release event for the stream
-    BufferPtr active_buffer;                ///< Actively playing buffer in the stream
-    std::queue<BufferPtr> queued_buffers;   ///< Buffers queued to be played in the stream
-    std::queue<BufferPtr> released_buffers; ///< Buffers recently released from the stream
-    SinkStream& sink_stream;                ///< Output sink for the stream
-    std::string name;                       ///< Name of the stream, must be unique
+    u32 sample_rate;                          ///< Sample rate of the stream
+    Format format;                            ///< Format of the stream
+    ReleaseCallback release_callback;         ///< Buffer release callback for the stream
+    State state{State::Stopped};              ///< Playback state of the stream
+    Core::Timing::EventType* release_event{}; ///< Core timing release event for the stream
+    BufferPtr active_buffer;                  ///< Actively playing buffer in the stream
+    std::queue<BufferPtr> queued_buffers;     ///< Buffers queued to be played in the stream
+    std::queue<BufferPtr> released_buffers;   ///< Buffers recently released from the stream
+    SinkStream& sink_stream;                  ///< Output sink for the stream
+    std::string name;                         ///< Name of the stream, must be unique
 };
 
 using StreamPtr = std::shared_ptr<Stream>;
diff --git a/src/common/logging/backend.cpp b/src/common/logging/backend.cpp
index 12f6d0114..a5e031189 100644
--- a/src/common/logging/backend.cpp
+++ b/src/common/logging/backend.cpp
@@ -232,6 +232,7 @@ void DebuggerBackend::Write(const Entry& entry) {
     CLS(Render) \
     SUB(Render, Software) \
     SUB(Render, OpenGL) \
+    SUB(Render, Vulkan) \
     CLS(Audio) \
     SUB(Audio, DSP) \
     SUB(Audio, Sink) \
diff --git a/src/common/logging/log.h b/src/common/logging/log.h
index d4ec31ec3..8ed6d5050 100644
--- a/src/common/logging/log.h
+++ b/src/common/logging/log.h
@@ -112,6 +112,7 @@ enum class Class : ClassType {
     Render,          ///< Emulator video output and hardware acceleration
     Render_Software, ///< Software renderer backend
     Render_OpenGL,   ///< OpenGL backend
+    Render_Vulkan,   ///< Vulkan backend
    Audio,           ///< Audio emulation
     Audio_DSP,       ///< The HLE implementation of the DSP
     Audio_Sink,      ///< Emulator audio output backend
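With Render_Vulkan registered in both the enum and the backend's class list, the new backend can log under its own filterable class. A usage sketch assuming the existing LOG_* macros (the function and message here are hypothetical):

    #include "common/logging/log.h"

    void OnVulkanDeviceLost() {
        // Filterable like any other class, e.g. via a "Render.Vulkan:Debug" filter entry.
        LOG_CRITICAL(Render_Vulkan, "device lost while submitting command buffer");
    }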
diff --git a/src/common/threadsafe_queue.h b/src/common/threadsafe_queue.h
index edf13bc49..f553efdc9 100644
--- a/src/common/threadsafe_queue.h
+++ b/src/common/threadsafe_queue.h
@@ -7,17 +7,16 @@
 // a simple lockless thread-safe,
 // single reader, single writer queue
 
-#include <algorithm>
 #include <atomic>
 #include <cstddef>
 #include <mutex>
-#include "common/common_types.h"
+#include <utility>
 
 namespace Common {
-template <typename T, bool NeedSize = true>
+template <typename T>
 class SPSCQueue {
 public:
-    SPSCQueue() : size(0) {
+    SPSCQueue() {
         write_ptr = read_ptr = new ElementPtr();
     }
     ~SPSCQueue() {
@@ -25,13 +24,12 @@ public:
         delete read_ptr;
     }
 
-    u32 Size() const {
-        static_assert(NeedSize, "using Size() on FifoQueue without NeedSize");
+    std::size_t Size() const {
         return size.load();
     }
 
     bool Empty() const {
-        return !read_ptr->next.load();
+        return Size() == 0;
     }
 
     T& Front() const {
@@ -47,13 +45,13 @@ public:
         ElementPtr* new_ptr = new ElementPtr();
         write_ptr->next.store(new_ptr, std::memory_order_release);
         write_ptr = new_ptr;
-        if (NeedSize)
-            size++;
+
+        ++size;
     }
 
     void Pop() {
-        if (NeedSize)
-            size--;
+        --size;
+
         ElementPtr* tmpptr = read_ptr;
         // advance the read pointer
         read_ptr = tmpptr->next.load();
@@ -66,8 +64,7 @@ public:
         if (Empty())
             return false;
 
-        if (NeedSize)
-            size--;
+        --size;
 
         ElementPtr* tmpptr = read_ptr;
         read_ptr = tmpptr->next.load(std::memory_order_acquire);
@@ -89,7 +86,7 @@ private:
     // and a pointer to the next ElementPtr
     class ElementPtr {
    public:
-        ElementPtr() : next(nullptr) {}
+        ElementPtr() {}
         ~ElementPtr() {
             ElementPtr* next_ptr = next.load();
 
@@ -98,21 +95,21 @@ private:
         }
 
         T current;
-        std::atomic<ElementPtr*> next;
+        std::atomic<ElementPtr*> next{nullptr};
     };
 
     ElementPtr* write_ptr;
     ElementPtr* read_ptr;
-    std::atomic<u32> size;
+    std::atomic_size_t size{0};
 };
 
 // a simple thread-safe,
 // single reader, multiple writer queue
 
-template <typename T, bool NeedSize = true>
+template <typename T>
 class MPSCQueue {
 public:
-    u32 Size() const {
+    std::size_t Size() const {
         return spsc_queue.Size();
     }
 
@@ -144,7 +141,7 @@ public:
     }
 
 private:
-    SPSCQueue<T, NeedSize> spsc_queue;
+    SPSCQueue<T> spsc_queue;
     std::mutex write_lock;
 };
 } // namespace Common
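Dropping NeedSize means the atomic counter is maintained unconditionally, which is what allows Empty() to become Size() == 0 and keeps both queries O(1). A minimal usage sketch of the simplified single-producer/single-consumer interface (illustrative only):

    #include <cstddef>
    #include <thread>

    #include "common/threadsafe_queue.h"

    void Demo() {
        Common::SPSCQueue<int> queue; // previously SPSCQueue<int, NeedSize>

        std::thread producer([&queue] {
            for (int i = 0; i < 100; ++i)
                queue.Push(i); // every Push now increments size
        });

        int value = 0;
        std::size_t received = 0;
        while (received < 100) {
            if (queue.Pop(value)) // returns false while Empty(), i.e. Size() == 0
                ++received;
        }
        producer.join();
    }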
diff --git a/src/core/arm/dynarmic/arm_dynarmic.cpp b/src/core/arm/dynarmic/arm_dynarmic.cpp
index afbda8d8b..f28951f8a 100644
--- a/src/core/arm/dynarmic/arm_dynarmic.cpp
+++ b/src/core/arm/dynarmic/arm_dynarmic.cpp
@@ -112,14 +112,14 @@ public:
         // Always execute at least one tick.
         amortized_ticks = std::max<u64>(amortized_ticks, 1);
 
-        CoreTiming::AddTicks(amortized_ticks);
+        Timing::AddTicks(amortized_ticks);
         num_interpreted_instructions = 0;
     }
     u64 GetTicksRemaining() override {
-        return std::max(CoreTiming::GetDowncount(), 0);
+        return std::max(Timing::GetDowncount(), 0);
     }
     u64 GetCNTPCT() override {
-        return CoreTiming::GetTicks();
+        return Timing::GetTicks();
     }
 
     ARM_Dynarmic& parent;
diff --git a/src/core/arm/unicorn/arm_unicorn.cpp b/src/core/arm/unicorn/arm_unicorn.cpp
index c455c81fb..c36c15c02 100644
--- a/src/core/arm/unicorn/arm_unicorn.cpp
+++ b/src/core/arm/unicorn/arm_unicorn.cpp
@@ -177,7 +177,7 @@ void ARM_Unicorn::Run() {
     if (GDBStub::IsServerEnabled()) {
         ExecuteInstructions(std::max(4000000, 0));
     } else {
-        ExecuteInstructions(std::max(CoreTiming::GetDowncount(), 0));
+        ExecuteInstructions(std::max(Timing::GetDowncount(), 0));
     }
 }
 
@@ -190,7 +190,7 @@ MICROPROFILE_DEFINE(ARM_Jit_Unicorn, "ARM JIT", "Unicorn", MP_RGB(255, 64, 64));
 void ARM_Unicorn::ExecuteInstructions(int num_instructions) {
     MICROPROFILE_SCOPE(ARM_Jit_Unicorn);
     CHECKED(uc_emu_start(uc, GetPC(), 1ULL << 63, 0, num_instructions));
-    CoreTiming::AddTicks(num_instructions);
+    Timing::AddTicks(num_instructions);
     if (GDBStub::IsServerEnabled()) {
         if (last_bkpt_hit) {
             uc_reg_write(uc, UC_ARM64_REG_PC, &last_bkpt.address);
diff --git a/src/core/core.cpp b/src/core/core.cpp
index 1dd576c26..4d9d21ee4 100644
--- a/src/core/core.cpp
+++ b/src/core/core.cpp
@@ -94,7 +94,7 @@ struct System::Impl {
     ResultStatus Init(System& system, Frontend::EmuWindow& emu_window) {
         LOG_DEBUG(HW_Memory, "initialized OK");
 
-        CoreTiming::Init();
+        Timing::Init();
         kernel.Initialize();
 
         const auto current_time = std::chrono::duration_cast<std::chrono::seconds>(
@@ -205,7 +205,7 @@ struct System::Impl {
 
         // Shutdown kernel and core timing
         kernel.Shutdown();
-        CoreTiming::Shutdown();
+        Timing::Shutdown();
 
         // Close app loader
         app_loader.reset();
@@ -232,7 +232,7 @@ struct System::Impl {
     }
 
     PerfStatsResults GetAndResetPerfStats() {
-        return perf_stats.GetAndResetStats(CoreTiming::GetGlobalTimeUs());
+        return perf_stats.GetAndResetStats(Timing::GetGlobalTimeUs());
     }
 
     Kernel::KernelCore kernel;
diff --git a/src/core/core_cpu.cpp b/src/core/core_cpu.cpp
index fffda8a99..452366250 100644
--- a/src/core/core_cpu.cpp
+++ b/src/core/core_cpu.cpp
@@ -93,14 +93,14 @@ void Cpu::RunLoop(bool tight_loop) {
 
         if (IsMainCore()) {
             // TODO(Subv): Only let CoreTiming idle if all 4 cores are idling.
-            CoreTiming::Idle();
-            CoreTiming::Advance();
+            Timing::Idle();
+            Timing::Advance();
         }
 
         PrepareReschedule();
     } else {
         if (IsMainCore()) {
-            CoreTiming::Advance();
+            Timing::Advance();
         }
 
         if (tight_loop) {
diff --git a/src/core/core_timing.cpp b/src/core/core_timing.cpp
index 7953c8720..4ea00c277 100644
--- a/src/core/core_timing.cpp
+++ b/src/core/core_timing.cpp
@@ -15,7 +15,7 @@
 #include "common/threadsafe_queue.h"
 #include "core/core_timing_util.h"
 
-namespace CoreTiming {
+namespace Core::Timing {
 
 static s64 global_timer;
 static int slice_length;
@@ -54,10 +54,10 @@ static std::vector<Event> event_queue;
 static u64 event_fifo_id;
 // the queue for storing the events from other threads threadsafe until they will be added
 // to the event_queue by the emu thread
-static Common::MPSCQueue<Event, false> ts_queue;
+static Common::MPSCQueue<Event> ts_queue;
 
 // the queue for unscheduling the events from other threads threadsafe
-static Common::MPSCQueue<std::pair<const EventType*, u64>, false> unschedule_queue;
+static Common::MPSCQueue<std::pair<const EventType*, u64>> unschedule_queue;
 
 constexpr int MAX_SLICE_LENGTH = 20000;
 
@@ -70,8 +70,6 @@ static bool is_global_timer_sane;
 
 static EventType* ev_lost = nullptr;
 
-static void EmptyTimedCallback(u64 userdata, s64 cyclesLate) {}
-
 EventType* RegisterEvent(const std::string& name, TimedCallback callback) {
     // check for existing type with same name.
     // we want event type names to remain unique so that we can use them for serialization.
@@ -104,7 +102,9 @@ void Init() {
     is_global_timer_sane = true;
 
     event_fifo_id = 0;
-    ev_lost = RegisterEvent("_lost_event", &EmptyTimedCallback);
+
+    const auto empty_timed_callback = [](u64, s64) {};
+    ev_lost = RegisterEvent("_lost_event", empty_timed_callback);
 }
 
 void Shutdown() {
@@ -242,4 +242,4 @@ int GetDowncount() {
     return downcount;
 }
 
-} // namespace CoreTiming
+} // namespace Core::Timing
diff --git a/src/core/core_timing.h b/src/core/core_timing.h
index 9ed757bd7..093989d4c 100644
--- a/src/core/core_timing.h
+++ b/src/core/core_timing.h
@@ -22,7 +22,7 @@
 #include <string>
 #include "common/common_types.h"
 
-namespace CoreTiming {
+namespace Core::Timing {
 
 struct EventType;
 
@@ -92,4 +92,4 @@ std::chrono::microseconds GetGlobalTimeUs();
 
 int GetDowncount();
 
-} // namespace CoreTiming
+} // namespace Core::Timing
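The renamed API itself is unchanged: callers register a named callback once, then schedule it by a cycle delay, optionally re-arming inside the callback. A sketch of the pattern most call sites in this commit follow (names are hypothetical; assumes the msToCycles helper that lives alongside usToCycles in core_timing_util.h):

    #include "core/core_timing.h"
    #include "core/core_timing_util.h"

    namespace {
    Core::Timing::EventType* tick_event = nullptr;
    }

    void InitTicker() {
        // Event names must stay unique; they double as serialization keys.
        tick_event = Core::Timing::RegisterEvent(
            "Example::Tick", [](u64 userdata, int cycles_late) {
                // Subtract lateness when re-arming so the period does not drift.
                Core::Timing::ScheduleEvent(Core::Timing::msToCycles(10) - cycles_late,
                                            tick_event);
            });
        Core::Timing::ScheduleEvent(Core::Timing::msToCycles(10), tick_event);
    }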
diff --git a/src/core/core_timing_util.cpp b/src/core/core_timing_util.cpp
index 73dea4edb..88ff70233 100644
--- a/src/core/core_timing_util.cpp
+++ b/src/core/core_timing_util.cpp
@@ -8,7 +8,7 @@
 #include <limits>
 #include "common/logging/log.h"
 
-namespace CoreTiming {
+namespace Core::Timing {
 
 constexpr u64 MAX_VALUE_TO_MULTIPLY = std::numeric_limits<s64>::max() / BASE_CLOCK_RATE;
 
@@ -60,4 +60,4 @@ s64 nsToCycles(u64 ns) {
     return (BASE_CLOCK_RATE * static_cast<s64>(ns)) / 1000000000;
 }
 
-} // namespace CoreTiming
+} // namespace Core::Timing
diff --git a/src/core/core_timing_util.h b/src/core/core_timing_util.h
index 5c3718782..513cfac1b 100644
--- a/src/core/core_timing_util.h
+++ b/src/core/core_timing_util.h
@@ -6,7 +6,7 @@
 
 #include "common/common_types.h"
 
-namespace CoreTiming {
+namespace Core::Timing {
 
 // The below clock rate is based on Switch's clockspeed being widely known as 1.020GHz
 // The exact value used is of course unverified.
@@ -61,4 +61,4 @@ inline u64 cyclesToMs(s64 cycles) {
     return cycles * 1000 / BASE_CLOCK_RATE;
 }
 
-} // namespace CoreTiming
+} // namespace Core::Timing
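These conversions multiply by the clock rate before dividing, so the MAX_VALUE_TO_MULTIPLY guard seen in core_timing_util.cpp marks where that multiply would overflow a signed 64-bit value. A rough bound, assuming the 1019215872 constant (a sketch, not from this commit):

    #include <cstdint>
    #include <limits>

    constexpr std::int64_t kBaseClockRate = 1019215872; // assumed ~1.02 GHz

    // Largest nanosecond count for which kBaseClockRate * ns still fits in s64:
    constexpr std::int64_t kMaxValueToMultiply =
        std::numeric_limits<std::int64_t>::max() / kBaseClockRate;

    // About 9.05e9 ns, i.e. ~9 seconds; longer spans must divide before multiplying.
    static_assert(kMaxValueToMultiply / 1000000000 == 9, "overflow headroom is ~9 s");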
diff --git a/src/core/hle/kernel/kernel.cpp b/src/core/hle/kernel/kernel.cpp
index 7a524ce5a..3721ae8fe 100644
--- a/src/core/hle/kernel/kernel.cpp
+++ b/src/core/hle/kernel/kernel.cpp
@@ -124,7 +124,7 @@ struct KernelCore::Impl {
 
     void InitializeThreads() {
         thread_wakeup_event_type =
-            CoreTiming::RegisterEvent("ThreadWakeupCallback", ThreadWakeupCallback);
+            Core::Timing::RegisterEvent("ThreadWakeupCallback", ThreadWakeupCallback);
     }
 
     std::atomic<u32> next_object_id{0};
@@ -137,7 +137,7 @@ struct KernelCore::Impl {
 
     SharedPtr<ResourceLimit> system_resource_limit;
 
-    CoreTiming::EventType* thread_wakeup_event_type = nullptr;
+    Core::Timing::EventType* thread_wakeup_event_type = nullptr;
     // TODO(yuriks): This can be removed if Thread objects are explicitly pooled in the future,
     // allowing us to simply use a pool index or similar.
     Kernel::HandleTable thread_wakeup_callback_handle_table;
@@ -213,7 +213,7 @@ u64 KernelCore::CreateNewProcessID() {
     return impl->next_process_id++;
 }
 
-CoreTiming::EventType* KernelCore::ThreadWakeupCallbackEventType() const {
+Core::Timing::EventType* KernelCore::ThreadWakeupCallbackEventType() const {
     return impl->thread_wakeup_event_type;
 }
 
diff --git a/src/core/hle/kernel/kernel.h b/src/core/hle/kernel/kernel.h
index c643a6401..7406f107e 100644
--- a/src/core/hle/kernel/kernel.h
+++ b/src/core/hle/kernel/kernel.h
@@ -11,7 +11,7 @@
 template <typename T>
 class ResultVal;
 
-namespace CoreTiming {
+namespace Core::Timing {
 struct EventType;
 }
 
@@ -89,7 +89,7 @@ private:
     u64 CreateNewThreadID();
 
     /// Retrieves the event type used for thread wakeup callbacks.
-    CoreTiming::EventType* ThreadWakeupCallbackEventType() const;
+    Core::Timing::EventType* ThreadWakeupCallbackEventType() const;
 
     /// Provides a reference to the thread wakeup callback handle table.
     Kernel::HandleTable& ThreadWakeupCallbackHandleTable();
diff --git a/src/core/hle/kernel/scheduler.cpp b/src/core/hle/kernel/scheduler.cpp
index df4d6cf0a..9e2517e1b 100644
--- a/src/core/hle/kernel/scheduler.cpp
+++ b/src/core/hle/kernel/scheduler.cpp
@@ -111,7 +111,7 @@ void Scheduler::SwitchContext(Thread* new_thread) {
 
 void Scheduler::UpdateLastContextSwitchTime(Thread* thread, Process* process) {
     const u64 prev_switch_ticks = last_context_switch_time;
-    const u64 most_recent_switch_ticks = CoreTiming::GetTicks();
+    const u64 most_recent_switch_ticks = Core::Timing::GetTicks();
     const u64 update_ticks = most_recent_switch_ticks - prev_switch_ticks;
 
     if (thread != nullptr) {
diff --git a/src/core/hle/kernel/svc.cpp b/src/core/hle/kernel/svc.cpp
index 7cfecb68c..5f040f79f 100644
--- a/src/core/hle/kernel/svc.cpp
+++ b/src/core/hle/kernel/svc.cpp
@@ -927,9 +927,9 @@ static ResultCode GetInfo(u64* result, u64 info_id, u64 handle, u64 info_sub_id)
         if (same_thread && info_sub_id == 0xFFFFFFFFFFFFFFFF) {
             const u64 thread_ticks = current_thread->GetTotalCPUTimeTicks();
 
-            out_ticks = thread_ticks + (CoreTiming::GetTicks() - prev_ctx_ticks);
+            out_ticks = thread_ticks + (Core::Timing::GetTicks() - prev_ctx_ticks);
         } else if (same_thread && info_sub_id == system.CurrentCoreIndex()) {
-            out_ticks = CoreTiming::GetTicks() - prev_ctx_ticks;
+            out_ticks = Core::Timing::GetTicks() - prev_ctx_ticks;
         }
 
         *result = out_ticks;
@@ -1546,10 +1546,10 @@ static ResultCode SignalToAddress(VAddr address, u32 type, s32 value, s32 num_to
 static u64 GetSystemTick() {
     LOG_TRACE(Kernel_SVC, "called");
 
-    const u64 result{CoreTiming::GetTicks()};
+    const u64 result{Core::Timing::GetTicks()};
 
     // Advance time to defeat dumb games that busy-wait for the frame to end.
-    CoreTiming::AddTicks(400);
+    Core::Timing::AddTicks(400);
 
     return result;
 }
diff --git a/src/core/hle/kernel/thread.cpp b/src/core/hle/kernel/thread.cpp
index d3984dfc4..7881c2b90 100644
--- a/src/core/hle/kernel/thread.cpp
+++ b/src/core/hle/kernel/thread.cpp
@@ -43,7 +43,7 @@ Thread::~Thread() = default;
 
 void Thread::Stop() {
     // Cancel any outstanding wakeup events for this thread
-    CoreTiming::UnscheduleEvent(kernel.ThreadWakeupCallbackEventType(), callback_handle);
+    Core::Timing::UnscheduleEvent(kernel.ThreadWakeupCallbackEventType(), callback_handle);
     kernel.ThreadWakeupCallbackHandleTable().Close(callback_handle);
     callback_handle = 0;
 
@@ -85,12 +85,13 @@ void Thread::WakeAfterDelay(s64 nanoseconds) {
 
     // This function might be called from any thread so we have to be cautious and use the
     // thread-safe version of ScheduleEvent.
-    CoreTiming::ScheduleEventThreadsafe(CoreTiming::nsToCycles(nanoseconds),
-                                        kernel.ThreadWakeupCallbackEventType(), callback_handle);
+    Core::Timing::ScheduleEventThreadsafe(Core::Timing::nsToCycles(nanoseconds),
+                                          kernel.ThreadWakeupCallbackEventType(), callback_handle);
 }
 
 void Thread::CancelWakeupTimer() {
-    CoreTiming::UnscheduleEventThreadsafe(kernel.ThreadWakeupCallbackEventType(), callback_handle);
+    Core::Timing::UnscheduleEventThreadsafe(kernel.ThreadWakeupCallbackEventType(),
+                                            callback_handle);
 }
 
 static std::optional<s32> GetNextProcessorId(u64 mask) {
@@ -197,7 +198,7 @@ ResultVal<SharedPtr<Thread>> Thread::Create(KernelCore& kernel, std::string name
     thread->stack_top = stack_top;
     thread->tpidr_el0 = 0;
     thread->nominal_priority = thread->current_priority = priority;
-    thread->last_running_ticks = CoreTiming::GetTicks();
+    thread->last_running_ticks = Core::Timing::GetTicks();
     thread->processor_id = processor_id;
     thread->ideal_core = processor_id;
     thread->affinity_mask = 1ULL << processor_id;
@@ -257,7 +258,7 @@ void Thread::SetStatus(ThreadStatus new_status) {
     }
 
     if (status == ThreadStatus::Running) {
-        last_running_ticks = CoreTiming::GetTicks();
+        last_running_ticks = Core::Timing::GetTicks();
     }
 
     status = new_status;
diff --git a/src/core/hle/service/hid/controllers/debug_pad.cpp b/src/core/hle/service/hid/controllers/debug_pad.cpp
index c22357d8c..b264c9503 100644
--- a/src/core/hle/service/hid/controllers/debug_pad.cpp
+++ b/src/core/hle/service/hid/controllers/debug_pad.cpp
@@ -22,7 +22,7 @@ void Controller_DebugPad::OnInit() {}
 void Controller_DebugPad::OnRelease() {}
 
 void Controller_DebugPad::OnUpdate(u8* data, std::size_t size) {
-    shared_memory.header.timestamp = CoreTiming::GetTicks();
+    shared_memory.header.timestamp = Core::Timing::GetTicks();
     shared_memory.header.total_entry_count = 17;
 
     if (!IsControllerActivated()) {
diff --git a/src/core/hle/service/hid/controllers/gesture.cpp b/src/core/hle/service/hid/controllers/gesture.cpp
index 898572277..6d21f1a7d 100644
--- a/src/core/hle/service/hid/controllers/gesture.cpp
+++ b/src/core/hle/service/hid/controllers/gesture.cpp
@@ -18,7 +18,7 @@ void Controller_Gesture::OnInit() {}
 void Controller_Gesture::OnRelease() {}
 
 void Controller_Gesture::OnUpdate(u8* data, std::size_t size) {
-    shared_memory.header.timestamp = CoreTiming::GetTicks();
+    shared_memory.header.timestamp = Core::Timing::GetTicks();
     shared_memory.header.total_entry_count = 17;
 
     if (!IsControllerActivated()) {
diff --git a/src/core/hle/service/hid/controllers/keyboard.cpp b/src/core/hle/service/hid/controllers/keyboard.cpp
index ca75adc2b..798f30436 100644
--- a/src/core/hle/service/hid/controllers/keyboard.cpp
+++ b/src/core/hle/service/hid/controllers/keyboard.cpp
@@ -20,7 +20,7 @@ void Controller_Keyboard::OnInit() {}
 void Controller_Keyboard::OnRelease() {}
 
 void Controller_Keyboard::OnUpdate(u8* data, std::size_t size) {
-    shared_memory.header.timestamp = CoreTiming::GetTicks();
+    shared_memory.header.timestamp = Core::Timing::GetTicks();
     shared_memory.header.total_entry_count = 17;
 
     if (!IsControllerActivated()) {
diff --git a/src/core/hle/service/hid/controllers/mouse.cpp b/src/core/hle/service/hid/controllers/mouse.cpp
index 63391dbe9..4985037be 100644
--- a/src/core/hle/service/hid/controllers/mouse.cpp
+++ b/src/core/hle/service/hid/controllers/mouse.cpp
@@ -18,7 +18,7 @@ void Controller_Mouse::OnInit() {}
 void Controller_Mouse::OnRelease() {}
 
 void Controller_Mouse::OnUpdate(u8* data, std::size_t size) {
-    shared_memory.header.timestamp = CoreTiming::GetTicks();
+    shared_memory.header.timestamp = Core::Timing::GetTicks();
     shared_memory.header.total_entry_count = 17;
 
     if (!IsControllerActivated()) {
diff --git a/src/core/hle/service/hid/controllers/npad.cpp b/src/core/hle/service/hid/controllers/npad.cpp
index 04c8c35a8..ffdd1c593 100644
--- a/src/core/hle/service/hid/controllers/npad.cpp
+++ b/src/core/hle/service/hid/controllers/npad.cpp
@@ -308,7 +308,7 @@ void Controller_NPad::OnUpdate(u8* data, std::size_t data_len) {
         const auto& last_entry =
             main_controller->npad[main_controller->common.last_entry_index];
 
-        main_controller->common.timestamp = CoreTiming::GetTicks();
+        main_controller->common.timestamp = Core::Timing::GetTicks();
         main_controller->common.last_entry_index =
             (main_controller->common.last_entry_index + 1) % 17;
 
diff --git a/src/core/hle/service/hid/controllers/stubbed.cpp b/src/core/hle/service/hid/controllers/stubbed.cpp
index 02fcfadd9..cca4dca1d 100644
--- a/src/core/hle/service/hid/controllers/stubbed.cpp
+++ b/src/core/hle/service/hid/controllers/stubbed.cpp
@@ -22,7 +22,7 @@ void Controller_Stubbed::OnUpdate(u8* data, std::size_t size) {
     }
 
     CommonHeader header{};
-    header.timestamp = CoreTiming::GetTicks();
+    header.timestamp = Core::Timing::GetTicks();
     header.total_entry_count = 17;
     header.entry_count = 0;
     header.last_entry_index = 0;
diff --git a/src/core/hle/service/hid/controllers/touchscreen.cpp b/src/core/hle/service/hid/controllers/touchscreen.cpp
index f666b1bd8..a7c8acc72 100644
--- a/src/core/hle/service/hid/controllers/touchscreen.cpp
+++ b/src/core/hle/service/hid/controllers/touchscreen.cpp
@@ -21,7 +21,7 @@ void Controller_Touchscreen::OnInit() {}
 void Controller_Touchscreen::OnRelease() {}
 
 void Controller_Touchscreen::OnUpdate(u8* data, std::size_t size) {
-    shared_memory.header.timestamp = CoreTiming::GetTicks();
+    shared_memory.header.timestamp = Core::Timing::GetTicks();
     shared_memory.header.total_entry_count = 17;
 
     if (!IsControllerActivated()) {
@@ -48,7 +48,7 @@ void Controller_Touchscreen::OnUpdate(u8* data, std::size_t size) {
     touch_entry.diameter_x = Settings::values.touchscreen.diameter_x;
     touch_entry.diameter_y = Settings::values.touchscreen.diameter_y;
     touch_entry.rotation_angle = Settings::values.touchscreen.rotation_angle;
-    const u64 tick = CoreTiming::GetTicks();
+    const u64 tick = Core::Timing::GetTicks();
     touch_entry.delta_time = tick - last_touch;
     last_touch = tick;
     touch_entry.finger = Settings::values.touchscreen.finger;
diff --git a/src/core/hle/service/hid/controllers/xpad.cpp b/src/core/hle/service/hid/controllers/xpad.cpp
index cd397c70b..eff03d14e 100644
--- a/src/core/hle/service/hid/controllers/xpad.cpp
+++ b/src/core/hle/service/hid/controllers/xpad.cpp
@@ -19,7 +19,7 @@ void Controller_XPad::OnRelease() {}
 
 void Controller_XPad::OnUpdate(u8* data, std::size_t size) {
     for (auto& xpad_entry : shared_memory.shared_memory_entries) {
-        xpad_entry.header.timestamp = CoreTiming::GetTicks();
+        xpad_entry.header.timestamp = Core::Timing::GetTicks();
         xpad_entry.header.total_entry_count = 17;
 
         if (!IsControllerActivated()) {
diff --git a/src/core/hle/service/hid/hid.cpp b/src/core/hle/service/hid/hid.cpp
index 008bf3f02..79c320d04 100644
--- a/src/core/hle/service/hid/hid.cpp
+++ b/src/core/hle/service/hid/hid.cpp
@@ -36,9 +36,9 @@ namespace Service::HID {
 
 // Updating period for each HID device.
 // TODO(ogniK): Find actual polling rate of hid
-constexpr u64 pad_update_ticks = CoreTiming::BASE_CLOCK_RATE / 66;
-constexpr u64 accelerometer_update_ticks = CoreTiming::BASE_CLOCK_RATE / 100;
-constexpr u64 gyroscope_update_ticks = CoreTiming::BASE_CLOCK_RATE / 100;
+constexpr u64 pad_update_ticks = Core::Timing::BASE_CLOCK_RATE / 66;
+constexpr u64 accelerometer_update_ticks = Core::Timing::BASE_CLOCK_RATE / 100;
+constexpr u64 gyroscope_update_ticks = Core::Timing::BASE_CLOCK_RATE / 100;
 constexpr std::size_t SHARED_MEMORY_SIZE = 0x40000;
 
 IAppletResource::IAppletResource() : ServiceFramework("IAppletResource") {
@@ -73,14 +73,13 @@ IAppletResource::IAppletResource() : ServiceFramework("IAppletResource") {
     GetController<Controller_Stubbed>(HidController::Unknown3).SetCommonHeaderOffset(0x5000);
 
     // Register update callbacks
-    pad_update_event =
-        CoreTiming::RegisterEvent("HID::UpdatePadCallback", [this](u64 userdata, int cycles_late) {
-            UpdateControllers(userdata, cycles_late);
-        });
+    pad_update_event = Core::Timing::RegisterEvent(
+        "HID::UpdatePadCallback",
+        [this](u64 userdata, int cycles_late) { UpdateControllers(userdata, cycles_late); });
 
     // TODO(shinyquagsire23): Other update callbacks? (accel, gyro?)
 
-    CoreTiming::ScheduleEvent(pad_update_ticks, pad_update_event);
+    Core::Timing::ScheduleEvent(pad_update_ticks, pad_update_event);
 
     ReloadInputDevices();
 }
@@ -94,7 +93,7 @@ void IAppletResource::DeactivateController(HidController controller) {
 }
 
 IAppletResource ::~IAppletResource() {
-    CoreTiming::UnscheduleEvent(pad_update_event, 0);
+    Core::Timing::UnscheduleEvent(pad_update_event, 0);
 }
 
 void IAppletResource::GetSharedMemoryHandle(Kernel::HLERequestContext& ctx) {
@@ -114,7 +113,7 @@ void IAppletResource::UpdateControllers(u64 userdata, int cycles_late) {
         controller->OnUpdate(shared_mem->GetPointer(), SHARED_MEMORY_SIZE);
     }
 
-    CoreTiming::ScheduleEvent(pad_update_ticks - cycles_late, pad_update_event);
+    Core::Timing::ScheduleEvent(pad_update_ticks - cycles_late, pad_update_event);
 }
 
 class IActiveVibrationDeviceList final : public ServiceFramework<IActiveVibrationDeviceList> {
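UpdateControllers re-arms itself with pad_update_ticks - cycles_late, so a late callback shortens the next interval by the same amount rather than letting the error accumulate; polling stays at ~66 Hz on average (with the assumed ~1.02 GHz clock, pad_update_ticks is about 15.44 million cycles, roughly 15.2 ms). A toy model of that compensation (illustrative numbers only):

    #include <cassert>
    #include <cstdint>

    int main() {
        const std::int64_t period = 15442664; // ~BASE_CLOCK_RATE / 66 (assumed)
        std::int64_t ideal = 0;

        for (int n = 1; n <= 3; ++n) {
            const std::int64_t lateness = 1000 * n; // pretend each firing ran late
            ideal += period;                        // when the event should fire
            const std::int64_t fired_at = ideal + lateness;
            // Re-arming with (period - lateness) lands the next firing back on grid.
            assert(fired_at + (period - lateness) == ideal + period);
        }
    }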
diff --git a/src/core/hle/service/hid/hid.h b/src/core/hle/service/hid/hid.h
index eca27c056..6d897c842 100644
--- a/src/core/hle/service/hid/hid.h
+++ b/src/core/hle/service/hid/hid.h
@@ -7,7 +7,7 @@
 #include "controllers/controller_base.h"
 #include "core/hle/service/service.h"
 
-namespace CoreTiming {
+namespace Core::Timing {
 struct EventType;
 }
 
@@ -66,7 +66,7 @@ private:
 
     Kernel::SharedPtr<Kernel::SharedMemory> shared_mem;
 
-    CoreTiming::EventType* pad_update_event;
+    Core::Timing::EventType* pad_update_event;
 
     std::array<std::unique_ptr<ControllerBase>, static_cast<size_t>(HidController::MaxControllers)>
         controllers{};
diff --git a/src/core/hle/service/hid/irs.cpp b/src/core/hle/service/hid/irs.cpp
index 3c7f8b1ee..b427d4068 100644
--- a/src/core/hle/service/hid/irs.cpp
+++ b/src/core/hle/service/hid/irs.cpp
@@ -98,7 +98,7 @@ void IRS::GetImageTransferProcessorState(Kernel::HLERequestContext& ctx) {
 
     IPC::ResponseBuilder rb{ctx, 5};
     rb.Push(RESULT_SUCCESS);
-    rb.PushRaw<u64>(CoreTiming::GetTicks());
+    rb.PushRaw<u64>(Core::Timing::GetTicks());
     rb.PushRaw<u32>(0);
 }
 
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_ctrl_gpu.cpp b/src/core/hle/service/nvdrv/devices/nvhost_ctrl_gpu.cpp
index d57a54ee8..88d80ba06 100644
--- a/src/core/hle/service/nvdrv/devices/nvhost_ctrl_gpu.cpp
+++ b/src/core/hle/service/nvdrv/devices/nvhost_ctrl_gpu.cpp
@@ -184,7 +184,7 @@ u32 nvhost_ctrl_gpu::GetGpuTime(const std::vector<u8>& input, std::vector<u8>& o
 
     IoctlGetGpuTime params{};
     std::memcpy(&params, input.data(), input.size());
-    params.gpu_time = CoreTiming::cyclesToNs(CoreTiming::GetTicks());
+    params.gpu_time = Core::Timing::cyclesToNs(Core::Timing::GetTicks());
     std::memcpy(output.data(), &params, output.size());
     return 0;
 }
diff --git a/src/core/hle/service/nvflinger/buffer_queue.h b/src/core/hle/service/nvflinger/buffer_queue.h
index b171f256c..ab90d591e 100644
--- a/src/core/hle/service/nvflinger/buffer_queue.h
+++ b/src/core/hle/service/nvflinger/buffer_queue.h
@@ -13,10 +13,6 @@
 #include "core/hle/kernel/object.h"
 #include "core/hle/kernel/writable_event.h"
 
-namespace CoreTiming {
-struct EventType;
-}
-
 namespace Service::NVFlinger {
 
 struct IGBPBuffer {
diff --git a/src/core/hle/service/nvflinger/nvflinger.cpp b/src/core/hle/service/nvflinger/nvflinger.cpp
index cde06916d..ce1b59860 100644
--- a/src/core/hle/service/nvflinger/nvflinger.cpp
+++ b/src/core/hle/service/nvflinger/nvflinger.cpp
@@ -25,21 +25,21 @@
 namespace Service::NVFlinger {
 
 constexpr std::size_t SCREEN_REFRESH_RATE = 60;
-constexpr u64 frame_ticks = static_cast<u64>(CoreTiming::BASE_CLOCK_RATE / SCREEN_REFRESH_RATE);
+constexpr u64 frame_ticks = static_cast<u64>(Core::Timing::BASE_CLOCK_RATE / SCREEN_REFRESH_RATE);
 
 NVFlinger::NVFlinger() {
     // Schedule the screen composition events
     composition_event =
-        CoreTiming::RegisterEvent("ScreenComposition", [this](u64 userdata, int cycles_late) {
+        Core::Timing::RegisterEvent("ScreenComposition", [this](u64 userdata, int cycles_late) {
             Compose();
-            CoreTiming::ScheduleEvent(frame_ticks - cycles_late, composition_event);
+            Core::Timing::ScheduleEvent(frame_ticks - cycles_late, composition_event);
         });
 
-    CoreTiming::ScheduleEvent(frame_ticks, composition_event);
+    Core::Timing::ScheduleEvent(frame_ticks, composition_event);
 }
 
 NVFlinger::~NVFlinger() {
-    CoreTiming::UnscheduleEvent(composition_event, 0);
+    Core::Timing::UnscheduleEvent(composition_event, 0);
 }
 
 void NVFlinger::SetNVDrvInstance(std::shared_ptr<Nvidia::Module> instance) {
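For scale: with the assumed ~1.02 GHz BASE_CLOCK_RATE, frame_ticks = 1019215872 / 60 = 16986931 cycles, one composition every ~16.7 ms; the cycles_late subtraction inside the callback is the same drift compensation used by hid.cpp's UpdateControllers above.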
diff --git a/src/core/hle/service/nvflinger/nvflinger.h b/src/core/hle/service/nvflinger/nvflinger.h
index 4c55e99f4..6d8bcbd30 100644
--- a/src/core/hle/service/nvflinger/nvflinger.h
+++ b/src/core/hle/service/nvflinger/nvflinger.h
@@ -14,7 +14,7 @@
 #include "common/common_types.h"
 #include "core/hle/kernel/object.h"
 
-namespace CoreTiming {
+namespace Core::Timing {
 struct EventType;
 }
 
@@ -115,8 +115,8 @@ private:
     /// layers.
     u32 next_buffer_queue_id = 1;
 
-    /// CoreTiming event that handles screen composition.
-    CoreTiming::EventType* composition_event;
+    /// Event that handles screen composition.
+    Core::Timing::EventType* composition_event;
 };
 
 } // namespace Service::NVFlinger
diff --git a/src/core/hle/service/time/time.cpp b/src/core/hle/service/time/time.cpp
index c13640ad8..efebd1b24 100644
--- a/src/core/hle/service/time/time.cpp
+++ b/src/core/hle/service/time/time.cpp
@@ -106,8 +106,8 @@ private:
     void GetCurrentTimePoint(Kernel::HLERequestContext& ctx) {
         LOG_DEBUG(Service_Time, "called");
 
-        SteadyClockTimePoint steady_clock_time_point{
-            CoreTiming::cyclesToMs(CoreTiming::GetTicks()) / 1000};
+        const SteadyClockTimePoint steady_clock_time_point{
+            Core::Timing::cyclesToMs(Core::Timing::GetTicks()) / 1000};
         IPC::ResponseBuilder rb{ctx, (sizeof(SteadyClockTimePoint) / 4) + 2};
         rb.Push(RESULT_SUCCESS);
         rb.PushRaw(steady_clock_time_point);
@@ -282,7 +282,7 @@ void Module::Interface::GetClockSnapshot(Kernel::HLERequestContext& ctx) {
     }
 
     const SteadyClockTimePoint steady_clock_time_point{
-        CoreTiming::cyclesToMs(CoreTiming::GetTicks()) / 1000, {}};
+        Core::Timing::cyclesToMs(Core::Timing::GetTicks()) / 1000, {}};
 
     CalendarTime calendar_time{};
     calendar_time.year = tm->tm_year + 1900;
diff --git a/src/tests/core/core_timing.cpp b/src/tests/core/core_timing.cpp
index 2242c14cf..77607a755 100644
--- a/src/tests/core/core_timing.cpp
+++ b/src/tests/core/core_timing.cpp
@@ -31,10 +31,10 @@ void CallbackTemplate(u64 userdata, s64 cycles_late) {
 class ScopeInit final {
 public:
     ScopeInit() {
-        CoreTiming::Init();
+        Core::Timing::Init();
     }
     ~ScopeInit() {
-        CoreTiming::Shutdown();
+        Core::Timing::Shutdown();
     }
 };
 
@@ -44,37 +44,37 @@ static void AdvanceAndCheck(u32 idx, int downcount, int expected_lateness = 0,
     expected_callback = CB_IDS[idx];
     lateness = expected_lateness;
 
-    CoreTiming::AddTicks(CoreTiming::GetDowncount() -
-                         cpu_downcount); // Pretend we executed X cycles of instructions.
-    CoreTiming::Advance();
+    // Pretend we executed X cycles of instructions.
+    Core::Timing::AddTicks(Core::Timing::GetDowncount() - cpu_downcount);
+    Core::Timing::Advance();
 
     REQUIRE(decltype(callbacks_ran_flags)().set(idx) == callbacks_ran_flags);
-    REQUIRE(downcount == CoreTiming::GetDowncount());
+    REQUIRE(downcount == Core::Timing::GetDowncount());
 }
 
 TEST_CASE("CoreTiming[BasicOrder]", "[core]") {
     ScopeInit guard;
 
-    CoreTiming::EventType* cb_a = CoreTiming::RegisterEvent("callbackA", CallbackTemplate<0>);
-    CoreTiming::EventType* cb_b = CoreTiming::RegisterEvent("callbackB", CallbackTemplate<1>);
-    CoreTiming::EventType* cb_c = CoreTiming::RegisterEvent("callbackC", CallbackTemplate<2>);
-    CoreTiming::EventType* cb_d = CoreTiming::RegisterEvent("callbackD", CallbackTemplate<3>);
-    CoreTiming::EventType* cb_e = CoreTiming::RegisterEvent("callbackE", CallbackTemplate<4>);
+    Core::Timing::EventType* cb_a = Core::Timing::RegisterEvent("callbackA", CallbackTemplate<0>);
+    Core::Timing::EventType* cb_b = Core::Timing::RegisterEvent("callbackB", CallbackTemplate<1>);
+    Core::Timing::EventType* cb_c = Core::Timing::RegisterEvent("callbackC", CallbackTemplate<2>);
+    Core::Timing::EventType* cb_d = Core::Timing::RegisterEvent("callbackD", CallbackTemplate<3>);
+    Core::Timing::EventType* cb_e = Core::Timing::RegisterEvent("callbackE", CallbackTemplate<4>);
 
     // Enter slice 0
-    CoreTiming::Advance();
+    Core::Timing::Advance();
 
     // D -> B -> C -> A -> E
-    CoreTiming::ScheduleEvent(1000, cb_a, CB_IDS[0]);
-    REQUIRE(1000 == CoreTiming::GetDowncount());
-    CoreTiming::ScheduleEvent(500, cb_b, CB_IDS[1]);
-    REQUIRE(500 == CoreTiming::GetDowncount());
-    CoreTiming::ScheduleEvent(800, cb_c, CB_IDS[2]);
-    REQUIRE(500 == CoreTiming::GetDowncount());
-    CoreTiming::ScheduleEvent(100, cb_d, CB_IDS[3]);
-    REQUIRE(100 == CoreTiming::GetDowncount());
-    CoreTiming::ScheduleEvent(1200, cb_e, CB_IDS[4]);
-    REQUIRE(100 == CoreTiming::GetDowncount());
+    Core::Timing::ScheduleEvent(1000, cb_a, CB_IDS[0]);
+    REQUIRE(1000 == Core::Timing::GetDowncount());
+    Core::Timing::ScheduleEvent(500, cb_b, CB_IDS[1]);
+    REQUIRE(500 == Core::Timing::GetDowncount());
+    Core::Timing::ScheduleEvent(800, cb_c, CB_IDS[2]);
+    REQUIRE(500 == Core::Timing::GetDowncount());
+    Core::Timing::ScheduleEvent(100, cb_d, CB_IDS[3]);
+    REQUIRE(100 == Core::Timing::GetDowncount());
+    Core::Timing::ScheduleEvent(1200, cb_e, CB_IDS[4]);
+    REQUIRE(100 == Core::Timing::GetDowncount());
 
     AdvanceAndCheck(3, 400);
     AdvanceAndCheck(1, 300);
@@ -86,36 +86,36 @@ TEST_CASE("CoreTiming[BasicOrder]", "[core]") {
 TEST_CASE("CoreTiming[Threadsave]", "[core]") {
     ScopeInit guard;
 
-    CoreTiming::EventType* cb_a = CoreTiming::RegisterEvent("callbackA", CallbackTemplate<0>);
-    CoreTiming::EventType* cb_b = CoreTiming::RegisterEvent("callbackB", CallbackTemplate<1>);
-    CoreTiming::EventType* cb_c = CoreTiming::RegisterEvent("callbackC", CallbackTemplate<2>);
-    CoreTiming::EventType* cb_d = CoreTiming::RegisterEvent("callbackD", CallbackTemplate<3>);
-    CoreTiming::EventType* cb_e = CoreTiming::RegisterEvent("callbackE", CallbackTemplate<4>);
+    Core::Timing::EventType* cb_a = Core::Timing::RegisterEvent("callbackA", CallbackTemplate<0>);
+    Core::Timing::EventType* cb_b = Core::Timing::RegisterEvent("callbackB", CallbackTemplate<1>);
+    Core::Timing::EventType* cb_c = Core::Timing::RegisterEvent("callbackC", CallbackTemplate<2>);
+    Core::Timing::EventType* cb_d = Core::Timing::RegisterEvent("callbackD", CallbackTemplate<3>);
+    Core::Timing::EventType* cb_e = Core::Timing::RegisterEvent("callbackE", CallbackTemplate<4>);
 
     // Enter slice 0
-    CoreTiming::Advance();
+    Core::Timing::Advance();
 
     // D -> B -> C -> A -> E
-    CoreTiming::ScheduleEventThreadsafe(1000, cb_a, CB_IDS[0]);
+    Core::Timing::ScheduleEventThreadsafe(1000, cb_a, CB_IDS[0]);
     // Manually force since ScheduleEventThreadsafe doesn't call it
-    CoreTiming::ForceExceptionCheck(1000);
-    REQUIRE(1000 == CoreTiming::GetDowncount());
-    CoreTiming::ScheduleEventThreadsafe(500, cb_b, CB_IDS[1]);
+    Core::Timing::ForceExceptionCheck(1000);
+    REQUIRE(1000 == Core::Timing::GetDowncount());
+    Core::Timing::ScheduleEventThreadsafe(500, cb_b, CB_IDS[1]);
     // Manually force since ScheduleEventThreadsafe doesn't call it
-    CoreTiming::ForceExceptionCheck(500);
-    REQUIRE(500 == CoreTiming::GetDowncount());
-    CoreTiming::ScheduleEventThreadsafe(800, cb_c, CB_IDS[2]);
+    Core::Timing::ForceExceptionCheck(500);
+    REQUIRE(500 == Core::Timing::GetDowncount());
+    Core::Timing::ScheduleEventThreadsafe(800, cb_c, CB_IDS[2]);
     // Manually force since ScheduleEventThreadsafe doesn't call it
-    CoreTiming::ForceExceptionCheck(800);
-    REQUIRE(500 == CoreTiming::GetDowncount());
-    CoreTiming::ScheduleEventThreadsafe(100, cb_d, CB_IDS[3]);
+    Core::Timing::ForceExceptionCheck(800);
+    REQUIRE(500 == Core::Timing::GetDowncount());
+    Core::Timing::ScheduleEventThreadsafe(100, cb_d, CB_IDS[3]);
     // Manually force since ScheduleEventThreadsafe doesn't call it
-    CoreTiming::ForceExceptionCheck(100);
-    REQUIRE(100 == CoreTiming::GetDowncount());
-    CoreTiming::ScheduleEventThreadsafe(1200, cb_e, CB_IDS[4]);
+    Core::Timing::ForceExceptionCheck(100);
+    REQUIRE(100 == Core::Timing::GetDowncount());
+    Core::Timing::ScheduleEventThreadsafe(1200, cb_e, CB_IDS[4]);
     // Manually force since ScheduleEventThreadsafe doesn't call it
-    CoreTiming::ForceExceptionCheck(1200);
-    REQUIRE(100 == CoreTiming::GetDowncount());
+    Core::Timing::ForceExceptionCheck(1200);
+    REQUIRE(100 == Core::Timing::GetDowncount());
 
     AdvanceAndCheck(3, 400);
     AdvanceAndCheck(1, 300);
@@ -143,42 +143,42 @@ TEST_CASE("CoreTiming[SharedSlot]", "[core]") {
 
     ScopeInit guard;
 
-    CoreTiming::EventType* cb_a = CoreTiming::RegisterEvent("callbackA", FifoCallback<0>);
-    CoreTiming::EventType* cb_b = CoreTiming::RegisterEvent("callbackB", FifoCallback<1>);
-    CoreTiming::EventType* cb_c = CoreTiming::RegisterEvent("callbackC", FifoCallback<2>);
-    CoreTiming::EventType* cb_d = CoreTiming::RegisterEvent("callbackD", FifoCallback<3>);
+    Core::Timing::EventType* cb_a = Core::Timing::RegisterEvent("callbackA", FifoCallback<0>);
+    Core::Timing::EventType* cb_b = Core::Timing::RegisterEvent("callbackB", FifoCallback<1>);
+    Core::Timing::EventType* cb_c = Core::Timing::RegisterEvent("callbackC", FifoCallback<2>);
+    Core::Timing::EventType* cb_d = Core::Timing::RegisterEvent("callbackD", FifoCallback<3>);
| 150 | CoreTiming::EventType* cb_e = CoreTiming::RegisterEvent("callbackE", FifoCallback<4>); | 150 | Core::Timing::EventType* cb_e = Core::Timing::RegisterEvent("callbackE", FifoCallback<4>); |
| 151 | 151 | ||
| 152 | CoreTiming::ScheduleEvent(1000, cb_a, CB_IDS[0]); | 152 | Core::Timing::ScheduleEvent(1000, cb_a, CB_IDS[0]); |
| 153 | CoreTiming::ScheduleEvent(1000, cb_b, CB_IDS[1]); | 153 | Core::Timing::ScheduleEvent(1000, cb_b, CB_IDS[1]); |
| 154 | CoreTiming::ScheduleEvent(1000, cb_c, CB_IDS[2]); | 154 | Core::Timing::ScheduleEvent(1000, cb_c, CB_IDS[2]); |
| 155 | CoreTiming::ScheduleEvent(1000, cb_d, CB_IDS[3]); | 155 | Core::Timing::ScheduleEvent(1000, cb_d, CB_IDS[3]); |
| 156 | CoreTiming::ScheduleEvent(1000, cb_e, CB_IDS[4]); | 156 | Core::Timing::ScheduleEvent(1000, cb_e, CB_IDS[4]); |
| 157 | 157 | ||
| 158 | // Enter slice 0 | 158 | // Enter slice 0 |
| 159 | CoreTiming::Advance(); | 159 | Core::Timing::Advance(); |
| 160 | REQUIRE(1000 == CoreTiming::GetDowncount()); | 160 | REQUIRE(1000 == Core::Timing::GetDowncount()); |
| 161 | 161 | ||
| 162 | callbacks_ran_flags = 0; | 162 | callbacks_ran_flags = 0; |
| 163 | counter = 0; | 163 | counter = 0; |
| 164 | lateness = 0; | 164 | lateness = 0; |
| 165 | CoreTiming::AddTicks(CoreTiming::GetDowncount()); | 165 | Core::Timing::AddTicks(Core::Timing::GetDowncount()); |
| 166 | CoreTiming::Advance(); | 166 | Core::Timing::Advance(); |
| 167 | REQUIRE(MAX_SLICE_LENGTH == CoreTiming::GetDowncount()); | 167 | REQUIRE(MAX_SLICE_LENGTH == Core::Timing::GetDowncount()); |
| 168 | REQUIRE(0x1FULL == callbacks_ran_flags.to_ullong()); | 168 | REQUIRE(0x1FULL == callbacks_ran_flags.to_ullong()); |
| 169 | } | 169 | } |
| 170 | 170 | ||
| 171 | TEST_CASE("CoreTiming[PredictableLateness]", "[core]") { | 171 | TEST_CASE("CoreTiming[PredictableLateness]", "[core]") { |
| 172 | ScopeInit guard; | 172 | ScopeInit guard; |
| 173 | 173 | ||
| 174 | CoreTiming::EventType* cb_a = CoreTiming::RegisterEvent("callbackA", CallbackTemplate<0>); | 174 | Core::Timing::EventType* cb_a = Core::Timing::RegisterEvent("callbackA", CallbackTemplate<0>); |
| 175 | CoreTiming::EventType* cb_b = CoreTiming::RegisterEvent("callbackB", CallbackTemplate<1>); | 175 | Core::Timing::EventType* cb_b = Core::Timing::RegisterEvent("callbackB", CallbackTemplate<1>); |
| 176 | 176 | ||
| 177 | // Enter slice 0 | 177 | // Enter slice 0 |
| 178 | CoreTiming::Advance(); | 178 | Core::Timing::Advance(); |
| 179 | 179 | ||
| 180 | CoreTiming::ScheduleEvent(100, cb_a, CB_IDS[0]); | 180 | Core::Timing::ScheduleEvent(100, cb_a, CB_IDS[0]); |
| 181 | CoreTiming::ScheduleEvent(200, cb_b, CB_IDS[1]); | 181 | Core::Timing::ScheduleEvent(200, cb_b, CB_IDS[1]); |
| 182 | 182 | ||
| 183 | AdvanceAndCheck(0, 90, 10, -10); // (100 - 10) | 183 | AdvanceAndCheck(0, 90, 10, -10); // (100 - 10) |
| 184 | AdvanceAndCheck(1, MAX_SLICE_LENGTH, 50, -50); | 184 | AdvanceAndCheck(1, MAX_SLICE_LENGTH, 50, -50); |
| @@ -192,9 +192,10 @@ static void RescheduleCallback(u64 userdata, s64 cycles_late) { | |||
| 192 | REQUIRE(reschedules >= 0); | 192 | REQUIRE(reschedules >= 0); |
| 193 | REQUIRE(lateness == cycles_late); | 193 | REQUIRE(lateness == cycles_late); |
| 194 | 194 | ||
| 195 | if (reschedules > 0) | 195 | if (reschedules > 0) { |
| 196 | CoreTiming::ScheduleEvent(1000, reinterpret_cast<CoreTiming::EventType*>(userdata), | 196 | Core::Timing::ScheduleEvent(1000, reinterpret_cast<Core::Timing::EventType*>(userdata), |
| 197 | userdata); | 197 | userdata); |
| 198 | } | ||
| 198 | } | 199 | } |
| 199 | } // namespace ChainSchedulingTest | 200 | } // namespace ChainSchedulingTest |
| 200 | 201 | ||
| @@ -203,35 +204,35 @@ TEST_CASE("CoreTiming[ChainScheduling]", "[core]") { | |||
| 203 | 204 | ||
| 204 | ScopeInit guard; | 205 | ScopeInit guard; |
| 205 | 206 | ||
| 206 | CoreTiming::EventType* cb_a = CoreTiming::RegisterEvent("callbackA", CallbackTemplate<0>); | 207 | Core::Timing::EventType* cb_a = Core::Timing::RegisterEvent("callbackA", CallbackTemplate<0>); |
| 207 | CoreTiming::EventType* cb_b = CoreTiming::RegisterEvent("callbackB", CallbackTemplate<1>); | 208 | Core::Timing::EventType* cb_b = Core::Timing::RegisterEvent("callbackB", CallbackTemplate<1>); |
| 208 | CoreTiming::EventType* cb_c = CoreTiming::RegisterEvent("callbackC", CallbackTemplate<2>); | 209 | Core::Timing::EventType* cb_c = Core::Timing::RegisterEvent("callbackC", CallbackTemplate<2>); |
| 209 | CoreTiming::EventType* cb_rs = | 210 | Core::Timing::EventType* cb_rs = |
| 210 | CoreTiming::RegisterEvent("callbackReschedule", RescheduleCallback); | 211 | Core::Timing::RegisterEvent("callbackReschedule", RescheduleCallback); |
| 211 | 212 | ||
| 212 | // Enter slice 0 | 213 | // Enter slice 0 |
| 213 | CoreTiming::Advance(); | 214 | Core::Timing::Advance(); |
| 214 | 215 | ||
| 215 | CoreTiming::ScheduleEvent(800, cb_a, CB_IDS[0]); | 216 | Core::Timing::ScheduleEvent(800, cb_a, CB_IDS[0]); |
| 216 | CoreTiming::ScheduleEvent(1000, cb_b, CB_IDS[1]); | 217 | Core::Timing::ScheduleEvent(1000, cb_b, CB_IDS[1]); |
| 217 | CoreTiming::ScheduleEvent(2200, cb_c, CB_IDS[2]); | 218 | Core::Timing::ScheduleEvent(2200, cb_c, CB_IDS[2]); |
| 218 | CoreTiming::ScheduleEvent(1000, cb_rs, reinterpret_cast<u64>(cb_rs)); | 219 | Core::Timing::ScheduleEvent(1000, cb_rs, reinterpret_cast<u64>(cb_rs)); |
| 219 | REQUIRE(800 == CoreTiming::GetDowncount()); | 220 | REQUIRE(800 == Core::Timing::GetDowncount()); |
| 220 | 221 | ||
| 221 | reschedules = 3; | 222 | reschedules = 3; |
| 222 | AdvanceAndCheck(0, 200); // cb_a | 223 | AdvanceAndCheck(0, 200); // cb_a |
| 223 | AdvanceAndCheck(1, 1000); // cb_b, cb_rs | 224 | AdvanceAndCheck(1, 1000); // cb_b, cb_rs |
| 224 | REQUIRE(2 == reschedules); | 225 | REQUIRE(2 == reschedules); |
| 225 | 226 | ||
| 226 | CoreTiming::AddTicks(CoreTiming::GetDowncount()); | 227 | Core::Timing::AddTicks(Core::Timing::GetDowncount()); |
| 227 | CoreTiming::Advance(); // cb_rs | 228 | Core::Timing::Advance(); // cb_rs |
| 228 | REQUIRE(1 == reschedules); | 229 | REQUIRE(1 == reschedules); |
| 229 | REQUIRE(200 == CoreTiming::GetDowncount()); | 230 | REQUIRE(200 == Core::Timing::GetDowncount()); |
| 230 | 231 | ||
| 231 | AdvanceAndCheck(2, 800); // cb_c | 232 | AdvanceAndCheck(2, 800); // cb_c |
| 232 | 233 | ||
| 233 | CoreTiming::AddTicks(CoreTiming::GetDowncount()); | 234 | Core::Timing::AddTicks(Core::Timing::GetDowncount()); |
| 234 | CoreTiming::Advance(); // cb_rs | 235 | Core::Timing::Advance(); // cb_rs |
| 235 | REQUIRE(0 == reschedules); | 236 | REQUIRE(0 == reschedules); |
| 236 | REQUIRE(MAX_SLICE_LENGTH == CoreTiming::GetDowncount()); | 237 | REQUIRE(MAX_SLICE_LENGTH == Core::Timing::GetDowncount()); |
| 237 | } | 238 | } |
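For readers following the rename, a minimal sketch of the scheduling pattern these tests exercise, written against the renamed Core::Timing free functions shown above; Init()/Shutdown() are an assumption about what the tests' ScopeInit guard wraps:

#include "core/core_timing.h"

namespace {
// Matches the TimedCallback shape used by RescheduleCallback above.
void TickCallback(u64 userdata, s64 cycles_late) {
    // userdata is the opaque value passed to ScheduleEvent; cycles_late
    // reports how far past the requested cycle the callback actually ran.
}
} // namespace

void TimingSketch() {
    Core::Timing::Init(); // assumed equivalent of the tests' ScopeInit guard
    Core::Timing::EventType* tick = Core::Timing::RegisterEvent("tick", TickCallback);

    Core::Timing::Advance();                   // enter slice 0
    Core::Timing::ScheduleEvent(500, tick, 0); // fire 500 cycles from now
    Core::Timing::AddTicks(Core::Timing::GetDowncount());
    Core::Timing::Advance();                   // runs TickCallback

    Core::Timing::Shutdown();
}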
diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt index 33e507e69..d35a738d5 100644 --- a/src/video_core/CMakeLists.txt +++ b/src/video_core/CMakeLists.txt | |||
| @@ -5,12 +5,12 @@ add_library(video_core STATIC | |||
| 5 | debug_utils/debug_utils.h | 5 | debug_utils/debug_utils.h |
| 6 | engines/fermi_2d.cpp | 6 | engines/fermi_2d.cpp |
| 7 | engines/fermi_2d.h | 7 | engines/fermi_2d.h |
| 8 | engines/kepler_compute.cpp | ||
| 9 | engines/kepler_compute.h | ||
| 8 | engines/kepler_memory.cpp | 10 | engines/kepler_memory.cpp |
| 9 | engines/kepler_memory.h | 11 | engines/kepler_memory.h |
| 10 | engines/maxwell_3d.cpp | 12 | engines/maxwell_3d.cpp |
| 11 | engines/maxwell_3d.h | 13 | engines/maxwell_3d.h |
| 12 | engines/maxwell_compute.cpp | ||
| 13 | engines/maxwell_compute.h | ||
| 14 | engines/maxwell_dma.cpp | 14 | engines/maxwell_dma.cpp |
| 15 | engines/maxwell_dma.h | 15 | engines/maxwell_dma.h |
| 16 | engines/shader_bytecode.h | 16 | engines/shader_bytecode.h |
| @@ -101,6 +101,16 @@ add_library(video_core STATIC | |||
| 101 | video_core.h | 101 | video_core.h |
| 102 | ) | 102 | ) |
| 103 | 103 | ||
| 104 | if (ENABLE_VULKAN) | ||
| 105 | target_sources(video_core PRIVATE | ||
| 106 | renderer_vulkan/declarations.h | ||
| 107 | renderer_vulkan/vk_device.cpp | ||
| 108 | renderer_vulkan/vk_device.h) | ||
| 109 | |||
| 110 | target_include_directories(video_core PRIVATE ../../externals/Vulkan-Headers/include) | ||
| 111 | target_compile_definitions(video_core PRIVATE HAS_VULKAN) | ||
| 112 | endif() | ||
| 113 | |||
| 104 | create_target_directory_groups(video_core) | 114 | create_target_directory_groups(video_core) |
| 105 | 115 | ||
| 106 | target_link_libraries(video_core PUBLIC common core) | 116 | target_link_libraries(video_core PUBLIC common core) |
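The new ENABLE_VULKAN block also defines HAS_VULKAN for video_core, so Vulkan-only code can be fenced off from builds that skip those sources. A minimal consumer-side sketch (the guard itself is not part of this change):

// HAS_VULKAN comes from the target_compile_definitions() call above.
#ifdef HAS_VULKAN
#include "video_core/renderer_vulkan/vk_device.h"
#endif

bool IsVulkanBuild() {
#ifdef HAS_VULKAN
    return true;  // renderer_vulkan sources were compiled in
#else
    return false; // ENABLE_VULKAN was off; no Vulkan code paths exist
#endif
}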
diff --git a/src/video_core/engines/kepler_compute.cpp b/src/video_core/engines/kepler_compute.cpp new file mode 100644 index 000000000..4ca856b6b --- /dev/null +++ b/src/video_core/engines/kepler_compute.cpp | |||
| @@ -0,0 +1,34 @@ | |||
| 1 | // Copyright 2018 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/logging/log.h" | ||
| 6 | #include "core/core.h" | ||
| 7 | #include "core/memory.h" | ||
| 8 | #include "video_core/engines/kepler_compute.h" | ||
| 9 | #include "video_core/memory_manager.h" | ||
| 10 | |||
| 11 | namespace Tegra::Engines { | ||
| 12 | |||
| 13 | KeplerCompute::KeplerCompute(MemoryManager& memory_manager) : memory_manager{memory_manager} {} | ||
| 14 | |||
| 15 | KeplerCompute::~KeplerCompute() = default; | ||
| 16 | |||
| 17 | void KeplerCompute::CallMethod(const GPU::MethodCall& method_call) { | ||
| 18 | ASSERT_MSG(method_call.method < Regs::NUM_REGS, | ||
| 19 | "Invalid KeplerCompute register, increase the size of the Regs structure"); | ||
| 20 | |||
| 21 | regs.reg_array[method_call.method] = method_call.argument; | ||
| 22 | |||
| 23 | switch (method_call.method) { | ||
| 24 | case KEPLER_COMPUTE_REG_INDEX(launch): | ||
| 25 | // Abort execution since compute shaders can be used to alter game memory (e.g. CUDA | ||
| 26 | // kernels) | ||
| 27 | UNREACHABLE_MSG("Compute shaders are not implemented"); | ||
| 28 | break; | ||
| 29 | default: | ||
| 30 | break; | ||
| 31 | } | ||
| 32 | } | ||
| 33 | |||
| 34 | } // namespace Tegra::Engines | ||
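The contract here generalizes to all of the fixed-function engines: a method call is a bounds-checked write into a flat register file, and a handful of indices trigger side effects. A self-contained toy with simplified stand-in types, mirroring the logic above:

#include <array>
#include <cassert>
#include <cstdint>

struct ToyCompute {
    static constexpr std::size_t NUM_REGS = 0xCF8;
    static constexpr std::size_t LAUNCH = 0xAF; // matches the new layout above
    std::array<std::uint32_t, NUM_REGS> regs{};

    void CallMethod(std::uint32_t method, std::uint32_t argument) {
        assert(method < NUM_REGS && "invalid register");
        regs[method] = argument; // every write lands in the register file
        if (method == LAUNCH) {
            // The real engine aborts here: compute dispatch is unimplemented.
        }
    }
};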
diff --git a/src/video_core/engines/maxwell_compute.h b/src/video_core/engines/kepler_compute.h index 1d71f11bd..df0a32e0f 100644 --- a/src/video_core/engines/maxwell_compute.h +++ b/src/video_core/engines/kepler_compute.h | |||
| @@ -10,47 +10,48 @@ | |||
| 10 | #include "common/common_funcs.h" | 10 | #include "common/common_funcs.h" |
| 11 | #include "common/common_types.h" | 11 | #include "common/common_types.h" |
| 12 | #include "video_core/gpu.h" | 12 | #include "video_core/gpu.h" |
| 13 | #include "video_core/memory_manager.h" | ||
| 13 | 14 | ||
| 14 | namespace Tegra::Engines { | 15 | namespace Tegra::Engines { |
| 15 | 16 | ||
| 16 | #define MAXWELL_COMPUTE_REG_INDEX(field_name) \ | 17 | #define KEPLER_COMPUTE_REG_INDEX(field_name) \ |
| 17 | (offsetof(Tegra::Engines::MaxwellCompute::Regs, field_name) / sizeof(u32)) | 18 | (offsetof(Tegra::Engines::KeplerCompute::Regs, field_name) / sizeof(u32)) |
| 18 | 19 | ||
| 19 | class MaxwellCompute final { | 20 | class KeplerCompute final { |
| 20 | public: | 21 | public: |
| 21 | MaxwellCompute() = default; | 22 | explicit KeplerCompute(MemoryManager& memory_manager); |
| 22 | ~MaxwellCompute() = default; | 23 | ~KeplerCompute(); |
| 24 | |||
| 25 | static constexpr std::size_t NumConstBuffers = 8; | ||
| 23 | 26 | ||
| 24 | struct Regs { | 27 | struct Regs { |
| 25 | static constexpr std::size_t NUM_REGS = 0xCF8; | 28 | static constexpr std::size_t NUM_REGS = 0xCF8; |
| 26 | 29 | ||
| 27 | union { | 30 | union { |
| 28 | struct { | 31 | struct { |
| 29 | INSERT_PADDING_WORDS(0x281); | 32 | INSERT_PADDING_WORDS(0xAF); |
| 30 | 33 | ||
| 31 | union { | 34 | u32 launch; |
| 32 | u32 compute_end; | ||
| 33 | BitField<0, 1, u32> unknown; | ||
| 34 | } compute; | ||
| 35 | 35 | ||
| 36 | INSERT_PADDING_WORDS(0xA76); | 36 | INSERT_PADDING_WORDS(0xC48); |
| 37 | }; | 37 | }; |
| 38 | std::array<u32, NUM_REGS> reg_array; | 38 | std::array<u32, NUM_REGS> reg_array; |
| 39 | }; | 39 | }; |
| 40 | } regs{}; | 40 | } regs{}; |
| 41 | |||
| 42 | static_assert(sizeof(Regs) == Regs::NUM_REGS * sizeof(u32), | 41 | static_assert(sizeof(Regs) == Regs::NUM_REGS * sizeof(u32), |
| 43 | "MaxwellCompute Regs has wrong size"); | 42 | "KeplerCompute Regs has wrong size"); |
| 43 | |||
| 44 | MemoryManager& memory_manager; | ||
| 44 | 45 | ||
| 45 | /// Write the value to the register identified by method. | 46 | /// Write the value to the register identified by method. |
| 46 | void CallMethod(const GPU::MethodCall& method_call); | 47 | void CallMethod(const GPU::MethodCall& method_call); |
| 47 | }; | 48 | }; |
| 48 | 49 | ||
| 49 | #define ASSERT_REG_POSITION(field_name, position) \ | 50 | #define ASSERT_REG_POSITION(field_name, position) \ |
| 50 | static_assert(offsetof(MaxwellCompute::Regs, field_name) == position * 4, \ | 51 | static_assert(offsetof(KeplerCompute::Regs, field_name) == position * 4, \ |
| 51 | "Field " #field_name " has invalid position") | 52 | "Field " #field_name " has invalid position") |
| 52 | 53 | ||
| 53 | ASSERT_REG_POSITION(compute, 0x281); | 54 | ASSERT_REG_POSITION(launch, 0xAF); |
| 54 | 55 | ||
| 55 | #undef ASSERT_REG_POSITION | 56 | #undef ASSERT_REG_POSITION |
| 56 | 57 | ||
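The KEPLER_COMPUTE_REG_INDEX/ASSERT_REG_POSITION pair pins the register layout at compile time: named fields overlay a flat u32 array, and offsetof() recovers a field's register index. A self-contained sketch of the technique with a toy layout (field positions are made up):

#include <array>
#include <cstddef>
#include <cstdint>

struct Regs {
    static constexpr std::size_t NUM_REGS = 0x10;
    union {
        struct {
            std::uint32_t pad0[0x3];
            std::uint32_t launch; // register 0x3 in this toy layout
            std::uint32_t pad1[0xC];
        };
        std::array<std::uint32_t, NUM_REGS> reg_array;
    };
};

#define REG_INDEX(field_name) (offsetof(Regs, field_name) / sizeof(std::uint32_t))

static_assert(sizeof(Regs) == Regs::NUM_REGS * sizeof(std::uint32_t),
              "Regs has wrong size");
static_assert(REG_INDEX(launch) == 0x3, "Field launch has invalid position");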
diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp index 10eae6a65..19b6b14b2 100644 --- a/src/video_core/engines/maxwell_3d.cpp +++ b/src/video_core/engines/maxwell_3d.cpp | |||
| @@ -317,7 +317,7 @@ void Maxwell3D::ProcessQueryGet() { | |||
| 317 | LongQueryResult query_result{}; | 317 | LongQueryResult query_result{}; |
| 318 | query_result.value = result; | 318 | query_result.value = result; |
| 319 | // TODO(Subv): Generate a real GPU timestamp and write it here instead of CoreTiming | 319 | // TODO(Subv): Generate a real GPU timestamp and write it here instead of CoreTiming |
| 320 | query_result.timestamp = CoreTiming::GetTicks(); | 320 | query_result.timestamp = Core::Timing::GetTicks(); |
| 321 | Memory::WriteBlock(*address, &query_result, sizeof(query_result)); | 321 | Memory::WriteBlock(*address, &query_result, sizeof(query_result)); |
| 322 | } | 322 | } |
| 323 | dirty_flags.OnMemoryWrite(); | 323 | dirty_flags.OnMemoryWrite(); |
diff --git a/src/video_core/engines/maxwell_compute.cpp b/src/video_core/engines/maxwell_compute.cpp deleted file mode 100644 index 656db6a61..000000000 --- a/src/video_core/engines/maxwell_compute.cpp +++ /dev/null | |||
| @@ -1,28 +0,0 @@ | |||
| 1 | // Copyright 2018 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/logging/log.h" | ||
| 6 | #include "core/core.h" | ||
| 7 | #include "video_core/engines/maxwell_compute.h" | ||
| 8 | |||
| 9 | namespace Tegra::Engines { | ||
| 10 | |||
| 11 | void MaxwellCompute::CallMethod(const GPU::MethodCall& method_call) { | ||
| 12 | ASSERT_MSG(method_call.method < Regs::NUM_REGS, | ||
| 13 | "Invalid MaxwellCompute register, increase the size of the Regs structure"); | ||
| 14 | |||
| 15 | regs.reg_array[method_call.method] = method_call.argument; | ||
| 16 | |||
| 17 | switch (method_call.method) { | ||
| 18 | case MAXWELL_COMPUTE_REG_INDEX(compute): { | ||
| 19 | LOG_CRITICAL(HW_GPU, "Compute shaders are not implemented"); | ||
| 20 | UNREACHABLE(); | ||
| 21 | break; | ||
| 22 | } | ||
| 23 | default: | ||
| 24 | break; | ||
| 25 | } | ||
| 26 | } | ||
| 27 | |||
| 28 | } // namespace Tegra::Engines | ||
diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h index 269df9437..1f425f90b 100644 --- a/src/video_core/engines/shader_bytecode.h +++ b/src/video_core/engines/shader_bytecode.h | |||
| @@ -186,7 +186,7 @@ enum class SubOp : u64 { | |||
| 186 | }; | 186 | }; |
| 187 | 187 | ||
| 188 | enum class F2iRoundingOp : u64 { | 188 | enum class F2iRoundingOp : u64 { |
| 189 | None = 0, | 189 | RoundEven = 0, |
| 190 | Floor = 1, | 190 | Floor = 1, |
| 191 | Ceil = 2, | 191 | Ceil = 2, |
| 192 | Trunc = 3, | 192 | Trunc = 3, |
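The rename from None to RoundEven is a correctness fix, not cosmetic: rounding mode 0 is IEEE round-to-nearest-even rather than a pass-through, which is what the conversion.cpp hunk at the end of this diff starts emitting. A standalone illustration of the tie-breaking behaviour:

#include <cfenv>
#include <cmath>
#include <cstdio>

int main() {
    std::fesetround(FE_TONEAREST); // ties-to-even, the IEEE default
    // Halfway cases round to the even neighbour: prints "0 2".
    std::printf("%.0f %.0f\n", std::nearbyint(0.5), std::nearbyint(1.5));
    // The old 'None' name wrongly suggested the value passed through unrounded.
    return 0;
}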
diff --git a/src/video_core/gpu.cpp b/src/video_core/gpu.cpp index d3d32a359..3d00c308b 100644 --- a/src/video_core/gpu.cpp +++ b/src/video_core/gpu.cpp | |||
| @@ -6,9 +6,9 @@ | |||
| 6 | #include "core/core_timing.h" | 6 | #include "core/core_timing.h" |
| 7 | #include "core/memory.h" | 7 | #include "core/memory.h" |
| 8 | #include "video_core/engines/fermi_2d.h" | 8 | #include "video_core/engines/fermi_2d.h" |
| 9 | #include "video_core/engines/kepler_compute.h" | ||
| 9 | #include "video_core/engines/kepler_memory.h" | 10 | #include "video_core/engines/kepler_memory.h" |
| 10 | #include "video_core/engines/maxwell_3d.h" | 11 | #include "video_core/engines/maxwell_3d.h" |
| 11 | #include "video_core/engines/maxwell_compute.h" | ||
| 12 | #include "video_core/engines/maxwell_dma.h" | 12 | #include "video_core/engines/maxwell_dma.h" |
| 13 | #include "video_core/gpu.h" | 13 | #include "video_core/gpu.h" |
| 14 | #include "video_core/rasterizer_interface.h" | 14 | #include "video_core/rasterizer_interface.h" |
| @@ -18,6 +18,7 @@ namespace Tegra { | |||
| 18 | u32 FramebufferConfig::BytesPerPixel(PixelFormat format) { | 18 | u32 FramebufferConfig::BytesPerPixel(PixelFormat format) { |
| 19 | switch (format) { | 19 | switch (format) { |
| 20 | case PixelFormat::ABGR8: | 20 | case PixelFormat::ABGR8: |
| 21 | case PixelFormat::BGRA8: | ||
| 21 | return 4; | 22 | return 4; |
| 22 | default: | 23 | default: |
| 23 | return 4; | 24 | return 4; |
| @@ -31,7 +32,7 @@ GPU::GPU(VideoCore::RasterizerInterface& rasterizer) { | |||
| 31 | dma_pusher = std::make_unique<Tegra::DmaPusher>(*this); | 32 | dma_pusher = std::make_unique<Tegra::DmaPusher>(*this); |
| 32 | maxwell_3d = std::make_unique<Engines::Maxwell3D>(rasterizer, *memory_manager); | 33 | maxwell_3d = std::make_unique<Engines::Maxwell3D>(rasterizer, *memory_manager); |
| 33 | fermi_2d = std::make_unique<Engines::Fermi2D>(rasterizer, *memory_manager); | 34 | fermi_2d = std::make_unique<Engines::Fermi2D>(rasterizer, *memory_manager); |
| 34 | maxwell_compute = std::make_unique<Engines::MaxwellCompute>(); | 35 | kepler_compute = std::make_unique<Engines::KeplerCompute>(*memory_manager); |
| 35 | maxwell_dma = std::make_unique<Engines::MaxwellDMA>(rasterizer, *memory_manager); | 36 | maxwell_dma = std::make_unique<Engines::MaxwellDMA>(rasterizer, *memory_manager); |
| 36 | kepler_memory = std::make_unique<Engines::KeplerMemory>(rasterizer, *memory_manager); | 37 | kepler_memory = std::make_unique<Engines::KeplerMemory>(rasterizer, *memory_manager); |
| 37 | } | 38 | } |
| @@ -245,8 +246,8 @@ void GPU::CallEngineMethod(const MethodCall& method_call) { | |||
| 245 | case EngineID::MAXWELL_B: | 246 | case EngineID::MAXWELL_B: |
| 246 | maxwell_3d->CallMethod(method_call); | 247 | maxwell_3d->CallMethod(method_call); |
| 247 | break; | 248 | break; |
| 248 | case EngineID::MAXWELL_COMPUTE_B: | 249 | case EngineID::KEPLER_COMPUTE_B: |
| 249 | maxwell_compute->CallMethod(method_call); | 250 | kepler_compute->CallMethod(method_call); |
| 250 | break; | 251 | break; |
| 251 | case EngineID::MAXWELL_DMA_COPY_A: | 252 | case EngineID::MAXWELL_DMA_COPY_A: |
| 252 | maxwell_dma->CallMethod(method_call); | 253 | maxwell_dma->CallMethod(method_call); |
| @@ -282,7 +283,7 @@ void GPU::ProcessSemaphoreTriggerMethod() { | |||
| 282 | block.sequence = regs.semaphore_sequence; | 283 | block.sequence = regs.semaphore_sequence; |
| 283 | // TODO(Kmather73): Generate a real GPU timestamp and write it here instead of | 284 | // TODO(Kmather73): Generate a real GPU timestamp and write it here instead of |
| 284 | // CoreTiming | 285 | // CoreTiming |
| 285 | block.timestamp = CoreTiming::GetTicks(); | 286 | block.timestamp = Core::Timing::GetTicks(); |
| 286 | Memory::WriteBlock(*address, &block, sizeof(block)); | 287 | Memory::WriteBlock(*address, &block, sizeof(block)); |
| 287 | } else { | 288 | } else { |
| 288 | const auto address = | 289 | const auto address = |
diff --git a/src/video_core/gpu.h b/src/video_core/gpu.h index fb8975811..a482196ea 100644 --- a/src/video_core/gpu.h +++ b/src/video_core/gpu.h | |||
| @@ -80,6 +80,7 @@ class DebugContext; | |||
| 80 | struct FramebufferConfig { | 80 | struct FramebufferConfig { |
| 81 | enum class PixelFormat : u32 { | 81 | enum class PixelFormat : u32 { |
| 82 | ABGR8 = 1, | 82 | ABGR8 = 1, |
| 83 | BGRA8 = 5, | ||
| 83 | }; | 84 | }; |
| 84 | 85 | ||
| 85 | /** | 86 | /** |
| @@ -102,15 +103,15 @@ struct FramebufferConfig { | |||
| 102 | namespace Engines { | 103 | namespace Engines { |
| 103 | class Fermi2D; | 104 | class Fermi2D; |
| 104 | class Maxwell3D; | 105 | class Maxwell3D; |
| 105 | class MaxwellCompute; | ||
| 106 | class MaxwellDMA; | 106 | class MaxwellDMA; |
| 107 | class KeplerCompute; | ||
| 107 | class KeplerMemory; | 108 | class KeplerMemory; |
| 108 | } // namespace Engines | 109 | } // namespace Engines |
| 109 | 110 | ||
| 110 | enum class EngineID { | 111 | enum class EngineID { |
| 111 | FERMI_TWOD_A = 0x902D, // 2D Engine | 112 | FERMI_TWOD_A = 0x902D, // 2D Engine |
| 112 | MAXWELL_B = 0xB197, // 3D Engine | 113 | MAXWELL_B = 0xB197, // 3D Engine |
| 113 | MAXWELL_COMPUTE_B = 0xB1C0, | 114 | KEPLER_COMPUTE_B = 0xB1C0, |
| 114 | KEPLER_INLINE_TO_MEMORY_B = 0xA140, | 115 | KEPLER_INLINE_TO_MEMORY_B = 0xA140, |
| 115 | MAXWELL_DMA_COPY_A = 0xB0B5, | 116 | MAXWELL_DMA_COPY_A = 0xB0B5, |
| 116 | }; | 117 | }; |
| @@ -208,7 +209,7 @@ private: | |||
| 208 | /// 2D engine | 209 | /// 2D engine |
| 209 | std::unique_ptr<Engines::Fermi2D> fermi_2d; | 210 | std::unique_ptr<Engines::Fermi2D> fermi_2d; |
| 210 | /// Compute engine | 211 | /// Compute engine |
| 211 | std::unique_ptr<Engines::MaxwellCompute> maxwell_compute; | 212 | std::unique_ptr<Engines::KeplerCompute> kepler_compute; |
| 212 | /// DMA engine | 213 | /// DMA engine |
| 213 | std::unique_ptr<Engines::MaxwellDMA> maxwell_dma; | 214 | std::unique_ptr<Engines::MaxwellDMA> maxwell_dma; |
| 214 | /// Inline memory engine | 215 | /// Inline memory engine |
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp index 70e124dc4..db18f4dbe 100644 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp +++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp | |||
| @@ -616,17 +616,8 @@ private: | |||
| 616 | 616 | ||
| 617 | std::string VisitOperand(Operation operation, std::size_t operand_index, Type type) { | 617 | std::string VisitOperand(Operation operation, std::size_t operand_index, Type type) { |
| 618 | std::string value = VisitOperand(operation, operand_index); | 618 | std::string value = VisitOperand(operation, operand_index); |
| 619 | |||
| 620 | switch (type) { | 619 | switch (type) { |
| 621 | case Type::Bool: | 620 | case Type::HalfFloat: { |
| 622 | case Type::Bool2: | ||
| 623 | case Type::Float: | ||
| 624 | return value; | ||
| 625 | case Type::Int: | ||
| 626 | return "ftoi(" + value + ')'; | ||
| 627 | case Type::Uint: | ||
| 628 | return "ftou(" + value + ')'; | ||
| 629 | case Type::HalfFloat: | ||
| 630 | const auto half_meta = std::get_if<MetaHalfArithmetic>(&operation.GetMeta()); | 621 | const auto half_meta = std::get_if<MetaHalfArithmetic>(&operation.GetMeta()); |
| 631 | if (!half_meta) { | 622 | if (!half_meta) { |
| 632 | value = "toHalf2(" + value + ')'; | 623 | value = "toHalf2(" + value + ')'; |
| @@ -643,6 +634,26 @@ private: | |||
| 643 | return "vec2(toHalf2(" + value + ")[1])"; | 634 | return "vec2(toHalf2(" + value + ")[1])"; |
| 644 | } | 635 | } |
| 645 | } | 636 | } |
| 637 | default: | ||
| 638 | return CastOperand(value, type); | ||
| 639 | } | ||
| 640 | } | ||
| 641 | |||
| 642 | std::string CastOperand(const std::string& value, Type type) const { | ||
| 643 | switch (type) { | ||
| 644 | case Type::Bool: | ||
| 645 | case Type::Bool2: | ||
| 646 | case Type::Float: | ||
| 647 | return value; | ||
| 648 | case Type::Int: | ||
| 649 | return "ftoi(" + value + ')'; | ||
| 650 | case Type::Uint: | ||
| 651 | return "ftou(" + value + ')'; | ||
| 652 | case Type::HalfFloat: | ||
| 653 | // Can't be handled as a stand-alone value | ||
| 654 | UNREACHABLE(); | ||
| 655 | return value; | ||
| 656 | } | ||
| 646 | UNREACHABLE(); | 657 | UNREACHABLE(); |
| 647 | return value; | 658 | return value; |
| 648 | } | 659 | } |
| @@ -650,6 +661,7 @@ private: | |||
| 650 | std::string BitwiseCastResult(std::string value, Type type, bool needs_parenthesis = false) { | 661 | std::string BitwiseCastResult(std::string value, Type type, bool needs_parenthesis = false) { |
| 651 | switch (type) { | 662 | switch (type) { |
| 652 | case Type::Bool: | 663 | case Type::Bool: |
| 664 | case Type::Bool2: | ||
| 653 | case Type::Float: | 665 | case Type::Float: |
| 654 | if (needs_parenthesis) { | 666 | if (needs_parenthesis) { |
| 655 | return '(' + value + ')'; | 667 | return '(' + value + ')'; |
| @@ -719,45 +731,51 @@ private: | |||
| 719 | constexpr std::array<const char*, 4> coord_constructors = {"float", "vec2", "vec3", "vec4"}; | 731 | constexpr std::array<const char*, 4> coord_constructors = {"float", "vec2", "vec3", "vec4"}; |
| 720 | 732 | ||
| 721 | const auto meta = std::get_if<MetaTexture>(&operation.GetMeta()); | 733 | const auto meta = std::get_if<MetaTexture>(&operation.GetMeta()); |
| 722 | const auto count = static_cast<u32>(operation.GetOperandsCount()); | ||
| 723 | ASSERT(meta); | 734 | ASSERT(meta); |
| 724 | 735 | ||
| 736 | const std::size_t count = operation.GetOperandsCount(); | ||
| 737 | const bool has_array = meta->sampler.IsArray(); | ||
| 738 | const bool has_shadow = meta->sampler.IsShadow(); | ||
| 739 | |||
| 725 | std::string expr = func; | 740 | std::string expr = func; |
| 726 | expr += '('; | 741 | expr += '('; |
| 727 | expr += GetSampler(meta->sampler); | 742 | expr += GetSampler(meta->sampler); |
| 728 | expr += ", "; | 743 | expr += ", "; |
| 729 | 744 | ||
| 730 | expr += coord_constructors[meta->coords_count - 1]; | 745 | expr += coord_constructors.at(count + (has_array ? 1 : 0) + (has_shadow ? 1 : 0) - 1); |
| 731 | expr += '('; | 746 | expr += '('; |
| 732 | for (u32 i = 0; i < count; ++i) { | 747 | for (std::size_t i = 0; i < count; ++i) { |
| 733 | const bool is_extra = i >= meta->coords_count; | 748 | expr += Visit(operation[i]); |
| 734 | const bool is_array = i == meta->array_index; | ||
| 735 | |||
| 736 | std::string operand = [&]() { | ||
| 737 | if (is_extra && is_extra_int) { | ||
| 738 | if (const auto immediate = std::get_if<ImmediateNode>(operation[i])) { | ||
| 739 | return std::to_string(static_cast<s32>(immediate->GetValue())); | ||
| 740 | } else { | ||
| 741 | return "ftoi(" + Visit(operation[i]) + ')'; | ||
| 742 | } | ||
| 743 | } else { | ||
| 744 | return Visit(operation[i]); | ||
| 745 | } | ||
| 746 | }(); | ||
| 747 | if (is_array) { | ||
| 748 | ASSERT(!is_extra); | ||
| 749 | operand = "float(ftoi(" + operand + "))"; | ||
| 750 | } | ||
| 751 | 749 | ||
| 752 | expr += operand; | 750 | const std::size_t next = i + 1; |
| 753 | 751 | if (next < count || has_array || has_shadow) | |
| 754 | if (i + 1 == meta->coords_count) { | 752 | expr += ", "; |
| 755 | expr += ')'; | 753 | } |
| 756 | } | 754 | if (has_array) { |
| 757 | if (i + 1 < count) { | 755 | expr += "float(ftoi(" + Visit(meta->array) + "))"; |
| 756 | } | ||
| 757 | if (has_shadow) { | ||
| 758 | if (has_array) | ||
| 758 | expr += ", "; | 759 | expr += ", "; |
| 760 | expr += Visit(meta->depth_compare); | ||
| 761 | } | ||
| 762 | expr += ')'; | ||
| 763 | |||
| 764 | for (const Node extra : meta->extras) { | ||
| 765 | expr += ", "; | ||
| 766 | if (is_extra_int) { | ||
| 767 | if (const auto immediate = std::get_if<ImmediateNode>(extra)) { | ||
| 768 | // Inline the string as an immediate integer in GLSL (some extra arguments are | ||
| 769 | // required to be constant) | ||
| 770 | expr += std::to_string(static_cast<s32>(immediate->GetValue())); | ||
| 771 | } else { | ||
| 772 | expr += "ftoi(" + Visit(extra) + ')'; | ||
| 773 | } | ||
| 774 | } else { | ||
| 775 | expr += Visit(extra); | ||
| 759 | } | 776 | } |
| 760 | } | 777 | } |
| 778 | |||
| 761 | expr += ')'; | 779 | expr += ')'; |
| 762 | return expr; | 780 | return expr; |
| 763 | } | 781 | } |
| @@ -1134,7 +1152,7 @@ private: | |||
| 1134 | Type::HalfFloat); | 1152 | Type::HalfFloat); |
| 1135 | } | 1153 | } |
| 1136 | 1154 | ||
| 1137 | std::string F4Texture(Operation operation) { | 1155 | std::string Texture(Operation operation) { |
| 1138 | const auto meta = std::get_if<MetaTexture>(&operation.GetMeta()); | 1156 | const auto meta = std::get_if<MetaTexture>(&operation.GetMeta()); |
| 1139 | ASSERT(meta); | 1157 | ASSERT(meta); |
| 1140 | 1158 | ||
| @@ -1145,7 +1163,7 @@ private: | |||
| 1145 | return expr + GetSwizzle(meta->element); | 1163 | return expr + GetSwizzle(meta->element); |
| 1146 | } | 1164 | } |
| 1147 | 1165 | ||
| 1148 | std::string F4TextureLod(Operation operation) { | 1166 | std::string TextureLod(Operation operation) { |
| 1149 | const auto meta = std::get_if<MetaTexture>(&operation.GetMeta()); | 1167 | const auto meta = std::get_if<MetaTexture>(&operation.GetMeta()); |
| 1150 | ASSERT(meta); | 1168 | ASSERT(meta); |
| 1151 | 1169 | ||
| @@ -1156,7 +1174,7 @@ private: | |||
| 1156 | return expr + GetSwizzle(meta->element); | 1174 | return expr + GetSwizzle(meta->element); |
| 1157 | } | 1175 | } |
| 1158 | 1176 | ||
| 1159 | std::string F4TextureGather(Operation operation) { | 1177 | std::string TextureGather(Operation operation) { |
| 1160 | const auto meta = std::get_if<MetaTexture>(&operation.GetMeta()); | 1178 | const auto meta = std::get_if<MetaTexture>(&operation.GetMeta()); |
| 1161 | ASSERT(meta); | 1179 | ASSERT(meta); |
| 1162 | 1180 | ||
| @@ -1164,7 +1182,7 @@ private: | |||
| 1164 | GetSwizzle(meta->element); | 1182 | GetSwizzle(meta->element); |
| 1165 | } | 1183 | } |
| 1166 | 1184 | ||
| 1167 | std::string F4TextureQueryDimensions(Operation operation) { | 1185 | std::string TextureQueryDimensions(Operation operation) { |
| 1168 | const auto meta = std::get_if<MetaTexture>(&operation.GetMeta()); | 1186 | const auto meta = std::get_if<MetaTexture>(&operation.GetMeta()); |
| 1169 | ASSERT(meta); | 1187 | ASSERT(meta); |
| 1170 | 1188 | ||
| @@ -1184,7 +1202,7 @@ private: | |||
| 1184 | return "0"; | 1202 | return "0"; |
| 1185 | } | 1203 | } |
| 1186 | 1204 | ||
| 1187 | std::string F4TextureQueryLod(Operation operation) { | 1205 | std::string TextureQueryLod(Operation operation) { |
| 1188 | const auto meta = std::get_if<MetaTexture>(&operation.GetMeta()); | 1206 | const auto meta = std::get_if<MetaTexture>(&operation.GetMeta()); |
| 1189 | ASSERT(meta); | 1207 | ASSERT(meta); |
| 1190 | 1208 | ||
| @@ -1195,29 +1213,33 @@ private: | |||
| 1195 | return "0"; | 1213 | return "0"; |
| 1196 | } | 1214 | } |
| 1197 | 1215 | ||
| 1198 | std::string F4TexelFetch(Operation operation) { | 1216 | std::string TexelFetch(Operation operation) { |
| 1199 | constexpr std::array<const char*, 4> constructors = {"int", "ivec2", "ivec3", "ivec4"}; | 1217 | constexpr std::array<const char*, 4> constructors = {"int", "ivec2", "ivec3", "ivec4"}; |
| 1200 | const auto meta = std::get_if<MetaTexture>(&operation.GetMeta()); | 1218 | const auto meta = std::get_if<MetaTexture>(&operation.GetMeta()); |
| 1201 | const auto count = static_cast<u32>(operation.GetOperandsCount()); | ||
| 1202 | ASSERT(meta); | 1219 | ASSERT(meta); |
| 1220 | UNIMPLEMENTED_IF(meta->sampler.IsArray()); | ||
| 1221 | const std::size_t count = operation.GetOperandsCount(); | ||
| 1203 | 1222 | ||
| 1204 | std::string expr = "texelFetch("; | 1223 | std::string expr = "texelFetch("; |
| 1205 | expr += GetSampler(meta->sampler); | 1224 | expr += GetSampler(meta->sampler); |
| 1206 | expr += ", "; | 1225 | expr += ", "; |
| 1207 | 1226 | ||
| 1208 | expr += constructors[meta->coords_count - 1]; | 1227 | expr += constructors.at(operation.GetOperandsCount() - 1); |
| 1209 | expr += '('; | 1228 | expr += '('; |
| 1210 | for (u32 i = 0; i < count; ++i) { | 1229 | for (std::size_t i = 0; i < count; ++i) { |
| 1211 | expr += VisitOperand(operation, i, Type::Int); | 1230 | expr += VisitOperand(operation, i, Type::Int); |
| 1212 | 1231 | const std::size_t next = i + 1; | |
| 1213 | if (i + 1 == meta->coords_count) { | 1232 | if (next == count) |
| 1214 | expr += ')'; | 1233 | expr += ')'; |
| 1215 | } | 1234 | else if (next < count) |
| 1216 | if (i + 1 < count) { | ||
| 1217 | expr += ", "; | 1235 | expr += ", "; |
| 1218 | } | 1236 | } |
| 1237 | for (std::size_t i = 0; i < meta->extras.size(); ++i) { | ||
| 1238 | expr += ", "; | ||
| 1239 | expr += CastOperand(Visit(meta->extras.at(i)), Type::Int); | ||
| 1219 | } | 1240 | } |
| 1220 | expr += ')'; | 1241 | expr += ')'; |
| 1242 | |||
| 1221 | return expr + GetSwizzle(meta->element); | 1243 | return expr + GetSwizzle(meta->element); |
| 1222 | } | 1244 | } |
| 1223 | 1245 | ||
| @@ -1454,12 +1476,12 @@ private: | |||
| 1454 | &GLSLDecompiler::Logical2HNotEqual, | 1476 | &GLSLDecompiler::Logical2HNotEqual, |
| 1455 | &GLSLDecompiler::Logical2HGreaterEqual, | 1477 | &GLSLDecompiler::Logical2HGreaterEqual, |
| 1456 | 1478 | ||
| 1457 | &GLSLDecompiler::F4Texture, | 1479 | &GLSLDecompiler::Texture, |
| 1458 | &GLSLDecompiler::F4TextureLod, | 1480 | &GLSLDecompiler::TextureLod, |
| 1459 | &GLSLDecompiler::F4TextureGather, | 1481 | &GLSLDecompiler::TextureGather, |
| 1460 | &GLSLDecompiler::F4TextureQueryDimensions, | 1482 | &GLSLDecompiler::TextureQueryDimensions, |
| 1461 | &GLSLDecompiler::F4TextureQueryLod, | 1483 | &GLSLDecompiler::TextureQueryLod, |
| 1462 | &GLSLDecompiler::F4TexelFetch, | 1484 | &GLSLDecompiler::TexelFetch, |
| 1463 | 1485 | ||
| 1464 | &GLSLDecompiler::Branch, | 1486 | &GLSLDecompiler::Branch, |
| 1465 | &GLSLDecompiler::PushFlowStack, | 1487 | &GLSLDecompiler::PushFlowStack, |
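The rewritten GenerateTexture sizes the GLSL coordinate constructor from the operand count plus the optional array layer and depth-compare reference, then appends the extras outside the coordinate vector. A standalone sketch of that assembly logic (names are illustrative; ftoi is the decompiler's float-to-int helper seen above):

#include <cstddef>
#include <string>
#include <vector>

std::string BuildTextureCall(const std::string& sampler,
                             const std::vector<std::string>& coords,
                             const std::string& array_layer,     // empty if not an array sampler
                             const std::string& depth_compare) { // empty if not a shadow sampler
    static constexpr const char* constructors[] = {"float", "vec2", "vec3", "vec4"};
    const std::size_t total =
        coords.size() + !array_layer.empty() + !depth_compare.empty();

    std::string expr = "texture(" + sampler + ", ";
    expr += constructors[total - 1];
    expr += '(';
    for (std::size_t i = 0; i < coords.size(); ++i) {
        expr += coords[i];
        if (i + 1 < total)
            expr += ", ";
    }
    if (!array_layer.empty()) {
        expr += "float(ftoi(" + array_layer + "))"; // the layer is an integer
        if (!depth_compare.empty())
            expr += ", ";
    }
    if (!depth_compare.empty())
        expr += depth_compare;
    return expr + "))";
}
// BuildTextureCall("sampler0", {"u", "v"}, "layer", "ref") yields
// texture(sampler0, vec4(u, v, float(ftoi(layer)), ref))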
diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp index 6476a9e1a..cca2ed708 100644 --- a/src/video_core/renderer_opengl/renderer_opengl.cpp +++ b/src/video_core/renderer_opengl/renderer_opengl.cpp | |||
| @@ -107,7 +107,7 @@ RendererOpenGL::~RendererOpenGL() = default; | |||
| 107 | void RendererOpenGL::SwapBuffers( | 107 | void RendererOpenGL::SwapBuffers( |
| 108 | std::optional<std::reference_wrapper<const Tegra::FramebufferConfig>> framebuffer) { | 108 | std::optional<std::reference_wrapper<const Tegra::FramebufferConfig>> framebuffer) { |
| 109 | 109 | ||
| 110 | Core::System::GetInstance().GetPerfStats().EndSystemFrame(); | 110 | system.GetPerfStats().EndSystemFrame(); |
| 111 | 111 | ||
| 112 | // Maintain the rasterizer's state as a priority | 112 | // Maintain the rasterizer's state as a priority |
| 113 | OpenGLState prev_state = OpenGLState::GetCurState(); | 113 | OpenGLState prev_state = OpenGLState::GetCurState(); |
| @@ -137,8 +137,8 @@ void RendererOpenGL::SwapBuffers( | |||
| 137 | 137 | ||
| 138 | render_window.PollEvents(); | 138 | render_window.PollEvents(); |
| 139 | 139 | ||
| 140 | Core::System::GetInstance().FrameLimiter().DoFrameLimiting(CoreTiming::GetGlobalTimeUs()); | 140 | system.FrameLimiter().DoFrameLimiting(Core::Timing::GetGlobalTimeUs()); |
| 141 | Core::System::GetInstance().GetPerfStats().BeginSystemFrame(); | 141 | system.GetPerfStats().BeginSystemFrame(); |
| 142 | 142 | ||
| 143 | // Restore the rasterizer state | 143 | // Restore the rasterizer state |
| 144 | prev_state.Apply(); | 144 | prev_state.Apply(); |
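SwapBuffers now goes through an injected Core::System reference rather than the Core::System::GetInstance() singleton. A minimal sketch of the pattern, assuming the reference is captured at construction (the constructor change is outside this hunk):

class RendererOpenGL {
public:
    explicit RendererOpenGL(Core::System& system) : system{system} {}

    void SwapBuffers() {
        system.GetPerfStats().EndSystemFrame();
        // ... render and present the frame ...
        system.FrameLimiter().DoFrameLimiting(Core::Timing::GetGlobalTimeUs());
        system.GetPerfStats().BeginSystemFrame();
    }

private:
    Core::System& system; // injected once; no global lookup per frame
};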
diff --git a/src/video_core/renderer_vulkan/declarations.h b/src/video_core/renderer_vulkan/declarations.h new file mode 100644 index 000000000..ba25b5bc7 --- /dev/null +++ b/src/video_core/renderer_vulkan/declarations.h | |||
| @@ -0,0 +1,45 @@ | |||
| 1 | // Copyright 2019 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include <vulkan/vulkan.hpp> | ||
| 8 | |||
| 9 | namespace Vulkan { | ||
| 10 | |||
| 11 | // vulkan.hpp unique handles default to DispatchLoaderStatic | ||
| 12 | template <typename T> | ||
| 13 | using UniqueHandle = vk::UniqueHandle<T, vk::DispatchLoaderDynamic>; | ||
| 14 | |||
| 15 | using UniqueAccelerationStructureNV = UniqueHandle<vk::AccelerationStructureNV>; | ||
| 16 | using UniqueBuffer = UniqueHandle<vk::Buffer>; | ||
| 17 | using UniqueBufferView = UniqueHandle<vk::BufferView>; | ||
| 18 | using UniqueCommandBuffer = UniqueHandle<vk::CommandBuffer>; | ||
| 19 | using UniqueCommandPool = UniqueHandle<vk::CommandPool>; | ||
| 20 | using UniqueDescriptorPool = UniqueHandle<vk::DescriptorPool>; | ||
| 21 | using UniqueDescriptorSet = UniqueHandle<vk::DescriptorSet>; | ||
| 22 | using UniqueDescriptorSetLayout = UniqueHandle<vk::DescriptorSetLayout>; | ||
| 23 | using UniqueDescriptorUpdateTemplate = UniqueHandle<vk::DescriptorUpdateTemplate>; | ||
| 24 | using UniqueDevice = UniqueHandle<vk::Device>; | ||
| 25 | using UniqueDeviceMemory = UniqueHandle<vk::DeviceMemory>; | ||
| 26 | using UniqueEvent = UniqueHandle<vk::Event>; | ||
| 27 | using UniqueFence = UniqueHandle<vk::Fence>; | ||
| 28 | using UniqueFramebuffer = UniqueHandle<vk::Framebuffer>; | ||
| 29 | using UniqueImage = UniqueHandle<vk::Image>; | ||
| 30 | using UniqueImageView = UniqueHandle<vk::ImageView>; | ||
| 31 | using UniqueIndirectCommandsLayoutNVX = UniqueHandle<vk::IndirectCommandsLayoutNVX>; | ||
| 32 | using UniqueObjectTableNVX = UniqueHandle<vk::ObjectTableNVX>; | ||
| 33 | using UniquePipeline = UniqueHandle<vk::Pipeline>; | ||
| 34 | using UniquePipelineCache = UniqueHandle<vk::PipelineCache>; | ||
| 35 | using UniquePipelineLayout = UniqueHandle<vk::PipelineLayout>; | ||
| 36 | using UniqueQueryPool = UniqueHandle<vk::QueryPool>; | ||
| 37 | using UniqueRenderPass = UniqueHandle<vk::RenderPass>; | ||
| 38 | using UniqueSampler = UniqueHandle<vk::Sampler>; | ||
| 39 | using UniqueSamplerYcbcrConversion = UniqueHandle<vk::SamplerYcbcrConversion>; | ||
| 40 | using UniqueSemaphore = UniqueHandle<vk::Semaphore>; | ||
| 41 | using UniqueShaderModule = UniqueHandle<vk::ShaderModule>; | ||
| 42 | using UniqueSwapchainKHR = UniqueHandle<vk::SwapchainKHR>; | ||
| 43 | using UniqueValidationCacheEXT = UniqueHandle<vk::ValidationCacheEXT>; | ||
| 44 | |||
| 45 | } // namespace Vulkan | ||
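These aliases exist because vulkan.hpp's default vk::Unique* types destroy handles through DispatchLoaderStatic, while this backend resolves entry points at runtime. A hedged sketch of creating a handle that destroys through the dynamic loader; the vk::ObjectDestroy construction mirrors vk_device.cpp below, and createSemaphore() is assumed standard vulkan.hpp usage:

#include "video_core/renderer_vulkan/declarations.h"

Vulkan::UniqueSemaphore MakeSemaphore(vk::Device device,
                                      const vk::DispatchLoaderDynamic& dld) {
    const vk::SemaphoreCreateInfo semaphore_ci;
    const vk::Semaphore semaphore = device.createSemaphore(semaphore_ci, nullptr, dld);
    // The deleter carries the dynamic loader, so destruction also goes
    // through runtime-loaded function pointers.
    return Vulkan::UniqueSemaphore(
        semaphore,
        vk::ObjectDestroy<vk::Device, vk::DispatchLoaderDynamic>(device, nullptr, dld));
}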
diff --git a/src/video_core/renderer_vulkan/vk_device.cpp b/src/video_core/renderer_vulkan/vk_device.cpp new file mode 100644 index 000000000..78a4e5f0e --- /dev/null +++ b/src/video_core/renderer_vulkan/vk_device.cpp | |||
| @@ -0,0 +1,231 @@ | |||
| 1 | // Copyright 2018 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <map> | ||
| 6 | #include <optional> | ||
| 7 | #include <set> | ||
| 8 | #include <vector> | ||
| 9 | #include "common/assert.h" | ||
| 10 | #include "video_core/renderer_vulkan/declarations.h" | ||
| 11 | #include "video_core/renderer_vulkan/vk_device.h" | ||
| 12 | |||
| 13 | namespace Vulkan { | ||
| 14 | |||
| 15 | namespace Alternatives { | ||
| 16 | |||
| 17 | constexpr std::array<vk::Format, 3> Depth24UnormS8Uint = { | ||
| 18 | vk::Format::eD32SfloatS8Uint, vk::Format::eD16UnormS8Uint, {}}; | ||
| 19 | constexpr std::array<vk::Format, 3> Depth16UnormS8Uint = { | ||
| 20 | vk::Format::eD24UnormS8Uint, vk::Format::eD32SfloatS8Uint, {}}; | ||
| 21 | |||
| 22 | } // namespace Alternatives | ||
| 23 | |||
| 24 | constexpr const vk::Format* GetFormatAlternatives(vk::Format format) { | ||
| 25 | switch (format) { | ||
| 26 | case vk::Format::eD24UnormS8Uint: | ||
| 27 | return Alternatives::Depth24UnormS8Uint.data(); | ||
| 28 | case vk::Format::eD16UnormS8Uint: | ||
| 29 | return Alternatives::Depth16UnormS8Uint.data(); | ||
| 30 | default: | ||
| 31 | return nullptr; | ||
| 32 | } | ||
| 33 | } | ||
| 34 | |||
| 35 | constexpr vk::FormatFeatureFlags GetFormatFeatures(vk::FormatProperties properties, | ||
| 36 | FormatType format_type) { | ||
| 37 | switch (format_type) { | ||
| 38 | case FormatType::Linear: | ||
| 39 | return properties.linearTilingFeatures; | ||
| 40 | case FormatType::Optimal: | ||
| 41 | return properties.optimalTilingFeatures; | ||
| 42 | case FormatType::Buffer: | ||
| 43 | return properties.bufferFeatures; | ||
| 44 | default: | ||
| 45 | return {}; | ||
| 46 | } | ||
| 47 | } | ||
| 48 | |||
| 49 | VKDevice::VKDevice(const vk::DispatchLoaderDynamic& dldi, vk::PhysicalDevice physical, | ||
| 50 | vk::SurfaceKHR surface) | ||
| 51 | : physical{physical}, format_properties{GetFormatProperties(dldi, physical)} { | ||
| 52 | SetupFamilies(dldi, surface); | ||
| 53 | SetupProperties(dldi); | ||
| 54 | } | ||
| 55 | |||
| 56 | VKDevice::~VKDevice() = default; | ||
| 57 | |||
| 58 | bool VKDevice::Create(const vk::DispatchLoaderDynamic& dldi, vk::Instance instance) { | ||
| 59 | const auto queue_cis = GetDeviceQueueCreateInfos(); | ||
| 60 | vk::PhysicalDeviceFeatures device_features{}; | ||
| 61 | |||
| 62 | const std::vector<const char*> extensions = {VK_KHR_SWAPCHAIN_EXTENSION_NAME}; | ||
| 63 | const vk::DeviceCreateInfo device_ci({}, static_cast<u32>(queue_cis.size()), queue_cis.data(), | ||
| 64 | 0, nullptr, static_cast<u32>(extensions.size()), | ||
| 65 | extensions.data(), &device_features); | ||
| 66 | vk::Device dummy_logical; | ||
| 67 | if (physical.createDevice(&device_ci, nullptr, &dummy_logical, dldi) != vk::Result::eSuccess) { | ||
| 68 | LOG_CRITICAL(Render_Vulkan, "Logical device failed to be created!"); | ||
| 69 | return false; | ||
| 70 | } | ||
| 71 | |||
| 72 | dld.init(instance, dldi.vkGetInstanceProcAddr, dummy_logical, dldi.vkGetDeviceProcAddr); | ||
| 73 | logical = UniqueDevice( | ||
| 74 | dummy_logical, vk::ObjectDestroy<vk::NoParent, vk::DispatchLoaderDynamic>(nullptr, dld)); | ||
| 75 | |||
| 76 | graphics_queue = logical->getQueue(graphics_family, 0, dld); | ||
| 77 | present_queue = logical->getQueue(present_family, 0, dld); | ||
| 78 | return true; | ||
| 79 | } | ||
| 80 | |||
| 81 | vk::Format VKDevice::GetSupportedFormat(vk::Format wanted_format, | ||
| 82 | vk::FormatFeatureFlags wanted_usage, | ||
| 83 | FormatType format_type) const { | ||
| 84 | if (IsFormatSupported(wanted_format, wanted_usage, format_type)) { | ||
| 85 | return wanted_format; | ||
| 86 | } | ||
| 87 | // The wanted format is not supported by hardware; search for alternatives | ||
| 88 | const vk::Format* alternatives = GetFormatAlternatives(wanted_format); | ||
| 89 | if (alternatives == nullptr) { | ||
| 90 | LOG_CRITICAL(Render_Vulkan, | ||
| 91 | "Format={} with usage={} and type={} has no defined alternatives and host " | ||
| 92 | "hardware does not support it", | ||
| 93 | static_cast<u32>(wanted_format), static_cast<u32>(wanted_usage), | ||
| 94 | static_cast<u32>(format_type)); | ||
| 95 | UNREACHABLE(); | ||
| 96 | return wanted_format; | ||
| 97 | } | ||
| 98 | |||
| 99 | std::size_t i = 0; | ||
| 100 | for (vk::Format alternative = alternatives[0]; alternative != vk::Format{}; | ||
| 101 | alternative = alternatives[++i]) { | ||
| 102 | if (!IsFormatSupported(alternative, wanted_usage, format_type)) | ||
| 103 | continue; | ||
| 104 | LOG_WARNING(Render_Vulkan, | ||
| 105 | "Emulating format={} with alternative format={} with usage={} and type={}", | ||
| 106 | static_cast<u32>(wanted_format), static_cast<u32>(alternative), | ||
| 107 | static_cast<u32>(wanted_usage), static_cast<u32>(format_type)); | ||
| 108 | return alternative; | ||
| 109 | } | ||
| 110 | |||
| 111 | // No alternatives found, panic | ||
| 112 | LOG_CRITICAL(Render_Vulkan, | ||
| 113 | "Format={} with usage={} and type={} is not supported by the host hardware and " | ||
| 114 | "doesn't support any of the alternatives", | ||
| 115 | static_cast<u32>(wanted_format), static_cast<u32>(wanted_usage), | ||
| 116 | static_cast<u32>(format_type)); | ||
| 117 | UNREACHABLE(); | ||
| 118 | return wanted_format; | ||
| 119 | } | ||
| 120 | |||
| 121 | bool VKDevice::IsFormatSupported(vk::Format wanted_format, vk::FormatFeatureFlags wanted_usage, | ||
| 122 | FormatType format_type) const { | ||
| 123 | const auto it = format_properties.find(wanted_format); | ||
| 124 | if (it == format_properties.end()) { | ||
| 125 | LOG_CRITICAL(Render_Vulkan, "Unimplemented format query={}", | ||
| 126 | static_cast<u32>(wanted_format)); | ||
| 127 | UNREACHABLE(); | ||
| 128 | return true; | ||
| 129 | } | ||
| 130 | const vk::FormatFeatureFlags supported_usage = GetFormatFeatures(it->second, format_type); | ||
| 131 | return (supported_usage & wanted_usage) == wanted_usage; | ||
| 132 | } | ||
| 133 | |||
| 134 | bool VKDevice::IsSuitable(const vk::DispatchLoaderDynamic& dldi, vk::PhysicalDevice physical, | ||
| 135 | vk::SurfaceKHR surface) { | ||
| 136 | const std::string swapchain_extension = VK_KHR_SWAPCHAIN_EXTENSION_NAME; | ||
| 137 | |||
| 138 | bool has_swapchain{}; | ||
| 139 | for (const auto& prop : physical.enumerateDeviceExtensionProperties(nullptr, dldi)) { | ||
| 140 | has_swapchain |= prop.extensionName == swapchain_extension; | ||
| 141 | } | ||
| 142 | if (!has_swapchain) { | ||
| 143 | // The device doesn't support creating swapchains. | ||
| 144 | return false; | ||
| 145 | } | ||
| 146 | |||
| 147 | bool has_graphics{}, has_present{}; | ||
| 148 | const auto queue_family_properties = physical.getQueueFamilyProperties(dldi); | ||
| 149 | for (u32 i = 0; i < static_cast<u32>(queue_family_properties.size()); ++i) { | ||
| 150 | const auto& family = queue_family_properties[i]; | ||
| 151 | if (family.queueCount == 0) | ||
| 152 | continue; | ||
| 153 | |||
| 154 | has_graphics |= | ||
| 155 | (family.queueFlags & vk::QueueFlagBits::eGraphics) != static_cast<vk::QueueFlagBits>(0); | ||
| 156 | has_present |= physical.getSurfaceSupportKHR(i, surface, dldi) != 0; | ||
| 157 | } | ||
| 158 | if (!has_graphics || !has_present) { | ||
| 159 | // The device is missing a graphics queue or a present queue. | ||
| 160 | return false; | ||
| 161 | } | ||
| 162 | |||
| 163 | // TODO(Rodrigo): Check if the device matches all requirements. | ||
| 164 | const vk::PhysicalDeviceProperties props = physical.getProperties(dldi); | ||
| 165 | if (props.limits.maxUniformBufferRange < 65536) { | ||
| 166 | return false; | ||
| 167 | } | ||
| 168 | |||
| 169 | // Device is suitable. | ||
| 170 | return true; | ||
| 171 | } | ||
| 172 | |||
| 173 | void VKDevice::SetupFamilies(const vk::DispatchLoaderDynamic& dldi, vk::SurfaceKHR surface) { | ||
| 174 | std::optional<u32> graphics_family_, present_family_; | ||
| 175 | |||
| 176 | const auto queue_family_properties = physical.getQueueFamilyProperties(dldi); | ||
| 177 | for (u32 i = 0; i < static_cast<u32>(queue_family_properties.size()); ++i) { | ||
| 178 | if (graphics_family_ && present_family_) | ||
| 179 | break; | ||
| 180 | |||
| 181 | const auto& queue_family = queue_family_properties[i]; | ||
| 182 | if (queue_family.queueCount == 0) | ||
| 183 | continue; | ||
| 184 | |||
| 185 | if (queue_family.queueFlags & vk::QueueFlagBits::eGraphics) | ||
| 186 | graphics_family_ = i; | ||
| 187 | if (physical.getSurfaceSupportKHR(i, surface, dldi)) | ||
| 188 | present_family_ = i; | ||
| 189 | } | ||
| 190 | ASSERT(graphics_family_ && present_family_); | ||
| 191 | |||
| 192 | graphics_family = *graphics_family_; | ||
| 193 | present_family = *present_family_; | ||
| 194 | } | ||
| 195 | |||
| 196 | void VKDevice::SetupProperties(const vk::DispatchLoaderDynamic& dldi) { | ||
| 197 | const vk::PhysicalDeviceProperties props = physical.getProperties(dldi); | ||
| 198 | device_type = props.deviceType; | ||
| 199 | uniform_buffer_alignment = static_cast<u64>(props.limits.minUniformBufferOffsetAlignment); | ||
| 200 | } | ||
| 201 | |||
| 202 | std::vector<vk::DeviceQueueCreateInfo> VKDevice::GetDeviceQueueCreateInfos() const { | ||
| 203 | static const float QUEUE_PRIORITY = 1.f; | ||
| 204 | |||
| 205 | std::set<u32> unique_queue_families = {graphics_family, present_family}; | ||
| 206 | std::vector<vk::DeviceQueueCreateInfo> queue_cis; | ||
| 207 | |||
| 208 | for (u32 queue_family : unique_queue_families) | ||
| 209 | queue_cis.push_back({{}, queue_family, 1, &QUEUE_PRIORITY}); | ||
| 210 | |||
| 211 | return queue_cis; | ||
| 212 | } | ||
| 213 | |||
| 214 | std::map<vk::Format, vk::FormatProperties> VKDevice::GetFormatProperties( | ||
| 215 | const vk::DispatchLoaderDynamic& dldi, vk::PhysicalDevice physical) { | ||
| 216 | std::map<vk::Format, vk::FormatProperties> format_properties; | ||
| 217 | |||
| 218 | const auto AddFormatQuery = [&format_properties, &dldi, physical](vk::Format format) { | ||
| 219 | format_properties.emplace(format, physical.getFormatProperties(format, dldi)); | ||
| 220 | }; | ||
| 221 | AddFormatQuery(vk::Format::eA8B8G8R8UnormPack32); | ||
| 222 | AddFormatQuery(vk::Format::eR5G6B5UnormPack16); | ||
| 223 | AddFormatQuery(vk::Format::eD32Sfloat); | ||
| 224 | AddFormatQuery(vk::Format::eD16UnormS8Uint); | ||
| 225 | AddFormatQuery(vk::Format::eD24UnormS8Uint); | ||
| 226 | AddFormatQuery(vk::Format::eD32SfloatS8Uint); | ||
| 227 | |||
| 228 | return format_properties; | ||
| 229 | } | ||
| 230 | |||
| 231 | } // namespace Vulkan | ||
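A hedged usage sketch tying the new class together: probe the physical devices with IsSuitable(), then construct and Create() the first match. instance, surface, and dldi are assumed to come from renderer initialization that is not part of this change:

#include <memory>
#include "video_core/renderer_vulkan/vk_device.h"

std::unique_ptr<Vulkan::VKDevice> PickDevice(const vk::DispatchLoaderDynamic& dldi,
                                             vk::Instance instance,
                                             vk::SurfaceKHR surface) {
    for (const vk::PhysicalDevice physical : instance.enumeratePhysicalDevices(dldi)) {
        if (!Vulkan::VKDevice::IsSuitable(dldi, physical, surface))
            continue;
        auto device = std::make_unique<Vulkan::VKDevice>(dldi, physical, surface);
        if (device->Create(dldi, instance))
            return device;
    }
    return nullptr; // no usable Vulkan device found
}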
diff --git a/src/video_core/renderer_vulkan/vk_device.h b/src/video_core/renderer_vulkan/vk_device.h new file mode 100644 index 000000000..e87c7a508 --- /dev/null +++ b/src/video_core/renderer_vulkan/vk_device.h | |||
| @@ -0,0 +1,116 @@ | |||
| 1 | // Copyright 2018 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include <map> | ||
| 8 | #include <vector> | ||
| 9 | #include "common/common_types.h" | ||
| 10 | #include "video_core/renderer_vulkan/declarations.h" | ||
| 11 | |||
| 12 | namespace Vulkan { | ||
| 13 | |||
| 14 | /// Format usage descriptor | ||
| 15 | enum class FormatType { Linear, Optimal, Buffer }; | ||
| 16 | |||
| 17 | /// Handles data specific to a physical device. | ||
| 18 | class VKDevice final { | ||
| 19 | public: | ||
| 20 | explicit VKDevice(const vk::DispatchLoaderDynamic& dldi, vk::PhysicalDevice physical, | ||
| 21 | vk::SurfaceKHR surface); | ||
| 22 | ~VKDevice(); | ||
| 23 | |||
| 24 | /// Initializes the device. Returns true on success. | ||
| 25 | bool Create(const vk::DispatchLoaderDynamic& dldi, vk::Instance instance); | ||
| 26 | |||
| 27 | /** | ||
| 28 | * Returns a format supported by the device for the passed requirements. | ||
| 29 | * @param wanted_format The preferred format; the returned format may differ from it. | ||
| 30 | * @param wanted_usage The usage that must be fulfilled even if the wanted format is unsupported. | ||
| 31 | * @param format_type Format type usage. | ||
| 32 | * @returns A format supported by the device. | ||
| 33 | */ | ||
| 34 | vk::Format GetSupportedFormat(vk::Format wanted_format, vk::FormatFeatureFlags wanted_usage, | ||
| 35 | FormatType format_type) const; | ||
| 36 | |||
| 37 | /// Returns the dispatch loader with direct function pointers of the device | ||
| 38 | const vk::DispatchLoaderDynamic& GetDispatchLoader() const { | ||
| 39 | return dld; | ||
| 40 | } | ||
| 41 | |||
| 42 | /// Returns the logical device | ||
| 43 | vk::Device GetLogical() const { | ||
| 44 | return logical.get(); | ||
| 45 | } | ||
| 46 | |||
| 47 | /// Returns the physical device. | ||
| 48 | vk::PhysicalDevice GetPhysical() const { | ||
| 49 | return physical; | ||
| 50 | } | ||
| 51 | |||
| 52 | /// Returns the main graphics queue. | ||
| 53 | vk::Queue GetGraphicsQueue() const { | ||
| 54 | return graphics_queue; | ||
| 55 | } | ||
| 56 | |||
| 57 | /// Returns the main present queue. | ||
| 58 | vk::Queue GetPresentQueue() const { | ||
| 59 | return present_queue; | ||
| 60 | } | ||
| 61 | |||
| 62 | /// Returns main graphics queue family index. | ||
| 63 | u32 GetGraphicsFamily() const { | ||
| 64 | return graphics_family; | ||
| 65 | } | ||
| 66 | |||
| 67 | /// Returns main present queue family index. | ||
| 68 | u32 GetPresentFamily() const { | ||
| 69 | return present_family; | ||
| 70 | } | ||
| 71 | |||
| 72 | /// Returns if the device is integrated with the host CPU | ||
| 73 | bool IsIntegrated() const { | ||
| 74 | return device_type == vk::PhysicalDeviceType::eIntegratedGpu; | ||
| 75 | } | ||
| 76 | |||
| 77 | /// Returns uniform buffer alignment requirement | ||
| 78 | u64 GetUniformBufferAlignment() const { | ||
| 79 | return uniform_buffer_alignment; | ||
| 80 | } | ||
| 81 | |||
| 82 | /// Checks if the physical device is suitable. | ||
| 83 | static bool IsSuitable(const vk::DispatchLoaderDynamic& dldi, vk::PhysicalDevice physical, | ||
| 84 | vk::SurfaceKHR surface); | ||
| 85 | |||
| 86 | private: | ||
| 87 | /// Sets up queue families. | ||
| 88 | void SetupFamilies(const vk::DispatchLoaderDynamic& dldi, vk::SurfaceKHR surface); | ||
| 89 | |||
| 90 | /// Sets up device properties. | ||
| 91 | void SetupProperties(const vk::DispatchLoaderDynamic& dldi); | ||
| 92 | |||
| 93 | /// Returns a list of queue initialization descriptors. | ||
| 94 | std::vector<vk::DeviceQueueCreateInfo> GetDeviceQueueCreateInfos() const; | ||
| 95 | |||
| 96 | /// Returns true if a format is supported. | ||
| 97 | bool IsFormatSupported(vk::Format wanted_format, vk::FormatFeatureFlags wanted_usage, | ||
| 98 | FormatType format_type) const; | ||
| 99 | |||
| 100 | /// Returns the device properties for Vulkan formats. | ||
| 101 | static std::map<vk::Format, vk::FormatProperties> GetFormatProperties( | ||
| 102 | const vk::DispatchLoaderDynamic& dldi, vk::PhysicalDevice physical); | ||
| 103 | |||
| 104 | const vk::PhysicalDevice physical; ///< Physical device | ||
| 105 | vk::DispatchLoaderDynamic dld; ///< Device function pointers | ||
| 106 | UniqueDevice logical; ///< Logical device | ||
| 107 | vk::Queue graphics_queue; ///< Main graphics queue | ||
| 108 | vk::Queue present_queue; ///< Main present queue | ||
| 109 | u32 graphics_family{}; ///< Main graphics queue family index | ||
| 110 | u32 present_family{}; ///< Main present queue family index | ||
| 111 | vk::PhysicalDeviceType device_type; ///< Physical device type | ||
| 112 | u64 uniform_buffer_alignment{}; ///< Uniform buffer alignment requirement | ||
| 113 | std::map<vk::Format, vk::FormatProperties> format_properties; ///< Format properties dictionary | ||
| 114 | }; | ||
| 115 | |||
| 116 | } // namespace Vulkan | ||
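The new VKDevice consolidates physical-device queries behind one object. A minimal usage sketch (the enumeration loop and unique_ptr ownership are illustrative; only the VKDevice API itself comes from the header above):

    #include <memory>

    // Picks the first suitable physical device and initializes a VKDevice for it.
    // Returns nullptr when no device satisfies the renderer's requirements.
    std::unique_ptr<Vulkan::VKDevice> MakeDevice(const vk::DispatchLoaderDynamic& dldi,
                                                 vk::Instance instance, vk::SurfaceKHR surface) {
        for (const vk::PhysicalDevice physical : instance.enumeratePhysicalDevices(dldi)) {
            if (!Vulkan::VKDevice::IsSuitable(dldi, physical, surface)) {
                continue;
            }
            auto device = std::make_unique<Vulkan::VKDevice>(dldi, physical, surface);
            if (device->Create(dldi, instance)) {
                return device;
            }
        }
        return nullptr;
    }

Callers can then degrade gracefully on missing format support, e.g. device->GetSupportedFormat(vk::Format::eA8B8G8R8UnormPack32, vk::FormatFeatureFlagBits::eColorAttachment, Vulkan::FormatType::Optimal).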
diff --git a/src/video_core/shader/decode/arithmetic_integer.cpp b/src/video_core/shader/decode/arithmetic_integer.cpp index 38bb692d6..9fd4b273e 100644 --- a/src/video_core/shader/decode/arithmetic_integer.cpp +++ b/src/video_core/shader/decode/arithmetic_integer.cpp | |||
| @@ -41,7 +41,7 @@ u32 ShaderIR::DecodeArithmeticInteger(NodeBlock& bb, u32 pc) { | |||
| 41 | 41 | ||
| 42 | const Node value = Operation(OperationCode::IAdd, PRECISE, op_a, op_b); | 42 | const Node value = Operation(OperationCode::IAdd, PRECISE, op_a, op_b); |
| 43 | 43 | ||
| 44 | SetInternalFlagsFromInteger(bb, value, instr.op_32.generates_cc); | 44 | SetInternalFlagsFromInteger(bb, value, instr.generates_cc); |
| 45 | SetRegister(bb, instr.gpr0, value); | 45 | SetRegister(bb, instr.gpr0, value); |
| 46 | break; | 46 | break; |
| 47 | } | 47 | } |
| @@ -284,4 +284,4 @@ void ShaderIR::WriteLop3Instruction(NodeBlock& bb, Register dest, Node op_a, Nod | |||
| 284 | SetRegister(bb, dest, value); | 284 | SetRegister(bb, dest, value); |
| 285 | } | 285 | } |
| 286 | 286 | ||
| 287 | } // namespace VideoCommon::Shader \ No newline at end of file | 287 | } // namespace VideoCommon::Shader |
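The IADD32I path now reads the condition-code flag through the instruction-wide generates_cc field instead of the op_32 sub-view. For context, Tegra instruction decoding overlays BitField views on a single 64-bit word; a rough sketch of the pattern, with placeholder bit positions (the real layout lives in video_core/engines/shader_bytecode.h):

    #include "common/bit_field.h"
    #include "common/common_types.h"

    union InstructionSketch {
        u64 value;
        BitField<47, 1, u64> generates_cc; // Shared CC-write bit used by the ALU decoders
        union {
            BitField<20, 32, u64> immediate; // Fields specific to the *32I immediate encodings
        } op_32;
    };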
diff --git a/src/video_core/shader/decode/conversion.cpp b/src/video_core/shader/decode/conversion.cpp index a992f73f8..55a6fbbf2 100644 --- a/src/video_core/shader/decode/conversion.cpp +++ b/src/video_core/shader/decode/conversion.cpp | |||
| @@ -118,8 +118,8 @@ u32 ShaderIR::DecodeConversion(NodeBlock& bb, u32 pc) { | |||
| 118 | 118 | ||
| 119 | value = [&]() { | 119 | value = [&]() { |
| 120 | switch (instr.conversion.f2i.rounding) { | 120 | switch (instr.conversion.f2i.rounding) { |
| 121 | case Tegra::Shader::F2iRoundingOp::None: | 121 | case Tegra::Shader::F2iRoundingOp::RoundEven: |
| 122 | return value; | 122 | return Operation(OperationCode::FRoundEven, PRECISE, value); |
| 123 | case Tegra::Shader::F2iRoundingOp::Floor: | 123 | case Tegra::Shader::F2iRoundingOp::Floor: |
| 124 | return Operation(OperationCode::FFloor, PRECISE, value); | 124 | return Operation(OperationCode::FFloor, PRECISE, value); |
| 125 | case Tegra::Shader::F2iRoundingOp::Ceil: | 125 | case Tegra::Shader::F2iRoundingOp::Ceil: |
| @@ -146,4 +146,4 @@ u32 ShaderIR::DecodeConversion(NodeBlock& bb, u32 pc) { | |||
| 146 | return pc; | 146 | return pc; |
| 147 | } | 147 | } |
| 148 | 148 | ||
| 149 | } // namespace VideoCommon::Shader \ No newline at end of file | 149 | } // namespace VideoCommon::Shader |
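The default F2I rounding case now emits an explicit FRoundEven rather than passing the value through: the commit treats the hardware's default mode as round-to-nearest-even, not the identity. As a self-contained illustration of the halfway behavior FRoundEven is expected to match (GLSL roundEven semantics, reproduced here with the host's default FE_TONEAREST rounding):

    #include <cmath>
    #include <cstdio>

    int main() {
        // Ties round to the even neighbor: prints "2.0 4.0"
        std::printf("%.1f %.1f\n", std::nearbyint(2.5f), std::nearbyint(3.5f));
        return 0;
    }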
diff --git a/src/video_core/shader/decode/memory.cpp b/src/video_core/shader/decode/memory.cpp index e006f8138..55ec601ff 100644 --- a/src/video_core/shader/decode/memory.cpp +++ b/src/video_core/shader/decode/memory.cpp | |||
| @@ -306,7 +306,6 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) { | |||
| 306 | case OpCode::Id::TLD4S: { | 306 | case OpCode::Id::TLD4S: { |
| 307 | UNIMPLEMENTED_IF_MSG(instr.tld4s.UsesMiscMode(TextureMiscMode::AOFFI), | 307 | UNIMPLEMENTED_IF_MSG(instr.tld4s.UsesMiscMode(TextureMiscMode::AOFFI), |
| 308 | "AOFFI is not implemented"); | 308 | "AOFFI is not implemented"); |
| 309 | |||
| 310 | if (instr.tld4s.UsesMiscMode(TextureMiscMode::NODEP)) { | 309 | if (instr.tld4s.UsesMiscMode(TextureMiscMode::NODEP)) { |
| 311 | LOG_WARNING(HW_GPU, "TLD4S.NODEP implementation is incomplete"); | 310 | LOG_WARNING(HW_GPU, "TLD4S.NODEP implementation is incomplete"); |
| 312 | } | 311 | } |
| @@ -315,9 +314,8 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) { | |||
| 315 | const Node op_a = GetRegister(instr.gpr8); | 314 | const Node op_a = GetRegister(instr.gpr8); |
| 316 | const Node op_b = GetRegister(instr.gpr20); | 315 | const Node op_b = GetRegister(instr.gpr20); |
| 317 | 316 | ||
| 318 | std::vector<Node> coords; | ||
| 319 | |||
| 320 | // TODO(Subv): Figure out how the sampler type is encoded in the TLD4S instruction. | 317 | // TODO(Subv): Figure out how the sampler type is encoded in the TLD4S instruction. |
| 318 | std::vector<Node> coords; | ||
| 321 | if (depth_compare) { | 319 | if (depth_compare) { |
| 322 | // Note: TLD4S coordinate encoding works just like TEXS's | 320 | // Note: TLD4S coordinate encoding works just like TEXS's |
| 323 | const Node op_y = GetRegister(instr.gpr8.Value() + 1); | 321 | const Node op_y = GetRegister(instr.gpr8.Value() + 1); |
| @@ -328,18 +326,17 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) { | |||
| 328 | coords.push_back(op_a); | 326 | coords.push_back(op_a); |
| 329 | coords.push_back(op_b); | 327 | coords.push_back(op_b); |
| 330 | } | 328 | } |
| 331 | const auto num_coords = static_cast<u32>(coords.size()); | 329 | std::vector<Node> extras; |
| 332 | coords.push_back(Immediate(static_cast<u32>(instr.tld4s.component))); | 330 | extras.push_back(Immediate(static_cast<u32>(instr.tld4s.component))); |
| 333 | 331 | ||
| 334 | const auto& sampler = | 332 | const auto& sampler = |
| 335 | GetSampler(instr.sampler, TextureType::Texture2D, false, depth_compare); | 333 | GetSampler(instr.sampler, TextureType::Texture2D, false, depth_compare); |
| 336 | 334 | ||
| 337 | Node4 values; | 335 | Node4 values; |
| 338 | for (u32 element = 0; element < values.size(); ++element) { | 336 | for (u32 element = 0; element < values.size(); ++element) { |
| 339 | auto params = coords; | 337 | auto coords_copy = coords; |
| 340 | MetaTexture meta{sampler, element, num_coords}; | 338 | MetaTexture meta{sampler, {}, {}, extras, element}; |
| 341 | values[element] = | 339 | values[element] = Operation(OperationCode::TextureGather, meta, std::move(coords_copy)); |
| 342 | Operation(OperationCode::F4TextureGather, std::move(meta), std::move(params)); | ||
| 343 | } | 340 | } |
| 344 | 341 | ||
| 345 | WriteTexsInstructionFloat(bb, instr, values); | 342 | WriteTexsInstructionFloat(bb, instr, values); |
| @@ -360,12 +357,13 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) { | |||
| 360 | switch (instr.txq.query_type) { | 357 | switch (instr.txq.query_type) { |
| 361 | case Tegra::Shader::TextureQueryType::Dimension: { | 358 | case Tegra::Shader::TextureQueryType::Dimension: { |
| 362 | for (u32 element = 0; element < 4; ++element) { | 359 | for (u32 element = 0; element < 4; ++element) { |
| 363 | if (instr.txq.IsComponentEnabled(element)) { | 360 | if (!instr.txq.IsComponentEnabled(element)) { |
| 364 | MetaTexture meta{sampler, element}; | 361 | continue; |
| 365 | const Node value = Operation(OperationCode::F4TextureQueryDimensions, | ||
| 366 | std::move(meta), GetRegister(instr.gpr8)); | ||
| 367 | SetTemporal(bb, indexer++, value); | ||
| 368 | } | 362 | } |
| 363 | MetaTexture meta{sampler, {}, {}, {}, element}; | ||
| 364 | const Node value = | ||
| 365 | Operation(OperationCode::TextureQueryDimensions, meta, GetRegister(instr.gpr8)); | ||
| 366 | SetTemporal(bb, indexer++, value); | ||
| 369 | } | 367 | } |
| 370 | for (u32 i = 0; i < indexer; ++i) { | 368 | for (u32 i = 0; i < indexer; ++i) { |
| 371 | SetRegister(bb, instr.gpr0.Value() + i, GetTemporal(i)); | 369 | SetRegister(bb, instr.gpr0.Value() + i, GetTemporal(i)); |
| @@ -412,9 +410,8 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) { | |||
| 412 | 410 | ||
| 413 | for (u32 element = 0; element < 2; ++element) { | 411 | for (u32 element = 0; element < 2; ++element) { |
| 414 | auto params = coords; | 412 | auto params = coords; |
| 415 | MetaTexture meta_texture{sampler, element, static_cast<u32>(coords.size())}; | 413 | MetaTexture meta{sampler, {}, {}, {}, element}; |
| 416 | const Node value = | 414 | const Node value = Operation(OperationCode::TextureQueryLod, meta, std::move(params)); |
| 417 | Operation(OperationCode::F4TextureQueryLod, meta_texture, std::move(params)); | ||
| 418 | SetTemporal(bb, element, value); | 415 | SetTemporal(bb, element, value); |
| 419 | } | 416 | } |
| 420 | for (u32 element = 0; element < 2; ++element) { | 417 | for (u32 element = 0; element < 2; ++element) { |
| @@ -432,7 +429,7 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) { | |||
| 432 | UNIMPLEMENTED_IF_MSG(instr.tlds.UsesMiscMode(TextureMiscMode::MZ), "MZ is not implemented"); | 429 | UNIMPLEMENTED_IF_MSG(instr.tlds.UsesMiscMode(TextureMiscMode::MZ), "MZ is not implemented"); |
| 433 | 430 | ||
| 434 | if (instr.tlds.UsesMiscMode(TextureMiscMode::NODEP)) { | 431 | if (instr.tlds.UsesMiscMode(TextureMiscMode::NODEP)) { |
| 435 | LOG_WARNING(HW_GPU, "TMML.NODEP implementation is incomplete"); | 432 | LOG_WARNING(HW_GPU, "TLDS.NODEP implementation is incomplete"); |
| 436 | } | 433 | } |
| 437 | 434 | ||
| 438 | WriteTexsInstructionFloat(bb, instr, GetTldsCode(instr, texture_type, is_array)); | 435 | WriteTexsInstructionFloat(bb, instr, GetTldsCode(instr, texture_type, is_array)); |
| @@ -535,15 +532,16 @@ void ShaderIR::WriteTexsInstructionHalfFloat(NodeBlock& bb, Instruction instr, | |||
| 535 | } | 532 | } |
| 536 | 533 | ||
| 537 | Node4 ShaderIR::GetTextureCode(Instruction instr, TextureType texture_type, | 534 | Node4 ShaderIR::GetTextureCode(Instruction instr, TextureType texture_type, |
| 538 | TextureProcessMode process_mode, bool depth_compare, bool is_array, | 535 | TextureProcessMode process_mode, std::vector<Node> coords, |
| 539 | std::size_t array_offset, std::size_t bias_offset, | 536 | Node array, Node depth_compare, u32 bias_offset) { |
| 540 | std::vector<Node>&& coords) { | 537 | const bool is_array = array != nullptr; |
| 541 | UNIMPLEMENTED_IF_MSG( | 538 | const bool is_shadow = depth_compare != nullptr; |
| 542 | (texture_type == TextureType::Texture3D && (is_array || depth_compare)) || | ||
| 543 | (texture_type == TextureType::TextureCube && is_array && depth_compare), | ||
| 544 | "This method is not supported."); | ||
| 545 | 539 | ||
| 546 | const auto& sampler = GetSampler(instr.sampler, texture_type, is_array, depth_compare); | 540 | UNIMPLEMENTED_IF_MSG((texture_type == TextureType::Texture3D && (is_array || is_shadow)) || |
| 541 | (texture_type == TextureType::TextureCube && is_array && is_shadow), | ||
| 542 | "This method is not supported."); | ||
| 543 | |||
| 544 | const auto& sampler = GetSampler(instr.sampler, texture_type, is_array, is_shadow); | ||
| 547 | 545 | ||
| 548 | const bool lod_needed = process_mode == TextureProcessMode::LZ || | 546 | const bool lod_needed = process_mode == TextureProcessMode::LZ || |
| 549 | process_mode == TextureProcessMode::LL || | 547 | process_mode == TextureProcessMode::LL || |
| @@ -552,35 +550,30 @@ Node4 ShaderIR::GetTextureCode(Instruction instr, TextureType texture_type, | |||
| 552 | // LOD selection (either via bias or explicit textureLod) not supported in GL for | 550 | // LOD selection (either via bias or explicit textureLod) not supported in GL for |
| 553 | // sampler2DArrayShadow and samplerCubeArrayShadow. | 551 | // sampler2DArrayShadow and samplerCubeArrayShadow. |
| 554 | const bool gl_lod_supported = | 552 | const bool gl_lod_supported = |
| 555 | !((texture_type == Tegra::Shader::TextureType::Texture2D && is_array && depth_compare) || | 553 | !((texture_type == Tegra::Shader::TextureType::Texture2D && is_array && is_shadow) || |
| 556 | (texture_type == Tegra::Shader::TextureType::TextureCube && is_array && depth_compare)); | 554 | (texture_type == Tegra::Shader::TextureType::TextureCube && is_array && is_shadow)); |
| 557 | 555 | ||
| 558 | const OperationCode read_method = | 556 | const OperationCode read_method = |
| 559 | lod_needed && gl_lod_supported ? OperationCode::F4TextureLod : OperationCode::F4Texture; | 557 | lod_needed && gl_lod_supported ? OperationCode::TextureLod : OperationCode::Texture; |
| 560 | 558 | ||
| 561 | UNIMPLEMENTED_IF(process_mode != TextureProcessMode::None && !gl_lod_supported); | 559 | UNIMPLEMENTED_IF(process_mode != TextureProcessMode::None && !gl_lod_supported); |
| 562 | 560 | ||
| 563 | std::optional<u32> array_offset_value; | 561 | std::vector<Node> extras; |
| 564 | if (is_array) | ||
| 565 | array_offset_value = static_cast<u32>(array_offset); | ||
| 566 | |||
| 567 | const auto coords_count = static_cast<u32>(coords.size()); | ||
| 568 | |||
| 569 | if (process_mode != TextureProcessMode::None && gl_lod_supported) { | 562 | if (process_mode != TextureProcessMode::None && gl_lod_supported) { |
| 570 | if (process_mode == TextureProcessMode::LZ) { | 563 | if (process_mode == TextureProcessMode::LZ) { |
| 571 | coords.push_back(Immediate(0.0f)); | 564 | extras.push_back(Immediate(0.0f)); |
| 572 | } else { | 565 | } else { |
| 573 | // If present, lod or bias are always stored in the register indexed by the gpr20 | 566 | // If present, lod or bias are always stored in the register indexed by the gpr20 |
| 574 | // field with an offset depending on the usage of the other registers | 567 | // field with an offset depending on the usage of the other registers |
| 575 | coords.push_back(GetRegister(instr.gpr20.Value() + bias_offset)); | 568 | extras.push_back(GetRegister(instr.gpr20.Value() + bias_offset)); |
| 576 | } | 569 | } |
| 577 | } | 570 | } |
| 578 | 571 | ||
| 579 | Node4 values; | 572 | Node4 values; |
| 580 | for (u32 element = 0; element < values.size(); ++element) { | 573 | for (u32 element = 0; element < values.size(); ++element) { |
| 581 | auto params = coords; | 574 | auto copy_coords = coords; |
| 582 | MetaTexture meta{sampler, element, coords_count, array_offset_value}; | 575 | MetaTexture meta{sampler, array, depth_compare, extras, element}; |
| 583 | values[element] = Operation(read_method, std::move(meta), std::move(params)); | 576 | values[element] = Operation(read_method, meta, std::move(copy_coords)); |
| 584 | } | 577 | } |
| 585 | 578 | ||
| 586 | return values; | 579 | return values; |
| @@ -602,28 +595,22 @@ Node4 ShaderIR::GetTexCode(Instruction instr, TextureType texture_type, | |||
| 602 | for (std::size_t i = 0; i < coord_count; ++i) { | 595 | for (std::size_t i = 0; i < coord_count; ++i) { |
| 603 | coords.push_back(GetRegister(coord_register + i)); | 596 | coords.push_back(GetRegister(coord_register + i)); |
| 604 | } | 597 | } |
| 605 | // 1D.DC in opengl the 2nd component is ignored. | 598 | // For 1D.DC in OpenGL, the 2nd component is ignored. |
| 606 | if (depth_compare && !is_array && texture_type == TextureType::Texture1D) { | 599 | if (depth_compare && !is_array && texture_type == TextureType::Texture1D) { |
| 607 | coords.push_back(Immediate(0.0f)); | 600 | coords.push_back(Immediate(0.0f)); |
| 608 | } | 601 | } |
| 609 | std::size_t array_offset{}; | 602 | |
| 610 | if (is_array) { | 603 | const Node array = is_array ? GetRegister(array_register) : nullptr; |
| 611 | array_offset = coords.size(); | 604 | |
| 612 | coords.push_back(GetRegister(array_register)); | 605 | Node dc{}; |
| 613 | } | ||
| 614 | if (depth_compare) { | 606 | if (depth_compare) { |
| 615 | // Depth is always stored in the register signaled by gpr20 | 607 | // Depth is always stored in the register signaled by gpr20 or in the next register if lod |
| 616 | // or in the next register if lod or bias are used | 608 | // or bias are used |
| 617 | const u64 depth_register = instr.gpr20.Value() + (lod_bias_enabled ? 1 : 0); | 609 | const u64 depth_register = instr.gpr20.Value() + (lod_bias_enabled ? 1 : 0); |
| 618 | coords.push_back(GetRegister(depth_register)); | 610 | dc = GetRegister(depth_register); |
| 619 | } | ||
| 620 | // Fill ignored coordinates | ||
| 621 | while (coords.size() < total_coord_count) { | ||
| 622 | coords.push_back(Immediate(0)); | ||
| 623 | } | 611 | } |
| 624 | 612 | ||
| 625 | return GetTextureCode(instr, texture_type, process_mode, depth_compare, is_array, array_offset, | 613 | return GetTextureCode(instr, texture_type, process_mode, coords, array, dc, 0); |
| 626 | 0, std::move(coords)); | ||
| 627 | } | 614 | } |
| 628 | 615 | ||
| 629 | Node4 ShaderIR::GetTexsCode(Instruction instr, TextureType texture_type, | 616 | Node4 ShaderIR::GetTexsCode(Instruction instr, TextureType texture_type, |
| @@ -641,6 +628,7 @@ Node4 ShaderIR::GetTexsCode(Instruction instr, TextureType texture_type, | |||
| 641 | (is_array || !(lod_bias_enabled || depth_compare) || (coord_count > 2)) | 628 | (is_array || !(lod_bias_enabled || depth_compare) || (coord_count > 2)) |
| 642 | ? static_cast<u64>(instr.gpr20.Value()) | 629 | ? static_cast<u64>(instr.gpr20.Value()) |
| 643 | : coord_register + 1; | 630 | : coord_register + 1; |
| 631 | const u32 bias_offset = coord_count > 2 ? 1 : 0; | ||
| 644 | 632 | ||
| 645 | std::vector<Node> coords; | 633 | std::vector<Node> coords; |
| 646 | for (std::size_t i = 0; i < coord_count; ++i) { | 634 | for (std::size_t i = 0; i < coord_count; ++i) { |
| @@ -648,24 +636,17 @@ Node4 ShaderIR::GetTexsCode(Instruction instr, TextureType texture_type, | |||
| 648 | coords.push_back(GetRegister(last ? last_coord_register : coord_register + i)); | 636 | coords.push_back(GetRegister(last ? last_coord_register : coord_register + i)); |
| 649 | } | 637 | } |
| 650 | 638 | ||
| 651 | std::size_t array_offset{}; | 639 | const Node array = is_array ? GetRegister(array_register) : nullptr; |
| 652 | if (is_array) { | 640 | |
| 653 | array_offset = coords.size(); | 641 | Node dc{}; |
| 654 | coords.push_back(GetRegister(array_register)); | ||
| 655 | } | ||
| 656 | if (depth_compare) { | 642 | if (depth_compare) { |
| 657 | // Depth is always stored in the register signaled by gpr20 | 643 | // Depth is always stored in the register signaled by gpr20 or in the next register if lod |
| 658 | // or in the next register if lod or bias are used | 644 | // or bias are used |
| 659 | const u64 depth_register = instr.gpr20.Value() + (lod_bias_enabled ? 1 : 0); | 645 | const u64 depth_register = instr.gpr20.Value() + (lod_bias_enabled ? 1 : 0); |
| 660 | coords.push_back(GetRegister(depth_register)); | 646 | dc = GetRegister(depth_register); |
| 661 | } | ||
| 662 | // Fill ignored coordinates | ||
| 663 | while (coords.size() < total_coord_count) { | ||
| 664 | coords.push_back(Immediate(0)); | ||
| 665 | } | 647 | } |
| 666 | 648 | ||
| 667 | return GetTextureCode(instr, texture_type, process_mode, depth_compare, is_array, array_offset, | 649 | return GetTextureCode(instr, texture_type, process_mode, coords, array, dc, bias_offset); |
| 668 | (coord_count > 2 ? 1 : 0), std::move(coords)); | ||
| 669 | } | 650 | } |
| 670 | 651 | ||
| 671 | Node4 ShaderIR::GetTld4Code(Instruction instr, TextureType texture_type, bool depth_compare, | 652 | Node4 ShaderIR::GetTld4Code(Instruction instr, TextureType texture_type, bool depth_compare, |
| @@ -680,24 +661,16 @@ Node4 ShaderIR::GetTld4Code(Instruction instr, TextureType texture_type, bool de | |||
| 680 | const u64 coord_register = array_register + (is_array ? 1 : 0); | 661 | const u64 coord_register = array_register + (is_array ? 1 : 0); |
| 681 | 662 | ||
| 682 | std::vector<Node> coords; | 663 | std::vector<Node> coords; |
| 683 | 664 | for (size_t i = 0; i < coord_count; ++i) | |
| 684 | for (size_t i = 0; i < coord_count; ++i) { | ||
| 685 | coords.push_back(GetRegister(coord_register + i)); | 665 | coords.push_back(GetRegister(coord_register + i)); |
| 686 | } | ||
| 687 | std::optional<u32> array_offset; | ||
| 688 | if (is_array) { | ||
| 689 | array_offset = static_cast<u32>(coords.size()); | ||
| 690 | coords.push_back(GetRegister(array_register)); | ||
| 691 | } | ||
| 692 | 666 | ||
| 693 | const auto& sampler = GetSampler(instr.sampler, texture_type, is_array, depth_compare); | 667 | const auto& sampler = GetSampler(instr.sampler, texture_type, is_array, depth_compare); |
| 694 | 668 | ||
| 695 | Node4 values; | 669 | Node4 values; |
| 696 | for (u32 element = 0; element < values.size(); ++element) { | 670 | for (u32 element = 0; element < values.size(); ++element) { |
| 697 | auto params = coords; | 671 | auto coords_copy = coords; |
| 698 | MetaTexture meta{sampler, element, static_cast<u32>(coords.size()), array_offset}; | 672 | MetaTexture meta{sampler, is_array ? GetRegister(array_register) : nullptr, {}, {}, element}; |
| 699 | values[element] = | 673 | values[element] = Operation(OperationCode::TextureGather, meta, std::move(coords_copy)); |
| 700 | Operation(OperationCode::F4TextureGather, std::move(meta), std::move(params)); | ||
| 701 | } | 674 | } |
| 702 | 675 | ||
| 703 | return values; | 676 | return values; |
| @@ -705,7 +678,6 @@ Node4 ShaderIR::GetTld4Code(Instruction instr, TextureType texture_type, bool de | |||
| 705 | 678 | ||
| 706 | Node4 ShaderIR::GetTldsCode(Instruction instr, TextureType texture_type, bool is_array) { | 679 | Node4 ShaderIR::GetTldsCode(Instruction instr, TextureType texture_type, bool is_array) { |
| 707 | const std::size_t type_coord_count = GetCoordCount(texture_type); | 680 | const std::size_t type_coord_count = GetCoordCount(texture_type); |
| 708 | const std::size_t total_coord_count = type_coord_count + (is_array ? 1 : 0); | ||
| 709 | const bool lod_enabled = instr.tlds.GetTextureProcessMode() == TextureProcessMode::LL; | 681 | const bool lod_enabled = instr.tlds.GetTextureProcessMode() == TextureProcessMode::LL; |
| 710 | 682 | ||
| 711 | // If enabled arrays index is always stored in the gpr8 field | 683 | // If enabled arrays index is always stored in the gpr8 field |
| @@ -719,33 +691,22 @@ Node4 ShaderIR::GetTldsCode(Instruction instr, TextureType texture_type, bool is | |||
| 719 | : coord_register + 1; | 691 | : coord_register + 1; |
| 720 | 692 | ||
| 721 | std::vector<Node> coords; | 693 | std::vector<Node> coords; |
| 722 | |||
| 723 | for (std::size_t i = 0; i < type_coord_count; ++i) { | 694 | for (std::size_t i = 0; i < type_coord_count; ++i) { |
| 724 | const bool last = (i == (type_coord_count - 1)) && (type_coord_count > 1); | 695 | const bool last = (i == (type_coord_count - 1)) && (type_coord_count > 1); |
| 725 | coords.push_back(GetRegister(last ? last_coord_register : coord_register + i)); | 696 | coords.push_back(GetRegister(last ? last_coord_register : coord_register + i)); |
| 726 | } | 697 | } |
| 727 | std::optional<u32> array_offset; | ||
| 728 | if (is_array) { | ||
| 729 | array_offset = static_cast<u32>(coords.size()); | ||
| 730 | coords.push_back(GetRegister(array_register)); | ||
| 731 | } | ||
| 732 | const auto coords_count = static_cast<u32>(coords.size()); | ||
| 733 | 698 | ||
| 734 | if (lod_enabled) { | 699 | const Node array = is_array ? GetRegister(array_register) : nullptr; |
| 735 | // When lod is used always is in grp20 | 700 | // When lod is used, it is always in gpr20 |
| 736 | coords.push_back(GetRegister(instr.gpr20)); | 701 | const Node lod = lod_enabled ? GetRegister(instr.gpr20) : Immediate(0); |
| 737 | } else { | ||
| 738 | coords.push_back(Immediate(0)); | ||
| 739 | } | ||
| 740 | 702 | ||
| 741 | const auto& sampler = GetSampler(instr.sampler, texture_type, is_array, false); | 703 | const auto& sampler = GetSampler(instr.sampler, texture_type, is_array, false); |
| 742 | 704 | ||
| 743 | Node4 values; | 705 | Node4 values; |
| 744 | for (u32 element = 0; element < values.size(); ++element) { | 706 | for (u32 element = 0; element < values.size(); ++element) { |
| 745 | auto params = coords; | 707 | auto coords_copy = coords; |
| 746 | MetaTexture meta{sampler, element, coords_count, array_offset}; | 708 | MetaTexture meta{sampler, array, {}, {lod}, element}; |
| 747 | values[element] = | 709 | values[element] = Operation(OperationCode::TexelFetch, meta, std::move(coords_copy)); |
| 748 | Operation(OperationCode::F4TexelFetch, std::move(meta), std::move(params)); | ||
| 749 | } | 710 | } |
| 750 | return values; | 711 | return values; |
| 751 | } | 712 | } |
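After this refactor every texture decoder follows one shape: the operand vector carries only real coordinates, while the array index, the depth-compare reference and any extra operands (bias, lod, gather component) travel in MetaTexture. A condensed sketch using only names visible in the hunks above (register selection is illustrative):

    std::vector<Node> coords;
    for (std::size_t i = 0; i < coord_count; ++i) {
        coords.push_back(GetRegister(coord_register + i));
    }
    const Node array = is_array ? GetRegister(array_register) : nullptr;
    const Node dc = depth_compare ? GetRegister(depth_register) : nullptr;

    Node4 values;
    for (u32 element = 0; element < values.size(); ++element) {
        auto coords_copy = coords; // The coordinate list is reused per component
        MetaTexture meta{sampler, array, dc, extras, element};
        values[element] = Operation(read_method, meta, std::move(coords_copy));
    }

Note that the old "fill ignored coordinates with Immediate(0)" padding disappears as well: consumers can size the call from coords.size() instead of trusting a separate coords_count field.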
diff --git a/src/video_core/shader/shader_ir.h b/src/video_core/shader/shader_ir.h index 1d4fbef53..52c7f2c4e 100644 --- a/src/video_core/shader/shader_ir.h +++ b/src/video_core/shader/shader_ir.h | |||
| @@ -156,12 +156,12 @@ enum class OperationCode { | |||
| 156 | Logical2HNotEqual, /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2 | 156 | Logical2HNotEqual, /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2 |
| 157 | Logical2HGreaterEqual, /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2 | 157 | Logical2HGreaterEqual, /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2 |
| 158 | 158 | ||
| 159 | F4Texture, /// (MetaTexture, float[N] coords, float[M] params) -> float4 | 159 | Texture, /// (MetaTexture, float[N] coords) -> float4 |
| 160 | F4TextureLod, /// (MetaTexture, float[N] coords, float[M] params) -> float4 | 160 | TextureLod, /// (MetaTexture, float[N] coords) -> float4 |
| 161 | F4TextureGather, /// (MetaTexture, float[N] coords, float[M] params) -> float4 | 161 | TextureGather, /// (MetaTexture, float[N] coords) -> float4 |
| 162 | F4TextureQueryDimensions, /// (MetaTexture, float a) -> float4 | 162 | TextureQueryDimensions, /// (MetaTexture, float a) -> float4 |
| 163 | F4TextureQueryLod, /// (MetaTexture, float[N] coords) -> float4 | 163 | TextureQueryLod, /// (MetaTexture, float[N] coords) -> float4 |
| 164 | F4TexelFetch, /// (MetaTexture, int[N], int) -> float4 | 164 | TexelFetch, /// (MetaTexture, int[N], int) -> float4 |
| 165 | 165 | ||
| 166 | Branch, /// (uint branch_target) -> void | 166 | Branch, /// (uint branch_target) -> void |
| 167 | PushFlowStack, /// (uint branch_target) -> void | 167 | PushFlowStack, /// (uint branch_target) -> void |
| @@ -288,9 +288,10 @@ struct MetaHalfArithmetic { | |||
| 288 | 288 | ||
| 289 | struct MetaTexture { | 289 | struct MetaTexture { |
| 290 | const Sampler& sampler; | 290 | const Sampler& sampler; |
| 291 | Node array{}; | ||
| 292 | Node depth_compare{}; | ||
| 293 | std::vector<Node> extras; | ||
| 291 | u32 element{}; | 294 | u32 element{}; |
| 292 | u32 coords_count{}; | ||
| 293 | std::optional<u32> array_index; | ||
| 294 | }; | 295 | }; |
| 295 | 296 | ||
| 296 | constexpr MetaArithmetic PRECISE = {true}; | 297 | constexpr MetaArithmetic PRECISE = {true}; |
| @@ -754,9 +755,8 @@ private: | |||
| 754 | bool lod_bias_enabled, std::size_t max_coords, std::size_t max_inputs); | 755 | bool lod_bias_enabled, std::size_t max_coords, std::size_t max_inputs); |
| 755 | 756 | ||
| 756 | Node4 GetTextureCode(Tegra::Shader::Instruction instr, Tegra::Shader::TextureType texture_type, | 757 | Node4 GetTextureCode(Tegra::Shader::Instruction instr, Tegra::Shader::TextureType texture_type, |
| 757 | Tegra::Shader::TextureProcessMode process_mode, bool depth_compare, | 758 | Tegra::Shader::TextureProcessMode process_mode, std::vector<Node> coords, |
| 758 | bool is_array, std::size_t array_offset, std::size_t bias_offset, | 759 | Node array, Node depth_compare, u32 bias_offset); |
| 759 | std::vector<Node>&& coords); | ||
| 760 | 760 | ||
| 761 | Node GetVideoOperand(Node op, bool is_chunk, bool is_signed, Tegra::Shader::VideoType type, | 761 | Node GetVideoOperand(Node op, bool is_chunk, bool is_signed, Tegra::Shader::VideoType type, |
| 762 | u64 byte_height); | 762 | u64 byte_height); |
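For backends, the payoff is that optional operands are tested by presence rather than recovered through index arithmetic over a flattened operand list. A self-contained toy model (not yuzu code) of a consumer assembling a GLSL-style call from the new MetaTexture layout:

    #include <memory>
    #include <string>
    #include <vector>

    struct ToyNode {
        std::string glsl;
    };
    using ToyNodePtr = std::shared_ptr<ToyNode>;

    struct ToyMetaTexture {
        std::string sampler;
        ToyNodePtr array;               // Null when the target is not an array
        ToyNodePtr depth_compare;       // Null when not a shadow sample
        std::vector<ToyNodePtr> extras; // Bias/lod/component operands
    };

    std::string BuildCall(const ToyMetaTexture& meta, const std::vector<ToyNodePtr>& coords) {
        std::string expr = "texture(" + meta.sampler;
        for (const auto& coord : coords) {
            expr += ", " + coord->glsl;
        }
        if (meta.array) {
            expr += ", " + meta.array->glsl; // Appended only when present
        }
        if (meta.depth_compare) {
            expr += ", " + meta.depth_compare->glsl;
        }
        for (const auto& extra : meta.extras) {
            expr += ", " + extra->glsl;
        }
        return expr + ")";
    }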
diff --git a/src/video_core/surface.cpp b/src/video_core/surface.cpp index 2f6612a35..044ba116a 100644 --- a/src/video_core/surface.cpp +++ b/src/video_core/surface.cpp | |||
| @@ -426,6 +426,8 @@ PixelFormat PixelFormatFromGPUPixelFormat(Tegra::FramebufferConfig::PixelFormat | |||
| 426 | switch (format) { | 426 | switch (format) { |
| 427 | case Tegra::FramebufferConfig::PixelFormat::ABGR8: | 427 | case Tegra::FramebufferConfig::PixelFormat::ABGR8: |
| 428 | return PixelFormat::ABGR8U; | 428 | return PixelFormat::ABGR8U; |
| 429 | case Tegra::FramebufferConfig::PixelFormat::BGRA8: | ||
| 430 | return PixelFormat::BGRA8; | ||
| 429 | default: | 431 | default: |
| 430 | LOG_CRITICAL(HW_GPU, "Unimplemented format={}", static_cast<u32>(format)); | 432 | LOG_CRITICAL(HW_GPU, "Unimplemented format={}", static_cast<u32>(format)); |
| 431 | UNREACHABLE(); | 433 | UNREACHABLE(); |