summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r--src/common/CMakeLists.txt1
-rw-r--r--src/common/alignment.h60
-rw-r--r--src/core/CMakeLists.txt3
-rw-r--r--src/core/core.cpp12
-rw-r--r--src/core/core.h10
-rw-r--r--src/core/hardware_interrupt_manager.cpp30
-rw-r--r--src/core/hardware_interrupt_manager.h31
-rw-r--r--src/core/hle/kernel/code_set.h3
-rw-r--r--src/core/hle/kernel/physical_memory.h19
-rw-r--r--src/core/hle/kernel/process.cpp6
-rw-r--r--src/core/hle/kernel/shared_memory.cpp6
-rw-r--r--src/core/hle/kernel/shared_memory.h13
-rw-r--r--src/core/hle/kernel/transfer_memory.cpp2
-rw-r--r--src/core/hle/kernel/transfer_memory.h3
-rw-r--r--src/core/hle/kernel/vm_manager.cpp15
-rw-r--r--src/core/hle/kernel/vm_manager.h9
-rw-r--r--src/core/hle/service/audio/audio.cpp6
-rw-r--r--src/core/hle/service/audio/audio.h6
-rw-r--r--src/core/hle/service/audio/audout_u.cpp36
-rw-r--r--src/core/hle/service/audio/audout_u.h12
-rw-r--r--src/core/hle/service/audio/audren_u.cpp200
-rw-r--r--src/core/hle/service/audio/audren_u.h25
-rw-r--r--src/core/hle/service/ns/pl_u.cpp12
-rw-r--r--src/core/hle/service/nvdrv/devices/nvdevice.h13
-rw-r--r--src/core/hle/service/nvdrv/devices/nvdisp_disp0.cpp11
-rw-r--r--src/core/hle/service/nvdrv/devices/nvdisp_disp0.h5
-rw-r--r--src/core/hle/service/nvdrv/devices/nvhost_as_gpu.cpp15
-rw-r--r--src/core/hle/service/nvdrv/devices/nvhost_as_gpu.h5
-rw-r--r--src/core/hle/service/nvdrv/devices/nvhost_ctrl.cpp152
-rw-r--r--src/core/hle/service/nvdrv/devices/nvhost_ctrl.h15
-rw-r--r--src/core/hle/service/nvdrv/devices/nvhost_ctrl_gpu.cpp7
-rw-r--r--src/core/hle/service/nvdrv/devices/nvhost_ctrl_gpu.h5
-rw-r--r--src/core/hle/service/nvdrv/devices/nvhost_gpu.cpp44
-rw-r--r--src/core/hle/service/nvdrv/devices/nvhost_gpu.h41
-rw-r--r--src/core/hle/service/nvdrv/devices/nvhost_nvdec.cpp5
-rw-r--r--src/core/hle/service/nvdrv/devices/nvhost_nvdec.h5
-rw-r--r--src/core/hle/service/nvdrv/devices/nvhost_nvjpg.cpp5
-rw-r--r--src/core/hle/service/nvdrv/devices/nvhost_nvjpg.h5
-rw-r--r--src/core/hle/service/nvdrv/devices/nvhost_vic.cpp5
-rw-r--r--src/core/hle/service/nvdrv/devices/nvhost_vic.h5
-rw-r--r--src/core/hle/service/nvdrv/devices/nvmap.cpp5
-rw-r--r--src/core/hle/service/nvdrv/devices/nvmap.h5
-rw-r--r--src/core/hle/service/nvdrv/interface.cpp48
-rw-r--r--src/core/hle/service/nvdrv/interface.h4
-rw-r--r--src/core/hle/service/nvdrv/nvdata.h48
-rw-r--r--src/core/hle/service/nvdrv/nvdrv.cpp59
-rw-r--r--src/core/hle/service/nvdrv/nvdrv.h88
-rw-r--r--src/core/hle/service/nvflinger/buffer_queue.cpp23
-rw-r--r--src/core/hle/service/nvflinger/buffer_queue.h11
-rw-r--r--src/core/hle/service/nvflinger/nvflinger.cpp23
-rw-r--r--src/core/hle/service/nvflinger/nvflinger.h4
-rw-r--r--src/core/hle/service/service.cpp4
-rw-r--r--src/core/hle/service/vi/vi.cpp48
-rw-r--r--src/core/loader/elf.cpp2
-rw-r--r--src/core/loader/kip.cpp2
-rw-r--r--src/core/loader/nro.cpp2
-rw-r--r--src/core/loader/nso.cpp2
-rw-r--r--src/video_core/CMakeLists.txt5
-rw-r--r--src/video_core/buffer_cache.h299
-rw-r--r--src/video_core/buffer_cache/buffer_block.h76
-rw-r--r--src/video_core/buffer_cache/buffer_cache.h447
-rw-r--r--src/video_core/buffer_cache/map_interval.h89
-rw-r--r--src/video_core/dma_pusher.cpp1
-rw-r--r--src/video_core/engines/fermi_2d.cpp3
-rw-r--r--src/video_core/engines/fermi_2d.h3
-rw-r--r--src/video_core/engines/kepler_memory.cpp2
-rw-r--r--src/video_core/engines/kepler_memory.h1
-rw-r--r--src/video_core/engines/maxwell_3d.cpp15
-rw-r--r--src/video_core/engines/maxwell_dma.cpp8
-rw-r--r--src/video_core/engines/maxwell_dma.h9
-rw-r--r--src/video_core/engines/shader_bytecode.h25
-rw-r--r--src/video_core/gpu.cpp76
-rw-r--r--src/video_core/gpu.h49
-rw-r--r--src/video_core/gpu_asynch.cpp9
-rw-r--r--src/video_core/gpu_asynch.h3
-rw-r--r--src/video_core/gpu_synch.cpp2
-rw-r--r--src/video_core/gpu_synch.h4
-rw-r--r--src/video_core/gpu_thread.cpp27
-rw-r--r--src/video_core/gpu_thread.h32
-rw-r--r--src/video_core/rasterizer_interface.h3
-rw-r--r--src/video_core/renderer_opengl/gl_buffer_cache.cpp52
-rw-r--r--src/video_core/renderer_opengl/gl_buffer_cache.h39
-rw-r--r--src/video_core/renderer_opengl/gl_device.cpp3
-rw-r--r--src/video_core/renderer_opengl/gl_device.h5
-rw-r--r--src/video_core/renderer_opengl/gl_rasterizer.cpp8
-rw-r--r--src/video_core/renderer_opengl/gl_rasterizer.h1
-rw-r--r--src/video_core/renderer_opengl/gl_shader_cache.cpp14
-rw-r--r--src/video_core/renderer_opengl/gl_shader_cache.h1
-rw-r--r--src/video_core/renderer_opengl/gl_shader_decompiler.cpp67
-rw-r--r--src/video_core/renderer_opengl/gl_texture_cache.cpp5
-rw-r--r--src/video_core/renderer_opengl/gl_texture_cache.h2
-rw-r--r--src/video_core/renderer_vulkan/vk_shader_decompiler.cpp43
-rw-r--r--src/video_core/shader/control_flow.cpp47
-rw-r--r--src/video_core/shader/control_flow.h30
-rw-r--r--src/video_core/shader/decode.cpp5
-rw-r--r--src/video_core/shader/decode/arithmetic.cpp13
-rw-r--r--src/video_core/shader/decode/arithmetic_half_immediate.cpp4
-rw-r--r--src/video_core/shader/decode/conversion.cpp30
-rw-r--r--src/video_core/shader/decode/ffma.cpp10
-rw-r--r--src/video_core/shader/decode/float_set.cpp1
-rw-r--r--src/video_core/shader/decode/float_set_predicate.cpp1
-rw-r--r--src/video_core/shader/decode/half_set_predicate.cpp10
-rw-r--r--src/video_core/shader/decode/hfma2.cpp4
-rw-r--r--src/video_core/shader/decode/integer_set.cpp1
-rw-r--r--src/video_core/shader/decode/integer_set_predicate.cpp1
-rw-r--r--src/video_core/shader/decode/other.cpp13
-rw-r--r--src/video_core/shader/decode/predicate_set_register.cpp1
-rw-r--r--src/video_core/shader/decode/warp.cpp55
-rw-r--r--src/video_core/shader/node.h30
-rw-r--r--src/video_core/shader/shader_ir.cpp5
-rw-r--r--src/video_core/shader/shader_ir.h4
-rw-r--r--src/video_core/shader/track.cpp4
-rw-r--r--src/video_core/texture_cache/surface_base.cpp5
-rw-r--r--src/video_core/texture_cache/surface_params.h1
-rw-r--r--src/video_core/texture_cache/texture_cache.h2
-rw-r--r--src/video_core/textures/texture.h2
-rw-r--r--src/yuzu/CMakeLists.txt55
-rw-r--r--src/yuzu/configuration/config.cpp2
-rw-r--r--src/yuzu/configuration/configure_debug.cpp4
-rw-r--r--src/yuzu/configuration/configure_gamelist.cpp2
-rw-r--r--src/yuzu/configuration/configure_general.cpp2
-rw-r--r--src/yuzu/configuration/configure_input.cpp12
-rw-r--r--src/yuzu/configuration/configure_input_player.cpp10
-rw-r--r--src/yuzu/configuration/configure_input_simple.cpp4
-rw-r--r--src/yuzu/configuration/configure_mouse_advanced.cpp6
-rw-r--r--src/yuzu/configuration/configure_per_general.cpp2
-rw-r--r--src/yuzu/configuration/configure_profile_manager.cpp8
-rw-r--r--src/yuzu/configuration/configure_touchscreen_advanced.cpp2
-rw-r--r--src/yuzu/configuration/configure_web.cpp2
-rw-r--r--src/yuzu/debugger/console.cpp2
-rw-r--r--src/yuzu/discord_impl.cpp2
-rw-r--r--src/yuzu/game_list.cpp2
-rw-r--r--src/yuzu/game_list_p.h2
-rw-r--r--src/yuzu/game_list_worker.cpp2
-rw-r--r--src/yuzu/hotkeys.cpp2
-rw-r--r--src/yuzu/main.cpp17
-rw-r--r--src/yuzu/uisettings.cpp (renamed from src/yuzu/ui_settings.cpp)2
-rw-r--r--src/yuzu/uisettings.h (renamed from src/yuzu/ui_settings.h)0
-rw-r--r--src/yuzu_tester/yuzu.cpp3
139 files changed, 2185 insertions, 908 deletions
diff --git a/src/common/CMakeLists.txt b/src/common/CMakeLists.txt
index 2b4266f29..01abdb3bb 100644
--- a/src/common/CMakeLists.txt
+++ b/src/common/CMakeLists.txt
@@ -55,6 +55,7 @@ add_custom_command(OUTPUT scm_rev.cpp
55 "${VIDEO_CORE}/shader/decode/register_set_predicate.cpp" 55 "${VIDEO_CORE}/shader/decode/register_set_predicate.cpp"
56 "${VIDEO_CORE}/shader/decode/shift.cpp" 56 "${VIDEO_CORE}/shader/decode/shift.cpp"
57 "${VIDEO_CORE}/shader/decode/video.cpp" 57 "${VIDEO_CORE}/shader/decode/video.cpp"
58 "${VIDEO_CORE}/shader/decode/warp.cpp"
58 "${VIDEO_CORE}/shader/decode/xmad.cpp" 59 "${VIDEO_CORE}/shader/decode/xmad.cpp"
59 "${VIDEO_CORE}/shader/control_flow.cpp" 60 "${VIDEO_CORE}/shader/control_flow.cpp"
60 "${VIDEO_CORE}/shader/control_flow.h" 61 "${VIDEO_CORE}/shader/control_flow.h"
diff --git a/src/common/alignment.h b/src/common/alignment.h
index 617b14d9b..88d5d3a65 100644
--- a/src/common/alignment.h
+++ b/src/common/alignment.h
@@ -3,6 +3,7 @@
3#pragma once 3#pragma once
4 4
#include <cstddef>
#include <memory>
#include <new>
#include <type_traits>
#include <utility>
7 8
8namespace Common { 9namespace Common {
@@ -37,4 +38,63 @@ constexpr bool IsWordAligned(T value) {
37 return (value & 0b11) == 0; 38 return (value & 0b11) == 0;
38} 39}
39 40
/**
 * Stateless STL-compatible allocator whose allocations are aligned to `Align` bytes.
 *
 * Storage is obtained from and returned to the aligned overloads of the global
 * operator new / operator delete.
 *
 * @tparam T     Element type handled by the allocator.
 * @tparam Align Alignment, in bytes, requested for every allocation. Must be a power of two.
 */
template <typename T, std::size_t Align = 16>
class AlignmentAllocator {
    static_assert(Align != 0 && (Align & (Align - 1)) == 0,
                  "Align must be a non-zero power of two");

public:
    using value_type = T;
    using size_type = std::size_t;
    using difference_type = std::ptrdiff_t;

    using pointer = T*;
    using const_pointer = const T*;

    using reference = T&;
    using const_reference = const T&;

    template <typename T2>
    struct rebind {
        using other = AlignmentAllocator<T2, Align>;
    };

    constexpr AlignmentAllocator() noexcept = default;

    /// Rebinding constructor; containers use it to derive allocators for their internal types.
    template <typename T2>
    constexpr AlignmentAllocator(const AlignmentAllocator<T2, Align>&) noexcept {}

    pointer address(reference r) noexcept {
        return std::addressof(r);
    }

    const_pointer address(const_reference r) const noexcept {
        return std::addressof(r);
    }

    /**
     * Allocates uninitialized storage for `n` objects of type T.
     *
     * @param n Number of elements (not bytes) to make room for.
     * @returns Pointer to storage aligned to `Align` bytes.
     * @throws std::bad_array_new_length if `n * sizeof(T)` would overflow size_type.
     */
    pointer allocate(size_type n) {
        if (n > max_size()) {
            throw std::bad_array_new_length{};
        }
        // operator new takes a byte count, so the element count has to be scaled.
        return static_cast<pointer>(
            ::operator new (n * sizeof(value_type), std::align_val_t{Align}));
    }

    /// Releases storage previously obtained from allocate(); the size argument is unused.
    void deallocate(pointer p, size_type) noexcept {
        ::operator delete (p, std::align_val_t{Align});
    }

    /// Constructs a U at `p` from the forwarded arguments (copy, move or in-place).
    template <typename U, typename... Args>
    void construct(U* p, Args&&... args) {
        ::new (static_cast<void*>(p)) U(std::forward<Args>(args)...);
    }

    /// Runs the destructor of the object at `p` without releasing its storage.
    template <typename U>
    void destroy(U* p) {
        p->~U();
    }

    /// Largest element count whose total byte size still fits in size_type.
    size_type max_size() const noexcept {
        return size_type(-1) / sizeof(value_type);
    }

    bool operator!=(const AlignmentAllocator<T, Align>& other) const noexcept {
        return !(*this == other);
    }

    // Returns true if and only if storage allocated from *this
    // can be deallocated from other, and vice versa.
    // Always returns true for stateless allocators.
    bool operator==(const AlignmentAllocator<T, Align>&) const noexcept {
        return true;
    }
};
99
40} // namespace Common 100} // namespace Common
diff --git a/src/core/CMakeLists.txt b/src/core/CMakeLists.txt
index f4325f0f8..5462decee 100644
--- a/src/core/CMakeLists.txt
+++ b/src/core/CMakeLists.txt
@@ -111,6 +111,8 @@ add_library(core STATIC
111 frontend/scope_acquire_window_context.h 111 frontend/scope_acquire_window_context.h
112 gdbstub/gdbstub.cpp 112 gdbstub/gdbstub.cpp
113 gdbstub/gdbstub.h 113 gdbstub/gdbstub.h
114 hardware_interrupt_manager.cpp
115 hardware_interrupt_manager.h
114 hle/ipc.h 116 hle/ipc.h
115 hle/ipc_helpers.h 117 hle/ipc_helpers.h
116 hle/kernel/address_arbiter.cpp 118 hle/kernel/address_arbiter.cpp
@@ -372,6 +374,7 @@ add_library(core STATIC
372 hle/service/nvdrv/devices/nvmap.h 374 hle/service/nvdrv/devices/nvmap.h
373 hle/service/nvdrv/interface.cpp 375 hle/service/nvdrv/interface.cpp
374 hle/service/nvdrv/interface.h 376 hle/service/nvdrv/interface.h
377 hle/service/nvdrv/nvdata.h
375 hle/service/nvdrv/nvdrv.cpp 378 hle/service/nvdrv/nvdrv.cpp
376 hle/service/nvdrv/nvdrv.h 379 hle/service/nvdrv/nvdrv.h
377 hle/service/nvdrv/nvmemp.cpp 380 hle/service/nvdrv/nvmemp.cpp
diff --git a/src/core/core.cpp b/src/core/core.cpp
index 4aceee785..20d64f3b0 100644
--- a/src/core/core.cpp
+++ b/src/core/core.cpp
@@ -19,6 +19,7 @@
19#include "core/file_sys/vfs_concat.h" 19#include "core/file_sys/vfs_concat.h"
20#include "core/file_sys/vfs_real.h" 20#include "core/file_sys/vfs_real.h"
21#include "core/gdbstub/gdbstub.h" 21#include "core/gdbstub/gdbstub.h"
22#include "core/hardware_interrupt_manager.h"
22#include "core/hle/kernel/client_port.h" 23#include "core/hle/kernel/client_port.h"
23#include "core/hle/kernel/kernel.h" 24#include "core/hle/kernel/kernel.h"
24#include "core/hle/kernel/process.h" 25#include "core/hle/kernel/process.h"
@@ -151,7 +152,7 @@ struct System::Impl {
151 if (!renderer->Init()) { 152 if (!renderer->Init()) {
152 return ResultStatus::ErrorVideoCore; 153 return ResultStatus::ErrorVideoCore;
153 } 154 }
154 155 interrupt_manager = std::make_unique<Core::Hardware::InterruptManager>(system);
155 gpu_core = VideoCore::CreateGPU(system); 156 gpu_core = VideoCore::CreateGPU(system);
156 157
157 is_powered_on = true; 158 is_powered_on = true;
@@ -298,6 +299,7 @@ struct System::Impl {
298 std::unique_ptr<VideoCore::RendererBase> renderer; 299 std::unique_ptr<VideoCore::RendererBase> renderer;
299 std::unique_ptr<Tegra::GPU> gpu_core; 300 std::unique_ptr<Tegra::GPU> gpu_core;
300 std::shared_ptr<Tegra::DebugContext> debug_context; 301 std::shared_ptr<Tegra::DebugContext> debug_context;
302 std::unique_ptr<Core::Hardware::InterruptManager> interrupt_manager;
301 CpuCoreManager cpu_core_manager; 303 CpuCoreManager cpu_core_manager;
302 bool is_powered_on = false; 304 bool is_powered_on = false;
303 305
@@ -444,6 +446,14 @@ const Tegra::GPU& System::GPU() const {
444 return *impl->gpu_core; 446 return *impl->gpu_core;
445} 447}
446 448
449Core::Hardware::InterruptManager& System::InterruptManager() {
450 return *impl->interrupt_manager;
451}
452
453const Core::Hardware::InterruptManager& System::InterruptManager() const {
454 return *impl->interrupt_manager;
455}
456
447VideoCore::RendererBase& System::Renderer() { 457VideoCore::RendererBase& System::Renderer() {
448 return *impl->renderer; 458 return *impl->renderer;
449} 459}
diff --git a/src/core/core.h b/src/core/core.h
index 8ebb385ac..0138d93b0 100644
--- a/src/core/core.h
+++ b/src/core/core.h
@@ -70,6 +70,10 @@ namespace Core::Timing {
70class CoreTiming; 70class CoreTiming;
71} 71}
72 72
73namespace Core::Hardware {
74class InterruptManager;
75}
76
73namespace Core { 77namespace Core {
74 78
75class ARM_Interface; 79class ARM_Interface;
@@ -234,6 +238,12 @@ public:
234 /// Provides a constant reference to the core timing instance. 238 /// Provides a constant reference to the core timing instance.
235 const Timing::CoreTiming& CoreTiming() const; 239 const Timing::CoreTiming& CoreTiming() const;
236 240
241 /// Provides a reference to the interrupt manager instance.
242 Core::Hardware::InterruptManager& InterruptManager();
243
244 /// Provides a constant reference to the interrupt manager instance.
245 const Core::Hardware::InterruptManager& InterruptManager() const;
246
237 /// Provides a reference to the kernel instance. 247 /// Provides a reference to the kernel instance.
238 Kernel::KernelCore& Kernel(); 248 Kernel::KernelCore& Kernel();
239 249
diff --git a/src/core/hardware_interrupt_manager.cpp b/src/core/hardware_interrupt_manager.cpp
new file mode 100644
index 000000000..c2115db2d
--- /dev/null
+++ b/src/core/hardware_interrupt_manager.cpp
@@ -0,0 +1,30 @@
1// Copyright 2019 Yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "core/core.h"
6#include "core/core_timing.h"
7#include "core/hardware_interrupt_manager.h"
8#include "core/hle/service/nvdrv/interface.h"
9#include "core/hle/service/sm/sm.h"
10
11namespace Core::Hardware {
12
13InterruptManager::InterruptManager(Core::System& system_in) : system(system_in) {
14 gpu_interrupt_event =
15 system.CoreTiming().RegisterEvent("GPUInterrupt", [this](u64 message, s64) {
16 auto nvdrv = system.ServiceManager().GetService<Service::Nvidia::NVDRV>("nvdrv");
17 const u32 syncpt = static_cast<u32>(message >> 32);
18 const u32 value = static_cast<u32>(message);
19 nvdrv->SignalGPUInterruptSyncpt(syncpt, value);
20 });
21}
22
23InterruptManager::~InterruptManager() = default;
24
25void InterruptManager::GPUInterruptSyncpt(const u32 syncpoint_id, const u32 value) {
26 const u64 msg = (static_cast<u64>(syncpoint_id) << 32ULL) | value;
27 system.CoreTiming().ScheduleEvent(10, gpu_interrupt_event, msg);
28}
29
30} // namespace Core::Hardware
diff --git a/src/core/hardware_interrupt_manager.h b/src/core/hardware_interrupt_manager.h
new file mode 100644
index 000000000..494db883a
--- /dev/null
+++ b/src/core/hardware_interrupt_manager.h
@@ -0,0 +1,31 @@
1// Copyright 2019 Yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include "common/common_types.h"
8
9namespace Core {
10class System;
11}
12
13namespace Core::Timing {
14struct EventType;
15}
16
17namespace Core::Hardware {
18
19class InterruptManager {
20public:
21 explicit InterruptManager(Core::System& system);
22 ~InterruptManager();
23
24 void GPUInterruptSyncpt(u32 syncpoint_id, u32 value);
25
26private:
27 Core::System& system;
28 Core::Timing::EventType* gpu_interrupt_event{};
29};
30
31} // namespace Core::Hardware
diff --git a/src/core/hle/kernel/code_set.h b/src/core/hle/kernel/code_set.h
index 879957dcb..d8ad54030 100644
--- a/src/core/hle/kernel/code_set.h
+++ b/src/core/hle/kernel/code_set.h
@@ -8,6 +8,7 @@
8#include <vector> 8#include <vector>
9 9
10#include "common/common_types.h" 10#include "common/common_types.h"
11#include "core/hle/kernel/physical_memory.h"
11 12
12namespace Kernel { 13namespace Kernel {
13 14
@@ -77,7 +78,7 @@ struct CodeSet final {
77 } 78 }
78 79
79 /// The overall data that backs this code set. 80 /// The overall data that backs this code set.
80 std::vector<u8> memory; 81 Kernel::PhysicalMemory memory;
81 82
82 /// The segments that comprise this code set. 83 /// The segments that comprise this code set.
83 std::array<Segment, 3> segments; 84 std::array<Segment, 3> segments;
diff --git a/src/core/hle/kernel/physical_memory.h b/src/core/hle/kernel/physical_memory.h
new file mode 100644
index 000000000..090565310
--- /dev/null
+++ b/src/core/hle/kernel/physical_memory.h
@@ -0,0 +1,19 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include "common/alignment.h"
8
9namespace Kernel {
10
11// This encapsulation serves 2 purposes:
12// - First, to encapsulate host physical memory under a single type and set an
13// standard for managing it.
14// - Second to ensure all host backing memory used is aligned to 256 bytes due
15// to strict alignment restrictions on GPU memory.
16
17using PhysicalMemory = std::vector<u8, Common::AlignmentAllocator<u8, 256>>;
18
19} // namespace Kernel
diff --git a/src/core/hle/kernel/process.cpp b/src/core/hle/kernel/process.cpp
index 92169a97b..e80a12ac3 100644
--- a/src/core/hle/kernel/process.cpp
+++ b/src/core/hle/kernel/process.cpp
@@ -247,7 +247,7 @@ VAddr Process::CreateTLSRegion() {
247 ASSERT(region_address.Succeeded()); 247 ASSERT(region_address.Succeeded());
248 248
249 const auto map_result = vm_manager.MapMemoryBlock( 249 const auto map_result = vm_manager.MapMemoryBlock(
250 *region_address, std::make_shared<std::vector<u8>>(Memory::PAGE_SIZE), 0, 250 *region_address, std::make_shared<PhysicalMemory>(Memory::PAGE_SIZE), 0,
251 Memory::PAGE_SIZE, MemoryState::ThreadLocal); 251 Memory::PAGE_SIZE, MemoryState::ThreadLocal);
252 ASSERT(map_result.Succeeded()); 252 ASSERT(map_result.Succeeded());
253 253
@@ -277,7 +277,7 @@ void Process::FreeTLSRegion(VAddr tls_address) {
277} 277}
278 278
279void Process::LoadModule(CodeSet module_, VAddr base_addr) { 279void Process::LoadModule(CodeSet module_, VAddr base_addr) {
280 const auto memory = std::make_shared<std::vector<u8>>(std::move(module_.memory)); 280 const auto memory = std::make_shared<PhysicalMemory>(std::move(module_.memory));
281 281
282 const auto MapSegment = [&](const CodeSet::Segment& segment, VMAPermission permissions, 282 const auto MapSegment = [&](const CodeSet::Segment& segment, VMAPermission permissions,
283 MemoryState memory_state) { 283 MemoryState memory_state) {
@@ -327,7 +327,7 @@ void Process::AllocateMainThreadStack(u64 stack_size) {
327 // Allocate and map the main thread stack 327 // Allocate and map the main thread stack
328 const VAddr mapping_address = vm_manager.GetTLSIORegionEndAddress() - main_thread_stack_size; 328 const VAddr mapping_address = vm_manager.GetTLSIORegionEndAddress() - main_thread_stack_size;
329 vm_manager 329 vm_manager
330 .MapMemoryBlock(mapping_address, std::make_shared<std::vector<u8>>(main_thread_stack_size), 330 .MapMemoryBlock(mapping_address, std::make_shared<PhysicalMemory>(main_thread_stack_size),
331 0, main_thread_stack_size, MemoryState::Stack) 331 0, main_thread_stack_size, MemoryState::Stack)
332 .Unwrap(); 332 .Unwrap();
333} 333}
diff --git a/src/core/hle/kernel/shared_memory.cpp b/src/core/hle/kernel/shared_memory.cpp
index f15c5ee36..a815c4eea 100644
--- a/src/core/hle/kernel/shared_memory.cpp
+++ b/src/core/hle/kernel/shared_memory.cpp
@@ -28,7 +28,7 @@ SharedPtr<SharedMemory> SharedMemory::Create(KernelCore& kernel, Process* owner_
28 shared_memory->other_permissions = other_permissions; 28 shared_memory->other_permissions = other_permissions;
29 29
30 if (address == 0) { 30 if (address == 0) {
31 shared_memory->backing_block = std::make_shared<std::vector<u8>>(size); 31 shared_memory->backing_block = std::make_shared<Kernel::PhysicalMemory>(size);
32 shared_memory->backing_block_offset = 0; 32 shared_memory->backing_block_offset = 0;
33 33
34 // Refresh the address mappings for the current process. 34 // Refresh the address mappings for the current process.
@@ -59,8 +59,8 @@ SharedPtr<SharedMemory> SharedMemory::Create(KernelCore& kernel, Process* owner_
59} 59}
60 60
61SharedPtr<SharedMemory> SharedMemory::CreateForApplet( 61SharedPtr<SharedMemory> SharedMemory::CreateForApplet(
62 KernelCore& kernel, std::shared_ptr<std::vector<u8>> heap_block, std::size_t offset, u64 size, 62 KernelCore& kernel, std::shared_ptr<Kernel::PhysicalMemory> heap_block, std::size_t offset,
63 MemoryPermission permissions, MemoryPermission other_permissions, std::string name) { 63 u64 size, MemoryPermission permissions, MemoryPermission other_permissions, std::string name) {
64 SharedPtr<SharedMemory> shared_memory(new SharedMemory(kernel)); 64 SharedPtr<SharedMemory> shared_memory(new SharedMemory(kernel));
65 65
66 shared_memory->owner_process = nullptr; 66 shared_memory->owner_process = nullptr;
diff --git a/src/core/hle/kernel/shared_memory.h b/src/core/hle/kernel/shared_memory.h
index c2b6155e1..01ca6dcd2 100644
--- a/src/core/hle/kernel/shared_memory.h
+++ b/src/core/hle/kernel/shared_memory.h
@@ -10,6 +10,7 @@
10 10
11#include "common/common_types.h" 11#include "common/common_types.h"
12#include "core/hle/kernel/object.h" 12#include "core/hle/kernel/object.h"
13#include "core/hle/kernel/physical_memory.h"
13#include "core/hle/kernel/process.h" 14#include "core/hle/kernel/process.h"
14#include "core/hle/result.h" 15#include "core/hle/result.h"
15 16
@@ -62,12 +63,10 @@ public:
62 * block. 63 * block.
63 * @param name Optional object name, used for debugging purposes. 64 * @param name Optional object name, used for debugging purposes.
64 */ 65 */
65 static SharedPtr<SharedMemory> CreateForApplet(KernelCore& kernel, 66 static SharedPtr<SharedMemory> CreateForApplet(
66 std::shared_ptr<std::vector<u8>> heap_block, 67 KernelCore& kernel, std::shared_ptr<Kernel::PhysicalMemory> heap_block, std::size_t offset,
67 std::size_t offset, u64 size, 68 u64 size, MemoryPermission permissions, MemoryPermission other_permissions,
68 MemoryPermission permissions, 69 std::string name = "Unknown Applet");
69 MemoryPermission other_permissions,
70 std::string name = "Unknown Applet");
71 70
72 std::string GetTypeName() const override { 71 std::string GetTypeName() const override {
73 return "SharedMemory"; 72 return "SharedMemory";
@@ -135,7 +134,7 @@ private:
135 ~SharedMemory() override; 134 ~SharedMemory() override;
136 135
137 /// Backing memory for this shared memory block. 136 /// Backing memory for this shared memory block.
138 std::shared_ptr<std::vector<u8>> backing_block; 137 std::shared_ptr<PhysicalMemory> backing_block;
139 /// Offset into the backing block for this shared memory. 138 /// Offset into the backing block for this shared memory.
140 std::size_t backing_block_offset = 0; 139 std::size_t backing_block_offset = 0;
141 /// Size of the memory block. Page-aligned. 140 /// Size of the memory block. Page-aligned.
diff --git a/src/core/hle/kernel/transfer_memory.cpp b/src/core/hle/kernel/transfer_memory.cpp
index 26c4e5e67..1113c815e 100644
--- a/src/core/hle/kernel/transfer_memory.cpp
+++ b/src/core/hle/kernel/transfer_memory.cpp
@@ -47,7 +47,7 @@ ResultCode TransferMemory::MapMemory(VAddr address, u64 size, MemoryPermission p
47 return ERR_INVALID_STATE; 47 return ERR_INVALID_STATE;
48 } 48 }
49 49
50 backing_block = std::make_shared<std::vector<u8>>(size); 50 backing_block = std::make_shared<PhysicalMemory>(size);
51 51
52 const auto map_state = owner_permissions == MemoryPermission::None 52 const auto map_state = owner_permissions == MemoryPermission::None
53 ? MemoryState::TransferMemoryIsolated 53 ? MemoryState::TransferMemoryIsolated
diff --git a/src/core/hle/kernel/transfer_memory.h b/src/core/hle/kernel/transfer_memory.h
index a140b1e2b..6be9dc094 100644
--- a/src/core/hle/kernel/transfer_memory.h
+++ b/src/core/hle/kernel/transfer_memory.h
@@ -8,6 +8,7 @@
8#include <vector> 8#include <vector>
9 9
10#include "core/hle/kernel/object.h" 10#include "core/hle/kernel/object.h"
11#include "core/hle/kernel/physical_memory.h"
11 12
12union ResultCode; 13union ResultCode;
13 14
@@ -82,7 +83,7 @@ private:
82 ~TransferMemory() override; 83 ~TransferMemory() override;
83 84
84 /// Memory block backing this instance. 85 /// Memory block backing this instance.
85 std::shared_ptr<std::vector<u8>> backing_block; 86 std::shared_ptr<PhysicalMemory> backing_block;
86 87
87 /// The base address for the memory managed by this instance. 88 /// The base address for the memory managed by this instance.
88 VAddr base_address = 0; 89 VAddr base_address = 0;
diff --git a/src/core/hle/kernel/vm_manager.cpp b/src/core/hle/kernel/vm_manager.cpp
index 4f45fb03b..40cea1e7c 100644
--- a/src/core/hle/kernel/vm_manager.cpp
+++ b/src/core/hle/kernel/vm_manager.cpp
@@ -5,6 +5,7 @@
5#include <algorithm> 5#include <algorithm>
6#include <iterator> 6#include <iterator>
7#include <utility> 7#include <utility>
8#include "common/alignment.h"
8#include "common/assert.h" 9#include "common/assert.h"
9#include "common/logging/log.h" 10#include "common/logging/log.h"
10#include "common/memory_hook.h" 11#include "common/memory_hook.h"
@@ -103,7 +104,7 @@ bool VMManager::IsValidHandle(VMAHandle handle) const {
103} 104}
104 105
105ResultVal<VMManager::VMAHandle> VMManager::MapMemoryBlock(VAddr target, 106ResultVal<VMManager::VMAHandle> VMManager::MapMemoryBlock(VAddr target,
106 std::shared_ptr<std::vector<u8>> block, 107 std::shared_ptr<PhysicalMemory> block,
107 std::size_t offset, u64 size, 108 std::size_t offset, u64 size,
108 MemoryState state, VMAPermission perm) { 109 MemoryState state, VMAPermission perm) {
109 ASSERT(block != nullptr); 110 ASSERT(block != nullptr);
@@ -260,7 +261,7 @@ ResultVal<VAddr> VMManager::SetHeapSize(u64 size) {
260 261
261 if (heap_memory == nullptr) { 262 if (heap_memory == nullptr) {
262 // Initialize heap 263 // Initialize heap
263 heap_memory = std::make_shared<std::vector<u8>>(size); 264 heap_memory = std::make_shared<PhysicalMemory>(size);
264 heap_end = heap_region_base + size; 265 heap_end = heap_region_base + size;
265 } else { 266 } else {
266 UnmapRange(heap_region_base, GetCurrentHeapSize()); 267 UnmapRange(heap_region_base, GetCurrentHeapSize());
@@ -341,7 +342,7 @@ ResultCode VMManager::MapPhysicalMemory(VAddr target, u64 size) {
341 const auto map_size = std::min(end_addr - cur_addr, vma_end - cur_addr); 342 const auto map_size = std::min(end_addr - cur_addr, vma_end - cur_addr);
342 if (vma.state == MemoryState::Unmapped) { 343 if (vma.state == MemoryState::Unmapped) {
343 const auto map_res = 344 const auto map_res =
344 MapMemoryBlock(cur_addr, std::make_shared<std::vector<u8>>(map_size, 0), 0, 345 MapMemoryBlock(cur_addr, std::make_shared<PhysicalMemory>(map_size, 0), 0,
345 map_size, MemoryState::Heap, VMAPermission::ReadWrite); 346 map_size, MemoryState::Heap, VMAPermission::ReadWrite);
346 result = map_res.Code(); 347 result = map_res.Code();
347 if (result.IsError()) { 348 if (result.IsError()) {
@@ -442,7 +443,7 @@ ResultCode VMManager::UnmapPhysicalMemory(VAddr target, u64 size) {
442 if (result.IsError()) { 443 if (result.IsError()) {
443 for (const auto [map_address, map_size] : unmapped_regions) { 444 for (const auto [map_address, map_size] : unmapped_regions) {
444 const auto remap_res = 445 const auto remap_res =
445 MapMemoryBlock(map_address, std::make_shared<std::vector<u8>>(map_size, 0), 0, 446 MapMemoryBlock(map_address, std::make_shared<PhysicalMemory>(map_size, 0), 0,
446 map_size, MemoryState::Heap, VMAPermission::None); 447 map_size, MemoryState::Heap, VMAPermission::None);
447 ASSERT_MSG(remap_res.Succeeded(), "UnmapPhysicalMemory re-map on error"); 448 ASSERT_MSG(remap_res.Succeeded(), "UnmapPhysicalMemory re-map on error");
448 } 449 }
@@ -593,7 +594,7 @@ ResultCode VMManager::MirrorMemory(VAddr dst_addr, VAddr src_addr, u64 size, Mem
593 ASSERT_MSG(vma_offset + size <= vma->second.size, 594 ASSERT_MSG(vma_offset + size <= vma->second.size,
594 "Shared memory exceeds bounds of mapped block"); 595 "Shared memory exceeds bounds of mapped block");
595 596
596 const std::shared_ptr<std::vector<u8>>& backing_block = vma->second.backing_block; 597 const std::shared_ptr<PhysicalMemory>& backing_block = vma->second.backing_block;
597 const std::size_t backing_block_offset = vma->second.offset + vma_offset; 598 const std::size_t backing_block_offset = vma->second.offset + vma_offset;
598 599
599 CASCADE_RESULT(auto new_vma, 600 CASCADE_RESULT(auto new_vma,
@@ -606,7 +607,7 @@ ResultCode VMManager::MirrorMemory(VAddr dst_addr, VAddr src_addr, u64 size, Mem
606 return RESULT_SUCCESS; 607 return RESULT_SUCCESS;
607} 608}
608 609
609void VMManager::RefreshMemoryBlockMappings(const std::vector<u8>* block) { 610void VMManager::RefreshMemoryBlockMappings(const PhysicalMemory* block) {
610 // If this ever proves to have a noticeable performance impact, allow users of the function to 611 // If this ever proves to have a noticeable performance impact, allow users of the function to
611 // specify a specific range of addresses to limit the scan to. 612 // specify a specific range of addresses to limit the scan to.
612 for (const auto& p : vma_map) { 613 for (const auto& p : vma_map) {
@@ -764,7 +765,7 @@ void VMManager::MergeAdjacentVMA(VirtualMemoryArea& left, const VirtualMemoryAre
764 right.backing_block->begin() + right.offset + right.size); 765 right.backing_block->begin() + right.offset + right.size);
765 } else { 766 } else {
766 // Slow case: make a new memory block for left and right. 767 // Slow case: make a new memory block for left and right.
767 auto new_memory = std::make_shared<std::vector<u8>>(); 768 auto new_memory = std::make_shared<PhysicalMemory>();
768 new_memory->insert(new_memory->end(), left.backing_block->begin() + left.offset, 769 new_memory->insert(new_memory->end(), left.backing_block->begin() + left.offset,
769 left.backing_block->begin() + left.offset + left.size); 770 left.backing_block->begin() + left.offset + left.size);
770 new_memory->insert(new_memory->end(), right.backing_block->begin() + right.offset, 771 new_memory->insert(new_memory->end(), right.backing_block->begin() + right.offset,
diff --git a/src/core/hle/kernel/vm_manager.h b/src/core/hle/kernel/vm_manager.h
index 0aecb7499..b18cde619 100644
--- a/src/core/hle/kernel/vm_manager.h
+++ b/src/core/hle/kernel/vm_manager.h
@@ -11,6 +11,7 @@
11#include "common/common_types.h" 11#include "common/common_types.h"
12#include "common/memory_hook.h" 12#include "common/memory_hook.h"
13#include "common/page_table.h" 13#include "common/page_table.h"
14#include "core/hle/kernel/physical_memory.h"
14#include "core/hle/result.h" 15#include "core/hle/result.h"
15#include "core/memory.h" 16#include "core/memory.h"
16 17
@@ -290,7 +291,7 @@ struct VirtualMemoryArea {
290 291
291 // Settings for type = AllocatedMemoryBlock 292 // Settings for type = AllocatedMemoryBlock
292 /// Memory block backing this VMA. 293 /// Memory block backing this VMA.
293 std::shared_ptr<std::vector<u8>> backing_block = nullptr; 294 std::shared_ptr<PhysicalMemory> backing_block = nullptr;
294 /// Offset into the backing_memory the mapping starts from. 295 /// Offset into the backing_memory the mapping starts from.
295 std::size_t offset = 0; 296 std::size_t offset = 0;
296 297
@@ -348,7 +349,7 @@ public:
348 * @param size Size of the mapping. 349 * @param size Size of the mapping.
349 * @param state MemoryState tag to attach to the VMA. 350 * @param state MemoryState tag to attach to the VMA.
350 */ 351 */
351 ResultVal<VMAHandle> MapMemoryBlock(VAddr target, std::shared_ptr<std::vector<u8>> block, 352 ResultVal<VMAHandle> MapMemoryBlock(VAddr target, std::shared_ptr<PhysicalMemory> block,
352 std::size_t offset, u64 size, MemoryState state, 353 std::size_t offset, u64 size, MemoryState state,
353 VMAPermission perm = VMAPermission::ReadWrite); 354 VMAPermission perm = VMAPermission::ReadWrite);
354 355
@@ -547,7 +548,7 @@ public:
547 * Scans all VMAs and updates the page table range of any that use the given vector as backing 548 * Scans all VMAs and updates the page table range of any that use the given vector as backing
548 * memory. This should be called after any operation that causes reallocation of the vector. 549 * memory. This should be called after any operation that causes reallocation of the vector.
549 */ 550 */
550 void RefreshMemoryBlockMappings(const std::vector<u8>* block); 551 void RefreshMemoryBlockMappings(const PhysicalMemory* block);
551 552
552 /// Dumps the address space layout to the log, for debugging 553 /// Dumps the address space layout to the log, for debugging
553 void LogLayout() const; 554 void LogLayout() const;
@@ -777,7 +778,7 @@ private:
777 // the entire virtual address space extents that bound the allocations, including any holes. 778 // the entire virtual address space extents that bound the allocations, including any holes.
778 // This makes deallocation and reallocation of holes fast and keeps process memory contiguous 779 // This makes deallocation and reallocation of holes fast and keeps process memory contiguous
779 // in the emulator address space, allowing Memory::GetPointer to be reasonably safe. 780 // in the emulator address space, allowing Memory::GetPointer to be reasonably safe.
780 std::shared_ptr<std::vector<u8>> heap_memory; 781 std::shared_ptr<PhysicalMemory> heap_memory;
781 782
782 // The end of the currently allocated heap. This is not an inclusive 783 // The end of the currently allocated heap. This is not an inclusive
783 // end of the range. This is essentially 'base_address + current_size'. 784 // end of the range. This is essentially 'base_address + current_size'.
diff --git a/src/core/hle/service/audio/audio.cpp b/src/core/hle/service/audio/audio.cpp
index 128df7db5..1781bec83 100644
--- a/src/core/hle/service/audio/audio.cpp
+++ b/src/core/hle/service/audio/audio.cpp
@@ -19,16 +19,16 @@
19 19
20namespace Service::Audio { 20namespace Service::Audio {
21 21
22void InstallInterfaces(SM::ServiceManager& service_manager) { 22void InstallInterfaces(SM::ServiceManager& service_manager, Core::System& system) {
23 std::make_shared<AudCtl>()->InstallAsService(service_manager); 23 std::make_shared<AudCtl>()->InstallAsService(service_manager);
24 std::make_shared<AudOutA>()->InstallAsService(service_manager); 24 std::make_shared<AudOutA>()->InstallAsService(service_manager);
25 std::make_shared<AudOutU>()->InstallAsService(service_manager); 25 std::make_shared<AudOutU>(system)->InstallAsService(service_manager);
26 std::make_shared<AudInA>()->InstallAsService(service_manager); 26 std::make_shared<AudInA>()->InstallAsService(service_manager);
27 std::make_shared<AudInU>()->InstallAsService(service_manager); 27 std::make_shared<AudInU>()->InstallAsService(service_manager);
28 std::make_shared<AudRecA>()->InstallAsService(service_manager); 28 std::make_shared<AudRecA>()->InstallAsService(service_manager);
29 std::make_shared<AudRecU>()->InstallAsService(service_manager); 29 std::make_shared<AudRecU>()->InstallAsService(service_manager);
30 std::make_shared<AudRenA>()->InstallAsService(service_manager); 30 std::make_shared<AudRenA>()->InstallAsService(service_manager);
31 std::make_shared<AudRenU>()->InstallAsService(service_manager); 31 std::make_shared<AudRenU>(system)->InstallAsService(service_manager);
32 std::make_shared<CodecCtl>()->InstallAsService(service_manager); 32 std::make_shared<CodecCtl>()->InstallAsService(service_manager);
33 std::make_shared<HwOpus>()->InstallAsService(service_manager); 33 std::make_shared<HwOpus>()->InstallAsService(service_manager);
34 34
diff --git a/src/core/hle/service/audio/audio.h b/src/core/hle/service/audio/audio.h
index f5bd3bf5f..b6d13912e 100644
--- a/src/core/hle/service/audio/audio.h
+++ b/src/core/hle/service/audio/audio.h
@@ -4,6 +4,10 @@
4 4
5#pragma once 5#pragma once
6 6
7namespace Core {
8class System;
9}
10
7namespace Service::SM { 11namespace Service::SM {
8class ServiceManager; 12class ServiceManager;
9} 13}
@@ -11,6 +15,6 @@ class ServiceManager;
11namespace Service::Audio { 15namespace Service::Audio {
12 16
13/// Registers all Audio services with the specified service manager. 17/// Registers all Audio services with the specified service manager.
14void InstallInterfaces(SM::ServiceManager& service_manager); 18void InstallInterfaces(SM::ServiceManager& service_manager, Core::System& system);
15 19
16} // namespace Service::Audio 20} // namespace Service::Audio
diff --git a/src/core/hle/service/audio/audout_u.cpp b/src/core/hle/service/audio/audout_u.cpp
index 7db6eb08d..fb84a8f13 100644
--- a/src/core/hle/service/audio/audout_u.cpp
+++ b/src/core/hle/service/audio/audout_u.cpp
@@ -40,8 +40,8 @@ enum class AudioState : u32 {
40 40
41class IAudioOut final : public ServiceFramework<IAudioOut> { 41class IAudioOut final : public ServiceFramework<IAudioOut> {
42public: 42public:
43 IAudioOut(AudoutParams audio_params, AudioCore::AudioOut& audio_core, std::string&& device_name, 43 IAudioOut(Core::System& system, AudoutParams audio_params, AudioCore::AudioOut& audio_core,
44 std::string&& unique_name) 44 std::string&& device_name, std::string&& unique_name)
45 : ServiceFramework("IAudioOut"), audio_core(audio_core), 45 : ServiceFramework("IAudioOut"), audio_core(audio_core),
46 device_name(std::move(device_name)), audio_params(audio_params) { 46 device_name(std::move(device_name)), audio_params(audio_params) {
47 // clang-format off 47 // clang-format off
@@ -65,7 +65,6 @@ public:
65 RegisterHandlers(functions); 65 RegisterHandlers(functions);
66 66
67 // This is the event handle used to check if the audio buffer was released 67 // This is the event handle used to check if the audio buffer was released
68 auto& system = Core::System::GetInstance();
69 buffer_event = Kernel::WritableEvent::CreateEventPair( 68 buffer_event = Kernel::WritableEvent::CreateEventPair(
70 system.Kernel(), Kernel::ResetType::Manual, "IAudioOutBufferReleased"); 69 system.Kernel(), Kernel::ResetType::Manual, "IAudioOutBufferReleased");
71 70
@@ -212,6 +211,22 @@ private:
212 Kernel::EventPair buffer_event; 211 Kernel::EventPair buffer_event;
213}; 212};
214 213
214AudOutU::AudOutU(Core::System& system_) : ServiceFramework("audout:u"), system{system_} {
215 // clang-format off
216 static const FunctionInfo functions[] = {
217 {0, &AudOutU::ListAudioOutsImpl, "ListAudioOuts"},
218 {1, &AudOutU::OpenAudioOutImpl, "OpenAudioOut"},
219 {2, &AudOutU::ListAudioOutsImpl, "ListAudioOutsAuto"},
220 {3, &AudOutU::OpenAudioOutImpl, "OpenAudioOutAuto"},
221 };
222 // clang-format on
223
224 RegisterHandlers(functions);
225 audio_core = std::make_unique<AudioCore::AudioOut>();
226}
227
228AudOutU::~AudOutU() = default;
229
215void AudOutU::ListAudioOutsImpl(Kernel::HLERequestContext& ctx) { 230void AudOutU::ListAudioOutsImpl(Kernel::HLERequestContext& ctx) {
216 LOG_DEBUG(Service_Audio, "called"); 231 LOG_DEBUG(Service_Audio, "called");
217 232
@@ -248,7 +263,7 @@ void AudOutU::OpenAudioOutImpl(Kernel::HLERequestContext& ctx) {
248 263
249 std::string unique_name{fmt::format("{}-{}", device_name, audio_out_interfaces.size())}; 264 std::string unique_name{fmt::format("{}-{}", device_name, audio_out_interfaces.size())};
250 auto audio_out_interface = std::make_shared<IAudioOut>( 265 auto audio_out_interface = std::make_shared<IAudioOut>(
251 params, *audio_core, std::move(device_name), std::move(unique_name)); 266 system, params, *audio_core, std::move(device_name), std::move(unique_name));
252 267
253 IPC::ResponseBuilder rb{ctx, 6, 0, 1}; 268 IPC::ResponseBuilder rb{ctx, 6, 0, 1};
254 rb.Push(RESULT_SUCCESS); 269 rb.Push(RESULT_SUCCESS);
@@ -256,20 +271,9 @@ void AudOutU::OpenAudioOutImpl(Kernel::HLERequestContext& ctx) {
256 rb.Push<u32>(params.channel_count); 271 rb.Push<u32>(params.channel_count);
257 rb.Push<u32>(static_cast<u32>(AudioCore::Codec::PcmFormat::Int16)); 272 rb.Push<u32>(static_cast<u32>(AudioCore::Codec::PcmFormat::Int16));
258 rb.Push<u32>(static_cast<u32>(AudioState::Stopped)); 273 rb.Push<u32>(static_cast<u32>(AudioState::Stopped));
259 rb.PushIpcInterface<Audio::IAudioOut>(audio_out_interface); 274 rb.PushIpcInterface<IAudioOut>(audio_out_interface);
260 275
261 audio_out_interfaces.push_back(std::move(audio_out_interface)); 276 audio_out_interfaces.push_back(std::move(audio_out_interface));
262} 277}
263 278
264AudOutU::AudOutU() : ServiceFramework("audout:u") {
265 static const FunctionInfo functions[] = {{0, &AudOutU::ListAudioOutsImpl, "ListAudioOuts"},
266 {1, &AudOutU::OpenAudioOutImpl, "OpenAudioOut"},
267 {2, &AudOutU::ListAudioOutsImpl, "ListAudioOutsAuto"},
268 {3, &AudOutU::OpenAudioOutImpl, "OpenAudioOutAuto"}};
269 RegisterHandlers(functions);
270 audio_core = std::make_unique<AudioCore::AudioOut>();
271}
272
273AudOutU::~AudOutU() = default;
274
275} // namespace Service::Audio 279} // namespace Service::Audio
diff --git a/src/core/hle/service/audio/audout_u.h b/src/core/hle/service/audio/audout_u.h
index aed4c43b2..c9f532ccd 100644
--- a/src/core/hle/service/audio/audout_u.h
+++ b/src/core/hle/service/audio/audout_u.h
@@ -11,6 +11,10 @@ namespace AudioCore {
11class AudioOut; 11class AudioOut;
12} 12}
13 13
14namespace Core {
15class System;
16}
17
14namespace Kernel { 18namespace Kernel {
15class HLERequestContext; 19class HLERequestContext;
16} 20}
@@ -21,15 +25,17 @@ class IAudioOut;
21 25
22class AudOutU final : public ServiceFramework<AudOutU> { 26class AudOutU final : public ServiceFramework<AudOutU> {
23public: 27public:
24 AudOutU(); 28 explicit AudOutU(Core::System& system_);
25 ~AudOutU() override; 29 ~AudOutU() override;
26 30
27private: 31private:
32 void ListAudioOutsImpl(Kernel::HLERequestContext& ctx);
33 void OpenAudioOutImpl(Kernel::HLERequestContext& ctx);
34
28 std::vector<std::shared_ptr<IAudioOut>> audio_out_interfaces; 35 std::vector<std::shared_ptr<IAudioOut>> audio_out_interfaces;
29 std::unique_ptr<AudioCore::AudioOut> audio_core; 36 std::unique_ptr<AudioCore::AudioOut> audio_core;
30 37
31 void ListAudioOutsImpl(Kernel::HLERequestContext& ctx); 38 Core::System& system;
32 void OpenAudioOutImpl(Kernel::HLERequestContext& ctx);
33}; 39};
34 40
35} // namespace Service::Audio 41} // namespace Service::Audio
diff --git a/src/core/hle/service/audio/audren_u.cpp b/src/core/hle/service/audio/audren_u.cpp
index 679299f68..5b0b7f17e 100644
--- a/src/core/hle/service/audio/audren_u.cpp
+++ b/src/core/hle/service/audio/audren_u.cpp
@@ -5,6 +5,7 @@
5#include <algorithm> 5#include <algorithm>
6#include <array> 6#include <array>
7#include <memory> 7#include <memory>
8#include <string_view>
8 9
9#include "audio_core/audio_renderer.h" 10#include "audio_core/audio_renderer.h"
10#include "common/alignment.h" 11#include "common/alignment.h"
@@ -25,7 +26,7 @@ namespace Service::Audio {
25 26
26class IAudioRenderer final : public ServiceFramework<IAudioRenderer> { 27class IAudioRenderer final : public ServiceFramework<IAudioRenderer> {
27public: 28public:
28 explicit IAudioRenderer(AudioCore::AudioRendererParameter audren_params, 29 explicit IAudioRenderer(Core::System& system, AudioCore::AudioRendererParameter audren_params,
29 const std::size_t instance_number) 30 const std::size_t instance_number)
30 : ServiceFramework("IAudioRenderer") { 31 : ServiceFramework("IAudioRenderer") {
31 // clang-format off 32 // clang-format off
@@ -46,7 +47,6 @@ public:
46 // clang-format on 47 // clang-format on
47 RegisterHandlers(functions); 48 RegisterHandlers(functions);
48 49
49 auto& system = Core::System::GetInstance();
50 system_event = Kernel::WritableEvent::CreateEventPair( 50 system_event = Kernel::WritableEvent::CreateEventPair(
51 system.Kernel(), Kernel::ResetType::Manual, "IAudioRenderer:SystemEvent"); 51 system.Kernel(), Kernel::ResetType::Manual, "IAudioRenderer:SystemEvent");
52 renderer = std::make_unique<AudioCore::AudioRenderer>( 52 renderer = std::make_unique<AudioCore::AudioRenderer>(
@@ -160,7 +160,8 @@ private:
160 160
161class IAudioDevice final : public ServiceFramework<IAudioDevice> { 161class IAudioDevice final : public ServiceFramework<IAudioDevice> {
162public: 162public:
163 IAudioDevice() : ServiceFramework("IAudioDevice") { 163 explicit IAudioDevice(Core::System& system, u32_le revision_num)
164 : ServiceFramework("IAudioDevice"), revision{revision_num} {
164 static const FunctionInfo functions[] = { 165 static const FunctionInfo functions[] = {
165 {0, &IAudioDevice::ListAudioDeviceName, "ListAudioDeviceName"}, 166 {0, &IAudioDevice::ListAudioDeviceName, "ListAudioDeviceName"},
166 {1, &IAudioDevice::SetAudioDeviceOutputVolume, "SetAudioDeviceOutputVolume"}, 167 {1, &IAudioDevice::SetAudioDeviceOutputVolume, "SetAudioDeviceOutputVolume"},
@@ -178,7 +179,7 @@ public:
178 }; 179 };
179 RegisterHandlers(functions); 180 RegisterHandlers(functions);
180 181
181 auto& kernel = Core::System::GetInstance().Kernel(); 182 auto& kernel = system.Kernel();
182 buffer_event = Kernel::WritableEvent::CreateEventPair(kernel, Kernel::ResetType::Automatic, 183 buffer_event = Kernel::WritableEvent::CreateEventPair(kernel, Kernel::ResetType::Automatic,
183 "IAudioOutBufferReleasedEvent"); 184 "IAudioOutBufferReleasedEvent");
184 185
@@ -189,15 +190,47 @@ public:
189 } 190 }
190 191
191private: 192private:
193 using AudioDeviceName = std::array<char, 256>;
194 static constexpr std::array<std::string_view, 4> audio_device_names{{
195 "AudioStereoJackOutput",
196 "AudioBuiltInSpeakerOutput",
197 "AudioTvOutput",
198 "AudioUsbDeviceOutput",
199 }};
200 enum class DeviceType {
201 AHUBHeadphones,
202 AHUBSpeakers,
203 HDA,
204 USBOutput,
205 };
206
192 void ListAudioDeviceName(Kernel::HLERequestContext& ctx) { 207 void ListAudioDeviceName(Kernel::HLERequestContext& ctx) {
193 LOG_WARNING(Service_Audio, "(STUBBED) called"); 208 LOG_DEBUG(Service_Audio, "called");
194 209
195 constexpr std::array<char, 15> audio_interface{{"AudioInterface"}}; 210 const bool usb_output_supported =
196 ctx.WriteBuffer(audio_interface); 211 IsFeatureSupported(AudioFeatures::AudioUSBDeviceOutput, revision);
212 const std::size_t count = ctx.GetWriteBufferSize() / sizeof(AudioDeviceName);
213
214 std::vector<AudioDeviceName> name_buffer;
215 name_buffer.reserve(audio_device_names.size());
216
217 for (std::size_t i = 0; i < count && i < audio_device_names.size(); i++) {
218 const auto type = static_cast<DeviceType>(i);
219
220 if (!usb_output_supported && type == DeviceType::USBOutput) {
221 continue;
222 }
223
224 const auto& device_name = audio_device_names[i];
225 auto& entry = name_buffer.emplace_back();
226 device_name.copy(entry.data(), device_name.size());
227 }
228
229 ctx.WriteBuffer(name_buffer);
197 230
198 IPC::ResponseBuilder rb{ctx, 3}; 231 IPC::ResponseBuilder rb{ctx, 3};
199 rb.Push(RESULT_SUCCESS); 232 rb.Push(RESULT_SUCCESS);
200 rb.Push<u32>(1); 233 rb.Push(static_cast<u32>(name_buffer.size()));
201 } 234 }
202 235
203 void SetAudioDeviceOutputVolume(Kernel::HLERequestContext& ctx) { 236 void SetAudioDeviceOutputVolume(Kernel::HLERequestContext& ctx) {
@@ -216,12 +249,16 @@ private:
216 void GetActiveAudioDeviceName(Kernel::HLERequestContext& ctx) { 249 void GetActiveAudioDeviceName(Kernel::HLERequestContext& ctx) {
217 LOG_WARNING(Service_Audio, "(STUBBED) called"); 250 LOG_WARNING(Service_Audio, "(STUBBED) called");
218 251
219 constexpr std::array<char, 12> audio_interface{{"AudioDevice"}}; 252 // Currently set to always be TV audio output.
220 ctx.WriteBuffer(audio_interface); 253 const auto& device_name = audio_device_names[2];
221 254
222 IPC::ResponseBuilder rb{ctx, 3}; 255 AudioDeviceName out_device_name{};
256 device_name.copy(out_device_name.data(), device_name.size());
257
258 ctx.WriteBuffer(out_device_name);
259
260 IPC::ResponseBuilder rb{ctx, 2};
223 rb.Push(RESULT_SUCCESS); 261 rb.Push(RESULT_SUCCESS);
224 rb.Push<u32>(1);
225 } 262 }
226 263
227 void QueryAudioDeviceSystemEvent(Kernel::HLERequestContext& ctx) { 264 void QueryAudioDeviceSystemEvent(Kernel::HLERequestContext& ctx) {
@@ -250,12 +287,13 @@ private:
250 rb.PushCopyObjects(audio_output_device_switch_event.readable); 287 rb.PushCopyObjects(audio_output_device_switch_event.readable);
251 } 288 }
252 289
290 u32_le revision = 0;
253 Kernel::EventPair buffer_event; 291 Kernel::EventPair buffer_event;
254 Kernel::EventPair audio_output_device_switch_event; 292 Kernel::EventPair audio_output_device_switch_event;
255 293
256}; // namespace Audio 294}; // namespace Audio
257 295
258AudRenU::AudRenU() : ServiceFramework("audren:u") { 296AudRenU::AudRenU(Core::System& system_) : ServiceFramework("audren:u"), system{system_} {
259 // clang-format off 297 // clang-format off
260 static const FunctionInfo functions[] = { 298 static const FunctionInfo functions[] = {
261 {0, &AudRenU::OpenAudioRenderer, "OpenAudioRenderer"}, 299 {0, &AudRenU::OpenAudioRenderer, "OpenAudioRenderer"},
@@ -328,7 +366,7 @@ void AudRenU::GetAudioRendererWorkBufferSize(Kernel::HLERequestContext& ctx) {
328 }; 366 };
329 367
330 // Calculates the portion of the size related to the mix data (and the sorting thereof). 368 // Calculates the portion of the size related to the mix data (and the sorting thereof).
331 const auto calculate_mix_info_size = [this](const AudioCore::AudioRendererParameter& params) { 369 const auto calculate_mix_info_size = [](const AudioCore::AudioRendererParameter& params) {
332 // The size of the mixing info data structure. 370 // The size of the mixing info data structure.
333 constexpr u64 mix_info_size = 0x940; 371 constexpr u64 mix_info_size = 0x940;
334 372
@@ -400,7 +438,7 @@ void AudRenU::GetAudioRendererWorkBufferSize(Kernel::HLERequestContext& ctx) {
400 438
401 // Calculates the part of the size related to the splitter context. 439 // Calculates the part of the size related to the splitter context.
402 const auto calculate_splitter_context_size = 440 const auto calculate_splitter_context_size =
403 [this](const AudioCore::AudioRendererParameter& params) -> u64 { 441 [](const AudioCore::AudioRendererParameter& params) -> u64 {
404 if (!IsFeatureSupported(AudioFeatures::Splitter, params.revision)) { 442 if (!IsFeatureSupported(AudioFeatures::Splitter, params.revision)) {
405 return 0; 443 return 0;
406 } 444 }
@@ -447,7 +485,7 @@ void AudRenU::GetAudioRendererWorkBufferSize(Kernel::HLERequestContext& ctx) {
447 }; 485 };
448 486
449 // Calculates the part of the size related to performance statistics. 487 // Calculates the part of the size related to performance statistics.
450 const auto calculate_perf_size = [this](const AudioCore::AudioRendererParameter& params) { 488 const auto calculate_perf_size = [](const AudioCore::AudioRendererParameter& params) {
451 // Extra size value appended to the end of the calculation. 489 // Extra size value appended to the end of the calculation.
452 constexpr u64 appended = 128; 490 constexpr u64 appended = 128;
453 491
@@ -474,78 +512,76 @@ void AudRenU::GetAudioRendererWorkBufferSize(Kernel::HLERequestContext& ctx) {
474 }; 512 };
475 513
476 // Calculates the part of the size that relates to the audio command buffer. 514 // Calculates the part of the size that relates to the audio command buffer.
477 const auto calculate_command_buffer_size = 515 const auto calculate_command_buffer_size = [](const AudioCore::AudioRendererParameter& params) {
478 [this](const AudioCore::AudioRendererParameter& params) { 516 constexpr u64 alignment = (buffer_alignment_size - 1) * 2;
479 constexpr u64 alignment = (buffer_alignment_size - 1) * 2;
480 517
481 if (!IsFeatureSupported(AudioFeatures::VariadicCommandBuffer, params.revision)) { 518 if (!IsFeatureSupported(AudioFeatures::VariadicCommandBuffer, params.revision)) {
482 constexpr u64 command_buffer_size = 0x18000; 519 constexpr u64 command_buffer_size = 0x18000;
483 520
484 return command_buffer_size + alignment; 521 return command_buffer_size + alignment;
485 } 522 }
486 523
487 // When the variadic command buffer is supported, this means 524 // When the variadic command buffer is supported, this means
488 // the command generator for the audio renderer can issue commands 525 // the command generator for the audio renderer can issue commands
489 // that are (as one would expect), variable in size. So what we need to do 526 // that are (as one would expect), variable in size. So what we need to do
490 // is determine the maximum possible size for a few command data structures 527 // is determine the maximum possible size for a few command data structures
491 // then multiply them by the amount of present commands indicated by the given 528 // then multiply them by the amount of present commands indicated by the given
492 // respective audio parameters. 529 // respective audio parameters.
493 530
494 constexpr u64 max_biquad_filters = 2; 531 constexpr u64 max_biquad_filters = 2;
495 constexpr u64 max_mix_buffers = 24; 532 constexpr u64 max_mix_buffers = 24;
496 533
497 constexpr u64 biquad_filter_command_size = 0x2C; 534 constexpr u64 biquad_filter_command_size = 0x2C;
498 535
499 constexpr u64 depop_mix_command_size = 0x24; 536 constexpr u64 depop_mix_command_size = 0x24;
500 constexpr u64 depop_setup_command_size = 0x50; 537 constexpr u64 depop_setup_command_size = 0x50;
501 538
502 constexpr u64 effect_command_max_size = 0x540; 539 constexpr u64 effect_command_max_size = 0x540;
503 540
504 constexpr u64 mix_command_size = 0x1C; 541 constexpr u64 mix_command_size = 0x1C;
505 constexpr u64 mix_ramp_command_size = 0x24; 542 constexpr u64 mix_ramp_command_size = 0x24;
506 constexpr u64 mix_ramp_grouped_command_size = 0x13C; 543 constexpr u64 mix_ramp_grouped_command_size = 0x13C;
507 544
508 constexpr u64 perf_command_size = 0x28; 545 constexpr u64 perf_command_size = 0x28;
509 546
510 constexpr u64 sink_command_size = 0x130; 547 constexpr u64 sink_command_size = 0x130;
511 548
512 constexpr u64 submix_command_max_size = 549 constexpr u64 submix_command_max_size =
513 depop_mix_command_size + (mix_command_size * max_mix_buffers) * max_mix_buffers; 550 depop_mix_command_size + (mix_command_size * max_mix_buffers) * max_mix_buffers;
514 551
515 constexpr u64 volume_command_size = 0x1C; 552 constexpr u64 volume_command_size = 0x1C;
516 constexpr u64 volume_ramp_command_size = 0x20; 553 constexpr u64 volume_ramp_command_size = 0x20;
517 554
518 constexpr u64 voice_biquad_filter_command_size = 555 constexpr u64 voice_biquad_filter_command_size =
519 biquad_filter_command_size * max_biquad_filters; 556 biquad_filter_command_size * max_biquad_filters;
520 constexpr u64 voice_data_command_size = 0x9C; 557 constexpr u64 voice_data_command_size = 0x9C;
521 const u64 voice_command_max_size = 558 const u64 voice_command_max_size =
522 (params.splitter_count * depop_setup_command_size) + 559 (params.splitter_count * depop_setup_command_size) +
523 (voice_data_command_size + voice_biquad_filter_command_size + 560 (voice_data_command_size + voice_biquad_filter_command_size + volume_ramp_command_size +
524 volume_ramp_command_size + mix_ramp_grouped_command_size); 561 mix_ramp_grouped_command_size);
525 562
526 // Now calculate the individual elements that comprise the size and add them together. 563 // Now calculate the individual elements that comprise the size and add them together.
527 const u64 effect_commands_size = params.effect_count * effect_command_max_size; 564 const u64 effect_commands_size = params.effect_count * effect_command_max_size;
528 565
529 const u64 final_mix_commands_size = 566 const u64 final_mix_commands_size =
530 depop_mix_command_size + volume_command_size * max_mix_buffers; 567 depop_mix_command_size + volume_command_size * max_mix_buffers;
531 568
532 const u64 perf_commands_size = 569 const u64 perf_commands_size =
533 perf_command_size * 570 perf_command_size * (CalculateNumPerformanceEntries(params) + max_perf_detail_entries);
534 (CalculateNumPerformanceEntries(params) + max_perf_detail_entries);
535 571
536 const u64 sink_commands_size = params.sink_count * sink_command_size; 572 const u64 sink_commands_size = params.sink_count * sink_command_size;
537 573
538 const u64 splitter_commands_size = 574 const u64 splitter_commands_size =
539 params.num_splitter_send_channels * max_mix_buffers * mix_ramp_command_size; 575 params.num_splitter_send_channels * max_mix_buffers * mix_ramp_command_size;
540 576
541 const u64 submix_commands_size = params.submix_count * submix_command_max_size; 577 const u64 submix_commands_size = params.submix_count * submix_command_max_size;
542 578
543 const u64 voice_commands_size = params.voice_count * voice_command_max_size; 579 const u64 voice_commands_size = params.voice_count * voice_command_max_size;
544 580
545 return effect_commands_size + final_mix_commands_size + perf_commands_size + 581 return effect_commands_size + final_mix_commands_size + perf_commands_size +
546 sink_commands_size + splitter_commands_size + submix_commands_size + 582 sink_commands_size + splitter_commands_size + submix_commands_size +
547 voice_commands_size + alignment; 583 voice_commands_size + alignment;
548 }; 584 };
549 585
550 IPC::RequestParser rp{ctx}; 586 IPC::RequestParser rp{ctx};
551 const auto params = rp.PopRaw<AudioCore::AudioRendererParameter>(); 587 const auto params = rp.PopRaw<AudioCore::AudioRendererParameter>();
@@ -578,12 +614,16 @@ void AudRenU::GetAudioRendererWorkBufferSize(Kernel::HLERequestContext& ctx) {
578} 614}
579 615
580void AudRenU::GetAudioDeviceService(Kernel::HLERequestContext& ctx) { 616void AudRenU::GetAudioDeviceService(Kernel::HLERequestContext& ctx) {
581 LOG_DEBUG(Service_Audio, "called"); 617 IPC::RequestParser rp{ctx};
618 const u64 aruid = rp.Pop<u64>();
582 619
583 IPC::ResponseBuilder rb{ctx, 2, 0, 1}; 620 LOG_DEBUG(Service_Audio, "called. aruid={:016X}", aruid);
584 621
622 // Revisionless variant of GetAudioDeviceServiceWithRevisionInfo that
623 // always assumes the initial release revision (REV1).
624 IPC::ResponseBuilder rb{ctx, 2, 0, 1};
585 rb.Push(RESULT_SUCCESS); 625 rb.Push(RESULT_SUCCESS);
586 rb.PushIpcInterface<Audio::IAudioDevice>(); 626 rb.PushIpcInterface<IAudioDevice>(system, Common::MakeMagic('R', 'E', 'V', '1'));
587} 627}
588 628
589void AudRenU::OpenAudioRendererAuto(Kernel::HLERequestContext& ctx) { 629void AudRenU::OpenAudioRendererAuto(Kernel::HLERequestContext& ctx) {
@@ -593,13 +633,19 @@ void AudRenU::OpenAudioRendererAuto(Kernel::HLERequestContext& ctx) {
593} 633}
594 634
595void AudRenU::GetAudioDeviceServiceWithRevisionInfo(Kernel::HLERequestContext& ctx) { 635void AudRenU::GetAudioDeviceServiceWithRevisionInfo(Kernel::HLERequestContext& ctx) {
596 LOG_WARNING(Service_Audio, "(STUBBED) called"); 636 struct Parameters {
637 u32 revision;
638 u64 aruid;
639 };
597 640
598 IPC::ResponseBuilder rb{ctx, 2, 0, 1}; 641 IPC::RequestParser rp{ctx};
642 const auto [revision, aruid] = rp.PopRaw<Parameters>();
643
644 LOG_DEBUG(Service_Audio, "called. revision={:08X}, aruid={:016X}", revision, aruid);
599 645
646 IPC::ResponseBuilder rb{ctx, 2, 0, 1};
600 rb.Push(RESULT_SUCCESS); 647 rb.Push(RESULT_SUCCESS);
601 rb.PushIpcInterface<Audio::IAudioDevice>(); // TODO(ogniK): Figure out what is different 648 rb.PushIpcInterface<IAudioDevice>(system, revision);
602 // based on the current revision
603} 649}
604 650
605void AudRenU::OpenAudioRendererImpl(Kernel::HLERequestContext& ctx) { 651void AudRenU::OpenAudioRendererImpl(Kernel::HLERequestContext& ctx) {
@@ -608,14 +654,16 @@ void AudRenU::OpenAudioRendererImpl(Kernel::HLERequestContext& ctx) {
608 IPC::ResponseBuilder rb{ctx, 2, 0, 1}; 654 IPC::ResponseBuilder rb{ctx, 2, 0, 1};
609 655
610 rb.Push(RESULT_SUCCESS); 656 rb.Push(RESULT_SUCCESS);
611 rb.PushIpcInterface<IAudioRenderer>(params, audren_instance_count++); 657 rb.PushIpcInterface<IAudioRenderer>(system, params, audren_instance_count++);
612} 658}
613 659
614bool AudRenU::IsFeatureSupported(AudioFeatures feature, u32_le revision) const { 660bool IsFeatureSupported(AudioFeatures feature, u32_le revision) {
615 // Byte swap 661 // Byte swap
616 const u32_be version_num = revision - Common::MakeMagic('R', 'E', 'V', '0'); 662 const u32_be version_num = revision - Common::MakeMagic('R', 'E', 'V', '0');
617 663
618 switch (feature) { 664 switch (feature) {
665 case AudioFeatures::AudioUSBDeviceOutput:
666 return version_num >= 4U;
619 case AudioFeatures::Splitter: 667 case AudioFeatures::Splitter:
620 return version_num >= 2U; 668 return version_num >= 2U;
621 case AudioFeatures::PerformanceMetricsVersion2: 669 case AudioFeatures::PerformanceMetricsVersion2:
diff --git a/src/core/hle/service/audio/audren_u.h b/src/core/hle/service/audio/audren_u.h
index 49f2733cf..4e0ccc792 100644
--- a/src/core/hle/service/audio/audren_u.h
+++ b/src/core/hle/service/audio/audren_u.h
@@ -6,6 +6,10 @@
6 6
7#include "core/hle/service/service.h" 7#include "core/hle/service/service.h"
8 8
9namespace Core {
10class System;
11}
12
9namespace Kernel { 13namespace Kernel {
10class HLERequestContext; 14class HLERequestContext;
11} 15}
@@ -14,7 +18,7 @@ namespace Service::Audio {
14 18
15class AudRenU final : public ServiceFramework<AudRenU> { 19class AudRenU final : public ServiceFramework<AudRenU> {
16public: 20public:
17 explicit AudRenU(); 21 explicit AudRenU(Core::System& system_);
18 ~AudRenU() override; 22 ~AudRenU() override;
19 23
20private: 24private:
@@ -26,14 +30,19 @@ private:
26 30
27 void OpenAudioRendererImpl(Kernel::HLERequestContext& ctx); 31 void OpenAudioRendererImpl(Kernel::HLERequestContext& ctx);
28 32
29 enum class AudioFeatures : u32 {
30 Splitter,
31 PerformanceMetricsVersion2,
32 VariadicCommandBuffer,
33 };
34
35 bool IsFeatureSupported(AudioFeatures feature, u32_le revision) const;
36 std::size_t audren_instance_count = 0; 33 std::size_t audren_instance_count = 0;
34 Core::System& system;
37}; 35};
38 36
37// Describes a particular audio feature that may be supported in a particular revision.
38enum class AudioFeatures : u32 {
39 AudioUSBDeviceOutput,
40 Splitter,
41 PerformanceMetricsVersion2,
42 VariadicCommandBuffer,
43};
44
45// Tests if a particular audio feature is supported with a given audio revision.
46bool IsFeatureSupported(AudioFeatures feature, u32_le revision);
47
39} // namespace Service::Audio 48} // namespace Service::Audio
diff --git a/src/core/hle/service/ns/pl_u.cpp b/src/core/hle/service/ns/pl_u.cpp
index ad176f89d..2a522136d 100644
--- a/src/core/hle/service/ns/pl_u.cpp
+++ b/src/core/hle/service/ns/pl_u.cpp
@@ -77,7 +77,7 @@ enum class LoadState : u32 {
77 Done = 1, 77 Done = 1,
78}; 78};
79 79
80static void DecryptSharedFont(const std::vector<u32>& input, std::vector<u8>& output, 80static void DecryptSharedFont(const std::vector<u32>& input, Kernel::PhysicalMemory& output,
81 std::size_t& offset) { 81 std::size_t& offset) {
82 ASSERT_MSG(offset + (input.size() * sizeof(u32)) < SHARED_FONT_MEM_SIZE, 82 ASSERT_MSG(offset + (input.size() * sizeof(u32)) < SHARED_FONT_MEM_SIZE,
83 "Shared fonts exceeds 17mb!"); 83 "Shared fonts exceeds 17mb!");
@@ -94,7 +94,7 @@ static void DecryptSharedFont(const std::vector<u32>& input, std::vector<u8>& ou
94 offset += transformed_font.size() * sizeof(u32); 94 offset += transformed_font.size() * sizeof(u32);
95} 95}
96 96
97static void EncryptSharedFont(const std::vector<u8>& input, std::vector<u8>& output, 97static void EncryptSharedFont(const std::vector<u8>& input, Kernel::PhysicalMemory& output,
98 std::size_t& offset) { 98 std::size_t& offset) {
99 ASSERT_MSG(offset + input.size() + 8 < SHARED_FONT_MEM_SIZE, "Shared fonts exceeds 17mb!"); 99 ASSERT_MSG(offset + input.size() + 8 < SHARED_FONT_MEM_SIZE, "Shared fonts exceeds 17mb!");
100 const u32 KEY = EXPECTED_MAGIC ^ EXPECTED_RESULT; 100 const u32 KEY = EXPECTED_MAGIC ^ EXPECTED_RESULT;
@@ -121,7 +121,7 @@ struct PL_U::Impl {
121 return shared_font_regions.at(index); 121 return shared_font_regions.at(index);
122 } 122 }
123 123
124 void BuildSharedFontsRawRegions(const std::vector<u8>& input) { 124 void BuildSharedFontsRawRegions(const Kernel::PhysicalMemory& input) {
125 // As we can derive the xor key we can just populate the offsets 125 // As we can derive the xor key we can just populate the offsets
126 // based on the shared memory dump 126 // based on the shared memory dump
127 unsigned cur_offset = 0; 127 unsigned cur_offset = 0;
@@ -144,7 +144,7 @@ struct PL_U::Impl {
144 Kernel::SharedPtr<Kernel::SharedMemory> shared_font_mem; 144 Kernel::SharedPtr<Kernel::SharedMemory> shared_font_mem;
145 145
146 /// Backing memory for the shared font data 146 /// Backing memory for the shared font data
147 std::shared_ptr<std::vector<u8>> shared_font; 147 std::shared_ptr<Kernel::PhysicalMemory> shared_font;
148 148
149 // Automatically populated based on shared_fonts dump or system archives. 149 // Automatically populated based on shared_fonts dump or system archives.
150 std::vector<FontRegion> shared_font_regions; 150 std::vector<FontRegion> shared_font_regions;
@@ -166,7 +166,7 @@ PL_U::PL_U() : ServiceFramework("pl:u"), impl{std::make_unique<Impl>()} {
166 // Rebuild shared fonts from data ncas 166 // Rebuild shared fonts from data ncas
167 if (nand->HasEntry(static_cast<u64>(FontArchives::Standard), 167 if (nand->HasEntry(static_cast<u64>(FontArchives::Standard),
168 FileSys::ContentRecordType::Data)) { 168 FileSys::ContentRecordType::Data)) {
169 impl->shared_font = std::make_shared<std::vector<u8>>(SHARED_FONT_MEM_SIZE); 169 impl->shared_font = std::make_shared<Kernel::PhysicalMemory>(SHARED_FONT_MEM_SIZE);
170 for (auto font : SHARED_FONTS) { 170 for (auto font : SHARED_FONTS) {
171 const auto nca = 171 const auto nca =
172 nand->GetEntry(static_cast<u64>(font.first), FileSys::ContentRecordType::Data); 172 nand->GetEntry(static_cast<u64>(font.first), FileSys::ContentRecordType::Data);
@@ -207,7 +207,7 @@ PL_U::PL_U() : ServiceFramework("pl:u"), impl{std::make_unique<Impl>()} {
207 } 207 }
208 208
209 } else { 209 } else {
210 impl->shared_font = std::make_shared<std::vector<u8>>( 210 impl->shared_font = std::make_shared<Kernel::PhysicalMemory>(
211 SHARED_FONT_MEM_SIZE); // Shared memory needs to always be allocated and a fixed size 211 SHARED_FONT_MEM_SIZE); // Shared memory needs to always be allocated and a fixed size
212 212
213 const std::string user_path = FileUtil::GetUserPath(FileUtil::UserPath::SysDataDir); 213 const std::string user_path = FileUtil::GetUserPath(FileUtil::UserPath::SysDataDir);
diff --git a/src/core/hle/service/nvdrv/devices/nvdevice.h b/src/core/hle/service/nvdrv/devices/nvdevice.h
index 4f6042b00..5b8248433 100644
--- a/src/core/hle/service/nvdrv/devices/nvdevice.h
+++ b/src/core/hle/service/nvdrv/devices/nvdevice.h
@@ -8,6 +8,11 @@
8#include "common/bit_field.h" 8#include "common/bit_field.h"
9#include "common/common_types.h" 9#include "common/common_types.h"
10#include "common/swap.h" 10#include "common/swap.h"
11#include "core/hle/service/nvdrv/nvdata.h"
12
13namespace Core {
14class System;
15}
11 16
12namespace Service::Nvidia::Devices { 17namespace Service::Nvidia::Devices {
13 18
@@ -15,7 +20,7 @@ namespace Service::Nvidia::Devices {
15/// implement the ioctl interface. 20/// implement the ioctl interface.
16class nvdevice { 21class nvdevice {
17public: 22public:
18 nvdevice() = default; 23 explicit nvdevice(Core::System& system) : system{system} {};
19 virtual ~nvdevice() = default; 24 virtual ~nvdevice() = default;
20 union Ioctl { 25 union Ioctl {
21 u32_le raw; 26 u32_le raw;
@@ -33,7 +38,11 @@ public:
33 * @param output A buffer where the output data will be written to. 38 * @param output A buffer where the output data will be written to.
34 * @returns The result code of the ioctl. 39 * @returns The result code of the ioctl.
35 */ 40 */
36 virtual u32 ioctl(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output) = 0; 41 virtual u32 ioctl(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output,
42 IoctlCtrl& ctrl) = 0;
43
44protected:
45 Core::System& system;
37}; 46};
38 47
39} // namespace Service::Nvidia::Devices 48} // namespace Service::Nvidia::Devices
diff --git a/src/core/hle/service/nvdrv/devices/nvdisp_disp0.cpp b/src/core/hle/service/nvdrv/devices/nvdisp_disp0.cpp
index 20c7c39aa..76494f0b7 100644
--- a/src/core/hle/service/nvdrv/devices/nvdisp_disp0.cpp
+++ b/src/core/hle/service/nvdrv/devices/nvdisp_disp0.cpp
@@ -13,10 +13,12 @@
13 13
14namespace Service::Nvidia::Devices { 14namespace Service::Nvidia::Devices {
15 15
16nvdisp_disp0::nvdisp_disp0(std::shared_ptr<nvmap> nvmap_dev) : nvmap_dev(std::move(nvmap_dev)) {} 16nvdisp_disp0::nvdisp_disp0(Core::System& system, std::shared_ptr<nvmap> nvmap_dev)
17 : nvdevice(system), nvmap_dev(std::move(nvmap_dev)) {}
17nvdisp_disp0 ::~nvdisp_disp0() = default; 18nvdisp_disp0 ::~nvdisp_disp0() = default;
18 19
19u32 nvdisp_disp0::ioctl(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output) { 20u32 nvdisp_disp0::ioctl(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output,
21 IoctlCtrl& ctrl) {
20 UNIMPLEMENTED_MSG("Unimplemented ioctl"); 22 UNIMPLEMENTED_MSG("Unimplemented ioctl");
21 return 0; 23 return 0;
22} 24}
@@ -34,9 +36,8 @@ void nvdisp_disp0::flip(u32 buffer_handle, u32 offset, u32 format, u32 width, u3
34 addr, offset, width, height, stride, static_cast<PixelFormat>(format), 36 addr, offset, width, height, stride, static_cast<PixelFormat>(format),
35 transform, crop_rect}; 37 transform, crop_rect};
36 38
37 auto& instance = Core::System::GetInstance(); 39 system.GetPerfStats().EndGameFrame();
38 instance.GetPerfStats().EndGameFrame(); 40 system.GPU().SwapBuffers(framebuffer);
39 instance.GPU().SwapBuffers(framebuffer);
40} 41}
41 42
42} // namespace Service::Nvidia::Devices 43} // namespace Service::Nvidia::Devices
diff --git a/src/core/hle/service/nvdrv/devices/nvdisp_disp0.h b/src/core/hle/service/nvdrv/devices/nvdisp_disp0.h
index 12f3ef825..e79e490ff 100644
--- a/src/core/hle/service/nvdrv/devices/nvdisp_disp0.h
+++ b/src/core/hle/service/nvdrv/devices/nvdisp_disp0.h
@@ -17,10 +17,11 @@ class nvmap;
17 17
18class nvdisp_disp0 final : public nvdevice { 18class nvdisp_disp0 final : public nvdevice {
19public: 19public:
20 explicit nvdisp_disp0(std::shared_ptr<nvmap> nvmap_dev); 20 explicit nvdisp_disp0(Core::System& system, std::shared_ptr<nvmap> nvmap_dev);
21 ~nvdisp_disp0() override; 21 ~nvdisp_disp0() override;
22 22
23 u32 ioctl(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output) override; 23 u32 ioctl(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output,
24 IoctlCtrl& ctrl) override;
24 25
25 /// Performs a screen flip, drawing the buffer pointed to by the handle. 26 /// Performs a screen flip, drawing the buffer pointed to by the handle.
26 void flip(u32 buffer_handle, u32 offset, u32 format, u32 width, u32 height, u32 stride, 27 void flip(u32 buffer_handle, u32 offset, u32 format, u32 width, u32 height, u32 stride,
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.cpp b/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.cpp
index af62d33d2..24ab3f2e9 100644
--- a/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.cpp
+++ b/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.cpp
@@ -22,10 +22,12 @@ enum {
22}; 22};
23} 23}
24 24
25nvhost_as_gpu::nvhost_as_gpu(std::shared_ptr<nvmap> nvmap_dev) : nvmap_dev(std::move(nvmap_dev)) {} 25nvhost_as_gpu::nvhost_as_gpu(Core::System& system, std::shared_ptr<nvmap> nvmap_dev)
26 : nvdevice(system), nvmap_dev(std::move(nvmap_dev)) {}
26nvhost_as_gpu::~nvhost_as_gpu() = default; 27nvhost_as_gpu::~nvhost_as_gpu() = default;
27 28
28u32 nvhost_as_gpu::ioctl(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output) { 29u32 nvhost_as_gpu::ioctl(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output,
30 IoctlCtrl& ctrl) {
29 LOG_DEBUG(Service_NVDRV, "called, command=0x{:08X}, input_size=0x{:X}, output_size=0x{:X}", 31 LOG_DEBUG(Service_NVDRV, "called, command=0x{:08X}, input_size=0x{:X}, output_size=0x{:X}",
30 command.raw, input.size(), output.size()); 32 command.raw, input.size(), output.size());
31 33
@@ -65,7 +67,7 @@ u32 nvhost_as_gpu::AllocateSpace(const std::vector<u8>& input, std::vector<u8>&
65 LOG_DEBUG(Service_NVDRV, "called, pages={:X}, page_size={:X}, flags={:X}", params.pages, 67 LOG_DEBUG(Service_NVDRV, "called, pages={:X}, page_size={:X}, flags={:X}", params.pages,
66 params.page_size, params.flags); 68 params.page_size, params.flags);
67 69
68 auto& gpu = Core::System::GetInstance().GPU(); 70 auto& gpu = system.GPU();
69 const u64 size{static_cast<u64>(params.pages) * static_cast<u64>(params.page_size)}; 71 const u64 size{static_cast<u64>(params.pages) * static_cast<u64>(params.page_size)};
70 if (params.flags & 1) { 72 if (params.flags & 1) {
71 params.offset = gpu.MemoryManager().AllocateSpace(params.offset, size, 1); 73 params.offset = gpu.MemoryManager().AllocateSpace(params.offset, size, 1);
@@ -85,7 +87,7 @@ u32 nvhost_as_gpu::Remap(const std::vector<u8>& input, std::vector<u8>& output)
85 std::vector<IoctlRemapEntry> entries(num_entries); 87 std::vector<IoctlRemapEntry> entries(num_entries);
86 std::memcpy(entries.data(), input.data(), input.size()); 88 std::memcpy(entries.data(), input.data(), input.size());
87 89
88 auto& gpu = Core::System::GetInstance().GPU(); 90 auto& gpu = system.GPU();
89 for (const auto& entry : entries) { 91 for (const auto& entry : entries) {
90 LOG_WARNING(Service_NVDRV, "remap entry, offset=0x{:X} handle=0x{:X} pages=0x{:X}", 92 LOG_WARNING(Service_NVDRV, "remap entry, offset=0x{:X} handle=0x{:X} pages=0x{:X}",
91 entry.offset, entry.nvmap_handle, entry.pages); 93 entry.offset, entry.nvmap_handle, entry.pages);
@@ -136,7 +138,7 @@ u32 nvhost_as_gpu::MapBufferEx(const std::vector<u8>& input, std::vector<u8>& ou
136 // case to prevent unexpected behavior. 138 // case to prevent unexpected behavior.
137 ASSERT(object->id == params.nvmap_handle); 139 ASSERT(object->id == params.nvmap_handle);
138 140
139 auto& gpu = Core::System::GetInstance().GPU(); 141 auto& gpu = system.GPU();
140 142
141 if (params.flags & 1) { 143 if (params.flags & 1) {
142 params.offset = gpu.MemoryManager().MapBufferEx(object->addr, params.offset, object->size); 144 params.offset = gpu.MemoryManager().MapBufferEx(object->addr, params.offset, object->size);
@@ -173,8 +175,7 @@ u32 nvhost_as_gpu::UnmapBuffer(const std::vector<u8>& input, std::vector<u8>& ou
173 return 0; 175 return 0;
174 } 176 }
175 177
176 params.offset = Core::System::GetInstance().GPU().MemoryManager().UnmapBuffer(params.offset, 178 params.offset = system.GPU().MemoryManager().UnmapBuffer(params.offset, itr->second.size);
177 itr->second.size);
178 buffer_mappings.erase(itr->second.offset); 179 buffer_mappings.erase(itr->second.offset);
179 180
180 std::memcpy(output.data(), &params, output.size()); 181 std::memcpy(output.data(), &params, output.size());
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.h b/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.h
index eb14b1da8..30ca5f4c3 100644
--- a/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.h
+++ b/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.h
@@ -17,10 +17,11 @@ class nvmap;
17 17
18class nvhost_as_gpu final : public nvdevice { 18class nvhost_as_gpu final : public nvdevice {
19public: 19public:
20 explicit nvhost_as_gpu(std::shared_ptr<nvmap> nvmap_dev); 20 explicit nvhost_as_gpu(Core::System& system, std::shared_ptr<nvmap> nvmap_dev);
21 ~nvhost_as_gpu() override; 21 ~nvhost_as_gpu() override;
22 22
23 u32 ioctl(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output) override; 23 u32 ioctl(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output,
24 IoctlCtrl& ctrl) override;
24 25
25private: 26private:
26 enum class IoctlCommand : u32_le { 27 enum class IoctlCommand : u32_le {
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_ctrl.cpp b/src/core/hle/service/nvdrv/devices/nvhost_ctrl.cpp
index b39fb9ef9..9a66a5f88 100644
--- a/src/core/hle/service/nvdrv/devices/nvhost_ctrl.cpp
+++ b/src/core/hle/service/nvdrv/devices/nvhost_ctrl.cpp
@@ -7,14 +7,20 @@
7 7
8#include "common/assert.h" 8#include "common/assert.h"
9#include "common/logging/log.h" 9#include "common/logging/log.h"
10#include "core/core.h"
11#include "core/hle/kernel/readable_event.h"
12#include "core/hle/kernel/writable_event.h"
10#include "core/hle/service/nvdrv/devices/nvhost_ctrl.h" 13#include "core/hle/service/nvdrv/devices/nvhost_ctrl.h"
14#include "video_core/gpu.h"
11 15
12namespace Service::Nvidia::Devices { 16namespace Service::Nvidia::Devices {
13 17
14nvhost_ctrl::nvhost_ctrl() = default; 18nvhost_ctrl::nvhost_ctrl(Core::System& system, EventInterface& events_interface)
19 : nvdevice(system), events_interface{events_interface} {}
15nvhost_ctrl::~nvhost_ctrl() = default; 20nvhost_ctrl::~nvhost_ctrl() = default;
16 21
17u32 nvhost_ctrl::ioctl(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output) { 22u32 nvhost_ctrl::ioctl(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output,
23 IoctlCtrl& ctrl) {
18 LOG_DEBUG(Service_NVDRV, "called, command=0x{:08X}, input_size=0x{:X}, output_size=0x{:X}", 24 LOG_DEBUG(Service_NVDRV, "called, command=0x{:08X}, input_size=0x{:X}, output_size=0x{:X}",
19 command.raw, input.size(), output.size()); 25 command.raw, input.size(), output.size());
20 26
@@ -22,11 +28,15 @@ u32 nvhost_ctrl::ioctl(Ioctl command, const std::vector<u8>& input, std::vector<
22 case IoctlCommand::IocGetConfigCommand: 28 case IoctlCommand::IocGetConfigCommand:
23 return NvOsGetConfigU32(input, output); 29 return NvOsGetConfigU32(input, output);
24 case IoctlCommand::IocCtrlEventWaitCommand: 30 case IoctlCommand::IocCtrlEventWaitCommand:
25 return IocCtrlEventWait(input, output, false); 31 return IocCtrlEventWait(input, output, false, ctrl);
26 case IoctlCommand::IocCtrlEventWaitAsyncCommand: 32 case IoctlCommand::IocCtrlEventWaitAsyncCommand:
27 return IocCtrlEventWait(input, output, true); 33 return IocCtrlEventWait(input, output, true, ctrl);
28 case IoctlCommand::IocCtrlEventRegisterCommand: 34 case IoctlCommand::IocCtrlEventRegisterCommand:
29 return IocCtrlEventRegister(input, output); 35 return IocCtrlEventRegister(input, output);
36 case IoctlCommand::IocCtrlEventUnregisterCommand:
37 return IocCtrlEventUnregister(input, output);
38 case IoctlCommand::IocCtrlEventSignalCommand:
39 return IocCtrlEventSignal(input, output);
30 } 40 }
31 UNIMPLEMENTED_MSG("Unimplemented ioctl"); 41 UNIMPLEMENTED_MSG("Unimplemented ioctl");
32 return 0; 42 return 0;
@@ -41,23 +51,137 @@ u32 nvhost_ctrl::NvOsGetConfigU32(const std::vector<u8>& input, std::vector<u8>&
41} 51}
42 52
43u32 nvhost_ctrl::IocCtrlEventWait(const std::vector<u8>& input, std::vector<u8>& output, 53u32 nvhost_ctrl::IocCtrlEventWait(const std::vector<u8>& input, std::vector<u8>& output,
44 bool is_async) { 54 bool is_async, IoctlCtrl& ctrl) {
45 IocCtrlEventWaitParams params{}; 55 IocCtrlEventWaitParams params{};
46 std::memcpy(&params, input.data(), sizeof(params)); 56 std::memcpy(&params, input.data(), sizeof(params));
47 LOG_WARNING(Service_NVDRV, 57 LOG_DEBUG(Service_NVDRV, "syncpt_id={}, threshold={}, timeout={}, is_async={}",
48 "(STUBBED) called, syncpt_id={}, threshold={}, timeout={}, is_async={}", 58 params.syncpt_id, params.threshold, params.timeout, is_async);
49 params.syncpt_id, params.threshold, params.timeout, is_async);
50 59
51 // TODO(Subv): Implement actual syncpt waiting. 60 if (params.syncpt_id >= MaxSyncPoints) {
52 params.value = 0; 61 return NvResult::BadParameter;
62 }
63
64 auto& gpu = system.GPU();
65 // This is mostly to take into account unimplemented features. As synced
66 // gpu is always synced.
67 if (!gpu.IsAsync()) {
68 return NvResult::Success;
69 }
70 auto lock = gpu.LockSync();
71 const u32 current_syncpoint_value = gpu.GetSyncpointValue(params.syncpt_id);
72 const s32 diff = current_syncpoint_value - params.threshold;
73 if (diff >= 0) {
74 params.value = current_syncpoint_value;
75 std::memcpy(output.data(), &params, sizeof(params));
76 return NvResult::Success;
77 }
78 const u32 target_value = current_syncpoint_value - diff;
79
80 if (!is_async) {
81 params.value = 0;
82 }
83
84 if (params.timeout == 0) {
85 std::memcpy(output.data(), &params, sizeof(params));
86 return NvResult::Timeout;
87 }
88
89 u32 event_id;
90 if (is_async) {
91 event_id = params.value & 0x00FF;
92 if (event_id >= MaxNvEvents) {
93 std::memcpy(output.data(), &params, sizeof(params));
94 return NvResult::BadParameter;
95 }
96 } else {
97 if (ctrl.fresh_call) {
98 const auto result = events_interface.GetFreeEvent();
99 if (result) {
100 event_id = *result;
101 } else {
102 LOG_CRITICAL(Service_NVDRV, "No Free Events available!");
103 event_id = params.value & 0x00FF;
104 }
105 } else {
106 event_id = ctrl.event_id;
107 }
108 }
109
110 EventState status = events_interface.status[event_id];
111 if (event_id < MaxNvEvents || status == EventState::Free || status == EventState::Registered) {
112 events_interface.SetEventStatus(event_id, EventState::Waiting);
113 events_interface.assigned_syncpt[event_id] = params.syncpt_id;
114 events_interface.assigned_value[event_id] = target_value;
115 if (is_async) {
116 params.value = params.syncpt_id << 4;
117 } else {
118 params.value = ((params.syncpt_id & 0xfff) << 16) | 0x10000000;
119 }
120 params.value |= event_id;
121 events_interface.events[event_id].writable->Clear();
122 gpu.RegisterSyncptInterrupt(params.syncpt_id, target_value);
123 if (!is_async && ctrl.fresh_call) {
124 ctrl.must_delay = true;
125 ctrl.timeout = params.timeout;
126 ctrl.event_id = event_id;
127 return NvResult::Timeout;
128 }
129 std::memcpy(output.data(), &params, sizeof(params));
130 return NvResult::Timeout;
131 }
53 std::memcpy(output.data(), &params, sizeof(params)); 132 std::memcpy(output.data(), &params, sizeof(params));
54 return 0; 133 return NvResult::BadParameter;
55} 134}
56 135
57u32 nvhost_ctrl::IocCtrlEventRegister(const std::vector<u8>& input, std::vector<u8>& output) { 136u32 nvhost_ctrl::IocCtrlEventRegister(const std::vector<u8>& input, std::vector<u8>& output) {
58 LOG_WARNING(Service_NVDRV, "(STUBBED) called"); 137 IocCtrlEventRegisterParams params{};
59 // TODO(bunnei): Implement this. 138 std::memcpy(&params, input.data(), sizeof(params));
60 return 0; 139 const u32 event_id = params.user_event_id & 0x00FF;
140 LOG_DEBUG(Service_NVDRV, " called, user_event_id: {:X}", event_id);
141 if (event_id >= MaxNvEvents) {
142 return NvResult::BadParameter;
143 }
144 if (events_interface.registered[event_id]) {
145 return NvResult::BadParameter;
146 }
147 events_interface.RegisterEvent(event_id);
148 return NvResult::Success;
149}
150
151u32 nvhost_ctrl::IocCtrlEventUnregister(const std::vector<u8>& input, std::vector<u8>& output) {
152 IocCtrlEventUnregisterParams params{};
153 std::memcpy(&params, input.data(), sizeof(params));
154 const u32 event_id = params.user_event_id & 0x00FF;
155 LOG_DEBUG(Service_NVDRV, " called, user_event_id: {:X}", event_id);
156 if (event_id >= MaxNvEvents) {
157 return NvResult::BadParameter;
158 }
159 if (!events_interface.registered[event_id]) {
160 return NvResult::BadParameter;
161 }
162 events_interface.UnregisterEvent(event_id);
163 return NvResult::Success;
164}
165
166u32 nvhost_ctrl::IocCtrlEventSignal(const std::vector<u8>& input, std::vector<u8>& output) {
167 IocCtrlEventSignalParams params{};
168 std::memcpy(&params, input.data(), sizeof(params));
169 // TODO(Blinkhawk): This is normally called when an NvEvents timeout on WaitSynchronization
170 // It is believed from RE to cancel the GPU Event. However, better research is required
171 u32 event_id = params.user_event_id & 0x00FF;
172 LOG_WARNING(Service_NVDRV, "(STUBBED) called, user_event_id: {:X}", event_id);
173 if (event_id >= MaxNvEvents) {
174 return NvResult::BadParameter;
175 }
176 if (events_interface.status[event_id] == EventState::Waiting) {
177 auto& gpu = system.GPU();
178 if (gpu.CancelSyncptInterrupt(events_interface.assigned_syncpt[event_id],
179 events_interface.assigned_value[event_id])) {
180 events_interface.LiberateEvent(event_id);
181 events_interface.events[event_id].writable->Signal();
182 }
183 }
184 return NvResult::Success;
61} 185}
62 186
63} // namespace Service::Nvidia::Devices 187} // namespace Service::Nvidia::Devices
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_ctrl.h b/src/core/hle/service/nvdrv/devices/nvhost_ctrl.h
index 6d0de2212..14e6e7e57 100644
--- a/src/core/hle/service/nvdrv/devices/nvhost_ctrl.h
+++ b/src/core/hle/service/nvdrv/devices/nvhost_ctrl.h
@@ -8,15 +8,17 @@
8#include <vector> 8#include <vector>
9#include "common/common_types.h" 9#include "common/common_types.h"
10#include "core/hle/service/nvdrv/devices/nvdevice.h" 10#include "core/hle/service/nvdrv/devices/nvdevice.h"
11#include "core/hle/service/nvdrv/nvdrv.h"
11 12
12namespace Service::Nvidia::Devices { 13namespace Service::Nvidia::Devices {
13 14
14class nvhost_ctrl final : public nvdevice { 15class nvhost_ctrl final : public nvdevice {
15public: 16public:
16 nvhost_ctrl(); 17 explicit nvhost_ctrl(Core::System& system, EventInterface& events_interface);
17 ~nvhost_ctrl() override; 18 ~nvhost_ctrl() override;
18 19
19 u32 ioctl(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output) override; 20 u32 ioctl(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output,
21 IoctlCtrl& ctrl) override;
20 22
21private: 23private:
22 enum class IoctlCommand : u32_le { 24 enum class IoctlCommand : u32_le {
@@ -132,9 +134,16 @@ private:
132 134
133 u32 NvOsGetConfigU32(const std::vector<u8>& input, std::vector<u8>& output); 135 u32 NvOsGetConfigU32(const std::vector<u8>& input, std::vector<u8>& output);
134 136
135 u32 IocCtrlEventWait(const std::vector<u8>& input, std::vector<u8>& output, bool is_async); 137 u32 IocCtrlEventWait(const std::vector<u8>& input, std::vector<u8>& output, bool is_async,
138 IoctlCtrl& ctrl);
136 139
137 u32 IocCtrlEventRegister(const std::vector<u8>& input, std::vector<u8>& output); 140 u32 IocCtrlEventRegister(const std::vector<u8>& input, std::vector<u8>& output);
141
142 u32 IocCtrlEventUnregister(const std::vector<u8>& input, std::vector<u8>& output);
143
144 u32 IocCtrlEventSignal(const std::vector<u8>& input, std::vector<u8>& output);
145
146 EventInterface& events_interface;
138}; 147};
139 148
140} // namespace Service::Nvidia::Devices 149} // namespace Service::Nvidia::Devices
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_ctrl_gpu.cpp b/src/core/hle/service/nvdrv/devices/nvhost_ctrl_gpu.cpp
index 0e28755bd..988effd90 100644
--- a/src/core/hle/service/nvdrv/devices/nvhost_ctrl_gpu.cpp
+++ b/src/core/hle/service/nvdrv/devices/nvhost_ctrl_gpu.cpp
@@ -12,10 +12,11 @@
12 12
13namespace Service::Nvidia::Devices { 13namespace Service::Nvidia::Devices {
14 14
15nvhost_ctrl_gpu::nvhost_ctrl_gpu() = default; 15nvhost_ctrl_gpu::nvhost_ctrl_gpu(Core::System& system) : nvdevice(system) {}
16nvhost_ctrl_gpu::~nvhost_ctrl_gpu() = default; 16nvhost_ctrl_gpu::~nvhost_ctrl_gpu() = default;
17 17
18u32 nvhost_ctrl_gpu::ioctl(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output) { 18u32 nvhost_ctrl_gpu::ioctl(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output,
19 IoctlCtrl& ctrl) {
19 LOG_DEBUG(Service_NVDRV, "called, command=0x{:08X}, input_size=0x{:X}, output_size=0x{:X}", 20 LOG_DEBUG(Service_NVDRV, "called, command=0x{:08X}, input_size=0x{:X}, output_size=0x{:X}",
20 command.raw, input.size(), output.size()); 21 command.raw, input.size(), output.size());
21 22
@@ -185,7 +186,7 @@ u32 nvhost_ctrl_gpu::GetGpuTime(const std::vector<u8>& input, std::vector<u8>& o
185 186
186 IoctlGetGpuTime params{}; 187 IoctlGetGpuTime params{};
187 std::memcpy(&params, input.data(), input.size()); 188 std::memcpy(&params, input.data(), input.size());
188 const auto ns = Core::Timing::CyclesToNs(Core::System::GetInstance().CoreTiming().GetTicks()); 189 const auto ns = Core::Timing::CyclesToNs(system.CoreTiming().GetTicks());
189 params.gpu_time = static_cast<u64_le>(ns.count()); 190 params.gpu_time = static_cast<u64_le>(ns.count());
190 std::memcpy(output.data(), &params, output.size()); 191 std::memcpy(output.data(), &params, output.size());
191 return 0; 192 return 0;
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_ctrl_gpu.h b/src/core/hle/service/nvdrv/devices/nvhost_ctrl_gpu.h
index 240435eea..2b035ae3f 100644
--- a/src/core/hle/service/nvdrv/devices/nvhost_ctrl_gpu.h
+++ b/src/core/hle/service/nvdrv/devices/nvhost_ctrl_gpu.h
@@ -13,10 +13,11 @@ namespace Service::Nvidia::Devices {
13 13
14class nvhost_ctrl_gpu final : public nvdevice { 14class nvhost_ctrl_gpu final : public nvdevice {
15public: 15public:
16 nvhost_ctrl_gpu(); 16 explicit nvhost_ctrl_gpu(Core::System& system);
17 ~nvhost_ctrl_gpu() override; 17 ~nvhost_ctrl_gpu() override;
18 18
19 u32 ioctl(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output) override; 19 u32 ioctl(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output,
20 IoctlCtrl& ctrl) override;
20 21
21private: 22private:
22 enum class IoctlCommand : u32_le { 23 enum class IoctlCommand : u32_le {
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_gpu.cpp b/src/core/hle/service/nvdrv/devices/nvhost_gpu.cpp
index 8ce7bc7a5..241dac881 100644
--- a/src/core/hle/service/nvdrv/devices/nvhost_gpu.cpp
+++ b/src/core/hle/service/nvdrv/devices/nvhost_gpu.cpp
@@ -13,10 +13,12 @@
13 13
14namespace Service::Nvidia::Devices { 14namespace Service::Nvidia::Devices {
15 15
16nvhost_gpu::nvhost_gpu(std::shared_ptr<nvmap> nvmap_dev) : nvmap_dev(std::move(nvmap_dev)) {} 16nvhost_gpu::nvhost_gpu(Core::System& system, std::shared_ptr<nvmap> nvmap_dev)
17 : nvdevice(system), nvmap_dev(std::move(nvmap_dev)) {}
17nvhost_gpu::~nvhost_gpu() = default; 18nvhost_gpu::~nvhost_gpu() = default;
18 19
19u32 nvhost_gpu::ioctl(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output) { 20u32 nvhost_gpu::ioctl(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output,
21 IoctlCtrl& ctrl) {
20 LOG_DEBUG(Service_NVDRV, "called, command=0x{:08X}, input_size=0x{:X}, output_size=0x{:X}", 22 LOG_DEBUG(Service_NVDRV, "called, command=0x{:08X}, input_size=0x{:X}, output_size=0x{:X}",
21 command.raw, input.size(), output.size()); 23 command.raw, input.size(), output.size());
22 24
@@ -119,8 +121,10 @@ u32 nvhost_gpu::AllocGPFIFOEx2(const std::vector<u8>& input, std::vector<u8>& ou
119 params.num_entries, params.flags, params.unk0, params.unk1, params.unk2, 121 params.num_entries, params.flags, params.unk0, params.unk1, params.unk2,
120 params.unk3); 122 params.unk3);
121 123
122 params.fence_out.id = 0; 124 auto& gpu = system.GPU();
123 params.fence_out.value = 0; 125 params.fence_out.id = assigned_syncpoints;
126 params.fence_out.value = gpu.GetSyncpointValue(assigned_syncpoints);
127 assigned_syncpoints++;
124 std::memcpy(output.data(), &params, output.size()); 128 std::memcpy(output.data(), &params, output.size());
125 return 0; 129 return 0;
126} 130}
@@ -143,7 +147,7 @@ u32 nvhost_gpu::SubmitGPFIFO(const std::vector<u8>& input, std::vector<u8>& outp
143 IoctlSubmitGpfifo params{}; 147 IoctlSubmitGpfifo params{};
144 std::memcpy(&params, input.data(), sizeof(IoctlSubmitGpfifo)); 148 std::memcpy(&params, input.data(), sizeof(IoctlSubmitGpfifo));
145 LOG_WARNING(Service_NVDRV, "(STUBBED) called, gpfifo={:X}, num_entries={:X}, flags={:X}", 149 LOG_WARNING(Service_NVDRV, "(STUBBED) called, gpfifo={:X}, num_entries={:X}, flags={:X}",
146 params.address, params.num_entries, params.flags); 150 params.address, params.num_entries, params.flags.raw);
147 151
148 ASSERT_MSG(input.size() == sizeof(IoctlSubmitGpfifo) + 152 ASSERT_MSG(input.size() == sizeof(IoctlSubmitGpfifo) +
149 params.num_entries * sizeof(Tegra::CommandListHeader), 153 params.num_entries * sizeof(Tegra::CommandListHeader),
@@ -153,10 +157,18 @@ u32 nvhost_gpu::SubmitGPFIFO(const std::vector<u8>& input, std::vector<u8>& outp
153 std::memcpy(entries.data(), &input[sizeof(IoctlSubmitGpfifo)], 157 std::memcpy(entries.data(), &input[sizeof(IoctlSubmitGpfifo)],
154 params.num_entries * sizeof(Tegra::CommandListHeader)); 158 params.num_entries * sizeof(Tegra::CommandListHeader));
155 159
156 Core::System::GetInstance().GPU().PushGPUEntries(std::move(entries)); 160 UNIMPLEMENTED_IF(params.flags.add_wait.Value() != 0);
161 UNIMPLEMENTED_IF(params.flags.add_increment.Value() != 0);
162
163 auto& gpu = system.GPU();
164 u32 current_syncpoint_value = gpu.GetSyncpointValue(params.fence_out.id);
165 if (params.flags.increment.Value()) {
166 params.fence_out.value += current_syncpoint_value;
167 } else {
168 params.fence_out.value = current_syncpoint_value;
169 }
170 gpu.PushGPUEntries(std::move(entries));
157 171
158 params.fence_out.id = 0;
159 params.fence_out.value = 0;
160 std::memcpy(output.data(), &params, sizeof(IoctlSubmitGpfifo)); 172 std::memcpy(output.data(), &params, sizeof(IoctlSubmitGpfifo));
161 return 0; 173 return 0;
162} 174}
@@ -168,16 +180,24 @@ u32 nvhost_gpu::KickoffPB(const std::vector<u8>& input, std::vector<u8>& output)
168 IoctlSubmitGpfifo params{}; 180 IoctlSubmitGpfifo params{};
169 std::memcpy(&params, input.data(), sizeof(IoctlSubmitGpfifo)); 181 std::memcpy(&params, input.data(), sizeof(IoctlSubmitGpfifo));
170 LOG_WARNING(Service_NVDRV, "(STUBBED) called, gpfifo={:X}, num_entries={:X}, flags={:X}", 182 LOG_WARNING(Service_NVDRV, "(STUBBED) called, gpfifo={:X}, num_entries={:X}, flags={:X}",
171 params.address, params.num_entries, params.flags); 183 params.address, params.num_entries, params.flags.raw);
172 184
173 Tegra::CommandList entries(params.num_entries); 185 Tegra::CommandList entries(params.num_entries);
174 Memory::ReadBlock(params.address, entries.data(), 186 Memory::ReadBlock(params.address, entries.data(),
175 params.num_entries * sizeof(Tegra::CommandListHeader)); 187 params.num_entries * sizeof(Tegra::CommandListHeader));
176 188
177 Core::System::GetInstance().GPU().PushGPUEntries(std::move(entries)); 189 UNIMPLEMENTED_IF(params.flags.add_wait.Value() != 0);
190 UNIMPLEMENTED_IF(params.flags.add_increment.Value() != 0);
191
192 auto& gpu = system.GPU();
193 u32 current_syncpoint_value = gpu.GetSyncpointValue(params.fence_out.id);
194 if (params.flags.increment.Value()) {
195 params.fence_out.value += current_syncpoint_value;
196 } else {
197 params.fence_out.value = current_syncpoint_value;
198 }
199 gpu.PushGPUEntries(std::move(entries));
178 200
179 params.fence_out.id = 0;
180 params.fence_out.value = 0;
181 std::memcpy(output.data(), &params, output.size()); 201 std::memcpy(output.data(), &params, output.size());
182 return 0; 202 return 0;
183} 203}
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_gpu.h b/src/core/hle/service/nvdrv/devices/nvhost_gpu.h
index 62beb5c0c..d2e8fbae9 100644
--- a/src/core/hle/service/nvdrv/devices/nvhost_gpu.h
+++ b/src/core/hle/service/nvdrv/devices/nvhost_gpu.h
@@ -10,6 +10,7 @@
10#include "common/common_types.h" 10#include "common/common_types.h"
11#include "common/swap.h" 11#include "common/swap.h"
12#include "core/hle/service/nvdrv/devices/nvdevice.h" 12#include "core/hle/service/nvdrv/devices/nvdevice.h"
13#include "core/hle/service/nvdrv/nvdata.h"
13 14
14namespace Service::Nvidia::Devices { 15namespace Service::Nvidia::Devices {
15 16
@@ -20,10 +21,11 @@ constexpr u32 NVGPU_IOCTL_CHANNEL_KICKOFF_PB(0x1b);
20 21
21class nvhost_gpu final : public nvdevice { 22class nvhost_gpu final : public nvdevice {
22public: 23public:
23 explicit nvhost_gpu(std::shared_ptr<nvmap> nvmap_dev); 24 explicit nvhost_gpu(Core::System& system, std::shared_ptr<nvmap> nvmap_dev);
24 ~nvhost_gpu() override; 25 ~nvhost_gpu() override;
25 26
26 u32 ioctl(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output) override; 27 u32 ioctl(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output,
28 IoctlCtrl& ctrl) override;
27 29
28private: 30private:
29 enum class IoctlCommand : u32_le { 31 enum class IoctlCommand : u32_le {
@@ -113,11 +115,7 @@ private:
113 static_assert(sizeof(IoctlGetErrorNotification) == 16, 115 static_assert(sizeof(IoctlGetErrorNotification) == 16,
114 "IoctlGetErrorNotification is incorrect size"); 116 "IoctlGetErrorNotification is incorrect size");
115 117
116 struct IoctlFence { 118 static_assert(sizeof(Fence) == 8, "Fence is incorrect size");
117 u32_le id;
118 u32_le value;
119 };
120 static_assert(sizeof(IoctlFence) == 8, "IoctlFence is incorrect size");
121 119
122 struct IoctlAllocGpfifoEx { 120 struct IoctlAllocGpfifoEx {
123 u32_le num_entries; 121 u32_le num_entries;
@@ -132,13 +130,13 @@ private:
132 static_assert(sizeof(IoctlAllocGpfifoEx) == 32, "IoctlAllocGpfifoEx is incorrect size"); 130 static_assert(sizeof(IoctlAllocGpfifoEx) == 32, "IoctlAllocGpfifoEx is incorrect size");
133 131
134 struct IoctlAllocGpfifoEx2 { 132 struct IoctlAllocGpfifoEx2 {
135 u32_le num_entries; // in 133 u32_le num_entries; // in
136 u32_le flags; // in 134 u32_le flags; // in
137 u32_le unk0; // in (1 works) 135 u32_le unk0; // in (1 works)
138 IoctlFence fence_out; // out 136 Fence fence_out; // out
139 u32_le unk1; // in 137 u32_le unk1; // in
140 u32_le unk2; // in 138 u32_le unk2; // in
141 u32_le unk3; // in 139 u32_le unk3; // in
142 }; 140 };
143 static_assert(sizeof(IoctlAllocGpfifoEx2) == 32, "IoctlAllocGpfifoEx2 is incorrect size"); 141 static_assert(sizeof(IoctlAllocGpfifoEx2) == 32, "IoctlAllocGpfifoEx2 is incorrect size");
144 142
@@ -153,10 +151,16 @@ private:
153 struct IoctlSubmitGpfifo { 151 struct IoctlSubmitGpfifo {
154 u64_le address; // pointer to gpfifo entry structs 152 u64_le address; // pointer to gpfifo entry structs
155 u32_le num_entries; // number of fence objects being submitted 153 u32_le num_entries; // number of fence objects being submitted
156 u32_le flags; 154 union {
157 IoctlFence fence_out; // returned new fence object for others to wait on 155 u32_le raw;
158 }; 156 BitField<0, 1, u32_le> add_wait; // append a wait sync_point to the list
159 static_assert(sizeof(IoctlSubmitGpfifo) == 16 + sizeof(IoctlFence), 157 BitField<1, 1, u32_le> add_increment; // append an increment to the list
158 BitField<2, 1, u32_le> new_hw_format; // Mostly ignored
159 BitField<8, 1, u32_le> increment; // increment the returned fence
160 } flags;
161 Fence fence_out; // returned new fence object for others to wait on
162 };
163 static_assert(sizeof(IoctlSubmitGpfifo) == 16 + sizeof(Fence),
160 "IoctlSubmitGpfifo is incorrect size"); 164 "IoctlSubmitGpfifo is incorrect size");
161 165
162 struct IoctlGetWaitbase { 166 struct IoctlGetWaitbase {
@@ -184,6 +188,7 @@ private:
184 u32 ChannelSetTimeout(const std::vector<u8>& input, std::vector<u8>& output); 188 u32 ChannelSetTimeout(const std::vector<u8>& input, std::vector<u8>& output);
185 189
186 std::shared_ptr<nvmap> nvmap_dev; 190 std::shared_ptr<nvmap> nvmap_dev;
191 u32 assigned_syncpoints{};
187}; 192};
188 193
189} // namespace Service::Nvidia::Devices 194} // namespace Service::Nvidia::Devices
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_nvdec.cpp b/src/core/hle/service/nvdrv/devices/nvhost_nvdec.cpp
index f5e8ea7c3..f572ad30f 100644
--- a/src/core/hle/service/nvdrv/devices/nvhost_nvdec.cpp
+++ b/src/core/hle/service/nvdrv/devices/nvhost_nvdec.cpp
@@ -10,10 +10,11 @@
10 10
11namespace Service::Nvidia::Devices { 11namespace Service::Nvidia::Devices {
12 12
13nvhost_nvdec::nvhost_nvdec() = default; 13nvhost_nvdec::nvhost_nvdec(Core::System& system) : nvdevice(system) {}
14nvhost_nvdec::~nvhost_nvdec() = default; 14nvhost_nvdec::~nvhost_nvdec() = default;
15 15
16u32 nvhost_nvdec::ioctl(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output) { 16u32 nvhost_nvdec::ioctl(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output,
17 IoctlCtrl& ctrl) {
17 LOG_DEBUG(Service_NVDRV, "called, command=0x{:08X}, input_size=0x{:X}, output_size=0x{:X}", 18 LOG_DEBUG(Service_NVDRV, "called, command=0x{:08X}, input_size=0x{:X}, output_size=0x{:X}",
18 command.raw, input.size(), output.size()); 19 command.raw, input.size(), output.size());
19 20
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_nvdec.h b/src/core/hle/service/nvdrv/devices/nvhost_nvdec.h
index 0e7b284f8..2710f0511 100644
--- a/src/core/hle/service/nvdrv/devices/nvhost_nvdec.h
+++ b/src/core/hle/service/nvdrv/devices/nvhost_nvdec.h
@@ -13,10 +13,11 @@ namespace Service::Nvidia::Devices {
13 13
14class nvhost_nvdec final : public nvdevice { 14class nvhost_nvdec final : public nvdevice {
15public: 15public:
16 nvhost_nvdec(); 16 explicit nvhost_nvdec(Core::System& system);
17 ~nvhost_nvdec() override; 17 ~nvhost_nvdec() override;
18 18
19 u32 ioctl(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output) override; 19 u32 ioctl(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output,
20 IoctlCtrl& ctrl) override;
20 21
21private: 22private:
22 enum class IoctlCommand : u32_le { 23 enum class IoctlCommand : u32_le {
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_nvjpg.cpp b/src/core/hle/service/nvdrv/devices/nvhost_nvjpg.cpp
index 3e0951ab0..38282956f 100644
--- a/src/core/hle/service/nvdrv/devices/nvhost_nvjpg.cpp
+++ b/src/core/hle/service/nvdrv/devices/nvhost_nvjpg.cpp
@@ -10,10 +10,11 @@
10 10
11namespace Service::Nvidia::Devices { 11namespace Service::Nvidia::Devices {
12 12
13nvhost_nvjpg::nvhost_nvjpg() = default; 13nvhost_nvjpg::nvhost_nvjpg(Core::System& system) : nvdevice(system) {}
14nvhost_nvjpg::~nvhost_nvjpg() = default; 14nvhost_nvjpg::~nvhost_nvjpg() = default;
15 15
16u32 nvhost_nvjpg::ioctl(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output) { 16u32 nvhost_nvjpg::ioctl(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output,
17 IoctlCtrl& ctrl) {
17 LOG_DEBUG(Service_NVDRV, "called, command=0x{:08X}, input_size=0x{:X}, output_size=0x{:X}", 18 LOG_DEBUG(Service_NVDRV, "called, command=0x{:08X}, input_size=0x{:X}, output_size=0x{:X}",
18 command.raw, input.size(), output.size()); 19 command.raw, input.size(), output.size());
19 20
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_nvjpg.h b/src/core/hle/service/nvdrv/devices/nvhost_nvjpg.h
index 89fd5e95e..379766693 100644
--- a/src/core/hle/service/nvdrv/devices/nvhost_nvjpg.h
+++ b/src/core/hle/service/nvdrv/devices/nvhost_nvjpg.h
@@ -13,10 +13,11 @@ namespace Service::Nvidia::Devices {
13 13
14class nvhost_nvjpg final : public nvdevice { 14class nvhost_nvjpg final : public nvdevice {
15public: 15public:
16 nvhost_nvjpg(); 16 explicit nvhost_nvjpg(Core::System& system);
17 ~nvhost_nvjpg() override; 17 ~nvhost_nvjpg() override;
18 18
19 u32 ioctl(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output) override; 19 u32 ioctl(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output,
20 IoctlCtrl& ctrl) override;
20 21
21private: 22private:
22 enum class IoctlCommand : u32_le { 23 enum class IoctlCommand : u32_le {
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_vic.cpp b/src/core/hle/service/nvdrv/devices/nvhost_vic.cpp
index d544f0f31..70e8091db 100644
--- a/src/core/hle/service/nvdrv/devices/nvhost_vic.cpp
+++ b/src/core/hle/service/nvdrv/devices/nvhost_vic.cpp
@@ -10,10 +10,11 @@
10 10
11namespace Service::Nvidia::Devices { 11namespace Service::Nvidia::Devices {
12 12
13nvhost_vic::nvhost_vic() = default; 13nvhost_vic::nvhost_vic(Core::System& system) : nvdevice(system) {}
14nvhost_vic::~nvhost_vic() = default; 14nvhost_vic::~nvhost_vic() = default;
15 15
16u32 nvhost_vic::ioctl(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output) { 16u32 nvhost_vic::ioctl(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output,
17 IoctlCtrl& ctrl) {
17 LOG_DEBUG(Service_NVDRV, "called, command=0x{:08X}, input_size=0x{:X}, output_size=0x{:X}", 18 LOG_DEBUG(Service_NVDRV, "called, command=0x{:08X}, input_size=0x{:X}, output_size=0x{:X}",
18 command.raw, input.size(), output.size()); 19 command.raw, input.size(), output.size());
19 20
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_vic.h b/src/core/hle/service/nvdrv/devices/nvhost_vic.h
index fc24c3f9c..7d111977e 100644
--- a/src/core/hle/service/nvdrv/devices/nvhost_vic.h
+++ b/src/core/hle/service/nvdrv/devices/nvhost_vic.h
@@ -13,10 +13,11 @@ namespace Service::Nvidia::Devices {
13 13
14class nvhost_vic final : public nvdevice { 14class nvhost_vic final : public nvdevice {
15public: 15public:
16 nvhost_vic(); 16 explicit nvhost_vic(Core::System& system);
17 ~nvhost_vic() override; 17 ~nvhost_vic() override;
18 18
19 u32 ioctl(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output) override; 19 u32 ioctl(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output,
20 IoctlCtrl& ctrl) override;
20 21
21private: 22private:
22 enum class IoctlCommand : u32_le { 23 enum class IoctlCommand : u32_le {
diff --git a/src/core/hle/service/nvdrv/devices/nvmap.cpp b/src/core/hle/service/nvdrv/devices/nvmap.cpp
index 1ec796fc6..223b496b7 100644
--- a/src/core/hle/service/nvdrv/devices/nvmap.cpp
+++ b/src/core/hle/service/nvdrv/devices/nvmap.cpp
@@ -18,7 +18,7 @@ enum {
18}; 18};
19} 19}
20 20
21nvmap::nvmap() = default; 21nvmap::nvmap(Core::System& system) : nvdevice(system) {}
22nvmap::~nvmap() = default; 22nvmap::~nvmap() = default;
23 23
24VAddr nvmap::GetObjectAddress(u32 handle) const { 24VAddr nvmap::GetObjectAddress(u32 handle) const {
@@ -28,7 +28,8 @@ VAddr nvmap::GetObjectAddress(u32 handle) const {
28 return object->addr; 28 return object->addr;
29} 29}
30 30
31u32 nvmap::ioctl(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output) { 31u32 nvmap::ioctl(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output,
32 IoctlCtrl& ctrl) {
32 switch (static_cast<IoctlCommand>(command.raw)) { 33 switch (static_cast<IoctlCommand>(command.raw)) {
33 case IoctlCommand::Create: 34 case IoctlCommand::Create:
34 return IocCreate(input, output); 35 return IocCreate(input, output);
diff --git a/src/core/hle/service/nvdrv/devices/nvmap.h b/src/core/hle/service/nvdrv/devices/nvmap.h
index 396230c19..bf4a101c2 100644
--- a/src/core/hle/service/nvdrv/devices/nvmap.h
+++ b/src/core/hle/service/nvdrv/devices/nvmap.h
@@ -16,13 +16,14 @@ namespace Service::Nvidia::Devices {
16 16
17class nvmap final : public nvdevice { 17class nvmap final : public nvdevice {
18public: 18public:
19 nvmap(); 19 explicit nvmap(Core::System& system);
20 ~nvmap() override; 20 ~nvmap() override;
21 21
22 /// Returns the allocated address of an nvmap object given its handle. 22 /// Returns the allocated address of an nvmap object given its handle.
23 VAddr GetObjectAddress(u32 handle) const; 23 VAddr GetObjectAddress(u32 handle) const;
24 24
25 u32 ioctl(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output) override; 25 u32 ioctl(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output,
26 IoctlCtrl& ctrl) override;
26 27
27 /// Represents an nvmap object. 28 /// Represents an nvmap object.
28 struct Object { 29 struct Object {
diff --git a/src/core/hle/service/nvdrv/interface.cpp b/src/core/hle/service/nvdrv/interface.cpp
index b60fc748b..d5be64ed2 100644
--- a/src/core/hle/service/nvdrv/interface.cpp
+++ b/src/core/hle/service/nvdrv/interface.cpp
@@ -8,12 +8,18 @@
8#include "core/hle/ipc_helpers.h" 8#include "core/hle/ipc_helpers.h"
9#include "core/hle/kernel/kernel.h" 9#include "core/hle/kernel/kernel.h"
10#include "core/hle/kernel/readable_event.h" 10#include "core/hle/kernel/readable_event.h"
11#include "core/hle/kernel/thread.h"
11#include "core/hle/kernel/writable_event.h" 12#include "core/hle/kernel/writable_event.h"
12#include "core/hle/service/nvdrv/interface.h" 13#include "core/hle/service/nvdrv/interface.h"
14#include "core/hle/service/nvdrv/nvdata.h"
13#include "core/hle/service/nvdrv/nvdrv.h" 15#include "core/hle/service/nvdrv/nvdrv.h"
14 16
15namespace Service::Nvidia { 17namespace Service::Nvidia {
16 18
19void NVDRV::SignalGPUInterruptSyncpt(const u32 syncpoint_id, const u32 value) {
20 nvdrv->SignalSyncpt(syncpoint_id, value);
21}
22
17void NVDRV::Open(Kernel::HLERequestContext& ctx) { 23void NVDRV::Open(Kernel::HLERequestContext& ctx) {
18 LOG_DEBUG(Service_NVDRV, "called"); 24 LOG_DEBUG(Service_NVDRV, "called");
19 25
@@ -36,11 +42,31 @@ void NVDRV::Ioctl(Kernel::HLERequestContext& ctx) {
36 42
37 std::vector<u8> output(ctx.GetWriteBufferSize()); 43 std::vector<u8> output(ctx.GetWriteBufferSize());
38 44
45 IoctlCtrl ctrl{};
46
47 u32 result = nvdrv->Ioctl(fd, command, ctx.ReadBuffer(), output, ctrl);
48
49 if (ctrl.must_delay) {
50 ctrl.fresh_call = false;
51 ctx.SleepClientThread(
52 "NVServices::DelayedResponse", ctrl.timeout,
53 [=](Kernel::SharedPtr<Kernel::Thread> thread, Kernel::HLERequestContext& ctx,
54 Kernel::ThreadWakeupReason reason) {
55 IoctlCtrl ctrl2{ctrl};
56 std::vector<u8> output2 = output;
57 u32 result = nvdrv->Ioctl(fd, command, ctx.ReadBuffer(), output2, ctrl2);
58 ctx.WriteBuffer(output2);
59 IPC::ResponseBuilder rb{ctx, 3};
60 rb.Push(RESULT_SUCCESS);
61 rb.Push(result);
62 },
63 nvdrv->GetEventWriteable(ctrl.event_id));
64 } else {
65 ctx.WriteBuffer(output);
66 }
39 IPC::ResponseBuilder rb{ctx, 3}; 67 IPC::ResponseBuilder rb{ctx, 3};
40 rb.Push(RESULT_SUCCESS); 68 rb.Push(RESULT_SUCCESS);
41 rb.Push(nvdrv->Ioctl(fd, command, ctx.ReadBuffer(), output)); 69 rb.Push(result);
42
43 ctx.WriteBuffer(output);
44} 70}
45 71
46void NVDRV::Close(Kernel::HLERequestContext& ctx) { 72void NVDRV::Close(Kernel::HLERequestContext& ctx) {
@@ -66,13 +92,19 @@ void NVDRV::Initialize(Kernel::HLERequestContext& ctx) {
66void NVDRV::QueryEvent(Kernel::HLERequestContext& ctx) { 92void NVDRV::QueryEvent(Kernel::HLERequestContext& ctx) {
67 IPC::RequestParser rp{ctx}; 93 IPC::RequestParser rp{ctx};
68 u32 fd = rp.Pop<u32>(); 94 u32 fd = rp.Pop<u32>();
69 u32 event_id = rp.Pop<u32>(); 95 // TODO(Blinkhawk): Figure the meaning of the flag at bit 16
96 u32 event_id = rp.Pop<u32>() & 0x000000FF;
70 LOG_WARNING(Service_NVDRV, "(STUBBED) called, fd={:X}, event_id={:X}", fd, event_id); 97 LOG_WARNING(Service_NVDRV, "(STUBBED) called, fd={:X}, event_id={:X}", fd, event_id);
71 98
72 IPC::ResponseBuilder rb{ctx, 3, 1}; 99 IPC::ResponseBuilder rb{ctx, 3, 1};
73 rb.Push(RESULT_SUCCESS); 100 rb.Push(RESULT_SUCCESS);
74 rb.PushCopyObjects(query_event.readable); 101 if (event_id < MaxNvEvents) {
75 rb.Push<u32>(0); 102 rb.PushCopyObjects(nvdrv->GetEvent(event_id));
103 rb.Push<u32>(NvResult::Success);
104 } else {
105 rb.Push<u32>(0);
106 rb.Push<u32>(NvResult::BadParameter);
107 }
76} 108}
77 109
78void NVDRV::SetClientPID(Kernel::HLERequestContext& ctx) { 110void NVDRV::SetClientPID(Kernel::HLERequestContext& ctx) {
@@ -127,10 +159,6 @@ NVDRV::NVDRV(std::shared_ptr<Module> nvdrv, const char* name)
127 {13, &NVDRV::FinishInitialize, "FinishInitialize"}, 159 {13, &NVDRV::FinishInitialize, "FinishInitialize"},
128 }; 160 };
129 RegisterHandlers(functions); 161 RegisterHandlers(functions);
130
131 auto& kernel = Core::System::GetInstance().Kernel();
132 query_event = Kernel::WritableEvent::CreateEventPair(kernel, Kernel::ResetType::Automatic,
133 "NVDRV::query_event");
134} 162}
135 163
136NVDRV::~NVDRV() = default; 164NVDRV::~NVDRV() = default;
diff --git a/src/core/hle/service/nvdrv/interface.h b/src/core/hle/service/nvdrv/interface.h
index 5b4889910..10a0ecd52 100644
--- a/src/core/hle/service/nvdrv/interface.h
+++ b/src/core/hle/service/nvdrv/interface.h
@@ -19,6 +19,8 @@ public:
19 NVDRV(std::shared_ptr<Module> nvdrv, const char* name); 19 NVDRV(std::shared_ptr<Module> nvdrv, const char* name);
20 ~NVDRV() override; 20 ~NVDRV() override;
21 21
22 void SignalGPUInterruptSyncpt(const u32 syncpoint_id, const u32 value);
23
22private: 24private:
23 void Open(Kernel::HLERequestContext& ctx); 25 void Open(Kernel::HLERequestContext& ctx);
24 void Ioctl(Kernel::HLERequestContext& ctx); 26 void Ioctl(Kernel::HLERequestContext& ctx);
@@ -33,8 +35,6 @@ private:
33 std::shared_ptr<Module> nvdrv; 35 std::shared_ptr<Module> nvdrv;
34 36
35 u64 pid{}; 37 u64 pid{};
36
37 Kernel::EventPair query_event;
38}; 38};
39 39
40} // namespace Service::Nvidia 40} // namespace Service::Nvidia
diff --git a/src/core/hle/service/nvdrv/nvdata.h b/src/core/hle/service/nvdrv/nvdata.h
new file mode 100644
index 000000000..ac03cbc23
--- /dev/null
+++ b/src/core/hle/service/nvdrv/nvdata.h
@@ -0,0 +1,48 @@
1#pragma once
2
3#include <array>
4#include "common/common_types.h"
5
6namespace Service::Nvidia {
7
8constexpr u32 MaxSyncPoints = 192;
9constexpr u32 MaxNvEvents = 64;
10
11struct Fence {
12 s32 id;
13 u32 value;
14};
15
16static_assert(sizeof(Fence) == 8, "Fence has wrong size");
17
18struct MultiFence {
19 u32 num_fences;
20 std::array<Fence, 4> fences;
21};
22
23enum NvResult : u32 {
24 Success = 0,
25 BadParameter = 4,
26 Timeout = 5,
27 ResourceError = 15,
28};
29
30enum class EventState {
31 Free = 0,
32 Registered = 1,
33 Waiting = 2,
34 Busy = 3,
35};
36
37struct IoctlCtrl {
38 // First call done to the servioce for services that call itself again after a call.
39 bool fresh_call{true};
40 // Tells the Ioctl Wrapper that it must delay the IPC response and send the thread to sleep
41 bool must_delay{};
42 // Timeout for the delay
43 s64 timeout{};
44 // NV Event Id
45 s32 event_id{-1};
46};
47
48} // namespace Service::Nvidia
diff --git a/src/core/hle/service/nvdrv/nvdrv.cpp b/src/core/hle/service/nvdrv/nvdrv.cpp
index 6e4b8f2c6..2011a226a 100644
--- a/src/core/hle/service/nvdrv/nvdrv.cpp
+++ b/src/core/hle/service/nvdrv/nvdrv.cpp
@@ -4,7 +4,10 @@
4 4
5#include <utility> 5#include <utility>
6 6
7#include <fmt/format.h>
7#include "core/hle/ipc_helpers.h" 8#include "core/hle/ipc_helpers.h"
9#include "core/hle/kernel/readable_event.h"
10#include "core/hle/kernel/writable_event.h"
8#include "core/hle/service/nvdrv/devices/nvdevice.h" 11#include "core/hle/service/nvdrv/devices/nvdevice.h"
9#include "core/hle/service/nvdrv/devices/nvdisp_disp0.h" 12#include "core/hle/service/nvdrv/devices/nvdisp_disp0.h"
10#include "core/hle/service/nvdrv/devices/nvhost_as_gpu.h" 13#include "core/hle/service/nvdrv/devices/nvhost_as_gpu.h"
@@ -22,8 +25,9 @@
22 25
23namespace Service::Nvidia { 26namespace Service::Nvidia {
24 27
25void InstallInterfaces(SM::ServiceManager& service_manager, NVFlinger::NVFlinger& nvflinger) { 28void InstallInterfaces(SM::ServiceManager& service_manager, NVFlinger::NVFlinger& nvflinger,
26 auto module_ = std::make_shared<Module>(); 29 Core::System& system) {
30 auto module_ = std::make_shared<Module>(system);
27 std::make_shared<NVDRV>(module_, "nvdrv")->InstallAsService(service_manager); 31 std::make_shared<NVDRV>(module_, "nvdrv")->InstallAsService(service_manager);
28 std::make_shared<NVDRV>(module_, "nvdrv:a")->InstallAsService(service_manager); 32 std::make_shared<NVDRV>(module_, "nvdrv:a")->InstallAsService(service_manager);
29 std::make_shared<NVDRV>(module_, "nvdrv:s")->InstallAsService(service_manager); 33 std::make_shared<NVDRV>(module_, "nvdrv:s")->InstallAsService(service_manager);
@@ -32,17 +36,25 @@ void InstallInterfaces(SM::ServiceManager& service_manager, NVFlinger::NVFlinger
32 nvflinger.SetNVDrvInstance(module_); 36 nvflinger.SetNVDrvInstance(module_);
33} 37}
34 38
35Module::Module() { 39Module::Module(Core::System& system) {
36 auto nvmap_dev = std::make_shared<Devices::nvmap>(); 40 auto& kernel = system.Kernel();
37 devices["/dev/nvhost-as-gpu"] = std::make_shared<Devices::nvhost_as_gpu>(nvmap_dev); 41 for (u32 i = 0; i < MaxNvEvents; i++) {
38 devices["/dev/nvhost-gpu"] = std::make_shared<Devices::nvhost_gpu>(nvmap_dev); 42 std::string event_label = fmt::format("NVDRV::NvEvent_{}", i);
39 devices["/dev/nvhost-ctrl-gpu"] = std::make_shared<Devices::nvhost_ctrl_gpu>(); 43 events_interface.events[i] = Kernel::WritableEvent::CreateEventPair(
44 kernel, Kernel::ResetType::Automatic, event_label);
45 events_interface.status[i] = EventState::Free;
46 events_interface.registered[i] = false;
47 }
48 auto nvmap_dev = std::make_shared<Devices::nvmap>(system);
49 devices["/dev/nvhost-as-gpu"] = std::make_shared<Devices::nvhost_as_gpu>(system, nvmap_dev);
50 devices["/dev/nvhost-gpu"] = std::make_shared<Devices::nvhost_gpu>(system, nvmap_dev);
51 devices["/dev/nvhost-ctrl-gpu"] = std::make_shared<Devices::nvhost_ctrl_gpu>(system);
40 devices["/dev/nvmap"] = nvmap_dev; 52 devices["/dev/nvmap"] = nvmap_dev;
41 devices["/dev/nvdisp_disp0"] = std::make_shared<Devices::nvdisp_disp0>(nvmap_dev); 53 devices["/dev/nvdisp_disp0"] = std::make_shared<Devices::nvdisp_disp0>(system, nvmap_dev);
42 devices["/dev/nvhost-ctrl"] = std::make_shared<Devices::nvhost_ctrl>(); 54 devices["/dev/nvhost-ctrl"] = std::make_shared<Devices::nvhost_ctrl>(system, events_interface);
43 devices["/dev/nvhost-nvdec"] = std::make_shared<Devices::nvhost_nvdec>(); 55 devices["/dev/nvhost-nvdec"] = std::make_shared<Devices::nvhost_nvdec>(system);
44 devices["/dev/nvhost-nvjpg"] = std::make_shared<Devices::nvhost_nvjpg>(); 56 devices["/dev/nvhost-nvjpg"] = std::make_shared<Devices::nvhost_nvjpg>(system);
45 devices["/dev/nvhost-vic"] = std::make_shared<Devices::nvhost_vic>(); 57 devices["/dev/nvhost-vic"] = std::make_shared<Devices::nvhost_vic>(system);
46} 58}
47 59
48Module::~Module() = default; 60Module::~Module() = default;
@@ -59,12 +71,13 @@ u32 Module::Open(const std::string& device_name) {
59 return fd; 71 return fd;
60} 72}
61 73
62u32 Module::Ioctl(u32 fd, u32 command, const std::vector<u8>& input, std::vector<u8>& output) { 74u32 Module::Ioctl(u32 fd, u32 command, const std::vector<u8>& input, std::vector<u8>& output,
75 IoctlCtrl& ctrl) {
63 auto itr = open_files.find(fd); 76 auto itr = open_files.find(fd);
64 ASSERT_MSG(itr != open_files.end(), "Tried to talk to an invalid device"); 77 ASSERT_MSG(itr != open_files.end(), "Tried to talk to an invalid device");
65 78
66 auto& device = itr->second; 79 auto& device = itr->second;
67 return device->ioctl({command}, input, output); 80 return device->ioctl({command}, input, output, ctrl);
68} 81}
69 82
70ResultCode Module::Close(u32 fd) { 83ResultCode Module::Close(u32 fd) {
@@ -77,4 +90,22 @@ ResultCode Module::Close(u32 fd) {
77 return RESULT_SUCCESS; 90 return RESULT_SUCCESS;
78} 91}
79 92
93void Module::SignalSyncpt(const u32 syncpoint_id, const u32 value) {
94 for (u32 i = 0; i < MaxNvEvents; i++) {
95 if (events_interface.assigned_syncpt[i] == syncpoint_id &&
96 events_interface.assigned_value[i] == value) {
97 events_interface.LiberateEvent(i);
98 events_interface.events[i].writable->Signal();
99 }
100 }
101}
102
103Kernel::SharedPtr<Kernel::ReadableEvent> Module::GetEvent(const u32 event_id) const {
104 return events_interface.events[event_id].readable;
105}
106
107Kernel::SharedPtr<Kernel::WritableEvent> Module::GetEventWriteable(const u32 event_id) const {
108 return events_interface.events[event_id].writable;
109}
110
80} // namespace Service::Nvidia 111} // namespace Service::Nvidia
diff --git a/src/core/hle/service/nvdrv/nvdrv.h b/src/core/hle/service/nvdrv/nvdrv.h
index 53564f696..a339ab672 100644
--- a/src/core/hle/service/nvdrv/nvdrv.h
+++ b/src/core/hle/service/nvdrv/nvdrv.h
@@ -8,8 +8,14 @@
8#include <unordered_map> 8#include <unordered_map>
9#include <vector> 9#include <vector>
10#include "common/common_types.h" 10#include "common/common_types.h"
11#include "core/hle/kernel/writable_event.h"
12#include "core/hle/service/nvdrv/nvdata.h"
11#include "core/hle/service/service.h" 13#include "core/hle/service/service.h"
12 14
15namespace Core {
16class System;
17}
18
13namespace Service::NVFlinger { 19namespace Service::NVFlinger {
14class NVFlinger; 20class NVFlinger;
15} 21}
@@ -20,16 +26,72 @@ namespace Devices {
20class nvdevice; 26class nvdevice;
21} 27}
22 28
23struct IoctlFence { 29struct EventInterface {
24 u32 id; 30 // Mask representing currently busy events
25 u32 value; 31 u64 events_mask{};
32 // Each kernel event associated to an NV event
33 std::array<Kernel::EventPair, MaxNvEvents> events;
34 // The status of the current NVEvent
35 std::array<EventState, MaxNvEvents> status{};
36 // Tells if an NVEvent is registered or not
37 std::array<bool, MaxNvEvents> registered{};
38 // When an NVEvent is waiting on GPU interrupt, this is the sync_point
39 // associated with it.
40 std::array<u32, MaxNvEvents> assigned_syncpt{};
41 // This is the value of the GPU interrupt for which the NVEvent is waiting
42 // for.
43 std::array<u32, MaxNvEvents> assigned_value{};
44 // Constant to denote an unasigned syncpoint.
45 static constexpr u32 unassigned_syncpt = 0xFFFFFFFF;
46 std::optional<u32> GetFreeEvent() const {
47 u64 mask = events_mask;
48 for (u32 i = 0; i < MaxNvEvents; i++) {
49 const bool is_free = (mask & 0x1) == 0;
50 if (is_free) {
51 if (status[i] == EventState::Registered || status[i] == EventState::Free) {
52 return {i};
53 }
54 }
55 mask = mask >> 1;
56 }
57 return {};
58 }
59 void SetEventStatus(const u32 event_id, EventState new_status) {
60 EventState old_status = status[event_id];
61 if (old_status == new_status) {
62 return;
63 }
64 status[event_id] = new_status;
65 if (new_status == EventState::Registered) {
66 registered[event_id] = true;
67 }
68 if (new_status == EventState::Waiting || new_status == EventState::Busy) {
69 events_mask |= (1ULL << event_id);
70 }
71 }
72 void RegisterEvent(const u32 event_id) {
73 registered[event_id] = true;
74 if (status[event_id] == EventState::Free) {
75 status[event_id] = EventState::Registered;
76 }
77 }
78 void UnregisterEvent(const u32 event_id) {
79 registered[event_id] = false;
80 if (status[event_id] == EventState::Registered) {
81 status[event_id] = EventState::Free;
82 }
83 }
84 void LiberateEvent(const u32 event_id) {
85 status[event_id] = registered[event_id] ? EventState::Registered : EventState::Free;
86 events_mask &= ~(1ULL << event_id);
87 assigned_syncpt[event_id] = unassigned_syncpt;
88 assigned_value[event_id] = 0;
89 }
26}; 90};
27 91
28static_assert(sizeof(IoctlFence) == 8, "IoctlFence has wrong size");
29
30class Module final { 92class Module final {
31public: 93public:
32 Module(); 94 Module(Core::System& system);
33 ~Module(); 95 ~Module();
34 96
35 /// Returns a pointer to one of the available devices, identified by its name. 97 /// Returns a pointer to one of the available devices, identified by its name.
@@ -44,10 +106,17 @@ public:
44 /// Opens a device node and returns a file descriptor to it. 106 /// Opens a device node and returns a file descriptor to it.
45 u32 Open(const std::string& device_name); 107 u32 Open(const std::string& device_name);
46 /// Sends an ioctl command to the specified file descriptor. 108 /// Sends an ioctl command to the specified file descriptor.
47 u32 Ioctl(u32 fd, u32 command, const std::vector<u8>& input, std::vector<u8>& output); 109 u32 Ioctl(u32 fd, u32 command, const std::vector<u8>& input, std::vector<u8>& output,
110 IoctlCtrl& ctrl);
48 /// Closes a device file descriptor and returns operation success. 111 /// Closes a device file descriptor and returns operation success.
49 ResultCode Close(u32 fd); 112 ResultCode Close(u32 fd);
50 113
114 void SignalSyncpt(const u32 syncpoint_id, const u32 value);
115
116 Kernel::SharedPtr<Kernel::ReadableEvent> GetEvent(u32 event_id) const;
117
118 Kernel::SharedPtr<Kernel::WritableEvent> GetEventWriteable(u32 event_id) const;
119
51private: 120private:
52 /// Id to use for the next open file descriptor. 121 /// Id to use for the next open file descriptor.
53 u32 next_fd = 1; 122 u32 next_fd = 1;
@@ -57,9 +126,12 @@ private:
57 126
58 /// Mapping of device node names to their implementation. 127 /// Mapping of device node names to their implementation.
59 std::unordered_map<std::string, std::shared_ptr<Devices::nvdevice>> devices; 128 std::unordered_map<std::string, std::shared_ptr<Devices::nvdevice>> devices;
129
130 EventInterface events_interface;
60}; 131};
61 132
62/// Registers all NVDRV services with the specified service manager. 133/// Registers all NVDRV services with the specified service manager.
63void InstallInterfaces(SM::ServiceManager& service_manager, NVFlinger::NVFlinger& nvflinger); 134void InstallInterfaces(SM::ServiceManager& service_manager, NVFlinger::NVFlinger& nvflinger,
135 Core::System& system);
64 136
65} // namespace Service::Nvidia 137} // namespace Service::Nvidia
diff --git a/src/core/hle/service/nvflinger/buffer_queue.cpp b/src/core/hle/service/nvflinger/buffer_queue.cpp
index 5731e815f..e1a07d3ee 100644
--- a/src/core/hle/service/nvflinger/buffer_queue.cpp
+++ b/src/core/hle/service/nvflinger/buffer_queue.cpp
@@ -34,7 +34,8 @@ void BufferQueue::SetPreallocatedBuffer(u32 slot, const IGBPBuffer& igbp_buffer)
34 buffer_wait_event.writable->Signal(); 34 buffer_wait_event.writable->Signal();
35} 35}
36 36
37std::optional<u32> BufferQueue::DequeueBuffer(u32 width, u32 height) { 37std::optional<std::pair<u32, Service::Nvidia::MultiFence*>> BufferQueue::DequeueBuffer(u32 width,
38 u32 height) {
38 auto itr = std::find_if(queue.begin(), queue.end(), [&](const Buffer& buffer) { 39 auto itr = std::find_if(queue.begin(), queue.end(), [&](const Buffer& buffer) {
39 // Only consider free buffers. Buffers become free once again after they've been Acquired 40 // Only consider free buffers. Buffers become free once again after they've been Acquired
40 // and Released by the compositor, see the NVFlinger::Compose method. 41 // and Released by the compositor, see the NVFlinger::Compose method.
@@ -51,7 +52,7 @@ std::optional<u32> BufferQueue::DequeueBuffer(u32 width, u32 height) {
51 } 52 }
52 53
53 itr->status = Buffer::Status::Dequeued; 54 itr->status = Buffer::Status::Dequeued;
54 return itr->slot; 55 return {{itr->slot, &itr->multi_fence}};
55} 56}
56 57
57const IGBPBuffer& BufferQueue::RequestBuffer(u32 slot) const { 58const IGBPBuffer& BufferQueue::RequestBuffer(u32 slot) const {
@@ -63,7 +64,8 @@ const IGBPBuffer& BufferQueue::RequestBuffer(u32 slot) const {
63} 64}
64 65
65void BufferQueue::QueueBuffer(u32 slot, BufferTransformFlags transform, 66void BufferQueue::QueueBuffer(u32 slot, BufferTransformFlags transform,
66 const Common::Rectangle<int>& crop_rect) { 67 const Common::Rectangle<int>& crop_rect, u32 swap_interval,
68 Service::Nvidia::MultiFence& multi_fence) {
67 auto itr = std::find_if(queue.begin(), queue.end(), 69 auto itr = std::find_if(queue.begin(), queue.end(),
68 [&](const Buffer& buffer) { return buffer.slot == slot; }); 70 [&](const Buffer& buffer) { return buffer.slot == slot; });
69 ASSERT(itr != queue.end()); 71 ASSERT(itr != queue.end());
@@ -71,12 +73,21 @@ void BufferQueue::QueueBuffer(u32 slot, BufferTransformFlags transform,
71 itr->status = Buffer::Status::Queued; 73 itr->status = Buffer::Status::Queued;
72 itr->transform = transform; 74 itr->transform = transform;
73 itr->crop_rect = crop_rect; 75 itr->crop_rect = crop_rect;
76 itr->swap_interval = swap_interval;
77 itr->multi_fence = multi_fence;
78 queue_sequence.push_back(slot);
74} 79}
75 80
76std::optional<std::reference_wrapper<const BufferQueue::Buffer>> BufferQueue::AcquireBuffer() { 81std::optional<std::reference_wrapper<const BufferQueue::Buffer>> BufferQueue::AcquireBuffer() {
77 auto itr = std::find_if(queue.begin(), queue.end(), [](const Buffer& buffer) { 82 auto itr = queue.end();
78 return buffer.status == Buffer::Status::Queued; 83 // Iterate to find a queued buffer matching the requested slot.
79 }); 84 while (itr == queue.end() && !queue_sequence.empty()) {
85 u32 slot = queue_sequence.front();
86 itr = std::find_if(queue.begin(), queue.end(), [&slot](const Buffer& buffer) {
87 return buffer.status == Buffer::Status::Queued && buffer.slot == slot;
88 });
89 queue_sequence.pop_front();
90 }
80 if (itr == queue.end()) 91 if (itr == queue.end())
81 return {}; 92 return {};
82 itr->status = Buffer::Status::Acquired; 93 itr->status = Buffer::Status::Acquired;
diff --git a/src/core/hle/service/nvflinger/buffer_queue.h b/src/core/hle/service/nvflinger/buffer_queue.h
index e1ccb6171..356bedb81 100644
--- a/src/core/hle/service/nvflinger/buffer_queue.h
+++ b/src/core/hle/service/nvflinger/buffer_queue.h
@@ -4,6 +4,7 @@
4 4
5#pragma once 5#pragma once
6 6
7#include <list>
7#include <optional> 8#include <optional>
8#include <vector> 9#include <vector>
9 10
@@ -12,6 +13,7 @@
12#include "common/swap.h" 13#include "common/swap.h"
13#include "core/hle/kernel/object.h" 14#include "core/hle/kernel/object.h"
14#include "core/hle/kernel/writable_event.h" 15#include "core/hle/kernel/writable_event.h"
16#include "core/hle/service/nvdrv/nvdata.h"
15 17
16namespace Service::NVFlinger { 18namespace Service::NVFlinger {
17 19
@@ -68,13 +70,17 @@ public:
68 IGBPBuffer igbp_buffer; 70 IGBPBuffer igbp_buffer;
69 BufferTransformFlags transform; 71 BufferTransformFlags transform;
70 Common::Rectangle<int> crop_rect; 72 Common::Rectangle<int> crop_rect;
73 u32 swap_interval;
74 Service::Nvidia::MultiFence multi_fence;
71 }; 75 };
72 76
73 void SetPreallocatedBuffer(u32 slot, const IGBPBuffer& igbp_buffer); 77 void SetPreallocatedBuffer(u32 slot, const IGBPBuffer& igbp_buffer);
74 std::optional<u32> DequeueBuffer(u32 width, u32 height); 78 std::optional<std::pair<u32, Service::Nvidia::MultiFence*>> DequeueBuffer(u32 width,
79 u32 height);
75 const IGBPBuffer& RequestBuffer(u32 slot) const; 80 const IGBPBuffer& RequestBuffer(u32 slot) const;
76 void QueueBuffer(u32 slot, BufferTransformFlags transform, 81 void QueueBuffer(u32 slot, BufferTransformFlags transform,
77 const Common::Rectangle<int>& crop_rect); 82 const Common::Rectangle<int>& crop_rect, u32 swap_interval,
83 Service::Nvidia::MultiFence& multi_fence);
78 std::optional<std::reference_wrapper<const Buffer>> AcquireBuffer(); 84 std::optional<std::reference_wrapper<const Buffer>> AcquireBuffer();
79 void ReleaseBuffer(u32 slot); 85 void ReleaseBuffer(u32 slot);
80 u32 Query(QueryType type); 86 u32 Query(QueryType type);
@@ -92,6 +98,7 @@ private:
92 u64 layer_id; 98 u64 layer_id;
93 99
94 std::vector<Buffer> queue; 100 std::vector<Buffer> queue;
101 std::list<u32> queue_sequence;
95 Kernel::EventPair buffer_wait_event; 102 Kernel::EventPair buffer_wait_event;
96}; 103};
97 104
diff --git a/src/core/hle/service/nvflinger/nvflinger.cpp b/src/core/hle/service/nvflinger/nvflinger.cpp
index 3c5c53e24..f9db79370 100644
--- a/src/core/hle/service/nvflinger/nvflinger.cpp
+++ b/src/core/hle/service/nvflinger/nvflinger.cpp
@@ -37,15 +37,14 @@ NVFlinger::NVFlinger(Core::Timing::CoreTiming& core_timing) : core_timing{core_t
37 displays.emplace_back(4, "Null"); 37 displays.emplace_back(4, "Null");
38 38
39 // Schedule the screen composition events 39 // Schedule the screen composition events
40 const auto ticks = Settings::values.force_30fps_mode ? frame_ticks_30fps : frame_ticks; 40 composition_event = core_timing.RegisterEvent("ScreenComposition", [this](u64 userdata,
41 41 s64 cycles_late) {
42 composition_event = core_timing.RegisterEvent( 42 Compose();
43 "ScreenComposition", [this, ticks](u64 userdata, s64 cycles_late) { 43 const auto ticks = Settings::values.force_30fps_mode ? frame_ticks_30fps : GetNextTicks();
44 Compose(); 44 this->core_timing.ScheduleEvent(std::max<s64>(0LL, ticks - cycles_late), composition_event);
45 this->core_timing.ScheduleEvent(ticks - cycles_late, composition_event); 45 });
46 }); 46
47 47 core_timing.ScheduleEvent(frame_ticks, composition_event);
48 core_timing.ScheduleEvent(ticks, composition_event);
49} 48}
50 49
51NVFlinger::~NVFlinger() { 50NVFlinger::~NVFlinger() {
@@ -206,8 +205,14 @@ void NVFlinger::Compose() {
206 igbp_buffer.width, igbp_buffer.height, igbp_buffer.stride, 205 igbp_buffer.width, igbp_buffer.height, igbp_buffer.stride,
207 buffer->get().transform, buffer->get().crop_rect); 206 buffer->get().transform, buffer->get().crop_rect);
208 207
208 swap_interval = buffer->get().swap_interval;
209 buffer_queue.ReleaseBuffer(buffer->get().slot); 209 buffer_queue.ReleaseBuffer(buffer->get().slot);
210 } 210 }
211} 211}
212 212
213s64 NVFlinger::GetNextTicks() const {
214 constexpr s64 max_hertz = 120LL;
215 return (Core::Timing::BASE_CLOCK_RATE * (1LL << swap_interval)) / max_hertz;
216}
217
213} // namespace Service::NVFlinger 218} // namespace Service::NVFlinger
diff --git a/src/core/hle/service/nvflinger/nvflinger.h b/src/core/hle/service/nvflinger/nvflinger.h
index c0a83fffb..988be8726 100644
--- a/src/core/hle/service/nvflinger/nvflinger.h
+++ b/src/core/hle/service/nvflinger/nvflinger.h
@@ -74,6 +74,8 @@ public:
74 /// finished. 74 /// finished.
75 void Compose(); 75 void Compose();
76 76
77 s64 GetNextTicks() const;
78
77private: 79private:
78 /// Finds the display identified by the specified ID. 80 /// Finds the display identified by the specified ID.
79 VI::Display* FindDisplay(u64 display_id); 81 VI::Display* FindDisplay(u64 display_id);
@@ -98,6 +100,8 @@ private:
98 /// layers. 100 /// layers.
99 u32 next_buffer_queue_id = 1; 101 u32 next_buffer_queue_id = 1;
100 102
103 u32 swap_interval = 1;
104
101 /// Event that handles screen composition. 105 /// Event that handles screen composition.
102 Core::Timing::EventType* composition_event; 106 Core::Timing::EventType* composition_event;
103 107
diff --git a/src/core/hle/service/service.cpp b/src/core/hle/service/service.cpp
index 7eefd733f..3a0f8c3f6 100644
--- a/src/core/hle/service/service.cpp
+++ b/src/core/hle/service/service.cpp
@@ -206,7 +206,7 @@ void Init(std::shared_ptr<SM::ServiceManager>& sm, Core::System& system) {
206 AM::InstallInterfaces(*sm, nv_flinger, system); 206 AM::InstallInterfaces(*sm, nv_flinger, system);
207 AOC::InstallInterfaces(*sm); 207 AOC::InstallInterfaces(*sm);
208 APM::InstallInterfaces(system); 208 APM::InstallInterfaces(system);
209 Audio::InstallInterfaces(*sm); 209 Audio::InstallInterfaces(*sm, system);
210 BCAT::InstallInterfaces(*sm); 210 BCAT::InstallInterfaces(*sm);
211 BPC::InstallInterfaces(*sm); 211 BPC::InstallInterfaces(*sm);
212 BtDrv::InstallInterfaces(*sm); 212 BtDrv::InstallInterfaces(*sm);
@@ -236,7 +236,7 @@ void Init(std::shared_ptr<SM::ServiceManager>& sm, Core::System& system) {
236 NIM::InstallInterfaces(*sm); 236 NIM::InstallInterfaces(*sm);
237 NPNS::InstallInterfaces(*sm); 237 NPNS::InstallInterfaces(*sm);
238 NS::InstallInterfaces(*sm); 238 NS::InstallInterfaces(*sm);
239 Nvidia::InstallInterfaces(*sm, *nv_flinger); 239 Nvidia::InstallInterfaces(*sm, *nv_flinger, system);
240 PCIe::InstallInterfaces(*sm); 240 PCIe::InstallInterfaces(*sm);
241 PCTL::InstallInterfaces(*sm); 241 PCTL::InstallInterfaces(*sm);
242 PCV::InstallInterfaces(*sm); 242 PCV::InstallInterfaces(*sm);
diff --git a/src/core/hle/service/vi/vi.cpp b/src/core/hle/service/vi/vi.cpp
index f1fa6ccd1..199b30635 100644
--- a/src/core/hle/service/vi/vi.cpp
+++ b/src/core/hle/service/vi/vi.cpp
@@ -21,6 +21,7 @@
21#include "core/hle/kernel/readable_event.h" 21#include "core/hle/kernel/readable_event.h"
22#include "core/hle/kernel/thread.h" 22#include "core/hle/kernel/thread.h"
23#include "core/hle/kernel/writable_event.h" 23#include "core/hle/kernel/writable_event.h"
24#include "core/hle/service/nvdrv/nvdata.h"
24#include "core/hle/service/nvdrv/nvdrv.h" 25#include "core/hle/service/nvdrv/nvdrv.h"
25#include "core/hle/service/nvflinger/buffer_queue.h" 26#include "core/hle/service/nvflinger/buffer_queue.h"
26#include "core/hle/service/nvflinger/nvflinger.h" 27#include "core/hle/service/nvflinger/nvflinger.h"
@@ -328,32 +329,22 @@ public:
328 Data data; 329 Data data;
329}; 330};
330 331
331struct BufferProducerFence {
332 u32 is_valid;
333 std::array<Nvidia::IoctlFence, 4> fences;
334};
335static_assert(sizeof(BufferProducerFence) == 36, "BufferProducerFence has wrong size");
336
337class IGBPDequeueBufferResponseParcel : public Parcel { 332class IGBPDequeueBufferResponseParcel : public Parcel {
338public: 333public:
339 explicit IGBPDequeueBufferResponseParcel(u32 slot) : slot(slot) {} 334 explicit IGBPDequeueBufferResponseParcel(u32 slot, Service::Nvidia::MultiFence& multi_fence)
335 : slot(slot), multi_fence(multi_fence) {}
340 ~IGBPDequeueBufferResponseParcel() override = default; 336 ~IGBPDequeueBufferResponseParcel() override = default;
341 337
342protected: 338protected:
343 void SerializeData() override { 339 void SerializeData() override {
344 // TODO(Subv): Find out how this Fence is used.
345 BufferProducerFence fence = {};
346 fence.is_valid = 1;
347 for (auto& fence_ : fence.fences)
348 fence_.id = -1;
349
350 Write(slot); 340 Write(slot);
351 Write<u32_le>(1); 341 Write<u32_le>(1);
352 WriteObject(fence); 342 WriteObject(multi_fence);
353 Write<u32_le>(0); 343 Write<u32_le>(0);
354 } 344 }
355 345
356 u32_le slot; 346 u32_le slot;
347 Service::Nvidia::MultiFence multi_fence;
357}; 348};
358 349
359class IGBPRequestBufferRequestParcel : public Parcel { 350class IGBPRequestBufferRequestParcel : public Parcel {
@@ -400,12 +391,6 @@ public:
400 data = Read<Data>(); 391 data = Read<Data>();
401 } 392 }
402 393
403 struct Fence {
404 u32_le id;
405 u32_le value;
406 };
407 static_assert(sizeof(Fence) == 8, "Fence has wrong size");
408
409 struct Data { 394 struct Data {
410 u32_le slot; 395 u32_le slot;
411 INSERT_PADDING_WORDS(3); 396 INSERT_PADDING_WORDS(3);
@@ -418,15 +403,15 @@ public:
418 s32_le scaling_mode; 403 s32_le scaling_mode;
419 NVFlinger::BufferQueue::BufferTransformFlags transform; 404 NVFlinger::BufferQueue::BufferTransformFlags transform;
420 u32_le sticky_transform; 405 u32_le sticky_transform;
421 INSERT_PADDING_WORDS(2); 406 INSERT_PADDING_WORDS(1);
422 u32_le fence_is_valid; 407 u32_le swap_interval;
423 std::array<Fence, 2> fences; 408 Service::Nvidia::MultiFence multi_fence;
424 409
425 Common::Rectangle<int> GetCropRect() const { 410 Common::Rectangle<int> GetCropRect() const {
426 return {crop_left, crop_top, crop_right, crop_bottom}; 411 return {crop_left, crop_top, crop_right, crop_bottom};
427 } 412 }
428 }; 413 };
429 static_assert(sizeof(Data) == 80, "ParcelData has wrong size"); 414 static_assert(sizeof(Data) == 96, "ParcelData has wrong size");
430 415
431 Data data; 416 Data data;
432}; 417};
@@ -547,11 +532,11 @@ private:
547 IGBPDequeueBufferRequestParcel request{ctx.ReadBuffer()}; 532 IGBPDequeueBufferRequestParcel request{ctx.ReadBuffer()};
548 const u32 width{request.data.width}; 533 const u32 width{request.data.width};
549 const u32 height{request.data.height}; 534 const u32 height{request.data.height};
550 std::optional<u32> slot = buffer_queue.DequeueBuffer(width, height); 535 auto result = buffer_queue.DequeueBuffer(width, height);
551 536
552 if (slot) { 537 if (result) {
553 // Buffer is available 538 // Buffer is available
554 IGBPDequeueBufferResponseParcel response{*slot}; 539 IGBPDequeueBufferResponseParcel response{result->first, *result->second};
555 ctx.WriteBuffer(response.Serialize()); 540 ctx.WriteBuffer(response.Serialize());
556 } else { 541 } else {
557 // Wait the current thread until a buffer becomes available 542 // Wait the current thread until a buffer becomes available
@@ -561,10 +546,10 @@ private:
561 Kernel::ThreadWakeupReason reason) { 546 Kernel::ThreadWakeupReason reason) {
562 // Repeat TransactParcel DequeueBuffer when a buffer is available 547 // Repeat TransactParcel DequeueBuffer when a buffer is available
563 auto& buffer_queue = nv_flinger->FindBufferQueue(id); 548 auto& buffer_queue = nv_flinger->FindBufferQueue(id);
564 std::optional<u32> slot = buffer_queue.DequeueBuffer(width, height); 549 auto result = buffer_queue.DequeueBuffer(width, height);
565 ASSERT_MSG(slot != std::nullopt, "Could not dequeue buffer."); 550 ASSERT_MSG(result != std::nullopt, "Could not dequeue buffer.");
566 551
567 IGBPDequeueBufferResponseParcel response{*slot}; 552 IGBPDequeueBufferResponseParcel response{result->first, *result->second};
568 ctx.WriteBuffer(response.Serialize()); 553 ctx.WriteBuffer(response.Serialize());
569 IPC::ResponseBuilder rb{ctx, 2}; 554 IPC::ResponseBuilder rb{ctx, 2};
570 rb.Push(RESULT_SUCCESS); 555 rb.Push(RESULT_SUCCESS);
@@ -582,7 +567,8 @@ private:
582 IGBPQueueBufferRequestParcel request{ctx.ReadBuffer()}; 567 IGBPQueueBufferRequestParcel request{ctx.ReadBuffer()};
583 568
584 buffer_queue.QueueBuffer(request.data.slot, request.data.transform, 569 buffer_queue.QueueBuffer(request.data.slot, request.data.transform,
585 request.data.GetCropRect()); 570 request.data.GetCropRect(), request.data.swap_interval,
571 request.data.multi_fence);
586 572
587 IGBPQueueBufferResponseParcel response{1280, 720}; 573 IGBPQueueBufferResponseParcel response{1280, 720};
588 ctx.WriteBuffer(response.Serialize()); 574 ctx.WriteBuffer(response.Serialize());
diff --git a/src/core/loader/elf.cpp b/src/core/loader/elf.cpp
index 6d4b02375..f1795fdd6 100644
--- a/src/core/loader/elf.cpp
+++ b/src/core/loader/elf.cpp
@@ -295,7 +295,7 @@ Kernel::CodeSet ElfReader::LoadInto(VAddr vaddr) {
295 } 295 }
296 } 296 }
297 297
298 std::vector<u8> program_image(total_image_size); 298 Kernel::PhysicalMemory program_image(total_image_size);
299 std::size_t current_image_position = 0; 299 std::size_t current_image_position = 0;
300 300
301 Kernel::CodeSet codeset; 301 Kernel::CodeSet codeset;
diff --git a/src/core/loader/kip.cpp b/src/core/loader/kip.cpp
index 70051c13a..474b55cb1 100644
--- a/src/core/loader/kip.cpp
+++ b/src/core/loader/kip.cpp
@@ -69,7 +69,7 @@ AppLoader::LoadResult AppLoader_KIP::Load(Kernel::Process& process) {
69 69
70 const VAddr base_address = process.VMManager().GetCodeRegionBaseAddress(); 70 const VAddr base_address = process.VMManager().GetCodeRegionBaseAddress();
71 Kernel::CodeSet codeset; 71 Kernel::CodeSet codeset;
72 std::vector<u8> program_image; 72 Kernel::PhysicalMemory program_image;
73 73
74 const auto load_segment = [&program_image](Kernel::CodeSet::Segment& segment, 74 const auto load_segment = [&program_image](Kernel::CodeSet::Segment& segment,
75 const std::vector<u8>& data, u32 offset) { 75 const std::vector<u8>& data, u32 offset) {
diff --git a/src/core/loader/nro.cpp b/src/core/loader/nro.cpp
index 6a0ca389b..e92e2e06e 100644
--- a/src/core/loader/nro.cpp
+++ b/src/core/loader/nro.cpp
@@ -143,7 +143,7 @@ static bool LoadNroImpl(Kernel::Process& process, const std::vector<u8>& data,
143 } 143 }
144 144
145 // Build program image 145 // Build program image
146 std::vector<u8> program_image(PageAlignSize(nro_header.file_size)); 146 Kernel::PhysicalMemory program_image(PageAlignSize(nro_header.file_size));
147 std::memcpy(program_image.data(), data.data(), program_image.size()); 147 std::memcpy(program_image.data(), data.data(), program_image.size());
148 if (program_image.size() != PageAlignSize(nro_header.file_size)) { 148 if (program_image.size() != PageAlignSize(nro_header.file_size)) {
149 return {}; 149 return {};
diff --git a/src/core/loader/nso.cpp b/src/core/loader/nso.cpp
index 29311404a..70c90109f 100644
--- a/src/core/loader/nso.cpp
+++ b/src/core/loader/nso.cpp
@@ -89,7 +89,7 @@ std::optional<VAddr> AppLoader_NSO::LoadModule(Kernel::Process& process,
89 89
90 // Build program image 90 // Build program image
91 Kernel::CodeSet codeset; 91 Kernel::CodeSet codeset;
92 std::vector<u8> program_image; 92 Kernel::PhysicalMemory program_image;
93 for (std::size_t i = 0; i < nso_header.segments.size(); ++i) { 93 for (std::size_t i = 0; i < nso_header.segments.size(); ++i) {
94 std::vector<u8> data = 94 std::vector<u8> data =
95 file.ReadBytes(nso_header.segments_compressed_size[i], nso_header.segments[i].offset); 95 file.ReadBytes(nso_header.segments_compressed_size[i], nso_header.segments[i].offset);
diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt
index 7c18c27b3..e2f85c5f1 100644
--- a/src/video_core/CMakeLists.txt
+++ b/src/video_core/CMakeLists.txt
@@ -1,5 +1,7 @@
1add_library(video_core STATIC 1add_library(video_core STATIC
2 buffer_cache.h 2 buffer_cache/buffer_block.h
3 buffer_cache/buffer_cache.h
4 buffer_cache/map_interval.h
3 dma_pusher.cpp 5 dma_pusher.cpp
4 dma_pusher.h 6 dma_pusher.h
5 debug_utils/debug_utils.cpp 7 debug_utils/debug_utils.cpp
@@ -100,6 +102,7 @@ add_library(video_core STATIC
100 shader/decode/integer_set.cpp 102 shader/decode/integer_set.cpp
101 shader/decode/half_set.cpp 103 shader/decode/half_set.cpp
102 shader/decode/video.cpp 104 shader/decode/video.cpp
105 shader/decode/warp.cpp
103 shader/decode/xmad.cpp 106 shader/decode/xmad.cpp
104 shader/decode/other.cpp 107 shader/decode/other.cpp
105 shader/control_flow.cpp 108 shader/control_flow.cpp
diff --git a/src/video_core/buffer_cache.h b/src/video_core/buffer_cache.h
deleted file mode 100644
index 6f868b8b4..000000000
--- a/src/video_core/buffer_cache.h
+++ /dev/null
@@ -1,299 +0,0 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <array>
8#include <memory>
9#include <mutex>
10#include <unordered_map>
11#include <unordered_set>
12#include <utility>
13#include <vector>
14
15#include "common/alignment.h"
16#include "common/common_types.h"
17#include "core/core.h"
18#include "video_core/memory_manager.h"
19#include "video_core/rasterizer_cache.h"
20
21namespace VideoCore {
22class RasterizerInterface;
23}
24
25namespace VideoCommon {
26
27template <typename BufferStorageType>
28class CachedBuffer final : public RasterizerCacheObject {
29public:
30 explicit CachedBuffer(VAddr cpu_addr, u8* host_ptr)
31 : RasterizerCacheObject{host_ptr}, host_ptr{host_ptr}, cpu_addr{cpu_addr} {}
32 ~CachedBuffer() override = default;
33
34 VAddr GetCpuAddr() const override {
35 return cpu_addr;
36 }
37
38 std::size_t GetSizeInBytes() const override {
39 return size;
40 }
41
42 u8* GetWritableHostPtr() const {
43 return host_ptr;
44 }
45
46 std::size_t GetSize() const {
47 return size;
48 }
49
50 std::size_t GetCapacity() const {
51 return capacity;
52 }
53
54 bool IsInternalized() const {
55 return is_internal;
56 }
57
58 const BufferStorageType& GetBuffer() const {
59 return buffer;
60 }
61
62 void SetSize(std::size_t new_size) {
63 size = new_size;
64 }
65
66 void SetInternalState(bool is_internal_) {
67 is_internal = is_internal_;
68 }
69
70 BufferStorageType ExchangeBuffer(BufferStorageType buffer_, std::size_t new_capacity) {
71 capacity = new_capacity;
72 std::swap(buffer, buffer_);
73 return buffer_;
74 }
75
76private:
77 u8* host_ptr{};
78 VAddr cpu_addr{};
79 std::size_t size{};
80 std::size_t capacity{};
81 bool is_internal{};
82 BufferStorageType buffer;
83};
84
85template <typename BufferStorageType, typename BufferType, typename StreamBuffer>
86class BufferCache : public RasterizerCache<std::shared_ptr<CachedBuffer<BufferStorageType>>> {
87public:
88 using Buffer = std::shared_ptr<CachedBuffer<BufferStorageType>>;
89 using BufferInfo = std::pair<const BufferType*, u64>;
90
91 explicit BufferCache(VideoCore::RasterizerInterface& rasterizer, Core::System& system,
92 std::unique_ptr<StreamBuffer> stream_buffer)
93 : RasterizerCache<Buffer>{rasterizer}, system{system},
94 stream_buffer{std::move(stream_buffer)}, stream_buffer_handle{
95 this->stream_buffer->GetHandle()} {}
96 ~BufferCache() = default;
97
98 void Unregister(const Buffer& entry) override {
99 std::lock_guard lock{RasterizerCache<Buffer>::mutex};
100 if (entry->IsInternalized()) {
101 internalized_entries.erase(entry->GetCacheAddr());
102 }
103 ReserveBuffer(entry);
104 RasterizerCache<Buffer>::Unregister(entry);
105 }
106
107 void TickFrame() {
108 marked_for_destruction_index =
109 (marked_for_destruction_index + 1) % marked_for_destruction_ring_buffer.size();
110 MarkedForDestruction().clear();
111 }
112
113 BufferInfo UploadMemory(GPUVAddr gpu_addr, std::size_t size, std::size_t alignment = 4,
114 bool internalize = false, bool is_written = false) {
115 std::lock_guard lock{RasterizerCache<Buffer>::mutex};
116
117 auto& memory_manager = system.GPU().MemoryManager();
118 const auto host_ptr = memory_manager.GetPointer(gpu_addr);
119 if (!host_ptr) {
120 return {GetEmptyBuffer(size), 0};
121 }
122 const auto cache_addr = ToCacheAddr(host_ptr);
123
124 // Cache management is a big overhead, so only cache entries with a given size.
125 // TODO: Figure out which size is the best for given games.
126 constexpr std::size_t max_stream_size = 0x800;
127 if (!internalize && size < max_stream_size &&
128 internalized_entries.find(cache_addr) == internalized_entries.end()) {
129 return StreamBufferUpload(host_ptr, size, alignment);
130 }
131
132 auto entry = RasterizerCache<Buffer>::TryGet(cache_addr);
133 if (!entry) {
134 return FixedBufferUpload(gpu_addr, host_ptr, size, internalize, is_written);
135 }
136
137 if (entry->GetSize() < size) {
138 IncreaseBufferSize(entry, size);
139 }
140 if (is_written) {
141 entry->MarkAsModified(true, *this);
142 }
143 return {ToHandle(entry->GetBuffer()), 0};
144 }
145
146 /// Uploads from a host memory. Returns the OpenGL buffer where it's located and its offset.
147 BufferInfo UploadHostMemory(const void* raw_pointer, std::size_t size,
148 std::size_t alignment = 4) {
149 std::lock_guard lock{RasterizerCache<Buffer>::mutex};
150 return StreamBufferUpload(raw_pointer, size, alignment);
151 }
152
153 void Map(std::size_t max_size) {
154 std::tie(buffer_ptr, buffer_offset_base, invalidated) = stream_buffer->Map(max_size, 4);
155 buffer_offset = buffer_offset_base;
156 }
157
158 /// Finishes the upload stream, returns true on bindings invalidation.
159 bool Unmap() {
160 stream_buffer->Unmap(buffer_offset - buffer_offset_base);
161 return std::exchange(invalidated, false);
162 }
163
164 virtual const BufferType* GetEmptyBuffer(std::size_t size) = 0;
165
166protected:
167 void FlushObjectInner(const Buffer& entry) override {
168 DownloadBufferData(entry->GetBuffer(), 0, entry->GetSize(), entry->GetWritableHostPtr());
169 }
170
171 virtual BufferStorageType CreateBuffer(std::size_t size) = 0;
172
173 virtual const BufferType* ToHandle(const BufferStorageType& storage) = 0;
174
175 virtual void UploadBufferData(const BufferStorageType& buffer, std::size_t offset,
176 std::size_t size, const u8* data) = 0;
177
178 virtual void DownloadBufferData(const BufferStorageType& buffer, std::size_t offset,
179 std::size_t size, u8* data) = 0;
180
181 virtual void CopyBufferData(const BufferStorageType& src, const BufferStorageType& dst,
182 std::size_t src_offset, std::size_t dst_offset,
183 std::size_t size) = 0;
184
185private:
186 BufferInfo StreamBufferUpload(const void* raw_pointer, std::size_t size,
187 std::size_t alignment) {
188 AlignBuffer(alignment);
189 const std::size_t uploaded_offset = buffer_offset;
190 std::memcpy(buffer_ptr, raw_pointer, size);
191
192 buffer_ptr += size;
193 buffer_offset += size;
194 return {&stream_buffer_handle, uploaded_offset};
195 }
196
197 BufferInfo FixedBufferUpload(GPUVAddr gpu_addr, u8* host_ptr, std::size_t size,
198 bool internalize, bool is_written) {
199 auto& memory_manager = Core::System::GetInstance().GPU().MemoryManager();
200 const auto cpu_addr = memory_manager.GpuToCpuAddress(gpu_addr);
201 ASSERT(cpu_addr);
202
203 auto entry = GetUncachedBuffer(*cpu_addr, host_ptr);
204 entry->SetSize(size);
205 entry->SetInternalState(internalize);
206 RasterizerCache<Buffer>::Register(entry);
207
208 if (internalize) {
209 internalized_entries.emplace(ToCacheAddr(host_ptr));
210 }
211 if (is_written) {
212 entry->MarkAsModified(true, *this);
213 }
214
215 if (entry->GetCapacity() < size) {
216 MarkedForDestruction().push_back(entry->ExchangeBuffer(CreateBuffer(size), size));
217 }
218
219 UploadBufferData(entry->GetBuffer(), 0, size, host_ptr);
220 return {ToHandle(entry->GetBuffer()), 0};
221 }
222
223 void IncreaseBufferSize(Buffer& entry, std::size_t new_size) {
224 const std::size_t old_size = entry->GetSize();
225 if (entry->GetCapacity() < new_size) {
226 const auto& old_buffer = entry->GetBuffer();
227 auto new_buffer = CreateBuffer(new_size);
228
229 // Copy bits from the old buffer to the new buffer.
230 CopyBufferData(old_buffer, new_buffer, 0, 0, old_size);
231 MarkedForDestruction().push_back(
232 entry->ExchangeBuffer(std::move(new_buffer), new_size));
233
234 // This buffer could have been used
235 invalidated = true;
236 }
237 // Upload the new bits.
238 const std::size_t size_diff = new_size - old_size;
239 UploadBufferData(entry->GetBuffer(), old_size, size_diff, entry->GetHostPtr() + old_size);
240
241 // Update entry's size in the object and in the cache.
242 Unregister(entry);
243
244 entry->SetSize(new_size);
245 RasterizerCache<Buffer>::Register(entry);
246 }
247
248 Buffer GetUncachedBuffer(VAddr cpu_addr, u8* host_ptr) {
249 if (auto entry = TryGetReservedBuffer(host_ptr)) {
250 return entry;
251 }
252 return std::make_shared<CachedBuffer<BufferStorageType>>(cpu_addr, host_ptr);
253 }
254
255 Buffer TryGetReservedBuffer(u8* host_ptr) {
256 const auto it = buffer_reserve.find(ToCacheAddr(host_ptr));
257 if (it == buffer_reserve.end()) {
258 return {};
259 }
260 auto& reserve = it->second;
261 auto entry = reserve.back();
262 reserve.pop_back();
263 return entry;
264 }
265
266 void ReserveBuffer(Buffer entry) {
267 buffer_reserve[entry->GetCacheAddr()].push_back(std::move(entry));
268 }
269
270 void AlignBuffer(std::size_t alignment) {
271 // Align the offset, not the mapped pointer
272 const std::size_t offset_aligned = Common::AlignUp(buffer_offset, alignment);
273 buffer_ptr += offset_aligned - buffer_offset;
274 buffer_offset = offset_aligned;
275 }
276
277 std::vector<BufferStorageType>& MarkedForDestruction() {
278 return marked_for_destruction_ring_buffer[marked_for_destruction_index];
279 }
280
281 Core::System& system;
282
283 std::unique_ptr<StreamBuffer> stream_buffer;
284 BufferType stream_buffer_handle{};
285
286 bool invalidated = false;
287
288 u8* buffer_ptr = nullptr;
289 u64 buffer_offset = 0;
290 u64 buffer_offset_base = 0;
291
292 std::size_t marked_for_destruction_index = 0;
293 std::array<std::vector<BufferStorageType>, 4> marked_for_destruction_ring_buffer;
294
295 std::unordered_set<CacheAddr> internalized_entries;
296 std::unordered_map<CacheAddr, std::vector<Buffer>> buffer_reserve;
297};
298
299} // namespace VideoCommon
diff --git a/src/video_core/buffer_cache/buffer_block.h b/src/video_core/buffer_cache/buffer_block.h
new file mode 100644
index 000000000..4b9193182
--- /dev/null
+++ b/src/video_core/buffer_cache/buffer_block.h
@@ -0,0 +1,76 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <unordered_set>
8#include <utility>
9
10#include "common/alignment.h"
11#include "common/common_types.h"
12#include "video_core/gpu.h"
13
14namespace VideoCommon {
15
16class BufferBlock {
17public:
18 bool Overlaps(const CacheAddr start, const CacheAddr end) const {
19 return (cache_addr < end) && (cache_addr_end > start);
20 }
21
22 bool IsInside(const CacheAddr other_start, const CacheAddr other_end) const {
23 return cache_addr <= other_start && other_end <= cache_addr_end;
24 }
25
26 u8* GetWritableHostPtr() const {
27 return FromCacheAddr(cache_addr);
28 }
29
30 u8* GetWritableHostPtr(std::size_t offset) const {
31 return FromCacheAddr(cache_addr + offset);
32 }
33
34 std::size_t GetOffset(const CacheAddr in_addr) {
35 return static_cast<std::size_t>(in_addr - cache_addr);
36 }
37
38 CacheAddr GetCacheAddr() const {
39 return cache_addr;
40 }
41
42 CacheAddr GetCacheAddrEnd() const {
43 return cache_addr_end;
44 }
45
46 void SetCacheAddr(const CacheAddr new_addr) {
47 cache_addr = new_addr;
48 cache_addr_end = new_addr + size;
49 }
50
51 std::size_t GetSize() const {
52 return size;
53 }
54
55 void SetEpoch(u64 new_epoch) {
56 epoch = new_epoch;
57 }
58
59 u64 GetEpoch() {
60 return epoch;
61 }
62
63protected:
64 explicit BufferBlock(CacheAddr cache_addr, const std::size_t size) : size{size} {
65 SetCacheAddr(cache_addr);
66 }
67 ~BufferBlock() = default;
68
69private:
70 CacheAddr cache_addr{};
71 CacheAddr cache_addr_end{};
72 std::size_t size{};
73 u64 epoch{};
74};
75
76} // namespace VideoCommon
diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h
new file mode 100644
index 000000000..2442ddfd6
--- /dev/null
+++ b/src/video_core/buffer_cache/buffer_cache.h
@@ -0,0 +1,447 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <array>
8#include <memory>
9#include <mutex>
10#include <unordered_map>
11#include <unordered_set>
12#include <utility>
13#include <vector>
14
15#include "common/alignment.h"
16#include "common/common_types.h"
17#include "core/core.h"
18#include "video_core/buffer_cache/buffer_block.h"
19#include "video_core/buffer_cache/map_interval.h"
20#include "video_core/memory_manager.h"
21#include "video_core/rasterizer_interface.h"
22
23namespace VideoCommon {
24
25using MapInterval = std::shared_ptr<MapIntervalBase>;
26
27template <typename TBuffer, typename TBufferType, typename StreamBuffer>
28class BufferCache {
29public:
30 using BufferInfo = std::pair<const TBufferType*, u64>;
31
32 BufferInfo UploadMemory(GPUVAddr gpu_addr, std::size_t size, std::size_t alignment = 4,
33 bool is_written = false) {
34 std::lock_guard lock{mutex};
35
36 auto& memory_manager = system.GPU().MemoryManager();
37 const auto host_ptr = memory_manager.GetPointer(gpu_addr);
38 if (!host_ptr) {
39 return {GetEmptyBuffer(size), 0};
40 }
41 const auto cache_addr = ToCacheAddr(host_ptr);
42
43 // Cache management is a big overhead, so only cache entries with a given size.
44 // TODO: Figure out which size is the best for given games.
45 constexpr std::size_t max_stream_size = 0x800;
46 if (size < max_stream_size) {
47 if (!is_written && !IsRegionWritten(cache_addr, cache_addr + size - 1)) {
48 return StreamBufferUpload(host_ptr, size, alignment);
49 }
50 }
51
52 auto block = GetBlock(cache_addr, size);
53 auto map = MapAddress(block, gpu_addr, cache_addr, size);
54 if (is_written) {
55 map->MarkAsModified(true, GetModifiedTicks());
56 if (!map->IsWritten()) {
57 map->MarkAsWritten(true);
58 MarkRegionAsWritten(map->GetStart(), map->GetEnd() - 1);
59 }
60 } else {
61 if (map->IsWritten()) {
62 WriteBarrier();
63 }
64 }
65
66 const u64 offset = static_cast<u64>(block->GetOffset(cache_addr));
67
68 return {ToHandle(block), offset};
69 }
70
71 /// Uploads from a host memory. Returns the OpenGL buffer where it's located and its offset.
72 BufferInfo UploadHostMemory(const void* raw_pointer, std::size_t size,
73 std::size_t alignment = 4) {
74 std::lock_guard lock{mutex};
75 return StreamBufferUpload(raw_pointer, size, alignment);
76 }
77
78 void Map(std::size_t max_size) {
79 std::lock_guard lock{mutex};
80
81 std::tie(buffer_ptr, buffer_offset_base, invalidated) = stream_buffer->Map(max_size, 4);
82 buffer_offset = buffer_offset_base;
83 }
84
85 /// Finishes the upload stream, returns true on bindings invalidation.
86 bool Unmap() {
87 std::lock_guard lock{mutex};
88
89 stream_buffer->Unmap(buffer_offset - buffer_offset_base);
90 return std::exchange(invalidated, false);
91 }
92
93 void TickFrame() {
94 ++epoch;
95 while (!pending_destruction.empty()) {
96 if (pending_destruction.front()->GetEpoch() + 1 > epoch) {
97 break;
98 }
99 pending_destruction.pop_front();
100 }
101 }
102
103 /// Write any cached resources overlapping the specified region back to memory
104 void FlushRegion(CacheAddr addr, std::size_t size) {
105 std::lock_guard lock{mutex};
106
107 std::vector<MapInterval> objects = GetMapsInRange(addr, size);
108 std::sort(objects.begin(), objects.end(), [](const MapInterval& a, const MapInterval& b) {
109 return a->GetModificationTick() < b->GetModificationTick();
110 });
111 for (auto& object : objects) {
112 if (object->IsModified() && object->IsRegistered()) {
113 FlushMap(object);
114 }
115 }
116 }
117
118 /// Mark the specified region as being invalidated
119 void InvalidateRegion(CacheAddr addr, u64 size) {
120 std::lock_guard lock{mutex};
121
122 std::vector<MapInterval> objects = GetMapsInRange(addr, size);
123 for (auto& object : objects) {
124 if (object->IsRegistered()) {
125 Unregister(object);
126 }
127 }
128 }
129
130 virtual const TBufferType* GetEmptyBuffer(std::size_t size) = 0;
131
132protected:
133 explicit BufferCache(VideoCore::RasterizerInterface& rasterizer, Core::System& system,
134 std::unique_ptr<StreamBuffer> stream_buffer)
135 : rasterizer{rasterizer}, system{system}, stream_buffer{std::move(stream_buffer)},
136 stream_buffer_handle{this->stream_buffer->GetHandle()} {}
137
138 ~BufferCache() = default;
139
140 virtual const TBufferType* ToHandle(const TBuffer& storage) = 0;
141
142 virtual void WriteBarrier() = 0;
143
144 virtual TBuffer CreateBlock(CacheAddr cache_addr, std::size_t size) = 0;
145
146 virtual void UploadBlockData(const TBuffer& buffer, std::size_t offset, std::size_t size,
147 const u8* data) = 0;
148
149 virtual void DownloadBlockData(const TBuffer& buffer, std::size_t offset, std::size_t size,
150 u8* data) = 0;
151
152 virtual void CopyBlock(const TBuffer& src, const TBuffer& dst, std::size_t src_offset,
153 std::size_t dst_offset, std::size_t size) = 0;
154
155 /// Register an object into the cache
156 void Register(const MapInterval& new_map, bool inherit_written = false) {
157 const CacheAddr cache_ptr = new_map->GetStart();
158 const std::optional<VAddr> cpu_addr =
159 system.GPU().MemoryManager().GpuToCpuAddress(new_map->GetGpuAddress());
160 if (!cache_ptr || !cpu_addr) {
161 LOG_CRITICAL(HW_GPU, "Failed to register buffer with unmapped gpu_address 0x{:016x}",
162 new_map->GetGpuAddress());
163 return;
164 }
165 const std::size_t size = new_map->GetEnd() - new_map->GetStart();
166 new_map->SetCpuAddress(*cpu_addr);
167 new_map->MarkAsRegistered(true);
168 const IntervalType interval{new_map->GetStart(), new_map->GetEnd()};
169 mapped_addresses.insert({interval, new_map});
170 rasterizer.UpdatePagesCachedCount(*cpu_addr, size, 1);
171 if (inherit_written) {
172 MarkRegionAsWritten(new_map->GetStart(), new_map->GetEnd() - 1);
173 new_map->MarkAsWritten(true);
174 }
175 }
176
177 /// Unregisters an object from the cache
178 void Unregister(MapInterval& map) {
179 const std::size_t size = map->GetEnd() - map->GetStart();
180 rasterizer.UpdatePagesCachedCount(map->GetCpuAddress(), size, -1);
181 map->MarkAsRegistered(false);
182 if (map->IsWritten()) {
183 UnmarkRegionAsWritten(map->GetStart(), map->GetEnd() - 1);
184 }
185 const IntervalType delete_interval{map->GetStart(), map->GetEnd()};
186 mapped_addresses.erase(delete_interval);
187 }
188
189private:
190 MapInterval CreateMap(const CacheAddr start, const CacheAddr end, const GPUVAddr gpu_addr) {
191 return std::make_shared<MapIntervalBase>(start, end, gpu_addr);
192 }
193
194 MapInterval MapAddress(const TBuffer& block, const GPUVAddr gpu_addr,
195 const CacheAddr cache_addr, const std::size_t size) {
196
197 std::vector<MapInterval> overlaps = GetMapsInRange(cache_addr, size);
198 if (overlaps.empty()) {
199 const CacheAddr cache_addr_end = cache_addr + size;
200 MapInterval new_map = CreateMap(cache_addr, cache_addr_end, gpu_addr);
201 u8* host_ptr = FromCacheAddr(cache_addr);
202 UploadBlockData(block, block->GetOffset(cache_addr), size, host_ptr);
203 Register(new_map);
204 return new_map;
205 }
206
207 const CacheAddr cache_addr_end = cache_addr + size;
208 if (overlaps.size() == 1) {
209 MapInterval& current_map = overlaps[0];
210 if (current_map->IsInside(cache_addr, cache_addr_end)) {
211 return current_map;
212 }
213 }
214 CacheAddr new_start = cache_addr;
215 CacheAddr new_end = cache_addr_end;
216 bool write_inheritance = false;
217 bool modified_inheritance = false;
218 // Calculate new buffer parameters
219 for (auto& overlap : overlaps) {
220 new_start = std::min(overlap->GetStart(), new_start);
221 new_end = std::max(overlap->GetEnd(), new_end);
222 write_inheritance |= overlap->IsWritten();
223 modified_inheritance |= overlap->IsModified();
224 }
225 GPUVAddr new_gpu_addr = gpu_addr + new_start - cache_addr;
226 for (auto& overlap : overlaps) {
227 Unregister(overlap);
228 }
229 UpdateBlock(block, new_start, new_end, overlaps);
230 MapInterval new_map = CreateMap(new_start, new_end, new_gpu_addr);
231 if (modified_inheritance) {
232 new_map->MarkAsModified(true, GetModifiedTicks());
233 }
234 Register(new_map, write_inheritance);
235 return new_map;
236 }
237
238 void UpdateBlock(const TBuffer& block, CacheAddr start, CacheAddr end,
239 std::vector<MapInterval>& overlaps) {
240 const IntervalType base_interval{start, end};
241 IntervalSet interval_set{};
242 interval_set.add(base_interval);
243 for (auto& overlap : overlaps) {
244 const IntervalType subtract{overlap->GetStart(), overlap->GetEnd()};
245 interval_set.subtract(subtract);
246 }
247 for (auto& interval : interval_set) {
248 std::size_t size = interval.upper() - interval.lower();
249 if (size > 0) {
250 u8* host_ptr = FromCacheAddr(interval.lower());
251 UploadBlockData(block, block->GetOffset(interval.lower()), size, host_ptr);
252 }
253 }
254 }
255
256 std::vector<MapInterval> GetMapsInRange(CacheAddr addr, std::size_t size) {
257 if (size == 0) {
258 return {};
259 }
260
261 std::vector<MapInterval> objects{};
262 const IntervalType interval{addr, addr + size};
263 for (auto& pair : boost::make_iterator_range(mapped_addresses.equal_range(interval))) {
264 objects.push_back(pair.second);
265 }
266
267 return objects;
268 }
269
270 /// Returns a ticks counter used for tracking when cached objects were last modified
271 u64 GetModifiedTicks() {
272 return ++modified_ticks;
273 }
274
275 void FlushMap(MapInterval map) {
276 std::size_t size = map->GetEnd() - map->GetStart();
277 TBuffer block = blocks[map->GetStart() >> block_page_bits];
278 u8* host_ptr = FromCacheAddr(map->GetStart());
279 DownloadBlockData(block, block->GetOffset(map->GetStart()), size, host_ptr);
280 map->MarkAsModified(false, 0);
281 }
282
283 BufferInfo StreamBufferUpload(const void* raw_pointer, std::size_t size,
284 std::size_t alignment) {
285 AlignBuffer(alignment);
286 const std::size_t uploaded_offset = buffer_offset;
287 std::memcpy(buffer_ptr, raw_pointer, size);
288
289 buffer_ptr += size;
290 buffer_offset += size;
291 return {&stream_buffer_handle, uploaded_offset};
292 }
293
294 void AlignBuffer(std::size_t alignment) {
295 // Align the offset, not the mapped pointer
296 const std::size_t offset_aligned = Common::AlignUp(buffer_offset, alignment);
297 buffer_ptr += offset_aligned - buffer_offset;
298 buffer_offset = offset_aligned;
299 }
300
301 TBuffer EnlargeBlock(TBuffer buffer) {
302 const std::size_t old_size = buffer->GetSize();
303 const std::size_t new_size = old_size + block_page_size;
304 const CacheAddr cache_addr = buffer->GetCacheAddr();
305 TBuffer new_buffer = CreateBlock(cache_addr, new_size);
306 CopyBlock(buffer, new_buffer, 0, 0, old_size);
307 buffer->SetEpoch(epoch);
308 pending_destruction.push_back(buffer);
309 const CacheAddr cache_addr_end = cache_addr + new_size - 1;
310 u64 page_start = cache_addr >> block_page_bits;
311 const u64 page_end = cache_addr_end >> block_page_bits;
312 while (page_start <= page_end) {
313 blocks[page_start] = new_buffer;
314 ++page_start;
315 }
316 return new_buffer;
317 }
318
319 TBuffer MergeBlocks(TBuffer first, TBuffer second) {
320 const std::size_t size_1 = first->GetSize();
321 const std::size_t size_2 = second->GetSize();
322 const CacheAddr first_addr = first->GetCacheAddr();
323 const CacheAddr second_addr = second->GetCacheAddr();
324 const CacheAddr new_addr = std::min(first_addr, second_addr);
325 const std::size_t new_size = size_1 + size_2;
326 TBuffer new_buffer = CreateBlock(new_addr, new_size);
327 CopyBlock(first, new_buffer, 0, new_buffer->GetOffset(first_addr), size_1);
328 CopyBlock(second, new_buffer, 0, new_buffer->GetOffset(second_addr), size_2);
329 first->SetEpoch(epoch);
330 second->SetEpoch(epoch);
331 pending_destruction.push_back(first);
332 pending_destruction.push_back(second);
333 const CacheAddr cache_addr_end = new_addr + new_size - 1;
334 u64 page_start = new_addr >> block_page_bits;
335 const u64 page_end = cache_addr_end >> block_page_bits;
336 while (page_start <= page_end) {
337 blocks[page_start] = new_buffer;
338 ++page_start;
339 }
340 return new_buffer;
341 }
342
343 TBuffer GetBlock(const CacheAddr cache_addr, const std::size_t size) {
344 TBuffer found{};
345 const CacheAddr cache_addr_end = cache_addr + size - 1;
346 u64 page_start = cache_addr >> block_page_bits;
347 const u64 page_end = cache_addr_end >> block_page_bits;
348 while (page_start <= page_end) {
349 auto it = blocks.find(page_start);
350 if (it == blocks.end()) {
351 if (found) {
352 found = EnlargeBlock(found);
353 } else {
354 const CacheAddr start_addr = (page_start << block_page_bits);
355 found = CreateBlock(start_addr, block_page_size);
356 blocks[page_start] = found;
357 }
358 } else {
359 if (found) {
360 if (found == it->second) {
361 ++page_start;
362 continue;
363 }
364 found = MergeBlocks(found, it->second);
365 } else {
366 found = it->second;
367 }
368 }
369 ++page_start;
370 }
371 return found;
372 }
373
374 void MarkRegionAsWritten(const CacheAddr start, const CacheAddr end) {
375 u64 page_start = start >> write_page_bit;
376 const u64 page_end = end >> write_page_bit;
377 while (page_start <= page_end) {
378 auto it = written_pages.find(page_start);
379 if (it != written_pages.end()) {
380 it->second = it->second + 1;
381 } else {
382 written_pages[page_start] = 1;
383 }
384 page_start++;
385 }
386 }
387
388 void UnmarkRegionAsWritten(const CacheAddr start, const CacheAddr end) {
389 u64 page_start = start >> write_page_bit;
390 const u64 page_end = end >> write_page_bit;
391 while (page_start <= page_end) {
392 auto it = written_pages.find(page_start);
393 if (it != written_pages.end()) {
394 if (it->second > 1) {
395 it->second = it->second - 1;
396 } else {
397 written_pages.erase(it);
398 }
399 }
400 page_start++;
401 }
402 }
403
404 bool IsRegionWritten(const CacheAddr start, const CacheAddr end) const {
405 u64 page_start = start >> write_page_bit;
406 const u64 page_end = end >> write_page_bit;
407 while (page_start <= page_end) {
408 if (written_pages.count(page_start) > 0) {
409 return true;
410 }
411 page_start++;
412 }
413 return false;
414 }
415
416 VideoCore::RasterizerInterface& rasterizer;
417 Core::System& system;
418 std::unique_ptr<StreamBuffer> stream_buffer;
419
420 TBufferType stream_buffer_handle{};
421
422 bool invalidated = false;
423
424 u8* buffer_ptr = nullptr;
425 u64 buffer_offset = 0;
426 u64 buffer_offset_base = 0;
427
428 using IntervalSet = boost::icl::interval_set<CacheAddr>;
429 using IntervalCache = boost::icl::interval_map<CacheAddr, MapInterval>;
430 using IntervalType = typename IntervalCache::interval_type;
431 IntervalCache mapped_addresses{};
432
433 static constexpr u64 write_page_bit{11};
434 std::unordered_map<u64, u32> written_pages{};
435
436 static constexpr u64 block_page_bits{21};
437 static constexpr u64 block_page_size{1 << block_page_bits};
438 std::unordered_map<u64, TBuffer> blocks{};
439
440 std::list<TBuffer> pending_destruction{};
441 u64 epoch{};
442 u64 modified_ticks{};
443
444 std::recursive_mutex mutex;
445};
446
447} // namespace VideoCommon
diff --git a/src/video_core/buffer_cache/map_interval.h b/src/video_core/buffer_cache/map_interval.h
new file mode 100644
index 000000000..3a104d5cd
--- /dev/null
+++ b/src/video_core/buffer_cache/map_interval.h
@@ -0,0 +1,89 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include "common/common_types.h"
8#include "video_core/gpu.h"
9
10namespace VideoCommon {
11
12class MapIntervalBase {
13public:
14 MapIntervalBase(const CacheAddr start, const CacheAddr end, const GPUVAddr gpu_addr)
15 : start{start}, end{end}, gpu_addr{gpu_addr} {}
16
17 void SetCpuAddress(VAddr new_cpu_addr) {
18 cpu_addr = new_cpu_addr;
19 }
20
21 VAddr GetCpuAddress() const {
22 return cpu_addr;
23 }
24
25 GPUVAddr GetGpuAddress() const {
26 return gpu_addr;
27 }
28
29 bool IsInside(const CacheAddr other_start, const CacheAddr other_end) const {
30 return (start <= other_start && other_end <= end);
31 }
32
33 bool operator==(const MapIntervalBase& rhs) const {
34 return std::tie(start, end) == std::tie(rhs.start, rhs.end);
35 }
36
37 bool operator!=(const MapIntervalBase& rhs) const {
38 return !operator==(rhs);
39 }
40
41 void MarkAsRegistered(const bool registered) {
42 is_registered = registered;
43 }
44
45 bool IsRegistered() const {
46 return is_registered;
47 }
48
49 CacheAddr GetStart() const {
50 return start;
51 }
52
53 CacheAddr GetEnd() const {
54 return end;
55 }
56
57 void MarkAsModified(const bool is_modified_, const u64 tick) {
58 is_modified = is_modified_;
59 ticks = tick;
60 }
61
62 bool IsModified() const {
63 return is_modified;
64 }
65
66 u64 GetModificationTick() const {
67 return ticks;
68 }
69
70 void MarkAsWritten(const bool is_written_) {
71 is_written = is_written_;
72 }
73
74 bool IsWritten() const {
75 return is_written;
76 }
77
78private:
79 CacheAddr start;
80 CacheAddr end;
81 GPUVAddr gpu_addr;
82 VAddr cpu_addr{};
83 bool is_written{};
84 bool is_modified{};
85 bool is_registered{};
86 u64 ticks{};
87};
88
89} // namespace VideoCommon
diff --git a/src/video_core/dma_pusher.cpp b/src/video_core/dma_pusher.cpp
index bd036cbe8..0094fd715 100644
--- a/src/video_core/dma_pusher.cpp
+++ b/src/video_core/dma_pusher.cpp
@@ -31,6 +31,7 @@ void DmaPusher::DispatchCalls() {
31 break; 31 break;
32 } 32 }
33 } 33 }
34 gpu.FlushCommands();
34} 35}
35 36
36bool DmaPusher::Step() { 37bool DmaPusher::Step() {
diff --git a/src/video_core/engines/fermi_2d.cpp b/src/video_core/engines/fermi_2d.cpp
index 0ee228e28..98a8b5337 100644
--- a/src/video_core/engines/fermi_2d.cpp
+++ b/src/video_core/engines/fermi_2d.cpp
@@ -10,8 +10,7 @@
10 10
11namespace Tegra::Engines { 11namespace Tegra::Engines {
12 12
13Fermi2D::Fermi2D(VideoCore::RasterizerInterface& rasterizer, MemoryManager& memory_manager) 13Fermi2D::Fermi2D(VideoCore::RasterizerInterface& rasterizer) : rasterizer{rasterizer} {}
14 : rasterizer{rasterizer}, memory_manager{memory_manager} {}
15 14
16void Fermi2D::CallMethod(const GPU::MethodCall& method_call) { 15void Fermi2D::CallMethod(const GPU::MethodCall& method_call) {
17 ASSERT_MSG(method_call.method < Regs::NUM_REGS, 16 ASSERT_MSG(method_call.method < Regs::NUM_REGS,
diff --git a/src/video_core/engines/fermi_2d.h b/src/video_core/engines/fermi_2d.h
index 05421d185..0901cf2fa 100644
--- a/src/video_core/engines/fermi_2d.h
+++ b/src/video_core/engines/fermi_2d.h
@@ -33,7 +33,7 @@ namespace Tegra::Engines {
33 33
34class Fermi2D final { 34class Fermi2D final {
35public: 35public:
36 explicit Fermi2D(VideoCore::RasterizerInterface& rasterizer, MemoryManager& memory_manager); 36 explicit Fermi2D(VideoCore::RasterizerInterface& rasterizer);
37 ~Fermi2D() = default; 37 ~Fermi2D() = default;
38 38
39 /// Write the value to the register identified by method. 39 /// Write the value to the register identified by method.
@@ -145,7 +145,6 @@ public:
145 145
146private: 146private:
147 VideoCore::RasterizerInterface& rasterizer; 147 VideoCore::RasterizerInterface& rasterizer;
148 MemoryManager& memory_manager;
149 148
150 /// Performs the copy from the source surface to the destination surface as configured in the 149 /// Performs the copy from the source surface to the destination surface as configured in the
151 /// registers. 150 /// registers.
diff --git a/src/video_core/engines/kepler_memory.cpp b/src/video_core/engines/kepler_memory.cpp
index 44279de00..fa4a7c5c1 100644
--- a/src/video_core/engines/kepler_memory.cpp
+++ b/src/video_core/engines/kepler_memory.cpp
@@ -15,7 +15,7 @@
15namespace Tegra::Engines { 15namespace Tegra::Engines {
16 16
17KeplerMemory::KeplerMemory(Core::System& system, MemoryManager& memory_manager) 17KeplerMemory::KeplerMemory(Core::System& system, MemoryManager& memory_manager)
18 : system{system}, memory_manager{memory_manager}, upload_state{memory_manager, regs.upload} {} 18 : system{system}, upload_state{memory_manager, regs.upload} {}
19 19
20KeplerMemory::~KeplerMemory() = default; 20KeplerMemory::~KeplerMemory() = default;
21 21
diff --git a/src/video_core/engines/kepler_memory.h b/src/video_core/engines/kepler_memory.h
index f3bc675a9..e0e25c321 100644
--- a/src/video_core/engines/kepler_memory.h
+++ b/src/video_core/engines/kepler_memory.h
@@ -65,7 +65,6 @@ public:
65 65
66private: 66private:
67 Core::System& system; 67 Core::System& system;
68 MemoryManager& memory_manager;
69 Upload::State upload_state; 68 Upload::State upload_state;
70}; 69};
71 70
diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp
index 74c46ec04..6a5a4f5c4 100644
--- a/src/video_core/engines/maxwell_3d.cpp
+++ b/src/video_core/engines/maxwell_3d.cpp
@@ -524,9 +524,10 @@ void Maxwell3D::ProcessQueryCondition() {
524void Maxwell3D::ProcessSyncPoint() { 524void Maxwell3D::ProcessSyncPoint() {
525 const u32 sync_point = regs.sync_info.sync_point.Value(); 525 const u32 sync_point = regs.sync_info.sync_point.Value();
526 const u32 increment = regs.sync_info.increment.Value(); 526 const u32 increment = regs.sync_info.increment.Value();
527 const u32 cache_flush = regs.sync_info.unknown.Value(); 527 [[maybe_unused]] const u32 cache_flush = regs.sync_info.unknown.Value();
528 LOG_DEBUG(HW_GPU, "Syncpoint set {}, increment: {}, unk: {}", sync_point, increment, 528 if (increment) {
529 cache_flush); 529 system.GPU().IncrementSyncPoint(sync_point);
530 }
530} 531}
531 532
532void Maxwell3D::DrawArrays() { 533void Maxwell3D::DrawArrays() {
@@ -625,10 +626,10 @@ Texture::TICEntry Maxwell3D::GetTICEntry(u32 tic_index) const {
625 Texture::TICEntry tic_entry; 626 Texture::TICEntry tic_entry;
626 memory_manager.ReadBlockUnsafe(tic_address_gpu, &tic_entry, sizeof(Texture::TICEntry)); 627 memory_manager.ReadBlockUnsafe(tic_address_gpu, &tic_entry, sizeof(Texture::TICEntry));
627 628
628 const auto r_type{tic_entry.r_type.Value()}; 629 [[maybe_unused]] const auto r_type{tic_entry.r_type.Value()};
629 const auto g_type{tic_entry.g_type.Value()}; 630 [[maybe_unused]] const auto g_type{tic_entry.g_type.Value()};
630 const auto b_type{tic_entry.b_type.Value()}; 631 [[maybe_unused]] const auto b_type{tic_entry.b_type.Value()};
631 const auto a_type{tic_entry.a_type.Value()}; 632 [[maybe_unused]] const auto a_type{tic_entry.a_type.Value()};
632 633
633 // TODO(Subv): Different data types for separate components are not supported 634 // TODO(Subv): Different data types for separate components are not supported
634 DEBUG_ASSERT(r_type == g_type && r_type == b_type && r_type == a_type); 635 DEBUG_ASSERT(r_type == g_type && r_type == b_type && r_type == a_type);
diff --git a/src/video_core/engines/maxwell_dma.cpp b/src/video_core/engines/maxwell_dma.cpp
index a71d98e36..ad8453c5f 100644
--- a/src/video_core/engines/maxwell_dma.cpp
+++ b/src/video_core/engines/maxwell_dma.cpp
@@ -9,15 +9,13 @@
9#include "video_core/engines/maxwell_3d.h" 9#include "video_core/engines/maxwell_3d.h"
10#include "video_core/engines/maxwell_dma.h" 10#include "video_core/engines/maxwell_dma.h"
11#include "video_core/memory_manager.h" 11#include "video_core/memory_manager.h"
12#include "video_core/rasterizer_interface.h"
13#include "video_core/renderer_base.h" 12#include "video_core/renderer_base.h"
14#include "video_core/textures/decoders.h" 13#include "video_core/textures/decoders.h"
15 14
16namespace Tegra::Engines { 15namespace Tegra::Engines {
17 16
18MaxwellDMA::MaxwellDMA(Core::System& system, VideoCore::RasterizerInterface& rasterizer, 17MaxwellDMA::MaxwellDMA(Core::System& system, MemoryManager& memory_manager)
19 MemoryManager& memory_manager) 18 : system{system}, memory_manager{memory_manager} {}
20 : system{system}, rasterizer{rasterizer}, memory_manager{memory_manager} {}
21 19
22void MaxwellDMA::CallMethod(const GPU::MethodCall& method_call) { 20void MaxwellDMA::CallMethod(const GPU::MethodCall& method_call) {
23 ASSERT_MSG(method_call.method < Regs::NUM_REGS, 21 ASSERT_MSG(method_call.method < Regs::NUM_REGS,
@@ -39,7 +37,7 @@ void MaxwellDMA::CallMethod(const GPU::MethodCall& method_call) {
39} 37}
40 38
41void MaxwellDMA::HandleCopy() { 39void MaxwellDMA::HandleCopy() {
42 LOG_WARNING(HW_GPU, "Requested a DMA copy"); 40 LOG_TRACE(HW_GPU, "Requested a DMA copy");
43 41
44 const GPUVAddr source = regs.src_address.Address(); 42 const GPUVAddr source = regs.src_address.Address();
45 const GPUVAddr dest = regs.dst_address.Address(); 43 const GPUVAddr dest = regs.dst_address.Address();
diff --git a/src/video_core/engines/maxwell_dma.h b/src/video_core/engines/maxwell_dma.h
index 17b015ca7..93808a9bb 100644
--- a/src/video_core/engines/maxwell_dma.h
+++ b/src/video_core/engines/maxwell_dma.h
@@ -20,10 +20,6 @@ namespace Tegra {
20class MemoryManager; 20class MemoryManager;
21} 21}
22 22
23namespace VideoCore {
24class RasterizerInterface;
25}
26
27namespace Tegra::Engines { 23namespace Tegra::Engines {
28 24
29/** 25/**
@@ -33,8 +29,7 @@ namespace Tegra::Engines {
33 29
34class MaxwellDMA final { 30class MaxwellDMA final {
35public: 31public:
36 explicit MaxwellDMA(Core::System& system, VideoCore::RasterizerInterface& rasterizer, 32 explicit MaxwellDMA(Core::System& system, MemoryManager& memory_manager);
37 MemoryManager& memory_manager);
38 ~MaxwellDMA() = default; 33 ~MaxwellDMA() = default;
39 34
40 /// Write the value to the register identified by method. 35 /// Write the value to the register identified by method.
@@ -180,8 +175,6 @@ public:
180private: 175private:
181 Core::System& system; 176 Core::System& system;
182 177
183 VideoCore::RasterizerInterface& rasterizer;
184
185 MemoryManager& memory_manager; 178 MemoryManager& memory_manager;
186 179
187 std::vector<u8> read_buffer; 180 std::vector<u8> read_buffer;
diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h
index 8520a0143..bc8c2a1c5 100644
--- a/src/video_core/engines/shader_bytecode.h
+++ b/src/video_core/engines/shader_bytecode.h
@@ -538,6 +538,12 @@ enum class PhysicalAttributeDirection : u64 {
538 Output = 1, 538 Output = 1,
539}; 539};
540 540
541enum class VoteOperation : u64 {
542 All = 0, // allThreadsNV
543 Any = 1, // anyThreadNV
544 Eq = 2, // allThreadsEqualNV
545};
546
541union Instruction { 547union Instruction {
542 Instruction& operator=(const Instruction& instr) { 548 Instruction& operator=(const Instruction& instr) {
543 value = instr.value; 549 value = instr.value;
@@ -560,6 +566,18 @@ union Instruction {
560 BitField<48, 16, u64> opcode; 566 BitField<48, 16, u64> opcode;
561 567
562 union { 568 union {
569 BitField<8, 5, ConditionCode> cc;
570 BitField<13, 1, u64> trigger;
571 } nop;
572
573 union {
574 BitField<48, 2, VoteOperation> operation;
575 BitField<45, 3, u64> dest_pred;
576 BitField<39, 3, u64> value;
577 BitField<42, 1, u64> negate_value;
578 } vote;
579
580 union {
563 BitField<8, 8, Register> gpr; 581 BitField<8, 8, Register> gpr;
564 BitField<20, 24, s64> offset; 582 BitField<20, 24, s64> offset;
565 } gmem; 583 } gmem;
@@ -1018,8 +1036,6 @@ union Instruction {
1018 } f2i; 1036 } f2i;
1019 1037
1020 union { 1038 union {
1021 BitField<8, 2, Register::Size> src_size;
1022 BitField<10, 2, Register::Size> dst_size;
1023 BitField<39, 4, u64> rounding; 1039 BitField<39, 4, u64> rounding;
1024 // H0, H1 extract for F16 missing 1040 // H0, H1 extract for F16 missing
1025 BitField<41, 1, u64> selector; // Guessed as some games set it, TODO: reverse this value 1041 BitField<41, 1, u64> selector; // Guessed as some games set it, TODO: reverse this value
@@ -1484,6 +1500,7 @@ public:
1484 SYNC, 1500 SYNC,
1485 BRK, 1501 BRK,
1486 DEPBAR, 1502 DEPBAR,
1503 VOTE,
1487 BFE_C, 1504 BFE_C,
1488 BFE_R, 1505 BFE_R,
1489 BFE_IMM, 1506 BFE_IMM,
@@ -1516,6 +1533,7 @@ public:
1516 TMML, // Texture Mip Map Level 1533 TMML, // Texture Mip Map Level
1517 SUST, // Surface Store 1534 SUST, // Surface Store
1518 EXIT, 1535 EXIT,
1536 NOP,
1519 IPA, 1537 IPA,
1520 OUT_R, // Emit vertex/primitive 1538 OUT_R, // Emit vertex/primitive
1521 ISBERD, 1539 ISBERD,
@@ -1645,6 +1663,7 @@ public:
1645 Hfma2, 1663 Hfma2,
1646 Flow, 1664 Flow,
1647 Synch, 1665 Synch,
1666 Warp,
1648 Memory, 1667 Memory,
1649 Texture, 1668 Texture,
1650 Image, 1669 Image,
@@ -1771,6 +1790,7 @@ private:
1771 INST("111000110100---", Id::BRK, Type::Flow, "BRK"), 1790 INST("111000110100---", Id::BRK, Type::Flow, "BRK"),
1772 INST("111000110000----", Id::EXIT, Type::Flow, "EXIT"), 1791 INST("111000110000----", Id::EXIT, Type::Flow, "EXIT"),
1773 INST("1111000011110---", Id::DEPBAR, Type::Synch, "DEPBAR"), 1792 INST("1111000011110---", Id::DEPBAR, Type::Synch, "DEPBAR"),
1793 INST("0101000011011---", Id::VOTE, Type::Warp, "VOTE"),
1774 INST("1110111111011---", Id::LD_A, Type::Memory, "LD_A"), 1794 INST("1110111111011---", Id::LD_A, Type::Memory, "LD_A"),
1775 INST("1110111101001---", Id::LD_S, Type::Memory, "LD_S"), 1795 INST("1110111101001---", Id::LD_S, Type::Memory, "LD_S"),
1776 INST("1110111101000---", Id::LD_L, Type::Memory, "LD_L"), 1796 INST("1110111101000---", Id::LD_L, Type::Memory, "LD_L"),
@@ -1795,6 +1815,7 @@ private:
1795 INST("110111110110----", Id::TMML_B, Type::Texture, "TMML_B"), 1815 INST("110111110110----", Id::TMML_B, Type::Texture, "TMML_B"),
1796 INST("1101111101011---", Id::TMML, Type::Texture, "TMML"), 1816 INST("1101111101011---", Id::TMML, Type::Texture, "TMML"),
1797 INST("11101011001-----", Id::SUST, Type::Image, "SUST"), 1817 INST("11101011001-----", Id::SUST, Type::Image, "SUST"),
1818 INST("0101000010110---", Id::NOP, Type::Trivial, "NOP"),
1798 INST("11100000--------", Id::IPA, Type::Trivial, "IPA"), 1819 INST("11100000--------", Id::IPA, Type::Trivial, "IPA"),
1799 INST("1111101111100---", Id::OUT_R, Type::Trivial, "OUT_R"), 1820 INST("1111101111100---", Id::OUT_R, Type::Trivial, "OUT_R"),
1800 INST("1110111111010---", Id::ISBERD, Type::Trivial, "ISBERD"), 1821 INST("1110111111010---", Id::ISBERD, Type::Trivial, "ISBERD"),
diff --git a/src/video_core/gpu.cpp b/src/video_core/gpu.cpp
index e25754e37..8d9db45f5 100644
--- a/src/video_core/gpu.cpp
+++ b/src/video_core/gpu.cpp
@@ -29,14 +29,15 @@ u32 FramebufferConfig::BytesPerPixel(PixelFormat format) {
29 UNREACHABLE(); 29 UNREACHABLE();
30} 30}
31 31
32GPU::GPU(Core::System& system, VideoCore::RendererBase& renderer) : renderer{renderer} { 32GPU::GPU(Core::System& system, VideoCore::RendererBase& renderer, bool is_async)
33 : system{system}, renderer{renderer}, is_async{is_async} {
33 auto& rasterizer{renderer.Rasterizer()}; 34 auto& rasterizer{renderer.Rasterizer()};
34 memory_manager = std::make_unique<Tegra::MemoryManager>(system, rasterizer); 35 memory_manager = std::make_unique<Tegra::MemoryManager>(system, rasterizer);
35 dma_pusher = std::make_unique<Tegra::DmaPusher>(*this); 36 dma_pusher = std::make_unique<Tegra::DmaPusher>(*this);
36 maxwell_3d = std::make_unique<Engines::Maxwell3D>(system, rasterizer, *memory_manager); 37 maxwell_3d = std::make_unique<Engines::Maxwell3D>(system, rasterizer, *memory_manager);
37 fermi_2d = std::make_unique<Engines::Fermi2D>(rasterizer, *memory_manager); 38 fermi_2d = std::make_unique<Engines::Fermi2D>(rasterizer);
38 kepler_compute = std::make_unique<Engines::KeplerCompute>(system, rasterizer, *memory_manager); 39 kepler_compute = std::make_unique<Engines::KeplerCompute>(system, rasterizer, *memory_manager);
39 maxwell_dma = std::make_unique<Engines::MaxwellDMA>(system, rasterizer, *memory_manager); 40 maxwell_dma = std::make_unique<Engines::MaxwellDMA>(system, *memory_manager);
40 kepler_memory = std::make_unique<Engines::KeplerMemory>(system, *memory_manager); 41 kepler_memory = std::make_unique<Engines::KeplerMemory>(system, *memory_manager);
41} 42}
42 43
@@ -74,6 +75,55 @@ const DmaPusher& GPU::DmaPusher() const {
74 return *dma_pusher; 75 return *dma_pusher;
75} 76}
76 77
78void GPU::IncrementSyncPoint(const u32 syncpoint_id) {
79 syncpoints[syncpoint_id]++;
80 std::lock_guard lock{sync_mutex};
81 if (!syncpt_interrupts[syncpoint_id].empty()) {
82 u32 value = syncpoints[syncpoint_id].load();
83 auto it = syncpt_interrupts[syncpoint_id].begin();
84 while (it != syncpt_interrupts[syncpoint_id].end()) {
85 if (value >= *it) {
86 TriggerCpuInterrupt(syncpoint_id, *it);
87 it = syncpt_interrupts[syncpoint_id].erase(it);
88 continue;
89 }
90 it++;
91 }
92 }
93}
94
95u32 GPU::GetSyncpointValue(const u32 syncpoint_id) const {
96 return syncpoints[syncpoint_id].load();
97}
98
99void GPU::RegisterSyncptInterrupt(const u32 syncpoint_id, const u32 value) {
100 auto& interrupt = syncpt_interrupts[syncpoint_id];
101 bool contains = std::any_of(interrupt.begin(), interrupt.end(),
102 [value](u32 in_value) { return in_value == value; });
103 if (contains) {
104 return;
105 }
106 syncpt_interrupts[syncpoint_id].emplace_back(value);
107}
108
109bool GPU::CancelSyncptInterrupt(const u32 syncpoint_id, const u32 value) {
110 std::lock_guard lock{sync_mutex};
111 auto& interrupt = syncpt_interrupts[syncpoint_id];
112 const auto iter =
113 std::find_if(interrupt.begin(), interrupt.end(),
114 [value](u32 interrupt_value) { return value == interrupt_value; });
115
116 if (iter == interrupt.end()) {
117 return false;
118 }
119 interrupt.erase(iter);
120 return true;
121}
122
123void GPU::FlushCommands() {
124 renderer.Rasterizer().FlushCommands();
125}
126
77u32 RenderTargetBytesPerPixel(RenderTargetFormat format) { 127u32 RenderTargetBytesPerPixel(RenderTargetFormat format) {
78 ASSERT(format != RenderTargetFormat::NONE); 128 ASSERT(format != RenderTargetFormat::NONE);
79 129
@@ -151,12 +201,12 @@ enum class BufferMethods {
151 NotifyIntr = 0x8, 201 NotifyIntr = 0x8,
152 WrcacheFlush = 0x9, 202 WrcacheFlush = 0x9,
153 Unk28 = 0xA, 203 Unk28 = 0xA,
154 Unk2c = 0xB, 204 UnkCacheFlush = 0xB,
155 RefCnt = 0x14, 205 RefCnt = 0x14,
156 SemaphoreAcquire = 0x1A, 206 SemaphoreAcquire = 0x1A,
157 SemaphoreRelease = 0x1B, 207 SemaphoreRelease = 0x1B,
158 Unk70 = 0x1C, 208 FenceValue = 0x1C,
159 Unk74 = 0x1D, 209 FenceAction = 0x1D,
160 Unk78 = 0x1E, 210 Unk78 = 0x1E,
161 Unk7c = 0x1F, 211 Unk7c = 0x1F,
162 Yield = 0x20, 212 Yield = 0x20,
@@ -202,6 +252,10 @@ void GPU::CallPullerMethod(const MethodCall& method_call) {
202 case BufferMethods::SemaphoreAddressLow: 252 case BufferMethods::SemaphoreAddressLow:
203 case BufferMethods::SemaphoreSequence: 253 case BufferMethods::SemaphoreSequence:
204 case BufferMethods::RefCnt: 254 case BufferMethods::RefCnt:
255 case BufferMethods::UnkCacheFlush:
256 case BufferMethods::WrcacheFlush:
257 case BufferMethods::FenceValue:
258 case BufferMethods::FenceAction:
205 break; 259 break;
206 case BufferMethods::SemaphoreTrigger: { 260 case BufferMethods::SemaphoreTrigger: {
207 ProcessSemaphoreTriggerMethod(); 261 ProcessSemaphoreTriggerMethod();
@@ -212,21 +266,11 @@ void GPU::CallPullerMethod(const MethodCall& method_call) {
212 LOG_ERROR(HW_GPU, "Special puller engine method NotifyIntr not implemented"); 266 LOG_ERROR(HW_GPU, "Special puller engine method NotifyIntr not implemented");
213 break; 267 break;
214 } 268 }
215 case BufferMethods::WrcacheFlush: {
216 // TODO(Kmather73): Research and implement this method.
217 LOG_ERROR(HW_GPU, "Special puller engine method WrcacheFlush not implemented");
218 break;
219 }
220 case BufferMethods::Unk28: { 269 case BufferMethods::Unk28: {
221 // TODO(Kmather73): Research and implement this method. 270 // TODO(Kmather73): Research and implement this method.
222 LOG_ERROR(HW_GPU, "Special puller engine method Unk28 not implemented"); 271 LOG_ERROR(HW_GPU, "Special puller engine method Unk28 not implemented");
223 break; 272 break;
224 } 273 }
225 case BufferMethods::Unk2c: {
226 // TODO(Kmather73): Research and implement this method.
227 LOG_ERROR(HW_GPU, "Special puller engine method Unk2c not implemented");
228 break;
229 }
230 case BufferMethods::SemaphoreAcquire: { 274 case BufferMethods::SemaphoreAcquire: {
231 ProcessSemaphoreAcquire(); 275 ProcessSemaphoreAcquire();
232 break; 276 break;
diff --git a/src/video_core/gpu.h b/src/video_core/gpu.h
index 0ace0ff4f..544340ecd 100644
--- a/src/video_core/gpu.h
+++ b/src/video_core/gpu.h
@@ -5,8 +5,12 @@
5#pragma once 5#pragma once
6 6
7#include <array> 7#include <array>
8#include <atomic>
9#include <list>
8#include <memory> 10#include <memory>
11#include <mutex>
9#include "common/common_types.h" 12#include "common/common_types.h"
13#include "core/hle/service/nvdrv/nvdata.h"
10#include "core/hle/service/nvflinger/buffer_queue.h" 14#include "core/hle/service/nvflinger/buffer_queue.h"
11#include "video_core/dma_pusher.h" 15#include "video_core/dma_pusher.h"
12 16
@@ -15,6 +19,10 @@ inline CacheAddr ToCacheAddr(const void* host_ptr) {
15 return reinterpret_cast<CacheAddr>(host_ptr); 19 return reinterpret_cast<CacheAddr>(host_ptr);
16} 20}
17 21
22inline u8* FromCacheAddr(CacheAddr cache_addr) {
23 return reinterpret_cast<u8*>(cache_addr);
24}
25
18namespace Core { 26namespace Core {
19class System; 27class System;
20} 28}
@@ -127,7 +135,7 @@ class MemoryManager;
127 135
128class GPU { 136class GPU {
129public: 137public:
130 explicit GPU(Core::System& system, VideoCore::RendererBase& renderer); 138 explicit GPU(Core::System& system, VideoCore::RendererBase& renderer, bool is_async);
131 139
132 virtual ~GPU(); 140 virtual ~GPU();
133 141
@@ -149,6 +157,8 @@ public:
149 /// Calls a GPU method. 157 /// Calls a GPU method.
150 void CallMethod(const MethodCall& method_call); 158 void CallMethod(const MethodCall& method_call);
151 159
160 void FlushCommands();
161
152 /// Returns a reference to the Maxwell3D GPU engine. 162 /// Returns a reference to the Maxwell3D GPU engine.
153 Engines::Maxwell3D& Maxwell3D(); 163 Engines::Maxwell3D& Maxwell3D();
154 164
@@ -170,6 +180,22 @@ public:
170 /// Returns a reference to the GPU DMA pusher. 180 /// Returns a reference to the GPU DMA pusher.
171 Tegra::DmaPusher& DmaPusher(); 181 Tegra::DmaPusher& DmaPusher();
172 182
183 void IncrementSyncPoint(u32 syncpoint_id);
184
185 u32 GetSyncpointValue(u32 syncpoint_id) const;
186
187 void RegisterSyncptInterrupt(u32 syncpoint_id, u32 value);
188
189 bool CancelSyncptInterrupt(u32 syncpoint_id, u32 value);
190
191 std::unique_lock<std::mutex> LockSync() {
192 return std::unique_lock{sync_mutex};
193 }
194
195 bool IsAsync() const {
196 return is_async;
197 }
198
173 /// Returns a const reference to the GPU DMA pusher. 199 /// Returns a const reference to the GPU DMA pusher.
174 const Tegra::DmaPusher& DmaPusher() const; 200 const Tegra::DmaPusher& DmaPusher() const;
175 201
@@ -200,7 +226,12 @@ public:
200 226
201 u32 semaphore_acquire; 227 u32 semaphore_acquire;
202 u32 semaphore_release; 228 u32 semaphore_release;
203 INSERT_PADDING_WORDS(0xE4); 229 u32 fence_value;
230 union {
231 BitField<4, 4, u32> operation;
232 BitField<8, 8, u32> id;
233 } fence_action;
234 INSERT_PADDING_WORDS(0xE2);
204 235
205 // Puller state 236 // Puller state
206 u32 acquire_mode; 237 u32 acquire_mode;
@@ -234,6 +265,9 @@ public:
234 /// Notify rasterizer that any caches of the specified region should be flushed and invalidated 265 /// Notify rasterizer that any caches of the specified region should be flushed and invalidated
235 virtual void FlushAndInvalidateRegion(CacheAddr addr, u64 size) = 0; 266 virtual void FlushAndInvalidateRegion(CacheAddr addr, u64 size) = 0;
236 267
268protected:
269 virtual void TriggerCpuInterrupt(u32 syncpoint_id, u32 value) const = 0;
270
237private: 271private:
238 void ProcessBindMethod(const MethodCall& method_call); 272 void ProcessBindMethod(const MethodCall& method_call);
239 void ProcessSemaphoreTriggerMethod(); 273 void ProcessSemaphoreTriggerMethod();
@@ -251,6 +285,7 @@ private:
251 285
252protected: 286protected:
253 std::unique_ptr<Tegra::DmaPusher> dma_pusher; 287 std::unique_ptr<Tegra::DmaPusher> dma_pusher;
288 Core::System& system;
254 VideoCore::RendererBase& renderer; 289 VideoCore::RendererBase& renderer;
255 290
256private: 291private:
@@ -268,6 +303,14 @@ private:
268 std::unique_ptr<Engines::MaxwellDMA> maxwell_dma; 303 std::unique_ptr<Engines::MaxwellDMA> maxwell_dma;
269 /// Inline memory engine 304 /// Inline memory engine
270 std::unique_ptr<Engines::KeplerMemory> kepler_memory; 305 std::unique_ptr<Engines::KeplerMemory> kepler_memory;
306
307 std::array<std::atomic<u32>, Service::Nvidia::MaxSyncPoints> syncpoints{};
308
309 std::array<std::list<u32>, Service::Nvidia::MaxSyncPoints> syncpt_interrupts;
310
311 std::mutex sync_mutex;
312
313 const bool is_async;
271}; 314};
272 315
273#define ASSERT_REG_POSITION(field_name, position) \ 316#define ASSERT_REG_POSITION(field_name, position) \
@@ -280,6 +323,8 @@ ASSERT_REG_POSITION(semaphore_trigger, 0x7);
280ASSERT_REG_POSITION(reference_count, 0x14); 323ASSERT_REG_POSITION(reference_count, 0x14);
281ASSERT_REG_POSITION(semaphore_acquire, 0x1A); 324ASSERT_REG_POSITION(semaphore_acquire, 0x1A);
282ASSERT_REG_POSITION(semaphore_release, 0x1B); 325ASSERT_REG_POSITION(semaphore_release, 0x1B);
326ASSERT_REG_POSITION(fence_value, 0x1C);
327ASSERT_REG_POSITION(fence_action, 0x1D);
283 328
284ASSERT_REG_POSITION(acquire_mode, 0x100); 329ASSERT_REG_POSITION(acquire_mode, 0x100);
285ASSERT_REG_POSITION(acquire_source, 0x101); 330ASSERT_REG_POSITION(acquire_source, 0x101);
diff --git a/src/video_core/gpu_asynch.cpp b/src/video_core/gpu_asynch.cpp
index d4e2553a9..ea67be831 100644
--- a/src/video_core/gpu_asynch.cpp
+++ b/src/video_core/gpu_asynch.cpp
@@ -2,6 +2,8 @@
2// Licensed under GPLv2 or any later version 2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included. 3// Refer to the license.txt file included.
4 4
5#include "core/core.h"
6#include "core/hardware_interrupt_manager.h"
5#include "video_core/gpu_asynch.h" 7#include "video_core/gpu_asynch.h"
6#include "video_core/gpu_thread.h" 8#include "video_core/gpu_thread.h"
7#include "video_core/renderer_base.h" 9#include "video_core/renderer_base.h"
@@ -9,7 +11,7 @@
9namespace VideoCommon { 11namespace VideoCommon {
10 12
11GPUAsynch::GPUAsynch(Core::System& system, VideoCore::RendererBase& renderer) 13GPUAsynch::GPUAsynch(Core::System& system, VideoCore::RendererBase& renderer)
12 : GPU(system, renderer), gpu_thread{system} {} 14 : GPU(system, renderer, true), gpu_thread{system} {}
13 15
14GPUAsynch::~GPUAsynch() = default; 16GPUAsynch::~GPUAsynch() = default;
15 17
@@ -38,4 +40,9 @@ void GPUAsynch::FlushAndInvalidateRegion(CacheAddr addr, u64 size) {
38 gpu_thread.FlushAndInvalidateRegion(addr, size); 40 gpu_thread.FlushAndInvalidateRegion(addr, size);
39} 41}
40 42
43void GPUAsynch::TriggerCpuInterrupt(const u32 syncpoint_id, const u32 value) const {
44 auto& interrupt_manager = system.InterruptManager();
45 interrupt_manager.GPUInterruptSyncpt(syncpoint_id, value);
46}
47
41} // namespace VideoCommon 48} // namespace VideoCommon
diff --git a/src/video_core/gpu_asynch.h b/src/video_core/gpu_asynch.h
index 30be74cba..36377d677 100644
--- a/src/video_core/gpu_asynch.h
+++ b/src/video_core/gpu_asynch.h
@@ -27,6 +27,9 @@ public:
27 void InvalidateRegion(CacheAddr addr, u64 size) override; 27 void InvalidateRegion(CacheAddr addr, u64 size) override;
28 void FlushAndInvalidateRegion(CacheAddr addr, u64 size) override; 28 void FlushAndInvalidateRegion(CacheAddr addr, u64 size) override;
29 29
30protected:
31 void TriggerCpuInterrupt(u32 syncpoint_id, u32 value) const override;
32
30private: 33private:
31 GPUThread::ThreadManager gpu_thread; 34 GPUThread::ThreadManager gpu_thread;
32}; 35};
diff --git a/src/video_core/gpu_synch.cpp b/src/video_core/gpu_synch.cpp
index 45e43b1dc..d4ead9c47 100644
--- a/src/video_core/gpu_synch.cpp
+++ b/src/video_core/gpu_synch.cpp
@@ -8,7 +8,7 @@
8namespace VideoCommon { 8namespace VideoCommon {
9 9
10GPUSynch::GPUSynch(Core::System& system, VideoCore::RendererBase& renderer) 10GPUSynch::GPUSynch(Core::System& system, VideoCore::RendererBase& renderer)
11 : GPU(system, renderer) {} 11 : GPU(system, renderer, false) {}
12 12
13GPUSynch::~GPUSynch() = default; 13GPUSynch::~GPUSynch() = default;
14 14
diff --git a/src/video_core/gpu_synch.h b/src/video_core/gpu_synch.h
index 3031fcf72..07bcc47f1 100644
--- a/src/video_core/gpu_synch.h
+++ b/src/video_core/gpu_synch.h
@@ -25,6 +25,10 @@ public:
25 void FlushRegion(CacheAddr addr, u64 size) override; 25 void FlushRegion(CacheAddr addr, u64 size) override;
26 void InvalidateRegion(CacheAddr addr, u64 size) override; 26 void InvalidateRegion(CacheAddr addr, u64 size) override;
27 void FlushAndInvalidateRegion(CacheAddr addr, u64 size) override; 27 void FlushAndInvalidateRegion(CacheAddr addr, u64 size) override;
28
29protected:
30 void TriggerCpuInterrupt([[maybe_unused]] u32 syncpoint_id,
31 [[maybe_unused]] u32 value) const override {}
28}; 32};
29 33
30} // namespace VideoCommon 34} // namespace VideoCommon
diff --git a/src/video_core/gpu_thread.cpp b/src/video_core/gpu_thread.cpp
index 3f0939ec9..b441e92b0 100644
--- a/src/video_core/gpu_thread.cpp
+++ b/src/video_core/gpu_thread.cpp
@@ -21,7 +21,8 @@ static void RunThread(VideoCore::RendererBase& renderer, Tegra::DmaPusher& dma_p
21 MicroProfileOnThreadCreate("GpuThread"); 21 MicroProfileOnThreadCreate("GpuThread");
22 22
23 // Wait for first GPU command before acquiring the window context 23 // Wait for first GPU command before acquiring the window context
24 state.WaitForCommands(); 24 while (state.queue.Empty())
25 ;
25 26
26 // If emulation was stopped during disk shader loading, abort before trying to acquire context 27 // If emulation was stopped during disk shader loading, abort before trying to acquire context
27 if (!state.is_running) { 28 if (!state.is_running) {
@@ -32,7 +33,6 @@ static void RunThread(VideoCore::RendererBase& renderer, Tegra::DmaPusher& dma_p
32 33
33 CommandDataContainer next; 34 CommandDataContainer next;
34 while (state.is_running) { 35 while (state.is_running) {
35 state.WaitForCommands();
36 while (!state.queue.Empty()) { 36 while (!state.queue.Empty()) {
37 state.queue.Pop(next); 37 state.queue.Pop(next);
38 if (const auto submit_list = std::get_if<SubmitListCommand>(&next.data)) { 38 if (const auto submit_list = std::get_if<SubmitListCommand>(&next.data)) {
@@ -49,8 +49,7 @@ static void RunThread(VideoCore::RendererBase& renderer, Tegra::DmaPusher& dma_p
49 } else { 49 } else {
50 UNREACHABLE(); 50 UNREACHABLE();
51 } 51 }
52 state.signaled_fence = next.fence; 52 state.signaled_fence.store(next.fence);
53 state.TrySynchronize();
54 } 53 }
55 } 54 }
56} 55}
@@ -89,12 +88,7 @@ void ThreadManager::FlushRegion(CacheAddr addr, u64 size) {
89} 88}
90 89
91void ThreadManager::InvalidateRegion(CacheAddr addr, u64 size) { 90void ThreadManager::InvalidateRegion(CacheAddr addr, u64 size) {
92 if (state.queue.Empty()) { 91 system.Renderer().Rasterizer().InvalidateRegion(addr, size);
93 // It's quicker to invalidate a single region on the CPU if the queue is already empty
94 system.Renderer().Rasterizer().InvalidateRegion(addr, size);
95 } else {
96 PushCommand(InvalidateRegionCommand(addr, size));
97 }
98} 92}
99 93
100void ThreadManager::FlushAndInvalidateRegion(CacheAddr addr, u64 size) { 94void ThreadManager::FlushAndInvalidateRegion(CacheAddr addr, u64 size) {
@@ -105,22 +99,13 @@ void ThreadManager::FlushAndInvalidateRegion(CacheAddr addr, u64 size) {
105u64 ThreadManager::PushCommand(CommandData&& command_data) { 99u64 ThreadManager::PushCommand(CommandData&& command_data) {
106 const u64 fence{++state.last_fence}; 100 const u64 fence{++state.last_fence};
107 state.queue.Push(CommandDataContainer(std::move(command_data), fence)); 101 state.queue.Push(CommandDataContainer(std::move(command_data), fence));
108 state.SignalCommands();
109 return fence; 102 return fence;
110} 103}
111 104
112MICROPROFILE_DEFINE(GPU_wait, "GPU", "Wait for the GPU", MP_RGB(128, 128, 192)); 105MICROPROFILE_DEFINE(GPU_wait, "GPU", "Wait for the GPU", MP_RGB(128, 128, 192));
113void SynchState::WaitForSynchronization(u64 fence) { 106void SynchState::WaitForSynchronization(u64 fence) {
114 if (signaled_fence >= fence) { 107 while (signaled_fence.load() < fence)
115 return; 108 ;
116 }
117
118 // Wait for the GPU to be idle (all commands to be executed)
119 {
120 MICROPROFILE_SCOPE(GPU_wait);
121 std::unique_lock lock{synchronization_mutex};
122 synchronization_condition.wait(lock, [this, fence] { return signaled_fence >= fence; });
123 }
124} 109}
125 110
126} // namespace VideoCommon::GPUThread 111} // namespace VideoCommon::GPUThread
diff --git a/src/video_core/gpu_thread.h b/src/video_core/gpu_thread.h
index 05a168a72..1d9d0c39e 100644
--- a/src/video_core/gpu_thread.h
+++ b/src/video_core/gpu_thread.h
@@ -88,41 +88,9 @@ struct CommandDataContainer {
88/// Struct used to synchronize the GPU thread 88/// Struct used to synchronize the GPU thread
89struct SynchState final { 89struct SynchState final {
90 std::atomic_bool is_running{true}; 90 std::atomic_bool is_running{true};
91 std::atomic_int queued_frame_count{};
92 std::mutex synchronization_mutex;
93 std::mutex commands_mutex;
94 std::condition_variable commands_condition;
95 std::condition_variable synchronization_condition;
96
97 /// Returns true if the gap in GPU commands is small enough that we can consider the CPU and GPU
98 /// synchronized. This is entirely empirical.
99 bool IsSynchronized() const {
100 constexpr std::size_t max_queue_gap{5};
101 return queue.Size() <= max_queue_gap;
102 }
103
104 void TrySynchronize() {
105 if (IsSynchronized()) {
106 std::lock_guard lock{synchronization_mutex};
107 synchronization_condition.notify_one();
108 }
109 }
110 91
111 void WaitForSynchronization(u64 fence); 92 void WaitForSynchronization(u64 fence);
112 93
113 void SignalCommands() {
114 if (queue.Empty()) {
115 return;
116 }
117
118 commands_condition.notify_one();
119 }
120
121 void WaitForCommands() {
122 std::unique_lock lock{commands_mutex};
123 commands_condition.wait(lock, [this] { return !queue.Empty(); });
124 }
125
126 using CommandQueue = Common::SPSCQueue<CommandDataContainer>; 94 using CommandQueue = Common::SPSCQueue<CommandDataContainer>;
127 CommandQueue queue; 95 CommandQueue queue;
128 u64 last_fence{}; 96 u64 last_fence{};
diff --git a/src/video_core/rasterizer_interface.h b/src/video_core/rasterizer_interface.h
index 9881df0d5..6b3f2d50a 100644
--- a/src/video_core/rasterizer_interface.h
+++ b/src/video_core/rasterizer_interface.h
@@ -50,6 +50,9 @@ public:
50 /// and invalidated 50 /// and invalidated
51 virtual void FlushAndInvalidateRegion(CacheAddr addr, u64 size) = 0; 51 virtual void FlushAndInvalidateRegion(CacheAddr addr, u64 size) = 0;
52 52
53 /// Notify the rasterizer to send all written commands to the host GPU.
54 virtual void FlushCommands() = 0;
55
53 /// Notify rasterizer that a frame is about to finish 56 /// Notify rasterizer that a frame is about to finish
54 virtual void TickFrame() = 0; 57 virtual void TickFrame() = 0;
55 58
diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.cpp b/src/video_core/renderer_opengl/gl_buffer_cache.cpp
index 2a9b523f5..f8a807c84 100644
--- a/src/video_core/renderer_opengl/gl_buffer_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_buffer_cache.cpp
@@ -7,28 +7,41 @@
7#include <glad/glad.h> 7#include <glad/glad.h>
8 8
9#include "common/assert.h" 9#include "common/assert.h"
10#include "common/microprofile.h"
11#include "video_core/rasterizer_interface.h"
10#include "video_core/renderer_opengl/gl_buffer_cache.h" 12#include "video_core/renderer_opengl/gl_buffer_cache.h"
11#include "video_core/renderer_opengl/gl_rasterizer.h" 13#include "video_core/renderer_opengl/gl_rasterizer.h"
12#include "video_core/renderer_opengl/gl_resource_manager.h" 14#include "video_core/renderer_opengl/gl_resource_manager.h"
13 15
14namespace OpenGL { 16namespace OpenGL {
15 17
18MICROPROFILE_DEFINE(OpenGL_Buffer_Download, "OpenGL", "Buffer Download", MP_RGB(192, 192, 128));
19
20CachedBufferBlock::CachedBufferBlock(CacheAddr cache_addr, const std::size_t size)
21 : VideoCommon::BufferBlock{cache_addr, size} {
22 gl_buffer.Create();
23 glNamedBufferData(gl_buffer.handle, static_cast<GLsizeiptr>(size), nullptr, GL_DYNAMIC_DRAW);
24}
25
26CachedBufferBlock::~CachedBufferBlock() = default;
27
16OGLBufferCache::OGLBufferCache(RasterizerOpenGL& rasterizer, Core::System& system, 28OGLBufferCache::OGLBufferCache(RasterizerOpenGL& rasterizer, Core::System& system,
17 std::size_t stream_size) 29 std::size_t stream_size)
18 : VideoCommon::BufferCache<OGLBuffer, GLuint, OGLStreamBuffer>{ 30 : VideoCommon::BufferCache<Buffer, GLuint, OGLStreamBuffer>{
19 rasterizer, system, std::make_unique<OGLStreamBuffer>(stream_size, true)} {} 31 rasterizer, system, std::make_unique<OGLStreamBuffer>(stream_size, true)} {}
20 32
21OGLBufferCache::~OGLBufferCache() = default; 33OGLBufferCache::~OGLBufferCache() = default;
22 34
23OGLBuffer OGLBufferCache::CreateBuffer(std::size_t size) { 35Buffer OGLBufferCache::CreateBlock(CacheAddr cache_addr, std::size_t size) {
24 OGLBuffer buffer; 36 return std::make_shared<CachedBufferBlock>(cache_addr, size);
25 buffer.Create(); 37}
26 glNamedBufferData(buffer.handle, static_cast<GLsizeiptr>(size), nullptr, GL_DYNAMIC_DRAW); 38
27 return buffer; 39void OGLBufferCache::WriteBarrier() {
40 glMemoryBarrier(GL_ALL_BARRIER_BITS);
28} 41}
29 42
30const GLuint* OGLBufferCache::ToHandle(const OGLBuffer& buffer) { 43const GLuint* OGLBufferCache::ToHandle(const Buffer& buffer) {
31 return &buffer.handle; 44 return buffer->GetHandle();
32} 45}
33 46
34const GLuint* OGLBufferCache::GetEmptyBuffer(std::size_t) { 47const GLuint* OGLBufferCache::GetEmptyBuffer(std::size_t) {
@@ -36,23 +49,24 @@ const GLuint* OGLBufferCache::GetEmptyBuffer(std::size_t) {
36 return &null_buffer; 49 return &null_buffer;
37} 50}
38 51
39void OGLBufferCache::UploadBufferData(const OGLBuffer& buffer, std::size_t offset, std::size_t size, 52void OGLBufferCache::UploadBlockData(const Buffer& buffer, std::size_t offset, std::size_t size,
40 const u8* data) { 53 const u8* data) {
41 glNamedBufferSubData(buffer.handle, static_cast<GLintptr>(offset), 54 glNamedBufferSubData(*buffer->GetHandle(), static_cast<GLintptr>(offset),
42 static_cast<GLsizeiptr>(size), data); 55 static_cast<GLsizeiptr>(size), data);
43} 56}
44 57
45void OGLBufferCache::DownloadBufferData(const OGLBuffer& buffer, std::size_t offset, 58void OGLBufferCache::DownloadBlockData(const Buffer& buffer, std::size_t offset, std::size_t size,
46 std::size_t size, u8* data) { 59 u8* data) {
47 glGetNamedBufferSubData(buffer.handle, static_cast<GLintptr>(offset), 60 MICROPROFILE_SCOPE(OpenGL_Buffer_Download);
61 glGetNamedBufferSubData(*buffer->GetHandle(), static_cast<GLintptr>(offset),
48 static_cast<GLsizeiptr>(size), data); 62 static_cast<GLsizeiptr>(size), data);
49} 63}
50 64
51void OGLBufferCache::CopyBufferData(const OGLBuffer& src, const OGLBuffer& dst, 65void OGLBufferCache::CopyBlock(const Buffer& src, const Buffer& dst, std::size_t src_offset,
52 std::size_t src_offset, std::size_t dst_offset, 66 std::size_t dst_offset, std::size_t size) {
53 std::size_t size) { 67 glCopyNamedBufferSubData(*src->GetHandle(), *dst->GetHandle(),
54 glCopyNamedBufferSubData(src.handle, dst.handle, static_cast<GLintptr>(src_offset), 68 static_cast<GLintptr>(src_offset), static_cast<GLintptr>(dst_offset),
55 static_cast<GLintptr>(dst_offset), static_cast<GLsizeiptr>(size)); 69 static_cast<GLsizeiptr>(size));
56} 70}
57 71
58} // namespace OpenGL 72} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.h b/src/video_core/renderer_opengl/gl_buffer_cache.h
index 8c8ac4038..022e7bfa9 100644
--- a/src/video_core/renderer_opengl/gl_buffer_cache.h
+++ b/src/video_core/renderer_opengl/gl_buffer_cache.h
@@ -7,7 +7,7 @@
7#include <memory> 7#include <memory>
8 8
9#include "common/common_types.h" 9#include "common/common_types.h"
10#include "video_core/buffer_cache.h" 10#include "video_core/buffer_cache/buffer_cache.h"
11#include "video_core/rasterizer_cache.h" 11#include "video_core/rasterizer_cache.h"
12#include "video_core/renderer_opengl/gl_resource_manager.h" 12#include "video_core/renderer_opengl/gl_resource_manager.h"
13#include "video_core/renderer_opengl/gl_stream_buffer.h" 13#include "video_core/renderer_opengl/gl_stream_buffer.h"
@@ -21,7 +21,24 @@ namespace OpenGL {
21class OGLStreamBuffer; 21class OGLStreamBuffer;
22class RasterizerOpenGL; 22class RasterizerOpenGL;
23 23
24class OGLBufferCache final : public VideoCommon::BufferCache<OGLBuffer, GLuint, OGLStreamBuffer> { 24class CachedBufferBlock;
25
26using Buffer = std::shared_ptr<CachedBufferBlock>;
27
28class CachedBufferBlock : public VideoCommon::BufferBlock {
29public:
30 explicit CachedBufferBlock(CacheAddr cache_addr, const std::size_t size);
31 ~CachedBufferBlock();
32
33 const GLuint* GetHandle() const {
34 return &gl_buffer.handle;
35 }
36
37private:
38 OGLBuffer gl_buffer{};
39};
40
41class OGLBufferCache final : public VideoCommon::BufferCache<Buffer, GLuint, OGLStreamBuffer> {
25public: 42public:
26 explicit OGLBufferCache(RasterizerOpenGL& rasterizer, Core::System& system, 43 explicit OGLBufferCache(RasterizerOpenGL& rasterizer, Core::System& system,
27 std::size_t stream_size); 44 std::size_t stream_size);
@@ -30,18 +47,20 @@ public:
30 const GLuint* GetEmptyBuffer(std::size_t) override; 47 const GLuint* GetEmptyBuffer(std::size_t) override;
31 48
32protected: 49protected:
33 OGLBuffer CreateBuffer(std::size_t size) override; 50 Buffer CreateBlock(CacheAddr cache_addr, std::size_t size) override;
51
52 void WriteBarrier() override;
34 53
35 const GLuint* ToHandle(const OGLBuffer& buffer) override; 54 const GLuint* ToHandle(const Buffer& buffer) override;
36 55
37 void UploadBufferData(const OGLBuffer& buffer, std::size_t offset, std::size_t size, 56 void UploadBlockData(const Buffer& buffer, std::size_t offset, std::size_t size,
38 const u8* data) override; 57 const u8* data) override;
39 58
40 void DownloadBufferData(const OGLBuffer& buffer, std::size_t offset, std::size_t size, 59 void DownloadBlockData(const Buffer& buffer, std::size_t offset, std::size_t size,
41 u8* data) override; 60 u8* data) override;
42 61
43 void CopyBufferData(const OGLBuffer& src, const OGLBuffer& dst, std::size_t src_offset, 62 void CopyBlock(const Buffer& src, const Buffer& dst, std::size_t src_offset,
44 std::size_t dst_offset, std::size_t size) override; 63 std::size_t dst_offset, std::size_t size) override;
45}; 64};
46 65
47} // namespace OpenGL 66} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_device.cpp b/src/video_core/renderer_opengl/gl_device.cpp
index 85424a4c9..03d434b28 100644
--- a/src/video_core/renderer_opengl/gl_device.cpp
+++ b/src/video_core/renderer_opengl/gl_device.cpp
@@ -27,6 +27,8 @@ Device::Device() {
27 shader_storage_alignment = GetInteger<std::size_t>(GL_SHADER_STORAGE_BUFFER_OFFSET_ALIGNMENT); 27 shader_storage_alignment = GetInteger<std::size_t>(GL_SHADER_STORAGE_BUFFER_OFFSET_ALIGNMENT);
28 max_vertex_attributes = GetInteger<u32>(GL_MAX_VERTEX_ATTRIBS); 28 max_vertex_attributes = GetInteger<u32>(GL_MAX_VERTEX_ATTRIBS);
29 max_varyings = GetInteger<u32>(GL_MAX_VARYING_VECTORS); 29 max_varyings = GetInteger<u32>(GL_MAX_VARYING_VECTORS);
30 has_warp_intrinsics = GLAD_GL_NV_gpu_shader5 && GLAD_GL_NV_shader_thread_group &&
31 GLAD_GL_NV_shader_thread_shuffle;
30 has_vertex_viewport_layer = GLAD_GL_ARB_shader_viewport_layer_array; 32 has_vertex_viewport_layer = GLAD_GL_ARB_shader_viewport_layer_array;
31 has_variable_aoffi = TestVariableAoffi(); 33 has_variable_aoffi = TestVariableAoffi();
32 has_component_indexing_bug = TestComponentIndexingBug(); 34 has_component_indexing_bug = TestComponentIndexingBug();
@@ -36,6 +38,7 @@ Device::Device(std::nullptr_t) {
36 uniform_buffer_alignment = 0; 38 uniform_buffer_alignment = 0;
37 max_vertex_attributes = 16; 39 max_vertex_attributes = 16;
38 max_varyings = 15; 40 max_varyings = 15;
41 has_warp_intrinsics = true;
39 has_vertex_viewport_layer = true; 42 has_vertex_viewport_layer = true;
40 has_variable_aoffi = true; 43 has_variable_aoffi = true;
41 has_component_indexing_bug = false; 44 has_component_indexing_bug = false;
diff --git a/src/video_core/renderer_opengl/gl_device.h b/src/video_core/renderer_opengl/gl_device.h
index dc883722d..3ef7c6dd8 100644
--- a/src/video_core/renderer_opengl/gl_device.h
+++ b/src/video_core/renderer_opengl/gl_device.h
@@ -30,6 +30,10 @@ public:
30 return max_varyings; 30 return max_varyings;
31 } 31 }
32 32
33 bool HasWarpIntrinsics() const {
34 return has_warp_intrinsics;
35 }
36
33 bool HasVertexViewportLayer() const { 37 bool HasVertexViewportLayer() const {
34 return has_vertex_viewport_layer; 38 return has_vertex_viewport_layer;
35 } 39 }
@@ -50,6 +54,7 @@ private:
50 std::size_t shader_storage_alignment{}; 54 std::size_t shader_storage_alignment{};
51 u32 max_vertex_attributes{}; 55 u32 max_vertex_attributes{};
52 u32 max_varyings{}; 56 u32 max_varyings{};
57 bool has_warp_intrinsics{};
53 bool has_vertex_viewport_layer{}; 58 bool has_vertex_viewport_layer{};
54 bool has_variable_aoffi{}; 59 bool has_variable_aoffi{};
55 bool has_component_indexing_bug{}; 60 bool has_component_indexing_bug{};
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp
index c28ae795c..bb09ecd52 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -708,8 +708,6 @@ void RasterizerOpenGL::DrawArrays() {
708 return; 708 return;
709 } 709 }
710 710
711 const auto& regs = gpu.regs;
712
713 SyncColorMask(); 711 SyncColorMask();
714 SyncFragmentColorClampState(); 712 SyncFragmentColorClampState();
715 SyncMultiSampleState(); 713 SyncMultiSampleState();
@@ -863,6 +861,10 @@ void RasterizerOpenGL::FlushAndInvalidateRegion(CacheAddr addr, u64 size) {
863 InvalidateRegion(addr, size); 861 InvalidateRegion(addr, size);
864} 862}
865 863
864void RasterizerOpenGL::FlushCommands() {
865 glFlush();
866}
867
866void RasterizerOpenGL::TickFrame() { 868void RasterizerOpenGL::TickFrame() {
867 buffer_cache.TickFrame(); 869 buffer_cache.TickFrame();
868} 870}
@@ -976,7 +978,7 @@ void RasterizerOpenGL::SetupGlobalMemory(const GLShader::GlobalMemoryEntry& entr
976 GPUVAddr gpu_addr, std::size_t size) { 978 GPUVAddr gpu_addr, std::size_t size) {
977 const auto alignment{device.GetShaderStorageBufferAlignment()}; 979 const auto alignment{device.GetShaderStorageBufferAlignment()};
978 const auto [ssbo, buffer_offset] = 980 const auto [ssbo, buffer_offset] =
979 buffer_cache.UploadMemory(gpu_addr, size, alignment, true, entry.IsWritten()); 981 buffer_cache.UploadMemory(gpu_addr, size, alignment, entry.IsWritten());
980 bind_ssbo_pushbuffer.Push(ssbo, buffer_offset, static_cast<GLsizeiptr>(size)); 982 bind_ssbo_pushbuffer.Push(ssbo, buffer_offset, static_cast<GLsizeiptr>(size));
981} 983}
982 984
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h
index 8b123c48d..9d20a4fbf 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer.h
@@ -63,6 +63,7 @@ public:
63 void FlushRegion(CacheAddr addr, u64 size) override; 63 void FlushRegion(CacheAddr addr, u64 size) override;
64 void InvalidateRegion(CacheAddr addr, u64 size) override; 64 void InvalidateRegion(CacheAddr addr, u64 size) override;
65 void FlushAndInvalidateRegion(CacheAddr addr, u64 size) override; 65 void FlushAndInvalidateRegion(CacheAddr addr, u64 size) override;
66 void FlushCommands() override;
66 void TickFrame() override; 67 void TickFrame() override;
67 bool AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Regs::Surface& src, 68 bool AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Regs::Surface& src,
68 const Tegra::Engines::Fermi2D::Regs::Surface& dst, 69 const Tegra::Engines::Fermi2D::Regs::Surface& dst,
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp
index 1c90facc3..cf6a5cddf 100644
--- a/src/video_core/renderer_opengl/gl_shader_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp
@@ -212,7 +212,9 @@ CachedProgram SpecializeShader(const std::string& code, const GLShader::ShaderEn
212 const auto texture_buffer_usage{variant.texture_buffer_usage}; 212 const auto texture_buffer_usage{variant.texture_buffer_usage};
213 213
214 std::string source = "#version 430 core\n" 214 std::string source = "#version 430 core\n"
215 "#extension GL_ARB_separate_shader_objects : enable\n"; 215 "#extension GL_ARB_separate_shader_objects : enable\n"
216 "#extension GL_NV_gpu_shader5 : enable\n"
217 "#extension GL_NV_shader_thread_group : enable\n";
216 if (entries.shader_viewport_layer_array) { 218 if (entries.shader_viewport_layer_array) {
217 source += "#extension GL_ARB_shader_viewport_layer_array : enable\n"; 219 source += "#extension GL_ARB_shader_viewport_layer_array : enable\n";
218 } 220 }
@@ -247,20 +249,24 @@ CachedProgram SpecializeShader(const std::string& code, const GLShader::ShaderEn
247 if (!texture_buffer_usage.test(i)) { 249 if (!texture_buffer_usage.test(i)) {
248 continue; 250 continue;
249 } 251 }
250 source += fmt::format("#define SAMPLER_{}_IS_BUFFER", i); 252 source += fmt::format("#define SAMPLER_{}_IS_BUFFER\n", i);
253 }
254 if (texture_buffer_usage.any()) {
255 source += '\n';
251 } 256 }
252 257
253 if (program_type == ProgramType::Geometry) { 258 if (program_type == ProgramType::Geometry) {
254 const auto [glsl_topology, debug_name, max_vertices] = 259 const auto [glsl_topology, debug_name, max_vertices] =
255 GetPrimitiveDescription(primitive_mode); 260 GetPrimitiveDescription(primitive_mode);
256 261
257 source += "layout (" + std::string(glsl_topology) + ") in;\n"; 262 source += "layout (" + std::string(glsl_topology) + ") in;\n\n";
258 source += "#define MAX_VERTEX_INPUT " + std::to_string(max_vertices) + '\n'; 263 source += "#define MAX_VERTEX_INPUT " + std::to_string(max_vertices) + '\n';
259 } 264 }
260 if (program_type == ProgramType::Compute) { 265 if (program_type == ProgramType::Compute) {
261 source += "layout (local_size_variable) in;\n"; 266 source += "layout (local_size_variable) in;\n";
262 } 267 }
263 268
269 source += '\n';
264 source += code; 270 source += code;
265 271
266 OGLShader shader; 272 OGLShader shader;
@@ -289,7 +295,7 @@ std::set<GLenum> GetSupportedFormats() {
289 295
290CachedShader::CachedShader(const ShaderParameters& params, ProgramType program_type, 296CachedShader::CachedShader(const ShaderParameters& params, ProgramType program_type,
291 GLShader::ProgramResult result) 297 GLShader::ProgramResult result)
292 : RasterizerCacheObject{params.host_ptr}, host_ptr{params.host_ptr}, cpu_addr{params.cpu_addr}, 298 : RasterizerCacheObject{params.host_ptr}, cpu_addr{params.cpu_addr},
293 unique_identifier{params.unique_identifier}, program_type{program_type}, 299 unique_identifier{params.unique_identifier}, program_type{program_type},
294 disk_cache{params.disk_cache}, precompiled_programs{params.precompiled_programs}, 300 disk_cache{params.disk_cache}, precompiled_programs{params.precompiled_programs},
295 entries{result.second}, code{std::move(result.first)}, shader_length{entries.shader_length} {} 301 entries{result.second}, code{std::move(result.first)}, shader_length{entries.shader_length} {}
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.h b/src/video_core/renderer_opengl/gl_shader_cache.h
index a3106a0ff..2c8faf855 100644
--- a/src/video_core/renderer_opengl/gl_shader_cache.h
+++ b/src/video_core/renderer_opengl/gl_shader_cache.h
@@ -106,7 +106,6 @@ private:
106 106
107 ShaderDiskCacheUsage GetUsage(const ProgramVariant& variant) const; 107 ShaderDiskCacheUsage GetUsage(const ProgramVariant& variant) const;
108 108
109 u8* host_ptr{};
110 VAddr cpu_addr{}; 109 VAddr cpu_addr{};
111 u64 unique_identifier{}; 110 u64 unique_identifier{};
112 ProgramType program_type{}; 111 ProgramType program_type{};
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
index ffe26b241..359d58cbe 100644
--- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
@@ -565,7 +565,7 @@ private:
565 case Tegra::Shader::ImageType::Texture1D: 565 case Tegra::Shader::ImageType::Texture1D:
566 return "image1D"; 566 return "image1D";
567 case Tegra::Shader::ImageType::TextureBuffer: 567 case Tegra::Shader::ImageType::TextureBuffer:
568 return "bufferImage"; 568 return "imageBuffer";
569 case Tegra::Shader::ImageType::Texture1DArray: 569 case Tegra::Shader::ImageType::Texture1DArray:
570 return "image1DArray"; 570 return "image1DArray";
571 case Tegra::Shader::ImageType::Texture2D: 571 case Tegra::Shader::ImageType::Texture2D:
@@ -1136,6 +1136,16 @@ private:
1136 Type::Float); 1136 Type::Float);
1137 } 1137 }
1138 1138
1139 std::string FCastHalf0(Operation operation) {
1140 const std::string op_a = VisitOperand(operation, 0, Type::HalfFloat);
1141 return fmt::format("({})[0]", op_a);
1142 }
1143
1144 std::string FCastHalf1(Operation operation) {
1145 const std::string op_a = VisitOperand(operation, 0, Type::HalfFloat);
1146 return fmt::format("({})[1]", op_a);
1147 }
1148
1139 template <Type type> 1149 template <Type type>
1140 std::string Min(Operation operation) { 1150 std::string Min(Operation operation) {
1141 return GenerateBinaryCall(operation, "min", type, type, type); 1151 return GenerateBinaryCall(operation, "min", type, type, type);
@@ -1292,6 +1302,11 @@ private:
1292 return ApplyPrecise(operation, BitwiseCastResult(clamped, Type::HalfFloat)); 1302 return ApplyPrecise(operation, BitwiseCastResult(clamped, Type::HalfFloat));
1293 } 1303 }
1294 1304
1305 std::string HCastFloat(Operation operation) {
1306 const std::string op_a = VisitOperand(operation, 0, Type::Float);
1307 return fmt::format("fromHalf2(vec2({}, 0.0f))", op_a);
1308 }
1309
1295 std::string HUnpack(Operation operation) { 1310 std::string HUnpack(Operation operation) {
1296 const std::string operand{VisitOperand(operation, 0, Type::HalfFloat)}; 1311 const std::string operand{VisitOperand(operation, 0, Type::HalfFloat)};
1297 const auto value = [&]() -> std::string { 1312 const auto value = [&]() -> std::string {
@@ -1720,6 +1735,48 @@ private:
1720 return "utof(gl_WorkGroupID"s + GetSwizzle(element) + ')'; 1735 return "utof(gl_WorkGroupID"s + GetSwizzle(element) + ')';
1721 } 1736 }
1722 1737
1738 std::string BallotThread(Operation operation) {
1739 const std::string value = VisitOperand(operation, 0, Type::Bool);
1740 if (!device.HasWarpIntrinsics()) {
1741 LOG_ERROR(Render_OpenGL,
1742 "Nvidia warp intrinsics are not available and its required by a shader");
1743 // Stub on non-Nvidia devices by simulating all threads voting the same as the active
1744 // one.
1745 return fmt::format("utof({} ? 0xFFFFFFFFU : 0U)", value);
1746 }
1747 return fmt::format("utof(ballotThreadNV({}))", value);
1748 }
1749
1750 std::string Vote(Operation operation, const char* func) {
1751 const std::string value = VisitOperand(operation, 0, Type::Bool);
1752 if (!device.HasWarpIntrinsics()) {
1753 LOG_ERROR(Render_OpenGL,
1754 "Nvidia vote intrinsics are not available and its required by a shader");
1755 // Stub with a warp size of one.
1756 return value;
1757 }
1758 return fmt::format("{}({})", func, value);
1759 }
1760
1761 std::string VoteAll(Operation operation) {
1762 return Vote(operation, "allThreadsNV");
1763 }
1764
1765 std::string VoteAny(Operation operation) {
1766 return Vote(operation, "anyThreadNV");
1767 }
1768
1769 std::string VoteEqual(Operation operation) {
1770 if (!device.HasWarpIntrinsics()) {
1771 LOG_ERROR(Render_OpenGL,
1772 "Nvidia vote intrinsics are not available and its required by a shader");
1773 // We must return true here since a stub for a theoretical warp size of 1 will always
1774 // return an equal result for all its votes.
1775 return "true";
1776 }
1777 return Vote(operation, "allThreadsEqualNV");
1778 }
1779
1723 static constexpr std::array operation_decompilers = { 1780 static constexpr std::array operation_decompilers = {
1724 &GLSLDecompiler::Assign, 1781 &GLSLDecompiler::Assign,
1725 1782
@@ -1732,6 +1789,8 @@ private:
1732 &GLSLDecompiler::Negate<Type::Float>, 1789 &GLSLDecompiler::Negate<Type::Float>,
1733 &GLSLDecompiler::Absolute<Type::Float>, 1790 &GLSLDecompiler::Absolute<Type::Float>,
1734 &GLSLDecompiler::FClamp, 1791 &GLSLDecompiler::FClamp,
1792 &GLSLDecompiler::FCastHalf0,
1793 &GLSLDecompiler::FCastHalf1,
1735 &GLSLDecompiler::Min<Type::Float>, 1794 &GLSLDecompiler::Min<Type::Float>,
1736 &GLSLDecompiler::Max<Type::Float>, 1795 &GLSLDecompiler::Max<Type::Float>,
1737 &GLSLDecompiler::FCos, 1796 &GLSLDecompiler::FCos,
@@ -1792,6 +1851,7 @@ private:
1792 &GLSLDecompiler::Absolute<Type::HalfFloat>, 1851 &GLSLDecompiler::Absolute<Type::HalfFloat>,
1793 &GLSLDecompiler::HNegate, 1852 &GLSLDecompiler::HNegate,
1794 &GLSLDecompiler::HClamp, 1853 &GLSLDecompiler::HClamp,
1854 &GLSLDecompiler::HCastFloat,
1795 &GLSLDecompiler::HUnpack, 1855 &GLSLDecompiler::HUnpack,
1796 &GLSLDecompiler::HMergeF32, 1856 &GLSLDecompiler::HMergeF32,
1797 &GLSLDecompiler::HMergeH0, 1857 &GLSLDecompiler::HMergeH0,
@@ -1867,6 +1927,11 @@ private:
1867 &GLSLDecompiler::WorkGroupId<0>, 1927 &GLSLDecompiler::WorkGroupId<0>,
1868 &GLSLDecompiler::WorkGroupId<1>, 1928 &GLSLDecompiler::WorkGroupId<1>,
1869 &GLSLDecompiler::WorkGroupId<2>, 1929 &GLSLDecompiler::WorkGroupId<2>,
1930
1931 &GLSLDecompiler::BallotThread,
1932 &GLSLDecompiler::VoteAll,
1933 &GLSLDecompiler::VoteAny,
1934 &GLSLDecompiler::VoteEqual,
1870 }; 1935 };
1871 static_assert(operation_decompilers.size() == static_cast<std::size_t>(OperationCode::Amount)); 1936 static_assert(operation_decompilers.size() == static_cast<std::size_t>(OperationCode::Amount));
1872 1937
diff --git a/src/video_core/renderer_opengl/gl_texture_cache.cpp b/src/video_core/renderer_opengl/gl_texture_cache.cpp
index 8fcd39a69..4f135fe03 100644
--- a/src/video_core/renderer_opengl/gl_texture_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_texture_cache.cpp
@@ -137,7 +137,6 @@ constexpr std::array<FormatTuple, VideoCore::Surface::MaxPixelFormat> tex_format
137const FormatTuple& GetFormatTuple(PixelFormat pixel_format, ComponentType component_type) { 137const FormatTuple& GetFormatTuple(PixelFormat pixel_format, ComponentType component_type) {
138 ASSERT(static_cast<std::size_t>(pixel_format) < tex_format_tuples.size()); 138 ASSERT(static_cast<std::size_t>(pixel_format) < tex_format_tuples.size());
139 const auto& format{tex_format_tuples[static_cast<std::size_t>(pixel_format)]}; 139 const auto& format{tex_format_tuples[static_cast<std::size_t>(pixel_format)]};
140 ASSERT(component_type == format.component_type);
141 return format; 140 return format;
142} 141}
143 142
@@ -185,6 +184,9 @@ GLint GetSwizzleSource(SwizzleSource source) {
185} 184}
186 185
187void ApplyTextureDefaults(const SurfaceParams& params, GLuint texture) { 186void ApplyTextureDefaults(const SurfaceParams& params, GLuint texture) {
187 if (params.IsBuffer()) {
188 return;
189 }
188 glTextureParameteri(texture, GL_TEXTURE_MIN_FILTER, GL_LINEAR); 190 glTextureParameteri(texture, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
189 glTextureParameteri(texture, GL_TEXTURE_MAG_FILTER, GL_LINEAR); 191 glTextureParameteri(texture, GL_TEXTURE_MAG_FILTER, GL_LINEAR);
190 glTextureParameteri(texture, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE); 192 glTextureParameteri(texture, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
@@ -209,6 +211,7 @@ OGLTexture CreateTexture(const SurfaceParams& params, GLenum target, GLenum inte
209 glNamedBufferStorage(texture_buffer.handle, params.width * params.GetBytesPerPixel(), 211 glNamedBufferStorage(texture_buffer.handle, params.width * params.GetBytesPerPixel(),
210 nullptr, GL_DYNAMIC_STORAGE_BIT); 212 nullptr, GL_DYNAMIC_STORAGE_BIT);
211 glTextureBuffer(texture.handle, internal_format, texture_buffer.handle); 213 glTextureBuffer(texture.handle, internal_format, texture_buffer.handle);
214 break;
212 case SurfaceTarget::Texture2D: 215 case SurfaceTarget::Texture2D:
213 case SurfaceTarget::TextureCubemap: 216 case SurfaceTarget::TextureCubemap:
214 glTextureStorage2D(texture.handle, params.emulated_levels, internal_format, params.width, 217 glTextureStorage2D(texture.handle, params.emulated_levels, internal_format, params.width,
diff --git a/src/video_core/renderer_opengl/gl_texture_cache.h b/src/video_core/renderer_opengl/gl_texture_cache.h
index ff6ab6988..21324488a 100644
--- a/src/video_core/renderer_opengl/gl_texture_cache.h
+++ b/src/video_core/renderer_opengl/gl_texture_cache.h
@@ -51,7 +51,7 @@ public:
51 } 51 }
52 52
53protected: 53protected:
54 void DecorateSurfaceName(); 54 void DecorateSurfaceName() override;
55 55
56 View CreateView(const ViewParams& view_key) override; 56 View CreateView(const ViewParams& view_key) override;
57 View CreateViewInner(const ViewParams& view_key, bool is_proxy); 57 View CreateViewInner(const ViewParams& view_key, bool is_proxy);
diff --git a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp
index d267712c9..a35b45c9c 100644
--- a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp
+++ b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp
@@ -735,6 +735,16 @@ private:
735 return {}; 735 return {};
736 } 736 }
737 737
738 Id FCastHalf0(Operation operation) {
739 UNIMPLEMENTED();
740 return {};
741 }
742
743 Id FCastHalf1(Operation operation) {
744 UNIMPLEMENTED();
745 return {};
746 }
747
738 Id HNegate(Operation operation) { 748 Id HNegate(Operation operation) {
739 UNIMPLEMENTED(); 749 UNIMPLEMENTED();
740 return {}; 750 return {};
@@ -745,6 +755,11 @@ private:
745 return {}; 755 return {};
746 } 756 }
747 757
758 Id HCastFloat(Operation operation) {
759 UNIMPLEMENTED();
760 return {};
761 }
762
748 Id HUnpack(Operation operation) { 763 Id HUnpack(Operation operation) {
749 UNIMPLEMENTED(); 764 UNIMPLEMENTED();
750 return {}; 765 return {};
@@ -1057,6 +1072,26 @@ private:
1057 return {}; 1072 return {};
1058 } 1073 }
1059 1074
1075 Id BallotThread(Operation) {
1076 UNIMPLEMENTED();
1077 return {};
1078 }
1079
1080 Id VoteAll(Operation) {
1081 UNIMPLEMENTED();
1082 return {};
1083 }
1084
1085 Id VoteAny(Operation) {
1086 UNIMPLEMENTED();
1087 return {};
1088 }
1089
1090 Id VoteEqual(Operation) {
1091 UNIMPLEMENTED();
1092 return {};
1093 }
1094
1060 Id DeclareBuiltIn(spv::BuiltIn builtin, spv::StorageClass storage, Id type, 1095 Id DeclareBuiltIn(spv::BuiltIn builtin, spv::StorageClass storage, Id type,
1061 const std::string& name) { 1096 const std::string& name) {
1062 const Id id = OpVariable(type, storage); 1097 const Id id = OpVariable(type, storage);
@@ -1210,6 +1245,8 @@ private:
1210 &SPIRVDecompiler::Unary<&Module::OpFNegate, Type::Float>, 1245 &SPIRVDecompiler::Unary<&Module::OpFNegate, Type::Float>,
1211 &SPIRVDecompiler::Unary<&Module::OpFAbs, Type::Float>, 1246 &SPIRVDecompiler::Unary<&Module::OpFAbs, Type::Float>,
1212 &SPIRVDecompiler::Ternary<&Module::OpFClamp, Type::Float>, 1247 &SPIRVDecompiler::Ternary<&Module::OpFClamp, Type::Float>,
1248 &SPIRVDecompiler::FCastHalf0,
1249 &SPIRVDecompiler::FCastHalf1,
1213 &SPIRVDecompiler::Binary<&Module::OpFMin, Type::Float>, 1250 &SPIRVDecompiler::Binary<&Module::OpFMin, Type::Float>,
1214 &SPIRVDecompiler::Binary<&Module::OpFMax, Type::Float>, 1251 &SPIRVDecompiler::Binary<&Module::OpFMax, Type::Float>,
1215 &SPIRVDecompiler::Unary<&Module::OpCos, Type::Float>, 1252 &SPIRVDecompiler::Unary<&Module::OpCos, Type::Float>,
@@ -1270,6 +1307,7 @@ private:
1270 &SPIRVDecompiler::Unary<&Module::OpFAbs, Type::HalfFloat>, 1307 &SPIRVDecompiler::Unary<&Module::OpFAbs, Type::HalfFloat>,
1271 &SPIRVDecompiler::HNegate, 1308 &SPIRVDecompiler::HNegate,
1272 &SPIRVDecompiler::HClamp, 1309 &SPIRVDecompiler::HClamp,
1310 &SPIRVDecompiler::HCastFloat,
1273 &SPIRVDecompiler::HUnpack, 1311 &SPIRVDecompiler::HUnpack,
1274 &SPIRVDecompiler::HMergeF32, 1312 &SPIRVDecompiler::HMergeF32,
1275 &SPIRVDecompiler::HMergeH0, 1313 &SPIRVDecompiler::HMergeH0,
@@ -1346,6 +1384,11 @@ private:
1346 &SPIRVDecompiler::WorkGroupId<0>, 1384 &SPIRVDecompiler::WorkGroupId<0>,
1347 &SPIRVDecompiler::WorkGroupId<1>, 1385 &SPIRVDecompiler::WorkGroupId<1>,
1348 &SPIRVDecompiler::WorkGroupId<2>, 1386 &SPIRVDecompiler::WorkGroupId<2>,
1387
1388 &SPIRVDecompiler::BallotThread,
1389 &SPIRVDecompiler::VoteAll,
1390 &SPIRVDecompiler::VoteAny,
1391 &SPIRVDecompiler::VoteEqual,
1349 }; 1392 };
1350 static_assert(operation_decompilers.size() == static_cast<std::size_t>(OperationCode::Amount)); 1393 static_assert(operation_decompilers.size() == static_cast<std::size_t>(OperationCode::Amount));
1351 1394
diff --git a/src/video_core/shader/control_flow.cpp b/src/video_core/shader/control_flow.cpp
index fdcc970ff..ec3a76690 100644
--- a/src/video_core/shader/control_flow.cpp
+++ b/src/video_core/shader/control_flow.cpp
@@ -15,7 +15,7 @@
15#include "video_core/shader/shader_ir.h" 15#include "video_core/shader/shader_ir.h"
16 16
17namespace VideoCommon::Shader { 17namespace VideoCommon::Shader {
18 18namespace {
19using Tegra::Shader::Instruction; 19using Tegra::Shader::Instruction;
20using Tegra::Shader::OpCode; 20using Tegra::Shader::OpCode;
21 21
@@ -29,8 +29,7 @@ struct Query {
29 29
30struct BlockStack { 30struct BlockStack {
31 BlockStack() = default; 31 BlockStack() = default;
32 BlockStack(const BlockStack& b) = default; 32 explicit BlockStack(const Query& q) : ssy_stack{q.ssy_stack}, pbk_stack{q.pbk_stack} {}
33 BlockStack(const Query& q) : ssy_stack{q.ssy_stack}, pbk_stack{q.pbk_stack} {}
34 std::stack<u32> ssy_stack{}; 33 std::stack<u32> ssy_stack{};
35 std::stack<u32> pbk_stack{}; 34 std::stack<u32> pbk_stack{};
36}; 35};
@@ -58,7 +57,7 @@ struct BlockInfo {
58struct CFGRebuildState { 57struct CFGRebuildState {
59 explicit CFGRebuildState(const ProgramCode& program_code, const std::size_t program_size, 58 explicit CFGRebuildState(const ProgramCode& program_code, const std::size_t program_size,
60 const u32 start) 59 const u32 start)
61 : program_code{program_code}, program_size{program_size}, start{start} {} 60 : start{start}, program_code{program_code}, program_size{program_size} {}
62 61
63 u32 start{}; 62 u32 start{};
64 std::vector<BlockInfo> block_info{}; 63 std::vector<BlockInfo> block_info{};
@@ -85,7 +84,7 @@ std::pair<BlockCollision, u32> TryGetBlock(CFGRebuildState& state, u32 address)
85 return {BlockCollision::Inside, index}; 84 return {BlockCollision::Inside, index};
86 } 85 }
87 } 86 }
88 return {BlockCollision::None, -1}; 87 return {BlockCollision::None, 0xFFFFFFFF};
89} 88}
90 89
91struct ParseInfo { 90struct ParseInfo {
@@ -365,27 +364,29 @@ bool TryQuery(CFGRebuildState& state) {
365 const auto gather_end = labels.upper_bound(block.end); 364 const auto gather_end = labels.upper_bound(block.end);
366 while (gather_start != gather_end) { 365 while (gather_start != gather_end) {
367 cc.push(gather_start->second); 366 cc.push(gather_start->second);
368 gather_start++; 367 ++gather_start;
369 } 368 }
370 }; 369 };
371 if (state.queries.empty()) { 370 if (state.queries.empty()) {
372 return false; 371 return false;
373 } 372 }
373
374 Query& q = state.queries.front(); 374 Query& q = state.queries.front();
375 const u32 block_index = state.registered[q.address]; 375 const u32 block_index = state.registered[q.address];
376 BlockInfo& block = state.block_info[block_index]; 376 BlockInfo& block = state.block_info[block_index];
377 // If the block is visted, check if the stacks match, else gather the ssy/pbk 377 // If the block is visited, check if the stacks match, else gather the ssy/pbk
378 // labels into the current stack and look if the branch at the end of the block 378 // labels into the current stack and look if the branch at the end of the block
379 // consumes a label. Schedule new queries accordingly 379 // consumes a label. Schedule new queries accordingly
380 if (block.visited) { 380 if (block.visited) {
381 BlockStack& stack = state.stacks[q.address]; 381 BlockStack& stack = state.stacks[q.address];
382 const bool all_okay = (stack.ssy_stack.size() == 0 || q.ssy_stack == stack.ssy_stack) && 382 const bool all_okay = (stack.ssy_stack.empty() || q.ssy_stack == stack.ssy_stack) &&
383 (stack.pbk_stack.size() == 0 || q.pbk_stack == stack.pbk_stack); 383 (stack.pbk_stack.empty() || q.pbk_stack == stack.pbk_stack);
384 state.queries.pop_front(); 384 state.queries.pop_front();
385 return all_okay; 385 return all_okay;
386 } 386 }
387 block.visited = true; 387 block.visited = true;
388 state.stacks[q.address] = BlockStack{q}; 388 state.stacks.insert_or_assign(q.address, BlockStack{q});
389
389 Query q2(q); 390 Query q2(q);
390 state.queries.pop_front(); 391 state.queries.pop_front();
391 gather_labels(q2.ssy_stack, state.ssy_labels, block); 392 gather_labels(q2.ssy_stack, state.ssy_labels, block);
@@ -394,6 +395,7 @@ bool TryQuery(CFGRebuildState& state) {
394 q2.address = block.end + 1; 395 q2.address = block.end + 1;
395 state.queries.push_back(q2); 396 state.queries.push_back(q2);
396 } 397 }
398
397 Query conditional_query{q2}; 399 Query conditional_query{q2};
398 if (block.branch.is_sync) { 400 if (block.branch.is_sync) {
399 if (block.branch.address == unassigned_branch) { 401 if (block.branch.address == unassigned_branch) {
@@ -408,13 +410,15 @@ bool TryQuery(CFGRebuildState& state) {
408 conditional_query.pbk_stack.pop(); 410 conditional_query.pbk_stack.pop();
409 } 411 }
410 conditional_query.address = block.branch.address; 412 conditional_query.address = block.branch.address;
411 state.queries.push_back(conditional_query); 413 state.queries.push_back(std::move(conditional_query));
412 return true; 414 return true;
413} 415}
416} // Anonymous namespace
414 417
415std::optional<ShaderCharacteristics> ScanFlow(const ProgramCode& program_code, u32 program_size, 418std::optional<ShaderCharacteristics> ScanFlow(const ProgramCode& program_code,
416 u32 start_address) { 419 std::size_t program_size, u32 start_address) {
417 CFGRebuildState state{program_code, program_size, start_address}; 420 CFGRebuildState state{program_code, program_size, start_address};
421
418 // Inspect Code and generate blocks 422 // Inspect Code and generate blocks
419 state.labels.clear(); 423 state.labels.clear();
420 state.labels.emplace(start_address); 424 state.labels.emplace(start_address);
@@ -424,10 +428,9 @@ std::optional<ShaderCharacteristics> ScanFlow(const ProgramCode& program_code, u
424 return {}; 428 return {};
425 } 429 }
426 } 430 }
431
427 // Decompile Stacks 432 // Decompile Stacks
428 Query start_query{}; 433 state.queries.push_back(Query{state.start, {}, {}});
429 start_query.address = state.start;
430 state.queries.push_back(start_query);
431 bool decompiled = true; 434 bool decompiled = true;
432 while (!state.queries.empty()) { 435 while (!state.queries.empty()) {
433 if (!TryQuery(state)) { 436 if (!TryQuery(state)) {
@@ -435,14 +438,15 @@ std::optional<ShaderCharacteristics> ScanFlow(const ProgramCode& program_code, u
435 break; 438 break;
436 } 439 }
437 } 440 }
441
438 // Sort and organize results 442 // Sort and organize results
439 std::sort(state.block_info.begin(), state.block_info.end(), 443 std::sort(state.block_info.begin(), state.block_info.end(),
440 [](const BlockInfo& a, const BlockInfo& b) -> bool { return a.start < b.start; }); 444 [](const BlockInfo& a, const BlockInfo& b) { return a.start < b.start; });
441 ShaderCharacteristics result_out{}; 445 ShaderCharacteristics result_out{};
442 result_out.decompilable = decompiled; 446 result_out.decompilable = decompiled;
443 result_out.start = start_address; 447 result_out.start = start_address;
444 result_out.end = start_address; 448 result_out.end = start_address;
445 for (auto& block : state.block_info) { 449 for (const auto& block : state.block_info) {
446 ShaderBlock new_block{}; 450 ShaderBlock new_block{};
447 new_block.start = block.start; 451 new_block.start = block.start;
448 new_block.end = block.end; 452 new_block.end = block.end;
@@ -457,8 +461,9 @@ std::optional<ShaderCharacteristics> ScanFlow(const ProgramCode& program_code, u
457 } 461 }
458 if (result_out.decompilable) { 462 if (result_out.decompilable) {
459 result_out.labels = std::move(state.labels); 463 result_out.labels = std::move(state.labels);
460 return {result_out}; 464 return {std::move(result_out)};
461 } 465 }
466
462 // If it's not decompilable, merge the unlabelled blocks together 467 // If it's not decompilable, merge the unlabelled blocks together
463 auto back = result_out.blocks.begin(); 468 auto back = result_out.blocks.begin();
464 auto next = std::next(back); 469 auto next = std::next(back);
@@ -469,8 +474,8 @@ std::optional<ShaderCharacteristics> ScanFlow(const ProgramCode& program_code, u
469 continue; 474 continue;
470 } 475 }
471 back = next; 476 back = next;
472 next++; 477 ++next;
473 } 478 }
474 return {result_out}; 479 return {std::move(result_out)};
475} 480}
476} // namespace VideoCommon::Shader 481} // namespace VideoCommon::Shader
diff --git a/src/video_core/shader/control_flow.h b/src/video_core/shader/control_flow.h
index 5e8ea3271..b0a5e4f8c 100644
--- a/src/video_core/shader/control_flow.h
+++ b/src/video_core/shader/control_flow.h
@@ -4,7 +4,6 @@
4 4
5#pragma once 5#pragma once
6 6
7#include <cstring>
8#include <list> 7#include <list>
9#include <optional> 8#include <optional>
10#include <unordered_set> 9#include <unordered_set>
@@ -26,27 +25,44 @@ struct Condition {
26 bool IsUnconditional() const { 25 bool IsUnconditional() const {
27 return predicate == Pred::UnusedIndex && cc == ConditionCode::T; 26 return predicate == Pred::UnusedIndex && cc == ConditionCode::T;
28 } 27 }
28
29 bool operator==(const Condition& other) const { 29 bool operator==(const Condition& other) const {
30 return std::tie(predicate, cc) == std::tie(other.predicate, other.cc); 30 return std::tie(predicate, cc) == std::tie(other.predicate, other.cc);
31 } 31 }
32
33 bool operator!=(const Condition& other) const {
34 return !operator==(other);
35 }
32}; 36};
33 37
34struct ShaderBlock { 38struct ShaderBlock {
35 u32 start{};
36 u32 end{};
37 bool ignore_branch{};
38 struct Branch { 39 struct Branch {
39 Condition cond{}; 40 Condition cond{};
40 bool kills{}; 41 bool kills{};
41 s32 address{}; 42 s32 address{};
43
42 bool operator==(const Branch& b) const { 44 bool operator==(const Branch& b) const {
43 return std::tie(cond, kills, address) == std::tie(b.cond, b.kills, b.address); 45 return std::tie(cond, kills, address) == std::tie(b.cond, b.kills, b.address);
44 } 46 }
45 } branch{}; 47
48 bool operator!=(const Branch& b) const {
49 return !operator==(b);
50 }
51 };
52
53 u32 start{};
54 u32 end{};
55 bool ignore_branch{};
56 Branch branch{};
57
46 bool operator==(const ShaderBlock& sb) const { 58 bool operator==(const ShaderBlock& sb) const {
47 return std::tie(start, end, ignore_branch, branch) == 59 return std::tie(start, end, ignore_branch, branch) ==
48 std::tie(sb.start, sb.end, sb.ignore_branch, sb.branch); 60 std::tie(sb.start, sb.end, sb.ignore_branch, sb.branch);
49 } 61 }
62
63 bool operator!=(const ShaderBlock& sb) const {
64 return !operator==(sb);
65 }
50}; 66};
51 67
52struct ShaderCharacteristics { 68struct ShaderCharacteristics {
@@ -57,7 +73,7 @@ struct ShaderCharacteristics {
57 std::unordered_set<u32> labels{}; 73 std::unordered_set<u32> labels{};
58}; 74};
59 75
60std::optional<ShaderCharacteristics> ScanFlow(const ProgramCode& program_code, u32 program_size, 76std::optional<ShaderCharacteristics> ScanFlow(const ProgramCode& program_code,
61 u32 start_address); 77 std::size_t program_size, u32 start_address);
62 78
63} // namespace VideoCommon::Shader 79} // namespace VideoCommon::Shader
diff --git a/src/video_core/shader/decode.cpp b/src/video_core/shader/decode.cpp
index afffd157f..47a9fd961 100644
--- a/src/video_core/shader/decode.cpp
+++ b/src/video_core/shader/decode.cpp
@@ -47,14 +47,14 @@ void ShaderIR::Decode() {
47 if (shader_info.decompilable) { 47 if (shader_info.decompilable) {
48 disable_flow_stack = true; 48 disable_flow_stack = true;
49 const auto insert_block = [this](NodeBlock& nodes, u32 label) { 49 const auto insert_block = [this](NodeBlock& nodes, u32 label) {
50 if (label == exit_branch) { 50 if (label == static_cast<u32>(exit_branch)) {
51 return; 51 return;
52 } 52 }
53 basic_blocks.insert({label, nodes}); 53 basic_blocks.insert({label, nodes});
54 }; 54 };
55 const auto& blocks = shader_info.blocks; 55 const auto& blocks = shader_info.blocks;
56 NodeBlock current_block; 56 NodeBlock current_block;
57 u32 current_label = exit_branch; 57 u32 current_label = static_cast<u32>(exit_branch);
58 for (auto& block : blocks) { 58 for (auto& block : blocks) {
59 if (shader_info.labels.count(block.start) != 0) { 59 if (shader_info.labels.count(block.start) != 0) {
60 insert_block(current_block, current_label); 60 insert_block(current_block, current_label);
@@ -176,6 +176,7 @@ u32 ShaderIR::DecodeInstr(NodeBlock& bb, u32 pc) {
176 {OpCode::Type::Ffma, &ShaderIR::DecodeFfma}, 176 {OpCode::Type::Ffma, &ShaderIR::DecodeFfma},
177 {OpCode::Type::Hfma2, &ShaderIR::DecodeHfma2}, 177 {OpCode::Type::Hfma2, &ShaderIR::DecodeHfma2},
178 {OpCode::Type::Conversion, &ShaderIR::DecodeConversion}, 178 {OpCode::Type::Conversion, &ShaderIR::DecodeConversion},
179 {OpCode::Type::Warp, &ShaderIR::DecodeWarp},
179 {OpCode::Type::Memory, &ShaderIR::DecodeMemory}, 180 {OpCode::Type::Memory, &ShaderIR::DecodeMemory},
180 {OpCode::Type::Texture, &ShaderIR::DecodeTexture}, 181 {OpCode::Type::Texture, &ShaderIR::DecodeTexture},
181 {OpCode::Type::Image, &ShaderIR::DecodeImage}, 182 {OpCode::Type::Image, &ShaderIR::DecodeImage},
diff --git a/src/video_core/shader/decode/arithmetic.cpp b/src/video_core/shader/decode/arithmetic.cpp
index 87d8fecaa..1473c282a 100644
--- a/src/video_core/shader/decode/arithmetic.cpp
+++ b/src/video_core/shader/decode/arithmetic.cpp
@@ -42,11 +42,14 @@ u32 ShaderIR::DecodeArithmetic(NodeBlock& bb, u32 pc) {
42 case OpCode::Id::FMUL_R: 42 case OpCode::Id::FMUL_R:
43 case OpCode::Id::FMUL_IMM: { 43 case OpCode::Id::FMUL_IMM: {
44 // FMUL does not have 'abs' bits and only the second operand has a 'neg' bit. 44 // FMUL does not have 'abs' bits and only the second operand has a 'neg' bit.
45 UNIMPLEMENTED_IF_MSG(instr.fmul.tab5cb8_2 != 0, "FMUL tab5cb8_2({}) is not implemented", 45 if (instr.fmul.tab5cb8_2 != 0) {
46 instr.fmul.tab5cb8_2.Value()); 46 LOG_WARNING(HW_GPU, "FMUL tab5cb8_2({}) is not implemented",
47 UNIMPLEMENTED_IF_MSG( 47 instr.fmul.tab5cb8_2.Value());
48 instr.fmul.tab5c68_0 != 1, "FMUL tab5cb8_0({}) is not implemented", 48 }
49 instr.fmul.tab5c68_0.Value()); // SMO typical sends 1 here which seems to be the default 49 if (instr.fmul.tab5c68_0 != 1) {
50 LOG_WARNING(HW_GPU, "FMUL tab5cb8_0({}) is not implemented",
51 instr.fmul.tab5c68_0.Value());
52 }
50 53
51 op_b = GetOperandAbsNegFloat(op_b, false, instr.fmul.negate_b); 54 op_b = GetOperandAbsNegFloat(op_b, false, instr.fmul.negate_b);
52 55
diff --git a/src/video_core/shader/decode/arithmetic_half_immediate.cpp b/src/video_core/shader/decode/arithmetic_half_immediate.cpp
index 7bcf38f23..6466fc011 100644
--- a/src/video_core/shader/decode/arithmetic_half_immediate.cpp
+++ b/src/video_core/shader/decode/arithmetic_half_immediate.cpp
@@ -23,7 +23,9 @@ u32 ShaderIR::DecodeArithmeticHalfImmediate(NodeBlock& bb, u32 pc) {
23 LOG_WARNING(HW_GPU, "{} FTZ not implemented", opcode->get().GetName()); 23 LOG_WARNING(HW_GPU, "{} FTZ not implemented", opcode->get().GetName());
24 } 24 }
25 } else { 25 } else {
26 UNIMPLEMENTED_IF(instr.alu_half_imm.precision != Tegra::Shader::HalfPrecision::None); 26 if (instr.alu_half_imm.precision != Tegra::Shader::HalfPrecision::None) {
27 LOG_WARNING(HW_GPU, "{} FTZ not implemented", opcode->get().GetName());
28 }
27 } 29 }
28 30
29 Node op_a = UnpackHalfFloat(GetRegister(instr.gpr8), instr.alu_half_imm.type_a); 31 Node op_a = UnpackHalfFloat(GetRegister(instr.gpr8), instr.alu_half_imm.type_a);
diff --git a/src/video_core/shader/decode/conversion.cpp b/src/video_core/shader/decode/conversion.cpp
index 4221f0c58..8973fbefa 100644
--- a/src/video_core/shader/decode/conversion.cpp
+++ b/src/video_core/shader/decode/conversion.cpp
@@ -57,7 +57,7 @@ u32 ShaderIR::DecodeConversion(NodeBlock& bb, u32 pc) {
57 case OpCode::Id::I2F_R: 57 case OpCode::Id::I2F_R:
58 case OpCode::Id::I2F_C: 58 case OpCode::Id::I2F_C:
59 case OpCode::Id::I2F_IMM: { 59 case OpCode::Id::I2F_IMM: {
60 UNIMPLEMENTED_IF(instr.conversion.dst_size != Register::Size::Word); 60 UNIMPLEMENTED_IF(instr.conversion.dst_size == Register::Size::Long);
61 UNIMPLEMENTED_IF(instr.conversion.selector); 61 UNIMPLEMENTED_IF(instr.conversion.selector);
62 UNIMPLEMENTED_IF_MSG(instr.generates_cc, 62 UNIMPLEMENTED_IF_MSG(instr.generates_cc,
63 "Condition codes generation in I2F is not implemented"); 63 "Condition codes generation in I2F is not implemented");
@@ -82,14 +82,19 @@ u32 ShaderIR::DecodeConversion(NodeBlock& bb, u32 pc) {
82 value = GetOperandAbsNegFloat(value, false, instr.conversion.negate_a); 82 value = GetOperandAbsNegFloat(value, false, instr.conversion.negate_a);
83 83
84 SetInternalFlagsFromFloat(bb, value, instr.generates_cc); 84 SetInternalFlagsFromFloat(bb, value, instr.generates_cc);
85
86 if (instr.conversion.dst_size == Register::Size::Short) {
87 value = Operation(OperationCode::HCastFloat, PRECISE, value);
88 }
89
85 SetRegister(bb, instr.gpr0, value); 90 SetRegister(bb, instr.gpr0, value);
86 break; 91 break;
87 } 92 }
88 case OpCode::Id::F2F_R: 93 case OpCode::Id::F2F_R:
89 case OpCode::Id::F2F_C: 94 case OpCode::Id::F2F_C:
90 case OpCode::Id::F2F_IMM: { 95 case OpCode::Id::F2F_IMM: {
91 UNIMPLEMENTED_IF(instr.conversion.f2f.dst_size != Register::Size::Word); 96 UNIMPLEMENTED_IF(instr.conversion.dst_size == Register::Size::Long);
92 UNIMPLEMENTED_IF(instr.conversion.f2f.src_size != Register::Size::Word); 97 UNIMPLEMENTED_IF(instr.conversion.src_size == Register::Size::Long);
93 UNIMPLEMENTED_IF_MSG(instr.generates_cc, 98 UNIMPLEMENTED_IF_MSG(instr.generates_cc,
94 "Condition codes generation in F2F is not implemented"); 99 "Condition codes generation in F2F is not implemented");
95 100
@@ -107,6 +112,11 @@ u32 ShaderIR::DecodeConversion(NodeBlock& bb, u32 pc) {
107 } 112 }
108 }(); 113 }();
109 114
115 if (instr.conversion.src_size == Register::Size::Short) {
116 // TODO: figure out where extract is set in the encoding
117 value = Operation(OperationCode::FCastHalf0, PRECISE, value);
118 }
119
110 value = GetOperandAbsNegFloat(value, instr.conversion.abs_a, instr.conversion.negate_a); 120 value = GetOperandAbsNegFloat(value, instr.conversion.abs_a, instr.conversion.negate_a);
111 121
112 value = [&]() { 122 value = [&]() {
@@ -124,19 +134,24 @@ u32 ShaderIR::DecodeConversion(NodeBlock& bb, u32 pc) {
124 default: 134 default:
125 UNIMPLEMENTED_MSG("Unimplemented F2F rounding mode {}", 135 UNIMPLEMENTED_MSG("Unimplemented F2F rounding mode {}",
126 static_cast<u32>(instr.conversion.f2f.rounding.Value())); 136 static_cast<u32>(instr.conversion.f2f.rounding.Value()));
127 return Immediate(0); 137 return value;
128 } 138 }
129 }(); 139 }();
130 value = GetSaturatedFloat(value, instr.alu.saturate_d); 140 value = GetSaturatedFloat(value, instr.alu.saturate_d);
131 141
132 SetInternalFlagsFromFloat(bb, value, instr.generates_cc); 142 SetInternalFlagsFromFloat(bb, value, instr.generates_cc);
143
144 if (instr.conversion.dst_size == Register::Size::Short) {
145 value = Operation(OperationCode::HCastFloat, PRECISE, value);
146 }
147
133 SetRegister(bb, instr.gpr0, value); 148 SetRegister(bb, instr.gpr0, value);
134 break; 149 break;
135 } 150 }
136 case OpCode::Id::F2I_R: 151 case OpCode::Id::F2I_R:
137 case OpCode::Id::F2I_C: 152 case OpCode::Id::F2I_C:
138 case OpCode::Id::F2I_IMM: { 153 case OpCode::Id::F2I_IMM: {
139 UNIMPLEMENTED_IF(instr.conversion.src_size != Register::Size::Word); 154 UNIMPLEMENTED_IF(instr.conversion.src_size == Register::Size::Long);
140 UNIMPLEMENTED_IF_MSG(instr.generates_cc, 155 UNIMPLEMENTED_IF_MSG(instr.generates_cc,
141 "Condition codes generation in F2I is not implemented"); 156 "Condition codes generation in F2I is not implemented");
142 Node value = [&]() { 157 Node value = [&]() {
@@ -153,6 +168,11 @@ u32 ShaderIR::DecodeConversion(NodeBlock& bb, u32 pc) {
153 } 168 }
154 }(); 169 }();
155 170
171 if (instr.conversion.src_size == Register::Size::Short) {
172 // TODO: figure out where extract is set in the encoding
173 value = Operation(OperationCode::FCastHalf0, PRECISE, value);
174 }
175
156 value = GetOperandAbsNegFloat(value, instr.conversion.abs_a, instr.conversion.negate_a); 176 value = GetOperandAbsNegFloat(value, instr.conversion.abs_a, instr.conversion.negate_a);
157 177
158 value = [&]() { 178 value = [&]() {
diff --git a/src/video_core/shader/decode/ffma.cpp b/src/video_core/shader/decode/ffma.cpp
index 29be25ca3..ca2f39e8d 100644
--- a/src/video_core/shader/decode/ffma.cpp
+++ b/src/video_core/shader/decode/ffma.cpp
@@ -18,10 +18,12 @@ u32 ShaderIR::DecodeFfma(NodeBlock& bb, u32 pc) {
18 const auto opcode = OpCode::Decode(instr); 18 const auto opcode = OpCode::Decode(instr);
19 19
20 UNIMPLEMENTED_IF_MSG(instr.ffma.cc != 0, "FFMA cc not implemented"); 20 UNIMPLEMENTED_IF_MSG(instr.ffma.cc != 0, "FFMA cc not implemented");
21 UNIMPLEMENTED_IF_MSG(instr.ffma.tab5980_0 != 1, "FFMA tab5980_0({}) not implemented", 21 if (instr.ffma.tab5980_0 != 1) {
22 instr.ffma.tab5980_0.Value()); // Seems to be 1 by default based on SMO 22 LOG_WARNING(HW_GPU, "FFMA tab5980_0({}) not implemented", instr.ffma.tab5980_0.Value());
23 UNIMPLEMENTED_IF_MSG(instr.ffma.tab5980_1 != 0, "FFMA tab5980_1({}) not implemented", 23 }
24 instr.ffma.tab5980_1.Value()); 24 if (instr.ffma.tab5980_1 != 0) {
25 LOG_WARNING(HW_GPU, "FFMA tab5980_1({}) not implemented", instr.ffma.tab5980_1.Value());
26 }
25 27
26 const Node op_a = GetRegister(instr.gpr8); 28 const Node op_a = GetRegister(instr.gpr8);
27 29
diff --git a/src/video_core/shader/decode/float_set.cpp b/src/video_core/shader/decode/float_set.cpp
index f5013e44a..5614e8a0d 100644
--- a/src/video_core/shader/decode/float_set.cpp
+++ b/src/video_core/shader/decode/float_set.cpp
@@ -15,7 +15,6 @@ using Tegra::Shader::OpCode;
15 15
16u32 ShaderIR::DecodeFloatSet(NodeBlock& bb, u32 pc) { 16u32 ShaderIR::DecodeFloatSet(NodeBlock& bb, u32 pc) {
17 const Instruction instr = {program_code[pc]}; 17 const Instruction instr = {program_code[pc]};
18 const auto opcode = OpCode::Decode(instr);
19 18
20 const Node op_a = GetOperandAbsNegFloat(GetRegister(instr.gpr8), instr.fset.abs_a != 0, 19 const Node op_a = GetOperandAbsNegFloat(GetRegister(instr.gpr8), instr.fset.abs_a != 0,
21 instr.fset.neg_a != 0); 20 instr.fset.neg_a != 0);
diff --git a/src/video_core/shader/decode/float_set_predicate.cpp b/src/video_core/shader/decode/float_set_predicate.cpp
index 2323052b0..34854fcca 100644
--- a/src/video_core/shader/decode/float_set_predicate.cpp
+++ b/src/video_core/shader/decode/float_set_predicate.cpp
@@ -16,7 +16,6 @@ using Tegra::Shader::Pred;
16 16
17u32 ShaderIR::DecodeFloatSetPredicate(NodeBlock& bb, u32 pc) { 17u32 ShaderIR::DecodeFloatSetPredicate(NodeBlock& bb, u32 pc) {
18 const Instruction instr = {program_code[pc]}; 18 const Instruction instr = {program_code[pc]};
19 const auto opcode = OpCode::Decode(instr);
20 19
21 const Node op_a = GetOperandAbsNegFloat(GetRegister(instr.gpr8), instr.fsetp.abs_a != 0, 20 const Node op_a = GetOperandAbsNegFloat(GetRegister(instr.gpr8), instr.fsetp.abs_a != 0,
22 instr.fsetp.neg_a != 0); 21 instr.fsetp.neg_a != 0);
diff --git a/src/video_core/shader/decode/half_set_predicate.cpp b/src/video_core/shader/decode/half_set_predicate.cpp
index ad180d6df..afea33e5f 100644
--- a/src/video_core/shader/decode/half_set_predicate.cpp
+++ b/src/video_core/shader/decode/half_set_predicate.cpp
@@ -18,7 +18,7 @@ u32 ShaderIR::DecodeHalfSetPredicate(NodeBlock& bb, u32 pc) {
18 const Instruction instr = {program_code[pc]}; 18 const Instruction instr = {program_code[pc]};
19 const auto opcode = OpCode::Decode(instr); 19 const auto opcode = OpCode::Decode(instr);
20 20
21 UNIMPLEMENTED_IF(instr.hsetp2.ftz != 0); 21 DEBUG_ASSERT(instr.hsetp2.ftz == 0);
22 22
23 Node op_a = UnpackHalfFloat(GetRegister(instr.gpr8), instr.hsetp2.type_a); 23 Node op_a = UnpackHalfFloat(GetRegister(instr.gpr8), instr.hsetp2.type_a);
24 op_a = GetOperandAbsNegHalf(op_a, instr.hsetp2.abs_a, instr.hsetp2.negate_a); 24 op_a = GetOperandAbsNegHalf(op_a, instr.hsetp2.abs_a, instr.hsetp2.negate_a);
@@ -30,7 +30,7 @@ u32 ShaderIR::DecodeHalfSetPredicate(NodeBlock& bb, u32 pc) {
30 case OpCode::Id::HSETP2_C: 30 case OpCode::Id::HSETP2_C:
31 cond = instr.hsetp2.cbuf_and_imm.cond; 31 cond = instr.hsetp2.cbuf_and_imm.cond;
32 h_and = instr.hsetp2.cbuf_and_imm.h_and; 32 h_and = instr.hsetp2.cbuf_and_imm.h_and;
33 op_b = GetOperandAbsNegHalf(GetConstBuffer(instr.cbuf34.index, instr.cbuf34.offset), 33 op_b = GetOperandAbsNegHalf(GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset()),
34 instr.hsetp2.cbuf.abs_b, instr.hsetp2.cbuf.negate_b); 34 instr.hsetp2.cbuf.abs_b, instr.hsetp2.cbuf.negate_b);
35 break; 35 break;
36 case OpCode::Id::HSETP2_IMM: 36 case OpCode::Id::HSETP2_IMM:
@@ -52,15 +52,15 @@ u32 ShaderIR::DecodeHalfSetPredicate(NodeBlock& bb, u32 pc) {
52 } 52 }
53 53
54 const OperationCode combiner = GetPredicateCombiner(instr.hsetp2.op); 54 const OperationCode combiner = GetPredicateCombiner(instr.hsetp2.op);
55 const Node pred39 = GetPredicate(instr.hsetp2.pred39, instr.hsetp2.neg_pred); 55 const Node combined_pred = GetPredicate(instr.hsetp2.pred3, instr.hsetp2.neg_pred);
56 56
57 const auto Write = [&](u64 dest, Node src) { 57 const auto Write = [&](u64 dest, Node src) {
58 SetPredicate(bb, dest, Operation(combiner, std::move(src), pred39)); 58 SetPredicate(bb, dest, Operation(combiner, std::move(src), combined_pred));
59 }; 59 };
60 60
61 const Node comparison = GetPredicateComparisonHalf(cond, op_a, op_b); 61 const Node comparison = GetPredicateComparisonHalf(cond, op_a, op_b);
62 const u64 first = instr.hsetp2.pred0; 62 const u64 first = instr.hsetp2.pred0;
63 const u64 second = instr.hsetp2.pred3; 63 const u64 second = instr.hsetp2.pred39;
64 if (h_and) { 64 if (h_and) {
65 const Node joined = Operation(OperationCode::LogicalAnd2, comparison); 65 const Node joined = Operation(OperationCode::LogicalAnd2, comparison);
66 Write(first, joined); 66 Write(first, joined);
diff --git a/src/video_core/shader/decode/hfma2.cpp b/src/video_core/shader/decode/hfma2.cpp
index c3bcf1ae9..5b44cb79c 100644
--- a/src/video_core/shader/decode/hfma2.cpp
+++ b/src/video_core/shader/decode/hfma2.cpp
@@ -22,9 +22,9 @@ u32 ShaderIR::DecodeHfma2(NodeBlock& bb, u32 pc) {
22 const auto opcode = OpCode::Decode(instr); 22 const auto opcode = OpCode::Decode(instr);
23 23
24 if (opcode->get().GetId() == OpCode::Id::HFMA2_RR) { 24 if (opcode->get().GetId() == OpCode::Id::HFMA2_RR) {
25 UNIMPLEMENTED_IF(instr.hfma2.rr.precision != HalfPrecision::None); 25 DEBUG_ASSERT(instr.hfma2.rr.precision == HalfPrecision::None);
26 } else { 26 } else {
27 UNIMPLEMENTED_IF(instr.hfma2.precision != HalfPrecision::None); 27 DEBUG_ASSERT(instr.hfma2.precision == HalfPrecision::None);
28 } 28 }
29 29
30 constexpr auto identity = HalfType::H0_H1; 30 constexpr auto identity = HalfType::H0_H1;
diff --git a/src/video_core/shader/decode/integer_set.cpp b/src/video_core/shader/decode/integer_set.cpp
index 46e3d5905..59809bcd8 100644
--- a/src/video_core/shader/decode/integer_set.cpp
+++ b/src/video_core/shader/decode/integer_set.cpp
@@ -14,7 +14,6 @@ using Tegra::Shader::OpCode;
14 14
15u32 ShaderIR::DecodeIntegerSet(NodeBlock& bb, u32 pc) { 15u32 ShaderIR::DecodeIntegerSet(NodeBlock& bb, u32 pc) {
16 const Instruction instr = {program_code[pc]}; 16 const Instruction instr = {program_code[pc]};
17 const auto opcode = OpCode::Decode(instr);
18 17
19 const Node op_a = GetRegister(instr.gpr8); 18 const Node op_a = GetRegister(instr.gpr8);
20 const Node op_b = [&]() { 19 const Node op_b = [&]() {
diff --git a/src/video_core/shader/decode/integer_set_predicate.cpp b/src/video_core/shader/decode/integer_set_predicate.cpp
index dd20775d7..25e48fef8 100644
--- a/src/video_core/shader/decode/integer_set_predicate.cpp
+++ b/src/video_core/shader/decode/integer_set_predicate.cpp
@@ -16,7 +16,6 @@ using Tegra::Shader::Pred;
16 16
17u32 ShaderIR::DecodeIntegerSetPredicate(NodeBlock& bb, u32 pc) { 17u32 ShaderIR::DecodeIntegerSetPredicate(NodeBlock& bb, u32 pc) {
18 const Instruction instr = {program_code[pc]}; 18 const Instruction instr = {program_code[pc]};
19 const auto opcode = OpCode::Decode(instr);
20 19
21 const Node op_a = GetRegister(instr.gpr8); 20 const Node op_a = GetRegister(instr.gpr8);
22 21
diff --git a/src/video_core/shader/decode/other.cpp b/src/video_core/shader/decode/other.cpp
index c0f64d7a0..d46e0f823 100644
--- a/src/video_core/shader/decode/other.cpp
+++ b/src/video_core/shader/decode/other.cpp
@@ -22,6 +22,12 @@ u32 ShaderIR::DecodeOther(NodeBlock& bb, u32 pc) {
22 const auto opcode = OpCode::Decode(instr); 22 const auto opcode = OpCode::Decode(instr);
23 23
24 switch (opcode->get().GetId()) { 24 switch (opcode->get().GetId()) {
25 case OpCode::Id::NOP: {
26 UNIMPLEMENTED_IF(instr.nop.cc != Tegra::Shader::ConditionCode::T);
27 UNIMPLEMENTED_IF(instr.nop.trigger != 0);
28 // With the previous preconditions, this instruction is a no-operation.
29 break;
30 }
25 case OpCode::Id::EXIT: { 31 case OpCode::Id::EXIT: {
26 const Tegra::Shader::ConditionCode cc = instr.flow_condition_code; 32 const Tegra::Shader::ConditionCode cc = instr.flow_condition_code;
27 UNIMPLEMENTED_IF_MSG(cc != Tegra::Shader::ConditionCode::T, "EXIT condition code used: {}", 33 UNIMPLEMENTED_IF_MSG(cc != Tegra::Shader::ConditionCode::T, "EXIT condition code used: {}",
@@ -68,6 +74,13 @@ u32 ShaderIR::DecodeOther(NodeBlock& bb, u32 pc) {
68 case SystemVariable::InvocationInfo: 74 case SystemVariable::InvocationInfo:
69 LOG_WARNING(HW_GPU, "MOV_SYS instruction with InvocationInfo is incomplete"); 75 LOG_WARNING(HW_GPU, "MOV_SYS instruction with InvocationInfo is incomplete");
70 return Immediate(0u); 76 return Immediate(0u);
77 case SystemVariable::Tid: {
78 Node value = Immediate(0);
79 value = BitfieldInsert(value, Operation(OperationCode::LocalInvocationIdX), 0, 9);
80 value = BitfieldInsert(value, Operation(OperationCode::LocalInvocationIdY), 16, 9);
81 value = BitfieldInsert(value, Operation(OperationCode::LocalInvocationIdZ), 26, 5);
82 return value;
83 }
71 case SystemVariable::TidX: 84 case SystemVariable::TidX:
72 return Operation(OperationCode::LocalInvocationIdX); 85 return Operation(OperationCode::LocalInvocationIdX);
73 case SystemVariable::TidY: 86 case SystemVariable::TidY:
diff --git a/src/video_core/shader/decode/predicate_set_register.cpp b/src/video_core/shader/decode/predicate_set_register.cpp
index febbfeb50..84dbc50fe 100644
--- a/src/video_core/shader/decode/predicate_set_register.cpp
+++ b/src/video_core/shader/decode/predicate_set_register.cpp
@@ -15,7 +15,6 @@ using Tegra::Shader::OpCode;
15 15
16u32 ShaderIR::DecodePredicateSetRegister(NodeBlock& bb, u32 pc) { 16u32 ShaderIR::DecodePredicateSetRegister(NodeBlock& bb, u32 pc) {
17 const Instruction instr = {program_code[pc]}; 17 const Instruction instr = {program_code[pc]};
18 const auto opcode = OpCode::Decode(instr);
19 18
20 UNIMPLEMENTED_IF_MSG(instr.generates_cc, 19 UNIMPLEMENTED_IF_MSG(instr.generates_cc,
21 "Condition codes generation in PSET is not implemented"); 20 "Condition codes generation in PSET is not implemented");
diff --git a/src/video_core/shader/decode/warp.cpp b/src/video_core/shader/decode/warp.cpp
new file mode 100644
index 000000000..04ca74f46
--- /dev/null
+++ b/src/video_core/shader/decode/warp.cpp
@@ -0,0 +1,55 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/assert.h"
6#include "common/common_types.h"
7#include "video_core/engines/shader_bytecode.h"
8#include "video_core/shader/node_helper.h"
9#include "video_core/shader/shader_ir.h"
10
11namespace VideoCommon::Shader {
12
13using Tegra::Shader::Instruction;
14using Tegra::Shader::OpCode;
15using Tegra::Shader::Pred;
16using Tegra::Shader::VoteOperation;
17
18namespace {
19OperationCode GetOperationCode(VoteOperation vote_op) {
20 switch (vote_op) {
21 case VoteOperation::All:
22 return OperationCode::VoteAll;
23 case VoteOperation::Any:
24 return OperationCode::VoteAny;
25 case VoteOperation::Eq:
26 return OperationCode::VoteEqual;
27 default:
28 UNREACHABLE_MSG("Invalid vote operation={}", static_cast<u64>(vote_op));
29 return OperationCode::VoteAll;
30 }
31}
32} // Anonymous namespace
33
34u32 ShaderIR::DecodeWarp(NodeBlock& bb, u32 pc) {
35 const Instruction instr = {program_code[pc]};
36 const auto opcode = OpCode::Decode(instr);
37
38 switch (opcode->get().GetId()) {
39 case OpCode::Id::VOTE: {
40 const Node value = GetPredicate(instr.vote.value, instr.vote.negate_value != 0);
41 const Node active = Operation(OperationCode::BallotThread, value);
42 const Node vote = Operation(GetOperationCode(instr.vote.operation), value);
43 SetRegister(bb, instr.gpr0, active);
44 SetPredicate(bb, instr.vote.dest_pred, vote);
45 break;
46 }
47 default:
48 UNIMPLEMENTED_MSG("Unhandled warp instruction: {}", opcode->get().GetName());
49 break;
50 }
51
52 return pc;
53}
54
55} // namespace VideoCommon::Shader
diff --git a/src/video_core/shader/node.h b/src/video_core/shader/node.h
index 715184d67..5db9313c4 100644
--- a/src/video_core/shader/node.h
+++ b/src/video_core/shader/node.h
@@ -30,6 +30,8 @@ enum class OperationCode {
30 FNegate, /// (MetaArithmetic, float a) -> float 30 FNegate, /// (MetaArithmetic, float a) -> float
31 FAbsolute, /// (MetaArithmetic, float a) -> float 31 FAbsolute, /// (MetaArithmetic, float a) -> float
32 FClamp, /// (MetaArithmetic, float value, float min, float max) -> float 32 FClamp, /// (MetaArithmetic, float value, float min, float max) -> float
33 FCastHalf0, /// (MetaArithmetic, f16vec2 a) -> float
34 FCastHalf1, /// (MetaArithmetic, f16vec2 a) -> float
33 FMin, /// (MetaArithmetic, float a, float b) -> float 35 FMin, /// (MetaArithmetic, float a, float b) -> float
34 FMax, /// (MetaArithmetic, float a, float b) -> float 36 FMax, /// (MetaArithmetic, float a, float b) -> float
35 FCos, /// (MetaArithmetic, float a) -> float 37 FCos, /// (MetaArithmetic, float a) -> float
@@ -83,17 +85,18 @@ enum class OperationCode {
83 UBitfieldExtract, /// (MetaArithmetic, uint value, int offset, int bits) -> uint 85 UBitfieldExtract, /// (MetaArithmetic, uint value, int offset, int bits) -> uint
84 UBitCount, /// (MetaArithmetic, uint) -> uint 86 UBitCount, /// (MetaArithmetic, uint) -> uint
85 87
86 HAdd, /// (MetaArithmetic, f16vec2 a, f16vec2 b) -> f16vec2 88 HAdd, /// (MetaArithmetic, f16vec2 a, f16vec2 b) -> f16vec2
87 HMul, /// (MetaArithmetic, f16vec2 a, f16vec2 b) -> f16vec2 89 HMul, /// (MetaArithmetic, f16vec2 a, f16vec2 b) -> f16vec2
88 HFma, /// (MetaArithmetic, f16vec2 a, f16vec2 b, f16vec2 c) -> f16vec2 90 HFma, /// (MetaArithmetic, f16vec2 a, f16vec2 b, f16vec2 c) -> f16vec2
89 HAbsolute, /// (f16vec2 a) -> f16vec2 91 HAbsolute, /// (f16vec2 a) -> f16vec2
90 HNegate, /// (f16vec2 a, bool first, bool second) -> f16vec2 92 HNegate, /// (f16vec2 a, bool first, bool second) -> f16vec2
91 HClamp, /// (f16vec2 src, float min, float max) -> f16vec2 93 HClamp, /// (f16vec2 src, float min, float max) -> f16vec2
92 HUnpack, /// (Tegra::Shader::HalfType, T value) -> f16vec2 94 HCastFloat, /// (MetaArithmetic, float a) -> f16vec2
93 HMergeF32, /// (f16vec2 src) -> float 95 HUnpack, /// (Tegra::Shader::HalfType, T value) -> f16vec2
94 HMergeH0, /// (f16vec2 dest, f16vec2 src) -> f16vec2 96 HMergeF32, /// (f16vec2 src) -> float
95 HMergeH1, /// (f16vec2 dest, f16vec2 src) -> f16vec2 97 HMergeH0, /// (f16vec2 dest, f16vec2 src) -> f16vec2
96 HPack2, /// (float a, float b) -> f16vec2 98 HMergeH1, /// (f16vec2 dest, f16vec2 src) -> f16vec2
99 HPack2, /// (float a, float b) -> f16vec2
97 100
98 LogicalAssign, /// (bool& dst, bool src) -> void 101 LogicalAssign, /// (bool& dst, bool src) -> void
99 LogicalAnd, /// (bool a, bool b) -> bool 102 LogicalAnd, /// (bool a, bool b) -> bool
@@ -165,6 +168,11 @@ enum class OperationCode {
165 WorkGroupIdY, /// () -> uint 168 WorkGroupIdY, /// () -> uint
166 WorkGroupIdZ, /// () -> uint 169 WorkGroupIdZ, /// () -> uint
167 170
171 BallotThread, /// (bool) -> uint
172 VoteAll, /// (bool) -> bool
173 VoteAny, /// (bool) -> bool
174 VoteEqual, /// (bool) -> bool
175
168 Amount, 176 Amount,
169}; 177};
170 178
diff --git a/src/video_core/shader/shader_ir.cpp b/src/video_core/shader/shader_ir.cpp
index 5e91fe129..1e5c7f660 100644
--- a/src/video_core/shader/shader_ir.cpp
+++ b/src/video_core/shader/shader_ir.cpp
@@ -405,4 +405,9 @@ Node ShaderIR::BitfieldExtract(Node value, u32 offset, u32 bits) {
405 Immediate(offset), Immediate(bits)); 405 Immediate(offset), Immediate(bits));
406} 406}
407 407
408Node ShaderIR::BitfieldInsert(Node base, Node insert, u32 offset, u32 bits) {
409 return Operation(OperationCode::UBitfieldInsert, NO_PRECISE, base, insert, Immediate(offset),
410 Immediate(bits));
411}
412
408} // namespace VideoCommon::Shader 413} // namespace VideoCommon::Shader
diff --git a/src/video_core/shader/shader_ir.h b/src/video_core/shader/shader_ir.h
index 59a083d90..bcc9b79b6 100644
--- a/src/video_core/shader/shader_ir.h
+++ b/src/video_core/shader/shader_ir.h
@@ -167,6 +167,7 @@ private:
167 u32 DecodeFfma(NodeBlock& bb, u32 pc); 167 u32 DecodeFfma(NodeBlock& bb, u32 pc);
168 u32 DecodeHfma2(NodeBlock& bb, u32 pc); 168 u32 DecodeHfma2(NodeBlock& bb, u32 pc);
169 u32 DecodeConversion(NodeBlock& bb, u32 pc); 169 u32 DecodeConversion(NodeBlock& bb, u32 pc);
170 u32 DecodeWarp(NodeBlock& bb, u32 pc);
170 u32 DecodeMemory(NodeBlock& bb, u32 pc); 171 u32 DecodeMemory(NodeBlock& bb, u32 pc);
171 u32 DecodeTexture(NodeBlock& bb, u32 pc); 172 u32 DecodeTexture(NodeBlock& bb, u32 pc);
172 u32 DecodeImage(NodeBlock& bb, u32 pc); 173 u32 DecodeImage(NodeBlock& bb, u32 pc);
@@ -279,6 +280,9 @@ private:
279 /// Extracts a sequence of bits from a node 280 /// Extracts a sequence of bits from a node
280 Node BitfieldExtract(Node value, u32 offset, u32 bits); 281 Node BitfieldExtract(Node value, u32 offset, u32 bits);
281 282
283 /// Inserts a sequence of bits from a node
284 Node BitfieldInsert(Node base, Node insert, u32 offset, u32 bits);
285
282 void WriteTexInstructionFloat(NodeBlock& bb, Tegra::Shader::Instruction instr, 286 void WriteTexInstructionFloat(NodeBlock& bb, Tegra::Shader::Instruction instr,
283 const Node4& components); 287 const Node4& components);
284 288
diff --git a/src/video_core/shader/track.cpp b/src/video_core/shader/track.cpp
index a53e02253..55f5949e4 100644
--- a/src/video_core/shader/track.cpp
+++ b/src/video_core/shader/track.cpp
@@ -59,8 +59,8 @@ std::tuple<Node, u32, u32> ShaderIR::TrackCbuf(Node tracked, const NodeBlock& co
59 return TrackCbuf(source, code, new_cursor); 59 return TrackCbuf(source, code, new_cursor);
60 } 60 }
61 if (const auto operation = std::get_if<OperationNode>(&*tracked)) { 61 if (const auto operation = std::get_if<OperationNode>(&*tracked)) {
62 for (std::size_t i = 0; i < operation->GetOperandsCount(); ++i) { 62 for (std::size_t i = operation->GetOperandsCount(); i > 0; --i) {
63 if (auto found = TrackCbuf((*operation)[i], code, cursor); std::get<0>(found)) { 63 if (auto found = TrackCbuf((*operation)[i - 1], code, cursor); std::get<0>(found)) {
64 // Cbuf found in operand. 64 // Cbuf found in operand.
65 return found; 65 return found;
66 } 66 }
diff --git a/src/video_core/texture_cache/surface_base.cpp b/src/video_core/texture_cache/surface_base.cpp
index 6af9044ca..683c49207 100644
--- a/src/video_core/texture_cache/surface_base.cpp
+++ b/src/video_core/texture_cache/surface_base.cpp
@@ -24,9 +24,8 @@ StagingCache::StagingCache() = default;
24StagingCache::~StagingCache() = default; 24StagingCache::~StagingCache() = default;
25 25
26SurfaceBaseImpl::SurfaceBaseImpl(GPUVAddr gpu_addr, const SurfaceParams& params) 26SurfaceBaseImpl::SurfaceBaseImpl(GPUVAddr gpu_addr, const SurfaceParams& params)
27 : params{params}, mipmap_sizes(params.num_levels), 27 : params{params}, host_memory_size{params.GetHostSizeInBytes()}, gpu_addr{gpu_addr},
28 mipmap_offsets(params.num_levels), gpu_addr{gpu_addr}, host_memory_size{ 28 mipmap_sizes(params.num_levels), mipmap_offsets(params.num_levels) {
29 params.GetHostSizeInBytes()} {
30 std::size_t offset = 0; 29 std::size_t offset = 0;
31 for (u32 level = 0; level < params.num_levels; ++level) { 30 for (u32 level = 0; level < params.num_levels; ++level) {
32 const std::size_t mipmap_size{params.GetGuestMipmapSize(level)}; 31 const std::size_t mipmap_size{params.GetGuestMipmapSize(level)};
diff --git a/src/video_core/texture_cache/surface_params.h b/src/video_core/texture_cache/surface_params.h
index 358d6757c..e7ef66ee2 100644
--- a/src/video_core/texture_cache/surface_params.h
+++ b/src/video_core/texture_cache/surface_params.h
@@ -58,7 +58,6 @@ public:
58 std::size_t GetHostSizeInBytes() const { 58 std::size_t GetHostSizeInBytes() const {
59 std::size_t host_size_in_bytes; 59 std::size_t host_size_in_bytes;
60 if (GetCompressionType() == SurfaceCompression::Converted) { 60 if (GetCompressionType() == SurfaceCompression::Converted) {
61 constexpr std::size_t rgb8_bpp = 4ULL;
62 // ASTC is decompressed in software and emulated as RGBA8 61 // ASTC is decompressed in software and emulated as RGBA8
63 host_size_in_bytes = 0; 62 host_size_in_bytes = 0;
64 for (u32 level = 0; level < num_levels; ++level) { 63 for (u32 level = 0; level < num_levels; ++level) {
diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h
index a3a3770a7..2ec0203d1 100644
--- a/src/video_core/texture_cache/texture_cache.h
+++ b/src/video_core/texture_cache/texture_cache.h
@@ -308,8 +308,6 @@ protected:
308 if (!guard_render_targets && surface->IsRenderTarget()) { 308 if (!guard_render_targets && surface->IsRenderTarget()) {
309 ManageRenderTargetUnregister(surface); 309 ManageRenderTargetUnregister(surface);
310 } 310 }
311 const GPUVAddr gpu_addr = surface->GetGpuAddr();
312 const CacheAddr cache_ptr = surface->GetCacheAddr();
313 const std::size_t size = surface->GetSizeInBytes(); 311 const std::size_t size = surface->GetSizeInBytes();
314 const VAddr cpu_addr = surface->GetCpuAddr(); 312 const VAddr cpu_addr = surface->GetCpuAddr();
315 rasterizer.UpdatePagesCachedCount(cpu_addr, size, -1); 313 rasterizer.UpdatePagesCachedCount(cpu_addr, size, -1);
diff --git a/src/video_core/textures/texture.h b/src/video_core/textures/texture.h
index e3be018b9..e36bc2c04 100644
--- a/src/video_core/textures/texture.h
+++ b/src/video_core/textures/texture.h
@@ -213,7 +213,7 @@ struct TICEntry {
213 if (header_version != TICHeaderVersion::OneDBuffer) { 213 if (header_version != TICHeaderVersion::OneDBuffer) {
214 return width_minus_1 + 1; 214 return width_minus_1 + 1;
215 } 215 }
216 return (buffer_high_width_minus_one << 16) | buffer_low_width_minus_one; 216 return ((buffer_high_width_minus_one << 16) | buffer_low_width_minus_one) + 1;
217 } 217 }
218 218
219 u32 Height() const { 219 u32 Height() const {
diff --git a/src/yuzu/CMakeLists.txt b/src/yuzu/CMakeLists.txt
index 3dc0e47d0..f051e17b4 100644
--- a/src/yuzu/CMakeLists.txt
+++ b/src/yuzu/CMakeLists.txt
@@ -1,5 +1,6 @@
1set(CMAKE_AUTOMOC ON) 1set(CMAKE_AUTOMOC ON)
2set(CMAKE_AUTORCC ON) 2set(CMAKE_AUTORCC ON)
3set(CMAKE_AUTOUIC ON)
3set(CMAKE_INCLUDE_CURRENT_DIR ON) 4set(CMAKE_INCLUDE_CURRENT_DIR ON)
4set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} ${PROJECT_SOURCE_DIR}/CMakeModules) 5set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} ${PROJECT_SOURCE_DIR}/CMakeModules)
5 6
@@ -7,6 +8,7 @@ add_executable(yuzu
7 Info.plist 8 Info.plist
8 about_dialog.cpp 9 about_dialog.cpp
9 about_dialog.h 10 about_dialog.h
11 aboutdialog.ui
10 applets/error.cpp 12 applets/error.cpp
11 applets/error.h 13 applets/error.h
12 applets/profile_select.cpp 14 applets/profile_select.cpp
@@ -17,42 +19,59 @@ add_executable(yuzu
17 applets/web_browser.h 19 applets/web_browser.h
18 bootmanager.cpp 20 bootmanager.cpp
19 bootmanager.h 21 bootmanager.h
22 compatdb.ui
20 compatibility_list.cpp 23 compatibility_list.cpp
21 compatibility_list.h 24 compatibility_list.h
22 configuration/config.cpp 25 configuration/config.cpp
23 configuration/config.h 26 configuration/config.h
27 configuration/configure.ui
24 configuration/configure_audio.cpp 28 configuration/configure_audio.cpp
25 configuration/configure_audio.h 29 configuration/configure_audio.h
30 configuration/configure_audio.ui
26 configuration/configure_debug.cpp 31 configuration/configure_debug.cpp
27 configuration/configure_debug.h 32 configuration/configure_debug.h
33 configuration/configure_debug.ui
28 configuration/configure_dialog.cpp 34 configuration/configure_dialog.cpp
29 configuration/configure_dialog.h 35 configuration/configure_dialog.h
30 configuration/configure_gamelist.cpp 36 configuration/configure_gamelist.cpp
31 configuration/configure_gamelist.h 37 configuration/configure_gamelist.h
38 configuration/configure_gamelist.ui
32 configuration/configure_general.cpp 39 configuration/configure_general.cpp
33 configuration/configure_general.h 40 configuration/configure_general.h
41 configuration/configure_general.ui
34 configuration/configure_graphics.cpp 42 configuration/configure_graphics.cpp
35 configuration/configure_graphics.h 43 configuration/configure_graphics.h
44 configuration/configure_graphics.ui
36 configuration/configure_hotkeys.cpp 45 configuration/configure_hotkeys.cpp
37 configuration/configure_hotkeys.h 46 configuration/configure_hotkeys.h
47 configuration/configure_hotkeys.ui
38 configuration/configure_input.cpp 48 configuration/configure_input.cpp
39 configuration/configure_input.h 49 configuration/configure_input.h
50 configuration/configure_input.ui
40 configuration/configure_input_player.cpp 51 configuration/configure_input_player.cpp
41 configuration/configure_input_player.h 52 configuration/configure_input_player.h
53 configuration/configure_input_player.ui
42 configuration/configure_input_simple.cpp 54 configuration/configure_input_simple.cpp
43 configuration/configure_input_simple.h 55 configuration/configure_input_simple.h
56 configuration/configure_input_simple.ui
44 configuration/configure_mouse_advanced.cpp 57 configuration/configure_mouse_advanced.cpp
45 configuration/configure_mouse_advanced.h 58 configuration/configure_mouse_advanced.h
59 configuration/configure_mouse_advanced.ui
60 configuration/configure_per_general.cpp
61 configuration/configure_per_general.h
62 configuration/configure_per_general.ui
46 configuration/configure_profile_manager.cpp 63 configuration/configure_profile_manager.cpp
47 configuration/configure_profile_manager.h 64 configuration/configure_profile_manager.h
65 configuration/configure_profile_manager.ui
48 configuration/configure_system.cpp 66 configuration/configure_system.cpp
49 configuration/configure_system.h 67 configuration/configure_system.h
50 configuration/configure_per_general.cpp 68 configuration/configure_system.ui
51 configuration/configure_per_general.h
52 configuration/configure_touchscreen_advanced.cpp 69 configuration/configure_touchscreen_advanced.cpp
53 configuration/configure_touchscreen_advanced.h 70 configuration/configure_touchscreen_advanced.h
71 configuration/configure_touchscreen_advanced.ui
54 configuration/configure_web.cpp 72 configuration/configure_web.cpp
55 configuration/configure_web.h 73 configuration/configure_web.h
74 configuration/configure_web.ui
56 debugger/graphics/graphics_breakpoint_observer.cpp 75 debugger/graphics/graphics_breakpoint_observer.cpp
57 debugger/graphics/graphics_breakpoint_observer.h 76 debugger/graphics/graphics_breakpoint_observer.h
58 debugger/graphics/graphics_breakpoints.cpp 77 debugger/graphics/graphics_breakpoints.cpp
@@ -72,12 +91,14 @@ add_executable(yuzu
72 game_list_worker.h 91 game_list_worker.h
73 loading_screen.cpp 92 loading_screen.cpp
74 loading_screen.h 93 loading_screen.h
94 loading_screen.ui
75 hotkeys.cpp 95 hotkeys.cpp
76 hotkeys.h 96 hotkeys.h
77 main.cpp 97 main.cpp
78 main.h 98 main.h
79 ui_settings.cpp 99 main.ui
80 ui_settings.h 100 uisettings.cpp
101 uisettings.h
81 util/limitable_input_dialog.cpp 102 util/limitable_input_dialog.cpp
82 util/limitable_input_dialog.h 103 util/limitable_input_dialog.h
83 util/sequence_dialog/sequence_dialog.cpp 104 util/sequence_dialog/sequence_dialog.cpp
@@ -89,44 +110,18 @@ add_executable(yuzu
89 yuzu.rc 110 yuzu.rc
90) 111)
91 112
92set(UIS
93 aboutdialog.ui
94 configuration/configure.ui
95 configuration/configure_audio.ui
96 configuration/configure_debug.ui
97 configuration/configure_gamelist.ui
98 configuration/configure_general.ui
99 configuration/configure_graphics.ui
100 configuration/configure_hotkeys.ui
101 configuration/configure_input.ui
102 configuration/configure_input_player.ui
103 configuration/configure_input_simple.ui
104 configuration/configure_mouse_advanced.ui
105 configuration/configure_per_general.ui
106 configuration/configure_profile_manager.ui
107 configuration/configure_system.ui
108 configuration/configure_touchscreen_advanced.ui
109 configuration/configure_web.ui
110 compatdb.ui
111 loading_screen.ui
112 main.ui
113)
114
115file(GLOB COMPAT_LIST 113file(GLOB COMPAT_LIST
116 ${PROJECT_BINARY_DIR}/dist/compatibility_list/compatibility_list.qrc 114 ${PROJECT_BINARY_DIR}/dist/compatibility_list/compatibility_list.qrc
117 ${PROJECT_BINARY_DIR}/dist/compatibility_list/compatibility_list.json) 115 ${PROJECT_BINARY_DIR}/dist/compatibility_list/compatibility_list.json)
118file(GLOB_RECURSE ICONS ${PROJECT_SOURCE_DIR}/dist/icons/*) 116file(GLOB_RECURSE ICONS ${PROJECT_SOURCE_DIR}/dist/icons/*)
119file(GLOB_RECURSE THEMES ${PROJECT_SOURCE_DIR}/dist/qt_themes/*) 117file(GLOB_RECURSE THEMES ${PROJECT_SOURCE_DIR}/dist/qt_themes/*)
120 118
121qt5_wrap_ui(UI_HDRS ${UIS})
122 119
123target_sources(yuzu 120target_sources(yuzu
124 PRIVATE 121 PRIVATE
125 ${COMPAT_LIST} 122 ${COMPAT_LIST}
126 ${ICONS} 123 ${ICONS}
127 ${THEMES} 124 ${THEMES}
128 ${UI_HDRS}
129 ${UIS}
130) 125)
131 126
132if (APPLE) 127if (APPLE)
diff --git a/src/yuzu/configuration/config.cpp b/src/yuzu/configuration/config.cpp
index b7f3fdf75..5d0fb3f9f 100644
--- a/src/yuzu/configuration/config.cpp
+++ b/src/yuzu/configuration/config.cpp
@@ -11,7 +11,7 @@
11#include "core/hle/service/hid/controllers/npad.h" 11#include "core/hle/service/hid/controllers/npad.h"
12#include "input_common/main.h" 12#include "input_common/main.h"
13#include "yuzu/configuration/config.h" 13#include "yuzu/configuration/config.h"
14#include "yuzu/ui_settings.h" 14#include "yuzu/uisettings.h"
15 15
16Config::Config() { 16Config::Config() {
17 // TODO: Don't hardcode the path; let the frontend decide where to put the config files. 17 // TODO: Don't hardcode the path; let the frontend decide where to put the config files.
diff --git a/src/yuzu/configuration/configure_debug.cpp b/src/yuzu/configuration/configure_debug.cpp
index 9a13bb797..5b7e03056 100644
--- a/src/yuzu/configuration/configure_debug.cpp
+++ b/src/yuzu/configuration/configure_debug.cpp
@@ -12,13 +12,13 @@
12#include "ui_configure_debug.h" 12#include "ui_configure_debug.h"
13#include "yuzu/configuration/configure_debug.h" 13#include "yuzu/configuration/configure_debug.h"
14#include "yuzu/debugger/console.h" 14#include "yuzu/debugger/console.h"
15#include "yuzu/ui_settings.h" 15#include "yuzu/uisettings.h"
16 16
17ConfigureDebug::ConfigureDebug(QWidget* parent) : QWidget(parent), ui(new Ui::ConfigureDebug) { 17ConfigureDebug::ConfigureDebug(QWidget* parent) : QWidget(parent), ui(new Ui::ConfigureDebug) {
18 ui->setupUi(this); 18 ui->setupUi(this);
19 SetConfiguration(); 19 SetConfiguration();
20 20
21 connect(ui->open_log_button, &QPushButton::pressed, []() { 21 connect(ui->open_log_button, &QPushButton::clicked, []() {
22 QString path = QString::fromStdString(FileUtil::GetUserPath(FileUtil::UserPath::LogDir)); 22 QString path = QString::fromStdString(FileUtil::GetUserPath(FileUtil::UserPath::LogDir));
23 QDesktopServices::openUrl(QUrl::fromLocalFile(path)); 23 QDesktopServices::openUrl(QUrl::fromLocalFile(path));
24 }); 24 });
diff --git a/src/yuzu/configuration/configure_gamelist.cpp b/src/yuzu/configuration/configure_gamelist.cpp
index d1724ba89..daedbc33e 100644
--- a/src/yuzu/configuration/configure_gamelist.cpp
+++ b/src/yuzu/configuration/configure_gamelist.cpp
@@ -9,7 +9,7 @@
9#include "core/settings.h" 9#include "core/settings.h"
10#include "ui_configure_gamelist.h" 10#include "ui_configure_gamelist.h"
11#include "yuzu/configuration/configure_gamelist.h" 11#include "yuzu/configuration/configure_gamelist.h"
12#include "yuzu/ui_settings.h" 12#include "yuzu/uisettings.h"
13 13
14namespace { 14namespace {
15constexpr std::array default_icon_sizes{ 15constexpr std::array default_icon_sizes{
diff --git a/src/yuzu/configuration/configure_general.cpp b/src/yuzu/configuration/configure_general.cpp
index 7a6e921cd..75fcbfea3 100644
--- a/src/yuzu/configuration/configure_general.cpp
+++ b/src/yuzu/configuration/configure_general.cpp
@@ -6,7 +6,7 @@
6#include "core/settings.h" 6#include "core/settings.h"
7#include "ui_configure_general.h" 7#include "ui_configure_general.h"
8#include "yuzu/configuration/configure_general.h" 8#include "yuzu/configuration/configure_general.h"
9#include "yuzu/ui_settings.h" 9#include "yuzu/uisettings.h"
10 10
11ConfigureGeneral::ConfigureGeneral(QWidget* parent) 11ConfigureGeneral::ConfigureGeneral(QWidget* parent)
12 : QWidget(parent), ui(new Ui::ConfigureGeneral) { 12 : QWidget(parent), ui(new Ui::ConfigureGeneral) {
diff --git a/src/yuzu/configuration/configure_input.cpp b/src/yuzu/configuration/configure_input.cpp
index 4dd775aab..7613197f2 100644
--- a/src/yuzu/configuration/configure_input.cpp
+++ b/src/yuzu/configuration/configure_input.cpp
@@ -79,7 +79,7 @@ ConfigureInput::ConfigureInput(QWidget* parent)
79 LoadConfiguration(); 79 LoadConfiguration();
80 UpdateUIEnabled(); 80 UpdateUIEnabled();
81 81
82 connect(ui->restore_defaults_button, &QPushButton::pressed, this, 82 connect(ui->restore_defaults_button, &QPushButton::clicked, this,
83 &ConfigureInput::RestoreDefaults); 83 &ConfigureInput::RestoreDefaults);
84 84
85 for (auto* enabled : players_controller) { 85 for (auto* enabled : players_controller) {
@@ -96,20 +96,20 @@ ConfigureInput::ConfigureInput(QWidget* parent)
96 &ConfigureInput::UpdateUIEnabled); 96 &ConfigureInput::UpdateUIEnabled);
97 97
98 for (std::size_t i = 0; i < players_configure.size(); ++i) { 98 for (std::size_t i = 0; i < players_configure.size(); ++i) {
99 connect(players_configure[i], &QPushButton::pressed, this, 99 connect(players_configure[i], &QPushButton::clicked, this,
100 [this, i] { CallConfigureDialog<ConfigureInputPlayer>(*this, i, false); }); 100 [this, i] { CallConfigureDialog<ConfigureInputPlayer>(*this, i, false); });
101 } 101 }
102 102
103 connect(ui->handheld_configure, &QPushButton::pressed, this, 103 connect(ui->handheld_configure, &QPushButton::clicked, this,
104 [this] { CallConfigureDialog<ConfigureInputPlayer>(*this, 8, false); }); 104 [this] { CallConfigureDialog<ConfigureInputPlayer>(*this, 8, false); });
105 105
106 connect(ui->debug_configure, &QPushButton::pressed, this, 106 connect(ui->debug_configure, &QPushButton::clicked, this,
107 [this] { CallConfigureDialog<ConfigureInputPlayer>(*this, 9, true); }); 107 [this] { CallConfigureDialog<ConfigureInputPlayer>(*this, 9, true); });
108 108
109 connect(ui->mouse_advanced, &QPushButton::pressed, this, 109 connect(ui->mouse_advanced, &QPushButton::clicked, this,
110 [this] { CallConfigureDialog<ConfigureMouseAdvanced>(*this); }); 110 [this] { CallConfigureDialog<ConfigureMouseAdvanced>(*this); });
111 111
112 connect(ui->touchscreen_advanced, &QPushButton::pressed, this, 112 connect(ui->touchscreen_advanced, &QPushButton::clicked, this,
113 [this] { CallConfigureDialog<ConfigureTouchscreenAdvanced>(*this); }); 113 [this] { CallConfigureDialog<ConfigureTouchscreenAdvanced>(*this); });
114} 114}
115 115
diff --git a/src/yuzu/configuration/configure_input_player.cpp b/src/yuzu/configuration/configure_input_player.cpp
index 916baccc1..7b70f307c 100644
--- a/src/yuzu/configuration/configure_input_player.cpp
+++ b/src/yuzu/configuration/configure_input_player.cpp
@@ -244,7 +244,7 @@ ConfigureInputPlayer::ConfigureInputPlayer(QWidget* parent, std::size_t player_i
244 } 244 }
245 245
246 button->setContextMenuPolicy(Qt::CustomContextMenu); 246 button->setContextMenuPolicy(Qt::CustomContextMenu);
247 connect(button, &QPushButton::released, [=] { 247 connect(button, &QPushButton::clicked, [=] {
248 HandleClick( 248 HandleClick(
249 button_map[button_id], 249 button_map[button_id],
250 [=](const Common::ParamPackage& params) { buttons_param[button_id] = params; }, 250 [=](const Common::ParamPackage& params) { buttons_param[button_id] = params; },
@@ -273,7 +273,7 @@ ConfigureInputPlayer::ConfigureInputPlayer(QWidget* parent, std::size_t player_i
273 } 273 }
274 274
275 analog_button->setContextMenuPolicy(Qt::CustomContextMenu); 275 analog_button->setContextMenuPolicy(Qt::CustomContextMenu);
276 connect(analog_button, &QPushButton::released, [=]() { 276 connect(analog_button, &QPushButton::clicked, [=]() {
277 HandleClick(analog_map_buttons[analog_id][sub_button_id], 277 HandleClick(analog_map_buttons[analog_id][sub_button_id],
278 [=](const Common::ParamPackage& params) { 278 [=](const Common::ParamPackage& params) {
279 SetAnalogButton(params, analogs_param[analog_id], 279 SetAnalogButton(params, analogs_param[analog_id],
@@ -300,7 +300,7 @@ ConfigureInputPlayer::ConfigureInputPlayer(QWidget* parent, std::size_t player_i
300 menu_location)); 300 menu_location));
301 }); 301 });
302 } 302 }
303 connect(analog_map_stick[analog_id], &QPushButton::released, [=] { 303 connect(analog_map_stick[analog_id], &QPushButton::clicked, [=] {
304 QMessageBox::information(this, tr("Information"), 304 QMessageBox::information(this, tr("Information"),
305 tr("After pressing OK, first move your joystick horizontally, " 305 tr("After pressing OK, first move your joystick horizontally, "
306 "and then vertically.")); 306 "and then vertically."));
@@ -311,8 +311,8 @@ ConfigureInputPlayer::ConfigureInputPlayer(QWidget* parent, std::size_t player_i
311 }); 311 });
312 } 312 }
313 313
314 connect(ui->buttonClearAll, &QPushButton::released, [this] { ClearAll(); }); 314 connect(ui->buttonClearAll, &QPushButton::clicked, [this] { ClearAll(); });
315 connect(ui->buttonRestoreDefaults, &QPushButton::released, [this] { RestoreDefaults(); }); 315 connect(ui->buttonRestoreDefaults, &QPushButton::clicked, [this] { RestoreDefaults(); });
316 316
317 timeout_timer->setSingleShot(true); 317 timeout_timer->setSingleShot(true);
318 connect(timeout_timer.get(), &QTimer::timeout, [this] { SetPollingResult({}, true); }); 318 connect(timeout_timer.get(), &QTimer::timeout, [this] { SetPollingResult({}, true); });
diff --git a/src/yuzu/configuration/configure_input_simple.cpp b/src/yuzu/configuration/configure_input_simple.cpp
index 864803ea3..ab3a11d30 100644
--- a/src/yuzu/configuration/configure_input_simple.cpp
+++ b/src/yuzu/configuration/configure_input_simple.cpp
@@ -9,7 +9,7 @@
9#include "yuzu/configuration/configure_input.h" 9#include "yuzu/configuration/configure_input.h"
10#include "yuzu/configuration/configure_input_player.h" 10#include "yuzu/configuration/configure_input_player.h"
11#include "yuzu/configuration/configure_input_simple.h" 11#include "yuzu/configuration/configure_input_simple.h"
12#include "yuzu/ui_settings.h" 12#include "yuzu/uisettings.h"
13 13
14namespace { 14namespace {
15 15
@@ -101,7 +101,7 @@ ConfigureInputSimple::ConfigureInputSimple(QWidget* parent)
101 101
102 connect(ui->profile_combobox, QOverload<int>::of(&QComboBox::currentIndexChanged), this, 102 connect(ui->profile_combobox, QOverload<int>::of(&QComboBox::currentIndexChanged), this,
103 &ConfigureInputSimple::OnSelectProfile); 103 &ConfigureInputSimple::OnSelectProfile);
104 connect(ui->profile_configure, &QPushButton::pressed, this, &ConfigureInputSimple::OnConfigure); 104 connect(ui->profile_configure, &QPushButton::clicked, this, &ConfigureInputSimple::OnConfigure);
105 105
106 LoadConfiguration(); 106 LoadConfiguration();
107} 107}
diff --git a/src/yuzu/configuration/configure_mouse_advanced.cpp b/src/yuzu/configuration/configure_mouse_advanced.cpp
index b7305e653..0a4abe34f 100644
--- a/src/yuzu/configuration/configure_mouse_advanced.cpp
+++ b/src/yuzu/configuration/configure_mouse_advanced.cpp
@@ -83,7 +83,7 @@ ConfigureMouseAdvanced::ConfigureMouseAdvanced(QWidget* parent)
83 } 83 }
84 84
85 button->setContextMenuPolicy(Qt::CustomContextMenu); 85 button->setContextMenuPolicy(Qt::CustomContextMenu);
86 connect(button, &QPushButton::released, [=] { 86 connect(button, &QPushButton::clicked, [=] {
87 HandleClick( 87 HandleClick(
88 button_map[button_id], 88 button_map[button_id],
89 [=](const Common::ParamPackage& params) { buttons_param[button_id] = params; }, 89 [=](const Common::ParamPackage& params) { buttons_param[button_id] = params; },
@@ -104,8 +104,8 @@ ConfigureMouseAdvanced::ConfigureMouseAdvanced(QWidget* parent)
104 }); 104 });
105 } 105 }
106 106
107 connect(ui->buttonClearAll, &QPushButton::released, [this] { ClearAll(); }); 107 connect(ui->buttonClearAll, &QPushButton::clicked, [this] { ClearAll(); });
108 connect(ui->buttonRestoreDefaults, &QPushButton::released, [this] { RestoreDefaults(); }); 108 connect(ui->buttonRestoreDefaults, &QPushButton::clicked, [this] { RestoreDefaults(); });
109 109
110 timeout_timer->setSingleShot(true); 110 timeout_timer->setSingleShot(true);
111 connect(timeout_timer.get(), &QTimer::timeout, [this] { SetPollingResult({}, true); }); 111 connect(timeout_timer.get(), &QTimer::timeout, [this] { SetPollingResult({}, true); });
diff --git a/src/yuzu/configuration/configure_per_general.cpp b/src/yuzu/configuration/configure_per_general.cpp
index 90336e235..d7f259f12 100644
--- a/src/yuzu/configuration/configure_per_general.cpp
+++ b/src/yuzu/configuration/configure_per_general.cpp
@@ -23,7 +23,7 @@
23#include "yuzu/configuration/config.h" 23#include "yuzu/configuration/config.h"
24#include "yuzu/configuration/configure_input.h" 24#include "yuzu/configuration/configure_input.h"
25#include "yuzu/configuration/configure_per_general.h" 25#include "yuzu/configuration/configure_per_general.h"
26#include "yuzu/ui_settings.h" 26#include "yuzu/uisettings.h"
27#include "yuzu/util/util.h" 27#include "yuzu/util/util.h"
28 28
29ConfigurePerGameGeneral::ConfigurePerGameGeneral(QWidget* parent, u64 title_id) 29ConfigurePerGameGeneral::ConfigurePerGameGeneral(QWidget* parent, u64 title_id)
diff --git a/src/yuzu/configuration/configure_profile_manager.cpp b/src/yuzu/configuration/configure_profile_manager.cpp
index c90f4cdd8..f53423440 100644
--- a/src/yuzu/configuration/configure_profile_manager.cpp
+++ b/src/yuzu/configuration/configure_profile_manager.cpp
@@ -108,10 +108,10 @@ ConfigureProfileManager ::ConfigureProfileManager(QWidget* parent)
108 108
109 connect(tree_view, &QTreeView::clicked, this, &ConfigureProfileManager::SelectUser); 109 connect(tree_view, &QTreeView::clicked, this, &ConfigureProfileManager::SelectUser);
110 110
111 connect(ui->pm_add, &QPushButton::pressed, this, &ConfigureProfileManager::AddUser); 111 connect(ui->pm_add, &QPushButton::clicked, this, &ConfigureProfileManager::AddUser);
112 connect(ui->pm_rename, &QPushButton::pressed, this, &ConfigureProfileManager::RenameUser); 112 connect(ui->pm_rename, &QPushButton::clicked, this, &ConfigureProfileManager::RenameUser);
113 connect(ui->pm_remove, &QPushButton::pressed, this, &ConfigureProfileManager::DeleteUser); 113 connect(ui->pm_remove, &QPushButton::clicked, this, &ConfigureProfileManager::DeleteUser);
114 connect(ui->pm_set_image, &QPushButton::pressed, this, &ConfigureProfileManager::SetUserImage); 114 connect(ui->pm_set_image, &QPushButton::clicked, this, &ConfigureProfileManager::SetUserImage);
115 115
116 scene = new QGraphicsScene; 116 scene = new QGraphicsScene;
117 ui->current_user_icon->setScene(scene); 117 ui->current_user_icon->setScene(scene);
diff --git a/src/yuzu/configuration/configure_touchscreen_advanced.cpp b/src/yuzu/configuration/configure_touchscreen_advanced.cpp
index 8ced28c75..7d7cc00b7 100644
--- a/src/yuzu/configuration/configure_touchscreen_advanced.cpp
+++ b/src/yuzu/configuration/configure_touchscreen_advanced.cpp
@@ -11,7 +11,7 @@ ConfigureTouchscreenAdvanced::ConfigureTouchscreenAdvanced(QWidget* parent)
11 : QDialog(parent), ui(std::make_unique<Ui::ConfigureTouchscreenAdvanced>()) { 11 : QDialog(parent), ui(std::make_unique<Ui::ConfigureTouchscreenAdvanced>()) {
12 ui->setupUi(this); 12 ui->setupUi(this);
13 13
14 connect(ui->restore_defaults_button, &QPushButton::pressed, this, 14 connect(ui->restore_defaults_button, &QPushButton::clicked, this,
15 &ConfigureTouchscreenAdvanced::RestoreDefaults); 15 &ConfigureTouchscreenAdvanced::RestoreDefaults);
16 16
17 LoadConfiguration(); 17 LoadConfiguration();
diff --git a/src/yuzu/configuration/configure_web.cpp b/src/yuzu/configuration/configure_web.cpp
index 5a70ef168..336b062b3 100644
--- a/src/yuzu/configuration/configure_web.cpp
+++ b/src/yuzu/configuration/configure_web.cpp
@@ -9,7 +9,7 @@
9#include "core/telemetry_session.h" 9#include "core/telemetry_session.h"
10#include "ui_configure_web.h" 10#include "ui_configure_web.h"
11#include "yuzu/configuration/configure_web.h" 11#include "yuzu/configuration/configure_web.h"
12#include "yuzu/ui_settings.h" 12#include "yuzu/uisettings.h"
13 13
14ConfigureWeb::ConfigureWeb(QWidget* parent) 14ConfigureWeb::ConfigureWeb(QWidget* parent)
15 : QWidget(parent), ui(std::make_unique<Ui::ConfigureWeb>()) { 15 : QWidget(parent), ui(std::make_unique<Ui::ConfigureWeb>()) {
diff --git a/src/yuzu/debugger/console.cpp b/src/yuzu/debugger/console.cpp
index 320898f6a..207ff4d58 100644
--- a/src/yuzu/debugger/console.cpp
+++ b/src/yuzu/debugger/console.cpp
@@ -10,7 +10,7 @@
10 10
11#include "common/logging/backend.h" 11#include "common/logging/backend.h"
12#include "yuzu/debugger/console.h" 12#include "yuzu/debugger/console.h"
13#include "yuzu/ui_settings.h" 13#include "yuzu/uisettings.h"
14 14
15namespace Debugger { 15namespace Debugger {
16void ToggleConsole() { 16void ToggleConsole() {
diff --git a/src/yuzu/discord_impl.cpp b/src/yuzu/discord_impl.cpp
index 9d87a41eb..ea0079353 100644
--- a/src/yuzu/discord_impl.cpp
+++ b/src/yuzu/discord_impl.cpp
@@ -9,7 +9,7 @@
9#include "core/core.h" 9#include "core/core.h"
10#include "core/loader/loader.h" 10#include "core/loader/loader.h"
11#include "yuzu/discord_impl.h" 11#include "yuzu/discord_impl.h"
12#include "yuzu/ui_settings.h" 12#include "yuzu/uisettings.h"
13 13
14namespace DiscordRPC { 14namespace DiscordRPC {
15 15
diff --git a/src/yuzu/game_list.cpp b/src/yuzu/game_list.cpp
index 1885587af..d18b96519 100644
--- a/src/yuzu/game_list.cpp
+++ b/src/yuzu/game_list.cpp
@@ -23,7 +23,7 @@
23#include "yuzu/game_list_p.h" 23#include "yuzu/game_list_p.h"
24#include "yuzu/game_list_worker.h" 24#include "yuzu/game_list_worker.h"
25#include "yuzu/main.h" 25#include "yuzu/main.h"
26#include "yuzu/ui_settings.h" 26#include "yuzu/uisettings.h"
27 27
28GameListSearchField::KeyReleaseEater::KeyReleaseEater(GameList* gamelist) : gamelist{gamelist} {} 28GameListSearchField::KeyReleaseEater::KeyReleaseEater(GameList* gamelist) : gamelist{gamelist} {}
29 29
diff --git a/src/yuzu/game_list_p.h b/src/yuzu/game_list_p.h
index 0b458ef48..ece534dd6 100644
--- a/src/yuzu/game_list_p.h
+++ b/src/yuzu/game_list_p.h
@@ -19,7 +19,7 @@
19#include "common/common_types.h" 19#include "common/common_types.h"
20#include "common/logging/log.h" 20#include "common/logging/log.h"
21#include "common/string_util.h" 21#include "common/string_util.h"
22#include "yuzu/ui_settings.h" 22#include "yuzu/uisettings.h"
23#include "yuzu/util/util.h" 23#include "yuzu/util/util.h"
24 24
25/** 25/**
diff --git a/src/yuzu/game_list_worker.cpp b/src/yuzu/game_list_worker.cpp
index 4f30e9147..77f358630 100644
--- a/src/yuzu/game_list_worker.cpp
+++ b/src/yuzu/game_list_worker.cpp
@@ -29,7 +29,7 @@
29#include "yuzu/game_list.h" 29#include "yuzu/game_list.h"
30#include "yuzu/game_list_p.h" 30#include "yuzu/game_list_p.h"
31#include "yuzu/game_list_worker.h" 31#include "yuzu/game_list_worker.h"
32#include "yuzu/ui_settings.h" 32#include "yuzu/uisettings.h"
33 33
34namespace { 34namespace {
35 35
diff --git a/src/yuzu/hotkeys.cpp b/src/yuzu/hotkeys.cpp
index 4582e7f21..d4e97fa16 100644
--- a/src/yuzu/hotkeys.cpp
+++ b/src/yuzu/hotkeys.cpp
@@ -7,7 +7,7 @@
7#include <QTreeWidgetItem> 7#include <QTreeWidgetItem>
8#include <QtGlobal> 8#include <QtGlobal>
9#include "yuzu/hotkeys.h" 9#include "yuzu/hotkeys.h"
10#include "yuzu/ui_settings.h" 10#include "yuzu/uisettings.h"
11 11
12HotkeyRegistry::HotkeyRegistry() = default; 12HotkeyRegistry::HotkeyRegistry() = default;
13HotkeyRegistry::~HotkeyRegistry() = default; 13HotkeyRegistry::~HotkeyRegistry() = default;
diff --git a/src/yuzu/main.cpp b/src/yuzu/main.cpp
index ae21f4753..a7c656fdb 100644
--- a/src/yuzu/main.cpp
+++ b/src/yuzu/main.cpp
@@ -100,7 +100,7 @@ static FileSys::VirtualFile VfsDirectoryCreateFileWrapper(const FileSys::Virtual
100#include "yuzu/hotkeys.h" 100#include "yuzu/hotkeys.h"
101#include "yuzu/loading_screen.h" 101#include "yuzu/loading_screen.h"
102#include "yuzu/main.h" 102#include "yuzu/main.h"
103#include "yuzu/ui_settings.h" 103#include "yuzu/uisettings.h"
104 104
105#ifdef USE_DISCORD_PRESENCE 105#ifdef USE_DISCORD_PRESENCE
106#include "yuzu/discord_impl.h" 106#include "yuzu/discord_impl.h"
@@ -1843,13 +1843,14 @@ void GMainWindow::OnCoreError(Core::System::ResultStatus result, std::string det
1843 "data, or other bugs."); 1843 "data, or other bugs.");
1844 switch (result) { 1844 switch (result) {
1845 case Core::System::ResultStatus::ErrorSystemFiles: { 1845 case Core::System::ResultStatus::ErrorSystemFiles: {
1846 QString message = tr("yuzu was unable to locate a Switch system archive"); 1846 QString message;
1847 if (!details.empty()) { 1847 if (details.empty()) {
1848 message.append(tr(": %1. ").arg(QString::fromStdString(details))); 1848 message =
1849 tr("yuzu was unable to locate a Switch system archive. %1").arg(common_message);
1849 } else { 1850 } else {
1850 message.append(tr(". ")); 1851 message = tr("yuzu was unable to locate a Switch system archive: %1. %2")
1852 .arg(QString::fromStdString(details), common_message);
1851 } 1853 }
1852 message.append(common_message);
1853 1854
1854 answer = QMessageBox::question(this, tr("System Archive Not Found"), message, 1855 answer = QMessageBox::question(this, tr("System Archive Not Found"), message,
1855 QMessageBox::Yes | QMessageBox::No, QMessageBox::No); 1856 QMessageBox::Yes | QMessageBox::No, QMessageBox::No);
@@ -1858,8 +1859,8 @@ void GMainWindow::OnCoreError(Core::System::ResultStatus result, std::string det
1858 } 1859 }
1859 1860
1860 case Core::System::ResultStatus::ErrorSharedFont: { 1861 case Core::System::ResultStatus::ErrorSharedFont: {
1861 QString message = tr("yuzu was unable to locate the Switch shared fonts. "); 1862 const QString message =
1862 message.append(common_message); 1863 tr("yuzu was unable to locate the Switch shared fonts. %1").arg(common_message);
1863 answer = QMessageBox::question(this, tr("Shared Fonts Not Found"), message, 1864 answer = QMessageBox::question(this, tr("Shared Fonts Not Found"), message,
1864 QMessageBox::Yes | QMessageBox::No, QMessageBox::No); 1865 QMessageBox::Yes | QMessageBox::No, QMessageBox::No);
1865 status_message = tr("Shared Font Missing"); 1866 status_message = tr("Shared Font Missing");
diff --git a/src/yuzu/ui_settings.cpp b/src/yuzu/uisettings.cpp
index 4bdc302e0..7f7d247a3 100644
--- a/src/yuzu/ui_settings.cpp
+++ b/src/yuzu/uisettings.cpp
@@ -2,7 +2,7 @@
2// Licensed under GPLv2 or any later version 2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included. 3// Refer to the license.txt file included.
4 4
5#include "ui_settings.h" 5#include "yuzu/uisettings.h"
6 6
7namespace UISettings { 7namespace UISettings {
8 8
diff --git a/src/yuzu/ui_settings.h b/src/yuzu/uisettings.h
index a62cd6911..a62cd6911 100644
--- a/src/yuzu/ui_settings.h
+++ b/src/yuzu/uisettings.h
diff --git a/src/yuzu_tester/yuzu.cpp b/src/yuzu_tester/yuzu.cpp
index b589c3de3..0ee97aa54 100644
--- a/src/yuzu_tester/yuzu.cpp
+++ b/src/yuzu_tester/yuzu.cpp
@@ -92,7 +92,6 @@ int main(int argc, char** argv) {
92 92
93 int option_index = 0; 93 int option_index = 0;
94 94
95 char* endarg;
96#ifdef _WIN32 95#ifdef _WIN32
97 int argc_w; 96 int argc_w;
98 auto argv_w = CommandLineToArgvW(GetCommandLineW(), &argc_w); 97 auto argv_w = CommandLineToArgvW(GetCommandLineW(), &argc_w);
@@ -226,7 +225,7 @@ int main(int argc, char** argv) {
226 225
227 switch (load_result) { 226 switch (load_result) {
228 case Core::System::ResultStatus::ErrorGetLoader: 227 case Core::System::ResultStatus::ErrorGetLoader:
229 LOG_CRITICAL(Frontend, "Failed to obtain loader for %s!", filepath.c_str()); 228 LOG_CRITICAL(Frontend, "Failed to obtain loader for {}!", filepath);
230 return -1; 229 return -1;
231 case Core::System::ResultStatus::ErrorLoader: 230 case Core::System::ResultStatus::ErrorLoader:
232 LOG_CRITICAL(Frontend, "Failed to load ROM!"); 231 LOG_CRITICAL(Frontend, "Failed to load ROM!");