summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--src/common/CMakeLists.txt2
-rw-r--r--src/common/dynamic_library.cpp106
-rw-r--r--src/common/dynamic_library.h75
-rw-r--r--src/core/frontend/emu_window.h41
-rw-r--r--src/core/hle/service/nvflinger/buffer_queue.cpp48
-rw-r--r--src/core/hle/service/nvflinger/buffer_queue.h2
-rw-r--r--src/core/hle/service/vi/vi.cpp46
-rw-r--r--src/core/memory.cpp127
-rw-r--r--src/core/memory.h78
-rw-r--r--src/video_core/CMakeLists.txt1
-rw-r--r--src/video_core/buffer_cache/buffer_block.h42
-rw-r--r--src/video_core/buffer_cache/buffer_cache.h145
-rw-r--r--src/video_core/buffer_cache/map_interval.h12
-rw-r--r--src/video_core/engines/maxwell_3d.h67
-rw-r--r--src/video_core/engines/shader_bytecode.h65
-rw-r--r--src/video_core/gpu.h6
-rw-r--r--src/video_core/gpu_asynch.cpp6
-rw-r--r--src/video_core/gpu_asynch.h6
-rw-r--r--src/video_core/gpu_synch.cpp6
-rw-r--r--src/video_core/gpu_synch.h6
-rw-r--r--src/video_core/gpu_thread.cpp6
-rw-r--r--src/video_core/gpu_thread.h18
-rw-r--r--src/video_core/memory_manager.cpp93
-rw-r--r--src/video_core/memory_manager.h5
-rw-r--r--src/video_core/query_cache.h37
-rw-r--r--src/video_core/rasterizer_cache.h44
-rw-r--r--src/video_core/rasterizer_interface.h6
-rw-r--r--src/video_core/renderer_opengl/gl_buffer_cache.cpp8
-rw-r--r--src/video_core/renderer_opengl/gl_buffer_cache.h4
-rw-r--r--src/video_core/renderer_opengl/gl_rasterizer.cpp13
-rw-r--r--src/video_core/renderer_opengl/gl_rasterizer.h6
-rw-r--r--src/video_core/renderer_opengl/gl_shader_cache.cpp46
-rw-r--r--src/video_core/renderer_opengl/gl_shader_cache.h8
-rw-r--r--src/video_core/renderer_opengl/gl_shader_decompiler.cpp12
-rw-r--r--src/video_core/renderer_vulkan/declarations.h2
-rw-r--r--src/video_core/renderer_vulkan/renderer_vulkan.cpp264
-rw-r--r--src/video_core/renderer_vulkan/renderer_vulkan.h19
-rw-r--r--src/video_core/renderer_vulkan/vk_buffer_cache.cpp8
-rw-r--r--src/video_core/renderer_vulkan/vk_buffer_cache.h4
-rw-r--r--src/video_core/renderer_vulkan/vk_device.cpp92
-rw-r--r--src/video_core/renderer_vulkan/vk_device.h21
-rw-r--r--src/video_core/renderer_vulkan/vk_pipeline_cache.cpp38
-rw-r--r--src/video_core/renderer_vulkan/vk_pipeline_cache.h7
-rw-r--r--src/video_core/renderer_vulkan/vk_rasterizer.cpp15
-rw-r--r--src/video_core/renderer_vulkan/vk_rasterizer.h6
-rw-r--r--src/video_core/shader/decode/other.cpp12
-rw-r--r--src/video_core/shader/decode/texture.cpp14
-rw-r--r--src/video_core/shader/decode/video.cpp58
-rw-r--r--src/video_core/shader/shader_ir.cpp3
-rw-r--r--src/video_core/shader/shader_ir.h3
-rw-r--r--src/video_core/texture_cache/surface_base.cpp42
-rw-r--r--src/video_core/texture_cache/surface_base.h34
-rw-r--r--src/video_core/texture_cache/texture_cache.h121
-rw-r--r--src/video_core/textures/texture.cpp80
-rw-r--r--src/video_core/textures/texture.h46
-rw-r--r--src/yuzu/CMakeLists.txt4
-rw-r--r--src/yuzu/bootmanager.cpp93
-rw-r--r--src/yuzu/bootmanager.h9
-rw-r--r--src/yuzu/configuration/configure_graphics.cpp42
-rw-r--r--src/yuzu/configuration/configure_input_player.cpp15
-rw-r--r--src/yuzu/configuration/configure_input_simple.cpp9
-rw-r--r--src/yuzu/configuration/configure_mouse_advanced.cpp15
-rw-r--r--src/yuzu/game_list.cpp6
-rw-r--r--src/yuzu/game_list_p.h24
-rw-r--r--src/yuzu_cmd/emu_window/emu_window_sdl2_gl.cpp6
-rw-r--r--src/yuzu_cmd/emu_window/emu_window_sdl2_gl.h4
-rw-r--r--src/yuzu_cmd/emu_window/emu_window_sdl2_vk.cpp159
-rw-r--r--src/yuzu_cmd/emu_window/emu_window_sdl2_vk.h18
-rw-r--r--src/yuzu_tester/emu_window/emu_window_sdl2_hide.cpp4
-rw-r--r--src/yuzu_tester/emu_window/emu_window_sdl2_hide.h4
70 files changed, 1618 insertions, 876 deletions
diff --git a/src/common/CMakeLists.txt b/src/common/CMakeLists.txt
index fbebed715..eeceaa655 100644
--- a/src/common/CMakeLists.txt
+++ b/src/common/CMakeLists.txt
@@ -106,6 +106,8 @@ add_library(common STATIC
106 common_funcs.h 106 common_funcs.h
107 common_paths.h 107 common_paths.h
108 common_types.h 108 common_types.h
109 dynamic_library.cpp
110 dynamic_library.h
109 file_util.cpp 111 file_util.cpp
110 file_util.h 112 file_util.h
111 hash.h 113 hash.h
diff --git a/src/common/dynamic_library.cpp b/src/common/dynamic_library.cpp
new file mode 100644
index 000000000..7ab54e9e4
--- /dev/null
+++ b/src/common/dynamic_library.cpp
@@ -0,0 +1,106 @@
1// Copyright 2019 Dolphin Emulator Project
2// Licensed under GPLv2+
3// Refer to the license.txt file included.
4
5#include <cstring>
6#include <string>
7#include <utility>
8
9#include <fmt/format.h>
10
11#include "common/dynamic_library.h"
12
13#ifdef _WIN32
14#include <windows.h>
15#else
16#include <dlfcn.h>
17#endif
18
19namespace Common {
20
21DynamicLibrary::DynamicLibrary() = default;
22
23DynamicLibrary::DynamicLibrary(const char* filename) {
24 Open(filename);
25}
26
27DynamicLibrary::DynamicLibrary(DynamicLibrary&& rhs) noexcept
28 : handle{std::exchange(rhs.handle, nullptr)} {}
29
30DynamicLibrary& DynamicLibrary::operator=(DynamicLibrary&& rhs) noexcept {
31 Close();
32 handle = std::exchange(rhs.handle, nullptr);
33 return *this;
34}
35
36DynamicLibrary::~DynamicLibrary() {
37 Close();
38}
39
40std::string DynamicLibrary::GetUnprefixedFilename(const char* filename) {
41#if defined(_WIN32)
42 return std::string(filename) + ".dll";
43#elif defined(__APPLE__)
44 return std::string(filename) + ".dylib";
45#else
46 return std::string(filename) + ".so";
47#endif
48}
49
50std::string DynamicLibrary::GetVersionedFilename(const char* libname, int major, int minor) {
51#if defined(_WIN32)
52 if (major >= 0 && minor >= 0)
53 return fmt::format("{}-{}-{}.dll", libname, major, minor);
54 else if (major >= 0)
55 return fmt::format("{}-{}.dll", libname, major);
56 else
57 return fmt::format("{}.dll", libname);
58#elif defined(__APPLE__)
59 const char* prefix = std::strncmp(libname, "lib", 3) ? "lib" : "";
60 if (major >= 0 && minor >= 0)
61 return fmt::format("{}{}.{}.{}.dylib", prefix, libname, major, minor);
62 else if (major >= 0)
63 return fmt::format("{}{}.{}.dylib", prefix, libname, major);
64 else
65 return fmt::format("{}{}.dylib", prefix, libname);
66#else
67 const char* prefix = std::strncmp(libname, "lib", 3) ? "lib" : "";
68 if (major >= 0 && minor >= 0)
69 return fmt::format("{}{}.so.{}.{}", prefix, libname, major, minor);
70 else if (major >= 0)
71 return fmt::format("{}{}.so.{}", prefix, libname, major);
72 else
73 return fmt::format("{}{}.so", prefix, libname);
74#endif
75}
76
77bool DynamicLibrary::Open(const char* filename) {
78#ifdef _WIN32
79 handle = reinterpret_cast<void*>(LoadLibraryA(filename));
80#else
81 handle = dlopen(filename, RTLD_NOW);
82#endif
83 return handle != nullptr;
84}
85
86void DynamicLibrary::Close() {
87 if (!IsOpen())
88 return;
89
90#ifdef _WIN32
91 FreeLibrary(reinterpret_cast<HMODULE>(handle));
92#else
93 dlclose(handle);
94#endif
95 handle = nullptr;
96}
97
98void* DynamicLibrary::GetSymbolAddress(const char* name) const {
99#ifdef _WIN32
100 return reinterpret_cast<void*>(GetProcAddress(reinterpret_cast<HMODULE>(handle), name));
101#else
102 return reinterpret_cast<void*>(dlsym(handle, name));
103#endif
104}
105
106} // namespace Common
diff --git a/src/common/dynamic_library.h b/src/common/dynamic_library.h
new file mode 100644
index 000000000..2a06372fd
--- /dev/null
+++ b/src/common/dynamic_library.h
@@ -0,0 +1,75 @@
1// Copyright 2019 Dolphin Emulator Project
2// Licensed under GPLv2+
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <string>
8
9namespace Common {
10
/**
 * Provides a platform-independent interface for loading a dynamic library and retrieving symbols.
 * An instance is the sole owner of its library handle: it cannot be copied, ownership can only be
 * transferred by moving, and the library is closed when the owning instance is destroyed.
 */
class DynamicLibrary final {
public:
    /// Default constructor, does not load a library.
    explicit DynamicLibrary();

    /// Automatically loads the specified library. Call IsOpen() to check validity before use.
    explicit DynamicLibrary(const char* filename);

    /// Moves the library. The moved-from object no longer holds a library.
    DynamicLibrary(DynamicLibrary&&) noexcept;
    DynamicLibrary& operator=(DynamicLibrary&&) noexcept;

    /// Delete copies, we can't copy a dynamic library.
    DynamicLibrary(const DynamicLibrary&) = delete;
    DynamicLibrary& operator=(const DynamicLibrary&) = delete;

    /// Closes the library.
    ~DynamicLibrary();

    /// Returns the specified library name with the platform-specific suffix added.
    static std::string GetUnprefixedFilename(const char* filename);

    /// Returns the specified library name in platform-specific format.
    /// Major/minor versions will not be included if set to -1.
    /// If libname already contains the "lib" prefix, it will not be added again.
    /// Windows: LIBNAME-MAJOR-MINOR.dll
    /// Linux: libLIBNAME.so.MAJOR.MINOR
    /// Mac: libLIBNAME.MAJOR.MINOR.dylib
    static std::string GetVersionedFilename(const char* libname, int major = -1, int minor = -1);

    /// Returns true if a module is loaded, otherwise false.
    bool IsOpen() const {
        return handle != nullptr;
    }

    /// Loads (or replaces) the handle with the specified library file name.
    /// Returns true if the library was loaded and can be used.
    bool Open(const char* filename);

    /// Unloads the library, any function pointers from this library are no longer valid.
    void Close();

    /// Returns the address of the specified symbol (function or variable) as an untyped pointer.
    /// If the specified symbol does not exist in this library, nullptr is returned.
    void* GetSymbolAddress(const char* name) const;

    /// Obtains the address of the specified symbol, automatically casting to the correct type.
    /// T must be a pointer type, since the address is reinterpret_cast to T and compared
    /// against nullptr. *ptr is always overwritten, with nullptr when the lookup fails.
    /// Returns true if the symbol was found and assigned, otherwise false.
    template <typename T>
    bool GetSymbol(const char* name, T* ptr) const {
        *ptr = reinterpret_cast<T>(GetSymbolAddress(name));
        return *ptr != nullptr;
    }

private:
    /// Platform-dependent data type representing a dynamic library handle.
    /// Null whenever no library is loaded.
    void* handle = nullptr;
};
74
75} // namespace Common
diff --git a/src/core/frontend/emu_window.h b/src/core/frontend/emu_window.h
index 72294d4d8..13aa14934 100644
--- a/src/core/frontend/emu_window.h
+++ b/src/core/frontend/emu_window.h
@@ -12,6 +12,15 @@
12 12
13namespace Core::Frontend { 13namespace Core::Frontend {
14 14
/// Tells the graphics backends how to interpret the native pointers carried by
/// WindowSystemInfo.
enum class WindowSystemType {
    /// No window system; this is the default value of WindowSystemInfo::type.
    Headless,
    /// Native Windows windowing.
    Windows,
    /// X11 windowing.
    X11,
    /// Wayland windowing.
    Wayland,
};
23
15/** 24/**
16 * Represents a drawing context that supports graphics operations. 25 * Represents a drawing context that supports graphics operations.
17 */ 26 */
@@ -76,6 +85,23 @@ public:
76 std::pair<unsigned, unsigned> min_client_area_size; 85 std::pair<unsigned, unsigned> min_client_area_size;
77 }; 86 };
78 87
88 /// Data describing host window system information
89 struct WindowSystemInfo {
90 // Window system type. Determines which GL context or Vulkan WSI is used.
91 WindowSystemType type = WindowSystemType::Headless;
92
93 // Connection to a display server. This is used on X11 and Wayland platforms.
94 void* display_connection = nullptr;
95
96 // Render surface. This is a pointer to the native window handle, which depends
97 // on the platform. e.g. HWND for Windows, Window for X11. If the surface is
98 // set to nullptr, the video backend will run in headless mode.
99 void* render_surface = nullptr;
100
101 // Scale of the render surface. For hidpi systems, this will be >1.
102 float render_surface_scale = 1.0f;
103 };
104
79 /// Polls window events 105 /// Polls window events
80 virtual void PollEvents() = 0; 106 virtual void PollEvents() = 0;
81 107
@@ -87,10 +113,6 @@ public:
87 /// Returns if window is shown (not minimized) 113 /// Returns if window is shown (not minimized)
88 virtual bool IsShown() const = 0; 114 virtual bool IsShown() const = 0;
89 115
90 /// Retrieves Vulkan specific handlers from the window
91 virtual void RetrieveVulkanHandlers(void* get_instance_proc_addr, void* instance,
92 void* surface) const = 0;
93
94 /** 116 /**
95 * Signal that a touch pressed event has occurred (e.g. mouse click pressed) 117 * Signal that a touch pressed event has occurred (e.g. mouse click pressed)
96 * @param framebuffer_x Framebuffer x-coordinate that was pressed 118 * @param framebuffer_x Framebuffer x-coordinate that was pressed
@@ -128,6 +150,13 @@ public:
128 } 150 }
129 151
130 /** 152 /**
153 * Returns system information about the drawing area.
154 */
155 const WindowSystemInfo& GetWindowInfo() const {
156 return window_info;
157 }
158
159 /**
131 * Gets the framebuffer layout (width, height, and screen regions) 160 * Gets the framebuffer layout (width, height, and screen regions)
132 * @note This method is thread-safe 161 * @note This method is thread-safe
133 */ 162 */
@@ -142,7 +171,7 @@ public:
142 void UpdateCurrentFramebufferLayout(unsigned width, unsigned height); 171 void UpdateCurrentFramebufferLayout(unsigned width, unsigned height);
143 172
144protected: 173protected:
145 EmuWindow(); 174 explicit EmuWindow();
146 virtual ~EmuWindow(); 175 virtual ~EmuWindow();
147 176
148 /** 177 /**
@@ -179,6 +208,8 @@ protected:
179 client_area_height = size.second; 208 client_area_height = size.second;
180 } 209 }
181 210
211 WindowSystemInfo window_info;
212
182private: 213private:
183 /** 214 /**
184 * Handler called when the minimal client area was requested to be changed via SetConfig. 215 * Handler called when the minimal client area was requested to be changed via SetConfig.
diff --git a/src/core/hle/service/nvflinger/buffer_queue.cpp b/src/core/hle/service/nvflinger/buffer_queue.cpp
index 32b6f4b27..f1e3d832a 100644
--- a/src/core/hle/service/nvflinger/buffer_queue.cpp
+++ b/src/core/hle/service/nvflinger/buffer_queue.cpp
@@ -28,6 +28,7 @@ void BufferQueue::SetPreallocatedBuffer(u32 slot, const IGBPBuffer& igbp_buffer)
28 buffer.slot = slot; 28 buffer.slot = slot;
29 buffer.igbp_buffer = igbp_buffer; 29 buffer.igbp_buffer = igbp_buffer;
30 buffer.status = Buffer::Status::Free; 30 buffer.status = Buffer::Status::Free;
31 free_buffers.push_back(slot);
31 32
32 queue.emplace_back(buffer); 33 queue.emplace_back(buffer);
33 buffer_wait_event.writable->Signal(); 34 buffer_wait_event.writable->Signal();
@@ -35,16 +36,37 @@ void BufferQueue::SetPreallocatedBuffer(u32 slot, const IGBPBuffer& igbp_buffer)
35 36
36std::optional<std::pair<u32, Service::Nvidia::MultiFence*>> BufferQueue::DequeueBuffer(u32 width, 37std::optional<std::pair<u32, Service::Nvidia::MultiFence*>> BufferQueue::DequeueBuffer(u32 width,
37 u32 height) { 38 u32 height) {
38 auto itr = std::find_if(queue.begin(), queue.end(), [&](const Buffer& buffer) {
39 // Only consider free buffers. Buffers become free once again after they've been Acquired
40 // and Released by the compositor, see the NVFlinger::Compose method.
41 if (buffer.status != Buffer::Status::Free) {
42 return false;
43 }
44 39
45 // Make sure that the parameters match. 40 if (free_buffers.empty()) {
46 return buffer.igbp_buffer.width == width && buffer.igbp_buffer.height == height; 41 return {};
47 }); 42 }
43
44 auto f_itr = free_buffers.begin();
45 auto itr = queue.end();
46
47 while (f_itr != free_buffers.end()) {
48 auto slot = *f_itr;
49 itr = std::find_if(queue.begin(), queue.end(), [&](const Buffer& buffer) {
50 // Only consider free buffers. Buffers become free once again after they've been
51 // Acquired and Released by the compositor, see the NVFlinger::Compose method.
52 if (buffer.status != Buffer::Status::Free) {
53 return false;
54 }
55
56 if (buffer.slot != slot) {
57 return false;
58 }
59
60 // Make sure that the parameters match.
61 return buffer.igbp_buffer.width == width && buffer.igbp_buffer.height == height;
62 });
63
64 if (itr != queue.end()) {
65 free_buffers.erase(f_itr);
66 break;
67 }
68 ++f_itr;
69 }
48 70
49 if (itr == queue.end()) { 71 if (itr == queue.end()) {
50 return {}; 72 return {};
@@ -99,10 +121,18 @@ void BufferQueue::ReleaseBuffer(u32 slot) {
99 ASSERT(itr != queue.end()); 121 ASSERT(itr != queue.end());
100 ASSERT(itr->status == Buffer::Status::Acquired); 122 ASSERT(itr->status == Buffer::Status::Acquired);
101 itr->status = Buffer::Status::Free; 123 itr->status = Buffer::Status::Free;
124 free_buffers.push_back(slot);
102 125
103 buffer_wait_event.writable->Signal(); 126 buffer_wait_event.writable->Signal();
104} 127}
105 128
129void BufferQueue::Disconnect() {
130 queue.clear();
131 queue_sequence.clear();
132 id = 1;
133 layer_id = 1;
134}
135
106u32 BufferQueue::Query(QueryType type) { 136u32 BufferQueue::Query(QueryType type) {
107 LOG_WARNING(Service, "(STUBBED) called type={}", static_cast<u32>(type)); 137 LOG_WARNING(Service, "(STUBBED) called type={}", static_cast<u32>(type));
108 138
diff --git a/src/core/hle/service/nvflinger/buffer_queue.h b/src/core/hle/service/nvflinger/buffer_queue.h
index f4bbfd945..d5f31e567 100644
--- a/src/core/hle/service/nvflinger/buffer_queue.h
+++ b/src/core/hle/service/nvflinger/buffer_queue.h
@@ -87,6 +87,7 @@ public:
87 Service::Nvidia::MultiFence& multi_fence); 87 Service::Nvidia::MultiFence& multi_fence);
88 std::optional<std::reference_wrapper<const Buffer>> AcquireBuffer(); 88 std::optional<std::reference_wrapper<const Buffer>> AcquireBuffer();
89 void ReleaseBuffer(u32 slot); 89 void ReleaseBuffer(u32 slot);
90 void Disconnect();
90 u32 Query(QueryType type); 91 u32 Query(QueryType type);
91 92
92 u32 GetId() const { 93 u32 GetId() const {
@@ -101,6 +102,7 @@ private:
101 u32 id; 102 u32 id;
102 u64 layer_id; 103 u64 layer_id;
103 104
105 std::list<u32> free_buffers;
104 std::vector<Buffer> queue; 106 std::vector<Buffer> queue;
105 std::list<u32> queue_sequence; 107 std::list<u32> queue_sequence;
106 Kernel::EventPair buffer_wait_event; 108 Kernel::EventPair buffer_wait_event;
diff --git a/src/core/hle/service/vi/vi.cpp b/src/core/hle/service/vi/vi.cpp
index 519da74e0..fdc62d05b 100644
--- a/src/core/hle/service/vi/vi.cpp
+++ b/src/core/hle/service/vi/vi.cpp
@@ -513,7 +513,8 @@ private:
513 513
514 auto& buffer_queue = nv_flinger->FindBufferQueue(id); 514 auto& buffer_queue = nv_flinger->FindBufferQueue(id);
515 515
516 if (transaction == TransactionId::Connect) { 516 switch (transaction) {
517 case TransactionId::Connect: {
517 IGBPConnectRequestParcel request{ctx.ReadBuffer()}; 518 IGBPConnectRequestParcel request{ctx.ReadBuffer()};
518 IGBPConnectResponseParcel response{ 519 IGBPConnectResponseParcel response{
519 static_cast<u32>(static_cast<u32>(DisplayResolution::UndockedWidth) * 520 static_cast<u32>(static_cast<u32>(DisplayResolution::UndockedWidth) *
@@ -521,14 +522,18 @@ private:
521 static_cast<u32>(static_cast<u32>(DisplayResolution::UndockedHeight) * 522 static_cast<u32>(static_cast<u32>(DisplayResolution::UndockedHeight) *
522 Settings::values.resolution_factor)}; 523 Settings::values.resolution_factor)};
523 ctx.WriteBuffer(response.Serialize()); 524 ctx.WriteBuffer(response.Serialize());
524 } else if (transaction == TransactionId::SetPreallocatedBuffer) { 525 break;
526 }
527 case TransactionId::SetPreallocatedBuffer: {
525 IGBPSetPreallocatedBufferRequestParcel request{ctx.ReadBuffer()}; 528 IGBPSetPreallocatedBufferRequestParcel request{ctx.ReadBuffer()};
526 529
527 buffer_queue.SetPreallocatedBuffer(request.data.slot, request.buffer); 530 buffer_queue.SetPreallocatedBuffer(request.data.slot, request.buffer);
528 531
529 IGBPSetPreallocatedBufferResponseParcel response{}; 532 IGBPSetPreallocatedBufferResponseParcel response{};
530 ctx.WriteBuffer(response.Serialize()); 533 ctx.WriteBuffer(response.Serialize());
531 } else if (transaction == TransactionId::DequeueBuffer) { 534 break;
535 }
536 case TransactionId::DequeueBuffer: {
532 IGBPDequeueBufferRequestParcel request{ctx.ReadBuffer()}; 537 IGBPDequeueBufferRequestParcel request{ctx.ReadBuffer()};
533 const u32 width{request.data.width}; 538 const u32 width{request.data.width};
534 const u32 height{request.data.height}; 539 const u32 height{request.data.height};
@@ -556,14 +561,18 @@ private:
556 }, 561 },
557 buffer_queue.GetWritableBufferWaitEvent()); 562 buffer_queue.GetWritableBufferWaitEvent());
558 } 563 }
559 } else if (transaction == TransactionId::RequestBuffer) { 564 break;
565 }
566 case TransactionId::RequestBuffer: {
560 IGBPRequestBufferRequestParcel request{ctx.ReadBuffer()}; 567 IGBPRequestBufferRequestParcel request{ctx.ReadBuffer()};
561 568
562 auto& buffer = buffer_queue.RequestBuffer(request.slot); 569 auto& buffer = buffer_queue.RequestBuffer(request.slot);
563 570
564 IGBPRequestBufferResponseParcel response{buffer}; 571 IGBPRequestBufferResponseParcel response{buffer};
565 ctx.WriteBuffer(response.Serialize()); 572 ctx.WriteBuffer(response.Serialize());
566 } else if (transaction == TransactionId::QueueBuffer) { 573 break;
574 }
575 case TransactionId::QueueBuffer: {
567 IGBPQueueBufferRequestParcel request{ctx.ReadBuffer()}; 576 IGBPQueueBufferRequestParcel request{ctx.ReadBuffer()};
568 577
569 buffer_queue.QueueBuffer(request.data.slot, request.data.transform, 578 buffer_queue.QueueBuffer(request.data.slot, request.data.transform,
@@ -572,7 +581,9 @@ private:
572 581
573 IGBPQueueBufferResponseParcel response{1280, 720}; 582 IGBPQueueBufferResponseParcel response{1280, 720};
574 ctx.WriteBuffer(response.Serialize()); 583 ctx.WriteBuffer(response.Serialize());
575 } else if (transaction == TransactionId::Query) { 584 break;
585 }
586 case TransactionId::Query: {
576 IGBPQueryRequestParcel request{ctx.ReadBuffer()}; 587 IGBPQueryRequestParcel request{ctx.ReadBuffer()};
577 588
578 const u32 value = 589 const u32 value =
@@ -580,15 +591,30 @@ private:
580 591
581 IGBPQueryResponseParcel response{value}; 592 IGBPQueryResponseParcel response{value};
582 ctx.WriteBuffer(response.Serialize()); 593 ctx.WriteBuffer(response.Serialize());
583 } else if (transaction == TransactionId::CancelBuffer) { 594 break;
595 }
596 case TransactionId::CancelBuffer: {
584 LOG_CRITICAL(Service_VI, "(STUBBED) called, transaction=CancelBuffer"); 597 LOG_CRITICAL(Service_VI, "(STUBBED) called, transaction=CancelBuffer");
585 } else if (transaction == TransactionId::Disconnect || 598 break;
586 transaction == TransactionId::DetachBuffer) { 599 }
600 case TransactionId::Disconnect: {
601 LOG_WARNING(Service_VI, "(STUBBED) called, transaction=Disconnect");
602 const auto buffer = ctx.ReadBuffer();
603
604 buffer_queue.Disconnect();
605
606 IGBPEmptyResponseParcel response{};
607 ctx.WriteBuffer(response.Serialize());
608 break;
609 }
610 case TransactionId::DetachBuffer: {
587 const auto buffer = ctx.ReadBuffer(); 611 const auto buffer = ctx.ReadBuffer();
588 612
589 IGBPEmptyResponseParcel response{}; 613 IGBPEmptyResponseParcel response{};
590 ctx.WriteBuffer(response.Serialize()); 614 ctx.WriteBuffer(response.Serialize());
591 } else { 615 break;
616 }
617 default:
592 ASSERT_MSG(false, "Unimplemented"); 618 ASSERT_MSG(false, "Unimplemented");
593 } 619 }
594 620
diff --git a/src/core/memory.cpp b/src/core/memory.cpp
index f0888327f..6061d37ae 100644
--- a/src/core/memory.cpp
+++ b/src/core/memory.cpp
@@ -242,7 +242,52 @@ struct Memory::Impl {
242 } 242 }
243 case Common::PageType::RasterizerCachedMemory: { 243 case Common::PageType::RasterizerCachedMemory: {
244 const u8* const host_ptr = GetPointerFromVMA(process, current_vaddr); 244 const u8* const host_ptr = GetPointerFromVMA(process, current_vaddr);
245 system.GPU().FlushRegion(ToCacheAddr(host_ptr), copy_amount); 245 system.GPU().FlushRegion(current_vaddr, copy_amount);
246 std::memcpy(dest_buffer, host_ptr, copy_amount);
247 break;
248 }
249 default:
250 UNREACHABLE();
251 }
252
253 page_index++;
254 page_offset = 0;
255 dest_buffer = static_cast<u8*>(dest_buffer) + copy_amount;
256 remaining_size -= copy_amount;
257 }
258 }
259
260 void ReadBlockUnsafe(const Kernel::Process& process, const VAddr src_addr, void* dest_buffer,
261 const std::size_t size) {
262 const auto& page_table = process.VMManager().page_table;
263
264 std::size_t remaining_size = size;
265 std::size_t page_index = src_addr >> PAGE_BITS;
266 std::size_t page_offset = src_addr & PAGE_MASK;
267
268 while (remaining_size > 0) {
269 const std::size_t copy_amount =
270 std::min(static_cast<std::size_t>(PAGE_SIZE) - page_offset, remaining_size);
271 const auto current_vaddr = static_cast<VAddr>((page_index << PAGE_BITS) + page_offset);
272
273 switch (page_table.attributes[page_index]) {
274 case Common::PageType::Unmapped: {
275 LOG_ERROR(HW_Memory,
276 "Unmapped ReadBlock @ 0x{:016X} (start address = 0x{:016X}, size = {})",
277 current_vaddr, src_addr, size);
278 std::memset(dest_buffer, 0, copy_amount);
279 break;
280 }
281 case Common::PageType::Memory: {
282 DEBUG_ASSERT(page_table.pointers[page_index]);
283
284 const u8* const src_ptr =
285 page_table.pointers[page_index] + page_offset + (page_index << PAGE_BITS);
286 std::memcpy(dest_buffer, src_ptr, copy_amount);
287 break;
288 }
289 case Common::PageType::RasterizerCachedMemory: {
290 const u8* const host_ptr = GetPointerFromVMA(process, current_vaddr);
246 std::memcpy(dest_buffer, host_ptr, copy_amount); 291 std::memcpy(dest_buffer, host_ptr, copy_amount);
247 break; 292 break;
248 } 293 }
@@ -261,6 +306,10 @@ struct Memory::Impl {
261 ReadBlock(*system.CurrentProcess(), src_addr, dest_buffer, size); 306 ReadBlock(*system.CurrentProcess(), src_addr, dest_buffer, size);
262 } 307 }
263 308
309 void ReadBlockUnsafe(const VAddr src_addr, void* dest_buffer, const std::size_t size) {
310 ReadBlockUnsafe(*system.CurrentProcess(), src_addr, dest_buffer, size);
311 }
312
264 void WriteBlock(const Kernel::Process& process, const VAddr dest_addr, const void* src_buffer, 313 void WriteBlock(const Kernel::Process& process, const VAddr dest_addr, const void* src_buffer,
265 const std::size_t size) { 314 const std::size_t size) {
266 const auto& page_table = process.VMManager().page_table; 315 const auto& page_table = process.VMManager().page_table;
@@ -290,7 +339,50 @@ struct Memory::Impl {
290 } 339 }
291 case Common::PageType::RasterizerCachedMemory: { 340 case Common::PageType::RasterizerCachedMemory: {
292 u8* const host_ptr = GetPointerFromVMA(process, current_vaddr); 341 u8* const host_ptr = GetPointerFromVMA(process, current_vaddr);
293 system.GPU().InvalidateRegion(ToCacheAddr(host_ptr), copy_amount); 342 system.GPU().InvalidateRegion(current_vaddr, copy_amount);
343 std::memcpy(host_ptr, src_buffer, copy_amount);
344 break;
345 }
346 default:
347 UNREACHABLE();
348 }
349
350 page_index++;
351 page_offset = 0;
352 src_buffer = static_cast<const u8*>(src_buffer) + copy_amount;
353 remaining_size -= copy_amount;
354 }
355 }
356
357 void WriteBlockUnsafe(const Kernel::Process& process, const VAddr dest_addr,
358 const void* src_buffer, const std::size_t size) {
359 const auto& page_table = process.VMManager().page_table;
360 std::size_t remaining_size = size;
361 std::size_t page_index = dest_addr >> PAGE_BITS;
362 std::size_t page_offset = dest_addr & PAGE_MASK;
363
364 while (remaining_size > 0) {
365 const std::size_t copy_amount =
366 std::min(static_cast<std::size_t>(PAGE_SIZE) - page_offset, remaining_size);
367 const auto current_vaddr = static_cast<VAddr>((page_index << PAGE_BITS) + page_offset);
368
369 switch (page_table.attributes[page_index]) {
370 case Common::PageType::Unmapped: {
371 LOG_ERROR(HW_Memory,
372 "Unmapped WriteBlock @ 0x{:016X} (start address = 0x{:016X}, size = {})",
373 current_vaddr, dest_addr, size);
374 break;
375 }
376 case Common::PageType::Memory: {
377 DEBUG_ASSERT(page_table.pointers[page_index]);
378
379 u8* const dest_ptr =
380 page_table.pointers[page_index] + page_offset + (page_index << PAGE_BITS);
381 std::memcpy(dest_ptr, src_buffer, copy_amount);
382 break;
383 }
384 case Common::PageType::RasterizerCachedMemory: {
385 u8* const host_ptr = GetPointerFromVMA(process, current_vaddr);
294 std::memcpy(host_ptr, src_buffer, copy_amount); 386 std::memcpy(host_ptr, src_buffer, copy_amount);
295 break; 387 break;
296 } 388 }
@@ -309,6 +401,10 @@ struct Memory::Impl {
309 WriteBlock(*system.CurrentProcess(), dest_addr, src_buffer, size); 401 WriteBlock(*system.CurrentProcess(), dest_addr, src_buffer, size);
310 } 402 }
311 403
404 void WriteBlockUnsafe(const VAddr dest_addr, const void* src_buffer, const std::size_t size) {
405 WriteBlockUnsafe(*system.CurrentProcess(), dest_addr, src_buffer, size);
406 }
407
312 void ZeroBlock(const Kernel::Process& process, const VAddr dest_addr, const std::size_t size) { 408 void ZeroBlock(const Kernel::Process& process, const VAddr dest_addr, const std::size_t size) {
313 const auto& page_table = process.VMManager().page_table; 409 const auto& page_table = process.VMManager().page_table;
314 std::size_t remaining_size = size; 410 std::size_t remaining_size = size;
@@ -337,7 +433,7 @@ struct Memory::Impl {
337 } 433 }
338 case Common::PageType::RasterizerCachedMemory: { 434 case Common::PageType::RasterizerCachedMemory: {
339 u8* const host_ptr = GetPointerFromVMA(process, current_vaddr); 435 u8* const host_ptr = GetPointerFromVMA(process, current_vaddr);
340 system.GPU().InvalidateRegion(ToCacheAddr(host_ptr), copy_amount); 436 system.GPU().InvalidateRegion(current_vaddr, copy_amount);
341 std::memset(host_ptr, 0, copy_amount); 437 std::memset(host_ptr, 0, copy_amount);
342 break; 438 break;
343 } 439 }
@@ -384,7 +480,7 @@ struct Memory::Impl {
384 } 480 }
385 case Common::PageType::RasterizerCachedMemory: { 481 case Common::PageType::RasterizerCachedMemory: {
386 const u8* const host_ptr = GetPointerFromVMA(process, current_vaddr); 482 const u8* const host_ptr = GetPointerFromVMA(process, current_vaddr);
387 system.GPU().FlushRegion(ToCacheAddr(host_ptr), copy_amount); 483 system.GPU().FlushRegion(current_vaddr, copy_amount);
388 WriteBlock(process, dest_addr, host_ptr, copy_amount); 484 WriteBlock(process, dest_addr, host_ptr, copy_amount);
389 break; 485 break;
390 } 486 }
@@ -545,7 +641,7 @@ struct Memory::Impl {
545 break; 641 break;
546 case Common::PageType::RasterizerCachedMemory: { 642 case Common::PageType::RasterizerCachedMemory: {
547 const u8* const host_ptr = GetPointerFromVMA(vaddr); 643 const u8* const host_ptr = GetPointerFromVMA(vaddr);
548 system.GPU().FlushRegion(ToCacheAddr(host_ptr), sizeof(T)); 644 system.GPU().FlushRegion(vaddr, sizeof(T));
549 T value; 645 T value;
550 std::memcpy(&value, host_ptr, sizeof(T)); 646 std::memcpy(&value, host_ptr, sizeof(T));
551 return value; 647 return value;
@@ -587,7 +683,7 @@ struct Memory::Impl {
587 break; 683 break;
588 case Common::PageType::RasterizerCachedMemory: { 684 case Common::PageType::RasterizerCachedMemory: {
589 u8* const host_ptr{GetPointerFromVMA(vaddr)}; 685 u8* const host_ptr{GetPointerFromVMA(vaddr)};
590 system.GPU().InvalidateRegion(ToCacheAddr(host_ptr), sizeof(T)); 686 system.GPU().InvalidateRegion(vaddr, sizeof(T));
591 std::memcpy(host_ptr, &data, sizeof(T)); 687 std::memcpy(host_ptr, &data, sizeof(T));
592 break; 688 break;
593 } 689 }
@@ -696,6 +792,15 @@ void Memory::ReadBlock(const VAddr src_addr, void* dest_buffer, const std::size_
696 impl->ReadBlock(src_addr, dest_buffer, size); 792 impl->ReadBlock(src_addr, dest_buffer, size);
697} 793}
698 794
795void Memory::ReadBlockUnsafe(const Kernel::Process& process, const VAddr src_addr,
796 void* dest_buffer, const std::size_t size) {
797 impl->ReadBlockUnsafe(process, src_addr, dest_buffer, size);
798}
799
800void Memory::ReadBlockUnsafe(const VAddr src_addr, void* dest_buffer, const std::size_t size) {
801 impl->ReadBlockUnsafe(src_addr, dest_buffer, size);
802}
803
699void Memory::WriteBlock(const Kernel::Process& process, VAddr dest_addr, const void* src_buffer, 804void Memory::WriteBlock(const Kernel::Process& process, VAddr dest_addr, const void* src_buffer,
700 std::size_t size) { 805 std::size_t size) {
701 impl->WriteBlock(process, dest_addr, src_buffer, size); 806 impl->WriteBlock(process, dest_addr, src_buffer, size);
@@ -705,6 +810,16 @@ void Memory::WriteBlock(const VAddr dest_addr, const void* src_buffer, const std
705 impl->WriteBlock(dest_addr, src_buffer, size); 810 impl->WriteBlock(dest_addr, src_buffer, size);
706} 811}
707 812
813void Memory::WriteBlockUnsafe(const Kernel::Process& process, VAddr dest_addr,
814 const void* src_buffer, std::size_t size) {
815 impl->WriteBlockUnsafe(process, dest_addr, src_buffer, size);
816}
817
818void Memory::WriteBlockUnsafe(const VAddr dest_addr, const void* src_buffer,
819 const std::size_t size) {
820 impl->WriteBlockUnsafe(dest_addr, src_buffer, size);
821}
822
708void Memory::ZeroBlock(const Kernel::Process& process, VAddr dest_addr, std::size_t size) { 823void Memory::ZeroBlock(const Kernel::Process& process, VAddr dest_addr, std::size_t size) {
709 impl->ZeroBlock(process, dest_addr, size); 824 impl->ZeroBlock(process, dest_addr, size);
710} 825}
diff --git a/src/core/memory.h b/src/core/memory.h
index 8913a9da4..b92d678a4 100644
--- a/src/core/memory.h
+++ b/src/core/memory.h
@@ -295,6 +295,27 @@ public:
295 std::size_t size); 295 std::size_t size);
296 296
297 /** 297 /**
298 * Reads a contiguous block of bytes from a specified process' address space.
299 * This unsafe version does not trigger GPU flushing.
300 *
301 * @param process The process to read the data from.
302 * @param src_addr The virtual address to begin reading from.
303 * @param dest_buffer The buffer to place the read bytes into.
304 * @param size The amount of data to read, in bytes.
305 *
306 * @note If a size of 0 is specified, then this function reads nothing and
307 * no attempts to access memory are made at all.
308 *
309 * @pre dest_buffer must be at least size bytes in length, otherwise a
310 * buffer overrun will occur.
311 *
312 * @post The range [dest_buffer, size) contains the read bytes from the
313 * process' address space.
314 */
315 void ReadBlockUnsafe(const Kernel::Process& process, VAddr src_addr, void* dest_buffer,
316 std::size_t size);
317
318 /**
298 * Reads a contiguous block of bytes from the current process' address space. 319 * Reads a contiguous block of bytes from the current process' address space.
299 * 320 *
300 * @param src_addr The virtual address to begin reading from. 321 * @param src_addr The virtual address to begin reading from.
@@ -313,6 +334,25 @@ public:
313 void ReadBlock(VAddr src_addr, void* dest_buffer, std::size_t size); 334 void ReadBlock(VAddr src_addr, void* dest_buffer, std::size_t size);
314 335
315 /** 336 /**
337 * Reads a contiguous block of bytes from the current process' address space.
338 * This unsafe version does not trigger GPU flushing.
339 *
340 * @param src_addr The virtual address to begin reading from.
341 * @param dest_buffer The buffer to place the read bytes into.
342 * @param size The amount of data to read, in bytes.
343 *
344 * @note If a size of 0 is specified, then this function reads nothing and
345 * no attempts to access memory are made at all.
346 *
347 * @pre dest_buffer must be at least size bytes in length, otherwise a
348 * buffer overrun will occur.
349 *
350 * @post The range [dest_buffer, size) contains the read bytes from the
351 * current process' address space.
352 */
353 void ReadBlockUnsafe(VAddr src_addr, void* dest_buffer, std::size_t size);
354
355 /**
316 * Writes a range of bytes into a given process' address space at the specified 356 * Writes a range of bytes into a given process' address space at the specified
317 * virtual address. 357 * virtual address.
318 * 358 *
@@ -336,6 +376,26 @@ public:
336 std::size_t size); 376 std::size_t size);
337 377
338 /** 378 /**
379 * Writes a range of bytes into a given process' address space at the specified
380 * virtual address.
381 * This unsafe version does not invalidate GPU Memory.
382 *
383 * @param process The process to write data into the address space of.
384 * @param dest_addr The destination virtual address to begin writing the data at.
385 * @param src_buffer The data to write into the process' address space.
386 * @param size The size of the data to write, in bytes.
387 *
388 * @post The address range [dest_addr, size) in the process' address space
389 * contains the data that was within src_buffer.
390 *
391 * @post If an attempt is made to write into an unmapped region of memory, the writes
392 * will be ignored and an error will be logged.
393 *
394 */
395 void WriteBlockUnsafe(const Kernel::Process& process, VAddr dest_addr, const void* src_buffer,
396 std::size_t size);
397
398 /**
339 * Writes a range of bytes into the current process' address space at the specified 399 * Writes a range of bytes into the current process' address space at the specified
340 * virtual address. 400 * virtual address.
341 * 401 *
@@ -357,6 +417,24 @@ public:
357 void WriteBlock(VAddr dest_addr, const void* src_buffer, std::size_t size); 417 void WriteBlock(VAddr dest_addr, const void* src_buffer, std::size_t size);
358 418
359 /** 419 /**
420 * Writes a range of bytes into the current process' address space at the specified
421 * virtual address.
422 * This unsafe version does not invalidate GPU Memory.
423 *
424 * @param dest_addr The destination virtual address to begin writing the data at.
425 * @param src_buffer The data to write into the current process' address space.
426 * @param size The size of the data to write, in bytes.
427 *
428 * @post The address range [dest_addr, size) in the current process' address space
429 * contains the data that was within src_buffer.
430 *
431 * @post If an attempt is made to write into an unmapped region of memory, the writes
432 * will be ignored and an error will be logged.
433 *
434 */
435 void WriteBlockUnsafe(VAddr dest_addr, const void* src_buffer, std::size_t size);
436
437 /**
360 * Fills the specified address range within a process' address space with zeroes. 438 * Fills the specified address range within a process' address space with zeroes.
361 * 439 *
362 * @param process The process that will have a portion of its memory zeroed out. 440 * @param process The process that will have a portion of its memory zeroed out.
diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt
index effe76a63..f7febd6a2 100644
--- a/src/video_core/CMakeLists.txt
+++ b/src/video_core/CMakeLists.txt
@@ -148,6 +148,7 @@ add_library(video_core STATIC
148 textures/convert.h 148 textures/convert.h
149 textures/decoders.cpp 149 textures/decoders.cpp
150 textures/decoders.h 150 textures/decoders.h
151 textures/texture.cpp
151 textures/texture.h 152 textures/texture.h
152 video_core.cpp 153 video_core.cpp
153 video_core.h 154 video_core.h
diff --git a/src/video_core/buffer_cache/buffer_block.h b/src/video_core/buffer_cache/buffer_block.h
index 4b9193182..e35ee0b67 100644
--- a/src/video_core/buffer_cache/buffer_block.h
+++ b/src/video_core/buffer_cache/buffer_block.h
@@ -15,37 +15,29 @@ namespace VideoCommon {
15 15
16class BufferBlock { 16class BufferBlock {
17public: 17public:
18 bool Overlaps(const CacheAddr start, const CacheAddr end) const { 18 bool Overlaps(const VAddr start, const VAddr end) const {
19 return (cache_addr < end) && (cache_addr_end > start); 19 return (cpu_addr < end) && (cpu_addr_end > start);
20 } 20 }
21 21
22 bool IsInside(const CacheAddr other_start, const CacheAddr other_end) const { 22 bool IsInside(const VAddr other_start, const VAddr other_end) const {
23 return cache_addr <= other_start && other_end <= cache_addr_end; 23 return cpu_addr <= other_start && other_end <= cpu_addr_end;
24 } 24 }
25 25
26 u8* GetWritableHostPtr() const { 26 std::size_t GetOffset(const VAddr in_addr) {
27 return FromCacheAddr(cache_addr); 27 return static_cast<std::size_t>(in_addr - cpu_addr);
28 } 28 }
29 29
30 u8* GetWritableHostPtr(std::size_t offset) const { 30 VAddr GetCpuAddr() const {
31 return FromCacheAddr(cache_addr + offset); 31 return cpu_addr;
32 } 32 }
33 33
34 std::size_t GetOffset(const CacheAddr in_addr) { 34 VAddr GetCpuAddrEnd() const {
35 return static_cast<std::size_t>(in_addr - cache_addr); 35 return cpu_addr_end;
36 } 36 }
37 37
38 CacheAddr GetCacheAddr() const { 38 void SetCpuAddr(const VAddr new_addr) {
39 return cache_addr; 39 cpu_addr = new_addr;
40 } 40 cpu_addr_end = new_addr + size;
41
42 CacheAddr GetCacheAddrEnd() const {
43 return cache_addr_end;
44 }
45
46 void SetCacheAddr(const CacheAddr new_addr) {
47 cache_addr = new_addr;
48 cache_addr_end = new_addr + size;
49 } 41 }
50 42
51 std::size_t GetSize() const { 43 std::size_t GetSize() const {
@@ -61,14 +53,14 @@ public:
61 } 53 }
62 54
63protected: 55protected:
64 explicit BufferBlock(CacheAddr cache_addr, const std::size_t size) : size{size} { 56 explicit BufferBlock(VAddr cpu_addr, const std::size_t size) : size{size} {
65 SetCacheAddr(cache_addr); 57 SetCpuAddr(cpu_addr);
66 } 58 }
67 ~BufferBlock() = default; 59 ~BufferBlock() = default;
68 60
69private: 61private:
70 CacheAddr cache_addr{}; 62 VAddr cpu_addr{};
71 CacheAddr cache_addr_end{}; 63 VAddr cpu_addr_end{};
72 std::size_t size{}; 64 std::size_t size{};
73 u64 epoch{}; 65 u64 epoch{};
74}; 66};
diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h
index 186aca61d..b57c0d4d4 100644
--- a/src/video_core/buffer_cache/buffer_cache.h
+++ b/src/video_core/buffer_cache/buffer_cache.h
@@ -19,6 +19,7 @@
19#include "common/alignment.h" 19#include "common/alignment.h"
20#include "common/common_types.h" 20#include "common/common_types.h"
21#include "core/core.h" 21#include "core/core.h"
22#include "core/memory.h"
22#include "video_core/buffer_cache/buffer_block.h" 23#include "video_core/buffer_cache/buffer_block.h"
23#include "video_core/buffer_cache/map_interval.h" 24#include "video_core/buffer_cache/map_interval.h"
24#include "video_core/memory_manager.h" 25#include "video_core/memory_manager.h"
@@ -37,28 +38,45 @@ public:
37 bool is_written = false, bool use_fast_cbuf = false) { 38 bool is_written = false, bool use_fast_cbuf = false) {
38 std::lock_guard lock{mutex}; 39 std::lock_guard lock{mutex};
39 40
40 auto& memory_manager = system.GPU().MemoryManager(); 41 const std::optional<VAddr> cpu_addr_opt =
41 const auto host_ptr = memory_manager.GetPointer(gpu_addr); 42 system.GPU().MemoryManager().GpuToCpuAddress(gpu_addr);
42 if (!host_ptr) { 43
44 if (!cpu_addr_opt) {
43 return {GetEmptyBuffer(size), 0}; 45 return {GetEmptyBuffer(size), 0};
44 } 46 }
45 const auto cache_addr = ToCacheAddr(host_ptr); 47
48 VAddr cpu_addr = *cpu_addr_opt;
46 49
47 // Cache management is a big overhead, so only cache entries with a given size. 50 // Cache management is a big overhead, so only cache entries with a given size.
48 // TODO: Figure out which size is the best for given games. 51 // TODO: Figure out which size is the best for given games.
49 constexpr std::size_t max_stream_size = 0x800; 52 constexpr std::size_t max_stream_size = 0x800;
50 if (use_fast_cbuf || size < max_stream_size) { 53 if (use_fast_cbuf || size < max_stream_size) {
51 if (!is_written && !IsRegionWritten(cache_addr, cache_addr + size - 1)) { 54 if (!is_written && !IsRegionWritten(cpu_addr, cpu_addr + size - 1)) {
55 auto& memory_manager = system.GPU().MemoryManager();
52 if (use_fast_cbuf) { 56 if (use_fast_cbuf) {
53 return ConstBufferUpload(host_ptr, size); 57 if (memory_manager.IsGranularRange(gpu_addr, size)) {
58 const auto host_ptr = memory_manager.GetPointer(gpu_addr);
59 return ConstBufferUpload(host_ptr, size);
60 } else {
61 staging_buffer.resize(size);
62 memory_manager.ReadBlockUnsafe(gpu_addr, staging_buffer.data(), size);
63 return ConstBufferUpload(staging_buffer.data(), size);
64 }
54 } else { 65 } else {
55 return StreamBufferUpload(host_ptr, size, alignment); 66 if (memory_manager.IsGranularRange(gpu_addr, size)) {
67 const auto host_ptr = memory_manager.GetPointer(gpu_addr);
68 return StreamBufferUpload(host_ptr, size, alignment);
69 } else {
70 staging_buffer.resize(size);
71 memory_manager.ReadBlockUnsafe(gpu_addr, staging_buffer.data(), size);
72 return StreamBufferUpload(staging_buffer.data(), size, alignment);
73 }
56 } 74 }
57 } 75 }
58 } 76 }
59 77
60 auto block = GetBlock(cache_addr, size); 78 auto block = GetBlock(cpu_addr, size);
61 auto map = MapAddress(block, gpu_addr, cache_addr, size); 79 auto map = MapAddress(block, gpu_addr, cpu_addr, size);
62 if (is_written) { 80 if (is_written) {
63 map->MarkAsModified(true, GetModifiedTicks()); 81 map->MarkAsModified(true, GetModifiedTicks());
64 if (!map->IsWritten()) { 82 if (!map->IsWritten()) {
@@ -71,7 +89,7 @@ public:
71 } 89 }
72 } 90 }
73 91
74 const u64 offset = static_cast<u64>(block->GetOffset(cache_addr)); 92 const u64 offset = static_cast<u64>(block->GetOffset(cpu_addr));
75 93
76 return {ToHandle(block), offset}; 94 return {ToHandle(block), offset};
77 } 95 }
@@ -112,7 +130,7 @@ public:
112 } 130 }
113 131
114 /// Write any cached resources overlapping the specified region back to memory 132 /// Write any cached resources overlapping the specified region back to memory
115 void FlushRegion(CacheAddr addr, std::size_t size) { 133 void FlushRegion(VAddr addr, std::size_t size) {
116 std::lock_guard lock{mutex}; 134 std::lock_guard lock{mutex};
117 135
118 std::vector<MapInterval> objects = GetMapsInRange(addr, size); 136 std::vector<MapInterval> objects = GetMapsInRange(addr, size);
@@ -127,7 +145,7 @@ public:
127 } 145 }
128 146
129 /// Mark the specified region as being invalidated 147 /// Mark the specified region as being invalidated
130 void InvalidateRegion(CacheAddr addr, u64 size) { 148 void InvalidateRegion(VAddr addr, u64 size) {
131 std::lock_guard lock{mutex}; 149 std::lock_guard lock{mutex};
132 150
133 std::vector<MapInterval> objects = GetMapsInRange(addr, size); 151 std::vector<MapInterval> objects = GetMapsInRange(addr, size);
@@ -152,7 +170,7 @@ protected:
152 170
153 virtual void WriteBarrier() = 0; 171 virtual void WriteBarrier() = 0;
154 172
155 virtual TBuffer CreateBlock(CacheAddr cache_addr, std::size_t size) = 0; 173 virtual TBuffer CreateBlock(VAddr cpu_addr, std::size_t size) = 0;
156 174
157 virtual void UploadBlockData(const TBuffer& buffer, std::size_t offset, std::size_t size, 175 virtual void UploadBlockData(const TBuffer& buffer, std::size_t offset, std::size_t size,
158 const u8* data) = 0; 176 const u8* data) = 0;
@@ -169,20 +187,17 @@ protected:
169 187
170 /// Register an object into the cache 188 /// Register an object into the cache
171 void Register(const MapInterval& new_map, bool inherit_written = false) { 189 void Register(const MapInterval& new_map, bool inherit_written = false) {
172 const CacheAddr cache_ptr = new_map->GetStart(); 190 const VAddr cpu_addr = new_map->GetStart();
173 const std::optional<VAddr> cpu_addr = 191 if (!cpu_addr) {
174 system.GPU().MemoryManager().GpuToCpuAddress(new_map->GetGpuAddress());
175 if (!cache_ptr || !cpu_addr) {
176 LOG_CRITICAL(HW_GPU, "Failed to register buffer with unmapped gpu_address 0x{:016x}", 192 LOG_CRITICAL(HW_GPU, "Failed to register buffer with unmapped gpu_address 0x{:016x}",
177 new_map->GetGpuAddress()); 193 new_map->GetGpuAddress());
178 return; 194 return;
179 } 195 }
180 const std::size_t size = new_map->GetEnd() - new_map->GetStart(); 196 const std::size_t size = new_map->GetEnd() - new_map->GetStart();
181 new_map->SetCpuAddress(*cpu_addr);
182 new_map->MarkAsRegistered(true); 197 new_map->MarkAsRegistered(true);
183 const IntervalType interval{new_map->GetStart(), new_map->GetEnd()}; 198 const IntervalType interval{new_map->GetStart(), new_map->GetEnd()};
184 mapped_addresses.insert({interval, new_map}); 199 mapped_addresses.insert({interval, new_map});
185 rasterizer.UpdatePagesCachedCount(*cpu_addr, size, 1); 200 rasterizer.UpdatePagesCachedCount(cpu_addr, size, 1);
186 if (inherit_written) { 201 if (inherit_written) {
187 MarkRegionAsWritten(new_map->GetStart(), new_map->GetEnd() - 1); 202 MarkRegionAsWritten(new_map->GetStart(), new_map->GetEnd() - 1);
188 new_map->MarkAsWritten(true); 203 new_map->MarkAsWritten(true);
@@ -192,7 +207,7 @@ protected:
192 /// Unregisters an object from the cache 207 /// Unregisters an object from the cache
193 void Unregister(MapInterval& map) { 208 void Unregister(MapInterval& map) {
194 const std::size_t size = map->GetEnd() - map->GetStart(); 209 const std::size_t size = map->GetEnd() - map->GetStart();
195 rasterizer.UpdatePagesCachedCount(map->GetCpuAddress(), size, -1); 210 rasterizer.UpdatePagesCachedCount(map->GetStart(), size, -1);
196 map->MarkAsRegistered(false); 211 map->MarkAsRegistered(false);
197 if (map->IsWritten()) { 212 if (map->IsWritten()) {
198 UnmarkRegionAsWritten(map->GetStart(), map->GetEnd() - 1); 213 UnmarkRegionAsWritten(map->GetStart(), map->GetEnd() - 1);
@@ -202,32 +217,39 @@ protected:
202 } 217 }
203 218
204private: 219private:
205 MapInterval CreateMap(const CacheAddr start, const CacheAddr end, const GPUVAddr gpu_addr) { 220 MapInterval CreateMap(const VAddr start, const VAddr end, const GPUVAddr gpu_addr) {
206 return std::make_shared<MapIntervalBase>(start, end, gpu_addr); 221 return std::make_shared<MapIntervalBase>(start, end, gpu_addr);
207 } 222 }
208 223
209 MapInterval MapAddress(const TBuffer& block, const GPUVAddr gpu_addr, 224 MapInterval MapAddress(const TBuffer& block, const GPUVAddr gpu_addr, const VAddr cpu_addr,
210 const CacheAddr cache_addr, const std::size_t size) { 225 const std::size_t size) {
211 226
212 std::vector<MapInterval> overlaps = GetMapsInRange(cache_addr, size); 227 std::vector<MapInterval> overlaps = GetMapsInRange(cpu_addr, size);
213 if (overlaps.empty()) { 228 if (overlaps.empty()) {
214 const CacheAddr cache_addr_end = cache_addr + size; 229 auto& memory_manager = system.GPU().MemoryManager();
215 MapInterval new_map = CreateMap(cache_addr, cache_addr_end, gpu_addr); 230 const VAddr cpu_addr_end = cpu_addr + size;
216 u8* host_ptr = FromCacheAddr(cache_addr); 231 MapInterval new_map = CreateMap(cpu_addr, cpu_addr_end, gpu_addr);
217 UploadBlockData(block, block->GetOffset(cache_addr), size, host_ptr); 232 if (memory_manager.IsGranularRange(gpu_addr, size)) {
233 u8* host_ptr = memory_manager.GetPointer(gpu_addr);
234 UploadBlockData(block, block->GetOffset(cpu_addr), size, host_ptr);
235 } else {
236 staging_buffer.resize(size);
237 memory_manager.ReadBlockUnsafe(gpu_addr, staging_buffer.data(), size);
238 UploadBlockData(block, block->GetOffset(cpu_addr), size, staging_buffer.data());
239 }
218 Register(new_map); 240 Register(new_map);
219 return new_map; 241 return new_map;
220 } 242 }
221 243
222 const CacheAddr cache_addr_end = cache_addr + size; 244 const VAddr cpu_addr_end = cpu_addr + size;
223 if (overlaps.size() == 1) { 245 if (overlaps.size() == 1) {
224 MapInterval& current_map = overlaps[0]; 246 MapInterval& current_map = overlaps[0];
225 if (current_map->IsInside(cache_addr, cache_addr_end)) { 247 if (current_map->IsInside(cpu_addr, cpu_addr_end)) {
226 return current_map; 248 return current_map;
227 } 249 }
228 } 250 }
229 CacheAddr new_start = cache_addr; 251 VAddr new_start = cpu_addr;
230 CacheAddr new_end = cache_addr_end; 252 VAddr new_end = cpu_addr_end;
231 bool write_inheritance = false; 253 bool write_inheritance = false;
232 bool modified_inheritance = false; 254 bool modified_inheritance = false;
233 // Calculate new buffer parameters 255 // Calculate new buffer parameters
@@ -237,7 +259,7 @@ private:
237 write_inheritance |= overlap->IsWritten(); 259 write_inheritance |= overlap->IsWritten();
238 modified_inheritance |= overlap->IsModified(); 260 modified_inheritance |= overlap->IsModified();
239 } 261 }
240 GPUVAddr new_gpu_addr = gpu_addr + new_start - cache_addr; 262 GPUVAddr new_gpu_addr = gpu_addr + new_start - cpu_addr;
241 for (auto& overlap : overlaps) { 263 for (auto& overlap : overlaps) {
242 Unregister(overlap); 264 Unregister(overlap);
243 } 265 }
@@ -250,7 +272,7 @@ private:
250 return new_map; 272 return new_map;
251 } 273 }
252 274
253 void UpdateBlock(const TBuffer& block, CacheAddr start, CacheAddr end, 275 void UpdateBlock(const TBuffer& block, VAddr start, VAddr end,
254 std::vector<MapInterval>& overlaps) { 276 std::vector<MapInterval>& overlaps) {
255 const IntervalType base_interval{start, end}; 277 const IntervalType base_interval{start, end};
256 IntervalSet interval_set{}; 278 IntervalSet interval_set{};
@@ -262,13 +284,15 @@ private:
262 for (auto& interval : interval_set) { 284 for (auto& interval : interval_set) {
263 std::size_t size = interval.upper() - interval.lower(); 285 std::size_t size = interval.upper() - interval.lower();
264 if (size > 0) { 286 if (size > 0) {
265 u8* host_ptr = FromCacheAddr(interval.lower()); 287 staging_buffer.resize(size);
266 UploadBlockData(block, block->GetOffset(interval.lower()), size, host_ptr); 288 system.Memory().ReadBlockUnsafe(interval.lower(), staging_buffer.data(), size);
289 UploadBlockData(block, block->GetOffset(interval.lower()), size,
290 staging_buffer.data());
267 } 291 }
268 } 292 }
269 } 293 }
270 294
271 std::vector<MapInterval> GetMapsInRange(CacheAddr addr, std::size_t size) { 295 std::vector<MapInterval> GetMapsInRange(VAddr addr, std::size_t size) {
272 if (size == 0) { 296 if (size == 0) {
273 return {}; 297 return {};
274 } 298 }
@@ -290,8 +314,9 @@ private:
290 void FlushMap(MapInterval map) { 314 void FlushMap(MapInterval map) {
291 std::size_t size = map->GetEnd() - map->GetStart(); 315 std::size_t size = map->GetEnd() - map->GetStart();
292 TBuffer block = blocks[map->GetStart() >> block_page_bits]; 316 TBuffer block = blocks[map->GetStart() >> block_page_bits];
293 u8* host_ptr = FromCacheAddr(map->GetStart()); 317 staging_buffer.resize(size);
294 DownloadBlockData(block, block->GetOffset(map->GetStart()), size, host_ptr); 318 DownloadBlockData(block, block->GetOffset(map->GetStart()), size, staging_buffer.data());
319 system.Memory().WriteBlockUnsafe(map->GetStart(), staging_buffer.data(), size);
295 map->MarkAsModified(false, 0); 320 map->MarkAsModified(false, 0);
296 } 321 }
297 322
@@ -316,14 +341,14 @@ private:
316 TBuffer EnlargeBlock(TBuffer buffer) { 341 TBuffer EnlargeBlock(TBuffer buffer) {
317 const std::size_t old_size = buffer->GetSize(); 342 const std::size_t old_size = buffer->GetSize();
318 const std::size_t new_size = old_size + block_page_size; 343 const std::size_t new_size = old_size + block_page_size;
319 const CacheAddr cache_addr = buffer->GetCacheAddr(); 344 const VAddr cpu_addr = buffer->GetCpuAddr();
320 TBuffer new_buffer = CreateBlock(cache_addr, new_size); 345 TBuffer new_buffer = CreateBlock(cpu_addr, new_size);
321 CopyBlock(buffer, new_buffer, 0, 0, old_size); 346 CopyBlock(buffer, new_buffer, 0, 0, old_size);
322 buffer->SetEpoch(epoch); 347 buffer->SetEpoch(epoch);
323 pending_destruction.push_back(buffer); 348 pending_destruction.push_back(buffer);
324 const CacheAddr cache_addr_end = cache_addr + new_size - 1; 349 const VAddr cpu_addr_end = cpu_addr + new_size - 1;
325 u64 page_start = cache_addr >> block_page_bits; 350 u64 page_start = cpu_addr >> block_page_bits;
326 const u64 page_end = cache_addr_end >> block_page_bits; 351 const u64 page_end = cpu_addr_end >> block_page_bits;
327 while (page_start <= page_end) { 352 while (page_start <= page_end) {
328 blocks[page_start] = new_buffer; 353 blocks[page_start] = new_buffer;
329 ++page_start; 354 ++page_start;
@@ -334,9 +359,9 @@ private:
334 TBuffer MergeBlocks(TBuffer first, TBuffer second) { 359 TBuffer MergeBlocks(TBuffer first, TBuffer second) {
335 const std::size_t size_1 = first->GetSize(); 360 const std::size_t size_1 = first->GetSize();
336 const std::size_t size_2 = second->GetSize(); 361 const std::size_t size_2 = second->GetSize();
337 const CacheAddr first_addr = first->GetCacheAddr(); 362 const VAddr first_addr = first->GetCpuAddr();
338 const CacheAddr second_addr = second->GetCacheAddr(); 363 const VAddr second_addr = second->GetCpuAddr();
339 const CacheAddr new_addr = std::min(first_addr, second_addr); 364 const VAddr new_addr = std::min(first_addr, second_addr);
340 const std::size_t new_size = size_1 + size_2; 365 const std::size_t new_size = size_1 + size_2;
341 TBuffer new_buffer = CreateBlock(new_addr, new_size); 366 TBuffer new_buffer = CreateBlock(new_addr, new_size);
342 CopyBlock(first, new_buffer, 0, new_buffer->GetOffset(first_addr), size_1); 367 CopyBlock(first, new_buffer, 0, new_buffer->GetOffset(first_addr), size_1);
@@ -345,9 +370,9 @@ private:
345 second->SetEpoch(epoch); 370 second->SetEpoch(epoch);
346 pending_destruction.push_back(first); 371 pending_destruction.push_back(first);
347 pending_destruction.push_back(second); 372 pending_destruction.push_back(second);
348 const CacheAddr cache_addr_end = new_addr + new_size - 1; 373 const VAddr cpu_addr_end = new_addr + new_size - 1;
349 u64 page_start = new_addr >> block_page_bits; 374 u64 page_start = new_addr >> block_page_bits;
350 const u64 page_end = cache_addr_end >> block_page_bits; 375 const u64 page_end = cpu_addr_end >> block_page_bits;
351 while (page_start <= page_end) { 376 while (page_start <= page_end) {
352 blocks[page_start] = new_buffer; 377 blocks[page_start] = new_buffer;
353 ++page_start; 378 ++page_start;
@@ -355,18 +380,18 @@ private:
355 return new_buffer; 380 return new_buffer;
356 } 381 }
357 382
358 TBuffer GetBlock(const CacheAddr cache_addr, const std::size_t size) { 383 TBuffer GetBlock(const VAddr cpu_addr, const std::size_t size) {
359 TBuffer found{}; 384 TBuffer found{};
360 const CacheAddr cache_addr_end = cache_addr + size - 1; 385 const VAddr cpu_addr_end = cpu_addr + size - 1;
361 u64 page_start = cache_addr >> block_page_bits; 386 u64 page_start = cpu_addr >> block_page_bits;
362 const u64 page_end = cache_addr_end >> block_page_bits; 387 const u64 page_end = cpu_addr_end >> block_page_bits;
363 while (page_start <= page_end) { 388 while (page_start <= page_end) {
364 auto it = blocks.find(page_start); 389 auto it = blocks.find(page_start);
365 if (it == blocks.end()) { 390 if (it == blocks.end()) {
366 if (found) { 391 if (found) {
367 found = EnlargeBlock(found); 392 found = EnlargeBlock(found);
368 } else { 393 } else {
369 const CacheAddr start_addr = (page_start << block_page_bits); 394 const VAddr start_addr = (page_start << block_page_bits);
370 found = CreateBlock(start_addr, block_page_size); 395 found = CreateBlock(start_addr, block_page_size);
371 blocks[page_start] = found; 396 blocks[page_start] = found;
372 } 397 }
@@ -386,7 +411,7 @@ private:
386 return found; 411 return found;
387 } 412 }
388 413
389 void MarkRegionAsWritten(const CacheAddr start, const CacheAddr end) { 414 void MarkRegionAsWritten(const VAddr start, const VAddr end) {
390 u64 page_start = start >> write_page_bit; 415 u64 page_start = start >> write_page_bit;
391 const u64 page_end = end >> write_page_bit; 416 const u64 page_end = end >> write_page_bit;
392 while (page_start <= page_end) { 417 while (page_start <= page_end) {
@@ -400,7 +425,7 @@ private:
400 } 425 }
401 } 426 }
402 427
403 void UnmarkRegionAsWritten(const CacheAddr start, const CacheAddr end) { 428 void UnmarkRegionAsWritten(const VAddr start, const VAddr end) {
404 u64 page_start = start >> write_page_bit; 429 u64 page_start = start >> write_page_bit;
405 const u64 page_end = end >> write_page_bit; 430 const u64 page_end = end >> write_page_bit;
406 while (page_start <= page_end) { 431 while (page_start <= page_end) {
@@ -416,7 +441,7 @@ private:
416 } 441 }
417 } 442 }
418 443
419 bool IsRegionWritten(const CacheAddr start, const CacheAddr end) const { 444 bool IsRegionWritten(const VAddr start, const VAddr end) const {
420 u64 page_start = start >> write_page_bit; 445 u64 page_start = start >> write_page_bit;
421 const u64 page_end = end >> write_page_bit; 446 const u64 page_end = end >> write_page_bit;
422 while (page_start <= page_end) { 447 while (page_start <= page_end) {
@@ -440,8 +465,8 @@ private:
440 u64 buffer_offset = 0; 465 u64 buffer_offset = 0;
441 u64 buffer_offset_base = 0; 466 u64 buffer_offset_base = 0;
442 467
443 using IntervalSet = boost::icl::interval_set<CacheAddr>; 468 using IntervalSet = boost::icl::interval_set<VAddr>;
444 using IntervalCache = boost::icl::interval_map<CacheAddr, MapInterval>; 469 using IntervalCache = boost::icl::interval_map<VAddr, MapInterval>;
445 using IntervalType = typename IntervalCache::interval_type; 470 using IntervalType = typename IntervalCache::interval_type;
446 IntervalCache mapped_addresses; 471 IntervalCache mapped_addresses;
447 472
@@ -456,6 +481,8 @@ private:
456 u64 epoch = 0; 481 u64 epoch = 0;
457 u64 modified_ticks = 0; 482 u64 modified_ticks = 0;
458 483
484 std::vector<u8> staging_buffer;
485
459 std::recursive_mutex mutex; 486 std::recursive_mutex mutex;
460}; 487};
461 488
diff --git a/src/video_core/buffer_cache/map_interval.h b/src/video_core/buffer_cache/map_interval.h
index 3a104d5cd..b0956029d 100644
--- a/src/video_core/buffer_cache/map_interval.h
+++ b/src/video_core/buffer_cache/map_interval.h
@@ -11,7 +11,7 @@ namespace VideoCommon {
11 11
12class MapIntervalBase { 12class MapIntervalBase {
13public: 13public:
14 MapIntervalBase(const CacheAddr start, const CacheAddr end, const GPUVAddr gpu_addr) 14 MapIntervalBase(const VAddr start, const VAddr end, const GPUVAddr gpu_addr)
15 : start{start}, end{end}, gpu_addr{gpu_addr} {} 15 : start{start}, end{end}, gpu_addr{gpu_addr} {}
16 16
17 void SetCpuAddress(VAddr new_cpu_addr) { 17 void SetCpuAddress(VAddr new_cpu_addr) {
@@ -26,7 +26,7 @@ public:
26 return gpu_addr; 26 return gpu_addr;
27 } 27 }
28 28
29 bool IsInside(const CacheAddr other_start, const CacheAddr other_end) const { 29 bool IsInside(const VAddr other_start, const VAddr other_end) const {
30 return (start <= other_start && other_end <= end); 30 return (start <= other_start && other_end <= end);
31 } 31 }
32 32
@@ -46,11 +46,11 @@ public:
46 return is_registered; 46 return is_registered;
47 } 47 }
48 48
49 CacheAddr GetStart() const { 49 VAddr GetStart() const {
50 return start; 50 return start;
51 } 51 }
52 52
53 CacheAddr GetEnd() const { 53 VAddr GetEnd() const {
54 return end; 54 return end;
55 } 55 }
56 56
@@ -76,8 +76,8 @@ public:
76 } 76 }
77 77
78private: 78private:
79 CacheAddr start; 79 VAddr start;
80 CacheAddr end; 80 VAddr end;
81 GPUVAddr gpu_addr; 81 GPUVAddr gpu_addr;
82 VAddr cpu_addr{}; 82 VAddr cpu_addr{};
83 bool is_written{}; 83 bool is_written{};
diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h
index d24c9f657..4637ddabd 100644
--- a/src/video_core/engines/maxwell_3d.h
+++ b/src/video_core/engines/maxwell_3d.h
@@ -312,6 +312,35 @@ public:
312 } 312 }
313 }; 313 };
314 314
315 struct MsaaSampleLocation {
316 union {
317 BitField<0, 4, u32> x0;
318 BitField<4, 4, u32> y0;
319 BitField<8, 4, u32> x1;
320 BitField<12, 4, u32> y1;
321 BitField<16, 4, u32> x2;
322 BitField<20, 4, u32> y2;
323 BitField<24, 4, u32> x3;
324 BitField<28, 4, u32> y3;
325 };
326
327 constexpr std::pair<u32, u32> Location(int index) const {
328 switch (index) {
329 case 0:
330 return {x0, y0};
331 case 1:
332 return {x1, y1};
333 case 2:
334 return {x2, y2};
335 case 3:
336 return {x3, y3};
337 default:
338 UNREACHABLE();
339 return {0, 0};
340 }
341 }
342 };
343
315 enum class DepthMode : u32 { 344 enum class DepthMode : u32 {
316 MinusOneToOne = 0, 345 MinusOneToOne = 0,
317 ZeroToOne = 1, 346 ZeroToOne = 1,
@@ -793,7 +822,13 @@ public:
793 822
794 u32 rt_separate_frag_data; 823 u32 rt_separate_frag_data;
795 824
796 INSERT_UNION_PADDING_WORDS(0xC); 825 INSERT_UNION_PADDING_WORDS(0x1);
826
827 u32 multisample_raster_enable;
828 u32 multisample_raster_samples;
829 std::array<u32, 4> multisample_sample_mask;
830
831 INSERT_UNION_PADDING_WORDS(0x5);
797 832
798 struct { 833 struct {
799 u32 address_high; 834 u32 address_high;
@@ -830,7 +865,16 @@ public:
830 865
831 std::array<VertexAttribute, NumVertexAttributes> vertex_attrib_format; 866 std::array<VertexAttribute, NumVertexAttributes> vertex_attrib_format;
832 867
833 INSERT_UNION_PADDING_WORDS(0xF); 868 std::array<MsaaSampleLocation, 4> multisample_sample_locations;
869
870 INSERT_UNION_PADDING_WORDS(0x2);
871
872 union {
873 BitField<0, 1, u32> enable;
874 BitField<4, 3, u32> target;
875 } multisample_coverage_to_color;
876
877 INSERT_UNION_PADDING_WORDS(0x8);
834 878
835 struct { 879 struct {
836 union { 880 union {
@@ -943,7 +987,7 @@ public:
943 987
944 CounterReset counter_reset; 988 CounterReset counter_reset;
945 989
946 INSERT_UNION_PADDING_WORDS(0x1); 990 u32 multisample_enable;
947 991
948 u32 zeta_enable; 992 u32 zeta_enable;
949 993
@@ -1007,7 +1051,11 @@ public:
1007 1051
1008 float polygon_offset_units; 1052 float polygon_offset_units;
1009 1053
1010 INSERT_UNION_PADDING_WORDS(0x11); 1054 INSERT_UNION_PADDING_WORDS(0x4);
1055
1056 Tegra::Texture::MsaaMode multisample_mode;
1057
1058 INSERT_UNION_PADDING_WORDS(0xC);
1011 1059
1012 union { 1060 union {
1013 BitField<2, 1, u32> coord_origin; 1061 BitField<2, 1, u32> coord_origin;
@@ -1507,12 +1555,17 @@ ASSERT_REG_POSITION(stencil_back_func_ref, 0x3D5);
1507ASSERT_REG_POSITION(stencil_back_mask, 0x3D6); 1555ASSERT_REG_POSITION(stencil_back_mask, 0x3D6);
1508ASSERT_REG_POSITION(stencil_back_func_mask, 0x3D7); 1556ASSERT_REG_POSITION(stencil_back_func_mask, 0x3D7);
1509ASSERT_REG_POSITION(color_mask_common, 0x3E4); 1557ASSERT_REG_POSITION(color_mask_common, 0x3E4);
1510ASSERT_REG_POSITION(rt_separate_frag_data, 0x3EB);
1511ASSERT_REG_POSITION(depth_bounds, 0x3E7); 1558ASSERT_REG_POSITION(depth_bounds, 0x3E7);
1559ASSERT_REG_POSITION(rt_separate_frag_data, 0x3EB);
1560ASSERT_REG_POSITION(multisample_raster_enable, 0x3ED);
1561ASSERT_REG_POSITION(multisample_raster_samples, 0x3EE);
1562ASSERT_REG_POSITION(multisample_sample_mask, 0x3EF);
1512ASSERT_REG_POSITION(zeta, 0x3F8); 1563ASSERT_REG_POSITION(zeta, 0x3F8);
1513ASSERT_REG_POSITION(clear_flags, 0x43E); 1564ASSERT_REG_POSITION(clear_flags, 0x43E);
1514ASSERT_REG_POSITION(fill_rectangle, 0x44F); 1565ASSERT_REG_POSITION(fill_rectangle, 0x44F);
1515ASSERT_REG_POSITION(vertex_attrib_format, 0x458); 1566ASSERT_REG_POSITION(vertex_attrib_format, 0x458);
1567ASSERT_REG_POSITION(multisample_sample_locations, 0x478);
1568ASSERT_REG_POSITION(multisample_coverage_to_color, 0x47E);
1516ASSERT_REG_POSITION(rt_control, 0x487); 1569ASSERT_REG_POSITION(rt_control, 0x487);
1517ASSERT_REG_POSITION(zeta_width, 0x48a); 1570ASSERT_REG_POSITION(zeta_width, 0x48a);
1518ASSERT_REG_POSITION(zeta_height, 0x48b); 1571ASSERT_REG_POSITION(zeta_height, 0x48b);
@@ -1545,11 +1598,12 @@ ASSERT_REG_POSITION(samplecnt_enable, 0x545);
1545ASSERT_REG_POSITION(point_size, 0x546); 1598ASSERT_REG_POSITION(point_size, 0x546);
1546ASSERT_REG_POSITION(point_sprite_enable, 0x548); 1599ASSERT_REG_POSITION(point_sprite_enable, 0x548);
1547ASSERT_REG_POSITION(counter_reset, 0x54C); 1600ASSERT_REG_POSITION(counter_reset, 0x54C);
1601ASSERT_REG_POSITION(multisample_enable, 0x54D);
1548ASSERT_REG_POSITION(zeta_enable, 0x54E); 1602ASSERT_REG_POSITION(zeta_enable, 0x54E);
1549ASSERT_REG_POSITION(multisample_control, 0x54F); 1603ASSERT_REG_POSITION(multisample_control, 0x54F);
1550ASSERT_REG_POSITION(condition, 0x554); 1604ASSERT_REG_POSITION(condition, 0x554);
1551ASSERT_REG_POSITION(tsc, 0x557); 1605ASSERT_REG_POSITION(tsc, 0x557);
1552ASSERT_REG_POSITION(polygon_offset_factor, 0x55b); 1606ASSERT_REG_POSITION(polygon_offset_factor, 0x55B);
1553ASSERT_REG_POSITION(tic, 0x55D); 1607ASSERT_REG_POSITION(tic, 0x55D);
1554ASSERT_REG_POSITION(stencil_two_side_enable, 0x565); 1608ASSERT_REG_POSITION(stencil_two_side_enable, 0x565);
1555ASSERT_REG_POSITION(stencil_back_op_fail, 0x566); 1609ASSERT_REG_POSITION(stencil_back_op_fail, 0x566);
@@ -1558,6 +1612,7 @@ ASSERT_REG_POSITION(stencil_back_op_zpass, 0x568);
1558ASSERT_REG_POSITION(stencil_back_func_func, 0x569); 1612ASSERT_REG_POSITION(stencil_back_func_func, 0x569);
1559ASSERT_REG_POSITION(framebuffer_srgb, 0x56E); 1613ASSERT_REG_POSITION(framebuffer_srgb, 0x56E);
1560ASSERT_REG_POSITION(polygon_offset_units, 0x56F); 1614ASSERT_REG_POSITION(polygon_offset_units, 0x56F);
1615ASSERT_REG_POSITION(multisample_mode, 0x574);
1561ASSERT_REG_POSITION(point_coord_replace, 0x581); 1616ASSERT_REG_POSITION(point_coord_replace, 0x581);
1562ASSERT_REG_POSITION(code_address, 0x582); 1617ASSERT_REG_POSITION(code_address, 0x582);
1563ASSERT_REG_POSITION(draw, 0x585); 1618ASSERT_REG_POSITION(draw, 0x585);
diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h
index 930b605af..c66c66f6c 100644
--- a/src/video_core/engines/shader_bytecode.h
+++ b/src/video_core/engines/shader_bytecode.h
@@ -290,6 +290,23 @@ enum class VmadShr : u64 {
290 Shr15 = 2, 290 Shr15 = 2,
291}; 291};
292 292
293enum class VmnmxType : u64 {
294 Bits8,
295 Bits16,
296 Bits32,
297};
298
299enum class VmnmxOperation : u64 {
300 Mrg_16H = 0,
301 Mrg_16L = 1,
302 Mrg_8B0 = 2,
303 Mrg_8B2 = 3,
304 Acc = 4,
305 Min = 5,
306 Max = 6,
307 Nop = 7,
308};
309
293enum class XmadMode : u64 { 310enum class XmadMode : u64 {
294 None = 0, 311 None = 0,
295 CLo = 1, 312 CLo = 1,
@@ -1651,6 +1668,42 @@ union Instruction {
1651 } vmad; 1668 } vmad;
1652 1669
1653 union { 1670 union {
1671 BitField<54, 1, u64> is_dest_signed;
1672 BitField<48, 1, u64> is_src_a_signed;
1673 BitField<49, 1, u64> is_src_b_signed;
1674 BitField<37, 2, u64> src_format_a;
1675 BitField<29, 2, u64> src_format_b;
1676 BitField<56, 1, u64> mx;
1677 BitField<55, 1, u64> sat;
1678 BitField<36, 2, u64> selector_a;
1679 BitField<28, 2, u64> selector_b;
1680 BitField<50, 1, u64> is_op_b_register;
1681 BitField<51, 3, VmnmxOperation> operation;
1682
1683 VmnmxType SourceFormatA() const {
1684 switch (src_format_a) {
1685 case 0b11:
1686 return VmnmxType::Bits32;
1687 case 0b10:
1688 return VmnmxType::Bits16;
1689 default:
1690 return VmnmxType::Bits8;
1691 }
1692 }
1693
1694 VmnmxType SourceFormatB() const {
1695 switch (src_format_b) {
1696 case 0b11:
1697 return VmnmxType::Bits32;
1698 case 0b10:
1699 return VmnmxType::Bits16;
1700 default:
1701 return VmnmxType::Bits8;
1702 }
1703 }
1704 } vmnmx;
1705
1706 union {
1654 BitField<20, 16, u64> imm20_16; 1707 BitField<20, 16, u64> imm20_16;
1655 BitField<35, 1, u64> high_b_rr; // used on RR 1708 BitField<35, 1, u64> high_b_rr; // used on RR
1656 BitField<36, 1, u64> product_shift_left; 1709 BitField<36, 1, u64> product_shift_left;
@@ -1712,6 +1765,7 @@ public:
1712 BRK, 1765 BRK,
1713 DEPBAR, 1766 DEPBAR,
1714 VOTE, 1767 VOTE,
1768 VOTE_VTG,
1715 SHFL, 1769 SHFL,
1716 FSWZADD, 1770 FSWZADD,
1717 BFE_C, 1771 BFE_C,
@@ -1758,9 +1812,11 @@ public:
1758 IPA, 1812 IPA,
1759 OUT_R, // Emit vertex/primitive 1813 OUT_R, // Emit vertex/primitive
1760 ISBERD, 1814 ISBERD,
1815 BAR,
1761 MEMBAR, 1816 MEMBAR,
1762 VMAD, 1817 VMAD,
1763 VSETP, 1818 VSETP,
1819 VMNMX,
1764 FFMA_IMM, // Fused Multiply and Add 1820 FFMA_IMM, // Fused Multiply and Add
1765 FFMA_CR, 1821 FFMA_CR,
1766 FFMA_RC, 1822 FFMA_RC,
@@ -1842,7 +1898,7 @@ public:
1842 MOV_C, 1898 MOV_C,
1843 MOV_R, 1899 MOV_R,
1844 MOV_IMM, 1900 MOV_IMM,
1845 MOV_SYS, 1901 S2R,
1846 MOV32_IMM, 1902 MOV32_IMM,
1847 SHL_C, 1903 SHL_C,
1848 SHL_R, 1904 SHL_R,
@@ -2026,6 +2082,7 @@ private:
2026 INST("111000110000----", Id::EXIT, Type::Flow, "EXIT"), 2082 INST("111000110000----", Id::EXIT, Type::Flow, "EXIT"),
2027 INST("1111000011110---", Id::DEPBAR, Type::Synch, "DEPBAR"), 2083 INST("1111000011110---", Id::DEPBAR, Type::Synch, "DEPBAR"),
2028 INST("0101000011011---", Id::VOTE, Type::Warp, "VOTE"), 2084 INST("0101000011011---", Id::VOTE, Type::Warp, "VOTE"),
2085 INST("0101000011100---", Id::VOTE_VTG, Type::Warp, "VOTE_VTG"),
2029 INST("1110111100010---", Id::SHFL, Type::Warp, "SHFL"), 2086 INST("1110111100010---", Id::SHFL, Type::Warp, "SHFL"),
2030 INST("0101000011111---", Id::FSWZADD, Type::Warp, "FSWZADD"), 2087 INST("0101000011111---", Id::FSWZADD, Type::Warp, "FSWZADD"),
2031 INST("1110111111011---", Id::LD_A, Type::Memory, "LD_A"), 2088 INST("1110111111011---", Id::LD_A, Type::Memory, "LD_A"),
@@ -2063,9 +2120,11 @@ private:
2063 INST("11100000--------", Id::IPA, Type::Trivial, "IPA"), 2120 INST("11100000--------", Id::IPA, Type::Trivial, "IPA"),
2064 INST("1111101111100---", Id::OUT_R, Type::Trivial, "OUT_R"), 2121 INST("1111101111100---", Id::OUT_R, Type::Trivial, "OUT_R"),
2065 INST("1110111111010---", Id::ISBERD, Type::Trivial, "ISBERD"), 2122 INST("1110111111010---", Id::ISBERD, Type::Trivial, "ISBERD"),
2123 INST("1111000010101---", Id::BAR, Type::Trivial, "BAR"),
2066 INST("1110111110011---", Id::MEMBAR, Type::Trivial, "MEMBAR"), 2124 INST("1110111110011---", Id::MEMBAR, Type::Trivial, "MEMBAR"),
2067 INST("01011111--------", Id::VMAD, Type::Video, "VMAD"), 2125 INST("01011111--------", Id::VMAD, Type::Video, "VMAD"),
2068 INST("0101000011110---", Id::VSETP, Type::Video, "VSETP"), 2126 INST("0101000011110---", Id::VSETP, Type::Video, "VSETP"),
2127 INST("0011101---------", Id::VMNMX, Type::Video, "VMNMX"),
2069 INST("0011001-1-------", Id::FFMA_IMM, Type::Ffma, "FFMA_IMM"), 2128 INST("0011001-1-------", Id::FFMA_IMM, Type::Ffma, "FFMA_IMM"),
2070 INST("010010011-------", Id::FFMA_CR, Type::Ffma, "FFMA_CR"), 2129 INST("010010011-------", Id::FFMA_CR, Type::Ffma, "FFMA_CR"),
2071 INST("010100011-------", Id::FFMA_RC, Type::Ffma, "FFMA_RC"), 2130 INST("010100011-------", Id::FFMA_RC, Type::Ffma, "FFMA_RC"),
@@ -2134,7 +2193,7 @@ private:
2134 INST("0100110010011---", Id::MOV_C, Type::Arithmetic, "MOV_C"), 2193 INST("0100110010011---", Id::MOV_C, Type::Arithmetic, "MOV_C"),
2135 INST("0101110010011---", Id::MOV_R, Type::Arithmetic, "MOV_R"), 2194 INST("0101110010011---", Id::MOV_R, Type::Arithmetic, "MOV_R"),
2136 INST("0011100-10011---", Id::MOV_IMM, Type::Arithmetic, "MOV_IMM"), 2195 INST("0011100-10011---", Id::MOV_IMM, Type::Arithmetic, "MOV_IMM"),
2137 INST("1111000011001---", Id::MOV_SYS, Type::Trivial, "MOV_SYS"), 2196 INST("1111000011001---", Id::S2R, Type::Trivial, "S2R"),
2138 INST("000000010000----", Id::MOV32_IMM, Type::ArithmeticImmediate, "MOV32_IMM"), 2197 INST("000000010000----", Id::MOV32_IMM, Type::ArithmeticImmediate, "MOV32_IMM"),
2139 INST("0100110001100---", Id::FMNMX_C, Type::Arithmetic, "FMNMX_C"), 2198 INST("0100110001100---", Id::FMNMX_C, Type::Arithmetic, "FMNMX_C"),
2140 INST("0101110001100---", Id::FMNMX_R, Type::Arithmetic, "FMNMX_R"), 2199 INST("0101110001100---", Id::FMNMX_R, Type::Arithmetic, "FMNMX_R"),
@@ -2166,7 +2225,7 @@ private:
2166 INST("0011011-11111---", Id::SHF_LEFT_IMM, Type::Shift, "SHF_LEFT_IMM"), 2225 INST("0011011-11111---", Id::SHF_LEFT_IMM, Type::Shift, "SHF_LEFT_IMM"),
2167 INST("0100110011100---", Id::I2I_C, Type::Conversion, "I2I_C"), 2226 INST("0100110011100---", Id::I2I_C, Type::Conversion, "I2I_C"),
2168 INST("0101110011100---", Id::I2I_R, Type::Conversion, "I2I_R"), 2227 INST("0101110011100---", Id::I2I_R, Type::Conversion, "I2I_R"),
2169 INST("0011101-11100---", Id::I2I_IMM, Type::Conversion, "I2I_IMM"), 2228 INST("0011100-11100---", Id::I2I_IMM, Type::Conversion, "I2I_IMM"),
2170 INST("0100110010111---", Id::I2F_C, Type::Conversion, "I2F_C"), 2229 INST("0100110010111---", Id::I2F_C, Type::Conversion, "I2F_C"),
2171 INST("0101110010111---", Id::I2F_R, Type::Conversion, "I2F_R"), 2230 INST("0101110010111---", Id::I2F_R, Type::Conversion, "I2F_R"),
2172 INST("0011100-10111---", Id::I2F_IMM, Type::Conversion, "I2F_IMM"), 2231 INST("0011100-10111---", Id::I2F_IMM, Type::Conversion, "I2F_IMM"),
diff --git a/src/video_core/gpu.h b/src/video_core/gpu.h
index ced9d7e28..1a2d747be 100644
--- a/src/video_core/gpu.h
+++ b/src/video_core/gpu.h
@@ -270,13 +270,13 @@ public:
270 virtual void SwapBuffers(const Tegra::FramebufferConfig* framebuffer) = 0; 270 virtual void SwapBuffers(const Tegra::FramebufferConfig* framebuffer) = 0;
271 271
272 /// Notify rasterizer that any caches of the specified region should be flushed to Switch memory 272 /// Notify rasterizer that any caches of the specified region should be flushed to Switch memory
273 virtual void FlushRegion(CacheAddr addr, u64 size) = 0; 273 virtual void FlushRegion(VAddr addr, u64 size) = 0;
274 274
275 /// Notify rasterizer that any caches of the specified region should be invalidated 275 /// Notify rasterizer that any caches of the specified region should be invalidated
276 virtual void InvalidateRegion(CacheAddr addr, u64 size) = 0; 276 virtual void InvalidateRegion(VAddr addr, u64 size) = 0;
277 277
278 /// Notify rasterizer that any caches of the specified region should be flushed and invalidated 278 /// Notify rasterizer that any caches of the specified region should be flushed and invalidated
279 virtual void FlushAndInvalidateRegion(CacheAddr addr, u64 size) = 0; 279 virtual void FlushAndInvalidateRegion(VAddr addr, u64 size) = 0;
280 280
281protected: 281protected:
282 virtual void TriggerCpuInterrupt(u32 syncpoint_id, u32 value) const = 0; 282 virtual void TriggerCpuInterrupt(u32 syncpoint_id, u32 value) const = 0;
diff --git a/src/video_core/gpu_asynch.cpp b/src/video_core/gpu_asynch.cpp
index 925be8d7b..cc434faf7 100644
--- a/src/video_core/gpu_asynch.cpp
+++ b/src/video_core/gpu_asynch.cpp
@@ -30,15 +30,15 @@ void GPUAsynch::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) {
30 gpu_thread.SwapBuffers(framebuffer); 30 gpu_thread.SwapBuffers(framebuffer);
31} 31}
32 32
33void GPUAsynch::FlushRegion(CacheAddr addr, u64 size) { 33void GPUAsynch::FlushRegion(VAddr addr, u64 size) {
34 gpu_thread.FlushRegion(addr, size); 34 gpu_thread.FlushRegion(addr, size);
35} 35}
36 36
37void GPUAsynch::InvalidateRegion(CacheAddr addr, u64 size) { 37void GPUAsynch::InvalidateRegion(VAddr addr, u64 size) {
38 gpu_thread.InvalidateRegion(addr, size); 38 gpu_thread.InvalidateRegion(addr, size);
39} 39}
40 40
41void GPUAsynch::FlushAndInvalidateRegion(CacheAddr addr, u64 size) { 41void GPUAsynch::FlushAndInvalidateRegion(VAddr addr, u64 size) {
42 gpu_thread.FlushAndInvalidateRegion(addr, size); 42 gpu_thread.FlushAndInvalidateRegion(addr, size);
43} 43}
44 44
diff --git a/src/video_core/gpu_asynch.h b/src/video_core/gpu_asynch.h
index 265c62758..03fd0eef0 100644
--- a/src/video_core/gpu_asynch.h
+++ b/src/video_core/gpu_asynch.h
@@ -27,9 +27,9 @@ public:
27 void Start() override; 27 void Start() override;
28 void PushGPUEntries(Tegra::CommandList&& entries) override; 28 void PushGPUEntries(Tegra::CommandList&& entries) override;
29 void SwapBuffers(const Tegra::FramebufferConfig* framebuffer) override; 29 void SwapBuffers(const Tegra::FramebufferConfig* framebuffer) override;
30 void FlushRegion(CacheAddr addr, u64 size) override; 30 void FlushRegion(VAddr addr, u64 size) override;
31 void InvalidateRegion(CacheAddr addr, u64 size) override; 31 void InvalidateRegion(VAddr addr, u64 size) override;
32 void FlushAndInvalidateRegion(CacheAddr addr, u64 size) override; 32 void FlushAndInvalidateRegion(VAddr addr, u64 size) override;
33 void WaitIdle() const override; 33 void WaitIdle() const override;
34 34
35protected: 35protected:
diff --git a/src/video_core/gpu_synch.cpp b/src/video_core/gpu_synch.cpp
index bd5278a5c..6f38a672a 100644
--- a/src/video_core/gpu_synch.cpp
+++ b/src/video_core/gpu_synch.cpp
@@ -26,15 +26,15 @@ void GPUSynch::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) {
26 renderer->SwapBuffers(framebuffer); 26 renderer->SwapBuffers(framebuffer);
27} 27}
28 28
29void GPUSynch::FlushRegion(CacheAddr addr, u64 size) { 29void GPUSynch::FlushRegion(VAddr addr, u64 size) {
30 renderer->Rasterizer().FlushRegion(addr, size); 30 renderer->Rasterizer().FlushRegion(addr, size);
31} 31}
32 32
33void GPUSynch::InvalidateRegion(CacheAddr addr, u64 size) { 33void GPUSynch::InvalidateRegion(VAddr addr, u64 size) {
34 renderer->Rasterizer().InvalidateRegion(addr, size); 34 renderer->Rasterizer().InvalidateRegion(addr, size);
35} 35}
36 36
37void GPUSynch::FlushAndInvalidateRegion(CacheAddr addr, u64 size) { 37void GPUSynch::FlushAndInvalidateRegion(VAddr addr, u64 size) {
38 renderer->Rasterizer().FlushAndInvalidateRegion(addr, size); 38 renderer->Rasterizer().FlushAndInvalidateRegion(addr, size);
39} 39}
40 40
diff --git a/src/video_core/gpu_synch.h b/src/video_core/gpu_synch.h
index 866a94c8c..4a6e9a01d 100644
--- a/src/video_core/gpu_synch.h
+++ b/src/video_core/gpu_synch.h
@@ -26,9 +26,9 @@ public:
26 void Start() override; 26 void Start() override;
27 void PushGPUEntries(Tegra::CommandList&& entries) override; 27 void PushGPUEntries(Tegra::CommandList&& entries) override;
28 void SwapBuffers(const Tegra::FramebufferConfig* framebuffer) override; 28 void SwapBuffers(const Tegra::FramebufferConfig* framebuffer) override;
29 void FlushRegion(CacheAddr addr, u64 size) override; 29 void FlushRegion(VAddr addr, u64 size) override;
30 void InvalidateRegion(CacheAddr addr, u64 size) override; 30 void InvalidateRegion(VAddr addr, u64 size) override;
31 void FlushAndInvalidateRegion(CacheAddr addr, u64 size) override; 31 void FlushAndInvalidateRegion(VAddr addr, u64 size) override;
32 void WaitIdle() const override {} 32 void WaitIdle() const override {}
33 33
34protected: 34protected:
diff --git a/src/video_core/gpu_thread.cpp b/src/video_core/gpu_thread.cpp
index 270c7ae0d..10cda686b 100644
--- a/src/video_core/gpu_thread.cpp
+++ b/src/video_core/gpu_thread.cpp
@@ -77,15 +77,15 @@ void ThreadManager::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) {
77 PushCommand(SwapBuffersCommand(framebuffer ? std::make_optional(*framebuffer) : std::nullopt)); 77 PushCommand(SwapBuffersCommand(framebuffer ? std::make_optional(*framebuffer) : std::nullopt));
78} 78}
79 79
80void ThreadManager::FlushRegion(CacheAddr addr, u64 size) { 80void ThreadManager::FlushRegion(VAddr addr, u64 size) {
81 PushCommand(FlushRegionCommand(addr, size)); 81 PushCommand(FlushRegionCommand(addr, size));
82} 82}
83 83
84void ThreadManager::InvalidateRegion(CacheAddr addr, u64 size) { 84void ThreadManager::InvalidateRegion(VAddr addr, u64 size) {
85 system.Renderer().Rasterizer().InvalidateRegion(addr, size); 85 system.Renderer().Rasterizer().InvalidateRegion(addr, size);
86} 86}
87 87
88void ThreadManager::FlushAndInvalidateRegion(CacheAddr addr, u64 size) { 88void ThreadManager::FlushAndInvalidateRegion(VAddr addr, u64 size) {
89 // Skip flush on asynch mode, as FlushAndInvalidateRegion is not used for anything too important 89 // Skip flush on asynch mode, as FlushAndInvalidateRegion is not used for anything too important
90 InvalidateRegion(addr, size); 90 InvalidateRegion(addr, size);
91} 91}
diff --git a/src/video_core/gpu_thread.h b/src/video_core/gpu_thread.h
index be36c580e..cd74ad330 100644
--- a/src/video_core/gpu_thread.h
+++ b/src/video_core/gpu_thread.h
@@ -47,26 +47,26 @@ struct SwapBuffersCommand final {
47 47
48/// Command to signal to the GPU thread to flush a region 48/// Command to signal to the GPU thread to flush a region
49struct FlushRegionCommand final { 49struct FlushRegionCommand final {
50 explicit constexpr FlushRegionCommand(CacheAddr addr, u64 size) : addr{addr}, size{size} {} 50 explicit constexpr FlushRegionCommand(VAddr addr, u64 size) : addr{addr}, size{size} {}
51 51
52 CacheAddr addr; 52 VAddr addr;
53 u64 size; 53 u64 size;
54}; 54};
55 55
56/// Command to signal to the GPU thread to invalidate a region 56/// Command to signal to the GPU thread to invalidate a region
57struct InvalidateRegionCommand final { 57struct InvalidateRegionCommand final {
58 explicit constexpr InvalidateRegionCommand(CacheAddr addr, u64 size) : addr{addr}, size{size} {} 58 explicit constexpr InvalidateRegionCommand(VAddr addr, u64 size) : addr{addr}, size{size} {}
59 59
60 CacheAddr addr; 60 VAddr addr;
61 u64 size; 61 u64 size;
62}; 62};
63 63
64/// Command to signal to the GPU thread to flush and invalidate a region 64/// Command to signal to the GPU thread to flush and invalidate a region
65struct FlushAndInvalidateRegionCommand final { 65struct FlushAndInvalidateRegionCommand final {
66 explicit constexpr FlushAndInvalidateRegionCommand(CacheAddr addr, u64 size) 66 explicit constexpr FlushAndInvalidateRegionCommand(VAddr addr, u64 size)
67 : addr{addr}, size{size} {} 67 : addr{addr}, size{size} {}
68 68
69 CacheAddr addr; 69 VAddr addr;
70 u64 size; 70 u64 size;
71}; 71};
72 72
@@ -111,13 +111,13 @@ public:
111 void SwapBuffers(const Tegra::FramebufferConfig* framebuffer); 111 void SwapBuffers(const Tegra::FramebufferConfig* framebuffer);
112 112
113 /// Notify rasterizer that any caches of the specified region should be flushed to Switch memory 113 /// Notify rasterizer that any caches of the specified region should be flushed to Switch memory
114 void FlushRegion(CacheAddr addr, u64 size); 114 void FlushRegion(VAddr addr, u64 size);
115 115
116 /// Notify rasterizer that any caches of the specified region should be invalidated 116 /// Notify rasterizer that any caches of the specified region should be invalidated
117 void InvalidateRegion(CacheAddr addr, u64 size); 117 void InvalidateRegion(VAddr addr, u64 size);
118 118
119 /// Notify rasterizer that any caches of the specified region should be flushed and invalidated 119 /// Notify rasterizer that any caches of the specified region should be flushed and invalidated
120 void FlushAndInvalidateRegion(CacheAddr addr, u64 size); 120 void FlushAndInvalidateRegion(VAddr addr, u64 size);
121 121
122 // Wait until the gpu thread is idle. 122 // Wait until the gpu thread is idle.
123 void WaitIdle() const; 123 void WaitIdle() const;
diff --git a/src/video_core/memory_manager.cpp b/src/video_core/memory_manager.cpp
index f5d33f27a..a3389d0d2 100644
--- a/src/video_core/memory_manager.cpp
+++ b/src/video_core/memory_manager.cpp
@@ -81,12 +81,11 @@ GPUVAddr MemoryManager::UnmapBuffer(GPUVAddr gpu_addr, u64 size) {
81 ASSERT((gpu_addr & page_mask) == 0); 81 ASSERT((gpu_addr & page_mask) == 0);
82 82
83 const u64 aligned_size{Common::AlignUp(size, page_size)}; 83 const u64 aligned_size{Common::AlignUp(size, page_size)};
84 const CacheAddr cache_addr{ToCacheAddr(GetPointer(gpu_addr))};
85 const auto cpu_addr = GpuToCpuAddress(gpu_addr); 84 const auto cpu_addr = GpuToCpuAddress(gpu_addr);
86 ASSERT(cpu_addr); 85 ASSERT(cpu_addr);
87 86
88 // Flush and invalidate through the GPU interface, to be asynchronous if possible. 87 // Flush and invalidate through the GPU interface, to be asynchronous if possible.
89 system.GPU().FlushAndInvalidateRegion(cache_addr, aligned_size); 88 system.GPU().FlushAndInvalidateRegion(*cpu_addr, aligned_size);
90 89
91 UnmapRange(gpu_addr, aligned_size); 90 UnmapRange(gpu_addr, aligned_size);
92 ASSERT(system.CurrentProcess() 91 ASSERT(system.CurrentProcess()
@@ -140,11 +139,11 @@ T MemoryManager::Read(GPUVAddr addr) const {
140 return {}; 139 return {};
141 } 140 }
142 141
143 const u8* page_pointer{page_table.pointers[addr >> page_bits]}; 142 const u8* page_pointer{GetPointer(addr)};
144 if (page_pointer) { 143 if (page_pointer) {
145 // NOTE: Avoid adding any extra logic to this fast-path block 144 // NOTE: Avoid adding any extra logic to this fast-path block
146 T value; 145 T value;
147 std::memcpy(&value, &page_pointer[addr & page_mask], sizeof(T)); 146 std::memcpy(&value, page_pointer, sizeof(T));
148 return value; 147 return value;
149 } 148 }
150 149
@@ -167,10 +166,10 @@ void MemoryManager::Write(GPUVAddr addr, T data) {
167 return; 166 return;
168 } 167 }
169 168
170 u8* page_pointer{page_table.pointers[addr >> page_bits]}; 169 u8* page_pointer{GetPointer(addr)};
171 if (page_pointer) { 170 if (page_pointer) {
172 // NOTE: Avoid adding any extra logic to this fast-path block 171 // NOTE: Avoid adding any extra logic to this fast-path block
173 std::memcpy(&page_pointer[addr & page_mask], &data, sizeof(T)); 172 std::memcpy(page_pointer, &data, sizeof(T));
174 return; 173 return;
175 } 174 }
176 175
@@ -201,9 +200,12 @@ u8* MemoryManager::GetPointer(GPUVAddr addr) {
201 return {}; 200 return {};
202 } 201 }
203 202
204 u8* const page_pointer{page_table.pointers[addr >> page_bits]}; 203 auto& memory = system.Memory();
205 if (page_pointer != nullptr) { 204
206 return page_pointer + (addr & page_mask); 205 const VAddr page_addr{page_table.backing_addr[addr >> page_bits]};
206
207 if (page_addr != 0) {
208 return memory.GetPointer(page_addr + (addr & page_mask));
207 } 209 }
208 210
209 LOG_ERROR(HW_GPU, "Unknown GetPointer @ 0x{:016X}", addr); 211 LOG_ERROR(HW_GPU, "Unknown GetPointer @ 0x{:016X}", addr);
@@ -215,9 +217,12 @@ const u8* MemoryManager::GetPointer(GPUVAddr addr) const {
215 return {}; 217 return {};
216 } 218 }
217 219
218 const u8* const page_pointer{page_table.pointers[addr >> page_bits]}; 220 const auto& memory = system.Memory();
219 if (page_pointer != nullptr) { 221
220 return page_pointer + (addr & page_mask); 222 const VAddr page_addr{page_table.backing_addr[addr >> page_bits]};
223
224 if (page_addr != 0) {
225 return memory.GetPointer(page_addr + (addr & page_mask));
221 } 226 }
222 227
223 LOG_ERROR(HW_GPU, "Unknown GetPointer @ 0x{:016X}", addr); 228 LOG_ERROR(HW_GPU, "Unknown GetPointer @ 0x{:016X}", addr);
@@ -238,17 +243,19 @@ void MemoryManager::ReadBlock(GPUVAddr src_addr, void* dest_buffer, const std::s
238 std::size_t page_index{src_addr >> page_bits}; 243 std::size_t page_index{src_addr >> page_bits};
239 std::size_t page_offset{src_addr & page_mask}; 244 std::size_t page_offset{src_addr & page_mask};
240 245
246 auto& memory = system.Memory();
247
241 while (remaining_size > 0) { 248 while (remaining_size > 0) {
242 const std::size_t copy_amount{ 249 const std::size_t copy_amount{
243 std::min(static_cast<std::size_t>(page_size) - page_offset, remaining_size)}; 250 std::min(static_cast<std::size_t>(page_size) - page_offset, remaining_size)};
244 251
245 switch (page_table.attributes[page_index]) { 252 switch (page_table.attributes[page_index]) {
246 case Common::PageType::Memory: { 253 case Common::PageType::Memory: {
247 const u8* src_ptr{page_table.pointers[page_index] + page_offset}; 254 const VAddr src_addr{page_table.backing_addr[page_index] + page_offset};
248 // Flush must happen on the rasterizer interface, such that memory is always synchronous 255 // Flush must happen on the rasterizer interface, such that memory is always synchronous
249 // when it is read (even when in asynchronous GPU mode). Fixes Dead Cells title menu. 256 // when it is read (even when in asynchronous GPU mode). Fixes Dead Cells title menu.
250 rasterizer.FlushRegion(ToCacheAddr(src_ptr), copy_amount); 257 rasterizer.FlushRegion(src_addr, copy_amount);
251 std::memcpy(dest_buffer, src_ptr, copy_amount); 258 memory.ReadBlockUnsafe(src_addr, dest_buffer, copy_amount);
252 break; 259 break;
253 } 260 }
254 default: 261 default:
@@ -268,13 +275,15 @@ void MemoryManager::ReadBlockUnsafe(GPUVAddr src_addr, void* dest_buffer,
268 std::size_t page_index{src_addr >> page_bits}; 275 std::size_t page_index{src_addr >> page_bits};
269 std::size_t page_offset{src_addr & page_mask}; 276 std::size_t page_offset{src_addr & page_mask};
270 277
278 auto& memory = system.Memory();
279
271 while (remaining_size > 0) { 280 while (remaining_size > 0) {
272 const std::size_t copy_amount{ 281 const std::size_t copy_amount{
273 std::min(static_cast<std::size_t>(page_size) - page_offset, remaining_size)}; 282 std::min(static_cast<std::size_t>(page_size) - page_offset, remaining_size)};
274 const u8* page_pointer = page_table.pointers[page_index]; 283 const u8* page_pointer = page_table.pointers[page_index];
275 if (page_pointer) { 284 if (page_pointer) {
276 const u8* src_ptr{page_pointer + page_offset}; 285 const VAddr src_addr{page_table.backing_addr[page_index] + page_offset};
277 std::memcpy(dest_buffer, src_ptr, copy_amount); 286 memory.ReadBlockUnsafe(src_addr, dest_buffer, copy_amount);
278 } else { 287 } else {
279 std::memset(dest_buffer, 0, copy_amount); 288 std::memset(dest_buffer, 0, copy_amount);
280 } 289 }
@@ -290,17 +299,19 @@ void MemoryManager::WriteBlock(GPUVAddr dest_addr, const void* src_buffer, const
290 std::size_t page_index{dest_addr >> page_bits}; 299 std::size_t page_index{dest_addr >> page_bits};
291 std::size_t page_offset{dest_addr & page_mask}; 300 std::size_t page_offset{dest_addr & page_mask};
292 301
302 auto& memory = system.Memory();
303
293 while (remaining_size > 0) { 304 while (remaining_size > 0) {
294 const std::size_t copy_amount{ 305 const std::size_t copy_amount{
295 std::min(static_cast<std::size_t>(page_size) - page_offset, remaining_size)}; 306 std::min(static_cast<std::size_t>(page_size) - page_offset, remaining_size)};
296 307
297 switch (page_table.attributes[page_index]) { 308 switch (page_table.attributes[page_index]) {
298 case Common::PageType::Memory: { 309 case Common::PageType::Memory: {
299 u8* dest_ptr{page_table.pointers[page_index] + page_offset}; 310 const VAddr dest_addr{page_table.backing_addr[page_index] + page_offset};
300 // Invalidate must happen on the rasterizer interface, such that memory is always 311 // Invalidate must happen on the rasterizer interface, such that memory is always
301 // synchronous when it is written (even when in asynchronous GPU mode). 312 // synchronous when it is written (even when in asynchronous GPU mode).
302 rasterizer.InvalidateRegion(ToCacheAddr(dest_ptr), copy_amount); 313 rasterizer.InvalidateRegion(dest_addr, copy_amount);
303 std::memcpy(dest_ptr, src_buffer, copy_amount); 314 memory.WriteBlockUnsafe(dest_addr, src_buffer, copy_amount);
304 break; 315 break;
305 } 316 }
306 default: 317 default:
@@ -320,13 +331,15 @@ void MemoryManager::WriteBlockUnsafe(GPUVAddr dest_addr, const void* src_buffer,
320 std::size_t page_index{dest_addr >> page_bits}; 331 std::size_t page_index{dest_addr >> page_bits};
321 std::size_t page_offset{dest_addr & page_mask}; 332 std::size_t page_offset{dest_addr & page_mask};
322 333
334 auto& memory = system.Memory();
335
323 while (remaining_size > 0) { 336 while (remaining_size > 0) {
324 const std::size_t copy_amount{ 337 const std::size_t copy_amount{
325 std::min(static_cast<std::size_t>(page_size) - page_offset, remaining_size)}; 338 std::min(static_cast<std::size_t>(page_size) - page_offset, remaining_size)};
326 u8* page_pointer = page_table.pointers[page_index]; 339 u8* page_pointer = page_table.pointers[page_index];
327 if (page_pointer) { 340 if (page_pointer) {
328 u8* dest_ptr{page_pointer + page_offset}; 341 const VAddr dest_addr{page_table.backing_addr[page_index] + page_offset};
329 std::memcpy(dest_ptr, src_buffer, copy_amount); 342 memory.WriteBlockUnsafe(dest_addr, src_buffer, copy_amount);
330 } 343 }
331 page_index++; 344 page_index++;
332 page_offset = 0; 345 page_offset = 0;
@@ -336,33 +349,9 @@ void MemoryManager::WriteBlockUnsafe(GPUVAddr dest_addr, const void* src_buffer,
336} 349}
337 350
338void MemoryManager::CopyBlock(GPUVAddr dest_addr, GPUVAddr src_addr, const std::size_t size) { 351void MemoryManager::CopyBlock(GPUVAddr dest_addr, GPUVAddr src_addr, const std::size_t size) {
339 std::size_t remaining_size{size}; 352 std::vector<u8> tmp_buffer(size);
340 std::size_t page_index{src_addr >> page_bits}; 353 ReadBlock(src_addr, tmp_buffer.data(), size);
341 std::size_t page_offset{src_addr & page_mask}; 354 WriteBlock(dest_addr, tmp_buffer.data(), size);
342
343 while (remaining_size > 0) {
344 const std::size_t copy_amount{
345 std::min(static_cast<std::size_t>(page_size) - page_offset, remaining_size)};
346
347 switch (page_table.attributes[page_index]) {
348 case Common::PageType::Memory: {
349 // Flush must happen on the rasterizer interface, such that memory is always synchronous
350 // when it is copied (even when in asynchronous GPU mode).
351 const u8* src_ptr{page_table.pointers[page_index] + page_offset};
352 rasterizer.FlushRegion(ToCacheAddr(src_ptr), copy_amount);
353 WriteBlock(dest_addr, src_ptr, copy_amount);
354 break;
355 }
356 default:
357 UNREACHABLE();
358 }
359
360 page_index++;
361 page_offset = 0;
362 dest_addr += static_cast<VAddr>(copy_amount);
363 src_addr += static_cast<VAddr>(copy_amount);
364 remaining_size -= copy_amount;
365 }
366} 355}
367 356
368void MemoryManager::CopyBlockUnsafe(GPUVAddr dest_addr, GPUVAddr src_addr, const std::size_t size) { 357void MemoryManager::CopyBlockUnsafe(GPUVAddr dest_addr, GPUVAddr src_addr, const std::size_t size) {
@@ -371,6 +360,12 @@ void MemoryManager::CopyBlockUnsafe(GPUVAddr dest_addr, GPUVAddr src_addr, const
371 WriteBlockUnsafe(dest_addr, tmp_buffer.data(), size); 360 WriteBlockUnsafe(dest_addr, tmp_buffer.data(), size);
372} 361}
373 362
363bool MemoryManager::IsGranularRange(GPUVAddr gpu_addr, std::size_t size) {
364 const VAddr addr = page_table.backing_addr[gpu_addr >> page_bits];
365 const std::size_t page = (addr & Memory::PAGE_MASK) + size;
366 return page <= Memory::PAGE_SIZE;
367}
368
374void MemoryManager::MapPages(GPUVAddr base, u64 size, u8* memory, Common::PageType type, 369void MemoryManager::MapPages(GPUVAddr base, u64 size, u8* memory, Common::PageType type,
375 VAddr backing_addr) { 370 VAddr backing_addr) {
376 LOG_DEBUG(HW_GPU, "Mapping {} onto {:016X}-{:016X}", fmt::ptr(memory), base * page_size, 371 LOG_DEBUG(HW_GPU, "Mapping {} onto {:016X}-{:016X}", fmt::ptr(memory), base * page_size,
diff --git a/src/video_core/memory_manager.h b/src/video_core/memory_manager.h
index 073bdb491..0d9468535 100644
--- a/src/video_core/memory_manager.h
+++ b/src/video_core/memory_manager.h
@@ -97,6 +97,11 @@ public:
97 void WriteBlockUnsafe(GPUVAddr dest_addr, const void* src_buffer, std::size_t size); 97 void WriteBlockUnsafe(GPUVAddr dest_addr, const void* src_buffer, std::size_t size);
98 void CopyBlockUnsafe(GPUVAddr dest_addr, GPUVAddr src_addr, std::size_t size); 98 void CopyBlockUnsafe(GPUVAddr dest_addr, GPUVAddr src_addr, std::size_t size);
99 99
100 /**
101 * IsGranularRange checks if a gpu region can be simply read with a pointer
102 */
103 bool IsGranularRange(GPUVAddr gpu_addr, std::size_t size);
104
100private: 105private:
101 using VMAMap = std::map<GPUVAddr, VirtualMemoryArea>; 106 using VMAMap = std::map<GPUVAddr, VirtualMemoryArea>;
102 using VMAHandle = VMAMap::const_iterator; 107 using VMAHandle = VMAMap::const_iterator;
diff --git a/src/video_core/query_cache.h b/src/video_core/query_cache.h
index e66054ed0..5ea2b01f2 100644
--- a/src/video_core/query_cache.h
+++ b/src/video_core/query_cache.h
@@ -98,12 +98,12 @@ public:
98 static_cast<QueryCache&>(*this), 98 static_cast<QueryCache&>(*this),
99 VideoCore::QueryType::SamplesPassed}}} {} 99 VideoCore::QueryType::SamplesPassed}}} {}
100 100
101 void InvalidateRegion(CacheAddr addr, std::size_t size) { 101 void InvalidateRegion(VAddr addr, std::size_t size) {
102 std::unique_lock lock{mutex}; 102 std::unique_lock lock{mutex};
103 FlushAndRemoveRegion(addr, size); 103 FlushAndRemoveRegion(addr, size);
104 } 104 }
105 105
106 void FlushRegion(CacheAddr addr, std::size_t size) { 106 void FlushRegion(VAddr addr, std::size_t size) {
107 std::unique_lock lock{mutex}; 107 std::unique_lock lock{mutex};
108 FlushAndRemoveRegion(addr, size); 108 FlushAndRemoveRegion(addr, size);
109 } 109 }
@@ -117,14 +117,16 @@ public:
117 void Query(GPUVAddr gpu_addr, VideoCore::QueryType type, std::optional<u64> timestamp) { 117 void Query(GPUVAddr gpu_addr, VideoCore::QueryType type, std::optional<u64> timestamp) {
118 std::unique_lock lock{mutex}; 118 std::unique_lock lock{mutex};
119 auto& memory_manager = system.GPU().MemoryManager(); 119 auto& memory_manager = system.GPU().MemoryManager();
120 const auto host_ptr = memory_manager.GetPointer(gpu_addr); 120 const std::optional<VAddr> cpu_addr_opt = memory_manager.GpuToCpuAddress(gpu_addr);
121 ASSERT(cpu_addr_opt);
122 VAddr cpu_addr = *cpu_addr_opt;
121 123
122 CachedQuery* query = TryGet(ToCacheAddr(host_ptr)); 124 CachedQuery* query = TryGet(cpu_addr);
123 if (!query) { 125 if (!query) {
124 const auto cpu_addr = memory_manager.GpuToCpuAddress(gpu_addr); 126 ASSERT_OR_EXECUTE(cpu_addr_opt, return;);
125 ASSERT_OR_EXECUTE(cpu_addr, return;); 127 const auto host_ptr = memory_manager.GetPointer(gpu_addr);
126 128
127 query = Register(type, *cpu_addr, host_ptr, timestamp.has_value()); 129 query = Register(type, cpu_addr, host_ptr, timestamp.has_value());
128 } 130 }
129 131
130 query->BindCounter(Stream(type).Current(), timestamp); 132 query->BindCounter(Stream(type).Current(), timestamp);
@@ -173,11 +175,11 @@ protected:
173 175
174private: 176private:
175 /// Flushes a memory range to guest memory and removes it from the cache. 177 /// Flushes a memory range to guest memory and removes it from the cache.
176 void FlushAndRemoveRegion(CacheAddr addr, std::size_t size) { 178 void FlushAndRemoveRegion(VAddr addr, std::size_t size) {
177 const u64 addr_begin = static_cast<u64>(addr); 179 const u64 addr_begin = static_cast<u64>(addr);
178 const u64 addr_end = addr_begin + static_cast<u64>(size); 180 const u64 addr_end = addr_begin + static_cast<u64>(size);
179 const auto in_range = [addr_begin, addr_end](CachedQuery& query) { 181 const auto in_range = [addr_begin, addr_end](CachedQuery& query) {
180 const u64 cache_begin = query.GetCacheAddr(); 182 const u64 cache_begin = query.GetCpuAddr();
181 const u64 cache_end = cache_begin + query.SizeInBytes(); 183 const u64 cache_end = cache_begin + query.SizeInBytes();
182 return cache_begin < addr_end && addr_begin < cache_end; 184 return cache_begin < addr_end && addr_begin < cache_end;
183 }; 185 };
@@ -193,7 +195,7 @@ private:
193 if (!in_range(query)) { 195 if (!in_range(query)) {
194 continue; 196 continue;
195 } 197 }
196 rasterizer.UpdatePagesCachedCount(query.CpuAddr(), query.SizeInBytes(), -1); 198 rasterizer.UpdatePagesCachedCount(query.GetCpuAddr(), query.SizeInBytes(), -1);
197 query.Flush(); 199 query.Flush();
198 } 200 }
199 contents.erase(std::remove_if(std::begin(contents), std::end(contents), in_range), 201 contents.erase(std::remove_if(std::begin(contents), std::end(contents), in_range),
@@ -204,22 +206,21 @@ private:
204 /// Registers the passed parameters as cached and returns a pointer to the stored cached query. 206 /// Registers the passed parameters as cached and returns a pointer to the stored cached query.
205 CachedQuery* Register(VideoCore::QueryType type, VAddr cpu_addr, u8* host_ptr, bool timestamp) { 207 CachedQuery* Register(VideoCore::QueryType type, VAddr cpu_addr, u8* host_ptr, bool timestamp) {
206 rasterizer.UpdatePagesCachedCount(cpu_addr, CachedQuery::SizeInBytes(timestamp), 1); 208 rasterizer.UpdatePagesCachedCount(cpu_addr, CachedQuery::SizeInBytes(timestamp), 1);
207 const u64 page = static_cast<u64>(ToCacheAddr(host_ptr)) >> PAGE_SHIFT; 209 const u64 page = static_cast<u64>(cpu_addr) >> PAGE_SHIFT;
208 return &cached_queries[page].emplace_back(static_cast<QueryCache&>(*this), type, cpu_addr, 210 return &cached_queries[page].emplace_back(static_cast<QueryCache&>(*this), type, cpu_addr,
209 host_ptr); 211 host_ptr);
210 } 212 }
211 213
212 /// Tries to a get a cached query. Returns nullptr on failure. 214 /// Tries to a get a cached query. Returns nullptr on failure.
213 CachedQuery* TryGet(CacheAddr addr) { 215 CachedQuery* TryGet(VAddr addr) {
214 const u64 page = static_cast<u64>(addr) >> PAGE_SHIFT; 216 const u64 page = static_cast<u64>(addr) >> PAGE_SHIFT;
215 const auto it = cached_queries.find(page); 217 const auto it = cached_queries.find(page);
216 if (it == std::end(cached_queries)) { 218 if (it == std::end(cached_queries)) {
217 return nullptr; 219 return nullptr;
218 } 220 }
219 auto& contents = it->second; 221 auto& contents = it->second;
220 const auto found = 222 const auto found = std::find_if(std::begin(contents), std::end(contents),
221 std::find_if(std::begin(contents), std::end(contents), 223 [addr](auto& query) { return query.GetCpuAddr() == addr; });
222 [addr](auto& query) { return query.GetCacheAddr() == addr; });
223 return found != std::end(contents) ? &*found : nullptr; 224 return found != std::end(contents) ? &*found : nullptr;
224 } 225 }
225 226
@@ -323,14 +324,10 @@ public:
323 timestamp = timestamp_; 324 timestamp = timestamp_;
324 } 325 }
325 326
326 VAddr CpuAddr() const noexcept { 327 VAddr GetCpuAddr() const noexcept {
327 return cpu_addr; 328 return cpu_addr;
328 } 329 }
329 330
330 CacheAddr GetCacheAddr() const noexcept {
331 return ToCacheAddr(host_ptr);
332 }
333
334 u64 SizeInBytes() const noexcept { 331 u64 SizeInBytes() const noexcept {
335 return SizeInBytes(timestamp.has_value()); 332 return SizeInBytes(timestamp.has_value());
336 } 333 }
diff --git a/src/video_core/rasterizer_cache.h b/src/video_core/rasterizer_cache.h
index 6de1597a2..22987751e 100644
--- a/src/video_core/rasterizer_cache.h
+++ b/src/video_core/rasterizer_cache.h
@@ -18,22 +18,14 @@
18 18
19class RasterizerCacheObject { 19class RasterizerCacheObject {
20public: 20public:
21 explicit RasterizerCacheObject(const u8* host_ptr) 21 explicit RasterizerCacheObject(const VAddr cpu_addr) : cpu_addr{cpu_addr} {}
22 : host_ptr{host_ptr}, cache_addr{ToCacheAddr(host_ptr)} {}
23 22
24 virtual ~RasterizerCacheObject(); 23 virtual ~RasterizerCacheObject();
25 24
26 CacheAddr GetCacheAddr() const { 25 VAddr GetCpuAddr() const {
27 return cache_addr; 26 return cpu_addr;
28 } 27 }
29 28
30 const u8* GetHostPtr() const {
31 return host_ptr;
32 }
33
34 /// Gets the address of the shader in guest memory, required for cache management
35 virtual VAddr GetCpuAddr() const = 0;
36
37 /// Gets the size of the shader in guest memory, required for cache management 29 /// Gets the size of the shader in guest memory, required for cache management
38 virtual std::size_t GetSizeInBytes() const = 0; 30 virtual std::size_t GetSizeInBytes() const = 0;
39 31
@@ -68,8 +60,7 @@ private:
68 bool is_registered{}; ///< Whether the object is currently registered with the cache 60 bool is_registered{}; ///< Whether the object is currently registered with the cache
69 bool is_dirty{}; ///< Whether the object is dirty (out of sync with guest memory) 61 bool is_dirty{}; ///< Whether the object is dirty (out of sync with guest memory)
70 u64 last_modified_ticks{}; ///< When the object was last modified, used for in-order flushing 62 u64 last_modified_ticks{}; ///< When the object was last modified, used for in-order flushing
71 const u8* host_ptr{}; ///< Pointer to the memory backing this cached region 63 VAddr cpu_addr{}; ///< Cpu address memory, unique from emulated virtual address space
72 CacheAddr cache_addr{}; ///< Cache address memory, unique from emulated virtual address space
73}; 64};
74 65
75template <class T> 66template <class T>
@@ -80,7 +71,7 @@ public:
80 explicit RasterizerCache(VideoCore::RasterizerInterface& rasterizer) : rasterizer{rasterizer} {} 71 explicit RasterizerCache(VideoCore::RasterizerInterface& rasterizer) : rasterizer{rasterizer} {}
81 72
82 /// Write any cached resources overlapping the specified region back to memory 73 /// Write any cached resources overlapping the specified region back to memory
83 void FlushRegion(CacheAddr addr, std::size_t size) { 74 void FlushRegion(VAddr addr, std::size_t size) {
84 std::lock_guard lock{mutex}; 75 std::lock_guard lock{mutex};
85 76
86 const auto& objects{GetSortedObjectsFromRegion(addr, size)}; 77 const auto& objects{GetSortedObjectsFromRegion(addr, size)};
@@ -90,7 +81,7 @@ public:
90 } 81 }
91 82
92 /// Mark the specified region as being invalidated 83 /// Mark the specified region as being invalidated
93 void InvalidateRegion(CacheAddr addr, u64 size) { 84 void InvalidateRegion(VAddr addr, u64 size) {
94 std::lock_guard lock{mutex}; 85 std::lock_guard lock{mutex};
95 86
96 const auto& objects{GetSortedObjectsFromRegion(addr, size)}; 87 const auto& objects{GetSortedObjectsFromRegion(addr, size)};
@@ -114,27 +105,20 @@ public:
114 105
115protected: 106protected:
116 /// Tries to get an object from the cache with the specified cache address 107 /// Tries to get an object from the cache with the specified cache address
117 T TryGet(CacheAddr addr) const { 108 T TryGet(VAddr addr) const {
118 const auto iter = map_cache.find(addr); 109 const auto iter = map_cache.find(addr);
119 if (iter != map_cache.end()) 110 if (iter != map_cache.end())
120 return iter->second; 111 return iter->second;
121 return nullptr; 112 return nullptr;
122 } 113 }
123 114
124 T TryGet(const void* addr) const {
125 const auto iter = map_cache.find(ToCacheAddr(addr));
126 if (iter != map_cache.end())
127 return iter->second;
128 return nullptr;
129 }
130
131 /// Register an object into the cache 115 /// Register an object into the cache
132 virtual void Register(const T& object) { 116 virtual void Register(const T& object) {
133 std::lock_guard lock{mutex}; 117 std::lock_guard lock{mutex};
134 118
135 object->SetIsRegistered(true); 119 object->SetIsRegistered(true);
136 interval_cache.add({GetInterval(object), ObjectSet{object}}); 120 interval_cache.add({GetInterval(object), ObjectSet{object}});
137 map_cache.insert({object->GetCacheAddr(), object}); 121 map_cache.insert({object->GetCpuAddr(), object});
138 rasterizer.UpdatePagesCachedCount(object->GetCpuAddr(), object->GetSizeInBytes(), 1); 122 rasterizer.UpdatePagesCachedCount(object->GetCpuAddr(), object->GetSizeInBytes(), 1);
139 } 123 }
140 124
@@ -144,7 +128,7 @@ protected:
144 128
145 object->SetIsRegistered(false); 129 object->SetIsRegistered(false);
146 rasterizer.UpdatePagesCachedCount(object->GetCpuAddr(), object->GetSizeInBytes(), -1); 130 rasterizer.UpdatePagesCachedCount(object->GetCpuAddr(), object->GetSizeInBytes(), -1);
147 const CacheAddr addr = object->GetCacheAddr(); 131 const VAddr addr = object->GetCpuAddr();
148 interval_cache.subtract({GetInterval(object), ObjectSet{object}}); 132 interval_cache.subtract({GetInterval(object), ObjectSet{object}});
149 map_cache.erase(addr); 133 map_cache.erase(addr);
150 } 134 }
@@ -173,7 +157,7 @@ protected:
173 157
174private: 158private:
175 /// Returns a list of cached objects from the specified memory region, ordered by access time 159 /// Returns a list of cached objects from the specified memory region, ordered by access time
176 std::vector<T> GetSortedObjectsFromRegion(CacheAddr addr, u64 size) { 160 std::vector<T> GetSortedObjectsFromRegion(VAddr addr, u64 size) {
177 if (size == 0) { 161 if (size == 0) {
178 return {}; 162 return {};
179 } 163 }
@@ -197,13 +181,13 @@ private:
197 } 181 }
198 182
199 using ObjectSet = std::set<T>; 183 using ObjectSet = std::set<T>;
200 using ObjectCache = std::unordered_map<CacheAddr, T>; 184 using ObjectCache = std::unordered_map<VAddr, T>;
201 using IntervalCache = boost::icl::interval_map<CacheAddr, ObjectSet>; 185 using IntervalCache = boost::icl::interval_map<VAddr, ObjectSet>;
202 using ObjectInterval = typename IntervalCache::interval_type; 186 using ObjectInterval = typename IntervalCache::interval_type;
203 187
204 static auto GetInterval(const T& object) { 188 static auto GetInterval(const T& object) {
205 return ObjectInterval::right_open(object->GetCacheAddr(), 189 return ObjectInterval::right_open(object->GetCpuAddr(),
206 object->GetCacheAddr() + object->GetSizeInBytes()); 190 object->GetCpuAddr() + object->GetSizeInBytes());
207 } 191 }
208 192
209 ObjectCache map_cache; 193 ObjectCache map_cache;
diff --git a/src/video_core/rasterizer_interface.h b/src/video_core/rasterizer_interface.h
index 1a68e3caa..8ae5b9c4e 100644
--- a/src/video_core/rasterizer_interface.h
+++ b/src/video_core/rasterizer_interface.h
@@ -53,14 +53,14 @@ public:
53 virtual void FlushAll() = 0; 53 virtual void FlushAll() = 0;
54 54
55 /// Notify rasterizer that any caches of the specified region should be flushed to Switch memory 55 /// Notify rasterizer that any caches of the specified region should be flushed to Switch memory
56 virtual void FlushRegion(CacheAddr addr, u64 size) = 0; 56 virtual void FlushRegion(VAddr addr, u64 size) = 0;
57 57
58 /// Notify rasterizer that any caches of the specified region should be invalidated 58 /// Notify rasterizer that any caches of the specified region should be invalidated
59 virtual void InvalidateRegion(CacheAddr addr, u64 size) = 0; 59 virtual void InvalidateRegion(VAddr addr, u64 size) = 0;
60 60
61 /// Notify rasterizer that any caches of the specified region should be flushed to Switch memory 61 /// Notify rasterizer that any caches of the specified region should be flushed to Switch memory
62 /// and invalidated 62 /// and invalidated
63 virtual void FlushAndInvalidateRegion(CacheAddr addr, u64 size) = 0; 63 virtual void FlushAndInvalidateRegion(VAddr addr, u64 size) = 0;
64 64
65 /// Notify the rasterizer to send all written commands to the host GPU. 65 /// Notify the rasterizer to send all written commands to the host GPU.
66 virtual void FlushCommands() = 0; 66 virtual void FlushCommands() = 0;
diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.cpp b/src/video_core/renderer_opengl/gl_buffer_cache.cpp
index 0375fca17..4eb37a96c 100644
--- a/src/video_core/renderer_opengl/gl_buffer_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_buffer_cache.cpp
@@ -21,8 +21,8 @@ using Maxwell = Tegra::Engines::Maxwell3D::Regs;
21 21
22MICROPROFILE_DEFINE(OpenGL_Buffer_Download, "OpenGL", "Buffer Download", MP_RGB(192, 192, 128)); 22MICROPROFILE_DEFINE(OpenGL_Buffer_Download, "OpenGL", "Buffer Download", MP_RGB(192, 192, 128));
23 23
24CachedBufferBlock::CachedBufferBlock(CacheAddr cache_addr, const std::size_t size) 24CachedBufferBlock::CachedBufferBlock(VAddr cpu_addr, const std::size_t size)
25 : VideoCommon::BufferBlock{cache_addr, size} { 25 : VideoCommon::BufferBlock{cpu_addr, size} {
26 gl_buffer.Create(); 26 gl_buffer.Create();
27 glNamedBufferData(gl_buffer.handle, static_cast<GLsizeiptr>(size), nullptr, GL_DYNAMIC_DRAW); 27 glNamedBufferData(gl_buffer.handle, static_cast<GLsizeiptr>(size), nullptr, GL_DYNAMIC_DRAW);
28} 28}
@@ -47,8 +47,8 @@ OGLBufferCache::~OGLBufferCache() {
47 glDeleteBuffers(static_cast<GLsizei>(std::size(cbufs)), std::data(cbufs)); 47 glDeleteBuffers(static_cast<GLsizei>(std::size(cbufs)), std::data(cbufs));
48} 48}
49 49
50Buffer OGLBufferCache::CreateBlock(CacheAddr cache_addr, std::size_t size) { 50Buffer OGLBufferCache::CreateBlock(VAddr cpu_addr, std::size_t size) {
51 return std::make_shared<CachedBufferBlock>(cache_addr, size); 51 return std::make_shared<CachedBufferBlock>(cpu_addr, size);
52} 52}
53 53
54void OGLBufferCache::WriteBarrier() { 54void OGLBufferCache::WriteBarrier() {
diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.h b/src/video_core/renderer_opengl/gl_buffer_cache.h
index 8c7145443..d94a11252 100644
--- a/src/video_core/renderer_opengl/gl_buffer_cache.h
+++ b/src/video_core/renderer_opengl/gl_buffer_cache.h
@@ -31,7 +31,7 @@ using GenericBufferCache = VideoCommon::BufferCache<Buffer, GLuint, OGLStreamBuf
31 31
32class CachedBufferBlock : public VideoCommon::BufferBlock { 32class CachedBufferBlock : public VideoCommon::BufferBlock {
33public: 33public:
34 explicit CachedBufferBlock(CacheAddr cache_addr, const std::size_t size); 34 explicit CachedBufferBlock(VAddr cpu_addr, const std::size_t size);
35 ~CachedBufferBlock(); 35 ~CachedBufferBlock();
36 36
37 const GLuint* GetHandle() const { 37 const GLuint* GetHandle() const {
@@ -55,7 +55,7 @@ public:
55 } 55 }
56 56
57protected: 57protected:
58 Buffer CreateBlock(CacheAddr cache_addr, std::size_t size) override; 58 Buffer CreateBlock(VAddr cpu_addr, std::size_t size) override;
59 59
60 void WriteBarrier() override; 60 void WriteBarrier() override;
61 61
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp
index 346feeb2f..368f399df 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -656,9 +656,9 @@ void RasterizerOpenGL::Query(GPUVAddr gpu_addr, VideoCore::QueryType type,
656 656
657void RasterizerOpenGL::FlushAll() {} 657void RasterizerOpenGL::FlushAll() {}
658 658
659void RasterizerOpenGL::FlushRegion(CacheAddr addr, u64 size) { 659void RasterizerOpenGL::FlushRegion(VAddr addr, u64 size) {
660 MICROPROFILE_SCOPE(OpenGL_CacheManagement); 660 MICROPROFILE_SCOPE(OpenGL_CacheManagement);
661 if (!addr || !size) { 661 if (addr == 0 || size == 0) {
662 return; 662 return;
663 } 663 }
664 texture_cache.FlushRegion(addr, size); 664 texture_cache.FlushRegion(addr, size);
@@ -666,9 +666,9 @@ void RasterizerOpenGL::FlushRegion(CacheAddr addr, u64 size) {
666 query_cache.FlushRegion(addr, size); 666 query_cache.FlushRegion(addr, size);
667} 667}
668 668
669void RasterizerOpenGL::InvalidateRegion(CacheAddr addr, u64 size) { 669void RasterizerOpenGL::InvalidateRegion(VAddr addr, u64 size) {
670 MICROPROFILE_SCOPE(OpenGL_CacheManagement); 670 MICROPROFILE_SCOPE(OpenGL_CacheManagement);
671 if (!addr || !size) { 671 if (addr == 0 || size == 0) {
672 return; 672 return;
673 } 673 }
674 texture_cache.InvalidateRegion(addr, size); 674 texture_cache.InvalidateRegion(addr, size);
@@ -677,7 +677,7 @@ void RasterizerOpenGL::InvalidateRegion(CacheAddr addr, u64 size) {
677 query_cache.InvalidateRegion(addr, size); 677 query_cache.InvalidateRegion(addr, size);
678} 678}
679 679
680void RasterizerOpenGL::FlushAndInvalidateRegion(CacheAddr addr, u64 size) { 680void RasterizerOpenGL::FlushAndInvalidateRegion(VAddr addr, u64 size) {
681 if (Settings::values.use_accurate_gpu_emulation) { 681 if (Settings::values.use_accurate_gpu_emulation) {
682 FlushRegion(addr, size); 682 FlushRegion(addr, size);
683 } 683 }
@@ -716,8 +716,7 @@ bool RasterizerOpenGL::AccelerateDisplay(const Tegra::FramebufferConfig& config,
716 716
717 MICROPROFILE_SCOPE(OpenGL_CacheManagement); 717 MICROPROFILE_SCOPE(OpenGL_CacheManagement);
718 718
719 const auto surface{ 719 const auto surface{texture_cache.TryFindFramebufferSurface(framebuffer_addr)};
720 texture_cache.TryFindFramebufferSurface(system.Memory().GetPointer(framebuffer_addr))};
721 if (!surface) { 720 if (!surface) {
722 return {}; 721 return {};
723 } 722 }
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h
index 2d3be2437..212dad852 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer.h
@@ -65,9 +65,9 @@ public:
65 void ResetCounter(VideoCore::QueryType type) override; 65 void ResetCounter(VideoCore::QueryType type) override;
66 void Query(GPUVAddr gpu_addr, VideoCore::QueryType type, std::optional<u64> timestamp) override; 66 void Query(GPUVAddr gpu_addr, VideoCore::QueryType type, std::optional<u64> timestamp) override;
67 void FlushAll() override; 67 void FlushAll() override;
68 void FlushRegion(CacheAddr addr, u64 size) override; 68 void FlushRegion(VAddr addr, u64 size) override;
69 void InvalidateRegion(CacheAddr addr, u64 size) override; 69 void InvalidateRegion(VAddr addr, u64 size) override;
70 void FlushAndInvalidateRegion(CacheAddr addr, u64 size) override; 70 void FlushAndInvalidateRegion(VAddr addr, u64 size) override;
71 void FlushCommands() override; 71 void FlushCommands() override;
72 void TickFrame() override; 72 void TickFrame() override;
73 bool AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Regs::Surface& src, 73 bool AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Regs::Surface& src,
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp
index 046ee55a5..6d2ff20f9 100644
--- a/src/video_core/renderer_opengl/gl_shader_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp
@@ -214,11 +214,11 @@ std::unordered_set<GLenum> GetSupportedFormats() {
214 214
215} // Anonymous namespace 215} // Anonymous namespace
216 216
217CachedShader::CachedShader(const u8* host_ptr, VAddr cpu_addr, std::size_t size_in_bytes, 217CachedShader::CachedShader(VAddr cpu_addr, std::size_t size_in_bytes,
218 std::shared_ptr<VideoCommon::Shader::Registry> registry, 218 std::shared_ptr<VideoCommon::Shader::Registry> registry,
219 ShaderEntries entries, std::shared_ptr<OGLProgram> program) 219 ShaderEntries entries, std::shared_ptr<OGLProgram> program)
220 : RasterizerCacheObject{host_ptr}, registry{std::move(registry)}, entries{std::move(entries)}, 220 : RasterizerCacheObject{cpu_addr}, registry{std::move(registry)}, entries{std::move(entries)},
221 cpu_addr{cpu_addr}, size_in_bytes{size_in_bytes}, program{std::move(program)} {} 221 size_in_bytes{size_in_bytes}, program{std::move(program)} {}
222 222
223CachedShader::~CachedShader() = default; 223CachedShader::~CachedShader() = default;
224 224
@@ -254,9 +254,8 @@ Shader CachedShader::CreateStageFromMemory(const ShaderParameters& params,
254 entry.bindless_samplers = registry->GetBindlessSamplers(); 254 entry.bindless_samplers = registry->GetBindlessSamplers();
255 params.disk_cache.SaveEntry(std::move(entry)); 255 params.disk_cache.SaveEntry(std::move(entry));
256 256
257 return std::shared_ptr<CachedShader>(new CachedShader(params.host_ptr, params.cpu_addr, 257 return std::shared_ptr<CachedShader>(new CachedShader(
258 size_in_bytes, std::move(registry), 258 params.cpu_addr, size_in_bytes, std::move(registry), MakeEntries(ir), std::move(program)));
259 MakeEntries(ir), std::move(program)));
260} 259}
261 260
262Shader CachedShader::CreateKernelFromMemory(const ShaderParameters& params, ProgramCode code) { 261Shader CachedShader::CreateKernelFromMemory(const ShaderParameters& params, ProgramCode code) {
@@ -279,17 +278,16 @@ Shader CachedShader::CreateKernelFromMemory(const ShaderParameters& params, Prog
279 entry.bindless_samplers = registry->GetBindlessSamplers(); 278 entry.bindless_samplers = registry->GetBindlessSamplers();
280 params.disk_cache.SaveEntry(std::move(entry)); 279 params.disk_cache.SaveEntry(std::move(entry));
281 280
282 return std::shared_ptr<CachedShader>(new CachedShader(params.host_ptr, params.cpu_addr, 281 return std::shared_ptr<CachedShader>(new CachedShader(
283 size_in_bytes, std::move(registry), 282 params.cpu_addr, size_in_bytes, std::move(registry), MakeEntries(ir), std::move(program)));
284 MakeEntries(ir), std::move(program)));
285} 283}
286 284
287Shader CachedShader::CreateFromCache(const ShaderParameters& params, 285Shader CachedShader::CreateFromCache(const ShaderParameters& params,
288 const PrecompiledShader& precompiled_shader, 286 const PrecompiledShader& precompiled_shader,
289 std::size_t size_in_bytes) { 287 std::size_t size_in_bytes) {
290 return std::shared_ptr<CachedShader>(new CachedShader( 288 return std::shared_ptr<CachedShader>(
291 params.host_ptr, params.cpu_addr, size_in_bytes, precompiled_shader.registry, 289 new CachedShader(params.cpu_addr, size_in_bytes, precompiled_shader.registry,
292 precompiled_shader.entries, precompiled_shader.program)); 290 precompiled_shader.entries, precompiled_shader.program));
293} 291}
294 292
295ShaderCacheOpenGL::ShaderCacheOpenGL(RasterizerOpenGL& rasterizer, Core::System& system, 293ShaderCacheOpenGL::ShaderCacheOpenGL(RasterizerOpenGL& rasterizer, Core::System& system,
@@ -449,12 +447,14 @@ Shader ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program) {
449 const GPUVAddr address{GetShaderAddress(system, program)}; 447 const GPUVAddr address{GetShaderAddress(system, program)};
450 448
451 // Look up shader in the cache based on address 449 // Look up shader in the cache based on address
452 const auto host_ptr{memory_manager.GetPointer(address)}; 450 const auto cpu_addr{memory_manager.GpuToCpuAddress(address)};
453 Shader shader{TryGet(host_ptr)}; 451 Shader shader{cpu_addr ? TryGet(*cpu_addr) : nullptr};
454 if (shader) { 452 if (shader) {
455 return last_shaders[static_cast<std::size_t>(program)] = shader; 453 return last_shaders[static_cast<std::size_t>(program)] = shader;
456 } 454 }
457 455
456 const auto host_ptr{memory_manager.GetPointer(address)};
457
458 // No shader found - create a new one 458 // No shader found - create a new one
459 ProgramCode code{GetShaderCode(memory_manager, address, host_ptr)}; 459 ProgramCode code{GetShaderCode(memory_manager, address, host_ptr)};
460 ProgramCode code_b; 460 ProgramCode code_b;
@@ -465,9 +465,9 @@ Shader ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program) {
465 465
466 const auto unique_identifier = GetUniqueIdentifier( 466 const auto unique_identifier = GetUniqueIdentifier(
467 GetShaderType(program), program == Maxwell::ShaderProgram::VertexA, code, code_b); 467 GetShaderType(program), program == Maxwell::ShaderProgram::VertexA, code, code_b);
468 const auto cpu_addr{*memory_manager.GpuToCpuAddress(address)}; 468
469 const ShaderParameters params{system, disk_cache, device, 469 const ShaderParameters params{system, disk_cache, device,
470 cpu_addr, host_ptr, unique_identifier}; 470 *cpu_addr, host_ptr, unique_identifier};
471 471
472 const auto found = runtime_cache.find(unique_identifier); 472 const auto found = runtime_cache.find(unique_identifier);
473 if (found == runtime_cache.end()) { 473 if (found == runtime_cache.end()) {
@@ -484,18 +484,20 @@ Shader ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program) {
484 484
485Shader ShaderCacheOpenGL::GetComputeKernel(GPUVAddr code_addr) { 485Shader ShaderCacheOpenGL::GetComputeKernel(GPUVAddr code_addr) {
486 auto& memory_manager{system.GPU().MemoryManager()}; 486 auto& memory_manager{system.GPU().MemoryManager()};
487 const auto host_ptr{memory_manager.GetPointer(code_addr)}; 487 const auto cpu_addr{memory_manager.GpuToCpuAddress(code_addr)};
488 auto kernel = TryGet(host_ptr); 488
489 auto kernel = cpu_addr ? TryGet(*cpu_addr) : nullptr;
489 if (kernel) { 490 if (kernel) {
490 return kernel; 491 return kernel;
491 } 492 }
492 493
494 const auto host_ptr{memory_manager.GetPointer(code_addr)};
493 // No kernel found, create a new one 495 // No kernel found, create a new one
494 auto code{GetShaderCode(memory_manager, code_addr, host_ptr)}; 496 auto code{GetShaderCode(memory_manager, code_addr, host_ptr)};
495 const auto unique_identifier{GetUniqueIdentifier(ShaderType::Compute, false, code)}; 497 const auto unique_identifier{GetUniqueIdentifier(ShaderType::Compute, false, code)};
496 const auto cpu_addr{*memory_manager.GpuToCpuAddress(code_addr)}; 498
497 const ShaderParameters params{system, disk_cache, device, 499 const ShaderParameters params{system, disk_cache, device,
498 cpu_addr, host_ptr, unique_identifier}; 500 *cpu_addr, host_ptr, unique_identifier};
499 501
500 const auto found = runtime_cache.find(unique_identifier); 502 const auto found = runtime_cache.find(unique_identifier);
501 if (found == runtime_cache.end()) { 503 if (found == runtime_cache.end()) {
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.h b/src/video_core/renderer_opengl/gl_shader_cache.h
index 4935019fc..c836df5bd 100644
--- a/src/video_core/renderer_opengl/gl_shader_cache.h
+++ b/src/video_core/renderer_opengl/gl_shader_cache.h
@@ -65,11 +65,6 @@ public:
65 /// Gets the GL program handle for the shader 65 /// Gets the GL program handle for the shader
66 GLuint GetHandle() const; 66 GLuint GetHandle() const;
67 67
68 /// Returns the guest CPU address of the shader
69 VAddr GetCpuAddr() const override {
70 return cpu_addr;
71 }
72
73 /// Returns the size in bytes of the shader 68 /// Returns the size in bytes of the shader
74 std::size_t GetSizeInBytes() const override { 69 std::size_t GetSizeInBytes() const override {
75 return size_in_bytes; 70 return size_in_bytes;
@@ -90,13 +85,12 @@ public:
90 std::size_t size_in_bytes); 85 std::size_t size_in_bytes);
91 86
92private: 87private:
93 explicit CachedShader(const u8* host_ptr, VAddr cpu_addr, std::size_t size_in_bytes, 88 explicit CachedShader(VAddr cpu_addr, std::size_t size_in_bytes,
94 std::shared_ptr<VideoCommon::Shader::Registry> registry, 89 std::shared_ptr<VideoCommon::Shader::Registry> registry,
95 ShaderEntries entries, std::shared_ptr<OGLProgram> program); 90 ShaderEntries entries, std::shared_ptr<OGLProgram> program);
96 91
97 std::shared_ptr<VideoCommon::Shader::Registry> registry; 92 std::shared_ptr<VideoCommon::Shader::Registry> registry;
98 ShaderEntries entries; 93 ShaderEntries entries;
99 VAddr cpu_addr = 0;
100 std::size_t size_in_bytes = 0; 94 std::size_t size_in_bytes = 0;
101 std::shared_ptr<OGLProgram> program; 95 std::shared_ptr<OGLProgram> program;
102}; 96};
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
index 160ae4340..1f1f01313 100644
--- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
@@ -1819,15 +1819,15 @@ private:
1819 } 1819 }
1820 1820
1821 Expression HMergeH0(Operation operation) { 1821 Expression HMergeH0(Operation operation) {
1822 std::string dest = VisitOperand(operation, 0).AsUint(); 1822 const std::string dest = VisitOperand(operation, 0).AsUint();
1823 std::string src = VisitOperand(operation, 1).AsUint(); 1823 const std::string src = VisitOperand(operation, 1).AsUint();
1824 return {fmt::format("(({} & 0x0000FFFFU) | ({} & 0xFFFF0000U))", src, dest), Type::Uint}; 1824 return {fmt::format("bitfieldInsert({}, {}, 0, 16)", dest, src), Type::Uint};
1825 } 1825 }
1826 1826
1827 Expression HMergeH1(Operation operation) { 1827 Expression HMergeH1(Operation operation) {
1828 std::string dest = VisitOperand(operation, 0).AsUint(); 1828 const std::string dest = VisitOperand(operation, 0).AsUint();
1829 std::string src = VisitOperand(operation, 1).AsUint(); 1829 const std::string src = VisitOperand(operation, 1).AsUint();
1830 return {fmt::format("(({} & 0x0000FFFFU) | ({} & 0xFFFF0000U))", dest, src), Type::Uint}; 1830 return {fmt::format("bitfieldInsert({}, {}, 16, 16)", dest, src), Type::Uint};
1831 } 1831 }
1832 1832
1833 Expression HPack2(Operation operation) { 1833 Expression HPack2(Operation operation) {
diff --git a/src/video_core/renderer_vulkan/declarations.h b/src/video_core/renderer_vulkan/declarations.h
index 323bf6b39..89a035ca4 100644
--- a/src/video_core/renderer_vulkan/declarations.h
+++ b/src/video_core/renderer_vulkan/declarations.h
@@ -39,6 +39,7 @@ using UniqueFence = UniqueHandle<vk::Fence>;
39using UniqueFramebuffer = UniqueHandle<vk::Framebuffer>; 39using UniqueFramebuffer = UniqueHandle<vk::Framebuffer>;
40using UniqueImage = UniqueHandle<vk::Image>; 40using UniqueImage = UniqueHandle<vk::Image>;
41using UniqueImageView = UniqueHandle<vk::ImageView>; 41using UniqueImageView = UniqueHandle<vk::ImageView>;
42using UniqueInstance = UniqueHandle<vk::Instance>;
42using UniqueIndirectCommandsLayoutNVX = UniqueHandle<vk::IndirectCommandsLayoutNVX>; 43using UniqueIndirectCommandsLayoutNVX = UniqueHandle<vk::IndirectCommandsLayoutNVX>;
43using UniqueObjectTableNVX = UniqueHandle<vk::ObjectTableNVX>; 44using UniqueObjectTableNVX = UniqueHandle<vk::ObjectTableNVX>;
44using UniquePipeline = UniqueHandle<vk::Pipeline>; 45using UniquePipeline = UniqueHandle<vk::Pipeline>;
@@ -50,6 +51,7 @@ using UniqueSampler = UniqueHandle<vk::Sampler>;
50using UniqueSamplerYcbcrConversion = UniqueHandle<vk::SamplerYcbcrConversion>; 51using UniqueSamplerYcbcrConversion = UniqueHandle<vk::SamplerYcbcrConversion>;
51using UniqueSemaphore = UniqueHandle<vk::Semaphore>; 52using UniqueSemaphore = UniqueHandle<vk::Semaphore>;
52using UniqueShaderModule = UniqueHandle<vk::ShaderModule>; 53using UniqueShaderModule = UniqueHandle<vk::ShaderModule>;
54using UniqueSurfaceKHR = UniqueHandle<vk::SurfaceKHR>;
53using UniqueSwapchainKHR = UniqueHandle<vk::SwapchainKHR>; 55using UniqueSwapchainKHR = UniqueHandle<vk::SwapchainKHR>;
54using UniqueValidationCacheEXT = UniqueHandle<vk::ValidationCacheEXT>; 56using UniqueValidationCacheEXT = UniqueHandle<vk::ValidationCacheEXT>;
55using UniqueDebugReportCallbackEXT = UniqueHandle<vk::DebugReportCallbackEXT>; 57using UniqueDebugReportCallbackEXT = UniqueHandle<vk::DebugReportCallbackEXT>;
diff --git a/src/video_core/renderer_vulkan/renderer_vulkan.cpp b/src/video_core/renderer_vulkan/renderer_vulkan.cpp
index 6953aaafe..9cdb4b627 100644
--- a/src/video_core/renderer_vulkan/renderer_vulkan.cpp
+++ b/src/video_core/renderer_vulkan/renderer_vulkan.cpp
@@ -2,13 +2,18 @@
2// Licensed under GPLv2 or any later version 2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included. 3// Refer to the license.txt file included.
4 4
5#include <algorithm>
6#include <array>
7#include <cstring>
5#include <memory> 8#include <memory>
6#include <optional> 9#include <optional>
10#include <string>
7#include <vector> 11#include <vector>
8 12
9#include <fmt/format.h> 13#include <fmt/format.h>
10 14
11#include "common/assert.h" 15#include "common/assert.h"
16#include "common/dynamic_library.h"
12#include "common/logging/log.h" 17#include "common/logging/log.h"
13#include "common/telemetry.h" 18#include "common/telemetry.h"
14#include "core/core.h" 19#include "core/core.h"
@@ -30,15 +35,30 @@
30#include "video_core/renderer_vulkan/vk_state_tracker.h" 35#include "video_core/renderer_vulkan/vk_state_tracker.h"
31#include "video_core/renderer_vulkan/vk_swapchain.h" 36#include "video_core/renderer_vulkan/vk_swapchain.h"
32 37
38// Include these late to avoid changing Vulkan-Hpp's dynamic dispatcher size
39#ifdef _WIN32
40#include <windows.h>
41// ensure include order
42#include <vulkan/vulkan_win32.h>
43#endif
44
45#ifdef __linux__
46#include <X11/Xlib.h>
47#include <vulkan/vulkan_wayland.h>
48#include <vulkan/vulkan_xlib.h>
49#endif
50
33namespace Vulkan { 51namespace Vulkan {
34 52
35namespace { 53namespace {
36 54
55using Core::Frontend::WindowSystemType;
56
37VkBool32 DebugCallback(VkDebugUtilsMessageSeverityFlagBitsEXT severity_, 57VkBool32 DebugCallback(VkDebugUtilsMessageSeverityFlagBitsEXT severity_,
38 VkDebugUtilsMessageTypeFlagsEXT type, 58 VkDebugUtilsMessageTypeFlagsEXT type,
39 const VkDebugUtilsMessengerCallbackDataEXT* data, 59 const VkDebugUtilsMessengerCallbackDataEXT* data,
40 [[maybe_unused]] void* user_data) { 60 [[maybe_unused]] void* user_data) {
41 const vk::DebugUtilsMessageSeverityFlagBitsEXT severity{severity_}; 61 const auto severity{static_cast<vk::DebugUtilsMessageSeverityFlagBitsEXT>(severity_)};
42 const char* message{data->pMessage}; 62 const char* message{data->pMessage};
43 63
44 if (severity & vk::DebugUtilsMessageSeverityFlagBitsEXT::eError) { 64 if (severity & vk::DebugUtilsMessageSeverityFlagBitsEXT::eError) {
@@ -53,6 +73,110 @@ VkBool32 DebugCallback(VkDebugUtilsMessageSeverityFlagBitsEXT severity_,
53 return VK_FALSE; 73 return VK_FALSE;
54} 74}
55 75
76Common::DynamicLibrary OpenVulkanLibrary() {
77 Common::DynamicLibrary library;
78#ifdef __APPLE__
79 // Check if a path to a specific Vulkan library has been specified.
80 char* libvulkan_env = getenv("LIBVULKAN_PATH");
81 if (!libvulkan_env || !library.Open(libvulkan_env)) {
82 // Use the libvulkan.dylib from the application bundle.
83 std::string filename = File::GetBundleDirectory() + "/Contents/Frameworks/libvulkan.dylib";
84 library.Open(filename.c_str());
85 }
86#else
87 std::string filename = Common::DynamicLibrary::GetVersionedFilename("vulkan", 1);
88 if (!library.Open(filename.c_str())) {
89 // Android devices may not have libvulkan.so.1, only libvulkan.so.
90 filename = Common::DynamicLibrary::GetVersionedFilename("vulkan");
91 library.Open(filename.c_str());
92 }
93#endif
94 return library;
95}
96
97UniqueInstance CreateInstance(Common::DynamicLibrary& library, vk::DispatchLoaderDynamic& dld,
98 WindowSystemType window_type = WindowSystemType::Headless,
99 bool enable_layers = false) {
100 if (!library.IsOpen()) {
101 LOG_ERROR(Render_Vulkan, "Vulkan library not available");
102 return UniqueInstance{};
103 }
104 PFN_vkGetInstanceProcAddr vkGetInstanceProcAddr;
105 if (!library.GetSymbol("vkGetInstanceProcAddr", &vkGetInstanceProcAddr)) {
106 LOG_ERROR(Render_Vulkan, "vkGetInstanceProcAddr not present in Vulkan");
107 return UniqueInstance{};
108 }
109 dld.init(vkGetInstanceProcAddr);
110
111 std::vector<const char*> extensions;
112 extensions.reserve(4);
113 switch (window_type) {
114 case Core::Frontend::WindowSystemType::Headless:
115 break;
116#ifdef _WIN32
117 case Core::Frontend::WindowSystemType::Windows:
118 extensions.push_back(VK_KHR_WIN32_SURFACE_EXTENSION_NAME);
119 break;
120#endif
121#ifdef __linux__
122 case Core::Frontend::WindowSystemType::X11:
123 extensions.push_back(VK_KHR_XLIB_SURFACE_EXTENSION_NAME);
124 break;
125 case Core::Frontend::WindowSystemType::Wayland:
126 extensions.push_back(VK_KHR_WAYLAND_SURFACE_EXTENSION_NAME);
127 break;
128#endif
129 default:
130 LOG_ERROR(Render_Vulkan, "Presentation not supported on this platform");
131 break;
132 }
133 if (window_type != Core::Frontend::WindowSystemType::Headless) {
134 extensions.push_back(VK_KHR_SURFACE_EXTENSION_NAME);
135 }
136 if (enable_layers) {
137 extensions.push_back(VK_EXT_DEBUG_UTILS_EXTENSION_NAME);
138 }
139
140 u32 num_properties;
141 if (vk::enumerateInstanceExtensionProperties(nullptr, &num_properties, nullptr, dld) !=
142 vk::Result::eSuccess) {
143 LOG_ERROR(Render_Vulkan, "Failed to query number of extension properties");
144 return UniqueInstance{};
145 }
146 std::vector<vk::ExtensionProperties> properties(num_properties);
147 if (vk::enumerateInstanceExtensionProperties(nullptr, &num_properties, properties.data(),
148 dld) != vk::Result::eSuccess) {
149 LOG_ERROR(Render_Vulkan, "Failed to query extension properties");
150 return UniqueInstance{};
151 }
152
153 for (const char* extension : extensions) {
154 const auto it =
155 std::find_if(properties.begin(), properties.end(), [extension](const auto& prop) {
156 return !std::strcmp(extension, prop.extensionName);
157 });
158 if (it == properties.end()) {
159 LOG_ERROR(Render_Vulkan, "Required instance extension {} is not available", extension);
160 return UniqueInstance{};
161 }
162 }
163
164 const vk::ApplicationInfo application_info("yuzu Emulator", VK_MAKE_VERSION(0, 1, 0),
165 "yuzu Emulator", VK_MAKE_VERSION(0, 1, 0),
166 VK_API_VERSION_1_1);
167 const std::array layers = {"VK_LAYER_LUNARG_standard_validation"};
168 const vk::InstanceCreateInfo instance_ci(
169 {}, &application_info, enable_layers ? static_cast<u32>(layers.size()) : 0, layers.data(),
170 static_cast<u32>(extensions.size()), extensions.data());
171 vk::Instance unsafe_instance;
172 if (vk::createInstance(&instance_ci, nullptr, &unsafe_instance, dld) != vk::Result::eSuccess) {
173 LOG_ERROR(Render_Vulkan, "Failed to create Vulkan instance");
174 return UniqueInstance{};
175 }
176 dld.init(unsafe_instance);
177 return UniqueInstance(unsafe_instance, {nullptr, dld});
178}
179
56std::string GetReadableVersion(u32 version) { 180std::string GetReadableVersion(u32 version) {
57 return fmt::format("{}.{}.{}", VK_VERSION_MAJOR(version), VK_VERSION_MINOR(version), 181 return fmt::format("{}.{}.{}", VK_VERSION_MAJOR(version), VK_VERSION_MINOR(version),
58 VK_VERSION_PATCH(version)); 182 VK_VERSION_PATCH(version));
@@ -147,27 +271,12 @@ bool RendererVulkan::TryPresent(int /*timeout_ms*/) {
147} 271}
148 272
149bool RendererVulkan::Init() { 273bool RendererVulkan::Init() {
150 PFN_vkGetInstanceProcAddr vkGetInstanceProcAddr{}; 274 library = OpenVulkanLibrary();
151 render_window.RetrieveVulkanHandlers(&vkGetInstanceProcAddr, &instance, &surface); 275 instance = CreateInstance(library, dld, render_window.GetWindowInfo().type,
152 const vk::DispatchLoaderDynamic dldi(instance, vkGetInstanceProcAddr); 276 Settings::values.renderer_debug);
153 277 if (!instance || !CreateDebugCallback() || !CreateSurface() || !PickDevices()) {
154 std::optional<vk::DebugUtilsMessengerEXT> callback;
155 if (Settings::values.renderer_debug && dldi.vkCreateDebugUtilsMessengerEXT) {
156 callback = CreateDebugCallback(dldi);
157 if (!callback) {
158 return false;
159 }
160 }
161
162 if (!PickDevices(dldi)) {
163 if (callback) {
164 instance.destroy(*callback, nullptr, dldi);
165 }
166 return false; 278 return false;
167 } 279 }
168 debug_callback = UniqueDebugUtilsMessengerEXT(
169 *callback, vk::ObjectDestroy<vk::Instance, vk::DispatchLoaderDynamic>(
170 instance, nullptr, device->GetDispatchLoader()));
171 280
172 Report(); 281 Report();
173 282
@@ -176,7 +285,7 @@ bool RendererVulkan::Init() {
176 resource_manager = std::make_unique<VKResourceManager>(*device); 285 resource_manager = std::make_unique<VKResourceManager>(*device);
177 286
178 const auto& framebuffer = render_window.GetFramebufferLayout(); 287 const auto& framebuffer = render_window.GetFramebufferLayout();
179 swapchain = std::make_unique<VKSwapchain>(surface, *device); 288 swapchain = std::make_unique<VKSwapchain>(*surface, *device);
180 swapchain->Create(framebuffer.width, framebuffer.height, false); 289 swapchain->Create(framebuffer.width, framebuffer.height, false);
181 290
182 state_tracker = std::make_unique<StateTracker>(system); 291 state_tracker = std::make_unique<StateTracker>(system);
@@ -213,8 +322,10 @@ void RendererVulkan::ShutDown() {
213 device.reset(); 322 device.reset();
214} 323}
215 324
216std::optional<vk::DebugUtilsMessengerEXT> RendererVulkan::CreateDebugCallback( 325bool RendererVulkan::CreateDebugCallback() {
217 const vk::DispatchLoaderDynamic& dldi) { 326 if (!Settings::values.renderer_debug) {
327 return true;
328 }
218 const vk::DebugUtilsMessengerCreateInfoEXT callback_ci( 329 const vk::DebugUtilsMessengerCreateInfoEXT callback_ci(
219 {}, 330 {},
220 vk::DebugUtilsMessageSeverityFlagBitsEXT::eError | 331 vk::DebugUtilsMessageSeverityFlagBitsEXT::eError |
@@ -225,32 +336,88 @@ std::optional<vk::DebugUtilsMessengerEXT> RendererVulkan::CreateDebugCallback(
225 vk::DebugUtilsMessageTypeFlagBitsEXT::eValidation | 336 vk::DebugUtilsMessageTypeFlagBitsEXT::eValidation |
226 vk::DebugUtilsMessageTypeFlagBitsEXT::ePerformance, 337 vk::DebugUtilsMessageTypeFlagBitsEXT::ePerformance,
227 &DebugCallback, nullptr); 338 &DebugCallback, nullptr);
228 vk::DebugUtilsMessengerEXT callback; 339 vk::DebugUtilsMessengerEXT unsafe_callback;
229 if (instance.createDebugUtilsMessengerEXT(&callback_ci, nullptr, &callback, dldi) != 340 if (instance->createDebugUtilsMessengerEXT(&callback_ci, nullptr, &unsafe_callback, dld) !=
230 vk::Result::eSuccess) { 341 vk::Result::eSuccess) {
231 LOG_ERROR(Render_Vulkan, "Failed to create debug callback"); 342 LOG_ERROR(Render_Vulkan, "Failed to create debug callback");
232 return {}; 343 return false;
344 }
345 debug_callback = UniqueDebugUtilsMessengerEXT(unsafe_callback, {*instance, nullptr, dld});
346 return true;
347}
348
349bool RendererVulkan::CreateSurface() {
350 [[maybe_unused]] const auto& window_info = render_window.GetWindowInfo();
351 VkSurfaceKHR unsafe_surface = nullptr;
352
353#ifdef _WIN32
354 if (window_info.type == Core::Frontend::WindowSystemType::Windows) {
355 const HWND hWnd = static_cast<HWND>(window_info.render_surface);
356 const VkWin32SurfaceCreateInfoKHR win32_ci{VK_STRUCTURE_TYPE_WIN32_SURFACE_CREATE_INFO_KHR,
357 nullptr, 0, nullptr, hWnd};
358 const auto vkCreateWin32SurfaceKHR = reinterpret_cast<PFN_vkCreateWin32SurfaceKHR>(
359 dld.vkGetInstanceProcAddr(*instance, "vkCreateWin32SurfaceKHR"));
360 if (!vkCreateWin32SurfaceKHR || vkCreateWin32SurfaceKHR(instance.get(), &win32_ci, nullptr,
361 &unsafe_surface) != VK_SUCCESS) {
362 LOG_ERROR(Render_Vulkan, "Failed to initialize Win32 surface");
363 return false;
364 }
365 }
366#endif
367#ifdef __linux__
368 if (window_info.type == Core::Frontend::WindowSystemType::X11) {
369 const VkXlibSurfaceCreateInfoKHR xlib_ci{
370 VK_STRUCTURE_TYPE_XLIB_SURFACE_CREATE_INFO_KHR, nullptr, 0,
371 static_cast<Display*>(window_info.display_connection),
372 reinterpret_cast<Window>(window_info.render_surface)};
373 const auto vkCreateXlibSurfaceKHR = reinterpret_cast<PFN_vkCreateXlibSurfaceKHR>(
374 dld.vkGetInstanceProcAddr(*instance, "vkCreateXlibSurfaceKHR"));
375 if (!vkCreateXlibSurfaceKHR || vkCreateXlibSurfaceKHR(instance.get(), &xlib_ci, nullptr,
376 &unsafe_surface) != VK_SUCCESS) {
377 LOG_ERROR(Render_Vulkan, "Failed to initialize Xlib surface");
378 return false;
379 }
380 }
381 if (window_info.type == Core::Frontend::WindowSystemType::Wayland) {
382 const VkWaylandSurfaceCreateInfoKHR wayland_ci{
383 VK_STRUCTURE_TYPE_WAYLAND_SURFACE_CREATE_INFO_KHR, nullptr, 0,
384 static_cast<wl_display*>(window_info.display_connection),
385 static_cast<wl_surface*>(window_info.render_surface)};
386 const auto vkCreateWaylandSurfaceKHR = reinterpret_cast<PFN_vkCreateWaylandSurfaceKHR>(
387 dld.vkGetInstanceProcAddr(*instance, "vkCreateWaylandSurfaceKHR"));
388 if (!vkCreateWaylandSurfaceKHR ||
389 vkCreateWaylandSurfaceKHR(instance.get(), &wayland_ci, nullptr, &unsafe_surface) !=
390 VK_SUCCESS) {
391 LOG_ERROR(Render_Vulkan, "Failed to initialize Wayland surface");
392 return false;
393 }
394 }
395#endif
396 if (!unsafe_surface) {
397 LOG_ERROR(Render_Vulkan, "Presentation not supported on this platform");
398 return false;
233 } 399 }
234 return callback; 400
401 surface = UniqueSurfaceKHR(unsafe_surface, {*instance, nullptr, dld});
402 return true;
235} 403}
236 404
237bool RendererVulkan::PickDevices(const vk::DispatchLoaderDynamic& dldi) { 405bool RendererVulkan::PickDevices() {
238 const auto devices = instance.enumeratePhysicalDevices(dldi); 406 const auto devices = instance->enumeratePhysicalDevices(dld);
239 407
240 // TODO(Rodrigo): Choose device from config file
241 const s32 device_index = Settings::values.vulkan_device; 408 const s32 device_index = Settings::values.vulkan_device;
242 if (device_index < 0 || device_index >= static_cast<s32>(devices.size())) { 409 if (device_index < 0 || device_index >= static_cast<s32>(devices.size())) {
243 LOG_ERROR(Render_Vulkan, "Invalid device index {}!", device_index); 410 LOG_ERROR(Render_Vulkan, "Invalid device index {}!", device_index);
244 return false; 411 return false;
245 } 412 }
246 const vk::PhysicalDevice physical_device = devices[device_index]; 413 const vk::PhysicalDevice physical_device = devices[static_cast<std::size_t>(device_index)];
247 414
248 if (!VKDevice::IsSuitable(dldi, physical_device, surface)) { 415 if (!VKDevice::IsSuitable(physical_device, *surface, dld)) {
249 return false; 416 return false;
250 } 417 }
251 418
252 device = std::make_unique<VKDevice>(dldi, physical_device, surface); 419 device = std::make_unique<VKDevice>(dld, physical_device, *surface);
253 return device->Create(dldi, instance); 420 return device->Create(*instance);
254} 421}
255 422
256void RendererVulkan::Report() const { 423void RendererVulkan::Report() const {
@@ -276,4 +443,33 @@ void RendererVulkan::Report() const {
276 telemetry_session.AddField(field, "GPU_Vulkan_Extensions", extensions); 443 telemetry_session.AddField(field, "GPU_Vulkan_Extensions", extensions);
277} 444}
278 445
446std::vector<std::string> RendererVulkan::EnumerateDevices() {
447 // Avoid putting DispatchLoaderDynamic, it's too large
448 auto dld_memory = std::make_unique<vk::DispatchLoaderDynamic>();
449 auto& dld = *dld_memory;
450
451 Common::DynamicLibrary library = OpenVulkanLibrary();
452 UniqueInstance instance = CreateInstance(library, dld);
453 if (!instance) {
454 return {};
455 }
456
457 u32 num_devices;
458 if (instance->enumeratePhysicalDevices(&num_devices, nullptr, dld) != vk::Result::eSuccess) {
459 return {};
460 }
461 std::vector<vk::PhysicalDevice> devices(num_devices);
462 if (instance->enumeratePhysicalDevices(&num_devices, devices.data(), dld) !=
463 vk::Result::eSuccess) {
464 return {};
465 }
466
467 std::vector<std::string> names;
468 names.reserve(num_devices);
469 for (auto& device : devices) {
470 names.push_back(device.getProperties(dld).deviceName);
471 }
472 return names;
473}
474
279} // namespace Vulkan 475} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/renderer_vulkan.h b/src/video_core/renderer_vulkan/renderer_vulkan.h
index d14384e79..42e253de5 100644
--- a/src/video_core/renderer_vulkan/renderer_vulkan.h
+++ b/src/video_core/renderer_vulkan/renderer_vulkan.h
@@ -6,8 +6,11 @@
6 6
7#include <memory> 7#include <memory>
8#include <optional> 8#include <optional>
9#include <string>
9#include <vector> 10#include <vector>
10 11
12#include "common/dynamic_library.h"
13
11#include "video_core/renderer_base.h" 14#include "video_core/renderer_base.h"
12#include "video_core/renderer_vulkan/declarations.h" 15#include "video_core/renderer_vulkan/declarations.h"
13 16
@@ -44,18 +47,24 @@ public:
44 void SwapBuffers(const Tegra::FramebufferConfig* framebuffer) override; 47 void SwapBuffers(const Tegra::FramebufferConfig* framebuffer) override;
45 bool TryPresent(int timeout_ms) override; 48 bool TryPresent(int timeout_ms) override;
46 49
50 static std::vector<std::string> EnumerateDevices();
51
47private: 52private:
48 std::optional<vk::DebugUtilsMessengerEXT> CreateDebugCallback( 53 bool CreateDebugCallback();
49 const vk::DispatchLoaderDynamic& dldi);
50 54
51 bool PickDevices(const vk::DispatchLoaderDynamic& dldi); 55 bool CreateSurface();
56
57 bool PickDevices();
52 58
53 void Report() const; 59 void Report() const;
54 60
55 Core::System& system; 61 Core::System& system;
56 62
57 vk::Instance instance; 63 Common::DynamicLibrary library;
58 vk::SurfaceKHR surface; 64 vk::DispatchLoaderDynamic dld;
65
66 UniqueInstance instance;
67 UniqueSurfaceKHR surface;
59 68
60 VKScreenInfo screen_info; 69 VKScreenInfo screen_info;
61 70
diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp
index 1ba544943..326d74f29 100644
--- a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp
@@ -42,8 +42,8 @@ auto CreateStreamBuffer(const VKDevice& device, VKScheduler& scheduler) {
42} // Anonymous namespace 42} // Anonymous namespace
43 43
44CachedBufferBlock::CachedBufferBlock(const VKDevice& device, VKMemoryManager& memory_manager, 44CachedBufferBlock::CachedBufferBlock(const VKDevice& device, VKMemoryManager& memory_manager,
45 CacheAddr cache_addr, std::size_t size) 45 VAddr cpu_addr, std::size_t size)
46 : VideoCommon::BufferBlock{cache_addr, size} { 46 : VideoCommon::BufferBlock{cpu_addr, size} {
47 const vk::BufferCreateInfo buffer_ci({}, static_cast<vk::DeviceSize>(size), 47 const vk::BufferCreateInfo buffer_ci({}, static_cast<vk::DeviceSize>(size),
48 BufferUsage | vk::BufferUsageFlagBits::eTransferSrc | 48 BufferUsage | vk::BufferUsageFlagBits::eTransferSrc |
49 vk::BufferUsageFlagBits::eTransferDst, 49 vk::BufferUsageFlagBits::eTransferDst,
@@ -68,8 +68,8 @@ VKBufferCache::VKBufferCache(VideoCore::RasterizerInterface& rasterizer, Core::S
68 68
69VKBufferCache::~VKBufferCache() = default; 69VKBufferCache::~VKBufferCache() = default;
70 70
71Buffer VKBufferCache::CreateBlock(CacheAddr cache_addr, std::size_t size) { 71Buffer VKBufferCache::CreateBlock(VAddr cpu_addr, std::size_t size) {
72 return std::make_shared<CachedBufferBlock>(device, memory_manager, cache_addr, size); 72 return std::make_shared<CachedBufferBlock>(device, memory_manager, cpu_addr, size);
73} 73}
74 74
75const vk::Buffer* VKBufferCache::ToHandle(const Buffer& buffer) { 75const vk::Buffer* VKBufferCache::ToHandle(const Buffer& buffer) {
diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.h b/src/video_core/renderer_vulkan/vk_buffer_cache.h
index 3f38eed0c..508214618 100644
--- a/src/video_core/renderer_vulkan/vk_buffer_cache.h
+++ b/src/video_core/renderer_vulkan/vk_buffer_cache.h
@@ -30,7 +30,7 @@ class VKScheduler;
30class CachedBufferBlock final : public VideoCommon::BufferBlock { 30class CachedBufferBlock final : public VideoCommon::BufferBlock {
31public: 31public:
32 explicit CachedBufferBlock(const VKDevice& device, VKMemoryManager& memory_manager, 32 explicit CachedBufferBlock(const VKDevice& device, VKMemoryManager& memory_manager,
33 CacheAddr cache_addr, std::size_t size); 33 VAddr cpu_addr, std::size_t size);
34 ~CachedBufferBlock(); 34 ~CachedBufferBlock();
35 35
36 const vk::Buffer* GetHandle() const { 36 const vk::Buffer* GetHandle() const {
@@ -55,7 +55,7 @@ public:
55protected: 55protected:
56 void WriteBarrier() override {} 56 void WriteBarrier() override {}
57 57
58 Buffer CreateBlock(CacheAddr cache_addr, std::size_t size) override; 58 Buffer CreateBlock(VAddr cpu_addr, std::size_t size) override;
59 59
60 const vk::Buffer* ToHandle(const Buffer& buffer) override; 60 const vk::Buffer* ToHandle(const Buffer& buffer) override;
61 61
diff --git a/src/video_core/renderer_vulkan/vk_device.cpp b/src/video_core/renderer_vulkan/vk_device.cpp
index 7aafb5e59..6f4ae9132 100644
--- a/src/video_core/renderer_vulkan/vk_device.cpp
+++ b/src/video_core/renderer_vulkan/vk_device.cpp
@@ -10,6 +10,7 @@
10#include <string_view> 10#include <string_view>
11#include <thread> 11#include <thread>
12#include <vector> 12#include <vector>
13
13#include "common/assert.h" 14#include "common/assert.h"
14#include "core/settings.h" 15#include "core/settings.h"
15#include "video_core/renderer_vulkan/declarations.h" 16#include "video_core/renderer_vulkan/declarations.h"
@@ -35,20 +36,20 @@ void SetNext(void**& next, T& data) {
35} 36}
36 37
37template <typename T> 38template <typename T>
38T GetFeatures(vk::PhysicalDevice physical, const vk::DispatchLoaderDynamic& dldi) { 39T GetFeatures(vk::PhysicalDevice physical, const vk::DispatchLoaderDynamic& dld) {
39 vk::PhysicalDeviceFeatures2 features; 40 vk::PhysicalDeviceFeatures2 features;
40 T extension_features; 41 T extension_features;
41 features.pNext = &extension_features; 42 features.pNext = &extension_features;
42 physical.getFeatures2(&features, dldi); 43 physical.getFeatures2(&features, dld);
43 return extension_features; 44 return extension_features;
44} 45}
45 46
46template <typename T> 47template <typename T>
47T GetProperties(vk::PhysicalDevice physical, const vk::DispatchLoaderDynamic& dldi) { 48T GetProperties(vk::PhysicalDevice physical, const vk::DispatchLoaderDynamic& dld) {
48 vk::PhysicalDeviceProperties2 properties; 49 vk::PhysicalDeviceProperties2 properties;
49 T extension_properties; 50 T extension_properties;
50 properties.pNext = &extension_properties; 51 properties.pNext = &extension_properties;
51 physical.getProperties2(&properties, dldi); 52 physical.getProperties2(&properties, dld);
52 return extension_properties; 53 return extension_properties;
53} 54}
54 55
@@ -78,19 +79,19 @@ vk::FormatFeatureFlags GetFormatFeatures(vk::FormatProperties properties, Format
78 79
79} // Anonymous namespace 80} // Anonymous namespace
80 81
81VKDevice::VKDevice(const vk::DispatchLoaderDynamic& dldi, vk::PhysicalDevice physical, 82VKDevice::VKDevice(const vk::DispatchLoaderDynamic& dld, vk::PhysicalDevice physical,
82 vk::SurfaceKHR surface) 83 vk::SurfaceKHR surface)
83 : physical{physical}, properties{physical.getProperties(dldi)}, 84 : dld{dld}, physical{physical}, properties{physical.getProperties(dld)},
84 format_properties{GetFormatProperties(dldi, physical)} { 85 format_properties{GetFormatProperties(dld, physical)} {
85 SetupFamilies(dldi, surface); 86 SetupFamilies(surface);
86 SetupFeatures(dldi); 87 SetupFeatures();
87} 88}
88 89
89VKDevice::~VKDevice() = default; 90VKDevice::~VKDevice() = default;
90 91
91bool VKDevice::Create(const vk::DispatchLoaderDynamic& dldi, vk::Instance instance) { 92bool VKDevice::Create(vk::Instance instance) {
92 const auto queue_cis = GetDeviceQueueCreateInfos(); 93 const auto queue_cis = GetDeviceQueueCreateInfos();
93 const std::vector extensions = LoadExtensions(dldi); 94 const std::vector extensions = LoadExtensions();
94 95
95 vk::PhysicalDeviceFeatures2 features2; 96 vk::PhysicalDeviceFeatures2 features2;
96 void** next = &features2.pNext; 97 void** next = &features2.pNext;
@@ -165,15 +166,13 @@ bool VKDevice::Create(const vk::DispatchLoaderDynamic& dldi, vk::Instance instan
165 nullptr); 166 nullptr);
166 device_ci.pNext = &features2; 167 device_ci.pNext = &features2;
167 168
168 vk::Device dummy_logical; 169 vk::Device unsafe_logical;
169 if (physical.createDevice(&device_ci, nullptr, &dummy_logical, dldi) != vk::Result::eSuccess) { 170 if (physical.createDevice(&device_ci, nullptr, &unsafe_logical, dld) != vk::Result::eSuccess) {
170 LOG_CRITICAL(Render_Vulkan, "Logical device failed to be created!"); 171 LOG_CRITICAL(Render_Vulkan, "Logical device failed to be created!");
171 return false; 172 return false;
172 } 173 }
173 174 dld.init(instance, dld.vkGetInstanceProcAddr, unsafe_logical);
174 dld.init(instance, dldi.vkGetInstanceProcAddr, dummy_logical, dldi.vkGetDeviceProcAddr); 175 logical = UniqueDevice(unsafe_logical, {nullptr, dld});
175 logical = UniqueDevice(
176 dummy_logical, vk::ObjectDestroy<vk::NoParent, vk::DispatchLoaderDynamic>(nullptr, dld));
177 176
178 CollectTelemetryParameters(); 177 CollectTelemetryParameters();
179 178
@@ -235,8 +234,8 @@ void VKDevice::ReportLoss() const {
235 // *(VKGraphicsPipeline*)data[0] 234 // *(VKGraphicsPipeline*)data[0]
236} 235}
237 236
238bool VKDevice::IsOptimalAstcSupported(const vk::PhysicalDeviceFeatures& features, 237bool VKDevice::IsOptimalAstcSupported(const vk::PhysicalDeviceFeatures& features) const {
239 const vk::DispatchLoaderDynamic& dldi) const { 238 // Disable for now to avoid converting ASTC twice.
240 static constexpr std::array astc_formats = { 239 static constexpr std::array astc_formats = {
241 vk::Format::eAstc4x4UnormBlock, vk::Format::eAstc4x4SrgbBlock, 240 vk::Format::eAstc4x4UnormBlock, vk::Format::eAstc4x4SrgbBlock,
242 vk::Format::eAstc5x4UnormBlock, vk::Format::eAstc5x4SrgbBlock, 241 vk::Format::eAstc5x4UnormBlock, vk::Format::eAstc5x4SrgbBlock,
@@ -260,7 +259,7 @@ bool VKDevice::IsOptimalAstcSupported(const vk::PhysicalDeviceFeatures& features
260 vk::FormatFeatureFlagBits::eBlitDst | vk::FormatFeatureFlagBits::eTransferSrc | 259 vk::FormatFeatureFlagBits::eBlitDst | vk::FormatFeatureFlagBits::eTransferSrc |
261 vk::FormatFeatureFlagBits::eTransferDst}; 260 vk::FormatFeatureFlagBits::eTransferDst};
262 for (const auto format : astc_formats) { 261 for (const auto format : astc_formats) {
263 const auto format_properties{physical.getFormatProperties(format, dldi)}; 262 const auto format_properties{physical.getFormatProperties(format, dld)};
264 if (!(format_properties.optimalTilingFeatures & format_feature_usage)) { 263 if (!(format_properties.optimalTilingFeatures & format_feature_usage)) {
265 return false; 264 return false;
266 } 265 }
@@ -279,11 +278,9 @@ bool VKDevice::IsFormatSupported(vk::Format wanted_format, vk::FormatFeatureFlag
279 return (supported_usage & wanted_usage) == wanted_usage; 278 return (supported_usage & wanted_usage) == wanted_usage;
280} 279}
281 280
282bool VKDevice::IsSuitable(const vk::DispatchLoaderDynamic& dldi, vk::PhysicalDevice physical, 281bool VKDevice::IsSuitable(vk::PhysicalDevice physical, vk::SurfaceKHR surface,
283 vk::SurfaceKHR surface) { 282 const vk::DispatchLoaderDynamic& dld) {
284 bool is_suitable = true; 283 static constexpr std::array required_extensions = {
285
286 constexpr std::array required_extensions = {
287 VK_KHR_SWAPCHAIN_EXTENSION_NAME, 284 VK_KHR_SWAPCHAIN_EXTENSION_NAME,
288 VK_KHR_16BIT_STORAGE_EXTENSION_NAME, 285 VK_KHR_16BIT_STORAGE_EXTENSION_NAME,
289 VK_KHR_8BIT_STORAGE_EXTENSION_NAME, 286 VK_KHR_8BIT_STORAGE_EXTENSION_NAME,
@@ -293,9 +290,10 @@ bool VKDevice::IsSuitable(const vk::DispatchLoaderDynamic& dldi, vk::PhysicalDev
293 VK_EXT_SHADER_SUBGROUP_VOTE_EXTENSION_NAME, 290 VK_EXT_SHADER_SUBGROUP_VOTE_EXTENSION_NAME,
294 VK_EXT_HOST_QUERY_RESET_EXTENSION_NAME, 291 VK_EXT_HOST_QUERY_RESET_EXTENSION_NAME,
295 }; 292 };
293 bool is_suitable = true;
296 std::bitset<required_extensions.size()> available_extensions{}; 294 std::bitset<required_extensions.size()> available_extensions{};
297 295
298 for (const auto& prop : physical.enumerateDeviceExtensionProperties(nullptr, dldi)) { 296 for (const auto& prop : physical.enumerateDeviceExtensionProperties(nullptr, dld)) {
299 for (std::size_t i = 0; i < required_extensions.size(); ++i) { 297 for (std::size_t i = 0; i < required_extensions.size(); ++i) {
300 if (available_extensions[i]) { 298 if (available_extensions[i]) {
301 continue; 299 continue;
@@ -315,7 +313,7 @@ bool VKDevice::IsSuitable(const vk::DispatchLoaderDynamic& dldi, vk::PhysicalDev
315 } 313 }
316 314
317 bool has_graphics{}, has_present{}; 315 bool has_graphics{}, has_present{};
318 const auto queue_family_properties = physical.getQueueFamilyProperties(dldi); 316 const auto queue_family_properties = physical.getQueueFamilyProperties(dld);
319 for (u32 i = 0; i < static_cast<u32>(queue_family_properties.size()); ++i) { 317 for (u32 i = 0; i < static_cast<u32>(queue_family_properties.size()); ++i) {
320 const auto& family = queue_family_properties[i]; 318 const auto& family = queue_family_properties[i];
321 if (family.queueCount == 0) { 319 if (family.queueCount == 0) {
@@ -323,7 +321,7 @@ bool VKDevice::IsSuitable(const vk::DispatchLoaderDynamic& dldi, vk::PhysicalDev
323 } 321 }
324 has_graphics |= 322 has_graphics |=
325 (family.queueFlags & vk::QueueFlagBits::eGraphics) != static_cast<vk::QueueFlagBits>(0); 323 (family.queueFlags & vk::QueueFlagBits::eGraphics) != static_cast<vk::QueueFlagBits>(0);
326 has_present |= physical.getSurfaceSupportKHR(i, surface, dldi) != 0; 324 has_present |= physical.getSurfaceSupportKHR(i, surface, dld) != 0;
327 } 325 }
328 if (!has_graphics || !has_present) { 326 if (!has_graphics || !has_present) {
329 LOG_ERROR(Render_Vulkan, "Device lacks a graphics and present queue"); 327 LOG_ERROR(Render_Vulkan, "Device lacks a graphics and present queue");
@@ -331,7 +329,7 @@ bool VKDevice::IsSuitable(const vk::DispatchLoaderDynamic& dldi, vk::PhysicalDev
331 } 329 }
332 330
333 // TODO(Rodrigo): Check if the device matches all requeriments. 331 // TODO(Rodrigo): Check if the device matches all requeriments.
334 const auto properties{physical.getProperties(dldi)}; 332 const auto properties{physical.getProperties(dld)};
335 const auto& limits{properties.limits}; 333 const auto& limits{properties.limits};
336 334
337 constexpr u32 required_ubo_size = 65536; 335 constexpr u32 required_ubo_size = 65536;
@@ -348,7 +346,7 @@ bool VKDevice::IsSuitable(const vk::DispatchLoaderDynamic& dldi, vk::PhysicalDev
348 is_suitable = false; 346 is_suitable = false;
349 } 347 }
350 348
351 const auto features{physical.getFeatures(dldi)}; 349 const auto features{physical.getFeatures(dld)};
352 const std::array feature_report = { 350 const std::array feature_report = {
353 std::make_pair(features.vertexPipelineStoresAndAtomics, "vertexPipelineStoresAndAtomics"), 351 std::make_pair(features.vertexPipelineStoresAndAtomics, "vertexPipelineStoresAndAtomics"),
354 std::make_pair(features.independentBlend, "independentBlend"), 352 std::make_pair(features.independentBlend, "independentBlend"),
@@ -380,7 +378,7 @@ bool VKDevice::IsSuitable(const vk::DispatchLoaderDynamic& dldi, vk::PhysicalDev
380 return is_suitable; 378 return is_suitable;
381} 379}
382 380
383std::vector<const char*> VKDevice::LoadExtensions(const vk::DispatchLoaderDynamic& dldi) { 381std::vector<const char*> VKDevice::LoadExtensions() {
384 std::vector<const char*> extensions; 382 std::vector<const char*> extensions;
385 const auto Test = [&](const vk::ExtensionProperties& extension, 383 const auto Test = [&](const vk::ExtensionProperties& extension,
386 std::optional<std::reference_wrapper<bool>> status, const char* name, 384 std::optional<std::reference_wrapper<bool>> status, const char* name,
@@ -411,7 +409,7 @@ std::vector<const char*> VKDevice::LoadExtensions(const vk::DispatchLoaderDynami
411 bool has_khr_shader_float16_int8{}; 409 bool has_khr_shader_float16_int8{};
412 bool has_ext_subgroup_size_control{}; 410 bool has_ext_subgroup_size_control{};
413 bool has_ext_transform_feedback{}; 411 bool has_ext_transform_feedback{};
414 for (const auto& extension : physical.enumerateDeviceExtensionProperties(nullptr, dldi)) { 412 for (const auto& extension : physical.enumerateDeviceExtensionProperties(nullptr, dld)) {
415 Test(extension, khr_uniform_buffer_standard_layout, 413 Test(extension, khr_uniform_buffer_standard_layout,
416 VK_KHR_UNIFORM_BUFFER_STANDARD_LAYOUT_EXTENSION_NAME, true); 414 VK_KHR_UNIFORM_BUFFER_STANDARD_LAYOUT_EXTENSION_NAME, true);
417 Test(extension, has_khr_shader_float16_int8, VK_KHR_SHADER_FLOAT16_INT8_EXTENSION_NAME, 415 Test(extension, has_khr_shader_float16_int8, VK_KHR_SHADER_FLOAT16_INT8_EXTENSION_NAME,
@@ -433,15 +431,15 @@ std::vector<const char*> VKDevice::LoadExtensions(const vk::DispatchLoaderDynami
433 431
434 if (has_khr_shader_float16_int8) { 432 if (has_khr_shader_float16_int8) {
435 is_float16_supported = 433 is_float16_supported =
436 GetFeatures<vk::PhysicalDeviceFloat16Int8FeaturesKHR>(physical, dldi).shaderFloat16; 434 GetFeatures<vk::PhysicalDeviceFloat16Int8FeaturesKHR>(physical, dld).shaderFloat16;
437 extensions.push_back(VK_KHR_SHADER_FLOAT16_INT8_EXTENSION_NAME); 435 extensions.push_back(VK_KHR_SHADER_FLOAT16_INT8_EXTENSION_NAME);
438 } 436 }
439 437
440 if (has_ext_subgroup_size_control) { 438 if (has_ext_subgroup_size_control) {
441 const auto features = 439 const auto features =
442 GetFeatures<vk::PhysicalDeviceSubgroupSizeControlFeaturesEXT>(physical, dldi); 440 GetFeatures<vk::PhysicalDeviceSubgroupSizeControlFeaturesEXT>(physical, dld);
443 const auto properties = 441 const auto properties =
444 GetProperties<vk::PhysicalDeviceSubgroupSizeControlPropertiesEXT>(physical, dldi); 442 GetProperties<vk::PhysicalDeviceSubgroupSizeControlPropertiesEXT>(physical, dld);
445 443
446 is_warp_potentially_bigger = properties.maxSubgroupSize > GuestWarpSize; 444 is_warp_potentially_bigger = properties.maxSubgroupSize > GuestWarpSize;
447 445
@@ -456,9 +454,9 @@ std::vector<const char*> VKDevice::LoadExtensions(const vk::DispatchLoaderDynami
456 454
457 if (has_ext_transform_feedback) { 455 if (has_ext_transform_feedback) {
458 const auto features = 456 const auto features =
459 GetFeatures<vk::PhysicalDeviceTransformFeedbackFeaturesEXT>(physical, dldi); 457 GetFeatures<vk::PhysicalDeviceTransformFeedbackFeaturesEXT>(physical, dld);
460 const auto properties = 458 const auto properties =
461 GetProperties<vk::PhysicalDeviceTransformFeedbackPropertiesEXT>(physical, dldi); 459 GetProperties<vk::PhysicalDeviceTransformFeedbackPropertiesEXT>(physical, dld);
462 460
463 if (features.transformFeedback && features.geometryStreams && 461 if (features.transformFeedback && features.geometryStreams &&
464 properties.maxTransformFeedbackStreams >= 4 && properties.maxTransformFeedbackBuffers && 462 properties.maxTransformFeedbackStreams >= 4 && properties.maxTransformFeedbackBuffers &&
@@ -471,10 +469,10 @@ std::vector<const char*> VKDevice::LoadExtensions(const vk::DispatchLoaderDynami
471 return extensions; 469 return extensions;
472} 470}
473 471
474void VKDevice::SetupFamilies(const vk::DispatchLoaderDynamic& dldi, vk::SurfaceKHR surface) { 472void VKDevice::SetupFamilies(vk::SurfaceKHR surface) {
475 std::optional<u32> graphics_family_, present_family_; 473 std::optional<u32> graphics_family_, present_family_;
476 474
477 const auto queue_family_properties = physical.getQueueFamilyProperties(dldi); 475 const auto queue_family_properties = physical.getQueueFamilyProperties(dld);
478 for (u32 i = 0; i < static_cast<u32>(queue_family_properties.size()); ++i) { 476 for (u32 i = 0; i < static_cast<u32>(queue_family_properties.size()); ++i) {
479 if (graphics_family_ && present_family_) 477 if (graphics_family_ && present_family_)
480 break; 478 break;
@@ -483,10 +481,12 @@ void VKDevice::SetupFamilies(const vk::DispatchLoaderDynamic& dldi, vk::SurfaceK
483 if (queue_family.queueCount == 0) 481 if (queue_family.queueCount == 0)
484 continue; 482 continue;
485 483
486 if (queue_family.queueFlags & vk::QueueFlagBits::eGraphics) 484 if (queue_family.queueFlags & vk::QueueFlagBits::eGraphics) {
487 graphics_family_ = i; 485 graphics_family_ = i;
488 if (physical.getSurfaceSupportKHR(i, surface, dldi)) 486 }
487 if (physical.getSurfaceSupportKHR(i, surface, dld)) {
489 present_family_ = i; 488 present_family_ = i;
489 }
490 } 490 }
491 ASSERT(graphics_family_ && present_family_); 491 ASSERT(graphics_family_ && present_family_);
492 492
@@ -494,10 +494,10 @@ void VKDevice::SetupFamilies(const vk::DispatchLoaderDynamic& dldi, vk::SurfaceK
494 present_family = *present_family_; 494 present_family = *present_family_;
495} 495}
496 496
497void VKDevice::SetupFeatures(const vk::DispatchLoaderDynamic& dldi) { 497void VKDevice::SetupFeatures() {
498 const auto supported_features{physical.getFeatures(dldi)}; 498 const auto supported_features{physical.getFeatures(dld)};
499 is_formatless_image_load_supported = supported_features.shaderStorageImageReadWithoutFormat; 499 is_formatless_image_load_supported = supported_features.shaderStorageImageReadWithoutFormat;
500 is_optimal_astc_supported = IsOptimalAstcSupported(supported_features, dldi); 500 is_optimal_astc_supported = IsOptimalAstcSupported(supported_features);
501} 501}
502 502
503void VKDevice::CollectTelemetryParameters() { 503void VKDevice::CollectTelemetryParameters() {
@@ -525,7 +525,7 @@ std::vector<vk::DeviceQueueCreateInfo> VKDevice::GetDeviceQueueCreateInfos() con
525} 525}
526 526
527std::unordered_map<vk::Format, vk::FormatProperties> VKDevice::GetFormatProperties( 527std::unordered_map<vk::Format, vk::FormatProperties> VKDevice::GetFormatProperties(
528 const vk::DispatchLoaderDynamic& dldi, vk::PhysicalDevice physical) { 528 const vk::DispatchLoaderDynamic& dld, vk::PhysicalDevice physical) {
529 static constexpr std::array formats{vk::Format::eA8B8G8R8UnormPack32, 529 static constexpr std::array formats{vk::Format::eA8B8G8R8UnormPack32,
530 vk::Format::eA8B8G8R8UintPack32, 530 vk::Format::eA8B8G8R8UintPack32,
531 vk::Format::eA8B8G8R8SnormPack32, 531 vk::Format::eA8B8G8R8SnormPack32,
@@ -606,7 +606,7 @@ std::unordered_map<vk::Format, vk::FormatProperties> VKDevice::GetFormatProperti
606 vk::Format::eE5B9G9R9UfloatPack32}; 606 vk::Format::eE5B9G9R9UfloatPack32};
607 std::unordered_map<vk::Format, vk::FormatProperties> format_properties; 607 std::unordered_map<vk::Format, vk::FormatProperties> format_properties;
608 for (const auto format : formats) { 608 for (const auto format : formats) {
609 format_properties.emplace(format, physical.getFormatProperties(format, dldi)); 609 format_properties.emplace(format, physical.getFormatProperties(format, dld));
610 } 610 }
611 return format_properties; 611 return format_properties;
612} 612}
diff --git a/src/video_core/renderer_vulkan/vk_device.h b/src/video_core/renderer_vulkan/vk_device.h
index 6e656517f..d9d809852 100644
--- a/src/video_core/renderer_vulkan/vk_device.h
+++ b/src/video_core/renderer_vulkan/vk_device.h
@@ -22,12 +22,12 @@ const u32 GuestWarpSize = 32;
22/// Handles data specific to a physical device. 22/// Handles data specific to a physical device.
23class VKDevice final { 23class VKDevice final {
24public: 24public:
25 explicit VKDevice(const vk::DispatchLoaderDynamic& dldi, vk::PhysicalDevice physical, 25 explicit VKDevice(const vk::DispatchLoaderDynamic& dld, vk::PhysicalDevice physical,
26 vk::SurfaceKHR surface); 26 vk::SurfaceKHR surface);
27 ~VKDevice(); 27 ~VKDevice();
28 28
29 /// Initializes the device. Returns true on success. 29 /// Initializes the device. Returns true on success.
30 bool Create(const vk::DispatchLoaderDynamic& dldi, vk::Instance instance); 30 bool Create(vk::Instance instance);
31 31
32 /** 32 /**
33 * Returns a format supported by the device for the passed requeriments. 33 * Returns a format supported by the device for the passed requeriments.
@@ -188,18 +188,18 @@ public:
188 } 188 }
189 189
190 /// Checks if the physical device is suitable. 190 /// Checks if the physical device is suitable.
191 static bool IsSuitable(const vk::DispatchLoaderDynamic& dldi, vk::PhysicalDevice physical, 191 static bool IsSuitable(vk::PhysicalDevice physical, vk::SurfaceKHR surface,
192 vk::SurfaceKHR surface); 192 const vk::DispatchLoaderDynamic& dld);
193 193
194private: 194private:
195 /// Loads extensions into a vector and stores available ones in this object. 195 /// Loads extensions into a vector and stores available ones in this object.
196 std::vector<const char*> LoadExtensions(const vk::DispatchLoaderDynamic& dldi); 196 std::vector<const char*> LoadExtensions();
197 197
198 /// Sets up queue families. 198 /// Sets up queue families.
199 void SetupFamilies(const vk::DispatchLoaderDynamic& dldi, vk::SurfaceKHR surface); 199 void SetupFamilies(vk::SurfaceKHR surface);
200 200
201 /// Sets up device features. 201 /// Sets up device features.
202 void SetupFeatures(const vk::DispatchLoaderDynamic& dldi); 202 void SetupFeatures();
203 203
204 /// Collects telemetry information from the device. 204 /// Collects telemetry information from the device.
205 void CollectTelemetryParameters(); 205 void CollectTelemetryParameters();
@@ -208,8 +208,7 @@ private:
208 std::vector<vk::DeviceQueueCreateInfo> GetDeviceQueueCreateInfos() const; 208 std::vector<vk::DeviceQueueCreateInfo> GetDeviceQueueCreateInfos() const;
209 209
210 /// Returns true if ASTC textures are natively supported. 210 /// Returns true if ASTC textures are natively supported.
211 bool IsOptimalAstcSupported(const vk::PhysicalDeviceFeatures& features, 211 bool IsOptimalAstcSupported(const vk::PhysicalDeviceFeatures& features) const;
212 const vk::DispatchLoaderDynamic& dldi) const;
213 212
214 /// Returns true if a format is supported. 213 /// Returns true if a format is supported.
215 bool IsFormatSupported(vk::Format wanted_format, vk::FormatFeatureFlags wanted_usage, 214 bool IsFormatSupported(vk::Format wanted_format, vk::FormatFeatureFlags wanted_usage,
@@ -217,10 +216,10 @@ private:
217 216
218 /// Returns the device properties for Vulkan formats. 217 /// Returns the device properties for Vulkan formats.
219 static std::unordered_map<vk::Format, vk::FormatProperties> GetFormatProperties( 218 static std::unordered_map<vk::Format, vk::FormatProperties> GetFormatProperties(
220 const vk::DispatchLoaderDynamic& dldi, vk::PhysicalDevice physical); 219 const vk::DispatchLoaderDynamic& dld, vk::PhysicalDevice physical);
221 220
222 const vk::PhysicalDevice physical; ///< Physical device.
223 vk::DispatchLoaderDynamic dld; ///< Device function pointers. 221 vk::DispatchLoaderDynamic dld; ///< Device function pointers.
222 vk::PhysicalDevice physical; ///< Physical device.
224 vk::PhysicalDeviceProperties properties; ///< Device properties. 223 vk::PhysicalDeviceProperties properties; ///< Device properties.
225 UniqueDevice logical; ///< Logical device. 224 UniqueDevice logical; ///< Logical device.
226 vk::Queue graphics_queue; ///< Main graphics queue. 225 vk::Queue graphics_queue; ///< Main graphics queue.
diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
index 557b9d662..c2a426aeb 100644
--- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
@@ -158,11 +158,11 @@ u32 FillDescriptorLayout(const ShaderEntries& entries,
158} // Anonymous namespace 158} // Anonymous namespace
159 159
160CachedShader::CachedShader(Core::System& system, Tegra::Engines::ShaderType stage, 160CachedShader::CachedShader(Core::System& system, Tegra::Engines::ShaderType stage,
161 GPUVAddr gpu_addr, VAddr cpu_addr, u8* host_ptr, 161 GPUVAddr gpu_addr, VAddr cpu_addr, ProgramCode program_code,
162 ProgramCode program_code, u32 main_offset) 162 u32 main_offset)
163 : RasterizerCacheObject{host_ptr}, gpu_addr{gpu_addr}, cpu_addr{cpu_addr}, 163 : RasterizerCacheObject{cpu_addr}, gpu_addr{gpu_addr}, program_code{std::move(program_code)},
164 program_code{std::move(program_code)}, registry{stage, GetEngine(system, stage)}, 164 registry{stage, GetEngine(system, stage)}, shader_ir{this->program_code, main_offset,
165 shader_ir{this->program_code, main_offset, compiler_settings, registry}, 165 compiler_settings, registry},
166 entries{GenerateShaderEntries(shader_ir)} {} 166 entries{GenerateShaderEntries(shader_ir)} {}
167 167
168CachedShader::~CachedShader() = default; 168CachedShader::~CachedShader() = default;
@@ -201,19 +201,19 @@ std::array<Shader, Maxwell::MaxShaderProgram> VKPipelineCache::GetShaders() {
201 201
202 auto& memory_manager{system.GPU().MemoryManager()}; 202 auto& memory_manager{system.GPU().MemoryManager()};
203 const GPUVAddr program_addr{GetShaderAddress(system, program)}; 203 const GPUVAddr program_addr{GetShaderAddress(system, program)};
204 const auto host_ptr{memory_manager.GetPointer(program_addr)}; 204 const std::optional cpu_addr = memory_manager.GpuToCpuAddress(program_addr);
205 auto shader = TryGet(host_ptr); 205 ASSERT(cpu_addr);
206 auto shader = cpu_addr ? TryGet(*cpu_addr) : nullptr;
206 if (!shader) { 207 if (!shader) {
208 const auto host_ptr{memory_manager.GetPointer(program_addr)};
209
207 // No shader found - create a new one 210 // No shader found - create a new one
208 constexpr u32 stage_offset = 10; 211 constexpr u32 stage_offset = 10;
209 const auto stage = static_cast<Tegra::Engines::ShaderType>(index == 0 ? 0 : index - 1); 212 const auto stage = static_cast<Tegra::Engines::ShaderType>(index == 0 ? 0 : index - 1);
210 auto code = GetShaderCode(memory_manager, program_addr, host_ptr, false); 213 auto code = GetShaderCode(memory_manager, program_addr, host_ptr, false);
211 214
212 const std::optional cpu_addr = memory_manager.GpuToCpuAddress(program_addr);
213 ASSERT(cpu_addr);
214
215 shader = std::make_shared<CachedShader>(system, stage, program_addr, *cpu_addr, 215 shader = std::make_shared<CachedShader>(system, stage, program_addr, *cpu_addr,
216 host_ptr, std::move(code), stage_offset); 216 std::move(code), stage_offset);
217 Register(shader); 217 Register(shader);
218 } 218 }
219 shaders[index] = std::move(shader); 219 shaders[index] = std::move(shader);
@@ -253,18 +253,19 @@ VKComputePipeline& VKPipelineCache::GetComputePipeline(const ComputePipelineCach
253 253
254 auto& memory_manager = system.GPU().MemoryManager(); 254 auto& memory_manager = system.GPU().MemoryManager();
255 const auto program_addr = key.shader; 255 const auto program_addr = key.shader;
256 const auto host_ptr = memory_manager.GetPointer(program_addr);
257 256
258 auto shader = TryGet(host_ptr); 257 const auto cpu_addr = memory_manager.GpuToCpuAddress(program_addr);
258 ASSERT(cpu_addr);
259
260 auto shader = cpu_addr ? TryGet(*cpu_addr) : nullptr;
259 if (!shader) { 261 if (!shader) {
260 // No shader found - create a new one 262 // No shader found - create a new one
261 const auto cpu_addr = memory_manager.GpuToCpuAddress(program_addr); 263 const auto host_ptr = memory_manager.GetPointer(program_addr);
262 ASSERT(cpu_addr);
263 264
264 auto code = GetShaderCode(memory_manager, program_addr, host_ptr, true); 265 auto code = GetShaderCode(memory_manager, program_addr, host_ptr, true);
265 constexpr u32 kernel_main_offset = 0; 266 constexpr u32 kernel_main_offset = 0;
266 shader = std::make_shared<CachedShader>(system, Tegra::Engines::ShaderType::Compute, 267 shader = std::make_shared<CachedShader>(system, Tegra::Engines::ShaderType::Compute,
267 program_addr, *cpu_addr, host_ptr, std::move(code), 268 program_addr, *cpu_addr, std::move(code),
268 kernel_main_offset); 269 kernel_main_offset);
269 Register(shader); 270 Register(shader);
270 } 271 }
@@ -345,8 +346,9 @@ VKPipelineCache::DecompileShaders(const GraphicsPipelineCacheKey& key) {
345 } 346 }
346 347
347 const GPUVAddr gpu_addr = GetShaderAddress(system, program_enum); 348 const GPUVAddr gpu_addr = GetShaderAddress(system, program_enum);
348 const auto host_ptr = memory_manager.GetPointer(gpu_addr); 349 const auto cpu_addr = memory_manager.GpuToCpuAddress(gpu_addr);
349 const auto shader = TryGet(host_ptr); 350 ASSERT(cpu_addr);
351 const auto shader = TryGet(*cpu_addr);
350 ASSERT(shader); 352 ASSERT(shader);
351 353
352 const std::size_t stage = index == 0 ? 0 : index - 1; // Stage indices are 0 - 5 354 const std::size_t stage = index == 0 ? 0 : index - 1; // Stage indices are 0 - 5
diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.h b/src/video_core/renderer_vulkan/vk_pipeline_cache.h
index c4c112290..27c01732f 100644
--- a/src/video_core/renderer_vulkan/vk_pipeline_cache.h
+++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.h
@@ -113,17 +113,13 @@ namespace Vulkan {
113class CachedShader final : public RasterizerCacheObject { 113class CachedShader final : public RasterizerCacheObject {
114public: 114public:
115 explicit CachedShader(Core::System& system, Tegra::Engines::ShaderType stage, GPUVAddr gpu_addr, 115 explicit CachedShader(Core::System& system, Tegra::Engines::ShaderType stage, GPUVAddr gpu_addr,
116 VAddr cpu_addr, u8* host_ptr, ProgramCode program_code, u32 main_offset); 116 VAddr cpu_addr, ProgramCode program_code, u32 main_offset);
117 ~CachedShader(); 117 ~CachedShader();
118 118
119 GPUVAddr GetGpuAddr() const { 119 GPUVAddr GetGpuAddr() const {
120 return gpu_addr; 120 return gpu_addr;
121 } 121 }
122 122
123 VAddr GetCpuAddr() const override {
124 return cpu_addr;
125 }
126
127 std::size_t GetSizeInBytes() const override { 123 std::size_t GetSizeInBytes() const override {
128 return program_code.size() * sizeof(u64); 124 return program_code.size() * sizeof(u64);
129 } 125 }
@@ -149,7 +145,6 @@ private:
149 Tegra::Engines::ShaderType stage); 145 Tegra::Engines::ShaderType stage);
150 146
151 GPUVAddr gpu_addr{}; 147 GPUVAddr gpu_addr{};
152 VAddr cpu_addr{};
153 ProgramCode program_code; 148 ProgramCode program_code;
154 VideoCommon::Shader::Registry registry; 149 VideoCommon::Shader::Registry registry;
155 VideoCommon::Shader::ShaderIR shader_ir; 150 VideoCommon::Shader::ShaderIR shader_ir;
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
index 58c69b786..0a2ea4fd4 100644
--- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp
+++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
@@ -495,20 +495,26 @@ void RasterizerVulkan::Query(GPUVAddr gpu_addr, VideoCore::QueryType type,
495 495
496void RasterizerVulkan::FlushAll() {} 496void RasterizerVulkan::FlushAll() {}
497 497
498void RasterizerVulkan::FlushRegion(CacheAddr addr, u64 size) { 498void RasterizerVulkan::FlushRegion(VAddr addr, u64 size) {
499 if (addr == 0 || size == 0) {
500 return;
501 }
499 texture_cache.FlushRegion(addr, size); 502 texture_cache.FlushRegion(addr, size);
500 buffer_cache.FlushRegion(addr, size); 503 buffer_cache.FlushRegion(addr, size);
501 query_cache.FlushRegion(addr, size); 504 query_cache.FlushRegion(addr, size);
502} 505}
503 506
504void RasterizerVulkan::InvalidateRegion(CacheAddr addr, u64 size) { 507void RasterizerVulkan::InvalidateRegion(VAddr addr, u64 size) {
508 if (addr == 0 || size == 0) {
509 return;
510 }
505 texture_cache.InvalidateRegion(addr, size); 511 texture_cache.InvalidateRegion(addr, size);
506 pipeline_cache.InvalidateRegion(addr, size); 512 pipeline_cache.InvalidateRegion(addr, size);
507 buffer_cache.InvalidateRegion(addr, size); 513 buffer_cache.InvalidateRegion(addr, size);
508 query_cache.InvalidateRegion(addr, size); 514 query_cache.InvalidateRegion(addr, size);
509} 515}
510 516
511void RasterizerVulkan::FlushAndInvalidateRegion(CacheAddr addr, u64 size) { 517void RasterizerVulkan::FlushAndInvalidateRegion(VAddr addr, u64 size) {
512 FlushRegion(addr, size); 518 FlushRegion(addr, size);
513 InvalidateRegion(addr, size); 519 InvalidateRegion(addr, size);
514} 520}
@@ -540,8 +546,7 @@ bool RasterizerVulkan::AccelerateDisplay(const Tegra::FramebufferConfig& config,
540 return false; 546 return false;
541 } 547 }
542 548
543 const u8* host_ptr{system.Memory().GetPointer(framebuffer_addr)}; 549 const auto surface{texture_cache.TryFindFramebufferSurface(framebuffer_addr)};
544 const auto surface{texture_cache.TryFindFramebufferSurface(host_ptr)};
545 if (!surface) { 550 if (!surface) {
546 return false; 551 return false;
547 } 552 }
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.h b/src/video_core/renderer_vulkan/vk_rasterizer.h
index 3185868e9..f642dde76 100644
--- a/src/video_core/renderer_vulkan/vk_rasterizer.h
+++ b/src/video_core/renderer_vulkan/vk_rasterizer.h
@@ -118,9 +118,9 @@ public:
118 void ResetCounter(VideoCore::QueryType type) override; 118 void ResetCounter(VideoCore::QueryType type) override;
119 void Query(GPUVAddr gpu_addr, VideoCore::QueryType type, std::optional<u64> timestamp) override; 119 void Query(GPUVAddr gpu_addr, VideoCore::QueryType type, std::optional<u64> timestamp) override;
120 void FlushAll() override; 120 void FlushAll() override;
121 void FlushRegion(CacheAddr addr, u64 size) override; 121 void FlushRegion(VAddr addr, u64 size) override;
122 void InvalidateRegion(CacheAddr addr, u64 size) override; 122 void InvalidateRegion(VAddr addr, u64 size) override;
123 void FlushAndInvalidateRegion(CacheAddr addr, u64 size) override; 123 void FlushAndInvalidateRegion(VAddr addr, u64 size) override;
124 void FlushCommands() override; 124 void FlushCommands() override;
125 void TickFrame() override; 125 void TickFrame() override;
126 bool AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Regs::Surface& src, 126 bool AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Regs::Surface& src,
diff --git a/src/video_core/shader/decode/other.cpp b/src/video_core/shader/decode/other.cpp
index e6edec459..d4f95b18c 100644
--- a/src/video_core/shader/decode/other.cpp
+++ b/src/video_core/shader/decode/other.cpp
@@ -71,18 +71,24 @@ u32 ShaderIR::DecodeOther(NodeBlock& bb, u32 pc) {
71 bb.push_back(Operation(OperationCode::Discard)); 71 bb.push_back(Operation(OperationCode::Discard));
72 break; 72 break;
73 } 73 }
74 case OpCode::Id::MOV_SYS: { 74 case OpCode::Id::S2R: {
75 const Node value = [this, instr] { 75 const Node value = [this, instr] {
76 switch (instr.sys20) { 76 switch (instr.sys20) {
77 case SystemVariable::LaneId: 77 case SystemVariable::LaneId:
78 LOG_WARNING(HW_GPU, "MOV_SYS instruction with LaneId is incomplete"); 78 LOG_WARNING(HW_GPU, "S2R instruction with LaneId is incomplete");
79 return Immediate(0U); 79 return Immediate(0U);
80 case SystemVariable::InvocationId: 80 case SystemVariable::InvocationId:
81 return Operation(OperationCode::InvocationId); 81 return Operation(OperationCode::InvocationId);
82 case SystemVariable::Ydirection: 82 case SystemVariable::Ydirection:
83 return Operation(OperationCode::YNegate); 83 return Operation(OperationCode::YNegate);
84 case SystemVariable::InvocationInfo: 84 case SystemVariable::InvocationInfo:
85 LOG_WARNING(HW_GPU, "MOV_SYS instruction with InvocationInfo is incomplete"); 85 LOG_WARNING(HW_GPU, "S2R instruction with InvocationInfo is incomplete");
86 return Immediate(0U);
87 case SystemVariable::WscaleFactorXY:
88 UNIMPLEMENTED_MSG("S2R WscaleFactorXY is not implemented");
89 return Immediate(0U);
90 case SystemVariable::WscaleFactorZ:
91 UNIMPLEMENTED_MSG("S2R WscaleFactorZ is not implemented");
86 return Immediate(0U); 92 return Immediate(0U);
87 case SystemVariable::Tid: { 93 case SystemVariable::Tid: {
88 Node value = Immediate(0); 94 Node value = Immediate(0);
diff --git a/src/video_core/shader/decode/texture.cpp b/src/video_core/shader/decode/texture.cpp
index 48350e042..6c4a1358b 100644
--- a/src/video_core/shader/decode/texture.cpp
+++ b/src/video_core/shader/decode/texture.cpp
@@ -780,20 +780,6 @@ Node4 ShaderIR::GetTldsCode(Instruction instr, TextureType texture_type, bool is
780 // When lod is used always is in gpr20 780 // When lod is used always is in gpr20
781 const Node lod = lod_enabled ? GetRegister(instr.gpr20) : Immediate(0); 781 const Node lod = lod_enabled ? GetRegister(instr.gpr20) : Immediate(0);
782 782
783 // Fill empty entries from the guest sampler
784 const std::size_t entry_coord_count = GetCoordCount(sampler.GetType());
785 if (type_coord_count != entry_coord_count) {
786 LOG_WARNING(HW_GPU, "Bound and built texture types mismatch");
787
788 // When the size is higher we insert zeroes
789 for (std::size_t i = type_coord_count; i < entry_coord_count; ++i) {
790 coords.push_back(GetRegister(Register::ZeroIndex));
791 }
792
793 // Then we ensure the size matches the number of entries (dropping unused values)
794 coords.resize(entry_coord_count);
795 }
796
797 Node4 values; 783 Node4 values;
798 for (u32 element = 0; element < values.size(); ++element) { 784 for (u32 element = 0; element < values.size(); ++element) {
799 auto coords_copy = coords; 785 auto coords_copy = coords;
diff --git a/src/video_core/shader/decode/video.cpp b/src/video_core/shader/decode/video.cpp
index b047cf870..64ba60ea2 100644
--- a/src/video_core/shader/decode/video.cpp
+++ b/src/video_core/shader/decode/video.cpp
@@ -10,16 +10,24 @@
10 10
11namespace VideoCommon::Shader { 11namespace VideoCommon::Shader {
12 12
13using std::move;
13using Tegra::Shader::Instruction; 14using Tegra::Shader::Instruction;
14using Tegra::Shader::OpCode; 15using Tegra::Shader::OpCode;
15using Tegra::Shader::Pred; 16using Tegra::Shader::Pred;
16using Tegra::Shader::VideoType; 17using Tegra::Shader::VideoType;
17using Tegra::Shader::VmadShr; 18using Tegra::Shader::VmadShr;
19using Tegra::Shader::VmnmxOperation;
20using Tegra::Shader::VmnmxType;
18 21
19u32 ShaderIR::DecodeVideo(NodeBlock& bb, u32 pc) { 22u32 ShaderIR::DecodeVideo(NodeBlock& bb, u32 pc) {
20 const Instruction instr = {program_code[pc]}; 23 const Instruction instr = {program_code[pc]};
21 const auto opcode = OpCode::Decode(instr); 24 const auto opcode = OpCode::Decode(instr);
22 25
26 if (opcode->get().GetId() == OpCode::Id::VMNMX) {
27 DecodeVMNMX(bb, instr);
28 return pc;
29 }
30
23 const Node op_a = 31 const Node op_a =
24 GetVideoOperand(GetRegister(instr.gpr8), instr.video.is_byte_chunk_a, instr.video.signed_a, 32 GetVideoOperand(GetRegister(instr.gpr8), instr.video.is_byte_chunk_a, instr.video.signed_a,
25 instr.video.type_a, instr.video.byte_height_a); 33 instr.video.type_a, instr.video.byte_height_a);
@@ -109,4 +117,54 @@ Node ShaderIR::GetVideoOperand(Node op, bool is_chunk, bool is_signed,
109 } 117 }
110} 118}
111 119
120void ShaderIR::DecodeVMNMX(NodeBlock& bb, Tegra::Shader::Instruction instr) {
121 UNIMPLEMENTED_IF(!instr.vmnmx.is_op_b_register);
122 UNIMPLEMENTED_IF(instr.vmnmx.SourceFormatA() != VmnmxType::Bits32);
123 UNIMPLEMENTED_IF(instr.vmnmx.SourceFormatB() != VmnmxType::Bits32);
124 UNIMPLEMENTED_IF(instr.vmnmx.is_src_a_signed != instr.vmnmx.is_src_b_signed);
125 UNIMPLEMENTED_IF(instr.vmnmx.sat);
126 UNIMPLEMENTED_IF(instr.generates_cc);
127
128 Node op_a = GetRegister(instr.gpr8);
129 Node op_b = GetRegister(instr.gpr20);
130 Node op_c = GetRegister(instr.gpr39);
131
132 const bool is_oper1_signed = instr.vmnmx.is_src_a_signed; // Stubbed
133 const bool is_oper2_signed = instr.vmnmx.is_dest_signed;
134
135 const auto operation_a = instr.vmnmx.mx ? OperationCode::IMax : OperationCode::IMin;
136 Node value = SignedOperation(operation_a, is_oper1_signed, move(op_a), move(op_b));
137
138 switch (instr.vmnmx.operation) {
139 case VmnmxOperation::Mrg_16H:
140 value = BitfieldInsert(move(op_c), move(value), 16, 16);
141 break;
142 case VmnmxOperation::Mrg_16L:
143 value = BitfieldInsert(move(op_c), move(value), 0, 16);
144 break;
145 case VmnmxOperation::Mrg_8B0:
146 value = BitfieldInsert(move(op_c), move(value), 0, 8);
147 break;
148 case VmnmxOperation::Mrg_8B2:
149 value = BitfieldInsert(move(op_c), move(value), 16, 8);
150 break;
151 case VmnmxOperation::Acc:
152 value = Operation(OperationCode::IAdd, move(value), move(op_c));
153 break;
154 case VmnmxOperation::Min:
155 value = SignedOperation(OperationCode::IMin, is_oper2_signed, move(value), move(op_c));
156 break;
157 case VmnmxOperation::Max:
158 value = SignedOperation(OperationCode::IMax, is_oper2_signed, move(value), move(op_c));
159 break;
160 case VmnmxOperation::Nop:
161 break;
162 default:
163 UNREACHABLE();
164 break;
165 }
166
167 SetRegister(bb, instr.gpr0, move(value));
168}
169
112} // namespace VideoCommon::Shader 170} // namespace VideoCommon::Shader
diff --git a/src/video_core/shader/shader_ir.cpp b/src/video_core/shader/shader_ir.cpp
index baf7188d2..8852c8a1b 100644
--- a/src/video_core/shader/shader_ir.cpp
+++ b/src/video_core/shader/shader_ir.cpp
@@ -359,6 +359,9 @@ Node ShaderIR::GetConditionCode(Tegra::Shader::ConditionCode cc) const {
359 switch (cc) { 359 switch (cc) {
360 case Tegra::Shader::ConditionCode::NEU: 360 case Tegra::Shader::ConditionCode::NEU:
361 return GetInternalFlag(InternalFlag::Zero, true); 361 return GetInternalFlag(InternalFlag::Zero, true);
362 case Tegra::Shader::ConditionCode::FCSM_TR:
363 UNIMPLEMENTED_MSG("EXIT.FCSM_TR is not implemented");
364 return MakeNode<PredicateNode>(Pred::NeverExecute, false);
362 default: 365 default:
363 UNIMPLEMENTED_MSG("Unimplemented condition code: {}", static_cast<u32>(cc)); 366 UNIMPLEMENTED_MSG("Unimplemented condition code: {}", static_cast<u32>(cc));
364 return MakeNode<PredicateNode>(Pred::NeverExecute, false); 367 return MakeNode<PredicateNode>(Pred::NeverExecute, false);
diff --git a/src/video_core/shader/shader_ir.h b/src/video_core/shader/shader_ir.h
index ca6c976c9..c6e7bdf50 100644
--- a/src/video_core/shader/shader_ir.h
+++ b/src/video_core/shader/shader_ir.h
@@ -354,6 +354,9 @@ private:
354 /// Marks the usage of a input or output attribute. 354 /// Marks the usage of a input or output attribute.
355 void MarkAttributeUsage(Tegra::Shader::Attribute::Index index, u64 element); 355 void MarkAttributeUsage(Tegra::Shader::Attribute::Index index, u64 element);
356 356
357 /// Decodes VMNMX instruction and inserts its code into the passed basic block.
358 void DecodeVMNMX(NodeBlock& bb, Tegra::Shader::Instruction instr);
359
357 void WriteTexInstructionFloat(NodeBlock& bb, Tegra::Shader::Instruction instr, 360 void WriteTexInstructionFloat(NodeBlock& bb, Tegra::Shader::Instruction instr,
358 const Node4& components); 361 const Node4& components);
359 362
diff --git a/src/video_core/texture_cache/surface_base.cpp b/src/video_core/texture_cache/surface_base.cpp
index 6fe815135..7af0e792c 100644
--- a/src/video_core/texture_cache/surface_base.cpp
+++ b/src/video_core/texture_cache/surface_base.cpp
@@ -190,22 +190,11 @@ void SurfaceBaseImpl::LoadBuffer(Tegra::MemoryManager& memory_manager,
190 MICROPROFILE_SCOPE(GPU_Load_Texture); 190 MICROPROFILE_SCOPE(GPU_Load_Texture);
191 auto& staging_buffer = staging_cache.GetBuffer(0); 191 auto& staging_buffer = staging_cache.GetBuffer(0);
192 u8* host_ptr; 192 u8* host_ptr;
193 is_continuous = memory_manager.IsBlockContinuous(gpu_addr, guest_memory_size); 193 // Use an extra temporal buffer
194 194 auto& tmp_buffer = staging_cache.GetBuffer(1);
195 // Handle continuouty 195 tmp_buffer.resize(guest_memory_size);
196 if (is_continuous) { 196 host_ptr = tmp_buffer.data();
197 // Use physical memory directly 197 memory_manager.ReadBlockUnsafe(gpu_addr, host_ptr, guest_memory_size);
198 host_ptr = memory_manager.GetPointer(gpu_addr);
199 if (!host_ptr) {
200 return;
201 }
202 } else {
203 // Use an extra temporal buffer
204 auto& tmp_buffer = staging_cache.GetBuffer(1);
205 tmp_buffer.resize(guest_memory_size);
206 host_ptr = tmp_buffer.data();
207 memory_manager.ReadBlockUnsafe(gpu_addr, host_ptr, guest_memory_size);
208 }
209 198
210 if (params.is_tiled) { 199 if (params.is_tiled) {
211 ASSERT_MSG(params.block_width == 0, "Block width is defined as {} on texture target {}", 200 ASSERT_MSG(params.block_width == 0, "Block width is defined as {} on texture target {}",
@@ -257,19 +246,10 @@ void SurfaceBaseImpl::FlushBuffer(Tegra::MemoryManager& memory_manager,
257 auto& staging_buffer = staging_cache.GetBuffer(0); 246 auto& staging_buffer = staging_cache.GetBuffer(0);
258 u8* host_ptr; 247 u8* host_ptr;
259 248
260 // Handle continuouty 249 // Use an extra temporal buffer
261 if (is_continuous) { 250 auto& tmp_buffer = staging_cache.GetBuffer(1);
262 // Use physical memory directly 251 tmp_buffer.resize(guest_memory_size);
263 host_ptr = memory_manager.GetPointer(gpu_addr); 252 host_ptr = tmp_buffer.data();
264 if (!host_ptr) {
265 return;
266 }
267 } else {
268 // Use an extra temporal buffer
269 auto& tmp_buffer = staging_cache.GetBuffer(1);
270 tmp_buffer.resize(guest_memory_size);
271 host_ptr = tmp_buffer.data();
272 }
273 253
274 if (params.is_tiled) { 254 if (params.is_tiled) {
275 ASSERT_MSG(params.block_width == 0, "Block width is defined as {}", params.block_width); 255 ASSERT_MSG(params.block_width == 0, "Block width is defined as {}", params.block_width);
@@ -300,9 +280,7 @@ void SurfaceBaseImpl::FlushBuffer(Tegra::MemoryManager& memory_manager,
300 } 280 }
301 } 281 }
302 } 282 }
303 if (!is_continuous) { 283 memory_manager.WriteBlockUnsafe(gpu_addr, host_ptr, guest_memory_size);
304 memory_manager.WriteBlockUnsafe(gpu_addr, host_ptr, guest_memory_size);
305 }
306} 284}
307 285
308} // namespace VideoCommon 286} // namespace VideoCommon
diff --git a/src/video_core/texture_cache/surface_base.h b/src/video_core/texture_cache/surface_base.h
index d7882a031..a39a8661b 100644
--- a/src/video_core/texture_cache/surface_base.h
+++ b/src/video_core/texture_cache/surface_base.h
@@ -68,8 +68,8 @@ public:
68 return gpu_addr; 68 return gpu_addr;
69 } 69 }
70 70
71 bool Overlaps(const CacheAddr start, const CacheAddr end) const { 71 bool Overlaps(const VAddr start, const VAddr end) const {
72 return (cache_addr < end) && (cache_addr_end > start); 72 return (cpu_addr < end) && (cpu_addr_end > start);
73 } 73 }
74 74
75 bool IsInside(const GPUVAddr other_start, const GPUVAddr other_end) { 75 bool IsInside(const GPUVAddr other_start, const GPUVAddr other_end) {
@@ -86,21 +86,13 @@ public:
86 return cpu_addr; 86 return cpu_addr;
87 } 87 }
88 88
89 void SetCpuAddr(const VAddr new_addr) { 89 VAddr GetCpuAddrEnd() const {
90 cpu_addr = new_addr; 90 return cpu_addr_end;
91 }
92
93 CacheAddr GetCacheAddr() const {
94 return cache_addr;
95 }
96
97 CacheAddr GetCacheAddrEnd() const {
98 return cache_addr_end;
99 } 91 }
100 92
101 void SetCacheAddr(const CacheAddr new_addr) { 93 void SetCpuAddr(const VAddr new_addr) {
102 cache_addr = new_addr; 94 cpu_addr = new_addr;
103 cache_addr_end = new_addr + guest_memory_size; 95 cpu_addr_end = new_addr + guest_memory_size;
104 } 96 }
105 97
106 const SurfaceParams& GetSurfaceParams() const { 98 const SurfaceParams& GetSurfaceParams() const {
@@ -119,14 +111,6 @@ public:
119 return mipmap_sizes[level]; 111 return mipmap_sizes[level];
120 } 112 }
121 113
122 void MarkAsContinuous(const bool is_continuous) {
123 this->is_continuous = is_continuous;
124 }
125
126 bool IsContinuous() const {
127 return is_continuous;
128 }
129
130 bool IsLinear() const { 114 bool IsLinear() const {
131 return !params.is_tiled; 115 return !params.is_tiled;
132 } 116 }
@@ -175,10 +159,8 @@ protected:
175 std::size_t guest_memory_size; 159 std::size_t guest_memory_size;
176 std::size_t host_memory_size; 160 std::size_t host_memory_size;
177 GPUVAddr gpu_addr{}; 161 GPUVAddr gpu_addr{};
178 CacheAddr cache_addr{};
179 CacheAddr cache_addr_end{};
180 VAddr cpu_addr{}; 162 VAddr cpu_addr{};
181 bool is_continuous{}; 163 VAddr cpu_addr_end{};
182 bool is_converted{}; 164 bool is_converted{};
183 165
184 std::vector<std::size_t> mipmap_sizes; 166 std::vector<std::size_t> mipmap_sizes;
diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h
index c8f8d659d..88fe3e25f 100644
--- a/src/video_core/texture_cache/texture_cache.h
+++ b/src/video_core/texture_cache/texture_cache.h
@@ -52,11 +52,9 @@ using RenderTargetConfig = Tegra::Engines::Maxwell3D::Regs::RenderTargetConfig;
52 52
53template <typename TSurface, typename TView> 53template <typename TSurface, typename TView>
54class TextureCache { 54class TextureCache {
55 using IntervalMap = boost::icl::interval_map<CacheAddr, std::set<TSurface>>;
56 using IntervalType = typename IntervalMap::interval_type;
57 55
58public: 56public:
59 void InvalidateRegion(CacheAddr addr, std::size_t size) { 57 void InvalidateRegion(VAddr addr, std::size_t size) {
60 std::lock_guard lock{mutex}; 58 std::lock_guard lock{mutex};
61 59
62 for (const auto& surface : GetSurfacesInRegion(addr, size)) { 60 for (const auto& surface : GetSurfacesInRegion(addr, size)) {
@@ -76,7 +74,7 @@ public:
76 guard_samplers = new_guard; 74 guard_samplers = new_guard;
77 } 75 }
78 76
79 void FlushRegion(CacheAddr addr, std::size_t size) { 77 void FlushRegion(VAddr addr, std::size_t size) {
80 std::lock_guard lock{mutex}; 78 std::lock_guard lock{mutex};
81 79
82 auto surfaces = GetSurfacesInRegion(addr, size); 80 auto surfaces = GetSurfacesInRegion(addr, size);
@@ -99,9 +97,9 @@ public:
99 return GetNullSurface(SurfaceParams::ExpectedTarget(entry)); 97 return GetNullSurface(SurfaceParams::ExpectedTarget(entry));
100 } 98 }
101 99
102 const auto host_ptr{system.GPU().MemoryManager().GetPointer(gpu_addr)}; 100 const std::optional<VAddr> cpu_addr =
103 const auto cache_addr{ToCacheAddr(host_ptr)}; 101 system.GPU().MemoryManager().GpuToCpuAddress(gpu_addr);
104 if (!cache_addr) { 102 if (!cpu_addr) {
105 return GetNullSurface(SurfaceParams::ExpectedTarget(entry)); 103 return GetNullSurface(SurfaceParams::ExpectedTarget(entry));
106 } 104 }
107 105
@@ -110,7 +108,7 @@ public:
110 } 108 }
111 109
112 const auto params{SurfaceParams::CreateForTexture(format_lookup_table, tic, entry)}; 110 const auto params{SurfaceParams::CreateForTexture(format_lookup_table, tic, entry)};
113 const auto [surface, view] = GetSurface(gpu_addr, cache_addr, params, true, false); 111 const auto [surface, view] = GetSurface(gpu_addr, *cpu_addr, params, true, false);
114 if (guard_samplers) { 112 if (guard_samplers) {
115 sampled_textures.push_back(surface); 113 sampled_textures.push_back(surface);
116 } 114 }
@@ -124,13 +122,13 @@ public:
124 if (!gpu_addr) { 122 if (!gpu_addr) {
125 return GetNullSurface(SurfaceParams::ExpectedTarget(entry)); 123 return GetNullSurface(SurfaceParams::ExpectedTarget(entry));
126 } 124 }
127 const auto host_ptr{system.GPU().MemoryManager().GetPointer(gpu_addr)}; 125 const std::optional<VAddr> cpu_addr =
128 const auto cache_addr{ToCacheAddr(host_ptr)}; 126 system.GPU().MemoryManager().GpuToCpuAddress(gpu_addr);
129 if (!cache_addr) { 127 if (!cpu_addr) {
130 return GetNullSurface(SurfaceParams::ExpectedTarget(entry)); 128 return GetNullSurface(SurfaceParams::ExpectedTarget(entry));
131 } 129 }
132 const auto params{SurfaceParams::CreateForImage(format_lookup_table, tic, entry)}; 130 const auto params{SurfaceParams::CreateForImage(format_lookup_table, tic, entry)};
133 const auto [surface, view] = GetSurface(gpu_addr, cache_addr, params, true, false); 131 const auto [surface, view] = GetSurface(gpu_addr, *cpu_addr, params, true, false);
134 if (guard_samplers) { 132 if (guard_samplers) {
135 sampled_textures.push_back(surface); 133 sampled_textures.push_back(surface);
136 } 134 }
@@ -159,14 +157,14 @@ public:
159 SetEmptyDepthBuffer(); 157 SetEmptyDepthBuffer();
160 return {}; 158 return {};
161 } 159 }
162 const auto host_ptr{system.GPU().MemoryManager().GetPointer(gpu_addr)}; 160 const std::optional<VAddr> cpu_addr =
163 const auto cache_addr{ToCacheAddr(host_ptr)}; 161 system.GPU().MemoryManager().GpuToCpuAddress(gpu_addr);
164 if (!cache_addr) { 162 if (!cpu_addr) {
165 SetEmptyDepthBuffer(); 163 SetEmptyDepthBuffer();
166 return {}; 164 return {};
167 } 165 }
168 const auto depth_params{SurfaceParams::CreateForDepthBuffer(system)}; 166 const auto depth_params{SurfaceParams::CreateForDepthBuffer(system)};
169 auto surface_view = GetSurface(gpu_addr, cache_addr, depth_params, preserve_contents, true); 167 auto surface_view = GetSurface(gpu_addr, *cpu_addr, depth_params, preserve_contents, true);
170 if (depth_buffer.target) 168 if (depth_buffer.target)
171 depth_buffer.target->MarkAsRenderTarget(false, NO_RT); 169 depth_buffer.target->MarkAsRenderTarget(false, NO_RT);
172 depth_buffer.target = surface_view.first; 170 depth_buffer.target = surface_view.first;
@@ -199,15 +197,15 @@ public:
199 return {}; 197 return {};
200 } 198 }
201 199
202 const auto host_ptr{system.GPU().MemoryManager().GetPointer(gpu_addr)}; 200 const std::optional<VAddr> cpu_addr =
203 const auto cache_addr{ToCacheAddr(host_ptr)}; 201 system.GPU().MemoryManager().GpuToCpuAddress(gpu_addr);
204 if (!cache_addr) { 202 if (!cpu_addr) {
205 SetEmptyColorBuffer(index); 203 SetEmptyColorBuffer(index);
206 return {}; 204 return {};
207 } 205 }
208 206
209 auto surface_view = 207 auto surface_view =
210 GetSurface(gpu_addr, cache_addr, SurfaceParams::CreateForFramebuffer(system, index), 208 GetSurface(gpu_addr, *cpu_addr, SurfaceParams::CreateForFramebuffer(system, index),
211 preserve_contents, true); 209 preserve_contents, true);
212 if (render_targets[index].target) 210 if (render_targets[index].target)
213 render_targets[index].target->MarkAsRenderTarget(false, NO_RT); 211 render_targets[index].target->MarkAsRenderTarget(false, NO_RT);
@@ -257,27 +255,26 @@ public:
257 const GPUVAddr src_gpu_addr = src_config.Address(); 255 const GPUVAddr src_gpu_addr = src_config.Address();
258 const GPUVAddr dst_gpu_addr = dst_config.Address(); 256 const GPUVAddr dst_gpu_addr = dst_config.Address();
259 DeduceBestBlit(src_params, dst_params, src_gpu_addr, dst_gpu_addr); 257 DeduceBestBlit(src_params, dst_params, src_gpu_addr, dst_gpu_addr);
260 const auto dst_host_ptr{system.GPU().MemoryManager().GetPointer(dst_gpu_addr)}; 258 const std::optional<VAddr> dst_cpu_addr =
261 const auto dst_cache_addr{ToCacheAddr(dst_host_ptr)}; 259 system.GPU().MemoryManager().GpuToCpuAddress(dst_gpu_addr);
262 const auto src_host_ptr{system.GPU().MemoryManager().GetPointer(src_gpu_addr)}; 260 const std::optional<VAddr> src_cpu_addr =
263 const auto src_cache_addr{ToCacheAddr(src_host_ptr)}; 261 system.GPU().MemoryManager().GpuToCpuAddress(src_gpu_addr);
264 std::pair<TSurface, TView> dst_surface = 262 std::pair<TSurface, TView> dst_surface =
265 GetSurface(dst_gpu_addr, dst_cache_addr, dst_params, true, false); 263 GetSurface(dst_gpu_addr, *dst_cpu_addr, dst_params, true, false);
266 std::pair<TSurface, TView> src_surface = 264 std::pair<TSurface, TView> src_surface =
267 GetSurface(src_gpu_addr, src_cache_addr, src_params, true, false); 265 GetSurface(src_gpu_addr, *src_cpu_addr, src_params, true, false);
268 ImageBlit(src_surface.second, dst_surface.second, copy_config); 266 ImageBlit(src_surface.second, dst_surface.second, copy_config);
269 dst_surface.first->MarkAsModified(true, Tick()); 267 dst_surface.first->MarkAsModified(true, Tick());
270 } 268 }
271 269
272 TSurface TryFindFramebufferSurface(const u8* host_ptr) { 270 TSurface TryFindFramebufferSurface(VAddr addr) {
273 const CacheAddr cache_addr = ToCacheAddr(host_ptr); 271 if (!addr) {
274 if (!cache_addr) {
275 return nullptr; 272 return nullptr;
276 } 273 }
277 const CacheAddr page = cache_addr >> registry_page_bits; 274 const VAddr page = addr >> registry_page_bits;
278 std::vector<TSurface>& list = registry[page]; 275 std::vector<TSurface>& list = registry[page];
279 for (auto& surface : list) { 276 for (auto& surface : list) {
280 if (surface->GetCacheAddr() == cache_addr) { 277 if (surface->GetCpuAddr() == addr) {
281 return surface; 278 return surface;
282 } 279 }
283 } 280 }
@@ -338,18 +335,14 @@ protected:
338 335
339 void Register(TSurface surface) { 336 void Register(TSurface surface) {
340 const GPUVAddr gpu_addr = surface->GetGpuAddr(); 337 const GPUVAddr gpu_addr = surface->GetGpuAddr();
341 const CacheAddr cache_ptr = ToCacheAddr(system.GPU().MemoryManager().GetPointer(gpu_addr));
342 const std::size_t size = surface->GetSizeInBytes(); 338 const std::size_t size = surface->GetSizeInBytes();
343 const std::optional<VAddr> cpu_addr = 339 const std::optional<VAddr> cpu_addr =
344 system.GPU().MemoryManager().GpuToCpuAddress(gpu_addr); 340 system.GPU().MemoryManager().GpuToCpuAddress(gpu_addr);
345 if (!cache_ptr || !cpu_addr) { 341 if (!cpu_addr) {
346 LOG_CRITICAL(HW_GPU, "Failed to register surface with unmapped gpu_address 0x{:016x}", 342 LOG_CRITICAL(HW_GPU, "Failed to register surface with unmapped gpu_address 0x{:016x}",
347 gpu_addr); 343 gpu_addr);
348 return; 344 return;
349 } 345 }
350 const bool continuous = system.GPU().MemoryManager().IsBlockContinuous(gpu_addr, size);
351 surface->MarkAsContinuous(continuous);
352 surface->SetCacheAddr(cache_ptr);
353 surface->SetCpuAddr(*cpu_addr); 346 surface->SetCpuAddr(*cpu_addr);
354 RegisterInnerCache(surface); 347 RegisterInnerCache(surface);
355 surface->MarkAsRegistered(true); 348 surface->MarkAsRegistered(true);
@@ -634,7 +627,7 @@ private:
634 std::optional<std::pair<TSurface, TView>> Manage3DSurfaces(std::vector<TSurface>& overlaps, 627 std::optional<std::pair<TSurface, TView>> Manage3DSurfaces(std::vector<TSurface>& overlaps,
635 const SurfaceParams& params, 628 const SurfaceParams& params,
636 const GPUVAddr gpu_addr, 629 const GPUVAddr gpu_addr,
637 const CacheAddr cache_addr, 630 const VAddr cpu_addr,
638 bool preserve_contents) { 631 bool preserve_contents) {
639 if (params.target == SurfaceTarget::Texture3D) { 632 if (params.target == SurfaceTarget::Texture3D) {
640 bool failed = false; 633 bool failed = false;
@@ -659,7 +652,7 @@ private:
659 failed = true; 652 failed = true;
660 break; 653 break;
661 } 654 }
662 const u32 offset = static_cast<u32>(surface->GetCacheAddr() - cache_addr); 655 const u32 offset = static_cast<u32>(surface->GetCpuAddr() - cpu_addr);
663 const auto [x, y, z] = params.GetBlockOffsetXYZ(offset); 656 const auto [x, y, z] = params.GetBlockOffsetXYZ(offset);
664 modified |= surface->IsModified(); 657 modified |= surface->IsModified();
665 const CopyParams copy_params(0, 0, 0, 0, 0, z, 0, 0, params.width, params.height, 658 const CopyParams copy_params(0, 0, 0, 0, 0, z, 0, 0, params.width, params.height,
@@ -679,7 +672,7 @@ private:
679 } else { 672 } else {
680 for (const auto& surface : overlaps) { 673 for (const auto& surface : overlaps) {
681 if (!surface->MatchTarget(params.target)) { 674 if (!surface->MatchTarget(params.target)) {
682 if (overlaps.size() == 1 && surface->GetCacheAddr() == cache_addr) { 675 if (overlaps.size() == 1 && surface->GetCpuAddr() == cpu_addr) {
683 if (Settings::values.use_accurate_gpu_emulation) { 676 if (Settings::values.use_accurate_gpu_emulation) {
684 return std::nullopt; 677 return std::nullopt;
685 } 678 }
@@ -688,7 +681,7 @@ private:
688 } 681 }
689 return std::nullopt; 682 return std::nullopt;
690 } 683 }
691 if (surface->GetCacheAddr() != cache_addr) { 684 if (surface->GetCpuAddr() != cpu_addr) {
692 continue; 685 continue;
693 } 686 }
694 if (surface->MatchesStructure(params) == MatchStructureResult::FullMatch) { 687 if (surface->MatchesStructure(params) == MatchStructureResult::FullMatch) {
@@ -722,13 +715,13 @@ private:
722 * left blank. 715 * left blank.
723 * @param is_render Whether or not the surface is a render target. 716 * @param is_render Whether or not the surface is a render target.
724 **/ 717 **/
725 std::pair<TSurface, TView> GetSurface(const GPUVAddr gpu_addr, const CacheAddr cache_addr, 718 std::pair<TSurface, TView> GetSurface(const GPUVAddr gpu_addr, const VAddr cpu_addr,
726 const SurfaceParams& params, bool preserve_contents, 719 const SurfaceParams& params, bool preserve_contents,
727 bool is_render) { 720 bool is_render) {
728 // Step 1 721 // Step 1
729 // Check Level 1 Cache for a fast structural match. If candidate surface 722 // Check Level 1 Cache for a fast structural match. If candidate surface
730 // matches at certain level we are pretty much done. 723 // matches at certain level we are pretty much done.
731 if (const auto iter = l1_cache.find(cache_addr); iter != l1_cache.end()) { 724 if (const auto iter = l1_cache.find(cpu_addr); iter != l1_cache.end()) {
732 TSurface& current_surface = iter->second; 725 TSurface& current_surface = iter->second;
733 const auto topological_result = current_surface->MatchesTopology(params); 726 const auto topological_result = current_surface->MatchesTopology(params);
734 if (topological_result != MatchTopologyResult::FullMatch) { 727 if (topological_result != MatchTopologyResult::FullMatch) {
@@ -755,7 +748,7 @@ private:
755 // Step 2 748 // Step 2
756 // Obtain all possible overlaps in the memory region 749 // Obtain all possible overlaps in the memory region
757 const std::size_t candidate_size = params.GetGuestSizeInBytes(); 750 const std::size_t candidate_size = params.GetGuestSizeInBytes();
758 auto overlaps{GetSurfacesInRegion(cache_addr, candidate_size)}; 751 auto overlaps{GetSurfacesInRegion(cpu_addr, candidate_size)};
759 752
760 // If none are found, we are done. we just load the surface and create it. 753 // If none are found, we are done. we just load the surface and create it.
761 if (overlaps.empty()) { 754 if (overlaps.empty()) {
@@ -777,7 +770,7 @@ private:
777 // Check if it's a 3D texture 770 // Check if it's a 3D texture
778 if (params.block_depth > 0) { 771 if (params.block_depth > 0) {
779 auto surface = 772 auto surface =
780 Manage3DSurfaces(overlaps, params, gpu_addr, cache_addr, preserve_contents); 773 Manage3DSurfaces(overlaps, params, gpu_addr, cpu_addr, preserve_contents);
781 if (surface) { 774 if (surface) {
782 return *surface; 775 return *surface;
783 } 776 }
@@ -852,16 +845,16 @@ private:
852 * @param params The parameters on the candidate surface. 845 * @param params The parameters on the candidate surface.
853 **/ 846 **/
854 Deduction DeduceSurface(const GPUVAddr gpu_addr, const SurfaceParams& params) { 847 Deduction DeduceSurface(const GPUVAddr gpu_addr, const SurfaceParams& params) {
855 const auto host_ptr{system.GPU().MemoryManager().GetPointer(gpu_addr)}; 848 const std::optional<VAddr> cpu_addr =
856 const auto cache_addr{ToCacheAddr(host_ptr)}; 849 system.GPU().MemoryManager().GpuToCpuAddress(gpu_addr);
857 850
858 if (!cache_addr) { 851 if (!cpu_addr) {
859 Deduction result{}; 852 Deduction result{};
860 result.type = DeductionType::DeductionFailed; 853 result.type = DeductionType::DeductionFailed;
861 return result; 854 return result;
862 } 855 }
863 856
864 if (const auto iter = l1_cache.find(cache_addr); iter != l1_cache.end()) { 857 if (const auto iter = l1_cache.find(*cpu_addr); iter != l1_cache.end()) {
865 TSurface& current_surface = iter->second; 858 TSurface& current_surface = iter->second;
866 const auto topological_result = current_surface->MatchesTopology(params); 859 const auto topological_result = current_surface->MatchesTopology(params);
867 if (topological_result != MatchTopologyResult::FullMatch) { 860 if (topological_result != MatchTopologyResult::FullMatch) {
@@ -880,7 +873,7 @@ private:
880 } 873 }
881 874
882 const std::size_t candidate_size = params.GetGuestSizeInBytes(); 875 const std::size_t candidate_size = params.GetGuestSizeInBytes();
883 auto overlaps{GetSurfacesInRegion(cache_addr, candidate_size)}; 876 auto overlaps{GetSurfacesInRegion(*cpu_addr, candidate_size)};
884 877
885 if (overlaps.empty()) { 878 if (overlaps.empty()) {
886 Deduction result{}; 879 Deduction result{};
@@ -1024,10 +1017,10 @@ private:
1024 } 1017 }
1025 1018
1026 void RegisterInnerCache(TSurface& surface) { 1019 void RegisterInnerCache(TSurface& surface) {
1027 const CacheAddr cache_addr = surface->GetCacheAddr(); 1020 const VAddr cpu_addr = surface->GetCpuAddr();
1028 CacheAddr start = cache_addr >> registry_page_bits; 1021 VAddr start = cpu_addr >> registry_page_bits;
1029 const CacheAddr end = (surface->GetCacheAddrEnd() - 1) >> registry_page_bits; 1022 const VAddr end = (surface->GetCpuAddrEnd() - 1) >> registry_page_bits;
1030 l1_cache[cache_addr] = surface; 1023 l1_cache[cpu_addr] = surface;
1031 while (start <= end) { 1024 while (start <= end) {
1032 registry[start].push_back(surface); 1025 registry[start].push_back(surface);
1033 start++; 1026 start++;
@@ -1035,10 +1028,10 @@ private:
1035 } 1028 }
1036 1029
1037 void UnregisterInnerCache(TSurface& surface) { 1030 void UnregisterInnerCache(TSurface& surface) {
1038 const CacheAddr cache_addr = surface->GetCacheAddr(); 1031 const VAddr cpu_addr = surface->GetCpuAddr();
1039 CacheAddr start = cache_addr >> registry_page_bits; 1032 VAddr start = cpu_addr >> registry_page_bits;
1040 const CacheAddr end = (surface->GetCacheAddrEnd() - 1) >> registry_page_bits; 1033 const VAddr end = (surface->GetCpuAddrEnd() - 1) >> registry_page_bits;
1041 l1_cache.erase(cache_addr); 1034 l1_cache.erase(cpu_addr);
1042 while (start <= end) { 1035 while (start <= end) {
1043 auto& reg{registry[start]}; 1036 auto& reg{registry[start]};
1044 reg.erase(std::find(reg.begin(), reg.end(), surface)); 1037 reg.erase(std::find(reg.begin(), reg.end(), surface));
@@ -1046,18 +1039,18 @@ private:
1046 } 1039 }
1047 } 1040 }
1048 1041
1049 std::vector<TSurface> GetSurfacesInRegion(const CacheAddr cache_addr, const std::size_t size) { 1042 std::vector<TSurface> GetSurfacesInRegion(const VAddr cpu_addr, const std::size_t size) {
1050 if (size == 0) { 1043 if (size == 0) {
1051 return {}; 1044 return {};
1052 } 1045 }
1053 const CacheAddr cache_addr_end = cache_addr + size; 1046 const VAddr cpu_addr_end = cpu_addr + size;
1054 CacheAddr start = cache_addr >> registry_page_bits; 1047 VAddr start = cpu_addr >> registry_page_bits;
1055 const CacheAddr end = (cache_addr_end - 1) >> registry_page_bits; 1048 const VAddr end = (cpu_addr_end - 1) >> registry_page_bits;
1056 std::vector<TSurface> surfaces; 1049 std::vector<TSurface> surfaces;
1057 while (start <= end) { 1050 while (start <= end) {
1058 std::vector<TSurface>& list = registry[start]; 1051 std::vector<TSurface>& list = registry[start];
1059 for (auto& surface : list) { 1052 for (auto& surface : list) {
1060 if (!surface->IsPicked() && surface->Overlaps(cache_addr, cache_addr_end)) { 1053 if (!surface->IsPicked() && surface->Overlaps(cpu_addr, cpu_addr_end)) {
1061 surface->MarkAsPicked(true); 1054 surface->MarkAsPicked(true);
1062 surfaces.push_back(surface); 1055 surfaces.push_back(surface);
1063 } 1056 }
@@ -1146,14 +1139,14 @@ private:
1146 // large in size. 1139 // large in size.
1147 static constexpr u64 registry_page_bits{20}; 1140 static constexpr u64 registry_page_bits{20};
1148 static constexpr u64 registry_page_size{1 << registry_page_bits}; 1141 static constexpr u64 registry_page_size{1 << registry_page_bits};
1149 std::unordered_map<CacheAddr, std::vector<TSurface>> registry; 1142 std::unordered_map<VAddr, std::vector<TSurface>> registry;
1150 1143
1151 static constexpr u32 DEPTH_RT = 8; 1144 static constexpr u32 DEPTH_RT = 8;
1152 static constexpr u32 NO_RT = 0xFFFFFFFF; 1145 static constexpr u32 NO_RT = 0xFFFFFFFF;
1153 1146
1154 // The L1 Cache is used for fast texture lookup before checking the overlaps 1147 // The L1 Cache is used for fast texture lookup before checking the overlaps
1155 // This avoids calculating size and other stuffs. 1148 // This avoids calculating size and other stuffs.
1156 std::unordered_map<CacheAddr, TSurface> l1_cache; 1149 std::unordered_map<VAddr, TSurface> l1_cache;
1157 1150
1158 /// The surface reserve is a "backup" cache, this is where we put unique surfaces that have 1151 /// The surface reserve is a "backup" cache, this is where we put unique surfaces that have
1159 /// previously been used. This is to prevent surfaces from being constantly created and 1152 /// previously been used. This is to prevent surfaces from being constantly created and
diff --git a/src/video_core/textures/texture.cpp b/src/video_core/textures/texture.cpp
new file mode 100644
index 000000000..d1939d744
--- /dev/null
+++ b/src/video_core/textures/texture.cpp
@@ -0,0 +1,80 @@
1// Copyright 2020 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <algorithm>
6#include <array>
7
8#include "core/settings.h"
9#include "video_core/textures/texture.h"
10
11namespace Tegra::Texture {
12
13namespace {
14
/// 256-entry lookup table of floats, ascending from 0.0 to 1.0.
/// TSCEntry::GetBorderColor indexes it with the srgb_border_color_{r,g,b} fields
/// when srgb_conversion is set.
/// NOTE(review): presumably converts an sRGB-encoded channel value to a linear
/// float; the first three entries are exactly 0 and the last three exactly 1,
/// which looks like intentional clamping - confirm against the hardware reference.
15constexpr std::array<float, 256> SRGB_CONVERSION_LUT = {
16 0.000000f, 0.000000f, 0.000000f, 0.000012f, 0.000021f, 0.000033f, 0.000046f, 0.000062f,
17 0.000081f, 0.000102f, 0.000125f, 0.000151f, 0.000181f, 0.000214f, 0.000251f, 0.000293f,
18 0.000338f, 0.000388f, 0.000443f, 0.000503f, 0.000568f, 0.000639f, 0.000715f, 0.000798f,
19 0.000887f, 0.000983f, 0.001085f, 0.001195f, 0.001312f, 0.001437f, 0.001569f, 0.001710f,
20 0.001860f, 0.002019f, 0.002186f, 0.002364f, 0.002551f, 0.002748f, 0.002955f, 0.003174f,
21 0.003403f, 0.003643f, 0.003896f, 0.004160f, 0.004436f, 0.004725f, 0.005028f, 0.005343f,
22 0.005672f, 0.006015f, 0.006372f, 0.006744f, 0.007130f, 0.007533f, 0.007950f, 0.008384f,
23 0.008834f, 0.009301f, 0.009785f, 0.010286f, 0.010805f, 0.011342f, 0.011898f, 0.012472f,
24 0.013066f, 0.013680f, 0.014313f, 0.014967f, 0.015641f, 0.016337f, 0.017054f, 0.017793f,
25 0.018554f, 0.019337f, 0.020144f, 0.020974f, 0.021828f, 0.022706f, 0.023609f, 0.024536f,
26 0.025489f, 0.026468f, 0.027473f, 0.028504f, 0.029563f, 0.030649f, 0.031762f, 0.032904f,
27 0.034074f, 0.035274f, 0.036503f, 0.037762f, 0.039050f, 0.040370f, 0.041721f, 0.043103f,
28 0.044518f, 0.045964f, 0.047444f, 0.048956f, 0.050503f, 0.052083f, 0.053699f, 0.055349f,
29 0.057034f, 0.058755f, 0.060513f, 0.062307f, 0.064139f, 0.066008f, 0.067915f, 0.069861f,
30 0.071845f, 0.073869f, 0.075933f, 0.078037f, 0.080182f, 0.082369f, 0.084597f, 0.086867f,
31 0.089180f, 0.091535f, 0.093935f, 0.096378f, 0.098866f, 0.101398f, 0.103977f, 0.106601f,
32 0.109271f, 0.111988f, 0.114753f, 0.117565f, 0.120426f, 0.123335f, 0.126293f, 0.129301f,
33 0.132360f, 0.135469f, 0.138629f, 0.141841f, 0.145105f, 0.148421f, 0.151791f, 0.155214f,
34 0.158691f, 0.162224f, 0.165810f, 0.169453f, 0.173152f, 0.176907f, 0.180720f, 0.184589f,
35 0.188517f, 0.192504f, 0.196549f, 0.200655f, 0.204820f, 0.209046f, 0.213334f, 0.217682f,
36 0.222093f, 0.226567f, 0.231104f, 0.235704f, 0.240369f, 0.245099f, 0.249894f, 0.254754f,
37 0.259681f, 0.264674f, 0.269736f, 0.274864f, 0.280062f, 0.285328f, 0.290664f, 0.296070f,
38 0.301546f, 0.307094f, 0.312713f, 0.318404f, 0.324168f, 0.330006f, 0.335916f, 0.341902f,
39 0.347962f, 0.354097f, 0.360309f, 0.366597f, 0.372961f, 0.379403f, 0.385924f, 0.392524f,
40 0.399202f, 0.405960f, 0.412798f, 0.419718f, 0.426719f, 0.433802f, 0.440967f, 0.448216f,
41 0.455548f, 0.462965f, 0.470465f, 0.478052f, 0.485725f, 0.493484f, 0.501329f, 0.509263f,
42 0.517285f, 0.525396f, 0.533595f, 0.541885f, 0.550265f, 0.558736f, 0.567299f, 0.575954f,
43 0.584702f, 0.593542f, 0.602477f, 0.611507f, 0.620632f, 0.629852f, 0.639168f, 0.648581f,
44 0.658092f, 0.667700f, 0.677408f, 0.687214f, 0.697120f, 0.707127f, 0.717234f, 0.727443f,
45 0.737753f, 0.748167f, 0.758685f, 0.769305f, 0.780031f, 0.790861f, 0.801798f, 0.812839f,
46 0.823989f, 0.835246f, 0.846611f, 0.858085f, 0.869668f, 0.881360f, 0.893164f, 0.905078f,
47 0.917104f, 0.929242f, 0.941493f, 0.953859f, 0.966338f, 1.000000f, 1.000000f, 1.000000f,
48};
49
50unsigned SettingsMinimumAnisotropy() noexcept {
51 switch (static_cast<Anisotropy>(Settings::values.max_anisotropy)) {
52 default:
53 case Anisotropy::Default:
54 return 1U;
55 case Anisotropy::Filter2x:
56 return 2U;
57 case Anisotropy::Filter4x:
58 return 4U;
59 case Anisotropy::Filter8x:
60 return 8U;
61 case Anisotropy::Filter16x:
62 return 16U;
63 }
64}
65
66} // Anonymous namespace
67
68std::array<float, 4> TSCEntry::GetBorderColor() const noexcept {
69 if (!srgb_conversion) {
70 return border_color;
71 }
72 return {SRGB_CONVERSION_LUT[srgb_border_color_r], SRGB_CONVERSION_LUT[srgb_border_color_g],
73 SRGB_CONVERSION_LUT[srgb_border_color_b], border_color[3]};
74}
75
76float TSCEntry::GetMaxAnisotropy() const noexcept {
77 return static_cast<float>(std::max(1U << max_anisotropy, SettingsMinimumAnisotropy()));
78}
79
80} // namespace Tegra::Texture
diff --git a/src/video_core/textures/texture.h b/src/video_core/textures/texture.h
index 7edc4abe1..eba05aced 100644
--- a/src/video_core/textures/texture.h
+++ b/src/video_core/textures/texture.h
@@ -8,7 +8,6 @@
8#include "common/assert.h" 8#include "common/assert.h"
9#include "common/bit_field.h" 9#include "common/bit_field.h"
10#include "common/common_types.h" 10#include "common/common_types.h"
11#include "core/settings.h"
12 11
13namespace Tegra::Texture { 12namespace Tegra::Texture {
14 13
@@ -132,6 +131,20 @@ enum class SwizzleSource : u32 {
132 OneFloat = 7, 131 OneFloat = 7,
133}; 132};
134 133
/// Multisample mode values, used as the type of the 4-bit TICEntry::msaa_mode field.
/// Value 7 has no named enumerator.
// NOTE(review): the names presumably encode the sample grid (e.g. 2x1, 4x4); the meaning of
// the _D3D and _VC* suffixes is not shown here - confirm against hardware documentation.
134enum class MsaaMode : u32 {
135 Msaa1x1 = 0,
136 Msaa2x1 = 1,
137 Msaa2x2 = 2,
138 Msaa4x2 = 3,
139 Msaa4x2_D3D = 4,
140 Msaa2x1_D3D = 5,
141 Msaa4x4 = 6,
142 Msaa2x2_VC4 = 8,
143 Msaa2x2_VC12 = 9,
144 Msaa4x2_VC8 = 10,
145 Msaa4x2_VC24 = 11,
146};
147
135union TextureHandle { 148union TextureHandle {
136 TextureHandle(u32 raw) : raw{raw} {} 149 TextureHandle(u32 raw) : raw{raw} {}
137 150
@@ -198,6 +211,7 @@ struct TICEntry {
198 union { 211 union {
199 BitField<0, 4, u32> res_min_mip_level; 212 BitField<0, 4, u32> res_min_mip_level;
200 BitField<4, 4, u32> res_max_mip_level; 213 BitField<4, 4, u32> res_max_mip_level;
214 BitField<8, 4, MsaaMode> msaa_mode;
201 BitField<12, 12, u32> min_lod_clamp; 215 BitField<12, 12, u32> min_lod_clamp;
202 }; 216 };
203 217
@@ -336,24 +350,9 @@ struct TSCEntry {
336 std::array<u8, 0x20> raw; 350 std::array<u8, 0x20> raw;
337 }; 351 };
338 352
339 float GetMaxAnisotropy() const { 353 std::array<float, 4> GetBorderColor() const noexcept;
340 const u32 min_value = [] { 354
341 switch (static_cast<Anisotropy>(Settings::values.max_anisotropy)) { 355 float GetMaxAnisotropy() const noexcept;
342 default:
343 case Anisotropy::Default:
344 return 1U;
345 case Anisotropy::Filter2x:
346 return 2U;
347 case Anisotropy::Filter4x:
348 return 4U;
349 case Anisotropy::Filter8x:
350 return 8U;
351 case Anisotropy::Filter16x:
352 return 16U;
353 }
354 }();
355 return static_cast<float>(std::max(1U << max_anisotropy, min_value));
356 }
357 356
358 float GetMinLod() const { 357 float GetMinLod() const {
359 return static_cast<float>(min_lod_clamp) / 256.0f; 358 return static_cast<float>(min_lod_clamp) / 256.0f;
@@ -368,15 +367,6 @@ struct TSCEntry {
368 constexpr u32 mask = 1U << (13 - 1); 367 constexpr u32 mask = 1U << (13 - 1);
369 return static_cast<float>(static_cast<s32>((mip_lod_bias ^ mask) - mask)) / 256.0f; 368 return static_cast<float>(static_cast<s32>((mip_lod_bias ^ mask) - mask)) / 256.0f;
370 } 369 }
371
372 std::array<float, 4> GetBorderColor() const {
373 if (srgb_conversion) {
374 return {static_cast<float>(srgb_border_color_r) / 255.0f,
375 static_cast<float>(srgb_border_color_g) / 255.0f,
376 static_cast<float>(srgb_border_color_b) / 255.0f, border_color[3]};
377 }
378 return border_color;
379 }
380}; 370};
381static_assert(sizeof(TSCEntry) == 0x20, "TSCEntry has wrong size"); 371static_assert(sizeof(TSCEntry) == 0x20, "TSCEntry has wrong size");
382 372
diff --git a/src/yuzu/CMakeLists.txt b/src/yuzu/CMakeLists.txt
index d34b47b3f..8b9404718 100644
--- a/src/yuzu/CMakeLists.txt
+++ b/src/yuzu/CMakeLists.txt
@@ -150,6 +150,10 @@ target_link_libraries(yuzu PRIVATE common core input_common video_core)
150target_link_libraries(yuzu PRIVATE Boost::boost glad Qt5::OpenGL Qt5::Widgets) 150target_link_libraries(yuzu PRIVATE Boost::boost glad Qt5::OpenGL Qt5::Widgets)
151target_link_libraries(yuzu PRIVATE ${PLATFORM_LIBRARIES} Threads::Threads) 151target_link_libraries(yuzu PRIVATE ${PLATFORM_LIBRARIES} Threads::Threads)
152 152
153if (ENABLE_VULKAN AND NOT WIN32)
154 target_include_directories(yuzu PRIVATE ${Qt5Gui_PRIVATE_INCLUDE_DIRS})
155endif()
156
153target_compile_definitions(yuzu PRIVATE 157target_compile_definitions(yuzu PRIVATE
154 # Use QStringBuilder for string concatenation to reduce 158 # Use QStringBuilder for string concatenation to reduce
155 # the overall number of temporary strings created. 159 # the overall number of temporary strings created.
diff --git a/src/yuzu/bootmanager.cpp b/src/yuzu/bootmanager.cpp
index eaded2640..1cac2f942 100644
--- a/src/yuzu/bootmanager.cpp
+++ b/src/yuzu/bootmanager.cpp
@@ -14,8 +14,9 @@
14#include <QScreen> 14#include <QScreen>
15#include <QStringList> 15#include <QStringList>
16#include <QWindow> 16#include <QWindow>
17#ifdef HAS_VULKAN 17
18#include <QVulkanWindow> 18#if !defined(WIN32) && HAS_VULKAN
19#include <qpa/qplatformnativeinterface.h>
19#endif 20#endif
20 21
21#include <fmt/format.h> 22#include <fmt/format.h>
@@ -224,7 +225,6 @@ public:
224 } 225 }
225 226
226 context->MakeCurrent(); 227 context->MakeCurrent();
227 glBindFramebuffer(GL_DRAW_FRAMEBUFFER, 0);
228 if (Core::System::GetInstance().Renderer().TryPresent(100)) { 228 if (Core::System::GetInstance().Renderer().TryPresent(100)) {
229 context->SwapBuffers(); 229 context->SwapBuffers();
230 glFinish(); 230 glFinish();
@@ -238,16 +238,50 @@ private:
238#ifdef HAS_VULKAN 238#ifdef HAS_VULKAN
239class VulkanRenderWidget : public RenderWidget { 239class VulkanRenderWidget : public RenderWidget {
240public: 240public:
241 explicit VulkanRenderWidget(GRenderWindow* parent, QVulkanInstance* instance) 241 explicit VulkanRenderWidget(GRenderWindow* parent) : RenderWidget(parent) {
242 : RenderWidget(parent) {
243 windowHandle()->setSurfaceType(QWindow::VulkanSurface); 242 windowHandle()->setSurfaceType(QWindow::VulkanSurface);
244 windowHandle()->setVulkanInstance(instance);
245 } 243 }
246}; 244};
247#endif 245#endif
248 246
249GRenderWindow::GRenderWindow(GMainWindow* parent_, EmuThread* emu_thread) 247static Core::Frontend::WindowSystemType GetWindowSystemType() {
250 : QWidget(parent_), emu_thread(emu_thread) { 248 // Determine WSI type based on Qt platform.
249 QString platform_name = QGuiApplication::platformName();
250 if (platform_name == QStringLiteral("windows"))
251 return Core::Frontend::WindowSystemType::Windows;
252 else if (platform_name == QStringLiteral("xcb"))
253 return Core::Frontend::WindowSystemType::X11;
254 else if (platform_name == QStringLiteral("wayland"))
255 return Core::Frontend::WindowSystemType::Wayland;
256
257 LOG_CRITICAL(Frontend, "Unknown Qt platform!");
258 return Core::Frontend::WindowSystemType::Windows;
259}
260
/// Builds the window-system description for the given Qt window.
/// The type always comes from GetWindowSystemType(). The native handles and the surface scale
/// are only filled in when HAS_VULKAN is defined.
/// @param window Window to describe. When null, render_surface is set to nullptr and
///               render_surface_scale to 1.0f.
/// @return The populated WindowSystemInfo.
261static Core::Frontend::EmuWindow::WindowSystemInfo GetWindowSystemInfo(QWindow* window) {
262 Core::Frontend::EmuWindow::WindowSystemInfo wsi;
263 wsi.type = GetWindowSystemType();
264
265#ifdef HAS_VULKAN
266 // Our Win32 Qt external doesn't have the private API.
267#if defined(WIN32) || defined(__APPLE__)
// On these platforms the window id itself is used as the render surface handle.
268 wsi.render_surface = window ? reinterpret_cast<void*>(window->winId()) : nullptr;
269#else
270 QPlatformNativeInterface* pni = QGuiApplication::platformNativeInterface();
// NOTE(review): unlike the surface lookups below, this call is not guarded against a null
// window - confirm the platform plugin accepts nullptr here.
271 wsi.display_connection = pni->nativeResourceForWindow("display", window);
// Wayland takes its surface from the native interface; other platforms use the window id.
272 if (wsi.type == Core::Frontend::WindowSystemType::Wayland)
273 wsi.render_surface = window ? pni->nativeResourceForWindow("surface", window) : nullptr;
274 else
275 wsi.render_surface = window ? reinterpret_cast<void*>(window->winId()) : nullptr;
276#endif
277 wsi.render_surface_scale = window ? static_cast<float>(window->devicePixelRatio()) : 1.0f;
278#endif
279
280 return wsi;
281}
282
283GRenderWindow::GRenderWindow(GMainWindow* parent_, EmuThread* emu_thread_)
284 : QWidget(parent_), emu_thread(emu_thread_) {
251 setWindowTitle(QStringLiteral("yuzu %1 | %2-%3") 285 setWindowTitle(QStringLiteral("yuzu %1 | %2-%3")
252 .arg(QString::fromUtf8(Common::g_build_name), 286 .arg(QString::fromUtf8(Common::g_build_name),
253 QString::fromUtf8(Common::g_scm_branch), 287 QString::fromUtf8(Common::g_scm_branch),
@@ -460,6 +494,9 @@ bool GRenderWindow::InitRenderTarget() {
460 break; 494 break;
461 } 495 }
462 496
497 // Update the Window System information with the new render target
498 window_info = GetWindowSystemInfo(child_widget->windowHandle());
499
463 child_widget->resize(Layout::ScreenUndocked::Width, Layout::ScreenUndocked::Height); 500 child_widget->resize(Layout::ScreenUndocked::Width, Layout::ScreenUndocked::Height);
464 layout()->addWidget(child_widget); 501 layout()->addWidget(child_widget);
465 // Reset minimum required size to avoid resizing issues on the main window after restarting. 502 // Reset minimum required size to avoid resizing issues on the main window after restarting.
@@ -531,30 +568,7 @@ bool GRenderWindow::InitializeOpenGL() {
531 568
532bool GRenderWindow::InitializeVulkan() { 569bool GRenderWindow::InitializeVulkan() {
533#ifdef HAS_VULKAN 570#ifdef HAS_VULKAN
534 vk_instance = std::make_unique<QVulkanInstance>(); 571 auto child = new VulkanRenderWidget(this);
535 vk_instance->setApiVersion(QVersionNumber(1, 1, 0));
536 vk_instance->setFlags(QVulkanInstance::Flag::NoDebugOutputRedirect);
537 if (Settings::values.renderer_debug) {
538 const auto supported_layers{vk_instance->supportedLayers()};
539 const bool found =
540 std::find_if(supported_layers.begin(), supported_layers.end(), [](const auto& layer) {
541 constexpr const char searched_layer[] = "VK_LAYER_LUNARG_standard_validation";
542 return layer.name == searched_layer;
543 });
544 if (found) {
545 vk_instance->setLayers(QByteArrayList() << "VK_LAYER_LUNARG_standard_validation");
546 vk_instance->setExtensions(QByteArrayList() << VK_EXT_DEBUG_UTILS_EXTENSION_NAME);
547 }
548 }
549 if (!vk_instance->create()) {
550 QMessageBox::critical(
551 this, tr("Error while initializing Vulkan 1.1!"),
552 tr("Your OS doesn't seem to support Vulkan 1.1 instances, or you do not have the "
553 "latest graphics drivers."));
554 return false;
555 }
556
557 auto child = new VulkanRenderWidget(this, vk_instance.get());
558 child_widget = child; 572 child_widget = child;
559 child_widget->windowHandle()->create(); 573 child_widget->windowHandle()->create();
560 main_context = std::make_unique<DummyContext>(); 574 main_context = std::make_unique<DummyContext>();
@@ -567,21 +581,6 @@ bool GRenderWindow::InitializeVulkan() {
567#endif 581#endif
568} 582}
569 583
570void GRenderWindow::RetrieveVulkanHandlers(void* get_instance_proc_addr, void* instance,
571 void* surface) const {
572#ifdef HAS_VULKAN
573 const auto instance_proc_addr = vk_instance->getInstanceProcAddr("vkGetInstanceProcAddr");
574 const VkInstance instance_copy = vk_instance->vkInstance();
575 const VkSurfaceKHR surface_copy = vk_instance->surfaceForWindow(child_widget->windowHandle());
576
577 std::memcpy(get_instance_proc_addr, &instance_proc_addr, sizeof(instance_proc_addr));
578 std::memcpy(instance, &instance_copy, sizeof(instance_copy));
579 std::memcpy(surface, &surface_copy, sizeof(surface_copy));
580#else
581 UNREACHABLE_MSG("Executing Vulkan code without compiling Vulkan");
582#endif
583}
584
585bool GRenderWindow::LoadOpenGL() { 584bool GRenderWindow::LoadOpenGL() {
586 auto context = CreateSharedContext(); 585 auto context = CreateSharedContext();
587 auto scope = context->Acquire(); 586 auto scope = context->Acquire();
diff --git a/src/yuzu/bootmanager.h b/src/yuzu/bootmanager.h
index d69078df1..3626604ca 100644
--- a/src/yuzu/bootmanager.h
+++ b/src/yuzu/bootmanager.h
@@ -22,9 +22,6 @@ class GMainWindow;
22class QKeyEvent; 22class QKeyEvent;
23class QTouchEvent; 23class QTouchEvent;
24class QStringList; 24class QStringList;
25#ifdef HAS_VULKAN
26class QVulkanInstance;
27#endif
28 25
29namespace VideoCore { 26namespace VideoCore {
30enum class LoadCallbackStage; 27enum class LoadCallbackStage;
@@ -122,8 +119,6 @@ public:
122 // EmuWindow implementation. 119 // EmuWindow implementation.
123 void PollEvents() override; 120 void PollEvents() override;
124 bool IsShown() const override; 121 bool IsShown() const override;
125 void RetrieveVulkanHandlers(void* get_instance_proc_addr, void* instance,
126 void* surface) const override;
127 std::unique_ptr<Core::Frontend::GraphicsContext> CreateSharedContext() const override; 122 std::unique_ptr<Core::Frontend::GraphicsContext> CreateSharedContext() const override;
128 123
129 void BackupGeometry(); 124 void BackupGeometry();
@@ -186,10 +181,6 @@ private:
186 // should instead be shared from 181 // should instead be shared from
187 std::shared_ptr<Core::Frontend::GraphicsContext> main_context; 182 std::shared_ptr<Core::Frontend::GraphicsContext> main_context;
188 183
189#ifdef HAS_VULKAN
190 std::unique_ptr<QVulkanInstance> vk_instance;
191#endif
192
193 /// Temporary storage of the screenshot taken 184 /// Temporary storage of the screenshot taken
194 QImage screenshot_image; 185 QImage screenshot_image;
195 186
diff --git a/src/yuzu/configuration/configure_graphics.cpp b/src/yuzu/configuration/configure_graphics.cpp
index a821c7b3c..ea667caef 100644
--- a/src/yuzu/configuration/configure_graphics.cpp
+++ b/src/yuzu/configuration/configure_graphics.cpp
@@ -15,6 +15,10 @@
15#include "ui_configure_graphics.h" 15#include "ui_configure_graphics.h"
16#include "yuzu/configuration/configure_graphics.h" 16#include "yuzu/configuration/configure_graphics.h"
17 17
18#ifdef HAS_VULKAN
19#include "video_core/renderer_vulkan/renderer_vulkan.h"
20#endif
21
18namespace { 22namespace {
19enum class Resolution : int { 23enum class Resolution : int {
20 Auto, 24 Auto,
@@ -165,41 +169,9 @@ void ConfigureGraphics::UpdateDeviceComboBox() {
165 169
166void ConfigureGraphics::RetrieveVulkanDevices() { 170void ConfigureGraphics::RetrieveVulkanDevices() {
167#ifdef HAS_VULKAN 171#ifdef HAS_VULKAN
168 QVulkanInstance instance; 172 vulkan_devices.clear();
169 instance.setApiVersion(QVersionNumber(1, 1, 0)); 173 for (auto& name : Vulkan::RendererVulkan::EnumerateDevices()) {
170 if (!instance.create()) { 174 vulkan_devices.push_back(QString::fromStdString(name));
171 LOG_INFO(Frontend, "Vulkan 1.1 not available");
172 return;
173 }
174 const auto vkEnumeratePhysicalDevices{reinterpret_cast<PFN_vkEnumeratePhysicalDevices>(
175 instance.getInstanceProcAddr("vkEnumeratePhysicalDevices"))};
176 if (vkEnumeratePhysicalDevices == nullptr) {
177 LOG_INFO(Frontend, "Failed to get pointer to vkEnumeratePhysicalDevices");
178 return;
179 }
180 u32 physical_device_count;
181 if (vkEnumeratePhysicalDevices(instance.vkInstance(), &physical_device_count, nullptr) !=
182 VK_SUCCESS) {
183 LOG_INFO(Frontend, "Failed to get physical devices count");
184 return;
185 }
186 std::vector<VkPhysicalDevice> physical_devices(physical_device_count);
187 if (vkEnumeratePhysicalDevices(instance.vkInstance(), &physical_device_count,
188 physical_devices.data()) != VK_SUCCESS) {
189 LOG_INFO(Frontend, "Failed to get physical devices");
190 return;
191 }
192
193 const auto vkGetPhysicalDeviceProperties{reinterpret_cast<PFN_vkGetPhysicalDeviceProperties>(
194 instance.getInstanceProcAddr("vkGetPhysicalDeviceProperties"))};
195 if (vkGetPhysicalDeviceProperties == nullptr) {
196 LOG_INFO(Frontend, "Failed to get pointer to vkGetPhysicalDeviceProperties");
197 return;
198 }
199 for (const auto physical_device : physical_devices) {
200 VkPhysicalDeviceProperties properties;
201 vkGetPhysicalDeviceProperties(physical_device, &properties);
202 vulkan_devices.push_back(QString::fromUtf8(properties.deviceName));
203 } 175 }
204#endif 176#endif
205} 177}
diff --git a/src/yuzu/configuration/configure_input_player.cpp b/src/yuzu/configuration/configure_input_player.cpp
index 96dec50e2..15ac30f12 100644
--- a/src/yuzu/configuration/configure_input_player.cpp
+++ b/src/yuzu/configuration/configure_input_player.cpp
@@ -541,18 +541,19 @@ void ConfigureInputPlayer::HandleClick(
541 button->setText(tr("[press key]")); 541 button->setText(tr("[press key]"));
542 button->setFocus(); 542 button->setFocus();
543 543
544 const auto iter = std::find(button_map.begin(), button_map.end(), button); 544 // Keyboard keys can only be used as button devices
545 ASSERT(iter != button_map.end()); 545 want_keyboard_keys = type == InputCommon::Polling::DeviceType::Button;
546 const auto index = std::distance(button_map.begin(), iter); 546 if (want_keyboard_keys) {
547 ASSERT(index < Settings::NativeButton::NumButtons && index >= 0); 547 const auto iter = std::find(button_map.begin(), button_map.end(), button);
548 ASSERT(iter != button_map.end());
549 const auto index = std::distance(button_map.begin(), iter);
550 ASSERT(index < Settings::NativeButton::NumButtons && index >= 0);
551 }
548 552
549 input_setter = new_input_setter; 553 input_setter = new_input_setter;
550 554
551 device_pollers = InputCommon::Polling::GetPollers(type); 555 device_pollers = InputCommon::Polling::GetPollers(type);
552 556
553 // Keyboard keys can only be used as button devices
554 want_keyboard_keys = type == InputCommon::Polling::DeviceType::Button;
555
556 for (auto& poller : device_pollers) { 557 for (auto& poller : device_pollers) {
557 poller->Start(); 558 poller->Start();
558 } 559 }
diff --git a/src/yuzu/configuration/configure_input_simple.cpp b/src/yuzu/configuration/configure_input_simple.cpp
index ab3a11d30..0e0e8f113 100644
--- a/src/yuzu/configuration/configure_input_simple.cpp
+++ b/src/yuzu/configuration/configure_input_simple.cpp
@@ -35,6 +35,7 @@ void CallConfigureDialog(ConfigureInputSimple* caller, Args&&... args) {
35// - Open any dialogs 35// - Open any dialogs
36// - Block in any way 36// - Block in any way
37 37
38constexpr std::size_t PLAYER_0_INDEX = 0;
38constexpr std::size_t HANDHELD_INDEX = 8; 39constexpr std::size_t HANDHELD_INDEX = 8;
39 40
40void HandheldOnProfileSelect() { 41void HandheldOnProfileSelect() {
@@ -53,8 +54,8 @@ void HandheldOnProfileSelect() {
53} 54}
54 55
55void DualJoyconsDockedOnProfileSelect() { 56void DualJoyconsDockedOnProfileSelect() {
56 Settings::values.players[0].connected = true; 57 Settings::values.players[PLAYER_0_INDEX].connected = true;
57 Settings::values.players[0].type = Settings::ControllerType::DualJoycon; 58 Settings::values.players[PLAYER_0_INDEX].type = Settings::ControllerType::DualJoycon;
58 59
59 for (std::size_t player = 1; player <= HANDHELD_INDEX; ++player) { 60 for (std::size_t player = 1; player <= HANDHELD_INDEX; ++player) {
60 Settings::values.players[player].connected = false; 61 Settings::values.players[player].connected = false;
@@ -64,7 +65,7 @@ void DualJoyconsDockedOnProfileSelect() {
64 Settings::values.keyboard_enabled = false; 65 Settings::values.keyboard_enabled = false;
65 Settings::values.mouse_enabled = false; 66 Settings::values.mouse_enabled = false;
66 Settings::values.debug_pad_enabled = false; 67 Settings::values.debug_pad_enabled = false;
67 Settings::values.touchscreen.enabled = false; 68 Settings::values.touchscreen.enabled = true;
68} 69}
69 70
70// Name, OnProfileSelect (called when selected in drop down), OnConfigure (called when configure 71// Name, OnProfileSelect (called when selected in drop down), OnConfigure (called when configure
@@ -78,7 +79,7 @@ constexpr std::array<InputProfile, 3> INPUT_PROFILES{{
78 }}, 79 }},
79 {QT_TR_NOOP("Single Player - Dual Joycons - Docked"), DualJoyconsDockedOnProfileSelect, 80 {QT_TR_NOOP("Single Player - Dual Joycons - Docked"), DualJoyconsDockedOnProfileSelect,
80 [](ConfigureInputSimple* caller) { 81 [](ConfigureInputSimple* caller) {
81 CallConfigureDialog<ConfigureInputPlayer>(caller, 1, false); 82 CallConfigureDialog<ConfigureInputPlayer>(caller, PLAYER_0_INDEX, false);
82 }}, 83 }},
83 {QT_TR_NOOP("Custom"), [] {}, CallConfigureDialog<ConfigureInput>}, 84 {QT_TR_NOOP("Custom"), [] {}, CallConfigureDialog<ConfigureInput>},
84}}; 85}};
diff --git a/src/yuzu/configuration/configure_mouse_advanced.cpp b/src/yuzu/configuration/configure_mouse_advanced.cpp
index 0a4abe34f..e0647ea5b 100644
--- a/src/yuzu/configuration/configure_mouse_advanced.cpp
+++ b/src/yuzu/configuration/configure_mouse_advanced.cpp
@@ -184,18 +184,19 @@ void ConfigureMouseAdvanced::HandleClick(
184 button->setText(tr("[press key]")); 184 button->setText(tr("[press key]"));
185 button->setFocus(); 185 button->setFocus();
186 186
187 const auto iter = std::find(button_map.begin(), button_map.end(), button); 187 // Keyboard keys can only be used as button devices
188 ASSERT(iter != button_map.end()); 188 want_keyboard_keys = type == InputCommon::Polling::DeviceType::Button;
189 const auto index = std::distance(button_map.begin(), iter); 189 if (want_keyboard_keys) {
190 ASSERT(index < Settings::NativeButton::NumButtons && index >= 0); 190 const auto iter = std::find(button_map.begin(), button_map.end(), button);
191 ASSERT(iter != button_map.end());
192 const auto index = std::distance(button_map.begin(), iter);
193 ASSERT(index < Settings::NativeButton::NumButtons && index >= 0);
194 }
191 195
192 input_setter = new_input_setter; 196 input_setter = new_input_setter;
193 197
194 device_pollers = InputCommon::Polling::GetPollers(type); 198 device_pollers = InputCommon::Polling::GetPollers(type);
195 199
196 // Keyboard keys can only be used as button devices
197 want_keyboard_keys = type == InputCommon::Polling::DeviceType::Button;
198
199 for (auto& poller : device_pollers) { 200 for (auto& poller : device_pollers) {
200 poller->Start(); 201 poller->Start();
201 } 202 }
diff --git a/src/yuzu/game_list.cpp b/src/yuzu/game_list.cpp
index a2b88c787..dccbabcbf 100644
--- a/src/yuzu/game_list.cpp
+++ b/src/yuzu/game_list.cpp
@@ -315,7 +315,7 @@ GameList::GameList(FileSys::VirtualFilesystem vfs, FileSys::ManualContentProvide
315 item_model->setHeaderData(COLUMN_FILE_TYPE - 1, Qt::Horizontal, tr("File type")); 315 item_model->setHeaderData(COLUMN_FILE_TYPE - 1, Qt::Horizontal, tr("File type"));
316 item_model->setHeaderData(COLUMN_SIZE - 1, Qt::Horizontal, tr("Size")); 316 item_model->setHeaderData(COLUMN_SIZE - 1, Qt::Horizontal, tr("Size"));
317 } 317 }
318 item_model->setSortRole(GameListItemPath::TitleRole); 318 item_model->setSortRole(GameListItemPath::SortRole);
319 319
320 connect(main_window, &GMainWindow::UpdateThemedIcons, this, &GameList::onUpdateThemedIcons); 320 connect(main_window, &GMainWindow::UpdateThemedIcons, this, &GameList::onUpdateThemedIcons);
321 connect(tree_view, &QTreeView::activated, this, &GameList::ValidateEntry); 321 connect(tree_view, &QTreeView::activated, this, &GameList::ValidateEntry);
@@ -441,6 +441,8 @@ void GameList::DonePopulating(QStringList watch_list) {
441 if (children_total > 0) { 441 if (children_total > 0) {
442 search_field->setFocus(); 442 search_field->setFocus();
443 } 443 }
444 item_model->sort(tree_view->header()->sortIndicatorSection(),
445 tree_view->header()->sortIndicatorOrder());
444} 446}
445 447
446void GameList::PopupContextMenu(const QPoint& menu_location) { 448void GameList::PopupContextMenu(const QPoint& menu_location) {
@@ -666,8 +668,6 @@ void GameList::LoadInterfaceLayout() {
666 // so make it as large as possible as default. 668 // so make it as large as possible as default.
667 header->resizeSection(COLUMN_NAME, header->width()); 669 header->resizeSection(COLUMN_NAME, header->width());
668 } 670 }
669
670 item_model->sort(header->sortIndicatorSection(), header->sortIndicatorOrder());
671} 671}
672 672
673const QStringList GameList::supported_file_extensions = { 673const QStringList GameList::supported_file_extensions = {
diff --git a/src/yuzu/game_list_p.h b/src/yuzu/game_list_p.h
index 7cde72d1b..3e6d5a7cd 100644
--- a/src/yuzu/game_list_p.h
+++ b/src/yuzu/game_list_p.h
@@ -65,10 +65,10 @@ public:
65 */ 65 */
66class GameListItemPath : public GameListItem { 66class GameListItemPath : public GameListItem {
67public: 67public:
68 static const int TitleRole = SortRole; 68 static const int TitleRole = SortRole + 1;
69 static const int FullPathRole = SortRole + 1; 69 static const int FullPathRole = SortRole + 2;
70 static const int ProgramIdRole = SortRole + 2; 70 static const int ProgramIdRole = SortRole + 3;
71 static const int FileTypeRole = SortRole + 3; 71 static const int FileTypeRole = SortRole + 4;
72 72
73 GameListItemPath() = default; 73 GameListItemPath() = default;
74 GameListItemPath(const QString& game_path, const std::vector<u8>& picture_data, 74 GameListItemPath(const QString& game_path, const std::vector<u8>& picture_data,
@@ -95,7 +95,7 @@ public:
95 } 95 }
96 96
97 QVariant data(int role) const override { 97 QVariant data(int role) const override {
98 if (role == Qt::DisplayRole) { 98 if (role == Qt::DisplayRole || role == SortRole) {
99 std::string filename; 99 std::string filename;
100 Common::SplitPath(data(FullPathRole).toString().toStdString(), nullptr, &filename, 100 Common::SplitPath(data(FullPathRole).toString().toStdString(), nullptr, &filename,
101 nullptr); 101 nullptr);
@@ -110,6 +110,9 @@ public:
110 const auto& row1 = row_data.at(UISettings::values.row_1_text_id); 110 const auto& row1 = row_data.at(UISettings::values.row_1_text_id);
111 const int row2_id = UISettings::values.row_2_text_id; 111 const int row2_id = UISettings::values.row_2_text_id;
112 112
113 if (role == SortRole)
114 return row1.toLower();
115
113 if (row2_id == 4) // None 116 if (row2_id == 4) // None
114 return row1; 117 return row1;
115 118
@@ -123,6 +126,13 @@ public:
123 126
124 return GameListItem::data(role); 127 return GameListItem::data(role);
125 } 128 }
129
130 /**
131 * Override to prevent automatic sorting.
132 */
133 bool operator<(const QStandardItem& other) const override {
134 return false;
135 }
126}; 136};
127 137
128class GameListItemCompat : public GameListItem { 138class GameListItemCompat : public GameListItem {
@@ -289,6 +299,10 @@ public:
289 int type() const override { 299 int type() const override {
290 return static_cast<int>(GameListItemType::AddDir); 300 return static_cast<int>(GameListItemType::AddDir);
291 } 301 }
302
303 bool operator<(const QStandardItem& other) const override {
304 return false;
305 }
292}; 306};
293 307
294class GameList; 308class GameList;
diff --git a/src/yuzu_cmd/emu_window/emu_window_sdl2_gl.cpp b/src/yuzu_cmd/emu_window/emu_window_sdl2_gl.cpp
index 3522dcf6d..411e7e647 100644
--- a/src/yuzu_cmd/emu_window/emu_window_sdl2_gl.cpp
+++ b/src/yuzu_cmd/emu_window/emu_window_sdl2_gl.cpp
@@ -156,12 +156,6 @@ EmuWindow_SDL2_GL::~EmuWindow_SDL2_GL() {
156 SDL_GL_DeleteContext(window_context); 156 SDL_GL_DeleteContext(window_context);
157} 157}
158 158
159void EmuWindow_SDL2_GL::RetrieveVulkanHandlers(void* get_instance_proc_addr, void* instance,
160 void* surface) const {
161 // Should not have been called from OpenGL
162 UNREACHABLE();
163}
164
165std::unique_ptr<Core::Frontend::GraphicsContext> EmuWindow_SDL2_GL::CreateSharedContext() const { 159std::unique_ptr<Core::Frontend::GraphicsContext> EmuWindow_SDL2_GL::CreateSharedContext() const {
166 return std::make_unique<SDLGLContext>(); 160 return std::make_unique<SDLGLContext>();
167} 161}
diff --git a/src/yuzu_cmd/emu_window/emu_window_sdl2_gl.h b/src/yuzu_cmd/emu_window/emu_window_sdl2_gl.h
index e092021d7..48bb41683 100644
--- a/src/yuzu_cmd/emu_window/emu_window_sdl2_gl.h
+++ b/src/yuzu_cmd/emu_window/emu_window_sdl2_gl.h
@@ -15,10 +15,6 @@ public:
15 15
16 void Present() override; 16 void Present() override;
17 17
18 /// Ignored in OpenGL
19 void RetrieveVulkanHandlers(void* get_instance_proc_addr, void* instance,
20 void* surface) const override;
21
22 std::unique_ptr<Core::Frontend::GraphicsContext> CreateSharedContext() const override; 18 std::unique_ptr<Core::Frontend::GraphicsContext> CreateSharedContext() const override;
23 19
24private: 20private:
diff --git a/src/yuzu_cmd/emu_window/emu_window_sdl2_vk.cpp b/src/yuzu_cmd/emu_window/emu_window_sdl2_vk.cpp
index 46d053f04..f2990910e 100644
--- a/src/yuzu_cmd/emu_window/emu_window_sdl2_vk.cpp
+++ b/src/yuzu_cmd/emu_window/emu_window_sdl2_vk.cpp
@@ -2,102 +2,62 @@
2// Licensed under GPLv2 or any later version 2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included. 3// Refer to the license.txt file included.
4 4
5#include <algorithm> 5#include <cstdlib>
6#include <memory>
6#include <string> 7#include <string>
7#include <vector> 8
8#include <SDL.h>
9#include <SDL_vulkan.h>
10#include <fmt/format.h> 9#include <fmt/format.h>
11#include <vulkan/vulkan.h> 10
12#include "common/assert.h" 11#include "common/assert.h"
13#include "common/logging/log.h" 12#include "common/logging/log.h"
14#include "common/scm_rev.h" 13#include "common/scm_rev.h"
15#include "core/settings.h" 14#include "core/settings.h"
15#include "video_core/renderer_vulkan/renderer_vulkan.h"
16#include "yuzu_cmd/emu_window/emu_window_sdl2_vk.h" 16#include "yuzu_cmd/emu_window/emu_window_sdl2_vk.h"
17 17
18// Include these late to avoid polluting everything with Xlib macros
19#include <SDL.h>
20#include <SDL_syswm.h>
21
18EmuWindow_SDL2_VK::EmuWindow_SDL2_VK(Core::System& system, bool fullscreen) 22EmuWindow_SDL2_VK::EmuWindow_SDL2_VK(Core::System& system, bool fullscreen)
19 : EmuWindow_SDL2{system, fullscreen} { 23 : EmuWindow_SDL2{system, fullscreen} {
20 if (SDL_Vulkan_LoadLibrary(nullptr) != 0) {
21 LOG_CRITICAL(Frontend, "SDL failed to load the Vulkan library: {}", SDL_GetError());
22 exit(EXIT_FAILURE);
23 }
24
25 vkGetInstanceProcAddr =
26 reinterpret_cast<PFN_vkGetInstanceProcAddr>(SDL_Vulkan_GetVkGetInstanceProcAddr());
27 if (vkGetInstanceProcAddr == nullptr) {
28 LOG_CRITICAL(Frontend, "Failed to retrieve Vulkan function pointer!");
29 exit(EXIT_FAILURE);
30 }
31
32 const std::string window_title = fmt::format("yuzu {} | {}-{} (Vulkan)", Common::g_build_name, 24 const std::string window_title = fmt::format("yuzu {} | {}-{} (Vulkan)", Common::g_build_name,
33 Common::g_scm_branch, Common::g_scm_desc); 25 Common::g_scm_branch, Common::g_scm_desc);
34 render_window = 26 render_window =
35 SDL_CreateWindow(window_title.c_str(), 27 SDL_CreateWindow(window_title.c_str(), SDL_WINDOWPOS_UNDEFINED, SDL_WINDOWPOS_UNDEFINED,
36 SDL_WINDOWPOS_UNDEFINED, // x position
37 SDL_WINDOWPOS_UNDEFINED, // y position
38 Layout::ScreenUndocked::Width, Layout::ScreenUndocked::Height, 28 Layout::ScreenUndocked::Width, Layout::ScreenUndocked::Height,
39 SDL_WINDOW_RESIZABLE | SDL_WINDOW_ALLOW_HIGHDPI | SDL_WINDOW_VULKAN); 29 SDL_WINDOW_RESIZABLE | SDL_WINDOW_ALLOW_HIGHDPI);
40
41 const bool use_standard_layers = UseStandardLayers(vkGetInstanceProcAddr);
42
43 u32 extra_ext_count{};
44 if (!SDL_Vulkan_GetInstanceExtensions(render_window, &extra_ext_count, NULL)) {
45 LOG_CRITICAL(Frontend, "Failed to query Vulkan extensions count from SDL! {}",
46 SDL_GetError());
47 exit(1);
48 }
49
50 auto extra_ext_names = std::make_unique<const char* []>(extra_ext_count);
51 if (!SDL_Vulkan_GetInstanceExtensions(render_window, &extra_ext_count, extra_ext_names.get())) {
52 LOG_CRITICAL(Frontend, "Failed to query Vulkan extensions from SDL! {}", SDL_GetError());
53 exit(1);
54 }
55 std::vector<const char*> enabled_extensions;
56 enabled_extensions.insert(enabled_extensions.begin(), extra_ext_names.get(),
57 extra_ext_names.get() + extra_ext_count);
58
59 std::vector<const char*> enabled_layers;
60 if (use_standard_layers) {
61 enabled_extensions.push_back(VK_EXT_DEBUG_UTILS_EXTENSION_NAME);
62 enabled_layers.push_back("VK_LAYER_LUNARG_standard_validation");
63 }
64
65 VkApplicationInfo app_info{};
66 app_info.sType = VK_STRUCTURE_TYPE_APPLICATION_INFO;
67 app_info.apiVersion = VK_API_VERSION_1_1;
68 app_info.applicationVersion = VK_MAKE_VERSION(0, 1, 0);
69 app_info.pApplicationName = "yuzu-emu";
70 app_info.engineVersion = VK_MAKE_VERSION(0, 1, 0);
71 app_info.pEngineName = "yuzu-emu";
72 30
73 VkInstanceCreateInfo instance_ci{}; 31 SDL_SysWMinfo wm;
74 instance_ci.sType = VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO; 32 if (SDL_GetWindowWMInfo(render_window, &wm) == SDL_FALSE) {
75 instance_ci.pApplicationInfo = &app_info; 33 LOG_CRITICAL(Frontend, "Failed to get information from the window manager");
76 instance_ci.enabledExtensionCount = static_cast<u32>(enabled_extensions.size()); 34 std::exit(EXIT_FAILURE);
77 instance_ci.ppEnabledExtensionNames = enabled_extensions.data();
78 if (Settings::values.renderer_debug) {
79 instance_ci.enabledLayerCount = static_cast<u32>(enabled_layers.size());
80 instance_ci.ppEnabledLayerNames = enabled_layers.data();
81 } 35 }
82 36
83 const auto vkCreateInstance = 37 switch (wm.subsystem) {
84 reinterpret_cast<PFN_vkCreateInstance>(vkGetInstanceProcAddr(nullptr, "vkCreateInstance")); 38#ifdef SDL_VIDEO_DRIVER_WINDOWS
85 if (vkCreateInstance == nullptr || 39 case SDL_SYSWM_TYPE::SDL_SYSWM_WINDOWS:
86 vkCreateInstance(&instance_ci, nullptr, &vk_instance) != VK_SUCCESS) { 40 window_info.type = Core::Frontend::WindowSystemType::Windows;
87 LOG_CRITICAL(Frontend, "Failed to create Vulkan instance!"); 41 window_info.render_surface = reinterpret_cast<void*>(wm.info.win.window);
88 exit(EXIT_FAILURE); 42 break;
89 } 43#endif
90 44#ifdef SDL_VIDEO_DRIVER_X11
91 vkDestroyInstance = reinterpret_cast<PFN_vkDestroyInstance>( 45 case SDL_SYSWM_TYPE::SDL_SYSWM_X11:
92 vkGetInstanceProcAddr(vk_instance, "vkDestroyInstance")); 46 window_info.type = Core::Frontend::WindowSystemType::X11;
93 if (vkDestroyInstance == nullptr) { 47 window_info.display_connection = wm.info.x11.display;
94 LOG_CRITICAL(Frontend, "Failed to retrieve Vulkan function pointer!"); 48 window_info.render_surface = reinterpret_cast<void*>(wm.info.x11.window);
95 exit(EXIT_FAILURE); 49 break;
96 } 50#endif
97 51#ifdef SDL_VIDEO_DRIVER_WAYLAND
98 if (!SDL_Vulkan_CreateSurface(render_window, vk_instance, &vk_surface)) { 52 case SDL_SYSWM_TYPE::SDL_SYSWM_WAYLAND:
99 LOG_CRITICAL(Frontend, "Failed to create Vulkan surface! {}", SDL_GetError()); 53 window_info.type = Core::Frontend::WindowSystemType::Wayland;
100 exit(EXIT_FAILURE); 54 window_info.display_connection = wm.info.wl.display;
55 window_info.render_surface = wm.info.wl.surface;
56 break;
57#endif
58 default:
59 LOG_CRITICAL(Frontend, "Window manager subsystem not implemented");
60 std::exit(EXIT_FAILURE);
101 } 61 }
102 62
103 OnResize(); 63 OnResize();
@@ -107,51 +67,12 @@ EmuWindow_SDL2_VK::EmuWindow_SDL2_VK(Core::System& system, bool fullscreen)
107 Common::g_scm_branch, Common::g_scm_desc); 67 Common::g_scm_branch, Common::g_scm_desc);
108} 68}
109 69
110EmuWindow_SDL2_VK::~EmuWindow_SDL2_VK() { 70EmuWindow_SDL2_VK::~EmuWindow_SDL2_VK() = default;
111 vkDestroyInstance(vk_instance, nullptr);
112}
113
114void EmuWindow_SDL2_VK::RetrieveVulkanHandlers(void* get_instance_proc_addr, void* instance,
115 void* surface) const {
116 const auto instance_proc_addr = vkGetInstanceProcAddr;
117 std::memcpy(get_instance_proc_addr, &instance_proc_addr, sizeof(instance_proc_addr));
118 std::memcpy(instance, &vk_instance, sizeof(vk_instance));
119 std::memcpy(surface, &vk_surface, sizeof(vk_surface));
120}
121 71
122std::unique_ptr<Core::Frontend::GraphicsContext> EmuWindow_SDL2_VK::CreateSharedContext() const { 72std::unique_ptr<Core::Frontend::GraphicsContext> EmuWindow_SDL2_VK::CreateSharedContext() const {
123 return nullptr; 73 return nullptr;
124} 74}
125 75
126bool EmuWindow_SDL2_VK::UseStandardLayers(PFN_vkGetInstanceProcAddr vkGetInstanceProcAddr) const {
127 if (!Settings::values.renderer_debug) {
128 return false;
129 }
130
131 const auto vkEnumerateInstanceLayerProperties =
132 reinterpret_cast<PFN_vkEnumerateInstanceLayerProperties>(
133 vkGetInstanceProcAddr(nullptr, "vkEnumerateInstanceLayerProperties"));
134 if (vkEnumerateInstanceLayerProperties == nullptr) {
135 LOG_CRITICAL(Frontend, "Failed to retrieve Vulkan function pointer!");
136 return false;
137 }
138
139 u32 available_layers_count{};
140 if (vkEnumerateInstanceLayerProperties(&available_layers_count, nullptr) != VK_SUCCESS) {
141 LOG_CRITICAL(Frontend, "Failed to enumerate Vulkan validation layers!");
142 return false;
143 }
144 std::vector<VkLayerProperties> layers(available_layers_count);
145 if (vkEnumerateInstanceLayerProperties(&available_layers_count, layers.data()) != VK_SUCCESS) {
146 LOG_CRITICAL(Frontend, "Failed to enumerate Vulkan validation layers!");
147 return false;
148 }
149
150 return std::find_if(layers.begin(), layers.end(), [&](const auto& layer) {
151 return layer.layerName == std::string("VK_LAYER_LUNARG_standard_validation");
152 }) != layers.end();
153}
154
155void EmuWindow_SDL2_VK::Present() { 76void EmuWindow_SDL2_VK::Present() {
156 // TODO (bunnei): ImplementMe 77 // TODO (bunnei): ImplementMe
157} 78}
diff --git a/src/yuzu_cmd/emu_window/emu_window_sdl2_vk.h b/src/yuzu_cmd/emu_window/emu_window_sdl2_vk.h
index 3dd1f3f61..b8021ebea 100644
--- a/src/yuzu_cmd/emu_window/emu_window_sdl2_vk.h
+++ b/src/yuzu_cmd/emu_window/emu_window_sdl2_vk.h
@@ -4,27 +4,21 @@
4 4
5#pragma once 5#pragma once
6 6
7#include <vulkan/vulkan.h> 7#include <memory>
8
8#include "core/frontend/emu_window.h" 9#include "core/frontend/emu_window.h"
9#include "yuzu_cmd/emu_window/emu_window_sdl2.h" 10#include "yuzu_cmd/emu_window/emu_window_sdl2.h"
10 11
12namespace Core {
13class System;
14}
15
11class EmuWindow_SDL2_VK final : public EmuWindow_SDL2 { 16class EmuWindow_SDL2_VK final : public EmuWindow_SDL2 {
12public: 17public:
13 explicit EmuWindow_SDL2_VK(Core::System& system, bool fullscreen); 18 explicit EmuWindow_SDL2_VK(Core::System& system, bool fullscreen);
14 ~EmuWindow_SDL2_VK(); 19 ~EmuWindow_SDL2_VK();
15 20
16 void Present() override; 21 void Present() override;
17 void RetrieveVulkanHandlers(void* get_instance_proc_addr, void* instance,
18 void* surface) const override;
19 22
20 std::unique_ptr<Core::Frontend::GraphicsContext> CreateSharedContext() const override; 23 std::unique_ptr<Core::Frontend::GraphicsContext> CreateSharedContext() const override;
21
22private:
23 bool UseStandardLayers(PFN_vkGetInstanceProcAddr vkGetInstanceProcAddr) const;
24
25 VkInstance vk_instance{};
26 VkSurfaceKHR vk_surface{};
27
28 PFN_vkGetInstanceProcAddr vkGetInstanceProcAddr{};
29 PFN_vkDestroyInstance vkDestroyInstance{};
30}; 24};
diff --git a/src/yuzu_tester/emu_window/emu_window_sdl2_hide.cpp b/src/yuzu_tester/emu_window/emu_window_sdl2_hide.cpp
index a837430cc..8584f6671 100644
--- a/src/yuzu_tester/emu_window/emu_window_sdl2_hide.cpp
+++ b/src/yuzu_tester/emu_window/emu_window_sdl2_hide.cpp
@@ -116,10 +116,6 @@ bool EmuWindow_SDL2_Hide::IsShown() const {
116 return false; 116 return false;
117} 117}
118 118
119void EmuWindow_SDL2_Hide::RetrieveVulkanHandlers(void*, void*, void*) const {
120 UNREACHABLE();
121}
122
123class SDLGLContext : public Core::Frontend::GraphicsContext { 119class SDLGLContext : public Core::Frontend::GraphicsContext {
124public: 120public:
125 explicit SDLGLContext() { 121 explicit SDLGLContext() {
diff --git a/src/yuzu_tester/emu_window/emu_window_sdl2_hide.h b/src/yuzu_tester/emu_window/emu_window_sdl2_hide.h
index 9f5d04fca..c13a82df2 100644
--- a/src/yuzu_tester/emu_window/emu_window_sdl2_hide.h
+++ b/src/yuzu_tester/emu_window/emu_window_sdl2_hide.h
@@ -19,10 +19,6 @@ public:
19 /// Whether the screen is being shown or not. 19 /// Whether the screen is being shown or not.
20 bool IsShown() const override; 20 bool IsShown() const override;
21 21
22 /// Retrieves Vulkan specific handlers from the window
23 void RetrieveVulkanHandlers(void* get_instance_proc_addr, void* instance,
24 void* surface) const override;
25
26 std::unique_ptr<Core::Frontend::GraphicsContext> CreateSharedContext() const override; 22 std::unique_ptr<Core::Frontend::GraphicsContext> CreateSharedContext() const override;
27 23
28private: 24private: